about summary refs log tree commit diff stats
path: root/bsfs/triple_store/sparql
diff options
context:
space:
mode:
author Matthias Baumgartner <dev@igsor.net> 2023-01-15 21:00:12 +0100
committer Matthias Baumgartner <dev@igsor.net> 2023-01-15 21:00:12 +0100
commit 80a97bfa9f22d0d6dd25928fe1754a3a0d1de78a (patch)
tree 30d30fb669d7b43d7324ef8027306c24c1ec1ac2 /bsfs/triple_store/sparql
parent ccaee71e2b6135d3b324fe551c8652940b67aab3 (diff)
download bsfs-80a97bfa9f22d0d6dd25928fe1754a3a0d1de78a.tar.gz
bsfs-80a97bfa9f22d0d6dd25928fe1754a3a0d1de78a.tar.bz2
bsfs-80a97bfa9f22d0d6dd25928fe1754a3a0d1de78a.zip
Distance filter ast node
Diffstat (limited to 'bsfs/triple_store/sparql')
-rw-r--r-- bsfs/triple_store/sparql/distance.py 56
-rw-r--r-- bsfs/triple_store/sparql/parse_filter.py 41
-rw-r--r-- bsfs/triple_store/sparql/sparql.py 13
3 files changed, 108 insertions, 2 deletions
diff --git a/bsfs/triple_store/sparql/distance.py b/bsfs/triple_store/sparql/distance.py
new file mode 100644
index 0000000..2f5387a
--- /dev/null
+++ b/bsfs/triple_store/sparql/distance.py
@@ -0,0 +1,56 @@
+"""
+
+Part of the BlackStar filesystem (bsfs) module.
+A copy of the license is provided with the project.
+Author: Matthias Baumgartner, 2022
+"""
+# standard imports
+import typing
+
+# external imports
+import numpy as np
+
+# bsfs imports
+from bsfs.namespace import ns
+
+# constants
+EPS = 1e-9
+
+# exports
+__all__: typing.Sequence[str] = (
+ 'DISTANCE_FU',
+ )
+
+
+## code ##
+
+def euclid(fst, snd) -> float:
+ """Return the Euclidean distance (l2 norm of the element-wise difference) between *fst* and *snd*."""
+ fst = np.array(fst)
+ snd = np.array(snd)
+ return float(np.linalg.norm(fst - snd))
+
+def cosine(fst, snd) -> float:
+ """Return the cosine distance 1 - <fst, snd> / (|fst| * |snd| + EPS); exactly 0.0 if all elements are equal."""
+ fst = np.array(fst)
+ snd = np.array(snd)
+ if (fst == snd).all():
+ return 0.0
+ nrm0 = np.linalg.norm(fst)
+ nrm1 = np.linalg.norm(snd)
+ return float(1.0 - np.dot(fst, snd) / (nrm0 * nrm1 + EPS)) # EPS keeps the denominator non-zero if a norm is zero
+
+def manhatten(fst, snd) -> float:
+ """Return the Manhattan (cityblock) distance, i.e. the l1 norm of the element-wise difference."""
+ fst = np.array(fst)
+ snd = np.array(snd)
+ return float(np.abs(fst - snd).sum())
+
+# Known distance functions, keyed by the value a Feature carries in its `distance` attribute.
+DISTANCE_FU = {
+ ns.bsfs.euclidean: euclid,
+ ns.bsfs.cosine: cosine,
+ ns.bsfs.manhatten: manhatten,
+}
+
+## EOF ##
diff --git a/bsfs/triple_store/sparql/parse_filter.py b/bsfs/triple_store/sparql/parse_filter.py
index 5d8a2d9..8b6b976 100644
--- a/bsfs/triple_store/sparql/parse_filter.py
+++ b/bsfs/triple_store/sparql/parse_filter.py
@@ -5,19 +5,29 @@ A copy of the license is provided with the project.
Author: Matthias Baumgartner, 2022
"""
# imports
+import operator
import typing
+# external imports
+import rdflib
+
# bsfs imports
from bsfs import schema as bsc
from bsfs.namespace import ns
from bsfs.query import ast
from bsfs.utils import URI, errors
+# inner-module imports
+from .distance import DISTANCE_FU
+
# exports
__all__: typing.Sequence[str] = (
'Filter',
)
+
+## code ##
+
class _GenHopName():
"""Generator that produces a new unique symbol name with each iteration."""
@@ -46,7 +56,8 @@ class Filter():
# Generator that produces unique symbol names.
ngen: _GenHopName
- def __init__(self, schema):
+ def __init__(self, graph, schema):
+ self.graph = graph
self.schema = schema
self.ngen = _GenHopName()
@@ -84,6 +95,8 @@ class Filter():
return self._not(type_, node, head)
if isinstance(node, ast.filter.Has):
return self._has(type_, node, head)
+ if isinstance(node, ast.filter.Distance):
+ return self._distance(type_, node, head)
if isinstance(node, ast.filter.Any):
return self._any(type_, node, head)
if isinstance(node, ast.filter.All):
@@ -243,6 +256,32 @@ class Filter():
# combine
return num_preds + ' . ' + count_bounds
+ def _distance(self, node_type: bsc.Vertex, node: ast.filter.Distance, head: str) -> str:
+ """Return a SPARQL `VALUES` clause binding *head* to the graph's literals of feature type *node_type*
+ whose distance to node.reference is < (strict) or <= node.threshold; rejects non-Feature types."""
+ if not isinstance(node_type, bsc.Feature):
+ raise errors.BackendError(f'expected Feature, found {node_type}')
+ if len(node.reference) != node_type.dimension:
+ raise errors.ConsistencyError(
+ f'reference has dimension {len(node.reference)}, expected {node_type.dimension}')
+ # get distance metric of this feature (plain dict lookup: KeyError if it is not in DISTANCE_FU)
+ dist = DISTANCE_FU[node_type.distance]
+ # get operator: a strict filter excludes values exactly at the threshold
+ cmp = operator.lt if node.strict else operator.le
+ # get candidate values: scan all objects of the graph, keep this feature's literals within the threshold
+ candidates = {
+ f'"{cand}"^^<{node_type.uri}>' # NOTE(review): cand is interpolated unescaped - confirm it cannot contain quotes
+ for cand
+ in self.graph.objects()
+ if isinstance(cand, rdflib.Literal)
+ and cand.datatype == rdflib.URIRef(node_type.uri)
+ and cmp(dist(cand.value, node.reference), node.threshold)
+ }
+ # combine candidate values; without any match, bind a placeholder xsd:string literal instead of an empty list
+ values = ' '.join(candidates) if len(candidates) else f'"impossible value"^^<{ns.xsd.string}>'
+ # return sparql fragment
+ return f'VALUES {head} {{ {values} }}'
+
def _is(self, node_type: bsc.Vertex, node: ast.filter.Is, head: str) -> str:
"""
"""
diff --git a/bsfs/triple_store/sparql/sparql.py b/bsfs/triple_store/sparql/sparql.py
index 3877d1a..dfd9871 100644
--- a/bsfs/triple_store/sparql/sparql.py
+++ b/bsfs/triple_store/sparql/sparql.py
@@ -18,6 +18,7 @@ from bsfs.utils import errors, URI
# inner-module imports
from . import parse_filter
from .. import base
+from .distance import DISTANCE_FU
# exports
@@ -97,7 +98,7 @@ class SparqlStore(base.TripleStoreBase):
self._transaction = _Transaction(self._graph)
# NOTE: parsing bsfs.query.ast.filter.Has requires xsd:integer.
self._schema = bsc.Schema(literals={bsc.ROOT_NUMBER.child(ns.xsd.integer)})
- self._filter_parser = parse_filter.Filter(self._schema)
+ self._filter_parser = parse_filter.Filter(self._graph, self._schema)
# NOTE: mypy and pylint complain about the **kwargs not being listed (contrasting super)
# However, not having it here is clearer since it's explicit that there are no arguments.
@@ -123,6 +124,16 @@ class SparqlStore(base.TripleStoreBase):
# check compatibility: No contradicting definitions
if not self.schema.consistent_with(schema):
raise errors.ConsistencyError(f'{schema} is inconsistent with {self.schema}')
+ # check distance functions of features
+ invalid = {
+ (cand.uri, cand.distance)
+ for cand
+ in schema.literals()
+ if isinstance(cand, bsc.Feature) and cand.distance not in DISTANCE_FU}
+ if len(invalid) > 0:
+ cand, dist = zip(*invalid)
+ raise ValueError(
+ f'unknown distance function {",".join(dist)} in feature {", ".join(cand)}')
# commit the current transaction
self.commit()