about | summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
-rw-r--r-- bsfs/graph/resolve.py 5
-rw-r--r-- bsfs/query/ast/filter_.py 59
-rw-r--r-- bsfs/query/validator.py 16
-rw-r--r-- bsfs/triple_store/sparql/distance.py 56
-rw-r--r-- bsfs/triple_store/sparql/parse_filter.py 41
-rw-r--r-- bsfs/triple_store/sparql/sparql.py 13
-rw-r--r-- test/graph/test_resolve.py 13
-rw-r--r-- test/query/ast_test/test_filter_.py 35
-rw-r--r-- test/query/test_validator.py 27
-rw-r--r-- test/triple_store/sparql/test_distance.py 61
-rw-r--r-- test/triple_store/sparql/test_parse_filter.py 50
-rw-r--r-- test/triple_store/sparql/test_sparql.py 17
12 files changed, 375 insertions, 18 deletions
diff --git a/bsfs/graph/resolve.py b/bsfs/graph/resolve.py
index b671204..00b778b 100644
--- a/bsfs/graph/resolve.py
+++ b/bsfs/graph/resolve.py
@@ -63,6 +63,8 @@ class Filter():
return self._and(type_, node)
if isinstance(node, ast.filter.Or):
return self._or(type_, node)
+ if isinstance(node, ast.filter.Distance):
+ return self._distance(type_, node)
if isinstance(node, (ast.filter.Equals, ast.filter.Substring, \
ast.filter.StartsWith, ast.filter.EndsWith)):
return self._value(type_, node)
@@ -125,6 +127,9 @@ class Filter():
def _has(self, type_: bsc.Vertex, node: ast.filter.Has) -> ast.filter.Has: # pylint: disable=unused-argument
return node
+ def _distance(self, type_: bsc.Vertex, node: ast.filter.Distance): # pylint: disable=unused-argument
+ return node
+
def _value(self, type_: bsc.Vertex, node: ast.filter._Value) -> ast.filter._Value: # pylint: disable=unused-argument
return node
diff --git a/bsfs/query/ast/filter_.py b/bsfs/query/ast/filter_.py
index b129ded..2f0270c 100644
--- a/bsfs/query/ast/filter_.py
+++ b/bsfs/query/ast/filter_.py
@@ -252,8 +252,7 @@ class Has(FilterExpression):
class _Value(FilterExpression):
- """
- """
+ """Matches some value."""
# target value.
value: typing.Any
@@ -277,13 +276,13 @@ class Is(_Value):
class Equals(_Value):
"""Value matches exactly.
- NOTE: Value format must correspond to literal type; can be a string, a number, or a Node
+ NOTE: Value must correspond to literal type.
"""
class Substring(_Value):
"""Value matches a substring
- NOTE: value format must be a string
+ NOTE: value must be a string.
"""
@@ -295,9 +294,49 @@ class EndsWith(_Value):
"""Value ends with a given string."""
+class Distance(FilterExpression):
+ """Distance to a reference is (strictly) below a threshold. Assumes a Feature literal."""
+
+ # FIXME:
+ # (a) pass a node/predicate as anchor instead of a value.
+ # Then we don't need to materialize the reference.
+ # (b) pass a FilterExpression (_Bounded) instead of a threshold.
+ # Then, we could also query values greater than a threshold.
+
+ # reference value.
+ reference: typing.Any
+
+ # distance threshold.
+ threshold: float
+
+ # strict/open bound (True: distance < threshold) or closed bound (False: distance <= threshold).
+ strict: bool
+
+ def __init__(
+ self,
+ reference: typing.Any,
+ threshold: float,
+ strict: bool = False,
+ ):
+ self.reference = reference
+ self.threshold = float(threshold)
+ self.strict = bool(strict)
+
+ def __repr__(self) -> str:
+ return f'{typename(self)}({self.reference}, {self.threshold}, {self.strict})'
+
+ def __hash__(self) -> int:
+ return hash((super().__hash__(), tuple(self.reference), self.threshold, self.strict))
+
+ def __eq__(self, other) -> bool:
+ return super().__eq__(other) \
+ and self.reference == other.reference \
+ and self.threshold == other.threshold \
+ and self.strict == other.strict
+
+
class _Bounded(FilterExpression):
- """
- """
+ """Value is bounded by a threshold. Assumes a Number literal."""
# bound.
threshold: float
@@ -327,15 +366,11 @@ class _Bounded(FilterExpression):
class LessThan(_Bounded):
- """Value is (strictly) smaller than threshold.
- NOTE: only on numerical literals
- """
+ """Value is (strictly) smaller than threshold. Assumes a Number literal."""
class GreaterThan(_Bounded):
- """Value is (strictly) larger than threshold
- NOTE: only on numerical literals
- """
+ """Value is (strictly) larger than threshold. Assumes a Number literal."""
class Predicate(PredicateExpression):
diff --git a/bsfs/query/validator.py b/bsfs/query/validator.py
index ecea951..1b7f688 100644
--- a/bsfs/query/validator.py
+++ b/bsfs/query/validator.py
@@ -69,6 +69,8 @@ class Filter():
return self._not(type_, node)
if isinstance(node, ast.filter.Has):
return self._has(type_, node)
+ if isinstance(node, ast.filter.Distance):
+ return self._distance(type_, node)
if isinstance(node, (ast.filter.Any, ast.filter.All)):
return self._branch(type_, node)
if isinstance(node, (ast.filter.And, ast.filter.Or)):
@@ -177,6 +179,20 @@ class Filter():
# node.count is a numerical expression
self._parse_filter_expression(self.schema.literal(ns.bsfs.Number), node.count)
+ def _distance(self, type_: bsc.Vertex, node: ast.filter.Distance):
+ # type is a Feature literal
+ if not isinstance(type_, bsc.Feature):
+ raise errors.ConsistencyError(f'expected a Feature, found {type_}')
+ # type exists in the schema
+ if type_ not in self.schema.literals():
+ raise errors.ConsistencyError(f'literal {type_} is not in the schema')
+ # reference matches type_
+ if len(node.reference) != type_.dimension:
+ raise errors.ConsistencyError(f'reference has dimension {len(node.reference)}, expected {type_.dimension}')
+ # FIXME:
+ #if node.reference.dtype != type_.dtype:
+ # raise errors.ConsistencyError(f'')
+
## conditions
diff --git a/bsfs/triple_store/sparql/distance.py b/bsfs/triple_store/sparql/distance.py
new file mode 100644
index 0000000..2f5387a
--- /dev/null
+++ b/bsfs/triple_store/sparql/distance.py
@@ -0,0 +1,56 @@
+"""
+
+Part of the BlackStar filesystem (bsfs) module.
+A copy of the license is provided with the project.
+Author: Matthias Baumgartner, 2022
+"""
+# standard imports
+import typing
+
+# external imports
+import numpy as np
+
+# bsfs imports
+from bsfs.namespace import ns
+
+# constants
+EPS = 1e-9
+
+# exports
+__all__: typing.Sequence[str] = (
+ 'DISTANCE_FU',
+ )
+
+
+## code ##
+
+def euclid(fst, snd) -> float:
+ """Euclidean distance (l2 norm)."""
+ fst = np.array(fst)
+ snd = np.array(snd)
+ return float(np.linalg.norm(fst - snd))
+
+def cosine(fst, snd) -> float:
+ """Cosine distance."""
+ fst = np.array(fst)
+ snd = np.array(snd)
+ if (fst == snd).all():
+ return 0.0
+ nrm0 = np.linalg.norm(fst)
+ nrm1 = np.linalg.norm(snd)
+ return float(1.0 - np.dot(fst, snd) / (nrm0 * nrm1 + EPS))
+
+def manhatten(fst, snd) -> float:
+ """Manhattan (cityblock) distance (l1 norm)."""
+ fst = np.array(fst)
+ snd = np.array(snd)
+ return float(np.abs(fst - snd).sum())
+
+# Known distance functions.
+DISTANCE_FU = {
+ ns.bsfs.euclidean: euclid,
+ ns.bsfs.cosine: cosine,
+ ns.bsfs.manhatten: manhatten,
+}
+
+## EOF ##
diff --git a/bsfs/triple_store/sparql/parse_filter.py b/bsfs/triple_store/sparql/parse_filter.py
index 5d8a2d9..8b6b976 100644
--- a/bsfs/triple_store/sparql/parse_filter.py
+++ b/bsfs/triple_store/sparql/parse_filter.py
@@ -5,19 +5,29 @@ A copy of the license is provided with the project.
Author: Matthias Baumgartner, 2022
"""
# imports
+import operator
import typing
+# external imports
+import rdflib
+
# bsfs imports
from bsfs import schema as bsc
from bsfs.namespace import ns
from bsfs.query import ast
from bsfs.utils import URI, errors
+# inner-module imports
+from .distance import DISTANCE_FU
+
# exports
__all__: typing.Sequence[str] = (
'Filter',
)
+
+## code ##
+
class _GenHopName():
"""Generator that produces a new unique symbol name with each iteration."""
@@ -46,7 +56,8 @@ class Filter():
# Generator that produces unique symbol names.
ngen: _GenHopName
- def __init__(self, schema):
+ def __init__(self, graph, schema):
+ self.graph = graph
self.schema = schema
self.ngen = _GenHopName()
@@ -84,6 +95,8 @@ class Filter():
return self._not(type_, node, head)
if isinstance(node, ast.filter.Has):
return self._has(type_, node, head)
+ if isinstance(node, ast.filter.Distance):
+ return self._distance(type_, node, head)
if isinstance(node, ast.filter.Any):
return self._any(type_, node, head)
if isinstance(node, ast.filter.All):
@@ -243,6 +256,32 @@ class Filter():
# combine
return num_preds + ' . ' + count_bounds
+ def _distance(self, node_type: bsc.Vertex, node: ast.filter.Distance, head: str) -> str:
+ """
+ """
+ if not isinstance(node_type, bsc.Feature):
+ raise errors.BackendError(f'expected Feature, found {node_type}')
+ if len(node.reference) != node_type.dimension:
+ raise errors.ConsistencyError(
+ f'reference has dimension {len(node.reference)}, expected {node_type.dimension}')
+ # get distance metric
+ dist = DISTANCE_FU[node_type.distance]
+ # get operator
+ cmp = operator.lt if node.strict else operator.le
+ # get candidate values
+ candidates = {
+ f'"{cand}"^^<{node_type.uri}>'
+ for cand
+ in self.graph.objects()
+ if isinstance(cand, rdflib.Literal)
+ and cand.datatype == rdflib.URIRef(node_type.uri)
+ and cmp(dist(cand.value, node.reference), node.threshold)
+ }
+ # combine candidate values
+ values = ' '.join(candidates) if len(candidates) else f'"impossible value"^^<{ns.xsd.string}>'
+ # return sparql fragment
+ return f'VALUES {head} {{ {values} }}'
+
def _is(self, node_type: bsc.Vertex, node: ast.filter.Is, head: str) -> str:
"""
"""
diff --git a/bsfs/triple_store/sparql/sparql.py b/bsfs/triple_store/sparql/sparql.py
index 3877d1a..dfd9871 100644
--- a/bsfs/triple_store/sparql/sparql.py
+++ b/bsfs/triple_store/sparql/sparql.py
@@ -18,6 +18,7 @@ from bsfs.utils import errors, URI
# inner-module imports
from . import parse_filter
from .. import base
+from .distance import DISTANCE_FU
# exports
@@ -97,7 +98,7 @@ class SparqlStore(base.TripleStoreBase):
self._transaction = _Transaction(self._graph)
# NOTE: parsing bsfs.query.ast.filter.Has requires xsd:integer.
self._schema = bsc.Schema(literals={bsc.ROOT_NUMBER.child(ns.xsd.integer)})
- self._filter_parser = parse_filter.Filter(self._schema)
+ self._filter_parser = parse_filter.Filter(self._graph, self._schema)
# NOTE: mypy and pylint complain about the **kwargs not being listed (contrasting super)
# However, not having it here is clearer since it's explicit that there are no arguments.
@@ -123,6 +124,16 @@ class SparqlStore(base.TripleStoreBase):
# check compatibility: No contradicting definitions
if not self.schema.consistent_with(schema):
raise errors.ConsistencyError(f'{schema} is inconsistent with {self.schema}')
+ # check distance functions of features
+ invalid = {
+ (cand.uri, cand.distance)
+ for cand
+ in schema.literals()
+ if isinstance(cand, bsc.Feature) and cand.distance not in DISTANCE_FU}
+ if len(invalid) > 0:
+ cand, dist = zip(*invalid)
+ raise ValueError(
+ f'unknown distance function {",".join(dist)} in feature {", ".join(cand)}')
# commit the current transaction
self.commit()
diff --git a/test/graph/test_resolve.py b/test/graph/test_resolve.py
index 0861a53..0918b02 100644
--- a/test/graph/test_resolve.py
+++ b/test/graph/test_resolve.py
@@ -46,6 +46,13 @@ class TestFilter(unittest.TestCase):
bsfs:Feature rdfs:subClassOf bsfs:Array .
xsd:integer rdfs:subClassOf bsfs:Number .
+ bsfs:Colors rdfs:subClassOf bsfs:Feature ;
+ bsfs:dimension "5"^^xsd:integer .
+
+ bse:colors rdfs:subClassOf bsfs:Predicate ;
+ rdfs:domain bsfs:Entity ;
+ rdfs:range bsfs:Colors .
+
bse:comment rdfs:subClassOf bsfs:Predicate ;
rdfs:domain bsfs:Entity ;
rdfs:range xsd:string ;
@@ -147,12 +154,18 @@ class TestFilter(unittest.TestCase):
self.assertEqual(resolver(schema.node(ns.bsfs.Entity),
ast.filter.Has(ns.bse.comment)),
ast.filter.Has(ns.bse.comment))
+ # for sake of completeness: Distance
+ self.assertEqual(resolver(schema.node(ns.bsfs.Entity),
+ ast.filter.Any(ns.bse.colors, ast.filter.Distance([1,2,3,4,5], 1))),
+ ast.filter.Any(ns.bse.colors, ast.filter.Distance([1,2,3,4,5], 1)))
# route errors
self.assertRaises(errors.BackendError, resolver, schema.node(ns.bsfs.Tag),
ast.filter.Predicate(ns.bse.comment))
self.assertRaises(errors.BackendError, resolver, schema.node(ns.bsfs.Tag),
ast.filter.Any(ast.filter.PredicateExpression(), ast.filter.Equals('foo')))
self.assertRaises(errors.BackendError, resolver._one_of, ast.filter.OneOf(ast.filter.Predicate(ns.bsfs.Predicate)))
+ # for sake of coverage completeness: valid OneOf
+ self.assertIsNotNone(resolver._one_of(ast.filter.OneOf(ast.filter.Predicate(ns.bse.colors))))
# check schema consistency
self.assertRaises(errors.ConsistencyError, resolver, schema.node(ns.bsfs.Tag),
diff --git a/test/query/ast_test/test_filter_.py b/test/query/ast_test/test_filter_.py
index 4f69bdc..9eb92e2 100644
--- a/test/query/ast_test/test_filter_.py
+++ b/test/query/ast_test/test_filter_.py
@@ -15,7 +15,7 @@ from bsfs.utils import URI
from bsfs.query.ast.filter_ import _Expression, FilterExpression, PredicateExpression
from bsfs.query.ast.filter_ import _Branch, Any, All
from bsfs.query.ast.filter_ import _Agg, And, Or
-from bsfs.query.ast.filter_ import Not, Has
+from bsfs.query.ast.filter_ import Not, Has, Distance
from bsfs.query.ast.filter_ import _Value, Is, Equals, Substring, StartsWith, EndsWith
from bsfs.query.ast.filter_ import _Bounded, LessThan, GreaterThan
from bsfs.query.ast.filter_ import Predicate, OneOf
@@ -284,6 +284,39 @@ class TestValue(unittest.TestCase):
self.assertEqual(cls(f).value, f)
+class TestDistance(unittest.TestCase):
+ def test_essentials(self):
+ ref = (1,2,3)
+ # comparison
+ self.assertEqual(Distance(ref, 3), Distance(ref, 3))
+ self.assertEqual(hash(Distance(ref, 3)), hash(Distance(ref, 3)))
+ # comparison respects type
+ self.assertNotEqual(Distance(ref, 3), FilterExpression())
+ self.assertNotEqual(hash(Distance(ref, 3)), hash(FilterExpression()))
+ # comparison respects reference
+ self.assertNotEqual(Distance((1,2,3), 3, False), Distance((1,2), 3, False))
+ self.assertNotEqual(hash(Distance((1,2,3), 3, False)), hash(Distance((1,2), 3, False)))
+ self.assertNotEqual(Distance((1,2,3), 3, False), Distance((1,5,3), 3, False))
+ self.assertNotEqual(hash(Distance((1,2,3), 3, False)), hash(Distance((1,5,3), 3, False)))
+ # comparison respects threshold
+ self.assertNotEqual(Distance((1,2,3), 3, False), Distance((1,2,3), 3.1, False))
+ self.assertNotEqual(hash(Distance((1,2,3), 3, False)), hash(Distance((1,2,3), 3.1, False)))
+ # comparison respects strict flag
+ self.assertNotEqual(Distance((1,2,3), 3, False), Distance((1,2,3), 3, True))
+ self.assertNotEqual(hash(Distance((1,2,3), 3, False)), hash(Distance((1,2,3), 3, True)))
+ # string conversion
+ self.assertEqual(str(Distance(ref, 3, False)), 'Distance((1, 2, 3), 3.0, False)')
+ self.assertEqual(repr(Distance(ref, 3, False)), 'Distance((1, 2, 3), 3.0, False)')
+
+ def test_members(self):
+ self.assertEqual(Distance((1,2,3), 3, False).reference, (1,2,3))
+ self.assertEqual(Distance((3,2,1), 3, False).reference, (3,2,1))
+ self.assertEqual(Distance((1,2,3), 3, False).threshold, 3.0)
+ self.assertEqual(Distance((1,2,3), 53.45, False).threshold, 53.45)
+ self.assertEqual(Distance((1,2,3), 3, False).strict, False)
+ self.assertEqual(Distance((1,2,3), 3, True).strict, True)
+
+
class TestBounded(unittest.TestCase):
def test_essentials(self):
# comparison respects type
diff --git a/test/query/test_validator.py b/test/query/test_validator.py
index 63ead52..dc9d913 100644
--- a/test/query/test_validator.py
+++ b/test/query/test_validator.py
@@ -38,6 +38,15 @@ class TestFilter(unittest.TestCase):
bsfs:Feature rdfs:subClassOf bsfs:Array .
xsd:integer rdfs:subClassOf bsfs:Number .
+ bsfs:Colors rdfs:subClassOf bsfs:Feature ;
+ bsfs:dimension "5"^^xsd:integer ;
+ bsfs:dtype bsfs:f32 .
+
+ bse:color rdfs:subClassOf bsfs:Predicate ;
+ rdfs:domain bsfs:Node ;
+ rdfs:range bsfs:Colors ;
+ bsfs:unique "true"^^xsd:boolean .
+
bse:comment rdfs:subClassOf bsfs:Predicate ;
rdfs:domain bsfs:Node ;
rdfs:range xsd:string ;
@@ -88,6 +97,7 @@ class TestFilter(unittest.TestCase):
),
ast.filter.Not(ast.filter.Any(ns.bse.comment,
ast.filter.Not(ast.filter.Equals('hello world')))),
+ ast.filter.Any(ns.bse.color, ast.filter.Distance([1,2,3,4,5], 3)),
)))))
# invalid paths raise consistency error
self.assertRaises(errors.ConsistencyError, self.validate, self.schema.node(ns.bsfs.Entity),
@@ -257,6 +267,23 @@ class TestFilter(unittest.TestCase):
self.assertIsNone(self.validate._bounded(self.schema.literal(ns.xsd.integer), ast.filter.LessThan(0)))
self.assertIsNone(self.validate._bounded(self.schema.literal(ns.xsd.integer), ast.filter.GreaterThan(0)))
+ def test_distance(self):
+ # type must be a literal
+ self.assertRaises(errors.ConsistencyError, self.validate._distance, self.schema.node(ns.bsfs.Node),
+ ast.filter.Distance([1,2,3], 1, False))
+ # type must be a feature
+ self.assertRaises(errors.ConsistencyError, self.validate._distance, self.schema.literal(ns.bsfs.Array),
+ ast.filter.Distance([1,2,3], 1, False))
+ # type must be in the schema
+ self.assertRaises(errors.ConsistencyError, self.validate._distance, self.schema.literal(ns.bsfs.Feature).child(ns.bsfs.Invalid),
+ ast.filter.Distance([1,2,3], 1, False))
+ # FIXME: reference must be a numpy array
+ # reference must have the correct dimension
+ self.assertRaises(errors.ConsistencyError, self.validate._distance, self.schema.literal(ns.bsfs.Colors),
+ ast.filter.Distance([1,2,3], 1, False))
+ # FIXME: reference must have the correct dtype
+ # distance accepts correct expressions
+ self.assertIsNone(self.validate._distance(self.schema.literal(ns.bsfs.Colors), ast.filter.Distance([1,2,3,4,5], 1, False)))
## main ##
diff --git a/test/triple_store/sparql/test_distance.py b/test/triple_store/sparql/test_distance.py
new file mode 100644
index 0000000..0659459
--- /dev/null
+++ b/test/triple_store/sparql/test_distance.py
@@ -0,0 +1,61 @@
+"""
+
+Part of the bsfs test suite.
+A copy of the license is provided with the project.
+Author: Matthias Baumgartner, 2022
+"""
+# imports
+import numpy as np
+import unittest
+
+# objects to test
+from bsfs.triple_store.sparql import distance
+
+
+## code ##
+
+class TestDistance(unittest.TestCase):
+
+ def test_euclid(self):
+ # self-distance is zero
+ self.assertEqual(distance.euclid([1,2,3,4], [1,2,3,4]), 0.0)
+ # accepts list-like arguments
+ self.assertAlmostEqual(distance.euclid([1,2,3,4], [2,3,4,5]), 2.0, 3)
+ self.assertAlmostEqual(distance.euclid((1,2,3,4), (2,3,4,5)), 2.0, 3)
+ # dimension can vary
+ self.assertAlmostEqual(distance.euclid([1,2,3], [2,3,4]), 1.732, 3)
+ self.assertAlmostEqual(distance.euclid([1,2,3,4,5], [2,3,4,5,6]), 2.236, 3)
+ # vector can be zero
+ self.assertAlmostEqual(distance.euclid([0,0,0], [1,2,3]), 3.742, 3)
+
+ def test_cosine(self):
+ # self-distance is zero
+ self.assertEqual(distance.cosine([1,2,3,4], [1,2,3,4]), 0.0)
+ # accepts list-like arguments
+ self.assertAlmostEqual(distance.cosine([1,2,3,4], [4,3,2,1]), 0.333, 3)
+ self.assertAlmostEqual(distance.cosine((1,2,3,4), (4,3,2,1)), 0.333, 3)
+ # dimension can vary
+ self.assertAlmostEqual(distance.cosine([1,2,3], [3,2,1]), 0.286, 3)
+ self.assertAlmostEqual(distance.cosine([1,2,3,4,5], [5,4,3,2,1]), 0.364, 3)
+ # vector can be zero
+ self.assertAlmostEqual(distance.cosine([0,0,0], [1,2,3]), 1.0, 3)
+
+ def test_manhatten(self):
+ # self-distance is zero
+ self.assertEqual(distance.manhatten([1,2,3,4], [1,2,3,4]), 0.0)
+ # accepts list-like arguments
+ self.assertAlmostEqual(distance.manhatten([1,2,3,4], [2,3,4,5]), 4.0, 3)
+ self.assertAlmostEqual(distance.manhatten((1,2,3,4), (2,3,4,5)), 4.0, 3)
+ # dimension can vary
+ self.assertAlmostEqual(distance.manhatten([1,2,3], [2,3,4]), 3.0, 3)
+ self.assertAlmostEqual(distance.manhatten([1,2,3,4,5], [2,3,4,5,6]), 5.0, 3)
+ # vector can be zero
+ self.assertAlmostEqual(distance.manhatten([0,0,0], [1,2,3]), 6.0, 3)
+
+
+## main ##
+
+if __name__ == '__main__':
+ unittest.main()
+
+## EOF ##
diff --git a/test/triple_store/sparql/test_parse_filter.py b/test/triple_store/sparql/test_parse_filter.py
index 5c16f11..8764535 100644
--- a/test/triple_store/sparql/test_parse_filter.py
+++ b/test/triple_store/sparql/test_parse_filter.py
@@ -42,6 +42,15 @@ class TestParseFilter(unittest.TestCase):
xsd:integer rdfs:subClassOf bsfs:Number .
bsfs:URI rdfs:subClassOf bsfs:Literal .
+ bsfs:Colors rdfs:subClassOf bsfs:Feature ;
+ bsfs:dimension "4"^^xsd:integer ;
+ bsfs:dtype xsd:integer ;
+ bsfs:distance bsfs:euclidean .
+
+ bse:colors rdfs:subClassOf bsfs:Predicate ;
+ rdfs:domain bsfs:Entity ;
+ rdfs:range bsfs:Colors .
+
bse:comment rdfs:subClassOf bsfs:Predicate ;
rdfs:domain bsfs:Node ;
rdfs:range xsd:string ;
@@ -74,9 +83,6 @@ class TestParseFilter(unittest.TestCase):
''')
- # parser instance
- self.parser = Filter(self.schema)
-
# graph to test queries
self.graph = rdflib.Graph()
# schema hierarchies
@@ -117,6 +123,13 @@ class TestParseFilter(unittest.TestCase):
# image iso
self.graph.add((rdflib.URIRef('http://example.com/image#1234'), rdflib.URIRef(ns.bse.iso), rdflib.Literal(1234, datatype=rdflib.XSD.integer)))
self.graph.add((rdflib.URIRef('http://example.com/image#4321'), rdflib.URIRef(ns.bse.iso), rdflib.Literal(4321, datatype=rdflib.XSD.integer)))
+ # color features
+ self.graph.add((rdflib.URIRef('http://example.com/entity#1234'), rdflib.URIRef(ns.bse.colors), rdflib.Literal([1,2,3,4], datatype=rdflib.URIRef(ns.bsfs.Colors))))
+ self.graph.add((rdflib.URIRef('http://example.com/entity#4321'), rdflib.URIRef(ns.bse.colors), rdflib.Literal([4,3,2,1], datatype=rdflib.URIRef(ns.bsfs.Colors))))
+ self.graph.add((rdflib.URIRef('http://example.com/image#1234'), rdflib.URIRef(ns.bse.colors), rdflib.Literal([3,4,2,1], datatype=rdflib.URIRef(ns.bsfs.Colors))))
+
+ # parser instance
+ self.parser = Filter(self.graph, self.schema)
def test_routing(self):
@@ -617,6 +630,37 @@ class TestParseFilter(unittest.TestCase):
{'http://example.com/tag#1234'})
+ def test_distance(self):
+ # node colors distance to [2,4,3,1]
+ # entity#1234 [1,2,3,4] 3.742
+ # entity#4321 [4,3,2,1] 2.449
+ # image#1234 [3,4,2,1] 1.414
+
+ # _distance expects a feature
+ self.assertRaises(errors.BackendError, self.parser._distance, self.schema.node(ns.bsfs.Entity), ast.filter.Distance([1,2,3,4], 1), '')
+ # reference must have the correct dimension
+ self.assertRaises(errors.ConsistencyError, self.parser._distance, self.schema.literal(ns.bsfs.Colors), ast.filter.Distance([1,2,3], 1), '')
+ self.assertRaises(errors.ConsistencyError, self.parser._distance, self.schema.literal(ns.bsfs.Colors), ast.filter.Distance([1,2,3,4,5], 1), '')
+ # _distance respects threshold
+ q = self.parser(self.schema.node(ns.bsfs.Entity), ast.filter.Any(ns.bse.colors, ast.filter.Distance([2,4,3,1], 4)))
+ self.assertSetEqual({str(guid) for guid, in self.graph.query(q)},
+ {'http://example.com/entity#1234', 'http://example.com/entity#4321', 'http://example.com/image#1234'})
+ q = self.parser(self.schema.node(ns.bsfs.Entity), ast.filter.Any(ns.bse.colors, ast.filter.Distance([2,4,3,1], 3)))
+ self.assertSetEqual({str(guid) for guid, in self.graph.query(q)},
+ {'http://example.com/entity#4321', 'http://example.com/image#1234'})
+ q = self.parser(self.schema.node(ns.bsfs.Entity), ast.filter.Any(ns.bse.colors, ast.filter.Distance([2,4,3,1], 2)))
+ self.assertSetEqual({str(guid) for guid, in self.graph.query(q)},
+ {'http://example.com/image#1234'})
+ # result set can be empty
+ q = self.parser(self.schema.node(ns.bsfs.Entity), ast.filter.Any(ns.bse.colors, ast.filter.Distance([2,4,3,1], 1)))
+ self.assertSetEqual({str(guid) for guid, in self.graph.query(q)}, set())
+ # _distance respects strict
+ q = self.parser(self.schema.node(ns.bsfs.Entity), ast.filter.Any(ns.bse.colors, ast.filter.Distance([1,2,3,4], 0, False)))
+ self.assertSetEqual({str(guid) for guid, in self.graph.query(q)},
+ {'http://example.com/entity#1234'})
+ q = self.parser(self.schema.node(ns.bsfs.Entity), ast.filter.Any(ns.bse.colors, ast.filter.Distance([1,2,3,4], 0, True)))
+ self.assertSetEqual({str(guid) for guid, in self.graph.query(q)}, set())
+
def test_one_of(self):
# _one_of expects a node
self.assertRaises(errors.BackendError, self.parser._one_of,
diff --git a/test/triple_store/sparql/test_sparql.py b/test/triple_store/sparql/test_sparql.py
index 1f56a7e..435ca28 100644
--- a/test/triple_store/sparql/test_sparql.py
+++ b/test/triple_store/sparql/test_sparql.py
@@ -392,6 +392,23 @@ class TestSparqlStore(unittest.TestCase):
class Foo(): pass
self.assertRaises(TypeError, setattr, store, 'schema', Foo())
+ # cannot define features w/o known distance function
+ invalid = bsc.from_string('''
+ prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#>
+ prefix xsd: <http://www.w3.org/2001/XMLSchema#>
+ prefix bsfs: <http://bsfs.ai/schema/>
+ prefix bse: <http://bsfs.ai/schema/Entity#>
+
+ bsfs:Array rdfs:subClassOf bsfs:Literal .
+ bsfs:Feature rdfs:subClassOf bsfs:Array .
+
+ bsfs:Colors rdfs:subClassOf bsfs:Feature ;
+ bsfs:dimension "4"^^xsd:integer ;
+ bsfs:distance bsfs:foobar .
+
+ ''')
+ self.assertRaises(ValueError, setattr, store, 'schema', invalid)
+
# cannot migrate to incompatible schema
invalid = bsc.from_string('''
prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#>