From 80a97bfa9f22d0d6dd25928fe1754a3a0d1de78a Mon Sep 17 00:00:00 2001 From: Matthias Baumgartner Date: Sun, 15 Jan 2023 21:00:12 +0100 Subject: Distance filter ast node --- bsfs/graph/resolve.py | 5 +++ bsfs/query/ast/filter_.py | 59 ++++++++++++++++++++------ bsfs/query/validator.py | 16 +++++++ bsfs/triple_store/sparql/distance.py | 56 ++++++++++++++++++++++++ bsfs/triple_store/sparql/parse_filter.py | 41 +++++++++++++++++- bsfs/triple_store/sparql/sparql.py | 13 +++++- test/graph/test_resolve.py | 13 ++++++ test/query/ast_test/test_filter_.py | 35 ++++++++++++++- test/query/test_validator.py | 27 ++++++++++++ test/triple_store/sparql/test_distance.py | 61 +++++++++++++++++++++++++++ test/triple_store/sparql/test_parse_filter.py | 50 ++++++++++++++++++++-- test/triple_store/sparql/test_sparql.py | 17 ++++++++ 12 files changed, 375 insertions(+), 18 deletions(-) create mode 100644 bsfs/triple_store/sparql/distance.py create mode 100644 test/triple_store/sparql/test_distance.py diff --git a/bsfs/graph/resolve.py b/bsfs/graph/resolve.py index b671204..00b778b 100644 --- a/bsfs/graph/resolve.py +++ b/bsfs/graph/resolve.py @@ -63,6 +63,8 @@ class Filter(): return self._and(type_, node) if isinstance(node, ast.filter.Or): return self._or(type_, node) + if isinstance(node, ast.filter.Distance): + return self._distance(type_, node) if isinstance(node, (ast.filter.Equals, ast.filter.Substring, \ ast.filter.StartsWith, ast.filter.EndsWith)): return self._value(type_, node) @@ -125,6 +127,9 @@ class Filter(): def _has(self, type_: bsc.Vertex, node: ast.filter.Has) -> ast.filter.Has: # pylint: disable=unused-argument return node + def _distance(self, type_: bsc.Vertex, node: ast.filter.Distance): # pylint: disable=unused-argument + return node + def _value(self, type_: bsc.Vertex, node: ast.filter._Value) -> ast.filter._Value: # pylint: disable=unused-argument return node diff --git a/bsfs/query/ast/filter_.py b/bsfs/query/ast/filter_.py index b129ded..2f0270c 100644 --- a/bsfs/query/ast/filter_.py +++ b/bsfs/query/ast/filter_.py @@ -252,8 +252,7 @@ class Has(FilterExpression): class _Value(FilterExpression): - """ - """ + """Matches some value.""" # target value. value: typing.Any @@ -277,13 +276,13 @@ class Is(_Value): class Equals(_Value): """Value matches exactly. - NOTE: Value format must correspond to literal type; can be a string, a number, or a Node + NOTE: Value must correspond to literal type. """ class Substring(_Value): """Value matches a substring - NOTE: value format must be a string + NOTE: value must be a string. """ @@ -295,9 +294,49 @@ class EndsWith(_Value): """Value ends with a given string.""" +class Distance(FilterExpression): + """Distance to a reference is (strictly) below a threshold. Assumes a Feature literal.""" + + # FIXME: + # (a) pass a node/predicate as anchor instead of a value. + # Then we don't need to materialize the reference. + # (b) pass a FilterExpression (_Bounded) instead of a threshold. + # Then, we could also query values greater than a threshold. + + # reference value. + reference: typing.Any + + # distance threshold. + threshold: float + + # closed (True) or open (False) bound. + strict: bool + + def __init__( + self, + reference: typing.Any, + threshold: float, + strict: bool = False, + ): + self.reference = reference + self.threshold = float(threshold) + self.strict = bool(strict) + + def __repr__(self) -> str: + return f'{typename(self)}({self.reference}, {self.threshold}, {self.strict})' + + def __hash__(self) -> int: + return hash((super().__hash__(), tuple(self.reference), self.threshold, self.strict)) + + def __eq__(self, other) -> bool: + return super().__eq__(other) \ + and self.reference == other.reference \ + and self.threshold == other.threshold \ + and self.strict == other.strict + + class _Bounded(FilterExpression): - """ - """ + """Value is bounded by a threshold. Assumes a Number literal.""" # bound. threshold: float @@ -327,15 +366,11 @@ class _Bounded(FilterExpression): class LessThan(_Bounded): - """Value is (strictly) smaller than threshold. - NOTE: only on numerical literals - """ + """Value is (strictly) smaller than threshold. Assumes a Number literal.""" class GreaterThan(_Bounded): - """Value is (strictly) larger than threshold - NOTE: only on numerical literals - """ + """Value is (strictly) larger than threshold. Assumes a Number literal.""" class Predicate(PredicateExpression): diff --git a/bsfs/query/validator.py b/bsfs/query/validator.py index ecea951..1b7f688 100644 --- a/bsfs/query/validator.py +++ b/bsfs/query/validator.py @@ -69,6 +69,8 @@ class Filter(): return self._not(type_, node) if isinstance(node, ast.filter.Has): return self._has(type_, node) + if isinstance(node, ast.filter.Distance): + return self._distance(type_, node) if isinstance(node, (ast.filter.Any, ast.filter.All)): return self._branch(type_, node) if isinstance(node, (ast.filter.And, ast.filter.Or)): @@ -177,6 +179,20 @@ class Filter(): # node.count is a numerical expression self._parse_filter_expression(self.schema.literal(ns.bsfs.Number), node.count) + def _distance(self, type_: bsc.Vertex, node: ast.filter.Distance): + # type is a Literal + if not isinstance(type_, bsc.Feature): + raise errors.ConsistencyError(f'expected a Feature, found {type_}') + # type exists in the schema + if type_ not in self.schema.literals(): + raise errors.ConsistencyError(f'literal {type_} is not in the schema') + # reference matches type_ + if len(node.reference) != type_.dimension: + raise errors.ConsistencyError(f'reference has dimension {len(node.reference)}, expected {type_.dimension}') + # FIXME: + #if node.reference.dtype != type_.dtype: + # raise errors.ConsistencyError(f'') + ## conditions diff --git a/bsfs/triple_store/sparql/distance.py b/bsfs/triple_store/sparql/distance.py new file mode 100644 index 0000000..2f5387a --- /dev/null +++ b/bsfs/triple_store/sparql/distance.py @@ -0,0 +1,56 @@ +""" + +Part of the BlackStar filesystem (bsfs) module. +A copy of the license is provided with the project. +Author: Matthias Baumgartner, 2022 +""" +# standard imports +import typing + +# external imports +import numpy as np + +# bsfs imports +from bsfs.namespace import ns + +# constants +EPS = 1e-9 + +# exports +__all__: typing.Sequence[str] = ( + 'DISTANCE_FU', + ) + + +## code ## + +def euclid(fst, snd) -> float: + """Euclidean distance (l2 norm).""" + fst = np.array(fst) + snd = np.array(snd) + return float(np.linalg.norm(fst - snd)) + +def cosine(fst, snd) -> float: + """Cosine distance.""" + fst = np.array(fst) + snd = np.array(snd) + if (fst == snd).all(): + return 0.0 + nrm0 = np.linalg.norm(fst) + nrm1 = np.linalg.norm(snd) + return float(1.0 - np.dot(fst, snd) / (nrm0 * nrm1 + EPS)) + +def manhatten(fst, snd) -> float: + """Manhatten (cityblock) distance (l1 norm).""" + fst = np.array(fst) + snd = np.array(snd) + return float(np.abs(fst - snd).sum()) + +# Known distance functions. +DISTANCE_FU = { + ns.bsfs.euclidean: euclid, + ns.bsfs.cosine: cosine, + ns.bsfs.manhatten: manhatten, +} + +## EOF ## diff --git a/bsfs/triple_store/sparql/parse_filter.py b/bsfs/triple_store/sparql/parse_filter.py index 5d8a2d9..8b6b976 100644 --- a/bsfs/triple_store/sparql/parse_filter.py +++ b/bsfs/triple_store/sparql/parse_filter.py @@ -5,19 +5,29 @@ A copy of the license is provided with the project. Author: Matthias Baumgartner, 2022 """ # imports +import operator import typing +# external imports +import rdflib + # bsfs imports from bsfs import schema as bsc from bsfs.namespace import ns from bsfs.query import ast from bsfs.utils import URI, errors +# inner-module imports +from .distance import DISTANCE_FU + # exports __all__: typing.Sequence[str] = ( 'Filter', ) + +## code ## + class _GenHopName(): """Generator that produces a new unique symbol name with each iteration.""" @@ -46,7 +56,8 @@ class Filter(): # Generator that produces unique symbol names. ngen: _GenHopName - def __init__(self, schema): + def __init__(self, graph, schema): + self.graph = graph self.schema = schema self.ngen = _GenHopName() @@ -84,6 +95,8 @@ class Filter(): return self._not(type_, node, head) if isinstance(node, ast.filter.Has): return self._has(type_, node, head) + if isinstance(node, ast.filter.Distance): + return self._distance(type_, node, head) if isinstance(node, ast.filter.Any): return self._any(type_, node, head) if isinstance(node, ast.filter.All): @@ -243,6 +256,32 @@ class Filter(): # combine return num_preds + ' . ' + count_bounds + def _distance(self, node_type: bsc.Vertex, node: ast.filter.Distance, head: str) -> str: + """ + """ + if not isinstance(node_type, bsc.Feature): + raise errors.BackendError(f'expected Feature, found {node_type}') + if len(node.reference) != node_type.dimension: + raise errors.ConsistencyError( + f'reference has dimension {len(node.reference)}, expected {node_type.dimension}') + # get distance metric + dist = DISTANCE_FU[node_type.distance] + # get operator + cmp = operator.lt if node.strict else operator.le + # get candidate values + candidates = { + f'"{cand}"^^<{node_type.uri}>' + for cand + in self.graph.objects() + if isinstance(cand, rdflib.Literal) + and cand.datatype == rdflib.URIRef(node_type.uri) + and cmp(dist(cand.value, node.reference), node.threshold) + } + # combine candidate values + values = ' '.join(candidates) if len(candidates) else f'"impossible value"^^<{ns.xsd.string}>' + # return sparql fragment + return f'VALUES {head} {{ {values} }}' + def _is(self, node_type: bsc.Vertex, node: ast.filter.Is, head: str) -> str: """ """ diff --git a/bsfs/triple_store/sparql/sparql.py b/bsfs/triple_store/sparql/sparql.py index 3877d1a..dfd9871 100644 --- a/bsfs/triple_store/sparql/sparql.py +++ b/bsfs/triple_store/sparql/sparql.py @@ -18,6 +18,7 @@ from bsfs.utils import errors, URI # inner-module imports from . import parse_filter from .. import base +from .distance import DISTANCE_FU # exports @@ -97,7 +98,7 @@ class SparqlStore(base.TripleStoreBase): self._transaction = _Transaction(self._graph) # NOTE: parsing bsfs.query.ast.filter.Has requires xsd:integer. self._schema = bsc.Schema(literals={bsc.ROOT_NUMBER.child(ns.xsd.integer)}) - self._filter_parser = parse_filter.Filter(self._schema) + self._filter_parser = parse_filter.Filter(self._graph, self._schema) # NOTE: mypy and pylint complain about the **kwargs not being listed (contrasting super) # However, not having it here is clearer since it's explicit that there are no arguments. @@ -123,6 +124,16 @@ class SparqlStore(base.TripleStoreBase): # check compatibility: No contradicting definitions if not self.schema.consistent_with(schema): raise errors.ConsistencyError(f'{schema} is inconsistent with {self.schema}') + # check distance functions of features + invalid = { + (cand.uri, cand.distance) + for cand + in schema.literals() + if isinstance(cand, bsc.Feature) and cand.distance not in DISTANCE_FU} + if len(invalid) > 0: + cand, dist = zip(*invalid) + raise ValueError( + f'unknown distance function {",".join(dist)} in feature {", ".join(cand)}') # commit the current transaction self.commit() diff --git a/test/graph/test_resolve.py b/test/graph/test_resolve.py index 0861a53..0918b02 100644 --- a/test/graph/test_resolve.py +++ b/test/graph/test_resolve.py @@ -46,6 +46,13 @@ class TestFilter(unittest.TestCase): bsfs:Feature rdfs:subClassOf bsfs:Array . xsd:integer rdfs:subClassOf bsfs:Number . + bsfs:Colors rdfs:subClassOf bsfs:Feature ; + bsfs:dimension "5"^^xsd:integer . + + bse:colors rdfs:subClassOf bsfs:Predicate ; + rdfs:domain bsfs:Entity ; + rdfs:range bsfs:Colors . + bse:comment rdfs:subClassOf bsfs:Predicate ; rdfs:domain bsfs:Entity ; rdfs:range xsd:string ; @@ -147,12 +154,18 @@ class TestFilter(unittest.TestCase): self.assertEqual(resolver(schema.node(ns.bsfs.Entity), ast.filter.Has(ns.bse.comment)), ast.filter.Has(ns.bse.comment)) + # for sake of completeness: Distance + self.assertEqual(resolver(schema.node(ns.bsfs.Entity), + ast.filter.Any(ns.bse.colors, ast.filter.Distance([1,2,3,4,5], 1))), + ast.filter.Any(ns.bse.colors, ast.filter.Distance([1,2,3,4,5], 1))) # route errors self.assertRaises(errors.BackendError, resolver, schema.node(ns.bsfs.Tag), ast.filter.Predicate(ns.bse.comment)) self.assertRaises(errors.BackendError, resolver, schema.node(ns.bsfs.Tag), ast.filter.Any(ast.filter.PredicateExpression(), ast.filter.Equals('foo'))) self.assertRaises(errors.BackendError, resolver._one_of, ast.filter.OneOf(ast.filter.Predicate(ns.bsfs.Predicate))) + # for sake of coverage completeness: valid OneOf + self.assertIsNotNone(resolver._one_of(ast.filter.OneOf(ast.filter.Predicate(ns.bse.colors)))) # check schema consistency self.assertRaises(errors.ConsistencyError, resolver, schema.node(ns.bsfs.Tag), diff --git a/test/query/ast_test/test_filter_.py b/test/query/ast_test/test_filter_.py index 4f69bdc..9eb92e2 100644 --- a/test/query/ast_test/test_filter_.py +++ b/test/query/ast_test/test_filter_.py @@ -15,7 +15,7 @@ from bsfs.utils import URI from bsfs.query.ast.filter_ import _Expression, FilterExpression, PredicateExpression from bsfs.query.ast.filter_ import _Branch, Any, All from bsfs.query.ast.filter_ import _Agg, And, Or -from bsfs.query.ast.filter_ import Not, Has +from bsfs.query.ast.filter_ import Not, Has, Distance from bsfs.query.ast.filter_ import _Value, Is, Equals, Substring, StartsWith, EndsWith from bsfs.query.ast.filter_ import _Bounded, LessThan, GreaterThan from bsfs.query.ast.filter_ import Predicate, OneOf @@ -284,6 +284,39 @@ class TestValue(unittest.TestCase): self.assertEqual(cls(f).value, f) +class TestDistance(unittest.TestCase): + def test_essentials(self): + ref = (1,2,3) + # comparison + self.assertEqual(Distance(ref, 3), Distance(ref, 3)) + self.assertEqual(hash(Distance(ref, 3)), hash(Distance(ref, 3))) + # comparison respects type + self.assertNotEqual(Distance(ref, 3), FilterExpression()) + self.assertNotEqual(hash(Distance(ref, 3)), hash(FilterExpression())) + # comparison respects reference + self.assertNotEqual(Distance((1,2,3), 3, False), Distance((1,2), 3, False)) + self.assertNotEqual(hash(Distance((1,2,3), 3, False)), hash(Distance((1,2), 3, False))) + self.assertNotEqual(Distance((1,2,3), 3, False), Distance((1,5,3), 3, False)) + self.assertNotEqual(hash(Distance((1,2,3), 3, False)), hash(Distance((1,5,3), 3, False))) + # comparison respects threshold + self.assertNotEqual(Distance((1,2,3), 3, False), Distance((1,2,3), 3.1, False)) + self.assertNotEqual(hash(Distance((1,2,3), 3, False)), hash(Distance((1,2,3), 3.1, False))) + # comparison respects strict flag + self.assertNotEqual(Distance((1,2,3), 3, False), Distance((1,2,3), 3, True)) + self.assertNotEqual(hash(Distance((1,2,3), 3, False)), hash(Distance((1,2,3), 3, True))) + # string conversion + self.assertEqual(str(Distance(ref, 3, False)), 'Distance((1, 2, 3), 3.0, False)') + self.assertEqual(repr(Distance(ref, 3, False)), 'Distance((1, 2, 3), 3.0, False)') + + def test_members(self): + self.assertEqual(Distance((1,2,3), 3, False).reference, (1,2,3)) + self.assertEqual(Distance((3,2,1), 3, False).reference, (3,2,1)) + self.assertEqual(Distance((1,2,3), 3, False).threshold, 3.0) + self.assertEqual(Distance((1,2,3), 53.45, False).threshold, 53.45) + self.assertEqual(Distance((1,2,3), 3, False).strict, False) + self.assertEqual(Distance((1,2,3), 3, True).strict, True) + + class TestBounded(unittest.TestCase): def test_essentials(self): # comparison respects type diff --git a/test/query/test_validator.py b/test/query/test_validator.py index 63ead52..dc9d913 100644 --- a/test/query/test_validator.py +++ b/test/query/test_validator.py @@ -38,6 +38,15 @@ class TestFilter(unittest.TestCase): bsfs:Feature rdfs:subClassOf bsfs:Array . xsd:integer rdfs:subClassOf bsfs:Number . + bsfs:Colors rdfs:subClassOf bsfs:Feature ; + bsfs:dimension "5"^^xsd:integer ; + bsfs:dtype bsfs:f32 . + + bse:color rdfs:subClassOf bsfs:Predicate ; + rdfs:domain bsfs:Node ; + rdfs:range bsfs:Colors ; + bsfs:unique "true"^^xsd:boolean . + bse:comment rdfs:subClassOf bsfs:Predicate ; rdfs:domain bsfs:Node ; rdfs:range xsd:string ; @@ -88,6 +97,7 @@ class TestFilter(unittest.TestCase): ), ast.filter.Not(ast.filter.Any(ns.bse.comment, ast.filter.Not(ast.filter.Equals('hello world')))), + ast.filter.Any(ns.bse.color, ast.filter.Distance([1,2,3,4,5], 3)), ))))) # invalid paths raise consistency error self.assertRaises(errors.ConsistencyError, self.validate, self.schema.node(ns.bsfs.Entity), @@ -257,6 +267,23 @@ class TestFilter(unittest.TestCase): self.assertIsNone(self.validate._bounded(self.schema.literal(ns.xsd.integer), ast.filter.LessThan(0))) self.assertIsNone(self.validate._bounded(self.schema.literal(ns.xsd.integer), ast.filter.GreaterThan(0))) + def test_distance(self): + # type must be a literal + self.assertRaises(errors.ConsistencyError, self.validate._distance, self.schema.node(ns.bsfs.Node), + ast.filter.Distance([1,2,3], 1, False)) + # type must be a feature + self.assertRaises(errors.ConsistencyError, self.validate._distance, self.schema.literal(ns.bsfs.Array), + ast.filter.Distance([1,2,3], 1, False)) + # type must be in the schema + self.assertRaises(errors.ConsistencyError, self.validate._distance, self.schema.literal(ns.bsfs.Feature).child(ns.bsfs.Invalid), + ast.filter.Distance([1,2,3], 1, False)) + # FIXME: reference must be a numpy array + # reference must have the correct dimension + self.assertRaises(errors.ConsistencyError, self.validate._distance, self.schema.literal(ns.bsfs.Colors), + ast.filter.Distance([1,2,3], 1, False)) + # FIXME: reference must have the correct dtype + # distance accepts correct expressions + self.assertIsNone(self.validate._distance(self.schema.literal(ns.bsfs.Colors), ast.filter.Distance([1,2,3,4,5], 1, False))) ## main ## diff --git a/test/triple_store/sparql/test_distance.py b/test/triple_store/sparql/test_distance.py new file mode 100644 index 0000000..0659459 --- /dev/null +++ b/test/triple_store/sparql/test_distance.py @@ -0,0 +1,61 @@ +""" + +Part of the bsfs test suite. +A copy of the license is provided with the project. +Author: Matthias Baumgartner, 2022 +""" +# imports +import numpy as np +import unittest + +# objects to test +from bsfs.triple_store.sparql import distance + + +## code ## + +class TestDistance(unittest.TestCase): + + def test_euclid(self): + # self-distance is zero + self.assertEqual(distance.euclid([1,2,3,4], [1,2,3,4]), 0.0) + # accepts list-like arguments + self.assertAlmostEqual(distance.euclid([1,2,3,4], [2,3,4,5]), 2.0, 3) + self.assertAlmostEqual(distance.euclid((1,2,3,4), (2,3,4,5)), 2.0, 3) + # dimension can vary + self.assertAlmostEqual(distance.euclid([1,2,3], [2,3,4]), 1.732, 3) + self.assertAlmostEqual(distance.euclid([1,2,3,4,5], [2,3,4,5,6]), 2.236, 3) + # vector can be zero + self.assertAlmostEqual(distance.euclid([0,0,0], [1,2,3]), 3.742, 3) + + def test_cosine(self): + # self-distance is zero + self.assertEqual(distance.cosine([1,2,3,4], [1,2,3,4]), 0.0) + # accepts list-like arguments + self.assertAlmostEqual(distance.cosine([1,2,3,4], [4,3,2,1]), 0.333, 3) + self.assertAlmostEqual(distance.cosine((1,2,3,4), (4,3,2,1)), 0.333, 3) + # dimension can vary + self.assertAlmostEqual(distance.cosine([1,2,3], [3,2,1]), 0.286, 3) + self.assertAlmostEqual(distance.cosine([1,2,3,4,5], [5,4,3,2,1]), 0.364, 3) + # vector can be zero + self.assertAlmostEqual(distance.cosine([0,0,0], [1,2,3]), 1.0, 3) + + def test_manhatten(self): + # self-distance is zero + self.assertEqual(distance.manhatten([1,2,3,4], [1,2,3,4]), 0.0) + # accepts list-like arguments + self.assertAlmostEqual(distance.manhatten([1,2,3,4], [2,3,4,5]), 4.0, 3) + self.assertAlmostEqual(distance.manhatten((1,2,3,4), (2,3,4,5)), 4.0, 3) + # dimension can vary + self.assertAlmostEqual(distance.manhatten([1,2,3], [2,3,4]), 3.0, 3) + self.assertAlmostEqual(distance.manhatten([1,2,3,4,5], [2,3,4,5,6]), 5.0, 3) + # vector can be zero + self.assertAlmostEqual(distance.manhatten([0,0,0], [1,2,3]), 6.0, 3) + + +## main ## + +if __name__ == '__main__': + unittest.main() + +## EOF ## diff --git a/test/triple_store/sparql/test_parse_filter.py b/test/triple_store/sparql/test_parse_filter.py index 5c16f11..8764535 100644 --- a/test/triple_store/sparql/test_parse_filter.py +++ b/test/triple_store/sparql/test_parse_filter.py @@ -42,6 +42,15 @@ class TestParseFilter(unittest.TestCase): xsd:integer rdfs:subClassOf bsfs:Number . bsfs:URI rdfs:subClassOf bsfs:Literal . + bsfs:Colors rdfs:subClassOf bsfs:Feature ; + bsfs:dimension "4"^^xsd:integer ; + bsfs:dtype xsd:integer ; + bsfs:distance bsfs:euclidean . + + bse:colors rdfs:subClassOf bsfs:Predicate ; + rdfs:domain bsfs:Entity ; + rdfs:range bsfs:Colors . + bse:comment rdfs:subClassOf bsfs:Predicate ; rdfs:domain bsfs:Node ; rdfs:range xsd:string ; @@ -74,9 +83,6 @@ class TestParseFilter(unittest.TestCase): ''') - # parser instance - self.parser = Filter(self.schema) - # graph to test queries self.graph = rdflib.Graph() # schema hierarchies @@ -117,6 +123,13 @@ class TestParseFilter(unittest.TestCase): # image iso self.graph.add((rdflib.URIRef('http://example.com/image#1234'), rdflib.URIRef(ns.bse.iso), rdflib.Literal(1234, datatype=rdflib.XSD.integer))) self.graph.add((rdflib.URIRef('http://example.com/image#4321'), rdflib.URIRef(ns.bse.iso), rdflib.Literal(4321, datatype=rdflib.XSD.integer))) + # color features + self.graph.add((rdflib.URIRef('http://example.com/entity#1234'), rdflib.URIRef(ns.bse.colors), rdflib.Literal([1,2,3,4], datatype=rdflib.URIRef(ns.bsfs.Colors)))) + self.graph.add((rdflib.URIRef('http://example.com/entity#4321'), rdflib.URIRef(ns.bse.colors), rdflib.Literal([4,3,2,1], datatype=rdflib.URIRef(ns.bsfs.Colors)))) + self.graph.add((rdflib.URIRef('http://example.com/image#1234'), rdflib.URIRef(ns.bse.colors), rdflib.Literal([3,4,2,1], datatype=rdflib.URIRef(ns.bsfs.Colors)))) + + # parser instance + self.parser = Filter(self.graph, self.schema) def test_routing(self): @@ -617,6 +630,37 @@ class TestParseFilter(unittest.TestCase): {'http://example.com/tag#1234'}) + def test_distance(self): + # node colors distance to [2,4,3,1] + # entity#1234 [1,2,3,4] 3.742 + # entity#4321 [4,3,2,1] 2.449 + # image#1234 [3,4,2,1] 1.414 + + # _distance expects a feature + self.assertRaises(errors.BackendError, self.parser._distance, self.schema.node(ns.bsfs.Entity), ast.filter.Distance([1,2,3,4], 1), '') + # reference must have the correct dimension + self.assertRaises(errors.ConsistencyError, self.parser._distance, self.schema.literal(ns.bsfs.Colors), ast.filter.Distance([1,2,3], 1), '') + self.assertRaises(errors.ConsistencyError, self.parser._distance, self.schema.literal(ns.bsfs.Colors), ast.filter.Distance([1,2,3,4,5], 1), '') + # _distance respects threshold + q = self.parser(self.schema.node(ns.bsfs.Entity), ast.filter.Any(ns.bse.colors, ast.filter.Distance([2,4,3,1], 4))) + self.assertSetEqual({str(guid) for guid, in self.graph.query(q)}, + {'http://example.com/entity#1234', 'http://example.com/entity#4321', 'http://example.com/image#1234'}) + q = self.parser(self.schema.node(ns.bsfs.Entity), ast.filter.Any(ns.bse.colors, ast.filter.Distance([2,4,3,1], 3))) + self.assertSetEqual({str(guid) for guid, in self.graph.query(q)}, + {'http://example.com/entity#4321', 'http://example.com/image#1234'}) + q = self.parser(self.schema.node(ns.bsfs.Entity), ast.filter.Any(ns.bse.colors, ast.filter.Distance([2,4,3,1], 2))) + self.assertSetEqual({str(guid) for guid, in self.graph.query(q)}, + {'http://example.com/image#1234'}) + # result set can be empty + q = self.parser(self.schema.node(ns.bsfs.Entity), ast.filter.Any(ns.bse.colors, ast.filter.Distance([2,4,3,1], 1))) + self.assertSetEqual({str(guid) for guid, in self.graph.query(q)}, set()) + # _distance respects strict + q = self.parser(self.schema.node(ns.bsfs.Entity), ast.filter.Any(ns.bse.colors, ast.filter.Distance([1,2,3,4], 0, False))) + self.assertSetEqual({str(guid) for guid, in self.graph.query(q)}, + {'http://example.com/entity#1234'}) + q = self.parser(self.schema.node(ns.bsfs.Entity), ast.filter.Any(ns.bse.colors, ast.filter.Distance([1,2,3,4], 0, True))) + self.assertSetEqual({str(guid) for guid, in self.graph.query(q)}, set()) + def test_one_of(self): # _one_of expects a node self.assertRaises(errors.BackendError, self.parser._one_of, diff --git a/test/triple_store/sparql/test_sparql.py b/test/triple_store/sparql/test_sparql.py index 1f56a7e..435ca28 100644 --- a/test/triple_store/sparql/test_sparql.py +++ b/test/triple_store/sparql/test_sparql.py @@ -392,6 +392,23 @@ class TestSparqlStore(unittest.TestCase): class Foo(): pass self.assertRaises(TypeError, setattr, store, 'schema', Foo()) + # cannot define features w/o known distance function + invalid = bsc.from_string(''' + prefix rdfs: + prefix xsd: + prefix bsfs: + prefix bse: + + bsfs:Array rdfs:subClassOf bsfs:Literal . + bsfs:Feature rdfs:subClassOf bsfs:Array . + + bsfs:Colors rdfs:subClassOf bsfs:Feature ; + bsfs:dimension "4"^^xsd:integer ; + bsfs:distance bsfs:foobar . + + ''') + self.assertRaises(ValueError, setattr, store, 'schema', invalid) + # cannot migrate to incompatible schema invalid = bsc.from_string(''' prefix rdfs: -- cgit v1.2.3