From 3940cb3c79937a431ba2ae3b57fd0c6c2ccfff33 Mon Sep 17 00:00:00 2001 From: Matthias Baumgartner Date: Thu, 12 Jan 2023 10:12:43 +0100 Subject: use Vertex in type annotations --- bsfs/triple_store/sparql/parse_filter.py | 39 +++++++++++++++----------------- 1 file changed, 18 insertions(+), 21 deletions(-) (limited to 'bsfs/triple_store/sparql') diff --git a/bsfs/triple_store/sparql/parse_filter.py b/bsfs/triple_store/sparql/parse_filter.py index d4db0aa..a851888 100644 --- a/bsfs/triple_store/sparql/parse_filter.py +++ b/bsfs/triple_store/sparql/parse_filter.py @@ -46,9 +46,6 @@ class Filter(): # Generator that produces unique symbol names. ngen: _GenHopName - # Vertex type. - T_VERTEX = typing.Union[bsc.Node, bsc.Literal] - def __init__(self, schema): self.schema = schema self.ngen = _GenHopName() @@ -79,7 +76,7 @@ class Filter(): }} ''' - def _parse_filter_expression(self, type_: T_VERTEX, node: ast.filter.FilterExpression, head: str) -> str: + def _parse_filter_expression(self, type_: bsc.Vertex, node: ast.filter.FilterExpression, head: str) -> str: """Route *node* to the handler of the respective FilterExpression subclass.""" if isinstance(node, ast.filter.Is): return self._is(type_, node, head) @@ -112,9 +109,9 @@ class Filter(): def _parse_predicate_expression( self, - type_: T_VERTEX, + type_: bsc.Vertex, node: ast.filter.PredicateExpression - ) -> typing.Tuple[str, T_VERTEX]: + ) -> typing.Tuple[str, bsc.Vertex]: """Route *node* to the handler of the respective PredicateExpression subclass.""" if isinstance(node, ast.filter.Predicate): return self._predicate(type_, node) @@ -123,7 +120,7 @@ class Filter(): # invalid node raise errors.BackendError(f'expected predicate expression, found {node}') - def _one_of(self, node_type: T_VERTEX, node: ast.filter.OneOf) -> typing.Tuple[str, T_VERTEX]: + def _one_of(self, node_type: bsc.Vertex, node: ast.filter.OneOf) -> typing.Tuple[str, bsc.Vertex]: """ """ if not isinstance(node_type, bsc.Node): @@ -150,7 +147,7 @@ class Filter(): # return joint predicate expression and next range return '|'.join(suburi), rng - def _predicate(self, node_type: T_VERTEX, node: ast.filter.Predicate) -> typing.Tuple[str, T_VERTEX]: + def _predicate(self, node_type: bsc.Vertex, node: ast.filter.Predicate) -> typing.Tuple[str, bsc.Vertex]: """ """ # check node_type @@ -178,7 +175,7 @@ class Filter(): # return predicate URI and next node type return puri, rng - def _any(self, node_type: T_VERTEX, node: ast.filter.Any, head: str) -> str: + def _any(self, node_type: bsc.Vertex, node: ast.filter.Any, head: str) -> str: """ """ if not isinstance(node_type, bsc.Node): @@ -191,7 +188,7 @@ class Filter(): # combine results return f'{head} {pred} {nexthead} . {expr}' - def _all(self, node_type: T_VERTEX, node: ast.filter.All, head: str) -> str: + def _all(self, node_type: bsc.Vertex, node: ast.filter.All, head: str) -> str: """ """ # NOTE: All(P, E) := Not(Any(P, Not(E))) and EXISTS(P, ?) @@ -208,13 +205,13 @@ class Filter(): # return existence and rewritten expression return f'FILTER EXISTS {{ {head} {pred} {temphead} }} . ' + expr - def _and(self, node_type: T_VERTEX, node: ast.filter.And, head: str) -> str: + def _and(self, node_type: bsc.Vertex, node: ast.filter.And, head: str) -> str: """ """ sub = [self._parse_filter_expression(node_type, expr, head) for expr in node] return ' . '.join(sub) - def _or(self, node_type: T_VERTEX, node: ast.filter.Or, head: str) -> str: + def _or(self, node_type: bsc.Vertex, node: ast.filter.Or, head: str) -> str: """ """ # potential special case optimization: @@ -224,7 +221,7 @@ class Filter(): sub = ['{' + expr + '}' for expr in sub] return ' UNION '.join(sub) - def _not(self, node_type: T_VERTEX, node: ast.filter.Not, head: str) -> str: + def _not(self, node_type: bsc.Vertex, node: ast.filter.Not, head: str) -> str: """ """ expr = self._parse_filter_expression(node_type, node.expr, head) @@ -235,7 +232,7 @@ class Filter(): # The simplest (and non-interfering) choice is a type statement. return f'MINUS {{ {head} <{ns.rdf.type}>/<{ns.rdfs.subClassOf}>* <{node_type.uri}> . {expr} }}' - def _has(self, node_type: T_VERTEX, node: ast.filter.Has, head: str) -> str: + def _has(self, node_type: bsc.Vertex, node: ast.filter.Has, head: str) -> str: """ """ if not isinstance(node_type, bsc.Node): @@ -253,42 +250,42 @@ class Filter(): # combine return num_preds + ' . ' + count_bounds - def _is(self, node_type: T_VERTEX, node: ast.filter.Is, head: str) -> str: + def _is(self, node_type: bsc.Vertex, node: ast.filter.Is, head: str) -> str: """ """ if not isinstance(node_type, bsc.Node): raise errors.BackendError(f'expected Node, found {node_type}') return f'VALUES {head} {{ <{node.value}> }}' - def _equals(self, node_type: T_VERTEX, node: ast.filter.Equals, head: str) -> str: + def _equals(self, node_type: bsc.Vertex, node: ast.filter.Equals, head: str) -> str: """ """ if not isinstance(node_type, bsc.Literal): raise errors.BackendError(f'expected Literal, found {node}') return f'VALUES {head} {{ "{node.value}"^^<{node_type.uri}> }}' - def _substring(self, node_type: T_VERTEX, node: ast.filter.Substring, head: str) -> str: + def _substring(self, node_type: bsc.Vertex, node: ast.filter.Substring, head: str) -> str: """ """ if not isinstance(node_type, bsc.Literal): raise errors.BackendError(f'expected Literal, found {node_type}') return f'FILTER contains(str({head}), "{node.value}")' - def _starts_with(self, node_type: T_VERTEX, node: ast.filter.StartsWith, head: str) -> str: + def _starts_with(self, node_type: bsc.Vertex, node: ast.filter.StartsWith, head: str) -> str: """ """ if not isinstance(node_type, bsc.Literal): raise errors.BackendError(f'expected Literal, found {node_type}') return f'FILTER strstarts(str({head}), "{node.value}")' - def _ends_with(self, node_type: T_VERTEX, node: ast.filter.EndsWith, head: str) -> str: + def _ends_with(self, node_type: bsc.Vertex, node: ast.filter.EndsWith, head: str) -> str: """ """ if not isinstance(node_type, bsc.Literal): raise errors.BackendError(f'expected Literal, found {node_type}') return f'FILTER strends(str({head}), "{node.value}")' - def _less_than(self, node_type: T_VERTEX, node: ast.filter.LessThan, head: str) -> str: + def _less_than(self, node_type: bsc.Vertex, node: ast.filter.LessThan, head: str) -> str: """ """ if not isinstance(node_type, bsc.Literal): @@ -296,7 +293,7 @@ class Filter(): equality = '=' if not node.strict else '' return f'FILTER ({head} <{equality} {float(node.threshold)})' - def _greater_than(self, node_type: T_VERTEX, node: ast.filter.GreaterThan, head: str) -> str: + def _greater_than(self, node_type: bsc.Vertex, node: ast.filter.GreaterThan, head: str) -> str: """ """ if not isinstance(node_type, bsc.Literal): -- cgit v1.2.3 From 6b3e32b29799a8143e8ce9d20c5f27e3e166b9bb Mon Sep 17 00:00:00 2001 From: Matthias Baumgartner Date: Thu, 12 Jan 2023 10:17:07 +0100 Subject: changed path to from_string in clients --- bsfs/triple_store/sparql/sparql.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'bsfs/triple_store/sparql') diff --git a/bsfs/triple_store/sparql/sparql.py b/bsfs/triple_store/sparql/sparql.py index c3cbff6..ddace35 100644 --- a/bsfs/triple_store/sparql/sparql.py +++ b/bsfs/triple_store/sparql/sparql.py @@ -94,7 +94,7 @@ class SparqlStore(base.TripleStoreBase): super().__init__(None) self._graph = rdflib.Graph() self._transaction = _Transaction(self._graph) - self._schema = bsc.Schema.Empty() + self._schema = bsc.Schema() self._filter_parser = parse_filter.Filter(self._schema) # NOTE: mypy and pylint complain about the **kwargs not being listed (contrasting super) -- cgit v1.2.3 From 7e7284d5fc01c0a081aa79d67736f51069864a7d Mon Sep 17 00:00:00 2001 From: Matthias Baumgartner Date: Thu, 12 Jan 2023 10:22:59 +0100 Subject: adapt to non-optional range in query checks --- bsfs/triple_store/sparql/parse_filter.py | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) (limited to 'bsfs/triple_store/sparql') diff --git a/bsfs/triple_store/sparql/parse_filter.py b/bsfs/triple_store/sparql/parse_filter.py index a851888..0297cbc 100644 --- a/bsfs/triple_store/sparql/parse_filter.py +++ b/bsfs/triple_store/sparql/parse_filter.py @@ -140,12 +140,10 @@ class Filter(): raise errors.ConsistencyError(f'ranges {subrng} and {rng} are not related') except TypeError as err: # subrng and rng are not comparable raise errors.ConsistencyError(f'ranges {subrng} and {rng} are not related') from err - if rng is None: - # for mypy to be certain of the rng type - # if rng were None, we'd have gotten a TypeError above (None > None) - raise errors.UnreachableError() # return joint predicate expression and next range - return '|'.join(suburi), rng + # OneOf guarantees at least one expression, rng is always a bsc.Vertex. + # mypy does not realize this, hence we ignore the warning. + return '|'.join(suburi), rng # type: ignore [return-value] def _predicate(self, node_type: bsc.Vertex, node: ast.filter.Predicate) -> typing.Tuple[str, bsc.Vertex]: """ @@ -159,9 +157,8 @@ class Filter(): if not self.schema.has_predicate(puri): raise errors.ConsistencyError(f'predicate {puri} is not in the schema') pred = self.schema.predicate(puri) - if pred.range is None: - # FIXME: It is a design error that Predicates can have a None range... - raise errors.BackendError(f'predicate {pred} has no range') + if not isinstance(pred.range, (bsc.Node, bsc.Literal)): + raise errors.BackendError(f'the range of predicate {pred} is undefined') dom, rng = pred.domain, pred.range # encapsulate predicate uri puri = f'<{puri}>' # type: ignore [assignment] # variable re-use confuses mypy -- cgit v1.2.3 From b0ff4ed674ad78bf113c3cc0c2ccd187ccb91048 Mon Sep 17 00:00:00 2001 From: Matthias Baumgartner Date: Thu, 12 Jan 2023 10:26:30 +0100 Subject: number literal adaptions --- bsfs/triple_store/sparql/parse_filter.py | 1 - bsfs/triple_store/sparql/sparql.py | 4 +++- 2 files changed, 3 insertions(+), 2 deletions(-) (limited to 'bsfs/triple_store/sparql') diff --git a/bsfs/triple_store/sparql/parse_filter.py b/bsfs/triple_store/sparql/parse_filter.py index 0297cbc..18a3288 100644 --- a/bsfs/triple_store/sparql/parse_filter.py +++ b/bsfs/triple_store/sparql/parse_filter.py @@ -242,7 +242,6 @@ class Filter(): # predicate count expression (fetch number of predicates at *head*) num_preds = f'{{ SELECT (COUNT(distinct {inner}) as {outer}) WHERE {{ {head} {pred} {inner} }} }}' # count expression - # FIXME: We have to ensure that ns.xsd.integer is always known in the schema! count_bounds = self._parse_filter_expression(self.schema.literal(ns.xsd.integer), node.count, outer) # combine return num_preds + ' . ' + count_bounds diff --git a/bsfs/triple_store/sparql/sparql.py b/bsfs/triple_store/sparql/sparql.py index ddace35..87467ff 100644 --- a/bsfs/triple_store/sparql/sparql.py +++ b/bsfs/triple_store/sparql/sparql.py @@ -11,6 +11,7 @@ import rdflib # bsfs imports from bsfs import schema as bsc +from bsfs.namespace import ns from bsfs.query import ast from bsfs.utils import errors, URI @@ -94,7 +95,8 @@ class SparqlStore(base.TripleStoreBase): super().__init__(None) self._graph = rdflib.Graph() self._transaction = _Transaction(self._graph) - self._schema = bsc.Schema() + # NOTE: parsing bsfs.query.ast.filter.Has requires xsd:integer. + self._schema = bsc.Schema(literals={bsc.ROOT_NUMBER.child(ns.xsd.integer)}) self._filter_parser = parse_filter.Filter(self._schema) # NOTE: mypy and pylint complain about the **kwargs not being listed (contrasting super) -- cgit v1.2.3 From e708016ae366e96051281f3a744af35a8c06d98b Mon Sep 17 00:00:00 2001 From: Matthias Baumgartner Date: Thu, 12 Jan 2023 10:28:16 +0100 Subject: cleanup and cosmetic changes --- bsfs/triple_store/sparql/sparql.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'bsfs/triple_store/sparql') diff --git a/bsfs/triple_store/sparql/sparql.py b/bsfs/triple_store/sparql/sparql.py index 87467ff..3877d1a 100644 --- a/bsfs/triple_store/sparql/sparql.py +++ b/bsfs/triple_store/sparql/sparql.py @@ -139,7 +139,7 @@ class SparqlStore(base.TripleStoreBase): for src, trg in self._graph.subject_objects(rdflib.URIRef(pred.uri)): self._transaction.remove((src, rdflib.URIRef(pred.uri), trg)) # remove predicate definition - if pred.parent is not None: + if pred.parent is not None: # NOTE: there shouldn't be any predicate w/o parent self._transaction.remove(( rdflib.URIRef(pred.uri), rdflib.RDFS.subClassOf, @@ -159,7 +159,7 @@ class SparqlStore(base.TripleStoreBase): # remove instance self._transaction.remove((inst, rdflib.RDF.type, rdflib.URIRef(node.uri))) # remove node definition - if node.parent is not None: + if node.parent is not None: # NOTE: there shouldn't be any node w/o parent self._transaction.remove(( rdflib.URIRef(node.uri), rdflib.RDFS.subClassOf, @@ -168,7 +168,7 @@ class SparqlStore(base.TripleStoreBase): for lit in sub.literals: # remove literal definition - if lit.parent is not None: + if lit.parent is not None: # NOTE: there shouldn't be any literal w/o parent self._transaction.remove(( rdflib.URIRef(lit.uri), rdflib.RDFS.subClassOf, -- cgit v1.2.3 From 60257ed3c2aa6ea2891f362a691bde9d7ef17831 Mon Sep 17 00:00:00 2001 From: Matthias Baumgartner Date: Fri, 13 Jan 2023 12:22:34 +0100 Subject: schema type comparison across classes --- bsfs/triple_store/sparql/parse_filter.py | 15 ++++++--------- 1 file changed, 6 insertions(+), 9 deletions(-) (limited to 'bsfs/triple_store/sparql') diff --git a/bsfs/triple_store/sparql/parse_filter.py b/bsfs/triple_store/sparql/parse_filter.py index 18a3288..5d8a2d9 100644 --- a/bsfs/triple_store/sparql/parse_filter.py +++ b/bsfs/triple_store/sparql/parse_filter.py @@ -131,15 +131,12 @@ class Filter(): puri, subrng = self._parse_predicate_expression(node_type, pred) # track predicate uris suburi.add(puri) - try: - # check for more generic range - if rng is None or subrng > rng: - rng = subrng - # check range consistency - if not subrng <= rng and not subrng >= rng: - raise errors.ConsistencyError(f'ranges {subrng} and {rng} are not related') - except TypeError as err: # subrng and rng are not comparable - raise errors.ConsistencyError(f'ranges {subrng} and {rng} are not related') from err + # check for more generic range + if rng is None or subrng > rng: + rng = subrng + # check range consistency + if not subrng <= rng and not subrng >= rng: + raise errors.ConsistencyError(f'ranges {subrng} and {rng} are not related') # return joint predicate expression and next range # OneOf guarantees at least one expression, rng is always a bsc.Vertex. # mypy does not realize this, hence we ignore the warning. -- cgit v1.2.3 From 80a97bfa9f22d0d6dd25928fe1754a3a0d1de78a Mon Sep 17 00:00:00 2001 From: Matthias Baumgartner Date: Sun, 15 Jan 2023 21:00:12 +0100 Subject: Distance filter ast node --- bsfs/triple_store/sparql/distance.py | 56 ++++++++++++++++++++++++++++++++ bsfs/triple_store/sparql/parse_filter.py | 41 ++++++++++++++++++++++- bsfs/triple_store/sparql/sparql.py | 13 +++++++- 3 files changed, 108 insertions(+), 2 deletions(-) create mode 100644 bsfs/triple_store/sparql/distance.py (limited to 'bsfs/triple_store/sparql') diff --git a/bsfs/triple_store/sparql/distance.py b/bsfs/triple_store/sparql/distance.py new file mode 100644 index 0000000..2f5387a --- /dev/null +++ b/bsfs/triple_store/sparql/distance.py @@ -0,0 +1,56 @@ +""" + +Part of the BlackStar filesystem (bsfs) module. +A copy of the license is provided with the project. +Author: Matthias Baumgartner, 2022 +""" +# standard imports +import typing + +# external imports +import numpy as np + +# bsfs imports +from bsfs.namespace import ns + +# constants +EPS = 1e-9 + +# exports +__all__: typing.Sequence[str] = ( + 'DISTANCE_FU', + ) + + +## code ## + +def euclid(fst, snd) -> float: + """Euclidean distance (l2 norm).""" + fst = np.array(fst) + snd = np.array(snd) + return float(np.linalg.norm(fst - snd)) + +def cosine(fst, snd) -> float: + """Cosine distance.""" + fst = np.array(fst) + snd = np.array(snd) + if (fst == snd).all(): + return 0.0 + nrm0 = np.linalg.norm(fst) + nrm1 = np.linalg.norm(snd) + return float(1.0 - np.dot(fst, snd) / (nrm0 * nrm1 + EPS)) + +def manhatten(fst, snd) -> float: + """Manhatten (cityblock) distance (l1 norm).""" + fst = np.array(fst) + snd = np.array(snd) + return float(np.abs(fst - snd).sum()) + +# Known distance functions. +DISTANCE_FU = { + ns.bsfs.euclidean: euclid, + ns.bsfs.cosine: cosine, + ns.bsfs.manhatten: manhatten, +} + +## EOF ## diff --git a/bsfs/triple_store/sparql/parse_filter.py b/bsfs/triple_store/sparql/parse_filter.py index 5d8a2d9..8b6b976 100644 --- a/bsfs/triple_store/sparql/parse_filter.py +++ b/bsfs/triple_store/sparql/parse_filter.py @@ -5,19 +5,29 @@ A copy of the license is provided with the project. Author: Matthias Baumgartner, 2022 """ # imports +import operator import typing +# external imports +import rdflib + # bsfs imports from bsfs import schema as bsc from bsfs.namespace import ns from bsfs.query import ast from bsfs.utils import URI, errors +# inner-module imports +from .distance import DISTANCE_FU + # exports __all__: typing.Sequence[str] = ( 'Filter', ) + +## code ## + class _GenHopName(): """Generator that produces a new unique symbol name with each iteration.""" @@ -46,7 +56,8 @@ class Filter(): # Generator that produces unique symbol names. ngen: _GenHopName - def __init__(self, schema): + def __init__(self, graph, schema): + self.graph = graph self.schema = schema self.ngen = _GenHopName() @@ -84,6 +95,8 @@ class Filter(): return self._not(type_, node, head) if isinstance(node, ast.filter.Has): return self._has(type_, node, head) + if isinstance(node, ast.filter.Distance): + return self._distance(type_, node, head) if isinstance(node, ast.filter.Any): return self._any(type_, node, head) if isinstance(node, ast.filter.All): @@ -243,6 +256,32 @@ class Filter(): # combine return num_preds + ' . ' + count_bounds + def _distance(self, node_type: bsc.Vertex, node: ast.filter.Distance, head: str) -> str: + """ + """ + if not isinstance(node_type, bsc.Feature): + raise errors.BackendError(f'expected Feature, found {node_type}') + if len(node.reference) != node_type.dimension: + raise errors.ConsistencyError( + f'reference has dimension {len(node.reference)}, expected {node_type.dimension}') + # get distance metric + dist = DISTANCE_FU[node_type.distance] + # get operator + cmp = operator.lt if node.strict else operator.le + # get candidate values + candidates = { + f'"{cand}"^^<{node_type.uri}>' + for cand + in self.graph.objects() + if isinstance(cand, rdflib.Literal) + and cand.datatype == rdflib.URIRef(node_type.uri) + and cmp(dist(cand.value, node.reference), node.threshold) + } + # combine candidate values + values = ' '.join(candidates) if len(candidates) else f'"impossible value"^^<{ns.xsd.string}>' + # return sparql fragment + return f'VALUES {head} {{ {values} }}' + def _is(self, node_type: bsc.Vertex, node: ast.filter.Is, head: str) -> str: """ """ diff --git a/bsfs/triple_store/sparql/sparql.py b/bsfs/triple_store/sparql/sparql.py index 3877d1a..dfd9871 100644 --- a/bsfs/triple_store/sparql/sparql.py +++ b/bsfs/triple_store/sparql/sparql.py @@ -18,6 +18,7 @@ from bsfs.utils import errors, URI # inner-module imports from . import parse_filter from .. import base +from .distance import DISTANCE_FU # exports @@ -97,7 +98,7 @@ class SparqlStore(base.TripleStoreBase): self._transaction = _Transaction(self._graph) # NOTE: parsing bsfs.query.ast.filter.Has requires xsd:integer. self._schema = bsc.Schema(literals={bsc.ROOT_NUMBER.child(ns.xsd.integer)}) - self._filter_parser = parse_filter.Filter(self._schema) + self._filter_parser = parse_filter.Filter(self._graph, self._schema) # NOTE: mypy and pylint complain about the **kwargs not being listed (contrasting super) # However, not having it here is clearer since it's explicit that there are no arguments. @@ -123,6 +124,16 @@ class SparqlStore(base.TripleStoreBase): # check compatibility: No contradicting definitions if not self.schema.consistent_with(schema): raise errors.ConsistencyError(f'{schema} is inconsistent with {self.schema}') + # check distance functions of features + invalid = { + (cand.uri, cand.distance) + for cand + in schema.literals() + if isinstance(cand, bsc.Feature) and cand.distance not in DISTANCE_FU} + if len(invalid) > 0: + cand, dist = zip(*invalid) + raise ValueError( + f'unknown distance function {",".join(dist)} in feature {", ".join(cand)}') # commit the current transaction self.commit() -- cgit v1.2.3 From 3504609e1ba1f7f653fa79910474bebd3ec24d8a Mon Sep 17 00:00:00 2001 From: Matthias Baumgartner Date: Mon, 16 Jan 2023 21:41:20 +0100 Subject: various minor fixes --- bsfs/triple_store/sparql/sparql.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'bsfs/triple_store/sparql') diff --git a/bsfs/triple_store/sparql/sparql.py b/bsfs/triple_store/sparql/sparql.py index dfd9871..fedd227 100644 --- a/bsfs/triple_store/sparql/sparql.py +++ b/bsfs/triple_store/sparql/sparql.py @@ -132,7 +132,7 @@ class SparqlStore(base.TripleStoreBase): if isinstance(cand, bsc.Feature) and cand.distance not in DISTANCE_FU} if len(invalid) > 0: cand, dist = zip(*invalid) - raise ValueError( + raise errors.UnsupportedError( f'unknown distance function {",".join(dist)} in feature {", ".join(cand)}') # commit the current transaction -- cgit v1.2.3