From ca7ee6c59d2eb3f4ec4d16e392d12d946cd85e4d Mon Sep 17 00:00:00 2001 From: Matthias Baumgartner Date: Thu, 22 Dec 2022 20:33:00 +0100 Subject: filter-ast based get interface in graph. * Graph interface: Graph.get added * Node instance resolver so that Nodes can be used in a filter ast * AC interface: filter_read added to interface * upstream test adjustments of previous sparql store changes --- bsfs/graph/resolve.py | 161 ++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 161 insertions(+) create mode 100644 bsfs/graph/resolve.py (limited to 'bsfs/graph/resolve.py') diff --git a/bsfs/graph/resolve.py b/bsfs/graph/resolve.py new file mode 100644 index 0000000..feb0855 --- /dev/null +++ b/bsfs/graph/resolve.py @@ -0,0 +1,161 @@ +""" + +Part of the BlackStar filesystem (bsfs) module. +A copy of the license is provided with the project. +Author: Matthias Baumgartner, 2022 +""" +# imports +import typing + +# bsfs imports +from bsfs import schema as bsc +from bsfs.query import ast +from bsfs.utils import errors + +# inner-module imports +from . import nodes + +# exports +__all__: typing.Sequence[str] = ( + 'Filter', + ) + + +## code ## + +class Filter(): + """Rewrites the query to replace `bsfs.graph.nodes.Nodes` instances with the respective URI. + Does only limited type checking and schema validation. + Use `bsfs.schema.validate.Filter` to do so. + + Example: + input: Any(ns.bse.tag, Is(Nodes(...))) + output: Any(ns.bse.tag, Or(Is(...), Is(...), ...))) + + >>> tags = graph.node(ns.bsfs.Tag, 'http://example.com/me/tag#1234') + >>> graph.get(ns.bsfs.Entity, ast.filter.Any(ns.bse.tag, ast.filter.Is(tags))) + + """ + + T_VERTEX = typing.Union[bsc.Node, bsc.Literal] + + def __init__(self, schema): + self.schema = schema + + def __call__(self, root_type: bsc.Node, node: ast.filter.FilterExpression): + return self._parse_filter_expression(root_type, node) + + def _parse_filter_expression( + self, + type_: T_VERTEX, + node: ast.filter.FilterExpression, + ) -> ast.filter.FilterExpression: + """Route *node* to the handler of the respective FilterExpression subclass.""" + if isinstance(node, ast.filter.Is): + return self._is(type_, node) + if isinstance(node, ast.filter.Not): + return self._not(type_, node) + if isinstance(node, ast.filter.Has): + return self._has(type_, node) + if isinstance(node, ast.filter.Any): + return self._any(type_, node) + if isinstance(node, ast.filter.All): + return self._all(type_, node) + if isinstance(node, ast.filter.And): + return self._and(type_, node) + if isinstance(node, ast.filter.Or): + return self._or(type_, node) + if isinstance(node, (ast.filter.Equals, ast.filter.Substring, \ + ast.filter.StartsWith, ast.filter.EndsWith)): + return self._value(type_, node) + if isinstance(node, (ast.filter.LessThan, ast.filter.GreaterThan)): + return self._bounded(type_, node) + # invalid node + raise errors.BackendError(f'expected filter expression, found {node}') + + def _parse_predicate_expression(self, node: ast.filter.PredicateExpression) -> T_VERTEX: + """Route *node* to the handler of the respective PredicateExpression subclass.""" + if isinstance(node, ast.filter.Predicate): + return self._predicate(node) + if isinstance(node, ast.filter.OneOf): + return self._one_of(node) + # invalid node + raise errors.BackendError(f'expected predicate expression, found {node}') + + def _predicate(self, node: ast.filter.Predicate) -> T_VERTEX: + if not self.schema.has_predicate(node.predicate): + raise errors.ConsistencyError(f'predicate {node.predicate} is not in the schema') + pred = self.schema.predicate(node.predicate) + dom, rng = pred.domain, pred.range + if node.reverse: + dom, rng = rng, dom + return rng + + def _one_of(self, node: ast.filter.OneOf) -> T_VERTEX: + # determine domain and range types + rng = None + for pred in node: + # parse child expression + subrng = self._parse_predicate_expression(pred) + # determine the next type + try: + if rng is None or subrng > rng: # pick most generic range + rng = subrng + except TypeError as err: + raise errors.ConsistencyError(f'ranges {subrng} and {rng} are not related') from err + if rng is None: + raise errors.UnreachableError() + return rng + + def _any(self, type_: T_VERTEX, node: ast.filter.Any) -> ast.filter.Any: # pylint: disable=unused-argument + next_type = self._parse_predicate_expression(node.predicate) + return ast.filter.Any(node.predicate, self._parse_filter_expression(next_type, node.expr)) + + def _all(self, type_: T_VERTEX, node: ast.filter.All) -> ast.filter.All: # pylint: disable=unused-argument + next_type = self._parse_predicate_expression(node.predicate) + return ast.filter.All(node.predicate, self._parse_filter_expression(next_type, node.expr)) + + def _and(self, type_: T_VERTEX, node: ast.filter.And) -> ast.filter.And: + return ast.filter.And({self._parse_filter_expression(type_, expr) for expr in node}) + + def _or(self, type_: T_VERTEX, node: ast.filter.Or) -> ast.filter.Or: + return ast.filter.Or({self._parse_filter_expression(type_, expr) for expr in node}) + + def _not(self, type_: T_VERTEX, node: ast.filter.Not) -> ast.filter.Not: + return ast.filter.Not(self._parse_filter_expression(type_, node.expr)) + + def _has(self, type_: T_VERTEX, node: ast.filter.Has) -> ast.filter.Has: # pylint: disable=unused-argument + return node + + def _value(self, type_: T_VERTEX, node: ast.filter._Value) -> ast.filter._Value: # pylint: disable=unused-argument + return node + + def _bounded(self, type_: T_VERTEX, node: ast.filter._Bounded) -> ast.filter._Bounded: # pylint: disable=unused-argument + return node + + def _is(self, type_: T_VERTEX, node: ast.filter.Is) -> typing.Union[ast.filter.Or, ast.filter.Is]: + # check if action is needed + if not isinstance(node.value, nodes.Nodes): + return node + # check schema consistency + if node.value.node_type not in self.schema.nodes(): + raise errors.ConsistencyError(f'node {node.value.node_type} is not in the schema') + # check type compatibility + if not isinstance(type_, bsc.Node): + raise errors.ConsistencyError(f'expected a node, found {type_}') + if not node.value.node_type <= type_: + raise errors.ConsistencyError(f'expected type {type_} or subtype thereof, found {node.value.node_type}') + # NOTE: We assume that the node type is checked when writing to the backend. + # Links to any of the guids can therefore only exist if the type matches. + # Hence, we don't add a type check/constrain here. + return ast.filter.Or(ast.filter.Is(guid) for guid in node.value.guids) + # optimized code, removing unnecessary ast.filter.Or + #guids = set(node.value.guids) + #if len(guids) == 0: + # raise errors.BackendError(f'') + #if len(guids) == 1: + # return ast.filter.Nodeid(next(iter(guids))) + #return ast.filter.Or(ast.filter.Is(guid) for guid in guids) + + +## EOF ## -- cgit v1.2.3 From 3940cb3c79937a431ba2ae3b57fd0c6c2ccfff33 Mon Sep 17 00:00:00 2001 From: Matthias Baumgartner Date: Thu, 12 Jan 2023 10:12:43 +0100 Subject: use Vertex in type annotations --- bsfs/graph/resolve.py | 28 +++++++++++++--------------- 1 file changed, 13 insertions(+), 15 deletions(-) (limited to 'bsfs/graph/resolve.py') diff --git a/bsfs/graph/resolve.py b/bsfs/graph/resolve.py index feb0855..e398a5e 100644 --- a/bsfs/graph/resolve.py +++ b/bsfs/graph/resolve.py @@ -37,8 +37,6 @@ class Filter(): """ - T_VERTEX = typing.Union[bsc.Node, bsc.Literal] - def __init__(self, schema): self.schema = schema @@ -47,7 +45,7 @@ class Filter(): def _parse_filter_expression( self, - type_: T_VERTEX, + type_: bsc.Vertex, node: ast.filter.FilterExpression, ) -> ast.filter.FilterExpression: """Route *node* to the handler of the respective FilterExpression subclass.""" @@ -73,7 +71,7 @@ class Filter(): # invalid node raise errors.BackendError(f'expected filter expression, found {node}') - def _parse_predicate_expression(self, node: ast.filter.PredicateExpression) -> T_VERTEX: + def _parse_predicate_expression(self, node: ast.filter.PredicateExpression) -> bsc.Vertex: """Route *node* to the handler of the respective PredicateExpression subclass.""" if isinstance(node, ast.filter.Predicate): return self._predicate(node) @@ -82,7 +80,7 @@ class Filter(): # invalid node raise errors.BackendError(f'expected predicate expression, found {node}') - def _predicate(self, node: ast.filter.Predicate) -> T_VERTEX: + def _predicate(self, node: ast.filter.Predicate) -> bsc.Vertex: if not self.schema.has_predicate(node.predicate): raise errors.ConsistencyError(f'predicate {node.predicate} is not in the schema') pred = self.schema.predicate(node.predicate) @@ -91,7 +89,7 @@ class Filter(): dom, rng = rng, dom return rng - def _one_of(self, node: ast.filter.OneOf) -> T_VERTEX: + def _one_of(self, node: ast.filter.OneOf) -> bsc.Vertex: # determine domain and range types rng = None for pred in node: @@ -107,33 +105,33 @@ class Filter(): raise errors.UnreachableError() return rng - def _any(self, type_: T_VERTEX, node: ast.filter.Any) -> ast.filter.Any: # pylint: disable=unused-argument + def _any(self, type_: bsc.Vertex, node: ast.filter.Any) -> ast.filter.Any: # pylint: disable=unused-argument next_type = self._parse_predicate_expression(node.predicate) return ast.filter.Any(node.predicate, self._parse_filter_expression(next_type, node.expr)) - def _all(self, type_: T_VERTEX, node: ast.filter.All) -> ast.filter.All: # pylint: disable=unused-argument + def _all(self, type_: bsc.Vertex, node: ast.filter.All) -> ast.filter.All: # pylint: disable=unused-argument next_type = self._parse_predicate_expression(node.predicate) return ast.filter.All(node.predicate, self._parse_filter_expression(next_type, node.expr)) - def _and(self, type_: T_VERTEX, node: ast.filter.And) -> ast.filter.And: + def _and(self, type_: bsc.Vertex, node: ast.filter.And) -> ast.filter.And: return ast.filter.And({self._parse_filter_expression(type_, expr) for expr in node}) - def _or(self, type_: T_VERTEX, node: ast.filter.Or) -> ast.filter.Or: + def _or(self, type_: bsc.Vertex, node: ast.filter.Or) -> ast.filter.Or: return ast.filter.Or({self._parse_filter_expression(type_, expr) for expr in node}) - def _not(self, type_: T_VERTEX, node: ast.filter.Not) -> ast.filter.Not: + def _not(self, type_: bsc.Vertex, node: ast.filter.Not) -> ast.filter.Not: return ast.filter.Not(self._parse_filter_expression(type_, node.expr)) - def _has(self, type_: T_VERTEX, node: ast.filter.Has) -> ast.filter.Has: # pylint: disable=unused-argument + def _has(self, type_: bsc.Vertex, node: ast.filter.Has) -> ast.filter.Has: # pylint: disable=unused-argument return node - def _value(self, type_: T_VERTEX, node: ast.filter._Value) -> ast.filter._Value: # pylint: disable=unused-argument + def _value(self, type_: bsc.Vertex, node: ast.filter._Value) -> ast.filter._Value: # pylint: disable=unused-argument return node - def _bounded(self, type_: T_VERTEX, node: ast.filter._Bounded) -> ast.filter._Bounded: # pylint: disable=unused-argument + def _bounded(self, type_: bsc.Vertex, node: ast.filter._Bounded) -> ast.filter._Bounded: # pylint: disable=unused-argument return node - def _is(self, type_: T_VERTEX, node: ast.filter.Is) -> typing.Union[ast.filter.Or, ast.filter.Is]: + def _is(self, type_: bsc.Vertex, node: ast.filter.Is) -> typing.Union[ast.filter.Or, ast.filter.Is]: # check if action is needed if not isinstance(node.value, nodes.Nodes): return node -- cgit v1.2.3 From 7e7284d5fc01c0a081aa79d67736f51069864a7d Mon Sep 17 00:00:00 2001 From: Matthias Baumgartner Date: Thu, 12 Jan 2023 10:22:59 +0100 Subject: adapt to non-optional range in query checks --- bsfs/graph/resolve.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'bsfs/graph/resolve.py') diff --git a/bsfs/graph/resolve.py b/bsfs/graph/resolve.py index e398a5e..9b5f631 100644 --- a/bsfs/graph/resolve.py +++ b/bsfs/graph/resolve.py @@ -101,8 +101,8 @@ class Filter(): rng = subrng except TypeError as err: raise errors.ConsistencyError(f'ranges {subrng} and {rng} are not related') from err - if rng is None: - raise errors.UnreachableError() + if not isinstance(rng, (bsc.Node, bsc.Literal)): + raise errors.BackendError(f'the range of node {node} is undefined') return rng def _any(self, type_: bsc.Vertex, node: ast.filter.Any) -> ast.filter.Any: # pylint: disable=unused-argument -- cgit v1.2.3 From 60257ed3c2aa6ea2891f362a691bde9d7ef17831 Mon Sep 17 00:00:00 2001 From: Matthias Baumgartner Date: Fri, 13 Jan 2023 12:22:34 +0100 Subject: schema type comparison across classes --- bsfs/graph/resolve.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'bsfs/graph/resolve.py') diff --git a/bsfs/graph/resolve.py b/bsfs/graph/resolve.py index 9b5f631..b671204 100644 --- a/bsfs/graph/resolve.py +++ b/bsfs/graph/resolve.py @@ -96,11 +96,11 @@ class Filter(): # parse child expression subrng = self._parse_predicate_expression(pred) # determine the next type - try: - if rng is None or subrng > rng: # pick most generic range - rng = subrng - except TypeError as err: - raise errors.ConsistencyError(f'ranges {subrng} and {rng} are not related') from err + if rng is None or subrng > rng: # pick most generic range + rng = subrng + # check range consistency + if not subrng <= rng and not subrng >= rng: + raise errors.ConsistencyError(f'ranges {subrng} and {rng} are not related') if not isinstance(rng, (bsc.Node, bsc.Literal)): raise errors.BackendError(f'the range of node {node} is undefined') return rng -- cgit v1.2.3 From 80a97bfa9f22d0d6dd25928fe1754a3a0d1de78a Mon Sep 17 00:00:00 2001 From: Matthias Baumgartner Date: Sun, 15 Jan 2023 21:00:12 +0100 Subject: Distance filter ast node --- bsfs/graph/resolve.py | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'bsfs/graph/resolve.py') diff --git a/bsfs/graph/resolve.py b/bsfs/graph/resolve.py index b671204..00b778b 100644 --- a/bsfs/graph/resolve.py +++ b/bsfs/graph/resolve.py @@ -63,6 +63,8 @@ class Filter(): return self._and(type_, node) if isinstance(node, ast.filter.Or): return self._or(type_, node) + if isinstance(node, ast.filter.Distance): + return self._distance(type_, node) if isinstance(node, (ast.filter.Equals, ast.filter.Substring, \ ast.filter.StartsWith, ast.filter.EndsWith)): return self._value(type_, node) @@ -125,6 +127,9 @@ class Filter(): def _has(self, type_: bsc.Vertex, node: ast.filter.Has) -> ast.filter.Has: # pylint: disable=unused-argument return node + def _distance(self, type_: bsc.Vertex, node: ast.filter.Distance): # pylint: disable=unused-argument + return node + def _value(self, type_: bsc.Vertex, node: ast.filter._Value) -> ast.filter._Value: # pylint: disable=unused-argument return node -- cgit v1.2.3 From f31a0d005785d474a37ec769c1f7f5e27aa08a57 Mon Sep 17 00:00:00 2001 From: Matthias Baumgartner Date: Wed, 8 Feb 2023 21:08:24 +0100 Subject: minor comments --- bsfs/graph/resolve.py | 1 + 1 file changed, 1 insertion(+) (limited to 'bsfs/graph/resolve.py') diff --git a/bsfs/graph/resolve.py b/bsfs/graph/resolve.py index 00b778b..4677401 100644 --- a/bsfs/graph/resolve.py +++ b/bsfs/graph/resolve.py @@ -41,6 +41,7 @@ class Filter(): self.schema = schema def __call__(self, root_type: bsc.Node, node: ast.filter.FilterExpression): + # FIXME: node can be None! return self._parse_filter_expression(root_type, node) def _parse_filter_expression( -- cgit v1.2.3 From 87f437380c1dd8f420437cddc028c0f3174ee1c9 Mon Sep 17 00:00:00 2001 From: Matthias Baumgartner Date: Thu, 2 Mar 2023 12:19:58 +0100 Subject: Node getters in bsfs.Graph: * Empty nodes instance (Graph.empty) * Order-preserving get query (Graph.sorted) * Collect common code in private Graph.__get * Empty query in Graph.get * Empty query in Graph.resolve.Filter * Empty query in AC: filter_read --- bsfs/graph/resolve.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) (limited to 'bsfs/graph/resolve.py') diff --git a/bsfs/graph/resolve.py b/bsfs/graph/resolve.py index 4677401..b3ab001 100644 --- a/bsfs/graph/resolve.py +++ b/bsfs/graph/resolve.py @@ -40,8 +40,13 @@ class Filter(): def __init__(self, schema): self.schema = schema - def __call__(self, root_type: bsc.Node, node: ast.filter.FilterExpression): - # FIXME: node can be None! + def __call__( + self, + root_type: bsc.Node, + node: typing.Optional[ast.filter.FilterExpression], + ): + if node is None: + return None return self._parse_filter_expression(root_type, node) def _parse_filter_expression( -- cgit v1.2.3 From 2e07f33314c238e42bfadc5f39805f93ffbc622e Mon Sep 17 00:00:00 2001 From: Matthias Baumgartner Date: Thu, 2 Mar 2023 15:10:05 +0100 Subject: removed author and license notices from individual files --- bsfs/graph/resolve.py | 5 ----- 1 file changed, 5 deletions(-) (limited to 'bsfs/graph/resolve.py') diff --git a/bsfs/graph/resolve.py b/bsfs/graph/resolve.py index b3ab001..213ac4c 100644 --- a/bsfs/graph/resolve.py +++ b/bsfs/graph/resolve.py @@ -1,9 +1,4 @@ -""" -Part of the BlackStar filesystem (bsfs) module. -A copy of the license is provided with the project. -Author: Matthias Baumgartner, 2022 -""" # imports import typing -- cgit v1.2.3 From 6b9379d75198082054c35e44bc2cd880353a7485 Mon Sep 17 00:00:00 2001 From: Matthias Baumgartner Date: Thu, 2 Mar 2023 16:40:43 +0100 Subject: hardening --- bsfs/graph/resolve.py | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'bsfs/graph/resolve.py') diff --git a/bsfs/graph/resolve.py b/bsfs/graph/resolve.py index 213ac4c..0ba1e36 100644 --- a/bsfs/graph/resolve.py +++ b/bsfs/graph/resolve.py @@ -40,6 +40,14 @@ class Filter(): root_type: bsc.Node, node: typing.Optional[ast.filter.FilterExpression], ): + """Alias for `Resolve.resolve`.""" + return self.resolve(root_type, node) + + def resolve( + self, + root_type: bsc.Node, + node: typing.Optional[ast.filter.FilterExpression], + ): if node is None: return None return self._parse_filter_expression(root_type, node) -- cgit v1.2.3 From 2c6c23f85e7f2123c508f9ff8a4aa776948bb589 Mon Sep 17 00:00:00 2001 From: Matthias Baumgartner Date: Thu, 2 Mar 2023 16:46:11 +0100 Subject: minor style fixes --- bsfs/graph/resolve.py | 1 + 1 file changed, 1 insertion(+) (limited to 'bsfs/graph/resolve.py') diff --git a/bsfs/graph/resolve.py b/bsfs/graph/resolve.py index 0ba1e36..95dcfc1 100644 --- a/bsfs/graph/resolve.py +++ b/bsfs/graph/resolve.py @@ -48,6 +48,7 @@ class Filter(): root_type: bsc.Node, node: typing.Optional[ast.filter.FilterExpression], ): + """Resolve Nodes instances of a *node* query starting at *root_type*.""" if node is None: return None return self._parse_filter_expression(root_type, node) -- cgit v1.2.3 From 4fead04055be4967d9ea3b24ff61fe37a93108dd Mon Sep 17 00:00:00 2001 From: Matthias Baumgartner Date: Sat, 4 Mar 2023 13:31:11 +0100 Subject: namespace refactoring and cleanup --- bsfs/graph/resolve.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'bsfs/graph/resolve.py') diff --git a/bsfs/graph/resolve.py b/bsfs/graph/resolve.py index 95dcfc1..a58eb67 100644 --- a/bsfs/graph/resolve.py +++ b/bsfs/graph/resolve.py @@ -27,8 +27,8 @@ class Filter(): input: Any(ns.bse.tag, Is(Nodes(...))) output: Any(ns.bse.tag, Or(Is(...), Is(...), ...))) - >>> tags = graph.node(ns.bsfs.Tag, 'http://example.com/me/tag#1234') - >>> graph.get(ns.bsfs.Entity, ast.filter.Any(ns.bse.tag, ast.filter.Is(tags))) + >>> tags = graph.node(ns.bsn.Tag, 'http://example.com/me/tag#1234') + >>> graph.get(ns.bsn.Entity, ast.filter.Any(ns.bse.tag, ast.filter.Is(tags))) """ -- cgit v1.2.3