diff options
29 files changed, 3157 insertions, 27 deletions
@@ -88,7 +88,7 @@ max-parents=7 max-public-methods=20 # Maximum number of return / yield for function / method body. -max-returns=6 +max-returns=15 # Maximum number of statements in function / method body. max-statements=50 @@ -164,7 +164,7 @@ score=yes [SIMILARITIES] # Minimum lines number of a similarity. -min-similarity-lines=4 +min-similarity-lines=5 [STRING] diff --git a/bsfs/graph/ac/base.py b/bsfs/graph/ac/base.py index bc9aeb3..0703e2e 100644 --- a/bsfs/graph/ac/base.py +++ b/bsfs/graph/ac/base.py @@ -10,6 +10,7 @@ import typing # bsfs imports from bsfs import schema +from bsfs.query import ast from bsfs.triple_store import TripleStoreBase from bsfs.utils import URI @@ -67,5 +68,8 @@ class AccessControlBase(abc.ABC): def createable(self, node_type: schema.Node, guids: typing.Iterable[URI]) -> typing.Iterable[URI]: """Return nodes that are allowed to be created.""" + @abc.abstractmethod + def filter_read(self, node_type: schema.Node, query: ast.filter.FilterExpression) -> ast.filter.FilterExpression: + """Re-write a filter *query* to get (i.e., read) *node_type* nodes.""" ## EOF ## diff --git a/bsfs/graph/ac/null.py b/bsfs/graph/ac/null.py index 36838bd..12b4e87 100644 --- a/bsfs/graph/ac/null.py +++ b/bsfs/graph/ac/null.py @@ -10,6 +10,7 @@ import typing # bsfs imports from bsfs import schema from bsfs.namespace import ns +from bsfs.query import ast from bsfs.utils import URI # inner-module imports @@ -49,4 +50,8 @@ class NullAC(base.AccessControlBase): """Return nodes that are allowed to be created.""" return guids + def filter_read(self, node_type: schema.Node, query: ast.filter.FilterExpression) -> ast.filter.FilterExpression: + """Re-write a filter *query* to get (i.e., read) *node_type* nodes.""" + return query + ## EOF ## diff --git a/bsfs/graph/graph.py b/bsfs/graph/graph.py index b7b9f1c..f030fed 100644 --- a/bsfs/graph/graph.py +++ b/bsfs/graph/graph.py @@ -9,12 +9,15 @@ import os import typing # bsfs imports +from bsfs.query import ast, validate from bsfs.schema import Schema from bsfs.triple_store import TripleStoreBase from bsfs.utils import URI, typename # inner-module imports +from . import ac from . import nodes as _nodes +from . import resolve # exports __all__: typing.Sequence[str] = ( @@ -43,6 +46,9 @@ class Graph(): def __init__(self, backend: TripleStoreBase, user: URI): self._backend = backend self._user = user + self._resolver = resolve.Filter(self._backend.schema) + self._validate = validate.Filter(self._backend.schema) + self._ac = ac.NullAC(self._backend, self._user) # ensure Graph schema requirements self.migrate(self._backend.schema) @@ -84,6 +90,9 @@ class Graph(): # migrate schema in backend # FIXME: consult access controls! self._backend.schema = schema + # re-initialize members + self._resolver.schema = self.schema + self._validate.schema = self.schema # return self return self @@ -107,7 +116,21 @@ class Graph(): *node_type*) once some data is assigned to them. """ + return self.nodes(node_type, {guid}) + + def get(self, node_type: URI, query: ast.filter.FilterExpression) -> _nodes.Nodes: # FIXME: How about empty query? + """Return a `Nodes` instance over all nodes of type *node_type* that match the *subject* query.""" + # get node type type_ = self.schema.node(node_type) - return _nodes.Nodes(self._backend, self._user, type_, {guid}) + # resolve Nodes instances + query = self._resolver(type_, query) + # add access controls to query + query = self._ac.filter_read(type_, query) + # validate query + self._validate(type_, query) + # query the backend + guids = self._backend.get(type_, query) # no need to materialize + # return Nodes instance + return _nodes.Nodes(self._backend, self._user, type_, guids) ## EOF ## diff --git a/bsfs/graph/resolve.py b/bsfs/graph/resolve.py new file mode 100644 index 0000000..feb0855 --- /dev/null +++ b/bsfs/graph/resolve.py @@ -0,0 +1,161 @@ +""" + +Part of the BlackStar filesystem (bsfs) module. +A copy of the license is provided with the project. +Author: Matthias Baumgartner, 2022 +""" +# imports +import typing + +# bsfs imports +from bsfs import schema as bsc +from bsfs.query import ast +from bsfs.utils import errors + +# inner-module imports +from . import nodes + +# exports +__all__: typing.Sequence[str] = ( + 'Filter', + ) + + +## code ## + +class Filter(): + """Rewrites the query to replace `bsfs.graph.nodes.Nodes` instances with the respective URI. + Does only limited type checking and schema validation. + Use `bsfs.schema.validate.Filter` to do so. + + Example: + input: Any(ns.bse.tag, Is(Nodes(...))) + output: Any(ns.bse.tag, Or(Is(...), Is(...), ...))) + + >>> tags = graph.node(ns.bsfs.Tag, 'http://example.com/me/tag#1234') + >>> graph.get(ns.bsfs.Entity, ast.filter.Any(ns.bse.tag, ast.filter.Is(tags))) + + """ + + T_VERTEX = typing.Union[bsc.Node, bsc.Literal] + + def __init__(self, schema): + self.schema = schema + + def __call__(self, root_type: bsc.Node, node: ast.filter.FilterExpression): + return self._parse_filter_expression(root_type, node) + + def _parse_filter_expression( + self, + type_: T_VERTEX, + node: ast.filter.FilterExpression, + ) -> ast.filter.FilterExpression: + """Route *node* to the handler of the respective FilterExpression subclass.""" + if isinstance(node, ast.filter.Is): + return self._is(type_, node) + if isinstance(node, ast.filter.Not): + return self._not(type_, node) + if isinstance(node, ast.filter.Has): + return self._has(type_, node) + if isinstance(node, ast.filter.Any): + return self._any(type_, node) + if isinstance(node, ast.filter.All): + return self._all(type_, node) + if isinstance(node, ast.filter.And): + return self._and(type_, node) + if isinstance(node, ast.filter.Or): + return self._or(type_, node) + if isinstance(node, (ast.filter.Equals, ast.filter.Substring, \ + ast.filter.StartsWith, ast.filter.EndsWith)): + return self._value(type_, node) + if isinstance(node, (ast.filter.LessThan, ast.filter.GreaterThan)): + return self._bounded(type_, node) + # invalid node + raise errors.BackendError(f'expected filter expression, found {node}') + + def _parse_predicate_expression(self, node: ast.filter.PredicateExpression) -> T_VERTEX: + """Route *node* to the handler of the respective PredicateExpression subclass.""" + if isinstance(node, ast.filter.Predicate): + return self._predicate(node) + if isinstance(node, ast.filter.OneOf): + return self._one_of(node) + # invalid node + raise errors.BackendError(f'expected predicate expression, found {node}') + + def _predicate(self, node: ast.filter.Predicate) -> T_VERTEX: + if not self.schema.has_predicate(node.predicate): + raise errors.ConsistencyError(f'predicate {node.predicate} is not in the schema') + pred = self.schema.predicate(node.predicate) + dom, rng = pred.domain, pred.range + if node.reverse: + dom, rng = rng, dom + return rng + + def _one_of(self, node: ast.filter.OneOf) -> T_VERTEX: + # determine domain and range types + rng = None + for pred in node: + # parse child expression + subrng = self._parse_predicate_expression(pred) + # determine the next type + try: + if rng is None or subrng > rng: # pick most generic range + rng = subrng + except TypeError as err: + raise errors.ConsistencyError(f'ranges {subrng} and {rng} are not related') from err + if rng is None: + raise errors.UnreachableError() + return rng + + def _any(self, type_: T_VERTEX, node: ast.filter.Any) -> ast.filter.Any: # pylint: disable=unused-argument + next_type = self._parse_predicate_expression(node.predicate) + return ast.filter.Any(node.predicate, self._parse_filter_expression(next_type, node.expr)) + + def _all(self, type_: T_VERTEX, node: ast.filter.All) -> ast.filter.All: # pylint: disable=unused-argument + next_type = self._parse_predicate_expression(node.predicate) + return ast.filter.All(node.predicate, self._parse_filter_expression(next_type, node.expr)) + + def _and(self, type_: T_VERTEX, node: ast.filter.And) -> ast.filter.And: + return ast.filter.And({self._parse_filter_expression(type_, expr) for expr in node}) + + def _or(self, type_: T_VERTEX, node: ast.filter.Or) -> ast.filter.Or: + return ast.filter.Or({self._parse_filter_expression(type_, expr) for expr in node}) + + def _not(self, type_: T_VERTEX, node: ast.filter.Not) -> ast.filter.Not: + return ast.filter.Not(self._parse_filter_expression(type_, node.expr)) + + def _has(self, type_: T_VERTEX, node: ast.filter.Has) -> ast.filter.Has: # pylint: disable=unused-argument + return node + + def _value(self, type_: T_VERTEX, node: ast.filter._Value) -> ast.filter._Value: # pylint: disable=unused-argument + return node + + def _bounded(self, type_: T_VERTEX, node: ast.filter._Bounded) -> ast.filter._Bounded: # pylint: disable=unused-argument + return node + + def _is(self, type_: T_VERTEX, node: ast.filter.Is) -> typing.Union[ast.filter.Or, ast.filter.Is]: + # check if action is needed + if not isinstance(node.value, nodes.Nodes): + return node + # check schema consistency + if node.value.node_type not in self.schema.nodes(): + raise errors.ConsistencyError(f'node {node.value.node_type} is not in the schema') + # check type compatibility + if not isinstance(type_, bsc.Node): + raise errors.ConsistencyError(f'expected a node, found {type_}') + if not node.value.node_type <= type_: + raise errors.ConsistencyError(f'expected type {type_} or subtype thereof, found {node.value.node_type}') + # NOTE: We assume that the node type is checked when writing to the backend. + # Links to any of the guids can therefore only exist if the type matches. + # Hence, we don't add a type check/constrain here. + return ast.filter.Or(ast.filter.Is(guid) for guid in node.value.guids) + # optimized code, removing unnecessary ast.filter.Or + #guids = set(node.value.guids) + #if len(guids) == 0: + # raise errors.BackendError(f'') + #if len(guids) == 1: + # return ast.filter.Nodeid(next(iter(guids))) + #return ast.filter.Or(ast.filter.Is(guid) for guid in guids) + + +## EOF ## diff --git a/bsfs/query/__init__.py b/bsfs/query/__init__.py new file mode 100644 index 0000000..21c7389 --- /dev/null +++ b/bsfs/query/__init__.py @@ -0,0 +1,20 @@ +""" + +Part of the BlackStar filesystem (bsfs) module. +A copy of the license is provided with the project. +Author: Matthias Baumgartner, 2022 +""" +# imports +import typing + +# inner-module imports +from . import ast +from . import validator as validate + +# exports +__all__: typing.Sequence[str] = ( + 'ast', + 'validate', + ) + +## EOF ## diff --git a/bsfs/query/ast/__init__.py b/bsfs/query/ast/__init__.py new file mode 100644 index 0000000..704d051 --- /dev/null +++ b/bsfs/query/ast/__init__.py @@ -0,0 +1,24 @@ +"""Query AST components. + +The query AST consists of a Filter syntax tree. + +Classes beginning with an underscore (_) represent internal type hierarchies +and should not be used for parsing. Note that the AST structures do not +(and cannot) check semantic validity or consistency with a given schema. + +Part of the BlackStar filesystem (bsfs) module. +A copy of the license is provided with the project. +Author: Matthias Baumgartner, 2022 +""" +# imports +import typing + +# inner-module imports +from . import filter_ as filter # pylint: disable=redefined-builtin + +# exports +__all__: typing.Sequence[str] = ( + 'filter', + ) + +## EOF ## diff --git a/bsfs/query/ast/filter_.py b/bsfs/query/ast/filter_.py new file mode 100644 index 0000000..b129ded --- /dev/null +++ b/bsfs/query/ast/filter_.py @@ -0,0 +1,433 @@ +"""Filter AST. + +Note that it is easily possible to construct an AST that is inconsistent with +a given schema. Furthermore, it is possible to construct a semantically invalid +AST which that cannot be parsed correctly or includes contradicting statements. +The AST nodes do not (and cannot) check such issues. + +For example, consider the following AST: + +>>> Any(ns.bse.collection, +... And( +... Equals('hello'), +... Any(ns.bsm.guid, Any(ns.bsm.guid, Equals('hello'))), +... Any(ns.bst.label, Equals('world')), +... All(ns.bst.label, Not(Equals('world'))), +... ) +... ) + +This AST has multiple issues that are not verified upon its creation: +* A condition on a non-literal. +* A Filter on a literal. +* Conditions exclude each other +* The predicate along the branch have incompatible domains and ranges. + +Part of the BlackStar filesystem (bsfs) module. +A copy of the license is provided with the project. +Author: Matthias Baumgartner, 2022 +""" +# imports +from collections import abc +import typing + +# bsfs imports +from bsfs.utils import URI, typename, normalize_args + +# inner-module imports +#from . import utils + +# exports +__all__ : typing.Sequence[str] = ( + # base classes + 'FilterExpression', + 'PredicateExpression', + # predicate expressions + 'OneOf', + 'Predicate', + # branching + 'All', + 'Any', + # aggregators + 'And', + 'Or', + # value matchers + 'Equals', + 'Substring', + 'EndsWith', + 'StartsWith', + # range matchers + 'GreaterThan', + 'LessThan', + # misc + 'Has', + 'Is', + 'Not', + ) + + +## code ## + +# pylint: disable=too-few-public-methods # Many expressions use mostly magic methods + +class _Expression(abc.Hashable): + def __repr__(self) -> str: + """Return the expressions's string representation.""" + return f'{typename(self)}()' + + def __hash__(self) -> int: + """Return the expression's integer representation.""" + return hash(type(self)) + + def __eq__(self, other: typing.Any) -> bool: + """Return True if *self* and *other* are equivalent.""" + return isinstance(other, type(self)) + + +class FilterExpression(_Expression): + """Generic Filter expression.""" + + +class PredicateExpression(_Expression): + """Generic Predicate expression.""" + + +class _Branch(FilterExpression): + """Branch the filter along a predicate.""" + + # predicate to follow. + predicate: PredicateExpression + + # child expression to evaluate. + expr: FilterExpression + + def __init__( + self, + predicate: typing.Union[PredicateExpression, URI], + expr: FilterExpression, + ): + # process predicate argument + if isinstance(predicate, URI): + predicate = Predicate(predicate) + elif not isinstance(predicate, PredicateExpression): + raise TypeError(predicate) + # process expression argument + if not isinstance(expr, FilterExpression): + raise TypeError(expr) + # assign members + self.predicate = predicate + self.expr = expr + + def __repr__(self) -> str: + return f'{typename(self)}({self.predicate}, {self.expr})' + + def __hash__(self) -> int: + return hash((super().__hash__(), self.predicate, self.expr)) + + def __eq__(self, other) -> bool: + return super().__eq__(other) \ + and self.predicate == other.predicate \ + and self.expr == other.expr + +class Any(_Branch): + """Any (and at least one) triple matches.""" + + +class All(_Branch): + """All (and at least one) triples match.""" + + +class _Agg(FilterExpression, abc.Collection): + """Combine multiple expressions.""" + + # child expressions + expr: typing.Set[FilterExpression] + + def __init__( + self, + *expr: typing.Union[FilterExpression, + typing.Iterable[FilterExpression], + typing.Iterator[FilterExpression]] + ): + # unfold arguments + unfolded = set(normalize_args(*expr)) + # check type + if not all(isinstance(e, FilterExpression) for e in unfolded): + raise TypeError(expr) + # assign member + self.expr = unfolded + + def __contains__(self, expr: typing.Any) -> bool: + """Return True if *expr* is among the child expressions.""" + return expr in self.expr + + def __iter__(self) -> typing.Iterator[FilterExpression]: + """Iterator over child expressions.""" + return iter(self.expr) + + def __len__(self) -> int: + """Number of child expressions.""" + return len(self.expr) + + def __repr__(self) -> str: + return f'{typename(self)}({self.expr})' + + def __hash__(self) -> int: + return hash((super().__hash__(), tuple(self.expr))) # FIXME: Unique hash of different orders over self.expr + + def __eq__(self, other) -> bool: + return super().__eq__(other) and self.expr == other.expr + + +class And(_Agg): + """All conditions match.""" + + +class Or(_Agg): + """At least one condition matches.""" + + +class Not(FilterExpression): + """Invert a statement.""" + + # child expression + expr: FilterExpression + + def __init__(self, expr: FilterExpression): + # check argument + if not isinstance(expr, FilterExpression): + raise TypeError(expr) + # assign member + self.expr = expr + + def __repr__(self) -> str: + return f'{typename(self)}({self.expr})' + + def __hash__(self) -> int: + return hash((super().__hash__(), self.expr)) + + def __eq__(self, other: typing.Any) -> bool: + return super().__eq__(other) and self.expr == other.expr + + +class Has(FilterExpression): + """Has predicate N times""" + + # predicate to follow. + predicate: PredicateExpression + + # target count + count: FilterExpression + + def __init__( + self, + predicate: typing.Union[PredicateExpression, URI], + count: typing.Optional[typing.Union[FilterExpression, int]] = None, + ): + # check predicate + if isinstance(predicate, URI): + predicate = Predicate(predicate) + elif not isinstance(predicate, PredicateExpression): + raise TypeError(predicate) + # check count + if count is None: + count = GreaterThan(1, strict=False) + elif isinstance(count, int): + count = Equals(count) + elif not isinstance(count, FilterExpression): + raise TypeError(count) + # assign members + self.predicate = predicate + self.count = count + + def __repr__(self) -> str: + return f'{typename(self)}({self.predicate}, {self.count})' + + def __hash__(self) -> int: + return hash((super().__hash__(), self.predicate, self.count)) + + def __eq__(self, other) -> bool: + return super().__eq__(other) \ + and self.predicate == other.predicate \ + and self.count == other.count + + +class _Value(FilterExpression): + """ + """ + + # target value. + value: typing.Any + + def __init__(self, value: typing.Any): + self.value = value + + def __repr__(self) -> str: + return f'{typename(self)}({self.value})' + + def __hash__(self) -> int: + return hash((super().__hash__(), self.value)) + + def __eq__(self, other) -> bool: + return super().__eq__(other) and self.value == other.value + + +class Is(_Value): + """Match the URI of a node.""" + + +class Equals(_Value): + """Value matches exactly. + NOTE: Value format must correspond to literal type; can be a string, a number, or a Node + """ + + +class Substring(_Value): + """Value matches a substring + NOTE: value format must be a string + """ + + +class StartsWith(_Value): + """Value begins with a given string.""" + + +class EndsWith(_Value): + """Value ends with a given string.""" + + +class _Bounded(FilterExpression): + """ + """ + + # bound. + threshold: float + + # closed (True) or open (False) bound. + strict: bool + + def __init__( + self, + threshold: float, + strict: bool = True, + ): + self.threshold = float(threshold) + self.strict = bool(strict) + + def __repr__(self) -> str: + return f'{typename(self)}({self.threshold}, {self.strict})' + + def __hash__(self) -> int: + return hash((super().__hash__(), self.threshold, self.strict)) + + def __eq__(self, other) -> bool: + return super().__eq__(other) \ + and self.threshold == other.threshold \ + and self.strict == other.strict + + + +class LessThan(_Bounded): + """Value is (strictly) smaller than threshold. + NOTE: only on numerical literals + """ + + +class GreaterThan(_Bounded): + """Value is (strictly) larger than threshold + NOTE: only on numerical literals + """ + + +class Predicate(PredicateExpression): + """A single predicate.""" + + # predicate URI + predicate: URI + + # reverse the predicate's direction + reverse: bool + + def __init__( + self, + predicate: URI, + reverse: typing.Optional[bool] = False, + ): + # check arguments + if not isinstance(predicate, URI): + raise TypeError(predicate) + # assign members + self.predicate = predicate + self.reverse = bool(reverse) + + def __repr__(self) -> str: + return f'{typename(self)}({self.predicate}, {self.reverse})' + + def __hash__(self) -> int: + return hash((super().__hash__(), self.predicate, self.reverse)) + + def __eq__(self, other) -> bool: + return super().__eq__(other) \ + and self.predicate == other.predicate \ + and self.reverse == other.reverse + + +class OneOf(PredicateExpression, abc.Collection): + """A set of predicate alternatives. + + The predicates' domains must be ascendants or descendants of each other. + The overall domain is the most specific one. + + The predicate's domains must be ascendants or descendants of each other. + The overall range is the most generic one. + """ + + # predicate alternatives + expr: typing.Set[PredicateExpression] + + def __init__(self, *expr: typing.Union[PredicateExpression, URI]): + # unfold arguments + unfolded = set(normalize_args(*expr)) # type: ignore [arg-type] # this is getting too complex... + # check arguments + if len(unfolded) == 0: + raise AttributeError('expected at least one expression, found none') + # ensure PredicateExpression + unfolded = {Predicate(e) if isinstance(e, URI) else e for e in unfolded} + # check type + if not all(isinstance(e, PredicateExpression) for e in unfolded): + raise TypeError(expr) + # assign member + self.expr = unfolded + + def __contains__(self, expr: typing.Any) -> bool: + """Return True if *expr* is among the child expressions.""" + return expr in self.expr + + def __iter__(self) -> typing.Iterator[PredicateExpression]: + """Iterator over child expressions.""" + return iter(self.expr) + + def __len__(self) -> int: + """Number of child expressions.""" + return len(self.expr) + + def __repr__(self) -> str: + return f'{typename(self)}({self.expr})' + + def __hash__(self) -> int: + return hash((super().__hash__(), tuple(self.expr))) # FIXME: Unique hash of different orders over self.expr + + def __eq__(self, other) -> bool: + return super().__eq__(other) and self.expr == other.expr + + +# Helpers + +def IsIn(*values): # pylint: disable=invalid-name # explicitly mimics an expression + """Match any of the given URIs.""" + return Or(Is(value) for value in normalize_args(*values)) + +def IsNotIn(*values): # pylint: disable=invalid-name # explicitly mimics an expression + """Match none of the given URIs.""" + return Not(IsIn(*values)) + +## EOF ## diff --git a/bsfs/query/validator.py b/bsfs/query/validator.py new file mode 100644 index 0000000..352203a --- /dev/null +++ b/bsfs/query/validator.py @@ -0,0 +1,224 @@ +""" + +Part of the BlackStar filesystem (bsfs) module. +A copy of the license is provided with the project. +Author: Matthias Baumgartner, 2022 +""" +# imports +import typing + +# bsfs imports +from bsfs import schema as bsc +from bsfs.namespace import ns +from bsfs.utils import errors, typename + +# inner-module imports +from . import ast + +# exports +__all__ : typing.Sequence[str] = ( + 'Filter', + ) + + +## code ## + +class Filter(): + """Validate a `bsfs.query.ast.filter` query's structure and schema compliance. + + * Conditions (Bounded, Value) can only be applied on literals + * Branches, Id, and Has can only be applied on nodes + * Predicates' domain and range must match + * Predicate paths must follow the schema + * Referenced types are present in the schema + + """ + + # vertex types + T_VERTEX = typing.Union[bsc.Node, bsc.Literal] # FIXME: Shouldn't this be in the schema? + + # schema to validate against. + schema: bsc.Schema + + def __init__(self, schema: bsc.Schema): + self.schema = schema + + def __call__(self, root_type: bsc.Node, query: ast.filter.FilterExpression): + """Validate a filter *query*, assuming the subject having *root_type*. + + Raises a `bsfs.utils.errors.ConsistencyError` if the query violates the schema. + Raises a `bsfs.utils.errors.BackendError` if the query structure is invalid. + + """ + # root_type must be a schema.Node + if not isinstance(root_type, bsc.Node): + raise TypeError(f'Expected a node, found {typename(root_type)}') + # root_type must exist in the schema + if root_type not in self.schema.nodes(): + raise errors.ConsistencyError(f'{root_type} is not defined in the schema') + # check root expression + self._parse_filter_expression(root_type, query) + # all tests passed + return True + + + ## routing methods + + def _parse_filter_expression(self, type_: T_VERTEX, node: ast.filter.FilterExpression): + """Route *node* to the handler of the respective FilterExpression subclass.""" + if isinstance(node, ast.filter.Is): + return self._is(type_, node) + if isinstance(node, ast.filter.Not): + return self._not(type_, node) + if isinstance(node, ast.filter.Has): + return self._has(type_, node) + if isinstance(node, (ast.filter.Any, ast.filter.All)): + return self._branch(type_, node) + if isinstance(node, (ast.filter.And, ast.filter.Or)): + return self._agg(type_, node) + if isinstance(node, (ast.filter.Equals, ast.filter.Substring, ast.filter.StartsWith, ast.filter.EndsWith)): + return self._value(type_, node) + if isinstance(node, (ast.filter.LessThan, ast.filter.GreaterThan)): + return self._bounded(type_, node) + # invalid node + raise errors.BackendError(f'expected filter expression, found {node}') + + def _parse_predicate_expression(self, node: ast.filter.PredicateExpression) -> typing.Tuple[T_VERTEX, T_VERTEX]: + """Route *node* to the handler of the respective PredicateExpression subclass.""" + if isinstance(node, ast.filter.Predicate): + return self._predicate(node) + if isinstance(node, ast.filter.OneOf): + return self._one_of(node) + # invalid node + raise errors.BackendError(f'expected predicate expression, found {node}') + + + ## predicate expressions + + def _predicate(self, node: ast.filter.Predicate) -> typing.Tuple[T_VERTEX, T_VERTEX]: + # predicate exists in the schema + if not self.schema.has_predicate(node.predicate): + raise errors.ConsistencyError(f'predicate {node.predicate} is not in the schema') + # determine domain and range + pred = self.schema.predicate(node.predicate) + dom, rng = pred.domain, pred.range + if rng is None: + # FIXME: It is a design error that Predicates can have a None range... + raise errors.BackendError(f'predicate {pred} has no range') + if node.reverse: + dom, rng = rng, dom # type: ignore [assignment] # variable re-use confuses mypy + # return domain and range + return dom, rng + + def _one_of(self, node: ast.filter.OneOf) -> typing.Tuple[T_VERTEX, T_VERTEX]: + # determine domain and range types + # NOTE: select the most specific domain and the most generic range + dom, rng = None, None + for pred in node: + # parse child expression + subdom, subrng = self._parse_predicate_expression(pred) + try: + # determine overall domain + if dom is None or subdom < dom: # pick most specific domain + dom = subdom + # domains must be related across all child expressions + if not subdom <= dom and not subdom >= dom: + raise errors.ConsistencyError(f'domains {subdom} and {dom} are not related') + except TypeError as err: # compared literal vs. node + raise errors.ConsistencyError(f'domains {subdom} and {dom} are not of the same type') from err + + try: + # determine overall range + if rng is None or subrng > rng: # pick most generic range + rng = subrng + # ranges must be related across all child expressions + if not subrng <= rng and not subrng >= rng: + raise errors.ConsistencyError(f'ranges {subrng} and {rng} are not related') + except TypeError as err: # compared literal vs. node + raise errors.ConsistencyError(f'ranges {subrng} and {rng} are not of the same type') from err + # check domain and range + if dom is None or rng is None: + # OneOf guarantees at least one expression, these two cases cannot happen + raise errors.UnreachableError() + # return domain and range + return dom, rng + + + ## intermediates + + def _branch(self, type_: T_VERTEX, node: ast.filter._Branch): + # type is a Node + if not isinstance(type_, bsc.Node): + raise errors.ConsistencyError(f'expected a Node, found {type_}') + # type exists in the schema + # FIXME: Isn't it actually guaranteed that the type (except the root type) is part of the schema? + # all types can be traced back to (a) root_type, (b) predicate, or (c) manually set (e.g. in _is). + # For (a), we do (and have to) perform a check. For (c), the code base should be consistent throughout + # the module, so this is an assumption that has to be ensured in schema.Schema. For (b), we know (and + # check) that the predicate is in the schema, hence all node/literals derived from it are also in the + # schema by construction of the schema.Schema class. So, why do we check this every time? + if type_ not in self.schema.nodes(): + raise errors.ConsistencyError(f'node {type_} is not in the schema') + # predicate is valid + dom, rng = self._parse_predicate_expression(node.predicate) + # type_ is a subtype of the predicate's domain + if not type_ <= dom: + raise errors.ConsistencyError(f'expected type {dom} or subtype thereof, found {type_}') + # child expression is valid + self._parse_filter_expression(rng, node.expr) + + def _agg(self, type_: T_VERTEX, node: ast.filter._Agg): + for expr in node: + # child expression is valid + self._parse_filter_expression(type_, expr) + + def _not(self, type_: T_VERTEX, node: ast.filter.Not): + # child expression is valid + self._parse_filter_expression(type_, node.expr) + + def _has(self, type_: T_VERTEX, node: ast.filter.Has): + # type is a Node + if not isinstance(type_, bsc.Node): + raise errors.ConsistencyError(f'expected a Node, found {type_}') + # type exists in the schema + if type_ not in self.schema.nodes(): + raise errors.ConsistencyError(f'node {type_} is not in the schema') + # predicate is valid + dom, _= self._parse_predicate_expression(node.predicate) + # type_ is a subtype of the predicate's domain + if not type_ <= dom: + raise errors.ConsistencyError(f'expected type {dom}, found {type_}') + # node.count is a numerical expression + # FIXME: We have to ensure that ns.xsd.integer is always known in the schema! + self._parse_filter_expression(self.schema.literal(ns.xsd.integer), node.count) + + + ## conditions + + def _is(self, type_: T_VERTEX, node: ast.filter.Is): # pylint: disable=unused-argument # (node) + if not isinstance(type_, bsc.Node): + raise errors.ConsistencyError(f'expected a Node, found {type_}') + if type_ not in self.schema.nodes(): + raise errors.ConsistencyError(f'node {type_} is not in the schema') + + def _value(self, type_: T_VERTEX, node: ast.filter._Value): # pylint: disable=unused-argument # (node) + # type is a literal + if not isinstance(type_, bsc.Literal): + raise errors.ConsistencyError(f'expected a Literal, found {type_}') + # type exists in the schema + if type_ not in self.schema.literals(): + raise errors.ConsistencyError(f'literal {type_} is not in the schema') + # FIXME: Check if node.value corresponds to type_ + # FIXME: A specific literal might be requested (i.e., a numeric type when used in Has) + + def _bounded(self, type_: T_VERTEX, node: ast.filter._Bounded): # pylint: disable=unused-argument # (node) + # type is a literal + if not isinstance(type_, bsc.Literal): + raise errors.ConsistencyError(f'expected a Literal, found {type_}') + # type exists in the schema + if type_ not in self.schema.literals(): + raise errors.ConsistencyError(f'literal {type_} is not in the schema') + # FIXME: Check if node.value corresponds to type_ + + +## EOF ## diff --git a/bsfs/triple_store/base.py b/bsfs/triple_store/base.py index 6561262..7e03714 100644 --- a/bsfs/triple_store/base.py +++ b/bsfs/triple_store/base.py @@ -9,6 +9,7 @@ import abc import typing # inner-module imports +from bsfs.query import ast from bsfs.utils import URI, typename import bsfs.schema as _schema @@ -109,6 +110,16 @@ class TripleStoreBase(abc.ABC): """ @abc.abstractmethod + def get( + self, + node_type: _schema.Node, + query: typing.Optional[ast.filter.FilterExpression] = None, + ) -> typing.Iterator[URI]: + """Return guids of nodes of type *node_type* that match the *query*. + Return all guids of the respective type if *query* is None. + """ + + @abc.abstractmethod def exists( self, node_type: _schema.Node, diff --git a/bsfs/triple_store/sparql/__init__.py b/bsfs/triple_store/sparql/__init__.py new file mode 100644 index 0000000..285334a --- /dev/null +++ b/bsfs/triple_store/sparql/__init__.py @@ -0,0 +1,18 @@ +""" + +Part of the BlackStar filesystem (bsfs) module. +A copy of the license is provided with the project. +Author: Matthias Baumgartner, 2022 +""" +# imports +import typing + +# inner-module imports +from .sparql import SparqlStore + +# exports +__all__: typing.Sequence[str] = ( + 'SparqlStore', + ) + +## EOF ## diff --git a/bsfs/triple_store/sparql/parse_filter.py b/bsfs/triple_store/sparql/parse_filter.py new file mode 100644 index 0000000..d4db0aa --- /dev/null +++ b/bsfs/triple_store/sparql/parse_filter.py @@ -0,0 +1,307 @@ +""" + +Part of the BlackStar filesystem (bsfs) module. +A copy of the license is provided with the project. +Author: Matthias Baumgartner, 2022 +""" +# imports +import typing + +# bsfs imports +from bsfs import schema as bsc +from bsfs.namespace import ns +from bsfs.query import ast +from bsfs.utils import URI, errors + +# exports +__all__: typing.Sequence[str] = ( + 'Filter', + ) + +class _GenHopName(): + """Generator that produces a new unique symbol name with each iteration.""" + + # Symbol name prefix. + prefix: str + + # Current counter. + curr: int + + def __init__(self, prefix: str = '?hop', start: int = 0): + self.prefix = prefix + self.curr = start - 1 + + def __next__(self): + """Generate and return the next unique name.""" + self.curr += 1 + return self.prefix + str(self.curr) + + +class Filter(): + """Translate `bsfs.query.ast.filter` structures into Sparql queries.""" + + # Current schema to validate against. + schema: bsc.Schema + + # Generator that produces unique symbol names. + ngen: _GenHopName + + # Vertex type. + T_VERTEX = typing.Union[bsc.Node, bsc.Literal] + + def __init__(self, schema): + self.schema = schema + self.ngen = _GenHopName() + + def __call__( + self, + root_type: bsc.Node, + root: typing.Optional[ast.filter.FilterExpression] = None, + ) -> str: + """ + """ + # check root_type + if not isinstance(root_type, bsc.Node): + raise errors.BackendError(f'expected Node, found {root_type}') + if root_type not in self.schema.nodes(): + raise errors.ConsistencyError(f'node {root_type} is not in the schema') + # parse root + if root is None: + cond = '' + else: + cond = self._parse_filter_expression(root_type, root, '?ent') + # assemble query + return f''' + SELECT ?ent + WHERE {{ + ?ent <{ns.rdf.type}>/<{ns.rdfs.subClassOf}>* <{root_type.uri}> . + {cond} + }} + ''' + + def _parse_filter_expression(self, type_: T_VERTEX, node: ast.filter.FilterExpression, head: str) -> str: + """Route *node* to the handler of the respective FilterExpression subclass.""" + if isinstance(node, ast.filter.Is): + return self._is(type_, node, head) + if isinstance(node, ast.filter.Not): + return self._not(type_, node, head) + if isinstance(node, ast.filter.Has): + return self._has(type_, node, head) + if isinstance(node, ast.filter.Any): + return self._any(type_, node, head) + if isinstance(node, ast.filter.All): + return self._all(type_, node, head) + if isinstance(node, ast.filter.And): + return self._and(type_, node, head) + if isinstance(node, ast.filter.Or): + return self._or(type_, node, head) + if isinstance(node, ast.filter.Equals): + return self._equals(type_, node, head) + if isinstance(node, ast.filter.Substring): + return self._substring(type_, node, head) + if isinstance(node, ast.filter.StartsWith): + return self._starts_with(type_, node, head) + if isinstance(node, ast.filter.EndsWith): + return self._ends_with(type_, node, head) + if isinstance(node, ast.filter.LessThan): + return self._less_than(type_, node, head) + if isinstance(node, ast.filter.GreaterThan): + return self._greater_than(type_, node, head) + # invalid node + raise errors.BackendError(f'expected filter expression, found {node}') + + def _parse_predicate_expression( + self, + type_: T_VERTEX, + node: ast.filter.PredicateExpression + ) -> typing.Tuple[str, T_VERTEX]: + """Route *node* to the handler of the respective PredicateExpression subclass.""" + if isinstance(node, ast.filter.Predicate): + return self._predicate(type_, node) + if isinstance(node, ast.filter.OneOf): + return self._one_of(type_, node) + # invalid node + raise errors.BackendError(f'expected predicate expression, found {node}') + + def _one_of(self, node_type: T_VERTEX, node: ast.filter.OneOf) -> typing.Tuple[str, T_VERTEX]: + """ + """ + if not isinstance(node_type, bsc.Node): + raise errors.BackendError(f'expected Node, found {node_type}') + # walk through predicates + suburi, rng = set(), None + for pred in node: # OneOf guarantees at least one expression + puri, subrng = self._parse_predicate_expression(node_type, pred) + # track predicate uris + suburi.add(puri) + try: + # check for more generic range + if rng is None or subrng > rng: + rng = subrng + # check range consistency + if not subrng <= rng and not subrng >= rng: + raise errors.ConsistencyError(f'ranges {subrng} and {rng} are not related') + except TypeError as err: # subrng and rng are not comparable + raise errors.ConsistencyError(f'ranges {subrng} and {rng} are not related') from err + if rng is None: + # for mypy to be certain of the rng type + # if rng were None, we'd have gotten a TypeError above (None > None) + raise errors.UnreachableError() + # return joint predicate expression and next range + return '|'.join(suburi), rng + + def _predicate(self, node_type: T_VERTEX, node: ast.filter.Predicate) -> typing.Tuple[str, T_VERTEX]: + """ + """ + # check node_type + if not isinstance(node_type, bsc.Node): + raise errors.BackendError(f'expected Node, found {node_type}') + # fetch predicate and its uri + puri = node.predicate + # get and check predicate, domain, and range + if not self.schema.has_predicate(puri): + raise errors.ConsistencyError(f'predicate {puri} is not in the schema') + pred = self.schema.predicate(puri) + if pred.range is None: + # FIXME: It is a design error that Predicates can have a None range... + raise errors.BackendError(f'predicate {pred} has no range') + dom, rng = pred.domain, pred.range + # encapsulate predicate uri + puri = f'<{puri}>' # type: ignore [assignment] # variable re-use confuses mypy + # apply reverse flag + if node.reverse: + puri = URI('^' + puri) + dom, rng = rng, dom # type: ignore [assignment] # variable re-use confuses mypy + # check path consistency + if not node_type <= dom: + raise errors.ConsistencyError(f'expected type {dom} or subtype thereof, found {node_type}') + # return predicate URI and next node type + return puri, rng + + def _any(self, node_type: T_VERTEX, node: ast.filter.Any, head: str) -> str: + """ + """ + if not isinstance(node_type, bsc.Node): + raise errors.BackendError(f'expected Node, found {node_type}') + # parse predicate + pred, next_type = self._parse_predicate_expression(node_type, node.predicate) + # parse expression + nexthead = next(self.ngen) + expr = self._parse_filter_expression(next_type, node.expr, nexthead) + # combine results + return f'{head} {pred} {nexthead} . {expr}' + + def _all(self, node_type: T_VERTEX, node: ast.filter.All, head: str) -> str: + """ + """ + # NOTE: All(P, E) := Not(Any(P, Not(E))) and EXISTS(P, ?) + if not isinstance(node_type, bsc.Node): + raise errors.BackendError(f'expected Node, found {node_type}') + # parse rewritten ast + expr = self._parse_filter_expression(node_type, + ast.filter.Not( + ast.filter.Any(node.predicate, + ast.filter.Not(node.expr))), head) + # parse predicate for existence constraint + pred, _ = self._parse_predicate_expression(node_type, node.predicate) + temphead = next(self.ngen) + # return existence and rewritten expression + return f'FILTER EXISTS {{ {head} {pred} {temphead} }} . ' + expr + + def _and(self, node_type: T_VERTEX, node: ast.filter.And, head: str) -> str: + """ + """ + sub = [self._parse_filter_expression(node_type, expr, head) for expr in node] + return ' . '.join(sub) + + def _or(self, node_type: T_VERTEX, node: ast.filter.Or, head: str) -> str: + """ + """ + # potential special case optimization: + # * ast: Or(Equals('foo'), Equals('bar'), ...) + # * query: VALUES ?head { "value1"^^<...> "value2"^^<...> "value3"^<...> ... } + sub = [self._parse_filter_expression(node_type, expr, head) for expr in node] + sub = ['{' + expr + '}' for expr in sub] + return ' UNION '.join(sub) + + def _not(self, node_type: T_VERTEX, node: ast.filter.Not, head: str) -> str: + """ + """ + expr = self._parse_filter_expression(node_type, node.expr, head) + if isinstance(node_type, bsc.Literal): + return f'MINUS {{ {expr} }}' + # NOTE: for bsc.Node types, we must include at least one expression in the body of MINUS, + # otherwise the connection between the context and body of MINUS is lost. + # The simplest (and non-interfering) choice is a type statement. + return f'MINUS {{ {head} <{ns.rdf.type}>/<{ns.rdfs.subClassOf}>* <{node_type.uri}> . {expr} }}' + + def _has(self, node_type: T_VERTEX, node: ast.filter.Has, head: str) -> str: + """ + """ + if not isinstance(node_type, bsc.Node): + raise errors.BackendError(f'expected Node, found {node_type}') + # parse predicate + pred, _ = self._parse_predicate_expression(node_type, node.predicate) + # get new heads + inner = next(self.ngen) + outer = next(self.ngen) + # predicate count expression (fetch number of predicates at *head*) + num_preds = f'{{ SELECT (COUNT(distinct {inner}) as {outer}) WHERE {{ {head} {pred} {inner} }} }}' + # count expression + # FIXME: We have to ensure that ns.xsd.integer is always known in the schema! + count_bounds = self._parse_filter_expression(self.schema.literal(ns.xsd.integer), node.count, outer) + # combine + return num_preds + ' . ' + count_bounds + + def _is(self, node_type: T_VERTEX, node: ast.filter.Is, head: str) -> str: + """ + """ + if not isinstance(node_type, bsc.Node): + raise errors.BackendError(f'expected Node, found {node_type}') + return f'VALUES {head} {{ <{node.value}> }}' + + def _equals(self, node_type: T_VERTEX, node: ast.filter.Equals, head: str) -> str: + """ + """ + if not isinstance(node_type, bsc.Literal): + raise errors.BackendError(f'expected Literal, found {node}') + return f'VALUES {head} {{ "{node.value}"^^<{node_type.uri}> }}' + + def _substring(self, node_type: T_VERTEX, node: ast.filter.Substring, head: str) -> str: + """ + """ + if not isinstance(node_type, bsc.Literal): + raise errors.BackendError(f'expected Literal, found {node_type}') + return f'FILTER contains(str({head}), "{node.value}")' + + def _starts_with(self, node_type: T_VERTEX, node: ast.filter.StartsWith, head: str) -> str: + """ + """ + if not isinstance(node_type, bsc.Literal): + raise errors.BackendError(f'expected Literal, found {node_type}') + return f'FILTER strstarts(str({head}), "{node.value}")' + + def _ends_with(self, node_type: T_VERTEX, node: ast.filter.EndsWith, head: str) -> str: + """ + """ + if not isinstance(node_type, bsc.Literal): + raise errors.BackendError(f'expected Literal, found {node_type}') + return f'FILTER strends(str({head}), "{node.value}")' + + def _less_than(self, node_type: T_VERTEX, node: ast.filter.LessThan, head: str) -> str: + """ + """ + if not isinstance(node_type, bsc.Literal): + raise errors.BackendError(f'expected Literal, found {node_type}') + equality = '=' if not node.strict else '' + return f'FILTER ({head} <{equality} {float(node.threshold)})' + + def _greater_than(self, node_type: T_VERTEX, node: ast.filter.GreaterThan, head: str) -> str: + """ + """ + if not isinstance(node_type, bsc.Literal): + raise errors.BackendError(f'expected Literal, found {node_type}') + equality = '=' if not node.strict else '' + return f'FILTER ({head} >{equality} {float(node.threshold)})' + +## EOF ## diff --git a/bsfs/triple_store/sparql.py b/bsfs/triple_store/sparql/sparql.py index 7516dff..c3cbff6 100644 --- a/bsfs/triple_store/sparql.py +++ b/bsfs/triple_store/sparql/sparql.py @@ -11,10 +11,12 @@ import rdflib # bsfs imports from bsfs import schema as bsc +from bsfs.query import ast from bsfs.utils import errors, URI # inner-module imports -from . import base +from . import parse_filter +from .. import base # exports @@ -85,11 +87,15 @@ class SparqlStore(base.TripleStoreBase): # The local schema. _schema: bsc.Schema + # Filter parser + _filter_parser: parse_filter.Filter + def __init__(self): super().__init__(None) self._graph = rdflib.Graph() self._transaction = _Transaction(self._graph) self._schema = bsc.Schema.Empty() + self._filter_parser = parse_filter.Filter(self._schema) # NOTE: mypy and pylint complain about the **kwargs not being listed (contrasting super) # However, not having it here is clearer since it's explicit that there are no arguments. @@ -126,10 +132,17 @@ class SparqlStore(base.TripleStoreBase): # get deleted classes sub = self.schema - schema - # remove predicate instances for pred in sub.predicates: + # remove predicate instances for src, trg in self._graph.subject_objects(rdflib.URIRef(pred.uri)): self._transaction.remove((src, rdflib.URIRef(pred.uri), trg)) + # remove predicate definition + if pred.parent is not None: + self._transaction.remove(( + rdflib.URIRef(pred.uri), + rdflib.RDFS.subClassOf, + rdflib.URIRef(pred.parent.uri), + )) # remove node instances for node in sub.nodes: @@ -143,15 +156,46 @@ class SparqlStore(base.TripleStoreBase): self._transaction.remove((inst, pred, trg)) # remove instance self._transaction.remove((inst, rdflib.RDF.type, rdflib.URIRef(node.uri))) - - # NOTE: Nothing to do for literals + # remove node definition + if node.parent is not None: + self._transaction.remove(( + rdflib.URIRef(node.uri), + rdflib.RDFS.subClassOf, + rdflib.URIRef(node.parent.uri), + )) + + for lit in sub.literals: + # remove literal definition + if lit.parent is not None: + self._transaction.remove(( + rdflib.URIRef(lit.uri), + rdflib.RDFS.subClassOf, + rdflib.URIRef(lit.parent.uri), + )) + + # add predicate, node, and literal hierarchies to the graph + for itm in itertools.chain(schema.predicates(), schema.nodes(), schema.literals()): + if itm.parent is not None: + self._transaction.add((rdflib.URIRef(itm.uri), rdflib.RDFS.subClassOf, rdflib.URIRef(itm.parent.uri))) # commit instance changes self.commit() # migrate schema self._schema = schema + self._filter_parser.schema = schema + def get( + self, + node_type: bsc.Node, + query: typing.Optional[ast.filter.FilterExpression] = None, + ) -> typing.Iterator[URI]: + if node_type not in self.schema.nodes(): + raise errors.ConsistencyError(f'{node_type} is not defined in the schema') + if not isinstance(query, ast.filter.FilterExpression): + raise TypeError(query) + for guid, in self._graph.query(self._filter_parser(node_type, query)): + yield URI(guid) def _has_type(self, subject: URI, node_type: bsc.Node) -> bool: """Return True if *subject* is a node of class *node_type* or a subclass thereof.""" diff --git a/bsfs/utils/__init__.py b/bsfs/utils/__init__.py index 94680ee..6737cef 100644 --- a/bsfs/utils/__init__.py +++ b/bsfs/utils/__init__.py @@ -9,7 +9,7 @@ import typing # inner-module imports from . import errors -from .commons import typename +from .commons import typename, normalize_args from .uri import URI from .uuid import UUID, UCID @@ -19,6 +19,7 @@ __all__ : typing.Sequence[str] = ( 'URI', 'UUID', 'errors', + 'normalize_args', 'typename', ) diff --git a/bsfs/utils/commons.py b/bsfs/utils/commons.py index bad2fe0..e9f0b7f 100644 --- a/bsfs/utils/commons.py +++ b/bsfs/utils/commons.py @@ -5,10 +5,12 @@ A copy of the license is provided with the project. Author: Matthias Baumgartner, 2022 """ # imports +from collections import abc import typing # exports __all__: typing.Sequence[str] = ( + 'normalize_args', 'typename', ) @@ -19,5 +21,37 @@ def typename(obj) -> str: """Return the type name of *obj*.""" return type(obj).__name__ +# argument type in `normalize_args`. +ArgType = typing.TypeVar('ArgType') # pylint: disable=invalid-name # type vars don't follow the usual convention + +def normalize_args( + *args: typing.Union[ArgType, typing.Iterable[ArgType], typing.Iterator[ArgType]] + ) -> typing.Tuple[ArgType, ...]: + """Arguments to a function can be passed as individual arguments, list-like + structures, or iterables. This function processes any of these styles and + returns a tuple of the respective items. Typically used within a function + provide a flexible interface but sill have parameters in a normalized form. + + Examples: + + >>> normalize_args(0,1,2) + (1,2,3) + >>> normalize_args([0,1,2]) + (1,2,3) + >>> normalize_args(range(3)) + (1,2,3) + + """ + if len(args) == 0: # foo() + return tuple() + if len(args) > 1: # foo(0, 1, 2) + return tuple(args) # type: ignore [arg-type] # we assume that argument styles (arg vs. iterable) are not mixed. + if isinstance(args[0], abc.Iterator): # foo(iter([0,1,2])) + return tuple(args[0]) + if isinstance(args[0], abc.Iterable) and not isinstance(args[0], str): # foo([0, 1, 2]) + return tuple(args[0]) + # foo(0) + return (args[0], ) # type: ignore [return-value] # if args[0] is a str, we assume that ArgType was str. + ## EOF ## diff --git a/bsfs/utils/errors.py b/bsfs/utils/errors.py index c5e8e16..be9d40e 100644 --- a/bsfs/utils/errors.py +++ b/bsfs/utils/errors.py @@ -38,4 +38,7 @@ class UnreachableError(ProgrammingError): class ConfigError(_BSFSError): """User config issue.""" +class BackendError(_BSFSError): + """Could not parse an AST structure.""" + ## EOF ## diff --git a/test/graph/ac/test_null.py b/test/graph/ac/test_null.py index f39c9be..c863943 100644 --- a/test/graph/ac/test_null.py +++ b/test/graph/ac/test_null.py @@ -10,6 +10,7 @@ import unittest # bsie imports from bsfs import schema as _schema from bsfs.namespace import ns +from bsfs.query import ast from bsfs.triple_store import SparqlStore from bsfs.utils import URI @@ -93,6 +94,15 @@ class TestNullAC(unittest.TestCase): ac = NullAC(self.backend, self.user) self.assertSetEqual(self.ent_ids, ac.createable(self.ent_type, self.ent_ids)) + def test_filter_read(self): + query = ast.filter.Or( + ast.filter.Any(ns.bse.tag, ast.filter.Is('http://example.com/tag#1234')), + ast.filter.Any(ns.bse.tag, ast.filter.Is('http://example.com/tag#4321')), + ast.filter.Any(ns.bse.author, ast.filter.Equals('Me, Myself, and I'))) + ac = NullAC(self.backend, self.user) + self.assertEqual(query, ac.filter_read(self.ent_type, query)) + return query + ## main ## diff --git a/test/graph/test_graph.py b/test/graph/test_graph.py index 33cf6aa..8503d5b 100644 --- a/test/graph/test_graph.py +++ b/test/graph/test_graph.py @@ -9,10 +9,11 @@ import unittest # bsie imports from bsfs import schema +from bsfs.graph.nodes import Nodes from bsfs.namespace import ns +from bsfs.query import ast from bsfs.triple_store import SparqlStore from bsfs.utils import URI, errors -from bsfs.graph.nodes import Nodes # objects to test from bsfs.graph.graph import Graph @@ -192,6 +193,58 @@ class TestGraph(unittest.TestCase): bsfs:unique "true"^^xsd:boolean . ''')) + def test_get(self): + # setup + graph = Graph(self.backend, self.user) + graph.migrate(schema.Schema.from_string(''' + prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> + prefix xsd: <http://www.w3.org/2001/XMLSchema#> + prefix bsfs: <http://bsfs.ai/schema/> + prefix bse: <http://bsfs.ai/schema/Entity#> + + bsfs:Entity rdfs:subClassOf bsfs:Node . + bsfs:Tag rdfs:subClassOf bsfs:Node . + xsd:string rdfs:subClassOf bsfs:Literal . + + bse:tag rdfs:subClassOf bsfs:Predicate ; + rdfs:domain bsfs:Entity ; + rdfs:range bsfs:Tag ; + bsfs:unique "false"^^xsd:boolean . + + bse:comment rdfs:subClassOf bsfs:Predicate ; + rdfs:domain bsfs:Node ; + rdfs:range xsd:string ; + bsfs:unique "false"^^xsd:boolean . + + ''')) + # add some instances + ents = graph.nodes(ns.bsfs.Entity, {URI('http://example.com/entity#1234'), URI('http://example.com/entity#4321')}) + tags = graph.nodes(ns.bsfs.Tag, {URI('http://example.com/tag#1234'), URI('http://example.com/tag#4321')}) + # add some node links + ents.set(ns.bse.tag, tags) + # add some literals + graph.node(ns.bsfs.Entity, URI('http://example.com/entity#1234')).set(ns.bse.comment, 'hello world') + graph.node(ns.bsfs.Entity, URI('http://example.com/entity#1234')).set(ns.bse.comment, 'foo') + graph.node(ns.bsfs.Entity, URI('http://example.com/entity#1234')).set(ns.bse.comment, 'foobar') + graph.node(ns.bsfs.Tag, URI('http://example.com/tag#1234')).set(ns.bse.comment, 'foo') + graph.node(ns.bsfs.Tag, URI('http://example.com/tag#4321')).set(ns.bse.comment, 'bar') + + # get exception for invalid query + self.assertRaises(errors.ConsistencyError, graph.get, ns.bsfs.Entity, ast.filter.Any(ns.bse.tag, ast.filter.Equals('hello world'))) + + # query returns nodes + self.assertEqual(graph.get(ns.bsfs.Entity, ast.filter.Any(ns.bse.tag, ast.filter.Is(tags))), ents) + self.assertEqual(graph.get(ns.bsfs.Entity, ast.filter.Any(ns.bse.comment, ast.filter.StartsWith('foo'))), + graph.node(ns.bsfs.Entity, URI('http://example.com/entity#1234'))) + self.assertEqual(graph.get(ns.bsfs.Node, ast.filter.Any(ns.bse.comment, ast.filter.StartsWith('foo'))), + graph.nodes(ns.bsfs.Node, {URI('http://example.com/entity#1234'), URI('http://example.com/tag#1234')})) + self.assertEqual(graph.get(ns.bsfs.Entity, ast.filter.Or( + ast.filter.Any(ns.bse.comment, ast.filter.EndsWith('bar')), + ast.filter.Any(ns.bse.tag, ast.filter.All(ns.bse.comment, ast.filter.Equals('bar'))))), + ents) + + + ## main ## diff --git a/test/graph/test_nodes.py b/test/graph/test_nodes.py index 43e7f6f..11ae46d 100644 --- a/test/graph/test_nodes.py +++ b/test/graph/test_nodes.py @@ -72,6 +72,20 @@ class TestNodes(unittest.TestCase): bsfs:unique "true"^^xsd:boolean . ''') + self.schema_triples = { + # schema hierarchy + (rdflib.URIRef(ns.bsfs.Entity), rdflib.RDFS.subClassOf, rdflib.URIRef(ns.bsfs.Node)), + (rdflib.URIRef(ns.bsfs.Tag), rdflib.RDFS.subClassOf, rdflib.URIRef(ns.bsfs.Node)), + (rdflib.URIRef(ns.bsfs.User), rdflib.RDFS.subClassOf, rdflib.URIRef(ns.bsfs.Node)), + (rdflib.URIRef(ns.xsd.string), rdflib.RDFS.subClassOf, rdflib.URIRef(ns.bsfs.Literal)), + (rdflib.URIRef(ns.xsd.integer), rdflib.RDFS.subClassOf, rdflib.URIRef(ns.bsfs.Literal)), + (rdflib.URIRef(ns.bsm.t_created), rdflib.RDFS.subClassOf, rdflib.URIRef(ns.bsfs.Predicate)), + (rdflib.URIRef(ns.bse.comment), rdflib.RDFS.subClassOf, rdflib.URIRef(ns.bsfs.Predicate)), + (rdflib.URIRef(ns.bse.filesize), rdflib.RDFS.subClassOf, rdflib.URIRef(ns.bsfs.Predicate)), + (rdflib.URIRef(ns.bse.tag), rdflib.RDFS.subClassOf, rdflib.URIRef(ns.bsfs.Predicate)), + (rdflib.URIRef(ns.bse.author), rdflib.RDFS.subClassOf, rdflib.URIRef(ns.bsfs.Predicate)), + (rdflib.URIRef('http://bsfs.ai/schema/Tag#representative'), rdflib.RDFS.subClassOf, rdflib.URIRef(ns.bsfs.Predicate)), + } # Nodes constructor args self.user = URI('http://example.com/me') # set args @@ -160,7 +174,7 @@ class TestNodes(unittest.TestCase): time_triples = list(self.backend._graph.objects(rdflib.URIRef('http://example.com/me/entity#1234'), rdflib.URIRef(self.t_created.uri))) t_ent_created = float(time_triples[0]) if len(time_triples) > 0 else 0.0 # check triples - self.assertSetEqual(set(self.backend._graph), { + self.assertSetEqual(set(self.backend._graph), self.schema_triples | { # entity definitions (rdflib.URIRef('http://example.com/me/entity#1234'), rdflib.RDF.type, rdflib.URIRef('http://bsfs.ai/schema/Entity')), (rdflib.URIRef('http://example.com/me/entity#4321'), rdflib.RDF.type, rdflib.URIRef('http://bsfs.ai/schema/Entity')), @@ -171,7 +185,7 @@ class TestNodes(unittest.TestCase): # existing nodes remain unchanged self.assertSetEqual(self.ent_ids, nodes._ensure_nodes(self.ent_type, self.ent_ids)) - self.assertSetEqual(set(self.backend._graph), { + self.assertSetEqual(set(self.backend._graph), self.schema_triples | { # entity definitions (rdflib.URIRef('http://example.com/me/entity#1234'), rdflib.RDF.type, rdflib.URIRef('http://bsfs.ai/schema/Entity')), (rdflib.URIRef('http://example.com/me/entity#4321'), rdflib.RDF.type, rdflib.URIRef('http://bsfs.ai/schema/Entity')), @@ -186,7 +200,7 @@ class TestNodes(unittest.TestCase): time_triples = list(self.backend._graph.objects(rdflib.URIRef('http://example.com/me/tag#1234'), rdflib.URIRef(self.t_created.uri))) t_tag_created = float(time_triples[0]) if len(time_triples) > 0 else 0.0 # check triples - self.assertSetEqual(set(self.backend._graph), { + self.assertSetEqual(set(self.backend._graph), self.schema_triples | { # previous triples (rdflib.URIRef('http://example.com/me/entity#1234'), rdflib.RDF.type, rdflib.URIRef('http://bsfs.ai/schema/Entity')), (rdflib.URIRef('http://example.com/me/entity#4321'), rdflib.RDF.type, rdflib.URIRef('http://bsfs.ai/schema/Entity')), @@ -202,7 +216,7 @@ class TestNodes(unittest.TestCase): def test___set(self): # setup nodes = Nodes(self.backend, self.user, self.ent_type, {URI('http://example.com/me/entity#1234'), URI('http://example.com/me/entity#4321')}) - self.assertSetEqual(set(self.backend._graph), set()) + self.assertSetEqual(set(self.backend._graph), self.schema_triples | set()) set_ = nodes._Nodes__set # node_type must match predicate's domain @@ -217,7 +231,7 @@ class TestNodes(unittest.TestCase): time_triples = list(self.backend._graph.objects(rdflib.URIRef('http://example.com/me/entity#1234'), rdflib.URIRef(self.t_created.uri))) t_ent_created = float(time_triples[0]) if len(time_triples) > 0 else 0.0 # verify triples - self.assertSetEqual(set(self.backend._graph), { + self.assertSetEqual(set(self.backend._graph), self.schema_triples | { # entity definitions (rdflib.URIRef('http://example.com/me/entity#1234'), rdflib.RDF.type, rdflib.URIRef('http://bsfs.ai/schema/Entity')), (rdflib.URIRef('http://example.com/me/entity#4321'), rdflib.RDF.type, rdflib.URIRef('http://bsfs.ai/schema/Entity')), @@ -236,7 +250,7 @@ class TestNodes(unittest.TestCase): time_triples = list(self.backend._graph.objects(rdflib.URIRef('http://example.com/me/tag#1234'), rdflib.URIRef(self.t_created.uri))) t_tag_created = float(time_triples[0]) if len(time_triples) > 0 else 0.0 # verify triples - self.assertSetEqual(set(self.backend._graph), { + self.assertSetEqual(set(self.backend._graph), self.schema_triples | { # previous values (rdflib.URIRef('http://example.com/me/entity#1234'), rdflib.RDF.type, rdflib.URIRef('http://bsfs.ai/schema/Entity')), (rdflib.URIRef('http://example.com/me/entity#4321'), rdflib.RDF.type, rdflib.URIRef('http://bsfs.ai/schema/Entity')), @@ -265,7 +279,7 @@ class TestNodes(unittest.TestCase): Nodes(self.backend, self.user, self.ent_type, self.ent_ids)) def test_set(self): - self.assertSetEqual(set(self.backend._graph), set()) + self.assertSetEqual(set(self.backend._graph), self.schema_triples | set()) nodes = Nodes(self.backend, self.user, self.ent_type, self.ent_ids) # can set literal values self.assertEqual(nodes, nodes.set(self.p_filesize.uri, 1234)) @@ -312,7 +326,7 @@ class TestNodes(unittest.TestCase): def test_set_from_iterable(self): - self.assertSetEqual(set(self.backend._graph), set()) + self.assertSetEqual(set(self.backend._graph), self.schema_triples | set()) nodes = Nodes(self.backend, self.user, self.ent_type, self.ent_ids) # can set literal and node values simultaneously self.assertEqual(nodes, nodes.set_from_iterable({ diff --git a/test/graph/test_resolve.py b/test/graph/test_resolve.py new file mode 100644 index 0000000..5bc99e4 --- /dev/null +++ b/test/graph/test_resolve.py @@ -0,0 +1,181 @@ +""" + +Part of the bsfs test suite. +A copy of the license is provided with the project. +Author: Matthias Baumgartner, 2022 +""" +# imports +import unittest + +# bsie imports +from bsfs import schema as bsc +from bsfs.graph import Graph, nodes +from bsfs.namespace import ns +from bsfs.query import ast +from bsfs.triple_store import SparqlStore +from bsfs.utils import URI, errors + +# objects to test +from bsfs.graph.resolve import Filter + + +## code ## + +class TestFilter(unittest.TestCase): + """ + + NOTE: The Filter resolver is relatively simple as it only checks and changes + ast.filter.Is instances. Hence, we don't test all methods individually but + all of them with respect to ast.filter.Is elements. + + """ + + def test_call(self): + schema = bsc.Schema.from_string(''' + prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> + prefix xsd: <http://www.w3.org/2001/XMLSchema#> + + prefix bsfs: <http://bsfs.ai/schema/> + prefix bse: <http://bsfs.ai/schema/Entity#> + + bsfs:Entity rdfs:subClassOf bsfs:Node . + bsfs:Tag rdfs:subClassOf bsfs:Node . + xsd:string rdfs:subClassOf bsfs:Literal . + xsd:integer rdfs:subClassOf bsfs:Literal . + + bse:comment rdfs:subClassOf bsfs:Predicate ; + rdfs:domain bsfs:Entity ; + rdfs:range xsd:string ; + bsfs:unique "false"^^xsd:boolean . + + bse:filesize rdfs:subClassOf bsfs:Predicate ; + rdfs:domain bsfs:Entity ; + rdfs:range xsd:integer ; + bsfs:unique "false"^^xsd:boolean . + + bse:tag rdfs:subClassOf bsfs:Predicate ; + rdfs:domain bsfs:Entity ; + rdfs:range bsfs:Tag ; + bsfs:unique "false"^^xsd:boolean . + ''') + backend = SparqlStore.Open() + backend.schema = schema + graph = Graph(backend, URI('http://example.com/me')) + ents = graph.nodes(ns.bsfs.Entity, + {URI('http://example.com/me/entity#1234'), URI('http://example.com/me/entity#4321')}) + tags = graph.nodes(ns.bsfs.Tag, + {URI('http://example.com/me/tag#1234'), URI('http://example.com/me/tag#4321')}) + invalid = nodes.Nodes(None, '', schema.node(ns.bsfs.Node).get_child(ns.bsfs.Invalid), + {'http://example.com/you/invalid#1234', 'http://example.com/you/invalid#4321'}) + resolver = Filter(schema) + + # immediate Is + self.assertEqual(resolver(schema.node(ns.bsfs.Entity), + ast.filter.Is(ents)), + ast.filter.Or( + ast.filter.Is('http://example.com/me/entity#1234'), + ast.filter.Is('http://example.com/me/entity#4321') + )) + # only resolves nodes instances, not URIs + self.assertEqual(resolver(schema.node(ns.bsfs.Entity), + ast.filter.Is('http://example.com/me/entity#1234')), + ast.filter.Is('http://example.com/me/entity#1234')) + self.assertEqual(resolver(schema.node(ns.bsfs.Entity), + ast.filter.Is(1234)), + ast.filter.Is(1234)) + + # within And (also checks _value) + self.assertEqual(resolver(schema.node(ns.bsfs.Entity), + ast.filter.And( + ast.filter.Is(ents), + ast.filter.Any(ns.bse.comment, ast.filter.Equals('hello world')), + )), + ast.filter.And( + ast.filter.Or( + ast.filter.Is('http://example.com/me/entity#1234'), + ast.filter.Is('http://example.com/me/entity#4321')), + ast.filter.Any(ns.bse.comment, ast.filter.Equals('hello world')) + )) + # within Or (checks _bounded) + self.assertEqual(resolver(schema.node(ns.bsfs.Entity), + ast.filter.Or( + ast.filter.Is(ents), + ast.filter.Any(ns.bse.filesize, ast.filter.LessThan(5)), + )), + ast.filter.Or( + ast.filter.Or( + ast.filter.Is('http://example.com/me/entity#1234'), + ast.filter.Is('http://example.com/me/entity#4321')), + ast.filter.Any(ns.bse.filesize, ast.filter.LessThan(5)) + )) + + # Any-branched Is + self.assertEqual(resolver(schema.node(ns.bsfs.Entity), + ast.filter.Any(ns.bse.tag, ast.filter.Is(tags))), + ast.filter.Any(ns.bse.tag, ast.filter.Or( + ast.filter.Is('http://example.com/me/tag#1234'), + ast.filter.Is('http://example.com/me/tag#4321')), + )) + # All-branched Is + self.assertEqual(resolver(schema.node(ns.bsfs.Entity), + ast.filter.All(ns.bse.tag, ast.filter.Is(tags))), + ast.filter.All(ns.bse.tag, ast.filter.Or( + ast.filter.Is('http://example.com/me/tag#1234'), + ast.filter.Is('http://example.com/me/tag#4321')), + )) + # Negated predicate + self.assertEqual(resolver(schema.node(ns.bsfs.Tag), + ast.filter.Any(ast.filter.Predicate(ns.bse.tag, reverse=True), ast.filter.Is(ents))), + ast.filter.Any(ast.filter.Predicate(ns.bse.tag, reverse=True), ast.filter.Or( + ast.filter.Is('http://example.com/me/entity#1234'), + ast.filter.Is('http://example.com/me/entity#4321')), + )) + + # negated Is + self.assertEqual(resolver(schema.node(ns.bsfs.Entity), + ast.filter.Not(ast.filter.Is(ents))), + ast.filter.Not( + ast.filter.Or( + ast.filter.Is('http://example.com/me/entity#1234'), + ast.filter.Is('http://example.com/me/entity#4321')), + )) + + # for sake of completeness: Has + self.assertEqual(resolver(schema.node(ns.bsfs.Entity), + ast.filter.Has(ns.bse.comment)), + ast.filter.Has(ns.bse.comment)) + # route errors + self.assertRaises(errors.BackendError, resolver, schema.node(ns.bsfs.Tag), + ast.filter.Predicate(ns.bse.comment)) + self.assertRaises(errors.BackendError, resolver, schema.node(ns.bsfs.Tag), + ast.filter.Any(ast.filter.PredicateExpression(), ast.filter.Equals('foo'))) + self.assertRaises(errors.UnreachableError, resolver._one_of, ast.filter.OneOf(ast.filter.Predicate(ns.bsfs.Predicate))) + + # check schema consistency + self.assertRaises(errors.ConsistencyError, resolver, schema.node(ns.bsfs.Tag), + ast.filter.Is(invalid)) + # check immediate type compatibility + self.assertRaises(errors.ConsistencyError, resolver, schema.node(ns.bsfs.Tag), + ast.filter.Is(ents)) + self.assertRaises(errors.ConsistencyError, resolver, schema.node(ns.bsfs.Entity), + ast.filter.Is(tags)) + # check type compatibility through branches + self.assertRaises(errors.ConsistencyError, resolver, schema.node(ns.bsfs.Tag), + ast.filter.Any(ns.bse.comment, ast.filter.Is(tags))) + self.assertRaises(errors.ConsistencyError, resolver, schema.node(ns.bsfs.Tag), + ast.filter.Any(ns.bse.invalid, ast.filter.Is(tags))) + self.assertRaises(errors.ConsistencyError, resolver, schema.node(ns.bsfs.Tag), + ast.filter.Any(ast.filter.OneOf(ns.bse.comment, ns.bse.tag), ast.filter.Is(tags))) + self.assertRaises(errors.ConsistencyError, resolver, schema.node(ns.bsfs.Tag), + ast.filter.Any(ast.filter.OneOf(ns.bse.comment, ns.bse.filesize), ast.filter.Is(tags))) + self.assertRaises(errors.ConsistencyError, resolver, schema.node(ns.bsfs.Tag), + ast.filter.Any(ast.filter.Predicate(ns.bse.tag, reverse=True), ast.filter.Is(tags))) + + + +## main ## + +if __name__ == '__main__': + unittest.main() + +## EOF ## diff --git a/test/query/__init__.py b/test/query/__init__.py new file mode 100644 index 0000000..e69de29 --- /dev/null +++ b/test/query/__init__.py diff --git a/test/query/ast/__init__.py b/test/query/ast/__init__.py new file mode 100644 index 0000000..e69de29 --- /dev/null +++ b/test/query/ast/__init__.py diff --git a/test/query/ast/test_filter_.py b/test/query/ast/test_filter_.py new file mode 100644 index 0000000..4f69bdc --- /dev/null +++ b/test/query/ast/test_filter_.py @@ -0,0 +1,480 @@ +""" + +Part of the tagit test suite. +A copy of the license is provided with the project. +Author: Matthias Baumgartner, 2022 +""" +# imports +import unittest + +# bsfs imports +from bsfs.namespace import ns +from bsfs.utils import URI + +# objects to test +from bsfs.query.ast.filter_ import _Expression, FilterExpression, PredicateExpression +from bsfs.query.ast.filter_ import _Branch, Any, All +from bsfs.query.ast.filter_ import _Agg, And, Or +from bsfs.query.ast.filter_ import Not, Has +from bsfs.query.ast.filter_ import _Value, Is, Equals, Substring, StartsWith, EndsWith +from bsfs.query.ast.filter_ import _Bounded, LessThan, GreaterThan +from bsfs.query.ast.filter_ import Predicate, OneOf +from bsfs.query.ast.filter_ import IsIn, IsNotIn + + +## code ## + +class TestExpression(unittest.TestCase): + def test_essentials(self): + # comparison + self.assertEqual(_Expression(), _Expression()) + self.assertEqual(FilterExpression(), FilterExpression()) + self.assertEqual(PredicateExpression(), PredicateExpression()) + self.assertEqual(hash(_Expression()), hash(_Expression())) + self.assertEqual(hash(FilterExpression()), hash(FilterExpression())) + self.assertEqual(hash(PredicateExpression()), hash(PredicateExpression())) + # comparison respects type + self.assertNotEqual(FilterExpression(), _Expression()) + self.assertNotEqual(_Expression(), PredicateExpression()) + self.assertNotEqual(PredicateExpression(), FilterExpression()) + self.assertNotEqual(hash(FilterExpression()), hash(_Expression())) + self.assertNotEqual(hash(_Expression()), hash(PredicateExpression())) + self.assertNotEqual(hash(PredicateExpression()), hash(FilterExpression())) + # string conversion + self.assertEqual(str(_Expression()), '_Expression()') + self.assertEqual(str(FilterExpression()), 'FilterExpression()') + self.assertEqual(str(PredicateExpression()), 'PredicateExpression()') + self.assertEqual(repr(_Expression()), '_Expression()') + self.assertEqual(repr(FilterExpression()), 'FilterExpression()') + self.assertEqual(repr(PredicateExpression()), 'PredicateExpression()') + + +class TestBranch(unittest.TestCase): # _Branch, Any, All + def test_essentials(self): + pred = PredicateExpression() + expr = FilterExpression() + + # comparison respects type + self.assertNotEqual(_Branch(pred, expr), Any(pred, expr)) + self.assertNotEqual(Any(pred, expr), All(pred, expr)) + self.assertNotEqual(All(pred, expr), _Branch(pred, expr)) + self.assertNotEqual(hash(_Branch(pred, expr)), hash(Any(pred, expr))) + self.assertNotEqual(hash(Any(pred, expr)), hash(All(pred, expr))) + self.assertNotEqual(hash(All(pred, expr)), hash(_Branch(pred, expr))) + + for cls in (_Branch, Any, All): + # comparison + self.assertEqual(cls(pred, expr), cls(pred, expr)) + self.assertEqual(hash(cls(pred, expr)), hash(cls(pred, expr))) + # comparison respects predicate + self.assertNotEqual(cls(ns.bse.filename, expr), cls(ns.bse.filesize, expr)) + self.assertNotEqual(hash(cls(ns.bse.filename, expr)), hash(cls(ns.bse.filesize, expr))) + # comparison respects expression + self.assertNotEqual(cls(pred, Equals('hello')), cls(pred, Equals('world'))) + self.assertNotEqual(hash(cls(pred, Equals('hello'))), hash(cls(pred, Equals('world')))) + + # string conversion + self.assertEqual(str(_Branch(pred, expr)), f'_Branch({pred}, {expr})') + self.assertEqual(repr(_Branch(pred, expr)), f'_Branch({pred}, {expr})') + self.assertEqual(str(Any(pred, expr)), f'Any({pred}, {expr})') + self.assertEqual(repr(Any(pred, expr)), f'Any({pred}, {expr})') + self.assertEqual(str(All(pred, expr)), f'All({pred}, {expr})') + self.assertEqual(repr(All(pred, expr)), f'All({pred}, {expr})') + + def test_members(self): + class Foo(): pass + pred = PredicateExpression() + expr = FilterExpression() + + for cls in (_Branch, Any, All): + # predicate returns member + self.assertEqual(cls(PredicateExpression(), expr).predicate, PredicateExpression()) + # can pass an URI + self.assertEqual(cls(ns.bse.filename, expr).predicate, Predicate(ns.bse.filename)) + # can pass a PredicateExpression + self.assertEqual(cls(Predicate(ns.bse.filename), expr).predicate, Predicate(ns.bse.filename)) + # must pass an URI or PredicateExpression + self.assertRaises(TypeError, cls, Foo(), expr) + # expression returns member + self.assertEqual(cls(pred, Equals('hello')).expr, Equals('hello')) + # expression must be a FilterExpression + self.assertRaises(TypeError, cls, ns.bse.filename, 'hello') + self.assertRaises(TypeError, cls, ns.bse.filename, 1234) + self.assertRaises(TypeError, cls, ns.bse.filename, Foo()) + + +class TestAgg(unittest.TestCase): # _Agg, And, Or + def test_essentials(self): + expr = {Equals('hello'), Equals('world')} + + # comparison respects type + self.assertNotEqual(_Agg(expr), And(expr)) + self.assertNotEqual(And(expr), Or(expr)) + self.assertNotEqual(Or(expr), _Agg(expr)) + self.assertNotEqual(hash(_Agg(expr)), hash(And(expr))) + self.assertNotEqual(hash(And(expr)), hash(Or(expr))) + self.assertNotEqual(hash(Or(expr)), hash(_Agg(expr))) + + for cls in (_Agg, And, Or): + # comparison + self.assertEqual(cls(expr), cls(expr)) + self.assertEqual(hash(cls(expr)), hash(cls(expr))) + # comparison respects expression + self.assertNotEqual(cls(expr), cls(Equals('world'))) + self.assertNotEqual(hash(cls(expr)), hash(cls(Equals('world')))) + self.assertNotEqual(cls(Equals('hello')), cls(Equals('world'))) + self.assertNotEqual(hash(cls(Equals('hello'))), hash(cls(Equals('world')))) + + # string conversion + self.assertEqual(str(_Agg(Equals('hello'))), '_Agg({Equals(hello)})') + self.assertEqual(repr(_Agg(Equals('hello'))), '_Agg({Equals(hello)})') + self.assertEqual(str(And(Equals('hello'))), 'And({Equals(hello)})') + self.assertEqual(repr(And(Equals('hello'))), 'And({Equals(hello)})') + self.assertEqual(str(Or(Equals('hello'))), 'Or({Equals(hello)})') + self.assertEqual(repr(Or(Equals('hello'))), 'Or({Equals(hello)})') + + def test_expression(self): + class Foo(): pass + + for cls in (_Agg, And, Or): + # can pass expressions as arguments + self.assertSetEqual(cls(Equals('hello'), Equals('world')).expr, {Equals('hello'), Equals('world')}) + # can pass one expressions as argument + self.assertSetEqual(cls(Equals('hello')).expr, {Equals('hello')}) + # can pass expressions as iterator + self.assertSetEqual(cls(iter((Equals('hello'), Equals('world')))).expr, {Equals('hello'), Equals('world')}) + # can pass expressions as generator + def gen(): + yield Equals('hello') + yield Equals('world') + self.assertSetEqual(cls(gen()).expr, {Equals('hello'), Equals('world')}) + # can pass expressions as list-like + self.assertSetEqual(cls((Equals('hello'), Equals('world'))).expr, {Equals('hello'), Equals('world')}) + # can pass one expression as list-like + self.assertSetEqual(cls([Equals('hello')]).expr, {Equals('hello')}) + # must pass expressions + self.assertRaises(TypeError, cls, Foo(), Foo()) + self.assertRaises(TypeError, cls, [Foo(), Foo()]) + + # iter + self.assertSetEqual(set(iter(cls(Equals('hello'), Equals('world')))), {Equals('hello'), Equals('world')}) + # contains + self.assertIn(Equals('world'), cls(Equals('hello'), Equals('world'))) + self.assertNotIn(Equals('foo'), cls(Equals('hello'), Equals('world'))) + # len + self.assertEqual(len(cls(Equals('hello'), Equals('world'))), 2) + self.assertEqual(len(cls(Equals('hello'), Equals('world'), Equals('foo'))), 3) + + + +class TestNot(unittest.TestCase): + def test_essentials(self): + expr = FilterExpression() + # comparison + self.assertEqual(Not(expr), Not(expr)) + self.assertEqual(hash(Not(expr)), hash(Not(expr))) + # comparison respects type + self.assertNotEqual(Not(expr), FilterExpression()) + self.assertNotEqual(hash(Not(expr)), hash(FilterExpression())) + # comparison respects expression + self.assertNotEqual(Not(Equals('hello')), Not(Equals('world'))) + self.assertNotEqual(hash(Not(Equals('hello'))), hash(Not(Equals('world')))) + # string conversion + self.assertEqual(str(Not(Equals('hello'))), 'Not(Equals(hello))') + self.assertEqual(repr(Not(Equals('hello'))), 'Not(Equals(hello))') + + def test_expression(self): + # Not requires an expression argument + self.assertRaises(TypeError, Not) + # expression must be a FilterExpression + self.assertRaises(TypeError, Not, 'hello') + self.assertRaises(TypeError, Not, 1234) + self.assertRaises(TypeError, Not, Predicate(ns.bse.filesize)) + # member returns expression + self.assertEqual(Not(Equals('hello')).expr, Equals('hello')) + + +class TestHas(unittest.TestCase): + def test_essentials(self): + pred = PredicateExpression() + count = FilterExpression() + # comparison + self.assertEqual(Has(pred, count), Has(pred, count)) + self.assertEqual(hash(Has(pred, count)), hash(Has(pred, count))) + # comparison respects type + self.assertNotEqual(Has(pred, count), FilterExpression()) + self.assertNotEqual(hash(Has(pred, count)), hash(FilterExpression())) + # comparison respects predicate + self.assertNotEqual(Has(pred, count), Has(Predicate(ns.bse.filesize), count)) + self.assertNotEqual(hash(Has(pred, count)), hash(Has(Predicate(ns.bse.filesize), count))) + # comparison respects count + self.assertNotEqual(Has(pred, count), Has(pred, LessThan(5))) + self.assertNotEqual(hash(Has(pred, count)), hash(Has(pred, LessThan(5)))) + # string conversion + self.assertEqual(str(Has(Predicate(ns.bse.filesize), LessThan(5))), + f'Has(Predicate({ns.bse.filesize}, False), LessThan(5.0, True))') + self.assertEqual(repr(Has(Predicate(ns.bse.filesize), LessThan(5))), + f'Has(Predicate({ns.bse.filesize}, False), LessThan(5.0, True))') + + def test_members(self): + pred = PredicateExpression() + count = FilterExpression() + # member returns expression + # predicate must be an URI or a PredicateExpression + self.assertEqual(Has(ns.bse.filesize, count).predicate, Predicate(ns.bse.filesize)) + self.assertEqual(Has(Predicate(ns.bse.filesize), count).predicate, Predicate(ns.bse.filesize)) + self.assertRaises(TypeError, Has, 1234, FilterExpression()) + self.assertRaises(TypeError, Has, FilterExpression(), FilterExpression()) + # member returns count + # count must be None, an integer, or a FilterExpression + self.assertEqual(Has(pred).count, GreaterThan(1, False)) + self.assertEqual(Has(pred, LessThan(5)).count, LessThan(5)) + self.assertEqual(Has(pred, 5).count, Equals(5)) + self.assertRaises(TypeError, Has, pred, 'hello') + self.assertRaises(TypeError, Has, pred, Predicate(ns.bse.filesize)) + + + +class TestValue(unittest.TestCase): + def test_essentials(self): + # comparison respects type + self.assertNotEqual(_Value('hello'), Equals('hello')) + self.assertNotEqual(Equals('hello'), Is('hello')) + self.assertNotEqual(Is('hello'), Substring('hello')) + self.assertNotEqual(Substring('hello'), StartsWith('hello')) + self.assertNotEqual(StartsWith('hello'), EndsWith('hello')) + self.assertNotEqual(EndsWith('hello'), _Value('hello')) + self.assertNotEqual(hash(_Value('hello')), hash(Equals('hello'))) + self.assertNotEqual(hash(Equals('hello')), hash(Is('hello'))) + self.assertNotEqual(hash(Is('hello')), hash(Substring('hello'))) + self.assertNotEqual(hash(Substring('hello')), hash(StartsWith('hello'))) + self.assertNotEqual(hash(StartsWith('hello')), hash(EndsWith('hello'))) + self.assertNotEqual(hash(EndsWith('hello')), hash(_Value('hello'))) + + for cls in (_Value, Is, Equals, Substring, StartsWith, EndsWith): + # comparison + self.assertEqual(cls('hello'), cls('hello')) + self.assertEqual(hash(cls('hello')), hash(cls('hello'))) + # comparison respects value + self.assertNotEqual(cls('hello'), cls('world')) + self.assertNotEqual(hash(cls('hello')), hash(cls('world'))) + + # string conversion + self.assertEqual(str(_Value('hello')), '_Value(hello)') + self.assertEqual(repr(_Value('hello')), '_Value(hello)') + self.assertEqual(str(Is('hello')), 'Is(hello)') + self.assertEqual(repr(Is('hello')), 'Is(hello)') + self.assertEqual(str(Equals('hello')), 'Equals(hello)') + self.assertEqual(repr(Equals('hello')), 'Equals(hello)') + self.assertEqual(str(Substring('hello')), 'Substring(hello)') + self.assertEqual(repr(Substring('hello')), 'Substring(hello)') + self.assertEqual(str(StartsWith('hello')), 'StartsWith(hello)') + self.assertEqual(repr(StartsWith('hello')), 'StartsWith(hello)') + self.assertEqual(str(EndsWith('hello')), 'EndsWith(hello)') + self.assertEqual(repr(EndsWith('hello')), 'EndsWith(hello)') + + def test_value(self): + class Foo(): pass + for cls in (_Value, Is, Equals, Substring, StartsWith, EndsWith): + # value can be anything + # value returns member + f = Foo() + self.assertEqual(cls('hello').value, 'hello') + self.assertEqual(cls(1234).value, 1234) + self.assertEqual(cls(f).value, f) + + +class TestBounded(unittest.TestCase): + def test_essentials(self): + # comparison respects type + self.assertNotEqual(_Bounded(1234), LessThan(1234)) + self.assertNotEqual(LessThan(1234), GreaterThan(1234)) + self.assertNotEqual(GreaterThan(1234), _Bounded(1234)) + self.assertNotEqual(hash(_Bounded(1234)), hash(LessThan(1234))) + self.assertNotEqual(hash(LessThan(1234)), hash(GreaterThan(1234))) + self.assertNotEqual(hash(GreaterThan(1234)), hash(_Bounded(1234))) + + for cls in (_Bounded, LessThan, GreaterThan): + # comparison + self.assertEqual(cls(1234), cls(1234)) + self.assertEqual(hash(cls(1234)), hash(cls(1234))) + # comparison respects threshold + self.assertNotEqual(cls(1234), cls(4321)) + self.assertNotEqual(hash(cls(1234)), hash(cls(4321))) + # comparison respects strict + self.assertNotEqual(cls(1234, True), cls(1234, False)) + self.assertNotEqual(hash(cls(1234, True)), hash(cls(1234, False))) + + # string conversion + self.assertEqual(str(_Bounded(1234, False)), '_Bounded(1234.0, False)') + self.assertEqual(repr(_Bounded(1234, False)), '_Bounded(1234.0, False)') + self.assertEqual(str(LessThan(1234, False)), 'LessThan(1234.0, False)') + self.assertEqual(repr(LessThan(1234, False)), 'LessThan(1234.0, False)') + self.assertEqual(str(GreaterThan(1234, False)), 'GreaterThan(1234.0, False)') + self.assertEqual(repr(GreaterThan(1234, False)), 'GreaterThan(1234.0, False)') + + def test_members(self): + class Foo(): pass + for cls in (_Bounded, LessThan, GreaterThan): + # threshold becomes float + self.assertEqual(cls(1.234).threshold, 1.234) + self.assertEqual(cls(1234).threshold, 1234.0) + self.assertEqual(cls('1234').threshold, 1234) + self.assertRaises(TypeError, cls, Foo()) + # strict becomes bool + self.assertEqual(cls(1234, True).strict, True) + self.assertEqual(cls(1234, False).strict, False) + self.assertEqual(cls(1234, Foo()).strict, True) + + +class TestPredicate(unittest.TestCase): + def test_essentials(self): + # comparison + self.assertEqual(Predicate(ns.bse.filesize), Predicate(ns.bse.filesize)) + self.assertEqual(hash(Predicate(ns.bse.filesize)), hash(Predicate(ns.bse.filesize))) + # comparison respects type + self.assertNotEqual(Predicate(ns.bse.filesize), PredicateExpression()) + self.assertNotEqual(hash(Predicate(ns.bse.filesize)), hash(PredicateExpression())) + # comparison respects predicate + self.assertNotEqual(Predicate(ns.bse.filesize), Predicate(ns.bse.filename)) + self.assertNotEqual(hash(Predicate(ns.bse.filesize)), hash(Predicate(ns.bse.filename))) + # comparison respects reverse + self.assertNotEqual(Predicate(ns.bse.filesize, True), Predicate(ns.bse.filesize, False)) + self.assertNotEqual(hash(Predicate(ns.bse.filesize, True)), hash(Predicate(ns.bse.filesize, False))) + # string conversion + self.assertEqual(str(Predicate(ns.bse.filesize)), f'Predicate({ns.bse.filesize}, False)') + self.assertEqual(str(Predicate(ns.bse.filesize, True)), + f'Predicate({ns.bse.filesize}, True)') + self.assertEqual(repr(Predicate(ns.bse.filesize)), f'Predicate({ns.bse.filesize}, False)') + self.assertEqual(repr(Predicate(ns.bse.filesize, True)), + f'Predicate({ns.bse.filesize}, True)') + + def test_members(self): + # member returns predicate + # predicate must be an URI + self.assertEqual(Predicate(ns.bse.filesize).predicate, ns.bse.filesize) + self.assertEqual(Predicate(URI('hello world')).predicate, URI('hello world')) + self.assertRaises(TypeError, Predicate, 1234) + self.assertRaises(TypeError, Predicate, FilterExpression()) + self.assertRaises(TypeError, Predicate, FilterExpression()) + # reverse becomes a boolean + self.assertEqual(Predicate(ns.bse.filesize, True).reverse, True) + self.assertEqual(Predicate(ns.bse.filesize, False).reverse, False) + self.assertEqual(Predicate(ns.bse.filesize, 'abc').reverse, True) + + +class TestOneOf(unittest.TestCase): + def test_essentials(self): + expr = {Predicate(ns.bse.filename), Predicate(ns.bse.filesize)} + # comparison + self.assertEqual(OneOf(expr), OneOf(expr)) + self.assertEqual(hash(OneOf(expr)), hash(OneOf(expr))) + # comparison respects type + self.assertNotEqual(OneOf(expr), PredicateExpression()) + self.assertNotEqual(hash(OneOf(expr)), hash(PredicateExpression())) + # comparison respects expression + self.assertNotEqual(OneOf(expr), OneOf(Predicate(ns.bse.filename))) + self.assertNotEqual(hash(OneOf(expr)), hash(OneOf(Predicate(ns.bse.filename)))) + # string conversion + self.assertEqual(str(OneOf(Predicate(ns.bse.filesize))), + f'OneOf({{Predicate({ns.bse.filesize}, False)}})') + self.assertEqual(repr(OneOf(Predicate(ns.bse.filesize))), + f'OneOf({{Predicate({ns.bse.filesize}, False)}})') + + def test_expression(self): + class Foo(): pass + # can pass expressions as arguments + self.assertSetEqual(OneOf(Predicate(ns.bse.filesize), Predicate(ns.bse.filename)).expr, + {Predicate(ns.bse.filesize), Predicate(ns.bse.filename)}) + # can pass one expressions as argument + self.assertSetEqual(OneOf(Predicate(ns.bse.filesize)).expr, + {Predicate(ns.bse.filesize)}) + # can pass expressions as iterator + self.assertSetEqual(OneOf(iter((Predicate(ns.bse.filesize), Predicate(ns.bse.filename)))).expr, + {Predicate(ns.bse.filesize), Predicate(ns.bse.filename)}) + # can pass expressions as generator + def gen(): + yield Predicate(ns.bse.filesize) + yield Predicate(ns.bse.filename) + self.assertSetEqual(OneOf(gen()).expr, + {Predicate(ns.bse.filesize), Predicate(ns.bse.filename)}) + # can pass expressions as list-like + self.assertSetEqual(OneOf((Predicate(ns.bse.filesize), Predicate(ns.bse.filename))).expr, + {Predicate(ns.bse.filesize), Predicate(ns.bse.filename)}) + # can pass one expression as list-like + self.assertSetEqual(OneOf([Predicate(ns.bse.filesize)]).expr, + {Predicate(ns.bse.filesize)}) + # must pass expressions + self.assertRaises(TypeError, OneOf, Foo(), Foo()) + self.assertRaises(TypeError, OneOf, [Foo(), Foo()]) + # must pass at least one expression + self.assertRaises(AttributeError, OneOf) + + # iter + self.assertSetEqual(set(iter(OneOf(Predicate(ns.bse.filesize), Predicate(ns.bse.filename)))), + {Predicate(ns.bse.filesize), Predicate(ns.bse.filename)}) + # contains + self.assertIn(Predicate(ns.bse.filesize), + OneOf(Predicate(ns.bse.filesize), Predicate(ns.bse.filename))) + self.assertNotIn(Predicate(ns.bse.tag), + OneOf(Predicate(ns.bse.filesize), Predicate(ns.bse.filename))) + # len + self.assertEqual(len(OneOf(Predicate(ns.bse.filesize), Predicate(ns.bse.filename))), 2) + self.assertEqual(len(OneOf(Predicate(ns.bse.filesize), Predicate(ns.bse.filename), Predicate(ns.bse.tag))), 3) + + + def testIsIn(self): + # can pass expressions as arguments + self.assertEqual(IsIn('http://example.com/entity#1234', 'http://example.com/entity#4321'), + Or(Is('http://example.com/entity#1234'), Is('http://example.com/entity#4321'))) + # can pass one expression as argument + self.assertEqual(IsIn('http://example.com/entity#1234'), + Or(Is('http://example.com/entity#1234'))) + # can pass expressions as iterator + self.assertEqual(IsIn(iter(('http://example.com/entity#1234', 'http://example.com/entity#4321'))), + Or(Is('http://example.com/entity#1234'), Is('http://example.com/entity#4321'))) + # can pass expressions as generator + def gen(): + yield 'http://example.com/entity#1234' + yield 'http://example.com/entity#4321' + self.assertEqual(IsIn(gen()), + Or(Is('http://example.com/entity#1234'), Is('http://example.com/entity#4321'))) + # can pass expressions as list-like + self.assertEqual(IsIn(['http://example.com/entity#1234', 'http://example.com/entity#4321']), + Or(Is('http://example.com/entity#1234'), Is('http://example.com/entity#4321'))) + # can pass one expression as list-like + self.assertEqual(IsIn(['http://example.com/entity#1234']), + Or(Is('http://example.com/entity#1234'))) + + + def testIsNotIn(self): + # can pass expressions as arguments + self.assertEqual(IsNotIn('http://example.com/entity#1234', 'http://example.com/entity#4321'), + Not(Or(Is('http://example.com/entity#1234'), Is('http://example.com/entity#4321')))) + # can pass one expression as argument + self.assertEqual(IsNotIn('http://example.com/entity#1234'), + Not(Or(Is('http://example.com/entity#1234')))) + # can pass expressions as iterator + self.assertEqual(IsNotIn(iter(('http://example.com/entity#1234', 'http://example.com/entity#4321'))), + Not(Or(Is('http://example.com/entity#1234'), Is('http://example.com/entity#4321')))) + # can pass expressions as generator + def gen(): + yield 'http://example.com/entity#1234' + yield 'http://example.com/entity#4321' + self.assertEqual(IsNotIn(gen()), + Not(Or(Is('http://example.com/entity#1234'), Is('http://example.com/entity#4321')))) + # can pass expressions as list-like + self.assertEqual(IsNotIn(['http://example.com/entity#1234', 'http://example.com/entity#4321']), + Not(Or(Is('http://example.com/entity#1234'), Is('http://example.com/entity#4321')))) + # can pass one expression as list-like + self.assertEqual(IsNotIn(['http://example.com/entity#1234']), + Not(Or(Is('http://example.com/entity#1234')))) + + + +## main ## + +if __name__ == '__main__': + unittest.main() + +## EOF ## diff --git a/test/query/test_validator.py b/test/query/test_validator.py new file mode 100644 index 0000000..4f8364a --- /dev/null +++ b/test/query/test_validator.py @@ -0,0 +1,261 @@ +""" + +Part of the tagit test suite. +A copy of the license is provided with the project. +Author: Matthias Baumgartner, 2022 +""" +# imports +import unittest + +# bsfs imports +from bsfs import schema as _schema +from bsfs.namespace import ns +from bsfs.query import ast +from bsfs.utils import errors + +# objects to test +from bsfs.query.validator import Filter + + +## code ## + +class TestFilter(unittest.TestCase): + def setUp(self): + self.schema = _schema.Schema.from_string(''' + prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> + prefix xsd: <http://www.w3.org/2001/XMLSchema#> + + prefix bsfs: <http://bsfs.ai/schema/> + prefix bse: <http://bsfs.ai/schema/Entity#> + + bsfs:Entity rdfs:subClassOf bsfs:Node . + bsfs:URI rdfs:subClassOf bsfs:Literal . + + bsfs:Tag rdfs:subClassOf bsfs:Node . + xsd:string rdfs:subClassOf bsfs:Literal . + xsd:integer rdfs:subClassOf bsfs:Literal . + + bse:comment rdfs:subClassOf bsfs:Predicate ; + rdfs:domain bsfs:Node ; + rdfs:range xsd:string ; + bsfs:unique "false"^^xsd:boolean . + + bse:filesize rdfs:subClassOf bsfs:Predicate ; + rdfs:domain bsfs:Entity ; + rdfs:range xsd:integer ; + bsfs:unique "true"^^xsd:boolean . + + bse:tag rdfs:subClassOf bsfs:Predicate ; + rdfs:domain bsfs:Entity ; + rdfs:range bsfs:Tag ; + bsfs:unique "false"^^xsd:boolean . + + bse:label rdfs:subClassOf bsfs:Predicate ; + rdfs:domain bsfs:Tag ; + rdfs:range xsd:string ; + bsfs:unique "true"^^xsd:boolean . + + bse:buddy rdfs:subClassOf bsfs:Predicate ; + rdfs:domain bsfs:Entity ; + rdfs:range bsfs:Node ; + bsfs:unique "false"^^xsd:boolean . + + ''') + self.validate = Filter(self.schema) + + def test_call(self): + # root_type must be a _schema.Node + self.assertRaises(TypeError, self.validate, 1234, None) + self.assertRaises(TypeError, self.validate, '1234', None) + self.assertRaises(TypeError, self.validate, self.schema.literal(ns.bsfs.URI), None) + # root_type must exist in the schema + self.assertRaises(errors.ConsistencyError, self.validate, self.schema.node(ns.bsfs.Node).get_child(ns.bsfs.Image), None) + self.assertRaises(errors.ConsistencyError, self.validate, self.schema.node(ns.bsfs.Entity).get_child(ns.bsfs.Image), None) + # valid query returns true + self.assertTrue(self.validate(self.schema.node(ns.bsfs.Entity), + ast.filter.Any(ast.filter.OneOf(ns.bse.tag, ns.bse.buddy), + ast.filter.Or( + ast.filter.Is('http://example.com/symbol#1234'), + ast.filter.All(ns.bse.comment, ast.filter.StartsWith('foo')), + ast.filter.And( + ast.filter.Has(ns.bse.comment, ast.filter.Or( + ast.filter.GreaterThan(5), + ast.filter.LessThan(1), + ) + ), + ast.filter.Not(ast.filter.Any(ns.bse.comment, + ast.filter.Not(ast.filter.Equals('hello world')))), + ))))) + # invalid paths raise consistency error + self.assertRaises(errors.ConsistencyError, self.validate, self.schema.node(ns.bsfs.Entity), + ast.filter.Any(ast.filter.OneOf(ns.bse.tag, ns.bse.buddy), + ast.filter.Or( + ast.filter.All(ns.bse.comment, ast.filter.Equals('hello world')), + ast.filter.All(ns.bse.label, ast.filter.Equals('hello world')), # domain mismatch + ))) + + def test_routing(self): + self.assertRaises(errors.BackendError, self.validate._parse_filter_expression, ast.filter.FilterExpression(), self.schema.node(ns.bsfs.Node)) + self.assertRaises(errors.BackendError, self.validate._parse_predicate_expression, ast.filter.PredicateExpression()) + + def test_predicate(self): + # predicate must be in the schema + self.assertRaises(errors.ConsistencyError, self.validate._predicate, ast.filter.Predicate(ns.bse.invalid)) + # predicate must have a range + self.assertRaises(errors.BackendError, self.validate._predicate, ast.filter.Predicate(ns.bsfs.Predicate)) + # predicate returns domain and range + self.assertEqual(self.validate._predicate(ast.filter.Predicate(ns.bse.tag)), + (self.schema.node(ns.bsfs.Entity), self.schema.node(ns.bsfs.Tag))) + # reverse is applied + self.assertEqual(self.validate._predicate(ast.filter.Predicate(ns.bse.tag, reverse=True)), + (self.schema.node(ns.bsfs.Tag), self.schema.node(ns.bsfs.Entity))) + + def test_one_of(self): + # domains must both be nodes or literals + self.assertRaises(errors.ConsistencyError, self.validate._one_of, ast.filter.OneOf(ns.bse.tag, ast.filter.Predicate(ns.bse.label, reverse=True))) + # domains must be related + self.assertRaises(errors.ConsistencyError, self.validate._one_of, ast.filter.OneOf(ns.bse.tag, ns.bse.label)) + # ranges must both be nodes or literals + self.assertRaises(errors.ConsistencyError, self.validate._one_of, ast.filter.OneOf(ns.bse.tag, ns.bse.comment)) + # ranges must be related + self.assertRaises(errors.ConsistencyError, self.validate._one_of, ast.filter.OneOf(ns.bse.tag, ast.filter.Predicate(ns.bse.buddy, reverse=True))) + # one_of returns most specific domain + self.assertEqual(self.validate._one_of(ast.filter.OneOf(ns.bse.comment, ns.bse.label)), + (self.schema.node(ns.bsfs.Tag), self.schema.literal(ns.xsd.string))) + # one_of returns the most generic range + self.assertEqual(self.validate._one_of(ast.filter.OneOf(ns.bse.tag, ns.bse.buddy)), + (self.schema.node(ns.bsfs.Entity), self.schema.node(ns.bsfs.Node))) + + def test_branch(self): + # type must be a node + self.assertRaises(errors.ConsistencyError, self.validate._branch, self.schema.literal(ns.bsfs.Literal), None) + # type must be in the schema + self.assertRaises(errors.ConsistencyError, self.validate._branch, self.schema.node(ns.bsfs.Node).get_child(ns.bsfs.Invalid), None) + # predicate is verified + self.assertRaises(errors.ConsistencyError, self.validate._branch, self.schema.node(ns.bsfs.Entity), + ast.filter.Any(ns.bsfs.Invalid, ast.filter.Is('http://example.com/entity#1234'))) + # predicate must match the domain + self.assertRaises(errors.ConsistencyError, self.validate._branch, self.schema.node(ns.bsfs.Node), + ast.filter.Any(ns.bse.tag, ast.filter.Is('http://example.com/tag#1234'))) + self.assertRaises(errors.ConsistencyError, self.validate._branch, self.schema.node(ns.bsfs.Tag), + ast.filter.Any(ns.bse.tag, ast.filter.Is('http://example.com/tag#1234'))) + # child expression must be valid + self.assertRaises(errors.ConsistencyError, self.validate._branch, self.schema.node(ns.bsfs.Entity), + ast.filter.Any(ns.bse.tag, ast.filter.Equals('hello world'))) + # branch accepts valid expressions + self.assertIsNone(self.validate._branch(self.schema.node(ns.bsfs.Entity), + ast.filter.Any(ns.bse.tag, ast.filter.Is('http://example.com/entity#1234')))) + self.assertIsNone(self.validate._branch(self.schema.node(ns.bsfs.Entity), + ast.filter.All(ns.bse.tag, ast.filter.Is('http://example.com/entity#1234')))) + + def test_agg(self): + # agg evaluates child expressions + self.assertRaises(errors.ConsistencyError, self.validate._agg, self.schema.node(ns.bsfs.Entity), + ast.filter.And(ast.filter.Is('http://example.com/entity#1234'), ast.filter.Equals('hello world'))) + self.assertRaises(errors.ConsistencyError, self.validate._agg, self.schema.literal(ns.xsd.string), + ast.filter.And(ast.filter.Is('http://example.com/entity#1234'), ast.filter.Equals('hello world'))) + self.assertRaises(errors.ConsistencyError, self.validate._agg, self.schema.node(ns.bsfs.Entity), + ast.filter.Or(ast.filter.Is('http://example.com/entity#1234'), ast.filter.Equals('hello world'))) + self.assertRaises(errors.ConsistencyError, self.validate._agg, self.schema.literal(ns.xsd.string), + ast.filter.Or(ast.filter.Is('http://example.com/entity#1234'), ast.filter.Equals('hello world'))) + # agg works on nodes + self.assertIsNone(self.validate._agg(self.schema.node(ns.bsfs.Entity), + ast.filter.And(ast.filter.Is('http://example.com/entity#1234'), ast.filter.Is('http://example.com/entity#4321')))) + self.assertIsNone(self.validate._agg(self.schema.node(ns.bsfs.Entity), + ast.filter.Or(ast.filter.Is('http://example.com/entity#1234'), ast.filter.Is('http://example.com/entity#4321')))) + # agg works on literals + self.assertIsNone(self.validate._agg(self.schema.literal(ns.xsd.string), + ast.filter.And(ast.filter.Equals('foobar'), ast.filter.Equals('hello world')))) + self.assertIsNone(self.validate._agg(self.schema.literal(ns.xsd.string), + ast.filter.Or(ast.filter.Equals('foobar'), ast.filter.Equals('hello world')))) + + def test_not(self): + # not evaluates child expressions + self.assertRaises(errors.ConsistencyError, self.validate._not, self.schema.node(ns.bsfs.Entity), + ast.filter.Not(ast.filter.Equals('hello world'))) + self.assertRaises(errors.ConsistencyError, self.validate._not, self.schema.literal(ns.xsd.string), + ast.filter.Not(ast.filter.Is('http://example.com/entity#1234'))) + # not works on nodes + self.assertIsNone(self.validate._not(self.schema.node(ns.bsfs.Entity), + ast.filter.Not(ast.filter.Is('http://example.com/entity#1234')))) + # not works on literals + self.assertIsNone(self.validate._not(self.schema.literal(ns.xsd.string), + ast.filter.Not(ast.filter.Equals('hello world')))) + + def test_has(self): + # type must be node + self.assertRaises(errors.ConsistencyError, self.validate._has, self.schema.literal(ns.bsfs.Literal), + ast.filter.Has(ns.bse.tag)) + # type must be in the schema + self.assertRaises(errors.ConsistencyError, self.validate._has, self.schema.node(ns.bsfs.Node).get_child(ns.bsfs.Invalid), + ast.filter.Has(ns.bse.tag)) + # has checks predicate + self.assertRaises(errors.ConsistencyError, self.validate._has, self.schema.node(ns.bsfs.Entity), + ast.filter.Has(ns.bse.invalid)) + # predicate must match domain + self.assertRaises(errors.ConsistencyError, self.validate._has, self.schema.node(ns.bsfs.Tag), + ast.filter.Has(ns.bse.tag)) + # has checks count expression + self.assertRaises(errors.ConsistencyError, self.validate._has, self.schema.node(ns.bsfs.Entity), + ast.filter.Has(ns.bse.tag, ast.filter.Is('http://example.com/entity#1234'))) + # has accepts correct expressions + self.assertIsNone(self.validate._has(self.schema.node(ns.bsfs.Entity), ast.filter.Has(ns.bse.tag, ast.filter.GreaterThan(5)))) + + def test_is(self): + # type must be node + self.assertRaises(errors.ConsistencyError, self.validate._is, self.schema.literal(ns.bsfs.Literal), + ast.filter.Is('http://example.com/foo')) + # type must be in the schema + self.assertRaises(errors.ConsistencyError, self.validate._is, self.schema.node(ns.bsfs.Node).get_child(ns.bsfs.Invalid), + ast.filter.Is('http://example.com/foo')) + # is accepts correct expressions + self.assertIsNone(self.validate._is(self.schema.node(ns.bsfs.Entity), ast.filter.Is('http://example.com/entity#1234'))) + + def test_value(self): + # type must be literal + self.assertRaises(errors.ConsistencyError, self.validate._value, self.schema.node(ns.bsfs.Node), + ast.filter.Equals('hello world')) + self.assertRaises(errors.ConsistencyError, self.validate._value, self.schema.node(ns.bsfs.Node), + ast.filter.Substring('hello world')) + self.assertRaises(errors.ConsistencyError, self.validate._value, self.schema.node(ns.bsfs.Node), + ast.filter.StartsWith('hello world')) + self.assertRaises(errors.ConsistencyError, self.validate._value, self.schema.node(ns.bsfs.Node), + ast.filter.EndsWith('hello world')) + # type must be in the schema + self.assertRaises(errors.ConsistencyError, self.validate._value, self.schema.literal(ns.bsfs.Literal).get_child(ns.bsfs.Invalid), + ast.filter.Equals('hello world')) + self.assertRaises(errors.ConsistencyError, self.validate._value, self.schema.literal(ns.bsfs.Literal).get_child(ns.bsfs.Invalid), + ast.filter.Substring('hello world')) + self.assertRaises(errors.ConsistencyError, self.validate._value, self.schema.literal(ns.bsfs.Literal).get_child(ns.bsfs.Invalid), + ast.filter.StartsWith('hello world')) + self.assertRaises(errors.ConsistencyError, self.validate._value, self.schema.literal(ns.bsfs.Literal).get_child(ns.bsfs.Invalid), + ast.filter.EndsWith('hello world')) + # value accepts correct expressions + self.assertIsNone(self.validate._value(self.schema.literal(ns.xsd.string), ast.filter.Equals('hello world'))) + self.assertIsNone(self.validate._value(self.schema.literal(ns.xsd.string), ast.filter.Substring('hello world'))) + self.assertIsNone(self.validate._value(self.schema.literal(ns.xsd.string), ast.filter.StartsWith('hello world'))) + self.assertIsNone(self.validate._value(self.schema.literal(ns.xsd.string), ast.filter.EndsWith('hello world'))) + + def test_bounded(self): + # type must be literal + self.assertRaises(errors.ConsistencyError, self.validate._bounded, self.schema.node(ns.bsfs.Node), + ast.filter.GreaterThan(0)) + self.assertRaises(errors.ConsistencyError, self.validate._bounded, self.schema.node(ns.bsfs.Node), + ast.filter.LessThan(0)) + # type must be in the schema + self.assertRaises(errors.ConsistencyError, self.validate._bounded, self.schema.literal(ns.bsfs.Literal).get_child(ns.bsfs.Invalid), + ast.filter.GreaterThan(0)) + self.assertRaises(errors.ConsistencyError, self.validate._bounded, self.schema.literal(ns.bsfs.Literal).get_child(ns.bsfs.Invalid), + ast.filter.LessThan(0)) + # bounded accepts correct expressions + self.assertIsNone(self.validate._bounded(self.schema.literal(ns.xsd.integer), ast.filter.LessThan(0))) + self.assertIsNone(self.validate._bounded(self.schema.literal(ns.xsd.integer), ast.filter.GreaterThan(0))) + + + +## main ## + +if __name__ == '__main__': + unittest.main() + +## EOF ## diff --git a/test/triple_store/sparql/__init__.py b/test/triple_store/sparql/__init__.py new file mode 100644 index 0000000..e69de29 --- /dev/null +++ b/test/triple_store/sparql/__init__.py diff --git a/test/triple_store/sparql/test_parse_filter.py b/test/triple_store/sparql/test_parse_filter.py new file mode 100644 index 0000000..bd19803 --- /dev/null +++ b/test/triple_store/sparql/test_parse_filter.py @@ -0,0 +1,727 @@ +""" + +Part of the bsfs test suite. +A copy of the license is provided with the project. +Author: Matthias Baumgartner, 2022 +""" +# imports +import rdflib +import unittest + +# bsie imports +from bsfs import schema as _schema +from bsfs.namespace import ns +from bsfs.query import ast +from bsfs.utils import errors + +# objects to test +from bsfs.triple_store.sparql.parse_filter import Filter + + +## code ## + +class TestParseFilter(unittest.TestCase): + def setUp(self): + # schema + self.schema = _schema.Schema.from_string(''' + prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> + prefix xsd: <http://www.w3.org/2001/XMLSchema#> + + prefix bsfs: <http://bsfs.ai/schema/> + prefix bse: <http://bsfs.ai/schema/Entity#> + + bsfs:Entity rdfs:subClassOf bsfs:Node . + bsfs:Image rdfs:subClassOf bsfs:Entity . + bsfs:Tag rdfs:subClassOf bsfs:Node . + + xsd:string rdfs:subClassOf bsfs:Literal . + xsd:integer rdfs:subClassOf bsfs:Literal . + bsfs:URI rdfs:subClassOf bsfs:Literal . + + bse:comment rdfs:subClassOf bsfs:Predicate ; + rdfs:domain bsfs:Node ; + rdfs:range xsd:string ; + bsfs:unique "false"^^xsd:boolean . + + bse:filesize rdfs:subClassOf bsfs:Predicate ; + rdfs:domain bsfs:Entity ; + rdfs:range xsd:integer ; + bsfs:unique "true"^^xsd:boolean . + + bse:buddy rdfs:subClassOf bsfs:Predicate ; + rdfs:domain bsfs:Entity ; + rdfs:range bsfs:Node ; + bsfs:unique "false"^^xsd:boolean . + + bse:tag rdfs:subClassOf bsfs:Predicate ; + rdfs:domain bsfs:Entity ; + rdfs:range bsfs:Tag ; + bsfs:unique "false"^^xsd:boolean . + + bse:representative rdfs:subClassOf bsfs:Predicate ; + rdfs:domain bsfs:Tag ; + rdfs:range bsfs:Image ; + bsfs:unique "false"^^xsd:boolean . + + bse:iso rdfs:subClassOf bsfs:Predicate ; + rdfs:domain bsfs:Image ; + rdfs:range xsd:integer ; + bsfs:unique "true"^^xsd:boolean . + + ''') + + # parser instance + self.parser = Filter(self.schema) + + # graph to test queries + self.graph = rdflib.Graph() + # schema hierarchies + self.graph.add((rdflib.URIRef('http://bsfs.ai/schema/Entity'), rdflib.RDFS.subClassOf, rdflib.URIRef('http://bsfs.ai/schema/Node'))) + self.graph.add((rdflib.URIRef('http://bsfs.ai/schema/Image'), rdflib.RDFS.subClassOf, rdflib.URIRef('http://bsfs.ai/schema/Entity'))) + self.graph.add((rdflib.URIRef('http://bsfs.ai/schema/Tag'), rdflib.RDFS.subClassOf, rdflib.URIRef('http://bsfs.ai/schema/Node'))) + # entities + self.graph.add((rdflib.URIRef('http://example.com/entity#1234'), rdflib.RDF.type, rdflib.URIRef('http://bsfs.ai/schema/Entity'))) + self.graph.add((rdflib.URIRef('http://example.com/entity#4321'), rdflib.RDF.type, rdflib.URIRef('http://bsfs.ai/schema/Entity'))) + # tags + self.graph.add((rdflib.URIRef('http://example.com/tag#1234'), rdflib.RDF.type, rdflib.URIRef('http://bsfs.ai/schema/Tag'))) + self.graph.add((rdflib.URIRef('http://example.com/tag#4321'), rdflib.RDF.type, rdflib.URIRef('http://bsfs.ai/schema/Tag'))) + # images + self.graph.add((rdflib.URIRef('http://example.com/image#1234'), rdflib.RDF.type, rdflib.URIRef('http://bsfs.ai/schema/Image'))) + self.graph.add((rdflib.URIRef('http://example.com/image#4321'), rdflib.RDF.type, rdflib.URIRef('http://bsfs.ai/schema/Image'))) + # node comments + self.graph.add((rdflib.URIRef('http://example.com/entity#1234'), rdflib.URIRef(ns.bse.comment), rdflib.Literal('Me, Myself, and I', datatype=rdflib.XSD.string))) + self.graph.add((rdflib.URIRef('http://example.com/entity#1234'), rdflib.URIRef(ns.bse.comment), rdflib.Literal('hello world', datatype=rdflib.XSD.string))) + self.graph.add((rdflib.URIRef('http://example.com/entity#4321'), rdflib.URIRef(ns.bse.comment), rdflib.Literal('hello world', datatype=rdflib.XSD.string))) + self.graph.add((rdflib.URIRef('http://example.com/image#1234'), rdflib.URIRef(ns.bse.comment), rdflib.Literal('Me, Myself, and I', datatype=rdflib.XSD.string))) + self.graph.add((rdflib.URIRef('http://example.com/tag#1234'), rdflib.URIRef(ns.bse.comment), rdflib.Literal('Me, Myself, and I', datatype=rdflib.XSD.string))) + self.graph.add((rdflib.URIRef('http://example.com/tag#4321'), rdflib.URIRef(ns.bse.comment), rdflib.Literal('4321', datatype=rdflib.XSD.string))) + # entity filesizes + self.graph.add((rdflib.URIRef('http://example.com/entity#1234'), rdflib.URIRef(ns.bse.filesize), rdflib.Literal(1234, datatype=rdflib.XSD.integer))) + self.graph.add((rdflib.URIRef('http://example.com/entity#4321'), rdflib.URIRef(ns.bse.filesize), rdflib.Literal(4321, datatype=rdflib.XSD.integer))) + self.graph.add((rdflib.URIRef('http://example.com/image#1234'), rdflib.URIRef(ns.bse.filesize), rdflib.Literal(1234, datatype=rdflib.XSD.integer))) + self.graph.add((rdflib.URIRef('http://example.com/image#4321'), rdflib.URIRef(ns.bse.filesize), rdflib.Literal(4321, datatype=rdflib.XSD.integer))) + # entity tags + self.graph.add((rdflib.URIRef('http://example.com/entity#1234'), rdflib.URIRef(ns.bse.tag), rdflib.URIRef('http://example.com/tag#1234'))) + self.graph.add((rdflib.URIRef('http://example.com/entity#4321'), rdflib.URIRef(ns.bse.tag), rdflib.URIRef('http://example.com/tag#4321'))) + self.graph.add((rdflib.URIRef('http://example.com/image#1234'), rdflib.URIRef(ns.bse.tag), rdflib.URIRef('http://example.com/tag#1234'))) + # tag representatives + self.graph.add((rdflib.URIRef('http://example.com/tag#1234'), rdflib.URIRef(ns.bse.representative), rdflib.URIRef('http://example.com/image#1234'))) + self.graph.add((rdflib.URIRef('http://example.com/tag#4321'), rdflib.URIRef(ns.bse.representative), rdflib.URIRef('http://example.com/image#4321'))) + # entity buddies + self.graph.add((rdflib.URIRef('http://example.com/entity#1234'), rdflib.URIRef(ns.bse.buddy), rdflib.URIRef('http://example.com/image#1234'))) + self.graph.add((rdflib.URIRef('http://example.com/entity#4321'), rdflib.URIRef(ns.bse.buddy), rdflib.URIRef('http://example.com/image#4321'))) + # image iso + self.graph.add((rdflib.URIRef('http://example.com/image#1234'), rdflib.URIRef(ns.bse.iso), rdflib.Literal(1234, datatype=rdflib.XSD.integer))) + self.graph.add((rdflib.URIRef('http://example.com/image#4321'), rdflib.URIRef(ns.bse.iso), rdflib.Literal(4321, datatype=rdflib.XSD.integer))) + + + def test_routing(self): + self.assertRaises(errors.BackendError, self.parser._parse_filter_expression, '1234', None, '') + self.assertRaises(errors.BackendError, self.parser._parse_predicate_expression, '1234', None) + + def test_call(self): + # NOTE: The individual ast components are considered in the respective tests. Here, we test __call__ specifics. + + # __call__ requires a valid root type + self.assertRaises(errors.BackendError, self.parser, self.schema.literal(ns.bsfs.Literal), None) + self.assertRaises(errors.ConsistencyError, self.parser, self.schema.node(ns.bsfs.Node).get_child(ns.bsfs.Invalid), None) + # __call__ requires a parseable root + self.assertRaises(errors.BackendError, self.parser, self.schema.node(ns.bsfs.Entity), ast.filter.FilterExpression()) + # __call__ returns an executable query + q = self.parser(self.schema.node(ns.bsfs.Entity), + ast.filter.Or( + ast.filter.Is('http://example.com/entity#1234'), + ast.filter.Is('http://example.com/entity#5678'))) + self.assertSetEqual({str(guid) for guid, in self.graph.query(q)}, {'http://example.com/entity#1234'}) + # root is optional + q = self.parser(self.schema.node(ns.bsfs.Entity)) + self.assertSetEqual({str(guid) for guid, in self.graph.query(q)}, + {'http://example.com/entity#1234', 'http://example.com/entity#4321', 'http://example.com/image#1234', 'http://example.com/image#4321'}) + q = self.parser(self.schema.node(ns.bsfs.Tag)) + self.assertSetEqual({str(guid) for guid, in self.graph.query(q)}, + {'http://example.com/tag#1234', 'http://example.com/tag#4321'}) + + + def test_is(self): + # _is requires a node + self.assertRaises(errors.BackendError, self.parser._is, self.schema.literal(ns.bsfs.Literal), ast.filter.Is('http://example.com/entity#1234'), '?ent') + # a single Is statement + q = self.parser(self.schema.node(ns.bsfs.Entity), ast.filter.Is('http://example.com/entity#1234')) + self.assertSetEqual({str(guid) for guid, in self.graph.query(q)}, + {'http://example.com/entity#1234'}) + # an aggregate of Is statements + q = self.parser(self.schema.node(ns.bsfs.Entity), + ast.filter.Or( + ast.filter.Is('http://example.com/entity#1234'), + ast.filter.Is('http://example.com/entity#4321'), + )) + self.assertSetEqual({str(guid) for guid, in self.graph.query(q)}, + {'http://example.com/entity#1234', 'http://example.com/entity#4321'}) + # combined with other filters + q = self.parser(self.schema.node(ns.bsfs.Entity), + ast.filter.And( + ast.filter.Or( + ast.filter.Is('http://example.com/entity#1234'), + ast.filter.Is('http://example.com/entity#4321'), + ), + ast.filter.Any(ns.bse.comment, + ast.filter.Equals('Me, Myself, and I') + ), + )) + self.assertSetEqual({str(guid) for guid, in self.graph.query(q)}, + {'http://example.com/entity#1234'}) + # as argument of Any/All + q = self.parser(self.schema.node(ns.bsfs.Entity), + ast.filter.Any(ns.bse.tag, ast.filter.Is('http://example.com/tag#1234'))) + self.assertSetEqual({str(guid) for guid, in self.graph.query(q)}, + {'http://example.com/entity#1234', 'http://example.com/image#1234'}) + + + def test_equals(self): + # _equals requires a literal + self.assertRaises(errors.BackendError, self.parser._equals, self.schema.node(ns.bsfs.Entity), ast.filter.Equals('hello world'), '?ent') + # a single Equals statement + q = self.parser(self.schema.node(ns.bsfs.Entity), ast.filter.Any(ns.bse.comment, ast.filter.Equals('hello world'))) + self.assertSetEqual({str(guid) for guid, in self.graph.query(q)}, + {'http://example.com/entity#1234', 'http://example.com/entity#4321'}) + # a single Equals statement that includes subtypes + q = self.parser(self.schema.node(ns.bsfs.Entity), ast.filter.Any(ns.bse.comment, ast.filter.Equals('Me, Myself, and I'))) + self.assertSetEqual({str(guid) for guid, in self.graph.query(q)}, + {'http://example.com/entity#1234', 'http://example.com/image#1234'}) + # an Equals statement on an integer + q = self.parser(self.schema.node(ns.bsfs.Entity), ast.filter.Any(ns.bse.filesize, ast.filter.Equals(4321))) + self.assertSetEqual({str(guid) for guid, in self.graph.query(q)}, + {'http://example.com/entity#4321', 'http://example.com/image#4321'}) + + + def test_substring(self): + # _substring requires a literal + self.assertRaises(errors.BackendError, self.parser._substring, self.schema.node(ns.bsfs.Entity), ast.filter.Substring('hello world'), '?ent') + # a single Substring statement + q = self.parser(self.schema.node(ns.bsfs.Entity), ast.filter.Any(ns.bse.comment, ast.filter.Substring('hello'))) + self.assertSetEqual({str(guid) for guid, in self.graph.query(q)}, + {'http://example.com/entity#1234', 'http://example.com/entity#4321'}) + q = self.parser(self.schema.node(ns.bsfs.Entity), ast.filter.Any(ns.bse.comment, ast.filter.Substring('lo wo'))) + self.assertSetEqual({str(guid) for guid, in self.graph.query(q)}, + {'http://example.com/entity#1234', 'http://example.com/entity#4321'}) + # a single Substring statement that includes subtypes + q = self.parser(self.schema.node(ns.bsfs.Entity), ast.filter.Any(ns.bse.comment, ast.filter.Substring('Myself'))) + self.assertSetEqual({str(guid) for guid, in self.graph.query(q)}, + {'http://example.com/entity#1234', 'http://example.com/image#1234'}) + # an Substring statement on an integer + q = self.parser(self.schema.node(ns.bsfs.Entity), ast.filter.Any(ns.bse.filesize, ast.filter.Substring('32'))) + self.assertSetEqual({str(guid) for guid, in self.graph.query(q)}, + {'http://example.com/entity#4321', 'http://example.com/image#4321'}) + + + def test_starts_with(self): + # _starts_with requires a literal + self.assertRaises(errors.BackendError, self.parser._starts_with, self.schema.node(ns.bsfs.Entity), ast.filter.StartsWith('hello world'), '?ent') + # a single StartsWith statement + q = self.parser(self.schema.node(ns.bsfs.Entity), ast.filter.Any(ns.bse.comment, ast.filter.StartsWith('hello'))) + self.assertSetEqual({str(guid) for guid, in self.graph.query(q)}, + {'http://example.com/entity#1234', 'http://example.com/entity#4321'}) + # a single StartsWith statement that includes subtypes + q = self.parser(self.schema.node(ns.bsfs.Entity), ast.filter.Any(ns.bse.comment, ast.filter.StartsWith('Me, Mys'))) + self.assertSetEqual({str(guid) for guid, in self.graph.query(q)}, + {'http://example.com/entity#1234', 'http://example.com/image#1234'}) + # an StartsWith statement on an integer + q = self.parser(self.schema.node(ns.bsfs.Entity), ast.filter.Any(ns.bse.filesize, ast.filter.StartsWith(432))) + self.assertSetEqual({str(guid) for guid, in self.graph.query(q)}, + {'http://example.com/entity#4321', 'http://example.com/image#4321'}) + + + def test_ends_with(self): + # _ends_with requires a literal + self.assertRaises(errors.BackendError, self.parser._ends_with, self.schema.node(ns.bsfs.Entity), ast.filter.EndsWith('hello world'), '?ent') + # a single EndsWith statement + q = self.parser(self.schema.node(ns.bsfs.Entity), ast.filter.Any(ns.bse.comment, ast.filter.EndsWith('orld'))) + self.assertSetEqual({str(guid) for guid, in self.graph.query(q)}, + {'http://example.com/entity#1234', 'http://example.com/entity#4321'}) + # a single EndsWith statement that includes subtypes + q = self.parser(self.schema.node(ns.bsfs.Entity), ast.filter.Any(ns.bse.comment, ast.filter.EndsWith('and I'))) + self.assertSetEqual({str(guid) for guid, in self.graph.query(q)}, + {'http://example.com/entity#1234', 'http://example.com/image#1234'}) + # an EndsWith statement on an integer + q = self.parser(self.schema.node(ns.bsfs.Entity), ast.filter.Any(ns.bse.filesize, ast.filter.EndsWith(321))) + self.assertSetEqual({str(guid) for guid, in self.graph.query(q)}, + {'http://example.com/entity#4321', 'http://example.com/image#4321'}) + + + def test_less_than(self): + # _less_than requires a literal + self.assertRaises(errors.BackendError, self.parser._less_than, self.schema.node(ns.bsfs.Entity), ast.filter.LessThan(2000), '?ent') + # a single LessThan statement + q = self.parser(self.schema.node(ns.bsfs.Image), ast.filter.Any(ns.bse.iso, ast.filter.LessThan(2000))) + self.assertSetEqual({str(guid) for guid, in self.graph.query(q)}, + {'http://example.com/image#1234'}) + # _less_than respects boundary + q = self.parser(self.schema.node(ns.bsfs.Image), ast.filter.Any(ns.bse.iso, ast.filter.LessThan(1234, strict=True))) + self.assertSetEqual({str(guid) for guid, in self.graph.query(q)}, set()) + q = self.parser(self.schema.node(ns.bsfs.Image), ast.filter.Any(ns.bse.iso, ast.filter.LessThan(1234, strict=False))) + self.assertSetEqual({str(guid) for guid, in self.graph.query(q)}, + {'http://example.com/image#1234'}) + # a single LessThan statement that includes subtypes + q = self.parser(self.schema.node(ns.bsfs.Entity), ast.filter.Any(ns.bse.filesize, ast.filter.LessThan(2000))) + self.assertSetEqual({str(guid) for guid, in self.graph.query(q)}, + {'http://example.com/entity#1234', 'http://example.com/image#1234'}) + # an LessThan statement on a string + # always negative; note that http://example.com/tag#4321 is also not returned although its comment is a pure number + q = self.parser(self.schema.node(ns.bsfs.Entity), ast.filter.Any(ns.bse.comment, ast.filter.LessThan(10_000))) + self.assertSetEqual({str(guid) for guid, in self.graph.query(q)}, set()) + + + def test_greater_than(self): + # _greater_than requires a literal + self.assertRaises(errors.BackendError, self.parser._greater_than, self.schema.node(ns.bsfs.Entity), ast.filter.GreaterThan(2000), '?ent') + # a single GreaterThan statement + q = self.parser(self.schema.node(ns.bsfs.Image), ast.filter.Any(ns.bse.iso, ast.filter.GreaterThan(2000))) + self.assertSetEqual({str(guid) for guid, in self.graph.query(q)}, + {'http://example.com/image#4321'}) + # _greater_than respects boundary + q = self.parser(self.schema.node(ns.bsfs.Image), ast.filter.Any(ns.bse.iso, ast.filter.GreaterThan(4321, strict=True))) + self.assertSetEqual({str(guid) for guid, in self.graph.query(q)}, set()) + q = self.parser(self.schema.node(ns.bsfs.Image), ast.filter.Any(ns.bse.iso, ast.filter.GreaterThan(4321, strict=False))) + self.assertSetEqual({str(guid) for guid, in self.graph.query(q)}, + {'http://example.com/image#4321'}) + # a single GreaterThan statement that includes subtypes + q = self.parser(self.schema.node(ns.bsfs.Entity), ast.filter.Any(ns.bse.filesize, ast.filter.GreaterThan(2000))) + self.assertSetEqual({str(guid) for guid, in self.graph.query(q)}, + {'http://example.com/entity#4321', 'http://example.com/image#4321'}) + # an GreaterThan statement on a string + # always positive + q = self.parser(self.schema.node(ns.bsfs.Entity), ast.filter.Any(ns.bse.comment, ast.filter.GreaterThan(0))) + self.assertSetEqual({str(guid) for guid, in self.graph.query(q)}, + {'http://example.com/entity#1234', 'http://example.com/entity#4321', 'http://example.com/image#1234'}) + + + def test_and(self): + # And childs have to match the node type + self.assertRaises(errors.BackendError, self.parser, + self.schema.node(ns.bsfs.Entity), + ast.filter.And( + ast.filter.StartsWith('hello'), + ast.filter.EndsWith('world'), + )) + # no child produces an empty query + self.assertEqual(self.parser._and( + self.schema.node(ns.bsfs.Entity), + ast.filter.And(), '?ent'), '') + # And can mix different conditions + q = self.parser(self.schema.node(ns.bsfs.Entity), + ast.filter.And( + ast.filter.Is('http://example.com/entity#1234'), + ast.filter.Any(ns.bse.filesize, ast.filter.Equals(1234)), + ast.filter.Any(ns.bse.comment, ast.filter.Equals('Me, Myself, and I')), + )) + self.assertSetEqual({str(guid) for guid, in self.graph.query(q)}, + {'http://example.com/entity#1234'}) + # all conditions have to match + q = self.parser(self.schema.node(ns.bsfs.Entity), + ast.filter.And( + ast.filter.Is('http://example.com/entity#4321'), + ast.filter.Any(ns.bse.filesize, ast.filter.Equals(1234)), + ast.filter.Any(ns.bse.comment, ast.filter.Equals('Me, Myself, and I')), + )) + self.assertSetEqual({str(guid) for guid, in self.graph.query(q)}, set()) + q = self.parser(self.schema.node(ns.bsfs.Entity), + ast.filter.And( + ast.filter.Is('http://example.com/entity#1234'), + ast.filter.Any(ns.bse.filesize, ast.filter.Equals(4321)), + ast.filter.Any(ns.bse.comment, ast.filter.Equals('Me, Myself, and I')), + )) + self.assertSetEqual({str(guid) for guid, in self.graph.query(q)}, set()) + q = self.parser(self.schema.node(ns.bsfs.Entity), + ast.filter.And( + ast.filter.Is('http://example.com/entity#1234'), + ast.filter.Any(ns.bse.filesize, ast.filter.Equals(1234)), + ast.filter.Any(ns.bse.comment, ast.filter.Equals('foobar')), + )) + self.assertSetEqual({str(guid) for guid, in self.graph.query(q)}, set()) + # And can be nested + q = self.parser(self.schema.node(ns.bsfs.Entity), + ast.filter.And( + ast.filter.Is('http://example.com/entity#1234'), + ast.filter.And( + ast.filter.Any(ns.bse.filesize, ast.filter.Equals(1234)), + ast.filter.Any(ns.bse.comment, ast.filter.Equals('Me, Myself, and I')), + ), + )) + self.assertSetEqual({str(guid) for guid, in self.graph.query(q)}, + {'http://example.com/entity#1234'}) + + + def test_or(self): + # Or childs have to match the node type + self.assertRaises(errors.BackendError, self.parser, + self.schema.node(ns.bsfs.Entity), + ast.filter.Or( + ast.filter.StartsWith('hello'), + ast.filter.EndsWith('world'), + )) + # no child produces an empty query + self.assertEqual(self.parser._and( + self.schema.node(ns.bsfs.Entity), + ast.filter.Or(), '?ent'), '') + # Or can mix different conditions + q = self.parser(self.schema.node(ns.bsfs.Entity), + ast.filter.Or( + ast.filter.Is('http://example.com/entity#1234'), + ast.filter.Any(ns.bse.filesize, ast.filter.Equals(4321)), + ast.filter.Any(ns.bse.comment, ast.filter.Equals('Me, Myself, and I')), + )) + self.assertSetEqual({str(guid) for guid, in self.graph.query(q)}, + {'http://example.com/entity#1234', 'http://example.com/image#1234', 'http://example.com/entity#4321', 'http://example.com/image#4321'}) + # at least one condition has to match + q = self.parser(self.schema.node(ns.bsfs.Entity), + ast.filter.Or( + ast.filter.Is('http://example.com/entity#5678'), + ast.filter.Any(ns.bse.filesize, ast.filter.Equals(8765)), + ast.filter.Any(ns.bse.comment, ast.filter.Equals('foobar')), + )) + self.assertSetEqual({str(guid) for guid, in self.graph.query(q)}, set()) + q = self.parser(self.schema.node(ns.bsfs.Entity), + ast.filter.Or( + ast.filter.Is('http://example.com/entity#1234'), + ast.filter.Any(ns.bse.filesize, ast.filter.Equals(8765)), + ast.filter.Any(ns.bse.comment, ast.filter.Equals('foobar')), + )) + self.assertSetEqual({str(guid) for guid, in self.graph.query(q)}, + {'http://example.com/entity#1234'}) + q = self.parser(self.schema.node(ns.bsfs.Entity), + ast.filter.Or( + ast.filter.Is('http://example.com/entity#5678'), + ast.filter.Any(ns.bse.filesize, ast.filter.Equals(4321)), + ast.filter.Any(ns.bse.comment, ast.filter.Equals('foobar')), + )) + self.assertSetEqual({str(guid) for guid, in self.graph.query(q)}, + {'http://example.com/entity#4321', 'http://example.com/image#4321'}) + q = self.parser(self.schema.node(ns.bsfs.Entity), + ast.filter.Or( + ast.filter.Is('http://example.com/entity#5678'), + ast.filter.Any(ns.bse.filesize, ast.filter.Equals(8765)), + ast.filter.Any(ns.bse.comment, ast.filter.Equals('Me, Myself, and I')), + )) + self.assertSetEqual({str(guid) for guid, in self.graph.query(q)}, + {'http://example.com/entity#1234', 'http://example.com/image#1234'}) + # Or can be nested + q = self.parser(self.schema.node(ns.bsfs.Entity), + ast.filter.Or( + ast.filter.Is('http://example.com/entity#1234'), + ast.filter.Or( + ast.filter.Any(ns.bse.filesize, ast.filter.Equals(4321)), + ast.filter.Any(ns.bse.comment, ast.filter.Equals('Me, Myself, and I')), + ), + )) + self.assertSetEqual({str(guid) for guid, in self.graph.query(q)}, + {'http://example.com/entity#1234', 'http://example.com/image#1234', 'http://example.com/entity#4321', 'http://example.com/image#4321'}) + + + + def test_any(self): + # _any requires a node + self.assertRaises(errors.BackendError, self.parser._any, + self.schema.literal(ns.bsfs.Literal), + ast.filter.Any(ns.bse.filesize, ast.filter.Equals(1234)), '?ent') + # node type must match predicate's domain + self.assertRaises(errors.ConsistencyError, self.parser._any, + self.schema.node(ns.bsfs.Tag), + ast.filter.Any(ns.bse.filesize, ast.filter.Equals(1234)), '?ent') + # predicate must be valid + self.assertRaises(errors.ConsistencyError, self.parser._any, + self.schema.node(ns.bsfs.Entity), + ast.filter.Any(ns.bse.invalid, ast.filter.Equals(1234)), '?ent') + # _any returns a valid query + q = self.parser(self.schema.node(ns.bsfs.Entity), + ast.filter.Any(ns.bse.filesize, ast.filter.Equals(1234))) + self.assertSetEqual({str(guid) for guid, in self.graph.query(q)}, + {'http://example.com/entity#1234', 'http://example.com/image#1234'}) + # _any can be nested + q = self.parser(self.schema.node(ns.bsfs.Entity), + ast.filter.Any(ns.bse.tag, + ast.filter.Any(ns.bse.representative, + ast.filter.Is('http://example.com/image#1234')))) + self.assertSetEqual({str(guid) for guid, in self.graph.query(q)}, + {'http://example.com/entity#1234', 'http://example.com/image#1234'}) + + + def test_all(self): + # All requires a Node + self.assertRaises(errors.BackendError, self.parser._all, self.schema.literal(ns.bsfs.Literal), None, '') + # All Nodes + q = self.parser(self.schema.node(ns.bsfs.Entity), + ast.filter.All(ns.bse.tag, ast.filter.Is('http://example.com/tag#1234'))) + self.assertSetEqual({str(guid) for guid, in self.graph.query(q)}, + {'http://example.com/entity#1234', 'http://example.com/image#1234'}) + # All values + q = self.parser(self.schema.node(ns.bsfs.Entity), + ast.filter.All(ns.bse.comment, ast.filter.Equals('hello world'))) + self.assertSetEqual({str(guid) for guid, in self.graph.query(q)}, + {'http://example.com/entity#4321'}) + # All on value within Or branch + # entity#1234 is selected because all of its comments are in ("hello world", "Me, Myself, and I") + q = self.parser(self.schema.node(ns.bsfs.Entity), + ast.filter.All(ns.bse.comment, ast.filter.Or( + ast.filter.Equals('hello world'), + ast.filter.Equals('Me, Myself, and I')))) + self.assertSetEqual({str(guid) for guid, in self.graph.query(q)}, + {'http://example.com/entity#1234', 'http://example.com/entity#4321', 'http://example.com/image#1234'}) + # All requires at least one predicate/value + q = self.parser(self.schema.node(ns.bsfs.Entity), + ast.filter.All(ns.bse.comment, ast.filter.Equals('Me, Myself, and I'))) + self.assertSetEqual({str(guid) for guid, in self.graph.query(q)}, + {'http://example.com/image#1234'}) + # All within a statement + q = self.parser(self.schema.node(ns.bsfs.Entity), + ast.filter.And( + ast.filter.All(ns.bse.tag, ast.filter.Is('http://example.com/tag#1234')), # entity#1234, image#1234 + ast.filter.All(ns.bse.comment, ast.filter.Or( # entity#1234, entity#4321, image#1234 + ast.filter.Equals('hello world'), + ast.filter.Equals('Me, Myself, and I'), + )) + ) + ) + self.assertSetEqual({str(guid) for guid, in self.graph.query(q)}, + {'http://example.com/entity#1234', 'http://example.com/image#1234'}) + # All with reversed Predicate + q = self.parser(self.schema.node(ns.bsfs.Tag), + ast.filter.All(ast.filter.Predicate(ns.bse.tag, reverse=True), ast.filter.Is('http://example.com/entity#4321'))) + self.assertSetEqual({str(guid) for guid, in self.graph.query(q)}, + {'http://example.com/tag#4321'}) + # All with multiple predicates + q = self.parser(self.schema.node(ns.bsfs.Entity), + ast.filter.All(ast.filter.OneOf(ns.bse.tag, ns.bse.buddy), # entity#1234 (tag:tag#1234), entity#1234 (buddy:image#1234), image#1234(tag:tag#1234) + ast.filter.Any(ns.bse.comment, ast.filter.Equals('Me, Myself, and I')))) # entity#1234, image#1234, tag#1234 + self.assertSetEqual({str(guid) for guid, in self.graph.query(q)}, + {'http://example.com/entity#1234', 'http://example.com/image#1234'}) + + + + def test_not(self): + # Not applies on conditions + q = self.parser(self.schema.node(ns.bsfs.Entity), + ast.filter.Not(ast.filter.Is('http://example.com/entity#1234'))) + self.assertSetEqual({str(guid) for guid, in self.graph.query(q)}, + {'http://example.com/image#1234', 'http://example.com/entity#4321', 'http://example.com/image#4321'}) + # Not applies on conditions within branches + q = self.parser(self.schema.node(ns.bsfs.Entity), + ast.filter.Any(ns.bse.comment, ast.filter.Not(ast.filter.Equals('Me, Myself, and I')))) + self.assertSetEqual({str(guid) for guid, in self.graph.query(q)}, + {'http://example.com/entity#1234', 'http://example.com/entity#4321'}) + # Not applies on branches + q = self.parser(self.schema.node(ns.bsfs.Entity), + ast.filter.Not(ast.filter.Any(ns.bse.comment, ast.filter.Equals('Me, Myself, and I')))) + self.assertSetEqual({str(guid) for guid, in self.graph.query(q)}, + {'http://example.com/entity#4321', 'http://example.com/image#4321'}) + # Double Not cancel each other + q = self.parser(self.schema.node(ns.bsfs.Entity), + ast.filter.Not(ast.filter.Not(ast.filter.Is('http://example.com/entity#1234')))) + self.assertSetEqual({str(guid) for guid, in self.graph.query(q)}, + {'http://example.com/entity#1234'}) + # Not works within aggregation (and) + q = self.parser(self.schema.node(ns.bsfs.Entity), + ast.filter.And( + ast.filter.Not(ast.filter.Is('http://example.com/entity#1234')), + ast.filter.Any(ns.bse.comment, ast.filter.Equals('hello world')), + )) + self.assertSetEqual({str(guid) for guid, in self.graph.query(q)}, + {'http://example.com/entity#4321'}) + # Not works within aggregation (or) + q = self.parser(self.schema.node(ns.bsfs.Entity), + ast.filter.Or( + ast.filter.Not(ast.filter.Is('http://example.com/entity#1234')), + ast.filter.Any(ns.bse.comment, ast.filter.Equals('Me, Myself, and I')), + )) + self.assertSetEqual({str(guid) for guid, in self.graph.query(q)}, + {'http://example.com/entity#1234', 'http://example.com/entity#4321', 'http://example.com/image#1234', 'http://example.com/image#4321'}) + # Not works outside aggregation (and) + q = self.parser(self.schema.node(ns.bsfs.Entity), + ast.filter.Not( + ast.filter.And( + ast.filter.Is('http://example.com/entity#1234'), + ast.filter.Any(ns.bse.comment, ast.filter.Equals('hello world')), + ))) + self.assertSetEqual({str(guid) for guid, in self.graph.query(q)}, + {'http://example.com/entity#4321', 'http://example.com/image#1234', 'http://example.com/image#4321'}) + # Not works outside aggregation (or) + q = self.parser(self.schema.node(ns.bsfs.Entity), + ast.filter.Not( + ast.filter.Or( + ast.filter.Is('http://example.com/entity#4321'), + ast.filter.Any(ns.bse.comment, ast.filter.Equals('Me, Myself, and I')), + ))) + self.assertSetEqual({str(guid) for guid, in self.graph.query(q)}, + {'http://example.com/image#4321'}) + # Not mixed with branch, aggregation, id, and value + q = self.parser(self.schema.node(ns.bsfs.Entity), + ast.filter.And( + ast.filter.Not( # image#1234, image#4321 + ast.filter.Or( # entity#4321, entity#1234 + ast.filter.Is('http://example.com/entity#4321'), + ast.filter.Any(ns.bse.comment, ast.filter.Equals('hello world')), + ) + ), + ast.filter.Any(ns.bse.comment, ast.filter.Not(ast.filter.Equals('foobar'))), # entity#1234, entity#4321, image#1234 + )) + self.assertSetEqual({str(guid) for guid, in self.graph.query(q)}, + {'http://example.com/image#1234'}) + + + def test_has(self): + # Has requires Node + self.assertRaises(errors.BackendError, self.parser._has, self.schema.literal(ns.bsfs.Literal), None, '') + # Has with GreaterThan constraint + q = self.parser(self.schema.node(ns.bsfs.Entity), + ast.filter.Has(ns.bse.comment, ast.filter.GreaterThan(0))) + self.assertSetEqual({str(guid) for guid, in self.graph.query(q)}, + {'http://example.com/entity#1234', 'http://example.com/entity#4321', 'http://example.com/image#1234'}) + q = self.parser(self.schema.node(ns.bsfs.Entity), + ast.filter.Has(ns.bse.comment, ast.filter.GreaterThan(1))) + self.assertSetEqual({str(guid) for guid, in self.graph.query(q)}, + {'http://example.com/entity#1234'}) + # Has with Equals constraint + q = self.parser(self.schema.node(ns.bsfs.Entity), + ast.filter.Has(ns.bse.comment, 1)) + self.assertSetEqual({str(guid) for guid, in self.graph.query(q)}, + {'http://example.com/entity#4321', 'http://example.com/image#1234'}) + # Has with LessThan constraint + q = self.parser(self.schema.node(ns.bsfs.Entity), + ast.filter.Has(ns.bse.comment, ast.filter.LessThan(2))) + self.assertSetEqual({str(guid) for guid, in self.graph.query(q)}, + {'http://example.com/entity#4321', 'http://example.com/image#1234', 'http://example.com/image#4321'}) + # Has with multiple constraints + self.graph.add((rdflib.URIRef('http://example.com/entity#1234'), rdflib.URIRef(ns.bse.comment), rdflib.Literal('extra1', datatype=rdflib.XSD.string))) + self.graph.add((rdflib.URIRef('http://example.com/entity#1234'), rdflib.URIRef(ns.bse.comment), rdflib.Literal('extra2', datatype=rdflib.XSD.string))) + self.graph.add((rdflib.URIRef('http://example.com/entity#1234'), rdflib.URIRef(ns.bse.comment), rdflib.Literal('extra3', datatype=rdflib.XSD.string))) + self.graph.add((rdflib.URIRef('http://example.com/entity#1234'), rdflib.URIRef(ns.bse.comment), rdflib.Literal('extra4', datatype=rdflib.XSD.string))) + self.graph.add((rdflib.URIRef('http://example.com/entity#1234'), rdflib.URIRef(ns.bse.comment), rdflib.Literal('extra5', datatype=rdflib.XSD.string))) + self.graph.add((rdflib.URIRef('http://example.com/entity#4321'), rdflib.URIRef(ns.bse.comment), rdflib.Literal('extra1', datatype=rdflib.XSD.string))) + self.graph.add((rdflib.URIRef('http://example.com/entity#4321'), rdflib.URIRef(ns.bse.comment), rdflib.Literal('extra2', datatype=rdflib.XSD.string))) + q = self.parser(self.schema.node(ns.bsfs.Entity), ast.filter.Has(ns.bse.comment, + ast.filter.And(ast.filter.GreaterThan(1), ast.filter.LessThan(5)))) + self.assertSetEqual({str(guid) for guid, in self.graph.query(q)}, + {'http://example.com/entity#4321'}) + # Has with OneOf predicate + q = self.parser(self.schema.node(ns.bsfs.Entity), ast.filter.Has(ast.filter.OneOf(ns.bse.tag, ns.bse.buddy), + ast.filter.GreaterThan(1))) + self.assertSetEqual({str(guid) for guid, in self.graph.query(q)}, + {'http://example.com/entity#1234', 'http://example.com/entity#4321'}) + # Has with reversed predicate + q = self.parser(self.schema.node(ns.bsfs.Tag), ast.filter.Has(ast.filter.Predicate(ns.bse.tag, reverse=True), + ast.filter.GreaterThan(1))) + self.assertSetEqual({str(guid) for guid, in self.graph.query(q)}, + {'http://example.com/tag#1234'}) + + + def test_one_of(self): + # _one_of expects a node + self.assertRaises(errors.BackendError, self.parser._one_of, + self.schema.literal(ns.bsfs.Literal), + ast.filter.OneOf(ast.filter.Predicate(ns.bse.filesize))) + # invalid predicate for node type raises an error + self.assertRaises(errors.ConsistencyError, self.parser._one_of, + self.schema.node(ns.bsfs.Node), + ast.filter.OneOf(ast.filter.Predicate(ns.bse.filesize))) + self.assertRaises(errors.ConsistencyError, self.parser, + self.schema.node(ns.bsfs.Tag), + ast.filter.Any(ast.filter.OneOf(ast.filter.Predicate(ns.bse.filesize)), ast.filter.Equals(1234))) + self.assertRaises(errors.BackendError, self.parser._one_of, + self.schema.node(ns.bsfs.Node), + ast.filter.OneOf(ast.filter.Predicate(ns.bsfs.Predicate))) + # invalid predicate combinations raise an error + self.assertRaises(errors.ConsistencyError, self.parser._one_of, + self.schema.node(ns.bsfs.Node), + ast.filter.OneOf( + ast.filter.Predicate(ns.bse.filesize), + ast.filter.Predicate(ns.bse.representative))) + # _one_of returns the URI and range + q = self.parser._one_of(self.schema.node(ns.bsfs.Image), + ast.filter.OneOf( + ast.filter.Predicate(ns.bse.iso), + ast.filter.Predicate(ns.bse.filesize))) + self.assertTrue(q[0] == f'<{ns.bse.iso}>|<{ns.bse.filesize}>' or q[0] == f'<{ns.bse.filesize}>|<{ns.bse.iso}>') + self.assertEqual(q[1], self.schema.literal(ns.xsd.integer)) + # OneOf can be nested + q = self.parser._one_of(self.schema.node(ns.bsfs.Image), + ast.filter.OneOf( + ast.filter.Predicate(ns.bse.iso), + ast.filter.OneOf( + ast.filter.Predicate(ns.bse.filesize)))) + self.assertTrue(q[0] == f'<{ns.bse.iso}>|<{ns.bse.filesize}>' or q[0] == f'<{ns.bse.filesize}>|<{ns.bse.iso}>') + self.assertEqual(q[1], self.schema.literal(ns.xsd.integer)) + # _one_of returns the most generic range + q = self.parser._one_of(self.schema.node(ns.bsfs.Entity), + ast.filter.OneOf( + ast.filter.Predicate(ns.bse.tag), + ast.filter.Predicate(ns.bse.buddy))) + self.assertTrue(q[0] == f'<{ns.bse.tag}>|<{ns.bse.buddy}>' or q[0] == f'<{ns.bse.buddy}>|<{ns.bse.tag}>') + self.assertEqual(q[1], self.schema.node(ns.bsfs.Node)) + # domains must match the given type + self.assertRaises(errors.ConsistencyError, self.parser, + self.schema.node(ns.bsfs.Entity), + ast.filter.Any(ast.filter.OneOf(ns.bse.tag, ns.bse.buddy), + ast.filter.Any(ast.filter.OneOf(ns.bse.filesize), + ast.filter.Equals(1234)))) + # ranges must have the same type (Node/Literal) + self.assertRaises(errors.ConsistencyError, self.parser, + self.schema.node(ns.bsfs.Entity), + ast.filter.Any(ast.filter.OneOf(ns.bse.tag, ns.bse.filesize), + ast.filter.Equals(1234))) + # ranges must be related + self.assertRaises(errors.ConsistencyError, self.parser, + self.schema.node(ns.bsfs.Entity), + ast.filter.Any(ast.filter.OneOf(ns.bse.comment, ns.bse.filesize), + ast.filter.Equals(1234))) + # integration: _one_of returns a valid sparql query + q = self.parser(self.schema.node(ns.bsfs.Entity), + ast.filter.Any(ast.filter.OneOf(ns.bse.tag, ns.bse.buddy), + ast.filter.Any(ast.filter.OneOf(ns.bse.comment), + ast.filter.Equals('Me, Myself, and I')))) + self.assertSetEqual({str(guid) for guid, in self.graph.query(q)}, + {'http://example.com/entity#1234', 'http://example.com/image#1234'}) + + + def test_predicate(self): + # predicate cannot be the root predicate (ns.bsfs.Predicate) + self.assertRaises(errors.BackendError, self.parser._predicate, self.schema.node(ns.bsfs.Node), ast.filter.Predicate(ns.bsfs.Predicate)) + # _predicate expects a node + self.assertRaises(errors.BackendError, self.parser._predicate, + self.schema.literal(ns.bsfs.Literal), + ast.filter.Predicate(ns.bse.filesize)) + # invalid predicate for node type raises an error + self.assertRaises(errors.ConsistencyError, self.parser._predicate, + self.schema.node(ns.bsfs.Node), + ast.filter.Predicate(ns.bse.filesize)) + self.assertRaises(errors.ConsistencyError, self.parser, + self.schema.node(ns.bsfs.Tag), + ast.filter.Any(ast.filter.Predicate(ns.bse.filesize), ast.filter.Equals(1234))) + # _predicate returns the URI and range + self.assertEqual(self.parser._predicate(self.schema.node(ns.bsfs.Entity), ast.filter.Predicate(ns.bse.filesize)), + (f'<{ns.bse.filesize}>', self.schema.literal(ns.xsd.integer))) + self.assertEqual(self.parser._predicate(self.schema.node(ns.bsfs.Entity), ast.filter.Predicate(ns.bse.tag)), + (f'<{ns.bse.tag}>', self.schema.node(ns.bsfs.Tag))) + # _predicate respects reverse flag + self.assertEqual(self.parser._predicate(self.schema.node(ns.bsfs.Tag), ast.filter.Predicate(ns.bse.tag, reverse=True)), + ('^<' + ns.bse.tag + '>', self.schema.node(ns.bsfs.Entity))) + # integration: _predicate returns a valid sparql query + q = self.parser(self.schema.node(ns.bsfs.Entity), + ast.filter.Any(ns.bse.tag, + ast.filter.Any(ns.bse.representative, + ast.filter.Any(ns.bse.filesize, + ast.filter.Equals(1234))))) + self.assertSetEqual({str(guid) for guid, in self.graph.query(q)}, + {'http://example.com/entity#1234', 'http://example.com/image#1234'}) + q = self.parser(self.schema.node(ns.bsfs.Tag), + ast.filter.Any(ast.filter.Predicate(ns.bse.tag, reverse=True), + ast.filter.Any(ns.bse.filesize, + ast.filter.LessThan(2000)))) + self.assertSetEqual({str(guid) for guid, in self.graph.query(q)}, + {'http://example.com/tag#1234'}) + + +## main ## + +if __name__ == '__main__': + unittest.main() + +## EOF ## diff --git a/test/triple_store/test_sparql.py b/test/triple_store/sparql/test_sparql.py index 8d98749..3d81de1 100644 --- a/test/triple_store/test_sparql.py +++ b/test/triple_store/sparql/test_sparql.py @@ -11,10 +11,11 @@ import unittest # bsie imports from bsfs import schema as _schema from bsfs.namespace import ns +from bsfs.query import ast from bsfs.utils import errors, URI # objects to test -from bsfs.triple_store.sparql import SparqlStore +from bsfs.triple_store.sparql.sparql import SparqlStore ## code ## @@ -59,6 +60,18 @@ class TestSparqlStore(unittest.TestCase): bsfs:unique "true"^^xsd:boolean . ''') + self.schema_triples = { + # schema hierarchy + (rdflib.URIRef(ns.bsfs.Entity), rdflib.RDFS.subClassOf, rdflib.URIRef(ns.bsfs.Node)), + (rdflib.URIRef(ns.bsfs.Tag), rdflib.RDFS.subClassOf, rdflib.URIRef(ns.bsfs.Node)), + (rdflib.URIRef(ns.bsfs.User), rdflib.RDFS.subClassOf, rdflib.URIRef(ns.bsfs.Node)), + (rdflib.URIRef(ns.xsd.string), rdflib.RDFS.subClassOf, rdflib.URIRef(ns.bsfs.Literal)), + (rdflib.URIRef(ns.xsd.integer), rdflib.RDFS.subClassOf, rdflib.URIRef(ns.bsfs.Literal)), + (rdflib.URIRef(ns.bse.comment), rdflib.RDFS.subClassOf, rdflib.URIRef(ns.bsfs.Predicate)), + (rdflib.URIRef(ns.bse.filesize), rdflib.RDFS.subClassOf, rdflib.URIRef(ns.bsfs.Predicate)), + (rdflib.URIRef(ns.bse.tag), rdflib.RDFS.subClassOf, rdflib.URIRef(ns.bsfs.Predicate)), + (rdflib.URIRef(ns.bse.author), rdflib.RDFS.subClassOf, rdflib.URIRef(ns.bsfs.Predicate)), + } def test_essentials(self): store = SparqlStore.Open() @@ -155,7 +168,7 @@ class TestSparqlStore(unittest.TestCase): store.set(curr.node(ns.bsfs.Entity), ent_ids, p_author, {URI('http://example.com/me')}) # check instances - instances = { + instances = self.schema_triples | { # node instances (rdflib.URIRef('http://example.com/me/entity#1234'), rdflib.RDF.type, rdflib.URIRef(ns.bsfs.Entity)), (rdflib.URIRef('http://example.com/me/entity#4321'), rdflib.RDF.type, rdflib.URIRef(ns.bsfs.Entity)), @@ -228,7 +241,16 @@ class TestSparqlStore(unittest.TestCase): store.schema = curr self.assertEqual(store.schema, curr) # instances have not changed - self.assertSetEqual(set(store._graph), instances) + self.assertSetEqual(set(store._graph), instances | { + # schema hierarchy + (rdflib.URIRef(ns.bsfs.Collection), rdflib.RDFS.subClassOf, rdflib.URIRef(ns.bsfs.Node)), + (rdflib.URIRef(ns.xsd.boolean), rdflib.RDFS.subClassOf, rdflib.URIRef(ns.bsfs.Literal)), + (rdflib.URIRef(ns.bse.shared), rdflib.RDFS.subClassOf, rdflib.URIRef(ns.bsfs.Predicate)), + (rdflib.URIRef(ns.bse.partOf), rdflib.RDFS.subClassOf, rdflib.URIRef(ns.bsfs.Predicate)), + (rdflib.URIRef('http://bsfs.ai/schema/Tag#usedIn'), rdflib.RDFS.subClassOf, rdflib.URIRef(ns.bsfs.Predicate)), + (rdflib.URIRef('http://bsfs.ai/schema/Collection#tag'), rdflib.RDFS.subClassOf, rdflib.URIRef(ns.bsfs.Predicate)), + (rdflib.URIRef('http://bsfs.ai/schema/Tag#principal'), rdflib.RDFS.subClassOf, rdflib.URIRef(ns.bsfs.Predicate)), + }) # add some instances of the new classes p_partOf = curr.predicate(ns.bse.partOf) p_shared = curr.predicate(ns.bse.shared) @@ -248,6 +270,14 @@ class TestSparqlStore(unittest.TestCase): {URI('http://example.com/me/collection#1234')}) # new instances are now in the graph self.assertSetEqual(set(store._graph), instances | { + # same old schema hierarchy + (rdflib.URIRef(ns.bsfs.Collection), rdflib.RDFS.subClassOf, rdflib.URIRef(ns.bsfs.Node)), + (rdflib.URIRef(ns.xsd.boolean), rdflib.RDFS.subClassOf, rdflib.URIRef(ns.bsfs.Literal)), + (rdflib.URIRef(ns.bse.shared), rdflib.RDFS.subClassOf, rdflib.URIRef(ns.bsfs.Predicate)), + (rdflib.URIRef(ns.bse.partOf), rdflib.RDFS.subClassOf, rdflib.URIRef(ns.bsfs.Predicate)), + (rdflib.URIRef('http://bsfs.ai/schema/Tag#usedIn'), rdflib.RDFS.subClassOf, rdflib.URIRef(ns.bsfs.Predicate)), + (rdflib.URIRef('http://bsfs.ai/schema/Collection#tag'), rdflib.RDFS.subClassOf, rdflib.URIRef(ns.bsfs.Predicate)), + (rdflib.URIRef('http://bsfs.ai/schema/Tag#principal'), rdflib.RDFS.subClassOf, rdflib.URIRef(ns.bsfs.Predicate)), # collections (rdflib.URIRef('http://example.com/me/collection#1234'), rdflib.RDF.type, rdflib.URIRef(ns.bsfs.Collection)), (rdflib.URIRef('http://example.com/me/collection#4321'), rdflib.RDF.type, rdflib.URIRef(ns.bsfs.Collection)), @@ -316,6 +346,16 @@ class TestSparqlStore(unittest.TestCase): self.assertEqual(store.schema, curr) # instances of old classes were removed self.assertSetEqual(set(store._graph), { + # schema hierarchy + (rdflib.URIRef(ns.bsfs.Entity), rdflib.RDFS.subClassOf, rdflib.URIRef(ns.bsfs.Node)), + (rdflib.URIRef(ns.bsfs.Tag), rdflib.RDFS.subClassOf, rdflib.URIRef(ns.bsfs.Node)), + (rdflib.URIRef(ns.bsfs.User), rdflib.RDFS.subClassOf, rdflib.URIRef(ns.bsfs.Node)), + (rdflib.URIRef(ns.xsd.boolean), rdflib.RDFS.subClassOf, rdflib.URIRef(ns.bsfs.Literal)), + (rdflib.URIRef(ns.xsd.integer), rdflib.RDFS.subClassOf, rdflib.URIRef(ns.bsfs.Literal)), + (rdflib.URIRef(ns.bse.shared), rdflib.RDFS.subClassOf, rdflib.URIRef(ns.bsfs.Predicate)), + (rdflib.URIRef(ns.bse.tag), rdflib.RDFS.subClassOf, rdflib.URIRef(ns.bsfs.Predicate)), + (rdflib.URIRef(ns.bse.filesize), rdflib.RDFS.subClassOf, rdflib.URIRef(ns.bsfs.Predicate)), + (rdflib.URIRef('http://bsfs.ai/schema/Tag#principal'), rdflib.RDFS.subClassOf, rdflib.URIRef(ns.bsfs.Predicate)), # node instances (rdflib.URIRef('http://example.com/me/entity#1234'), rdflib.RDF.type, rdflib.URIRef(ns.bsfs.Entity)), (rdflib.URIRef('http://example.com/me/entity#4321'), rdflib.RDF.type, rdflib.URIRef(ns.bsfs.Entity)), @@ -390,7 +430,7 @@ class TestSparqlStore(unittest.TestCase): ent_ids = {URI('http://example.com/me/entity#1234'), URI('http://example.com/me/entity#4321')} tag_ids = {URI('http://example.com/me/tag#1234'), URI('http://example.com/me/tag#4321')} # target instances - instances = { + instances = self.schema_triples | { # node instances (rdflib.URIRef('http://example.com/me/entity#1234'), rdflib.RDF.type, rdflib.URIRef(ns.bsfs.Entity)), (rdflib.URIRef('http://example.com/me/entity#4321'), rdflib.RDF.type, rdflib.URIRef(ns.bsfs.Entity)), @@ -416,7 +456,7 @@ class TestSparqlStore(unittest.TestCase): # rollback undoes previous changes store.rollback() - self.assertSetEqual(set(store._graph), set()) + self.assertSetEqual(set(store._graph), self.schema_triples) # add some data once more store.create(ent_type, ent_ids) @@ -455,6 +495,39 @@ class TestSparqlStore(unittest.TestCase): (rdflib.URIRef('http://example.com/me/entity#4321'), rdflib.URIRef(p_filesize.uri), rdflib.Literal(1234, datatype=rdflib.XSD.integer)), }) + def test_get(self): + # store setup + store = SparqlStore.Open() + store.schema = self.schema + ent_type = self.schema.node(ns.bsfs.Entity) + tag_type = self.schema.node(ns.bsfs.Tag) + ent_ids = {URI('http://example.com/me/entity#1234'), URI('http://example.com/me/entity#4321')} + tag_ids = {URI('http://example.com/me/tag#1234'), URI('http://example.com/me/tag#4321')} + store.create(ent_type, ent_ids) + store.create(tag_type, tag_ids) + store.set(ent_type, ent_ids, self.schema.predicate(ns.bse.tag), tag_ids) + store.set(ent_type, {URI('http://example.com/me/entity#1234')}, self.schema.predicate(ns.bse.filesize), {1234}) + store.set(ent_type, {URI('http://example.com/me/entity#4321')}, self.schema.predicate(ns.bse.filesize), {4321}) + # node_type must be in the schema + self.assertRaises(errors.ConsistencyError, set, store.get(self.schema.node(ns.bsfs.Node).get_child(ns.bsfs.Invalid), ast.filter.IsIn(ent_ids))) + # query must be a filter expression + class Foo(): pass + self.assertRaises(TypeError, set, store.get(ent_type, 1234)) + self.assertRaises(TypeError, set, store.get(ent_type, '1234')) + self.assertRaises(TypeError, set, store.get(ent_type, Foo())) + # run some queries + self.assertSetEqual(set(store.get(tag_type, ast.filter.IsIn(tag_ids))), tag_ids) + self.assertSetEqual(set(store.get(ent_type, ast.filter.Any(ns.bse.tag, ast.filter.IsIn(tag_ids)))), ent_ids) + self.assertSetEqual(set(store.get(ent_type, ast.filter.IsIn(tag_ids))), set()) + # invalid queries raise error + self.assertRaises(errors.ConsistencyError, set, store.get(tag_type, ast.filter.Any(ns.bse.filesize, ast.filter.Equals(1234)))) + self.assertRaises(errors.BackendError, set, store.get(ent_type, ast.filter.Equals('http://example.com/me/entity#1234'))) + # run some more complex query + q = store.get(tag_type, ast.filter.Any(ast.filter.Predicate(ns.bse.tag, reverse=True), + ast.filter.Any(ns.bse.filesize, + ast.filter.LessThan(2000)))) + self.assertSetEqual(set(q), tag_ids) + def test_exists(self): # store setup @@ -507,14 +580,15 @@ class TestSparqlStore(unittest.TestCase): # can create some nodes ent_type = store.schema.node(ns.bsfs.Entity) store.create(ent_type, {URI('http://example.com/me/entity#1234'), URI('http://example.com/me/entity#4321')}) - self.assertSetEqual(set(store._graph), { + self.assertSetEqual(set(store._graph), self.schema_triples | { + # instances (rdflib.URIRef('http://example.com/me/entity#1234'), rdflib.RDF.type, rdflib.URIRef(ns.bsfs.Entity)), (rdflib.URIRef('http://example.com/me/entity#4321'), rdflib.RDF.type, rdflib.URIRef(ns.bsfs.Entity)), }) # existing nodes are skipped store.create(ent_type, {URI('http://example.com/me/entity#1234'), URI('http://example.com/me/entity#5678')}) - self.assertSetEqual(set(store._graph), { + self.assertSetEqual(set(store._graph), self.schema_triples | { # previous triples (rdflib.URIRef('http://example.com/me/entity#1234'), rdflib.RDF.type, rdflib.URIRef(ns.bsfs.Entity)), (rdflib.URIRef('http://example.com/me/entity#4321'), rdflib.RDF.type, rdflib.URIRef(ns.bsfs.Entity)), @@ -525,7 +599,7 @@ class TestSparqlStore(unittest.TestCase): # can create nodes of a different type tag_type = store.schema.node(ns.bsfs.Tag) store.create(tag_type, {URI('http://example.com/me/tag#1234'), URI('http://example.com/me/tag#4321')}) - self.assertSetEqual(set(store._graph), { + self.assertSetEqual(set(store._graph), self.schema_triples | { # previous triples (rdflib.URIRef('http://example.com/me/entity#1234'), rdflib.RDF.type, rdflib.URIRef(ns.bsfs.Entity)), (rdflib.URIRef('http://example.com/me/entity#4321'), rdflib.RDF.type, rdflib.URIRef(ns.bsfs.Entity)), @@ -538,7 +612,7 @@ class TestSparqlStore(unittest.TestCase): # creation does not change types of existing nodes tag_type = store.schema.node(ns.bsfs.Tag) store.create(tag_type, {URI('http://example.com/me/entity#1234'), URI('http://example.com/me/entity#4321')}) - self.assertSetEqual(set(store._graph), { + self.assertSetEqual(set(store._graph), self.schema_triples | { # previous triples (rdflib.URIRef('http://example.com/me/entity#1234'), rdflib.RDF.type, rdflib.URIRef(ns.bsfs.Entity)), (rdflib.URIRef('http://example.com/me/entity#4321'), rdflib.RDF.type, rdflib.URIRef(ns.bsfs.Entity)), diff --git a/test/triple_store/test_base.py b/test/triple_store/test_base.py index a4b0559..a0c3260 100644 --- a/test/triple_store/test_base.py +++ b/test/triple_store/test_base.py @@ -35,6 +35,9 @@ class DummyBase(TripleStoreBase): def schema(self, schema): pass + def get(self, node_type, query): + pass + def exists(self, node_type, guids): pass diff --git a/test/utils/test_commons.py b/test/utils/test_commons.py index ce73788..3ad6dea 100644 --- a/test/utils/test_commons.py +++ b/test/utils/test_commons.py @@ -8,7 +8,7 @@ Author: Matthias Baumgartner, 2022 import unittest # objects to test -from bsfs.utils.commons import typename +from bsfs.utils.commons import typename, normalize_args ## code ## @@ -21,6 +21,21 @@ class TestCommons(unittest.TestCase): self.assertEqual(typename(123), 'int') self.assertEqual(typename(None), 'NoneType') + def test_normalize_args(self): + # one argument + self.assertEqual(normalize_args(1), (1, )) + # pass as arguments + self.assertEqual(normalize_args(1,2,3), (1,2,3)) + # pass as iterator + self.assertEqual(normalize_args(iter([1,2,3])), (1,2,3)) + # pass as generator + self.assertEqual(normalize_args((i for i in range(1, 4))), (1,2,3)) + self.assertEqual(normalize_args(i for i in range(1, 4)), (1,2,3)) # w/o brackets + # pass as iterable + self.assertEqual(normalize_args([1,2,3]), (1,2,3)) + # pass an iterable with a single item + self.assertEqual(normalize_args([1]), (1, )) + ## main ## |