author    | Matthias Baumgartner <dev@igsor.net> | 2023-02-08 21:17:57 +0100
committer | Matthias Baumgartner <dev@igsor.net> | 2023-02-08 21:17:57 +0100
commit    | 9b490d19dcebc0fc24cb2ab89a783f1f7d6147f7 (patch)
tree      | 5fc3d3b8864a8ff996e5739ed9654dae494d9d8f
parent    | e12cd52ad267563c8046a593ad551b1dd089a702 (diff)
parent    | c0218a8dffcdc3a7a5568f66bb959139fe514ad5 (diff)
download  | bsfs-9b490d19dcebc0fc24cb2ab89a783f1f7d6147f7.tar.gz
          | bsfs-9b490d19dcebc0fc24cb2ab89a783f1f7d6147f7.tar.bz2
          | bsfs-9b490d19dcebc0fc24cb2ab89a783f1f7d6147f7.zip
Merge branch 'mb/fetch' into develop
37 files changed, 4745 insertions, 169 deletions
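The headline addition of this merge is a read path for node data: a `bsfs.query.ast.fetch` AST, access-control and validation hooks for it, a SPARQL translation, and the user-facing `Graph.all()` / `Nodes.get()` entry points. A minimal usage sketch, assuming an already-constructed `Graph` instance `graph` whose schema defines a `ns.bsfs.File` node type and a `ns.bse.filename` predicate (both names are illustrative):

    from bsfs.namespace import ns

    files = graph.all(ns.bsfs.File)                 # Nodes collection over every File instance
    names = files.get(ns.bse.filename)              # dict view (default): one entry per node
    pairs = files.get(ns.bse.filename, view=list)   # lazy iterator of (node, value) pairs instead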
@@ -76,10 +76,10 @@ max-attributes=7 max-bool-expr=5 # Maximum number of branch for function / method body. -max-branches=15 +max-branches=20 # Maximum number of locals for function / method body. -max-locals=15 +max-locals=20 # Maximum number of parents for a class (see R0901). max-parents=7 @@ -91,7 +91,7 @@ max-public-methods=20 max-returns=15 # Maximum number of statements in function / method body. -max-statements=50 +max-statements=100 # Minimum number of public methods for a class (see R0903). min-public-methods=1 diff --git a/bsfs/graph/ac/base.py b/bsfs/graph/ac/base.py index 0703e2e..79b09e5 100644 --- a/bsfs/graph/ac/base.py +++ b/bsfs/graph/ac/base.py @@ -72,4 +72,8 @@ class AccessControlBase(abc.ABC): def filter_read(self, node_type: schema.Node, query: ast.filter.FilterExpression) -> ast.filter.FilterExpression: """Re-write a filter *query* to get (i.e., read) *node_type* nodes.""" + @abc.abstractmethod + def fetch_read(self, node_type: schema.Node, query: ast.fetch.FetchExpression) -> ast.fetch.FetchExpression: + """Re-write a fetch *query* to get (i.e, read) values for *node_type* nodes.""" + ## EOF ## diff --git a/bsfs/graph/ac/null.py b/bsfs/graph/ac/null.py index 12b4e87..6a923a5 100644 --- a/bsfs/graph/ac/null.py +++ b/bsfs/graph/ac/null.py @@ -54,4 +54,8 @@ class NullAC(base.AccessControlBase): """Re-write a filter *query* to get (i.e., read) *node_type* nodes.""" return query + def fetch_read(self, node_type: schema.Node, query: ast.fetch.FetchExpression) -> ast.fetch.FetchExpression: + """Re-write a fetch *query* to get (i.e, read) values for *node_type* nodes.""" + return query + ## EOF ## diff --git a/bsfs/graph/graph.py b/bsfs/graph/graph.py index 2210755..df2e3a5 100644 --- a/bsfs/graph/graph.py +++ b/bsfs/graph/graph.py @@ -133,4 +133,12 @@ class Graph(): # return Nodes instance return _nodes.Nodes(self._backend, self._user, type_, guids) + def all(self, node_type: URI) -> _nodes.Nodes: + """Return all instances of type *node_type*.""" + # get node type + type_ = self.schema.node(node_type) + guids = self._backend.get(type_, None) # no need to materialize + return _nodes.Nodes(self._backend, self._user, type_, guids) + + ## EOF ## diff --git a/bsfs/graph/nodes.py b/bsfs/graph/nodes.py index 5a93f77..bc71a32 100644 --- a/bsfs/graph/nodes.py +++ b/bsfs/graph/nodes.py @@ -5,17 +5,21 @@ A copy of the license is provided with the project. Author: Matthias Baumgartner, 2022 """ # imports +from collections import abc import time import typing # bsfs imports -from bsfs import schema as _schema +from bsfs import schema as bsc from bsfs.namespace import ns +from bsfs.query import ast, validate from bsfs.triple_store import TripleStoreBase from bsfs.utils import errors, URI, typename # inner-module imports from . import ac +from . import result +from . import walk # exports __all__: typing.Sequence[str] = ( @@ -37,7 +41,7 @@ class Nodes(): _user: URI # node type. - _node_type: _schema.Node + _node_type: bsc.Node # guids of nodes. Can be empty. _guids: typing.Set[URI] @@ -46,13 +50,16 @@ class Nodes(): self, backend: TripleStoreBase, user: URI, - node_type: _schema.Node, + node_type: bsc.Node, guids: typing.Iterable[URI], ): + # set main members self._backend = backend self._user = user self._node_type = node_type self._guids = set(guids) + # create helper instances + # FIXME: Assumes that the schema does not change while the instance is in use! 
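The new `fetch_read` hook mirrors `filter_read`: an access-control policy gets a chance to rewrite the fetch AST before it reaches the backend. A sketch of a custom policy in the style of `NullAC` above; the subclass name and behaviour are illustrative, and the remaining abstract methods of `AccessControlBase` are omitted:

    from bsfs import schema
    from bsfs.graph.ac import base
    from bsfs.query import ast

    class RestrictiveAC(base.AccessControlBase):    # illustrative; other abstract methods omitted
        def fetch_read(self, node_type: schema.Node,
                       query: ast.fetch.FetchExpression) -> ast.fetch.FetchExpression:
            # a real policy could prune or wrap the expression; pass-through shown here
            return query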
self._ac = ac.NullAC(self._backend, self._user) def __eq__(self, other: typing.Any) -> bool: @@ -72,7 +79,7 @@ class Nodes(): return f'{typename(self)}({self._node_type}, {self._guids})' @property - def node_type(self) -> _schema.Node: + def node_type(self) -> bsc.Node: """Return the node's type.""" return self._node_type @@ -81,9 +88,72 @@ class Nodes(): """Return all node guids.""" return iter(self._guids) + @property + def schema(self) -> bsc.Schema: + """Return the store's local schema.""" + return self._backend.schema + + def __add__(self, other: typing.Any) -> 'Nodes': + """Concatenate guids. Backend, user, and node type must match.""" + if not isinstance(other, type(self)): + return NotImplemented + if self._backend != other._backend: + raise ValueError(other) + if self._user != other._user: + raise ValueError(other) + if self.node_type != other.node_type: + raise ValueError(other) + return Nodes(self._backend, self._user, self.node_type, self._guids | other._guids) + + def __or__(self, other: typing.Any) -> 'Nodes': + """Concatenate guids. Backend, user, and node type must match.""" + return self.__add__(other) + + def __sub__(self, other: typing.Any) -> 'Nodes': + """Subtract guids. Backend, user, and node type must match.""" + if not isinstance(other, type(self)): + return NotImplemented + if self._backend != other._backend: + raise ValueError(other) + if self._user != other._user: + raise ValueError(other) + if self.node_type != other.node_type: + raise ValueError(other) + return Nodes(self._backend, self._user, self.node_type, self._guids - other._guids) + + def __and__(self, other: typing.Any) -> 'Nodes': + """Intersect guids. Backend, user, and node type must match.""" + if not isinstance(other, type(self)): + return NotImplemented + if self._backend != other._backend: + raise ValueError(other) + if self._user != other._user: + raise ValueError(other) + if self.node_type != other.node_type: + raise ValueError(other) + return Nodes(self._backend, self._user, self.node_type, self._guids & other._guids) + + def __len__(self) -> int: + """Return the number of guids.""" + return len(self._guids) + + def __iter__(self) -> typing.Iterator['Nodes']: + """Iterate over individual guids. Returns `Nodes` instances.""" + return iter( + Nodes(self._backend, self._user, self.node_type, {guid}) + for guid in self._guids + ) + + def __getattr__(self, name: str): + try: + return super().__getattr__(name) # type: ignore [misc] # parent has no getattr + except AttributeError: + pass + return walk.Walk(self, walk.Walk.step(self.schema, self.node_type, name)) + def set( self, - pred: URI, # FIXME: URI or _schema.Predicate? + pred: URI, # FIXME: URI or bsc.Predicate? value: typing.Any, ) -> 'Nodes': """Set predicate *pred* to *value*.""" @@ -91,7 +161,7 @@ class Nodes(): def set_from_iterable( self, - predicate_values: typing.Iterable[typing.Tuple[URI, typing.Any]], # FIXME: URI or _schema.Predicate? + predicate_values: typing.Iterable[typing.Tuple[URI, typing.Any]], # FIXME: URI or bsc.Predicate? ) -> 'Nodes': """Set mutliple predicate-value pairs at once.""" # TODO: Could group predicate_values by predicate to gain some efficiency @@ -120,6 +190,120 @@ class Nodes(): return self + def get( + self, + *paths: typing.Union[URI, typing.Iterable[URI]], + view: typing.Union[typing.Type[list], typing.Type[dict]] = dict, + **view_kwargs, + ) -> typing.Any: + """Get values or nodes at *paths*. + Return an iterator (view=list) or a dict (view=dict) over the results. 
+ """ + # FIXME: user-provided Fetch query AST? + # check args + if len(paths) == 0: + raise AttributeError('expected at least one path, found none') + if view not in (dict, list): + raise ValueError(f'expected dict or list, found {view}') + # process paths: create fetch ast, build name mapping, and find unique paths + schema = self.schema + statements = set() + name2path = {} + unique_paths = set() # paths that result in a single (unique) value + normpath: typing.Tuple[URI, ...] + for idx, path in enumerate(paths): + # normalize path + if isinstance(path, str): + normpath = (URI(path), ) + elif isinstance(path, abc.Iterable): + if not all(isinstance(step, str) for step in path): + raise TypeError(path) + normpath = tuple(URI(step) for step in path) + else: + raise TypeError(path) + # check path's schema consistency + if not all(schema.has_predicate(pred) for pred in normpath): + raise errors.ConsistencyError(f'path is not fully covered by the schema: {path}') + # check path's uniqueness + if all(schema.predicate(pred).unique for pred in normpath): + unique_paths.add(path) + # fetch tail predicate + tail = schema.predicate(normpath[-1]) + # determine tail ast node type + factory = ast.fetch.Node if isinstance(tail.range, bsc.Node) else ast.fetch.Value + # assign name + name = f'fetch{idx}' + name2path[name] = (path, tail) + # create tail ast node + curr: ast.fetch.FetchExpression = factory(tail.uri, name) + # walk towards front + hop: URI + for hop in normpath[-2::-1]: + curr = ast.fetch.Fetch(hop, curr) + # add to fetch query + statements.add(curr) + # aggregate fetch statements + if len(statements) == 1: + fetch = next(iter(statements)) + else: + fetch = ast.fetch.All(*statements) + # add access controls to fetch + fetch = self._ac.fetch_read(self.node_type, fetch) + + # compose filter ast + filter = ast.filter.IsIn(self.guids) # pylint: disable=redefined-builtin + # add access controls to filter + filter = self._ac.filter_read(self.node_type, filter) + + # validate queries + validate.Filter(self._backend.schema)(self.node_type, filter) + validate.Fetch(self._backend.schema)(self.node_type, fetch) + + # process results, convert if need be + def triple_iter(): + # query the backend + triples = self._backend.fetch(self.node_type, filter, fetch) + # process triples + for root, name, raw in triples: + # get node + node = Nodes(self._backend, self._user, self.node_type, {root}) + # get path + path, tail = name2path[name] + # covert raw to value + if isinstance(tail.range, bsc.Node): + value = Nodes(self._backend, self._user, tail.range, {raw}) + else: + value = raw + # emit triple + yield node, path, value + + # simplify by default + view_kwargs['node'] = view_kwargs.get('node', len(self._guids) != 1) + view_kwargs['path'] = view_kwargs.get('path', len(paths) != 1) + view_kwargs['value'] = view_kwargs.get('value', False) + + # return results view + if view == list: + return result.to_list_view( + triple_iter(), + # aggregation args + **view_kwargs, + ) + + if view == dict: + return result.to_dict_view( + triple_iter(), + # context + len(self._guids) == 1, + len(paths) == 1, + unique_paths, + # aggregation args + **view_kwargs, + ) + + raise errors.UnreachableError() # view was already checked + + def __set(self, predicate: URI, value: typing.Any): """ """ @@ -145,7 +329,7 @@ class Nodes(): guids = set(self._ensure_nodes(node_type, guids)) # check value - if isinstance(pred.range, _schema.Literal): + if isinstance(pred.range, bsc.Literal): # check write permissions on existing nodes # As long 
as the user has write permissions, we don't restrict # the creation or modification of literal values. @@ -160,8 +344,9 @@ class Nodes(): [value], ) - elif isinstance(pred.range, _schema.Node): + elif isinstance(pred.range, bsc.Node): # check value type + # FIXME: value could be a set of Nodes if not isinstance(value, Nodes): raise TypeError(value) # value's node_type must be a subclass of the predicate's range @@ -192,7 +377,7 @@ class Nodes(): else: raise errors.UnreachableError() - def _ensure_nodes(self, node_type: _schema.Node, guids: typing.Iterable[URI]): + def _ensure_nodes(self, node_type: bsc.Node, guids: typing.Iterable[URI]): """ """ # check node existence diff --git a/bsfs/graph/resolve.py b/bsfs/graph/resolve.py index 00b778b..4677401 100644 --- a/bsfs/graph/resolve.py +++ b/bsfs/graph/resolve.py @@ -41,6 +41,7 @@ class Filter(): self.schema = schema def __call__(self, root_type: bsc.Node, node: ast.filter.FilterExpression): + # FIXME: node can be None! return self._parse_filter_expression(root_type, node) def _parse_filter_expression( diff --git a/bsfs/graph/result.py b/bsfs/graph/result.py new file mode 100644 index 0000000..31822f1 --- /dev/null +++ b/bsfs/graph/result.py @@ -0,0 +1,124 @@ +""" + +Part of the BlackStar filesystem (bsfs) module. +A copy of the license is provided with the project. +Author: Matthias Baumgartner, 2022 +""" +# imports +from collections import defaultdict +import typing + +# bsfs imports +from bsfs.utils import URI + +# exports +__all__: typing.Sequence[str] = ( + 'to_list_view', + 'to_dict_view', + ) + + +## code ## + +# FIXME: node, path, value seem counter-intuitive: +# node.get(..., node=True) removes the node part. +# wouldn't it make more sense if node=True keeps the node part +# and node=False drops it? + +def to_list_view( + triples, + # aggregators + node: bool, + path: bool, + value: bool, # pylint: disable=unused-argument + ): + """Return an iterator over results. + + Dependent on the *node*, *path*, and *value* flags, + the respective component is omitted. + + """ + if not node and not path: + return iter(val for _, _, val in triples) + if not node: + return iter((pred, val) for _, pred, val in triples) + if not path: + return iter((subj, val) for subj, _, val in triples) + return iter((subj, pred, val) for subj, pred, val in triples) + + +def to_dict_view( + triples, + # context + one_node: bool, + one_path: bool, + unique_paths: typing.Set[typing.Union[URI, typing.Iterable[URI]]], + # aggregators + node: bool, + path: bool, + value: bool, + default: typing.Optional[typing.Any] = None, + ) -> typing.Any: + """Return a dict of results. + + Note that triples are materialized to create this view. + + The returned structure depends on the *node*, *path*, and *value* flags. + If all flags are set to False, returns a dict(node -> dict(path -> set(values))). + Setting a flag to true omits or simplifies the respective component (if possible). + + """ + # NOTE: To create a dict, we need to materialize or make further assumptions + # (e.g., sorted in a specific order). + + data: typing.Any # disable type checks on data since it's very flexibly typed. 
+ + # FIXME: type of data can be overwritten later on (if value) + + if not node and not path: + data = set() + elif node ^ path: + data = defaultdict(set) + else: + data = defaultdict(lambda: defaultdict(set)) + + for subj, pred, val in triples: + unique = pred in unique_paths + if not node and not path: + if not value and unique and one_node and one_path: + return val + data.add(val) + elif not node: + # remove node from result, group by predicate + if not value and unique and one_node: + data[pred] = val + else: + data[pred].add(val) + elif not path: + # remove predicate from result, group by node + if not value and unique and one_path: + data[subj] = val + else: + data[subj].add(val) + else: + if not value and unique: + data[subj][pred] = val + else: + data[subj][pred].add(val) + + # FIXME: Combine multiple Nodes instances into one? + + # convert defaultdict to ordinary dict + # pylint: disable=too-many-boolean-expressions + if not node and not path and not value \ + and len(unique_paths) > 0 and one_node and one_path \ + and len(data) == 0: + return default + # pylint: enable=too-many-boolean-expressions + if not node and not path: + return data + if node ^ path: + return dict(data) + return {key: dict(val) for key, val in data.items()} + +## EOF ## diff --git a/bsfs/graph/walk.py b/bsfs/graph/walk.py new file mode 100644 index 0000000..1b1cfa0 --- /dev/null +++ b/bsfs/graph/walk.py @@ -0,0 +1,120 @@ +""" + +Part of the BlackStar filesystem (bsfs) module. +A copy of the license is provided with the project. +Author: Matthias Baumgartner, 2022 +""" +# imports +from collections import abc +import typing + +# bsfs imports +from bsfs import schema as bsc + +# inner-module imports +# NOTE: circular import! OK as long as only used for type annotations. +from . import nodes # pylint: disable=cyclic-import + +# exports +__all__: typing.Sequence[str] = ( + 'Walk', + ) + + +## code ## + +class Walk(abc.Hashable, abc.Callable): # type: ignore [misc] # invalid base class (Callable) + """Syntactic sugar for `Nodes` to build and act on predicate paths via members.""" + + # Link to Nodes instance. + _root: 'nodes.Nodes' + + # Current predicate path. + _path: typing.Tuple[bsc.Predicate, ...] 
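In terms of the two view helpers in result.py above, the `node`/`path`/`value` flags decide which components of the streamed (node, path, value) triples survive aggregation, and `Nodes.get` additionally simplifies single nodes, single paths, and unique predicates. A rough illustration with plain placeholder triples rather than real `Nodes` objects:

    from bsfs.graph import result

    triples = [('n1', 'p', 'a'), ('n1', 'p', 'b'), ('n2', 'p', 'c')]

    list(result.to_list_view(iter(triples), node=True, path=False, value=False))
    # [('n1', 'a'), ('n1', 'b'), ('n2', 'c')]        -- path component dropped

    result.to_dict_view(iter(triples), one_node=False, one_path=True, unique_paths=set(),
                        node=True, path=False, value=False)
    # {'n1': {'a', 'b'}, 'n2': {'c'}}                -- grouped by node, values as sets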
+ + def __init__( + self, + root: 'nodes.Nodes', + path: typing.Sequence[bsc.Predicate], + ): + self._root = root + self._path = tuple(path) + + @property + def tail(self): + """Return the node type at the end of the path.""" + return self._path[-1].range + + + ## comparison + + def __hash__(self) -> int: + """Return an integer hash that identifies the instance.""" + return hash((type(self), self._root, self._path)) + + def __eq__(self, other) -> bool: + """Compare against *other* backend.""" + return isinstance(other, type(self)) \ + and self._root == other._root \ + and self._path == other._path + + + ## representation + + def __repr__(self) -> str: + """Return a formal string representation.""" + path = ', '.join(pred.uri for pred in self._path) + return f'Walk({self._root.node_type.uri}, ({path}))' + + def __str__(self) -> str: + """Return an informal string representation.""" + path = ', '.join(pred.uri for pred in self._path) + return f'Walk(@{self._root.node_type.uri}: {path})' + + + ## walk + + @staticmethod + def step( + schema: bsc.Schema, + node: bsc.Node, + name: str, + ) -> typing.Tuple[bsc.Predicate]: + """Get an predicate at *node* whose fragment matches *name*.""" + predicates = tuple( + pred + for pred + in schema.predicates_at(node) + if pred.uri.get('fragment', None) == name + ) + if len(predicates) == 0: # no fragment found for name + raise ValueError(f'no available predicate matches {name}') # FIXME: Custom exception + if len(predicates) > 1: # ambiguous name + raise ValueError(f'{name} matches multiple predicates') # FIXME: Custom exception + # append predicate to walk + return predicates # type: ignore [return-value] # size is one + + def __getattr__(self, name: str) -> 'Walk': + """Alias for `Walk.step(name)`.""" + try: + return super().__getattr__(name) + except AttributeError: + pass + # get predicate + pred = self.step(self._root.schema, self.tail, name) + # append predicate to walk + return Walk(self._root, self._path + pred) + + + ## get paths ## + + def get(self, **kwargs) -> typing.Any: + """Alias for `Nodes.get(..)`.""" + return self._root.get(tuple(pred.uri for pred in self._path), **kwargs) + + def __call__(self, **kwargs) -> typing.Any: # pylint: disable=arguments-differ + """Alias for `Walk.get(...)`.""" + return self.get(**kwargs) + + +## EOF ## diff --git a/bsfs/query/ast/__init__.py b/bsfs/query/ast/__init__.py index 704d051..66b097d 100644 --- a/bsfs/query/ast/__init__.py +++ b/bsfs/query/ast/__init__.py @@ -1,6 +1,6 @@ """Query AST components. -The query AST consists of a Filter syntax tree. +The query AST consists of a Filter and a Fetch syntax trees. Classes beginning with an underscore (_) represent internal type hierarchies and should not be used for parsing. Note that the AST structures do not @@ -14,10 +14,12 @@ Author: Matthias Baumgartner, 2022 import typing # inner-module imports +from . import fetch from . import filter_ as filter # pylint: disable=redefined-builtin # exports __all__: typing.Sequence[str] = ( + 'fetch', 'filter', ) diff --git a/bsfs/query/ast/fetch.py b/bsfs/query/ast/fetch.py new file mode 100644 index 0000000..d653a8a --- /dev/null +++ b/bsfs/query/ast/fetch.py @@ -0,0 +1,174 @@ +""" + +Part of the BlackStar filesystem (bsfs) module. +A copy of the license is provided with the project. 
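`Walk`, together with the new `Nodes.__getattr__`, resolves predicate URI fragments into attribute access, so a fetch can also be spelled as a chain of members ending in a call. A sketch, assuming the schema exposes predicates whose URI fragments are `filename`, `author`, and `name` (illustrative):

    files = graph.all(ns.bsfs.File)    # as above
    files.filename()                   # one hop: resolve the 'filename' fragment, then Nodes.get()
    files.author.name()                # two hops: File --author--> node --name--> literal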
+Author: Matthias Baumgartner, 2022 +""" +# imports +from collections import abc +import typing + +# bsfs imports +from bsfs.utils import URI, typename, normalize_args + +# exports +__all__ : typing.Sequence[str] = ( + 'All', + 'Fetch', + 'FetchExpression', + 'Node', + 'This', + 'Value', + ) + + +## code ## + +class FetchExpression(abc.Hashable): + """Generic Fetch expression.""" + + def __repr__(self) -> str: + """Return the expressions's string representation.""" + return f'{typename(self)}()' + + def __hash__(self) -> int: + """Return the expression's integer representation.""" + return hash(type(self)) + + def __eq__(self, other: typing.Any) -> bool: + """Return True if *self* and *other* are equivalent.""" + return isinstance(other, type(self)) + + +class All(FetchExpression): + """Fetch all child expressions.""" + + # child expressions. + expr: typing.Set[FetchExpression] + + def __init__(self, *expr): + # unpack child expressions + unfolded = set(normalize_args(*expr)) + # check child expressions + if len(unfolded) == 0: + raise AttributeError('expected at least one expression, found none') + if not all(isinstance(itm, FetchExpression) for itm in unfolded): + raise TypeError(expr) + # initialize + super().__init__() + # assign members + self.expr = unfolded + + def __iter__(self) -> typing.Iterator[FetchExpression]: + return iter(self.expr) + + def __len__(self) -> int: + return len(self.expr) + + def __repr__(self) -> str: + return f'{typename(self)}({self.expr})' + + def __hash__(self) -> int: + return hash((super().__hash__(), tuple(sorted(self.expr, key=repr)))) + + def __eq__(self, other: typing.Any) -> bool: + return super().__eq__(other) and self.expr == other.expr + + +class _Branch(FetchExpression): + """Branch along a predicate.""" + + # FIXME: Use a Predicate (like in ast.filter) so that we can also reverse them! + + # predicate to follow. + predicate: URI + + def __init__(self, predicate: URI): + if not isinstance(predicate, URI): + raise TypeError(predicate) + self.predicate = predicate + + def __repr__(self) -> str: + return f'{typename(self)}({self.predicate})' + + def __hash__(self) -> int: + return hash((super().__hash__(), self.predicate)) + + def __eq__(self, other: typing.Any) -> bool: + return super().__eq__(other) and self.predicate == other.predicate + + +class Fetch(_Branch): + """Follow a predicate before evaluating a child epxression.""" + + # child expression. + expr: FetchExpression + + def __init__(self, predicate: URI, expr: FetchExpression): + # check child expressions + if not isinstance(expr, FetchExpression): + raise TypeError(expr) + # initialize + super().__init__(predicate) + # assign members + self.expr = expr + + def __repr__(self) -> str: + return f'{typename(self)}({self.predicate}, {self.expr})' + + def __hash__(self) -> int: + return hash((super().__hash__(), self.expr)) + + def __eq__(self, other: typing.Any) -> bool: + return super().__eq__(other) and self.expr == other.expr + + +class _Named(_Branch): + """Fetch a (named) symbol at a predicate.""" + + # symbol name. 
+ name: str + + def __init__(self, predicate: URI, name: str): + super().__init__(predicate) + self.name = str(name) + + def __repr__(self) -> str: + return f'{typename(self)}({self.predicate}, {self.name})' + + def __hash__(self) -> int: + return hash((super().__hash__(), self.name)) + + def __eq__(self, other: typing.Any) -> bool: + return super().__eq__(other) and self.name == other.name + + +class Node(_Named): # pylint: disable=too-few-public-methods + """Fetch a Node at a predicate.""" + # FIXME: Is this actually needed? + + +class Value(_Named): # pylint: disable=too-few-public-methods + """Fetch a Literal at a predicate.""" + + +class This(FetchExpression): + """Fetch the current Node.""" + + # symbol name. + name: str + + def __init__(self, name: str): + super().__init__() + self.name = str(name) + + def __repr__(self) -> str: + return f'{typename(self)}({self.name})' + + def __hash__(self) -> int: + return hash((super().__hash__(), self.name)) + + def __eq__(self, other: typing.Any) -> bool: + return super().__eq__(other) and self.name == other.name + +## EOF ## diff --git a/bsfs/query/ast/filter_.py b/bsfs/query/ast/filter_.py index 2f0270c..b29d89e 100644 --- a/bsfs/query/ast/filter_.py +++ b/bsfs/query/ast/filter_.py @@ -33,9 +33,6 @@ import typing # bsfs imports from bsfs.utils import URI, typename, normalize_args -# inner-module imports -#from . import utils - # exports __all__ : typing.Sequence[str] = ( # base classes @@ -153,6 +150,7 @@ class _Agg(FilterExpression, abc.Collection): # check type if not all(isinstance(e, FilterExpression) for e in unfolded): raise TypeError(expr) + # FIXME: Require at least one child expression? # assign member self.expr = unfolded @@ -172,7 +170,7 @@ class _Agg(FilterExpression, abc.Collection): return f'{typename(self)}({self.expr})' def __hash__(self) -> int: - return hash((super().__hash__(), tuple(self.expr))) # FIXME: Unique hash of different orders over self.expr + return hash((super().__hash__(), tuple(sorted(self.expr, key=repr)))) def __eq__(self, other) -> bool: return super().__eq__(other) and self.expr == other.expr @@ -449,20 +447,72 @@ class OneOf(PredicateExpression, abc.Collection): return f'{typename(self)}({self.expr})' def __hash__(self) -> int: - return hash((super().__hash__(), tuple(self.expr))) # FIXME: Unique hash of different orders over self.expr + return hash((super().__hash__(), tuple(sorted(self.expr, key=repr)))) def __eq__(self, other) -> bool: return super().__eq__(other) and self.expr == other.expr # Helpers +# invalid-name is disabled since they explicitly mimic an expression -def IsIn(*values): # pylint: disable=invalid-name # explicitly mimics an expression +def IsIn(*values) -> FilterExpression: # pylint: disable=invalid-name """Match any of the given URIs.""" - return Or(Is(value) for value in normalize_args(*values)) - -def IsNotIn(*values): # pylint: disable=invalid-name # explicitly mimics an expression + args = normalize_args(*values) + if len(args) == 0: + raise AttributeError('expected at least one value, found none') + if len(args) == 1: + return Is(args[0]) + return Or(Is(value) for value in args) + +def IsNotIn(*values) -> FilterExpression: # pylint: disable=invalid-name """Match none of the given URIs.""" return Not(IsIn(*values)) + +def Between( # pylint: disable=invalid-name + lo: float = float('-inf'), + hi: float = float('inf'), + lo_strict: bool = True, + hi_strict: bool = True, + ) -> FilterExpression : + """Match numerical values between *lo* and *hi*. 
Include bounds if strict is False.""" + if abs(lo) == hi == float('inf'): + raise ValueError('range cannot be INF on both sides') + if lo > hi: + raise ValueError(f'lower bound ({lo}) cannot be less than upper bound ({hi})') + if lo == hi and not lo_strict and not hi_strict: + return Equals(lo) + if lo == hi: # either bound is strict + raise ValueError('bounds cannot be equal when either is strict') + if lo != float('-inf') and hi != float('inf'): + return And(GreaterThan(lo, lo_strict), LessThan(hi, hi_strict)) + if lo != float('-inf'): + return GreaterThan(lo, lo_strict) + # hi != float('inf'): + return LessThan(hi, hi_strict) + + +def Includes(*values, approx: bool = False) -> FilterExpression: # pylint: disable=invalid-name + """Match any of the given *values*. Uses `Substring` if *approx* is set.""" + args = normalize_args(*values) + cls = Substring if approx else Equals + if len(args) == 0: + raise AttributeError('expected at least one value, found none') + if len(args) == 1: + return cls(args[0]) + return Or(cls(v) for v in args) + + +def Excludes(*values, approx: bool = False) -> FilterExpression: # pylint: disable=invalid-name + """Match none of the given *values*. Uses `Substring` if *approx* is set.""" + args = normalize_args(*values) + cls = Substring if approx else Equals + if len(args) == 0: + raise AttributeError('expected at least one value, found none') + if len(args) == 1: + return Not(cls(args[0])) + return Not(Or(cls(v) for v in args)) + + ## EOF ## diff --git a/bsfs/query/matcher.py b/bsfs/query/matcher.py new file mode 100644 index 0000000..a910756 --- /dev/null +++ b/bsfs/query/matcher.py @@ -0,0 +1,366 @@ +""" + +Part of the BlackStar filesystem (bsfs) module. +A copy of the license is provided with the project. +Author: Matthias Baumgartner, 2022 +""" +# imports +from collections import defaultdict +from itertools import product +from time import time +import random +import threading +import typing + +# external imports +from hopcroftkarp import HopcroftKarp + +# bsfs imports +from bsfs.utils import errors, typename + +# inner-module imports +from . import ast + +# exports +__all__ : typing.Sequence[str] = ( + 'Filter', + ) + + +## code ## + +class Any(ast.filter.FilterExpression, ast.filter.PredicateExpression): + """Match any ast class. + + Note that Any instances are unique, i.e. they do not compare, and + can hence be repeated in a set: + >>> Any() == Any() + False + >>> len({Any(), Any(), Any(), Any()}) + 4 + + """ + + # unique instance id + _uid: typing.Tuple[int, int, float, float] + + def __init__(self): + self._uid = ( + id(self), + id(threading.current_thread()), + time(), + random.random(), + ) + + def __eq__(self, other: typing.Any): + return super().__eq__(other) and self._uid == other._uid + + def __hash__(self): + return hash((super().__hash__(), self._uid)) + + +class Rest(ast.filter.FilterExpression, ast.filter.PredicateExpression): + """Match the leftovers in a set of items to be compared. + + Rest can be used in junction with aggregating expressions such as ast.filter.And, + ast.filter.Or, ast.filter.OneOf. It controls childs expressions that were not yet + consumed by other matching rules. Rest may match to only a specific expression. + The expresssion defaults to Any(). + + For example, the following to ast structures would match since Rest + allows an arbitrary repetition of ast.filter.Equals statements. 
+ + >>> And(Equals('hello'), Equals('world'), Equals('foobar')) + >>> And(Equals('world'), Rest(Partial(Equals))) + + """ + + # child expression for the Rest. + expr: typing.Union[ast.filter.FilterExpression, ast.filter.PredicateExpression] + + def __init__( + self, + expr: typing.Optional[typing.Union[ast.filter.FilterExpression, ast.filter.PredicateExpression]] = None, + ): + if expr is None: + expr = Any() + self.expr = expr + + def __repr__(self) -> str: + return f'{typename(self)}({self.expr})' + + def __hash__(self) -> int: + return hash((super().__hash__(), self.expr)) + + def __eq__(self, other: typing.Any) -> bool: + return super().__eq__(other) and self.expr == other.expr + + +class Partial(ast.filter.FilterExpression, ast.filter.PredicateExpression): + """Match a partially defined ast expression. + + Literal values might be irrelevant or unknown when comparing two ast + structures. Partial allows to constrain the matcher to a certain + ast class, while leaving some of its members unspecified. + + Pass the class (not instance) and its members as keyword arguments + to Partial. Note that the arguments are not validated. + + For example, the following instance matches any ast.filter.Equals, + irrespective of its value: + + >>> Partial(ast.filter.Equals) + + Likewise, the following instance matches any ast.filter.LessThan + that has a strict bounds, but makes no claim about the threshold: + + >>> Partial(ast.filter.LessThan, strict=False) + + """ + + # target node type. + node: typing.Type + + # node construction args. + kwargs: typing.Dict[str, typing.Any] + + def __init__( + self, + node: typing.Type, + **kwargs, + ): + self.node = node + self.kwargs = kwargs + + def __repr__(self) -> str: + return f'{typename(self)}({self.node.__name__}, {self.kwargs})' + + def __hash__(self) -> int: + kwargs = tuple((key, self.kwargs[key]) for key in sorted(self.kwargs)) + return hash((super().__hash__(), self.node, kwargs)) + + def __eq__(self, other: typing.Any) -> bool: + return super().__eq__(other) \ + and self.node == other.node \ + and self.kwargs == other.kwargs + + def match( + self, + name: str, + value: typing.Any, + ) -> bool: + """Return True if *name* is unspecified or matches *value*.""" + return name not in self.kwargs or self.kwargs[name] == value + + +T_ITEM_TYPE = typing.TypeVar('T_ITEM_TYPE') # pylint: disable=invalid-name + +def _set_matcher( + query: typing.Collection[T_ITEM_TYPE], + reference: typing.Collection[T_ITEM_TYPE], + cmp: typing.Callable[[T_ITEM_TYPE, T_ITEM_TYPE], bool], + ) -> bool: + """Compare two sets of child expressions. + + This check has a best-case complexity of O(|N|**2) and worst-case + complexity of O(|N|**3), with N the number of child expressions. 
+ """ + # get reference items + r_items = list(reference) + # deal with Rest + r_rest = {itm for itm in r_items if isinstance(itm, Rest)} + if len(r_rest) > 1: + raise errors.BackendError(f'there must be at most one Rest instance per set, found {len(r_rest)}') + if len(r_rest) == 1: + # replace Rest by filling the reference up with rest's expression + # NOTE: convert r_items to list so that items can be repeated + expr = next(iter(r_rest)).expr # type: ignore [attr-defined] + r_items = [itm for itm in r_items if not isinstance(itm, Rest)] + r_items += [expr for _ in range(len(query) - len(r_items))] # type: ignore [misc] + # sanity check: cannot match if the item sizes differ: + # either a reference item is unmatched (len(r_items) > len(query)) + # or a query item is unmatched (len(r_items) < len(query)) + if len(query) != len(r_items): + return False + + # To have a positive match between the query and the reference, + # each query expr has to match any reference expr. + # However, each reference expr can only be "consumed" once even + # if it matches multiple query exprs (e.g., the Any expression matches + # every query expr). + # This is a bipartide matching problem (Hall's marriage problem) + # and the Hopcroft-Karp-Karzanov algorithm finds a maximum + # matching. While there might be multiple maximum matchings, + # we only need to know whether (at least) one complete matching + # exists. The hopcroftkarp module provides this functionality. + # The HKK algorithm has worst-case complexity of O(|N|**2 * sqrt(|N|)) + # and we also need to compare expressions pairwise, hence O(|N|**2). + num_items = len(r_items) + graph = defaultdict(set) + # build the bipartide graph as {lhs: {rhs}, ...} + # lhs and rhs must be disjoint identifiers. + for (ridx, ref), (nidx, node) in product(enumerate(r_items), enumerate(query)): + # add edges for equal expressions + if cmp(node, ref): + graph[ridx].add(num_items + nidx) + + # maximum_matching returns the matches for all nodes in the graph + # ({ref_itm: node_itm}), hence a complete matching's size is + # the number of reference's child expressions. + return len(HopcroftKarp(graph).maximum_matching(keys_only=True)) == num_items + + +class Filter(): + """Compare a bsfs.query.ast.filter` query's structure to a reference ast. + + The reference ast may include `Rest`, `Partial`, or `Any` to account for irrelevant + or unknown ast pieces. + + This is only a structural comparison, not a semantic one. For example, the + two following queries are semantically identical, but structurally different, + and would therefore not match: + + >>> ast.filter.OneOf(ast.filter.Predicate(ns.bse.filename)) + >>> ast.filter.Predicate(ns.bse.filename) + + """ + + def __call__(self, query: ast.filter.FilterExpression, reference: ast.filter.FilterExpression) -> bool: + """Compare a *query* to a *reference* ast structure. + Return True if both are structurally equivalent. 
+ """ + if not isinstance(query, ast.filter.FilterExpression): + raise errors.BackendError(f'expected filter expression, found {query}') + if not isinstance(reference, ast.filter.FilterExpression): + raise errors.BackendError(f'expected filter expression, found {reference}') + return self._parse_filter_expression(query, reference) + + def _parse_filter_expression( + self, + node: ast.filter.FilterExpression, + reference: ast.filter.FilterExpression, + ) -> bool: + """Route *node* to the handler of the respective FilterExpression subclass.""" + # generic checks: reference type must be Any or match node type + if isinstance(reference, Any): + return True + # node-specific checks + if isinstance(node, ast.filter.Not): + return self._not(node, reference) + if isinstance(node, ast.filter.Has): + return self._has(node, reference) + if isinstance(node, ast.filter.Distance): + return self._distance(node, reference) + if isinstance(node, (ast.filter.Any, ast.filter.All)): + return self._branch(node, reference) + if isinstance(node, (ast.filter.And, ast.filter.Or)): + return self._agg(node, reference) + if isinstance(node, (ast.filter.Is, ast.filter.Equals, ast.filter.Substring, + ast.filter.StartsWith, ast.filter.EndsWith)): + return self._value(node, reference) + if isinstance(node, (ast.filter.LessThan, ast.filter.GreaterThan)): + return self._bounded(node, reference) + # invalid node + raise errors.BackendError(f'expected filter expression, found {node}') + + def _parse_predicate_expression( + self, + node: ast.filter.PredicateExpression, + reference: ast.filter.PredicateExpression, + ) -> bool: + """Route *node* to the handler of the respective PredicateExpression subclass.""" + if isinstance(reference, Any): + return True + if isinstance(node, ast.filter.Predicate): + return self._predicate(node, reference) + if isinstance(node, ast.filter.OneOf): + return self._one_of(node, reference) + # invalid node + raise errors.BackendError(f'expected predicate expression, found {node}') + + def _one_of(self, node: ast.filter.OneOf, reference: ast.filter.PredicateExpression) -> bool: + if not isinstance(reference, type(node)): + return False + return _set_matcher(node, reference, self._parse_predicate_expression) + + def _predicate(self, node: ast.filter.Predicate, reference: ast.filter.PredicateExpression) -> bool: + if not isinstance(reference, (Partial, type(node))): + return False + # partial check + if isinstance(reference, Partial): + if not isinstance(node, reference.node): + return False + return reference.match('predicate', node.predicate) \ + and reference.match('reverse', node.reverse) + # full check + return node.predicate == reference.predicate \ + and node.reverse == reference.reverse + + def _branch(self, + node: typing.Union[ast.filter.Any, ast.filter.All], + reference: ast.filter.FilterExpression, + ) -> bool: + if not isinstance(reference, type(node)): + return False + if not self._parse_predicate_expression(node.predicate, reference.predicate): # type: ignore [attr-defined] + return False + if not self._parse_filter_expression(node.expr, reference.expr): # type: ignore [attr-defined] + return False + return True + + def _agg(self, node: typing.Union[ast.filter.And, ast.filter.Or], reference: ast.filter.FilterExpression) -> bool: + if not isinstance(reference, type(node)): + return False + return _set_matcher(node, reference, self._parse_filter_expression) # type: ignore [arg-type] + + def _not(self, node: ast.filter.Not, reference: ast.filter.FilterExpression) -> bool: + if not 
isinstance(reference, type(node)): + return False + return self._parse_filter_expression(node.expr, reference.expr) + + def _has(self, node: ast.filter.Has, reference: ast.filter.FilterExpression) -> bool: + if not isinstance(reference, type(node)): + return False + return self._parse_predicate_expression(node.predicate, reference.predicate) \ + and self._parse_filter_expression(node.count, reference.count) + + def _distance(self, node: ast.filter.Distance, reference: ast.filter.FilterExpression) -> bool: + if not isinstance(reference, (Partial, type(node))): + return False + # partial check + if isinstance(reference, Partial): + if not isinstance(node, reference.node): + return False + return reference.match('reference', node.reference) \ + and reference.match('threshold', node.threshold) \ + and reference.match('strict', node.strict) + # full check + return node.reference == reference.reference \ + and node.threshold == reference.threshold \ + and node.strict == reference.strict + + def _value(self, node: ast.filter._Value, reference: ast.filter.FilterExpression) -> bool: + if not isinstance(reference, (Partial, type(node))): + return False + # partial check + if isinstance(reference, Partial): + if not isinstance(node, reference.node): + return False + return reference.match('value', node.value) + # full ckeck + return node.value == reference.value + + def _bounded(self, node: ast.filter._Bounded, reference: ast.filter.FilterExpression) -> bool: + if not isinstance(reference, (Partial, type(node))): + return False + # partial check + if isinstance(reference, Partial): + if not isinstance(node, reference.node): + return False + return reference.match('threshold', node.threshold) \ + and reference.match('strict', node.strict) + # full check + return node.threshold == reference.threshold \ + and node.strict == reference.strict + +## EOF ## diff --git a/bsfs/query/validator.py b/bsfs/query/validator.py index 904ac14..f0aa795 100644 --- a/bsfs/query/validator.py +++ b/bsfs/query/validator.py @@ -20,6 +20,7 @@ __all__ : typing.Sequence[str] = ( 'Filter', ) +# FIXME: Split into a submodule and the two classes into their own respective files. ## code ## @@ -49,7 +50,7 @@ class Filter(): """ # root_type must be a schema.Node if not isinstance(root_type, bsc.Node): - raise TypeError(f'Expected a node, found {typename(root_type)}') + raise TypeError(f'expected a node, found {typename(root_type)}') # root_type must exist in the schema if root_type not in self.schema.nodes(): raise errors.ConsistencyError(f'{root_type} is not defined in the schema') @@ -223,4 +224,125 @@ class Filter(): # FIXME: Check if node.value corresponds to type_ +class Fetch(): + """Validate a `bsfs.query.ast.fetch` query's structure and schema compliance. + + * Value can only be applied on literals + * Node can only be applied on nodes + * Names must be non-empty + * Branching nodes' predicates must match the type + * Symbols must be in the schema + * Predicates must follow the schema + + """ + + # schema to validate against. + schema: bsc.Schema + + def __init__(self, schema: bsc.Schema): + self.schema = schema + + def __call__(self, root_type: bsc.Node, query: ast.fetch.FetchExpression): + """Validate a fetch *query*, assuming the subject having *root_type*. + + Raises a `bsfs.utils.errors.ConsistencyError` if the query violates the schema. + Raises a `bsfs.utils.errors.BackendError` if the query structure is invalid. 
+ + """ + # root_type must be a schema.Node + if not isinstance(root_type, bsc.Node): + raise TypeError(f'expected a node, found {typename(root_type)}') + # root_type must exist in the schema + if root_type not in self.schema.nodes(): + raise errors.ConsistencyError(f'{root_type} is not defined in the schema') + # query must be a FetchExpression + if not isinstance(query, ast.fetch.FetchExpression): + raise TypeError(f'expected a fetch expression, found {typename(query)}') + # check root expression + self._parse_fetch_expression(root_type, query) + # all tests passed + return True + + def _parse_fetch_expression(self, type_: bsc.Vertex, node: ast.fetch.FetchExpression): + """Route *node* to the handler of the respective FetchExpression subclass.""" + if isinstance(node, (ast.fetch.Fetch, ast.fetch.Value, ast.fetch.Node)): + # NOTE: don't return so that checks below are executed + self._branch(type_, node) + if isinstance(node, (ast.fetch.Value, ast.fetch.Node)): + # NOTE: don't return so that checks below are executed + self._named(type_, node) + if isinstance(node, ast.fetch.All): + return self._all(type_, node) + if isinstance(node, ast.fetch.Fetch): + return self._fetch(type_, node) + if isinstance(node, ast.fetch.Value): + return self._value(type_, node) + if isinstance(node, ast.fetch.Node): + return self._node(type_, node) + if isinstance(node, ast.fetch.This): + return self._this(type_, node) + # invalid node + raise errors.BackendError(f'expected fetch expression, found {node}') + + def _all(self, type_: bsc.Vertex, node: ast.fetch.All): + # check child expressions + for expr in node: + self._parse_fetch_expression(type_, expr) + + def _branch(self, type_: bsc.Vertex, node: ast.fetch._Branch): + # type is a node + if not isinstance(type_, bsc.Node): + raise errors.ConsistencyError(f'expected a Node, found {type_}') + # node exists in the schema + if type_ not in self.schema.nodes(): + raise errors.ConsistencyError(f'node {type_} is not in the schema') + # predicate exists in the schema + if not self.schema.has_predicate(node.predicate): + raise errors.ConsistencyError(f'predicate {node.predicate} is not in the schema') + pred = self.schema.predicate(node.predicate) + # type_ must be a subclass of domain + if not type_ <= pred.domain: + raise errors.ConsistencyError( + f'expected type {pred.domain} or subtype thereof, found {type_}') + + def _fetch(self, type_: bsc.Vertex, node: ast.fetch.Fetch): # pylint: disable=unused-argument # type_ was considered in _branch + # range must be a node + rng = self.schema.predicate(node.predicate).range + if not isinstance(rng, bsc.Node): + raise errors.ConsistencyError( + f'expected the predicate\'s range to be a Node, found {rng}') + # child expression must be valid + self._parse_fetch_expression(rng, node.expr) + + def _named(self, type_: bsc.Vertex, node: ast.fetch._Named): # pylint: disable=unused-argument # type_ was considered in _branch + # name must be set + if node.name.strip() == '': + raise errors.BackendError('node name cannot be empty') + # FIXME: check for double name use? 
+ + def _node(self, type_: bsc.Vertex, node: ast.fetch.Node): # pylint: disable=unused-argument # type_ was considered in _branch + # range must be a node + rng = self.schema.predicate(node.predicate).range + if not isinstance(rng, bsc.Node): + raise errors.ConsistencyError( + f'expected the predicate\'s range to be a Node, found {rng}') + + def _value(self, type_: bsc.Vertex, node: ast.fetch.Value): # pylint: disable=unused-argument # type_ was considered in _branch + # range must be a literal + rng = self.schema.predicate(node.predicate).range + if not isinstance(rng, bsc.Literal): + raise errors.ConsistencyError( + f'expected the predicate\'s range to be a Literal, found {rng}') + + def _this(self, type_: bsc.Vertex, node: ast.fetch.This): + # type is a node + if not isinstance(type_, bsc.Node): + raise errors.ConsistencyError(f'expected a Node, found {type_}') + # node exists in the schema + if type_ not in self.schema.nodes(): + raise errors.ConsistencyError(f'node {type_} is not in the schema') + # name must be set + if node.name.strip() == '': + raise errors.BackendError('node name cannot be empty') + ## EOF ## diff --git a/bsfs/schema/schema.py b/bsfs/schema/schema.py index 8d9a821..0de4203 100644 --- a/bsfs/schema/schema.py +++ b/bsfs/schema/schema.py @@ -69,6 +69,7 @@ class Schema(): literals.add(types.ROOT_LITERAL) predicates.add(types.ROOT_PREDICATE) # add minimally necessary types to the schema + literals.add(types.ROOT_BLOB) literals.add(types.ROOT_NUMBER) literals.add(types.ROOT_TIME) literals.add(types.ROOT_ARRAY) @@ -312,4 +313,8 @@ class Schema(): """Return the Literal matching the *uri*.""" return self._literals[uri] + def predicates_at(self, node: types.Node) -> typing.Iterator[types.Predicate]: + """Return predicates that have domain *node* (or superclass thereof).""" + return iter(pred for pred in self._predicates.values() if node <= pred.domain) + ## EOF ## diff --git a/bsfs/schema/types.py b/bsfs/schema/types.py index 3a2e10c..12e7e94 100644 --- a/bsfs/schema/types.py +++ b/bsfs/schema/types.py @@ -380,6 +380,11 @@ ROOT_LITERAL = Literal( parent=None, ) +ROOT_BLOB = Literal( + uri=ns.bsfs.BinaryBlob, + parent=ROOT_LITERAL, + ) + ROOT_NUMBER = Literal( uri=ns.bsfs.Number, parent=ROOT_LITERAL, diff --git a/bsfs/triple_store/base.py b/bsfs/triple_store/base.py index 7e03714..1baa63b 100644 --- a/bsfs/triple_store/base.py +++ b/bsfs/triple_store/base.py @@ -11,7 +11,7 @@ import typing # inner-module imports from bsfs.query import ast from bsfs.utils import URI, typename -import bsfs.schema as _schema +import bsfs.schema as bsc # exports __all__: typing.Sequence[str] = ( @@ -82,12 +82,12 @@ class TripleStoreBase(abc.ABC): @property @abc.abstractmethod - def schema(self) -> _schema.Schema: + def schema(self) -> bsc.Schema: """Return the store's local schema.""" @schema.setter @abc.abstractmethod - def schema(self, schema: _schema.Schema): + def schema(self, schema: bsc.Schema): """Migrate to new schema by adding or removing class definitions. Commits before and after the migration. @@ -112,17 +112,28 @@ class TripleStoreBase(abc.ABC): @abc.abstractmethod def get( self, - node_type: _schema.Node, - query: typing.Optional[ast.filter.FilterExpression] = None, + node_type: bsc.Node, + filter: typing.Optional[ast.filter.FilterExpression] = None, # pylint: disable=redefined-builtin ) -> typing.Iterator[URI]: - """Return guids of nodes of type *node_type* that match the *query*. - Return all guids of the respective type if *query* is None. 
+ """Return guids of nodes of type *node_type* that match the *filter*. + Return all guids of the respective type if *filter* is None. + """ + + @abc.abstractmethod + def fetch( + self, + node_type: bsc.Node, + filter: ast.filter.FilterExpression, # pylint: disable=redefined-builtin + fetch: ast.fetch.FetchExpression, + ) -> typing.Iterator[typing.Tuple[URI, str, typing.Any]]: + """Return (guid, name, value) triples where the guid is determined by the *filter* + query and the name matches the *fetch* query. """ @abc.abstractmethod def exists( self, - node_type: _schema.Node, + node_type: bsc.Node, guids: typing.Iterable[URI], ) -> typing.Iterable[URI]: """Return those *guids* that exist and have type *node_type* or a subclass thereof.""" @@ -130,7 +141,7 @@ class TripleStoreBase(abc.ABC): @abc.abstractmethod def create( self, - node_type: _schema.Node, + node_type: bsc.Node, guids: typing.Iterable[URI], ): """Create *guid* nodes with type *subject*.""" @@ -138,9 +149,9 @@ class TripleStoreBase(abc.ABC): @abc.abstractmethod def set( self, - node_type: _schema.Node, # FIXME: is the node_type even needed? Couldn't I infer from the predicate? + node_type: bsc.Node, # FIXME: is the node_type even needed? Couldn't I infer from the predicate? guids: typing.Iterable[URI], - predicate: _schema.Predicate, + predicate: bsc.Predicate, values: typing.Iterable[typing.Any], ): """Add triples to the graph. diff --git a/bsfs/triple_store/sparql/parse_fetch.py b/bsfs/triple_store/sparql/parse_fetch.py new file mode 100644 index 0000000..20d4e74 --- /dev/null +++ b/bsfs/triple_store/sparql/parse_fetch.py @@ -0,0 +1,109 @@ +""" + +Part of the BlackStar filesystem (bsfs) module. +A copy of the license is provided with the project. +Author: Matthias Baumgartner, 2022 +""" +# standard imports +import typing + +# bsfs imports +from bsfs import schema as bsc +from bsfs.query import ast +from bsfs.utils import errors + +# inner-module imports +from .utils import GenHopName, Query + +# exports +__all__: typing.Sequence[str] = ( + 'Fetch', + ) + + +## code ## + +class Fetch(): + """Translate `bsfs.query.ast.fetch` structures into Sparql queries.""" + + def __init__(self, schema): + self.schema = schema + self.ngen = GenHopName(prefix='?fch') + + def __call__( + self, + root_type: bsc.Node, + root: ast.fetch.FetchExpression, + ) -> Query: + """ + """ + # check root_type + if not isinstance(root_type, bsc.Node): + raise errors.BackendError(f'expected Node, found {root_type}') + if root_type not in self.schema.nodes(): + raise errors.ConsistencyError(f'node {root_type} is not in the schema') + # parse root + terms, expr = self._parse_fetch_expression(root_type, root, '?ent') + # assemble query + return Query( + root_type=root_type.uri, + root_head='?ent', + select=terms, + where=expr, + ) + + def _parse_fetch_expression( + self, + node_type: bsc.Vertex, + node: ast.fetch.FetchExpression, + head: str, + ): + """Route *node* to the handler of the respective FetchExpression subclass.""" + if isinstance(node, ast.fetch.All): + return self._all(node_type, node, head) + if isinstance(node, ast.fetch.Fetch): + return self._fetch(node_type, node, head) + if isinstance(node, ast.fetch.Node): + return self._node(node_type, node, head) + if isinstance(node, ast.fetch.Value): + return self._value(node_type, node, head) + if isinstance(node, ast.fetch.This): + return self._this(node_type, node, head) + # invalid node + raise errors.BackendError(f'expected fetch expression, found {node}') + + def _all(self, node_type: bsc.Vertex, 
node: ast.fetch.All, head: str): + # child expressions + terms, exprs = zip(*[self._parse_fetch_expression(node_type, expr, head) for expr in node]) + terms = {term for sub in terms for term in sub} + exprs = ' .\n'.join({expr for expr in exprs if len(expr.strip()) > 0}) + return terms, exprs + + def _fetch(self, node_type: bsc.Vertex, node: ast.fetch.Fetch, head: str): # pylint: disable=unused-argument # (node_type) + # child expressions + rng = self.schema.predicate(node.predicate).range + nexthead = next(self.ngen) + terms, expr = self._parse_fetch_expression(rng, node.expr, nexthead) + return terms, f'OPTIONAL{{ {head} <{node.predicate}> {nexthead} .\n {expr} }}' + + def _node(self, node_type: bsc.Vertex, node: ast.fetch.Node, head: str): # pylint: disable=unused-argument # (node_type) + if f'?{node.name}'.startswith(self.ngen.prefix): + raise errors.BackendError(f'Node name must start with {self.ngen.prefix}') + # compose and return statement + term = next(self.ngen) + return {(term, node.name)}, f'OPTIONAL{{ {head} <{node.predicate}> {term} }}' + + def _value(self, node_type: bsc.Vertex, node: ast.fetch.Value, head: str): # pylint: disable=unused-argument # (node_type) + if f'?{node.name}'.startswith(self.ngen.prefix): + raise errors.BackendError(f'Value name must start with {self.ngen.prefix}') + # compose and return statement + term = next(self.ngen) + return {(term, node.name)}, f'OPTIONAL{{ {head} <{node.predicate}> {term} }}' + + def _this(self, node_type: bsc.Vertex, node: ast.fetch.This, head: str): # pylint: disable=unused-argument # (node_type) + if f'?{node.name}'.startswith(self.ngen.prefix): + raise errors.BackendError(f'This name must start with {self.ngen.prefix}') + # compose and return statement + return {(head, node.name)}, '' + +## EOF ## diff --git a/bsfs/triple_store/sparql/parse_filter.py b/bsfs/triple_store/sparql/parse_filter.py index 8b6b976..dca0aea 100644 --- a/bsfs/triple_store/sparql/parse_filter.py +++ b/bsfs/triple_store/sparql/parse_filter.py @@ -19,6 +19,7 @@ from bsfs.utils import URI, errors # inner-module imports from .distance import DISTANCE_FU +from .utils import GenHopName, Query # exports __all__: typing.Sequence[str] = ( @@ -28,25 +29,6 @@ __all__: typing.Sequence[str] = ( ## code ## -class _GenHopName(): - """Generator that produces a new unique symbol name with each iteration.""" - - # Symbol name prefix. - prefix: str - - # Current counter. - curr: int - - def __init__(self, prefix: str = '?hop', start: int = 0): - self.prefix = prefix - self.curr = start - 1 - - def __next__(self): - """Generate and return the next unique name.""" - self.curr += 1 - return self.prefix + str(self.curr) - - class Filter(): """Translate `bsfs.query.ast.filter` structures into Sparql queries.""" @@ -54,18 +36,18 @@ class Filter(): schema: bsc.Schema # Generator that produces unique symbol names. - ngen: _GenHopName + ngen: GenHopName def __init__(self, graph, schema): self.graph = graph self.schema = schema - self.ngen = _GenHopName() + self.ngen = GenHopName(prefix='?flt') def __call__( self, root_type: bsc.Node, root: typing.Optional[ast.filter.FilterExpression] = None, - ) -> str: + ) -> Query: """ """ # check root_type @@ -79,15 +61,18 @@ class Filter(): else: cond = self._parse_filter_expression(root_type, root, '?ent') # assemble query - return f''' - SELECT ?ent - WHERE {{ - ?ent <{ns.rdf.type}>/<{ns.rdfs.subClassOf}>* <{root_type.uri}> . 
- {cond} - }} - ''' + return Query( + root_type=root_type.uri, + root_head='?ent', + where=cond, + ) - def _parse_filter_expression(self, type_: bsc.Vertex, node: ast.filter.FilterExpression, head: str) -> str: + def _parse_filter_expression( + self, + type_: bsc.Vertex, + node: ast.filter.FilterExpression, + head: str, + ) -> str: """Route *node* to the handler of the respective FilterExpression subclass.""" if isinstance(node, ast.filter.Is): return self._is(type_, node, head) diff --git a/bsfs/triple_store/sparql/sparql.py b/bsfs/triple_store/sparql/sparql.py index fedd227..dbf9d45 100644 --- a/bsfs/triple_store/sparql/sparql.py +++ b/bsfs/triple_store/sparql/sparql.py @@ -5,8 +5,11 @@ A copy of the license is provided with the project. Author: Matthias Baumgartner, 2022 """ # imports +import base64 import itertools import typing + +# external imports import rdflib # bsfs imports @@ -16,6 +19,7 @@ from bsfs.query import ast from bsfs.utils import errors, URI # inner-module imports +from . import parse_fetch from . import parse_filter from .. import base from .distance import DISTANCE_FU @@ -29,6 +33,8 @@ __all__: typing.Sequence[str] = ( ## code ## +rdflib.term.bind(ns.bsfs.BinaryBlob, bytes, constructor=base64.b64decode) + class _Transaction(): """Lightweight rdflib transactions for in-memory databases.""" @@ -92,13 +98,16 @@ class SparqlStore(base.TripleStoreBase): # Filter parser _filter_parser: parse_filter.Filter + # Fetch parser + _fetch_parser: parse_fetch.Fetch + def __init__(self): super().__init__(None) self._graph = rdflib.Graph() self._transaction = _Transaction(self._graph) - # NOTE: parsing bsfs.query.ast.filter.Has requires xsd:integer. self._schema = bsc.Schema(literals={bsc.ROOT_NUMBER.child(ns.xsd.integer)}) self._filter_parser = parse_filter.Filter(self._graph, self._schema) + self._fetch_parser = parse_fetch.Fetch(self._schema) # NOTE: mypy and pylint complain about the **kwargs not being listed (contrasting super) # However, not having it here is clearer since it's explicit that there are no arguments. @@ -197,17 +206,53 @@ class SparqlStore(base.TripleStoreBase): # migrate schema self._schema = schema self._filter_parser.schema = schema + self._fetch_parser.schema = schema + + def fetch( + self, + node_type: bsc.Node, + filter: ast.filter.FilterExpression, # pylint: disable=redefined-builtin + fetch: ast.fetch.FetchExpression, + ) -> typing.Iterator[typing.Tuple[URI, str, typing.Any]]: + if node_type not in self.schema.nodes(): + raise errors.ConsistencyError(f'{node_type} is not defined in the schema') + if not isinstance(filter, ast.filter.FilterExpression): + raise TypeError(filter) + if not isinstance(fetch, ast.fetch.FetchExpression): + raise TypeError(fetch) + # compose a query from fetch and filter ast + query = self._filter_parser(node_type, filter) + query += self._fetch_parser(node_type, fetch) + # run query + emitted = set() + for result in query(self._graph): + guid = URI(result[0]) + for name, raw in zip(query.names, result[1:]): + if raw is None: # undefined value + continue + if isinstance(raw, rdflib.Literal): + value = raw.value + else: + value = URI(raw) + # emit triple + triple = (guid, name, value) + if triple not in emitted: # FIXME: needs a better solution! 
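At the backend level, the new `fetch()` streams (guid, name, value) triples for the subjects selected by the filter AST. A sketch of driving it directly, bypassing the graph layer; the store, the `file_type` node type, and the guid are assumed:

    from bsfs.namespace import ns
    from bsfs.query import ast
    from bsfs.utils import URI

    filter_ = ast.filter.IsIn({URI('http://example.com/entity#1234')})   # subjects to read (hypothetical guid)
    fetch_  = ast.fetch.Value(ns.bse.filename, 'name')                   # what to read for each subject
    for guid, name, value in store.fetch(file_type, filter_, fetch_):
        print(guid, name, value)                                         # e.g. ...#1234, 'name', 'hello.txt'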
+ emitted.add(triple) + yield guid, name, value def get( self, node_type: bsc.Node, - query: typing.Optional[ast.filter.FilterExpression] = None, + filter: typing.Optional[ast.filter.FilterExpression] = None, # pylint: disable=redefined-builtin ) -> typing.Iterator[URI]: if node_type not in self.schema.nodes(): raise errors.ConsistencyError(f'{node_type} is not defined in the schema') - if not isinstance(query, ast.filter.FilterExpression): - raise TypeError(query) - for guid, in self._graph.query(self._filter_parser(node_type, query)): + if filter is not None and not isinstance(filter, ast.filter.FilterExpression): + raise TypeError(filter) + # compose query + query = self._filter_parser(node_type, filter) + # run query + for guid, in query(self._graph): yield URI(guid) def _has_type(self, subject: URI, node_type: bsc.Node) -> bool: @@ -294,7 +339,11 @@ class SparqlStore(base.TripleStoreBase): guid = rdflib.URIRef(guid) # convert value if isinstance(predicate.range, bsc.Literal): - value = rdflib.Literal(value, datatype=rdflib.URIRef(predicate.range.uri)) + dtype = rdflib.URIRef(predicate.range.uri) + if predicate.range <= self.schema.literal(ns.bsfs.BinaryBlob): + dtype = rdflib.URIRef(ns.bsfs.BinaryBlob) + value = base64.b64encode(value) + value = rdflib.Literal(value, datatype=dtype) elif isinstance(predicate.range, bsc.Node): value = rdflib.URIRef(value) else: diff --git a/bsfs/triple_store/sparql/utils.py b/bsfs/triple_store/sparql/utils.py new file mode 100644 index 0000000..deca4d8 --- /dev/null +++ b/bsfs/triple_store/sparql/utils.py @@ -0,0 +1,141 @@ +""" + +Part of the BlackStar filesystem (bsfs) module. +A copy of the license is provided with the project. +Author: Matthias Baumgartner, 2022 +""" +# standard imports +import typing + +# external imports +import rdflib + +# bsfs imports +from bsfs.namespace import ns +from bsfs.utils import typename + +# exports +__all__: typing.Sequence[str] = ( + 'GenHopName', + 'Query', + ) + + +## code ## + +class GenHopName(): + """Generator that produces a new unique symbol name with each iteration.""" + + # Symbol name prefix. + prefix: str + + # Current counter. + curr: int + + def __init__(self, prefix: str = '?hop', start: int = 0): + self.prefix = prefix + self.curr = start - 1 + + def __next__(self): + """Generate and return the next unique name.""" + self.curr += 1 + return self.prefix + str(self.curr) + + +class Query(): + """Hold, manage, and complete partial Sparql queries.""" + + # root node type URI. + root_type: str + + # root node variable name. + root_head: str + + # (head, name) tuples (w/o root) + select: typing.Tuple[typing.Tuple[str, str], ...] + + # where statements. 
+ where: str
+
+ def __init__(
+ self,
+ root_type: str,
+ root_head: str = '?ent',
+ select: typing.Optional[typing.Iterable[typing.Tuple[str, str]]] = None,
+ where: typing.Optional[str] = None,
+ ):
+ # check arguments
+ if select is None:
+ select = []
+ if where is None:
+ where = ''
+ # set members
+ self.root_type = root_type
+ self.root_head = root_head
+ self.select = tuple(select) # tuple ensures persistent order
+ self.where = where.strip()
+
+ def __str__(self) -> str:
+ return self.query
+
+ def __repr__(self) -> str:
+ return f'{typename(self)}({self.root_type}, {self.root_head}, {self.select}, {self.where})'
+
+ def __eq__(self, other: typing.Any) -> bool:
+ return isinstance(other, type(self)) \
+ and self.root_type == other.root_type \
+ and self.root_head == other.root_head \
+ and self.select == other.select \
+ and self.where == other.where
+
+ def __hash__(self) -> int:
+ return hash((type(self), self.root_type, self.root_head, self.select, self.where))
+
+ def __add__(self, other: typing.Any) -> 'Query':
+ # check other's type
+ if not isinstance(other, type(self)):
+ return NotImplemented
+ # check query compatibility
+ if not self.root_type == other.root_type:
+ raise ValueError(other)
+ if not self.root_head == other.root_head:
+ raise ValueError(other)
+ # combine selections
+ select = self.select + other.select
+ # combine conditions
+ conds = []
+ if self.where != '':
+ conds.append(self.where)
+ if other.where != '':
+ conds.append(other.where)
+ where = ' . '.join(conds)
+ # return new query
+ return Query(
+ root_type=self.root_type,
+ root_head=self.root_head,
+ select=select,
+ where=where,
+ )
+
+ @property
+ def names(self) -> typing.Tuple[str, ...]:
+ """Return a tuple of selected variable names, excluding the root."""
+ return tuple(name for _, name in self.select)
+
+ @property
+ def query(self) -> str:
+ """Return an executable sparql query."""
+ select = ' '.join(f'({head} as ?{name})' for head, name in self.select)
+ return f'''
+ SELECT {self.root_head} {select}
+ WHERE {{
+ {self.root_head} <{ns.rdf.type}>/<{ns.rdfs.subClassOf}>* <{self.root_type}> . 
+ {self.where}
+ }}
+ '''
+
+ def __call__(self, graph: rdflib.Graph) -> rdflib.query.Result:
+ """Execute the query on a *graph* and return the query result."""
+ return graph.query(self.query)
+
+## EOF ##
diff --git a/bsfs/utils/uuid.py b/bsfs/utils/uuid.py
index ba5cf52..70e1656 100644
--- a/bsfs/utils/uuid.py
+++ b/bsfs/utils/uuid.py
@@ -7,6 +7,7 @@ Author: Matthias Baumgartner, 2022
 # imports
 from collections import abc
 import hashlib
+import io
 import json
 import os
 import platform
@@ -106,6 +107,17 @@ class UCID():
 with open(path, 'rb') as ifile:
 return HASH(ifile.read()).hexdigest()
+ @staticmethod
+ def from_buffer(buffer: io.IOBase) -> str:
+ """Read the content from a buffer."""
+ if isinstance(buffer, io.TextIOBase):
+ return HASH(buffer.read().encode('utf-8', errors='ignore')).hexdigest()
+ return HASH(buffer.read()).hexdigest()
+
+ @staticmethod
+ def from_bytes(content: bytes) -> str:
+ """Get the content from bytes."""
+ return HASH(content).hexdigest()
 @staticmethod
 def from_dict(content: dict) -> str:
diff --git a/setup.py b/setup.py
--- a/setup.py
+++ b/setup.py
@@ -14,7 +14,10 @@ setup(
 url='https://www.igsor.net/projects/blackstar/bsfs/',
 download_url='https://pip.igsor.net',
 packages=('bsfs', ),
- install_requires=('rdflib', ),
+ install_requires=(
+ 'rdflib', # schema and sparql storage
+ 'hopcroftkarp', # ast matching
+ ),
 python_requires=">=3.7",
 )
diff --git a/test/graph/test_graph.py b/test/graph/test_graph.py
index f97783b..5db1fd2 100644
--- a/test/graph/test_graph.py
+++ b/test/graph/test_graph.py
@@ -95,6 +95,19 @@ class TestGraph(unittest.TestCase):
 # node_type must be in the schema
 self.assertRaises(KeyError, graph.nodes, ns.bsfs.Invalid, guids)
+ def test_all(self):
+ graph = Graph(self.backend, self.user)
+ # resulting nodes can be empty
+ self.assertEqual(graph.all(ns.bsfs.Entity),
+ Nodes(self.backend, self.user, graph.schema.node(ns.bsfs.Entity), set()))
+ # resulting nodes contains all nodes of the respective type
+ guids = {URI('http://example.com/me/entity#1234'), URI('http://example.com/me/entity#4321')}
+ self.backend.create(graph.schema.node(ns.bsfs.Entity), guids)
+ self.assertEqual(graph.all(ns.bsfs.Entity),
+ Nodes(self.backend, self.user, graph.schema.node(ns.bsfs.Entity), guids))
+ # node_type must be in the schema
+ self.assertRaises(KeyError, graph.all, ns.bsfs.Invalid)
+
 def test_migrate(self):
 # setup
 graph = Graph(self.backend, self.user)
diff --git a/test/graph/test_nodes.py b/test/graph/test_nodes.py
index 2870f35..6bb3ef3 100644
--- a/test/graph/test_nodes.py
+++ b/test/graph/test_nodes.py
@@ -4,13 +4,18 @@
 Part of the bsfs test suite.
 A copy of the license is provided with the project. 
Author: Matthias Baumgartner, 2022 """ -# imports -import rdflib +# standard imports +from functools import partial +import operator import unittest +# external imports +import rdflib + # bsie imports -from bsfs import schema as _schema -from bsfs.namespace import ns +from bsfs import schema as bsc +from bsfs.graph.walk import Walk +from bsfs.namespace import Namespace, ns from bsfs.triple_store.sparql import SparqlStore from bsfs.utils import errors, URI @@ -20,11 +25,13 @@ from bsfs.graph.nodes import Nodes ## code ## +bst = Namespace('http://bsfs.ai/schema/Tag') + class TestNodes(unittest.TestCase): def setUp(self): # initialize backend self.backend = SparqlStore() - self.backend.schema = _schema.from_string(''' + self.backend.schema = bsc.from_string(''' prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> prefix xsd: <http://www.w3.org/2001/XMLSchema#> @@ -67,6 +74,11 @@ class TestNodes(unittest.TestCase): rdfs:range bsfs:User ; bsfs:unique "true"^^xsd:boolean . + bst:label rdfs:subClassOf bsfs:Predicate ; + rdfs:domain bsfs:Tag ; + rdfs:range xsd:string ; + bsfs:unique "true"^^xsd:boolean . + bst:representative rdfs:subClassOf bsfs:Predicate ; rdfs:domain bsfs:Tag ; rdfs:range bsfs:Entity ; @@ -80,6 +92,7 @@ class TestNodes(unittest.TestCase): (rdflib.URIRef(ns.bsfs.User), rdflib.RDFS.subClassOf, rdflib.URIRef(ns.bsfs.Node)), (rdflib.URIRef(ns.xsd.string), rdflib.RDFS.subClassOf, rdflib.URIRef(ns.bsfs.Literal)), (rdflib.URIRef(ns.bsfs.Array), rdflib.RDFS.subClassOf, rdflib.URIRef(ns.bsfs.Literal)), + (rdflib.URIRef(ns.bsfs.BinaryBlob), rdflib.RDFS.subClassOf, rdflib.URIRef(ns.bsfs.Literal)), (rdflib.URIRef(ns.bsfs.Feature), rdflib.RDFS.subClassOf, rdflib.URIRef(ns.bsfs.Array)), (rdflib.URIRef(ns.bsfs.Number), rdflib.RDFS.subClassOf, rdflib.URIRef(ns.bsfs.Literal)), (rdflib.URIRef(ns.bsfs.Time), rdflib.RDFS.subClassOf, rdflib.URIRef(ns.bsfs.Literal)), @@ -89,7 +102,8 @@ class TestNodes(unittest.TestCase): (rdflib.URIRef(ns.bse.filesize), rdflib.RDFS.subClassOf, rdflib.URIRef(ns.bsfs.Predicate)), (rdflib.URIRef(ns.bse.tag), rdflib.RDFS.subClassOf, rdflib.URIRef(ns.bsfs.Predicate)), (rdflib.URIRef(ns.bse.author), rdflib.RDFS.subClassOf, rdflib.URIRef(ns.bsfs.Predicate)), - (rdflib.URIRef('http://bsfs.ai/schema/Tag#representative'), rdflib.RDFS.subClassOf, rdflib.URIRef(ns.bsfs.Predicate)), + (rdflib.URIRef(bst.representative), rdflib.RDFS.subClassOf, rdflib.URIRef(ns.bsfs.Predicate)), + (rdflib.URIRef(bst.label), rdflib.RDFS.subClassOf, rdflib.URIRef(ns.bsfs.Predicate)), } # Nodes constructor args self.user = URI('http://example.com/me') @@ -100,7 +114,8 @@ class TestNodes(unittest.TestCase): self.p_filesize = self.backend.schema.predicate(ns.bse.filesize) self.p_author = self.backend.schema.predicate(ns.bse.author) self.p_tag = self.backend.schema.predicate(ns.bse.tag) - self.p_representative = self.backend.schema.predicate(URI('http://bsfs.ai/schema/Tag#representative')) + self.p_representative = self.backend.schema.predicate(bst.representative) + self.p_label = self.backend.schema.predicate(bst.label) self.t_created = self.backend.schema.predicate(ns.bsm.t_created) self.ent_ids = { URI('http://example.com/me/entity#1234'), @@ -371,6 +386,201 @@ class TestNodes(unittest.TestCase): (self.p_author.uri, Nodes(self.backend, self.user, self.user_type, {URI('http://example.com/me/user#1234'), URI('http://example.com/me/user#4321')})))) self.assertSetEqual(curr, set(self.backend._graph)) + def test_get(self): + # setup: add some instances + Nodes(self.backend, self.user, self.ent_type, 
{'http://example.com/me/entity#1234'}) \ + .set(ns.bse.comment, 'hello world') \ + .set(ns.bse.filesize, 1234) \ + .set(ns.bse.tag, Nodes(self.backend, self.user, self.tag_type, {'http://example.com/me/tag#1234'})) + Nodes(self.backend, self.user, self.ent_type, {'http://example.com/me/entity#4321'}) \ + .set(ns.bse.filesize, 4321) \ + .set(ns.bse.tag, Nodes(self.backend, self.user, self.tag_type, {'http://example.com/me/tag#4321'})) + Nodes(self.backend, self.user, self.tag_type, {'http://example.com/me/tag#1234'}) \ + .set(bst.label, 'tag_label_1234') + Nodes(self.backend, self.user, self.tag_type, {'http://example.com/me/tag#4321'}) \ + .set(bst.label, 'tag_label_4321') + # setup: get nodes instance + nodes = Nodes(self.backend, self.user, self.ent_type, self.ent_ids) + # must pass at least one path + self.assertRaises(AttributeError, nodes.get) + # view must be list or dict + self.assertRaises(ValueError, nodes.get, ns.bse.filesize, view='hello') + self.assertRaises(ValueError, nodes.get, ns.bse.filesize, view=1234) + self.assertRaises(ValueError, nodes.get, ns.bse.filesize, view=tuple) + # can pass path as URI + self.assertDictEqual(nodes.get(ns.bse.filesize), { + Nodes(self.backend, self.user, self.ent_type, {'http://example.com/me/entity#1234'}): 1234, + Nodes(self.backend, self.user, self.ent_type, {'http://example.com/me/entity#4321'}): 4321, + }) + # can pass path as sequence of URI + self.assertDictEqual(nodes.get((ns.bse.tag, bst.label)), { + Nodes(self.backend, self.user, self.ent_type, {'http://example.com/me/entity#1234'}): {'tag_label_1234'}, + Nodes(self.backend, self.user, self.ent_type, {'http://example.com/me/entity#4321'}): {'tag_label_4321'}, + }) + # get returns the same path that was passed + self.assertCountEqual(list(nodes.get((ns.bse.tag, bst.label), path=True, view=list)), [ + (Nodes(self.backend, self.user, self.ent_type, {'http://example.com/me/entity#1234'}), (ns.bse.tag, bst.label), 'tag_label_1234'), + (Nodes(self.backend, self.user, self.ent_type, {'http://example.com/me/entity#4321'}), (ns.bse.tag, bst.label), 'tag_label_4321'), + ]) + self.assertCountEqual(list(nodes.get([ns.bse.tag, bst.label], path=True, view=list)), [ + (Nodes(self.backend, self.user, self.ent_type, {'http://example.com/me/entity#1234'}), [ns.bse.tag, bst.label], 'tag_label_1234'), + (Nodes(self.backend, self.user, self.ent_type, {'http://example.com/me/entity#4321'}), [ns.bse.tag, bst.label], 'tag_label_4321'), + ]) + # paths must be URI or sequence thereof + self.assertRaises(TypeError, nodes.get, 1234) + self.assertRaises(TypeError, nodes.get, (ns.bse.tag, 1234)) + self.assertRaises(TypeError, nodes.get, (1234, ns.bse.tag)) + self.assertRaises(errors.ConsistencyError, nodes.get, 'hello world') + self.assertRaises(errors.ConsistencyError, nodes.get, ns.bse.invalid) + self.assertRaises(errors.ConsistencyError, nodes.get, (ns.bse.tag, bst.invalid)) + # can pass multiple paths + self.assertDictEqual(nodes.get(ns.bse.filesize, (ns.bse.tag, bst.label)), { + Nodes(self.backend, self.user, self.ent_type, {'http://example.com/me/entity#1234'}): { + ns.bse.filesize: 1234, + (ns.bse.tag, bst.label): {'tag_label_1234'}, + }, + Nodes(self.backend, self.user, self.ent_type, {'http://example.com/me/entity#4321'}): { + ns.bse.filesize: 4321, + (ns.bse.tag, bst.label): {'tag_label_4321'}, + }, + }) + # get respects view + self.assertDictEqual(nodes.get(ns.bse.filesize, view=dict), { + Nodes(self.backend, self.user, self.ent_type, {'http://example.com/me/entity#1234'}): 1234, + Nodes(self.backend, 
self.user, self.ent_type, {'http://example.com/me/entity#4321'}): 4321, + }) + self.assertSetEqual(set(nodes.get(ns.bse.filesize, view=list)), { + (Nodes(self.backend, self.user, self.ent_type, {'http://example.com/me/entity#1234'}), 1234), + (Nodes(self.backend, self.user, self.ent_type, {'http://example.com/me/entity#4321'}), 4321), + }) + # get returns Nodes instance when fetching a node + self.assertDictEqual(nodes.get(ns.bse.tag), { + Nodes(self.backend, self.user, self.ent_type, {'http://example.com/me/entity#1234'}): + {Nodes(self.backend, self.user, self.tag_type, {'http://example.com/me/tag#1234'})}, + Nodes(self.backend, self.user, self.ent_type, {'http://example.com/me/entity#4321'}): + {Nodes(self.backend, self.user, self.tag_type, {'http://example.com/me/tag#4321'})}, + }) + # get returns a value when fetching a value and omits missing values + self.assertDictEqual(nodes.get(ns.bse.comment), { + Nodes(self.backend, self.user, self.ent_type, {'http://example.com/me/entity#1234'}): {'hello world'}, + }) + + # FIXME: What if I call `get` with a single predicate and a single node, but + # that node has no value for that predicate? + # so, essentially, what if triples is empty? -> Also check in test_result! + raise NotImplementedError() + + def test_getattr(self): + nodes = Nodes(self.backend, self.user, self.ent_type, {'http://example.com/me/entity#1234'}) + # can get walks to values + self.assertEqual(nodes.filesize, Walk(nodes, (self.p_filesize, ))) + # can get walks to nodes + self.assertEqual(nodes.tag, Walk(nodes, (self.p_tag, ))) + # can do multiple hops + self.assertEqual(nodes.tag.label, Walk(nodes, (self.p_tag, self.p_label))) + # invalid step raises an error + self.assertRaises(ValueError, getattr, nodes, 'foobar') + + def test_schema(self): + self.assertEqual(Nodes(self.backend, self.user, self.ent_type, + {URI('http://example.com/me/entity#1234')}).schema, self.backend.schema) + + def test_operators(self): # __add__, __or__, __sub__, __and__ + gen = partial(Nodes, self.backend, self.user, self.ent_type) + nodes = gen({URI('http://example.com/me/entity#1234')}) + # add/or concatenates guids + self.assertEqual( + gen({URI('http://example.com/me/entity#1234')}) + + gen({URI('http://example.com/me/entity#4321')}), + # target + gen({ + URI('http://example.com/me/entity#1234'), + URI('http://example.com/me/entity#4321')})) + self.assertEqual( + gen({URI('http://example.com/me/entity#1234')}) | + gen({URI('http://example.com/me/entity#4321')}), + # target + gen({ + URI('http://example.com/me/entity#1234'), + URI('http://example.com/me/entity#4321')})) + # repeated guids are ignored + self.assertEqual( + gen({URI('http://example.com/me/entity#1234')}) + + gen({URI('http://example.com/me/entity#1234')}), + # target + gen({URI('http://example.com/me/entity#1234')})) + self.assertEqual( + gen({URI('http://example.com/me/entity#1234')}) | + gen({URI('http://example.com/me/entity#1234')}), + # target + gen({URI('http://example.com/me/entity#1234')})) + + # sub substracts guids + self.assertEqual( + gen({URI('http://example.com/me/entity#1234'), + URI('http://example.com/me/entity#4321')}) - + gen({URI('http://example.com/me/entity#4321')}), + # target + gen({URI('http://example.com/me/entity#1234')})) + # missing guids are ignored + self.assertEqual( + gen({URI('http://example.com/me/entity#1234')}) - + gen({URI('http://example.com/me/entity#4321')}), + # target + gen({URI('http://example.com/me/entity#1234')})) + + # and intersects guids + self.assertEqual( + 
gen({URI('http://example.com/me/entity#1234'), + URI('http://example.com/me/entity#4321')}) & + gen({URI('http://example.com/me/entity#4321'), + URI('http://example.com/me/entity#5678')}), + # target + gen({URI('http://example.com/me/entity#4321')})) + + for op in (operator.add, operator.or_, operator.sub, operator.and_): + # type must match + self.assertRaises(TypeError, op, nodes, 1234) + self.assertRaises(TypeError, op, nodes, 'hello world') + # backend must match + self.assertRaises(ValueError, op, nodes, + Nodes(None, self.user, self.ent_type, {URI('http://example.com/me/entity#1234')})) + # user must match + self.assertRaises(ValueError, op, nodes, + Nodes(self.backend, '', self.ent_type, {URI('http://example.com/me/entity#1234')})) + # node type must match + self.assertRaises(ValueError, op, nodes, + Nodes(self.backend, self.user, self.tag_type, {URI('http://example.com/me/entity#1234')})) + + def test_len(self): + self.assertEqual(1, len(Nodes(self.backend, self.user, self.ent_type, { + URI('http://example.com/me/entity#1234'), + }))) + self.assertEqual(2, len(Nodes(self.backend, self.user, self.ent_type, { + URI('http://example.com/me/entity#1234'), + URI('http://example.com/me/entity#4321'), + }))) + self.assertEqual(4, len(Nodes(self.backend, self.user, self.ent_type, { + URI('http://example.com/me/entity#1234'), + URI('http://example.com/me/entity#4321'), + URI('http://example.com/me/entity#5678'), + URI('http://example.com/me/entity#8765'), + }))) + + def test_iter(self): # __iter__ + gen = partial(Nodes, self.backend, self.user, self.ent_type) + self.assertSetEqual(set(Nodes(self.backend, self.user, self.ent_type, { + URI('http://example.com/me/entity#1234'), + URI('http://example.com/me/entity#4321'), + URI('http://example.com/me/entity#5678'), + URI('http://example.com/me/entity#8765'), + })), { + gen({URI('http://example.com/me/entity#1234')}), + gen({URI('http://example.com/me/entity#4321')}), + gen({URI('http://example.com/me/entity#5678')}), + gen({URI('http://example.com/me/entity#8765')}), + }) + ## main ## diff --git a/test/graph/test_result.py b/test/graph/test_result.py new file mode 100644 index 0000000..749b8ad --- /dev/null +++ b/test/graph/test_result.py @@ -0,0 +1,432 @@ +""" + +Part of the bsfs test suite. +A copy of the license is provided with the project. 
+Author: Matthias Baumgartner, 2022 +""" +# imports +import unittest + +# bsie imports +from bsfs import schema as bsc +from bsfs.namespace import ns +from bsfs.utils import URI + +# objects to test +from bsfs.graph.result import to_list_view, to_dict_view + + +## code ## + +class TestListView(unittest.TestCase): + def setUp(self): + self.triples_111 = [('ent#1234', ns.bse.iso, 123)] + self.triples_11U = [('ent#1234', ns.bse.tag, 'tag#1234'), + ('ent#1234', ns.bse.tag, 'tag#5678')] + self.triples_1M1 = [('ent#1234', ns.bse.iso, 123), + ('ent#1234', ns.bse.t_created, '2010-01-02')] + self.triples_1MU = [('ent#1234', ns.bse.iso, 123), + ('ent#1234', ns.bse.tag, 'tag#1234'), + ('ent#1234', ns.bse.tag, 'tag#5678')] + self.triples_N11 = [('ent#1234', ns.bse.iso, 123), + ('ent#4321', ns.bse.iso, 321)] + self.triples_N1U = [('ent#1234', ns.bse.tag, 'tag#1234'), + ('ent#1234', ns.bse.tag, 'tag#5678'), + ('ent#4321', ns.bse.tag, 'tag#4321')] + self.triples_NM1 = [('ent#1234', ns.bse.iso, 123), + ('ent#1234', ns.bse.t_created, '2010-01-02'), + ('ent#4321', ns.bse.iso, 321), + ('ent#4321', ns.bse.t_created, '2022-02-22')] + self.triples_NMU = [('ent#1234', ns.bse.iso, 123), + ('ent#1234', ns.bse.tag, 'tag#1234'), + ('ent#1234', ns.bse.tag, 'tag#5678'), + ('ent#4321', ns.bse.iso, 321), + ('ent#4321', ns.bse.t_created, '2022-02-22')] + + def test_copy(self): + # iterator yields tuples + self.assertIsInstance(list(to_list_view([('subject', 'predicate', 'object')], node=True, path=True, value=True))[0], tuple) + # components are not changed + class Foo(): pass + foo = Foo() + self.assertListEqual(list(to_list_view([('subject', 'predicate', 'object')], node=True, path=True, value=True)), + [('subject', 'predicate', 'object')]) + self.assertListEqual(list(to_list_view([(foo, 'predicate', 'object')], node=True, path=True, value=True)), + [(foo, 'predicate', 'object')]) + self.assertListEqual(list(to_list_view([('subject', foo, 'object')], node=True, path=True, value=True)), + [('subject', foo, 'object')]) + self.assertListEqual(list(to_list_view([('subject', 'predicate', foo)], node=True, path=True, value=True)), + [('subject', 'predicate', foo)]) + + def test_agg_none(self): + self.assertListEqual(list(to_list_view(self.triples_111, node=True, path=True, value=True)), self.triples_111) + self.assertListEqual(list(to_list_view(self.triples_11U, node=True, path=True, value=True)), self.triples_11U) + self.assertListEqual(list(to_list_view(self.triples_1M1, node=True, path=True, value=True)), self.triples_1M1) + self.assertListEqual(list(to_list_view(self.triples_1MU, node=True, path=True, value=True)), self.triples_1MU) + self.assertListEqual(list(to_list_view(self.triples_N11, node=True, path=True, value=True)), self.triples_N11) + self.assertListEqual(list(to_list_view(self.triples_N1U, node=True, path=True, value=True)), self.triples_N1U) + self.assertListEqual(list(to_list_view(self.triples_NM1, node=True, path=True, value=True)), self.triples_NM1) + self.assertListEqual(list(to_list_view(self.triples_NMU, node=True, path=True, value=True)), self.triples_NMU) + + def test_agg_node(self): + self.assertListEqual(list(to_list_view(self.triples_111, node=False, path=True, value=True)), + [(ns.bse.iso, 123)]) + self.assertListEqual(list(to_list_view(self.triples_11U, node=False, path=True, value=True)), + [(ns.bse.tag, 'tag#1234'), (ns.bse.tag, 'tag#5678')]) + self.assertListEqual(list(to_list_view(self.triples_1M1, node=False, path=True, value=True)), + [(ns.bse.iso, 123), (ns.bse.t_created, '2010-01-02')]) 
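The flags of to_list_view are easiest to read side by side. A minimal sketch of the behaviour exercised by these tests (flags and signature are inferred from the tests above, not from the implementation itself): node=False and path=False drop the corresponding column from each (node, path, value) triple, while the value flag leaves the list view unchanged.

    from bsfs.graph.result import to_list_view

    triples = [('ent#1', 'p:iso', 123)]  # hypothetical (node, path, value) triple
    assert list(to_list_view(triples, node=True, path=True, value=True)) == [('ent#1', 'p:iso', 123)]
    assert list(to_list_view(triples, node=False, path=True, value=True)) == [('p:iso', 123)]
    assert list(to_list_view(triples, node=True, path=False, value=True)) == [('ent#1', 123)]
    assert list(to_list_view(triples, node=False, path=False, value=True)) == [123]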
+ self.assertListEqual(list(to_list_view(self.triples_1MU, node=False, path=True, value=True)), + [(ns.bse.iso, 123), (ns.bse.tag, 'tag#1234'), (ns.bse.tag, 'tag#5678')]) + self.assertListEqual(list(to_list_view(self.triples_N11, node=False, path=True, value=True)), + [(ns.bse.iso, 123), (ns.bse.iso, 321)]) + self.assertListEqual(list(to_list_view(self.triples_N1U, node=False, path=True, value=True)), + [(ns.bse.tag, 'tag#1234'), (ns.bse.tag, 'tag#5678'), (ns.bse.tag, 'tag#4321')]) + self.assertListEqual(list(to_list_view(self.triples_NM1, node=False, path=True, value=True)), + [(ns.bse.iso, 123), (ns.bse.t_created, '2010-01-02'), (ns.bse.iso, 321), (ns.bse.t_created, '2022-02-22')]) + self.assertListEqual(list(to_list_view(self.triples_NMU, node=False, path=True, value=True)), + [(ns.bse.iso, 123), (ns.bse.tag, 'tag#1234'), (ns.bse.tag, 'tag#5678'), (ns.bse.iso, 321), (ns.bse.t_created, '2022-02-22')]) + + def test_agg_path(self): + self.assertListEqual(list(to_list_view(self.triples_111, node=True, path=False, value=True)), + [('ent#1234', 123)]) + self.assertListEqual(list(to_list_view(self.triples_11U, node=True, path=False, value=True)), + [('ent#1234', 'tag#1234'), ('ent#1234', 'tag#5678')]) + self.assertListEqual(list(to_list_view(self.triples_1M1, node=True, path=False, value=True)), + [('ent#1234', 123), ('ent#1234', '2010-01-02')]) + self.assertListEqual(list(to_list_view(self.triples_1MU, node=True, path=False, value=True)), + [('ent#1234', 123), ('ent#1234', 'tag#1234'), ('ent#1234', 'tag#5678')]) + self.assertListEqual(list(to_list_view(self.triples_N11, node=True, path=False, value=True)), + [('ent#1234', 123), ('ent#4321', 321)]) + self.assertListEqual(list(to_list_view(self.triples_N1U, node=True, path=False, value=True)), + [('ent#1234', 'tag#1234'), ('ent#1234', 'tag#5678'), ('ent#4321', 'tag#4321')]) + self.assertListEqual(list(to_list_view(self.triples_NM1, node=True, path=False, value=True)), + [('ent#1234', 123), ('ent#1234', '2010-01-02'), ('ent#4321', 321), ('ent#4321', '2022-02-22')]) + self.assertListEqual(list(to_list_view(self.triples_NMU, node=True, path=False, value=True)), + [('ent#1234', 123), ('ent#1234', 'tag#1234'), ('ent#1234', 'tag#5678'), ('ent#4321', 321), ('ent#4321', '2022-02-22')]) + + def test_agg_node_path(self): + self.assertListEqual(list(to_list_view(self.triples_111, node=False, path=False, value=True)), + [123]) + self.assertListEqual(list(to_list_view(self.triples_11U, node=False, path=False, value=True)), + ['tag#1234', 'tag#5678']) + self.assertListEqual(list(to_list_view(self.triples_1M1, node=False, path=False, value=True)), + [123, '2010-01-02']) + self.assertListEqual(list(to_list_view(self.triples_1MU, node=False, path=False, value=True)), + [123, 'tag#1234', 'tag#5678']) + self.assertListEqual(list(to_list_view(self.triples_N11, node=False, path=False, value=True)), + [123, 321]) + self.assertListEqual(list(to_list_view(self.triples_N1U, node=False, path=False, value=True)), + ['tag#1234', 'tag#5678', 'tag#4321']) + self.assertListEqual(list(to_list_view(self.triples_NM1, node=False, path=False, value=True)), + [123, '2010-01-02', 321, '2022-02-22']) + self.assertListEqual(list(to_list_view(self.triples_NMU, node=False, path=False, value=True)), + [123, 'tag#1234', 'tag#5678', 321, '2022-02-22']) + + def test_agg_value(self): + # value flag has no effect + self.assertListEqual(list(to_list_view(self.triples_111, node=True, path=True, value=True)), self.triples_111) + self.assertListEqual(list(to_list_view(self.triples_11U, 
node=True, path=True, value=True)), self.triples_11U) + self.assertListEqual(list(to_list_view(self.triples_1M1, node=True, path=True, value=True)), self.triples_1M1) + self.assertListEqual(list(to_list_view(self.triples_1MU, node=True, path=True, value=True)), self.triples_1MU) + self.assertListEqual(list(to_list_view(self.triples_N11, node=True, path=True, value=True)), self.triples_N11) + self.assertListEqual(list(to_list_view(self.triples_N1U, node=True, path=True, value=True)), self.triples_N1U) + self.assertListEqual(list(to_list_view(self.triples_NM1, node=True, path=True, value=True)), self.triples_NM1) + self.assertListEqual(list(to_list_view(self.triples_NMU, node=True, path=True, value=True)), self.triples_NMU) + + def test_agg_node_value(self): + # value flag has no effect -> same test as test_agg_node + self.assertListEqual(list(to_list_view(self.triples_111, node=False, path=True, value=False)), + [(ns.bse.iso, 123)]) + self.assertListEqual(list(to_list_view(self.triples_11U, node=False, path=True, value=False)), + [(ns.bse.tag, 'tag#1234'), (ns.bse.tag, 'tag#5678')]) + self.assertListEqual(list(to_list_view(self.triples_1M1, node=False, path=True, value=False)), + [(ns.bse.iso, 123), (ns.bse.t_created, '2010-01-02')]) + self.assertListEqual(list(to_list_view(self.triples_1MU, node=False, path=True, value=False)), + [(ns.bse.iso, 123), (ns.bse.tag, 'tag#1234'), (ns.bse.tag, 'tag#5678')]) + self.assertListEqual(list(to_list_view(self.triples_N11, node=False, path=True, value=False)), + [(ns.bse.iso, 123), (ns.bse.iso, 321)]) + self.assertListEqual(list(to_list_view(self.triples_N1U, node=False, path=True, value=False)), + [(ns.bse.tag, 'tag#1234'), (ns.bse.tag, 'tag#5678'), (ns.bse.tag, 'tag#4321')]) + self.assertListEqual(list(to_list_view(self.triples_NM1, node=False, path=True, value=False)), + [(ns.bse.iso, 123), (ns.bse.t_created, '2010-01-02'), (ns.bse.iso, 321), (ns.bse.t_created, '2022-02-22')]) + self.assertListEqual(list(to_list_view(self.triples_NMU, node=False, path=True, value=False)), + [(ns.bse.iso, 123), (ns.bse.tag, 'tag#1234'), (ns.bse.tag, 'tag#5678'), (ns.bse.iso, 321), (ns.bse.t_created, '2022-02-22')]) + + def test_agg_path_value(self): + # value flag has no effect -> same test as test_agg_path + self.assertListEqual(list(to_list_view(self.triples_111, node=True, path=False, value=False)), + [('ent#1234', 123)]) + self.assertListEqual(list(to_list_view(self.triples_11U, node=True, path=False, value=False)), + [('ent#1234', 'tag#1234'), ('ent#1234', 'tag#5678')]) + self.assertListEqual(list(to_list_view(self.triples_1M1, node=True, path=False, value=False)), + [('ent#1234', 123), ('ent#1234', '2010-01-02')]) + self.assertListEqual(list(to_list_view(self.triples_1MU, node=True, path=False, value=False)), + [('ent#1234', 123), ('ent#1234', 'tag#1234'), ('ent#1234', 'tag#5678')]) + self.assertListEqual(list(to_list_view(self.triples_N11, node=True, path=False, value=False)), + [('ent#1234', 123), ('ent#4321', 321)]) + self.assertListEqual(list(to_list_view(self.triples_N1U, node=True, path=False, value=False)), + [('ent#1234', 'tag#1234'), ('ent#1234', 'tag#5678'), ('ent#4321', 'tag#4321')]) + self.assertListEqual(list(to_list_view(self.triples_NM1, node=True, path=False, value=False)), + [('ent#1234', 123), ('ent#1234', '2010-01-02'), ('ent#4321', 321), ('ent#4321', '2022-02-22')]) + self.assertListEqual(list(to_list_view(self.triples_NMU, node=True, path=False, value=False)), + [('ent#1234', 123), ('ent#1234', 'tag#1234'), ('ent#1234', 'tag#5678'), 
('ent#4321', 321), ('ent#4321', '2022-02-22')]) + + def test_agg_all(self): + # value flag has no effect -> same test as test_agg_node_path + self.assertListEqual(list(to_list_view(self.triples_111, node=False, path=False, value=False)), + [123]) + self.assertListEqual(list(to_list_view(self.triples_11U, node=False, path=False, value=False)), + ['tag#1234', 'tag#5678']) + self.assertListEqual(list(to_list_view(self.triples_1M1, node=False, path=False, value=False)), + [123, '2010-01-02']) + self.assertListEqual(list(to_list_view(self.triples_1MU, node=False, path=False, value=False)), + [123, 'tag#1234', 'tag#5678']) + self.assertListEqual(list(to_list_view(self.triples_N11, node=False, path=False, value=False)), + [123, 321]) + self.assertListEqual(list(to_list_view(self.triples_N1U, node=False, path=False, value=False)), + ['tag#1234', 'tag#5678', 'tag#4321']) + self.assertListEqual(list(to_list_view(self.triples_NM1, node=False, path=False, value=False)), + [123, '2010-01-02', 321, '2022-02-22']) + self.assertListEqual(list(to_list_view(self.triples_NMU, node=False, path=False, value=False)), + [123, 'tag#1234', 'tag#5678', 321, '2022-02-22']) + + +class TestDictView(unittest.TestCase): + def setUp(self): + self.unique_paths = {ns.bse.iso, ns.bse.t_created} + self.triples_111 = [('ent#1234', ns.bse.iso, 123)] + self.triples_11U = [('ent#1234', ns.bse.tag, 'tag#1234'), + ('ent#1234', ns.bse.tag, 'tag#5678')] + self.triples_1M1 = [('ent#1234', ns.bse.iso, 123), + ('ent#1234', ns.bse.t_created, '2010-01-02')] + self.triples_1MU = [('ent#1234', ns.bse.iso, 123), + ('ent#1234', ns.bse.tag, 'tag#1234'), + ('ent#1234', ns.bse.tag, 'tag#5678')] + self.triples_N11 = [('ent#1234', ns.bse.iso, 123), + ('ent#4321', ns.bse.iso, 321)] + self.triples_N1U = [('ent#1234', ns.bse.tag, 'tag#1234'), + ('ent#1234', ns.bse.tag, 'tag#5678'), + ('ent#4321', ns.bse.tag, 'tag#4321')] + self.triples_NM1 = [('ent#1234', ns.bse.iso, 123), + ('ent#1234', ns.bse.t_created, '2010-01-02'), + ('ent#4321', ns.bse.iso, 321), + ('ent#4321', ns.bse.t_created, '2022-02-22')] + self.triples_NMU = [('ent#1234', ns.bse.iso, 123), + ('ent#1234', ns.bse.tag, 'tag#1234'), + ('ent#1234', ns.bse.tag, 'tag#5678'), + ('ent#4321', ns.bse.iso, 321), + ('ent#4321', ns.bse.t_created, '2022-02-22')] + + def test_errounous_call(self): + # return set instead of value + self.assertSetEqual(to_dict_view(self.triples_111, one_node=False, one_path=True, unique_paths=self.unique_paths, node=False, path=False, value=False), + {123}) + self.assertSetEqual(to_dict_view(self.triples_111, one_node=True, one_path=False, unique_paths=self.unique_paths, node=False, path=False, value=False), + {123}) + # one_node mismatch: return set of values instead of value + self.assertDictEqual(to_dict_view(self.triples_111, one_node=False, one_path=True, unique_paths=self.unique_paths, node=False, path=True, value=False), + {ns.bse.iso: {123}}) + # one_path mismatch: return set of values instead of value + self.assertDictEqual(to_dict_view(self.triples_111, one_node=True, one_path=False, unique_paths=self.unique_paths, node=True, path=False, value=False), + {'ent#1234': {123}}) + # unique_paths mismatch: return set of values instead of value + self.assertSetEqual(to_dict_view(self.triples_111, one_node=True, one_path=False, unique_paths={}, node=False, path=False, value=False), + {123}) + self.assertDictEqual(to_dict_view(self.triples_111, one_node=True, one_path=False, unique_paths={}, node=False, path=True, value=False), + {ns.bse.iso: {123}}) + 
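For the dict view the extra arguments matter: one_node and one_path declare whether a single node or a single path was requested, unique_paths lists the paths whose value sets may be unwrapped to a scalar, and node/path/value again select which levels of the nested result are kept. A minimal sketch consistent with the tests above (inferred from the tests, not from the implementation):

    from bsfs.graph.result import to_dict_view

    triples = [('ent#1', 'p:iso', 123)]  # hypothetical (node, path, value) triple
    unique = {'p:iso'}
    # full nesting: {node: {path: {values}}}
    assert to_dict_view(triples, one_node=True, one_path=True, unique_paths=unique,
                        node=True, path=True, value=True) == {'ent#1': {'p:iso': {123}}}
    # value=False unwraps singleton value sets of unique paths
    assert to_dict_view(triples, one_node=True, one_path=True, unique_paths=unique,
                        node=True, path=True, value=False) == {'ent#1': {'p:iso': 123}}
    # dropping the node and path levels leaves the bare value
    assert to_dict_view(triples, one_node=True, one_path=True, unique_paths=unique,
                        node=False, path=False, value=False) == 123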
self.assertDictEqual(to_dict_view(self.triples_111, one_node=True, one_path=False, unique_paths={}, node=True, path=False, value=False), + {'ent#1234': {123}}) + self.assertDictEqual(to_dict_view(self.triples_111, one_node=True, one_path=False, unique_paths={}, node=True, path=True, value=False), + {'ent#1234': {ns.bse.iso: {123}}}) + + def test_agg_none(self): + self.assertDictEqual(to_dict_view(self.triples_111, one_node=True, one_path=True, unique_paths=self.unique_paths, node=True, path=True, value=True), + {'ent#1234': {ns.bse.iso: {123}}}) + self.assertDictEqual(to_dict_view(self.triples_11U, one_node=True, one_path=True, unique_paths=self.unique_paths, node=True, path=True, value=True), + {'ent#1234': {ns.bse.tag: {'tag#1234', 'tag#5678'}}}) + self.assertDictEqual(to_dict_view(self.triples_1M1, one_node=True, one_path=False, unique_paths=self.unique_paths, node=True, path=True, value=True), + {'ent#1234': {ns.bse.iso: {123}, ns.bse.t_created: {'2010-01-02'}}}) + self.assertDictEqual(to_dict_view(self.triples_1MU, one_node=True, one_path=False, unique_paths=self.unique_paths, node=True, path=True, value=True), + {'ent#1234': {ns.bse.iso: {123}, ns.bse.tag: {'tag#1234', 'tag#5678'}}}) + self.assertDictEqual(to_dict_view(self.triples_N11, one_node=False, one_path=True, unique_paths=self.unique_paths, node=True, path=True, value=True), + {'ent#1234': {ns.bse.iso: {123}}, 'ent#4321': {ns.bse.iso: {321}}}) + self.assertDictEqual(to_dict_view(self.triples_N1U, one_node=False, one_path=True, unique_paths=self.unique_paths, node=True, path=True, value=True), + {'ent#1234': {ns.bse.tag: {'tag#1234', 'tag#5678'}}, 'ent#4321': {ns.bse.tag: {'tag#4321'}}}) + self.assertDictEqual(to_dict_view(self.triples_NM1, one_node=False, one_path=False, unique_paths=self.unique_paths, node=True, path=True, value=True), + {'ent#1234': {ns.bse.iso: {123}, ns.bse.t_created: {'2010-01-02'}}, 'ent#4321': {ns.bse.iso: {321}, ns.bse.t_created: {'2022-02-22'}}}) + self.assertDictEqual(to_dict_view(self.triples_NMU, one_node=False, one_path=False, unique_paths=self.unique_paths, node=True, path=True, value=True), + {'ent#1234': {ns.bse.iso: {123}, ns.bse.tag: {'tag#1234', 'tag#5678'}}, 'ent#4321': {ns.bse.iso: {321}, ns.bse.t_created: {'2022-02-22'}}}) + # empty + self.assertDictEqual(to_dict_view([], one_node=True, one_path=True, unique_paths=self.unique_paths, node=True, path=True, value=True), {}) + self.assertDictEqual(to_dict_view([], one_node=True, one_path=False, unique_paths=self.unique_paths, node=True, path=True, value=True), {}) + self.assertDictEqual(to_dict_view([], one_node=False, one_path=True, unique_paths=self.unique_paths, node=True, path=True, value=True), {}) + self.assertDictEqual(to_dict_view([], one_node=False, one_path=False, unique_paths=self.unique_paths, node=True, path=True, value=True), {}) + + def test_agg_node(self): + self.assertDictEqual(to_dict_view(self.triples_111, one_node=True, one_path=True, unique_paths=self.unique_paths, node=False, path=True, value=True), + {ns.bse.iso: {123}}) + self.assertDictEqual(to_dict_view(self.triples_11U, one_node=True, one_path=True, unique_paths=self.unique_paths, node=False, path=True, value=True), + {ns.bse.tag: {'tag#1234', 'tag#5678'}}) + self.assertDictEqual(to_dict_view(self.triples_1M1, one_node=True, one_path=False, unique_paths=self.unique_paths, node=False, path=True, value=True), + {ns.bse.iso: {123}, ns.bse.t_created: {'2010-01-02'}}) + self.assertDictEqual(to_dict_view(self.triples_1MU, one_node=True, one_path=False, 
unique_paths=self.unique_paths, node=False, path=True, value=True), + {ns.bse.iso: {123}, ns.bse.tag: {'tag#1234', 'tag#5678'}}) + self.assertDictEqual(to_dict_view(self.triples_N11, one_node=False, one_path=True, unique_paths=self.unique_paths, node=False, path=True, value=True), + {ns.bse.iso: {123, 321}}) + self.assertDictEqual(to_dict_view(self.triples_N1U, one_node=False, one_path=True, unique_paths=self.unique_paths, node=False, path=True, value=True), + {ns.bse.tag: {'tag#1234', 'tag#5678', 'tag#4321'}}) + self.assertDictEqual(to_dict_view(self.triples_NM1, one_node=False, one_path=False, unique_paths=self.unique_paths, node=False, path=True, value=True), + {ns.bse.iso: {123, 321}, ns.bse.t_created: {'2010-01-02', '2022-02-22'}}) + self.assertDictEqual(to_dict_view(self.triples_NMU, one_node=False, one_path=False, unique_paths=self.unique_paths, node=False, path=True, value=True), + {ns.bse.iso: {123, 321}, ns.bse.tag: {'tag#1234', 'tag#5678'}, ns.bse.t_created: {'2022-02-22'}}) + # empty + self.assertDictEqual(to_dict_view([], one_node=True, one_path=True, unique_paths=self.unique_paths, node=False, path=True, value=True), {}) + self.assertDictEqual(to_dict_view([], one_node=True, one_path=False, unique_paths=self.unique_paths, node=False, path=True, value=True), {}) + self.assertDictEqual(to_dict_view([], one_node=False, one_path=True, unique_paths=self.unique_paths, node=False, path=True, value=True), {}) + self.assertDictEqual(to_dict_view([], one_node=False, one_path=False, unique_paths=self.unique_paths, node=False, path=True, value=True), {}) + + def test_agg_path(self): + self.assertDictEqual(to_dict_view(self.triples_111, one_node=True, one_path=True, unique_paths=self.unique_paths, node=True, path=False, value=True), + {'ent#1234': {123}}) + self.assertDictEqual(to_dict_view(self.triples_11U, one_node=True, one_path=True, unique_paths=self.unique_paths, node=True, path=False, value=True), + {'ent#1234': {'tag#1234', 'tag#5678'}}) + self.assertDictEqual(to_dict_view(self.triples_1M1, one_node=True, one_path=False, unique_paths=self.unique_paths, node=True, path=False, value=True), + {'ent#1234': {123, '2010-01-02'}}) + self.assertDictEqual(to_dict_view(self.triples_1MU, one_node=True, one_path=False, unique_paths=self.unique_paths, node=True, path=False, value=True), + {'ent#1234': {123, 'tag#1234', 'tag#5678'}}) + self.assertDictEqual(to_dict_view(self.triples_N11, one_node=False, one_path=True, unique_paths=self.unique_paths, node=True, path=False, value=True), + {'ent#1234': {123}, 'ent#4321': {321}}) + self.assertDictEqual(to_dict_view(self.triples_N1U, one_node=False, one_path=True, unique_paths=self.unique_paths, node=True, path=False, value=True), + {'ent#1234': {'tag#1234', 'tag#5678'}, 'ent#4321': {'tag#4321'}}) + self.assertDictEqual(to_dict_view(self.triples_NM1, one_node=False, one_path=False, unique_paths=self.unique_paths, node=True, path=False, value=True), + {'ent#1234': {123, '2010-01-02'}, 'ent#4321': {321, '2022-02-22'}}) + self.assertDictEqual(to_dict_view(self.triples_NMU, one_node=False, one_path=False, unique_paths=self.unique_paths, node=True, path=False, value=True), + {'ent#1234': {123, 'tag#1234', 'tag#5678'}, 'ent#4321': {321, '2022-02-22'}}) + # empty + self.assertDictEqual(to_dict_view([], one_node=True, one_path=True, unique_paths=self.unique_paths, node=True, path=False, value=True), {}) + self.assertDictEqual(to_dict_view([], one_node=True, one_path=False, unique_paths=self.unique_paths, node=True, path=False, value=True), {}) + 
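These views back the view= argument of the new Nodes.get, so the same aggregation surfaces at the graph level. A sketch of that public API under the data set up in test_nodes.test_get above (graph stands for a bsfs.graph.Graph wired to the populated SparqlStore from those tests; bst mirrors their Tag namespace; nothing here goes beyond what the tests exercise):

    from bsfs.namespace import Namespace, ns

    bst = Namespace('http://bsfs.ai/schema/Tag')
    ents = graph.all(ns.bsfs.Entity)       # every entity, as a Nodes instance
    ents.get(ns.bse.filesize)              # dict view: {Nodes(entity#1234): 1234, Nodes(entity#4321): 4321}
    ents.get((ns.bse.tag, bst.label))      # multi-hop path: entity -> tag -> label
    ents.get(ns.bse.filesize, view=list)   # list view: [(Nodes(...), 1234), (Nodes(...), 4321)]
    ents.tag.label(node=False)             # Walk via attribute access: {'tag_label_1234', 'tag_label_4321'}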
self.assertDictEqual(to_dict_view([], one_node=False, one_path=True, unique_paths=self.unique_paths, node=True, path=False, value=True), {}) + self.assertDictEqual(to_dict_view([], one_node=False, one_path=False, unique_paths=self.unique_paths, node=True, path=False, value=True), {}) + + def test_agg_node_path(self): + self.assertSetEqual(to_dict_view(self.triples_111, one_node=True, one_path=True, unique_paths=self.unique_paths, node=False, path=False, value=True), + {123}) + self.assertSetEqual(to_dict_view(self.triples_11U, one_node=True, one_path=True, unique_paths=self.unique_paths, node=False, path=False, value=True), + {'tag#1234', 'tag#5678'}) + self.assertSetEqual(to_dict_view(self.triples_1M1, one_node=True, one_path=False, unique_paths=self.unique_paths, node=False, path=False, value=True), + {123, '2010-01-02'}) + self.assertSetEqual(to_dict_view(self.triples_1MU, one_node=True, one_path=False, unique_paths=self.unique_paths, node=False, path=False, value=True), + {123, 'tag#1234', 'tag#5678'}) + self.assertSetEqual(to_dict_view(self.triples_N11, one_node=False, one_path=True, unique_paths=self.unique_paths, node=False, path=False, value=True), + {123, 321}) + self.assertSetEqual(to_dict_view(self.triples_N1U, one_node=False, one_path=True, unique_paths=self.unique_paths, node=False, path=False, value=True), + {'tag#1234', 'tag#5678', 'tag#4321'}) + self.assertSetEqual(to_dict_view(self.triples_NM1, one_node=False, one_path=False, unique_paths=self.unique_paths, node=False, path=False, value=True), + {123, '2010-01-02', 321, '2022-02-22'}) + self.assertSetEqual(to_dict_view(self.triples_NMU, one_node=False, one_path=False, unique_paths=self.unique_paths, node=False, path=False, value=True), + {123, 'tag#1234', 'tag#5678', 321, '2022-02-22'}) + # empty + self.assertSetEqual(to_dict_view([], one_node=True, one_path=True, unique_paths=self.unique_paths, node=False, path=False, value=True), set()) + self.assertSetEqual(to_dict_view([], one_node=True, one_path=False, unique_paths=self.unique_paths, node=False, path=False, value=True), set()) + self.assertSetEqual(to_dict_view([], one_node=False, one_path=True, unique_paths=self.unique_paths, node=False, path=False, value=True), set()) + self.assertSetEqual(to_dict_view([], one_node=False, one_path=False, unique_paths=self.unique_paths, node=False, path=False, value=True), set()) + + def test_agg_value(self): + self.assertDictEqual(to_dict_view(self.triples_111, one_node=True, one_path=True, unique_paths=self.unique_paths, node=True, path=True, value=False), + {'ent#1234': {ns.bse.iso: 123}}) + self.assertDictEqual(to_dict_view(self.triples_11U, one_node=True, one_path=True, unique_paths=self.unique_paths, node=True, path=True, value=False), + {'ent#1234': {ns.bse.tag: {'tag#1234', 'tag#5678'}}}) + self.assertDictEqual(to_dict_view(self.triples_1M1, one_node=True, one_path=False, unique_paths=self.unique_paths, node=True, path=True, value=False), + {'ent#1234': {ns.bse.iso: 123, ns.bse.t_created: '2010-01-02'}}) + self.assertDictEqual(to_dict_view(self.triples_1MU, one_node=True, one_path=False, unique_paths=self.unique_paths, node=True, path=True, value=False), + {'ent#1234': {ns.bse.iso: 123, ns.bse.tag: {'tag#1234', 'tag#5678'}}}) + self.assertDictEqual(to_dict_view(self.triples_N11, one_node=False, one_path=True, unique_paths=self.unique_paths, node=True, path=True, value=False), + {'ent#1234': {ns.bse.iso: 123}, 'ent#4321': {ns.bse.iso: 321}}) + self.assertDictEqual(to_dict_view(self.triples_N1U, one_node=False, one_path=True, 
unique_paths=self.unique_paths, node=True, path=True, value=False), + {'ent#1234': {ns.bse.tag: {'tag#1234', 'tag#5678'}}, 'ent#4321': {ns.bse.tag: {'tag#4321'}}}) + self.assertDictEqual(to_dict_view(self.triples_NM1, one_node=False, one_path=False, unique_paths=self.unique_paths, node=True, path=True, value=False), + {'ent#1234': {ns.bse.iso: 123, ns.bse.t_created: '2010-01-02'}, 'ent#4321': {ns.bse.iso: 321, ns.bse.t_created: '2022-02-22'}}) + self.assertDictEqual(to_dict_view(self.triples_NMU, one_node=False, one_path=False, unique_paths=self.unique_paths, node=True, path=True, value=False), + {'ent#1234': {ns.bse.iso: 123, ns.bse.tag: {'tag#1234', 'tag#5678'}}, 'ent#4321': {ns.bse.iso: 321, ns.bse.t_created: '2022-02-22'}}) + # empty + self.assertDictEqual(to_dict_view([], one_node=True, one_path=True, unique_paths=self.unique_paths, node=True, path=True, value=False), {}) + self.assertDictEqual(to_dict_view([], one_node=True, one_path=False, unique_paths=self.unique_paths, node=True, path=True, value=False), {}) + self.assertDictEqual(to_dict_view([], one_node=False, one_path=True, unique_paths=self.unique_paths, node=True, path=True, value=False), {}) + self.assertDictEqual(to_dict_view([], one_node=False, one_path=False, unique_paths=self.unique_paths, node=True, path=True, value=False), {}) + + def test_agg_node_value(self): + self.assertDictEqual(to_dict_view(self.triples_111, one_node=True, one_path=True, unique_paths=self.unique_paths, node=False, path=True, value=False), + {ns.bse.iso: 123}) + self.assertDictEqual(to_dict_view(self.triples_11U, one_node=True, one_path=True, unique_paths=self.unique_paths, node=False, path=True, value=False), + {ns.bse.tag: {'tag#1234', 'tag#5678'}}) + self.assertDictEqual(to_dict_view(self.triples_1M1, one_node=True, one_path=False, unique_paths=self.unique_paths, node=False, path=True, value=False), + {ns.bse.iso: 123, ns.bse.t_created: '2010-01-02'}) + self.assertDictEqual(to_dict_view(self.triples_1MU, one_node=True, one_path=False, unique_paths=self.unique_paths, node=False, path=True, value=False), + {ns.bse.iso: 123, ns.bse.tag: {'tag#1234', 'tag#5678'}}) + self.assertDictEqual(to_dict_view(self.triples_N11, one_node=False, one_path=True, unique_paths=self.unique_paths, node=False, path=True, value=False), + {ns.bse.iso: {123, 321}}) + self.assertDictEqual(to_dict_view(self.triples_N1U, one_node=False, one_path=True, unique_paths=self.unique_paths, node=False, path=True, value=False), + {ns.bse.tag: {'tag#1234', 'tag#5678', 'tag#4321'}}) + self.assertDictEqual(to_dict_view(self.triples_NM1, one_node=False, one_path=False, unique_paths=self.unique_paths, node=False, path=True, value=False), + {ns.bse.iso: {123, 321}, ns.bse.t_created: {'2010-01-02', '2022-02-22'}}) + self.assertDictEqual(to_dict_view(self.triples_NMU, one_node=False, one_path=False, unique_paths=self.unique_paths, node=False, path=True, value=False), + {ns.bse.iso: {123, 321}, ns.bse.tag: {'tag#1234', 'tag#5678'}, ns.bse.t_created: {'2022-02-22'}}) + # empty + self.assertDictEqual(to_dict_view([], one_node=True, one_path=True, unique_paths=self.unique_paths, node=False, path=True, value=False), {}) + self.assertDictEqual(to_dict_view([], one_node=True, one_path=False, unique_paths=self.unique_paths, node=False, path=True, value=False), {}) + self.assertDictEqual(to_dict_view([], one_node=False, one_path=True, unique_paths=self.unique_paths, node=False, path=True, value=False), {}) + self.assertDictEqual(to_dict_view([], one_node=False, one_path=False, 
unique_paths=self.unique_paths, node=False, path=True, value=False), {}) + + def test_agg_path_value(self): + self.assertDictEqual(to_dict_view(self.triples_111, one_node=True, one_path=True, unique_paths=self.unique_paths, node=True, path=False, value=False), + {'ent#1234': 123}) + self.assertDictEqual(to_dict_view(self.triples_11U, one_node=True, one_path=True, unique_paths=self.unique_paths, node=True, path=False, value=False), + {'ent#1234': {'tag#1234', 'tag#5678'}}) + self.assertDictEqual(to_dict_view(self.triples_1M1, one_node=True, one_path=False, unique_paths=self.unique_paths, node=True, path=False, value=False), + {'ent#1234': {123, '2010-01-02'}}) + self.assertDictEqual(to_dict_view(self.triples_1MU, one_node=True, one_path=False, unique_paths=self.unique_paths, node=True, path=False, value=False), + {'ent#1234': {123, 'tag#1234', 'tag#5678'}}) + self.assertDictEqual(to_dict_view(self.triples_N11, one_node=False, one_path=True, unique_paths=self.unique_paths, node=True, path=False, value=False), + {'ent#1234': 123, 'ent#4321': 321}) + self.assertDictEqual(to_dict_view(self.triples_N1U, one_node=False, one_path=True, unique_paths=self.unique_paths, node=True, path=False, value=False), + {'ent#1234': {'tag#1234', 'tag#5678'}, 'ent#4321': {'tag#4321'}}) + self.assertDictEqual(to_dict_view(self.triples_NM1, one_node=False, one_path=False, unique_paths=self.unique_paths, node=True, path=False, value=False), + {'ent#1234': {123, '2010-01-02'}, 'ent#4321': {321, '2022-02-22'}}) + self.assertDictEqual(to_dict_view(self.triples_NMU, one_node=False, one_path=False, unique_paths=self.unique_paths, node=True, path=False, value=False), + {'ent#1234': {123, 'tag#1234', 'tag#5678'}, 'ent#4321': {321, '2022-02-22'}}) + # empty + self.assertDictEqual(to_dict_view([], one_node=True, one_path=True, unique_paths=self.unique_paths, node=True, path=False, value=False), {}) + self.assertDictEqual(to_dict_view([], one_node=True, one_path=False, unique_paths=self.unique_paths, node=True, path=False, value=False), {}) + self.assertDictEqual(to_dict_view([], one_node=False, one_path=True, unique_paths=self.unique_paths, node=True, path=False, value=False), {}) + self.assertDictEqual(to_dict_view([], one_node=False, one_path=False, unique_paths=self.unique_paths, node=True, path=False, value=False), {}) + + def test_agg_all(self): + self.assertEqual(to_dict_view(self.triples_111, one_node=True, one_path=True, unique_paths=self.unique_paths, node=False, path=False, value=False), + 123) + self.assertSetEqual(to_dict_view(self.triples_11U, one_node=True, one_path=True, unique_paths=self.unique_paths, node=False, path=False, value=False), + {'tag#1234', 'tag#5678'}) + self.assertSetEqual(to_dict_view(self.triples_1M1, one_node=True, one_path=False, unique_paths=self.unique_paths, node=False, path=False, value=False), + {123, '2010-01-02'}) + self.assertSetEqual(to_dict_view(self.triples_1MU, one_node=True, one_path=False, unique_paths=self.unique_paths, node=False, path=False, value=False), + {123, 'tag#1234', 'tag#5678'}) + self.assertSetEqual(to_dict_view(self.triples_N11, one_node=False, one_path=True, unique_paths=self.unique_paths, node=False, path=False, value=False), + {123, 321}) + self.assertSetEqual(to_dict_view(self.triples_N1U, one_node=False, one_path=True, unique_paths=self.unique_paths, node=False, path=False, value=False), + {'tag#1234', 'tag#5678', 'tag#4321'}) + self.assertSetEqual(to_dict_view(self.triples_NM1, one_node=False, one_path=False, unique_paths=self.unique_paths, node=False, 
path=False, value=False), + {123, '2010-01-02', 321, '2022-02-22'}) + self.assertSetEqual(to_dict_view(self.triples_NMU, one_node=False, one_path=False, unique_paths=self.unique_paths, node=False, path=False, value=False), + {123, 'tag#1234', 'tag#5678', 321, '2022-02-22'}) + # empty + self.assertEqual(to_dict_view([], one_node=True, one_path=True, unique_paths=self.unique_paths, node=False, path=False, value=False), None) + self.assertSetEqual(to_dict_view([], one_node=True, one_path=False, unique_paths=self.unique_paths, node=False, path=False, value=False), set()) + self.assertSetEqual(to_dict_view([], one_node=False, one_path=True, unique_paths=self.unique_paths, node=False, path=False, value=False), set()) + self.assertSetEqual(to_dict_view([], one_node=False, one_path=False, unique_paths=self.unique_paths, node=False, path=False, value=False), set()) + self.assertEqual(to_dict_view([], one_node=True, one_path=True, unique_paths=self.unique_paths, node=False, path=False, value=False, default=123), 123) + +## main ## + +if __name__ == '__main__': + unittest.main() + +## EOF ## diff --git a/test/graph/test_walk.py b/test/graph/test_walk.py new file mode 100644 index 0000000..f9dbc7a --- /dev/null +++ b/test/graph/test_walk.py @@ -0,0 +1,173 @@ +""" + +Part of the bsfs test suite. +A copy of the license is provided with the project. +Author: Matthias Baumgartner, 2022 +""" +# imports +import unittest + +# bsfs imports +from bsfs import schema as bsc +from bsfs.graph import Graph +from bsfs.namespace import Namespace, ns +from bsfs.triple_store.sparql import SparqlStore +from bsfs.utils import URI + +# symbol to test +from bsfs.graph.walk import Walk + +## code ## + +bse = ns.bse +bst = Namespace('http://bsfs.ai/schema/Tag') + +class TestWalk(unittest.TestCase): + def setUp(self): + # backend setup + self.schema = bsc.from_string(''' + prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> + prefix xsd: <http://www.w3.org/2001/XMLSchema#> + prefix bsfs: <http://bsfs.ai/schema/> + prefix bse: <http://bsfs.ai/schema/Entity#> + prefix bst: <http://bsfs.ai/schema/Tag#> + + bsfs:Entity rdfs:subClassOf bsfs:Node . + bsfs:Tag rdfs:subClassOf bsfs:Node . + bsfs:User rdfs:subClassOf bsfs:Node . + xsd:string rdfs:subClassOf bsfs:Literal . + + bse:author rdfs:subClassOf bsfs:Predicate ; + rdfs:domain bsfs:Node ; + rdfs:range bsfs:User . + + bse:tag rdfs:subClassOf bsfs:Predicate ; + rdfs:domain bsfs:Entity ; + rdfs:range bsfs:Tag . + + bst:label rdfs:subClassOf bsfs:Predicate ; + rdfs:domain bsfs:Tag ; + rdfs:range xsd:string ; + bsfs:unique "true"^^xsd:boolean . + + bst:subTagOf rdfs:subClassOf bsfs:Predicate ; + rdfs:domain bsfs:Tag ; + rdfs:range bsfs:Tag . + + bst:main rdfs:subClassOf bsfs:Predicate ; + rdfs:domain bsfs:Tag ; + rdfs:range bsfs:Entity . + + bst:author rdfs:subClassOf bsfs:Predicate ; + rdfs:domain bsfs:Tag ; + rdfs:range xsd:string . 
+ + ''') + self.backend = SparqlStore.Open() + self.user = URI('http://example.com/me') + self.graph = Graph(self.backend, self.user) + self.graph.migrate(self.schema) + + # nodes setup + self.ents = self.graph.nodes(ns.bsfs.Entity, { + URI('http://example.com/me/entity#1234'), + URI('http://example.com/me/entity#4321')}) + self.tags = self.graph.nodes(ns.bsfs.Tag, { + URI('http://example.com/me/tag#1234'), + URI('http://example.com/me/tag#4321')}) + # add some instances + self.ents.set(bse.tag, self.tags) + self.graph.node(ns.bsfs.Tag, URI('http://example.com/me/tag#1234')).set(bst.label, 'hello') + self.graph.node(ns.bsfs.Tag, URI('http://example.com/me/tag#4321')).set(bst.label, 'world') + + def test_essentials(self): # __eq__, __hash__, __str__, __repr__ + p_author = self.schema.predicate(bse.author) + p_tag = self.schema.predicate(bse.tag) + p_main = self.schema.predicate(bst.main) + # comparison + self.assertEqual(Walk(self.ents, [p_tag]), Walk(self.ents, [p_tag])) + self.assertEqual(hash(Walk(self.ents, [p_tag])), hash(Walk(self.ents, [p_tag]))) + # comparison respects type + class Foo(Walk): pass + self.assertNotEqual(Walk(self.ents, [p_tag]), Foo(self.ents, [p_tag])) + self.assertNotEqual(hash(Walk(self.ents, [p_tag])), hash(Foo(self.ents, [p_tag]))) + # comparison respects root + self.assertNotEqual(Walk(self.ents, [p_author]), Walk(self.tags, [p_author])) + self.assertNotEqual(hash(Walk(self.ents, [p_author])), hash(Walk(self.tags, [p_author]))) + # comparison respects path + self.assertNotEqual(Walk(self.tags, [p_author]), Walk(self.tags, [p_main])) + self.assertNotEqual(hash(Walk(self.tags, [p_author])), hash(Walk(self.tags, [p_main]))) + # string conversion + self.assertEqual(str(Walk(self.ents, [p_tag, p_main])), + 'Walk(@http://bsfs.ai/schema/Entity: http://bsfs.ai/schema/Entity#tag, http://bsfs.ai/schema/Tag#main)') + self.assertEqual(repr(Walk(self.ents, [p_tag, p_main])), + 'Walk(http://bsfs.ai/schema/Entity, (http://bsfs.ai/schema/Entity#tag, http://bsfs.ai/schema/Tag#main))') + + def test_tail(self): + self.assertEqual(Walk(self.ents, ( + self.schema.predicate(bse.tag), + )).tail, + self.schema.node(ns.bsfs.Tag)) + self.assertEqual(Walk(self.ents, ( + self.schema.predicate(bse.tag), + self.schema.predicate(bst.main), + )).tail, + self.schema.node(ns.bsfs.Entity)) + + def test_step(self): + tag_type = self.schema.node(ns.bsfs.Tag) + # step returns a predicate + self.assertEqual(Walk.step(self.schema, tag_type, 'subTagOf'), + (self.schema.predicate(bst.subTagOf), )) + # invalid step raises an error + self.assertRaises(ValueError, Walk.step, self.schema, tag_type, 'foobar') + # ambiguous step raises an error + self.assertRaises(ValueError, Walk.step, self.schema, tag_type, 'author') + + def test_getattr(self): # __getattr__ + walk = Walk(self.ents, (self.schema.predicate(bse.tag), )) + # first step + self.assertEqual(walk.subTagOf, Walk(self.ents, ( + self.schema.predicate(bse.tag), + self.schema.predicate(bst.subTagOf), + ))) + # second step + self.assertEqual(walk.subTagOf.main, Walk(self.ents, ( + self.schema.predicate(bse.tag), + self.schema.predicate(bst.subTagOf), + self.schema.predicate(bst.main), + ))) + # invalid step raises an error + self.assertRaises(ValueError, getattr, walk, 'foobar') + # ambiguous step raises an error + self.assertRaises(ValueError, getattr, walk, 'author') + + def test_get(self): # get, __call__ + walk = Walk(self.ents, (self.schema.predicate(bse.tag), )) + tags = { + self.graph.node(ns.bsfs.Tag, URI('http://example.com/me/tag#1234')), + 
self.graph.node(ns.bsfs.Tag, URI('http://example.com/me/tag#4321'))} + # get returns from Nodes.get + self.assertDictEqual(walk.get(), { + self.graph.node(ns.bsfs.Entity, URI('http://example.com/me/entity#1234')): tags, + self.graph.node(ns.bsfs.Entity, URI('http://example.com/me/entity#4321')): tags, + }) + self.assertDictEqual(walk(), { + self.graph.node(ns.bsfs.Entity, URI('http://example.com/me/entity#1234')): tags, + self.graph.node(ns.bsfs.Entity, URI('http://example.com/me/entity#4321')): tags, + }) + # get passes kwargs to Nodes.get + self.assertSetEqual(tags, walk.get(node=False)) + self.assertSetEqual(tags, walk(node=False)) + self.assertSetEqual(tags, set(walk.get(view=list, node=False))) + self.assertSetEqual(tags, set(walk(view=list, node=False))) + # get returns values if need be + self.assertSetEqual(walk.label(node=False), {'hello', 'world'}) + + +## main ## + +if __name__ == '__main__': + unittest.main() + +## EOF ## diff --git a/test/query/ast_test/test_fetch.py b/test/query/ast_test/test_fetch.py new file mode 100644 index 0000000..0c48a1f --- /dev/null +++ b/test/query/ast_test/test_fetch.py @@ -0,0 +1,239 @@ +""" + +Part of the tagit test suite. +A copy of the license is provided with the project. +Author: Matthias Baumgartner, 2022 +""" +# imports +import unittest + +# bsfs imports +from bsfs.namespace import ns +from bsfs.utils import URI + +# objects to test +from bsfs.query.ast.fetch import FetchExpression +from bsfs.query.ast.fetch import All, This +from bsfs.query.ast.fetch import _Branch, Fetch +from bsfs.query.ast.fetch import _Named, Node, Value + + +## code ## + +class TestExpression(unittest.TestCase): # FetchExpression + def test_essentials(self): + class Foo(FetchExpression): pass + # comparison + self.assertEqual(FetchExpression(), FetchExpression()) + self.assertEqual(hash(FetchExpression()), hash(FetchExpression())) + # comparison respects type + self.assertNotEqual(FetchExpression(), Foo()) + self.assertNotEqual(hash(FetchExpression()), hash(Foo())) + # string conversion + self.assertEqual(str(FetchExpression()), 'FetchExpression()') + self.assertEqual(repr(FetchExpression()), 'FetchExpression()') + self.assertEqual(str(Foo()), 'Foo()') + self.assertEqual(repr(Foo()), 'Foo()') + + +class TestAll(unittest.TestCase): # All + def test_essentials(self): + class Foo(All): pass + expr0 = This('hello') + expr1 = This('world') + # comparison + self.assertEqual(All(expr0), All(expr0)) + self.assertEqual(hash(All(expr0)), hash(All(expr0))) + # comparison respects type + self.assertNotEqual(All(expr0), Foo(expr0)) + self.assertNotEqual(hash(All(expr0)), hash(Foo(expr0))) + # comparison respects expressions + self.assertEqual(All(expr0, expr1), All(expr0, expr1)) + self.assertEqual(hash(All(expr0, expr1)), hash(All(expr0, expr1))) + self.assertNotEqual(All(expr0), All(expr1)) + self.assertNotEqual(hash(All(expr0)), hash(All(expr1))) + # expressions are unordered + self.assertEqual(All(expr0, expr1), All(expr1, expr0)) + self.assertEqual(hash(All(expr0, expr1)), hash(All(expr1, expr0))) + # string conversion + self.assertIn(str(All(expr0, expr1)), { + 'All({This(world), This(hello)})', + 'All({This(hello), This(world)})'}) + self.assertIn(repr(All(expr0, expr1)), { + 'All({This(world), This(hello)})', + 'All({This(hello), This(world)})'}) + + def test_members(self): + class Foo(): pass + expr0 = This('hello') + expr1 = This('world') + # requires at least one child expression + self.assertRaises(AttributeError, All) + # expr returns child expressions + 
+        self.assertEqual(All(expr0, expr1).expr, {expr0, expr1})
+        # can pass expressions as arguments
+        self.assertEqual(All(expr0, expr1).expr, {expr0, expr1})
+        # can pass a single expression as argument
+        self.assertEqual(All(expr0).expr, {expr0})
+        # can pass expressions as list-like
+        self.assertEqual(All([expr0, expr1]).expr, {expr0, expr1})
+        self.assertEqual(All((expr0, expr1)).expr, {expr0, expr1})
+        self.assertEqual(All({expr0, expr1}).expr, {expr0, expr1})
+        # can pass a single expression as list-like
+        self.assertEqual(All([expr0]).expr, {expr0})
+        # must pass a FetchExpression
+        self.assertRaises(TypeError, All, Foo())
+        self.assertRaises(TypeError, All, 1234)
+        self.assertRaises(TypeError, All, 'hello world')
+        # len returns the number of child expressions
+        self.assertEqual(len(All(expr0)), 1)
+        self.assertEqual(len(All(expr0, expr1)), 2)
+        # iter iterates over child expressions
+        self.assertSetEqual(set(All(expr0, expr1)), {expr0, expr1})
+
+
+class TestThis(unittest.TestCase): # This
+    def test_essentials(self):
+        class Foo(This): pass
+        # comparison
+        self.assertEqual(This('hello'), This('hello'))
+        self.assertEqual(hash(This('hello')), hash(This('hello')))
+        # comparison respects type
+        self.assertNotEqual(This('hello'), Foo('hello'))
+        self.assertNotEqual(hash(This('hello')), hash(Foo('hello')))
+        # comparison respects name
+        self.assertNotEqual(This('hello'), This('world'))
+        self.assertNotEqual(hash(This('hello')), hash(This('world')))
+        # string conversion
+        self.assertEqual(str(This('hello')), 'This(hello)')
+        self.assertEqual(repr(This('hello')), 'This(hello)')
+
+    def test_members(self):
+        class Foo(): pass
+        # name returns member
+        self.assertEqual(This('hello').name, 'hello')
+        self.assertEqual(This('world').name, 'world')
+        # name is converted to a string
+        self.assertEqual(This(1234).name, '1234')
+        foo = Foo()
+        self.assertEqual(This(foo).name, str(foo))
+
+
+class TestBranch(unittest.TestCase): # _Branch, Fetch
+    def test_essentials(self):
+        pred = ns.bse.tag
+        expr = FetchExpression()
+        # comparison
+        self.assertEqual(_Branch(pred), _Branch(pred))
+        self.assertEqual(hash(_Branch(pred)), hash(_Branch(pred)))
+        self.assertEqual(Fetch(pred, expr), Fetch(pred, expr))
+        self.assertEqual(hash(Fetch(pred, expr)), hash(Fetch(pred, expr)))
+        # comparison respects type
+        self.assertNotEqual(_Branch(pred), Fetch(pred, expr))
+        self.assertNotEqual(hash(_Branch(pred)), hash(Fetch(pred, expr)))
+        self.assertNotEqual(Fetch(pred, expr), _Branch(pred))
+        self.assertNotEqual(hash(Fetch(pred, expr)), hash(_Branch(pred)))
+        # comparison respects predicate
+        self.assertNotEqual(_Branch(pred), _Branch(ns.bse.filesize))
+        self.assertNotEqual(hash(_Branch(pred)), hash(_Branch(ns.bse.filesize)))
+        self.assertNotEqual(Fetch(pred, expr), Fetch(ns.bse.filesize, expr))
+        self.assertNotEqual(hash(Fetch(pred, expr)), hash(Fetch(ns.bse.filesize, expr)))
+        # comparison respects expression
+        self.assertNotEqual(Fetch(pred, expr), Fetch(pred, This('foo')))
+        self.assertNotEqual(hash(Fetch(pred, expr)), hash(Fetch(pred, This('foo'))))
+        # string conversion
+        self.assertEqual(str(_Branch(pred)), f'_Branch({pred})')
+        self.assertEqual(repr(_Branch(pred)), f'_Branch({pred})')
+        self.assertEqual(str(Fetch(pred, expr)), f'Fetch({pred}, {expr})')
+        self.assertEqual(repr(Fetch(pred, expr)), f'Fetch({pred}, {expr})')
+
+    def test_members(self):
+        class Foo(): pass
+        pred = ns.bse.tag
+        expr = FetchExpression()
+
+        # predicate returns member
+        self.assertEqual(_Branch(pred).predicate, pred)
+        self.assertEqual(Fetch(pred, expr).predicate, pred)
+        # can pass a URI
+        self.assertEqual(_Branch(ns.bse.filename).predicate, ns.bse.filename)
+        self.assertEqual(Fetch(ns.bse.filename, expr).predicate, ns.bse.filename)
+        # must pass a URI
+        self.assertRaises(TypeError, _Branch, Foo())
+        self.assertRaises(TypeError, Fetch, Foo(), expr)
+        # expression returns member
+        self.assertEqual(Fetch(pred, expr).expr, expr)
+        # expression must be a FetchExpression
+        self.assertRaises(TypeError, Fetch, ns.bse.filename, 'hello')
+        self.assertRaises(TypeError, Fetch, ns.bse.filename, 1234)
+        self.assertRaises(TypeError, Fetch, ns.bse.filename, Foo())
+
+
+class TestNamed(unittest.TestCase): # _Named, Node, Value
+    def test_essentials(self):
+        pred = ns.bse.tag
+        name = 'foobar'
+        # comparison
+        self.assertEqual(_Named(pred, name), _Named(pred, name))
+        self.assertEqual(hash(_Named(pred, name)), hash(_Named(pred, name)))
+        # comparison respects type
+        self.assertNotEqual(_Named(pred, name), Node(pred, name))
+        self.assertNotEqual(Node(pred, name), Value(pred, name))
+        self.assertNotEqual(Value(pred, name), _Named(pred, name))
+        self.assertNotEqual(hash(_Named(pred, name)), hash(Node(pred, name)))
+        self.assertNotEqual(hash(Node(pred, name)), hash(Value(pred, name)))
+        self.assertNotEqual(hash(Value(pred, name)), hash(_Named(pred, name)))
+        # comparison respects predicate
+        self.assertNotEqual(_Named(pred, name), _Named(ns.bse.filesize, name))
+        self.assertNotEqual(hash(_Named(pred, name)), hash(_Named(ns.bse.filesize, name)))
+        self.assertNotEqual(Node(pred, name), Node(ns.bse.filesize, name))
+        self.assertNotEqual(hash(Node(pred, name)), hash(Node(ns.bse.filesize, name)))
+        self.assertNotEqual(Value(pred, name), Value(ns.bse.filesize, name))
+        self.assertNotEqual(hash(Value(pred, name)), hash(Value(ns.bse.filesize, name)))
+        # comparison respects name
+        self.assertNotEqual(_Named(pred, name), _Named(pred, 'foo'))
+        self.assertNotEqual(hash(_Named(pred, name)), hash(_Named(pred, 'foo')))
+        self.assertNotEqual(Node(pred, name), Node(pred, 'foo'))
+        self.assertNotEqual(hash(Node(pred, name)), hash(Node(pred, 'foo')))
+        self.assertNotEqual(Value(pred, name), Value(pred, 'foo'))
+        self.assertNotEqual(hash(Value(pred, name)), hash(Value(pred, 'foo')))
+        # string conversion
+        self.assertEqual(str(_Named(pred, name)), f'_Named({pred}, {name})')
+        self.assertEqual(repr(_Named(pred, name)), f'_Named({pred}, {name})')
+        self.assertEqual(str(Node(pred, name)), f'Node({pred}, {name})')
+        self.assertEqual(repr(Node(pred, name)), f'Node({pred}, {name})')
+        self.assertEqual(str(Value(pred, name)), f'Value({pred}, {name})')
+        self.assertEqual(repr(Value(pred, name)), f'Value({pred}, {name})')
+
+    def test_members(self):
+        class Foo(): pass
+        pred = ns.bse.tag
+        name = 'foobar'
+        # predicate returns member
+        self.assertEqual(_Named(pred, name).predicate, pred)
+        self.assertEqual(Node(pred, name).predicate, pred)
+        self.assertEqual(Value(pred, name).predicate, pred)
+        # can pass a URI as predicate
+        self.assertEqual(_Named(ns.bse.filename, name).predicate, ns.bse.filename)
+        self.assertEqual(Node(ns.bse.filename, name).predicate, ns.bse.filename)
+        self.assertEqual(Value(ns.bse.filename, name).predicate, ns.bse.filename)
+        # must pass a URI
+        self.assertRaises(TypeError, _Named, Foo(), name)
+        self.assertRaises(TypeError, Node, Foo(), name)
+        self.assertRaises(TypeError, Value, Foo(), name)
+        # name returns member
+        self.assertEqual(_Named(pred, name).name, name)
+        self.assertEqual(Node(pred, name).name, name)
self.assertEqual(Value(pred, name).name, name) + # name is converted to a string + self.assertEqual(_Named(pred, 1234).name, '1234') + self.assertEqual(Node(pred, 1234).name, '1234') + self.assertEqual(Value(pred, 1234).name, '1234') + + +## main ## + +if __name__ == '__main__': + unittest.main() + +## EOF ## diff --git a/test/query/ast_test/test_filter_.py b/test/query/ast_test/test_filter_.py index 9eb92e2..39b98f8 100644 --- a/test/query/ast_test/test_filter_.py +++ b/test/query/ast_test/test_filter_.py @@ -20,6 +20,7 @@ from bsfs.query.ast.filter_ import _Value, Is, Equals, Substring, StartsWith, En from bsfs.query.ast.filter_ import _Bounded, LessThan, GreaterThan from bsfs.query.ast.filter_ import Predicate, OneOf from bsfs.query.ast.filter_ import IsIn, IsNotIn +from bsfs.query.ast.filter_ import Includes, Excludes, Between ## code ## @@ -456,13 +457,15 @@ class TestOneOf(unittest.TestCase): self.assertEqual(len(OneOf(Predicate(ns.bse.filesize), Predicate(ns.bse.filename), Predicate(ns.bse.tag))), 3) - def testIsIn(self): + def test_IsIn(self): + # cannot pass zero arguments + self.assertRaises(AttributeError, IsIn) # can pass expressions as arguments self.assertEqual(IsIn('http://example.com/entity#1234', 'http://example.com/entity#4321'), Or(Is('http://example.com/entity#1234'), Is('http://example.com/entity#4321'))) # can pass one expression as argument self.assertEqual(IsIn('http://example.com/entity#1234'), - Or(Is('http://example.com/entity#1234'))) + Is('http://example.com/entity#1234')) # can pass expressions as iterator self.assertEqual(IsIn(iter(('http://example.com/entity#1234', 'http://example.com/entity#4321'))), Or(Is('http://example.com/entity#1234'), Is('http://example.com/entity#4321'))) @@ -477,16 +480,18 @@ class TestOneOf(unittest.TestCase): Or(Is('http://example.com/entity#1234'), Is('http://example.com/entity#4321'))) # can pass one expression as list-like self.assertEqual(IsIn(['http://example.com/entity#1234']), - Or(Is('http://example.com/entity#1234'))) + Is('http://example.com/entity#1234')) - def testIsNotIn(self): + def test_IsNotIn(self): + # cannot pass zero arguments + self.assertRaises(AttributeError, IsNotIn) # can pass expressions as arguments self.assertEqual(IsNotIn('http://example.com/entity#1234', 'http://example.com/entity#4321'), Not(Or(Is('http://example.com/entity#1234'), Is('http://example.com/entity#4321')))) # can pass one expression as argument self.assertEqual(IsNotIn('http://example.com/entity#1234'), - Not(Or(Is('http://example.com/entity#1234')))) + Not(Is('http://example.com/entity#1234'))) # can pass expressions as iterator self.assertEqual(IsNotIn(iter(('http://example.com/entity#1234', 'http://example.com/entity#4321'))), Not(Or(Is('http://example.com/entity#1234'), Is('http://example.com/entity#4321')))) @@ -501,9 +506,110 @@ class TestOneOf(unittest.TestCase): Not(Or(Is('http://example.com/entity#1234'), Is('http://example.com/entity#4321')))) # can pass one expression as list-like self.assertEqual(IsNotIn(['http://example.com/entity#1234']), - Not(Or(Is('http://example.com/entity#1234')))) + Not(Is('http://example.com/entity#1234'))) + def test_Includes(self): + # cannot pass zero arguments + self.assertRaises(AttributeError, Includes) + # can pass expressions as arguments + self.assertEqual(Includes('hello', 'world'), + Or(Equals('hello'), Equals('world'))) + self.assertEqual(Includes('hello', 'world', approx=True), + Or(Substring('hello'), Substring('world'))) + # can pass one expression as argument + 
self.assertEqual(Includes('hello'), + Equals('hello')) + self.assertEqual(Includes('hello', approx=True), + Substring('hello')) + # can pass expressions as iterator + self.assertEqual(Includes(iter(('hello', 'world'))), + Or(Equals('hello'), Equals('world'))) + self.assertEqual(Includes(iter(('hello', 'world')), approx=True), + Or(Substring('hello'), Substring('world'))) + # can pass expressions as generator + def gen(): + yield 'hello' + yield 'world' + self.assertEqual(Includes(gen()), + Or(Equals('hello'), Equals('world'))) + self.assertEqual(Includes(gen(), approx=True), + Or(Substring('hello'), Substring('world'))) + # can pass expressions as list-like + self.assertEqual(Includes(['hello', 'world']), + Or(Equals('hello'), Equals('world'))) + self.assertEqual(Includes(['hello', 'world'], approx=True), + Or(Substring('hello'), Substring('world'))) + # can pass one expression as list-like + self.assertEqual(Includes(['hello']), + Equals('hello')) + self.assertEqual(Includes(['hello'], approx=True), + Substring('hello')) + + + def test_Excludes(self): + # cannot pass zero arguments + self.assertRaises(AttributeError, Excludes) + # can pass expressions as arguments + self.assertEqual(Excludes('hello', 'world'), + Not(Or(Equals('hello'), Equals('world')))) + self.assertEqual(Excludes('hello', 'world', approx=True), + Not(Or(Substring('hello'), Substring('world')))) + # can pass one expression as argument + self.assertEqual(Excludes('hello'), + Not(Equals('hello'))) + self.assertEqual(Excludes('hello', approx=True), + Not(Substring('hello'))) + # can pass expressions as iterator + self.assertEqual(Excludes(iter(('hello', 'world'))), + Not(Or(Equals('hello'), Equals('world')))) + self.assertEqual(Excludes(iter(('hello', 'world')), approx=True), + Not(Or(Substring('hello'), Substring('world')))) + # can pass expressions as generator + def gen(): + yield 'hello' + yield 'world' + self.assertEqual(Excludes(gen()), + Not(Or(Equals('hello'), Equals('world')))) + self.assertEqual(Excludes(gen(), approx=True), + Not(Or(Substring('hello'), Substring('world')))) + # can pass expressions as list-like + self.assertEqual(Excludes(['hello', 'world']), + Not(Or(Equals('hello'), Equals('world')))) + self.assertEqual(Excludes(['hello', 'world'], approx=True), + Not(Or(Substring('hello'), Substring('world')))) + # can pass one expression as list-like + self.assertEqual(Excludes(['hello']), + Not(Equals('hello'))) + self.assertEqual(Excludes(['hello'], approx=True), + Not(Substring('hello'))) + + + def test_Between(self): + # must specify at least one bound + self.assertRaises(ValueError, Between, float('inf'), float('inf')) + # lower bound must be less than the upper bound + self.assertRaises(ValueError, Between, 321, 123) + # can set a lower bound only + self.assertEqual(Between(123), + GreaterThan(123, strict=True)) + self.assertEqual(Between(123, lo_strict=False), + GreaterThan(123, strict=False)) + # can set an upper bound only + self.assertEqual(Between(hi=123), + LessThan(123, strict=True)) + self.assertEqual(Between(hi=123, hi_strict=False), + LessThan(123, strict=False)) + # can set both bounds + self.assertEqual(Between(123, 321), + And(GreaterThan(123, strict=True), LessThan(321, strict=True))) + self.assertEqual(Between(123, 321, False, False), + And(GreaterThan(123, strict=False), LessThan(321, strict=False))) + # can set identical bounds + self.assertRaises(ValueError, Between, 123, 123) + self.assertEqual(Between(123, 123, False, False), + Equals(123)) + ## main ## diff --git 
a/test/query/test_matcher.py b/test/query/test_matcher.py new file mode 100644 index 0000000..e830cf8 --- /dev/null +++ b/test/query/test_matcher.py @@ -0,0 +1,1182 @@ +""" + +Part of the tagit test suite. +A copy of the license is provided with the project. +Author: Matthias Baumgartner, 2022 +""" +# imports +import operator +import unittest + +# bsfs imports +from bsfs.namespace import ns +from bsfs.query import ast +from bsfs.utils import errors + +# objects to test +from bsfs.query.matcher import Any, Filter, Partial, Rest, _set_matcher + + +## code ## + +class TestAny(unittest.TestCase): + def test_essentials(self): + # comparison + a = Any() + b = Any() + self.assertNotEqual(Any(), Any()) + self.assertNotEqual(hash(Any()), hash(Any())) + self.assertNotEqual(a, Any()) + self.assertNotEqual(hash(a), hash(Any())) + self.assertNotEqual(a, b) + self.assertNotEqual(hash(a), hash(b)) + # comparison within sets + self.assertEqual(len({Any(), Any(), Any(), Any()}), 4) + self.assertEqual(len({Any() for _ in range(1000)}), 1000) + # string representation + self.assertEqual(str(Any()), 'Any()') + self.assertEqual(repr(Any()), 'Any()') + + +class TestRest(unittest.TestCase): + def test_essentials(self): + expr = ast.filter.Equals('hello') + # comparison + self.assertEqual(Rest(expr), Rest(expr)) + self.assertEqual(hash(Rest(expr)), hash(Rest(expr))) + # comparison respects type + class Foo(): pass + self.assertNotEqual(Rest(expr), 1234) + self.assertNotEqual(hash(Rest(expr)), hash(1234)) + self.assertNotEqual(Rest(expr), Foo()) + self.assertNotEqual(hash(Rest(expr)), hash(Foo())) + # comparison respects expr + self.assertNotEqual(Rest(expr), Rest(ast.filter.Equals('world'))) + self.assertNotEqual(hash(Rest(expr)), hash(Rest(ast.filter.Equals('world')))) + # default constructor -> Any -> Not equal + self.assertNotEqual(Rest(), Rest()) + self.assertNotEqual(hash(Rest()), hash(Rest())) + # string representation + self.assertEqual(str(Rest()), 'Rest(Any())') + self.assertEqual(str(Rest(expr)), 'Rest(Equals(hello))') + self.assertEqual(repr(Rest()), 'Rest(Any())') + self.assertEqual(repr(Rest(expr)), 'Rest(Equals(hello))') + + + +class TestPartial(unittest.TestCase): + def test_match(self): + p0 = Partial(ast.filter.LessThan) + p1 = Partial(ast.filter.LessThan, threshold=3) + p2 = Partial(ast.filter.LessThan, strict=False) + p3 = Partial(ast.filter.LessThan, threshold=3, strict=False) + # match respects name + self.assertTrue(p0.match('foo', None)) + self.assertTrue(p1.match('foo', None)) + self.assertTrue(p2.match('foo', None)) + self.assertTrue(p3.match('foo', None)) + # match respects correct value + self.assertTrue(p0.match('threshold', 3)) + self.assertTrue(p1.match('threshold', 3)) + self.assertTrue(p2.match('threshold', 3)) + self.assertTrue(p3.match('threshold', 3)) + self.assertTrue(p0.match('strict', False)) + self.assertTrue(p1.match('strict', False)) + self.assertTrue(p2.match('strict', False)) + self.assertTrue(p3.match('strict', False)) + # match respects incorrect value + self.assertTrue(p0.match('threshold', 5)) + self.assertFalse(p1.match('threshold', 5)) + self.assertTrue(p2.match('threshold', 5)) + self.assertFalse(p3.match('threshold', 5)) + self.assertTrue(p0.match('strict', True)) + self.assertTrue(p1.match('strict', True)) + self.assertFalse(p2.match('strict', True)) + self.assertFalse(p3.match('strict', True)) + + def test_members(self): + # node returns expression + self.assertEqual(Partial(ast.filter.Equals).node, ast.filter.Equals) + 
self.assertEqual(Partial(ast.filter.LessThan).node, ast.filter.LessThan) + # kwargs returns arguments + self.assertDictEqual(Partial(ast.filter.Equals, value='hello').kwargs, + {'value': 'hello'}) + self.assertDictEqual(Partial(ast.filter.LessThan, threshold=3, strict=False).kwargs, + {'threshold': 3, 'strict': False}) + # Partial does not check about kwargs + self.assertDictEqual(Partial(ast.filter.LessThan, value='hello').kwargs, + {'value': 'hello'}) + self.assertDictEqual(Partial(ast.filter.Equals, threshold=3, strict=False).kwargs, + {'threshold': 3, 'strict': False}) + + def test_essentials(self): + # comparison respects type + class Foo(): pass + self.assertNotEqual(Partial(ast.filter.Equals), 1234) + self.assertNotEqual(hash(Partial(ast.filter.Equals)), hash(1234)) + self.assertNotEqual(Partial(ast.filter.Equals), Foo()) + self.assertNotEqual(hash(Partial(ast.filter.Equals)), hash(Foo())) + self.assertNotEqual(Partial(ast.filter.Equals), ast.filter.Equals) + self.assertNotEqual(hash(Partial(ast.filter.Equals)), hash(ast.filter.Equals)) + self.assertNotEqual(Partial(ast.filter.Equals), ast.filter.Equals('hello')) + self.assertNotEqual(hash(Partial(ast.filter.Equals)), hash(ast.filter.Equals('hello'))) + # comparison respects node + self.assertEqual(Partial(ast.filter.Equals), Partial(ast.filter.Equals)) + self.assertEqual(hash(Partial(ast.filter.Equals)), hash(Partial(ast.filter.Equals))) + self.assertEqual(Partial(ast.filter.LessThan), Partial(ast.filter.LessThan)) + self.assertEqual(hash(Partial(ast.filter.LessThan)), hash(Partial(ast.filter.LessThan))) + self.assertNotEqual(Partial(ast.filter.Equals), Partial(ast.filter.LessThan)) + self.assertNotEqual(hash(Partial(ast.filter.Equals)), hash(Partial(ast.filter.LessThan))) + # comparison respects kwargs + self.assertEqual( + Partial(ast.filter.Equals, value='hello'), + Partial(ast.filter.Equals, value='hello')) + self.assertEqual( + hash(Partial(ast.filter.Equals, value='hello')), + hash(Partial(ast.filter.Equals, value='hello'))) + self.assertEqual( + Partial(ast.filter.LessThan, threshold=3, strict=False), + Partial(ast.filter.LessThan, threshold=3, strict=False)) + self.assertEqual( + hash(Partial(ast.filter.LessThan, threshold=3, strict=False)), + hash(Partial(ast.filter.LessThan, threshold=3, strict=False))) + self.assertNotEqual( + Partial(ast.filter.Equals, value='hello'), + Partial(ast.filter.Equals)) + self.assertNotEqual( + hash(Partial(ast.filter.Equals, value='hello')), + hash(Partial(ast.filter.Equals))) + self.assertNotEqual( + Partial(ast.filter.Equals, value='hello'), + Partial(ast.filter.Equals, value='world')) + self.assertNotEqual( + hash(Partial(ast.filter.Equals, value='hello')), + hash(Partial(ast.filter.Equals, value='world'))) + self.assertNotEqual( + Partial(ast.filter.LessThan, threshold=3, strict=False), + Partial(ast.filter.LessThan)) + self.assertNotEqual( + hash(Partial(ast.filter.LessThan, threshold=3, strict=False)), + hash(Partial(ast.filter.LessThan))) + self.assertNotEqual( + Partial(ast.filter.LessThan, threshold=3, strict=False), + Partial(ast.filter.LessThan, threshold=5)) + self.assertNotEqual( + hash(Partial(ast.filter.LessThan, threshold=3, strict=False)), + hash(Partial(ast.filter.LessThan, threshold=5))) + self.assertNotEqual( + Partial(ast.filter.LessThan, threshold=3, strict=False), + Partial(ast.filter.LessThan, strict=False)) + self.assertNotEqual( + hash(Partial(ast.filter.LessThan, threshold=3, strict=False)), + hash(Partial(ast.filter.LessThan, strict=False))) + self.assertNotEqual( 
+ Partial(ast.filter.LessThan, threshold=3, strict=False), + Partial(ast.filter.LessThan, threshold=3, strict=True)) + self.assertNotEqual( + hash(Partial(ast.filter.LessThan, threshold=3, strict=False)), + hash(Partial(ast.filter.LessThan, threshold=3, strict=True))) + self.assertNotEqual( + Partial(ast.filter.LessThan, threshold=3, strict=False), + Partial(ast.filter.LessThan, threshold=5, strict=False)) + self.assertNotEqual( + hash(Partial(ast.filter.LessThan, threshold=3, strict=False)), + hash(Partial(ast.filter.LessThan, threshold=5, strict=False))) + # string representation + self.assertEqual(str(Partial(ast.filter.Equals)), 'Partial(Equals, {})') + self.assertEqual(repr(Partial(ast.filter.Equals)), 'Partial(Equals, {})') + self.assertEqual(str(Partial(ast.filter.LessThan)), 'Partial(LessThan, {})') + self.assertEqual(repr(Partial(ast.filter.LessThan)), 'Partial(LessThan, {})') + self.assertEqual(str(Partial(ast.filter.Equals, value='hello')), "Partial(Equals, {'value': 'hello'})") + self.assertEqual(repr(Partial(ast.filter.Equals, value='hello')), "Partial(Equals, {'value': 'hello'})") + self.assertEqual(str(Partial(ast.filter.LessThan, threshold=3)), "Partial(LessThan, {'threshold': 3})") + self.assertEqual(repr(Partial(ast.filter.LessThan, threshold=3)), "Partial(LessThan, {'threshold': 3})") + self.assertEqual(str(Partial(ast.filter.LessThan, strict=False)), "Partial(LessThan, {'strict': False})") + self.assertEqual(repr(Partial(ast.filter.LessThan, strict=False)), "Partial(LessThan, {'strict': False})") + self.assertEqual(str(Partial(ast.filter.LessThan, threshold=3, strict=False)), "Partial(LessThan, {'threshold': 3, 'strict': False})") + self.assertEqual(repr(Partial(ast.filter.LessThan, threshold=3, strict=False)), "Partial(LessThan, {'threshold': 3, 'strict': False})") + + +class TestSetMatcher(unittest.TestCase): + def test_set_matcher(self): + # setup + A = ast.filter.Equals('A') + B = ast.filter.Equals('B') + C = ast.filter.Equals('C') + D = ast.filter.Equals('D') + matcher = Filter() + + # identical sets match + self.assertTrue(_set_matcher( + ast.filter.And(A, B, C), + ast.filter.And(A, B, C), + matcher._parse_filter_expression, + )) + + # order is irrelevant + self.assertTrue(_set_matcher( + ast.filter.And(A, B, C), + ast.filter.And(B, C, A), + matcher._parse_filter_expression, + )) + + # all reference items must be present + self.assertFalse(_set_matcher( + ast.filter.And(A, B), + ast.filter.And(A, B, C), + matcher._parse_filter_expression, + )) + + # all reference items must have a match + self.assertFalse(_set_matcher( + ast.filter.And(A, B, C), + ast.filter.And(D, B, C), + matcher._parse_filter_expression, + )) + self.assertFalse(_set_matcher( + ast.filter.And(A, B, C), + ast.filter.And(A, D, C), + matcher._parse_filter_expression, + )) + self.assertFalse(_set_matcher( + ast.filter.And(A, B, C), + ast.filter.And(A, B, D), + matcher._parse_filter_expression, + )) + + # Any matches every item + self.assertTrue(_set_matcher( + ast.filter.And(A, B, C), + ast.filter.And(Any(), B, C), + matcher._parse_filter_expression, + )) + self.assertTrue(_set_matcher( + ast.filter.And(A, B, C), + ast.filter.And(A, Any(), C), + matcher._parse_filter_expression, + )) + self.assertTrue(_set_matcher( + ast.filter.And(A, B, C), + ast.filter.And(A, B, Any()), + matcher._parse_filter_expression, + )) + self.assertTrue(_set_matcher( + ast.filter.And(A, B, D), + ast.filter.And(A, B, Any()), + matcher._parse_filter_expression, + )) + + # there can be multiple Any's + 
+        self.assertTrue(_set_matcher(
+            ast.filter.And(A, B, C),
+            ast.filter.And(A, Any(), Any()),
+            matcher._parse_filter_expression,
+        ))
+        self.assertTrue(_set_matcher(
+            ast.filter.And(A, B, C),
+            ast.filter.And(Any(), B, Any()),
+            matcher._parse_filter_expression,
+        ))
+        self.assertTrue(_set_matcher(
+            ast.filter.And(A, B, C),
+            ast.filter.And(Any(), Any(), C),
+            matcher._parse_filter_expression,
+        ))
+
+        # Any covers exactly one element
+        self.assertTrue(_set_matcher(
+            ast.filter.And(A, B, C),
+            ast.filter.And(A, B, Any()),
+            matcher._parse_filter_expression,
+        ))
+        self.assertTrue(_set_matcher(
+            ast.filter.And(A, B, D),
+            ast.filter.And(A, B, Any()),
+            matcher._parse_filter_expression,
+        ))
+        self.assertFalse(_set_matcher(
+            ast.filter.And(A, B),
+            ast.filter.And(A, B, Any()),
+            matcher._parse_filter_expression,
+        ))
+        self.assertFalse(_set_matcher(
+            ast.filter.And(A, B, C, D),
+            ast.filter.And(A, B, Any()),
+            matcher._parse_filter_expression,
+        ))
+
+        # each Any covers exactly one element
+        self.assertTrue(_set_matcher(
+            ast.filter.And(A, B, C),
+            ast.filter.And(Any(), Any(), Any()),
+            matcher._parse_filter_expression,
+        ))
+        self.assertFalse(_set_matcher(
+            ast.filter.And(A, B, C),
+            ast.filter.And(Any(), Any()),
+            matcher._parse_filter_expression,
+        ))
+        self.assertFalse(_set_matcher(
+            ast.filter.And(A, B),
+            ast.filter.And(Any(), Any(), Any()),
+            matcher._parse_filter_expression,
+        ))
+
+        # Rest captures remainder
+        self.assertTrue(_set_matcher(
+            ast.filter.And(A, B, C),
+            ast.filter.And(A, B, Rest()),
+            matcher._parse_filter_expression,
+        ))
+        self.assertTrue(_set_matcher(
+            ast.filter.And(A, B, C, D),
+            ast.filter.And(A, B, Rest()),
+            matcher._parse_filter_expression,
+        ))
+        # remainder matches the empty set
+        self.assertTrue(_set_matcher(
+            ast.filter.And(A, B),
+            ast.filter.And(A, B, Rest()),
+            matcher._parse_filter_expression,
+        ))
+        # Rest does not absolve other reference items from having a match
+        self.assertFalse(_set_matcher(
+            ast.filter.And(A, C, D),
+            ast.filter.And(A, B, Rest()),
+            matcher._parse_filter_expression,
+        ))
+        # Rest can be combined with Any ...
+        self.assertTrue(_set_matcher(
+            ast.filter.And(A, C, D),
+            ast.filter.And(A, Any(), Rest()),
+            matcher._parse_filter_expression,
+        ))
+        self.assertTrue(_set_matcher(
+            ast.filter.And(A, C, D),
+            ast.filter.And(A, Any(), Rest()),
+            matcher._parse_filter_expression,
+        ))
+        # ... explicit items still need to match
+        self.assertFalse(_set_matcher(
+            ast.filter.And(A, C, D),
+            ast.filter.And(B, Any(), Rest()),
+            matcher._parse_filter_expression,
+        ))
+        # ... Any still determines minimum element count
+        self.assertTrue(_set_matcher(
+            ast.filter.And(A, B),
+            ast.filter.And(A, Any(), Rest()),
+            matcher._parse_filter_expression,
+        ))
+        self.assertFalse(_set_matcher(
+            ast.filter.And(A, B),
+            ast.filter.And(A, Any(), Any(), Rest()),
+            matcher._parse_filter_expression,
+        ))
+        # Rest cannot be repeated ...
+        self.assertRaises(errors.BackendError, _set_matcher,
+            ast.filter.And(A, B, C),
+            ast.filter.And(A, Rest(), Rest(ast.filter.Equals('hello'))),
+            matcher._parse_filter_expression,
+        )
+        # ... unless they are identical
+        self.assertRaises(errors.BackendError, _set_matcher,
+            ast.filter.And(A, B, C),
+            ast.filter.And(A, Rest(), Rest()), # Any instances are different!
+            matcher._parse_filter_expression,
+        )
+        # ...
unless they are identical + self.assertTrue(_set_matcher( + ast.filter.And(A, B, C), + ast.filter.And(A, B, Rest(C), Rest(C)), + matcher._parse_filter_expression, + )) + # Rest can mandate a specific expression + self.assertTrue(_set_matcher( + ast.filter.And(A, B, C), + ast.filter.And(A, B, Rest(C)), + matcher._parse_filter_expression, + )) + self.assertFalse(_set_matcher( + ast.filter.And(A, B, C), + ast.filter.And(A, B, Rest(D)), + matcher._parse_filter_expression, + )) + # Rest can mandate a partial expression + self.assertTrue(_set_matcher( + ast.filter.And(A, B, C), + ast.filter.And(A, B, Rest(Partial(ast.filter.Equals))), + matcher._parse_filter_expression, + )) + self.assertTrue(_set_matcher( + ast.filter.And(A, B, C), + ast.filter.And(A, Rest(Partial(ast.filter.Equals))), + matcher._parse_filter_expression, + )) + self.assertFalse(_set_matcher( + ast.filter.And(A, B, C), + ast.filter.And(A, B, Rest(Partial(ast.filter.Substring))), + matcher._parse_filter_expression, + )) + self.assertFalse(_set_matcher( + ast.filter.And(A, B, C), + ast.filter.And(A, B, Rest(Partial(ast.filter.Equals, value='D'))), + matcher._parse_filter_expression, + )) + # Rest can be the only expression + self.assertTrue(_set_matcher( + ast.filter.And(A, B, C), + ast.filter.And(Rest(Partial(ast.filter.Equals))), + matcher._parse_filter_expression, + )) + # Rest's expression defaults to Any + self.assertTrue(_set_matcher( + ast.filter.And(A, B, C), + ast.filter.And(Rest()), + matcher._parse_filter_expression, + )) + + +class TestFilter(unittest.TestCase): + def setUp(self): + self.match = Filter() + + def test_call(self): + # query must be a filter expression + self.assertRaises(errors.BackendError, self.match, 1234, Any()) + self.assertRaises(errors.BackendError, self.match, ast.filter.Predicate(ns.bse.filename), Any()) + # reference must be a filter expression + self.assertRaises(errors.BackendError, self.match, ast.filter.Equals('hello'), 1234) + self.assertRaises(errors.BackendError, self.match, ast.filter.Equals('hello'), ast.filter.Predicate(ns.bse.filename)) + # reference can be Any or Partial + self.assertTrue(self.match( + ast.filter.Equals('hello'), + Any(), + )) + self.assertTrue(self.match( + ast.filter.Equals('hello'), + Partial(ast.filter.Equals), + )) + # call parses expression + self.assertTrue(self.match( + # query + ast.filter.And( + ast.filter.Any(ns.bse.tag, + ast.filter.All(ns.bse.label, + ast.filter.Or( + ast.filter.Equals('hello'), + ast.filter.Equals('world'), + ast.filter.StartsWith('foo'), + ast.filter.EndsWith('bar'), + ) + ) + ), + ast.filter.Any(ns.bse.iso, + ast.filter.And( + ast.filter.GreaterThan(100, strict=True), + ast.filter.LessThan(200, strict=False), + ) + ), + ast.filter.Any(ast.filter.OneOf(ns.bse.featureA, ns.bse.featureB), + ast.filter.Distance([1,2,3], 1) + ), + ), + # reference + ast.filter.And( + ast.filter.Any(Any(), + ast.filter.All(Partial(ast.filter.Predicate, reverse=False), + ast.filter.Or( + Partial(ast.filter.StartsWith), + ast.filter.EndsWith('bar'), + Rest(Partial(ast.filter.Equals)), + ) + ) + ), + ast.filter.Any(ns.bse.iso, + ast.filter.And( + Partial(ast.filter.GreaterThan, strict=True), + Any(), + Rest(), + ) + ), + ast.filter.Any(ast.filter.OneOf(Rest()), + Partial(ast.filter.Distance) + ), + ), + )) + self.assertFalse(self.match( + # query + ast.filter.Any(ns.bse.tag, + ast.filter.And( + ast.filter.Any(ns.bse.label, ast.filter.Equals('hello')), + ast.filter.Any(ns.bse.collection, ast.filter.Is('http://example.com/col#123')), + 
ast.filter.Not(ast.filter.Has(ns.bse.label)), + ) + ), + # reference + ast.filter.Any(ns.bse.tag, + ast.filter.And( + Any(), + ast.filter.Any(Partial(ast.filter.Predicate, reverse=True), # reverse mismatch + Partial(ast.filter.Is)), + ast.filter.Not(ast.filter.Has(Any(), Any())), + ) + ) + )) + + def test_parse_filter_expression(self): + # Any matches every filter expression + self.assertTrue(self.match._parse_filter_expression( + ast.filter.Not(ast.filter.FilterExpression()), Any())) + self.assertTrue(self.match._parse_filter_expression( + ast.filter.Has(ns.bse.filename), Any())) + self.assertTrue(self.match._parse_filter_expression( + ast.filter.Distance([1,2,3], 1.0), Any())) + self.assertTrue(self.match._parse_filter_expression( + ast.filter.Any(ns.bse.filename, ast.filter.Equals('hello')), Any())) + self.assertTrue(self.match._parse_filter_expression( + ast.filter.All(ns.bse.filename, ast.filter.Equals('hello')), Any())) + self.assertTrue(self.match._parse_filter_expression( + ast.filter.And(ast.filter.Equals('hello')), Any())) + self.assertTrue(self.match._parse_filter_expression( + ast.filter.Or(ast.filter.Equals('hello')), Any())) + self.assertTrue(self.match._parse_filter_expression( + ast.filter.Equals('hello'), Any())) + self.assertTrue(self.match._parse_filter_expression( + ast.filter.Substring('hello'), Any())) + self.assertTrue(self.match._parse_filter_expression( + ast.filter.StartsWith('hello'), Any())) + self.assertTrue(self.match._parse_filter_expression( + ast.filter.EndsWith('hello'), Any())) + self.assertTrue(self.match._parse_filter_expression( + ast.filter.Is('hello'), Any())) + self.assertTrue(self.match._parse_filter_expression( + ast.filter.LessThan(3), Any())) + self.assertTrue(self.match._parse_filter_expression( + ast.filter.GreaterThan(3), Any())) + # Any matches invalid filter expressions + self.assertTrue(self.match._parse_filter_expression( + ast.filter.FilterExpression(), Any())) + # node must be an appropriate filter expression + self.assertRaises(errors.BackendError, self.match._parse_filter_expression, + ast.filter.FilterExpression(), ast.filter.FilterExpression()) + self.assertRaises(errors.BackendError, self.match._parse_filter_expression, + 1234, ast.filter.FilterExpression()) + + def test_parse_predicate_expression(self): + # Any matches every predicate expression + self.assertTrue(self.match._parse_predicate_expression( + ast.filter.Predicate(ns.bse.filename), Any())) + self.assertTrue(self.match._parse_predicate_expression( + ast.filter.OneOf(ns.bse.filename), Any())) + # Any matches invalid predicate expression + self.assertTrue(self.match._parse_predicate_expression( + ast.filter.FilterExpression(), Any())) + # node must be an appropriate predicate expression + self.assertRaises(errors.BackendError, self.match._parse_predicate_expression, + ast.filter.PredicateExpression(), ast.filter.PredicateExpression()) + self.assertRaises(errors.BackendError, self.match._parse_predicate_expression, + 1234, ast.filter.PredicateExpression()) + + def test_predicate(self): + # identical expressions match + self.assertTrue(self.match._predicate( + ast.filter.Predicate(ns.bse.filename, reverse=False), + ast.filter.Predicate(ns.bse.filename, reverse=False), + )) + # _predicate respects type + self.assertFalse(self.match._predicate( + ast.filter.Predicate(ns.bse.filename, reverse=False), + ast.filter.FilterExpression(), + )) + # _predicate respects predicate + self.assertFalse(self.match._predicate( + ast.filter.Predicate(ns.bse.filename, reverse=False), + 
+            ast.filter.Predicate(ns.bse.filesize, reverse=False),
+        ))
+        # _predicate respects reverse
+        self.assertFalse(self.match._predicate(
+            ast.filter.Predicate(ns.bse.filename, reverse=False),
+            ast.filter.Predicate(ns.bse.filename, reverse=True),
+        ))
+        # Partial requires ast.filter.Predicate
+        self.assertFalse(self.match._predicate(
+            ast.filter.Predicate(ns.bse.filename, reverse=False),
+            Partial(ast.filter.Equals),
+        ))
+        # predicate and reverse can be specified
+        self.assertTrue(self.match._predicate(
+            ast.filter.Predicate(ns.bse.filename, reverse=False),
+            Partial(ast.filter.Predicate, predicate=ns.bse.filename, reverse=False),
+        ))
+        self.assertFalse(self.match._predicate(
+            ast.filter.Predicate(ns.bse.filename, reverse=False),
+            Partial(ast.filter.Predicate, predicate=ns.bse.filesize, reverse=False),
+        ))
+        self.assertFalse(self.match._predicate(
+            ast.filter.Predicate(ns.bse.filename, reverse=False),
+            Partial(ast.filter.Predicate, predicate=ns.bse.filename, reverse=True),
+        ))
+        # predicate can remain unspecified
+        self.assertTrue(self.match._predicate(
+            ast.filter.Predicate(ns.bse.filename, reverse=False),
+            Partial(ast.filter.Predicate, reverse=False),
+        ))
+        self.assertTrue(self.match._predicate(
+            ast.filter.Predicate(ns.bse.filesize, reverse=False),
+            Partial(ast.filter.Predicate, reverse=False),
+        ))
+        self.assertFalse(self.match._predicate(
+            ast.filter.Predicate(ns.bse.filesize, reverse=False),
+            Partial(ast.filter.Predicate, reverse=True),
+        ))
+        # reverse can remain unspecified
+        self.assertTrue(self.match._predicate(
+            ast.filter.Predicate(ns.bse.filename, reverse=False),
+            Partial(ast.filter.Predicate, predicate=ns.bse.filename),
+        ))
+        self.assertTrue(self.match._predicate(
+            ast.filter.Predicate(ns.bse.filename, reverse=True),
+            Partial(ast.filter.Predicate, predicate=ns.bse.filename),
+        ))
+        self.assertFalse(self.match._predicate(
+            ast.filter.Predicate(ns.bse.filename, reverse=False),
+            Partial(ast.filter.Predicate, predicate=ns.bse.filesize),
+        ))
+
+    def test_one_of(self):
+        A = ast.filter.Predicate(ns.bse.filename)
+        B = ast.filter.Predicate(ns.bse.filesize)
+        C = ast.filter.Predicate(ns.bse.filename, reverse=True)
+        # identical expressions match
+        self.assertTrue(self.match._one_of(
+            ast.filter.OneOf(A, B),
+            ast.filter.OneOf(A, B),
+        ))
+        # _one_of respects type
+        self.assertFalse(self.match._one_of(
+            ast.filter.OneOf(A, B),
+            ast.filter.Predicate(ns.bse.filesize, reverse=True),
+        ))
+        # _one_of respects child expressions
+        self.assertFalse(self.match._one_of(
+            ast.filter.OneOf(A, B),
+            ast.filter.OneOf(A, C),
+        ))
+        self.assertFalse(self.match._one_of(
+            ast.filter.OneOf(A, B),
+            ast.filter.OneOf(A),
+        ))
+        self.assertFalse(self.match._one_of(
+            ast.filter.OneOf(A, B),
+            ast.filter.OneOf(A, B, C),
+        ))
+        self.assertTrue(self.match._one_of(
+            ast.filter.OneOf(A, B),
+            ast.filter.OneOf(B, A),
+        ))
+        self.assertTrue(self.match._one_of(
+            ast.filter.OneOf(A, B),
+            ast.filter.OneOf(A, Any()),
+        ))
+        self.assertTrue(self.match._one_of(
+            ast.filter.OneOf(A, B),
+            ast.filter.OneOf(B, Rest()),
+        ))
+
+    def test_branch(self):
+        # identical expressions match
+        self.assertTrue(self.match._branch(
+            ast.filter.Any(ns.bse.filename, ast.filter.Equals('hello')),
+            ast.filter.Any(ns.bse.filename, ast.filter.Equals('hello')),
+        ))
+        self.assertTrue(self.match._branch(
+            ast.filter.All(ns.bse.filename, ast.filter.Equals('hello')),
+            ast.filter.All(ns.bse.filename, ast.filter.Equals('hello')),
+        ))
+        # _branch respects type
+        self.assertFalse(self.match._branch(
+            ast.filter.Any(ns.bse.filename, ast.filter.Equals('hello')),
+            ast.filter.All(ns.bse.filename, ast.filter.Equals('hello')),
+        ))
+        self.assertFalse(self.match._branch(
+            ast.filter.All(ns.bse.filename, ast.filter.Equals('hello')),
+            ast.filter.Any(ns.bse.filename, ast.filter.Equals('hello')),
+        ))
+        self.assertFalse(self.match._branch(
+            ast.filter.Any(ns.bse.filename, ast.filter.Equals('hello')),
+            ast.filter.Equals('hello'),
+        ))
+        self.assertFalse(self.match._branch(
+            ast.filter.All(ns.bse.filename, ast.filter.Equals('hello')),
+            ast.filter.Equals('hello'),
+        ))
+        # _branch respects predicate expression
+        self.assertTrue(self.match._branch(
+            ast.filter.Any(ns.bse.filename, ast.filter.Equals('hello')),
+            ast.filter.Any(ast.filter.Predicate(ns.bse.filename), ast.filter.Equals('hello')),
+        ))
+        self.assertTrue(self.match._branch(
+            ast.filter.All(ns.bse.filename, ast.filter.Equals('hello')),
+            ast.filter.All(ast.filter.Predicate(ns.bse.filename), ast.filter.Equals('hello')),
+        ))
+        self.assertFalse(self.match._branch(
+            ast.filter.Any(ns.bse.filename, ast.filter.Equals('hello')),
+            ast.filter.Any(ns.bse.filesize, ast.filter.Equals('hello')),
+        ))
+        self.assertFalse(self.match._branch(
+            ast.filter.All(ns.bse.filename, ast.filter.Equals('hello')),
+            ast.filter.All(ns.bse.filesize, ast.filter.Equals('hello')),
+        ))
+        self.assertFalse(self.match._branch(
+            ast.filter.Any(ns.bse.filename, ast.filter.Equals('hello')),
+            ast.filter.Any(ast.filter.OneOf(ns.bse.filename), ast.filter.Equals('hello')),
+        ))
+        self.assertFalse(self.match._branch(
+            ast.filter.All(ns.bse.filename, ast.filter.Equals('hello')),
+            ast.filter.All(ast.filter.OneOf(ns.bse.filename), ast.filter.Equals('hello')),
+        ))
+        self.assertFalse(self.match._branch(
+            ast.filter.Any(ns.bse.filename, ast.filter.Equals('hello')),
+            ast.filter.Any(ast.filter.Predicate(ns.bse.filename, reverse=True), ast.filter.Equals('hello')),
+        ))
+        self.assertFalse(self.match._branch(
+            ast.filter.All(ns.bse.filename, ast.filter.Equals('hello')),
+            ast.filter.All(ast.filter.Predicate(ns.bse.filename, reverse=True), ast.filter.Equals('hello')),
+        ))
+        self.assertTrue(self.match._branch(
+            ast.filter.Any(ns.bse.filename, ast.filter.Equals('hello')),
+            ast.filter.Any(Any(), ast.filter.Equals('hello')),
+        ))
+        self.assertTrue(self.match._branch(
+            ast.filter.All(ns.bse.filename, ast.filter.Equals('hello')),
+            ast.filter.All(Any(), ast.filter.Equals('hello')),
+        ))
+        self.assertTrue(self.match._branch(
+            ast.filter.Any(ns.bse.filename, ast.filter.Equals('hello')),
+            ast.filter.Any(Partial(ast.filter.Predicate), ast.filter.Equals('hello')),
+        ))
+        self.assertTrue(self.match._branch(
+            ast.filter.All(ns.bse.filename, ast.filter.Equals('hello')),
+            ast.filter.All(Partial(ast.filter.Predicate), ast.filter.Equals('hello')),
+        ))
+        # _branch respects filter expression
+        self.assertFalse(self.match._branch(
+            ast.filter.Any(ns.bse.filename, ast.filter.Equals('hello')),
+            ast.filter.Any(ns.bse.filename, ast.filter.Substring('hello')),
+        ))
+        self.assertFalse(self.match._branch(
+            ast.filter.All(ns.bse.filename, ast.filter.Equals('hello')),
+            ast.filter.All(ns.bse.filename, ast.filter.Substring('hello')),
+        ))
+        self.assertFalse(self.match._branch(
+            ast.filter.Any(ns.bse.filename, ast.filter.Equals('hello')),
+            ast.filter.Any(ns.bse.filename, ast.filter.Any(Any(), Any())),
+        ))
+        self.assertFalse(self.match._branch(
+            ast.filter.All(ns.bse.filename, ast.filter.Equals('hello')),
+            ast.filter.All(ns.bse.filename, ast.filter.All(Any(), Any())),
+        ))
self.assertTrue(self.match._branch( + ast.filter.Any(ns.bse.filename, ast.filter.Equals('hello')), + ast.filter.Any(ns.bse.filename, Any()), + )) + self.assertTrue(self.match._branch( + ast.filter.All(ns.bse.filename, ast.filter.Equals('hello')), + ast.filter.All(ns.bse.filename, Any()), + )) + self.assertTrue(self.match._branch( + ast.filter.Any(ns.bse.filename, ast.filter.Equals('hello')), + ast.filter.Any(ns.bse.filename, Partial(ast.filter.Equals)), + )) + self.assertTrue(self.match._branch( + ast.filter.All(ns.bse.filename, ast.filter.Equals('hello')), + ast.filter.All(ns.bse.filename, Partial(ast.filter.Equals)), + )) + self.assertFalse(self.match._branch( + ast.filter.Any(ns.bse.filename, ast.filter.Equals('hello')), + ast.filter.Any(ns.bse.filename, Partial(ast.filter.Equals, value='world')), + )) + self.assertFalse(self.match._branch( + ast.filter.All(ns.bse.filename, ast.filter.Equals('hello')), + ast.filter.All(ns.bse.filename, Partial(ast.filter.Equals, value='world')), + )) + + def test_agg(self): + A = ast.filter.Equals('hello') + B = ast.filter.Equals('world') + C = ast.filter.Equals('foobar') + # identical expressions match + self.assertTrue(self.match._agg( + ast.filter.And(A, B), + ast.filter.And(A, B), + )) + self.assertTrue(self.match._agg( + ast.filter.Or(A, B), + ast.filter.Or(A, B), + )) + # _agg respects type + self.assertFalse(self.match._agg( + ast.filter.And(A, B), + ast.filter.Or(A, B), + )) + self.assertFalse(self.match._agg( + ast.filter.Or(A, B), + ast.filter.And(A, B), + )) + self.assertFalse(self.match._agg( + ast.filter.And(A, B), + ast.filter.Equals('hello'), + )) + self.assertFalse(self.match._agg( + ast.filter.Or(A, B), + ast.filter.Equals('hello'), + )) + # _agg respects child expressions + self.assertFalse(self.match._agg( + ast.filter.And(A, B), + ast.filter.And(A, ast.filter.Equals('bar')), + )) + self.assertFalse(self.match._agg( + ast.filter.Or(A, B), + ast.filter.Or(A, ast.filter.Equals('bar')), + )) + self.assertFalse(self.match._agg( + ast.filter.And(A, B), + ast.filter.And(A), + )) + self.assertFalse(self.match._agg( + ast.filter.Or(A, B), + ast.filter.Or(A), + )) + self.assertFalse(self.match._agg( + ast.filter.And(A, B), + ast.filter.And(A, B, C), + )) + self.assertFalse(self.match._agg( + ast.filter.Or(A, B), + ast.filter.Or(A, B, C), + )) + self.assertTrue(self.match._agg( + ast.filter.And(A, B), + ast.filter.And(B, A), + )) + self.assertTrue(self.match._agg( + ast.filter.Or(A, B), + ast.filter.Or(B, A), + )) + self.assertTrue(self.match._agg( + ast.filter.And(A, B), + ast.filter.And(A, Any()), + )) + self.assertTrue(self.match._agg( + ast.filter.Or(A, B), + ast.filter.Or(A, Any()), + )) + self.assertTrue(self.match._agg( + ast.filter.And(A, B), + ast.filter.And(B, Rest()), + )) + self.assertTrue(self.match._agg( + ast.filter.Or(A, B), + ast.filter.Or(B, Rest()), + )) + + def test_not(self): + # identical expressions match + self.assertTrue(self.match._not( + ast.filter.Not(ast.filter.Equals('hello')), + ast.filter.Not(ast.filter.Equals('hello')), + )) + # _not respects type + self.assertFalse(self.match._not( + ast.filter.Not(ast.filter.Equals('hello')), + ast.filter.Equals('hello'), + )) + # _not respects child expression + self.assertFalse(self.match._not( + ast.filter.Not(ast.filter.Equals('hello')), + ast.filter.Not(ast.filter.Equals('world')), + )) + self.assertFalse(self.match._not( + ast.filter.Not(ast.filter.Equals('hello')), + ast.filter.Not(ast.filter.Substring('hello')), + )) + self.assertTrue(self.match._not( + 
ast.filter.Not(ast.filter.Equals('hello')), + ast.filter.Not(Any()), + )) + + def test_has(self): + # identical expressions match + self.assertTrue(self.match._has( + ast.filter.Has(ns.bse.filesize), + ast.filter.Has(ns.bse.filesize), + )) + self.assertTrue(self.match._has( + ast.filter.Has(ns.bse.filesize, ast.filter.LessThan(3)), + ast.filter.Has(ns.bse.filesize, ast.filter.LessThan(3)), + )) + # _has respects type + self.assertFalse(self.match._has( + ast.filter.Has(ns.bse.filesize), + ast.filter.Equals('hello'), + )) + self.assertFalse(self.match._has( + ast.filter.Has(ns.bse.filesize), + ast.filter.Equals('hello'), + )) + # _has respects predicate + self.assertFalse(self.match._has( + ast.filter.Has(ns.bse.filesize, ast.filter.LessThan(3)), + ast.filter.Has(ns.bse.iso, ast.filter.LessThan(3)), + )) + self.assertTrue(self.match._has( + ast.filter.Has(ns.bse.filesize, ast.filter.LessThan(3)), + ast.filter.Has(Any(), ast.filter.LessThan(3)), + )) + self.assertTrue(self.match._has( + ast.filter.Has(ns.bse.filesize, ast.filter.LessThan(3)), + ast.filter.Has(Partial(ast.filter.Predicate), ast.filter.LessThan(3)), + )) + # _has respects count + self.assertFalse(self.match._has( + ast.filter.Has(ns.bse.filesize, ast.filter.LessThan(3)), + ast.filter.Has(ns.bse.filesize, ast.filter.GreaterThan(3)), + )) + self.assertFalse(self.match._has( + ast.filter.Has(ns.bse.filesize, ast.filter.LessThan(3)), + ast.filter.Has(ns.bse.filesize, ast.filter.LessThan(5)), + )) + self.assertTrue(self.match._has( + ast.filter.Has(ns.bse.filesize, ast.filter.LessThan(3)), + ast.filter.Has(ns.bse.filesize, Any()), + )) + self.assertTrue(self.match._has( + ast.filter.Has(ns.bse.filesize, ast.filter.LessThan(3)), + ast.filter.Has(ns.bse.filesize, Partial(ast.filter.LessThan)), + )) + + def test_distance(self): + # identical expressions match + self.assertTrue(self.match._distance( + ast.filter.Distance([1,2,3], 5, True), + ast.filter.Distance([1,2,3], 5, True), + )) + # _distance respects type + self.assertFalse(self.match._distance( + ast.filter.Distance([1,2,3], 5, True), + ast.filter.Equals('hello'), + )) + self.assertFalse(self.match._distance( + ast.filter.Distance([1,2,3], 5, True), + Partial(ast.filter.Equals), + )) + # _distance respects reference value + self.assertFalse(self.match._distance( + ast.filter.Distance([1,2,3], 5, True), + ast.filter.Distance([3,2,1], 5, True), + )) + self.assertTrue(self.match._distance( + ast.filter.Distance([1,2,3], 5, True), + Partial(ast.filter.Distance, threshold=5, strict=True), + )) + self.assertTrue(self.match._distance( + ast.filter.Distance([1,2,3], 5, True), + Partial(ast.filter.Distance, reference=[1,2,3], threshold=5, strict=True), + )) + self.assertFalse(self.match._distance( + ast.filter.Distance([1,2,3], 5, True), + Partial(ast.filter.Distance, reference=[3,2,1], threshold=5, strict=True), + )) + # _distance respects threshold + self.assertFalse(self.match._distance( + ast.filter.Distance([1,2,3], 5, True), + ast.filter.Distance([1,2,3], 8, True), + )) + self.assertTrue(self.match._distance( + ast.filter.Distance([1,2,3], 5, True), + Partial(ast.filter.Distance, reference=[1,2,3], strict=True), + )) + self.assertTrue(self.match._distance( + ast.filter.Distance([1,2,3], 5, True), + Partial(ast.filter.Distance, reference=[1,2,3], threshold=5, strict=True), + )) + self.assertFalse(self.match._distance( + ast.filter.Distance([1,2,3], 5, True), + Partial(ast.filter.Distance, reference=[1,2,3], threshold=8, strict=True), + )) + # _distance respects strict + 
self.assertFalse(self.match._distance( + ast.filter.Distance([1,2,3], 5, True), + ast.filter.Distance([1,2,3], 5, False), + )) + self.assertTrue(self.match._distance( + ast.filter.Distance([1,2,3], 5, True), + Partial(ast.filter.Distance, reference=[1,2,3], threshold=5), + )) + self.assertTrue(self.match._distance( + ast.filter.Distance([1,2,3], 5, True), + Partial(ast.filter.Distance, reference=[1,2,3], threshold=5, strict=True), + )) + self.assertFalse(self.match._distance( + ast.filter.Distance([1,2,3], 5, True), + Partial(ast.filter.Distance, reference=[1,2,3], threshold=5, strict=False), + )) + + def test_value(self): + # identical expressions match + self.assertTrue(self.match._value(ast.filter.Equals('hello'), ast.filter.Equals('hello'))) + self.assertTrue(self.match._value(ast.filter.Substring('hello'), ast.filter.Substring('hello'))) + self.assertTrue(self.match._value(ast.filter.StartsWith('hello'), ast.filter.StartsWith('hello'))) + self.assertTrue(self.match._value(ast.filter.EndsWith('hello'), ast.filter.EndsWith('hello'))) + self.assertTrue(self.match._value(ast.filter.Is('hello'), ast.filter.Is('hello'))) + # _value respects type + self.assertFalse(self.match._value(ast.filter.Equals('hello'), ast.filter.Is('hello'))) + self.assertFalse(self.match._value(ast.filter.Substring('hello'), ast.filter.Is('hello'))) + self.assertFalse(self.match._value(ast.filter.StartsWith('hello'), ast.filter.Is('hello'))) + self.assertFalse(self.match._value(ast.filter.EndsWith('hello'), ast.filter.Is('hello'))) + self.assertFalse(self.match._value(ast.filter.Is('hello'), ast.filter.Equals('hello'))) + # _value respects value + self.assertFalse(self.match._value(ast.filter.Equals('hello'), ast.filter.Equals('world'))) + self.assertFalse(self.match._value(ast.filter.Substring('hello'), ast.filter.Substring('world'))) + self.assertFalse(self.match._value(ast.filter.StartsWith('hello'), ast.filter.StartsWith('world'))) + self.assertFalse(self.match._value(ast.filter.EndsWith('hello'), ast.filter.EndsWith('world'))) + self.assertFalse(self.match._value(ast.filter.Is('hello'), ast.filter.Is('world'))) + # Partial requires correct type + self.assertFalse(self.match._value(ast.filter.Equals('hello'), Partial(ast.filter.Is))) + self.assertFalse(self.match._value(ast.filter.Substring('hello'), Partial(ast.filter.Is))) + self.assertFalse(self.match._value(ast.filter.StartsWith('hello'), Partial(ast.filter.Is))) + self.assertFalse(self.match._value(ast.filter.EndsWith('hello'), Partial(ast.filter.Is))) + self.assertFalse(self.match._value(ast.filter.Is('hello'), Partial(ast.filter.Equals))) + # value can be specified + self.assertTrue(self.match._value(ast.filter.Equals('hello'), Partial(ast.filter.Equals, value='hello'))) + self.assertFalse(self.match._value(ast.filter.Equals('hello'), Partial(ast.filter.Equals, value='world'))) + self.assertTrue(self.match._value(ast.filter.Substring('hello'), Partial(ast.filter.Substring, value='hello'))) + self.assertFalse(self.match._value(ast.filter.Substring('hello'), Partial(ast.filter.Substring, value='world'))) + self.assertTrue(self.match._value(ast.filter.StartsWith('hello'), Partial(ast.filter.StartsWith, value='hello'))) + self.assertFalse(self.match._value(ast.filter.StartsWith('hello'), Partial(ast.filter.StartsWith, value='world'))) + self.assertTrue(self.match._value(ast.filter.EndsWith('hello'), Partial(ast.filter.EndsWith, value='hello'))) + self.assertFalse(self.match._value(ast.filter.EndsWith('hello'), Partial(ast.filter.EndsWith, value='world'))) + 
self.assertTrue(self.match._value(ast.filter.Is('hello'), Partial(ast.filter.Is, value='hello'))) + self.assertFalse(self.match._value(ast.filter.Is('hello'), Partial(ast.filter.Is, value='world'))) + # value can remain unspecified + self.assertTrue(self.match._value(ast.filter.Equals('hello'), Partial(ast.filter.Equals))) + self.assertTrue(self.match._value(ast.filter.Substring('hello'), Partial(ast.filter.Substring))) + self.assertTrue(self.match._value(ast.filter.StartsWith('hello'), Partial(ast.filter.StartsWith))) + self.assertTrue(self.match._value(ast.filter.EndsWith('hello'), Partial(ast.filter.EndsWith))) + self.assertTrue(self.match._value(ast.filter.Is('hello'), Partial(ast.filter.Is))) + + def test_bounded(self): + # identical expressions match + self.assertTrue(self.match._bounded( + ast.filter.LessThan(threshold=3, strict=False), + ast.filter.LessThan(threshold=3, strict=False), + )) + self.assertTrue(self.match._bounded( + ast.filter.GreaterThan(threshold=3, strict=False), + ast.filter.GreaterThan(threshold=3, strict=False), + )) + # _bounded respects type + self.assertFalse(self.match._bounded( + ast.filter.LessThan(threshold=3, strict=False), + ast.filter.GreaterThan(threshold=3, strict=False), + )) + self.assertFalse(self.match._bounded( + ast.filter.GreaterThan(threshold=3, strict=False), + ast.filter.LessThan(threshold=3, strict=False), + )) + # _bounded respects threshold + self.assertFalse(self.match._bounded( + ast.filter.LessThan(threshold=3, strict=False), + ast.filter.LessThan(threshold=4, strict=False), + )) + self.assertFalse(self.match._bounded( + ast.filter.GreaterThan(threshold=3, strict=False), + ast.filter.GreaterThan(threshold=4, strict=False), + )) + # _bounded respects strict + self.assertFalse(self.match._bounded( + ast.filter.LessThan(threshold=3, strict=False), + ast.filter.LessThan(threshold=3, strict=True), + )) + self.assertFalse(self.match._bounded( + ast.filter.GreaterThan(threshold=3, strict=False), + ast.filter.GreaterThan(threshold=3, strict=True), + )) + # Partial requires correct type + self.assertFalse(self.match._bounded( + ast.filter.LessThan(threshold=3, strict=False), + Partial(ast.filter.GreaterThan), + )) + self.assertFalse(self.match._bounded( + ast.filter.GreaterThan(threshold=3, strict=False), + Partial(ast.filter.LessThan), + )) + # threshold and strict can be specified + self.assertTrue(self.match._bounded( + ast.filter.LessThan(threshold=3, strict=False), + Partial(ast.filter.LessThan, threshold=3, strict=False), + )) + self.assertTrue(self.match._bounded( + ast.filter.GreaterThan(threshold=3, strict=False), + Partial(ast.filter.GreaterThan, threshold=3, strict=False), + )) + self.assertFalse(self.match._bounded( + ast.filter.LessThan(threshold=3, strict=False), + Partial(ast.filter.LessThan, threshold=4, strict=False), + )) + self.assertFalse(self.match._bounded( + ast.filter.GreaterThan(threshold=3, strict=False), + Partial(ast.filter.GreaterThan, threshold=4, strict=False), + )) + self.assertFalse(self.match._bounded( + ast.filter.LessThan(threshold=3, strict=False), + Partial(ast.filter.LessThan, threshold=3, strict=True), + )) + self.assertFalse(self.match._bounded( + ast.filter.GreaterThan(threshold=3, strict=False), + Partial(ast.filter.GreaterThan, threshold=3, strict=True), + )) + # threshold can remain unspecified + self.assertTrue(self.match._bounded( + ast.filter.LessThan(threshold=3, strict=False), + Partial(ast.filter.LessThan, strict=False), + )) + self.assertTrue(self.match._bounded( + 
ast.filter.GreaterThan(threshold=3, strict=False), + Partial(ast.filter.GreaterThan, strict=False), + )) + self.assertFalse(self.match._bounded( + ast.filter.LessThan(threshold=3, strict=False), + Partial(ast.filter.LessThan, strict=True), + )) + self.assertFalse(self.match._bounded( + ast.filter.GreaterThan(threshold=3, strict=False), + Partial(ast.filter.GreaterThan, strict=True), + )) + # strict can remain unspecified + self.assertTrue(self.match._bounded( + ast.filter.LessThan(threshold=3, strict=False), + Partial(ast.filter.LessThan, threshold=3), + )) + self.assertTrue(self.match._bounded( + ast.filter.GreaterThan(threshold=3, strict=False), + Partial(ast.filter.GreaterThan, threshold=3), + )) + self.assertFalse(self.match._bounded( + ast.filter.LessThan(threshold=3, strict=False), + Partial(ast.filter.LessThan, threshold=4), + )) + self.assertFalse(self.match._bounded( + ast.filter.GreaterThan(threshold=3, strict=False), + Partial(ast.filter.GreaterThan, threshold=4), + )) + + +## main ## + +if __name__ == '__main__': + unittest.main() + +## EOF ## diff --git a/test/query/test_validator.py b/test/query/test_validator.py index dc9d913..fec3d23 100644 --- a/test/query/test_validator.py +++ b/test/query/test_validator.py @@ -14,7 +14,7 @@ from bsfs.query import ast from bsfs.utils import errors # objects to test -from bsfs.query.validator import Filter +from bsfs.query.validator import Filter, Fetch ## code ## @@ -286,6 +286,219 @@ class TestFilter(unittest.TestCase): self.assertIsNone(self.validate._distance(self.schema.literal(ns.bsfs.Colors), ast.filter.Distance([1,2,3,4,5], 1, False))) +class TestFetch(unittest.TestCase): + def setUp(self): + self.schema = _schema.from_string(''' + prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> + prefix xsd: <http://www.w3.org/2001/XMLSchema#> + + prefix bsfs: <http://bsfs.ai/schema/> + prefix bse: <http://bsfs.ai/schema/Entity#> + + bsfs:Entity rdfs:subClassOf bsfs:Node . + bsfs:Tag rdfs:subClassOf bsfs:Node . + xsd:string rdfs:subClassOf bsfs:Literal . + + bse:filename rdfs:subClassOf bsfs:Predicate ; + rdfs:domain bsfs:Node ; + rdfs:range xsd:string . + + bse:tag rdfs:subClassOf bsfs:Predicate ; + rdfs:domain bsfs:Entity ; + rdfs:range bsfs:Tag . + + bse:label rdfs:subClassOf bsfs:Predicate ; + rdfs:domain bsfs:Tag ; + rdfs:range xsd:string . 
+ + ''') + self.validate = Fetch(self.schema) + + def test_call(self): + # call accepts correct expressions + self.assertTrue(self.validate(self.schema.node(ns.bsfs.Entity), + ast.fetch.Fetch(ns.bse.tag, ast.fetch.Value(ns.bse.label, 'value')))) + self.assertTrue(self.validate(self.schema.node(ns.bsfs.Entity), + ast.fetch.Fetch(ns.bse.tag, ast.fetch.This('this')))) + self.assertTrue(self.validate(self.schema.node(ns.bsfs.Entity), + ast.fetch.This('this'))) + self.assertTrue(self.validate(self.schema.node(ns.bsfs.Entity), + ast.fetch.All(ast.fetch.This('this'), ast.fetch.Node(ns.bse.tag, 'node'), ast.fetch.Value(ns.bse.filename, 'value')))) + # type must be a Node + self.assertRaises(TypeError, self.validate, 1234, ast.fetch.This('this')) + self.assertRaises(TypeError, self.validate, 'foobar', ast.fetch.This('this')) + self.assertRaises(TypeError, self.validate, self.schema.literal(ns.bsfs.Literal), ast.fetch.This('this')) + # type must be in the schema + self.assertRaises(errors.ConsistencyError, self.validate, self.schema.node(ns.bsfs.Node).child(ns.bsfs.Invalid), + ast.fetch.FetchExpression()) + # expression must be a fetch expression + self.assertRaises(TypeError, self.validate, self.schema.node(ns.bsfs.Entity), 1234) + self.assertRaises(TypeError, self.validate, self.schema.node(ns.bsfs.Entity), 'hello') + self.assertRaises(TypeError, self.validate, self.schema.node(ns.bsfs.Entity), ast.filter.FilterExpression()) + # expression must be valid + self.assertRaises(errors.ConsistencyError, self.validate, self.schema.node(ns.bsfs.Entity), + ast.fetch.Fetch(ns.bse.tag, ast.fetch.Node(ns.bse.label, 'node'))) + self.assertRaises(errors.ConsistencyError, self.validate, self.schema.node(ns.bsfs.Entity), + ast.fetch.Value(ns.bse.tag, 'value')) + + def test_routing(self): + # Node passes _branch, _named, and _node checks + self.assertRaises(errors.ConsistencyError, self.validate._parse_fetch_expression, self.schema.node(ns.bsfs.Node), + ast.fetch.Node(ns.bse.tag, 'node')) # fails in _branch + self.assertRaises(errors.BackendError, self.validate._parse_fetch_expression, self.schema.node(ns.bsfs.Entity), + ast.fetch.Node(ns.bse.tag, '')) # fails in _named + self.assertRaises(errors.ConsistencyError, self.validate._parse_fetch_expression, self.schema.node(ns.bsfs.Entity), + ast.fetch.Node(ns.bse.label, 'node')) # fails in _node + # Value passes _branch, _named, and _value checks + self.assertRaises(errors.ConsistencyError, self.validate._parse_fetch_expression, self.schema.node(ns.bsfs.Node), + ast.fetch.Value(ns.bse.label, 'value')) # fails in _branch + self.assertRaises(errors.BackendError, self.validate._parse_fetch_expression, self.schema.node(ns.bsfs.Entity), + ast.fetch.Value(ns.bse.filename, '')) # fails in _named + self.assertRaises(errors.ConsistencyError, self.validate._parse_fetch_expression, self.schema.node(ns.bsfs.Entity), + ast.fetch.Value(ns.bse.tag, 'value')) # fails in _value + # Fetch passes _branch and _fetch checks + self.assertRaises(errors.ConsistencyError, self.validate._parse_fetch_expression, self.schema.node(ns.bsfs.Node), + ast.fetch.Fetch(ns.bse.tag, ast.fetch.This('this'))) # fails in _branch + self.assertRaises(errors.ConsistencyError, self.validate._parse_fetch_expression, self.schema.node(ns.bsfs.Node), + ast.fetch.Fetch(ns.bse.filename, ast.fetch.This('this'))) # fails in _fetch + # invalid expressions cannot be parsed + type_ = self.schema.node(ns.bsfs.Node) + self.assertRaises(errors.BackendError, self.validate._parse_fetch_expression, type_, + 
ast.filter.FilterExpression()) + self.assertRaises(errors.BackendError, self.validate._parse_fetch_expression, type_, + 1234) + self.assertRaises(errors.BackendError, self.validate._parse_fetch_expression, type_, + 'hello world') + + def test_all(self): + # all accepts correct expressions + self.assertIsNone(self.validate._all(self.schema.node(ns.bsfs.Entity), + ast.fetch.All(ast.fetch.Value(ns.bse.filename, 'value'), ast.fetch.Node(ns.bse.tag, 'node')))) + # child expressions must be valid + self.assertRaises(errors.ConsistencyError, self.validate._all, self.schema.node(ns.bsfs.Entity), + ast.fetch.All(ast.fetch.Value(ns.bse.tag, 'value'))) + self.assertRaises(errors.ConsistencyError, self.validate._all, self.schema.node(ns.bsfs.Entity), + ast.fetch.All(ast.fetch.Value(ns.bse.filename, 'value'), ast.fetch.Node(ns.bse.filename, 'node'))) + self.assertRaises(errors.ConsistencyError, self.validate._all, self.schema.node(ns.bsfs.Entity), + ast.fetch.All(ast.fetch.Value(ns.bse.tag, 'value'), ast.fetch.Node(ns.bse.tag, 'node'))) + self.assertRaises(errors.ConsistencyError, self.validate._all, self.schema.node(ns.bsfs.Entity), + ast.fetch.All(ast.fetch.Value(ns.bse.tag, 'value'), ast.fetch.Node(ns.bse.filename, 'node'))) + + def test_branch(self): + # branch accepts correct expressions + self.assertIsNone(self.validate._branch(self.schema.node(ns.bsfs.Entity), + ast.fetch._Branch(ns.bse.filename))) + self.assertIsNone(self.validate._branch(self.schema.node(ns.bsfs.Entity), + ast.fetch.Fetch(ns.bse.tag, ast.fetch.This('this')))) + self.assertIsNone(self.validate._branch(self.schema.node(ns.bsfs.Entity), + ast.fetch.Value(ns.bse.filename, 'value'))) + self.assertIsNone(self.validate._branch(self.schema.node(ns.bsfs.Entity), + ast.fetch.Node(ns.bse.tag, 'node'))) + # type must be a node + self.assertRaises(errors.ConsistencyError, self.validate._branch, self.schema.literal(ns.bsfs.Literal), + ast.fetch._Branch(ns.bse.filename)) + self.assertRaises(errors.ConsistencyError, self.validate._branch, self.schema.literal(ns.bsfs.Literal), + ast.fetch.Fetch(ns.bse.tag, ast.fetch.This('this'))) + self.assertRaises(errors.ConsistencyError, self.validate._branch, self.schema.literal(ns.bsfs.Literal), + ast.fetch.Value(ns.bse.filename, 'value')) + self.assertRaises(errors.ConsistencyError, self.validate._branch, self.schema.literal(ns.bsfs.Literal), + ast.fetch.Node(ns.bse.tag, 'node')) + # type must be in the schema + self.assertRaises(errors.ConsistencyError, self.validate._branch, self.schema.node(ns.bsfs.Node).child(ns.bsfs.Invalid), + ast.fetch._Branch(ns.bse.filename)) + self.assertRaises(errors.ConsistencyError, self.validate._branch, self.schema.node(ns.bsfs.Node).child(ns.bsfs.Invalid), + ast.fetch.Fetch(ns.bse.tag, ast.fetch.This('this'))) + self.assertRaises(errors.ConsistencyError, self.validate._branch, self.schema.node(ns.bsfs.Node).child(ns.bsfs.Invalid), + ast.fetch.Value(ns.bse.filename, 'value')) + self.assertRaises(errors.ConsistencyError, self.validate._branch, self.schema.node(ns.bsfs.Node).child(ns.bsfs.Invalid), + ast.fetch.Node(ns.bse.tag, 'node')) + # predicate must be in the schema + self.assertRaises(errors.ConsistencyError, self.validate._branch, self.schema.node(ns.bsfs.Entity), + ast.fetch._Branch(ns.bse.invalid)) + self.assertRaises(errors.ConsistencyError, self.validate._branch, self.schema.node(ns.bsfs.Entity), + ast.fetch.Fetch(ns.bse.invalid, ast.fetch.This('this'))) + self.assertRaises(errors.ConsistencyError, self.validate._branch, self.schema.node(ns.bsfs.Entity), + 
ast.fetch.Value(ns.bse.invalid, 'value')) + self.assertRaises(errors.ConsistencyError, self.validate._branch, self.schema.node(ns.bsfs.Entity), + ast.fetch.Node(ns.bse.invalid, 'node')) + # predicate's domain must be related to the type + self.assertRaises(errors.ConsistencyError, self.validate._branch, self.schema.node(ns.bsfs.Entity), + ast.fetch._Branch(ns.bse.label)) + self.assertRaises(errors.ConsistencyError, self.validate._branch, self.schema.node(ns.bsfs.Entity), + ast.fetch.Fetch(ns.bse.label, ast.fetch.This('this'))) + self.assertRaises(errors.ConsistencyError, self.validate._branch, self.schema.node(ns.bsfs.Entity), + ast.fetch.Node(ns.bse.label, 'node')) + self.assertRaises(errors.ConsistencyError, self.validate._branch, self.schema.node(ns.bsfs.Entity), + ast.fetch.Value(ns.bse.label, 'value')) + # predicate's domain cannot be a supertype + self.assertRaises(errors.ConsistencyError, self.validate._branch, self.schema.node(ns.bsfs.Node), + ast.fetch._Branch(ns.bse.tag)) + self.assertRaises(errors.ConsistencyError, self.validate._branch, self.schema.node(ns.bsfs.Node), + ast.fetch.Fetch(ns.bse.tag, ast.fetch.This('this'))) + self.assertRaises(errors.ConsistencyError, self.validate._branch, self.schema.node(ns.bsfs.Node), + ast.fetch.Node(ns.bse.tag, 'node')) + self.assertRaises(errors.ConsistencyError, self.validate._branch, self.schema.node(ns.bsfs.Node), + ast.fetch.Value(ns.bse.tag, 'value')) + # predicate's domain can be a subtype + self.assertIsNone(self.validate._branch(self.schema.node(ns.bsfs.Entity), + ast.fetch._Branch(ns.bse.filename))) + self.assertIsNone(self.validate._branch(self.schema.node(ns.bsfs.Entity), + ast.fetch.Value(ns.bse.filename, 'value'))) + + def test_fetch(self): + # fetch accepts correct expressions + self.assertIsNone(self.validate._fetch(self.schema.node(ns.bsfs.Node), + ast.fetch.Fetch(ns.bse.tag, ast.fetch.Value(ns.bse.label, 'value')))) + # range must be a node + self.assertRaises(errors.ConsistencyError, self.validate._fetch, self.schema.node(ns.bsfs.Node), + ast.fetch.Fetch(ns.bse.filename, ast.fetch.This('this'))) + # child expression must be valid + self.assertRaises(errors.ConsistencyError, self.validate._fetch, self.schema.node(ns.bsfs.Node), + ast.fetch.Fetch(ns.bse.tag, ast.fetch.Node(ns.bse.label, 'node'))) + + def test_named(self): + # named accepts correct expressions + self.assertIsNone(self.validate._named(self.schema.node(ns.bsfs.Entity), + ast.fetch.Node(ns.bse.tag, 'node'))) + self.assertIsNone(self.validate._named(self.schema.node(ns.bsfs.Entity), + ast.fetch.Value(ns.bse.filename, 'value'))) + # name must be non-empty + self.assertRaises(errors.BackendError, self.validate._named, self.schema.node(ns.bsfs.Entity), + ast.fetch.Node(ns.bse.tag, '')) + self.assertRaises(errors.BackendError, self.validate._named, self.schema.node(ns.bsfs.Entity), + ast.fetch.Value(ns.bse.filename, '')) + + def test_node(self): + # node accepts correct expressions + self.assertIsNone(self.validate._node(self.schema.node(ns.bsfs.Entity), + ast.fetch.Node(ns.bse.tag, 'node'))) + # range must be a node + self.assertRaises(errors.ConsistencyError, self.validate._node, self.schema.node(ns.bsfs.Entity), + ast.fetch.Node(ns.bse.filename, 'node')) + + def test_value(self): + # value accepts correct expressions + self.assertIsNone(self.validate._value(self.schema.node(ns.bsfs.Entity), + ast.fetch.Value(ns.bse.filename, 'value'))) + # range must be a literal + self.assertRaises(errors.ConsistencyError, self.validate._value, self.schema.node(ns.bsfs.Entity), 
+ ast.fetch.Value(ns.bse.tag, 'value')) + + def test_this(self): + # this accepts correct expressions + self.assertIsNone(self.validate._this(self.schema.node(ns.bsfs.Entity), ast.fetch.This('this'))) + # type must be a node + self.assertRaises(errors.ConsistencyError, self.validate._this, self.schema.literal(ns.bsfs.Literal), + ast.fetch.This('this')) + self.assertRaises(errors.ConsistencyError, self.validate._this, self.schema.predicate(ns.bsfs.Predicate), + ast.fetch.This('this')) + # type must be in the schema + self.assertRaises(errors.ConsistencyError, self.validate._this, self.schema.node(ns.bsfs.Node).child(ns.bsfs.Invalid), + ast.fetch.This('this')) + # name must be non-empty + self.assertRaises(errors.BackendError, self.validate._this, self.schema.node(ns.bsfs.Entity), ast.fetch.This('')) + + ## main ## if __name__ == '__main__': diff --git a/test/schema/test_schema.py b/test/schema/test_schema.py index 32dbc93..414e542 100644 --- a/test/schema/test_schema.py +++ b/test/schema/test_schema.py @@ -66,13 +66,14 @@ class TestSchema(unittest.TestCase): # literals self.l_root = types.ROOT_LITERAL self.l_number = types.ROOT_NUMBER + self.l_blob = types.ROOT_BLOB self.l_array = types.ROOT_ARRAY self.l_time = types.ROOT_TIME self.l_string = self.l_root.child(ns.xsd.string) self.l_integer = self.l_root.child(ns.xsd.integer) self.l_unused = self.l_root.child(ns.xsd.boolean) self.f_root = types.ROOT_FEATURE - self.literals = [self.l_root, self.l_array, self.f_root, self.l_number, self.l_time, self.l_string, self.l_integer, self.l_unused] + self.literals = [self.l_root, self.l_array, self.f_root, self.l_number, self.l_time, self.l_string, self.l_integer, self.l_unused, self.l_blob] # predicates self.p_root = types.ROOT_PREDICATE @@ -85,13 +86,13 @@ class TestSchema(unittest.TestCase): # no args yields a minimal schema schema = Schema() self.assertSetEqual(set(schema.nodes()), {self.n_root}) - self.assertSetEqual(set(schema.literals()), {self.l_root, self.l_number, self.l_array, self.l_time, self.f_root}) + self.assertSetEqual(set(schema.literals()), {self.l_root, self.l_number, self.l_array, self.l_time, self.f_root, self.l_blob}) self.assertSetEqual(set(schema.predicates()), {self.p_root}) # nodes and literals are optional schema = Schema(self.predicates) self.assertSetEqual(set(schema.nodes()), {self.n_root, self.n_ent, self.n_img, self.n_tag}) - self.assertSetEqual(set(schema.literals()), {self.l_root, self.l_string, self.l_number, self.l_time, self.l_array, self.f_root}) + self.assertSetEqual(set(schema.literals()), {self.l_root, self.l_string, self.l_number, self.l_time, self.l_array, self.f_root, self.l_blob}) self.assertSetEqual(set(schema.predicates()), set(self.predicates)) # predicates, nodes, and literals are respected @@ -112,13 +113,13 @@ class TestSchema(unittest.TestCase): # literals are complete schema = Schema(self.predicates, self.nodes, None) - self.assertSetEqual(set(schema.literals()), {self.l_root, self.l_string, self.l_number, self.l_array, self.l_time, self.f_root}) + self.assertSetEqual(set(schema.literals()), {self.l_root, self.l_string, self.l_number, self.l_array, self.l_time, self.f_root, self.l_blob}) schema = Schema(self.predicates, self.nodes, []) - self.assertSetEqual(set(schema.literals()), {self.l_root, self.l_string, self.l_number, self.l_array, self.l_time, self.f_root}) + self.assertSetEqual(set(schema.literals()), {self.l_root, self.l_string, self.l_number, self.l_array, self.l_time, self.f_root, self.l_blob}) schema = Schema(self.predicates, self.nodes, 
[self.l_string]) - self.assertSetEqual(set(schema.literals()), {self.l_root, self.l_string, self.l_number, self.l_array, self.l_time, self.f_root}) + self.assertSetEqual(set(schema.literals()), {self.l_root, self.l_string, self.l_number, self.l_array, self.l_time, self.f_root, self.l_blob}) schema = Schema(self.predicates, self.nodes, [self.l_integer]) - self.assertSetEqual(set(schema.literals()), {self.l_root, self.l_string, self.l_integer, self.l_number, self.l_array, self.l_time, self.f_root}) + self.assertSetEqual(set(schema.literals()), {self.l_root, self.l_string, self.l_integer, self.l_number, self.l_array, self.l_time, self.f_root, self.l_blob}) schema = Schema(self.predicates, self.nodes, [self.l_integer, self.l_unused]) self.assertSetEqual(set(schema.literals()), set(self.literals)) @@ -178,13 +179,13 @@ class TestSchema(unittest.TestCase): self.assertEqual(str(Schema(self.predicates, self.nodes, self.literals)), 'Schema()') # repr conversion with only default nodes, literals, and predicates n = [ns.bsfs.Node] - l = [ns.bsfs.Array, ns.bsfs.Feature, ns.bsfs.Literal, ns.bsfs.Number, ns.bsfs.Time] + l = [ns.bsfs.Array, ns.bsfs.BinaryBlob, ns.bsfs.Feature, ns.bsfs.Literal, ns.bsfs.Number, ns.bsfs.Time] p = [ns.bsfs.Predicate] self.assertEqual(repr(Schema()), f'Schema({n}, {l}, {p})') self.assertEqual(repr(Schema([], [], [])), f'Schema({n}, {l}, {p})') # repr conversion n = [ns.bsfs.Entity, ns.bsfs.Image, ns.bsfs.Node, ns.bsfs.Tag, ns.bsfs.Unused] - l = [ns.bsfs.Array, ns.bsfs.Feature, ns.bsfs.Literal, ns.bsfs.Number, ns.bsfs.Time, ns.xsd.boolean, ns.xsd.integer, ns.xsd.string] + l = [ns.bsfs.Array, ns.bsfs.BinaryBlob, ns.bsfs.Feature, ns.bsfs.Literal, ns.bsfs.Number, ns.bsfs.Time, ns.xsd.boolean, ns.xsd.integer, ns.xsd.string] p = [ns.bse.comment, ns.bse.group, ns.bse.tag, ns.bsfs.Predicate] self.assertEqual(repr(Schema(self.predicates, self.nodes, self.literals)), f'Schema({n}, {l}, {p})') diff --git a/test/triple_store/sparql/test_parse_fetch.py b/test/triple_store/sparql/test_parse_fetch.py new file mode 100644 index 0000000..0961789 --- /dev/null +++ b/test/triple_store/sparql/test_parse_fetch.py @@ -0,0 +1,263 @@ +""" + +Part of the bsfs test suite. +A copy of the license is provided with the project. +Author: Matthias Baumgartner, 2022 +""" +# imports +import rdflib +import unittest + +# bsie imports +from bsfs import schema +from bsfs.namespace import Namespace, ns +from bsfs.query import ast +from bsfs.utils import errors, URI + +# objects to test +from bsfs.triple_store.sparql.parse_fetch import Fetch + + +## code ## + +bsfs = Namespace('http://bsfs.ai/schema', fsep='/') +bse = Namespace('http://bsfs.ai/schema/Entity') +bst = Namespace('http://bsfs.ai/schema/Tag') +bsc = Namespace('http://bsfs.ai/schema/Collection') + +class TestParseFetch(unittest.TestCase): + + def setUp(self): + self.schema = schema.from_string(''' + prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> + prefix xsd: <http://www.w3.org/2001/XMLSchema#> + + prefix bsfs: <http://bsfs.ai/schema/> + prefix bse: <http://bsfs.ai/schema/Entity#> + prefix bst: <http://bsfs.ai/schema/Tag#> + prefix bsc: <http://bsfs.ai/schema/Collection#> + + # nodes + bsfs:Entity rdfs:subClassOf bsfs:Node . + bsfs:Tag rdfs:subClassOf bsfs:Node . + bsfs:Collection rdfs:subClassOf bsfs:Node . + + # literals + xsd:integer rdfs:subClassOf bsfs:Literal . + xsd:string rdfs:subClassOf bsfs:Literal . + + # predicates + bse:tag rdfs:subClassOf bsfs:Predicate ; + rdfs:domain bsfs:Entity ; + rdfs:range bsfs:Tag . 
+ + bse:collection rdfs:subClassOf bsfs:Predicate ; + rdfs:domain bsfs:Entity ; + rdfs:range bsfs:Collection . + + bse:filename rdfs:subClassOf bsfs:Predicate ; + rdfs:domain bsfs:Entity ; + rdfs:range xsd:string . + + bse:rank rdfs:subClassOf bsfs:Predicate ; + rdfs:domain bsfs:Entity ; + rdfs:range xsd:integer . + + bst:main rdfs:subClassOf bsfs:Predicate ; + rdfs:domain bsfs:Tag ; + rdfs:range bsfs:Entity . + + bst:label rdfs:subClassOf bsfs:Predicate ; + rdfs:domain bsfs:Tag ; + rdfs:range xsd:string . + + bsc:tag rdfs:subClassOf bsfs:Predicate ; + rdfs:domain bsfs:Collection ; + rdfs:range bsfs:Tag . + + bsc:label rdfs:subClassOf bsfs:Predicate ; + rdfs:domain bsfs:Collection ; + rdfs:range xsd:string . + + bsc:rating rdfs:subClassOf bsfs:Predicate ; + rdfs:domain bsfs:Collection ; + rdfs:range xsd:integer . + + ''') + + # graph to test queries + self.graph = rdflib.Graph() + # schema hierarchies + self.graph.add((rdflib.URIRef('http://bsfs.ai/schema/Entity'), rdflib.RDFS.subClassOf, rdflib.URIRef('http://bsfs.ai/schema/Node'))) + self.graph.add((rdflib.URIRef('http://bsfs.ai/schema/Collection'), rdflib.RDFS.subClassOf, rdflib.URIRef('http://bsfs.ai/schema/Node'))) + self.graph.add((rdflib.URIRef('http://bsfs.ai/schema/Tag'), rdflib.RDFS.subClassOf, rdflib.URIRef('http://bsfs.ai/schema/Node'))) + # entities + self.graph.add((rdflib.URIRef('http://example.com/entity#1234'), rdflib.RDF.type, rdflib.URIRef('http://bsfs.ai/schema/Entity'))) + self.graph.add((rdflib.URIRef('http://example.com/entity#4321'), rdflib.RDF.type, rdflib.URIRef('http://bsfs.ai/schema/Entity'))) + # tags + self.graph.add((rdflib.URIRef('http://example.com/tag#1234'), rdflib.RDF.type, rdflib.URIRef('http://bsfs.ai/schema/Tag'))) + self.graph.add((rdflib.URIRef('http://example.com/tag#4321'), rdflib.RDF.type, rdflib.URIRef('http://bsfs.ai/schema/Tag'))) + # collections + self.graph.add((rdflib.URIRef('http://example.com/collection#1234'), rdflib.RDF.type, rdflib.URIRef('http://bsfs.ai/schema/Collection'))) + self.graph.add((rdflib.URIRef('http://example.com/collection#4321'), rdflib.RDF.type, rdflib.URIRef('http://bsfs.ai/schema/Collection'))) + # entity literals + self.graph.add((rdflib.URIRef('http://example.com/entity#1234'), rdflib.URIRef(bse.rank), rdflib.Literal('1234', datatype=rdflib.XSD.integer))) + self.graph.add((rdflib.URIRef('http://example.com/entity#1234'), rdflib.URIRef(bse.filename), rdflib.Literal('filename_1234', datatype=rdflib.XSD.string))) + #self.graph.add((rdflib.URIRef('http://example.com/entity#4321'), rdflib.URIRef(bse.rank), rdflib.Literal('4321', datatype=rdflib.XSD.integer))) + self.graph.add((rdflib.URIRef('http://example.com/entity#4321'), rdflib.URIRef(bse.filename), rdflib.Literal('filename_4321', datatype=rdflib.XSD.string))) + # tag literals + self.graph.add((rdflib.URIRef('http://example.com/tag#1234'), rdflib.URIRef(bst.label), rdflib.Literal('tag_label_1234', datatype=rdflib.XSD.string))) + self.graph.add((rdflib.URIRef('http://example.com/tag#4321'), rdflib.URIRef(bst.label), rdflib.Literal('tag_label_4321', datatype=rdflib.XSD.string))) + # collection literals + self.graph.add((rdflib.URIRef('http://example.com/collection#1234'), rdflib.URIRef(bsc.label), rdflib.Literal('collection_label_1234', datatype=rdflib.XSD.string))) + self.graph.add((rdflib.URIRef('http://example.com/collection#1234'), rdflib.URIRef(bsc.rating), rdflib.Literal('1234', datatype=rdflib.XSD.integer))) + self.graph.add((rdflib.URIRef('http://example.com/collection#4321'), rdflib.URIRef(bsc.label), 
rdflib.Literal('collection_label_4321', datatype=rdflib.XSD.string))) + self.graph.add((rdflib.URIRef('http://example.com/collection#4321'), rdflib.URIRef(bsc.rating), rdflib.Literal('4321', datatype=rdflib.XSD.integer))) + # entity-tag links + self.graph.add((rdflib.URIRef('http://example.com/entity#1234'), rdflib.URIRef(bse.tag), rdflib.URIRef('http://example.com/tag#1234'))) + self.graph.add((rdflib.URIRef('http://example.com/entity#4321'), rdflib.URIRef(bse.tag), rdflib.URIRef('http://example.com/tag#4321'))) + # entity-collection links + self.graph.add((rdflib.URIRef('http://example.com/entity#1234'), rdflib.URIRef(bse.collection), rdflib.URIRef('http://example.com/collection#1234'))) + self.graph.add((rdflib.URIRef('http://example.com/entity#4321'), rdflib.URIRef(bse.collection), rdflib.URIRef('http://example.com/collection#4321'))) + # collection-tag links + self.graph.add((rdflib.URIRef('http://example.com/collection#1234'), rdflib.URIRef(bsc.tag), rdflib.URIRef('http://example.com/tag#1234'))) + self.graph.add((rdflib.URIRef('http://example.com/collection#4321'), rdflib.URIRef(bsc.tag), rdflib.URIRef('http://example.com/tag#4321'))) + # tag-entity links # NOTE: cross-over + self.graph.add((rdflib.URIRef('http://example.com/tag#1234'), rdflib.URIRef(bst.main), rdflib.URIRef('http://example.com/entity#4321'))) + self.graph.add((rdflib.URIRef('http://example.com/tag#4321'), rdflib.URIRef(bst.main), rdflib.URIRef('http://example.com/entity#1234'))) + + # default parser + self.parser = Fetch(self.schema) + self.ent = self.schema.node(ns.bsfs.Entity) + + + def test_call(self): + # NOTE: The individual ast components are considered in the respective tests. Here, we test __call__ specifics. + + # __call__ requires a valid root type + self.assertRaises(errors.BackendError, self.parser, self.schema.literal(ns.bsfs.Literal), ast.fetch.This('this')) + self.assertRaises(errors.ConsistencyError, self.parser, self.schema.node(ns.bsfs.Node).child(ns.bsfs.Invalid), ast.fetch.This('this')) + # __call__ requires a parseable root + self.assertRaises(errors.BackendError, self.parser, self.ent, ast.filter.FilterExpression()) + # __call__ returns an executable query + q = self.parser(self.ent, ast.fetch.Fetch(bse.tag, ast.fetch.Value(bst.label, 'label'))) + self.assertSetEqual(set(q(self.graph)), { + (rdflib.URIRef('http://example.com/entity#1234'), rdflib.Literal('tag_label_1234', datatype=rdflib.XSD.string)), + (rdflib.URIRef('http://example.com/entity#4321'), rdflib.Literal('tag_label_4321', datatype=rdflib.XSD.string)), + }) + + + def test_routing(self): + self.assertRaises(errors.BackendError, self.parser._parse_fetch_expression, self.ent, ast.fetch.FetchExpression(), '?head') + + + def test_all(self): + # multiple values query + q = self.parser(self.ent, ast.fetch.All( + ast.fetch.Value(bse.filename, name='filename'), + ast.fetch.Value(bse.rank, name='rank')), + ) + self.assertSetEqual(set(q.names), {'filename', 'rank'}) + if q.names == ('filename', 'rank'): + self.assertSetEqual(set(q(self.graph)), { + (rdflib.URIRef('http://example.com/entity#1234'), rdflib.Literal('filename_1234', datatype=rdflib.XSD.string), rdflib.Literal('1234', datatype=rdflib.XSD.integer)), + (rdflib.URIRef('http://example.com/entity#4321'), rdflib.Literal('filename_4321', datatype=rdflib.XSD.string), None), + }) + else: + self.assertSetEqual(set(q(self.graph)), { + (rdflib.URIRef('http://example.com/entity#1234'), rdflib.Literal('1234', datatype=rdflib.XSD.integer), rdflib.Literal('filename_1234', 
datatype=rdflib.XSD.string)), + (rdflib.URIRef('http://example.com/entity#4321'), None, rdflib.Literal('filename_4321', datatype=rdflib.XSD.string)), + }) + # mixed values and node query + q = self.parser(self.ent, ast.fetch.All( + ast.fetch.Value(bse.filename, name='filename'), + ast.fetch.Node(bse.tag, name='tag'), + )) + self.assertSetEqual(set(q.names), {'filename', 'tag'}) + if q.names == ('filename', 'tag'): + self.assertSetEqual(set(q(self.graph)), { + (rdflib.URIRef('http://example.com/entity#1234'), rdflib.Literal('filename_1234', datatype=rdflib.XSD.string), rdflib.URIRef('http://example.com/tag#1234')), + (rdflib.URIRef('http://example.com/entity#4321'), rdflib.Literal('filename_4321', datatype=rdflib.XSD.string), rdflib.URIRef('http://example.com/tag#4321')), + }) + else: + self.assertSetEqual(set(q(self.graph)), { + (rdflib.URIRef('http://example.com/entity#1234'), rdflib.URIRef('http://example.com/tag#1234'), rdflib.Literal('filename_1234', datatype=rdflib.XSD.string)), + (rdflib.URIRef('http://example.com/entity#4321'), rdflib.URIRef('http://example.com/tag#4321'), rdflib.Literal('filename_4321', datatype=rdflib.XSD.string)), + }) + # multiple values and second hop + q = self.parser(self.ent, ast.fetch.Fetch(bse.tag, ast.fetch.All( + ast.fetch.This(name='tag'), + ast.fetch.Value(bst.label, name='label'), + ))) + self.assertSetEqual(set(q.names), {'tag', 'label'}) + if q.names == ('tag', 'label'): + self.assertSetEqual(set(q(self.graph)), { + (rdflib.URIRef('http://example.com/entity#1234'), rdflib.URIRef('http://example.com/tag#1234'), rdflib.Literal('tag_label_1234', datatype=rdflib.XSD.string)), + (rdflib.URIRef('http://example.com/entity#4321'), rdflib.URIRef('http://example.com/tag#4321'), rdflib.Literal('tag_label_4321', datatype=rdflib.XSD.string)), + }) + else: + self.assertSetEqual(set(q(self.graph)), { + (rdflib.URIRef('http://example.com/entity#1234'), rdflib.Literal('tag_label_1234', datatype=rdflib.XSD.string), rdflib.URIRef('http://example.com/tag#1234')), + (rdflib.URIRef('http://example.com/entity#4321'), rdflib.Literal('tag_label_4321', datatype=rdflib.XSD.string), rdflib.URIRef('http://example.com/tag#4321')), + }) + + + + def test_fetch(self): + # two-hop query + q = self.parser(self.ent, ast.fetch.Fetch(bse.tag, ast.fetch.Value(bst.label, 'tag_label'))) + self.assertSetEqual(set(q(self.graph)), { + (rdflib.URIRef('http://example.com/entity#1234'), rdflib.Literal('tag_label_1234', datatype=rdflib.XSD.string)), + (rdflib.URIRef('http://example.com/entity#4321'), rdflib.Literal('tag_label_4321', datatype=rdflib.XSD.string)), + }) + # three-hop-query + q = self.parser(self.ent, ast.fetch.Fetch(bse.tag, ast.fetch.Fetch(bst.main, ast.fetch.Value(bse.rank, 'entity_rank')))) + self.assertSetEqual(set(q(self.graph)), { + (rdflib.URIRef('http://example.com/entity#1234'), None), + (rdflib.URIRef('http://example.com/entity#4321'), rdflib.Literal('1234', datatype=rdflib.XSD.integer)), + }) + + + def test_node(self): + # cannot use the internal hop name + self.assertRaises(errors.BackendError, self.parser, self.ent, ast.fetch.Node(bse.tag, self.parser.ngen.prefix[1:] + '123')) + # a simple Node statement + q = self.parser(self.ent, ast.fetch.Node(bse.tag, 'tag')) + self.assertSetEqual(set(q.names), {'tag'}) + self.assertSetEqual(set(q(self.graph)), { + (rdflib.URIRef('http://example.com/entity#1234'), rdflib.URIRef('http://example.com/tag#1234')), + (rdflib.URIRef('http://example.com/entity#4321'), rdflib.URIRef('http://example.com/tag#4321')), + }) + + + def 
test_value(self): + # cannot use the internal hop name + self.assertRaises(errors.BackendError, self.parser, self.schema.node(ns.bsfs.Entity), ast.fetch.Value(bse.filename, self.parser.ngen.prefix[1:] + '123')) + # a simple Value statement + q = self.parser(self.ent, ast.fetch.Value(bse.filename, 'filename')) + self.assertSetEqual(set(q.names), {'filename'}) + self.assertSetEqual(set(q(self.graph)), { + (rdflib.URIRef('http://example.com/entity#1234'), rdflib.Literal('filename_1234', datatype=rdflib.XSD.string)), + (rdflib.URIRef('http://example.com/entity#4321'), rdflib.Literal('filename_4321', datatype=rdflib.XSD.string)), + }) + + + def test_this(self): + # cannot use the internal hop name + self.assertRaises(errors.BackendError, self.parser, self.ent, ast.fetch.This(self.parser.ngen.prefix[1:] + '123')) + # a simple This statement + self.assertEqual(self.parser._this(self.ent, ast.fetch.This('this'), '?head'), + ({('?head', 'this')}, '')) + q = self.parser(self.ent, ast.fetch.This('this')) + self.assertSetEqual(set(q(self.graph)), { + (rdflib.URIRef('http://example.com/entity#1234'), rdflib.URIRef('http://example.com/entity#1234')), + (rdflib.URIRef('http://example.com/entity#4321'), rdflib.URIRef('http://example.com/entity#4321')), + }) + + +## main ## + +if __name__ == '__main__': + unittest.main() + +## EOF ## diff --git a/test/triple_store/sparql/test_parse_filter.py b/test/triple_store/sparql/test_parse_filter.py index 8764535..6fa0cd3 100644 --- a/test/triple_store/sparql/test_parse_filter.py +++ b/test/triple_store/sparql/test_parse_filter.py @@ -149,13 +149,13 @@ class TestParseFilter(unittest.TestCase): ast.filter.Or( ast.filter.Is('http://example.com/entity#1234'), ast.filter.Is('http://example.com/entity#5678'))) - self.assertSetEqual({str(guid) for guid, in self.graph.query(q)}, {'http://example.com/entity#1234'}) + self.assertSetEqual({str(guid) for guid, in q(self.graph)}, {'http://example.com/entity#1234'}) # root is optional q = self.parser(self.schema.node(ns.bsfs.Entity)) - self.assertSetEqual({str(guid) for guid, in self.graph.query(q)}, + self.assertSetEqual({str(guid) for guid, in q(self.graph)}, {'http://example.com/entity#1234', 'http://example.com/entity#4321', 'http://example.com/image#1234', 'http://example.com/image#4321'}) q = self.parser(self.schema.node(ns.bsfs.Tag)) - self.assertSetEqual({str(guid) for guid, in self.graph.query(q)}, + self.assertSetEqual({str(guid) for guid, in q(self.graph)}, {'http://example.com/tag#1234', 'http://example.com/tag#4321'}) @@ -164,7 +164,7 @@ class TestParseFilter(unittest.TestCase): self.assertRaises(errors.BackendError, self.parser._is, self.schema.literal(ns.bsfs.Literal), ast.filter.Is('http://example.com/entity#1234'), '?ent') # a single Is statement q = self.parser(self.schema.node(ns.bsfs.Entity), ast.filter.Is('http://example.com/entity#1234')) - self.assertSetEqual({str(guid) for guid, in self.graph.query(q)}, + self.assertSetEqual({str(guid) for guid, in q(self.graph)}, {'http://example.com/entity#1234'}) # an aggregate of Is statements q = self.parser(self.schema.node(ns.bsfs.Entity), @@ -172,7 +172,7 @@ class TestParseFilter(unittest.TestCase): ast.filter.Is('http://example.com/entity#1234'), ast.filter.Is('http://example.com/entity#4321'), )) - self.assertSetEqual({str(guid) for guid, in self.graph.query(q)}, + self.assertSetEqual({str(guid) for guid, in q(self.graph)}, {'http://example.com/entity#1234', 'http://example.com/entity#4321'}) # combined with other filters q = 
self.parser(self.schema.node(ns.bsfs.Entity), @@ -185,12 +185,12 @@ class TestParseFilter(unittest.TestCase): ast.filter.Equals('Me, Myself, and I') ), )) - self.assertSetEqual({str(guid) for guid, in self.graph.query(q)}, + self.assertSetEqual({str(guid) for guid, in q(self.graph)}, {'http://example.com/entity#1234'}) # as argument of Any/All q = self.parser(self.schema.node(ns.bsfs.Entity), ast.filter.Any(ns.bse.tag, ast.filter.Is('http://example.com/tag#1234'))) - self.assertSetEqual({str(guid) for guid, in self.graph.query(q)}, + self.assertSetEqual({str(guid) for guid, in q(self.graph)}, {'http://example.com/entity#1234', 'http://example.com/image#1234'}) @@ -199,15 +199,15 @@ class TestParseFilter(unittest.TestCase): self.assertRaises(errors.BackendError, self.parser._equals, self.schema.node(ns.bsfs.Entity), ast.filter.Equals('hello world'), '?ent') # a single Equals statement q = self.parser(self.schema.node(ns.bsfs.Entity), ast.filter.Any(ns.bse.comment, ast.filter.Equals('hello world'))) - self.assertSetEqual({str(guid) for guid, in self.graph.query(q)}, + self.assertSetEqual({str(guid) for guid, in q(self.graph)}, {'http://example.com/entity#1234', 'http://example.com/entity#4321'}) # a single Equals statement that includes subtypes q = self.parser(self.schema.node(ns.bsfs.Entity), ast.filter.Any(ns.bse.comment, ast.filter.Equals('Me, Myself, and I'))) - self.assertSetEqual({str(guid) for guid, in self.graph.query(q)}, + self.assertSetEqual({str(guid) for guid, in q(self.graph)}, {'http://example.com/entity#1234', 'http://example.com/image#1234'}) # an Equals statement on an integer q = self.parser(self.schema.node(ns.bsfs.Entity), ast.filter.Any(ns.bse.filesize, ast.filter.Equals(4321))) - self.assertSetEqual({str(guid) for guid, in self.graph.query(q)}, + self.assertSetEqual({str(guid) for guid, in q(self.graph)}, {'http://example.com/entity#4321', 'http://example.com/image#4321'}) @@ -216,18 +216,18 @@ class TestParseFilter(unittest.TestCase): self.assertRaises(errors.BackendError, self.parser._substring, self.schema.node(ns.bsfs.Entity), ast.filter.Substring('hello world'), '?ent') # a single Substring statement q = self.parser(self.schema.node(ns.bsfs.Entity), ast.filter.Any(ns.bse.comment, ast.filter.Substring('hello'))) - self.assertSetEqual({str(guid) for guid, in self.graph.query(q)}, + self.assertSetEqual({str(guid) for guid, in q(self.graph)}, {'http://example.com/entity#1234', 'http://example.com/entity#4321'}) q = self.parser(self.schema.node(ns.bsfs.Entity), ast.filter.Any(ns.bse.comment, ast.filter.Substring('lo wo'))) - self.assertSetEqual({str(guid) for guid, in self.graph.query(q)}, + self.assertSetEqual({str(guid) for guid, in q(self.graph)}, {'http://example.com/entity#1234', 'http://example.com/entity#4321'}) # a single Substring statement that includes subtypes q = self.parser(self.schema.node(ns.bsfs.Entity), ast.filter.Any(ns.bse.comment, ast.filter.Substring('Myself'))) - self.assertSetEqual({str(guid) for guid, in self.graph.query(q)}, + self.assertSetEqual({str(guid) for guid, in q(self.graph)}, {'http://example.com/entity#1234', 'http://example.com/image#1234'}) # an Substring statement on an integer q = self.parser(self.schema.node(ns.bsfs.Entity), ast.filter.Any(ns.bse.filesize, ast.filter.Substring('32'))) - self.assertSetEqual({str(guid) for guid, in self.graph.query(q)}, + self.assertSetEqual({str(guid) for guid, in q(self.graph)}, {'http://example.com/entity#4321', 'http://example.com/image#4321'}) @@ -236,15 +236,15 @@ class 
TestParseFilter(unittest.TestCase): self.assertRaises(errors.BackendError, self.parser._starts_with, self.schema.node(ns.bsfs.Entity), ast.filter.StartsWith('hello world'), '?ent') # a single StartsWith statement q = self.parser(self.schema.node(ns.bsfs.Entity), ast.filter.Any(ns.bse.comment, ast.filter.StartsWith('hello'))) - self.assertSetEqual({str(guid) for guid, in self.graph.query(q)}, + self.assertSetEqual({str(guid) for guid, in q(self.graph)}, {'http://example.com/entity#1234', 'http://example.com/entity#4321'}) # a single StartsWith statement that includes subtypes q = self.parser(self.schema.node(ns.bsfs.Entity), ast.filter.Any(ns.bse.comment, ast.filter.StartsWith('Me, Mys'))) - self.assertSetEqual({str(guid) for guid, in self.graph.query(q)}, + self.assertSetEqual({str(guid) for guid, in q(self.graph)}, {'http://example.com/entity#1234', 'http://example.com/image#1234'}) # an StartsWith statement on an integer q = self.parser(self.schema.node(ns.bsfs.Entity), ast.filter.Any(ns.bse.filesize, ast.filter.StartsWith(432))) - self.assertSetEqual({str(guid) for guid, in self.graph.query(q)}, + self.assertSetEqual({str(guid) for guid, in q(self.graph)}, {'http://example.com/entity#4321', 'http://example.com/image#4321'}) @@ -253,15 +253,15 @@ class TestParseFilter(unittest.TestCase): self.assertRaises(errors.BackendError, self.parser._ends_with, self.schema.node(ns.bsfs.Entity), ast.filter.EndsWith('hello world'), '?ent') # a single EndsWith statement q = self.parser(self.schema.node(ns.bsfs.Entity), ast.filter.Any(ns.bse.comment, ast.filter.EndsWith('orld'))) - self.assertSetEqual({str(guid) for guid, in self.graph.query(q)}, + self.assertSetEqual({str(guid) for guid, in q(self.graph)}, {'http://example.com/entity#1234', 'http://example.com/entity#4321'}) # a single EndsWith statement that includes subtypes q = self.parser(self.schema.node(ns.bsfs.Entity), ast.filter.Any(ns.bse.comment, ast.filter.EndsWith('and I'))) - self.assertSetEqual({str(guid) for guid, in self.graph.query(q)}, + self.assertSetEqual({str(guid) for guid, in q(self.graph)}, {'http://example.com/entity#1234', 'http://example.com/image#1234'}) # an EndsWith statement on an integer q = self.parser(self.schema.node(ns.bsfs.Entity), ast.filter.Any(ns.bse.filesize, ast.filter.EndsWith(321))) - self.assertSetEqual({str(guid) for guid, in self.graph.query(q)}, + self.assertSetEqual({str(guid) for guid, in q(self.graph)}, {'http://example.com/entity#4321', 'http://example.com/image#4321'}) @@ -270,22 +270,22 @@ class TestParseFilter(unittest.TestCase): self.assertRaises(errors.BackendError, self.parser._less_than, self.schema.node(ns.bsfs.Entity), ast.filter.LessThan(2000), '?ent') # a single LessThan statement q = self.parser(self.schema.node(ns.bsfs.Image), ast.filter.Any(ns.bse.iso, ast.filter.LessThan(2000))) - self.assertSetEqual({str(guid) for guid, in self.graph.query(q)}, + self.assertSetEqual({str(guid) for guid, in q(self.graph)}, {'http://example.com/image#1234'}) # _less_than respects boundary q = self.parser(self.schema.node(ns.bsfs.Image), ast.filter.Any(ns.bse.iso, ast.filter.LessThan(1234, strict=True))) - self.assertSetEqual({str(guid) for guid, in self.graph.query(q)}, set()) + self.assertSetEqual({str(guid) for guid, in q(self.graph)}, set()) q = self.parser(self.schema.node(ns.bsfs.Image), ast.filter.Any(ns.bse.iso, ast.filter.LessThan(1234, strict=False))) - self.assertSetEqual({str(guid) for guid, in self.graph.query(q)}, + self.assertSetEqual({str(guid) for guid, in q(self.graph)}, 
{'http://example.com/image#1234'}) # a single LessThan statement that includes subtypes q = self.parser(self.schema.node(ns.bsfs.Entity), ast.filter.Any(ns.bse.filesize, ast.filter.LessThan(2000))) - self.assertSetEqual({str(guid) for guid, in self.graph.query(q)}, + self.assertSetEqual({str(guid) for guid, in q(self.graph)}, {'http://example.com/entity#1234', 'http://example.com/image#1234'}) # an LessThan statement on a string # always negative; note that http://example.com/tag#4321 is also not returned although its comment is a pure number q = self.parser(self.schema.node(ns.bsfs.Entity), ast.filter.Any(ns.bse.comment, ast.filter.LessThan(10_000))) - self.assertSetEqual({str(guid) for guid, in self.graph.query(q)}, set()) + self.assertSetEqual({str(guid) for guid, in q(self.graph)}, set()) def test_greater_than(self): @@ -293,22 +293,22 @@ class TestParseFilter(unittest.TestCase): self.assertRaises(errors.BackendError, self.parser._greater_than, self.schema.node(ns.bsfs.Entity), ast.filter.GreaterThan(2000), '?ent') # a single GreaterThan statement q = self.parser(self.schema.node(ns.bsfs.Image), ast.filter.Any(ns.bse.iso, ast.filter.GreaterThan(2000))) - self.assertSetEqual({str(guid) for guid, in self.graph.query(q)}, + self.assertSetEqual({str(guid) for guid, in q(self.graph)}, {'http://example.com/image#4321'}) # _greater_than respects boundary q = self.parser(self.schema.node(ns.bsfs.Image), ast.filter.Any(ns.bse.iso, ast.filter.GreaterThan(4321, strict=True))) - self.assertSetEqual({str(guid) for guid, in self.graph.query(q)}, set()) + self.assertSetEqual({str(guid) for guid, in q(self.graph)}, set()) q = self.parser(self.schema.node(ns.bsfs.Image), ast.filter.Any(ns.bse.iso, ast.filter.GreaterThan(4321, strict=False))) - self.assertSetEqual({str(guid) for guid, in self.graph.query(q)}, + self.assertSetEqual({str(guid) for guid, in q(self.graph)}, {'http://example.com/image#4321'}) # a single GreaterThan statement that includes subtypes q = self.parser(self.schema.node(ns.bsfs.Entity), ast.filter.Any(ns.bse.filesize, ast.filter.GreaterThan(2000))) - self.assertSetEqual({str(guid) for guid, in self.graph.query(q)}, + self.assertSetEqual({str(guid) for guid, in q(self.graph)}, {'http://example.com/entity#4321', 'http://example.com/image#4321'}) # an GreaterThan statement on a string # always positive q = self.parser(self.schema.node(ns.bsfs.Entity), ast.filter.Any(ns.bse.comment, ast.filter.GreaterThan(0))) - self.assertSetEqual({str(guid) for guid, in self.graph.query(q)}, + self.assertSetEqual({str(guid) for guid, in q(self.graph)}, {'http://example.com/entity#1234', 'http://example.com/entity#4321', 'http://example.com/image#1234'}) @@ -331,7 +331,7 @@ class TestParseFilter(unittest.TestCase): ast.filter.Any(ns.bse.filesize, ast.filter.Equals(1234)), ast.filter.Any(ns.bse.comment, ast.filter.Equals('Me, Myself, and I')), )) - self.assertSetEqual({str(guid) for guid, in self.graph.query(q)}, + self.assertSetEqual({str(guid) for guid, in q(self.graph)}, {'http://example.com/entity#1234'}) # all conditions have to match q = self.parser(self.schema.node(ns.bsfs.Entity), @@ -340,21 +340,21 @@ class TestParseFilter(unittest.TestCase): ast.filter.Any(ns.bse.filesize, ast.filter.Equals(1234)), ast.filter.Any(ns.bse.comment, ast.filter.Equals('Me, Myself, and I')), )) - self.assertSetEqual({str(guid) for guid, in self.graph.query(q)}, set()) + self.assertSetEqual({str(guid) for guid, in q(self.graph)}, set()) q = self.parser(self.schema.node(ns.bsfs.Entity), ast.filter.And( 
ast.filter.Is('http://example.com/entity#1234'), ast.filter.Any(ns.bse.filesize, ast.filter.Equals(4321)), ast.filter.Any(ns.bse.comment, ast.filter.Equals('Me, Myself, and I')), )) - self.assertSetEqual({str(guid) for guid, in self.graph.query(q)}, set()) + self.assertSetEqual({str(guid) for guid, in q(self.graph)}, set()) q = self.parser(self.schema.node(ns.bsfs.Entity), ast.filter.And( ast.filter.Is('http://example.com/entity#1234'), ast.filter.Any(ns.bse.filesize, ast.filter.Equals(1234)), ast.filter.Any(ns.bse.comment, ast.filter.Equals('foobar')), )) - self.assertSetEqual({str(guid) for guid, in self.graph.query(q)}, set()) + self.assertSetEqual({str(guid) for guid, in q(self.graph)}, set()) # And can be nested q = self.parser(self.schema.node(ns.bsfs.Entity), ast.filter.And( @@ -364,7 +364,7 @@ class TestParseFilter(unittest.TestCase): ast.filter.Any(ns.bse.comment, ast.filter.Equals('Me, Myself, and I')), ), )) - self.assertSetEqual({str(guid) for guid, in self.graph.query(q)}, + self.assertSetEqual({str(guid) for guid, in q(self.graph)}, {'http://example.com/entity#1234'}) @@ -387,7 +387,7 @@ class TestParseFilter(unittest.TestCase): ast.filter.Any(ns.bse.filesize, ast.filter.Equals(4321)), ast.filter.Any(ns.bse.comment, ast.filter.Equals('Me, Myself, and I')), )) - self.assertSetEqual({str(guid) for guid, in self.graph.query(q)}, + self.assertSetEqual({str(guid) for guid, in q(self.graph)}, {'http://example.com/entity#1234', 'http://example.com/image#1234', 'http://example.com/entity#4321', 'http://example.com/image#4321'}) # at least one condition has to match q = self.parser(self.schema.node(ns.bsfs.Entity), @@ -396,14 +396,14 @@ class TestParseFilter(unittest.TestCase): ast.filter.Any(ns.bse.filesize, ast.filter.Equals(8765)), ast.filter.Any(ns.bse.comment, ast.filter.Equals('foobar')), )) - self.assertSetEqual({str(guid) for guid, in self.graph.query(q)}, set()) + self.assertSetEqual({str(guid) for guid, in q(self.graph)}, set()) q = self.parser(self.schema.node(ns.bsfs.Entity), ast.filter.Or( ast.filter.Is('http://example.com/entity#1234'), ast.filter.Any(ns.bse.filesize, ast.filter.Equals(8765)), ast.filter.Any(ns.bse.comment, ast.filter.Equals('foobar')), )) - self.assertSetEqual({str(guid) for guid, in self.graph.query(q)}, + self.assertSetEqual({str(guid) for guid, in q(self.graph)}, {'http://example.com/entity#1234'}) q = self.parser(self.schema.node(ns.bsfs.Entity), ast.filter.Or( @@ -411,7 +411,7 @@ class TestParseFilter(unittest.TestCase): ast.filter.Any(ns.bse.filesize, ast.filter.Equals(4321)), ast.filter.Any(ns.bse.comment, ast.filter.Equals('foobar')), )) - self.assertSetEqual({str(guid) for guid, in self.graph.query(q)}, + self.assertSetEqual({str(guid) for guid, in q(self.graph)}, {'http://example.com/entity#4321', 'http://example.com/image#4321'}) q = self.parser(self.schema.node(ns.bsfs.Entity), ast.filter.Or( @@ -419,7 +419,7 @@ class TestParseFilter(unittest.TestCase): ast.filter.Any(ns.bse.filesize, ast.filter.Equals(8765)), ast.filter.Any(ns.bse.comment, ast.filter.Equals('Me, Myself, and I')), )) - self.assertSetEqual({str(guid) for guid, in self.graph.query(q)}, + self.assertSetEqual({str(guid) for guid, in q(self.graph)}, {'http://example.com/entity#1234', 'http://example.com/image#1234'}) # Or can be nested q = self.parser(self.schema.node(ns.bsfs.Entity), @@ -430,7 +430,7 @@ class TestParseFilter(unittest.TestCase): ast.filter.Any(ns.bse.comment, ast.filter.Equals('Me, Myself, and I')), ), )) - self.assertSetEqual({str(guid) for guid, in 
self.graph.query(q)}, + self.assertSetEqual({str(guid) for guid, in q(self.graph)}, {'http://example.com/entity#1234', 'http://example.com/image#1234', 'http://example.com/entity#4321', 'http://example.com/image#4321'}) @@ -451,14 +451,14 @@ class TestParseFilter(unittest.TestCase): # _any returns a valid query q = self.parser(self.schema.node(ns.bsfs.Entity), ast.filter.Any(ns.bse.filesize, ast.filter.Equals(1234))) - self.assertSetEqual({str(guid) for guid, in self.graph.query(q)}, + self.assertSetEqual({str(guid) for guid, in q(self.graph)}, {'http://example.com/entity#1234', 'http://example.com/image#1234'}) # _any can be nested q = self.parser(self.schema.node(ns.bsfs.Entity), ast.filter.Any(ns.bse.tag, ast.filter.Any(ns.bse.representative, ast.filter.Is('http://example.com/image#1234')))) - self.assertSetEqual({str(guid) for guid, in self.graph.query(q)}, + self.assertSetEqual({str(guid) for guid, in q(self.graph)}, {'http://example.com/entity#1234', 'http://example.com/image#1234'}) @@ -468,12 +468,12 @@ class TestParseFilter(unittest.TestCase): # All Nodes q = self.parser(self.schema.node(ns.bsfs.Entity), ast.filter.All(ns.bse.tag, ast.filter.Is('http://example.com/tag#1234'))) - self.assertSetEqual({str(guid) for guid, in self.graph.query(q)}, + self.assertSetEqual({str(guid) for guid, in q(self.graph)}, {'http://example.com/entity#1234', 'http://example.com/image#1234'}) # All values q = self.parser(self.schema.node(ns.bsfs.Entity), ast.filter.All(ns.bse.comment, ast.filter.Equals('hello world'))) - self.assertSetEqual({str(guid) for guid, in self.graph.query(q)}, + self.assertSetEqual({str(guid) for guid, in q(self.graph)}, {'http://example.com/entity#4321'}) # All on value within Or branch # entity#1234 is selected because all of its comments are in ("hello world", "Me, Myself, and I") @@ -481,12 +481,12 @@ class TestParseFilter(unittest.TestCase): ast.filter.All(ns.bse.comment, ast.filter.Or( ast.filter.Equals('hello world'), ast.filter.Equals('Me, Myself, and I')))) - self.assertSetEqual({str(guid) for guid, in self.graph.query(q)}, + self.assertSetEqual({str(guid) for guid, in q(self.graph)}, {'http://example.com/entity#1234', 'http://example.com/entity#4321', 'http://example.com/image#1234'}) # All requires at least one predicate/value q = self.parser(self.schema.node(ns.bsfs.Entity), ast.filter.All(ns.bse.comment, ast.filter.Equals('Me, Myself, and I'))) - self.assertSetEqual({str(guid) for guid, in self.graph.query(q)}, + self.assertSetEqual({str(guid) for guid, in q(self.graph)}, {'http://example.com/image#1234'}) # All within a statement q = self.parser(self.schema.node(ns.bsfs.Entity), @@ -498,18 +498,18 @@ class TestParseFilter(unittest.TestCase): )) ) ) - self.assertSetEqual({str(guid) for guid, in self.graph.query(q)}, + self.assertSetEqual({str(guid) for guid, in q(self.graph)}, {'http://example.com/entity#1234', 'http://example.com/image#1234'}) # All with reversed Predicate q = self.parser(self.schema.node(ns.bsfs.Tag), ast.filter.All(ast.filter.Predicate(ns.bse.tag, reverse=True), ast.filter.Is('http://example.com/entity#4321'))) - self.assertSetEqual({str(guid) for guid, in self.graph.query(q)}, + self.assertSetEqual({str(guid) for guid, in q(self.graph)}, {'http://example.com/tag#4321'}) # All with multiple predicates q = self.parser(self.schema.node(ns.bsfs.Entity), ast.filter.All(ast.filter.OneOf(ns.bse.tag, ns.bse.buddy), # entity#1234 (tag:tag#1234), entity#1234 (buddy:image#1234), image#1234(tag:tag#1234) ast.filter.Any(ns.bse.comment, 
ast.filter.Equals('Me, Myself, and I')))) # entity#1234, image#1234, tag#1234 - self.assertSetEqual({str(guid) for guid, in self.graph.query(q)}, + self.assertSetEqual({str(guid) for guid, in q(self.graph)}, {'http://example.com/entity#1234', 'http://example.com/image#1234'}) @@ -518,22 +518,22 @@ class TestParseFilter(unittest.TestCase): # Not applies on conditions q = self.parser(self.schema.node(ns.bsfs.Entity), ast.filter.Not(ast.filter.Is('http://example.com/entity#1234'))) - self.assertSetEqual({str(guid) for guid, in self.graph.query(q)}, + self.assertSetEqual({str(guid) for guid, in q(self.graph)}, {'http://example.com/image#1234', 'http://example.com/entity#4321', 'http://example.com/image#4321'}) # Not applies on conditions within branches q = self.parser(self.schema.node(ns.bsfs.Entity), ast.filter.Any(ns.bse.comment, ast.filter.Not(ast.filter.Equals('Me, Myself, and I')))) - self.assertSetEqual({str(guid) for guid, in self.graph.query(q)}, + self.assertSetEqual({str(guid) for guid, in q(self.graph)}, {'http://example.com/entity#1234', 'http://example.com/entity#4321'}) # Not applies on branches q = self.parser(self.schema.node(ns.bsfs.Entity), ast.filter.Not(ast.filter.Any(ns.bse.comment, ast.filter.Equals('Me, Myself, and I')))) - self.assertSetEqual({str(guid) for guid, in self.graph.query(q)}, + self.assertSetEqual({str(guid) for guid, in q(self.graph)}, {'http://example.com/entity#4321', 'http://example.com/image#4321'}) # Double Not cancel each other q = self.parser(self.schema.node(ns.bsfs.Entity), ast.filter.Not(ast.filter.Not(ast.filter.Is('http://example.com/entity#1234')))) - self.assertSetEqual({str(guid) for guid, in self.graph.query(q)}, + self.assertSetEqual({str(guid) for guid, in q(self.graph)}, {'http://example.com/entity#1234'}) # Not works within aggregation (and) q = self.parser(self.schema.node(ns.bsfs.Entity), @@ -541,7 +541,7 @@ class TestParseFilter(unittest.TestCase): ast.filter.Not(ast.filter.Is('http://example.com/entity#1234')), ast.filter.Any(ns.bse.comment, ast.filter.Equals('hello world')), )) - self.assertSetEqual({str(guid) for guid, in self.graph.query(q)}, + self.assertSetEqual({str(guid) for guid, in q(self.graph)}, {'http://example.com/entity#4321'}) # Not works within aggregation (or) q = self.parser(self.schema.node(ns.bsfs.Entity), @@ -549,7 +549,7 @@ class TestParseFilter(unittest.TestCase): ast.filter.Not(ast.filter.Is('http://example.com/entity#1234')), ast.filter.Any(ns.bse.comment, ast.filter.Equals('Me, Myself, and I')), )) - self.assertSetEqual({str(guid) for guid, in self.graph.query(q)}, + self.assertSetEqual({str(guid) for guid, in q(self.graph)}, {'http://example.com/entity#1234', 'http://example.com/entity#4321', 'http://example.com/image#1234', 'http://example.com/image#4321'}) # Not works outside aggregation (and) q = self.parser(self.schema.node(ns.bsfs.Entity), @@ -558,7 +558,7 @@ class TestParseFilter(unittest.TestCase): ast.filter.Is('http://example.com/entity#1234'), ast.filter.Any(ns.bse.comment, ast.filter.Equals('hello world')), ))) - self.assertSetEqual({str(guid) for guid, in self.graph.query(q)}, + self.assertSetEqual({str(guid) for guid, in q(self.graph)}, {'http://example.com/entity#4321', 'http://example.com/image#1234', 'http://example.com/image#4321'}) # Not works outside aggregation (or) q = self.parser(self.schema.node(ns.bsfs.Entity), @@ -567,7 +567,7 @@ class TestParseFilter(unittest.TestCase): ast.filter.Is('http://example.com/entity#4321'), ast.filter.Any(ns.bse.comment, ast.filter.Equals('Me, Myself, 
and I')), ))) - self.assertSetEqual({str(guid) for guid, in self.graph.query(q)}, + self.assertSetEqual({str(guid) for guid, in q(self.graph)}, {'http://example.com/image#4321'}) # Not mixed with branch, aggregation, id, and value q = self.parser(self.schema.node(ns.bsfs.Entity), @@ -580,7 +580,7 @@ class TestParseFilter(unittest.TestCase): ), ast.filter.Any(ns.bse.comment, ast.filter.Not(ast.filter.Equals('foobar'))), # entity#1234, entity#4321, image#1234 )) - self.assertSetEqual({str(guid) for guid, in self.graph.query(q)}, + self.assertSetEqual({str(guid) for guid, in q(self.graph)}, {'http://example.com/image#1234'}) @@ -590,21 +590,21 @@ class TestParseFilter(unittest.TestCase): # Has with GreaterThan constraint q = self.parser(self.schema.node(ns.bsfs.Entity), ast.filter.Has(ns.bse.comment, ast.filter.GreaterThan(0))) - self.assertSetEqual({str(guid) for guid, in self.graph.query(q)}, + self.assertSetEqual({str(guid) for guid, in q(self.graph)}, {'http://example.com/entity#1234', 'http://example.com/entity#4321', 'http://example.com/image#1234'}) q = self.parser(self.schema.node(ns.bsfs.Entity), ast.filter.Has(ns.bse.comment, ast.filter.GreaterThan(1))) - self.assertSetEqual({str(guid) for guid, in self.graph.query(q)}, + self.assertSetEqual({str(guid) for guid, in q(self.graph)}, {'http://example.com/entity#1234'}) # Has with Equals constraint q = self.parser(self.schema.node(ns.bsfs.Entity), ast.filter.Has(ns.bse.comment, 1)) - self.assertSetEqual({str(guid) for guid, in self.graph.query(q)}, + self.assertSetEqual({str(guid) for guid, in q(self.graph)}, {'http://example.com/entity#4321', 'http://example.com/image#1234'}) # Has with LessThan constraint q = self.parser(self.schema.node(ns.bsfs.Entity), ast.filter.Has(ns.bse.comment, ast.filter.LessThan(2))) - self.assertSetEqual({str(guid) for guid, in self.graph.query(q)}, + self.assertSetEqual({str(guid) for guid, in q(self.graph)}, {'http://example.com/entity#4321', 'http://example.com/image#1234', 'http://example.com/image#4321'}) # Has with multiple constraints self.graph.add((rdflib.URIRef('http://example.com/entity#1234'), rdflib.URIRef(ns.bse.comment), rdflib.Literal('extra1', datatype=rdflib.XSD.string))) @@ -616,17 +616,17 @@ class TestParseFilter(unittest.TestCase): self.graph.add((rdflib.URIRef('http://example.com/entity#4321'), rdflib.URIRef(ns.bse.comment), rdflib.Literal('extra2', datatype=rdflib.XSD.string))) q = self.parser(self.schema.node(ns.bsfs.Entity), ast.filter.Has(ns.bse.comment, ast.filter.And(ast.filter.GreaterThan(1), ast.filter.LessThan(5)))) - self.assertSetEqual({str(guid) for guid, in self.graph.query(q)}, + self.assertSetEqual({str(guid) for guid, in q(self.graph)}, {'http://example.com/entity#4321'}) # Has with OneOf predicate q = self.parser(self.schema.node(ns.bsfs.Entity), ast.filter.Has(ast.filter.OneOf(ns.bse.tag, ns.bse.buddy), ast.filter.GreaterThan(1))) - self.assertSetEqual({str(guid) for guid, in self.graph.query(q)}, + self.assertSetEqual({str(guid) for guid, in q(self.graph)}, {'http://example.com/entity#1234', 'http://example.com/entity#4321'}) # Has with reversed predicate q = self.parser(self.schema.node(ns.bsfs.Tag), ast.filter.Has(ast.filter.Predicate(ns.bse.tag, reverse=True), ast.filter.GreaterThan(1))) - self.assertSetEqual({str(guid) for guid, in self.graph.query(q)}, + self.assertSetEqual({str(guid) for guid, in q(self.graph)}, {'http://example.com/tag#1234'}) @@ -643,23 +643,23 @@ class TestParseFilter(unittest.TestCase): self.assertRaises(errors.ConsistencyError, 
self.parser._distance, self.schema.literal(ns.bsfs.Colors), ast.filter.Distance([1,2,3,4,5], 1), '') # _distance respects threshold q = self.parser(self.schema.node(ns.bsfs.Entity), ast.filter.Any(ns.bse.colors, ast.filter.Distance([2,4,3,1], 4))) - self.assertSetEqual({str(guid) for guid, in self.graph.query(q)}, + self.assertSetEqual({str(guid) for guid, in q(self.graph)}, {'http://example.com/entity#1234', 'http://example.com/entity#4321', 'http://example.com/image#1234'}) q = self.parser(self.schema.node(ns.bsfs.Entity), ast.filter.Any(ns.bse.colors, ast.filter.Distance([2,4,3,1], 3))) - self.assertSetEqual({str(guid) for guid, in self.graph.query(q)}, + self.assertSetEqual({str(guid) for guid, in q(self.graph)}, {'http://example.com/entity#4321', 'http://example.com/image#1234'}) q = self.parser(self.schema.node(ns.bsfs.Entity), ast.filter.Any(ns.bse.colors, ast.filter.Distance([2,4,3,1], 2))) - self.assertSetEqual({str(guid) for guid, in self.graph.query(q)}, + self.assertSetEqual({str(guid) for guid, in q(self.graph)}, {'http://example.com/image#1234'}) # result set can be empty q = self.parser(self.schema.node(ns.bsfs.Entity), ast.filter.Any(ns.bse.colors, ast.filter.Distance([2,4,3,1], 1))) - self.assertSetEqual({str(guid) for guid, in self.graph.query(q)}, set()) + self.assertSetEqual({str(guid) for guid, in q(self.graph)}, set()) # _distance respects strict q = self.parser(self.schema.node(ns.bsfs.Entity), ast.filter.Any(ns.bse.colors, ast.filter.Distance([1,2,3,4], 0, False))) - self.assertSetEqual({str(guid) for guid, in self.graph.query(q)}, + self.assertSetEqual({str(guid) for guid, in q(self.graph)}, {'http://example.com/entity#1234'}) q = self.parser(self.schema.node(ns.bsfs.Entity), ast.filter.Any(ns.bse.colors, ast.filter.Distance([1,2,3,4], 0, True))) - self.assertSetEqual({str(guid) for guid, in self.graph.query(q)}, set()) + self.assertSetEqual({str(guid) for guid, in q(self.graph)}, set()) def test_one_of(self): # _one_of expects a node @@ -725,7 +725,7 @@ class TestParseFilter(unittest.TestCase): ast.filter.Any(ast.filter.OneOf(ns.bse.tag, ns.bse.buddy), ast.filter.Any(ast.filter.OneOf(ns.bse.comment), ast.filter.Equals('Me, Myself, and I')))) - self.assertSetEqual({str(guid) for guid, in self.graph.query(q)}, + self.assertSetEqual({str(guid) for guid, in q(self.graph)}, {'http://example.com/entity#1234', 'http://example.com/image#1234'}) @@ -757,13 +757,13 @@ class TestParseFilter(unittest.TestCase): ast.filter.Any(ns.bse.representative, ast.filter.Any(ns.bse.filesize, ast.filter.Equals(1234))))) - self.assertSetEqual({str(guid) for guid, in self.graph.query(q)}, + self.assertSetEqual({str(guid) for guid, in q(self.graph)}, {'http://example.com/entity#1234', 'http://example.com/image#1234'}) q = self.parser(self.schema.node(ns.bsfs.Tag), ast.filter.Any(ast.filter.Predicate(ns.bse.tag, reverse=True), ast.filter.Any(ns.bse.filesize, ast.filter.LessThan(2000)))) - self.assertSetEqual({str(guid) for guid, in self.graph.query(q)}, + self.assertSetEqual({str(guid) for guid, in q(self.graph)}, {'http://example.com/tag#1234'}) diff --git a/test/triple_store/sparql/test_sparql.py b/test/triple_store/sparql/test_sparql.py index 7fbfb65..30876f2 100644 --- a/test/triple_store/sparql/test_sparql.py +++ b/test/triple_store/sparql/test_sparql.py @@ -34,6 +34,7 @@ class TestSparqlStore(unittest.TestCase): bsfs:User rdfs:subClassOf bsfs:Node . xsd:string rdfs:subClassOf bsfs:Literal . bsfs:Number rdfs:subClassOf bsfs:Literal . + bsfs:BinaryBlob rdfs:subClassOf bsfs:Literal . 
xsd:integer rdfs:subClassOf bsfs:Number . # non-unique literal @@ -60,6 +61,11 @@ class TestSparqlStore(unittest.TestCase): rdfs:range bsfs:User ; bsfs:unique "true"^^xsd:boolean . + # binary range + bse:asset rdfs:subClassOf bsfs:Predicate ; + rdfs:domain bsfs:Entity ; + rdfs:range bsfs:BinaryBlob . + ''') self.schema_triples = { # schema hierarchy @@ -68,6 +74,7 @@ class TestSparqlStore(unittest.TestCase): (rdflib.URIRef(ns.bsfs.User), rdflib.RDFS.subClassOf, rdflib.URIRef(ns.bsfs.Node)), (rdflib.URIRef(ns.xsd.string), rdflib.RDFS.subClassOf, rdflib.URIRef(ns.bsfs.Literal)), (rdflib.URIRef(ns.bsfs.Array), rdflib.RDFS.subClassOf, rdflib.URIRef(ns.bsfs.Literal)), + (rdflib.URIRef(ns.bsfs.BinaryBlob), rdflib.RDFS.subClassOf, rdflib.URIRef(ns.bsfs.Literal)), (rdflib.URIRef(ns.bsfs.Feature), rdflib.RDFS.subClassOf, rdflib.URIRef(ns.bsfs.Array)), (rdflib.URIRef(ns.bsfs.Number), rdflib.RDFS.subClassOf, rdflib.URIRef(ns.bsfs.Literal)), (rdflib.URIRef(ns.bsfs.Time), rdflib.RDFS.subClassOf, rdflib.URIRef(ns.bsfs.Literal)), @@ -76,6 +83,7 @@ class TestSparqlStore(unittest.TestCase): (rdflib.URIRef(ns.bse.filesize), rdflib.RDFS.subClassOf, rdflib.URIRef(ns.bsfs.Predicate)), (rdflib.URIRef(ns.bse.tag), rdflib.RDFS.subClassOf, rdflib.URIRef(ns.bsfs.Predicate)), (rdflib.URIRef(ns.bse.author), rdflib.RDFS.subClassOf, rdflib.URIRef(ns.bsfs.Predicate)), + (rdflib.URIRef(ns.bse.asset), rdflib.RDFS.subClassOf, rdflib.URIRef(ns.bsfs.Predicate)), } def test_essentials(self): @@ -358,6 +366,7 @@ class TestSparqlStore(unittest.TestCase): (rdflib.URIRef(ns.bsfs.User), rdflib.RDFS.subClassOf, rdflib.URIRef(ns.bsfs.Node)), (rdflib.URIRef(ns.xsd.boolean), rdflib.RDFS.subClassOf, rdflib.URIRef(ns.bsfs.Literal)), (rdflib.URIRef(ns.bsfs.Array), rdflib.RDFS.subClassOf, rdflib.URIRef(ns.bsfs.Literal)), + (rdflib.URIRef(ns.bsfs.BinaryBlob), rdflib.RDFS.subClassOf, rdflib.URIRef(ns.bsfs.Literal)), (rdflib.URIRef(ns.bsfs.Feature), rdflib.RDFS.subClassOf, rdflib.URIRef(ns.bsfs.Array)), (rdflib.URIRef(ns.bsfs.Number), rdflib.RDFS.subClassOf, rdflib.URIRef(ns.bsfs.Literal)), (rdflib.URIRef(ns.bsfs.Time), rdflib.RDFS.subClassOf, rdflib.URIRef(ns.bsfs.Literal)), @@ -556,6 +565,76 @@ class TestSparqlStore(unittest.TestCase): self.assertSetEqual(set(q), tag_ids) + def test_fetch(self): + # store setup + store = SparqlStore.Open() + store.schema = self.schema + # add instances + ent_type = self.schema.node(ns.bsfs.Entity) + tag_type = self.schema.node(ns.bsfs.Tag) + ent_ids = {URI('http://example.com/me/entity#1234'), URI('http://example.com/me/entity#4321')} + tag_ids = {URI('http://example.com/me/tag#1234'), URI('http://example.com/me/tag#4321')} + store.create(ent_type, ent_ids) + store.create(tag_type, tag_ids) + store.set(ent_type, ent_ids, self.schema.predicate(ns.bse.tag), tag_ids) + store.set(ent_type, {URI('http://example.com/me/entity#1234')}, self.schema.predicate(ns.bse.filesize), {1234}) + store.set(ent_type, {URI('http://example.com/me/entity#4321')}, self.schema.predicate(ns.bse.filesize), {4321}) + store.set(ent_type, {URI('http://example.com/me/entity#1234')}, self.schema.predicate(ns.bse.comment), {'hello world'}) + # node_type must be a node from the schema + self.assertRaises(errors.ConsistencyError, list, store.fetch(self.schema.literal(ns.bsfs.Literal), + ast.filter.FilterExpression(), ast.fetch.FetchExpression())) + self.assertRaises(errors.ConsistencyError, list, store.fetch(self.schema.node(ns.bsfs.Node).child(ns.bsfs.Invalid), + ast.filter.FilterExpression(), ast.fetch.FetchExpression())) + # requires a 
filter and a fetch query + self.assertRaises(TypeError, list, store.fetch(self.schema.node(ns.bsfs.Entity), None, ast.fetch.FetchExpression())) + self.assertRaises(TypeError, list, store.fetch(self.schema.node(ns.bsfs.Entity), 1234, ast.fetch.FetchExpression())) + self.assertRaises(TypeError, list, store.fetch(self.schema.node(ns.bsfs.Entity), 'hello', ast.fetch.FetchExpression())) + self.assertRaises(TypeError, list, store.fetch(self.schema.node(ns.bsfs.Entity), ast.filter.FilterExpression(), None)) + self.assertRaises(TypeError, list, store.fetch(self.schema.node(ns.bsfs.Entity), ast.filter.FilterExpression(), 1234)) + self.assertRaises(TypeError, list, store.fetch(self.schema.node(ns.bsfs.Entity), ast.filter.FilterExpression(), 'hello')) + # fetch emits triples + self.assertSetEqual(set(store.fetch(self.schema.node(ns.bsfs.Entity), + ast.filter.Is('http://example.com/me/entity#1234'), + ast.fetch.Value(ns.bse.filesize, 'filesize'), + )), { + (URI('http://example.com/me/entity#1234'), 'filesize', 1234), + }) + # fetch respects filter query + self.assertSetEqual(set(store.fetch(self.schema.node(ns.bsfs.Entity), + ast.filter.IsIn('http://example.com/me/entity#1234', 'http://example.com/me/entity#4321'), + ast.fetch.Value(ns.bse.filesize, 'filesize'), + )), { + (URI('http://example.com/me/entity#1234'), 'filesize', 1234), + (URI('http://example.com/me/entity#4321'), 'filesize', 4321), + }) + # fetch ignores missing data + self.assertSetEqual(set(store.fetch(self.schema.node(ns.bsfs.Entity), + ast.filter.IsIn('http://example.com/me/entity#1234', 'http://example.com/me/entity#4321'), + ast.fetch.Value(ns.bse.comment, 'comment'), + )), { + (URI('http://example.com/me/entity#1234'), 'comment', 'hello world'), + }) + # fetch emits all triples + self.assertSetEqual(set(store.fetch(self.schema.node(ns.bsfs.Entity), + ast.filter.Is('http://example.com/me/entity#1234'), + ast.fetch.All( + ast.fetch.Value(ns.bse.filesize, 'filesize'), + ast.fetch.Node(ns.bse.tag, 'tag'), + ) + )), { + (URI('http://example.com/me/entity#1234'), 'filesize', 1234), + (URI('http://example.com/me/entity#1234'), 'tag', URI('http://example.com/me/tag#1234')), + (URI('http://example.com/me/entity#1234'), 'tag', URI('http://example.com/me/tag#4321')), + }) + # triples do not repeat + triples = list(store.fetch(self.schema.node(ns.bsfs.Entity), ast.filter.Is('http://example.com/me/entity#1234'), + ast.fetch.All( + ast.fetch.Value(ns.bse.filesize, 'filesize'), + ast.fetch.Node(ns.bse.tag, 'tag'), + ) + )) + self.assertEqual(len(triples), 3) + def test_exists(self): # store setup store = SparqlStore.Open() @@ -861,6 +940,23 @@ class TestSparqlStore(unittest.TestCase): # inexistent guids self.assertRaises(errors.InstanceError, store.set, ent_type, {URI('http://example.com/me/entity#foobar')}, p_comment, {'xyz'}) + # BinaryBlob values are base64 encoded + p_asset = store.schema.predicate(ns.bse.asset) + store.set(ent_type, ent_ids, p_asset, {bytes(range(128)), bytes(range(128, 256))}) + blob1 = rdflib.Literal('AAECAwQFBgcICQoLDA0ODxAREhMUFRYXGBkaGxwdHh8gISIjJCUmJygpKissLS4vMDEyMzQ1Njc4OTo7PD0+P0BBQkNERUZHSElKS0xNTk9QUVJTVFVWV1hZWltcXV5fYGFiY2RlZmdoaWprbG1ub3BxcnN0dXZ3eHl6e3x9fn8=', + datatype=rdflib.URIRef(ns.bsfs.BinaryBlob)) + blob2 = rdflib.Literal('gIGCg4SFhoeIiYqLjI2Oj5CRkpOUlZaXmJmam5ydnp+goaKjpKWmp6ipqqusra6vsLGys7S1tre4ubq7vL2+v8DBwsPExcbHyMnKy8zNzs/Q0dLT1NXW19jZ2tvc3d7f4OHi4+Tl5ufo6err7O3u7/Dx8vP09fb3+Pn6+/z9/v8=', + datatype=rdflib.URIRef(ns.bsfs.BinaryBlob)) + self.assertTrue(set(store._graph).issuperset({ + 
(rdflib.URIRef('http://example.com/me/entity#1234'), rdflib.URIRef(p_asset.uri), blob1),
+            (rdflib.URIRef('http://example.com/me/entity#1234'), rdflib.URIRef(p_asset.uri), blob2),
+            (rdflib.URIRef('http://example.com/me/entity#4321'), rdflib.URIRef(p_asset.uri), blob1),
+            (rdflib.URIRef('http://example.com/me/entity#4321'), rdflib.URIRef(p_asset.uri), blob2),
+            }))
+        # lit.value returns the original bytes value
+        self.assertSetEqual({lit.value for lit in store._graph.objects(None, rdflib.URIRef(p_asset.uri))},
+            {bytes(range(128)), bytes(range(128, 256))})
+
 ## main ##
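The test_fetch case above pins down the new fetch API: a filter query selects the nodes, a fetch query names the values to report, and the store yields (node, name, value) triples. A minimal usage sketch follows; `store` and `schema` are assumed to be set up as in the test fixture, and only call forms taken from the assertions above are used.

# Sketch only: `store` is assumed to be a SparqlStore prepared as in the test
# fixture above, and `schema` its schema; neither is constructed here.
from bsfs.namespace import ns
from bsfs.query import ast

def fetch_filesize_and_tags(store, schema):
    # fetch() pairs a filter query (which nodes) with a fetch query (which
    # values) and yields (node guid, name, value) triples.
    return set(store.fetch(
        schema.node(ns.bsfs.Entity),                         # node type
        ast.filter.Is('http://example.com/me/entity#1234'),  # node selection
        ast.fetch.All(                                       # values to report
            ast.fetch.Value(ns.bse.filesize, 'filesize'),
            ast.fetch.Node(ns.bse.tag, 'tag'),
        ),
    ))

# per the assertions above, the result contains, e.g.:
#   (URI('http://example.com/me/entity#1234'), 'filesize', 1234)
#   (URI('http://example.com/me/entity#1234'), 'tag', URI('http://example.com/me/tag#1234'))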
diff --git a/test/triple_store/sparql/test_utils.py b/test/triple_store/sparql/test_utils.py
new file mode 100644
index 0000000..073b8f8
--- /dev/null
+++ b/test/triple_store/sparql/test_utils.py
@@ -0,0 +1,155 @@
+"""
+
+Part of the bsfs test suite.
+A copy of the license is provided with the project.
+Author: Matthias Baumgartner, 2022
+"""
+# standard imports
+import operator
+import re
+import unittest
+
+# external imports
+import rdflib
+
+# bsfs imports
+from bsfs.namespace import ns
+
+# objects to test
+from bsfs.triple_store.sparql.utils import GenHopName, Query
+
+
+## code ##
+
+class TestGenHopName(unittest.TestCase):
+    def test_next(self):
+        # baseline
+        self.assertEqual(next(GenHopName(prefix='?foo', start=123)), '?foo123')
+        # respects prefix
+        self.assertEqual(next(GenHopName(prefix='?bar', start=123)), '?bar123')
+        # respects start
+        self.assertEqual(next(GenHopName(prefix='?foo', start=321)), '?foo321')
+        # counts up
+        cnt = GenHopName(prefix='?foo', start=998)
+        self.assertEqual(next(cnt), '?foo998')
+        self.assertEqual(next(cnt), '?foo999')
+        self.assertEqual(next(cnt), '?foo1000')
+        self.assertEqual(next(cnt), '?foo1001')
+
+    def test_essentials(self):
+        # can get the prefix
+        self.assertEqual(GenHopName(prefix='?foo', start=123).prefix, '?foo')
+        # can get the counter
+        self.assertEqual(GenHopName(prefix='?foo', start=123).curr, 122)
+
+
+class TestQuery(unittest.TestCase):
+    def setUp(self):
+        self.root_type = 'http://bsfs.ai/schema/Entity'
+        self.root_head = '?root'
+        self.select = (('?head', 'name'), )
+        self.where = f'?root <{ns.bse.tag}> ?head'
+
+    def test_essentials(self):
+        # can access members
+        q = Query(self.root_type, self.root_head, self.select, self.where)
+        self.assertEqual(q.root_type, self.root_type)
+        self.assertEqual(q.root_head, self.root_head)
+        self.assertEqual(q.select, self.select)
+        self.assertEqual(q.where, self.where)
+        # comparison
+        self.assertEqual(q, Query(self.root_type, self.root_head, self.select, self.where))
+        self.assertEqual(hash(q), hash(Query(self.root_type, self.root_head, self.select, self.where)))
+        # comparison respects root_type
+        self.assertNotEqual(q, Query('http://bsfs.ai/schema/Tag', self.root_head, self.select, self.where))
+        self.assertNotEqual(hash(q), hash(Query('http://bsfs.ai/schema/Tag', self.root_head, self.select, self.where)))
+        # comparison respects root_head
+        self.assertNotEqual(q, Query(self.root_type, '?foo', self.select, self.where))
+        self.assertNotEqual(hash(q), hash(Query(self.root_type, '?foo', self.select, self.where)))
+        # comparison respects select
+        self.assertNotEqual(q, Query(self.root_type, self.root_head, (('?head', 'foo'), ), self.where))
+        self.assertNotEqual(hash(q), hash(Query(self.root_type, self.root_head, (('?head', 'foo'), ), self.where)))
+        # comparison respects where
+        self.assertNotEqual(q, Query(self.root_type, self.root_head, self.select, '?root bse:filename ?head'))
+        self.assertNotEqual(hash(q), hash(Query(self.root_type, self.root_head, self.select, '?root bse:filename ?head')))
+        # string conversion
+        self.assertEqual(str(q), q.query)
+        self.assertEqual(repr(q), "Query(http://bsfs.ai/schema/Entity, ?root, (('?head', 'name'),), ?root <http://bsfs.ai/schema/Entity#tag> ?head)")
+
+    def test_add(self):
+        q = Query(self.root_type, self.root_head, self.select, self.where)
+        # can only add a query
+        self.assertRaises(TypeError, operator.add, q, 1234)
+        self.assertRaises(TypeError, operator.add, q, 'foobar')
+        # root type and head must match
+        self.assertRaises(ValueError, operator.add, q, Query('http://bsfs.ai/schema/Tag', self.root_head))
+        self.assertRaises(ValueError, operator.add, q, Query(self.root_type, '?foobar'))
+        # select and where are combined
+        combo = q + Query(self.root_type, self.root_head, (('?foo', 'bar'), ), f'?root <{ns.bse.filename}> ?foo')
+        self.assertEqual(combo.select, (('?head', 'name'), ('?foo', 'bar')))
+        self.assertEqual(combo.where, f'?root <{ns.bse.tag}> ?head . ?root <{ns.bse.filename}> ?foo')
+        # select can be empty
+        combo = q + Query(self.root_type, self.root_head, None, f'?root <{ns.bse.filename}> ?foo')
+        self.assertEqual(combo.select, (('?head', 'name'), ))
+        combo = Query(self.root_type, self.root_head, None, f'?root <{ns.bse.filename}> ?foo') + q
+        self.assertEqual(combo.select, (('?head', 'name'), ))
+        combo = Query(self.root_type, self.root_head, None, self.where) + Query(self.root_type, self.root_head, None, f'?root <{ns.bse.filename}> ?foo')
+        self.assertEqual(combo.select, tuple())
+        # where can be empty
+        combo = q + Query(self.root_type, self.root_head, (('?foo', 'bar'), ))
+        self.assertEqual(combo.where, self.where)
+        combo = Query(self.root_type, self.root_head, (('?foo', 'bar'), )) + q
+        self.assertEqual(combo.where, self.where)
+        combo = Query(self.root_type, self.root_head, self.select) + Query(self.root_type, self.root_head, (('?foo', 'bar'), ))
+        self.assertEqual(combo.where, '')
+
+    def test_names(self):
+        self.assertEqual(Query(self.root_type, self.root_head, (('?head', 'name'), ), self.where).names,
+            ('name', ))
+        self.assertEqual(Query(self.root_type, self.root_head, (('?head', 'name'), ('?foo', 'bar')), self.where).names,
+            ('name', 'bar'))
+
+    def test_query(self):
+        def normalize(value):
+            value = value.strip()
+            value = value.lower()
+            value = value.replace(r'\n', ' ')
+            value, _ = re.subn(r'\s\s+', ' ', value)
+            return value
+        # query composes a valid query
+        q = Query(self.root_type, self.root_head, self.select, self.where)
+        self.assertEqual(normalize(q.query), normalize(f'select ?root (?head as ?name) where {{ ?root <{ns.rdf.type}>/<{ns.rdfs.subClassOf}>* <http://bsfs.ai/schema/Entity> . ?root <{ns.bse.tag}> ?head }}'))
+        # select and where are optional
+        q = Query(self.root_type, self.root_head)
+        self.assertEqual(normalize(q.query), normalize(f'select ?root where {{ ?root <{ns.rdf.type}>/<{ns.rdfs.subClassOf}>* <http://bsfs.ai/schema/Entity> . }}'))
+        # select and where need not correspond
+        q = Query(self.root_type, self.root_head, (('?head', 'name'), ))
+        self.assertEqual(normalize(q.query), normalize(f'select ?root (?head as ?name) where {{ ?root <{ns.rdf.type}>/<{ns.rdfs.subClassOf}>* <http://bsfs.ai/schema/Entity> . }}'))
+        # query is used for string representation
+        self.assertEqual(str(q), q.query)
+
+    def test_call(self):
+        graph = rdflib.Graph()
+        # schema
+        graph.add((rdflib.URIRef('http://bsfs.ai/schema/Document'), rdflib.URIRef(ns.rdfs.subClassOf), rdflib.URIRef('http://bsfs.ai/schema/Entity')))
+        # nodes
+        graph.add((rdflib.URIRef('http://example.com/entity#1234'), rdflib.URIRef(ns.rdf.type), rdflib.URIRef('http://bsfs.ai/schema/Entity')))
+        graph.add((rdflib.URIRef('http://example.com/doc#1234'), rdflib.URIRef(ns.rdf.type), rdflib.URIRef('http://bsfs.ai/schema/Document')))
+        # links
+        graph.add((rdflib.URIRef('http://example.com/entity#1234'), rdflib.URIRef(ns.bse.tag), rdflib.Literal('tag#1234', datatype=rdflib.XSD.string)))
+        graph.add((rdflib.URIRef('http://example.com/doc#1234'), rdflib.URIRef(ns.bse.tag), rdflib.Literal('tag#1234', datatype=rdflib.XSD.string)))
+        # run query on a given graph
+        query = Query(self.root_type, self.root_head, self.select, self.where)
+        self.assertSetEqual(set(query(graph)), {
+            (rdflib.URIRef('http://example.com/entity#1234'), rdflib.Literal('tag#1234', datatype=rdflib.XSD.string)),
+            (rdflib.URIRef('http://example.com/doc#1234'), rdflib.Literal('tag#1234', datatype=rdflib.XSD.string)),
+            })
+        # query actually considers the passed graph
+        self.assertSetEqual(set(query(rdflib.Graph())), set())
+
+## main ##
+
+if __name__ == '__main__':
+    unittest.main()
+
+## EOF ##
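Query is introduced in this change as a small SPARQL composition helper. A self-contained sketch of the behaviour the tests above rely on; the graph content is made up for illustration, while the constructor arguments, the + operator, .query, and calling the query on a graph mirror the test cases.

# Sketch of the Query helper exercised above; the graph content is invented,
# the Query call forms are the ones used in the tests.
import rdflib
from bsfs.namespace import ns
from bsfs.triple_store.sparql.utils import Query

graph = rdflib.Graph()
graph.add((rdflib.URIRef('http://example.com/entity#1234'),
           rdflib.URIRef(ns.rdf.type),
           rdflib.URIRef('http://bsfs.ai/schema/Entity')))
graph.add((rdflib.URIRef('http://example.com/entity#1234'),
           rdflib.URIRef(ns.bse.tag),
           rdflib.Literal('tag#1234', datatype=rdflib.XSD.string)))

# root type, root head, (variable, name) select pairs, and a where clause
q = Query('http://bsfs.ai/schema/Entity', '?root',
          (('?head', 'name'), ), f'?root <{ns.bse.tag}> ?head')
print(q.query)        # the composed SPARQL string
print(set(q(graph)))  # {(entity#1234, Literal('tag#1234'))}

# queries over the same root combine; select and where are concatenated
combo = q + Query('http://bsfs.ai/schema/Entity', '?root',
                  (('?foo', 'bar'), ), f'?root <{ns.bse.filename}> ?foo')
print(combo.select)   # (('?head', 'name'), ('?foo', 'bar'))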
diff --git a/test/triple_store/test_base.py b/test/triple_store/test_base.py
index a0c3260..56a2539 100644
--- a/test/triple_store/test_base.py
+++ b/test/triple_store/test_base.py
@@ -38,6 +38,9 @@ class DummyBase(TripleStoreBase):
     def get(self, node_type, query):
         pass
 
+    def fetch(self, node_type, filter, fetch):
+        pass
+
     def exists(self, node_type, guids):
         pass
 
diff --git a/test/utils/test_uuid.py b/test/utils/test_uuid.py
index 0de96ed..804b063 100644
--- a/test/utils/test_uuid.py
+++ b/test/utils/test_uuid.py
@@ -83,6 +83,16 @@ class TestUCID(unittest.TestCase):
     def test_from_path(self):
         self.assertEqual(UCID.from_path(self._path), self._checksum)
 
+    def test_from_buffer(self):
+        with open(self._path, 'rb') as ifile:
+            self.assertEqual(UCID.from_buffer(ifile), self._checksum)
+        with open(self._path) as ifile:
+            self.assertEqual(UCID.from_buffer(ifile), self._checksum)
+
+    def test_from_bytes(self):
+        with open(self._path, 'rb') as ifile:
+            self.assertEqual(UCID.from_bytes(ifile.read()), self._checksum)
+
     def test_from_dict(self):
         self.assertEqual(UCID.from_dict({'hello': 'world', 'foo': 1234, 'bar': False}),
             '8d2544395a0d2827e3d9ce8cd619d5e3f801e8126bf3f93ee5abd38158959585')
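The UCID helpers exercised above accept a path, an open file, or raw bytes and are expected to agree on the resulting content id. A call-form sketch; the import location and the example usage are assumptions for illustration, only the call forms come from the tests.

# Call-form sketch for the UCID helpers; the import location is an assumption.
from bsfs.utils import UCID  # assumed export location

def content_id_three_ways(path):
    # all three entry points are expected to yield the same checksum
    cid_a = UCID.from_path(path)
    with open(path, 'rb') as ifile:
        cid_b = UCID.from_buffer(ifile)    # text-mode files are accepted as well
    with open(path, 'rb') as ifile:
        cid_c = UCID.from_bytes(ifile.read())
    assert cid_a == cid_b == cid_c
    return cid_a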