diff options
Diffstat (limited to 'bsfs/graph')
-rw-r--r-- | bsfs/graph/ac/base.py | 4 | ||||
-rw-r--r-- | bsfs/graph/ac/null.py | 4 | ||||
-rw-r--r-- | bsfs/graph/graph.py | 8 | ||||
-rw-r--r-- | bsfs/graph/nodes.py | 203 | ||||
-rw-r--r-- | bsfs/graph/resolve.py | 1 | ||||
-rw-r--r-- | bsfs/graph/result.py | 124 | ||||
-rw-r--r-- | bsfs/graph/walk.py | 120 |
7 files changed, 455 insertions, 9 deletions
diff --git a/bsfs/graph/ac/base.py b/bsfs/graph/ac/base.py index 0703e2e..79b09e5 100644 --- a/bsfs/graph/ac/base.py +++ b/bsfs/graph/ac/base.py @@ -72,4 +72,8 @@ class AccessControlBase(abc.ABC): def filter_read(self, node_type: schema.Node, query: ast.filter.FilterExpression) -> ast.filter.FilterExpression: """Re-write a filter *query* to get (i.e., read) *node_type* nodes.""" + @abc.abstractmethod + def fetch_read(self, node_type: schema.Node, query: ast.fetch.FetchExpression) -> ast.fetch.FetchExpression: + """Re-write a fetch *query* to get (i.e, read) values for *node_type* nodes.""" + ## EOF ## diff --git a/bsfs/graph/ac/null.py b/bsfs/graph/ac/null.py index 12b4e87..6a923a5 100644 --- a/bsfs/graph/ac/null.py +++ b/bsfs/graph/ac/null.py @@ -54,4 +54,8 @@ class NullAC(base.AccessControlBase): """Re-write a filter *query* to get (i.e., read) *node_type* nodes.""" return query + def fetch_read(self, node_type: schema.Node, query: ast.fetch.FetchExpression) -> ast.fetch.FetchExpression: + """Re-write a fetch *query* to get (i.e, read) values for *node_type* nodes.""" + return query + ## EOF ## diff --git a/bsfs/graph/graph.py b/bsfs/graph/graph.py index 2210755..df2e3a5 100644 --- a/bsfs/graph/graph.py +++ b/bsfs/graph/graph.py @@ -133,4 +133,12 @@ class Graph(): # return Nodes instance return _nodes.Nodes(self._backend, self._user, type_, guids) + def all(self, node_type: URI) -> _nodes.Nodes: + """Return all instances of type *node_type*.""" + # get node type + type_ = self.schema.node(node_type) + guids = self._backend.get(type_, None) # no need to materialize + return _nodes.Nodes(self._backend, self._user, type_, guids) + + ## EOF ## diff --git a/bsfs/graph/nodes.py b/bsfs/graph/nodes.py index 5a93f77..bc71a32 100644 --- a/bsfs/graph/nodes.py +++ b/bsfs/graph/nodes.py @@ -5,17 +5,21 @@ A copy of the license is provided with the project. Author: Matthias Baumgartner, 2022 """ # imports +from collections import abc import time import typing # bsfs imports -from bsfs import schema as _schema +from bsfs import schema as bsc from bsfs.namespace import ns +from bsfs.query import ast, validate from bsfs.triple_store import TripleStoreBase from bsfs.utils import errors, URI, typename # inner-module imports from . import ac +from . import result +from . import walk # exports __all__: typing.Sequence[str] = ( @@ -37,7 +41,7 @@ class Nodes(): _user: URI # node type. - _node_type: _schema.Node + _node_type: bsc.Node # guids of nodes. Can be empty. _guids: typing.Set[URI] @@ -46,13 +50,16 @@ class Nodes(): self, backend: TripleStoreBase, user: URI, - node_type: _schema.Node, + node_type: bsc.Node, guids: typing.Iterable[URI], ): + # set main members self._backend = backend self._user = user self._node_type = node_type self._guids = set(guids) + # create helper instances + # FIXME: Assumes that the schema does not change while the instance is in use! self._ac = ac.NullAC(self._backend, self._user) def __eq__(self, other: typing.Any) -> bool: @@ -72,7 +79,7 @@ class Nodes(): return f'{typename(self)}({self._node_type}, {self._guids})' @property - def node_type(self) -> _schema.Node: + def node_type(self) -> bsc.Node: """Return the node's type.""" return self._node_type @@ -81,9 +88,72 @@ class Nodes(): """Return all node guids.""" return iter(self._guids) + @property + def schema(self) -> bsc.Schema: + """Return the store's local schema.""" + return self._backend.schema + + def __add__(self, other: typing.Any) -> 'Nodes': + """Concatenate guids. Backend, user, and node type must match.""" + if not isinstance(other, type(self)): + return NotImplemented + if self._backend != other._backend: + raise ValueError(other) + if self._user != other._user: + raise ValueError(other) + if self.node_type != other.node_type: + raise ValueError(other) + return Nodes(self._backend, self._user, self.node_type, self._guids | other._guids) + + def __or__(self, other: typing.Any) -> 'Nodes': + """Concatenate guids. Backend, user, and node type must match.""" + return self.__add__(other) + + def __sub__(self, other: typing.Any) -> 'Nodes': + """Subtract guids. Backend, user, and node type must match.""" + if not isinstance(other, type(self)): + return NotImplemented + if self._backend != other._backend: + raise ValueError(other) + if self._user != other._user: + raise ValueError(other) + if self.node_type != other.node_type: + raise ValueError(other) + return Nodes(self._backend, self._user, self.node_type, self._guids - other._guids) + + def __and__(self, other: typing.Any) -> 'Nodes': + """Intersect guids. Backend, user, and node type must match.""" + if not isinstance(other, type(self)): + return NotImplemented + if self._backend != other._backend: + raise ValueError(other) + if self._user != other._user: + raise ValueError(other) + if self.node_type != other.node_type: + raise ValueError(other) + return Nodes(self._backend, self._user, self.node_type, self._guids & other._guids) + + def __len__(self) -> int: + """Return the number of guids.""" + return len(self._guids) + + def __iter__(self) -> typing.Iterator['Nodes']: + """Iterate over individual guids. Returns `Nodes` instances.""" + return iter( + Nodes(self._backend, self._user, self.node_type, {guid}) + for guid in self._guids + ) + + def __getattr__(self, name: str): + try: + return super().__getattr__(name) # type: ignore [misc] # parent has no getattr + except AttributeError: + pass + return walk.Walk(self, walk.Walk.step(self.schema, self.node_type, name)) + def set( self, - pred: URI, # FIXME: URI or _schema.Predicate? + pred: URI, # FIXME: URI or bsc.Predicate? value: typing.Any, ) -> 'Nodes': """Set predicate *pred* to *value*.""" @@ -91,7 +161,7 @@ class Nodes(): def set_from_iterable( self, - predicate_values: typing.Iterable[typing.Tuple[URI, typing.Any]], # FIXME: URI or _schema.Predicate? + predicate_values: typing.Iterable[typing.Tuple[URI, typing.Any]], # FIXME: URI or bsc.Predicate? ) -> 'Nodes': """Set mutliple predicate-value pairs at once.""" # TODO: Could group predicate_values by predicate to gain some efficiency @@ -120,6 +190,120 @@ class Nodes(): return self + def get( + self, + *paths: typing.Union[URI, typing.Iterable[URI]], + view: typing.Union[typing.Type[list], typing.Type[dict]] = dict, + **view_kwargs, + ) -> typing.Any: + """Get values or nodes at *paths*. + Return an iterator (view=list) or a dict (view=dict) over the results. + """ + # FIXME: user-provided Fetch query AST? + # check args + if len(paths) == 0: + raise AttributeError('expected at least one path, found none') + if view not in (dict, list): + raise ValueError(f'expected dict or list, found {view}') + # process paths: create fetch ast, build name mapping, and find unique paths + schema = self.schema + statements = set() + name2path = {} + unique_paths = set() # paths that result in a single (unique) value + normpath: typing.Tuple[URI, ...] + for idx, path in enumerate(paths): + # normalize path + if isinstance(path, str): + normpath = (URI(path), ) + elif isinstance(path, abc.Iterable): + if not all(isinstance(step, str) for step in path): + raise TypeError(path) + normpath = tuple(URI(step) for step in path) + else: + raise TypeError(path) + # check path's schema consistency + if not all(schema.has_predicate(pred) for pred in normpath): + raise errors.ConsistencyError(f'path is not fully covered by the schema: {path}') + # check path's uniqueness + if all(schema.predicate(pred).unique for pred in normpath): + unique_paths.add(path) + # fetch tail predicate + tail = schema.predicate(normpath[-1]) + # determine tail ast node type + factory = ast.fetch.Node if isinstance(tail.range, bsc.Node) else ast.fetch.Value + # assign name + name = f'fetch{idx}' + name2path[name] = (path, tail) + # create tail ast node + curr: ast.fetch.FetchExpression = factory(tail.uri, name) + # walk towards front + hop: URI + for hop in normpath[-2::-1]: + curr = ast.fetch.Fetch(hop, curr) + # add to fetch query + statements.add(curr) + # aggregate fetch statements + if len(statements) == 1: + fetch = next(iter(statements)) + else: + fetch = ast.fetch.All(*statements) + # add access controls to fetch + fetch = self._ac.fetch_read(self.node_type, fetch) + + # compose filter ast + filter = ast.filter.IsIn(self.guids) # pylint: disable=redefined-builtin + # add access controls to filter + filter = self._ac.filter_read(self.node_type, filter) + + # validate queries + validate.Filter(self._backend.schema)(self.node_type, filter) + validate.Fetch(self._backend.schema)(self.node_type, fetch) + + # process results, convert if need be + def triple_iter(): + # query the backend + triples = self._backend.fetch(self.node_type, filter, fetch) + # process triples + for root, name, raw in triples: + # get node + node = Nodes(self._backend, self._user, self.node_type, {root}) + # get path + path, tail = name2path[name] + # covert raw to value + if isinstance(tail.range, bsc.Node): + value = Nodes(self._backend, self._user, tail.range, {raw}) + else: + value = raw + # emit triple + yield node, path, value + + # simplify by default + view_kwargs['node'] = view_kwargs.get('node', len(self._guids) != 1) + view_kwargs['path'] = view_kwargs.get('path', len(paths) != 1) + view_kwargs['value'] = view_kwargs.get('value', False) + + # return results view + if view == list: + return result.to_list_view( + triple_iter(), + # aggregation args + **view_kwargs, + ) + + if view == dict: + return result.to_dict_view( + triple_iter(), + # context + len(self._guids) == 1, + len(paths) == 1, + unique_paths, + # aggregation args + **view_kwargs, + ) + + raise errors.UnreachableError() # view was already checked + + def __set(self, predicate: URI, value: typing.Any): """ """ @@ -145,7 +329,7 @@ class Nodes(): guids = set(self._ensure_nodes(node_type, guids)) # check value - if isinstance(pred.range, _schema.Literal): + if isinstance(pred.range, bsc.Literal): # check write permissions on existing nodes # As long as the user has write permissions, we don't restrict # the creation or modification of literal values. @@ -160,8 +344,9 @@ class Nodes(): [value], ) - elif isinstance(pred.range, _schema.Node): + elif isinstance(pred.range, bsc.Node): # check value type + # FIXME: value could be a set of Nodes if not isinstance(value, Nodes): raise TypeError(value) # value's node_type must be a subclass of the predicate's range @@ -192,7 +377,7 @@ class Nodes(): else: raise errors.UnreachableError() - def _ensure_nodes(self, node_type: _schema.Node, guids: typing.Iterable[URI]): + def _ensure_nodes(self, node_type: bsc.Node, guids: typing.Iterable[URI]): """ """ # check node existence diff --git a/bsfs/graph/resolve.py b/bsfs/graph/resolve.py index 00b778b..4677401 100644 --- a/bsfs/graph/resolve.py +++ b/bsfs/graph/resolve.py @@ -41,6 +41,7 @@ class Filter(): self.schema = schema def __call__(self, root_type: bsc.Node, node: ast.filter.FilterExpression): + # FIXME: node can be None! return self._parse_filter_expression(root_type, node) def _parse_filter_expression( diff --git a/bsfs/graph/result.py b/bsfs/graph/result.py new file mode 100644 index 0000000..31822f1 --- /dev/null +++ b/bsfs/graph/result.py @@ -0,0 +1,124 @@ +""" + +Part of the BlackStar filesystem (bsfs) module. +A copy of the license is provided with the project. +Author: Matthias Baumgartner, 2022 +""" +# imports +from collections import defaultdict +import typing + +# bsfs imports +from bsfs.utils import URI + +# exports +__all__: typing.Sequence[str] = ( + 'to_list_view', + 'to_dict_view', + ) + + +## code ## + +# FIXME: node, path, value seem counter-intuitive: +# node.get(..., node=True) removes the node part. +# wouldn't it make more sense if node=True keeps the node part +# and node=False drops it? + +def to_list_view( + triples, + # aggregators + node: bool, + path: bool, + value: bool, # pylint: disable=unused-argument + ): + """Return an iterator over results. + + Dependent on the *node*, *path*, and *value* flags, + the respective component is omitted. + + """ + if not node and not path: + return iter(val for _, _, val in triples) + if not node: + return iter((pred, val) for _, pred, val in triples) + if not path: + return iter((subj, val) for subj, _, val in triples) + return iter((subj, pred, val) for subj, pred, val in triples) + + +def to_dict_view( + triples, + # context + one_node: bool, + one_path: bool, + unique_paths: typing.Set[typing.Union[URI, typing.Iterable[URI]]], + # aggregators + node: bool, + path: bool, + value: bool, + default: typing.Optional[typing.Any] = None, + ) -> typing.Any: + """Return a dict of results. + + Note that triples are materialized to create this view. + + The returned structure depends on the *node*, *path*, and *value* flags. + If all flags are set to False, returns a dict(node -> dict(path -> set(values))). + Setting a flag to true omits or simplifies the respective component (if possible). + + """ + # NOTE: To create a dict, we need to materialize or make further assumptions + # (e.g., sorted in a specific order). + + data: typing.Any # disable type checks on data since it's very flexibly typed. + + # FIXME: type of data can be overwritten later on (if value) + + if not node and not path: + data = set() + elif node ^ path: + data = defaultdict(set) + else: + data = defaultdict(lambda: defaultdict(set)) + + for subj, pred, val in triples: + unique = pred in unique_paths + if not node and not path: + if not value and unique and one_node and one_path: + return val + data.add(val) + elif not node: + # remove node from result, group by predicate + if not value and unique and one_node: + data[pred] = val + else: + data[pred].add(val) + elif not path: + # remove predicate from result, group by node + if not value and unique and one_path: + data[subj] = val + else: + data[subj].add(val) + else: + if not value and unique: + data[subj][pred] = val + else: + data[subj][pred].add(val) + + # FIXME: Combine multiple Nodes instances into one? + + # convert defaultdict to ordinary dict + # pylint: disable=too-many-boolean-expressions + if not node and not path and not value \ + and len(unique_paths) > 0 and one_node and one_path \ + and len(data) == 0: + return default + # pylint: enable=too-many-boolean-expressions + if not node and not path: + return data + if node ^ path: + return dict(data) + return {key: dict(val) for key, val in data.items()} + +## EOF ## diff --git a/bsfs/graph/walk.py b/bsfs/graph/walk.py new file mode 100644 index 0000000..1b1cfa0 --- /dev/null +++ b/bsfs/graph/walk.py @@ -0,0 +1,120 @@ +""" + +Part of the BlackStar filesystem (bsfs) module. +A copy of the license is provided with the project. +Author: Matthias Baumgartner, 2022 +""" +# imports +from collections import abc +import typing + +# bsfs imports +from bsfs import schema as bsc + +# inner-module imports +# NOTE: circular import! OK as long as only used for type annotations. +from . import nodes # pylint: disable=cyclic-import + +# exports +__all__: typing.Sequence[str] = ( + 'Walk', + ) + + +## code ## + +class Walk(abc.Hashable, abc.Callable): # type: ignore [misc] # invalid base class (Callable) + """Syntactic sugar for `Nodes` to build and act on predicate paths via members.""" + + # Link to Nodes instance. + _root: 'nodes.Nodes' + + # Current predicate path. + _path: typing.Tuple[bsc.Predicate, ...] + + def __init__( + self, + root: 'nodes.Nodes', + path: typing.Sequence[bsc.Predicate], + ): + self._root = root + self._path = tuple(path) + + @property + def tail(self): + """Return the node type at the end of the path.""" + return self._path[-1].range + + + ## comparison + + def __hash__(self) -> int: + """Return an integer hash that identifies the instance.""" + return hash((type(self), self._root, self._path)) + + def __eq__(self, other) -> bool: + """Compare against *other* backend.""" + return isinstance(other, type(self)) \ + and self._root == other._root \ + and self._path == other._path + + + ## representation + + def __repr__(self) -> str: + """Return a formal string representation.""" + path = ', '.join(pred.uri for pred in self._path) + return f'Walk({self._root.node_type.uri}, ({path}))' + + def __str__(self) -> str: + """Return an informal string representation.""" + path = ', '.join(pred.uri for pred in self._path) + return f'Walk(@{self._root.node_type.uri}: {path})' + + + ## walk + + @staticmethod + def step( + schema: bsc.Schema, + node: bsc.Node, + name: str, + ) -> typing.Tuple[bsc.Predicate]: + """Get an predicate at *node* whose fragment matches *name*.""" + predicates = tuple( + pred + for pred + in schema.predicates_at(node) + if pred.uri.get('fragment', None) == name + ) + if len(predicates) == 0: # no fragment found for name + raise ValueError(f'no available predicate matches {name}') # FIXME: Custom exception + if len(predicates) > 1: # ambiguous name + raise ValueError(f'{name} matches multiple predicates') # FIXME: Custom exception + # append predicate to walk + return predicates # type: ignore [return-value] # size is one + + def __getattr__(self, name: str) -> 'Walk': + """Alias for `Walk.step(name)`.""" + try: + return super().__getattr__(name) + except AttributeError: + pass + # get predicate + pred = self.step(self._root.schema, self.tail, name) + # append predicate to walk + return Walk(self._root, self._path + pred) + + + ## get paths ## + + def get(self, **kwargs) -> typing.Any: + """Alias for `Nodes.get(..)`.""" + return self._root.get(tuple(pred.uri for pred in self._path), **kwargs) + + def __call__(self, **kwargs) -> typing.Any: # pylint: disable=arguments-differ + """Alias for `Walk.get(...)`.""" + return self.get(**kwargs) + + +## EOF ## |