aboutsummaryrefslogtreecommitdiffstats
path: root/bsfs/graph
diff options
context:
space:
mode:
authorMatthias Baumgartner <dev@igsor.net>2023-02-08 21:17:57 +0100
committerMatthias Baumgartner <dev@igsor.net>2023-02-08 21:17:57 +0100
commit9b490d19dcebc0fc24cb2ab89a783f1f7d6147f7 (patch)
tree5fc3d3b8864a8ff996e5739ed9654dae494d9d8f /bsfs/graph
parente12cd52ad267563c8046a593ad551b1dd089a702 (diff)
parentc0218a8dffcdc3a7a5568f66bb959139fe514ad5 (diff)
downloadbsfs-9b490d19dcebc0fc24cb2ab89a783f1f7d6147f7.tar.gz
bsfs-9b490d19dcebc0fc24cb2ab89a783f1f7d6147f7.tar.bz2
bsfs-9b490d19dcebc0fc24cb2ab89a783f1f7d6147f7.zip
Merge branch 'mb/fetch' into develop
Diffstat (limited to 'bsfs/graph')
-rw-r--r--bsfs/graph/ac/base.py4
-rw-r--r--bsfs/graph/ac/null.py4
-rw-r--r--bsfs/graph/graph.py8
-rw-r--r--bsfs/graph/nodes.py203
-rw-r--r--bsfs/graph/resolve.py1
-rw-r--r--bsfs/graph/result.py124
-rw-r--r--bsfs/graph/walk.py120
7 files changed, 455 insertions, 9 deletions
diff --git a/bsfs/graph/ac/base.py b/bsfs/graph/ac/base.py
index 0703e2e..79b09e5 100644
--- a/bsfs/graph/ac/base.py
+++ b/bsfs/graph/ac/base.py
@@ -72,4 +72,8 @@ class AccessControlBase(abc.ABC):
def filter_read(self, node_type: schema.Node, query: ast.filter.FilterExpression) -> ast.filter.FilterExpression:
"""Re-write a filter *query* to get (i.e., read) *node_type* nodes."""
+ @abc.abstractmethod
+ def fetch_read(self, node_type: schema.Node, query: ast.fetch.FetchExpression) -> ast.fetch.FetchExpression:
+ """Re-write a fetch *query* to get (i.e, read) values for *node_type* nodes."""
+
## EOF ##
diff --git a/bsfs/graph/ac/null.py b/bsfs/graph/ac/null.py
index 12b4e87..6a923a5 100644
--- a/bsfs/graph/ac/null.py
+++ b/bsfs/graph/ac/null.py
@@ -54,4 +54,8 @@ class NullAC(base.AccessControlBase):
"""Re-write a filter *query* to get (i.e., read) *node_type* nodes."""
return query
+ def fetch_read(self, node_type: schema.Node, query: ast.fetch.FetchExpression) -> ast.fetch.FetchExpression:
+ """Re-write a fetch *query* to get (i.e, read) values for *node_type* nodes."""
+ return query
+
## EOF ##
diff --git a/bsfs/graph/graph.py b/bsfs/graph/graph.py
index 2210755..df2e3a5 100644
--- a/bsfs/graph/graph.py
+++ b/bsfs/graph/graph.py
@@ -133,4 +133,12 @@ class Graph():
# return Nodes instance
return _nodes.Nodes(self._backend, self._user, type_, guids)
+ def all(self, node_type: URI) -> _nodes.Nodes:
+ """Return all instances of type *node_type*."""
+ # get node type
+ type_ = self.schema.node(node_type)
+ guids = self._backend.get(type_, None) # no need to materialize
+ return _nodes.Nodes(self._backend, self._user, type_, guids)
+
+
## EOF ##
diff --git a/bsfs/graph/nodes.py b/bsfs/graph/nodes.py
index 5a93f77..bc71a32 100644
--- a/bsfs/graph/nodes.py
+++ b/bsfs/graph/nodes.py
@@ -5,17 +5,21 @@ A copy of the license is provided with the project.
Author: Matthias Baumgartner, 2022
"""
# imports
+from collections import abc
import time
import typing
# bsfs imports
-from bsfs import schema as _schema
+from bsfs import schema as bsc
from bsfs.namespace import ns
+from bsfs.query import ast, validate
from bsfs.triple_store import TripleStoreBase
from bsfs.utils import errors, URI, typename
# inner-module imports
from . import ac
+from . import result
+from . import walk
# exports
__all__: typing.Sequence[str] = (
@@ -37,7 +41,7 @@ class Nodes():
_user: URI
# node type.
- _node_type: _schema.Node
+ _node_type: bsc.Node
# guids of nodes. Can be empty.
_guids: typing.Set[URI]
@@ -46,13 +50,16 @@ class Nodes():
self,
backend: TripleStoreBase,
user: URI,
- node_type: _schema.Node,
+ node_type: bsc.Node,
guids: typing.Iterable[URI],
):
+ # set main members
self._backend = backend
self._user = user
self._node_type = node_type
self._guids = set(guids)
+ # create helper instances
+ # FIXME: Assumes that the schema does not change while the instance is in use!
self._ac = ac.NullAC(self._backend, self._user)
def __eq__(self, other: typing.Any) -> bool:
@@ -72,7 +79,7 @@ class Nodes():
return f'{typename(self)}({self._node_type}, {self._guids})'
@property
- def node_type(self) -> _schema.Node:
+ def node_type(self) -> bsc.Node:
"""Return the node's type."""
return self._node_type
@@ -81,9 +88,72 @@ class Nodes():
"""Return all node guids."""
return iter(self._guids)
+ @property
+ def schema(self) -> bsc.Schema:
+ """Return the store's local schema."""
+ return self._backend.schema
+
+ def __add__(self, other: typing.Any) -> 'Nodes':
+ """Concatenate guids. Backend, user, and node type must match."""
+ if not isinstance(other, type(self)):
+ return NotImplemented
+ if self._backend != other._backend:
+ raise ValueError(other)
+ if self._user != other._user:
+ raise ValueError(other)
+ if self.node_type != other.node_type:
+ raise ValueError(other)
+ return Nodes(self._backend, self._user, self.node_type, self._guids | other._guids)
+
+ def __or__(self, other: typing.Any) -> 'Nodes':
+ """Concatenate guids. Backend, user, and node type must match."""
+ return self.__add__(other)
+
+ def __sub__(self, other: typing.Any) -> 'Nodes':
+ """Subtract guids. Backend, user, and node type must match."""
+ if not isinstance(other, type(self)):
+ return NotImplemented
+ if self._backend != other._backend:
+ raise ValueError(other)
+ if self._user != other._user:
+ raise ValueError(other)
+ if self.node_type != other.node_type:
+ raise ValueError(other)
+ return Nodes(self._backend, self._user, self.node_type, self._guids - other._guids)
+
+ def __and__(self, other: typing.Any) -> 'Nodes':
+ """Intersect guids. Backend, user, and node type must match."""
+ if not isinstance(other, type(self)):
+ return NotImplemented
+ if self._backend != other._backend:
+ raise ValueError(other)
+ if self._user != other._user:
+ raise ValueError(other)
+ if self.node_type != other.node_type:
+ raise ValueError(other)
+ return Nodes(self._backend, self._user, self.node_type, self._guids & other._guids)
+
+ def __len__(self) -> int:
+ """Return the number of guids."""
+ return len(self._guids)
+
+ def __iter__(self) -> typing.Iterator['Nodes']:
+ """Iterate over individual guids. Returns `Nodes` instances."""
+ return iter(
+ Nodes(self._backend, self._user, self.node_type, {guid})
+ for guid in self._guids
+ )
+
+ def __getattr__(self, name: str):
+ try:
+ return super().__getattr__(name) # type: ignore [misc] # parent has no getattr
+ except AttributeError:
+ pass
+ return walk.Walk(self, walk.Walk.step(self.schema, self.node_type, name))
+
def set(
self,
- pred: URI, # FIXME: URI or _schema.Predicate?
+ pred: URI, # FIXME: URI or bsc.Predicate?
value: typing.Any,
) -> 'Nodes':
"""Set predicate *pred* to *value*."""
@@ -91,7 +161,7 @@ class Nodes():
def set_from_iterable(
self,
- predicate_values: typing.Iterable[typing.Tuple[URI, typing.Any]], # FIXME: URI or _schema.Predicate?
+ predicate_values: typing.Iterable[typing.Tuple[URI, typing.Any]], # FIXME: URI or bsc.Predicate?
) -> 'Nodes':
"""Set mutliple predicate-value pairs at once."""
# TODO: Could group predicate_values by predicate to gain some efficiency
@@ -120,6 +190,120 @@ class Nodes():
return self
+ def get(
+ self,
+ *paths: typing.Union[URI, typing.Iterable[URI]],
+ view: typing.Union[typing.Type[list], typing.Type[dict]] = dict,
+ **view_kwargs,
+ ) -> typing.Any:
+ """Get values or nodes at *paths*.
+ Return an iterator (view=list) or a dict (view=dict) over the results.
+ """
+ # FIXME: user-provided Fetch query AST?
+ # check args
+ if len(paths) == 0:
+ raise AttributeError('expected at least one path, found none')
+ if view not in (dict, list):
+ raise ValueError(f'expected dict or list, found {view}')
+ # process paths: create fetch ast, build name mapping, and find unique paths
+ schema = self.schema
+ statements = set()
+ name2path = {}
+ unique_paths = set() # paths that result in a single (unique) value
+ normpath: typing.Tuple[URI, ...]
+ for idx, path in enumerate(paths):
+ # normalize path
+ if isinstance(path, str):
+ normpath = (URI(path), )
+ elif isinstance(path, abc.Iterable):
+ if not all(isinstance(step, str) for step in path):
+ raise TypeError(path)
+ normpath = tuple(URI(step) for step in path)
+ else:
+ raise TypeError(path)
+ # check path's schema consistency
+ if not all(schema.has_predicate(pred) for pred in normpath):
+ raise errors.ConsistencyError(f'path is not fully covered by the schema: {path}')
+ # check path's uniqueness
+ if all(schema.predicate(pred).unique for pred in normpath):
+ unique_paths.add(path)
+ # fetch tail predicate
+ tail = schema.predicate(normpath[-1])
+ # determine tail ast node type
+ factory = ast.fetch.Node if isinstance(tail.range, bsc.Node) else ast.fetch.Value
+ # assign name
+ name = f'fetch{idx}'
+ name2path[name] = (path, tail)
+ # create tail ast node
+ curr: ast.fetch.FetchExpression = factory(tail.uri, name)
+ # walk towards front
+ hop: URI
+ for hop in normpath[-2::-1]:
+ curr = ast.fetch.Fetch(hop, curr)
+ # add to fetch query
+ statements.add(curr)
+ # aggregate fetch statements
+ if len(statements) == 1:
+ fetch = next(iter(statements))
+ else:
+ fetch = ast.fetch.All(*statements)
+ # add access controls to fetch
+ fetch = self._ac.fetch_read(self.node_type, fetch)
+
+ # compose filter ast
+ filter = ast.filter.IsIn(self.guids) # pylint: disable=redefined-builtin
+ # add access controls to filter
+ filter = self._ac.filter_read(self.node_type, filter)
+
+ # validate queries
+ validate.Filter(self._backend.schema)(self.node_type, filter)
+ validate.Fetch(self._backend.schema)(self.node_type, fetch)
+
+ # process results, convert if need be
+ def triple_iter():
+ # query the backend
+ triples = self._backend.fetch(self.node_type, filter, fetch)
+ # process triples
+ for root, name, raw in triples:
+ # get node
+ node = Nodes(self._backend, self._user, self.node_type, {root})
+ # get path
+ path, tail = name2path[name]
+ # covert raw to value
+ if isinstance(tail.range, bsc.Node):
+ value = Nodes(self._backend, self._user, tail.range, {raw})
+ else:
+ value = raw
+ # emit triple
+ yield node, path, value
+
+ # simplify by default
+ view_kwargs['node'] = view_kwargs.get('node', len(self._guids) != 1)
+ view_kwargs['path'] = view_kwargs.get('path', len(paths) != 1)
+ view_kwargs['value'] = view_kwargs.get('value', False)
+
+ # return results view
+ if view == list:
+ return result.to_list_view(
+ triple_iter(),
+ # aggregation args
+ **view_kwargs,
+ )
+
+ if view == dict:
+ return result.to_dict_view(
+ triple_iter(),
+ # context
+ len(self._guids) == 1,
+ len(paths) == 1,
+ unique_paths,
+ # aggregation args
+ **view_kwargs,
+ )
+
+ raise errors.UnreachableError() # view was already checked
+
+
def __set(self, predicate: URI, value: typing.Any):
"""
"""
@@ -145,7 +329,7 @@ class Nodes():
guids = set(self._ensure_nodes(node_type, guids))
# check value
- if isinstance(pred.range, _schema.Literal):
+ if isinstance(pred.range, bsc.Literal):
# check write permissions on existing nodes
# As long as the user has write permissions, we don't restrict
# the creation or modification of literal values.
@@ -160,8 +344,9 @@ class Nodes():
[value],
)
- elif isinstance(pred.range, _schema.Node):
+ elif isinstance(pred.range, bsc.Node):
# check value type
+ # FIXME: value could be a set of Nodes
if not isinstance(value, Nodes):
raise TypeError(value)
# value's node_type must be a subclass of the predicate's range
@@ -192,7 +377,7 @@ class Nodes():
else:
raise errors.UnreachableError()
- def _ensure_nodes(self, node_type: _schema.Node, guids: typing.Iterable[URI]):
+ def _ensure_nodes(self, node_type: bsc.Node, guids: typing.Iterable[URI]):
"""
"""
# check node existence
diff --git a/bsfs/graph/resolve.py b/bsfs/graph/resolve.py
index 00b778b..4677401 100644
--- a/bsfs/graph/resolve.py
+++ b/bsfs/graph/resolve.py
@@ -41,6 +41,7 @@ class Filter():
self.schema = schema
def __call__(self, root_type: bsc.Node, node: ast.filter.FilterExpression):
+ # FIXME: node can be None!
return self._parse_filter_expression(root_type, node)
def _parse_filter_expression(
diff --git a/bsfs/graph/result.py b/bsfs/graph/result.py
new file mode 100644
index 0000000..31822f1
--- /dev/null
+++ b/bsfs/graph/result.py
@@ -0,0 +1,124 @@
+"""
+
+Part of the BlackStar filesystem (bsfs) module.
+A copy of the license is provided with the project.
+Author: Matthias Baumgartner, 2022
+"""
+# imports
+from collections import defaultdict
+import typing
+
+# bsfs imports
+from bsfs.utils import URI
+
+# exports
+__all__: typing.Sequence[str] = (
+ 'to_list_view',
+ 'to_dict_view',
+ )
+
+
+## code ##
+
+# FIXME: node, path, value seem counter-intuitive:
+# node.get(..., node=True) removes the node part.
+# wouldn't it make more sense if node=True keeps the node part
+# and node=False drops it?
+
+def to_list_view(
+ triples,
+ # aggregators
+ node: bool,
+ path: bool,
+ value: bool, # pylint: disable=unused-argument
+ ):
+ """Return an iterator over results.
+
+ Dependent on the *node*, *path*, and *value* flags,
+ the respective component is omitted.
+
+ """
+ if not node and not path:
+ return iter(val for _, _, val in triples)
+ if not node:
+ return iter((pred, val) for _, pred, val in triples)
+ if not path:
+ return iter((subj, val) for subj, _, val in triples)
+ return iter((subj, pred, val) for subj, pred, val in triples)
+
+
+def to_dict_view(
+ triples,
+ # context
+ one_node: bool,
+ one_path: bool,
+ unique_paths: typing.Set[typing.Union[URI, typing.Iterable[URI]]],
+ # aggregators
+ node: bool,
+ path: bool,
+ value: bool,
+ default: typing.Optional[typing.Any] = None,
+ ) -> typing.Any:
+ """Return a dict of results.
+
+ Note that triples are materialized to create this view.
+
+ The returned structure depends on the *node*, *path*, and *value* flags.
+ If all flags are set to False, returns a dict(node -> dict(path -> set(values))).
+ Setting a flag to true omits or simplifies the respective component (if possible).
+
+ """
+ # NOTE: To create a dict, we need to materialize or make further assumptions
+ # (e.g., sorted in a specific order).
+
+ data: typing.Any # disable type checks on data since it's very flexibly typed.
+
+ # FIXME: type of data can be overwritten later on (if value)
+
+ if not node and not path:
+ data = set()
+ elif node ^ path:
+ data = defaultdict(set)
+ else:
+ data = defaultdict(lambda: defaultdict(set))
+
+ for subj, pred, val in triples:
+ unique = pred in unique_paths
+ if not node and not path:
+ if not value and unique and one_node and one_path:
+ return val
+ data.add(val)
+ elif not node:
+ # remove node from result, group by predicate
+ if not value and unique and one_node:
+ data[pred] = val
+ else:
+ data[pred].add(val)
+ elif not path:
+ # remove predicate from result, group by node
+ if not value and unique and one_path:
+ data[subj] = val
+ else:
+ data[subj].add(val)
+ else:
+ if not value and unique:
+ data[subj][pred] = val
+ else:
+ data[subj][pred].add(val)
+
+ # FIXME: Combine multiple Nodes instances into one?
+
+ # convert defaultdict to ordinary dict
+ # pylint: disable=too-many-boolean-expressions
+ if not node and not path and not value \
+ and len(unique_paths) > 0 and one_node and one_path \
+ and len(data) == 0:
+ return default
+ # pylint: enable=too-many-boolean-expressions
+ if not node and not path:
+ return data
+ if node ^ path:
+ return dict(data)
+ return {key: dict(val) for key, val in data.items()}
+
+## EOF ##
diff --git a/bsfs/graph/walk.py b/bsfs/graph/walk.py
new file mode 100644
index 0000000..1b1cfa0
--- /dev/null
+++ b/bsfs/graph/walk.py
@@ -0,0 +1,120 @@
+"""
+
+Part of the BlackStar filesystem (bsfs) module.
+A copy of the license is provided with the project.
+Author: Matthias Baumgartner, 2022
+"""
+# imports
+from collections import abc
+import typing
+
+# bsfs imports
+from bsfs import schema as bsc
+
+# inner-module imports
+# NOTE: circular import! OK as long as only used for type annotations.
+from . import nodes # pylint: disable=cyclic-import
+
+# exports
+__all__: typing.Sequence[str] = (
+ 'Walk',
+ )
+
+
+## code ##
+
+class Walk(abc.Hashable, abc.Callable): # type: ignore [misc] # invalid base class (Callable)
+ """Syntactic sugar for `Nodes` to build and act on predicate paths via members."""
+
+ # Link to Nodes instance.
+ _root: 'nodes.Nodes'
+
+ # Current predicate path.
+ _path: typing.Tuple[bsc.Predicate, ...]
+
+ def __init__(
+ self,
+ root: 'nodes.Nodes',
+ path: typing.Sequence[bsc.Predicate],
+ ):
+ self._root = root
+ self._path = tuple(path)
+
+ @property
+ def tail(self):
+ """Return the node type at the end of the path."""
+ return self._path[-1].range
+
+
+ ## comparison
+
+ def __hash__(self) -> int:
+ """Return an integer hash that identifies the instance."""
+ return hash((type(self), self._root, self._path))
+
+ def __eq__(self, other) -> bool:
+ """Compare against *other* backend."""
+ return isinstance(other, type(self)) \
+ and self._root == other._root \
+ and self._path == other._path
+
+
+ ## representation
+
+ def __repr__(self) -> str:
+ """Return a formal string representation."""
+ path = ', '.join(pred.uri for pred in self._path)
+ return f'Walk({self._root.node_type.uri}, ({path}))'
+
+ def __str__(self) -> str:
+ """Return an informal string representation."""
+ path = ', '.join(pred.uri for pred in self._path)
+ return f'Walk(@{self._root.node_type.uri}: {path})'
+
+
+ ## walk
+
+ @staticmethod
+ def step(
+ schema: bsc.Schema,
+ node: bsc.Node,
+ name: str,
+ ) -> typing.Tuple[bsc.Predicate]:
+ """Get an predicate at *node* whose fragment matches *name*."""
+ predicates = tuple(
+ pred
+ for pred
+ in schema.predicates_at(node)
+ if pred.uri.get('fragment', None) == name
+ )
+ if len(predicates) == 0: # no fragment found for name
+ raise ValueError(f'no available predicate matches {name}') # FIXME: Custom exception
+ if len(predicates) > 1: # ambiguous name
+ raise ValueError(f'{name} matches multiple predicates') # FIXME: Custom exception
+ # append predicate to walk
+ return predicates # type: ignore [return-value] # size is one
+
+ def __getattr__(self, name: str) -> 'Walk':
+ """Alias for `Walk.step(name)`."""
+ try:
+ return super().__getattr__(name)
+ except AttributeError:
+ pass
+ # get predicate
+ pred = self.step(self._root.schema, self.tail, name)
+ # append predicate to walk
+ return Walk(self._root, self._path + pred)
+
+
+ ## get paths ##
+
+ def get(self, **kwargs) -> typing.Any:
+ """Alias for `Nodes.get(..)`."""
+ return self._root.get(tuple(pred.uri for pred in self._path), **kwargs)
+
+ def __call__(self, **kwargs) -> typing.Any: # pylint: disable=arguments-differ
+ """Alias for `Walk.get(...)`."""
+ return self.get(**kwargs)
+
+
+## EOF ##