Merge branch 'mb/fetch' into develop

author: Matthias Baumgartner <dev@igsor.net> 2023-02-08 21:17:57 +0100
committer: Matthias Baumgartner <dev@igsor.net> 2023-02-08 21:17:57 +0100
commit: 9b490d19dcebc0fc24cb2ab89a783f1f7d6147f7 (patch)
tree: 5fc3d3b8864a8ff996e5739ed9654dae494d9d8f /bsfs
parent: e12cd52ad267563c8046a593ad551b1dd089a702 (diff)
parent: c0218a8dffcdc3a7a5568f66bb959139fe514ad5 (diff)
download: bsfs-9b490d19dcebc0fc24cb2ab89a783f1f7d6147f7.tar.gz
bsfs-9b490d19dcebc0fc24cb2ab89a783f1f7d6147f7.tar.bz2
bsfs-9b490d19dcebc0fc24cb2ab89a783f1f7d6147f7.zip
20 files changed, 1544 insertions, 67 deletions
diff --git a/bsfs/graph/ac/base.py b/bsfs/graph/ac/base.py
index 0703e2e..79b09e5 100644
--- a/bsfs/graph/ac/base.py
+++ b/bsfs/graph/ac/base.py
@@ -72,4 +72,8 @@ class AccessControlBase(abc.ABC):
     def filter_read(self, node_type: schema.Node, query: ast.filter.FilterExpression) -> ast.filter.FilterExpression:
         """Re-write a filter *query* to get (i.e., read) *node_type* nodes."""
 
+    @abc.abstractmethod
+    def fetch_read(self, node_type: schema.Node, query: ast.fetch.FetchExpression) -> ast.fetch.FetchExpression:
+        """Re-write a fetch *query* to get (i.e, read) values for *node_type* nodes."""
+
 ## EOF ##
diff --git a/bsfs/graph/ac/null.py b/bsfs/graph/ac/null.py
index 12b4e87..6a923a5 100644
--- a/bsfs/graph/ac/null.py
+++ b/bsfs/graph/ac/null.py
@@ -54,4 +54,8 @@ class NullAC(base.AccessControlBase):
         """Re-write a filter *query* to get (i.e., read) *node_type* nodes."""
         return query
 
+    def fetch_read(self, node_type: schema.Node, query: ast.fetch.FetchExpression) -> ast.fetch.FetchExpression:
+        """Re-write a fetch *query* to get (i.e, read) values for *node_type* nodes."""
+        return query
+
 ## EOF ##
diff --git a/bsfs/graph/graph.py b/bsfs/graph/graph.py
index 2210755..df2e3a5 100644
--- a/bsfs/graph/graph.py
+++ b/bsfs/graph/graph.py
@@ -133,4 +133,12 @@ class Graph():
         # return Nodes instance
         return _nodes.Nodes(self._backend, self._user, type_, guids)
 
+    def all(self, node_type: URI) -> _nodes.Nodes:
+        """Return all instances of type *node_type*."""
+        # get node type
+        type_ = self.schema.node(node_type)
+        guids = self._backend.get(type_, None) # no need to materialize
+        return _nodes.Nodes(self._backend, self._user, type_, guids)
+
+
 ## EOF ##
diff --git a/bsfs/graph/nodes.py b/bsfs/graph/nodes.py
index 5a93f77..bc71a32 100644
--- a/bsfs/graph/nodes.py
+++ b/bsfs/graph/nodes.py
@@ -5,17 +5,21 @@ A copy of the license is provided with the project.
 Author: Matthias Baumgartner, 2022
 """
 # imports
+from collections import abc
 import time
 import typing
 
 # bsfs imports
-from bsfs import schema as _schema
+from bsfs import schema as bsc
 from bsfs.namespace import ns
+from bsfs.query import ast, validate
 from bsfs.triple_store import TripleStoreBase
 from bsfs.utils import errors, URI, typename
 
 # inner-module imports
 from . import ac
+from . import result
+from . import walk
 
 # exports
 __all__: typing.Sequence[str] = (
@@ -37,7 +41,7 @@ class Nodes():
     _user: URI
 
     # node type.
-    _node_type: _schema.Node
+    _node_type: bsc.Node
 
     # guids of nodes. Can be empty.
     _guids: typing.Set[URI]
@@ -46,13 +50,16 @@ class Nodes():
             self,
             backend: TripleStoreBase,
             user: URI,
-            node_type: _schema.Node,
+            node_type: bsc.Node,
             guids: typing.Iterable[URI],
             ):
+        # set main members
         self._backend = backend
         self._user = user
         self._node_type = node_type
         self._guids = set(guids)
+        # create helper instances
+        # FIXME: Assumes that the schema does not change while the instance is in use!
         self._ac = ac.NullAC(self._backend, self._user)
 
     def __eq__(self, other: typing.Any) -> bool:
@@ -72,7 +79,7 @@ class Nodes():
         return f'{typename(self)}({self._node_type}, {self._guids})'
 
     @property
-    def node_type(self) -> _schema.Node:
+    def node_type(self) -> bsc.Node:
         """Return the node's type."""
         return self._node_type
 
@@ -81,9 +88,72 @@ class Nodes():
         """Return all node guids."""
         return iter(self._guids)
 
+    @property
+    def schema(self) -> bsc.Schema:
+        """Return the store's local schema."""
+        return self._backend.schema
+
+    def __add__(self, other: typing.Any) -> 'Nodes':
+        """Concatenate guids. Backend, user, and node type must match."""
+        if not isinstance(other, type(self)):
+            return NotImplemented
+        if self._backend != other._backend:
+            raise ValueError(other)
+        if self._user != other._user:
+            raise ValueError(other)
+        if self.node_type != other.node_type:
+            raise ValueError(other)
+        return Nodes(self._backend, self._user, self.node_type, self._guids | other._guids)
+
+    def __or__(self, other: typing.Any) -> 'Nodes':
+        """Concatenate guids. Backend, user, and node type must match."""
+        return self.__add__(other)
+
+    def __sub__(self, other: typing.Any) -> 'Nodes':
+        """Subtract guids. Backend, user, and node type must match."""
+        if not isinstance(other, type(self)):
+            return NotImplemented
+        if self._backend != other._backend:
+            raise ValueError(other)
+        if self._user != other._user:
+            raise ValueError(other)
+        if self.node_type != other.node_type:
+            raise ValueError(other)
+        return Nodes(self._backend, self._user, self.node_type, self._guids - other._guids)
+
+    def __and__(self, other: typing.Any) -> 'Nodes':
+        """Intersect guids. Backend, user, and node type must match."""
+        if not isinstance(other, type(self)):
+            return NotImplemented
+        if self._backend != other._backend:
+            raise ValueError(other)
+        if self._user != other._user:
+            raise ValueError(other)
+        if self.node_type != other.node_type:
+            raise ValueError(other)
+        return Nodes(self._backend, self._user, self.node_type, self._guids & other._guids)
+
+    def __len__(self) -> int:
+        """Return the number of guids."""
+        return len(self._guids)
+
+    def __iter__(self) -> typing.Iterator['Nodes']:
+        """Iterate over individual guids. Returns `Nodes` instances."""
+        return iter(
+            Nodes(self._backend, self._user, self.node_type, {guid})
+            for guid in self._guids
+            )
+
+    def __getattr__(self, name: str):
+        try:
+            return super().__getattr__(name) # type: ignore [misc] # parent has no getattr
+        except AttributeError:
+            pass
+        return walk.Walk(self, walk.Walk.step(self.schema, self.node_type, name))
+
     def set(
             self,
-            pred: URI, # FIXME: URI or _schema.Predicate?
+            pred: URI, # FIXME: URI or bsc.Predicate?
             value: typing.Any,
             ) -> 'Nodes':
         """Set predicate *pred* to *value*."""
@@ -91,7 +161,7 @@ class Nodes():
 
     def set_from_iterable(
             self,
-            predicate_values: typing.Iterable[typing.Tuple[URI, typing.Any]], # FIXME: URI or _schema.Predicate?
+            predicate_values: typing.Iterable[typing.Tuple[URI, typing.Any]], # FIXME: URI or bsc.Predicate?
             ) -> 'Nodes':
         """Set mutliple predicate-value pairs at once."""
         # TODO: Could group predicate_values by predicate to gain some efficiency
@@ -120,6 +190,120 @@ class Nodes():
 
         return self
 
+    def get(
+            self,
+            *paths: typing.Union[URI, typing.Iterable[URI]],
+            view: typing.Union[typing.Type[list], typing.Type[dict]] = dict,
+            **view_kwargs,
+            ) -> typing.Any:
+        """Get values or nodes at *paths*.
+        Return an iterator (view=list) or a dict (view=dict) over the results.
+        """
+        # FIXME: user-provided Fetch query AST?
+        # check args
+        if len(paths) == 0:
+            raise AttributeError('expected at least one path, found none')
+        if view not in (dict, list):
+            raise ValueError(f'expected dict or list, found {view}')
+        # process paths: create fetch ast, build name mapping, and find unique paths
+        schema = self.schema
+        statements = set()
+        name2path = {}
+        unique_paths = set() # paths that result in a single (unique) value
+        normpath: typing.Tuple[URI, ...]
+        for idx, path in enumerate(paths):
+            # normalize path
+            if isinstance(path, str):
+                normpath = (URI(path), )
+            elif isinstance(path, abc.Iterable):
+                if not all(isinstance(step, str) for step in path):
+                    raise TypeError(path)
+                normpath = tuple(URI(step) for step in path)
+            else:
+                raise TypeError(path)
+            # check path's schema consistency
+            if not all(schema.has_predicate(pred) for pred in normpath):
+                raise errors.ConsistencyError(f'path is not fully covered by the schema: {path}')
+            # check path's uniqueness
+            if all(schema.predicate(pred).unique for pred in normpath):
+                unique_paths.add(path)
+            # fetch tail predicate
+            tail = schema.predicate(normpath[-1])
+            # determine tail ast node type
+            factory = ast.fetch.Node if isinstance(tail.range, bsc.Node) else ast.fetch.Value
+            # assign name
+            name = f'fetch{idx}'
+            name2path[name] = (path, tail)
+            # create tail ast node
+            curr: ast.fetch.FetchExpression = factory(tail.uri, name)
+            # walk towards front
+            hop: URI
+            for hop in normpath[-2::-1]:
+                curr = ast.fetch.Fetch(hop, curr)
+            # add to fetch query
+            statements.add(curr)
+        # aggregate fetch statements
+        if len(statements) == 1:
+            fetch = next(iter(statements))
+        else:
+            fetch = ast.fetch.All(*statements)
+        # add access controls to fetch
+        fetch = self._ac.fetch_read(self.node_type, fetch)
+
+        # compose filter ast
+        filter = ast.filter.IsIn(self.guids) # pylint: disable=redefined-builtin
+        # add access controls to filter
+        filter = self._ac.filter_read(self.node_type, filter)
+
+        # validate queries
+        validate.Filter(self._backend.schema)(self.node_type, filter)
+        validate.Fetch(self._backend.schema)(self.node_type, fetch)
+
+        # process results, convert if need be
+        def triple_iter():
+            # query the backend
+            triples = self._backend.fetch(self.node_type, filter, fetch)
+            # process triples
+            for root, name, raw in triples:
+                # get node
+                node = Nodes(self._backend, self._user, self.node_type, {root})
+                # get path
+                path, tail = name2path[name]
+                # covert raw to value
+                if isinstance(tail.range, bsc.Node):
+                    value = Nodes(self._backend, self._user, tail.range, {raw})
+                else:
+                    value = raw
+                # emit triple
+                yield node, path, value
+
+        # simplify by default
+        view_kwargs['node'] = view_kwargs.get('node', len(self._guids) != 1)
+        view_kwargs['path'] = view_kwargs.get('path', len(paths) != 1)
+        view_kwargs['value'] = view_kwargs.get('value', False)
+
+        # return results view
+        if view == list:
+            return result.to_list_view(
+                triple_iter(),
+                # aggregation args
+                **view_kwargs,
+                )
+
+        if view == dict:
+            return result.to_dict_view(
+                triple_iter(),
+                # context
+                len(self._guids) == 1,
+                len(paths) == 1,
+                unique_paths,
+                # aggregation args
+                **view_kwargs,
+                )
+
+        raise errors.UnreachableError() # view was already checked
+
+
     def __set(self, predicate: URI, value: typing.Any):
         """
         """
@@ -145,7 +329,7 @@ class Nodes():
         guids = set(self._ensure_nodes(node_type, guids))
 
         # check value
-        if isinstance(pred.range, _schema.Literal):
+        if isinstance(pred.range, bsc.Literal):
             # check write permissions on existing nodes
             # As long as the user has write permissions, we don't restrict
             # the creation or modification of literal values.
@@ -160,8 +344,9 @@ class Nodes():
                 [value],
                 )
 
-        elif isinstance(pred.range, _schema.Node):
+        elif isinstance(pred.range, bsc.Node):
             # check value type
+            # FIXME: value could be a set of Nodes
             if not isinstance(value, Nodes):
                 raise TypeError(value)
             # value's node_type must be a subclass of the predicate's range
@@ -192,7 +377,7 @@ class Nodes():
         else:
             raise errors.UnreachableError()
 
-    def _ensure_nodes(self, node_type: _schema.Node, guids: typing.Iterable[URI]):
+    def _ensure_nodes(self, node_type: bsc.Node, guids: typing.Iterable[URI]):
         """
         """
         # check node existence
diff --git a/bsfs/graph/resolve.py b/bsfs/graph/resolve.py
index 00b778b..4677401 100644
--- a/bsfs/graph/resolve.py
+++ b/bsfs/graph/resolve.py
@@ -41,6 +41,7 @@ class Filter():
         self.schema = schema
 
     def __call__(self, root_type: bsc.Node, node: ast.filter.FilterExpression):
+        # FIXME: node can be None!
         return self._parse_filter_expression(root_type, node)
 
     def _parse_filter_expression(
diff --git a/bsfs/graph/result.py b/bsfs/graph/result.py
new file mode 100644
index 0000000..31822f1
--- /dev/null
+++ b/bsfs/graph/result.py
@@ -0,0 +1,124 @@
+"""
+
+Part of the BlackStar filesystem (bsfs) module.
+A copy of the license is provided with the project.
+Author: Matthias Baumgartner, 2022
+"""
+# imports
+from collections import defaultdict
+import typing
+
+# bsfs imports
+from bsfs.utils import URI
+
+# exports
+__all__: typing.Sequence[str] = (
+    'to_list_view',
+    'to_dict_view',
+    )
+
+
+## code ##
+
+# FIXME: node, path, value seem counter-intuitive:
+# node.get(..., node=True) removes the node part.
+# wouldn't it make more sense if node=True keeps the node part
+# and node=False drops it?
+
+def to_list_view(
+        triples,
+        # aggregators
+        node: bool,
+        path: bool,
+        value: bool, # pylint: disable=unused-argument
+        ):
+    """Return an iterator over results.
+
+    Dependent on the *node*, *path*, and *value* flags,
+    the respective component is omitted.
+
+    """
+    if not node and not path:
+        return iter(val for _, _, val in triples)
+    if not node:
+        return iter((pred, val) for _, pred, val in triples)
+    if not path:
+        return iter((subj, val) for subj, _, val in triples)
+    return iter((subj, pred, val) for subj, pred, val in triples)
+
+
+def to_dict_view(
+        triples,
+        # context
+        one_node: bool,
+        one_path: bool,
+        unique_paths: typing.Set[typing.Union[URI, typing.Iterable[URI]]],
+        # aggregators
+        node: bool,
+        path: bool,
+        value: bool,
+        default: typing.Optional[typing.Any] = None,
+        ) -> typing.Any:
+    """Return a dict of results.
+
+    Note that triples are materialized to create this view.
+
+    The returned structure depends on the *node*, *path*, and *value* flags.
+    If all flags are set to False, returns a dict(node -> dict(path -> set(values))).
+    Setting a flag to true omits or simplifies the respective component (if possible).
+
+    """
+    # NOTE: To create a dict, we need to materialize or make further assumptions
+    # (e.g., sorted in a specific order).
+
+    data: typing.Any # disable type checks on data since it's very flexibly typed.
+
+    # FIXME: type of data can be overwritten later on (if value)
+
+    if not node and not path:
+        data = set()
+    elif node ^ path:
+        data = defaultdict(set)
+    else:
+        data = defaultdict(lambda: defaultdict(set))
+
+    for subj, pred, val in triples:
+        unique = pred in unique_paths
+        if not node and not path:
+            if not value and unique and one_node and one_path:
+                return val
+            data.add(val)
+        elif not node:
+            # remove node from result, group by predicate
+            if not value and unique and one_node:
+                data[pred] = val
+            else:
+                data[pred].add(val)
+        elif not path:
+            # remove predicate from result, group by node
+            if not value and unique and one_path:
+                data[subj] = val
+            else:
+                data[subj].add(val)
+        else:
+            if not value and unique:
+                data[subj][pred] = val
+            else:
+                data[subj][pred].add(val)
+
+    # FIXME: Combine multiple Nodes instances into one?
+
+    # convert defaultdict to ordinary dict
+    # pylint: disable=too-many-boolean-expressions
+    if not node and not path and not value \
+       and len(unique_paths) > 0 and one_node and one_path \
+       and len(data) == 0:
+        return default
+    # pylint: enable=too-many-boolean-expressions
+    if not node and not path:
+        return data
+    if node ^ path:
+        return dict(data)
+    return {key: dict(val) for key, val in data.items()}
+
+## EOF ##
diff --git a/bsfs/graph/walk.py b/bsfs/graph/walk.py
new file mode 100644
index 0000000..1b1cfa0
--- /dev/null
+++ b/bsfs/graph/walk.py
@@ -0,0 +1,120 @@
+"""
+
+Part of the BlackStar filesystem (bsfs) module.
+A copy of the license is provided with the project.
+Author: Matthias Baumgartner, 2022
+"""
+# imports
+from collections import abc
+import typing
+
+# bsfs imports
+from bsfs import schema as bsc
+
+# inner-module imports
+# NOTE: circular import! OK as long as only used for type annotations.
+from . import nodes # pylint: disable=cyclic-import
+
+# exports
+__all__: typing.Sequence[str] = (
+    'Walk',
+    )
+
+
+## code ##
+
+class Walk(abc.Hashable, abc.Callable): # type: ignore [misc] # invalid base class (Callable)
+    """Syntactic sugar for `Nodes` to build and act on predicate paths via members."""
+
+    # Link to Nodes instance.
+    _root: 'nodes.Nodes'
+
+    # Current predicate path.
+    _path: typing.Tuple[bsc.Predicate, ...]
+
+    def __init__(
+            self,
+            root: 'nodes.Nodes',
+            path: typing.Sequence[bsc.Predicate],
+            ):
+        self._root = root
+        self._path = tuple(path)
+
+    @property
+    def tail(self):
+        """Return the node type at the end of the path."""
+        return self._path[-1].range
+
+
+    ## comparison
+
+    def __hash__(self) -> int:
+        """Return an integer hash that identifies the instance."""
+        return hash((type(self), self._root, self._path))
+
+    def __eq__(self, other) -> bool:
+        """Compare against *other* backend."""
+        return isinstance(other, type(self)) \
+           and self._root == other._root \
+           and self._path == other._path
+
+
+    ## representation
+
+    def __repr__(self) -> str:
+        """Return a formal string representation."""
+        path = ', '.join(pred.uri for pred in self._path)
+        return f'Walk({self._root.node_type.uri}, ({path}))'
+
+    def __str__(self) -> str:
+        """Return an informal string representation."""
+        path = ', '.join(pred.uri for pred in self._path)
+        return f'Walk(@{self._root.node_type.uri}: {path})'
+
+
+    ## walk
+
+    @staticmethod
+    def step(
+            schema: bsc.Schema,
+            node: bsc.Node,
+            name: str,
+            ) -> typing.Tuple[bsc.Predicate]:
+        """Get an predicate at *node* whose fragment matches *name*."""
+        predicates = tuple(
+            pred
+            for pred
+            in schema.predicates_at(node)
+            if pred.uri.get('fragment', None) == name
+            )
+        if len(predicates) == 0: # no fragment found for name
+            raise ValueError(f'no available predicate matches {name}') # FIXME: Custom exception
+        if len(predicates) > 1: # ambiguous name
+            raise ValueError(f'{name} matches multiple predicates') # FIXME: Custom exception
+        # append predicate to walk
+        return predicates # type: ignore [return-value] # size is one
+
+    def __getattr__(self, name: str) -> 'Walk':
+        """Alias for `Walk.step(name)`."""
+        try:
+            return super().__getattr__(name)
+        except AttributeError:
+            pass
+        # get predicate
+        pred = self.step(self._root.schema, self.tail, name)
+        # append predicate to walk
+        return Walk(self._root, self._path + pred)
+
+
+    ## get paths ##
+
+    def get(self, **kwargs) -> typing.Any:
+        """Alias for `Nodes.get(..)`."""
+        return self._root.get(tuple(pred.uri for pred in self._path), **kwargs)
+
+    def __call__(self, **kwargs) -> typing.Any: # pylint: disable=arguments-differ
+        """Alias for `Walk.get(...)`."""
+        return self.get(**kwargs)
+
+
+## EOF ##
diff --git a/bsfs/query/ast/__init__.py b/bsfs/query/ast/__init__.py
index 704d051..66b097d 100644
--- a/bsfs/query/ast/__init__.py
+++ b/bsfs/query/ast/__init__.py
@@ -1,6 +1,6 @@
 """Query AST components.
 
-The query AST consists of a Filter syntax tree.
+The query AST consists of a Filter and a Fetch syntax trees.
 
 Classes beginning with an underscore (_) represent internal type hierarchies
 and should not be used for parsing. Note that the AST structures do not
@@ -14,10 +14,12 @@ Author: Matthias Baumgartner, 2022
 import typing
 
 # inner-module imports
+from . import fetch
 from . import filter_ as filter # pylint: disable=redefined-builtin
 
 # exports
 __all__: typing.Sequence[str] = (
+    'fetch',
     'filter',
     )
 
diff --git a/bsfs/query/ast/fetch.py b/bsfs/query/ast/fetch.py
new file mode 100644
index 0000000..d653a8a
--- /dev/null
+++ b/bsfs/query/ast/fetch.py
@@ -0,0 +1,174 @@
+"""
+
+Part of the BlackStar filesystem (bsfs) module.
+A copy of the license is provided with the project.
+Author: Matthias Baumgartner, 2022
+"""
+# imports
+from collections import abc
+import typing
+
+# bsfs imports
+from bsfs.utils import URI, typename, normalize_args
+
+# exports
+__all__ : typing.Sequence[str] = (
+    'All',
+    'Fetch',
+    'FetchExpression',
+    'Node',
+    'This',
+    'Value',
+    )
+
+
+## code ##
+
+class FetchExpression(abc.Hashable):
+    """Generic Fetch expression."""
+
+    def __repr__(self) -> str:
+        """Return the expressions's string representation."""
+        return f'{typename(self)}()'
+
+    def __hash__(self) -> int:
+        """Return the expression's integer representation."""
+        return hash(type(self))
+
+    def __eq__(self, other: typing.Any) -> bool:
+        """Return True if *self* and *other* are equivalent."""
+        return isinstance(other, type(self))
+
+
+class All(FetchExpression):
+    """Fetch all child expressions."""
+
+    # child expressions.
+    expr: typing.Set[FetchExpression]
+
+    def __init__(self, *expr):
+        # unpack child expressions
+        unfolded = set(normalize_args(*expr))
+        # check child expressions
+        if len(unfolded) == 0:
+            raise AttributeError('expected at least one expression, found none')
+        if not all(isinstance(itm, FetchExpression) for itm in unfolded):
+            raise TypeError(expr)
+        # initialize
+        super().__init__()
+        # assign members
+        self.expr = unfolded
+
+    def __iter__(self) -> typing.Iterator[FetchExpression]:
+        return iter(self.expr)
+
+    def __len__(self) -> int:
+        return len(self.expr)
+
+    def __repr__(self) -> str:
+        return f'{typename(self)}({self.expr})'
+
+    def __hash__(self) -> int:
+        return hash((super().__hash__(), tuple(sorted(self.expr, key=repr))))
+
+    def __eq__(self, other: typing.Any) -> bool:
+        return super().__eq__(other) and self.expr == other.expr
+
+
+class _Branch(FetchExpression):
+    """Branch along a predicate."""
+
+    # FIXME: Use a Predicate (like in ast.filter) so that we can also reverse them!
+
+    # predicate to follow.
+    predicate: URI
+
+    def __init__(self, predicate: URI):
+        if not isinstance(predicate, URI):
+            raise TypeError(predicate)
+        self.predicate = predicate
+
+    def __repr__(self) -> str:
+        return f'{typename(self)}({self.predicate})'
+
+    def __hash__(self) -> int:
+        return hash((super().__hash__(), self.predicate))
+
+    def __eq__(self, other: typing.Any) -> bool:
+        return super().__eq__(other) and self.predicate == other.predicate
+
+
+class Fetch(_Branch):
+    """Follow a predicate before evaluating a child epxression."""
+
+    # child expression.
+    expr: FetchExpression
+
+    def __init__(self, predicate: URI, expr: FetchExpression):
+        # check child expressions
+        if not isinstance(expr, FetchExpression):
+            raise TypeError(expr)
+        # initialize
+        super().__init__(predicate)
+        # assign members
+        self.expr = expr
+
+    def __repr__(self) -> str:
+        return f'{typename(self)}({self.predicate}, {self.expr})'
+
+    def __hash__(self) -> int:
+        return hash((super().__hash__(), self.expr))
+
+    def __eq__(self, other: typing.Any) -> bool:
+        return super().__eq__(other) and self.expr == other.expr
+
+
+class _Named(_Branch):
+    """Fetch a (named) symbol at a predicate."""
+
+    # symbol name.
+    name: str
+
+    def __init__(self, predicate: URI, name: str):
+        super().__init__(predicate)
+        self.name = str(name)
+
+    def __repr__(self) -> str:
+        return f'{typename(self)}({self.predicate}, {self.name})'
+
+    def __hash__(self) -> int:
+        return hash((super().__hash__(), self.name))
+
+    def __eq__(self, other: typing.Any) -> bool:
+        return super().__eq__(other) and self.name == other.name
+
+
+class Node(_Named): # pylint: disable=too-few-public-methods
+    """Fetch a Node at a predicate."""
+    # FIXME: Is this actually needed?
+
+
+class Value(_Named): # pylint: disable=too-few-public-methods
+    """Fetch a Literal at a predicate."""
+
+
+class This(FetchExpression):
+    """Fetch the current Node."""
+
+    # symbol name.
+    name: str
+
+    def __init__(self, name: str):
+        super().__init__()
+        self.name = str(name)
+
+    def __repr__(self) -> str:
+        return f'{typename(self)}({self.name})'
+
+    def __hash__(self) -> int:
+        return hash((super().__hash__(), self.name))
+
+    def __eq__(self, other: typing.Any) -> bool:
+        return super().__eq__(other) and self.name == other.name
+
+## EOF ##
diff --git a/bsfs/query/ast/filter_.py b/bsfs/query/ast/filter_.py
index 2f0270c..b29d89e 100644
--- a/bsfs/query/ast/filter_.py
+++ b/bsfs/query/ast/filter_.py
@@ -33,9 +33,6 @@ import typing
 # bsfs imports
 from bsfs.utils import URI, typename, normalize_args
 
-# inner-module imports
-#from . import utils
-
 # exports
 __all__ : typing.Sequence[str] = (
     # base classes
@@ -153,6 +150,7 @@ class _Agg(FilterExpression, abc.Collection):
         # check type
         if not all(isinstance(e, FilterExpression) for e in unfolded):
             raise TypeError(expr)
+        # FIXME: Require at least one child expression?
         # assign member
         self.expr = unfolded
 
@@ -172,7 +170,7 @@ class _Agg(FilterExpression, abc.Collection):
         return f'{typename(self)}({self.expr})'
 
     def __hash__(self) -> int:
-        return hash((super().__hash__(), tuple(self.expr))) # FIXME: Unique hash of different orders over self.expr
+        return hash((super().__hash__(), tuple(sorted(self.expr, key=repr))))
 
     def __eq__(self, other) -> bool:
         return super().__eq__(other) and self.expr == other.expr
@@ -449,20 +447,72 @@ class OneOf(PredicateExpression, abc.Collection):
         return f'{typename(self)}({self.expr})'
 
     def __hash__(self) -> int:
-        return hash((super().__hash__(), tuple(self.expr))) # FIXME: Unique hash of different orders over self.expr
+        return hash((super().__hash__(), tuple(sorted(self.expr, key=repr))))
 
     def __eq__(self, other) -> bool:
         return super().__eq__(other) and self.expr == other.expr
 
 
 # Helpers
+# invalid-name is disabled since they explicitly mimic an expression
 
-def IsIn(*values): # pylint: disable=invalid-name # explicitly mimics an expression
+def IsIn(*values) -> FilterExpression: # pylint: disable=invalid-name
     """Match any of the given URIs."""
-    return Or(Is(value) for value in normalize_args(*values))
-
-def IsNotIn(*values): # pylint: disable=invalid-name # explicitly mimics an expression
+    args = normalize_args(*values)
+    if len(args) == 0:
+        raise AttributeError('expected at least one value, found none')
+    if len(args) == 1:
+        return Is(args[0])
+    return Or(Is(value) for value in args)
+
+def IsNotIn(*values) -> FilterExpression: # pylint: disable=invalid-name
     """Match none of the given URIs."""
     return Not(IsIn(*values))
 
+
+def Between( # pylint: disable=invalid-name
+        lo: float = float('-inf'),
+        hi: float = float('inf'),
+        lo_strict: bool = True,
+        hi_strict: bool = True,
+        ) -> FilterExpression :
+    """Match numerical values between *lo* and *hi*. Include bounds if strict is False."""
+    if abs(lo) == hi == float('inf'):
+        raise ValueError('range cannot be INF on both sides')
+    if lo > hi:
+        raise ValueError(f'lower bound ({lo}) cannot be less than upper bound ({hi})')
+    if lo == hi and not lo_strict and not hi_strict:
+        return Equals(lo)
+    if lo == hi: # either bound is strict
+        raise ValueError('bounds cannot be equal when either is strict')
+    if lo != float('-inf') and hi != float('inf'):
+        return And(GreaterThan(lo, lo_strict), LessThan(hi, hi_strict))
+    if lo != float('-inf'):
+        return GreaterThan(lo, lo_strict)
+    # hi != float('inf'):
+    return LessThan(hi, hi_strict)
+
+
+def Includes(*values, approx: bool = False) -> FilterExpression: # pylint: disable=invalid-name
+    """Match any of the given *values*. Uses `Substring` if *approx* is set."""
+    args = normalize_args(*values)
+    cls = Substring if approx else Equals
+    if len(args) == 0:
+        raise AttributeError('expected at least one value, found none')
+    if len(args) == 1:
+        return cls(args[0])
+    return Or(cls(v) for v in args)
+
+
+def Excludes(*values, approx: bool = False) -> FilterExpression: # pylint: disable=invalid-name
+    """Match none of the given *values*. Uses `Substring` if *approx* is set."""
+    args = normalize_args(*values)
+    cls = Substring if approx else Equals
+    if len(args) == 0:
+        raise AttributeError('expected at least one value, found none')
+    if len(args) == 1:
+        return Not(cls(args[0]))
+    return Not(Or(cls(v) for v in args))
+
+
 ## EOF ##
diff --git a/bsfs/query/matcher.py b/bsfs/query/matcher.py
new file mode 100644
index 0000000..a910756
--- /dev/null
+++ b/bsfs/query/matcher.py
@@ -0,0 +1,366 @@
+"""
+
+Part of the BlackStar filesystem (bsfs) module.
+A copy of the license is provided with the project.
+Author: Matthias Baumgartner, 2022
+"""
+# imports
+from collections import defaultdict
+from itertools import product
+from time import time
+import random
+import threading
+import typing
+
+# external imports
+from hopcroftkarp import HopcroftKarp
+
+# bsfs imports
+from bsfs.utils import errors, typename
+
+# inner-module imports
+from . import ast
+
+# exports
+__all__ : typing.Sequence[str] = (
+    'Filter',
+    )
+
+
+## code ##
+
+class Any(ast.filter.FilterExpression, ast.filter.PredicateExpression):
+    """Match any ast class.
+
+    Note that Any instances are unique, i.e. they do not compare, and
+    can hence be repeated in a set:
+    >>> Any() == Any()
+    False
+    >>> len({Any(), Any(), Any(), Any()})
+    4
+
+    """
+
+    # unique instance id
+    _uid: typing.Tuple[int, int, float, float]
+
+    def __init__(self):
+        self._uid = (
+            id(self),
+            id(threading.current_thread()),
+            time(),
+            random.random(),
+            )
+
+    def __eq__(self, other: typing.Any):
+        return super().__eq__(other) and self._uid == other._uid
+
+    def __hash__(self):
+        return hash((super().__hash__(), self._uid))
+
+
+class Rest(ast.filter.FilterExpression, ast.filter.PredicateExpression):
+    """Match the leftovers in a set of items to be compared.
+
+    Rest can be used in junction with aggregating expressions such as ast.filter.And,
+    ast.filter.Or, ast.filter.OneOf. It controls childs expressions that were not yet
+    consumed by other matching rules. Rest may match to only a specific expression.
+    The expresssion defaults to Any().
+
+    For example, the following to ast structures would match since Rest
+    allows an arbitrary repetition of ast.filter.Equals statements.
+
+    >>> And(Equals('hello'), Equals('world'), Equals('foobar'))
+    >>> And(Equals('world'), Rest(Partial(Equals)))
+
+    """
+
+    # child expression for the Rest.
+    expr: typing.Union[ast.filter.FilterExpression, ast.filter.PredicateExpression]
+
+    def __init__(
+            self,
+            expr: typing.Optional[typing.Union[ast.filter.FilterExpression, ast.filter.PredicateExpression]] = None,
+            ):
+        if expr is None:
+            expr = Any()
+        self.expr = expr
+
+    def __repr__(self) -> str:
+        return f'{typename(self)}({self.expr})'
+
+    def __hash__(self) -> int:
+        return hash((super().__hash__(), self.expr))
+
+    def __eq__(self, other: typing.Any) -> bool:
+        return super().__eq__(other) and self.expr == other.expr
+
+
+class Partial(ast.filter.FilterExpression, ast.filter.PredicateExpression):
+    """Match a partially defined ast expression.
+
+    Literal values might be irrelevant or unknown when comparing two ast
+    structures. Partial allows to constrain the matcher to a certain
+    ast class, while leaving some of its members unspecified.
+
+    Pass the class (not instance) and its members as keyword arguments
+    to Partial. Note that the arguments are not validated.
+
+    For example, the following instance matches any ast.filter.Equals,
+    irrespective of its value:
+
+    >>> Partial(ast.filter.Equals)
+
+    Likewise, the following instance matches any ast.filter.LessThan
+    that has a strict bounds, but makes no claim about the threshold:
+
+    >>> Partial(ast.filter.LessThan, strict=False)
+
+    """
+
+    # target node type.
+    node: typing.Type
+
+    # node construction args.
+    kwargs: typing.Dict[str, typing.Any]
+
+    def __init__(
+            self,
+            node: typing.Type,
+            **kwargs,
+            ):
+        self.node = node
+        self.kwargs = kwargs
+
+    def __repr__(self) -> str:
+        return f'{typename(self)}({self.node.__name__}, {self.kwargs})'
+
+    def __hash__(self) -> int:
+        kwargs = tuple((key, self.kwargs[key]) for key in sorted(self.kwargs))
+        return hash((super().__hash__(), self.node, kwargs))
+
+    def __eq__(self, other: typing.Any) -> bool:
+        return super().__eq__(other) \
+           and self.node == other.node \
+           and self.kwargs == other.kwargs
+
+    def match(
+            self,
+            name: str,
+            value: typing.Any,
+            ) -> bool:
+        """Return True if *name* is unspecified or matches *value*."""
+        return name not in self.kwargs or self.kwargs[name] == value
+
+
+T_ITEM_TYPE = typing.TypeVar('T_ITEM_TYPE') # pylint: disable=invalid-name
+
+def _set_matcher(
+        query: typing.Collection[T_ITEM_TYPE],
+        reference: typing.Collection[T_ITEM_TYPE],
+        cmp: typing.Callable[[T_ITEM_TYPE, T_ITEM_TYPE], bool],
+        ) -> bool:
+    """Compare two sets of child expressions.
+
+    This check has a best-case complexity of O(|N|**2) and worst-case
+    complexity of O(|N|**3), with N the number of child expressions.
+    """
+    # get reference items
+    r_items = list(reference)
+    # deal with Rest
+    r_rest = {itm for itm in r_items if isinstance(itm, Rest)}
+    if len(r_rest) > 1:
+        raise errors.BackendError(f'there must be at most one Rest instance per set, found {len(r_rest)}')
+    if len(r_rest) == 1:
+        # replace Rest by filling the reference up with rest's expression
+        # NOTE: convert r_items to list so that items can be repeated
+        expr = next(iter(r_rest)).expr # type: ignore [attr-defined]
+        r_items = [itm for itm in r_items if not isinstance(itm, Rest)]
+        r_items += [expr for _ in range(len(query) - len(r_items))] # type: ignore [misc]
+    # sanity check: cannot match if the item sizes differ:
+    # either a reference item is unmatched (len(r_items) > len(query))
+    # or a query item is unmatched (len(r_items) < len(query))
+    if len(query) != len(r_items):
+        return False
+
+    # To have a positive match between the query and the reference,
+    # each query expr has to match any reference expr.
+    # However, each reference expr can only be "consumed" once even
+    # if it matches multiple query exprs (e.g., the Any expression matches
+    # every query expr).
+    # This is a bipartide matching problem (Hall's marriage problem)
+    # and the Hopcroft-Karp-Karzanov algorithm finds a maximum
+    # matching. While there might be multiple maximum matchings,
+    # we only need to know whether (at least) one complete matching
+    # exists. The hopcroftkarp module provides this functionality.
+    # The HKK algorithm has worst-case complexity of O(|N|**2 * sqrt(|N|))
+    # and we also need to compare expressions pairwise, hence O(|N|**2).
+    num_items = len(r_items)
+    graph = defaultdict(set)
+    # build the bipartide graph as {lhs: {rhs}, ...}
+    # lhs and rhs must be disjoint identifiers.
+    for (ridx, ref), (nidx, node) in product(enumerate(r_items), enumerate(query)):
+        # add edges for equal expressions
+        if cmp(node, ref):
+            graph[ridx].add(num_items + nidx)
+
+    # maximum_matching returns the matches for all nodes in the graph
+    # ({ref_itm: node_itm}), hence a complete matching's size is
+    # the number of reference's child expressions.
+    return len(HopcroftKarp(graph).maximum_matching(keys_only=True)) == num_items
+
+
+class Filter():
+    """Compare a bsfs.query.ast.filter` query's structure to a reference ast.
+
+    The reference ast may include `Rest`, `Partial`, or `Any` to account for irrelevant
+    or unknown ast pieces.
+
+    This is only a structural comparison, not a semantic one. For example, the
+    two following queries are semantically identical, but structurally different,
+    and would therefore not match:
+
+    >>> ast.filter.OneOf(ast.filter.Predicate(ns.bse.filename))
+    >>> ast.filter.Predicate(ns.bse.filename)
+
+    """
+
+    def __call__(self, query: ast.filter.FilterExpression, reference: ast.filter.FilterExpression) -> bool:
+        """Compare a *query* to a *reference* ast structure.
+        Return True if both are structurally equivalent.
+        """
+        if not isinstance(query, ast.filter.FilterExpression):
+            raise errors.BackendError(f'expected filter expression, found {query}')
+        if not isinstance(reference, ast.filter.FilterExpression):
+            raise errors.BackendError(f'expected filter expression, found {reference}')
+        return self._parse_filter_expression(query, reference)
+
+    def _parse_filter_expression(
+            self,
+            node: ast.filter.FilterExpression,
+            reference: ast.filter.FilterExpression,
+            ) -> bool:
+        """Route *node* to the handler of the respective FilterExpression subclass."""
+        # generic checks: reference type must be Any or match node type
+        if isinstance(reference, Any):
+            return True
+        # node-specific checks
+        if isinstance(node, ast.filter.Not):
+            return self._not(node, reference)
+        if isinstance(node, ast.filter.Has):
+            return self._has(node, reference)
+        if isinstance(node, ast.filter.Distance):
+            return self._distance(node, reference)
+        if isinstance(node, (ast.filter.Any, ast.filter.All)):
+            return self._branch(node, reference)
+        if isinstance(node, (ast.filter.And, ast.filter.Or)):
+            return self._agg(node, reference)
+        if isinstance(node, (ast.filter.Is, ast.filter.Equals, ast.filter.Substring,
+                             ast.filter.StartsWith, ast.filter.EndsWith)):
+            return self._value(node, reference)
+        if isinstance(node, (ast.filter.LessThan, ast.filter.GreaterThan)):
+            return self._bounded(node, reference)
+        # invalid node
+        raise errors.BackendError(f'expected filter expression, found {node}')
+
+    def _parse_predicate_expression(
+            self,
+            node: ast.filter.PredicateExpression,
+            reference: ast.filter.PredicateExpression,
+            ) -> bool:
+        """Route *node* to the handler of the respective PredicateExpression subclass."""
+        if isinstance(reference, Any):
+            return True
+        if isinstance(node, ast.filter.Predicate):
+            return self._predicate(node, reference)
+        if isinstance(node, ast.filter.OneOf):
+            return self._one_of(node, reference)
+        # invalid node
+        raise errors.BackendError(f'expected predicate expression, found {node}')
+
+    def _one_of(self, node: ast.filter.OneOf, reference: ast.filter.PredicateExpression) -> bool:
+        if not isinstance(reference, type(node)):
+            return False
+        return _set_matcher(node, reference, self._parse_predicate_expression)
+
+    def _predicate(self, node: ast.filter.Predicate, reference: ast.filter.PredicateExpression) -> bool:
+        if not isinstance(reference, (Partial, type(node))):
+            return False
+        # partial check
+        if isinstance(reference, Partial):
+            if not isinstance(node, reference.node):
+                return False
+            return reference.match('predicate', node.predicate) \
+               and reference.match('reverse', node.reverse)
+        # full check
+        return node.predicate == reference.predicate \
+           and node.reverse == reference.reverse
+
+    def _branch(self,
+            node: typing.Union[ast.filter.Any, ast.filter.All],
+            reference: ast.filter.FilterExpression,
+            ) -> bool:
+        if not isinstance(reference, type(node)):
+            return False
+        if not self._parse_predicate_expression(node.predicate, reference.predicate): # type: ignore [attr-defined]
+            return False
+        if not self._parse_filter_expression(node.expr, reference.expr): # type: ignore [attr-defined]
+            return False
+        return True
+
+    def _agg(self, node: typing.Union[ast.filter.And, ast.filter.Or], reference: ast.filter.FilterExpression) -> bool:
+        if not isinstance(reference, type(node)):
+            return False
+        return _set_matcher(node, reference, self._parse_filter_expression) # type: ignore [arg-type]
+
+    def _not(self, node: ast.filter.Not, reference: ast.filter.FilterExpression) -> bool:
+        if not isinstance(reference, type(node)):
+            return False
+        return self._parse_filter_expression(node.expr, reference.expr)
+
+    def _has(self, node: ast.filter.Has, reference: ast.filter.FilterExpression) -> bool:
+        if not isinstance(reference, type(node)):
+            return False
+        return self._parse_predicate_expression(node.predicate, reference.predicate) \
+           and self._parse_filter_expression(node.count, reference.count)
+
+    def _distance(self, node: ast.filter.Distance, reference: ast.filter.FilterExpression) -> bool:
+        if not isinstance(reference, (Partial, type(node))):
+            return False
+        # partial check
+        if isinstance(reference, Partial):
+            if not isinstance(node, reference.node):
+                return False
+            return reference.match('reference', node.reference) \
+               and reference.match('threshold', node.threshold) \
+               and reference.match('strict', node.strict)
+        # full check
+        return node.reference == reference.reference \
+           and node.threshold == reference.threshold \
+           and node.strict == reference.strict
+
+    def _value(self, node: ast.filter._Value, reference: ast.filter.FilterExpression) -> bool:
+        if not isinstance(reference, (Partial, type(node))):
+            return False
+        # partial check
+        if isinstance(reference, Partial):
+            if not isinstance(node, reference.node):
+                return False
+            return reference.match('value', node.value)
+        # full ckeck
+        return node.value == reference.value
+
+    def _bounded(self, node: ast.filter._Bounded, reference: ast.filter.FilterExpression) -> bool:
+        if not isinstance(reference, (Partial, type(node))):
+            return False
+        # partial check
+        if isinstance(reference, Partial):
+            if not isinstance(node, reference.node):
+                return False
+            return reference.match('threshold', node.threshold) \
+               and reference.match('strict', node.strict)
+        # full check
+        return node.threshold == reference.threshold \
+           and node.strict == reference.strict
+
+## EOF ##
diff --git a/bsfs/query/validator.py b/bsfs/query/validator.py
index 904ac14..f0aa795 100644
--- a/bsfs/query/validator.py
+++ b/bsfs/query/validator.py
@@ -20,6 +20,7 @@ __all__ : typing.Sequence[str] = (
     'Filter',
     )
 
+# FIXME: Split into a submodule and the two classes into their own respective files.
 
 ## code ##
 
@@ -49,7 +50,7 @@ class Filter():
         """
         # root_type must be a schema.Node
         if not isinstance(root_type, bsc.Node):
-            raise TypeError(f'Expected a node, found {typename(root_type)}')
+            raise TypeError(f'expected a node, found {typename(root_type)}')
         # root_type must exist in the schema
         if root_type not in self.schema.nodes():
             raise errors.ConsistencyError(f'{root_type} is not defined in the schema')
@@ -223,4 +224,125 @@ class Filter():
         # FIXME: Check if node.value corresponds to type_
 
 
+class Fetch():
+    """Validate a `bsfs.query.ast.fetch` query's structure and schema compliance.
+
+    * Value can only be applied on literals
+    * Node can only be applied on nodes
+    * Names must be non-empty
+    * Branching nodes' predicates must match the type
+    * Symbols must be in the schema
+    * Predicates must follow the schema
+
+    """
+
+    # schema to validate against.
+    schema: bsc.Schema
+
+    def __init__(self, schema: bsc.Schema):
+        self.schema = schema
+
+    def __call__(self, root_type: bsc.Node, query: ast.fetch.FetchExpression):
+        """Validate a fetch *query*, assuming the subject having *root_type*.
+
+        Raises a `bsfs.utils.errors.ConsistencyError` if the query violates the schema.
+        Raises a `bsfs.utils.errors.BackendError` if the query structure is invalid.
+
+        """
+        # root_type must be a schema.Node
+        if not isinstance(root_type, bsc.Node):
+            raise TypeError(f'expected a node, found {typename(root_type)}')
+        # root_type must exist in the schema
+        if root_type not in self.schema.nodes():
+            raise errors.ConsistencyError(f'{root_type} is not defined in the schema')
+        # query must be a FetchExpression
+        if not isinstance(query, ast.fetch.FetchExpression):
+            raise TypeError(f'expected a fetch expression, found {typename(query)}')
+        # check root expression
+        self._parse_fetch_expression(root_type, query)
+        # all tests passed
+        return True
+
+    def _parse_fetch_expression(self, type_: bsc.Vertex, node: ast.fetch.FetchExpression):
+        """Route *node* to the handler of the respective FetchExpression subclass."""
+        if isinstance(node, (ast.fetch.Fetch, ast.fetch.Value, ast.fetch.Node)):
+            # NOTE: don't return so that checks below are executed
+            self._branch(type_, node)
+        if isinstance(node, (ast.fetch.Value, ast.fetch.Node)):
+            # NOTE: don't return so that checks below are executed
+            self._named(type_, node)
+        if isinstance(node, ast.fetch.All):
+            return self._all(type_, node)
+        if isinstance(node, ast.fetch.Fetch):
+            return self._fetch(type_, node)
+        if isinstance(node, ast.fetch.Value):
+            return self._value(type_, node)
+        if isinstance(node, ast.fetch.Node):
+            return self._node(type_, node)
+        if isinstance(node, ast.fetch.This):
+            return self._this(type_, node)
+        # invalid node
+        raise errors.BackendError(f'expected fetch expression, found {node}')
+
+    def _all(self, type_: bsc.Vertex, node: ast.fetch.All):
+        # check child expressions
+        for expr in node:
+            self._parse_fetch_expression(type_, expr)
+
+    def _branch(self, type_: bsc.Vertex, node: ast.fetch._Branch):
+        # type is a node
+        if not isinstance(type_, bsc.Node):
+            raise errors.ConsistencyError(f'expected a Node, found {type_}')
+        # node exists in the schema
+        if type_ not in self.schema.nodes():
+            raise errors.ConsistencyError(f'node {type_} is not in the schema')
+        # predicate exists in the schema
+        if not self.schema.has_predicate(node.predicate):
+            raise errors.ConsistencyError(f'predicate {node.predicate} is not in the schema')
+        pred = self.schema.predicate(node.predicate)
+        # type_ must be a subclass of domain
+        if not type_ <= pred.domain:
+            raise errors.ConsistencyError(
+                f'expected type {pred.domain} or subtype thereof, found {type_}')
+
+    def _fetch(self, type_: bsc.Vertex, node: ast.fetch.Fetch): # pylint: disable=unused-argument # type_ was considered in _branch
+        # range must be a node
+        rng = self.schema.predicate(node.predicate).range
+        if not isinstance(rng, bsc.Node):
+            raise errors.ConsistencyError(
+                f'expected the predicate\'s range to be a Node, found {rng}')
+        # child expression must be valid
+        self._parse_fetch_expression(rng, node.expr)
+
+    def _named(self, type_: bsc.Vertex, node: ast.fetch._Named): # pylint: disable=unused-argument # type_ was considered in _branch
+        # name must be set
+        if node.name.strip() == '':
+            raise errors.BackendError('node name cannot be empty')
+        # FIXME: check for double name use?
+
+    def _node(self, type_: bsc.Vertex, node: ast.fetch.Node): # pylint: disable=unused-argument # type_ was considered in _branch
+        # range must be a node
+        rng = self.schema.predicate(node.predicate).range
+        if not isinstance(rng, bsc.Node):
+            raise errors.ConsistencyError(
+                f'expected the predicate\'s range to be a Node, found {rng}')
+
+    def _value(self, type_: bsc.Vertex, node: ast.fetch.Value): # pylint: disable=unused-argument # type_ was considered in _branch
+        # range must be a literal
+        rng = self.schema.predicate(node.predicate).range
+        if not isinstance(rng, bsc.Literal):
+            raise errors.ConsistencyError(
+                f'expected the predicate\'s range to be a Literal, found {rng}')
+
+    def _this(self, type_: bsc.Vertex, node: ast.fetch.This):
+        # type is a node
+        if not isinstance(type_, bsc.Node):
+            raise errors.ConsistencyError(f'expected a Node, found {type_}')
+        # node exists in the schema
+        if type_ not in self.schema.nodes():
+            raise errors.ConsistencyError(f'node {type_} is not in the schema')
+        # name must be set
+        if node.name.strip() == '':
+            raise errors.BackendError('node name cannot be empty')
+
 ## EOF ##
diff --git a/bsfs/schema/schema.py b/bsfs/schema/schema.py
index 8d9a821..0de4203 100644
--- a/bsfs/schema/schema.py
+++ b/bsfs/schema/schema.py
@@ -69,6 +69,7 @@ class Schema():
         literals.add(types.ROOT_LITERAL)
         predicates.add(types.ROOT_PREDICATE)
         # add minimally necessary types to the schema
+        literals.add(types.ROOT_BLOB)
         literals.add(types.ROOT_NUMBER)
         literals.add(types.ROOT_TIME)
         literals.add(types.ROOT_ARRAY)
@@ -312,4 +313,8 @@ class Schema():
         """Return the Literal matching the *uri*."""
         return self._literals[uri]
 
+    def predicates_at(self, node: types.Node) -> typing.Iterator[types.Predicate]:
+        """Return predicates that have domain *node* (or superclass thereof)."""
+        return iter(pred for pred in self._predicates.values() if node <= pred.domain)
+
 ## EOF ##
diff --git a/bsfs/schema/types.py b/bsfs/schema/types.py
index 3a2e10c..12e7e94 100644
--- a/bsfs/schema/types.py
+++ b/bsfs/schema/types.py
@@ -380,6 +380,11 @@ ROOT_LITERAL = Literal(
     parent=None,
     )
 
+ROOT_BLOB = Literal(
+    uri=ns.bsfs.BinaryBlob,
+    parent=ROOT_LITERAL,
+    )
+
 ROOT_NUMBER = Literal(
     uri=ns.bsfs.Number,
     parent=ROOT_LITERAL,
diff --git a/bsfs/triple_store/base.py b/bsfs/triple_store/base.py
index 7e03714..1baa63b 100644
--- a/bsfs/triple_store/base.py
+++ b/bsfs/triple_store/base.py
@@ -11,7 +11,7 @@ import typing
 # inner-module imports
 from bsfs.query import ast
 from bsfs.utils import URI, typename
-import bsfs.schema  as _schema
+import bsfs.schema  as bsc
 
 # exports
 __all__: typing.Sequence[str] = (
@@ -82,12 +82,12 @@ class TripleStoreBase(abc.ABC):
 
     @property
     @abc.abstractmethod
-    def schema(self) -> _schema.Schema:
+    def schema(self) -> bsc.Schema:
         """Return the store's local schema."""
 
     @schema.setter
     @abc.abstractmethod
-    def schema(self, schema: _schema.Schema):
+    def schema(self, schema: bsc.Schema):
         """Migrate to new schema by adding or removing class definitions.
 
         Commits before and after the migration.
@@ -112,17 +112,28 @@ class TripleStoreBase(abc.ABC):
     @abc.abstractmethod
     def get(
             self,
-            node_type: _schema.Node,
-            query: typing.Optional[ast.filter.FilterExpression] = None,
+            node_type: bsc.Node,
+            filter: typing.Optional[ast.filter.FilterExpression] = None, # pylint: disable=redefined-builtin
             ) -> typing.Iterator[URI]:
-        """Return guids of nodes of type *node_type* that match the *query*.
-        Return all guids of the respective type if *query* is None.
+        """Return guids of nodes of type *node_type* that match the *filter*.
+        Return all guids of the respective type if *filter* is None.
+        """
+
+    @abc.abstractmethod
+    def fetch(
+            self,
+            node_type: bsc.Node,
+            filter: ast.filter.FilterExpression, # pylint: disable=redefined-builtin
+            fetch: ast.fetch.FetchExpression,
+            ) -> typing.Iterator[typing.Tuple[URI, str, typing.Any]]:
+        """Return (guid, name, value) triples where the guid is determined by the *filter*
+        query and the name matches the *fetch* query.
         """
 
     @abc.abstractmethod
     def exists(
             self,
-            node_type: _schema.Node,
+            node_type: bsc.Node,
             guids: typing.Iterable[URI],
             ) -> typing.Iterable[URI]:
         """Return those *guids* that exist and have type *node_type* or a subclass thereof."""
@@ -130,7 +141,7 @@ class TripleStoreBase(abc.ABC):
     @abc.abstractmethod
     def create(
             self,
-            node_type: _schema.Node,
+            node_type: bsc.Node,
             guids: typing.Iterable[URI],
             ):
         """Create *guid* nodes with type *subject*."""
@@ -138,9 +149,9 @@ class TripleStoreBase(abc.ABC):
     @abc.abstractmethod
     def set(
             self,
-            node_type: _schema.Node, # FIXME: is the node_type even needed? Couldn't I infer from the predicate?
+            node_type: bsc.Node, # FIXME: is the node_type even needed? Couldn't I infer from the predicate?
             guids: typing.Iterable[URI],
-            predicate: _schema.Predicate,
+            predicate: bsc.Predicate,
             values: typing.Iterable[typing.Any],
             ):
         """Add triples to the graph.
diff --git a/bsfs/triple_store/sparql/parse_fetch.py b/bsfs/triple_store/sparql/parse_fetch.py
new file mode 100644
index 0000000..20d4e74
--- /dev/null
+++ b/bsfs/triple_store/sparql/parse_fetch.py
@@ -0,0 +1,109 @@
+"""
+
+Part of the BlackStar filesystem (bsfs) module.
+A copy of the license is provided with the project.
+Author: Matthias Baumgartner, 2022
+"""
+# standard imports
+import typing
+
+# bsfs imports
+from bsfs import schema as bsc
+from bsfs.query import ast
+from bsfs.utils import errors
+
+# inner-module imports
+from .utils import GenHopName, Query
+
+# exports
+__all__: typing.Sequence[str] = (
+    'Fetch',
+    )
+
+
+## code ##
+
+class Fetch():
+    """Translate `bsfs.query.ast.fetch` structures into Sparql queries."""
+
+    def __init__(self, schema):
+        self.schema = schema
+        self.ngen = GenHopName(prefix='?fch')
+
+    def __call__(
+            self,
+            root_type: bsc.Node,
+            root: ast.fetch.FetchExpression,
+            ) -> Query:
+        """
+        """
+        # check root_type
+        if not isinstance(root_type, bsc.Node):
+            raise errors.BackendError(f'expected Node, found {root_type}')
+        if root_type not in self.schema.nodes():
+            raise errors.ConsistencyError(f'node {root_type} is not in the schema')
+        # parse root
+        terms, expr = self._parse_fetch_expression(root_type, root, '?ent')
+        # assemble query
+        return Query(
+            root_type=root_type.uri,
+            root_head='?ent',
+            select=terms,
+            where=expr,
+            )
+
+    def _parse_fetch_expression(
+            self,
+            node_type: bsc.Vertex,
+            node: ast.fetch.FetchExpression,
+            head: str,
+            ):
+        """Route *node* to the handler of the respective FetchExpression subclass."""
+        if isinstance(node, ast.fetch.All):
+            return self._all(node_type, node, head)
+        if isinstance(node, ast.fetch.Fetch):
+            return self._fetch(node_type, node, head)
+        if isinstance(node, ast.fetch.Node):
+            return self._node(node_type, node, head)
+        if isinstance(node, ast.fetch.Value):
+            return self._value(node_type, node, head)
+        if isinstance(node, ast.fetch.This):
+            return self._this(node_type, node, head)
+        # invalid node
+        raise errors.BackendError(f'expected fetch expression, found {node}')
+
+    def _all(self, node_type: bsc.Vertex, node: ast.fetch.All, head: str):
+        # child expressions
+        terms, exprs = zip(*[self._parse_fetch_expression(node_type, expr, head) for expr in node])
+        terms = {term for sub in terms for term in sub}
+        exprs = ' .\n'.join({expr for expr in exprs if len(expr.strip()) > 0})
+        return terms, exprs
+
+    def _fetch(self, node_type: bsc.Vertex, node: ast.fetch.Fetch, head: str): # pylint: disable=unused-argument # (node_type)
+        # child expressions
+        rng = self.schema.predicate(node.predicate).range
+        nexthead = next(self.ngen)
+        terms, expr = self._parse_fetch_expression(rng, node.expr, nexthead)
+        return terms, f'OPTIONAL{{ {head} <{node.predicate}> {nexthead} .\n {expr} }}'
+
+    def _node(self, node_type: bsc.Vertex, node: ast.fetch.Node, head: str): # pylint: disable=unused-argument # (node_type)
+        if f'?{node.name}'.startswith(self.ngen.prefix):
+            raise errors.BackendError(f'Node name must start with {self.ngen.prefix}')
+        # compose and return statement
+        term = next(self.ngen)
+        return {(term, node.name)}, f'OPTIONAL{{ {head} <{node.predicate}> {term} }}'
+
+    def _value(self, node_type: bsc.Vertex, node: ast.fetch.Value, head: str): # pylint: disable=unused-argument # (node_type)
+        if f'?{node.name}'.startswith(self.ngen.prefix):
+            raise errors.BackendError(f'Value name must start with {self.ngen.prefix}')
+        # compose and return statement
+        term = next(self.ngen)
+        return {(term, node.name)}, f'OPTIONAL{{ {head} <{node.predicate}> {term} }}'
+
+    def _this(self, node_type: bsc.Vertex, node: ast.fetch.This, head: str): # pylint: disable=unused-argument # (node_type)
+        if f'?{node.name}'.startswith(self.ngen.prefix):
+            raise errors.BackendError(f'This name must start with {self.ngen.prefix}')
+        # compose and return statement
+        return {(head, node.name)}, ''
+
+## EOF ##
diff --git a/bsfs/triple_store/sparql/parse_filter.py b/bsfs/triple_store/sparql/parse_filter.py
index 8b6b976..dca0aea 100644
--- a/bsfs/triple_store/sparql/parse_filter.py
+++ b/bsfs/triple_store/sparql/parse_filter.py
@@ -19,6 +19,7 @@ from bsfs.utils import URI, errors
 
 # inner-module imports
 from .distance import DISTANCE_FU
+from .utils import GenHopName, Query
 
 # exports
 __all__: typing.Sequence[str] = (
@@ -28,25 +29,6 @@ __all__: typing.Sequence[str] = (
 
 ## code ##
 
-class _GenHopName():
-    """Generator that produces a new unique symbol name with each iteration."""
-
-    # Symbol name prefix.
-    prefix: str
-
-    # Current counter.
-    curr: int
-
-    def __init__(self, prefix: str = '?hop', start: int = 0):
-        self.prefix = prefix
-        self.curr = start - 1
-
-    def __next__(self):
-        """Generate and return the next unique name."""
-        self.curr += 1
-        return self.prefix + str(self.curr)
-
-
 class Filter():
     """Translate `bsfs.query.ast.filter` structures into Sparql queries."""
 
@@ -54,18 +36,18 @@ class Filter():
     schema: bsc.Schema
 
     # Generator that produces unique symbol names.
-    ngen: _GenHopName
+    ngen: GenHopName
 
     def __init__(self, graph, schema):
         self.graph = graph
         self.schema = schema
-        self.ngen = _GenHopName()
+        self.ngen = GenHopName(prefix='?flt')
 
     def __call__(
             self,
             root_type: bsc.Node,
             root: typing.Optional[ast.filter.FilterExpression] = None,
-            ) -> str:
+            ) -> Query:
         """
         """
         # check root_type
@@ -79,15 +61,18 @@ class Filter():
         else:
             cond = self._parse_filter_expression(root_type, root, '?ent')
         # assemble query
-        return f'''
-            SELECT ?ent
-            WHERE {{
-                ?ent <{ns.rdf.type}>/<{ns.rdfs.subClassOf}>* <{root_type.uri}> .
-                {cond}
-            }}
-            '''
+        return Query(
+            root_type=root_type.uri,
+            root_head='?ent',
+            where=cond,
+            )
 
-    def _parse_filter_expression(self, type_: bsc.Vertex, node: ast.filter.FilterExpression, head: str) -> str:
+    def _parse_filter_expression(
+            self,
+            type_: bsc.Vertex,
+            node: ast.filter.FilterExpression,
+            head: str,
+            ) -> str:
         """Route *node* to the handler of the respective FilterExpression subclass."""
         if isinstance(node, ast.filter.Is):
             return self._is(type_, node, head)
diff --git a/bsfs/triple_store/sparql/sparql.py b/bsfs/triple_store/sparql/sparql.py
index fedd227..dbf9d45 100644
--- a/bsfs/triple_store/sparql/sparql.py
+++ b/bsfs/triple_store/sparql/sparql.py
@@ -5,8 +5,11 @@ A copy of the license is provided with the project.
 Author: Matthias Baumgartner, 2022
 """
 # imports
+import base64
 import itertools
 import typing
+
+# external imports
 import rdflib
 
 # bsfs imports
@@ -16,6 +19,7 @@ from bsfs.query import ast
 from bsfs.utils import errors, URI
 
 # inner-module imports
+from . import parse_fetch
 from . import parse_filter
 from .. import base
 from .distance import DISTANCE_FU
@@ -29,6 +33,8 @@ __all__: typing.Sequence[str] = (
 
 ## code ##
 
+rdflib.term.bind(ns.bsfs.BinaryBlob, bytes, constructor=base64.b64decode)
+
 class _Transaction():
     """Lightweight rdflib transactions for in-memory databases."""
 
@@ -92,13 +98,16 @@ class SparqlStore(base.TripleStoreBase):
     # Filter parser
     _filter_parser: parse_filter.Filter
 
+    # Fetch parser
+    _fetch_parser: parse_fetch.Fetch
+
     def __init__(self):
         super().__init__(None)
         self._graph = rdflib.Graph()
         self._transaction = _Transaction(self._graph)
-        # NOTE: parsing bsfs.query.ast.filter.Has requires xsd:integer.
         self._schema = bsc.Schema(literals={bsc.ROOT_NUMBER.child(ns.xsd.integer)})
         self._filter_parser = parse_filter.Filter(self._graph, self._schema)
+        self._fetch_parser = parse_fetch.Fetch(self._schema)
 
     # NOTE: mypy and pylint complain about the **kwargs not being listed (contrasting super)
     # However, not having it here is clearer since it's explicit that there are no arguments.
@@ -197,17 +206,53 @@ class SparqlStore(base.TripleStoreBase):
         # migrate schema
         self._schema = schema
         self._filter_parser.schema = schema
+        self._fetch_parser.schema = schema
+
+    def fetch(
+            self,
+            node_type: bsc.Node,
+            filter: ast.filter.FilterExpression, # pylint: disable=redefined-builtin
+            fetch: ast.fetch.FetchExpression,
+            ) -> typing.Iterator[typing.Tuple[URI, str, typing.Any]]:
+        if node_type not in self.schema.nodes():
+            raise errors.ConsistencyError(f'{node_type} is not defined in the schema')
+        if not isinstance(filter, ast.filter.FilterExpression):
+            raise TypeError(filter)
+        if not isinstance(fetch, ast.fetch.FetchExpression):
+            raise TypeError(fetch)
+        # compose a query from fetch and filter ast
+        query = self._filter_parser(node_type, filter)
+        query += self._fetch_parser(node_type, fetch)
+        # run query
+        emitted = set()
+        for result in query(self._graph):
+            guid = URI(result[0])
+            for name, raw in zip(query.names, result[1:]):
+                if raw is None: # undefined value
+                    continue
+                if isinstance(raw, rdflib.Literal):
+                    value = raw.value
+                else:
+                    value = URI(raw)
+                # emit triple
+                triple = (guid, name, value)
+                if triple not in emitted: # FIXME: needs a better solution!
+                    emitted.add(triple)
+                    yield guid, name, value
 
     def get(
             self,
             node_type: bsc.Node,
-            query: typing.Optional[ast.filter.FilterExpression] = None,
+            filter: typing.Optional[ast.filter.FilterExpression] = None, # pylint: disable=redefined-builtin
             ) -> typing.Iterator[URI]:
         if node_type not in self.schema.nodes():
             raise errors.ConsistencyError(f'{node_type} is not defined in the schema')
-        if not isinstance(query, ast.filter.FilterExpression):
-            raise TypeError(query)
-        for guid, in self._graph.query(self._filter_parser(node_type, query)):
+        if filter is not None and not isinstance(filter, ast.filter.FilterExpression):
+            raise TypeError(filter)
+        # compose query
+        query = self._filter_parser(node_type, filter)
+        # run query
+        for guid, in query(self._graph):
             yield URI(guid)
 
     def _has_type(self, subject: URI, node_type: bsc.Node) -> bool:
@@ -294,7 +339,11 @@ class SparqlStore(base.TripleStoreBase):
             guid = rdflib.URIRef(guid)
             # convert value
             if isinstance(predicate.range, bsc.Literal):
-                value = rdflib.Literal(value, datatype=rdflib.URIRef(predicate.range.uri))
+                dtype = rdflib.URIRef(predicate.range.uri)
+                if predicate.range <= self.schema.literal(ns.bsfs.BinaryBlob):
+                    dtype = rdflib.URIRef(ns.bsfs.BinaryBlob)
+                    value = base64.b64encode(value)
+                value = rdflib.Literal(value, datatype=dtype)
             elif isinstance(predicate.range, bsc.Node):
                 value = rdflib.URIRef(value)
             else:
diff --git a/bsfs/triple_store/sparql/utils.py b/bsfs/triple_store/sparql/utils.py
new file mode 100644
index 0000000..deca4d8
--- /dev/null
+++ b/bsfs/triple_store/sparql/utils.py
@@ -0,0 +1,141 @@
+"""
+
+Part of the BlackStar filesystem (bsfs) module.
+A copy of the license is provided with the project.
+Author: Matthias Baumgartner, 2022
+"""
+# standard imports
+import typing
+
+# external imports
+import rdflib
+
+# bsfs imports
+from bsfs.namespace import ns
+from bsfs.utils import typename
+
+# exports
+__all__: typing.Sequence[str] = (
+    'GenHopName',
+    'Query',
+    )
+
+
+## code ##
+
+class GenHopName():
+    """Generator that produces a new unique symbol name with each iteration."""
+
+    # Symbol name prefix.
+    prefix: str
+
+    # Current counter.
+    curr: int
+
+    def __init__(self, prefix: str = '?hop', start: int = 0):
+        self.prefix = prefix
+        self.curr = start - 1
+
+    def __next__(self):
+        """Generate and return the next unique name."""
+        self.curr += 1
+        return self.prefix + str(self.curr)
+
+
+class Query():
+    """Hold, manage, and complete partial Sparql queries."""
+
+    # root node type URI.
+    root_type: str
+
+    # root node variable name.
+    root_head: str
+
+    # (head, name) tuples (w/o root)
+    select: typing.Tuple[typing.Tuple[str, str], ...]
+
+    # where statements.
+    where: str
+
+    def __init__(
+            self,
+            root_type: str,
+            root_head: str = '?ent',
+            select: typing.Optional[typing.Iterable[typing.Tuple[str, str]]] = None,
+            where: typing.Optional[str] = None,
+            ):
+        # check arguments
+        if select is None:
+            select = []
+        if where is None:
+            where = ''
+        # set members
+        self.root_type = root_type
+        self.root_head = root_head
+        self.select = tuple(select) # tuple ensures presistent order
+        self.where = where.strip()
+
+    def __str__(self) -> str:
+        return self.query
+
+    def __repr__(self) -> str:
+        return f'{typename(self)}({self.root_type}, {self.root_head}, {self.select}, {self.where})'
+
+    def __eq__(self, other: typing.Any) -> bool:
+        return isinstance(other, type(self)) \
+           and self.root_type == other.root_type \
+           and self.root_head == other.root_head \
+           and self.select == other.select \
+           and self.where == other.where
+
+    def __hash__(self) -> int:
+        return hash((type(self), self.root_type, self.root_head, self.select, self.where))
+
+    def __add__(self, other: typing.Any) -> 'Query':
+        # check other's type
+        if not isinstance(other, type(self)):
+            return NotImplemented
+        # check query compatibility
+        if not self.root_type == other.root_type:
+            raise ValueError(other)
+        if not self.root_head == other.root_head:
+            raise ValueError(other)
+        # combine selections
+        select = self.select + other.select
+        # combine conditions
+        conds = []
+        if self.where != '':
+            conds.append(self.where)
+        if other.where != '':
+            conds.append(other.where)
+        where = ' . '.join(conds)
+        # return new query
+        return Query(
+            root_type=self.root_type,
+            root_head=self.root_head,
+            select=select,
+            where=where,
+            )
+
+    @property
+    def names(self) -> typing.Tuple[str, ...]:
+        """Return a tuple of selected variable names, excluding the root."""
+        return tuple(name for _, name in self.select)
+
+    @property
+    def query(self) -> str:
+        """Return an executable sparql query."""
+        select = ' '.join(f'({head} as ?{name})' for head, name in self.select)
+        return f'''
+            SELECT {self.root_head} {select}
+            WHERE {{
+                {self.root_head} <{ns.rdf.type}>/<{ns.rdfs.subClassOf}>* <{self.root_type}> .
+                {self.where}
+            }}
+            '''
+
+    def __call__(self, graph: rdflib.Graph) -> rdflib.query.Result:
+        """Execute the query on a *graph* and return the query result."""
+        return graph.query(self.query)
+
+## EOF ##
diff --git a/bsfs/utils/uuid.py b/bsfs/utils/uuid.py
index ba5cf52..70e1656 100644
--- a/bsfs/utils/uuid.py
+++ b/bsfs/utils/uuid.py
@@ -7,6 +7,7 @@ Author: Matthias Baumgartner, 2022
 # imports
 from collections import abc
 import hashlib
+import io
 import json
 import os
 import platform
@@ -106,6 +107,17 @@ class UCID():
         with open(path, 'rb') as ifile:
             return HASH(ifile.read()).hexdigest()
 
+    @staticmethod
+    def from_buffer(buffer: io.IOBase) -> str:
+        """Read the content from a buffer."""
+        if isinstance(buffer, io.TextIOBase):
+            return HASH(buffer.read().encode('utf-8', errors='ignore')).hexdigest()
+        return HASH(buffer.read()).hexdigest()
+
+    @staticmethod
+    def from_bytes(content: bytes) -> str:
+        """Get the content from as bytes."""
+        return HASH(content).hexdigest()
 
     @staticmethod
     def from_dict(content: dict) -> str:
author	Matthias Baumgartner <dev@igsor.net>	2023-02-08 21:17:57 +0100
committer	Matthias Baumgartner <dev@igsor.net>	2023-02-08 21:17:57 +0100
commit	9b490d19dcebc0fc24cb2ab89a783f1f7d6147f7 (patch)
tree	5fc3d3b8864a8ff996e5739ed9654dae494d9d8f /bsfs
parent	e12cd52ad267563c8046a593ad551b1dd089a702 (diff)
parent	c0218a8dffcdc3a7a5568f66bb959139fe514ad5 (diff)
download	bsfs-9b490d19dcebc0fc24cb2ab89a783f1f7d6147f7.tar.gz bsfs-9b490d19dcebc0fc24cb2ab89a783f1f7d6147f7.tar.bz2 bsfs-9b490d19dcebc0fc24cb2ab89a783f1f7d6147f7.zip