Merge branch 'mb/fetch' into develop

author: Matthias Baumgartner <dev@igsor.net> 2023-02-08 21:17:57 +0100
committer: Matthias Baumgartner <dev@igsor.net> 2023-02-08 21:17:57 +0100
commit: 9b490d19dcebc0fc24cb2ab89a783f1f7d6147f7 (patch)
tree: 5fc3d3b8864a8ff996e5739ed9654dae494d9d8f /bsfs/query
parent: e12cd52ad267563c8046a593ad551b1dd089a702 (diff)
parent: c0218a8dffcdc3a7a5568f66bb959139fe514ad5 (diff)
download: bsfs-9b490d19dcebc0fc24cb2ab89a783f1f7d6147f7.tar.gz
bsfs-9b490d19dcebc0fc24cb2ab89a783f1f7d6147f7.tar.bz2
bsfs-9b490d19dcebc0fc24cb2ab89a783f1f7d6147f7.zip
5 files changed, 725 insertions, 11 deletions
diff --git a/bsfs/query/ast/__init__.py b/bsfs/query/ast/__init__.py
index 704d051..66b097d 100644
--- a/bsfs/query/ast/__init__.py
+++ b/bsfs/query/ast/__init__.py
@@ -1,6 +1,6 @@
 """Query AST components.
 
-The query AST consists of a Filter syntax tree.
+The query AST consists of a Filter and a Fetch syntax trees.
 
 Classes beginning with an underscore (_) represent internal type hierarchies
 and should not be used for parsing. Note that the AST structures do not
@@ -14,10 +14,12 @@ Author: Matthias Baumgartner, 2022
 import typing
 
 # inner-module imports
+from . import fetch
 from . import filter_ as filter # pylint: disable=redefined-builtin
 
 # exports
 __all__: typing.Sequence[str] = (
+    'fetch',
     'filter',
     )
 
diff --git a/bsfs/query/ast/fetch.py b/bsfs/query/ast/fetch.py
new file mode 100644
index 0000000..d653a8a
--- /dev/null
+++ b/bsfs/query/ast/fetch.py
@@ -0,0 +1,174 @@
+"""
+
+Part of the BlackStar filesystem (bsfs) module.
+A copy of the license is provided with the project.
+Author: Matthias Baumgartner, 2022
+"""
+# imports
+from collections import abc
+import typing
+
+# bsfs imports
+from bsfs.utils import URI, typename, normalize_args
+
+# exports
+__all__ : typing.Sequence[str] = (
+    'All',
+    'Fetch',
+    'FetchExpression',
+    'Node',
+    'This',
+    'Value',
+    )
+
+
+## code ##
+
+class FetchExpression(abc.Hashable):
+    """Generic Fetch expression."""
+
+    def __repr__(self) -> str:
+        """Return the expressions's string representation."""
+        return f'{typename(self)}()'
+
+    def __hash__(self) -> int:
+        """Return the expression's integer representation."""
+        return hash(type(self))
+
+    def __eq__(self, other: typing.Any) -> bool:
+        """Return True if *self* and *other* are equivalent."""
+        return isinstance(other, type(self))
+
+
+class All(FetchExpression):
+    """Fetch all child expressions."""
+
+    # child expressions.
+    expr: typing.Set[FetchExpression]
+
+    def __init__(self, *expr):
+        # unpack child expressions
+        unfolded = set(normalize_args(*expr))
+        # check child expressions
+        if len(unfolded) == 0:
+            raise AttributeError('expected at least one expression, found none')
+        if not all(isinstance(itm, FetchExpression) for itm in unfolded):
+            raise TypeError(expr)
+        # initialize
+        super().__init__()
+        # assign members
+        self.expr = unfolded
+
+    def __iter__(self) -> typing.Iterator[FetchExpression]:
+        return iter(self.expr)
+
+    def __len__(self) -> int:
+        return len(self.expr)
+
+    def __repr__(self) -> str:
+        return f'{typename(self)}({self.expr})'
+
+    def __hash__(self) -> int:
+        return hash((super().__hash__(), tuple(sorted(self.expr, key=repr))))
+
+    def __eq__(self, other: typing.Any) -> bool:
+        return super().__eq__(other) and self.expr == other.expr
+
+
+class _Branch(FetchExpression):
+    """Branch along a predicate."""
+
+    # FIXME: Use a Predicate (like in ast.filter) so that we can also reverse them!
+
+    # predicate to follow.
+    predicate: URI
+
+    def __init__(self, predicate: URI):
+        if not isinstance(predicate, URI):
+            raise TypeError(predicate)
+        self.predicate = predicate
+
+    def __repr__(self) -> str:
+        return f'{typename(self)}({self.predicate})'
+
+    def __hash__(self) -> int:
+        return hash((super().__hash__(), self.predicate))
+
+    def __eq__(self, other: typing.Any) -> bool:
+        return super().__eq__(other) and self.predicate == other.predicate
+
+
+class Fetch(_Branch):
+    """Follow a predicate before evaluating a child epxression."""
+
+    # child expression.
+    expr: FetchExpression
+
+    def __init__(self, predicate: URI, expr: FetchExpression):
+        # check child expressions
+        if not isinstance(expr, FetchExpression):
+            raise TypeError(expr)
+        # initialize
+        super().__init__(predicate)
+        # assign members
+        self.expr = expr
+
+    def __repr__(self) -> str:
+        return f'{typename(self)}({self.predicate}, {self.expr})'
+
+    def __hash__(self) -> int:
+        return hash((super().__hash__(), self.expr))
+
+    def __eq__(self, other: typing.Any) -> bool:
+        return super().__eq__(other) and self.expr == other.expr
+
+
+class _Named(_Branch):
+    """Fetch a (named) symbol at a predicate."""
+
+    # symbol name.
+    name: str
+
+    def __init__(self, predicate: URI, name: str):
+        super().__init__(predicate)
+        self.name = str(name)
+
+    def __repr__(self) -> str:
+        return f'{typename(self)}({self.predicate}, {self.name})'
+
+    def __hash__(self) -> int:
+        return hash((super().__hash__(), self.name))
+
+    def __eq__(self, other: typing.Any) -> bool:
+        return super().__eq__(other) and self.name == other.name
+
+
+class Node(_Named): # pylint: disable=too-few-public-methods
+    """Fetch a Node at a predicate."""
+    # FIXME: Is this actually needed?
+
+
+class Value(_Named): # pylint: disable=too-few-public-methods
+    """Fetch a Literal at a predicate."""
+
+
+class This(FetchExpression):
+    """Fetch the current Node."""
+
+    # symbol name.
+    name: str
+
+    def __init__(self, name: str):
+        super().__init__()
+        self.name = str(name)
+
+    def __repr__(self) -> str:
+        return f'{typename(self)}({self.name})'
+
+    def __hash__(self) -> int:
+        return hash((super().__hash__(), self.name))
+
+    def __eq__(self, other: typing.Any) -> bool:
+        return super().__eq__(other) and self.name == other.name
+
+## EOF ##
diff --git a/bsfs/query/ast/filter_.py b/bsfs/query/ast/filter_.py
index 2f0270c..b29d89e 100644
--- a/bsfs/query/ast/filter_.py
+++ b/bsfs/query/ast/filter_.py
@@ -33,9 +33,6 @@ import typing
 # bsfs imports
 from bsfs.utils import URI, typename, normalize_args
 
-# inner-module imports
-#from . import utils
-
 # exports
 __all__ : typing.Sequence[str] = (
     # base classes
@@ -153,6 +150,7 @@ class _Agg(FilterExpression, abc.Collection):
         # check type
         if not all(isinstance(e, FilterExpression) for e in unfolded):
             raise TypeError(expr)
+        # FIXME: Require at least one child expression?
         # assign member
         self.expr = unfolded
 
@@ -172,7 +170,7 @@ class _Agg(FilterExpression, abc.Collection):
         return f'{typename(self)}({self.expr})'
 
     def __hash__(self) -> int:
-        return hash((super().__hash__(), tuple(self.expr))) # FIXME: Unique hash of different orders over self.expr
+        return hash((super().__hash__(), tuple(sorted(self.expr, key=repr))))
 
     def __eq__(self, other) -> bool:
         return super().__eq__(other) and self.expr == other.expr
@@ -449,20 +447,72 @@ class OneOf(PredicateExpression, abc.Collection):
         return f'{typename(self)}({self.expr})'
 
     def __hash__(self) -> int:
-        return hash((super().__hash__(), tuple(self.expr))) # FIXME: Unique hash of different orders over self.expr
+        return hash((super().__hash__(), tuple(sorted(self.expr, key=repr))))
 
     def __eq__(self, other) -> bool:
         return super().__eq__(other) and self.expr == other.expr
 
 
 # Helpers
+# invalid-name is disabled since they explicitly mimic an expression
 
-def IsIn(*values): # pylint: disable=invalid-name # explicitly mimics an expression
+def IsIn(*values) -> FilterExpression: # pylint: disable=invalid-name
     """Match any of the given URIs."""
-    return Or(Is(value) for value in normalize_args(*values))
-
-def IsNotIn(*values): # pylint: disable=invalid-name # explicitly mimics an expression
+    args = normalize_args(*values)
+    if len(args) == 0:
+        raise AttributeError('expected at least one value, found none')
+    if len(args) == 1:
+        return Is(args[0])
+    return Or(Is(value) for value in args)
+
+def IsNotIn(*values) -> FilterExpression: # pylint: disable=invalid-name
     """Match none of the given URIs."""
     return Not(IsIn(*values))
 
+
+def Between( # pylint: disable=invalid-name
+        lo: float = float('-inf'),
+        hi: float = float('inf'),
+        lo_strict: bool = True,
+        hi_strict: bool = True,
+        ) -> FilterExpression :
+    """Match numerical values between *lo* and *hi*. Include bounds if strict is False."""
+    if abs(lo) == hi == float('inf'):
+        raise ValueError('range cannot be INF on both sides')
+    if lo > hi:
+        raise ValueError(f'lower bound ({lo}) cannot be less than upper bound ({hi})')
+    if lo == hi and not lo_strict and not hi_strict:
+        return Equals(lo)
+    if lo == hi: # either bound is strict
+        raise ValueError('bounds cannot be equal when either is strict')
+    if lo != float('-inf') and hi != float('inf'):
+        return And(GreaterThan(lo, lo_strict), LessThan(hi, hi_strict))
+    if lo != float('-inf'):
+        return GreaterThan(lo, lo_strict)
+    # hi != float('inf'):
+    return LessThan(hi, hi_strict)
+
+
+def Includes(*values, approx: bool = False) -> FilterExpression: # pylint: disable=invalid-name
+    """Match any of the given *values*. Uses `Substring` if *approx* is set."""
+    args = normalize_args(*values)
+    cls = Substring if approx else Equals
+    if len(args) == 0:
+        raise AttributeError('expected at least one value, found none')
+    if len(args) == 1:
+        return cls(args[0])
+    return Or(cls(v) for v in args)
+
+
+def Excludes(*values, approx: bool = False) -> FilterExpression: # pylint: disable=invalid-name
+    """Match none of the given *values*. Uses `Substring` if *approx* is set."""
+    args = normalize_args(*values)
+    cls = Substring if approx else Equals
+    if len(args) == 0:
+        raise AttributeError('expected at least one value, found none')
+    if len(args) == 1:
+        return Not(cls(args[0]))
+    return Not(Or(cls(v) for v in args))
+
+
 ## EOF ##
diff --git a/bsfs/query/matcher.py b/bsfs/query/matcher.py
new file mode 100644
index 0000000..a910756
--- /dev/null
+++ b/bsfs/query/matcher.py
@@ -0,0 +1,366 @@
+"""
+
+Part of the BlackStar filesystem (bsfs) module.
+A copy of the license is provided with the project.
+Author: Matthias Baumgartner, 2022
+"""
+# imports
+from collections import defaultdict
+from itertools import product
+from time import time
+import random
+import threading
+import typing
+
+# external imports
+from hopcroftkarp import HopcroftKarp
+
+# bsfs imports
+from bsfs.utils import errors, typename
+
+# inner-module imports
+from . import ast
+
+# exports
+__all__ : typing.Sequence[str] = (
+    'Filter',
+    )
+
+
+## code ##
+
+class Any(ast.filter.FilterExpression, ast.filter.PredicateExpression):
+    """Match any ast class.
+
+    Note that Any instances are unique, i.e. they do not compare, and
+    can hence be repeated in a set:
+    >>> Any() == Any()
+    False
+    >>> len({Any(), Any(), Any(), Any()})
+    4
+
+    """
+
+    # unique instance id
+    _uid: typing.Tuple[int, int, float, float]
+
+    def __init__(self):
+        self._uid = (
+            id(self),
+            id(threading.current_thread()),
+            time(),
+            random.random(),
+            )
+
+    def __eq__(self, other: typing.Any):
+        return super().__eq__(other) and self._uid == other._uid
+
+    def __hash__(self):
+        return hash((super().__hash__(), self._uid))
+
+
+class Rest(ast.filter.FilterExpression, ast.filter.PredicateExpression):
+    """Match the leftovers in a set of items to be compared.
+
+    Rest can be used in junction with aggregating expressions such as ast.filter.And,
+    ast.filter.Or, ast.filter.OneOf. It controls childs expressions that were not yet
+    consumed by other matching rules. Rest may match to only a specific expression.
+    The expresssion defaults to Any().
+
+    For example, the following to ast structures would match since Rest
+    allows an arbitrary repetition of ast.filter.Equals statements.
+
+    >>> And(Equals('hello'), Equals('world'), Equals('foobar'))
+    >>> And(Equals('world'), Rest(Partial(Equals)))
+
+    """
+
+    # child expression for the Rest.
+    expr: typing.Union[ast.filter.FilterExpression, ast.filter.PredicateExpression]
+
+    def __init__(
+            self,
+            expr: typing.Optional[typing.Union[ast.filter.FilterExpression, ast.filter.PredicateExpression]] = None,
+            ):
+        if expr is None:
+            expr = Any()
+        self.expr = expr
+
+    def __repr__(self) -> str:
+        return f'{typename(self)}({self.expr})'
+
+    def __hash__(self) -> int:
+        return hash((super().__hash__(), self.expr))
+
+    def __eq__(self, other: typing.Any) -> bool:
+        return super().__eq__(other) and self.expr == other.expr
+
+
+class Partial(ast.filter.FilterExpression, ast.filter.PredicateExpression):
+    """Match a partially defined ast expression.
+
+    Literal values might be irrelevant or unknown when comparing two ast
+    structures. Partial allows to constrain the matcher to a certain
+    ast class, while leaving some of its members unspecified.
+
+    Pass the class (not instance) and its members as keyword arguments
+    to Partial. Note that the arguments are not validated.
+
+    For example, the following instance matches any ast.filter.Equals,
+    irrespective of its value:
+
+    >>> Partial(ast.filter.Equals)
+
+    Likewise, the following instance matches any ast.filter.LessThan
+    that has a strict bounds, but makes no claim about the threshold:
+
+    >>> Partial(ast.filter.LessThan, strict=False)
+
+    """
+
+    # target node type.
+    node: typing.Type
+
+    # node construction args.
+    kwargs: typing.Dict[str, typing.Any]
+
+    def __init__(
+            self,
+            node: typing.Type,
+            **kwargs,
+            ):
+        self.node = node
+        self.kwargs = kwargs
+
+    def __repr__(self) -> str:
+        return f'{typename(self)}({self.node.__name__}, {self.kwargs})'
+
+    def __hash__(self) -> int:
+        kwargs = tuple((key, self.kwargs[key]) for key in sorted(self.kwargs))
+        return hash((super().__hash__(), self.node, kwargs))
+
+    def __eq__(self, other: typing.Any) -> bool:
+        return super().__eq__(other) \
+           and self.node == other.node \
+           and self.kwargs == other.kwargs
+
+    def match(
+            self,
+            name: str,
+            value: typing.Any,
+            ) -> bool:
+        """Return True if *name* is unspecified or matches *value*."""
+        return name not in self.kwargs or self.kwargs[name] == value
+
+
+T_ITEM_TYPE = typing.TypeVar('T_ITEM_TYPE') # pylint: disable=invalid-name
+
+def _set_matcher(
+        query: typing.Collection[T_ITEM_TYPE],
+        reference: typing.Collection[T_ITEM_TYPE],
+        cmp: typing.Callable[[T_ITEM_TYPE, T_ITEM_TYPE], bool],
+        ) -> bool:
+    """Compare two sets of child expressions.
+
+    This check has a best-case complexity of O(|N|**2) and worst-case
+    complexity of O(|N|**3), with N the number of child expressions.
+    """
+    # get reference items
+    r_items = list(reference)
+    # deal with Rest
+    r_rest = {itm for itm in r_items if isinstance(itm, Rest)}
+    if len(r_rest) > 1:
+        raise errors.BackendError(f'there must be at most one Rest instance per set, found {len(r_rest)}')
+    if len(r_rest) == 1:
+        # replace Rest by filling the reference up with rest's expression
+        # NOTE: convert r_items to list so that items can be repeated
+        expr = next(iter(r_rest)).expr # type: ignore [attr-defined]
+        r_items = [itm for itm in r_items if not isinstance(itm, Rest)]
+        r_items += [expr for _ in range(len(query) - len(r_items))] # type: ignore [misc]
+    # sanity check: cannot match if the item sizes differ:
+    # either a reference item is unmatched (len(r_items) > len(query))
+    # or a query item is unmatched (len(r_items) < len(query))
+    if len(query) != len(r_items):
+        return False
+
+    # To have a positive match between the query and the reference,
+    # each query expr has to match any reference expr.
+    # However, each reference expr can only be "consumed" once even
+    # if it matches multiple query exprs (e.g., the Any expression matches
+    # every query expr).
+    # This is a bipartide matching problem (Hall's marriage problem)
+    # and the Hopcroft-Karp-Karzanov algorithm finds a maximum
+    # matching. While there might be multiple maximum matchings,
+    # we only need to know whether (at least) one complete matching
+    # exists. The hopcroftkarp module provides this functionality.
+    # The HKK algorithm has worst-case complexity of O(|N|**2 * sqrt(|N|))
+    # and we also need to compare expressions pairwise, hence O(|N|**2).
+    num_items = len(r_items)
+    graph = defaultdict(set)
+    # build the bipartide graph as {lhs: {rhs}, ...}
+    # lhs and rhs must be disjoint identifiers.
+    for (ridx, ref), (nidx, node) in product(enumerate(r_items), enumerate(query)):
+        # add edges for equal expressions
+        if cmp(node, ref):
+            graph[ridx].add(num_items + nidx)
+
+    # maximum_matching returns the matches for all nodes in the graph
+    # ({ref_itm: node_itm}), hence a complete matching's size is
+    # the number of reference's child expressions.
+    return len(HopcroftKarp(graph).maximum_matching(keys_only=True)) == num_items
+
+
+class Filter():
+    """Compare a bsfs.query.ast.filter` query's structure to a reference ast.
+
+    The reference ast may include `Rest`, `Partial`, or `Any` to account for irrelevant
+    or unknown ast pieces.
+
+    This is only a structural comparison, not a semantic one. For example, the
+    two following queries are semantically identical, but structurally different,
+    and would therefore not match:
+
+    >>> ast.filter.OneOf(ast.filter.Predicate(ns.bse.filename))
+    >>> ast.filter.Predicate(ns.bse.filename)
+
+    """
+
+    def __call__(self, query: ast.filter.FilterExpression, reference: ast.filter.FilterExpression) -> bool:
+        """Compare a *query* to a *reference* ast structure.
+        Return True if both are structurally equivalent.
+        """
+        if not isinstance(query, ast.filter.FilterExpression):
+            raise errors.BackendError(f'expected filter expression, found {query}')
+        if not isinstance(reference, ast.filter.FilterExpression):
+            raise errors.BackendError(f'expected filter expression, found {reference}')
+        return self._parse_filter_expression(query, reference)
+
+    def _parse_filter_expression(
+            self,
+            node: ast.filter.FilterExpression,
+            reference: ast.filter.FilterExpression,
+            ) -> bool:
+        """Route *node* to the handler of the respective FilterExpression subclass."""
+        # generic checks: reference type must be Any or match node type
+        if isinstance(reference, Any):
+            return True
+        # node-specific checks
+        if isinstance(node, ast.filter.Not):
+            return self._not(node, reference)
+        if isinstance(node, ast.filter.Has):
+            return self._has(node, reference)
+        if isinstance(node, ast.filter.Distance):
+            return self._distance(node, reference)
+        if isinstance(node, (ast.filter.Any, ast.filter.All)):
+            return self._branch(node, reference)
+        if isinstance(node, (ast.filter.And, ast.filter.Or)):
+            return self._agg(node, reference)
+        if isinstance(node, (ast.filter.Is, ast.filter.Equals, ast.filter.Substring,
+                             ast.filter.StartsWith, ast.filter.EndsWith)):
+            return self._value(node, reference)
+        if isinstance(node, (ast.filter.LessThan, ast.filter.GreaterThan)):
+            return self._bounded(node, reference)
+        # invalid node
+        raise errors.BackendError(f'expected filter expression, found {node}')
+
+    def _parse_predicate_expression(
+            self,
+            node: ast.filter.PredicateExpression,
+            reference: ast.filter.PredicateExpression,
+            ) -> bool:
+        """Route *node* to the handler of the respective PredicateExpression subclass."""
+        if isinstance(reference, Any):
+            return True
+        if isinstance(node, ast.filter.Predicate):
+            return self._predicate(node, reference)
+        if isinstance(node, ast.filter.OneOf):
+            return self._one_of(node, reference)
+        # invalid node
+        raise errors.BackendError(f'expected predicate expression, found {node}')
+
+    def _one_of(self, node: ast.filter.OneOf, reference: ast.filter.PredicateExpression) -> bool:
+        if not isinstance(reference, type(node)):
+            return False
+        return _set_matcher(node, reference, self._parse_predicate_expression)
+
+    def _predicate(self, node: ast.filter.Predicate, reference: ast.filter.PredicateExpression) -> bool:
+        if not isinstance(reference, (Partial, type(node))):
+            return False
+        # partial check
+        if isinstance(reference, Partial):
+            if not isinstance(node, reference.node):
+                return False
+            return reference.match('predicate', node.predicate) \
+               and reference.match('reverse', node.reverse)
+        # full check
+        return node.predicate == reference.predicate \
+           and node.reverse == reference.reverse
+
+    def _branch(self,
+            node: typing.Union[ast.filter.Any, ast.filter.All],
+            reference: ast.filter.FilterExpression,
+            ) -> bool:
+        if not isinstance(reference, type(node)):
+            return False
+        if not self._parse_predicate_expression(node.predicate, reference.predicate): # type: ignore [attr-defined]
+            return False
+        if not self._parse_filter_expression(node.expr, reference.expr): # type: ignore [attr-defined]
+            return False
+        return True
+
+    def _agg(self, node: typing.Union[ast.filter.And, ast.filter.Or], reference: ast.filter.FilterExpression) -> bool:
+        if not isinstance(reference, type(node)):
+            return False
+        return _set_matcher(node, reference, self._parse_filter_expression) # type: ignore [arg-type]
+
+    def _not(self, node: ast.filter.Not, reference: ast.filter.FilterExpression) -> bool:
+        if not isinstance(reference, type(node)):
+            return False
+        return self._parse_filter_expression(node.expr, reference.expr)
+
+    def _has(self, node: ast.filter.Has, reference: ast.filter.FilterExpression) -> bool:
+        if not isinstance(reference, type(node)):
+            return False
+        return self._parse_predicate_expression(node.predicate, reference.predicate) \
+           and self._parse_filter_expression(node.count, reference.count)
+
+    def _distance(self, node: ast.filter.Distance, reference: ast.filter.FilterExpression) -> bool:
+        if not isinstance(reference, (Partial, type(node))):
+            return False
+        # partial check
+        if isinstance(reference, Partial):
+            if not isinstance(node, reference.node):
+                return False
+            return reference.match('reference', node.reference) \
+               and reference.match('threshold', node.threshold) \
+               and reference.match('strict', node.strict)
+        # full check
+        return node.reference == reference.reference \
+           and node.threshold == reference.threshold \
+           and node.strict == reference.strict
+
+    def _value(self, node: ast.filter._Value, reference: ast.filter.FilterExpression) -> bool:
+        if not isinstance(reference, (Partial, type(node))):
+            return False
+        # partial check
+        if isinstance(reference, Partial):
+            if not isinstance(node, reference.node):
+                return False
+            return reference.match('value', node.value)
+        # full ckeck
+        return node.value == reference.value
+
+    def _bounded(self, node: ast.filter._Bounded, reference: ast.filter.FilterExpression) -> bool:
+        if not isinstance(reference, (Partial, type(node))):
+            return False
+        # partial check
+        if isinstance(reference, Partial):
+            if not isinstance(node, reference.node):
+                return False
+            return reference.match('threshold', node.threshold) \
+               and reference.match('strict', node.strict)
+        # full check
+        return node.threshold == reference.threshold \
+           and node.strict == reference.strict
+
+## EOF ##
diff --git a/bsfs/query/validator.py b/bsfs/query/validator.py
index 904ac14..f0aa795 100644
--- a/bsfs/query/validator.py
+++ b/bsfs/query/validator.py
@@ -20,6 +20,7 @@ __all__ : typing.Sequence[str] = (
     'Filter',
     )
 
+# FIXME: Split into a submodule and the two classes into their own respective files.
 
 ## code ##
 
@@ -49,7 +50,7 @@ class Filter():
         """
         # root_type must be a schema.Node
         if not isinstance(root_type, bsc.Node):
-            raise TypeError(f'Expected a node, found {typename(root_type)}')
+            raise TypeError(f'expected a node, found {typename(root_type)}')
         # root_type must exist in the schema
         if root_type not in self.schema.nodes():
             raise errors.ConsistencyError(f'{root_type} is not defined in the schema')
@@ -223,4 +224,125 @@ class Filter():
         # FIXME: Check if node.value corresponds to type_
 
 
+class Fetch():
+    """Validate a `bsfs.query.ast.fetch` query's structure and schema compliance.
+
+    * Value can only be applied on literals
+    * Node can only be applied on nodes
+    * Names must be non-empty
+    * Branching nodes' predicates must match the type
+    * Symbols must be in the schema
+    * Predicates must follow the schema
+
+    """
+
+    # schema to validate against.
+    schema: bsc.Schema
+
+    def __init__(self, schema: bsc.Schema):
+        self.schema = schema
+
+    def __call__(self, root_type: bsc.Node, query: ast.fetch.FetchExpression):
+        """Validate a fetch *query*, assuming the subject having *root_type*.
+
+        Raises a `bsfs.utils.errors.ConsistencyError` if the query violates the schema.
+        Raises a `bsfs.utils.errors.BackendError` if the query structure is invalid.
+
+        """
+        # root_type must be a schema.Node
+        if not isinstance(root_type, bsc.Node):
+            raise TypeError(f'expected a node, found {typename(root_type)}')
+        # root_type must exist in the schema
+        if root_type not in self.schema.nodes():
+            raise errors.ConsistencyError(f'{root_type} is not defined in the schema')
+        # query must be a FetchExpression
+        if not isinstance(query, ast.fetch.FetchExpression):
+            raise TypeError(f'expected a fetch expression, found {typename(query)}')
+        # check root expression
+        self._parse_fetch_expression(root_type, query)
+        # all tests passed
+        return True
+
+    def _parse_fetch_expression(self, type_: bsc.Vertex, node: ast.fetch.FetchExpression):
+        """Route *node* to the handler of the respective FetchExpression subclass."""
+        if isinstance(node, (ast.fetch.Fetch, ast.fetch.Value, ast.fetch.Node)):
+            # NOTE: don't return so that checks below are executed
+            self._branch(type_, node)
+        if isinstance(node, (ast.fetch.Value, ast.fetch.Node)):
+            # NOTE: don't return so that checks below are executed
+            self._named(type_, node)
+        if isinstance(node, ast.fetch.All):
+            return self._all(type_, node)
+        if isinstance(node, ast.fetch.Fetch):
+            return self._fetch(type_, node)
+        if isinstance(node, ast.fetch.Value):
+            return self._value(type_, node)
+        if isinstance(node, ast.fetch.Node):
+            return self._node(type_, node)
+        if isinstance(node, ast.fetch.This):
+            return self._this(type_, node)
+        # invalid node
+        raise errors.BackendError(f'expected fetch expression, found {node}')
+
+    def _all(self, type_: bsc.Vertex, node: ast.fetch.All):
+        # check child expressions
+        for expr in node:
+            self._parse_fetch_expression(type_, expr)
+
+    def _branch(self, type_: bsc.Vertex, node: ast.fetch._Branch):
+        # type is a node
+        if not isinstance(type_, bsc.Node):
+            raise errors.ConsistencyError(f'expected a Node, found {type_}')
+        # node exists in the schema
+        if type_ not in self.schema.nodes():
+            raise errors.ConsistencyError(f'node {type_} is not in the schema')
+        # predicate exists in the schema
+        if not self.schema.has_predicate(node.predicate):
+            raise errors.ConsistencyError(f'predicate {node.predicate} is not in the schema')
+        pred = self.schema.predicate(node.predicate)
+        # type_ must be a subclass of domain
+        if not type_ <= pred.domain:
+            raise errors.ConsistencyError(
+                f'expected type {pred.domain} or subtype thereof, found {type_}')
+
+    def _fetch(self, type_: bsc.Vertex, node: ast.fetch.Fetch): # pylint: disable=unused-argument # type_ was considered in _branch
+        # range must be a node
+        rng = self.schema.predicate(node.predicate).range
+        if not isinstance(rng, bsc.Node):
+            raise errors.ConsistencyError(
+                f'expected the predicate\'s range to be a Node, found {rng}')
+        # child expression must be valid
+        self._parse_fetch_expression(rng, node.expr)
+
+    def _named(self, type_: bsc.Vertex, node: ast.fetch._Named): # pylint: disable=unused-argument # type_ was considered in _branch
+        # name must be set
+        if node.name.strip() == '':
+            raise errors.BackendError('node name cannot be empty')
+        # FIXME: check for double name use?
+
+    def _node(self, type_: bsc.Vertex, node: ast.fetch.Node): # pylint: disable=unused-argument # type_ was considered in _branch
+        # range must be a node
+        rng = self.schema.predicate(node.predicate).range
+        if not isinstance(rng, bsc.Node):
+            raise errors.ConsistencyError(
+                f'expected the predicate\'s range to be a Node, found {rng}')
+
+    def _value(self, type_: bsc.Vertex, node: ast.fetch.Value): # pylint: disable=unused-argument # type_ was considered in _branch
+        # range must be a literal
+        rng = self.schema.predicate(node.predicate).range
+        if not isinstance(rng, bsc.Literal):
+            raise errors.ConsistencyError(
+                f'expected the predicate\'s range to be a Literal, found {rng}')
+
+    def _this(self, type_: bsc.Vertex, node: ast.fetch.This):
+        # type is a node
+        if not isinstance(type_, bsc.Node):
+            raise errors.ConsistencyError(f'expected a Node, found {type_}')
+        # node exists in the schema
+        if type_ not in self.schema.nodes():
+            raise errors.ConsistencyError(f'node {type_} is not in the schema')
+        # name must be set
+        if node.name.strip() == '':
+            raise errors.BackendError('node name cannot be empty')
+
 ## EOF ##
author	Matthias Baumgartner <dev@igsor.net>	2023-02-08 21:17:57 +0100
committer	Matthias Baumgartner <dev@igsor.net>	2023-02-08 21:17:57 +0100
commit	9b490d19dcebc0fc24cb2ab89a783f1f7d6147f7 (patch)
tree	5fc3d3b8864a8ff996e5739ed9654dae494d9d8f /bsfs/query
parent	e12cd52ad267563c8046a593ad551b1dd089a702 (diff)
parent	c0218a8dffcdc3a7a5568f66bb959139fe514ad5 (diff)
download	bsfs-9b490d19dcebc0fc24cb2ab89a783f1f7d6147f7.tar.gz bsfs-9b490d19dcebc0fc24cb2ab89a783f1f7d6147f7.tar.bz2 bsfs-9b490d19dcebc0fc24cb2ab89a783f1f7d6147f7.zip