From 791918039979d0743fd2ea4b9a5e74593ff96fd0 Mon Sep 17 00:00:00 2001
From: Matthias Baumgartner <dev@igsor.net>
Date: Mon, 19 Dec 2022 13:32:34 +0100
Subject: query ast file structures and essential interfaces

---
 bsfs/query/__init__.py     | 20 ++++++++++++++++++++
 bsfs/query/ast/__init__.py | 24 ++++++++++++++++++++++++
 bsfs/query/ast/filter_.py  | 30 ++++++++++++++++++++++++++++++
 bsfs/query/validator.py    | 35 +++++++++++++++++++++++++++++++++++
 4 files changed, 109 insertions(+)
 create mode 100644 bsfs/query/__init__.py
 create mode 100644 bsfs/query/ast/__init__.py
 create mode 100644 bsfs/query/ast/filter_.py
 create mode 100644 bsfs/query/validator.py

(limited to 'bsfs/query')

diff --git a/bsfs/query/__init__.py b/bsfs/query/__init__.py
new file mode 100644
index 0000000..21c7389
--- /dev/null
+++ b/bsfs/query/__init__.py
@@ -0,0 +1,20 @@
+"""
+
+Part of the BlackStar filesystem (bsfs) module.
+A copy of the license is provided with the project.
+Author: Matthias Baumgartner, 2022
+"""
+# imports
+import typing
+
+# inner-module imports
+from . import ast
+from . import validator as validate
+
+# exports
+__all__: typing.Sequence[str] = (
+    'ast',
+    'validate',
+    )
+
+## EOF ##
diff --git a/bsfs/query/ast/__init__.py b/bsfs/query/ast/__init__.py
new file mode 100644
index 0000000..0ee7385
--- /dev/null
+++ b/bsfs/query/ast/__init__.py
@@ -0,0 +1,24 @@
+"""Query AST components.
+
+The query AST consists of a Filter syntax tree.
+
+Classes beginning with an underscore (_) represent internal type hierarchies
+and should not be used for parsing. Note that the AST structures do not
+(and cannot) check semantic validity or consistency with a given schema.
+
+Part of the BlackStar filesystem (bsfs) module.
+A copy of the license is provided with the project.
+Author: Matthias Baumgartner, 2022
+"""
+# imports
+import typing
+
+# inner-module imports
+from . import filter_ as filter
+
+# exports
+__all__: typing.Sequence[str] = (
+    'filter',
+    )
+
+## EOF ##
diff --git a/bsfs/query/ast/filter_.py b/bsfs/query/ast/filter_.py
new file mode 100644
index 0000000..4086fc1
--- /dev/null
+++ b/bsfs/query/ast/filter_.py
@@ -0,0 +1,30 @@
+"""Filter AST.
+
+Part of the BlackStar filesystem (bsfs) module.
+A copy of the license is provided with the project.
+Author: Matthias Baumgartner, 2022
+"""
+# imports
+from collections import abc
+import typing
+
+# exports
+__all__ : typing.Sequence[str] = []
+
+
+## code ##
+
+class _Expression(abc.Hashable):
+    def __repr__(self) -> str:
+        """Return the expressions's string representation."""
+        return f'{typename(self)}()'
+
+    def __hash__(self) -> int:
+        """Return the expression's integer representation."""
+        return hash(type(self))
+
+    def __eq__(self, other: typing.Any) -> bool:
+        """Return True if *self* and *other* are equivalent."""
+        return isinstance(other, type(self))
+
+## EOF ##
diff --git a/bsfs/query/validator.py b/bsfs/query/validator.py
new file mode 100644
index 0000000..ac3789a
--- /dev/null
+++ b/bsfs/query/validator.py
@@ -0,0 +1,35 @@
+"""
+
+Part of the BlackStar filesystem (bsfs) module.
+A copy of the license is provided with the project.
+Author: Matthias Baumgartner, 2022
+"""
+# imports
+import typing
+
+# bsfs imports
+from bsfs import schema as bsc
+
+# inner-module imports
+from . import ast
+
+# exports
+__all__ : typing.Sequence[str] = (
+    'Filter',
+    )
+
+
+## code ##
+
+class Filter():
+
+    # schema to validate against.
+    schema: bsc.Schema
+
+    def __init__(self, schema: bsc.Schema):
+        self.schema = schema
+
+    def parse(self, node: ast.filter.FilterExpression):
+        raise NotImplementedError()
+
+## EOF ##
-- 
cgit v1.2.3


From a0f2308adcb226d28de3355bc7115a6d9b669462 Mon Sep 17 00:00:00 2001
From: Matthias Baumgartner <dev@igsor.net>
Date: Mon, 19 Dec 2022 13:40:02 +0100
Subject: import fixes

---
 bsfs/query/validator.py | 177 +++++++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 175 insertions(+), 2 deletions(-)

(limited to 'bsfs/query')

diff --git a/bsfs/query/validator.py b/bsfs/query/validator.py
index ac3789a..123b947 100644
--- a/bsfs/query/validator.py
+++ b/bsfs/query/validator.py
@@ -29,7 +29,180 @@ class Filter():
     def __init__(self, schema: bsc.Schema):
         self.schema = schema
 
-    def parse(self, node: ast.filter.FilterExpression):
-        raise NotImplementedError()
+    def parse(self, node: ast.filter.FilterExpression, subject: bsc.types._Vertex):
+        # subject is a node type
+        if not isinstance(subject, bsc.Node):
+            raise errors.ConsistencyError(f'Expected a node, found {subject}')
+        # subject exists in the schema
+        if subject not in self.schema.nodes:
+            raise errors.ConsistencyError(f'Invalid node type {subject}')
+        # root expression is valid
+        self._parse(node, subject)
+        # all tests passed
+        return True
+
+
+    def _parse_numerical_expression(self, node: ast.filter.FilterExpression, subject: bsc.types._Vertex):
+        if isinstance(node, ast.filter.And):
+            return self._and(node, subject)
+        elif isinstance(node, ast.filter.Or):
+            return self._or(node, subject)
+        elif isinstance(node, ast.filter.LessThan):
+            return self._lessThan(node, subject)
+        elif isinstance(node, ast.filter.GreaterThan):
+            return self._greaterThan(node, subject)
+        elif isinstance(node, ast.filter.Equals):
+            return self._equals(node, subject, numerical=True)
+        else:
+            raise errors.ConsistencyError(f'Expected a numerical expression, found {node}')
+
+
+    def __branch(self, node: typing.Union[ast.filter.Any, ast.filter.And], subject: bsc.types._Vertex):
+        # subject is a node type
+        if not isinstance(subject, bsc.Node):
+            raise errors.ConsistencyError(f'Expected a node, found {subject}')
+        # subject exists in the schema
+        if subject not in self.schema.nodes:
+            raise errors.ConsistencyError(f'Invalid node type {subject}')
+        # predicate is valid
+        dom, rng = self._parse_predicate_expression(node.predicate)
+        # subject is a subtype of the predicate's domain
+        if not subject <= dom:
+            raise errors.ConsistencyError(f'Expected type {dom}, found {subject}')
+        # child expression is valid
+        self._parse_filter_expression(node.expr, rng)
+
+    def _any(self, node: ast.filter.Any, subject: bsc.types._Vertex):
+        return self.__branch(node, subject)
+
+    def _all(self, node: ast.filter.All, subject: bsc.types._Vertex):
+        return self.__branch(node, subject)
+
+
+    def __agg(self, node: typing.Union[ast.filter.And, ast.filter.Or], subject: bsc.types._Vertex):
+        for expr in node:
+            # child expression is valid
+            self._parse_filter_expression(expr, subject)
+
+    def _and(self, node: ast.filter.And, subject: bsc.types._Vertex):
+        return self.__agg(node, subject)
+
+    def _or(self, node: ast.filter.Or, subject: bsc.types._Vertex):
+        return self.__agg(node, subject)
+
+
+    def _not(self, node: ast.filter.Not, subject: bsc.types._Vertex):
+        # child expression is valid
+        self._parse_filter_expression(node.expr, subject)
+
+
+    def _has(self, node: ast.filter.Has, subject: bsc.types._Vertex):
+        # subject is a node type
+        if not isinstance(subject, bsc.Node):
+            raise errors.ConsistencyError(f'Expected a node, found {subject}')
+        # subject exists in the schema
+        if subject not in self.schema.nodes:
+            raise errors.ConsistencyError(f'Invalid node type {subject}')
+        # predicate is valid
+        dom, rng = self._parse_predicate_expression(node.predicate)
+        # subject is a subtype of the predicate's domain
+        if not subject <= dom:
+            raise errors.ConsistencyError(f'Expected type {dom}, found {subject}')
+        # node.count is a numerical expression
+        self._parse_numerical_expression(node.count, self.schema.literal(ns.xsd.numerical))
+
+
+    def _equals(self, node: ast.filter.Equals, subject: bsc.types._Vertex, numerical: bool = False):
+        # subject is a literal
+        #if not isinstance(subject, bsc.Literal):
+        #    raise errors.ConsistencyError(f'Expected a literal, found {subject}')
+        if isinstance(subject, bsc.Node):
+            # FIXME: How to handle this case?
+            # FIXME: How to check if a NodeType is acceptable?
+            # FIXME: Maybe use flags to control what is expected as node identifiers?
+            from bsfs.graph.nodes import Nodes # FIXME
+            if not isinstance(node.value, Nodes) and not isinstance(node.value, URI):
+                raise errors.ConsistencyError(f'Expected a Nodes or URI, found {node.value}')
+        elif isinstance(subject, bsc.Literal):
+            # literal exists in the schema
+            if subject not in self.schema.literals:
+                raise errors.ConsistencyError(f'Invalid literal type {subject}')
+        else:
+            # FIXME:
+            raise errors.ConsistencyError(f'Expected a literal, found {subject}')
+        # node.value is numeric (if requested)
+        if numerical and not isinstance(node.value, float) and not isinstance(node.value, int):
+            raise errors.ConsistencyError(f'Expected a numerical value (int or float), found {node.value}')
+        # NOTE: We cannot check if node.value agrees with the subject since we don't know
+        # all literal types, their hierarchy, and how the backend converts datatypes.
+
+
+    def _substring(self, node: ast.filter.Substring, subject: bsc.types._Vertex):
+        # subject is a literal
+        if not isinstance(subject, bsc.Literal):
+            raise errors.ConsistencyError(f'Expected a literal, found {subject}')
+        # literal exists in the schema
+        if subject not in self.schema.literals:
+            raise errors.ConsistencyError(f'Invalid literal type {subject}')
+        # node.value matches literal datatype
+        if not subject.is_a(ns.xsd.string):
+            raise errors.ConsistencyError(f'Expected a string literal, found {subject}')
+
+
+    def _lessThan(self, node: ast.filter.LessThan, subject: bsc.types._Vertex):
+        # subject is a literal
+        if not isinstance(subject, bsc.Literal):
+            raise errors.ConsistencyError(f'Expected a literal, found {subject}')
+        # literal exists in the schema
+        if subject not in self.schema.literals:
+            raise errors.ConsistencyError(f'Invalid literal type {subject}')
+        # subject is numerical
+        if not subject.is_a(ns.xsd.numerical):
+            raise errors.ConsistencyError(f'Expected a numerical literal, found {subject}')
+
+
+    def _greaterThan(self, node: ast.filter.GreaterThan, subject: bsc.types._Vertex):
+        # subject is a literal
+        if not isinstance(subject, bsc.Literal):
+            raise errors.ConsistencyError(f'Expected a literal, found {subject}')
+        # literal exists in the schema
+        if subject not in self.schema.literals:
+            raise errors.ConsistencyError(f'Invalid literal type {subject}')
+        # subject is numerical
+        if not subject.is_a(ns.xsd.numerical):
+            raise errors.ConsistencyError(f'Expected a numerical literal, found {subject}')
+
+
+    def _predicate(self, node: ast.filter.Predicate):
+        try:
+            # predicate exists in the schema
+            pred = self.schema.predicate(node.predicate)
+        except KeyError:
+            raise errors.ConsistencyError(f'') # FIXME
+        if node.reverse:
+            return pred.range, pred.domain
+        else:
+            return pred.domain, pred.range
+
+
+    def _oneOf(self, node: ast.filter.OneOf):
+        dom, rng = None, None
+        for pred in node:
+            try:
+                # parse child expression
+                subdom, subrng = self._parse_predicate_expression(pred)
+                # domain and range must be related across all child expressions
+                if not subdom <= dom and not subdom >= dom:
+                    raise errors.ConsistencyError(f'') # FIXME
+                if not subrng <= rng and not subrng >= rng:
+                    raise errors.ConsistencyError(f'') # FIXME
+                # determine overall domain and range
+                if dom is None or subdom < dom: # pick most specific domain
+                    dom = subdom
+                if rng is None or subrng > rng: # pick most generic range
+                    rng = subrng
+            except KeyError:
+                raise errors.ConsistencyError(f'')
+        return dom, rng
 
 ## EOF ##
-- 
cgit v1.2.3


From 383fa8fd5c2e4b67089b4c5b654ebade51382f2c Mon Sep 17 00:00:00 2001
From: Matthias Baumgartner <dev@igsor.net>
Date: Thu, 22 Dec 2022 20:27:49 +0100
Subject: filter ast definition and validation

---
 bsfs/query/ast/__init__.py |   2 +-
 bsfs/query/ast/filter_.py  | 405 ++++++++++++++++++++++++++++++++++++++++++++-
 bsfs/query/validator.py    | 336 +++++++++++++++++++------------------
 3 files changed, 581 insertions(+), 162 deletions(-)

(limited to 'bsfs/query')

diff --git a/bsfs/query/ast/__init__.py b/bsfs/query/ast/__init__.py
index 0ee7385..704d051 100644
--- a/bsfs/query/ast/__init__.py
+++ b/bsfs/query/ast/__init__.py
@@ -14,7 +14,7 @@ Author: Matthias Baumgartner, 2022
 import typing
 
 # inner-module imports
-from . import filter_ as filter
+from . import filter_ as filter # pylint: disable=redefined-builtin
 
 # exports
 __all__: typing.Sequence[str] = (
diff --git a/bsfs/query/ast/filter_.py b/bsfs/query/ast/filter_.py
index 4086fc1..b129ded 100644
--- a/bsfs/query/ast/filter_.py
+++ b/bsfs/query/ast/filter_.py
@@ -1,5 +1,27 @@
 """Filter AST.
 
+Note that it is easily possible to construct an AST that is inconsistent with
+a given schema. Furthermore, it is possible to construct a semantically invalid
+AST that cannot be parsed correctly or includes contradicting statements.
+The AST nodes do not (and cannot) check such issues.
+
+For example, consider the following AST:
+
+>>> Any(ns.bse.collection,
+...     And(
+...         Equals('hello'),
+...         Any(ns.bsm.guid, Any(ns.bsm.guid, Equals('hello'))),
+...         Any(ns.bst.label, Equals('world')),
+...         All(ns.bst.label, Not(Equals('world'))),
+...     )
+... )
+
+This AST has multiple issues that are not verified upon its creation:
+* A condition on a non-literal.
+* A Filter on a literal.
+* Conditions exclude each other.
+* The predicates along the branch have incompatible domains and ranges.
+
 Part of the BlackStar filesystem (bsfs) module.
 A copy of the license is provided with the project.
 Author: Matthias Baumgartner, 2022
@@ -8,12 +30,45 @@ Author: Matthias Baumgartner, 2022
 from collections import abc
 import typing
 
+# bsfs imports
+from bsfs.utils import URI, typename, normalize_args
+
+# inner-module imports
+#from . import utils
+
 # exports
-__all__ : typing.Sequence[str] = []
+__all__ : typing.Sequence[str] = (
+    # base classes
+    'FilterExpression',
+    'PredicateExpression',
+    # predicate expressions
+    'OneOf',
+    'Predicate',
+    # branching
+    'All',
+    'Any',
+    # aggregators
+    'And',
+    'Or',
+    # value matchers
+    'Equals',
+    'Substring',
+    'EndsWith',
+    'StartsWith',
+    # range matchers
+    'GreaterThan',
+    'LessThan',
+    # misc
+    'Has',
+    'Is',
+    'Not',
+    )
 
 
 ## code ##
 
+# pylint: disable=too-few-public-methods # Many expressions use mostly magic methods
+
 class _Expression(abc.Hashable):
     def __repr__(self) -> str:
         """Return the expressions's string representation."""
@@ -27,4 +82,352 @@ class _Expression(abc.Hashable):
         """Return True if *self* and *other* are equivalent."""
         return isinstance(other, type(self))
 
+
+class FilterExpression(_Expression):
+    """Generic Filter expression."""
+
+
+class PredicateExpression(_Expression):
+    """Generic Predicate expression."""
+
+
+class _Branch(FilterExpression):
+    """Branch the filter along a predicate."""
+
+    # predicate to follow.
+    predicate: PredicateExpression
+
+    # child expression to evaluate.
+    expr: FilterExpression
+
+    def __init__(
+            self,
+            predicate: typing.Union[PredicateExpression, URI],
+            expr: FilterExpression,
+            ):
+        # process predicate argument
+        if isinstance(predicate, URI):
+            predicate = Predicate(predicate)
+        elif not isinstance(predicate, PredicateExpression):
+            raise TypeError(predicate)
+        # process expression argument
+        if not isinstance(expr, FilterExpression):
+            raise TypeError(expr)
+        # assign members
+        self.predicate = predicate
+        self.expr = expr
+
+    def __repr__(self) -> str:
+        return f'{typename(self)}({self.predicate}, {self.expr})'
+
+    def __hash__(self) -> int:
+        return hash((super().__hash__(), self.predicate, self.expr))
+
+    def __eq__(self, other) -> bool:
+        return super().__eq__(other) \
+           and self.predicate == other.predicate \
+           and self.expr == other.expr
+
+class Any(_Branch):
+    """Any (and at least one) triple matches."""
+
+
+class All(_Branch):
+    """All (and at least one) triples match."""
+
+
+class _Agg(FilterExpression, abc.Collection):
+    """Combine multiple expressions."""
+
+    # child expressions
+    expr: typing.Set[FilterExpression]
+
+    def __init__(
+            self,
+            *expr: typing.Union[FilterExpression,
+                                typing.Iterable[FilterExpression],
+                                typing.Iterator[FilterExpression]]
+            ):
+        # unfold arguments
+        unfolded = set(normalize_args(*expr))
+        # check type
+        if not all(isinstance(e, FilterExpression) for e in unfolded):
+            raise TypeError(expr)
+        # assign member
+        self.expr = unfolded
+
+    def __contains__(self, expr: typing.Any) -> bool:
+        """Return True if *expr* is among the child expressions."""
+        return expr in self.expr
+
+    def __iter__(self) -> typing.Iterator[FilterExpression]:
+        """Iterator over child expressions."""
+        return iter(self.expr)
+
+    def __len__(self) -> int:
+        """Number of child expressions."""
+        return len(self.expr)
+
+    def __repr__(self) -> str:
+        return f'{typename(self)}({self.expr})'
+
+    def __hash__(self) -> int:
+        return hash((super().__hash__(), tuple(self.expr))) # FIXME: Unique hash of different orders over self.expr
+
+    def __eq__(self, other) -> bool:
+        return super().__eq__(other) and self.expr == other.expr
+
+
+class And(_Agg):
+    """All conditions match."""
+
+
+class Or(_Agg):
+    """At least one condition matches."""
+
+
+class Not(FilterExpression):
+    """Invert a statement."""
+
+    # child expression
+    expr: FilterExpression
+
+    def __init__(self, expr: FilterExpression):
+        # check argument
+        if not isinstance(expr, FilterExpression):
+            raise TypeError(expr)
+        # assign member
+        self.expr = expr
+
+    def __repr__(self) -> str:
+        return f'{typename(self)}({self.expr})'
+
+    def __hash__(self) -> int:
+        return hash((super().__hash__(), self.expr))
+
+    def __eq__(self, other: typing.Any) -> bool:
+        return super().__eq__(other) and self.expr == other.expr
+
+
+class Has(FilterExpression):
+    """Has predicate N times"""
+
+    # predicate to follow.
+    predicate: PredicateExpression
+
+    # target count
+    count: FilterExpression
+
+    def __init__(
+            self,
+            predicate: typing.Union[PredicateExpression, URI],
+            count: typing.Optional[typing.Union[FilterExpression, int]] = None,
+            ):
+        # check predicate
+        if isinstance(predicate, URI):
+            predicate = Predicate(predicate)
+        elif not isinstance(predicate, PredicateExpression):
+            raise TypeError(predicate)
+        # check count
+        if count is None:
+            count = GreaterThan(1, strict=False)
+        elif isinstance(count, int):
+            count = Equals(count)
+        elif not isinstance(count, FilterExpression):
+            raise TypeError(count)
+        # assign members
+        self.predicate = predicate
+        self.count = count
+
+    def __repr__(self) -> str:
+        return f'{typename(self)}({self.predicate}, {self.count})'
+
+    def __hash__(self) -> int:
+        return hash((super().__hash__(), self.predicate, self.count))
+
+    def __eq__(self, other) -> bool:
+        return super().__eq__(other) \
+           and self.predicate == other.predicate \
+           and self.count == other.count
+
+
+class _Value(FilterExpression):
+    """Base class for expressions that hold a single target value.
+    """
+
+    # target value.
+    value: typing.Any
+
+    def __init__(self, value: typing.Any):
+        self.value = value
+
+    def __repr__(self) -> str:
+        return f'{typename(self)}({self.value})'
+
+    def __hash__(self) -> int:
+        return hash((super().__hash__(), self.value))
+
+    def __eq__(self, other) -> bool:
+        return super().__eq__(other) and self.value == other.value
+
+
+class Is(_Value):
+    """Match the URI of a node."""
+
+
+class Equals(_Value):
+    """Value matches exactly.
+    NOTE: Value format must correspond to literal type; can be a string, a number, or a Node
+    """
+
+
+class Substring(_Value):
+    """Value matches a substring
+    NOTE: value format must be a string
+    """
+
+
+class StartsWith(_Value):
+    """Value begins with a given string."""
+
+
+class EndsWith(_Value):
+    """Value ends with a given string."""
+
+
+class _Bounded(FilterExpression):
+    """Base class for expressions that hold a threshold and its strictness.
+    """
+
+    # bound.
+    threshold: float
+
+    # strict/open (True) or inclusive/closed (False) bound.
+    strict: bool
+
+    def __init__(
+            self,
+            threshold: float,
+            strict: bool = True,
+            ):
+        self.threshold = float(threshold)
+        self.strict = bool(strict)
+
+    def __repr__(self) -> str:
+        return f'{typename(self)}({self.threshold}, {self.strict})'
+
+    def __hash__(self) -> int:
+        return hash((super().__hash__(), self.threshold, self.strict))
+
+    def __eq__(self, other) -> bool:
+        return super().__eq__(other) \
+           and self.threshold == other.threshold \
+           and self.strict == other.strict
+
+
+
+class LessThan(_Bounded):
+    """Value is (strictly) smaller than threshold.
+    NOTE: only on numerical literals
+    """
+
+
+class GreaterThan(_Bounded):
+    """Value is (strictly) larger than threshold
+    NOTE: only on numerical literals
+    """
+
+
+class Predicate(PredicateExpression):
+    """A single predicate."""
+
+    # predicate URI
+    predicate: URI
+
+    # reverse the predicate's direction
+    reverse: bool
+
+    def __init__(
+            self,
+            predicate: URI,
+            reverse: typing.Optional[bool] = False,
+            ):
+        # check arguments
+        if not isinstance(predicate, URI):
+            raise TypeError(predicate)
+        # assign members
+        self.predicate = predicate
+        self.reverse = bool(reverse)
+
+    def __repr__(self) -> str:
+        return f'{typename(self)}({self.predicate}, {self.reverse})'
+
+    def __hash__(self) -> int:
+        return hash((super().__hash__(), self.predicate, self.reverse))
+
+    def __eq__(self, other) -> bool:
+        return super().__eq__(other) \
+           and self.predicate == other.predicate \
+           and self.reverse == other.reverse
+
+
+class OneOf(PredicateExpression, abc.Collection):
+    """A set of predicate alternatives.
+
+    The predicates' domains must be ascendants or descendants of each other.
+    The overall domain is the most specific one.
+
+    The predicates' ranges must be ascendants or descendants of each other.
+    The overall range is the most generic one.
+    """
+
+    # predicate alternatives
+    expr: typing.Set[PredicateExpression]
+
+    def __init__(self, *expr: typing.Union[PredicateExpression, URI]):
+        # unfold arguments
+        unfolded = set(normalize_args(*expr)) # type: ignore [arg-type] # this is getting too complex...
+        # check arguments
+        if len(unfolded) == 0:
+            raise AttributeError('expected at least one expression, found none')
+        # ensure PredicateExpression
+        unfolded = {Predicate(e) if isinstance(e, URI) else e for e in unfolded}
+        # check type
+        if not all(isinstance(e, PredicateExpression) for e in unfolded):
+            raise TypeError(expr)
+        # assign member
+        self.expr = unfolded
+
+    def __contains__(self, expr: typing.Any) -> bool:
+        """Return True if *expr* is among the child expressions."""
+        return expr in self.expr
+
+    def __iter__(self) -> typing.Iterator[PredicateExpression]:
+        """Iterator over child expressions."""
+        return iter(self.expr)
+
+    def __len__(self) -> int:
+        """Number of child expressions."""
+        return len(self.expr)
+
+    def __repr__(self) -> str:
+        return f'{typename(self)}({self.expr})'
+
+    def __hash__(self) -> int:
+        return hash((super().__hash__(), tuple(self.expr))) # FIXME: Unique hash of different orders over self.expr
+
+    def __eq__(self, other) -> bool:
+        return super().__eq__(other) and self.expr == other.expr
+
+
+# Helpers
+
+def IsIn(*values): # pylint: disable=invalid-name # explicitly mimics an expression
+    """Match any of the given URIs."""
+    return Or(Is(value) for value in normalize_args(*values))
+
+def IsNotIn(*values): # pylint: disable=invalid-name # explicitly mimics an expression
+    """Match none of the given URIs."""
+    return Not(IsIn(*values))
+
 ## EOF ##
diff --git a/bsfs/query/validator.py b/bsfs/query/validator.py
index 123b947..352203a 100644
--- a/bsfs/query/validator.py
+++ b/bsfs/query/validator.py
@@ -9,6 +9,8 @@ import typing
 
 # bsfs imports
 from bsfs import schema as bsc
+from bsfs.namespace import ns
+from bsfs.utils import errors, typename
 
 # inner-module imports
 from . import ast
@@ -22,6 +24,18 @@ __all__ : typing.Sequence[str] = (
 ## code ##
 
 class Filter():
+    """Validate a `bsfs.query.ast.filter` query's structure and schema compliance.
+
+    * Conditions (Bounded, Value) can only be applied on literals
+    * Branches, Is, and Has can only be applied on nodes
+    * Predicates' domain and range must match
+    * Predicate paths must follow the schema
+    * Referenced types are present in the schema
+
+    """
+
+    # vertex types
+    T_VERTEX = typing.Union[bsc.Node, bsc.Literal] # FIXME: Shouldn't this be in the schema?
 
     # schema to validate against.
     schema: bsc.Schema
@@ -29,180 +43,182 @@ class Filter():
     def __init__(self, schema: bsc.Schema):
         self.schema = schema
 
-    def parse(self, node: ast.filter.FilterExpression, subject: bsc.types._Vertex):
-        # subject is a node type
-        if not isinstance(subject, bsc.Node):
-            raise errors.ConsistencyError(f'Expected a node, found {subject}')
-        # subject exists in the schema
-        if subject not in self.schema.nodes:
-            raise errors.ConsistencyError(f'Invalid node type {subject}')
-        # root expression is valid
-        self._parse(node, subject)
+    def __call__(self, root_type: bsc.Node, query: ast.filter.FilterExpression):
+        """Validate a filter *query*, assuming the subject is of type *root_type*.
+
+        Raises a `bsfs.utils.errors.ConsistencyError` if the query violates the schema.
+        Raises a `bsfs.utils.errors.BackendError` if the query structure is invalid.
+
+        """
+        # root_type must be a schema.Node
+        if not isinstance(root_type, bsc.Node):
+            raise TypeError(f'Expected a node, found {typename(root_type)}')
+        # root_type must exist in the schema
+        if root_type not in self.schema.nodes():
+            raise errors.ConsistencyError(f'{root_type} is not defined in the schema')
+        # check root expression
+        self._parse_filter_expression(root_type, query)
         # all tests passed
         return True
 
 
-    def _parse_numerical_expression(self, node: ast.filter.FilterExpression, subject: bsc.types._Vertex):
-        if isinstance(node, ast.filter.And):
-            return self._and(node, subject)
-        elif isinstance(node, ast.filter.Or):
-            return self._or(node, subject)
-        elif isinstance(node, ast.filter.LessThan):
-            return self._lessThan(node, subject)
-        elif isinstance(node, ast.filter.GreaterThan):
-            return self._greaterThan(node, subject)
-        elif isinstance(node, ast.filter.Equals):
-            return self._equals(node, subject, numerical=True)
-        else:
-            raise errors.ConsistencyError(f'Expected a numerical expression, found {node}')
-
-
-    def __branch(self, node: typing.Union[ast.filter.Any, ast.filter.And], subject: bsc.types._Vertex):
-        # subject is a node type
-        if not isinstance(subject, bsc.Node):
-            raise errors.ConsistencyError(f'Expected a node, found {subject}')
-        # subject exists in the schema
-        if subject not in self.schema.nodes:
-            raise errors.ConsistencyError(f'Invalid node type {subject}')
-        # predicate is valid
-        dom, rng = self._parse_predicate_expression(node.predicate)
-        # subject is a subtype of the predicate's domain
-        if not subject <= dom:
-            raise errors.ConsistencyError(f'Expected type {dom}, found {subject}')
-        # child expression is valid
-        self._parse_filter_expression(node.expr, rng)
+    ## routing methods
+
+    def _parse_filter_expression(self, type_: T_VERTEX, node: ast.filter.FilterExpression):
+        """Route *node* to the handler of the respective FilterExpression subclass."""
+        if isinstance(node, ast.filter.Is):
+            return self._is(type_, node)
+        if isinstance(node, ast.filter.Not):
+            return self._not(type_, node)
+        if isinstance(node, ast.filter.Has):
+            return self._has(type_, node)
+        if isinstance(node, (ast.filter.Any, ast.filter.All)):
+            return self._branch(type_, node)
+        if isinstance(node, (ast.filter.And, ast.filter.Or)):
+            return self._agg(type_, node)
+        if isinstance(node, (ast.filter.Equals, ast.filter.Substring, ast.filter.StartsWith, ast.filter.EndsWith)):
+            return self._value(type_, node)
+        if isinstance(node, (ast.filter.LessThan, ast.filter.GreaterThan)):
+            return self._bounded(type_, node)
+        # invalid node
+        raise errors.BackendError(f'expected filter expression, found {node}')
+
+    def _parse_predicate_expression(self, node: ast.filter.PredicateExpression) -> typing.Tuple[T_VERTEX, T_VERTEX]:
+        """Route *node* to the handler of the respective PredicateExpression subclass."""
+        if isinstance(node, ast.filter.Predicate):
+            return self._predicate(node)
+        if isinstance(node, ast.filter.OneOf):
+            return self._one_of(node)
+        # invalid node
+        raise errors.BackendError(f'expected predicate expression, found {node}')
+
+
+    ## predicate expressions
+
+    def _predicate(self, node: ast.filter.Predicate) -> typing.Tuple[T_VERTEX, T_VERTEX]:
+        # predicate exists in the schema
+        if not self.schema.has_predicate(node.predicate):
+            raise errors.ConsistencyError(f'predicate {node.predicate} is not in the schema')
+        # determine domain and range
+        pred = self.schema.predicate(node.predicate)
+        dom, rng = pred.domain, pred.range
+        if rng is None:
+            # FIXME: It is a design error that Predicates can have a None range...
+            raise errors.BackendError(f'predicate {pred} has no range')
+        if node.reverse:
+            dom, rng = rng, dom # type: ignore [assignment] # variable re-use confuses mypy
+        # return domain and range
+        return dom, rng
 
-    def _any(self, node: ast.filter.Any, subject: bsc.types._Vertex):
-        return self.__branch(node, subject)
+    def _one_of(self, node: ast.filter.OneOf) -> typing.Tuple[T_VERTEX, T_VERTEX]:
+        # determine domain and range types
+        # NOTE: select the most specific domain and the most generic range
+        dom, rng = None, None
+        for pred in node:
+            # parse child expression
+            subdom, subrng = self._parse_predicate_expression(pred)
+            try:
+                # determine overall domain
+                if dom is None or subdom < dom: # pick most specific domain
+                    dom = subdom
+                # domains must be related across all child expressions
+                if not subdom <= dom and not subdom >= dom:
+                    raise errors.ConsistencyError(f'domains {subdom} and {dom} are not related')
+            except TypeError as err: # compared literal vs. node
+                raise errors.ConsistencyError(f'domains {subdom} and {dom} are not of the same type') from err
 
-    def _all(self, node: ast.filter.All, subject: bsc.types._Vertex):
-        return self.__branch(node, subject)
+            try:
+                # determine overall range
+                if rng is None or subrng > rng: # pick most generic range
+                    rng = subrng
+                # ranges must be related across all child expressions
+                if not subrng <= rng and not subrng >= rng:
+                    raise errors.ConsistencyError(f'ranges {subrng} and {rng} are not related')
+            except TypeError as err: # compared literal vs. node
+                raise errors.ConsistencyError(f'ranges {subrng} and {rng} are not of the same type') from err
+        # check domain and range
+        if dom is None or rng is None:
+            # OneOf guarantees at least one expression, these two cases cannot happen
+            raise errors.UnreachableError()
+        # return domain and range
+        return dom, rng
 
 
-    def __agg(self, node: typing.Union[ast.filter.And, ast.filter.Or], subject: bsc.types._Vertex):
+    ## intermediates
+
+    def _branch(self, type_: T_VERTEX, node: ast.filter._Branch):
+        # type is a Node
+        if not isinstance(type_, bsc.Node):
+            raise errors.ConsistencyError(f'expected a Node, found {type_}')
+        # type exists in the schema
+        # FIXME: Isn't it actually guaranteed that the type (except the root type) is part of the schema?
+        # all types can be traced back to (a) root_type, (b) predicate, or (c) manually set (e.g. in _is).
+        # For (a), we do (and have to) perform a check. For (c), the code base should be consistent throughout
+        # the module, so this is an assumption that has to be ensured in schema.Schema. For (b), we know (and
+        # check) that the predicate is in the schema, hence all node/literals derived from it are also in the
+        # schema by construction of the schema.Schema class. So, why do we check this every time?
+        if type_ not in self.schema.nodes():
+            raise errors.ConsistencyError(f'node {type_} is not in the schema')
+        # predicate is valid
+        dom, rng = self._parse_predicate_expression(node.predicate)
+        # type_ is a subtype of the predicate's domain
+        if not type_ <= dom:
+            raise errors.ConsistencyError(f'expected type {dom} or subtype thereof, found {type_}')
+        # child expression is valid
+        self._parse_filter_expression(rng, node.expr)
+
+    def _agg(self, type_: T_VERTEX, node: ast.filter._Agg):
         for expr in node:
             # child expression is valid
-            self._parse_filter_expression(expr, subject)
-
-    def _and(self, node: ast.filter.And, subject: bsc.types._Vertex):
-        return self.__agg(node, subject)
-
-    def _or(self, node: ast.filter.Or, subject: bsc.types._Vertex):
-        return self.__agg(node, subject)
-
+            self._parse_filter_expression(type_, expr)
 
-    def _not(self, node: ast.filter.Not, subject: bsc.types._Vertex):
+    def _not(self, type_: T_VERTEX, node: ast.filter.Not):
         # child expression is valid
-        self._parse_filter_expression(node.expr, subject)
-
-
-    def _has(self, node: ast.filter.Has, subject: bsc.types._Vertex):
-        # subject is a node type
-        if not isinstance(subject, bsc.Node):
-            raise errors.ConsistencyError(f'Expected a node, found {subject}')
-        # subject exists in the schema
-        if subject not in self.schema.nodes:
-            raise errors.ConsistencyError(f'Invalid node type {subject}')
+        self._parse_filter_expression(type_, node.expr)
+
+    def _has(self, type_: T_VERTEX, node: ast.filter.Has):
+        # type is a Node
+        if not isinstance(type_, bsc.Node):
+            raise errors.ConsistencyError(f'expected a Node, found {type_}')
+        # type exists in the schema
+        if type_ not in self.schema.nodes():
+            raise errors.ConsistencyError(f'node {type_} is not in the schema')
         # predicate is valid
-        dom, rng = self._parse_predicate_expression(node.predicate)
-        # subject is a subtype of the predicate's domain
-        if not subject <= dom:
-            raise errors.ConsistencyError(f'Expected type {dom}, found {subject}')
+        dom, _= self._parse_predicate_expression(node.predicate)
+        # type_ is a subtype of the predicate's domain
+        if not type_ <= dom:
+            raise errors.ConsistencyError(f'expected type {dom}, found {type_}')
         # node.count is a numerical expression
-        self._parse_numerical_expression(node.count, self.schema.literal(ns.xsd.numerical))
-
-
-    def _equals(self, node: ast.filter.Equals, subject: bsc.types._Vertex, numerical: bool = False):
-        # subject is a literal
-        #if not isinstance(subject, bsc.Literal):
-        #    raise errors.ConsistencyError(f'Expected a literal, found {subject}')
-        if isinstance(subject, bsc.Node):
-            # FIXME: How to handle this case?
-            # FIXME: How to check if a NodeType is acceptable?
-            # FIXME: Maybe use flags to control what is expected as node identifiers?
-            from bsfs.graph.nodes import Nodes # FIXME
-            if not isinstance(node.value, Nodes) and not isinstance(node.value, URI):
-                raise errors.ConsistencyError(f'Expected a Nodes or URI, found {node.value}')
-        elif isinstance(subject, bsc.Literal):
-            # literal exists in the schema
-            if subject not in self.schema.literals:
-                raise errors.ConsistencyError(f'Invalid literal type {subject}')
-        else:
-            # FIXME:
-            raise errors.ConsistencyError(f'Expected a literal, found {subject}')
-        # node.value is numeric (if requested)
-        if numerical and not isinstance(node.value, float) and not isinstance(node.value, int):
-            raise errors.ConsistencyError(f'Expected a numerical value (int or float), found {node.value}')
-        # NOTE: We cannot check if node.value agrees with the subject since we don't know
-        # all literal types, their hierarchy, and how the backend converts datatypes.
-
-
-    def _substring(self, node: ast.filter.Substring, subject: bsc.types._Vertex):
-        # subject is a literal
-        if not isinstance(subject, bsc.Literal):
-            raise errors.ConsistencyError(f'Expected a literal, found {subject}')
-        # literal exists in the schema
-        if subject not in self.schema.literals:
-            raise errors.ConsistencyError(f'Invalid literal type {subject}')
-        # node.value matches literal datatype
-        if not subject.is_a(ns.xsd.string):
-            raise errors.ConsistencyError(f'Expected a string literal, found {subject}')
-
-
-    def _lessThan(self, node: ast.filter.LessThan, subject: bsc.types._Vertex):
-        # subject is a literal
-        if not isinstance(subject, bsc.Literal):
-            raise errors.ConsistencyError(f'Expected a literal, found {subject}')
-        # literal exists in the schema
-        if subject not in self.schema.literals:
-            raise errors.ConsistencyError(f'Invalid literal type {subject}')
-        # subject is numerical
-        if not subject.is_a(ns.xsd.numerical):
-            raise errors.ConsistencyError(f'Expected a numerical literal, found {subject}')
-
-
-    def _greaterThan(self, node: ast.filter.GreaterThan, subject: bsc.types._Vertex):
-        # subject is a literal
-        if not isinstance(subject, bsc.Literal):
-            raise errors.ConsistencyError(f'Expected a literal, found {subject}')
-        # literal exists in the schema
-        if subject not in self.schema.literals:
-            raise errors.ConsistencyError(f'Invalid literal type {subject}')
-        # subject is numerical
-        if not subject.is_a(ns.xsd.numerical):
-            raise errors.ConsistencyError(f'Expected a numerical literal, found {subject}')
-
-
-    def _predicate(self, node: ast.filter.Predicate):
-        try:
-            # predicate exists in the schema
-            pred = self.schema.predicate(node.predicate)
-        except KeyError:
-            raise errors.ConsistencyError(f'') # FIXME
-        if node.reverse:
-            return pred.range, pred.domain
-        else:
-            return pred.domain, pred.range
-
+        # FIXME: We have to ensure that ns.xsd.integer is always known in the schema!
+        self._parse_filter_expression(self.schema.literal(ns.xsd.integer), node.count)
+
+
+    ## conditions
+
+    def _is(self, type_: T_VERTEX, node: ast.filter.Is): # pylint: disable=unused-argument # (node)
+        if not isinstance(type_, bsc.Node):
+            raise errors.ConsistencyError(f'expected a Node, found {type_}')
+        if type_ not in self.schema.nodes():
+            raise errors.ConsistencyError(f'node {type_} is not in the schema')
+
+    def _value(self, type_: T_VERTEX, node: ast.filter._Value): # pylint: disable=unused-argument # (node)
+        # type is a literal
+        if not isinstance(type_, bsc.Literal):
+            raise errors.ConsistencyError(f'expected a Literal, found {type_}')
+        # type exists in the schema
+        if type_ not in self.schema.literals():
+            raise errors.ConsistencyError(f'literal {type_} is not in the schema')
+        # FIXME: Check if node.value corresponds to type_
+        # FIXME: A specific literal might be requested (i.e., a numeric type when used in Has)
+
+    def _bounded(self, type_: T_VERTEX, node: ast.filter._Bounded): # pylint: disable=unused-argument # (node)
+        # type is a literal
+        if not isinstance(type_, bsc.Literal):
+            raise errors.ConsistencyError(f'expected a Literal, found {type_}')
+        # type exists in the schema
+        if type_ not in self.schema.literals():
+            raise errors.ConsistencyError(f'literal {type_} is not in the schema')
+        # FIXME: Check if node.value corresponds to type_
 
-    def _oneOf(self, node: ast.filter.OneOf):
-        dom, rng = None, None
-        for pred in node:
-            try:
-                # parse child expression
-                subdom, subrng = self._parse_predicate_expression(pred)
-                # domain and range must be related across all child expressions
-                if not subdom <= dom and not subdom >= dom:
-                    raise errors.ConsistencyError(f'') # FIXME
-                if not subrng <= rng and not subrng >= rng:
-                    raise errors.ConsistencyError(f'') # FIXME
-                # determine overall domain and range
-                if dom is None or subdom < dom: # pick most specific domain
-                    dom = subdom
-                if rng is None or subrng > rng: # pick most generic range
-                    rng = subrng
-            except KeyError:
-                raise errors.ConsistencyError(f'')
-        return dom, rng
 
 ## EOF ##
-- 
cgit v1.2.3


From 3940cb3c79937a431ba2ae3b57fd0c6c2ccfff33 Mon Sep 17 00:00:00 2001
From: Matthias Baumgartner <dev@igsor.net>
Date: Thu, 12 Jan 2023 10:12:43 +0100
Subject: use Vertex in type annotations

---
 bsfs/query/validator.py | 25 +++++++++++--------------
 1 file changed, 11 insertions(+), 14 deletions(-)

(limited to 'bsfs/query')

diff --git a/bsfs/query/validator.py b/bsfs/query/validator.py
index 352203a..6bf1b72 100644
--- a/bsfs/query/validator.py
+++ b/bsfs/query/validator.py
@@ -34,9 +34,6 @@ class Filter():
 
     """
 
-    # vertex types
-    T_VERTEX = typing.Union[bsc.Node, bsc.Literal] # FIXME: Shouldn't this be in the schema?
-
     # schema to validate against.
     schema: bsc.Schema
 
@@ -64,7 +61,7 @@ class Filter():
 
     ## routing methods
 
-    def _parse_filter_expression(self, type_: T_VERTEX, node: ast.filter.FilterExpression):
+    def _parse_filter_expression(self, type_: bsc.Vertex, node: ast.filter.FilterExpression):
         """Route *node* to the handler of the respective FilterExpression subclass."""
         if isinstance(node, ast.filter.Is):
             return self._is(type_, node)
@@ -83,7 +80,7 @@ class Filter():
         # invalid node
         raise errors.BackendError(f'expected filter expression, found {node}')
 
-    def _parse_predicate_expression(self, node: ast.filter.PredicateExpression) -> typing.Tuple[T_VERTEX, T_VERTEX]:
+    def _parse_predicate_expression(self, node: ast.filter.PredicateExpression) -> typing.Tuple[bsc.Vertex, bsc.Vertex]:
         """Route *node* to the handler of the respective PredicateExpression subclass."""
         if isinstance(node, ast.filter.Predicate):
             return self._predicate(node)
@@ -95,7 +92,7 @@ class Filter():
 
     ## predicate expressions
 
-    def _predicate(self, node: ast.filter.Predicate) -> typing.Tuple[T_VERTEX, T_VERTEX]:
+    def _predicate(self, node: ast.filter.Predicate) -> typing.Tuple[bsc.Vertex, bsc.Vertex]:
         # predicate exists in the schema
         if not self.schema.has_predicate(node.predicate):
             raise errors.ConsistencyError(f'predicate {node.predicate} is not in the schema')
@@ -110,7 +107,7 @@ class Filter():
         # return domain and range
         return dom, rng
 
-    def _one_of(self, node: ast.filter.OneOf) -> typing.Tuple[T_VERTEX, T_VERTEX]:
+    def _one_of(self, node: ast.filter.OneOf) -> typing.Tuple[bsc.Vertex, bsc.Vertex]:
         # determine domain and range types
         # NOTE: select the most specific domain and the most generic range
         dom, rng = None, None
@@ -146,7 +143,7 @@ class Filter():
 
     ## intermediates
 
-    def _branch(self, type_: T_VERTEX, node: ast.filter._Branch):
+    def _branch(self, type_: bsc.Vertex, node: ast.filter._Branch):
         # type is a Node
         if not isinstance(type_, bsc.Node):
             raise errors.ConsistencyError(f'expected a Node, found {type_}')
@@ -167,16 +164,16 @@ class Filter():
         # child expression is valid
         self._parse_filter_expression(rng, node.expr)
 
-    def _agg(self, type_: T_VERTEX, node: ast.filter._Agg):
+    def _agg(self, type_: bsc.Vertex, node: ast.filter._Agg):
         for expr in node:
             # child expression is valid
             self._parse_filter_expression(type_, expr)
 
-    def _not(self, type_: T_VERTEX, node: ast.filter.Not):
+    def _not(self, type_: bsc.Vertex, node: ast.filter.Not):
         # child expression is valid
         self._parse_filter_expression(type_, node.expr)
 
-    def _has(self, type_: T_VERTEX, node: ast.filter.Has):
+    def _has(self, type_: bsc.Vertex, node: ast.filter.Has):
         # type is a Node
         if not isinstance(type_, bsc.Node):
             raise errors.ConsistencyError(f'expected a Node, found {type_}')
@@ -195,13 +192,13 @@ class Filter():
 
     ## conditions
 
-    def _is(self, type_: T_VERTEX, node: ast.filter.Is): # pylint: disable=unused-argument # (node)
+    def _is(self, type_: bsc.Vertex, node: ast.filter.Is): # pylint: disable=unused-argument # (node)
         if not isinstance(type_, bsc.Node):
             raise errors.ConsistencyError(f'expected a Node, found {type_}')
         if type_ not in self.schema.nodes():
             raise errors.ConsistencyError(f'node {type_} is not in the schema')
 
-    def _value(self, type_: T_VERTEX, node: ast.filter._Value): # pylint: disable=unused-argument # (node)
+    def _value(self, type_: bsc.Vertex, node: ast.filter._Value): # pylint: disable=unused-argument # (node)
         # type is a literal
         if not isinstance(type_, bsc.Literal):
             raise errors.ConsistencyError(f'expected a Literal, found {type_}')
@@ -211,7 +208,7 @@ class Filter():
         # FIXME: Check if node.value corresponds to type_
         # FIXME: A specific literal might be requested (i.e., a numeric type when used in Has)
 
-    def _bounded(self, type_: T_VERTEX, node: ast.filter._Bounded): # pylint: disable=unused-argument # (node)
+    def _bounded(self, type_: bsc.Vertex, node: ast.filter._Bounded): # pylint: disable=unused-argument # (node)
         # type is a literal
         if not isinstance(type_, bsc.Literal):
             raise errors.ConsistencyError(f'expected a Literal, found {type_}')
-- 
cgit v1.2.3


From 7e7284d5fc01c0a081aa79d67736f51069864a7d Mon Sep 17 00:00:00 2001
From: Matthias Baumgartner <dev@igsor.net>
Date: Thu, 12 Jan 2023 10:22:59 +0100
Subject: adapt to non-optional range in query checks

---
 bsfs/query/validator.py | 14 +++++---------
 1 file changed, 5 insertions(+), 9 deletions(-)

(limited to 'bsfs/query')

diff --git a/bsfs/query/validator.py b/bsfs/query/validator.py
index 6bf1b72..b04a9bf 100644
--- a/bsfs/query/validator.py
+++ b/bsfs/query/validator.py
@@ -98,10 +98,9 @@ class Filter():
             raise errors.ConsistencyError(f'predicate {node.predicate} is not in the schema')
         # determine domain and range
         pred = self.schema.predicate(node.predicate)
+        if not isinstance(pred.range, (bsc.Node, bsc.Literal)):
+            raise errors.BackendError(f'the range of predicate {pred} is undefined')
         dom, rng = pred.domain, pred.range
-        if rng is None:
-            # FIXME: It is a design error that Predicates can have a None range...
-            raise errors.BackendError(f'predicate {pred} has no range')
         if node.reverse:
             dom, rng = rng, dom # type: ignore [assignment] # variable re-use confuses mypy
         # return domain and range
@@ -133,12 +132,9 @@ class Filter():
                     raise errors.ConsistencyError(f'ranges {subrng} and {rng} are not related')
             except TypeError as err: # compared literal vs. node
                 raise errors.ConsistencyError(f'ranges {subrng} and {rng} are not of the same type') from err
-        # check domain and range
-        if dom is None or rng is None:
-            # OneOf guarantees at least one expression, these two cases cannot happen
-            raise errors.UnreachableError()
-        # return domain and range
-        return dom, rng
+        # OneOf guarantees at least one expression, dom and rng are always bsc.Vertex.
+        # mypy does not realize this, hence we ignore the warning.
+        return dom, rng # type: ignore [return-value]
 
 
     ## intermediates
-- 
cgit v1.2.3


From b0ff4ed674ad78bf113c3cc0c2ccd187ccb91048 Mon Sep 17 00:00:00 2001
From: Matthias Baumgartner <dev@igsor.net>
Date: Thu, 12 Jan 2023 10:26:30 +0100
Subject: number literal adaptions

---
 bsfs/query/validator.py | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

(limited to 'bsfs/query')

diff --git a/bsfs/query/validator.py b/bsfs/query/validator.py
index b04a9bf..75b51ca 100644
--- a/bsfs/query/validator.py
+++ b/bsfs/query/validator.py
@@ -182,8 +182,7 @@ class Filter():
         if not type_ <= dom:
             raise errors.ConsistencyError(f'expected type {dom}, found {type_}')
         # node.count is a numerical expression
-        # FIXME: We have to ensure that ns.xsd.integer is always known in the schema!
-        self._parse_filter_expression(self.schema.literal(ns.xsd.integer), node.count)
+        self._parse_filter_expression(self.schema.literal(ns.bsfs.Number), node.count)
 
 
     ## conditions
@@ -211,6 +210,9 @@ class Filter():
         # type exists in the schema
         if type_ not in self.schema.literals():
             raise errors.ConsistencyError(f'literal {type_} is not in the schema')
+        # type must be a Number literal (or a subtype thereof)
+        if not type_ <= self.schema.literal(ns.bsfs.Number):
+            raise errors.ConsistencyError(f'expected a number type, found {type_}')
         # FIXME: Check if node.value corresponds to type_
 
 
-- 
cgit v1.2.3


From 60257ed3c2aa6ea2891f362a691bde9d7ef17831 Mon Sep 17 00:00:00 2001
From: Matthias Baumgartner <dev@igsor.net>
Date: Fri, 13 Jan 2023 12:22:34 +0100
Subject: schema type comparison across classes

---
 bsfs/query/validator.py | 31 ++++++++++++-------------------
 1 file changed, 12 insertions(+), 19 deletions(-)

(limited to 'bsfs/query')

diff --git a/bsfs/query/validator.py b/bsfs/query/validator.py
index 75b51ca..ecea951 100644
--- a/bsfs/query/validator.py
+++ b/bsfs/query/validator.py
@@ -113,25 +113,18 @@ class Filter():
         for pred in node:
             # parse child expression
             subdom, subrng = self._parse_predicate_expression(pred)
-            try:
-                # determine overall domain
-                if dom is None or subdom < dom: # pick most specific domain
-                    dom = subdom
-                # domains must be related across all child expressions
-                if not subdom <= dom and not subdom >= dom:
-                    raise errors.ConsistencyError(f'domains {subdom} and {dom} are not related')
-            except TypeError as err: # compared literal vs. node
-                raise errors.ConsistencyError(f'domains {subdom} and {dom} are not of the same type') from err
-
-            try:
-                # determine overall range
-                if rng is None or subrng > rng: # pick most generic range
-                    rng = subrng
-                # ranges must be related across all child expressions
-                if not subrng <= rng and not subrng >= rng:
-                    raise errors.ConsistencyError(f'ranges {subrng} and {rng} are not related')
-            except TypeError as err: # compared literal vs. node
-                raise errors.ConsistencyError(f'ranges {subrng} and {rng} are not of the same type') from err
+            # determine overall domain
+            if dom is None or subdom < dom: # pick most specific domain
+                dom = subdom
+            # domains must be related across all child expressions
+            if not subdom <= dom and not subdom >= dom:
+                raise errors.ConsistencyError(f'domains {subdom} and {dom} are not related')
+            # determine overall range
+            if rng is None or subrng > rng: # pick most generic range
+                rng = subrng
+            # ranges must be related across all child expressions
+            if not subrng <= rng and not subrng >= rng:
+                raise errors.ConsistencyError(f'ranges {subrng} and {rng} are not related')
         # OneOf guarantees at least one expression, dom and rng are always bsc.Vertex.
         # mypy does not realize this, hence we ignore the warning.
         return dom, rng # type: ignore [return-value]
-- 
cgit v1.2.3


From 80a97bfa9f22d0d6dd25928fe1754a3a0d1de78a Mon Sep 17 00:00:00 2001
From: Matthias Baumgartner <dev@igsor.net>
Date: Sun, 15 Jan 2023 21:00:12 +0100
Subject: Distance filter ast node

---
 bsfs/query/ast/filter_.py | 59 +++++++++++++++++++++++++++++++++++++----------
 bsfs/query/validator.py   | 16 +++++++++++++
 2 files changed, 63 insertions(+), 12 deletions(-)

(limited to 'bsfs/query')

diff --git a/bsfs/query/ast/filter_.py b/bsfs/query/ast/filter_.py
index b129ded..2f0270c 100644
--- a/bsfs/query/ast/filter_.py
+++ b/bsfs/query/ast/filter_.py
@@ -252,8 +252,7 @@ class Has(FilterExpression):
 
 
 class _Value(FilterExpression):
-    """
-    """
+    """Matches some value."""
 
     # target value.
     value: typing.Any
@@ -277,13 +276,13 @@ class Is(_Value):
 
 class Equals(_Value):
     """Value matches exactly.
-    NOTE: Value format must correspond to literal type; can be a string, a number, or a Node
+    NOTE: Value must correspond to literal type.
     """
 
 
 class Substring(_Value):
     """Value matches a substring
-    NOTE: value format must be a string
+    NOTE: value must be a string.
     """
 
 
@@ -295,9 +294,49 @@ class EndsWith(_Value):
     """Value ends with a given string."""
 
 
+class Distance(FilterExpression):
+    """Distance to a reference is (strictly) below a threshold. Assumes a Feature literal."""
+
+    # FIXME:
+    # (a) pass a node/predicate as anchor instead of a value.
+    #     Then we don't need to materialize the reference.
+    # (b) pass a FilterExpression (_Bounded) instead of a threshold.
+    #     Then, we could also query values greater than a threshold.
+
+    # reference value.
+    reference: typing.Any
+
+    # distance threshold.
+    threshold: float
+
+    # strict (True; threshold excluded) or non-strict (False; threshold included) comparison.
+    strict: bool
+
+    def __init__(
+            self,
+            reference: typing.Any,
+            threshold: float,
+            strict: bool = False,
+            ):
+        self.reference = reference
+        self.threshold = float(threshold)
+        self.strict = bool(strict)
+
+    def __repr__(self) -> str:
+        return f'{typename(self)}({self.reference}, {self.threshold}, {self.strict})'
+
+    def __hash__(self) -> int:
+        return hash((super().__hash__(), tuple(self.reference), self.threshold, self.strict))
+
+    def __eq__(self, other) -> bool:
+        return super().__eq__(other) \
+           and self.reference == other.reference \
+           and self.threshold == other.threshold \
+           and self.strict == other.strict
+
+
 class _Bounded(FilterExpression):
-    """
-    """
+    """Value is bounded by a threshold. Assumes a Number literal."""
 
     # bound.
     threshold: float
@@ -327,15 +366,11 @@ class _Bounded(FilterExpression):
 
 
 class LessThan(_Bounded):
-    """Value is (strictly) smaller than threshold.
-    NOTE: only on numerical literals
-    """
+    """Value is (strictly) smaller than threshold. Assumes a Number literal."""
 
 
 class GreaterThan(_Bounded):
-    """Value is (strictly) larger than threshold
-    NOTE: only on numerical literals
-    """
+    """Value is (strictly) larger than threshold. Assumes a Number literal."""
 
 
 class Predicate(PredicateExpression):
diff --git a/bsfs/query/validator.py b/bsfs/query/validator.py
index ecea951..1b7f688 100644
--- a/bsfs/query/validator.py
+++ b/bsfs/query/validator.py
@@ -69,6 +69,8 @@ class Filter():
             return self._not(type_, node)
         if isinstance(node, ast.filter.Has):
             return self._has(type_, node)
+        if isinstance(node, ast.filter.Distance):
+            return self._distance(type_, node)
         if isinstance(node, (ast.filter.Any, ast.filter.All)):
             return self._branch(type_, node)
         if isinstance(node, (ast.filter.And, ast.filter.Or)):
@@ -177,6 +179,20 @@ class Filter():
         # node.count is a numerical expression
         self._parse_filter_expression(self.schema.literal(ns.bsfs.Number), node.count)
 
+    def _distance(self, type_: bsc.Vertex, node: ast.filter.Distance):
+        # type is a Literal
+        if not isinstance(type_, bsc.Feature):
+            raise errors.ConsistencyError(f'expected a Feature, found {type_}')
+        # type exists in the schema
+        if type_ not in self.schema.literals():
+            raise errors.ConsistencyError(f'literal {type_} is not in the schema')
+        # reference matches type_
+        if len(node.reference) != type_.dimension:
+            raise errors.ConsistencyError(f'reference has dimension {len(node.reference)}, expected {type_.dimension}')
+        # FIXME:
+        #if node.reference.dtype != type_.dtype:
+        #    raise errors.ConsistencyError(f'')
+
 
     ## conditions
 
-- 
cgit v1.2.3


From 3504609e1ba1f7f653fa79910474bebd3ec24d8a Mon Sep 17 00:00:00 2001
From: Matthias Baumgartner <dev@igsor.net>
Date: Mon, 16 Jan 2023 21:41:20 +0100
Subject: various minor fixes

---
 bsfs/query/validator.py | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

(limited to 'bsfs/query')

diff --git a/bsfs/query/validator.py b/bsfs/query/validator.py
index 1b7f688..904ac14 100644
--- a/bsfs/query/validator.py
+++ b/bsfs/query/validator.py
@@ -189,9 +189,7 @@ class Filter():
         # reference matches type_
         if len(node.reference) != type_.dimension:
             raise errors.ConsistencyError(f'reference has dimension {len(node.reference)}, expected {type_.dimension}')
-        # FIXME:
-        #if node.reference.dtype != type_.dtype:
-        #    raise errors.ConsistencyError(f'')
+        # FIXME: test dtype
 
 
     ## conditions
-- 
cgit v1.2.3


From a4789394e40aaa3152ad6009955709a6c7d277c2 Mon Sep 17 00:00:00 2001
From: Matthias Baumgartner <dev@igsor.net>
Date: Fri, 20 Jan 2023 14:36:11 +0100
Subject: fetch AST

---
 bsfs/query/ast/__init__.py |   4 +-
 bsfs/query/ast/fetch.py    | 175 +++++++++++++++++++++++++++++++++++++++++++++
 bsfs/query/ast/filter_.py  |   1 +
 3 files changed, 179 insertions(+), 1 deletion(-)
 create mode 100644 bsfs/query/ast/fetch.py

(limited to 'bsfs/query')

diff --git a/bsfs/query/ast/__init__.py b/bsfs/query/ast/__init__.py
index 704d051..66b097d 100644
--- a/bsfs/query/ast/__init__.py
+++ b/bsfs/query/ast/__init__.py
@@ -1,6 +1,6 @@
 """Query AST components.
 
-The query AST consists of a Filter syntax tree.
+The query AST consists of a Filter and a Fetch syntax tree.
 
 Classes beginning with an underscore (_) represent internal type hierarchies
 and should not be used for parsing. Note that the AST structures do not
@@ -14,10 +14,12 @@ Author: Matthias Baumgartner, 2022
 import typing
 
 # inner-module imports
+from . import fetch
 from . import filter_ as filter # pylint: disable=redefined-builtin
 
 # exports
 __all__: typing.Sequence[str] = (
+    'fetch',
     'filter',
     )
 
diff --git a/bsfs/query/ast/fetch.py b/bsfs/query/ast/fetch.py
new file mode 100644
index 0000000..5e603a1
--- /dev/null
+++ b/bsfs/query/ast/fetch.py
@@ -0,0 +1,175 @@
+"""
+
+Part of the BlackStar filesystem (bsfs) module.
+A copy of the license is provided with the project.
+Author: Matthias Baumgartner, 2022
+"""
+# imports
+from collections import abc
+import typing
+
+# bsfs imports
+from bsfs.utils import URI, typename, normalize_args
+
+# exports
+__all__ : typing.Sequence[str] = (
+    'All',
+    'Fetch',
+    'FetchExpression',
+    'Node',
+    'This',
+    'Value',
+    )
+
+
+## code ##
+
+class FetchExpression(abc.Hashable):
+    """Generic Fetch expression."""
+
+    def __repr__(self) -> str:
+        """Return the expression's string representation."""
+        return f'{typename(self)}()'
+
+    def __hash__(self) -> int:
+        """Return the expression's integer representation."""
+        return hash(type(self))
+
+    def __eq__(self, other: typing.Any) -> bool:
+        """Return True if *self* and *other* are equivalent."""
+        return isinstance(other, type(self))
+
+
+class All(FetchExpression):
+    """Fetch all child expressions."""
+
+    # child expressions.
+    expr: typing.Set[FetchExpression]
+
+    def __init__(self, *expr):
+        # unpack child expressions
+        unfolded = set(normalize_args(*expr))
+        # check child expressions
+        if len(unfolded) == 0:
+            raise AttributeError('expected at least one expression, found none')
+        if not all(isinstance(itm, FetchExpression) for itm in unfolded):
+            raise TypeError(expr)
+        # initialize
+        super().__init__()
+        # assign members
+        self.expr = unfolded
+
+    def __iter__(self) -> typing.Iterator[FetchExpression]:
+        return iter(self.expr)
+
+    def __len__(self) -> int:
+        return len(self.expr)
+
+    def __repr__(self) -> str:
+        return f'{typename(self)}({self.expr})'
+
+    def __hash__(self) -> int:
+        # FIXME: Produces different hashes for different orders of self.expr
+        return hash((super().__hash__(), tuple(self.expr)))
+
+    def __eq__(self, other: typing.Any) -> bool:
+        return super().__eq__(other) and self.expr == other.expr
+
+
+class _Branch(FetchExpression):
+    """Branch along a predicate."""
+
+    # FIXME: Use a Predicate (like in ast.filter) so that we can also reverse them!
+
+    # predicate to follow.
+    predicate: URI
+
+    def __init__(self, predicate: URI):
+        if not isinstance(predicate, URI):
+            raise TypeError(predicate)
+        self.predicate = predicate
+
+    def __repr__(self) -> str:
+        return f'{typename(self)}({self.predicate})'
+
+    def __hash__(self) -> int:
+        return hash((super().__hash__(), self.predicate))
+
+    def __eq__(self, other: typing.Any) -> bool:
+        return super().__eq__(other) and self.predicate == other.predicate
+
+
+class Fetch(_Branch):
+    """Follow a predicate before evaluating a child expression."""
+
+    # child expression.
+    expr: FetchExpression
+
+    def __init__(self, predicate: URI, expr: FetchExpression):
+        # check child expressions
+        if not isinstance(expr, FetchExpression):
+            raise TypeError(expr)
+        # initialize
+        super().__init__(predicate)
+        # assign members
+        self.expr = expr
+
+    def __repr__(self) -> str:
+        return f'{typename(self)}({self.predicate}, {self.expr})'
+
+    def __hash__(self) -> int:
+        return hash((super().__hash__(), self.expr))
+
+    def __eq__(self, other: typing.Any) -> bool:
+        return super().__eq__(other) and self.expr == other.expr
+
+
+class _Named(_Branch):
+    """Fetch a (named) symbol at a predicate."""
+
+    # symbol name.
+    name: str
+
+    def __init__(self, predicate: URI, name: str):
+        super().__init__(predicate)
+        self.name = str(name)
+
+    def __repr__(self) -> str:
+        return f'{typename(self)}({self.predicate}, {self.name})'
+
+    def __hash__(self) -> int:
+        return hash((super().__hash__(), self.name))
+
+    def __eq__(self, other: typing.Any) -> bool:
+        return super().__eq__(other) and self.name == other.name
+
+
+class Node(_Named): # pylint: disable=too-few-public-methods
+    """Fetch a Node at a predicate."""
+    # FIXME: Is this actually needed?
+
+
+class Value(_Named): # pylint: disable=too-few-public-methods
+    """Fetch a Literal at a predicate."""
+
+
+class This(FetchExpression):
+    """Fetch the current Node."""
+
+    # symbol name.
+    name: str
+
+    def __init__(self, name: str):
+        super().__init__()
+        self.name = str(name)
+
+    def __repr__(self) -> str:
+        return f'{typename(self)}({self.name})'
+
+    def __hash__(self) -> int:
+        return hash((super().__hash__(), self.name))
+
+    def __eq__(self, other: typing.Any) -> bool:
+        return super().__eq__(other) and self.name == other.name
+
+## EOF ##
diff --git a/bsfs/query/ast/filter_.py b/bsfs/query/ast/filter_.py
index 2f0270c..81b0de2 100644
--- a/bsfs/query/ast/filter_.py
+++ b/bsfs/query/ast/filter_.py
@@ -153,6 +153,7 @@ class _Agg(FilterExpression, abc.Collection):
         # check type
         if not all(isinstance(e, FilterExpression) for e in unfolded):
             raise TypeError(expr)
+        # FIXME: Require at least one child expression?
         # assign member
         self.expr = unfolded
 
-- 
cgit v1.2.3


From e2f08efc0d8a3c875994bdb69623c30cce5079d9 Mon Sep 17 00:00:00 2001
From: Matthias Baumgartner <dev@igsor.net>
Date: Fri, 20 Jan 2023 18:01:17 +0100
Subject: fetch AST validation

---
 bsfs/query/validator.py | 123 +++++++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 122 insertions(+), 1 deletion(-)

(limited to 'bsfs/query')

diff --git a/bsfs/query/validator.py b/bsfs/query/validator.py
index 904ac14..9fbff12 100644
--- a/bsfs/query/validator.py
+++ b/bsfs/query/validator.py
@@ -49,7 +49,7 @@ class Filter():
         """
         # root_type must be a schema.Node
         if not isinstance(root_type, bsc.Node):
-            raise TypeError(f'Expected a node, found {typename(root_type)}')
+            raise TypeError(f'expected a node, found {typename(root_type)}')
         # root_type must exist in the schema
         if root_type not in self.schema.nodes():
             raise errors.ConsistencyError(f'{root_type} is not defined in the schema')
@@ -223,4 +223,125 @@ class Filter():
         # FIXME: Check if node.value corresponds to type_
 
 
+class Fetch():
+    """Validate a `bsfs.query.ast.fetch` query's structure and schema compliance.
+
+    * Value can only be applied on literals
+    * Node can only be applied on nodes
+    * Names must be non-empty
+    * Branching nodes' predicates must match the type
+    * Symbols must be in the schema
+    * Predicates must follow the schema
+
+    """
+
+    # schema to validate against.
+    schema: bsc.Schema
+
+    def __init__(self, schema: bsc.Schema):
+        self.schema = schema
+
+    def __call__(self, root_type: bsc.Node, query: ast.fetch.FetchExpression):
+        """Validate a fetch *query*, assuming the subject having *root_type*.
+
+        Raises a `bsfs.utils.errors.ConsistencyError` if the query violates the schema.
+        Raises a `bsfs.utils.errors.BackendError` if the query structure is invalid.
+
+        """
+        # root_type must be a schema.Node
+        if not isinstance(root_type, bsc.Node):
+            raise TypeError(f'expected a node, found {typename(root_type)}')
+        # root_type must exist in the schema
+        if root_type not in self.schema.nodes():
+            raise errors.ConsistencyError(f'{root_type} is not defined in the schema')
+        # query must be a FetchExpression
+        if not isinstance(query, ast.fetch.FetchExpression):
+            raise TypeError(f'expected a fetch expression, found {typename(query)}')
+        # check root expression
+        self._parse_fetch_expression(root_type, query)
+        # all tests passed
+        return True
+
+    def _parse_fetch_expression(self, type_: bsc.Vertex, node: ast.fetch.FetchExpression):
+        """Route *node* to the handler of the respective FetchExpression subclass."""
+        if isinstance(node, (ast.fetch.Fetch, ast.fetch.Value, ast.fetch.Node)):
+            # NOTE: don't return so that checks below are executed
+            self._branch(type_, node)
+        if isinstance(node, (ast.fetch.Value, ast.fetch.Node)):
+            # NOTE: don't return so that checks below are executed
+            self._named(type_, node)
+        if isinstance(node, ast.fetch.All):
+            return self._all(type_, node)
+        if isinstance(node, ast.fetch.Fetch):
+            return self._fetch(type_, node)
+        if isinstance(node, ast.fetch.Value):
+            return self._value(type_, node)
+        if isinstance(node, ast.fetch.Node):
+            return self._node(type_, node)
+        if isinstance(node, ast.fetch.This):
+            return self._this(type_, node)
+        # invalid node
+        raise errors.BackendError(f'expected fetch expression, found {node}')
+
+    def _all(self, type_: bsc.Vertex, node: ast.fetch.All):
+        # check child expressions
+        for expr in node:
+            self._parse_fetch_expression(type_, expr)
+
+    def _branch(self, type_: bsc.Vertex, node: ast.fetch._Branch):
+        # type is a node
+        if not isinstance(type_, bsc.Node):
+            raise errors.ConsistencyError(f'expected a Node, found {type_}')
+        # node exists in the schema
+        if type_ not in self.schema.nodes():
+            raise errors.ConsistencyError(f'node {type_} is not in the schema')
+        # predicate exists in the schema
+        if not self.schema.has_predicate(node.predicate):
+            raise errors.ConsistencyError(f'predicate {node.predicate} is not in the schema')
+        pred = self.schema.predicate(node.predicate)
+        # type_ must be a subclass of domain
+        if not type_ <= pred.domain:
+            raise errors.ConsistencyError(
+                f'expected type {pred.domain} or subtype thereof, found {type_}')
+
+    def _fetch(self, type_: bsc.Vertex, node: ast.fetch.Fetch): # pylint: disable=unused-argument # type_ was considered in _branch
+        # range must be a node
+        rng = self.schema.predicate(node.predicate).range
+        if not isinstance(rng, bsc.Node):
+            raise errors.ConsistencyError(
+                f'expected the predicate\'s range to be a Node, found {rng}')
+        # child expression must be valid
+        self._parse_fetch_expression(rng, node.expr)
+
+    def _named(self, type_: bsc.Vertex, node: ast.fetch._Named): # pylint: disable=unused-argument # type_ was considered in _branch
+        # name must be set
+        if node.name.strip() == '':
+            raise errors.BackendError('node name cannot be empty')
+        # FIXME: check for double name use?
+
+    def _node(self, type_: bsc.Vertex, node: ast.fetch.Node): # pylint: disable=unused-argument # type_ was considered in _branch
+        # range must be a node
+        rng = self.schema.predicate(node.predicate).range
+        if not isinstance(rng, bsc.Node):
+            raise errors.ConsistencyError(
+                f'expected the predicate\'s range to be a Node, found {rng}')
+
+    def _value(self, type_: bsc.Vertex, node: ast.fetch.Value): # pylint: disable=unused-argument # type_ was considered in _branch
+        # range must be a literal
+        rng = self.schema.predicate(node.predicate).range
+        if not isinstance(rng, bsc.Literal):
+            raise errors.ConsistencyError(
+                f'expected the predicate\'s range to be a Literal, found {rng}')
+
+    def _this(self, type_: bsc.Vertex, node: ast.fetch.This):
+        # type is a node
+        if not isinstance(type_, bsc.Node):
+            raise errors.ConsistencyError(f'expected a Node, found {type_}')
+        # node exists in the schema
+        if type_ not in self.schema.nodes():
+            raise errors.ConsistencyError(f'node {type_} is not in the schema')
+        # name must be set
+        if node.name.strip() == '':
+            raise errors.BackendError('node name cannot be empty')
+
 ## EOF ##
-- 
cgit v1.2.3


From 7e0987bcda136a17baea45b8eb22eb5ea668abc0 Mon Sep 17 00:00:00 2001
From: Matthias Baumgartner <dev@igsor.net>
Date: Mon, 30 Jan 2023 14:35:32 +0100
Subject: filter ast comparison

---
 bsfs/query/matcher.py | 366 ++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 366 insertions(+)
 create mode 100644 bsfs/query/matcher.py

(limited to 'bsfs/query')

diff --git a/bsfs/query/matcher.py b/bsfs/query/matcher.py
new file mode 100644
index 0000000..a910756
--- /dev/null
+++ b/bsfs/query/matcher.py
@@ -0,0 +1,366 @@
+"""
+
+Part of the BlackStar filesystem (bsfs) module.
+A copy of the license is provided with the project.
+Author: Matthias Baumgartner, 2022
+"""
+# imports
+from collections import defaultdict
+from itertools import product
+from time import time
+import random
+import threading
+import typing
+
+# external imports
+from hopcroftkarp import HopcroftKarp
+
+# bsfs imports
+from bsfs.utils import errors, typename
+
+# inner-module imports
+from . import ast
+
+# exports
+__all__ : typing.Sequence[str] = (
+    'Filter',
+    )
+
+
+## code ##
+
+class Any(ast.filter.FilterExpression, ast.filter.PredicateExpression):
+    """Match any ast class.
+
+    Note that Any instances are unique, i.e. they do not compare, and
+    can hence be repeated in a set:
+    >>> Any() == Any()
+    False
+    >>> len({Any(), Any(), Any(), Any()})
+    4
+
+    """
+
+    # unique instance id
+    _uid: typing.Tuple[int, int, float, float]
+
+    def __init__(self):
+        self._uid = (
+            id(self),
+            id(threading.current_thread()),
+            time(),
+            random.random(),
+            )
+
+    def __eq__(self, other: typing.Any):
+        return super().__eq__(other) and self._uid == other._uid
+
+    def __hash__(self):
+        return hash((super().__hash__(), self._uid))
+
+
+class Rest(ast.filter.FilterExpression, ast.filter.PredicateExpression):
+    """Match the leftovers in a set of items to be compared.
+
+    Rest can be used in conjunction with aggregating expressions such as ast.filter.And,
+    ast.filter.Or, ast.filter.OneOf. It controls child expressions that were not yet
+    consumed by other matching rules. Rest may match to only a specific expression.
+    The expression defaults to Any().
+
+    For example, the following two ast structures would match since Rest
+    allows an arbitrary repetition of ast.filter.Equals statements.
+
+    >>> And(Equals('hello'), Equals('world'), Equals('foobar'))
+    >>> And(Equals('world'), Rest(Partial(Equals)))
+
+    """
+
+    # child expression for the Rest.
+    expr: typing.Union[ast.filter.FilterExpression, ast.filter.PredicateExpression]
+
+    def __init__(
+            self,
+            expr: typing.Optional[typing.Union[ast.filter.FilterExpression, ast.filter.PredicateExpression]] = None,
+            ):
+        if expr is None:
+            expr = Any()
+        self.expr = expr
+
+    def __repr__(self) -> str:
+        return f'{typename(self)}({self.expr})'
+
+    def __hash__(self) -> int:
+        return hash((super().__hash__(), self.expr))
+
+    def __eq__(self, other: typing.Any) -> bool:
+        return super().__eq__(other) and self.expr == other.expr
+
+
+class Partial(ast.filter.FilterExpression, ast.filter.PredicateExpression):
+    """Match a partially defined ast expression.
+
+    Literal values might be irrelevant or unknown when comparing two ast
+    structures. Partial allows to constrain the matcher to a certain
+    ast class, while leaving some of its members unspecified.
+
+    Pass the class (not instance) and its members as keyword arguments
+    to Partial. Note that the arguments are not validated.
+
+    For example, the following instance matches any ast.filter.Equals,
+    irrespective of its value:
+
+    >>> Partial(ast.filter.Equals)
+
+    Likewise, the following instance matches any ast.filter.LessThan
+    that has a non-strict bound, but makes no claim about the threshold:
+
+    >>> Partial(ast.filter.LessThan, strict=False)
+
+    """
+
+    # target node type.
+    node: typing.Type
+
+    # node construction args.
+    kwargs: typing.Dict[str, typing.Any]
+
+    def __init__(
+            self,
+            node: typing.Type,
+            **kwargs,
+            ):
+        self.node = node
+        self.kwargs = kwargs
+
+    def __repr__(self) -> str:
+        return f'{typename(self)}({self.node.__name__}, {self.kwargs})'
+
+    def __hash__(self) -> int:
+        kwargs = tuple((key, self.kwargs[key]) for key in sorted(self.kwargs))
+        return hash((super().__hash__(), self.node, kwargs))
+
+    def __eq__(self, other: typing.Any) -> bool:
+        return super().__eq__(other) \
+           and self.node == other.node \
+           and self.kwargs == other.kwargs
+
+    def match(
+            self,
+            name: str,
+            value: typing.Any,
+            ) -> bool:
+        """Return True if *name* is unspecified or matches *value*."""
+        return name not in self.kwargs or self.kwargs[name] == value
+
+
+T_ITEM_TYPE = typing.TypeVar('T_ITEM_TYPE') # pylint: disable=invalid-name
+
+def _set_matcher(
+        query: typing.Collection[T_ITEM_TYPE],
+        reference: typing.Collection[T_ITEM_TYPE],
+        cmp: typing.Callable[[T_ITEM_TYPE, T_ITEM_TYPE], bool],
+        ) -> bool:
+    """Compare two sets of child expressions.
+
+    This check has a best-case complexity of O(|N|**2) and worst-case
+    complexity of O(|N|**3), with N the number of child expressions.
+    """
+    # get reference items
+    r_items = list(reference)
+    # deal with Rest
+    r_rest = {itm for itm in r_items if isinstance(itm, Rest)}
+    if len(r_rest) > 1:
+        raise errors.BackendError(f'there must be at most one Rest instance per set, found {len(r_rest)}')
+    if len(r_rest) == 1:
+        # replace Rest by filling the reference up with rest's expression
+        # NOTE: convert r_items to list so that items can be repeated
+        expr = next(iter(r_rest)).expr # type: ignore [attr-defined]
+        r_items = [itm for itm in r_items if not isinstance(itm, Rest)]
+        r_items += [expr for _ in range(len(query) - len(r_items))] # type: ignore [misc]
+    # sanity check: cannot match if the item sizes differ:
+    # either a reference item is unmatched (len(r_items) > len(query))
+    # or a query item is unmatched (len(r_items) < len(query))
+    if len(query) != len(r_items):
+        return False
+
+    # To have a positive match between the query and the reference,
+    # each query expr has to match any reference expr.
+    # However, each reference expr can only be "consumed" once even
+    # if it matches multiple query exprs (e.g., the Any expression matches
+    # every query expr).
+    # This is a bipartite matching problem (Hall's marriage problem)
+    # and the Hopcroft-Karp-Karzanov algorithm finds a maximum
+    # matching. While there might be multiple maximum matchings,
+    # we only need to know whether (at least) one complete matching
+    # exists. The hopcroftkarp module provides this functionality.
+    # The HKK algorithm has worst-case complexity of O(|N|**2 * sqrt(|N|))
+    # and we also need to compare expressions pairwise, hence O(|N|**2).
+    num_items = len(r_items)
+    graph = defaultdict(set)
+    # build the bipartite graph as {lhs: {rhs}, ...}
+    # lhs and rhs must be disjoint identifiers.
+    for (ridx, ref), (nidx, node) in product(enumerate(r_items), enumerate(query)):
+        # add edges for equal expressions
+        if cmp(node, ref):
+            graph[ridx].add(num_items + nidx)
+
+    # maximum_matching returns the matches for all nodes in the graph
+    # ({ref_itm: node_itm}), hence a complete matching's size is
+    # the number of reference's child expressions.
+    return len(HopcroftKarp(graph).maximum_matching(keys_only=True)) == num_items
+
+
+class Filter():
+    """Compare a `bsfs.query.ast.filter` query's structure to a reference ast.
+
+    The reference ast may include `Rest`, `Partial`, or `Any` to account for irrelevant
+    or unknown ast pieces.
+
+    This is only a structural comparison, not a semantic one. For example, the
+    two following queries are semantically identical, but structurally different,
+    and would therefore not match:
+
+    >>> ast.filter.OneOf(ast.filter.Predicate(ns.bse.filename))
+    >>> ast.filter.Predicate(ns.bse.filename)
+
+    """
+
+    def __call__(self, query: ast.filter.FilterExpression, reference: ast.filter.FilterExpression) -> bool:
+        """Compare a *query* to a *reference* ast structure.
+        Return True if both are structurally equivalent.
+        """
+        if not isinstance(query, ast.filter.FilterExpression):
+            raise errors.BackendError(f'expected filter expression, found {query}')
+        if not isinstance(reference, ast.filter.FilterExpression):
+            raise errors.BackendError(f'expected filter expression, found {reference}')
+        return self._parse_filter_expression(query, reference)
+
+    def _parse_filter_expression(
+            self,
+            node: ast.filter.FilterExpression,
+            reference: ast.filter.FilterExpression,
+            ) -> bool:
+        """Route *node* to the handler of the respective FilterExpression subclass."""
+        # generic checks: reference type must be Any or match node type
+        if isinstance(reference, Any):
+            return True
+        # node-specific checks
+        if isinstance(node, ast.filter.Not):
+            return self._not(node, reference)
+        if isinstance(node, ast.filter.Has):
+            return self._has(node, reference)
+        if isinstance(node, ast.filter.Distance):
+            return self._distance(node, reference)
+        if isinstance(node, (ast.filter.Any, ast.filter.All)):
+            return self._branch(node, reference)
+        if isinstance(node, (ast.filter.And, ast.filter.Or)):
+            return self._agg(node, reference)
+        if isinstance(node, (ast.filter.Is, ast.filter.Equals, ast.filter.Substring,
+                             ast.filter.StartsWith, ast.filter.EndsWith)):
+            return self._value(node, reference)
+        if isinstance(node, (ast.filter.LessThan, ast.filter.GreaterThan)):
+            return self._bounded(node, reference)
+        # invalid node
+        raise errors.BackendError(f'expected filter expression, found {node}')
+
+    def _parse_predicate_expression(
+            self,
+            node: ast.filter.PredicateExpression,
+            reference: ast.filter.PredicateExpression,
+            ) -> bool:
+        """Route *node* to the handler of the respective PredicateExpression subclass."""
+        if isinstance(reference, Any):
+            return True
+        if isinstance(node, ast.filter.Predicate):
+            return self._predicate(node, reference)
+        if isinstance(node, ast.filter.OneOf):
+            return self._one_of(node, reference)
+        # invalid node
+        raise errors.BackendError(f'expected predicate expression, found {node}')
+
+    def _one_of(self, node: ast.filter.OneOf, reference: ast.filter.PredicateExpression) -> bool:
+        if not isinstance(reference, type(node)):
+            return False
+        return _set_matcher(node, reference, self._parse_predicate_expression)
+
+    def _predicate(self, node: ast.filter.Predicate, reference: ast.filter.PredicateExpression) -> bool:
+        if not isinstance(reference, (Partial, type(node))):
+            return False
+        # partial check
+        if isinstance(reference, Partial):
+            if not isinstance(node, reference.node):
+                return False
+            return reference.match('predicate', node.predicate) \
+               and reference.match('reverse', node.reverse)
+        # full check
+        return node.predicate == reference.predicate \
+           and node.reverse == reference.reverse
+
+    def _branch(self,
+            node: typing.Union[ast.filter.Any, ast.filter.All],
+            reference: ast.filter.FilterExpression,
+            ) -> bool:
+        if not isinstance(reference, type(node)):
+            return False
+        if not self._parse_predicate_expression(node.predicate, reference.predicate): # type: ignore [attr-defined]
+            return False
+        if not self._parse_filter_expression(node.expr, reference.expr): # type: ignore [attr-defined]
+            return False
+        return True
+
+    def _agg(self, node: typing.Union[ast.filter.And, ast.filter.Or], reference: ast.filter.FilterExpression) -> bool:
+        if not isinstance(reference, type(node)):
+            return False
+        return _set_matcher(node, reference, self._parse_filter_expression) # type: ignore [arg-type]
+
+    def _not(self, node: ast.filter.Not, reference: ast.filter.FilterExpression) -> bool:
+        if not isinstance(reference, type(node)):
+            return False
+        return self._parse_filter_expression(node.expr, reference.expr)
+
+    def _has(self, node: ast.filter.Has, reference: ast.filter.FilterExpression) -> bool:
+        if not isinstance(reference, type(node)):
+            return False
+        return self._parse_predicate_expression(node.predicate, reference.predicate) \
+           and self._parse_filter_expression(node.count, reference.count)
+
+    def _distance(self, node: ast.filter.Distance, reference: ast.filter.FilterExpression) -> bool:
+        if not isinstance(reference, (Partial, type(node))):
+            return False
+        # partial check
+        if isinstance(reference, Partial):
+            if not isinstance(node, reference.node):
+                return False
+            return reference.match('reference', node.reference) \
+               and reference.match('threshold', node.threshold) \
+               and reference.match('strict', node.strict)
+        # full check
+        return node.reference == reference.reference \
+           and node.threshold == reference.threshold \
+           and node.strict == reference.strict
+
+    def _value(self, node: ast.filter._Value, reference: ast.filter.FilterExpression) -> bool:
+        if not isinstance(reference, (Partial, type(node))):
+            return False
+        # partial check
+        if isinstance(reference, Partial):
+            if not isinstance(node, reference.node):
+                return False
+            return reference.match('value', node.value)
+        # full check
+        return node.value == reference.value
+
+    def _bounded(self, node: ast.filter._Bounded, reference: ast.filter.FilterExpression) -> bool:
+        if not isinstance(reference, (Partial, type(node))):
+            return False
+        # partial check
+        if isinstance(reference, Partial):
+            if not isinstance(node, reference.node):
+                return False
+            return reference.match('threshold', node.threshold) \
+               and reference.match('strict', node.strict)
+        # full check
+        return node.threshold == reference.threshold \
+           and node.strict == reference.strict
+
+## EOF ##
-- 
cgit v1.2.3


From c8fdaaa676afbdcf33344d72bd92b3ccb981cbf8 Mon Sep 17 00:00:00 2001
From: Matthias Baumgartner <dev@igsor.net>
Date: Wed, 8 Feb 2023 19:54:24 +0100
Subject: ast fixes

---
 bsfs/query/ast/fetch.py   | 3 +--
 bsfs/query/ast/filter_.py | 4 ++--
 2 files changed, 3 insertions(+), 4 deletions(-)

(limited to 'bsfs/query')

diff --git a/bsfs/query/ast/fetch.py b/bsfs/query/ast/fetch.py
index 5e603a1..d653a8a 100644
--- a/bsfs/query/ast/fetch.py
+++ b/bsfs/query/ast/fetch.py
@@ -69,8 +69,7 @@ class All(FetchExpression):
         return f'{typename(self)}({self.expr})'
 
     def __hash__(self) -> int:
-        # FIXME: Produces different hashes for different orders of self.expr
-        return hash((super().__hash__(), tuple(self.expr)))
+        return hash((super().__hash__(), tuple(sorted(self.expr, key=repr))))
 
     def __eq__(self, other: typing.Any) -> bool:
         return super().__eq__(other) and self.expr == other.expr
diff --git a/bsfs/query/ast/filter_.py b/bsfs/query/ast/filter_.py
index 81b0de2..798d37f 100644
--- a/bsfs/query/ast/filter_.py
+++ b/bsfs/query/ast/filter_.py
@@ -173,7 +173,7 @@ class _Agg(FilterExpression, abc.Collection):
         return f'{typename(self)}({self.expr})'
 
     def __hash__(self) -> int:
-        return hash((super().__hash__(), tuple(self.expr))) # FIXME: Unique hash of different orders over self.expr
+        return hash((super().__hash__(), tuple(sorted(self.expr, key=repr))))
 
     def __eq__(self, other) -> bool:
         return super().__eq__(other) and self.expr == other.expr
@@ -450,7 +450,7 @@ class OneOf(PredicateExpression, abc.Collection):
         return f'{typename(self)}({self.expr})'
 
     def __hash__(self) -> int:
-        return hash((super().__hash__(), tuple(self.expr))) # FIXME: Unique hash of different orders over self.expr
+        return hash((super().__hash__(), tuple(sorted(self.expr, key=repr))))
 
     def __eq__(self, other) -> bool:
         return super().__eq__(other) and self.expr == other.expr
-- 
cgit v1.2.3


From 64f3ac76a2f8d6b51380c06233accfcc19dca228 Mon Sep 17 00:00:00 2001
From: Matthias Baumgartner <dev@igsor.net>
Date: Wed, 8 Feb 2023 20:47:18 +0100
Subject: filter query convenience functions

---
 bsfs/query/ast/filter_.py | 58 +++++++++++++++++++++++++++++++++++++++++++----
 1 file changed, 53 insertions(+), 5 deletions(-)

(limited to 'bsfs/query')

diff --git a/bsfs/query/ast/filter_.py b/bsfs/query/ast/filter_.py
index 798d37f..44490fc 100644
--- a/bsfs/query/ast/filter_.py
+++ b/bsfs/query/ast/filter_.py
@@ -31,10 +31,7 @@ from collections import abc
 import typing
 
 # bsfs imports
-from bsfs.utils import URI, typename, normalize_args
-
-# inner-module imports
-#from . import utils
+from bsfs.utils import URI, errors, typename, normalize_args
 
 # exports
 __all__ : typing.Sequence[str] = (
@@ -460,10 +457,61 @@ class OneOf(PredicateExpression, abc.Collection):
 
 def IsIn(*values): # pylint: disable=invalid-name # explicitly mimics an expression
     """Match any of the given URIs."""
-    return Or(Is(value) for value in normalize_args(*values))
+    args = normalize_args(*values)
+    if len(args) == 0:
+        raise AttributeError('expected at least one value, found none')
+    if len(args) == 1:
+        return Is(args[0])
+    return Or(Is(value) for value in args)
 
 def IsNotIn(*values): # pylint: disable=invalid-name # explicitly mimics an expression
     """Match none of the given URIs."""
     return Not(IsIn(*values))
 
+
+def Between(
+        lo: float = float('-inf'),
+        hi: float = float('inf'),
+        lo_strict: bool = True,
+        hi_strict: bool = True,
+        ):
+    """Match numerical values between *lo* and *hi*. Include bounds if strict is False."""
+    if abs(lo) == hi == float('inf'):
+        raise ValueError('range cannot be INF on both sides')
+    if lo > hi:
+        raise ValueError(f'lower bound ({lo}) cannot be greater than upper bound ({hi})')
+    if lo == hi and not lo_strict and not hi_strict:
+        return Equals(lo)
+    if lo == hi: # either bound is strict
+        raise ValueError(f'bounds cannot be equal when either is strict')
+    if lo != float('-inf') and hi != float('inf'):
+        return And(GreaterThan(lo, lo_strict), LessThan(hi, hi_strict))
+    if lo != float('-inf'):
+        return GreaterThan(lo, lo_strict)
+    # hi != float('inf'):
+    return LessThan(hi, hi_strict)
+
+
+def Includes(*values, approx: bool = False):
+    """Match any of the given *values*. Uses `Substring` if *approx* is set."""
+    args = normalize_args(*values)
+    cls = Substring if approx else Equals
+    if len(args) == 0:
+        raise AttributeError('expected at least one value, found none')
+    if len(args) == 1:
+        return cls(args[0])
+    return Or(cls(v) for v in args)
+
+
+def Excludes(*values, approx: bool = False):
+    """Match none of the given *values*. Uses `Substring` if *approx* is set."""
+    args = normalize_args(*values)
+    cls = Substring if approx else Equals
+    if len(args) == 0:
+        raise AttributeError('expected at least one value, found none')
+    if len(args) == 1:
+        return Not(cls(args[0]))
+    return Not(Or(cls(v) for v in args))
+
+
 ## EOF ##
-- 
cgit v1.2.3


From f31a0d005785d474a37ec769c1f7f5e27aa08a57 Mon Sep 17 00:00:00 2001
From: Matthias Baumgartner <dev@igsor.net>
Date: Wed, 8 Feb 2023 21:08:24 +0100
Subject: minor comments

---
 bsfs/query/ast/filter_.py | 17 +++++++++--------
 bsfs/query/validator.py   |  1 +
 2 files changed, 10 insertions(+), 8 deletions(-)

(limited to 'bsfs/query')

diff --git a/bsfs/query/ast/filter_.py b/bsfs/query/ast/filter_.py
index 44490fc..b29d89e 100644
--- a/bsfs/query/ast/filter_.py
+++ b/bsfs/query/ast/filter_.py
@@ -31,7 +31,7 @@ from collections import abc
 import typing
 
 # bsfs imports
-from bsfs.utils import URI, errors, typename, normalize_args
+from bsfs.utils import URI, typename, normalize_args
 
 # exports
 __all__ : typing.Sequence[str] = (
@@ -454,8 +454,9 @@ class OneOf(PredicateExpression, abc.Collection):
 
 
 # Helpers
+# invalid-name is disabled since they explicitly mimic an expression
 
-def IsIn(*values): # pylint: disable=invalid-name # explicitly mimics an expression
+def IsIn(*values) -> FilterExpression: # pylint: disable=invalid-name
     """Match any of the given URIs."""
     args = normalize_args(*values)
     if len(args) == 0:
@@ -464,17 +465,17 @@ def IsIn(*values): # pylint: disable=invalid-name # explicitly mimics an express
         return Is(args[0])
     return Or(Is(value) for value in args)
 
-def IsNotIn(*values): # pylint: disable=invalid-name # explicitly mimics an expression
+def IsNotIn(*values) -> FilterExpression: # pylint: disable=invalid-name
     """Match none of the given URIs."""
     return Not(IsIn(*values))
 
 
-def Between(
+def Between( # pylint: disable=invalid-name
         lo: float = float('-inf'),
         hi: float = float('inf'),
         lo_strict: bool = True,
         hi_strict: bool = True,
-        ):
+        ) -> FilterExpression :
     """Match numerical values between *lo* and *hi*. Include bounds if strict is False."""
     if abs(lo) == hi == float('inf'):
         raise ValueError('range cannot be INF on both sides')
@@ -483,7 +484,7 @@ def Between(
     if lo == hi and not lo_strict and not hi_strict:
         return Equals(lo)
     if lo == hi: # either bound is strict
-        raise ValueError(f'bounds cannot be equal when either is strict')
+        raise ValueError('bounds cannot be equal when either is strict')
     if lo != float('-inf') and hi != float('inf'):
         return And(GreaterThan(lo, lo_strict), LessThan(hi, hi_strict))
     if lo != float('-inf'):
@@ -492,7 +493,7 @@ def Between(
     return LessThan(hi, hi_strict)
 
 
-def Includes(*values, approx: bool = False):
+def Includes(*values, approx: bool = False) -> FilterExpression: # pylint: disable=invalid-name
     """Match any of the given *values*. Uses `Substring` if *approx* is set."""
     args = normalize_args(*values)
     cls = Substring if approx else Equals
@@ -503,7 +504,7 @@ def Includes(*values, approx: bool = False):
     return Or(cls(v) for v in args)
 
 
-def Excludes(*values, approx: bool = False):
+def Excludes(*values, approx: bool = False) -> FilterExpression: # pylint: disable=invalid-name
     """Match none of the given *values*. Uses `Substring` if *approx* is set."""
     args = normalize_args(*values)
     cls = Substring if approx else Equals
diff --git a/bsfs/query/validator.py b/bsfs/query/validator.py
index 9fbff12..f0aa795 100644
--- a/bsfs/query/validator.py
+++ b/bsfs/query/validator.py
@@ -20,6 +20,7 @@ __all__ : typing.Sequence[str] = (
     'Filter',
     )
 
+# FIXME: Split into a submodule and the two classes into their own respective files.
 
 ## code ##
 
-- 
cgit v1.2.3


From 2e07f33314c238e42bfadc5f39805f93ffbc622e Mon Sep 17 00:00:00 2001
From: Matthias Baumgartner <dev@igsor.net>
Date: Thu, 2 Mar 2023 15:10:05 +0100
Subject: removed author and license notices from individual files

---
 bsfs/query/__init__.py     | 5 -----
 bsfs/query/ast/__init__.py | 3 ---
 bsfs/query/ast/fetch.py    | 5 -----
 bsfs/query/ast/filter_.py  | 3 ---
 bsfs/query/matcher.py      | 5 -----
 bsfs/query/validator.py    | 5 -----
 6 files changed, 26 deletions(-)

(limited to 'bsfs/query')

diff --git a/bsfs/query/__init__.py b/bsfs/query/__init__.py
index 21c7389..58ff03a 100644
--- a/bsfs/query/__init__.py
+++ b/bsfs/query/__init__.py
@@ -1,9 +1,4 @@
-"""
 
-Part of the BlackStar filesystem (bsfs) module.
-A copy of the license is provided with the project.
-Author: Matthias Baumgartner, 2022
-"""
 # imports
 import typing
 
diff --git a/bsfs/query/ast/__init__.py b/bsfs/query/ast/__init__.py
index 66b097d..bceaac0 100644
--- a/bsfs/query/ast/__init__.py
+++ b/bsfs/query/ast/__init__.py
@@ -6,9 +6,6 @@ Classes beginning with an underscore (_) represent internal type hierarchies
 and should not be used for parsing. Note that the AST structures do not
 (and cannot) check semantic validity or consistency with a given schema.
 
-Part of the BlackStar filesystem (bsfs) module.
-A copy of the license is provided with the project.
-Author: Matthias Baumgartner, 2022
 """
 # imports
 import typing
diff --git a/bsfs/query/ast/fetch.py b/bsfs/query/ast/fetch.py
index d653a8a..66d94e1 100644
--- a/bsfs/query/ast/fetch.py
+++ b/bsfs/query/ast/fetch.py
@@ -1,9 +1,4 @@
-"""
 
-Part of the BlackStar filesystem (bsfs) module.
-A copy of the license is provided with the project.
-Author: Matthias Baumgartner, 2022
-"""
 # imports
 from collections import abc
 import typing
diff --git a/bsfs/query/ast/filter_.py b/bsfs/query/ast/filter_.py
index b29d89e..56c982e 100644
--- a/bsfs/query/ast/filter_.py
+++ b/bsfs/query/ast/filter_.py
@@ -22,9 +22,6 @@ This AST has multiple issues that are not verified upon its creation:
 * Conditions exclude each other
 * The predicate along the branch have incompatible domains and ranges.
 
-Part of the BlackStar filesystem (bsfs) module.
-A copy of the license is provided with the project.
-Author: Matthias Baumgartner, 2022
 """
 # imports
 from collections import abc
diff --git a/bsfs/query/matcher.py b/bsfs/query/matcher.py
index a910756..5f3b07e 100644
--- a/bsfs/query/matcher.py
+++ b/bsfs/query/matcher.py
@@ -1,9 +1,4 @@
-"""
 
-Part of the BlackStar filesystem (bsfs) module.
-A copy of the license is provided with the project.
-Author: Matthias Baumgartner, 2022
-"""
 # imports
 from collections import defaultdict
 from itertools import product
diff --git a/bsfs/query/validator.py b/bsfs/query/validator.py
index f0aa795..6e3afa1 100644
--- a/bsfs/query/validator.py
+++ b/bsfs/query/validator.py
@@ -1,9 +1,4 @@
-"""
 
-Part of the BlackStar filesystem (bsfs) module.
-A copy of the license is provided with the project.
-Author: Matthias Baumgartner, 2022
-"""
 # imports
 import typing
 
-- 
cgit v1.2.3


From 6b9379d75198082054c35e44bc2cd880353a7485 Mon Sep 17 00:00:00 2001
From: Matthias Baumgartner <dev@igsor.net>
Date: Thu, 2 Mar 2023 16:40:43 +0100
Subject: hardening

---
 bsfs/query/validator.py | 10 +++++++++-
 1 file changed, 9 insertions(+), 1 deletion(-)

(limited to 'bsfs/query')

diff --git a/bsfs/query/validator.py b/bsfs/query/validator.py
index 6e3afa1..b259ea0 100644
--- a/bsfs/query/validator.py
+++ b/bsfs/query/validator.py
@@ -37,6 +37,10 @@ class Filter():
         self.schema = schema
 
     def __call__(self, root_type: bsc.Node, query: ast.filter.FilterExpression):
+        """Alias for `Filter.validate`."""
+        return self.validate(root_type, query)
+
+    def validate(self, root_type: bsc.Node, query: ast.filter.FilterExpression):
         """Validate a filter *query*, assuming the subject having *root_type*.
 
         Raises a `bsfs.utils.errors.ConsistencyError` if the query violates the schema.
@@ -237,7 +241,11 @@ class Fetch():
     def __init__(self, schema: bsc.Schema):
         self.schema = schema
 
-    def __call__(self, root_type: bsc.Node, query: ast.fetch.FetchExpression):
+    def __call__(self, root_type: bsc.Node, query: ast.filter.FilterExpression):
+        """Alias for `Fetch.validate`."""
+        return self.validate(root_type, query)
+
+    def validate(self, root_type: bsc.Node, query: ast.fetch.FetchExpression):
         """Validate a fetch *query*, assuming the subject having *root_type*.
 
         Raises a `bsfs.utils.errors.ConsistencyError` if the query violates the schema.
-- 
cgit v1.2.3


From 2c6c23f85e7f2123c508f9ff8a4aa776948bb589 Mon Sep 17 00:00:00 2001
From: Matthias Baumgartner <dev@igsor.net>
Date: Thu, 2 Mar 2023 16:46:11 +0100
Subject: minor style fixes

---
 bsfs/query/validator.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

(limited to 'bsfs/query')

diff --git a/bsfs/query/validator.py b/bsfs/query/validator.py
index b259ea0..1ce44e9 100644
--- a/bsfs/query/validator.py
+++ b/bsfs/query/validator.py
@@ -36,11 +36,11 @@ class Filter():
     def __init__(self, schema: bsc.Schema):
         self.schema = schema
 
-    def __call__(self, root_type: bsc.Node, query: ast.filter.FilterExpression):
+    def __call__(self, root_type: bsc.Node, query: ast.filter.FilterExpression) -> bool:
         """Alias for `Filter.validate`."""
         return self.validate(root_type, query)
 
-    def validate(self, root_type: bsc.Node, query: ast.filter.FilterExpression):
+    def validate(self, root_type: bsc.Node, query: ast.filter.FilterExpression) -> bool:
         """Validate a filter *query*, assuming the subject having *root_type*.
 
         Raises a `bsfs.utils.errors.ConsistencyError` if the query violates the schema.
@@ -241,11 +241,11 @@ class Fetch():
     def __init__(self, schema: bsc.Schema):
         self.schema = schema
 
-    def __call__(self, root_type: bsc.Node, query: ast.filter.FilterExpression):
+    def __call__(self, root_type: bsc.Node, query: ast.fetch.FetchExpression) -> bool:
         """Alias for `Fetch.validate`."""
         return self.validate(root_type, query)
 
-    def validate(self, root_type: bsc.Node, query: ast.fetch.FetchExpression):
+    def validate(self, root_type: bsc.Node, query: ast.fetch.FetchExpression) -> bool:
         """Validate a fetch *query*, assuming the subject having *root_type*.
 
         Raises a `bsfs.utils.errors.ConsistencyError` if the query violates the schema.
-- 
cgit v1.2.3


From 4fead04055be4967d9ea3b24ff61fe37a93108dd Mon Sep 17 00:00:00 2001
From: Matthias Baumgartner <dev@igsor.net>
Date: Sat, 4 Mar 2023 13:31:11 +0100
Subject: namespace refactoring and cleanup

---
 bsfs/query/ast/filter_.py | 3 ++-
 bsfs/query/matcher.py     | 4 ++--
 bsfs/query/validator.py   | 4 ++--
 3 files changed, 6 insertions(+), 5 deletions(-)

(limited to 'bsfs/query')

diff --git a/bsfs/query/ast/filter_.py b/bsfs/query/ast/filter_.py
index 56c982e..610fdb4 100644
--- a/bsfs/query/ast/filter_.py
+++ b/bsfs/query/ast/filter_.py
@@ -10,7 +10,8 @@ For example, consider the following AST:
 >>> Any(ns.bse.collection,
 ...     And(
 ...         Equals('hello'),
-...         Any(ns.bsm.guid, Any(ns.bsm.guid, Equals('hello'))),
+...         Is('hello world'),
+...         Any(ns.bse.tag, Equals('world')),
 ...         Any(ns.bst.label, Equals('world')),
 ...         All(ns.bst.label, Not(Equals('world'))),
 ...     )
diff --git a/bsfs/query/matcher.py b/bsfs/query/matcher.py
index 5f3b07e..17c9c8e 100644
--- a/bsfs/query/matcher.py
+++ b/bsfs/query/matcher.py
@@ -215,8 +215,8 @@ class Filter():
     two following queries are semantically identical, but structurally different,
     and would therefore not match:
 
-    >>> ast.filter.OneOf(ast.filter.Predicate(ns.bse.filename))
-    >>> ast.filter.Predicate(ns.bse.filename)
+    >>> ast.filter.OneOf(ast.filter.Predicate(ns.bse.name))
+    >>> ast.filter.Predicate(ns.bse.name)
 
     """
 
diff --git a/bsfs/query/validator.py b/bsfs/query/validator.py
index 1ce44e9..10ca492 100644
--- a/bsfs/query/validator.py
+++ b/bsfs/query/validator.py
@@ -177,7 +177,7 @@ class Filter():
         if not type_ <= dom:
             raise errors.ConsistencyError(f'expected type {dom}, found {type_}')
         # node.count is a numerical expression
-        self._parse_filter_expression(self.schema.literal(ns.bsfs.Number), node.count)
+        self._parse_filter_expression(self.schema.literal(ns.bsl.Number), node.count)
 
     def _distance(self, type_: bsc.Vertex, node: ast.filter.Distance):
         # type is a Literal
@@ -218,7 +218,7 @@ class Filter():
         if type_ not in self.schema.literals():
             raise errors.ConsistencyError(f'literal {type_} is not in the schema')
         # type must be a numerical
-        if not type_ <= self.schema.literal(ns.bsfs.Number):
+        if not type_ <= self.schema.literal(ns.bsl.Number):
             raise errors.ConsistencyError(f'expected a number type, found {type_}')
         # FIXME: Check if node.value corresponds to type_
 
-- 
cgit v1.2.3