aboutsummaryrefslogtreecommitdiffstats
path: root/bsfs/query/ast
diff options
context:
space:
mode:
Diffstat (limited to 'bsfs/query/ast')
-rw-r--r--bsfs/query/ast/__init__.py24
-rw-r--r--bsfs/query/ast/filter_.py433
2 files changed, 457 insertions, 0 deletions
diff --git a/bsfs/query/ast/__init__.py b/bsfs/query/ast/__init__.py
new file mode 100644
index 0000000..704d051
--- /dev/null
+++ b/bsfs/query/ast/__init__.py
@@ -0,0 +1,24 @@
+"""Query AST components.
+
+The query AST consists of a Filter syntax tree.
+
+Classes beginning with an underscore (_) represent internal type hierarchies
+and should not be used for parsing. Note that the AST structures do not
+(and cannot) check semantic validity or consistency with a given schema.
+
+Part of the BlackStar filesystem (bsfs) module.
+A copy of the license is provided with the project.
+Author: Matthias Baumgartner, 2022
+"""
+# imports
+import typing
+
+# inner-module imports
+from . import filter_ as filter # pylint: disable=redefined-builtin
+
+# exports
+__all__: typing.Sequence[str] = (
+ 'filter',
+ )
+
+## EOF ##
diff --git a/bsfs/query/ast/filter_.py b/bsfs/query/ast/filter_.py
new file mode 100644
index 0000000..b129ded
--- /dev/null
+++ b/bsfs/query/ast/filter_.py
@@ -0,0 +1,433 @@
+"""Filter AST.
+
+Note that it is easily possible to construct an AST that is inconsistent with
+a given schema. Furthermore, it is possible to construct a semantically invalid
+AST which that cannot be parsed correctly or includes contradicting statements.
+The AST nodes do not (and cannot) check such issues.
+
+For example, consider the following AST:
+
+>>> Any(ns.bse.collection,
+... And(
+... Equals('hello'),
+... Any(ns.bsm.guid, Any(ns.bsm.guid, Equals('hello'))),
+... Any(ns.bst.label, Equals('world')),
+... All(ns.bst.label, Not(Equals('world'))),
+... )
+... )
+
+This AST has multiple issues that are not verified upon its creation:
+* A condition on a non-literal.
+* A Filter on a literal.
+* Conditions exclude each other
+* The predicate along the branch have incompatible domains and ranges.
+
+Part of the BlackStar filesystem (bsfs) module.
+A copy of the license is provided with the project.
+Author: Matthias Baumgartner, 2022
+"""
+# imports
+from collections import abc
+import typing
+
+# bsfs imports
+from bsfs.utils import URI, typename, normalize_args
+
+# inner-module imports
+#from . import utils
+
+# exports
+__all__ : typing.Sequence[str] = (
+ # base classes
+ 'FilterExpression',
+ 'PredicateExpression',
+ # predicate expressions
+ 'OneOf',
+ 'Predicate',
+ # branching
+ 'All',
+ 'Any',
+ # aggregators
+ 'And',
+ 'Or',
+ # value matchers
+ 'Equals',
+ 'Substring',
+ 'EndsWith',
+ 'StartsWith',
+ # range matchers
+ 'GreaterThan',
+ 'LessThan',
+ # misc
+ 'Has',
+ 'Is',
+ 'Not',
+ )
+
+
+## code ##
+
+# pylint: disable=too-few-public-methods # Many expressions use mostly magic methods
+
+class _Expression(abc.Hashable):
+ def __repr__(self) -> str:
+ """Return the expressions's string representation."""
+ return f'{typename(self)}()'
+
+ def __hash__(self) -> int:
+ """Return the expression's integer representation."""
+ return hash(type(self))
+
+ def __eq__(self, other: typing.Any) -> bool:
+ """Return True if *self* and *other* are equivalent."""
+ return isinstance(other, type(self))
+
+
+class FilterExpression(_Expression):
+ """Generic Filter expression."""
+
+
+class PredicateExpression(_Expression):
+ """Generic Predicate expression."""
+
+
+class _Branch(FilterExpression):
+ """Branch the filter along a predicate."""
+
+ # predicate to follow.
+ predicate: PredicateExpression
+
+ # child expression to evaluate.
+ expr: FilterExpression
+
+ def __init__(
+ self,
+ predicate: typing.Union[PredicateExpression, URI],
+ expr: FilterExpression,
+ ):
+ # process predicate argument
+ if isinstance(predicate, URI):
+ predicate = Predicate(predicate)
+ elif not isinstance(predicate, PredicateExpression):
+ raise TypeError(predicate)
+ # process expression argument
+ if not isinstance(expr, FilterExpression):
+ raise TypeError(expr)
+ # assign members
+ self.predicate = predicate
+ self.expr = expr
+
+ def __repr__(self) -> str:
+ return f'{typename(self)}({self.predicate}, {self.expr})'
+
+ def __hash__(self) -> int:
+ return hash((super().__hash__(), self.predicate, self.expr))
+
+ def __eq__(self, other) -> bool:
+ return super().__eq__(other) \
+ and self.predicate == other.predicate \
+ and self.expr == other.expr
+
+class Any(_Branch):
+ """Any (and at least one) triple matches."""
+
+
+class All(_Branch):
+ """All (and at least one) triples match."""
+
+
+class _Agg(FilterExpression, abc.Collection):
+ """Combine multiple expressions."""
+
+ # child expressions
+ expr: typing.Set[FilterExpression]
+
+ def __init__(
+ self,
+ *expr: typing.Union[FilterExpression,
+ typing.Iterable[FilterExpression],
+ typing.Iterator[FilterExpression]]
+ ):
+ # unfold arguments
+ unfolded = set(normalize_args(*expr))
+ # check type
+ if not all(isinstance(e, FilterExpression) for e in unfolded):
+ raise TypeError(expr)
+ # assign member
+ self.expr = unfolded
+
+ def __contains__(self, expr: typing.Any) -> bool:
+ """Return True if *expr* is among the child expressions."""
+ return expr in self.expr
+
+ def __iter__(self) -> typing.Iterator[FilterExpression]:
+ """Iterator over child expressions."""
+ return iter(self.expr)
+
+ def __len__(self) -> int:
+ """Number of child expressions."""
+ return len(self.expr)
+
+ def __repr__(self) -> str:
+ return f'{typename(self)}({self.expr})'
+
+ def __hash__(self) -> int:
+ return hash((super().__hash__(), tuple(self.expr))) # FIXME: Unique hash of different orders over self.expr
+
+ def __eq__(self, other) -> bool:
+ return super().__eq__(other) and self.expr == other.expr
+
+
+class And(_Agg):
+ """All conditions match."""
+
+
+class Or(_Agg):
+ """At least one condition matches."""
+
+
+class Not(FilterExpression):
+ """Invert a statement."""
+
+ # child expression
+ expr: FilterExpression
+
+ def __init__(self, expr: FilterExpression):
+ # check argument
+ if not isinstance(expr, FilterExpression):
+ raise TypeError(expr)
+ # assign member
+ self.expr = expr
+
+ def __repr__(self) -> str:
+ return f'{typename(self)}({self.expr})'
+
+ def __hash__(self) -> int:
+ return hash((super().__hash__(), self.expr))
+
+ def __eq__(self, other: typing.Any) -> bool:
+ return super().__eq__(other) and self.expr == other.expr
+
+
+class Has(FilterExpression):
+ """Has predicate N times"""
+
+ # predicate to follow.
+ predicate: PredicateExpression
+
+ # target count
+ count: FilterExpression
+
+ def __init__(
+ self,
+ predicate: typing.Union[PredicateExpression, URI],
+ count: typing.Optional[typing.Union[FilterExpression, int]] = None,
+ ):
+ # check predicate
+ if isinstance(predicate, URI):
+ predicate = Predicate(predicate)
+ elif not isinstance(predicate, PredicateExpression):
+ raise TypeError(predicate)
+ # check count
+ if count is None:
+ count = GreaterThan(1, strict=False)
+ elif isinstance(count, int):
+ count = Equals(count)
+ elif not isinstance(count, FilterExpression):
+ raise TypeError(count)
+ # assign members
+ self.predicate = predicate
+ self.count = count
+
+ def __repr__(self) -> str:
+ return f'{typename(self)}({self.predicate}, {self.count})'
+
+ def __hash__(self) -> int:
+ return hash((super().__hash__(), self.predicate, self.count))
+
+ def __eq__(self, other) -> bool:
+ return super().__eq__(other) \
+ and self.predicate == other.predicate \
+ and self.count == other.count
+
+
+class _Value(FilterExpression):
+ """
+ """
+
+ # target value.
+ value: typing.Any
+
+ def __init__(self, value: typing.Any):
+ self.value = value
+
+ def __repr__(self) -> str:
+ return f'{typename(self)}({self.value})'
+
+ def __hash__(self) -> int:
+ return hash((super().__hash__(), self.value))
+
+ def __eq__(self, other) -> bool:
+ return super().__eq__(other) and self.value == other.value
+
+
+class Is(_Value):
+ """Match the URI of a node."""
+
+
+class Equals(_Value):
+ """Value matches exactly.
+ NOTE: Value format must correspond to literal type; can be a string, a number, or a Node
+ """
+
+
+class Substring(_Value):
+ """Value matches a substring
+ NOTE: value format must be a string
+ """
+
+
+class StartsWith(_Value):
+ """Value begins with a given string."""
+
+
+class EndsWith(_Value):
+ """Value ends with a given string."""
+
+
+class _Bounded(FilterExpression):
+ """
+ """
+
+ # bound.
+ threshold: float
+
+ # closed (True) or open (False) bound.
+ strict: bool
+
+ def __init__(
+ self,
+ threshold: float,
+ strict: bool = True,
+ ):
+ self.threshold = float(threshold)
+ self.strict = bool(strict)
+
+ def __repr__(self) -> str:
+ return f'{typename(self)}({self.threshold}, {self.strict})'
+
+ def __hash__(self) -> int:
+ return hash((super().__hash__(), self.threshold, self.strict))
+
+ def __eq__(self, other) -> bool:
+ return super().__eq__(other) \
+ and self.threshold == other.threshold \
+ and self.strict == other.strict
+
+
+
+class LessThan(_Bounded):
+ """Value is (strictly) smaller than threshold.
+ NOTE: only on numerical literals
+ """
+
+
+class GreaterThan(_Bounded):
+ """Value is (strictly) larger than threshold
+ NOTE: only on numerical literals
+ """
+
+
+class Predicate(PredicateExpression):
+ """A single predicate."""
+
+ # predicate URI
+ predicate: URI
+
+ # reverse the predicate's direction
+ reverse: bool
+
+ def __init__(
+ self,
+ predicate: URI,
+ reverse: typing.Optional[bool] = False,
+ ):
+ # check arguments
+ if not isinstance(predicate, URI):
+ raise TypeError(predicate)
+ # assign members
+ self.predicate = predicate
+ self.reverse = bool(reverse)
+
+ def __repr__(self) -> str:
+ return f'{typename(self)}({self.predicate}, {self.reverse})'
+
+ def __hash__(self) -> int:
+ return hash((super().__hash__(), self.predicate, self.reverse))
+
+ def __eq__(self, other) -> bool:
+ return super().__eq__(other) \
+ and self.predicate == other.predicate \
+ and self.reverse == other.reverse
+
+
+class OneOf(PredicateExpression, abc.Collection):
+ """A set of predicate alternatives.
+
+ The predicates' domains must be ascendants or descendants of each other.
+ The overall domain is the most specific one.
+
+ The predicate's domains must be ascendants or descendants of each other.
+ The overall range is the most generic one.
+ """
+
+ # predicate alternatives
+ expr: typing.Set[PredicateExpression]
+
+ def __init__(self, *expr: typing.Union[PredicateExpression, URI]):
+ # unfold arguments
+ unfolded = set(normalize_args(*expr)) # type: ignore [arg-type] # this is getting too complex...
+ # check arguments
+ if len(unfolded) == 0:
+ raise AttributeError('expected at least one expression, found none')
+ # ensure PredicateExpression
+ unfolded = {Predicate(e) if isinstance(e, URI) else e for e in unfolded}
+ # check type
+ if not all(isinstance(e, PredicateExpression) for e in unfolded):
+ raise TypeError(expr)
+ # assign member
+ self.expr = unfolded
+
+ def __contains__(self, expr: typing.Any) -> bool:
+ """Return True if *expr* is among the child expressions."""
+ return expr in self.expr
+
+ def __iter__(self) -> typing.Iterator[PredicateExpression]:
+ """Iterator over child expressions."""
+ return iter(self.expr)
+
+ def __len__(self) -> int:
+ """Number of child expressions."""
+ return len(self.expr)
+
+ def __repr__(self) -> str:
+ return f'{typename(self)}({self.expr})'
+
+ def __hash__(self) -> int:
+ return hash((super().__hash__(), tuple(self.expr))) # FIXME: Unique hash of different orders over self.expr
+
+ def __eq__(self, other) -> bool:
+ return super().__eq__(other) and self.expr == other.expr
+
+
+# Helpers
+
+def IsIn(*values): # pylint: disable=invalid-name # explicitly mimics an expression
+ """Match any of the given URIs."""
+ return Or(Is(value) for value in normalize_args(*values))
+
+def IsNotIn(*values): # pylint: disable=invalid-name # explicitly mimics an expression
+ """Match none of the given URIs."""
+ return Not(IsIn(*values))
+
+## EOF ##