diff options
Diffstat (limited to 'bsfs/query/ast')
-rw-r--r-- | bsfs/query/ast/__init__.py | 23 | ||||
-rw-r--r-- | bsfs/query/ast/fetch.py | 169 | ||||
-rw-r--r-- | bsfs/query/ast/filter_.py | 516 |
3 files changed, 708 insertions, 0 deletions
diff --git a/bsfs/query/ast/__init__.py b/bsfs/query/ast/__init__.py new file mode 100644 index 0000000..bceaac0 --- /dev/null +++ b/bsfs/query/ast/__init__.py @@ -0,0 +1,23 @@ +"""Query AST components. + +The query AST consists of a Filter and a Fetch syntax trees. + +Classes beginning with an underscore (_) represent internal type hierarchies +and should not be used for parsing. Note that the AST structures do not +(and cannot) check semantic validity or consistency with a given schema. + +""" +# imports +import typing + +# inner-module imports +from . import fetch +from . import filter_ as filter # pylint: disable=redefined-builtin + +# exports +__all__: typing.Sequence[str] = ( + 'fetch', + 'filter', + ) + +## EOF ## diff --git a/bsfs/query/ast/fetch.py b/bsfs/query/ast/fetch.py new file mode 100644 index 0000000..66d94e1 --- /dev/null +++ b/bsfs/query/ast/fetch.py @@ -0,0 +1,169 @@ + +# imports +from collections import abc +import typing + +# bsfs imports +from bsfs.utils import URI, typename, normalize_args + +# exports +__all__ : typing.Sequence[str] = ( + 'All', + 'Fetch', + 'FetchExpression', + 'Node', + 'This', + 'Value', + ) + + +## code ## + +class FetchExpression(abc.Hashable): + """Generic Fetch expression.""" + + def __repr__(self) -> str: + """Return the expressions's string representation.""" + return f'{typename(self)}()' + + def __hash__(self) -> int: + """Return the expression's integer representation.""" + return hash(type(self)) + + def __eq__(self, other: typing.Any) -> bool: + """Return True if *self* and *other* are equivalent.""" + return isinstance(other, type(self)) + + +class All(FetchExpression): + """Fetch all child expressions.""" + + # child expressions. + expr: typing.Set[FetchExpression] + + def __init__(self, *expr): + # unpack child expressions + unfolded = set(normalize_args(*expr)) + # check child expressions + if len(unfolded) == 0: + raise AttributeError('expected at least one expression, found none') + if not all(isinstance(itm, FetchExpression) for itm in unfolded): + raise TypeError(expr) + # initialize + super().__init__() + # assign members + self.expr = unfolded + + def __iter__(self) -> typing.Iterator[FetchExpression]: + return iter(self.expr) + + def __len__(self) -> int: + return len(self.expr) + + def __repr__(self) -> str: + return f'{typename(self)}({self.expr})' + + def __hash__(self) -> int: + return hash((super().__hash__(), tuple(sorted(self.expr, key=repr)))) + + def __eq__(self, other: typing.Any) -> bool: + return super().__eq__(other) and self.expr == other.expr + + +class _Branch(FetchExpression): + """Branch along a predicate.""" + + # FIXME: Use a Predicate (like in ast.filter) so that we can also reverse them! + + # predicate to follow. + predicate: URI + + def __init__(self, predicate: URI): + if not isinstance(predicate, URI): + raise TypeError(predicate) + self.predicate = predicate + + def __repr__(self) -> str: + return f'{typename(self)}({self.predicate})' + + def __hash__(self) -> int: + return hash((super().__hash__(), self.predicate)) + + def __eq__(self, other: typing.Any) -> bool: + return super().__eq__(other) and self.predicate == other.predicate + + +class Fetch(_Branch): + """Follow a predicate before evaluating a child epxression.""" + + # child expression. + expr: FetchExpression + + def __init__(self, predicate: URI, expr: FetchExpression): + # check child expressions + if not isinstance(expr, FetchExpression): + raise TypeError(expr) + # initialize + super().__init__(predicate) + # assign members + self.expr = expr + + def __repr__(self) -> str: + return f'{typename(self)}({self.predicate}, {self.expr})' + + def __hash__(self) -> int: + return hash((super().__hash__(), self.expr)) + + def __eq__(self, other: typing.Any) -> bool: + return super().__eq__(other) and self.expr == other.expr + + +class _Named(_Branch): + """Fetch a (named) symbol at a predicate.""" + + # symbol name. + name: str + + def __init__(self, predicate: URI, name: str): + super().__init__(predicate) + self.name = str(name) + + def __repr__(self) -> str: + return f'{typename(self)}({self.predicate}, {self.name})' + + def __hash__(self) -> int: + return hash((super().__hash__(), self.name)) + + def __eq__(self, other: typing.Any) -> bool: + return super().__eq__(other) and self.name == other.name + + +class Node(_Named): # pylint: disable=too-few-public-methods + """Fetch a Node at a predicate.""" + # FIXME: Is this actually needed? + + +class Value(_Named): # pylint: disable=too-few-public-methods + """Fetch a Literal at a predicate.""" + + +class This(FetchExpression): + """Fetch the current Node.""" + + # symbol name. + name: str + + def __init__(self, name: str): + super().__init__() + self.name = str(name) + + def __repr__(self) -> str: + return f'{typename(self)}({self.name})' + + def __hash__(self) -> int: + return hash((super().__hash__(), self.name)) + + def __eq__(self, other: typing.Any) -> bool: + return super().__eq__(other) and self.name == other.name + +## EOF ## diff --git a/bsfs/query/ast/filter_.py b/bsfs/query/ast/filter_.py new file mode 100644 index 0000000..610fdb4 --- /dev/null +++ b/bsfs/query/ast/filter_.py @@ -0,0 +1,516 @@ +"""Filter AST. + +Note that it is easily possible to construct an AST that is inconsistent with +a given schema. Furthermore, it is possible to construct a semantically invalid +AST which that cannot be parsed correctly or includes contradicting statements. +The AST nodes do not (and cannot) check such issues. + +For example, consider the following AST: + +>>> Any(ns.bse.collection, +... And( +... Equals('hello'), +... Is('hello world'), +... Any(ns.bse.tag, Equals('world')), +... Any(ns.bst.label, Equals('world')), +... All(ns.bst.label, Not(Equals('world'))), +... ) +... ) + +This AST has multiple issues that are not verified upon its creation: +* A condition on a non-literal. +* A Filter on a literal. +* Conditions exclude each other +* The predicate along the branch have incompatible domains and ranges. + +""" +# imports +from collections import abc +import typing + +# bsfs imports +from bsfs.utils import URI, typename, normalize_args + +# exports +__all__ : typing.Sequence[str] = ( + # base classes + 'FilterExpression', + 'PredicateExpression', + # predicate expressions + 'OneOf', + 'Predicate', + # branching + 'All', + 'Any', + # aggregators + 'And', + 'Or', + # value matchers + 'Equals', + 'Substring', + 'EndsWith', + 'StartsWith', + # range matchers + 'GreaterThan', + 'LessThan', + # misc + 'Has', + 'Is', + 'Not', + ) + + +## code ## + +# pylint: disable=too-few-public-methods # Many expressions use mostly magic methods + +class _Expression(abc.Hashable): + def __repr__(self) -> str: + """Return the expressions's string representation.""" + return f'{typename(self)}()' + + def __hash__(self) -> int: + """Return the expression's integer representation.""" + return hash(type(self)) + + def __eq__(self, other: typing.Any) -> bool: + """Return True if *self* and *other* are equivalent.""" + return isinstance(other, type(self)) + + +class FilterExpression(_Expression): + """Generic Filter expression.""" + + +class PredicateExpression(_Expression): + """Generic Predicate expression.""" + + +class _Branch(FilterExpression): + """Branch the filter along a predicate.""" + + # predicate to follow. + predicate: PredicateExpression + + # child expression to evaluate. + expr: FilterExpression + + def __init__( + self, + predicate: typing.Union[PredicateExpression, URI], + expr: FilterExpression, + ): + # process predicate argument + if isinstance(predicate, URI): + predicate = Predicate(predicate) + elif not isinstance(predicate, PredicateExpression): + raise TypeError(predicate) + # process expression argument + if not isinstance(expr, FilterExpression): + raise TypeError(expr) + # assign members + self.predicate = predicate + self.expr = expr + + def __repr__(self) -> str: + return f'{typename(self)}({self.predicate}, {self.expr})' + + def __hash__(self) -> int: + return hash((super().__hash__(), self.predicate, self.expr)) + + def __eq__(self, other) -> bool: + return super().__eq__(other) \ + and self.predicate == other.predicate \ + and self.expr == other.expr + +class Any(_Branch): + """Any (and at least one) triple matches.""" + + +class All(_Branch): + """All (and at least one) triples match.""" + + +class _Agg(FilterExpression, abc.Collection): + """Combine multiple expressions.""" + + # child expressions + expr: typing.Set[FilterExpression] + + def __init__( + self, + *expr: typing.Union[FilterExpression, + typing.Iterable[FilterExpression], + typing.Iterator[FilterExpression]] + ): + # unfold arguments + unfolded = set(normalize_args(*expr)) + # check type + if not all(isinstance(e, FilterExpression) for e in unfolded): + raise TypeError(expr) + # FIXME: Require at least one child expression? + # assign member + self.expr = unfolded + + def __contains__(self, expr: typing.Any) -> bool: + """Return True if *expr* is among the child expressions.""" + return expr in self.expr + + def __iter__(self) -> typing.Iterator[FilterExpression]: + """Iterator over child expressions.""" + return iter(self.expr) + + def __len__(self) -> int: + """Number of child expressions.""" + return len(self.expr) + + def __repr__(self) -> str: + return f'{typename(self)}({self.expr})' + + def __hash__(self) -> int: + return hash((super().__hash__(), tuple(sorted(self.expr, key=repr)))) + + def __eq__(self, other) -> bool: + return super().__eq__(other) and self.expr == other.expr + + +class And(_Agg): + """All conditions match.""" + + +class Or(_Agg): + """At least one condition matches.""" + + +class Not(FilterExpression): + """Invert a statement.""" + + # child expression + expr: FilterExpression + + def __init__(self, expr: FilterExpression): + # check argument + if not isinstance(expr, FilterExpression): + raise TypeError(expr) + # assign member + self.expr = expr + + def __repr__(self) -> str: + return f'{typename(self)}({self.expr})' + + def __hash__(self) -> int: + return hash((super().__hash__(), self.expr)) + + def __eq__(self, other: typing.Any) -> bool: + return super().__eq__(other) and self.expr == other.expr + + +class Has(FilterExpression): + """Has predicate N times""" + + # predicate to follow. + predicate: PredicateExpression + + # target count + count: FilterExpression + + def __init__( + self, + predicate: typing.Union[PredicateExpression, URI], + count: typing.Optional[typing.Union[FilterExpression, int]] = None, + ): + # check predicate + if isinstance(predicate, URI): + predicate = Predicate(predicate) + elif not isinstance(predicate, PredicateExpression): + raise TypeError(predicate) + # check count + if count is None: + count = GreaterThan(1, strict=False) + elif isinstance(count, int): + count = Equals(count) + elif not isinstance(count, FilterExpression): + raise TypeError(count) + # assign members + self.predicate = predicate + self.count = count + + def __repr__(self) -> str: + return f'{typename(self)}({self.predicate}, {self.count})' + + def __hash__(self) -> int: + return hash((super().__hash__(), self.predicate, self.count)) + + def __eq__(self, other) -> bool: + return super().__eq__(other) \ + and self.predicate == other.predicate \ + and self.count == other.count + + +class _Value(FilterExpression): + """Matches some value.""" + + # target value. + value: typing.Any + + def __init__(self, value: typing.Any): + self.value = value + + def __repr__(self) -> str: + return f'{typename(self)}({self.value})' + + def __hash__(self) -> int: + return hash((super().__hash__(), self.value)) + + def __eq__(self, other) -> bool: + return super().__eq__(other) and self.value == other.value + + +class Is(_Value): + """Match the URI of a node.""" + + +class Equals(_Value): + """Value matches exactly. + NOTE: Value must correspond to literal type. + """ + + +class Substring(_Value): + """Value matches a substring + NOTE: value must be a string. + """ + + +class StartsWith(_Value): + """Value begins with a given string.""" + + +class EndsWith(_Value): + """Value ends with a given string.""" + + +class Distance(FilterExpression): + """Distance to a reference is (strictly) below a threshold. Assumes a Feature literal.""" + + # FIXME: + # (a) pass a node/predicate as anchor instead of a value. + # Then we don't need to materialize the reference. + # (b) pass a FilterExpression (_Bounded) instead of a threshold. + # Then, we could also query values greater than a threshold. + + # reference value. + reference: typing.Any + + # distance threshold. + threshold: float + + # closed (True) or open (False) bound. + strict: bool + + def __init__( + self, + reference: typing.Any, + threshold: float, + strict: bool = False, + ): + self.reference = reference + self.threshold = float(threshold) + self.strict = bool(strict) + + def __repr__(self) -> str: + return f'{typename(self)}({self.reference}, {self.threshold}, {self.strict})' + + def __hash__(self) -> int: + return hash((super().__hash__(), tuple(self.reference), self.threshold, self.strict)) + + def __eq__(self, other) -> bool: + return super().__eq__(other) \ + and self.reference == other.reference \ + and self.threshold == other.threshold \ + and self.strict == other.strict + + +class _Bounded(FilterExpression): + """Value is bounded by a threshold. Assumes a Number literal.""" + + # bound. + threshold: float + + # closed (True) or open (False) bound. + strict: bool + + def __init__( + self, + threshold: float, + strict: bool = True, + ): + self.threshold = float(threshold) + self.strict = bool(strict) + + def __repr__(self) -> str: + return f'{typename(self)}({self.threshold}, {self.strict})' + + def __hash__(self) -> int: + return hash((super().__hash__(), self.threshold, self.strict)) + + def __eq__(self, other) -> bool: + return super().__eq__(other) \ + and self.threshold == other.threshold \ + and self.strict == other.strict + + + +class LessThan(_Bounded): + """Value is (strictly) smaller than threshold. Assumes a Number literal.""" + + +class GreaterThan(_Bounded): + """Value is (strictly) larger than threshold. Assumes a Number literal.""" + + +class Predicate(PredicateExpression): + """A single predicate.""" + + # predicate URI + predicate: URI + + # reverse the predicate's direction + reverse: bool + + def __init__( + self, + predicate: URI, + reverse: typing.Optional[bool] = False, + ): + # check arguments + if not isinstance(predicate, URI): + raise TypeError(predicate) + # assign members + self.predicate = predicate + self.reverse = bool(reverse) + + def __repr__(self) -> str: + return f'{typename(self)}({self.predicate}, {self.reverse})' + + def __hash__(self) -> int: + return hash((super().__hash__(), self.predicate, self.reverse)) + + def __eq__(self, other) -> bool: + return super().__eq__(other) \ + and self.predicate == other.predicate \ + and self.reverse == other.reverse + + +class OneOf(PredicateExpression, abc.Collection): + """A set of predicate alternatives. + + The predicates' domains must be ascendants or descendants of each other. + The overall domain is the most specific one. + + The predicate's domains must be ascendants or descendants of each other. + The overall range is the most generic one. + """ + + # predicate alternatives + expr: typing.Set[PredicateExpression] + + def __init__(self, *expr: typing.Union[PredicateExpression, URI]): + # unfold arguments + unfolded = set(normalize_args(*expr)) # type: ignore [arg-type] # this is getting too complex... + # check arguments + if len(unfolded) == 0: + raise AttributeError('expected at least one expression, found none') + # ensure PredicateExpression + unfolded = {Predicate(e) if isinstance(e, URI) else e for e in unfolded} + # check type + if not all(isinstance(e, PredicateExpression) for e in unfolded): + raise TypeError(expr) + # assign member + self.expr = unfolded + + def __contains__(self, expr: typing.Any) -> bool: + """Return True if *expr* is among the child expressions.""" + return expr in self.expr + + def __iter__(self) -> typing.Iterator[PredicateExpression]: + """Iterator over child expressions.""" + return iter(self.expr) + + def __len__(self) -> int: + """Number of child expressions.""" + return len(self.expr) + + def __repr__(self) -> str: + return f'{typename(self)}({self.expr})' + + def __hash__(self) -> int: + return hash((super().__hash__(), tuple(sorted(self.expr, key=repr)))) + + def __eq__(self, other) -> bool: + return super().__eq__(other) and self.expr == other.expr + + +# Helpers +# invalid-name is disabled since they explicitly mimic an expression + +def IsIn(*values) -> FilterExpression: # pylint: disable=invalid-name + """Match any of the given URIs.""" + args = normalize_args(*values) + if len(args) == 0: + raise AttributeError('expected at least one value, found none') + if len(args) == 1: + return Is(args[0]) + return Or(Is(value) for value in args) + +def IsNotIn(*values) -> FilterExpression: # pylint: disable=invalid-name + """Match none of the given URIs.""" + return Not(IsIn(*values)) + + +def Between( # pylint: disable=invalid-name + lo: float = float('-inf'), + hi: float = float('inf'), + lo_strict: bool = True, + hi_strict: bool = True, + ) -> FilterExpression : + """Match numerical values between *lo* and *hi*. Include bounds if strict is False.""" + if abs(lo) == hi == float('inf'): + raise ValueError('range cannot be INF on both sides') + if lo > hi: + raise ValueError(f'lower bound ({lo}) cannot be less than upper bound ({hi})') + if lo == hi and not lo_strict and not hi_strict: + return Equals(lo) + if lo == hi: # either bound is strict + raise ValueError('bounds cannot be equal when either is strict') + if lo != float('-inf') and hi != float('inf'): + return And(GreaterThan(lo, lo_strict), LessThan(hi, hi_strict)) + if lo != float('-inf'): + return GreaterThan(lo, lo_strict) + # hi != float('inf'): + return LessThan(hi, hi_strict) + + +def Includes(*values, approx: bool = False) -> FilterExpression: # pylint: disable=invalid-name + """Match any of the given *values*. Uses `Substring` if *approx* is set.""" + args = normalize_args(*values) + cls = Substring if approx else Equals + if len(args) == 0: + raise AttributeError('expected at least one value, found none') + if len(args) == 1: + return cls(args[0]) + return Or(cls(v) for v in args) + + +def Excludes(*values, approx: bool = False) -> FilterExpression: # pylint: disable=invalid-name + """Match none of the given *values*. Uses `Substring` if *approx* is set.""" + args = normalize_args(*values) + cls = Substring if approx else Equals + if len(args) == 0: + raise AttributeError('expected at least one value, found none') + if len(args) == 1: + return Not(cls(args[0])) + return Not(Or(cls(v) for v in args)) + + +## EOF ## |