From 791918039979d0743fd2ea4b9a5e74593ff96fd0 Mon Sep 17 00:00:00 2001 From: Matthias Baumgartner Date: Mon, 19 Dec 2022 13:32:34 +0100 Subject: query ast file structures and essential interfaces --- bsfs/query/__init__.py | 20 ++++++++++++++++++++ bsfs/query/ast/__init__.py | 24 ++++++++++++++++++++++++ bsfs/query/ast/filter_.py | 30 ++++++++++++++++++++++++++++++ bsfs/query/validator.py | 35 +++++++++++++++++++++++++++++++++++ 4 files changed, 109 insertions(+) create mode 100644 bsfs/query/__init__.py create mode 100644 bsfs/query/ast/__init__.py create mode 100644 bsfs/query/ast/filter_.py create mode 100644 bsfs/query/validator.py (limited to 'bsfs/query') diff --git a/bsfs/query/__init__.py b/bsfs/query/__init__.py new file mode 100644 index 0000000..21c7389 --- /dev/null +++ b/bsfs/query/__init__.py @@ -0,0 +1,20 @@ +""" + +Part of the BlackStar filesystem (bsfs) module. +A copy of the license is provided with the project. +Author: Matthias Baumgartner, 2022 +""" +# imports +import typing + +# inner-module imports +from . import ast +from . import validator as validate + +# exports +__all__: typing.Sequence[str] = ( + 'ast', + 'validate', + ) + +## EOF ## diff --git a/bsfs/query/ast/__init__.py b/bsfs/query/ast/__init__.py new file mode 100644 index 0000000..0ee7385 --- /dev/null +++ b/bsfs/query/ast/__init__.py @@ -0,0 +1,24 @@ +"""Query AST components. + +The query AST consists of a Filter syntax tree. + +Classes beginning with an underscore (_) represent internal type hierarchies +and should not be used for parsing. Note that the AST structures do not +(and cannot) check semantic validity or consistency with a given schema. + +Part of the BlackStar filesystem (bsfs) module. +A copy of the license is provided with the project. +Author: Matthias Baumgartner, 2022 +""" +# imports +import typing + +# inner-module imports +from . import filter_ as filter + +# exports +__all__: typing.Sequence[str] = ( + 'filter', + ) + +## EOF ## diff --git a/bsfs/query/ast/filter_.py b/bsfs/query/ast/filter_.py new file mode 100644 index 0000000..4086fc1 --- /dev/null +++ b/bsfs/query/ast/filter_.py @@ -0,0 +1,30 @@ +"""Filter AST. + +Part of the BlackStar filesystem (bsfs) module. +A copy of the license is provided with the project. +Author: Matthias Baumgartner, 2022 +""" +# imports +from collections import abc +import typing + +# exports +__all__ : typing.Sequence[str] = [] + + +## code ## + +class _Expression(abc.Hashable): + def __repr__(self) -> str: + """Return the expressions's string representation.""" + return f'{typename(self)}()' + + def __hash__(self) -> int: + """Return the expression's integer representation.""" + return hash(type(self)) + + def __eq__(self, other: typing.Any) -> bool: + """Return True if *self* and *other* are equivalent.""" + return isinstance(other, type(self)) + +## EOF ## diff --git a/bsfs/query/validator.py b/bsfs/query/validator.py new file mode 100644 index 0000000..ac3789a --- /dev/null +++ b/bsfs/query/validator.py @@ -0,0 +1,35 @@ +""" + +Part of the BlackStar filesystem (bsfs) module. +A copy of the license is provided with the project. +Author: Matthias Baumgartner, 2022 +""" +# imports +import typing + +# bsfs imports +from bsfs import schema as bsc + +# inner-module imports +from . import ast + +# exports +__all__ : typing.Sequence[str] = ( + 'Filter', + ) + + +## code ## + +class Filter(): + + # schema to validate against. + schema: bsc.Schema + + def __init__(self, schema: bsc.Schema): + self.schema = schema + + def parse(self, node: ast.filter.FilterExpression): + raise NotImplementedError() + +## EOF ## -- cgit v1.2.3 From a0f2308adcb226d28de3355bc7115a6d9b669462 Mon Sep 17 00:00:00 2001 From: Matthias Baumgartner Date: Mon, 19 Dec 2022 13:40:02 +0100 Subject: import fixes --- bsfs/query/validator.py | 177 +++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 175 insertions(+), 2 deletions(-) (limited to 'bsfs/query') diff --git a/bsfs/query/validator.py b/bsfs/query/validator.py index ac3789a..123b947 100644 --- a/bsfs/query/validator.py +++ b/bsfs/query/validator.py @@ -29,7 +29,180 @@ class Filter(): def __init__(self, schema: bsc.Schema): self.schema = schema - def parse(self, node: ast.filter.FilterExpression): - raise NotImplementedError() + def parse(self, node: ast.filter.FilterExpression, subject: bsc.types._Vertex): + # subject is a node type + if not isinstance(subject, bsc.Node): + raise errors.ConsistencyError(f'Expected a node, found {subject}') + # subject exists in the schema + if subject not in self.schema.nodes: + raise errors.ConsistencyError(f'Invalid node type {subject}') + # root expression is valid + self._parse(node, subject) + # all tests passed + return True + + + def _parse_numerical_expression(self, node: ast.filter.FilterExpression, subject: bsc.types._Vertex): + if isinstance(node, ast.filter.And): + return self._and(node, subject) + elif isinstance(node, ast.filter.Or): + return self._or(node, subject) + elif isinstance(node, ast.filter.LessThan): + return self._lessThan(node, subject) + elif isinstance(node, ast.filter.GreaterThan): + return self._greaterThan(node, subject) + elif isinstance(node, ast.filter.Equals): + return self._equals(node, subject, numerical=True) + else: + raise errors.ConsistencyError(f'Expected a numerical expression, found {node}') + + + def __branch(self, node: typing.Union[ast.filter.Any, ast.filter.And], subject: bsc.types._Vertex): + # subject is a node type + if not isinstance(subject, bsc.Node): + raise errors.ConsistencyError(f'Expected a node, found {subject}') + # subject exists in the schema + if subject not in self.schema.nodes: + raise errors.ConsistencyError(f'Invalid node type {subject}') + # predicate is valid + dom, rng = self._parse_predicate_expression(node.predicate) + # subject is a subtype of the predicate's domain + if not subject <= dom: + raise errors.ConsistencyError(f'Expected type {dom}, found {subject}') + # child expression is valid + self._parse_filter_expression(node.expr, rng) + + def _any(self, node: ast.filter.Any, subject: bsc.types._Vertex): + return self.__branch(node, subject) + + def _all(self, node: ast.filter.All, subject: bsc.types._Vertex): + return self.__branch(node, subject) + + + def __agg(self, node: typing.Union[ast.filter.And, ast.filter.Or], subject: bsc.types._Vertex): + for expr in node: + # child expression is valid + self._parse_filter_expression(expr, subject) + + def _and(self, node: ast.filter.And, subject: bsc.types._Vertex): + return self.__agg(node, subject) + + def _or(self, node: ast.filter.Or, subject: bsc.types._Vertex): + return self.__agg(node, subject) + + + def _not(self, node: ast.filter.Not, subject: bsc.types._Vertex): + # child expression is valid + self._parse_filter_expression(node.expr, subject) + + + def _has(self, node: ast.filter.Has, subject: bsc.types._Vertex): + # subject is a node type + if not isinstance(subject, bsc.Node): + raise errors.ConsistencyError(f'Expected a node, found {subject}') + # subject exists in the schema + if subject not in self.schema.nodes: + raise errors.ConsistencyError(f'Invalid node type {subject}') + # predicate is valid + dom, rng = self._parse_predicate_expression(node.predicate) + # subject is a subtype of the predicate's domain + if not subject <= dom: + raise errors.ConsistencyError(f'Expected type {dom}, found {subject}') + # node.count is a numerical expression + self._parse_numerical_expression(node.count, self.schema.literal(ns.xsd.numerical)) + + + def _equals(self, node: ast.filter.Equals, subject: bsc.types._Vertex, numerical: bool = False): + # subject is a literal + #if not isinstance(subject, bsc.Literal): + # raise errors.ConsistencyError(f'Expected a literal, found {subject}') + if isinstance(subject, bsc.Node): + # FIXME: How to handle this case? + # FIXME: How to check if a NodeType is acceptable? + # FIXME: Maybe use flags to control what is expected as node identifiers? + from bsfs.graph.nodes import Nodes # FIXME + if not isinstance(node.value, Nodes) and not isinstance(node.value, URI): + raise errors.ConsistencyError(f'Expected a Nodes or URI, found {node.value}') + elif isinstance(subject, bsc.Literal): + # literal exists in the schema + if subject not in self.schema.literals: + raise errors.ConsistencyError(f'Invalid literal type {subject}') + else: + # FIXME: + raise errors.ConsistencyError(f'Expected a literal, found {subject}') + # node.value is numeric (if requested) + if numerical and not isinstance(node.value, float) and not isinstance(node.value, int): + raise errors.ConsistencyError(f'Expected a numerical value (int or float), found {node.value}') + # NOTE: We cannot check if node.value agrees with the subject since we don't know + # all literal types, their hierarchy, and how the backend converts datatypes. + + + def _substring(self, node: ast.filter.Substring, subject: bsc.types._Vertex): + # subject is a literal + if not isinstance(subject, bsc.Literal): + raise errors.ConsistencyError(f'Expected a literal, found {subject}') + # literal exists in the schema + if subject not in self.schema.literals: + raise errors.ConsistencyError(f'Invalid literal type {subject}') + # node.value matches literal datatype + if not subject.is_a(ns.xsd.string): + raise errors.ConsistencyError(f'Expected a string literal, found {subject}') + + + def _lessThan(self, node: ast.filter.LessThan, subject: bsc.types._Vertex): + # subject is a literal + if not isinstance(subject, bsc.Literal): + raise errors.ConsistencyError(f'Expected a literal, found {subject}') + # literal exists in the schema + if subject not in self.schema.literals: + raise errors.ConsistencyError(f'Invalid literal type {subject}') + # subject is numerical + if not subject.is_a(ns.xsd.numerical): + raise errors.ConsistencyError(f'Expected a numerical literal, found {subject}') + + + def _greaterThan(self, node: ast.filter.GreaterThan, subject: bsc.types._Vertex): + # subject is a literal + if not isinstance(subject, bsc.Literal): + raise errors.ConsistencyError(f'Expected a literal, found {subject}') + # literal exists in the schema + if subject not in self.schema.literals: + raise errors.ConsistencyError(f'Invalid literal type {subject}') + # subject is numerical + if not subject.is_a(ns.xsd.numerical): + raise errors.ConsistencyError(f'Expected a numerical literal, found {subject}') + + + def _predicate(self, node: ast.filter.Predicate): + try: + # predicate exists in the schema + pred = self.schema.predicate(node.predicate) + except KeyError: + raise errors.ConsistencyError(f'') # FIXME + if node.reverse: + return pred.range, pred.domain + else: + return pred.domain, pred.range + + + def _oneOf(self, node: ast.filter.OneOf): + dom, rng = None, None + for pred in node: + try: + # parse child expression + subdom, subrng = self._parse_predicate_expression(pred) + # domain and range must be related across all child expressions + if not subdom <= dom and not subdom >= dom: + raise errors.ConsistencyError(f'') # FIXME + if not subrng <= rng and not subrng >= rng: + raise errors.ConsistencyError(f'') # FIXME + # determine overall domain and range + if dom is None or subdom < dom: # pick most specific domain + dom = subdom + if rng is None or subrng > rng: # pick most generic range + rng = subrng + except KeyError: + raise errors.ConsistencyError(f'') + return dom, rng ## EOF ## -- cgit v1.2.3 From 383fa8fd5c2e4b67089b4c5b654ebade51382f2c Mon Sep 17 00:00:00 2001 From: Matthias Baumgartner Date: Thu, 22 Dec 2022 20:27:49 +0100 Subject: filter ast definition and validation --- bsfs/query/ast/__init__.py | 2 +- bsfs/query/ast/filter_.py | 405 ++++++++++++++++++++++++++++++++++++++++++++- bsfs/query/validator.py | 336 +++++++++++++++++++------------------ 3 files changed, 581 insertions(+), 162 deletions(-) (limited to 'bsfs/query') diff --git a/bsfs/query/ast/__init__.py b/bsfs/query/ast/__init__.py index 0ee7385..704d051 100644 --- a/bsfs/query/ast/__init__.py +++ b/bsfs/query/ast/__init__.py @@ -14,7 +14,7 @@ Author: Matthias Baumgartner, 2022 import typing # inner-module imports -from . import filter_ as filter +from . import filter_ as filter # pylint: disable=redefined-builtin # exports __all__: typing.Sequence[str] = ( diff --git a/bsfs/query/ast/filter_.py b/bsfs/query/ast/filter_.py index 4086fc1..b129ded 100644 --- a/bsfs/query/ast/filter_.py +++ b/bsfs/query/ast/filter_.py @@ -1,5 +1,27 @@ """Filter AST. +Note that it is easily possible to construct an AST that is inconsistent with +a given schema. Furthermore, it is possible to construct a semantically invalid +AST which that cannot be parsed correctly or includes contradicting statements. +The AST nodes do not (and cannot) check such issues. + +For example, consider the following AST: + +>>> Any(ns.bse.collection, +... And( +... Equals('hello'), +... Any(ns.bsm.guid, Any(ns.bsm.guid, Equals('hello'))), +... Any(ns.bst.label, Equals('world')), +... All(ns.bst.label, Not(Equals('world'))), +... ) +... ) + +This AST has multiple issues that are not verified upon its creation: +* A condition on a non-literal. +* A Filter on a literal. +* Conditions exclude each other +* The predicate along the branch have incompatible domains and ranges. + Part of the BlackStar filesystem (bsfs) module. A copy of the license is provided with the project. Author: Matthias Baumgartner, 2022 @@ -8,12 +30,45 @@ Author: Matthias Baumgartner, 2022 from collections import abc import typing +# bsfs imports +from bsfs.utils import URI, typename, normalize_args + +# inner-module imports +#from . import utils + # exports -__all__ : typing.Sequence[str] = [] +__all__ : typing.Sequence[str] = ( + # base classes + 'FilterExpression', + 'PredicateExpression', + # predicate expressions + 'OneOf', + 'Predicate', + # branching + 'All', + 'Any', + # aggregators + 'And', + 'Or', + # value matchers + 'Equals', + 'Substring', + 'EndsWith', + 'StartsWith', + # range matchers + 'GreaterThan', + 'LessThan', + # misc + 'Has', + 'Is', + 'Not', + ) ## code ## +# pylint: disable=too-few-public-methods # Many expressions use mostly magic methods + class _Expression(abc.Hashable): def __repr__(self) -> str: """Return the expressions's string representation.""" @@ -27,4 +82,352 @@ class _Expression(abc.Hashable): """Return True if *self* and *other* are equivalent.""" return isinstance(other, type(self)) + +class FilterExpression(_Expression): + """Generic Filter expression.""" + + +class PredicateExpression(_Expression): + """Generic Predicate expression.""" + + +class _Branch(FilterExpression): + """Branch the filter along a predicate.""" + + # predicate to follow. + predicate: PredicateExpression + + # child expression to evaluate. + expr: FilterExpression + + def __init__( + self, + predicate: typing.Union[PredicateExpression, URI], + expr: FilterExpression, + ): + # process predicate argument + if isinstance(predicate, URI): + predicate = Predicate(predicate) + elif not isinstance(predicate, PredicateExpression): + raise TypeError(predicate) + # process expression argument + if not isinstance(expr, FilterExpression): + raise TypeError(expr) + # assign members + self.predicate = predicate + self.expr = expr + + def __repr__(self) -> str: + return f'{typename(self)}({self.predicate}, {self.expr})' + + def __hash__(self) -> int: + return hash((super().__hash__(), self.predicate, self.expr)) + + def __eq__(self, other) -> bool: + return super().__eq__(other) \ + and self.predicate == other.predicate \ + and self.expr == other.expr + +class Any(_Branch): + """Any (and at least one) triple matches.""" + + +class All(_Branch): + """All (and at least one) triples match.""" + + +class _Agg(FilterExpression, abc.Collection): + """Combine multiple expressions.""" + + # child expressions + expr: typing.Set[FilterExpression] + + def __init__( + self, + *expr: typing.Union[FilterExpression, + typing.Iterable[FilterExpression], + typing.Iterator[FilterExpression]] + ): + # unfold arguments + unfolded = set(normalize_args(*expr)) + # check type + if not all(isinstance(e, FilterExpression) for e in unfolded): + raise TypeError(expr) + # assign member + self.expr = unfolded + + def __contains__(self, expr: typing.Any) -> bool: + """Return True if *expr* is among the child expressions.""" + return expr in self.expr + + def __iter__(self) -> typing.Iterator[FilterExpression]: + """Iterator over child expressions.""" + return iter(self.expr) + + def __len__(self) -> int: + """Number of child expressions.""" + return len(self.expr) + + def __repr__(self) -> str: + return f'{typename(self)}({self.expr})' + + def __hash__(self) -> int: + return hash((super().__hash__(), tuple(self.expr))) # FIXME: Unique hash of different orders over self.expr + + def __eq__(self, other) -> bool: + return super().__eq__(other) and self.expr == other.expr + + +class And(_Agg): + """All conditions match.""" + + +class Or(_Agg): + """At least one condition matches.""" + + +class Not(FilterExpression): + """Invert a statement.""" + + # child expression + expr: FilterExpression + + def __init__(self, expr: FilterExpression): + # check argument + if not isinstance(expr, FilterExpression): + raise TypeError(expr) + # assign member + self.expr = expr + + def __repr__(self) -> str: + return f'{typename(self)}({self.expr})' + + def __hash__(self) -> int: + return hash((super().__hash__(), self.expr)) + + def __eq__(self, other: typing.Any) -> bool: + return super().__eq__(other) and self.expr == other.expr + + +class Has(FilterExpression): + """Has predicate N times""" + + # predicate to follow. + predicate: PredicateExpression + + # target count + count: FilterExpression + + def __init__( + self, + predicate: typing.Union[PredicateExpression, URI], + count: typing.Optional[typing.Union[FilterExpression, int]] = None, + ): + # check predicate + if isinstance(predicate, URI): + predicate = Predicate(predicate) + elif not isinstance(predicate, PredicateExpression): + raise TypeError(predicate) + # check count + if count is None: + count = GreaterThan(1, strict=False) + elif isinstance(count, int): + count = Equals(count) + elif not isinstance(count, FilterExpression): + raise TypeError(count) + # assign members + self.predicate = predicate + self.count = count + + def __repr__(self) -> str: + return f'{typename(self)}({self.predicate}, {self.count})' + + def __hash__(self) -> int: + return hash((super().__hash__(), self.predicate, self.count)) + + def __eq__(self, other) -> bool: + return super().__eq__(other) \ + and self.predicate == other.predicate \ + and self.count == other.count + + +class _Value(FilterExpression): + """ + """ + + # target value. + value: typing.Any + + def __init__(self, value: typing.Any): + self.value = value + + def __repr__(self) -> str: + return f'{typename(self)}({self.value})' + + def __hash__(self) -> int: + return hash((super().__hash__(), self.value)) + + def __eq__(self, other) -> bool: + return super().__eq__(other) and self.value == other.value + + +class Is(_Value): + """Match the URI of a node.""" + + +class Equals(_Value): + """Value matches exactly. + NOTE: Value format must correspond to literal type; can be a string, a number, or a Node + """ + + +class Substring(_Value): + """Value matches a substring + NOTE: value format must be a string + """ + + +class StartsWith(_Value): + """Value begins with a given string.""" + + +class EndsWith(_Value): + """Value ends with a given string.""" + + +class _Bounded(FilterExpression): + """ + """ + + # bound. + threshold: float + + # closed (True) or open (False) bound. + strict: bool + + def __init__( + self, + threshold: float, + strict: bool = True, + ): + self.threshold = float(threshold) + self.strict = bool(strict) + + def __repr__(self) -> str: + return f'{typename(self)}({self.threshold}, {self.strict})' + + def __hash__(self) -> int: + return hash((super().__hash__(), self.threshold, self.strict)) + + def __eq__(self, other) -> bool: + return super().__eq__(other) \ + and self.threshold == other.threshold \ + and self.strict == other.strict + + + +class LessThan(_Bounded): + """Value is (strictly) smaller than threshold. + NOTE: only on numerical literals + """ + + +class GreaterThan(_Bounded): + """Value is (strictly) larger than threshold + NOTE: only on numerical literals + """ + + +class Predicate(PredicateExpression): + """A single predicate.""" + + # predicate URI + predicate: URI + + # reverse the predicate's direction + reverse: bool + + def __init__( + self, + predicate: URI, + reverse: typing.Optional[bool] = False, + ): + # check arguments + if not isinstance(predicate, URI): + raise TypeError(predicate) + # assign members + self.predicate = predicate + self.reverse = bool(reverse) + + def __repr__(self) -> str: + return f'{typename(self)}({self.predicate}, {self.reverse})' + + def __hash__(self) -> int: + return hash((super().__hash__(), self.predicate, self.reverse)) + + def __eq__(self, other) -> bool: + return super().__eq__(other) \ + and self.predicate == other.predicate \ + and self.reverse == other.reverse + + +class OneOf(PredicateExpression, abc.Collection): + """A set of predicate alternatives. + + The predicates' domains must be ascendants or descendants of each other. + The overall domain is the most specific one. + + The predicate's domains must be ascendants or descendants of each other. + The overall range is the most generic one. + """ + + # predicate alternatives + expr: typing.Set[PredicateExpression] + + def __init__(self, *expr: typing.Union[PredicateExpression, URI]): + # unfold arguments + unfolded = set(normalize_args(*expr)) # type: ignore [arg-type] # this is getting too complex... + # check arguments + if len(unfolded) == 0: + raise AttributeError('expected at least one expression, found none') + # ensure PredicateExpression + unfolded = {Predicate(e) if isinstance(e, URI) else e for e in unfolded} + # check type + if not all(isinstance(e, PredicateExpression) for e in unfolded): + raise TypeError(expr) + # assign member + self.expr = unfolded + + def __contains__(self, expr: typing.Any) -> bool: + """Return True if *expr* is among the child expressions.""" + return expr in self.expr + + def __iter__(self) -> typing.Iterator[PredicateExpression]: + """Iterator over child expressions.""" + return iter(self.expr) + + def __len__(self) -> int: + """Number of child expressions.""" + return len(self.expr) + + def __repr__(self) -> str: + return f'{typename(self)}({self.expr})' + + def __hash__(self) -> int: + return hash((super().__hash__(), tuple(self.expr))) # FIXME: Unique hash of different orders over self.expr + + def __eq__(self, other) -> bool: + return super().__eq__(other) and self.expr == other.expr + + +# Helpers + +def IsIn(*values): # pylint: disable=invalid-name # explicitly mimics an expression + """Match any of the given URIs.""" + return Or(Is(value) for value in normalize_args(*values)) + +def IsNotIn(*values): # pylint: disable=invalid-name # explicitly mimics an expression + """Match none of the given URIs.""" + return Not(IsIn(*values)) + ## EOF ## diff --git a/bsfs/query/validator.py b/bsfs/query/validator.py index 123b947..352203a 100644 --- a/bsfs/query/validator.py +++ b/bsfs/query/validator.py @@ -9,6 +9,8 @@ import typing # bsfs imports from bsfs import schema as bsc +from bsfs.namespace import ns +from bsfs.utils import errors, typename # inner-module imports from . import ast @@ -22,6 +24,18 @@ __all__ : typing.Sequence[str] = ( ## code ## class Filter(): + """Validate a `bsfs.query.ast.filter` query's structure and schema compliance. + + * Conditions (Bounded, Value) can only be applied on literals + * Branches, Id, and Has can only be applied on nodes + * Predicates' domain and range must match + * Predicate paths must follow the schema + * Referenced types are present in the schema + + """ + + # vertex types + T_VERTEX = typing.Union[bsc.Node, bsc.Literal] # FIXME: Shouldn't this be in the schema? # schema to validate against. schema: bsc.Schema @@ -29,180 +43,182 @@ class Filter(): def __init__(self, schema: bsc.Schema): self.schema = schema - def parse(self, node: ast.filter.FilterExpression, subject: bsc.types._Vertex): - # subject is a node type - if not isinstance(subject, bsc.Node): - raise errors.ConsistencyError(f'Expected a node, found {subject}') - # subject exists in the schema - if subject not in self.schema.nodes: - raise errors.ConsistencyError(f'Invalid node type {subject}') - # root expression is valid - self._parse(node, subject) + def __call__(self, root_type: bsc.Node, query: ast.filter.FilterExpression): + """Validate a filter *query*, assuming the subject having *root_type*. + + Raises a `bsfs.utils.errors.ConsistencyError` if the query violates the schema. + Raises a `bsfs.utils.errors.BackendError` if the query structure is invalid. + + """ + # root_type must be a schema.Node + if not isinstance(root_type, bsc.Node): + raise TypeError(f'Expected a node, found {typename(root_type)}') + # root_type must exist in the schema + if root_type not in self.schema.nodes(): + raise errors.ConsistencyError(f'{root_type} is not defined in the schema') + # check root expression + self._parse_filter_expression(root_type, query) # all tests passed return True - def _parse_numerical_expression(self, node: ast.filter.FilterExpression, subject: bsc.types._Vertex): - if isinstance(node, ast.filter.And): - return self._and(node, subject) - elif isinstance(node, ast.filter.Or): - return self._or(node, subject) - elif isinstance(node, ast.filter.LessThan): - return self._lessThan(node, subject) - elif isinstance(node, ast.filter.GreaterThan): - return self._greaterThan(node, subject) - elif isinstance(node, ast.filter.Equals): - return self._equals(node, subject, numerical=True) - else: - raise errors.ConsistencyError(f'Expected a numerical expression, found {node}') - - - def __branch(self, node: typing.Union[ast.filter.Any, ast.filter.And], subject: bsc.types._Vertex): - # subject is a node type - if not isinstance(subject, bsc.Node): - raise errors.ConsistencyError(f'Expected a node, found {subject}') - # subject exists in the schema - if subject not in self.schema.nodes: - raise errors.ConsistencyError(f'Invalid node type {subject}') - # predicate is valid - dom, rng = self._parse_predicate_expression(node.predicate) - # subject is a subtype of the predicate's domain - if not subject <= dom: - raise errors.ConsistencyError(f'Expected type {dom}, found {subject}') - # child expression is valid - self._parse_filter_expression(node.expr, rng) + ## routing methods + + def _parse_filter_expression(self, type_: T_VERTEX, node: ast.filter.FilterExpression): + """Route *node* to the handler of the respective FilterExpression subclass.""" + if isinstance(node, ast.filter.Is): + return self._is(type_, node) + if isinstance(node, ast.filter.Not): + return self._not(type_, node) + if isinstance(node, ast.filter.Has): + return self._has(type_, node) + if isinstance(node, (ast.filter.Any, ast.filter.All)): + return self._branch(type_, node) + if isinstance(node, (ast.filter.And, ast.filter.Or)): + return self._agg(type_, node) + if isinstance(node, (ast.filter.Equals, ast.filter.Substring, ast.filter.StartsWith, ast.filter.EndsWith)): + return self._value(type_, node) + if isinstance(node, (ast.filter.LessThan, ast.filter.GreaterThan)): + return self._bounded(type_, node) + # invalid node + raise errors.BackendError(f'expected filter expression, found {node}') + + def _parse_predicate_expression(self, node: ast.filter.PredicateExpression) -> typing.Tuple[T_VERTEX, T_VERTEX]: + """Route *node* to the handler of the respective PredicateExpression subclass.""" + if isinstance(node, ast.filter.Predicate): + return self._predicate(node) + if isinstance(node, ast.filter.OneOf): + return self._one_of(node) + # invalid node + raise errors.BackendError(f'expected predicate expression, found {node}') + + + ## predicate expressions + + def _predicate(self, node: ast.filter.Predicate) -> typing.Tuple[T_VERTEX, T_VERTEX]: + # predicate exists in the schema + if not self.schema.has_predicate(node.predicate): + raise errors.ConsistencyError(f'predicate {node.predicate} is not in the schema') + # determine domain and range + pred = self.schema.predicate(node.predicate) + dom, rng = pred.domain, pred.range + if rng is None: + # FIXME: It is a design error that Predicates can have a None range... + raise errors.BackendError(f'predicate {pred} has no range') + if node.reverse: + dom, rng = rng, dom # type: ignore [assignment] # variable re-use confuses mypy + # return domain and range + return dom, rng - def _any(self, node: ast.filter.Any, subject: bsc.types._Vertex): - return self.__branch(node, subject) + def _one_of(self, node: ast.filter.OneOf) -> typing.Tuple[T_VERTEX, T_VERTEX]: + # determine domain and range types + # NOTE: select the most specific domain and the most generic range + dom, rng = None, None + for pred in node: + # parse child expression + subdom, subrng = self._parse_predicate_expression(pred) + try: + # determine overall domain + if dom is None or subdom < dom: # pick most specific domain + dom = subdom + # domains must be related across all child expressions + if not subdom <= dom and not subdom >= dom: + raise errors.ConsistencyError(f'domains {subdom} and {dom} are not related') + except TypeError as err: # compared literal vs. node + raise errors.ConsistencyError(f'domains {subdom} and {dom} are not of the same type') from err - def _all(self, node: ast.filter.All, subject: bsc.types._Vertex): - return self.__branch(node, subject) + try: + # determine overall range + if rng is None or subrng > rng: # pick most generic range + rng = subrng + # ranges must be related across all child expressions + if not subrng <= rng and not subrng >= rng: + raise errors.ConsistencyError(f'ranges {subrng} and {rng} are not related') + except TypeError as err: # compared literal vs. node + raise errors.ConsistencyError(f'ranges {subrng} and {rng} are not of the same type') from err + # check domain and range + if dom is None or rng is None: + # OneOf guarantees at least one expression, these two cases cannot happen + raise errors.UnreachableError() + # return domain and range + return dom, rng - def __agg(self, node: typing.Union[ast.filter.And, ast.filter.Or], subject: bsc.types._Vertex): + ## intermediates + + def _branch(self, type_: T_VERTEX, node: ast.filter._Branch): + # type is a Node + if not isinstance(type_, bsc.Node): + raise errors.ConsistencyError(f'expected a Node, found {type_}') + # type exists in the schema + # FIXME: Isn't it actually guaranteed that the type (except the root type) is part of the schema? + # all types can be traced back to (a) root_type, (b) predicate, or (c) manually set (e.g. in _is). + # For (a), we do (and have to) perform a check. For (c), the code base should be consistent throughout + # the module, so this is an assumption that has to be ensured in schema.Schema. For (b), we know (and + # check) that the predicate is in the schema, hence all node/literals derived from it are also in the + # schema by construction of the schema.Schema class. So, why do we check this every time? + if type_ not in self.schema.nodes(): + raise errors.ConsistencyError(f'node {type_} is not in the schema') + # predicate is valid + dom, rng = self._parse_predicate_expression(node.predicate) + # type_ is a subtype of the predicate's domain + if not type_ <= dom: + raise errors.ConsistencyError(f'expected type {dom} or subtype thereof, found {type_}') + # child expression is valid + self._parse_filter_expression(rng, node.expr) + + def _agg(self, type_: T_VERTEX, node: ast.filter._Agg): for expr in node: # child expression is valid - self._parse_filter_expression(expr, subject) - - def _and(self, node: ast.filter.And, subject: bsc.types._Vertex): - return self.__agg(node, subject) - - def _or(self, node: ast.filter.Or, subject: bsc.types._Vertex): - return self.__agg(node, subject) - + self._parse_filter_expression(type_, expr) - def _not(self, node: ast.filter.Not, subject: bsc.types._Vertex): + def _not(self, type_: T_VERTEX, node: ast.filter.Not): # child expression is valid - self._parse_filter_expression(node.expr, subject) - - - def _has(self, node: ast.filter.Has, subject: bsc.types._Vertex): - # subject is a node type - if not isinstance(subject, bsc.Node): - raise errors.ConsistencyError(f'Expected a node, found {subject}') - # subject exists in the schema - if subject not in self.schema.nodes: - raise errors.ConsistencyError(f'Invalid node type {subject}') + self._parse_filter_expression(type_, node.expr) + + def _has(self, type_: T_VERTEX, node: ast.filter.Has): + # type is a Node + if not isinstance(type_, bsc.Node): + raise errors.ConsistencyError(f'expected a Node, found {type_}') + # type exists in the schema + if type_ not in self.schema.nodes(): + raise errors.ConsistencyError(f'node {type_} is not in the schema') # predicate is valid - dom, rng = self._parse_predicate_expression(node.predicate) - # subject is a subtype of the predicate's domain - if not subject <= dom: - raise errors.ConsistencyError(f'Expected type {dom}, found {subject}') + dom, _= self._parse_predicate_expression(node.predicate) + # type_ is a subtype of the predicate's domain + if not type_ <= dom: + raise errors.ConsistencyError(f'expected type {dom}, found {type_}') # node.count is a numerical expression - self._parse_numerical_expression(node.count, self.schema.literal(ns.xsd.numerical)) - - - def _equals(self, node: ast.filter.Equals, subject: bsc.types._Vertex, numerical: bool = False): - # subject is a literal - #if not isinstance(subject, bsc.Literal): - # raise errors.ConsistencyError(f'Expected a literal, found {subject}') - if isinstance(subject, bsc.Node): - # FIXME: How to handle this case? - # FIXME: How to check if a NodeType is acceptable? - # FIXME: Maybe use flags to control what is expected as node identifiers? - from bsfs.graph.nodes import Nodes # FIXME - if not isinstance(node.value, Nodes) and not isinstance(node.value, URI): - raise errors.ConsistencyError(f'Expected a Nodes or URI, found {node.value}') - elif isinstance(subject, bsc.Literal): - # literal exists in the schema - if subject not in self.schema.literals: - raise errors.ConsistencyError(f'Invalid literal type {subject}') - else: - # FIXME: - raise errors.ConsistencyError(f'Expected a literal, found {subject}') - # node.value is numeric (if requested) - if numerical and not isinstance(node.value, float) and not isinstance(node.value, int): - raise errors.ConsistencyError(f'Expected a numerical value (int or float), found {node.value}') - # NOTE: We cannot check if node.value agrees with the subject since we don't know - # all literal types, their hierarchy, and how the backend converts datatypes. - - - def _substring(self, node: ast.filter.Substring, subject: bsc.types._Vertex): - # subject is a literal - if not isinstance(subject, bsc.Literal): - raise errors.ConsistencyError(f'Expected a literal, found {subject}') - # literal exists in the schema - if subject not in self.schema.literals: - raise errors.ConsistencyError(f'Invalid literal type {subject}') - # node.value matches literal datatype - if not subject.is_a(ns.xsd.string): - raise errors.ConsistencyError(f'Expected a string literal, found {subject}') - - - def _lessThan(self, node: ast.filter.LessThan, subject: bsc.types._Vertex): - # subject is a literal - if not isinstance(subject, bsc.Literal): - raise errors.ConsistencyError(f'Expected a literal, found {subject}') - # literal exists in the schema - if subject not in self.schema.literals: - raise errors.ConsistencyError(f'Invalid literal type {subject}') - # subject is numerical - if not subject.is_a(ns.xsd.numerical): - raise errors.ConsistencyError(f'Expected a numerical literal, found {subject}') - - - def _greaterThan(self, node: ast.filter.GreaterThan, subject: bsc.types._Vertex): - # subject is a literal - if not isinstance(subject, bsc.Literal): - raise errors.ConsistencyError(f'Expected a literal, found {subject}') - # literal exists in the schema - if subject not in self.schema.literals: - raise errors.ConsistencyError(f'Invalid literal type {subject}') - # subject is numerical - if not subject.is_a(ns.xsd.numerical): - raise errors.ConsistencyError(f'Expected a numerical literal, found {subject}') - - - def _predicate(self, node: ast.filter.Predicate): - try: - # predicate exists in the schema - pred = self.schema.predicate(node.predicate) - except KeyError: - raise errors.ConsistencyError(f'') # FIXME - if node.reverse: - return pred.range, pred.domain - else: - return pred.domain, pred.range - + # FIXME: We have to ensure that ns.xsd.integer is always known in the schema! + self._parse_filter_expression(self.schema.literal(ns.xsd.integer), node.count) + + + ## conditions + + def _is(self, type_: T_VERTEX, node: ast.filter.Is): # pylint: disable=unused-argument # (node) + if not isinstance(type_, bsc.Node): + raise errors.ConsistencyError(f'expected a Node, found {type_}') + if type_ not in self.schema.nodes(): + raise errors.ConsistencyError(f'node {type_} is not in the schema') + + def _value(self, type_: T_VERTEX, node: ast.filter._Value): # pylint: disable=unused-argument # (node) + # type is a literal + if not isinstance(type_, bsc.Literal): + raise errors.ConsistencyError(f'expected a Literal, found {type_}') + # type exists in the schema + if type_ not in self.schema.literals(): + raise errors.ConsistencyError(f'literal {type_} is not in the schema') + # FIXME: Check if node.value corresponds to type_ + # FIXME: A specific literal might be requested (i.e., a numeric type when used in Has) + + def _bounded(self, type_: T_VERTEX, node: ast.filter._Bounded): # pylint: disable=unused-argument # (node) + # type is a literal + if not isinstance(type_, bsc.Literal): + raise errors.ConsistencyError(f'expected a Literal, found {type_}') + # type exists in the schema + if type_ not in self.schema.literals(): + raise errors.ConsistencyError(f'literal {type_} is not in the schema') + # FIXME: Check if node.value corresponds to type_ - def _oneOf(self, node: ast.filter.OneOf): - dom, rng = None, None - for pred in node: - try: - # parse child expression - subdom, subrng = self._parse_predicate_expression(pred) - # domain and range must be related across all child expressions - if not subdom <= dom and not subdom >= dom: - raise errors.ConsistencyError(f'') # FIXME - if not subrng <= rng and not subrng >= rng: - raise errors.ConsistencyError(f'') # FIXME - # determine overall domain and range - if dom is None or subdom < dom: # pick most specific domain - dom = subdom - if rng is None or subrng > rng: # pick most generic range - rng = subrng - except KeyError: - raise errors.ConsistencyError(f'') - return dom, rng ## EOF ## -- cgit v1.2.3 From 3940cb3c79937a431ba2ae3b57fd0c6c2ccfff33 Mon Sep 17 00:00:00 2001 From: Matthias Baumgartner Date: Thu, 12 Jan 2023 10:12:43 +0100 Subject: use Vertex in type annotations --- bsfs/query/validator.py | 25 +++++++++++-------------- 1 file changed, 11 insertions(+), 14 deletions(-) (limited to 'bsfs/query') diff --git a/bsfs/query/validator.py b/bsfs/query/validator.py index 352203a..6bf1b72 100644 --- a/bsfs/query/validator.py +++ b/bsfs/query/validator.py @@ -34,9 +34,6 @@ class Filter(): """ - # vertex types - T_VERTEX = typing.Union[bsc.Node, bsc.Literal] # FIXME: Shouldn't this be in the schema? - # schema to validate against. schema: bsc.Schema @@ -64,7 +61,7 @@ class Filter(): ## routing methods - def _parse_filter_expression(self, type_: T_VERTEX, node: ast.filter.FilterExpression): + def _parse_filter_expression(self, type_: bsc.Vertex, node: ast.filter.FilterExpression): """Route *node* to the handler of the respective FilterExpression subclass.""" if isinstance(node, ast.filter.Is): return self._is(type_, node) @@ -83,7 +80,7 @@ class Filter(): # invalid node raise errors.BackendError(f'expected filter expression, found {node}') - def _parse_predicate_expression(self, node: ast.filter.PredicateExpression) -> typing.Tuple[T_VERTEX, T_VERTEX]: + def _parse_predicate_expression(self, node: ast.filter.PredicateExpression) -> typing.Tuple[bsc.Vertex, bsc.Vertex]: """Route *node* to the handler of the respective PredicateExpression subclass.""" if isinstance(node, ast.filter.Predicate): return self._predicate(node) @@ -95,7 +92,7 @@ class Filter(): ## predicate expressions - def _predicate(self, node: ast.filter.Predicate) -> typing.Tuple[T_VERTEX, T_VERTEX]: + def _predicate(self, node: ast.filter.Predicate) -> typing.Tuple[bsc.Vertex, bsc.Vertex]: # predicate exists in the schema if not self.schema.has_predicate(node.predicate): raise errors.ConsistencyError(f'predicate {node.predicate} is not in the schema') @@ -110,7 +107,7 @@ class Filter(): # return domain and range return dom, rng - def _one_of(self, node: ast.filter.OneOf) -> typing.Tuple[T_VERTEX, T_VERTEX]: + def _one_of(self, node: ast.filter.OneOf) -> typing.Tuple[bsc.Vertex, bsc.Vertex]: # determine domain and range types # NOTE: select the most specific domain and the most generic range dom, rng = None, None @@ -146,7 +143,7 @@ class Filter(): ## intermediates - def _branch(self, type_: T_VERTEX, node: ast.filter._Branch): + def _branch(self, type_: bsc.Vertex, node: ast.filter._Branch): # type is a Node if not isinstance(type_, bsc.Node): raise errors.ConsistencyError(f'expected a Node, found {type_}') @@ -167,16 +164,16 @@ class Filter(): # child expression is valid self._parse_filter_expression(rng, node.expr) - def _agg(self, type_: T_VERTEX, node: ast.filter._Agg): + def _agg(self, type_: bsc.Vertex, node: ast.filter._Agg): for expr in node: # child expression is valid self._parse_filter_expression(type_, expr) - def _not(self, type_: T_VERTEX, node: ast.filter.Not): + def _not(self, type_: bsc.Vertex, node: ast.filter.Not): # child expression is valid self._parse_filter_expression(type_, node.expr) - def _has(self, type_: T_VERTEX, node: ast.filter.Has): + def _has(self, type_: bsc.Vertex, node: ast.filter.Has): # type is a Node if not isinstance(type_, bsc.Node): raise errors.ConsistencyError(f'expected a Node, found {type_}') @@ -195,13 +192,13 @@ class Filter(): ## conditions - def _is(self, type_: T_VERTEX, node: ast.filter.Is): # pylint: disable=unused-argument # (node) + def _is(self, type_: bsc.Vertex, node: ast.filter.Is): # pylint: disable=unused-argument # (node) if not isinstance(type_, bsc.Node): raise errors.ConsistencyError(f'expected a Node, found {type_}') if type_ not in self.schema.nodes(): raise errors.ConsistencyError(f'node {type_} is not in the schema') - def _value(self, type_: T_VERTEX, node: ast.filter._Value): # pylint: disable=unused-argument # (node) + def _value(self, type_: bsc.Vertex, node: ast.filter._Value): # pylint: disable=unused-argument # (node) # type is a literal if not isinstance(type_, bsc.Literal): raise errors.ConsistencyError(f'expected a Literal, found {type_}') @@ -211,7 +208,7 @@ class Filter(): # FIXME: Check if node.value corresponds to type_ # FIXME: A specific literal might be requested (i.e., a numeric type when used in Has) - def _bounded(self, type_: T_VERTEX, node: ast.filter._Bounded): # pylint: disable=unused-argument # (node) + def _bounded(self, type_: bsc.Vertex, node: ast.filter._Bounded): # pylint: disable=unused-argument # (node) # type is a literal if not isinstance(type_, bsc.Literal): raise errors.ConsistencyError(f'expected a Literal, found {type_}') -- cgit v1.2.3 From 7e7284d5fc01c0a081aa79d67736f51069864a7d Mon Sep 17 00:00:00 2001 From: Matthias Baumgartner Date: Thu, 12 Jan 2023 10:22:59 +0100 Subject: adapt to non-optional range in query checks --- bsfs/query/validator.py | 14 +++++--------- 1 file changed, 5 insertions(+), 9 deletions(-) (limited to 'bsfs/query') diff --git a/bsfs/query/validator.py b/bsfs/query/validator.py index 6bf1b72..b04a9bf 100644 --- a/bsfs/query/validator.py +++ b/bsfs/query/validator.py @@ -98,10 +98,9 @@ class Filter(): raise errors.ConsistencyError(f'predicate {node.predicate} is not in the schema') # determine domain and range pred = self.schema.predicate(node.predicate) + if not isinstance(pred.range, (bsc.Node, bsc.Literal)): + raise errors.BackendError(f'the range of predicate {pred} is undefined') dom, rng = pred.domain, pred.range - if rng is None: - # FIXME: It is a design error that Predicates can have a None range... - raise errors.BackendError(f'predicate {pred} has no range') if node.reverse: dom, rng = rng, dom # type: ignore [assignment] # variable re-use confuses mypy # return domain and range @@ -133,12 +132,9 @@ class Filter(): raise errors.ConsistencyError(f'ranges {subrng} and {rng} are not related') except TypeError as err: # compared literal vs. node raise errors.ConsistencyError(f'ranges {subrng} and {rng} are not of the same type') from err - # check domain and range - if dom is None or rng is None: - # OneOf guarantees at least one expression, these two cases cannot happen - raise errors.UnreachableError() - # return domain and range - return dom, rng + # OneOf guarantees at least one expression, dom and rng are always bsc.Vertex. + # mypy does not realize this, hence we ignore the warning. + return dom, rng # type: ignore [return-value] ## intermediates -- cgit v1.2.3 From b0ff4ed674ad78bf113c3cc0c2ccd187ccb91048 Mon Sep 17 00:00:00 2001 From: Matthias Baumgartner Date: Thu, 12 Jan 2023 10:26:30 +0100 Subject: number literal adaptions --- bsfs/query/validator.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'bsfs/query') diff --git a/bsfs/query/validator.py b/bsfs/query/validator.py index b04a9bf..75b51ca 100644 --- a/bsfs/query/validator.py +++ b/bsfs/query/validator.py @@ -182,8 +182,7 @@ class Filter(): if not type_ <= dom: raise errors.ConsistencyError(f'expected type {dom}, found {type_}') # node.count is a numerical expression - # FIXME: We have to ensure that ns.xsd.integer is always known in the schema! - self._parse_filter_expression(self.schema.literal(ns.xsd.integer), node.count) + self._parse_filter_expression(self.schema.literal(ns.bsfs.Number), node.count) ## conditions @@ -211,6 +210,9 @@ class Filter(): # type exists in the schema if type_ not in self.schema.literals(): raise errors.ConsistencyError(f'literal {type_} is not in the schema') + # type must be a numerical + if not type_ <= self.schema.literal(ns.bsfs.Number): + raise errors.ConsistencyError(f'expected a number type, found {type_}') # FIXME: Check if node.value corresponds to type_ -- cgit v1.2.3 From 60257ed3c2aa6ea2891f362a691bde9d7ef17831 Mon Sep 17 00:00:00 2001 From: Matthias Baumgartner Date: Fri, 13 Jan 2023 12:22:34 +0100 Subject: schema type comparison across classes --- bsfs/query/validator.py | 31 ++++++++++++------------------- 1 file changed, 12 insertions(+), 19 deletions(-) (limited to 'bsfs/query') diff --git a/bsfs/query/validator.py b/bsfs/query/validator.py index 75b51ca..ecea951 100644 --- a/bsfs/query/validator.py +++ b/bsfs/query/validator.py @@ -113,25 +113,18 @@ class Filter(): for pred in node: # parse child expression subdom, subrng = self._parse_predicate_expression(pred) - try: - # determine overall domain - if dom is None or subdom < dom: # pick most specific domain - dom = subdom - # domains must be related across all child expressions - if not subdom <= dom and not subdom >= dom: - raise errors.ConsistencyError(f'domains {subdom} and {dom} are not related') - except TypeError as err: # compared literal vs. node - raise errors.ConsistencyError(f'domains {subdom} and {dom} are not of the same type') from err - - try: - # determine overall range - if rng is None or subrng > rng: # pick most generic range - rng = subrng - # ranges must be related across all child expressions - if not subrng <= rng and not subrng >= rng: - raise errors.ConsistencyError(f'ranges {subrng} and {rng} are not related') - except TypeError as err: # compared literal vs. node - raise errors.ConsistencyError(f'ranges {subrng} and {rng} are not of the same type') from err + # determine overall domain + if dom is None or subdom < dom: # pick most specific domain + dom = subdom + # domains must be related across all child expressions + if not subdom <= dom and not subdom >= dom: + raise errors.ConsistencyError(f'domains {subdom} and {dom} are not related') + # determine overall range + if rng is None or subrng > rng: # pick most generic range + rng = subrng + # ranges must be related across all child expressions + if not subrng <= rng and not subrng >= rng: + raise errors.ConsistencyError(f'ranges {subrng} and {rng} are not related') # OneOf guarantees at least one expression, dom and rng are always bsc.Vertex. # mypy does not realize this, hence we ignore the warning. return dom, rng # type: ignore [return-value] -- cgit v1.2.3 From 80a97bfa9f22d0d6dd25928fe1754a3a0d1de78a Mon Sep 17 00:00:00 2001 From: Matthias Baumgartner Date: Sun, 15 Jan 2023 21:00:12 +0100 Subject: Distance filter ast node --- bsfs/query/ast/filter_.py | 59 +++++++++++++++++++++++++++++++++++++---------- bsfs/query/validator.py | 16 +++++++++++++ 2 files changed, 63 insertions(+), 12 deletions(-) (limited to 'bsfs/query') diff --git a/bsfs/query/ast/filter_.py b/bsfs/query/ast/filter_.py index b129ded..2f0270c 100644 --- a/bsfs/query/ast/filter_.py +++ b/bsfs/query/ast/filter_.py @@ -252,8 +252,7 @@ class Has(FilterExpression): class _Value(FilterExpression): - """ - """ + """Matches some value.""" # target value. value: typing.Any @@ -277,13 +276,13 @@ class Is(_Value): class Equals(_Value): """Value matches exactly. - NOTE: Value format must correspond to literal type; can be a string, a number, or a Node + NOTE: Value must correspond to literal type. """ class Substring(_Value): """Value matches a substring - NOTE: value format must be a string + NOTE: value must be a string. """ @@ -295,9 +294,49 @@ class EndsWith(_Value): """Value ends with a given string.""" +class Distance(FilterExpression): + """Distance to a reference is (strictly) below a threshold. Assumes a Feature literal.""" + + # FIXME: + # (a) pass a node/predicate as anchor instead of a value. + # Then we don't need to materialize the reference. + # (b) pass a FilterExpression (_Bounded) instead of a threshold. + # Then, we could also query values greater than a threshold. + + # reference value. + reference: typing.Any + + # distance threshold. + threshold: float + + # closed (True) or open (False) bound. + strict: bool + + def __init__( + self, + reference: typing.Any, + threshold: float, + strict: bool = False, + ): + self.reference = reference + self.threshold = float(threshold) + self.strict = bool(strict) + + def __repr__(self) -> str: + return f'{typename(self)}({self.reference}, {self.threshold}, {self.strict})' + + def __hash__(self) -> int: + return hash((super().__hash__(), tuple(self.reference), self.threshold, self.strict)) + + def __eq__(self, other) -> bool: + return super().__eq__(other) \ + and self.reference == other.reference \ + and self.threshold == other.threshold \ + and self.strict == other.strict + + class _Bounded(FilterExpression): - """ - """ + """Value is bounded by a threshold. Assumes a Number literal.""" # bound. threshold: float @@ -327,15 +366,11 @@ class _Bounded(FilterExpression): class LessThan(_Bounded): - """Value is (strictly) smaller than threshold. - NOTE: only on numerical literals - """ + """Value is (strictly) smaller than threshold. Assumes a Number literal.""" class GreaterThan(_Bounded): - """Value is (strictly) larger than threshold - NOTE: only on numerical literals - """ + """Value is (strictly) larger than threshold. Assumes a Number literal.""" class Predicate(PredicateExpression): diff --git a/bsfs/query/validator.py b/bsfs/query/validator.py index ecea951..1b7f688 100644 --- a/bsfs/query/validator.py +++ b/bsfs/query/validator.py @@ -69,6 +69,8 @@ class Filter(): return self._not(type_, node) if isinstance(node, ast.filter.Has): return self._has(type_, node) + if isinstance(node, ast.filter.Distance): + return self._distance(type_, node) if isinstance(node, (ast.filter.Any, ast.filter.All)): return self._branch(type_, node) if isinstance(node, (ast.filter.And, ast.filter.Or)): @@ -177,6 +179,20 @@ class Filter(): # node.count is a numerical expression self._parse_filter_expression(self.schema.literal(ns.bsfs.Number), node.count) + def _distance(self, type_: bsc.Vertex, node: ast.filter.Distance): + # type is a Literal + if not isinstance(type_, bsc.Feature): + raise errors.ConsistencyError(f'expected a Feature, found {type_}') + # type exists in the schema + if type_ not in self.schema.literals(): + raise errors.ConsistencyError(f'literal {type_} is not in the schema') + # reference matches type_ + if len(node.reference) != type_.dimension: + raise errors.ConsistencyError(f'reference has dimension {len(node.reference)}, expected {type_.dimension}') + # FIXME: + #if node.reference.dtype != type_.dtype: + # raise errors.ConsistencyError(f'') + ## conditions -- cgit v1.2.3 From 3504609e1ba1f7f653fa79910474bebd3ec24d8a Mon Sep 17 00:00:00 2001 From: Matthias Baumgartner Date: Mon, 16 Jan 2023 21:41:20 +0100 Subject: various minor fixes --- bsfs/query/validator.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'bsfs/query') diff --git a/bsfs/query/validator.py b/bsfs/query/validator.py index 1b7f688..904ac14 100644 --- a/bsfs/query/validator.py +++ b/bsfs/query/validator.py @@ -189,9 +189,7 @@ class Filter(): # reference matches type_ if len(node.reference) != type_.dimension: raise errors.ConsistencyError(f'reference has dimension {len(node.reference)}, expected {type_.dimension}') - # FIXME: - #if node.reference.dtype != type_.dtype: - # raise errors.ConsistencyError(f'') + # FIXME: test dtype ## conditions -- cgit v1.2.3 From a4789394e40aaa3152ad6009955709a6c7d277c2 Mon Sep 17 00:00:00 2001 From: Matthias Baumgartner Date: Fri, 20 Jan 2023 14:36:11 +0100 Subject: fetch AST --- bsfs/query/ast/__init__.py | 4 +- bsfs/query/ast/fetch.py | 175 +++++++++++++++++++++++++++++++++++++++++++++ bsfs/query/ast/filter_.py | 1 + 3 files changed, 179 insertions(+), 1 deletion(-) create mode 100644 bsfs/query/ast/fetch.py (limited to 'bsfs/query') diff --git a/bsfs/query/ast/__init__.py b/bsfs/query/ast/__init__.py index 704d051..66b097d 100644 --- a/bsfs/query/ast/__init__.py +++ b/bsfs/query/ast/__init__.py @@ -1,6 +1,6 @@ """Query AST components. -The query AST consists of a Filter syntax tree. +The query AST consists of a Filter and a Fetch syntax trees. Classes beginning with an underscore (_) represent internal type hierarchies and should not be used for parsing. Note that the AST structures do not @@ -14,10 +14,12 @@ Author: Matthias Baumgartner, 2022 import typing # inner-module imports +from . import fetch from . import filter_ as filter # pylint: disable=redefined-builtin # exports __all__: typing.Sequence[str] = ( + 'fetch', 'filter', ) diff --git a/bsfs/query/ast/fetch.py b/bsfs/query/ast/fetch.py new file mode 100644 index 0000000..5e603a1 --- /dev/null +++ b/bsfs/query/ast/fetch.py @@ -0,0 +1,175 @@ +""" + +Part of the BlackStar filesystem (bsfs) module. +A copy of the license is provided with the project. +Author: Matthias Baumgartner, 2022 +""" +# imports +from collections import abc +import typing + +# bsfs imports +from bsfs.utils import URI, typename, normalize_args + +# exports +__all__ : typing.Sequence[str] = ( + 'All', + 'Fetch', + 'FetchExpression', + 'Node', + 'This', + 'Value', + ) + + +## code ## + +class FetchExpression(abc.Hashable): + """Generic Fetch expression.""" + + def __repr__(self) -> str: + """Return the expressions's string representation.""" + return f'{typename(self)}()' + + def __hash__(self) -> int: + """Return the expression's integer representation.""" + return hash(type(self)) + + def __eq__(self, other: typing.Any) -> bool: + """Return True if *self* and *other* are equivalent.""" + return isinstance(other, type(self)) + + +class All(FetchExpression): + """Fetch all child expressions.""" + + # child expressions. + expr: typing.Set[FetchExpression] + + def __init__(self, *expr): + # unpack child expressions + unfolded = set(normalize_args(*expr)) + # check child expressions + if len(unfolded) == 0: + raise AttributeError('expected at least one expression, found none') + if not all(isinstance(itm, FetchExpression) for itm in unfolded): + raise TypeError(expr) + # initialize + super().__init__() + # assign members + self.expr = unfolded + + def __iter__(self) -> typing.Iterator[FetchExpression]: + return iter(self.expr) + + def __len__(self) -> int: + return len(self.expr) + + def __repr__(self) -> str: + return f'{typename(self)}({self.expr})' + + def __hash__(self) -> int: + # FIXME: Produces different hashes for different orders of self.expr + return hash((super().__hash__(), tuple(self.expr))) + + def __eq__(self, other: typing.Any) -> bool: + return super().__eq__(other) and self.expr == other.expr + + +class _Branch(FetchExpression): + """Branch along a predicate.""" + + # FIXME: Use a Predicate (like in ast.filter) so that we can also reverse them! + + # predicate to follow. + predicate: URI + + def __init__(self, predicate: URI): + if not isinstance(predicate, URI): + raise TypeError(predicate) + self.predicate = predicate + + def __repr__(self) -> str: + return f'{typename(self)}({self.predicate})' + + def __hash__(self) -> int: + return hash((super().__hash__(), self.predicate)) + + def __eq__(self, other: typing.Any) -> bool: + return super().__eq__(other) and self.predicate == other.predicate + + +class Fetch(_Branch): + """Follow a predicate before evaluating a child epxression.""" + + # child expression. + expr: FetchExpression + + def __init__(self, predicate: URI, expr: FetchExpression): + # check child expressions + if not isinstance(expr, FetchExpression): + raise TypeError(expr) + # initialize + super().__init__(predicate) + # assign members + self.expr = expr + + def __repr__(self) -> str: + return f'{typename(self)}({self.predicate}, {self.expr})' + + def __hash__(self) -> int: + return hash((super().__hash__(), self.expr)) + + def __eq__(self, other: typing.Any) -> bool: + return super().__eq__(other) and self.expr == other.expr + + +class _Named(_Branch): + """Fetch a (named) symbol at a predicate.""" + + # symbol name. + name: str + + def __init__(self, predicate: URI, name: str): + super().__init__(predicate) + self.name = str(name) + + def __repr__(self) -> str: + return f'{typename(self)}({self.predicate}, {self.name})' + + def __hash__(self) -> int: + return hash((super().__hash__(), self.name)) + + def __eq__(self, other: typing.Any) -> bool: + return super().__eq__(other) and self.name == other.name + + +class Node(_Named): # pylint: disable=too-few-public-methods + """Fetch a Node at a predicate.""" + # FIXME: Is this actually needed? + + +class Value(_Named): # pylint: disable=too-few-public-methods + """Fetch a Literal at a predicate.""" + + +class This(FetchExpression): + """Fetch the current Node.""" + + # symbol name. + name: str + + def __init__(self, name: str): + super().__init__() + self.name = str(name) + + def __repr__(self) -> str: + return f'{typename(self)}({self.name})' + + def __hash__(self) -> int: + return hash((super().__hash__(), self.name)) + + def __eq__(self, other: typing.Any) -> bool: + return super().__eq__(other) and self.name == other.name + +## EOF ## diff --git a/bsfs/query/ast/filter_.py b/bsfs/query/ast/filter_.py index 2f0270c..81b0de2 100644 --- a/bsfs/query/ast/filter_.py +++ b/bsfs/query/ast/filter_.py @@ -153,6 +153,7 @@ class _Agg(FilterExpression, abc.Collection): # check type if not all(isinstance(e, FilterExpression) for e in unfolded): raise TypeError(expr) + # FIXME: Require at least one child expression? # assign member self.expr = unfolded -- cgit v1.2.3 From e2f08efc0d8a3c875994bdb69623c30cce5079d9 Mon Sep 17 00:00:00 2001 From: Matthias Baumgartner Date: Fri, 20 Jan 2023 18:01:17 +0100 Subject: fetch AST validation --- bsfs/query/validator.py | 123 +++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 122 insertions(+), 1 deletion(-) (limited to 'bsfs/query') diff --git a/bsfs/query/validator.py b/bsfs/query/validator.py index 904ac14..9fbff12 100644 --- a/bsfs/query/validator.py +++ b/bsfs/query/validator.py @@ -49,7 +49,7 @@ class Filter(): """ # root_type must be a schema.Node if not isinstance(root_type, bsc.Node): - raise TypeError(f'Expected a node, found {typename(root_type)}') + raise TypeError(f'expected a node, found {typename(root_type)}') # root_type must exist in the schema if root_type not in self.schema.nodes(): raise errors.ConsistencyError(f'{root_type} is not defined in the schema') @@ -223,4 +223,125 @@ class Filter(): # FIXME: Check if node.value corresponds to type_ +class Fetch(): + """Validate a `bsfs.query.ast.fetch` query's structure and schema compliance. + + * Value can only be applied on literals + * Node can only be applied on nodes + * Names must be non-empty + * Branching nodes' predicates must match the type + * Symbols must be in the schema + * Predicates must follow the schema + + """ + + # schema to validate against. + schema: bsc.Schema + + def __init__(self, schema: bsc.Schema): + self.schema = schema + + def __call__(self, root_type: bsc.Node, query: ast.fetch.FetchExpression): + """Validate a fetch *query*, assuming the subject having *root_type*. + + Raises a `bsfs.utils.errors.ConsistencyError` if the query violates the schema. + Raises a `bsfs.utils.errors.BackendError` if the query structure is invalid. + + """ + # root_type must be a schema.Node + if not isinstance(root_type, bsc.Node): + raise TypeError(f'expected a node, found {typename(root_type)}') + # root_type must exist in the schema + if root_type not in self.schema.nodes(): + raise errors.ConsistencyError(f'{root_type} is not defined in the schema') + # query must be a FetchExpression + if not isinstance(query, ast.fetch.FetchExpression): + raise TypeError(f'expected a fetch expression, found {typename(query)}') + # check root expression + self._parse_fetch_expression(root_type, query) + # all tests passed + return True + + def _parse_fetch_expression(self, type_: bsc.Vertex, node: ast.fetch.FetchExpression): + """Route *node* to the handler of the respective FetchExpression subclass.""" + if isinstance(node, (ast.fetch.Fetch, ast.fetch.Value, ast.fetch.Node)): + # NOTE: don't return so that checks below are executed + self._branch(type_, node) + if isinstance(node, (ast.fetch.Value, ast.fetch.Node)): + # NOTE: don't return so that checks below are executed + self._named(type_, node) + if isinstance(node, ast.fetch.All): + return self._all(type_, node) + if isinstance(node, ast.fetch.Fetch): + return self._fetch(type_, node) + if isinstance(node, ast.fetch.Value): + return self._value(type_, node) + if isinstance(node, ast.fetch.Node): + return self._node(type_, node) + if isinstance(node, ast.fetch.This): + return self._this(type_, node) + # invalid node + raise errors.BackendError(f'expected fetch expression, found {node}') + + def _all(self, type_: bsc.Vertex, node: ast.fetch.All): + # check child expressions + for expr in node: + self._parse_fetch_expression(type_, expr) + + def _branch(self, type_: bsc.Vertex, node: ast.fetch._Branch): + # type is a node + if not isinstance(type_, bsc.Node): + raise errors.ConsistencyError(f'expected a Node, found {type_}') + # node exists in the schema + if type_ not in self.schema.nodes(): + raise errors.ConsistencyError(f'node {type_} is not in the schema') + # predicate exists in the schema + if not self.schema.has_predicate(node.predicate): + raise errors.ConsistencyError(f'predicate {node.predicate} is not in the schema') + pred = self.schema.predicate(node.predicate) + # type_ must be a subclass of domain + if not type_ <= pred.domain: + raise errors.ConsistencyError( + f'expected type {pred.domain} or subtype thereof, found {type_}') + + def _fetch(self, type_: bsc.Vertex, node: ast.fetch.Fetch): # pylint: disable=unused-argument # type_ was considered in _branch + # range must be a node + rng = self.schema.predicate(node.predicate).range + if not isinstance(rng, bsc.Node): + raise errors.ConsistencyError( + f'expected the predicate\'s range to be a Node, found {rng}') + # child expression must be valid + self._parse_fetch_expression(rng, node.expr) + + def _named(self, type_: bsc.Vertex, node: ast.fetch._Named): # pylint: disable=unused-argument # type_ was considered in _branch + # name must be set + if node.name.strip() == '': + raise errors.BackendError('node name cannot be empty') + # FIXME: check for double name use? + + def _node(self, type_: bsc.Vertex, node: ast.fetch.Node): # pylint: disable=unused-argument # type_ was considered in _branch + # range must be a node + rng = self.schema.predicate(node.predicate).range + if not isinstance(rng, bsc.Node): + raise errors.ConsistencyError( + f'expected the predicate\'s range to be a Node, found {rng}') + + def _value(self, type_: bsc.Vertex, node: ast.fetch.Value): # pylint: disable=unused-argument # type_ was considered in _branch + # range must be a literal + rng = self.schema.predicate(node.predicate).range + if not isinstance(rng, bsc.Literal): + raise errors.ConsistencyError( + f'expected the predicate\'s range to be a Literal, found {rng}') + + def _this(self, type_: bsc.Vertex, node: ast.fetch.This): + # type is a node + if not isinstance(type_, bsc.Node): + raise errors.ConsistencyError(f'expected a Node, found {type_}') + # node exists in the schema + if type_ not in self.schema.nodes(): + raise errors.ConsistencyError(f'node {type_} is not in the schema') + # name must be set + if node.name.strip() == '': + raise errors.BackendError('node name cannot be empty') + ## EOF ## -- cgit v1.2.3 From 7e0987bcda136a17baea45b8eb22eb5ea668abc0 Mon Sep 17 00:00:00 2001 From: Matthias Baumgartner Date: Mon, 30 Jan 2023 14:35:32 +0100 Subject: filter ast comparison --- bsfs/query/matcher.py | 366 ++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 366 insertions(+) create mode 100644 bsfs/query/matcher.py (limited to 'bsfs/query') diff --git a/bsfs/query/matcher.py b/bsfs/query/matcher.py new file mode 100644 index 0000000..a910756 --- /dev/null +++ b/bsfs/query/matcher.py @@ -0,0 +1,366 @@ +""" + +Part of the BlackStar filesystem (bsfs) module. +A copy of the license is provided with the project. +Author: Matthias Baumgartner, 2022 +""" +# imports +from collections import defaultdict +from itertools import product +from time import time +import random +import threading +import typing + +# external imports +from hopcroftkarp import HopcroftKarp + +# bsfs imports +from bsfs.utils import errors, typename + +# inner-module imports +from . import ast + +# exports +__all__ : typing.Sequence[str] = ( + 'Filter', + ) + + +## code ## + +class Any(ast.filter.FilterExpression, ast.filter.PredicateExpression): + """Match any ast class. + + Note that Any instances are unique, i.e. they do not compare, and + can hence be repeated in a set: + >>> Any() == Any() + False + >>> len({Any(), Any(), Any(), Any()}) + 4 + + """ + + # unique instance id + _uid: typing.Tuple[int, int, float, float] + + def __init__(self): + self._uid = ( + id(self), + id(threading.current_thread()), + time(), + random.random(), + ) + + def __eq__(self, other: typing.Any): + return super().__eq__(other) and self._uid == other._uid + + def __hash__(self): + return hash((super().__hash__(), self._uid)) + + +class Rest(ast.filter.FilterExpression, ast.filter.PredicateExpression): + """Match the leftovers in a set of items to be compared. + + Rest can be used in junction with aggregating expressions such as ast.filter.And, + ast.filter.Or, ast.filter.OneOf. It controls childs expressions that were not yet + consumed by other matching rules. Rest may match to only a specific expression. + The expresssion defaults to Any(). + + For example, the following to ast structures would match since Rest + allows an arbitrary repetition of ast.filter.Equals statements. + + >>> And(Equals('hello'), Equals('world'), Equals('foobar')) + >>> And(Equals('world'), Rest(Partial(Equals))) + + """ + + # child expression for the Rest. + expr: typing.Union[ast.filter.FilterExpression, ast.filter.PredicateExpression] + + def __init__( + self, + expr: typing.Optional[typing.Union[ast.filter.FilterExpression, ast.filter.PredicateExpression]] = None, + ): + if expr is None: + expr = Any() + self.expr = expr + + def __repr__(self) -> str: + return f'{typename(self)}({self.expr})' + + def __hash__(self) -> int: + return hash((super().__hash__(), self.expr)) + + def __eq__(self, other: typing.Any) -> bool: + return super().__eq__(other) and self.expr == other.expr + + +class Partial(ast.filter.FilterExpression, ast.filter.PredicateExpression): + """Match a partially defined ast expression. + + Literal values might be irrelevant or unknown when comparing two ast + structures. Partial allows to constrain the matcher to a certain + ast class, while leaving some of its members unspecified. + + Pass the class (not instance) and its members as keyword arguments + to Partial. Note that the arguments are not validated. + + For example, the following instance matches any ast.filter.Equals, + irrespective of its value: + + >>> Partial(ast.filter.Equals) + + Likewise, the following instance matches any ast.filter.LessThan + that has a strict bounds, but makes no claim about the threshold: + + >>> Partial(ast.filter.LessThan, strict=False) + + """ + + # target node type. + node: typing.Type + + # node construction args. + kwargs: typing.Dict[str, typing.Any] + + def __init__( + self, + node: typing.Type, + **kwargs, + ): + self.node = node + self.kwargs = kwargs + + def __repr__(self) -> str: + return f'{typename(self)}({self.node.__name__}, {self.kwargs})' + + def __hash__(self) -> int: + kwargs = tuple((key, self.kwargs[key]) for key in sorted(self.kwargs)) + return hash((super().__hash__(), self.node, kwargs)) + + def __eq__(self, other: typing.Any) -> bool: + return super().__eq__(other) \ + and self.node == other.node \ + and self.kwargs == other.kwargs + + def match( + self, + name: str, + value: typing.Any, + ) -> bool: + """Return True if *name* is unspecified or matches *value*.""" + return name not in self.kwargs or self.kwargs[name] == value + + +T_ITEM_TYPE = typing.TypeVar('T_ITEM_TYPE') # pylint: disable=invalid-name + +def _set_matcher( + query: typing.Collection[T_ITEM_TYPE], + reference: typing.Collection[T_ITEM_TYPE], + cmp: typing.Callable[[T_ITEM_TYPE, T_ITEM_TYPE], bool], + ) -> bool: + """Compare two sets of child expressions. + + This check has a best-case complexity of O(|N|**2) and worst-case + complexity of O(|N|**3), with N the number of child expressions. + """ + # get reference items + r_items = list(reference) + # deal with Rest + r_rest = {itm for itm in r_items if isinstance(itm, Rest)} + if len(r_rest) > 1: + raise errors.BackendError(f'there must be at most one Rest instance per set, found {len(r_rest)}') + if len(r_rest) == 1: + # replace Rest by filling the reference up with rest's expression + # NOTE: convert r_items to list so that items can be repeated + expr = next(iter(r_rest)).expr # type: ignore [attr-defined] + r_items = [itm for itm in r_items if not isinstance(itm, Rest)] + r_items += [expr for _ in range(len(query) - len(r_items))] # type: ignore [misc] + # sanity check: cannot match if the item sizes differ: + # either a reference item is unmatched (len(r_items) > len(query)) + # or a query item is unmatched (len(r_items) < len(query)) + if len(query) != len(r_items): + return False + + # To have a positive match between the query and the reference, + # each query expr has to match any reference expr. + # However, each reference expr can only be "consumed" once even + # if it matches multiple query exprs (e.g., the Any expression matches + # every query expr). + # This is a bipartide matching problem (Hall's marriage problem) + # and the Hopcroft-Karp-Karzanov algorithm finds a maximum + # matching. While there might be multiple maximum matchings, + # we only need to know whether (at least) one complete matching + # exists. The hopcroftkarp module provides this functionality. + # The HKK algorithm has worst-case complexity of O(|N|**2 * sqrt(|N|)) + # and we also need to compare expressions pairwise, hence O(|N|**2). + num_items = len(r_items) + graph = defaultdict(set) + # build the bipartide graph as {lhs: {rhs}, ...} + # lhs and rhs must be disjoint identifiers. + for (ridx, ref), (nidx, node) in product(enumerate(r_items), enumerate(query)): + # add edges for equal expressions + if cmp(node, ref): + graph[ridx].add(num_items + nidx) + + # maximum_matching returns the matches for all nodes in the graph + # ({ref_itm: node_itm}), hence a complete matching's size is + # the number of reference's child expressions. + return len(HopcroftKarp(graph).maximum_matching(keys_only=True)) == num_items + + +class Filter(): + """Compare a bsfs.query.ast.filter` query's structure to a reference ast. + + The reference ast may include `Rest`, `Partial`, or `Any` to account for irrelevant + or unknown ast pieces. + + This is only a structural comparison, not a semantic one. For example, the + two following queries are semantically identical, but structurally different, + and would therefore not match: + + >>> ast.filter.OneOf(ast.filter.Predicate(ns.bse.filename)) + >>> ast.filter.Predicate(ns.bse.filename) + + """ + + def __call__(self, query: ast.filter.FilterExpression, reference: ast.filter.FilterExpression) -> bool: + """Compare a *query* to a *reference* ast structure. + Return True if both are structurally equivalent. + """ + if not isinstance(query, ast.filter.FilterExpression): + raise errors.BackendError(f'expected filter expression, found {query}') + if not isinstance(reference, ast.filter.FilterExpression): + raise errors.BackendError(f'expected filter expression, found {reference}') + return self._parse_filter_expression(query, reference) + + def _parse_filter_expression( + self, + node: ast.filter.FilterExpression, + reference: ast.filter.FilterExpression, + ) -> bool: + """Route *node* to the handler of the respective FilterExpression subclass.""" + # generic checks: reference type must be Any or match node type + if isinstance(reference, Any): + return True + # node-specific checks + if isinstance(node, ast.filter.Not): + return self._not(node, reference) + if isinstance(node, ast.filter.Has): + return self._has(node, reference) + if isinstance(node, ast.filter.Distance): + return self._distance(node, reference) + if isinstance(node, (ast.filter.Any, ast.filter.All)): + return self._branch(node, reference) + if isinstance(node, (ast.filter.And, ast.filter.Or)): + return self._agg(node, reference) + if isinstance(node, (ast.filter.Is, ast.filter.Equals, ast.filter.Substring, + ast.filter.StartsWith, ast.filter.EndsWith)): + return self._value(node, reference) + if isinstance(node, (ast.filter.LessThan, ast.filter.GreaterThan)): + return self._bounded(node, reference) + # invalid node + raise errors.BackendError(f'expected filter expression, found {node}') + + def _parse_predicate_expression( + self, + node: ast.filter.PredicateExpression, + reference: ast.filter.PredicateExpression, + ) -> bool: + """Route *node* to the handler of the respective PredicateExpression subclass.""" + if isinstance(reference, Any): + return True + if isinstance(node, ast.filter.Predicate): + return self._predicate(node, reference) + if isinstance(node, ast.filter.OneOf): + return self._one_of(node, reference) + # invalid node + raise errors.BackendError(f'expected predicate expression, found {node}') + + def _one_of(self, node: ast.filter.OneOf, reference: ast.filter.PredicateExpression) -> bool: + if not isinstance(reference, type(node)): + return False + return _set_matcher(node, reference, self._parse_predicate_expression) + + def _predicate(self, node: ast.filter.Predicate, reference: ast.filter.PredicateExpression) -> bool: + if not isinstance(reference, (Partial, type(node))): + return False + # partial check + if isinstance(reference, Partial): + if not isinstance(node, reference.node): + return False + return reference.match('predicate', node.predicate) \ + and reference.match('reverse', node.reverse) + # full check + return node.predicate == reference.predicate \ + and node.reverse == reference.reverse + + def _branch(self, + node: typing.Union[ast.filter.Any, ast.filter.All], + reference: ast.filter.FilterExpression, + ) -> bool: + if not isinstance(reference, type(node)): + return False + if not self._parse_predicate_expression(node.predicate, reference.predicate): # type: ignore [attr-defined] + return False + if not self._parse_filter_expression(node.expr, reference.expr): # type: ignore [attr-defined] + return False + return True + + def _agg(self, node: typing.Union[ast.filter.And, ast.filter.Or], reference: ast.filter.FilterExpression) -> bool: + if not isinstance(reference, type(node)): + return False + return _set_matcher(node, reference, self._parse_filter_expression) # type: ignore [arg-type] + + def _not(self, node: ast.filter.Not, reference: ast.filter.FilterExpression) -> bool: + if not isinstance(reference, type(node)): + return False + return self._parse_filter_expression(node.expr, reference.expr) + + def _has(self, node: ast.filter.Has, reference: ast.filter.FilterExpression) -> bool: + if not isinstance(reference, type(node)): + return False + return self._parse_predicate_expression(node.predicate, reference.predicate) \ + and self._parse_filter_expression(node.count, reference.count) + + def _distance(self, node: ast.filter.Distance, reference: ast.filter.FilterExpression) -> bool: + if not isinstance(reference, (Partial, type(node))): + return False + # partial check + if isinstance(reference, Partial): + if not isinstance(node, reference.node): + return False + return reference.match('reference', node.reference) \ + and reference.match('threshold', node.threshold) \ + and reference.match('strict', node.strict) + # full check + return node.reference == reference.reference \ + and node.threshold == reference.threshold \ + and node.strict == reference.strict + + def _value(self, node: ast.filter._Value, reference: ast.filter.FilterExpression) -> bool: + if not isinstance(reference, (Partial, type(node))): + return False + # partial check + if isinstance(reference, Partial): + if not isinstance(node, reference.node): + return False + return reference.match('value', node.value) + # full ckeck + return node.value == reference.value + + def _bounded(self, node: ast.filter._Bounded, reference: ast.filter.FilterExpression) -> bool: + if not isinstance(reference, (Partial, type(node))): + return False + # partial check + if isinstance(reference, Partial): + if not isinstance(node, reference.node): + return False + return reference.match('threshold', node.threshold) \ + and reference.match('strict', node.strict) + # full check + return node.threshold == reference.threshold \ + and node.strict == reference.strict + +## EOF ## -- cgit v1.2.3 From c8fdaaa676afbdcf33344d72bd92b3ccb981cbf8 Mon Sep 17 00:00:00 2001 From: Matthias Baumgartner Date: Wed, 8 Feb 2023 19:54:24 +0100 Subject: ast fixes --- bsfs/query/ast/fetch.py | 3 +-- bsfs/query/ast/filter_.py | 4 ++-- 2 files changed, 3 insertions(+), 4 deletions(-) (limited to 'bsfs/query') diff --git a/bsfs/query/ast/fetch.py b/bsfs/query/ast/fetch.py index 5e603a1..d653a8a 100644 --- a/bsfs/query/ast/fetch.py +++ b/bsfs/query/ast/fetch.py @@ -69,8 +69,7 @@ class All(FetchExpression): return f'{typename(self)}({self.expr})' def __hash__(self) -> int: - # FIXME: Produces different hashes for different orders of self.expr - return hash((super().__hash__(), tuple(self.expr))) + return hash((super().__hash__(), tuple(sorted(self.expr, key=repr)))) def __eq__(self, other: typing.Any) -> bool: return super().__eq__(other) and self.expr == other.expr diff --git a/bsfs/query/ast/filter_.py b/bsfs/query/ast/filter_.py index 81b0de2..798d37f 100644 --- a/bsfs/query/ast/filter_.py +++ b/bsfs/query/ast/filter_.py @@ -173,7 +173,7 @@ class _Agg(FilterExpression, abc.Collection): return f'{typename(self)}({self.expr})' def __hash__(self) -> int: - return hash((super().__hash__(), tuple(self.expr))) # FIXME: Unique hash of different orders over self.expr + return hash((super().__hash__(), tuple(sorted(self.expr, key=repr)))) def __eq__(self, other) -> bool: return super().__eq__(other) and self.expr == other.expr @@ -450,7 +450,7 @@ class OneOf(PredicateExpression, abc.Collection): return f'{typename(self)}({self.expr})' def __hash__(self) -> int: - return hash((super().__hash__(), tuple(self.expr))) # FIXME: Unique hash of different orders over self.expr + return hash((super().__hash__(), tuple(sorted(self.expr, key=repr)))) def __eq__(self, other) -> bool: return super().__eq__(other) and self.expr == other.expr -- cgit v1.2.3 From 64f3ac76a2f8d6b51380c06233accfcc19dca228 Mon Sep 17 00:00:00 2001 From: Matthias Baumgartner Date: Wed, 8 Feb 2023 20:47:18 +0100 Subject: filter query convenience functions --- bsfs/query/ast/filter_.py | 58 +++++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 53 insertions(+), 5 deletions(-) (limited to 'bsfs/query') diff --git a/bsfs/query/ast/filter_.py b/bsfs/query/ast/filter_.py index 798d37f..44490fc 100644 --- a/bsfs/query/ast/filter_.py +++ b/bsfs/query/ast/filter_.py @@ -31,10 +31,7 @@ from collections import abc import typing # bsfs imports -from bsfs.utils import URI, typename, normalize_args - -# inner-module imports -#from . import utils +from bsfs.utils import URI, errors, typename, normalize_args # exports __all__ : typing.Sequence[str] = ( @@ -460,10 +457,61 @@ class OneOf(PredicateExpression, abc.Collection): def IsIn(*values): # pylint: disable=invalid-name # explicitly mimics an expression """Match any of the given URIs.""" - return Or(Is(value) for value in normalize_args(*values)) + args = normalize_args(*values) + if len(args) == 0: + raise AttributeError('expected at least one value, found none') + if len(args) == 1: + return Is(args[0]) + return Or(Is(value) for value in args) def IsNotIn(*values): # pylint: disable=invalid-name # explicitly mimics an expression """Match none of the given URIs.""" return Not(IsIn(*values)) + +def Between( + lo: float = float('-inf'), + hi: float = float('inf'), + lo_strict: bool = True, + hi_strict: bool = True, + ): + """Match numerical values between *lo* and *hi*. Include bounds if strict is False.""" + if abs(lo) == hi == float('inf'): + raise ValueError('range cannot be INF on both sides') + if lo > hi: + raise ValueError(f'lower bound ({lo}) cannot be less than upper bound ({hi})') + if lo == hi and not lo_strict and not hi_strict: + return Equals(lo) + if lo == hi: # either bound is strict + raise ValueError(f'bounds cannot be equal when either is strict') + if lo != float('-inf') and hi != float('inf'): + return And(GreaterThan(lo, lo_strict), LessThan(hi, hi_strict)) + if lo != float('-inf'): + return GreaterThan(lo, lo_strict) + # hi != float('inf'): + return LessThan(hi, hi_strict) + + +def Includes(*values, approx: bool = False): + """Match any of the given *values*. Uses `Substring` if *approx* is set.""" + args = normalize_args(*values) + cls = Substring if approx else Equals + if len(args) == 0: + raise AttributeError('expected at least one value, found none') + if len(args) == 1: + return cls(args[0]) + return Or(cls(v) for v in args) + + +def Excludes(*values, approx: bool = False): + """Match none of the given *values*. Uses `Substring` if *approx* is set.""" + args = normalize_args(*values) + cls = Substring if approx else Equals + if len(args) == 0: + raise AttributeError('expected at least one value, found none') + if len(args) == 1: + return Not(cls(args[0])) + return Not(Or(cls(v) for v in args)) + + ## EOF ## -- cgit v1.2.3 From f31a0d005785d474a37ec769c1f7f5e27aa08a57 Mon Sep 17 00:00:00 2001 From: Matthias Baumgartner Date: Wed, 8 Feb 2023 21:08:24 +0100 Subject: minor comments --- bsfs/query/ast/filter_.py | 17 +++++++++-------- bsfs/query/validator.py | 1 + 2 files changed, 10 insertions(+), 8 deletions(-) (limited to 'bsfs/query') diff --git a/bsfs/query/ast/filter_.py b/bsfs/query/ast/filter_.py index 44490fc..b29d89e 100644 --- a/bsfs/query/ast/filter_.py +++ b/bsfs/query/ast/filter_.py @@ -31,7 +31,7 @@ from collections import abc import typing # bsfs imports -from bsfs.utils import URI, errors, typename, normalize_args +from bsfs.utils import URI, typename, normalize_args # exports __all__ : typing.Sequence[str] = ( @@ -454,8 +454,9 @@ class OneOf(PredicateExpression, abc.Collection): # Helpers +# invalid-name is disabled since they explicitly mimic an expression -def IsIn(*values): # pylint: disable=invalid-name # explicitly mimics an expression +def IsIn(*values) -> FilterExpression: # pylint: disable=invalid-name """Match any of the given URIs.""" args = normalize_args(*values) if len(args) == 0: @@ -464,17 +465,17 @@ def IsIn(*values): # pylint: disable=invalid-name # explicitly mimics an express return Is(args[0]) return Or(Is(value) for value in args) -def IsNotIn(*values): # pylint: disable=invalid-name # explicitly mimics an expression +def IsNotIn(*values) -> FilterExpression: # pylint: disable=invalid-name """Match none of the given URIs.""" return Not(IsIn(*values)) -def Between( +def Between( # pylint: disable=invalid-name lo: float = float('-inf'), hi: float = float('inf'), lo_strict: bool = True, hi_strict: bool = True, - ): + ) -> FilterExpression : """Match numerical values between *lo* and *hi*. Include bounds if strict is False.""" if abs(lo) == hi == float('inf'): raise ValueError('range cannot be INF on both sides') @@ -483,7 +484,7 @@ def Between( if lo == hi and not lo_strict and not hi_strict: return Equals(lo) if lo == hi: # either bound is strict - raise ValueError(f'bounds cannot be equal when either is strict') + raise ValueError('bounds cannot be equal when either is strict') if lo != float('-inf') and hi != float('inf'): return And(GreaterThan(lo, lo_strict), LessThan(hi, hi_strict)) if lo != float('-inf'): @@ -492,7 +493,7 @@ def Between( return LessThan(hi, hi_strict) -def Includes(*values, approx: bool = False): +def Includes(*values, approx: bool = False) -> FilterExpression: # pylint: disable=invalid-name """Match any of the given *values*. Uses `Substring` if *approx* is set.""" args = normalize_args(*values) cls = Substring if approx else Equals @@ -503,7 +504,7 @@ def Includes(*values, approx: bool = False): return Or(cls(v) for v in args) -def Excludes(*values, approx: bool = False): +def Excludes(*values, approx: bool = False) -> FilterExpression: # pylint: disable=invalid-name """Match none of the given *values*. Uses `Substring` if *approx* is set.""" args = normalize_args(*values) cls = Substring if approx else Equals diff --git a/bsfs/query/validator.py b/bsfs/query/validator.py index 9fbff12..f0aa795 100644 --- a/bsfs/query/validator.py +++ b/bsfs/query/validator.py @@ -20,6 +20,7 @@ __all__ : typing.Sequence[str] = ( 'Filter', ) +# FIXME: Split into a submodule and the two classes into their own respective files. ## code ## -- cgit v1.2.3 From 2e07f33314c238e42bfadc5f39805f93ffbc622e Mon Sep 17 00:00:00 2001 From: Matthias Baumgartner Date: Thu, 2 Mar 2023 15:10:05 +0100 Subject: removed author and license notices from individual files --- bsfs/query/__init__.py | 5 ----- bsfs/query/ast/__init__.py | 3 --- bsfs/query/ast/fetch.py | 5 ----- bsfs/query/ast/filter_.py | 3 --- bsfs/query/matcher.py | 5 ----- bsfs/query/validator.py | 5 ----- 6 files changed, 26 deletions(-) (limited to 'bsfs/query') diff --git a/bsfs/query/__init__.py b/bsfs/query/__init__.py index 21c7389..58ff03a 100644 --- a/bsfs/query/__init__.py +++ b/bsfs/query/__init__.py @@ -1,9 +1,4 @@ -""" -Part of the BlackStar filesystem (bsfs) module. -A copy of the license is provided with the project. -Author: Matthias Baumgartner, 2022 -""" # imports import typing diff --git a/bsfs/query/ast/__init__.py b/bsfs/query/ast/__init__.py index 66b097d..bceaac0 100644 --- a/bsfs/query/ast/__init__.py +++ b/bsfs/query/ast/__init__.py @@ -6,9 +6,6 @@ Classes beginning with an underscore (_) represent internal type hierarchies and should not be used for parsing. Note that the AST structures do not (and cannot) check semantic validity or consistency with a given schema. -Part of the BlackStar filesystem (bsfs) module. -A copy of the license is provided with the project. -Author: Matthias Baumgartner, 2022 """ # imports import typing diff --git a/bsfs/query/ast/fetch.py b/bsfs/query/ast/fetch.py index d653a8a..66d94e1 100644 --- a/bsfs/query/ast/fetch.py +++ b/bsfs/query/ast/fetch.py @@ -1,9 +1,4 @@ -""" -Part of the BlackStar filesystem (bsfs) module. -A copy of the license is provided with the project. -Author: Matthias Baumgartner, 2022 -""" # imports from collections import abc import typing diff --git a/bsfs/query/ast/filter_.py b/bsfs/query/ast/filter_.py index b29d89e..56c982e 100644 --- a/bsfs/query/ast/filter_.py +++ b/bsfs/query/ast/filter_.py @@ -22,9 +22,6 @@ This AST has multiple issues that are not verified upon its creation: * Conditions exclude each other * The predicate along the branch have incompatible domains and ranges. -Part of the BlackStar filesystem (bsfs) module. -A copy of the license is provided with the project. -Author: Matthias Baumgartner, 2022 """ # imports from collections import abc diff --git a/bsfs/query/matcher.py b/bsfs/query/matcher.py index a910756..5f3b07e 100644 --- a/bsfs/query/matcher.py +++ b/bsfs/query/matcher.py @@ -1,9 +1,4 @@ -""" -Part of the BlackStar filesystem (bsfs) module. -A copy of the license is provided with the project. -Author: Matthias Baumgartner, 2022 -""" # imports from collections import defaultdict from itertools import product diff --git a/bsfs/query/validator.py b/bsfs/query/validator.py index f0aa795..6e3afa1 100644 --- a/bsfs/query/validator.py +++ b/bsfs/query/validator.py @@ -1,9 +1,4 @@ -""" -Part of the BlackStar filesystem (bsfs) module. -A copy of the license is provided with the project. -Author: Matthias Baumgartner, 2022 -""" # imports import typing -- cgit v1.2.3 From 6b9379d75198082054c35e44bc2cd880353a7485 Mon Sep 17 00:00:00 2001 From: Matthias Baumgartner Date: Thu, 2 Mar 2023 16:40:43 +0100 Subject: hardening --- bsfs/query/validator.py | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) (limited to 'bsfs/query') diff --git a/bsfs/query/validator.py b/bsfs/query/validator.py index 6e3afa1..b259ea0 100644 --- a/bsfs/query/validator.py +++ b/bsfs/query/validator.py @@ -37,6 +37,10 @@ class Filter(): self.schema = schema def __call__(self, root_type: bsc.Node, query: ast.filter.FilterExpression): + """Alias for `Filter.validate`.""" + return self.validate(root_type, query) + + def validate(self, root_type: bsc.Node, query: ast.filter.FilterExpression): """Validate a filter *query*, assuming the subject having *root_type*. Raises a `bsfs.utils.errors.ConsistencyError` if the query violates the schema. @@ -237,7 +241,11 @@ class Fetch(): def __init__(self, schema: bsc.Schema): self.schema = schema - def __call__(self, root_type: bsc.Node, query: ast.fetch.FetchExpression): + def __call__(self, root_type: bsc.Node, query: ast.filter.FilterExpression): + """Alias for `Fetch.validate`.""" + return self.validate(root_type, query) + + def validate(self, root_type: bsc.Node, query: ast.fetch.FetchExpression): """Validate a fetch *query*, assuming the subject having *root_type*. Raises a `bsfs.utils.errors.ConsistencyError` if the query violates the schema. -- cgit v1.2.3 From 2c6c23f85e7f2123c508f9ff8a4aa776948bb589 Mon Sep 17 00:00:00 2001 From: Matthias Baumgartner Date: Thu, 2 Mar 2023 16:46:11 +0100 Subject: minor style fixes --- bsfs/query/validator.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'bsfs/query') diff --git a/bsfs/query/validator.py b/bsfs/query/validator.py index b259ea0..1ce44e9 100644 --- a/bsfs/query/validator.py +++ b/bsfs/query/validator.py @@ -36,11 +36,11 @@ class Filter(): def __init__(self, schema: bsc.Schema): self.schema = schema - def __call__(self, root_type: bsc.Node, query: ast.filter.FilterExpression): + def __call__(self, root_type: bsc.Node, query: ast.filter.FilterExpression) -> bool: """Alias for `Filter.validate`.""" return self.validate(root_type, query) - def validate(self, root_type: bsc.Node, query: ast.filter.FilterExpression): + def validate(self, root_type: bsc.Node, query: ast.filter.FilterExpression) -> bool: """Validate a filter *query*, assuming the subject having *root_type*. Raises a `bsfs.utils.errors.ConsistencyError` if the query violates the schema. @@ -241,11 +241,11 @@ class Fetch(): def __init__(self, schema: bsc.Schema): self.schema = schema - def __call__(self, root_type: bsc.Node, query: ast.filter.FilterExpression): + def __call__(self, root_type: bsc.Node, query: ast.fetch.FetchExpression) -> bool: """Alias for `Fetch.validate`.""" return self.validate(root_type, query) - def validate(self, root_type: bsc.Node, query: ast.fetch.FetchExpression): + def validate(self, root_type: bsc.Node, query: ast.fetch.FetchExpression) -> bool: """Validate a fetch *query*, assuming the subject having *root_type*. Raises a `bsfs.utils.errors.ConsistencyError` if the query violates the schema. -- cgit v1.2.3 From 4fead04055be4967d9ea3b24ff61fe37a93108dd Mon Sep 17 00:00:00 2001 From: Matthias Baumgartner Date: Sat, 4 Mar 2023 13:31:11 +0100 Subject: namespace refactoring and cleanup --- bsfs/query/ast/filter_.py | 3 ++- bsfs/query/matcher.py | 4 ++-- bsfs/query/validator.py | 4 ++-- 3 files changed, 6 insertions(+), 5 deletions(-) (limited to 'bsfs/query') diff --git a/bsfs/query/ast/filter_.py b/bsfs/query/ast/filter_.py index 56c982e..610fdb4 100644 --- a/bsfs/query/ast/filter_.py +++ b/bsfs/query/ast/filter_.py @@ -10,7 +10,8 @@ For example, consider the following AST: >>> Any(ns.bse.collection, ... And( ... Equals('hello'), -... Any(ns.bsm.guid, Any(ns.bsm.guid, Equals('hello'))), +... Is('hello world'), +... Any(ns.bse.tag, Equals('world')), ... Any(ns.bst.label, Equals('world')), ... All(ns.bst.label, Not(Equals('world'))), ... ) diff --git a/bsfs/query/matcher.py b/bsfs/query/matcher.py index 5f3b07e..17c9c8e 100644 --- a/bsfs/query/matcher.py +++ b/bsfs/query/matcher.py @@ -215,8 +215,8 @@ class Filter(): two following queries are semantically identical, but structurally different, and would therefore not match: - >>> ast.filter.OneOf(ast.filter.Predicate(ns.bse.filename)) - >>> ast.filter.Predicate(ns.bse.filename) + >>> ast.filter.OneOf(ast.filter.Predicate(ns.bse.name)) + >>> ast.filter.Predicate(ns.bse.name) """ diff --git a/bsfs/query/validator.py b/bsfs/query/validator.py index 1ce44e9..10ca492 100644 --- a/bsfs/query/validator.py +++ b/bsfs/query/validator.py @@ -177,7 +177,7 @@ class Filter(): if not type_ <= dom: raise errors.ConsistencyError(f'expected type {dom}, found {type_}') # node.count is a numerical expression - self._parse_filter_expression(self.schema.literal(ns.bsfs.Number), node.count) + self._parse_filter_expression(self.schema.literal(ns.bsl.Number), node.count) def _distance(self, type_: bsc.Vertex, node: ast.filter.Distance): # type is a Literal @@ -218,7 +218,7 @@ class Filter(): if type_ not in self.schema.literals(): raise errors.ConsistencyError(f'literal {type_} is not in the schema') # type must be a numerical - if not type_ <= self.schema.literal(ns.bsfs.Number): + if not type_ <= self.schema.literal(ns.bsl.Number): raise errors.ConsistencyError(f'expected a number type, found {type_}') # FIXME: Check if node.value corresponds to type_ -- cgit v1.2.3