about summary refs log tree commit diff stats
path: root/bsfs
diff options
context:
space:
mode:
author Matthias Baumgartner <dev@igsor.net> 2022-12-22 20:27:49 +0100
committer Matthias Baumgartner <dev@igsor.net> 2022-12-22 20:27:49 +0100
commit 383fa8fd5c2e4b67089b4c5b654ebade51382f2c (patch)
tree 0618ce8221bd430a2206a9a0746800a47291b477 /bsfs
parent a0f2308adcb226d28de3355bc7115a6d9b669462 (diff)
download bsfs-383fa8fd5c2e4b67089b4c5b654ebade51382f2c.tar.gz
bsfs-383fa8fd5c2e4b67089b4c5b654ebade51382f2c.tar.bz2
bsfs-383fa8fd5c2e4b67089b4c5b654ebade51382f2c.zip
filter ast definition and validation
Diffstat (limited to 'bsfs')
-rw-r--r-- bsfs/query/ast/__init__.py 2
-rw-r--r-- bsfs/query/ast/filter_.py 405
-rw-r--r-- bsfs/query/validator.py 336
-rw-r--r-- bsfs/utils/__init__.py 3
-rw-r--r-- bsfs/utils/commons.py 34
-rw-r--r-- bsfs/utils/errors.py 3
6 files changed, 620 insertions, 163 deletions
diff --git a/bsfs/query/ast/__init__.py b/bsfs/query/ast/__init__.py
index 0ee7385..704d051 100644
--- a/bsfs/query/ast/__init__.py
+++ b/bsfs/query/ast/__init__.py
@@ -14,7 +14,7 @@ Author: Matthias Baumgartner, 2022
import typing
# inner-module imports
-from . import filter_ as filter
+from . import filter_ as filter # pylint: disable=redefined-builtin
# exports
__all__: typing.Sequence[str] = (
diff --git a/bsfs/query/ast/filter_.py b/bsfs/query/ast/filter_.py
index 4086fc1..b129ded 100644
--- a/bsfs/query/ast/filter_.py
+++ b/bsfs/query/ast/filter_.py
@@ -1,5 +1,27 @@
"""Filter AST.
+Note that it is easily possible to construct an AST that is inconsistent with
+a given schema. Furthermore, it is possible to construct a semantically invalid
+AST that cannot be parsed correctly or includes contradicting statements.
+The AST nodes do not (and cannot) check such issues.
+
+For example, consider the following AST:
+
+>>> Any(ns.bse.collection,
+... And(
+... Equals('hello'),
+... Any(ns.bsm.guid, Any(ns.bsm.guid, Equals('hello'))),
+... Any(ns.bst.label, Equals('world')),
+... All(ns.bst.label, Not(Equals('world'))),
+... )
+... )
+
+This AST has multiple issues that are not verified upon its creation:
+* A condition on a non-literal.
+* A Filter on a literal.
+* Conditions that exclude each other.
+* The predicates along the branch have incompatible domains and ranges.
+
Part of the BlackStar filesystem (bsfs) module.
A copy of the license is provided with the project.
Author: Matthias Baumgartner, 2022
@@ -8,12 +30,45 @@ Author: Matthias Baumgartner, 2022
from collections import abc
import typing
+# bsfs imports
+from bsfs.utils import URI, typename, normalize_args
+
+# inner-module imports
+#from . import utils
+
# exports
-__all__ : typing.Sequence[str] = []
+__all__ : typing.Sequence[str] = (
+ # base classes
+ 'FilterExpression',
+ 'PredicateExpression',
+ # predicate expressions
+ 'OneOf',
+ 'Predicate',
+ # branching
+ 'All',
+ 'Any',
+ # aggregators
+ 'And',
+ 'Or',
+ # value matchers
+ 'Equals',
+ 'Substring',
+ 'EndsWith',
+ 'StartsWith',
+ # range matchers
+ 'GreaterThan',
+ 'LessThan',
+ # misc
+ 'Has',
+ 'Is',
+ 'Not',
+ )
## code ##
+# pylint: disable=too-few-public-methods # Many expressions use mostly magic methods
+
class _Expression(abc.Hashable):
def __repr__(self) -> str:
"""Return the expression's string representation."""
@@ -27,4 +82,352 @@ class _Expression(abc.Hashable):
"""Return True if *self* and *other* are equivalent."""
return isinstance(other, type(self))
+
+class FilterExpression(_Expression):
+ """Generic Filter expression."""
+
+
+class PredicateExpression(_Expression):
+ """Generic Predicate expression."""
+
+
+class _Branch(FilterExpression):
+ """Branch the filter along a predicate."""
+
+ # predicate to follow.
+ predicate: PredicateExpression
+
+ # child expression to evaluate.
+ expr: FilterExpression
+
+ def __init__(
+ self,
+ predicate: typing.Union[PredicateExpression, URI],
+ expr: FilterExpression,
+ ):
+ # process predicate argument
+ if isinstance(predicate, URI):
+ predicate = Predicate(predicate)
+ elif not isinstance(predicate, PredicateExpression):
+ raise TypeError(predicate)
+ # process expression argument
+ if not isinstance(expr, FilterExpression):
+ raise TypeError(expr)
+ # assign members
+ self.predicate = predicate
+ self.expr = expr
+
+ def __repr__(self) -> str:
+ return f'{typename(self)}({self.predicate}, {self.expr})'
+
+ def __hash__(self) -> int:
+ return hash((super().__hash__(), self.predicate, self.expr))
+
+ def __eq__(self, other) -> bool:
+ return super().__eq__(other) \
+ and self.predicate == other.predicate \
+ and self.expr == other.expr
+
+class Any(_Branch):
+ """Any (and at least one) triple matches."""
+
+
+class All(_Branch):
+ """All (and at least one) triples match."""
+
+
+class _Agg(FilterExpression, abc.Collection):
+ """Combine multiple expressions."""
+
+ # child expressions
+ expr: typing.Set[FilterExpression]
+
+ def __init__(
+ self,
+ *expr: typing.Union[FilterExpression,
+ typing.Iterable[FilterExpression],
+ typing.Iterator[FilterExpression]]
+ ):
+ # unfold arguments
+ unfolded = set(normalize_args(*expr))
+ # check type
+ if not all(isinstance(e, FilterExpression) for e in unfolded):
+ raise TypeError(expr)
+ # assign member
+ self.expr = unfolded
+
+ def __contains__(self, expr: typing.Any) -> bool:
+ """Return True if *expr* is among the child expressions."""
+ return expr in self.expr
+
+ def __iter__(self) -> typing.Iterator[FilterExpression]:
+ """Iterator over child expressions."""
+ return iter(self.expr)
+
+ def __len__(self) -> int:
+ """Number of child expressions."""
+ return len(self.expr)
+
+ def __repr__(self) -> str:
+ return f'{typename(self)}({self.expr})'
+
+ def __hash__(self) -> int:
+ return hash((super().__hash__(), tuple(self.expr))) # FIXME: Unique hash of different orders over self.expr
+
+ def __eq__(self, other) -> bool:
+ return super().__eq__(other) and self.expr == other.expr
+
+
+class And(_Agg):
+ """All conditions match."""
+
+
+class Or(_Agg):
+ """At least one condition matches."""
+
+
+class Not(FilterExpression):
+ """Invert a statement."""
+
+ # child expression
+ expr: FilterExpression
+
+ def __init__(self, expr: FilterExpression):
+ # check argument
+ if not isinstance(expr, FilterExpression):
+ raise TypeError(expr)
+ # assign member
+ self.expr = expr
+
+ def __repr__(self) -> str:
+ return f'{typename(self)}({self.expr})'
+
+ def __hash__(self) -> int:
+ return hash((super().__hash__(), self.expr))
+
+ def __eq__(self, other: typing.Any) -> bool:
+ return super().__eq__(other) and self.expr == other.expr
+
+
+class Has(FilterExpression):
+ """Has predicate N times"""
+
+ # predicate to follow.
+ predicate: PredicateExpression
+
+ # target count
+ count: FilterExpression
+
+ def __init__(
+ self,
+ predicate: typing.Union[PredicateExpression, URI],
+ count: typing.Optional[typing.Union[FilterExpression, int]] = None,
+ ):
+ # check predicate
+ if isinstance(predicate, URI):
+ predicate = Predicate(predicate)
+ elif not isinstance(predicate, PredicateExpression):
+ raise TypeError(predicate)
+ # check count
+ if count is None:
+ count = GreaterThan(1, strict=False)
+ elif isinstance(count, int):
+ count = Equals(count)
+ elif not isinstance(count, FilterExpression):
+ raise TypeError(count)
+ # assign members
+ self.predicate = predicate
+ self.count = count
+
+ def __repr__(self) -> str:
+ return f'{typename(self)}({self.predicate}, {self.count})'
+
+ def __hash__(self) -> int:
+ return hash((super().__hash__(), self.predicate, self.count))
+
+ def __eq__(self, other) -> bool:
+ return super().__eq__(other) \
+ and self.predicate == other.predicate \
+ and self.count == other.count
+
+
+class _Value(FilterExpression):
+ """Base class for expressions that match against a single target value.
+ """
+
+ # target value.
+ value: typing.Any
+
+ def __init__(self, value: typing.Any):
+ self.value = value
+
+ def __repr__(self) -> str:
+ return f'{typename(self)}({self.value})'
+
+ def __hash__(self) -> int:
+ return hash((super().__hash__(), self.value))
+
+ def __eq__(self, other) -> bool:
+ return super().__eq__(other) and self.value == other.value
+
+
+class Is(_Value):
+ """Match the URI of a node."""
+
+
+class Equals(_Value):
+ """Value matches exactly.
+ NOTE: Value format must correspond to literal type; can be a string, a number, or a Node
+ """
+
+
+class Substring(_Value):
+ """Value matches a substring
+ NOTE: value format must be a string
+ """
+
+
+class StartsWith(_Value):
+ """Value begins with a given string."""
+
+
+class EndsWith(_Value):
+ """Value ends with a given string."""
+
+
+class _Bounded(FilterExpression):
+ """Base class for expressions that compare against a numerical threshold.
+ """
+
+ # bound.
+ threshold: float
+
+ # open, i.e. threshold excluded (True) or closed, i.e. threshold included (False) bound.
+ strict: bool
+
+ def __init__(
+ self,
+ threshold: float,
+ strict: bool = True,
+ ):
+ self.threshold = float(threshold)
+ self.strict = bool(strict)
+
+ def __repr__(self) -> str:
+ return f'{typename(self)}({self.threshold}, {self.strict})'
+
+ def __hash__(self) -> int:
+ return hash((super().__hash__(), self.threshold, self.strict))
+
+ def __eq__(self, other) -> bool:
+ return super().__eq__(other) \
+ and self.threshold == other.threshold \
+ and self.strict == other.strict
+
+
+
+class LessThan(_Bounded):
+ """Value is (strictly) smaller than threshold.
+ NOTE: only on numerical literals
+ """
+
+
+class GreaterThan(_Bounded):
+ """Value is (strictly) larger than threshold
+ NOTE: only on numerical literals
+ """
+
+
+class Predicate(PredicateExpression):
+ """A single predicate."""
+
+ # predicate URI
+ predicate: URI
+
+ # reverse the predicate's direction
+ reverse: bool
+
+ def __init__(
+ self,
+ predicate: URI,
+ reverse: typing.Optional[bool] = False,
+ ):
+ # check arguments
+ if not isinstance(predicate, URI):
+ raise TypeError(predicate)
+ # assign members
+ self.predicate = predicate
+ self.reverse = bool(reverse)
+
+ def __repr__(self) -> str:
+ return f'{typename(self)}({self.predicate}, {self.reverse})'
+
+ def __hash__(self) -> int:
+ return hash((super().__hash__(), self.predicate, self.reverse))
+
+ def __eq__(self, other) -> bool:
+ return super().__eq__(other) \
+ and self.predicate == other.predicate \
+ and self.reverse == other.reverse
+
+
+class OneOf(PredicateExpression, abc.Collection):
+ """A set of predicate alternatives.
+
+ The predicates' domains must be ascendants or descendants of each other.
+ The overall domain is the most specific one.
+
+ The predicates' ranges must be ascendants or descendants of each other.
+ The overall range is the most generic one.
+ """
+
+ # predicate alternatives
+ expr: typing.Set[PredicateExpression]
+
+ def __init__(self, *expr: typing.Union[PredicateExpression, URI]):
+ # unfold arguments
+ unfolded = set(normalize_args(*expr)) # type: ignore [arg-type] # this is getting too complex...
+ # check arguments
+ if len(unfolded) == 0:
+ raise AttributeError('expected at least one expression, found none')
+ # ensure PredicateExpression
+ unfolded = {Predicate(e) if isinstance(e, URI) else e for e in unfolded}
+ # check type
+ if not all(isinstance(e, PredicateExpression) for e in unfolded):
+ raise TypeError(expr)
+ # assign member
+ self.expr = unfolded
+
+ def __contains__(self, expr: typing.Any) -> bool:
+ """Return True if *expr* is among the child expressions."""
+ return expr in self.expr
+
+ def __iter__(self) -> typing.Iterator[PredicateExpression]:
+ """Iterator over child expressions."""
+ return iter(self.expr)
+
+ def __len__(self) -> int:
+ """Number of child expressions."""
+ return len(self.expr)
+
+ def __repr__(self) -> str:
+ return f'{typename(self)}({self.expr})'
+
+ def __hash__(self) -> int:
+ return hash((super().__hash__(), tuple(self.expr))) # FIXME: Unique hash of different orders over self.expr
+
+ def __eq__(self, other) -> bool:
+ return super().__eq__(other) and self.expr == other.expr
+
+
+# Helpers
+
+def IsIn(*values): # pylint: disable=invalid-name # explicitly mimics an expression
+ """Match any of the given URIs."""
+ return Or(Is(value) for value in normalize_args(*values))
+
+def IsNotIn(*values): # pylint: disable=invalid-name # explicitly mimics an expression
+ """Match none of the given URIs."""
+ return Not(IsIn(*values))
+
## EOF ##
diff --git a/bsfs/query/validator.py b/bsfs/query/validator.py
index 123b947..352203a 100644
--- a/bsfs/query/validator.py
+++ b/bsfs/query/validator.py
@@ -9,6 +9,8 @@ import typing
# bsfs imports
from bsfs import schema as bsc
+from bsfs.namespace import ns
+from bsfs.utils import errors, typename
# inner-module imports
from . import ast
@@ -22,6 +24,18 @@ __all__ : typing.Sequence[str] = (
## code ##
class Filter():
+ """Validate a `bsfs.query.ast.filter` query's structure and schema compliance.
+
+ * Conditions (Bounded, Value) can only be applied on literals
+ * Branches, Is, and Has can only be applied on nodes
+ * Predicates' domain and range must match
+ * Predicate paths must follow the schema
+ * Referenced types are present in the schema
+
+ """
+
+ # vertex types
+ T_VERTEX = typing.Union[bsc.Node, bsc.Literal] # FIXME: Shouldn't this be in the schema?
# schema to validate against.
schema: bsc.Schema
@@ -29,180 +43,182 @@ class Filter():
def __init__(self, schema: bsc.Schema):
self.schema = schema
- def parse(self, node: ast.filter.FilterExpression, subject: bsc.types._Vertex):
- # subject is a node type
- if not isinstance(subject, bsc.Node):
- raise errors.ConsistencyError(f'Expected a node, found {subject}')
- # subject exists in the schema
- if subject not in self.schema.nodes:
- raise errors.ConsistencyError(f'Invalid node type {subject}')
- # root expression is valid
- self._parse(node, subject)
+ def __call__(self, root_type: bsc.Node, query: ast.filter.FilterExpression):
+ """Validate a filter *query*, assuming the subject is of type *root_type*.
+
+ Raises a `TypeError` if *root_type* is not a `bsfs.schema.Node`.
+ Raises a `bsfs.utils.errors.ConsistencyError` if the query violates the schema.
+ Raises a `bsfs.utils.errors.BackendError` if the query structure is invalid.
+ """
+ # root_type must be a schema.Node
+ if not isinstance(root_type, bsc.Node):
+ raise TypeError(f'Expected a node, found {typename(root_type)}')
+ # root_type must exist in the schema
+ if root_type not in self.schema.nodes():
+ raise errors.ConsistencyError(f'{root_type} is not defined in the schema')
+ # check root expression
+ self._parse_filter_expression(root_type, query)
# all tests passed
return True
- def _parse_numerical_expression(self, node: ast.filter.FilterExpression, subject: bsc.types._Vertex):
- if isinstance(node, ast.filter.And):
- return self._and(node, subject)
- elif isinstance(node, ast.filter.Or):
- return self._or(node, subject)
- elif isinstance(node, ast.filter.LessThan):
- return self._lessThan(node, subject)
- elif isinstance(node, ast.filter.GreaterThan):
- return self._greaterThan(node, subject)
- elif isinstance(node, ast.filter.Equals):
- return self._equals(node, subject, numerical=True)
- else:
- raise errors.ConsistencyError(f'Expected a numerical expression, found {node}')
-
-
- def __branch(self, node: typing.Union[ast.filter.Any, ast.filter.And], subject: bsc.types._Vertex):
- # subject is a node type
- if not isinstance(subject, bsc.Node):
- raise errors.ConsistencyError(f'Expected a node, found {subject}')
- # subject exists in the schema
- if subject not in self.schema.nodes:
- raise errors.ConsistencyError(f'Invalid node type {subject}')
- # predicate is valid
- dom, rng = self._parse_predicate_expression(node.predicate)
- # subject is a subtype of the predicate's domain
- if not subject <= dom:
- raise errors.ConsistencyError(f'Expected type {dom}, found {subject}')
- # child expression is valid
- self._parse_filter_expression(node.expr, rng)
+ ## routing methods
+
+ def _parse_filter_expression(self, type_: T_VERTEX, node: ast.filter.FilterExpression):
+ """Route *node* to the handler of the respective FilterExpression subclass."""
+ if isinstance(node, ast.filter.Is):
+ return self._is(type_, node)
+ if isinstance(node, ast.filter.Not):
+ return self._not(type_, node)
+ if isinstance(node, ast.filter.Has):
+ return self._has(type_, node)
+ if isinstance(node, (ast.filter.Any, ast.filter.All)):
+ return self._branch(type_, node)
+ if isinstance(node, (ast.filter.And, ast.filter.Or)):
+ return self._agg(type_, node)
+ if isinstance(node, (ast.filter.Equals, ast.filter.Substring, ast.filter.StartsWith, ast.filter.EndsWith)):
+ return self._value(type_, node)
+ if isinstance(node, (ast.filter.LessThan, ast.filter.GreaterThan)):
+ return self._bounded(type_, node)
+ # invalid node
+ raise errors.BackendError(f'expected filter expression, found {node}')
+
+ def _parse_predicate_expression(self, node: ast.filter.PredicateExpression) -> typing.Tuple[T_VERTEX, T_VERTEX]:
+ """Route *node* to the handler of the respective PredicateExpression subclass."""
+ if isinstance(node, ast.filter.Predicate):
+ return self._predicate(node)
+ if isinstance(node, ast.filter.OneOf):
+ return self._one_of(node)
+ # invalid node
+ raise errors.BackendError(f'expected predicate expression, found {node}')
+
+
+ ## predicate expressions
+
+ def _predicate(self, node: ast.filter.Predicate) -> typing.Tuple[T_VERTEX, T_VERTEX]:
+ # predicate exists in the schema
+ if not self.schema.has_predicate(node.predicate):
+ raise errors.ConsistencyError(f'predicate {node.predicate} is not in the schema')
+ # determine domain and range
+ pred = self.schema.predicate(node.predicate)
+ dom, rng = pred.domain, pred.range
+ if rng is None:
+ # FIXME: It is a design error that Predicates can have a None range...
+ raise errors.BackendError(f'predicate {pred} has no range')
+ if node.reverse:
+ dom, rng = rng, dom # type: ignore [assignment] # variable re-use confuses mypy
+ # return domain and range
+ return dom, rng
- def _any(self, node: ast.filter.Any, subject: bsc.types._Vertex):
- return self.__branch(node, subject)
+ def _one_of(self, node: ast.filter.OneOf) -> typing.Tuple[T_VERTEX, T_VERTEX]:
+ # determine domain and range types
+ # NOTE: select the most specific domain and the most generic range
+ dom, rng = None, None
+ for pred in node:
+ # parse child expression
+ subdom, subrng = self._parse_predicate_expression(pred)
+ try:
+ # determine overall domain
+ if dom is None or subdom < dom: # pick most specific domain
+ dom = subdom
+ # domains must be related across all child expressions
+ if not subdom <= dom and not subdom >= dom:
+ raise errors.ConsistencyError(f'domains {subdom} and {dom} are not related')
+ except TypeError as err: # compared literal vs. node
+ raise errors.ConsistencyError(f'domains {subdom} and {dom} are not of the same type') from err
- def _all(self, node: ast.filter.All, subject: bsc.types._Vertex):
- return self.__branch(node, subject)
+ try:
+ # determine overall range
+ if rng is None or subrng > rng: # pick most generic range
+ rng = subrng
+ # ranges must be related across all child expressions
+ if not subrng <= rng and not subrng >= rng:
+ raise errors.ConsistencyError(f'ranges {subrng} and {rng} are not related')
+ except TypeError as err: # compared literal vs. node
+ raise errors.ConsistencyError(f'ranges {subrng} and {rng} are not of the same type') from err
+ # check domain and range
+ if dom is None or rng is None:
+ # OneOf guarantees at least one expression, these two cases cannot happen
+ raise errors.UnreachableError()
+ # return domain and range
+ return dom, rng
- def __agg(self, node: typing.Union[ast.filter.And, ast.filter.Or], subject: bsc.types._Vertex):
+ ## intermediates
+
+ def _branch(self, type_: T_VERTEX, node: ast.filter._Branch):
+ # type is a Node
+ if not isinstance(type_, bsc.Node):
+ raise errors.ConsistencyError(f'expected a Node, found {type_}')
+ # type exists in the schema
+ # FIXME: Isn't it actually guaranteed that the type (except the root type) is part of the schema?
+ # all types can be traced back to (a) root_type, (b) predicate, or (c) manually set (e.g. in _is).
+ # For (a), we do (and have to) perform a check. For (c), the code base should be consistent throughout
+ # the module, so this is an assumption that has to be ensured in schema.Schema. For (b), we know (and
+ # check) that the predicate is in the schema, hence all node/literals derived from it are also in the
+ # schema by construction of the schema.Schema class. So, why do we check this every time?
+ if type_ not in self.schema.nodes():
+ raise errors.ConsistencyError(f'node {type_} is not in the schema')
+ # predicate is valid
+ dom, rng = self._parse_predicate_expression(node.predicate)
+ # type_ is a subtype of the predicate's domain
+ if not type_ <= dom:
+ raise errors.ConsistencyError(f'expected type {dom} or subtype thereof, found {type_}')
+ # child expression is valid
+ self._parse_filter_expression(rng, node.expr)
+
+ def _agg(self, type_: T_VERTEX, node: ast.filter._Agg):
for expr in node:
# child expression is valid
- self._parse_filter_expression(expr, subject)
-
- def _and(self, node: ast.filter.And, subject: bsc.types._Vertex):
- return self.__agg(node, subject)
-
- def _or(self, node: ast.filter.Or, subject: bsc.types._Vertex):
- return self.__agg(node, subject)
-
+ self._parse_filter_expression(type_, expr)
- def _not(self, node: ast.filter.Not, subject: bsc.types._Vertex):
+ def _not(self, type_: T_VERTEX, node: ast.filter.Not):
# child expression is valid
- self._parse_filter_expression(node.expr, subject)
-
-
- def _has(self, node: ast.filter.Has, subject: bsc.types._Vertex):
- # subject is a node type
- if not isinstance(subject, bsc.Node):
- raise errors.ConsistencyError(f'Expected a node, found {subject}')
- # subject exists in the schema
- if subject not in self.schema.nodes:
- raise errors.ConsistencyError(f'Invalid node type {subject}')
+ self._parse_filter_expression(type_, node.expr)
+
+ def _has(self, type_: T_VERTEX, node: ast.filter.Has):
+ # type is a Node
+ if not isinstance(type_, bsc.Node):
+ raise errors.ConsistencyError(f'expected a Node, found {type_}')
+ # type exists in the schema
+ if type_ not in self.schema.nodes():
+ raise errors.ConsistencyError(f'node {type_} is not in the schema')
# predicate is valid
- dom, rng = self._parse_predicate_expression(node.predicate)
- # subject is a subtype of the predicate's domain
- if not subject <= dom:
- raise errors.ConsistencyError(f'Expected type {dom}, found {subject}')
+ dom, _= self._parse_predicate_expression(node.predicate)
+ # type_ is a subtype of the predicate's domain
+ if not type_ <= dom:
+ raise errors.ConsistencyError(f'expected type {dom}, found {type_}')
# node.count is a numerical expression
- self._parse_numerical_expression(node.count, self.schema.literal(ns.xsd.numerical))
-
-
- def _equals(self, node: ast.filter.Equals, subject: bsc.types._Vertex, numerical: bool = False):
- # subject is a literal
- #if not isinstance(subject, bsc.Literal):
- # raise errors.ConsistencyError(f'Expected a literal, found {subject}')
- if isinstance(subject, bsc.Node):
- # FIXME: How to handle this case?
- # FIXME: How to check if a NodeType is acceptable?
- # FIXME: Maybe use flags to control what is expected as node identifiers?
- from bsfs.graph.nodes import Nodes # FIXME
- if not isinstance(node.value, Nodes) and not isinstance(node.value, URI):
- raise errors.ConsistencyError(f'Expected a Nodes or URI, found {node.value}')
- elif isinstance(subject, bsc.Literal):
- # literal exists in the schema
- if subject not in self.schema.literals:
- raise errors.ConsistencyError(f'Invalid literal type {subject}')
- else:
- # FIXME:
- raise errors.ConsistencyError(f'Expected a literal, found {subject}')
- # node.value is numeric (if requested)
- if numerical and not isinstance(node.value, float) and not isinstance(node.value, int):
- raise errors.ConsistencyError(f'Expected a numerical value (int or float), found {node.value}')
- # NOTE: We cannot check if node.value agrees with the subject since we don't know
- # all literal types, their hierarchy, and how the backend converts datatypes.
-
-
- def _substring(self, node: ast.filter.Substring, subject: bsc.types._Vertex):
- # subject is a literal
- if not isinstance(subject, bsc.Literal):
- raise errors.ConsistencyError(f'Expected a literal, found {subject}')
- # literal exists in the schema
- if subject not in self.schema.literals:
- raise errors.ConsistencyError(f'Invalid literal type {subject}')
- # node.value matches literal datatype
- if not subject.is_a(ns.xsd.string):
- raise errors.ConsistencyError(f'Expected a string literal, found {subject}')
-
-
- def _lessThan(self, node: ast.filter.LessThan, subject: bsc.types._Vertex):
- # subject is a literal
- if not isinstance(subject, bsc.Literal):
- raise errors.ConsistencyError(f'Expected a literal, found {subject}')
- # literal exists in the schema
- if subject not in self.schema.literals:
- raise errors.ConsistencyError(f'Invalid literal type {subject}')
- # subject is numerical
- if not subject.is_a(ns.xsd.numerical):
- raise errors.ConsistencyError(f'Expected a numerical literal, found {subject}')
-
-
- def _greaterThan(self, node: ast.filter.GreaterThan, subject: bsc.types._Vertex):
- # subject is a literal
- if not isinstance(subject, bsc.Literal):
- raise errors.ConsistencyError(f'Expected a literal, found {subject}')
- # literal exists in the schema
- if subject not in self.schema.literals:
- raise errors.ConsistencyError(f'Invalid literal type {subject}')
- # subject is numerical
- if not subject.is_a(ns.xsd.numerical):
- raise errors.ConsistencyError(f'Expected a numerical literal, found {subject}')
-
-
- def _predicate(self, node: ast.filter.Predicate):
- try:
- # predicate exists in the schema
- pred = self.schema.predicate(node.predicate)
- except KeyError:
- raise errors.ConsistencyError(f'') # FIXME
- if node.reverse:
- return pred.range, pred.domain
- else:
- return pred.domain, pred.range
-
+ # FIXME: We have to ensure that ns.xsd.integer is always known in the schema!
+ self._parse_filter_expression(self.schema.literal(ns.xsd.integer), node.count)
+
+
+ ## conditions
+
+ def _is(self, type_: T_VERTEX, node: ast.filter.Is): # pylint: disable=unused-argument # (node)
+ if not isinstance(type_, bsc.Node):
+ raise errors.ConsistencyError(f'expected a Node, found {type_}')
+ if type_ not in self.schema.nodes():
+ raise errors.ConsistencyError(f'node {type_} is not in the schema')
+
+ def _value(self, type_: T_VERTEX, node: ast.filter._Value): # pylint: disable=unused-argument # (node)
+ # type is a literal
+ if not isinstance(type_, bsc.Literal):
+ raise errors.ConsistencyError(f'expected a Literal, found {type_}')
+ # type exists in the schema
+ if type_ not in self.schema.literals():
+ raise errors.ConsistencyError(f'literal {type_} is not in the schema')
+ # FIXME: Check if node.value corresponds to type_
+ # FIXME: A specific literal might be requested (i.e., a numeric type when used in Has)
+
+ def _bounded(self, type_: T_VERTEX, node: ast.filter._Bounded): # pylint: disable=unused-argument # (node)
+ # type is a literal
+ if not isinstance(type_, bsc.Literal):
+ raise errors.ConsistencyError(f'expected a Literal, found {type_}')
+ # type exists in the schema
+ if type_ not in self.schema.literals():
+ raise errors.ConsistencyError(f'literal {type_} is not in the schema')
+ # FIXME: Check if node.threshold corresponds to type_
- def _oneOf(self, node: ast.filter.OneOf):
- dom, rng = None, None
- for pred in node:
- try:
- # parse child expression
- subdom, subrng = self._parse_predicate_expression(pred)
- # domain and range must be related across all child expressions
- if not subdom <= dom and not subdom >= dom:
- raise errors.ConsistencyError(f'') # FIXME
- if not subrng <= rng and not subrng >= rng:
- raise errors.ConsistencyError(f'') # FIXME
- # determine overall domain and range
- if dom is None or subdom < dom: # pick most specific domain
- dom = subdom
- if rng is None or subrng > rng: # pick most generic range
- rng = subrng
- except KeyError:
- raise errors.ConsistencyError(f'')
- return dom, rng
## EOF ##
diff --git a/bsfs/utils/__init__.py b/bsfs/utils/__init__.py
index 94680ee..6737cef 100644
--- a/bsfs/utils/__init__.py
+++ b/bsfs/utils/__init__.py
@@ -9,7 +9,7 @@ import typing
# inner-module imports
from . import errors
-from .commons import typename
+from .commons import typename, normalize_args
from .uri import URI
from .uuid import UUID, UCID
@@ -19,6 +19,7 @@ __all__ : typing.Sequence[str] = (
'URI',
'UUID',
'errors',
+ 'normalize_args',
'typename',
)
diff --git a/bsfs/utils/commons.py b/bsfs/utils/commons.py
index bad2fe0..e9f0b7f 100644
--- a/bsfs/utils/commons.py
+++ b/bsfs/utils/commons.py
@@ -5,10 +5,12 @@ A copy of the license is provided with the project.
Author: Matthias Baumgartner, 2022
"""
# imports
+from collections import abc
import typing
# exports
__all__: typing.Sequence[str] = (
+ 'normalize_args',
'typename',
)
@@ -19,5 +21,37 @@ def typename(obj) -> str:
"""Return the type name of *obj*."""
return type(obj).__name__
+# argument type in `normalize_args`.
+ArgType = typing.TypeVar('ArgType') # pylint: disable=invalid-name # type vars don't follow the usual convention
+
+def normalize_args(
+ *args: typing.Union[ArgType, typing.Iterable[ArgType], typing.Iterator[ArgType]]
+ ) -> typing.Tuple[ArgType, ...]:
+ """Arguments to a function can be passed as individual arguments, list-like
+ structures, or iterables. This function processes any of these styles and
+ returns a tuple of the respective items. Typically used within a function to
+ provide a flexible interface but still have parameters in a normalized form.
+
+ Examples:
+
+ >>> normalize_args(0,1,2)
+ (0, 1, 2)
+ >>> normalize_args([0,1,2])
+ (0, 1, 2)
+ >>> normalize_args(range(3))
+ (0, 1, 2)
+
+ """
+ if len(args) == 0: # foo()
+ return tuple()
+ if len(args) > 1: # foo(0, 1, 2)
+ return tuple(args) # type: ignore [arg-type] # we assume that argument styles (arg vs. iterable) are not mixed.
+ if isinstance(args[0], abc.Iterator): # foo(iter([0,1,2]))
+ return tuple(args[0])
+ if isinstance(args[0], abc.Iterable) and not isinstance(args[0], str): # foo([0, 1, 2])
+ return tuple(args[0])
+ # foo(0)
+ return (args[0], ) # type: ignore [return-value] # if args[0] is a str, we assume that ArgType was str.
+
## EOF ##
diff --git a/bsfs/utils/errors.py b/bsfs/utils/errors.py
index c5e8e16..be9d40e 100644
--- a/bsfs/utils/errors.py
+++ b/bsfs/utils/errors.py
@@ -38,4 +38,7 @@ class UnreachableError(ProgrammingError):
class ConfigError(_BSFSError):
"""User config issue."""
+class BackendError(_BSFSError):
+ """Could not parse an AST structure."""
+
## EOF ##