about summary refs log tree commit diff stats
path: root/bsfs/query
diff options
context:
space:
mode:
Diffstat (limited to 'bsfs/query')
-rw-r--r-- bsfs/query/ast/filter_.py 59
-rw-r--r-- bsfs/query/validator.py 90
2 files changed, 93 insertions, 56 deletions
diff --git a/bsfs/query/ast/filter_.py b/bsfs/query/ast/filter_.py
index b129ded..2f0270c 100644
--- a/bsfs/query/ast/filter_.py
+++ b/bsfs/query/ast/filter_.py
@@ -252,8 +252,7 @@ class Has(FilterExpression):
class _Value(FilterExpression):
- """
- """
+ """Matches some value."""
# target value.
value: typing.Any
@@ -277,13 +276,13 @@ class Is(_Value):
class Equals(_Value):
"""Value matches exactly.
- NOTE: Value format must correspond to literal type; can be a string, a number, or a Node
+ NOTE: Value must correspond to literal type.
"""
class Substring(_Value):
"""Value matches a substring
- NOTE: value format must be a string
+ NOTE: value must be a string.
"""
@@ -295,9 +294,49 @@ class EndsWith(_Value):
"""Value ends with a given string."""
+class Distance(FilterExpression):
+ """Distance to a reference is (strictly) below a threshold. Assumes a Feature literal."""
+
+ # FIXME:
+ # (a) pass a node/predicate as anchor instead of a value.
+ # Then we don't need to materialize the reference.
+ # (b) pass a FilterExpression (_Bounded) instead of a threshold.
+ # Then, we could also query values greater than a threshold.
+
+ # reference value; must be an iterable of hashable items because __hash__ calls tuple() on it.
+ reference: typing.Any
+
+ # distance threshold; coerced to float in __init__.
+ threshold: float
+
+ # strict (True) or non-strict (False) bound. NOTE(review): previously "closed (True) or open (False)", which reads inverted for a flag named strict - confirm against the backend.
+ strict: bool
+
+ def __init__(
+ self,
+ reference: typing.Any,
+ threshold: float,
+ strict: bool = False,
+ ):
+ self.reference = reference
+ self.threshold = float(threshold)
+ self.strict = bool(strict)
+
+ def __repr__(self) -> str:
+ return f'{typename(self)}({self.reference}, {self.threshold}, {self.strict})'
+
+ def __hash__(self) -> int:
+ return hash((super().__hash__(), tuple(self.reference), self.threshold, self.strict))
+
+ def __eq__(self, other) -> bool:
+ return super().__eq__(other) \
+ and self.reference == other.reference \
+ and self.threshold == other.threshold \
+ and self.strict == other.strict
+
+
class _Bounded(FilterExpression):
- """
- """
+ """Value is bounded by a threshold. Assumes a Number literal."""
# bound.
threshold: float
@@ -327,15 +366,11 @@ class _Bounded(FilterExpression):
class LessThan(_Bounded):
- """Value is (strictly) smaller than threshold.
- NOTE: only on numerical literals
- """
+ """Value is (strictly) smaller than threshold. Assumes a Number literal."""
class GreaterThan(_Bounded):
- """Value is (strictly) larger than threshold
- NOTE: only on numerical literals
- """
+ """Value is (strictly) larger than threshold. Assumes a Number literal."""
class Predicate(PredicateExpression):
diff --git a/bsfs/query/validator.py b/bsfs/query/validator.py
index 352203a..904ac14 100644
--- a/bsfs/query/validator.py
+++ b/bsfs/query/validator.py
@@ -34,9 +34,6 @@ class Filter():
"""
- # vertex types
- T_VERTEX = typing.Union[bsc.Node, bsc.Literal] # FIXME: Shouldn't this be in the schema?
-
# schema to validate against.
schema: bsc.Schema
@@ -64,7 +61,7 @@ class Filter():
## routing methods
- def _parse_filter_expression(self, type_: T_VERTEX, node: ast.filter.FilterExpression):
+ def _parse_filter_expression(self, type_: bsc.Vertex, node: ast.filter.FilterExpression):
"""Route *node* to the handler of the respective FilterExpression subclass."""
if isinstance(node, ast.filter.Is):
return self._is(type_, node)
@@ -72,6 +69,8 @@ class Filter():
return self._not(type_, node)
if isinstance(node, ast.filter.Has):
return self._has(type_, node)
+ if isinstance(node, ast.filter.Distance):
+ return self._distance(type_, node)
if isinstance(node, (ast.filter.Any, ast.filter.All)):
return self._branch(type_, node)
if isinstance(node, (ast.filter.And, ast.filter.Or)):
@@ -83,7 +82,7 @@ class Filter():
# invalid node
raise errors.BackendError(f'expected filter expression, found {node}')
- def _parse_predicate_expression(self, node: ast.filter.PredicateExpression) -> typing.Tuple[T_VERTEX, T_VERTEX]:
+ def _parse_predicate_expression(self, node: ast.filter.PredicateExpression) -> typing.Tuple[bsc.Vertex, bsc.Vertex]:
"""Route *node* to the handler of the respective PredicateExpression subclass."""
if isinstance(node, ast.filter.Predicate):
return self._predicate(node)
@@ -95,58 +94,47 @@ class Filter():
## predicate expressions
- def _predicate(self, node: ast.filter.Predicate) -> typing.Tuple[T_VERTEX, T_VERTEX]:
+ def _predicate(self, node: ast.filter.Predicate) -> typing.Tuple[bsc.Vertex, bsc.Vertex]:
# predicate exists in the schema
if not self.schema.has_predicate(node.predicate):
raise errors.ConsistencyError(f'predicate {node.predicate} is not in the schema')
# determine domain and range
pred = self.schema.predicate(node.predicate)
+ if not isinstance(pred.range, (bsc.Node, bsc.Literal)):
+ raise errors.BackendError(f'the range of predicate {pred} is undefined')
dom, rng = pred.domain, pred.range
- if rng is None:
- # FIXME: It is a design error that Predicates can have a None range...
- raise errors.BackendError(f'predicate {pred} has no range')
if node.reverse:
dom, rng = rng, dom # type: ignore [assignment] # variable re-use confuses mypy
# return domain and range
return dom, rng
- def _one_of(self, node: ast.filter.OneOf) -> typing.Tuple[T_VERTEX, T_VERTEX]:
+ def _one_of(self, node: ast.filter.OneOf) -> typing.Tuple[bsc.Vertex, bsc.Vertex]:
# determine domain and range types
# NOTE: select the most specific domain and the most generic range
dom, rng = None, None
for pred in node:
# parse child expression
subdom, subrng = self._parse_predicate_expression(pred)
- try:
- # determine overall domain
- if dom is None or subdom < dom: # pick most specific domain
- dom = subdom
- # domains must be related across all child expressions
- if not subdom <= dom and not subdom >= dom:
- raise errors.ConsistencyError(f'domains {subdom} and {dom} are not related')
- except TypeError as err: # compared literal vs. node
- raise errors.ConsistencyError(f'domains {subdom} and {dom} are not of the same type') from err
-
- try:
- # determine overall range
- if rng is None or subrng > rng: # pick most generic range
- rng = subrng
- # ranges must be related across all child expressions
- if not subrng <= rng and not subrng >= rng:
- raise errors.ConsistencyError(f'ranges {subrng} and {rng} are not related')
- except TypeError as err: # compared literal vs. node
- raise errors.ConsistencyError(f'ranges {subrng} and {rng} are not of the same type') from err
- # check domain and range
- if dom is None or rng is None:
- # OneOf guarantees at least one expression, these two cases cannot happen
- raise errors.UnreachableError()
- # return domain and range
- return dom, rng
+ # determine overall domain
+ if dom is None or subdom < dom: # pick most specific domain
+ dom = subdom
+ # domains must be related across all child expressions
+ if not subdom <= dom and not subdom >= dom:
+ raise errors.ConsistencyError(f'domains {subdom} and {dom} are not related')
+ # determine overall range
+ if rng is None or subrng > rng: # pick most generic range
+ rng = subrng
+ # ranges must be related across all child expressions
+ if not subrng <= rng and not subrng >= rng:
+ raise errors.ConsistencyError(f'ranges {subrng} and {rng} are not related')
+ # OneOf guarantees at least one expression, dom and rng are always bsc.Vertex.
+ # mypy does not realize this, hence we ignore the warning.
+ return dom, rng # type: ignore [return-value]
## intermediates
- def _branch(self, type_: T_VERTEX, node: ast.filter._Branch):
+ def _branch(self, type_: bsc.Vertex, node: ast.filter._Branch):
# type is a Node
if not isinstance(type_, bsc.Node):
raise errors.ConsistencyError(f'expected a Node, found {type_}')
@@ -167,16 +155,16 @@ class Filter():
# child expression is valid
self._parse_filter_expression(rng, node.expr)
- def _agg(self, type_: T_VERTEX, node: ast.filter._Agg):
+ def _agg(self, type_: bsc.Vertex, node: ast.filter._Agg):
for expr in node:
# child expression is valid
self._parse_filter_expression(type_, expr)
- def _not(self, type_: T_VERTEX, node: ast.filter.Not):
+ def _not(self, type_: bsc.Vertex, node: ast.filter.Not):
# child expression is valid
self._parse_filter_expression(type_, node.expr)
- def _has(self, type_: T_VERTEX, node: ast.filter.Has):
+ def _has(self, type_: bsc.Vertex, node: ast.filter.Has):
# type is a Node
if not isinstance(type_, bsc.Node):
raise errors.ConsistencyError(f'expected a Node, found {type_}')
@@ -189,19 +177,30 @@ class Filter():
if not type_ <= dom:
raise errors.ConsistencyError(f'expected type {dom}, found {type_}')
# node.count is a numerical expression
- # FIXME: We have to ensure that ns.xsd.integer is always known in the schema!
- self._parse_filter_expression(self.schema.literal(ns.xsd.integer), node.count)
+ self._parse_filter_expression(self.schema.literal(ns.bsfs.Number), node.count)
+
+ def _distance(self, type_: bsc.Vertex, node: ast.filter.Distance):
+ # type is a Feature literal
+ if not isinstance(type_, bsc.Feature):
+ raise errors.ConsistencyError(f'expected a Feature, found {type_}')
+ # type exists in the schema
+ if type_ not in self.schema.literals():
+ raise errors.ConsistencyError(f'literal {type_} is not in the schema')
+ # reference length matches the dimension of type_
+ if len(node.reference) != type_.dimension:
+ raise errors.ConsistencyError(f'reference has dimension {len(node.reference)}, expected {type_.dimension}')
+ # FIXME: test dtype
## conditions
- def _is(self, type_: T_VERTEX, node: ast.filter.Is): # pylint: disable=unused-argument # (node)
+ def _is(self, type_: bsc.Vertex, node: ast.filter.Is): # pylint: disable=unused-argument # (node)
if not isinstance(type_, bsc.Node):
raise errors.ConsistencyError(f'expected a Node, found {type_}')
if type_ not in self.schema.nodes():
raise errors.ConsistencyError(f'node {type_} is not in the schema')
- def _value(self, type_: T_VERTEX, node: ast.filter._Value): # pylint: disable=unused-argument # (node)
+ def _value(self, type_: bsc.Vertex, node: ast.filter._Value): # pylint: disable=unused-argument # (node)
# type is a literal
if not isinstance(type_, bsc.Literal):
raise errors.ConsistencyError(f'expected a Literal, found {type_}')
@@ -211,13 +210,16 @@ class Filter():
# FIXME: Check if node.value corresponds to type_
# FIXME: A specific literal might be requested (i.e., a numeric type when used in Has)
- def _bounded(self, type_: T_VERTEX, node: ast.filter._Bounded): # pylint: disable=unused-argument # (node)
+ def _bounded(self, type_: bsc.Vertex, node: ast.filter._Bounded): # pylint: disable=unused-argument # (node)
# type is a literal
if not isinstance(type_, bsc.Literal):
raise errors.ConsistencyError(f'expected a Literal, found {type_}')
# type exists in the schema
if type_ not in self.schema.literals():
raise errors.ConsistencyError(f'literal {type_} is not in the schema')
+ # type must be a numerical
+ if not type_ <= self.schema.literal(ns.bsfs.Number):
+ raise errors.ConsistencyError(f'expected a number type, found {type_}')
# FIXME: Check if node.value corresponds to type_