diff options
Diffstat (limited to 'bsfs/query/validator.py')
-rw-r--r-- | bsfs/query/validator.py | 336 |
1 files changed, 176 insertions, 160 deletions
diff --git a/bsfs/query/validator.py b/bsfs/query/validator.py index 123b947..352203a 100644 --- a/bsfs/query/validator.py +++ b/bsfs/query/validator.py @@ -9,6 +9,8 @@ import typing # bsfs imports from bsfs import schema as bsc +from bsfs.namespace import ns +from bsfs.utils import errors, typename # inner-module imports from . import ast @@ -22,6 +24,18 @@ __all__ : typing.Sequence[str] = ( ## code ## class Filter(): + """Validate a `bsfs.query.ast.filter` query's structure and schema compliance. + + * Conditions (Bounded, Value) can only be applied on literals + * Branches, Id, and Has can only be applied on nodes + * Predicates' domain and range must match + * Predicate paths must follow the schema + * Referenced types are present in the schema + + """ + + # vertex types + T_VERTEX = typing.Union[bsc.Node, bsc.Literal] # FIXME: Shouldn't this be in the schema? # schema to validate against. schema: bsc.Schema @@ -29,180 +43,182 @@ class Filter(): def __init__(self, schema: bsc.Schema): self.schema = schema - def parse(self, node: ast.filter.FilterExpression, subject: bsc.types._Vertex): - # subject is a node type - if not isinstance(subject, bsc.Node): - raise errors.ConsistencyError(f'Expected a node, found {subject}') - # subject exists in the schema - if subject not in self.schema.nodes: - raise errors.ConsistencyError(f'Invalid node type {subject}') - # root expression is valid - self._parse(node, subject) + def __call__(self, root_type: bsc.Node, query: ast.filter.FilterExpression): + """Validate a filter *query*, assuming the subject having *root_type*. + + Raises a `bsfs.utils.errors.ConsistencyError` if the query violates the schema. + Raises a `bsfs.utils.errors.BackendError` if the query structure is invalid. + + """ + # root_type must be a schema.Node + if not isinstance(root_type, bsc.Node): + raise TypeError(f'Expected a node, found {typename(root_type)}') + # root_type must exist in the schema + if root_type not in self.schema.nodes(): + raise errors.ConsistencyError(f'{root_type} is not defined in the schema') + # check root expression + self._parse_filter_expression(root_type, query) # all tests passed return True - def _parse_numerical_expression(self, node: ast.filter.FilterExpression, subject: bsc.types._Vertex): - if isinstance(node, ast.filter.And): - return self._and(node, subject) - elif isinstance(node, ast.filter.Or): - return self._or(node, subject) - elif isinstance(node, ast.filter.LessThan): - return self._lessThan(node, subject) - elif isinstance(node, ast.filter.GreaterThan): - return self._greaterThan(node, subject) - elif isinstance(node, ast.filter.Equals): - return self._equals(node, subject, numerical=True) - else: - raise errors.ConsistencyError(f'Expected a numerical expression, found {node}') - - - def __branch(self, node: typing.Union[ast.filter.Any, ast.filter.And], subject: bsc.types._Vertex): - # subject is a node type - if not isinstance(subject, bsc.Node): - raise errors.ConsistencyError(f'Expected a node, found {subject}') - # subject exists in the schema - if subject not in self.schema.nodes: - raise errors.ConsistencyError(f'Invalid node type {subject}') - # predicate is valid - dom, rng = self._parse_predicate_expression(node.predicate) - # subject is a subtype of the predicate's domain - if not subject <= dom: - raise errors.ConsistencyError(f'Expected type {dom}, found {subject}') - # child expression is valid - self._parse_filter_expression(node.expr, rng) + ## routing methods + + def _parse_filter_expression(self, type_: T_VERTEX, node: ast.filter.FilterExpression): + """Route *node* to the handler of the respective FilterExpression subclass.""" + if isinstance(node, ast.filter.Is): + return self._is(type_, node) + if isinstance(node, ast.filter.Not): + return self._not(type_, node) + if isinstance(node, ast.filter.Has): + return self._has(type_, node) + if isinstance(node, (ast.filter.Any, ast.filter.All)): + return self._branch(type_, node) + if isinstance(node, (ast.filter.And, ast.filter.Or)): + return self._agg(type_, node) + if isinstance(node, (ast.filter.Equals, ast.filter.Substring, ast.filter.StartsWith, ast.filter.EndsWith)): + return self._value(type_, node) + if isinstance(node, (ast.filter.LessThan, ast.filter.GreaterThan)): + return self._bounded(type_, node) + # invalid node + raise errors.BackendError(f'expected filter expression, found {node}') + + def _parse_predicate_expression(self, node: ast.filter.PredicateExpression) -> typing.Tuple[T_VERTEX, T_VERTEX]: + """Route *node* to the handler of the respective PredicateExpression subclass.""" + if isinstance(node, ast.filter.Predicate): + return self._predicate(node) + if isinstance(node, ast.filter.OneOf): + return self._one_of(node) + # invalid node + raise errors.BackendError(f'expected predicate expression, found {node}') + + + ## predicate expressions + + def _predicate(self, node: ast.filter.Predicate) -> typing.Tuple[T_VERTEX, T_VERTEX]: + # predicate exists in the schema + if not self.schema.has_predicate(node.predicate): + raise errors.ConsistencyError(f'predicate {node.predicate} is not in the schema') + # determine domain and range + pred = self.schema.predicate(node.predicate) + dom, rng = pred.domain, pred.range + if rng is None: + # FIXME: It is a design error that Predicates can have a None range... + raise errors.BackendError(f'predicate {pred} has no range') + if node.reverse: + dom, rng = rng, dom # type: ignore [assignment] # variable re-use confuses mypy + # return domain and range + return dom, rng - def _any(self, node: ast.filter.Any, subject: bsc.types._Vertex): - return self.__branch(node, subject) + def _one_of(self, node: ast.filter.OneOf) -> typing.Tuple[T_VERTEX, T_VERTEX]: + # determine domain and range types + # NOTE: select the most specific domain and the most generic range + dom, rng = None, None + for pred in node: + # parse child expression + subdom, subrng = self._parse_predicate_expression(pred) + try: + # determine overall domain + if dom is None or subdom < dom: # pick most specific domain + dom = subdom + # domains must be related across all child expressions + if not subdom <= dom and not subdom >= dom: + raise errors.ConsistencyError(f'domains {subdom} and {dom} are not related') + except TypeError as err: # compared literal vs. node + raise errors.ConsistencyError(f'domains {subdom} and {dom} are not of the same type') from err - def _all(self, node: ast.filter.All, subject: bsc.types._Vertex): - return self.__branch(node, subject) + try: + # determine overall range + if rng is None or subrng > rng: # pick most generic range + rng = subrng + # ranges must be related across all child expressions + if not subrng <= rng and not subrng >= rng: + raise errors.ConsistencyError(f'ranges {subrng} and {rng} are not related') + except TypeError as err: # compared literal vs. node + raise errors.ConsistencyError(f'ranges {subrng} and {rng} are not of the same type') from err + # check domain and range + if dom is None or rng is None: + # OneOf guarantees at least one expression, these two cases cannot happen + raise errors.UnreachableError() + # return domain and range + return dom, rng - def __agg(self, node: typing.Union[ast.filter.And, ast.filter.Or], subject: bsc.types._Vertex): + ## intermediates + + def _branch(self, type_: T_VERTEX, node: ast.filter._Branch): + # type is a Node + if not isinstance(type_, bsc.Node): + raise errors.ConsistencyError(f'expected a Node, found {type_}') + # type exists in the schema + # FIXME: Isn't it actually guaranteed that the type (except the root type) is part of the schema? + # all types can be traced back to (a) root_type, (b) predicate, or (c) manually set (e.g. in _is). + # For (a), we do (and have to) perform a check. For (c), the code base should be consistent throughout + # the module, so this is an assumption that has to be ensured in schema.Schema. For (b), we know (and + # check) that the predicate is in the schema, hence all node/literals derived from it are also in the + # schema by construction of the schema.Schema class. So, why do we check this every time? + if type_ not in self.schema.nodes(): + raise errors.ConsistencyError(f'node {type_} is not in the schema') + # predicate is valid + dom, rng = self._parse_predicate_expression(node.predicate) + # type_ is a subtype of the predicate's domain + if not type_ <= dom: + raise errors.ConsistencyError(f'expected type {dom} or subtype thereof, found {type_}') + # child expression is valid + self._parse_filter_expression(rng, node.expr) + + def _agg(self, type_: T_VERTEX, node: ast.filter._Agg): for expr in node: # child expression is valid - self._parse_filter_expression(expr, subject) - - def _and(self, node: ast.filter.And, subject: bsc.types._Vertex): - return self.__agg(node, subject) - - def _or(self, node: ast.filter.Or, subject: bsc.types._Vertex): - return self.__agg(node, subject) - + self._parse_filter_expression(type_, expr) - def _not(self, node: ast.filter.Not, subject: bsc.types._Vertex): + def _not(self, type_: T_VERTEX, node: ast.filter.Not): # child expression is valid - self._parse_filter_expression(node.expr, subject) - - - def _has(self, node: ast.filter.Has, subject: bsc.types._Vertex): - # subject is a node type - if not isinstance(subject, bsc.Node): - raise errors.ConsistencyError(f'Expected a node, found {subject}') - # subject exists in the schema - if subject not in self.schema.nodes: - raise errors.ConsistencyError(f'Invalid node type {subject}') + self._parse_filter_expression(type_, node.expr) + + def _has(self, type_: T_VERTEX, node: ast.filter.Has): + # type is a Node + if not isinstance(type_, bsc.Node): + raise errors.ConsistencyError(f'expected a Node, found {type_}') + # type exists in the schema + if type_ not in self.schema.nodes(): + raise errors.ConsistencyError(f'node {type_} is not in the schema') # predicate is valid - dom, rng = self._parse_predicate_expression(node.predicate) - # subject is a subtype of the predicate's domain - if not subject <= dom: - raise errors.ConsistencyError(f'Expected type {dom}, found {subject}') + dom, _= self._parse_predicate_expression(node.predicate) + # type_ is a subtype of the predicate's domain + if not type_ <= dom: + raise errors.ConsistencyError(f'expected type {dom}, found {type_}') # node.count is a numerical expression - self._parse_numerical_expression(node.count, self.schema.literal(ns.xsd.numerical)) - - - def _equals(self, node: ast.filter.Equals, subject: bsc.types._Vertex, numerical: bool = False): - # subject is a literal - #if not isinstance(subject, bsc.Literal): - # raise errors.ConsistencyError(f'Expected a literal, found {subject}') - if isinstance(subject, bsc.Node): - # FIXME: How to handle this case? - # FIXME: How to check if a NodeType is acceptable? - # FIXME: Maybe use flags to control what is expected as node identifiers? - from bsfs.graph.nodes import Nodes # FIXME - if not isinstance(node.value, Nodes) and not isinstance(node.value, URI): - raise errors.ConsistencyError(f'Expected a Nodes or URI, found {node.value}') - elif isinstance(subject, bsc.Literal): - # literal exists in the schema - if subject not in self.schema.literals: - raise errors.ConsistencyError(f'Invalid literal type {subject}') - else: - # FIXME: - raise errors.ConsistencyError(f'Expected a literal, found {subject}') - # node.value is numeric (if requested) - if numerical and not isinstance(node.value, float) and not isinstance(node.value, int): - raise errors.ConsistencyError(f'Expected a numerical value (int or float), found {node.value}') - # NOTE: We cannot check if node.value agrees with the subject since we don't know - # all literal types, their hierarchy, and how the backend converts datatypes. - - - def _substring(self, node: ast.filter.Substring, subject: bsc.types._Vertex): - # subject is a literal - if not isinstance(subject, bsc.Literal): - raise errors.ConsistencyError(f'Expected a literal, found {subject}') - # literal exists in the schema - if subject not in self.schema.literals: - raise errors.ConsistencyError(f'Invalid literal type {subject}') - # node.value matches literal datatype - if not subject.is_a(ns.xsd.string): - raise errors.ConsistencyError(f'Expected a string literal, found {subject}') - - - def _lessThan(self, node: ast.filter.LessThan, subject: bsc.types._Vertex): - # subject is a literal - if not isinstance(subject, bsc.Literal): - raise errors.ConsistencyError(f'Expected a literal, found {subject}') - # literal exists in the schema - if subject not in self.schema.literals: - raise errors.ConsistencyError(f'Invalid literal type {subject}') - # subject is numerical - if not subject.is_a(ns.xsd.numerical): - raise errors.ConsistencyError(f'Expected a numerical literal, found {subject}') - - - def _greaterThan(self, node: ast.filter.GreaterThan, subject: bsc.types._Vertex): - # subject is a literal - if not isinstance(subject, bsc.Literal): - raise errors.ConsistencyError(f'Expected a literal, found {subject}') - # literal exists in the schema - if subject not in self.schema.literals: - raise errors.ConsistencyError(f'Invalid literal type {subject}') - # subject is numerical - if not subject.is_a(ns.xsd.numerical): - raise errors.ConsistencyError(f'Expected a numerical literal, found {subject}') - - - def _predicate(self, node: ast.filter.Predicate): - try: - # predicate exists in the schema - pred = self.schema.predicate(node.predicate) - except KeyError: - raise errors.ConsistencyError(f'') # FIXME - if node.reverse: - return pred.range, pred.domain - else: - return pred.domain, pred.range - + # FIXME: We have to ensure that ns.xsd.integer is always known in the schema! + self._parse_filter_expression(self.schema.literal(ns.xsd.integer), node.count) + + + ## conditions + + def _is(self, type_: T_VERTEX, node: ast.filter.Is): # pylint: disable=unused-argument # (node) + if not isinstance(type_, bsc.Node): + raise errors.ConsistencyError(f'expected a Node, found {type_}') + if type_ not in self.schema.nodes(): + raise errors.ConsistencyError(f'node {type_} is not in the schema') + + def _value(self, type_: T_VERTEX, node: ast.filter._Value): # pylint: disable=unused-argument # (node) + # type is a literal + if not isinstance(type_, bsc.Literal): + raise errors.ConsistencyError(f'expected a Literal, found {type_}') + # type exists in the schema + if type_ not in self.schema.literals(): + raise errors.ConsistencyError(f'literal {type_} is not in the schema') + # FIXME: Check if node.value corresponds to type_ + # FIXME: A specific literal might be requested (i.e., a numeric type when used in Has) + + def _bounded(self, type_: T_VERTEX, node: ast.filter._Bounded): # pylint: disable=unused-argument # (node) + # type is a literal + if not isinstance(type_, bsc.Literal): + raise errors.ConsistencyError(f'expected a Literal, found {type_}') + # type exists in the schema + if type_ not in self.schema.literals(): + raise errors.ConsistencyError(f'literal {type_} is not in the schema') + # FIXME: Check if node.value corresponds to type_ - def _oneOf(self, node: ast.filter.OneOf): - dom, rng = None, None - for pred in node: - try: - # parse child expression - subdom, subrng = self._parse_predicate_expression(pred) - # domain and range must be related across all child expressions - if not subdom <= dom and not subdom >= dom: - raise errors.ConsistencyError(f'') # FIXME - if not subrng <= rng and not subrng >= rng: - raise errors.ConsistencyError(f'') # FIXME - # determine overall domain and range - if dom is None or subdom < dom: # pick most specific domain - dom = subdom - if rng is None or subrng > rng: # pick most generic range - rng = subrng - except KeyError: - raise errors.ConsistencyError(f'') - return dom, rng ## EOF ## |