From a0f2308adcb226d28de3355bc7115a6d9b669462 Mon Sep 17 00:00:00 2001 From: Matthias Baumgartner Date: Mon, 19 Dec 2022 13:40:02 +0100 Subject: import fixes --- bsfs/graph/graph.py | 2 +- bsfs/query/validator.py | 177 ++++++++++++++++++++++++++++++++++++- bsfs/triple_store/base.py | 3 +- bsfs/triple_store/sparql/sparql.py | 2 +- test/triple_store/test_base.py | 3 + 5 files changed, 182 insertions(+), 5 deletions(-) diff --git a/bsfs/graph/graph.py b/bsfs/graph/graph.py index 10e5904..51fe75d 100644 --- a/bsfs/graph/graph.py +++ b/bsfs/graph/graph.py @@ -111,7 +111,7 @@ class Graph(): type_ = self.schema.node(node_type) return _nodes.Nodes(self._backend, self._user, type_, {guid}) - def get(self, node_type: URI, subject: ast.filter.FilterExpression) -> Nodes: + def get(self, node_type: URI, subject: ast.filter.FilterExpression) -> _nodes.Nodes: """Return a `Nodes` instance over all nodes of type *node_type* that match the *subject* query.""" raise NotImplementedError() diff --git a/bsfs/query/validator.py b/bsfs/query/validator.py index ac3789a..123b947 100644 --- a/bsfs/query/validator.py +++ b/bsfs/query/validator.py @@ -29,7 +29,180 @@ class Filter(): def __init__(self, schema: bsc.Schema): self.schema = schema - def parse(self, node: ast.filter.FilterExpression): - raise NotImplementedError() + def parse(self, node: ast.filter.FilterExpression, subject: bsc.types._Vertex): + # subject is a node type + if not isinstance(subject, bsc.Node): + raise errors.ConsistencyError(f'Expected a node, found {subject}') + # subject exists in the schema + if subject not in self.schema.nodes: + raise errors.ConsistencyError(f'Invalid node type {subject}') + # root expression is valid + self._parse(node, subject) + # all tests passed + return True + + + def _parse_numerical_expression(self, node: ast.filter.FilterExpression, subject: bsc.types._Vertex): + if isinstance(node, ast.filter.And): + return self._and(node, subject) + elif isinstance(node, ast.filter.Or): + return self._or(node, subject) + elif isinstance(node, ast.filter.LessThan): + return self._lessThan(node, subject) + elif isinstance(node, ast.filter.GreaterThan): + return self._greaterThan(node, subject) + elif isinstance(node, ast.filter.Equals): + return self._equals(node, subject, numerical=True) + else: + raise errors.ConsistencyError(f'Expected a numerical expression, found {node}') + + + def __branch(self, node: typing.Union[ast.filter.Any, ast.filter.And], subject: bsc.types._Vertex): + # subject is a node type + if not isinstance(subject, bsc.Node): + raise errors.ConsistencyError(f'Expected a node, found {subject}') + # subject exists in the schema + if subject not in self.schema.nodes: + raise errors.ConsistencyError(f'Invalid node type {subject}') + # predicate is valid + dom, rng = self._parse_predicate_expression(node.predicate) + # subject is a subtype of the predicate's domain + if not subject <= dom: + raise errors.ConsistencyError(f'Expected type {dom}, found {subject}') + # child expression is valid + self._parse_filter_expression(node.expr, rng) + + def _any(self, node: ast.filter.Any, subject: bsc.types._Vertex): + return self.__branch(node, subject) + + def _all(self, node: ast.filter.All, subject: bsc.types._Vertex): + return self.__branch(node, subject) + + + def __agg(self, node: typing.Union[ast.filter.And, ast.filter.Or], subject: bsc.types._Vertex): + for expr in node: + # child expression is valid + self._parse_filter_expression(expr, subject) + + def _and(self, node: ast.filter.And, subject: bsc.types._Vertex): + return self.__agg(node, subject) + + def _or(self, node: ast.filter.Or, subject: bsc.types._Vertex): + return self.__agg(node, subject) + + + def _not(self, node: ast.filter.Not, subject: bsc.types._Vertex): + # child expression is valid + self._parse_filter_expression(node.expr, subject) + + + def _has(self, node: ast.filter.Has, subject: bsc.types._Vertex): + # subject is a node type + if not isinstance(subject, bsc.Node): + raise errors.ConsistencyError(f'Expected a node, found {subject}') + # subject exists in the schema + if subject not in self.schema.nodes: + raise errors.ConsistencyError(f'Invalid node type {subject}') + # predicate is valid + dom, rng = self._parse_predicate_expression(node.predicate) + # subject is a subtype of the predicate's domain + if not subject <= dom: + raise errors.ConsistencyError(f'Expected type {dom}, found {subject}') + # node.count is a numerical expression + self._parse_numerical_expression(node.count, self.schema.literal(ns.xsd.numerical)) + + + def _equals(self, node: ast.filter.Equals, subject: bsc.types._Vertex, numerical: bool = False): + # subject is a literal + #if not isinstance(subject, bsc.Literal): + # raise errors.ConsistencyError(f'Expected a literal, found {subject}') + if isinstance(subject, bsc.Node): + # FIXME: How to handle this case? + # FIXME: How to check if a NodeType is acceptable? + # FIXME: Maybe use flags to control what is expected as node identifiers? + from bsfs.graph.nodes import Nodes # FIXME + if not isinstance(node.value, Nodes) and not isinstance(node.value, URI): + raise errors.ConsistencyError(f'Expected a Nodes or URI, found {node.value}') + elif isinstance(subject, bsc.Literal): + # literal exists in the schema + if subject not in self.schema.literals: + raise errors.ConsistencyError(f'Invalid literal type {subject}') + else: + # FIXME: + raise errors.ConsistencyError(f'Expected a literal, found {subject}') + # node.value is numeric (if requested) + if numerical and not isinstance(node.value, float) and not isinstance(node.value, int): + raise errors.ConsistencyError(f'Expected a numerical value (int or float), found {node.value}') + # NOTE: We cannot check if node.value agrees with the subject since we don't know + # all literal types, their hierarchy, and how the backend converts datatypes. + + + def _substring(self, node: ast.filter.Substring, subject: bsc.types._Vertex): + # subject is a literal + if not isinstance(subject, bsc.Literal): + raise errors.ConsistencyError(f'Expected a literal, found {subject}') + # literal exists in the schema + if subject not in self.schema.literals: + raise errors.ConsistencyError(f'Invalid literal type {subject}') + # node.value matches literal datatype + if not subject.is_a(ns.xsd.string): + raise errors.ConsistencyError(f'Expected a string literal, found {subject}') + + + def _lessThan(self, node: ast.filter.LessThan, subject: bsc.types._Vertex): + # subject is a literal + if not isinstance(subject, bsc.Literal): + raise errors.ConsistencyError(f'Expected a literal, found {subject}') + # literal exists in the schema + if subject not in self.schema.literals: + raise errors.ConsistencyError(f'Invalid literal type {subject}') + # subject is numerical + if not subject.is_a(ns.xsd.numerical): + raise errors.ConsistencyError(f'Expected a numerical literal, found {subject}') + + + def _greaterThan(self, node: ast.filter.GreaterThan, subject: bsc.types._Vertex): + # subject is a literal + if not isinstance(subject, bsc.Literal): + raise errors.ConsistencyError(f'Expected a literal, found {subject}') + # literal exists in the schema + if subject not in self.schema.literals: + raise errors.ConsistencyError(f'Invalid literal type {subject}') + # subject is numerical + if not subject.is_a(ns.xsd.numerical): + raise errors.ConsistencyError(f'Expected a numerical literal, found {subject}') + + + def _predicate(self, node: ast.filter.Predicate): + try: + # predicate exists in the schema + pred = self.schema.predicate(node.predicate) + except KeyError: + raise errors.ConsistencyError(f'') # FIXME + if node.reverse: + return pred.range, pred.domain + else: + return pred.domain, pred.range + + + def _oneOf(self, node: ast.filter.OneOf): + dom, rng = None, None + for pred in node: + try: + # parse child expression + subdom, subrng = self._parse_predicate_expression(pred) + # domain and range must be related across all child expressions + if not subdom <= dom and not subdom >= dom: + raise errors.ConsistencyError(f'') # FIXME + if not subrng <= rng and not subrng >= rng: + raise errors.ConsistencyError(f'') # FIXME + # determine overall domain and range + if dom is None or subdom < dom: # pick most specific domain + dom = subdom + if rng is None or subrng > rng: # pick most generic range + rng = subrng + except KeyError: + raise errors.ConsistencyError(f'') + return dom, rng ## EOF ## diff --git a/bsfs/triple_store/base.py b/bsfs/triple_store/base.py index 28ebb86..5ff9523 100644 --- a/bsfs/triple_store/base.py +++ b/bsfs/triple_store/base.py @@ -9,6 +9,7 @@ import abc import typing # inner-module imports +from bsfs.query import ast from bsfs.utils import URI, typename import bsfs.schema as _schema @@ -111,7 +112,7 @@ class TripleStoreBase(abc.ABC): @abc.abstractmethod def get( self, - node_type: bsc.Node, + node_type: _schema.Node, query: ast.filter.FilterExpression, ) -> typing.Iterator[URI]: """Return guids of nodes of type *node_type* that match the *query*.""" diff --git a/bsfs/triple_store/sparql/sparql.py b/bsfs/triple_store/sparql/sparql.py index fff540a..7172f34 100644 --- a/bsfs/triple_store/sparql/sparql.py +++ b/bsfs/triple_store/sparql/sparql.py @@ -15,7 +15,7 @@ from bsfs.query import ast from bsfs.utils import errors, URI # inner-module imports -from . import base +from .. import base # exports diff --git a/test/triple_store/test_base.py b/test/triple_store/test_base.py index a4b0559..a0c3260 100644 --- a/test/triple_store/test_base.py +++ b/test/triple_store/test_base.py @@ -35,6 +35,9 @@ class DummyBase(TripleStoreBase): def schema(self, schema): pass + def get(self, node_type, query): + pass + def exists(self, node_type, guids): pass -- cgit v1.2.3