diff options
author | Matthias Baumgartner <dev@igsor.net> | 2023-01-20 18:01:17 +0100 |
---|---|---|
committer | Matthias Baumgartner <dev@igsor.net> | 2023-01-20 18:01:17 +0100 |
commit | e2f08efc0d8a3c875994bdb69623c30cce5079d9 (patch) | |
tree | 0870ac597a55431e63a20e05bb11cf913a5f3e3d | |
parent | a4789394e40aaa3152ad6009955709a6c7d277c2 (diff) | |
download | bsfs-e2f08efc0d8a3c875994bdb69623c30cce5079d9.tar.gz bsfs-e2f08efc0d8a3c875994bdb69623c30cce5079d9.tar.bz2 bsfs-e2f08efc0d8a3c875994bdb69623c30cce5079d9.zip |
fetch AST validation
-rw-r--r-- | bsfs/query/validator.py | 123 | ||||
-rw-r--r-- | test/query/test_validator.py | 215 |
2 files changed, 336 insertions, 2 deletions
# ---------------------------------------------------------------------------
# bsfs/query/validator.py
#
# NOTE: besides adding the `Fetch` validator below, this change lowercases the
# message of the TypeError raised by the `Filter` validator for a non-node
# root type to 'expected a node, found ...'.
# ---------------------------------------------------------------------------

class Fetch():
    """Validate a `bsfs.query.ast.fetch` query's structure and schema compliance.

    * Value can only be applied on literals
    * Node can only be applied on nodes
    * Names must be non-empty
    * Branching nodes' predicates must match the type
    * Symbols must be in the schema
    * Predicates must follow the schema

    """

    # schema to validate against.
    schema: bsc.Schema

    def __init__(self, schema: bsc.Schema):
        self.schema = schema

    def __call__(self, root_type: bsc.Node, query: ast.fetch.FetchExpression):
        """Validate a fetch *query*, assuming the subject having *root_type*.

        Returns True if all checks pass.

        Raises a `TypeError` if *root_type* is not a schema node or if *query*
        is not a fetch expression.
        Raises a `bsfs.utils.errors.ConsistencyError` if the query violates the schema.
        Raises a `bsfs.utils.errors.BackendError` if the query structure is invalid.

        """
        # root_type must be a schema.Node
        if not isinstance(root_type, bsc.Node):
            raise TypeError(f'expected a node, found {typename(root_type)}')
        # root_type must exist in the schema
        if root_type not in self.schema.nodes():
            raise errors.ConsistencyError(f'{root_type} is not defined in the schema')
        # query must be a FetchExpression
        if not isinstance(query, ast.fetch.FetchExpression):
            raise TypeError(f'expected a fetch expression, found {typename(query)}')
        # check root expression
        self._parse_fetch_expression(root_type, query)
        # all tests passed
        return True

    def _parse_fetch_expression(self, type_: bsc.Vertex, node: ast.fetch.FetchExpression):
        """Route *node* to the handler of the respective FetchExpression subclass.

        Fetch, Value and Node expressions are first passed through `_branch`;
        Value and Node expressions are additionally passed through `_named`.
        Only then the class-specific handler is invoked.

        Raises a `bsfs.utils.errors.BackendError` if *node* is none of the
        known fetch expression classes.

        """
        if isinstance(node, (ast.fetch.Fetch, ast.fetch.Value, ast.fetch.Node)):
            # NOTE: don't return so that checks below are executed
            self._branch(type_, node)
        if isinstance(node, (ast.fetch.Value, ast.fetch.Node)):
            # NOTE: don't return so that checks below are executed
            self._named(type_, node)
        if isinstance(node, ast.fetch.All):
            return self._all(type_, node)
        if isinstance(node, ast.fetch.Fetch):
            return self._fetch(type_, node)
        if isinstance(node, ast.fetch.Value):
            return self._value(type_, node)
        if isinstance(node, ast.fetch.Node):
            return self._node(type_, node)
        if isinstance(node, ast.fetch.This):
            return self._this(type_, node)
        # invalid node
        raise errors.BackendError(f'expected fetch expression, found {node}')

    def _check_node_type(self, type_: bsc.Vertex):
        """Raise a ConsistencyError unless *type_* is a node defined in the schema."""
        # type is a node
        if not isinstance(type_, bsc.Node):
            raise errors.ConsistencyError(f'expected a Node, found {type_}')
        # node exists in the schema
        if type_ not in self.schema.nodes():
            raise errors.ConsistencyError(f'node {type_} is not in the schema')

    def _check_name(self, node):
        """Raise a BackendError if *node*'s name is empty or whitespace only."""
        if node.name.strip() == '':
            raise errors.BackendError('node name cannot be empty')

    def _check_node_range(self, node):
        """Return the range of *node*'s predicate; raise a ConsistencyError unless it is a Node."""
        rng = self.schema.predicate(node.predicate).range
        if not isinstance(rng, bsc.Node):
            raise errors.ConsistencyError(
                f'expected the predicate\'s range to be a Node, found {rng}')
        return rng

    def _all(self, type_: bsc.Vertex, node: ast.fetch.All):
        """Validate every child expression of *node* against the same *type_*."""
        # check child expressions
        for expr in node:
            self._parse_fetch_expression(type_, expr)

    def _branch(self, type_: bsc.Vertex, node: ast.fetch._Branch):
        """Check that *node*'s predicate exists and can be applied to *type_*.

        Raises a `bsfs.utils.errors.ConsistencyError` if *type_* is not a node
        of the schema, if the predicate is not in the schema, or if *type_*
        is not the predicate's domain or a subtype thereof.

        """
        # type is a node that exists in the schema
        self._check_node_type(type_)
        # predicate exists in the schema
        if not self.schema.has_predicate(node.predicate):
            raise errors.ConsistencyError(f'predicate {node.predicate} is not in the schema')
        pred = self.schema.predicate(node.predicate)
        # type_ must be a subclass of domain
        if not type_ <= pred.domain:
            raise errors.ConsistencyError(
                f'expected type {pred.domain} or subtype thereof, found {type_}')

    def _fetch(self, type_: bsc.Vertex, node: ast.fetch.Fetch): # pylint: disable=unused-argument # type_ was considered in _branch
        """Check that *node*'s predicate leads to a Node and validate the child expression on it."""
        # range must be a node
        rng = self._check_node_range(node)
        # child expression must be valid
        self._parse_fetch_expression(rng, node.expr)

    def _named(self, type_: bsc.Vertex, node: ast.fetch._Named): # pylint: disable=unused-argument # type_ was considered in _branch
        """Check that *node* carries a non-empty name."""
        # name must be set
        self._check_name(node)
        # FIXME: check for double name use?

    def _node(self, type_: bsc.Vertex, node: ast.fetch.Node): # pylint: disable=unused-argument # type_ was considered in _branch
        """Check that the range of *node*'s predicate is a Node."""
        # range must be a node
        self._check_node_range(node)

    def _value(self, type_: bsc.Vertex, node: ast.fetch.Value): # pylint: disable=unused-argument # type_ was considered in _branch
        """Check that the range of *node*'s predicate is a Literal."""
        # range must be a literal
        rng = self.schema.predicate(node.predicate).range
        if not isinstance(rng, bsc.Literal):
            raise errors.ConsistencyError(
                f'expected the predicate\'s range to be a Literal, found {rng}')

    def _this(self, type_: bsc.Vertex, node: ast.fetch.This):
        """Check that *type_* is a node of the schema and that *node* has a non-empty name."""
        # type is a node that exists in the schema
        self._check_node_type(type_)
        # name must be set
        self._check_name(node)

## EOF ##


# ---------------------------------------------------------------------------
# test/query/test_validator.py
#
# NOTE: the test module imports the objects under test via
# `from bsfs.query.validator import Filter, Fetch`; `ast` comes from
# `bsfs.query` and `errors` from `bsfs.utils`.
# ---------------------------------------------------------------------------

class TestFetch(unittest.TestCase):
    """Unit tests of the `Fetch` query validator."""

    def setUp(self):
        self.schema = _schema.from_string('''
            prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#>
            prefix xsd: <http://www.w3.org/2001/XMLSchema#>

            prefix bsfs: <http://bsfs.ai/schema/>
            prefix bse: <http://bsfs.ai/schema/Entity#>

            bsfs:Entity rdfs:subClassOf bsfs:Node .
            bsfs:Tag rdfs:subClassOf bsfs:Node .
            xsd:string rdfs:subClassOf bsfs:Literal .

            bse:filename rdfs:subClassOf bsfs:Predicate ;
                rdfs:domain bsfs:Node ;
                rdfs:range xsd:string .

            bse:tag rdfs:subClassOf bsfs:Predicate ;
                rdfs:domain bsfs:Entity ;
                rdfs:range bsfs:Tag .

            bse:label rdfs:subClassOf bsfs:Predicate ;
                rdfs:domain bsfs:Tag ;
                rdfs:range xsd:string .

            ''')
        self.validate = Fetch(self.schema)

    def test_call(self):
        # call accepts correct expressions
        self.assertTrue(self.validate(self.schema.node(ns.bsfs.Entity),
            ast.fetch.Fetch(ns.bse.tag, ast.fetch.Value(ns.bse.label, 'value'))))
        self.assertTrue(self.validate(self.schema.node(ns.bsfs.Entity),
            ast.fetch.Fetch(ns.bse.tag, ast.fetch.This('this'))))
        self.assertTrue(self.validate(self.schema.node(ns.bsfs.Entity),
            ast.fetch.This('this')))
        self.assertTrue(self.validate(self.schema.node(ns.bsfs.Entity),
            ast.fetch.All(ast.fetch.This('this'), ast.fetch.Node(ns.bse.tag, 'node'), ast.fetch.Value(ns.bse.filename, 'value'))))
        # type must be a Node
        self.assertRaises(TypeError, self.validate, 1234, ast.fetch.This('this'))
        self.assertRaises(TypeError, self.validate, 'foobar', ast.fetch.This('this'))
        self.assertRaises(TypeError, self.validate, self.schema.literal(ns.bsfs.Literal), ast.fetch.This('this'))
        # type must be in the schema
        self.assertRaises(errors.ConsistencyError, self.validate, self.schema.node(ns.bsfs.Node).child(ns.bsfs.Invalid),
            ast.fetch.FetchExpression())
        # expression must be a fetch expression
        self.assertRaises(TypeError, self.validate, self.schema.node(ns.bsfs.Entity), 1234)
        self.assertRaises(TypeError, self.validate, self.schema.node(ns.bsfs.Entity), 'hello')
        self.assertRaises(TypeError, self.validate, self.schema.node(ns.bsfs.Entity), ast.filter.FilterExpression())
        # expression must be valid
        self.assertRaises(errors.ConsistencyError, self.validate, self.schema.node(ns.bsfs.Entity),
            ast.fetch.Fetch(ns.bse.tag, ast.fetch.Node(ns.bse.label, 'node')))
        self.assertRaises(errors.ConsistencyError, self.validate, self.schema.node(ns.bsfs.Entity),
            ast.fetch.Value(ns.bse.tag, 'value'))

    def test_routing(self):
        # Node passes _branch, _named, and _node checks
        self.assertRaises(errors.ConsistencyError, self.validate._parse_fetch_expression, self.schema.node(ns.bsfs.Node),
            ast.fetch.Node(ns.bse.tag, 'node')) # fails in _branch
        self.assertRaises(errors.BackendError, self.validate._parse_fetch_expression, self.schema.node(ns.bsfs.Entity),
            ast.fetch.Node(ns.bse.tag, '')) # fails in _named
        # NOTE: bse:filename (domain bsfs:Node, literal range) is used here because
        # it passes _branch for an Entity. bse:label has domain bsfs:Tag and would
        # already be rejected by _branch, so _node would never be reached.
        self.assertRaises(errors.ConsistencyError, self.validate._parse_fetch_expression, self.schema.node(ns.bsfs.Entity),
            ast.fetch.Node(ns.bse.filename, 'node')) # fails in _node
        # Value passes _branch, _named, and _value checks
        self.assertRaises(errors.ConsistencyError, self.validate._parse_fetch_expression, self.schema.node(ns.bsfs.Node),
            ast.fetch.Value(ns.bse.label, 'value')) # fails in _branch
        self.assertRaises(errors.BackendError, self.validate._parse_fetch_expression, self.schema.node(ns.bsfs.Entity),
            ast.fetch.Value(ns.bse.filename, '')) # fails in _named
        self.assertRaises(errors.ConsistencyError, self.validate._parse_fetch_expression, self.schema.node(ns.bsfs.Entity),
            ast.fetch.Value(ns.bse.tag, 'value')) # fails in _value
        # Fetch passes _branch and _fetch checks
        self.assertRaises(errors.ConsistencyError, self.validate._parse_fetch_expression, self.schema.node(ns.bsfs.Node),
            ast.fetch.Fetch(ns.bse.tag, ast.fetch.This('this'))) # fails in _branch
        self.assertRaises(errors.ConsistencyError, self.validate._parse_fetch_expression, self.schema.node(ns.bsfs.Node),
            ast.fetch.Fetch(ns.bse.filename, ast.fetch.This('this'))) # fails in _fetch
        # invalid expressions cannot be parsed
        type_ = self.schema.node(ns.bsfs.Node)
        self.assertRaises(errors.BackendError, self.validate._parse_fetch_expression, type_,
            ast.filter.FilterExpression())
        self.assertRaises(errors.BackendError, self.validate._parse_fetch_expression, type_,
            1234)
        self.assertRaises(errors.BackendError, self.validate._parse_fetch_expression, type_,
            'hello world')

    def test_all(self):
        # all accepts correct expressions
        self.assertIsNone(self.validate._all(self.schema.node(ns.bsfs.Entity),
            ast.fetch.All(ast.fetch.Value(ns.bse.filename, 'value'), ast.fetch.Node(ns.bse.tag, 'node'))))
        # child expressions must be valid
        self.assertRaises(errors.ConsistencyError, self.validate._all, self.schema.node(ns.bsfs.Entity),
            ast.fetch.All(ast.fetch.Value(ns.bse.tag, 'value')))
        self.assertRaises(errors.ConsistencyError, self.validate._all, self.schema.node(ns.bsfs.Entity),
            ast.fetch.All(ast.fetch.Value(ns.bse.filename, 'value'), ast.fetch.Node(ns.bse.filename, 'node')))
        self.assertRaises(errors.ConsistencyError, self.validate._all, self.schema.node(ns.bsfs.Entity),
            ast.fetch.All(ast.fetch.Value(ns.bse.tag, 'value'), ast.fetch.Node(ns.bse.tag, 'node')))
        self.assertRaises(errors.ConsistencyError, self.validate._all, self.schema.node(ns.bsfs.Entity),
            ast.fetch.All(ast.fetch.Value(ns.bse.tag, 'value'), ast.fetch.Node(ns.bse.filename, 'node')))

    def test_branch(self):
        # branch accepts correct expressions
        self.assertIsNone(self.validate._branch(self.schema.node(ns.bsfs.Entity),
            ast.fetch._Branch(ns.bse.filename)))
        self.assertIsNone(self.validate._branch(self.schema.node(ns.bsfs.Entity),
            ast.fetch.Fetch(ns.bse.tag, ast.fetch.This('this'))))
        self.assertIsNone(self.validate._branch(self.schema.node(ns.bsfs.Entity),
            ast.fetch.Value(ns.bse.filename, 'value')))
        self.assertIsNone(self.validate._branch(self.schema.node(ns.bsfs.Entity),
            ast.fetch.Node(ns.bse.tag, 'node')))
        # type must be a node
        self.assertRaises(errors.ConsistencyError, self.validate._branch, self.schema.literal(ns.bsfs.Literal),
            ast.fetch._Branch(ns.bse.filename))
        self.assertRaises(errors.ConsistencyError, self.validate._branch, self.schema.literal(ns.bsfs.Literal),
            ast.fetch.Fetch(ns.bse.tag, ast.fetch.This('this')))
        self.assertRaises(errors.ConsistencyError, self.validate._branch, self.schema.literal(ns.bsfs.Literal),
            ast.fetch.Value(ns.bse.filename, 'value'))
        self.assertRaises(errors.ConsistencyError, self.validate._branch, self.schema.literal(ns.bsfs.Literal),
            ast.fetch.Node(ns.bse.tag, 'node'))
        # type must be in the schema
        self.assertRaises(errors.ConsistencyError, self.validate._branch, self.schema.node(ns.bsfs.Node).child(ns.bsfs.Invalid),
            ast.fetch._Branch(ns.bse.filename))
        self.assertRaises(errors.ConsistencyError, self.validate._branch, self.schema.node(ns.bsfs.Node).child(ns.bsfs.Invalid),
            ast.fetch.Fetch(ns.bse.tag, ast.fetch.This('this')))
        self.assertRaises(errors.ConsistencyError, self.validate._branch, self.schema.node(ns.bsfs.Node).child(ns.bsfs.Invalid),
            ast.fetch.Value(ns.bse.filename, 'value'))
        self.assertRaises(errors.ConsistencyError, self.validate._branch, self.schema.node(ns.bsfs.Node).child(ns.bsfs.Invalid),
            ast.fetch.Node(ns.bse.tag, 'node'))
        # predicate must be in the schema
        self.assertRaises(errors.ConsistencyError, self.validate._branch, self.schema.node(ns.bsfs.Entity),
            ast.fetch._Branch(ns.bse.invalid))
        self.assertRaises(errors.ConsistencyError, self.validate._branch, self.schema.node(ns.bsfs.Entity),
            ast.fetch.Fetch(ns.bse.invalid, ast.fetch.This('this')))
        self.assertRaises(errors.ConsistencyError, self.validate._branch, self.schema.node(ns.bsfs.Entity),
            ast.fetch.Value(ns.bse.invalid, 'value'))
        self.assertRaises(errors.ConsistencyError, self.validate._branch, self.schema.node(ns.bsfs.Entity),
            ast.fetch.Node(ns.bse.invalid, 'node'))
        # predicate's domain must be related to the type
        self.assertRaises(errors.ConsistencyError, self.validate._branch, self.schema.node(ns.bsfs.Entity),
            ast.fetch._Branch(ns.bse.label))
        self.assertRaises(errors.ConsistencyError, self.validate._branch, self.schema.node(ns.bsfs.Entity),
            ast.fetch.Fetch(ns.bse.label, ast.fetch.This('this')))
        self.assertRaises(errors.ConsistencyError, self.validate._branch, self.schema.node(ns.bsfs.Entity),
            ast.fetch.Node(ns.bse.label, 'node'))
        self.assertRaises(errors.ConsistencyError, self.validate._branch, self.schema.node(ns.bsfs.Entity),
            ast.fetch.Value(ns.bse.label, 'value'))
        # type cannot be a supertype of the predicate's domain
        self.assertRaises(errors.ConsistencyError, self.validate._branch, self.schema.node(ns.bsfs.Node),
            ast.fetch._Branch(ns.bse.tag))
        self.assertRaises(errors.ConsistencyError, self.validate._branch, self.schema.node(ns.bsfs.Node),
            ast.fetch.Fetch(ns.bse.tag, ast.fetch.This('this')))
        self.assertRaises(errors.ConsistencyError, self.validate._branch, self.schema.node(ns.bsfs.Node),
            ast.fetch.Node(ns.bse.tag, 'node'))
        self.assertRaises(errors.ConsistencyError, self.validate._branch, self.schema.node(ns.bsfs.Node),
            ast.fetch.Value(ns.bse.tag, 'value'))
        # type can be a subtype of the predicate's domain
        self.assertIsNone(self.validate._branch(self.schema.node(ns.bsfs.Entity),
            ast.fetch._Branch(ns.bse.filename)))
        self.assertIsNone(self.validate._branch(self.schema.node(ns.bsfs.Entity),
            ast.fetch.Value(ns.bse.filename, 'value')))

    def test_fetch(self):
        # fetch accepts correct expressions
        self.assertIsNone(self.validate._fetch(self.schema.node(ns.bsfs.Node),
            ast.fetch.Fetch(ns.bse.tag, ast.fetch.Value(ns.bse.label, 'value'))))
        # range must be a node
        self.assertRaises(errors.ConsistencyError, self.validate._fetch, self.schema.node(ns.bsfs.Node),
            ast.fetch.Fetch(ns.bse.filename, ast.fetch.This('this')))
        # child expression must be valid
        self.assertRaises(errors.ConsistencyError, self.validate._fetch, self.schema.node(ns.bsfs.Node),
            ast.fetch.Fetch(ns.bse.tag, ast.fetch.Node(ns.bse.label, 'node')))

    def test_named(self):
        # named accepts correct expressions
        self.assertIsNone(self.validate._named(self.schema.node(ns.bsfs.Entity),
            ast.fetch.Node(ns.bse.tag, 'node')))
        self.assertIsNone(self.validate._named(self.schema.node(ns.bsfs.Entity),
            ast.fetch.Value(ns.bse.filename, 'value')))
        # name must be non-empty
        self.assertRaises(errors.BackendError, self.validate._named, self.schema.node(ns.bsfs.Entity),
            ast.fetch.Node(ns.bse.tag, ''))
        self.assertRaises(errors.BackendError, self.validate._named, self.schema.node(ns.bsfs.Entity),
            ast.fetch.Value(ns.bse.filename, ''))

    def test_node(self):
        # node accepts correct expressions
        self.assertIsNone(self.validate._node(self.schema.node(ns.bsfs.Entity),
            ast.fetch.Node(ns.bse.tag, 'node')))
        # range must be a node
        self.assertRaises(errors.ConsistencyError, self.validate._node, self.schema.node(ns.bsfs.Entity),
            ast.fetch.Node(ns.bse.filename, 'node'))

    def test_value(self):
        # value accepts correct expressions
        self.assertIsNone(self.validate._value(self.schema.node(ns.bsfs.Entity),
            ast.fetch.Value(ns.bse.filename, 'value')))
        # range must be a literal
        self.assertRaises(errors.ConsistencyError, self.validate._value, self.schema.node(ns.bsfs.Entity),
            ast.fetch.Value(ns.bse.tag, 'value'))

    def test_this(self):
        # this accepts correct expressions
        self.assertIsNone(self.validate._this(self.schema.node(ns.bsfs.Entity), ast.fetch.This('this')))
        # type must be a node
        self.assertRaises(errors.ConsistencyError, self.validate._this, self.schema.literal(ns.bsfs.Literal),
            ast.fetch.This('this'))
        self.assertRaises(errors.ConsistencyError, self.validate._this, self.schema.predicate(ns.bsfs.Predicate),
            ast.fetch.This('this'))
        # type must be in the schema
        self.assertRaises(errors.ConsistencyError, self.validate._this, self.schema.node(ns.bsfs.Node).child(ns.bsfs.Invalid),
            ast.fetch.This('this'))
        # name must be non-empty
        self.assertRaises(errors.BackendError, self.validate._this, self.schema.node(ns.bsfs.Entity), ast.fetch.This(''))

# NOTE: the test module's `## main ##` entry point follows here; its body lies
# outside the visible part of the change.