aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorMatthias Baumgartner <dev@igsor.net>2023-01-20 18:01:17 +0100
committerMatthias Baumgartner <dev@igsor.net>2023-01-20 18:01:17 +0100
commite2f08efc0d8a3c875994bdb69623c30cce5079d9 (patch)
tree0870ac597a55431e63a20e05bb11cf913a5f3e3d
parenta4789394e40aaa3152ad6009955709a6c7d277c2 (diff)
downloadbsfs-e2f08efc0d8a3c875994bdb69623c30cce5079d9.tar.gz
bsfs-e2f08efc0d8a3c875994bdb69623c30cce5079d9.tar.bz2
bsfs-e2f08efc0d8a3c875994bdb69623c30cce5079d9.zip
fetch AST validation
-rw-r--r--bsfs/query/validator.py123
-rw-r--r--test/query/test_validator.py215
2 files changed, 336 insertions, 2 deletions
diff --git a/bsfs/query/validator.py b/bsfs/query/validator.py
index 904ac14..9fbff12 100644
--- a/bsfs/query/validator.py
+++ b/bsfs/query/validator.py
@@ -49,7 +49,7 @@ class Filter():
"""
# root_type must be a schema.Node
if not isinstance(root_type, bsc.Node):
- raise TypeError(f'Expected a node, found {typename(root_type)}')
+ raise TypeError(f'expected a node, found {typename(root_type)}')
# root_type must exist in the schema
if root_type not in self.schema.nodes():
raise errors.ConsistencyError(f'{root_type} is not defined in the schema')
@@ -223,4 +223,125 @@ class Filter():
# FIXME: Check if node.value corresponds to type_
+class Fetch():
+ """Validate a `bsfs.query.ast.fetch` query's structure and schema compliance.
+
+ * Value can only be applied on literals
+ * Node can only be applied on nodes
+ * Names must be non-empty
+ * Branching nodes' predicates must match the type
+ * Symbols must be in the schema
+ * Predicates must follow the schema
+
+ """
+
+ # schema to validate against.
+ schema: bsc.Schema
+
+ def __init__(self, schema: bsc.Schema):
+ self.schema = schema
+
+ def __call__(self, root_type: bsc.Node, query: ast.fetch.FetchExpression):
+ """Validate a fetch *query*, assuming the subject having *root_type*.
+
+ Raises a `bsfs.utils.errors.ConsistencyError` if the query violates the schema.
+ Raises a `bsfs.utils.errors.BackendError` if the query structure is invalid.
+
+ """
+ # root_type must be a schema.Node
+ if not isinstance(root_type, bsc.Node):
+ raise TypeError(f'expected a node, found {typename(root_type)}')
+ # root_type must exist in the schema
+ if root_type not in self.schema.nodes():
+ raise errors.ConsistencyError(f'{root_type} is not defined in the schema')
+ # query must be a FetchExpression
+ if not isinstance(query, ast.fetch.FetchExpression):
+ raise TypeError(f'expected a fetch expression, found {typename(query)}')
+ # check root expression
+ self._parse_fetch_expression(root_type, query)
+ # all tests passed
+ return True
+
+ def _parse_fetch_expression(self, type_: bsc.Vertex, node: ast.fetch.FetchExpression):
+ """Route *node* to the handler of the respective FetchExpression subclass."""
+ if isinstance(node, (ast.fetch.Fetch, ast.fetch.Value, ast.fetch.Node)):
+ # NOTE: don't return so that checks below are executed
+ self._branch(type_, node)
+ if isinstance(node, (ast.fetch.Value, ast.fetch.Node)):
+ # NOTE: don't return so that checks below are executed
+ self._named(type_, node)
+ if isinstance(node, ast.fetch.All):
+ return self._all(type_, node)
+ if isinstance(node, ast.fetch.Fetch):
+ return self._fetch(type_, node)
+ if isinstance(node, ast.fetch.Value):
+ return self._value(type_, node)
+ if isinstance(node, ast.fetch.Node):
+ return self._node(type_, node)
+ if isinstance(node, ast.fetch.This):
+ return self._this(type_, node)
+ # invalid node
+ raise errors.BackendError(f'expected fetch expression, found {node}')
+
+ def _all(self, type_: bsc.Vertex, node: ast.fetch.All):
+ # check child expressions
+ for expr in node:
+ self._parse_fetch_expression(type_, expr)
+
+ def _branch(self, type_: bsc.Vertex, node: ast.fetch._Branch):
+ # type is a node
+ if not isinstance(type_, bsc.Node):
+ raise errors.ConsistencyError(f'expected a Node, found {type_}')
+ # node exists in the schema
+ if type_ not in self.schema.nodes():
+ raise errors.ConsistencyError(f'node {type_} is not in the schema')
+ # predicate exists in the schema
+ if not self.schema.has_predicate(node.predicate):
+ raise errors.ConsistencyError(f'predicate {node.predicate} is not in the schema')
+ pred = self.schema.predicate(node.predicate)
+ # type_ must be a subclass of domain
+ if not type_ <= pred.domain:
+ raise errors.ConsistencyError(
+ f'expected type {pred.domain} or subtype thereof, found {type_}')
+
+ def _fetch(self, type_: bsc.Vertex, node: ast.fetch.Fetch): # pylint: disable=unused-argument # type_ was considered in _branch
+ # range must be a node
+ rng = self.schema.predicate(node.predicate).range
+ if not isinstance(rng, bsc.Node):
+ raise errors.ConsistencyError(
+ f'expected the predicate\'s range to be a Node, found {rng}')
+ # child expression must be valid
+ self._parse_fetch_expression(rng, node.expr)
+
+ def _named(self, type_: bsc.Vertex, node: ast.fetch._Named): # pylint: disable=unused-argument # type_ was considered in _branch
+ # name must be set
+ if node.name.strip() == '':
+ raise errors.BackendError('node name cannot be empty')
+ # FIXME: check for double name use?
+
+ def _node(self, type_: bsc.Vertex, node: ast.fetch.Node): # pylint: disable=unused-argument # type_ was considered in _branch
+ # range must be a node
+ rng = self.schema.predicate(node.predicate).range
+ if not isinstance(rng, bsc.Node):
+ raise errors.ConsistencyError(
+ f'expected the predicate\'s range to be a Node, found {rng}')
+
+ def _value(self, type_: bsc.Vertex, node: ast.fetch.Value): # pylint: disable=unused-argument # type_ was considered in _branch
+ # range must be a literal
+ rng = self.schema.predicate(node.predicate).range
+ if not isinstance(rng, bsc.Literal):
+ raise errors.ConsistencyError(
+ f'expected the predicate\'s range to be a Literal, found {rng}')
+
+ def _this(self, type_: bsc.Vertex, node: ast.fetch.This):
+ # type is a node
+ if not isinstance(type_, bsc.Node):
+ raise errors.ConsistencyError(f'expected a Node, found {type_}')
+ # node exists in the schema
+ if type_ not in self.schema.nodes():
+ raise errors.ConsistencyError(f'node {type_} is not in the schema')
+ # name must be set
+ if node.name.strip() == '':
+ raise errors.BackendError('node name cannot be empty')
+
## EOF ##
diff --git a/test/query/test_validator.py b/test/query/test_validator.py
index dc9d913..fec3d23 100644
--- a/test/query/test_validator.py
+++ b/test/query/test_validator.py
@@ -14,7 +14,7 @@ from bsfs.query import ast
from bsfs.utils import errors
# objects to test
-from bsfs.query.validator import Filter
+from bsfs.query.validator import Filter, Fetch
## code ##
@@ -286,6 +286,219 @@ class TestFilter(unittest.TestCase):
self.assertIsNone(self.validate._distance(self.schema.literal(ns.bsfs.Colors), ast.filter.Distance([1,2,3,4,5], 1, False)))
+class TestFetch(unittest.TestCase):
+ def setUp(self):
+ self.schema = _schema.from_string('''
+ prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#>
+ prefix xsd: <http://www.w3.org/2001/XMLSchema#>
+
+ prefix bsfs: <http://bsfs.ai/schema/>
+ prefix bse: <http://bsfs.ai/schema/Entity#>
+
+ bsfs:Entity rdfs:subClassOf bsfs:Node .
+ bsfs:Tag rdfs:subClassOf bsfs:Node .
+ xsd:string rdfs:subClassOf bsfs:Literal .
+
+ bse:filename rdfs:subClassOf bsfs:Predicate ;
+ rdfs:domain bsfs:Node ;
+ rdfs:range xsd:string .
+
+ bse:tag rdfs:subClassOf bsfs:Predicate ;
+ rdfs:domain bsfs:Entity ;
+ rdfs:range bsfs:Tag .
+
+ bse:label rdfs:subClassOf bsfs:Predicate ;
+ rdfs:domain bsfs:Tag ;
+ rdfs:range xsd:string .
+
+ ''')
+ self.validate = Fetch(self.schema)
+
+ def test_call(self):
+ # call accepts correct expressions
+ self.assertTrue(self.validate(self.schema.node(ns.bsfs.Entity),
+ ast.fetch.Fetch(ns.bse.tag, ast.fetch.Value(ns.bse.label, 'value'))))
+ self.assertTrue(self.validate(self.schema.node(ns.bsfs.Entity),
+ ast.fetch.Fetch(ns.bse.tag, ast.fetch.This('this'))))
+ self.assertTrue(self.validate(self.schema.node(ns.bsfs.Entity),
+ ast.fetch.This('this')))
+ self.assertTrue(self.validate(self.schema.node(ns.bsfs.Entity),
+ ast.fetch.All(ast.fetch.This('this'), ast.fetch.Node(ns.bse.tag, 'node'), ast.fetch.Value(ns.bse.filename, 'value'))))
+ # type must be a Node
+ self.assertRaises(TypeError, self.validate, 1234, ast.fetch.This('this'))
+ self.assertRaises(TypeError, self.validate, 'foobar', ast.fetch.This('this'))
+ self.assertRaises(TypeError, self.validate, self.schema.literal(ns.bsfs.Literal), ast.fetch.This('this'))
+ # type must be in the schema
+ self.assertRaises(errors.ConsistencyError, self.validate, self.schema.node(ns.bsfs.Node).child(ns.bsfs.Invalid),
+ ast.fetch.FetchExpression())
+ # expression must be a fetch expression
+ self.assertRaises(TypeError, self.validate, self.schema.node(ns.bsfs.Entity), 1234)
+ self.assertRaises(TypeError, self.validate, self.schema.node(ns.bsfs.Entity), 'hello')
+ self.assertRaises(TypeError, self.validate, self.schema.node(ns.bsfs.Entity), ast.filter.FilterExpression())
+ # expression must be valid
+ self.assertRaises(errors.ConsistencyError, self.validate, self.schema.node(ns.bsfs.Entity),
+ ast.fetch.Fetch(ns.bse.tag, ast.fetch.Node(ns.bse.label, 'node')))
+ self.assertRaises(errors.ConsistencyError, self.validate, self.schema.node(ns.bsfs.Entity),
+ ast.fetch.Value(ns.bse.tag, 'value'))
+
+ def test_routing(self):
+ # Node passes _branch, _named, and _node checks
+ self.assertRaises(errors.ConsistencyError, self.validate._parse_fetch_expression, self.schema.node(ns.bsfs.Node),
+ ast.fetch.Node(ns.bse.tag, 'node')) # fails in _branch
+ self.assertRaises(errors.BackendError, self.validate._parse_fetch_expression, self.schema.node(ns.bsfs.Entity),
+ ast.fetch.Node(ns.bse.tag, '')) # fails in _named
+ self.assertRaises(errors.ConsistencyError, self.validate._parse_fetch_expression, self.schema.node(ns.bsfs.Entity),
+ ast.fetch.Node(ns.bse.label, 'node')) # fails in _node
+ # Value passes _branch, _named, and _value checks
+ self.assertRaises(errors.ConsistencyError, self.validate._parse_fetch_expression, self.schema.node(ns.bsfs.Node),
+ ast.fetch.Value(ns.bse.label, 'value')) # fails in _branch
+ self.assertRaises(errors.BackendError, self.validate._parse_fetch_expression, self.schema.node(ns.bsfs.Entity),
+ ast.fetch.Value(ns.bse.filename, '')) # fails in _named
+ self.assertRaises(errors.ConsistencyError, self.validate._parse_fetch_expression, self.schema.node(ns.bsfs.Entity),
+ ast.fetch.Value(ns.bse.tag, 'value')) # fails in _value
+ # Fetch passes _branch and _fetch checks
+ self.assertRaises(errors.ConsistencyError, self.validate._parse_fetch_expression, self.schema.node(ns.bsfs.Node),
+ ast.fetch.Fetch(ns.bse.tag, ast.fetch.This('this'))) # fails in _branch
+ self.assertRaises(errors.ConsistencyError, self.validate._parse_fetch_expression, self.schema.node(ns.bsfs.Node),
+ ast.fetch.Fetch(ns.bse.filename, ast.fetch.This('this'))) # fails in _fetch
+ # invalid expressions cannot be parsed
+ type_ = self.schema.node(ns.bsfs.Node)
+ self.assertRaises(errors.BackendError, self.validate._parse_fetch_expression, type_,
+ ast.filter.FilterExpression())
+ self.assertRaises(errors.BackendError, self.validate._parse_fetch_expression, type_,
+ 1234)
+ self.assertRaises(errors.BackendError, self.validate._parse_fetch_expression, type_,
+ 'hello world')
+
+ def test_all(self):
+ # all accepts correct expressions
+ self.assertIsNone(self.validate._all(self.schema.node(ns.bsfs.Entity),
+ ast.fetch.All(ast.fetch.Value(ns.bse.filename, 'value'), ast.fetch.Node(ns.bse.tag, 'node'))))
+ # child expressions must be valid
+ self.assertRaises(errors.ConsistencyError, self.validate._all, self.schema.node(ns.bsfs.Entity),
+ ast.fetch.All(ast.fetch.Value(ns.bse.tag, 'value')))
+ self.assertRaises(errors.ConsistencyError, self.validate._all, self.schema.node(ns.bsfs.Entity),
+ ast.fetch.All(ast.fetch.Value(ns.bse.filename, 'value'), ast.fetch.Node(ns.bse.filename, 'node')))
+ self.assertRaises(errors.ConsistencyError, self.validate._all, self.schema.node(ns.bsfs.Entity),
+ ast.fetch.All(ast.fetch.Value(ns.bse.tag, 'value'), ast.fetch.Node(ns.bse.tag, 'node')))
+ self.assertRaises(errors.ConsistencyError, self.validate._all, self.schema.node(ns.bsfs.Entity),
+ ast.fetch.All(ast.fetch.Value(ns.bse.tag, 'value'), ast.fetch.Node(ns.bse.filename, 'node')))
+
+ def test_branch(self):
+ # branch accepts correct expressions
+ self.assertIsNone(self.validate._branch(self.schema.node(ns.bsfs.Entity),
+ ast.fetch._Branch(ns.bse.filename)))
+ self.assertIsNone(self.validate._branch(self.schema.node(ns.bsfs.Entity),
+ ast.fetch.Fetch(ns.bse.tag, ast.fetch.This('this'))))
+ self.assertIsNone(self.validate._branch(self.schema.node(ns.bsfs.Entity),
+ ast.fetch.Value(ns.bse.filename, 'value')))
+ self.assertIsNone(self.validate._branch(self.schema.node(ns.bsfs.Entity),
+ ast.fetch.Node(ns.bse.tag, 'node')))
+ # type must be a node
+ self.assertRaises(errors.ConsistencyError, self.validate._branch, self.schema.literal(ns.bsfs.Literal),
+ ast.fetch._Branch(ns.bse.filename))
+ self.assertRaises(errors.ConsistencyError, self.validate._branch, self.schema.literal(ns.bsfs.Literal),
+ ast.fetch.Fetch(ns.bse.tag, ast.fetch.This('this')))
+ self.assertRaises(errors.ConsistencyError, self.validate._branch, self.schema.literal(ns.bsfs.Literal),
+ ast.fetch.Value(ns.bse.filename, 'value'))
+ self.assertRaises(errors.ConsistencyError, self.validate._branch, self.schema.literal(ns.bsfs.Literal),
+ ast.fetch.Node(ns.bse.tag, 'node'))
+ # type must be in the schema
+ self.assertRaises(errors.ConsistencyError, self.validate._branch, self.schema.node(ns.bsfs.Node).child(ns.bsfs.Invalid),
+ ast.fetch._Branch(ns.bse.filename))
+ self.assertRaises(errors.ConsistencyError, self.validate._branch, self.schema.node(ns.bsfs.Node).child(ns.bsfs.Invalid),
+ ast.fetch.Fetch(ns.bse.tag, ast.fetch.This('this')))
+ self.assertRaises(errors.ConsistencyError, self.validate._branch, self.schema.node(ns.bsfs.Node).child(ns.bsfs.Invalid),
+ ast.fetch.Value(ns.bse.filename, 'value'))
+ self.assertRaises(errors.ConsistencyError, self.validate._branch, self.schema.node(ns.bsfs.Node).child(ns.bsfs.Invalid),
+ ast.fetch.Node(ns.bse.tag, 'node'))
+ # predicate must be in the schema
+ self.assertRaises(errors.ConsistencyError, self.validate._branch, self.schema.node(ns.bsfs.Entity),
+ ast.fetch._Branch(ns.bse.invalid))
+ self.assertRaises(errors.ConsistencyError, self.validate._branch, self.schema.node(ns.bsfs.Entity),
+ ast.fetch.Fetch(ns.bse.invalid, ast.fetch.This('this')))
+ self.assertRaises(errors.ConsistencyError, self.validate._branch, self.schema.node(ns.bsfs.Entity),
+ ast.fetch.Value(ns.bse.invalid, 'value'))
+ self.assertRaises(errors.ConsistencyError, self.validate._branch, self.schema.node(ns.bsfs.Entity),
+ ast.fetch.Node(ns.bse.invalid, 'node'))
+ # predicate's domain must be related to the type
+ self.assertRaises(errors.ConsistencyError, self.validate._branch, self.schema.node(ns.bsfs.Entity),
+ ast.fetch._Branch(ns.bse.label))
+ self.assertRaises(errors.ConsistencyError, self.validate._branch, self.schema.node(ns.bsfs.Entity),
+ ast.fetch.Fetch(ns.bse.label, ast.fetch.This('this')))
+ self.assertRaises(errors.ConsistencyError, self.validate._branch, self.schema.node(ns.bsfs.Entity),
+ ast.fetch.Node(ns.bse.label, 'node'))
+ self.assertRaises(errors.ConsistencyError, self.validate._branch, self.schema.node(ns.bsfs.Entity),
+ ast.fetch.Value(ns.bse.label, 'value'))
+ # predicate's domain cannot be a supertype
+ self.assertRaises(errors.ConsistencyError, self.validate._branch, self.schema.node(ns.bsfs.Node),
+ ast.fetch._Branch(ns.bse.tag))
+ self.assertRaises(errors.ConsistencyError, self.validate._branch, self.schema.node(ns.bsfs.Node),
+ ast.fetch.Fetch(ns.bse.tag, ast.fetch.This('this')))
+ self.assertRaises(errors.ConsistencyError, self.validate._branch, self.schema.node(ns.bsfs.Node),
+ ast.fetch.Node(ns.bse.tag, 'node'))
+ self.assertRaises(errors.ConsistencyError, self.validate._branch, self.schema.node(ns.bsfs.Node),
+ ast.fetch.Value(ns.bse.tag, 'value'))
+ # predicate's domain can be a subtype
+ self.assertIsNone(self.validate._branch(self.schema.node(ns.bsfs.Entity),
+ ast.fetch._Branch(ns.bse.filename)))
+ self.assertIsNone(self.validate._branch(self.schema.node(ns.bsfs.Entity),
+ ast.fetch.Value(ns.bse.filename, 'value')))
+
+ def test_fetch(self):
+ # fetch accepts correct expressions
+ self.assertIsNone(self.validate._fetch(self.schema.node(ns.bsfs.Node),
+ ast.fetch.Fetch(ns.bse.tag, ast.fetch.Value(ns.bse.label, 'value'))))
+ # range must be a node
+ self.assertRaises(errors.ConsistencyError, self.validate._fetch, self.schema.node(ns.bsfs.Node),
+ ast.fetch.Fetch(ns.bse.filename, ast.fetch.This('this')))
+ # child expression must be valid
+ self.assertRaises(errors.ConsistencyError, self.validate._fetch, self.schema.node(ns.bsfs.Node),
+ ast.fetch.Fetch(ns.bse.tag, ast.fetch.Node(ns.bse.label, 'node')))
+
+ def test_named(self):
+ # named accepts correct expressions
+ self.assertIsNone(self.validate._named(self.schema.node(ns.bsfs.Entity),
+ ast.fetch.Node(ns.bse.tag, 'node')))
+ self.assertIsNone(self.validate._named(self.schema.node(ns.bsfs.Entity),
+ ast.fetch.Value(ns.bse.filename, 'value')))
+ # name must be non-empty
+ self.assertRaises(errors.BackendError, self.validate._named, self.schema.node(ns.bsfs.Entity),
+ ast.fetch.Node(ns.bse.tag, ''))
+ self.assertRaises(errors.BackendError, self.validate._named, self.schema.node(ns.bsfs.Entity),
+ ast.fetch.Value(ns.bse.filename, ''))
+
+ def test_node(self):
+ # node accepts correct expressions
+ self.assertIsNone(self.validate._node(self.schema.node(ns.bsfs.Entity),
+ ast.fetch.Node(ns.bse.tag, 'node')))
+ # range must be a node
+ self.assertRaises(errors.ConsistencyError, self.validate._node, self.schema.node(ns.bsfs.Entity),
+ ast.fetch.Node(ns.bse.filename, 'node'))
+
+ def test_value(self):
+ # value accepts correct expressions
+ self.assertIsNone(self.validate._value(self.schema.node(ns.bsfs.Entity),
+ ast.fetch.Value(ns.bse.filename, 'value')))
+ # range must be a literal
+ self.assertRaises(errors.ConsistencyError, self.validate._value, self.schema.node(ns.bsfs.Entity),
+ ast.fetch.Value(ns.bse.tag, 'value'))
+
+ def test_this(self):
+ # this accepts correct expressions
+ self.assertIsNone(self.validate._this(self.schema.node(ns.bsfs.Entity), ast.fetch.This('this')))
+ # type must be a node
+ self.assertRaises(errors.ConsistencyError, self.validate._this, self.schema.literal(ns.bsfs.Literal),
+ ast.fetch.This('this'))
+ self.assertRaises(errors.ConsistencyError, self.validate._this, self.schema.predicate(ns.bsfs.Predicate),
+ ast.fetch.This('this'))
+ # type must be in the schema
+ self.assertRaises(errors.ConsistencyError, self.validate._this, self.schema.node(ns.bsfs.Node).child(ns.bsfs.Invalid),
+ ast.fetch.This('this'))
+ # name must be non-empty
+ self.assertRaises(errors.BackendError, self.validate._this, self.schema.node(ns.bsfs.Entity), ast.fetch.This(''))
+
+
## main ##
if __name__ == '__main__':