From 791918039979d0743fd2ea4b9a5e74593ff96fd0 Mon Sep 17 00:00:00 2001
From: Matthias Baumgartner
Date: Mon, 19 Dec 2022 13:32:34 +0100
Subject: Add query AST package structure and essential interfaces

---
 bsfs/graph/graph.py                     |   5 +
 bsfs/query/__init__.py                  |  20 +
 bsfs/query/ast/__init__.py              |  24 +
 bsfs/query/ast/filter_.py               |  33 ++
 bsfs/query/validator.py                 |  35 ++
 bsfs/triple_store/base.py               |   8 +
 bsfs/triple_store/sparql.py             | 253 -----------
 bsfs/triple_store/sparql/__init__.py    |  18 +
 bsfs/triple_store/sparql/sparql.py      | 256 +++++++++++
 test/graph/test_graph.py                |   2 +
 test/query/__init__.py                  |   0
 test/query/ast/__init__.py              |   0
 test/query/ast/test_filter_.py          |  28 ++
 test/query/test_validator.py            |  30 ++
 test/triple_store/sparql/__init__.py    |   0
 test/triple_store/sparql/test_sparql.py | 771 ++++++++++++++++++++++++++++++++
 test/triple_store/test_sparql.py        | 769 ------------------------------
 17 files changed, 1230 insertions(+), 1022 deletions(-)
 create mode 100644 bsfs/query/__init__.py
 create mode 100644 bsfs/query/ast/__init__.py
 create mode 100644 bsfs/query/ast/filter_.py
 create mode 100644 bsfs/query/validator.py
 delete mode 100644 bsfs/triple_store/sparql.py
 create mode 100644 bsfs/triple_store/sparql/__init__.py
 create mode 100644 bsfs/triple_store/sparql/sparql.py
 create mode 100644 test/query/__init__.py
 create mode 100644 test/query/ast/__init__.py
 create mode 100644 test/query/ast/test_filter_.py
 create mode 100644 test/query/test_validator.py
 create mode 100644 test/triple_store/sparql/__init__.py
 create mode 100644 test/triple_store/sparql/test_sparql.py
 delete mode 100644 test/triple_store/test_sparql.py

diff --git a/bsfs/graph/graph.py b/bsfs/graph/graph.py
index b7b9f1c..10e5904 100644
--- a/bsfs/graph/graph.py
+++ b/bsfs/graph/graph.py
@@ -9,6 +9,7 @@ import os
 import typing
 
 # bsfs imports
+from bsfs.query import ast
 from bsfs.schema import Schema
 from bsfs.triple_store import TripleStoreBase
 from bsfs.utils import URI, typename
@@ -110,4 +111,8 @@ class Graph():
         type_ = self.schema.node(node_type)
         return _nodes.Nodes(self._backend, self._user, type_, {guid})
 
+    def get(self, node_type: URI, subject: ast.filter.FilterExpression) -> Nodes:
+        """Return a `Nodes` instance over all nodes of type *node_type* that match the *subject* query."""
+        raise NotImplementedError()
+
 ## EOF ##
diff --git a/bsfs/query/__init__.py b/bsfs/query/__init__.py
new file mode 100644
index 0000000..21c7389
--- /dev/null
+++ b/bsfs/query/__init__.py
@@ -0,0 +1,20 @@
+"""Query AST definitions and validation tools.
+
+Part of the BlackStar filesystem (bsfs) module.
+A copy of the license is provided with the project.
+Author: Matthias Baumgartner, 2022
+"""
+# imports
+import typing
+
+# inner-module imports
+from . import ast
+from . import validator as validate
+
+# exports
+__all__: typing.Sequence[str] = (
+    'ast',
+    'validate',
+    )
+
+## EOF ##
diff --git a/bsfs/query/ast/__init__.py b/bsfs/query/ast/__init__.py
new file mode 100644
index 0000000..0ee7385
--- /dev/null
+++ b/bsfs/query/ast/__init__.py
@@ -0,0 +1,24 @@
+"""Query AST components.
+
+The query AST consists of a Filter syntax tree.
+
+Classes beginning with an underscore (_) represent internal type hierarchies
+and should not be used for parsing. Note that the AST structures do not
+(and cannot) check semantic validity or consistency with a given schema.
+
+Part of the BlackStar filesystem (bsfs) module.
+A copy of the license is provided with the project.
+Author: Matthias Baumgartner, 2022
+"""
+# imports
+import typing
+
+# inner-module imports
+from . import filter_ as filter
+
+# exports
+__all__: typing.Sequence[str] = (
+    'filter',
+    )
+
+## EOF ##
diff --git a/bsfs/query/ast/filter_.py b/bsfs/query/ast/filter_.py
new file mode 100644
index 0000000..4086fc1
--- /dev/null
+++ b/bsfs/query/ast/filter_.py
@@ -0,0 +1,33 @@
+"""Filter AST.
+
+Part of the BlackStar filesystem (bsfs) module.
+A copy of the license is provided with the project.
+Author: Matthias Baumgartner, 2022
+"""
+# imports
+from collections import abc
+import typing
+
+# bsfs imports
+from bsfs.utils import typename
+
+# exports
+__all__ : typing.Sequence[str] = []
+
+
+## code ##
+
+class _Expression(abc.Hashable):
+    def __repr__(self) -> str:
+        """Return the expression's string representation."""
+        return f'{typename(self)}()'
+
+    def __hash__(self) -> int:
+        """Return the expression's hash."""
+        return hash(type(self))
+
+    def __eq__(self, other: typing.Any) -> bool:
+        """Return True if *self* and *other* are equivalent."""
+        return isinstance(other, type(self))
+
+## EOF ##
diff --git a/bsfs/query/validator.py b/bsfs/query/validator.py
new file mode 100644
index 0000000..ac3789a
--- /dev/null
+++ b/bsfs/query/validator.py
@@ -0,0 +1,35 @@
+"""Validate query AST structures against a schema.
+
+Part of the BlackStar filesystem (bsfs) module.
+A copy of the license is provided with the project.
+Author: Matthias Baumgartner, 2022
+"""
+# imports
+import typing
+
+# bsfs imports
+from bsfs import schema as bsc
+
+# inner-module imports
+from . import ast
+
+# exports
+__all__ : typing.Sequence[str] = (
+    'Filter',
+    )
+
+
+## code ##
+
+class Filter():
+    """Validate `ast.filter` expressions against a given schema."""
+
+    # schema to validate against.
+    schema: bsc.Schema
+
+    def __init__(self, schema: bsc.Schema):
+        self.schema = schema
+
+    def parse(self, node: ast.filter.FilterExpression):
+        raise NotImplementedError()
+
+## EOF ##
diff --git a/bsfs/triple_store/base.py b/bsfs/triple_store/base.py
index 6561262..28ebb86 100644
--- a/bsfs/triple_store/base.py
+++ b/bsfs/triple_store/base.py
@@ -108,6 +108,14 @@ class TripleStoreBase(abc.ABC):
 
         """
 
+    @abc.abstractmethod
+    def get(
+            self,
+            node_type: bsc.Node,
+            query: ast.filter.FilterExpression,
+            ) -> typing.Iterator[URI]:
+        """Return guids of nodes of type *node_type* that match the *query*."""
+
     @abc.abstractmethod
     def exists(
             self,
diff --git a/bsfs/triple_store/sparql.py b/bsfs/triple_store/sparql.py
deleted file mode 100644
index 7516dff..0000000
--- a/bsfs/triple_store/sparql.py
+++ /dev/null
@@ -1,253 +0,0 @@
-"""
-
-Part of the BlackStar filesystem (bsfs) module.
-A copy of the license is provided with the project.
-Author: Matthias Baumgartner, 2022
-"""
-# imports
-import itertools
-import typing
-import rdflib
-
-# bsfs imports
-from bsfs import schema as bsc
-from bsfs.utils import errors, URI
-
-# inner-module imports
-from . import base
-
-
-# exports
-__all__: typing.Sequence[str] = (
-    'SparqlStore',
-    )
-
-
-## code ##
-
-class _Transaction():
-    """Lightweight rdflib transactions for in-memory databases."""
-
-    # graph instance.
-    _graph: rdflib.Graph
-
-    # current log of added triples.
-    _added: typing.List[typing.Any]
-
-    # current log of removed triples.
- _removed: typing.List[typing.Any] - - def __init__(self, graph: rdflib.Graph): - self._graph = graph - # initialize internal structures - self.commit() - - def commit(self): - """Commit temporary changes.""" - self._added = [] - self._removed = [] - - def rollback(self): - """Undo changes since the last commit.""" - for triple in self._added: - self._graph.remove(triple) - for triple in self._removed: - self._graph.add(triple) - - def add(self, triple: typing.Any): - """Add a triple to the graph.""" - if triple not in self._graph: - self._added.append(triple) - self._graph.add(triple) - - def remove(self, triple: typing.Any): - """Remove a triple from the graph.""" - if triple in self._graph: - self._removed.append(triple) - self._graph.remove(triple) - - -class SparqlStore(base.TripleStoreBase): - """Sparql-based triple store. - - The sparql triple store uses a third-party backend - (currently rdflib) to store triples and manages them via - the Sparql query language. - - """ - - # The rdflib graph. - _graph: rdflib.Graph - - # Current transaction. - _transaction: _Transaction - - # The local schema. - _schema: bsc.Schema - - def __init__(self): - super().__init__(None) - self._graph = rdflib.Graph() - self._transaction = _Transaction(self._graph) - self._schema = bsc.Schema.Empty() - - # NOTE: mypy and pylint complain about the **kwargs not being listed (contrasting super) - # However, not having it here is clearer since it's explicit that there are no arguments. - @classmethod - def Open(cls) -> 'SparqlStore': # type: ignore [override] # pylint: disable=arguments-differ - return cls() - - def commit(self): - self._transaction.commit() - - def rollback(self): - self._transaction.rollback() - - @property - def schema(self) -> bsc.Schema: - return self._schema - - @schema.setter - def schema(self, schema: bsc.Schema): - # check args: Schema instanace - if not isinstance(schema, bsc.Schema): - raise TypeError(schema) - # check compatibility: No contradicting definitions - if not self.schema.consistent_with(schema): - raise errors.ConsistencyError(f'{schema} is inconsistent with {self.schema}') - - # commit the current transaction - self.commit() - - # adjust instances: - # nothing to do for added classes - # delete instances of removed classes - - # get deleted classes - sub = self.schema - schema - - # remove predicate instances - for pred in sub.predicates: - for src, trg in self._graph.subject_objects(rdflib.URIRef(pred.uri)): - self._transaction.remove((src, rdflib.URIRef(pred.uri), trg)) - - # remove node instances - for node in sub.nodes: - # iterate through node instances - for inst in self._graph.subjects(rdflib.RDF.type, rdflib.URIRef(node.uri)): - # remove triples where the instance is in the object position - for src, pred in self._graph.subject_predicates(inst): - self._transaction.remove((src, pred, inst)) - # remove triples where the instance is in the subject position - for pred, trg in self._graph.predicate_objects(inst): - self._transaction.remove((inst, pred, trg)) - # remove instance - self._transaction.remove((inst, rdflib.RDF.type, rdflib.URIRef(node.uri))) - - # NOTE: Nothing to do for literals - - # commit instance changes - self.commit() - - # migrate schema - self._schema = schema - - - def _has_type(self, subject: URI, node_type: bsc.Node) -> bool: - """Return True if *subject* is a node of class *node_type* or a subclass thereof.""" - if node_type not in self.schema.nodes(): - raise errors.ConsistencyError(f'{node_type} is not defined in the schema') - - 
subject_types = list(self._graph.objects(rdflib.URIRef(subject), rdflib.RDF.type)) - if len(subject_types) == 0: - return False - if len(subject_types) == 1: - node = self.schema.node(URI(subject_types[0])) # type: ignore [arg-type] # URI is a subtype of str - if node == node_type: - return True - if node_type in node.parents(): - return True - return False - raise errors.UnreachableError() - - def exists( - self, - node_type: bsc.Node, - guids: typing.Iterable[URI], - ) -> typing.Iterable[URI]: - return (subj for subj in guids if self._has_type(subj, node_type)) - - def create( - self, - node_type: bsc.Node, - guids: typing.Iterable[URI], - ): - # check node_type - if node_type not in self.schema.nodes(): - raise errors.ConsistencyError(f'{node_type} is not defined in the schema') - # check and create guids - for guid in guids: - subject = rdflib.URIRef(guid) - # check node existence - if (subject, rdflib.RDF.type, None) in self._graph: - # FIXME: node exists and may have a different type! ignore? raise? report? - continue - # add node - self._transaction.add((subject, rdflib.RDF.type, rdflib.URIRef(node_type.uri))) - - def set( - self, - node_type: bsc.Node, - guids: typing.Iterable[URI], - predicate: bsc.Predicate, - values: typing.Iterable[typing.Any], - ): - # check node_type - if node_type not in self.schema.nodes(): - raise errors.ConsistencyError(f'{node_type} is not defined in the schema') - # check predicate - if predicate not in self.schema.predicates(): - raise errors.ConsistencyError(f'{predicate} is not defined in the schema') - if not node_type <= predicate.domain: - raise errors.ConsistencyError(f'{node_type} must be a subclass of {predicate.domain}') - # NOTE: predicate.range is in the schema since predicate is in the schema. - # materialize values - values = set(values) - # check values - if len(values) == 0: - return - if predicate.unique and len(values) != 1: - raise ValueError(values) - if isinstance(predicate.range, bsc.Node): - values = set(values) # materialize to safeguard against iterators passed as argument - inconsistent = {val for val in values if not self._has_type(val, predicate.range)} - # catches nodes that don't exist and nodes that have an inconsistent type - if len(inconsistent) > 0: - raise errors.InstanceError(inconsistent) - # check guids - # FIXME: Fail or skip inexistent nodes? - guids = set(guids) - inconsistent = {guid for guid in guids if not self._has_type(guid, node_type)} - if len(inconsistent) > 0: - raise errors.InstanceError(inconsistent) - - # add triples - pred = rdflib.URIRef(predicate.uri) - for guid, value in itertools.product(guids, values): - guid = rdflib.URIRef(guid) - # convert value - if isinstance(predicate.range, bsc.Literal): - value = rdflib.Literal(value, datatype=rdflib.URIRef(predicate.range.uri)) - elif isinstance(predicate.range, bsc.Node): - value = rdflib.URIRef(value) - else: - raise errors.UnreachableError() - # clear triples for unique predicates - if predicate.unique: - for obj in self._graph.objects(guid, pred): - if obj != value: - self._transaction.remove((guid, pred, obj)) - # add triple - self._transaction.add((guid, pred, value)) - -## EOF ## diff --git a/bsfs/triple_store/sparql/__init__.py b/bsfs/triple_store/sparql/__init__.py new file mode 100644 index 0000000..285334a --- /dev/null +++ b/bsfs/triple_store/sparql/__init__.py @@ -0,0 +1,18 @@ +""" + +Part of the BlackStar filesystem (bsfs) module. +A copy of the license is provided with the project. 
+Author: Matthias Baumgartner, 2022 +""" +# imports +import typing + +# inner-module imports +from .sparql import SparqlStore + +# exports +__all__: typing.Sequence[str] = ( + 'SparqlStore', + ) + +## EOF ## diff --git a/bsfs/triple_store/sparql/sparql.py b/bsfs/triple_store/sparql/sparql.py new file mode 100644 index 0000000..fff540a --- /dev/null +++ b/bsfs/triple_store/sparql/sparql.py @@ -0,0 +1,256 @@ +""" + +Part of the BlackStar filesystem (bsfs) module. +A copy of the license is provided with the project. +Author: Matthias Baumgartner, 2022 +""" +# imports +import itertools +import typing +import rdflib + +# bsfs imports +from bsfs import schema as bsc +from bsfs.query import ast +from bsfs.utils import errors, URI + +# inner-module imports +from . import base + + +# exports +__all__: typing.Sequence[str] = ( + 'SparqlStore', + ) + + +## code ## + +class _Transaction(): + """Lightweight rdflib transactions for in-memory databases.""" + + # graph instance. + _graph: rdflib.Graph + + # current log of added triples. + _added: typing.List[typing.Any] + + # current log of removed triples. + _removed: typing.List[typing.Any] + + def __init__(self, graph: rdflib.Graph): + self._graph = graph + # initialize internal structures + self.commit() + + def commit(self): + """Commit temporary changes.""" + self._added = [] + self._removed = [] + + def rollback(self): + """Undo changes since the last commit.""" + for triple in self._added: + self._graph.remove(triple) + for triple in self._removed: + self._graph.add(triple) + + def add(self, triple: typing.Any): + """Add a triple to the graph.""" + if triple not in self._graph: + self._added.append(triple) + self._graph.add(triple) + + def remove(self, triple: typing.Any): + """Remove a triple from the graph.""" + if triple in self._graph: + self._removed.append(triple) + self._graph.remove(triple) + + +class SparqlStore(base.TripleStoreBase): + """Sparql-based triple store. + + The sparql triple store uses a third-party backend + (currently rdflib) to store triples and manages them via + the Sparql query language. + + """ + + # The rdflib graph. + _graph: rdflib.Graph + + # Current transaction. + _transaction: _Transaction + + # The local schema. + _schema: bsc.Schema + + def __init__(self): + super().__init__(None) + self._graph = rdflib.Graph() + self._transaction = _Transaction(self._graph) + self._schema = bsc.Schema.Empty() + + # NOTE: mypy and pylint complain about the **kwargs not being listed (contrasting super) + # However, not having it here is clearer since it's explicit that there are no arguments. 
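+    # The backing rdflib graph lives purely in memory, so Open takes no
+    # connection arguments and the store is initialized with uri=None.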
+ @classmethod + def Open(cls) -> 'SparqlStore': # type: ignore [override] # pylint: disable=arguments-differ + return cls() + + def commit(self): + self._transaction.commit() + + def rollback(self): + self._transaction.rollback() + + @property + def schema(self) -> bsc.Schema: + return self._schema + + @schema.setter + def schema(self, schema: bsc.Schema): + # check args: Schema instanace + if not isinstance(schema, bsc.Schema): + raise TypeError(schema) + # check compatibility: No contradicting definitions + if not self.schema.consistent_with(schema): + raise errors.ConsistencyError(f'{schema} is inconsistent with {self.schema}') + + # commit the current transaction + self.commit() + + # adjust instances: + # nothing to do for added classes + # delete instances of removed classes + + # get deleted classes + sub = self.schema - schema + + # remove predicate instances + for pred in sub.predicates: + for src, trg in self._graph.subject_objects(rdflib.URIRef(pred.uri)): + self._transaction.remove((src, rdflib.URIRef(pred.uri), trg)) + + # remove node instances + for node in sub.nodes: + # iterate through node instances + for inst in self._graph.subjects(rdflib.RDF.type, rdflib.URIRef(node.uri)): + # remove triples where the instance is in the object position + for src, pred in self._graph.subject_predicates(inst): + self._transaction.remove((src, pred, inst)) + # remove triples where the instance is in the subject position + for pred, trg in self._graph.predicate_objects(inst): + self._transaction.remove((inst, pred, trg)) + # remove instance + self._transaction.remove((inst, rdflib.RDF.type, rdflib.URIRef(node.uri))) + + # NOTE: Nothing to do for literals + + # commit instance changes + self.commit() + + # migrate schema + self._schema = schema + + def get(self, node_type: bsc.Node, query: ast.filter.FilterExpression) -> typing.Iterator[URI]: + raise NotImplementedError() + + def _has_type(self, subject: URI, node_type: bsc.Node) -> bool: + """Return True if *subject* is a node of class *node_type* or a subclass thereof.""" + if node_type not in self.schema.nodes(): + raise errors.ConsistencyError(f'{node_type} is not defined in the schema') + + subject_types = list(self._graph.objects(rdflib.URIRef(subject), rdflib.RDF.type)) + if len(subject_types) == 0: + return False + if len(subject_types) == 1: + node = self.schema.node(URI(subject_types[0])) # type: ignore [arg-type] # URI is a subtype of str + if node == node_type: + return True + if node_type in node.parents(): + return True + return False + raise errors.UnreachableError() + + def exists( + self, + node_type: bsc.Node, + guids: typing.Iterable[URI], + ) -> typing.Iterable[URI]: + return (subj for subj in guids if self._has_type(subj, node_type)) + + def create( + self, + node_type: bsc.Node, + guids: typing.Iterable[URI], + ): + # check node_type + if node_type not in self.schema.nodes(): + raise errors.ConsistencyError(f'{node_type} is not defined in the schema') + # check and create guids + for guid in guids: + subject = rdflib.URIRef(guid) + # check node existence + if (subject, rdflib.RDF.type, None) in self._graph: + # FIXME: node exists and may have a different type! ignore? raise? report? 
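+                # For now the guid is skipped silently; its already stored
+                # rdf:type is left untouched even if it differs from node_type.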
+ continue + # add node + self._transaction.add((subject, rdflib.RDF.type, rdflib.URIRef(node_type.uri))) + + def set( + self, + node_type: bsc.Node, + guids: typing.Iterable[URI], + predicate: bsc.Predicate, + values: typing.Iterable[typing.Any], + ): + # check node_type + if node_type not in self.schema.nodes(): + raise errors.ConsistencyError(f'{node_type} is not defined in the schema') + # check predicate + if predicate not in self.schema.predicates(): + raise errors.ConsistencyError(f'{predicate} is not defined in the schema') + if not node_type <= predicate.domain: + raise errors.ConsistencyError(f'{node_type} must be a subclass of {predicate.domain}') + # NOTE: predicate.range is in the schema since predicate is in the schema. + # materialize values + values = set(values) + # check values + if len(values) == 0: + return + if predicate.unique and len(values) != 1: + raise ValueError(values) + if isinstance(predicate.range, bsc.Node): + values = set(values) # materialize to safeguard against iterators passed as argument + inconsistent = {val for val in values if not self._has_type(val, predicate.range)} + # catches nodes that don't exist and nodes that have an inconsistent type + if len(inconsistent) > 0: + raise errors.InstanceError(inconsistent) + # check guids + # FIXME: Fail or skip inexistent nodes? + guids = set(guids) + inconsistent = {guid for guid in guids if not self._has_type(guid, node_type)} + if len(inconsistent) > 0: + raise errors.InstanceError(inconsistent) + + # add triples + pred = rdflib.URIRef(predicate.uri) + for guid, value in itertools.product(guids, values): + guid = rdflib.URIRef(guid) + # convert value + if isinstance(predicate.range, bsc.Literal): + value = rdflib.Literal(value, datatype=rdflib.URIRef(predicate.range.uri)) + elif isinstance(predicate.range, bsc.Node): + value = rdflib.URIRef(value) + else: + raise errors.UnreachableError() + # clear triples for unique predicates + if predicate.unique: + for obj in self._graph.objects(guid, pred): + if obj != value: + self._transaction.remove((guid, pred, obj)) + # add triple + self._transaction.add((guid, pred, value)) + +## EOF ## diff --git a/test/graph/test_graph.py b/test/graph/test_graph.py index 33cf6aa..0a3fd5b 100644 --- a/test/graph/test_graph.py +++ b/test/graph/test_graph.py @@ -192,6 +192,8 @@ class TestGraph(unittest.TestCase): bsfs:unique "true"^^xsd:boolean . ''')) + def test_get(self): + raise NotImplementedError() ## main ## diff --git a/test/query/__init__.py b/test/query/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/test/query/ast/__init__.py b/test/query/ast/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/test/query/ast/test_filter_.py b/test/query/ast/test_filter_.py new file mode 100644 index 0000000..cc815e3 --- /dev/null +++ b/test/query/ast/test_filter_.py @@ -0,0 +1,28 @@ +""" + +Part of the tagit test suite. +A copy of the license is provided with the project. +Author: Matthias Baumgartner, 2022 +""" +# imports +import unittest + +# bsfs imports + +# objects to test +from bsfs.query.ast.filter_ import _Expression + + +## code ## + +class TestExpression(unittest.TestCase): + def test_essentials(self): + raise NotImplementedError() + + +## main ## + +if __name__ == '__main__': + unittest.main() + +## EOF ## diff --git a/test/query/test_validator.py b/test/query/test_validator.py new file mode 100644 index 0000000..0e88ad3 --- /dev/null +++ b/test/query/test_validator.py @@ -0,0 +1,30 @@ +""" + +Part of the tagit test suite. 
+A copy of the license is provided with the project. +Author: Matthias Baumgartner, 2022 +""" +# imports +import unittest + +# bsfs imports + +# objects to test +from bsfs.query.validator import Filter + + +## code ## + +class TestFilter(unittest.TestCase): + def test_parse(self): + raise NotImplementedError() + + # FIXME: subtests for individual functions + + +## main ## + +if __name__ == '__main__': + unittest.main() + +## EOF ## diff --git a/test/triple_store/sparql/__init__.py b/test/triple_store/sparql/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/test/triple_store/sparql/test_sparql.py b/test/triple_store/sparql/test_sparql.py new file mode 100644 index 0000000..0bf664a --- /dev/null +++ b/test/triple_store/sparql/test_sparql.py @@ -0,0 +1,771 @@ +""" + +Part of the bsfs test suite. +A copy of the license is provided with the project. +Author: Matthias Baumgartner, 2022 +""" +# imports +import rdflib +import unittest + +# bsie imports +from bsfs import schema as _schema +from bsfs.namespace import ns +from bsfs.utils import errors, URI + +# objects to test +from bsfs.triple_store.sparql.sparql import SparqlStore + + +## code ## + +class TestSparqlStore(unittest.TestCase): + def setUp(self): + self.schema = _schema.Schema.from_string(''' + prefix rdfs: + prefix xsd: + + prefix bsfs: + prefix bse: + + bsfs:Entity rdfs:subClassOf bsfs:Node . + bsfs:Tag rdfs:subClassOf bsfs:Node . + bsfs:User rdfs:subClassOf bsfs:Node . + xsd:string rdfs:subClassOf bsfs:Literal . + xsd:integer rdfs:subClassOf bsfs:Literal . + + # non-unique literal + bse:comment rdfs:subClassOf bsfs:Predicate ; + rdfs:domain bsfs:Entity ; + rdfs:range xsd:string ; + bsfs:unique "false"^^xsd:boolean . + + # unique literal + bse:filesize rdfs:subClassOf bsfs:Predicate ; + rdfs:domain bsfs:Entity ; + rdfs:range xsd:integer ; + bsfs:unique "true"^^xsd:boolean . + + # non-unique node + bse:tag rdfs:subClassOf bsfs:Predicate ; + rdfs:domain bsfs:Entity ; + rdfs:range bsfs:Tag ; + bsfs:unique "false"^^xsd:boolean . + + # unique node + bse:author rdfs:subClassOf bsfs:Predicate ; + rdfs:domain bsfs:Entity ; + rdfs:range bsfs:User ; + bsfs:unique "true"^^xsd:boolean . + + ''') + + def test_essentials(self): + store = SparqlStore.Open() + # equality + self.assertEqual(store, store) + self.assertEqual(hash(store), hash(store)) + self.assertNotEqual(store, SparqlStore.Open()) + self.assertNotEqual(hash(store), hash(SparqlStore.Open())) + # string conversion + self.assertEqual(str(store), 'SparqlStore(uri=None)') + self.assertEqual(repr(store), 'SparqlStore(uri=None)') + # open + self.assertIsInstance(SparqlStore.Open(), SparqlStore) + + + def test__has_type(self): + # setup store + store = SparqlStore.Open() + store.schema = _schema.Schema.from_string(''' + prefix rdfs: + prefix xsd: + prefix bsfs: + + bsfs:Entity rdfs:subClassOf bsfs:Node . + bsfs:Document rdfs:subClassOf bsfs:Entity . + bsfs:Image rdfs:subClassOf bsfs:Entity . + bsfs:PDF rdfs:subClassOf bsfs:Document . 
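+            # hierarchy: Document and Image derive from Entity, PDF from Document,
+            # and Entity from Node.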
+ + ''') + # add some instances + store.create(store.schema.node(ns.bsfs.Entity), {URI('http://example.com/me/entity#1234')}) + store.create(store.schema.node(ns.bsfs.Document), {URI('http://example.com/me/document#1234')}) + store.create(store.schema.node(ns.bsfs.Image), {URI('http://example.com/me/image#1234')}) + store.create(store.schema.node(ns.bsfs.PDF), {URI('http://example.com/me/pdf#1234')}) + + # node_type must be in the schema + self.assertRaises(errors.ConsistencyError, store._has_type, URI('http://example.com/me/entity#1234'), store.schema.node(ns.bsfs.Node).get_child(ns.bsfs.invalid)) + + # returns False on inexistent nodes + self.assertFalse(store._has_type(URI('http://example.com/me/entity#4321'), store.schema.node(ns.bsfs.Entity))) + self.assertFalse(store._has_type(URI('http://example.com/me/document#4321'), store.schema.node(ns.bsfs.Document))) + self.assertFalse(store._has_type(URI('http://example.com/me/image#4321'), store.schema.node(ns.bsfs.Image))) + self.assertFalse(store._has_type(URI('http://example.com/me/pdf#4321'), store.schema.node(ns.bsfs.PDF))) + + # _has_type checks direct types + self.assertTrue(store._has_type(URI('http://example.com/me/entity#1234'), store.schema.node(ns.bsfs.Entity))) + self.assertTrue(store._has_type(URI('http://example.com/me/document#1234'), store.schema.node(ns.bsfs.Document))) + self.assertTrue(store._has_type(URI('http://example.com/me/image#1234'), store.schema.node(ns.bsfs.Image))) + self.assertTrue(store._has_type(URI('http://example.com/me/pdf#1234'), store.schema.node(ns.bsfs.PDF))) + + # _has_type checks type hierarchy + self.assertFalse(store._has_type(URI('http://example.com/me/entity#1234'), store.schema.node(ns.bsfs.Document))) + self.assertFalse(store._has_type(URI('http://example.com/me/entity#1234'), store.schema.node(ns.bsfs.Image))) + self.assertFalse(store._has_type(URI('http://example.com/me/entity#1234'), store.schema.node(ns.bsfs.PDF))) + + self.assertTrue(store._has_type(URI('http://example.com/me/document#1234'), store.schema.node(ns.bsfs.Entity))) + self.assertFalse(store._has_type(URI('http://example.com/me/document#1234'), store.schema.node(ns.bsfs.Image))) + self.assertFalse(store._has_type(URI('http://example.com/me/document#1234'), store.schema.node(ns.bsfs.PDF))) + + self.assertTrue(store._has_type(URI('http://example.com/me/image#1234'), store.schema.node(ns.bsfs.Entity))) + self.assertFalse(store._has_type(URI('http://example.com/me/image#1234'), store.schema.node(ns.bsfs.Document))) + self.assertFalse(store._has_type(URI('http://example.com/me/image#1234'), store.schema.node(ns.bsfs.PDF))) + + self.assertTrue(store._has_type(URI('http://example.com/me/pdf#1234'), store.schema.node(ns.bsfs.Entity))) + self.assertTrue(store._has_type(URI('http://example.com/me/pdf#1234'), store.schema.node(ns.bsfs.Document))) + self.assertFalse(store._has_type(URI('http://example.com/me/pdf#1234'), store.schema.node(ns.bsfs.Image))) + + + def test_schema(self): + # setup + store = SparqlStore.Open() + curr = self.schema + p_comment = curr.predicate(ns.bse.comment) + p_filesize = curr.predicate(ns.bse.filesize) + p_tag = curr.predicate(ns.bse.tag) + p_author = curr.predicate(ns.bse.author) + + # migrate to an initial schema + store.schema = curr + # store has migrated + self.assertEqual(store.schema, curr) + + # add some instances + ent_ids = {URI('http://example.com/me/entity#1234'), URI('http://example.com/me/entity#4321')} + tag_ids = {URI('http://example.com/me/tag#1234'), URI('http://example.com/me/tag#4321')} + 
store.create(curr.node(ns.bsfs.Entity), ent_ids) + store.create(curr.node(ns.bsfs.Tag), tag_ids) + store.create(curr.node(ns.bsfs.User), {URI('http://example.com/me')}) + # add some triples + store.set(curr.node(ns.bsfs.Entity), ent_ids, p_comment, {'foo', 'bar'}) + store.set(curr.node(ns.bsfs.Entity), ent_ids, p_filesize, {1234}) + store.set(curr.node(ns.bsfs.Entity), ent_ids, p_tag, + {URI('http://example.com/me/tag#1234'), URI('http://example.com/me/tag#4321')}) + store.set(curr.node(ns.bsfs.Entity), ent_ids, p_author, + {URI('http://example.com/me')}) + # check instances + instances = { + # node instances + (rdflib.URIRef('http://example.com/me/entity#1234'), rdflib.RDF.type, rdflib.URIRef(ns.bsfs.Entity)), + (rdflib.URIRef('http://example.com/me/entity#4321'), rdflib.RDF.type, rdflib.URIRef(ns.bsfs.Entity)), + (rdflib.URIRef('http://example.com/me/tag#1234'), rdflib.RDF.type, rdflib.URIRef(ns.bsfs.Tag)), + (rdflib.URIRef('http://example.com/me/tag#4321'), rdflib.RDF.type, rdflib.URIRef(ns.bsfs.Tag)), + (rdflib.URIRef('http://example.com/me'), rdflib.RDF.type, rdflib.URIRef(ns.bsfs.User)), + # comments + (rdflib.URIRef('http://example.com/me/entity#1234'), rdflib.URIRef(p_comment.uri), rdflib.Literal('foo', datatype=rdflib.XSD.string)), + (rdflib.URIRef('http://example.com/me/entity#1234'), rdflib.URIRef(p_comment.uri), rdflib.Literal('bar', datatype=rdflib.XSD.string)), + (rdflib.URIRef('http://example.com/me/entity#4321'), rdflib.URIRef(p_comment.uri), rdflib.Literal('foo', datatype=rdflib.XSD.string)), + (rdflib.URIRef('http://example.com/me/entity#4321'), rdflib.URIRef(p_comment.uri), rdflib.Literal('bar', datatype=rdflib.XSD.string)), + # filesize + (rdflib.URIRef('http://example.com/me/entity#1234'), rdflib.URIRef(p_filesize.uri), rdflib.Literal('1234', datatype=rdflib.XSD.integer)), + (rdflib.URIRef('http://example.com/me/entity#4321'), rdflib.URIRef(p_filesize.uri), rdflib.Literal('1234', datatype=rdflib.XSD.integer)), + # tags + (rdflib.URIRef('http://example.com/me/entity#1234'), rdflib.URIRef(p_tag.uri), rdflib.URIRef('http://example.com/me/tag#1234')), + (rdflib.URIRef('http://example.com/me/entity#1234'), rdflib.URIRef(p_tag.uri), rdflib.URIRef('http://example.com/me/tag#4321')), + (rdflib.URIRef('http://example.com/me/entity#4321'), rdflib.URIRef(p_tag.uri), rdflib.URIRef('http://example.com/me/tag#1234')), + (rdflib.URIRef('http://example.com/me/entity#4321'), rdflib.URIRef(p_tag.uri), rdflib.URIRef('http://example.com/me/tag#4321')), + # author + (rdflib.URIRef('http://example.com/me/entity#1234'), rdflib.URIRef(p_author.uri), rdflib.URIRef('http://example.com/me')), + (rdflib.URIRef('http://example.com/me/entity#4321'), rdflib.URIRef(p_author.uri), rdflib.URIRef('http://example.com/me')), + } + self.assertSetEqual(set(store._graph), instances) + + # add some classes to the schema + curr = curr + _schema.Schema.from_string(''' + prefix rdfs: + prefix xsd: + prefix bsfs: + prefix bse: + prefix bst: + prefix bsc: + + bsfs:Entity rdfs:subClassOf bsfs:Node . + bsfs:Tag rdfs:subClassOf bsfs:Node . + bsfs:Collection rdfs:subClassOf bsfs:Node . + xsd:boolean rdfs:subClassOf bsfs:Literal . + + # literal + bse:shared rdfs:subClassOf bsfs:Predicate ; + rdfs:domain bsfs:Entity ; + rdfs:range xsd:boolean ; + bsfs:unique "true"^^xsd:boolean . + + # node + bse:partOf rdfs:subClassOf bsfs:Predicate ; + rdfs:domain bsfs:Entity ; + rdfs:range bsfs:Collection ; + bsfs:unique "false"^^xsd:boolean . 
+ + # predicates across auxiliary node classes + bst:usedIn rdfs:subClassOf bsfs:Predicate ; + rdfs:domain bsfs:Tag ; + rdfs:range bsfs:Collection ; + bsfs:unique "false"^^xsd:boolean . + + bsc:tag rdfs:subClassOf bsfs:Predicate ; + rdfs:domain bsfs:Collection ; + rdfs:range bsfs:Tag ; + bsfs:unique "false"^^xsd:boolean . + + bst:principal rdfs:subClassOf bsfs:Predicate ; + rdfs:domain bsfs:Tag ; + rdfs:range bsfs:Node ; + bsfs:unique "true"^^xsd:boolean . + + ''') + # store migrated to the new schema + store.schema = curr + self.assertEqual(store.schema, curr) + # instances have not changed + self.assertSetEqual(set(store._graph), instances) + # add some instances of the new classes + p_partOf = curr.predicate(ns.bse.partOf) + p_shared = curr.predicate(ns.bse.shared) + p_usedIn = curr.predicate('http://bsfs.ai/schema/Tag#usedIn') + p_ctag = curr.predicate('http://bsfs.ai/schema/Collection#tag') + p_principal = curr.predicate('http://bsfs.ai/schema/Tag#principal') + store.create(curr.node(ns.bsfs.Collection), {URI('http://example.com/me/collection#1234'), URI('http://example.com/me/collection#4321')}) + # add some more triples + store.set(curr.node(ns.bsfs.Entity), ent_ids, p_shared, {True}) + store.set(curr.node(ns.bsfs.Entity), ent_ids, p_partOf, + {URI('http://example.com/me/collection#1234'), URI('http://example.com/me/collection#4321')}) + store.set(curr.node(ns.bsfs.Tag), {URI('http://example.com/me/tag#1234')}, p_usedIn, + {URI('http://example.com/me/collection#1234')}) + store.set(curr.node(ns.bsfs.Collection), {URI('http://example.com/me/collection#4321')}, p_ctag, + {URI('http://example.com/me/tag#1234'), URI('http://example.com/me/tag#4321')}) + store.set(curr.node(ns.bsfs.Tag), {URI('http://example.com/me/tag#1234')}, p_principal, + {URI('http://example.com/me/collection#1234')}) + # new instances are now in the graph + self.assertSetEqual(set(store._graph), instances | { + # collections + (rdflib.URIRef('http://example.com/me/collection#1234'), rdflib.RDF.type, rdflib.URIRef(ns.bsfs.Collection)), + (rdflib.URIRef('http://example.com/me/collection#4321'), rdflib.RDF.type, rdflib.URIRef(ns.bsfs.Collection)), + # partOf + (rdflib.URIRef('http://example.com/me/entity#1234'), rdflib.URIRef(p_partOf.uri), rdflib.URIRef('http://example.com/me/collection#1234')), + (rdflib.URIRef('http://example.com/me/entity#1234'), rdflib.URIRef(p_partOf.uri), rdflib.URIRef('http://example.com/me/collection#4321')), + (rdflib.URIRef('http://example.com/me/entity#4321'), rdflib.URIRef(p_partOf.uri), rdflib.URIRef('http://example.com/me/collection#1234')), + (rdflib.URIRef('http://example.com/me/entity#4321'), rdflib.URIRef(p_partOf.uri), rdflib.URIRef('http://example.com/me/collection#4321')), + # shared + (rdflib.URIRef('http://example.com/me/entity#1234'), rdflib.URIRef(p_shared.uri), rdflib.Literal('true', datatype=rdflib.XSD.boolean)), + (rdflib.URIRef('http://example.com/me/entity#4321'), rdflib.URIRef(p_shared.uri), rdflib.Literal('true', datatype=rdflib.XSD.boolean)), + # auxiliary node connections + (rdflib.URIRef('http://example.com/me/tag#1234'), rdflib.URIRef(p_usedIn.uri), rdflib.URIRef('http://example.com/me/collection#1234')), + (rdflib.URIRef('http://example.com/me/tag#1234'), rdflib.URIRef(p_principal.uri), rdflib.URIRef('http://example.com/me/collection#1234')), + (rdflib.URIRef('http://example.com/me/collection#4321'), rdflib.URIRef(p_ctag.uri), rdflib.URIRef('http://example.com/me/tag#1234')), + (rdflib.URIRef('http://example.com/me/collection#4321'), rdflib.URIRef(p_ctag.uri), 
rdflib.URIRef('http://example.com/me/tag#4321')), + }) + + + # remove some classes from the schema + curr = _schema.Schema.from_string(''' + prefix rdfs: + prefix xsd: + prefix bsfs: + prefix bse: + prefix bst: + + bsfs:Entity rdfs:subClassOf bsfs:Node . + bsfs:Tag rdfs:subClassOf bsfs:Node . + bsfs:User rdfs:subClassOf bsfs:Node . + + xsd:boolean rdfs:subClassOf bsfs:Literal . + xsd:integer rdfs:subClassOf bsfs:Literal . + + bse:filesize rdfs:subClassOf bsfs:Predicate ; + rdfs:domain bsfs:Entity ; + rdfs:range xsd:integer ; + bsfs:unique "true"^^xsd:boolean . + + bse:tag rdfs:subClassOf bsfs:Predicate ; + rdfs:domain bsfs:Entity ; + rdfs:range bsfs:Tag ; + bsfs:unique "false"^^xsd:boolean . + + bse:shared rdfs:subClassOf bsfs:Predicate ; + rdfs:domain bsfs:Entity ; + rdfs:range xsd:boolean ; + bsfs:unique "true"^^xsd:boolean . + + bst:principal rdfs:subClassOf bsfs:Predicate ; + rdfs:domain bsfs:Tag ; + rdfs:range bsfs:Node ; + bsfs:unique "true"^^xsd:boolean . + + # removed: bsfs:Collection + # removed: xsd:string + # removed: bse:comment (bsfs:Entity -> xsd:string) + # removed: bse:partOf (bsfs:Entity -> bsfs:Collection) + # removed: bse:author (bsfs:entity -> bsfs:User) + # removed: bst:usedIn (bsfs:Tag -> bsfs:Collection) + # removed: bsc:tag (bsfs:Collection -> bsfs:Tag) + + ''') + # store migrated to the new schema + store.schema = curr + self.assertEqual(store.schema, curr) + # instances of old classes were removed + self.assertSetEqual(set(store._graph), { + # node instances + (rdflib.URIRef('http://example.com/me/entity#1234'), rdflib.RDF.type, rdflib.URIRef(ns.bsfs.Entity)), + (rdflib.URIRef('http://example.com/me/entity#4321'), rdflib.RDF.type, rdflib.URIRef(ns.bsfs.Entity)), + (rdflib.URIRef('http://example.com/me/tag#1234'), rdflib.RDF.type, rdflib.URIRef(ns.bsfs.Tag)), + (rdflib.URIRef('http://example.com/me/tag#4321'), rdflib.RDF.type, rdflib.URIRef(ns.bsfs.Tag)), + (rdflib.URIRef('http://example.com/me'), rdflib.RDF.type, rdflib.URIRef(ns.bsfs.User)), + # filesize + (rdflib.URIRef('http://example.com/me/entity#1234'), rdflib.URIRef(p_filesize.uri), rdflib.Literal('1234', datatype=rdflib.XSD.integer)), + (rdflib.URIRef('http://example.com/me/entity#4321'), rdflib.URIRef(p_filesize.uri), rdflib.Literal('1234', datatype=rdflib.XSD.integer)), + # tags + (rdflib.URIRef('http://example.com/me/entity#1234'), rdflib.URIRef(p_tag.uri), rdflib.URIRef('http://example.com/me/tag#1234')), + (rdflib.URIRef('http://example.com/me/entity#1234'), rdflib.URIRef(p_tag.uri), rdflib.URIRef('http://example.com/me/tag#4321')), + (rdflib.URIRef('http://example.com/me/entity#4321'), rdflib.URIRef(p_tag.uri), rdflib.URIRef('http://example.com/me/tag#1234')), + (rdflib.URIRef('http://example.com/me/entity#4321'), rdflib.URIRef(p_tag.uri), rdflib.URIRef('http://example.com/me/tag#4321')), + # shared + (rdflib.URIRef('http://example.com/me/entity#1234'), rdflib.URIRef(p_shared.uri), rdflib.Literal('true', datatype=rdflib.XSD.boolean)), + (rdflib.URIRef('http://example.com/me/entity#4321'), rdflib.URIRef(p_shared.uri), rdflib.Literal('true', datatype=rdflib.XSD.boolean)), + }) + + # can only assign schema instances + self.assertRaises(TypeError, setattr, store, 'schema', None) + self.assertRaises(TypeError, setattr, store, 'schema', 1234) + self.assertRaises(TypeError, setattr, store, 'schema', 'foo') + class Foo(): pass + self.assertRaises(TypeError, setattr, store, 'schema', Foo()) + + # cannot migrate to incompatible schema + invalid = _schema.Schema.from_string(''' + prefix rdfs: + prefix xsd: + 
prefix bsfs: + prefix bse: + + bsfs:Entity rdfs:subClassOf bsfs:Node . + bsfs:Tag rdfs:subClassOf bsfs:Entity . # inconsistent with previous tag definition + + bse:tag rdfs:subClassOf bsfs:Predicate ; + rdfs:domain bsfs:Entity ; + rdfs:range bsfs:Tag ; + bsfs:unique "false"^^xsd:boolean . + + ''') + self.assertRaises(errors.ConsistencyError, setattr, store, 'schema', invalid) + invalid = _schema.Schema.from_string(''' + prefix rdfs: + prefix xsd: + prefix bsfs: + prefix bse: + + bsfs:Entity rdfs:subClassOf bsfs:Node . + bsfs:User rdfs:subClassOf bsfs:Node . + + # inconsistent predicate + bse:tag rdfs:subClassOf bsfs:Predicate ; + rdfs:domain bsfs:Entity ; + rdfs:range bsfs:User; + bsfs:unique "false"^^xsd:boolean . + + ''') + self.assertRaises(errors.ConsistencyError, setattr, store, 'schema', invalid) + + + def test_transaction(self): + # store setup + store = SparqlStore.Open() + store.schema = self.schema + p_tag = store.schema.predicate(ns.bse.tag) + p_filesize = store.schema.predicate(ns.bse.filesize) + # prepare node types + ent_type = store.schema.node(ns.bsfs.Entity) + tag_type = store.schema.node(ns.bsfs.Tag) + ent_ids = {URI('http://example.com/me/entity#1234'), URI('http://example.com/me/entity#4321')} + tag_ids = {URI('http://example.com/me/tag#1234'), URI('http://example.com/me/tag#4321')} + # target instances + instances = { + # node instances + (rdflib.URIRef('http://example.com/me/entity#1234'), rdflib.RDF.type, rdflib.URIRef(ns.bsfs.Entity)), + (rdflib.URIRef('http://example.com/me/entity#4321'), rdflib.RDF.type, rdflib.URIRef(ns.bsfs.Entity)), + (rdflib.URIRef('http://example.com/me/tag#1234'), rdflib.RDF.type, rdflib.URIRef(ns.bsfs.Tag)), + (rdflib.URIRef('http://example.com/me/tag#4321'), rdflib.RDF.type, rdflib.URIRef(ns.bsfs.Tag)), + # links + (rdflib.URIRef('http://example.com/me/entity#1234'), rdflib.URIRef(p_tag.uri), rdflib.URIRef('http://example.com/me/tag#1234')), + (rdflib.URIRef('http://example.com/me/entity#1234'), rdflib.URIRef(p_tag.uri), rdflib.URIRef('http://example.com/me/tag#4321')), + (rdflib.URIRef('http://example.com/me/entity#4321'), rdflib.URIRef(p_tag.uri), rdflib.URIRef('http://example.com/me/tag#1234')), + (rdflib.URIRef('http://example.com/me/entity#4321'), rdflib.URIRef(p_tag.uri), rdflib.URIRef('http://example.com/me/tag#4321')), + } + + # add some data + store.create(ent_type, ent_ids) + store.create(tag_type, tag_ids) + store.set(ent_type, ent_ids, p_tag, tag_ids) + store.set(ent_type, ent_ids, p_filesize, {1234}) + # current transaction is visible + self.assertSetEqual(set(store._graph), instances | { + (rdflib.URIRef('http://example.com/me/entity#1234'), rdflib.URIRef(p_filesize.uri), rdflib.Literal(1234, datatype=rdflib.XSD.integer)), + (rdflib.URIRef('http://example.com/me/entity#4321'), rdflib.URIRef(p_filesize.uri), rdflib.Literal(1234, datatype=rdflib.XSD.integer)), + }) + + # rollback undoes previous changes + store.rollback() + self.assertSetEqual(set(store._graph), set()) + + # add some data once more + store.create(ent_type, ent_ids) + store.create(tag_type, tag_ids) + store.set(ent_type, ent_ids, p_tag, tag_ids) + store.set(ent_type, ent_ids, p_filesize, {1234}) + # current transaction is visible + self.assertSetEqual(set(store._graph), instances | { + (rdflib.URIRef('http://example.com/me/entity#1234'), rdflib.URIRef(p_filesize.uri), rdflib.Literal(1234, datatype=rdflib.XSD.integer)), + (rdflib.URIRef('http://example.com/me/entity#4321'), rdflib.URIRef(p_filesize.uri), rdflib.Literal(1234, datatype=rdflib.XSD.integer)), + }) + 
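+        # NOTE: uncommitted triples are already visible in the backing graph;
+        # commit() merely clears the add/remove logs that rollback() replays.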
+ # commit saves changes + store.commit() + self.assertSetEqual(set(store._graph), instances | { + (rdflib.URIRef('http://example.com/me/entity#1234'), rdflib.URIRef(p_filesize.uri), rdflib.Literal(1234, datatype=rdflib.XSD.integer)), + (rdflib.URIRef('http://example.com/me/entity#4321'), rdflib.URIRef(p_filesize.uri), rdflib.Literal(1234, datatype=rdflib.XSD.integer)), + }) + + # add additional data + store.create(ent_type, {URI('http://example.com/me/entity#hello')}) + store.set(ent_type, {URI('http://example.com/me/entity#hello')}, p_tag, tag_ids) + store.set(ent_type, ent_ids, p_filesize, {4321}) + self.assertSetEqual(set(store._graph), instances | { + (rdflib.URIRef('http://example.com/me/entity#hello'), rdflib.RDF.type, rdflib.URIRef(ns.bsfs.Entity)), + (rdflib.URIRef('http://example.com/me/entity#hello'), rdflib.URIRef(p_tag.uri), rdflib.URIRef('http://example.com/me/tag#1234')), + (rdflib.URIRef('http://example.com/me/entity#hello'), rdflib.URIRef(p_tag.uri), rdflib.URIRef('http://example.com/me/tag#4321')), + (rdflib.URIRef('http://example.com/me/entity#1234'), rdflib.URIRef(p_filesize.uri), rdflib.Literal(4321, datatype=rdflib.XSD.integer)), + (rdflib.URIRef('http://example.com/me/entity#4321'), rdflib.URIRef(p_filesize.uri), rdflib.Literal(4321, datatype=rdflib.XSD.integer)), + }) + + # rollback undoes only changes since last commit + store.rollback() + self.assertSetEqual(set(store._graph), instances | { + (rdflib.URIRef('http://example.com/me/entity#1234'), rdflib.URIRef(p_filesize.uri), rdflib.Literal(1234, datatype=rdflib.XSD.integer)), + (rdflib.URIRef('http://example.com/me/entity#4321'), rdflib.URIRef(p_filesize.uri), rdflib.Literal(1234, datatype=rdflib.XSD.integer)), + }) + + def test_get(self): + raise NotImplementedError() + + def test_exists(self): + # store setup + store = SparqlStore.Open() + store.schema = self.schema + # prepare node types + ent_type = store.schema.node(ns.bsfs.Entity) + tag_type = store.schema.node(ns.bsfs.Tag) + # create node instances + ent_ids = { + URI('http://example.com/me/entity#1234'), + URI('http://example.com/me/entity#4321'), + } + tag_ids = { + URI('http://example.com/me/tag#1234'), + URI('http://example.com/me/tag#4321'), + } + store.create(ent_type, ent_ids) + store.create(tag_type, tag_ids) + + # exists returns all existing nodes of the correct type + self.assertSetEqual(ent_ids, set(store.exists(ent_type, ent_ids))) + self.assertSetEqual(tag_ids, set(store.exists(tag_type, tag_ids))) + # exists returns only nodes that match the type + self.assertSetEqual(set(), set(store.exists(ent_type, tag_ids))) + self.assertSetEqual({URI('http://example.com/me/entity#1234')}, set(store.exists(ent_type, { + URI('http://example.com/me/tag#1234'), + URI('http://example.com/me/entity#1234'), + }))) + # exists returns only nodes that exist + self.assertSetEqual(set(), set(store.exists(ent_type, { + URI('http://example.com/me/entity#foo'), + URI('http://example.com/me/entity#bar'), + }))) + self.assertSetEqual({URI('http://example.com/me/entity#1234')}, set(store.exists(ent_type, { + URI('http://example.com/me/entity#foo'), + URI('http://example.com/me/entity#1234'), + }))) + + + def test_create(self): + # setup + store = SparqlStore.Open() + store.schema = self.schema + + # node type must be valid + self.assertRaises(errors.ConsistencyError, store.create, self.schema.node(ns.bsfs.Entity).get_child(ns.bsfs.invalid), { + URI('http://example.com/me/entity#1234'), URI('http://example.com/me/entity#4321')}) + + # can create some nodes + ent_type = 
store.schema.node(ns.bsfs.Entity) + store.create(ent_type, {URI('http://example.com/me/entity#1234'), URI('http://example.com/me/entity#4321')}) + self.assertSetEqual(set(store._graph), { + (rdflib.URIRef('http://example.com/me/entity#1234'), rdflib.RDF.type, rdflib.URIRef(ns.bsfs.Entity)), + (rdflib.URIRef('http://example.com/me/entity#4321'), rdflib.RDF.type, rdflib.URIRef(ns.bsfs.Entity)), + }) + + # existing nodes are skipped + store.create(ent_type, {URI('http://example.com/me/entity#1234'), URI('http://example.com/me/entity#5678')}) + self.assertSetEqual(set(store._graph), { + # previous triples + (rdflib.URIRef('http://example.com/me/entity#1234'), rdflib.RDF.type, rdflib.URIRef(ns.bsfs.Entity)), + (rdflib.URIRef('http://example.com/me/entity#4321'), rdflib.RDF.type, rdflib.URIRef(ns.bsfs.Entity)), + # new triples + (rdflib.URIRef('http://example.com/me/entity#5678'), rdflib.RDF.type, rdflib.URIRef(ns.bsfs.Entity)), + }) + + # can create nodes of a different type + tag_type = store.schema.node(ns.bsfs.Tag) + store.create(tag_type, {URI('http://example.com/me/tag#1234'), URI('http://example.com/me/tag#4321')}) + self.assertSetEqual(set(store._graph), { + # previous triples + (rdflib.URIRef('http://example.com/me/entity#1234'), rdflib.RDF.type, rdflib.URIRef(ns.bsfs.Entity)), + (rdflib.URIRef('http://example.com/me/entity#4321'), rdflib.RDF.type, rdflib.URIRef(ns.bsfs.Entity)), + (rdflib.URIRef('http://example.com/me/entity#5678'), rdflib.RDF.type, rdflib.URIRef(ns.bsfs.Entity)), + # new triples + (rdflib.URIRef('http://example.com/me/tag#1234'), rdflib.RDF.type, rdflib.URIRef(ns.bsfs.Tag)), + (rdflib.URIRef('http://example.com/me/tag#4321'), rdflib.RDF.type, rdflib.URIRef(ns.bsfs.Tag)), + }) + + # creation does not change types of existing nodes + tag_type = store.schema.node(ns.bsfs.Tag) + store.create(tag_type, {URI('http://example.com/me/entity#1234'), URI('http://example.com/me/entity#4321')}) + self.assertSetEqual(set(store._graph), { + # previous triples + (rdflib.URIRef('http://example.com/me/entity#1234'), rdflib.RDF.type, rdflib.URIRef(ns.bsfs.Entity)), + (rdflib.URIRef('http://example.com/me/entity#4321'), rdflib.RDF.type, rdflib.URIRef(ns.bsfs.Entity)), + (rdflib.URIRef('http://example.com/me/entity#5678'), rdflib.RDF.type, rdflib.URIRef(ns.bsfs.Entity)), + # new triples + (rdflib.URIRef('http://example.com/me/tag#1234'), rdflib.RDF.type, rdflib.URIRef(ns.bsfs.Tag)), + (rdflib.URIRef('http://example.com/me/tag#4321'), rdflib.RDF.type, rdflib.URIRef(ns.bsfs.Tag)), + }) + + + def test_set(self): + # store setup + store = SparqlStore.Open() + store.schema = self.schema + # prepare node types + ent_type = store.schema.node(ns.bsfs.Entity) + user_type = store.schema.node(ns.bsfs.User) + tag_type = store.schema.node(ns.bsfs.Tag) + # prepare predicates + p_filesize = store.schema.predicate(ns.bse.filesize) + p_comment = store.schema.predicate(ns.bse.comment) + p_author = store.schema.predicate(ns.bse.author) + p_tag = store.schema.predicate(ns.bse.tag) + p_invalid = store.schema.predicate(ns.bsfs.Predicate).get_child(ns.bsfs.foo, range=store.schema.node(ns.bsfs.Tag)) + # create node instances + ent_ids = { + URI('http://example.com/me/entity#1234'), + URI('http://example.com/me/entity#4321'), + } + tag_ids = { + URI('http://example.com/me/tag#1234'), + URI('http://example.com/me/tag#4321'), + URI('http://example.com/me/tag#foo'), + URI('http://example.com/me/tag#bar'), + URI('http://example.com/me/tag#foobar'), + URI('http://example.com/me/tag#xyz'), + } + user_ids = { + 
URI('http://example.com/me/user#1234'), + URI('http://example.com/me/user#4321'), + } + store.create(ent_type, ent_ids) + store.create(tag_type, tag_ids) + store.create(user_type, user_ids) + + # invalid node_type is not permitted + self.assertRaises(errors.ConsistencyError, store.set, self.schema.node(ns.bsfs.Node).get_child(ns.bse.foo), + ent_ids, p_comment, {'hello world'}) + + # invalid predicate is not permitted + self.assertRaises(errors.ConsistencyError, store.set, ent_type, ent_ids, p_invalid, {'http://example.com/me/tag#1234'}) + + # predicate must match node_type + self.assertRaises(errors.ConsistencyError, store.set, tag_type, tag_ids, p_filesize, {1234}) + + # empty value does not change the graph + plen = len(store._graph) + store.set(ent_type, ent_ids, p_filesize, []) + store.set(ent_type, ent_ids, p_comment, []) + store.set(ent_type, ent_ids, p_author, []) + store.set(ent_type, ent_ids, p_tag, []) + self.assertEqual(plen, len(store._graph)) + + # cannot set multiple values on unique predicates + self.assertRaises(ValueError, store.set, ent_type, ent_ids, p_filesize, {1234, 4321}) + self.assertRaises(ValueError, store.set, ent_type, ent_ids, p_author, {URI('http://example.com/me/user#1234'), URI('http://example.com/me/user#4321')}) + + # value nodes must exist + self.assertRaises(errors.InstanceError, store.set, ent_type, ent_ids, p_author, {URI('http://example.com/me/user#invalid')}) + self.assertRaises(errors.InstanceError, store.set, ent_type, ent_ids, p_tag, {URI('http://example.com/me/tag#invalid')}) + + # value node types must be consistent with the predicate + self.assertRaises(errors.InstanceError, store.set, ent_type, ent_ids, p_author, {URI('http://example.com/me/entity#1234')}) + self.assertRaises(errors.InstanceError, store.set, ent_type, ent_ids, p_tag, {URI('http://example.com/me/entity#1234')}) + + # all value nodes must exist and be consistent + self.assertRaises(errors.InstanceError, store.set, ent_type, ent_ids, p_tag, { + URI('http://example.com/me/tag#1234'), URI('http://example.com/me/tag#invalid'), URI('http://example.com/me/entity#1234')}) + + + # set unique literal + store.set(ent_type, ent_ids, p_filesize, {1234}) + self.assertIn( + (rdflib.URIRef('http://example.com/me/entity#1234'), rdflib.URIRef(p_filesize.uri), rdflib.Literal('1234', datatype=rdflib.XSD.integer)), + set(store._graph)) + self.assertIn( + (rdflib.URIRef('http://example.com/me/entity#4321'), rdflib.URIRef(p_filesize.uri), rdflib.Literal('1234', datatype=rdflib.XSD.integer)), + set(store._graph)) + # re-assigning the same node changes nothing + store.set(ent_type, ent_ids, p_filesize, {1234}) + self.assertIn( + (rdflib.URIRef('http://example.com/me/entity#1234'), rdflib.URIRef(p_filesize.uri), rdflib.Literal('1234', datatype=rdflib.XSD.integer)), + set(store._graph)) + self.assertIn( + (rdflib.URIRef('http://example.com/me/entity#4321'), rdflib.URIRef(p_filesize.uri), rdflib.Literal('1234', datatype=rdflib.XSD.integer)), + set(store._graph)) + # cannot set multiple unique literals + self.assertRaises(ValueError, store.set, ent_type, ent_ids, p_filesize, {1234, 4321}) # same test as above + # unique literals are overwritten by set + store.set(ent_type, ent_ids, p_filesize, {4321}) + self.assertIn( + (rdflib.URIRef('http://example.com/me/entity#1234'), rdflib.URIRef(p_filesize.uri), rdflib.Literal('4321', datatype=rdflib.XSD.integer)), + set(store._graph)) + self.assertNotIn( + (rdflib.URIRef('http://example.com/me/entity#1234'), rdflib.URIRef(p_filesize.uri), rdflib.Literal('1234', 
datatype=rdflib.XSD.integer)), + set(store._graph)) + self.assertIn( + (rdflib.URIRef('http://example.com/me/entity#4321'), rdflib.URIRef(p_filesize.uri), rdflib.Literal('4321', datatype=rdflib.XSD.integer)), + set(store._graph)) + self.assertNotIn( + (rdflib.URIRef('http://example.com/me/entity#4321'), rdflib.URIRef(p_filesize.uri), rdflib.Literal('1234', datatype=rdflib.XSD.integer)), + set(store._graph)) + + # set non-unique literal + store.set(ent_type, ent_ids, p_comment, {'foobar'}) + self.assertTrue(set(store._graph).issuperset({ + (rdflib.URIRef('http://example.com/me/entity#1234'), rdflib.URIRef(p_comment.uri), rdflib.Literal('foobar', datatype=rdflib.XSD.string)), + (rdflib.URIRef('http://example.com/me/entity#4321'), rdflib.URIRef(p_comment.uri), rdflib.Literal('foobar', datatype=rdflib.XSD.string)), + })) + # re-assigning the same node changes nothing + store.set(ent_type, ent_ids, p_comment, {'foobar'}) + self.assertTrue(set(store._graph).issuperset({ + (rdflib.URIRef('http://example.com/me/entity#1234'), rdflib.URIRef(p_comment.uri), rdflib.Literal('foobar', datatype=rdflib.XSD.string)), + (rdflib.URIRef('http://example.com/me/entity#4321'), rdflib.URIRef(p_comment.uri), rdflib.Literal('foobar', datatype=rdflib.XSD.string)), + })) + # can set multiple non-unique literals at once + store.set(ent_type, ent_ids, p_comment, {'foo', 'bar'}) + self.assertTrue(set(store._graph).issuperset({ + (rdflib.URIRef('http://example.com/me/entity#1234'), rdflib.URIRef(p_comment.uri), rdflib.Literal('foo', datatype=rdflib.XSD.string)), + (rdflib.URIRef('http://example.com/me/entity#1234'), rdflib.URIRef(p_comment.uri), rdflib.Literal('bar', datatype=rdflib.XSD.string)), + (rdflib.URIRef('http://example.com/me/entity#4321'), rdflib.URIRef(p_comment.uri), rdflib.Literal('foo', datatype=rdflib.XSD.string)), + (rdflib.URIRef('http://example.com/me/entity#4321'), rdflib.URIRef(p_comment.uri), rdflib.Literal('bar', datatype=rdflib.XSD.string)), + })) + # non-unique literals are appended by set + store.set(ent_type, ent_ids, p_comment, {'hello world'}) + self.assertTrue(set(store._graph).issuperset({ + (rdflib.URIRef('http://example.com/me/entity#1234'), rdflib.URIRef(p_comment.uri), rdflib.Literal('foo', datatype=rdflib.XSD.string)), + (rdflib.URIRef('http://example.com/me/entity#1234'), rdflib.URIRef(p_comment.uri), rdflib.Literal('bar', datatype=rdflib.XSD.string)), + (rdflib.URIRef('http://example.com/me/entity#1234'), rdflib.URIRef(p_comment.uri), rdflib.Literal('hello world', datatype=rdflib.XSD.string)), + (rdflib.URIRef('http://example.com/me/entity#4321'), rdflib.URIRef(p_comment.uri), rdflib.Literal('foo', datatype=rdflib.XSD.string)), + (rdflib.URIRef('http://example.com/me/entity#4321'), rdflib.URIRef(p_comment.uri), rdflib.Literal('bar', datatype=rdflib.XSD.string)), + (rdflib.URIRef('http://example.com/me/entity#4321'), rdflib.URIRef(p_comment.uri), rdflib.Literal('hello world', datatype=rdflib.XSD.string)), + })) + + # set unique node + store.set(ent_type, ent_ids, p_author, {URI('http://example.com/me/user#1234')}) + self.assertIn( + (rdflib.URIRef('http://example.com/me/entity#1234'), rdflib.URIRef(p_author.uri), rdflib.URIRef('http://example.com/me/user#1234')), + set(store._graph)) + self.assertIn( + (rdflib.URIRef('http://example.com/me/entity#4321'), rdflib.URIRef(p_author.uri), rdflib.URIRef('http://example.com/me/user#1234')), + set(store._graph)) + # re-assigning the same node changes nothing + store.set(ent_type, ent_ids, p_author, {URI('http://example.com/me/user#1234')}) + 
self.assertIn( + (rdflib.URIRef('http://example.com/me/entity#1234'), rdflib.URIRef(p_author.uri), rdflib.URIRef('http://example.com/me/user#1234')), + set(store._graph)) + self.assertIn( + (rdflib.URIRef('http://example.com/me/entity#4321'), rdflib.URIRef(p_author.uri), rdflib.URIRef('http://example.com/me/user#1234')), + set(store._graph)) + # cannot set multiple unique nodes + self.assertRaises(ValueError, store.set, ent_type, ent_ids, p_author, {URI('http://example.com/me/user#1234'), URI('http://example.com/me/user#4321')}) + # unique nodes are overwritten by set + store.set(ent_type, ent_ids, p_author, {URI('http://example.com/me/user#4321')}) + self.assertIn( + (rdflib.URIRef('http://example.com/me/entity#1234'), rdflib.URIRef(p_author.uri), rdflib.URIRef('http://example.com/me/user#4321')), + set(store._graph)) + self.assertNotIn( + (rdflib.URIRef('http://example.com/me/entity#1234'), rdflib.URIRef(p_author.uri), rdflib.URIRef('http://example.com/me/user#1234')), + set(store._graph)) + self.assertIn( + (rdflib.URIRef('http://example.com/me/entity#4321'), rdflib.URIRef(p_author.uri), rdflib.URIRef('http://example.com/me/user#4321')), + set(store._graph)) + self.assertNotIn( + (rdflib.URIRef('http://example.com/me/entity#4321'), rdflib.URIRef(p_author.uri), rdflib.URIRef('http://example.com/me/user#1234')), + set(store._graph)) + + # set non-unique node + store.set(ent_type, ent_ids, p_tag, {'http://example.com/me/tag#foobar'}) + self.assertTrue(set(store._graph).issuperset({ + (rdflib.URIRef('http://example.com/me/entity#1234'), rdflib.URIRef(p_tag.uri), rdflib.URIRef('http://example.com/me/tag#foobar')), + (rdflib.URIRef('http://example.com/me/entity#4321'), rdflib.URIRef(p_tag.uri), rdflib.URIRef('http://example.com/me/tag#foobar')), + })) + # re-assigning the same node changes nothing + store.set(ent_type, ent_ids, p_tag, {'http://example.com/me/tag#foobar'}) + self.assertTrue(set(store._graph).issuperset({ + (rdflib.URIRef('http://example.com/me/entity#1234'), rdflib.URIRef(p_tag.uri), rdflib.URIRef('http://example.com/me/tag#foobar')), + (rdflib.URIRef('http://example.com/me/entity#4321'), rdflib.URIRef(p_tag.uri), rdflib.URIRef('http://example.com/me/tag#foobar')), + })) + # can set multiple non-unique literals at once + store.set(ent_type, ent_ids, p_tag, {'http://example.com/me/tag#1234', 'http://example.com/me/tag#4321'}) + self.assertTrue(set(store._graph).issuperset({ + (rdflib.URIRef('http://example.com/me/entity#1234'), rdflib.URIRef(p_tag.uri), rdflib.URIRef('http://example.com/me/tag#1234')), + (rdflib.URIRef('http://example.com/me/entity#1234'), rdflib.URIRef(p_tag.uri), rdflib.URIRef('http://example.com/me/tag#4321')), + (rdflib.URIRef('http://example.com/me/entity#4321'), rdflib.URIRef(p_tag.uri), rdflib.URIRef('http://example.com/me/tag#1234')), + (rdflib.URIRef('http://example.com/me/entity#4321'), rdflib.URIRef(p_tag.uri), rdflib.URIRef('http://example.com/me/tag#4321')), + })) + # non-unique nodes are appended by set + store.set(ent_type, ent_ids, p_tag, {'http://example.com/me/tag#foo', 'http://example.com/me/tag#bar'}) + self.assertTrue(set(store._graph).issuperset({ + (rdflib.URIRef('http://example.com/me/entity#1234'), rdflib.URIRef(p_tag.uri), rdflib.URIRef('http://example.com/me/tag#1234')), + (rdflib.URIRef('http://example.com/me/entity#1234'), rdflib.URIRef(p_tag.uri), rdflib.URIRef('http://example.com/me/tag#4321')), + (rdflib.URIRef('http://example.com/me/entity#1234'), rdflib.URIRef(p_tag.uri), rdflib.URIRef('http://example.com/me/tag#foo')), + 
(rdflib.URIRef('http://example.com/me/entity#1234'), rdflib.URIRef(p_tag.uri), rdflib.URIRef('http://example.com/me/tag#bar')), + (rdflib.URIRef('http://example.com/me/entity#4321'), rdflib.URIRef(p_tag.uri), rdflib.URIRef('http://example.com/me/tag#1234')), + (rdflib.URIRef('http://example.com/me/entity#4321'), rdflib.URIRef(p_tag.uri), rdflib.URIRef('http://example.com/me/tag#4321')), + (rdflib.URIRef('http://example.com/me/entity#4321'), rdflib.URIRef(p_tag.uri), rdflib.URIRef('http://example.com/me/tag#foo')), + (rdflib.URIRef('http://example.com/me/entity#4321'), rdflib.URIRef(p_tag.uri), rdflib.URIRef('http://example.com/me/tag#bar')), + })) + + # nothing happens when no guids are given + plen = len(store._graph) + store.set(ent_type, set(), p_comment, {'xyz'}) + store.set(ent_type, set(), p_tag, {URI('http://example.com/me/tag#xyz')}) + self.assertEqual(plen, len(store._graph)) + + # guids must be instances of node_type + self.assertRaises(errors.InstanceError, store.set, ent_type, tag_ids, p_comment, {'xyz'}) + # inexistent guids + self.assertRaises(errors.InstanceError, store.set, ent_type, {URI('http://example.com/me/entity#foobar')}, p_comment, {'xyz'}) + + +## main ## + +if __name__ == '__main__': + unittest.main() + +## EOF ## diff --git a/test/triple_store/test_sparql.py b/test/triple_store/test_sparql.py deleted file mode 100644 index 8d98749..0000000 --- a/test/triple_store/test_sparql.py +++ /dev/null @@ -1,769 +0,0 @@ -""" - -Part of the bsfs test suite. -A copy of the license is provided with the project. -Author: Matthias Baumgartner, 2022 -""" -# imports -import rdflib -import unittest - -# bsie imports -from bsfs import schema as _schema -from bsfs.namespace import ns -from bsfs.utils import errors, URI - -# objects to test -from bsfs.triple_store.sparql import SparqlStore - - -## code ## - -class TestSparqlStore(unittest.TestCase): - def setUp(self): - self.schema = _schema.Schema.from_string(''' - prefix rdfs: - prefix xsd: - - prefix bsfs: - prefix bse: - - bsfs:Entity rdfs:subClassOf bsfs:Node . - bsfs:Tag rdfs:subClassOf bsfs:Node . - bsfs:User rdfs:subClassOf bsfs:Node . - xsd:string rdfs:subClassOf bsfs:Literal . - xsd:integer rdfs:subClassOf bsfs:Literal . - - # non-unique literal - bse:comment rdfs:subClassOf bsfs:Predicate ; - rdfs:domain bsfs:Entity ; - rdfs:range xsd:string ; - bsfs:unique "false"^^xsd:boolean . - - # unique literal - bse:filesize rdfs:subClassOf bsfs:Predicate ; - rdfs:domain bsfs:Entity ; - rdfs:range xsd:integer ; - bsfs:unique "true"^^xsd:boolean . - - # non-unique node - bse:tag rdfs:subClassOf bsfs:Predicate ; - rdfs:domain bsfs:Entity ; - rdfs:range bsfs:Tag ; - bsfs:unique "false"^^xsd:boolean . - - # unique node - bse:author rdfs:subClassOf bsfs:Predicate ; - rdfs:domain bsfs:Entity ; - rdfs:range bsfs:User ; - bsfs:unique "true"^^xsd:boolean . - - ''') - - def test_essentials(self): - store = SparqlStore.Open() - # equality - self.assertEqual(store, store) - self.assertEqual(hash(store), hash(store)) - self.assertNotEqual(store, SparqlStore.Open()) - self.assertNotEqual(hash(store), hash(SparqlStore.Open())) - # string conversion - self.assertEqual(str(store), 'SparqlStore(uri=None)') - self.assertEqual(repr(store), 'SparqlStore(uri=None)') - # open - self.assertIsInstance(SparqlStore.Open(), SparqlStore) - - - def test__has_type(self): - # setup store - store = SparqlStore.Open() - store.schema = _schema.Schema.from_string(''' - prefix rdfs: - prefix xsd: - prefix bsfs: - - bsfs:Entity rdfs:subClassOf bsfs:Node . 
- bsfs:Document rdfs:subClassOf bsfs:Entity . - bsfs:Image rdfs:subClassOf bsfs:Entity . - bsfs:PDF rdfs:subClassOf bsfs:Document . - - ''') - # add some instances - store.create(store.schema.node(ns.bsfs.Entity), {URI('http://example.com/me/entity#1234')}) - store.create(store.schema.node(ns.bsfs.Document), {URI('http://example.com/me/document#1234')}) - store.create(store.schema.node(ns.bsfs.Image), {URI('http://example.com/me/image#1234')}) - store.create(store.schema.node(ns.bsfs.PDF), {URI('http://example.com/me/pdf#1234')}) - - # node_type must be in the schema - self.assertRaises(errors.ConsistencyError, store._has_type, URI('http://example.com/me/entity#1234'), store.schema.node(ns.bsfs.Node).get_child(ns.bsfs.invalid)) - - # returns False on inexistent nodes - self.assertFalse(store._has_type(URI('http://example.com/me/entity#4321'), store.schema.node(ns.bsfs.Entity))) - self.assertFalse(store._has_type(URI('http://example.com/me/document#4321'), store.schema.node(ns.bsfs.Document))) - self.assertFalse(store._has_type(URI('http://example.com/me/image#4321'), store.schema.node(ns.bsfs.Image))) - self.assertFalse(store._has_type(URI('http://example.com/me/pdf#4321'), store.schema.node(ns.bsfs.PDF))) - - # _has_type checks direct types - self.assertTrue(store._has_type(URI('http://example.com/me/entity#1234'), store.schema.node(ns.bsfs.Entity))) - self.assertTrue(store._has_type(URI('http://example.com/me/document#1234'), store.schema.node(ns.bsfs.Document))) - self.assertTrue(store._has_type(URI('http://example.com/me/image#1234'), store.schema.node(ns.bsfs.Image))) - self.assertTrue(store._has_type(URI('http://example.com/me/pdf#1234'), store.schema.node(ns.bsfs.PDF))) - - # _has_type checks type hierarchy - self.assertFalse(store._has_type(URI('http://example.com/me/entity#1234'), store.schema.node(ns.bsfs.Document))) - self.assertFalse(store._has_type(URI('http://example.com/me/entity#1234'), store.schema.node(ns.bsfs.Image))) - self.assertFalse(store._has_type(URI('http://example.com/me/entity#1234'), store.schema.node(ns.bsfs.PDF))) - - self.assertTrue(store._has_type(URI('http://example.com/me/document#1234'), store.schema.node(ns.bsfs.Entity))) - self.assertFalse(store._has_type(URI('http://example.com/me/document#1234'), store.schema.node(ns.bsfs.Image))) - self.assertFalse(store._has_type(URI('http://example.com/me/document#1234'), store.schema.node(ns.bsfs.PDF))) - - self.assertTrue(store._has_type(URI('http://example.com/me/image#1234'), store.schema.node(ns.bsfs.Entity))) - self.assertFalse(store._has_type(URI('http://example.com/me/image#1234'), store.schema.node(ns.bsfs.Document))) - self.assertFalse(store._has_type(URI('http://example.com/me/image#1234'), store.schema.node(ns.bsfs.PDF))) - - self.assertTrue(store._has_type(URI('http://example.com/me/pdf#1234'), store.schema.node(ns.bsfs.Entity))) - self.assertTrue(store._has_type(URI('http://example.com/me/pdf#1234'), store.schema.node(ns.bsfs.Document))) - self.assertFalse(store._has_type(URI('http://example.com/me/pdf#1234'), store.schema.node(ns.bsfs.Image))) - - - def test_schema(self): - # setup - store = SparqlStore.Open() - curr = self.schema - p_comment = curr.predicate(ns.bse.comment) - p_filesize = curr.predicate(ns.bse.filesize) - p_tag = curr.predicate(ns.bse.tag) - p_author = curr.predicate(ns.bse.author) - - # migrate to an initial schema - store.schema = curr - # store has migrated - self.assertEqual(store.schema, curr) - - # add some instances - ent_ids = {URI('http://example.com/me/entity#1234'), 
URI('http://example.com/me/entity#4321')} - tag_ids = {URI('http://example.com/me/tag#1234'), URI('http://example.com/me/tag#4321')} - store.create(curr.node(ns.bsfs.Entity), ent_ids) - store.create(curr.node(ns.bsfs.Tag), tag_ids) - store.create(curr.node(ns.bsfs.User), {URI('http://example.com/me')}) - # add some triples - store.set(curr.node(ns.bsfs.Entity), ent_ids, p_comment, {'foo', 'bar'}) - store.set(curr.node(ns.bsfs.Entity), ent_ids, p_filesize, {1234}) - store.set(curr.node(ns.bsfs.Entity), ent_ids, p_tag, - {URI('http://example.com/me/tag#1234'), URI('http://example.com/me/tag#4321')}) - store.set(curr.node(ns.bsfs.Entity), ent_ids, p_author, - {URI('http://example.com/me')}) - # check instances - instances = { - # node instances - (rdflib.URIRef('http://example.com/me/entity#1234'), rdflib.RDF.type, rdflib.URIRef(ns.bsfs.Entity)), - (rdflib.URIRef('http://example.com/me/entity#4321'), rdflib.RDF.type, rdflib.URIRef(ns.bsfs.Entity)), - (rdflib.URIRef('http://example.com/me/tag#1234'), rdflib.RDF.type, rdflib.URIRef(ns.bsfs.Tag)), - (rdflib.URIRef('http://example.com/me/tag#4321'), rdflib.RDF.type, rdflib.URIRef(ns.bsfs.Tag)), - (rdflib.URIRef('http://example.com/me'), rdflib.RDF.type, rdflib.URIRef(ns.bsfs.User)), - # comments - (rdflib.URIRef('http://example.com/me/entity#1234'), rdflib.URIRef(p_comment.uri), rdflib.Literal('foo', datatype=rdflib.XSD.string)), - (rdflib.URIRef('http://example.com/me/entity#1234'), rdflib.URIRef(p_comment.uri), rdflib.Literal('bar', datatype=rdflib.XSD.string)), - (rdflib.URIRef('http://example.com/me/entity#4321'), rdflib.URIRef(p_comment.uri), rdflib.Literal('foo', datatype=rdflib.XSD.string)), - (rdflib.URIRef('http://example.com/me/entity#4321'), rdflib.URIRef(p_comment.uri), rdflib.Literal('bar', datatype=rdflib.XSD.string)), - # filesize - (rdflib.URIRef('http://example.com/me/entity#1234'), rdflib.URIRef(p_filesize.uri), rdflib.Literal('1234', datatype=rdflib.XSD.integer)), - (rdflib.URIRef('http://example.com/me/entity#4321'), rdflib.URIRef(p_filesize.uri), rdflib.Literal('1234', datatype=rdflib.XSD.integer)), - # tags - (rdflib.URIRef('http://example.com/me/entity#1234'), rdflib.URIRef(p_tag.uri), rdflib.URIRef('http://example.com/me/tag#1234')), - (rdflib.URIRef('http://example.com/me/entity#1234'), rdflib.URIRef(p_tag.uri), rdflib.URIRef('http://example.com/me/tag#4321')), - (rdflib.URIRef('http://example.com/me/entity#4321'), rdflib.URIRef(p_tag.uri), rdflib.URIRef('http://example.com/me/tag#1234')), - (rdflib.URIRef('http://example.com/me/entity#4321'), rdflib.URIRef(p_tag.uri), rdflib.URIRef('http://example.com/me/tag#4321')), - # author - (rdflib.URIRef('http://example.com/me/entity#1234'), rdflib.URIRef(p_author.uri), rdflib.URIRef('http://example.com/me')), - (rdflib.URIRef('http://example.com/me/entity#4321'), rdflib.URIRef(p_author.uri), rdflib.URIRef('http://example.com/me')), - } - self.assertSetEqual(set(store._graph), instances) - - # add some classes to the schema - curr = curr + _schema.Schema.from_string(''' - prefix rdfs: - prefix xsd: - prefix bsfs: - prefix bse: - prefix bst: - prefix bsc: - - bsfs:Entity rdfs:subClassOf bsfs:Node . - bsfs:Tag rdfs:subClassOf bsfs:Node . - bsfs:Collection rdfs:subClassOf bsfs:Node . - xsd:boolean rdfs:subClassOf bsfs:Literal . - - # literal - bse:shared rdfs:subClassOf bsfs:Predicate ; - rdfs:domain bsfs:Entity ; - rdfs:range xsd:boolean ; - bsfs:unique "true"^^xsd:boolean . 
- - # node - bse:partOf rdfs:subClassOf bsfs:Predicate ; - rdfs:domain bsfs:Entity ; - rdfs:range bsfs:Collection ; - bsfs:unique "false"^^xsd:boolean . - - # predicates across auxiliary node classes - bst:usedIn rdfs:subClassOf bsfs:Predicate ; - rdfs:domain bsfs:Tag ; - rdfs:range bsfs:Collection ; - bsfs:unique "false"^^xsd:boolean . - - bsc:tag rdfs:subClassOf bsfs:Predicate ; - rdfs:domain bsfs:Collection ; - rdfs:range bsfs:Tag ; - bsfs:unique "false"^^xsd:boolean . - - bst:principal rdfs:subClassOf bsfs:Predicate ; - rdfs:domain bsfs:Tag ; - rdfs:range bsfs:Node ; - bsfs:unique "true"^^xsd:boolean . - - ''') - # store migrated to the new schema - store.schema = curr - self.assertEqual(store.schema, curr) - # instances have not changed - self.assertSetEqual(set(store._graph), instances) - # add some instances of the new classes - p_partOf = curr.predicate(ns.bse.partOf) - p_shared = curr.predicate(ns.bse.shared) - p_usedIn = curr.predicate('http://bsfs.ai/schema/Tag#usedIn') - p_ctag = curr.predicate('http://bsfs.ai/schema/Collection#tag') - p_principal = curr.predicate('http://bsfs.ai/schema/Tag#principal') - store.create(curr.node(ns.bsfs.Collection), {URI('http://example.com/me/collection#1234'), URI('http://example.com/me/collection#4321')}) - # add some more triples - store.set(curr.node(ns.bsfs.Entity), ent_ids, p_shared, {True}) - store.set(curr.node(ns.bsfs.Entity), ent_ids, p_partOf, - {URI('http://example.com/me/collection#1234'), URI('http://example.com/me/collection#4321')}) - store.set(curr.node(ns.bsfs.Tag), {URI('http://example.com/me/tag#1234')}, p_usedIn, - {URI('http://example.com/me/collection#1234')}) - store.set(curr.node(ns.bsfs.Collection), {URI('http://example.com/me/collection#4321')}, p_ctag, - {URI('http://example.com/me/tag#1234'), URI('http://example.com/me/tag#4321')}) - store.set(curr.node(ns.bsfs.Tag), {URI('http://example.com/me/tag#1234')}, p_principal, - {URI('http://example.com/me/collection#1234')}) - # new instances are now in the graph - self.assertSetEqual(set(store._graph), instances | { - # collections - (rdflib.URIRef('http://example.com/me/collection#1234'), rdflib.RDF.type, rdflib.URIRef(ns.bsfs.Collection)), - (rdflib.URIRef('http://example.com/me/collection#4321'), rdflib.RDF.type, rdflib.URIRef(ns.bsfs.Collection)), - # partOf - (rdflib.URIRef('http://example.com/me/entity#1234'), rdflib.URIRef(p_partOf.uri), rdflib.URIRef('http://example.com/me/collection#1234')), - (rdflib.URIRef('http://example.com/me/entity#1234'), rdflib.URIRef(p_partOf.uri), rdflib.URIRef('http://example.com/me/collection#4321')), - (rdflib.URIRef('http://example.com/me/entity#4321'), rdflib.URIRef(p_partOf.uri), rdflib.URIRef('http://example.com/me/collection#1234')), - (rdflib.URIRef('http://example.com/me/entity#4321'), rdflib.URIRef(p_partOf.uri), rdflib.URIRef('http://example.com/me/collection#4321')), - # shared - (rdflib.URIRef('http://example.com/me/entity#1234'), rdflib.URIRef(p_shared.uri), rdflib.Literal('true', datatype=rdflib.XSD.boolean)), - (rdflib.URIRef('http://example.com/me/entity#4321'), rdflib.URIRef(p_shared.uri), rdflib.Literal('true', datatype=rdflib.XSD.boolean)), - # auxiliary node connections - (rdflib.URIRef('http://example.com/me/tag#1234'), rdflib.URIRef(p_usedIn.uri), rdflib.URIRef('http://example.com/me/collection#1234')), - (rdflib.URIRef('http://example.com/me/tag#1234'), rdflib.URIRef(p_principal.uri), rdflib.URIRef('http://example.com/me/collection#1234')), - (rdflib.URIRef('http://example.com/me/collection#4321'), 
rdflib.URIRef(p_ctag.uri), rdflib.URIRef('http://example.com/me/tag#1234')), - (rdflib.URIRef('http://example.com/me/collection#4321'), rdflib.URIRef(p_ctag.uri), rdflib.URIRef('http://example.com/me/tag#4321')), - }) - - - # remove some classes from the schema - curr = _schema.Schema.from_string(''' - prefix rdfs: - prefix xsd: - prefix bsfs: - prefix bse: - prefix bst: - - bsfs:Entity rdfs:subClassOf bsfs:Node . - bsfs:Tag rdfs:subClassOf bsfs:Node . - bsfs:User rdfs:subClassOf bsfs:Node . - - xsd:boolean rdfs:subClassOf bsfs:Literal . - xsd:integer rdfs:subClassOf bsfs:Literal . - - bse:filesize rdfs:subClassOf bsfs:Predicate ; - rdfs:domain bsfs:Entity ; - rdfs:range xsd:integer ; - bsfs:unique "true"^^xsd:boolean . - - bse:tag rdfs:subClassOf bsfs:Predicate ; - rdfs:domain bsfs:Entity ; - rdfs:range bsfs:Tag ; - bsfs:unique "false"^^xsd:boolean . - - bse:shared rdfs:subClassOf bsfs:Predicate ; - rdfs:domain bsfs:Entity ; - rdfs:range xsd:boolean ; - bsfs:unique "true"^^xsd:boolean . - - bst:principal rdfs:subClassOf bsfs:Predicate ; - rdfs:domain bsfs:Tag ; - rdfs:range bsfs:Node ; - bsfs:unique "true"^^xsd:boolean . - - # removed: bsfs:Collection - # removed: xsd:string - # removed: bse:comment (bsfs:Entity -> xsd:string) - # removed: bse:partOf (bsfs:Entity -> bsfs:Collection) - # removed: bse:author (bsfs:entity -> bsfs:User) - # removed: bst:usedIn (bsfs:Tag -> bsfs:Collection) - # removed: bsc:tag (bsfs:Collection -> bsfs:Tag) - - ''') - # store migrated to the new schema - store.schema = curr - self.assertEqual(store.schema, curr) - # instances of old classes were removed - self.assertSetEqual(set(store._graph), { - # node instances - (rdflib.URIRef('http://example.com/me/entity#1234'), rdflib.RDF.type, rdflib.URIRef(ns.bsfs.Entity)), - (rdflib.URIRef('http://example.com/me/entity#4321'), rdflib.RDF.type, rdflib.URIRef(ns.bsfs.Entity)), - (rdflib.URIRef('http://example.com/me/tag#1234'), rdflib.RDF.type, rdflib.URIRef(ns.bsfs.Tag)), - (rdflib.URIRef('http://example.com/me/tag#4321'), rdflib.RDF.type, rdflib.URIRef(ns.bsfs.Tag)), - (rdflib.URIRef('http://example.com/me'), rdflib.RDF.type, rdflib.URIRef(ns.bsfs.User)), - # filesize - (rdflib.URIRef('http://example.com/me/entity#1234'), rdflib.URIRef(p_filesize.uri), rdflib.Literal('1234', datatype=rdflib.XSD.integer)), - (rdflib.URIRef('http://example.com/me/entity#4321'), rdflib.URIRef(p_filesize.uri), rdflib.Literal('1234', datatype=rdflib.XSD.integer)), - # tags - (rdflib.URIRef('http://example.com/me/entity#1234'), rdflib.URIRef(p_tag.uri), rdflib.URIRef('http://example.com/me/tag#1234')), - (rdflib.URIRef('http://example.com/me/entity#1234'), rdflib.URIRef(p_tag.uri), rdflib.URIRef('http://example.com/me/tag#4321')), - (rdflib.URIRef('http://example.com/me/entity#4321'), rdflib.URIRef(p_tag.uri), rdflib.URIRef('http://example.com/me/tag#1234')), - (rdflib.URIRef('http://example.com/me/entity#4321'), rdflib.URIRef(p_tag.uri), rdflib.URIRef('http://example.com/me/tag#4321')), - # shared - (rdflib.URIRef('http://example.com/me/entity#1234'), rdflib.URIRef(p_shared.uri), rdflib.Literal('true', datatype=rdflib.XSD.boolean)), - (rdflib.URIRef('http://example.com/me/entity#4321'), rdflib.URIRef(p_shared.uri), rdflib.Literal('true', datatype=rdflib.XSD.boolean)), - }) - - # can only assign schema instances - self.assertRaises(TypeError, setattr, store, 'schema', None) - self.assertRaises(TypeError, setattr, store, 'schema', 1234) - self.assertRaises(TypeError, setattr, store, 'schema', 'foo') - class Foo(): pass - 
self.assertRaises(TypeError, setattr, store, 'schema', Foo()) - - # cannot migrate to incompatible schema - invalid = _schema.Schema.from_string(''' - prefix rdfs: - prefix xsd: - prefix bsfs: - prefix bse: - - bsfs:Entity rdfs:subClassOf bsfs:Node . - bsfs:Tag rdfs:subClassOf bsfs:Entity . # inconsistent with previous tag definition - - bse:tag rdfs:subClassOf bsfs:Predicate ; - rdfs:domain bsfs:Entity ; - rdfs:range bsfs:Tag ; - bsfs:unique "false"^^xsd:boolean . - - ''') - self.assertRaises(errors.ConsistencyError, setattr, store, 'schema', invalid) - invalid = _schema.Schema.from_string(''' - prefix rdfs: - prefix xsd: - prefix bsfs: - prefix bse: - - bsfs:Entity rdfs:subClassOf bsfs:Node . - bsfs:User rdfs:subClassOf bsfs:Node . - - # inconsistent predicate - bse:tag rdfs:subClassOf bsfs:Predicate ; - rdfs:domain bsfs:Entity ; - rdfs:range bsfs:User; - bsfs:unique "false"^^xsd:boolean . - - ''') - self.assertRaises(errors.ConsistencyError, setattr, store, 'schema', invalid) - - - def test_transaction(self): - # store setup - store = SparqlStore.Open() - store.schema = self.schema - p_tag = store.schema.predicate(ns.bse.tag) - p_filesize = store.schema.predicate(ns.bse.filesize) - # prepare node types - ent_type = store.schema.node(ns.bsfs.Entity) - tag_type = store.schema.node(ns.bsfs.Tag) - ent_ids = {URI('http://example.com/me/entity#1234'), URI('http://example.com/me/entity#4321')} - tag_ids = {URI('http://example.com/me/tag#1234'), URI('http://example.com/me/tag#4321')} - # target instances - instances = { - # node instances - (rdflib.URIRef('http://example.com/me/entity#1234'), rdflib.RDF.type, rdflib.URIRef(ns.bsfs.Entity)), - (rdflib.URIRef('http://example.com/me/entity#4321'), rdflib.RDF.type, rdflib.URIRef(ns.bsfs.Entity)), - (rdflib.URIRef('http://example.com/me/tag#1234'), rdflib.RDF.type, rdflib.URIRef(ns.bsfs.Tag)), - (rdflib.URIRef('http://example.com/me/tag#4321'), rdflib.RDF.type, rdflib.URIRef(ns.bsfs.Tag)), - # links - (rdflib.URIRef('http://example.com/me/entity#1234'), rdflib.URIRef(p_tag.uri), rdflib.URIRef('http://example.com/me/tag#1234')), - (rdflib.URIRef('http://example.com/me/entity#1234'), rdflib.URIRef(p_tag.uri), rdflib.URIRef('http://example.com/me/tag#4321')), - (rdflib.URIRef('http://example.com/me/entity#4321'), rdflib.URIRef(p_tag.uri), rdflib.URIRef('http://example.com/me/tag#1234')), - (rdflib.URIRef('http://example.com/me/entity#4321'), rdflib.URIRef(p_tag.uri), rdflib.URIRef('http://example.com/me/tag#4321')), - } - - # add some data - store.create(ent_type, ent_ids) - store.create(tag_type, tag_ids) - store.set(ent_type, ent_ids, p_tag, tag_ids) - store.set(ent_type, ent_ids, p_filesize, {1234}) - # current transaction is visible - self.assertSetEqual(set(store._graph), instances | { - (rdflib.URIRef('http://example.com/me/entity#1234'), rdflib.URIRef(p_filesize.uri), rdflib.Literal(1234, datatype=rdflib.XSD.integer)), - (rdflib.URIRef('http://example.com/me/entity#4321'), rdflib.URIRef(p_filesize.uri), rdflib.Literal(1234, datatype=rdflib.XSD.integer)), - }) - - # rollback undoes previous changes - store.rollback() - self.assertSetEqual(set(store._graph), set()) - - # add some data once more - store.create(ent_type, ent_ids) - store.create(tag_type, tag_ids) - store.set(ent_type, ent_ids, p_tag, tag_ids) - store.set(ent_type, ent_ids, p_filesize, {1234}) - # current transaction is visible - self.assertSetEqual(set(store._graph), instances | { - (rdflib.URIRef('http://example.com/me/entity#1234'), rdflib.URIRef(p_filesize.uri), 
rdflib.Literal(1234, datatype=rdflib.XSD.integer)), - (rdflib.URIRef('http://example.com/me/entity#4321'), rdflib.URIRef(p_filesize.uri), rdflib.Literal(1234, datatype=rdflib.XSD.integer)), - }) - - # commit saves changes - store.commit() - self.assertSetEqual(set(store._graph), instances | { - (rdflib.URIRef('http://example.com/me/entity#1234'), rdflib.URIRef(p_filesize.uri), rdflib.Literal(1234, datatype=rdflib.XSD.integer)), - (rdflib.URIRef('http://example.com/me/entity#4321'), rdflib.URIRef(p_filesize.uri), rdflib.Literal(1234, datatype=rdflib.XSD.integer)), - }) - - # add additional data - store.create(ent_type, {URI('http://example.com/me/entity#hello')}) - store.set(ent_type, {URI('http://example.com/me/entity#hello')}, p_tag, tag_ids) - store.set(ent_type, ent_ids, p_filesize, {4321}) - self.assertSetEqual(set(store._graph), instances | { - (rdflib.URIRef('http://example.com/me/entity#hello'), rdflib.RDF.type, rdflib.URIRef(ns.bsfs.Entity)), - (rdflib.URIRef('http://example.com/me/entity#hello'), rdflib.URIRef(p_tag.uri), rdflib.URIRef('http://example.com/me/tag#1234')), - (rdflib.URIRef('http://example.com/me/entity#hello'), rdflib.URIRef(p_tag.uri), rdflib.URIRef('http://example.com/me/tag#4321')), - (rdflib.URIRef('http://example.com/me/entity#1234'), rdflib.URIRef(p_filesize.uri), rdflib.Literal(4321, datatype=rdflib.XSD.integer)), - (rdflib.URIRef('http://example.com/me/entity#4321'), rdflib.URIRef(p_filesize.uri), rdflib.Literal(4321, datatype=rdflib.XSD.integer)), - }) - - # rollback undoes only changes since last commit - store.rollback() - self.assertSetEqual(set(store._graph), instances | { - (rdflib.URIRef('http://example.com/me/entity#1234'), rdflib.URIRef(p_filesize.uri), rdflib.Literal(1234, datatype=rdflib.XSD.integer)), - (rdflib.URIRef('http://example.com/me/entity#4321'), rdflib.URIRef(p_filesize.uri), rdflib.Literal(1234, datatype=rdflib.XSD.integer)), - }) - - - def test_exists(self): - # store setup - store = SparqlStore.Open() - store.schema = self.schema - # prepare node types - ent_type = store.schema.node(ns.bsfs.Entity) - tag_type = store.schema.node(ns.bsfs.Tag) - # create node instances - ent_ids = { - URI('http://example.com/me/entity#1234'), - URI('http://example.com/me/entity#4321'), - } - tag_ids = { - URI('http://example.com/me/tag#1234'), - URI('http://example.com/me/tag#4321'), - } - store.create(ent_type, ent_ids) - store.create(tag_type, tag_ids) - - # exists returns all existing nodes of the correct type - self.assertSetEqual(ent_ids, set(store.exists(ent_type, ent_ids))) - self.assertSetEqual(tag_ids, set(store.exists(tag_type, tag_ids))) - # exists returns only nodes that match the type - self.assertSetEqual(set(), set(store.exists(ent_type, tag_ids))) - self.assertSetEqual({URI('http://example.com/me/entity#1234')}, set(store.exists(ent_type, { - URI('http://example.com/me/tag#1234'), - URI('http://example.com/me/entity#1234'), - }))) - # exists returns only nodes that exist - self.assertSetEqual(set(), set(store.exists(ent_type, { - URI('http://example.com/me/entity#foo'), - URI('http://example.com/me/entity#bar'), - }))) - self.assertSetEqual({URI('http://example.com/me/entity#1234')}, set(store.exists(ent_type, { - URI('http://example.com/me/entity#foo'), - URI('http://example.com/me/entity#1234'), - }))) - - - def test_create(self): - # setup - store = SparqlStore.Open() - store.schema = self.schema - - # node type must be valid - self.assertRaises(errors.ConsistencyError, store.create, 
self.schema.node(ns.bsfs.Entity).get_child(ns.bsfs.invalid), { - URI('http://example.com/me/entity#1234'), URI('http://example.com/me/entity#4321')}) - - # can create some nodes - ent_type = store.schema.node(ns.bsfs.Entity) - store.create(ent_type, {URI('http://example.com/me/entity#1234'), URI('http://example.com/me/entity#4321')}) - self.assertSetEqual(set(store._graph), { - (rdflib.URIRef('http://example.com/me/entity#1234'), rdflib.RDF.type, rdflib.URIRef(ns.bsfs.Entity)), - (rdflib.URIRef('http://example.com/me/entity#4321'), rdflib.RDF.type, rdflib.URIRef(ns.bsfs.Entity)), - }) - - # existing nodes are skipped - store.create(ent_type, {URI('http://example.com/me/entity#1234'), URI('http://example.com/me/entity#5678')}) - self.assertSetEqual(set(store._graph), { - # previous triples - (rdflib.URIRef('http://example.com/me/entity#1234'), rdflib.RDF.type, rdflib.URIRef(ns.bsfs.Entity)), - (rdflib.URIRef('http://example.com/me/entity#4321'), rdflib.RDF.type, rdflib.URIRef(ns.bsfs.Entity)), - # new triples - (rdflib.URIRef('http://example.com/me/entity#5678'), rdflib.RDF.type, rdflib.URIRef(ns.bsfs.Entity)), - }) - - # can create nodes of a different type - tag_type = store.schema.node(ns.bsfs.Tag) - store.create(tag_type, {URI('http://example.com/me/tag#1234'), URI('http://example.com/me/tag#4321')}) - self.assertSetEqual(set(store._graph), { - # previous triples - (rdflib.URIRef('http://example.com/me/entity#1234'), rdflib.RDF.type, rdflib.URIRef(ns.bsfs.Entity)), - (rdflib.URIRef('http://example.com/me/entity#4321'), rdflib.RDF.type, rdflib.URIRef(ns.bsfs.Entity)), - (rdflib.URIRef('http://example.com/me/entity#5678'), rdflib.RDF.type, rdflib.URIRef(ns.bsfs.Entity)), - # new triples - (rdflib.URIRef('http://example.com/me/tag#1234'), rdflib.RDF.type, rdflib.URIRef(ns.bsfs.Tag)), - (rdflib.URIRef('http://example.com/me/tag#4321'), rdflib.RDF.type, rdflib.URIRef(ns.bsfs.Tag)), - }) - - # creation does not change types of existing nodes - tag_type = store.schema.node(ns.bsfs.Tag) - store.create(tag_type, {URI('http://example.com/me/entity#1234'), URI('http://example.com/me/entity#4321')}) - self.assertSetEqual(set(store._graph), { - # previous triples - (rdflib.URIRef('http://example.com/me/entity#1234'), rdflib.RDF.type, rdflib.URIRef(ns.bsfs.Entity)), - (rdflib.URIRef('http://example.com/me/entity#4321'), rdflib.RDF.type, rdflib.URIRef(ns.bsfs.Entity)), - (rdflib.URIRef('http://example.com/me/entity#5678'), rdflib.RDF.type, rdflib.URIRef(ns.bsfs.Entity)), - # new triples - (rdflib.URIRef('http://example.com/me/tag#1234'), rdflib.RDF.type, rdflib.URIRef(ns.bsfs.Tag)), - (rdflib.URIRef('http://example.com/me/tag#4321'), rdflib.RDF.type, rdflib.URIRef(ns.bsfs.Tag)), - }) - - - def test_set(self): - # store setup - store = SparqlStore.Open() - store.schema = self.schema - # prepare node types - ent_type = store.schema.node(ns.bsfs.Entity) - user_type = store.schema.node(ns.bsfs.User) - tag_type = store.schema.node(ns.bsfs.Tag) - # prepare predicates - p_filesize = store.schema.predicate(ns.bse.filesize) - p_comment = store.schema.predicate(ns.bse.comment) - p_author = store.schema.predicate(ns.bse.author) - p_tag = store.schema.predicate(ns.bse.tag) - p_invalid = store.schema.predicate(ns.bsfs.Predicate).get_child(ns.bsfs.foo, range=store.schema.node(ns.bsfs.Tag)) - # create node instances - ent_ids = { - URI('http://example.com/me/entity#1234'), - URI('http://example.com/me/entity#4321'), - } - tag_ids = { - URI('http://example.com/me/tag#1234'), - URI('http://example.com/me/tag#4321'), - 
URI('http://example.com/me/tag#foo'), - URI('http://example.com/me/tag#bar'), - URI('http://example.com/me/tag#foobar'), - URI('http://example.com/me/tag#xyz'), - } - user_ids = { - URI('http://example.com/me/user#1234'), - URI('http://example.com/me/user#4321'), - } - store.create(ent_type, ent_ids) - store.create(tag_type, tag_ids) - store.create(user_type, user_ids) - - # invalid node_type is not permitted - self.assertRaises(errors.ConsistencyError, store.set, self.schema.node(ns.bsfs.Node).get_child(ns.bse.foo), - ent_ids, p_comment, {'hello world'}) - - # invalid predicate is not permitted - self.assertRaises(errors.ConsistencyError, store.set, ent_type, ent_ids, p_invalid, {'http://example.com/me/tag#1234'}) - - # predicate must match node_type - self.assertRaises(errors.ConsistencyError, store.set, tag_type, tag_ids, p_filesize, {1234}) - - # empty value does not change the graph - plen = len(store._graph) - store.set(ent_type, ent_ids, p_filesize, []) - store.set(ent_type, ent_ids, p_comment, []) - store.set(ent_type, ent_ids, p_author, []) - store.set(ent_type, ent_ids, p_tag, []) - self.assertEqual(plen, len(store._graph)) - - # cannot set multiple values on unique predicates - self.assertRaises(ValueError, store.set, ent_type, ent_ids, p_filesize, {1234, 4321}) - self.assertRaises(ValueError, store.set, ent_type, ent_ids, p_author, {URI('http://example.com/me/user#1234'), URI('http://example.com/me/user#4321')}) - - # value nodes must exist - self.assertRaises(errors.InstanceError, store.set, ent_type, ent_ids, p_author, {URI('http://example.com/me/user#invalid')}) - self.assertRaises(errors.InstanceError, store.set, ent_type, ent_ids, p_tag, {URI('http://example.com/me/tag#invalid')}) - - # value node types must be consistent with the predicate - self.assertRaises(errors.InstanceError, store.set, ent_type, ent_ids, p_author, {URI('http://example.com/me/entity#1234')}) - self.assertRaises(errors.InstanceError, store.set, ent_type, ent_ids, p_tag, {URI('http://example.com/me/entity#1234')}) - - # all value nodes must exist and be consistent - self.assertRaises(errors.InstanceError, store.set, ent_type, ent_ids, p_tag, { - URI('http://example.com/me/tag#1234'), URI('http://example.com/me/tag#invalid'), URI('http://example.com/me/entity#1234')}) - - - # set unique literal - store.set(ent_type, ent_ids, p_filesize, {1234}) - self.assertIn( - (rdflib.URIRef('http://example.com/me/entity#1234'), rdflib.URIRef(p_filesize.uri), rdflib.Literal('1234', datatype=rdflib.XSD.integer)), - set(store._graph)) - self.assertIn( - (rdflib.URIRef('http://example.com/me/entity#4321'), rdflib.URIRef(p_filesize.uri), rdflib.Literal('1234', datatype=rdflib.XSD.integer)), - set(store._graph)) - # re-assigning the same node changes nothing - store.set(ent_type, ent_ids, p_filesize, {1234}) - self.assertIn( - (rdflib.URIRef('http://example.com/me/entity#1234'), rdflib.URIRef(p_filesize.uri), rdflib.Literal('1234', datatype=rdflib.XSD.integer)), - set(store._graph)) - self.assertIn( - (rdflib.URIRef('http://example.com/me/entity#4321'), rdflib.URIRef(p_filesize.uri), rdflib.Literal('1234', datatype=rdflib.XSD.integer)), - set(store._graph)) - # cannot set multiple unique literals - self.assertRaises(ValueError, store.set, ent_type, ent_ids, p_filesize, {1234, 4321}) # same test as above - # unique literals are overwritten by set - store.set(ent_type, ent_ids, p_filesize, {4321}) - self.assertIn( - (rdflib.URIRef('http://example.com/me/entity#1234'), rdflib.URIRef(p_filesize.uri), rdflib.Literal('4321', 
datatype=rdflib.XSD.integer)), - set(store._graph)) - self.assertNotIn( - (rdflib.URIRef('http://example.com/me/entity#1234'), rdflib.URIRef(p_filesize.uri), rdflib.Literal('1234', datatype=rdflib.XSD.integer)), - set(store._graph)) - self.assertIn( - (rdflib.URIRef('http://example.com/me/entity#4321'), rdflib.URIRef(p_filesize.uri), rdflib.Literal('4321', datatype=rdflib.XSD.integer)), - set(store._graph)) - self.assertNotIn( - (rdflib.URIRef('http://example.com/me/entity#4321'), rdflib.URIRef(p_filesize.uri), rdflib.Literal('1234', datatype=rdflib.XSD.integer)), - set(store._graph)) - - # set non-unique literal - store.set(ent_type, ent_ids, p_comment, {'foobar'}) - self.assertTrue(set(store._graph).issuperset({ - (rdflib.URIRef('http://example.com/me/entity#1234'), rdflib.URIRef(p_comment.uri), rdflib.Literal('foobar', datatype=rdflib.XSD.string)), - (rdflib.URIRef('http://example.com/me/entity#4321'), rdflib.URIRef(p_comment.uri), rdflib.Literal('foobar', datatype=rdflib.XSD.string)), - })) - # re-assigning the same node changes nothing - store.set(ent_type, ent_ids, p_comment, {'foobar'}) - self.assertTrue(set(store._graph).issuperset({ - (rdflib.URIRef('http://example.com/me/entity#1234'), rdflib.URIRef(p_comment.uri), rdflib.Literal('foobar', datatype=rdflib.XSD.string)), - (rdflib.URIRef('http://example.com/me/entity#4321'), rdflib.URIRef(p_comment.uri), rdflib.Literal('foobar', datatype=rdflib.XSD.string)), - })) - # can set multiple non-unique literals at once - store.set(ent_type, ent_ids, p_comment, {'foo', 'bar'}) - self.assertTrue(set(store._graph).issuperset({ - (rdflib.URIRef('http://example.com/me/entity#1234'), rdflib.URIRef(p_comment.uri), rdflib.Literal('foo', datatype=rdflib.XSD.string)), - (rdflib.URIRef('http://example.com/me/entity#1234'), rdflib.URIRef(p_comment.uri), rdflib.Literal('bar', datatype=rdflib.XSD.string)), - (rdflib.URIRef('http://example.com/me/entity#4321'), rdflib.URIRef(p_comment.uri), rdflib.Literal('foo', datatype=rdflib.XSD.string)), - (rdflib.URIRef('http://example.com/me/entity#4321'), rdflib.URIRef(p_comment.uri), rdflib.Literal('bar', datatype=rdflib.XSD.string)), - })) - # non-unique literals are appended by set - store.set(ent_type, ent_ids, p_comment, {'hello world'}) - self.assertTrue(set(store._graph).issuperset({ - (rdflib.URIRef('http://example.com/me/entity#1234'), rdflib.URIRef(p_comment.uri), rdflib.Literal('foo', datatype=rdflib.XSD.string)), - (rdflib.URIRef('http://example.com/me/entity#1234'), rdflib.URIRef(p_comment.uri), rdflib.Literal('bar', datatype=rdflib.XSD.string)), - (rdflib.URIRef('http://example.com/me/entity#1234'), rdflib.URIRef(p_comment.uri), rdflib.Literal('hello world', datatype=rdflib.XSD.string)), - (rdflib.URIRef('http://example.com/me/entity#4321'), rdflib.URIRef(p_comment.uri), rdflib.Literal('foo', datatype=rdflib.XSD.string)), - (rdflib.URIRef('http://example.com/me/entity#4321'), rdflib.URIRef(p_comment.uri), rdflib.Literal('bar', datatype=rdflib.XSD.string)), - (rdflib.URIRef('http://example.com/me/entity#4321'), rdflib.URIRef(p_comment.uri), rdflib.Literal('hello world', datatype=rdflib.XSD.string)), - })) - - # set unique node - store.set(ent_type, ent_ids, p_author, {URI('http://example.com/me/user#1234')}) - self.assertIn( - (rdflib.URIRef('http://example.com/me/entity#1234'), rdflib.URIRef(p_author.uri), rdflib.URIRef('http://example.com/me/user#1234')), - set(store._graph)) - self.assertIn( - (rdflib.URIRef('http://example.com/me/entity#4321'), rdflib.URIRef(p_author.uri), 
rdflib.URIRef('http://example.com/me/user#1234')), - set(store._graph)) - # re-assigning the same node changes nothing - store.set(ent_type, ent_ids, p_author, {URI('http://example.com/me/user#1234')}) - self.assertIn( - (rdflib.URIRef('http://example.com/me/entity#1234'), rdflib.URIRef(p_author.uri), rdflib.URIRef('http://example.com/me/user#1234')), - set(store._graph)) - self.assertIn( - (rdflib.URIRef('http://example.com/me/entity#4321'), rdflib.URIRef(p_author.uri), rdflib.URIRef('http://example.com/me/user#1234')), - set(store._graph)) - # cannot set multiple unique nodes - self.assertRaises(ValueError, store.set, ent_type, ent_ids, p_author, {URI('http://example.com/me/user#1234'), URI('http://example.com/me/user#4321')}) - # unique nodes are overwritten by set - store.set(ent_type, ent_ids, p_author, {URI('http://example.com/me/user#4321')}) - self.assertIn( - (rdflib.URIRef('http://example.com/me/entity#1234'), rdflib.URIRef(p_author.uri), rdflib.URIRef('http://example.com/me/user#4321')), - set(store._graph)) - self.assertNotIn( - (rdflib.URIRef('http://example.com/me/entity#1234'), rdflib.URIRef(p_author.uri), rdflib.URIRef('http://example.com/me/user#1234')), - set(store._graph)) - self.assertIn( - (rdflib.URIRef('http://example.com/me/entity#4321'), rdflib.URIRef(p_author.uri), rdflib.URIRef('http://example.com/me/user#4321')), - set(store._graph)) - self.assertNotIn( - (rdflib.URIRef('http://example.com/me/entity#4321'), rdflib.URIRef(p_author.uri), rdflib.URIRef('http://example.com/me/user#1234')), - set(store._graph)) - - # set non-unique node - store.set(ent_type, ent_ids, p_tag, {'http://example.com/me/tag#foobar'}) - self.assertTrue(set(store._graph).issuperset({ - (rdflib.URIRef('http://example.com/me/entity#1234'), rdflib.URIRef(p_tag.uri), rdflib.URIRef('http://example.com/me/tag#foobar')), - (rdflib.URIRef('http://example.com/me/entity#4321'), rdflib.URIRef(p_tag.uri), rdflib.URIRef('http://example.com/me/tag#foobar')), - })) - # re-assigning the same node changes nothing - store.set(ent_type, ent_ids, p_tag, {'http://example.com/me/tag#foobar'}) - self.assertTrue(set(store._graph).issuperset({ - (rdflib.URIRef('http://example.com/me/entity#1234'), rdflib.URIRef(p_tag.uri), rdflib.URIRef('http://example.com/me/tag#foobar')), - (rdflib.URIRef('http://example.com/me/entity#4321'), rdflib.URIRef(p_tag.uri), rdflib.URIRef('http://example.com/me/tag#foobar')), - })) - # can set multiple non-unique literals at once - store.set(ent_type, ent_ids, p_tag, {'http://example.com/me/tag#1234', 'http://example.com/me/tag#4321'}) - self.assertTrue(set(store._graph).issuperset({ - (rdflib.URIRef('http://example.com/me/entity#1234'), rdflib.URIRef(p_tag.uri), rdflib.URIRef('http://example.com/me/tag#1234')), - (rdflib.URIRef('http://example.com/me/entity#1234'), rdflib.URIRef(p_tag.uri), rdflib.URIRef('http://example.com/me/tag#4321')), - (rdflib.URIRef('http://example.com/me/entity#4321'), rdflib.URIRef(p_tag.uri), rdflib.URIRef('http://example.com/me/tag#1234')), - (rdflib.URIRef('http://example.com/me/entity#4321'), rdflib.URIRef(p_tag.uri), rdflib.URIRef('http://example.com/me/tag#4321')), - })) - # non-unique nodes are appended by set - store.set(ent_type, ent_ids, p_tag, {'http://example.com/me/tag#foo', 'http://example.com/me/tag#bar'}) - self.assertTrue(set(store._graph).issuperset({ - (rdflib.URIRef('http://example.com/me/entity#1234'), rdflib.URIRef(p_tag.uri), rdflib.URIRef('http://example.com/me/tag#1234')), - (rdflib.URIRef('http://example.com/me/entity#1234'), 
rdflib.URIRef(p_tag.uri), rdflib.URIRef('http://example.com/me/tag#4321')), - (rdflib.URIRef('http://example.com/me/entity#1234'), rdflib.URIRef(p_tag.uri), rdflib.URIRef('http://example.com/me/tag#foo')), - (rdflib.URIRef('http://example.com/me/entity#1234'), rdflib.URIRef(p_tag.uri), rdflib.URIRef('http://example.com/me/tag#bar')), - (rdflib.URIRef('http://example.com/me/entity#4321'), rdflib.URIRef(p_tag.uri), rdflib.URIRef('http://example.com/me/tag#1234')), - (rdflib.URIRef('http://example.com/me/entity#4321'), rdflib.URIRef(p_tag.uri), rdflib.URIRef('http://example.com/me/tag#4321')), - (rdflib.URIRef('http://example.com/me/entity#4321'), rdflib.URIRef(p_tag.uri), rdflib.URIRef('http://example.com/me/tag#foo')), - (rdflib.URIRef('http://example.com/me/entity#4321'), rdflib.URIRef(p_tag.uri), rdflib.URIRef('http://example.com/me/tag#bar')), - })) - - # nothing happens when no guids are given - plen = len(store._graph) - store.set(ent_type, set(), p_comment, {'xyz'}) - store.set(ent_type, set(), p_tag, {URI('http://example.com/me/tag#xyz')}) - self.assertEqual(plen, len(store._graph)) - - # guids must be instances of node_type - self.assertRaises(errors.InstanceError, store.set, ent_type, tag_ids, p_comment, {'xyz'}) - # inexistent guids - self.assertRaises(errors.InstanceError, store.set, ent_type, {URI('http://example.com/me/entity#foobar')}, p_comment, {'xyz'}) - - -## main ## - -if __name__ == '__main__': - unittest.main() - -## EOF ## -- cgit v1.2.3 From a0f2308adcb226d28de3355bc7115a6d9b669462 Mon Sep 17 00:00:00 2001 From: Matthias Baumgartner Date: Mon, 19 Dec 2022 13:40:02 +0100 Subject: import fixes --- bsfs/graph/graph.py | 2 +- bsfs/query/validator.py | 177 ++++++++++++++++++++++++++++++++++++- bsfs/triple_store/base.py | 3 +- bsfs/triple_store/sparql/sparql.py | 2 +- test/triple_store/test_base.py | 3 + 5 files changed, 182 insertions(+), 5 deletions(-) diff --git a/bsfs/graph/graph.py b/bsfs/graph/graph.py index 10e5904..51fe75d 100644 --- a/bsfs/graph/graph.py +++ b/bsfs/graph/graph.py @@ -111,7 +111,7 @@ class Graph(): type_ = self.schema.node(node_type) return _nodes.Nodes(self._backend, self._user, type_, {guid}) - def get(self, node_type: URI, subject: ast.filter.FilterExpression) -> Nodes: + def get(self, node_type: URI, subject: ast.filter.FilterExpression) -> _nodes.Nodes: """Return a `Nodes` instance over all nodes of type *node_type* that match the *subject* query.""" raise NotImplementedError() diff --git a/bsfs/query/validator.py b/bsfs/query/validator.py index ac3789a..123b947 100644 --- a/bsfs/query/validator.py +++ b/bsfs/query/validator.py @@ -29,7 +29,180 @@ class Filter(): def __init__(self, schema: bsc.Schema): self.schema = schema - def parse(self, node: ast.filter.FilterExpression): - raise NotImplementedError() + def parse(self, node: ast.filter.FilterExpression, subject: bsc.types._Vertex): + # subject is a node type + if not isinstance(subject, bsc.Node): + raise errors.ConsistencyError(f'Expected a node, found {subject}') + # subject exists in the schema + if subject not in self.schema.nodes: + raise errors.ConsistencyError(f'Invalid node type {subject}') + # root expression is valid + self._parse(node, subject) + # all tests passed + return True + + + def _parse_numerical_expression(self, node: ast.filter.FilterExpression, subject: bsc.types._Vertex): + if isinstance(node, ast.filter.And): + return self._and(node, subject) + elif isinstance(node, ast.filter.Or): + return self._or(node, subject) + elif isinstance(node, ast.filter.LessThan): + 
return self._lessThan(node, subject) + elif isinstance(node, ast.filter.GreaterThan): + return self._greaterThan(node, subject) + elif isinstance(node, ast.filter.Equals): + return self._equals(node, subject, numerical=True) + else: + raise errors.ConsistencyError(f'Expected a numerical expression, found {node}') + + + def __branch(self, node: typing.Union[ast.filter.Any, ast.filter.And], subject: bsc.types._Vertex): + # subject is a node type + if not isinstance(subject, bsc.Node): + raise errors.ConsistencyError(f'Expected a node, found {subject}') + # subject exists in the schema + if subject not in self.schema.nodes: + raise errors.ConsistencyError(f'Invalid node type {subject}') + # predicate is valid + dom, rng = self._parse_predicate_expression(node.predicate) + # subject is a subtype of the predicate's domain + if not subject <= dom: + raise errors.ConsistencyError(f'Expected type {dom}, found {subject}') + # child expression is valid + self._parse_filter_expression(node.expr, rng) + + def _any(self, node: ast.filter.Any, subject: bsc.types._Vertex): + return self.__branch(node, subject) + + def _all(self, node: ast.filter.All, subject: bsc.types._Vertex): + return self.__branch(node, subject) + + + def __agg(self, node: typing.Union[ast.filter.And, ast.filter.Or], subject: bsc.types._Vertex): + for expr in node: + # child expression is valid + self._parse_filter_expression(expr, subject) + + def _and(self, node: ast.filter.And, subject: bsc.types._Vertex): + return self.__agg(node, subject) + + def _or(self, node: ast.filter.Or, subject: bsc.types._Vertex): + return self.__agg(node, subject) + + + def _not(self, node: ast.filter.Not, subject: bsc.types._Vertex): + # child expression is valid + self._parse_filter_expression(node.expr, subject) + + + def _has(self, node: ast.filter.Has, subject: bsc.types._Vertex): + # subject is a node type + if not isinstance(subject, bsc.Node): + raise errors.ConsistencyError(f'Expected a node, found {subject}') + # subject exists in the schema + if subject not in self.schema.nodes: + raise errors.ConsistencyError(f'Invalid node type {subject}') + # predicate is valid + dom, rng = self._parse_predicate_expression(node.predicate) + # subject is a subtype of the predicate's domain + if not subject <= dom: + raise errors.ConsistencyError(f'Expected type {dom}, found {subject}') + # node.count is a numerical expression + self._parse_numerical_expression(node.count, self.schema.literal(ns.xsd.numerical)) + + + def _equals(self, node: ast.filter.Equals, subject: bsc.types._Vertex, numerical: bool = False): + # subject is a literal + #if not isinstance(subject, bsc.Literal): + # raise errors.ConsistencyError(f'Expected a literal, found {subject}') + if isinstance(subject, bsc.Node): + # FIXME: How to handle this case? + # FIXME: How to check if a NodeType is acceptable? + # FIXME: Maybe use flags to control what is expected as node identifiers? 
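# An illustrative aside, not part of this patch: under the check below, either value form
# would be accepted when filtering along a node-valued predicate such as bse:author from the
# test schema (the URI form is a plain guid; `me` is assumed to be a bsfs.graph.nodes.Nodes handle):
#   ast.filter.Any(ns.bse.author, ast.filter.Equals(URI('http://example.com/me')))
#   ast.filter.Any(ns.bse.author, ast.filter.Equals(me))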
+ from bsfs.graph.nodes import Nodes # FIXME + if not isinstance(node.value, Nodes) and not isinstance(node.value, URI): + raise errors.ConsistencyError(f'Expected a Nodes or URI, found {node.value}') + elif isinstance(subject, bsc.Literal): + # literal exists in the schema + if subject not in self.schema.literals: + raise errors.ConsistencyError(f'Invalid literal type {subject}') + else: + # FIXME: + raise errors.ConsistencyError(f'Expected a literal, found {subject}') + # node.value is numeric (if requested) + if numerical and not isinstance(node.value, float) and not isinstance(node.value, int): + raise errors.ConsistencyError(f'Expected a numerical value (int or float), found {node.value}') + # NOTE: We cannot check if node.value agrees with the subject since we don't know + # all literal types, their hierarchy, and how the backend converts datatypes. + + + def _substring(self, node: ast.filter.Substring, subject: bsc.types._Vertex): + # subject is a literal + if not isinstance(subject, bsc.Literal): + raise errors.ConsistencyError(f'Expected a literal, found {subject}') + # literal exists in the schema + if subject not in self.schema.literals: + raise errors.ConsistencyError(f'Invalid literal type {subject}') + # node.value matches literal datatype + if not subject.is_a(ns.xsd.string): + raise errors.ConsistencyError(f'Expected a string literal, found {subject}') + + + def _lessThan(self, node: ast.filter.LessThan, subject: bsc.types._Vertex): + # subject is a literal + if not isinstance(subject, bsc.Literal): + raise errors.ConsistencyError(f'Expected a literal, found {subject}') + # literal exists in the schema + if subject not in self.schema.literals: + raise errors.ConsistencyError(f'Invalid literal type {subject}') + # subject is numerical + if not subject.is_a(ns.xsd.numerical): + raise errors.ConsistencyError(f'Expected a numerical literal, found {subject}') + + + def _greaterThan(self, node: ast.filter.GreaterThan, subject: bsc.types._Vertex): + # subject is a literal + if not isinstance(subject, bsc.Literal): + raise errors.ConsistencyError(f'Expected a literal, found {subject}') + # literal exists in the schema + if subject not in self.schema.literals: + raise errors.ConsistencyError(f'Invalid literal type {subject}') + # subject is numerical + if not subject.is_a(ns.xsd.numerical): + raise errors.ConsistencyError(f'Expected a numerical literal, found {subject}') + + + def _predicate(self, node: ast.filter.Predicate): + try: + # predicate exists in the schema + pred = self.schema.predicate(node.predicate) + except KeyError: + raise errors.ConsistencyError(f'') # FIXME + if node.reverse: + return pred.range, pred.domain + else: + return pred.domain, pred.range + + + def _oneOf(self, node: ast.filter.OneOf): + dom, rng = None, None + for pred in node: + try: + # parse child expression + subdom, subrng = self._parse_predicate_expression(pred) + # domain and range must be related across all child expressions + if not subdom <= dom and not subdom >= dom: + raise errors.ConsistencyError(f'') # FIXME + if not subrng <= rng and not subrng >= rng: + raise errors.ConsistencyError(f'') # FIXME + # determine overall domain and range + if dom is None or subdom < dom: # pick most specific domain + dom = subdom + if rng is None or subrng > rng: # pick most generic range + rng = subrng + except KeyError: + raise errors.ConsistencyError(f'') + return dom, rng ## EOF ## diff --git a/bsfs/triple_store/base.py b/bsfs/triple_store/base.py index 28ebb86..5ff9523 100644 --- 
a/bsfs/triple_store/base.py +++ b/bsfs/triple_store/base.py @@ -9,6 +9,7 @@ import abc import typing # inner-module imports +from bsfs.query import ast from bsfs.utils import URI, typename import bsfs.schema as _schema @@ -111,7 +112,7 @@ class TripleStoreBase(abc.ABC): @abc.abstractmethod def get( self, - node_type: bsc.Node, + node_type: _schema.Node, query: ast.filter.FilterExpression, ) -> typing.Iterator[URI]: """Return guids of nodes of type *node_type* that match the *query*.""" diff --git a/bsfs/triple_store/sparql/sparql.py b/bsfs/triple_store/sparql/sparql.py index fff540a..7172f34 100644 --- a/bsfs/triple_store/sparql/sparql.py +++ b/bsfs/triple_store/sparql/sparql.py @@ -15,7 +15,7 @@ from bsfs.query import ast from bsfs.utils import errors, URI # inner-module imports -from . import base +from .. import base # exports diff --git a/test/triple_store/test_base.py b/test/triple_store/test_base.py index a4b0559..a0c3260 100644 --- a/test/triple_store/test_base.py +++ b/test/triple_store/test_base.py @@ -35,6 +35,9 @@ class DummyBase(TripleStoreBase): def schema(self, schema): pass + def get(self, node_type, query): + pass + def exists(self, node_type, guids): pass -- cgit v1.2.3 From 383fa8fd5c2e4b67089b4c5b654ebade51382f2c Mon Sep 17 00:00:00 2001 From: Matthias Baumgartner Date: Thu, 22 Dec 2022 20:27:49 +0100 Subject: filter ast definition and validation --- .pylintrc | 4 +- bsfs/query/ast/__init__.py | 2 +- bsfs/query/ast/filter_.py | 405 +++++++++++++++++++++++++++++++++++- bsfs/query/validator.py | 336 +++++++++++++++--------------- bsfs/utils/__init__.py | 3 +- bsfs/utils/commons.py | 34 +++ bsfs/utils/errors.py | 3 + test/query/ast/test_filter_.py | 456 ++++++++++++++++++++++++++++++++++++++++- test/query/test_validator.py | 237 ++++++++++++++++++++- test/utils/test_commons.py | 17 +- 10 files changed, 1326 insertions(+), 171 deletions(-) diff --git a/.pylintrc b/.pylintrc index 7885c4e..bcb2a86 100644 --- a/.pylintrc +++ b/.pylintrc @@ -88,7 +88,7 @@ max-parents=7 max-public-methods=20 # Maximum number of return / yield for function / method body. -max-returns=6 +max-returns=15 # Maximum number of statements in function / method body. max-statements=50 @@ -164,7 +164,7 @@ score=yes [SIMILARITIES] # Minimum lines number of a similarity. -min-similarity-lines=4 +min-similarity-lines=5 [STRING] diff --git a/bsfs/query/ast/__init__.py b/bsfs/query/ast/__init__.py index 0ee7385..704d051 100644 --- a/bsfs/query/ast/__init__.py +++ b/bsfs/query/ast/__init__.py @@ -14,7 +14,7 @@ Author: Matthias Baumgartner, 2022 import typing # inner-module imports -from . import filter_ as filter +from . import filter_ as filter # pylint: disable=redefined-builtin # exports __all__: typing.Sequence[str] = ( diff --git a/bsfs/query/ast/filter_.py b/bsfs/query/ast/filter_.py index 4086fc1..b129ded 100644 --- a/bsfs/query/ast/filter_.py +++ b/bsfs/query/ast/filter_.py @@ -1,5 +1,27 @@ """Filter AST. +Note that it is easily possible to construct an AST that is inconsistent with +a given schema. Furthermore, it is possible to construct a semantically invalid +AST which that cannot be parsed correctly or includes contradicting statements. +The AST nodes do not (and cannot) check such issues. + +For example, consider the following AST: + +>>> Any(ns.bse.collection, +... And( +... Equals('hello'), +... Any(ns.bsm.guid, Any(ns.bsm.guid, Equals('hello'))), +... Any(ns.bst.label, Equals('world')), +... All(ns.bst.label, Not(Equals('world'))), +... ) +... 
) + +This AST has multiple issues that are not verified upon its creation: +* A condition on a non-literal. +* A Filter on a literal. +* Conditions that exclude each other. +* Predicates along the branch with incompatible domains and ranges. + Part of the BlackStar filesystem (bsfs) module. A copy of the license is provided with the project. Author: Matthias Baumgartner, 2022 @@ -8,12 +30,45 @@ Author: Matthias Baumgartner, 2022 from collections import abc import typing +# bsfs imports +from bsfs.utils import URI, typename, normalize_args + +# inner-module imports +#from . import utils + # exports -__all__ : typing.Sequence[str] = [] +__all__ : typing.Sequence[str] = ( + # base classes + 'FilterExpression', + 'PredicateExpression', + # predicate expressions + 'OneOf', + 'Predicate', + # branching + 'All', + 'Any', + # aggregators + 'And', + 'Or', + # value matchers + 'Equals', + 'Substring', + 'EndsWith', + 'StartsWith', + # range matchers + 'GreaterThan', + 'LessThan', + # misc + 'Has', + 'Is', + 'Not', + ) ## code ## +# pylint: disable=too-few-public-methods # Many expressions use mostly magic methods + class _Expression(abc.Hashable): def __repr__(self) -> str: """Return the expressions's string representation.""" @@ -27,4 +82,352 @@ class _Expression(abc.Hashable): """Return True if *self* and *other* are equivalent.""" return isinstance(other, type(self)) + +class FilterExpression(_Expression): + """Generic Filter expression.""" + + +class PredicateExpression(_Expression): + """Generic Predicate expression.""" + + +class _Branch(FilterExpression): + """Branch the filter along a predicate.""" + + # predicate to follow. + predicate: PredicateExpression + + # child expression to evaluate. + expr: FilterExpression + + def __init__( + self, + predicate: typing.Union[PredicateExpression, URI], + expr: FilterExpression, + ): + # process predicate argument + if isinstance(predicate, URI): + predicate = Predicate(predicate) + elif not isinstance(predicate, PredicateExpression): + raise TypeError(predicate) + # process expression argument + if not isinstance(expr, FilterExpression): + raise TypeError(expr) + # assign members + self.predicate = predicate + self.expr = expr + + def __repr__(self) -> str: + return f'{typename(self)}({self.predicate}, {self.expr})' + + def __hash__(self) -> int: + return hash((super().__hash__(), self.predicate, self.expr)) + + def __eq__(self, other) -> bool: + return super().__eq__(other) \ + and self.predicate == other.predicate \ + and self.expr == other.expr + +class Any(_Branch): + """Any (and at least one) triple matches.""" + + +class All(_Branch): + """All (and at least one) triples match.""" + + +class _Agg(FilterExpression, abc.Collection): + """Combine multiple expressions.""" + + # child expressions + expr: typing.Set[FilterExpression] + + def __init__( + self, + *expr: typing.Union[FilterExpression, + typing.Iterable[FilterExpression], + typing.Iterator[FilterExpression]] + ): + # unfold arguments + unfolded = set(normalize_args(*expr)) + # check type + if not all(isinstance(e, FilterExpression) for e in unfolded): + raise TypeError(expr) + # assign member + self.expr = unfolded + + def __contains__(self, expr: typing.Any) -> bool: + """Return True if *expr* is among the child expressions.""" + return expr in self.expr + + def __iter__(self) -> typing.Iterator[FilterExpression]: + """Iterator over child expressions.""" + return iter(self.expr) + + def __len__(self) -> int: + """Number of child expressions.""" + return len(self.expr) + + def 
__repr__(self) -> str: + return f'{typename(self)}({self.expr})' + + def __hash__(self) -> int: + return hash((super().__hash__(), tuple(self.expr))) # FIXME: Unique hash of different orders over self.expr + + def __eq__(self, other) -> bool: + return super().__eq__(other) and self.expr == other.expr + + +class And(_Agg): + """All conditions match.""" + + +class Or(_Agg): + """At least one condition matches.""" + + +class Not(FilterExpression): + """Invert a statement.""" + + # child expression + expr: FilterExpression + + def __init__(self, expr: FilterExpression): + # check argument + if not isinstance(expr, FilterExpression): + raise TypeError(expr) + # assign member + self.expr = expr + + def __repr__(self) -> str: + return f'{typename(self)}({self.expr})' + + def __hash__(self) -> int: + return hash((super().__hash__(), self.expr)) + + def __eq__(self, other: typing.Any) -> bool: + return super().__eq__(other) and self.expr == other.expr + + +class Has(FilterExpression): + """Has predicate N times""" + + # predicate to follow. + predicate: PredicateExpression + + # target count + count: FilterExpression + + def __init__( + self, + predicate: typing.Union[PredicateExpression, URI], + count: typing.Optional[typing.Union[FilterExpression, int]] = None, + ): + # check predicate + if isinstance(predicate, URI): + predicate = Predicate(predicate) + elif not isinstance(predicate, PredicateExpression): + raise TypeError(predicate) + # check count + if count is None: + count = GreaterThan(1, strict=False) + elif isinstance(count, int): + count = Equals(count) + elif not isinstance(count, FilterExpression): + raise TypeError(count) + # assign members + self.predicate = predicate + self.count = count + + def __repr__(self) -> str: + return f'{typename(self)}({self.predicate}, {self.count})' + + def __hash__(self) -> int: + return hash((super().__hash__(), self.predicate, self.count)) + + def __eq__(self, other) -> bool: + return super().__eq__(other) \ + and self.predicate == other.predicate \ + and self.count == other.count + + +class _Value(FilterExpression): + """ + """ + + # target value. + value: typing.Any + + def __init__(self, value: typing.Any): + self.value = value + + def __repr__(self) -> str: + return f'{typename(self)}({self.value})' + + def __hash__(self) -> int: + return hash((super().__hash__(), self.value)) + + def __eq__(self, other) -> bool: + return super().__eq__(other) and self.value == other.value + + +class Is(_Value): + """Match the URI of a node.""" + + +class Equals(_Value): + """Value matches exactly. + NOTE: Value format must correspond to literal type; can be a string, a number, or a Node + """ + + +class Substring(_Value): + """Value matches a substring + NOTE: value format must be a string + """ + + +class StartsWith(_Value): + """Value begins with a given string.""" + + +class EndsWith(_Value): + """Value ends with a given string.""" + + +class _Bounded(FilterExpression): + """ + """ + + # bound. + threshold: float + + # closed (True) or open (False) bound. 
+ strict: bool + + def __init__( + self, + threshold: float, + strict: bool = True, + ): + self.threshold = float(threshold) + self.strict = bool(strict) + + def __repr__(self) -> str: + return f'{typename(self)}({self.threshold}, {self.strict})' + + def __hash__(self) -> int: + return hash((super().__hash__(), self.threshold, self.strict)) + + def __eq__(self, other) -> bool: + return super().__eq__(other) \ + and self.threshold == other.threshold \ + and self.strict == other.strict + + + +class LessThan(_Bounded): + """Value is (strictly) smaller than threshold. + NOTE: only on numerical literals + """ + + +class GreaterThan(_Bounded): + """Value is (strictly) larger than threshold + NOTE: only on numerical literals + """ + + +class Predicate(PredicateExpression): + """A single predicate.""" + + # predicate URI + predicate: URI + + # reverse the predicate's direction + reverse: bool + + def __init__( + self, + predicate: URI, + reverse: typing.Optional[bool] = False, + ): + # check arguments + if not isinstance(predicate, URI): + raise TypeError(predicate) + # assign members + self.predicate = predicate + self.reverse = bool(reverse) + + def __repr__(self) -> str: + return f'{typename(self)}({self.predicate}, {self.reverse})' + + def __hash__(self) -> int: + return hash((super().__hash__(), self.predicate, self.reverse)) + + def __eq__(self, other) -> bool: + return super().__eq__(other) \ + and self.predicate == other.predicate \ + and self.reverse == other.reverse + + +class OneOf(PredicateExpression, abc.Collection): + """A set of predicate alternatives. + + The predicates' domains must be ascendants or descendants of each other. + The overall domain is the most specific one. + + The predicate's domains must be ascendants or descendants of each other. + The overall range is the most generic one. + """ + + # predicate alternatives + expr: typing.Set[PredicateExpression] + + def __init__(self, *expr: typing.Union[PredicateExpression, URI]): + # unfold arguments + unfolded = set(normalize_args(*expr)) # type: ignore [arg-type] # this is getting too complex... 
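+        # Illustrative note: normalize_args lets callers write OneOf(p0, p1), OneOf([p0, p1]),
+        # or OneOf(iter([p0, p1])) interchangeably (p0, p1 being placeholder predicates);
+        # *unfolded* is the flattened set of the passed arguments.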
+ # check arguments + if len(unfolded) == 0: + raise AttributeError('expected at least one expression, found none') + # ensure PredicateExpression + unfolded = {Predicate(e) if isinstance(e, URI) else e for e in unfolded} + # check type + if not all(isinstance(e, PredicateExpression) for e in unfolded): + raise TypeError(expr) + # assign member + self.expr = unfolded + + def __contains__(self, expr: typing.Any) -> bool: + """Return True if *expr* is among the child expressions.""" + return expr in self.expr + + def __iter__(self) -> typing.Iterator[PredicateExpression]: + """Iterator over child expressions.""" + return iter(self.expr) + + def __len__(self) -> int: + """Number of child expressions.""" + return len(self.expr) + + def __repr__(self) -> str: + return f'{typename(self)}({self.expr})' + + def __hash__(self) -> int: + return hash((super().__hash__(), tuple(self.expr))) # FIXME: Unique hash of different orders over self.expr + + def __eq__(self, other) -> bool: + return super().__eq__(other) and self.expr == other.expr + + +# Helpers + +def IsIn(*values): # pylint: disable=invalid-name # explicitly mimics an expression + """Match any of the given URIs.""" + return Or(Is(value) for value in normalize_args(*values)) + +def IsNotIn(*values): # pylint: disable=invalid-name # explicitly mimics an expression + """Match none of the given URIs.""" + return Not(IsIn(*values)) + ## EOF ## diff --git a/bsfs/query/validator.py b/bsfs/query/validator.py index 123b947..352203a 100644 --- a/bsfs/query/validator.py +++ b/bsfs/query/validator.py @@ -9,6 +9,8 @@ import typing # bsfs imports from bsfs import schema as bsc +from bsfs.namespace import ns +from bsfs.utils import errors, typename # inner-module imports from . import ast @@ -22,6 +24,18 @@ __all__ : typing.Sequence[str] = ( ## code ## class Filter(): + """Validate a `bsfs.query.ast.filter` query's structure and schema compliance. + + * Conditions (Bounded, Value) can only be applied on literals + * Branches, Id, and Has can only be applied on nodes + * Predicates' domain and range must match + * Predicate paths must follow the schema + * Referenced types are present in the schema + + """ + + # vertex types + T_VERTEX = typing.Union[bsc.Node, bsc.Literal] # FIXME: Shouldn't this be in the schema? # schema to validate against. schema: bsc.Schema @@ -29,180 +43,182 @@ class Filter(): def __init__(self, schema: bsc.Schema): self.schema = schema - def parse(self, node: ast.filter.FilterExpression, subject: bsc.types._Vertex): - # subject is a node type - if not isinstance(subject, bsc.Node): - raise errors.ConsistencyError(f'Expected a node, found {subject}') - # subject exists in the schema - if subject not in self.schema.nodes: - raise errors.ConsistencyError(f'Invalid node type {subject}') - # root expression is valid - self._parse(node, subject) + def __call__(self, root_type: bsc.Node, query: ast.filter.FilterExpression): + """Validate a filter *query*, assuming the subject having *root_type*. + + Raises a `bsfs.utils.errors.ConsistencyError` if the query violates the schema. + Raises a `bsfs.utils.errors.BackendError` if the query structure is invalid. 
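+
+        Example (illustrative sketch; assumes that the Entity node type and the
+        bse:tag predicate are defined in *schema*):
+
+        >>> validate = Filter(schema)
+        >>> validate(schema.node(ns.bsfs.Entity),
+        ...          ast.filter.Any(ns.bse.tag, ast.filter.Is('http://example.com/tag#1234')))
+        True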
+ + """ + # root_type must be a schema.Node + if not isinstance(root_type, bsc.Node): + raise TypeError(f'Expected a node, found {typename(root_type)}') + # root_type must exist in the schema + if root_type not in self.schema.nodes(): + raise errors.ConsistencyError(f'{root_type} is not defined in the schema') + # check root expression + self._parse_filter_expression(root_type, query) # all tests passed return True - def _parse_numerical_expression(self, node: ast.filter.FilterExpression, subject: bsc.types._Vertex): - if isinstance(node, ast.filter.And): - return self._and(node, subject) - elif isinstance(node, ast.filter.Or): - return self._or(node, subject) - elif isinstance(node, ast.filter.LessThan): - return self._lessThan(node, subject) - elif isinstance(node, ast.filter.GreaterThan): - return self._greaterThan(node, subject) - elif isinstance(node, ast.filter.Equals): - return self._equals(node, subject, numerical=True) - else: - raise errors.ConsistencyError(f'Expected a numerical expression, found {node}') - - - def __branch(self, node: typing.Union[ast.filter.Any, ast.filter.And], subject: bsc.types._Vertex): - # subject is a node type - if not isinstance(subject, bsc.Node): - raise errors.ConsistencyError(f'Expected a node, found {subject}') - # subject exists in the schema - if subject not in self.schema.nodes: - raise errors.ConsistencyError(f'Invalid node type {subject}') - # predicate is valid - dom, rng = self._parse_predicate_expression(node.predicate) - # subject is a subtype of the predicate's domain - if not subject <= dom: - raise errors.ConsistencyError(f'Expected type {dom}, found {subject}') - # child expression is valid - self._parse_filter_expression(node.expr, rng) + ## routing methods + + def _parse_filter_expression(self, type_: T_VERTEX, node: ast.filter.FilterExpression): + """Route *node* to the handler of the respective FilterExpression subclass.""" + if isinstance(node, ast.filter.Is): + return self._is(type_, node) + if isinstance(node, ast.filter.Not): + return self._not(type_, node) + if isinstance(node, ast.filter.Has): + return self._has(type_, node) + if isinstance(node, (ast.filter.Any, ast.filter.All)): + return self._branch(type_, node) + if isinstance(node, (ast.filter.And, ast.filter.Or)): + return self._agg(type_, node) + if isinstance(node, (ast.filter.Equals, ast.filter.Substring, ast.filter.StartsWith, ast.filter.EndsWith)): + return self._value(type_, node) + if isinstance(node, (ast.filter.LessThan, ast.filter.GreaterThan)): + return self._bounded(type_, node) + # invalid node + raise errors.BackendError(f'expected filter expression, found {node}') + + def _parse_predicate_expression(self, node: ast.filter.PredicateExpression) -> typing.Tuple[T_VERTEX, T_VERTEX]: + """Route *node* to the handler of the respective PredicateExpression subclass.""" + if isinstance(node, ast.filter.Predicate): + return self._predicate(node) + if isinstance(node, ast.filter.OneOf): + return self._one_of(node) + # invalid node + raise errors.BackendError(f'expected predicate expression, found {node}') + + + ## predicate expressions + + def _predicate(self, node: ast.filter.Predicate) -> typing.Tuple[T_VERTEX, T_VERTEX]: + # predicate exists in the schema + if not self.schema.has_predicate(node.predicate): + raise errors.ConsistencyError(f'predicate {node.predicate} is not in the schema') + # determine domain and range + pred = self.schema.predicate(node.predicate) + dom, rng = pred.domain, pred.range + if rng is None: + # FIXME: It is a design error that 
Predicates can have a None range... + raise errors.BackendError(f'predicate {pred} has no range') + if node.reverse: + dom, rng = rng, dom # type: ignore [assignment] # variable re-use confuses mypy + # return domain and range + return dom, rng - def _any(self, node: ast.filter.Any, subject: bsc.types._Vertex): - return self.__branch(node, subject) + def _one_of(self, node: ast.filter.OneOf) -> typing.Tuple[T_VERTEX, T_VERTEX]: + # determine domain and range types + # NOTE: select the most specific domain and the most generic range + dom, rng = None, None + for pred in node: + # parse child expression + subdom, subrng = self._parse_predicate_expression(pred) + try: + # determine overall domain + if dom is None or subdom < dom: # pick most specific domain + dom = subdom + # domains must be related across all child expressions + if not subdom <= dom and not subdom >= dom: + raise errors.ConsistencyError(f'domains {subdom} and {dom} are not related') + except TypeError as err: # compared literal vs. node + raise errors.ConsistencyError(f'domains {subdom} and {dom} are not of the same type') from err - def _all(self, node: ast.filter.All, subject: bsc.types._Vertex): - return self.__branch(node, subject) + try: + # determine overall range + if rng is None or subrng > rng: # pick most generic range + rng = subrng + # ranges must be related across all child expressions + if not subrng <= rng and not subrng >= rng: + raise errors.ConsistencyError(f'ranges {subrng} and {rng} are not related') + except TypeError as err: # compared literal vs. node + raise errors.ConsistencyError(f'ranges {subrng} and {rng} are not of the same type') from err + # check domain and range + if dom is None or rng is None: + # OneOf guarantees at least one expression, these two cases cannot happen + raise errors.UnreachableError() + # return domain and range + return dom, rng - def __agg(self, node: typing.Union[ast.filter.And, ast.filter.Or], subject: bsc.types._Vertex): + ## intermediates + + def _branch(self, type_: T_VERTEX, node: ast.filter._Branch): + # type is a Node + if not isinstance(type_, bsc.Node): + raise errors.ConsistencyError(f'expected a Node, found {type_}') + # type exists in the schema + # FIXME: Isn't it actually guaranteed that the type (except the root type) is part of the schema? + # all types can be traced back to (a) root_type, (b) predicate, or (c) manually set (e.g. in _is). + # For (a), we do (and have to) perform a check. For (c), the code base should be consistent throughout + # the module, so this is an assumption that has to be ensured in schema.Schema. For (b), we know (and + # check) that the predicate is in the schema, hence all node/literals derived from it are also in the + # schema by construction of the schema.Schema class. So, why do we check this every time? 
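+        # Illustrative example (assuming a predicate bse:tag with domain bsfs:Entity and
+        # range bsfs:Tag in the schema): for Any(bse:tag, Is(...)), the predicate expression
+        # below resolves to (dom=bsfs:Entity, rng=bsfs:Tag); type_ must then be bsfs:Entity
+        # or a subtype thereof, and the child expression Is(...) is validated against bsfs:Tag.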
+ if type_ not in self.schema.nodes(): + raise errors.ConsistencyError(f'node {type_} is not in the schema') + # predicate is valid + dom, rng = self._parse_predicate_expression(node.predicate) + # type_ is a subtype of the predicate's domain + if not type_ <= dom: + raise errors.ConsistencyError(f'expected type {dom} or subtype thereof, found {type_}') + # child expression is valid + self._parse_filter_expression(rng, node.expr) + + def _agg(self, type_: T_VERTEX, node: ast.filter._Agg): for expr in node: # child expression is valid - self._parse_filter_expression(expr, subject) - - def _and(self, node: ast.filter.And, subject: bsc.types._Vertex): - return self.__agg(node, subject) - - def _or(self, node: ast.filter.Or, subject: bsc.types._Vertex): - return self.__agg(node, subject) - + self._parse_filter_expression(type_, expr) - def _not(self, node: ast.filter.Not, subject: bsc.types._Vertex): + def _not(self, type_: T_VERTEX, node: ast.filter.Not): # child expression is valid - self._parse_filter_expression(node.expr, subject) - - - def _has(self, node: ast.filter.Has, subject: bsc.types._Vertex): - # subject is a node type - if not isinstance(subject, bsc.Node): - raise errors.ConsistencyError(f'Expected a node, found {subject}') - # subject exists in the schema - if subject not in self.schema.nodes: - raise errors.ConsistencyError(f'Invalid node type {subject}') + self._parse_filter_expression(type_, node.expr) + + def _has(self, type_: T_VERTEX, node: ast.filter.Has): + # type is a Node + if not isinstance(type_, bsc.Node): + raise errors.ConsistencyError(f'expected a Node, found {type_}') + # type exists in the schema + if type_ not in self.schema.nodes(): + raise errors.ConsistencyError(f'node {type_} is not in the schema') # predicate is valid - dom, rng = self._parse_predicate_expression(node.predicate) - # subject is a subtype of the predicate's domain - if not subject <= dom: - raise errors.ConsistencyError(f'Expected type {dom}, found {subject}') + dom, _= self._parse_predicate_expression(node.predicate) + # type_ is a subtype of the predicate's domain + if not type_ <= dom: + raise errors.ConsistencyError(f'expected type {dom}, found {type_}') # node.count is a numerical expression - self._parse_numerical_expression(node.count, self.schema.literal(ns.xsd.numerical)) - - - def _equals(self, node: ast.filter.Equals, subject: bsc.types._Vertex, numerical: bool = False): - # subject is a literal - #if not isinstance(subject, bsc.Literal): - # raise errors.ConsistencyError(f'Expected a literal, found {subject}') - if isinstance(subject, bsc.Node): - # FIXME: How to handle this case? - # FIXME: How to check if a NodeType is acceptable? - # FIXME: Maybe use flags to control what is expected as node identifiers? 
- from bsfs.graph.nodes import Nodes # FIXME - if not isinstance(node.value, Nodes) and not isinstance(node.value, URI): - raise errors.ConsistencyError(f'Expected a Nodes or URI, found {node.value}') - elif isinstance(subject, bsc.Literal): - # literal exists in the schema - if subject not in self.schema.literals: - raise errors.ConsistencyError(f'Invalid literal type {subject}') - else: - # FIXME: - raise errors.ConsistencyError(f'Expected a literal, found {subject}') - # node.value is numeric (if requested) - if numerical and not isinstance(node.value, float) and not isinstance(node.value, int): - raise errors.ConsistencyError(f'Expected a numerical value (int or float), found {node.value}') - # NOTE: We cannot check if node.value agrees with the subject since we don't know - # all literal types, their hierarchy, and how the backend converts datatypes. - - - def _substring(self, node: ast.filter.Substring, subject: bsc.types._Vertex): - # subject is a literal - if not isinstance(subject, bsc.Literal): - raise errors.ConsistencyError(f'Expected a literal, found {subject}') - # literal exists in the schema - if subject not in self.schema.literals: - raise errors.ConsistencyError(f'Invalid literal type {subject}') - # node.value matches literal datatype - if not subject.is_a(ns.xsd.string): - raise errors.ConsistencyError(f'Expected a string literal, found {subject}') - - - def _lessThan(self, node: ast.filter.LessThan, subject: bsc.types._Vertex): - # subject is a literal - if not isinstance(subject, bsc.Literal): - raise errors.ConsistencyError(f'Expected a literal, found {subject}') - # literal exists in the schema - if subject not in self.schema.literals: - raise errors.ConsistencyError(f'Invalid literal type {subject}') - # subject is numerical - if not subject.is_a(ns.xsd.numerical): - raise errors.ConsistencyError(f'Expected a numerical literal, found {subject}') - - - def _greaterThan(self, node: ast.filter.GreaterThan, subject: bsc.types._Vertex): - # subject is a literal - if not isinstance(subject, bsc.Literal): - raise errors.ConsistencyError(f'Expected a literal, found {subject}') - # literal exists in the schema - if subject not in self.schema.literals: - raise errors.ConsistencyError(f'Invalid literal type {subject}') - # subject is numerical - if not subject.is_a(ns.xsd.numerical): - raise errors.ConsistencyError(f'Expected a numerical literal, found {subject}') - - - def _predicate(self, node: ast.filter.Predicate): - try: - # predicate exists in the schema - pred = self.schema.predicate(node.predicate) - except KeyError: - raise errors.ConsistencyError(f'') # FIXME - if node.reverse: - return pred.range, pred.domain - else: - return pred.domain, pred.range - + # FIXME: We have to ensure that ns.xsd.integer is always known in the schema! 
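+        # Illustrative example: Has(bse:tag, GreaterThan(3)) validates GreaterThan(3) against
+        # xsd:integer here, i.e. "the subject has more than three bse:tag triples".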
+ self._parse_filter_expression(self.schema.literal(ns.xsd.integer), node.count) + + + ## conditions + + def _is(self, type_: T_VERTEX, node: ast.filter.Is): # pylint: disable=unused-argument # (node) + if not isinstance(type_, bsc.Node): + raise errors.ConsistencyError(f'expected a Node, found {type_}') + if type_ not in self.schema.nodes(): + raise errors.ConsistencyError(f'node {type_} is not in the schema') + + def _value(self, type_: T_VERTEX, node: ast.filter._Value): # pylint: disable=unused-argument # (node) + # type is a literal + if not isinstance(type_, bsc.Literal): + raise errors.ConsistencyError(f'expected a Literal, found {type_}') + # type exists in the schema + if type_ not in self.schema.literals(): + raise errors.ConsistencyError(f'literal {type_} is not in the schema') + # FIXME: Check if node.value corresponds to type_ + # FIXME: A specific literal might be requested (i.e., a numeric type when used in Has) + + def _bounded(self, type_: T_VERTEX, node: ast.filter._Bounded): # pylint: disable=unused-argument # (node) + # type is a literal + if not isinstance(type_, bsc.Literal): + raise errors.ConsistencyError(f'expected a Literal, found {type_}') + # type exists in the schema + if type_ not in self.schema.literals(): + raise errors.ConsistencyError(f'literal {type_} is not in the schema') + # FIXME: Check if node.value corresponds to type_ - def _oneOf(self, node: ast.filter.OneOf): - dom, rng = None, None - for pred in node: - try: - # parse child expression - subdom, subrng = self._parse_predicate_expression(pred) - # domain and range must be related across all child expressions - if not subdom <= dom and not subdom >= dom: - raise errors.ConsistencyError(f'') # FIXME - if not subrng <= rng and not subrng >= rng: - raise errors.ConsistencyError(f'') # FIXME - # determine overall domain and range - if dom is None or subdom < dom: # pick most specific domain - dom = subdom - if rng is None or subrng > rng: # pick most generic range - rng = subrng - except KeyError: - raise errors.ConsistencyError(f'') - return dom, rng ## EOF ## diff --git a/bsfs/utils/__init__.py b/bsfs/utils/__init__.py index 94680ee..6737cef 100644 --- a/bsfs/utils/__init__.py +++ b/bsfs/utils/__init__.py @@ -9,7 +9,7 @@ import typing # inner-module imports from . import errors -from .commons import typename +from .commons import typename, normalize_args from .uri import URI from .uuid import UUID, UCID @@ -19,6 +19,7 @@ __all__ : typing.Sequence[str] = ( 'URI', 'UUID', 'errors', + 'normalize_args', 'typename', ) diff --git a/bsfs/utils/commons.py b/bsfs/utils/commons.py index bad2fe0..e9f0b7f 100644 --- a/bsfs/utils/commons.py +++ b/bsfs/utils/commons.py @@ -5,10 +5,12 @@ A copy of the license is provided with the project. Author: Matthias Baumgartner, 2022 """ # imports +from collections import abc import typing # exports __all__: typing.Sequence[str] = ( + 'normalize_args', 'typename', ) @@ -19,5 +21,37 @@ def typename(obj) -> str: """Return the type name of *obj*.""" return type(obj).__name__ +# argument type in `normalize_args`. +ArgType = typing.TypeVar('ArgType') # pylint: disable=invalid-name # type vars don't follow the usual convention + +def normalize_args( + *args: typing.Union[ArgType, typing.Iterable[ArgType], typing.Iterator[ArgType]] + ) -> typing.Tuple[ArgType, ...]: + """Arguments to a function can be passed as individual arguments, list-like + structures, or iterables. This function processes any of these styles and + returns a tuple of the respective items. 
Typically used within a function + provide a flexible interface but sill have parameters in a normalized form. + + Examples: + + >>> normalize_args(0,1,2) + (1,2,3) + >>> normalize_args([0,1,2]) + (1,2,3) + >>> normalize_args(range(3)) + (1,2,3) + + """ + if len(args) == 0: # foo() + return tuple() + if len(args) > 1: # foo(0, 1, 2) + return tuple(args) # type: ignore [arg-type] # we assume that argument styles (arg vs. iterable) are not mixed. + if isinstance(args[0], abc.Iterator): # foo(iter([0,1,2])) + return tuple(args[0]) + if isinstance(args[0], abc.Iterable) and not isinstance(args[0], str): # foo([0, 1, 2]) + return tuple(args[0]) + # foo(0) + return (args[0], ) # type: ignore [return-value] # if args[0] is a str, we assume that ArgType was str. + ## EOF ## diff --git a/bsfs/utils/errors.py b/bsfs/utils/errors.py index c5e8e16..be9d40e 100644 --- a/bsfs/utils/errors.py +++ b/bsfs/utils/errors.py @@ -38,4 +38,7 @@ class UnreachableError(ProgrammingError): class ConfigError(_BSFSError): """User config issue.""" +class BackendError(_BSFSError): + """Could not parse an AST structure.""" + ## EOF ## diff --git a/test/query/ast/test_filter_.py b/test/query/ast/test_filter_.py index cc815e3..4f69bdc 100644 --- a/test/query/ast/test_filter_.py +++ b/test/query/ast/test_filter_.py @@ -8,16 +8,468 @@ Author: Matthias Baumgartner, 2022 import unittest # bsfs imports +from bsfs.namespace import ns +from bsfs.utils import URI # objects to test -from bsfs.query.ast.filter_ import _Expression +from bsfs.query.ast.filter_ import _Expression, FilterExpression, PredicateExpression +from bsfs.query.ast.filter_ import _Branch, Any, All +from bsfs.query.ast.filter_ import _Agg, And, Or +from bsfs.query.ast.filter_ import Not, Has +from bsfs.query.ast.filter_ import _Value, Is, Equals, Substring, StartsWith, EndsWith +from bsfs.query.ast.filter_ import _Bounded, LessThan, GreaterThan +from bsfs.query.ast.filter_ import Predicate, OneOf +from bsfs.query.ast.filter_ import IsIn, IsNotIn ## code ## class TestExpression(unittest.TestCase): def test_essentials(self): - raise NotImplementedError() + # comparison + self.assertEqual(_Expression(), _Expression()) + self.assertEqual(FilterExpression(), FilterExpression()) + self.assertEqual(PredicateExpression(), PredicateExpression()) + self.assertEqual(hash(_Expression()), hash(_Expression())) + self.assertEqual(hash(FilterExpression()), hash(FilterExpression())) + self.assertEqual(hash(PredicateExpression()), hash(PredicateExpression())) + # comparison respects type + self.assertNotEqual(FilterExpression(), _Expression()) + self.assertNotEqual(_Expression(), PredicateExpression()) + self.assertNotEqual(PredicateExpression(), FilterExpression()) + self.assertNotEqual(hash(FilterExpression()), hash(_Expression())) + self.assertNotEqual(hash(_Expression()), hash(PredicateExpression())) + self.assertNotEqual(hash(PredicateExpression()), hash(FilterExpression())) + # string conversion + self.assertEqual(str(_Expression()), '_Expression()') + self.assertEqual(str(FilterExpression()), 'FilterExpression()') + self.assertEqual(str(PredicateExpression()), 'PredicateExpression()') + self.assertEqual(repr(_Expression()), '_Expression()') + self.assertEqual(repr(FilterExpression()), 'FilterExpression()') + self.assertEqual(repr(PredicateExpression()), 'PredicateExpression()') + + +class TestBranch(unittest.TestCase): # _Branch, Any, All + def test_essentials(self): + pred = PredicateExpression() + expr = FilterExpression() + + # comparison respects type + 
self.assertNotEqual(_Branch(pred, expr), Any(pred, expr)) + self.assertNotEqual(Any(pred, expr), All(pred, expr)) + self.assertNotEqual(All(pred, expr), _Branch(pred, expr)) + self.assertNotEqual(hash(_Branch(pred, expr)), hash(Any(pred, expr))) + self.assertNotEqual(hash(Any(pred, expr)), hash(All(pred, expr))) + self.assertNotEqual(hash(All(pred, expr)), hash(_Branch(pred, expr))) + + for cls in (_Branch, Any, All): + # comparison + self.assertEqual(cls(pred, expr), cls(pred, expr)) + self.assertEqual(hash(cls(pred, expr)), hash(cls(pred, expr))) + # comparison respects predicate + self.assertNotEqual(cls(ns.bse.filename, expr), cls(ns.bse.filesize, expr)) + self.assertNotEqual(hash(cls(ns.bse.filename, expr)), hash(cls(ns.bse.filesize, expr))) + # comparison respects expression + self.assertNotEqual(cls(pred, Equals('hello')), cls(pred, Equals('world'))) + self.assertNotEqual(hash(cls(pred, Equals('hello'))), hash(cls(pred, Equals('world')))) + + # string conversion + self.assertEqual(str(_Branch(pred, expr)), f'_Branch({pred}, {expr})') + self.assertEqual(repr(_Branch(pred, expr)), f'_Branch({pred}, {expr})') + self.assertEqual(str(Any(pred, expr)), f'Any({pred}, {expr})') + self.assertEqual(repr(Any(pred, expr)), f'Any({pred}, {expr})') + self.assertEqual(str(All(pred, expr)), f'All({pred}, {expr})') + self.assertEqual(repr(All(pred, expr)), f'All({pred}, {expr})') + + def test_members(self): + class Foo(): pass + pred = PredicateExpression() + expr = FilterExpression() + + for cls in (_Branch, Any, All): + # predicate returns member + self.assertEqual(cls(PredicateExpression(), expr).predicate, PredicateExpression()) + # can pass an URI + self.assertEqual(cls(ns.bse.filename, expr).predicate, Predicate(ns.bse.filename)) + # can pass a PredicateExpression + self.assertEqual(cls(Predicate(ns.bse.filename), expr).predicate, Predicate(ns.bse.filename)) + # must pass an URI or PredicateExpression + self.assertRaises(TypeError, cls, Foo(), expr) + # expression returns member + self.assertEqual(cls(pred, Equals('hello')).expr, Equals('hello')) + # expression must be a FilterExpression + self.assertRaises(TypeError, cls, ns.bse.filename, 'hello') + self.assertRaises(TypeError, cls, ns.bse.filename, 1234) + self.assertRaises(TypeError, cls, ns.bse.filename, Foo()) + + +class TestAgg(unittest.TestCase): # _Agg, And, Or + def test_essentials(self): + expr = {Equals('hello'), Equals('world')} + + # comparison respects type + self.assertNotEqual(_Agg(expr), And(expr)) + self.assertNotEqual(And(expr), Or(expr)) + self.assertNotEqual(Or(expr), _Agg(expr)) + self.assertNotEqual(hash(_Agg(expr)), hash(And(expr))) + self.assertNotEqual(hash(And(expr)), hash(Or(expr))) + self.assertNotEqual(hash(Or(expr)), hash(_Agg(expr))) + + for cls in (_Agg, And, Or): + # comparison + self.assertEqual(cls(expr), cls(expr)) + self.assertEqual(hash(cls(expr)), hash(cls(expr))) + # comparison respects expression + self.assertNotEqual(cls(expr), cls(Equals('world'))) + self.assertNotEqual(hash(cls(expr)), hash(cls(Equals('world')))) + self.assertNotEqual(cls(Equals('hello')), cls(Equals('world'))) + self.assertNotEqual(hash(cls(Equals('hello'))), hash(cls(Equals('world')))) + + # string conversion + self.assertEqual(str(_Agg(Equals('hello'))), '_Agg({Equals(hello)})') + self.assertEqual(repr(_Agg(Equals('hello'))), '_Agg({Equals(hello)})') + self.assertEqual(str(And(Equals('hello'))), 'And({Equals(hello)})') + self.assertEqual(repr(And(Equals('hello'))), 'And({Equals(hello)})') + 
self.assertEqual(str(Or(Equals('hello'))), 'Or({Equals(hello)})') + self.assertEqual(repr(Or(Equals('hello'))), 'Or({Equals(hello)})') + + def test_expression(self): + class Foo(): pass + + for cls in (_Agg, And, Or): + # can pass expressions as arguments + self.assertSetEqual(cls(Equals('hello'), Equals('world')).expr, {Equals('hello'), Equals('world')}) + # can pass one expressions as argument + self.assertSetEqual(cls(Equals('hello')).expr, {Equals('hello')}) + # can pass expressions as iterator + self.assertSetEqual(cls(iter((Equals('hello'), Equals('world')))).expr, {Equals('hello'), Equals('world')}) + # can pass expressions as generator + def gen(): + yield Equals('hello') + yield Equals('world') + self.assertSetEqual(cls(gen()).expr, {Equals('hello'), Equals('world')}) + # can pass expressions as list-like + self.assertSetEqual(cls((Equals('hello'), Equals('world'))).expr, {Equals('hello'), Equals('world')}) + # can pass one expression as list-like + self.assertSetEqual(cls([Equals('hello')]).expr, {Equals('hello')}) + # must pass expressions + self.assertRaises(TypeError, cls, Foo(), Foo()) + self.assertRaises(TypeError, cls, [Foo(), Foo()]) + + # iter + self.assertSetEqual(set(iter(cls(Equals('hello'), Equals('world')))), {Equals('hello'), Equals('world')}) + # contains + self.assertIn(Equals('world'), cls(Equals('hello'), Equals('world'))) + self.assertNotIn(Equals('foo'), cls(Equals('hello'), Equals('world'))) + # len + self.assertEqual(len(cls(Equals('hello'), Equals('world'))), 2) + self.assertEqual(len(cls(Equals('hello'), Equals('world'), Equals('foo'))), 3) + + + +class TestNot(unittest.TestCase): + def test_essentials(self): + expr = FilterExpression() + # comparison + self.assertEqual(Not(expr), Not(expr)) + self.assertEqual(hash(Not(expr)), hash(Not(expr))) + # comparison respects type + self.assertNotEqual(Not(expr), FilterExpression()) + self.assertNotEqual(hash(Not(expr)), hash(FilterExpression())) + # comparison respects expression + self.assertNotEqual(Not(Equals('hello')), Not(Equals('world'))) + self.assertNotEqual(hash(Not(Equals('hello'))), hash(Not(Equals('world')))) + # string conversion + self.assertEqual(str(Not(Equals('hello'))), 'Not(Equals(hello))') + self.assertEqual(repr(Not(Equals('hello'))), 'Not(Equals(hello))') + + def test_expression(self): + # Not requires an expression argument + self.assertRaises(TypeError, Not) + # expression must be a FilterExpression + self.assertRaises(TypeError, Not, 'hello') + self.assertRaises(TypeError, Not, 1234) + self.assertRaises(TypeError, Not, Predicate(ns.bse.filesize)) + # member returns expression + self.assertEqual(Not(Equals('hello')).expr, Equals('hello')) + + +class TestHas(unittest.TestCase): + def test_essentials(self): + pred = PredicateExpression() + count = FilterExpression() + # comparison + self.assertEqual(Has(pred, count), Has(pred, count)) + self.assertEqual(hash(Has(pred, count)), hash(Has(pred, count))) + # comparison respects type + self.assertNotEqual(Has(pred, count), FilterExpression()) + self.assertNotEqual(hash(Has(pred, count)), hash(FilterExpression())) + # comparison respects predicate + self.assertNotEqual(Has(pred, count), Has(Predicate(ns.bse.filesize), count)) + self.assertNotEqual(hash(Has(pred, count)), hash(Has(Predicate(ns.bse.filesize), count))) + # comparison respects count + self.assertNotEqual(Has(pred, count), Has(pred, LessThan(5))) + self.assertNotEqual(hash(Has(pred, count)), hash(Has(pred, LessThan(5)))) + # string conversion + 
self.assertEqual(str(Has(Predicate(ns.bse.filesize), LessThan(5))), + f'Has(Predicate({ns.bse.filesize}, False), LessThan(5.0, True))') + self.assertEqual(repr(Has(Predicate(ns.bse.filesize), LessThan(5))), + f'Has(Predicate({ns.bse.filesize}, False), LessThan(5.0, True))') + + def test_members(self): + pred = PredicateExpression() + count = FilterExpression() + # member returns expression + # predicate must be an URI or a PredicateExpression + self.assertEqual(Has(ns.bse.filesize, count).predicate, Predicate(ns.bse.filesize)) + self.assertEqual(Has(Predicate(ns.bse.filesize), count).predicate, Predicate(ns.bse.filesize)) + self.assertRaises(TypeError, Has, 1234, FilterExpression()) + self.assertRaises(TypeError, Has, FilterExpression(), FilterExpression()) + # member returns count + # count must be None, an integer, or a FilterExpression + self.assertEqual(Has(pred).count, GreaterThan(1, False)) + self.assertEqual(Has(pred, LessThan(5)).count, LessThan(5)) + self.assertEqual(Has(pred, 5).count, Equals(5)) + self.assertRaises(TypeError, Has, pred, 'hello') + self.assertRaises(TypeError, Has, pred, Predicate(ns.bse.filesize)) + + + +class TestValue(unittest.TestCase): + def test_essentials(self): + # comparison respects type + self.assertNotEqual(_Value('hello'), Equals('hello')) + self.assertNotEqual(Equals('hello'), Is('hello')) + self.assertNotEqual(Is('hello'), Substring('hello')) + self.assertNotEqual(Substring('hello'), StartsWith('hello')) + self.assertNotEqual(StartsWith('hello'), EndsWith('hello')) + self.assertNotEqual(EndsWith('hello'), _Value('hello')) + self.assertNotEqual(hash(_Value('hello')), hash(Equals('hello'))) + self.assertNotEqual(hash(Equals('hello')), hash(Is('hello'))) + self.assertNotEqual(hash(Is('hello')), hash(Substring('hello'))) + self.assertNotEqual(hash(Substring('hello')), hash(StartsWith('hello'))) + self.assertNotEqual(hash(StartsWith('hello')), hash(EndsWith('hello'))) + self.assertNotEqual(hash(EndsWith('hello')), hash(_Value('hello'))) + + for cls in (_Value, Is, Equals, Substring, StartsWith, EndsWith): + # comparison + self.assertEqual(cls('hello'), cls('hello')) + self.assertEqual(hash(cls('hello')), hash(cls('hello'))) + # comparison respects value + self.assertNotEqual(cls('hello'), cls('world')) + self.assertNotEqual(hash(cls('hello')), hash(cls('world'))) + + # string conversion + self.assertEqual(str(_Value('hello')), '_Value(hello)') + self.assertEqual(repr(_Value('hello')), '_Value(hello)') + self.assertEqual(str(Is('hello')), 'Is(hello)') + self.assertEqual(repr(Is('hello')), 'Is(hello)') + self.assertEqual(str(Equals('hello')), 'Equals(hello)') + self.assertEqual(repr(Equals('hello')), 'Equals(hello)') + self.assertEqual(str(Substring('hello')), 'Substring(hello)') + self.assertEqual(repr(Substring('hello')), 'Substring(hello)') + self.assertEqual(str(StartsWith('hello')), 'StartsWith(hello)') + self.assertEqual(repr(StartsWith('hello')), 'StartsWith(hello)') + self.assertEqual(str(EndsWith('hello')), 'EndsWith(hello)') + self.assertEqual(repr(EndsWith('hello')), 'EndsWith(hello)') + + def test_value(self): + class Foo(): pass + for cls in (_Value, Is, Equals, Substring, StartsWith, EndsWith): + # value can be anything + # value returns member + f = Foo() + self.assertEqual(cls('hello').value, 'hello') + self.assertEqual(cls(1234).value, 1234) + self.assertEqual(cls(f).value, f) + + +class TestBounded(unittest.TestCase): + def test_essentials(self): + # comparison respects type + self.assertNotEqual(_Bounded(1234), LessThan(1234)) + 
self.assertNotEqual(LessThan(1234), GreaterThan(1234)) + self.assertNotEqual(GreaterThan(1234), _Bounded(1234)) + self.assertNotEqual(hash(_Bounded(1234)), hash(LessThan(1234))) + self.assertNotEqual(hash(LessThan(1234)), hash(GreaterThan(1234))) + self.assertNotEqual(hash(GreaterThan(1234)), hash(_Bounded(1234))) + + for cls in (_Bounded, LessThan, GreaterThan): + # comparison + self.assertEqual(cls(1234), cls(1234)) + self.assertEqual(hash(cls(1234)), hash(cls(1234))) + # comparison respects threshold + self.assertNotEqual(cls(1234), cls(4321)) + self.assertNotEqual(hash(cls(1234)), hash(cls(4321))) + # comparison respects strict + self.assertNotEqual(cls(1234, True), cls(1234, False)) + self.assertNotEqual(hash(cls(1234, True)), hash(cls(1234, False))) + + # string conversion + self.assertEqual(str(_Bounded(1234, False)), '_Bounded(1234.0, False)') + self.assertEqual(repr(_Bounded(1234, False)), '_Bounded(1234.0, False)') + self.assertEqual(str(LessThan(1234, False)), 'LessThan(1234.0, False)') + self.assertEqual(repr(LessThan(1234, False)), 'LessThan(1234.0, False)') + self.assertEqual(str(GreaterThan(1234, False)), 'GreaterThan(1234.0, False)') + self.assertEqual(repr(GreaterThan(1234, False)), 'GreaterThan(1234.0, False)') + + def test_members(self): + class Foo(): pass + for cls in (_Bounded, LessThan, GreaterThan): + # threshold becomes float + self.assertEqual(cls(1.234).threshold, 1.234) + self.assertEqual(cls(1234).threshold, 1234.0) + self.assertEqual(cls('1234').threshold, 1234) + self.assertRaises(TypeError, cls, Foo()) + # strict becomes bool + self.assertEqual(cls(1234, True).strict, True) + self.assertEqual(cls(1234, False).strict, False) + self.assertEqual(cls(1234, Foo()).strict, True) + + +class TestPredicate(unittest.TestCase): + def test_essentials(self): + # comparison + self.assertEqual(Predicate(ns.bse.filesize), Predicate(ns.bse.filesize)) + self.assertEqual(hash(Predicate(ns.bse.filesize)), hash(Predicate(ns.bse.filesize))) + # comparison respects type + self.assertNotEqual(Predicate(ns.bse.filesize), PredicateExpression()) + self.assertNotEqual(hash(Predicate(ns.bse.filesize)), hash(PredicateExpression())) + # comparison respects predicate + self.assertNotEqual(Predicate(ns.bse.filesize), Predicate(ns.bse.filename)) + self.assertNotEqual(hash(Predicate(ns.bse.filesize)), hash(Predicate(ns.bse.filename))) + # comparison respects reverse + self.assertNotEqual(Predicate(ns.bse.filesize, True), Predicate(ns.bse.filesize, False)) + self.assertNotEqual(hash(Predicate(ns.bse.filesize, True)), hash(Predicate(ns.bse.filesize, False))) + # string conversion + self.assertEqual(str(Predicate(ns.bse.filesize)), f'Predicate({ns.bse.filesize}, False)') + self.assertEqual(str(Predicate(ns.bse.filesize, True)), + f'Predicate({ns.bse.filesize}, True)') + self.assertEqual(repr(Predicate(ns.bse.filesize)), f'Predicate({ns.bse.filesize}, False)') + self.assertEqual(repr(Predicate(ns.bse.filesize, True)), + f'Predicate({ns.bse.filesize}, True)') + + def test_members(self): + # member returns predicate + # predicate must be an URI + self.assertEqual(Predicate(ns.bse.filesize).predicate, ns.bse.filesize) + self.assertEqual(Predicate(URI('hello world')).predicate, URI('hello world')) + self.assertRaises(TypeError, Predicate, 1234) + self.assertRaises(TypeError, Predicate, FilterExpression()) + self.assertRaises(TypeError, Predicate, FilterExpression()) + # reverse becomes a boolean + self.assertEqual(Predicate(ns.bse.filesize, True).reverse, True) + 
self.assertEqual(Predicate(ns.bse.filesize, False).reverse, False) + self.assertEqual(Predicate(ns.bse.filesize, 'abc').reverse, True) + + +class TestOneOf(unittest.TestCase): + def test_essentials(self): + expr = {Predicate(ns.bse.filename), Predicate(ns.bse.filesize)} + # comparison + self.assertEqual(OneOf(expr), OneOf(expr)) + self.assertEqual(hash(OneOf(expr)), hash(OneOf(expr))) + # comparison respects type + self.assertNotEqual(OneOf(expr), PredicateExpression()) + self.assertNotEqual(hash(OneOf(expr)), hash(PredicateExpression())) + # comparison respects expression + self.assertNotEqual(OneOf(expr), OneOf(Predicate(ns.bse.filename))) + self.assertNotEqual(hash(OneOf(expr)), hash(OneOf(Predicate(ns.bse.filename)))) + # string conversion + self.assertEqual(str(OneOf(Predicate(ns.bse.filesize))), + f'OneOf({{Predicate({ns.bse.filesize}, False)}})') + self.assertEqual(repr(OneOf(Predicate(ns.bse.filesize))), + f'OneOf({{Predicate({ns.bse.filesize}, False)}})') + + def test_expression(self): + class Foo(): pass + # can pass expressions as arguments + self.assertSetEqual(OneOf(Predicate(ns.bse.filesize), Predicate(ns.bse.filename)).expr, + {Predicate(ns.bse.filesize), Predicate(ns.bse.filename)}) + # can pass one expressions as argument + self.assertSetEqual(OneOf(Predicate(ns.bse.filesize)).expr, + {Predicate(ns.bse.filesize)}) + # can pass expressions as iterator + self.assertSetEqual(OneOf(iter((Predicate(ns.bse.filesize), Predicate(ns.bse.filename)))).expr, + {Predicate(ns.bse.filesize), Predicate(ns.bse.filename)}) + # can pass expressions as generator + def gen(): + yield Predicate(ns.bse.filesize) + yield Predicate(ns.bse.filename) + self.assertSetEqual(OneOf(gen()).expr, + {Predicate(ns.bse.filesize), Predicate(ns.bse.filename)}) + # can pass expressions as list-like + self.assertSetEqual(OneOf((Predicate(ns.bse.filesize), Predicate(ns.bse.filename))).expr, + {Predicate(ns.bse.filesize), Predicate(ns.bse.filename)}) + # can pass one expression as list-like + self.assertSetEqual(OneOf([Predicate(ns.bse.filesize)]).expr, + {Predicate(ns.bse.filesize)}) + # must pass expressions + self.assertRaises(TypeError, OneOf, Foo(), Foo()) + self.assertRaises(TypeError, OneOf, [Foo(), Foo()]) + # must pass at least one expression + self.assertRaises(AttributeError, OneOf) + + # iter + self.assertSetEqual(set(iter(OneOf(Predicate(ns.bse.filesize), Predicate(ns.bse.filename)))), + {Predicate(ns.bse.filesize), Predicate(ns.bse.filename)}) + # contains + self.assertIn(Predicate(ns.bse.filesize), + OneOf(Predicate(ns.bse.filesize), Predicate(ns.bse.filename))) + self.assertNotIn(Predicate(ns.bse.tag), + OneOf(Predicate(ns.bse.filesize), Predicate(ns.bse.filename))) + # len + self.assertEqual(len(OneOf(Predicate(ns.bse.filesize), Predicate(ns.bse.filename))), 2) + self.assertEqual(len(OneOf(Predicate(ns.bse.filesize), Predicate(ns.bse.filename), Predicate(ns.bse.tag))), 3) + + + def testIsIn(self): + # can pass expressions as arguments + self.assertEqual(IsIn('http://example.com/entity#1234', 'http://example.com/entity#4321'), + Or(Is('http://example.com/entity#1234'), Is('http://example.com/entity#4321'))) + # can pass one expression as argument + self.assertEqual(IsIn('http://example.com/entity#1234'), + Or(Is('http://example.com/entity#1234'))) + # can pass expressions as iterator + self.assertEqual(IsIn(iter(('http://example.com/entity#1234', 'http://example.com/entity#4321'))), + Or(Is('http://example.com/entity#1234'), Is('http://example.com/entity#4321'))) + # can pass expressions as 
generator + def gen(): + yield 'http://example.com/entity#1234' + yield 'http://example.com/entity#4321' + self.assertEqual(IsIn(gen()), + Or(Is('http://example.com/entity#1234'), Is('http://example.com/entity#4321'))) + # can pass expressions as list-like + self.assertEqual(IsIn(['http://example.com/entity#1234', 'http://example.com/entity#4321']), + Or(Is('http://example.com/entity#1234'), Is('http://example.com/entity#4321'))) + # can pass one expression as list-like + self.assertEqual(IsIn(['http://example.com/entity#1234']), + Or(Is('http://example.com/entity#1234'))) + + + def testIsNotIn(self): + # can pass expressions as arguments + self.assertEqual(IsNotIn('http://example.com/entity#1234', 'http://example.com/entity#4321'), + Not(Or(Is('http://example.com/entity#1234'), Is('http://example.com/entity#4321')))) + # can pass one expression as argument + self.assertEqual(IsNotIn('http://example.com/entity#1234'), + Not(Or(Is('http://example.com/entity#1234')))) + # can pass expressions as iterator + self.assertEqual(IsNotIn(iter(('http://example.com/entity#1234', 'http://example.com/entity#4321'))), + Not(Or(Is('http://example.com/entity#1234'), Is('http://example.com/entity#4321')))) + # can pass expressions as generator + def gen(): + yield 'http://example.com/entity#1234' + yield 'http://example.com/entity#4321' + self.assertEqual(IsNotIn(gen()), + Not(Or(Is('http://example.com/entity#1234'), Is('http://example.com/entity#4321')))) + # can pass expressions as list-like + self.assertEqual(IsNotIn(['http://example.com/entity#1234', 'http://example.com/entity#4321']), + Not(Or(Is('http://example.com/entity#1234'), Is('http://example.com/entity#4321')))) + # can pass one expression as list-like + self.assertEqual(IsNotIn(['http://example.com/entity#1234']), + Not(Or(Is('http://example.com/entity#1234')))) + ## main ## diff --git a/test/query/test_validator.py b/test/query/test_validator.py index 0e88ad3..4f8364a 100644 --- a/test/query/test_validator.py +++ b/test/query/test_validator.py @@ -8,6 +8,10 @@ Author: Matthias Baumgartner, 2022 import unittest # bsfs imports +from bsfs import schema as _schema +from bsfs.namespace import ns +from bsfs.query import ast +from bsfs.utils import errors # objects to test from bsfs.query.validator import Filter @@ -16,10 +20,237 @@ from bsfs.query.validator import Filter ## code ## class TestFilter(unittest.TestCase): - def test_parse(self): - raise NotImplementedError() + def setUp(self): + self.schema = _schema.Schema.from_string(''' + prefix rdfs: + prefix xsd: + + prefix bsfs: + prefix bse: + + bsfs:Entity rdfs:subClassOf bsfs:Node . + bsfs:URI rdfs:subClassOf bsfs:Literal . + + bsfs:Tag rdfs:subClassOf bsfs:Node . + xsd:string rdfs:subClassOf bsfs:Literal . + xsd:integer rdfs:subClassOf bsfs:Literal . + + bse:comment rdfs:subClassOf bsfs:Predicate ; + rdfs:domain bsfs:Node ; + rdfs:range xsd:string ; + bsfs:unique "false"^^xsd:boolean . + + bse:filesize rdfs:subClassOf bsfs:Predicate ; + rdfs:domain bsfs:Entity ; + rdfs:range xsd:integer ; + bsfs:unique "true"^^xsd:boolean . + + bse:tag rdfs:subClassOf bsfs:Predicate ; + rdfs:domain bsfs:Entity ; + rdfs:range bsfs:Tag ; + bsfs:unique "false"^^xsd:boolean . + + bse:label rdfs:subClassOf bsfs:Predicate ; + rdfs:domain bsfs:Tag ; + rdfs:range xsd:string ; + bsfs:unique "true"^^xsd:boolean . + + bse:buddy rdfs:subClassOf bsfs:Predicate ; + rdfs:domain bsfs:Entity ; + rdfs:range bsfs:Node ; + bsfs:unique "false"^^xsd:boolean . 
+ + ''') + self.validate = Filter(self.schema) + + def test_call(self): + # root_type must be a _schema.Node + self.assertRaises(TypeError, self.validate, 1234, None) + self.assertRaises(TypeError, self.validate, '1234', None) + self.assertRaises(TypeError, self.validate, self.schema.literal(ns.bsfs.URI), None) + # root_type must exist in the schema + self.assertRaises(errors.ConsistencyError, self.validate, self.schema.node(ns.bsfs.Node).get_child(ns.bsfs.Image), None) + self.assertRaises(errors.ConsistencyError, self.validate, self.schema.node(ns.bsfs.Entity).get_child(ns.bsfs.Image), None) + # valid query returns true + self.assertTrue(self.validate(self.schema.node(ns.bsfs.Entity), + ast.filter.Any(ast.filter.OneOf(ns.bse.tag, ns.bse.buddy), + ast.filter.Or( + ast.filter.Is('http://example.com/symbol#1234'), + ast.filter.All(ns.bse.comment, ast.filter.StartsWith('foo')), + ast.filter.And( + ast.filter.Has(ns.bse.comment, ast.filter.Or( + ast.filter.GreaterThan(5), + ast.filter.LessThan(1), + ) + ), + ast.filter.Not(ast.filter.Any(ns.bse.comment, + ast.filter.Not(ast.filter.Equals('hello world')))), + ))))) + # invalid paths raise consistency error + self.assertRaises(errors.ConsistencyError, self.validate, self.schema.node(ns.bsfs.Entity), + ast.filter.Any(ast.filter.OneOf(ns.bse.tag, ns.bse.buddy), + ast.filter.Or( + ast.filter.All(ns.bse.comment, ast.filter.Equals('hello world')), + ast.filter.All(ns.bse.label, ast.filter.Equals('hello world')), # domain mismatch + ))) + + def test_routing(self): + self.assertRaises(errors.BackendError, self.validate._parse_filter_expression, ast.filter.FilterExpression(), self.schema.node(ns.bsfs.Node)) + self.assertRaises(errors.BackendError, self.validate._parse_predicate_expression, ast.filter.PredicateExpression()) + + def test_predicate(self): + # predicate must be in the schema + self.assertRaises(errors.ConsistencyError, self.validate._predicate, ast.filter.Predicate(ns.bse.invalid)) + # predicate must have a range + self.assertRaises(errors.BackendError, self.validate._predicate, ast.filter.Predicate(ns.bsfs.Predicate)) + # predicate returns domain and range + self.assertEqual(self.validate._predicate(ast.filter.Predicate(ns.bse.tag)), + (self.schema.node(ns.bsfs.Entity), self.schema.node(ns.bsfs.Tag))) + # reverse is applied + self.assertEqual(self.validate._predicate(ast.filter.Predicate(ns.bse.tag, reverse=True)), + (self.schema.node(ns.bsfs.Tag), self.schema.node(ns.bsfs.Entity))) + + def test_one_of(self): + # domains must both be nodes or literals + self.assertRaises(errors.ConsistencyError, self.validate._one_of, ast.filter.OneOf(ns.bse.tag, ast.filter.Predicate(ns.bse.label, reverse=True))) + # domains must be related + self.assertRaises(errors.ConsistencyError, self.validate._one_of, ast.filter.OneOf(ns.bse.tag, ns.bse.label)) + # ranges must both be nodes or literals + self.assertRaises(errors.ConsistencyError, self.validate._one_of, ast.filter.OneOf(ns.bse.tag, ns.bse.comment)) + # ranges must be related + self.assertRaises(errors.ConsistencyError, self.validate._one_of, ast.filter.OneOf(ns.bse.tag, ast.filter.Predicate(ns.bse.buddy, reverse=True))) + # one_of returns most specific domain + self.assertEqual(self.validate._one_of(ast.filter.OneOf(ns.bse.comment, ns.bse.label)), + (self.schema.node(ns.bsfs.Tag), self.schema.literal(ns.xsd.string))) + # one_of returns the most generic range + self.assertEqual(self.validate._one_of(ast.filter.OneOf(ns.bse.tag, ns.bse.buddy)), + (self.schema.node(ns.bsfs.Entity), 
self.schema.node(ns.bsfs.Node))) + + def test_branch(self): + # type must be a node + self.assertRaises(errors.ConsistencyError, self.validate._branch, self.schema.literal(ns.bsfs.Literal), None) + # type must be in the schema + self.assertRaises(errors.ConsistencyError, self.validate._branch, self.schema.node(ns.bsfs.Node).get_child(ns.bsfs.Invalid), None) + # predicate is verified + self.assertRaises(errors.ConsistencyError, self.validate._branch, self.schema.node(ns.bsfs.Entity), + ast.filter.Any(ns.bsfs.Invalid, ast.filter.Is('http://example.com/entity#1234'))) + # predicate must match the domain + self.assertRaises(errors.ConsistencyError, self.validate._branch, self.schema.node(ns.bsfs.Node), + ast.filter.Any(ns.bse.tag, ast.filter.Is('http://example.com/tag#1234'))) + self.assertRaises(errors.ConsistencyError, self.validate._branch, self.schema.node(ns.bsfs.Tag), + ast.filter.Any(ns.bse.tag, ast.filter.Is('http://example.com/tag#1234'))) + # child expression must be valid + self.assertRaises(errors.ConsistencyError, self.validate._branch, self.schema.node(ns.bsfs.Entity), + ast.filter.Any(ns.bse.tag, ast.filter.Equals('hello world'))) + # branch accepts valid expressions + self.assertIsNone(self.validate._branch(self.schema.node(ns.bsfs.Entity), + ast.filter.Any(ns.bse.tag, ast.filter.Is('http://example.com/entity#1234')))) + self.assertIsNone(self.validate._branch(self.schema.node(ns.bsfs.Entity), + ast.filter.All(ns.bse.tag, ast.filter.Is('http://example.com/entity#1234')))) + + def test_agg(self): + # agg evaluates child expressions + self.assertRaises(errors.ConsistencyError, self.validate._agg, self.schema.node(ns.bsfs.Entity), + ast.filter.And(ast.filter.Is('http://example.com/entity#1234'), ast.filter.Equals('hello world'))) + self.assertRaises(errors.ConsistencyError, self.validate._agg, self.schema.literal(ns.xsd.string), + ast.filter.And(ast.filter.Is('http://example.com/entity#1234'), ast.filter.Equals('hello world'))) + self.assertRaises(errors.ConsistencyError, self.validate._agg, self.schema.node(ns.bsfs.Entity), + ast.filter.Or(ast.filter.Is('http://example.com/entity#1234'), ast.filter.Equals('hello world'))) + self.assertRaises(errors.ConsistencyError, self.validate._agg, self.schema.literal(ns.xsd.string), + ast.filter.Or(ast.filter.Is('http://example.com/entity#1234'), ast.filter.Equals('hello world'))) + # agg works on nodes + self.assertIsNone(self.validate._agg(self.schema.node(ns.bsfs.Entity), + ast.filter.And(ast.filter.Is('http://example.com/entity#1234'), ast.filter.Is('http://example.com/entity#4321')))) + self.assertIsNone(self.validate._agg(self.schema.node(ns.bsfs.Entity), + ast.filter.Or(ast.filter.Is('http://example.com/entity#1234'), ast.filter.Is('http://example.com/entity#4321')))) + # agg works on literals + self.assertIsNone(self.validate._agg(self.schema.literal(ns.xsd.string), + ast.filter.And(ast.filter.Equals('foobar'), ast.filter.Equals('hello world')))) + self.assertIsNone(self.validate._agg(self.schema.literal(ns.xsd.string), + ast.filter.Or(ast.filter.Equals('foobar'), ast.filter.Equals('hello world')))) + + def test_not(self): + # not evaluates child expressions + self.assertRaises(errors.ConsistencyError, self.validate._not, self.schema.node(ns.bsfs.Entity), + ast.filter.Not(ast.filter.Equals('hello world'))) + self.assertRaises(errors.ConsistencyError, self.validate._not, self.schema.literal(ns.xsd.string), + ast.filter.Not(ast.filter.Is('http://example.com/entity#1234'))) + # not works on nodes + 
self.assertIsNone(self.validate._not(self.schema.node(ns.bsfs.Entity), + ast.filter.Not(ast.filter.Is('http://example.com/entity#1234')))) + # not works on literals + self.assertIsNone(self.validate._not(self.schema.literal(ns.xsd.string), + ast.filter.Not(ast.filter.Equals('hello world')))) + + def test_has(self): + # type must be node + self.assertRaises(errors.ConsistencyError, self.validate._has, self.schema.literal(ns.bsfs.Literal), + ast.filter.Has(ns.bse.tag)) + # type must be in the schema + self.assertRaises(errors.ConsistencyError, self.validate._has, self.schema.node(ns.bsfs.Node).get_child(ns.bsfs.Invalid), + ast.filter.Has(ns.bse.tag)) + # has checks predicate + self.assertRaises(errors.ConsistencyError, self.validate._has, self.schema.node(ns.bsfs.Entity), + ast.filter.Has(ns.bse.invalid)) + # predicate must match domain + self.assertRaises(errors.ConsistencyError, self.validate._has, self.schema.node(ns.bsfs.Tag), + ast.filter.Has(ns.bse.tag)) + # has checks count expression + self.assertRaises(errors.ConsistencyError, self.validate._has, self.schema.node(ns.bsfs.Entity), + ast.filter.Has(ns.bse.tag, ast.filter.Is('http://example.com/entity#1234'))) + # has accepts correct expressions + self.assertIsNone(self.validate._has(self.schema.node(ns.bsfs.Entity), ast.filter.Has(ns.bse.tag, ast.filter.GreaterThan(5)))) + + def test_is(self): + # type must be node + self.assertRaises(errors.ConsistencyError, self.validate._is, self.schema.literal(ns.bsfs.Literal), + ast.filter.Is('http://example.com/foo')) + # type must be in the schema + self.assertRaises(errors.ConsistencyError, self.validate._is, self.schema.node(ns.bsfs.Node).get_child(ns.bsfs.Invalid), + ast.filter.Is('http://example.com/foo')) + # is accepts correct expressions + self.assertIsNone(self.validate._is(self.schema.node(ns.bsfs.Entity), ast.filter.Is('http://example.com/entity#1234'))) + + def test_value(self): + # type must be literal + self.assertRaises(errors.ConsistencyError, self.validate._value, self.schema.node(ns.bsfs.Node), + ast.filter.Equals('hello world')) + self.assertRaises(errors.ConsistencyError, self.validate._value, self.schema.node(ns.bsfs.Node), + ast.filter.Substring('hello world')) + self.assertRaises(errors.ConsistencyError, self.validate._value, self.schema.node(ns.bsfs.Node), + ast.filter.StartsWith('hello world')) + self.assertRaises(errors.ConsistencyError, self.validate._value, self.schema.node(ns.bsfs.Node), + ast.filter.EndsWith('hello world')) + # type must be in the schema + self.assertRaises(errors.ConsistencyError, self.validate._value, self.schema.literal(ns.bsfs.Literal).get_child(ns.bsfs.Invalid), + ast.filter.Equals('hello world')) + self.assertRaises(errors.ConsistencyError, self.validate._value, self.schema.literal(ns.bsfs.Literal).get_child(ns.bsfs.Invalid), + ast.filter.Substring('hello world')) + self.assertRaises(errors.ConsistencyError, self.validate._value, self.schema.literal(ns.bsfs.Literal).get_child(ns.bsfs.Invalid), + ast.filter.StartsWith('hello world')) + self.assertRaises(errors.ConsistencyError, self.validate._value, self.schema.literal(ns.bsfs.Literal).get_child(ns.bsfs.Invalid), + ast.filter.EndsWith('hello world')) + # value accepts correct expressions + self.assertIsNone(self.validate._value(self.schema.literal(ns.xsd.string), ast.filter.Equals('hello world'))) + self.assertIsNone(self.validate._value(self.schema.literal(ns.xsd.string), ast.filter.Substring('hello world'))) + self.assertIsNone(self.validate._value(self.schema.literal(ns.xsd.string), 
ast.filter.StartsWith('hello world'))) + self.assertIsNone(self.validate._value(self.schema.literal(ns.xsd.string), ast.filter.EndsWith('hello world'))) + + def test_bounded(self): + # type must be literal + self.assertRaises(errors.ConsistencyError, self.validate._bounded, self.schema.node(ns.bsfs.Node), + ast.filter.GreaterThan(0)) + self.assertRaises(errors.ConsistencyError, self.validate._bounded, self.schema.node(ns.bsfs.Node), + ast.filter.LessThan(0)) + # type must be in the schema + self.assertRaises(errors.ConsistencyError, self.validate._bounded, self.schema.literal(ns.bsfs.Literal).get_child(ns.bsfs.Invalid), + ast.filter.GreaterThan(0)) + self.assertRaises(errors.ConsistencyError, self.validate._bounded, self.schema.literal(ns.bsfs.Literal).get_child(ns.bsfs.Invalid), + ast.filter.LessThan(0)) + # bounded accepts correct expressions + self.assertIsNone(self.validate._bounded(self.schema.literal(ns.xsd.integer), ast.filter.LessThan(0))) + self.assertIsNone(self.validate._bounded(self.schema.literal(ns.xsd.integer), ast.filter.GreaterThan(0))) - # FIXME: subtests for individual functions ## main ## diff --git a/test/utils/test_commons.py b/test/utils/test_commons.py index ce73788..3ad6dea 100644 --- a/test/utils/test_commons.py +++ b/test/utils/test_commons.py @@ -8,7 +8,7 @@ Author: Matthias Baumgartner, 2022 import unittest # objects to test -from bsfs.utils.commons import typename +from bsfs.utils.commons import typename, normalize_args ## code ## @@ -21,6 +21,21 @@ class TestCommons(unittest.TestCase): self.assertEqual(typename(123), 'int') self.assertEqual(typename(None), 'NoneType') + def test_normalize_args(self): + # one argument + self.assertEqual(normalize_args(1), (1, )) + # pass as arguments + self.assertEqual(normalize_args(1,2,3), (1,2,3)) + # pass as iterator + self.assertEqual(normalize_args(iter([1,2,3])), (1,2,3)) + # pass as generator + self.assertEqual(normalize_args((i for i in range(1, 4))), (1,2,3)) + self.assertEqual(normalize_args(i for i in range(1, 4)), (1,2,3)) # w/o brackets + # pass as iterable + self.assertEqual(normalize_args([1,2,3]), (1,2,3)) + # pass an iterable with a single item + self.assertEqual(normalize_args([1]), (1, )) + ## main ## -- cgit v1.2.3 From 73e39cb4967949025aefe874f401e27b0abb772c Mon Sep 17 00:00:00 2001 From: Matthias Baumgartner Date: Thu, 22 Dec 2022 20:29:57 +0100 Subject: filter ast parser and get method in sparql store --- bsfs/triple_store/base.py | 6 +- bsfs/triple_store/sparql/parse_filter.py | 307 +++++++++++ bsfs/triple_store/sparql/sparql.py | 51 +- test/triple_store/sparql/test_parse_filter.py | 727 ++++++++++++++++++++++++++ test/triple_store/sparql/test_sparql.py | 90 +++- 5 files changed, 1165 insertions(+), 16 deletions(-) create mode 100644 bsfs/triple_store/sparql/parse_filter.py create mode 100644 test/triple_store/sparql/test_parse_filter.py diff --git a/bsfs/triple_store/base.py b/bsfs/triple_store/base.py index 5ff9523..7e03714 100644 --- a/bsfs/triple_store/base.py +++ b/bsfs/triple_store/base.py @@ -113,9 +113,11 @@ class TripleStoreBase(abc.ABC): def get( self, node_type: _schema.Node, - query: ast.filter.FilterExpression, + query: typing.Optional[ast.filter.FilterExpression] = None, ) -> typing.Iterator[URI]: - """Return guids of nodes of type *node_type* that match the *query*.""" + """Return guids of nodes of type *node_type* that match the *query*. + Return all guids of the respective type if *query* is None. 
+ """ @abc.abstractmethod def exists( diff --git a/bsfs/triple_store/sparql/parse_filter.py b/bsfs/triple_store/sparql/parse_filter.py new file mode 100644 index 0000000..d4db0aa --- /dev/null +++ b/bsfs/triple_store/sparql/parse_filter.py @@ -0,0 +1,307 @@ +""" + +Part of the BlackStar filesystem (bsfs) module. +A copy of the license is provided with the project. +Author: Matthias Baumgartner, 2022 +""" +# imports +import typing + +# bsfs imports +from bsfs import schema as bsc +from bsfs.namespace import ns +from bsfs.query import ast +from bsfs.utils import URI, errors + +# exports +__all__: typing.Sequence[str] = ( + 'Filter', + ) + +class _GenHopName(): + """Generator that produces a new unique symbol name with each iteration.""" + + # Symbol name prefix. + prefix: str + + # Current counter. + curr: int + + def __init__(self, prefix: str = '?hop', start: int = 0): + self.prefix = prefix + self.curr = start - 1 + + def __next__(self): + """Generate and return the next unique name.""" + self.curr += 1 + return self.prefix + str(self.curr) + + +class Filter(): + """Translate `bsfs.query.ast.filter` structures into Sparql queries.""" + + # Current schema to validate against. + schema: bsc.Schema + + # Generator that produces unique symbol names. + ngen: _GenHopName + + # Vertex type. + T_VERTEX = typing.Union[bsc.Node, bsc.Literal] + + def __init__(self, schema): + self.schema = schema + self.ngen = _GenHopName() + + def __call__( + self, + root_type: bsc.Node, + root: typing.Optional[ast.filter.FilterExpression] = None, + ) -> str: + """ + """ + # check root_type + if not isinstance(root_type, bsc.Node): + raise errors.BackendError(f'expected Node, found {root_type}') + if root_type not in self.schema.nodes(): + raise errors.ConsistencyError(f'node {root_type} is not in the schema') + # parse root + if root is None: + cond = '' + else: + cond = self._parse_filter_expression(root_type, root, '?ent') + # assemble query + return f''' + SELECT ?ent + WHERE {{ + ?ent <{ns.rdf.type}>/<{ns.rdfs.subClassOf}>* <{root_type.uri}> . 
+ {cond} + }} + ''' + + def _parse_filter_expression(self, type_: T_VERTEX, node: ast.filter.FilterExpression, head: str) -> str: + """Route *node* to the handler of the respective FilterExpression subclass.""" + if isinstance(node, ast.filter.Is): + return self._is(type_, node, head) + if isinstance(node, ast.filter.Not): + return self._not(type_, node, head) + if isinstance(node, ast.filter.Has): + return self._has(type_, node, head) + if isinstance(node, ast.filter.Any): + return self._any(type_, node, head) + if isinstance(node, ast.filter.All): + return self._all(type_, node, head) + if isinstance(node, ast.filter.And): + return self._and(type_, node, head) + if isinstance(node, ast.filter.Or): + return self._or(type_, node, head) + if isinstance(node, ast.filter.Equals): + return self._equals(type_, node, head) + if isinstance(node, ast.filter.Substring): + return self._substring(type_, node, head) + if isinstance(node, ast.filter.StartsWith): + return self._starts_with(type_, node, head) + if isinstance(node, ast.filter.EndsWith): + return self._ends_with(type_, node, head) + if isinstance(node, ast.filter.LessThan): + return self._less_than(type_, node, head) + if isinstance(node, ast.filter.GreaterThan): + return self._greater_than(type_, node, head) + # invalid node + raise errors.BackendError(f'expected filter expression, found {node}') + + def _parse_predicate_expression( + self, + type_: T_VERTEX, + node: ast.filter.PredicateExpression + ) -> typing.Tuple[str, T_VERTEX]: + """Route *node* to the handler of the respective PredicateExpression subclass.""" + if isinstance(node, ast.filter.Predicate): + return self._predicate(type_, node) + if isinstance(node, ast.filter.OneOf): + return self._one_of(type_, node) + # invalid node + raise errors.BackendError(f'expected predicate expression, found {node}') + + def _one_of(self, node_type: T_VERTEX, node: ast.filter.OneOf) -> typing.Tuple[str, T_VERTEX]: + """ + """ + if not isinstance(node_type, bsc.Node): + raise errors.BackendError(f'expected Node, found {node_type}') + # walk through predicates + suburi, rng = set(), None + for pred in node: # OneOf guarantees at least one expression + puri, subrng = self._parse_predicate_expression(node_type, pred) + # track predicate uris + suburi.add(puri) + try: + # check for more generic range + if rng is None or subrng > rng: + rng = subrng + # check range consistency + if not subrng <= rng and not subrng >= rng: + raise errors.ConsistencyError(f'ranges {subrng} and {rng} are not related') + except TypeError as err: # subrng and rng are not comparable + raise errors.ConsistencyError(f'ranges {subrng} and {rng} are not related') from err + if rng is None: + # for mypy to be certain of the rng type + # if rng were None, we'd have gotten a TypeError above (None > None) + raise errors.UnreachableError() + # return joint predicate expression and next range + return '|'.join(suburi), rng + + def _predicate(self, node_type: T_VERTEX, node: ast.filter.Predicate) -> typing.Tuple[str, T_VERTEX]: + """ + """ + # check node_type + if not isinstance(node_type, bsc.Node): + raise errors.BackendError(f'expected Node, found {node_type}') + # fetch predicate and its uri + puri = node.predicate + # get and check predicate, domain, and range + if not self.schema.has_predicate(puri): + raise errors.ConsistencyError(f'predicate {puri} is not in the schema') + pred = self.schema.predicate(puri) + if pred.range is None: + # FIXME: It is a design error that Predicates can have a None range... 
+ raise errors.BackendError(f'predicate {pred} has no range') + dom, rng = pred.domain, pred.range + # encapsulate predicate uri + puri = f'<{puri}>' # type: ignore [assignment] # variable re-use confuses mypy + # apply reverse flag + if node.reverse: + puri = URI('^' + puri) + dom, rng = rng, dom # type: ignore [assignment] # variable re-use confuses mypy + # check path consistency + if not node_type <= dom: + raise errors.ConsistencyError(f'expected type {dom} or subtype thereof, found {node_type}') + # return predicate URI and next node type + return puri, rng + + def _any(self, node_type: T_VERTEX, node: ast.filter.Any, head: str) -> str: + """ + """ + if not isinstance(node_type, bsc.Node): + raise errors.BackendError(f'expected Node, found {node_type}') + # parse predicate + pred, next_type = self._parse_predicate_expression(node_type, node.predicate) + # parse expression + nexthead = next(self.ngen) + expr = self._parse_filter_expression(next_type, node.expr, nexthead) + # combine results + return f'{head} {pred} {nexthead} . {expr}' + + def _all(self, node_type: T_VERTEX, node: ast.filter.All, head: str) -> str: + """ + """ + # NOTE: All(P, E) := Not(Any(P, Not(E))) and EXISTS(P, ?) + if not isinstance(node_type, bsc.Node): + raise errors.BackendError(f'expected Node, found {node_type}') + # parse rewritten ast + expr = self._parse_filter_expression(node_type, + ast.filter.Not( + ast.filter.Any(node.predicate, + ast.filter.Not(node.expr))), head) + # parse predicate for existence constraint + pred, _ = self._parse_predicate_expression(node_type, node.predicate) + temphead = next(self.ngen) + # return existence and rewritten expression + return f'FILTER EXISTS {{ {head} {pred} {temphead} }} . ' + expr + + def _and(self, node_type: T_VERTEX, node: ast.filter.And, head: str) -> str: + """ + """ + sub = [self._parse_filter_expression(node_type, expr, head) for expr in node] + return ' . '.join(sub) + + def _or(self, node_type: T_VERTEX, node: ast.filter.Or, head: str) -> str: + """ + """ + # potential special case optimization: + # * ast: Or(Equals('foo'), Equals('bar'), ...) + # * query: VALUES ?head { "value1"^^<...> "value2"^^<...> "value3"^<...> ... } + sub = [self._parse_filter_expression(node_type, expr, head) for expr in node] + sub = ['{' + expr + '}' for expr in sub] + return ' UNION '.join(sub) + + def _not(self, node_type: T_VERTEX, node: ast.filter.Not, head: str) -> str: + """ + """ + expr = self._parse_filter_expression(node_type, node.expr, head) + if isinstance(node_type, bsc.Literal): + return f'MINUS {{ {expr} }}' + # NOTE: for bsc.Node types, we must include at least one expression in the body of MINUS, + # otherwise the connection between the context and body of MINUS is lost. + # The simplest (and non-interfering) choice is a type statement. + return f'MINUS {{ {head} <{ns.rdf.type}>/<{ns.rdfs.subClassOf}>* <{node_type.uri}> . {expr} }}' + + def _has(self, node_type: T_VERTEX, node: ast.filter.Has, head: str) -> str: + """ + """ + if not isinstance(node_type, bsc.Node): + raise errors.BackendError(f'expected Node, found {node_type}') + # parse predicate + pred, _ = self._parse_predicate_expression(node_type, node.predicate) + # get new heads + inner = next(self.ngen) + outer = next(self.ngen) + # predicate count expression (fetch number of predicates at *head*) + num_preds = f'{{ SELECT (COUNT(distinct {inner}) as {outer}) WHERE {{ {head} {pred} {inner} }} }}' + # count expression + # FIXME: We have to ensure that ns.xsd.integer is always known in the schema! 
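+ # Illustration (not verbatim parser output, names abbreviated): Has(bse:comment, GreaterThan(1)) at head ?ent expands roughly to
+ #   { SELECT (COUNT(distinct ?hopN) as ?hopM) WHERE { ?ent <bse:comment> ?hopN } } . FILTER (?hopM > 1.0)
+ # where ?hopN/?hopM are fresh names from self.ngen and the comparison operator depends on the count expression's strict flag.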
+ count_bounds = self._parse_filter_expression(self.schema.literal(ns.xsd.integer), node.count, outer) + # combine + return num_preds + ' . ' + count_bounds + + def _is(self, node_type: T_VERTEX, node: ast.filter.Is, head: str) -> str: + """ + """ + if not isinstance(node_type, bsc.Node): + raise errors.BackendError(f'expected Node, found {node_type}') + return f'VALUES {head} {{ <{node.value}> }}' + + def _equals(self, node_type: T_VERTEX, node: ast.filter.Equals, head: str) -> str: + """ + """ + if not isinstance(node_type, bsc.Literal): + raise errors.BackendError(f'expected Literal, found {node}') + return f'VALUES {head} {{ "{node.value}"^^<{node_type.uri}> }}' + + def _substring(self, node_type: T_VERTEX, node: ast.filter.Substring, head: str) -> str: + """ + """ + if not isinstance(node_type, bsc.Literal): + raise errors.BackendError(f'expected Literal, found {node_type}') + return f'FILTER contains(str({head}), "{node.value}")' + + def _starts_with(self, node_type: T_VERTEX, node: ast.filter.StartsWith, head: str) -> str: + """ + """ + if not isinstance(node_type, bsc.Literal): + raise errors.BackendError(f'expected Literal, found {node_type}') + return f'FILTER strstarts(str({head}), "{node.value}")' + + def _ends_with(self, node_type: T_VERTEX, node: ast.filter.EndsWith, head: str) -> str: + """ + """ + if not isinstance(node_type, bsc.Literal): + raise errors.BackendError(f'expected Literal, found {node_type}') + return f'FILTER strends(str({head}), "{node.value}")' + + def _less_than(self, node_type: T_VERTEX, node: ast.filter.LessThan, head: str) -> str: + """ + """ + if not isinstance(node_type, bsc.Literal): + raise errors.BackendError(f'expected Literal, found {node_type}') + equality = '=' if not node.strict else '' + return f'FILTER ({head} <{equality} {float(node.threshold)})' + + def _greater_than(self, node_type: T_VERTEX, node: ast.filter.GreaterThan, head: str) -> str: + """ + """ + if not isinstance(node_type, bsc.Literal): + raise errors.BackendError(f'expected Literal, found {node_type}') + equality = '=' if not node.strict else '' + return f'FILTER ({head} >{equality} {float(node.threshold)})' + +## EOF ## diff --git a/bsfs/triple_store/sparql/sparql.py b/bsfs/triple_store/sparql/sparql.py index 7172f34..c3cbff6 100644 --- a/bsfs/triple_store/sparql/sparql.py +++ b/bsfs/triple_store/sparql/sparql.py @@ -15,6 +15,7 @@ from bsfs.query import ast from bsfs.utils import errors, URI # inner-module imports +from . import parse_filter from .. import base @@ -86,11 +87,15 @@ class SparqlStore(base.TripleStoreBase): # The local schema. _schema: bsc.Schema + # Filter parser + _filter_parser: parse_filter.Filter + def __init__(self): super().__init__(None) self._graph = rdflib.Graph() self._transaction = _Transaction(self._graph) self._schema = bsc.Schema.Empty() + self._filter_parser = parse_filter.Filter(self._schema) # NOTE: mypy and pylint complain about the **kwargs not being listed (contrasting super) # However, not having it here is clearer since it's explicit that there are no arguments. 
@@ -127,10 +132,17 @@ class SparqlStore(base.TripleStoreBase): # get deleted classes sub = self.schema - schema - # remove predicate instances for pred in sub.predicates: + # remove predicate instances for src, trg in self._graph.subject_objects(rdflib.URIRef(pred.uri)): self._transaction.remove((src, rdflib.URIRef(pred.uri), trg)) + # remove predicate definition + if pred.parent is not None: + self._transaction.remove(( + rdflib.URIRef(pred.uri), + rdflib.RDFS.subClassOf, + rdflib.URIRef(pred.parent.uri), + )) # remove node instances for node in sub.nodes: @@ -144,17 +156,46 @@ class SparqlStore(base.TripleStoreBase): self._transaction.remove((inst, pred, trg)) # remove instance self._transaction.remove((inst, rdflib.RDF.type, rdflib.URIRef(node.uri))) - - # NOTE: Nothing to do for literals + # remove node definition + if node.parent is not None: + self._transaction.remove(( + rdflib.URIRef(node.uri), + rdflib.RDFS.subClassOf, + rdflib.URIRef(node.parent.uri), + )) + + for lit in sub.literals: + # remove literal definition + if lit.parent is not None: + self._transaction.remove(( + rdflib.URIRef(lit.uri), + rdflib.RDFS.subClassOf, + rdflib.URIRef(lit.parent.uri), + )) + + # add predicate, node, and literal hierarchies to the graph + for itm in itertools.chain(schema.predicates(), schema.nodes(), schema.literals()): + if itm.parent is not None: + self._transaction.add((rdflib.URIRef(itm.uri), rdflib.RDFS.subClassOf, rdflib.URIRef(itm.parent.uri))) # commit instance changes self.commit() # migrate schema self._schema = schema + self._filter_parser.schema = schema - def get(self, node_type: bsc.Node, query: ast.filter.FilterExpression) -> typing.Iterator[URI]: - raise NotImplementedError() + def get( + self, + node_type: bsc.Node, + query: typing.Optional[ast.filter.FilterExpression] = None, + ) -> typing.Iterator[URI]: + if node_type not in self.schema.nodes(): + raise errors.ConsistencyError(f'{node_type} is not defined in the schema') + if not isinstance(query, ast.filter.FilterExpression): + raise TypeError(query) + for guid, in self._graph.query(self._filter_parser(node_type, query)): + yield URI(guid) def _has_type(self, subject: URI, node_type: bsc.Node) -> bool: """Return True if *subject* is a node of class *node_type* or a subclass thereof.""" diff --git a/test/triple_store/sparql/test_parse_filter.py b/test/triple_store/sparql/test_parse_filter.py new file mode 100644 index 0000000..bd19803 --- /dev/null +++ b/test/triple_store/sparql/test_parse_filter.py @@ -0,0 +1,727 @@ +""" + +Part of the bsfs test suite. +A copy of the license is provided with the project. +Author: Matthias Baumgartner, 2022 +""" +# imports +import rdflib +import unittest + +# bsie imports +from bsfs import schema as _schema +from bsfs.namespace import ns +from bsfs.query import ast +from bsfs.utils import errors + +# objects to test +from bsfs.triple_store.sparql.parse_filter import Filter + + +## code ## + +class TestParseFilter(unittest.TestCase): + def setUp(self): + # schema + self.schema = _schema.Schema.from_string(''' + prefix rdfs: + prefix xsd: + + prefix bsfs: + prefix bse: + + bsfs:Entity rdfs:subClassOf bsfs:Node . + bsfs:Image rdfs:subClassOf bsfs:Entity . + bsfs:Tag rdfs:subClassOf bsfs:Node . + + xsd:string rdfs:subClassOf bsfs:Literal . + xsd:integer rdfs:subClassOf bsfs:Literal . + bsfs:URI rdfs:subClassOf bsfs:Literal . + + bse:comment rdfs:subClassOf bsfs:Predicate ; + rdfs:domain bsfs:Node ; + rdfs:range xsd:string ; + bsfs:unique "false"^^xsd:boolean . 
+ + bse:filesize rdfs:subClassOf bsfs:Predicate ; + rdfs:domain bsfs:Entity ; + rdfs:range xsd:integer ; + bsfs:unique "true"^^xsd:boolean . + + bse:buddy rdfs:subClassOf bsfs:Predicate ; + rdfs:domain bsfs:Entity ; + rdfs:range bsfs:Node ; + bsfs:unique "false"^^xsd:boolean . + + bse:tag rdfs:subClassOf bsfs:Predicate ; + rdfs:domain bsfs:Entity ; + rdfs:range bsfs:Tag ; + bsfs:unique "false"^^xsd:boolean . + + bse:representative rdfs:subClassOf bsfs:Predicate ; + rdfs:domain bsfs:Tag ; + rdfs:range bsfs:Image ; + bsfs:unique "false"^^xsd:boolean . + + bse:iso rdfs:subClassOf bsfs:Predicate ; + rdfs:domain bsfs:Image ; + rdfs:range xsd:integer ; + bsfs:unique "true"^^xsd:boolean . + + ''') + + # parser instance + self.parser = Filter(self.schema) + + # graph to test queries + self.graph = rdflib.Graph() + # schema hierarchies + self.graph.add((rdflib.URIRef('http://bsfs.ai/schema/Entity'), rdflib.RDFS.subClassOf, rdflib.URIRef('http://bsfs.ai/schema/Node'))) + self.graph.add((rdflib.URIRef('http://bsfs.ai/schema/Image'), rdflib.RDFS.subClassOf, rdflib.URIRef('http://bsfs.ai/schema/Entity'))) + self.graph.add((rdflib.URIRef('http://bsfs.ai/schema/Tag'), rdflib.RDFS.subClassOf, rdflib.URIRef('http://bsfs.ai/schema/Node'))) + # entities + self.graph.add((rdflib.URIRef('http://example.com/entity#1234'), rdflib.RDF.type, rdflib.URIRef('http://bsfs.ai/schema/Entity'))) + self.graph.add((rdflib.URIRef('http://example.com/entity#4321'), rdflib.RDF.type, rdflib.URIRef('http://bsfs.ai/schema/Entity'))) + # tags + self.graph.add((rdflib.URIRef('http://example.com/tag#1234'), rdflib.RDF.type, rdflib.URIRef('http://bsfs.ai/schema/Tag'))) + self.graph.add((rdflib.URIRef('http://example.com/tag#4321'), rdflib.RDF.type, rdflib.URIRef('http://bsfs.ai/schema/Tag'))) + # images + self.graph.add((rdflib.URIRef('http://example.com/image#1234'), rdflib.RDF.type, rdflib.URIRef('http://bsfs.ai/schema/Image'))) + self.graph.add((rdflib.URIRef('http://example.com/image#4321'), rdflib.RDF.type, rdflib.URIRef('http://bsfs.ai/schema/Image'))) + # node comments + self.graph.add((rdflib.URIRef('http://example.com/entity#1234'), rdflib.URIRef(ns.bse.comment), rdflib.Literal('Me, Myself, and I', datatype=rdflib.XSD.string))) + self.graph.add((rdflib.URIRef('http://example.com/entity#1234'), rdflib.URIRef(ns.bse.comment), rdflib.Literal('hello world', datatype=rdflib.XSD.string))) + self.graph.add((rdflib.URIRef('http://example.com/entity#4321'), rdflib.URIRef(ns.bse.comment), rdflib.Literal('hello world', datatype=rdflib.XSD.string))) + self.graph.add((rdflib.URIRef('http://example.com/image#1234'), rdflib.URIRef(ns.bse.comment), rdflib.Literal('Me, Myself, and I', datatype=rdflib.XSD.string))) + self.graph.add((rdflib.URIRef('http://example.com/tag#1234'), rdflib.URIRef(ns.bse.comment), rdflib.Literal('Me, Myself, and I', datatype=rdflib.XSD.string))) + self.graph.add((rdflib.URIRef('http://example.com/tag#4321'), rdflib.URIRef(ns.bse.comment), rdflib.Literal('4321', datatype=rdflib.XSD.string))) + # entity filesizes + self.graph.add((rdflib.URIRef('http://example.com/entity#1234'), rdflib.URIRef(ns.bse.filesize), rdflib.Literal(1234, datatype=rdflib.XSD.integer))) + self.graph.add((rdflib.URIRef('http://example.com/entity#4321'), rdflib.URIRef(ns.bse.filesize), rdflib.Literal(4321, datatype=rdflib.XSD.integer))) + self.graph.add((rdflib.URIRef('http://example.com/image#1234'), rdflib.URIRef(ns.bse.filesize), rdflib.Literal(1234, datatype=rdflib.XSD.integer))) + 
self.graph.add((rdflib.URIRef('http://example.com/image#4321'), rdflib.URIRef(ns.bse.filesize), rdflib.Literal(4321, datatype=rdflib.XSD.integer))) + # entity tags + self.graph.add((rdflib.URIRef('http://example.com/entity#1234'), rdflib.URIRef(ns.bse.tag), rdflib.URIRef('http://example.com/tag#1234'))) + self.graph.add((rdflib.URIRef('http://example.com/entity#4321'), rdflib.URIRef(ns.bse.tag), rdflib.URIRef('http://example.com/tag#4321'))) + self.graph.add((rdflib.URIRef('http://example.com/image#1234'), rdflib.URIRef(ns.bse.tag), rdflib.URIRef('http://example.com/tag#1234'))) + # tag representatives + self.graph.add((rdflib.URIRef('http://example.com/tag#1234'), rdflib.URIRef(ns.bse.representative), rdflib.URIRef('http://example.com/image#1234'))) + self.graph.add((rdflib.URIRef('http://example.com/tag#4321'), rdflib.URIRef(ns.bse.representative), rdflib.URIRef('http://example.com/image#4321'))) + # entity buddies + self.graph.add((rdflib.URIRef('http://example.com/entity#1234'), rdflib.URIRef(ns.bse.buddy), rdflib.URIRef('http://example.com/image#1234'))) + self.graph.add((rdflib.URIRef('http://example.com/entity#4321'), rdflib.URIRef(ns.bse.buddy), rdflib.URIRef('http://example.com/image#4321'))) + # image iso + self.graph.add((rdflib.URIRef('http://example.com/image#1234'), rdflib.URIRef(ns.bse.iso), rdflib.Literal(1234, datatype=rdflib.XSD.integer))) + self.graph.add((rdflib.URIRef('http://example.com/image#4321'), rdflib.URIRef(ns.bse.iso), rdflib.Literal(4321, datatype=rdflib.XSD.integer))) + + + def test_routing(self): + self.assertRaises(errors.BackendError, self.parser._parse_filter_expression, '1234', None, '') + self.assertRaises(errors.BackendError, self.parser._parse_predicate_expression, '1234', None) + + def test_call(self): + # NOTE: The individual ast components are considered in the respective tests. Here, we test __call__ specifics. 
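+ # For reference, the parser emits queries of the rough shape
+ #   SELECT ?ent WHERE { ?ent <rdf:type>/<rdfs:subClassOf>* <root_type.uri> . <condition> }
+ # which the assertions below execute directly against self.graph.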
+ + # __call__ requires a valid root type + self.assertRaises(errors.BackendError, self.parser, self.schema.literal(ns.bsfs.Literal), None) + self.assertRaises(errors.ConsistencyError, self.parser, self.schema.node(ns.bsfs.Node).get_child(ns.bsfs.Invalid), None) + # __call__ requires a parseable root + self.assertRaises(errors.BackendError, self.parser, self.schema.node(ns.bsfs.Entity), ast.filter.FilterExpression()) + # __call__ returns an executable query + q = self.parser(self.schema.node(ns.bsfs.Entity), + ast.filter.Or( + ast.filter.Is('http://example.com/entity#1234'), + ast.filter.Is('http://example.com/entity#5678'))) + self.assertSetEqual({str(guid) for guid, in self.graph.query(q)}, {'http://example.com/entity#1234'}) + # root is optional + q = self.parser(self.schema.node(ns.bsfs.Entity)) + self.assertSetEqual({str(guid) for guid, in self.graph.query(q)}, + {'http://example.com/entity#1234', 'http://example.com/entity#4321', 'http://example.com/image#1234', 'http://example.com/image#4321'}) + q = self.parser(self.schema.node(ns.bsfs.Tag)) + self.assertSetEqual({str(guid) for guid, in self.graph.query(q)}, + {'http://example.com/tag#1234', 'http://example.com/tag#4321'}) + + + def test_is(self): + # _is requires a node + self.assertRaises(errors.BackendError, self.parser._is, self.schema.literal(ns.bsfs.Literal), ast.filter.Is('http://example.com/entity#1234'), '?ent') + # a single Is statement + q = self.parser(self.schema.node(ns.bsfs.Entity), ast.filter.Is('http://example.com/entity#1234')) + self.assertSetEqual({str(guid) for guid, in self.graph.query(q)}, + {'http://example.com/entity#1234'}) + # an aggregate of Is statements + q = self.parser(self.schema.node(ns.bsfs.Entity), + ast.filter.Or( + ast.filter.Is('http://example.com/entity#1234'), + ast.filter.Is('http://example.com/entity#4321'), + )) + self.assertSetEqual({str(guid) for guid, in self.graph.query(q)}, + {'http://example.com/entity#1234', 'http://example.com/entity#4321'}) + # combined with other filters + q = self.parser(self.schema.node(ns.bsfs.Entity), + ast.filter.And( + ast.filter.Or( + ast.filter.Is('http://example.com/entity#1234'), + ast.filter.Is('http://example.com/entity#4321'), + ), + ast.filter.Any(ns.bse.comment, + ast.filter.Equals('Me, Myself, and I') + ), + )) + self.assertSetEqual({str(guid) for guid, in self.graph.query(q)}, + {'http://example.com/entity#1234'}) + # as argument of Any/All + q = self.parser(self.schema.node(ns.bsfs.Entity), + ast.filter.Any(ns.bse.tag, ast.filter.Is('http://example.com/tag#1234'))) + self.assertSetEqual({str(guid) for guid, in self.graph.query(q)}, + {'http://example.com/entity#1234', 'http://example.com/image#1234'}) + + + def test_equals(self): + # _equals requires a literal + self.assertRaises(errors.BackendError, self.parser._equals, self.schema.node(ns.bsfs.Entity), ast.filter.Equals('hello world'), '?ent') + # a single Equals statement + q = self.parser(self.schema.node(ns.bsfs.Entity), ast.filter.Any(ns.bse.comment, ast.filter.Equals('hello world'))) + self.assertSetEqual({str(guid) for guid, in self.graph.query(q)}, + {'http://example.com/entity#1234', 'http://example.com/entity#4321'}) + # a single Equals statement that includes subtypes + q = self.parser(self.schema.node(ns.bsfs.Entity), ast.filter.Any(ns.bse.comment, ast.filter.Equals('Me, Myself, and I'))) + self.assertSetEqual({str(guid) for guid, in self.graph.query(q)}, + {'http://example.com/entity#1234', 'http://example.com/image#1234'}) + # an Equals statement on an integer + q = 
self.parser(self.schema.node(ns.bsfs.Entity), ast.filter.Any(ns.bse.filesize, ast.filter.Equals(4321))) + self.assertSetEqual({str(guid) for guid, in self.graph.query(q)}, + {'http://example.com/entity#4321', 'http://example.com/image#4321'}) + + + def test_substring(self): + # _substring requires a literal + self.assertRaises(errors.BackendError, self.parser._substring, self.schema.node(ns.bsfs.Entity), ast.filter.Substring('hello world'), '?ent') + # a single Substring statement + q = self.parser(self.schema.node(ns.bsfs.Entity), ast.filter.Any(ns.bse.comment, ast.filter.Substring('hello'))) + self.assertSetEqual({str(guid) for guid, in self.graph.query(q)}, + {'http://example.com/entity#1234', 'http://example.com/entity#4321'}) + q = self.parser(self.schema.node(ns.bsfs.Entity), ast.filter.Any(ns.bse.comment, ast.filter.Substring('lo wo'))) + self.assertSetEqual({str(guid) for guid, in self.graph.query(q)}, + {'http://example.com/entity#1234', 'http://example.com/entity#4321'}) + # a single Substring statement that includes subtypes + q = self.parser(self.schema.node(ns.bsfs.Entity), ast.filter.Any(ns.bse.comment, ast.filter.Substring('Myself'))) + self.assertSetEqual({str(guid) for guid, in self.graph.query(q)}, + {'http://example.com/entity#1234', 'http://example.com/image#1234'}) + # an Substring statement on an integer + q = self.parser(self.schema.node(ns.bsfs.Entity), ast.filter.Any(ns.bse.filesize, ast.filter.Substring('32'))) + self.assertSetEqual({str(guid) for guid, in self.graph.query(q)}, + {'http://example.com/entity#4321', 'http://example.com/image#4321'}) + + + def test_starts_with(self): + # _starts_with requires a literal + self.assertRaises(errors.BackendError, self.parser._starts_with, self.schema.node(ns.bsfs.Entity), ast.filter.StartsWith('hello world'), '?ent') + # a single StartsWith statement + q = self.parser(self.schema.node(ns.bsfs.Entity), ast.filter.Any(ns.bse.comment, ast.filter.StartsWith('hello'))) + self.assertSetEqual({str(guid) for guid, in self.graph.query(q)}, + {'http://example.com/entity#1234', 'http://example.com/entity#4321'}) + # a single StartsWith statement that includes subtypes + q = self.parser(self.schema.node(ns.bsfs.Entity), ast.filter.Any(ns.bse.comment, ast.filter.StartsWith('Me, Mys'))) + self.assertSetEqual({str(guid) for guid, in self.graph.query(q)}, + {'http://example.com/entity#1234', 'http://example.com/image#1234'}) + # an StartsWith statement on an integer + q = self.parser(self.schema.node(ns.bsfs.Entity), ast.filter.Any(ns.bse.filesize, ast.filter.StartsWith(432))) + self.assertSetEqual({str(guid) for guid, in self.graph.query(q)}, + {'http://example.com/entity#4321', 'http://example.com/image#4321'}) + + + def test_ends_with(self): + # _ends_with requires a literal + self.assertRaises(errors.BackendError, self.parser._ends_with, self.schema.node(ns.bsfs.Entity), ast.filter.EndsWith('hello world'), '?ent') + # a single EndsWith statement + q = self.parser(self.schema.node(ns.bsfs.Entity), ast.filter.Any(ns.bse.comment, ast.filter.EndsWith('orld'))) + self.assertSetEqual({str(guid) for guid, in self.graph.query(q)}, + {'http://example.com/entity#1234', 'http://example.com/entity#4321'}) + # a single EndsWith statement that includes subtypes + q = self.parser(self.schema.node(ns.bsfs.Entity), ast.filter.Any(ns.bse.comment, ast.filter.EndsWith('and I'))) + self.assertSetEqual({str(guid) for guid, in self.graph.query(q)}, + {'http://example.com/entity#1234', 'http://example.com/image#1234'}) + # an EndsWith statement on an 
integer + q = self.parser(self.schema.node(ns.bsfs.Entity), ast.filter.Any(ns.bse.filesize, ast.filter.EndsWith(321))) + self.assertSetEqual({str(guid) for guid, in self.graph.query(q)}, + {'http://example.com/entity#4321', 'http://example.com/image#4321'}) + + + def test_less_than(self): + # _less_than requires a literal + self.assertRaises(errors.BackendError, self.parser._less_than, self.schema.node(ns.bsfs.Entity), ast.filter.LessThan(2000), '?ent') + # a single LessThan statement + q = self.parser(self.schema.node(ns.bsfs.Image), ast.filter.Any(ns.bse.iso, ast.filter.LessThan(2000))) + self.assertSetEqual({str(guid) for guid, in self.graph.query(q)}, + {'http://example.com/image#1234'}) + # _less_than respects boundary + q = self.parser(self.schema.node(ns.bsfs.Image), ast.filter.Any(ns.bse.iso, ast.filter.LessThan(1234, strict=True))) + self.assertSetEqual({str(guid) for guid, in self.graph.query(q)}, set()) + q = self.parser(self.schema.node(ns.bsfs.Image), ast.filter.Any(ns.bse.iso, ast.filter.LessThan(1234, strict=False))) + self.assertSetEqual({str(guid) for guid, in self.graph.query(q)}, + {'http://example.com/image#1234'}) + # a single LessThan statement that includes subtypes + q = self.parser(self.schema.node(ns.bsfs.Entity), ast.filter.Any(ns.bse.filesize, ast.filter.LessThan(2000))) + self.assertSetEqual({str(guid) for guid, in self.graph.query(q)}, + {'http://example.com/entity#1234', 'http://example.com/image#1234'}) + # an LessThan statement on a string + # always negative; note that http://example.com/tag#4321 is also not returned although its comment is a pure number + q = self.parser(self.schema.node(ns.bsfs.Entity), ast.filter.Any(ns.bse.comment, ast.filter.LessThan(10_000))) + self.assertSetEqual({str(guid) for guid, in self.graph.query(q)}, set()) + + + def test_greater_than(self): + # _greater_than requires a literal + self.assertRaises(errors.BackendError, self.parser._greater_than, self.schema.node(ns.bsfs.Entity), ast.filter.GreaterThan(2000), '?ent') + # a single GreaterThan statement + q = self.parser(self.schema.node(ns.bsfs.Image), ast.filter.Any(ns.bse.iso, ast.filter.GreaterThan(2000))) + self.assertSetEqual({str(guid) for guid, in self.graph.query(q)}, + {'http://example.com/image#4321'}) + # _greater_than respects boundary + q = self.parser(self.schema.node(ns.bsfs.Image), ast.filter.Any(ns.bse.iso, ast.filter.GreaterThan(4321, strict=True))) + self.assertSetEqual({str(guid) for guid, in self.graph.query(q)}, set()) + q = self.parser(self.schema.node(ns.bsfs.Image), ast.filter.Any(ns.bse.iso, ast.filter.GreaterThan(4321, strict=False))) + self.assertSetEqual({str(guid) for guid, in self.graph.query(q)}, + {'http://example.com/image#4321'}) + # a single GreaterThan statement that includes subtypes + q = self.parser(self.schema.node(ns.bsfs.Entity), ast.filter.Any(ns.bse.filesize, ast.filter.GreaterThan(2000))) + self.assertSetEqual({str(guid) for guid, in self.graph.query(q)}, + {'http://example.com/entity#4321', 'http://example.com/image#4321'}) + # an GreaterThan statement on a string + # always positive + q = self.parser(self.schema.node(ns.bsfs.Entity), ast.filter.Any(ns.bse.comment, ast.filter.GreaterThan(0))) + self.assertSetEqual({str(guid) for guid, in self.graph.query(q)}, + {'http://example.com/entity#1234', 'http://example.com/entity#4321', 'http://example.com/image#1234'}) + + + def test_and(self): + # And childs have to match the node type + self.assertRaises(errors.BackendError, self.parser, + self.schema.node(ns.bsfs.Entity), + 
ast.filter.And( + ast.filter.StartsWith('hello'), + ast.filter.EndsWith('world'), + )) + # no child produces an empty query + self.assertEqual(self.parser._and( + self.schema.node(ns.bsfs.Entity), + ast.filter.And(), '?ent'), '') + # And can mix different conditions + q = self.parser(self.schema.node(ns.bsfs.Entity), + ast.filter.And( + ast.filter.Is('http://example.com/entity#1234'), + ast.filter.Any(ns.bse.filesize, ast.filter.Equals(1234)), + ast.filter.Any(ns.bse.comment, ast.filter.Equals('Me, Myself, and I')), + )) + self.assertSetEqual({str(guid) for guid, in self.graph.query(q)}, + {'http://example.com/entity#1234'}) + # all conditions have to match + q = self.parser(self.schema.node(ns.bsfs.Entity), + ast.filter.And( + ast.filter.Is('http://example.com/entity#4321'), + ast.filter.Any(ns.bse.filesize, ast.filter.Equals(1234)), + ast.filter.Any(ns.bse.comment, ast.filter.Equals('Me, Myself, and I')), + )) + self.assertSetEqual({str(guid) for guid, in self.graph.query(q)}, set()) + q = self.parser(self.schema.node(ns.bsfs.Entity), + ast.filter.And( + ast.filter.Is('http://example.com/entity#1234'), + ast.filter.Any(ns.bse.filesize, ast.filter.Equals(4321)), + ast.filter.Any(ns.bse.comment, ast.filter.Equals('Me, Myself, and I')), + )) + self.assertSetEqual({str(guid) for guid, in self.graph.query(q)}, set()) + q = self.parser(self.schema.node(ns.bsfs.Entity), + ast.filter.And( + ast.filter.Is('http://example.com/entity#1234'), + ast.filter.Any(ns.bse.filesize, ast.filter.Equals(1234)), + ast.filter.Any(ns.bse.comment, ast.filter.Equals('foobar')), + )) + self.assertSetEqual({str(guid) for guid, in self.graph.query(q)}, set()) + # And can be nested + q = self.parser(self.schema.node(ns.bsfs.Entity), + ast.filter.And( + ast.filter.Is('http://example.com/entity#1234'), + ast.filter.And( + ast.filter.Any(ns.bse.filesize, ast.filter.Equals(1234)), + ast.filter.Any(ns.bse.comment, ast.filter.Equals('Me, Myself, and I')), + ), + )) + self.assertSetEqual({str(guid) for guid, in self.graph.query(q)}, + {'http://example.com/entity#1234'}) + + + def test_or(self): + # Or childs have to match the node type + self.assertRaises(errors.BackendError, self.parser, + self.schema.node(ns.bsfs.Entity), + ast.filter.Or( + ast.filter.StartsWith('hello'), + ast.filter.EndsWith('world'), + )) + # no child produces an empty query + self.assertEqual(self.parser._and( + self.schema.node(ns.bsfs.Entity), + ast.filter.Or(), '?ent'), '') + # Or can mix different conditions + q = self.parser(self.schema.node(ns.bsfs.Entity), + ast.filter.Or( + ast.filter.Is('http://example.com/entity#1234'), + ast.filter.Any(ns.bse.filesize, ast.filter.Equals(4321)), + ast.filter.Any(ns.bse.comment, ast.filter.Equals('Me, Myself, and I')), + )) + self.assertSetEqual({str(guid) for guid, in self.graph.query(q)}, + {'http://example.com/entity#1234', 'http://example.com/image#1234', 'http://example.com/entity#4321', 'http://example.com/image#4321'}) + # at least one condition has to match + q = self.parser(self.schema.node(ns.bsfs.Entity), + ast.filter.Or( + ast.filter.Is('http://example.com/entity#5678'), + ast.filter.Any(ns.bse.filesize, ast.filter.Equals(8765)), + ast.filter.Any(ns.bse.comment, ast.filter.Equals('foobar')), + )) + self.assertSetEqual({str(guid) for guid, in self.graph.query(q)}, set()) + q = self.parser(self.schema.node(ns.bsfs.Entity), + ast.filter.Or( + ast.filter.Is('http://example.com/entity#1234'), + ast.filter.Any(ns.bse.filesize, ast.filter.Equals(8765)), + ast.filter.Any(ns.bse.comment, 
ast.filter.Equals('foobar')), + )) + self.assertSetEqual({str(guid) for guid, in self.graph.query(q)}, + {'http://example.com/entity#1234'}) + q = self.parser(self.schema.node(ns.bsfs.Entity), + ast.filter.Or( + ast.filter.Is('http://example.com/entity#5678'), + ast.filter.Any(ns.bse.filesize, ast.filter.Equals(4321)), + ast.filter.Any(ns.bse.comment, ast.filter.Equals('foobar')), + )) + self.assertSetEqual({str(guid) for guid, in self.graph.query(q)}, + {'http://example.com/entity#4321', 'http://example.com/image#4321'}) + q = self.parser(self.schema.node(ns.bsfs.Entity), + ast.filter.Or( + ast.filter.Is('http://example.com/entity#5678'), + ast.filter.Any(ns.bse.filesize, ast.filter.Equals(8765)), + ast.filter.Any(ns.bse.comment, ast.filter.Equals('Me, Myself, and I')), + )) + self.assertSetEqual({str(guid) for guid, in self.graph.query(q)}, + {'http://example.com/entity#1234', 'http://example.com/image#1234'}) + # Or can be nested + q = self.parser(self.schema.node(ns.bsfs.Entity), + ast.filter.Or( + ast.filter.Is('http://example.com/entity#1234'), + ast.filter.Or( + ast.filter.Any(ns.bse.filesize, ast.filter.Equals(4321)), + ast.filter.Any(ns.bse.comment, ast.filter.Equals('Me, Myself, and I')), + ), + )) + self.assertSetEqual({str(guid) for guid, in self.graph.query(q)}, + {'http://example.com/entity#1234', 'http://example.com/image#1234', 'http://example.com/entity#4321', 'http://example.com/image#4321'}) + + + + def test_any(self): + # _any requires a node + self.assertRaises(errors.BackendError, self.parser._any, + self.schema.literal(ns.bsfs.Literal), + ast.filter.Any(ns.bse.filesize, ast.filter.Equals(1234)), '?ent') + # node type must match predicate's domain + self.assertRaises(errors.ConsistencyError, self.parser._any, + self.schema.node(ns.bsfs.Tag), + ast.filter.Any(ns.bse.filesize, ast.filter.Equals(1234)), '?ent') + # predicate must be valid + self.assertRaises(errors.ConsistencyError, self.parser._any, + self.schema.node(ns.bsfs.Entity), + ast.filter.Any(ns.bse.invalid, ast.filter.Equals(1234)), '?ent') + # _any returns a valid query + q = self.parser(self.schema.node(ns.bsfs.Entity), + ast.filter.Any(ns.bse.filesize, ast.filter.Equals(1234))) + self.assertSetEqual({str(guid) for guid, in self.graph.query(q)}, + {'http://example.com/entity#1234', 'http://example.com/image#1234'}) + # _any can be nested + q = self.parser(self.schema.node(ns.bsfs.Entity), + ast.filter.Any(ns.bse.tag, + ast.filter.Any(ns.bse.representative, + ast.filter.Is('http://example.com/image#1234')))) + self.assertSetEqual({str(guid) for guid, in self.graph.query(q)}, + {'http://example.com/entity#1234', 'http://example.com/image#1234'}) + + + def test_all(self): + # All requires a Node + self.assertRaises(errors.BackendError, self.parser._all, self.schema.literal(ns.bsfs.Literal), None, '') + # All Nodes + q = self.parser(self.schema.node(ns.bsfs.Entity), + ast.filter.All(ns.bse.tag, ast.filter.Is('http://example.com/tag#1234'))) + self.assertSetEqual({str(guid) for guid, in self.graph.query(q)}, + {'http://example.com/entity#1234', 'http://example.com/image#1234'}) + # All values + q = self.parser(self.schema.node(ns.bsfs.Entity), + ast.filter.All(ns.bse.comment, ast.filter.Equals('hello world'))) + self.assertSetEqual({str(guid) for guid, in self.graph.query(q)}, + {'http://example.com/entity#4321'}) + # All on value within Or branch + # entity#1234 is selected because all of its comments are in ("hello world", "Me, Myself, and I") + q = self.parser(self.schema.node(ns.bsfs.Entity), + 
ast.filter.All(ns.bse.comment, ast.filter.Or( + ast.filter.Equals('hello world'), + ast.filter.Equals('Me, Myself, and I')))) + self.assertSetEqual({str(guid) for guid, in self.graph.query(q)}, + {'http://example.com/entity#1234', 'http://example.com/entity#4321', 'http://example.com/image#1234'}) + # All requires at least one predicate/value + q = self.parser(self.schema.node(ns.bsfs.Entity), + ast.filter.All(ns.bse.comment, ast.filter.Equals('Me, Myself, and I'))) + self.assertSetEqual({str(guid) for guid, in self.graph.query(q)}, + {'http://example.com/image#1234'}) + # All within a statement + q = self.parser(self.schema.node(ns.bsfs.Entity), + ast.filter.And( + ast.filter.All(ns.bse.tag, ast.filter.Is('http://example.com/tag#1234')), # entity#1234, image#1234 + ast.filter.All(ns.bse.comment, ast.filter.Or( # entity#1234, entity#4321, image#1234 + ast.filter.Equals('hello world'), + ast.filter.Equals('Me, Myself, and I'), + )) + ) + ) + self.assertSetEqual({str(guid) for guid, in self.graph.query(q)}, + {'http://example.com/entity#1234', 'http://example.com/image#1234'}) + # All with reversed Predicate + q = self.parser(self.schema.node(ns.bsfs.Tag), + ast.filter.All(ast.filter.Predicate(ns.bse.tag, reverse=True), ast.filter.Is('http://example.com/entity#4321'))) + self.assertSetEqual({str(guid) for guid, in self.graph.query(q)}, + {'http://example.com/tag#4321'}) + # All with multiple predicates + q = self.parser(self.schema.node(ns.bsfs.Entity), + ast.filter.All(ast.filter.OneOf(ns.bse.tag, ns.bse.buddy), # entity#1234 (tag:tag#1234), entity#1234 (buddy:image#1234), image#1234(tag:tag#1234) + ast.filter.Any(ns.bse.comment, ast.filter.Equals('Me, Myself, and I')))) # entity#1234, image#1234, tag#1234 + self.assertSetEqual({str(guid) for guid, in self.graph.query(q)}, + {'http://example.com/entity#1234', 'http://example.com/image#1234'}) + + + + def test_not(self): + # Not applies on conditions + q = self.parser(self.schema.node(ns.bsfs.Entity), + ast.filter.Not(ast.filter.Is('http://example.com/entity#1234'))) + self.assertSetEqual({str(guid) for guid, in self.graph.query(q)}, + {'http://example.com/image#1234', 'http://example.com/entity#4321', 'http://example.com/image#4321'}) + # Not applies on conditions within branches + q = self.parser(self.schema.node(ns.bsfs.Entity), + ast.filter.Any(ns.bse.comment, ast.filter.Not(ast.filter.Equals('Me, Myself, and I')))) + self.assertSetEqual({str(guid) for guid, in self.graph.query(q)}, + {'http://example.com/entity#1234', 'http://example.com/entity#4321'}) + # Not applies on branches + q = self.parser(self.schema.node(ns.bsfs.Entity), + ast.filter.Not(ast.filter.Any(ns.bse.comment, ast.filter.Equals('Me, Myself, and I')))) + self.assertSetEqual({str(guid) for guid, in self.graph.query(q)}, + {'http://example.com/entity#4321', 'http://example.com/image#4321'}) + # Double Not cancel each other + q = self.parser(self.schema.node(ns.bsfs.Entity), + ast.filter.Not(ast.filter.Not(ast.filter.Is('http://example.com/entity#1234')))) + self.assertSetEqual({str(guid) for guid, in self.graph.query(q)}, + {'http://example.com/entity#1234'}) + # Not works within aggregation (and) + q = self.parser(self.schema.node(ns.bsfs.Entity), + ast.filter.And( + ast.filter.Not(ast.filter.Is('http://example.com/entity#1234')), + ast.filter.Any(ns.bse.comment, ast.filter.Equals('hello world')), + )) + self.assertSetEqual({str(guid) for guid, in self.graph.query(q)}, + {'http://example.com/entity#4321'}) + # Not works within aggregation (or) + q = 
self.parser(self.schema.node(ns.bsfs.Entity), + ast.filter.Or( + ast.filter.Not(ast.filter.Is('http://example.com/entity#1234')), + ast.filter.Any(ns.bse.comment, ast.filter.Equals('Me, Myself, and I')), + )) + self.assertSetEqual({str(guid) for guid, in self.graph.query(q)}, + {'http://example.com/entity#1234', 'http://example.com/entity#4321', 'http://example.com/image#1234', 'http://example.com/image#4321'}) + # Not works outside aggregation (and) + q = self.parser(self.schema.node(ns.bsfs.Entity), + ast.filter.Not( + ast.filter.And( + ast.filter.Is('http://example.com/entity#1234'), + ast.filter.Any(ns.bse.comment, ast.filter.Equals('hello world')), + ))) + self.assertSetEqual({str(guid) for guid, in self.graph.query(q)}, + {'http://example.com/entity#4321', 'http://example.com/image#1234', 'http://example.com/image#4321'}) + # Not works outside aggregation (or) + q = self.parser(self.schema.node(ns.bsfs.Entity), + ast.filter.Not( + ast.filter.Or( + ast.filter.Is('http://example.com/entity#4321'), + ast.filter.Any(ns.bse.comment, ast.filter.Equals('Me, Myself, and I')), + ))) + self.assertSetEqual({str(guid) for guid, in self.graph.query(q)}, + {'http://example.com/image#4321'}) + # Not mixed with branch, aggregation, id, and value + q = self.parser(self.schema.node(ns.bsfs.Entity), + ast.filter.And( + ast.filter.Not( # image#1234, image#4321 + ast.filter.Or( # entity#4321, entity#1234 + ast.filter.Is('http://example.com/entity#4321'), + ast.filter.Any(ns.bse.comment, ast.filter.Equals('hello world')), + ) + ), + ast.filter.Any(ns.bse.comment, ast.filter.Not(ast.filter.Equals('foobar'))), # entity#1234, entity#4321, image#1234 + )) + self.assertSetEqual({str(guid) for guid, in self.graph.query(q)}, + {'http://example.com/image#1234'}) + + + def test_has(self): + # Has requires Node + self.assertRaises(errors.BackendError, self.parser._has, self.schema.literal(ns.bsfs.Literal), None, '') + # Has with GreaterThan constraint + q = self.parser(self.schema.node(ns.bsfs.Entity), + ast.filter.Has(ns.bse.comment, ast.filter.GreaterThan(0))) + self.assertSetEqual({str(guid) for guid, in self.graph.query(q)}, + {'http://example.com/entity#1234', 'http://example.com/entity#4321', 'http://example.com/image#1234'}) + q = self.parser(self.schema.node(ns.bsfs.Entity), + ast.filter.Has(ns.bse.comment, ast.filter.GreaterThan(1))) + self.assertSetEqual({str(guid) for guid, in self.graph.query(q)}, + {'http://example.com/entity#1234'}) + # Has with Equals constraint + q = self.parser(self.schema.node(ns.bsfs.Entity), + ast.filter.Has(ns.bse.comment, 1)) + self.assertSetEqual({str(guid) for guid, in self.graph.query(q)}, + {'http://example.com/entity#4321', 'http://example.com/image#1234'}) + # Has with LessThan constraint + q = self.parser(self.schema.node(ns.bsfs.Entity), + ast.filter.Has(ns.bse.comment, ast.filter.LessThan(2))) + self.assertSetEqual({str(guid) for guid, in self.graph.query(q)}, + {'http://example.com/entity#4321', 'http://example.com/image#1234', 'http://example.com/image#4321'}) + # Has with multiple constraints + self.graph.add((rdflib.URIRef('http://example.com/entity#1234'), rdflib.URIRef(ns.bse.comment), rdflib.Literal('extra1', datatype=rdflib.XSD.string))) + self.graph.add((rdflib.URIRef('http://example.com/entity#1234'), rdflib.URIRef(ns.bse.comment), rdflib.Literal('extra2', datatype=rdflib.XSD.string))) + self.graph.add((rdflib.URIRef('http://example.com/entity#1234'), rdflib.URIRef(ns.bse.comment), rdflib.Literal('extra3', datatype=rdflib.XSD.string))) + 
self.graph.add((rdflib.URIRef('http://example.com/entity#1234'), rdflib.URIRef(ns.bse.comment), rdflib.Literal('extra4', datatype=rdflib.XSD.string))) + self.graph.add((rdflib.URIRef('http://example.com/entity#1234'), rdflib.URIRef(ns.bse.comment), rdflib.Literal('extra5', datatype=rdflib.XSD.string))) + self.graph.add((rdflib.URIRef('http://example.com/entity#4321'), rdflib.URIRef(ns.bse.comment), rdflib.Literal('extra1', datatype=rdflib.XSD.string))) + self.graph.add((rdflib.URIRef('http://example.com/entity#4321'), rdflib.URIRef(ns.bse.comment), rdflib.Literal('extra2', datatype=rdflib.XSD.string))) + q = self.parser(self.schema.node(ns.bsfs.Entity), ast.filter.Has(ns.bse.comment, + ast.filter.And(ast.filter.GreaterThan(1), ast.filter.LessThan(5)))) + self.assertSetEqual({str(guid) for guid, in self.graph.query(q)}, + {'http://example.com/entity#4321'}) + # Has with OneOf predicate + q = self.parser(self.schema.node(ns.bsfs.Entity), ast.filter.Has(ast.filter.OneOf(ns.bse.tag, ns.bse.buddy), + ast.filter.GreaterThan(1))) + self.assertSetEqual({str(guid) for guid, in self.graph.query(q)}, + {'http://example.com/entity#1234', 'http://example.com/entity#4321'}) + # Has with reversed predicate + q = self.parser(self.schema.node(ns.bsfs.Tag), ast.filter.Has(ast.filter.Predicate(ns.bse.tag, reverse=True), + ast.filter.GreaterThan(1))) + self.assertSetEqual({str(guid) for guid, in self.graph.query(q)}, + {'http://example.com/tag#1234'}) + + + def test_one_of(self): + # _one_of expects a node + self.assertRaises(errors.BackendError, self.parser._one_of, + self.schema.literal(ns.bsfs.Literal), + ast.filter.OneOf(ast.filter.Predicate(ns.bse.filesize))) + # invalid predicate for node type raises an error + self.assertRaises(errors.ConsistencyError, self.parser._one_of, + self.schema.node(ns.bsfs.Node), + ast.filter.OneOf(ast.filter.Predicate(ns.bse.filesize))) + self.assertRaises(errors.ConsistencyError, self.parser, + self.schema.node(ns.bsfs.Tag), + ast.filter.Any(ast.filter.OneOf(ast.filter.Predicate(ns.bse.filesize)), ast.filter.Equals(1234))) + self.assertRaises(errors.BackendError, self.parser._one_of, + self.schema.node(ns.bsfs.Node), + ast.filter.OneOf(ast.filter.Predicate(ns.bsfs.Predicate))) + # invalid predicate combinations raise an error + self.assertRaises(errors.ConsistencyError, self.parser._one_of, + self.schema.node(ns.bsfs.Node), + ast.filter.OneOf( + ast.filter.Predicate(ns.bse.filesize), + ast.filter.Predicate(ns.bse.representative))) + # _one_of returns the URI and range + q = self.parser._one_of(self.schema.node(ns.bsfs.Image), + ast.filter.OneOf( + ast.filter.Predicate(ns.bse.iso), + ast.filter.Predicate(ns.bse.filesize))) + self.assertTrue(q[0] == f'<{ns.bse.iso}>|<{ns.bse.filesize}>' or q[0] == f'<{ns.bse.filesize}>|<{ns.bse.iso}>') + self.assertEqual(q[1], self.schema.literal(ns.xsd.integer)) + # OneOf can be nested + q = self.parser._one_of(self.schema.node(ns.bsfs.Image), + ast.filter.OneOf( + ast.filter.Predicate(ns.bse.iso), + ast.filter.OneOf( + ast.filter.Predicate(ns.bse.filesize)))) + self.assertTrue(q[0] == f'<{ns.bse.iso}>|<{ns.bse.filesize}>' or q[0] == f'<{ns.bse.filesize}>|<{ns.bse.iso}>') + self.assertEqual(q[1], self.schema.literal(ns.xsd.integer)) + # _one_of returns the most generic range + q = self.parser._one_of(self.schema.node(ns.bsfs.Entity), + ast.filter.OneOf( + ast.filter.Predicate(ns.bse.tag), + ast.filter.Predicate(ns.bse.buddy))) + self.assertTrue(q[0] == f'<{ns.bse.tag}>|<{ns.bse.buddy}>' or q[0] == f'<{ns.bse.buddy}>|<{ns.bse.tag}>') + 
self.assertEqual(q[1], self.schema.node(ns.bsfs.Node)) + # domains must match the given type + self.assertRaises(errors.ConsistencyError, self.parser, + self.schema.node(ns.bsfs.Entity), + ast.filter.Any(ast.filter.OneOf(ns.bse.tag, ns.bse.buddy), + ast.filter.Any(ast.filter.OneOf(ns.bse.filesize), + ast.filter.Equals(1234)))) + # ranges must have the same type (Node/Literal) + self.assertRaises(errors.ConsistencyError, self.parser, + self.schema.node(ns.bsfs.Entity), + ast.filter.Any(ast.filter.OneOf(ns.bse.tag, ns.bse.filesize), + ast.filter.Equals(1234))) + # ranges must be related + self.assertRaises(errors.ConsistencyError, self.parser, + self.schema.node(ns.bsfs.Entity), + ast.filter.Any(ast.filter.OneOf(ns.bse.comment, ns.bse.filesize), + ast.filter.Equals(1234))) + # integration: _one_of returns a valid sparql query + q = self.parser(self.schema.node(ns.bsfs.Entity), + ast.filter.Any(ast.filter.OneOf(ns.bse.tag, ns.bse.buddy), + ast.filter.Any(ast.filter.OneOf(ns.bse.comment), + ast.filter.Equals('Me, Myself, and I')))) + self.assertSetEqual({str(guid) for guid, in self.graph.query(q)}, + {'http://example.com/entity#1234', 'http://example.com/image#1234'}) + + + def test_predicate(self): + # predicate cannot be the root predicate (ns.bsfs.Predicate) + self.assertRaises(errors.BackendError, self.parser._predicate, self.schema.node(ns.bsfs.Node), ast.filter.Predicate(ns.bsfs.Predicate)) + # _predicate expects a node + self.assertRaises(errors.BackendError, self.parser._predicate, + self.schema.literal(ns.bsfs.Literal), + ast.filter.Predicate(ns.bse.filesize)) + # invalid predicate for node type raises an error + self.assertRaises(errors.ConsistencyError, self.parser._predicate, + self.schema.node(ns.bsfs.Node), + ast.filter.Predicate(ns.bse.filesize)) + self.assertRaises(errors.ConsistencyError, self.parser, + self.schema.node(ns.bsfs.Tag), + ast.filter.Any(ast.filter.Predicate(ns.bse.filesize), ast.filter.Equals(1234))) + # _predicate returns the URI and range + self.assertEqual(self.parser._predicate(self.schema.node(ns.bsfs.Entity), ast.filter.Predicate(ns.bse.filesize)), + (f'<{ns.bse.filesize}>', self.schema.literal(ns.xsd.integer))) + self.assertEqual(self.parser._predicate(self.schema.node(ns.bsfs.Entity), ast.filter.Predicate(ns.bse.tag)), + (f'<{ns.bse.tag}>', self.schema.node(ns.bsfs.Tag))) + # _predicate respects reverse flag + self.assertEqual(self.parser._predicate(self.schema.node(ns.bsfs.Tag), ast.filter.Predicate(ns.bse.tag, reverse=True)), + ('^<' + ns.bse.tag + '>', self.schema.node(ns.bsfs.Entity))) + # integration: _predicate returns a valid sparql query + q = self.parser(self.schema.node(ns.bsfs.Entity), + ast.filter.Any(ns.bse.tag, + ast.filter.Any(ns.bse.representative, + ast.filter.Any(ns.bse.filesize, + ast.filter.Equals(1234))))) + self.assertSetEqual({str(guid) for guid, in self.graph.query(q)}, + {'http://example.com/entity#1234', 'http://example.com/image#1234'}) + q = self.parser(self.schema.node(ns.bsfs.Tag), + ast.filter.Any(ast.filter.Predicate(ns.bse.tag, reverse=True), + ast.filter.Any(ns.bse.filesize, + ast.filter.LessThan(2000)))) + self.assertSetEqual({str(guid) for guid, in self.graph.query(q)}, + {'http://example.com/tag#1234'}) + + +## main ## + +if __name__ == '__main__': + unittest.main() + +## EOF ## diff --git a/test/triple_store/sparql/test_sparql.py b/test/triple_store/sparql/test_sparql.py index 0bf664a..3d81de1 100644 --- a/test/triple_store/sparql/test_sparql.py +++ b/test/triple_store/sparql/test_sparql.py @@ -11,6 +11,7 @@ import 
unittest # bsie imports from bsfs import schema as _schema from bsfs.namespace import ns +from bsfs.query import ast from bsfs.utils import errors, URI # objects to test @@ -59,6 +60,18 @@ class TestSparqlStore(unittest.TestCase): bsfs:unique "true"^^xsd:boolean . ''') + self.schema_triples = { + # schema hierarchy + (rdflib.URIRef(ns.bsfs.Entity), rdflib.RDFS.subClassOf, rdflib.URIRef(ns.bsfs.Node)), + (rdflib.URIRef(ns.bsfs.Tag), rdflib.RDFS.subClassOf, rdflib.URIRef(ns.bsfs.Node)), + (rdflib.URIRef(ns.bsfs.User), rdflib.RDFS.subClassOf, rdflib.URIRef(ns.bsfs.Node)), + (rdflib.URIRef(ns.xsd.string), rdflib.RDFS.subClassOf, rdflib.URIRef(ns.bsfs.Literal)), + (rdflib.URIRef(ns.xsd.integer), rdflib.RDFS.subClassOf, rdflib.URIRef(ns.bsfs.Literal)), + (rdflib.URIRef(ns.bse.comment), rdflib.RDFS.subClassOf, rdflib.URIRef(ns.bsfs.Predicate)), + (rdflib.URIRef(ns.bse.filesize), rdflib.RDFS.subClassOf, rdflib.URIRef(ns.bsfs.Predicate)), + (rdflib.URIRef(ns.bse.tag), rdflib.RDFS.subClassOf, rdflib.URIRef(ns.bsfs.Predicate)), + (rdflib.URIRef(ns.bse.author), rdflib.RDFS.subClassOf, rdflib.URIRef(ns.bsfs.Predicate)), + } def test_essentials(self): store = SparqlStore.Open() @@ -155,7 +168,7 @@ class TestSparqlStore(unittest.TestCase): store.set(curr.node(ns.bsfs.Entity), ent_ids, p_author, {URI('http://example.com/me')}) # check instances - instances = { + instances = self.schema_triples | { # node instances (rdflib.URIRef('http://example.com/me/entity#1234'), rdflib.RDF.type, rdflib.URIRef(ns.bsfs.Entity)), (rdflib.URIRef('http://example.com/me/entity#4321'), rdflib.RDF.type, rdflib.URIRef(ns.bsfs.Entity)), @@ -228,7 +241,16 @@ class TestSparqlStore(unittest.TestCase): store.schema = curr self.assertEqual(store.schema, curr) # instances have not changed - self.assertSetEqual(set(store._graph), instances) + self.assertSetEqual(set(store._graph), instances | { + # schema hierarchy + (rdflib.URIRef(ns.bsfs.Collection), rdflib.RDFS.subClassOf, rdflib.URIRef(ns.bsfs.Node)), + (rdflib.URIRef(ns.xsd.boolean), rdflib.RDFS.subClassOf, rdflib.URIRef(ns.bsfs.Literal)), + (rdflib.URIRef(ns.bse.shared), rdflib.RDFS.subClassOf, rdflib.URIRef(ns.bsfs.Predicate)), + (rdflib.URIRef(ns.bse.partOf), rdflib.RDFS.subClassOf, rdflib.URIRef(ns.bsfs.Predicate)), + (rdflib.URIRef('http://bsfs.ai/schema/Tag#usedIn'), rdflib.RDFS.subClassOf, rdflib.URIRef(ns.bsfs.Predicate)), + (rdflib.URIRef('http://bsfs.ai/schema/Collection#tag'), rdflib.RDFS.subClassOf, rdflib.URIRef(ns.bsfs.Predicate)), + (rdflib.URIRef('http://bsfs.ai/schema/Tag#principal'), rdflib.RDFS.subClassOf, rdflib.URIRef(ns.bsfs.Predicate)), + }) # add some instances of the new classes p_partOf = curr.predicate(ns.bse.partOf) p_shared = curr.predicate(ns.bse.shared) @@ -248,6 +270,14 @@ class TestSparqlStore(unittest.TestCase): {URI('http://example.com/me/collection#1234')}) # new instances are now in the graph self.assertSetEqual(set(store._graph), instances | { + # same old schema hierarchy + (rdflib.URIRef(ns.bsfs.Collection), rdflib.RDFS.subClassOf, rdflib.URIRef(ns.bsfs.Node)), + (rdflib.URIRef(ns.xsd.boolean), rdflib.RDFS.subClassOf, rdflib.URIRef(ns.bsfs.Literal)), + (rdflib.URIRef(ns.bse.shared), rdflib.RDFS.subClassOf, rdflib.URIRef(ns.bsfs.Predicate)), + (rdflib.URIRef(ns.bse.partOf), rdflib.RDFS.subClassOf, rdflib.URIRef(ns.bsfs.Predicate)), + (rdflib.URIRef('http://bsfs.ai/schema/Tag#usedIn'), rdflib.RDFS.subClassOf, rdflib.URIRef(ns.bsfs.Predicate)), + (rdflib.URIRef('http://bsfs.ai/schema/Collection#tag'), rdflib.RDFS.subClassOf, 
rdflib.URIRef(ns.bsfs.Predicate)), + (rdflib.URIRef('http://bsfs.ai/schema/Tag#principal'), rdflib.RDFS.subClassOf, rdflib.URIRef(ns.bsfs.Predicate)), # collections (rdflib.URIRef('http://example.com/me/collection#1234'), rdflib.RDF.type, rdflib.URIRef(ns.bsfs.Collection)), (rdflib.URIRef('http://example.com/me/collection#4321'), rdflib.RDF.type, rdflib.URIRef(ns.bsfs.Collection)), @@ -316,6 +346,16 @@ class TestSparqlStore(unittest.TestCase): self.assertEqual(store.schema, curr) # instances of old classes were removed self.assertSetEqual(set(store._graph), { + # schema hierarchy + (rdflib.URIRef(ns.bsfs.Entity), rdflib.RDFS.subClassOf, rdflib.URIRef(ns.bsfs.Node)), + (rdflib.URIRef(ns.bsfs.Tag), rdflib.RDFS.subClassOf, rdflib.URIRef(ns.bsfs.Node)), + (rdflib.URIRef(ns.bsfs.User), rdflib.RDFS.subClassOf, rdflib.URIRef(ns.bsfs.Node)), + (rdflib.URIRef(ns.xsd.boolean), rdflib.RDFS.subClassOf, rdflib.URIRef(ns.bsfs.Literal)), + (rdflib.URIRef(ns.xsd.integer), rdflib.RDFS.subClassOf, rdflib.URIRef(ns.bsfs.Literal)), + (rdflib.URIRef(ns.bse.shared), rdflib.RDFS.subClassOf, rdflib.URIRef(ns.bsfs.Predicate)), + (rdflib.URIRef(ns.bse.tag), rdflib.RDFS.subClassOf, rdflib.URIRef(ns.bsfs.Predicate)), + (rdflib.URIRef(ns.bse.filesize), rdflib.RDFS.subClassOf, rdflib.URIRef(ns.bsfs.Predicate)), + (rdflib.URIRef('http://bsfs.ai/schema/Tag#principal'), rdflib.RDFS.subClassOf, rdflib.URIRef(ns.bsfs.Predicate)), # node instances (rdflib.URIRef('http://example.com/me/entity#1234'), rdflib.RDF.type, rdflib.URIRef(ns.bsfs.Entity)), (rdflib.URIRef('http://example.com/me/entity#4321'), rdflib.RDF.type, rdflib.URIRef(ns.bsfs.Entity)), @@ -390,7 +430,7 @@ class TestSparqlStore(unittest.TestCase): ent_ids = {URI('http://example.com/me/entity#1234'), URI('http://example.com/me/entity#4321')} tag_ids = {URI('http://example.com/me/tag#1234'), URI('http://example.com/me/tag#4321')} # target instances - instances = { + instances = self.schema_triples | { # node instances (rdflib.URIRef('http://example.com/me/entity#1234'), rdflib.RDF.type, rdflib.URIRef(ns.bsfs.Entity)), (rdflib.URIRef('http://example.com/me/entity#4321'), rdflib.RDF.type, rdflib.URIRef(ns.bsfs.Entity)), @@ -416,7 +456,7 @@ class TestSparqlStore(unittest.TestCase): # rollback undoes previous changes store.rollback() - self.assertSetEqual(set(store._graph), set()) + self.assertSetEqual(set(store._graph), self.schema_triples) # add some data once more store.create(ent_type, ent_ids) @@ -456,7 +496,38 @@ class TestSparqlStore(unittest.TestCase): }) def test_get(self): - raise NotImplementedError() + # store setup + store = SparqlStore.Open() + store.schema = self.schema + ent_type = self.schema.node(ns.bsfs.Entity) + tag_type = self.schema.node(ns.bsfs.Tag) + ent_ids = {URI('http://example.com/me/entity#1234'), URI('http://example.com/me/entity#4321')} + tag_ids = {URI('http://example.com/me/tag#1234'), URI('http://example.com/me/tag#4321')} + store.create(ent_type, ent_ids) + store.create(tag_type, tag_ids) + store.set(ent_type, ent_ids, self.schema.predicate(ns.bse.tag), tag_ids) + store.set(ent_type, {URI('http://example.com/me/entity#1234')}, self.schema.predicate(ns.bse.filesize), {1234}) + store.set(ent_type, {URI('http://example.com/me/entity#4321')}, self.schema.predicate(ns.bse.filesize), {4321}) + # node_type must be in the schema + self.assertRaises(errors.ConsistencyError, set, store.get(self.schema.node(ns.bsfs.Node).get_child(ns.bsfs.Invalid), ast.filter.IsIn(ent_ids))) + # query must be a filter expression + class Foo(): pass + 
self.assertRaises(TypeError, set, store.get(ent_type, 1234)) + self.assertRaises(TypeError, set, store.get(ent_type, '1234')) + self.assertRaises(TypeError, set, store.get(ent_type, Foo())) + # run some queries + self.assertSetEqual(set(store.get(tag_type, ast.filter.IsIn(tag_ids))), tag_ids) + self.assertSetEqual(set(store.get(ent_type, ast.filter.Any(ns.bse.tag, ast.filter.IsIn(tag_ids)))), ent_ids) + self.assertSetEqual(set(store.get(ent_type, ast.filter.IsIn(tag_ids))), set()) + # invalid queries raise error + self.assertRaises(errors.ConsistencyError, set, store.get(tag_type, ast.filter.Any(ns.bse.filesize, ast.filter.Equals(1234)))) + self.assertRaises(errors.BackendError, set, store.get(ent_type, ast.filter.Equals('http://example.com/me/entity#1234'))) + # run some more complex query + q = store.get(tag_type, ast.filter.Any(ast.filter.Predicate(ns.bse.tag, reverse=True), + ast.filter.Any(ns.bse.filesize, + ast.filter.LessThan(2000)))) + self.assertSetEqual(set(q), tag_ids) + def test_exists(self): # store setup @@ -509,14 +580,15 @@ class TestSparqlStore(unittest.TestCase): # can create some nodes ent_type = store.schema.node(ns.bsfs.Entity) store.create(ent_type, {URI('http://example.com/me/entity#1234'), URI('http://example.com/me/entity#4321')}) - self.assertSetEqual(set(store._graph), { + self.assertSetEqual(set(store._graph), self.schema_triples | { + # instances (rdflib.URIRef('http://example.com/me/entity#1234'), rdflib.RDF.type, rdflib.URIRef(ns.bsfs.Entity)), (rdflib.URIRef('http://example.com/me/entity#4321'), rdflib.RDF.type, rdflib.URIRef(ns.bsfs.Entity)), }) # existing nodes are skipped store.create(ent_type, {URI('http://example.com/me/entity#1234'), URI('http://example.com/me/entity#5678')}) - self.assertSetEqual(set(store._graph), { + self.assertSetEqual(set(store._graph), self.schema_triples | { # previous triples (rdflib.URIRef('http://example.com/me/entity#1234'), rdflib.RDF.type, rdflib.URIRef(ns.bsfs.Entity)), (rdflib.URIRef('http://example.com/me/entity#4321'), rdflib.RDF.type, rdflib.URIRef(ns.bsfs.Entity)), @@ -527,7 +599,7 @@ class TestSparqlStore(unittest.TestCase): # can create nodes of a different type tag_type = store.schema.node(ns.bsfs.Tag) store.create(tag_type, {URI('http://example.com/me/tag#1234'), URI('http://example.com/me/tag#4321')}) - self.assertSetEqual(set(store._graph), { + self.assertSetEqual(set(store._graph), self.schema_triples | { # previous triples (rdflib.URIRef('http://example.com/me/entity#1234'), rdflib.RDF.type, rdflib.URIRef(ns.bsfs.Entity)), (rdflib.URIRef('http://example.com/me/entity#4321'), rdflib.RDF.type, rdflib.URIRef(ns.bsfs.Entity)), @@ -540,7 +612,7 @@ class TestSparqlStore(unittest.TestCase): # creation does not change types of existing nodes tag_type = store.schema.node(ns.bsfs.Tag) store.create(tag_type, {URI('http://example.com/me/entity#1234'), URI('http://example.com/me/entity#4321')}) - self.assertSetEqual(set(store._graph), { + self.assertSetEqual(set(store._graph), self.schema_triples | { # previous triples (rdflib.URIRef('http://example.com/me/entity#1234'), rdflib.RDF.type, rdflib.URIRef(ns.bsfs.Entity)), (rdflib.URIRef('http://example.com/me/entity#4321'), rdflib.RDF.type, rdflib.URIRef(ns.bsfs.Entity)), -- cgit v1.2.3 From ca7ee6c59d2eb3f4ec4d16e392d12d946cd85e4d Mon Sep 17 00:00:00 2001 From: Matthias Baumgartner Date: Thu, 22 Dec 2022 20:33:00 +0100 Subject: filter-ast based get interface in graph. 
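A rough usage sketch of the interface this change introduces; the graph instance and the URIs are illustrative placeholders, and the calls mirror the new tests in test/graph/test_graph.py:

    from bsfs.namespace import ns
    from bsfs.query import ast
    from bsfs.utils import URI

    # select all entities that link to one of the given tags
    tags = graph.nodes(ns.bsfs.Tag, {URI('http://example.com/tag#1234')})
    ents = graph.get(ns.bsfs.Entity,
                     ast.filter.Any(ns.bse.tag, ast.filter.Is(tags)))
    # ents is a Nodes handle over the matching entity guids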
* Graph interface: Graph.get added * Node instance resolver so that Nodes can be used in a filter ast * AC interface: filter_read added to interface * upstream test adjustments of previous sparql store changes --- bsfs/graph/ac/base.py | 4 + bsfs/graph/ac/null.py | 5 ++ bsfs/graph/graph.py | 28 +++++-- bsfs/graph/resolve.py | 161 ++++++++++++++++++++++++++++++++++++++++ test/graph/ac/test_null.py | 10 +++ test/graph/test_graph.py | 55 +++++++++++++- test/graph/test_nodes.py | 30 ++++++-- test/graph/test_resolve.py | 181 +++++++++++++++++++++++++++++++++++++++++++++ 8 files changed, 459 insertions(+), 15 deletions(-) create mode 100644 bsfs/graph/resolve.py create mode 100644 test/graph/test_resolve.py diff --git a/bsfs/graph/ac/base.py b/bsfs/graph/ac/base.py index bc9aeb3..0703e2e 100644 --- a/bsfs/graph/ac/base.py +++ b/bsfs/graph/ac/base.py @@ -10,6 +10,7 @@ import typing # bsfs imports from bsfs import schema +from bsfs.query import ast from bsfs.triple_store import TripleStoreBase from bsfs.utils import URI @@ -67,5 +68,8 @@ class AccessControlBase(abc.ABC): def createable(self, node_type: schema.Node, guids: typing.Iterable[URI]) -> typing.Iterable[URI]: """Return nodes that are allowed to be created.""" + @abc.abstractmethod + def filter_read(self, node_type: schema.Node, query: ast.filter.FilterExpression) -> ast.filter.FilterExpression: + """Re-write a filter *query* to get (i.e., read) *node_type* nodes.""" ## EOF ## diff --git a/bsfs/graph/ac/null.py b/bsfs/graph/ac/null.py index 36838bd..12b4e87 100644 --- a/bsfs/graph/ac/null.py +++ b/bsfs/graph/ac/null.py @@ -10,6 +10,7 @@ import typing # bsfs imports from bsfs import schema from bsfs.namespace import ns +from bsfs.query import ast from bsfs.utils import URI # inner-module imports @@ -49,4 +50,8 @@ class NullAC(base.AccessControlBase): """Return nodes that are allowed to be created.""" return guids + def filter_read(self, node_type: schema.Node, query: ast.filter.FilterExpression) -> ast.filter.FilterExpression: + """Re-write a filter *query* to get (i.e., read) *node_type* nodes.""" + return query + ## EOF ## diff --git a/bsfs/graph/graph.py b/bsfs/graph/graph.py index 51fe75d..f030fed 100644 --- a/bsfs/graph/graph.py +++ b/bsfs/graph/graph.py @@ -9,13 +9,15 @@ import os import typing # bsfs imports -from bsfs.query import ast +from bsfs.query import ast, validate from bsfs.schema import Schema from bsfs.triple_store import TripleStoreBase from bsfs.utils import URI, typename # inner-module imports +from . import ac from . import nodes as _nodes +from . import resolve # exports __all__: typing.Sequence[str] = ( @@ -44,6 +46,9 @@ class Graph(): def __init__(self, backend: TripleStoreBase, user: URI): self._backend = backend self._user = user + self._resolver = resolve.Filter(self._backend.schema) + self._validate = validate.Filter(self._backend.schema) + self._ac = ac.NullAC(self._backend, self._user) # ensure Graph schema requirements self.migrate(self._backend.schema) @@ -85,6 +90,9 @@ class Graph(): # migrate schema in backend # FIXME: consult access controls! self._backend.schema = schema + # re-initialize members + self._resolver.schema = self.schema + self._validate.schema = self.schema # return self return self @@ -108,11 +116,21 @@ class Graph(): *node_type*) once some data is assigned to them. 
""" - type_ = self.schema.node(node_type) - return _nodes.Nodes(self._backend, self._user, type_, {guid}) + return self.nodes(node_type, {guid}) - def get(self, node_type: URI, subject: ast.filter.FilterExpression) -> _nodes.Nodes: + def get(self, node_type: URI, query: ast.filter.FilterExpression) -> _nodes.Nodes: # FIXME: How about empty query? """Return a `Nodes` instance over all nodes of type *node_type* that match the *subject* query.""" - raise NotImplementedError() + # get node type + type_ = self.schema.node(node_type) + # resolve Nodes instances + query = self._resolver(type_, query) + # add access controls to query + query = self._ac.filter_read(type_, query) + # validate query + self._validate(type_, query) + # query the backend + guids = self._backend.get(type_, query) # no need to materialize + # return Nodes instance + return _nodes.Nodes(self._backend, self._user, type_, guids) ## EOF ## diff --git a/bsfs/graph/resolve.py b/bsfs/graph/resolve.py new file mode 100644 index 0000000..feb0855 --- /dev/null +++ b/bsfs/graph/resolve.py @@ -0,0 +1,161 @@ +""" + +Part of the BlackStar filesystem (bsfs) module. +A copy of the license is provided with the project. +Author: Matthias Baumgartner, 2022 +""" +# imports +import typing + +# bsfs imports +from bsfs import schema as bsc +from bsfs.query import ast +from bsfs.utils import errors + +# inner-module imports +from . import nodes + +# exports +__all__: typing.Sequence[str] = ( + 'Filter', + ) + + +## code ## + +class Filter(): + """Rewrites the query to replace `bsfs.graph.nodes.Nodes` instances with the respective URI. + Does only limited type checking and schema validation. + Use `bsfs.schema.validate.Filter` to do so. + + Example: + input: Any(ns.bse.tag, Is(Nodes(...))) + output: Any(ns.bse.tag, Or(Is(...), Is(...), ...))) + + >>> tags = graph.node(ns.bsfs.Tag, 'http://example.com/me/tag#1234') + >>> graph.get(ns.bsfs.Entity, ast.filter.Any(ns.bse.tag, ast.filter.Is(tags))) + + """ + + T_VERTEX = typing.Union[bsc.Node, bsc.Literal] + + def __init__(self, schema): + self.schema = schema + + def __call__(self, root_type: bsc.Node, node: ast.filter.FilterExpression): + return self._parse_filter_expression(root_type, node) + + def _parse_filter_expression( + self, + type_: T_VERTEX, + node: ast.filter.FilterExpression, + ) -> ast.filter.FilterExpression: + """Route *node* to the handler of the respective FilterExpression subclass.""" + if isinstance(node, ast.filter.Is): + return self._is(type_, node) + if isinstance(node, ast.filter.Not): + return self._not(type_, node) + if isinstance(node, ast.filter.Has): + return self._has(type_, node) + if isinstance(node, ast.filter.Any): + return self._any(type_, node) + if isinstance(node, ast.filter.All): + return self._all(type_, node) + if isinstance(node, ast.filter.And): + return self._and(type_, node) + if isinstance(node, ast.filter.Or): + return self._or(type_, node) + if isinstance(node, (ast.filter.Equals, ast.filter.Substring, \ + ast.filter.StartsWith, ast.filter.EndsWith)): + return self._value(type_, node) + if isinstance(node, (ast.filter.LessThan, ast.filter.GreaterThan)): + return self._bounded(type_, node) + # invalid node + raise errors.BackendError(f'expected filter expression, found {node}') + + def _parse_predicate_expression(self, node: ast.filter.PredicateExpression) -> T_VERTEX: + """Route *node* to the handler of the respective PredicateExpression subclass.""" + if isinstance(node, ast.filter.Predicate): + return self._predicate(node) + if isinstance(node, 
ast.filter.OneOf): + return self._one_of(node) + # invalid node + raise errors.BackendError(f'expected predicate expression, found {node}') + + def _predicate(self, node: ast.filter.Predicate) -> T_VERTEX: + if not self.schema.has_predicate(node.predicate): + raise errors.ConsistencyError(f'predicate {node.predicate} is not in the schema') + pred = self.schema.predicate(node.predicate) + dom, rng = pred.domain, pred.range + if node.reverse: + dom, rng = rng, dom + return rng + + def _one_of(self, node: ast.filter.OneOf) -> T_VERTEX: + # determine domain and range types + rng = None + for pred in node: + # parse child expression + subrng = self._parse_predicate_expression(pred) + # determine the next type + try: + if rng is None or subrng > rng: # pick most generic range + rng = subrng + except TypeError as err: + raise errors.ConsistencyError(f'ranges {subrng} and {rng} are not related') from err + if rng is None: + raise errors.UnreachableError() + return rng + + def _any(self, type_: T_VERTEX, node: ast.filter.Any) -> ast.filter.Any: # pylint: disable=unused-argument + next_type = self._parse_predicate_expression(node.predicate) + return ast.filter.Any(node.predicate, self._parse_filter_expression(next_type, node.expr)) + + def _all(self, type_: T_VERTEX, node: ast.filter.All) -> ast.filter.All: # pylint: disable=unused-argument + next_type = self._parse_predicate_expression(node.predicate) + return ast.filter.All(node.predicate, self._parse_filter_expression(next_type, node.expr)) + + def _and(self, type_: T_VERTEX, node: ast.filter.And) -> ast.filter.And: + return ast.filter.And({self._parse_filter_expression(type_, expr) for expr in node}) + + def _or(self, type_: T_VERTEX, node: ast.filter.Or) -> ast.filter.Or: + return ast.filter.Or({self._parse_filter_expression(type_, expr) for expr in node}) + + def _not(self, type_: T_VERTEX, node: ast.filter.Not) -> ast.filter.Not: + return ast.filter.Not(self._parse_filter_expression(type_, node.expr)) + + def _has(self, type_: T_VERTEX, node: ast.filter.Has) -> ast.filter.Has: # pylint: disable=unused-argument + return node + + def _value(self, type_: T_VERTEX, node: ast.filter._Value) -> ast.filter._Value: # pylint: disable=unused-argument + return node + + def _bounded(self, type_: T_VERTEX, node: ast.filter._Bounded) -> ast.filter._Bounded: # pylint: disable=unused-argument + return node + + def _is(self, type_: T_VERTEX, node: ast.filter.Is) -> typing.Union[ast.filter.Or, ast.filter.Is]: + # check if action is needed + if not isinstance(node.value, nodes.Nodes): + return node + # check schema consistency + if node.value.node_type not in self.schema.nodes(): + raise errors.ConsistencyError(f'node {node.value.node_type} is not in the schema') + # check type compatibility + if not isinstance(type_, bsc.Node): + raise errors.ConsistencyError(f'expected a node, found {type_}') + if not node.value.node_type <= type_: + raise errors.ConsistencyError(f'expected type {type_} or subtype thereof, found {node.value.node_type}') + # NOTE: We assume that the node type is checked when writing to the backend. + # Links to any of the guids can therefore only exist if the type matches. + # Hence, we don't add a type check/constrain here. 
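+        # Illustration (placeholder URIs): an input such as
+        #   Is(Nodes(node_type=bsfs:Entity, guids={entity#1234, entity#4321}))
+        # is rewritten into
+        #   Or(Is('entity#1234'), Is('entity#4321'))
+        # so later stages only ever see plain guid URIs.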
+ return ast.filter.Or(ast.filter.Is(guid) for guid in node.value.guids) + # optimized code, removing unnecessary ast.filter.Or + #guids = set(node.value.guids) + #if len(guids) == 0: + # raise errors.BackendError(f'') + #if len(guids) == 1: + # return ast.filter.Nodeid(next(iter(guids))) + #return ast.filter.Or(ast.filter.Is(guid) for guid in guids) + + +## EOF ## diff --git a/test/graph/ac/test_null.py b/test/graph/ac/test_null.py index f39c9be..c863943 100644 --- a/test/graph/ac/test_null.py +++ b/test/graph/ac/test_null.py @@ -10,6 +10,7 @@ import unittest # bsie imports from bsfs import schema as _schema from bsfs.namespace import ns +from bsfs.query import ast from bsfs.triple_store import SparqlStore from bsfs.utils import URI @@ -93,6 +94,15 @@ class TestNullAC(unittest.TestCase): ac = NullAC(self.backend, self.user) self.assertSetEqual(self.ent_ids, ac.createable(self.ent_type, self.ent_ids)) + def test_filter_read(self): + query = ast.filter.Or( + ast.filter.Any(ns.bse.tag, ast.filter.Is('http://example.com/tag#1234')), + ast.filter.Any(ns.bse.tag, ast.filter.Is('http://example.com/tag#4321')), + ast.filter.Any(ns.bse.author, ast.filter.Equals('Me, Myself, and I'))) + ac = NullAC(self.backend, self.user) + self.assertEqual(query, ac.filter_read(self.ent_type, query)) + return query + ## main ## diff --git a/test/graph/test_graph.py b/test/graph/test_graph.py index 0a3fd5b..8503d5b 100644 --- a/test/graph/test_graph.py +++ b/test/graph/test_graph.py @@ -9,10 +9,11 @@ import unittest # bsie imports from bsfs import schema +from bsfs.graph.nodes import Nodes from bsfs.namespace import ns +from bsfs.query import ast from bsfs.triple_store import SparqlStore from bsfs.utils import URI, errors -from bsfs.graph.nodes import Nodes # objects to test from bsfs.graph.graph import Graph @@ -193,7 +194,57 @@ class TestGraph(unittest.TestCase): ''')) def test_get(self): - raise NotImplementedError() + # setup + graph = Graph(self.backend, self.user) + graph.migrate(schema.Schema.from_string(''' + prefix rdfs: + prefix xsd: + prefix bsfs: + prefix bse: + + bsfs:Entity rdfs:subClassOf bsfs:Node . + bsfs:Tag rdfs:subClassOf bsfs:Node . + xsd:string rdfs:subClassOf bsfs:Literal . + + bse:tag rdfs:subClassOf bsfs:Predicate ; + rdfs:domain bsfs:Entity ; + rdfs:range bsfs:Tag ; + bsfs:unique "false"^^xsd:boolean . + + bse:comment rdfs:subClassOf bsfs:Predicate ; + rdfs:domain bsfs:Node ; + rdfs:range xsd:string ; + bsfs:unique "false"^^xsd:boolean . 
+ + ''')) + # add some instances + ents = graph.nodes(ns.bsfs.Entity, {URI('http://example.com/entity#1234'), URI('http://example.com/entity#4321')}) + tags = graph.nodes(ns.bsfs.Tag, {URI('http://example.com/tag#1234'), URI('http://example.com/tag#4321')}) + # add some node links + ents.set(ns.bse.tag, tags) + # add some literals + graph.node(ns.bsfs.Entity, URI('http://example.com/entity#1234')).set(ns.bse.comment, 'hello world') + graph.node(ns.bsfs.Entity, URI('http://example.com/entity#1234')).set(ns.bse.comment, 'foo') + graph.node(ns.bsfs.Entity, URI('http://example.com/entity#1234')).set(ns.bse.comment, 'foobar') + graph.node(ns.bsfs.Tag, URI('http://example.com/tag#1234')).set(ns.bse.comment, 'foo') + graph.node(ns.bsfs.Tag, URI('http://example.com/tag#4321')).set(ns.bse.comment, 'bar') + + # get exception for invalid query + self.assertRaises(errors.ConsistencyError, graph.get, ns.bsfs.Entity, ast.filter.Any(ns.bse.tag, ast.filter.Equals('hello world'))) + + # query returns nodes + self.assertEqual(graph.get(ns.bsfs.Entity, ast.filter.Any(ns.bse.tag, ast.filter.Is(tags))), ents) + self.assertEqual(graph.get(ns.bsfs.Entity, ast.filter.Any(ns.bse.comment, ast.filter.StartsWith('foo'))), + graph.node(ns.bsfs.Entity, URI('http://example.com/entity#1234'))) + self.assertEqual(graph.get(ns.bsfs.Node, ast.filter.Any(ns.bse.comment, ast.filter.StartsWith('foo'))), + graph.nodes(ns.bsfs.Node, {URI('http://example.com/entity#1234'), URI('http://example.com/tag#1234')})) + self.assertEqual(graph.get(ns.bsfs.Entity, ast.filter.Or( + ast.filter.Any(ns.bse.comment, ast.filter.EndsWith('bar')), + ast.filter.Any(ns.bse.tag, ast.filter.All(ns.bse.comment, ast.filter.Equals('bar'))))), + ents) + + + ## main ## diff --git a/test/graph/test_nodes.py b/test/graph/test_nodes.py index 43e7f6f..11ae46d 100644 --- a/test/graph/test_nodes.py +++ b/test/graph/test_nodes.py @@ -72,6 +72,20 @@ class TestNodes(unittest.TestCase): bsfs:unique "true"^^xsd:boolean . 
''') + self.schema_triples = { + # schema hierarchy + (rdflib.URIRef(ns.bsfs.Entity), rdflib.RDFS.subClassOf, rdflib.URIRef(ns.bsfs.Node)), + (rdflib.URIRef(ns.bsfs.Tag), rdflib.RDFS.subClassOf, rdflib.URIRef(ns.bsfs.Node)), + (rdflib.URIRef(ns.bsfs.User), rdflib.RDFS.subClassOf, rdflib.URIRef(ns.bsfs.Node)), + (rdflib.URIRef(ns.xsd.string), rdflib.RDFS.subClassOf, rdflib.URIRef(ns.bsfs.Literal)), + (rdflib.URIRef(ns.xsd.integer), rdflib.RDFS.subClassOf, rdflib.URIRef(ns.bsfs.Literal)), + (rdflib.URIRef(ns.bsm.t_created), rdflib.RDFS.subClassOf, rdflib.URIRef(ns.bsfs.Predicate)), + (rdflib.URIRef(ns.bse.comment), rdflib.RDFS.subClassOf, rdflib.URIRef(ns.bsfs.Predicate)), + (rdflib.URIRef(ns.bse.filesize), rdflib.RDFS.subClassOf, rdflib.URIRef(ns.bsfs.Predicate)), + (rdflib.URIRef(ns.bse.tag), rdflib.RDFS.subClassOf, rdflib.URIRef(ns.bsfs.Predicate)), + (rdflib.URIRef(ns.bse.author), rdflib.RDFS.subClassOf, rdflib.URIRef(ns.bsfs.Predicate)), + (rdflib.URIRef('http://bsfs.ai/schema/Tag#representative'), rdflib.RDFS.subClassOf, rdflib.URIRef(ns.bsfs.Predicate)), + } # Nodes constructor args self.user = URI('http://example.com/me') # set args @@ -160,7 +174,7 @@ class TestNodes(unittest.TestCase): time_triples = list(self.backend._graph.objects(rdflib.URIRef('http://example.com/me/entity#1234'), rdflib.URIRef(self.t_created.uri))) t_ent_created = float(time_triples[0]) if len(time_triples) > 0 else 0.0 # check triples - self.assertSetEqual(set(self.backend._graph), { + self.assertSetEqual(set(self.backend._graph), self.schema_triples | { # entity definitions (rdflib.URIRef('http://example.com/me/entity#1234'), rdflib.RDF.type, rdflib.URIRef('http://bsfs.ai/schema/Entity')), (rdflib.URIRef('http://example.com/me/entity#4321'), rdflib.RDF.type, rdflib.URIRef('http://bsfs.ai/schema/Entity')), @@ -171,7 +185,7 @@ class TestNodes(unittest.TestCase): # existing nodes remain unchanged self.assertSetEqual(self.ent_ids, nodes._ensure_nodes(self.ent_type, self.ent_ids)) - self.assertSetEqual(set(self.backend._graph), { + self.assertSetEqual(set(self.backend._graph), self.schema_triples | { # entity definitions (rdflib.URIRef('http://example.com/me/entity#1234'), rdflib.RDF.type, rdflib.URIRef('http://bsfs.ai/schema/Entity')), (rdflib.URIRef('http://example.com/me/entity#4321'), rdflib.RDF.type, rdflib.URIRef('http://bsfs.ai/schema/Entity')), @@ -186,7 +200,7 @@ class TestNodes(unittest.TestCase): time_triples = list(self.backend._graph.objects(rdflib.URIRef('http://example.com/me/tag#1234'), rdflib.URIRef(self.t_created.uri))) t_tag_created = float(time_triples[0]) if len(time_triples) > 0 else 0.0 # check triples - self.assertSetEqual(set(self.backend._graph), { + self.assertSetEqual(set(self.backend._graph), self.schema_triples | { # previous triples (rdflib.URIRef('http://example.com/me/entity#1234'), rdflib.RDF.type, rdflib.URIRef('http://bsfs.ai/schema/Entity')), (rdflib.URIRef('http://example.com/me/entity#4321'), rdflib.RDF.type, rdflib.URIRef('http://bsfs.ai/schema/Entity')), @@ -202,7 +216,7 @@ class TestNodes(unittest.TestCase): def test___set(self): # setup nodes = Nodes(self.backend, self.user, self.ent_type, {URI('http://example.com/me/entity#1234'), URI('http://example.com/me/entity#4321')}) - self.assertSetEqual(set(self.backend._graph), set()) + self.assertSetEqual(set(self.backend._graph), self.schema_triples | set()) set_ = nodes._Nodes__set # node_type must match predicate's domain @@ -217,7 +231,7 @@ class TestNodes(unittest.TestCase): time_triples = 
list(self.backend._graph.objects(rdflib.URIRef('http://example.com/me/entity#1234'), rdflib.URIRef(self.t_created.uri))) t_ent_created = float(time_triples[0]) if len(time_triples) > 0 else 0.0 # verify triples - self.assertSetEqual(set(self.backend._graph), { + self.assertSetEqual(set(self.backend._graph), self.schema_triples | { # entity definitions (rdflib.URIRef('http://example.com/me/entity#1234'), rdflib.RDF.type, rdflib.URIRef('http://bsfs.ai/schema/Entity')), (rdflib.URIRef('http://example.com/me/entity#4321'), rdflib.RDF.type, rdflib.URIRef('http://bsfs.ai/schema/Entity')), @@ -236,7 +250,7 @@ class TestNodes(unittest.TestCase): time_triples = list(self.backend._graph.objects(rdflib.URIRef('http://example.com/me/tag#1234'), rdflib.URIRef(self.t_created.uri))) t_tag_created = float(time_triples[0]) if len(time_triples) > 0 else 0.0 # verify triples - self.assertSetEqual(set(self.backend._graph), { + self.assertSetEqual(set(self.backend._graph), self.schema_triples | { # previous values (rdflib.URIRef('http://example.com/me/entity#1234'), rdflib.RDF.type, rdflib.URIRef('http://bsfs.ai/schema/Entity')), (rdflib.URIRef('http://example.com/me/entity#4321'), rdflib.RDF.type, rdflib.URIRef('http://bsfs.ai/schema/Entity')), @@ -265,7 +279,7 @@ class TestNodes(unittest.TestCase): Nodes(self.backend, self.user, self.ent_type, self.ent_ids)) def test_set(self): - self.assertSetEqual(set(self.backend._graph), set()) + self.assertSetEqual(set(self.backend._graph), self.schema_triples | set()) nodes = Nodes(self.backend, self.user, self.ent_type, self.ent_ids) # can set literal values self.assertEqual(nodes, nodes.set(self.p_filesize.uri, 1234)) @@ -312,7 +326,7 @@ class TestNodes(unittest.TestCase): def test_set_from_iterable(self): - self.assertSetEqual(set(self.backend._graph), set()) + self.assertSetEqual(set(self.backend._graph), self.schema_triples | set()) nodes = Nodes(self.backend, self.user, self.ent_type, self.ent_ids) # can set literal and node values simultaneously self.assertEqual(nodes, nodes.set_from_iterable({ diff --git a/test/graph/test_resolve.py b/test/graph/test_resolve.py new file mode 100644 index 0000000..5bc99e4 --- /dev/null +++ b/test/graph/test_resolve.py @@ -0,0 +1,181 @@ +""" + +Part of the bsfs test suite. +A copy of the license is provided with the project. +Author: Matthias Baumgartner, 2022 +""" +# imports +import unittest + +# bsie imports +from bsfs import schema as bsc +from bsfs.graph import Graph, nodes +from bsfs.namespace import ns +from bsfs.query import ast +from bsfs.triple_store import SparqlStore +from bsfs.utils import URI, errors + +# objects to test +from bsfs.graph.resolve import Filter + + +## code ## + +class TestFilter(unittest.TestCase): + """ + + NOTE: The Filter resolver is relatively simple as it only checks and changes + ast.filter.Is instances. Hence, we don't test all methods individually but + all of them with respect to ast.filter.Is elements. + + """ + + def test_call(self): + schema = bsc.Schema.from_string(''' + prefix rdfs: + prefix xsd: + + prefix bsfs: + prefix bse: + + bsfs:Entity rdfs:subClassOf bsfs:Node . + bsfs:Tag rdfs:subClassOf bsfs:Node . + xsd:string rdfs:subClassOf bsfs:Literal . + xsd:integer rdfs:subClassOf bsfs:Literal . + + bse:comment rdfs:subClassOf bsfs:Predicate ; + rdfs:domain bsfs:Entity ; + rdfs:range xsd:string ; + bsfs:unique "false"^^xsd:boolean . + + bse:filesize rdfs:subClassOf bsfs:Predicate ; + rdfs:domain bsfs:Entity ; + rdfs:range xsd:integer ; + bsfs:unique "false"^^xsd:boolean . 
+ + bse:tag rdfs:subClassOf bsfs:Predicate ; + rdfs:domain bsfs:Entity ; + rdfs:range bsfs:Tag ; + bsfs:unique "false"^^xsd:boolean . + ''') + backend = SparqlStore.Open() + backend.schema = schema + graph = Graph(backend, URI('http://example.com/me')) + ents = graph.nodes(ns.bsfs.Entity, + {URI('http://example.com/me/entity#1234'), URI('http://example.com/me/entity#4321')}) + tags = graph.nodes(ns.bsfs.Tag, + {URI('http://example.com/me/tag#1234'), URI('http://example.com/me/tag#4321')}) + invalid = nodes.Nodes(None, '', schema.node(ns.bsfs.Node).get_child(ns.bsfs.Invalid), + {'http://example.com/you/invalid#1234', 'http://example.com/you/invalid#4321'}) + resolver = Filter(schema) + + # immediate Is + self.assertEqual(resolver(schema.node(ns.bsfs.Entity), + ast.filter.Is(ents)), + ast.filter.Or( + ast.filter.Is('http://example.com/me/entity#1234'), + ast.filter.Is('http://example.com/me/entity#4321') + )) + # only resolves nodes instances, not URIs + self.assertEqual(resolver(schema.node(ns.bsfs.Entity), + ast.filter.Is('http://example.com/me/entity#1234')), + ast.filter.Is('http://example.com/me/entity#1234')) + self.assertEqual(resolver(schema.node(ns.bsfs.Entity), + ast.filter.Is(1234)), + ast.filter.Is(1234)) + + # within And (also checks _value) + self.assertEqual(resolver(schema.node(ns.bsfs.Entity), + ast.filter.And( + ast.filter.Is(ents), + ast.filter.Any(ns.bse.comment, ast.filter.Equals('hello world')), + )), + ast.filter.And( + ast.filter.Or( + ast.filter.Is('http://example.com/me/entity#1234'), + ast.filter.Is('http://example.com/me/entity#4321')), + ast.filter.Any(ns.bse.comment, ast.filter.Equals('hello world')) + )) + # within Or (checks _bounded) + self.assertEqual(resolver(schema.node(ns.bsfs.Entity), + ast.filter.Or( + ast.filter.Is(ents), + ast.filter.Any(ns.bse.filesize, ast.filter.LessThan(5)), + )), + ast.filter.Or( + ast.filter.Or( + ast.filter.Is('http://example.com/me/entity#1234'), + ast.filter.Is('http://example.com/me/entity#4321')), + ast.filter.Any(ns.bse.filesize, ast.filter.LessThan(5)) + )) + + # Any-branched Is + self.assertEqual(resolver(schema.node(ns.bsfs.Entity), + ast.filter.Any(ns.bse.tag, ast.filter.Is(tags))), + ast.filter.Any(ns.bse.tag, ast.filter.Or( + ast.filter.Is('http://example.com/me/tag#1234'), + ast.filter.Is('http://example.com/me/tag#4321')), + )) + # All-branched Is + self.assertEqual(resolver(schema.node(ns.bsfs.Entity), + ast.filter.All(ns.bse.tag, ast.filter.Is(tags))), + ast.filter.All(ns.bse.tag, ast.filter.Or( + ast.filter.Is('http://example.com/me/tag#1234'), + ast.filter.Is('http://example.com/me/tag#4321')), + )) + # Negated predicate + self.assertEqual(resolver(schema.node(ns.bsfs.Tag), + ast.filter.Any(ast.filter.Predicate(ns.bse.tag, reverse=True), ast.filter.Is(ents))), + ast.filter.Any(ast.filter.Predicate(ns.bse.tag, reverse=True), ast.filter.Or( + ast.filter.Is('http://example.com/me/entity#1234'), + ast.filter.Is('http://example.com/me/entity#4321')), + )) + + # negated Is + self.assertEqual(resolver(schema.node(ns.bsfs.Entity), + ast.filter.Not(ast.filter.Is(ents))), + ast.filter.Not( + ast.filter.Or( + ast.filter.Is('http://example.com/me/entity#1234'), + ast.filter.Is('http://example.com/me/entity#4321')), + )) + + # for sake of completeness: Has + self.assertEqual(resolver(schema.node(ns.bsfs.Entity), + ast.filter.Has(ns.bse.comment)), + ast.filter.Has(ns.bse.comment)) + # route errors + self.assertRaises(errors.BackendError, resolver, schema.node(ns.bsfs.Tag), + ast.filter.Predicate(ns.bse.comment)) + 
self.assertRaises(errors.BackendError, resolver, schema.node(ns.bsfs.Tag), + ast.filter.Any(ast.filter.PredicateExpression(), ast.filter.Equals('foo'))) + self.assertRaises(errors.UnreachableError, resolver._one_of, ast.filter.OneOf(ast.filter.Predicate(ns.bsfs.Predicate))) + + # check schema consistency + self.assertRaises(errors.ConsistencyError, resolver, schema.node(ns.bsfs.Tag), + ast.filter.Is(invalid)) + # check immediate type compatibility + self.assertRaises(errors.ConsistencyError, resolver, schema.node(ns.bsfs.Tag), + ast.filter.Is(ents)) + self.assertRaises(errors.ConsistencyError, resolver, schema.node(ns.bsfs.Entity), + ast.filter.Is(tags)) + # check type compatibility through branches + self.assertRaises(errors.ConsistencyError, resolver, schema.node(ns.bsfs.Tag), + ast.filter.Any(ns.bse.comment, ast.filter.Is(tags))) + self.assertRaises(errors.ConsistencyError, resolver, schema.node(ns.bsfs.Tag), + ast.filter.Any(ns.bse.invalid, ast.filter.Is(tags))) + self.assertRaises(errors.ConsistencyError, resolver, schema.node(ns.bsfs.Tag), + ast.filter.Any(ast.filter.OneOf(ns.bse.comment, ns.bse.tag), ast.filter.Is(tags))) + self.assertRaises(errors.ConsistencyError, resolver, schema.node(ns.bsfs.Tag), + ast.filter.Any(ast.filter.OneOf(ns.bse.comment, ns.bse.filesize), ast.filter.Is(tags))) + self.assertRaises(errors.ConsistencyError, resolver, schema.node(ns.bsfs.Tag), + ast.filter.Any(ast.filter.Predicate(ns.bse.tag, reverse=True), ast.filter.Is(tags))) + + + +## main ## + +if __name__ == '__main__': + unittest.main() + +## EOF ## -- cgit v1.2.3 From 7f5a2920ef311b2077300714d7700313077a0bf6 Mon Sep 17 00:00:00 2001 From: Matthias Baumgartner Date: Thu, 22 Dec 2022 20:35:38 +0100 Subject: cosmetic changes --- bsfs/graph/nodes.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/bsfs/graph/nodes.py b/bsfs/graph/nodes.py index c417a0e..5a93f77 100644 --- a/bsfs/graph/nodes.py +++ b/bsfs/graph/nodes.py @@ -53,7 +53,7 @@ class Nodes(): self._user = user self._node_type = node_type self._guids = set(guids) - self.__ac = ac.NullAC(self._backend, self._user) + self._ac = ac.NullAC(self._backend, self._user) def __eq__(self, other: typing.Any) -> bool: return isinstance(other, Nodes) \ @@ -135,7 +135,7 @@ class Nodes(): # FIXME: Needed? Could be integrated into other AC methods (by passing the predicate!) # This could allow more fine-grained predicate control (e.g. based on ownership) # rather than a global approach like this. - if self.__ac.is_protected_predicate(pred): + if self._ac.is_protected_predicate(pred): raise errors.PermissionDeniedError(pred) # set operation affects all nodes (if possible) @@ -149,7 +149,7 @@ class Nodes(): # check write permissions on existing nodes # As long as the user has write permissions, we don't restrict # the creation or modification of literal values. - guids = set(self.__ac.write_literal(node_type, guids)) + guids = set(self._ac.write_literal(node_type, guids)) # insert literals # TODO: Support passing iterators as values for non-unique predicates @@ -172,14 +172,14 @@ class Nodes(): # Link permissions cover adding and removing links on the source node. # Specifically, link permissions also allow to remove links to other # nodes if needed (e.g. for unique predicates). 
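A minimal standalone sketch (illustrative values) of the name mangling behind the __ac -> _ac rename: double-underscore attributes are stored under a per-class mangled name, which is why the tests reach such members as nodes._Nodes__set.

    class Nodes:
        def __init__(self):
            self.__ac = 'mangled'   # stored on the instance as _Nodes__ac
            self._ac = 'plain'      # stored as _ac

    n = Nodes()
    assert n._Nodes__ac == 'mangled'
    assert n._ac == 'plain'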
- guids = set(self.__ac.link_from_node(node_type, guids)) + guids = set(self._ac.link_from_node(node_type, guids)) # get link targets targets = set(value.guids) # ensure existence of value nodes; create nodes if need be targets = set(self._ensure_nodes(value.node_type, targets)) # check link permissions on target nodes - targets = set(self.__ac.link_to_node(value.node_type, targets)) + targets = set(self._ac.link_to_node(value.node_type, targets)) # insert node links self._backend.set( @@ -203,14 +203,14 @@ class Nodes(): # create nodes if need be if len(missing) > 0: # check which missing nodes can be created - missing = set(self.__ac.createable(node_type, missing)) + missing = set(self._ac.createable(node_type, missing)) # create nodes self._backend.create(node_type, missing) # add bookkeeping triples self._backend.set(node_type, missing, self._backend.schema.predicate(ns.bsm.t_created), [time.time()]) # add permission triples - self.__ac.create(node_type, missing) + self._ac.create(node_type, missing) # return available nodes return existing | missing -- cgit v1.2.3 From c664d19e7d4a0aa0762c30a72ae238cf818891ab Mon Sep 17 00:00:00 2001 From: Matthias Baumgartner Date: Wed, 11 Jan 2023 21:20:47 +0100 Subject: Feature support in the schema * Type annotations * Feature type * Moved from_string from Schema to its own file/function * Root predicate has a valid (not-None) range * ROOT_... export in schema.types * Empty as the default Schema constructor * Schema loads some additional default symbols * _Type instances compare along class hierarchy --- bsfs/schema/__init__.py | 5 +- bsfs/schema/schema.py | 111 +---- bsfs/schema/serialize.py | 143 ++++++ bsfs/schema/types.py | 183 +++++++- test/schema/test_schema.py | 271 ++--------- test/schema/test_serialize.py | 1007 +++++++++++++++++++++++++++++++++++++++++ test/schema/test_types.py | 240 ++++++++-- 7 files changed, 1578 insertions(+), 382 deletions(-) create mode 100644 bsfs/schema/serialize.py create mode 100644 test/schema/test_serialize.py diff --git a/bsfs/schema/__init__.py b/bsfs/schema/__init__.py index ad4d456..dc24313 100644 --- a/bsfs/schema/__init__.py +++ b/bsfs/schema/__init__.py @@ -9,7 +9,8 @@ import typing # inner-module imports from .schema import Schema -from .types import Literal, Node, Predicate +from .serialize import from_string, to_string +from .types import Literal, Node, Predicate, _Vertex # FIXME: _Vertex # exports __all__: typing.Sequence[str] = ( @@ -17,6 +18,8 @@ __all__: typing.Sequence[str] = ( 'Node', 'Predicate', 'Schema', + 'from_string', + 'to_string', ) ## EOF ## diff --git a/bsfs/schema/schema.py b/bsfs/schema/schema.py index c5d4571..1c4c807 100644 --- a/bsfs/schema/schema.py +++ b/bsfs/schema/schema.py @@ -51,11 +51,13 @@ class Schema(): def __init__( self, - predicates: typing.Iterable[types.Predicate], + predicates: typing.Optional[typing.Iterable[types.Predicate]] = None, nodes: typing.Optional[typing.Iterable[types.Node]] = None, literals: typing.Optional[typing.Iterable[types.Literal]] = None, ): # materialize arguments + if predicates is None: + predicates = set() if nodes is None: nodes = set() if literals is None: @@ -63,24 +65,36 @@ class Schema(): nodes = set(nodes) literals = set(literals) predicates = set(predicates) + + # add root types to the schema + nodes.add(types.ROOT_NODE) + literals.add(types.ROOT_LITERAL) + predicates.add(types.ROOT_PREDICATE) + # add minimally necessary types to the schema + literals.add(types.ROOT_NUMBER) + predicates.add(types.ROOT_FEATURE) + # include parents in 
predicates set # TODO: review type annotations and ignores for python >= 3.11 (parents is _Type but should be typing.Self) predicates |= {par for pred in predicates for par in pred.parents()} # type: ignore [misc] # include predicate domain in nodes set nodes |= {pred.domain for pred in predicates} # include predicate range in nodes and literals sets - prange = {pred.range for pred in predicates if pred.range is not None} + prange = {pred.range for pred in predicates} nodes |= {vert for vert in prange if isinstance(vert, types.Node)} literals |= {vert for vert in prange if isinstance(vert, types.Literal)} + # NOTE: ROOT_PREDICATE has a _Vertex as range which is neither in nodes nor literals + # FIXME: with the ROOT_VERTEX missing, the schema is not complete anymore! + # include parents in nodes and literals sets - # NOTE: Must be done after predicate domain/range was handled - # so that their parents are included as well. + # NOTE: Must come after predicate domain/range was handled to have their parents as well. nodes |= {par for node in nodes for par in node.parents()} # type: ignore [misc] literals |= {par for lit in literals for par in lit.parents()} # type: ignore [misc] # assign members self._nodes = {node.uri: node for node in nodes} self._literals = {lit.uri: lit for lit in literals} self._predicates = {pred.uri: pred for pred in predicates} + # verify unique uris if len(nodes) != len(self._nodes): raise errors.ConsistencyError('inconsistent nodes') @@ -214,6 +228,7 @@ class Schema(): >>> Schema.Union([a, b, c]) """ + # FIXME: copy type annotations? if len(args) == 0: raise TypeError('Schema.Union requires at least one argument (Schema or Iterable)') if isinstance(args[0], cls): # args is sequence of Schema instances @@ -295,92 +310,4 @@ class Schema(): """Return the Literal matching the *uri*.""" return self._literals[uri] - - ## constructors ## - - - @classmethod - def Empty(cls) -> 'Schema': # pylint: disable=invalid-name # capitalized classmethod - """Return a minimal Schema.""" - node = types.Node(ns.bsfs.Node, None) - literal = types.Literal(ns.bsfs.Literal, None) - predicate = types.Predicate( - uri=ns.bsfs.Predicate, - parent=None, - domain=node, - range=None, - unique=False, - ) - return cls((predicate, ), (node, ), (literal, )) - - - @classmethod - def from_string(cls, schema: str) -> 'Schema': # pylint: disable=invalid-name # capitalized classmethod - """Load and return a Schema from a string.""" - # parse string into rdf graph - graph = rdflib.Graph() - graph.parse(data=schema, format='turtle') - - def _fetch_hierarchically(factory, curr): - # emit current node - yield curr - # walk through childs - for child in graph.subjects(rdflib.URIRef(ns.rdfs.subClassOf), rdflib.URIRef(curr.uri)): - # convert to URI - child = URI(child) - # check circular dependency - if child == curr.uri or child in {node.uri for node in curr.parents()}: - raise errors.ConsistencyError('circular dependency') - # recurse and emit (sub*)childs - yield from _fetch_hierarchically(factory, factory(child, curr)) - - # fetch nodes - nodes = set(_fetch_hierarchically(types.Node, types.Node(ns.bsfs.Node, None))) - nodes_lut = {node.uri: node for node in nodes} - if len(nodes_lut) != len(nodes): - raise errors.ConsistencyError('inconsistent nodes') - - # fetch literals - literals = set(_fetch_hierarchically(types.Literal, types.Literal(ns.bsfs.Literal, None))) - literals_lut = {lit.uri: lit for lit in literals} - if len(literals_lut) != len(literals): - raise errors.ConsistencyError('inconsistent literals') 
- - # fetch predicates - def build_predicate(uri, parent): - uri = rdflib.URIRef(uri) - # get domain - domains = set(graph.objects(uri, rdflib.RDFS.domain)) - if len(domains) != 1: - raise errors.ConsistencyError(f'inconsistent domain: {domains}') - dom = nodes_lut.get(next(iter(domains))) - if dom is None: - raise errors.ConsistencyError('missing domain') - # get range - ranges = set(graph.objects(uri, rdflib.RDFS.range)) - if len(ranges) != 1: - raise errors.ConsistencyError(f'inconsistent range: {ranges}') - rng = next(iter(ranges)) - rng = nodes_lut.get(rng, literals_lut.get(rng)) - if rng is None: - raise errors.ConsistencyError('missing range') - # get unique flag - uniques = set(graph.objects(uri, rdflib.URIRef(ns.bsfs.unique))) - if len(uniques) != 1: - raise errors.ConsistencyError(f'inconsistent unique flags: {uniques}') - unique = bool(next(iter(uniques))) - # build Predicate - return types.Predicate(URI(uri), parent, dom, rng, unique) - - root_predicate = types.Predicate( - uri=ns.bsfs.Predicate, - parent=None, - domain=nodes_lut[ns.bsfs.Node], - range=None, # FIXME: Unclear how to handle this! Can be either a Literal or a Node - unique=False, - ) - predicates = _fetch_hierarchically(build_predicate, root_predicate) - # return Schema - return cls(predicates, nodes, literals) - ## EOF ## diff --git a/bsfs/schema/serialize.py b/bsfs/schema/serialize.py new file mode 100644 index 0000000..1222aa6 --- /dev/null +++ b/bsfs/schema/serialize.py @@ -0,0 +1,143 @@ +""" + +Part of the BlackStar filesystem (bsfs) module. +A copy of the license is provided with the project. +Author: Matthias Baumgartner, 2022 +""" +# standard imports +from collections import abc +import itertools +import typing + +# external imports +import rdflib + +# bsfs imports +from bsfs.namespace import ns +from bsfs.utils import errors, URI, typename + +# inner-module imports +from . import types +from . import schema + +# exports +__all__: typing.Sequence[str] = ( + 'to_string', + 'from_string', + ) + + +## code ## + +def from_string(schema_str: str) -> schema.Schema: + """Load and return a Schema from a string.""" + # parse string into rdf graph + graph = rdflib.Graph() + graph.parse(data=schema_str, format='turtle') + + # helper functions + def _convert(value): + """Convert the subject type from rdflib to a bsfs native type.""" + if isinstance(value, rdflib.Literal): + return value.value + if isinstance(value, rdflib.URIRef): + return URI(value) + raise errors.BackendError(f'expected Literal or URIRef, found {typename(value)}') + + def _fetch_hierarchically(factory, curr): + """Walk through a rdfs:subClassOf hierarchy, creating symbols along the way.""" + # emit current node + yield curr + # walk through childs + for child in graph.subjects(rdflib.URIRef(ns.rdfs.subClassOf), rdflib.URIRef(curr.uri)): + # fetch annotations + annotations = { + URI(pred): _convert(value) + for pred, value # FIXME: preserve datatype of value?! 
+ in graph.predicate_objects(child) + if URI(pred) != ns.rdfs.subClassOf + } + # convert child to URI + child = URI(child) + # check circular dependency + if child == curr.uri or child in {node.uri for node in curr.parents()}: + raise errors.ConsistencyError('circular dependency') + # recurse and emit (sub*)childs + yield from _fetch_hierarchically(factory, factory(child, curr, **annotations)) + + # fetch nodes + nodes = set(_fetch_hierarchically(types.Node, types.ROOT_NODE)) + nodes_lut = {node.uri: node for node in nodes} + if len(nodes_lut) != len(nodes): + raise errors.ConsistencyError('inconsistent nodes') + + # fetch literals + literals = set(_fetch_hierarchically(types.Literal, types.ROOT_LITERAL)) + literals_lut = {lit.uri: lit for lit in literals} + if len(literals_lut) != len(literals): + raise errors.ConsistencyError('inconsistent literals') + + # fetch predicates + # FIXME: type annotation + def _fetch_value(subject: URI, predicate: rdflib.URIRef, value_factory) -> typing.Optional[typing.Any]: + """Fetch the object of a given subject and predicate. Raises a `errors.ConsistencyError` if multiple objects match.""" + values = list(graph.objects(rdflib.URIRef(subject), predicate)) + if len(values) == 0: + return None + elif len(values) == 1: + return value_factory(values[0]) + else: + raise errors.ConsistencyError(f'{subject} has multiple values for predicate {str(predicate)}, expected zero or one') + + def _build_predicate(uri, parent, **annotations): + """Predicate factory.""" + # break out on root feature type + if uri == types.ROOT_FEATURE.uri: + return types.ROOT_FEATURE + # clean annotations + annotations.pop(ns.rdfs.domain, None) + annotations.pop(ns.rdfs.range, None) + annotations.pop(ns.bsfs.unique, None) + # get domain + dom = _fetch_value(uri, rdflib.RDFS.domain, URI) + if dom is not None and dom not in nodes_lut: + raise errors.ConsistencyError(f'predicate {uri} has undefined domain {dom}') + elif dom is not None: + dom = nodes_lut[dom] + # get range + rng = _fetch_value(uri, rdflib.RDFS.range, URI) + if rng is not None and rng not in nodes_lut and rng not in literals_lut: + raise errors.ConsistencyError(f'predicate {uri} has undefined range {rng}') + elif rng is not None: + rng = nodes_lut.get(rng, literals_lut.get(rng)) + # get unique + unique = _fetch_value(uri, rdflib.URIRef(ns.bsfs.unique), bool) + # handle feature types + if isinstance(parent, types.Feature): + # clean annotations + annotations.pop(ns.bsfs.dimension, None) + annotations.pop(ns.bsfs.dtype, None) + annotations.pop(ns.bsfs.distance, None) + # get dimension + dimension = _fetch_value(uri, rdflib.URIRef(ns.bsfs.dimension), int) + # get dtype + dtype = _fetch_value(uri, rdflib.URIRef(ns.bsfs.dtype), URI) + # get distance + distance = _fetch_value(uri, rdflib.URIRef(ns.bsfs.distance), URI) + # return feature + return parent.get_child(URI(uri), domain=dom, range=rng, unique=unique, + dtype=dtype, dimension=dimension, distance=distance, **annotations) + # handle non-feature predicate + return parent.get_child(URI(uri), domain=dom, range=rng, unique=unique, **annotations) + predicates = _fetch_hierarchically(_build_predicate, types.ROOT_PREDICATE) + + return schema.Schema(predicates, nodes, literals) + + + +def to_string(schema_inst: schema.Schema) -> str: + """ + """ + raise NotImplementedError() + +## EOF ## diff --git a/bsfs/schema/types.py b/bsfs/schema/types.py index 54a7e99..e737263 100644 --- a/bsfs/schema/types.py +++ b/bsfs/schema/types.py @@ -8,6 +8,7 @@ Author: Matthias Baumgartner, 2022 import 
typing # bsfs imports +from bsfs.namespace import ns from bsfs.utils import errors, URI, typename # exports @@ -15,6 +16,7 @@ __all__: typing.Sequence[str] = ( 'Literal', 'Node', 'Predicate', + 'Feature', ) @@ -99,9 +101,11 @@ class _Type(): self, uri: URI, parent: typing.Optional['_Type'] = None, + **annotations: typing.Any, ): self.uri = uri self.parent = parent + self.annotations = annotations def parents(self) -> typing.Generator['_Type', None, None]: """Generate a list of parent nodes.""" @@ -110,9 +114,17 @@ class _Type(): yield curr curr = curr.parent - def get_child(self, uri: URI, **kwargs): + def get_child( + self, + uri: URI, + **kwargs, + ): """Return a child of the current class.""" - return type(self)(uri, self, **kwargs) + return type(self)( + uri=uri, + parent=self, + **kwargs + ) def __str__(self) -> str: return f'{typename(self)}({self.uri})' @@ -138,7 +150,7 @@ class _Type(): def __lt__(self, other: typing.Any) -> bool: """Return True iff *self* is a true subclass of *other*.""" - if not type(self) is type(other): # type mismatch # pylint: disable=unidiomatic-typecheck + if not isinstance(other, type(self)): return NotImplemented if self.uri == other.uri: # equivalence return False @@ -151,7 +163,7 @@ class _Type(): def __le__(self, other: typing.Any) -> bool: """Return True iff *self* is equivalent or a subclass of *other*.""" - if not type(self) is type(other): # type mismatch # pylint: disable=unidiomatic-typecheck + if not isinstance(other, type(self)): return NotImplemented if self.uri == other.uri: # equivalence return True @@ -164,7 +176,7 @@ class _Type(): def __gt__(self, other: typing.Any) -> bool: """Return True iff *self* is a true superclass of *other*.""" - if not type(self) is type(other): # type mismatch # pylint: disable=unidiomatic-typecheck + if not isinstance(other, type(self)): return NotImplemented if self.uri == other.uri: # equivalence return False @@ -177,7 +189,7 @@ class _Type(): def __ge__(self, other: typing.Any) -> bool: """Return True iff *self* is eqiuvalent or a superclass of *other*.""" - if not type(self) is type(other): # type mismatch # pylint: disable=unidiomatic-typecheck + if not isinstance(other, type(self)): return NotImplemented if self.uri == other.uri: # equivalence return True @@ -191,30 +203,33 @@ class _Type(): class _Vertex(_Type): """Graph vertex types. Can be a Node or a Literal.""" - def __init__(self, uri: URI, parent: typing.Optional['_Vertex']): - super().__init__(uri, parent) + parent: typing.Optional['_Vertex'] + def __init__(self, uri: URI, parent: typing.Optional['_Vertex'], **kwargs): + super().__init__(uri, parent, **kwargs) class Node(_Vertex): """Node type.""" - def __init__(self, uri: URI, parent: typing.Optional['Node']): - super().__init__(uri, parent) + parent: typing.Optional['Node'] + def __init__(self, uri: URI, parent: typing.Optional['Node'], **kwargs): + super().__init__(uri, parent, **kwargs) class Literal(_Vertex): """Literal type.""" - def __init__(self, uri: URI, parent: typing.Optional['Literal']): - super().__init__(uri, parent) + parent: typing.Optional['Literal'] + def __init__(self, uri: URI, parent: typing.Optional['Literal'] ,**kwargs): + super().__init__(uri, parent, **kwargs) class Predicate(_Type): - """Predicate type.""" + """Predicate base type.""" # source type. domain: Node # destination type. - range: typing.Optional[typing.Union[Node, Literal]] + range: _Vertex # maximum cardinality of type. 
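+    # (a unique predicate admits at most one value per subject)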
unique: bool @@ -223,25 +238,26 @@ class Predicate(_Type): self, # Type members uri: URI, - parent: typing.Optional['Predicate'], + parent: '_PredicateBase', # Predicate members domain: Node, - range: typing.Optional[typing.Union[Node, Literal]], # pylint: disable=redefined-builtin + range: _Vertex, # pylint: disable=redefined-builtin unique: bool, + **kwargs, ): # check arguments if not isinstance(domain, Node): raise TypeError(domain) - if range is not None and not isinstance(range, Node) and not isinstance(range, Literal): + if range != ROOT_VERTEX and not isinstance(range, (Node, Literal)): raise TypeError(range) # initialize - super().__init__(uri, parent) + super().__init__(uri, parent, **kwargs) self.domain = domain self.range = range - self.unique = unique + self.unique = bool(unique) def __hash__(self) -> int: - return hash((super().__hash__(), self.domain, self.range, self.unique)) + return hash((super().__hash__(), self.domain, self.unique, self.range)) def __eq__(self, other: typing.Any) -> bool: return super().__eq__(other) \ @@ -264,13 +280,132 @@ class Predicate(_Type): raise errors.ConsistencyError(f'{domain} must be a subclass of {self.domain}') if range is None: range = self.range - if range is None: # inherited range from ns.bsfs.Predicate - raise ValueError('range must be defined by the parent or argument') - if self.range is not None and not range <= self.range: + # NOTE: The root predicate has a Vertex as range, which is neither a parent of the root + # Node nor Literal. Hence, that test is skipped since a child should be allowed to + # specialize from Vertex to anything. + if self.range != ROOT_VERTEX and not range <= self.range: raise errors.ConsistencyError(f'{range} must be a subclass of {self.range}') if unique is None: unique = self.unique - return super().get_child(uri, domain=domain, range=range, unique=unique, **kwargs) + return super().get_child( + uri=uri, + domain=domain, + range=range, + unique=unique, + **kwargs + ) + + +class Feature(Predicate): + """Feature base type.""" + + # Number of feature vector dimensions. + dimension: int + + # Feature vector datatype. + dtype: URI + + # Distance measure to compare feature vectors. 
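+    # (a URI naming the metric, e.g. ns.bsfs.euclidean as in the ROOT_FEATURE default below)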
+ distance: URI + + def __init__( + self, + # Type members + uri: URI, + parent: Predicate, + # Predicate members + domain: Node, + range: Literal, + unique: bool, + # Feature members + dimension: int, + dtype: URI, + distance: URI, + **kwargs, + ): + super().__init__(uri, parent, domain, range, unique, **kwargs) + self.dimension = int(dimension) + self.dtype = URI(dtype) + self.distance = URI(distance) + + def __hash__(self) -> int: + return hash((super().__hash__(), self.dimension, self.dtype, self.distance)) + + def __eq__(self, other: typing.Any) -> bool: + return super().__eq__(other) \ + and self.dimension == other.dimension \ + and self.dtype == other.dtype \ + and self.distance == other.distance + + def get_child( + self, + uri: URI, + domain: typing.Optional[Node] = None, + range: typing.Optional[Literal] = None, # pylint: disable=redefined-builtin + unique: typing.Optional[bool] = None, + dimension: typing.Optional[int] = None, + dtype: typing.Optional[URI] = None, + distance: typing.Optional[URI] = None, + **kwargs, + ): + """Return a child of the current class.""" + if dimension is None: + dimension = self.dimension + if dtype is None: + dtype = self.dtype + if distance is None: + distance = self.distance + return super().get_child( + uri=uri, + domain=domain, + range=range, + unique=unique, + dimension=dimension, + dtype=dtype, + distance=distance, + **kwargs, + ) +# essential vertices +ROOT_VERTEX = _Vertex( + uri=ns.bsfs.Vertex, + parent=None, + ) + +ROOT_NODE = Node( + uri=ns.bsfs.Node, + parent=None, + ) + +ROOT_LITERAL = Literal( + uri=ns.bsfs.Literal, + parent=None, + ) + +ROOT_NUMBER = Literal( + uri=ns.bsfs.Number, + parent=ROOT_LITERAL, + ) + +# essential predicates +ROOT_PREDICATE = Predicate( + uri=ns.bsfs.Predicate, + parent=None, + domain=ROOT_NODE, + range=ROOT_VERTEX, + unique=False, + ) + +ROOT_FEATURE = Feature( + uri=ns.bsfs.Feature, + parent=ROOT_PREDICATE, + domain=ROOT_NODE, + range=ROOT_LITERAL, + unique=False, + dimension=1, + dtype=ns.bsfs.f16, + distance=ns.bsfs.euclidean, + ) + ## EOF ## diff --git a/test/schema/test_schema.py b/test/schema/test_schema.py index 888cdca..1b45db0 100644 --- a/test/schema/test_schema.py +++ b/test/schema/test_schema.py @@ -10,7 +10,7 @@ import unittest # bsfs imports from bsfs.namespace import ns -from bsfs.schema import types +from bsfs.schema import types, from_string from bsfs.utils import errors # objects to test @@ -55,7 +55,7 @@ class TestSchema(unittest.TestCase): ''' # nodes - self.n_root = types.Node(ns.bsfs.Node, None) + self.n_root = types.ROOT_NODE self.n_ent = types.Node(ns.bsfs.Entity, types.Node(ns.bsfs.Node, None)) self.n_img = types.Node(ns.bsfs.Image, types.Node(ns.bsfs.Entity, types.Node(ns.bsfs.Node, None))) self.n_tag = types.Node(ns.bsfs.Tag, types.Node(ns.bsfs.Node, None)) @@ -63,24 +63,32 @@ class TestSchema(unittest.TestCase): self.nodes = [self.n_root, self.n_ent, self.n_img, self.n_tag, self.n_unused] # literals - self.l_root = types.Literal(ns.bsfs.Literal, None) + self.l_root = types.ROOT_LITERAL + self.l_number = types.ROOT_NUMBER self.l_string = types.Literal(ns.xsd.string, types.Literal(ns.bsfs.Literal, None)) - self.l_integer = types.Literal(ns.xsd.integer, types.Literal(ns.bsfs.Literal, None)) + self.l_integer = types.Literal(ns.xsd.integer, self.l_number) self.l_unused = types.Literal(ns.xsd.boolean, types.Literal(ns.bsfs.Literal, None)) - self.literals = [self.l_root, self.l_string, self.l_integer, self.l_unused] + self.literals = [self.l_root, self.l_number, self.l_string, 
self.l_integer, self.l_unused] # predicates - self.p_root = types.Predicate(ns.bsfs.Predicate, None, types.Node(ns.bsfs.Node, None), None, False) + self.p_root = types.ROOT_PREDICATE + self.f_root = types.ROOT_FEATURE self.p_tag = self.p_root.get_child(ns.bse.tag, self.n_ent, self.n_tag, False) self.p_group = self.p_tag.get_child(ns.bse.group, self.n_img, self.n_tag, False) self.p_comment = self.p_root.get_child(ns.bse.comment, self.n_root, self.l_string, True) - self.predicates = [self.p_root, self.p_tag, self.p_group, self.p_comment] + self.predicates = [self.p_root, self.f_root, self.p_tag, self.p_group, self.p_comment] def test_construction(self): + # no args yields a minimal schema + schema = Schema() + self.assertSetEqual(set(schema.nodes()), {self.n_root}) + self.assertSetEqual(set(schema.literals()), {self.l_root, self.l_number}) + self.assertSetEqual(set(schema.predicates()), {self.p_root, self.f_root}) + # nodes and literals are optional schema = Schema(self.predicates) self.assertSetEqual(set(schema.nodes()), {self.n_root, self.n_ent, self.n_img, self.n_tag}) - self.assertSetEqual(set(schema.literals()), {self.l_root, self.l_string}) + self.assertSetEqual(set(schema.literals()), {self.l_root, self.l_string, self.l_number}) self.assertSetEqual(set(schema.predicates()), set(self.predicates)) # predicates, nodes, and literals are respected @@ -101,21 +109,21 @@ class TestSchema(unittest.TestCase): # literals are complete schema = Schema(self.predicates, self.nodes, None) - self.assertSetEqual(set(schema.literals()), {self.l_root, self.l_string}) + self.assertSetEqual(set(schema.literals()), {self.l_root, self.l_string, self.l_number}) schema = Schema(self.predicates, self.nodes, []) - self.assertSetEqual(set(schema.literals()), {self.l_root, self.l_string}) + self.assertSetEqual(set(schema.literals()), {self.l_root, self.l_string, self.l_number}) schema = Schema(self.predicates, self.nodes, [self.l_string]) - self.assertSetEqual(set(schema.literals()), {self.l_root, self.l_string}) + self.assertSetEqual(set(schema.literals()), {self.l_root, self.l_string, self.l_number}) schema = Schema(self.predicates, self.nodes, [self.l_integer]) - self.assertSetEqual(set(schema.literals()), {self.l_root, self.l_string, self.l_integer}) + self.assertSetEqual(set(schema.literals()), {self.l_root, self.l_string, self.l_integer, self.l_number}) schema = Schema(self.predicates, self.nodes, [self.l_integer, self.l_unused]) self.assertSetEqual(set(schema.literals()), set(self.literals)) # predicates are complete schema = Schema([], self.nodes, self.literals) - self.assertSetEqual(set(schema.predicates()), set()) + self.assertSetEqual(set(schema.predicates()), {self.p_root, self.f_root}) schema = Schema([self.p_group], self.nodes, self.literals) - self.assertSetEqual(set(schema.predicates()), {self.p_root, self.p_tag, self.p_group}) + self.assertSetEqual(set(schema.predicates()), {self.p_root, self.f_root, self.p_tag, self.p_group}) schema = Schema([self.p_group, self.p_comment], self.nodes, self.literals) self.assertSetEqual(set(schema.predicates()), set(self.predicates)) @@ -153,21 +161,28 @@ class TestSchema(unittest.TestCase): self.assertRaises(errors.ConsistencyError, Schema, {}, {types.Node(ns.bsfs.Foo, None)}, {types.Node(ns.bsfs.Foo, None)}) self.assertRaises(errors.ConsistencyError, Schema, - {types.Predicate(ns.bsfs.Foo, None, types.Node(ns.bsfs.Node, None), None, False)}, {}, {types.Node(ns.bsfs.Foo, None)}) + {types.Predicate(ns.bsfs.Foo, None, types.Node(ns.bsfs.Node, None), 
types.ROOT_VERTEX, False)}, {}, {types.Node(ns.bsfs.Foo, None)}) self.assertRaises(errors.ConsistencyError, Schema, - {types.Predicate(ns.bsfs.Foo, None, types.Node(ns.bsfs.Node, None), None, False)}, {types.Node(ns.bsfs.Foo, None)}, {}) + {types.Predicate(ns.bsfs.Foo, None, types.Node(ns.bsfs.Node, None), types.ROOT_VERTEX, False)}, {types.Node(ns.bsfs.Foo, None)}, {}) self.assertRaises(errors.ConsistencyError, Schema, - {types.Predicate(ns.bsfs.Foo, None, types.Node(ns.bsfs.Node, None), None, False)}, {types.Node(ns.bsfs.Foo, None)}, {types.Node(ns.bsfs.Foo, None)}) + {types.Predicate(ns.bsfs.Foo, None, types.Node(ns.bsfs.Node, None), types.ROOT_VERTEX, False)}, {types.Node(ns.bsfs.Foo, None)}, {types.Node(ns.bsfs.Foo, None)}) + def test_str(self): + # string conversion self.assertEqual(str(Schema([])), 'Schema()') self.assertEqual(str(Schema([], [], [])), 'Schema()') self.assertEqual(str(Schema(self.predicates, self.nodes, self.literals)), 'Schema()') - self.assertEqual(repr(Schema([])), 'Schema([], [], [])') - self.assertEqual(repr(Schema([], [], [])), 'Schema([], [], [])') + # repr conversion with only default nodes, literals, and predicates + n = [ns.bsfs.Node] + l = [ns.bsfs.Literal, ns.bsfs.Number] + p = [ns.bsfs.Feature, ns.bsfs.Predicate] + self.assertEqual(repr(Schema()), f'Schema({n}, {l}, {p})') + self.assertEqual(repr(Schema([], [], [])), f'Schema({n}, {l}, {p})') + # repr conversion n = [ns.bsfs.Entity, ns.bsfs.Image, ns.bsfs.Node, ns.bsfs.Tag, ns.bsfs.Unused] - l = [ns.bsfs.Literal, ns.xsd.boolean, ns.xsd.integer, ns.xsd.string] - p = [ns.bse.comment, ns.bse.group, ns.bse.tag, ns.bsfs.Predicate] + l = [ns.bsfs.Literal, ns.bsfs.Number, ns.xsd.boolean, ns.xsd.integer, ns.xsd.string] + p = [ns.bse.comment, ns.bse.group, ns.bse.tag, ns.bsfs.Feature, ns.bsfs.Predicate] self.assertEqual(repr(Schema(self.predicates, self.nodes, self.literals)), f'Schema({n}, {l}, {p})') def test_equality(self): @@ -258,11 +273,11 @@ class TestSchema(unittest.TestCase): self.assertTrue(operator.lt(Schema({self.p_tag}), Schema({self.p_group}))) self.assertTrue(operator.le(Schema({self.p_tag}), Schema({self.p_group}))) # subset considers differences in predicates and literals - self.assertTrue(operator.lt(Schema.Empty(), Schema({self.p_comment}))) + self.assertTrue(operator.lt(Schema(), Schema({self.p_comment}))) # subset considers differences in predicates, nodes, and literals - self.assertTrue(operator.lt(Schema({}), Schema.Empty())) - self.assertTrue(operator.lt(Schema({self.p_tag}), Schema.from_string(self.schema_str))) - self.assertTrue(operator.le(Schema({self.p_tag}), Schema.from_string(self.schema_str))) + self.assertTrue(operator.le(Schema({}), Schema())) + self.assertTrue(operator.lt(Schema({self.p_tag}), from_string(self.schema_str))) + self.assertTrue(operator.le(Schema({self.p_tag}), from_string(self.schema_str))) self.assertFalse(operator.lt(Schema({self.p_comment}), Schema({self.p_tag}))) self.assertFalse(operator.le(Schema({self.p_comment}), Schema({self.p_tag}))) @@ -280,11 +295,11 @@ class TestSchema(unittest.TestCase): self.assertTrue(operator.gt(Schema({self.p_group}), Schema({self.p_tag}))) self.assertTrue(operator.ge(Schema({self.p_group}), Schema({self.p_tag}))) # superset considers differences in predicates and literals - self.assertTrue(operator.gt(Schema({self.p_comment}), Schema.Empty())) + self.assertTrue(operator.gt(Schema({self.p_comment}), Schema())) # superset considers differences in predicates, nodes, and literals - self.assertTrue(operator.gt(Schema.Empty(), 
Schema({}))) - self.assertTrue(operator.gt(Schema.from_string(self.schema_str), Schema({self.p_tag}))) - self.assertTrue(operator.ge(Schema.from_string(self.schema_str), Schema({self.p_tag}))) + self.assertTrue(operator.ge(Schema(), Schema({}))) + self.assertTrue(operator.gt(from_string(self.schema_str), Schema({self.p_tag}))) + self.assertTrue(operator.ge(from_string(self.schema_str), Schema({self.p_tag}))) self.assertFalse(operator.gt(Schema({self.p_tag}), Schema({self.p_comment}))) self.assertFalse(operator.ge(Schema({self.p_tag}), Schema({self.p_comment}))) @@ -351,26 +366,26 @@ class TestSchema(unittest.TestCase): # difference does not contain predicates from the RHS diff = Schema({self.p_tag, self.p_comment}).diff(Schema({self.p_group})) self.assertSetEqual(set(diff.nodes), set()) - self.assertSetEqual(set(diff.literals), {self.l_root, self.l_string}) + self.assertSetEqual(set(diff.literals), {self.l_string}) self.assertSetEqual(set(diff.predicates), {self.p_comment}) # difference considers extra nodes and literals diff = Schema({self.p_tag}, {self.n_unused}, {self.l_unused}).diff(Schema({self.p_tag})) self.assertSetEqual(set(diff.nodes), {self.n_unused}) - self.assertSetEqual(set(diff.literals), {self.l_root, self.l_unused}) + self.assertSetEqual(set(diff.literals), {self.l_unused}) self.assertSetEqual(set(diff.predicates), set()) # difference considers inconsistent types diff = Schema({self.p_tag}, {self.n_unused}, {self.l_unused}).diff( Schema({self.p_tag}, {types.Node(ns.bsfs.Unused, None)}, {types.Literal(ns.xsd.boolean, None)})) self.assertSetEqual(set(diff.nodes), {self.n_unused}) - self.assertSetEqual(set(diff.literals), {self.l_root, self.l_unused}) + self.assertSetEqual(set(diff.literals), {self.l_unused}) self.assertSetEqual(set(diff.predicates), set()) # __sub__ is an alias for diff diff = Schema({self.p_comment}, {self.n_unused}, {self.l_unused}) - Schema({self.p_group}) self.assertSetEqual(set(diff.nodes), {self.n_unused}) - self.assertSetEqual(set(diff.literals), {self.l_root, self.l_string, self.l_unused}) + self.assertSetEqual(set(diff.literals), {self.l_string, self.l_unused}) self.assertSetEqual(set(diff.predicates), {self.p_comment}) # __sub__ only accepts Schema instances class Foo(): pass @@ -547,196 +562,6 @@ class TestSchema(unittest.TestCase): self.assertFalse(schema.has_predicate(ns.bse.mimetype)) self.assertFalse(schema.has_predicate(self.p_root)) - def test_empty(self): - self.assertEqual(Schema.Empty(), Schema( - [types.Predicate(ns.bsfs.Predicate, None, types.Node(ns.bsfs.Node, None), None, False)], - [types.Node(ns.bsfs.Node, None)], - [types.Literal(ns.bsfs.Literal, None)], - )) - - def test_from_string(self): - # from_string creates a schema - self.assertEqual( - Schema(self.predicates, self.nodes, self.literals), - Schema.from_string(self.schema_str)) - - # schema contains at least the root types - self.assertEqual(Schema.from_string(''), Schema({self.p_root}, {self.n_root}, {self.l_root})) - - # custom example - self.assertEqual( - Schema({types.Predicate(ns.bsfs.Predicate, None, self.n_root, None, False).get_child( - ns.bse.filename, self.n_ent, self.l_string, False)}), - Schema.from_string(''' - prefix rdfs: - prefix xsd: - prefix bsfs: - prefix bse: - - bsfs:Entity rdfs:subClassOf bsfs:Node . - xsd:string rdfs:subClassOf bsfs:Literal . - - bse:filename rdfs:subClassOf bsfs:Predicate ; - rdfs:domain bsfs:Entity ; - rdfs:range xsd:string ; - bsfs:unique "false"^^xsd:boolean . 
- ''')) - - # all nodes must be defined - self.assertRaises(errors.ConsistencyError, Schema.from_string, ''' - prefix rdfs: - prefix xsd: - prefix bsfs: - prefix bse: - - xsd:string rdfs:subClassOf bsfs:Literal . - - bse:filename rdfs:subClassOf bsfs:Predicate ; - rdfs:domain bsfs:Entity ; - rdfs:range xsd:string ; - bsfs:unique "false"^^xsd:boolean . - ''') - - # all literals must be defined - self.assertRaises(errors.ConsistencyError, Schema.from_string, ''' - prefix rdfs: - prefix xsd: - prefix bsfs: - prefix bse: - - bsfs:Entity rdfs:subClassOf bsfs:Node . - - bse:filename rdfs:subClassOf bsfs:Predicate ; - rdfs:domain bsfs:Entity ; - rdfs:range xsd:string ; - bsfs:unique "false"^^xsd:boolean . - ''') - - # must not have circular dependencies - self.assertRaises(errors.ConsistencyError, Schema.from_string, ''' - prefix rdfs: - prefix bsfs: - bsfs:Entity rdfs:subClassOf bsfs:Node . - # ah, a nice circular dependency - bsfs:Entity rdfs:subClassOf bsfs:Document . - bsfs:Document rdfs:subClassOf bsfs:Entity . - bsfs:PDF rdfs:subClassOf bsfs:Document . - ''') - - # range must be a node or literal - self.assertRaises(errors.ConsistencyError, Schema.from_string, ''' - prefix rdfs: - prefix xsd: - prefix bsfs: - prefix bse: - - bsfs:Entity rdfs:subClassOf bsfs:Node . - - bse:filename rdfs:subClassOf bsfs:Predicate ; - rdfs:domain bsfs:Entity ; - rdfs:range xsd:string ; - bsfs:unique "false"^^xsd:boolean . - ''') - self.assertRaises(errors.ConsistencyError, Schema.from_string, ''' - prefix rdfs: - prefix xsd: - prefix bsfs: - prefix bse: - - bsfs:Entity rdfs:subClassOf bsfs:Node . - - bse:filename rdfs:subClassOf bsfs:Predicate ; - rdfs:domain bsfs:Entity ; - rdfs:range bsfs:Foo ; - bsfs:unique "false"^^xsd:boolean . - ''') - self.assertRaises(errors.ConsistencyError, Schema.from_string, ''' - prefix rdfs: - prefix xsd: - prefix bsfs: - prefix bse: - - bsfs:Entity rdfs:subClassOf bsfs:Node . - - bse:filename rdfs:subClassOf bsfs:Predicate ; - rdfs:domain bsfs:Entity ; - rdfs:range bsfs:Predicate ; - bsfs:unique "false"^^xsd:boolean . - ''') - - # must be consistent - self.assertRaises(errors.ConsistencyError, Schema.from_string, ''' - prefix rdfs: - prefix xsd: - prefix bsfs: - - bsfs:Entity rdfs:subClassOf bsfs:Node . - bsfs:Document rdfs:subClassOf bsfs:Node . - bsfs:Document rdfs:subClassOf bsfs:Entity. - ''') - self.assertRaises(errors.ConsistencyError, Schema.from_string, ''' - prefix rdfs: - prefix xsd: - prefix bsfs: - - xsd:string rdfs:subClassOf bsfs:Literal . - xsd:name rdfs:subClassOf bsfs:Literal . - xsd:name rdfs:subClassOf xsd:string . - ''') - self.assertRaises(errors.ConsistencyError, Schema.from_string, ''' - prefix rdfs: - prefix xsd: - prefix bsfs: - prefix bse: - - bsfs:Entity rdfs:subClassOf bsfs:Node . - - bse:foo rdfs:subClassOf bsfs:Predicate ; - rdfs:domain bsfs:Node ; - rdfs:range bsfs:Node ; - bsfs:unique "false"^^xsd:boolean . - - bse:foo rdfs:subClassOf bsfs:Predicate ; - rdfs:domain bsfs:Entity . - - ''') - self.assertRaises(errors.ConsistencyError, Schema.from_string, ''' - prefix rdfs: - prefix xsd: - prefix bsfs: - prefix bse: - - bsfs:Entity rdfs:subClassOf bsfs:Node . - - bse:foo rdfs:subClassOf bsfs:Predicate ; - rdfs:domain bsfs:Node ; - rdfs:range bsfs:Node ; - bsfs:unique "false"^^xsd:boolean . - - bse:foo rdfs:subClassOf bsfs:Predicate ; - rdfs:range bsfs:Entity . - - ''') - self.assertRaises(errors.ConsistencyError, Schema.from_string, ''' - prefix rdfs: - prefix xsd: - prefix bsfs: - prefix bse: - - bsfs:Entity rdfs:subClassOf bsfs:Node . 
- - bse:foo rdfs:subClassOf bsfs:Predicate ; - rdfs:domain bsfs:Node ; - rdfs:range bsfs:Node ; - bsfs:unique "false"^^xsd:boolean . - - bse:foo rdfs:subClassOf bsfs:Predicate ; - bsfs:unique "true"^^xsd:boolean . - - ''') - - - ## main ## if __name__ == '__main__': diff --git a/test/schema/test_serialize.py b/test/schema/test_serialize.py new file mode 100644 index 0000000..7392cc0 --- /dev/null +++ b/test/schema/test_serialize.py @@ -0,0 +1,1007 @@ +""" + +Part of the tagit test suite. +A copy of the license is provided with the project. +Author: Matthias Baumgartner, 2022 +""" +# imports +import unittest + +# bsfs imports +from bsfs.namespace import ns +from bsfs.schema import Schema, types +from bsfs.utils import errors, URI + +# objects to test +from bsfs.schema.serialize import from_string, to_string + + +## code ## + +class TestFromString(unittest.TestCase): + + def test_empty(self): + # schema contains at least the root types + self.assertEqual(from_string(''), Schema()) + + + def test_circular_dependency(self): + # must not have circular dependencies + self.assertRaises(errors.ConsistencyError, from_string, ''' + prefix rdfs: + prefix bsfs: + bsfs:Entity rdfs:subClassOf bsfs:Node . + # ah, a nice circular dependency + bsfs:Entity rdfs:subClassOf bsfs:Document . + bsfs:Document rdfs:subClassOf bsfs:Entity . + bsfs:PDF rdfs:subClassOf bsfs:Document . + ''') + + + def test_node(self): + # all nodes must be defined + self.assertRaises(errors.ConsistencyError, from_string, ''' + prefix rdfs: + prefix xsd: + prefix bsfs: + prefix bse: + + xsd:string rdfs:subClassOf bsfs:Literal . + + bse:filename rdfs:subClassOf bsfs:Predicate ; + rdfs:domain bsfs:Entity ; + rdfs:range xsd:string ; + bsfs:unique "false"^^xsd:boolean . + ''') + + # node definitions must be consistent (cannot re-use a node uri) + self.assertRaises(errors.ConsistencyError, from_string, ''' + prefix rdfs: + prefix xsd: + prefix bsfs: + + bsfs:Entity rdfs:subClassOf bsfs:Node . + bsfs:Document rdfs:subClassOf bsfs:Node . + bsfs:Document rdfs:subClassOf bsfs:Entity . # conflicting parent + ''') + + # additional nodes can be defined + n_unused = types.ROOT_NODE.get_child(ns.bsfs.unused) + self.assertEqual(Schema({}, {n_unused}), from_string(''' + prefix rdfs: + prefix xsd: + prefix bsfs: + prefix bse: + + bsfs:unused rdfs:subClassOf bsfs:Node . # unused symbol + ''')) + + # a node can have multiple children + n_ent = types.ROOT_NODE.get_child(ns.bsfs.Entity) + n_tag = types.ROOT_NODE.get_child(ns.bsfs.Tag) + n_doc = n_ent.get_child(ns.bsfs.Document) + n_image = n_ent.get_child(ns.bsfs.Image) + self.assertEqual(Schema({}, {n_ent, n_tag, n_doc, n_image}), from_string(''' + prefix rdfs: + prefix xsd: + prefix bsfs: + prefix bse: + + # nodes inherit from same parent + bsfs:Entity rdfs:subClassOf bsfs:Node . + bsfs:Tag rdfs:subClassOf bsfs:Node . + + # nodes inherit from same parent + bsfs:Document rdfs:subClassOf bsfs:Entity . + bsfs:Image rdfs:subClassOf bsfs:Entity . + ''')) + + # additional nodes can be defined and used + n_ent = types.ROOT_NODE.get_child(ns.bsfs.Entity) + l_string = types.ROOT_LITERAL.get_child(ns.xsd.string) + p_filename = types.ROOT_PREDICATE.get_child(ns.bse.filename, + n_ent, l_string, False) + self.assertEqual(Schema({p_filename}), from_string(''' + prefix rdfs: + prefix xsd: + prefix bsfs: + prefix bse: + + bsfs:Entity rdfs:subClassOf bsfs:Node . + xsd:string rdfs:subClassOf bsfs:Literal . 
+ + bse:filename rdfs:subClassOf bsfs:Predicate ; + rdfs:domain bsfs:Entity ; + rdfs:range xsd:string ; + bsfs:unique "false"^^xsd:boolean . + ''')) + + # nodes can have annotations + self.assertDictEqual(from_string(''' + prefix rdfs: + prefix xsd: + prefix bsfs: + + bsfs:Entity rdfs:subClassOf bsfs:Node . + + ''').node(ns.bsfs.Entity).annotations, {}) + self.assertDictEqual(from_string(''' + prefix rdfs: + prefix xsd: + prefix bsfs: + + bsfs:Entity rdfs:subClassOf bsfs:Node ; + rdfs:label "hello world"^^xsd:string ; + bsfs:foo "1234"^^xsd:integer . + + ''').node(ns.bsfs.Entity).annotations, { + ns.rdfs.label: 'hello world', + ns.bsfs.foo: 1234, + }) + + + def test_literal(self): + # all literals must be defined + self.assertRaises(errors.ConsistencyError, from_string, ''' + prefix rdfs: + prefix xsd: + prefix bsfs: + prefix bse: + + bsfs:Entity rdfs:subClassOf bsfs:Node . + + bse:filename rdfs:subClassOf bsfs:Predicate ; + rdfs:domain bsfs:Entity ; + rdfs:range xsd:string ; # undefined symbol + bsfs:unique "false"^^xsd:boolean . + ''') + + # literal definitions must be consistent (cannot re-use a literal uri) + self.assertRaises(errors.ConsistencyError, from_string, ''' + prefix rdfs: + prefix xsd: + prefix bsfs: + + xsd:string rdfs:subClassOf bsfs:Literal . + xsd:name rdfs:subClassOf bsfs:Literal . + xsd:name rdfs:subClassOf xsd:string . # conflicting parent + ''') + + # additional literals can be defined + l_unused = types.ROOT_LITERAL.get_child(ns.xsd.unused) + self.assertEqual(Schema({}, {}, {l_unused}), from_string(''' + prefix rdfs: + prefix xsd: + prefix bsfs: + prefix bse: + + xsd:unused rdfs:subClassOf bsfs:Literal . # unused symbol + ''')) + + # a literal can have multiple children + l_string = types.ROOT_LITERAL.get_child(ns.xsd.string) + l_integer = types.ROOT_LITERAL.get_child(ns.xsd.integer) + l_unsigned = l_integer.get_child(ns.xsd.unsigned) + l_signed = l_integer.get_child(ns.xsd.signed) + self.assertEqual(Schema({}, {}, {l_string, l_integer, l_unsigned, l_signed}), from_string(''' + prefix rdfs: + prefix xsd: + prefix bsfs: + prefix bse: + + # literals inherit from same parent + xsd:string rdfs:subClassOf bsfs:Literal . + xsd:integer rdfs:subClassOf bsfs:Literal . + + # literals inherit from same parent + xsd:unsigned rdfs:subClassOf xsd:integer . + xsd:signed rdfs:subClassOf xsd:integer . + ''')) + + # additional literals can be defined and used + n_ent = types.ROOT_NODE.get_child(ns.bsfs.Entity) + l_string = types.ROOT_LITERAL.get_child(ns.xsd.string) + p_filename = types.ROOT_PREDICATE.get_child(ns.bse.filename, + n_ent, l_string, False) + self.assertEqual(Schema({p_filename}), from_string(''' + prefix rdfs: + prefix xsd: + prefix bsfs: + prefix bse: + + bsfs:Entity rdfs:subClassOf bsfs:Node . + xsd:string rdfs:subClassOf bsfs:Literal . + + bse:filename rdfs:subClassOf bsfs:Predicate ; + rdfs:domain bsfs:Entity ; + rdfs:range xsd:string ; + bsfs:unique "false"^^xsd:boolean . + ''')) + + # literals can have annotations + self.assertDictEqual(from_string(''' + prefix rdfs: + prefix xsd: + prefix bsfs: + + xsd:string rdfs:subClassOf bsfs:Literal . + + ''').literal(ns.xsd.string).annotations, {}) + self.assertDictEqual(from_string(''' + prefix rdfs: + prefix xsd: + prefix bsfs: + + xsd:string rdfs:subClassOf bsfs:Literal ; + rdfs:label "hello world"^^xsd:string ; + bsfs:foo "1234"^^xsd:integer . 
+ + ''').literal(ns.xsd.string).annotations, { + ns.rdfs.label: 'hello world', + ns.bsfs.foo: 1234, + }) + + + def test_predicate(self): + # domain must be defined + self.assertRaises(errors.ConsistencyError, from_string, ''' + prefix rdfs: + prefix xsd: + prefix bsfs: + prefix bse: + + xsd:string rdfs:subClassOf bsfs:Literal . + + bse:filename rdfs:subClassOf bsfs:Predicate ; + rdfs:domain bsfs:Entity ; # undefined symbol + rdfs:range xsd:string ; + bsfs:unique "false"^^xsd:boolean . + ''') + # domain cannot be a literal + self.assertRaises(errors.ConsistencyError, from_string, ''' + prefix rdfs: + prefix xsd: + prefix bsfs: + prefix bse: + + bsfs:Entity rdfs:subClassOf bsfs:Literal . + xsd:string rdfs:subClassOf bsfs:Literal . + + bse:filename rdfs:subClassOf bsfs:Predicate ; + rdfs:domain bsfs:Entity ; # literal instead of node + rdfs:range xsd:string ; + bsfs:unique "false"^^xsd:boolean . + ''') + + # range must be defined + self.assertRaises(errors.ConsistencyError, from_string, ''' + prefix rdfs: + prefix xsd: + prefix bsfs: + prefix bse: + + bsfs:Entity rdfs:subClassOf bsfs:Node . + + bse:filename rdfs:subClassOf bsfs:Predicate ; + rdfs:domain bsfs:Entity ; + rdfs:range xsd:string ; # undefined symbol + bsfs:unique "false"^^xsd:boolean . + ''') + # range must be defined + self.assertRaises(errors.ConsistencyError, from_string, ''' + prefix rdfs: + prefix xsd: + prefix bsfs: + prefix bse: + + bsfs:Entity rdfs:subClassOf bsfs:Node . + + bse:filename rdfs:subClassOf bsfs:Predicate ; + rdfs:domain bsfs:Entity ; + rdfs:range bsfs:Foo ; # undefined symbol + bsfs:unique "false"^^xsd:boolean . + ''') + # range must be a node or a literal + self.assertRaises(errors.ConsistencyError, from_string, ''' + prefix rdfs: + prefix xsd: + prefix bsfs: + prefix bse: + + bsfs:Entity rdfs:subClassOf bsfs:Node . + + bse:filename rdfs:subClassOf bsfs:Predicate ; + rdfs:domain bsfs:Entity ; + rdfs:range bsfs:Predicate ; # invalid symbol + bsfs:unique "false"^^xsd:boolean . + ''') + + # additional predicates can be defined + n_ent = types.ROOT_NODE.get_child(ns.bsfs.Entity) + l_string = types.ROOT_LITERAL.get_child(ns.xsd.string) + p_comment = types.ROOT_PREDICATE.get_child(ns.bse.comment, domain=n_ent, range=l_string, unique=False) + self.assertEqual(Schema({p_comment}), from_string(''' + prefix rdfs: + prefix xsd: + prefix bsfs: + prefix bse: + + bsfs:Entity rdfs:subClassOf bsfs:Node . + xsd:string rdfs:subClassOf bsfs:Literal . + + bse:comment rdfs:subClassOf bsfs:Predicate ; + rdfs:domain bsfs:Entity ; + rdfs:range xsd:string ; + bsfs:unique "false"^^xsd:boolean . + ''')) + + # predicates inherit properties from parents + n_ent = types.ROOT_NODE.get_child(ns.bsfs.Entity) + l_string = types.ROOT_LITERAL.get_child(ns.xsd.string) + p_annotation = types.ROOT_PREDICATE.get_child(ns.bsfs.Annotation, domain=n_ent, range=l_string) + p_comment = p_annotation.get_child(ns.bse.comment, unique=True) + self.assertEqual(Schema({p_comment}), from_string(''' + prefix rdfs: + prefix xsd: + prefix bsfs: + prefix bse: + + bsfs:Entity rdfs:subClassOf bsfs:Node . + xsd:string rdfs:subClassOf bsfs:Literal . + + bsfs:Annotation rdfs:subClassOf bsfs:Predicate ; + rdfs:domain bsfs:Entity ; + rdfs:range xsd:string . + + bse:comment rdfs:subClassOf bsfs:Annotation ; # inherits domain/range from bsfs:Annotation + bsfs:unique "true"^^xsd:boolean . 
+ ''')) + + # we can define partial predicates (w/o specifying a usable range) + n_ent = types.ROOT_NODE.get_child(ns.bsfs.Entity) + l_string = types.ROOT_LITERAL.get_child(ns.xsd.string) + p_annotation = types.ROOT_PREDICATE.get_child(ns.bsfs.Annotation, domain=n_ent) + p_comment = p_annotation.get_child(ns.bse.comment, range=l_string, unique=False) + self.assertEqual(Schema({p_comment}), from_string(''' + prefix rdfs: + prefix xsd: + prefix bsfs: + prefix bse: + + bsfs:Entity rdfs:subClassOf bsfs:Node . + xsd:string rdfs:subClassOf bsfs:Literal . + + bsfs:Annotation rdfs:subClassOf bsfs:Predicate ; # derive predicate w/o setting range + rdfs:domain bsfs:Entity . + + bse:comment rdfs:subClassOf bsfs:Annotation ; # derived predicate w/ setting range + rdfs:range xsd:string ; + bsfs:unique "false"^^xsd:boolean . + ''')) + + # predicate definition can be split across multiple statements. + # statements can be repeated + n_ent = types.ROOT_NODE.get_child(ns.bsfs.Entity) + p_foo = types.ROOT_PREDICATE.get_child(ns.bse.foo, domain=n_ent, range=types.ROOT_NODE, unique=True) + self.assertEqual(Schema({p_foo}), from_string(''' + prefix rdfs: + prefix xsd: + prefix bsfs: + prefix bse: + + bsfs:Entity rdfs:subClassOf bsfs:Node . + + bse:foo rdfs:subClassOf bsfs:Predicate ; + rdfs:range bsfs:Node ; + bsfs:unique "true"^^xsd:boolean . + + bse:foo rdfs:subClassOf bsfs:Predicate ; + rdfs:domain bsfs:Entity. + ''')) + + # domain must be a subtype of parent's domain + n_ent = types.ROOT_NODE.get_child(ns.bsfs.Entity) + n_image = n_ent.get_child(ns.bsfs.Image) + p_foo = types.ROOT_PREDICATE.get_child(ns.bse.foo, domain=types.ROOT_NODE) + p_bar = p_foo.get_child(ns.bse.bar, domain=n_ent) + p_foobar = p_bar.get_child(ns.bse.foobar, domain=n_image) + self.assertEqual(Schema({p_foobar}), from_string(''' + prefix rdfs: + prefix xsd: + prefix bsfs: + prefix bse: + + bsfs:Entity rdfs:subClassOf bsfs:Node . + bsfs:Image rdfs:subClassOf bsfs:Entity . + + bse:foo rdfs:subClassOf bsfs:Predicate ; + rdfs:domain bsfs:Node . + bse:bar rdfs:subClassOf bse:foo ; + rdfs:domain bsfs:Entity . + bse:foobar rdfs:subClassOf bse:bar ; + rdfs:domain bsfs:Image . + ''')) + self.assertRaises(errors.ConsistencyError, from_string, ''' + prefix rdfs: + prefix xsd: + prefix bsfs: + prefix bse: + + bsfs:Entity rdfs:subClassOf bsfs:Node . + bsfs:Image rdfs:subClassOf bsfs:Entity . + + bse:foo rdfs:subClassOf bsfs:Predicate ; + rdfs:domain bsfs:Image . + bse:bar rdfs:subClassOf bse:foo ; + rdfs:domain bsfs:Entity . + bse:foobar rdfs:subClassOf bse:bar ; + rdfs:domain bsfs:Node . + ''') + + # range must be a subtype of parent's range + n_ent = types.ROOT_NODE.get_child(ns.bsfs.Entity) + n_image = n_ent.get_child(ns.bsfs.Image) + p_foo = types.ROOT_PREDICATE.get_child(ns.bse.foo, range=types.ROOT_NODE) + p_bar = p_foo.get_child(ns.bse.bar, range=n_ent) + p_foobar = p_bar.get_child(ns.bse.foobar, range=n_image) + self.assertEqual(Schema({p_foobar}), from_string(''' + prefix rdfs: + prefix xsd: + prefix bsfs: + prefix bse: + + bsfs:Entity rdfs:subClassOf bsfs:Node . + bsfs:Image rdfs:subClassOf bsfs:Entity . + + bse:foo rdfs:subClassOf bsfs:Predicate ; + rdfs:range bsfs:Node . + bse:bar rdfs:subClassOf bse:foo ; + rdfs:range bsfs:Entity . + bse:foobar rdfs:subClassOf bse:bar ; + rdfs:range bsfs:Image . + ''')) + self.assertRaises(errors.ConsistencyError, from_string, ''' + prefix rdfs: + prefix xsd: + prefix bsfs: + prefix bse: + + bsfs:Entity rdfs:subClassOf bsfs:Node . + bsfs:Image rdfs:subClassOf bsfs:Entity . 
+ + bse:foo rdfs:subClassOf bsfs:Predicate ; + rdfs:range bsfs:Image . + bse:bar rdfs:subClassOf bse:foo ; + rdfs:range bsfs:Entity . + bse:foobar rdfs:subClassOf bse:bar ; + rdfs:range bsfs:Node . + ''') + + # cannot define the same predicate from multiple parents + self.assertRaises(errors.ConsistencyError, from_string, ''' + prefix rdfs: + prefix xsd: + prefix bsfs: + prefix bse: + + bsfs:Entity rdfs:subClassOf bsfs:Node . + bsfs:Annotation rdfs:subClassOf bsfs:Predicate . + + bse:foo rdfs:subClassOf bsfs:Predicate ; + rdfs:domain bsfs:Node ; + rdfs:range bsfs:Node ; + bsfs:unique "false"^^xsd:boolean . + + bse:foo rdfs:subClassOf bsfs:Annotation ; + rdfs:domain bsfs:Node ; + rdfs:range bsfs:Node ; + bsfs:unique "false"^^xsd:boolean . + + ''') + # cannot assign multiple conflicting domains to the same predicate + self.assertRaises(errors.ConsistencyError, from_string, ''' + prefix rdfs: + prefix xsd: + prefix bsfs: + prefix bse: + + bsfs:Entity rdfs:subClassOf bsfs:Node . + + bse:foo rdfs:subClassOf bsfs:Predicate ; + rdfs:domain bsfs:Node ; + rdfs:range bsfs:Node ; + bsfs:unique "false"^^xsd:boolean . + + bse:foo rdfs:subClassOf bsfs:Predicate ; + rdfs:domain bsfs:Entity . # conflicting domain + ''') + # cannot assign multiple conflicting ranges to the same predicate + self.assertRaises(errors.ConsistencyError, from_string, ''' + prefix rdfs: + prefix xsd: + prefix bsfs: + prefix bse: + + bsfs:Entity rdfs:subClassOf bsfs:Node . + + bse:foo rdfs:subClassOf bsfs:Predicate ; + rdfs:domain bsfs:Node ; + rdfs:range bsfs:Node ; + bsfs:unique "false"^^xsd:boolean . + + bse:foo rdfs:subClassOf bsfs:Predicate ; + rdfs:range bsfs:Entity . # conflicting range + ''') + # cannot assign multiple conflicting uniques to the same predicate + self.assertRaises(errors.ConsistencyError, from_string, ''' + prefix rdfs: + prefix xsd: + prefix bsfs: + prefix bse: + + bsfs:Entity rdfs:subClassOf bsfs:Node . + + bse:foo rdfs:subClassOf bsfs:Predicate ; + rdfs:domain bsfs:Node ; + rdfs:range bsfs:Node ; + bsfs:unique "false"^^xsd:boolean . + + bse:foo rdfs:subClassOf bsfs:Predicate ; + bsfs:unique "true"^^xsd:boolean . # conflicting unique + ''') + + # predicates can have annotations + self.assertDictEqual(from_string(''' + prefix rdfs: + prefix xsd: + prefix bsfs: + prefix bse: + + bse:comment rdfs:subClassOf bsfs:Predicate ; + rdfs:range bsfs:Node . + + ''').predicate(ns.bse.comment).annotations, {}) + self.assertDictEqual(from_string(''' + prefix rdfs: + prefix xsd: + prefix bsfs: + prefix bse: + + bse:comment rdfs:subClassOf bsfs:Predicate ; + rdfs:range bsfs:Node ; + rdfs:label "hello world"^^xsd:string ; + bsfs:foo "1234"^^xsd:integer . + + ''').predicate(ns.bse.comment).annotations, { + ns.rdfs.label: 'hello world', + ns.bsfs.foo: 1234, + }) + + + def test_feature(self): + # domain must be defined + self.assertRaises(errors.ConsistencyError, from_string, ''' + prefix rdfs: + prefix xsd: + prefix bsfs: + prefix bse: + + bsfs:Feature rdfs:subClassOf bsfs:Predicate . + bsfs:array rdfs:subClassOf bsfs:Literal . + + bse:colors rdfs:subClassOf bsfs:Feature ; + rdfs:domain bsfs:Entity ; # undefined symbol + rdfs:range bsfs:array ; + bsfs:unique "false"^^xsd:boolean . + ''') + # domain cannot be a literal + self.assertRaises(errors.ConsistencyError, from_string, ''' + prefix rdfs: + prefix xsd: + prefix bsfs: + prefix bse: + + bsfs:Feature rdfs:subClassOf bsfs:Predicate . + bsfs:Entity rdfs:subClassOf bsfs:Literal . + bsfs:array rdfs:subClassOf bsfs:Literal . 
+ + bse:colors rdfs:subClassOf bsfs:Feature ; + rdfs:domain bsfs:Entity ; # literal instead of node + rdfs:range bsfs:array ; + bsfs:unique "false"^^xsd:boolean . + ''') + + # range must be defined + self.assertRaises(errors.ConsistencyError, from_string, ''' + prefix rdfs: + prefix xsd: + prefix bsfs: + prefix bse: + + bsfs:Feature rdfs:subClassOf bsfs:Predicate . + bsfs:Entity rdfs:subClassOf bsfs:Node . + + bse:colors rdfs:subClassOf bsfs:Feature ; + rdfs:domain bsfs:Entity ; + rdfs:range bsfs:array ; # undefined symbol + bsfs:unique "false"^^xsd:boolean . + ''') + # range must be defined + self.assertRaises(errors.ConsistencyError, from_string, ''' + prefix rdfs: + prefix xsd: + prefix bsfs: + prefix bse: + + bsfs:Feature rdfs:subClassOf bsfs:Predicate . + bsfs:Entity rdfs:subClassOf bsfs:Node . + + bse:colors rdfs:subClassOf bsfs:Feature ; + rdfs:domain bsfs:Entity ; + rdfs:range bsfs:Foo ; # undefined symbol + bsfs:unique "false"^^xsd:boolean . + ''') + # range must be a node or a literal + self.assertRaises(errors.ConsistencyError, from_string, ''' + prefix rdfs: + prefix xsd: + prefix bsfs: + prefix bse: + + bsfs:Feature rdfs:subClassOf bsfs:Predicate . + bsfs:Entity rdfs:subClassOf bsfs:Node . + + bse:colors rdfs:subClassOf bsfs:Feature ; + rdfs:domain bsfs:Entity ; + rdfs:range bsfs:Predicate ; # invalid symbol + bsfs:unique "false"^^xsd:boolean . + ''') + + # additional predicates can be defined + n_ent = types.ROOT_NODE.get_child(ns.bsfs.Entity) + l_array = types.ROOT_LITERAL.get_child(ns.bsfs.array) + p_comment = types.ROOT_FEATURE.get_child(ns.bse.colors, domain=n_ent, range=l_array, unique=False) + self.assertEqual(Schema({p_comment}), from_string(''' + prefix rdfs: + prefix xsd: + prefix bsfs: + prefix bse: + + bsfs:Feature rdfs:subClassOf bsfs:Predicate . + bsfs:Entity rdfs:subClassOf bsfs:Node . + bsfs:array rdfs:subClassOf bsfs:Literal . + + bse:colors rdfs:subClassOf bsfs:Feature ; + rdfs:domain bsfs:Entity ; + rdfs:range bsfs:array ; + bsfs:unique "false"^^xsd:boolean . + ''')) + + # features inherit properties from parents + n_ent = types.ROOT_NODE.get_child(ns.bsfs.Entity) + l_array = types.ROOT_LITERAL.get_child(ns.bsfs.array) + l_string = types.ROOT_LITERAL.get_child(ns.xsd.string) + p_annotation = types.ROOT_FEATURE.get_child(ns.bsfs.Annotation, domain=n_ent, range=l_array, + dimension=1234, dtype=ns.xsd.string) + p_comment = p_annotation.get_child(ns.bse.colors, unique=True) + self.assertEqual(Schema({p_comment}), from_string(''' + prefix rdfs: + prefix xsd: + prefix bsfs: + prefix bse: + + bsfs:Feature rdfs:subClassOf bsfs:Predicate . + bsfs:Entity rdfs:subClassOf bsfs:Node . + bsfs:array rdfs:subClassOf bsfs:Literal . + + bsfs:Annotation rdfs:subClassOf bsfs:Feature ; # inherits defaults from bsfs:Feature + rdfs:domain bsfs:Entity ; + rdfs:range bsfs:array ; + bsfs:dimension "1234"^^xsd:integer ; + bsfs:dtype xsd:string . + + bse:colors rdfs:subClassOf bsfs:Annotation ; # inherits domain/range/etc. from bsfs:Annotation + bsfs:unique "true"^^xsd:boolean . # overwrites bsfs:Predicate + ''')) + + # feature definition can be split across multiple statements. + # statements can be repeated + n_ent = types.ROOT_NODE.get_child(ns.bsfs.Entity) + p_foo = types.ROOT_FEATURE.get_child(ns.bse.foo, domain=n_ent, unique=True, + dimension=1234, dtype=ns.bsfs.f32) + self.assertEqual(Schema({p_foo}), from_string(''' + prefix rdfs: + prefix xsd: + prefix bsfs: + prefix bse: + + bsfs:Feature rdfs:subClassOf bsfs:Predicate . + bsfs:Entity rdfs:subClassOf bsfs:Node . 
+ + bse:foo rdfs:subClassOf bsfs:Feature ; + bsfs:unique "true"^^xsd:boolean ; + bsfs:dimension "1234"^^xsd:integer . + + bse:foo rdfs:subClassOf bsfs:Feature ; + rdfs:domain bsfs:Entity ; + bsfs:dtype bsfs:f32 . + ''')) + + # cannot define the same feature from multiple parents + self.assertRaises(errors.ConsistencyError, from_string, ''' + prefix rdfs: + prefix xsd: + prefix bsfs: + prefix bse: + + bsfs:Feature rdfs:subClassOf bsfs:Predicate . + bsfs:Annotation rdfs:subClassOf bsfs:Feature . + bsfs:Entity rdfs:subClassOf bsfs:Node . + + bse:foo rdfs:subClassOf bsfs:Feature ; + rdfs:domain bsfs:Node ; + bsfs:unique "false"^^xsd:boolean . + + bse:foo rdfs:subClassOf bsfs:Annotation ; + rdfs:domain bsfs:Node ; + bsfs:unique "false"^^xsd:boolean . + + ''') + # cannot assign multiple conflicting domains to the same feature + self.assertRaises(errors.ConsistencyError, from_string, ''' + prefix rdfs: + prefix xsd: + prefix bsfs: + prefix bse: + + bsfs:Feature rdfs:subClassOf bsfs:Predicate . + bsfs:Entity rdfs:subClassOf bsfs:Node . + + bse:foo rdfs:subClassOf bsfs:Feature ; + rdfs:domain bsfs:Node ; + bsfs:unique "false"^^xsd:boolean . + + bse:foo rdfs:subClassOf bsfs:Feature ; + rdfs:domain bsfs:Entity . # conflicting domain + ''') + # cannot assign multiple conflicting ranges to the same feature + self.assertRaises(errors.ConsistencyError, from_string, ''' + prefix rdfs: + prefix xsd: + prefix bsfs: + prefix bse: + + bsfs:Feature rdfs:subClassOf bsfs:Predicate . + bsfs:Entity rdfs:subClassOf bsfs:Node . + bsfs:array rdfs:subClassOf bsfs:Literal . + bsfs:large_array rdfs:subClassOf bsfs:array . + bsfs:small_array rdfs:subClassOf bsfs:array . + + bse:foo rdfs:subClassOf bsfs:Feature ; + rdfs:domain bsfs:Node ; + rdfs:range bsfs:large_array ; + bsfs:unique "false"^^xsd:boolean . + + bse:foo rdfs:subClassOf bsfs:Feature ; + rdfs:range bsfs:small_array . # conflicting range + ''') + # cannot assign multiple conflicting uniques to the same feature + self.assertRaises(errors.ConsistencyError, from_string, ''' + prefix rdfs: + prefix xsd: + prefix bsfs: + prefix bse: + + bsfs:Feature rdfs:subClassOf bsfs:Predicate . + bsfs:Entity rdfs:subClassOf bsfs:Node . + + bse:foo rdfs:subClassOf bsfs:Feature ; + rdfs:domain bsfs:Node ; + rdfs:range bsfs:Node ; + bsfs:unique "false"^^xsd:boolean . + + bse:foo rdfs:subClassOf bsfs:Feature ; + bsfs:unique "true"^^xsd:boolean . # conflicting unique + ''') + # cannot assign multiple conflicting dimensions to the same feature + self.assertRaises(errors.ConsistencyError, from_string, ''' + prefix rdfs: + prefix xsd: + prefix bsfs: + prefix bse: + + bsfs:Feature rdfs:subClassOf bsfs:Predicate . + bsfs:Entity rdfs:subClassOf bsfs:Node . + + bse:foo rdfs:subClassOf bsfs:Feature ; + rdfs:domain bsfs:Node ; + bsfs:dimension "1234"^^xsd:integer . + + bse:foo rdfs:subClassOf bsfs:Feature ; + bsfs:dimension "4321"^^xsd:integer . # conflicting dimension + ''') + # cannot assign multiple conflicting dtypes to the same feature + self.assertRaises(errors.ConsistencyError, from_string, ''' + prefix rdfs: + prefix xsd: + prefix bsfs: + prefix bse: + + bsfs:Feature rdfs:subClassOf bsfs:Predicate . + bsfs:Entity rdfs:subClassOf bsfs:Node . + + bse:foo rdfs:subClassOf bsfs:Feature ; + rdfs:domain bsfs:Node ; + bsfs:dtype bsfs:f32 . + + bse:foo rdfs:subClassOf bsfs:Feature ; + bsfs:dtype bsfs:f16 . 
# conflicting dtype + ''') + # cannot assign multiple conflicting distance metrics to the same feature + self.assertRaises(errors.ConsistencyError, from_string, ''' + prefix rdfs: + prefix xsd: + prefix bsfs: + prefix bse: + + bsfs:Feature rdfs:subClassOf bsfs:Predicate . + bsfs:Entity rdfs:subClassOf bsfs:Node . + + bse:foo rdfs:subClassOf bsfs:Feature ; + rdfs:domain bsfs:Node ; + bsfs:distance bsfs:euclidean . + + bse:foo rdfs:subClassOf bsfs:Feature ; + bsfs:distance bsfs:cosine . # conflicting distance + ''') + + # features can have annotations + self.assertDictEqual(from_string(''' + prefix rdfs: + prefix xsd: + prefix bsfs: + prefix bse: + + bsfs:Feature rdfs:subClassOf bsfs:Predicate . + bse:colors rdfs:subClassOf bsfs:Feature ; + bsfs:dimension "1234"^^xsd:integer . + + ''').predicate(ns.bse.colors).annotations, {}) + self.assertDictEqual(from_string(''' + prefix rdfs: + prefix xsd: + prefix bsfs: + prefix bse: + + bsfs:Feature rdfs:subClassOf bsfs:Predicate . + bse:colors rdfs:subClassOf bsfs:Feature ; + bsfs:dimension "1234"^^xsd:integer ; + rdfs:label "hello world"^^xsd:string ; + bsfs:foo "1234"^^xsd:integer . + + ''').predicate(ns.bse.colors).annotations, { + ns.rdfs.label: 'hello world', + ns.bsfs.foo: 1234, + }) + + + def test_integration(self): + # nodes + n_ent = types.ROOT_NODE.get_child(ns.bsfs.Entity) + n_tag = types.ROOT_NODE.get_child(ns.bsfs.Tag) + n_image = n_ent.get_child(ns.bsfs.Image) + # literals + l_string = types.ROOT_LITERAL.get_child(ns.xsd.string) + l_array = types.ROOT_LITERAL.get_child(ns.bsfs.array) + l_integer = types.ROOT_LITERAL.get_child(ns.xsd.integer) + l_boolean = types.ROOT_LITERAL.get_child(ns.xsd.boolean) + # predicates + p_annotation = types.ROOT_PREDICATE.get_child(ns.bsfs.Annotation) + p_tag = types.ROOT_PREDICATE.get_child(ns.bse.tag, domain=n_ent, range=n_tag) + p_group = p_tag.get_child(ns.bse.group, domain=n_image, unique=True) + p_comment = p_annotation.get_child(ns.bse.comment, range=l_string) + # features + f_colors = types.ROOT_FEATURE.get_child(URI('http://bsfs.ai/schema/Feature/colors_spatial'), + domain=n_ent, range=l_array, unique=True, dtype=ns.bsfs.f16, distance=ns.bsfs.euclidean) + f_colors1234 = f_colors.get_child(URI('http://bsfs.ai/schema/Feature/colors_spatial#1234'), dimension=1024) + f_colors4321 = f_colors.get_child(URI('http://bsfs.ai/schema/Feature/colors_spatial#4321'), dimension=2048) + # schema + ref = Schema( + {p_annotation, p_tag, p_group, p_comment, f_colors, f_colors1234, f_colors4321}, + {n_ent, n_tag, n_image}, + {l_string, l_integer, l_boolean}) + # load from string + gen = from_string(''' + # generic prefixes + prefix rdfs: + prefix xsd: + + # bsfs prefixes + prefix bsfs: + prefix bse: + + # nodes + bsfs:Entity rdfs:subClassOf bsfs:Node ; + rdfs:label "Principal node"^^xsd:string . + bsfs:Tag rdfs:subClassOf bsfs:Node ; + rdfs:label "Tag"^^xsd:string . + bsfs:Image rdfs:subClassOf bsfs:Entity . + + # literals + xsd:string rdfs:subClassOf bsfs:Literal ; + rdfs:label "A sequence of characters"^^xsd:string . + bsfs:array rdfs:subClassOf bsfs:Literal . + xsd:integer rdfs:subClassOf bsfs:Literal . + xsd:boolean rdfs:subClassOf bsfs:Literal . + + # abstract predicates + bsfs:Annotation rdfs:subClassOf bsfs:Predicate ; + rdfs:label "node annotation"^^xsd:string . + bsfs:Feature rdfs:subClassOf bsfs:Predicate . 
+ + # feature instances + rdfs:subClassOf bsfs:Feature ; + rdfs:domain bsfs:Entity ; + rdfs:range bsfs:array ; + bsfs:unique "true"^^xsd:boolean ; + bsfs:dtype bsfs:f16 ; + bsfs:distance bsfs:euclidean ; + # annotations + rdfs:label "ColorsSpatial instances. Dimension depends on instance."^^xsd:string ; + bsfs:first_arg "1234"^^xsd:integer ; + bsfs:second_arg "hello world"^^xsd:string . + + rdfs:subClassOf ; + bsfs:dimension "1024"^^xsd:integer ; + rdfs:label "Main colors spatial instance"^^xsd:string . + + rdfs:subClassOf ; + bsfs:dimension "2048"^^xsd:integer . + + # predicate instances + bse:tag rdfs:subClassOf bsfs:Predicate ; + rdfs:domain bsfs:Entity ; + rdfs:range bsfs:Tag ; + bsfs:unique "false"^^xsd:boolean ; + # annotations + rdfs:label "connect entity to a tag"^^xsd:string . + + bse:group rdfs:subClassOf bse:tag ; # subtype of another predicate + rdfs:domain bsfs:Image ; + bsfs:unique "true"^^xsd:boolean . + + bse:comment rdfs:subClassOf bsfs:Annotation ; # subtype of abstract predicate + rdfs:domain bsfs:Node ; + rdfs:range xsd:string ; + bsfs:unique "false"^^xsd:boolean . + + ''') + # schemas are equal + self.assertEqual(ref, gen) + # check annotations + self.assertDictEqual(gen.node(ns.bsfs.Entity).annotations, {ns.rdfs.label: 'Principal node'}) + self.assertDictEqual(gen.node(ns.bsfs.Tag).annotations, {ns.rdfs.label: 'Tag'}) + self.assertDictEqual(gen.literal(ns.xsd.string).annotations, {ns.rdfs.label: 'A sequence of characters'}) + self.assertDictEqual(gen.predicate(ns.bsfs.Annotation).annotations, {ns.rdfs.label: 'node annotation'}) + self.assertDictEqual(gen.predicate(URI('http://bsfs.ai/schema/Feature/colors_spatial')).annotations, { + ns.rdfs.label: 'ColorsSpatial instances. Dimension depends on instance.', + ns.bsfs.first_arg: 1234, + ns.bsfs.second_arg: 'hello world', + }) + self.assertDictEqual(gen.predicate(URI('http://bsfs.ai/schema/Feature/colors_spatial#1234')).annotations, { + ns.rdfs.label: 'Main colors spatial instance'}) + self.assertDictEqual(gen.predicate(ns.bse.tag).annotations, {ns.rdfs.label: 'connect entity to a tag'}) + + + +class TestToString(unittest.TestCase): + def test_stub(self): + raise NotImplementedError() + + +## main ## + +if __name__ == '__main__': + unittest.main() + +## EOF ## diff --git a/test/schema/test_types.py b/test/schema/test_types.py index 4a49e6e..af47f0d 100644 --- a/test/schema/test_types.py +++ b/test/schema/test_types.py @@ -10,15 +10,17 @@ import unittest # bsfs imports from bsfs.namespace import ns +from bsfs.schema.types import ROOT_PREDICATE, ROOT_VERTEX, ROOT_FEATURE from bsfs.utils import errors # objects to test -from bsfs.schema.types import _Type, _Vertex, Node, Literal, Predicate +from bsfs.schema.types import _Type, _Vertex, Node, Literal, Predicate, Feature ## code ## class TestType(unittest.TestCase): + def test_parents(self): # create some types fst = _Type('First') @@ -31,7 +33,25 @@ class TestType(unittest.TestCase): self.assertListEqual(list(trd.parents()), [snd, fst]) self.assertListEqual(list(frd.parents()), [trd, snd, fst]) - def test_essentials(self): + def test_annotations(self): + # annotations can be empty + self.assertDictEqual(_Type('Foo', None).annotations, {}) + # annotations are stored + self.assertDictEqual(_Type('Foo', None, foo='bar', bar=123).annotations, { + 'foo': 'bar', + 'bar': 123}) + # comparison ignores annotations + self.assertEqual( + _Type('Foo', None, foo='bar', bar='foo'), + _Type('Foo', None, hello='world', foobar=1234)) + self.assertEqual( + hash(_Type('Foo', None, foo='bar', 
bar='foo')), + hash(_Type('Foo', None, hello='world', foobar=1234))) + # annotations can be passed to get_child + self.assertDictEqual(_Type('First', foo='bar').get_child('Second', bar='foo').annotations, { + 'bar': 'foo'}) + + def test_string_conversion(self): # type w/o parent self.assertEqual(str(_Type('Foo')), '_Type(Foo)') self.assertEqual(repr(_Type('Foo')), '_Type(Foo, None)') @@ -59,6 +79,9 @@ class TestType(unittest.TestCase): # type persists class Foo(_Type): pass self.assertEqual(Foo('First').get_child('Second'), Foo('Second', Foo('First'))) + # annotations are respected + self.assertDictEqual(_Type('First', foo='bar').get_child('Second', bar='foo').annotations, { + 'bar': 'foo'}) def test_equality(self): # equality depends on uri @@ -76,6 +99,13 @@ class TestType(unittest.TestCase): # comparison respects parent self.assertNotEqual(_Type('Foo', _Type('Bar')), _Type('Foo')) self.assertNotEqual(hash(_Type('Foo', _Type('Bar'))), hash(_Type('Foo'))) + # comparison ignores annotations + self.assertEqual( + _Type('Foo', None, foo='bar', bar='foo'), + _Type('Foo', None, hello='world', foobar=1234)) + self.assertEqual( + hash(_Type('Foo', None, foo='bar', bar='foo')), + hash(_Type('Foo', None, hello='world', foobar=1234))) def test_order(self): # create some types. @@ -109,25 +139,40 @@ class TestType(unittest.TestCase): self.assertFalse(bike > bicycle) self.assertFalse(bike >= bicycle) self.assertFalse(bike == bicycle) + + # can compare types along the class hierarchy class Foo(_Type): pass - foo = Foo(bike.uri, bike.parent) - # cannot compare different types - self.assertRaises(TypeError, operator.lt, foo, bike) - self.assertRaises(TypeError, operator.le, foo, bike) - self.assertRaises(TypeError, operator.gt, foo, bike) - self.assertRaises(TypeError, operator.ge, foo, bike) + foo = Foo('Foo', bike) + self.assertTrue(foo < bike) + self.assertTrue(foo <= bike) + self.assertFalse(foo > bike) + self.assertFalse(foo >= bike) # goes both ways - self.assertRaises(TypeError, operator.lt, bike, foo) - self.assertRaises(TypeError, operator.le, bike, foo) - self.assertRaises(TypeError, operator.gt, bike, foo) - self.assertRaises(TypeError, operator.ge, bike, foo) + self.assertFalse(bike < foo) + self.assertFalse(bike <= foo) + self.assertTrue(bike > foo) + self.assertTrue(bike >= foo) + # cannot compare unrelated classes + class Bar(_Type): pass + bar = Bar('Bar', bike) + self.assertRaises(TypeError, operator.lt, foo, bar) + self.assertRaises(TypeError, operator.le, foo, bar) + self.assertRaises(TypeError, operator.gt, foo, bar) + self.assertRaises(TypeError, operator.ge, foo, bar) + # goes both ways + self.assertRaises(TypeError, operator.lt, bar, foo) + self.assertRaises(TypeError, operator.le, bar, foo) + self.assertRaises(TypeError, operator.gt, bar, foo) + self.assertRaises(TypeError, operator.ge, bar, foo) + class TestPredicate(unittest.TestCase): def test_construction(self): # domain must be a node self.assertRaises(TypeError, Predicate, ns.bse.foo, 1234, None, True) self.assertRaises(TypeError, Predicate, ns.bse.foo, None, Literal(ns.bsfs.Foo, None), None, True) - # range must be None, a Literal, or a Node + # range must be a Literal, a Node, or the root Vertex + self.assertRaises(TypeError, Predicate, ns.bse.foo, None, Node(ns.bsfs.Node, None), None, True) self.assertRaises(TypeError, Predicate, ns.bse.foo, None, Node(ns.bsfs.Node, None), 1234, True) self.assertRaises(TypeError, Predicate, ns.bse.foo, None, Node(ns.bsfs.Node, None), _Vertex(ns.bsfs.Foo, None), True) 
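        # The surrounding checks cover invalid ranges only. As a minimal sketch of a
        # well-formed construction (assuming the ROOT_* singletons imported above from
        # bsfs.schema.types), the domain must be a Node and the range a Node, a Literal,
        # or the root Vertex, e.g.:
        #   Predicate(ns.bse.foo, ROOT_PREDICATE, Node(ns.bsfs.Node, None), ROOT_VERTEX, True)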
self.assertRaises(TypeError, Predicate, ns.bse.foo, None, Node(ns.bsfs.Node, None), _Type(ns.bsfs.Foo, None), True) @@ -138,54 +183,52 @@ class TestPredicate(unittest.TestCase): n_root = Node(ns.bsfs.Node, None) n_ent = Node(ns.bsfs.Entity, Node(ns.bsfs.Node, None)) n_tag = Node(ns.bsfs.Tag, Node(ns.bsfs.Tag, None)) - root = Predicate( - uri=ns.bsfs.Predicate, - parent=None, + root = ROOT_PREDICATE + tag = Predicate( + uri=ns.bse.tag, + parent=root, domain=n_root, - range=None, + range=n_tag, unique=False, ) # instance is equal to itself - self.assertEqual(root, root) - self.assertEqual(hash(root), hash(root)) + self.assertEqual(tag, tag) + self.assertEqual(hash(tag), hash(tag)) # instance is equal to a clone - self.assertEqual(root, Predicate(ns.bsfs.Predicate, None, n_root, None, False)) - self.assertEqual(hash(root), hash(Predicate(ns.bsfs.Predicate, None, n_root, None, False))) + self.assertEqual(tag, Predicate(ns.bse.tag, root, n_root, n_tag, False)) + self.assertEqual(hash(tag), hash(Predicate(ns.bse.tag, root, n_root, n_tag, False))) # equality respects uri - self.assertNotEqual(root, Predicate(ns.bsfs.Alternative, None, n_root, None, False)) - self.assertNotEqual(hash(root), hash(Predicate(ns.bsfs.Alternative, None, n_root, None, False))) + self.assertNotEqual(tag, Predicate(ns.bsfs.Alternative, root, n_root, n_tag, False)) + self.assertNotEqual(hash(tag), hash(Predicate(ns.bsfs.Alternative, root, n_root, n_tag, False))) # equality respects parent - self.assertNotEqual(root, Predicate(ns.bsfs.Predicate, n_root, n_root, None, False)) - self.assertNotEqual(hash(root), hash(Predicate(ns.bsfs.Predicate, n_root, n_root, None, False))) + self.assertNotEqual(tag, Predicate(ns.bse.tag, n_root, n_root, n_tag, False)) + self.assertNotEqual(hash(tag), hash(Predicate(ns.bse.tag, n_root, n_root, n_tag, False))) # equality respects domain - self.assertNotEqual(root, Predicate(ns.bsfs.Predicate, None, n_ent, None, False)) - self.assertNotEqual(hash(root), hash(Predicate(ns.bsfs.Predicate, None, n_ent, None, False))) + self.assertNotEqual(tag, Predicate(ns.bse.tag, root, n_ent, n_tag, False)) + self.assertNotEqual(hash(tag), hash(Predicate(ns.bse.tag, root, n_ent, n_tag, False))) # equality respects range - self.assertNotEqual(root, Predicate(ns.bsfs.Predicate, None, n_root, n_root, False)) - self.assertNotEqual(hash(root), hash(Predicate(ns.bsfs.Predicate, None, n_root, n_root, False))) + self.assertNotEqual(tag, Predicate(ns.bse.tag, root, n_root, n_root, False)) + self.assertNotEqual(hash(tag), hash(Predicate(ns.bse.tag, root, n_root, n_root, False))) # equality respects unique - self.assertNotEqual(root, Predicate(ns.bsfs.Predicate, None, n_root, None, True)) - self.assertNotEqual(hash(root), hash(Predicate(ns.bsfs.Predicate, None, n_root, None, True))) + self.assertNotEqual(tag, Predicate(ns.bse.tag, root, n_root, n_tag, True)) + self.assertNotEqual(hash(tag), hash(Predicate(ns.bse.tag, root, n_root, n_tag, True))) def test_get_child(self): n_root = Node(ns.bsfs.Node, None) + l_root = Literal(ns.bsfs.Literal, None) n_ent = Node(ns.bsfs.Entity, Node(ns.bsfs.Node, None)) n_tag = Node(ns.bsfs.Tag, Node(ns.bsfs.Tag, None)) - root = Predicate( - uri=ns.bsfs.Predicate, - parent=None, - domain=n_root, - range=None, - unique=False, - ) + root = ROOT_PREDICATE tag = Predicate( - uri=ns.bsfs.Entity, + uri=ns.bse.tag, parent=root, domain=n_ent, range=n_tag, unique=False, ) + # get_child returns Predicate + self.assertIsInstance(tag.get_child(ns.bse.foo), Predicate) # uri is respected 
self.assertEqual(ns.bse.foo, tag.get_child(ns.bse.foo).uri) # domain is respected @@ -198,10 +241,17 @@ class TestPredicate(unittest.TestCase): self.assertEqual(n_tag, tag.get_child(ns.bse.foo, range=None).range) # unique is respected self.assertTrue(tag.get_child(ns.bse.foo, unique=True).unique) + # annotations are respected + self.assertDictEqual(tag.get_child(ns.bse.foo, foo='bar', bar=123).annotations, { + 'foo': 'bar', + 'bar': 123, + }) # domain is inherited from parent + self.assertEqual(n_root, root.get_child(ns.bse.foo).domain) self.assertEqual(n_ent, tag.get_child(ns.bse.foo).domain) # range is inherited from parent + self.assertEqual(ROOT_VERTEX, root.get_child(ns.bse.foo).range) self.assertEqual(n_tag, tag.get_child(ns.bse.foo).range) # uniqueness is inherited from parent self.assertFalse(tag.get_child(ns.bse.foo).unique) @@ -209,11 +259,118 @@ class TestPredicate(unittest.TestCase): # domain must be subtype of parent's domain self.assertRaises(errors.ConsistencyError, tag.get_child, ns.bse.foo, domain=n_root) self.assertRaises(errors.ConsistencyError, tag.get_child, ns.bse.foo, domain=Node(ns.bsfs.Image, n_root)) - # range cannot be None - self.assertRaises(ValueError, root.get_child, ns.bse.foo) # range must be subtype of parent's range self.assertRaises(errors.ConsistencyError, tag.get_child, ns.bse.foo, range=n_root) self.assertRaises(errors.ConsistencyError, tag.get_child, ns.bse.foo, range=Node(ns.bsfs.Image, n_root)) + self.assertRaises(TypeError, tag.get_child, ns.bse.foo, range=Literal(ns.bsfs.Tag, l_root)) + # range can be subtyped from ROOT_VERTEX to Node or Literal + self.assertEqual(n_root, root.get_child(ns.bse.foo, range=n_root).range) + self.assertEqual(l_root, root.get_child(ns.bse.foo, range=l_root).range) + + +class TestFeature(unittest.TestCase): + def test_construction(self): + n_root = Node(ns.bsfs.Node, None) + l_root = Literal(ns.bsfs.Literal, None) + # dimension, dtype, and distance are respected + feat = Feature(ns.bsfs.Feature, None, n_root, l_root, False, + 1234, ns.bsfs.float, ns.bsfs.euclidean) + self.assertEqual(1234, feat.dimension) + self.assertEqual(ns.bsfs.float, feat.dtype) + self.assertEqual(ns.bsfs.euclidean, feat.distance) + + def test_equality(self): + n_ent = Node(ns.bsfs.Entity, Node(ns.bsfs.Node, None)) + l_array = Literal(ns.bsfs.array, Literal(ns.bsfs.Literal, None)) + colors = Feature( + uri=ns.bse.colors, + parent=ROOT_FEATURE, + domain=n_ent, + range=l_array, + unique=False, + dimension=1234, + dtype=ns.bsfs.float, + distance=ns.bsfs.euclidean, + ) + # instance is equal to itself + self.assertEqual(colors, colors) + self.assertEqual(hash(colors), hash(colors)) + # instance is equal to a clone + self.assertEqual(colors, Feature(ns.bse.colors, ROOT_FEATURE, n_ent, l_array, False, 1234, ns.bsfs.float, ns.bsfs.euclidean)) + self.assertEqual(hash(colors), hash(Feature(ns.bse.colors, ROOT_FEATURE, n_ent, l_array, False, 1234, ns.bsfs.float, ns.bsfs.euclidean))) + # equality respects dimension + self.assertNotEqual(colors, Feature(ns.bse.colors, ROOT_FEATURE, n_ent, l_array, False, 4321, ns.bsfs.float, ns.bsfs.euclidean)) + self.assertNotEqual(hash(colors), hash(Feature(ns.bse.colors, ROOT_FEATURE, n_ent, l_array, False, 4321, ns.bsfs.float, ns.bsfs.euclidean))) + # equality respects dtype + self.assertNotEqual(colors, Feature(ns.bse.colors, ROOT_FEATURE, n_ent, l_array, False, 1234, ns.bsfs.integer, ns.bsfs.euclidean)) + self.assertNotEqual(hash(colors), hash(Feature(ns.bse.colors, ROOT_FEATURE, n_ent, l_array, False, 1234, 
ns.bsfs.integer, ns.bsfs.euclidean))) + # equality respects distance + self.assertNotEqual(colors, Feature(ns.bse.colors, ROOT_FEATURE, n_ent, l_array, False, 1234, ns.bsfs.float, ns.bsfs.cosine)) + self.assertNotEqual(hash(colors), hash(Feature(ns.bse.colors, ROOT_FEATURE, n_ent, l_array, False, 1234, ns.bsfs.float, ns.bsfs.cosine))) + + def test_get_child(self): + n_root = Node(ns.bsfs.Node, None) + n_ent = Node(ns.bsfs.Entity, n_root) + l_root = Literal(ns.bsfs.Literal, None) + l_array = Literal(ns.bsfs.array, l_root) + colors = Feature( + uri=ns.bse.colors, + parent=ROOT_FEATURE, + domain=n_ent, + range=l_array, + unique=False, + dimension=1234, + dtype=ns.bsfs.float, + distance=ns.bsfs.euclidean, + ) + + # get_child returns Feature + self.assertIsInstance(colors.get_child(ns.bse.foo), Feature) + # uri is respected + self.assertEqual(ns.bse.foo, colors.get_child(ns.bse.foo).uri) + # domain is respected + dom = Node(ns.bsfs.Image, n_ent) + self.assertEqual(dom, colors.get_child(ns.bse.foo, domain=dom).domain) + # range is respected + rng = Literal(ns.bse.foo, l_array) + self.assertEqual(rng, colors.get_child(ns.bse.foo, range=rng).range) + # cannot set range to None + self.assertEqual(l_array, colors.get_child(ns.bse.foo, range=None).range) + # unique is respected + self.assertTrue(colors.get_child(ns.bse.foo, unique=True).unique) + # dimension is respected + self.assertEqual(4321, colors.get_child(ns.bse.foo, dimension=4321).dimension) + # dtype is respected + self.assertEqual(ns.bsfs.integer, colors.get_child(ns.bse.foo, dtype=ns.bsfs.integer).dtype) + # distance is respected + self.assertEqual(ns.bsfs.cosine, colors.get_child(ns.bse.foo, distance=ns.bsfs.cosine).distance) + # annotations are respected + self.assertDictEqual(colors.get_child(ns.bse.foo, foo='bar', bar=123).annotations, { + 'foo': 'bar', + 'bar': 123, + }) + + # domain is inherited from parent + self.assertEqual(n_root, ROOT_FEATURE.get_child(ns.bse.foo).domain) + self.assertEqual(n_ent, colors.get_child(ns.bse.foo).domain) + # range is inherited from parent + self.assertEqual(l_array, colors.get_child(ns.bse.foo).range) + # uniqueness is inherited from parent + self.assertFalse(colors.get_child(ns.bse.foo).unique) + # dimension is inherited from parent + self.assertEqual(1234, colors.get_child(ns.bse.foo).dimension) + # dtype is inherited from parent + self.assertEqual(ns.bsfs.float, colors.get_child(ns.bse.foo).dtype) + # distance is inherited from parent + self.assertEqual(ns.bsfs.euclidean, colors.get_child(ns.bse.foo).distance) + + # domain must be subtype of parent's domain + self.assertRaises(errors.ConsistencyError, colors.get_child, ns.bse.foo, domain=n_root) + self.assertRaises(errors.ConsistencyError, colors.get_child, ns.bse.foo, domain=Node(ns.bsfs.Image, n_root)) + # range must be subtype of parent's range + self.assertRaises(errors.ConsistencyError, colors.get_child, ns.bse.foo, range=Literal(ns.bsfs.Literal, None)) + self.assertRaises(errors.ConsistencyError, colors.get_child, ns.bse.foo, range=Literal(ns.bsfs.foo, Literal(ns.bsfs.Literal, None))) + self.assertRaises(TypeError, colors.get_child, ns.bse.foo, range=Node(ns.bsfs.Tag, n_root)) ## main ## @@ -222,4 +379,3 @@ if __name__ == '__main__': unittest.main() ## EOF ## - -- cgit v1.2.3 From 1ffb815f25b9f7db7b946f9db436974a687cf818 Mon Sep 17 00:00:00 2001 From: Matthias Baumgartner Date: Thu, 12 Jan 2023 08:28:14 +0100 Subject: folder rename due to python import conflict --- test/query/ast/__init__.py | 0 test/query/ast/test_filter_.py | 480 
------------------------------------ test/query/ast_test/__init__.py | 0 test/query/ast_test/test_filter_.py | 480 ++++++++++++++++++++++++++++++++++++ 4 files changed, 480 insertions(+), 480 deletions(-) delete mode 100644 test/query/ast/__init__.py delete mode 100644 test/query/ast/test_filter_.py create mode 100644 test/query/ast_test/__init__.py create mode 100644 test/query/ast_test/test_filter_.py diff --git a/test/query/ast/__init__.py b/test/query/ast/__init__.py deleted file mode 100644 index e69de29..0000000 diff --git a/test/query/ast/test_filter_.py b/test/query/ast/test_filter_.py deleted file mode 100644 index 4f69bdc..0000000 --- a/test/query/ast/test_filter_.py +++ /dev/null @@ -1,480 +0,0 @@ -""" - -Part of the tagit test suite. -A copy of the license is provided with the project. -Author: Matthias Baumgartner, 2022 -""" -# imports -import unittest - -# bsfs imports -from bsfs.namespace import ns -from bsfs.utils import URI - -# objects to test -from bsfs.query.ast.filter_ import _Expression, FilterExpression, PredicateExpression -from bsfs.query.ast.filter_ import _Branch, Any, All -from bsfs.query.ast.filter_ import _Agg, And, Or -from bsfs.query.ast.filter_ import Not, Has -from bsfs.query.ast.filter_ import _Value, Is, Equals, Substring, StartsWith, EndsWith -from bsfs.query.ast.filter_ import _Bounded, LessThan, GreaterThan -from bsfs.query.ast.filter_ import Predicate, OneOf -from bsfs.query.ast.filter_ import IsIn, IsNotIn - - -## code ## - -class TestExpression(unittest.TestCase): - def test_essentials(self): - # comparison - self.assertEqual(_Expression(), _Expression()) - self.assertEqual(FilterExpression(), FilterExpression()) - self.assertEqual(PredicateExpression(), PredicateExpression()) - self.assertEqual(hash(_Expression()), hash(_Expression())) - self.assertEqual(hash(FilterExpression()), hash(FilterExpression())) - self.assertEqual(hash(PredicateExpression()), hash(PredicateExpression())) - # comparison respects type - self.assertNotEqual(FilterExpression(), _Expression()) - self.assertNotEqual(_Expression(), PredicateExpression()) - self.assertNotEqual(PredicateExpression(), FilterExpression()) - self.assertNotEqual(hash(FilterExpression()), hash(_Expression())) - self.assertNotEqual(hash(_Expression()), hash(PredicateExpression())) - self.assertNotEqual(hash(PredicateExpression()), hash(FilterExpression())) - # string conversion - self.assertEqual(str(_Expression()), '_Expression()') - self.assertEqual(str(FilterExpression()), 'FilterExpression()') - self.assertEqual(str(PredicateExpression()), 'PredicateExpression()') - self.assertEqual(repr(_Expression()), '_Expression()') - self.assertEqual(repr(FilterExpression()), 'FilterExpression()') - self.assertEqual(repr(PredicateExpression()), 'PredicateExpression()') - - -class TestBranch(unittest.TestCase): # _Branch, Any, All - def test_essentials(self): - pred = PredicateExpression() - expr = FilterExpression() - - # comparison respects type - self.assertNotEqual(_Branch(pred, expr), Any(pred, expr)) - self.assertNotEqual(Any(pred, expr), All(pred, expr)) - self.assertNotEqual(All(pred, expr), _Branch(pred, expr)) - self.assertNotEqual(hash(_Branch(pred, expr)), hash(Any(pred, expr))) - self.assertNotEqual(hash(Any(pred, expr)), hash(All(pred, expr))) - self.assertNotEqual(hash(All(pred, expr)), hash(_Branch(pred, expr))) - - for cls in (_Branch, Any, All): - # comparison - self.assertEqual(cls(pred, expr), cls(pred, expr)) - self.assertEqual(hash(cls(pred, expr)), hash(cls(pred, expr))) - # comparison 
respects predicate - self.assertNotEqual(cls(ns.bse.filename, expr), cls(ns.bse.filesize, expr)) - self.assertNotEqual(hash(cls(ns.bse.filename, expr)), hash(cls(ns.bse.filesize, expr))) - # comparison respects expression - self.assertNotEqual(cls(pred, Equals('hello')), cls(pred, Equals('world'))) - self.assertNotEqual(hash(cls(pred, Equals('hello'))), hash(cls(pred, Equals('world')))) - - # string conversion - self.assertEqual(str(_Branch(pred, expr)), f'_Branch({pred}, {expr})') - self.assertEqual(repr(_Branch(pred, expr)), f'_Branch({pred}, {expr})') - self.assertEqual(str(Any(pred, expr)), f'Any({pred}, {expr})') - self.assertEqual(repr(Any(pred, expr)), f'Any({pred}, {expr})') - self.assertEqual(str(All(pred, expr)), f'All({pred}, {expr})') - self.assertEqual(repr(All(pred, expr)), f'All({pred}, {expr})') - - def test_members(self): - class Foo(): pass - pred = PredicateExpression() - expr = FilterExpression() - - for cls in (_Branch, Any, All): - # predicate returns member - self.assertEqual(cls(PredicateExpression(), expr).predicate, PredicateExpression()) - # can pass an URI - self.assertEqual(cls(ns.bse.filename, expr).predicate, Predicate(ns.bse.filename)) - # can pass a PredicateExpression - self.assertEqual(cls(Predicate(ns.bse.filename), expr).predicate, Predicate(ns.bse.filename)) - # must pass an URI or PredicateExpression - self.assertRaises(TypeError, cls, Foo(), expr) - # expression returns member - self.assertEqual(cls(pred, Equals('hello')).expr, Equals('hello')) - # expression must be a FilterExpression - self.assertRaises(TypeError, cls, ns.bse.filename, 'hello') - self.assertRaises(TypeError, cls, ns.bse.filename, 1234) - self.assertRaises(TypeError, cls, ns.bse.filename, Foo()) - - -class TestAgg(unittest.TestCase): # _Agg, And, Or - def test_essentials(self): - expr = {Equals('hello'), Equals('world')} - - # comparison respects type - self.assertNotEqual(_Agg(expr), And(expr)) - self.assertNotEqual(And(expr), Or(expr)) - self.assertNotEqual(Or(expr), _Agg(expr)) - self.assertNotEqual(hash(_Agg(expr)), hash(And(expr))) - self.assertNotEqual(hash(And(expr)), hash(Or(expr))) - self.assertNotEqual(hash(Or(expr)), hash(_Agg(expr))) - - for cls in (_Agg, And, Or): - # comparison - self.assertEqual(cls(expr), cls(expr)) - self.assertEqual(hash(cls(expr)), hash(cls(expr))) - # comparison respects expression - self.assertNotEqual(cls(expr), cls(Equals('world'))) - self.assertNotEqual(hash(cls(expr)), hash(cls(Equals('world')))) - self.assertNotEqual(cls(Equals('hello')), cls(Equals('world'))) - self.assertNotEqual(hash(cls(Equals('hello'))), hash(cls(Equals('world')))) - - # string conversion - self.assertEqual(str(_Agg(Equals('hello'))), '_Agg({Equals(hello)})') - self.assertEqual(repr(_Agg(Equals('hello'))), '_Agg({Equals(hello)})') - self.assertEqual(str(And(Equals('hello'))), 'And({Equals(hello)})') - self.assertEqual(repr(And(Equals('hello'))), 'And({Equals(hello)})') - self.assertEqual(str(Or(Equals('hello'))), 'Or({Equals(hello)})') - self.assertEqual(repr(Or(Equals('hello'))), 'Or({Equals(hello)})') - - def test_expression(self): - class Foo(): pass - - for cls in (_Agg, And, Or): - # can pass expressions as arguments - self.assertSetEqual(cls(Equals('hello'), Equals('world')).expr, {Equals('hello'), Equals('world')}) - # can pass one expressions as argument - self.assertSetEqual(cls(Equals('hello')).expr, {Equals('hello')}) - # can pass expressions as iterator - self.assertSetEqual(cls(iter((Equals('hello'), Equals('world')))).expr, {Equals('hello'), 
Equals('world')}) - # can pass expressions as generator - def gen(): - yield Equals('hello') - yield Equals('world') - self.assertSetEqual(cls(gen()).expr, {Equals('hello'), Equals('world')}) - # can pass expressions as list-like - self.assertSetEqual(cls((Equals('hello'), Equals('world'))).expr, {Equals('hello'), Equals('world')}) - # can pass one expression as list-like - self.assertSetEqual(cls([Equals('hello')]).expr, {Equals('hello')}) - # must pass expressions - self.assertRaises(TypeError, cls, Foo(), Foo()) - self.assertRaises(TypeError, cls, [Foo(), Foo()]) - - # iter - self.assertSetEqual(set(iter(cls(Equals('hello'), Equals('world')))), {Equals('hello'), Equals('world')}) - # contains - self.assertIn(Equals('world'), cls(Equals('hello'), Equals('world'))) - self.assertNotIn(Equals('foo'), cls(Equals('hello'), Equals('world'))) - # len - self.assertEqual(len(cls(Equals('hello'), Equals('world'))), 2) - self.assertEqual(len(cls(Equals('hello'), Equals('world'), Equals('foo'))), 3) - - - -class TestNot(unittest.TestCase): - def test_essentials(self): - expr = FilterExpression() - # comparison - self.assertEqual(Not(expr), Not(expr)) - self.assertEqual(hash(Not(expr)), hash(Not(expr))) - # comparison respects type - self.assertNotEqual(Not(expr), FilterExpression()) - self.assertNotEqual(hash(Not(expr)), hash(FilterExpression())) - # comparison respects expression - self.assertNotEqual(Not(Equals('hello')), Not(Equals('world'))) - self.assertNotEqual(hash(Not(Equals('hello'))), hash(Not(Equals('world')))) - # string conversion - self.assertEqual(str(Not(Equals('hello'))), 'Not(Equals(hello))') - self.assertEqual(repr(Not(Equals('hello'))), 'Not(Equals(hello))') - - def test_expression(self): - # Not requires an expression argument - self.assertRaises(TypeError, Not) - # expression must be a FilterExpression - self.assertRaises(TypeError, Not, 'hello') - self.assertRaises(TypeError, Not, 1234) - self.assertRaises(TypeError, Not, Predicate(ns.bse.filesize)) - # member returns expression - self.assertEqual(Not(Equals('hello')).expr, Equals('hello')) - - -class TestHas(unittest.TestCase): - def test_essentials(self): - pred = PredicateExpression() - count = FilterExpression() - # comparison - self.assertEqual(Has(pred, count), Has(pred, count)) - self.assertEqual(hash(Has(pred, count)), hash(Has(pred, count))) - # comparison respects type - self.assertNotEqual(Has(pred, count), FilterExpression()) - self.assertNotEqual(hash(Has(pred, count)), hash(FilterExpression())) - # comparison respects predicate - self.assertNotEqual(Has(pred, count), Has(Predicate(ns.bse.filesize), count)) - self.assertNotEqual(hash(Has(pred, count)), hash(Has(Predicate(ns.bse.filesize), count))) - # comparison respects count - self.assertNotEqual(Has(pred, count), Has(pred, LessThan(5))) - self.assertNotEqual(hash(Has(pred, count)), hash(Has(pred, LessThan(5)))) - # string conversion - self.assertEqual(str(Has(Predicate(ns.bse.filesize), LessThan(5))), - f'Has(Predicate({ns.bse.filesize}, False), LessThan(5.0, True))') - self.assertEqual(repr(Has(Predicate(ns.bse.filesize), LessThan(5))), - f'Has(Predicate({ns.bse.filesize}, False), LessThan(5.0, True))') - - def test_members(self): - pred = PredicateExpression() - count = FilterExpression() - # member returns expression - # predicate must be an URI or a PredicateExpression - self.assertEqual(Has(ns.bse.filesize, count).predicate, Predicate(ns.bse.filesize)) - self.assertEqual(Has(Predicate(ns.bse.filesize), count).predicate, Predicate(ns.bse.filesize)) - 
self.assertRaises(TypeError, Has, 1234, FilterExpression()) - self.assertRaises(TypeError, Has, FilterExpression(), FilterExpression()) - # member returns count - # count must be None, an integer, or a FilterExpression - self.assertEqual(Has(pred).count, GreaterThan(1, False)) - self.assertEqual(Has(pred, LessThan(5)).count, LessThan(5)) - self.assertEqual(Has(pred, 5).count, Equals(5)) - self.assertRaises(TypeError, Has, pred, 'hello') - self.assertRaises(TypeError, Has, pred, Predicate(ns.bse.filesize)) - - - -class TestValue(unittest.TestCase): - def test_essentials(self): - # comparison respects type - self.assertNotEqual(_Value('hello'), Equals('hello')) - self.assertNotEqual(Equals('hello'), Is('hello')) - self.assertNotEqual(Is('hello'), Substring('hello')) - self.assertNotEqual(Substring('hello'), StartsWith('hello')) - self.assertNotEqual(StartsWith('hello'), EndsWith('hello')) - self.assertNotEqual(EndsWith('hello'), _Value('hello')) - self.assertNotEqual(hash(_Value('hello')), hash(Equals('hello'))) - self.assertNotEqual(hash(Equals('hello')), hash(Is('hello'))) - self.assertNotEqual(hash(Is('hello')), hash(Substring('hello'))) - self.assertNotEqual(hash(Substring('hello')), hash(StartsWith('hello'))) - self.assertNotEqual(hash(StartsWith('hello')), hash(EndsWith('hello'))) - self.assertNotEqual(hash(EndsWith('hello')), hash(_Value('hello'))) - - for cls in (_Value, Is, Equals, Substring, StartsWith, EndsWith): - # comparison - self.assertEqual(cls('hello'), cls('hello')) - self.assertEqual(hash(cls('hello')), hash(cls('hello'))) - # comparison respects value - self.assertNotEqual(cls('hello'), cls('world')) - self.assertNotEqual(hash(cls('hello')), hash(cls('world'))) - - # string conversion - self.assertEqual(str(_Value('hello')), '_Value(hello)') - self.assertEqual(repr(_Value('hello')), '_Value(hello)') - self.assertEqual(str(Is('hello')), 'Is(hello)') - self.assertEqual(repr(Is('hello')), 'Is(hello)') - self.assertEqual(str(Equals('hello')), 'Equals(hello)') - self.assertEqual(repr(Equals('hello')), 'Equals(hello)') - self.assertEqual(str(Substring('hello')), 'Substring(hello)') - self.assertEqual(repr(Substring('hello')), 'Substring(hello)') - self.assertEqual(str(StartsWith('hello')), 'StartsWith(hello)') - self.assertEqual(repr(StartsWith('hello')), 'StartsWith(hello)') - self.assertEqual(str(EndsWith('hello')), 'EndsWith(hello)') - self.assertEqual(repr(EndsWith('hello')), 'EndsWith(hello)') - - def test_value(self): - class Foo(): pass - for cls in (_Value, Is, Equals, Substring, StartsWith, EndsWith): - # value can be anything - # value returns member - f = Foo() - self.assertEqual(cls('hello').value, 'hello') - self.assertEqual(cls(1234).value, 1234) - self.assertEqual(cls(f).value, f) - - -class TestBounded(unittest.TestCase): - def test_essentials(self): - # comparison respects type - self.assertNotEqual(_Bounded(1234), LessThan(1234)) - self.assertNotEqual(LessThan(1234), GreaterThan(1234)) - self.assertNotEqual(GreaterThan(1234), _Bounded(1234)) - self.assertNotEqual(hash(_Bounded(1234)), hash(LessThan(1234))) - self.assertNotEqual(hash(LessThan(1234)), hash(GreaterThan(1234))) - self.assertNotEqual(hash(GreaterThan(1234)), hash(_Bounded(1234))) - - for cls in (_Bounded, LessThan, GreaterThan): - # comparison - self.assertEqual(cls(1234), cls(1234)) - self.assertEqual(hash(cls(1234)), hash(cls(1234))) - # comparison respects threshold - self.assertNotEqual(cls(1234), cls(4321)) - self.assertNotEqual(hash(cls(1234)), hash(cls(4321))) - # comparison respects strict 
- self.assertNotEqual(cls(1234, True), cls(1234, False)) - self.assertNotEqual(hash(cls(1234, True)), hash(cls(1234, False))) - - # string conversion - self.assertEqual(str(_Bounded(1234, False)), '_Bounded(1234.0, False)') - self.assertEqual(repr(_Bounded(1234, False)), '_Bounded(1234.0, False)') - self.assertEqual(str(LessThan(1234, False)), 'LessThan(1234.0, False)') - self.assertEqual(repr(LessThan(1234, False)), 'LessThan(1234.0, False)') - self.assertEqual(str(GreaterThan(1234, False)), 'GreaterThan(1234.0, False)') - self.assertEqual(repr(GreaterThan(1234, False)), 'GreaterThan(1234.0, False)') - - def test_members(self): - class Foo(): pass - for cls in (_Bounded, LessThan, GreaterThan): - # threshold becomes float - self.assertEqual(cls(1.234).threshold, 1.234) - self.assertEqual(cls(1234).threshold, 1234.0) - self.assertEqual(cls('1234').threshold, 1234) - self.assertRaises(TypeError, cls, Foo()) - # strict becomes bool - self.assertEqual(cls(1234, True).strict, True) - self.assertEqual(cls(1234, False).strict, False) - self.assertEqual(cls(1234, Foo()).strict, True) - - -class TestPredicate(unittest.TestCase): - def test_essentials(self): - # comparison - self.assertEqual(Predicate(ns.bse.filesize), Predicate(ns.bse.filesize)) - self.assertEqual(hash(Predicate(ns.bse.filesize)), hash(Predicate(ns.bse.filesize))) - # comparison respects type - self.assertNotEqual(Predicate(ns.bse.filesize), PredicateExpression()) - self.assertNotEqual(hash(Predicate(ns.bse.filesize)), hash(PredicateExpression())) - # comparison respects predicate - self.assertNotEqual(Predicate(ns.bse.filesize), Predicate(ns.bse.filename)) - self.assertNotEqual(hash(Predicate(ns.bse.filesize)), hash(Predicate(ns.bse.filename))) - # comparison respects reverse - self.assertNotEqual(Predicate(ns.bse.filesize, True), Predicate(ns.bse.filesize, False)) - self.assertNotEqual(hash(Predicate(ns.bse.filesize, True)), hash(Predicate(ns.bse.filesize, False))) - # string conversion - self.assertEqual(str(Predicate(ns.bse.filesize)), f'Predicate({ns.bse.filesize}, False)') - self.assertEqual(str(Predicate(ns.bse.filesize, True)), - f'Predicate({ns.bse.filesize}, True)') - self.assertEqual(repr(Predicate(ns.bse.filesize)), f'Predicate({ns.bse.filesize}, False)') - self.assertEqual(repr(Predicate(ns.bse.filesize, True)), - f'Predicate({ns.bse.filesize}, True)') - - def test_members(self): - # member returns predicate - # predicate must be an URI - self.assertEqual(Predicate(ns.bse.filesize).predicate, ns.bse.filesize) - self.assertEqual(Predicate(URI('hello world')).predicate, URI('hello world')) - self.assertRaises(TypeError, Predicate, 1234) - self.assertRaises(TypeError, Predicate, FilterExpression()) - self.assertRaises(TypeError, Predicate, FilterExpression()) - # reverse becomes a boolean - self.assertEqual(Predicate(ns.bse.filesize, True).reverse, True) - self.assertEqual(Predicate(ns.bse.filesize, False).reverse, False) - self.assertEqual(Predicate(ns.bse.filesize, 'abc').reverse, True) - - -class TestOneOf(unittest.TestCase): - def test_essentials(self): - expr = {Predicate(ns.bse.filename), Predicate(ns.bse.filesize)} - # comparison - self.assertEqual(OneOf(expr), OneOf(expr)) - self.assertEqual(hash(OneOf(expr)), hash(OneOf(expr))) - # comparison respects type - self.assertNotEqual(OneOf(expr), PredicateExpression()) - self.assertNotEqual(hash(OneOf(expr)), hash(PredicateExpression())) - # comparison respects expression - self.assertNotEqual(OneOf(expr), OneOf(Predicate(ns.bse.filename))) - 
self.assertNotEqual(hash(OneOf(expr)), hash(OneOf(Predicate(ns.bse.filename)))) - # string conversion - self.assertEqual(str(OneOf(Predicate(ns.bse.filesize))), - f'OneOf({{Predicate({ns.bse.filesize}, False)}})') - self.assertEqual(repr(OneOf(Predicate(ns.bse.filesize))), - f'OneOf({{Predicate({ns.bse.filesize}, False)}})') - - def test_expression(self): - class Foo(): pass - # can pass expressions as arguments - self.assertSetEqual(OneOf(Predicate(ns.bse.filesize), Predicate(ns.bse.filename)).expr, - {Predicate(ns.bse.filesize), Predicate(ns.bse.filename)}) - # can pass one expressions as argument - self.assertSetEqual(OneOf(Predicate(ns.bse.filesize)).expr, - {Predicate(ns.bse.filesize)}) - # can pass expressions as iterator - self.assertSetEqual(OneOf(iter((Predicate(ns.bse.filesize), Predicate(ns.bse.filename)))).expr, - {Predicate(ns.bse.filesize), Predicate(ns.bse.filename)}) - # can pass expressions as generator - def gen(): - yield Predicate(ns.bse.filesize) - yield Predicate(ns.bse.filename) - self.assertSetEqual(OneOf(gen()).expr, - {Predicate(ns.bse.filesize), Predicate(ns.bse.filename)}) - # can pass expressions as list-like - self.assertSetEqual(OneOf((Predicate(ns.bse.filesize), Predicate(ns.bse.filename))).expr, - {Predicate(ns.bse.filesize), Predicate(ns.bse.filename)}) - # can pass one expression as list-like - self.assertSetEqual(OneOf([Predicate(ns.bse.filesize)]).expr, - {Predicate(ns.bse.filesize)}) - # must pass expressions - self.assertRaises(TypeError, OneOf, Foo(), Foo()) - self.assertRaises(TypeError, OneOf, [Foo(), Foo()]) - # must pass at least one expression - self.assertRaises(AttributeError, OneOf) - - # iter - self.assertSetEqual(set(iter(OneOf(Predicate(ns.bse.filesize), Predicate(ns.bse.filename)))), - {Predicate(ns.bse.filesize), Predicate(ns.bse.filename)}) - # contains - self.assertIn(Predicate(ns.bse.filesize), - OneOf(Predicate(ns.bse.filesize), Predicate(ns.bse.filename))) - self.assertNotIn(Predicate(ns.bse.tag), - OneOf(Predicate(ns.bse.filesize), Predicate(ns.bse.filename))) - # len - self.assertEqual(len(OneOf(Predicate(ns.bse.filesize), Predicate(ns.bse.filename))), 2) - self.assertEqual(len(OneOf(Predicate(ns.bse.filesize), Predicate(ns.bse.filename), Predicate(ns.bse.tag))), 3) - - - def testIsIn(self): - # can pass expressions as arguments - self.assertEqual(IsIn('http://example.com/entity#1234', 'http://example.com/entity#4321'), - Or(Is('http://example.com/entity#1234'), Is('http://example.com/entity#4321'))) - # can pass one expression as argument - self.assertEqual(IsIn('http://example.com/entity#1234'), - Or(Is('http://example.com/entity#1234'))) - # can pass expressions as iterator - self.assertEqual(IsIn(iter(('http://example.com/entity#1234', 'http://example.com/entity#4321'))), - Or(Is('http://example.com/entity#1234'), Is('http://example.com/entity#4321'))) - # can pass expressions as generator - def gen(): - yield 'http://example.com/entity#1234' - yield 'http://example.com/entity#4321' - self.assertEqual(IsIn(gen()), - Or(Is('http://example.com/entity#1234'), Is('http://example.com/entity#4321'))) - # can pass expressions as list-like - self.assertEqual(IsIn(['http://example.com/entity#1234', 'http://example.com/entity#4321']), - Or(Is('http://example.com/entity#1234'), Is('http://example.com/entity#4321'))) - # can pass one expression as list-like - self.assertEqual(IsIn(['http://example.com/entity#1234']), - Or(Is('http://example.com/entity#1234'))) - - - def testIsNotIn(self): - # can pass expressions as arguments - 
self.assertEqual(IsNotIn('http://example.com/entity#1234', 'http://example.com/entity#4321'), - Not(Or(Is('http://example.com/entity#1234'), Is('http://example.com/entity#4321')))) - # can pass one expression as argument - self.assertEqual(IsNotIn('http://example.com/entity#1234'), - Not(Or(Is('http://example.com/entity#1234')))) - # can pass expressions as iterator - self.assertEqual(IsNotIn(iter(('http://example.com/entity#1234', 'http://example.com/entity#4321'))), - Not(Or(Is('http://example.com/entity#1234'), Is('http://example.com/entity#4321')))) - # can pass expressions as generator - def gen(): - yield 'http://example.com/entity#1234' - yield 'http://example.com/entity#4321' - self.assertEqual(IsNotIn(gen()), - Not(Or(Is('http://example.com/entity#1234'), Is('http://example.com/entity#4321')))) - # can pass expressions as list-like - self.assertEqual(IsNotIn(['http://example.com/entity#1234', 'http://example.com/entity#4321']), - Not(Or(Is('http://example.com/entity#1234'), Is('http://example.com/entity#4321')))) - # can pass one expression as list-like - self.assertEqual(IsNotIn(['http://example.com/entity#1234']), - Not(Or(Is('http://example.com/entity#1234')))) - - - -## main ## - -if __name__ == '__main__': - unittest.main() - -## EOF ## diff --git a/test/query/ast_test/__init__.py b/test/query/ast_test/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/test/query/ast_test/test_filter_.py b/test/query/ast_test/test_filter_.py new file mode 100644 index 0000000..4f69bdc --- /dev/null +++ b/test/query/ast_test/test_filter_.py @@ -0,0 +1,480 @@ +""" + +Part of the tagit test suite. +A copy of the license is provided with the project. +Author: Matthias Baumgartner, 2022 +""" +# imports +import unittest + +# bsfs imports +from bsfs.namespace import ns +from bsfs.utils import URI + +# objects to test +from bsfs.query.ast.filter_ import _Expression, FilterExpression, PredicateExpression +from bsfs.query.ast.filter_ import _Branch, Any, All +from bsfs.query.ast.filter_ import _Agg, And, Or +from bsfs.query.ast.filter_ import Not, Has +from bsfs.query.ast.filter_ import _Value, Is, Equals, Substring, StartsWith, EndsWith +from bsfs.query.ast.filter_ import _Bounded, LessThan, GreaterThan +from bsfs.query.ast.filter_ import Predicate, OneOf +from bsfs.query.ast.filter_ import IsIn, IsNotIn + + +## code ## + +class TestExpression(unittest.TestCase): + def test_essentials(self): + # comparison + self.assertEqual(_Expression(), _Expression()) + self.assertEqual(FilterExpression(), FilterExpression()) + self.assertEqual(PredicateExpression(), PredicateExpression()) + self.assertEqual(hash(_Expression()), hash(_Expression())) + self.assertEqual(hash(FilterExpression()), hash(FilterExpression())) + self.assertEqual(hash(PredicateExpression()), hash(PredicateExpression())) + # comparison respects type + self.assertNotEqual(FilterExpression(), _Expression()) + self.assertNotEqual(_Expression(), PredicateExpression()) + self.assertNotEqual(PredicateExpression(), FilterExpression()) + self.assertNotEqual(hash(FilterExpression()), hash(_Expression())) + self.assertNotEqual(hash(_Expression()), hash(PredicateExpression())) + self.assertNotEqual(hash(PredicateExpression()), hash(FilterExpression())) + # string conversion + self.assertEqual(str(_Expression()), '_Expression()') + self.assertEqual(str(FilterExpression()), 'FilterExpression()') + self.assertEqual(str(PredicateExpression()), 'PredicateExpression()') + self.assertEqual(repr(_Expression()), '_Expression()') + 
self.assertEqual(repr(FilterExpression()), 'FilterExpression()') + self.assertEqual(repr(PredicateExpression()), 'PredicateExpression()') + + +class TestBranch(unittest.TestCase): # _Branch, Any, All + def test_essentials(self): + pred = PredicateExpression() + expr = FilterExpression() + + # comparison respects type + self.assertNotEqual(_Branch(pred, expr), Any(pred, expr)) + self.assertNotEqual(Any(pred, expr), All(pred, expr)) + self.assertNotEqual(All(pred, expr), _Branch(pred, expr)) + self.assertNotEqual(hash(_Branch(pred, expr)), hash(Any(pred, expr))) + self.assertNotEqual(hash(Any(pred, expr)), hash(All(pred, expr))) + self.assertNotEqual(hash(All(pred, expr)), hash(_Branch(pred, expr))) + + for cls in (_Branch, Any, All): + # comparison + self.assertEqual(cls(pred, expr), cls(pred, expr)) + self.assertEqual(hash(cls(pred, expr)), hash(cls(pred, expr))) + # comparison respects predicate + self.assertNotEqual(cls(ns.bse.filename, expr), cls(ns.bse.filesize, expr)) + self.assertNotEqual(hash(cls(ns.bse.filename, expr)), hash(cls(ns.bse.filesize, expr))) + # comparison respects expression + self.assertNotEqual(cls(pred, Equals('hello')), cls(pred, Equals('world'))) + self.assertNotEqual(hash(cls(pred, Equals('hello'))), hash(cls(pred, Equals('world')))) + + # string conversion + self.assertEqual(str(_Branch(pred, expr)), f'_Branch({pred}, {expr})') + self.assertEqual(repr(_Branch(pred, expr)), f'_Branch({pred}, {expr})') + self.assertEqual(str(Any(pred, expr)), f'Any({pred}, {expr})') + self.assertEqual(repr(Any(pred, expr)), f'Any({pred}, {expr})') + self.assertEqual(str(All(pred, expr)), f'All({pred}, {expr})') + self.assertEqual(repr(All(pred, expr)), f'All({pred}, {expr})') + + def test_members(self): + class Foo(): pass + pred = PredicateExpression() + expr = FilterExpression() + + for cls in (_Branch, Any, All): + # predicate returns member + self.assertEqual(cls(PredicateExpression(), expr).predicate, PredicateExpression()) + # can pass an URI + self.assertEqual(cls(ns.bse.filename, expr).predicate, Predicate(ns.bse.filename)) + # can pass a PredicateExpression + self.assertEqual(cls(Predicate(ns.bse.filename), expr).predicate, Predicate(ns.bse.filename)) + # must pass an URI or PredicateExpression + self.assertRaises(TypeError, cls, Foo(), expr) + # expression returns member + self.assertEqual(cls(pred, Equals('hello')).expr, Equals('hello')) + # expression must be a FilterExpression + self.assertRaises(TypeError, cls, ns.bse.filename, 'hello') + self.assertRaises(TypeError, cls, ns.bse.filename, 1234) + self.assertRaises(TypeError, cls, ns.bse.filename, Foo()) + + +class TestAgg(unittest.TestCase): # _Agg, And, Or + def test_essentials(self): + expr = {Equals('hello'), Equals('world')} + + # comparison respects type + self.assertNotEqual(_Agg(expr), And(expr)) + self.assertNotEqual(And(expr), Or(expr)) + self.assertNotEqual(Or(expr), _Agg(expr)) + self.assertNotEqual(hash(_Agg(expr)), hash(And(expr))) + self.assertNotEqual(hash(And(expr)), hash(Or(expr))) + self.assertNotEqual(hash(Or(expr)), hash(_Agg(expr))) + + for cls in (_Agg, And, Or): + # comparison + self.assertEqual(cls(expr), cls(expr)) + self.assertEqual(hash(cls(expr)), hash(cls(expr))) + # comparison respects expression + self.assertNotEqual(cls(expr), cls(Equals('world'))) + self.assertNotEqual(hash(cls(expr)), hash(cls(Equals('world')))) + self.assertNotEqual(cls(Equals('hello')), cls(Equals('world'))) + self.assertNotEqual(hash(cls(Equals('hello'))), hash(cls(Equals('world')))) + + # string conversion + 
self.assertEqual(str(_Agg(Equals('hello'))), '_Agg({Equals(hello)})') + self.assertEqual(repr(_Agg(Equals('hello'))), '_Agg({Equals(hello)})') + self.assertEqual(str(And(Equals('hello'))), 'And({Equals(hello)})') + self.assertEqual(repr(And(Equals('hello'))), 'And({Equals(hello)})') + self.assertEqual(str(Or(Equals('hello'))), 'Or({Equals(hello)})') + self.assertEqual(repr(Or(Equals('hello'))), 'Or({Equals(hello)})') + + def test_expression(self): + class Foo(): pass + + for cls in (_Agg, And, Or): + # can pass expressions as arguments + self.assertSetEqual(cls(Equals('hello'), Equals('world')).expr, {Equals('hello'), Equals('world')}) + # can pass one expressions as argument + self.assertSetEqual(cls(Equals('hello')).expr, {Equals('hello')}) + # can pass expressions as iterator + self.assertSetEqual(cls(iter((Equals('hello'), Equals('world')))).expr, {Equals('hello'), Equals('world')}) + # can pass expressions as generator + def gen(): + yield Equals('hello') + yield Equals('world') + self.assertSetEqual(cls(gen()).expr, {Equals('hello'), Equals('world')}) + # can pass expressions as list-like + self.assertSetEqual(cls((Equals('hello'), Equals('world'))).expr, {Equals('hello'), Equals('world')}) + # can pass one expression as list-like + self.assertSetEqual(cls([Equals('hello')]).expr, {Equals('hello')}) + # must pass expressions + self.assertRaises(TypeError, cls, Foo(), Foo()) + self.assertRaises(TypeError, cls, [Foo(), Foo()]) + + # iter + self.assertSetEqual(set(iter(cls(Equals('hello'), Equals('world')))), {Equals('hello'), Equals('world')}) + # contains + self.assertIn(Equals('world'), cls(Equals('hello'), Equals('world'))) + self.assertNotIn(Equals('foo'), cls(Equals('hello'), Equals('world'))) + # len + self.assertEqual(len(cls(Equals('hello'), Equals('world'))), 2) + self.assertEqual(len(cls(Equals('hello'), Equals('world'), Equals('foo'))), 3) + + + +class TestNot(unittest.TestCase): + def test_essentials(self): + expr = FilterExpression() + # comparison + self.assertEqual(Not(expr), Not(expr)) + self.assertEqual(hash(Not(expr)), hash(Not(expr))) + # comparison respects type + self.assertNotEqual(Not(expr), FilterExpression()) + self.assertNotEqual(hash(Not(expr)), hash(FilterExpression())) + # comparison respects expression + self.assertNotEqual(Not(Equals('hello')), Not(Equals('world'))) + self.assertNotEqual(hash(Not(Equals('hello'))), hash(Not(Equals('world')))) + # string conversion + self.assertEqual(str(Not(Equals('hello'))), 'Not(Equals(hello))') + self.assertEqual(repr(Not(Equals('hello'))), 'Not(Equals(hello))') + + def test_expression(self): + # Not requires an expression argument + self.assertRaises(TypeError, Not) + # expression must be a FilterExpression + self.assertRaises(TypeError, Not, 'hello') + self.assertRaises(TypeError, Not, 1234) + self.assertRaises(TypeError, Not, Predicate(ns.bse.filesize)) + # member returns expression + self.assertEqual(Not(Equals('hello')).expr, Equals('hello')) + + +class TestHas(unittest.TestCase): + def test_essentials(self): + pred = PredicateExpression() + count = FilterExpression() + # comparison + self.assertEqual(Has(pred, count), Has(pred, count)) + self.assertEqual(hash(Has(pred, count)), hash(Has(pred, count))) + # comparison respects type + self.assertNotEqual(Has(pred, count), FilterExpression()) + self.assertNotEqual(hash(Has(pred, count)), hash(FilterExpression())) + # comparison respects predicate + self.assertNotEqual(Has(pred, count), Has(Predicate(ns.bse.filesize), count)) + self.assertNotEqual(hash(Has(pred, 
count)), hash(Has(Predicate(ns.bse.filesize), count))) + # comparison respects count + self.assertNotEqual(Has(pred, count), Has(pred, LessThan(5))) + self.assertNotEqual(hash(Has(pred, count)), hash(Has(pred, LessThan(5)))) + # string conversion + self.assertEqual(str(Has(Predicate(ns.bse.filesize), LessThan(5))), + f'Has(Predicate({ns.bse.filesize}, False), LessThan(5.0, True))') + self.assertEqual(repr(Has(Predicate(ns.bse.filesize), LessThan(5))), + f'Has(Predicate({ns.bse.filesize}, False), LessThan(5.0, True))') + + def test_members(self): + pred = PredicateExpression() + count = FilterExpression() + # member returns expression + # predicate must be an URI or a PredicateExpression + self.assertEqual(Has(ns.bse.filesize, count).predicate, Predicate(ns.bse.filesize)) + self.assertEqual(Has(Predicate(ns.bse.filesize), count).predicate, Predicate(ns.bse.filesize)) + self.assertRaises(TypeError, Has, 1234, FilterExpression()) + self.assertRaises(TypeError, Has, FilterExpression(), FilterExpression()) + # member returns count + # count must be None, an integer, or a FilterExpression + self.assertEqual(Has(pred).count, GreaterThan(1, False)) + self.assertEqual(Has(pred, LessThan(5)).count, LessThan(5)) + self.assertEqual(Has(pred, 5).count, Equals(5)) + self.assertRaises(TypeError, Has, pred, 'hello') + self.assertRaises(TypeError, Has, pred, Predicate(ns.bse.filesize)) + + + +class TestValue(unittest.TestCase): + def test_essentials(self): + # comparison respects type + self.assertNotEqual(_Value('hello'), Equals('hello')) + self.assertNotEqual(Equals('hello'), Is('hello')) + self.assertNotEqual(Is('hello'), Substring('hello')) + self.assertNotEqual(Substring('hello'), StartsWith('hello')) + self.assertNotEqual(StartsWith('hello'), EndsWith('hello')) + self.assertNotEqual(EndsWith('hello'), _Value('hello')) + self.assertNotEqual(hash(_Value('hello')), hash(Equals('hello'))) + self.assertNotEqual(hash(Equals('hello')), hash(Is('hello'))) + self.assertNotEqual(hash(Is('hello')), hash(Substring('hello'))) + self.assertNotEqual(hash(Substring('hello')), hash(StartsWith('hello'))) + self.assertNotEqual(hash(StartsWith('hello')), hash(EndsWith('hello'))) + self.assertNotEqual(hash(EndsWith('hello')), hash(_Value('hello'))) + + for cls in (_Value, Is, Equals, Substring, StartsWith, EndsWith): + # comparison + self.assertEqual(cls('hello'), cls('hello')) + self.assertEqual(hash(cls('hello')), hash(cls('hello'))) + # comparison respects value + self.assertNotEqual(cls('hello'), cls('world')) + self.assertNotEqual(hash(cls('hello')), hash(cls('world'))) + + # string conversion + self.assertEqual(str(_Value('hello')), '_Value(hello)') + self.assertEqual(repr(_Value('hello')), '_Value(hello)') + self.assertEqual(str(Is('hello')), 'Is(hello)') + self.assertEqual(repr(Is('hello')), 'Is(hello)') + self.assertEqual(str(Equals('hello')), 'Equals(hello)') + self.assertEqual(repr(Equals('hello')), 'Equals(hello)') + self.assertEqual(str(Substring('hello')), 'Substring(hello)') + self.assertEqual(repr(Substring('hello')), 'Substring(hello)') + self.assertEqual(str(StartsWith('hello')), 'StartsWith(hello)') + self.assertEqual(repr(StartsWith('hello')), 'StartsWith(hello)') + self.assertEqual(str(EndsWith('hello')), 'EndsWith(hello)') + self.assertEqual(repr(EndsWith('hello')), 'EndsWith(hello)') + + def test_value(self): + class Foo(): pass + for cls in (_Value, Is, Equals, Substring, StartsWith, EndsWith): + # value can be anything + # value returns member + f = Foo() + self.assertEqual(cls('hello').value, 
'hello') + self.assertEqual(cls(1234).value, 1234) + self.assertEqual(cls(f).value, f) + + +class TestBounded(unittest.TestCase): + def test_essentials(self): + # comparison respects type + self.assertNotEqual(_Bounded(1234), LessThan(1234)) + self.assertNotEqual(LessThan(1234), GreaterThan(1234)) + self.assertNotEqual(GreaterThan(1234), _Bounded(1234)) + self.assertNotEqual(hash(_Bounded(1234)), hash(LessThan(1234))) + self.assertNotEqual(hash(LessThan(1234)), hash(GreaterThan(1234))) + self.assertNotEqual(hash(GreaterThan(1234)), hash(_Bounded(1234))) + + for cls in (_Bounded, LessThan, GreaterThan): + # comparison + self.assertEqual(cls(1234), cls(1234)) + self.assertEqual(hash(cls(1234)), hash(cls(1234))) + # comparison respects threshold + self.assertNotEqual(cls(1234), cls(4321)) + self.assertNotEqual(hash(cls(1234)), hash(cls(4321))) + # comparison respects strict + self.assertNotEqual(cls(1234, True), cls(1234, False)) + self.assertNotEqual(hash(cls(1234, True)), hash(cls(1234, False))) + + # string conversion + self.assertEqual(str(_Bounded(1234, False)), '_Bounded(1234.0, False)') + self.assertEqual(repr(_Bounded(1234, False)), '_Bounded(1234.0, False)') + self.assertEqual(str(LessThan(1234, False)), 'LessThan(1234.0, False)') + self.assertEqual(repr(LessThan(1234, False)), 'LessThan(1234.0, False)') + self.assertEqual(str(GreaterThan(1234, False)), 'GreaterThan(1234.0, False)') + self.assertEqual(repr(GreaterThan(1234, False)), 'GreaterThan(1234.0, False)') + + def test_members(self): + class Foo(): pass + for cls in (_Bounded, LessThan, GreaterThan): + # threshold becomes float + self.assertEqual(cls(1.234).threshold, 1.234) + self.assertEqual(cls(1234).threshold, 1234.0) + self.assertEqual(cls('1234').threshold, 1234) + self.assertRaises(TypeError, cls, Foo()) + # strict becomes bool + self.assertEqual(cls(1234, True).strict, True) + self.assertEqual(cls(1234, False).strict, False) + self.assertEqual(cls(1234, Foo()).strict, True) + + +class TestPredicate(unittest.TestCase): + def test_essentials(self): + # comparison + self.assertEqual(Predicate(ns.bse.filesize), Predicate(ns.bse.filesize)) + self.assertEqual(hash(Predicate(ns.bse.filesize)), hash(Predicate(ns.bse.filesize))) + # comparison respects type + self.assertNotEqual(Predicate(ns.bse.filesize), PredicateExpression()) + self.assertNotEqual(hash(Predicate(ns.bse.filesize)), hash(PredicateExpression())) + # comparison respects predicate + self.assertNotEqual(Predicate(ns.bse.filesize), Predicate(ns.bse.filename)) + self.assertNotEqual(hash(Predicate(ns.bse.filesize)), hash(Predicate(ns.bse.filename))) + # comparison respects reverse + self.assertNotEqual(Predicate(ns.bse.filesize, True), Predicate(ns.bse.filesize, False)) + self.assertNotEqual(hash(Predicate(ns.bse.filesize, True)), hash(Predicate(ns.bse.filesize, False))) + # string conversion + self.assertEqual(str(Predicate(ns.bse.filesize)), f'Predicate({ns.bse.filesize}, False)') + self.assertEqual(str(Predicate(ns.bse.filesize, True)), + f'Predicate({ns.bse.filesize}, True)') + self.assertEqual(repr(Predicate(ns.bse.filesize)), f'Predicate({ns.bse.filesize}, False)') + self.assertEqual(repr(Predicate(ns.bse.filesize, True)), + f'Predicate({ns.bse.filesize}, True)') + + def test_members(self): + # member returns predicate + # predicate must be an URI + self.assertEqual(Predicate(ns.bse.filesize).predicate, ns.bse.filesize) + self.assertEqual(Predicate(URI('hello world')).predicate, URI('hello world')) + self.assertRaises(TypeError, Predicate, 1234) + 
self.assertRaises(TypeError, Predicate, FilterExpression()) + self.assertRaises(TypeError, Predicate, FilterExpression()) + # reverse becomes a boolean + self.assertEqual(Predicate(ns.bse.filesize, True).reverse, True) + self.assertEqual(Predicate(ns.bse.filesize, False).reverse, False) + self.assertEqual(Predicate(ns.bse.filesize, 'abc').reverse, True) + + +class TestOneOf(unittest.TestCase): + def test_essentials(self): + expr = {Predicate(ns.bse.filename), Predicate(ns.bse.filesize)} + # comparison + self.assertEqual(OneOf(expr), OneOf(expr)) + self.assertEqual(hash(OneOf(expr)), hash(OneOf(expr))) + # comparison respects type + self.assertNotEqual(OneOf(expr), PredicateExpression()) + self.assertNotEqual(hash(OneOf(expr)), hash(PredicateExpression())) + # comparison respects expression + self.assertNotEqual(OneOf(expr), OneOf(Predicate(ns.bse.filename))) + self.assertNotEqual(hash(OneOf(expr)), hash(OneOf(Predicate(ns.bse.filename)))) + # string conversion + self.assertEqual(str(OneOf(Predicate(ns.bse.filesize))), + f'OneOf({{Predicate({ns.bse.filesize}, False)}})') + self.assertEqual(repr(OneOf(Predicate(ns.bse.filesize))), + f'OneOf({{Predicate({ns.bse.filesize}, False)}})') + + def test_expression(self): + class Foo(): pass + # can pass expressions as arguments + self.assertSetEqual(OneOf(Predicate(ns.bse.filesize), Predicate(ns.bse.filename)).expr, + {Predicate(ns.bse.filesize), Predicate(ns.bse.filename)}) + # can pass one expressions as argument + self.assertSetEqual(OneOf(Predicate(ns.bse.filesize)).expr, + {Predicate(ns.bse.filesize)}) + # can pass expressions as iterator + self.assertSetEqual(OneOf(iter((Predicate(ns.bse.filesize), Predicate(ns.bse.filename)))).expr, + {Predicate(ns.bse.filesize), Predicate(ns.bse.filename)}) + # can pass expressions as generator + def gen(): + yield Predicate(ns.bse.filesize) + yield Predicate(ns.bse.filename) + self.assertSetEqual(OneOf(gen()).expr, + {Predicate(ns.bse.filesize), Predicate(ns.bse.filename)}) + # can pass expressions as list-like + self.assertSetEqual(OneOf((Predicate(ns.bse.filesize), Predicate(ns.bse.filename))).expr, + {Predicate(ns.bse.filesize), Predicate(ns.bse.filename)}) + # can pass one expression as list-like + self.assertSetEqual(OneOf([Predicate(ns.bse.filesize)]).expr, + {Predicate(ns.bse.filesize)}) + # must pass expressions + self.assertRaises(TypeError, OneOf, Foo(), Foo()) + self.assertRaises(TypeError, OneOf, [Foo(), Foo()]) + # must pass at least one expression + self.assertRaises(AttributeError, OneOf) + + # iter + self.assertSetEqual(set(iter(OneOf(Predicate(ns.bse.filesize), Predicate(ns.bse.filename)))), + {Predicate(ns.bse.filesize), Predicate(ns.bse.filename)}) + # contains + self.assertIn(Predicate(ns.bse.filesize), + OneOf(Predicate(ns.bse.filesize), Predicate(ns.bse.filename))) + self.assertNotIn(Predicate(ns.bse.tag), + OneOf(Predicate(ns.bse.filesize), Predicate(ns.bse.filename))) + # len + self.assertEqual(len(OneOf(Predicate(ns.bse.filesize), Predicate(ns.bse.filename))), 2) + self.assertEqual(len(OneOf(Predicate(ns.bse.filesize), Predicate(ns.bse.filename), Predicate(ns.bse.tag))), 3) + + + def testIsIn(self): + # can pass expressions as arguments + self.assertEqual(IsIn('http://example.com/entity#1234', 'http://example.com/entity#4321'), + Or(Is('http://example.com/entity#1234'), Is('http://example.com/entity#4321'))) + # can pass one expression as argument + self.assertEqual(IsIn('http://example.com/entity#1234'), + Or(Is('http://example.com/entity#1234'))) + # can pass expressions as 
iterator + self.assertEqual(IsIn(iter(('http://example.com/entity#1234', 'http://example.com/entity#4321'))), + Or(Is('http://example.com/entity#1234'), Is('http://example.com/entity#4321'))) + # can pass expressions as generator + def gen(): + yield 'http://example.com/entity#1234' + yield 'http://example.com/entity#4321' + self.assertEqual(IsIn(gen()), + Or(Is('http://example.com/entity#1234'), Is('http://example.com/entity#4321'))) + # can pass expressions as list-like + self.assertEqual(IsIn(['http://example.com/entity#1234', 'http://example.com/entity#4321']), + Or(Is('http://example.com/entity#1234'), Is('http://example.com/entity#4321'))) + # can pass one expression as list-like + self.assertEqual(IsIn(['http://example.com/entity#1234']), + Or(Is('http://example.com/entity#1234'))) + + + def testIsNotIn(self): + # can pass expressions as arguments + self.assertEqual(IsNotIn('http://example.com/entity#1234', 'http://example.com/entity#4321'), + Not(Or(Is('http://example.com/entity#1234'), Is('http://example.com/entity#4321')))) + # can pass one expression as argument + self.assertEqual(IsNotIn('http://example.com/entity#1234'), + Not(Or(Is('http://example.com/entity#1234')))) + # can pass expressions as iterator + self.assertEqual(IsNotIn(iter(('http://example.com/entity#1234', 'http://example.com/entity#4321'))), + Not(Or(Is('http://example.com/entity#1234'), Is('http://example.com/entity#4321')))) + # can pass expressions as generator + def gen(): + yield 'http://example.com/entity#1234' + yield 'http://example.com/entity#4321' + self.assertEqual(IsNotIn(gen()), + Not(Or(Is('http://example.com/entity#1234'), Is('http://example.com/entity#4321')))) + # can pass expressions as list-like + self.assertEqual(IsNotIn(['http://example.com/entity#1234', 'http://example.com/entity#4321']), + Not(Or(Is('http://example.com/entity#1234'), Is('http://example.com/entity#4321')))) + # can pass one expression as list-like + self.assertEqual(IsNotIn(['http://example.com/entity#1234']), + Not(Or(Is('http://example.com/entity#1234')))) + + + +## main ## + +if __name__ == '__main__': + unittest.main() + +## EOF ## -- cgit v1.2.3 From 6fd984e694b0a7b749ab947211d792f5b011ee6f Mon Sep 17 00:00:00 2001 From: Matthias Baumgartner Date: Thu, 12 Jan 2023 08:44:25 +0100 Subject: renamed get_child to child in schema.types._Type and _Vertex to Vertex in schema.types --- bsfs/schema/__init__.py | 2 +- bsfs/schema/schema.py | 2 +- bsfs/schema/serialize.py | 4 +- bsfs/schema/types.py | 28 +++--- test/graph/test_resolve.py | 2 +- test/query/test_validator.py | 22 ++--- test/schema/test_schema.py | 40 ++++---- test/schema/test_serialize.py | 126 +++++++++++++------------- test/schema/test_types.py | 108 +++++++++++----------- test/triple_store/sparql/test_parse_filter.py | 2 +- test/triple_store/sparql/test_sparql.py | 10 +- 11 files changed, 173 insertions(+), 173 deletions(-) diff --git a/bsfs/schema/__init__.py b/bsfs/schema/__init__.py index dc24313..5162a01 100644 --- a/bsfs/schema/__init__.py +++ b/bsfs/schema/__init__.py @@ -10,7 +10,7 @@ import typing # inner-module imports from .schema import Schema from .serialize import from_string, to_string -from .types import Literal, Node, Predicate, _Vertex # FIXME: _Vertex +from .types import Literal, Node, Predicate, Vertex, ROOT_FEATURE, ROOT_LITERAL, ROOT_NODE, ROOT_NUMBER, ROOT_PREDICATE, ROOT_VERTEX # exports __all__: typing.Sequence[str] = ( diff --git a/bsfs/schema/schema.py b/bsfs/schema/schema.py index 1c4c807..80cb58a 100644 --- a/bsfs/schema/schema.py +++ 
b/bsfs/schema/schema.py @@ -83,7 +83,7 @@ class Schema(): prange = {pred.range for pred in predicates} nodes |= {vert for vert in prange if isinstance(vert, types.Node)} literals |= {vert for vert in prange if isinstance(vert, types.Literal)} - # NOTE: ROOT_PREDICATE has a _Vertex as range which is neither in nodes nor literals + # NOTE: ROOT_PREDICATE has a Vertex as range which is neither in nodes nor literals # FIXME: with the ROOT_VERTEX missing, the schema is not complete anymore! # include parents in nodes and literals sets diff --git a/bsfs/schema/serialize.py b/bsfs/schema/serialize.py index 1222aa6..c1ac9a9 100644 --- a/bsfs/schema/serialize.py +++ b/bsfs/schema/serialize.py @@ -125,10 +125,10 @@ def from_string(schema_str: str) -> schema.Schema: # get distance distance = _fetch_value(uri, rdflib.URIRef(ns.bsfs.distance), URI) # return feature - return parent.get_child(URI(uri), domain=dom, range=rng, unique=unique, + return parent.child(URI(uri), domain=dom, range=rng, unique=unique, dtype=dtype, dimension=dimension, distance=distance, **annotations) # handle non-feature predicate - return parent.get_child(URI(uri), domain=dom, range=rng, unique=unique, **annotations) + return parent.child(URI(uri), domain=dom, range=rng, unique=unique, **annotations) predicates = _fetch_hierarchically(_build_predicate, types.ROOT_PREDICATE) return schema.Schema(predicates, nodes, literals) diff --git a/bsfs/schema/types.py b/bsfs/schema/types.py index e737263..4f49efe 100644 --- a/bsfs/schema/types.py +++ b/bsfs/schema/types.py @@ -114,7 +114,7 @@ class _Type(): yield curr curr = curr.parent - def get_child( + def child( self, uri: URI, **kwargs, @@ -201,21 +201,21 @@ class _Type(): return False -class _Vertex(_Type): +class Vertex(_Type): """Graph vertex types. Can be a Node or a Literal.""" - parent: typing.Optional['_Vertex'] - def __init__(self, uri: URI, parent: typing.Optional['_Vertex'], **kwargs): + parent: typing.Optional['Vertex'] + def __init__(self, uri: URI, parent: typing.Optional['Vertex'], **kwargs): super().__init__(uri, parent, **kwargs) -class Node(_Vertex): +class Node(Vertex): """Node type.""" parent: typing.Optional['Node'] def __init__(self, uri: URI, parent: typing.Optional['Node'], **kwargs): super().__init__(uri, parent, **kwargs) -class Literal(_Vertex): +class Literal(Vertex): """Literal type.""" parent: typing.Optional['Literal'] def __init__(self, uri: URI, parent: typing.Optional['Literal'] ,**kwargs): @@ -229,7 +229,7 @@ class Predicate(_Type): domain: Node # destination type. - range: _Vertex + range: Vertex # maximum cardinality of type. 
unique: bool @@ -241,7 +241,7 @@ class Predicate(_Type): parent: '_PredicateBase', # Predicate members domain: Node, - range: _Vertex, # pylint: disable=redefined-builtin + range: Vertex, # pylint: disable=redefined-builtin unique: bool, **kwargs, ): @@ -265,11 +265,11 @@ class Predicate(_Type): and self.range == other.range \ and self.unique == other.unique - def get_child( + def child( self, uri: URI, domain: typing.Optional[Node] = None, - range: typing.Optional[_Vertex] = None, # pylint: disable=redefined-builtin + range: typing.Optional[Vertex] = None, # pylint: disable=redefined-builtin unique: typing.Optional[bool] = None, **kwargs, ): @@ -287,7 +287,7 @@ class Predicate(_Type): raise errors.ConsistencyError(f'{range} must be a subclass of {self.range}') if unique is None: unique = self.unique - return super().get_child( + return super().child( uri=uri, domain=domain, range=range, @@ -337,7 +337,7 @@ class Feature(Predicate): and self.dtype == other.dtype \ and self.distance == other.distance - def get_child( + def child( self, uri: URI, domain: typing.Optional[Node] = None, @@ -355,7 +355,7 @@ class Feature(Predicate): dtype = self.dtype if distance is None: distance = self.distance - return super().get_child( + return super().child( uri=uri, domain=domain, range=range, @@ -368,7 +368,7 @@ class Feature(Predicate): # essential vertices -ROOT_VERTEX = _Vertex( +ROOT_VERTEX = Vertex( uri=ns.bsfs.Vertex, parent=None, ) diff --git a/test/graph/test_resolve.py b/test/graph/test_resolve.py index 5bc99e4..f515320 100644 --- a/test/graph/test_resolve.py +++ b/test/graph/test_resolve.py @@ -65,7 +65,7 @@ class TestFilter(unittest.TestCase): {URI('http://example.com/me/entity#1234'), URI('http://example.com/me/entity#4321')}) tags = graph.nodes(ns.bsfs.Tag, {URI('http://example.com/me/tag#1234'), URI('http://example.com/me/tag#4321')}) - invalid = nodes.Nodes(None, '', schema.node(ns.bsfs.Node).get_child(ns.bsfs.Invalid), + invalid = nodes.Nodes(None, '', schema.node(ns.bsfs.Node).child(ns.bsfs.Invalid), {'http://example.com/you/invalid#1234', 'http://example.com/you/invalid#4321'}) resolver = Filter(schema) diff --git a/test/query/test_validator.py b/test/query/test_validator.py index 4f8364a..bf3ceeb 100644 --- a/test/query/test_validator.py +++ b/test/query/test_validator.py @@ -69,8 +69,8 @@ class TestFilter(unittest.TestCase): self.assertRaises(TypeError, self.validate, '1234', None) self.assertRaises(TypeError, self.validate, self.schema.literal(ns.bsfs.URI), None) # root_type must exist in the schema - self.assertRaises(errors.ConsistencyError, self.validate, self.schema.node(ns.bsfs.Node).get_child(ns.bsfs.Image), None) - self.assertRaises(errors.ConsistencyError, self.validate, self.schema.node(ns.bsfs.Entity).get_child(ns.bsfs.Image), None) + self.assertRaises(errors.ConsistencyError, self.validate, self.schema.node(ns.bsfs.Node).child(ns.bsfs.Image), None) + self.assertRaises(errors.ConsistencyError, self.validate, self.schema.node(ns.bsfs.Entity).child(ns.bsfs.Image), None) # valid query returns true self.assertTrue(self.validate(self.schema.node(ns.bsfs.Entity), ast.filter.Any(ast.filter.OneOf(ns.bse.tag, ns.bse.buddy), @@ -130,7 +130,7 @@ class TestFilter(unittest.TestCase): # type must be a node self.assertRaises(errors.ConsistencyError, self.validate._branch, self.schema.literal(ns.bsfs.Literal), None) # type must be in the schema - self.assertRaises(errors.ConsistencyError, self.validate._branch, self.schema.node(ns.bsfs.Node).get_child(ns.bsfs.Invalid), None) + 
self.assertRaises(errors.ConsistencyError, self.validate._branch, self.schema.node(ns.bsfs.Node).child(ns.bsfs.Invalid), None) # predicate is verified self.assertRaises(errors.ConsistencyError, self.validate._branch, self.schema.node(ns.bsfs.Entity), ast.filter.Any(ns.bsfs.Invalid, ast.filter.Is('http://example.com/entity#1234'))) @@ -187,7 +187,7 @@ class TestFilter(unittest.TestCase): self.assertRaises(errors.ConsistencyError, self.validate._has, self.schema.literal(ns.bsfs.Literal), ast.filter.Has(ns.bse.tag)) # type must be in the schema - self.assertRaises(errors.ConsistencyError, self.validate._has, self.schema.node(ns.bsfs.Node).get_child(ns.bsfs.Invalid), + self.assertRaises(errors.ConsistencyError, self.validate._has, self.schema.node(ns.bsfs.Node).child(ns.bsfs.Invalid), ast.filter.Has(ns.bse.tag)) # has checks predicate self.assertRaises(errors.ConsistencyError, self.validate._has, self.schema.node(ns.bsfs.Entity), @@ -206,7 +206,7 @@ class TestFilter(unittest.TestCase): self.assertRaises(errors.ConsistencyError, self.validate._is, self.schema.literal(ns.bsfs.Literal), ast.filter.Is('http://example.com/foo')) # type must be in the schema - self.assertRaises(errors.ConsistencyError, self.validate._is, self.schema.node(ns.bsfs.Node).get_child(ns.bsfs.Invalid), + self.assertRaises(errors.ConsistencyError, self.validate._is, self.schema.node(ns.bsfs.Node).child(ns.bsfs.Invalid), ast.filter.Is('http://example.com/foo')) # is accepts correct expressions self.assertIsNone(self.validate._is(self.schema.node(ns.bsfs.Entity), ast.filter.Is('http://example.com/entity#1234'))) @@ -222,13 +222,13 @@ class TestFilter(unittest.TestCase): self.assertRaises(errors.ConsistencyError, self.validate._value, self.schema.node(ns.bsfs.Node), ast.filter.EndsWith('hello world')) # type must be in the schema - self.assertRaises(errors.ConsistencyError, self.validate._value, self.schema.literal(ns.bsfs.Literal).get_child(ns.bsfs.Invalid), + self.assertRaises(errors.ConsistencyError, self.validate._value, self.schema.literal(ns.bsfs.Literal).child(ns.bsfs.Invalid), ast.filter.Equals('hello world')) - self.assertRaises(errors.ConsistencyError, self.validate._value, self.schema.literal(ns.bsfs.Literal).get_child(ns.bsfs.Invalid), + self.assertRaises(errors.ConsistencyError, self.validate._value, self.schema.literal(ns.bsfs.Literal).child(ns.bsfs.Invalid), ast.filter.Substring('hello world')) - self.assertRaises(errors.ConsistencyError, self.validate._value, self.schema.literal(ns.bsfs.Literal).get_child(ns.bsfs.Invalid), + self.assertRaises(errors.ConsistencyError, self.validate._value, self.schema.literal(ns.bsfs.Literal).child(ns.bsfs.Invalid), ast.filter.StartsWith('hello world')) - self.assertRaises(errors.ConsistencyError, self.validate._value, self.schema.literal(ns.bsfs.Literal).get_child(ns.bsfs.Invalid), + self.assertRaises(errors.ConsistencyError, self.validate._value, self.schema.literal(ns.bsfs.Literal).child(ns.bsfs.Invalid), ast.filter.EndsWith('hello world')) # value accepts correct expressions self.assertIsNone(self.validate._value(self.schema.literal(ns.xsd.string), ast.filter.Equals('hello world'))) @@ -243,9 +243,9 @@ class TestFilter(unittest.TestCase): self.assertRaises(errors.ConsistencyError, self.validate._bounded, self.schema.node(ns.bsfs.Node), ast.filter.LessThan(0)) # type must be in the schema - self.assertRaises(errors.ConsistencyError, self.validate._bounded, self.schema.literal(ns.bsfs.Literal).get_child(ns.bsfs.Invalid), + self.assertRaises(errors.ConsistencyError, 
self.validate._bounded, self.schema.literal(ns.bsfs.Literal).child(ns.bsfs.Invalid), ast.filter.GreaterThan(0)) - self.assertRaises(errors.ConsistencyError, self.validate._bounded, self.schema.literal(ns.bsfs.Literal).get_child(ns.bsfs.Invalid), + self.assertRaises(errors.ConsistencyError, self.validate._bounded, self.schema.literal(ns.bsfs.Literal).child(ns.bsfs.Invalid), ast.filter.LessThan(0)) # bounded accepts correct expressions self.assertIsNone(self.validate._bounded(self.schema.literal(ns.xsd.integer), ast.filter.LessThan(0))) diff --git a/test/schema/test_schema.py b/test/schema/test_schema.py index 1b45db0..ca21f87 100644 --- a/test/schema/test_schema.py +++ b/test/schema/test_schema.py @@ -73,9 +73,9 @@ class TestSchema(unittest.TestCase): # predicates self.p_root = types.ROOT_PREDICATE self.f_root = types.ROOT_FEATURE - self.p_tag = self.p_root.get_child(ns.bse.tag, self.n_ent, self.n_tag, False) - self.p_group = self.p_tag.get_child(ns.bse.group, self.n_img, self.n_tag, False) - self.p_comment = self.p_root.get_child(ns.bse.comment, self.n_root, self.l_string, True) + self.p_tag = self.p_root.child(ns.bse.tag, self.n_ent, self.n_tag, False) + self.p_group = self.p_tag.child(ns.bse.group, self.n_img, self.n_tag, False) + self.p_comment = self.p_root.child(ns.bse.comment, self.n_root, self.l_string, True) self.predicates = [self.p_root, self.f_root, self.p_tag, self.p_group, self.p_comment] def test_construction(self): @@ -217,16 +217,16 @@ class TestSchema(unittest.TestCase): self.assertNotEqual(hash(schema), hash(Schema([self.p_group, self.p_tag, self.p_root], self.nodes, self.literals))) self.assertNotEqual(schema, - Schema(self.predicates + [self.p_root.get_child(ns.bse.filesize, self.n_ent, self.l_integer)], self.nodes, self.literals)) + Schema(self.predicates + [self.p_root.child(ns.bse.filesize, self.n_ent, self.l_integer)], self.nodes, self.literals)) self.assertNotEqual(hash(schema), - hash(Schema(self.predicates + [self.p_root.get_child(ns.bse.filesize, self.n_ent, self.l_integer)], self.nodes, self.literals))) + hash(Schema(self.predicates + [self.p_root.child(ns.bse.filesize, self.n_ent, self.l_integer)], self.nodes, self.literals))) def test_order(self): # setup class Foo(): pass - p_foo = self.p_root.get_child(ns.bse.foo, self.n_ent, self.l_string, True) - p_sub = p_foo.get_child(ns.bse.sub, self.n_ent, self.l_string, True) - p_bar = self.p_root.get_child(ns.bse.bar, self.n_ent, self.l_string, True) + p_foo = self.p_root.child(ns.bse.foo, self.n_ent, self.l_string, True) + p_sub = p_foo.child(ns.bse.sub, self.n_ent, self.l_string, True) + p_bar = self.p_root.child(ns.bse.bar, self.n_ent, self.l_string, True) # can only compare schema to other schema # < @@ -305,44 +305,44 @@ class TestSchema(unittest.TestCase): # inconsistent schema cannot be a subset self.assertFalse(operator.le(Schema({p_foo}), Schema({ - self.p_root.get_child(ns.bse.foo, self.n_ent, self.l_integer, True)}))) # inconsistent w.r.t. literal + self.p_root.child(ns.bse.foo, self.n_ent, self.l_integer, True)}))) # inconsistent w.r.t. literal self.assertFalse(operator.le(Schema({p_foo}), Schema({ - self.p_root.get_child(ns.bse.foo, self.n_img, self.l_string, True)}))) # inconsistent w.r.t. node + self.p_root.child(ns.bse.foo, self.n_img, self.l_string, True)}))) # inconsistent w.r.t. node self.assertFalse(operator.le(Schema({p_foo}), Schema({ - self.p_root.get_child(ns.bse.foo, self.n_ent, self.l_string, False)}))) # inconsistent w.r.t. 
unique + self.p_root.child(ns.bse.foo, self.n_ent, self.l_string, False)}))) # inconsistent w.r.t. unique self.assertFalse(operator.le(Schema({}, {self.n_img}), Schema({}, { types.Node(ns.bsfs.Image, types.Node(ns.bsfs.Node, None))}))) self.assertFalse(operator.le(Schema({}, {}, {self.l_integer}), Schema({}, {}, { types.Literal(ns.xsd.integer, types.Literal(ns.xsd.number, types.Literal(ns.bsfs.Literal, None)))}))) # inconsistent schema cannot be a true subset self.assertFalse(operator.lt(Schema({p_foo}), Schema({ - self.p_root.get_child(ns.bse.foo, self.n_ent, self.l_integer, True)}))) # inconsistent w.r.t. literal + self.p_root.child(ns.bse.foo, self.n_ent, self.l_integer, True)}))) # inconsistent w.r.t. literal self.assertFalse(operator.lt(Schema({p_foo}), Schema({ - self.p_root.get_child(ns.bse.foo, self.n_img, self.l_string, True)}))) # inconsistent w.r.t. node + self.p_root.child(ns.bse.foo, self.n_img, self.l_string, True)}))) # inconsistent w.r.t. node self.assertFalse(operator.lt(Schema({p_foo}), Schema({ - self.p_root.get_child(ns.bse.foo, self.n_ent, self.l_string, False)}))) # inconsistent w.r.t. unique + self.p_root.child(ns.bse.foo, self.n_ent, self.l_string, False)}))) # inconsistent w.r.t. unique self.assertFalse(operator.lt(Schema({}, {self.n_img}), Schema({}, { types.Node(ns.bsfs.Image, types.Node(ns.bsfs.Node, None))}))) self.assertFalse(operator.lt(Schema({}, {}, {self.l_integer}), Schema({}, {}, { types.Literal(ns.xsd.integer, types.Literal(ns.xsd.number, types.Literal(ns.bsfs.Literal, None)))}))) # inconsistent schema cannot be a superset self.assertFalse(operator.ge(Schema({p_foo}), Schema({ - self.p_root.get_child(ns.bse.foo, self.n_ent, self.l_integer, True)}))) # inconsistent w.r.t. literal + self.p_root.child(ns.bse.foo, self.n_ent, self.l_integer, True)}))) # inconsistent w.r.t. literal self.assertFalse(operator.ge(Schema({p_foo}), Schema({ - self.p_root.get_child(ns.bse.foo, self.n_img, self.l_string, True)}))) # inconsistent w.r.t. node + self.p_root.child(ns.bse.foo, self.n_img, self.l_string, True)}))) # inconsistent w.r.t. node self.assertFalse(operator.ge(Schema({p_foo}), Schema({ - self.p_root.get_child(ns.bse.foo, self.n_ent, self.l_string, False)}))) # inconsistent w.r.t. unique + self.p_root.child(ns.bse.foo, self.n_ent, self.l_string, False)}))) # inconsistent w.r.t. unique self.assertFalse(operator.ge(Schema({}, {self.n_img}), Schema({}, { types.Node(ns.bsfs.Image, types.Node(ns.bsfs.Node, None))}))) self.assertFalse(operator.ge(Schema({}, {}, {self.l_integer}), Schema({}, {}, { types.Literal(ns.xsd.integer, types.Literal(ns.xsd.number, types.Literal(ns.bsfs.Literal, None)))}))) # inconsistent schema cannot be a true superset self.assertFalse(operator.gt(Schema({p_foo}), Schema({ - self.p_root.get_child(ns.bse.foo, self.n_ent, self.l_integer, True)}))) # inconsistent w.r.t. literal + self.p_root.child(ns.bse.foo, self.n_ent, self.l_integer, True)}))) # inconsistent w.r.t. literal self.assertFalse(operator.gt(Schema({p_foo}), Schema({ - self.p_root.get_child(ns.bse.foo, self.n_img, self.l_string, True)}))) # inconsistent w.r.t. node + self.p_root.child(ns.bse.foo, self.n_img, self.l_string, True)}))) # inconsistent w.r.t. node self.assertFalse(operator.gt(Schema({p_foo}), Schema({ - self.p_root.get_child(ns.bse.foo, self.n_ent, self.l_string, False)}))) # inconsistent w.r.t. unique + self.p_root.child(ns.bse.foo, self.n_ent, self.l_string, False)}))) # inconsistent w.r.t. 
unique self.assertFalse(operator.gt(Schema({}, {self.n_img}), Schema({}, { types.Node(ns.bsfs.Image, types.Node(ns.bsfs.Node, None))}))) self.assertFalse(operator.gt(Schema({}, {}, {self.l_integer}), Schema({}, {}, { diff --git a/test/schema/test_serialize.py b/test/schema/test_serialize.py index 7392cc0..b9d8599 100644 --- a/test/schema/test_serialize.py +++ b/test/schema/test_serialize.py @@ -66,7 +66,7 @@ class TestFromString(unittest.TestCase): ''') # additional nodes can be defined - n_unused = types.ROOT_NODE.get_child(ns.bsfs.unused) + n_unused = types.ROOT_NODE.child(ns.bsfs.unused) self.assertEqual(Schema({}, {n_unused}), from_string(''' prefix rdfs: prefix xsd: @@ -77,10 +77,10 @@ class TestFromString(unittest.TestCase): ''')) # a node can have multiple children - n_ent = types.ROOT_NODE.get_child(ns.bsfs.Entity) - n_tag = types.ROOT_NODE.get_child(ns.bsfs.Tag) - n_doc = n_ent.get_child(ns.bsfs.Document) - n_image = n_ent.get_child(ns.bsfs.Image) + n_ent = types.ROOT_NODE.child(ns.bsfs.Entity) + n_tag = types.ROOT_NODE.child(ns.bsfs.Tag) + n_doc = n_ent.child(ns.bsfs.Document) + n_image = n_ent.child(ns.bsfs.Image) self.assertEqual(Schema({}, {n_ent, n_tag, n_doc, n_image}), from_string(''' prefix rdfs: prefix xsd: @@ -97,9 +97,9 @@ class TestFromString(unittest.TestCase): ''')) # additional nodes can be defined and used - n_ent = types.ROOT_NODE.get_child(ns.bsfs.Entity) - l_string = types.ROOT_LITERAL.get_child(ns.xsd.string) - p_filename = types.ROOT_PREDICATE.get_child(ns.bse.filename, + n_ent = types.ROOT_NODE.child(ns.bsfs.Entity) + l_string = types.ROOT_LITERAL.child(ns.xsd.string) + p_filename = types.ROOT_PREDICATE.child(ns.bse.filename, n_ent, l_string, False) self.assertEqual(Schema({p_filename}), from_string(''' prefix rdfs: @@ -168,7 +168,7 @@ class TestFromString(unittest.TestCase): ''') # additional literals can be defined - l_unused = types.ROOT_LITERAL.get_child(ns.xsd.unused) + l_unused = types.ROOT_LITERAL.child(ns.xsd.unused) self.assertEqual(Schema({}, {}, {l_unused}), from_string(''' prefix rdfs: prefix xsd: @@ -179,10 +179,10 @@ class TestFromString(unittest.TestCase): ''')) # a literal can have multiple children - l_string = types.ROOT_LITERAL.get_child(ns.xsd.string) - l_integer = types.ROOT_LITERAL.get_child(ns.xsd.integer) - l_unsigned = l_integer.get_child(ns.xsd.unsigned) - l_signed = l_integer.get_child(ns.xsd.signed) + l_string = types.ROOT_LITERAL.child(ns.xsd.string) + l_integer = types.ROOT_LITERAL.child(ns.xsd.integer) + l_unsigned = l_integer.child(ns.xsd.unsigned) + l_signed = l_integer.child(ns.xsd.signed) self.assertEqual(Schema({}, {}, {l_string, l_integer, l_unsigned, l_signed}), from_string(''' prefix rdfs: prefix xsd: @@ -199,9 +199,9 @@ class TestFromString(unittest.TestCase): ''')) # additional literals can be defined and used - n_ent = types.ROOT_NODE.get_child(ns.bsfs.Entity) - l_string = types.ROOT_LITERAL.get_child(ns.xsd.string) - p_filename = types.ROOT_PREDICATE.get_child(ns.bse.filename, + n_ent = types.ROOT_NODE.child(ns.bsfs.Entity) + l_string = types.ROOT_LITERAL.child(ns.xsd.string) + p_filename = types.ROOT_PREDICATE.child(ns.bse.filename, n_ent, l_string, False) self.assertEqual(Schema({p_filename}), from_string(''' prefix rdfs: @@ -317,9 +317,9 @@ class TestFromString(unittest.TestCase): ''') # additional predicates can be defined - n_ent = types.ROOT_NODE.get_child(ns.bsfs.Entity) - l_string = types.ROOT_LITERAL.get_child(ns.xsd.string) - p_comment = types.ROOT_PREDICATE.get_child(ns.bse.comment, domain=n_ent, 
range=l_string, unique=False) + n_ent = types.ROOT_NODE.child(ns.bsfs.Entity) + l_string = types.ROOT_LITERAL.child(ns.xsd.string) + p_comment = types.ROOT_PREDICATE.child(ns.bse.comment, domain=n_ent, range=l_string, unique=False) self.assertEqual(Schema({p_comment}), from_string(''' prefix rdfs: prefix xsd: @@ -336,10 +336,10 @@ class TestFromString(unittest.TestCase): ''')) # predicates inherit properties from parents - n_ent = types.ROOT_NODE.get_child(ns.bsfs.Entity) - l_string = types.ROOT_LITERAL.get_child(ns.xsd.string) - p_annotation = types.ROOT_PREDICATE.get_child(ns.bsfs.Annotation, domain=n_ent, range=l_string) - p_comment = p_annotation.get_child(ns.bse.comment, unique=True) + n_ent = types.ROOT_NODE.child(ns.bsfs.Entity) + l_string = types.ROOT_LITERAL.child(ns.xsd.string) + p_annotation = types.ROOT_PREDICATE.child(ns.bsfs.Annotation, domain=n_ent, range=l_string) + p_comment = p_annotation.child(ns.bse.comment, unique=True) self.assertEqual(Schema({p_comment}), from_string(''' prefix rdfs: prefix xsd: @@ -358,10 +358,10 @@ class TestFromString(unittest.TestCase): ''')) # we can define partial predicates (w/o specifying a usable range) - n_ent = types.ROOT_NODE.get_child(ns.bsfs.Entity) - l_string = types.ROOT_LITERAL.get_child(ns.xsd.string) - p_annotation = types.ROOT_PREDICATE.get_child(ns.bsfs.Annotation, domain=n_ent) - p_comment = p_annotation.get_child(ns.bse.comment, range=l_string, unique=False) + n_ent = types.ROOT_NODE.child(ns.bsfs.Entity) + l_string = types.ROOT_LITERAL.child(ns.xsd.string) + p_annotation = types.ROOT_PREDICATE.child(ns.bsfs.Annotation, domain=n_ent) + p_comment = p_annotation.child(ns.bse.comment, range=l_string, unique=False) self.assertEqual(Schema({p_comment}), from_string(''' prefix rdfs: prefix xsd: @@ -381,8 +381,8 @@ class TestFromString(unittest.TestCase): # predicate definition can be split across multiple statements. 
# statements can be repeated - n_ent = types.ROOT_NODE.get_child(ns.bsfs.Entity) - p_foo = types.ROOT_PREDICATE.get_child(ns.bse.foo, domain=n_ent, range=types.ROOT_NODE, unique=True) + n_ent = types.ROOT_NODE.child(ns.bsfs.Entity) + p_foo = types.ROOT_PREDICATE.child(ns.bse.foo, domain=n_ent, range=types.ROOT_NODE, unique=True) self.assertEqual(Schema({p_foo}), from_string(''' prefix rdfs: prefix xsd: @@ -400,11 +400,11 @@ class TestFromString(unittest.TestCase): ''')) # domain must be a subtype of parent's domain - n_ent = types.ROOT_NODE.get_child(ns.bsfs.Entity) - n_image = n_ent.get_child(ns.bsfs.Image) - p_foo = types.ROOT_PREDICATE.get_child(ns.bse.foo, domain=types.ROOT_NODE) - p_bar = p_foo.get_child(ns.bse.bar, domain=n_ent) - p_foobar = p_bar.get_child(ns.bse.foobar, domain=n_image) + n_ent = types.ROOT_NODE.child(ns.bsfs.Entity) + n_image = n_ent.child(ns.bsfs.Image) + p_foo = types.ROOT_PREDICATE.child(ns.bse.foo, domain=types.ROOT_NODE) + p_bar = p_foo.child(ns.bse.bar, domain=n_ent) + p_foobar = p_bar.child(ns.bse.foobar, domain=n_image) self.assertEqual(Schema({p_foobar}), from_string(''' prefix rdfs: prefix xsd: @@ -439,11 +439,11 @@ class TestFromString(unittest.TestCase): ''') # range must be a subtype of parent's range - n_ent = types.ROOT_NODE.get_child(ns.bsfs.Entity) - n_image = n_ent.get_child(ns.bsfs.Image) - p_foo = types.ROOT_PREDICATE.get_child(ns.bse.foo, range=types.ROOT_NODE) - p_bar = p_foo.get_child(ns.bse.bar, range=n_ent) - p_foobar = p_bar.get_child(ns.bse.foobar, range=n_image) + n_ent = types.ROOT_NODE.child(ns.bsfs.Entity) + n_image = n_ent.child(ns.bsfs.Image) + p_foo = types.ROOT_PREDICATE.child(ns.bse.foo, range=types.ROOT_NODE) + p_bar = p_foo.child(ns.bse.bar, range=n_ent) + p_foobar = p_bar.child(ns.bse.foobar, range=n_image) self.assertEqual(Schema({p_foobar}), from_string(''' prefix rdfs: prefix xsd: @@ -658,9 +658,9 @@ class TestFromString(unittest.TestCase): ''') # additional predicates can be defined - n_ent = types.ROOT_NODE.get_child(ns.bsfs.Entity) - l_array = types.ROOT_LITERAL.get_child(ns.bsfs.array) - p_comment = types.ROOT_FEATURE.get_child(ns.bse.colors, domain=n_ent, range=l_array, unique=False) + n_ent = types.ROOT_NODE.child(ns.bsfs.Entity) + l_array = types.ROOT_LITERAL.child(ns.bsfs.array) + p_comment = types.ROOT_FEATURE.child(ns.bse.colors, domain=n_ent, range=l_array, unique=False) self.assertEqual(Schema({p_comment}), from_string(''' prefix rdfs: prefix xsd: @@ -678,12 +678,12 @@ class TestFromString(unittest.TestCase): ''')) # features inherit properties from parents - n_ent = types.ROOT_NODE.get_child(ns.bsfs.Entity) - l_array = types.ROOT_LITERAL.get_child(ns.bsfs.array) - l_string = types.ROOT_LITERAL.get_child(ns.xsd.string) - p_annotation = types.ROOT_FEATURE.get_child(ns.bsfs.Annotation, domain=n_ent, range=l_array, + n_ent = types.ROOT_NODE.child(ns.bsfs.Entity) + l_array = types.ROOT_LITERAL.child(ns.bsfs.array) + l_string = types.ROOT_LITERAL.child(ns.xsd.string) + p_annotation = types.ROOT_FEATURE.child(ns.bsfs.Annotation, domain=n_ent, range=l_array, dimension=1234, dtype=ns.xsd.string) - p_comment = p_annotation.get_child(ns.bse.colors, unique=True) + p_comment = p_annotation.child(ns.bse.colors, unique=True) self.assertEqual(Schema({p_comment}), from_string(''' prefix rdfs: prefix xsd: @@ -706,8 +706,8 @@ class TestFromString(unittest.TestCase): # feature definition can be split across multiple statements. 
# statements can be repeated - n_ent = types.ROOT_NODE.get_child(ns.bsfs.Entity) - p_foo = types.ROOT_FEATURE.get_child(ns.bse.foo, domain=n_ent, unique=True, + n_ent = types.ROOT_NODE.child(ns.bsfs.Entity) + p_foo = types.ROOT_FEATURE.child(ns.bse.foo, domain=n_ent, unique=True, dimension=1234, dtype=ns.bsfs.f32) self.assertEqual(Schema({p_foo}), from_string(''' prefix rdfs: @@ -887,24 +887,24 @@ class TestFromString(unittest.TestCase): def test_integration(self): # nodes - n_ent = types.ROOT_NODE.get_child(ns.bsfs.Entity) - n_tag = types.ROOT_NODE.get_child(ns.bsfs.Tag) - n_image = n_ent.get_child(ns.bsfs.Image) + n_ent = types.ROOT_NODE.child(ns.bsfs.Entity) + n_tag = types.ROOT_NODE.child(ns.bsfs.Tag) + n_image = n_ent.child(ns.bsfs.Image) # literals - l_string = types.ROOT_LITERAL.get_child(ns.xsd.string) - l_array = types.ROOT_LITERAL.get_child(ns.bsfs.array) - l_integer = types.ROOT_LITERAL.get_child(ns.xsd.integer) - l_boolean = types.ROOT_LITERAL.get_child(ns.xsd.boolean) + l_string = types.ROOT_LITERAL.child(ns.xsd.string) + l_array = types.ROOT_LITERAL.child(ns.bsfs.array) + l_integer = types.ROOT_LITERAL.child(ns.xsd.integer) + l_boolean = types.ROOT_LITERAL.child(ns.xsd.boolean) # predicates - p_annotation = types.ROOT_PREDICATE.get_child(ns.bsfs.Annotation) - p_tag = types.ROOT_PREDICATE.get_child(ns.bse.tag, domain=n_ent, range=n_tag) - p_group = p_tag.get_child(ns.bse.group, domain=n_image, unique=True) - p_comment = p_annotation.get_child(ns.bse.comment, range=l_string) + p_annotation = types.ROOT_PREDICATE.child(ns.bsfs.Annotation) + p_tag = types.ROOT_PREDICATE.child(ns.bse.tag, domain=n_ent, range=n_tag) + p_group = p_tag.child(ns.bse.group, domain=n_image, unique=True) + p_comment = p_annotation.child(ns.bse.comment, range=l_string) # features - f_colors = types.ROOT_FEATURE.get_child(URI('http://bsfs.ai/schema/Feature/colors_spatial'), + f_colors = types.ROOT_FEATURE.child(URI('http://bsfs.ai/schema/Feature/colors_spatial'), domain=n_ent, range=l_array, unique=True, dtype=ns.bsfs.f16, distance=ns.bsfs.euclidean) - f_colors1234 = f_colors.get_child(URI('http://bsfs.ai/schema/Feature/colors_spatial#1234'), dimension=1024) - f_colors4321 = f_colors.get_child(URI('http://bsfs.ai/schema/Feature/colors_spatial#4321'), dimension=2048) + f_colors1234 = f_colors.child(URI('http://bsfs.ai/schema/Feature/colors_spatial#1234'), dimension=1024) + f_colors4321 = f_colors.child(URI('http://bsfs.ai/schema/Feature/colors_spatial#4321'), dimension=2048) # schema ref = Schema( {p_annotation, p_tag, p_group, p_comment, f_colors, f_colors1234, f_colors4321}, diff --git a/test/schema/test_types.py b/test/schema/test_types.py index af47f0d..26da270 100644 --- a/test/schema/test_types.py +++ b/test/schema/test_types.py @@ -14,7 +14,7 @@ from bsfs.schema.types import ROOT_PREDICATE, ROOT_VERTEX, ROOT_FEATURE from bsfs.utils import errors # objects to test -from bsfs.schema.types import _Type, _Vertex, Node, Literal, Predicate, Feature +from bsfs.schema.types import _Type, Vertex, Node, Literal, Predicate, Feature ## code ## @@ -47,8 +47,8 @@ class TestType(unittest.TestCase): self.assertEqual( hash(_Type('Foo', None, foo='bar', bar='foo')), hash(_Type('Foo', None, hello='world', foobar=1234))) - # annotations can be passed to get_child - self.assertDictEqual(_Type('First', foo='bar').get_child('Second', bar='foo').annotations, { + # annotations can be passed to child + self.assertDictEqual(_Type('First', foo='bar').child('Second', bar='foo').annotations, { 'bar': 'foo'}) def 
test_string_conversion(self): @@ -71,16 +71,16 @@ class TestType(unittest.TestCase): self.assertEqual(str(_Type('Foo', SubType('Bar'))), '_Type(Foo)') self.assertEqual(repr(_Type('Foo', SubType('Bar'))), '_Type(Foo, SubType(Bar, None))') - def test_get_child(self): + def test_child(self): # callee is used as parent - self.assertEqual(_Type('First').get_child('Second'), _Type('Second', _Type('First'))) + self.assertEqual(_Type('First').child('Second'), _Type('Second', _Type('First'))) # works with multiple parents - self.assertEqual(_Type('First').get_child('Second').get_child('Third'), _Type('Third', _Type('Second', _Type('First')))) + self.assertEqual(_Type('First').child('Second').child('Third'), _Type('Third', _Type('Second', _Type('First')))) # type persists class Foo(_Type): pass - self.assertEqual(Foo('First').get_child('Second'), Foo('Second', Foo('First'))) + self.assertEqual(Foo('First').child('Second'), Foo('Second', Foo('First'))) # annotations are respected - self.assertDictEqual(_Type('First', foo='bar').get_child('Second', bar='foo').annotations, { + self.assertDictEqual(_Type('First', foo='bar').child('Second', bar='foo').annotations, { 'bar': 'foo'}) def test_equality(self): @@ -174,7 +174,7 @@ class TestPredicate(unittest.TestCase): # range must be a Literal, a Node, or the root Vertex self.assertRaises(TypeError, Predicate, ns.bse.foo, None, Node(ns.bsfs.Node, None), None, True) self.assertRaises(TypeError, Predicate, ns.bse.foo, None, Node(ns.bsfs.Node, None), 1234, True) - self.assertRaises(TypeError, Predicate, ns.bse.foo, None, Node(ns.bsfs.Node, None), _Vertex(ns.bsfs.Foo, None), True) + self.assertRaises(TypeError, Predicate, ns.bse.foo, None, Node(ns.bsfs.Node, None), Vertex(ns.bsfs.Foo, None), True) self.assertRaises(TypeError, Predicate, ns.bse.foo, None, Node(ns.bsfs.Node, None), _Type(ns.bsfs.Foo, None), True) class Foo(): pass self.assertRaises(TypeError, Predicate, ns.bse.foo, None, Node(ns.bsfs.Node, None), Foo(), True) @@ -213,7 +213,7 @@ class TestPredicate(unittest.TestCase): self.assertNotEqual(tag, Predicate(ns.bse.tag, root, n_root, n_tag, True)) self.assertNotEqual(hash(tag), hash(Predicate(ns.bse.tag, root, n_root, n_tag, True))) - def test_get_child(self): + def test_child(self): n_root = Node(ns.bsfs.Node, None) l_root = Literal(ns.bsfs.Literal, None) n_ent = Node(ns.bsfs.Entity, Node(ns.bsfs.Node, None)) @@ -227,45 +227,45 @@ class TestPredicate(unittest.TestCase): unique=False, ) - # get_child returns Predicate - self.assertIsInstance(tag.get_child(ns.bse.foo), Predicate) + # child returns Predicate + self.assertIsInstance(tag.child(ns.bse.foo), Predicate) # uri is respected - self.assertEqual(ns.bse.foo, tag.get_child(ns.bse.foo).uri) + self.assertEqual(ns.bse.foo, tag.child(ns.bse.foo).uri) # domain is respected dom = Node(ns.bsfs.Image, n_ent) - self.assertEqual(dom, tag.get_child(ns.bse.foo, domain=dom).domain) + self.assertEqual(dom, tag.child(ns.bse.foo, domain=dom).domain) # range is respected rng = Node(ns.bsfs.Group, n_tag) - self.assertEqual(rng, tag.get_child(ns.bse.foo, range=rng).range) + self.assertEqual(rng, tag.child(ns.bse.foo, range=rng).range) # cannot set range to None - self.assertEqual(n_tag, tag.get_child(ns.bse.foo, range=None).range) + self.assertEqual(n_tag, tag.child(ns.bse.foo, range=None).range) # unique is respected - self.assertTrue(tag.get_child(ns.bse.foo, unique=True).unique) + self.assertTrue(tag.child(ns.bse.foo, unique=True).unique) # annotations are respected - self.assertDictEqual(tag.get_child(ns.bse.foo, 
foo='bar', bar=123).annotations, { + self.assertDictEqual(tag.child(ns.bse.foo, foo='bar', bar=123).annotations, { 'foo': 'bar', 'bar': 123, }) # domain is inherited from parent - self.assertEqual(n_root, root.get_child(ns.bse.foo).domain) - self.assertEqual(n_ent, tag.get_child(ns.bse.foo).domain) + self.assertEqual(n_root, root.child(ns.bse.foo).domain) + self.assertEqual(n_ent, tag.child(ns.bse.foo).domain) # range is inherited from parent - self.assertEqual(ROOT_VERTEX, root.get_child(ns.bse.foo).range) - self.assertEqual(n_tag, tag.get_child(ns.bse.foo).range) + self.assertEqual(ROOT_VERTEX, root.child(ns.bse.foo).range) + self.assertEqual(n_tag, tag.child(ns.bse.foo).range) # uniqueness is inherited from parent - self.assertFalse(tag.get_child(ns.bse.foo).unique) + self.assertFalse(tag.child(ns.bse.foo).unique) # domain must be subtype of parent's domain - self.assertRaises(errors.ConsistencyError, tag.get_child, ns.bse.foo, domain=n_root) - self.assertRaises(errors.ConsistencyError, tag.get_child, ns.bse.foo, domain=Node(ns.bsfs.Image, n_root)) + self.assertRaises(errors.ConsistencyError, tag.child, ns.bse.foo, domain=n_root) + self.assertRaises(errors.ConsistencyError, tag.child, ns.bse.foo, domain=Node(ns.bsfs.Image, n_root)) # range must be subtype of parent's range - self.assertRaises(errors.ConsistencyError, tag.get_child, ns.bse.foo, range=n_root) - self.assertRaises(errors.ConsistencyError, tag.get_child, ns.bse.foo, range=Node(ns.bsfs.Image, n_root)) - self.assertRaises(TypeError, tag.get_child, ns.bse.foo, range=Literal(ns.bsfs.Tag, l_root)) + self.assertRaises(errors.ConsistencyError, tag.child, ns.bse.foo, range=n_root) + self.assertRaises(errors.ConsistencyError, tag.child, ns.bse.foo, range=Node(ns.bsfs.Image, n_root)) + self.assertRaises(TypeError, tag.child, ns.bse.foo, range=Literal(ns.bsfs.Tag, l_root)) # range can be subtyped from ROOT_VERTEX to Node or Literal - self.assertEqual(n_root, root.get_child(ns.bse.foo, range=n_root).range) - self.assertEqual(l_root, root.get_child(ns.bse.foo, range=l_root).range) + self.assertEqual(n_root, root.child(ns.bse.foo, range=n_root).range) + self.assertEqual(l_root, root.child(ns.bse.foo, range=l_root).range) class TestFeature(unittest.TestCase): @@ -308,7 +308,7 @@ class TestFeature(unittest.TestCase): self.assertNotEqual(colors, Feature(ns.bse.colors, ROOT_FEATURE, n_ent, l_array, False, 1234, ns.bsfs.float, ns.bsfs.cosine)) self.assertNotEqual(hash(colors), hash(Feature(ns.bse.colors, ROOT_FEATURE, n_ent, l_array, False, 1234, ns.bsfs.float, ns.bsfs.cosine))) - def test_get_child(self): + def test_child(self): n_root = Node(ns.bsfs.Node, None) n_ent = Node(ns.bsfs.Entity, n_root) l_root = Literal(ns.bsfs.Literal, None) @@ -324,53 +324,53 @@ class TestFeature(unittest.TestCase): distance=ns.bsfs.euclidean, ) - # get_child returns Feature - self.assertIsInstance(colors.get_child(ns.bse.foo), Feature) + # child returns Feature + self.assertIsInstance(colors.child(ns.bse.foo), Feature) # uri is respected - self.assertEqual(ns.bse.foo, colors.get_child(ns.bse.foo).uri) + self.assertEqual(ns.bse.foo, colors.child(ns.bse.foo).uri) # domain is respected dom = Node(ns.bsfs.Image, n_ent) - self.assertEqual(dom, colors.get_child(ns.bse.foo, domain=dom).domain) + self.assertEqual(dom, colors.child(ns.bse.foo, domain=dom).domain) # range is respected rng = Literal(ns.bse.foo, l_array) - self.assertEqual(rng, colors.get_child(ns.bse.foo, range=rng).range) + self.assertEqual(rng, colors.child(ns.bse.foo, range=rng).range) # cannot set 
range to None - self.assertEqual(l_array, colors.get_child(ns.bse.foo, range=None).range) + self.assertEqual(l_array, colors.child(ns.bse.foo, range=None).range) # unique is respected - self.assertTrue(colors.get_child(ns.bse.foo, unique=True).unique) + self.assertTrue(colors.child(ns.bse.foo, unique=True).unique) # dimension is respected - self.assertEqual(4321, colors.get_child(ns.bse.foo, dimension=4321).dimension) + self.assertEqual(4321, colors.child(ns.bse.foo, dimension=4321).dimension) # dtype is respected - self.assertEqual(ns.bsfs.integer, colors.get_child(ns.bse.foo, dtype=ns.bsfs.integer).dtype) + self.assertEqual(ns.bsfs.integer, colors.child(ns.bse.foo, dtype=ns.bsfs.integer).dtype) # distance is respected - self.assertEqual(ns.bsfs.cosine, colors.get_child(ns.bse.foo, distance=ns.bsfs.cosine).distance) + self.assertEqual(ns.bsfs.cosine, colors.child(ns.bse.foo, distance=ns.bsfs.cosine).distance) # annotations are respected - self.assertDictEqual(colors.get_child(ns.bse.foo, foo='bar', bar=123).annotations, { + self.assertDictEqual(colors.child(ns.bse.foo, foo='bar', bar=123).annotations, { 'foo': 'bar', 'bar': 123, }) # domain is inherited from parent - self.assertEqual(n_root, ROOT_FEATURE.get_child(ns.bse.foo).domain) - self.assertEqual(n_ent, colors.get_child(ns.bse.foo).domain) + self.assertEqual(n_root, ROOT_FEATURE.child(ns.bse.foo).domain) + self.assertEqual(n_ent, colors.child(ns.bse.foo).domain) # range is inherited from parent - self.assertEqual(l_array, colors.get_child(ns.bse.foo).range) + self.assertEqual(l_array, colors.child(ns.bse.foo).range) # uniqueness is inherited from parent - self.assertFalse(colors.get_child(ns.bse.foo).unique) + self.assertFalse(colors.child(ns.bse.foo).unique) # dimension is inherited from parent - self.assertEqual(1234, colors.get_child(ns.bse.foo).dimension) + self.assertEqual(1234, colors.child(ns.bse.foo).dimension) # dtype is inherited from parent - self.assertEqual(ns.bsfs.float, colors.get_child(ns.bse.foo).dtype) + self.assertEqual(ns.bsfs.float, colors.child(ns.bse.foo).dtype) # distance is inherited from parent - self.assertEqual(ns.bsfs.euclidean, colors.get_child(ns.bse.foo).distance) + self.assertEqual(ns.bsfs.euclidean, colors.child(ns.bse.foo).distance) # domain must be subtype of parent's domain - self.assertRaises(errors.ConsistencyError, colors.get_child, ns.bse.foo, domain=n_root) - self.assertRaises(errors.ConsistencyError, colors.get_child, ns.bse.foo, domain=Node(ns.bsfs.Image, n_root)) + self.assertRaises(errors.ConsistencyError, colors.child, ns.bse.foo, domain=n_root) + self.assertRaises(errors.ConsistencyError, colors.child, ns.bse.foo, domain=Node(ns.bsfs.Image, n_root)) # range must be subtype of parent's range - self.assertRaises(errors.ConsistencyError, colors.get_child, ns.bse.foo, range=Literal(ns.bsfs.Literal, None)) - self.assertRaises(errors.ConsistencyError, colors.get_child, ns.bse.foo, range=Literal(ns.bsfs.foo, Literal(ns.bsfs.Literal, None))) - self.assertRaises(TypeError, colors.get_child, ns.bse.foo, range=Node(ns.bsfs.Tag, n_root)) + self.assertRaises(errors.ConsistencyError, colors.child, ns.bse.foo, range=Literal(ns.bsfs.Literal, None)) + self.assertRaises(errors.ConsistencyError, colors.child, ns.bse.foo, range=Literal(ns.bsfs.foo, Literal(ns.bsfs.Literal, None))) + self.assertRaises(TypeError, colors.child, ns.bse.foo, range=Node(ns.bsfs.Tag, n_root)) ## main ## diff --git a/test/triple_store/sparql/test_parse_filter.py b/test/triple_store/sparql/test_parse_filter.py index 
bd19803..bd967e5 100644 --- a/test/triple_store/sparql/test_parse_filter.py +++ b/test/triple_store/sparql/test_parse_filter.py @@ -124,7 +124,7 @@ class TestParseFilter(unittest.TestCase): # __call__ requires a valid root type self.assertRaises(errors.BackendError, self.parser, self.schema.literal(ns.bsfs.Literal), None) - self.assertRaises(errors.ConsistencyError, self.parser, self.schema.node(ns.bsfs.Node).get_child(ns.bsfs.Invalid), None) + self.assertRaises(errors.ConsistencyError, self.parser, self.schema.node(ns.bsfs.Node).child(ns.bsfs.Invalid), None) # __call__ requires a parseable root self.assertRaises(errors.BackendError, self.parser, self.schema.node(ns.bsfs.Entity), ast.filter.FilterExpression()) # __call__ returns an executable query diff --git a/test/triple_store/sparql/test_sparql.py b/test/triple_store/sparql/test_sparql.py index 3d81de1..25a0b15 100644 --- a/test/triple_store/sparql/test_sparql.py +++ b/test/triple_store/sparql/test_sparql.py @@ -108,7 +108,7 @@ class TestSparqlStore(unittest.TestCase): store.create(store.schema.node(ns.bsfs.PDF), {URI('http://example.com/me/pdf#1234')}) # node_type must be in the schema - self.assertRaises(errors.ConsistencyError, store._has_type, URI('http://example.com/me/entity#1234'), store.schema.node(ns.bsfs.Node).get_child(ns.bsfs.invalid)) + self.assertRaises(errors.ConsistencyError, store._has_type, URI('http://example.com/me/entity#1234'), store.schema.node(ns.bsfs.Node).child(ns.bsfs.invalid)) # returns False on inexistent nodes self.assertFalse(store._has_type(URI('http://example.com/me/entity#4321'), store.schema.node(ns.bsfs.Entity))) @@ -509,7 +509,7 @@ class TestSparqlStore(unittest.TestCase): store.set(ent_type, {URI('http://example.com/me/entity#1234')}, self.schema.predicate(ns.bse.filesize), {1234}) store.set(ent_type, {URI('http://example.com/me/entity#4321')}, self.schema.predicate(ns.bse.filesize), {4321}) # node_type must be in the schema - self.assertRaises(errors.ConsistencyError, set, store.get(self.schema.node(ns.bsfs.Node).get_child(ns.bsfs.Invalid), ast.filter.IsIn(ent_ids))) + self.assertRaises(errors.ConsistencyError, set, store.get(self.schema.node(ns.bsfs.Node).child(ns.bsfs.Invalid), ast.filter.IsIn(ent_ids))) # query must be a filter expression class Foo(): pass self.assertRaises(TypeError, set, store.get(ent_type, 1234)) @@ -574,7 +574,7 @@ class TestSparqlStore(unittest.TestCase): store.schema = self.schema # node type must be valid - self.assertRaises(errors.ConsistencyError, store.create, self.schema.node(ns.bsfs.Entity).get_child(ns.bsfs.invalid), { + self.assertRaises(errors.ConsistencyError, store.create, self.schema.node(ns.bsfs.Entity).child(ns.bsfs.invalid), { URI('http://example.com/me/entity#1234'), URI('http://example.com/me/entity#4321')}) # can create some nodes @@ -636,7 +636,7 @@ class TestSparqlStore(unittest.TestCase): p_comment = store.schema.predicate(ns.bse.comment) p_author = store.schema.predicate(ns.bse.author) p_tag = store.schema.predicate(ns.bse.tag) - p_invalid = store.schema.predicate(ns.bsfs.Predicate).get_child(ns.bsfs.foo, range=store.schema.node(ns.bsfs.Tag)) + p_invalid = store.schema.predicate(ns.bsfs.Predicate).child(ns.bsfs.foo, range=store.schema.node(ns.bsfs.Tag)) # create node instances ent_ids = { URI('http://example.com/me/entity#1234'), @@ -659,7 +659,7 @@ class TestSparqlStore(unittest.TestCase): store.create(user_type, user_ids) # invalid node_type is not permitted - self.assertRaises(errors.ConsistencyError, store.set, 
self.schema.node(ns.bsfs.Node).get_child(ns.bse.foo), + self.assertRaises(errors.ConsistencyError, store.set, self.schema.node(ns.bsfs.Node).child(ns.bse.foo), ent_ids, p_comment, {'hello world'}) # invalid predicate is not permitted -- cgit v1.2.3 From 3940cb3c79937a431ba2ae3b57fd0c6c2ccfff33 Mon Sep 17 00:00:00 2001 From: Matthias Baumgartner Date: Thu, 12 Jan 2023 10:12:43 +0100 Subject: use Vertex in type annotations --- bsfs/graph/resolve.py | 28 +++++++++++------------ bsfs/query/validator.py | 25 +++++++++----------- bsfs/triple_store/sparql/parse_filter.py | 39 +++++++++++++++----------------- 3 files changed, 42 insertions(+), 50 deletions(-) diff --git a/bsfs/graph/resolve.py b/bsfs/graph/resolve.py index feb0855..e398a5e 100644 --- a/bsfs/graph/resolve.py +++ b/bsfs/graph/resolve.py @@ -37,8 +37,6 @@ class Filter(): """ - T_VERTEX = typing.Union[bsc.Node, bsc.Literal] - def __init__(self, schema): self.schema = schema @@ -47,7 +45,7 @@ class Filter(): def _parse_filter_expression( self, - type_: T_VERTEX, + type_: bsc.Vertex, node: ast.filter.FilterExpression, ) -> ast.filter.FilterExpression: """Route *node* to the handler of the respective FilterExpression subclass.""" @@ -73,7 +71,7 @@ class Filter(): # invalid node raise errors.BackendError(f'expected filter expression, found {node}') - def _parse_predicate_expression(self, node: ast.filter.PredicateExpression) -> T_VERTEX: + def _parse_predicate_expression(self, node: ast.filter.PredicateExpression) -> bsc.Vertex: """Route *node* to the handler of the respective PredicateExpression subclass.""" if isinstance(node, ast.filter.Predicate): return self._predicate(node) @@ -82,7 +80,7 @@ class Filter(): # invalid node raise errors.BackendError(f'expected predicate expression, found {node}') - def _predicate(self, node: ast.filter.Predicate) -> T_VERTEX: + def _predicate(self, node: ast.filter.Predicate) -> bsc.Vertex: if not self.schema.has_predicate(node.predicate): raise errors.ConsistencyError(f'predicate {node.predicate} is not in the schema') pred = self.schema.predicate(node.predicate) @@ -91,7 +89,7 @@ class Filter(): dom, rng = rng, dom return rng - def _one_of(self, node: ast.filter.OneOf) -> T_VERTEX: + def _one_of(self, node: ast.filter.OneOf) -> bsc.Vertex: # determine domain and range types rng = None for pred in node: @@ -107,33 +105,33 @@ class Filter(): raise errors.UnreachableError() return rng - def _any(self, type_: T_VERTEX, node: ast.filter.Any) -> ast.filter.Any: # pylint: disable=unused-argument + def _any(self, type_: bsc.Vertex, node: ast.filter.Any) -> ast.filter.Any: # pylint: disable=unused-argument next_type = self._parse_predicate_expression(node.predicate) return ast.filter.Any(node.predicate, self._parse_filter_expression(next_type, node.expr)) - def _all(self, type_: T_VERTEX, node: ast.filter.All) -> ast.filter.All: # pylint: disable=unused-argument + def _all(self, type_: bsc.Vertex, node: ast.filter.All) -> ast.filter.All: # pylint: disable=unused-argument next_type = self._parse_predicate_expression(node.predicate) return ast.filter.All(node.predicate, self._parse_filter_expression(next_type, node.expr)) - def _and(self, type_: T_VERTEX, node: ast.filter.And) -> ast.filter.And: + def _and(self, type_: bsc.Vertex, node: ast.filter.And) -> ast.filter.And: return ast.filter.And({self._parse_filter_expression(type_, expr) for expr in node}) - def _or(self, type_: T_VERTEX, node: ast.filter.Or) -> ast.filter.Or: + def _or(self, type_: bsc.Vertex, node: ast.filter.Or) -> ast.filter.Or: return 
ast.filter.Or({self._parse_filter_expression(type_, expr) for expr in node}) - def _not(self, type_: T_VERTEX, node: ast.filter.Not) -> ast.filter.Not: + def _not(self, type_: bsc.Vertex, node: ast.filter.Not) -> ast.filter.Not: return ast.filter.Not(self._parse_filter_expression(type_, node.expr)) - def _has(self, type_: T_VERTEX, node: ast.filter.Has) -> ast.filter.Has: # pylint: disable=unused-argument + def _has(self, type_: bsc.Vertex, node: ast.filter.Has) -> ast.filter.Has: # pylint: disable=unused-argument return node - def _value(self, type_: T_VERTEX, node: ast.filter._Value) -> ast.filter._Value: # pylint: disable=unused-argument + def _value(self, type_: bsc.Vertex, node: ast.filter._Value) -> ast.filter._Value: # pylint: disable=unused-argument return node - def _bounded(self, type_: T_VERTEX, node: ast.filter._Bounded) -> ast.filter._Bounded: # pylint: disable=unused-argument + def _bounded(self, type_: bsc.Vertex, node: ast.filter._Bounded) -> ast.filter._Bounded: # pylint: disable=unused-argument return node - def _is(self, type_: T_VERTEX, node: ast.filter.Is) -> typing.Union[ast.filter.Or, ast.filter.Is]: + def _is(self, type_: bsc.Vertex, node: ast.filter.Is) -> typing.Union[ast.filter.Or, ast.filter.Is]: # check if action is needed if not isinstance(node.value, nodes.Nodes): return node diff --git a/bsfs/query/validator.py b/bsfs/query/validator.py index 352203a..6bf1b72 100644 --- a/bsfs/query/validator.py +++ b/bsfs/query/validator.py @@ -34,9 +34,6 @@ class Filter(): """ - # vertex types - T_VERTEX = typing.Union[bsc.Node, bsc.Literal] # FIXME: Shouldn't this be in the schema? - # schema to validate against. schema: bsc.Schema @@ -64,7 +61,7 @@ class Filter(): ## routing methods - def _parse_filter_expression(self, type_: T_VERTEX, node: ast.filter.FilterExpression): + def _parse_filter_expression(self, type_: bsc.Vertex, node: ast.filter.FilterExpression): """Route *node* to the handler of the respective FilterExpression subclass.""" if isinstance(node, ast.filter.Is): return self._is(type_, node) @@ -83,7 +80,7 @@ class Filter(): # invalid node raise errors.BackendError(f'expected filter expression, found {node}') - def _parse_predicate_expression(self, node: ast.filter.PredicateExpression) -> typing.Tuple[T_VERTEX, T_VERTEX]: + def _parse_predicate_expression(self, node: ast.filter.PredicateExpression) -> typing.Tuple[bsc.Vertex, bsc.Vertex]: """Route *node* to the handler of the respective PredicateExpression subclass.""" if isinstance(node, ast.filter.Predicate): return self._predicate(node) @@ -95,7 +92,7 @@ class Filter(): ## predicate expressions - def _predicate(self, node: ast.filter.Predicate) -> typing.Tuple[T_VERTEX, T_VERTEX]: + def _predicate(self, node: ast.filter.Predicate) -> typing.Tuple[bsc.Vertex, bsc.Vertex]: # predicate exists in the schema if not self.schema.has_predicate(node.predicate): raise errors.ConsistencyError(f'predicate {node.predicate} is not in the schema') @@ -110,7 +107,7 @@ class Filter(): # return domain and range return dom, rng - def _one_of(self, node: ast.filter.OneOf) -> typing.Tuple[T_VERTEX, T_VERTEX]: + def _one_of(self, node: ast.filter.OneOf) -> typing.Tuple[bsc.Vertex, bsc.Vertex]: # determine domain and range types # NOTE: select the most specific domain and the most generic range dom, rng = None, None @@ -146,7 +143,7 @@ class Filter(): ## intermediates - def _branch(self, type_: T_VERTEX, node: ast.filter._Branch): + def _branch(self, type_: bsc.Vertex, node: ast.filter._Branch): # type is a Node if not 
isinstance(type_, bsc.Node): raise errors.ConsistencyError(f'expected a Node, found {type_}') @@ -167,16 +164,16 @@ class Filter(): # child expression is valid self._parse_filter_expression(rng, node.expr) - def _agg(self, type_: T_VERTEX, node: ast.filter._Agg): + def _agg(self, type_: bsc.Vertex, node: ast.filter._Agg): for expr in node: # child expression is valid self._parse_filter_expression(type_, expr) - def _not(self, type_: T_VERTEX, node: ast.filter.Not): + def _not(self, type_: bsc.Vertex, node: ast.filter.Not): # child expression is valid self._parse_filter_expression(type_, node.expr) - def _has(self, type_: T_VERTEX, node: ast.filter.Has): + def _has(self, type_: bsc.Vertex, node: ast.filter.Has): # type is a Node if not isinstance(type_, bsc.Node): raise errors.ConsistencyError(f'expected a Node, found {type_}') @@ -195,13 +192,13 @@ class Filter(): ## conditions - def _is(self, type_: T_VERTEX, node: ast.filter.Is): # pylint: disable=unused-argument # (node) + def _is(self, type_: bsc.Vertex, node: ast.filter.Is): # pylint: disable=unused-argument # (node) if not isinstance(type_, bsc.Node): raise errors.ConsistencyError(f'expected a Node, found {type_}') if type_ not in self.schema.nodes(): raise errors.ConsistencyError(f'node {type_} is not in the schema') - def _value(self, type_: T_VERTEX, node: ast.filter._Value): # pylint: disable=unused-argument # (node) + def _value(self, type_: bsc.Vertex, node: ast.filter._Value): # pylint: disable=unused-argument # (node) # type is a literal if not isinstance(type_, bsc.Literal): raise errors.ConsistencyError(f'expected a Literal, found {type_}') @@ -211,7 +208,7 @@ class Filter(): # FIXME: Check if node.value corresponds to type_ # FIXME: A specific literal might be requested (i.e., a numeric type when used in Has) - def _bounded(self, type_: T_VERTEX, node: ast.filter._Bounded): # pylint: disable=unused-argument # (node) + def _bounded(self, type_: bsc.Vertex, node: ast.filter._Bounded): # pylint: disable=unused-argument # (node) # type is a literal if not isinstance(type_, bsc.Literal): raise errors.ConsistencyError(f'expected a Literal, found {type_}') diff --git a/bsfs/triple_store/sparql/parse_filter.py b/bsfs/triple_store/sparql/parse_filter.py index d4db0aa..a851888 100644 --- a/bsfs/triple_store/sparql/parse_filter.py +++ b/bsfs/triple_store/sparql/parse_filter.py @@ -46,9 +46,6 @@ class Filter(): # Generator that produces unique symbol names. ngen: _GenHopName - # Vertex type. 
- T_VERTEX = typing.Union[bsc.Node, bsc.Literal] - def __init__(self, schema): self.schema = schema self.ngen = _GenHopName() @@ -79,7 +76,7 @@ class Filter(): }} ''' - def _parse_filter_expression(self, type_: T_VERTEX, node: ast.filter.FilterExpression, head: str) -> str: + def _parse_filter_expression(self, type_: bsc.Vertex, node: ast.filter.FilterExpression, head: str) -> str: """Route *node* to the handler of the respective FilterExpression subclass.""" if isinstance(node, ast.filter.Is): return self._is(type_, node, head) @@ -112,9 +109,9 @@ class Filter(): def _parse_predicate_expression( self, - type_: T_VERTEX, + type_: bsc.Vertex, node: ast.filter.PredicateExpression - ) -> typing.Tuple[str, T_VERTEX]: + ) -> typing.Tuple[str, bsc.Vertex]: """Route *node* to the handler of the respective PredicateExpression subclass.""" if isinstance(node, ast.filter.Predicate): return self._predicate(type_, node) @@ -123,7 +120,7 @@ class Filter(): # invalid node raise errors.BackendError(f'expected predicate expression, found {node}') - def _one_of(self, node_type: T_VERTEX, node: ast.filter.OneOf) -> typing.Tuple[str, T_VERTEX]: + def _one_of(self, node_type: bsc.Vertex, node: ast.filter.OneOf) -> typing.Tuple[str, bsc.Vertex]: """ """ if not isinstance(node_type, bsc.Node): @@ -150,7 +147,7 @@ class Filter(): # return joint predicate expression and next range return '|'.join(suburi), rng - def _predicate(self, node_type: T_VERTEX, node: ast.filter.Predicate) -> typing.Tuple[str, T_VERTEX]: + def _predicate(self, node_type: bsc.Vertex, node: ast.filter.Predicate) -> typing.Tuple[str, bsc.Vertex]: """ """ # check node_type @@ -178,7 +175,7 @@ class Filter(): # return predicate URI and next node type return puri, rng - def _any(self, node_type: T_VERTEX, node: ast.filter.Any, head: str) -> str: + def _any(self, node_type: bsc.Vertex, node: ast.filter.Any, head: str) -> str: """ """ if not isinstance(node_type, bsc.Node): @@ -191,7 +188,7 @@ class Filter(): # combine results return f'{head} {pred} {nexthead} . {expr}' - def _all(self, node_type: T_VERTEX, node: ast.filter.All, head: str) -> str: + def _all(self, node_type: bsc.Vertex, node: ast.filter.All, head: str) -> str: """ """ # NOTE: All(P, E) := Not(Any(P, Not(E))) and EXISTS(P, ?) @@ -208,13 +205,13 @@ class Filter(): # return existence and rewritten expression return f'FILTER EXISTS {{ {head} {pred} {temphead} }} . ' + expr - def _and(self, node_type: T_VERTEX, node: ast.filter.And, head: str) -> str: + def _and(self, node_type: bsc.Vertex, node: ast.filter.And, head: str) -> str: """ """ sub = [self._parse_filter_expression(node_type, expr, head) for expr in node] return ' . '.join(sub) - def _or(self, node_type: T_VERTEX, node: ast.filter.Or, head: str) -> str: + def _or(self, node_type: bsc.Vertex, node: ast.filter.Or, head: str) -> str: """ """ # potential special case optimization: @@ -224,7 +221,7 @@ class Filter(): sub = ['{' + expr + '}' for expr in sub] return ' UNION '.join(sub) - def _not(self, node_type: T_VERTEX, node: ast.filter.Not, head: str) -> str: + def _not(self, node_type: bsc.Vertex, node: ast.filter.Not, head: str) -> str: """ """ expr = self._parse_filter_expression(node_type, node.expr, head) @@ -235,7 +232,7 @@ class Filter(): # The simplest (and non-interfering) choice is a type statement. return f'MINUS {{ {head} <{ns.rdf.type}>/<{ns.rdfs.subClassOf}>* <{node_type.uri}> . 
{expr} }}' - def _has(self, node_type: T_VERTEX, node: ast.filter.Has, head: str) -> str: + def _has(self, node_type: bsc.Vertex, node: ast.filter.Has, head: str) -> str: """ """ if not isinstance(node_type, bsc.Node): @@ -253,42 +250,42 @@ class Filter(): # combine return num_preds + ' . ' + count_bounds - def _is(self, node_type: T_VERTEX, node: ast.filter.Is, head: str) -> str: + def _is(self, node_type: bsc.Vertex, node: ast.filter.Is, head: str) -> str: """ """ if not isinstance(node_type, bsc.Node): raise errors.BackendError(f'expected Node, found {node_type}') return f'VALUES {head} {{ <{node.value}> }}' - def _equals(self, node_type: T_VERTEX, node: ast.filter.Equals, head: str) -> str: + def _equals(self, node_type: bsc.Vertex, node: ast.filter.Equals, head: str) -> str: """ """ if not isinstance(node_type, bsc.Literal): raise errors.BackendError(f'expected Literal, found {node}') return f'VALUES {head} {{ "{node.value}"^^<{node_type.uri}> }}' - def _substring(self, node_type: T_VERTEX, node: ast.filter.Substring, head: str) -> str: + def _substring(self, node_type: bsc.Vertex, node: ast.filter.Substring, head: str) -> str: """ """ if not isinstance(node_type, bsc.Literal): raise errors.BackendError(f'expected Literal, found {node_type}') return f'FILTER contains(str({head}), "{node.value}")' - def _starts_with(self, node_type: T_VERTEX, node: ast.filter.StartsWith, head: str) -> str: + def _starts_with(self, node_type: bsc.Vertex, node: ast.filter.StartsWith, head: str) -> str: """ """ if not isinstance(node_type, bsc.Literal): raise errors.BackendError(f'expected Literal, found {node_type}') return f'FILTER strstarts(str({head}), "{node.value}")' - def _ends_with(self, node_type: T_VERTEX, node: ast.filter.EndsWith, head: str) -> str: + def _ends_with(self, node_type: bsc.Vertex, node: ast.filter.EndsWith, head: str) -> str: """ """ if not isinstance(node_type, bsc.Literal): raise errors.BackendError(f'expected Literal, found {node_type}') return f'FILTER strends(str({head}), "{node.value}")' - def _less_than(self, node_type: T_VERTEX, node: ast.filter.LessThan, head: str) -> str: + def _less_than(self, node_type: bsc.Vertex, node: ast.filter.LessThan, head: str) -> str: """ """ if not isinstance(node_type, bsc.Literal): @@ -296,7 +293,7 @@ class Filter(): equality = '=' if not node.strict else '' return f'FILTER ({head} <{equality} {float(node.threshold)})' - def _greater_than(self, node_type: T_VERTEX, node: ast.filter.GreaterThan, head: str) -> str: + def _greater_than(self, node_type: bsc.Vertex, node: ast.filter.GreaterThan, head: str) -> str: """ """ if not isinstance(node_type, bsc.Literal): -- cgit v1.2.3 From 6b3e32b29799a8143e8ce9d20c5f27e3e166b9bb Mon Sep 17 00:00:00 2001 From: Matthias Baumgartner Date: Thu, 12 Jan 2023 10:17:07 +0100 Subject: changed path to from_string in clients --- bsfs/apps/migrate.py | 6 +++--- bsfs/graph/graph.py | 10 +++++----- bsfs/triple_store/sparql/sparql.py | 2 +- test/apps/test_migrate.py | 10 +++++----- test/graph/ac/test_null.py | 4 ++-- test/graph/test_graph.py | 12 ++++++------ test/graph/test_nodes.py | 2 +- test/graph/test_resolve.py | 2 +- test/query/test_validator.py | 2 +- test/triple_store/sparql/test_parse_filter.py | 4 ++-- test/triple_store/sparql/test_sparql.py | 14 +++++++------- 11 files changed, 34 insertions(+), 34 deletions(-) diff --git a/bsfs/apps/migrate.py b/bsfs/apps/migrate.py index 91c1661..b9d019f 100644 --- a/bsfs/apps/migrate.py +++ b/bsfs/apps/migrate.py @@ -42,15 +42,15 @@ def main(argv): graph = 
bsfs.Open(config) # initialize schema - schema = bsfs.schema.Schema.Empty() + schema = bsfs.schema.Schema() if len(args.schema) == 0: # assemble schema from standard input - schema = schema + bsfs.schema.Schema.from_string(sys.stdin.read()) + schema = schema + bsfs.schema.from_string(sys.stdin.read()) else: # assemble schema from input files for pth in args.schema: with open(pth, mode='rt', encoding='UTF-8') as ifile: - schema = schema + bsfs.schema.Schema.from_string(ifile.read()) + schema = schema + bsfs.schema.from_string(ifile.read()) # migrate schema graph.migrate(schema, not args.remove) diff --git a/bsfs/graph/graph.py b/bsfs/graph/graph.py index f030fed..2210755 100644 --- a/bsfs/graph/graph.py +++ b/bsfs/graph/graph.py @@ -10,7 +10,7 @@ import typing # bsfs imports from bsfs.query import ast, validate -from bsfs.schema import Schema +from bsfs import schema as bsc from bsfs.triple_store import TripleStoreBase from bsfs.utils import URI, typename @@ -67,11 +67,11 @@ class Graph(): return f'{typename(self)}({str(self._backend)}, {self._user})' @property - def schema(self) -> Schema: + def schema(self) -> bsc.Schema: """Return the store's local schema.""" return self._backend.schema - def migrate(self, schema: Schema, append: bool = True) -> 'Graph': + def migrate(self, schema: bsc.Schema, append: bool = True) -> 'Graph': """Migrate the current schema to a new *schema*. Appends to the current schema by default; control this via *append*. @@ -79,14 +79,14 @@ class Graph(): """ # check args - if not isinstance(schema, Schema): + if not isinstance(schema, bsc.Schema): raise TypeError(schema) # append to current schema if append: schema = schema + self._backend.schema # add Graph schema requirements with open(os.path.join(os.path.dirname(__file__), 'schema.nt'), mode='rt', encoding='UTF-8') as ifile: - schema = schema + Schema.from_string(ifile.read()) + schema = schema + bsc.from_string(ifile.read()) # migrate schema in backend # FIXME: consult access controls! 
self._backend.schema = schema diff --git a/bsfs/triple_store/sparql/sparql.py b/bsfs/triple_store/sparql/sparql.py index c3cbff6..ddace35 100644 --- a/bsfs/triple_store/sparql/sparql.py +++ b/bsfs/triple_store/sparql/sparql.py @@ -94,7 +94,7 @@ class SparqlStore(base.TripleStoreBase): super().__init__(None) self._graph = rdflib.Graph() self._transaction = _Transaction(self._graph) - self._schema = bsc.Schema.Empty() + self._schema = bsc.Schema() self._filter_parser = parse_filter.Filter(self._schema) # NOTE: mypy and pylint complain about the **kwargs not being listed (contrasting super) diff --git a/test/apps/test_migrate.py b/test/apps/test_migrate.py index 957509a..230c032 100644 --- a/test/apps/test_migrate.py +++ b/test/apps/test_migrate.py @@ -13,7 +13,7 @@ import unittest import unittest.mock # bsie imports -from bsfs.schema import Schema +from bsfs import schema # objects to test from bsfs.apps.migrate import main @@ -33,21 +33,21 @@ class TestMigrate(unittest.TestCase): # read schema from file with open(schema_1) as ifile: - target = Schema.from_string(ifile.read()) + target = schema.from_string(ifile.read()) graph = main([config, schema_1]) self.assertTrue(target <= graph.schema) # read schema from multiple files with open(schema_1) as ifile: - target = Schema.from_string(ifile.read()) + target = schema.from_string(ifile.read()) with open(schema_2) as ifile: - target = target + Schema.from_string(ifile.read()) + target = target + schema.from_string(ifile.read()) graph = main([config, schema_1, schema_2]) self.assertTrue(target <= graph.schema) # read schema from stdin with open(schema_1, 'rt') as ifile: - target = Schema.from_string(ifile.read()) + target = schema.from_string(ifile.read()) with open(schema_1, 'rt') as ifile: with unittest.mock.patch('sys.stdin', ifile): graph = main([config]) diff --git a/test/graph/ac/test_null.py b/test/graph/ac/test_null.py index c863943..c3df393 100644 --- a/test/graph/ac/test_null.py +++ b/test/graph/ac/test_null.py @@ -8,7 +8,7 @@ Author: Matthias Baumgartner, 2022 import unittest # bsie imports -from bsfs import schema as _schema +from bsfs import schema as bsc from bsfs.namespace import ns from bsfs.query import ast from bsfs.triple_store import SparqlStore @@ -23,7 +23,7 @@ from bsfs.graph.ac.null import NullAC class TestNullAC(unittest.TestCase): def setUp(self): self.backend = SparqlStore() - self.backend.schema = _schema.Schema.from_string(''' + self.backend.schema = bsc.from_string(''' prefix rdfs: prefix xsd: diff --git a/test/graph/test_graph.py b/test/graph/test_graph.py index 8503d5b..125084c 100644 --- a/test/graph/test_graph.py +++ b/test/graph/test_graph.py @@ -25,7 +25,7 @@ class TestGraph(unittest.TestCase): def setUp(self): self.user = URI('http://example.com/me') self.backend = SparqlStore.Open() - self.backend.schema = schema.Schema.from_string(''' + self.backend.schema = schema.from_string(''' prefix rdfs: prefix bsfs: bsfs:Entity rdfs:subClassOf bsfs:Node . 
@@ -118,7 +118,7 @@ class TestGraph(unittest.TestCase): schema.Node(ns.bsfs.Node, None)))}), append=False) # can migrate to compatible schema - target_1 = schema.Schema.from_string(''' + target_1 = schema.from_string(''' prefix rdfs: prefix xsd: prefix bsfs: @@ -142,7 +142,7 @@ class TestGraph(unittest.TestCase): # new schema is applied self.assertLess(target_1, graph.schema) # graph appends its predicates - self.assertEqual(graph.schema, target_1 + schema.Schema.from_string(''' + self.assertEqual(graph.schema, target_1 + schema.from_string(''' prefix rdfs: prefix xsd: prefix bsfs: @@ -155,7 +155,7 @@ class TestGraph(unittest.TestCase): ''')) # can overwrite the current schema - target_2 = schema.Schema.from_string(''' + target_2 = schema.from_string(''' prefix rdfs: prefix xsd: prefix bsfs: @@ -181,7 +181,7 @@ class TestGraph(unittest.TestCase): # new schema is applied self.assertLess(target_2, graph.schema) # graph appends its predicates - self.assertEqual(graph.schema, target_2 + schema.Schema.from_string(''' + self.assertEqual(graph.schema, target_2 + schema.from_string(''' prefix rdfs: prefix xsd: prefix bsfs: @@ -196,7 +196,7 @@ class TestGraph(unittest.TestCase): def test_get(self): # setup graph = Graph(self.backend, self.user) - graph.migrate(schema.Schema.from_string(''' + graph.migrate(schema.from_string(''' prefix rdfs: prefix xsd: prefix bsfs: diff --git a/test/graph/test_nodes.py b/test/graph/test_nodes.py index 11ae46d..47647bd 100644 --- a/test/graph/test_nodes.py +++ b/test/graph/test_nodes.py @@ -24,7 +24,7 @@ class TestNodes(unittest.TestCase): def setUp(self): # initialize backend self.backend = SparqlStore() - self.backend.schema = _schema.Schema.from_string(''' + self.backend.schema = _schema.from_string(''' prefix rdfs: prefix xsd: diff --git a/test/graph/test_resolve.py b/test/graph/test_resolve.py index f515320..a27e8c9 100644 --- a/test/graph/test_resolve.py +++ b/test/graph/test_resolve.py @@ -31,7 +31,7 @@ class TestFilter(unittest.TestCase): """ def test_call(self): - schema = bsc.Schema.from_string(''' + schema = bsc.from_string(''' prefix rdfs: prefix xsd: diff --git a/test/query/test_validator.py b/test/query/test_validator.py index bf3ceeb..405872c 100644 --- a/test/query/test_validator.py +++ b/test/query/test_validator.py @@ -21,7 +21,7 @@ from bsfs.query.validator import Filter class TestFilter(unittest.TestCase): def setUp(self): - self.schema = _schema.Schema.from_string(''' + self.schema = _schema.from_string(''' prefix rdfs: prefix xsd: diff --git a/test/triple_store/sparql/test_parse_filter.py b/test/triple_store/sparql/test_parse_filter.py index bd967e5..1d96994 100644 --- a/test/triple_store/sparql/test_parse_filter.py +++ b/test/triple_store/sparql/test_parse_filter.py @@ -9,7 +9,7 @@ import rdflib import unittest # bsie imports -from bsfs import schema as _schema +from bsfs import schema as bsc from bsfs.namespace import ns from bsfs.query import ast from bsfs.utils import errors @@ -23,7 +23,7 @@ from bsfs.triple_store.sparql.parse_filter import Filter class TestParseFilter(unittest.TestCase): def setUp(self): # schema - self.schema = _schema.Schema.from_string(''' + self.schema = bsc.from_string(''' prefix rdfs: prefix xsd: diff --git a/test/triple_store/sparql/test_sparql.py b/test/triple_store/sparql/test_sparql.py index 25a0b15..5342925 100644 --- a/test/triple_store/sparql/test_sparql.py +++ b/test/triple_store/sparql/test_sparql.py @@ -9,7 +9,7 @@ import rdflib import unittest # bsie imports -from bsfs import schema as _schema +from bsfs 
import schema as bsc from bsfs.namespace import ns from bsfs.query import ast from bsfs.utils import errors, URI @@ -22,7 +22,7 @@ from bsfs.triple_store.sparql.sparql import SparqlStore class TestSparqlStore(unittest.TestCase): def setUp(self): - self.schema = _schema.Schema.from_string(''' + self.schema = bsc.from_string(''' prefix rdfs: prefix xsd: @@ -90,7 +90,7 @@ class TestSparqlStore(unittest.TestCase): def test__has_type(self): # setup store store = SparqlStore.Open() - store.schema = _schema.Schema.from_string(''' + store.schema = bsc.from_string(''' prefix rdfs: prefix xsd: prefix bsfs: @@ -195,7 +195,7 @@ class TestSparqlStore(unittest.TestCase): self.assertSetEqual(set(store._graph), instances) # add some classes to the schema - curr = curr + _schema.Schema.from_string(''' + curr = curr + bsc.from_string(''' prefix rdfs: prefix xsd: prefix bsfs: @@ -298,7 +298,7 @@ class TestSparqlStore(unittest.TestCase): # remove some classes from the schema - curr = _schema.Schema.from_string(''' + curr = bsc.from_string(''' prefix rdfs: prefix xsd: prefix bsfs: @@ -383,7 +383,7 @@ class TestSparqlStore(unittest.TestCase): self.assertRaises(TypeError, setattr, store, 'schema', Foo()) # cannot migrate to incompatible schema - invalid = _schema.Schema.from_string(''' + invalid = bsc.from_string(''' prefix rdfs: prefix xsd: prefix bsfs: @@ -399,7 +399,7 @@ class TestSparqlStore(unittest.TestCase): ''') self.assertRaises(errors.ConsistencyError, setattr, store, 'schema', invalid) - invalid = _schema.Schema.from_string(''' + invalid = bsc.from_string(''' prefix rdfs: prefix xsd: prefix bsfs: -- cgit v1.2.3 From 7e7284d5fc01c0a081aa79d67736f51069864a7d Mon Sep 17 00:00:00 2001 From: Matthias Baumgartner Date: Thu, 12 Jan 2023 10:22:59 +0100 Subject: adapt to non-optional range in query checks --- bsfs/graph/resolve.py | 4 ++-- bsfs/query/validator.py | 14 +++++--------- bsfs/triple_store/sparql/parse_filter.py | 13 +++++-------- 3 files changed, 12 insertions(+), 19 deletions(-) diff --git a/bsfs/graph/resolve.py b/bsfs/graph/resolve.py index e398a5e..9b5f631 100644 --- a/bsfs/graph/resolve.py +++ b/bsfs/graph/resolve.py @@ -101,8 +101,8 @@ class Filter(): rng = subrng except TypeError as err: raise errors.ConsistencyError(f'ranges {subrng} and {rng} are not related') from err - if rng is None: - raise errors.UnreachableError() + if not isinstance(rng, (bsc.Node, bsc.Literal)): + raise errors.BackendError(f'the range of node {node} is undefined') return rng def _any(self, type_: bsc.Vertex, node: ast.filter.Any) -> ast.filter.Any: # pylint: disable=unused-argument diff --git a/bsfs/query/validator.py b/bsfs/query/validator.py index 6bf1b72..b04a9bf 100644 --- a/bsfs/query/validator.py +++ b/bsfs/query/validator.py @@ -98,10 +98,9 @@ class Filter(): raise errors.ConsistencyError(f'predicate {node.predicate} is not in the schema') # determine domain and range pred = self.schema.predicate(node.predicate) + if not isinstance(pred.range, (bsc.Node, bsc.Literal)): + raise errors.BackendError(f'the range of predicate {pred} is undefined') dom, rng = pred.domain, pred.range - if rng is None: - # FIXME: It is a design error that Predicates can have a None range... - raise errors.BackendError(f'predicate {pred} has no range') if node.reverse: dom, rng = rng, dom # type: ignore [assignment] # variable re-use confuses mypy # return domain and range @@ -133,12 +132,9 @@ class Filter(): raise errors.ConsistencyError(f'ranges {subrng} and {rng} are not related') except TypeError as err: # compared literal vs. 
node raise errors.ConsistencyError(f'ranges {subrng} and {rng} are not of the same type') from err - # check domain and range - if dom is None or rng is None: - # OneOf guarantees at least one expression, these two cases cannot happen - raise errors.UnreachableError() - # return domain and range - return dom, rng + # OneOf guarantees at least one expression, dom and rng are always bsc.Vertex. + # mypy does not realize this, hence we ignore the warning. + return dom, rng # type: ignore [return-value] ## intermediates diff --git a/bsfs/triple_store/sparql/parse_filter.py b/bsfs/triple_store/sparql/parse_filter.py index a851888..0297cbc 100644 --- a/bsfs/triple_store/sparql/parse_filter.py +++ b/bsfs/triple_store/sparql/parse_filter.py @@ -140,12 +140,10 @@ class Filter(): raise errors.ConsistencyError(f'ranges {subrng} and {rng} are not related') except TypeError as err: # subrng and rng are not comparable raise errors.ConsistencyError(f'ranges {subrng} and {rng} are not related') from err - if rng is None: - # for mypy to be certain of the rng type - # if rng were None, we'd have gotten a TypeError above (None > None) - raise errors.UnreachableError() # return joint predicate expression and next range - return '|'.join(suburi), rng + # OneOf guarantees at least one expression, rng is always a bsc.Vertex. + # mypy does not realize this, hence we ignore the warning. + return '|'.join(suburi), rng # type: ignore [return-value] def _predicate(self, node_type: bsc.Vertex, node: ast.filter.Predicate) -> typing.Tuple[str, bsc.Vertex]: """ @@ -159,9 +157,8 @@ class Filter(): if not self.schema.has_predicate(puri): raise errors.ConsistencyError(f'predicate {puri} is not in the schema') pred = self.schema.predicate(puri) - if pred.range is None: - # FIXME: It is a design error that Predicates can have a None range... - raise errors.BackendError(f'predicate {pred} has no range') + if not isinstance(pred.range, (bsc.Node, bsc.Literal)): + raise errors.BackendError(f'the range of predicate {pred} is undefined') dom, rng = pred.domain, pred.range # encapsulate predicate uri puri = f'<{puri}>' # type: ignore [assignment] # variable re-use confuses mypy -- cgit v1.2.3 From b0ff4ed674ad78bf113c3cc0c2ccd187ccb91048 Mon Sep 17 00:00:00 2001 From: Matthias Baumgartner Date: Thu, 12 Jan 2023 10:26:30 +0100 Subject: number literal adaptions --- bsfs/graph/schema.nt | 3 ++- bsfs/query/validator.py | 6 ++++-- bsfs/triple_store/sparql/parse_filter.py | 1 - bsfs/triple_store/sparql/sparql.py | 4 +++- test/apps/schema-2.nt | 3 ++- test/graph/ac/test_null.py | 3 ++- test/graph/test_graph.py | 12 ++++++++---- test/graph/test_nodes.py | 6 ++++-- test/graph/test_resolve.py | 3 ++- test/query/test_validator.py | 6 +++++- test/schema/test_schema.py | 17 +++++++++-------- test/schema/test_serialize.py | 10 ++++++---- test/triple_store/sparql/test_parse_filter.py | 3 ++- test/triple_store/sparql/test_sparql.py | 12 ++++++++---- 14 files changed, 57 insertions(+), 32 deletions(-) diff --git a/bsfs/graph/schema.nt b/bsfs/graph/schema.nt index 8612681..f619746 100644 --- a/bsfs/graph/schema.nt +++ b/bsfs/graph/schema.nt @@ -8,7 +8,8 @@ prefix bsfs: prefix bsm: # literals -xsd:integer rdfs:subClassOf bsfs:Literal . +bsfs:Number rdfs:subClassOf bsfs:Literal . +xsd:integer rdfs:subClassOf bsfs:Number . 
# predicates bsm:t_created rdfs:subClassOf bsfs:Predicate ; diff --git a/bsfs/query/validator.py b/bsfs/query/validator.py index b04a9bf..75b51ca 100644 --- a/bsfs/query/validator.py +++ b/bsfs/query/validator.py @@ -182,8 +182,7 @@ class Filter(): if not type_ <= dom: raise errors.ConsistencyError(f'expected type {dom}, found {type_}') # node.count is a numerical expression - # FIXME: We have to ensure that ns.xsd.integer is always known in the schema! - self._parse_filter_expression(self.schema.literal(ns.xsd.integer), node.count) + self._parse_filter_expression(self.schema.literal(ns.bsfs.Number), node.count) ## conditions @@ -211,6 +210,9 @@ class Filter(): # type exists in the schema if type_ not in self.schema.literals(): raise errors.ConsistencyError(f'literal {type_} is not in the schema') + # type must be a numerical + if not type_ <= self.schema.literal(ns.bsfs.Number): + raise errors.ConsistencyError(f'expected a number type, found {type_}') # FIXME: Check if node.value corresponds to type_ diff --git a/bsfs/triple_store/sparql/parse_filter.py b/bsfs/triple_store/sparql/parse_filter.py index 0297cbc..18a3288 100644 --- a/bsfs/triple_store/sparql/parse_filter.py +++ b/bsfs/triple_store/sparql/parse_filter.py @@ -242,7 +242,6 @@ class Filter(): # predicate count expression (fetch number of predicates at *head*) num_preds = f'{{ SELECT (COUNT(distinct {inner}) as {outer}) WHERE {{ {head} {pred} {inner} }} }}' # count expression - # FIXME: We have to ensure that ns.xsd.integer is always known in the schema! count_bounds = self._parse_filter_expression(self.schema.literal(ns.xsd.integer), node.count, outer) # combine return num_preds + ' . ' + count_bounds diff --git a/bsfs/triple_store/sparql/sparql.py b/bsfs/triple_store/sparql/sparql.py index ddace35..87467ff 100644 --- a/bsfs/triple_store/sparql/sparql.py +++ b/bsfs/triple_store/sparql/sparql.py @@ -11,6 +11,7 @@ import rdflib # bsfs imports from bsfs import schema as bsc +from bsfs.namespace import ns from bsfs.query import ast from bsfs.utils import errors, URI @@ -94,7 +95,8 @@ class SparqlStore(base.TripleStoreBase): super().__init__(None) self._graph = rdflib.Graph() self._transaction = _Transaction(self._graph) - self._schema = bsc.Schema() + # NOTE: parsing bsfs.query.ast.filter.Has requires xsd:integer. + self._schema = bsc.Schema(literals={bsc.ROOT_NUMBER.child(ns.xsd.integer)}) self._filter_parser = parse_filter.Filter(self._schema) # NOTE: mypy and pylint complain about the **kwargs not being listed (contrasting super) diff --git a/test/apps/schema-2.nt b/test/apps/schema-2.nt index 525ac99..4c5468f 100644 --- a/test/apps/schema-2.nt +++ b/test/apps/schema-2.nt @@ -10,7 +10,8 @@ prefix bse: bsfs:Entity rdfs:subClassOf bsfs:Node . # common definitions -xsd:integer rdfs:subClassOf bsfs:Literal . +bsfs:Number rdfs:subClassOf bsfs:Literal . +xsd:integer rdfs:subClassOf bsfs:Number . bse:filesize rdfs:subClassOf bsfs:Predicate ; rdfs:domain bsfs:Entity ; diff --git a/test/graph/ac/test_null.py b/test/graph/ac/test_null.py index c3df393..e35852d 100644 --- a/test/graph/ac/test_null.py +++ b/test/graph/ac/test_null.py @@ -34,7 +34,8 @@ class TestNullAC(unittest.TestCase): bsfs:Entity rdfs:subClassOf bsfs:Node . bsfs:Tag rdfs:subClassOf bsfs:Node . xsd:string rdfs:subClassOf bsfs:Literal . - xsd:integer rdfs:subClassOf bsfs:Literal . + bsfs:Number rdfs:subClassOf bsfs:Literal . + xsd:integer rdfs:subClassOf bsfs:Number . 
# predicates mandated by Nodes bsm:t_created rdfs:subClassOf bsfs:Predicate ; diff --git a/test/graph/test_graph.py b/test/graph/test_graph.py index 125084c..f97783b 100644 --- a/test/graph/test_graph.py +++ b/test/graph/test_graph.py @@ -125,7 +125,8 @@ class TestGraph(unittest.TestCase): prefix bse: bsfs:Entity rdfs:subClassOf bsfs:Node . xsd:string rdfs:subClassOf bsfs:Literal . - xsd:integer rdfs:subClassOf bsfs:Literal . + bsfs:Number rdfs:subClassOf bsfs:Literal . + xsd:integer rdfs:subClassOf bsfs:Number . bse:filename rdfs:subClassOf bsfs:Predicate ; rdfs:domain bsfs:Entity ; @@ -147,7 +148,8 @@ class TestGraph(unittest.TestCase): prefix xsd: prefix bsfs: prefix bsm: - xsd:integer rdfs:subClassOf bsfs:Literal . + bsfs:Number rdfs:subClassOf bsfs:Literal . + xsd:integer rdfs:subClassOf bsfs:Number . bsm:t_created rdfs:subClassOf bsfs:Predicate ; rdfs:domain bsfs:Node ; rdfs:range xsd:integer ; @@ -162,7 +164,8 @@ class TestGraph(unittest.TestCase): prefix bse: bsfs:Entity rdfs:subClassOf bsfs:Node . xsd:string rdfs:subClassOf bsfs:Literal . - xsd:integer rdfs:subClassOf bsfs:Literal . + bsfs:Number rdfs:subClassOf bsfs:Literal . + xsd:integer rdfs:subClassOf bsfs:Number . bse:filename rdfs:subClassOf bsfs:Predicate ; rdfs:domain bsfs:Entity ; @@ -186,7 +189,8 @@ class TestGraph(unittest.TestCase): prefix xsd: prefix bsfs: prefix bsm: - xsd:integer rdfs:subClassOf bsfs:Literal . + bsfs:Number rdfs:subClassOf bsfs:Literal . + xsd:integer rdfs:subClassOf bsfs:Number . bsm:t_created rdfs:subClassOf bsfs:Predicate ; rdfs:domain bsfs:Node ; rdfs:range xsd:integer ; diff --git a/test/graph/test_nodes.py b/test/graph/test_nodes.py index 47647bd..e29ab6a 100644 --- a/test/graph/test_nodes.py +++ b/test/graph/test_nodes.py @@ -37,7 +37,8 @@ class TestNodes(unittest.TestCase): bsfs:Tag rdfs:subClassOf bsfs:Node . bsfs:User rdfs:subClassOf bsfs:Node . xsd:string rdfs:subClassOf bsfs:Literal . - xsd:integer rdfs:subClassOf bsfs:Literal . + bsfs:Number rdfs:subClassOf bsfs:Literal . + xsd:integer rdfs:subClassOf bsfs:Number . # predicates mandated by Nodes bsm:t_created rdfs:subClassOf bsfs:Predicate ; @@ -78,7 +79,8 @@ class TestNodes(unittest.TestCase): (rdflib.URIRef(ns.bsfs.Tag), rdflib.RDFS.subClassOf, rdflib.URIRef(ns.bsfs.Node)), (rdflib.URIRef(ns.bsfs.User), rdflib.RDFS.subClassOf, rdflib.URIRef(ns.bsfs.Node)), (rdflib.URIRef(ns.xsd.string), rdflib.RDFS.subClassOf, rdflib.URIRef(ns.bsfs.Literal)), - (rdflib.URIRef(ns.xsd.integer), rdflib.RDFS.subClassOf, rdflib.URIRef(ns.bsfs.Literal)), + (rdflib.URIRef(ns.bsfs.Number), rdflib.RDFS.subClassOf, rdflib.URIRef(ns.bsfs.Literal)), + (rdflib.URIRef(ns.xsd.integer), rdflib.RDFS.subClassOf, rdflib.URIRef(ns.bsfs.Number)), (rdflib.URIRef(ns.bsm.t_created), rdflib.RDFS.subClassOf, rdflib.URIRef(ns.bsfs.Predicate)), (rdflib.URIRef(ns.bse.comment), rdflib.RDFS.subClassOf, rdflib.URIRef(ns.bsfs.Predicate)), (rdflib.URIRef(ns.bse.filesize), rdflib.RDFS.subClassOf, rdflib.URIRef(ns.bsfs.Predicate)), diff --git a/test/graph/test_resolve.py b/test/graph/test_resolve.py index a27e8c9..0e7da99 100644 --- a/test/graph/test_resolve.py +++ b/test/graph/test_resolve.py @@ -41,7 +41,8 @@ class TestFilter(unittest.TestCase): bsfs:Entity rdfs:subClassOf bsfs:Node . bsfs:Tag rdfs:subClassOf bsfs:Node . xsd:string rdfs:subClassOf bsfs:Literal . - xsd:integer rdfs:subClassOf bsfs:Literal . + bsfs:Number rdfs:subClassOf bsfs:Literal . + xsd:integer rdfs:subClassOf bsfs:Number . 
bse:comment rdfs:subClassOf bsfs:Predicate ; rdfs:domain bsfs:Entity ; diff --git a/test/query/test_validator.py b/test/query/test_validator.py index 405872c..ea56a57 100644 --- a/test/query/test_validator.py +++ b/test/query/test_validator.py @@ -33,7 +33,8 @@ class TestFilter(unittest.TestCase): bsfs:Tag rdfs:subClassOf bsfs:Node . xsd:string rdfs:subClassOf bsfs:Literal . - xsd:integer rdfs:subClassOf bsfs:Literal . + bsfs:Number rdfs:subClassOf bsfs:Literal . + xsd:integer rdfs:subClassOf bsfs:Number . bse:comment rdfs:subClassOf bsfs:Predicate ; rdfs:domain bsfs:Node ; @@ -247,6 +248,9 @@ class TestFilter(unittest.TestCase): ast.filter.GreaterThan(0)) self.assertRaises(errors.ConsistencyError, self.validate._bounded, self.schema.literal(ns.bsfs.Literal).child(ns.bsfs.Invalid), ast.filter.LessThan(0)) + # type must be a number + self.assertRaises(errors.ConsistencyError, self.validate._bounded, self.schema.literal(ns.xsd.string), + ast.filter.LessThan(0)) # bounded accepts correct expressions self.assertIsNone(self.validate._bounded(self.schema.literal(ns.xsd.integer), ast.filter.LessThan(0))) self.assertIsNone(self.validate._bounded(self.schema.literal(ns.xsd.integer), ast.filter.GreaterThan(0))) diff --git a/test/schema/test_schema.py b/test/schema/test_schema.py index ca21f87..c19c226 100644 --- a/test/schema/test_schema.py +++ b/test/schema/test_schema.py @@ -35,7 +35,8 @@ class TestSchema(unittest.TestCase): bsfs:Unused rdfs:subClassOf bsfs:Node . xsd:string rdfs:subClassOf bsfs:Literal . - xsd:integer rdfs:subClassOf bsfs:Literal . + bsfs:Number rdfs:subClassOf bsfs:Literal . + xsd:integer rdfs:subClassOf bsfs:Number . xsd:boolean rdfs:subClassOf bsfs:Literal . bse:tag rdfs:subClassOf bsfs:Predicate ; @@ -56,18 +57,18 @@ class TestSchema(unittest.TestCase): ''' # nodes self.n_root = types.ROOT_NODE - self.n_ent = types.Node(ns.bsfs.Entity, types.Node(ns.bsfs.Node, None)) - self.n_img = types.Node(ns.bsfs.Image, types.Node(ns.bsfs.Entity, types.Node(ns.bsfs.Node, None))) - self.n_tag = types.Node(ns.bsfs.Tag, types.Node(ns.bsfs.Node, None)) - self.n_unused = types.Node(ns.bsfs.Unused, types.Node(ns.bsfs.Node, None)) + self.n_ent = self.n_root.child(ns.bsfs.Entity) + self.n_img = self.n_ent.child(ns.bsfs.Image) + self.n_tag = self.n_root.child(ns.bsfs.Tag) + self.n_unused = self.n_root.child(ns.bsfs.Unused) self.nodes = [self.n_root, self.n_ent, self.n_img, self.n_tag, self.n_unused] # literals self.l_root = types.ROOT_LITERAL self.l_number = types.ROOT_NUMBER - self.l_string = types.Literal(ns.xsd.string, types.Literal(ns.bsfs.Literal, None)) - self.l_integer = types.Literal(ns.xsd.integer, self.l_number) - self.l_unused = types.Literal(ns.xsd.boolean, types.Literal(ns.bsfs.Literal, None)) + self.l_string = self.l_root.child(ns.xsd.string) + self.l_integer = self.l_root.child(ns.xsd.integer) + self.l_unused = self.l_root.child(ns.xsd.boolean) self.literals = [self.l_root, self.l_number, self.l_string, self.l_integer, self.l_unused] # predicates diff --git a/test/schema/test_serialize.py b/test/schema/test_serialize.py index b9d8599..f46b3a4 100644 --- a/test/schema/test_serialize.py +++ b/test/schema/test_serialize.py @@ -180,7 +180,7 @@ class TestFromString(unittest.TestCase): # a literal can have multiple children l_string = types.ROOT_LITERAL.child(ns.xsd.string) - l_integer = types.ROOT_LITERAL.child(ns.xsd.integer) + l_integer = types.ROOT_NUMBER.child(ns.xsd.integer) l_unsigned = l_integer.child(ns.xsd.unsigned) l_signed = l_integer.child(ns.xsd.signed) 
self.assertEqual(Schema({}, {}, {l_string, l_integer, l_unsigned, l_signed}), from_string(''' @@ -191,7 +191,8 @@ class TestFromString(unittest.TestCase): # literals inherit from same parent xsd:string rdfs:subClassOf bsfs:Literal . - xsd:integer rdfs:subClassOf bsfs:Literal . + bsfs:Number rdfs:subClassOf bsfs:Literal . + xsd:integer rdfs:subClassOf bsfs:Number . # literals inherit from same parent xsd:unsigned rdfs:subClassOf xsd:integer . @@ -893,7 +894,7 @@ class TestFromString(unittest.TestCase): # literals l_string = types.ROOT_LITERAL.child(ns.xsd.string) l_array = types.ROOT_LITERAL.child(ns.bsfs.array) - l_integer = types.ROOT_LITERAL.child(ns.xsd.integer) + l_integer = types.ROOT_NUMBER.child(ns.xsd.integer) l_boolean = types.ROOT_LITERAL.child(ns.xsd.boolean) # predicates p_annotation = types.ROOT_PREDICATE.child(ns.bsfs.Annotation) @@ -931,7 +932,8 @@ class TestFromString(unittest.TestCase): xsd:string rdfs:subClassOf bsfs:Literal ; rdfs:label "A sequence of characters"^^xsd:string . bsfs:array rdfs:subClassOf bsfs:Literal . - xsd:integer rdfs:subClassOf bsfs:Literal . + bsfs:Number rdfs:subClassOf bsfs:Literal . + xsd:integer rdfs:subClassOf bsfs:Number . xsd:boolean rdfs:subClassOf bsfs:Literal . # abstract predicates diff --git a/test/triple_store/sparql/test_parse_filter.py b/test/triple_store/sparql/test_parse_filter.py index 1d96994..f6842c5 100644 --- a/test/triple_store/sparql/test_parse_filter.py +++ b/test/triple_store/sparql/test_parse_filter.py @@ -35,7 +35,8 @@ class TestParseFilter(unittest.TestCase): bsfs:Tag rdfs:subClassOf bsfs:Node . xsd:string rdfs:subClassOf bsfs:Literal . - xsd:integer rdfs:subClassOf bsfs:Literal . + bsfs:Number rdfs:subClassOf bsfs:Literal . + xsd:integer rdfs:subClassOf bsfs:Number . bsfs:URI rdfs:subClassOf bsfs:Literal . bse:comment rdfs:subClassOf bsfs:Predicate ; diff --git a/test/triple_store/sparql/test_sparql.py b/test/triple_store/sparql/test_sparql.py index 5342925..5b71016 100644 --- a/test/triple_store/sparql/test_sparql.py +++ b/test/triple_store/sparql/test_sparql.py @@ -33,7 +33,8 @@ class TestSparqlStore(unittest.TestCase): bsfs:Tag rdfs:subClassOf bsfs:Node . bsfs:User rdfs:subClassOf bsfs:Node . xsd:string rdfs:subClassOf bsfs:Literal . - xsd:integer rdfs:subClassOf bsfs:Literal . + bsfs:Number rdfs:subClassOf bsfs:Literal . + xsd:integer rdfs:subClassOf bsfs:Number . # non-unique literal bse:comment rdfs:subClassOf bsfs:Predicate ; @@ -66,7 +67,8 @@ class TestSparqlStore(unittest.TestCase): (rdflib.URIRef(ns.bsfs.Tag), rdflib.RDFS.subClassOf, rdflib.URIRef(ns.bsfs.Node)), (rdflib.URIRef(ns.bsfs.User), rdflib.RDFS.subClassOf, rdflib.URIRef(ns.bsfs.Node)), (rdflib.URIRef(ns.xsd.string), rdflib.RDFS.subClassOf, rdflib.URIRef(ns.bsfs.Literal)), - (rdflib.URIRef(ns.xsd.integer), rdflib.RDFS.subClassOf, rdflib.URIRef(ns.bsfs.Literal)), + (rdflib.URIRef(ns.bsfs.Number), rdflib.RDFS.subClassOf, rdflib.URIRef(ns.bsfs.Literal)), + (rdflib.URIRef(ns.xsd.integer), rdflib.RDFS.subClassOf, rdflib.URIRef(ns.bsfs.Number)), (rdflib.URIRef(ns.bse.comment), rdflib.RDFS.subClassOf, rdflib.URIRef(ns.bsfs.Predicate)), (rdflib.URIRef(ns.bse.filesize), rdflib.RDFS.subClassOf, rdflib.URIRef(ns.bsfs.Predicate)), (rdflib.URIRef(ns.bse.tag), rdflib.RDFS.subClassOf, rdflib.URIRef(ns.bsfs.Predicate)), @@ -310,7 +312,8 @@ class TestSparqlStore(unittest.TestCase): bsfs:User rdfs:subClassOf bsfs:Node . xsd:boolean rdfs:subClassOf bsfs:Literal . - xsd:integer rdfs:subClassOf bsfs:Literal . + bsfs:Number rdfs:subClassOf bsfs:Literal . 
+ xsd:integer rdfs:subClassOf bsfs:Number . bse:filesize rdfs:subClassOf bsfs:Predicate ; rdfs:domain bsfs:Entity ; @@ -351,7 +354,8 @@ class TestSparqlStore(unittest.TestCase): (rdflib.URIRef(ns.bsfs.Tag), rdflib.RDFS.subClassOf, rdflib.URIRef(ns.bsfs.Node)), (rdflib.URIRef(ns.bsfs.User), rdflib.RDFS.subClassOf, rdflib.URIRef(ns.bsfs.Node)), (rdflib.URIRef(ns.xsd.boolean), rdflib.RDFS.subClassOf, rdflib.URIRef(ns.bsfs.Literal)), - (rdflib.URIRef(ns.xsd.integer), rdflib.RDFS.subClassOf, rdflib.URIRef(ns.bsfs.Literal)), + (rdflib.URIRef(ns.bsfs.Number), rdflib.RDFS.subClassOf, rdflib.URIRef(ns.bsfs.Literal)), + (rdflib.URIRef(ns.xsd.integer), rdflib.RDFS.subClassOf, rdflib.URIRef(ns.bsfs.Number)), (rdflib.URIRef(ns.bse.shared), rdflib.RDFS.subClassOf, rdflib.URIRef(ns.bsfs.Predicate)), (rdflib.URIRef(ns.bse.tag), rdflib.RDFS.subClassOf, rdflib.URIRef(ns.bsfs.Predicate)), (rdflib.URIRef(ns.bse.filesize), rdflib.RDFS.subClassOf, rdflib.URIRef(ns.bsfs.Predicate)), -- cgit v1.2.3 From e708016ae366e96051281f3a744af35a8c06d98b Mon Sep 17 00:00:00 2001 From: Matthias Baumgartner Date: Thu, 12 Jan 2023 10:28:16 +0100 Subject: cleanup and cosmetic changes --- bsfs/schema/__init__.py | 3 ++- bsfs/schema/schema.py | 2 -- bsfs/schema/serialize.py | 17 +++++++++-------- bsfs/schema/types.py | 8 ++++---- bsfs/triple_store/sparql/sparql.py | 6 +++--- test/graph/test_nodes.py | 1 + test/graph/test_resolve.py | 2 +- test/triple_store/sparql/test_sparql.py | 2 ++ 8 files changed, 22 insertions(+), 19 deletions(-) diff --git a/bsfs/schema/__init__.py b/bsfs/schema/__init__.py index 5162a01..31d7d61 100644 --- a/bsfs/schema/__init__.py +++ b/bsfs/schema/__init__.py @@ -10,7 +10,8 @@ import typing # inner-module imports from .schema import Schema from .serialize import from_string, to_string -from .types import Literal, Node, Predicate, Vertex, ROOT_FEATURE, ROOT_LITERAL, ROOT_NODE, ROOT_NUMBER, ROOT_PREDICATE, ROOT_VERTEX +from .types import Literal, Node, Predicate, Vertex, \ + ROOT_FEATURE, ROOT_LITERAL, ROOT_NODE, ROOT_NUMBER, ROOT_PREDICATE, ROOT_VERTEX # exports __all__: typing.Sequence[str] = ( diff --git a/bsfs/schema/schema.py b/bsfs/schema/schema.py index 80cb58a..52ad191 100644 --- a/bsfs/schema/schema.py +++ b/bsfs/schema/schema.py @@ -7,10 +7,8 @@ Author: Matthias Baumgartner, 2022 # imports from collections import abc, namedtuple import typing -import rdflib # bsfs imports -from bsfs.namespace import ns from bsfs.utils import errors, URI, typename # inner-module imports diff --git a/bsfs/schema/serialize.py b/bsfs/schema/serialize.py index c1ac9a9..0eb6628 100644 --- a/bsfs/schema/serialize.py +++ b/bsfs/schema/serialize.py @@ -5,7 +5,6 @@ A copy of the license is provided with the project. 
Author: Matthias Baumgartner, 2022 """ # standard imports -from collections import abc import itertools import typing @@ -42,7 +41,7 @@ def from_string(schema_str: str) -> schema.Schema: return value.value if isinstance(value, rdflib.URIRef): return URI(value) - raise errors.BackendError(f'expected Literal or URIRef, found {typename(value)}') + raise errors.UnreachableError(f'expected Literal or URIRef, found {typename(value)}') def _fetch_hierarchically(factory, curr): """Walk through a rdfs:subClassOf hierarchy, creating symbols along the way.""" @@ -80,14 +79,16 @@ def from_string(schema_str: str) -> schema.Schema: # fetch predicates # FIXME: type annotation def _fetch_value(subject: URI, predicate: rdflib.URIRef, value_factory) -> typing.Optional[typing.Any]: - """Fetch the object of a given subject and predicate. Raises a `errors.ConsistencyError` if multiple objects match.""" + """Fetch the object of a given subject and predicate. + Raises a `errors.ConsistencyError` if multiple objects match. + """ values = list(graph.objects(rdflib.URIRef(subject), predicate)) if len(values) == 0: return None - elif len(values) == 1: + if len(values) == 1: return value_factory(values[0]) - else: - raise errors.ConsistencyError(f'{subject} has multiple values for predicate {str(predicate)}, expected zero or one') + raise errors.ConsistencyError( + f'{subject} has multiple values for predicate {str(predicate)}, expected zero or one') def _build_predicate(uri, parent, **annotations): """Predicate factory.""" @@ -102,13 +103,13 @@ def from_string(schema_str: str) -> schema.Schema: dom = _fetch_value(uri, rdflib.RDFS.domain, URI) if dom is not None and dom not in nodes_lut: raise errors.ConsistencyError(f'predicate {uri} has undefined domain {dom}') - elif dom is not None: + if dom is not None: dom = nodes_lut[dom] # get range rng = _fetch_value(uri, rdflib.RDFS.range, URI) if rng is not None and rng not in nodes_lut and rng not in literals_lut: raise errors.ConsistencyError(f'predicate {uri} has undefined range {rng}') - elif rng is not None: + if rng is not None: rng = nodes_lut.get(rng, literals_lut.get(rng)) # get unique unique = _fetch_value(uri, rdflib.URIRef(ns.bsfs.unique), bool) diff --git a/bsfs/schema/types.py b/bsfs/schema/types.py index 4f49efe..6257dee 100644 --- a/bsfs/schema/types.py +++ b/bsfs/schema/types.py @@ -238,7 +238,7 @@ class Predicate(_Type): self, # Type members uri: URI, - parent: '_PredicateBase', + parent: typing.Optional['Predicate'], # Predicate members domain: Node, range: Vertex, # pylint: disable=redefined-builtin @@ -312,10 +312,10 @@ class Feature(Predicate): self, # Type members uri: URI, - parent: Predicate, + parent: typing.Optional[Predicate], # Predicate members domain: Node, - range: Literal, + range: Literal, # pylint: disable=redefined-builtin unique: bool, # Feature members dimension: int, @@ -341,7 +341,7 @@ class Feature(Predicate): self, uri: URI, domain: typing.Optional[Node] = None, - range: typing.Optional[Literal] = None, # pylint: disable=redefined-builtin + range: typing.Optional[Vertex] = None, # pylint: disable=redefined-builtin unique: typing.Optional[bool] = None, dimension: typing.Optional[int] = None, dtype: typing.Optional[URI] = None, diff --git a/bsfs/triple_store/sparql/sparql.py b/bsfs/triple_store/sparql/sparql.py index 87467ff..3877d1a 100644 --- a/bsfs/triple_store/sparql/sparql.py +++ b/bsfs/triple_store/sparql/sparql.py @@ -139,7 +139,7 @@ class SparqlStore(base.TripleStoreBase): for src, trg in 
self._graph.subject_objects(rdflib.URIRef(pred.uri)): self._transaction.remove((src, rdflib.URIRef(pred.uri), trg)) # remove predicate definition - if pred.parent is not None: + if pred.parent is not None: # NOTE: there shouldn't be any predicate w/o parent self._transaction.remove(( rdflib.URIRef(pred.uri), rdflib.RDFS.subClassOf, @@ -159,7 +159,7 @@ class SparqlStore(base.TripleStoreBase): # remove instance self._transaction.remove((inst, rdflib.RDF.type, rdflib.URIRef(node.uri))) # remove node definition - if node.parent is not None: + if node.parent is not None: # NOTE: there shouldn't be any node w/o parent self._transaction.remove(( rdflib.URIRef(node.uri), rdflib.RDFS.subClassOf, @@ -168,7 +168,7 @@ class SparqlStore(base.TripleStoreBase): for lit in sub.literals: # remove literal definition - if lit.parent is not None: + if lit.parent is not None: # NOTE: there shouldn't be any literal w/o parent self._transaction.remove(( rdflib.URIRef(lit.uri), rdflib.RDFS.subClassOf, diff --git a/test/graph/test_nodes.py b/test/graph/test_nodes.py index e29ab6a..81da60f 100644 --- a/test/graph/test_nodes.py +++ b/test/graph/test_nodes.py @@ -75,6 +75,7 @@ class TestNodes(unittest.TestCase): ''') self.schema_triples = { # schema hierarchy + (rdflib.URIRef(ns.bsfs.Feature), rdflib.RDFS.subClassOf, rdflib.URIRef(ns.bsfs.Predicate)), (rdflib.URIRef(ns.bsfs.Entity), rdflib.RDFS.subClassOf, rdflib.URIRef(ns.bsfs.Node)), (rdflib.URIRef(ns.bsfs.Tag), rdflib.RDFS.subClassOf, rdflib.URIRef(ns.bsfs.Node)), (rdflib.URIRef(ns.bsfs.User), rdflib.RDFS.subClassOf, rdflib.URIRef(ns.bsfs.Node)), diff --git a/test/graph/test_resolve.py b/test/graph/test_resolve.py index 0e7da99..9cde38e 100644 --- a/test/graph/test_resolve.py +++ b/test/graph/test_resolve.py @@ -150,7 +150,7 @@ class TestFilter(unittest.TestCase): ast.filter.Predicate(ns.bse.comment)) self.assertRaises(errors.BackendError, resolver, schema.node(ns.bsfs.Tag), ast.filter.Any(ast.filter.PredicateExpression(), ast.filter.Equals('foo'))) - self.assertRaises(errors.UnreachableError, resolver._one_of, ast.filter.OneOf(ast.filter.Predicate(ns.bsfs.Predicate))) + self.assertRaises(errors.BackendError, resolver._one_of, ast.filter.OneOf(ast.filter.Predicate(ns.bsfs.Predicate))) # check schema consistency self.assertRaises(errors.ConsistencyError, resolver, schema.node(ns.bsfs.Tag), diff --git a/test/triple_store/sparql/test_sparql.py b/test/triple_store/sparql/test_sparql.py index 5b71016..aa5dfc7 100644 --- a/test/triple_store/sparql/test_sparql.py +++ b/test/triple_store/sparql/test_sparql.py @@ -63,6 +63,7 @@ class TestSparqlStore(unittest.TestCase): ''') self.schema_triples = { # schema hierarchy + (rdflib.URIRef(ns.bsfs.Feature), rdflib.RDFS.subClassOf, rdflib.URIRef(ns.bsfs.Predicate)), (rdflib.URIRef(ns.bsfs.Entity), rdflib.RDFS.subClassOf, rdflib.URIRef(ns.bsfs.Node)), (rdflib.URIRef(ns.bsfs.Tag), rdflib.RDFS.subClassOf, rdflib.URIRef(ns.bsfs.Node)), (rdflib.URIRef(ns.bsfs.User), rdflib.RDFS.subClassOf, rdflib.URIRef(ns.bsfs.Node)), @@ -350,6 +351,7 @@ class TestSparqlStore(unittest.TestCase): # instances of old classes were removed self.assertSetEqual(set(store._graph), { # schema hierarchy + (rdflib.URIRef(ns.bsfs.Feature), rdflib.RDFS.subClassOf, rdflib.URIRef(ns.bsfs.Predicate)), (rdflib.URIRef(ns.bsfs.Entity), rdflib.RDFS.subClassOf, rdflib.URIRef(ns.bsfs.Node)), (rdflib.URIRef(ns.bsfs.Tag), rdflib.RDFS.subClassOf, rdflib.URIRef(ns.bsfs.Node)), (rdflib.URIRef(ns.bsfs.User), rdflib.RDFS.subClassOf, rdflib.URIRef(ns.bsfs.Node)), -- cgit v1.2.3 
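For reference, the migration behaviour that the tests above exercise can be summarised in a short usage sketch. The sketch is illustrative only: it assumes the standard W3C rdfs/xsd namespace URIs together with the project namespaces http://bsfs.ai/schema/ (bsfs) and http://bsfs.ai/schema/Entity# (bse), and it uses only the class and accessor names introduced by these patches.

    # Sketch: open an in-memory SparqlStore, migrate it to a compatible schema,
    # and inspect the result through the schema accessors.
    from bsfs import schema as bsc
    from bsfs.namespace import ns
    from bsfs.triple_store.sparql.sparql import SparqlStore

    store = SparqlStore.Open()

    # A compatible target schema. Note the bsfs:Number layer between
    # bsfs:Literal and xsd:integer required by the number literal adaptions.
    store.schema = bsc.from_string('''
        prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#>
        prefix xsd: <http://www.w3.org/2001/XMLSchema#>
        prefix bsfs: <http://bsfs.ai/schema/>
        prefix bse: <http://bsfs.ai/schema/Entity#>

        bsfs:Entity rdfs:subClassOf bsfs:Node .
        bsfs:Number rdfs:subClassOf bsfs:Literal .
        xsd:integer rdfs:subClassOf bsfs:Number .

        bse:filesize rdfs:subClassOf bsfs:Predicate ;
            rdfs:domain bsfs:Entity ;
            rdfs:range xsd:integer .
        ''')

    # The migrated types are reachable through the usual accessors, and the
    # literal hierarchy reflects the number root.
    assert store.schema.has_predicate(ns.bse.filesize)
    assert store.schema.literal(ns.xsd.integer) <= store.schema.literal(ns.bsfs.Number)
    # Assigning a schema that contradicts the stored definitions raises
    # errors.ConsistencyError instead (see the migration tests above).
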
From 1b7ef16c3795bb7112683662b8c22a774e219269 Mon Sep 17 00:00:00 2001 From: Matthias Baumgartner Date: Thu, 12 Jan 2023 16:57:58 +0100 Subject: schema to string --- bsfs/schema/schema.py | 2 + bsfs/schema/serialize.py | 104 ++++++++++++++++++++++++- test/schema/test_serialize.py | 173 +++++++++++++++++++++++++++++++++++++++++- 3 files changed, 274 insertions(+), 5 deletions(-) diff --git a/bsfs/schema/schema.py b/bsfs/schema/schema.py index 52ad191..bc50d4e 100644 --- a/bsfs/schema/schema.py +++ b/bsfs/schema/schema.py @@ -72,6 +72,8 @@ class Schema(): literals.add(types.ROOT_NUMBER) predicates.add(types.ROOT_FEATURE) + # FIXME: ensure that types derive from the right root? + # include parents in predicates set # TODO: review type annotations and ignores for python >= 3.11 (parents is _Type but should be typing.Self) predicates |= {par for pred in predicates for par in pred.parents()} # type: ignore [misc] diff --git a/bsfs/schema/serialize.py b/bsfs/schema/serialize.py index 0eb6628..a566d65 100644 --- a/bsfs/schema/serialize.py +++ b/bsfs/schema/serialize.py @@ -136,9 +136,107 @@ def from_string(schema_str: str) -> schema.Schema: -def to_string(schema_inst: schema.Schema) -> str: +def to_string(schema_inst: schema.Schema, fmt: str = 'turtle') -> str: + """Serialize a `bsfs.schema.Schema` to a string. + See `rdflib.Graph.serialize` for viable formats (default: turtle). """ - """ - raise NotImplementedError() + + # type of emitted triples. + T_TRIPLE = typing.Iterator[typing.Tuple[rdflib.URIRef, rdflib.URIRef, rdflib.term.Identifier]] + + def _type(tpe: types._Type) -> T_TRIPLE : + """Emit _Type properties (parent, annotations).""" + # emit parent + if tpe.parent is not None: + yield ( + rdflib.URIRef(tpe.uri), + rdflib.URIRef(ns.rdfs.subClassOf), + rdflib.URIRef(tpe.parent.uri), + ) + # emit annotations + for prop, value in tpe.annotations.items(): + yield ( + rdflib.URIRef(tpe.uri), + rdflib.URIRef(prop), + rdflib.Literal(value), # FIXME: datatype?! 
+ ) + + def _predicate(pred: types.Predicate) -> T_TRIPLE: + """Emit Predicate properties (domain, range, unique).""" + # no need to emit anything for the root predicate + if pred == types.ROOT_PREDICATE: + return + # emit domain + if pred.domain != getattr(pred.parent, 'domain', None): + yield ( + rdflib.URIRef(pred.uri), + rdflib.URIRef(ns.rdfs.domain), + rdflib.URIRef(pred.domain.uri), + ) + # emit range + if pred.range != getattr(pred.parent, 'range', None): + yield ( + rdflib.URIRef(pred.uri), + rdflib.URIRef(ns.rdfs.range), + rdflib.URIRef(pred.range.uri), + ) + # emit cardinality + if pred.unique != getattr(pred.parent, 'unique', None): + yield ( + rdflib.URIRef(pred.uri), + rdflib.URIRef(ns.bsfs.unique), + rdflib.Literal(pred.unique, datatype=rdflib.XSD.boolean), + ) + + def _feature(feat: types.Feature) -> T_TRIPLE: + """Emit Feature properties (dimension, dtype, distance).""" + # emit size + if feat.dimension != getattr(feat.parent, 'dimension', None): + yield ( + rdflib.URIRef(feat.uri), + rdflib.URIRef(ns.bsfs.dimension), + rdflib.Literal(feat.dimension, datatype=rdflib.XSD.integer), + ) + # emit dtype + if feat.dtype != getattr(feat.parent, 'dtype', None): + yield ( + rdflib.URIRef(feat.uri), + rdflib.URIRef(ns.bsfs.dtype), + rdflib.URIRef(feat.dtype), + ) + # emit distance + if feat.distance != getattr(feat.parent, 'distance', None): + yield ( + rdflib.URIRef(feat.uri), + rdflib.URIRef(ns.bsfs.distance), + rdflib.URIRef(feat.distance), + ) + + def _parse(node: types._Type) -> T_TRIPLE: + """Emit all properties of a type.""" + if isinstance(node, types._Type): # pylint: disable=protected-access + # NOTE: all nodes are _Type + yield from _type(node) + if isinstance(node, types.Predicate): + yield from _predicate(node) + if isinstance(node, types.Feature): + yield from _feature(node) + + # create graph + graph = rdflib.Graph() + # add triples to graph + nodes = itertools.chain( + schema_inst.nodes(), + schema_inst.literals(), + schema_inst.predicates()) + for node in nodes: + for triple in _parse(node): + graph.add(triple) + # add known namespaces for readability + # FIXME: more systematically (e.g. for all in ns?) + graph.bind('bsfs', rdflib.URIRef('http://bsfs.ai/schema/')) + graph.bind('bse', rdflib.URIRef('http://bsfs.ai/schema/Entity#')) + # serialize to turtle + return graph.serialize(format=fmt) ## EOF ## diff --git a/test/schema/test_serialize.py b/test/schema/test_serialize.py index f46b3a4..205150a 100644 --- a/test/schema/test_serialize.py +++ b/test/schema/test_serialize.py @@ -5,6 +5,7 @@ A copy of the license is provided with the project. 
Author: Matthias Baumgartner, 2022 """ # imports +import re import unittest # bsfs imports @@ -997,8 +998,176 @@ class TestFromString(unittest.TestCase): class TestToString(unittest.TestCase): - def test_stub(self): - raise NotImplementedError() + + def test_empty(self): + self.assertEqual(Schema(), from_string(to_string(Schema()))) + + def test_literal(self): + # root literals + l_str = types.ROOT_LITERAL.child(ns.xsd.string) + # derived literals + l_int = types.ROOT_NUMBER.child(ns.xsd.integer) + l_unsigned = l_int.child(ns.xsd.unsigned) + # create schema + schema = Schema(literals={l_int, l_str, l_unsigned}) + + schema_str = to_string(schema) + # all symbols are serialized + self.assertIn('xsd:string', schema_str) + self.assertIn('xsd:integer', schema_str) + self.assertIn('xsd:unsigned', schema_str) + # unserialize yields the original schema + self.assertEqual(schema, from_string(schema_str)) + + # literals that have no parent are ignored + schema = Schema(literals={types.Literal(ns.bsfs.Invalid, None)}) + self.assertEqual(Schema(), from_string(to_string(schema))) + self.assertNotIn('Invalid', to_string(schema)) + + # literal annotations are serialized + annotations = { + ns.rdfs.label: 'hello world', + ns.schema.description: 'some text', + ns.bsfs.foo: 1234, + ns.bsfs.bar: True, + } + l_str = types.ROOT_LITERAL.child(ns.xsd.string, **annotations) + self.assertDictEqual( + annotations, + from_string(to_string(Schema(literals={l_str}))).literal(ns.xsd.string).annotations) + + + def test_node(self): + # root nodes + n_ent = types.ROOT_NODE.child(ns.bsfs.Entity) + n_tag = types.ROOT_NODE.child(ns.bsfs.Tag) + # derived nodes + n_img = n_ent.child(ns.bsfs.Image) + n_doc = n_ent.child(ns.bsfs.Document) + n_grp = n_tag.child(ns.bsfs.Group) + # create schema + schema = Schema(nodes={n_ent, n_img, n_doc, n_tag, n_grp}) + + schema_str = to_string(schema) + # all symbols are serialized + self.assertIn('bsfs:Entity', schema_str) + self.assertIn('bsfs:Tag', schema_str) + self.assertIn('bsfs:Image', schema_str) + self.assertIn('bsfs:Document', schema_str) + self.assertIn('bsfs:Group', schema_str) + # unserialize yields the original schema + self.assertEqual(schema, from_string(schema_str)) + + # nodes that have no parent are ignored + schema = Schema(nodes={types.Node(ns.bsfs.Invalid, None)}) + self.assertEqual(Schema(), from_string(to_string(schema))) + self.assertNotIn('Invalid', to_string(schema)) + + # node annotations are serialized + annotations = { + ns.rdfs.label: 'hello world', + ns.schema.description: 'some text', + ns.bsfs.foo: 1234, + ns.bsfs.bar: True, + } + n_ent = types.ROOT_NODE.child(ns.bsfs.Entity, **annotations) + self.assertDictEqual( + annotations, + from_string(to_string(Schema(nodes={n_ent}))).node(ns.bsfs.Entity).annotations) + + + def test_predicate(self): + # auxiliary types + n_ent = types.ROOT_NODE.child(ns.bsfs.Entity) + l_str = types.ROOT_LITERAL.child(ns.xsd.string) + # root predicates + p_annotation = types.ROOT_PREDICATE.child(ns.bsfs.Annotation, domain=n_ent) + p_owner = types.ROOT_PREDICATE.child(ns.bse.owner, range=l_str, unique=True) + # derived predicates + p_comment = p_annotation.child(ns.bse.comment, range=l_str) # inherits domain + p_note = p_comment.child(ns.bse.note, unique=True) # inherits domain/range + # create schema + schema = Schema({p_owner, p_comment, p_note}) + + schema_str = to_string(schema) + # all symbols are serialized + self.assertIn('bsfs:Entity', schema_str) + self.assertIn('xsd:string', schema_str) + self.assertIn('bsfs:Annotation', 
schema_str) + self.assertIn('bse:comment', schema_str) + self.assertIn('bse:owner', schema_str) + self.assertIn('bse:note', schema_str) + # inherited properties are not serialized + self.assertIsNotNone(re.search(r'bse:comment[^\.]*rdfs:range[^\.]', schema_str)) + self.assertIsNone(re.search(r'bse:comment[^\.]*rdfs:domain[^\.]', schema_str)) + #p_note has no domain/range + self.assertIsNone(re.search(r'bse:note[^\.]*rdfs:domain[^\.]', schema_str)) + self.assertIsNone(re.search(r'bse:note[^\.]*rdfs:range[^\.]', schema_str)) + # unserialize yields the original schema + self.assertEqual(schema, from_string(schema_str)) + + # predicate annotations are serialized + annotations = { + ns.rdfs.label: 'hello world', + ns.schema.description: 'some text', + ns.bsfs.foo: 1234, + ns.bsfs.bar: False, + } + p_annotation = types.ROOT_PREDICATE.child(ns.bsfs.Annotation, **annotations) + self.assertDictEqual( + annotations, + from_string(to_string(Schema({p_annotation}))).predicate(ns.bsfs.Annotation).annotations) + + + def test_feature(self): + # auxiliary types + n_ent = types.ROOT_NODE.child(ns.bsfs.Entity) + l_array = types.ROOT_LITERAL.child(ns.bsfs.array) + # root features + f_colors = types.ROOT_FEATURE.child(URI('http://bsfs.ai/schema/Feature/colors'), + range=l_array, unique=True, distance=ns.bsfs.cosine) + # derived features + f_colors1234 = f_colors.child(URI('http://bsfs.ai/schema/Feature/colors#1234'), + dimension=1024, domain=n_ent) # inherits range/dtype/distance + f_colors4321 = f_colors.child(URI('http://bsfs.ai/schema/Feature/colors#4321'), + dimension=2048, distance=ns.bsfs.euclidean) # inherits domain/range/dtype + # create schema + schema = Schema({f_colors, f_colors1234, f_colors4321}) + + schema_str = to_string(schema) + # all symbols are serialized + self.assertIn('bsfs:Entity', schema_str) + self.assertIn('bsfs:array', schema_str) + self.assertIn('[^\.]*rdfs:domain[^\.]', schema_str)) + self.assertIsNotNone(re.search(r'[^\.]*bsfs:dimension[^\.]', schema_str)) + self.assertIsNone(re.search(r'[^\.]*rdfs:range[^\.]', schema_str)) + self.assertIsNone(re.search(r'[^\.]*bsfs:dtype[^\.]', schema_str)) + self.assertIsNone(re.search(r'[^\.]*bsfs:distance[^\.]', schema_str)) + self.assertIsNotNone(re.search(r'[^\.]*bsfs:dimension[^\.]', schema_str)) + self.assertIsNotNone(re.search(r'[^\.]*bsfs:distance[^\.]', schema_str)) + self.assertIsNone(re.search(r'[^\.]*rdfs:domain[^\.]', schema_str)) + self.assertIsNone(re.search(r'[^\.]*rdfs:range[^\.]', schema_str)) + self.assertIsNone(re.search(r'[^\.]*bsfs:dtype[^\.]', schema_str)) + # unserialize yields the original schema + self.assertEqual(schema, from_string(schema_str)) + + # predicate annotations are serialized + annotations = { + ns.rdfs.label: 'hello world', + ns.schema.description: 'some text', + ns.bsfs.foo: 1234, + ns.bsfs.bar: False, + } + f_colors = types.ROOT_FEATURE.child(URI('http://bsfs.ai/schema/Feature/colors'), + domain=n_ent, range=l_array, unique=True, dtype=ns.bsfs.f16, distance=ns.bsfs.euclidean, + **annotations) + self.assertDictEqual( + annotations, + from_string(to_string(Schema({f_colors}))).predicate(URI('http://bsfs.ai/schema/Feature/colors')).annotations) ## main ## -- cgit v1.2.3 From 60257ed3c2aa6ea2891f362a691bde9d7ef17831 Mon Sep 17 00:00:00 2001 From: Matthias Baumgartner Date: Fri, 13 Jan 2023 12:22:34 +0100 Subject: schema type comparison across classes --- bsfs/graph/resolve.py | 10 +++++----- bsfs/query/validator.py | 31 ++++++++++++------------------ bsfs/schema/types.py | 16 ++++++++++++---- 
bsfs/triple_store/sparql/parse_filter.py | 15 ++++++--------- test/schema/test_types.py | 33 ++++++++++++++++---------------- 5 files changed, 52 insertions(+), 53 deletions(-) diff --git a/bsfs/graph/resolve.py b/bsfs/graph/resolve.py index 9b5f631..b671204 100644 --- a/bsfs/graph/resolve.py +++ b/bsfs/graph/resolve.py @@ -96,11 +96,11 @@ class Filter(): # parse child expression subrng = self._parse_predicate_expression(pred) # determine the next type - try: - if rng is None or subrng > rng: # pick most generic range - rng = subrng - except TypeError as err: - raise errors.ConsistencyError(f'ranges {subrng} and {rng} are not related') from err + if rng is None or subrng > rng: # pick most generic range + rng = subrng + # check range consistency + if not subrng <= rng and not subrng >= rng: + raise errors.ConsistencyError(f'ranges {subrng} and {rng} are not related') if not isinstance(rng, (bsc.Node, bsc.Literal)): raise errors.BackendError(f'the range of node {node} is undefined') return rng diff --git a/bsfs/query/validator.py b/bsfs/query/validator.py index 75b51ca..ecea951 100644 --- a/bsfs/query/validator.py +++ b/bsfs/query/validator.py @@ -113,25 +113,18 @@ class Filter(): for pred in node: # parse child expression subdom, subrng = self._parse_predicate_expression(pred) - try: - # determine overall domain - if dom is None or subdom < dom: # pick most specific domain - dom = subdom - # domains must be related across all child expressions - if not subdom <= dom and not subdom >= dom: - raise errors.ConsistencyError(f'domains {subdom} and {dom} are not related') - except TypeError as err: # compared literal vs. node - raise errors.ConsistencyError(f'domains {subdom} and {dom} are not of the same type') from err - - try: - # determine overall range - if rng is None or subrng > rng: # pick most generic range - rng = subrng - # ranges must be related across all child expressions - if not subrng <= rng and not subrng >= rng: - raise errors.ConsistencyError(f'ranges {subrng} and {rng} are not related') - except TypeError as err: # compared literal vs. node - raise errors.ConsistencyError(f'ranges {subrng} and {rng} are not of the same type') from err + # determine overall domain + if dom is None or subdom < dom: # pick most specific domain + dom = subdom + # domains must be related across all child expressions + if not subdom <= dom and not subdom >= dom: + raise errors.ConsistencyError(f'domains {subdom} and {dom} are not related') + # determine overall range + if rng is None or subrng > rng: # pick most generic range + rng = subrng + # ranges must be related across all child expressions + if not subrng <= rng and not subrng >= rng: + raise errors.ConsistencyError(f'ranges {subrng} and {rng} are not related') # OneOf guarantees at least one expression, dom and rng are always bsc.Vertex. # mypy does not realize this, hence we ignore the warning. return dom, rng # type: ignore [return-value] diff --git a/bsfs/schema/types.py b/bsfs/schema/types.py index 6257dee..95dc66a 100644 --- a/bsfs/schema/types.py +++ b/bsfs/schema/types.py @@ -150,8 +150,10 @@ class _Type(): def __lt__(self, other: typing.Any) -> bool: """Return True iff *self* is a true subclass of *other*.""" - if not isinstance(other, type(self)): + if not isinstance(other, _Type): return NotImplemented + if not isinstance(other, type(self)): # FIXME: necessary? 
+ return False if self.uri == other.uri: # equivalence return False if self in other.parents(): # superclass @@ -163,8 +165,10 @@ class _Type(): def __le__(self, other: typing.Any) -> bool: """Return True iff *self* is equivalent or a subclass of *other*.""" - if not isinstance(other, type(self)): + if not isinstance(other, _Type): return NotImplemented + if not isinstance(other, type(self)): # FIXME: necessary? + return False if self.uri == other.uri: # equivalence return True if self in other.parents(): # superclass @@ -176,8 +180,10 @@ class _Type(): def __gt__(self, other: typing.Any) -> bool: """Return True iff *self* is a true superclass of *other*.""" - if not isinstance(other, type(self)): + if not isinstance(other, _Type): return NotImplemented + if not isinstance(other, type(self)): # FIXME: necessary? + return False if self.uri == other.uri: # equivalence return False if self in other.parents(): # superclass @@ -189,8 +195,10 @@ class _Type(): def __ge__(self, other: typing.Any) -> bool: """Return True iff *self* is eqiuvalent or a superclass of *other*.""" - if not isinstance(other, type(self)): + if not isinstance(other, _Type): return NotImplemented + if not isinstance(other, type(self)): # FIXME: necessary? + return False if self.uri == other.uri: # equivalence return True if self in other.parents(): # superclass diff --git a/bsfs/triple_store/sparql/parse_filter.py b/bsfs/triple_store/sparql/parse_filter.py index 18a3288..5d8a2d9 100644 --- a/bsfs/triple_store/sparql/parse_filter.py +++ b/bsfs/triple_store/sparql/parse_filter.py @@ -131,15 +131,12 @@ class Filter(): puri, subrng = self._parse_predicate_expression(node_type, pred) # track predicate uris suburi.add(puri) - try: - # check for more generic range - if rng is None or subrng > rng: - rng = subrng - # check range consistency - if not subrng <= rng and not subrng >= rng: - raise errors.ConsistencyError(f'ranges {subrng} and {rng} are not related') - except TypeError as err: # subrng and rng are not comparable - raise errors.ConsistencyError(f'ranges {subrng} and {rng} are not related') from err + # check for more generic range + if rng is None or subrng > rng: + rng = subrng + # check range consistency + if not subrng <= rng and not subrng >= rng: + raise errors.ConsistencyError(f'ranges {subrng} and {rng} are not related') # return joint predicate expression and next range # OneOf guarantees at least one expression, rng is always a bsc.Vertex. # mypy does not realize this, hence we ignore the warning. diff --git a/test/schema/test_types.py b/test/schema/test_types.py index 26da270..1eeafa1 100644 --- a/test/schema/test_types.py +++ b/test/schema/test_types.py @@ -140,30 +140,31 @@ class TestType(unittest.TestCase): self.assertFalse(bike >= bicycle) self.assertFalse(bike == bicycle) - # can compare types along the class hierarchy + # comparing different classes returns False ... + # ... when classes are hierarchically related class Foo(_Type): pass foo = Foo('Foo', bike) - self.assertTrue(foo < bike) - self.assertTrue(foo <= bike) + self.assertFalse(foo < bike) + self.assertFalse(foo <= bike) self.assertFalse(foo > bike) self.assertFalse(foo >= bike) # goes both ways self.assertFalse(bike < foo) self.assertFalse(bike <= foo) - self.assertTrue(bike > foo) - self.assertTrue(bike >= foo) - # cannot compare unrelated classes + self.assertFalse(bike > foo) + self.assertFalse(bike >= foo) + # ... 
when classes are unrelated class Bar(_Type): pass bar = Bar('Bar', bike) - self.assertRaises(TypeError, operator.lt, foo, bar) - self.assertRaises(TypeError, operator.le, foo, bar) - self.assertRaises(TypeError, operator.gt, foo, bar) - self.assertRaises(TypeError, operator.ge, foo, bar) + self.assertFalse(foo < bar) + self.assertFalse(foo <= bar) + self.assertFalse(foo > bar) + self.assertFalse(foo >= bar) # goes both ways - self.assertRaises(TypeError, operator.lt, bar, foo) - self.assertRaises(TypeError, operator.le, bar, foo) - self.assertRaises(TypeError, operator.gt, bar, foo) - self.assertRaises(TypeError, operator.ge, bar, foo) + self.assertFalse(bar < foo) + self.assertFalse(bar <= foo) + self.assertFalse(bar > foo) + self.assertFalse(bar >= foo) class TestPredicate(unittest.TestCase): @@ -262,7 +263,7 @@ class TestPredicate(unittest.TestCase): # range must be subtype of parent's range self.assertRaises(errors.ConsistencyError, tag.child, ns.bse.foo, range=n_root) self.assertRaises(errors.ConsistencyError, tag.child, ns.bse.foo, range=Node(ns.bsfs.Image, n_root)) - self.assertRaises(TypeError, tag.child, ns.bse.foo, range=Literal(ns.bsfs.Tag, l_root)) + self.assertRaises(errors.ConsistencyError, tag.child, ns.bse.foo, range=Literal(ns.bsfs.Tag, l_root)) # range can be subtyped from ROOT_VERTEX to Node or Literal self.assertEqual(n_root, root.child(ns.bse.foo, range=n_root).range) self.assertEqual(l_root, root.child(ns.bse.foo, range=l_root).range) @@ -370,7 +371,7 @@ class TestFeature(unittest.TestCase): # range must be subtype of parent's range self.assertRaises(errors.ConsistencyError, colors.child, ns.bse.foo, range=Literal(ns.bsfs.Literal, None)) self.assertRaises(errors.ConsistencyError, colors.child, ns.bse.foo, range=Literal(ns.bsfs.foo, Literal(ns.bsfs.Literal, None))) - self.assertRaises(TypeError, colors.child, ns.bse.foo, range=Node(ns.bsfs.Tag, n_root)) + self.assertRaises(errors.ConsistencyError, colors.child, ns.bse.foo, range=Node(ns.bsfs.Tag, n_root)) ## main ## -- cgit v1.2.3 From ccaee71e2b6135d3b324fe551c8652940b67aab3 Mon Sep 17 00:00:00 2001 From: Matthias Baumgartner Date: Sun, 15 Jan 2023 20:57:42 +0100 Subject: Feature as Literal instead of Predicate subtype --- bsfs/schema/__init__.py | 7 +- bsfs/schema/schema.py | 4 +- bsfs/schema/serialize.py | 83 +++---- bsfs/schema/types.py | 162 +++++++------- test/graph/test_nodes.py | 4 +- test/graph/test_resolve.py | 2 + test/query/test_validator.py | 2 + test/schema/test_schema.py | 34 +-- test/schema/test_serialize.py | 308 +++++++------------------- test/schema/test_types.py | 52 +---- test/triple_store/sparql/test_parse_filter.py | 5 +- test/triple_store/sparql/test_sparql.py | 8 +- 12 files changed, 257 insertions(+), 414 deletions(-) diff --git a/bsfs/schema/__init__.py b/bsfs/schema/__init__.py index 31d7d61..f53512e 100644 --- a/bsfs/schema/__init__.py +++ b/bsfs/schema/__init__.py @@ -10,8 +10,11 @@ import typing # inner-module imports from .schema import Schema from .serialize import from_string, to_string -from .types import Literal, Node, Predicate, Vertex, \ - ROOT_FEATURE, ROOT_LITERAL, ROOT_NODE, ROOT_NUMBER, ROOT_PREDICATE, ROOT_VERTEX +from .types import Literal, Node, Predicate, Vertex, Feature, \ + ROOT_VERTEX, ROOT_NODE, ROOT_LITERAL, \ + ROOT_NUMBER, ROOT_TIME, \ + ROOT_ARRAY, ROOT_FEATURE, \ + ROOT_PREDICATE # exports __all__: typing.Sequence[str] = ( diff --git a/bsfs/schema/schema.py b/bsfs/schema/schema.py index bc50d4e..8d9a821 100644 --- a/bsfs/schema/schema.py +++ b/bsfs/schema/schema.py 
@@ -70,7 +70,9 @@ class Schema(): predicates.add(types.ROOT_PREDICATE) # add minimally necessary types to the schema literals.add(types.ROOT_NUMBER) - predicates.add(types.ROOT_FEATURE) + literals.add(types.ROOT_TIME) + literals.add(types.ROOT_ARRAY) + literals.add(types.ROOT_FEATURE) # FIXME: ensure that types derive from the right root? diff --git a/bsfs/schema/serialize.py b/bsfs/schema/serialize.py index a566d65..8b31737 100644 --- a/bsfs/schema/serialize.py +++ b/bsfs/schema/serialize.py @@ -35,13 +35,27 @@ def from_string(schema_str: str) -> schema.Schema: graph.parse(data=schema_str, format='turtle') # helper functions + # FIXME: type annotation + def _fetch_value(subject: URI, predicate: rdflib.URIRef, value_factory) -> typing.Optional[typing.Any]: + """Fetch the object of a given subject and predicate. + Raises a `errors.ConsistencyError` if multiple objects match. + """ + values = list(graph.objects(rdflib.URIRef(subject), predicate)) + if len(values) == 0: + return None + if len(values) == 1: + return value_factory(values[0]) + raise errors.ConsistencyError( + f'{subject} has multiple values for predicate {str(predicate)}, expected zero or one') + def _convert(value): """Convert the subject type from rdflib to a bsfs native type.""" if isinstance(value, rdflib.Literal): return value.value if isinstance(value, rdflib.URIRef): return URI(value) - raise errors.UnreachableError(f'expected Literal or URIRef, found {typename(value)}') + # value is neither a node nor a literal, but e.g. a blank node + raise errors.BackendError(f'expected Literal or URIRef, found {typename(value)}') def _fetch_hierarchically(factory, curr): """Walk through a rdfs:subClassOf hierarchy, creating symbols along the way.""" @@ -71,30 +85,36 @@ def from_string(schema_str: str) -> schema.Schema: raise errors.ConsistencyError('inconsistent nodes') # fetch literals - literals = set(_fetch_hierarchically(types.Literal, types.ROOT_LITERAL)) + def _build_literal(uri, parent, **annotations): + """Literal factory.""" + # break out on root feature type + if uri == types.ROOT_FEATURE.uri: + return types.ROOT_FEATURE + # handle feature types + if isinstance(parent, types.Feature): + # clean annotations + annotations.pop(ns.bsfs.dimension, None) + annotations.pop(ns.bsfs.dtype, None) + annotations.pop(ns.bsfs.distance, None) + # get dimension + dimension = _fetch_value(uri, rdflib.URIRef(ns.bsfs.dimension), int) + # get dtype + dtype = _fetch_value(uri, rdflib.URIRef(ns.bsfs.dtype), URI) + # get distance + distance = _fetch_value(uri, rdflib.URIRef(ns.bsfs.distance), URI) + # return feature + return parent.child(URI(uri), dtype=dtype, dimension=dimension, distance=distance, **annotations) + # handle non-feature types + return parent.child(URI(uri), **annotations) + + literals = set(_fetch_hierarchically(_build_literal, types.ROOT_LITERAL)) literals_lut = {lit.uri: lit for lit in literals} if len(literals_lut) != len(literals): raise errors.ConsistencyError('inconsistent literals') # fetch predicates - # FIXME: type annotation - def _fetch_value(subject: URI, predicate: rdflib.URIRef, value_factory) -> typing.Optional[typing.Any]: - """Fetch the object of a given subject and predicate. - Raises a `errors.ConsistencyError` if multiple objects match. 
- """ - values = list(graph.objects(rdflib.URIRef(subject), predicate)) - if len(values) == 0: - return None - if len(values) == 1: - return value_factory(values[0]) - raise errors.ConsistencyError( - f'{subject} has multiple values for predicate {str(predicate)}, expected zero or one') - def _build_predicate(uri, parent, **annotations): """Predicate factory.""" - # break out on root feature type - if uri == types.ROOT_FEATURE.uri: - return types.ROOT_FEATURE # clean annotations annotations.pop(ns.rdfs.domain, None) annotations.pop(ns.rdfs.range, None) @@ -113,23 +133,9 @@ def from_string(schema_str: str) -> schema.Schema: rng = nodes_lut.get(rng, literals_lut.get(rng)) # get unique unique = _fetch_value(uri, rdflib.URIRef(ns.bsfs.unique), bool) - # handle feature types - if isinstance(parent, types.Feature): - # clean annotations - annotations.pop(ns.bsfs.dimension, None) - annotations.pop(ns.bsfs.dtype, None) - annotations.pop(ns.bsfs.distance, None) - # get dimension - dimension = _fetch_value(uri, rdflib.URIRef(ns.bsfs.dimension), int) - # get dtype - dtype = _fetch_value(uri, rdflib.URIRef(ns.bsfs.dtype), URI) - # get distance - distance = _fetch_value(uri, rdflib.URIRef(ns.bsfs.distance), URI) - # return feature - return parent.child(URI(uri), domain=dom, range=rng, unique=unique, - dtype=dtype, dimension=dimension, distance=distance, **annotations) - # handle non-feature predicate + # build predicate return parent.child(URI(uri), domain=dom, range=rng, unique=unique, **annotations) + predicates = _fetch_hierarchically(_build_predicate, types.ROOT_PREDICATE) return schema.Schema(predicates, nodes, literals) @@ -214,9 +220,12 @@ def to_string(schema_inst: schema.Schema, fmt: str = 'turtle') -> str: def _parse(node: types._Type) -> T_TRIPLE: """Emit all properties of a type.""" - if isinstance(node, types._Type): # pylint: disable=protected-access - # NOTE: all nodes are _Type - yield from _type(node) + # check arg + if not isinstance(node, types._Type): # pylint: disable=protected-access + raise TypeError(node) + # emit _Type essentials + yield from _type(node) + # emit properties of derived types if isinstance(node, types.Predicate): yield from _predicate(node) if isinstance(node, types.Feature): diff --git a/bsfs/schema/types.py b/bsfs/schema/types.py index 95dc66a..3a2e10c 100644 --- a/bsfs/schema/types.py +++ b/bsfs/schema/types.py @@ -226,10 +226,70 @@ class Node(Vertex): class Literal(Vertex): """Literal type.""" parent: typing.Optional['Literal'] - def __init__(self, uri: URI, parent: typing.Optional['Literal'] ,**kwargs): + def __init__(self, uri: URI, parent: typing.Optional['Literal'], **kwargs): super().__init__(uri, parent, **kwargs) +class Feature(Literal): + """Feature type.""" + + # Number of feature vector dimensions. + dimension: int + + # Feature vector datatype. + dtype: URI + + # Distance measure to compare feature vectors. 
+ distance: URI + + def __init__( + self, + # Type members + uri: URI, + parent: typing.Optional[Literal], + # Feature members + dimension: int, + dtype: URI, + distance: URI, + **kwargs, + ): + super().__init__(uri, parent, **kwargs) + self.dimension = int(dimension) + self.dtype = URI(dtype) + self.distance = URI(distance) + + def __hash__(self) -> int: + return hash((super().__hash__(), self.dimension, self.dtype, self.distance)) + + def __eq__(self, other: typing.Any) -> bool: + return super().__eq__(other) \ + and self.dimension == other.dimension \ + and self.dtype == other.dtype \ + and self.distance == other.distance + + def child( + self, + uri: URI, + dimension: typing.Optional[int] = None, + dtype: typing.Optional[URI] = None, + distance: typing.Optional[URI] = None, + **kwargs, + ): + """Return a child of the current class.""" + if dimension is None: + dimension = self.dimension + if dtype is None: + dtype = self.dtype + if distance is None: + distance = self.distance + return super().child( + uri=uri, + dimension=dimension, + dtype=dtype, + distance=distance, + **kwargs, + ) + class Predicate(_Type): """Predicate base type.""" @@ -304,77 +364,6 @@ class Predicate(_Type): ) -class Feature(Predicate): - """Feature base type.""" - - # Number of feature vector dimensions. - dimension: int - - # Feature vector datatype. - dtype: URI - - # Distance measure to compare feature vectors. - distance: URI - - def __init__( - self, - # Type members - uri: URI, - parent: typing.Optional[Predicate], - # Predicate members - domain: Node, - range: Literal, # pylint: disable=redefined-builtin - unique: bool, - # Feature members - dimension: int, - dtype: URI, - distance: URI, - **kwargs, - ): - super().__init__(uri, parent, domain, range, unique, **kwargs) - self.dimension = int(dimension) - self.dtype = URI(dtype) - self.distance = URI(distance) - - def __hash__(self) -> int: - return hash((super().__hash__(), self.dimension, self.dtype, self.distance)) - - def __eq__(self, other: typing.Any) -> bool: - return super().__eq__(other) \ - and self.dimension == other.dimension \ - and self.dtype == other.dtype \ - and self.distance == other.distance - - def child( - self, - uri: URI, - domain: typing.Optional[Node] = None, - range: typing.Optional[Vertex] = None, # pylint: disable=redefined-builtin - unique: typing.Optional[bool] = None, - dimension: typing.Optional[int] = None, - dtype: typing.Optional[URI] = None, - distance: typing.Optional[URI] = None, - **kwargs, - ): - """Return a child of the current class.""" - if dimension is None: - dimension = self.dimension - if dtype is None: - dtype = self.dtype - if distance is None: - distance = self.distance - return super().child( - uri=uri, - domain=domain, - range=range, - unique=unique, - dimension=dimension, - dtype=dtype, - distance=distance, - **kwargs, - ) - - # essential vertices ROOT_VERTEX = Vertex( uri=ns.bsfs.Vertex, @@ -396,24 +385,31 @@ ROOT_NUMBER = Literal( parent=ROOT_LITERAL, ) -# essential predicates -ROOT_PREDICATE = Predicate( - uri=ns.bsfs.Predicate, - parent=None, - domain=ROOT_NODE, - range=ROOT_VERTEX, - unique=False, +ROOT_TIME = Literal( + uri=ns.bsfs.Time, + parent=ROOT_LITERAL, + ) + +ROOT_ARRAY = Literal( + uri=ns.bsfs.Array, + parent=ROOT_LITERAL, ) ROOT_FEATURE = Feature( uri=ns.bsfs.Feature, - parent=ROOT_PREDICATE, - domain=ROOT_NODE, - range=ROOT_LITERAL, - unique=False, + parent=ROOT_ARRAY, dimension=1, dtype=ns.bsfs.f16, distance=ns.bsfs.euclidean, ) +# essential predicates +ROOT_PREDICATE = Predicate( + 
uri=ns.bsfs.Predicate, + parent=None, + domain=ROOT_NODE, + range=ROOT_VERTEX, + unique=False, + ) + ## EOF ## diff --git a/test/graph/test_nodes.py b/test/graph/test_nodes.py index 81da60f..2870f35 100644 --- a/test/graph/test_nodes.py +++ b/test/graph/test_nodes.py @@ -75,12 +75,14 @@ class TestNodes(unittest.TestCase): ''') self.schema_triples = { # schema hierarchy - (rdflib.URIRef(ns.bsfs.Feature), rdflib.RDFS.subClassOf, rdflib.URIRef(ns.bsfs.Predicate)), (rdflib.URIRef(ns.bsfs.Entity), rdflib.RDFS.subClassOf, rdflib.URIRef(ns.bsfs.Node)), (rdflib.URIRef(ns.bsfs.Tag), rdflib.RDFS.subClassOf, rdflib.URIRef(ns.bsfs.Node)), (rdflib.URIRef(ns.bsfs.User), rdflib.RDFS.subClassOf, rdflib.URIRef(ns.bsfs.Node)), (rdflib.URIRef(ns.xsd.string), rdflib.RDFS.subClassOf, rdflib.URIRef(ns.bsfs.Literal)), + (rdflib.URIRef(ns.bsfs.Array), rdflib.RDFS.subClassOf, rdflib.URIRef(ns.bsfs.Literal)), + (rdflib.URIRef(ns.bsfs.Feature), rdflib.RDFS.subClassOf, rdflib.URIRef(ns.bsfs.Array)), (rdflib.URIRef(ns.bsfs.Number), rdflib.RDFS.subClassOf, rdflib.URIRef(ns.bsfs.Literal)), + (rdflib.URIRef(ns.bsfs.Time), rdflib.RDFS.subClassOf, rdflib.URIRef(ns.bsfs.Literal)), (rdflib.URIRef(ns.xsd.integer), rdflib.RDFS.subClassOf, rdflib.URIRef(ns.bsfs.Number)), (rdflib.URIRef(ns.bsm.t_created), rdflib.RDFS.subClassOf, rdflib.URIRef(ns.bsfs.Predicate)), (rdflib.URIRef(ns.bse.comment), rdflib.RDFS.subClassOf, rdflib.URIRef(ns.bsfs.Predicate)), diff --git a/test/graph/test_resolve.py b/test/graph/test_resolve.py index 9cde38e..0861a53 100644 --- a/test/graph/test_resolve.py +++ b/test/graph/test_resolve.py @@ -42,6 +42,8 @@ class TestFilter(unittest.TestCase): bsfs:Tag rdfs:subClassOf bsfs:Node . xsd:string rdfs:subClassOf bsfs:Literal . bsfs:Number rdfs:subClassOf bsfs:Literal . + bsfs:Array rdfs:subClassOf bsfs:Literal . + bsfs:Feature rdfs:subClassOf bsfs:Array . xsd:integer rdfs:subClassOf bsfs:Number . bse:comment rdfs:subClassOf bsfs:Predicate ; diff --git a/test/query/test_validator.py b/test/query/test_validator.py index ea56a57..63ead52 100644 --- a/test/query/test_validator.py +++ b/test/query/test_validator.py @@ -34,6 +34,8 @@ class TestFilter(unittest.TestCase): bsfs:Tag rdfs:subClassOf bsfs:Node . xsd:string rdfs:subClassOf bsfs:Literal . bsfs:Number rdfs:subClassOf bsfs:Literal . + bsfs:Array rdfs:subClassOf bsfs:Literal . + bsfs:Feature rdfs:subClassOf bsfs:Array . xsd:integer rdfs:subClassOf bsfs:Number . 
bse:comment rdfs:subClassOf bsfs:Predicate ; diff --git a/test/schema/test_schema.py b/test/schema/test_schema.py index c19c226..32dbc93 100644 --- a/test/schema/test_schema.py +++ b/test/schema/test_schema.py @@ -66,30 +66,32 @@ class TestSchema(unittest.TestCase): # literals self.l_root = types.ROOT_LITERAL self.l_number = types.ROOT_NUMBER + self.l_array = types.ROOT_ARRAY + self.l_time = types.ROOT_TIME self.l_string = self.l_root.child(ns.xsd.string) self.l_integer = self.l_root.child(ns.xsd.integer) self.l_unused = self.l_root.child(ns.xsd.boolean) - self.literals = [self.l_root, self.l_number, self.l_string, self.l_integer, self.l_unused] + self.f_root = types.ROOT_FEATURE + self.literals = [self.l_root, self.l_array, self.f_root, self.l_number, self.l_time, self.l_string, self.l_integer, self.l_unused] # predicates self.p_root = types.ROOT_PREDICATE - self.f_root = types.ROOT_FEATURE self.p_tag = self.p_root.child(ns.bse.tag, self.n_ent, self.n_tag, False) self.p_group = self.p_tag.child(ns.bse.group, self.n_img, self.n_tag, False) self.p_comment = self.p_root.child(ns.bse.comment, self.n_root, self.l_string, True) - self.predicates = [self.p_root, self.f_root, self.p_tag, self.p_group, self.p_comment] + self.predicates = [self.p_root, self.p_tag, self.p_group, self.p_comment] def test_construction(self): # no args yields a minimal schema schema = Schema() self.assertSetEqual(set(schema.nodes()), {self.n_root}) - self.assertSetEqual(set(schema.literals()), {self.l_root, self.l_number}) - self.assertSetEqual(set(schema.predicates()), {self.p_root, self.f_root}) + self.assertSetEqual(set(schema.literals()), {self.l_root, self.l_number, self.l_array, self.l_time, self.f_root}) + self.assertSetEqual(set(schema.predicates()), {self.p_root}) # nodes and literals are optional schema = Schema(self.predicates) self.assertSetEqual(set(schema.nodes()), {self.n_root, self.n_ent, self.n_img, self.n_tag}) - self.assertSetEqual(set(schema.literals()), {self.l_root, self.l_string, self.l_number}) + self.assertSetEqual(set(schema.literals()), {self.l_root, self.l_string, self.l_number, self.l_time, self.l_array, self.f_root}) self.assertSetEqual(set(schema.predicates()), set(self.predicates)) # predicates, nodes, and literals are respected @@ -110,21 +112,21 @@ class TestSchema(unittest.TestCase): # literals are complete schema = Schema(self.predicates, self.nodes, None) - self.assertSetEqual(set(schema.literals()), {self.l_root, self.l_string, self.l_number}) + self.assertSetEqual(set(schema.literals()), {self.l_root, self.l_string, self.l_number, self.l_array, self.l_time, self.f_root}) schema = Schema(self.predicates, self.nodes, []) - self.assertSetEqual(set(schema.literals()), {self.l_root, self.l_string, self.l_number}) + self.assertSetEqual(set(schema.literals()), {self.l_root, self.l_string, self.l_number, self.l_array, self.l_time, self.f_root}) schema = Schema(self.predicates, self.nodes, [self.l_string]) - self.assertSetEqual(set(schema.literals()), {self.l_root, self.l_string, self.l_number}) + self.assertSetEqual(set(schema.literals()), {self.l_root, self.l_string, self.l_number, self.l_array, self.l_time, self.f_root}) schema = Schema(self.predicates, self.nodes, [self.l_integer]) - self.assertSetEqual(set(schema.literals()), {self.l_root, self.l_string, self.l_integer, self.l_number}) + self.assertSetEqual(set(schema.literals()), {self.l_root, self.l_string, self.l_integer, self.l_number, self.l_array, self.l_time, self.f_root}) schema = Schema(self.predicates, self.nodes, 
[self.l_integer, self.l_unused]) self.assertSetEqual(set(schema.literals()), set(self.literals)) # predicates are complete schema = Schema([], self.nodes, self.literals) - self.assertSetEqual(set(schema.predicates()), {self.p_root, self.f_root}) + self.assertSetEqual(set(schema.predicates()), {self.p_root}) schema = Schema([self.p_group], self.nodes, self.literals) - self.assertSetEqual(set(schema.predicates()), {self.p_root, self.f_root, self.p_tag, self.p_group}) + self.assertSetEqual(set(schema.predicates()), {self.p_root, self.p_tag, self.p_group}) schema = Schema([self.p_group, self.p_comment], self.nodes, self.literals) self.assertSetEqual(set(schema.predicates()), set(self.predicates)) @@ -176,14 +178,14 @@ class TestSchema(unittest.TestCase): self.assertEqual(str(Schema(self.predicates, self.nodes, self.literals)), 'Schema()') # repr conversion with only default nodes, literals, and predicates n = [ns.bsfs.Node] - l = [ns.bsfs.Literal, ns.bsfs.Number] - p = [ns.bsfs.Feature, ns.bsfs.Predicate] + l = [ns.bsfs.Array, ns.bsfs.Feature, ns.bsfs.Literal, ns.bsfs.Number, ns.bsfs.Time] + p = [ns.bsfs.Predicate] self.assertEqual(repr(Schema()), f'Schema({n}, {l}, {p})') self.assertEqual(repr(Schema([], [], [])), f'Schema({n}, {l}, {p})') # repr conversion n = [ns.bsfs.Entity, ns.bsfs.Image, ns.bsfs.Node, ns.bsfs.Tag, ns.bsfs.Unused] - l = [ns.bsfs.Literal, ns.bsfs.Number, ns.xsd.boolean, ns.xsd.integer, ns.xsd.string] - p = [ns.bse.comment, ns.bse.group, ns.bse.tag, ns.bsfs.Feature, ns.bsfs.Predicate] + l = [ns.bsfs.Array, ns.bsfs.Feature, ns.bsfs.Literal, ns.bsfs.Number, ns.bsfs.Time, ns.xsd.boolean, ns.xsd.integer, ns.xsd.string] + p = [ns.bse.comment, ns.bse.group, ns.bse.tag, ns.bsfs.Predicate] self.assertEqual(repr(Schema(self.predicates, self.nodes, self.literals)), f'Schema({n}, {l}, {p})') def test_equality(self): diff --git a/test/schema/test_serialize.py b/test/schema/test_serialize.py index 205150a..fc6b20a 100644 --- a/test/schema/test_serialize.py +++ b/test/schema/test_serialize.py @@ -581,151 +581,60 @@ class TestFromString(unittest.TestCase): def test_feature(self): - # domain must be defined - self.assertRaises(errors.ConsistencyError, from_string, ''' - prefix rdfs: - prefix xsd: - prefix bsfs: - prefix bse: - - bsfs:Feature rdfs:subClassOf bsfs:Predicate . - bsfs:array rdfs:subClassOf bsfs:Literal . - - bse:colors rdfs:subClassOf bsfs:Feature ; - rdfs:domain bsfs:Entity ; # undefined symbol - rdfs:range bsfs:array ; - bsfs:unique "false"^^xsd:boolean . - ''') - # domain cannot be a literal - self.assertRaises(errors.ConsistencyError, from_string, ''' - prefix rdfs: - prefix xsd: - prefix bsfs: - prefix bse: - - bsfs:Feature rdfs:subClassOf bsfs:Predicate . - bsfs:Entity rdfs:subClassOf bsfs:Literal . - bsfs:array rdfs:subClassOf bsfs:Literal . - - bse:colors rdfs:subClassOf bsfs:Feature ; - rdfs:domain bsfs:Entity ; # literal instead of node - rdfs:range bsfs:array ; - bsfs:unique "false"^^xsd:boolean . - ''') - - # range must be defined - self.assertRaises(errors.ConsistencyError, from_string, ''' - prefix rdfs: - prefix xsd: - prefix bsfs: - prefix bse: - - bsfs:Feature rdfs:subClassOf bsfs:Predicate . - bsfs:Entity rdfs:subClassOf bsfs:Node . - - bse:colors rdfs:subClassOf bsfs:Feature ; - rdfs:domain bsfs:Entity ; - rdfs:range bsfs:array ; # undefined symbol - bsfs:unique "false"^^xsd:boolean . 
- ''') - # range must be defined - self.assertRaises(errors.ConsistencyError, from_string, ''' + # additional features can be defined + f_colors = types.ROOT_FEATURE.child(ns.bsfs.Colors) + self.assertEqual(Schema(literals={f_colors}), from_string(''' prefix rdfs: prefix xsd: prefix bsfs: prefix bse: - bsfs:Feature rdfs:subClassOf bsfs:Predicate . - bsfs:Entity rdfs:subClassOf bsfs:Node . + bsfs:Array rdfs:subClassOf bsfs:Literal . + bsfs:Feature rdfs:subClassOf bsfs:Array. - bse:colors rdfs:subClassOf bsfs:Feature ; - rdfs:domain bsfs:Entity ; - rdfs:range bsfs:Foo ; # undefined symbol - bsfs:unique "false"^^xsd:boolean . - ''') - # range must be a node or a literal - self.assertRaises(errors.ConsistencyError, from_string, ''' - prefix rdfs: - prefix xsd: - prefix bsfs: - prefix bse: - - bsfs:Feature rdfs:subClassOf bsfs:Predicate . - bsfs:Entity rdfs:subClassOf bsfs:Node . - - bse:colors rdfs:subClassOf bsfs:Feature ; - rdfs:domain bsfs:Entity ; - rdfs:range bsfs:Predicate ; # invalid symbol - bsfs:unique "false"^^xsd:boolean . - ''') + bsfs:Colors rdfs:subClassOf bsfs:Feature . - # additional predicates can be defined - n_ent = types.ROOT_NODE.child(ns.bsfs.Entity) - l_array = types.ROOT_LITERAL.child(ns.bsfs.array) - p_comment = types.ROOT_FEATURE.child(ns.bse.colors, domain=n_ent, range=l_array, unique=False) - self.assertEqual(Schema({p_comment}), from_string(''' - prefix rdfs: - prefix xsd: - prefix bsfs: - prefix bse: - - bsfs:Feature rdfs:subClassOf bsfs:Predicate . - bsfs:Entity rdfs:subClassOf bsfs:Node . - bsfs:array rdfs:subClassOf bsfs:Literal . - - bse:colors rdfs:subClassOf bsfs:Feature ; - rdfs:domain bsfs:Entity ; - rdfs:range bsfs:array ; - bsfs:unique "false"^^xsd:boolean . ''')) # features inherit properties from parents - n_ent = types.ROOT_NODE.child(ns.bsfs.Entity) - l_array = types.ROOT_LITERAL.child(ns.bsfs.array) - l_string = types.ROOT_LITERAL.child(ns.xsd.string) - p_annotation = types.ROOT_FEATURE.child(ns.bsfs.Annotation, domain=n_ent, range=l_array, - dimension=1234, dtype=ns.xsd.string) - p_comment = p_annotation.child(ns.bse.colors, unique=True) - self.assertEqual(Schema({p_comment}), from_string(''' + f_colors = types.ROOT_FEATURE.child(ns.bsfs.Colors, dimension=1234, dtype=ns.bsfs.i32) + f_main_colors = f_colors.child(ns.bsfs.MainColor, distance=ns.bsfs.cosine, dtype=ns.bsfs.f16) + self.assertEqual(Schema(literals={f_colors, f_main_colors}), from_string(''' prefix rdfs: prefix xsd: prefix bsfs: prefix bse: - bsfs:Feature rdfs:subClassOf bsfs:Predicate . - bsfs:Entity rdfs:subClassOf bsfs:Node . - bsfs:array rdfs:subClassOf bsfs:Literal . + bsfs:Array rdfs:subClassOf bsfs:Literal . + bsfs:Feature rdfs:subClassOf bsfs:Array. - bsfs:Annotation rdfs:subClassOf bsfs:Feature ; # inherits defaults from bsfs:Feature - rdfs:domain bsfs:Entity ; - rdfs:range bsfs:array ; - bsfs:dimension "1234"^^xsd:integer ; - bsfs:dtype xsd:string . + bsfs:Colors rdfs:subClassOf bsfs:Feature ; # inherits distance from bsfs:Feature + bsfs:dimension "1234"^^xsd:integer ; # overwrites bsfs:Feature + bsfs:dtype bsfs:i32 . # overwrites bsfs:Feature + + bsfs:MainColor rdfs:subClassOf bsfs:Colors ; # inherits dimension from bsfs:Colors + bsfs:distance bsfs:cosine ; # overwrites bsfs:Feature + bsfs:dtype bsfs:f16 . # overwrites bsfs:Colors - bse:colors rdfs:subClassOf bsfs:Annotation ; # inherits domain/range/etc. from bsfs:Annotation - bsfs:unique "true"^^xsd:boolean . # overwrites bsfs:Predicate ''')) # feature definition can be split across multiple statements. 
# statements can be repeated - n_ent = types.ROOT_NODE.child(ns.bsfs.Entity) - p_foo = types.ROOT_FEATURE.child(ns.bse.foo, domain=n_ent, unique=True, - dimension=1234, dtype=ns.bsfs.f32) - self.assertEqual(Schema({p_foo}), from_string(''' + f_colors = types.ROOT_FEATURE.child(ns.bsfs.Colors, dimension=1234, dtype=ns.bsfs.f32) + self.assertEqual(Schema(literals={f_colors}), from_string(''' prefix rdfs: prefix xsd: prefix bsfs: prefix bse: - bsfs:Feature rdfs:subClassOf bsfs:Predicate . - bsfs:Entity rdfs:subClassOf bsfs:Node . + bsfs:Array rdfs:subClassOf bsfs:Literal . + bsfs:Feature rdfs:subClassOf bsfs:Array. - bse:foo rdfs:subClassOf bsfs:Feature ; - bsfs:unique "true"^^xsd:boolean ; + bsfs:Colors rdfs:subClassOf bsfs:Feature ; bsfs:dimension "1234"^^xsd:integer . - bse:foo rdfs:subClassOf bsfs:Feature ; - rdfs:domain bsfs:Entity ; + bsfs:Colors rdfs:subClassOf bsfs:Feature ; + bsfs:dimension "1234"^^xsd:integer ; # non-conflicting repetition bsfs:dtype bsfs:f32 . ''')) @@ -736,75 +645,14 @@ class TestFromString(unittest.TestCase): prefix bsfs: prefix bse: - bsfs:Feature rdfs:subClassOf bsfs:Predicate . - bsfs:Annotation rdfs:subClassOf bsfs:Feature . - bsfs:Entity rdfs:subClassOf bsfs:Node . + bsfs:Array rdfs:subClassOf bsfs:Literal . + bsfs:Feature rdfs:subClassOf bsfs:Array. + bsfs:ColorSpace rdfs:subClassOf bsfs:Feature . - bse:foo rdfs:subClassOf bsfs:Feature ; - rdfs:domain bsfs:Node ; - bsfs:unique "false"^^xsd:boolean . - - bse:foo rdfs:subClassOf bsfs:Annotation ; - rdfs:domain bsfs:Node ; - bsfs:unique "false"^^xsd:boolean . + bsfs:Colors rdfs:subClassOf bsfs:Feature . + bsfs:Colors rdfs:subClassOf bsfs:ColorSpace . ''') - # cannot assign multiple conflicting domains to the same feature - self.assertRaises(errors.ConsistencyError, from_string, ''' - prefix rdfs: - prefix xsd: - prefix bsfs: - prefix bse: - - bsfs:Feature rdfs:subClassOf bsfs:Predicate . - bsfs:Entity rdfs:subClassOf bsfs:Node . - - bse:foo rdfs:subClassOf bsfs:Feature ; - rdfs:domain bsfs:Node ; - bsfs:unique "false"^^xsd:boolean . - - bse:foo rdfs:subClassOf bsfs:Feature ; - rdfs:domain bsfs:Entity . # conflicting domain - ''') - # cannot assign multiple conflicting ranges to the same feature - self.assertRaises(errors.ConsistencyError, from_string, ''' - prefix rdfs: - prefix xsd: - prefix bsfs: - prefix bse: - - bsfs:Feature rdfs:subClassOf bsfs:Predicate . - bsfs:Entity rdfs:subClassOf bsfs:Node . - bsfs:array rdfs:subClassOf bsfs:Literal . - bsfs:large_array rdfs:subClassOf bsfs:array . - bsfs:small_array rdfs:subClassOf bsfs:array . - - bse:foo rdfs:subClassOf bsfs:Feature ; - rdfs:domain bsfs:Node ; - rdfs:range bsfs:large_array ; - bsfs:unique "false"^^xsd:boolean . - - bse:foo rdfs:subClassOf bsfs:Feature ; - rdfs:range bsfs:small_array . # conflicting range - ''') - # cannot assign multiple conflicting uniques to the same feature - self.assertRaises(errors.ConsistencyError, from_string, ''' - prefix rdfs: - prefix xsd: - prefix bsfs: - prefix bse: - - bsfs:Feature rdfs:subClassOf bsfs:Predicate . - bsfs:Entity rdfs:subClassOf bsfs:Node . - - bse:foo rdfs:subClassOf bsfs:Feature ; - rdfs:domain bsfs:Node ; - rdfs:range bsfs:Node ; - bsfs:unique "false"^^xsd:boolean . - - bse:foo rdfs:subClassOf bsfs:Feature ; - bsfs:unique "true"^^xsd:boolean . 
# conflicting unique - ''') # cannot assign multiple conflicting dimensions to the same feature self.assertRaises(errors.ConsistencyError, from_string, ''' prefix rdfs: @@ -812,15 +660,15 @@ class TestFromString(unittest.TestCase): prefix bsfs: prefix bse: - bsfs:Feature rdfs:subClassOf bsfs:Predicate . - bsfs:Entity rdfs:subClassOf bsfs:Node . + bsfs:Array rdfs:subClassOf bsfs:Literal . + bsfs:Feature rdfs:subClassOf bsfs:Array. - bse:foo rdfs:subClassOf bsfs:Feature ; - rdfs:domain bsfs:Node ; + bsfs:Colors rdfs:subClassOf bsfs:Feature ; bsfs:dimension "1234"^^xsd:integer . - bse:foo rdfs:subClassOf bsfs:Feature ; + bsfs:Colors rdfs:subClassOf bsfs:Feature ; bsfs:dimension "4321"^^xsd:integer . # conflicting dimension + ''') # cannot assign multiple conflicting dtypes to the same feature self.assertRaises(errors.ConsistencyError, from_string, ''' @@ -829,14 +677,13 @@ class TestFromString(unittest.TestCase): prefix bsfs: prefix bse: - bsfs:Feature rdfs:subClassOf bsfs:Predicate . - bsfs:Entity rdfs:subClassOf bsfs:Node . + bsfs:Array rdfs:subClassOf bsfs:Literal . + bsfs:Feature rdfs:subClassOf bsfs:Array. - bse:foo rdfs:subClassOf bsfs:Feature ; - rdfs:domain bsfs:Node ; + bsfs:Colors rdfs:subClassOf bsfs:Feature ; bsfs:dtype bsfs:f32 . - bse:foo rdfs:subClassOf bsfs:Feature ; + bsfs:Colors rdfs:subClassOf bsfs:Feature ; bsfs:dtype bsfs:f16 . # conflicting dtype ''') # cannot assign multiple conflicting distance metrics to the same feature @@ -846,14 +693,13 @@ class TestFromString(unittest.TestCase): prefix bsfs: prefix bse: - bsfs:Feature rdfs:subClassOf bsfs:Predicate . - bsfs:Entity rdfs:subClassOf bsfs:Node . + bsfs:Array rdfs:subClassOf bsfs:Literal . + bsfs:Feature rdfs:subClassOf bsfs:Array. - bse:foo rdfs:subClassOf bsfs:Feature ; - rdfs:domain bsfs:Node ; + bsfs:Colors rdfs:subClassOf bsfs:Feature ; bsfs:distance bsfs:euclidean . - bse:foo rdfs:subClassOf bsfs:Feature ; + bsfs:Colors rdfs:subClassOf bsfs:Feature ; bsfs:distance bsfs:cosine . # conflicting distance ''') @@ -864,24 +710,28 @@ class TestFromString(unittest.TestCase): prefix bsfs: prefix bse: - bsfs:Feature rdfs:subClassOf bsfs:Predicate . - bse:colors rdfs:subClassOf bsfs:Feature ; + bsfs:Array rdfs:subClassOf bsfs:Literal . + bsfs:Feature rdfs:subClassOf bsfs:Array. + + bsfs:Colors rdfs:subClassOf bsfs:Feature ; bsfs:dimension "1234"^^xsd:integer . - ''').predicate(ns.bse.colors).annotations, {}) + ''').literal(ns.bsfs.Colors).annotations, {}) self.assertDictEqual(from_string(''' prefix rdfs: prefix xsd: prefix bsfs: prefix bse: - bsfs:Feature rdfs:subClassOf bsfs:Predicate . - bse:colors rdfs:subClassOf bsfs:Feature ; + bsfs:Array rdfs:subClassOf bsfs:Literal . + bsfs:Feature rdfs:subClassOf bsfs:Array. + + bsfs:Colors rdfs:subClassOf bsfs:Feature ; bsfs:dimension "1234"^^xsd:integer ; rdfs:label "hello world"^^xsd:string ; bsfs:foo "1234"^^xsd:integer . 
- ''').predicate(ns.bse.colors).annotations, { + ''').literal(ns.bsfs.Colors).annotations, { ns.rdfs.label: 'hello world', ns.bsfs.foo: 1234, }) @@ -904,14 +754,14 @@ class TestFromString(unittest.TestCase): p_comment = p_annotation.child(ns.bse.comment, range=l_string) # features f_colors = types.ROOT_FEATURE.child(URI('http://bsfs.ai/schema/Feature/colors_spatial'), - domain=n_ent, range=l_array, unique=True, dtype=ns.bsfs.f16, distance=ns.bsfs.euclidean) + dtype=ns.bsfs.f16, distance=ns.bsfs.euclidean) f_colors1234 = f_colors.child(URI('http://bsfs.ai/schema/Feature/colors_spatial#1234'), dimension=1024) f_colors4321 = f_colors.child(URI('http://bsfs.ai/schema/Feature/colors_spatial#4321'), dimension=2048) # schema ref = Schema( - {p_annotation, p_tag, p_group, p_comment, f_colors, f_colors1234, f_colors4321}, + {p_annotation, p_tag, p_group, p_comment}, {n_ent, n_tag, n_image}, - {l_string, l_integer, l_boolean}) + {l_string, l_integer, l_boolean, f_colors, f_colors1234, f_colors4321}) # load from string gen = from_string(''' # generic prefixes @@ -932,21 +782,19 @@ class TestFromString(unittest.TestCase): # literals xsd:string rdfs:subClassOf bsfs:Literal ; rdfs:label "A sequence of characters"^^xsd:string . - bsfs:array rdfs:subClassOf bsfs:Literal . + bsfs:Array rdfs:subClassOf bsfs:Literal . + bsfs:Feature rdfs:subClassOf bsfs:Array. bsfs:Number rdfs:subClassOf bsfs:Literal . xsd:integer rdfs:subClassOf bsfs:Number . xsd:boolean rdfs:subClassOf bsfs:Literal . + # abstract predicates bsfs:Annotation rdfs:subClassOf bsfs:Predicate ; rdfs:label "node annotation"^^xsd:string . - bsfs:Feature rdfs:subClassOf bsfs:Predicate . # feature instances rdfs:subClassOf bsfs:Feature ; - rdfs:domain bsfs:Entity ; - rdfs:range bsfs:array ; - bsfs:unique "true"^^xsd:boolean ; bsfs:dtype bsfs:f16 ; bsfs:distance bsfs:euclidean ; # annotations @@ -986,15 +834,22 @@ class TestFromString(unittest.TestCase): self.assertDictEqual(gen.node(ns.bsfs.Tag).annotations, {ns.rdfs.label: 'Tag'}) self.assertDictEqual(gen.literal(ns.xsd.string).annotations, {ns.rdfs.label: 'A sequence of characters'}) self.assertDictEqual(gen.predicate(ns.bsfs.Annotation).annotations, {ns.rdfs.label: 'node annotation'}) - self.assertDictEqual(gen.predicate(URI('http://bsfs.ai/schema/Feature/colors_spatial')).annotations, { + self.assertDictEqual(gen.literal(URI('http://bsfs.ai/schema/Feature/colors_spatial')).annotations, { ns.rdfs.label: 'ColorsSpatial instances. Dimension depends on instance.', ns.bsfs.first_arg: 1234, ns.bsfs.second_arg: 'hello world', }) - self.assertDictEqual(gen.predicate(URI('http://bsfs.ai/schema/Feature/colors_spatial#1234')).annotations, { + self.assertDictEqual(gen.literal(URI('http://bsfs.ai/schema/Feature/colors_spatial#1234')).annotations, { ns.rdfs.label: 'Main colors spatial instance'}) self.assertDictEqual(gen.predicate(ns.bse.tag).annotations, {ns.rdfs.label: 'connect entity to a tag'}) + # blank nodes result in an error + self.assertRaises(errors.BackendError, from_string, ''' + prefix rdfs: + prefix bsfs: + bsfs:Entity rdfs:subClassOf bsfs:Node ; + bsfs:foo _:bar . 
+ ''') class TestToString(unittest.TestCase): @@ -1002,6 +857,11 @@ class TestToString(unittest.TestCase): def test_empty(self): self.assertEqual(Schema(), from_string(to_string(Schema()))) + def test_parse(self): + schema = Schema() + schema._nodes[ns.bsfs.Invalid] = 123 # NOTE: Access protected to force an invalid schema + self.assertRaises(TypeError, to_string, schema) + def test_literal(self): # root literals l_str = types.ROOT_LITERAL.child(ns.xsd.string) @@ -1120,37 +980,29 @@ class TestToString(unittest.TestCase): def test_feature(self): - # auxiliary types - n_ent = types.ROOT_NODE.child(ns.bsfs.Entity) - l_array = types.ROOT_LITERAL.child(ns.bsfs.array) # root features f_colors = types.ROOT_FEATURE.child(URI('http://bsfs.ai/schema/Feature/colors'), - range=l_array, unique=True, distance=ns.bsfs.cosine) + distance=ns.bsfs.cosine) # derived features f_colors1234 = f_colors.child(URI('http://bsfs.ai/schema/Feature/colors#1234'), - dimension=1024, domain=n_ent) # inherits range/dtype/distance + dimension=1024) # inherits dtype, distance f_colors4321 = f_colors.child(URI('http://bsfs.ai/schema/Feature/colors#4321'), - dimension=2048, distance=ns.bsfs.euclidean) # inherits domain/range/dtype + dimension=2048, distance=ns.bsfs.euclidean) # inherits dtype # create schema - schema = Schema({f_colors, f_colors1234, f_colors4321}) + schema = Schema(literals={f_colors, f_colors1234, f_colors4321}) schema_str = to_string(schema) # all symbols are serialized - self.assertIn('bsfs:Entity', schema_str) - self.assertIn('bsfs:array', schema_str) + self.assertIn('bsfs:Array', schema_str) self.assertIn('[^\.]*rdfs:domain[^\.]', schema_str)) self.assertIsNotNone(re.search(r'[^\.]*bsfs:dimension[^\.]', schema_str)) - self.assertIsNone(re.search(r'[^\.]*rdfs:range[^\.]', schema_str)) self.assertIsNone(re.search(r'[^\.]*bsfs:dtype[^\.]', schema_str)) self.assertIsNone(re.search(r'[^\.]*bsfs:distance[^\.]', schema_str)) self.assertIsNotNone(re.search(r'[^\.]*bsfs:dimension[^\.]', schema_str)) self.assertIsNotNone(re.search(r'[^\.]*bsfs:distance[^\.]', schema_str)) - self.assertIsNone(re.search(r'[^\.]*rdfs:domain[^\.]', schema_str)) - self.assertIsNone(re.search(r'[^\.]*rdfs:range[^\.]', schema_str)) self.assertIsNone(re.search(r'[^\.]*bsfs:dtype[^\.]', schema_str)) # unserialize yields the original schema self.assertEqual(schema, from_string(schema_str)) @@ -1163,11 +1015,11 @@ class TestToString(unittest.TestCase): ns.bsfs.bar: False, } f_colors = types.ROOT_FEATURE.child(URI('http://bsfs.ai/schema/Feature/colors'), - domain=n_ent, range=l_array, unique=True, dtype=ns.bsfs.f16, distance=ns.bsfs.euclidean, + dtype=ns.bsfs.f16, distance=ns.bsfs.euclidean, **annotations) self.assertDictEqual( annotations, - from_string(to_string(Schema({f_colors}))).predicate(URI('http://bsfs.ai/schema/Feature/colors')).annotations) + from_string(to_string(Schema(literals={f_colors}))).literal(URI('http://bsfs.ai/schema/Feature/colors')).annotations) ## main ## diff --git a/test/schema/test_types.py b/test/schema/test_types.py index 1eeafa1..c5895d2 100644 --- a/test/schema/test_types.py +++ b/test/schema/test_types.py @@ -274,21 +274,16 @@ class TestFeature(unittest.TestCase): n_root = Node(ns.bsfs.Node, None) l_root = Literal(ns.bsfs.Literal, None) # dimension, dtype, and distance are respected - feat = Feature(ns.bsfs.Feature, None, n_root, l_root, False, - 1234, ns.bsfs.float, ns.bsfs.euclidean) + feat = Feature(ns.bsfs.Feature, None, 1234, ns.bsfs.float, ns.bsfs.euclidean) self.assertEqual(1234, feat.dimension) 
self.assertEqual(ns.bsfs.float, feat.dtype) self.assertEqual(ns.bsfs.euclidean, feat.distance) def test_equality(self): n_ent = Node(ns.bsfs.Entity, Node(ns.bsfs.Node, None)) - l_array = Literal(ns.bsfs.array, Literal(ns.bsfs.Literal, None)) colors = Feature( uri=ns.bse.colors, parent=ROOT_FEATURE, - domain=n_ent, - range=l_array, - unique=False, dimension=1234, dtype=ns.bsfs.float, distance=ns.bsfs.euclidean, @@ -297,29 +292,25 @@ class TestFeature(unittest.TestCase): self.assertEqual(colors, colors) self.assertEqual(hash(colors), hash(colors)) # instance is equal to a clone - self.assertEqual(colors, Feature(ns.bse.colors, ROOT_FEATURE, n_ent, l_array, False, 1234, ns.bsfs.float, ns.bsfs.euclidean)) - self.assertEqual(hash(colors), hash(Feature(ns.bse.colors, ROOT_FEATURE, n_ent, l_array, False, 1234, ns.bsfs.float, ns.bsfs.euclidean))) + self.assertEqual(colors, Feature(ns.bse.colors, ROOT_FEATURE, 1234, ns.bsfs.float, ns.bsfs.euclidean)) + self.assertEqual(hash(colors), hash(Feature(ns.bse.colors, ROOT_FEATURE, 1234, ns.bsfs.float, ns.bsfs.euclidean))) # equality respects dimension - self.assertNotEqual(colors, Feature(ns.bse.colors, ROOT_FEATURE, n_ent, l_array, False, 4321, ns.bsfs.float, ns.bsfs.euclidean)) - self.assertNotEqual(hash(colors), hash(Feature(ns.bse.colors, ROOT_FEATURE, n_ent, l_array, False, 4321, ns.bsfs.float, ns.bsfs.euclidean))) + self.assertNotEqual(colors, Feature(ns.bse.colors, ROOT_FEATURE, 4321, ns.bsfs.float, ns.bsfs.euclidean)) + self.assertNotEqual(hash(colors), hash(Feature(ns.bse.colors, ROOT_FEATURE, 4321, ns.bsfs.float, ns.bsfs.euclidean))) # equality respects dtype - self.assertNotEqual(colors, Feature(ns.bse.colors, ROOT_FEATURE, n_ent, l_array, False, 1234, ns.bsfs.integer, ns.bsfs.euclidean)) - self.assertNotEqual(hash(colors), hash(Feature(ns.bse.colors, ROOT_FEATURE, n_ent, l_array, False, 1234, ns.bsfs.integer, ns.bsfs.euclidean))) + self.assertNotEqual(colors, Feature(ns.bse.colors, ROOT_FEATURE, 1234, ns.bsfs.integer, ns.bsfs.euclidean)) + self.assertNotEqual(hash(colors), hash(Feature(ns.bse.colors, ROOT_FEATURE, 1234, ns.bsfs.integer, ns.bsfs.euclidean))) # equality respects distance - self.assertNotEqual(colors, Feature(ns.bse.colors, ROOT_FEATURE, n_ent, l_array, False, 1234, ns.bsfs.float, ns.bsfs.cosine)) - self.assertNotEqual(hash(colors), hash(Feature(ns.bse.colors, ROOT_FEATURE, n_ent, l_array, False, 1234, ns.bsfs.float, ns.bsfs.cosine))) + self.assertNotEqual(colors, Feature(ns.bse.colors, ROOT_FEATURE, 1234, ns.bsfs.float, ns.bsfs.cosine)) + self.assertNotEqual(hash(colors), hash(Feature(ns.bse.colors, ROOT_FEATURE, 1234, ns.bsfs.float, ns.bsfs.cosine))) def test_child(self): n_root = Node(ns.bsfs.Node, None) n_ent = Node(ns.bsfs.Entity, n_root) l_root = Literal(ns.bsfs.Literal, None) - l_array = Literal(ns.bsfs.array, l_root) colors = Feature( uri=ns.bse.colors, parent=ROOT_FEATURE, - domain=n_ent, - range=l_array, - unique=False, dimension=1234, dtype=ns.bsfs.float, distance=ns.bsfs.euclidean, @@ -329,16 +320,6 @@ class TestFeature(unittest.TestCase): self.assertIsInstance(colors.child(ns.bse.foo), Feature) # uri is respected self.assertEqual(ns.bse.foo, colors.child(ns.bse.foo).uri) - # domain is respected - dom = Node(ns.bsfs.Image, n_ent) - self.assertEqual(dom, colors.child(ns.bse.foo, domain=dom).domain) - # range is respected - rng = Literal(ns.bse.foo, l_array) - self.assertEqual(rng, colors.child(ns.bse.foo, range=rng).range) - # cannot set range to None - self.assertEqual(l_array, colors.child(ns.bse.foo, 
range=None).range) - # unique is respected - self.assertTrue(colors.child(ns.bse.foo, unique=True).unique) # dimension is respected self.assertEqual(4321, colors.child(ns.bse.foo, dimension=4321).dimension) # dtype is respected @@ -351,13 +332,6 @@ class TestFeature(unittest.TestCase): 'bar': 123, }) - # domain is inherited from parent - self.assertEqual(n_root, ROOT_FEATURE.child(ns.bse.foo).domain) - self.assertEqual(n_ent, colors.child(ns.bse.foo).domain) - # range is inherited from parent - self.assertEqual(l_array, colors.child(ns.bse.foo).range) - # uniqueness is inherited from parent - self.assertFalse(colors.child(ns.bse.foo).unique) # dimension is inherited from parent self.assertEqual(1234, colors.child(ns.bse.foo).dimension) # dtype is inherited from parent @@ -365,14 +339,6 @@ class TestFeature(unittest.TestCase): # distance is inherited from parent self.assertEqual(ns.bsfs.euclidean, colors.child(ns.bse.foo).distance) - # domain must be subtype of parent's domain - self.assertRaises(errors.ConsistencyError, colors.child, ns.bse.foo, domain=n_root) - self.assertRaises(errors.ConsistencyError, colors.child, ns.bse.foo, domain=Node(ns.bsfs.Image, n_root)) - # range must be subtype of parent's range - self.assertRaises(errors.ConsistencyError, colors.child, ns.bse.foo, range=Literal(ns.bsfs.Literal, None)) - self.assertRaises(errors.ConsistencyError, colors.child, ns.bse.foo, range=Literal(ns.bsfs.foo, Literal(ns.bsfs.Literal, None))) - self.assertRaises(errors.ConsistencyError, colors.child, ns.bse.foo, range=Node(ns.bsfs.Tag, n_root)) - ## main ## diff --git a/test/triple_store/sparql/test_parse_filter.py b/test/triple_store/sparql/test_parse_filter.py index f6842c5..5c16f11 100644 --- a/test/triple_store/sparql/test_parse_filter.py +++ b/test/triple_store/sparql/test_parse_filter.py @@ -30,12 +30,15 @@ class TestParseFilter(unittest.TestCase): prefix bsfs: prefix bse: + bsfs:Array rdfs:subClassOf bsfs:Literal . + bsfs:Feature rdfs:subClassOf bsfs:Array . + bsfs:Number rdfs:subClassOf bsfs:Literal . + bsfs:Entity rdfs:subClassOf bsfs:Node . bsfs:Image rdfs:subClassOf bsfs:Entity . bsfs:Tag rdfs:subClassOf bsfs:Node . xsd:string rdfs:subClassOf bsfs:Literal . - bsfs:Number rdfs:subClassOf bsfs:Literal . xsd:integer rdfs:subClassOf bsfs:Number . bsfs:URI rdfs:subClassOf bsfs:Literal . 
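For orientation, a minimal usage sketch of the reworked Feature hierarchy, assembled from the test code in this patch; the import paths and the bsfs:Colors / bse:colors symbols are illustrative assumptions, not part of the patch itself:

    # Features are now Literal subtypes rooted at bsfs:Feature (a child of
    # bsfs:Array); predicates reference them through their range like any literal.
    from bsfs.namespace import ns
    from bsfs.schema import Schema, types

    f_colors = types.ROOT_FEATURE.child(ns.bsfs.Colors, dimension=1234, dtype=ns.bsfs.i32)
    f_main = f_colors.child(ns.bsfs.MainColor, distance=ns.bsfs.cosine)  # inherits dimension and dtype

    n_entity = types.ROOT_NODE.child(ns.bsfs.Entity)
    p_colors = types.ROOT_PREDICATE.child(ns.bse.colors, domain=n_entity, range=f_colors, unique=False)

    # Nodes and remaining literals are completed automatically by the Schema.
    schema = Schema({p_colors}, literals={f_colors, f_main})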
diff --git a/test/triple_store/sparql/test_sparql.py b/test/triple_store/sparql/test_sparql.py index aa5dfc7..1f56a7e 100644 --- a/test/triple_store/sparql/test_sparql.py +++ b/test/triple_store/sparql/test_sparql.py @@ -63,12 +63,14 @@ class TestSparqlStore(unittest.TestCase): ''') self.schema_triples = { # schema hierarchy - (rdflib.URIRef(ns.bsfs.Feature), rdflib.RDFS.subClassOf, rdflib.URIRef(ns.bsfs.Predicate)), (rdflib.URIRef(ns.bsfs.Entity), rdflib.RDFS.subClassOf, rdflib.URIRef(ns.bsfs.Node)), (rdflib.URIRef(ns.bsfs.Tag), rdflib.RDFS.subClassOf, rdflib.URIRef(ns.bsfs.Node)), (rdflib.URIRef(ns.bsfs.User), rdflib.RDFS.subClassOf, rdflib.URIRef(ns.bsfs.Node)), (rdflib.URIRef(ns.xsd.string), rdflib.RDFS.subClassOf, rdflib.URIRef(ns.bsfs.Literal)), + (rdflib.URIRef(ns.bsfs.Array), rdflib.RDFS.subClassOf, rdflib.URIRef(ns.bsfs.Literal)), + (rdflib.URIRef(ns.bsfs.Feature), rdflib.RDFS.subClassOf, rdflib.URIRef(ns.bsfs.Array)), (rdflib.URIRef(ns.bsfs.Number), rdflib.RDFS.subClassOf, rdflib.URIRef(ns.bsfs.Literal)), + (rdflib.URIRef(ns.bsfs.Time), rdflib.RDFS.subClassOf, rdflib.URIRef(ns.bsfs.Literal)), (rdflib.URIRef(ns.xsd.integer), rdflib.RDFS.subClassOf, rdflib.URIRef(ns.bsfs.Number)), (rdflib.URIRef(ns.bse.comment), rdflib.RDFS.subClassOf, rdflib.URIRef(ns.bsfs.Predicate)), (rdflib.URIRef(ns.bse.filesize), rdflib.RDFS.subClassOf, rdflib.URIRef(ns.bsfs.Predicate)), @@ -351,12 +353,14 @@ class TestSparqlStore(unittest.TestCase): # instances of old classes were removed self.assertSetEqual(set(store._graph), { # schema hierarchy - (rdflib.URIRef(ns.bsfs.Feature), rdflib.RDFS.subClassOf, rdflib.URIRef(ns.bsfs.Predicate)), (rdflib.URIRef(ns.bsfs.Entity), rdflib.RDFS.subClassOf, rdflib.URIRef(ns.bsfs.Node)), (rdflib.URIRef(ns.bsfs.Tag), rdflib.RDFS.subClassOf, rdflib.URIRef(ns.bsfs.Node)), (rdflib.URIRef(ns.bsfs.User), rdflib.RDFS.subClassOf, rdflib.URIRef(ns.bsfs.Node)), (rdflib.URIRef(ns.xsd.boolean), rdflib.RDFS.subClassOf, rdflib.URIRef(ns.bsfs.Literal)), + (rdflib.URIRef(ns.bsfs.Array), rdflib.RDFS.subClassOf, rdflib.URIRef(ns.bsfs.Literal)), + (rdflib.URIRef(ns.bsfs.Feature), rdflib.RDFS.subClassOf, rdflib.URIRef(ns.bsfs.Array)), (rdflib.URIRef(ns.bsfs.Number), rdflib.RDFS.subClassOf, rdflib.URIRef(ns.bsfs.Literal)), + (rdflib.URIRef(ns.bsfs.Time), rdflib.RDFS.subClassOf, rdflib.URIRef(ns.bsfs.Literal)), (rdflib.URIRef(ns.xsd.integer), rdflib.RDFS.subClassOf, rdflib.URIRef(ns.bsfs.Number)), (rdflib.URIRef(ns.bse.shared), rdflib.RDFS.subClassOf, rdflib.URIRef(ns.bsfs.Predicate)), (rdflib.URIRef(ns.bse.tag), rdflib.RDFS.subClassOf, rdflib.URIRef(ns.bsfs.Predicate)), -- cgit v1.2.3 From 80a97bfa9f22d0d6dd25928fe1754a3a0d1de78a Mon Sep 17 00:00:00 2001 From: Matthias Baumgartner Date: Sun, 15 Jan 2023 21:00:12 +0100 Subject: Distance filter ast node --- bsfs/graph/resolve.py | 5 +++ bsfs/query/ast/filter_.py | 59 ++++++++++++++++++++------ bsfs/query/validator.py | 16 +++++++ bsfs/triple_store/sparql/distance.py | 56 ++++++++++++++++++++++++ bsfs/triple_store/sparql/parse_filter.py | 41 +++++++++++++++++- bsfs/triple_store/sparql/sparql.py | 13 +++++- test/graph/test_resolve.py | 13 ++++++ test/query/ast_test/test_filter_.py | 35 ++++++++++++++- test/query/test_validator.py | 27 ++++++++++++ test/triple_store/sparql/test_distance.py | 61 +++++++++++++++++++++++++++ test/triple_store/sparql/test_parse_filter.py | 50 ++++++++++++++++++++-- test/triple_store/sparql/test_sparql.py | 17 ++++++++ 12 files changed, 375 insertions(+), 18 deletions(-) create mode 100644 
bsfs/triple_store/sparql/distance.py create mode 100644 test/triple_store/sparql/test_distance.py diff --git a/bsfs/graph/resolve.py b/bsfs/graph/resolve.py index b671204..00b778b 100644 --- a/bsfs/graph/resolve.py +++ b/bsfs/graph/resolve.py @@ -63,6 +63,8 @@ class Filter(): return self._and(type_, node) if isinstance(node, ast.filter.Or): return self._or(type_, node) + if isinstance(node, ast.filter.Distance): + return self._distance(type_, node) if isinstance(node, (ast.filter.Equals, ast.filter.Substring, \ ast.filter.StartsWith, ast.filter.EndsWith)): return self._value(type_, node) @@ -125,6 +127,9 @@ class Filter(): def _has(self, type_: bsc.Vertex, node: ast.filter.Has) -> ast.filter.Has: # pylint: disable=unused-argument return node + def _distance(self, type_: bsc.Vertex, node: ast.filter.Distance): # pylint: disable=unused-argument + return node + def _value(self, type_: bsc.Vertex, node: ast.filter._Value) -> ast.filter._Value: # pylint: disable=unused-argument return node diff --git a/bsfs/query/ast/filter_.py b/bsfs/query/ast/filter_.py index b129ded..2f0270c 100644 --- a/bsfs/query/ast/filter_.py +++ b/bsfs/query/ast/filter_.py @@ -252,8 +252,7 @@ class Has(FilterExpression): class _Value(FilterExpression): - """ - """ + """Matches some value.""" # target value. value: typing.Any @@ -277,13 +276,13 @@ class Is(_Value): class Equals(_Value): """Value matches exactly. - NOTE: Value format must correspond to literal type; can be a string, a number, or a Node + NOTE: Value must correspond to literal type. """ class Substring(_Value): """Value matches a substring - NOTE: value format must be a string + NOTE: value must be a string. """ @@ -295,9 +294,49 @@ class EndsWith(_Value): """Value ends with a given string.""" +class Distance(FilterExpression): + """Distance to a reference is (strictly) below a threshold. Assumes a Feature literal.""" + + # FIXME: + # (a) pass a node/predicate as anchor instead of a value. + # Then we don't need to materialize the reference. + # (b) pass a FilterExpression (_Bounded) instead of a threshold. + # Then, we could also query values greater than a threshold. + + # reference value. + reference: typing.Any + + # distance threshold. + threshold: float + + # closed (True) or open (False) bound. + strict: bool + + def __init__( + self, + reference: typing.Any, + threshold: float, + strict: bool = False, + ): + self.reference = reference + self.threshold = float(threshold) + self.strict = bool(strict) + + def __repr__(self) -> str: + return f'{typename(self)}({self.reference}, {self.threshold}, {self.strict})' + + def __hash__(self) -> int: + return hash((super().__hash__(), tuple(self.reference), self.threshold, self.strict)) + + def __eq__(self, other) -> bool: + return super().__eq__(other) \ + and self.reference == other.reference \ + and self.threshold == other.threshold \ + and self.strict == other.strict + + class _Bounded(FilterExpression): - """ - """ + """Value is bounded by a threshold. Assumes a Number literal.""" # bound. threshold: float @@ -327,15 +366,11 @@ class _Bounded(FilterExpression): class LessThan(_Bounded): - """Value is (strictly) smaller than threshold. - NOTE: only on numerical literals - """ + """Value is (strictly) smaller than threshold. Assumes a Number literal.""" class GreaterThan(_Bounded): - """Value is (strictly) larger than threshold - NOTE: only on numerical literals - """ + """Value is (strictly) larger than threshold. 
Assumes a Number literal.""" class Predicate(PredicateExpression): diff --git a/bsfs/query/validator.py b/bsfs/query/validator.py index ecea951..1b7f688 100644 --- a/bsfs/query/validator.py +++ b/bsfs/query/validator.py @@ -69,6 +69,8 @@ class Filter(): return self._not(type_, node) if isinstance(node, ast.filter.Has): return self._has(type_, node) + if isinstance(node, ast.filter.Distance): + return self._distance(type_, node) if isinstance(node, (ast.filter.Any, ast.filter.All)): return self._branch(type_, node) if isinstance(node, (ast.filter.And, ast.filter.Or)): @@ -177,6 +179,20 @@ class Filter(): # node.count is a numerical expression self._parse_filter_expression(self.schema.literal(ns.bsfs.Number), node.count) + def _distance(self, type_: bsc.Vertex, node: ast.filter.Distance): + # type is a Literal + if not isinstance(type_, bsc.Feature): + raise errors.ConsistencyError(f'expected a Feature, found {type_}') + # type exists in the schema + if type_ not in self.schema.literals(): + raise errors.ConsistencyError(f'literal {type_} is not in the schema') + # reference matches type_ + if len(node.reference) != type_.dimension: + raise errors.ConsistencyError(f'reference has dimension {len(node.reference)}, expected {type_.dimension}') + # FIXME: + #if node.reference.dtype != type_.dtype: + # raise errors.ConsistencyError(f'') + ## conditions diff --git a/bsfs/triple_store/sparql/distance.py b/bsfs/triple_store/sparql/distance.py new file mode 100644 index 0000000..2f5387a --- /dev/null +++ b/bsfs/triple_store/sparql/distance.py @@ -0,0 +1,56 @@ +""" + +Part of the BlackStar filesystem (bsfs) module. +A copy of the license is provided with the project. +Author: Matthias Baumgartner, 2022 +""" +# standard imports +import typing + +# external imports +import numpy as np + +# bsfs imports +from bsfs.namespace import ns + +# constants +EPS = 1e-9 + +# exports +__all__: typing.Sequence[str] = ( + 'DISTANCE_FU', + ) + + +## code ## + +def euclid(fst, snd) -> float: + """Euclidean distance (l2 norm).""" + fst = np.array(fst) + snd = np.array(snd) + return float(np.linalg.norm(fst - snd)) + +def cosine(fst, snd) -> float: + """Cosine distance.""" + fst = np.array(fst) + snd = np.array(snd) + if (fst == snd).all(): + return 0.0 + nrm0 = np.linalg.norm(fst) + nrm1 = np.linalg.norm(snd) + return float(1.0 - np.dot(fst, snd) / (nrm0 * nrm1 + EPS)) + +def manhatten(fst, snd) -> float: + """Manhatten (cityblock) distance (l1 norm).""" + fst = np.array(fst) + snd = np.array(snd) + return float(np.abs(fst - snd).sum()) + +# Known distance functions. +DISTANCE_FU = { + ns.bsfs.euclidean: euclid, + ns.bsfs.cosine: cosine, + ns.bsfs.manhatten: manhatten, +} + +## EOF ## diff --git a/bsfs/triple_store/sparql/parse_filter.py b/bsfs/triple_store/sparql/parse_filter.py index 5d8a2d9..8b6b976 100644 --- a/bsfs/triple_store/sparql/parse_filter.py +++ b/bsfs/triple_store/sparql/parse_filter.py @@ -5,19 +5,29 @@ A copy of the license is provided with the project. 
Author: Matthias Baumgartner, 2022 """ # imports +import operator import typing +# external imports +import rdflib + # bsfs imports from bsfs import schema as bsc from bsfs.namespace import ns from bsfs.query import ast from bsfs.utils import URI, errors +# inner-module imports +from .distance import DISTANCE_FU + # exports __all__: typing.Sequence[str] = ( 'Filter', ) + +## code ## + class _GenHopName(): """Generator that produces a new unique symbol name with each iteration.""" @@ -46,7 +56,8 @@ class Filter(): # Generator that produces unique symbol names. ngen: _GenHopName - def __init__(self, schema): + def __init__(self, graph, schema): + self.graph = graph self.schema = schema self.ngen = _GenHopName() @@ -84,6 +95,8 @@ class Filter(): return self._not(type_, node, head) if isinstance(node, ast.filter.Has): return self._has(type_, node, head) + if isinstance(node, ast.filter.Distance): + return self._distance(type_, node, head) if isinstance(node, ast.filter.Any): return self._any(type_, node, head) if isinstance(node, ast.filter.All): @@ -243,6 +256,32 @@ class Filter(): # combine return num_preds + ' . ' + count_bounds + def _distance(self, node_type: bsc.Vertex, node: ast.filter.Distance, head: str) -> str: + """ + """ + if not isinstance(node_type, bsc.Feature): + raise errors.BackendError(f'expected Feature, found {node_type}') + if len(node.reference) != node_type.dimension: + raise errors.ConsistencyError( + f'reference has dimension {len(node.reference)}, expected {node_type.dimension}') + # get distance metric + dist = DISTANCE_FU[node_type.distance] + # get operator + cmp = operator.lt if node.strict else operator.le + # get candidate values + candidates = { + f'"{cand}"^^<{node_type.uri}>' + for cand + in self.graph.objects() + if isinstance(cand, rdflib.Literal) + and cand.datatype == rdflib.URIRef(node_type.uri) + and cmp(dist(cand.value, node.reference), node.threshold) + } + # combine candidate values + values = ' '.join(candidates) if len(candidates) else f'"impossible value"^^<{ns.xsd.string}>' + # return sparql fragment + return f'VALUES {head} {{ {values} }}' + def _is(self, node_type: bsc.Vertex, node: ast.filter.Is, head: str) -> str: """ """ diff --git a/bsfs/triple_store/sparql/sparql.py b/bsfs/triple_store/sparql/sparql.py index 3877d1a..dfd9871 100644 --- a/bsfs/triple_store/sparql/sparql.py +++ b/bsfs/triple_store/sparql/sparql.py @@ -18,6 +18,7 @@ from bsfs.utils import errors, URI # inner-module imports from . import parse_filter from .. import base +from .distance import DISTANCE_FU # exports @@ -97,7 +98,7 @@ class SparqlStore(base.TripleStoreBase): self._transaction = _Transaction(self._graph) # NOTE: parsing bsfs.query.ast.filter.Has requires xsd:integer. self._schema = bsc.Schema(literals={bsc.ROOT_NUMBER.child(ns.xsd.integer)}) - self._filter_parser = parse_filter.Filter(self._schema) + self._filter_parser = parse_filter.Filter(self._graph, self._schema) # NOTE: mypy and pylint complain about the **kwargs not being listed (contrasting super) # However, not having it here is clearer since it's explicit that there are no arguments. 
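As a rough illustration of the distance helpers above (values taken from the accompanying tests; the snippet is not part of the patch and the import path is assumed from the diff):

    from bsfs.namespace import ns
    from bsfs.triple_store.sparql.distance import DISTANCE_FU

    euclid = DISTANCE_FU[ns.bsfs.euclidean]   # l2 norm of the difference
    cosine = DISTANCE_FU[ns.bsfs.cosine]      # 1 - normalized dot product

    assert round(euclid([1, 2, 3, 4], [2, 3, 4, 5]), 3) == 2.0
    assert round(cosine([1, 2, 3, 4], [4, 3, 2, 1]), 3) == 0.333
    # The Filter parser keeps a candidate vector when
    # dist(candidate, reference) <= threshold (or strictly < if Distance.strict),
    # and emits the surviving literals as a SPARQL VALUES clause.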
@@ -123,6 +124,16 @@ class SparqlStore(base.TripleStoreBase): # check compatibility: No contradicting definitions if not self.schema.consistent_with(schema): raise errors.ConsistencyError(f'{schema} is inconsistent with {self.schema}') + # check distance functions of features + invalid = { + (cand.uri, cand.distance) + for cand + in schema.literals() + if isinstance(cand, bsc.Feature) and cand.distance not in DISTANCE_FU} + if len(invalid) > 0: + cand, dist = zip(*invalid) + raise ValueError( + f'unknown distance function {",".join(dist)} in feature {", ".join(cand)}') # commit the current transaction self.commit() diff --git a/test/graph/test_resolve.py b/test/graph/test_resolve.py index 0861a53..0918b02 100644 --- a/test/graph/test_resolve.py +++ b/test/graph/test_resolve.py @@ -46,6 +46,13 @@ class TestFilter(unittest.TestCase): bsfs:Feature rdfs:subClassOf bsfs:Array . xsd:integer rdfs:subClassOf bsfs:Number . + bsfs:Colors rdfs:subClassOf bsfs:Feature ; + bsfs:dimension "5"^^xsd:integer . + + bse:colors rdfs:subClassOf bsfs:Predicate ; + rdfs:domain bsfs:Entity ; + rdfs:range bsfs:Colors . + bse:comment rdfs:subClassOf bsfs:Predicate ; rdfs:domain bsfs:Entity ; rdfs:range xsd:string ; @@ -147,12 +154,18 @@ class TestFilter(unittest.TestCase): self.assertEqual(resolver(schema.node(ns.bsfs.Entity), ast.filter.Has(ns.bse.comment)), ast.filter.Has(ns.bse.comment)) + # for sake of completeness: Distance + self.assertEqual(resolver(schema.node(ns.bsfs.Entity), + ast.filter.Any(ns.bse.colors, ast.filter.Distance([1,2,3,4,5], 1))), + ast.filter.Any(ns.bse.colors, ast.filter.Distance([1,2,3,4,5], 1))) # route errors self.assertRaises(errors.BackendError, resolver, schema.node(ns.bsfs.Tag), ast.filter.Predicate(ns.bse.comment)) self.assertRaises(errors.BackendError, resolver, schema.node(ns.bsfs.Tag), ast.filter.Any(ast.filter.PredicateExpression(), ast.filter.Equals('foo'))) self.assertRaises(errors.BackendError, resolver._one_of, ast.filter.OneOf(ast.filter.Predicate(ns.bsfs.Predicate))) + # for sake of coverage completeness: valid OneOf + self.assertIsNotNone(resolver._one_of(ast.filter.OneOf(ast.filter.Predicate(ns.bse.colors)))) # check schema consistency self.assertRaises(errors.ConsistencyError, resolver, schema.node(ns.bsfs.Tag), diff --git a/test/query/ast_test/test_filter_.py b/test/query/ast_test/test_filter_.py index 4f69bdc..9eb92e2 100644 --- a/test/query/ast_test/test_filter_.py +++ b/test/query/ast_test/test_filter_.py @@ -15,7 +15,7 @@ from bsfs.utils import URI from bsfs.query.ast.filter_ import _Expression, FilterExpression, PredicateExpression from bsfs.query.ast.filter_ import _Branch, Any, All from bsfs.query.ast.filter_ import _Agg, And, Or -from bsfs.query.ast.filter_ import Not, Has +from bsfs.query.ast.filter_ import Not, Has, Distance from bsfs.query.ast.filter_ import _Value, Is, Equals, Substring, StartsWith, EndsWith from bsfs.query.ast.filter_ import _Bounded, LessThan, GreaterThan from bsfs.query.ast.filter_ import Predicate, OneOf @@ -284,6 +284,39 @@ class TestValue(unittest.TestCase): self.assertEqual(cls(f).value, f) +class TestDistance(unittest.TestCase): + def test_essentials(self): + ref = (1,2,3) + # comparison + self.assertEqual(Distance(ref, 3), Distance(ref, 3)) + self.assertEqual(hash(Distance(ref, 3)), hash(Distance(ref, 3))) + # comparison respects type + self.assertNotEqual(Distance(ref, 3), FilterExpression()) + self.assertNotEqual(hash(Distance(ref, 3)), hash(FilterExpression())) + # comparison respects reference + 
self.assertNotEqual(Distance((1,2,3), 3, False), Distance((1,2), 3, False)) + self.assertNotEqual(hash(Distance((1,2,3), 3, False)), hash(Distance((1,2), 3, False))) + self.assertNotEqual(Distance((1,2,3), 3, False), Distance((1,5,3), 3, False)) + self.assertNotEqual(hash(Distance((1,2,3), 3, False)), hash(Distance((1,5,3), 3, False))) + # comparison respects threshold + self.assertNotEqual(Distance((1,2,3), 3, False), Distance((1,2,3), 3.1, False)) + self.assertNotEqual(hash(Distance((1,2,3), 3, False)), hash(Distance((1,2,3), 3.1, False))) + # comparison respects strict flag + self.assertNotEqual(Distance((1,2,3), 3, False), Distance((1,2,3), 3, True)) + self.assertNotEqual(hash(Distance((1,2,3), 3, False)), hash(Distance((1,2,3), 3, True))) + # string conversion + self.assertEqual(str(Distance(ref, 3, False)), 'Distance((1, 2, 3), 3.0, False)') + self.assertEqual(repr(Distance(ref, 3, False)), 'Distance((1, 2, 3), 3.0, False)') + + def test_members(self): + self.assertEqual(Distance((1,2,3), 3, False).reference, (1,2,3)) + self.assertEqual(Distance((3,2,1), 3, False).reference, (3,2,1)) + self.assertEqual(Distance((1,2,3), 3, False).threshold, 3.0) + self.assertEqual(Distance((1,2,3), 53.45, False).threshold, 53.45) + self.assertEqual(Distance((1,2,3), 3, False).strict, False) + self.assertEqual(Distance((1,2,3), 3, True).strict, True) + + class TestBounded(unittest.TestCase): def test_essentials(self): # comparison respects type diff --git a/test/query/test_validator.py b/test/query/test_validator.py index 63ead52..dc9d913 100644 --- a/test/query/test_validator.py +++ b/test/query/test_validator.py @@ -38,6 +38,15 @@ class TestFilter(unittest.TestCase): bsfs:Feature rdfs:subClassOf bsfs:Array . xsd:integer rdfs:subClassOf bsfs:Number . + bsfs:Colors rdfs:subClassOf bsfs:Feature ; + bsfs:dimension "5"^^xsd:integer ; + bsfs:dtype bsfs:f32 . + + bse:color rdfs:subClassOf bsfs:Predicate ; + rdfs:domain bsfs:Node ; + rdfs:range bsfs:Colors ; + bsfs:unique "true"^^xsd:boolean . 
+ bse:comment rdfs:subClassOf bsfs:Predicate ; rdfs:domain bsfs:Node ; rdfs:range xsd:string ; @@ -88,6 +97,7 @@ class TestFilter(unittest.TestCase): ), ast.filter.Not(ast.filter.Any(ns.bse.comment, ast.filter.Not(ast.filter.Equals('hello world')))), + ast.filter.Any(ns.bse.color, ast.filter.Distance([1,2,3,4,5], 3)), ))))) # invalid paths raise consistency error self.assertRaises(errors.ConsistencyError, self.validate, self.schema.node(ns.bsfs.Entity), @@ -257,6 +267,23 @@ class TestFilter(unittest.TestCase): self.assertIsNone(self.validate._bounded(self.schema.literal(ns.xsd.integer), ast.filter.LessThan(0))) self.assertIsNone(self.validate._bounded(self.schema.literal(ns.xsd.integer), ast.filter.GreaterThan(0))) + def test_distance(self): + # type must be a literal + self.assertRaises(errors.ConsistencyError, self.validate._distance, self.schema.node(ns.bsfs.Node), + ast.filter.Distance([1,2,3], 1, False)) + # type must be a feature + self.assertRaises(errors.ConsistencyError, self.validate._distance, self.schema.literal(ns.bsfs.Array), + ast.filter.Distance([1,2,3], 1, False)) + # type must be in the schema + self.assertRaises(errors.ConsistencyError, self.validate._distance, self.schema.literal(ns.bsfs.Feature).child(ns.bsfs.Invalid), + ast.filter.Distance([1,2,3], 1, False)) + # FIXME: reference must be a numpy array + # reference must have the correct dimension + self.assertRaises(errors.ConsistencyError, self.validate._distance, self.schema.literal(ns.bsfs.Colors), + ast.filter.Distance([1,2,3], 1, False)) + # FIXME: reference must have the correct dtype + # distance accepts correct expressions + self.assertIsNone(self.validate._distance(self.schema.literal(ns.bsfs.Colors), ast.filter.Distance([1,2,3,4,5], 1, False))) ## main ## diff --git a/test/triple_store/sparql/test_distance.py b/test/triple_store/sparql/test_distance.py new file mode 100644 index 0000000..0659459 --- /dev/null +++ b/test/triple_store/sparql/test_distance.py @@ -0,0 +1,61 @@ +""" + +Part of the bsfs test suite. +A copy of the license is provided with the project. 
+Author: Matthias Baumgartner, 2022 +""" +# imports +import numpy as np +import unittest + +# objects to test +from bsfs.triple_store.sparql import distance + + +## code ## + +class TestDistance(unittest.TestCase): + + def test_euclid(self): + # self-distance is zero + self.assertEqual(distance.euclid([1,2,3,4], [1,2,3,4]), 0.0) + # accepts list-like arguments + self.assertAlmostEqual(distance.euclid([1,2,3,4], [2,3,4,5]), 2.0, 3) + self.assertAlmostEqual(distance.euclid((1,2,3,4), (2,3,4,5)), 2.0, 3) + # dimension can vary + self.assertAlmostEqual(distance.euclid([1,2,3], [2,3,4]), 1.732, 3) + self.assertAlmostEqual(distance.euclid([1,2,3,4,5], [2,3,4,5,6]), 2.236, 3) + # vector can be zero + self.assertAlmostEqual(distance.euclid([0,0,0], [1,2,3]), 3.742, 3) + + def test_cosine(self): + # self-distance is zero + self.assertEqual(distance.cosine([1,2,3,4], [1,2,3,4]), 0.0) + # accepts list-like arguments + self.assertAlmostEqual(distance.cosine([1,2,3,4], [4,3,2,1]), 0.333, 3) + self.assertAlmostEqual(distance.cosine((1,2,3,4), (4,3,2,1)), 0.333, 3) + # dimension can vary + self.assertAlmostEqual(distance.cosine([1,2,3], [3,2,1]), 0.286, 3) + self.assertAlmostEqual(distance.cosine([1,2,3,4,5], [5,4,3,2,1]), 0.364, 3) + # vector can be zero + self.assertAlmostEqual(distance.cosine([0,0,0], [1,2,3]), 1.0, 3) + + def test_manhatten(self): + # self-distance is zero + self.assertEqual(distance.manhatten([1,2,3,4], [1,2,3,4]), 0.0) + # accepts list-like arguments + self.assertAlmostEqual(distance.manhatten([1,2,3,4], [2,3,4,5]), 4.0, 3) + self.assertAlmostEqual(distance.manhatten((1,2,3,4), (2,3,4,5)), 4.0, 3) + # dimension can vary + self.assertAlmostEqual(distance.manhatten([1,2,3], [2,3,4]), 3.0, 3) + self.assertAlmostEqual(distance.manhatten([1,2,3,4,5], [2,3,4,5,6]), 5.0, 3) + # vector can be zero + self.assertAlmostEqual(distance.manhatten([0,0,0], [1,2,3]), 6.0, 3) + + +## main ## + +if __name__ == '__main__': + unittest.main() + +## EOF ## diff --git a/test/triple_store/sparql/test_parse_filter.py b/test/triple_store/sparql/test_parse_filter.py index 5c16f11..8764535 100644 --- a/test/triple_store/sparql/test_parse_filter.py +++ b/test/triple_store/sparql/test_parse_filter.py @@ -42,6 +42,15 @@ class TestParseFilter(unittest.TestCase): xsd:integer rdfs:subClassOf bsfs:Number . bsfs:URI rdfs:subClassOf bsfs:Literal . + bsfs:Colors rdfs:subClassOf bsfs:Feature ; + bsfs:dimension "4"^^xsd:integer ; + bsfs:dtype xsd:integer ; + bsfs:distance bsfs:euclidean . + + bse:colors rdfs:subClassOf bsfs:Predicate ; + rdfs:domain bsfs:Entity ; + rdfs:range bsfs:Colors . 
+ bse:comment rdfs:subClassOf bsfs:Predicate ; rdfs:domain bsfs:Node ; rdfs:range xsd:string ; @@ -74,9 +83,6 @@ class TestParseFilter(unittest.TestCase): ''') - # parser instance - self.parser = Filter(self.schema) - # graph to test queries self.graph = rdflib.Graph() # schema hierarchies @@ -117,6 +123,13 @@ class TestParseFilter(unittest.TestCase): # image iso self.graph.add((rdflib.URIRef('http://example.com/image#1234'), rdflib.URIRef(ns.bse.iso), rdflib.Literal(1234, datatype=rdflib.XSD.integer))) self.graph.add((rdflib.URIRef('http://example.com/image#4321'), rdflib.URIRef(ns.bse.iso), rdflib.Literal(4321, datatype=rdflib.XSD.integer))) + # color features + self.graph.add((rdflib.URIRef('http://example.com/entity#1234'), rdflib.URIRef(ns.bse.colors), rdflib.Literal([1,2,3,4], datatype=rdflib.URIRef(ns.bsfs.Colors)))) + self.graph.add((rdflib.URIRef('http://example.com/entity#4321'), rdflib.URIRef(ns.bse.colors), rdflib.Literal([4,3,2,1], datatype=rdflib.URIRef(ns.bsfs.Colors)))) + self.graph.add((rdflib.URIRef('http://example.com/image#1234'), rdflib.URIRef(ns.bse.colors), rdflib.Literal([3,4,2,1], datatype=rdflib.URIRef(ns.bsfs.Colors)))) + + # parser instance + self.parser = Filter(self.graph, self.schema) def test_routing(self): @@ -617,6 +630,37 @@ class TestParseFilter(unittest.TestCase): {'http://example.com/tag#1234'}) + def test_distance(self): + # node colors distance to [2,4,3,1] + # entity#1234 [1,2,3,4] 3.742 + # entity#4321 [4,3,2,1] 2.449 + # image#1234 [3,4,2,1] 1.414 + + # _distance expects a feature + self.assertRaises(errors.BackendError, self.parser._distance, self.schema.node(ns.bsfs.Entity), ast.filter.Distance([1,2,3,4], 1), '') + # reference must have the correct dimension + self.assertRaises(errors.ConsistencyError, self.parser._distance, self.schema.literal(ns.bsfs.Colors), ast.filter.Distance([1,2,3], 1), '') + self.assertRaises(errors.ConsistencyError, self.parser._distance, self.schema.literal(ns.bsfs.Colors), ast.filter.Distance([1,2,3,4,5], 1), '') + # _distance respects threshold + q = self.parser(self.schema.node(ns.bsfs.Entity), ast.filter.Any(ns.bse.colors, ast.filter.Distance([2,4,3,1], 4))) + self.assertSetEqual({str(guid) for guid, in self.graph.query(q)}, + {'http://example.com/entity#1234', 'http://example.com/entity#4321', 'http://example.com/image#1234'}) + q = self.parser(self.schema.node(ns.bsfs.Entity), ast.filter.Any(ns.bse.colors, ast.filter.Distance([2,4,3,1], 3))) + self.assertSetEqual({str(guid) for guid, in self.graph.query(q)}, + {'http://example.com/entity#4321', 'http://example.com/image#1234'}) + q = self.parser(self.schema.node(ns.bsfs.Entity), ast.filter.Any(ns.bse.colors, ast.filter.Distance([2,4,3,1], 2))) + self.assertSetEqual({str(guid) for guid, in self.graph.query(q)}, + {'http://example.com/image#1234'}) + # result set can be empty + q = self.parser(self.schema.node(ns.bsfs.Entity), ast.filter.Any(ns.bse.colors, ast.filter.Distance([2,4,3,1], 1))) + self.assertSetEqual({str(guid) for guid, in self.graph.query(q)}, set()) + # _distance respects strict + q = self.parser(self.schema.node(ns.bsfs.Entity), ast.filter.Any(ns.bse.colors, ast.filter.Distance([1,2,3,4], 0, False))) + self.assertSetEqual({str(guid) for guid, in self.graph.query(q)}, + {'http://example.com/entity#1234'}) + q = self.parser(self.schema.node(ns.bsfs.Entity), ast.filter.Any(ns.bse.colors, ast.filter.Distance([1,2,3,4], 0, True))) + self.assertSetEqual({str(guid) for guid, in self.graph.query(q)}, set()) + def test_one_of(self): # _one_of expects a node 
self.assertRaises(errors.BackendError, self.parser._one_of, diff --git a/test/triple_store/sparql/test_sparql.py b/test/triple_store/sparql/test_sparql.py index 1f56a7e..435ca28 100644 --- a/test/triple_store/sparql/test_sparql.py +++ b/test/triple_store/sparql/test_sparql.py @@ -392,6 +392,23 @@ class TestSparqlStore(unittest.TestCase): class Foo(): pass self.assertRaises(TypeError, setattr, store, 'schema', Foo()) + # cannot define features w/o known distance function + invalid = bsc.from_string(''' + prefix rdfs: + prefix xsd: + prefix bsfs: + prefix bse: + + bsfs:Array rdfs:subClassOf bsfs:Literal . + bsfs:Feature rdfs:subClassOf bsfs:Array . + + bsfs:Colors rdfs:subClassOf bsfs:Feature ; + bsfs:dimension "4"^^xsd:integer ; + bsfs:distance bsfs:foobar . + + ''') + self.assertRaises(ValueError, setattr, store, 'schema', invalid) + # cannot migrate to incompatible schema invalid = bsc.from_string(''' prefix rdfs: -- cgit v1.2.3 From afdfc25408c3b9d2c779c83e2e193d68a973810b Mon Sep 17 00:00:00 2001 From: Matthias Baumgartner Date: Mon, 16 Jan 2023 21:38:55 +0100 Subject: namespace to string plain uri --- bsfs/namespace/namespace.py | 2 +- test/namespace/test_namespace.py | 12 ++++++------ 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/bsfs/namespace/namespace.py b/bsfs/namespace/namespace.py index f652dcd..1d443c1 100644 --- a/bsfs/namespace/namespace.py +++ b/bsfs/namespace/namespace.py @@ -59,7 +59,7 @@ class Namespace(): return hash((type(self), self.prefix, self.fsep, self.psep)) def __str__(self) -> str: - return f'{typename(self)}({self.prefix})' + return str(self.prefix) def __repr__(self) -> str: return f'{typename(self)}({self.prefix}, {self.fsep}, {self.psep})' diff --git a/test/namespace/test_namespace.py b/test/namespace/test_namespace.py index f109653..2536203 100644 --- a/test/namespace/test_namespace.py +++ b/test/namespace/test_namespace.py @@ -20,15 +20,15 @@ from bsfs.namespace.namespace import Namespace, ClosedNamespace class TestNamespace(unittest.TestCase): def test_essentials(self): # string conversion - self.assertEqual(str(Namespace('http://example.org/')), 'Namespace(http://example.org)') - self.assertEqual(str(Namespace('http://example.org#')), 'Namespace(http://example.org)') + self.assertEqual(str(Namespace('http://example.org/')), 'http://example.org') + self.assertEqual(str(Namespace('http://example.org#')), 'http://example.org') self.assertEqual(repr(Namespace('http://example.org/')), 'Namespace(http://example.org, #, /)') self.assertEqual(repr(Namespace('http://example.org#')), 'Namespace(http://example.org, #, /)') self.assertEqual(repr(Namespace('http://example.org', fsep='.')), 'Namespace(http://example.org, ., /)') self.assertEqual(repr(Namespace('http://example.org', psep='.')), 'Namespace(http://example.org, #, .)') # repeated separators are truncated - self.assertEqual(str(Namespace('http://example.org////')), 'Namespace(http://example.org)') - self.assertEqual(str(Namespace('http://example.org####')), 'Namespace(http://example.org)') + self.assertEqual(str(Namespace('http://example.org////')), 'http://example.org') + self.assertEqual(str(Namespace('http://example.org####')), 'http://example.org') self.assertEqual(repr(Namespace('http://example.org///##')), 'Namespace(http://example.org, #, /)') # comparison class Foo(Namespace): pass @@ -83,8 +83,8 @@ class TestNamespace(unittest.TestCase): class TestClosedNamespace(unittest.TestCase): def test_essentials(self): # string conversion - 
self.assertEqual(str(ClosedNamespace('http://example.org/')), 'ClosedNamespace(http://example.org)')
- self.assertEqual(str(ClosedNamespace('http://example.org#')), 'ClosedNamespace(http://example.org)')
+ self.assertEqual(str(ClosedNamespace('http://example.org/')), 'http://example.org')
+ self.assertEqual(str(ClosedNamespace('http://example.org#')), 'http://example.org')
self.assertEqual(repr(ClosedNamespace('http://example.org/')), 'ClosedNamespace(http://example.org, #, /)')
self.assertEqual(repr(ClosedNamespace('http://example.org#')), 'ClosedNamespace(http://example.org, #, /)')
self.assertEqual(repr(ClosedNamespace('http://example.org', fsep='.')), 'ClosedNamespace(http://example.org, ., /)')
-- cgit v1.2.3

From 76fa694911f54e293ddf517246c6c4a1e8e745fd Mon Sep 17 00:00:00 2001
From: Matthias Baumgartner
Date: Mon, 16 Jan 2023 21:39:47 +0100
Subject: uuid from dict

---
bsfs/utils/uuid.py | 7 +++++++
test/utils/test_uuid.py | 4 ++++
2 files changed, 11 insertions(+)

diff --git a/bsfs/utils/uuid.py b/bsfs/utils/uuid.py
index 6366b18..ba5cf52 100644
--- a/bsfs/utils/uuid.py
+++ b/bsfs/utils/uuid.py
@@ -7,6 +7,7 @@ Author: Matthias Baumgartner, 2022
# imports
from collections import abc
import hashlib
+import json
import os
import platform
import random
@@ -105,4 +106,10 @@ class UCID():
with open(path, 'rb') as ifile:
return HASH(ifile.read()).hexdigest()
+
+ @staticmethod
+ def from_dict(content: dict) -> str:
+ """Return the content id (hash) of a dict."""
+ return HASH(json.dumps(content).encode('ascii', 'ignore')).hexdigest()
+
## EOF ##
diff --git a/test/utils/test_uuid.py b/test/utils/test_uuid.py
index 49176d4..0de96ed 100644
--- a/test/utils/test_uuid.py
+++ b/test/utils/test_uuid.py
@@ -83,6 +83,10 @@ class TestUCID(unittest.TestCase):
def test_from_path(self):
self.assertEqual(UCID.from_path(self._path), self._checksum)
+ def test_from_dict(self):
+ self.assertEqual(UCID.from_dict({'hello': 'world', 'foo': 1234, 'bar': False}),
+ '8d2544395a0d2827e3d9ce8cd619d5e3f801e8126bf3f93ee5abd38158959585')
+
## main ##
-- cgit v1.2.3

From 3504609e1ba1f7f653fa79910474bebd3ec24d8a Mon Sep 17 00:00:00 2001
From: Matthias Baumgartner
Date: Mon, 16 Jan 2023 21:41:20 +0100
Subject: various minor fixes

---
bsfs/query/validator.py | 4 +---
bsfs/schema/serialize.py | 18 +++++++++++++-----
bsfs/triple_store/sparql/sparql.py | 2 +-
bsfs/utils/errors.py | 3 +++
test/triple_store/sparql/test_sparql.py | 2 +-
5 files changed, 19 insertions(+), 10 deletions(-)

diff --git a/bsfs/query/validator.py b/bsfs/query/validator.py
index 1b7f688..904ac14 100644
--- a/bsfs/query/validator.py
+++ b/bsfs/query/validator.py
@@ -189,9 +189,7 @@ class Filter():
# reference matches type_
if len(node.reference) != type_.dimension:
raise errors.ConsistencyError(f'reference has dimension {len(node.reference)}, expected {type_.dimension}')
- # FIXME:
- #if node.reference.dtype != type_.dtype:
- # raise errors.ConsistencyError(f'')
+ # FIXME: test dtype
## conditions
diff --git a/bsfs/schema/serialize.py b/bsfs/schema/serialize.py
index 8b31737..acc009a 100644
--- a/bsfs/schema/serialize.py
+++ b/bsfs/schema/serialize.py
@@ -35,8 +35,11 @@ def from_string(schema_str: str) -> schema.Schema:
graph.parse(data=schema_str, format='turtle')
# helper functions
- # FIXME: type annotation
- def _fetch_value(subject: URI, predicate: rdflib.URIRef, value_factory) -> typing.Optional[typing.Any]:
+ def _fetch_value(
+ subject: URI,
+ predicate: rdflib.URIRef,
+ value_factory: typing.Callable[[typing.Any], typing.Any],
+ ) -> 
typing.Optional[typing.Any]:
"""Fetch the object of a given subject and predicate.
Raises a `errors.ConsistencyError` if multiple objects match.
"""
@@ -242,9 +245,14 @@ def to_string(schema_inst: schema.Schema, fmt: str = 'turtle') -> str:
for triple in _parse(node):
graph.add(triple)
# add known namespaces for readability
- # FIXME: more systematically (e.g. for all in ns?)
- graph.bind('bsfs', rdflib.URIRef('http://bsfs.ai/schema/'))
- graph.bind('bse', rdflib.URIRef('http://bsfs.ai/schema/Entity#'))
+ # FIXME: more generically?
+ graph.bind('bse', rdflib.URIRef(ns.bse['']))
+ graph.bind('bsfs', rdflib.URIRef(ns.bsfs['']))
+ graph.bind('bsm', rdflib.URIRef(ns.bsm['']))
+ graph.bind('rdf', rdflib.URIRef(ns.rdf['']))
+ graph.bind('rdfs', rdflib.URIRef(ns.rdfs['']))
+ graph.bind('schema', rdflib.URIRef(ns.schema['']))
+ graph.bind('xsd', rdflib.URIRef(ns.xsd['']))
# serialize to turtle
return graph.serialize(format=fmt)
diff --git a/bsfs/triple_store/sparql/sparql.py b/bsfs/triple_store/sparql/sparql.py
index dfd9871..fedd227 100644
--- a/bsfs/triple_store/sparql/sparql.py
+++ b/bsfs/triple_store/sparql/sparql.py
@@ -132,7 +132,7 @@ class SparqlStore(base.TripleStoreBase):
if isinstance(cand, bsc.Feature) and cand.distance not in DISTANCE_FU}
if len(invalid) > 0:
cand, dist = zip(*invalid)
- raise ValueError(
+ raise errors.UnsupportedError(
f'unknown distance function {",".join(dist)} in feature {", ".join(cand)}')
# commit the current transaction
diff --git a/bsfs/utils/errors.py b/bsfs/utils/errors.py
index be9d40e..6ae6484 100644
--- a/bsfs/utils/errors.py
+++ b/bsfs/utils/errors.py
@@ -41,4 +41,7 @@ class ConfigError(_BSFSError):
class BackendError(_BSFSError):
"""Could not parse an AST structure."""
+class UnsupportedError(_BSFSError):
+ """Some requested functionality is not supported by an implementation."""
+
## EOF ##
diff --git a/test/triple_store/sparql/test_sparql.py b/test/triple_store/sparql/test_sparql.py
index 435ca28..7fbfb65 100644
--- a/test/triple_store/sparql/test_sparql.py
+++ b/test/triple_store/sparql/test_sparql.py
@@ -407,7 +407,7 @@ class TestSparqlStore(unittest.TestCase):
bsfs:distance bsfs:foobar .
''')
- self.assertRaises(ValueError, setattr, store, 'schema', invalid)
+ self.assertRaises(errors.UnsupportedError, setattr, store, 'schema', invalid)
# cannot migrate to incompatible schema
invalid = bsc.from_string('''
-- cgit v1.2.3

From a4789394e40aaa3152ad6009955709a6c7d277c2 Mon Sep 17 00:00:00 2001
From: Matthias Baumgartner
Date: Fri, 20 Jan 2023 14:36:11 +0100
Subject: fetch AST

---
bsfs/query/ast/__init__.py | 4 +-
bsfs/query/ast/fetch.py | 175 ++++++++++++++++++++++++++++
bsfs/query/ast/filter_.py | 1 +
test/query/ast_test/test_fetch.py | 239 ++++++++++++++++++++++++++++++++++++++
4 files changed, 418 insertions(+), 1 deletion(-)
create mode 100644 bsfs/query/ast/fetch.py
create mode 100644 test/query/ast_test/test_fetch.py

diff --git a/bsfs/query/ast/__init__.py b/bsfs/query/ast/__init__.py
index 704d051..66b097d 100644
--- a/bsfs/query/ast/__init__.py
+++ b/bsfs/query/ast/__init__.py
@@ -1,6 +1,6 @@
"""Query AST components.
-The query AST consists of a Filter syntax tree.
+The query AST consists of Filter and Fetch syntax trees.
Classes beginning with an underscore (_) represent internal type hierarchies
and should not be used for parsing. Note that the AST structures do not
(and cannot) check semantic validity or consistency with a given schema.
@@ -14,10 +14,12 @@ Author: Matthias Baumgartner, 2022
import typing
# inner-module imports
+from . import fetch
from . 
import filter_ as filter # pylint: disable=redefined-builtin # exports __all__: typing.Sequence[str] = ( + 'fetch', 'filter', ) diff --git a/bsfs/query/ast/fetch.py b/bsfs/query/ast/fetch.py new file mode 100644 index 0000000..5e603a1 --- /dev/null +++ b/bsfs/query/ast/fetch.py @@ -0,0 +1,175 @@ +""" + +Part of the BlackStar filesystem (bsfs) module. +A copy of the license is provided with the project. +Author: Matthias Baumgartner, 2022 +""" +# imports +from collections import abc +import typing + +# bsfs imports +from bsfs.utils import URI, typename, normalize_args + +# exports +__all__ : typing.Sequence[str] = ( + 'All', + 'Fetch', + 'FetchExpression', + 'Node', + 'This', + 'Value', + ) + + +## code ## + +class FetchExpression(abc.Hashable): + """Generic Fetch expression.""" + + def __repr__(self) -> str: + """Return the expressions's string representation.""" + return f'{typename(self)}()' + + def __hash__(self) -> int: + """Return the expression's integer representation.""" + return hash(type(self)) + + def __eq__(self, other: typing.Any) -> bool: + """Return True if *self* and *other* are equivalent.""" + return isinstance(other, type(self)) + + +class All(FetchExpression): + """Fetch all child expressions.""" + + # child expressions. + expr: typing.Set[FetchExpression] + + def __init__(self, *expr): + # unpack child expressions + unfolded = set(normalize_args(*expr)) + # check child expressions + if len(unfolded) == 0: + raise AttributeError('expected at least one expression, found none') + if not all(isinstance(itm, FetchExpression) for itm in unfolded): + raise TypeError(expr) + # initialize + super().__init__() + # assign members + self.expr = unfolded + + def __iter__(self) -> typing.Iterator[FetchExpression]: + return iter(self.expr) + + def __len__(self) -> int: + return len(self.expr) + + def __repr__(self) -> str: + return f'{typename(self)}({self.expr})' + + def __hash__(self) -> int: + # FIXME: Produces different hashes for different orders of self.expr + return hash((super().__hash__(), tuple(self.expr))) + + def __eq__(self, other: typing.Any) -> bool: + return super().__eq__(other) and self.expr == other.expr + + +class _Branch(FetchExpression): + """Branch along a predicate.""" + + # FIXME: Use a Predicate (like in ast.filter) so that we can also reverse them! + + # predicate to follow. + predicate: URI + + def __init__(self, predicate: URI): + if not isinstance(predicate, URI): + raise TypeError(predicate) + self.predicate = predicate + + def __repr__(self) -> str: + return f'{typename(self)}({self.predicate})' + + def __hash__(self) -> int: + return hash((super().__hash__(), self.predicate)) + + def __eq__(self, other: typing.Any) -> bool: + return super().__eq__(other) and self.predicate == other.predicate + + +class Fetch(_Branch): + """Follow a predicate before evaluating a child epxression.""" + + # child expression. + expr: FetchExpression + + def __init__(self, predicate: URI, expr: FetchExpression): + # check child expressions + if not isinstance(expr, FetchExpression): + raise TypeError(expr) + # initialize + super().__init__(predicate) + # assign members + self.expr = expr + + def __repr__(self) -> str: + return f'{typename(self)}({self.predicate}, {self.expr})' + + def __hash__(self) -> int: + return hash((super().__hash__(), self.expr)) + + def __eq__(self, other: typing.Any) -> bool: + return super().__eq__(other) and self.expr == other.expr + + +class _Named(_Branch): + """Fetch a (named) symbol at a predicate.""" + + # symbol name. 
+ name: str + + def __init__(self, predicate: URI, name: str): + super().__init__(predicate) + self.name = str(name) + + def __repr__(self) -> str: + return f'{typename(self)}({self.predicate}, {self.name})' + + def __hash__(self) -> int: + return hash((super().__hash__(), self.name)) + + def __eq__(self, other: typing.Any) -> bool: + return super().__eq__(other) and self.name == other.name + + +class Node(_Named): # pylint: disable=too-few-public-methods + """Fetch a Node at a predicate.""" + # FIXME: Is this actually needed? + + +class Value(_Named): # pylint: disable=too-few-public-methods + """Fetch a Literal at a predicate.""" + + +class This(FetchExpression): + """Fetch the current Node.""" + + # symbol name. + name: str + + def __init__(self, name: str): + super().__init__() + self.name = str(name) + + def __repr__(self) -> str: + return f'{typename(self)}({self.name})' + + def __hash__(self) -> int: + return hash((super().__hash__(), self.name)) + + def __eq__(self, other: typing.Any) -> bool: + return super().__eq__(other) and self.name == other.name + +## EOF ## diff --git a/bsfs/query/ast/filter_.py b/bsfs/query/ast/filter_.py index 2f0270c..81b0de2 100644 --- a/bsfs/query/ast/filter_.py +++ b/bsfs/query/ast/filter_.py @@ -153,6 +153,7 @@ class _Agg(FilterExpression, abc.Collection): # check type if not all(isinstance(e, FilterExpression) for e in unfolded): raise TypeError(expr) + # FIXME: Require at least one child expression? # assign member self.expr = unfolded diff --git a/test/query/ast_test/test_fetch.py b/test/query/ast_test/test_fetch.py new file mode 100644 index 0000000..0c48a1f --- /dev/null +++ b/test/query/ast_test/test_fetch.py @@ -0,0 +1,239 @@ +""" + +Part of the tagit test suite. +A copy of the license is provided with the project. 
+Author: Matthias Baumgartner, 2022 +""" +# imports +import unittest + +# bsfs imports +from bsfs.namespace import ns +from bsfs.utils import URI + +# objects to test +from bsfs.query.ast.fetch import FetchExpression +from bsfs.query.ast.fetch import All, This +from bsfs.query.ast.fetch import _Branch, Fetch +from bsfs.query.ast.fetch import _Named, Node, Value + + +## code ## + +class TestExpression(unittest.TestCase): # FetchExpression + def test_essentials(self): + class Foo(FetchExpression): pass + # comparison + self.assertEqual(FetchExpression(), FetchExpression()) + self.assertEqual(hash(FetchExpression()), hash(FetchExpression())) + # comparison respects type + self.assertNotEqual(FetchExpression(), Foo()) + self.assertNotEqual(hash(FetchExpression()), hash(Foo())) + # string conversion + self.assertEqual(str(FetchExpression()), 'FetchExpression()') + self.assertEqual(repr(FetchExpression()), 'FetchExpression()') + self.assertEqual(str(Foo()), 'Foo()') + self.assertEqual(repr(Foo()), 'Foo()') + + +class TestAll(unittest.TestCase): # All + def test_essentials(self): + class Foo(All): pass + expr0 = This('hello') + expr1 = This('world') + # comparison + self.assertEqual(All(expr0), All(expr0)) + self.assertEqual(hash(All(expr0)), hash(All(expr0))) + # comparison respects type + self.assertNotEqual(All(expr0), Foo(expr0)) + self.assertNotEqual(hash(All(expr0)), hash(Foo(expr0))) + # comparison respects expressions + self.assertEqual(All(expr0, expr1), All(expr0, expr1)) + self.assertEqual(hash(All(expr0, expr1)), hash(All(expr0, expr1))) + self.assertNotEqual(All(expr0), All(expr1)) + self.assertNotEqual(hash(All(expr0)), hash(All(expr1))) + # expressions are unordered + self.assertEqual(All(expr0, expr1), All(expr1, expr0)) + self.assertEqual(hash(All(expr0, expr1)), hash(All(expr1, expr0))) + # string conversion + self.assertIn(str(All(expr0, expr1)), { + 'All({This(world), This(hello)})', + 'All({This(hello), This(world)})'}) + self.assertIn(repr(All(expr0, expr1)), { + 'All({This(world), This(hello)})', + 'All({This(hello), This(world)})'}) + + def test_members(self): + class Foo(): pass + expr0 = This('hello') + expr1 = This('world') + # requires at least one child expression + self.assertRaises(AttributeError, All) + # expr returns child expressions + self.assertEqual(All(expr0, expr1).expr, {expr0, expr1}) + # can pass expressions as arguments + self.assertEqual(All(expr0, expr1).expr, {expr0, expr1}) + # can pass a single expression as argument + self.assertEqual(All(expr0).expr, {expr0}) + # can pass expressions as list-like + self.assertEqual(All([expr0, expr1]).expr, {expr0, expr1}) + self.assertEqual(All((expr0, expr1)).expr, {expr0, expr1}) + self.assertEqual(All({expr0, expr1}).expr, {expr0, expr1}) + # can pass a single expression as list-like + self.assertEqual(All([expr0]).expr, {expr0}) + # must pass a FilterExpression + self.assertRaises(TypeError, All, Foo()) + self.assertRaises(TypeError, All, 1234) + self.assertRaises(TypeError, All, 'hello world') + # len returns the number of child expressions + self.assertEqual(len(All(expr0)), 1) + self.assertEqual(len(All(expr0, expr1)), 2) + # iter iterates over child expressions + self.assertSetEqual(set(All(expr0, expr1)), {expr0, expr1}) + + +class TestThis(unittest.TestCase): # This + def test_essentials(self): + class Foo(This): pass + # comparison + self.assertEqual(This('hello'), This('hello')) + self.assertEqual(hash(This('hello')), hash(This('hello'))) + # comparison respects type + 
self.assertNotEqual(This('hello'), Foo('hello')) + self.assertNotEqual(hash(This('hello')), hash(Foo('hello'))) + # comparison respects name + self.assertNotEqual(This('hello'), This('world')) + self.assertNotEqual(hash(This('hello')), hash(This('world'))) + # string conversion + self.assertEqual(str(This('hello')), 'This(hello)') + self.assertEqual(repr(This('hello')), 'This(hello)') + + def test_members(self): + class Foo(): pass + # name returns member + self.assertEqual(This('hello').name, 'hello') + self.assertEqual(This('world').name, 'world') + # name is converted to a string + self.assertEqual(This(1234).name, '1234') + foo = Foo() + self.assertEqual(This(foo).name, str(foo)) + + +class TestBranch(unittest.TestCase): # _Branch, Fetch + def test_essentials(self): + pred = ns.bse.tag + expr = FetchExpression() + # comparison + self.assertEqual(_Branch(pred), _Branch(pred)) + self.assertEqual(hash(_Branch(pred)), hash(_Branch(pred))) + self.assertEqual(Fetch(pred, expr), Fetch(pred, expr)) + self.assertEqual(hash(Fetch(pred, expr)), hash(Fetch(pred, expr))) + # comparison respects type + self.assertNotEqual(_Branch(pred), Fetch(pred, expr)) + self.assertNotEqual(hash(_Branch(pred)), hash(Fetch(pred, expr))) + self.assertNotEqual(Fetch(pred, expr), _Branch(pred)) + self.assertNotEqual(hash(Fetch(pred, expr)), hash(_Branch(pred))) + # comparison respects predicate + self.assertNotEqual(_Branch(pred), _Branch(ns.bse.filesize)) + self.assertNotEqual(hash(_Branch(pred)), hash(_Branch(ns.bse.filesize))) + self.assertNotEqual(Fetch(pred, expr), Fetch(ns.bse.filesize, expr)) + self.assertNotEqual(hash(Fetch(pred, expr)), hash(Fetch(ns.bse.filesize, expr))) + # comparison respects expression + self.assertNotEqual(Fetch(pred, expr), Fetch(pred, This('foo'))) + self.assertNotEqual(hash(Fetch(pred, expr)), hash(Fetch(pred, This('foo')))) + # string conversion + self.assertEqual(str(_Branch(pred)), f'_Branch({pred})') + self.assertEqual(repr(_Branch(pred)), f'_Branch({pred})') + self.assertEqual(str(Fetch(pred, expr)), f'Fetch({pred}, {expr})') + self.assertEqual(repr(Fetch(pred, expr)), f'Fetch({pred}, {expr})') + + def test_members(self): + class Foo(): pass + pred = ns.bse.tag + expr = FetchExpression() + + # predicate returns member + self.assertEqual(_Branch(pred).predicate, pred) + self.assertEqual(Fetch(pred, expr).predicate, pred) + # can pass an URI + self.assertEqual(_Branch(ns.bse.filename).predicate, ns.bse.filename) + self.assertEqual(Fetch(ns.bse.filename, expr).predicate, ns.bse.filename) + # must pass an URI + self.assertRaises(TypeError, _Branch, Foo()) + self.assertRaises(TypeError, Fetch, Foo(), expr) + # expression returns member + self.assertEqual(Fetch(pred, expr).expr, expr) + # expression must be a FilterExpression + self.assertRaises(TypeError, Fetch, ns.bse.filename, 'hello') + self.assertRaises(TypeError, Fetch, ns.bse.filename, 1234) + self.assertRaises(TypeError, Fetch, ns.bse.filename, Foo()) + + +class TestNamed(unittest.TestCase): # _Named, Node, Value + def test_essentials(self): + pred = ns.bse.tag + name = 'foobar' + # comparison + self.assertEqual(_Named(pred, name), _Named(pred, name)) + self.assertEqual(hash(_Named(pred, name)), hash(_Named(pred, name))) + # comparison respects type + self.assertNotEqual(_Named(pred, name), Node(pred, name)) + self.assertNotEqual(Node(pred, name), Value(pred, name)) + self.assertNotEqual(Value(pred, name), _Named(pred, name)) + self.assertNotEqual(hash(_Named(pred, name)), hash(Node(pred, name))) + 
self.assertNotEqual(hash(Node(pred, name)), hash(Value(pred, name))) + self.assertNotEqual(hash(Value(pred, name)), hash(_Named(pred, name))) + # comparison respects predicate + self.assertNotEqual(_Named(pred, name), _Named(ns.bse.filesize, name)) + self.assertNotEqual(hash(_Named(pred, name)), hash(_Named(ns.bse.filesize, name))) + self.assertNotEqual(Node(pred, name), Node(ns.bse.filesize, name)) + self.assertNotEqual(hash(Node(pred, name)), hash(Node(ns.bse.filesize, name))) + self.assertNotEqual(Value(pred, name), Value(ns.bse.filesize, name)) + self.assertNotEqual(hash(Value(pred, name)), hash(Value(ns.bse.filesize, name))) + # comparison respects name + self.assertNotEqual(_Named(pred, name), _Named(pred, 'foo')) + self.assertNotEqual(hash(_Named(pred, name)), hash(_Named(pred, 'foo'))) + self.assertNotEqual(Node(pred, name), Node(pred, 'foo')) + self.assertNotEqual(hash(Node(pred, name)), hash(Node(pred, 'foo'))) + self.assertNotEqual(Value(pred, name), Value(pred, 'foo')) + self.assertNotEqual(hash(Value(pred, name)), hash(Value(pred, 'foo'))) + # string conversion + self.assertEqual(str(_Named(pred, name)), f'_Named({pred}, {name})') + self.assertEqual(repr(_Named(pred, name)), f'_Named({pred}, {name})') + self.assertEqual(str(Node(pred, name)), f'Node({pred}, {name})') + self.assertEqual(repr(Node(pred, name)), f'Node({pred}, {name})') + self.assertEqual(str(Value(pred, name)), f'Value({pred}, {name})') + self.assertEqual(repr(Value(pred, name)), f'Value({pred}, {name})') + + def test_members(self): + class Foo(): pass + pred = ns.bse.tag + name = 'foobar' + # predicate returns member + self.assertEqual(_Named(pred, name).predicate, pred) + self.assertEqual(Node(pred, name).predicate, pred) + self.assertEqual(Value(pred, name).predicate, pred) + # can pass an URI as predicate + self.assertEqual(_Named(ns.bse.filename, name).predicate, ns.bse.filename) + self.assertEqual(Node(ns.bse.filename, name).predicate, ns.bse.filename) + self.assertEqual(Value(ns.bse.filename, name).predicate, ns.bse.filename) + # must pass an URI + self.assertRaises(TypeError, _Named, Foo(), name) + self.assertRaises(TypeError, Node, Foo(), name) + self.assertRaises(TypeError, Value, Foo(), name) + # name returns member + self.assertEqual(_Named(pred, name).name, name) + self.assertEqual(Node(pred, name).name, name) + self.assertEqual(Value(pred, name).name, name) + # name is converted to a string + self.assertEqual(_Named(pred, 1234).name, '1234') + self.assertEqual(Node(pred, 1234).name, '1234') + self.assertEqual(Value(pred, 1234).name, '1234') + + +## main ## + +if __name__ == '__main__': + unittest.main() + +## EOF ## -- cgit v1.2.3 From e2f08efc0d8a3c875994bdb69623c30cce5079d9 Mon Sep 17 00:00:00 2001 From: Matthias Baumgartner Date: Fri, 20 Jan 2023 18:01:17 +0100 Subject: fetch AST validation --- bsfs/query/validator.py | 123 ++++++++++++++++++++++++- test/query/test_validator.py | 215 ++++++++++++++++++++++++++++++++++++++++++- 2 files changed, 336 insertions(+), 2 deletions(-) diff --git a/bsfs/query/validator.py b/bsfs/query/validator.py index 904ac14..9fbff12 100644 --- a/bsfs/query/validator.py +++ b/bsfs/query/validator.py @@ -49,7 +49,7 @@ class Filter(): """ # root_type must be a schema.Node if not isinstance(root_type, bsc.Node): - raise TypeError(f'Expected a node, found {typename(root_type)}') + raise TypeError(f'expected a node, found {typename(root_type)}') # root_type must exist in the schema if root_type not in self.schema.nodes(): raise errors.ConsistencyError(f'{root_type} is not 
defined in the schema') @@ -223,4 +223,125 @@ class Filter(): # FIXME: Check if node.value corresponds to type_ +class Fetch(): + """Validate a `bsfs.query.ast.fetch` query's structure and schema compliance. + + * Value can only be applied on literals + * Node can only be applied on nodes + * Names must be non-empty + * Branching nodes' predicates must match the type + * Symbols must be in the schema + * Predicates must follow the schema + + """ + + # schema to validate against. + schema: bsc.Schema + + def __init__(self, schema: bsc.Schema): + self.schema = schema + + def __call__(self, root_type: bsc.Node, query: ast.fetch.FetchExpression): + """Validate a fetch *query*, assuming the subject having *root_type*. + + Raises a `bsfs.utils.errors.ConsistencyError` if the query violates the schema. + Raises a `bsfs.utils.errors.BackendError` if the query structure is invalid. + + """ + # root_type must be a schema.Node + if not isinstance(root_type, bsc.Node): + raise TypeError(f'expected a node, found {typename(root_type)}') + # root_type must exist in the schema + if root_type not in self.schema.nodes(): + raise errors.ConsistencyError(f'{root_type} is not defined in the schema') + # query must be a FetchExpression + if not isinstance(query, ast.fetch.FetchExpression): + raise TypeError(f'expected a fetch expression, found {typename(query)}') + # check root expression + self._parse_fetch_expression(root_type, query) + # all tests passed + return True + + def _parse_fetch_expression(self, type_: bsc.Vertex, node: ast.fetch.FetchExpression): + """Route *node* to the handler of the respective FetchExpression subclass.""" + if isinstance(node, (ast.fetch.Fetch, ast.fetch.Value, ast.fetch.Node)): + # NOTE: don't return so that checks below are executed + self._branch(type_, node) + if isinstance(node, (ast.fetch.Value, ast.fetch.Node)): + # NOTE: don't return so that checks below are executed + self._named(type_, node) + if isinstance(node, ast.fetch.All): + return self._all(type_, node) + if isinstance(node, ast.fetch.Fetch): + return self._fetch(type_, node) + if isinstance(node, ast.fetch.Value): + return self._value(type_, node) + if isinstance(node, ast.fetch.Node): + return self._node(type_, node) + if isinstance(node, ast.fetch.This): + return self._this(type_, node) + # invalid node + raise errors.BackendError(f'expected fetch expression, found {node}') + + def _all(self, type_: bsc.Vertex, node: ast.fetch.All): + # check child expressions + for expr in node: + self._parse_fetch_expression(type_, expr) + + def _branch(self, type_: bsc.Vertex, node: ast.fetch._Branch): + # type is a node + if not isinstance(type_, bsc.Node): + raise errors.ConsistencyError(f'expected a Node, found {type_}') + # node exists in the schema + if type_ not in self.schema.nodes(): + raise errors.ConsistencyError(f'node {type_} is not in the schema') + # predicate exists in the schema + if not self.schema.has_predicate(node.predicate): + raise errors.ConsistencyError(f'predicate {node.predicate} is not in the schema') + pred = self.schema.predicate(node.predicate) + # type_ must be a subclass of domain + if not type_ <= pred.domain: + raise errors.ConsistencyError( + f'expected type {pred.domain} or subtype thereof, found {type_}') + + def _fetch(self, type_: bsc.Vertex, node: ast.fetch.Fetch): # pylint: disable=unused-argument # type_ was considered in _branch + # range must be a node + rng = self.schema.predicate(node.predicate).range + if not isinstance(rng, bsc.Node): + raise errors.ConsistencyError( + 
f'expected the predicate\'s range to be a Node, found {rng}') + # child expression must be valid + self._parse_fetch_expression(rng, node.expr) + + def _named(self, type_: bsc.Vertex, node: ast.fetch._Named): # pylint: disable=unused-argument # type_ was considered in _branch + # name must be set + if node.name.strip() == '': + raise errors.BackendError('node name cannot be empty') + # FIXME: check for double name use? + + def _node(self, type_: bsc.Vertex, node: ast.fetch.Node): # pylint: disable=unused-argument # type_ was considered in _branch + # range must be a node + rng = self.schema.predicate(node.predicate).range + if not isinstance(rng, bsc.Node): + raise errors.ConsistencyError( + f'expected the predicate\'s range to be a Node, found {rng}') + + def _value(self, type_: bsc.Vertex, node: ast.fetch.Value): # pylint: disable=unused-argument # type_ was considered in _branch + # range must be a literal + rng = self.schema.predicate(node.predicate).range + if not isinstance(rng, bsc.Literal): + raise errors.ConsistencyError( + f'expected the predicate\'s range to be a Literal, found {rng}') + + def _this(self, type_: bsc.Vertex, node: ast.fetch.This): + # type is a node + if not isinstance(type_, bsc.Node): + raise errors.ConsistencyError(f'expected a Node, found {type_}') + # node exists in the schema + if type_ not in self.schema.nodes(): + raise errors.ConsistencyError(f'node {type_} is not in the schema') + # name must be set + if node.name.strip() == '': + raise errors.BackendError('node name cannot be empty') + ## EOF ## diff --git a/test/query/test_validator.py b/test/query/test_validator.py index dc9d913..fec3d23 100644 --- a/test/query/test_validator.py +++ b/test/query/test_validator.py @@ -14,7 +14,7 @@ from bsfs.query import ast from bsfs.utils import errors # objects to test -from bsfs.query.validator import Filter +from bsfs.query.validator import Filter, Fetch ## code ## @@ -286,6 +286,219 @@ class TestFilter(unittest.TestCase): self.assertIsNone(self.validate._distance(self.schema.literal(ns.bsfs.Colors), ast.filter.Distance([1,2,3,4,5], 1, False))) +class TestFetch(unittest.TestCase): + def setUp(self): + self.schema = _schema.from_string(''' + prefix rdfs: + prefix xsd: + + prefix bsfs: + prefix bse: + + bsfs:Entity rdfs:subClassOf bsfs:Node . + bsfs:Tag rdfs:subClassOf bsfs:Node . + xsd:string rdfs:subClassOf bsfs:Literal . + + bse:filename rdfs:subClassOf bsfs:Predicate ; + rdfs:domain bsfs:Node ; + rdfs:range xsd:string . + + bse:tag rdfs:subClassOf bsfs:Predicate ; + rdfs:domain bsfs:Entity ; + rdfs:range bsfs:Tag . + + bse:label rdfs:subClassOf bsfs:Predicate ; + rdfs:domain bsfs:Tag ; + rdfs:range xsd:string . 
+ + ''') + self.validate = Fetch(self.schema) + + def test_call(self): + # call accepts correct expressions + self.assertTrue(self.validate(self.schema.node(ns.bsfs.Entity), + ast.fetch.Fetch(ns.bse.tag, ast.fetch.Value(ns.bse.label, 'value')))) + self.assertTrue(self.validate(self.schema.node(ns.bsfs.Entity), + ast.fetch.Fetch(ns.bse.tag, ast.fetch.This('this')))) + self.assertTrue(self.validate(self.schema.node(ns.bsfs.Entity), + ast.fetch.This('this'))) + self.assertTrue(self.validate(self.schema.node(ns.bsfs.Entity), + ast.fetch.All(ast.fetch.This('this'), ast.fetch.Node(ns.bse.tag, 'node'), ast.fetch.Value(ns.bse.filename, 'value')))) + # type must be a Node + self.assertRaises(TypeError, self.validate, 1234, ast.fetch.This('this')) + self.assertRaises(TypeError, self.validate, 'foobar', ast.fetch.This('this')) + self.assertRaises(TypeError, self.validate, self.schema.literal(ns.bsfs.Literal), ast.fetch.This('this')) + # type must be in the schema + self.assertRaises(errors.ConsistencyError, self.validate, self.schema.node(ns.bsfs.Node).child(ns.bsfs.Invalid), + ast.fetch.FetchExpression()) + # expression must be a fetch expression + self.assertRaises(TypeError, self.validate, self.schema.node(ns.bsfs.Entity), 1234) + self.assertRaises(TypeError, self.validate, self.schema.node(ns.bsfs.Entity), 'hello') + self.assertRaises(TypeError, self.validate, self.schema.node(ns.bsfs.Entity), ast.filter.FilterExpression()) + # expression must be valid + self.assertRaises(errors.ConsistencyError, self.validate, self.schema.node(ns.bsfs.Entity), + ast.fetch.Fetch(ns.bse.tag, ast.fetch.Node(ns.bse.label, 'node'))) + self.assertRaises(errors.ConsistencyError, self.validate, self.schema.node(ns.bsfs.Entity), + ast.fetch.Value(ns.bse.tag, 'value')) + + def test_routing(self): + # Node passes _branch, _named, and _node checks + self.assertRaises(errors.ConsistencyError, self.validate._parse_fetch_expression, self.schema.node(ns.bsfs.Node), + ast.fetch.Node(ns.bse.tag, 'node')) # fails in _branch + self.assertRaises(errors.BackendError, self.validate._parse_fetch_expression, self.schema.node(ns.bsfs.Entity), + ast.fetch.Node(ns.bse.tag, '')) # fails in _named + self.assertRaises(errors.ConsistencyError, self.validate._parse_fetch_expression, self.schema.node(ns.bsfs.Entity), + ast.fetch.Node(ns.bse.label, 'node')) # fails in _node + # Value passes _branch, _named, and _value checks + self.assertRaises(errors.ConsistencyError, self.validate._parse_fetch_expression, self.schema.node(ns.bsfs.Node), + ast.fetch.Value(ns.bse.label, 'value')) # fails in _branch + self.assertRaises(errors.BackendError, self.validate._parse_fetch_expression, self.schema.node(ns.bsfs.Entity), + ast.fetch.Value(ns.bse.filename, '')) # fails in _named + self.assertRaises(errors.ConsistencyError, self.validate._parse_fetch_expression, self.schema.node(ns.bsfs.Entity), + ast.fetch.Value(ns.bse.tag, 'value')) # fails in _value + # Fetch passes _branch and _fetch checks + self.assertRaises(errors.ConsistencyError, self.validate._parse_fetch_expression, self.schema.node(ns.bsfs.Node), + ast.fetch.Fetch(ns.bse.tag, ast.fetch.This('this'))) # fails in _branch + self.assertRaises(errors.ConsistencyError, self.validate._parse_fetch_expression, self.schema.node(ns.bsfs.Node), + ast.fetch.Fetch(ns.bse.filename, ast.fetch.This('this'))) # fails in _fetch + # invalid expressions cannot be parsed + type_ = self.schema.node(ns.bsfs.Node) + self.assertRaises(errors.BackendError, self.validate._parse_fetch_expression, type_, + 
ast.filter.FilterExpression()) + self.assertRaises(errors.BackendError, self.validate._parse_fetch_expression, type_, + 1234) + self.assertRaises(errors.BackendError, self.validate._parse_fetch_expression, type_, + 'hello world') + + def test_all(self): + # all accepts correct expressions + self.assertIsNone(self.validate._all(self.schema.node(ns.bsfs.Entity), + ast.fetch.All(ast.fetch.Value(ns.bse.filename, 'value'), ast.fetch.Node(ns.bse.tag, 'node')))) + # child expressions must be valid + self.assertRaises(errors.ConsistencyError, self.validate._all, self.schema.node(ns.bsfs.Entity), + ast.fetch.All(ast.fetch.Value(ns.bse.tag, 'value'))) + self.assertRaises(errors.ConsistencyError, self.validate._all, self.schema.node(ns.bsfs.Entity), + ast.fetch.All(ast.fetch.Value(ns.bse.filename, 'value'), ast.fetch.Node(ns.bse.filename, 'node'))) + self.assertRaises(errors.ConsistencyError, self.validate._all, self.schema.node(ns.bsfs.Entity), + ast.fetch.All(ast.fetch.Value(ns.bse.tag, 'value'), ast.fetch.Node(ns.bse.tag, 'node'))) + self.assertRaises(errors.ConsistencyError, self.validate._all, self.schema.node(ns.bsfs.Entity), + ast.fetch.All(ast.fetch.Value(ns.bse.tag, 'value'), ast.fetch.Node(ns.bse.filename, 'node'))) + + def test_branch(self): + # branch accepts correct expressions + self.assertIsNone(self.validate._branch(self.schema.node(ns.bsfs.Entity), + ast.fetch._Branch(ns.bse.filename))) + self.assertIsNone(self.validate._branch(self.schema.node(ns.bsfs.Entity), + ast.fetch.Fetch(ns.bse.tag, ast.fetch.This('this')))) + self.assertIsNone(self.validate._branch(self.schema.node(ns.bsfs.Entity), + ast.fetch.Value(ns.bse.filename, 'value'))) + self.assertIsNone(self.validate._branch(self.schema.node(ns.bsfs.Entity), + ast.fetch.Node(ns.bse.tag, 'node'))) + # type must be a node + self.assertRaises(errors.ConsistencyError, self.validate._branch, self.schema.literal(ns.bsfs.Literal), + ast.fetch._Branch(ns.bse.filename)) + self.assertRaises(errors.ConsistencyError, self.validate._branch, self.schema.literal(ns.bsfs.Literal), + ast.fetch.Fetch(ns.bse.tag, ast.fetch.This('this'))) + self.assertRaises(errors.ConsistencyError, self.validate._branch, self.schema.literal(ns.bsfs.Literal), + ast.fetch.Value(ns.bse.filename, 'value')) + self.assertRaises(errors.ConsistencyError, self.validate._branch, self.schema.literal(ns.bsfs.Literal), + ast.fetch.Node(ns.bse.tag, 'node')) + # type must be in the schema + self.assertRaises(errors.ConsistencyError, self.validate._branch, self.schema.node(ns.bsfs.Node).child(ns.bsfs.Invalid), + ast.fetch._Branch(ns.bse.filename)) + self.assertRaises(errors.ConsistencyError, self.validate._branch, self.schema.node(ns.bsfs.Node).child(ns.bsfs.Invalid), + ast.fetch.Fetch(ns.bse.tag, ast.fetch.This('this'))) + self.assertRaises(errors.ConsistencyError, self.validate._branch, self.schema.node(ns.bsfs.Node).child(ns.bsfs.Invalid), + ast.fetch.Value(ns.bse.filename, 'value')) + self.assertRaises(errors.ConsistencyError, self.validate._branch, self.schema.node(ns.bsfs.Node).child(ns.bsfs.Invalid), + ast.fetch.Node(ns.bse.tag, 'node')) + # predicate must be in the schema + self.assertRaises(errors.ConsistencyError, self.validate._branch, self.schema.node(ns.bsfs.Entity), + ast.fetch._Branch(ns.bse.invalid)) + self.assertRaises(errors.ConsistencyError, self.validate._branch, self.schema.node(ns.bsfs.Entity), + ast.fetch.Fetch(ns.bse.invalid, ast.fetch.This('this'))) + self.assertRaises(errors.ConsistencyError, self.validate._branch, self.schema.node(ns.bsfs.Entity), + 
ast.fetch.Value(ns.bse.invalid, 'value')) + self.assertRaises(errors.ConsistencyError, self.validate._branch, self.schema.node(ns.bsfs.Entity), + ast.fetch.Node(ns.bse.invalid, 'node')) + # predicate's domain must be related to the type + self.assertRaises(errors.ConsistencyError, self.validate._branch, self.schema.node(ns.bsfs.Entity), + ast.fetch._Branch(ns.bse.label)) + self.assertRaises(errors.ConsistencyError, self.validate._branch, self.schema.node(ns.bsfs.Entity), + ast.fetch.Fetch(ns.bse.label, ast.fetch.This('this'))) + self.assertRaises(errors.ConsistencyError, self.validate._branch, self.schema.node(ns.bsfs.Entity), + ast.fetch.Node(ns.bse.label, 'node')) + self.assertRaises(errors.ConsistencyError, self.validate._branch, self.schema.node(ns.bsfs.Entity), + ast.fetch.Value(ns.bse.label, 'value')) + # predicate's domain cannot be a supertype + self.assertRaises(errors.ConsistencyError, self.validate._branch, self.schema.node(ns.bsfs.Node), + ast.fetch._Branch(ns.bse.tag)) + self.assertRaises(errors.ConsistencyError, self.validate._branch, self.schema.node(ns.bsfs.Node), + ast.fetch.Fetch(ns.bse.tag, ast.fetch.This('this'))) + self.assertRaises(errors.ConsistencyError, self.validate._branch, self.schema.node(ns.bsfs.Node), + ast.fetch.Node(ns.bse.tag, 'node')) + self.assertRaises(errors.ConsistencyError, self.validate._branch, self.schema.node(ns.bsfs.Node), + ast.fetch.Value(ns.bse.tag, 'value')) + # predicate's domain can be a subtype + self.assertIsNone(self.validate._branch(self.schema.node(ns.bsfs.Entity), + ast.fetch._Branch(ns.bse.filename))) + self.assertIsNone(self.validate._branch(self.schema.node(ns.bsfs.Entity), + ast.fetch.Value(ns.bse.filename, 'value'))) + + def test_fetch(self): + # fetch accepts correct expressions + self.assertIsNone(self.validate._fetch(self.schema.node(ns.bsfs.Node), + ast.fetch.Fetch(ns.bse.tag, ast.fetch.Value(ns.bse.label, 'value')))) + # range must be a node + self.assertRaises(errors.ConsistencyError, self.validate._fetch, self.schema.node(ns.bsfs.Node), + ast.fetch.Fetch(ns.bse.filename, ast.fetch.This('this'))) + # child expression must be valid + self.assertRaises(errors.ConsistencyError, self.validate._fetch, self.schema.node(ns.bsfs.Node), + ast.fetch.Fetch(ns.bse.tag, ast.fetch.Node(ns.bse.label, 'node'))) + + def test_named(self): + # named accepts correct expressions + self.assertIsNone(self.validate._named(self.schema.node(ns.bsfs.Entity), + ast.fetch.Node(ns.bse.tag, 'node'))) + self.assertIsNone(self.validate._named(self.schema.node(ns.bsfs.Entity), + ast.fetch.Value(ns.bse.filename, 'value'))) + # name must be non-empty + self.assertRaises(errors.BackendError, self.validate._named, self.schema.node(ns.bsfs.Entity), + ast.fetch.Node(ns.bse.tag, '')) + self.assertRaises(errors.BackendError, self.validate._named, self.schema.node(ns.bsfs.Entity), + ast.fetch.Value(ns.bse.filename, '')) + + def test_node(self): + # node accepts correct expressions + self.assertIsNone(self.validate._node(self.schema.node(ns.bsfs.Entity), + ast.fetch.Node(ns.bse.tag, 'node'))) + # range must be a node + self.assertRaises(errors.ConsistencyError, self.validate._node, self.schema.node(ns.bsfs.Entity), + ast.fetch.Node(ns.bse.filename, 'node')) + + def test_value(self): + # value accepts correct expressions + self.assertIsNone(self.validate._value(self.schema.node(ns.bsfs.Entity), + ast.fetch.Value(ns.bse.filename, 'value'))) + # range must be a literal + self.assertRaises(errors.ConsistencyError, self.validate._value, self.schema.node(ns.bsfs.Entity), 
+ ast.fetch.Value(ns.bse.tag, 'value')) + + def test_this(self): + # this accepts correct expressions + self.assertIsNone(self.validate._this(self.schema.node(ns.bsfs.Entity), ast.fetch.This('this'))) + # type must be a node + self.assertRaises(errors.ConsistencyError, self.validate._this, self.schema.literal(ns.bsfs.Literal), + ast.fetch.This('this')) + self.assertRaises(errors.ConsistencyError, self.validate._this, self.schema.predicate(ns.bsfs.Predicate), + ast.fetch.This('this')) + # type must be in the schema + self.assertRaises(errors.ConsistencyError, self.validate._this, self.schema.node(ns.bsfs.Node).child(ns.bsfs.Invalid), + ast.fetch.This('this')) + # name must be non-empty + self.assertRaises(errors.BackendError, self.validate._this, self.schema.node(ns.bsfs.Entity), ast.fetch.This('')) + + ## main ## if __name__ == '__main__': -- cgit v1.2.3 From 965f4dfe41afd552ed6477c75e1286c14e3580f6 Mon Sep 17 00:00:00 2001 From: Matthias Baumgartner Date: Sat, 21 Jan 2023 16:31:08 +0100 Subject: Fetch in triple stores: * fetch interface * sparql fetch ast parser * sparql fetch implementation --- bsfs/triple_store/base.py | 33 ++-- bsfs/triple_store/sparql/parse_fetch.py | 109 +++++++++++ bsfs/triple_store/sparql/parse_filter.py | 45 ++--- bsfs/triple_store/sparql/sparql.py | 50 ++++- bsfs/triple_store/sparql/utils.py | 141 ++++++++++++++ test/triple_store/sparql/test_parse_fetch.py | 263 ++++++++++++++++++++++++++ test/triple_store/sparql/test_parse_filter.py | 150 +++++++-------- test/triple_store/sparql/test_sparql.py | 70 +++++++ test/triple_store/sparql/test_utils.py | 155 +++++++++++++++ test/triple_store/test_base.py | 3 + 10 files changed, 898 insertions(+), 121 deletions(-) create mode 100644 bsfs/triple_store/sparql/parse_fetch.py create mode 100644 bsfs/triple_store/sparql/utils.py create mode 100644 test/triple_store/sparql/test_parse_fetch.py create mode 100644 test/triple_store/sparql/test_utils.py diff --git a/bsfs/triple_store/base.py b/bsfs/triple_store/base.py index 7e03714..1baa63b 100644 --- a/bsfs/triple_store/base.py +++ b/bsfs/triple_store/base.py @@ -11,7 +11,7 @@ import typing # inner-module imports from bsfs.query import ast from bsfs.utils import URI, typename -import bsfs.schema as _schema +import bsfs.schema as bsc # exports __all__: typing.Sequence[str] = ( @@ -82,12 +82,12 @@ class TripleStoreBase(abc.ABC): @property @abc.abstractmethod - def schema(self) -> _schema.Schema: + def schema(self) -> bsc.Schema: """Return the store's local schema.""" @schema.setter @abc.abstractmethod - def schema(self, schema: _schema.Schema): + def schema(self, schema: bsc.Schema): """Migrate to new schema by adding or removing class definitions. Commits before and after the migration. @@ -112,17 +112,28 @@ class TripleStoreBase(abc.ABC): @abc.abstractmethod def get( self, - node_type: _schema.Node, - query: typing.Optional[ast.filter.FilterExpression] = None, + node_type: bsc.Node, + filter: typing.Optional[ast.filter.FilterExpression] = None, # pylint: disable=redefined-builtin ) -> typing.Iterator[URI]: - """Return guids of nodes of type *node_type* that match the *query*. - Return all guids of the respective type if *query* is None. + """Return guids of nodes of type *node_type* that match the *filter*. + Return all guids of the respective type if *filter* is None. 
+ """ + + @abc.abstractmethod + def fetch( + self, + node_type: bsc.Node, + filter: ast.filter.FilterExpression, # pylint: disable=redefined-builtin + fetch: ast.fetch.FetchExpression, + ) -> typing.Iterator[typing.Tuple[URI, str, typing.Any]]: + """Return (guid, name, value) triples where the guid is determined by the *filter* + query and the name matches the *fetch* query. """ @abc.abstractmethod def exists( self, - node_type: _schema.Node, + node_type: bsc.Node, guids: typing.Iterable[URI], ) -> typing.Iterable[URI]: """Return those *guids* that exist and have type *node_type* or a subclass thereof.""" @@ -130,7 +141,7 @@ class TripleStoreBase(abc.ABC): @abc.abstractmethod def create( self, - node_type: _schema.Node, + node_type: bsc.Node, guids: typing.Iterable[URI], ): """Create *guid* nodes with type *subject*.""" @@ -138,9 +149,9 @@ class TripleStoreBase(abc.ABC): @abc.abstractmethod def set( self, - node_type: _schema.Node, # FIXME: is the node_type even needed? Couldn't I infer from the predicate? + node_type: bsc.Node, # FIXME: is the node_type even needed? Couldn't I infer from the predicate? guids: typing.Iterable[URI], - predicate: _schema.Predicate, + predicate: bsc.Predicate, values: typing.Iterable[typing.Any], ): """Add triples to the graph. diff --git a/bsfs/triple_store/sparql/parse_fetch.py b/bsfs/triple_store/sparql/parse_fetch.py new file mode 100644 index 0000000..20d4e74 --- /dev/null +++ b/bsfs/triple_store/sparql/parse_fetch.py @@ -0,0 +1,109 @@ +""" + +Part of the BlackStar filesystem (bsfs) module. +A copy of the license is provided with the project. +Author: Matthias Baumgartner, 2022 +""" +# standard imports +import typing + +# bsfs imports +from bsfs import schema as bsc +from bsfs.query import ast +from bsfs.utils import errors + +# inner-module imports +from .utils import GenHopName, Query + +# exports +__all__: typing.Sequence[str] = ( + 'Fetch', + ) + + +## code ## + +class Fetch(): + """Translate `bsfs.query.ast.fetch` structures into Sparql queries.""" + + def __init__(self, schema): + self.schema = schema + self.ngen = GenHopName(prefix='?fch') + + def __call__( + self, + root_type: bsc.Node, + root: ast.fetch.FetchExpression, + ) -> Query: + """ + """ + # check root_type + if not isinstance(root_type, bsc.Node): + raise errors.BackendError(f'expected Node, found {root_type}') + if root_type not in self.schema.nodes(): + raise errors.ConsistencyError(f'node {root_type} is not in the schema') + # parse root + terms, expr = self._parse_fetch_expression(root_type, root, '?ent') + # assemble query + return Query( + root_type=root_type.uri, + root_head='?ent', + select=terms, + where=expr, + ) + + def _parse_fetch_expression( + self, + node_type: bsc.Vertex, + node: ast.fetch.FetchExpression, + head: str, + ): + """Route *node* to the handler of the respective FetchExpression subclass.""" + if isinstance(node, ast.fetch.All): + return self._all(node_type, node, head) + if isinstance(node, ast.fetch.Fetch): + return self._fetch(node_type, node, head) + if isinstance(node, ast.fetch.Node): + return self._node(node_type, node, head) + if isinstance(node, ast.fetch.Value): + return self._value(node_type, node, head) + if isinstance(node, ast.fetch.This): + return self._this(node_type, node, head) + # invalid node + raise errors.BackendError(f'expected fetch expression, found {node}') + + def _all(self, node_type: bsc.Vertex, node: ast.fetch.All, head: str): + # child expressions + terms, exprs = zip(*[self._parse_fetch_expression(node_type, expr, head) for expr 
in node]) + terms = {term for sub in terms for term in sub} + exprs = ' .\n'.join({expr for expr in exprs if len(expr.strip()) > 0}) + return terms, exprs + + def _fetch(self, node_type: bsc.Vertex, node: ast.fetch.Fetch, head: str): # pylint: disable=unused-argument # (node_type) + # child expressions + rng = self.schema.predicate(node.predicate).range + nexthead = next(self.ngen) + terms, expr = self._parse_fetch_expression(rng, node.expr, nexthead) + return terms, f'OPTIONAL{{ {head} <{node.predicate}> {nexthead} .\n {expr} }}' + + def _node(self, node_type: bsc.Vertex, node: ast.fetch.Node, head: str): # pylint: disable=unused-argument # (node_type) + if f'?{node.name}'.startswith(self.ngen.prefix): + raise errors.BackendError(f'Node name must start with {self.ngen.prefix}') + # compose and return statement + term = next(self.ngen) + return {(term, node.name)}, f'OPTIONAL{{ {head} <{node.predicate}> {term} }}' + + def _value(self, node_type: bsc.Vertex, node: ast.fetch.Value, head: str): # pylint: disable=unused-argument # (node_type) + if f'?{node.name}'.startswith(self.ngen.prefix): + raise errors.BackendError(f'Value name must start with {self.ngen.prefix}') + # compose and return statement + term = next(self.ngen) + return {(term, node.name)}, f'OPTIONAL{{ {head} <{node.predicate}> {term} }}' + + def _this(self, node_type: bsc.Vertex, node: ast.fetch.This, head: str): # pylint: disable=unused-argument # (node_type) + if f'?{node.name}'.startswith(self.ngen.prefix): + raise errors.BackendError(f'This name must start with {self.ngen.prefix}') + # compose and return statement + return {(head, node.name)}, '' + +## EOF ## diff --git a/bsfs/triple_store/sparql/parse_filter.py b/bsfs/triple_store/sparql/parse_filter.py index 8b6b976..dca0aea 100644 --- a/bsfs/triple_store/sparql/parse_filter.py +++ b/bsfs/triple_store/sparql/parse_filter.py @@ -19,6 +19,7 @@ from bsfs.utils import URI, errors # inner-module imports from .distance import DISTANCE_FU +from .utils import GenHopName, Query # exports __all__: typing.Sequence[str] = ( @@ -28,25 +29,6 @@ __all__: typing.Sequence[str] = ( ## code ## -class _GenHopName(): - """Generator that produces a new unique symbol name with each iteration.""" - - # Symbol name prefix. - prefix: str - - # Current counter. - curr: int - - def __init__(self, prefix: str = '?hop', start: int = 0): - self.prefix = prefix - self.curr = start - 1 - - def __next__(self): - """Generate and return the next unique name.""" - self.curr += 1 - return self.prefix + str(self.curr) - - class Filter(): """Translate `bsfs.query.ast.filter` structures into Sparql queries.""" @@ -54,18 +36,18 @@ class Filter(): schema: bsc.Schema # Generator that produces unique symbol names. - ngen: _GenHopName + ngen: GenHopName def __init__(self, graph, schema): self.graph = graph self.schema = schema - self.ngen = _GenHopName() + self.ngen = GenHopName(prefix='?flt') def __call__( self, root_type: bsc.Node, root: typing.Optional[ast.filter.FilterExpression] = None, - ) -> str: + ) -> Query: """ """ # check root_type @@ -79,15 +61,18 @@ class Filter(): else: cond = self._parse_filter_expression(root_type, root, '?ent') # assemble query - return f''' - SELECT ?ent - WHERE {{ - ?ent <{ns.rdf.type}>/<{ns.rdfs.subClassOf}>* <{root_type.uri}> . 
- {cond} - }} - ''' + return Query( + root_type=root_type.uri, + root_head='?ent', + where=cond, + ) - def _parse_filter_expression(self, type_: bsc.Vertex, node: ast.filter.FilterExpression, head: str) -> str: + def _parse_filter_expression( + self, + type_: bsc.Vertex, + node: ast.filter.FilterExpression, + head: str, + ) -> str: """Route *node* to the handler of the respective FilterExpression subclass.""" if isinstance(node, ast.filter.Is): return self._is(type_, node, head) diff --git a/bsfs/triple_store/sparql/sparql.py b/bsfs/triple_store/sparql/sparql.py index fedd227..a0dd12e 100644 --- a/bsfs/triple_store/sparql/sparql.py +++ b/bsfs/triple_store/sparql/sparql.py @@ -16,6 +16,7 @@ from bsfs.query import ast from bsfs.utils import errors, URI # inner-module imports +from . import parse_fetch from . import parse_filter from .. import base from .distance import DISTANCE_FU @@ -92,13 +93,16 @@ class SparqlStore(base.TripleStoreBase): # Filter parser _filter_parser: parse_filter.Filter + # Fetch parser + _fetch_parser: parse_fetch.Fetch + def __init__(self): super().__init__(None) self._graph = rdflib.Graph() self._transaction = _Transaction(self._graph) - # NOTE: parsing bsfs.query.ast.filter.Has requires xsd:integer. self._schema = bsc.Schema(literals={bsc.ROOT_NUMBER.child(ns.xsd.integer)}) self._filter_parser = parse_filter.Filter(self._graph, self._schema) + self._fetch_parser = parse_fetch.Fetch(self._schema) # NOTE: mypy and pylint complain about the **kwargs not being listed (contrasting super) # However, not having it here is clearer since it's explicit that there are no arguments. @@ -197,17 +201,53 @@ class SparqlStore(base.TripleStoreBase): # migrate schema self._schema = schema self._filter_parser.schema = schema + self._fetch_parser.schema = schema + + def fetch( + self, + node_type: bsc.Node, + filter: ast.filter.FilterExpression, # pylint: disable=redefined-builtin + fetch: ast.fetch.FetchExpression, + ) -> typing.Iterator[typing.Tuple[URI, str, typing.Any]]: + if node_type not in self.schema.nodes(): + raise errors.ConsistencyError(f'{node_type} is not defined in the schema') + if not isinstance(filter, ast.filter.FilterExpression): + raise TypeError(filter) + if not isinstance(fetch, ast.fetch.FetchExpression): + raise TypeError(fetch) + # compose a query from fetch and filter ast + query = self._filter_parser(node_type, filter) + query += self._fetch_parser(node_type, fetch) + # run query + emitted = set() + for result in query(self._graph): + guid = URI(result[0]) + for name, raw in zip(query.names, result[1:]): + if raw is None: # undefined value + continue + if isinstance(raw, rdflib.Literal): + value = raw.value + else: + value = URI(raw) + # emit triple + triple = (guid, name, value) + if triple not in emitted: # FIXME: needs a better solution! 
+ emitted.add(triple) + yield guid, name, value def get( self, node_type: bsc.Node, - query: typing.Optional[ast.filter.FilterExpression] = None, + filter: typing.Optional[ast.filter.FilterExpression] = None, # pylint: disable=redefined-builtin ) -> typing.Iterator[URI]: if node_type not in self.schema.nodes(): raise errors.ConsistencyError(f'{node_type} is not defined in the schema') - if not isinstance(query, ast.filter.FilterExpression): - raise TypeError(query) - for guid, in self._graph.query(self._filter_parser(node_type, query)): + if not isinstance(filter, ast.filter.FilterExpression): + raise TypeError(filter) + # compose query + query = self._filter_parser(node_type, filter) + # run query + for guid, in query(self._graph): yield URI(guid) def _has_type(self, subject: URI, node_type: bsc.Node) -> bool: diff --git a/bsfs/triple_store/sparql/utils.py b/bsfs/triple_store/sparql/utils.py new file mode 100644 index 0000000..deca4d8 --- /dev/null +++ b/bsfs/triple_store/sparql/utils.py @@ -0,0 +1,141 @@ +""" + +Part of the BlackStar filesystem (bsfs) module. +A copy of the license is provided with the project. +Author: Matthias Baumgartner, 2022 +""" +# standard imports +import typing + +# external imports +import rdflib + +# bsfs imports +from bsfs.namespace import ns +from bsfs.utils import typename + +# exports +__all__: typing.Sequence[str] = ( + 'GenHopName', + 'Query', + ) + + +## code ## + +class GenHopName(): + """Generator that produces a new unique symbol name with each iteration.""" + + # Symbol name prefix. + prefix: str + + # Current counter. + curr: int + + def __init__(self, prefix: str = '?hop', start: int = 0): + self.prefix = prefix + self.curr = start - 1 + + def __next__(self): + """Generate and return the next unique name.""" + self.curr += 1 + return self.prefix + str(self.curr) + + +class Query(): + """Hold, manage, and complete partial Sparql queries.""" + + # root node type URI. + root_type: str + + # root node variable name. + root_head: str + + # (head, name) tuples (w/o root) + select: typing.Tuple[typing.Tuple[str, str], ...] + + # where statements. 
+ where: str + + def __init__( + self, + root_type: str, + root_head: str = '?ent', + select: typing.Optional[typing.Iterable[typing.Tuple[str, str]]] = None, + where: typing.Optional[str] = None, + ): + # check arguments + if select is None: + select = [] + if where is None: + where = '' + # set members + self.root_type = root_type + self.root_head = root_head + self.select = tuple(select) # tuple ensures persistent order + self.where = where.strip() + + def __str__(self) -> str: + return self.query + + def __repr__(self) -> str: + return f'{typename(self)}({self.root_type}, {self.root_head}, {self.select}, {self.where})' + + def __eq__(self, other: typing.Any) -> bool: + return isinstance(other, type(self)) \ + and self.root_type == other.root_type \ + and self.root_head == other.root_head \ + and self.select == other.select \ + and self.where == other.where + + def __hash__(self) -> int: + return hash((type(self), self.root_type, self.root_head, self.select, self.where)) + + def __add__(self, other: typing.Any) -> 'Query': + # check other's type + if not isinstance(other, type(self)): + return NotImplemented + # check query compatibility + if not self.root_type == other.root_type: + raise ValueError(other) + if not self.root_head == other.root_head: + raise ValueError(other) + # combine selections + select = self.select + other.select + # combine conditions + conds = [] + if self.where != '': + conds.append(self.where) + if other.where != '': + conds.append(other.where) + where = ' . '.join(conds) + # return new query + return Query( + root_type=self.root_type, + root_head=self.root_head, + select=select, + where=where, + ) + + @property + def names(self) -> typing.Tuple[str, ...]: + """Return a tuple of selected variable names, excluding the root.""" + return tuple(name for _, name in self.select) + + @property + def query(self) -> str: + """Return an executable sparql query.""" + select = ' '.join(f'({head} as ?{name})' for head, name in self.select) + return f''' + SELECT {self.root_head} {select} + WHERE {{ + {self.root_head} <{ns.rdf.type}>/<{ns.rdfs.subClassOf}>* <{self.root_type}> . + {self.where} + }} + ''' + + def __call__(self, graph: rdflib.Graph) -> rdflib.query.Result: + """Execute the query on a *graph* and return the query result.""" + return graph.query(self.query) + +## EOF ## diff --git a/test/triple_store/sparql/test_parse_fetch.py b/test/triple_store/sparql/test_parse_fetch.py new file mode 100644 index 0000000..0961789 --- /dev/null +++ b/test/triple_store/sparql/test_parse_fetch.py @@ -0,0 +1,263 @@ +""" + +Part of the bsfs test suite. +A copy of the license is provided with the project. +Author: Matthias Baumgartner, 2022 +""" +# imports +import rdflib +import unittest + +# bsie imports +from bsfs import schema +from bsfs.namespace import Namespace, ns +from bsfs.query import ast +from bsfs.utils import errors, URI + +# objects to test +from bsfs.triple_store.sparql.parse_fetch import Fetch + + +## code ## + +bsfs = Namespace('http://bsfs.ai/schema', fsep='/') +bse = Namespace('http://bsfs.ai/schema/Entity') +bst = Namespace('http://bsfs.ai/schema/Tag') +bsc = Namespace('http://bsfs.ai/schema/Collection') + +class TestParseFetch(unittest.TestCase): + + def setUp(self): + self.schema = schema.from_string(''' + prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> + prefix xsd: <http://www.w3.org/2001/XMLSchema#> + + prefix bsfs: <http://bsfs.ai/schema/> + prefix bse: <http://bsfs.ai/schema/Entity#> + prefix bst: <http://bsfs.ai/schema/Tag#> + prefix bsc: <http://bsfs.ai/schema/Collection#> + + # nodes + bsfs:Entity rdfs:subClassOf bsfs:Node . + bsfs:Tag rdfs:subClassOf bsfs:Node . + bsfs:Collection rdfs:subClassOf bsfs:Node .
+ + # literals + xsd:integer rdfs:subClassOf bsfs:Literal . + xsd:string rdfs:subClassOf bsfs:Literal . + + # predicates + bse:tag rdfs:subClassOf bsfs:Predicate ; + rdfs:domain bsfs:Entity ; + rdfs:range bsfs:Tag . + + bse:collection rdfs:subClassOf bsfs:Predicate ; + rdfs:domain bsfs:Entity ; + rdfs:range bsfs:Collection . + + bse:filename rdfs:subClassOf bsfs:Predicate ; + rdfs:domain bsfs:Entity ; + rdfs:range xsd:string . + + bse:rank rdfs:subClassOf bsfs:Predicate ; + rdfs:domain bsfs:Entity ; + rdfs:range xsd:integer . + + bst:main rdfs:subClassOf bsfs:Predicate ; + rdfs:domain bsfs:Tag ; + rdfs:range bsfs:Entity . + + bst:label rdfs:subClassOf bsfs:Predicate ; + rdfs:domain bsfs:Tag ; + rdfs:range xsd:string . + + bsc:tag rdfs:subClassOf bsfs:Predicate ; + rdfs:domain bsfs:Collection ; + rdfs:range bsfs:Tag . + + bsc:label rdfs:subClassOf bsfs:Predicate ; + rdfs:domain bsfs:Collection ; + rdfs:range xsd:string . + + bsc:rating rdfs:subClassOf bsfs:Predicate ; + rdfs:domain bsfs:Collection ; + rdfs:range xsd:integer . + + ''') + + # graph to test queries + self.graph = rdflib.Graph() + # schema hierarchies + self.graph.add((rdflib.URIRef('http://bsfs.ai/schema/Entity'), rdflib.RDFS.subClassOf, rdflib.URIRef('http://bsfs.ai/schema/Node'))) + self.graph.add((rdflib.URIRef('http://bsfs.ai/schema/Collection'), rdflib.RDFS.subClassOf, rdflib.URIRef('http://bsfs.ai/schema/Node'))) + self.graph.add((rdflib.URIRef('http://bsfs.ai/schema/Tag'), rdflib.RDFS.subClassOf, rdflib.URIRef('http://bsfs.ai/schema/Node'))) + # entities + self.graph.add((rdflib.URIRef('http://example.com/entity#1234'), rdflib.RDF.type, rdflib.URIRef('http://bsfs.ai/schema/Entity'))) + self.graph.add((rdflib.URIRef('http://example.com/entity#4321'), rdflib.RDF.type, rdflib.URIRef('http://bsfs.ai/schema/Entity'))) + # tags + self.graph.add((rdflib.URIRef('http://example.com/tag#1234'), rdflib.RDF.type, rdflib.URIRef('http://bsfs.ai/schema/Tag'))) + self.graph.add((rdflib.URIRef('http://example.com/tag#4321'), rdflib.RDF.type, rdflib.URIRef('http://bsfs.ai/schema/Tag'))) + # collections + self.graph.add((rdflib.URIRef('http://example.com/collection#1234'), rdflib.RDF.type, rdflib.URIRef('http://bsfs.ai/schema/Collection'))) + self.graph.add((rdflib.URIRef('http://example.com/collection#4321'), rdflib.RDF.type, rdflib.URIRef('http://bsfs.ai/schema/Collection'))) + # entity literals + self.graph.add((rdflib.URIRef('http://example.com/entity#1234'), rdflib.URIRef(bse.rank), rdflib.Literal('1234', datatype=rdflib.XSD.integer))) + self.graph.add((rdflib.URIRef('http://example.com/entity#1234'), rdflib.URIRef(bse.filename), rdflib.Literal('filename_1234', datatype=rdflib.XSD.string))) + #self.graph.add((rdflib.URIRef('http://example.com/entity#4321'), rdflib.URIRef(bse.rank), rdflib.Literal('4321', datatype=rdflib.XSD.integer))) + self.graph.add((rdflib.URIRef('http://example.com/entity#4321'), rdflib.URIRef(bse.filename), rdflib.Literal('filename_4321', datatype=rdflib.XSD.string))) + # tag literals + self.graph.add((rdflib.URIRef('http://example.com/tag#1234'), rdflib.URIRef(bst.label), rdflib.Literal('tag_label_1234', datatype=rdflib.XSD.string))) + self.graph.add((rdflib.URIRef('http://example.com/tag#4321'), rdflib.URIRef(bst.label), rdflib.Literal('tag_label_4321', datatype=rdflib.XSD.string))) + # collection literals + self.graph.add((rdflib.URIRef('http://example.com/collection#1234'), rdflib.URIRef(bsc.label), rdflib.Literal('collection_label_1234', datatype=rdflib.XSD.string))) + 
self.graph.add((rdflib.URIRef('http://example.com/collection#1234'), rdflib.URIRef(bsc.rating), rdflib.Literal('1234', datatype=rdflib.XSD.integer))) + self.graph.add((rdflib.URIRef('http://example.com/collection#4321'), rdflib.URIRef(bsc.label), rdflib.Literal('collection_label_4321', datatype=rdflib.XSD.string))) + self.graph.add((rdflib.URIRef('http://example.com/collection#4321'), rdflib.URIRef(bsc.rating), rdflib.Literal('4321', datatype=rdflib.XSD.integer))) + # entity-tag links + self.graph.add((rdflib.URIRef('http://example.com/entity#1234'), rdflib.URIRef(bse.tag), rdflib.URIRef('http://example.com/tag#1234'))) + self.graph.add((rdflib.URIRef('http://example.com/entity#4321'), rdflib.URIRef(bse.tag), rdflib.URIRef('http://example.com/tag#4321'))) + # entity-collection links + self.graph.add((rdflib.URIRef('http://example.com/entity#1234'), rdflib.URIRef(bse.collection), rdflib.URIRef('http://example.com/collection#1234'))) + self.graph.add((rdflib.URIRef('http://example.com/entity#4321'), rdflib.URIRef(bse.collection), rdflib.URIRef('http://example.com/collection#4321'))) + # collection-tag links + self.graph.add((rdflib.URIRef('http://example.com/collection#1234'), rdflib.URIRef(bsc.tag), rdflib.URIRef('http://example.com/tag#1234'))) + self.graph.add((rdflib.URIRef('http://example.com/collection#4321'), rdflib.URIRef(bsc.tag), rdflib.URIRef('http://example.com/tag#4321'))) + # tag-entity links # NOTE: cross-over + self.graph.add((rdflib.URIRef('http://example.com/tag#1234'), rdflib.URIRef(bst.main), rdflib.URIRef('http://example.com/entity#4321'))) + self.graph.add((rdflib.URIRef('http://example.com/tag#4321'), rdflib.URIRef(bst.main), rdflib.URIRef('http://example.com/entity#1234'))) + + # default parser + self.parser = Fetch(self.schema) + self.ent = self.schema.node(ns.bsfs.Entity) + + + def test_call(self): + # NOTE: The individual ast components are considered in the respective tests. Here, we test __call__ specifics. 
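+        # (illustrative) __call__ pairs a root node type with a fetch AST and returns
+        # an executable Query (see sparql/utils.py), e.g.:
+        #   q = self.parser(self.ent, ast.fetch.Fetch(bse.tag, ast.fetch.Value(bst.label, 'label')))
+        #   q(self.graph)  # -> rows of (?ent, ?label) bindings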
+ + # __call__ requires a valid root type + self.assertRaises(errors.BackendError, self.parser, self.schema.literal(ns.bsfs.Literal), ast.fetch.This('this')) + self.assertRaises(errors.ConsistencyError, self.parser, self.schema.node(ns.bsfs.Node).child(ns.bsfs.Invalid), ast.fetch.This('this')) + # __call__ requires a parseable root + self.assertRaises(errors.BackendError, self.parser, self.ent, ast.filter.FilterExpression()) + # __call__ returns an executable query + q = self.parser(self.ent, ast.fetch.Fetch(bse.tag, ast.fetch.Value(bst.label, 'label'))) + self.assertSetEqual(set(q(self.graph)), { + (rdflib.URIRef('http://example.com/entity#1234'), rdflib.Literal('tag_label_1234', datatype=rdflib.XSD.string)), + (rdflib.URIRef('http://example.com/entity#4321'), rdflib.Literal('tag_label_4321', datatype=rdflib.XSD.string)), + }) + + + def test_routing(self): + self.assertRaises(errors.BackendError, self.parser._parse_fetch_expression, self.ent, ast.fetch.FetchExpression(), '?head') + + + def test_all(self): + # multiple values query + q = self.parser(self.ent, ast.fetch.All( + ast.fetch.Value(bse.filename, name='filename'), + ast.fetch.Value(bse.rank, name='rank')), + ) + self.assertSetEqual(set(q.names), {'filename', 'rank'}) + if q.names == ('filename', 'rank'): + self.assertSetEqual(set(q(self.graph)), { + (rdflib.URIRef('http://example.com/entity#1234'), rdflib.Literal('filename_1234', datatype=rdflib.XSD.string), rdflib.Literal('1234', datatype=rdflib.XSD.integer)), + (rdflib.URIRef('http://example.com/entity#4321'), rdflib.Literal('filename_4321', datatype=rdflib.XSD.string), None), + }) + else: + self.assertSetEqual(set(q(self.graph)), { + (rdflib.URIRef('http://example.com/entity#1234'), rdflib.Literal('1234', datatype=rdflib.XSD.integer), rdflib.Literal('filename_1234', datatype=rdflib.XSD.string)), + (rdflib.URIRef('http://example.com/entity#4321'), None, rdflib.Literal('filename_4321', datatype=rdflib.XSD.string)), + }) + # mixed values and node query + q = self.parser(self.ent, ast.fetch.All( + ast.fetch.Value(bse.filename, name='filename'), + ast.fetch.Node(bse.tag, name='tag'), + )) + self.assertSetEqual(set(q.names), {'filename', 'tag'}) + if q.names == ('filename', 'tag'): + self.assertSetEqual(set(q(self.graph)), { + (rdflib.URIRef('http://example.com/entity#1234'), rdflib.Literal('filename_1234', datatype=rdflib.XSD.string), rdflib.URIRef('http://example.com/tag#1234')), + (rdflib.URIRef('http://example.com/entity#4321'), rdflib.Literal('filename_4321', datatype=rdflib.XSD.string), rdflib.URIRef('http://example.com/tag#4321')), + }) + else: + self.assertSetEqual(set(q(self.graph)), { + (rdflib.URIRef('http://example.com/entity#1234'), rdflib.URIRef('http://example.com/tag#1234'), rdflib.Literal('filename_1234', datatype=rdflib.XSD.string)), + (rdflib.URIRef('http://example.com/entity#4321'), rdflib.URIRef('http://example.com/tag#4321'), rdflib.Literal('filename_4321', datatype=rdflib.XSD.string)), + }) + # multiple values and second hop + q = self.parser(self.ent, ast.fetch.Fetch(bse.tag, ast.fetch.All( + ast.fetch.This(name='tag'), + ast.fetch.Value(bst.label, name='label'), + ))) + self.assertSetEqual(set(q.names), {'tag', 'label'}) + if q.names == ('tag', 'label'): + self.assertSetEqual(set(q(self.graph)), { + (rdflib.URIRef('http://example.com/entity#1234'), rdflib.URIRef('http://example.com/tag#1234'), rdflib.Literal('tag_label_1234', datatype=rdflib.XSD.string)), + (rdflib.URIRef('http://example.com/entity#4321'), rdflib.URIRef('http://example.com/tag#4321'), 
rdflib.Literal('tag_label_4321', datatype=rdflib.XSD.string)), + }) + else: + self.assertSetEqual(set(q(self.graph)), { + (rdflib.URIRef('http://example.com/entity#1234'), rdflib.Literal('tag_label_1234', datatype=rdflib.XSD.string), rdflib.URIRef('http://example.com/tag#1234')), + (rdflib.URIRef('http://example.com/entity#4321'), rdflib.Literal('tag_label_4321', datatype=rdflib.XSD.string), rdflib.URIRef('http://example.com/tag#4321')), + }) + + + + def test_fetch(self): + # two-hop query + q = self.parser(self.ent, ast.fetch.Fetch(bse.tag, ast.fetch.Value(bst.label, 'tag_label'))) + self.assertSetEqual(set(q(self.graph)), { + (rdflib.URIRef('http://example.com/entity#1234'), rdflib.Literal('tag_label_1234', datatype=rdflib.XSD.string)), + (rdflib.URIRef('http://example.com/entity#4321'), rdflib.Literal('tag_label_4321', datatype=rdflib.XSD.string)), + }) + # three-hop-query + q = self.parser(self.ent, ast.fetch.Fetch(bse.tag, ast.fetch.Fetch(bst.main, ast.fetch.Value(bse.rank, 'entity_rank')))) + self.assertSetEqual(set(q(self.graph)), { + (rdflib.URIRef('http://example.com/entity#1234'), None), + (rdflib.URIRef('http://example.com/entity#4321'), rdflib.Literal('1234', datatype=rdflib.XSD.integer)), + }) + + + def test_node(self): + # cannot use the internal hop name + self.assertRaises(errors.BackendError, self.parser, self.ent, ast.fetch.Node(bse.tag, self.parser.ngen.prefix[1:] + '123')) + # a simple Node statement + q = self.parser(self.ent, ast.fetch.Node(bse.tag, 'tag')) + self.assertSetEqual(set(q.names), {'tag'}) + self.assertSetEqual(set(q(self.graph)), { + (rdflib.URIRef('http://example.com/entity#1234'), rdflib.URIRef('http://example.com/tag#1234')), + (rdflib.URIRef('http://example.com/entity#4321'), rdflib.URIRef('http://example.com/tag#4321')), + }) + + + def test_value(self): + # cannot use the internal hop name + self.assertRaises(errors.BackendError, self.parser, self.schema.node(ns.bsfs.Entity), ast.fetch.Value(bse.filename, self.parser.ngen.prefix[1:] + '123')) + # a simple Value statement + q = self.parser(self.ent, ast.fetch.Value(bse.filename, 'filename')) + self.assertSetEqual(set(q.names), {'filename'}) + self.assertSetEqual(set(q(self.graph)), { + (rdflib.URIRef('http://example.com/entity#1234'), rdflib.Literal('filename_1234', datatype=rdflib.XSD.string)), + (rdflib.URIRef('http://example.com/entity#4321'), rdflib.Literal('filename_4321', datatype=rdflib.XSD.string)), + }) + + + def test_this(self): + # cannot use the internal hop name + self.assertRaises(errors.BackendError, self.parser, self.ent, ast.fetch.This(self.parser.ngen.prefix[1:] + '123')) + # a simple This statement + self.assertEqual(self.parser._this(self.ent, ast.fetch.This('this'), '?head'), + ({('?head', 'this')}, '')) + q = self.parser(self.ent, ast.fetch.This('this')) + self.assertSetEqual(set(q(self.graph)), { + (rdflib.URIRef('http://example.com/entity#1234'), rdflib.URIRef('http://example.com/entity#1234')), + (rdflib.URIRef('http://example.com/entity#4321'), rdflib.URIRef('http://example.com/entity#4321')), + }) + + +## main ## + +if __name__ == '__main__': + unittest.main() + +## EOF ## diff --git a/test/triple_store/sparql/test_parse_filter.py b/test/triple_store/sparql/test_parse_filter.py index 8764535..6fa0cd3 100644 --- a/test/triple_store/sparql/test_parse_filter.py +++ b/test/triple_store/sparql/test_parse_filter.py @@ -149,13 +149,13 @@ class TestParseFilter(unittest.TestCase): ast.filter.Or( ast.filter.Is('http://example.com/entity#1234'), 
ast.filter.Is('http://example.com/entity#5678'))) - self.assertSetEqual({str(guid) for guid, in self.graph.query(q)}, {'http://example.com/entity#1234'}) + self.assertSetEqual({str(guid) for guid, in q(self.graph)}, {'http://example.com/entity#1234'}) # root is optional q = self.parser(self.schema.node(ns.bsfs.Entity)) - self.assertSetEqual({str(guid) for guid, in self.graph.query(q)}, + self.assertSetEqual({str(guid) for guid, in q(self.graph)}, {'http://example.com/entity#1234', 'http://example.com/entity#4321', 'http://example.com/image#1234', 'http://example.com/image#4321'}) q = self.parser(self.schema.node(ns.bsfs.Tag)) - self.assertSetEqual({str(guid) for guid, in self.graph.query(q)}, + self.assertSetEqual({str(guid) for guid, in q(self.graph)}, {'http://example.com/tag#1234', 'http://example.com/tag#4321'}) @@ -164,7 +164,7 @@ class TestParseFilter(unittest.TestCase): self.assertRaises(errors.BackendError, self.parser._is, self.schema.literal(ns.bsfs.Literal), ast.filter.Is('http://example.com/entity#1234'), '?ent') # a single Is statement q = self.parser(self.schema.node(ns.bsfs.Entity), ast.filter.Is('http://example.com/entity#1234')) - self.assertSetEqual({str(guid) for guid, in self.graph.query(q)}, + self.assertSetEqual({str(guid) for guid, in q(self.graph)}, {'http://example.com/entity#1234'}) # an aggregate of Is statements q = self.parser(self.schema.node(ns.bsfs.Entity), @@ -172,7 +172,7 @@ class TestParseFilter(unittest.TestCase): ast.filter.Is('http://example.com/entity#1234'), ast.filter.Is('http://example.com/entity#4321'), )) - self.assertSetEqual({str(guid) for guid, in self.graph.query(q)}, + self.assertSetEqual({str(guid) for guid, in q(self.graph)}, {'http://example.com/entity#1234', 'http://example.com/entity#4321'}) # combined with other filters q = self.parser(self.schema.node(ns.bsfs.Entity), @@ -185,12 +185,12 @@ class TestParseFilter(unittest.TestCase): ast.filter.Equals('Me, Myself, and I') ), )) - self.assertSetEqual({str(guid) for guid, in self.graph.query(q)}, + self.assertSetEqual({str(guid) for guid, in q(self.graph)}, {'http://example.com/entity#1234'}) # as argument of Any/All q = self.parser(self.schema.node(ns.bsfs.Entity), ast.filter.Any(ns.bse.tag, ast.filter.Is('http://example.com/tag#1234'))) - self.assertSetEqual({str(guid) for guid, in self.graph.query(q)}, + self.assertSetEqual({str(guid) for guid, in q(self.graph)}, {'http://example.com/entity#1234', 'http://example.com/image#1234'}) @@ -199,15 +199,15 @@ class TestParseFilter(unittest.TestCase): self.assertRaises(errors.BackendError, self.parser._equals, self.schema.node(ns.bsfs.Entity), ast.filter.Equals('hello world'), '?ent') # a single Equals statement q = self.parser(self.schema.node(ns.bsfs.Entity), ast.filter.Any(ns.bse.comment, ast.filter.Equals('hello world'))) - self.assertSetEqual({str(guid) for guid, in self.graph.query(q)}, + self.assertSetEqual({str(guid) for guid, in q(self.graph)}, {'http://example.com/entity#1234', 'http://example.com/entity#4321'}) # a single Equals statement that includes subtypes q = self.parser(self.schema.node(ns.bsfs.Entity), ast.filter.Any(ns.bse.comment, ast.filter.Equals('Me, Myself, and I'))) - self.assertSetEqual({str(guid) for guid, in self.graph.query(q)}, + self.assertSetEqual({str(guid) for guid, in q(self.graph)}, {'http://example.com/entity#1234', 'http://example.com/image#1234'}) # an Equals statement on an integer q = self.parser(self.schema.node(ns.bsfs.Entity), ast.filter.Any(ns.bse.filesize, ast.filter.Equals(4321))) - 
self.assertSetEqual({str(guid) for guid, in self.graph.query(q)}, + self.assertSetEqual({str(guid) for guid, in q(self.graph)}, {'http://example.com/entity#4321', 'http://example.com/image#4321'}) @@ -216,18 +216,18 @@ class TestParseFilter(unittest.TestCase): self.assertRaises(errors.BackendError, self.parser._substring, self.schema.node(ns.bsfs.Entity), ast.filter.Substring('hello world'), '?ent') # a single Substring statement q = self.parser(self.schema.node(ns.bsfs.Entity), ast.filter.Any(ns.bse.comment, ast.filter.Substring('hello'))) - self.assertSetEqual({str(guid) for guid, in self.graph.query(q)}, + self.assertSetEqual({str(guid) for guid, in q(self.graph)}, {'http://example.com/entity#1234', 'http://example.com/entity#4321'}) q = self.parser(self.schema.node(ns.bsfs.Entity), ast.filter.Any(ns.bse.comment, ast.filter.Substring('lo wo'))) - self.assertSetEqual({str(guid) for guid, in self.graph.query(q)}, + self.assertSetEqual({str(guid) for guid, in q(self.graph)}, {'http://example.com/entity#1234', 'http://example.com/entity#4321'}) # a single Substring statement that includes subtypes q = self.parser(self.schema.node(ns.bsfs.Entity), ast.filter.Any(ns.bse.comment, ast.filter.Substring('Myself'))) - self.assertSetEqual({str(guid) for guid, in self.graph.query(q)}, + self.assertSetEqual({str(guid) for guid, in q(self.graph)}, {'http://example.com/entity#1234', 'http://example.com/image#1234'}) # an Substring statement on an integer q = self.parser(self.schema.node(ns.bsfs.Entity), ast.filter.Any(ns.bse.filesize, ast.filter.Substring('32'))) - self.assertSetEqual({str(guid) for guid, in self.graph.query(q)}, + self.assertSetEqual({str(guid) for guid, in q(self.graph)}, {'http://example.com/entity#4321', 'http://example.com/image#4321'}) @@ -236,15 +236,15 @@ class TestParseFilter(unittest.TestCase): self.assertRaises(errors.BackendError, self.parser._starts_with, self.schema.node(ns.bsfs.Entity), ast.filter.StartsWith('hello world'), '?ent') # a single StartsWith statement q = self.parser(self.schema.node(ns.bsfs.Entity), ast.filter.Any(ns.bse.comment, ast.filter.StartsWith('hello'))) - self.assertSetEqual({str(guid) for guid, in self.graph.query(q)}, + self.assertSetEqual({str(guid) for guid, in q(self.graph)}, {'http://example.com/entity#1234', 'http://example.com/entity#4321'}) # a single StartsWith statement that includes subtypes q = self.parser(self.schema.node(ns.bsfs.Entity), ast.filter.Any(ns.bse.comment, ast.filter.StartsWith('Me, Mys'))) - self.assertSetEqual({str(guid) for guid, in self.graph.query(q)}, + self.assertSetEqual({str(guid) for guid, in q(self.graph)}, {'http://example.com/entity#1234', 'http://example.com/image#1234'}) # an StartsWith statement on an integer q = self.parser(self.schema.node(ns.bsfs.Entity), ast.filter.Any(ns.bse.filesize, ast.filter.StartsWith(432))) - self.assertSetEqual({str(guid) for guid, in self.graph.query(q)}, + self.assertSetEqual({str(guid) for guid, in q(self.graph)}, {'http://example.com/entity#4321', 'http://example.com/image#4321'}) @@ -253,15 +253,15 @@ class TestParseFilter(unittest.TestCase): self.assertRaises(errors.BackendError, self.parser._ends_with, self.schema.node(ns.bsfs.Entity), ast.filter.EndsWith('hello world'), '?ent') # a single EndsWith statement q = self.parser(self.schema.node(ns.bsfs.Entity), ast.filter.Any(ns.bse.comment, ast.filter.EndsWith('orld'))) - self.assertSetEqual({str(guid) for guid, in self.graph.query(q)}, + self.assertSetEqual({str(guid) for guid, in q(self.graph)}, 
{'http://example.com/entity#1234', 'http://example.com/entity#4321'}) # a single EndsWith statement that includes subtypes q = self.parser(self.schema.node(ns.bsfs.Entity), ast.filter.Any(ns.bse.comment, ast.filter.EndsWith('and I'))) - self.assertSetEqual({str(guid) for guid, in self.graph.query(q)}, + self.assertSetEqual({str(guid) for guid, in q(self.graph)}, {'http://example.com/entity#1234', 'http://example.com/image#1234'}) # an EndsWith statement on an integer q = self.parser(self.schema.node(ns.bsfs.Entity), ast.filter.Any(ns.bse.filesize, ast.filter.EndsWith(321))) - self.assertSetEqual({str(guid) for guid, in self.graph.query(q)}, + self.assertSetEqual({str(guid) for guid, in q(self.graph)}, {'http://example.com/entity#4321', 'http://example.com/image#4321'}) @@ -270,22 +270,22 @@ class TestParseFilter(unittest.TestCase): self.assertRaises(errors.BackendError, self.parser._less_than, self.schema.node(ns.bsfs.Entity), ast.filter.LessThan(2000), '?ent') # a single LessThan statement q = self.parser(self.schema.node(ns.bsfs.Image), ast.filter.Any(ns.bse.iso, ast.filter.LessThan(2000))) - self.assertSetEqual({str(guid) for guid, in self.graph.query(q)}, + self.assertSetEqual({str(guid) for guid, in q(self.graph)}, {'http://example.com/image#1234'}) # _less_than respects boundary q = self.parser(self.schema.node(ns.bsfs.Image), ast.filter.Any(ns.bse.iso, ast.filter.LessThan(1234, strict=True))) - self.assertSetEqual({str(guid) for guid, in self.graph.query(q)}, set()) + self.assertSetEqual({str(guid) for guid, in q(self.graph)}, set()) q = self.parser(self.schema.node(ns.bsfs.Image), ast.filter.Any(ns.bse.iso, ast.filter.LessThan(1234, strict=False))) - self.assertSetEqual({str(guid) for guid, in self.graph.query(q)}, + self.assertSetEqual({str(guid) for guid, in q(self.graph)}, {'http://example.com/image#1234'}) # a single LessThan statement that includes subtypes q = self.parser(self.schema.node(ns.bsfs.Entity), ast.filter.Any(ns.bse.filesize, ast.filter.LessThan(2000))) - self.assertSetEqual({str(guid) for guid, in self.graph.query(q)}, + self.assertSetEqual({str(guid) for guid, in q(self.graph)}, {'http://example.com/entity#1234', 'http://example.com/image#1234'}) # an LessThan statement on a string # always negative; note that http://example.com/tag#4321 is also not returned although its comment is a pure number q = self.parser(self.schema.node(ns.bsfs.Entity), ast.filter.Any(ns.bse.comment, ast.filter.LessThan(10_000))) - self.assertSetEqual({str(guid) for guid, in self.graph.query(q)}, set()) + self.assertSetEqual({str(guid) for guid, in q(self.graph)}, set()) def test_greater_than(self): @@ -293,22 +293,22 @@ class TestParseFilter(unittest.TestCase): self.assertRaises(errors.BackendError, self.parser._greater_than, self.schema.node(ns.bsfs.Entity), ast.filter.GreaterThan(2000), '?ent') # a single GreaterThan statement q = self.parser(self.schema.node(ns.bsfs.Image), ast.filter.Any(ns.bse.iso, ast.filter.GreaterThan(2000))) - self.assertSetEqual({str(guid) for guid, in self.graph.query(q)}, + self.assertSetEqual({str(guid) for guid, in q(self.graph)}, {'http://example.com/image#4321'}) # _greater_than respects boundary q = self.parser(self.schema.node(ns.bsfs.Image), ast.filter.Any(ns.bse.iso, ast.filter.GreaterThan(4321, strict=True))) - self.assertSetEqual({str(guid) for guid, in self.graph.query(q)}, set()) + self.assertSetEqual({str(guid) for guid, in q(self.graph)}, set()) q = self.parser(self.schema.node(ns.bsfs.Image), ast.filter.Any(ns.bse.iso, 
ast.filter.GreaterThan(4321, strict=False))) - self.assertSetEqual({str(guid) for guid, in self.graph.query(q)}, + self.assertSetEqual({str(guid) for guid, in q(self.graph)}, {'http://example.com/image#4321'}) # a single GreaterThan statement that includes subtypes q = self.parser(self.schema.node(ns.bsfs.Entity), ast.filter.Any(ns.bse.filesize, ast.filter.GreaterThan(2000))) - self.assertSetEqual({str(guid) for guid, in self.graph.query(q)}, + self.assertSetEqual({str(guid) for guid, in q(self.graph)}, {'http://example.com/entity#4321', 'http://example.com/image#4321'}) # an GreaterThan statement on a string # always positive q = self.parser(self.schema.node(ns.bsfs.Entity), ast.filter.Any(ns.bse.comment, ast.filter.GreaterThan(0))) - self.assertSetEqual({str(guid) for guid, in self.graph.query(q)}, + self.assertSetEqual({str(guid) for guid, in q(self.graph)}, {'http://example.com/entity#1234', 'http://example.com/entity#4321', 'http://example.com/image#1234'}) @@ -331,7 +331,7 @@ class TestParseFilter(unittest.TestCase): ast.filter.Any(ns.bse.filesize, ast.filter.Equals(1234)), ast.filter.Any(ns.bse.comment, ast.filter.Equals('Me, Myself, and I')), )) - self.assertSetEqual({str(guid) for guid, in self.graph.query(q)}, + self.assertSetEqual({str(guid) for guid, in q(self.graph)}, {'http://example.com/entity#1234'}) # all conditions have to match q = self.parser(self.schema.node(ns.bsfs.Entity), @@ -340,21 +340,21 @@ class TestParseFilter(unittest.TestCase): ast.filter.Any(ns.bse.filesize, ast.filter.Equals(1234)), ast.filter.Any(ns.bse.comment, ast.filter.Equals('Me, Myself, and I')), )) - self.assertSetEqual({str(guid) for guid, in self.graph.query(q)}, set()) + self.assertSetEqual({str(guid) for guid, in q(self.graph)}, set()) q = self.parser(self.schema.node(ns.bsfs.Entity), ast.filter.And( ast.filter.Is('http://example.com/entity#1234'), ast.filter.Any(ns.bse.filesize, ast.filter.Equals(4321)), ast.filter.Any(ns.bse.comment, ast.filter.Equals('Me, Myself, and I')), )) - self.assertSetEqual({str(guid) for guid, in self.graph.query(q)}, set()) + self.assertSetEqual({str(guid) for guid, in q(self.graph)}, set()) q = self.parser(self.schema.node(ns.bsfs.Entity), ast.filter.And( ast.filter.Is('http://example.com/entity#1234'), ast.filter.Any(ns.bse.filesize, ast.filter.Equals(1234)), ast.filter.Any(ns.bse.comment, ast.filter.Equals('foobar')), )) - self.assertSetEqual({str(guid) for guid, in self.graph.query(q)}, set()) + self.assertSetEqual({str(guid) for guid, in q(self.graph)}, set()) # And can be nested q = self.parser(self.schema.node(ns.bsfs.Entity), ast.filter.And( @@ -364,7 +364,7 @@ class TestParseFilter(unittest.TestCase): ast.filter.Any(ns.bse.comment, ast.filter.Equals('Me, Myself, and I')), ), )) - self.assertSetEqual({str(guid) for guid, in self.graph.query(q)}, + self.assertSetEqual({str(guid) for guid, in q(self.graph)}, {'http://example.com/entity#1234'}) @@ -387,7 +387,7 @@ class TestParseFilter(unittest.TestCase): ast.filter.Any(ns.bse.filesize, ast.filter.Equals(4321)), ast.filter.Any(ns.bse.comment, ast.filter.Equals('Me, Myself, and I')), )) - self.assertSetEqual({str(guid) for guid, in self.graph.query(q)}, + self.assertSetEqual({str(guid) for guid, in q(self.graph)}, {'http://example.com/entity#1234', 'http://example.com/image#1234', 'http://example.com/entity#4321', 'http://example.com/image#4321'}) # at least one condition has to match q = self.parser(self.schema.node(ns.bsfs.Entity), @@ -396,14 +396,14 @@ class TestParseFilter(unittest.TestCase): 
ast.filter.Any(ns.bse.filesize, ast.filter.Equals(8765)), ast.filter.Any(ns.bse.comment, ast.filter.Equals('foobar')), )) - self.assertSetEqual({str(guid) for guid, in self.graph.query(q)}, set()) + self.assertSetEqual({str(guid) for guid, in q(self.graph)}, set()) q = self.parser(self.schema.node(ns.bsfs.Entity), ast.filter.Or( ast.filter.Is('http://example.com/entity#1234'), ast.filter.Any(ns.bse.filesize, ast.filter.Equals(8765)), ast.filter.Any(ns.bse.comment, ast.filter.Equals('foobar')), )) - self.assertSetEqual({str(guid) for guid, in self.graph.query(q)}, + self.assertSetEqual({str(guid) for guid, in q(self.graph)}, {'http://example.com/entity#1234'}) q = self.parser(self.schema.node(ns.bsfs.Entity), ast.filter.Or( @@ -411,7 +411,7 @@ class TestParseFilter(unittest.TestCase): ast.filter.Any(ns.bse.filesize, ast.filter.Equals(4321)), ast.filter.Any(ns.bse.comment, ast.filter.Equals('foobar')), )) - self.assertSetEqual({str(guid) for guid, in self.graph.query(q)}, + self.assertSetEqual({str(guid) for guid, in q(self.graph)}, {'http://example.com/entity#4321', 'http://example.com/image#4321'}) q = self.parser(self.schema.node(ns.bsfs.Entity), ast.filter.Or( @@ -419,7 +419,7 @@ class TestParseFilter(unittest.TestCase): ast.filter.Any(ns.bse.filesize, ast.filter.Equals(8765)), ast.filter.Any(ns.bse.comment, ast.filter.Equals('Me, Myself, and I')), )) - self.assertSetEqual({str(guid) for guid, in self.graph.query(q)}, + self.assertSetEqual({str(guid) for guid, in q(self.graph)}, {'http://example.com/entity#1234', 'http://example.com/image#1234'}) # Or can be nested q = self.parser(self.schema.node(ns.bsfs.Entity), @@ -430,7 +430,7 @@ class TestParseFilter(unittest.TestCase): ast.filter.Any(ns.bse.comment, ast.filter.Equals('Me, Myself, and I')), ), )) - self.assertSetEqual({str(guid) for guid, in self.graph.query(q)}, + self.assertSetEqual({str(guid) for guid, in q(self.graph)}, {'http://example.com/entity#1234', 'http://example.com/image#1234', 'http://example.com/entity#4321', 'http://example.com/image#4321'}) @@ -451,14 +451,14 @@ class TestParseFilter(unittest.TestCase): # _any returns a valid query q = self.parser(self.schema.node(ns.bsfs.Entity), ast.filter.Any(ns.bse.filesize, ast.filter.Equals(1234))) - self.assertSetEqual({str(guid) for guid, in self.graph.query(q)}, + self.assertSetEqual({str(guid) for guid, in q(self.graph)}, {'http://example.com/entity#1234', 'http://example.com/image#1234'}) # _any can be nested q = self.parser(self.schema.node(ns.bsfs.Entity), ast.filter.Any(ns.bse.tag, ast.filter.Any(ns.bse.representative, ast.filter.Is('http://example.com/image#1234')))) - self.assertSetEqual({str(guid) for guid, in self.graph.query(q)}, + self.assertSetEqual({str(guid) for guid, in q(self.graph)}, {'http://example.com/entity#1234', 'http://example.com/image#1234'}) @@ -468,12 +468,12 @@ class TestParseFilter(unittest.TestCase): # All Nodes q = self.parser(self.schema.node(ns.bsfs.Entity), ast.filter.All(ns.bse.tag, ast.filter.Is('http://example.com/tag#1234'))) - self.assertSetEqual({str(guid) for guid, in self.graph.query(q)}, + self.assertSetEqual({str(guid) for guid, in q(self.graph)}, {'http://example.com/entity#1234', 'http://example.com/image#1234'}) # All values q = self.parser(self.schema.node(ns.bsfs.Entity), ast.filter.All(ns.bse.comment, ast.filter.Equals('hello world'))) - self.assertSetEqual({str(guid) for guid, in self.graph.query(q)}, + self.assertSetEqual({str(guid) for guid, in q(self.graph)}, {'http://example.com/entity#4321'}) # All on value within Or 
branch # entity#1234 is selected because all of its comments are in ("hello world", "Me, Myself, and I") @@ -481,12 +481,12 @@ class TestParseFilter(unittest.TestCase): ast.filter.All(ns.bse.comment, ast.filter.Or( ast.filter.Equals('hello world'), ast.filter.Equals('Me, Myself, and I')))) - self.assertSetEqual({str(guid) for guid, in self.graph.query(q)}, + self.assertSetEqual({str(guid) for guid, in q(self.graph)}, {'http://example.com/entity#1234', 'http://example.com/entity#4321', 'http://example.com/image#1234'}) # All requires at least one predicate/value q = self.parser(self.schema.node(ns.bsfs.Entity), ast.filter.All(ns.bse.comment, ast.filter.Equals('Me, Myself, and I'))) - self.assertSetEqual({str(guid) for guid, in self.graph.query(q)}, + self.assertSetEqual({str(guid) for guid, in q(self.graph)}, {'http://example.com/image#1234'}) # All within a statement q = self.parser(self.schema.node(ns.bsfs.Entity), @@ -498,18 +498,18 @@ class TestParseFilter(unittest.TestCase): )) ) ) - self.assertSetEqual({str(guid) for guid, in self.graph.query(q)}, + self.assertSetEqual({str(guid) for guid, in q(self.graph)}, {'http://example.com/entity#1234', 'http://example.com/image#1234'}) # All with reversed Predicate q = self.parser(self.schema.node(ns.bsfs.Tag), ast.filter.All(ast.filter.Predicate(ns.bse.tag, reverse=True), ast.filter.Is('http://example.com/entity#4321'))) - self.assertSetEqual({str(guid) for guid, in self.graph.query(q)}, + self.assertSetEqual({str(guid) for guid, in q(self.graph)}, {'http://example.com/tag#4321'}) # All with multiple predicates q = self.parser(self.schema.node(ns.bsfs.Entity), ast.filter.All(ast.filter.OneOf(ns.bse.tag, ns.bse.buddy), # entity#1234 (tag:tag#1234), entity#1234 (buddy:image#1234), image#1234(tag:tag#1234) ast.filter.Any(ns.bse.comment, ast.filter.Equals('Me, Myself, and I')))) # entity#1234, image#1234, tag#1234 - self.assertSetEqual({str(guid) for guid, in self.graph.query(q)}, + self.assertSetEqual({str(guid) for guid, in q(self.graph)}, {'http://example.com/entity#1234', 'http://example.com/image#1234'}) @@ -518,22 +518,22 @@ class TestParseFilter(unittest.TestCase): # Not applies on conditions q = self.parser(self.schema.node(ns.bsfs.Entity), ast.filter.Not(ast.filter.Is('http://example.com/entity#1234'))) - self.assertSetEqual({str(guid) for guid, in self.graph.query(q)}, + self.assertSetEqual({str(guid) for guid, in q(self.graph)}, {'http://example.com/image#1234', 'http://example.com/entity#4321', 'http://example.com/image#4321'}) # Not applies on conditions within branches q = self.parser(self.schema.node(ns.bsfs.Entity), ast.filter.Any(ns.bse.comment, ast.filter.Not(ast.filter.Equals('Me, Myself, and I')))) - self.assertSetEqual({str(guid) for guid, in self.graph.query(q)}, + self.assertSetEqual({str(guid) for guid, in q(self.graph)}, {'http://example.com/entity#1234', 'http://example.com/entity#4321'}) # Not applies on branches q = self.parser(self.schema.node(ns.bsfs.Entity), ast.filter.Not(ast.filter.Any(ns.bse.comment, ast.filter.Equals('Me, Myself, and I')))) - self.assertSetEqual({str(guid) for guid, in self.graph.query(q)}, + self.assertSetEqual({str(guid) for guid, in q(self.graph)}, {'http://example.com/entity#4321', 'http://example.com/image#4321'}) # Double Not cancel each other q = self.parser(self.schema.node(ns.bsfs.Entity), ast.filter.Not(ast.filter.Not(ast.filter.Is('http://example.com/entity#1234')))) - self.assertSetEqual({str(guid) for guid, in self.graph.query(q)}, + self.assertSetEqual({str(guid) for guid, in 
q(self.graph)}, {'http://example.com/entity#1234'}) # Not works within aggregation (and) q = self.parser(self.schema.node(ns.bsfs.Entity), @@ -541,7 +541,7 @@ class TestParseFilter(unittest.TestCase): ast.filter.Not(ast.filter.Is('http://example.com/entity#1234')), ast.filter.Any(ns.bse.comment, ast.filter.Equals('hello world')), )) - self.assertSetEqual({str(guid) for guid, in self.graph.query(q)}, + self.assertSetEqual({str(guid) for guid, in q(self.graph)}, {'http://example.com/entity#4321'}) # Not works within aggregation (or) q = self.parser(self.schema.node(ns.bsfs.Entity), @@ -549,7 +549,7 @@ class TestParseFilter(unittest.TestCase): ast.filter.Not(ast.filter.Is('http://example.com/entity#1234')), ast.filter.Any(ns.bse.comment, ast.filter.Equals('Me, Myself, and I')), )) - self.assertSetEqual({str(guid) for guid, in self.graph.query(q)}, + self.assertSetEqual({str(guid) for guid, in q(self.graph)}, {'http://example.com/entity#1234', 'http://example.com/entity#4321', 'http://example.com/image#1234', 'http://example.com/image#4321'}) # Not works outside aggregation (and) q = self.parser(self.schema.node(ns.bsfs.Entity), @@ -558,7 +558,7 @@ class TestParseFilter(unittest.TestCase): ast.filter.Is('http://example.com/entity#1234'), ast.filter.Any(ns.bse.comment, ast.filter.Equals('hello world')), ))) - self.assertSetEqual({str(guid) for guid, in self.graph.query(q)}, + self.assertSetEqual({str(guid) for guid, in q(self.graph)}, {'http://example.com/entity#4321', 'http://example.com/image#1234', 'http://example.com/image#4321'}) # Not works outside aggregation (or) q = self.parser(self.schema.node(ns.bsfs.Entity), @@ -567,7 +567,7 @@ class TestParseFilter(unittest.TestCase): ast.filter.Is('http://example.com/entity#4321'), ast.filter.Any(ns.bse.comment, ast.filter.Equals('Me, Myself, and I')), ))) - self.assertSetEqual({str(guid) for guid, in self.graph.query(q)}, + self.assertSetEqual({str(guid) for guid, in q(self.graph)}, {'http://example.com/image#4321'}) # Not mixed with branch, aggregation, id, and value q = self.parser(self.schema.node(ns.bsfs.Entity), @@ -580,7 +580,7 @@ class TestParseFilter(unittest.TestCase): ), ast.filter.Any(ns.bse.comment, ast.filter.Not(ast.filter.Equals('foobar'))), # entity#1234, entity#4321, image#1234 )) - self.assertSetEqual({str(guid) for guid, in self.graph.query(q)}, + self.assertSetEqual({str(guid) for guid, in q(self.graph)}, {'http://example.com/image#1234'}) @@ -590,21 +590,21 @@ class TestParseFilter(unittest.TestCase): # Has with GreaterThan constraint q = self.parser(self.schema.node(ns.bsfs.Entity), ast.filter.Has(ns.bse.comment, ast.filter.GreaterThan(0))) - self.assertSetEqual({str(guid) for guid, in self.graph.query(q)}, + self.assertSetEqual({str(guid) for guid, in q(self.graph)}, {'http://example.com/entity#1234', 'http://example.com/entity#4321', 'http://example.com/image#1234'}) q = self.parser(self.schema.node(ns.bsfs.Entity), ast.filter.Has(ns.bse.comment, ast.filter.GreaterThan(1))) - self.assertSetEqual({str(guid) for guid, in self.graph.query(q)}, + self.assertSetEqual({str(guid) for guid, in q(self.graph)}, {'http://example.com/entity#1234'}) # Has with Equals constraint q = self.parser(self.schema.node(ns.bsfs.Entity), ast.filter.Has(ns.bse.comment, 1)) - self.assertSetEqual({str(guid) for guid, in self.graph.query(q)}, + self.assertSetEqual({str(guid) for guid, in q(self.graph)}, {'http://example.com/entity#4321', 'http://example.com/image#1234'}) # Has with LessThan constraint q = self.parser(self.schema.node(ns.bsfs.Entity), 
ast.filter.Has(ns.bse.comment, ast.filter.LessThan(2))) - self.assertSetEqual({str(guid) for guid, in self.graph.query(q)}, + self.assertSetEqual({str(guid) for guid, in q(self.graph)}, {'http://example.com/entity#4321', 'http://example.com/image#1234', 'http://example.com/image#4321'}) # Has with multiple constraints self.graph.add((rdflib.URIRef('http://example.com/entity#1234'), rdflib.URIRef(ns.bse.comment), rdflib.Literal('extra1', datatype=rdflib.XSD.string))) @@ -616,17 +616,17 @@ class TestParseFilter(unittest.TestCase): self.graph.add((rdflib.URIRef('http://example.com/entity#4321'), rdflib.URIRef(ns.bse.comment), rdflib.Literal('extra2', datatype=rdflib.XSD.string))) q = self.parser(self.schema.node(ns.bsfs.Entity), ast.filter.Has(ns.bse.comment, ast.filter.And(ast.filter.GreaterThan(1), ast.filter.LessThan(5)))) - self.assertSetEqual({str(guid) for guid, in self.graph.query(q)}, + self.assertSetEqual({str(guid) for guid, in q(self.graph)}, {'http://example.com/entity#4321'}) # Has with OneOf predicate q = self.parser(self.schema.node(ns.bsfs.Entity), ast.filter.Has(ast.filter.OneOf(ns.bse.tag, ns.bse.buddy), ast.filter.GreaterThan(1))) - self.assertSetEqual({str(guid) for guid, in self.graph.query(q)}, + self.assertSetEqual({str(guid) for guid, in q(self.graph)}, {'http://example.com/entity#1234', 'http://example.com/entity#4321'}) # Has with reversed predicate q = self.parser(self.schema.node(ns.bsfs.Tag), ast.filter.Has(ast.filter.Predicate(ns.bse.tag, reverse=True), ast.filter.GreaterThan(1))) - self.assertSetEqual({str(guid) for guid, in self.graph.query(q)}, + self.assertSetEqual({str(guid) for guid, in q(self.graph)}, {'http://example.com/tag#1234'}) @@ -643,23 +643,23 @@ class TestParseFilter(unittest.TestCase): self.assertRaises(errors.ConsistencyError, self.parser._distance, self.schema.literal(ns.bsfs.Colors), ast.filter.Distance([1,2,3,4,5], 1), '') # _distance respects threshold q = self.parser(self.schema.node(ns.bsfs.Entity), ast.filter.Any(ns.bse.colors, ast.filter.Distance([2,4,3,1], 4))) - self.assertSetEqual({str(guid) for guid, in self.graph.query(q)}, + self.assertSetEqual({str(guid) for guid, in q(self.graph)}, {'http://example.com/entity#1234', 'http://example.com/entity#4321', 'http://example.com/image#1234'}) q = self.parser(self.schema.node(ns.bsfs.Entity), ast.filter.Any(ns.bse.colors, ast.filter.Distance([2,4,3,1], 3))) - self.assertSetEqual({str(guid) for guid, in self.graph.query(q)}, + self.assertSetEqual({str(guid) for guid, in q(self.graph)}, {'http://example.com/entity#4321', 'http://example.com/image#1234'}) q = self.parser(self.schema.node(ns.bsfs.Entity), ast.filter.Any(ns.bse.colors, ast.filter.Distance([2,4,3,1], 2))) - self.assertSetEqual({str(guid) for guid, in self.graph.query(q)}, + self.assertSetEqual({str(guid) for guid, in q(self.graph)}, {'http://example.com/image#1234'}) # result set can be empty q = self.parser(self.schema.node(ns.bsfs.Entity), ast.filter.Any(ns.bse.colors, ast.filter.Distance([2,4,3,1], 1))) - self.assertSetEqual({str(guid) for guid, in self.graph.query(q)}, set()) + self.assertSetEqual({str(guid) for guid, in q(self.graph)}, set()) # _distance respects strict q = self.parser(self.schema.node(ns.bsfs.Entity), ast.filter.Any(ns.bse.colors, ast.filter.Distance([1,2,3,4], 0, False))) - self.assertSetEqual({str(guid) for guid, in self.graph.query(q)}, + self.assertSetEqual({str(guid) for guid, in q(self.graph)}, {'http://example.com/entity#1234'}) q = self.parser(self.schema.node(ns.bsfs.Entity), 
ast.filter.Any(ns.bse.colors, ast.filter.Distance([1,2,3,4], 0, True))) - self.assertSetEqual({str(guid) for guid, in self.graph.query(q)}, set()) + self.assertSetEqual({str(guid) for guid, in q(self.graph)}, set()) def test_one_of(self): # _one_of expects a node @@ -725,7 +725,7 @@ class TestParseFilter(unittest.TestCase): ast.filter.Any(ast.filter.OneOf(ns.bse.tag, ns.bse.buddy), ast.filter.Any(ast.filter.OneOf(ns.bse.comment), ast.filter.Equals('Me, Myself, and I')))) - self.assertSetEqual({str(guid) for guid, in self.graph.query(q)}, + self.assertSetEqual({str(guid) for guid, in q(self.graph)}, {'http://example.com/entity#1234', 'http://example.com/image#1234'}) @@ -757,13 +757,13 @@ class TestParseFilter(unittest.TestCase): ast.filter.Any(ns.bse.representative, ast.filter.Any(ns.bse.filesize, ast.filter.Equals(1234))))) - self.assertSetEqual({str(guid) for guid, in self.graph.query(q)}, + self.assertSetEqual({str(guid) for guid, in q(self.graph)}, {'http://example.com/entity#1234', 'http://example.com/image#1234'}) q = self.parser(self.schema.node(ns.bsfs.Tag), ast.filter.Any(ast.filter.Predicate(ns.bse.tag, reverse=True), ast.filter.Any(ns.bse.filesize, ast.filter.LessThan(2000)))) - self.assertSetEqual({str(guid) for guid, in self.graph.query(q)}, + self.assertSetEqual({str(guid) for guid, in q(self.graph)}, {'http://example.com/tag#1234'}) diff --git a/test/triple_store/sparql/test_sparql.py b/test/triple_store/sparql/test_sparql.py index 7fbfb65..c58fae3 100644 --- a/test/triple_store/sparql/test_sparql.py +++ b/test/triple_store/sparql/test_sparql.py @@ -556,6 +556,76 @@ class TestSparqlStore(unittest.TestCase): self.assertSetEqual(set(q), tag_ids) + def test_fetch(self): + # store setup + store = SparqlStore.Open() + store.schema = self.schema + # add instances + ent_type = self.schema.node(ns.bsfs.Entity) + tag_type = self.schema.node(ns.bsfs.Tag) + ent_ids = {URI('http://example.com/me/entity#1234'), URI('http://example.com/me/entity#4321')} + tag_ids = {URI('http://example.com/me/tag#1234'), URI('http://example.com/me/tag#4321')} + store.create(ent_type, ent_ids) + store.create(tag_type, tag_ids) + store.set(ent_type, ent_ids, self.schema.predicate(ns.bse.tag), tag_ids) + store.set(ent_type, {URI('http://example.com/me/entity#1234')}, self.schema.predicate(ns.bse.filesize), {1234}) + store.set(ent_type, {URI('http://example.com/me/entity#4321')}, self.schema.predicate(ns.bse.filesize), {4321}) + store.set(ent_type, {URI('http://example.com/me/entity#1234')}, self.schema.predicate(ns.bse.comment), {'hello world'}) + # node_type must be a node from the schema + self.assertRaises(errors.ConsistencyError, list, store.fetch(self.schema.literal(ns.bsfs.Literal), + ast.filter.FilterExpression(), ast.fetch.FetchExpression())) + self.assertRaises(errors.ConsistencyError, list, store.fetch(self.schema.node(ns.bsfs.Node).child(ns.bsfs.Invalid), + ast.filter.FilterExpression(), ast.fetch.FetchExpression())) + # requires a filter and a fetch query + self.assertRaises(TypeError, list, store.fetch(self.schema.node(ns.bsfs.Entity), None, ast.fetch.FetchExpression())) + self.assertRaises(TypeError, list, store.fetch(self.schema.node(ns.bsfs.Entity), 1234, ast.fetch.FetchExpression())) + self.assertRaises(TypeError, list, store.fetch(self.schema.node(ns.bsfs.Entity), 'hello', ast.fetch.FetchExpression())) + self.assertRaises(TypeError, list, store.fetch(self.schema.node(ns.bsfs.Entity), ast.filter.FilterExpression(), None)) + self.assertRaises(TypeError, list, 
store.fetch(self.schema.node(ns.bsfs.Entity), ast.filter.FilterExpression(), 1234)) + self.assertRaises(TypeError, list, store.fetch(self.schema.node(ns.bsfs.Entity), ast.filter.FilterExpression(), 'hello')) + # fetch emits triples + self.assertSetEqual(set(store.fetch(self.schema.node(ns.bsfs.Entity), + ast.filter.Is('http://example.com/me/entity#1234'), + ast.fetch.Value(ns.bse.filesize, 'filesize'), + )), { + (URI('http://example.com/me/entity#1234'), 'filesize', 1234), + }) + # fetch respects filter query + self.assertSetEqual(set(store.fetch(self.schema.node(ns.bsfs.Entity), + ast.filter.IsIn('http://example.com/me/entity#1234', 'http://example.com/me/entity#4321'), + ast.fetch.Value(ns.bse.filesize, 'filesize'), + )), { + (URI('http://example.com/me/entity#1234'), 'filesize', 1234), + (URI('http://example.com/me/entity#4321'), 'filesize', 4321), + }) + # fetch ignores missing data + self.assertSetEqual(set(store.fetch(self.schema.node(ns.bsfs.Entity), + ast.filter.IsIn('http://example.com/me/entity#1234', 'http://example.com/me/entity#4321'), + ast.fetch.Value(ns.bse.comment, 'comment'), + )), { + (URI('http://example.com/me/entity#1234'), 'comment', 'hello world'), + }) + # fetch emits all triples + self.assertSetEqual(set(store.fetch(self.schema.node(ns.bsfs.Entity), + ast.filter.Is('http://example.com/me/entity#1234'), + ast.fetch.All( + ast.fetch.Value(ns.bse.filesize, 'filesize'), + ast.fetch.Node(ns.bse.tag, 'tag'), + ) + )), { + (URI('http://example.com/me/entity#1234'), 'filesize', 1234), + (URI('http://example.com/me/entity#1234'), 'tag', URI('http://example.com/me/tag#1234')), + (URI('http://example.com/me/entity#1234'), 'tag', URI('http://example.com/me/tag#4321')), + }) + # triples do not repeat + triples = list(store.fetch(self.schema.node(ns.bsfs.Entity), ast.filter.Is('http://example.com/me/entity#1234'), + ast.fetch.All( + ast.fetch.Value(ns.bse.filesize, 'filesize'), + ast.fetch.Node(ns.bse.tag, 'tag'), + ) + )) + self.assertEqual(len(triples), 3) + def test_exists(self): # store setup store = SparqlStore.Open() diff --git a/test/triple_store/sparql/test_utils.py b/test/triple_store/sparql/test_utils.py new file mode 100644 index 0000000..073b8f8 --- /dev/null +++ b/test/triple_store/sparql/test_utils.py @@ -0,0 +1,155 @@ +""" + +Part of the bsfs test suite. +A copy of the license is provided with the project. 
+Author: Matthias Baumgartner, 2022 +""" +# standard imports +import operator +import re +import unittest + +# external imports +import rdflib + +# bsie imports +from bsfs.namespace import ns + +# objects to test +from bsfs.triple_store.sparql.utils import GenHopName, Query + + +## code ## + +class TestGenHopName(unittest.TestCase): + def test_next(self): + # baseline + self.assertEqual(next(GenHopName(prefix='?foo', start=123)), '?foo123') + # respects prefix + self.assertEqual(next(GenHopName(prefix='?bar', start=123)), '?bar123') + # respects start + self.assertEqual(next(GenHopName(prefix='?foo', start=321)), '?foo321') + # counts up + cnt = GenHopName(prefix='?foo', start=998) + self.assertEqual(next(cnt), '?foo998') + self.assertEqual(next(cnt), '?foo999') + self.assertEqual(next(cnt), '?foo1000') + self.assertEqual(next(cnt), '?foo1001') + + def test_essentials(self): + # can get the prefix + self.assertEqual(GenHopName(prefix='?foo', start=123).prefix, '?foo') + # can get the counter + self.assertEqual(GenHopName(prefix='?foo', start=123).curr, 122) + + +class TestQuery(unittest.TestCase): + def setUp(self): + self.root_type = 'http://bsfs.ai/schema/Entity' + self.root_head = '?root' + self.select = (('?head', 'name'), ) + self.where = f'?root <{ns.bse.tag}> ?head' + + def test_essentials(self): + # can access members + q = Query(self.root_type, self.root_head, self.select, self.where) + self.assertEqual(q.root_type, self.root_type) + self.assertEqual(q.root_head, self.root_head) + self.assertEqual(q.select, self.select) + self.assertEqual(q.where, self.where) + # comparison + self.assertEqual(q, Query(self.root_type, self.root_head, self.select, self.where)) + self.assertEqual(hash(q), hash(Query(self.root_type, self.root_head, self.select, self.where))) + # comparison respects root_type + self.assertNotEqual(q, Query('http://bsfs.ai/schema/Tag', self.root_head, self.select, self.where)) + self.assertNotEqual(hash(q), hash(Query('http://bsfs.ai/schema/Tag', self.root_head, self.select, self.where))) + # comparison respects root_head + self.assertNotEqual(q, Query(self.root_type, '?foo', self.select, self.where)) + self.assertNotEqual(hash(q), hash(Query(self.root_type, '?foo', self.select, self.where))) + # comparison respects select + self.assertNotEqual(q, Query(self.root_type, self.root_head, (('?head', 'foo'), ), self.where)) + self.assertNotEqual(hash(q), hash(Query(self.root_type, self.root_head, (('?head', 'foo'), ), self.where))) + # comparison respects where + self.assertNotEqual(q, Query(self.root_type, self.root_head, self.select, '?root bse:filename ?head')) + self.assertNotEqual(hash(q), hash(Query(self.root_type, self.root_head, self.select, '?root bse:filename ?head'))) + # string conversion + self.assertEqual(str(q), q.query) + self.assertEqual(repr(q), "Query(http://bsfs.ai/schema/Entity, ?root, (('?head', 'name'),), ?root ?head)") + + def test_add(self): + q = Query(self.root_type, self.root_head, self.select, self.where) + # can only add a query + self.assertRaises(TypeError, operator.add, q, 1234) + self.assertRaises(TypeError, operator.add, q, 'foobar') + # root type and head must match + self.assertRaises(ValueError, operator.add, q, Query('http://bsfs.ai/schema/Tag', self.root_head)) + self.assertRaises(ValueError, operator.add, q, Query(self.root_type, '?foobar')) + # select and were are combined + combo = q + Query(self.root_type, self.root_head, (('?foo', 'bar'), ), f'?root <{ns.bse.filename}> ?foo') + self.assertEqual(combo.select, (('?head', 'name'), 
('?foo', 'bar'))) + self.assertEqual(combo.where, f'?root <{ns.bse.tag}> ?head . ?root <{ns.bse.filename}> ?foo') + # select can be empty + combo = q + Query(self.root_type, self.root_head, None, f'?root <{ns.bse.filename}> ?foo') + self.assertEqual(combo.select, (('?head', 'name'), )) + combo = Query(self.root_type, self.root_head, None, f'?root <{ns.bse.filename}> ?foo') + q + self.assertEqual(combo.select, (('?head', 'name'), )) + combo = Query(self.root_type, self.root_head, None, self.where) + Query(self.root_type, self.root_head, None, f'?root <{ns.bse.filename}> ?foo') + self.assertEqual(combo.select, tuple()) + # where can be empty + combo = q + Query(self.root_type, self.root_head, (('?foo', 'bar'), )) + self.assertEqual(combo.where, self.where) + combo = Query(self.root_type, self.root_head, (('?foo', 'bar'), )) + q + self.assertEqual(combo.where, self.where) + combo = Query(self.root_type, self.root_head, self.select) + Query(self.root_type, self.root_head, (('?foo', 'bar'), )) + self.assertEqual(combo.where, '') + + def test_names(self): + self.assertEqual(Query(self.root_type, self.root_head, (('?head', 'name'), ), self.where).names, + ('name', )) + self.assertEqual(Query(self.root_type, self.root_head, (('?head', 'name'), ('?foo', 'bar')), self.where).names, + ('name', 'bar')) + + def test_query(self): + def normalize(value): + value = value.strip() + value = value.lower() + value = value.replace(r'\n', ' ') + value, _ = re.subn(r'\s\s+', ' ', value) + return value + # query composes a valid query + q = Query(self.root_type, self.root_head, self.select, self.where) + self.assertEqual(normalize(q.query), normalize(f'select ?root (?head as ?name) where {{ ?root <{ns.rdf.type}>/<{ns.rdfs.subClassOf}>* <http://bsfs.ai/schema/Entity> . ?root <{ns.bse.tag}> ?head }}')) + # select and where are optional + q = Query(self.root_type, self.root_head) + self.assertEqual(normalize(q.query), normalize(f'select ?root where {{ ?root <{ns.rdf.type}>/<{ns.rdfs.subClassOf}>* <http://bsfs.ai/schema/Entity> . }}')) + # select and where need not correspond + q = Query(self.root_type, self.root_head, (('?head', 'name'), )) + self.assertEqual(normalize(q.query), normalize(f'select ?root (?head as ?name) where {{ ?root <{ns.rdf.type}>/<{ns.rdfs.subClassOf}>* <http://bsfs.ai/schema/Entity> .
}}')) + # query is used for string representation + self.assertEqual(str(q), q.query) + + def test_call(self): + graph = rdflib.Graph() + # schema + graph.add((rdflib.URIRef('http://bsfs.ai/schema/Document'), rdflib.URIRef(ns.rdfs.subClassOf), rdflib.URIRef('http://bsfs.ai/schema/Entity'))) + # nodes + graph.add((rdflib.URIRef('http://example.com/entity#1234'), rdflib.URIRef(ns.rdf.type), rdflib.URIRef('http://bsfs.ai/schema/Entity'))) + graph.add((rdflib.URIRef('http://example.com/doc#1234'), rdflib.URIRef(ns.rdf.type), rdflib.URIRef('http://bsfs.ai/schema/Document'))) + # links + graph.add((rdflib.URIRef('http://example.com/entity#1234'), rdflib.URIRef(ns.bse.tag), rdflib.Literal('tag#1234', datatype=rdflib.XSD.string))) + graph.add((rdflib.URIRef('http://example.com/doc#1234'), rdflib.URIRef(ns.bse.tag), rdflib.Literal('tag#1234', datatype=rdflib.XSD.string))) + # run query on a given graph + query = Query(self.root_type, self.root_head, self.select, self.where) + self.assertSetEqual(set(query(graph)), { + (rdflib.URIRef('http://example.com/entity#1234'), rdflib.Literal('tag#1234', datatype=rdflib.XSD.string)), + (rdflib.URIRef('http://example.com/doc#1234'), rdflib.Literal('tag#1234', datatype=rdflib.XSD.string)), + }) + # query actually considers the passed graph + self.assertSetEqual(set(query(rdflib.Graph())), set()) + +## main ## + +if __name__ == '__main__': + unittest.main() + +## EOF ## diff --git a/test/triple_store/test_base.py b/test/triple_store/test_base.py index a0c3260..56a2539 100644 --- a/test/triple_store/test_base.py +++ b/test/triple_store/test_base.py @@ -38,6 +38,9 @@ class DummyBase(TripleStoreBase): def get(self, node_type, query): pass + def fetch(self, node_type, filter, fetch): + pass + def exists(self, node_type, guids): pass -- cgit v1.2.3 From c196d2ce73d8351a18c19bcddd4b06d224e644fc Mon Sep 17 00:00:00 2001 From: Matthias Baumgartner Date: Sat, 21 Jan 2023 18:27:22 +0100 Subject: Fetch in graph including results view --- .pylintrc | 6 +- bsfs/graph/ac/base.py | 4 + bsfs/graph/ac/null.py | 4 + bsfs/graph/nodes.py | 137 ++++++++++++++-- bsfs/graph/result.py | 112 +++++++++++++ test/graph/test_nodes.py | 95 ++++++++++- test/graph/test_result.py | 392 ++++++++++++++++++++++++++++++++++++++++++++++ 7 files changed, 734 insertions(+), 16 deletions(-) create mode 100644 bsfs/graph/result.py create mode 100644 test/graph/test_result.py diff --git a/.pylintrc b/.pylintrc index bcb2a86..6b7f471 100644 --- a/.pylintrc +++ b/.pylintrc @@ -76,10 +76,10 @@ max-attributes=7 max-bool-expr=5 # Maximum number of branch for function / method body. -max-branches=15 +max-branches=20 # Maximum number of locals for function / method body. -max-locals=15 +max-locals=20 # Maximum number of parents for a class (see R0901). max-parents=7 @@ -91,7 +91,7 @@ max-public-methods=20 max-returns=15 # Maximum number of statements in function / method body. -max-statements=50 +max-statements=100 # Minimum number of public methods for a class (see R0903). 
min-public-methods=1 diff --git a/bsfs/graph/ac/base.py b/bsfs/graph/ac/base.py index 0703e2e..79b09e5 100644 --- a/bsfs/graph/ac/base.py +++ b/bsfs/graph/ac/base.py @@ -72,4 +72,8 @@ class AccessControlBase(abc.ABC): def filter_read(self, node_type: schema.Node, query: ast.filter.FilterExpression) -> ast.filter.FilterExpression: """Re-write a filter *query* to get (i.e., read) *node_type* nodes.""" + @abc.abstractmethod + def fetch_read(self, node_type: schema.Node, query: ast.fetch.FetchExpression) -> ast.fetch.FetchExpression: + """Re-write a fetch *query* to get (i.e, read) values for *node_type* nodes.""" + ## EOF ## diff --git a/bsfs/graph/ac/null.py b/bsfs/graph/ac/null.py index 12b4e87..6a923a5 100644 --- a/bsfs/graph/ac/null.py +++ b/bsfs/graph/ac/null.py @@ -54,4 +54,8 @@ class NullAC(base.AccessControlBase): """Re-write a filter *query* to get (i.e., read) *node_type* nodes.""" return query + def fetch_read(self, node_type: schema.Node, query: ast.fetch.FetchExpression) -> ast.fetch.FetchExpression: + """Re-write a fetch *query* to get (i.e, read) values for *node_type* nodes.""" + return query + ## EOF ## diff --git a/bsfs/graph/nodes.py b/bsfs/graph/nodes.py index 5a93f77..a4ba45f 100644 --- a/bsfs/graph/nodes.py +++ b/bsfs/graph/nodes.py @@ -5,17 +5,20 @@ A copy of the license is provided with the project. Author: Matthias Baumgartner, 2022 """ # imports +from collections import abc import time import typing # bsfs imports -from bsfs import schema as _schema +from bsfs import schema as bsc from bsfs.namespace import ns +from bsfs.query import ast, validate from bsfs.triple_store import TripleStoreBase from bsfs.utils import errors, URI, typename # inner-module imports from . import ac +from . import result # exports __all__: typing.Sequence[str] = ( @@ -37,7 +40,7 @@ class Nodes(): _user: URI # node type. - _node_type: _schema.Node + _node_type: bsc.Node # guids of nodes. Can be empty. _guids: typing.Set[URI] @@ -46,13 +49,16 @@ class Nodes(): self, backend: TripleStoreBase, user: URI, - node_type: _schema.Node, + node_type: bsc.Node, guids: typing.Iterable[URI], ): + # set main members self._backend = backend self._user = user self._node_type = node_type self._guids = set(guids) + # create helper instances + # FIXME: Assumes that the schema does not change while the instance is in use! self._ac = ac.NullAC(self._backend, self._user) def __eq__(self, other: typing.Any) -> bool: @@ -72,7 +78,7 @@ class Nodes(): return f'{typename(self)}({self._node_type}, {self._guids})' @property - def node_type(self) -> _schema.Node: + def node_type(self) -> bsc.Node: """Return the node's type.""" return self._node_type @@ -83,7 +89,7 @@ class Nodes(): def set( self, - pred: URI, # FIXME: URI or _schema.Predicate? + pred: URI, # FIXME: URI or bsc.Predicate? value: typing.Any, ) -> 'Nodes': """Set predicate *pred* to *value*.""" @@ -91,7 +97,7 @@ class Nodes(): def set_from_iterable( self, - predicate_values: typing.Iterable[typing.Tuple[URI, typing.Any]], # FIXME: URI or _schema.Predicate? + predicate_values: typing.Iterable[typing.Tuple[URI, typing.Any]], # FIXME: URI or bsc.Predicate? ) -> 'Nodes': """Set mutliple predicate-value pairs at once.""" # TODO: Could group predicate_values by predicate to gain some efficiency @@ -120,6 +126,119 @@ class Nodes(): return self + def get( + self, + *paths: typing.Union[URI, typing.Iterable[URI]], + view: typing.Union[typing.Type[list], typing.Type[dict]] = dict, + **view_kwargs, + ) -> typing.Any: + """Get values or nodes at *paths*. 
+ Return an iterator (view=list) or a dict (view=dict) over the results. + """ + # check args + if len(paths) == 0: + raise AttributeError('expected at least one path, found none') + if view not in (dict, list): + raise ValueError(f'expected dict or list, found {view}') + # process paths: create fetch ast, build name mapping, and find unique paths + schema = self._backend.schema + statements = set() + name2path = {} + unique_paths = set() # paths that result in a single (unique) value + normpath: typing.Tuple[URI, ...] + for idx, path in enumerate(paths): + # normalize path + if isinstance(path, str): + normpath = (URI(path), ) + elif isinstance(path, abc.Iterable): + if not all(isinstance(step, str) for step in path): + raise TypeError(path) + normpath = tuple(URI(step) for step in path) + else: + raise TypeError(path) + # check path's schema consistency + if not all(schema.has_predicate(pred) for pred in normpath): + raise errors.ConsistencyError(f'path is not fully covered by the schema: {path}') + # check path's uniqueness + if all(schema.predicate(pred).unique for pred in normpath): + unique_paths.add(path) + # fetch tail predicate + tail = schema.predicate(normpath[-1]) + # determine tail ast node type + factory = ast.fetch.Node if isinstance(tail.range, bsc.Node) else ast.fetch.Value + # assign name + name = f'fetch{idx}' + name2path[name] = (path, tail) + # create tail ast node + curr: ast.fetch.FetchExpression = factory(tail.uri, name) + # walk towards front + hop: URI + for hop in normpath[-2::-1]: + curr = ast.fetch.Fetch(hop, curr) + # add to fetch query + statements.add(curr) + # aggregate fetch statements + if len(statements) == 1: + fetch = next(iter(statements)) + else: + fetch = ast.fetch.All(*statements) + # add access controls to fetch + fetch = self._ac.fetch_read(self.node_type, fetch) + + # compose filter ast + filter = ast.filter.IsIn(self.guids) # pylint: disable=redefined-builtin + # add access controls to filter + filter = self._ac.filter_read(self.node_type, filter) + + # validate queries + validate.Filter(self._backend.schema)(self.node_type, filter) + validate.Fetch(self._backend.schema)(self.node_type, fetch) + + # process results, convert if need be + def triple_iter(): + # query the backend + triples = self._backend.fetch(self.node_type, filter, fetch) + # process triples + for root, name, raw in triples: + # get node + node = Nodes(self._backend, self._user, self.node_type, {root}) + # get path + path, tail = name2path[name] + # covert raw to value + if isinstance(tail.range, bsc.Node): + value = Nodes(self._backend, self._user, tail.range, {raw}) + else: + value = raw + # emit triple + yield node, path, value + + # simplify by default + view_kwargs['node'] = view_kwargs.get('node', len(self._guids) == 1) + view_kwargs['path'] = view_kwargs.get('path', len(paths) == 1) + view_kwargs['value'] = view_kwargs.get('value', True) + + # return results view + if view == list: + return result.to_list_view( + triple_iter(), + # aggregation args + **view_kwargs, + ) + + if view == dict: + return result.to_dict_view( + triple_iter(), + # context + len(self._guids) == 1, + len(paths) == 1, + unique_paths, + # aggregation args + **view_kwargs, + ) + + raise errors.UnreachableError() # view was already checked + + def __set(self, predicate: URI, value: typing.Any): """ """ @@ -145,7 +264,7 @@ class Nodes(): guids = set(self._ensure_nodes(node_type, guids)) # check value - if isinstance(pred.range, _schema.Literal): + if isinstance(pred.range, bsc.Literal): # check write 
permissions on existing nodes # As long as the user has write permissions, we don't restrict # the creation or modification of literal values. @@ -160,7 +279,7 @@ class Nodes(): [value], ) - elif isinstance(pred.range, _schema.Node): + elif isinstance(pred.range, bsc.Node): # check value type if not isinstance(value, Nodes): raise TypeError(value) @@ -192,7 +311,7 @@ class Nodes(): else: raise errors.UnreachableError() - def _ensure_nodes(self, node_type: _schema.Node, guids: typing.Iterable[URI]): + def _ensure_nodes(self, node_type: bsc.Node, guids: typing.Iterable[URI]): """ """ # check node existence diff --git a/bsfs/graph/result.py b/bsfs/graph/result.py new file mode 100644 index 0000000..3009801 --- /dev/null +++ b/bsfs/graph/result.py @@ -0,0 +1,112 @@ +""" + +Part of the BlackStar filesystem (bsfs) module. +A copy of the license is provided with the project. +Author: Matthias Baumgartner, 2022 +""" +# imports +from collections import defaultdict +import typing + +# bsfs imports +from bsfs.utils import URI + +# exports +__all__: typing.Sequence[str] = ( + 'to_list_view', + 'to_dict_view', + ) + + +## code ## + +def to_list_view( + triples, + # aggregators + node: bool, + path: bool, + value: bool, # pylint: disable=unused-argument + ): + """Return an iterator over results. + + Dependent on the *node*, *path*, and *value* flags, + the respective component is omitted. + + """ + if node and path: + return iter(val for _, _, val in triples) + if node: + return iter((pred, val) for _, pred, val in triples) + if path: + return iter((subj, val) for subj, _, val in triples) + return iter((subj, pred, val) for subj, pred, val in triples) + + +def to_dict_view( + triples, + # context + one_node: bool, + one_path: bool, + unique_paths: typing.Set[typing.Union[URI, typing.Iterable[URI]]], + # aggregators + node: bool, + path: bool, + value: bool, + ) -> typing.Any: + """Return a dict of results. + + Note that triples are materialized to create this view. + + The returned structure depends on the *node*, *path*, and *value* flags. + If all flags are set to False, returns a dict(node -> dict(path -> set(values))). + Setting a flag to true omits or simplifies the respective component (if possible). + + """ + # NOTE: To create a dict, we need to materialize or make further assumptions + # (e.g., sorted in a specific order). + + data: typing.Any # disable type checks on data since it's very flexibly typed. + + # FIXME: type of data can be overwritten later on (if value) + + if node and path: + data = set() + elif node ^ path: + data = defaultdict(set) + else: + data = defaultdict(lambda: defaultdict(set)) + + for subj, pred, val in triples: + unique = pred in unique_paths + if node and path: + if value and unique and one_node and one_path: + return val + data.add(val) + elif node: + # remove node from result, group by predicate + if value and unique and one_node: + data[pred] = val + else: + data[pred].add(val) + elif path: + # remove predicate from result, group by node + if value and unique and one_path: + data[subj] = val + else: + data[subj].add(val) + else: + if value and unique: + data[subj][pred] = val + else: + data[subj][pred].add(val) + + # FIXME: Combine multiple Nodes instances into one? 
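    # Editor's illustration (not part of the original patch; the subject,
    # predicate, and value names below are hypothetical): given
    #   triples = [('ent#1', 'p:size', 10), ('ent#1', 'p:tag', 't#1')]
    # with 'p:size' listed in unique_paths, the two views behave roughly as:
    #   to_list_view(triples, node=True, path=False, value=False)
    #       -> ('p:size', 10), ('p:tag', 't#1')             # node column dropped
    #   to_dict_view(triples, one_node=True, one_path=False,
    #                unique_paths={'p:size'}, node=False, path=False, value=True)
    #       -> {'ent#1': {'p:size': 10, 'p:tag': {'t#1'}}}  # set unwrapped only
    #                                                       # for the unique path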
+ + # convert defaultdict to ordinary dict + if node and path: + return data + if node ^ path: + return dict(data) + return {key: dict(val) for key, val in data.items()} + +## EOF ## diff --git a/test/graph/test_nodes.py b/test/graph/test_nodes.py index 2870f35..a4e07ee 100644 --- a/test/graph/test_nodes.py +++ b/test/graph/test_nodes.py @@ -9,8 +9,8 @@ import rdflib import unittest # bsie imports -from bsfs import schema as _schema -from bsfs.namespace import ns +from bsfs import schema as bsc +from bsfs.namespace import Namespace, ns from bsfs.triple_store.sparql import SparqlStore from bsfs.utils import errors, URI @@ -20,11 +20,13 @@ from bsfs.graph.nodes import Nodes ## code ## +bst = Namespace('http://bsfs.ai/schema/Tag') + class TestNodes(unittest.TestCase): def setUp(self): # initialize backend self.backend = SparqlStore() - self.backend.schema = _schema.from_string(''' + self.backend.schema = bsc.from_string(''' prefix rdfs: prefix xsd: @@ -67,6 +69,11 @@ class TestNodes(unittest.TestCase): rdfs:range bsfs:User ; bsfs:unique "true"^^xsd:boolean . + bst:label rdfs:subClassOf bsfs:Predicate ; + rdfs:domain bsfs:Tag ; + rdfs:range xsd:string ; + bsfs:unique "true"^^xsd:boolean . + bst:representative rdfs:subClassOf bsfs:Predicate ; rdfs:domain bsfs:Tag ; rdfs:range bsfs:Entity ; @@ -89,7 +96,8 @@ class TestNodes(unittest.TestCase): (rdflib.URIRef(ns.bse.filesize), rdflib.RDFS.subClassOf, rdflib.URIRef(ns.bsfs.Predicate)), (rdflib.URIRef(ns.bse.tag), rdflib.RDFS.subClassOf, rdflib.URIRef(ns.bsfs.Predicate)), (rdflib.URIRef(ns.bse.author), rdflib.RDFS.subClassOf, rdflib.URIRef(ns.bsfs.Predicate)), - (rdflib.URIRef('http://bsfs.ai/schema/Tag#representative'), rdflib.RDFS.subClassOf, rdflib.URIRef(ns.bsfs.Predicate)), + (rdflib.URIRef(bst.representative), rdflib.RDFS.subClassOf, rdflib.URIRef(ns.bsfs.Predicate)), + (rdflib.URIRef(bst.label), rdflib.RDFS.subClassOf, rdflib.URIRef(ns.bsfs.Predicate)), } # Nodes constructor args self.user = URI('http://example.com/me') @@ -371,6 +379,85 @@ class TestNodes(unittest.TestCase): (self.p_author.uri, Nodes(self.backend, self.user, self.user_type, {URI('http://example.com/me/user#1234'), URI('http://example.com/me/user#4321')})))) self.assertSetEqual(curr, set(self.backend._graph)) + def test_fetch(self): + # setup: add some instances + Nodes(self.backend, self.user, self.ent_type, {'http://example.com/me/entity#1234'}) \ + .set(ns.bse.comment, 'hello world') \ + .set(ns.bse.filesize, 1234) \ + .set(ns.bse.tag, Nodes(self.backend, self.user, self.tag_type, {'http://example.com/me/tag#1234'})) + Nodes(self.backend, self.user, self.ent_type, {'http://example.com/me/entity#4321'}) \ + .set(ns.bse.filesize, 4321) \ + .set(ns.bse.tag, Nodes(self.backend, self.user, self.tag_type, {'http://example.com/me/tag#4321'})) + Nodes(self.backend, self.user, self.tag_type, {'http://example.com/me/tag#1234'}) \ + .set(bst.label, 'tag_label_1234') + Nodes(self.backend, self.user, self.tag_type, {'http://example.com/me/tag#4321'}) \ + .set(bst.label, 'tag_label_4321') + # setup: get nodes instance + nodes = Nodes(self.backend, self.user, self.ent_type, self.ent_ids) + # must pass at least one path + self.assertRaises(AttributeError, nodes.get) + # view must be list or dict + self.assertRaises(ValueError, nodes.get, ns.bse.filesize, view='hello') + self.assertRaises(ValueError, nodes.get, ns.bse.filesize, view=1234) + self.assertRaises(ValueError, nodes.get, ns.bse.filesize, view=tuple) + # can pass path as URI + self.assertDictEqual(nodes.get(ns.bse.filesize), { + 
Nodes(self.backend, self.user, self.ent_type, {'http://example.com/me/entity#1234'}): 1234, + Nodes(self.backend, self.user, self.ent_type, {'http://example.com/me/entity#4321'}): 4321, + }) + # can pass path as sequence of URI + self.assertDictEqual(nodes.get((ns.bse.tag, bst.label)), { + Nodes(self.backend, self.user, self.ent_type, {'http://example.com/me/entity#1234'}): {'tag_label_1234'}, + Nodes(self.backend, self.user, self.ent_type, {'http://example.com/me/entity#4321'}): {'tag_label_4321'}, + }) + # get returns the same path that was passed + self.assertCountEqual(list(nodes.get((ns.bse.tag, bst.label), path=False, view=list)), [ + (Nodes(self.backend, self.user, self.ent_type, {'http://example.com/me/entity#1234'}), (ns.bse.tag, bst.label), 'tag_label_1234'), + (Nodes(self.backend, self.user, self.ent_type, {'http://example.com/me/entity#4321'}), (ns.bse.tag, bst.label), 'tag_label_4321'), + ]) + self.assertCountEqual(list(nodes.get([ns.bse.tag, bst.label], path=False, view=list)), [ + (Nodes(self.backend, self.user, self.ent_type, {'http://example.com/me/entity#1234'}), [ns.bse.tag, bst.label], 'tag_label_1234'), + (Nodes(self.backend, self.user, self.ent_type, {'http://example.com/me/entity#4321'}), [ns.bse.tag, bst.label], 'tag_label_4321'), + ]) + # paths must be URI or sequence thereof + self.assertRaises(TypeError, nodes.get, 1234) + self.assertRaises(TypeError, nodes.get, (ns.bse.tag, 1234)) + self.assertRaises(TypeError, nodes.get, (1234, ns.bse.tag)) + self.assertRaises(errors.ConsistencyError, nodes.get, 'hello world') + self.assertRaises(errors.ConsistencyError, nodes.get, ns.bse.invalid) + self.assertRaises(errors.ConsistencyError, nodes.get, (ns.bse.tag, bst.invalid)) + # can pass multiple paths + self.assertDictEqual(nodes.get(ns.bse.filesize, (ns.bse.tag, bst.label)), { + Nodes(self.backend, self.user, self.ent_type, {'http://example.com/me/entity#1234'}): { + ns.bse.filesize: 1234, + (ns.bse.tag, bst.label): {'tag_label_1234'}, + }, + Nodes(self.backend, self.user, self.ent_type, {'http://example.com/me/entity#4321'}): { + ns.bse.filesize: 4321, + (ns.bse.tag, bst.label): {'tag_label_4321'}, + }, + }) + # get respects view + self.assertDictEqual(nodes.get(ns.bse.filesize, view=dict), { + Nodes(self.backend, self.user, self.ent_type, {'http://example.com/me/entity#1234'}): 1234, + Nodes(self.backend, self.user, self.ent_type, {'http://example.com/me/entity#4321'}): 4321, + }) + self.assertSetEqual(set(nodes.get(ns.bse.filesize, view=list)), { + (Nodes(self.backend, self.user, self.ent_type, {'http://example.com/me/entity#1234'}), 1234), + (Nodes(self.backend, self.user, self.ent_type, {'http://example.com/me/entity#4321'}), 4321), + }) + # get returns Nodes instance when fetching a node + self.assertDictEqual(nodes.get(ns.bse.tag), { + Nodes(self.backend, self.user, self.ent_type, {'http://example.com/me/entity#1234'}): + {Nodes(self.backend, self.user, self.tag_type, {'http://example.com/me/tag#1234'})}, + Nodes(self.backend, self.user, self.ent_type, {'http://example.com/me/entity#4321'}): + {Nodes(self.backend, self.user, self.tag_type, {'http://example.com/me/tag#4321'})}, + }) + # get returns a value when fetching a value and omits missing values + self.assertDictEqual(nodes.get(ns.bse.comment), { + Nodes(self.backend, self.user, self.ent_type, {'http://example.com/me/entity#1234'}): {'hello world'}, + }) + ## main ## diff --git a/test/graph/test_result.py b/test/graph/test_result.py new file mode 100644 index 0000000..89b0da1 --- /dev/null +++ 
b/test/graph/test_result.py @@ -0,0 +1,392 @@ +""" + +Part of the bsfs test suite. +A copy of the license is provided with the project. +Author: Matthias Baumgartner, 2022 +""" +# imports +import unittest + +# bsie imports +from bsfs import schema as bsc +from bsfs.namespace import ns +from bsfs.utils import URI + +# objects to test +from bsfs.graph.result import to_list_view, to_dict_view + + +## code ## + +class TestListView(unittest.TestCase): + def setUp(self): + self.triples_111 = [('ent#1234', ns.bse.iso, 123)] + self.triples_11U = [('ent#1234', ns.bse.tag, 'tag#1234'), + ('ent#1234', ns.bse.tag, 'tag#5678')] + self.triples_1M1 = [('ent#1234', ns.bse.iso, 123), + ('ent#1234', ns.bse.t_created, '2010-01-02')] + self.triples_1MU = [('ent#1234', ns.bse.iso, 123), + ('ent#1234', ns.bse.tag, 'tag#1234'), + ('ent#1234', ns.bse.tag, 'tag#5678')] + self.triples_N11 = [('ent#1234', ns.bse.iso, 123), + ('ent#4321', ns.bse.iso, 321)] + self.triples_N1U = [('ent#1234', ns.bse.tag, 'tag#1234'), + ('ent#1234', ns.bse.tag, 'tag#5678'), + ('ent#4321', ns.bse.tag, 'tag#4321')] + self.triples_NM1 = [('ent#1234', ns.bse.iso, 123), + ('ent#1234', ns.bse.t_created, '2010-01-02'), + ('ent#4321', ns.bse.iso, 321), + ('ent#4321', ns.bse.t_created, '2022-02-22')] + self.triples_NMU = [('ent#1234', ns.bse.iso, 123), + ('ent#1234', ns.bse.tag, 'tag#1234'), + ('ent#1234', ns.bse.tag, 'tag#5678'), + ('ent#4321', ns.bse.iso, 321), + ('ent#4321', ns.bse.t_created, '2022-02-22')] + + def test_copy(self): + # iterator yields tuples + self.assertIsInstance(list(to_list_view([('subject', 'predicate', 'object')], node=False, path=False, value=False))[0], tuple) + # components are not changed + class Foo(): pass + foo = Foo() + self.assertListEqual(list(to_list_view([('subject', 'predicate', 'object')], node=False, path=False, value=False)), + [('subject', 'predicate', 'object')]) + self.assertListEqual(list(to_list_view([(foo, 'predicate', 'object')], node=False, path=False, value=False)), + [(foo, 'predicate', 'object')]) + self.assertListEqual(list(to_list_view([('subject', foo, 'object')], node=False, path=False, value=False)), + [('subject', foo, 'object')]) + self.assertListEqual(list(to_list_view([('subject', 'predicate', foo)], node=False, path=False, value=False)), + [('subject', 'predicate', foo)]) + + def test_agg_none(self): + self.assertListEqual(list(to_list_view(self.triples_111, node=False, path=False, value=False)), self.triples_111) + self.assertListEqual(list(to_list_view(self.triples_11U, node=False, path=False, value=False)), self.triples_11U) + self.assertListEqual(list(to_list_view(self.triples_1M1, node=False, path=False, value=False)), self.triples_1M1) + self.assertListEqual(list(to_list_view(self.triples_1MU, node=False, path=False, value=False)), self.triples_1MU) + self.assertListEqual(list(to_list_view(self.triples_N11, node=False, path=False, value=False)), self.triples_N11) + self.assertListEqual(list(to_list_view(self.triples_N1U, node=False, path=False, value=False)), self.triples_N1U) + self.assertListEqual(list(to_list_view(self.triples_NM1, node=False, path=False, value=False)), self.triples_NM1) + self.assertListEqual(list(to_list_view(self.triples_NMU, node=False, path=False, value=False)), self.triples_NMU) + + def test_agg_node(self): + self.assertListEqual(list(to_list_view(self.triples_111, node=True, path=False, value=False)), + [(ns.bse.iso, 123)]) + self.assertListEqual(list(to_list_view(self.triples_11U, node=True, path=False, value=False)), + [(ns.bse.tag, 'tag#1234'), 
(ns.bse.tag, 'tag#5678')]) + self.assertListEqual(list(to_list_view(self.triples_1M1, node=True, path=False, value=False)), + [(ns.bse.iso, 123), (ns.bse.t_created, '2010-01-02')]) + self.assertListEqual(list(to_list_view(self.triples_1MU, node=True, path=False, value=False)), + [(ns.bse.iso, 123), (ns.bse.tag, 'tag#1234'), (ns.bse.tag, 'tag#5678')]) + self.assertListEqual(list(to_list_view(self.triples_N11, node=True, path=False, value=False)), + [(ns.bse.iso, 123), (ns.bse.iso, 321)]) + self.assertListEqual(list(to_list_view(self.triples_N1U, node=True, path=False, value=False)), + [(ns.bse.tag, 'tag#1234'), (ns.bse.tag, 'tag#5678'), (ns.bse.tag, 'tag#4321')]) + self.assertListEqual(list(to_list_view(self.triples_NM1, node=True, path=False, value=False)), + [(ns.bse.iso, 123), (ns.bse.t_created, '2010-01-02'), (ns.bse.iso, 321), (ns.bse.t_created, '2022-02-22')]) + self.assertListEqual(list(to_list_view(self.triples_NMU, node=True, path=False, value=False)), + [(ns.bse.iso, 123), (ns.bse.tag, 'tag#1234'), (ns.bse.tag, 'tag#5678'), (ns.bse.iso, 321), (ns.bse.t_created, '2022-02-22')]) + + def test_agg_path(self): + self.assertListEqual(list(to_list_view(self.triples_111, node=False, path=True, value=False)), + [('ent#1234', 123)]) + self.assertListEqual(list(to_list_view(self.triples_11U, node=False, path=True, value=False)), + [('ent#1234', 'tag#1234'), ('ent#1234', 'tag#5678')]) + self.assertListEqual(list(to_list_view(self.triples_1M1, node=False, path=True, value=False)), + [('ent#1234', 123), ('ent#1234', '2010-01-02')]) + self.assertListEqual(list(to_list_view(self.triples_1MU, node=False, path=True, value=False)), + [('ent#1234', 123), ('ent#1234', 'tag#1234'), ('ent#1234', 'tag#5678')]) + self.assertListEqual(list(to_list_view(self.triples_N11, node=False, path=True, value=False)), + [('ent#1234', 123), ('ent#4321', 321)]) + self.assertListEqual(list(to_list_view(self.triples_N1U, node=False, path=True, value=False)), + [('ent#1234', 'tag#1234'), ('ent#1234', 'tag#5678'), ('ent#4321', 'tag#4321')]) + self.assertListEqual(list(to_list_view(self.triples_NM1, node=False, path=True, value=False)), + [('ent#1234', 123), ('ent#1234', '2010-01-02'), ('ent#4321', 321), ('ent#4321', '2022-02-22')]) + self.assertListEqual(list(to_list_view(self.triples_NMU, node=False, path=True, value=False)), + [('ent#1234', 123), ('ent#1234', 'tag#1234'), ('ent#1234', 'tag#5678'), ('ent#4321', 321), ('ent#4321', '2022-02-22')]) + + def test_agg_node_path(self): + self.assertListEqual(list(to_list_view(self.triples_111, node=True, path=True, value=False)), + [123]) + self.assertListEqual(list(to_list_view(self.triples_11U, node=True, path=True, value=False)), + ['tag#1234', 'tag#5678']) + self.assertListEqual(list(to_list_view(self.triples_1M1, node=True, path=True, value=False)), + [123, '2010-01-02']) + self.assertListEqual(list(to_list_view(self.triples_1MU, node=True, path=True, value=False)), + [123, 'tag#1234', 'tag#5678']) + self.assertListEqual(list(to_list_view(self.triples_N11, node=True, path=True, value=False)), + [123, 321]) + self.assertListEqual(list(to_list_view(self.triples_N1U, node=True, path=True, value=False)), + ['tag#1234', 'tag#5678', 'tag#4321']) + self.assertListEqual(list(to_list_view(self.triples_NM1, node=True, path=True, value=False)), + [123, '2010-01-02', 321, '2022-02-22']) + self.assertListEqual(list(to_list_view(self.triples_NMU, node=True, path=True, value=False)), + [123, 'tag#1234', 'tag#5678', 321, '2022-02-22']) + + def test_agg_value(self): + # value flag has no 
effect + self.assertListEqual(list(to_list_view(self.triples_111, node=False, path=False, value=False)), self.triples_111) + self.assertListEqual(list(to_list_view(self.triples_11U, node=False, path=False, value=False)), self.triples_11U) + self.assertListEqual(list(to_list_view(self.triples_1M1, node=False, path=False, value=False)), self.triples_1M1) + self.assertListEqual(list(to_list_view(self.triples_1MU, node=False, path=False, value=False)), self.triples_1MU) + self.assertListEqual(list(to_list_view(self.triples_N11, node=False, path=False, value=False)), self.triples_N11) + self.assertListEqual(list(to_list_view(self.triples_N1U, node=False, path=False, value=False)), self.triples_N1U) + self.assertListEqual(list(to_list_view(self.triples_NM1, node=False, path=False, value=False)), self.triples_NM1) + self.assertListEqual(list(to_list_view(self.triples_NMU, node=False, path=False, value=False)), self.triples_NMU) + + def test_agg_node_value(self): + # value flag has no effect -> same test as test_agg_node + self.assertListEqual(list(to_list_view(self.triples_111, node=True, path=False, value=True)), + [(ns.bse.iso, 123)]) + self.assertListEqual(list(to_list_view(self.triples_11U, node=True, path=False, value=True)), + [(ns.bse.tag, 'tag#1234'), (ns.bse.tag, 'tag#5678')]) + self.assertListEqual(list(to_list_view(self.triples_1M1, node=True, path=False, value=True)), + [(ns.bse.iso, 123), (ns.bse.t_created, '2010-01-02')]) + self.assertListEqual(list(to_list_view(self.triples_1MU, node=True, path=False, value=True)), + [(ns.bse.iso, 123), (ns.bse.tag, 'tag#1234'), (ns.bse.tag, 'tag#5678')]) + self.assertListEqual(list(to_list_view(self.triples_N11, node=True, path=False, value=True)), + [(ns.bse.iso, 123), (ns.bse.iso, 321)]) + self.assertListEqual(list(to_list_view(self.triples_N1U, node=True, path=False, value=True)), + [(ns.bse.tag, 'tag#1234'), (ns.bse.tag, 'tag#5678'), (ns.bse.tag, 'tag#4321')]) + self.assertListEqual(list(to_list_view(self.triples_NM1, node=True, path=False, value=True)), + [(ns.bse.iso, 123), (ns.bse.t_created, '2010-01-02'), (ns.bse.iso, 321), (ns.bse.t_created, '2022-02-22')]) + self.assertListEqual(list(to_list_view(self.triples_NMU, node=True, path=False, value=True)), + [(ns.bse.iso, 123), (ns.bse.tag, 'tag#1234'), (ns.bse.tag, 'tag#5678'), (ns.bse.iso, 321), (ns.bse.t_created, '2022-02-22')]) + + def test_agg_path_value(self): + # value flag has no effect -> same test as test_agg_path + self.assertListEqual(list(to_list_view(self.triples_111, node=False, path=True, value=True)), + [('ent#1234', 123)]) + self.assertListEqual(list(to_list_view(self.triples_11U, node=False, path=True, value=True)), + [('ent#1234', 'tag#1234'), ('ent#1234', 'tag#5678')]) + self.assertListEqual(list(to_list_view(self.triples_1M1, node=False, path=True, value=True)), + [('ent#1234', 123), ('ent#1234', '2010-01-02')]) + self.assertListEqual(list(to_list_view(self.triples_1MU, node=False, path=True, value=True)), + [('ent#1234', 123), ('ent#1234', 'tag#1234'), ('ent#1234', 'tag#5678')]) + self.assertListEqual(list(to_list_view(self.triples_N11, node=False, path=True, value=True)), + [('ent#1234', 123), ('ent#4321', 321)]) + self.assertListEqual(list(to_list_view(self.triples_N1U, node=False, path=True, value=True)), + [('ent#1234', 'tag#1234'), ('ent#1234', 'tag#5678'), ('ent#4321', 'tag#4321')]) + self.assertListEqual(list(to_list_view(self.triples_NM1, node=False, path=True, value=True)), + [('ent#1234', 123), ('ent#1234', '2010-01-02'), ('ent#4321', 321), ('ent#4321', 
'2022-02-22')]) + self.assertListEqual(list(to_list_view(self.triples_NMU, node=False, path=True, value=True)), + [('ent#1234', 123), ('ent#1234', 'tag#1234'), ('ent#1234', 'tag#5678'), ('ent#4321', 321), ('ent#4321', '2022-02-22')]) + + def test_agg_all(self): + # value flag has no effect -> same test as test_agg_node_path + self.assertListEqual(list(to_list_view(self.triples_111, node=True, path=True, value=True)), + [123]) + self.assertListEqual(list(to_list_view(self.triples_11U, node=True, path=True, value=True)), + ['tag#1234', 'tag#5678']) + self.assertListEqual(list(to_list_view(self.triples_1M1, node=True, path=True, value=True)), + [123, '2010-01-02']) + self.assertListEqual(list(to_list_view(self.triples_1MU, node=True, path=True, value=True)), + [123, 'tag#1234', 'tag#5678']) + self.assertListEqual(list(to_list_view(self.triples_N11, node=True, path=True, value=True)), + [123, 321]) + self.assertListEqual(list(to_list_view(self.triples_N1U, node=True, path=True, value=True)), + ['tag#1234', 'tag#5678', 'tag#4321']) + self.assertListEqual(list(to_list_view(self.triples_NM1, node=True, path=True, value=True)), + [123, '2010-01-02', 321, '2022-02-22']) + self.assertListEqual(list(to_list_view(self.triples_NMU, node=True, path=True, value=True)), + [123, 'tag#1234', 'tag#5678', 321, '2022-02-22']) + + +class TestDictView(unittest.TestCase): + def setUp(self): + self.unique_paths = {ns.bse.iso, ns.bse.t_created} + self.triples_111 = [('ent#1234', ns.bse.iso, 123)] + self.triples_11U = [('ent#1234', ns.bse.tag, 'tag#1234'), + ('ent#1234', ns.bse.tag, 'tag#5678')] + self.triples_1M1 = [('ent#1234', ns.bse.iso, 123), + ('ent#1234', ns.bse.t_created, '2010-01-02')] + self.triples_1MU = [('ent#1234', ns.bse.iso, 123), + ('ent#1234', ns.bse.tag, 'tag#1234'), + ('ent#1234', ns.bse.tag, 'tag#5678')] + self.triples_N11 = [('ent#1234', ns.bse.iso, 123), + ('ent#4321', ns.bse.iso, 321)] + self.triples_N1U = [('ent#1234', ns.bse.tag, 'tag#1234'), + ('ent#1234', ns.bse.tag, 'tag#5678'), + ('ent#4321', ns.bse.tag, 'tag#4321')] + self.triples_NM1 = [('ent#1234', ns.bse.iso, 123), + ('ent#1234', ns.bse.t_created, '2010-01-02'), + ('ent#4321', ns.bse.iso, 321), + ('ent#4321', ns.bse.t_created, '2022-02-22')] + self.triples_NMU = [('ent#1234', ns.bse.iso, 123), + ('ent#1234', ns.bse.tag, 'tag#1234'), + ('ent#1234', ns.bse.tag, 'tag#5678'), + ('ent#4321', ns.bse.iso, 321), + ('ent#4321', ns.bse.t_created, '2022-02-22')] + + def test_errounous_call(self): + # return set instead of value + self.assertSetEqual(to_dict_view(self.triples_111, one_node=False, one_path=True, unique_paths=self.unique_paths, node=True, path=True, value=True), + {123}) + self.assertSetEqual(to_dict_view(self.triples_111, one_node=True, one_path=False, unique_paths=self.unique_paths, node=True, path=True, value=True), + {123}) + # one_node mismatch: return set of values instead of value + self.assertDictEqual(to_dict_view(self.triples_111, one_node=False, one_path=True, unique_paths=self.unique_paths, node=True, path=False, value=True), + {ns.bse.iso: {123}}) + # one_path mismatch: return set of values instead of value + self.assertDictEqual(to_dict_view(self.triples_111, one_node=True, one_path=False, unique_paths=self.unique_paths, node=False, path=True, value=True), + {'ent#1234': {123}}) + # unique_paths mismatch: return set of values instead of value + self.assertSetEqual(to_dict_view(self.triples_111, one_node=True, one_path=False, unique_paths={}, node=True, path=True, value=True), + {123}) + 
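        # Editor's note (not part of the original tests): the bare-value
        # collapse exercised above requires value=True, a path listed in
        # unique_paths, and, for each component that is dropped, its matching
        # context flag (one_node and/or one_path). If any of these is missing,
        # as in the calls here, the result stays wrapped in a set or nested
        # dict instead of being returned as a plain value.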
self.assertDictEqual(to_dict_view(self.triples_111, one_node=True, one_path=False, unique_paths={}, node=True, path=False, value=True), + {ns.bse.iso: {123}}) + self.assertDictEqual(to_dict_view(self.triples_111, one_node=True, one_path=False, unique_paths={}, node=False, path=True, value=True), + {'ent#1234': {123}}) + self.assertDictEqual(to_dict_view(self.triples_111, one_node=True, one_path=False, unique_paths={}, node=False, path=False, value=True), + {'ent#1234': {ns.bse.iso: {123}}}) + + def test_agg_none(self): + self.assertDictEqual(to_dict_view(self.triples_111, one_node=True, one_path=True, unique_paths=self.unique_paths, node=False, path=False, value=False), + {'ent#1234': {ns.bse.iso: {123}}}) + self.assertDictEqual(to_dict_view(self.triples_11U, one_node=True, one_path=True, unique_paths=self.unique_paths, node=False, path=False, value=False), + {'ent#1234': {ns.bse.tag: {'tag#1234', 'tag#5678'}}}) + self.assertDictEqual(to_dict_view(self.triples_1M1, one_node=True, one_path=False, unique_paths=self.unique_paths, node=False, path=False, value=False), + {'ent#1234': {ns.bse.iso: {123}, ns.bse.t_created: {'2010-01-02'}}}) + self.assertDictEqual(to_dict_view(self.triples_1MU, one_node=True, one_path=False, unique_paths=self.unique_paths, node=False, path=False, value=False), + {'ent#1234': {ns.bse.iso: {123}, ns.bse.tag: {'tag#1234', 'tag#5678'}}}) + self.assertDictEqual(to_dict_view(self.triples_N11, one_node=False, one_path=True, unique_paths=self.unique_paths, node=False, path=False, value=False), + {'ent#1234': {ns.bse.iso: {123}}, 'ent#4321': {ns.bse.iso: {321}}}) + self.assertDictEqual(to_dict_view(self.triples_N1U, one_node=False, one_path=True, unique_paths=self.unique_paths, node=False, path=False, value=False), + {'ent#1234': {ns.bse.tag: {'tag#1234', 'tag#5678'}}, 'ent#4321': {ns.bse.tag: {'tag#4321'}}}) + self.assertDictEqual(to_dict_view(self.triples_NM1, one_node=False, one_path=False, unique_paths=self.unique_paths, node=False, path=False, value=False), + {'ent#1234': {ns.bse.iso: {123}, ns.bse.t_created: {'2010-01-02'}}, 'ent#4321': {ns.bse.iso: {321}, ns.bse.t_created: {'2022-02-22'}}}) + self.assertDictEqual(to_dict_view(self.triples_NMU, one_node=False, one_path=False, unique_paths=self.unique_paths, node=False, path=False, value=False), + {'ent#1234': {ns.bse.iso: {123}, ns.bse.tag: {'tag#1234', 'tag#5678'}}, 'ent#4321': {ns.bse.iso: {321}, ns.bse.t_created: {'2022-02-22'}}}) + + def test_agg_node(self): + self.assertDictEqual(to_dict_view(self.triples_111, one_node=True, one_path=True, unique_paths=self.unique_paths, node=True, path=False, value=False), + {ns.bse.iso: {123}}) + self.assertDictEqual(to_dict_view(self.triples_11U, one_node=True, one_path=True, unique_paths=self.unique_paths, node=True, path=False, value=False), + {ns.bse.tag: {'tag#1234', 'tag#5678'}}) + self.assertDictEqual(to_dict_view(self.triples_1M1, one_node=True, one_path=False, unique_paths=self.unique_paths, node=True, path=False, value=False), + {ns.bse.iso: {123}, ns.bse.t_created: {'2010-01-02'}}) + self.assertDictEqual(to_dict_view(self.triples_1MU, one_node=True, one_path=False, unique_paths=self.unique_paths, node=True, path=False, value=False), + {ns.bse.iso: {123}, ns.bse.tag: {'tag#1234', 'tag#5678'}}) + self.assertDictEqual(to_dict_view(self.triples_N11, one_node=False, one_path=True, unique_paths=self.unique_paths, node=True, path=False, value=False), + {ns.bse.iso: {123, 321}}) + self.assertDictEqual(to_dict_view(self.triples_N1U, one_node=False, one_path=True, 
unique_paths=self.unique_paths, node=True, path=False, value=False), + {ns.bse.tag: {'tag#1234', 'tag#5678', 'tag#4321'}}) + self.assertDictEqual(to_dict_view(self.triples_NM1, one_node=False, one_path=False, unique_paths=self.unique_paths, node=True, path=False, value=False), + {ns.bse.iso: {123, 321}, ns.bse.t_created: {'2010-01-02', '2022-02-22'}}) + self.assertDictEqual(to_dict_view(self.triples_NMU, one_node=False, one_path=False, unique_paths=self.unique_paths, node=True, path=False, value=False), + {ns.bse.iso: {123, 321}, ns.bse.tag: {'tag#1234', 'tag#5678'}, ns.bse.t_created: {'2022-02-22'}}) + + def test_agg_path(self): + self.assertDictEqual(to_dict_view(self.triples_111, one_node=True, one_path=True, unique_paths=self.unique_paths, node=False, path=True, value=False), + {'ent#1234': {123}}) + self.assertDictEqual(to_dict_view(self.triples_11U, one_node=True, one_path=True, unique_paths=self.unique_paths, node=False, path=True, value=False), + {'ent#1234': {'tag#1234', 'tag#5678'}}) + self.assertDictEqual(to_dict_view(self.triples_1M1, one_node=True, one_path=False, unique_paths=self.unique_paths, node=False, path=True, value=False), + {'ent#1234': {123, '2010-01-02'}}) + self.assertDictEqual(to_dict_view(self.triples_1MU, one_node=True, one_path=False, unique_paths=self.unique_paths, node=False, path=True, value=False), + {'ent#1234': {123, 'tag#1234', 'tag#5678'}}) + self.assertDictEqual(to_dict_view(self.triples_N11, one_node=False, one_path=True, unique_paths=self.unique_paths, node=False, path=True, value=False), + {'ent#1234': {123}, 'ent#4321': {321}}) + self.assertDictEqual(to_dict_view(self.triples_N1U, one_node=False, one_path=True, unique_paths=self.unique_paths, node=False, path=True, value=False), + {'ent#1234': {'tag#1234', 'tag#5678'}, 'ent#4321': {'tag#4321'}}) + self.assertDictEqual(to_dict_view(self.triples_NM1, one_node=False, one_path=False, unique_paths=self.unique_paths, node=False, path=True, value=False), + {'ent#1234': {123, '2010-01-02'}, 'ent#4321': {321, '2022-02-22'}}) + self.assertDictEqual(to_dict_view(self.triples_NMU, one_node=False, one_path=False, unique_paths=self.unique_paths, node=False, path=True, value=False), + {'ent#1234': {123, 'tag#1234', 'tag#5678'}, 'ent#4321': {321, '2022-02-22'}}) + + def test_agg_node_path(self): + self.assertSetEqual(to_dict_view(self.triples_111, one_node=True, one_path=True, unique_paths=self.unique_paths, node=True, path=True, value=False), + {123}) + self.assertSetEqual(to_dict_view(self.triples_11U, one_node=True, one_path=True, unique_paths=self.unique_paths, node=True, path=True, value=False), + {'tag#1234', 'tag#5678'}) + self.assertSetEqual(to_dict_view(self.triples_1M1, one_node=True, one_path=False, unique_paths=self.unique_paths, node=True, path=True, value=False), + {123, '2010-01-02'}) + self.assertSetEqual(to_dict_view(self.triples_1MU, one_node=True, one_path=False, unique_paths=self.unique_paths, node=True, path=True, value=False), + {123, 'tag#1234', 'tag#5678'}) + self.assertSetEqual(to_dict_view(self.triples_N11, one_node=False, one_path=True, unique_paths=self.unique_paths, node=True, path=True, value=False), + {123, 321}) + self.assertSetEqual(to_dict_view(self.triples_N1U, one_node=False, one_path=True, unique_paths=self.unique_paths, node=True, path=True, value=False), + {'tag#1234', 'tag#5678', 'tag#4321'}) + self.assertSetEqual(to_dict_view(self.triples_NM1, one_node=False, one_path=False, unique_paths=self.unique_paths, node=True, path=True, value=False), + {123, '2010-01-02', 321, 
'2022-02-22'}) + self.assertSetEqual(to_dict_view(self.triples_NMU, one_node=False, one_path=False, unique_paths=self.unique_paths, node=True, path=True, value=False), + {123, 'tag#1234', 'tag#5678', 321, '2022-02-22'}) + + def test_agg_value(self): + self.assertDictEqual(to_dict_view(self.triples_111, one_node=True, one_path=True, unique_paths=self.unique_paths, node=False, path=False, value=True), + {'ent#1234': {ns.bse.iso: 123}}) + self.assertDictEqual(to_dict_view(self.triples_11U, one_node=True, one_path=True, unique_paths=self.unique_paths, node=False, path=False, value=True), + {'ent#1234': {ns.bse.tag: {'tag#1234', 'tag#5678'}}}) + self.assertDictEqual(to_dict_view(self.triples_1M1, one_node=True, one_path=False, unique_paths=self.unique_paths, node=False, path=False, value=True), + {'ent#1234': {ns.bse.iso: 123, ns.bse.t_created: '2010-01-02'}}) + self.assertDictEqual(to_dict_view(self.triples_1MU, one_node=True, one_path=False, unique_paths=self.unique_paths, node=False, path=False, value=True), + {'ent#1234': {ns.bse.iso: 123, ns.bse.tag: {'tag#1234', 'tag#5678'}}}) + self.assertDictEqual(to_dict_view(self.triples_N11, one_node=False, one_path=True, unique_paths=self.unique_paths, node=False, path=False, value=True), + {'ent#1234': {ns.bse.iso: 123}, 'ent#4321': {ns.bse.iso: 321}}) + self.assertDictEqual(to_dict_view(self.triples_N1U, one_node=False, one_path=True, unique_paths=self.unique_paths, node=False, path=False, value=True), + {'ent#1234': {ns.bse.tag: {'tag#1234', 'tag#5678'}}, 'ent#4321': {ns.bse.tag: {'tag#4321'}}}) + self.assertDictEqual(to_dict_view(self.triples_NM1, one_node=False, one_path=False, unique_paths=self.unique_paths, node=False, path=False, value=True), + {'ent#1234': {ns.bse.iso: 123, ns.bse.t_created: '2010-01-02'}, 'ent#4321': {ns.bse.iso: 321, ns.bse.t_created: '2022-02-22'}}) + self.assertDictEqual(to_dict_view(self.triples_NMU, one_node=False, one_path=False, unique_paths=self.unique_paths, node=False, path=False, value=True), + {'ent#1234': {ns.bse.iso: 123, ns.bse.tag: {'tag#1234', 'tag#5678'}}, 'ent#4321': {ns.bse.iso: 321, ns.bse.t_created: '2022-02-22'}}) + + def test_agg_node_value(self): + self.assertDictEqual(to_dict_view(self.triples_111, one_node=True, one_path=True, unique_paths=self.unique_paths, node=True, path=False, value=True), + {ns.bse.iso: 123}) + self.assertDictEqual(to_dict_view(self.triples_11U, one_node=True, one_path=True, unique_paths=self.unique_paths, node=True, path=False, value=True), + {ns.bse.tag: {'tag#1234', 'tag#5678'}}) + self.assertDictEqual(to_dict_view(self.triples_1M1, one_node=True, one_path=False, unique_paths=self.unique_paths, node=True, path=False, value=True), + {ns.bse.iso: 123, ns.bse.t_created: '2010-01-02'}) + self.assertDictEqual(to_dict_view(self.triples_1MU, one_node=True, one_path=False, unique_paths=self.unique_paths, node=True, path=False, value=True), + {ns.bse.iso: 123, ns.bse.tag: {'tag#1234', 'tag#5678'}}) + self.assertDictEqual(to_dict_view(self.triples_N11, one_node=False, one_path=True, unique_paths=self.unique_paths, node=True, path=False, value=True), + {ns.bse.iso: {123, 321}}) + self.assertDictEqual(to_dict_view(self.triples_N1U, one_node=False, one_path=True, unique_paths=self.unique_paths, node=True, path=False, value=True), + {ns.bse.tag: {'tag#1234', 'tag#5678', 'tag#4321'}}) + self.assertDictEqual(to_dict_view(self.triples_NM1, one_node=False, one_path=False, unique_paths=self.unique_paths, node=True, path=False, value=True), + {ns.bse.iso: {123, 321}, ns.bse.t_created: 
{'2010-01-02', '2022-02-22'}}) + self.assertDictEqual(to_dict_view(self.triples_NMU, one_node=False, one_path=False, unique_paths=self.unique_paths, node=True, path=False, value=True), + {ns.bse.iso: {123, 321}, ns.bse.tag: {'tag#1234', 'tag#5678'}, ns.bse.t_created: {'2022-02-22'}}) + + def test_agg_path_value(self): + self.assertDictEqual(to_dict_view(self.triples_111, one_node=True, one_path=True, unique_paths=self.unique_paths, node=False, path=True, value=True), + {'ent#1234': 123}) + self.assertDictEqual(to_dict_view(self.triples_11U, one_node=True, one_path=True, unique_paths=self.unique_paths, node=False, path=True, value=True), + {'ent#1234': {'tag#1234', 'tag#5678'}}) + self.assertDictEqual(to_dict_view(self.triples_1M1, one_node=True, one_path=False, unique_paths=self.unique_paths, node=False, path=True, value=True), + {'ent#1234': {123, '2010-01-02'}}) + self.assertDictEqual(to_dict_view(self.triples_1MU, one_node=True, one_path=False, unique_paths=self.unique_paths, node=False, path=True, value=True), + {'ent#1234': {123, 'tag#1234', 'tag#5678'}}) + self.assertDictEqual(to_dict_view(self.triples_N11, one_node=False, one_path=True, unique_paths=self.unique_paths, node=False, path=True, value=True), + {'ent#1234': 123, 'ent#4321': 321}) + self.assertDictEqual(to_dict_view(self.triples_N1U, one_node=False, one_path=True, unique_paths=self.unique_paths, node=False, path=True, value=True), + {'ent#1234': {'tag#1234', 'tag#5678'}, 'ent#4321': {'tag#4321'}}) + self.assertDictEqual(to_dict_view(self.triples_NM1, one_node=False, one_path=False, unique_paths=self.unique_paths, node=False, path=True, value=True), + {'ent#1234': {123, '2010-01-02'}, 'ent#4321': {321, '2022-02-22'}}) + self.assertDictEqual(to_dict_view(self.triples_NMU, one_node=False, one_path=False, unique_paths=self.unique_paths, node=False, path=True, value=True), + {'ent#1234': {123, 'tag#1234', 'tag#5678'}, 'ent#4321': {321, '2022-02-22'}}) + + def test_agg_all(self): + self.assertEqual(to_dict_view(self.triples_111, one_node=True, one_path=True, unique_paths=self.unique_paths, node=True, path=True, value=True), + 123) + self.assertSetEqual(to_dict_view(self.triples_11U, one_node=True, one_path=True, unique_paths=self.unique_paths, node=True, path=True, value=True), + {'tag#1234', 'tag#5678'}) + self.assertSetEqual(to_dict_view(self.triples_1M1, one_node=True, one_path=False, unique_paths=self.unique_paths, node=True, path=True, value=True), + {123, '2010-01-02'}) + self.assertSetEqual(to_dict_view(self.triples_1MU, one_node=True, one_path=False, unique_paths=self.unique_paths, node=True, path=True, value=True), + {123, 'tag#1234', 'tag#5678'}) + self.assertSetEqual(to_dict_view(self.triples_N11, one_node=False, one_path=True, unique_paths=self.unique_paths, node=True, path=True, value=True), + {123, 321}) + self.assertSetEqual(to_dict_view(self.triples_N1U, one_node=False, one_path=True, unique_paths=self.unique_paths, node=True, path=True, value=True), + {'tag#1234', 'tag#5678', 'tag#4321'}) + self.assertSetEqual(to_dict_view(self.triples_NM1, one_node=False, one_path=False, unique_paths=self.unique_paths, node=True, path=True, value=True), + {123, '2010-01-02', 321, '2022-02-22'}) + self.assertSetEqual(to_dict_view(self.triples_NMU, one_node=False, one_path=False, unique_paths=self.unique_paths, node=True, path=True, value=True), + {123, 'tag#1234', 'tag#5678', 321, '2022-02-22'}) + + +## main ## + +if __name__ == '__main__': + unittest.main() + +## EOF ## -- cgit v1.2.3 From 
9310610a7edf4dcbb934aedcecff1d11348197bb Mon Sep 17 00:00:00 2001 From: Matthias Baumgartner Date: Sat, 21 Jan 2023 22:32:33 +0100 Subject: nodes predicate walk sugar --- bsfs/graph/nodes.py | 15 +++- bsfs/graph/walk.py | 120 ++++++++++++++++++++++++++++++++ bsfs/schema/schema.py | 4 ++ test/graph/test_nodes.py | 14 +++- test/graph/test_walk.py | 173 +++++++++++++++++++++++++++++++++++++++++++++++ 5 files changed, 324 insertions(+), 2 deletions(-) create mode 100644 bsfs/graph/walk.py create mode 100644 test/graph/test_walk.py diff --git a/bsfs/graph/nodes.py b/bsfs/graph/nodes.py index a4ba45f..18ab30d 100644 --- a/bsfs/graph/nodes.py +++ b/bsfs/graph/nodes.py @@ -19,6 +19,7 @@ from bsfs.utils import errors, URI, typename # inner-module imports from . import ac from . import result +from . import walk # exports __all__: typing.Sequence[str] = ( @@ -87,6 +88,18 @@ class Nodes(): """Return all node guids.""" return iter(self._guids) + @property + def schema(self) -> bsc.Schema: + """Return the store's local schema.""" + return self._backend.schema + + def __getattr__(self, name: str): + try: + return super().__getattr__(name) # type: ignore [misc] # parent has no getattr + except AttributeError: + pass + return walk.Walk(self, walk.Walk.step(self.schema, self.node_type, name)) + def set( self, pred: URI, # FIXME: URI or bsc.Predicate? @@ -141,7 +154,7 @@ class Nodes(): if view not in (dict, list): raise ValueError(f'expected dict or list, found {view}') # process paths: create fetch ast, build name mapping, and find unique paths - schema = self._backend.schema + schema = self.schema statements = set() name2path = {} unique_paths = set() # paths that result in a single (unique) value diff --git a/bsfs/graph/walk.py b/bsfs/graph/walk.py new file mode 100644 index 0000000..63ef5e9 --- /dev/null +++ b/bsfs/graph/walk.py @@ -0,0 +1,120 @@ +""" + +Part of the BlackStar filesystem (bsfs) module. +A copy of the license is provided with the project. +Author: Matthias Baumgartner, 2022 +""" +# imports +from collections import abc +import typing + +# bsfs imports +from bsfs import schema as bsc + +# inner-module imports +# NOTE: circular import! OK as long as only used for type annotations. +from . import nodes # pylint: disable=cyclic-import + +# exports +__all__: typing.Sequence[str] = ( + 'Walk', + ) + + +## code ## + +class Walk(abc.Hashable, abc.Callable): # type: ignore [misc] # invalid base class (Callable) + """Syntactic sugar for `Nodes` to build and act on predicate paths via members.""" + + # Link to Nodes instance. + _root: 'nodes.Nodes' + + # Current predicate path. + _path: typing.Tuple[bsc.Predicate, ...] 
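    # Usage sketch (editor's note, not from the original patch; `entities` is
    # a hypothetical Nodes instance and the schema is assumed to define the
    # referenced predicates):
    #   entities.tag            -> Walk(entities, (bse:tag,))
    #   entities.tag.label      -> Walk(entities, (bse:tag, bst:label))
    #   entities.tag.label()    -> entities.get((bse:tag, bst:label))
    # Each attribute access appends the single predicate at the current tail
    # node whose URI fragment equals the attribute name; calling the walk
    # delegates to Nodes.get() with the accumulated predicate path.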
+ + def __init__( + self, + root: 'nodes.Nodes', + path: typing.Sequence[bsc.Predicate], + ): + self._root = root + self._path = tuple(path) + + @property + def tail(self): + """Return the node type at the end of the path.""" + return self._path[-1].range + + + ## comparison + + def __hash__(self) -> int: + """Return an integer hash that identifies the instance.""" + return hash((type(self), self._root, self._path)) + + def __eq__(self, other) -> bool: + """Compare against *other* backend.""" + return isinstance(other, type(self)) \ + and self._root == other._root \ + and self._path == other._path + + + ## representation + + def __repr__(self) -> str: + """Return a formal string representation.""" + path = ', '.join(pred.uri for pred in self._path) + return f'Walk({self._root.node_type.uri}, ({path}))' + + def __str__(self) -> str: + """Return an informal string representation.""" + path = ', '.join(pred.uri for pred in self._path) + return f'Walk(@{self._root.node_type.uri}: {path})' + + + ## walk + + @staticmethod + def step( + schema: bsc.Schema, + node: bsc.Node, + name: str, + ) -> typing.Tuple[bsc.Predicate]: + """Get an predicate at *node* whose fragment matches *name*.""" + predicates = tuple( + pred + for pred + in schema.predicates_at(node) + if pred.uri.get('fragment', None) == name + ) + if len(predicates) == 0: # no fragment found for name + raise ValueError(f'no available predicate matches {name}') + if len(predicates) > 1: # ambiguous name + raise ValueError(f'{name} matches multiple predicates') + # append predicate to walk + return predicates # type: ignore [return-value] # size is one + + def __getattr__(self, name: str) -> 'Walk': + """Alias for `Walk.step(name)`.""" + try: + return super().__getattr__(name) + except AttributeError: + pass + # get predicate + pred = self.step(self._root.schema, self.tail, name) + # append predicate to walk + return Walk(self._root, self._path + pred) + + + ## get paths ## + + def get(self, **kwargs) -> typing.Any: + """Alias for `Nodes.get(..)`.""" + return self._root.get(tuple(pred.uri for pred in self._path), **kwargs) + + def __call__(self, **kwargs) -> typing.Any: # pylint: disable=arguments-differ + """Alias for `Walk.get(...)`.""" + return self.get(**kwargs) + + +## EOF ## diff --git a/bsfs/schema/schema.py b/bsfs/schema/schema.py index 8d9a821..1644926 100644 --- a/bsfs/schema/schema.py +++ b/bsfs/schema/schema.py @@ -312,4 +312,8 @@ class Schema(): """Return the Literal matching the *uri*.""" return self._literals[uri] + def predicates_at(self, node: types.Node) -> typing.Iterator[types.Predicate]: + """Return predicates that have domain *node* (or superclass thereof).""" + return iter(pred for pred in self._predicates.values() if node <= pred.domain) + ## EOF ## diff --git a/test/graph/test_nodes.py b/test/graph/test_nodes.py index a4e07ee..670df69 100644 --- a/test/graph/test_nodes.py +++ b/test/graph/test_nodes.py @@ -10,6 +10,7 @@ import unittest # bsie imports from bsfs import schema as bsc +from bsfs.graph.walk import Walk from bsfs.namespace import Namespace, ns from bsfs.triple_store.sparql import SparqlStore from bsfs.utils import errors, URI @@ -108,7 +109,8 @@ class TestNodes(unittest.TestCase): self.p_filesize = self.backend.schema.predicate(ns.bse.filesize) self.p_author = self.backend.schema.predicate(ns.bse.author) self.p_tag = self.backend.schema.predicate(ns.bse.tag) - self.p_representative = self.backend.schema.predicate(URI('http://bsfs.ai/schema/Tag#representative')) + self.p_representative = 
self.backend.schema.predicate(bst.representative) + self.p_label = self.backend.schema.predicate(bst.label) self.t_created = self.backend.schema.predicate(ns.bsm.t_created) self.ent_ids = { URI('http://example.com/me/entity#1234'), @@ -458,6 +460,16 @@ class TestNodes(unittest.TestCase): Nodes(self.backend, self.user, self.ent_type, {'http://example.com/me/entity#1234'}): {'hello world'}, }) + def test_getattr(self): + nodes = Nodes(self.backend, self.user, self.ent_type, {'http://example.com/me/entity#1234'}) + # can get walks to values + self.assertEqual(nodes.filesize, Walk(nodes, (self.p_filesize, ))) + # can get walks to nodes + self.assertEqual(nodes.tag, Walk(nodes, (self.p_tag, ))) + # can do multiple hops + self.assertEqual(nodes.tag.label, Walk(nodes, (self.p_tag, self.p_label))) + # invalid step raises an error + self.assertRaises(ValueError, getattr, nodes, 'foobar') ## main ## diff --git a/test/graph/test_walk.py b/test/graph/test_walk.py new file mode 100644 index 0000000..057ac85 --- /dev/null +++ b/test/graph/test_walk.py @@ -0,0 +1,173 @@ +""" + +Part of the bsfs test suite. +A copy of the license is provided with the project. +Author: Matthias Baumgartner, 2022 +""" +# imports +import unittest + +# bsfs imports +from bsfs import schema as bsc +from bsfs.graph import Graph +from bsfs.namespace import Namespace, ns +from bsfs.triple_store.sparql import SparqlStore +from bsfs.utils import URI + +# symbol to test +from bsfs.graph.walk import Walk + +## code ## + +bse = ns.bse +bst = Namespace('http://bsfs.ai/schema/Tag') + +class TestWalk(unittest.TestCase): + def setUp(self): + # backend setup + self.schema = bsc.from_string(''' + prefix rdfs: + prefix xsd: + prefix bsfs: + prefix bse: + prefix bst: + + bsfs:Entity rdfs:subClassOf bsfs:Node . + bsfs:Tag rdfs:subClassOf bsfs:Node . + bsfs:User rdfs:subClassOf bsfs:Node . + xsd:string rdfs:subClassOf bsfs:Literal . + + bse:author rdfs:subClassOf bsfs:Predicate ; + rdfs:domain bsfs:Node ; + rdfs:range bsfs:User . + + bse:tag rdfs:subClassOf bsfs:Predicate ; + rdfs:domain bsfs:Entity ; + rdfs:range bsfs:Tag . + + bst:label rdfs:subClassOf bsfs:Predicate ; + rdfs:domain bsfs:Tag ; + rdfs:range xsd:string ; + bsfs:unique "true"^^xsd:boolean . + + bst:subTagOf rdfs:subClassOf bsfs:Predicate ; + rdfs:domain bsfs:Tag ; + rdfs:range bsfs:Tag . + + bst:main rdfs:subClassOf bsfs:Predicate ; + rdfs:domain bsfs:Tag ; + rdfs:range bsfs:Entity . + + bst:author rdfs:subClassOf bsfs:Predicate ; + rdfs:domain bsfs:Tag ; + rdfs:range xsd:string . 
+ + ''') + self.backend = SparqlStore.Open() + self.user = URI('http://example.com/me') + self.graph = Graph(self.backend, self.user) + self.graph.migrate(self.schema) + + # nodes setup + self.ents = self.graph.nodes(ns.bsfs.Entity, { + URI('http://example.com/me/entity#1234'), + URI('http://example.com/me/entity#4321')}) + self.tags = self.graph.nodes(ns.bsfs.Tag, { + URI('http://example.com/me/tag#1234'), + URI('http://example.com/me/tag#4321')}) + # add some instances + self.ents.set(bse.tag, self.tags) + self.graph.node(ns.bsfs.Tag, URI('http://example.com/me/tag#1234')).set(bst.label, 'hello') + self.graph.node(ns.bsfs.Tag, URI('http://example.com/me/tag#4321')).set(bst.label, 'world') + + def test_essentials(self): # __eq__, __hash__, __str__, __repr__ + p_author = self.schema.predicate(bse.author) + p_tag = self.schema.predicate(bse.tag) + p_main = self.schema.predicate(bst.main) + # comparison + self.assertEqual(Walk(self.ents, [p_tag]), Walk(self.ents, [p_tag])) + self.assertEqual(hash(Walk(self.ents, [p_tag])), hash(Walk(self.ents, [p_tag]))) + # comparison respects type + class Foo(Walk): pass + self.assertNotEqual(Walk(self.ents, [p_tag]), Foo(self.ents, [p_tag])) + self.assertNotEqual(hash(Walk(self.ents, [p_tag])), hash(Foo(self.ents, [p_tag]))) + # comparison respects root + self.assertNotEqual(Walk(self.ents, [p_author]), Walk(self.tags, [p_author])) + self.assertNotEqual(hash(Walk(self.ents, [p_author])), hash(Walk(self.tags, [p_author]))) + # comparison respects path + self.assertNotEqual(Walk(self.tags, [p_author]), Walk(self.tags, [p_main])) + self.assertNotEqual(hash(Walk(self.tags, [p_author])), hash(Walk(self.tags, [p_main]))) + # string conversion + self.assertEqual(str(Walk(self.ents, [p_tag, p_main])), + 'Walk(@http://bsfs.ai/schema/Entity: http://bsfs.ai/schema/Entity#tag, http://bsfs.ai/schema/Tag#main)') + self.assertEqual(repr(Walk(self.ents, [p_tag, p_main])), + 'Walk(http://bsfs.ai/schema/Entity, (http://bsfs.ai/schema/Entity#tag, http://bsfs.ai/schema/Tag#main))') + + def test_tail(self): + self.assertEqual(Walk(self.ents, ( + self.schema.predicate(bse.tag), + )).tail, + self.schema.node(ns.bsfs.Tag)) + self.assertEqual(Walk(self.ents, ( + self.schema.predicate(bse.tag), + self.schema.predicate(bst.main), + )).tail, + self.schema.node(ns.bsfs.Entity)) + + def test_step(self): + tag_type = self.schema.node(ns.bsfs.Tag) + # step returns a predicate + self.assertEqual(Walk.step(self.schema, tag_type, 'subTagOf'), + (self.schema.predicate(bst.subTagOf), )) + # invalid step raises an error + self.assertRaises(ValueError, Walk.step, self.schema, tag_type, 'foobar') + # ambiguous step raises an error + self.assertRaises(ValueError, Walk.step, self.schema, tag_type, 'author') + + def test_getattr(self): # __getattr__ + walk = Walk(self.ents, (self.schema.predicate(bse.tag), )) + # first step + self.assertEqual(walk.subTagOf, Walk(self.ents, ( + self.schema.predicate(bse.tag), + self.schema.predicate(bst.subTagOf), + ))) + # second step + self.assertEqual(walk.subTagOf.main, Walk(self.ents, ( + self.schema.predicate(bse.tag), + self.schema.predicate(bst.subTagOf), + self.schema.predicate(bst.main), + ))) + # invalid step raises an error + self.assertRaises(ValueError, getattr, walk, 'foobar') + # ambiguous step raises an error + self.assertRaises(ValueError, getattr, walk, 'author') + + def test_get(self): # get, __call__ + walk = Walk(self.ents, (self.schema.predicate(bse.tag), )) + tags = { + self.graph.node(ns.bsfs.Tag, URI('http://example.com/me/tag#1234')), + 
self.graph.node(ns.bsfs.Tag, URI('http://example.com/me/tag#4321'))} + # get returns from Nodes.get + self.assertDictEqual(walk.get(), { + self.graph.node(ns.bsfs.Entity, URI('http://example.com/me/entity#1234')): tags, + self.graph.node(ns.bsfs.Entity, URI('http://example.com/me/entity#4321')): tags, + }) + self.assertDictEqual(walk(), { + self.graph.node(ns.bsfs.Entity, URI('http://example.com/me/entity#1234')): tags, + self.graph.node(ns.bsfs.Entity, URI('http://example.com/me/entity#4321')): tags, + }) + # get passes kwargs to Nodes.get + self.assertSetEqual(tags, walk.get(node=True)) + self.assertSetEqual(tags, walk(node=True)) + self.assertSetEqual(tags, set(walk.get(view=list, node=True))) + self.assertSetEqual(tags, set(walk(view=list, node=True))) + # get returns values if need be + self.assertSetEqual(walk.label(node=True), {'hello', 'world'}) + + +## main ## + +if __name__ == '__main__': + unittest.main() + +## EOF ## -- cgit v1.2.3 From 04bb201c6162e81dbdefcb1cff9595180fa66917 Mon Sep 17 00:00:00 2001 From: Matthias Baumgartner Date: Sat, 21 Jan 2023 22:34:49 +0100 Subject: minor notes --- bsfs/graph/result.py | 5 +++++ test/graph/test_nodes.py | 7 ++++++- 2 files changed, 11 insertions(+), 1 deletion(-) diff --git a/bsfs/graph/result.py b/bsfs/graph/result.py index 3009801..688929b 100644 --- a/bsfs/graph/result.py +++ b/bsfs/graph/result.py @@ -20,6 +20,11 @@ __all__: typing.Sequence[str] = ( ## code ## +# FIXME: node, path, value seem counter-intuitive: +# node.get(..., node=True) removes the node part. +# wouldn't it make more sense if node=True keeps the node part +# and node=False drops it? + def to_list_view( triples, # aggregators diff --git a/test/graph/test_nodes.py b/test/graph/test_nodes.py index 670df69..4eae250 100644 --- a/test/graph/test_nodes.py +++ b/test/graph/test_nodes.py @@ -381,7 +381,7 @@ class TestNodes(unittest.TestCase): (self.p_author.uri, Nodes(self.backend, self.user, self.user_type, {URI('http://example.com/me/user#1234'), URI('http://example.com/me/user#4321')})))) self.assertSetEqual(curr, set(self.backend._graph)) - def test_fetch(self): + def test_get(self): # setup: add some instances Nodes(self.backend, self.user, self.ent_type, {'http://example.com/me/entity#1234'}) \ .set(ns.bse.comment, 'hello world') \ @@ -460,6 +460,11 @@ class TestNodes(unittest.TestCase): Nodes(self.backend, self.user, self.ent_type, {'http://example.com/me/entity#1234'}): {'hello world'}, }) + # FIXME: What if I call `get` with a single predicate and a single node, but + # that node has no value for that predicate? + # so, essentially, what if triples is empty? -> Also check in test_result! + raise NotImplementedError() + def test_getattr(self): nodes = Nodes(self.backend, self.user, self.ent_type, {'http://example.com/me/entity#1234'}) # can get walks to values -- cgit v1.2.3 From 1392951dfc82af05e7a5999baa1c0a4fc72083b8 Mon Sep 17 00:00:00 2001 From: Matthias Baumgartner Date: Sat, 21 Jan 2023 23:03:50 +0100 Subject: Nodes magic methods for convenience --- bsfs/graph/nodes.py | 51 ++++++++++++++++++++++ test/graph/test_nodes.py | 109 ++++++++++++++++++++++++++++++++++++++++++++++- 2 files changed, 158 insertions(+), 2 deletions(-) diff --git a/bsfs/graph/nodes.py b/bsfs/graph/nodes.py index 18ab30d..85e5fdb 100644 --- a/bsfs/graph/nodes.py +++ b/bsfs/graph/nodes.py @@ -93,6 +93,57 @@ class Nodes(): """Return the store's local schema.""" return self._backend.schema + def __add__(self, other: typing.Any) -> 'Nodes': + """Concatenate guids. 
Backend, user, and node type must match.""" + if not isinstance(other, type(self)): + return NotImplemented + if self._backend != other._backend: + raise ValueError(other) + if self._user != other._user: + raise ValueError(other) + if self.node_type != other.node_type: + raise ValueError(other) + return Nodes(self._backend, self._user, self.node_type, self._guids | other._guids) + + def __or__(self, other: typing.Any) -> 'Nodes': + """Concatenate guids. Backend, user, and node type must match.""" + return self.__add__(other) + + def __sub__(self, other: typing.Any) -> 'Nodes': + """Subtract guids. Backend, user, and node type must match.""" + if not isinstance(other, type(self)): + return NotImplemented + if self._backend != other._backend: + raise ValueError(other) + if self._user != other._user: + raise ValueError(other) + if self.node_type != other.node_type: + raise ValueError(other) + return Nodes(self._backend, self._user, self.node_type, self._guids - other._guids) + + def __and__(self, other: typing.Any) -> 'Nodes': + """Intersect guids. Backend, user, and node type must match.""" + if not isinstance(other, type(self)): + return NotImplemented + if self._backend != other._backend: + raise ValueError(other) + if self._user != other._user: + raise ValueError(other) + if self.node_type != other.node_type: + raise ValueError(other) + return Nodes(self._backend, self._user, self.node_type, self._guids & other._guids) + + def __len__(self) -> int: + """Return the number of guids.""" + return len(self._guids) + + def __iter__(self) -> typing.Iterator['Nodes']: + """Iterate over individual guids. Returns `Nodes` instances.""" + return iter( + Nodes(self._backend, self._user, self.node_type, {guid}) + for guid in self._guids + ) + def __getattr__(self, name: str): try: return super().__getattr__(name) # type: ignore [misc] # parent has no getattr diff --git a/test/graph/test_nodes.py b/test/graph/test_nodes.py index 4eae250..c07fa53 100644 --- a/test/graph/test_nodes.py +++ b/test/graph/test_nodes.py @@ -4,10 +4,14 @@ Part of the bsfs test suite. A copy of the license is provided with the project. 
Author: Matthias Baumgartner, 2022 """ -# imports -import rdflib +# standard imports +from functools import partial +import operator import unittest +# external imports +import rdflib + # bsie imports from bsfs import schema as bsc from bsfs.graph.walk import Walk @@ -476,6 +480,107 @@ class TestNodes(unittest.TestCase): # invalid step raises an error self.assertRaises(ValueError, getattr, nodes, 'foobar') + def test_schema(self): + self.assertEqual(Nodes(self.backend, self.user, self.ent_type, + {URI('http://example.com/me/entity#1234')}).schema, self.backend.schema) + + def test_operators(self): # __add__, __or__, __sub__, __and__ + gen = partial(Nodes, self.backend, self.user, self.ent_type) + nodes = gen({URI('http://example.com/me/entity#1234')}) + # add/or concatenates guids + self.assertEqual( + gen({URI('http://example.com/me/entity#1234')}) + + gen({URI('http://example.com/me/entity#4321')}), + # target + gen({ + URI('http://example.com/me/entity#1234'), + URI('http://example.com/me/entity#4321')})) + self.assertEqual( + gen({URI('http://example.com/me/entity#1234')}) | + gen({URI('http://example.com/me/entity#4321')}), + # target + gen({ + URI('http://example.com/me/entity#1234'), + URI('http://example.com/me/entity#4321')})) + # repeated guids are ignored + self.assertEqual( + gen({URI('http://example.com/me/entity#1234')}) + + gen({URI('http://example.com/me/entity#1234')}), + # target + gen({URI('http://example.com/me/entity#1234')})) + self.assertEqual( + gen({URI('http://example.com/me/entity#1234')}) | + gen({URI('http://example.com/me/entity#1234')}), + # target + gen({URI('http://example.com/me/entity#1234')})) + + # sub substracts guids + self.assertEqual( + gen({URI('http://example.com/me/entity#1234'), + URI('http://example.com/me/entity#4321')}) - + gen({URI('http://example.com/me/entity#4321')}), + # target + gen({URI('http://example.com/me/entity#1234')})) + # missing guids are ignored + self.assertEqual( + gen({URI('http://example.com/me/entity#1234')}) - + gen({URI('http://example.com/me/entity#4321')}), + # target + gen({URI('http://example.com/me/entity#1234')})) + + # and intersects guids + self.assertEqual( + gen({URI('http://example.com/me/entity#1234'), + URI('http://example.com/me/entity#4321')}) & + gen({URI('http://example.com/me/entity#4321'), + URI('http://example.com/me/entity#5678')}), + # target + gen({URI('http://example.com/me/entity#4321')})) + + for op in (operator.add, operator.or_, operator.sub, operator.and_): + # type must match + self.assertRaises(TypeError, op, nodes, 1234) + self.assertRaises(TypeError, op, nodes, 'hello world') + # backend must match + self.assertRaises(ValueError, op, nodes, + Nodes(None, self.user, self.ent_type, {URI('http://example.com/me/entity#1234')})) + # user must match + self.assertRaises(ValueError, op, nodes, + Nodes(self.backend, '', self.ent_type, {URI('http://example.com/me/entity#1234')})) + # node type must match + self.assertRaises(ValueError, op, nodes, + Nodes(self.backend, self.user, self.tag_type, {URI('http://example.com/me/entity#1234')})) + + def test_len(self): + self.assertEqual(1, len(Nodes(self.backend, self.user, self.ent_type, { + URI('http://example.com/me/entity#1234'), + }))) + self.assertEqual(2, len(Nodes(self.backend, self.user, self.ent_type, { + URI('http://example.com/me/entity#1234'), + URI('http://example.com/me/entity#4321'), + }))) + self.assertEqual(4, len(Nodes(self.backend, self.user, self.ent_type, { + URI('http://example.com/me/entity#1234'), + 
URI('http://example.com/me/entity#4321'), + URI('http://example.com/me/entity#5678'), + URI('http://example.com/me/entity#8765'), + }))) +
+ def test_iter(self): # __iter__ + gen = partial(Nodes, self.backend, self.user, self.ent_type) + self.assertSetEqual(set(Nodes(self.backend, self.user, self.ent_type, {
+ URI('http://example.com/me/entity#1234'), + URI('http://example.com/me/entity#4321'), + URI('http://example.com/me/entity#5678'), + URI('http://example.com/me/entity#8765'),
+ })), { + gen({URI('http://example.com/me/entity#1234')}), + gen({URI('http://example.com/me/entity#4321')}), + gen({URI('http://example.com/me/entity#5678')}), + gen({URI('http://example.com/me/entity#8765')}), + }) + + ## main ## if __name__ == '__main__': -- cgit v1.2.3 From 72e0bd78dc9cc1d74c3061b028040b64c0efcf9f Mon Sep 17 00:00:00 2001 From: Matthias Baumgartner Date: Mon, 30 Jan 2023 09:52:18 +0100 Subject: flip graph fetch result flags --- bsfs/graph/nodes.py | 6 +- bsfs/graph/result.py | 29 +++-- test/graph/test_nodes.py | 4 +- test/graph/test_result.py | 324 ++++++++++++++++++++++++++-------------------- test/graph/test_walk.py | 10 +- 5 files changed, 209 insertions(+), 164 deletions(-) diff --git a/bsfs/graph/nodes.py b/bsfs/graph/nodes.py index 85e5fdb..9990714 100644 --- a/bsfs/graph/nodes.py +++ b/bsfs/graph/nodes.py @@ -277,9 +277,9 @@ class Nodes(): yield node, path, value # simplify by default - view_kwargs['node'] = view_kwargs.get('node', len(self._guids) == 1) - view_kwargs['path'] = view_kwargs.get('path', len(paths) == 1) - view_kwargs['value'] = view_kwargs.get('value', True) + view_kwargs['node'] = view_kwargs.get('node', len(self._guids) != 1) + view_kwargs['path'] = view_kwargs.get('path', len(paths) != 1) + view_kwargs['value'] = view_kwargs.get('value', False) # return results view if view == list: diff --git a/bsfs/graph/result.py b/bsfs/graph/result.py index 688929b..00607f4 100644 --- a/bsfs/graph/result.py +++ b/bsfs/graph/result.py @@ -38,11 +38,11 @@ def to_list_view( the respective component is omitted. """ - if node and path: + if not node and not path: return iter(val for _, _, val in triples) - if node: + if not node: return iter((pred, val) for _, pred, val in triples) - if path: + if not path: return iter((subj, val) for subj, _, val in triples) return iter((subj, pred, val) for subj, pred, val in triples) @@ -57,6 +57,7 @@ def to_dict_view( node: bool, path: bool, value: bool, + default: typing.Optional[typing.Any] = None, ) -> typing.Any: """Return a dict of results.
@@ -74,7 +75,7 @@ def to_dict_view( # FIXME: type of data can be overwritten later on (if value) - if node and path: + if not node and not path: data = set() elif node ^ path: data = defaultdict(set) @@ -83,24 +84,24 @@ def to_dict_view( for subj, pred, val in triples: unique = pred in unique_paths - if node and path: - if value and unique and one_node and one_path: + if not node and not path: + if not value and unique and one_node and one_path: return val data.add(val) - elif node: + elif not node: # remove node from result, group by predicate - if value and unique and one_node: + if not value and unique and one_node: data[pred] = val else: data[pred].add(val) - elif path: + elif not path: # remove predicate from result, group by node - if value and unique and one_path: + if not value and unique and one_path: data[subj] = val else: data[subj].add(val) else: - if value and unique: + if not value and unique: data[subj][pred] = val else: data[subj][pred].add(val) @@ -108,7 +109,11 @@ def to_dict_view( # FIXME: Combine multiple Nodes instances into one? # convert defaultdict to ordinary dict - if node and path: + if not node and not path and not value \ + and len(unique_paths) > 0 and one_node and one_path \ + and len(data) == 0: + return default + if not node and not path: return data if node ^ path: return dict(data) diff --git a/test/graph/test_nodes.py b/test/graph/test_nodes.py index c07fa53..dabe794 100644 --- a/test/graph/test_nodes.py +++ b/test/graph/test_nodes.py @@ -417,11 +417,11 @@ class TestNodes(unittest.TestCase): Nodes(self.backend, self.user, self.ent_type, {'http://example.com/me/entity#4321'}): {'tag_label_4321'}, }) # get returns the same path that was passed - self.assertCountEqual(list(nodes.get((ns.bse.tag, bst.label), path=False, view=list)), [ + self.assertCountEqual(list(nodes.get((ns.bse.tag, bst.label), path=True, view=list)), [ (Nodes(self.backend, self.user, self.ent_type, {'http://example.com/me/entity#1234'}), (ns.bse.tag, bst.label), 'tag_label_1234'), (Nodes(self.backend, self.user, self.ent_type, {'http://example.com/me/entity#4321'}), (ns.bse.tag, bst.label), 'tag_label_4321'), ]) - self.assertCountEqual(list(nodes.get([ns.bse.tag, bst.label], path=False, view=list)), [ + self.assertCountEqual(list(nodes.get([ns.bse.tag, bst.label], path=True, view=list)), [ (Nodes(self.backend, self.user, self.ent_type, {'http://example.com/me/entity#1234'}), [ns.bse.tag, bst.label], 'tag_label_1234'), (Nodes(self.backend, self.user, self.ent_type, {'http://example.com/me/entity#4321'}), [ns.bse.tag, bst.label], 'tag_label_4321'), ]) diff --git a/test/graph/test_result.py b/test/graph/test_result.py index 89b0da1..749b8ad 100644 --- a/test/graph/test_result.py +++ b/test/graph/test_result.py @@ -45,149 +45,149 @@ class TestListView(unittest.TestCase): def test_copy(self): # iterator yields tuples - self.assertIsInstance(list(to_list_view([('subject', 'predicate', 'object')], node=False, path=False, value=False))[0], tuple) + self.assertIsInstance(list(to_list_view([('subject', 'predicate', 'object')], node=True, path=True, value=True))[0], tuple) # components are not changed class Foo(): pass foo = Foo() - self.assertListEqual(list(to_list_view([('subject', 'predicate', 'object')], node=False, path=False, value=False)), + self.assertListEqual(list(to_list_view([('subject', 'predicate', 'object')], node=True, path=True, value=True)), [('subject', 'predicate', 'object')]) - self.assertListEqual(list(to_list_view([(foo, 'predicate', 'object')], node=False, path=False, 
value=False)), + self.assertListEqual(list(to_list_view([(foo, 'predicate', 'object')], node=True, path=True, value=True)), [(foo, 'predicate', 'object')]) - self.assertListEqual(list(to_list_view([('subject', foo, 'object')], node=False, path=False, value=False)), + self.assertListEqual(list(to_list_view([('subject', foo, 'object')], node=True, path=True, value=True)), [('subject', foo, 'object')]) - self.assertListEqual(list(to_list_view([('subject', 'predicate', foo)], node=False, path=False, value=False)), + self.assertListEqual(list(to_list_view([('subject', 'predicate', foo)], node=True, path=True, value=True)), [('subject', 'predicate', foo)]) def test_agg_none(self): - self.assertListEqual(list(to_list_view(self.triples_111, node=False, path=False, value=False)), self.triples_111) - self.assertListEqual(list(to_list_view(self.triples_11U, node=False, path=False, value=False)), self.triples_11U) - self.assertListEqual(list(to_list_view(self.triples_1M1, node=False, path=False, value=False)), self.triples_1M1) - self.assertListEqual(list(to_list_view(self.triples_1MU, node=False, path=False, value=False)), self.triples_1MU) - self.assertListEqual(list(to_list_view(self.triples_N11, node=False, path=False, value=False)), self.triples_N11) - self.assertListEqual(list(to_list_view(self.triples_N1U, node=False, path=False, value=False)), self.triples_N1U) - self.assertListEqual(list(to_list_view(self.triples_NM1, node=False, path=False, value=False)), self.triples_NM1) - self.assertListEqual(list(to_list_view(self.triples_NMU, node=False, path=False, value=False)), self.triples_NMU) + self.assertListEqual(list(to_list_view(self.triples_111, node=True, path=True, value=True)), self.triples_111) + self.assertListEqual(list(to_list_view(self.triples_11U, node=True, path=True, value=True)), self.triples_11U) + self.assertListEqual(list(to_list_view(self.triples_1M1, node=True, path=True, value=True)), self.triples_1M1) + self.assertListEqual(list(to_list_view(self.triples_1MU, node=True, path=True, value=True)), self.triples_1MU) + self.assertListEqual(list(to_list_view(self.triples_N11, node=True, path=True, value=True)), self.triples_N11) + self.assertListEqual(list(to_list_view(self.triples_N1U, node=True, path=True, value=True)), self.triples_N1U) + self.assertListEqual(list(to_list_view(self.triples_NM1, node=True, path=True, value=True)), self.triples_NM1) + self.assertListEqual(list(to_list_view(self.triples_NMU, node=True, path=True, value=True)), self.triples_NMU) def test_agg_node(self): - self.assertListEqual(list(to_list_view(self.triples_111, node=True, path=False, value=False)), + self.assertListEqual(list(to_list_view(self.triples_111, node=False, path=True, value=True)), [(ns.bse.iso, 123)]) - self.assertListEqual(list(to_list_view(self.triples_11U, node=True, path=False, value=False)), + self.assertListEqual(list(to_list_view(self.triples_11U, node=False, path=True, value=True)), [(ns.bse.tag, 'tag#1234'), (ns.bse.tag, 'tag#5678')]) - self.assertListEqual(list(to_list_view(self.triples_1M1, node=True, path=False, value=False)), + self.assertListEqual(list(to_list_view(self.triples_1M1, node=False, path=True, value=True)), [(ns.bse.iso, 123), (ns.bse.t_created, '2010-01-02')]) - self.assertListEqual(list(to_list_view(self.triples_1MU, node=True, path=False, value=False)), + self.assertListEqual(list(to_list_view(self.triples_1MU, node=False, path=True, value=True)), [(ns.bse.iso, 123), (ns.bse.tag, 'tag#1234'), (ns.bse.tag, 'tag#5678')]) - 
self.assertListEqual(list(to_list_view(self.triples_N11, node=True, path=False, value=False)), + self.assertListEqual(list(to_list_view(self.triples_N11, node=False, path=True, value=True)), [(ns.bse.iso, 123), (ns.bse.iso, 321)]) - self.assertListEqual(list(to_list_view(self.triples_N1U, node=True, path=False, value=False)), + self.assertListEqual(list(to_list_view(self.triples_N1U, node=False, path=True, value=True)), [(ns.bse.tag, 'tag#1234'), (ns.bse.tag, 'tag#5678'), (ns.bse.tag, 'tag#4321')]) - self.assertListEqual(list(to_list_view(self.triples_NM1, node=True, path=False, value=False)), + self.assertListEqual(list(to_list_view(self.triples_NM1, node=False, path=True, value=True)), [(ns.bse.iso, 123), (ns.bse.t_created, '2010-01-02'), (ns.bse.iso, 321), (ns.bse.t_created, '2022-02-22')]) - self.assertListEqual(list(to_list_view(self.triples_NMU, node=True, path=False, value=False)), + self.assertListEqual(list(to_list_view(self.triples_NMU, node=False, path=True, value=True)), [(ns.bse.iso, 123), (ns.bse.tag, 'tag#1234'), (ns.bse.tag, 'tag#5678'), (ns.bse.iso, 321), (ns.bse.t_created, '2022-02-22')]) def test_agg_path(self): - self.assertListEqual(list(to_list_view(self.triples_111, node=False, path=True, value=False)), + self.assertListEqual(list(to_list_view(self.triples_111, node=True, path=False, value=True)), [('ent#1234', 123)]) - self.assertListEqual(list(to_list_view(self.triples_11U, node=False, path=True, value=False)), + self.assertListEqual(list(to_list_view(self.triples_11U, node=True, path=False, value=True)), [('ent#1234', 'tag#1234'), ('ent#1234', 'tag#5678')]) - self.assertListEqual(list(to_list_view(self.triples_1M1, node=False, path=True, value=False)), + self.assertListEqual(list(to_list_view(self.triples_1M1, node=True, path=False, value=True)), [('ent#1234', 123), ('ent#1234', '2010-01-02')]) - self.assertListEqual(list(to_list_view(self.triples_1MU, node=False, path=True, value=False)), + self.assertListEqual(list(to_list_view(self.triples_1MU, node=True, path=False, value=True)), [('ent#1234', 123), ('ent#1234', 'tag#1234'), ('ent#1234', 'tag#5678')]) - self.assertListEqual(list(to_list_view(self.triples_N11, node=False, path=True, value=False)), + self.assertListEqual(list(to_list_view(self.triples_N11, node=True, path=False, value=True)), [('ent#1234', 123), ('ent#4321', 321)]) - self.assertListEqual(list(to_list_view(self.triples_N1U, node=False, path=True, value=False)), + self.assertListEqual(list(to_list_view(self.triples_N1U, node=True, path=False, value=True)), [('ent#1234', 'tag#1234'), ('ent#1234', 'tag#5678'), ('ent#4321', 'tag#4321')]) - self.assertListEqual(list(to_list_view(self.triples_NM1, node=False, path=True, value=False)), + self.assertListEqual(list(to_list_view(self.triples_NM1, node=True, path=False, value=True)), [('ent#1234', 123), ('ent#1234', '2010-01-02'), ('ent#4321', 321), ('ent#4321', '2022-02-22')]) - self.assertListEqual(list(to_list_view(self.triples_NMU, node=False, path=True, value=False)), + self.assertListEqual(list(to_list_view(self.triples_NMU, node=True, path=False, value=True)), [('ent#1234', 123), ('ent#1234', 'tag#1234'), ('ent#1234', 'tag#5678'), ('ent#4321', 321), ('ent#4321', '2022-02-22')]) def test_agg_node_path(self): - self.assertListEqual(list(to_list_view(self.triples_111, node=True, path=True, value=False)), + self.assertListEqual(list(to_list_view(self.triples_111, node=False, path=False, value=True)), [123]) - self.assertListEqual(list(to_list_view(self.triples_11U, node=True, path=True, value=False)), + 
self.assertListEqual(list(to_list_view(self.triples_11U, node=False, path=False, value=True)), ['tag#1234', 'tag#5678']) - self.assertListEqual(list(to_list_view(self.triples_1M1, node=True, path=True, value=False)), + self.assertListEqual(list(to_list_view(self.triples_1M1, node=False, path=False, value=True)), [123, '2010-01-02']) - self.assertListEqual(list(to_list_view(self.triples_1MU, node=True, path=True, value=False)), + self.assertListEqual(list(to_list_view(self.triples_1MU, node=False, path=False, value=True)), [123, 'tag#1234', 'tag#5678']) - self.assertListEqual(list(to_list_view(self.triples_N11, node=True, path=True, value=False)), + self.assertListEqual(list(to_list_view(self.triples_N11, node=False, path=False, value=True)), [123, 321]) - self.assertListEqual(list(to_list_view(self.triples_N1U, node=True, path=True, value=False)), + self.assertListEqual(list(to_list_view(self.triples_N1U, node=False, path=False, value=True)), ['tag#1234', 'tag#5678', 'tag#4321']) - self.assertListEqual(list(to_list_view(self.triples_NM1, node=True, path=True, value=False)), + self.assertListEqual(list(to_list_view(self.triples_NM1, node=False, path=False, value=True)), [123, '2010-01-02', 321, '2022-02-22']) - self.assertListEqual(list(to_list_view(self.triples_NMU, node=True, path=True, value=False)), + self.assertListEqual(list(to_list_view(self.triples_NMU, node=False, path=False, value=True)), [123, 'tag#1234', 'tag#5678', 321, '2022-02-22']) def test_agg_value(self): # value flag has no effect - self.assertListEqual(list(to_list_view(self.triples_111, node=False, path=False, value=False)), self.triples_111) - self.assertListEqual(list(to_list_view(self.triples_11U, node=False, path=False, value=False)), self.triples_11U) - self.assertListEqual(list(to_list_view(self.triples_1M1, node=False, path=False, value=False)), self.triples_1M1) - self.assertListEqual(list(to_list_view(self.triples_1MU, node=False, path=False, value=False)), self.triples_1MU) - self.assertListEqual(list(to_list_view(self.triples_N11, node=False, path=False, value=False)), self.triples_N11) - self.assertListEqual(list(to_list_view(self.triples_N1U, node=False, path=False, value=False)), self.triples_N1U) - self.assertListEqual(list(to_list_view(self.triples_NM1, node=False, path=False, value=False)), self.triples_NM1) - self.assertListEqual(list(to_list_view(self.triples_NMU, node=False, path=False, value=False)), self.triples_NMU) + self.assertListEqual(list(to_list_view(self.triples_111, node=True, path=True, value=True)), self.triples_111) + self.assertListEqual(list(to_list_view(self.triples_11U, node=True, path=True, value=True)), self.triples_11U) + self.assertListEqual(list(to_list_view(self.triples_1M1, node=True, path=True, value=True)), self.triples_1M1) + self.assertListEqual(list(to_list_view(self.triples_1MU, node=True, path=True, value=True)), self.triples_1MU) + self.assertListEqual(list(to_list_view(self.triples_N11, node=True, path=True, value=True)), self.triples_N11) + self.assertListEqual(list(to_list_view(self.triples_N1U, node=True, path=True, value=True)), self.triples_N1U) + self.assertListEqual(list(to_list_view(self.triples_NM1, node=True, path=True, value=True)), self.triples_NM1) + self.assertListEqual(list(to_list_view(self.triples_NMU, node=True, path=True, value=True)), self.triples_NMU) def test_agg_node_value(self): # value flag has no effect -> same test as test_agg_node - self.assertListEqual(list(to_list_view(self.triples_111, node=True, path=False, value=True)), + 
self.assertListEqual(list(to_list_view(self.triples_111, node=False, path=True, value=False)), [(ns.bse.iso, 123)]) - self.assertListEqual(list(to_list_view(self.triples_11U, node=True, path=False, value=True)), + self.assertListEqual(list(to_list_view(self.triples_11U, node=False, path=True, value=False)), [(ns.bse.tag, 'tag#1234'), (ns.bse.tag, 'tag#5678')]) - self.assertListEqual(list(to_list_view(self.triples_1M1, node=True, path=False, value=True)), + self.assertListEqual(list(to_list_view(self.triples_1M1, node=False, path=True, value=False)), [(ns.bse.iso, 123), (ns.bse.t_created, '2010-01-02')]) - self.assertListEqual(list(to_list_view(self.triples_1MU, node=True, path=False, value=True)), + self.assertListEqual(list(to_list_view(self.triples_1MU, node=False, path=True, value=False)), [(ns.bse.iso, 123), (ns.bse.tag, 'tag#1234'), (ns.bse.tag, 'tag#5678')]) - self.assertListEqual(list(to_list_view(self.triples_N11, node=True, path=False, value=True)), + self.assertListEqual(list(to_list_view(self.triples_N11, node=False, path=True, value=False)), [(ns.bse.iso, 123), (ns.bse.iso, 321)]) - self.assertListEqual(list(to_list_view(self.triples_N1U, node=True, path=False, value=True)), + self.assertListEqual(list(to_list_view(self.triples_N1U, node=False, path=True, value=False)), [(ns.bse.tag, 'tag#1234'), (ns.bse.tag, 'tag#5678'), (ns.bse.tag, 'tag#4321')]) - self.assertListEqual(list(to_list_view(self.triples_NM1, node=True, path=False, value=True)), + self.assertListEqual(list(to_list_view(self.triples_NM1, node=False, path=True, value=False)), [(ns.bse.iso, 123), (ns.bse.t_created, '2010-01-02'), (ns.bse.iso, 321), (ns.bse.t_created, '2022-02-22')]) - self.assertListEqual(list(to_list_view(self.triples_NMU, node=True, path=False, value=True)), + self.assertListEqual(list(to_list_view(self.triples_NMU, node=False, path=True, value=False)), [(ns.bse.iso, 123), (ns.bse.tag, 'tag#1234'), (ns.bse.tag, 'tag#5678'), (ns.bse.iso, 321), (ns.bse.t_created, '2022-02-22')]) def test_agg_path_value(self): # value flag has no effect -> same test as test_agg_path - self.assertListEqual(list(to_list_view(self.triples_111, node=False, path=True, value=True)), + self.assertListEqual(list(to_list_view(self.triples_111, node=True, path=False, value=False)), [('ent#1234', 123)]) - self.assertListEqual(list(to_list_view(self.triples_11U, node=False, path=True, value=True)), + self.assertListEqual(list(to_list_view(self.triples_11U, node=True, path=False, value=False)), [('ent#1234', 'tag#1234'), ('ent#1234', 'tag#5678')]) - self.assertListEqual(list(to_list_view(self.triples_1M1, node=False, path=True, value=True)), + self.assertListEqual(list(to_list_view(self.triples_1M1, node=True, path=False, value=False)), [('ent#1234', 123), ('ent#1234', '2010-01-02')]) - self.assertListEqual(list(to_list_view(self.triples_1MU, node=False, path=True, value=True)), + self.assertListEqual(list(to_list_view(self.triples_1MU, node=True, path=False, value=False)), [('ent#1234', 123), ('ent#1234', 'tag#1234'), ('ent#1234', 'tag#5678')]) - self.assertListEqual(list(to_list_view(self.triples_N11, node=False, path=True, value=True)), + self.assertListEqual(list(to_list_view(self.triples_N11, node=True, path=False, value=False)), [('ent#1234', 123), ('ent#4321', 321)]) - self.assertListEqual(list(to_list_view(self.triples_N1U, node=False, path=True, value=True)), + self.assertListEqual(list(to_list_view(self.triples_N1U, node=True, path=False, value=False)), [('ent#1234', 'tag#1234'), ('ent#1234', 'tag#5678'), ('ent#4321', 
'tag#4321')]) - self.assertListEqual(list(to_list_view(self.triples_NM1, node=False, path=True, value=True)), + self.assertListEqual(list(to_list_view(self.triples_NM1, node=True, path=False, value=False)), [('ent#1234', 123), ('ent#1234', '2010-01-02'), ('ent#4321', 321), ('ent#4321', '2022-02-22')]) - self.assertListEqual(list(to_list_view(self.triples_NMU, node=False, path=True, value=True)), + self.assertListEqual(list(to_list_view(self.triples_NMU, node=True, path=False, value=False)), [('ent#1234', 123), ('ent#1234', 'tag#1234'), ('ent#1234', 'tag#5678'), ('ent#4321', 321), ('ent#4321', '2022-02-22')]) def test_agg_all(self): # value flag has no effect -> same test as test_agg_node_path - self.assertListEqual(list(to_list_view(self.triples_111, node=True, path=True, value=True)), + self.assertListEqual(list(to_list_view(self.triples_111, node=False, path=False, value=False)), [123]) - self.assertListEqual(list(to_list_view(self.triples_11U, node=True, path=True, value=True)), + self.assertListEqual(list(to_list_view(self.triples_11U, node=False, path=False, value=False)), ['tag#1234', 'tag#5678']) - self.assertListEqual(list(to_list_view(self.triples_1M1, node=True, path=True, value=True)), + self.assertListEqual(list(to_list_view(self.triples_1M1, node=False, path=False, value=False)), [123, '2010-01-02']) - self.assertListEqual(list(to_list_view(self.triples_1MU, node=True, path=True, value=True)), + self.assertListEqual(list(to_list_view(self.triples_1MU, node=False, path=False, value=False)), [123, 'tag#1234', 'tag#5678']) - self.assertListEqual(list(to_list_view(self.triples_N11, node=True, path=True, value=True)), + self.assertListEqual(list(to_list_view(self.triples_N11, node=False, path=False, value=False)), [123, 321]) - self.assertListEqual(list(to_list_view(self.triples_N1U, node=True, path=True, value=True)), + self.assertListEqual(list(to_list_view(self.triples_N1U, node=False, path=False, value=False)), ['tag#1234', 'tag#5678', 'tag#4321']) - self.assertListEqual(list(to_list_view(self.triples_NM1, node=True, path=True, value=True)), + self.assertListEqual(list(to_list_view(self.triples_NM1, node=False, path=False, value=False)), [123, '2010-01-02', 321, '2022-02-22']) - self.assertListEqual(list(to_list_view(self.triples_NMU, node=True, path=True, value=True)), + self.assertListEqual(list(to_list_view(self.triples_NMU, node=False, path=False, value=False)), [123, 'tag#1234', 'tag#5678', 321, '2022-02-22']) @@ -219,170 +219,210 @@ class TestDictView(unittest.TestCase): def test_errounous_call(self): # return set instead of value - self.assertSetEqual(to_dict_view(self.triples_111, one_node=False, one_path=True, unique_paths=self.unique_paths, node=True, path=True, value=True), + self.assertSetEqual(to_dict_view(self.triples_111, one_node=False, one_path=True, unique_paths=self.unique_paths, node=False, path=False, value=False), {123}) - self.assertSetEqual(to_dict_view(self.triples_111, one_node=True, one_path=False, unique_paths=self.unique_paths, node=True, path=True, value=True), + self.assertSetEqual(to_dict_view(self.triples_111, one_node=True, one_path=False, unique_paths=self.unique_paths, node=False, path=False, value=False), {123}) # one_node mismatch: return set of values instead of value - self.assertDictEqual(to_dict_view(self.triples_111, one_node=False, one_path=True, unique_paths=self.unique_paths, node=True, path=False, value=True), + self.assertDictEqual(to_dict_view(self.triples_111, one_node=False, one_path=True, unique_paths=self.unique_paths, 
node=False, path=True, value=False), {ns.bse.iso: {123}}) # one_path mismatch: return set of values instead of value - self.assertDictEqual(to_dict_view(self.triples_111, one_node=True, one_path=False, unique_paths=self.unique_paths, node=False, path=True, value=True), + self.assertDictEqual(to_dict_view(self.triples_111, one_node=True, one_path=False, unique_paths=self.unique_paths, node=True, path=False, value=False), {'ent#1234': {123}}) # unique_paths mismatch: return set of values instead of value - self.assertSetEqual(to_dict_view(self.triples_111, one_node=True, one_path=False, unique_paths={}, node=True, path=True, value=True), + self.assertSetEqual(to_dict_view(self.triples_111, one_node=True, one_path=False, unique_paths={}, node=False, path=False, value=False), {123}) - self.assertDictEqual(to_dict_view(self.triples_111, one_node=True, one_path=False, unique_paths={}, node=True, path=False, value=True), + self.assertDictEqual(to_dict_view(self.triples_111, one_node=True, one_path=False, unique_paths={}, node=False, path=True, value=False), {ns.bse.iso: {123}}) - self.assertDictEqual(to_dict_view(self.triples_111, one_node=True, one_path=False, unique_paths={}, node=False, path=True, value=True), + self.assertDictEqual(to_dict_view(self.triples_111, one_node=True, one_path=False, unique_paths={}, node=True, path=False, value=False), {'ent#1234': {123}}) - self.assertDictEqual(to_dict_view(self.triples_111, one_node=True, one_path=False, unique_paths={}, node=False, path=False, value=True), + self.assertDictEqual(to_dict_view(self.triples_111, one_node=True, one_path=False, unique_paths={}, node=True, path=True, value=False), {'ent#1234': {ns.bse.iso: {123}}}) def test_agg_none(self): - self.assertDictEqual(to_dict_view(self.triples_111, one_node=True, one_path=True, unique_paths=self.unique_paths, node=False, path=False, value=False), + self.assertDictEqual(to_dict_view(self.triples_111, one_node=True, one_path=True, unique_paths=self.unique_paths, node=True, path=True, value=True), {'ent#1234': {ns.bse.iso: {123}}}) - self.assertDictEqual(to_dict_view(self.triples_11U, one_node=True, one_path=True, unique_paths=self.unique_paths, node=False, path=False, value=False), + self.assertDictEqual(to_dict_view(self.triples_11U, one_node=True, one_path=True, unique_paths=self.unique_paths, node=True, path=True, value=True), {'ent#1234': {ns.bse.tag: {'tag#1234', 'tag#5678'}}}) - self.assertDictEqual(to_dict_view(self.triples_1M1, one_node=True, one_path=False, unique_paths=self.unique_paths, node=False, path=False, value=False), + self.assertDictEqual(to_dict_view(self.triples_1M1, one_node=True, one_path=False, unique_paths=self.unique_paths, node=True, path=True, value=True), {'ent#1234': {ns.bse.iso: {123}, ns.bse.t_created: {'2010-01-02'}}}) - self.assertDictEqual(to_dict_view(self.triples_1MU, one_node=True, one_path=False, unique_paths=self.unique_paths, node=False, path=False, value=False), + self.assertDictEqual(to_dict_view(self.triples_1MU, one_node=True, one_path=False, unique_paths=self.unique_paths, node=True, path=True, value=True), {'ent#1234': {ns.bse.iso: {123}, ns.bse.tag: {'tag#1234', 'tag#5678'}}}) - self.assertDictEqual(to_dict_view(self.triples_N11, one_node=False, one_path=True, unique_paths=self.unique_paths, node=False, path=False, value=False), + self.assertDictEqual(to_dict_view(self.triples_N11, one_node=False, one_path=True, unique_paths=self.unique_paths, node=True, path=True, value=True), {'ent#1234': {ns.bse.iso: {123}}, 'ent#4321': {ns.bse.iso: {321}}}) - 
self.assertDictEqual(to_dict_view(self.triples_N1U, one_node=False, one_path=True, unique_paths=self.unique_paths, node=False, path=False, value=False), + self.assertDictEqual(to_dict_view(self.triples_N1U, one_node=False, one_path=True, unique_paths=self.unique_paths, node=True, path=True, value=True), {'ent#1234': {ns.bse.tag: {'tag#1234', 'tag#5678'}}, 'ent#4321': {ns.bse.tag: {'tag#4321'}}}) - self.assertDictEqual(to_dict_view(self.triples_NM1, one_node=False, one_path=False, unique_paths=self.unique_paths, node=False, path=False, value=False), + self.assertDictEqual(to_dict_view(self.triples_NM1, one_node=False, one_path=False, unique_paths=self.unique_paths, node=True, path=True, value=True), {'ent#1234': {ns.bse.iso: {123}, ns.bse.t_created: {'2010-01-02'}}, 'ent#4321': {ns.bse.iso: {321}, ns.bse.t_created: {'2022-02-22'}}}) - self.assertDictEqual(to_dict_view(self.triples_NMU, one_node=False, one_path=False, unique_paths=self.unique_paths, node=False, path=False, value=False), + self.assertDictEqual(to_dict_view(self.triples_NMU, one_node=False, one_path=False, unique_paths=self.unique_paths, node=True, path=True, value=True), {'ent#1234': {ns.bse.iso: {123}, ns.bse.tag: {'tag#1234', 'tag#5678'}}, 'ent#4321': {ns.bse.iso: {321}, ns.bse.t_created: {'2022-02-22'}}}) + # empty + self.assertDictEqual(to_dict_view([], one_node=True, one_path=True, unique_paths=self.unique_paths, node=True, path=True, value=True), {}) + self.assertDictEqual(to_dict_view([], one_node=True, one_path=False, unique_paths=self.unique_paths, node=True, path=True, value=True), {}) + self.assertDictEqual(to_dict_view([], one_node=False, one_path=True, unique_paths=self.unique_paths, node=True, path=True, value=True), {}) + self.assertDictEqual(to_dict_view([], one_node=False, one_path=False, unique_paths=self.unique_paths, node=True, path=True, value=True), {}) def test_agg_node(self): - self.assertDictEqual(to_dict_view(self.triples_111, one_node=True, one_path=True, unique_paths=self.unique_paths, node=True, path=False, value=False), + self.assertDictEqual(to_dict_view(self.triples_111, one_node=True, one_path=True, unique_paths=self.unique_paths, node=False, path=True, value=True), {ns.bse.iso: {123}}) - self.assertDictEqual(to_dict_view(self.triples_11U, one_node=True, one_path=True, unique_paths=self.unique_paths, node=True, path=False, value=False), + self.assertDictEqual(to_dict_view(self.triples_11U, one_node=True, one_path=True, unique_paths=self.unique_paths, node=False, path=True, value=True), {ns.bse.tag: {'tag#1234', 'tag#5678'}}) - self.assertDictEqual(to_dict_view(self.triples_1M1, one_node=True, one_path=False, unique_paths=self.unique_paths, node=True, path=False, value=False), + self.assertDictEqual(to_dict_view(self.triples_1M1, one_node=True, one_path=False, unique_paths=self.unique_paths, node=False, path=True, value=True), {ns.bse.iso: {123}, ns.bse.t_created: {'2010-01-02'}}) - self.assertDictEqual(to_dict_view(self.triples_1MU, one_node=True, one_path=False, unique_paths=self.unique_paths, node=True, path=False, value=False), + self.assertDictEqual(to_dict_view(self.triples_1MU, one_node=True, one_path=False, unique_paths=self.unique_paths, node=False, path=True, value=True), {ns.bse.iso: {123}, ns.bse.tag: {'tag#1234', 'tag#5678'}}) - self.assertDictEqual(to_dict_view(self.triples_N11, one_node=False, one_path=True, unique_paths=self.unique_paths, node=True, path=False, value=False), + self.assertDictEqual(to_dict_view(self.triples_N11, one_node=False, one_path=True, 
unique_paths=self.unique_paths, node=False, path=True, value=True), {ns.bse.iso: {123, 321}}) - self.assertDictEqual(to_dict_view(self.triples_N1U, one_node=False, one_path=True, unique_paths=self.unique_paths, node=True, path=False, value=False), + self.assertDictEqual(to_dict_view(self.triples_N1U, one_node=False, one_path=True, unique_paths=self.unique_paths, node=False, path=True, value=True), {ns.bse.tag: {'tag#1234', 'tag#5678', 'tag#4321'}}) - self.assertDictEqual(to_dict_view(self.triples_NM1, one_node=False, one_path=False, unique_paths=self.unique_paths, node=True, path=False, value=False), + self.assertDictEqual(to_dict_view(self.triples_NM1, one_node=False, one_path=False, unique_paths=self.unique_paths, node=False, path=True, value=True), {ns.bse.iso: {123, 321}, ns.bse.t_created: {'2010-01-02', '2022-02-22'}}) - self.assertDictEqual(to_dict_view(self.triples_NMU, one_node=False, one_path=False, unique_paths=self.unique_paths, node=True, path=False, value=False), + self.assertDictEqual(to_dict_view(self.triples_NMU, one_node=False, one_path=False, unique_paths=self.unique_paths, node=False, path=True, value=True), {ns.bse.iso: {123, 321}, ns.bse.tag: {'tag#1234', 'tag#5678'}, ns.bse.t_created: {'2022-02-22'}}) + # empty + self.assertDictEqual(to_dict_view([], one_node=True, one_path=True, unique_paths=self.unique_paths, node=False, path=True, value=True), {}) + self.assertDictEqual(to_dict_view([], one_node=True, one_path=False, unique_paths=self.unique_paths, node=False, path=True, value=True), {}) + self.assertDictEqual(to_dict_view([], one_node=False, one_path=True, unique_paths=self.unique_paths, node=False, path=True, value=True), {}) + self.assertDictEqual(to_dict_view([], one_node=False, one_path=False, unique_paths=self.unique_paths, node=False, path=True, value=True), {}) def test_agg_path(self): - self.assertDictEqual(to_dict_view(self.triples_111, one_node=True, one_path=True, unique_paths=self.unique_paths, node=False, path=True, value=False), + self.assertDictEqual(to_dict_view(self.triples_111, one_node=True, one_path=True, unique_paths=self.unique_paths, node=True, path=False, value=True), {'ent#1234': {123}}) - self.assertDictEqual(to_dict_view(self.triples_11U, one_node=True, one_path=True, unique_paths=self.unique_paths, node=False, path=True, value=False), + self.assertDictEqual(to_dict_view(self.triples_11U, one_node=True, one_path=True, unique_paths=self.unique_paths, node=True, path=False, value=True), {'ent#1234': {'tag#1234', 'tag#5678'}}) - self.assertDictEqual(to_dict_view(self.triples_1M1, one_node=True, one_path=False, unique_paths=self.unique_paths, node=False, path=True, value=False), + self.assertDictEqual(to_dict_view(self.triples_1M1, one_node=True, one_path=False, unique_paths=self.unique_paths, node=True, path=False, value=True), {'ent#1234': {123, '2010-01-02'}}) - self.assertDictEqual(to_dict_view(self.triples_1MU, one_node=True, one_path=False, unique_paths=self.unique_paths, node=False, path=True, value=False), + self.assertDictEqual(to_dict_view(self.triples_1MU, one_node=True, one_path=False, unique_paths=self.unique_paths, node=True, path=False, value=True), {'ent#1234': {123, 'tag#1234', 'tag#5678'}}) - self.assertDictEqual(to_dict_view(self.triples_N11, one_node=False, one_path=True, unique_paths=self.unique_paths, node=False, path=True, value=False), + self.assertDictEqual(to_dict_view(self.triples_N11, one_node=False, one_path=True, unique_paths=self.unique_paths, node=True, path=False, value=True), {'ent#1234': {123}, 'ent#4321': 
{321}}) - self.assertDictEqual(to_dict_view(self.triples_N1U, one_node=False, one_path=True, unique_paths=self.unique_paths, node=False, path=True, value=False), + self.assertDictEqual(to_dict_view(self.triples_N1U, one_node=False, one_path=True, unique_paths=self.unique_paths, node=True, path=False, value=True), {'ent#1234': {'tag#1234', 'tag#5678'}, 'ent#4321': {'tag#4321'}}) - self.assertDictEqual(to_dict_view(self.triples_NM1, one_node=False, one_path=False, unique_paths=self.unique_paths, node=False, path=True, value=False), + self.assertDictEqual(to_dict_view(self.triples_NM1, one_node=False, one_path=False, unique_paths=self.unique_paths, node=True, path=False, value=True), {'ent#1234': {123, '2010-01-02'}, 'ent#4321': {321, '2022-02-22'}}) - self.assertDictEqual(to_dict_view(self.triples_NMU, one_node=False, one_path=False, unique_paths=self.unique_paths, node=False, path=True, value=False), + self.assertDictEqual(to_dict_view(self.triples_NMU, one_node=False, one_path=False, unique_paths=self.unique_paths, node=True, path=False, value=True), {'ent#1234': {123, 'tag#1234', 'tag#5678'}, 'ent#4321': {321, '2022-02-22'}}) + # empty + self.assertDictEqual(to_dict_view([], one_node=True, one_path=True, unique_paths=self.unique_paths, node=True, path=False, value=True), {}) + self.assertDictEqual(to_dict_view([], one_node=True, one_path=False, unique_paths=self.unique_paths, node=True, path=False, value=True), {}) + self.assertDictEqual(to_dict_view([], one_node=False, one_path=True, unique_paths=self.unique_paths, node=True, path=False, value=True), {}) + self.assertDictEqual(to_dict_view([], one_node=False, one_path=False, unique_paths=self.unique_paths, node=True, path=False, value=True), {}) def test_agg_node_path(self): - self.assertSetEqual(to_dict_view(self.triples_111, one_node=True, one_path=True, unique_paths=self.unique_paths, node=True, path=True, value=False), + self.assertSetEqual(to_dict_view(self.triples_111, one_node=True, one_path=True, unique_paths=self.unique_paths, node=False, path=False, value=True), {123}) - self.assertSetEqual(to_dict_view(self.triples_11U, one_node=True, one_path=True, unique_paths=self.unique_paths, node=True, path=True, value=False), + self.assertSetEqual(to_dict_view(self.triples_11U, one_node=True, one_path=True, unique_paths=self.unique_paths, node=False, path=False, value=True), {'tag#1234', 'tag#5678'}) - self.assertSetEqual(to_dict_view(self.triples_1M1, one_node=True, one_path=False, unique_paths=self.unique_paths, node=True, path=True, value=False), + self.assertSetEqual(to_dict_view(self.triples_1M1, one_node=True, one_path=False, unique_paths=self.unique_paths, node=False, path=False, value=True), {123, '2010-01-02'}) - self.assertSetEqual(to_dict_view(self.triples_1MU, one_node=True, one_path=False, unique_paths=self.unique_paths, node=True, path=True, value=False), + self.assertSetEqual(to_dict_view(self.triples_1MU, one_node=True, one_path=False, unique_paths=self.unique_paths, node=False, path=False, value=True), {123, 'tag#1234', 'tag#5678'}) - self.assertSetEqual(to_dict_view(self.triples_N11, one_node=False, one_path=True, unique_paths=self.unique_paths, node=True, path=True, value=False), + self.assertSetEqual(to_dict_view(self.triples_N11, one_node=False, one_path=True, unique_paths=self.unique_paths, node=False, path=False, value=True), {123, 321}) - self.assertSetEqual(to_dict_view(self.triples_N1U, one_node=False, one_path=True, unique_paths=self.unique_paths, node=True, path=True, value=False), + 
self.assertSetEqual(to_dict_view(self.triples_N1U, one_node=False, one_path=True, unique_paths=self.unique_paths, node=False, path=False, value=True), {'tag#1234', 'tag#5678', 'tag#4321'}) - self.assertSetEqual(to_dict_view(self.triples_NM1, one_node=False, one_path=False, unique_paths=self.unique_paths, node=True, path=True, value=False), + self.assertSetEqual(to_dict_view(self.triples_NM1, one_node=False, one_path=False, unique_paths=self.unique_paths, node=False, path=False, value=True), {123, '2010-01-02', 321, '2022-02-22'}) - self.assertSetEqual(to_dict_view(self.triples_NMU, one_node=False, one_path=False, unique_paths=self.unique_paths, node=True, path=True, value=False), + self.assertSetEqual(to_dict_view(self.triples_NMU, one_node=False, one_path=False, unique_paths=self.unique_paths, node=False, path=False, value=True), {123, 'tag#1234', 'tag#5678', 321, '2022-02-22'}) + # empty + self.assertSetEqual(to_dict_view([], one_node=True, one_path=True, unique_paths=self.unique_paths, node=False, path=False, value=True), set()) + self.assertSetEqual(to_dict_view([], one_node=True, one_path=False, unique_paths=self.unique_paths, node=False, path=False, value=True), set()) + self.assertSetEqual(to_dict_view([], one_node=False, one_path=True, unique_paths=self.unique_paths, node=False, path=False, value=True), set()) + self.assertSetEqual(to_dict_view([], one_node=False, one_path=False, unique_paths=self.unique_paths, node=False, path=False, value=True), set()) def test_agg_value(self): - self.assertDictEqual(to_dict_view(self.triples_111, one_node=True, one_path=True, unique_paths=self.unique_paths, node=False, path=False, value=True), + self.assertDictEqual(to_dict_view(self.triples_111, one_node=True, one_path=True, unique_paths=self.unique_paths, node=True, path=True, value=False), {'ent#1234': {ns.bse.iso: 123}}) - self.assertDictEqual(to_dict_view(self.triples_11U, one_node=True, one_path=True, unique_paths=self.unique_paths, node=False, path=False, value=True), + self.assertDictEqual(to_dict_view(self.triples_11U, one_node=True, one_path=True, unique_paths=self.unique_paths, node=True, path=True, value=False), {'ent#1234': {ns.bse.tag: {'tag#1234', 'tag#5678'}}}) - self.assertDictEqual(to_dict_view(self.triples_1M1, one_node=True, one_path=False, unique_paths=self.unique_paths, node=False, path=False, value=True), + self.assertDictEqual(to_dict_view(self.triples_1M1, one_node=True, one_path=False, unique_paths=self.unique_paths, node=True, path=True, value=False), {'ent#1234': {ns.bse.iso: 123, ns.bse.t_created: '2010-01-02'}}) - self.assertDictEqual(to_dict_view(self.triples_1MU, one_node=True, one_path=False, unique_paths=self.unique_paths, node=False, path=False, value=True), + self.assertDictEqual(to_dict_view(self.triples_1MU, one_node=True, one_path=False, unique_paths=self.unique_paths, node=True, path=True, value=False), {'ent#1234': {ns.bse.iso: 123, ns.bse.tag: {'tag#1234', 'tag#5678'}}}) - self.assertDictEqual(to_dict_view(self.triples_N11, one_node=False, one_path=True, unique_paths=self.unique_paths, node=False, path=False, value=True), + self.assertDictEqual(to_dict_view(self.triples_N11, one_node=False, one_path=True, unique_paths=self.unique_paths, node=True, path=True, value=False), {'ent#1234': {ns.bse.iso: 123}, 'ent#4321': {ns.bse.iso: 321}}) - self.assertDictEqual(to_dict_view(self.triples_N1U, one_node=False, one_path=True, unique_paths=self.unique_paths, node=False, path=False, value=True), + self.assertDictEqual(to_dict_view(self.triples_N1U, one_node=False, 
one_path=True, unique_paths=self.unique_paths, node=True, path=True, value=False), {'ent#1234': {ns.bse.tag: {'tag#1234', 'tag#5678'}}, 'ent#4321': {ns.bse.tag: {'tag#4321'}}}) - self.assertDictEqual(to_dict_view(self.triples_NM1, one_node=False, one_path=False, unique_paths=self.unique_paths, node=False, path=False, value=True), + self.assertDictEqual(to_dict_view(self.triples_NM1, one_node=False, one_path=False, unique_paths=self.unique_paths, node=True, path=True, value=False), {'ent#1234': {ns.bse.iso: 123, ns.bse.t_created: '2010-01-02'}, 'ent#4321': {ns.bse.iso: 321, ns.bse.t_created: '2022-02-22'}}) - self.assertDictEqual(to_dict_view(self.triples_NMU, one_node=False, one_path=False, unique_paths=self.unique_paths, node=False, path=False, value=True), + self.assertDictEqual(to_dict_view(self.triples_NMU, one_node=False, one_path=False, unique_paths=self.unique_paths, node=True, path=True, value=False), {'ent#1234': {ns.bse.iso: 123, ns.bse.tag: {'tag#1234', 'tag#5678'}}, 'ent#4321': {ns.bse.iso: 321, ns.bse.t_created: '2022-02-22'}}) + # empty + self.assertDictEqual(to_dict_view([], one_node=True, one_path=True, unique_paths=self.unique_paths, node=True, path=True, value=False), {}) + self.assertDictEqual(to_dict_view([], one_node=True, one_path=False, unique_paths=self.unique_paths, node=True, path=True, value=False), {}) + self.assertDictEqual(to_dict_view([], one_node=False, one_path=True, unique_paths=self.unique_paths, node=True, path=True, value=False), {}) + self.assertDictEqual(to_dict_view([], one_node=False, one_path=False, unique_paths=self.unique_paths, node=True, path=True, value=False), {}) def test_agg_node_value(self): - self.assertDictEqual(to_dict_view(self.triples_111, one_node=True, one_path=True, unique_paths=self.unique_paths, node=True, path=False, value=True), + self.assertDictEqual(to_dict_view(self.triples_111, one_node=True, one_path=True, unique_paths=self.unique_paths, node=False, path=True, value=False), {ns.bse.iso: 123}) - self.assertDictEqual(to_dict_view(self.triples_11U, one_node=True, one_path=True, unique_paths=self.unique_paths, node=True, path=False, value=True), + self.assertDictEqual(to_dict_view(self.triples_11U, one_node=True, one_path=True, unique_paths=self.unique_paths, node=False, path=True, value=False), {ns.bse.tag: {'tag#1234', 'tag#5678'}}) - self.assertDictEqual(to_dict_view(self.triples_1M1, one_node=True, one_path=False, unique_paths=self.unique_paths, node=True, path=False, value=True), + self.assertDictEqual(to_dict_view(self.triples_1M1, one_node=True, one_path=False, unique_paths=self.unique_paths, node=False, path=True, value=False), {ns.bse.iso: 123, ns.bse.t_created: '2010-01-02'}) - self.assertDictEqual(to_dict_view(self.triples_1MU, one_node=True, one_path=False, unique_paths=self.unique_paths, node=True, path=False, value=True), + self.assertDictEqual(to_dict_view(self.triples_1MU, one_node=True, one_path=False, unique_paths=self.unique_paths, node=False, path=True, value=False), {ns.bse.iso: 123, ns.bse.tag: {'tag#1234', 'tag#5678'}}) - self.assertDictEqual(to_dict_view(self.triples_N11, one_node=False, one_path=True, unique_paths=self.unique_paths, node=True, path=False, value=True), + self.assertDictEqual(to_dict_view(self.triples_N11, one_node=False, one_path=True, unique_paths=self.unique_paths, node=False, path=True, value=False), {ns.bse.iso: {123, 321}}) - self.assertDictEqual(to_dict_view(self.triples_N1U, one_node=False, one_path=True, unique_paths=self.unique_paths, node=True, path=False, value=True), + 
self.assertDictEqual(to_dict_view(self.triples_N1U, one_node=False, one_path=True, unique_paths=self.unique_paths, node=False, path=True, value=False), {ns.bse.tag: {'tag#1234', 'tag#5678', 'tag#4321'}}) - self.assertDictEqual(to_dict_view(self.triples_NM1, one_node=False, one_path=False, unique_paths=self.unique_paths, node=True, path=False, value=True), + self.assertDictEqual(to_dict_view(self.triples_NM1, one_node=False, one_path=False, unique_paths=self.unique_paths, node=False, path=True, value=False), {ns.bse.iso: {123, 321}, ns.bse.t_created: {'2010-01-02', '2022-02-22'}}) - self.assertDictEqual(to_dict_view(self.triples_NMU, one_node=False, one_path=False, unique_paths=self.unique_paths, node=True, path=False, value=True), + self.assertDictEqual(to_dict_view(self.triples_NMU, one_node=False, one_path=False, unique_paths=self.unique_paths, node=False, path=True, value=False), {ns.bse.iso: {123, 321}, ns.bse.tag: {'tag#1234', 'tag#5678'}, ns.bse.t_created: {'2022-02-22'}}) + # empty + self.assertDictEqual(to_dict_view([], one_node=True, one_path=True, unique_paths=self.unique_paths, node=False, path=True, value=False), {}) + self.assertDictEqual(to_dict_view([], one_node=True, one_path=False, unique_paths=self.unique_paths, node=False, path=True, value=False), {}) + self.assertDictEqual(to_dict_view([], one_node=False, one_path=True, unique_paths=self.unique_paths, node=False, path=True, value=False), {}) + self.assertDictEqual(to_dict_view([], one_node=False, one_path=False, unique_paths=self.unique_paths, node=False, path=True, value=False), {}) def test_agg_path_value(self): - self.assertDictEqual(to_dict_view(self.triples_111, one_node=True, one_path=True, unique_paths=self.unique_paths, node=False, path=True, value=True), + self.assertDictEqual(to_dict_view(self.triples_111, one_node=True, one_path=True, unique_paths=self.unique_paths, node=True, path=False, value=False), {'ent#1234': 123}) - self.assertDictEqual(to_dict_view(self.triples_11U, one_node=True, one_path=True, unique_paths=self.unique_paths, node=False, path=True, value=True), + self.assertDictEqual(to_dict_view(self.triples_11U, one_node=True, one_path=True, unique_paths=self.unique_paths, node=True, path=False, value=False), {'ent#1234': {'tag#1234', 'tag#5678'}}) - self.assertDictEqual(to_dict_view(self.triples_1M1, one_node=True, one_path=False, unique_paths=self.unique_paths, node=False, path=True, value=True), + self.assertDictEqual(to_dict_view(self.triples_1M1, one_node=True, one_path=False, unique_paths=self.unique_paths, node=True, path=False, value=False), {'ent#1234': {123, '2010-01-02'}}) - self.assertDictEqual(to_dict_view(self.triples_1MU, one_node=True, one_path=False, unique_paths=self.unique_paths, node=False, path=True, value=True), + self.assertDictEqual(to_dict_view(self.triples_1MU, one_node=True, one_path=False, unique_paths=self.unique_paths, node=True, path=False, value=False), {'ent#1234': {123, 'tag#1234', 'tag#5678'}}) - self.assertDictEqual(to_dict_view(self.triples_N11, one_node=False, one_path=True, unique_paths=self.unique_paths, node=False, path=True, value=True), + self.assertDictEqual(to_dict_view(self.triples_N11, one_node=False, one_path=True, unique_paths=self.unique_paths, node=True, path=False, value=False), {'ent#1234': 123, 'ent#4321': 321}) - self.assertDictEqual(to_dict_view(self.triples_N1U, one_node=False, one_path=True, unique_paths=self.unique_paths, node=False, path=True, value=True), + self.assertDictEqual(to_dict_view(self.triples_N1U, one_node=False, one_path=True, 
unique_paths=self.unique_paths, node=True, path=False, value=False), {'ent#1234': {'tag#1234', 'tag#5678'}, 'ent#4321': {'tag#4321'}}) - self.assertDictEqual(to_dict_view(self.triples_NM1, one_node=False, one_path=False, unique_paths=self.unique_paths, node=False, path=True, value=True), + self.assertDictEqual(to_dict_view(self.triples_NM1, one_node=False, one_path=False, unique_paths=self.unique_paths, node=True, path=False, value=False), {'ent#1234': {123, '2010-01-02'}, 'ent#4321': {321, '2022-02-22'}}) - self.assertDictEqual(to_dict_view(self.triples_NMU, one_node=False, one_path=False, unique_paths=self.unique_paths, node=False, path=True, value=True), + self.assertDictEqual(to_dict_view(self.triples_NMU, one_node=False, one_path=False, unique_paths=self.unique_paths, node=True, path=False, value=False), {'ent#1234': {123, 'tag#1234', 'tag#5678'}, 'ent#4321': {321, '2022-02-22'}}) + # empty + self.assertDictEqual(to_dict_view([], one_node=True, one_path=True, unique_paths=self.unique_paths, node=True, path=False, value=False), {}) + self.assertDictEqual(to_dict_view([], one_node=True, one_path=False, unique_paths=self.unique_paths, node=True, path=False, value=False), {}) + self.assertDictEqual(to_dict_view([], one_node=False, one_path=True, unique_paths=self.unique_paths, node=True, path=False, value=False), {}) + self.assertDictEqual(to_dict_view([], one_node=False, one_path=False, unique_paths=self.unique_paths, node=True, path=False, value=False), {}) def test_agg_all(self): - self.assertEqual(to_dict_view(self.triples_111, one_node=True, one_path=True, unique_paths=self.unique_paths, node=True, path=True, value=True), + self.assertEqual(to_dict_view(self.triples_111, one_node=True, one_path=True, unique_paths=self.unique_paths, node=False, path=False, value=False), 123) - self.assertSetEqual(to_dict_view(self.triples_11U, one_node=True, one_path=True, unique_paths=self.unique_paths, node=True, path=True, value=True), + self.assertSetEqual(to_dict_view(self.triples_11U, one_node=True, one_path=True, unique_paths=self.unique_paths, node=False, path=False, value=False), {'tag#1234', 'tag#5678'}) - self.assertSetEqual(to_dict_view(self.triples_1M1, one_node=True, one_path=False, unique_paths=self.unique_paths, node=True, path=True, value=True), + self.assertSetEqual(to_dict_view(self.triples_1M1, one_node=True, one_path=False, unique_paths=self.unique_paths, node=False, path=False, value=False), {123, '2010-01-02'}) - self.assertSetEqual(to_dict_view(self.triples_1MU, one_node=True, one_path=False, unique_paths=self.unique_paths, node=True, path=True, value=True), + self.assertSetEqual(to_dict_view(self.triples_1MU, one_node=True, one_path=False, unique_paths=self.unique_paths, node=False, path=False, value=False), {123, 'tag#1234', 'tag#5678'}) - self.assertSetEqual(to_dict_view(self.triples_N11, one_node=False, one_path=True, unique_paths=self.unique_paths, node=True, path=True, value=True), + self.assertSetEqual(to_dict_view(self.triples_N11, one_node=False, one_path=True, unique_paths=self.unique_paths, node=False, path=False, value=False), {123, 321}) - self.assertSetEqual(to_dict_view(self.triples_N1U, one_node=False, one_path=True, unique_paths=self.unique_paths, node=True, path=True, value=True), + self.assertSetEqual(to_dict_view(self.triples_N1U, one_node=False, one_path=True, unique_paths=self.unique_paths, node=False, path=False, value=False), {'tag#1234', 'tag#5678', 'tag#4321'}) - self.assertSetEqual(to_dict_view(self.triples_NM1, one_node=False, one_path=False, 
unique_paths=self.unique_paths, node=True, path=True, value=True), + self.assertSetEqual(to_dict_view(self.triples_NM1, one_node=False, one_path=False, unique_paths=self.unique_paths, node=False, path=False, value=False), {123, '2010-01-02', 321, '2022-02-22'}) - self.assertSetEqual(to_dict_view(self.triples_NMU, one_node=False, one_path=False, unique_paths=self.unique_paths, node=True, path=True, value=True), + self.assertSetEqual(to_dict_view(self.triples_NMU, one_node=False, one_path=False, unique_paths=self.unique_paths, node=False, path=False, value=False), {123, 'tag#1234', 'tag#5678', 321, '2022-02-22'}) - + # empty + self.assertEqual(to_dict_view([], one_node=True, one_path=True, unique_paths=self.unique_paths, node=False, path=False, value=False), None) + self.assertSetEqual(to_dict_view([], one_node=True, one_path=False, unique_paths=self.unique_paths, node=False, path=False, value=False), set()) + self.assertSetEqual(to_dict_view([], one_node=False, one_path=True, unique_paths=self.unique_paths, node=False, path=False, value=False), set()) + self.assertSetEqual(to_dict_view([], one_node=False, one_path=False, unique_paths=self.unique_paths, node=False, path=False, value=False), set()) + self.assertEqual(to_dict_view([], one_node=True, one_path=True, unique_paths=self.unique_paths, node=False, path=False, value=False, default=123), 123) ## main ## diff --git a/test/graph/test_walk.py b/test/graph/test_walk.py index 057ac85..f9dbc7a 100644 --- a/test/graph/test_walk.py +++ b/test/graph/test_walk.py @@ -157,12 +157,12 @@ class TestWalk(unittest.TestCase): self.graph.node(ns.bsfs.Entity, URI('http://example.com/me/entity#4321')): tags, }) # get passes kwargs to Nodes.get - self.assertSetEqual(tags, walk.get(node=True)) - self.assertSetEqual(tags, walk(node=True)) - self.assertSetEqual(tags, set(walk.get(view=list, node=True))) - self.assertSetEqual(tags, set(walk(view=list, node=True))) + self.assertSetEqual(tags, walk.get(node=False)) + self.assertSetEqual(tags, walk(node=False)) + self.assertSetEqual(tags, set(walk.get(view=list, node=False))) + self.assertSetEqual(tags, set(walk(view=list, node=False))) # get returns values if need be - self.assertSetEqual(walk.label(node=True), {'hello', 'world'}) + self.assertSetEqual(walk.label(node=False), {'hello', 'world'}) ## main ## -- cgit v1.2.3 From 7e0987bcda136a17baea45b8eb22eb5ea668abc0 Mon Sep 17 00:00:00 2001 From: Matthias Baumgartner Date: Mon, 30 Jan 2023 14:35:32 +0100 Subject: filter ast comparison --- bsfs/query/matcher.py | 366 ++++++++++++++ setup.py | 5 +- test/query/test_matcher.py | 1182 ++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 1552 insertions(+), 1 deletion(-) create mode 100644 bsfs/query/matcher.py create mode 100644 test/query/test_matcher.py diff --git a/bsfs/query/matcher.py b/bsfs/query/matcher.py new file mode 100644 index 0000000..a910756 --- /dev/null +++ b/bsfs/query/matcher.py @@ -0,0 +1,366 @@ +""" + +Part of the BlackStar filesystem (bsfs) module. +A copy of the license is provided with the project. +Author: Matthias Baumgartner, 2022 +""" +# imports +from collections import defaultdict +from itertools import product +from time import time +import random +import threading +import typing + +# external imports +from hopcroftkarp import HopcroftKarp + +# bsfs imports +from bsfs.utils import errors, typename + +# inner-module imports +from . 
import ast + +# exports +__all__ : typing.Sequence[str] = ( + 'Filter', + ) + + +## code ## + +class Any(ast.filter.FilterExpression, ast.filter.PredicateExpression): + """Match any ast class. + + Note that Any instances are unique, i.e. they do not compare, and + can hence be repeated in a set: + >>> Any() == Any() + False + >>> len({Any(), Any(), Any(), Any()}) + 4 + + """ + + # unique instance id + _uid: typing.Tuple[int, int, float, float] + + def __init__(self): + self._uid = ( + id(self), + id(threading.current_thread()), + time(), + random.random(), + ) + + def __eq__(self, other: typing.Any): + return super().__eq__(other) and self._uid == other._uid + + def __hash__(self): + return hash((super().__hash__(), self._uid)) + + +class Rest(ast.filter.FilterExpression, ast.filter.PredicateExpression): + """Match the leftovers in a set of items to be compared. + + Rest can be used in junction with aggregating expressions such as ast.filter.And, + ast.filter.Or, ast.filter.OneOf. It controls childs expressions that were not yet + consumed by other matching rules. Rest may match to only a specific expression. + The expresssion defaults to Any(). + + For example, the following to ast structures would match since Rest + allows an arbitrary repetition of ast.filter.Equals statements. + + >>> And(Equals('hello'), Equals('world'), Equals('foobar')) + >>> And(Equals('world'), Rest(Partial(Equals))) + + """ + + # child expression for the Rest. + expr: typing.Union[ast.filter.FilterExpression, ast.filter.PredicateExpression] + + def __init__( + self, + expr: typing.Optional[typing.Union[ast.filter.FilterExpression, ast.filter.PredicateExpression]] = None, + ): + if expr is None: + expr = Any() + self.expr = expr + + def __repr__(self) -> str: + return f'{typename(self)}({self.expr})' + + def __hash__(self) -> int: + return hash((super().__hash__(), self.expr)) + + def __eq__(self, other: typing.Any) -> bool: + return super().__eq__(other) and self.expr == other.expr + + +class Partial(ast.filter.FilterExpression, ast.filter.PredicateExpression): + """Match a partially defined ast expression. + + Literal values might be irrelevant or unknown when comparing two ast + structures. Partial allows to constrain the matcher to a certain + ast class, while leaving some of its members unspecified. + + Pass the class (not instance) and its members as keyword arguments + to Partial. Note that the arguments are not validated. + + For example, the following instance matches any ast.filter.Equals, + irrespective of its value: + + >>> Partial(ast.filter.Equals) + + Likewise, the following instance matches any ast.filter.LessThan + that has a strict bounds, but makes no claim about the threshold: + + >>> Partial(ast.filter.LessThan, strict=False) + + """ + + # target node type. + node: typing.Type + + # node construction args. 
+ kwargs: typing.Dict[str, typing.Any] + + def __init__( + self, + node: typing.Type, + **kwargs, + ): + self.node = node + self.kwargs = kwargs + + def __repr__(self) -> str: + return f'{typename(self)}({self.node.__name__}, {self.kwargs})' + + def __hash__(self) -> int: + kwargs = tuple((key, self.kwargs[key]) for key in sorted(self.kwargs)) + return hash((super().__hash__(), self.node, kwargs)) + + def __eq__(self, other: typing.Any) -> bool: + return super().__eq__(other) \ + and self.node == other.node \ + and self.kwargs == other.kwargs + + def match( + self, + name: str, + value: typing.Any, + ) -> bool: + """Return True if *name* is unspecified or matches *value*.""" + return name not in self.kwargs or self.kwargs[name] == value + + + T_ITEM_TYPE = typing.TypeVar('T_ITEM_TYPE') # pylint: disable=invalid-name + + def _set_matcher( + query: typing.Collection[T_ITEM_TYPE], + reference: typing.Collection[T_ITEM_TYPE], + cmp: typing.Callable[[T_ITEM_TYPE, T_ITEM_TYPE], bool], + ) -> bool: + """Compare two sets of child expressions. + + This check has a best-case complexity of O(|N|**2) and worst-case + complexity of O(|N|**3), with |N| the number of child expressions. + """ + # get reference items + r_items = list(reference) + # deal with Rest + r_rest = {itm for itm in r_items if isinstance(itm, Rest)} + if len(r_rest) > 1: + raise errors.BackendError(f'there must be at most one Rest instance per set, found {len(r_rest)}') + if len(r_rest) == 1: + # replace Rest by filling the reference up with rest's expression + # NOTE: convert r_items to list so that items can be repeated + expr = next(iter(r_rest)).expr # type: ignore [attr-defined] + r_items = [itm for itm in r_items if not isinstance(itm, Rest)] + r_items += [expr for _ in range(len(query) - len(r_items))] # type: ignore [misc] + # sanity check: cannot match if the item sizes differ: + # either a reference item is unmatched (len(r_items) > len(query)) + # or a query item is unmatched (len(r_items) < len(query)) + if len(query) != len(r_items): + return False + + # To have a positive match between the query and the reference, + # each query expr has to match some reference expr. + # However, each reference expr can only be "consumed" once even + # if it matches multiple query exprs (e.g., the Any expression matches + # every query expr). + # This is a bipartite matching problem (Hall's marriage problem) + # and the Hopcroft-Karp-Karzanov algorithm finds a maximum + # matching. While there might be multiple maximum matchings, + # we only need to know whether (at least) one complete matching + # exists. The hopcroftkarp module provides this functionality. + # The HKK algorithm has worst-case complexity of O(|N|**2 * sqrt(|N|)) + # and we also need to compare expressions pairwise, hence O(|N|**2). + num_items = len(r_items) + graph = defaultdict(set) + # build the bipartite graph as {lhs: {rhs}, ...} + # lhs and rhs must be disjoint identifiers. + for (ridx, ref), (nidx, node) in product(enumerate(r_items), enumerate(query)): + # add edges for equal expressions + if cmp(node, ref): + graph[ridx].add(num_items + nidx) + + # maximum_matching returns the matches for all nodes in the graph + # ({ref_itm: node_itm}), hence a complete matching's size is + # the number of reference's child expressions. + return len(HopcroftKarp(graph).maximum_matching(keys_only=True)) == num_items + + + class Filter(): + """Compare a `bsfs.query.ast.filter` query's structure to a reference ast.
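+
+    For instance, a minimal comparison (a sketch that only assumes the classes
+    from this module and `bsfs.query.ast`) may look as follows:
+
+    >>> Filter()(ast.filter.Equals('hello'), Partial(ast.filter.Equals))
+    True
+    >>> Filter()(ast.filter.Equals('hello'), ast.filter.Equals('world'))
+    False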
+ + The reference ast may include `Rest`, `Partial`, or `Any` to account for irrelevant + or unknown ast pieces. + + This is only a structural comparison, not a semantic one. For example, the + two following queries are semantically identical, but structurally different, + and would therefore not match: + + >>> ast.filter.OneOf(ast.filter.Predicate(ns.bse.filename)) + >>> ast.filter.Predicate(ns.bse.filename) + + """ + + def __call__(self, query: ast.filter.FilterExpression, reference: ast.filter.FilterExpression) -> bool: + """Compare a *query* to a *reference* ast structure. + Return True if both are structurally equivalent. + """ + if not isinstance(query, ast.filter.FilterExpression): + raise errors.BackendError(f'expected filter expression, found {query}') + if not isinstance(reference, ast.filter.FilterExpression): + raise errors.BackendError(f'expected filter expression, found {reference}') + return self._parse_filter_expression(query, reference) + + def _parse_filter_expression( + self, + node: ast.filter.FilterExpression, + reference: ast.filter.FilterExpression, + ) -> bool: + """Route *node* to the handler of the respective FilterExpression subclass.""" + # generic checks: reference type must be Any or match node type + if isinstance(reference, Any): + return True + # node-specific checks + if isinstance(node, ast.filter.Not): + return self._not(node, reference) + if isinstance(node, ast.filter.Has): + return self._has(node, reference) + if isinstance(node, ast.filter.Distance): + return self._distance(node, reference) + if isinstance(node, (ast.filter.Any, ast.filter.All)): + return self._branch(node, reference) + if isinstance(node, (ast.filter.And, ast.filter.Or)): + return self._agg(node, reference) + if isinstance(node, (ast.filter.Is, ast.filter.Equals, ast.filter.Substring, + ast.filter.StartsWith, ast.filter.EndsWith)): + return self._value(node, reference) + if isinstance(node, (ast.filter.LessThan, ast.filter.GreaterThan)): + return self._bounded(node, reference) + # invalid node + raise errors.BackendError(f'expected filter expression, found {node}') + + def _parse_predicate_expression( + self, + node: ast.filter.PredicateExpression, + reference: ast.filter.PredicateExpression, + ) -> bool: + """Route *node* to the handler of the respective PredicateExpression subclass.""" + if isinstance(reference, Any): + return True + if isinstance(node, ast.filter.Predicate): + return self._predicate(node, reference) + if isinstance(node, ast.filter.OneOf): + return self._one_of(node, reference) + # invalid node + raise errors.BackendError(f'expected predicate expression, found {node}') + + def _one_of(self, node: ast.filter.OneOf, reference: ast.filter.PredicateExpression) -> bool: + if not isinstance(reference, type(node)): + return False + return _set_matcher(node, reference, self._parse_predicate_expression) + + def _predicate(self, node: ast.filter.Predicate, reference: ast.filter.PredicateExpression) -> bool: + if not isinstance(reference, (Partial, type(node))): + return False + # partial check + if isinstance(reference, Partial): + if not isinstance(node, reference.node): + return False + return reference.match('predicate', node.predicate) \ + and reference.match('reverse', node.reverse) + # full check + return node.predicate == reference.predicate \ + and node.reverse == reference.reverse + + def _branch(self, + node: typing.Union[ast.filter.Any, ast.filter.All], + reference: ast.filter.FilterExpression, + ) -> bool: + if not isinstance(reference, type(node)): + return 
False + if not self._parse_predicate_expression(node.predicate, reference.predicate): # type: ignore [attr-defined] + return False + if not self._parse_filter_expression(node.expr, reference.expr): # type: ignore [attr-defined] + return False + return True + + def _agg(self, node: typing.Union[ast.filter.And, ast.filter.Or], reference: ast.filter.FilterExpression) -> bool: + if not isinstance(reference, type(node)): + return False + return _set_matcher(node, reference, self._parse_filter_expression) # type: ignore [arg-type] + + def _not(self, node: ast.filter.Not, reference: ast.filter.FilterExpression) -> bool: + if not isinstance(reference, type(node)): + return False + return self._parse_filter_expression(node.expr, reference.expr) + + def _has(self, node: ast.filter.Has, reference: ast.filter.FilterExpression) -> bool: + if not isinstance(reference, type(node)): + return False + return self._parse_predicate_expression(node.predicate, reference.predicate) \ + and self._parse_filter_expression(node.count, reference.count) + + def _distance(self, node: ast.filter.Distance, reference: ast.filter.FilterExpression) -> bool: + if not isinstance(reference, (Partial, type(node))): + return False + # partial check + if isinstance(reference, Partial): + if not isinstance(node, reference.node): + return False + return reference.match('reference', node.reference) \ + and reference.match('threshold', node.threshold) \ + and reference.match('strict', node.strict) + # full check + return node.reference == reference.reference \ + and node.threshold == reference.threshold \ + and node.strict == reference.strict + + def _value(self, node: ast.filter._Value, reference: ast.filter.FilterExpression) -> bool: + if not isinstance(reference, (Partial, type(node))): + return False + # partial check + if isinstance(reference, Partial): + if not isinstance(node, reference.node): + return False + return reference.match('value', node.value) + # full check + return node.value == reference.value + + def _bounded(self, node: ast.filter._Bounded, reference: ast.filter.FilterExpression) -> bool: + if not isinstance(reference, (Partial, type(node))): + return False + # partial check + if isinstance(reference, Partial): + if not isinstance(node, reference.node): + return False + return reference.match('threshold', node.threshold) \ + and reference.match('strict', node.strict) + # full check + return node.threshold == reference.threshold \ + and node.strict == reference.strict + +## EOF ## diff --git a/setup.py b/setup.py index ab3864a..243c73f 100644 --- a/setup.py +++ b/setup.py @@ -14,7 +14,10 @@ setup( url='https://www.igsor.net/projects/blackstar/bsfs/', download_url='https://pip.igsor.net', packages=('bsfs', ), - install_requires=('rdflib', ), + install_requires=( + 'rdflib', # schema and sparql storage + 'hopcroftkarp', # ast matching + ), python_requires=">=3.7", ) diff --git a/test/query/test_matcher.py b/test/query/test_matcher.py new file mode 100644 index 0000000..e830cf8 --- /dev/null +++ b/test/query/test_matcher.py @@ -0,0 +1,1182 @@ +""" + +Part of the bsfs test suite. +A copy of the license is provided with the project.
+Author: Matthias Baumgartner, 2022 +""" +# imports +import operator +import unittest + +# bsfs imports +from bsfs.namespace import ns +from bsfs.query import ast +from bsfs.utils import errors + +# objects to test +from bsfs.query.matcher import Any, Filter, Partial, Rest, _set_matcher + + +## code ## + +class TestAny(unittest.TestCase): + def test_essentials(self): + # comparison + a = Any() + b = Any() + self.assertNotEqual(Any(), Any()) + self.assertNotEqual(hash(Any()), hash(Any())) + self.assertNotEqual(a, Any()) + self.assertNotEqual(hash(a), hash(Any())) + self.assertNotEqual(a, b) + self.assertNotEqual(hash(a), hash(b)) + # comparison within sets + self.assertEqual(len({Any(), Any(), Any(), Any()}), 4) + self.assertEqual(len({Any() for _ in range(1000)}), 1000) + # string representation + self.assertEqual(str(Any()), 'Any()') + self.assertEqual(repr(Any()), 'Any()') + + +class TestRest(unittest.TestCase): + def test_essentials(self): + expr = ast.filter.Equals('hello') + # comparison + self.assertEqual(Rest(expr), Rest(expr)) + self.assertEqual(hash(Rest(expr)), hash(Rest(expr))) + # comparison respects type + class Foo(): pass + self.assertNotEqual(Rest(expr), 1234) + self.assertNotEqual(hash(Rest(expr)), hash(1234)) + self.assertNotEqual(Rest(expr), Foo()) + self.assertNotEqual(hash(Rest(expr)), hash(Foo())) + # comparison respects expr + self.assertNotEqual(Rest(expr), Rest(ast.filter.Equals('world'))) + self.assertNotEqual(hash(Rest(expr)), hash(Rest(ast.filter.Equals('world')))) + # default constructor -> Any -> Not equal + self.assertNotEqual(Rest(), Rest()) + self.assertNotEqual(hash(Rest()), hash(Rest())) + # string representation + self.assertEqual(str(Rest()), 'Rest(Any())') + self.assertEqual(str(Rest(expr)), 'Rest(Equals(hello))') + self.assertEqual(repr(Rest()), 'Rest(Any())') + self.assertEqual(repr(Rest(expr)), 'Rest(Equals(hello))') + + + +class TestPartial(unittest.TestCase): + def test_match(self): + p0 = Partial(ast.filter.LessThan) + p1 = Partial(ast.filter.LessThan, threshold=3) + p2 = Partial(ast.filter.LessThan, strict=False) + p3 = Partial(ast.filter.LessThan, threshold=3, strict=False) + # match respects name + self.assertTrue(p0.match('foo', None)) + self.assertTrue(p1.match('foo', None)) + self.assertTrue(p2.match('foo', None)) + self.assertTrue(p3.match('foo', None)) + # match respects correct value + self.assertTrue(p0.match('threshold', 3)) + self.assertTrue(p1.match('threshold', 3)) + self.assertTrue(p2.match('threshold', 3)) + self.assertTrue(p3.match('threshold', 3)) + self.assertTrue(p0.match('strict', False)) + self.assertTrue(p1.match('strict', False)) + self.assertTrue(p2.match('strict', False)) + self.assertTrue(p3.match('strict', False)) + # match respects incorrect value + self.assertTrue(p0.match('threshold', 5)) + self.assertFalse(p1.match('threshold', 5)) + self.assertTrue(p2.match('threshold', 5)) + self.assertFalse(p3.match('threshold', 5)) + self.assertTrue(p0.match('strict', True)) + self.assertTrue(p1.match('strict', True)) + self.assertFalse(p2.match('strict', True)) + self.assertFalse(p3.match('strict', True)) + + def test_members(self): + # node returns expression + self.assertEqual(Partial(ast.filter.Equals).node, ast.filter.Equals) + self.assertEqual(Partial(ast.filter.LessThan).node, ast.filter.LessThan) + # kwargs returns arguments + self.assertDictEqual(Partial(ast.filter.Equals, value='hello').kwargs, + {'value': 'hello'}) + self.assertDictEqual(Partial(ast.filter.LessThan, threshold=3, strict=False).kwargs, + {'threshold': 
3, 'strict': False}) + # Partial does not check about kwargs + self.assertDictEqual(Partial(ast.filter.LessThan, value='hello').kwargs, + {'value': 'hello'}) + self.assertDictEqual(Partial(ast.filter.Equals, threshold=3, strict=False).kwargs, + {'threshold': 3, 'strict': False}) + + def test_essentials(self): + # comparison respects type + class Foo(): pass + self.assertNotEqual(Partial(ast.filter.Equals), 1234) + self.assertNotEqual(hash(Partial(ast.filter.Equals)), hash(1234)) + self.assertNotEqual(Partial(ast.filter.Equals), Foo()) + self.assertNotEqual(hash(Partial(ast.filter.Equals)), hash(Foo())) + self.assertNotEqual(Partial(ast.filter.Equals), ast.filter.Equals) + self.assertNotEqual(hash(Partial(ast.filter.Equals)), hash(ast.filter.Equals)) + self.assertNotEqual(Partial(ast.filter.Equals), ast.filter.Equals('hello')) + self.assertNotEqual(hash(Partial(ast.filter.Equals)), hash(ast.filter.Equals('hello'))) + # comparison respects node + self.assertEqual(Partial(ast.filter.Equals), Partial(ast.filter.Equals)) + self.assertEqual(hash(Partial(ast.filter.Equals)), hash(Partial(ast.filter.Equals))) + self.assertEqual(Partial(ast.filter.LessThan), Partial(ast.filter.LessThan)) + self.assertEqual(hash(Partial(ast.filter.LessThan)), hash(Partial(ast.filter.LessThan))) + self.assertNotEqual(Partial(ast.filter.Equals), Partial(ast.filter.LessThan)) + self.assertNotEqual(hash(Partial(ast.filter.Equals)), hash(Partial(ast.filter.LessThan))) + # comparison respects kwargs + self.assertEqual( + Partial(ast.filter.Equals, value='hello'), + Partial(ast.filter.Equals, value='hello')) + self.assertEqual( + hash(Partial(ast.filter.Equals, value='hello')), + hash(Partial(ast.filter.Equals, value='hello'))) + self.assertEqual( + Partial(ast.filter.LessThan, threshold=3, strict=False), + Partial(ast.filter.LessThan, threshold=3, strict=False)) + self.assertEqual( + hash(Partial(ast.filter.LessThan, threshold=3, strict=False)), + hash(Partial(ast.filter.LessThan, threshold=3, strict=False))) + self.assertNotEqual( + Partial(ast.filter.Equals, value='hello'), + Partial(ast.filter.Equals)) + self.assertNotEqual( + hash(Partial(ast.filter.Equals, value='hello')), + hash(Partial(ast.filter.Equals))) + self.assertNotEqual( + Partial(ast.filter.Equals, value='hello'), + Partial(ast.filter.Equals, value='world')) + self.assertNotEqual( + hash(Partial(ast.filter.Equals, value='hello')), + hash(Partial(ast.filter.Equals, value='world'))) + self.assertNotEqual( + Partial(ast.filter.LessThan, threshold=3, strict=False), + Partial(ast.filter.LessThan)) + self.assertNotEqual( + hash(Partial(ast.filter.LessThan, threshold=3, strict=False)), + hash(Partial(ast.filter.LessThan))) + self.assertNotEqual( + Partial(ast.filter.LessThan, threshold=3, strict=False), + Partial(ast.filter.LessThan, threshold=5)) + self.assertNotEqual( + hash(Partial(ast.filter.LessThan, threshold=3, strict=False)), + hash(Partial(ast.filter.LessThan, threshold=5))) + self.assertNotEqual( + Partial(ast.filter.LessThan, threshold=3, strict=False), + Partial(ast.filter.LessThan, strict=False)) + self.assertNotEqual( + hash(Partial(ast.filter.LessThan, threshold=3, strict=False)), + hash(Partial(ast.filter.LessThan, strict=False))) + self.assertNotEqual( + Partial(ast.filter.LessThan, threshold=3, strict=False), + Partial(ast.filter.LessThan, threshold=3, strict=True)) + self.assertNotEqual( + hash(Partial(ast.filter.LessThan, threshold=3, strict=False)), + hash(Partial(ast.filter.LessThan, threshold=3, strict=True))) + self.assertNotEqual( + 
Partial(ast.filter.LessThan, threshold=3, strict=False), + Partial(ast.filter.LessThan, threshold=5, strict=False)) + self.assertNotEqual( + hash(Partial(ast.filter.LessThan, threshold=3, strict=False)), + hash(Partial(ast.filter.LessThan, threshold=5, strict=False))) + # string representation + self.assertEqual(str(Partial(ast.filter.Equals)), 'Partial(Equals, {})') + self.assertEqual(repr(Partial(ast.filter.Equals)), 'Partial(Equals, {})') + self.assertEqual(str(Partial(ast.filter.LessThan)), 'Partial(LessThan, {})') + self.assertEqual(repr(Partial(ast.filter.LessThan)), 'Partial(LessThan, {})') + self.assertEqual(str(Partial(ast.filter.Equals, value='hello')), "Partial(Equals, {'value': 'hello'})") + self.assertEqual(repr(Partial(ast.filter.Equals, value='hello')), "Partial(Equals, {'value': 'hello'})") + self.assertEqual(str(Partial(ast.filter.LessThan, threshold=3)), "Partial(LessThan, {'threshold': 3})") + self.assertEqual(repr(Partial(ast.filter.LessThan, threshold=3)), "Partial(LessThan, {'threshold': 3})") + self.assertEqual(str(Partial(ast.filter.LessThan, strict=False)), "Partial(LessThan, {'strict': False})") + self.assertEqual(repr(Partial(ast.filter.LessThan, strict=False)), "Partial(LessThan, {'strict': False})") + self.assertEqual(str(Partial(ast.filter.LessThan, threshold=3, strict=False)), "Partial(LessThan, {'threshold': 3, 'strict': False})") + self.assertEqual(repr(Partial(ast.filter.LessThan, threshold=3, strict=False)), "Partial(LessThan, {'threshold': 3, 'strict': False})") + + +class TestSetMatcher(unittest.TestCase): + def test_set_matcher(self): + # setup + A = ast.filter.Equals('A') + B = ast.filter.Equals('B') + C = ast.filter.Equals('C') + D = ast.filter.Equals('D') + matcher = Filter() + + # identical sets match + self.assertTrue(_set_matcher( + ast.filter.And(A, B, C), + ast.filter.And(A, B, C), + matcher._parse_filter_expression, + )) + + # order is irrelevant + self.assertTrue(_set_matcher( + ast.filter.And(A, B, C), + ast.filter.And(B, C, A), + matcher._parse_filter_expression, + )) + + # all reference items must be present + self.assertFalse(_set_matcher( + ast.filter.And(A, B), + ast.filter.And(A, B, C), + matcher._parse_filter_expression, + )) + + # all reference items must have a match + self.assertFalse(_set_matcher( + ast.filter.And(A, B, C), + ast.filter.And(D, B, C), + matcher._parse_filter_expression, + )) + self.assertFalse(_set_matcher( + ast.filter.And(A, B, C), + ast.filter.And(A, D, C), + matcher._parse_filter_expression, + )) + self.assertFalse(_set_matcher( + ast.filter.And(A, B, C), + ast.filter.And(A, B, D), + matcher._parse_filter_expression, + )) + + # Any matches every item + self.assertTrue(_set_matcher( + ast.filter.And(A, B, C), + ast.filter.And(Any(), B, C), + matcher._parse_filter_expression, + )) + self.assertTrue(_set_matcher( + ast.filter.And(A, B, C), + ast.filter.And(A, Any(), C), + matcher._parse_filter_expression, + )) + self.assertTrue(_set_matcher( + ast.filter.And(A, B, C), + ast.filter.And(A, B, Any()), + matcher._parse_filter_expression, + )) + self.assertTrue(_set_matcher( + ast.filter.And(A, B, D), + ast.filter.And(A, B, Any()), + matcher._parse_filter_expression, + )) + + # there can be multiple Any's + self.assertTrue(_set_matcher( + ast.filter.And(A, B, C), + ast.filter.And(A, Any(), Any()), + matcher._parse_filter_expression, + )) + self.assertTrue(_set_matcher( + ast.filter.And(A, B, C), + ast.filter.And(Any(), B, Any()), + matcher._parse_filter_expression, + )) + self.assertTrue(_set_matcher( + 
ast.filter.And(A, B, C), + ast.filter.And(Any(), Any(), C), + matcher._parse_filter_expression, + )) + + # Any covers exactly one element + self.assertTrue(_set_matcher( + ast.filter.And(A, B, C), + ast.filter.And(A, B, Any()), + matcher._parse_filter_expression, + )) + self.assertTrue(_set_matcher( + ast.filter.And(A, B, D), + ast.filter.And(A, B, Any()), + matcher._parse_filter_expression, + )) + self.assertFalse(_set_matcher( + ast.filter.And(A, B), + ast.filter.And(A, B, Any()), + matcher._parse_filter_expression, + )) + self.assertFalse(_set_matcher( + ast.filter.And(A, B, C, D), + ast.filter.And(A, B, Any()), + matcher._parse_filter_expression, + )) + + # each Any covers exactly one element + self.assertTrue(_set_matcher( + ast.filter.And(A, B, C), + ast.filter.And(Any(), Any(), Any()), + matcher._parse_filter_expression, + )) + self.assertFalse(_set_matcher( + ast.filter.And(A, B, C), + ast.filter.And(Any(), Any()), + matcher._parse_filter_expression, + )) + self.assertFalse(_set_matcher( + ast.filter.And(A, B), + ast.filter.And(Any(), Any(), Any()), + matcher._parse_filter_expression, + )) + + # Rest captures remainder + self.assertTrue(_set_matcher( + ast.filter.And(A, B, C), + ast.filter.And(A, B, Rest()), + matcher._parse_filter_expression, + )) + self.assertTrue(_set_matcher( + ast.filter.And(A, B, C, D), + ast.filter.And(A, B, Rest()), + matcher._parse_filter_expression, + )) + # remainder matches the empty set + self.assertTrue(_set_matcher( + ast.filter.And(A, B), + ast.filter.And(A, B, Rest()), + matcher._parse_filter_expression, + )) + # Rest does not absolve other refernce items from having a match + self.assertFalse(_set_matcher( + ast.filter.And(A, C, D), + ast.filter.And(A, B, Rest()), + matcher._parse_filter_expression, + )) + # Rest can be combined with Any ... + self.assertTrue(_set_matcher( + ast.filter.And(A, C, D), + ast.filter.And(A, Any(), Rest()), + matcher._parse_filter_expression, + )) + self.assertTrue(_set_matcher( + ast.filter.And(A, C, D), + ast.filter.And(A, Any(), Rest()), + matcher._parse_filter_expression, + )) + # ... explicit items still need to match + self.assertFalse(_set_matcher( + ast.filter.And(A, C, D), + ast.filter.And(B, Any(), Rest()), + matcher._parse_filter_expression, + )) + # ... Any still determines minimum element count + self.assertTrue(_set_matcher( + ast.filter.And(A, B), + ast.filter.And(A, Any(), Rest()), + matcher._parse_filter_expression, + )) + self.assertFalse(_set_matcher( + ast.filter.And(A, B), + ast.filter.And(A, Any(), Any(), Rest()), + matcher._parse_filter_expression, + )) + # Rest cannot be repeated ... + self.assertRaises(errors.BackendError, _set_matcher, + ast.filter.And(A, B, C), + ast.filter.And(A, Rest(), Rest(ast.filter.Equals('hello'))), + matcher._parse_filter_expression, + ) + # ... unless they are identical + self.assertRaises(errors.BackendError, _set_matcher, + ast.filter.And(A, B, C), + ast.filter.And(A, Rest(), Rest()), # Any instances are different! + matcher._parse_filter_expression, + ) + # ... 
unless they are identical + self.assertTrue(_set_matcher( + ast.filter.And(A, B, C), + ast.filter.And(A, B, Rest(C), Rest(C)), + matcher._parse_filter_expression, + )) + # Rest can mandate a specific expression + self.assertTrue(_set_matcher( + ast.filter.And(A, B, C), + ast.filter.And(A, B, Rest(C)), + matcher._parse_filter_expression, + )) + self.assertFalse(_set_matcher( + ast.filter.And(A, B, C), + ast.filter.And(A, B, Rest(D)), + matcher._parse_filter_expression, + )) + # Rest can mandate a partial expression + self.assertTrue(_set_matcher( + ast.filter.And(A, B, C), + ast.filter.And(A, B, Rest(Partial(ast.filter.Equals))), + matcher._parse_filter_expression, + )) + self.assertTrue(_set_matcher( + ast.filter.And(A, B, C), + ast.filter.And(A, Rest(Partial(ast.filter.Equals))), + matcher._parse_filter_expression, + )) + self.assertFalse(_set_matcher( + ast.filter.And(A, B, C), + ast.filter.And(A, B, Rest(Partial(ast.filter.Substring))), + matcher._parse_filter_expression, + )) + self.assertFalse(_set_matcher( + ast.filter.And(A, B, C), + ast.filter.And(A, B, Rest(Partial(ast.filter.Equals, value='D'))), + matcher._parse_filter_expression, + )) + # Rest can be the only expression + self.assertTrue(_set_matcher( + ast.filter.And(A, B, C), + ast.filter.And(Rest(Partial(ast.filter.Equals))), + matcher._parse_filter_expression, + )) + # Rest's expression defaults to Any + self.assertTrue(_set_matcher( + ast.filter.And(A, B, C), + ast.filter.And(Rest()), + matcher._parse_filter_expression, + )) + + +class TestFilter(unittest.TestCase): + def setUp(self): + self.match = Filter() + + def test_call(self): + # query must be a filter expression + self.assertRaises(errors.BackendError, self.match, 1234, Any()) + self.assertRaises(errors.BackendError, self.match, ast.filter.Predicate(ns.bse.filename), Any()) + # reference must be a filter expression + self.assertRaises(errors.BackendError, self.match, ast.filter.Equals('hello'), 1234) + self.assertRaises(errors.BackendError, self.match, ast.filter.Equals('hello'), ast.filter.Predicate(ns.bse.filename)) + # reference can be Any or Partial + self.assertTrue(self.match( + ast.filter.Equals('hello'), + Any(), + )) + self.assertTrue(self.match( + ast.filter.Equals('hello'), + Partial(ast.filter.Equals), + )) + # call parses expression + self.assertTrue(self.match( + # query + ast.filter.And( + ast.filter.Any(ns.bse.tag, + ast.filter.All(ns.bse.label, + ast.filter.Or( + ast.filter.Equals('hello'), + ast.filter.Equals('world'), + ast.filter.StartsWith('foo'), + ast.filter.EndsWith('bar'), + ) + ) + ), + ast.filter.Any(ns.bse.iso, + ast.filter.And( + ast.filter.GreaterThan(100, strict=True), + ast.filter.LessThan(200, strict=False), + ) + ), + ast.filter.Any(ast.filter.OneOf(ns.bse.featureA, ns.bse.featureB), + ast.filter.Distance([1,2,3], 1) + ), + ), + # reference + ast.filter.And( + ast.filter.Any(Any(), + ast.filter.All(Partial(ast.filter.Predicate, reverse=False), + ast.filter.Or( + Partial(ast.filter.StartsWith), + ast.filter.EndsWith('bar'), + Rest(Partial(ast.filter.Equals)), + ) + ) + ), + ast.filter.Any(ns.bse.iso, + ast.filter.And( + Partial(ast.filter.GreaterThan, strict=True), + Any(), + Rest(), + ) + ), + ast.filter.Any(ast.filter.OneOf(Rest()), + Partial(ast.filter.Distance) + ), + ), + )) + self.assertFalse(self.match( + # query + ast.filter.Any(ns.bse.tag, + ast.filter.And( + ast.filter.Any(ns.bse.label, ast.filter.Equals('hello')), + ast.filter.Any(ns.bse.collection, ast.filter.Is('http://example.com/col#123')), + 
ast.filter.Not(ast.filter.Has(ns.bse.label)), + ) + ), + # reference + ast.filter.Any(ns.bse.tag, + ast.filter.And( + Any(), + ast.filter.Any(Partial(ast.filter.Predicate, reverse=True), # reverse mismatch + Partial(ast.filter.Is)), + ast.filter.Not(ast.filter.Has(Any(), Any())), + ) + ) + )) + + def test_parse_filter_expression(self): + # Any matches every filter expression + self.assertTrue(self.match._parse_filter_expression( + ast.filter.Not(ast.filter.FilterExpression()), Any())) + self.assertTrue(self.match._parse_filter_expression( + ast.filter.Has(ns.bse.filename), Any())) + self.assertTrue(self.match._parse_filter_expression( + ast.filter.Distance([1,2,3], 1.0), Any())) + self.assertTrue(self.match._parse_filter_expression( + ast.filter.Any(ns.bse.filename, ast.filter.Equals('hello')), Any())) + self.assertTrue(self.match._parse_filter_expression( + ast.filter.All(ns.bse.filename, ast.filter.Equals('hello')), Any())) + self.assertTrue(self.match._parse_filter_expression( + ast.filter.And(ast.filter.Equals('hello')), Any())) + self.assertTrue(self.match._parse_filter_expression( + ast.filter.Or(ast.filter.Equals('hello')), Any())) + self.assertTrue(self.match._parse_filter_expression( + ast.filter.Equals('hello'), Any())) + self.assertTrue(self.match._parse_filter_expression( + ast.filter.Substring('hello'), Any())) + self.assertTrue(self.match._parse_filter_expression( + ast.filter.StartsWith('hello'), Any())) + self.assertTrue(self.match._parse_filter_expression( + ast.filter.EndsWith('hello'), Any())) + self.assertTrue(self.match._parse_filter_expression( + ast.filter.Is('hello'), Any())) + self.assertTrue(self.match._parse_filter_expression( + ast.filter.LessThan(3), Any())) + self.assertTrue(self.match._parse_filter_expression( + ast.filter.GreaterThan(3), Any())) + # Any matches invalid filter expressions + self.assertTrue(self.match._parse_filter_expression( + ast.filter.FilterExpression(), Any())) + # node must be an appropriate filter expression + self.assertRaises(errors.BackendError, self.match._parse_filter_expression, + ast.filter.FilterExpression(), ast.filter.FilterExpression()) + self.assertRaises(errors.BackendError, self.match._parse_filter_expression, + 1234, ast.filter.FilterExpression()) + + def test_parse_predicate_expression(self): + # Any matches every predicate expression + self.assertTrue(self.match._parse_predicate_expression( + ast.filter.Predicate(ns.bse.filename), Any())) + self.assertTrue(self.match._parse_predicate_expression( + ast.filter.OneOf(ns.bse.filename), Any())) + # Any matches invalid predicate expression + self.assertTrue(self.match._parse_predicate_expression( + ast.filter.FilterExpression(), Any())) + # node must be an appropriate predicate expression + self.assertRaises(errors.BackendError, self.match._parse_predicate_expression, + ast.filter.PredicateExpression(), ast.filter.PredicateExpression()) + self.assertRaises(errors.BackendError, self.match._parse_predicate_expression, + 1234, ast.filter.PredicateExpression()) + + def test_predicate(self): + # identical expressions match + self.assertTrue(self.match._predicate( + ast.filter.Predicate(ns.bse.filename, reverse=False), + ast.filter.Predicate(ns.bse.filename, reverse=False), + )) + # _predicate respects type + self.assertFalse(self.match._predicate( + ast.filter.Predicate(ns.bse.filename, reverse=False), + ast.filter.FilterExpression(), + )) + # _predicate respects predicate + self.assertFalse(self.match._predicate( + ast.filter.Predicate(ns.bse.filename, reverse=False), + 
ast.filter.Predicate(ns.bse.filesize, reverse=False), + )) + # _predicate respects reverse + self.assertFalse(self.match._predicate( + ast.filter.Predicate(ns.bse.filename, reverse=False), + ast.filter.Predicate(ns.bse.filename, reverse=True), + )) + # Partial requires ast.filter.Predicate + self.assertFalse(self.match._predicate( + ast.filter.Predicate(ns.bse.filename, reverse=False), + Partial(ast.filter.Equals), + )) + # predicate and reverse can be specified + self.assertTrue(self.match._predicate( + ast.filter.Predicate(ns.bse.filename, reverse=False), + Partial(ast.filter.Predicate, predicate=ns.bse.filename, reverse=False), + )) + self.assertFalse(self.match._predicate( + ast.filter.Predicate(ns.bse.filename, reverse=False), + Partial(ast.filter.Predicate, predicate=ns.bse.filesize, reverse=False), + )) + self.assertFalse(self.match._predicate( + ast.filter.Predicate(ns.bse.filename, reverse=False), + Partial(ast.filter.Predicate, predicate=ns.bse.filename, reverse=True), + )) + # predicate can remain unspecified + self.assertTrue(self.match._predicate( + ast.filter.Predicate(ns.bse.filename, reverse=False), + Partial(ast.filter.Predicate, reverse=False), + )) + self.assertTrue(self.match._predicate( + ast.filter.Predicate(ns.bse.filesize, reverse=False), + Partial(ast.filter.Predicate, reverse=False), + )) + self.assertFalse(self.match._predicate( + ast.filter.Predicate(ns.bse.filesize, reverse=False), + Partial(ast.filter.Predicate, reverse=True), + )) + # reverse can remain unspecified + self.assertTrue(self.match._predicate( + ast.filter.Predicate(ns.bse.filename, reverse=False), + Partial(ast.filter.Predicate, predicate=ns.bse.filename), + )) + self.assertTrue(self.match._predicate( + ast.filter.Predicate(ns.bse.filename, reverse=True), + Partial(ast.filter.Predicate, predicate=ns.bse.filename), + )) + self.assertFalse(self.match._predicate( + ast.filter.Predicate(ns.bse.filename, reverse=False), + Partial(ast.filter.Predicate, predicate=ns.bse.filesize), + )) + + def test_one_of(self): + A = ast.filter.Predicate(ns.bse.filename) + B = ast.filter.Predicate(ns.bse.filesize) + C = ast.filter.Predicate(ns.bse.filename, reverse=True) + # identical expressions match + self.assertTrue(self.match._one_of( + ast.filter.OneOf(A, B), + ast.filter.OneOf(A, B), + )) + # _one_of respects type + self.assertFalse(self.match._one_of( + ast.filter.OneOf(A, B), + ast.filter.Predicate(ns.bse.filesize, reverse=True), + )) + # _one_of respects child expressions + self.assertFalse(self.match._one_of( + ast.filter.OneOf(A, B), + ast.filter.OneOf(A, C), + )) + self.assertFalse(self.match._one_of( + ast.filter.OneOf(A, B), + ast.filter.OneOf(A), + )) + self.assertFalse(self.match._one_of( + ast.filter.OneOf(A, B), + ast.filter.OneOf(A, B, C), + )) + self.assertTrue(self.match._one_of( + ast.filter.OneOf(A, B), + ast.filter.OneOf(B, A), + )) + self.assertTrue(self.match._one_of( + ast.filter.OneOf(A, B), + ast.filter.OneOf(A, Any()), + )) + self.assertTrue(self.match._one_of( + ast.filter.OneOf(A, B), + ast.filter.OneOf(B, Rest()), + )) + + def test_branch(self): + # identical expressions match + self.assertTrue(self.match._branch( + ast.filter.Any(ns.bse.filename, ast.filter.Equals('hello')), + ast.filter.Any(ns.bse.filename, ast.filter.Equals('hello')), + )) + self.assertTrue(self.match._branch( + ast.filter.All(ns.bse.filename, ast.filter.Equals('hello')), + ast.filter.All(ns.bse.filename, ast.filter.Equals('hello')), + )) + # _agg respects type + self.assertFalse(self.match._branch( + 
ast.filter.Any(ns.bse.filename, ast.filter.Equals('hello')), + ast.filter.All(ns.bse.filename, ast.filter.Equals('hello')), + )) + self.assertFalse(self.match._branch( + ast.filter.All(ns.bse.filename, ast.filter.Equals('hello')), + ast.filter.Any(ns.bse.filename, ast.filter.Equals('hello')), + )) + self.assertFalse(self.match._branch( + ast.filter.Any(ns.bse.filename, ast.filter.Equals('hello')), + ast.filter.Equals('hello'), + )) + self.assertFalse(self.match._branch( + ast.filter.All(ns.bse.filename, ast.filter.Equals('hello')), + ast.filter.Equals('hello'), + )) + # _agg respects predicate expression + self.assertTrue(self.match._branch( + ast.filter.Any(ns.bse.filename, ast.filter.Equals('hello')), + ast.filter.Any(ast.filter.Predicate(ns.bse.filename), ast.filter.Equals('hello')), + )) + self.assertTrue(self.match._branch( + ast.filter.All(ns.bse.filename, ast.filter.Equals('hello')), + ast.filter.All(ast.filter.Predicate(ns.bse.filename), ast.filter.Equals('hello')), + )) + self.assertFalse(self.match._branch( + ast.filter.Any(ns.bse.filename, ast.filter.Equals('hello')), + ast.filter.Any(ns.bse.filesize, ast.filter.Equals('hello')), + )) + self.assertFalse(self.match._branch( + ast.filter.All(ns.bse.filename, ast.filter.Equals('hello')), + ast.filter.All(ns.bse.filesize, ast.filter.Equals('hello')), + )) + self.assertFalse(self.match._branch( + ast.filter.Any(ns.bse.filename, ast.filter.Equals('hello')), + ast.filter.Any(ast.filter.OneOf(ns.bse.filename), ast.filter.Equals('hello')), + )) + self.assertFalse(self.match._branch( + ast.filter.All(ns.bse.filename, ast.filter.Equals('hello')), + ast.filter.All(ast.filter.OneOf(ns.bse.filename), ast.filter.Equals('hello')), + )) + self.assertFalse(self.match._branch( + ast.filter.Any(ns.bse.filename, ast.filter.Equals('hello')), + ast.filter.Any(ast.filter.Predicate(ns.bse.filename, reverse=True), ast.filter.Equals('hello')), + )) + self.assertFalse(self.match._branch( + ast.filter.All(ns.bse.filename, ast.filter.Equals('hello')), + ast.filter.All(ast.filter.Predicate(ns.bse.filename, reverse=True), ast.filter.Equals('hello')), + )) + self.assertTrue(self.match._branch( + ast.filter.Any(ns.bse.filename, ast.filter.Equals('hello')), + ast.filter.Any(Any(), ast.filter.Equals('hello')), + )) + self.assertTrue(self.match._branch( + ast.filter.All(ns.bse.filename, ast.filter.Equals('hello')), + ast.filter.All(Any(), ast.filter.Equals('hello')), + )) + self.assertTrue(self.match._branch( + ast.filter.Any(ns.bse.filename, ast.filter.Equals('hello')), + ast.filter.Any(Partial(ast.filter.Predicate), ast.filter.Equals('hello')), + )) + self.assertTrue(self.match._branch( + ast.filter.All(ns.bse.filename, ast.filter.Equals('hello')), + ast.filter.All(Partial(ast.filter.Predicate), ast.filter.Equals('hello')), + )) + # _agg respects filter expression + self.assertFalse(self.match._branch( + ast.filter.Any(ns.bse.filename, ast.filter.Equals('hello')), + ast.filter.Any(ns.bse.filename, ast.filter.Substring('hello')), + )) + self.assertFalse(self.match._branch( + ast.filter.All(ns.bse.filename, ast.filter.Equals('hello')), + ast.filter.All(ns.bse.filename, ast.filter.Substring('hello')), + )) + self.assertFalse(self.match._branch( + ast.filter.Any(ns.bse.filename, ast.filter.Equals('hello')), + ast.filter.Any(ns.bse.filename, ast.filter.Any(Any(), Any())), + )) + self.assertFalse(self.match._branch( + ast.filter.All(ns.bse.filename, ast.filter.Equals('hello')), + ast.filter.All(ns.bse.filename, ast.filter.All(Any(), Any())), + )) + 
self.assertTrue(self.match._branch( + ast.filter.Any(ns.bse.filename, ast.filter.Equals('hello')), + ast.filter.Any(ns.bse.filename, Any()), + )) + self.assertTrue(self.match._branch( + ast.filter.All(ns.bse.filename, ast.filter.Equals('hello')), + ast.filter.All(ns.bse.filename, Any()), + )) + self.assertTrue(self.match._branch( + ast.filter.Any(ns.bse.filename, ast.filter.Equals('hello')), + ast.filter.Any(ns.bse.filename, Partial(ast.filter.Equals)), + )) + self.assertTrue(self.match._branch( + ast.filter.All(ns.bse.filename, ast.filter.Equals('hello')), + ast.filter.All(ns.bse.filename, Partial(ast.filter.Equals)), + )) + self.assertFalse(self.match._branch( + ast.filter.Any(ns.bse.filename, ast.filter.Equals('hello')), + ast.filter.Any(ns.bse.filename, Partial(ast.filter.Equals, value='world')), + )) + self.assertFalse(self.match._branch( + ast.filter.All(ns.bse.filename, ast.filter.Equals('hello')), + ast.filter.All(ns.bse.filename, Partial(ast.filter.Equals, value='world')), + )) + + def test_agg(self): + A = ast.filter.Equals('hello') + B = ast.filter.Equals('world') + C = ast.filter.Equals('foobar') + # identical expressions match + self.assertTrue(self.match._agg( + ast.filter.And(A, B), + ast.filter.And(A, B), + )) + self.assertTrue(self.match._agg( + ast.filter.Or(A, B), + ast.filter.Or(A, B), + )) + # _agg respects type + self.assertFalse(self.match._agg( + ast.filter.And(A, B), + ast.filter.Or(A, B), + )) + self.assertFalse(self.match._agg( + ast.filter.Or(A, B), + ast.filter.And(A, B), + )) + self.assertFalse(self.match._agg( + ast.filter.And(A, B), + ast.filter.Equals('hello'), + )) + self.assertFalse(self.match._agg( + ast.filter.Or(A, B), + ast.filter.Equals('hello'), + )) + # _agg respects child expressions + self.assertFalse(self.match._agg( + ast.filter.And(A, B), + ast.filter.And(A, ast.filter.Equals('bar')), + )) + self.assertFalse(self.match._agg( + ast.filter.Or(A, B), + ast.filter.Or(A, ast.filter.Equals('bar')), + )) + self.assertFalse(self.match._agg( + ast.filter.And(A, B), + ast.filter.And(A), + )) + self.assertFalse(self.match._agg( + ast.filter.Or(A, B), + ast.filter.Or(A), + )) + self.assertFalse(self.match._agg( + ast.filter.And(A, B), + ast.filter.And(A, B, C), + )) + self.assertFalse(self.match._agg( + ast.filter.Or(A, B), + ast.filter.Or(A, B, C), + )) + self.assertTrue(self.match._agg( + ast.filter.And(A, B), + ast.filter.And(B, A), + )) + self.assertTrue(self.match._agg( + ast.filter.Or(A, B), + ast.filter.Or(B, A), + )) + self.assertTrue(self.match._agg( + ast.filter.And(A, B), + ast.filter.And(A, Any()), + )) + self.assertTrue(self.match._agg( + ast.filter.Or(A, B), + ast.filter.Or(A, Any()), + )) + self.assertTrue(self.match._agg( + ast.filter.And(A, B), + ast.filter.And(B, Rest()), + )) + self.assertTrue(self.match._agg( + ast.filter.Or(A, B), + ast.filter.Or(B, Rest()), + )) + + def test_not(self): + # identical expressions match + self.assertTrue(self.match._not( + ast.filter.Not(ast.filter.Equals('hello')), + ast.filter.Not(ast.filter.Equals('hello')), + )) + # _not respects type + self.assertFalse(self.match._not( + ast.filter.Not(ast.filter.Equals('hello')), + ast.filter.Equals('hello'), + )) + # _not respects child expression + self.assertFalse(self.match._not( + ast.filter.Not(ast.filter.Equals('hello')), + ast.filter.Not(ast.filter.Equals('world')), + )) + self.assertFalse(self.match._not( + ast.filter.Not(ast.filter.Equals('hello')), + ast.filter.Not(ast.filter.Substring('hello')), + )) + self.assertTrue(self.match._not( + 
ast.filter.Not(ast.filter.Equals('hello')), + ast.filter.Not(Any()), + )) + + def test_has(self): + # identical expressions match + self.assertTrue(self.match._has( + ast.filter.Has(ns.bse.filesize), + ast.filter.Has(ns.bse.filesize), + )) + self.assertTrue(self.match._has( + ast.filter.Has(ns.bse.filesize, ast.filter.LessThan(3)), + ast.filter.Has(ns.bse.filesize, ast.filter.LessThan(3)), + )) + # _has respects type + self.assertFalse(self.match._has( + ast.filter.Has(ns.bse.filesize), + ast.filter.Equals('hello'), + )) + self.assertFalse(self.match._has( + ast.filter.Has(ns.bse.filesize), + ast.filter.Equals('hello'), + )) + # _has respects predicate + self.assertFalse(self.match._has( + ast.filter.Has(ns.bse.filesize, ast.filter.LessThan(3)), + ast.filter.Has(ns.bse.iso, ast.filter.LessThan(3)), + )) + self.assertTrue(self.match._has( + ast.filter.Has(ns.bse.filesize, ast.filter.LessThan(3)), + ast.filter.Has(Any(), ast.filter.LessThan(3)), + )) + self.assertTrue(self.match._has( + ast.filter.Has(ns.bse.filesize, ast.filter.LessThan(3)), + ast.filter.Has(Partial(ast.filter.Predicate), ast.filter.LessThan(3)), + )) + # _has respects count + self.assertFalse(self.match._has( + ast.filter.Has(ns.bse.filesize, ast.filter.LessThan(3)), + ast.filter.Has(ns.bse.filesize, ast.filter.GreaterThan(3)), + )) + self.assertFalse(self.match._has( + ast.filter.Has(ns.bse.filesize, ast.filter.LessThan(3)), + ast.filter.Has(ns.bse.filesize, ast.filter.LessThan(5)), + )) + self.assertTrue(self.match._has( + ast.filter.Has(ns.bse.filesize, ast.filter.LessThan(3)), + ast.filter.Has(ns.bse.filesize, Any()), + )) + self.assertTrue(self.match._has( + ast.filter.Has(ns.bse.filesize, ast.filter.LessThan(3)), + ast.filter.Has(ns.bse.filesize, Partial(ast.filter.LessThan)), + )) + + def test_distance(self): + # identical expressions match + self.assertTrue(self.match._distance( + ast.filter.Distance([1,2,3], 5, True), + ast.filter.Distance([1,2,3], 5, True), + )) + # _distance respects type + self.assertFalse(self.match._distance( + ast.filter.Distance([1,2,3], 5, True), + ast.filter.Equals('hello'), + )) + self.assertFalse(self.match._distance( + ast.filter.Distance([1,2,3], 5, True), + Partial(ast.filter.Equals), + )) + # _distance respects reference value + self.assertFalse(self.match._distance( + ast.filter.Distance([1,2,3], 5, True), + ast.filter.Distance([3,2,1], 5, True), + )) + self.assertTrue(self.match._distance( + ast.filter.Distance([1,2,3], 5, True), + Partial(ast.filter.Distance, threshold=5, strict=True), + )) + self.assertTrue(self.match._distance( + ast.filter.Distance([1,2,3], 5, True), + Partial(ast.filter.Distance, reference=[1,2,3], threshold=5, strict=True), + )) + self.assertFalse(self.match._distance( + ast.filter.Distance([1,2,3], 5, True), + Partial(ast.filter.Distance, reference=[3,2,1], threshold=5, strict=True), + )) + # _distance respects threshold + self.assertFalse(self.match._distance( + ast.filter.Distance([1,2,3], 5, True), + ast.filter.Distance([1,2,3], 8, True), + )) + self.assertTrue(self.match._distance( + ast.filter.Distance([1,2,3], 5, True), + Partial(ast.filter.Distance, reference=[1,2,3], strict=True), + )) + self.assertTrue(self.match._distance( + ast.filter.Distance([1,2,3], 5, True), + Partial(ast.filter.Distance, reference=[1,2,3], threshold=5, strict=True), + )) + self.assertFalse(self.match._distance( + ast.filter.Distance([1,2,3], 5, True), + Partial(ast.filter.Distance, reference=[1,2,3], threshold=8, strict=True), + )) + # _distance respects strict + 
self.assertFalse(self.match._distance( + ast.filter.Distance([1,2,3], 5, True), + ast.filter.Distance([1,2,3], 5, False), + )) + self.assertTrue(self.match._distance( + ast.filter.Distance([1,2,3], 5, True), + Partial(ast.filter.Distance, reference=[1,2,3], threshold=5), + )) + self.assertTrue(self.match._distance( + ast.filter.Distance([1,2,3], 5, True), + Partial(ast.filter.Distance, reference=[1,2,3], threshold=5, strict=True), + )) + self.assertFalse(self.match._distance( + ast.filter.Distance([1,2,3], 5, True), + Partial(ast.filter.Distance, reference=[1,2,3], threshold=5, strict=False), + )) + + def test_value(self): + # identical expressions match + self.assertTrue(self.match._value(ast.filter.Equals('hello'), ast.filter.Equals('hello'))) + self.assertTrue(self.match._value(ast.filter.Substring('hello'), ast.filter.Substring('hello'))) + self.assertTrue(self.match._value(ast.filter.StartsWith('hello'), ast.filter.StartsWith('hello'))) + self.assertTrue(self.match._value(ast.filter.EndsWith('hello'), ast.filter.EndsWith('hello'))) + self.assertTrue(self.match._value(ast.filter.Is('hello'), ast.filter.Is('hello'))) + # _value respects type + self.assertFalse(self.match._value(ast.filter.Equals('hello'), ast.filter.Is('hello'))) + self.assertFalse(self.match._value(ast.filter.Substring('hello'), ast.filter.Is('hello'))) + self.assertFalse(self.match._value(ast.filter.StartsWith('hello'), ast.filter.Is('hello'))) + self.assertFalse(self.match._value(ast.filter.EndsWith('hello'), ast.filter.Is('hello'))) + self.assertFalse(self.match._value(ast.filter.Is('hello'), ast.filter.Equals('hello'))) + # _value respects value + self.assertFalse(self.match._value(ast.filter.Equals('hello'), ast.filter.Equals('world'))) + self.assertFalse(self.match._value(ast.filter.Substring('hello'), ast.filter.Substring('world'))) + self.assertFalse(self.match._value(ast.filter.StartsWith('hello'), ast.filter.StartsWith('world'))) + self.assertFalse(self.match._value(ast.filter.EndsWith('hello'), ast.filter.EndsWith('world'))) + self.assertFalse(self.match._value(ast.filter.Is('hello'), ast.filter.Is('world'))) + # Partial requires correct type + self.assertFalse(self.match._value(ast.filter.Equals('hello'), Partial(ast.filter.Is))) + self.assertFalse(self.match._value(ast.filter.Substring('hello'), Partial(ast.filter.Is))) + self.assertFalse(self.match._value(ast.filter.StartsWith('hello'), Partial(ast.filter.Is))) + self.assertFalse(self.match._value(ast.filter.EndsWith('hello'), Partial(ast.filter.Is))) + self.assertFalse(self.match._value(ast.filter.Is('hello'), Partial(ast.filter.Equals))) + # value can be specified + self.assertTrue(self.match._value(ast.filter.Equals('hello'), Partial(ast.filter.Equals, value='hello'))) + self.assertFalse(self.match._value(ast.filter.Equals('hello'), Partial(ast.filter.Equals, value='world'))) + self.assertTrue(self.match._value(ast.filter.Substring('hello'), Partial(ast.filter.Substring, value='hello'))) + self.assertFalse(self.match._value(ast.filter.Substring('hello'), Partial(ast.filter.Substring, value='world'))) + self.assertTrue(self.match._value(ast.filter.StartsWith('hello'), Partial(ast.filter.StartsWith, value='hello'))) + self.assertFalse(self.match._value(ast.filter.StartsWith('hello'), Partial(ast.filter.StartsWith, value='world'))) + self.assertTrue(self.match._value(ast.filter.EndsWith('hello'), Partial(ast.filter.EndsWith, value='hello'))) + self.assertFalse(self.match._value(ast.filter.EndsWith('hello'), Partial(ast.filter.EndsWith, value='world'))) + 
self.assertTrue(self.match._value(ast.filter.Is('hello'), Partial(ast.filter.Is, value='hello'))) + self.assertFalse(self.match._value(ast.filter.Is('hello'), Partial(ast.filter.Is, value='world'))) + # value can remain unspecified + self.assertTrue(self.match._value(ast.filter.Equals('hello'), Partial(ast.filter.Equals))) + self.assertTrue(self.match._value(ast.filter.Substring('hello'), Partial(ast.filter.Substring))) + self.assertTrue(self.match._value(ast.filter.StartsWith('hello'), Partial(ast.filter.StartsWith))) + self.assertTrue(self.match._value(ast.filter.EndsWith('hello'), Partial(ast.filter.EndsWith))) + self.assertTrue(self.match._value(ast.filter.Is('hello'), Partial(ast.filter.Is))) + + def test_bounded(self): + # identical expressions match + self.assertTrue(self.match._bounded( + ast.filter.LessThan(threshold=3, strict=False), + ast.filter.LessThan(threshold=3, strict=False), + )) + self.assertTrue(self.match._bounded( + ast.filter.GreaterThan(threshold=3, strict=False), + ast.filter.GreaterThan(threshold=3, strict=False), + )) + # _bounded respects type + self.assertFalse(self.match._bounded( + ast.filter.LessThan(threshold=3, strict=False), + ast.filter.GreaterThan(threshold=3, strict=False), + )) + self.assertFalse(self.match._bounded( + ast.filter.GreaterThan(threshold=3, strict=False), + ast.filter.LessThan(threshold=3, strict=False), + )) + # _bounded respects threshold + self.assertFalse(self.match._bounded( + ast.filter.LessThan(threshold=3, strict=False), + ast.filter.LessThan(threshold=4, strict=False), + )) + self.assertFalse(self.match._bounded( + ast.filter.GreaterThan(threshold=3, strict=False), + ast.filter.GreaterThan(threshold=4, strict=False), + )) + # _bounded respects strict + self.assertFalse(self.match._bounded( + ast.filter.LessThan(threshold=3, strict=False), + ast.filter.LessThan(threshold=3, strict=True), + )) + self.assertFalse(self.match._bounded( + ast.filter.GreaterThan(threshold=3, strict=False), + ast.filter.GreaterThan(threshold=3, strict=True), + )) + # Partial requires correct type + self.assertFalse(self.match._bounded( + ast.filter.LessThan(threshold=3, strict=False), + Partial(ast.filter.GreaterThan), + )) + self.assertFalse(self.match._bounded( + ast.filter.GreaterThan(threshold=3, strict=False), + Partial(ast.filter.LessThan), + )) + # threshold and strict can be specified + self.assertTrue(self.match._bounded( + ast.filter.LessThan(threshold=3, strict=False), + Partial(ast.filter.LessThan, threshold=3, strict=False), + )) + self.assertTrue(self.match._bounded( + ast.filter.GreaterThan(threshold=3, strict=False), + Partial(ast.filter.GreaterThan, threshold=3, strict=False), + )) + self.assertFalse(self.match._bounded( + ast.filter.LessThan(threshold=3, strict=False), + Partial(ast.filter.LessThan, threshold=4, strict=False), + )) + self.assertFalse(self.match._bounded( + ast.filter.GreaterThan(threshold=3, strict=False), + Partial(ast.filter.GreaterThan, threshold=4, strict=False), + )) + self.assertFalse(self.match._bounded( + ast.filter.LessThan(threshold=3, strict=False), + Partial(ast.filter.LessThan, threshold=3, strict=True), + )) + self.assertFalse(self.match._bounded( + ast.filter.GreaterThan(threshold=3, strict=False), + Partial(ast.filter.GreaterThan, threshold=3, strict=True), + )) + # threshold can remain unspecified + self.assertTrue(self.match._bounded( + ast.filter.LessThan(threshold=3, strict=False), + Partial(ast.filter.LessThan, strict=False), + )) + self.assertTrue(self.match._bounded( + 
            ast.filter.GreaterThan(threshold=3, strict=False),
+            Partial(ast.filter.GreaterThan, strict=False),
+            ))
+        self.assertFalse(self.match._bounded(
+            ast.filter.LessThan(threshold=3, strict=False),
+            Partial(ast.filter.LessThan, strict=True),
+            ))
+        self.assertFalse(self.match._bounded(
+            ast.filter.GreaterThan(threshold=3, strict=False),
+            Partial(ast.filter.GreaterThan, strict=True),
+            ))
+        # strict can remain unspecified
+        self.assertTrue(self.match._bounded(
+            ast.filter.LessThan(threshold=3, strict=False),
+            Partial(ast.filter.LessThan, threshold=3),
+            ))
+        self.assertTrue(self.match._bounded(
+            ast.filter.GreaterThan(threshold=3, strict=False),
+            Partial(ast.filter.GreaterThan, threshold=3),
+            ))
+        self.assertFalse(self.match._bounded(
+            ast.filter.LessThan(threshold=3, strict=False),
+            Partial(ast.filter.LessThan, threshold=4),
+            ))
+        self.assertFalse(self.match._bounded(
+            ast.filter.GreaterThan(threshold=3, strict=False),
+            Partial(ast.filter.GreaterThan, threshold=4),
+            ))
+
+
+## main ##
+
+if __name__ == '__main__':
+    unittest.main()
+
+## EOF ##
-- cgit v1.2.3

From 1365e21ab9f13597d7fdb4feb0825453f32cae4b Mon Sep 17 00:00:00 2001
From: Matthias Baumgartner
Date: Wed, 8 Feb 2023 19:51:54 +0100
Subject: ucid from buffer and bytes

---
 bsfs/utils/uuid.py | 12 ++++++++++++
 test/utils/test_uuid.py | 10 ++++++++++
 2 files changed, 22 insertions(+)

diff --git a/bsfs/utils/uuid.py b/bsfs/utils/uuid.py
index ba5cf52..70e1656 100644
--- a/bsfs/utils/uuid.py
+++ b/bsfs/utils/uuid.py
@@ -7,6 +7,7 @@ Author: Matthias Baumgartner, 2022
 # imports
 from collections import abc
 import hashlib
+import io
 import json
 import os
 import platform
@@ -106,6 +107,17 @@ class UCID():
         with open(path, 'rb') as ifile:
             return HASH(ifile.read()).hexdigest()
 
+    @staticmethod
+    def from_buffer(buffer: io.IOBase) -> str:
+        """Return the content id of a buffer's content."""
+        if isinstance(buffer, io.TextIOBase):
+            return HASH(buffer.read().encode('utf-8', errors='ignore')).hexdigest()
+        return HASH(buffer.read()).hexdigest()
+
+    @staticmethod
+    def from_bytes(content: bytes) -> str:
+        """Return the content id of the given bytes."""
+        return HASH(content).hexdigest()
     @staticmethod
     def from_dict(content: dict) -> str:
diff --git a/test/utils/test_uuid.py b/test/utils/test_uuid.py
index 0de96ed..804b063 100644
--- a/test/utils/test_uuid.py
+++ b/test/utils/test_uuid.py
@@ -83,6 +83,16 @@ class TestUCID(unittest.TestCase):
     def test_from_path(self):
         self.assertEqual(UCID.from_path(self._path), self._checksum)
 
+    def test_from_buffer(self):
+        with open(self._path, 'rb') as ifile:
+            self.assertEqual(UCID.from_buffer(ifile), self._checksum)
+        with open(self._path) as ifile:
+            self.assertEqual(UCID.from_buffer(ifile), self._checksum)
+
+    def test_from_bytes(self):
+        with open(self._path, 'rb') as ifile:
+            self.assertEqual(UCID.from_bytes(ifile.read()), self._checksum)
+
     def test_from_dict(self):
         self.assertEqual(UCID.from_dict({'hello': 'world', 'foo': 1234, 'bar': False}),
             '8d2544395a0d2827e3d9ce8cd619d5e3f801e8126bf3f93ee5abd38158959585')
-- cgit v1.2.3

From c8fdaaa676afbdcf33344d72bd92b3ccb981cbf8 Mon Sep 17 00:00:00 2001
From: Matthias Baumgartner
Date: Wed, 8 Feb 2023 19:54:24 +0100
Subject: ast fixes

---
 bsfs/query/ast/fetch.py | 3 +--
 bsfs/query/ast/filter_.py | 4 ++--
 2 files changed, 3 insertions(+), 4 deletions(-)

diff --git a/bsfs/query/ast/fetch.py b/bsfs/query/ast/fetch.py
index 5e603a1..d653a8a 100644
--- a/bsfs/query/ast/fetch.py
+++ b/bsfs/query/ast/fetch.py
@@ -69,8 +69,7 @@ class All(FetchExpression):
         return
f'{typename(self)}({self.expr})' def __hash__(self) -> int: - # FIXME: Produces different hashes for different orders of self.expr - return hash((super().__hash__(), tuple(self.expr))) + return hash((super().__hash__(), tuple(sorted(self.expr, key=repr)))) def __eq__(self, other: typing.Any) -> bool: return super().__eq__(other) and self.expr == other.expr diff --git a/bsfs/query/ast/filter_.py b/bsfs/query/ast/filter_.py index 81b0de2..798d37f 100644 --- a/bsfs/query/ast/filter_.py +++ b/bsfs/query/ast/filter_.py @@ -173,7 +173,7 @@ class _Agg(FilterExpression, abc.Collection): return f'{typename(self)}({self.expr})' def __hash__(self) -> int: - return hash((super().__hash__(), tuple(self.expr))) # FIXME: Unique hash of different orders over self.expr + return hash((super().__hash__(), tuple(sorted(self.expr, key=repr)))) def __eq__(self, other) -> bool: return super().__eq__(other) and self.expr == other.expr @@ -450,7 +450,7 @@ class OneOf(PredicateExpression, abc.Collection): return f'{typename(self)}({self.expr})' def __hash__(self) -> int: - return hash((super().__hash__(), tuple(self.expr))) # FIXME: Unique hash of different orders over self.expr + return hash((super().__hash__(), tuple(sorted(self.expr, key=repr)))) def __eq__(self, other) -> bool: return super().__eq__(other) and self.expr == other.expr -- cgit v1.2.3 From cb819b8c268908b5f6cc680173db86e172847c46 Mon Sep 17 00:00:00 2001 From: Matthias Baumgartner Date: Wed, 8 Feb 2023 20:15:41 +0100 Subject: binary blob in schema and sparql triple store --- bsfs/schema/schema.py | 1 + bsfs/schema/types.py | 5 +++++ bsfs/triple_store/sparql/sparql.py | 13 +++++++++++-- test/graph/test_nodes.py | 1 + test/schema/test_schema.py | 19 ++++++++++--------- test/triple_store/sparql/test_sparql.py | 26 ++++++++++++++++++++++++++ 6 files changed, 54 insertions(+), 11 deletions(-) diff --git a/bsfs/schema/schema.py b/bsfs/schema/schema.py index 1644926..0de4203 100644 --- a/bsfs/schema/schema.py +++ b/bsfs/schema/schema.py @@ -69,6 +69,7 @@ class Schema(): literals.add(types.ROOT_LITERAL) predicates.add(types.ROOT_PREDICATE) # add minimally necessary types to the schema + literals.add(types.ROOT_BLOB) literals.add(types.ROOT_NUMBER) literals.add(types.ROOT_TIME) literals.add(types.ROOT_ARRAY) diff --git a/bsfs/schema/types.py b/bsfs/schema/types.py index 3a2e10c..12e7e94 100644 --- a/bsfs/schema/types.py +++ b/bsfs/schema/types.py @@ -380,6 +380,11 @@ ROOT_LITERAL = Literal( parent=None, ) +ROOT_BLOB = Literal( + uri=ns.bsfs.BinaryBlob, + parent=ROOT_LITERAL, + ) + ROOT_NUMBER = Literal( uri=ns.bsfs.Number, parent=ROOT_LITERAL, diff --git a/bsfs/triple_store/sparql/sparql.py b/bsfs/triple_store/sparql/sparql.py index a0dd12e..dbf9d45 100644 --- a/bsfs/triple_store/sparql/sparql.py +++ b/bsfs/triple_store/sparql/sparql.py @@ -5,8 +5,11 @@ A copy of the license is provided with the project. 
Author: Matthias Baumgartner, 2022 """ # imports +import base64 import itertools import typing + +# external imports import rdflib # bsfs imports @@ -30,6 +33,8 @@ __all__: typing.Sequence[str] = ( ## code ## +rdflib.term.bind(ns.bsfs.BinaryBlob, bytes, constructor=base64.b64decode) + class _Transaction(): """Lightweight rdflib transactions for in-memory databases.""" @@ -242,7 +247,7 @@ class SparqlStore(base.TripleStoreBase): ) -> typing.Iterator[URI]: if node_type not in self.schema.nodes(): raise errors.ConsistencyError(f'{node_type} is not defined in the schema') - if not isinstance(filter, ast.filter.FilterExpression): + if filter is not None and not isinstance(filter, ast.filter.FilterExpression): raise TypeError(filter) # compose query query = self._filter_parser(node_type, filter) @@ -334,7 +339,11 @@ class SparqlStore(base.TripleStoreBase): guid = rdflib.URIRef(guid) # convert value if isinstance(predicate.range, bsc.Literal): - value = rdflib.Literal(value, datatype=rdflib.URIRef(predicate.range.uri)) + dtype = rdflib.URIRef(predicate.range.uri) + if predicate.range <= self.schema.literal(ns.bsfs.BinaryBlob): + dtype = rdflib.URIRef(ns.bsfs.BinaryBlob) + value = base64.b64encode(value) + value = rdflib.Literal(value, datatype=dtype) elif isinstance(predicate.range, bsc.Node): value = rdflib.URIRef(value) else: diff --git a/test/graph/test_nodes.py b/test/graph/test_nodes.py index dabe794..6bb3ef3 100644 --- a/test/graph/test_nodes.py +++ b/test/graph/test_nodes.py @@ -92,6 +92,7 @@ class TestNodes(unittest.TestCase): (rdflib.URIRef(ns.bsfs.User), rdflib.RDFS.subClassOf, rdflib.URIRef(ns.bsfs.Node)), (rdflib.URIRef(ns.xsd.string), rdflib.RDFS.subClassOf, rdflib.URIRef(ns.bsfs.Literal)), (rdflib.URIRef(ns.bsfs.Array), rdflib.RDFS.subClassOf, rdflib.URIRef(ns.bsfs.Literal)), + (rdflib.URIRef(ns.bsfs.BinaryBlob), rdflib.RDFS.subClassOf, rdflib.URIRef(ns.bsfs.Literal)), (rdflib.URIRef(ns.bsfs.Feature), rdflib.RDFS.subClassOf, rdflib.URIRef(ns.bsfs.Array)), (rdflib.URIRef(ns.bsfs.Number), rdflib.RDFS.subClassOf, rdflib.URIRef(ns.bsfs.Literal)), (rdflib.URIRef(ns.bsfs.Time), rdflib.RDFS.subClassOf, rdflib.URIRef(ns.bsfs.Literal)), diff --git a/test/schema/test_schema.py b/test/schema/test_schema.py index 32dbc93..414e542 100644 --- a/test/schema/test_schema.py +++ b/test/schema/test_schema.py @@ -66,13 +66,14 @@ class TestSchema(unittest.TestCase): # literals self.l_root = types.ROOT_LITERAL self.l_number = types.ROOT_NUMBER + self.l_blob = types.ROOT_BLOB self.l_array = types.ROOT_ARRAY self.l_time = types.ROOT_TIME self.l_string = self.l_root.child(ns.xsd.string) self.l_integer = self.l_root.child(ns.xsd.integer) self.l_unused = self.l_root.child(ns.xsd.boolean) self.f_root = types.ROOT_FEATURE - self.literals = [self.l_root, self.l_array, self.f_root, self.l_number, self.l_time, self.l_string, self.l_integer, self.l_unused] + self.literals = [self.l_root, self.l_array, self.f_root, self.l_number, self.l_time, self.l_string, self.l_integer, self.l_unused, self.l_blob] # predicates self.p_root = types.ROOT_PREDICATE @@ -85,13 +86,13 @@ class TestSchema(unittest.TestCase): # no args yields a minimal schema schema = Schema() self.assertSetEqual(set(schema.nodes()), {self.n_root}) - self.assertSetEqual(set(schema.literals()), {self.l_root, self.l_number, self.l_array, self.l_time, self.f_root}) + self.assertSetEqual(set(schema.literals()), {self.l_root, self.l_number, self.l_array, self.l_time, self.f_root, self.l_blob}) self.assertSetEqual(set(schema.predicates()), {self.p_root}) # 
nodes and literals are optional schema = Schema(self.predicates) self.assertSetEqual(set(schema.nodes()), {self.n_root, self.n_ent, self.n_img, self.n_tag}) - self.assertSetEqual(set(schema.literals()), {self.l_root, self.l_string, self.l_number, self.l_time, self.l_array, self.f_root}) + self.assertSetEqual(set(schema.literals()), {self.l_root, self.l_string, self.l_number, self.l_time, self.l_array, self.f_root, self.l_blob}) self.assertSetEqual(set(schema.predicates()), set(self.predicates)) # predicates, nodes, and literals are respected @@ -112,13 +113,13 @@ class TestSchema(unittest.TestCase): # literals are complete schema = Schema(self.predicates, self.nodes, None) - self.assertSetEqual(set(schema.literals()), {self.l_root, self.l_string, self.l_number, self.l_array, self.l_time, self.f_root}) + self.assertSetEqual(set(schema.literals()), {self.l_root, self.l_string, self.l_number, self.l_array, self.l_time, self.f_root, self.l_blob}) schema = Schema(self.predicates, self.nodes, []) - self.assertSetEqual(set(schema.literals()), {self.l_root, self.l_string, self.l_number, self.l_array, self.l_time, self.f_root}) + self.assertSetEqual(set(schema.literals()), {self.l_root, self.l_string, self.l_number, self.l_array, self.l_time, self.f_root, self.l_blob}) schema = Schema(self.predicates, self.nodes, [self.l_string]) - self.assertSetEqual(set(schema.literals()), {self.l_root, self.l_string, self.l_number, self.l_array, self.l_time, self.f_root}) + self.assertSetEqual(set(schema.literals()), {self.l_root, self.l_string, self.l_number, self.l_array, self.l_time, self.f_root, self.l_blob}) schema = Schema(self.predicates, self.nodes, [self.l_integer]) - self.assertSetEqual(set(schema.literals()), {self.l_root, self.l_string, self.l_integer, self.l_number, self.l_array, self.l_time, self.f_root}) + self.assertSetEqual(set(schema.literals()), {self.l_root, self.l_string, self.l_integer, self.l_number, self.l_array, self.l_time, self.f_root, self.l_blob}) schema = Schema(self.predicates, self.nodes, [self.l_integer, self.l_unused]) self.assertSetEqual(set(schema.literals()), set(self.literals)) @@ -178,13 +179,13 @@ class TestSchema(unittest.TestCase): self.assertEqual(str(Schema(self.predicates, self.nodes, self.literals)), 'Schema()') # repr conversion with only default nodes, literals, and predicates n = [ns.bsfs.Node] - l = [ns.bsfs.Array, ns.bsfs.Feature, ns.bsfs.Literal, ns.bsfs.Number, ns.bsfs.Time] + l = [ns.bsfs.Array, ns.bsfs.BinaryBlob, ns.bsfs.Feature, ns.bsfs.Literal, ns.bsfs.Number, ns.bsfs.Time] p = [ns.bsfs.Predicate] self.assertEqual(repr(Schema()), f'Schema({n}, {l}, {p})') self.assertEqual(repr(Schema([], [], [])), f'Schema({n}, {l}, {p})') # repr conversion n = [ns.bsfs.Entity, ns.bsfs.Image, ns.bsfs.Node, ns.bsfs.Tag, ns.bsfs.Unused] - l = [ns.bsfs.Array, ns.bsfs.Feature, ns.bsfs.Literal, ns.bsfs.Number, ns.bsfs.Time, ns.xsd.boolean, ns.xsd.integer, ns.xsd.string] + l = [ns.bsfs.Array, ns.bsfs.BinaryBlob, ns.bsfs.Feature, ns.bsfs.Literal, ns.bsfs.Number, ns.bsfs.Time, ns.xsd.boolean, ns.xsd.integer, ns.xsd.string] p = [ns.bse.comment, ns.bse.group, ns.bse.tag, ns.bsfs.Predicate] self.assertEqual(repr(Schema(self.predicates, self.nodes, self.literals)), f'Schema({n}, {l}, {p})') diff --git a/test/triple_store/sparql/test_sparql.py b/test/triple_store/sparql/test_sparql.py index c58fae3..30876f2 100644 --- a/test/triple_store/sparql/test_sparql.py +++ b/test/triple_store/sparql/test_sparql.py @@ -34,6 +34,7 @@ class TestSparqlStore(unittest.TestCase): bsfs:User 
rdfs:subClassOf bsfs:Node . xsd:string rdfs:subClassOf bsfs:Literal . bsfs:Number rdfs:subClassOf bsfs:Literal . + bsfs:BinaryBlob rdfs:subClassOf bsfs:Literal . xsd:integer rdfs:subClassOf bsfs:Number . # non-unique literal @@ -60,6 +61,11 @@ class TestSparqlStore(unittest.TestCase): rdfs:range bsfs:User ; bsfs:unique "true"^^xsd:boolean . + # binary range + bse:asset rdfs:subClassOf bsfs:Predicate ; + rdfs:domain bsfs:Entity ; + rdfs:range bsfs:BinaryBlob . + ''') self.schema_triples = { # schema hierarchy @@ -68,6 +74,7 @@ class TestSparqlStore(unittest.TestCase): (rdflib.URIRef(ns.bsfs.User), rdflib.RDFS.subClassOf, rdflib.URIRef(ns.bsfs.Node)), (rdflib.URIRef(ns.xsd.string), rdflib.RDFS.subClassOf, rdflib.URIRef(ns.bsfs.Literal)), (rdflib.URIRef(ns.bsfs.Array), rdflib.RDFS.subClassOf, rdflib.URIRef(ns.bsfs.Literal)), + (rdflib.URIRef(ns.bsfs.BinaryBlob), rdflib.RDFS.subClassOf, rdflib.URIRef(ns.bsfs.Literal)), (rdflib.URIRef(ns.bsfs.Feature), rdflib.RDFS.subClassOf, rdflib.URIRef(ns.bsfs.Array)), (rdflib.URIRef(ns.bsfs.Number), rdflib.RDFS.subClassOf, rdflib.URIRef(ns.bsfs.Literal)), (rdflib.URIRef(ns.bsfs.Time), rdflib.RDFS.subClassOf, rdflib.URIRef(ns.bsfs.Literal)), @@ -76,6 +83,7 @@ class TestSparqlStore(unittest.TestCase): (rdflib.URIRef(ns.bse.filesize), rdflib.RDFS.subClassOf, rdflib.URIRef(ns.bsfs.Predicate)), (rdflib.URIRef(ns.bse.tag), rdflib.RDFS.subClassOf, rdflib.URIRef(ns.bsfs.Predicate)), (rdflib.URIRef(ns.bse.author), rdflib.RDFS.subClassOf, rdflib.URIRef(ns.bsfs.Predicate)), + (rdflib.URIRef(ns.bse.asset), rdflib.RDFS.subClassOf, rdflib.URIRef(ns.bsfs.Predicate)), } def test_essentials(self): @@ -358,6 +366,7 @@ class TestSparqlStore(unittest.TestCase): (rdflib.URIRef(ns.bsfs.User), rdflib.RDFS.subClassOf, rdflib.URIRef(ns.bsfs.Node)), (rdflib.URIRef(ns.xsd.boolean), rdflib.RDFS.subClassOf, rdflib.URIRef(ns.bsfs.Literal)), (rdflib.URIRef(ns.bsfs.Array), rdflib.RDFS.subClassOf, rdflib.URIRef(ns.bsfs.Literal)), + (rdflib.URIRef(ns.bsfs.BinaryBlob), rdflib.RDFS.subClassOf, rdflib.URIRef(ns.bsfs.Literal)), (rdflib.URIRef(ns.bsfs.Feature), rdflib.RDFS.subClassOf, rdflib.URIRef(ns.bsfs.Array)), (rdflib.URIRef(ns.bsfs.Number), rdflib.RDFS.subClassOf, rdflib.URIRef(ns.bsfs.Literal)), (rdflib.URIRef(ns.bsfs.Time), rdflib.RDFS.subClassOf, rdflib.URIRef(ns.bsfs.Literal)), @@ -931,6 +940,23 @@ class TestSparqlStore(unittest.TestCase): # inexistent guids self.assertRaises(errors.InstanceError, store.set, ent_type, {URI('http://example.com/me/entity#foobar')}, p_comment, {'xyz'}) + # BinaryBlob values are base64 encoded + p_asset = store.schema.predicate(ns.bse.asset) + store.set(ent_type, ent_ids, p_asset, {bytes(range(128)), bytes(range(128, 256))}) + blob1 = rdflib.Literal('AAECAwQFBgcICQoLDA0ODxAREhMUFRYXGBkaGxwdHh8gISIjJCUmJygpKissLS4vMDEyMzQ1Njc4OTo7PD0+P0BBQkNERUZHSElKS0xNTk9QUVJTVFVWV1hZWltcXV5fYGFiY2RlZmdoaWprbG1ub3BxcnN0dXZ3eHl6e3x9fn8=', + datatype=rdflib.URIRef(ns.bsfs.BinaryBlob)) + blob2 = rdflib.Literal('gIGCg4SFhoeIiYqLjI2Oj5CRkpOUlZaXmJmam5ydnp+goaKjpKWmp6ipqqusra6vsLGys7S1tre4ubq7vL2+v8DBwsPExcbHyMnKy8zNzs/Q0dLT1NXW19jZ2tvc3d7f4OHi4+Tl5ufo6err7O3u7/Dx8vP09fb3+Pn6+/z9/v8=', + datatype=rdflib.URIRef(ns.bsfs.BinaryBlob)) + self.assertTrue(set(store._graph).issuperset({ + (rdflib.URIRef('http://example.com/me/entity#1234'), rdflib.URIRef(p_asset.uri), blob1), + (rdflib.URIRef('http://example.com/me/entity#1234'), rdflib.URIRef(p_asset.uri), blob2), + (rdflib.URIRef('http://example.com/me/entity#4321'), rdflib.URIRef(p_asset.uri), blob1), + 
            (rdflib.URIRef('http://example.com/me/entity#4321'), rdflib.URIRef(p_asset.uri), blob2),
+            }))
+        # lit.value returns the original bytes value
+        self.assertSetEqual({lit.value for lit in store._graph.objects(None, rdflib.URIRef(p_asset.uri))},
+            {bytes(range(128)), bytes(range(128, 256))})
+

 ## main ##
-- cgit v1.2.3

From 64f3ac76a2f8d6b51380c06233accfcc19dca228 Mon Sep 17 00:00:00 2001
From: Matthias Baumgartner
Date: Wed, 8 Feb 2023 20:47:18 +0100
Subject: filter query convenience functions

---
 bsfs/query/ast/filter_.py | 58 ++++++++++++++++--
 test/query/ast_test/test_filter_.py | 118 ++++++++++++++++++++++++++++++++++--
 2 files changed, 165 insertions(+), 11 deletions(-)

diff --git a/bsfs/query/ast/filter_.py b/bsfs/query/ast/filter_.py
index 798d37f..44490fc 100644
--- a/bsfs/query/ast/filter_.py
+++ b/bsfs/query/ast/filter_.py
@@ -31,10 +31,7 @@ from collections import abc
 import typing

 # bsfs imports
-from bsfs.utils import URI, typename, normalize_args
-
-# inner-module imports
-#from . import utils
+from bsfs.utils import URI, errors, typename, normalize_args

 # exports
 __all__ : typing.Sequence[str] = (
@@ -460,10 +457,61 @@ class OneOf(PredicateExpression, abc.Collection):
 def IsIn(*values): # pylint: disable=invalid-name # explicitly mimics an expression
     """Match any of the given URIs."""
-    return Or(Is(value) for value in normalize_args(*values))
+    args = normalize_args(*values)
+    if len(args) == 0:
+        raise AttributeError('expected at least one value, found none')
+    if len(args) == 1:
+        return Is(args[0])
+    return Or(Is(value) for value in args)

 def IsNotIn(*values): # pylint: disable=invalid-name # explicitly mimics an expression
     """Match none of the given URIs."""
     return Not(IsIn(*values))
+
+def Between(
+        lo: float = float('-inf'),
+        hi: float = float('inf'),
+        lo_strict: bool = True,
+        hi_strict: bool = True,
+        ):
+    """Match numerical values between *lo* and *hi*. Include bounds if strict is False."""
+    if abs(lo) == hi == float('inf'):
+        raise ValueError('range cannot be INF on both sides')
+    if lo > hi:
+        raise ValueError(f'lower bound ({lo}) cannot be greater than the upper bound ({hi})')
+    if lo == hi and not lo_strict and not hi_strict:
+        return Equals(lo)
+    if lo == hi: # either bound is strict
+        raise ValueError(f'bounds cannot be equal when either is strict')
+    if lo != float('-inf') and hi != float('inf'):
+        return And(GreaterThan(lo, lo_strict), LessThan(hi, hi_strict))
+    if lo != float('-inf'):
+        return GreaterThan(lo, lo_strict)
+    # hi != float('inf'):
+    return LessThan(hi, hi_strict)
+
+
+def Includes(*values, approx: bool = False):
+    """Match any of the given *values*. Uses `Substring` if *approx* is set."""
+    args = normalize_args(*values)
+    cls = Substring if approx else Equals
+    if len(args) == 0:
+        raise AttributeError('expected at least one value, found none')
+    if len(args) == 1:
+        return cls(args[0])
+    return Or(cls(v) for v in args)
+
+
+def Excludes(*values, approx: bool = False):
+    """Match none of the given *values*.
Uses `Substring` if *approx* is set.""" + args = normalize_args(*values) + cls = Substring if approx else Equals + if len(args) == 0: + raise AttributeError('expected at least one value, found none') + if len(args) == 1: + return Not(cls(args[0])) + return Not(Or(cls(v) for v in args)) + + ## EOF ## diff --git a/test/query/ast_test/test_filter_.py b/test/query/ast_test/test_filter_.py index 9eb92e2..39b98f8 100644 --- a/test/query/ast_test/test_filter_.py +++ b/test/query/ast_test/test_filter_.py @@ -20,6 +20,7 @@ from bsfs.query.ast.filter_ import _Value, Is, Equals, Substring, StartsWith, En from bsfs.query.ast.filter_ import _Bounded, LessThan, GreaterThan from bsfs.query.ast.filter_ import Predicate, OneOf from bsfs.query.ast.filter_ import IsIn, IsNotIn +from bsfs.query.ast.filter_ import Includes, Excludes, Between ## code ## @@ -456,13 +457,15 @@ class TestOneOf(unittest.TestCase): self.assertEqual(len(OneOf(Predicate(ns.bse.filesize), Predicate(ns.bse.filename), Predicate(ns.bse.tag))), 3) - def testIsIn(self): + def test_IsIn(self): + # cannot pass zero arguments + self.assertRaises(AttributeError, IsIn) # can pass expressions as arguments self.assertEqual(IsIn('http://example.com/entity#1234', 'http://example.com/entity#4321'), Or(Is('http://example.com/entity#1234'), Is('http://example.com/entity#4321'))) # can pass one expression as argument self.assertEqual(IsIn('http://example.com/entity#1234'), - Or(Is('http://example.com/entity#1234'))) + Is('http://example.com/entity#1234')) # can pass expressions as iterator self.assertEqual(IsIn(iter(('http://example.com/entity#1234', 'http://example.com/entity#4321'))), Or(Is('http://example.com/entity#1234'), Is('http://example.com/entity#4321'))) @@ -477,16 +480,18 @@ class TestOneOf(unittest.TestCase): Or(Is('http://example.com/entity#1234'), Is('http://example.com/entity#4321'))) # can pass one expression as list-like self.assertEqual(IsIn(['http://example.com/entity#1234']), - Or(Is('http://example.com/entity#1234'))) + Is('http://example.com/entity#1234')) - def testIsNotIn(self): + def test_IsNotIn(self): + # cannot pass zero arguments + self.assertRaises(AttributeError, IsNotIn) # can pass expressions as arguments self.assertEqual(IsNotIn('http://example.com/entity#1234', 'http://example.com/entity#4321'), Not(Or(Is('http://example.com/entity#1234'), Is('http://example.com/entity#4321')))) # can pass one expression as argument self.assertEqual(IsNotIn('http://example.com/entity#1234'), - Not(Or(Is('http://example.com/entity#1234')))) + Not(Is('http://example.com/entity#1234'))) # can pass expressions as iterator self.assertEqual(IsNotIn(iter(('http://example.com/entity#1234', 'http://example.com/entity#4321'))), Not(Or(Is('http://example.com/entity#1234'), Is('http://example.com/entity#4321')))) @@ -501,9 +506,110 @@ class TestOneOf(unittest.TestCase): Not(Or(Is('http://example.com/entity#1234'), Is('http://example.com/entity#4321')))) # can pass one expression as list-like self.assertEqual(IsNotIn(['http://example.com/entity#1234']), - Not(Or(Is('http://example.com/entity#1234')))) + Not(Is('http://example.com/entity#1234'))) + def test_Includes(self): + # cannot pass zero arguments + self.assertRaises(AttributeError, Includes) + # can pass expressions as arguments + self.assertEqual(Includes('hello', 'world'), + Or(Equals('hello'), Equals('world'))) + self.assertEqual(Includes('hello', 'world', approx=True), + Or(Substring('hello'), Substring('world'))) + # can pass one expression as argument + 
self.assertEqual(Includes('hello'), + Equals('hello')) + self.assertEqual(Includes('hello', approx=True), + Substring('hello')) + # can pass expressions as iterator + self.assertEqual(Includes(iter(('hello', 'world'))), + Or(Equals('hello'), Equals('world'))) + self.assertEqual(Includes(iter(('hello', 'world')), approx=True), + Or(Substring('hello'), Substring('world'))) + # can pass expressions as generator + def gen(): + yield 'hello' + yield 'world' + self.assertEqual(Includes(gen()), + Or(Equals('hello'), Equals('world'))) + self.assertEqual(Includes(gen(), approx=True), + Or(Substring('hello'), Substring('world'))) + # can pass expressions as list-like + self.assertEqual(Includes(['hello', 'world']), + Or(Equals('hello'), Equals('world'))) + self.assertEqual(Includes(['hello', 'world'], approx=True), + Or(Substring('hello'), Substring('world'))) + # can pass one expression as list-like + self.assertEqual(Includes(['hello']), + Equals('hello')) + self.assertEqual(Includes(['hello'], approx=True), + Substring('hello')) + + + def test_Excludes(self): + # cannot pass zero arguments + self.assertRaises(AttributeError, Excludes) + # can pass expressions as arguments + self.assertEqual(Excludes('hello', 'world'), + Not(Or(Equals('hello'), Equals('world')))) + self.assertEqual(Excludes('hello', 'world', approx=True), + Not(Or(Substring('hello'), Substring('world')))) + # can pass one expression as argument + self.assertEqual(Excludes('hello'), + Not(Equals('hello'))) + self.assertEqual(Excludes('hello', approx=True), + Not(Substring('hello'))) + # can pass expressions as iterator + self.assertEqual(Excludes(iter(('hello', 'world'))), + Not(Or(Equals('hello'), Equals('world')))) + self.assertEqual(Excludes(iter(('hello', 'world')), approx=True), + Not(Or(Substring('hello'), Substring('world')))) + # can pass expressions as generator + def gen(): + yield 'hello' + yield 'world' + self.assertEqual(Excludes(gen()), + Not(Or(Equals('hello'), Equals('world')))) + self.assertEqual(Excludes(gen(), approx=True), + Not(Or(Substring('hello'), Substring('world')))) + # can pass expressions as list-like + self.assertEqual(Excludes(['hello', 'world']), + Not(Or(Equals('hello'), Equals('world')))) + self.assertEqual(Excludes(['hello', 'world'], approx=True), + Not(Or(Substring('hello'), Substring('world')))) + # can pass one expression as list-like + self.assertEqual(Excludes(['hello']), + Not(Equals('hello'))) + self.assertEqual(Excludes(['hello'], approx=True), + Not(Substring('hello'))) + + + def test_Between(self): + # must specify at least one bound + self.assertRaises(ValueError, Between, float('inf'), float('inf')) + # lower bound must be less than the upper bound + self.assertRaises(ValueError, Between, 321, 123) + # can set a lower bound only + self.assertEqual(Between(123), + GreaterThan(123, strict=True)) + self.assertEqual(Between(123, lo_strict=False), + GreaterThan(123, strict=False)) + # can set an upper bound only + self.assertEqual(Between(hi=123), + LessThan(123, strict=True)) + self.assertEqual(Between(hi=123, hi_strict=False), + LessThan(123, strict=False)) + # can set both bounds + self.assertEqual(Between(123, 321), + And(GreaterThan(123, strict=True), LessThan(321, strict=True))) + self.assertEqual(Between(123, 321, False, False), + And(GreaterThan(123, strict=False), LessThan(321, strict=False))) + # can set identical bounds + self.assertRaises(ValueError, Between, 123, 123) + self.assertEqual(Between(123, 123, False, False), + Equals(123)) + ## main ## -- cgit v1.2.3 From 
f31a0d005785d474a37ec769c1f7f5e27aa08a57 Mon Sep 17 00:00:00 2001 From: Matthias Baumgartner Date: Wed, 8 Feb 2023 21:08:24 +0100 Subject: minor comments --- bsfs/graph/nodes.py | 2 ++ bsfs/graph/resolve.py | 1 + bsfs/graph/result.py | 2 ++ bsfs/graph/walk.py | 4 ++-- bsfs/query/ast/filter_.py | 17 +++++++++-------- bsfs/query/validator.py | 1 + 6 files changed, 17 insertions(+), 10 deletions(-) diff --git a/bsfs/graph/nodes.py b/bsfs/graph/nodes.py index 9990714..bc71a32 100644 --- a/bsfs/graph/nodes.py +++ b/bsfs/graph/nodes.py @@ -199,6 +199,7 @@ class Nodes(): """Get values or nodes at *paths*. Return an iterator (view=list) or a dict (view=dict) over the results. """ + # FIXME: user-provided Fetch query AST? # check args if len(paths) == 0: raise AttributeError('expected at least one path, found none') @@ -345,6 +346,7 @@ class Nodes(): elif isinstance(pred.range, bsc.Node): # check value type + # FIXME: value could be a set of Nodes if not isinstance(value, Nodes): raise TypeError(value) # value's node_type must be a subclass of the predicate's range diff --git a/bsfs/graph/resolve.py b/bsfs/graph/resolve.py index 00b778b..4677401 100644 --- a/bsfs/graph/resolve.py +++ b/bsfs/graph/resolve.py @@ -41,6 +41,7 @@ class Filter(): self.schema = schema def __call__(self, root_type: bsc.Node, node: ast.filter.FilterExpression): + # FIXME: node can be None! return self._parse_filter_expression(root_type, node) def _parse_filter_expression( diff --git a/bsfs/graph/result.py b/bsfs/graph/result.py index 00607f4..31822f1 100644 --- a/bsfs/graph/result.py +++ b/bsfs/graph/result.py @@ -109,10 +109,12 @@ def to_dict_view( # FIXME: Combine multiple Nodes instances into one? # convert defaultdict to ordinary dict + # pylint: disable=too-many-boolean-expressions if not node and not path and not value \ and len(unique_paths) > 0 and one_node and one_path \ and len(data) == 0: return default + # pylint: enable=too-many-boolean-expressions if not node and not path: return data if node ^ path: diff --git a/bsfs/graph/walk.py b/bsfs/graph/walk.py index 63ef5e9..1b1cfa0 100644 --- a/bsfs/graph/walk.py +++ b/bsfs/graph/walk.py @@ -88,9 +88,9 @@ class Walk(abc.Hashable, abc.Callable): # type: ignore [misc] # invalid base cla if pred.uri.get('fragment', None) == name ) if len(predicates) == 0: # no fragment found for name - raise ValueError(f'no available predicate matches {name}') + raise ValueError(f'no available predicate matches {name}') # FIXME: Custom exception if len(predicates) > 1: # ambiguous name - raise ValueError(f'{name} matches multiple predicates') + raise ValueError(f'{name} matches multiple predicates') # FIXME: Custom exception # append predicate to walk return predicates # type: ignore [return-value] # size is one diff --git a/bsfs/query/ast/filter_.py b/bsfs/query/ast/filter_.py index 44490fc..b29d89e 100644 --- a/bsfs/query/ast/filter_.py +++ b/bsfs/query/ast/filter_.py @@ -31,7 +31,7 @@ from collections import abc import typing # bsfs imports -from bsfs.utils import URI, errors, typename, normalize_args +from bsfs.utils import URI, typename, normalize_args # exports __all__ : typing.Sequence[str] = ( @@ -454,8 +454,9 @@ class OneOf(PredicateExpression, abc.Collection): # Helpers +# invalid-name is disabled since they explicitly mimic an expression -def IsIn(*values): # pylint: disable=invalid-name # explicitly mimics an expression +def IsIn(*values) -> FilterExpression: # pylint: disable=invalid-name """Match any of the given URIs.""" args = normalize_args(*values) if len(args) == 0: 
@@ -464,17 +465,17 @@ def IsIn(*values): # pylint: disable=invalid-name # explicitly mimics an express return Is(args[0]) return Or(Is(value) for value in args) -def IsNotIn(*values): # pylint: disable=invalid-name # explicitly mimics an expression +def IsNotIn(*values) -> FilterExpression: # pylint: disable=invalid-name """Match none of the given URIs.""" return Not(IsIn(*values)) -def Between( +def Between( # pylint: disable=invalid-name lo: float = float('-inf'), hi: float = float('inf'), lo_strict: bool = True, hi_strict: bool = True, - ): + ) -> FilterExpression : """Match numerical values between *lo* and *hi*. Include bounds if strict is False.""" if abs(lo) == hi == float('inf'): raise ValueError('range cannot be INF on both sides') @@ -483,7 +484,7 @@ def Between( if lo == hi and not lo_strict and not hi_strict: return Equals(lo) if lo == hi: # either bound is strict - raise ValueError(f'bounds cannot be equal when either is strict') + raise ValueError('bounds cannot be equal when either is strict') if lo != float('-inf') and hi != float('inf'): return And(GreaterThan(lo, lo_strict), LessThan(hi, hi_strict)) if lo != float('-inf'): @@ -492,7 +493,7 @@ def Between( return LessThan(hi, hi_strict) -def Includes(*values, approx: bool = False): +def Includes(*values, approx: bool = False) -> FilterExpression: # pylint: disable=invalid-name """Match any of the given *values*. Uses `Substring` if *approx* is set.""" args = normalize_args(*values) cls = Substring if approx else Equals @@ -503,7 +504,7 @@ def Includes(*values, approx: bool = False): return Or(cls(v) for v in args) -def Excludes(*values, approx: bool = False): +def Excludes(*values, approx: bool = False) -> FilterExpression: # pylint: disable=invalid-name """Match none of the given *values*. Uses `Substring` if *approx* is set.""" args = normalize_args(*values) cls = Substring if approx else Equals diff --git a/bsfs/query/validator.py b/bsfs/query/validator.py index 9fbff12..f0aa795 100644 --- a/bsfs/query/validator.py +++ b/bsfs/query/validator.py @@ -20,6 +20,7 @@ __all__ : typing.Sequence[str] = ( 'Filter', ) +# FIXME: Split into a submodule and the two classes into their own respective files. 
## code ## -- cgit v1.2.3 From c0218a8dffcdc3a7a5568f66bb959139fe514ad5 Mon Sep 17 00:00:00 2001 From: Matthias Baumgartner Date: Wed, 8 Feb 2023 21:14:36 +0100 Subject: Graph.all to retrieve all nodes --- bsfs/graph/graph.py | 8 ++++++++ test/graph/test_graph.py | 13 +++++++++++++ 2 files changed, 21 insertions(+) diff --git a/bsfs/graph/graph.py b/bsfs/graph/graph.py index 2210755..df2e3a5 100644 --- a/bsfs/graph/graph.py +++ b/bsfs/graph/graph.py @@ -133,4 +133,12 @@ class Graph(): # return Nodes instance return _nodes.Nodes(self._backend, self._user, type_, guids) + def all(self, node_type: URI) -> _nodes.Nodes: + """Return all instances of type *node_type*.""" + # get node type + type_ = self.schema.node(node_type) + guids = self._backend.get(type_, None) # no need to materialize + return _nodes.Nodes(self._backend, self._user, type_, guids) + + ## EOF ## diff --git a/test/graph/test_graph.py b/test/graph/test_graph.py index f97783b..5db1fd2 100644 --- a/test/graph/test_graph.py +++ b/test/graph/test_graph.py @@ -95,6 +95,19 @@ class TestGraph(unittest.TestCase): # node_type must be in the schema self.assertRaises(KeyError, graph.nodes, ns.bsfs.Invalid, guids) + def test_all(self): + graph = Graph(self.backend, self.user) + # resulting nodes can be empty + self.assertEqual(graph.all(ns.bsfs.Entity), + Nodes(self.backend, self.user, graph.schema.node(ns.bsfs.Entity), set())) + # resulting nodes contains all nodes of the respective type + guids = {URI('http://example.com/me/entity#1234'), URI('http://example.com/me/entity#4321')} + self.backend.create(graph.schema.node(ns.bsfs.Entity), guids) + self.assertEqual(graph.all(ns.bsfs.Entity), + Nodes(self.backend, self.user, graph.schema.node(ns.bsfs.Entity), guids)) + # node_type must be in the schema + self.assertRaises(KeyError, graph.all, ns.bsfs.Invalid) + def test_migrate(self): # setup graph = Graph(self.backend, self.user) -- cgit v1.2.3 From f9eec185bf3d857c220e5d78de75ec6713437330 Mon Sep 17 00:00:00 2001 From: Matthias Baumgartner Date: Wed, 1 Mar 2023 12:39:42 +0100 Subject: Construct Graph and Nodes with AC instead of user --- bsfs/front/builder.py | 6 +- bsfs/graph/ac/base.py | 16 ++++- bsfs/graph/graph.py | 35 +++++++---- bsfs/graph/nodes.py | 41 ++++++------ test/front/test_bsfs.py | 3 +- test/front/test_builder.py | 3 +- test/graph/ac/test_base.py | 83 +++++++++++++++++++++++++ test/graph/ac/test_null.py | 30 +++++++++ test/graph/test_graph.py | 74 +++++++++++----------- test/graph/test_nodes.py | 151 +++++++++++++++++++++++---------------------- test/graph/test_walk.py | 4 +- 11 files changed, 297 insertions(+), 149 deletions(-) create mode 100644 test/graph/ac/test_base.py diff --git a/bsfs/front/builder.py b/bsfs/front/builder.py index 73f1703..ecdc768 100644 --- a/bsfs/front/builder.py +++ b/bsfs/front/builder.py @@ -8,7 +8,7 @@ Author: Matthias Baumgartner, 2022 import typing # bsfs imports -from bsfs.graph import Graph +from bsfs.graph import Graph, ac from bsfs.triple_store import TripleStoreBase, SparqlStore from bsfs.utils import URI, errors @@ -68,8 +68,10 @@ def build_graph(cfg: typing.Any) -> Graph: if 'backend' not in args: raise errors.ConfigError('required argument "backend" is not provided') backend = build_backend(args['backend']) + # build access controls + access_controls = ac.NullAC(backend, user) # build and return graph cls = _graph_classes[name] - return cls(backend, user) + return cls(backend, access_controls) ## EOF ## diff --git a/bsfs/graph/ac/base.py b/bsfs/graph/ac/base.py index 79b09e5..0b9f988 
100644 --- a/bsfs/graph/ac/base.py +++ b/bsfs/graph/ac/base.py @@ -12,7 +12,7 @@ import typing from bsfs import schema from bsfs.query import ast from bsfs.triple_store import TripleStoreBase -from bsfs.utils import URI +from bsfs.utils import URI, typename # exports __all__: typing.Sequence[str] = ( @@ -44,6 +44,20 @@ class AccessControlBase(abc.ABC): self._backend = backend self._user = URI(user) + def __str__(self) -> str: + return f'{typename(self)}({self._user})' + + def __repr__(self) -> str: + return f'{typename(self)}({self._user})' + + def __eq__(self, other: typing.Any) -> bool: + return isinstance(other, type(self)) \ + and self._backend == other._backend \ + and self._user == other._user + + def __hash__(self) -> int: + return hash((type(self), self._backend, self._user)) + @abc.abstractmethod def is_protected_predicate(self, pred: schema.Predicate) -> bool: """Return True if a predicate cannot be modified manually.""" diff --git a/bsfs/graph/graph.py b/bsfs/graph/graph.py index df2e3a5..a74da01 100644 --- a/bsfs/graph/graph.py +++ b/bsfs/graph/graph.py @@ -40,31 +40,42 @@ class Graph(): # link to the triple storage backend. _backend: TripleStoreBase - # user uri. - _user: URI + # access controls. + _ac: ac.AccessControlBase - def __init__(self, backend: TripleStoreBase, user: URI): + # query resolver. + _resolver: resolve.Filter + + # query validator. + _validate: validate.Filter + + def __init__( + self, + backend: TripleStoreBase, + access_control: ac.AccessControlBase, + ): + # store members self._backend = backend - self._user = user + self._ac = access_control + # helper classes self._resolver = resolve.Filter(self._backend.schema) self._validate = validate.Filter(self._backend.schema) - self._ac = ac.NullAC(self._backend, self._user) # ensure Graph schema requirements self.migrate(self._backend.schema) def __hash__(self) -> int: - return hash((type(self), self._backend, self._user)) + return hash((type(self), self._backend, self._ac)) def __eq__(self, other) -> bool: return isinstance(other, type(self)) \ and self._backend == other._backend \ - and self._user == other._user + and self._ac == other._ac def __repr__(self) -> str: - return f'{typename(self)}(backend={repr(self._backend)}, user={self._user})' + return f'{typename(self)}({repr(self._backend)}, {self._ac})' def __str__(self) -> str: - return f'{typename(self)}({str(self._backend)}, {self._user})' + return f'{typename(self)}({str(self._backend)})' @property def schema(self) -> bsc.Schema: @@ -106,7 +117,7 @@ class Graph(): """ type_ = self.schema.node(node_type) # NOTE: Nodes constructor materializes guids. - return _nodes.Nodes(self._backend, self._user, type_, guids) + return _nodes.Nodes(self._backend, self._ac, type_, guids) def node(self, node_type: URI, guid: URI) -> _nodes.Nodes: """Return node *guid* of type *node_type* as a `bsfs.graph.Nodes` instance. 
@@ -131,14 +142,14 @@ class Graph(): # query the backend guids = self._backend.get(type_, query) # no need to materialize # return Nodes instance - return _nodes.Nodes(self._backend, self._user, type_, guids) + return _nodes.Nodes(self._backend, self._ac, type_, guids) def all(self, node_type: URI) -> _nodes.Nodes: """Return all instances of type *node_type*.""" # get node type type_ = self.schema.node(node_type) guids = self._backend.get(type_, None) # no need to materialize - return _nodes.Nodes(self._backend, self._user, type_, guids) + return _nodes.Nodes(self._backend, self._ac, type_, guids) ## EOF ## diff --git a/bsfs/graph/nodes.py b/bsfs/graph/nodes.py index bc71a32..91cbb5d 100644 --- a/bsfs/graph/nodes.py +++ b/bsfs/graph/nodes.py @@ -37,8 +37,8 @@ class Nodes(): # triple store backend. _backend: TripleStoreBase - # user uri. - _user: URI + # access controls. + _ac: ac.AccessControlBase # node type. _node_type: bsc.Node @@ -49,31 +49,30 @@ class Nodes(): def __init__( self, backend: TripleStoreBase, - user: URI, + access_control: ac.AccessControlBase, node_type: bsc.Node, guids: typing.Iterable[URI], ): # set main members self._backend = backend - self._user = user + self._ac = access_control self._node_type = node_type self._guids = set(guids) # create helper instances # FIXME: Assumes that the schema does not change while the instance is in use! - self._ac = ac.NullAC(self._backend, self._user) def __eq__(self, other: typing.Any) -> bool: return isinstance(other, Nodes) \ and self._backend == other._backend \ - and self._user == other._user \ + and self._ac == other._ac \ and self._node_type == other._node_type \ and self._guids == other._guids def __hash__(self) -> int: - return hash((type(self), self._backend, self._user, self._node_type, tuple(sorted(self._guids)))) + return hash((type(self), self._backend, self._ac, self._node_type, tuple(sorted(self._guids)))) def __repr__(self) -> str: - return f'{typename(self)}({self._backend}, {self._user}, {self._node_type}, {self._guids})' + return f'{typename(self)}({self._backend}, {self._ac}, {self._node_type}, {self._guids})' def __str__(self) -> str: return f'{typename(self)}({self._node_type}, {self._guids})' @@ -94,44 +93,44 @@ class Nodes(): return self._backend.schema def __add__(self, other: typing.Any) -> 'Nodes': - """Concatenate guids. Backend, user, and node type must match.""" + """Concatenate guids. Backend, AC, and node type must match.""" if not isinstance(other, type(self)): return NotImplemented if self._backend != other._backend: raise ValueError(other) - if self._user != other._user: + if self._ac != other._ac: raise ValueError(other) if self.node_type != other.node_type: raise ValueError(other) - return Nodes(self._backend, self._user, self.node_type, self._guids | other._guids) + return Nodes(self._backend, self._ac, self.node_type, self._guids | other._guids) def __or__(self, other: typing.Any) -> 'Nodes': - """Concatenate guids. Backend, user, and node type must match.""" + """Concatenate guids. Backend, AC, and node type must match.""" return self.__add__(other) def __sub__(self, other: typing.Any) -> 'Nodes': - """Subtract guids. Backend, user, and node type must match.""" + """Subtract guids. 
Backend, AC, and node type must match.""" if not isinstance(other, type(self)): return NotImplemented if self._backend != other._backend: raise ValueError(other) - if self._user != other._user: + if self._ac != other._ac: raise ValueError(other) if self.node_type != other.node_type: raise ValueError(other) - return Nodes(self._backend, self._user, self.node_type, self._guids - other._guids) + return Nodes(self._backend, self._ac, self.node_type, self._guids - other._guids) def __and__(self, other: typing.Any) -> 'Nodes': - """Intersect guids. Backend, user, and node type must match.""" + """Intersect guids. Backend, AC, and node type must match.""" if not isinstance(other, type(self)): return NotImplemented if self._backend != other._backend: raise ValueError(other) - if self._user != other._user: + if self._ac != other._ac: raise ValueError(other) if self.node_type != other.node_type: raise ValueError(other) - return Nodes(self._backend, self._user, self.node_type, self._guids & other._guids) + return Nodes(self._backend, self._ac, self.node_type, self._guids & other._guids) def __len__(self) -> int: """Return the number of guids.""" @@ -140,7 +139,7 @@ class Nodes(): def __iter__(self) -> typing.Iterator['Nodes']: """Iterate over individual guids. Returns `Nodes` instances.""" return iter( - Nodes(self._backend, self._user, self.node_type, {guid}) + Nodes(self._backend, self._ac, self.node_type, {guid}) for guid in self._guids ) @@ -266,12 +265,12 @@ class Nodes(): # process triples for root, name, raw in triples: # get node - node = Nodes(self._backend, self._user, self.node_type, {root}) + node = Nodes(self._backend, self._ac, self.node_type, {root}) # get path path, tail = name2path[name] # covert raw to value if isinstance(tail.range, bsc.Node): - value = Nodes(self._backend, self._user, tail.range, {raw}) + value = Nodes(self._backend, self._ac, tail.range, {raw}) else: value = raw # emit triple diff --git a/test/front/test_bsfs.py b/test/front/test_bsfs.py index 0d7f383..4eb36c3 100644 --- a/test/front/test_bsfs.py +++ b/test/front/test_bsfs.py @@ -9,6 +9,7 @@ import unittest # bsie imports from bsfs.graph import Graph +from bsfs.graph.ac import NullAC from bsfs.triple_store import SparqlStore from bsfs.utils import errors, URI @@ -25,7 +26,7 @@ class TestBSFS(unittest.TestCase): graph = Open(config) self.assertIsInstance(graph, Graph) self.assertIsInstance(graph._backend, SparqlStore) - self.assertEqual(graph._user, URI('http://example.com/me')) + self.assertEqual(graph._ac, NullAC(graph._backend, URI('http://example.com/me'))) # invalid config raises an error self.assertRaises(errors.ConfigError, Open, {}) diff --git a/test/front/test_builder.py b/test/front/test_builder.py index 08f2027..0328a0a 100644 --- a/test/front/test_builder.py +++ b/test/front/test_builder.py @@ -9,6 +9,7 @@ import unittest # bsie imports from bsfs.graph import Graph +from bsfs.graph.ac import NullAC from bsfs.triple_store import SparqlStore from bsfs.utils import errors, URI @@ -40,7 +41,7 @@ class TestBuilder(unittest.TestCase): graph = build_graph({'Graph': {'backend': {'SparqlStore': {}}, 'user': 'http://example.com/me'}}) self.assertIsInstance(graph, Graph) self.assertIsInstance(graph._backend, SparqlStore) - self.assertEqual(graph._user, URI('http://example.com/me')) + self.assertEqual(graph._ac, NullAC(graph._backend, URI('http://example.com/me'))) # cannot create an invalid graph self.assertRaises(errors.ConfigError, build_graph, {'MyGraph': {}}) # must pass a dict diff --git 
a/test/graph/ac/test_base.py b/test/graph/ac/test_base.py new file mode 100644 index 0000000..ad24e3d --- /dev/null +++ b/test/graph/ac/test_base.py @@ -0,0 +1,83 @@ +""" + +Part of the bsfs test suite. +A copy of the license is provided with the project. +Author: Matthias Baumgartner, 2022 +""" +# imports +import unittest + +# bsie imports +from bsfs import schema as bsc +from bsfs.namespace import ns +from bsfs.query import ast +from bsfs.triple_store import SparqlStore +from bsfs.utils import URI + +# objects to test +from bsfs.graph.ac.base import AccessControlBase + + +## code ## + +class StubAC(AccessControlBase): + def is_protected_predicate(self, pred): + pass + def create(self, node_type, guids): + pass + def link_from_node(self, node_type, guids): + pass + def link_to_node(self, node_type, guids): + pass + def write_literal(self, node_type, guids): + pass + def createable(self, node_type, guids): + pass + def filter_read(self, node_type, query): + pass + def fetch_read(self, node_type, query): + pass + + +class TestAccessControlBase(unittest.TestCase): + def setUp(self): + self.backend = SparqlStore() + self.user = URI('http://www.example.com/me') + + def test_essentials(self): + ac = StubAC(self.backend, self.user) + # equal construction means equal instance + self.assertEqual(StubAC(self.backend, self.user), StubAC(self.backend, self.user)) + self.assertEqual(hash(StubAC(self.backend, self.user)), hash(StubAC(self.backend, self.user))) + self.assertEqual(ac, StubAC(self.backend, self.user)) + self.assertEqual(hash(ac), hash(StubAC(self.backend, self.user))) + # equivalence respects type + class Foo(): pass + self.assertNotEqual(ac, 1234) + self.assertNotEqual(hash(ac), hash(1234)) + self.assertNotEqual(ac, 'hello') + self.assertNotEqual(hash(ac), hash('hello')) + self.assertNotEqual(ac, Foo()) + self.assertNotEqual(hash(ac), hash(Foo())) + # equivalence respects backend + self.assertNotEqual(ac, StubAC(SparqlStore(), self.user)) + self.assertNotEqual(hash(ac), hash(StubAC(SparqlStore(), self.user))) + # equivalence respects user + self.assertNotEqual(ac, StubAC(self.backend, URI('http://www.example.com/you'))) + self.assertNotEqual(hash(ac), hash(StubAC(self.backend, URI('http://www.example.com/you')))) + # string conversion + self.assertEqual(str(ac), f'StubAC({self.user})') + self.assertEqual(repr(ac), f'StubAC({self.user})') + # string conversion respects user + self.assertEqual(str(StubAC(self.backend, URI('http://www.example.com/you'))), + f'StubAC(http://www.example.com/you)') + self.assertEqual(repr(StubAC(self.backend, URI('http://www.example.com/you'))), + f'StubAC(http://www.example.com/you)') + + +## main ## + +if __name__ == '__main__': + unittest.main() + +## EOF ## diff --git a/test/graph/ac/test_null.py b/test/graph/ac/test_null.py index e35852d..7d25980 100644 --- a/test/graph/ac/test_null.py +++ b/test/graph/ac/test_null.py @@ -68,6 +68,36 @@ class TestNullAC(unittest.TestCase): self.ent_type = self.backend.schema.node(ns.bsfs.Entity) self.ent_ids = {URI('http://www.example.com/me/entity#1234'), URI('http://www.example.com/me/entity#4321')} + def test_essentials(self): + ac = NullAC(self.backend, self.user) + # equal construction means equal instance + self.assertEqual(NullAC(self.backend, self.user), NullAC(self.backend, self.user)) + self.assertEqual(hash(NullAC(self.backend, self.user)), hash(NullAC(self.backend, self.user))) + self.assertEqual(ac, NullAC(self.backend, self.user)) + self.assertEqual(hash(ac), hash(NullAC(self.backend, self.user))) + # 
equivalence respects type + class Foo(): pass + self.assertNotEqual(ac, 1234) + self.assertNotEqual(hash(ac), hash(1234)) + self.assertNotEqual(ac, 'hello') + self.assertNotEqual(hash(ac), hash('hello')) + self.assertNotEqual(ac, Foo()) + self.assertNotEqual(hash(ac), hash(Foo())) + # equivalence respects backend + self.assertNotEqual(ac, NullAC(SparqlStore(), self.user)) + self.assertNotEqual(hash(ac), hash(NullAC(SparqlStore(), self.user))) + # equivalence respects user + self.assertNotEqual(ac, NullAC(self.backend, URI('http://www.example.com/you'))) + self.assertNotEqual(hash(ac), hash(NullAC(self.backend, URI('http://www.example.com/you')))) + # string conversion + self.assertEqual(str(ac), f'NullAC({self.user})') + self.assertEqual(repr(ac), f'NullAC({self.user})') + # string conversion respects user + self.assertEqual(str(NullAC(self.backend, URI('http://www.example.com/you'))), + f'NullAC(http://www.example.com/you)') + self.assertEqual(repr(NullAC(self.backend, URI('http://www.example.com/you'))), + f'NullAC(http://www.example.com/you)') + def test_is_protected_predicate(self): ac = NullAC(self.backend, self.user) self.assertTrue(ac.is_protected_predicate(self.p_created)) diff --git a/test/graph/test_graph.py b/test/graph/test_graph.py index 5db1fd2..d89d346 100644 --- a/test/graph/test_graph.py +++ b/test/graph/test_graph.py @@ -9,6 +9,7 @@ import unittest # bsie imports from bsfs import schema +from bsfs.graph.ac import NullAC from bsfs.graph.nodes import Nodes from bsfs.namespace import ns from bsfs.query import ast @@ -23,94 +24,95 @@ from bsfs.graph.graph import Graph class TestGraph(unittest.TestCase): def setUp(self): - self.user = URI('http://example.com/me') self.backend = SparqlStore.Open() self.backend.schema = schema.from_string(''' prefix rdfs: prefix bsfs: bsfs:Entity rdfs:subClassOf bsfs:Node . 
''') + self.user = URI('http://example.com/me') + self.ac = NullAC(self.backend, self.user) def test_str(self): - self.assertEqual(str(Graph(self.backend, self.user)), - 'Graph(SparqlStore(uri=None), http://example.com/me)') - self.assertEqual(repr(Graph(self.backend, self.user)), - 'Graph(backend=SparqlStore(uri=None), user=http://example.com/me)') + self.assertEqual(str(Graph(self.backend, self.ac)), + 'Graph(SparqlStore(uri=None))') + self.assertEqual(repr(Graph(self.backend, self.ac)), + 'Graph(SparqlStore(uri=None), NullAC(http://example.com/me))') # str respects backend class Foo(SparqlStore): pass - self.assertEqual(str(Graph(Foo.Open(), self.user)), - 'Graph(Foo(uri=None), http://example.com/me)') - self.assertEqual(repr(Graph(Foo.Open(), self.user)), - 'Graph(backend=Foo(uri=None), user=http://example.com/me)') + self.assertEqual(str(Graph(Foo.Open(), self.ac)), + 'Graph(Foo(uri=None))') + self.assertEqual(repr(Graph(Foo.Open(), self.ac)), + 'Graph(Foo(uri=None), NullAC(http://example.com/me))') # str respect user - self.assertEqual(str(Graph(self.backend, URI('http://example.com/you'))), - 'Graph(SparqlStore(uri=None), http://example.com/you)') - self.assertEqual(repr(Graph(self.backend, URI('http://example.com/you'))), - 'Graph(backend=SparqlStore(uri=None), user=http://example.com/you)') + self.assertEqual(str(Graph(self.backend, NullAC(self.backend, URI('http://example.com/you')))), + 'Graph(SparqlStore(uri=None))') + self.assertEqual(repr(Graph(self.backend, NullAC(self.backend, URI('http://example.com/you')))), + 'Graph(SparqlStore(uri=None), NullAC(http://example.com/you))') # str respects type class Bar(Graph): pass - self.assertEqual(str(Bar(self.backend, self.user)), - 'Bar(SparqlStore(uri=None), http://example.com/me)') - self.assertEqual(repr(Bar(self.backend, self.user)), - 'Bar(backend=SparqlStore(uri=None), user=http://example.com/me)') + self.assertEqual(str(Bar(self.backend, self.ac)), + 'Bar(SparqlStore(uri=None))') + self.assertEqual(repr(Bar(self.backend, self.ac)), + 'Bar(SparqlStore(uri=None), NullAC(http://example.com/me))') def test_equality(self): - graph = Graph(self.backend, self.user) + graph = Graph(self.backend, self.ac) # instance is equal to itself self.assertEqual(graph, graph) self.assertEqual(hash(graph), hash(graph)) # instance is equal to a clone - self.assertEqual(graph, Graph(self.backend, self.user)) - self.assertEqual(hash(graph), hash(Graph(self.backend, self.user))) + self.assertEqual(graph, Graph(self.backend, self.ac)) + self.assertEqual(hash(graph), hash(Graph(self.backend, self.ac))) # equality respects backend - self.assertNotEqual(graph, Graph(SparqlStore.Open(), self.user)) - self.assertNotEqual(hash(graph), hash(Graph(SparqlStore.Open(), self.user))) + self.assertNotEqual(graph, Graph(SparqlStore.Open(), self.ac)) + self.assertNotEqual(hash(graph), hash(Graph(SparqlStore.Open(), self.ac))) # equality respects user self.assertNotEqual(graph, Graph(self.backend, URI('http://example.com/you'))) self.assertNotEqual(hash(graph), hash(Graph(self.backend, URI('http://example.com/you')))) def test_essentials(self): - graph = Graph(self.backend, self.user) + graph = Graph(self.backend, self.ac) # schema self.assertEqual(graph.schema, self.backend.schema) self.assertRaises(AttributeError, setattr, graph, 'schema', None) def test_node(self): - graph = Graph(self.backend, self.user) + graph = Graph(self.backend, self.ac) guid = URI('http://example.com/me/entity#1234') # returns a Nodes instance self.assertEqual( graph.node(ns.bsfs.Entity, 
guid), - Nodes(self.backend, self.user, graph.schema.node(ns.bsfs.Entity), {guid})) + Nodes(self.backend, self.ac, graph.schema.node(ns.bsfs.Entity), {guid})) # node_type must be in the schema self.assertRaises(KeyError, graph.node, ns.bsfs.Invalid, guid) def test_nodes(self): - graph = Graph(self.backend, self.user) + graph = Graph(self.backend, self.ac) guids = {URI('http://example.com/me/entity#1234'), URI('http://example.com/me/entity#4321')} # returns a Nodes instance self.assertEqual( graph.nodes(ns.bsfs.Entity, guids), - Nodes(self.backend, self.user, graph.schema.node(ns.bsfs.Entity), guids)) + Nodes(self.backend, self.ac, graph.schema.node(ns.bsfs.Entity), guids)) # node_type must be in the schema self.assertRaises(KeyError, graph.nodes, ns.bsfs.Invalid, guids) def test_all(self): - graph = Graph(self.backend, self.user) + graph = Graph(self.backend, self.ac) # resulting nodes can be empty self.assertEqual(graph.all(ns.bsfs.Entity), - Nodes(self.backend, self.user, graph.schema.node(ns.bsfs.Entity), set())) + Nodes(self.backend, self.ac, graph.schema.node(ns.bsfs.Entity), set())) # resulting nodes contains all nodes of the respective type guids = {URI('http://example.com/me/entity#1234'), URI('http://example.com/me/entity#4321')} self.backend.create(graph.schema.node(ns.bsfs.Entity), guids) self.assertEqual(graph.all(ns.bsfs.Entity), - Nodes(self.backend, self.user, graph.schema.node(ns.bsfs.Entity), guids)) + Nodes(self.backend, self.ac, graph.schema.node(ns.bsfs.Entity), guids)) # node_type must be in the schema self.assertRaises(KeyError, graph.all, ns.bsfs.Invalid) def test_migrate(self): # setup - graph = Graph(self.backend, self.user) + graph = Graph(self.backend, self.ac) # argument must be a schema class Foo(): pass @@ -162,10 +164,10 @@ class TestGraph(unittest.TestCase): prefix bsfs: prefix bsm: bsfs:Number rdfs:subClassOf bsfs:Literal . - xsd:integer rdfs:subClassOf bsfs:Number . + xsd:float rdfs:subClassOf bsfs:Number . bsm:t_created rdfs:subClassOf bsfs:Predicate ; rdfs:domain bsfs:Node ; - rdfs:range xsd:integer ; + rdfs:range xsd:float ; bsfs:unique "true"^^xsd:boolean . ''')) @@ -203,16 +205,16 @@ class TestGraph(unittest.TestCase): prefix bsfs: prefix bsm: bsfs:Number rdfs:subClassOf bsfs:Literal . - xsd:integer rdfs:subClassOf bsfs:Number . + xsd:float rdfs:subClassOf bsfs:Number . bsm:t_created rdfs:subClassOf bsfs:Predicate ; rdfs:domain bsfs:Node ; - rdfs:range xsd:integer ; + rdfs:range xsd:float ; bsfs:unique "true"^^xsd:boolean . 
''')) def test_get(self): # setup - graph = Graph(self.backend, self.user) + graph = Graph(self.backend, self.ac) graph.migrate(schema.from_string(''' prefix rdfs: prefix xsd: diff --git a/test/graph/test_nodes.py b/test/graph/test_nodes.py index 6bb3ef3..9541656 100644 --- a/test/graph/test_nodes.py +++ b/test/graph/test_nodes.py @@ -14,6 +14,7 @@ import rdflib # bsie imports from bsfs import schema as bsc +from bsfs.graph.ac import NullAC from bsfs.graph.walk import Walk from bsfs.namespace import Namespace, ns from bsfs.triple_store.sparql import SparqlStore @@ -107,6 +108,7 @@ class TestNodes(unittest.TestCase): } # Nodes constructor args self.user = URI('http://example.com/me') + self.ac = NullAC(self.backend, self.user) # set args self.tag_type = self.backend.schema.node(ns.bsfs.Tag) self.ent_type = self.backend.schema.node(ns.bsfs.Entity) @@ -128,65 +130,65 @@ class TestNodes(unittest.TestCase): def test_str(self): # str baseline - nodes = Nodes(self.backend, self.user, self.ent_type, self.ent_ids) + nodes = Nodes(self.backend, self.ac, self.ent_type, self.ent_ids) self.assertEqual(str(nodes), f'Nodes({self.ent_type}, {self.ent_ids})') - self.assertEqual(repr(nodes), f'Nodes({self.backend}, {self.user}, {self.ent_type}, {self.ent_ids})') + self.assertEqual(repr(nodes), f'Nodes({self.backend}, {self.ac}, {self.ent_type}, {self.ent_ids})') # str respects node_type - nodes = Nodes(self.backend, self.user, self.tag_type, self.tag_ids) + nodes = Nodes(self.backend, self.ac, self.tag_type, self.tag_ids) self.assertEqual(str(nodes), f'Nodes({self.tag_type}, {self.tag_ids})') - self.assertEqual(repr(nodes), f'Nodes({self.backend}, {self.user}, {self.tag_type}, {self.tag_ids})') + self.assertEqual(repr(nodes), f'Nodes({self.backend}, {self.ac}, {self.tag_type}, {self.tag_ids})') # str respects guids - nodes = Nodes(self.backend, self.user, self.ent_type, {URI('http://example.com/me/entity#foo')}) + nodes = Nodes(self.backend, self.ac, self.ent_type, {URI('http://example.com/me/entity#foo')}) self.assertEqual(str(nodes), f'Nodes({self.ent_type}, {{\'http://example.com/me/entity#foo\'}})') - self.assertEqual(repr(nodes), f'Nodes({self.backend}, {self.user}, {self.ent_type}, {{\'http://example.com/me/entity#foo\'}})') + self.assertEqual(repr(nodes), f'Nodes({self.backend}, {self.ac}, {self.ent_type}, {{\'http://example.com/me/entity#foo\'}})') # repr respects backend class Foo(SparqlStore): pass backend = Foo.Open() backend.schema = self.backend.schema - nodes = Nodes(backend, self.user, self.ent_type, self.ent_ids) - self.assertEqual(repr(nodes), f'Nodes({backend}, {self.user}, {self.ent_type}, {self.ent_ids})') + nodes = Nodes(backend, self.ac, self.ent_type, self.ent_ids) + self.assertEqual(repr(nodes), f'Nodes({backend}, {self.ac}, {self.ent_type}, {self.ent_ids})') # repr respects user - nodes = Nodes(self.backend, URI('http://example.com/you'), self.ent_type, self.ent_ids) - self.assertEqual(repr(nodes), f'Nodes({self.backend}, http://example.com/you, {self.ent_type}, {self.ent_ids})') + nodes = Nodes(self.backend, NullAC(self.backend, URI('http://example.com/you')), self.ent_type, self.ent_ids) + self.assertEqual(repr(nodes), f'Nodes({self.backend}, NullAC(http://example.com/you), {self.ent_type}, {self.ent_ids})') def test_equality(self): - nodes = Nodes(self.backend, self.user, self.ent_type, self.ent_ids) + nodes = Nodes(self.backend, self.ac, self.ent_type, self.ent_ids) # instance is equal to itself self.assertEqual(nodes, nodes) self.assertEqual(hash(nodes), hash(nodes)) # instance 
is equal to a clone - self.assertEqual(nodes, Nodes(self.backend, self.user, self.ent_type, self.ent_ids)) - self.assertEqual(Nodes(self.backend, self.user, self.ent_type, self.ent_ids), nodes) - self.assertEqual(hash(nodes), hash(Nodes(self.backend, self.user, self.ent_type, self.ent_ids))) + self.assertEqual(nodes, Nodes(self.backend, self.ac, self.ent_type, self.ent_ids)) + self.assertEqual(Nodes(self.backend, self.ac, self.ent_type, self.ent_ids), nodes) + self.assertEqual(hash(nodes), hash(Nodes(self.backend, self.ac, self.ent_type, self.ent_ids))) # equality respects backend backend = SparqlStore.Open() backend.schema = self.backend.schema - self.assertNotEqual(nodes, Nodes(backend, self.user, self.ent_type, self.ent_ids)) - self.assertNotEqual(hash(nodes), hash(Nodes(backend, self.user, self.ent_type, self.ent_ids))) + self.assertNotEqual(nodes, Nodes(backend, self.ac, self.ent_type, self.ent_ids)) + self.assertNotEqual(hash(nodes), hash(Nodes(backend, self.ac, self.ent_type, self.ent_ids))) # equality respects user - self.assertNotEqual(nodes, Nodes(self.backend, URI('http://example.com/you'), self.ent_type, self.ent_ids)) - self.assertNotEqual(hash(nodes), hash(Nodes(self.backend, URI('http://example.com/you'), self.ent_type, self.ent_ids))) + self.assertNotEqual(nodes, Nodes(self.backend, NullAC(self.backend, URI('http://example.com/you')), self.ent_type, self.ent_ids)) + self.assertNotEqual(hash(nodes), hash(Nodes(self.backend, NullAC(self.backend, URI('http://example.com/you')), self.ent_type, self.ent_ids))) # equality respects node_type - self.assertNotEqual(nodes, Nodes(self.backend, self.user, self.tag_type, self.ent_ids)) - self.assertNotEqual(hash(nodes), hash(Nodes(self.backend, self.user, self.tag_type, self.ent_ids))) + self.assertNotEqual(nodes, Nodes(self.backend, self.ac, self.tag_type, self.ent_ids)) + self.assertNotEqual(hash(nodes), hash(Nodes(self.backend, self.ac, self.tag_type, self.ent_ids))) # equality respects guids - self.assertNotEqual(nodes, Nodes(self.backend, self.user, self.ent_type, self.tag_ids)) - self.assertNotEqual(hash(nodes), hash(Nodes(self.backend, self.user, self.ent_type, self.tag_ids))) + self.assertNotEqual(nodes, Nodes(self.backend, self.ac, self.ent_type, self.tag_ids)) + self.assertNotEqual(hash(nodes), hash(Nodes(self.backend, self.ac, self.ent_type, self.tag_ids))) def test_properties(self): # node_type self.assertEqual(self.ent_type, Nodes( - self.backend, self.user, self.ent_type, self.ent_ids).node_type) + self.backend, self.ac, self.ent_type, self.ent_ids).node_type) self.assertEqual(self.tag_type, Nodes( - self.backend, self.user, self.tag_type, self.tag_ids).node_type) + self.backend, self.ac, self.tag_type, self.tag_ids).node_type) # guids self.assertSetEqual(self.ent_ids, set(Nodes( - self.backend, self.user, self.ent_type, self.ent_ids).guids)) + self.backend, self.ac, self.ent_type, self.ent_ids).guids)) self.assertSetEqual(self.tag_ids, set(Nodes( - self.backend, self.user, self.tag_type, self.tag_ids).guids)) + self.backend, self.ac, self.tag_type, self.tag_ids).guids)) def test__ensure_nodes(self): - nodes = Nodes(self.backend, self.user, self.ent_type, self.ent_ids) + nodes = Nodes(self.backend, self.ac, self.ent_type, self.ent_ids) # missing nodes are created self.assertSetEqual(self.ent_ids, nodes._ensure_nodes(self.ent_type, self.ent_ids)) @@ -235,7 +237,7 @@ class TestNodes(unittest.TestCase): def test___set(self): # setup - nodes = Nodes(self.backend, self.user, self.ent_type, 
{URI('http://example.com/me/entity#1234'), URI('http://example.com/me/entity#4321')}) + nodes = Nodes(self.backend, self.ac, self.ent_type, {URI('http://example.com/me/entity#1234'), URI('http://example.com/me/entity#4321')}) self.assertSetEqual(set(self.backend._graph), self.schema_triples | set()) set_ = nodes._Nodes__set @@ -264,7 +266,7 @@ class TestNodes(unittest.TestCase): }) # set node value - tags = Nodes(self.backend, self.user, self.tag_type, {URI('http://example.com/me/tag#1234'), URI('http://example.com/me/tag#4321')}) + tags = Nodes(self.backend, self.ac, self.tag_type, {URI('http://example.com/me/tag#1234'), URI('http://example.com/me/tag#4321')}) set_(self.p_tag.uri, tags) # get creation time from backend manually time_triples = list(self.backend._graph.objects(rdflib.URIRef('http://example.com/me/tag#1234'), rdflib.URIRef(self.t_created.uri))) @@ -296,11 +298,11 @@ class TestNodes(unittest.TestCase): self.assertRaises(TypeError, set_, self.p_tag.uri, URI('http://example.com/me/tag#1234')) # value's node_type must match the predicate's range self.assertRaises(errors.ConsistencyError, set_, self.p_tag.uri, - Nodes(self.backend, self.user, self.ent_type, self.ent_ids)) + Nodes(self.backend, self.ac, self.ent_type, self.ent_ids)) def test_set(self): self.assertSetEqual(set(self.backend._graph), self.schema_triples | set()) - nodes = Nodes(self.backend, self.user, self.ent_type, self.ent_ids) + nodes = Nodes(self.backend, self.ac, self.ent_type, self.ent_ids) # can set literal values self.assertEqual(nodes, nodes.set(self.p_filesize.uri, 1234)) self.assertTrue(set(self.backend._graph).issuperset({ @@ -312,7 +314,7 @@ class TestNodes(unittest.TestCase): (rdflib.URIRef('http://example.com/me/entity#4321'), rdflib.URIRef(self.p_filesize.uri), rdflib.Literal(1234, datatype=rdflib.XSD.integer)), })) # can set node values - self.assertEqual(nodes, nodes.set(self.p_tag.uri, Nodes(self.backend, self.user, self.tag_type, self.tag_ids))) + self.assertEqual(nodes, nodes.set(self.p_tag.uri, Nodes(self.backend, self.ac, self.tag_type, self.tag_ids))) self.assertTrue(set(self.backend._graph).issuperset({ # nodes exist (rdflib.URIRef('http://example.com/me/entity#1234'), rdflib.RDF.type, rdflib.URIRef('http://bsfs.ai/schema/Entity')), @@ -341,17 +343,17 @@ class TestNodes(unittest.TestCase): self.assertSetEqual(curr, set(self.backend._graph)) # cannot assing multiple values to unique predicate self.assertRaises(ValueError, nodes.set, self.p_author.uri, - Nodes(self.backend, self.user, self.user_type, {URI('http://example.com/me/user#1234'), URI('http://example.com/me/user#4321')})) + Nodes(self.backend, self.ac, self.user_type, {URI('http://example.com/me/user#1234'), URI('http://example.com/me/user#4321')})) self.assertSetEqual(curr, set(self.backend._graph)) def test_set_from_iterable(self): self.assertSetEqual(set(self.backend._graph), self.schema_triples | set()) - nodes = Nodes(self.backend, self.user, self.ent_type, self.ent_ids) + nodes = Nodes(self.backend, self.ac, self.ent_type, self.ent_ids) # can set literal and node values simultaneously self.assertEqual(nodes, nodes.set_from_iterable({ self.p_filesize.uri: 1234, - self.p_tag.uri: Nodes(self.backend, self.user, self.tag_type, self.tag_ids), + self.p_tag.uri: Nodes(self.backend, self.ac, self.tag_type, self.tag_ids), }.items())) self.assertTrue(set(self.backend._graph).issuperset({ # nodes exist @@ -383,24 +385,24 @@ class TestNodes(unittest.TestCase): self.assertSetEqual(curr, set(self.backend._graph)) # cannot assing multiple 
values to unique predicate self.assertRaises(ValueError, nodes.set_from_iterable, ((self.p_filesize.uri, 1234), - (self.p_author.uri, Nodes(self.backend, self.user, self.user_type, {URI('http://example.com/me/user#1234'), URI('http://example.com/me/user#4321')})))) + (self.p_author.uri, Nodes(self.backend, self.ac, self.user_type, {URI('http://example.com/me/user#1234'), URI('http://example.com/me/user#4321')})))) self.assertSetEqual(curr, set(self.backend._graph)) def test_get(self): # setup: add some instances - Nodes(self.backend, self.user, self.ent_type, {'http://example.com/me/entity#1234'}) \ + Nodes(self.backend, self.ac, self.ent_type, {'http://example.com/me/entity#1234'}) \ .set(ns.bse.comment, 'hello world') \ .set(ns.bse.filesize, 1234) \ - .set(ns.bse.tag, Nodes(self.backend, self.user, self.tag_type, {'http://example.com/me/tag#1234'})) - Nodes(self.backend, self.user, self.ent_type, {'http://example.com/me/entity#4321'}) \ + .set(ns.bse.tag, Nodes(self.backend, self.ac, self.tag_type, {'http://example.com/me/tag#1234'})) + Nodes(self.backend, self.ac, self.ent_type, {'http://example.com/me/entity#4321'}) \ .set(ns.bse.filesize, 4321) \ - .set(ns.bse.tag, Nodes(self.backend, self.user, self.tag_type, {'http://example.com/me/tag#4321'})) - Nodes(self.backend, self.user, self.tag_type, {'http://example.com/me/tag#1234'}) \ + .set(ns.bse.tag, Nodes(self.backend, self.ac, self.tag_type, {'http://example.com/me/tag#4321'})) + Nodes(self.backend, self.ac, self.tag_type, {'http://example.com/me/tag#1234'}) \ .set(bst.label, 'tag_label_1234') - Nodes(self.backend, self.user, self.tag_type, {'http://example.com/me/tag#4321'}) \ + Nodes(self.backend, self.ac, self.tag_type, {'http://example.com/me/tag#4321'}) \ .set(bst.label, 'tag_label_4321') # setup: get nodes instance - nodes = Nodes(self.backend, self.user, self.ent_type, self.ent_ids) + nodes = Nodes(self.backend, self.ac, self.ent_type, self.ent_ids) # must pass at least one path self.assertRaises(AttributeError, nodes.get) # view must be list or dict @@ -409,22 +411,22 @@ class TestNodes(unittest.TestCase): self.assertRaises(ValueError, nodes.get, ns.bse.filesize, view=tuple) # can pass path as URI self.assertDictEqual(nodes.get(ns.bse.filesize), { - Nodes(self.backend, self.user, self.ent_type, {'http://example.com/me/entity#1234'}): 1234, - Nodes(self.backend, self.user, self.ent_type, {'http://example.com/me/entity#4321'}): 4321, + Nodes(self.backend, self.ac, self.ent_type, {'http://example.com/me/entity#1234'}): 1234, + Nodes(self.backend, self.ac, self.ent_type, {'http://example.com/me/entity#4321'}): 4321, }) # can pass path as sequence of URI self.assertDictEqual(nodes.get((ns.bse.tag, bst.label)), { - Nodes(self.backend, self.user, self.ent_type, {'http://example.com/me/entity#1234'}): {'tag_label_1234'}, - Nodes(self.backend, self.user, self.ent_type, {'http://example.com/me/entity#4321'}): {'tag_label_4321'}, + Nodes(self.backend, self.ac, self.ent_type, {'http://example.com/me/entity#1234'}): {'tag_label_1234'}, + Nodes(self.backend, self.ac, self.ent_type, {'http://example.com/me/entity#4321'}): {'tag_label_4321'}, }) # get returns the same path that was passed self.assertCountEqual(list(nodes.get((ns.bse.tag, bst.label), path=True, view=list)), [ - (Nodes(self.backend, self.user, self.ent_type, {'http://example.com/me/entity#1234'}), (ns.bse.tag, bst.label), 'tag_label_1234'), - (Nodes(self.backend, self.user, self.ent_type, {'http://example.com/me/entity#4321'}), (ns.bse.tag, bst.label), 'tag_label_4321'), + 
(Nodes(self.backend, self.ac, self.ent_type, {'http://example.com/me/entity#1234'}), (ns.bse.tag, bst.label), 'tag_label_1234'), + (Nodes(self.backend, self.ac, self.ent_type, {'http://example.com/me/entity#4321'}), (ns.bse.tag, bst.label), 'tag_label_4321'), ]) self.assertCountEqual(list(nodes.get([ns.bse.tag, bst.label], path=True, view=list)), [ - (Nodes(self.backend, self.user, self.ent_type, {'http://example.com/me/entity#1234'}), [ns.bse.tag, bst.label], 'tag_label_1234'), - (Nodes(self.backend, self.user, self.ent_type, {'http://example.com/me/entity#4321'}), [ns.bse.tag, bst.label], 'tag_label_4321'), + (Nodes(self.backend, self.ac, self.ent_type, {'http://example.com/me/entity#1234'}), [ns.bse.tag, bst.label], 'tag_label_1234'), + (Nodes(self.backend, self.ac, self.ent_type, {'http://example.com/me/entity#4321'}), [ns.bse.tag, bst.label], 'tag_label_4321'), ]) # paths must be URI or sequence thereof self.assertRaises(TypeError, nodes.get, 1234) @@ -435,34 +437,34 @@ class TestNodes(unittest.TestCase): self.assertRaises(errors.ConsistencyError, nodes.get, (ns.bse.tag, bst.invalid)) # can pass multiple paths self.assertDictEqual(nodes.get(ns.bse.filesize, (ns.bse.tag, bst.label)), { - Nodes(self.backend, self.user, self.ent_type, {'http://example.com/me/entity#1234'}): { + Nodes(self.backend, self.ac, self.ent_type, {'http://example.com/me/entity#1234'}): { ns.bse.filesize: 1234, (ns.bse.tag, bst.label): {'tag_label_1234'}, }, - Nodes(self.backend, self.user, self.ent_type, {'http://example.com/me/entity#4321'}): { + Nodes(self.backend, self.ac, self.ent_type, {'http://example.com/me/entity#4321'}): { ns.bse.filesize: 4321, (ns.bse.tag, bst.label): {'tag_label_4321'}, }, }) # get respects view self.assertDictEqual(nodes.get(ns.bse.filesize, view=dict), { - Nodes(self.backend, self.user, self.ent_type, {'http://example.com/me/entity#1234'}): 1234, - Nodes(self.backend, self.user, self.ent_type, {'http://example.com/me/entity#4321'}): 4321, + Nodes(self.backend, self.ac, self.ent_type, {'http://example.com/me/entity#1234'}): 1234, + Nodes(self.backend, self.ac, self.ent_type, {'http://example.com/me/entity#4321'}): 4321, }) self.assertSetEqual(set(nodes.get(ns.bse.filesize, view=list)), { - (Nodes(self.backend, self.user, self.ent_type, {'http://example.com/me/entity#1234'}), 1234), - (Nodes(self.backend, self.user, self.ent_type, {'http://example.com/me/entity#4321'}), 4321), + (Nodes(self.backend, self.ac, self.ent_type, {'http://example.com/me/entity#1234'}), 1234), + (Nodes(self.backend, self.ac, self.ent_type, {'http://example.com/me/entity#4321'}), 4321), }) # get returns Nodes instance when fetching a node self.assertDictEqual(nodes.get(ns.bse.tag), { - Nodes(self.backend, self.user, self.ent_type, {'http://example.com/me/entity#1234'}): - {Nodes(self.backend, self.user, self.tag_type, {'http://example.com/me/tag#1234'})}, - Nodes(self.backend, self.user, self.ent_type, {'http://example.com/me/entity#4321'}): - {Nodes(self.backend, self.user, self.tag_type, {'http://example.com/me/tag#4321'})}, + Nodes(self.backend, self.ac, self.ent_type, {'http://example.com/me/entity#1234'}): + {Nodes(self.backend, self.ac, self.tag_type, {'http://example.com/me/tag#1234'})}, + Nodes(self.backend, self.ac, self.ent_type, {'http://example.com/me/entity#4321'}): + {Nodes(self.backend, self.ac, self.tag_type, {'http://example.com/me/tag#4321'})}, }) # get returns a value when fetching a value and omits missing values self.assertDictEqual(nodes.get(ns.bse.comment), { - Nodes(self.backend, 
self.user, self.ent_type, {'http://example.com/me/entity#1234'}): {'hello world'}, + Nodes(self.backend, self.ac, self.ent_type, {'http://example.com/me/entity#1234'}): {'hello world'}, }) # FIXME: What if I call `get` with a single predicate and a single node, but @@ -471,7 +473,7 @@ class TestNodes(unittest.TestCase): raise NotImplementedError() def test_getattr(self): - nodes = Nodes(self.backend, self.user, self.ent_type, {'http://example.com/me/entity#1234'}) + nodes = Nodes(self.backend, self.ac, self.ent_type, {'http://example.com/me/entity#1234'}) # can get walks to values self.assertEqual(nodes.filesize, Walk(nodes, (self.p_filesize, ))) # can get walks to nodes @@ -482,11 +484,11 @@ class TestNodes(unittest.TestCase): self.assertRaises(ValueError, getattr, nodes, 'foobar') def test_schema(self): - self.assertEqual(Nodes(self.backend, self.user, self.ent_type, + self.assertEqual(Nodes(self.backend, self.ac, self.ent_type, {URI('http://example.com/me/entity#1234')}).schema, self.backend.schema) def test_operators(self): # __add__, __or__, __sub__, __and__ - gen = partial(Nodes, self.backend, self.user, self.ent_type) + gen = partial(Nodes, self.backend, self.ac, self.ent_type) nodes = gen({URI('http://example.com/me/entity#1234')}) # add/or concatenates guids self.assertEqual( @@ -544,23 +546,24 @@ class TestNodes(unittest.TestCase): self.assertRaises(TypeError, op, nodes, 'hello world') # backend must match self.assertRaises(ValueError, op, nodes, - Nodes(None, self.user, self.ent_type, {URI('http://example.com/me/entity#1234')})) - # user must match + Nodes(None, self.ac, self.ent_type, {URI('http://example.com/me/entity#1234')})) + # ac must match self.assertRaises(ValueError, op, nodes, - Nodes(self.backend, '', self.ent_type, {URI('http://example.com/me/entity#1234')})) + Nodes(self.backend, NullAC(self.backend, ''), + self.ent_type, {URI('http://example.com/me/entity#1234')})) # node type must match self.assertRaises(ValueError, op, nodes, - Nodes(self.backend, self.user, self.tag_type, {URI('http://example.com/me/entity#1234')})) + Nodes(self.backend, self.ac, self.tag_type, {URI('http://example.com/me/entity#1234')})) def test_len(self): - self.assertEqual(1, len(Nodes(self.backend, self.user, self.ent_type, { + self.assertEqual(1, len(Nodes(self.backend, self.ac, self.ent_type, { URI('http://example.com/me/entity#1234'), }))) - self.assertEqual(2, len(Nodes(self.backend, self.user, self.ent_type, { + self.assertEqual(2, len(Nodes(self.backend, self.ac, self.ent_type, { URI('http://example.com/me/entity#1234'), URI('http://example.com/me/entity#4321'), }))) - self.assertEqual(4, len(Nodes(self.backend, self.user, self.ent_type, { + self.assertEqual(4, len(Nodes(self.backend, self.ac, self.ent_type, { URI('http://example.com/me/entity#1234'), URI('http://example.com/me/entity#4321'), URI('http://example.com/me/entity#5678'), @@ -568,8 +571,8 @@ class TestNodes(unittest.TestCase): }))) def test_iter(self): # __iter__ - gen = partial(Nodes, self.backend, self.user, self.ent_type) - self.assertSetEqual(set(Nodes(self.backend, self.user, self.ent_type, { + gen = partial(Nodes, self.backend, self.ac, self.ent_type) + self.assertSetEqual(set(Nodes(self.backend, self.ac, self.ent_type, { URI('http://example.com/me/entity#1234'), URI('http://example.com/me/entity#4321'), URI('http://example.com/me/entity#5678'), diff --git a/test/graph/test_walk.py b/test/graph/test_walk.py index f9dbc7a..e5c8981 100644 --- a/test/graph/test_walk.py +++ b/test/graph/test_walk.py @@ -10,6 +10,7 @@ 
import unittest # bsfs imports from bsfs import schema as bsc from bsfs.graph import Graph +from bsfs.graph.ac import NullAC from bsfs.namespace import Namespace, ns from bsfs.triple_store.sparql import SparqlStore from bsfs.utils import URI @@ -65,7 +66,8 @@ class TestWalk(unittest.TestCase): ''') self.backend = SparqlStore.Open() self.user = URI('http://example.com/me') - self.graph = Graph(self.backend, self.user) + self.ac = NullAC(self.backend, self.user) + self.graph = Graph(self.backend, self.ac) self.graph.migrate(self.schema) # nodes setup -- cgit v1.2.3 From d822df3dad0525f35dbacda9d5a66f4756f079ff Mon Sep 17 00:00:00 2001 From: Matthias Baumgartner Date: Wed, 1 Mar 2023 12:49:47 +0100 Subject: Integrate main app into package --- bsfs.app | 52 +++----------------------------------------------- bsfs/apps/__init__.py | 38 ++++++++++++++++++++++++++++++++++++ bsfs/apps/init.py | 6 ++++-- bsfs/apps/migrate.py | 1 + test/apps/test_main.py | 42 ++++++++++++++++++++++++++++++++++++++++ 5 files changed, 88 insertions(+), 51 deletions(-) create mode 100644 test/apps/test_main.py diff --git a/bsfs.app b/bsfs.app index babacbb..c837ca0 100755 --- a/bsfs.app +++ b/bsfs.app @@ -1,52 +1,6 @@ -"""BSFS tools. - -Part of the BlackStar filesystem (bsfs) module. -A copy of the license is provided with the project. -Author: Matthias Baumgartner, 2022 -""" -# imports -import argparse -import typing - -# module imports -import bsfs -import bsfs.apps - -# exports -__all__: typing.Sequence[str] = ( - 'main', - ) - -# config -apps = { - 'init' : bsfs.apps.init, - 'migrate' : bsfs.apps.migrate, - } - - -## code ## - -def main(argv): - """Black Star File System maintenance tools.""" - parser = argparse.ArgumentParser(description=main.__doc__, prog='bsfs') - # version - parser.add_argument('--version', action='version', - version='%(prog)s version {}.{}.{}'.format(*bsfs.version_info)) - # application selection - parser.add_argument('app', choices=apps.keys(), - help='Select the application to run.') - # dangling args - parser.add_argument('rest', nargs=argparse.REMAINDER) - # parse - args = parser.parse_args() - # run application - apps[args.app](args.rest) - - -## main ## - +#!/usr/bin/env python3 if __name__ == '__main__': + import bsfs.apps import sys - main(sys.argv[1:]) + bsfs.apps.main(sys.argv[1:]) -## EOF ## diff --git a/bsfs/apps/__init__.py b/bsfs/apps/__init__.py index 7efaa87..3dec9ad 100644 --- a/bsfs/apps/__init__.py +++ b/bsfs/apps/__init__.py @@ -1,3 +1,4 @@ +#!/usr/bin/env python3 """ Part of the BlackStar filesystem (bsfs) module. @@ -5,16 +6,53 @@ A copy of the license is provided with the project. 
Author: Matthias Baumgartner, 2022 """ # imports +import argparse import typing +# bsfs imports +import bsfs + # inner-module imports from .init import main as init from .migrate import main as migrate # exports __all__: typing.Sequence[str] = ( + 'main', 'init', 'migrate', ) +# config +apps = { + 'init' : init, + 'migrate' : migrate, + } + + +## code ## + +def main(argv=None): + """Black Star File System maintenance tools.""" + parser = argparse.ArgumentParser(description=main.__doc__, prog='bsfs') + # version + parser.add_argument('--version', action='version', + version='%(prog)s version {}.{}.{}'.format(*bsfs.version_info)) + # application selection + parser.add_argument('app', choices=apps.keys(), + help='Select the application to run.') + # dangling args + parser.add_argument('rest', nargs=argparse.REMAINDER) + # parse + args = parser.parse_args(argv) + # run application + apps[args.app](args.rest) + + +## main ## + +if __name__ == '__main__': + import sys + main(sys.argv[1:]) + ## EOF ## diff --git a/bsfs/apps/init.py b/bsfs/apps/init.py index 3e2ef37..ec48525 100644 --- a/bsfs/apps/init.py +++ b/bsfs/apps/init.py @@ -1,3 +1,4 @@ +#!/usr/bin/env python3 """ Part of the BlackStar filesystem (bsfs) module. @@ -60,9 +61,10 @@ def main(argv): # print config if args.output is not None: with open(args.output, mode='wt', encoding='UTF-8') as ofile: - json.dump(config, ofile) + json.dump(config, ofile, indent=4) else: - json.dump(config, sys.stdout) + json.dump(config, sys.stdout, indent=4) + print('') ## main ## diff --git a/bsfs/apps/migrate.py b/bsfs/apps/migrate.py index b9d019f..cb62542 100644 --- a/bsfs/apps/migrate.py +++ b/bsfs/apps/migrate.py @@ -1,3 +1,4 @@ +#!/usr/bin/env python3 """ Part of the BlackStar filesystem (bsfs) module. diff --git a/test/apps/test_main.py b/test/apps/test_main.py new file mode 100644 index 0000000..ae19b5e --- /dev/null +++ b/test/apps/test_main.py @@ -0,0 +1,42 @@ +""" + +Part of the bsfs test suite. +A copy of the license is provided with the project. 
+Author: Matthias Baumgartner, 2022 +""" +# imports +import contextlib +import io +import json +import unittest + +# objects to test +from bsfs.apps import main + + +## code ## + +class TestMain(unittest.TestCase): + def test_main(self): + # must at least pass an app + with contextlib.redirect_stderr(io.StringIO()): + self.assertRaises(SystemExit, main, []) + # app takes over + with contextlib.redirect_stderr(io.StringIO()): + self.assertRaises(SystemExit, main, ['init']) + outbuf = io.StringIO() + with contextlib.redirect_stdout(outbuf): + main(['init', 'sparql']) + self.assertEqual(json.loads(outbuf.getvalue()), { + 'Graph': { + 'user': 'http://example.com/me', + 'backend': { + 'SparqlStore': {}}}}) + + +## main ## + +if __name__ == '__main__': + unittest.main() + +## EOF ## -- cgit v1.2.3 From 47e147bbbb3182065d76847ad7cb71c895003abf Mon Sep 17 00:00:00 2001 From: Matthias Baumgartner Date: Wed, 1 Mar 2023 12:51:09 +0100 Subject: build instructions --- MANIFEST.in | 1 + bsfs.toml | 11 ----------- setup.py | 33 ++++++++++++++++++++++----------- 3 files changed, 23 insertions(+), 22 deletions(-) create mode 100644 MANIFEST.in delete mode 100644 bsfs.toml diff --git a/MANIFEST.in b/MANIFEST.in new file mode 100644 index 0000000..a06c41c --- /dev/null +++ b/MANIFEST.in @@ -0,0 +1 @@ +include bsfs/graph/schema.nt diff --git a/bsfs.toml b/bsfs.toml deleted file mode 100644 index 45bf1c9..0000000 --- a/bsfs.toml +++ /dev/null @@ -1,11 +0,0 @@ -[project] -name = "bsfs" -description = "A content aware graph file system." -version = "0.0.1" -license = {text = "BSD 3-Clause License"} -authors = [{name='Matthias Baumgartner', email="dev@igsor.net"}] -dependencies = [ - "rdflib", -] -requires-python = ">=3.7" - diff --git a/setup.py b/setup.py index 243c73f..747e853 100644 --- a/setup.py +++ b/setup.py @@ -1,26 +1,37 @@ -from setuptools import setup +from setuptools import setup, find_packages import os setup( + # package metadata name='bsfs', - version='0.0.1', + version='0.23.03', author='Matthias Baumgartner', - author_email='dev@igsor.net', - description='A content aware graph file system.', + author_email='dev@bsfs.io', + description='A content-aware graph file system.', long_description=open(os.path.join(os.path.dirname(__file__), 'README')).read(), license='BSD', license_files=('LICENSE', ), - url='https://www.igsor.net/projects/blackstar/bsfs/', - download_url='https://pip.igsor.net', - packages=('bsfs', ), + url='https://www.bsfs.io/bsfs/', + download_url='https://pip.bsfs.io', + + # packages + packages=[p for p in find_packages() if p.startswith('bsfs')], + # data files are included if mentioned in MANIFEST.in + include_package_data=True, + + # entrypoints + entry_points={ + 'console_scripts': [ + 'bsfs = bsfs.apps:main', + ], + }, + + # dependencies install_requires=( 'rdflib', # schema and sparql storage 'hopcroftkarp', # ast matching + 'numpy', # distance functions for sparql store ), python_requires=">=3.7", ) - -# FIXME: bsfs/graph/schema.nt -# FIXME: bsfs.app - -- cgit v1.2.3 From 1a6b812276326d03869cd78d3a184868233c05d4 Mon Sep 17 00:00:00 2001 From: Matthias Baumgartner Date: Wed, 1 Mar 2023 13:05:10 +0100 Subject: documentation skeleton --- doc/Makefile | 20 ++ doc/make.bat | 35 +++ doc/source/_static/arch_dark.png | Bin 0 -> 27346 bytes doc/source/_static/arch_dark.svg | 500 ++++++++++++++++++++++++++++++++++++++ doc/source/_static/arch_light.png | Bin 0 -> 17509 bytes doc/source/_static/arch_light.svg | 499 +++++++++++++++++++++++++++++++++++++ doc/source/conf.py | 37 +++ 7 
files changed, 1091 insertions(+) create mode 100644 doc/Makefile create mode 100644 doc/make.bat create mode 100644 doc/source/_static/arch_dark.png create mode 100644 doc/source/_static/arch_dark.svg create mode 100644 doc/source/_static/arch_light.png create mode 100644 doc/source/_static/arch_light.svg create mode 100644 doc/source/conf.py diff --git a/doc/Makefile b/doc/Makefile new file mode 100644 index 0000000..d0c3cbf --- /dev/null +++ b/doc/Makefile @@ -0,0 +1,20 @@ +# Minimal makefile for Sphinx documentation +# + +# You can set these variables from the command line, and also +# from the environment for the first two. +SPHINXOPTS ?= +SPHINXBUILD ?= sphinx-build +SOURCEDIR = source +BUILDDIR = build + +# Put it first so that "make" without argument is like "make help". +help: + @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) + +.PHONY: help Makefile + +# Catch-all target: route all unknown targets to Sphinx using the new +# "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). +%: Makefile + @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) diff --git a/doc/make.bat b/doc/make.bat new file mode 100644 index 0000000..747ffb7 --- /dev/null +++ b/doc/make.bat @@ -0,0 +1,35 @@ +@ECHO OFF + +pushd %~dp0 + +REM Command file for Sphinx documentation + +if "%SPHINXBUILD%" == "" ( + set SPHINXBUILD=sphinx-build +) +set SOURCEDIR=source +set BUILDDIR=build + +%SPHINXBUILD% >NUL 2>NUL +if errorlevel 9009 ( + echo. + echo.The 'sphinx-build' command was not found. Make sure you have Sphinx + echo.installed, then set the SPHINXBUILD environment variable to point + echo.to the full path of the 'sphinx-build' executable. Alternatively you + echo.may add the Sphinx directory to PATH. + echo. + echo.If you don't have Sphinx installed, grab it from + echo.https://www.sphinx-doc.org/ + exit /b 1 +) + +if "%1" == "" goto help + +%SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% +goto end + +:help +%SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% + +:end +popd diff --git a/doc/source/_static/arch_dark.png b/doc/source/_static/arch_dark.png new file mode 100644 index 0000000..b5ea1b3 Binary files /dev/null and b/doc/source/_static/arch_dark.png differ diff --git a/doc/source/_static/arch_dark.svg b/doc/source/_static/arch_dark.svg new file mode 100644 index 0000000..22de237 --- /dev/null +++ b/doc/source/_static/arch_dark.svg @@ -0,0 +1,500 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + image/svg+xml + + + + + + + Graph + Nodes + AC + + Backend + + App + Lib + + Config + + Query AST + + Schema + + Utils + + + Front + Center + Back + Envelope + + + Client + + + + + + + + diff --git a/doc/source/_static/arch_light.png b/doc/source/_static/arch_light.png new file mode 100644 index 0000000..c210ecf Binary files /dev/null and b/doc/source/_static/arch_light.png differ diff --git a/doc/source/_static/arch_light.svg b/doc/source/_static/arch_light.svg new file mode 100644 index 0000000..e93694c --- /dev/null +++ b/doc/source/_static/arch_light.svg @@ -0,0 +1,499 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + image/svg+xml + + + + + + Graph + Nodes + AC + + Backend + + App + Lib + + Config + + Query AST + + Schema + + Utils + + + Front + Center + Back + Envelope + + + Client + + + + + + + + diff --git a/doc/source/conf.py b/doc/source/conf.py new file mode 100644 index 0000000..6de4993 --- /dev/null +++ b/doc/source/conf.py @@ -0,0 +1,37 @@ +# 
Configuration file for the Sphinx documentation builder. +# +# For the full list of built-in configuration values, see the documentation: +# https://www.sphinx-doc.org/en/master/usage/configuration.html + +# -- Project information ----------------------------------------------------- +# https://www.sphinx-doc.org/en/master/usage/configuration.html#project-information + +project = 'Black Star File System' +copyright = '2023, Matthias Baumgartner' +author = 'Matthias Baumgartner' +release = '0.5' + +# -- General configuration --------------------------------------------------- +# https://www.sphinx-doc.org/en/master/usage/configuration.html#general-configuration + +extensions = [ + 'sphinx_copybutton', + 'sphinx.ext.autodoc', + ] + +templates_path = ['_templates'] +exclude_patterns = [] + + + +# -- Options for HTML output ------------------------------------------------- +# https://www.sphinx-doc.org/en/master/usage/configuration.html#options-for-html-output + +html_theme = 'furo' +html_static_path = ['_static'] + +html_title = 'bsfs' +html_theme_options = { + 'announcement': 'This project is under heavy development and subject to rapid changes. Use at your own discretion.', + } + -- cgit v1.2.3 From a5695dcc4e1be8fccd0b35ba4672cdb3c2c119d7 Mon Sep 17 00:00:00 2001 From: Matthias Baumgartner Date: Wed, 1 Mar 2023 13:06:59 +0100 Subject: minor fixes --- .gitignore | 1 + CHANGELOG | 0 bsfs/graph/graph.py | 4 +--- bsfs/graph/schema.nt | 4 ++-- test/graph/ac/test_null.py | 1 - 5 files changed, 4 insertions(+), 6 deletions(-) create mode 100644 CHANGELOG diff --git a/.gitignore b/.gitignore index ba88570..c32d36b 100644 --- a/.gitignore +++ b/.gitignore @@ -22,6 +22,7 @@ build/ # doc builds doc/build/ +doc/source/api # doc extra files diff --git a/CHANGELOG b/CHANGELOG new file mode 100644 index 0000000..e69de29 diff --git a/bsfs/graph/graph.py b/bsfs/graph/graph.py index a74da01..a356533 100644 --- a/bsfs/graph/graph.py +++ b/bsfs/graph/graph.py @@ -28,9 +28,7 @@ __all__: typing.Sequence[str] = ( ## code ## class Graph(): - """The Graph class is - - The Graph class provides a convenient interface to query and access a graph. + """The Graph class provides a convenient interface to query and access a graph. Since it logically builds on the concept of graphs it is easier to navigate than raw triple stores. Naturally, it uses a triple store as *backend*. It also controls actions via access permissions to a *user*. diff --git a/bsfs/graph/schema.nt b/bsfs/graph/schema.nt index f619746..cba5e80 100644 --- a/bsfs/graph/schema.nt +++ b/bsfs/graph/schema.nt @@ -9,11 +9,11 @@ prefix bsm: # literals bsfs:Number rdfs:subClassOf bsfs:Literal . -xsd:integer rdfs:subClassOf bsfs:Number . +xsd:float rdfs:subClassOf bsfs:Number . # predicates bsm:t_created rdfs:subClassOf bsfs:Predicate ; rdfs:domain bsfs:Node ; - rdfs:range xsd:integer ; + rdfs:range xsd:float ; bsfs:unique "true"^^xsd:boolean . 
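As an aside to the schema.nt hunk above: the essential `bsm:t_created` predicate now ranges over `xsd:float` instead of `xsd:integer`. A fragment like this is loaded through `schema.from_string`, the same way the test setups elsewhere in this series do. The sketch below is illustrative only and not part of the patch; the prefix IRIs are assumptions (the standard rdfs/xsd namespaces, `http://bsfs.ai/schema/` for `bsfs:`, and a made-up placeholder for `bsm:`).

    # Minimal sketch, not part of the patch: load the updated essential
    # schema fragment from a turtle string. Prefix IRIs are assumed as
    # noted above.
    from bsfs import schema

    essential = schema.from_string('''
        prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#>
        prefix xsd:  <http://www.w3.org/2001/XMLSchema#>
        prefix bsfs: <http://bsfs.ai/schema/>
        prefix bsm:  <http://example.com/meta#>

        bsfs:Number rdfs:subClassOf bsfs:Literal .
        xsd:float rdfs:subClassOf bsfs:Number .

        bsm:t_created rdfs:subClassOf bsfs:Predicate ;
            rdfs:domain bsfs:Node ;
            rdfs:range xsd:float ;
            bsfs:unique "true"^^xsd:boolean .
    ''')
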
diff --git a/test/graph/ac/test_null.py b/test/graph/ac/test_null.py index 7d25980..e33d46a 100644 --- a/test/graph/ac/test_null.py +++ b/test/graph/ac/test_null.py @@ -132,7 +132,6 @@ class TestNullAC(unittest.TestCase): ast.filter.Any(ns.bse.author, ast.filter.Equals('Me, Myself, and I'))) ac = NullAC(self.backend, self.user) self.assertEqual(query, ac.filter_read(self.ent_type, query)) - return query ## main ## -- cgit v1.2.3 From d70e78bbdd9d9b5727f18a82fce08f20bdbbba19 Mon Sep 17 00:00:00 2001 From: Matthias Baumgartner Date: Thu, 2 Mar 2023 10:26:29 +0100 Subject: readme and changelog --- CHANGELOG | 0 CHANGELOG.md | 0 README | 57 --------------------------------------------------------- README.md | 57 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 57 insertions(+), 57 deletions(-) delete mode 100644 CHANGELOG create mode 100644 CHANGELOG.md delete mode 100644 README create mode 100644 README.md diff --git a/CHANGELOG b/CHANGELOG deleted file mode 100644 index e69de29..0000000 diff --git a/CHANGELOG.md b/CHANGELOG.md new file mode 100644 index 0000000..e69de29 diff --git a/README b/README deleted file mode 100644 index da066f6..0000000 --- a/README +++ /dev/null @@ -1,57 +0,0 @@ - -The Black Star File System -========================== - - -### Developer tools setup - -#### Test coverage (coverage) - -Resources: -* https://coverage.readthedocs.io/en/6.5.0/index.html -* https://nedbatchelder.com/blog/200710/flaws_in_coverage_measurement.html - -Commands: -$ pip install coverage -$ coverage run ; coverage html ; xdg-open .htmlcov/index.html - - - -#### Static code analysis (pylint) - -Resources: -* https://github.com/PyCQA/pylint -* https://pylint.org/ -* https://pylint.pycqa.org/en/latest/user_guide/messages/messages_overview.html#messages-overview - -Commands: -$ pip install pylint -$ pylint bsfs - - - -#### Type analysis (mypy) - -Resources: -* https://github.com/python/mypy -* https://mypy.readthedocs.io/en/stable/ - -Commands: -$ pip install mypy -$ mypy - - - -#### Documentation (sphinx) - -Resources: -* -* - -Commands: -$ pip install ... -$ - - - - diff --git a/README.md b/README.md new file mode 100644 index 0000000..da066f6 --- /dev/null +++ b/README.md @@ -0,0 +1,57 @@ + +The Black Star File System +========================== + + +### Developer tools setup + +#### Test coverage (coverage) + +Resources: +* https://coverage.readthedocs.io/en/6.5.0/index.html +* https://nedbatchelder.com/blog/200710/flaws_in_coverage_measurement.html + +Commands: +$ pip install coverage +$ coverage run ; coverage html ; xdg-open .htmlcov/index.html + + + +#### Static code analysis (pylint) + +Resources: +* https://github.com/PyCQA/pylint +* https://pylint.org/ +* https://pylint.pycqa.org/en/latest/user_guide/messages/messages_overview.html#messages-overview + +Commands: +$ pip install pylint +$ pylint bsfs + + + +#### Type analysis (mypy) + +Resources: +* https://github.com/python/mypy +* https://mypy.readthedocs.io/en/stable/ + +Commands: +$ pip install mypy +$ mypy + + + +#### Documentation (sphinx) + +Resources: +* +* + +Commands: +$ pip install ... 
+$ + + + + -- cgit v1.2.3 From 87f437380c1dd8f420437cddc028c0f3174ee1c9 Mon Sep 17 00:00:00 2001 From: Matthias Baumgartner Date: Thu, 2 Mar 2023 12:19:58 +0100 Subject: Node getters in bsfs.Graph: * Empty nodes instance (Graph.empty) * Order-preserving get query (Graph.sorted) * Collect common code in private Graph.__get * Empty query in Graph.get * Empty query in Graph.resolve.Filter * Empty query in AC: filter_read --- bsfs/graph/ac/base.py | 6 ++- bsfs/graph/ac/null.py | 6 ++- bsfs/graph/graph.py | 60 +++++++++++++++++++++-------- bsfs/graph/resolve.py | 9 ++++- test/graph/ac/test_null.py | 3 ++ test/graph/test_graph.py | 95 ++++++++++++++++++++++++++++++++++++++++------ test/graph/test_resolve.py | 3 ++ 7 files changed, 151 insertions(+), 31 deletions(-) diff --git a/bsfs/graph/ac/base.py b/bsfs/graph/ac/base.py index 0b9f988..2759557 100644 --- a/bsfs/graph/ac/base.py +++ b/bsfs/graph/ac/base.py @@ -83,7 +83,11 @@ class AccessControlBase(abc.ABC): """Return nodes that are allowed to be created.""" @abc.abstractmethod - def filter_read(self, node_type: schema.Node, query: ast.filter.FilterExpression) -> ast.filter.FilterExpression: + def filter_read( + self, + node_type: schema.Node, + query: typing.Optional[ast.filter.FilterExpression], + ) -> typing.Optional[ast.filter.FilterExpression]: """Re-write a filter *query* to get (i.e., read) *node_type* nodes.""" @abc.abstractmethod diff --git a/bsfs/graph/ac/null.py b/bsfs/graph/ac/null.py index 6a923a5..e67b55d 100644 --- a/bsfs/graph/ac/null.py +++ b/bsfs/graph/ac/null.py @@ -50,7 +50,11 @@ class NullAC(base.AccessControlBase): """Return nodes that are allowed to be created.""" return guids - def filter_read(self, node_type: schema.Node, query: ast.filter.FilterExpression) -> ast.filter.FilterExpression: + def filter_read( + self, + node_type: schema.Node, + query: typing.Optional[ast.filter.FilterExpression] + ) -> typing.Optional[ast.filter.FilterExpression]: """Re-write a filter *query* to get (i.e., read) *node_type* nodes.""" return query diff --git a/bsfs/graph/graph.py b/bsfs/graph/graph.py index a356533..11fe835 100644 --- a/bsfs/graph/graph.py +++ b/bsfs/graph/graph.py @@ -113,6 +113,7 @@ class Graph(): *node_type*) once some data is assigned to them. """ + # get node type type_ = self.schema.node(node_type) # NOTE: Nodes constructor materializes guids. return _nodes.Nodes(self._backend, self._ac, type_, guids) @@ -120,15 +121,51 @@ class Graph(): def node(self, node_type: URI, guid: URI) -> _nodes.Nodes: """Return node *guid* of type *node_type* as a `bsfs.graph.Nodes` instance. - Note that the *guids* need not to exist (however, the *node_type* has + Note that the *guid* need not to exist (however, the *node_type* has to be part of the schema). An inexistent guid will be created (using *node_type*) once some data is assigned to them. """ return self.nodes(node_type, {guid}) - def get(self, node_type: URI, query: ast.filter.FilterExpression) -> _nodes.Nodes: # FIXME: How about empty query? 
- """Return a `Nodes` instance over all nodes of type *node_type* that match the *subject* query.""" + def empty(self, node_type: URI) -> _nodes.Nodes: + """Return a `Nodes` instance with type *node_type* but no nodes.""" + return self.nodes(node_type, set()) + + def get( + self, + node_type: URI, + query: typing.Optional[ast.filter.FilterExpression], + ) -> _nodes.Nodes: + """Return a `Nodes` instance over all nodes of type *node_type* that match the *query*.""" + # return Nodes instance + type_ = self.schema.node(node_type) + return _nodes.Nodes(self._backend, self._ac, type_, self.__get(node_type, query)) + + def sorted( + self, + node_type: URI, + query: typing.Optional[ast.filter.FilterExpression], + # FIXME: sort ast + ) -> typing.Iterator[_nodes.Nodes]: + """Return a iterator over `Nodes` instances over all nodes of type *node_type* that match the *query*.""" + # FIXME: Order should be a parameter + # return iterator over Nodes instances + type_ = self.schema.node(node_type) + for guid in self.__get(node_type, query): + yield _nodes.Nodes(self._backend, self._ac, type_, {guid}) + + def all(self, node_type: URI) -> _nodes.Nodes: + """Return all instances of type *node_type*.""" + type_ = self.schema.node(node_type) + return _nodes.Nodes(self._backend, self._ac, type_, self.__get(node_type, None)) + + def __get( + self, + node_type: URI, + query: typing.Optional[ast.filter.FilterExpression], + ) -> typing.Iterator[URI]: + """Build and execute a get query.""" # get node type type_ = self.schema.node(node_type) # resolve Nodes instances @@ -136,18 +173,9 @@ class Graph(): # add access controls to query query = self._ac.filter_read(type_, query) # validate query - self._validate(type_, query) - # query the backend - guids = self._backend.get(type_, query) # no need to materialize - # return Nodes instance - return _nodes.Nodes(self._backend, self._ac, type_, guids) - - def all(self, node_type: URI) -> _nodes.Nodes: - """Return all instances of type *node_type*.""" - # get node type - type_ = self.schema.node(node_type) - guids = self._backend.get(type_, None) # no need to materialize - return _nodes.Nodes(self._backend, self._ac, type_, guids) - + if query is not None: + self._validate(type_, query) + # query the backend and return the (non-materialized) result + return self._backend.get(type_, query) ## EOF ## diff --git a/bsfs/graph/resolve.py b/bsfs/graph/resolve.py index 4677401..b3ab001 100644 --- a/bsfs/graph/resolve.py +++ b/bsfs/graph/resolve.py @@ -40,8 +40,13 @@ class Filter(): def __init__(self, schema): self.schema = schema - def __call__(self, root_type: bsc.Node, node: ast.filter.FilterExpression): - # FIXME: node can be None! 
+ def __call__( + self, + root_type: bsc.Node, + node: typing.Optional[ast.filter.FilterExpression], + ): + if node is None: + return None return self._parse_filter_expression(root_type, node) def _parse_filter_expression( diff --git a/test/graph/ac/test_null.py b/test/graph/ac/test_null.py index e33d46a..544a01e 100644 --- a/test/graph/ac/test_null.py +++ b/test/graph/ac/test_null.py @@ -131,7 +131,10 @@ class TestNullAC(unittest.TestCase): ast.filter.Any(ns.bse.tag, ast.filter.Is('http://example.com/tag#4321')), ast.filter.Any(ns.bse.author, ast.filter.Equals('Me, Myself, and I'))) ac = NullAC(self.backend, self.user) + # NullAC returns query self.assertEqual(query, ac.filter_read(self.ent_type, query)) + # query can be none + self.assertIsNone(ac.filter_read(self.ent_type, None)) ## main ## diff --git a/test/graph/test_graph.py b/test/graph/test_graph.py index d89d346..93f8db7 100644 --- a/test/graph/test_graph.py +++ b/test/graph/test_graph.py @@ -5,6 +5,8 @@ A copy of the license is provided with the project. Author: Matthias Baumgartner, 2022 """ # imports +from functools import reduce +import operator import unittest # bsie imports @@ -97,18 +99,14 @@ class TestGraph(unittest.TestCase): # node_type must be in the schema self.assertRaises(KeyError, graph.nodes, ns.bsfs.Invalid, guids) - def test_all(self): + def test_empty(self): graph = Graph(self.backend, self.ac) - # resulting nodes can be empty - self.assertEqual(graph.all(ns.bsfs.Entity), + # returns a Nodes instance + self.assertEqual( + graph.empty(ns.bsfs.Entity), Nodes(self.backend, self.ac, graph.schema.node(ns.bsfs.Entity), set())) - # resulting nodes contains all nodes of the respective type - guids = {URI('http://example.com/me/entity#1234'), URI('http://example.com/me/entity#4321')} - self.backend.create(graph.schema.node(ns.bsfs.Entity), guids) - self.assertEqual(graph.all(ns.bsfs.Entity), - Nodes(self.backend, self.ac, graph.schema.node(ns.bsfs.Entity), guids)) # node_type must be in the schema - self.assertRaises(KeyError, graph.all, ns.bsfs.Invalid) + self.assertRaises(KeyError, graph.empty, ns.bsfs.Invalid) def test_migrate(self): # setup @@ -248,10 +246,10 @@ class TestGraph(unittest.TestCase): graph.node(ns.bsfs.Tag, URI('http://example.com/tag#1234')).set(ns.bse.comment, 'foo') graph.node(ns.bsfs.Tag, URI('http://example.com/tag#4321')).set(ns.bse.comment, 'bar') - # get exception for invalid query + # invalid query raises exception self.assertRaises(errors.ConsistencyError, graph.get, ns.bsfs.Entity, ast.filter.Any(ns.bse.tag, ast.filter.Equals('hello world'))) - # query returns nodes + # get returns nodes self.assertEqual(graph.get(ns.bsfs.Entity, ast.filter.Any(ns.bse.tag, ast.filter.Is(tags))), ents) self.assertEqual(graph.get(ns.bsfs.Entity, ast.filter.Any(ns.bse.comment, ast.filter.StartsWith('foo'))), graph.node(ns.bsfs.Entity, URI('http://example.com/entity#1234'))) @@ -262,6 +260,81 @@ class TestGraph(unittest.TestCase): ast.filter.Any(ns.bse.tag, ast.filter.All(ns.bse.comment, ast.filter.Equals('bar'))))), ents) + # query can be None + self.assertEqual(graph.get(ns.bsfs.Entity, None), ents) + + def test_sorted(self): + # setup + graph = Graph(self.backend, self.ac) + graph.migrate(schema.from_string(''' + prefix rdfs: + prefix xsd: + prefix bsfs: + prefix bse: + + bsfs:Entity rdfs:subClassOf bsfs:Node . + bsfs:Tag rdfs:subClassOf bsfs:Node . + xsd:string rdfs:subClassOf bsfs:Literal . 
+ + bse:tag rdfs:subClassOf bsfs:Predicate ; + rdfs:domain bsfs:Entity ; + rdfs:range bsfs:Tag ; + bsfs:unique "false"^^xsd:boolean . + + bse:comment rdfs:subClassOf bsfs:Predicate ; + rdfs:domain bsfs:Node ; + rdfs:range xsd:string ; + bsfs:unique "false"^^xsd:boolean . + + ''')) + # add some instances + ents = [ + # default is alphabetical order + graph.node(ns.bsfs.Entity, URI('http://example.com/entity#1234')), + graph.node(ns.bsfs.Entity, URI('http://example.com/entity#4321')), + ] + tags = graph.nodes(ns.bsfs.Tag, {URI('http://example.com/tag#1234'), URI('http://example.com/tag#4321')}) + # add some node links + reduce(operator.add, ents).set(ns.bse.tag, tags) + # add some literals + graph.node(ns.bsfs.Entity, URI('http://example.com/entity#1234')).set(ns.bse.comment, 'hello world') + graph.node(ns.bsfs.Entity, URI('http://example.com/entity#1234')).set(ns.bse.comment, 'foo') + graph.node(ns.bsfs.Entity, URI('http://example.com/entity#1234')).set(ns.bse.comment, 'foobar') + graph.node(ns.bsfs.Tag, URI('http://example.com/tag#1234')).set(ns.bse.comment, 'foo') + graph.node(ns.bsfs.Tag, URI('http://example.com/tag#4321')).set(ns.bse.comment, 'bar') + + # invalid query raises exception + self.assertRaises(errors.ConsistencyError, list, graph.sorted(ns.bsfs.Entity, ast.filter.Any(ns.bse.tag, ast.filter.Equals('hello world')))) + + # get returns nodes + self.assertListEqual(list(graph.sorted(ns.bsfs.Entity, ast.filter.Any(ns.bse.tag, ast.filter.Is(tags)))), ents) + self.assertListEqual(list(graph.sorted(ns.bsfs.Entity, ast.filter.Any(ns.bse.comment, ast.filter.StartsWith('foo')))), + [graph.node(ns.bsfs.Entity, URI('http://example.com/entity#1234'))]) + self.assertListEqual(list(graph.sorted(ns.bsfs.Node, ast.filter.Any(ns.bse.comment, ast.filter.StartsWith('foo')))), [ + graph.node(ns.bsfs.Node, URI('http://example.com/entity#1234')), + graph.node(ns.bsfs.Node, URI('http://example.com/tag#1234')), + ]) + self.assertListEqual(list(graph.sorted(ns.bsfs.Entity, ast.filter.Or( + ast.filter.Any(ns.bse.comment, ast.filter.EndsWith('bar')), + ast.filter.Any(ns.bse.tag, ast.filter.All(ns.bse.comment, ast.filter.Equals('bar')))))), + ents) + + # query can be None + self.assertListEqual(list(graph.sorted(ns.bsfs.Entity, None)), ents) + + + def test_all(self): + graph = Graph(self.backend, self.ac) + # resulting nodes can be empty + self.assertEqual(graph.all(ns.bsfs.Entity), + Nodes(self.backend, self.ac, graph.schema.node(ns.bsfs.Entity), set())) + # resulting nodes contains all nodes of the respective type + guids = {URI('http://example.com/me/entity#1234'), URI('http://example.com/me/entity#4321')} + self.backend.create(graph.schema.node(ns.bsfs.Entity), guids) + self.assertEqual(graph.all(ns.bsfs.Entity), + Nodes(self.backend, self.ac, graph.schema.node(ns.bsfs.Entity), guids)) + # node_type must be in the schema + self.assertRaises(KeyError, graph.all, ns.bsfs.Invalid) diff --git a/test/graph/test_resolve.py b/test/graph/test_resolve.py index 0918b02..b4d76c7 100644 --- a/test/graph/test_resolve.py +++ b/test/graph/test_resolve.py @@ -79,6 +79,9 @@ class TestFilter(unittest.TestCase): {'http://example.com/you/invalid#1234', 'http://example.com/you/invalid#4321'}) resolver = Filter(schema) + # query can be None + self.assertIsNone(resolver(schema.node(ns.bsfs.Entity), None)) + # immediate Is self.assertEqual(resolver(schema.node(ns.bsfs.Entity), ast.filter.Is(ents)), -- cgit v1.2.3 From cd27775b406482b11f44575ab196501a30d9b075 Mon Sep 17 00:00:00 2001 From: Matthias Baumgartner Date: Thu, 2 
Mar 2023 12:23:49 +0100 Subject: default sort order in sparql backend --- bsfs/triple_store/sparql/utils.py | 3 ++- test/triple_store/sparql/test_utils.py | 6 +++--- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/bsfs/triple_store/sparql/utils.py b/bsfs/triple_store/sparql/utils.py index deca4d8..51de893 100644 --- a/bsfs/triple_store/sparql/utils.py +++ b/bsfs/triple_store/sparql/utils.py @@ -127,11 +127,12 @@ class Query(): """Return an executable sparql query.""" select = ' '.join(f'({head} as ?{name})' for head, name in self.select) return f''' - SELECT {self.root_head} {select} + SELECT DISTINCT {self.root_head} {select} WHERE {{ {self.root_head} <{ns.rdf.type}>/<{ns.rdfs.subClassOf}>* <{self.root_type}> . {self.where} }} + ORDER BY str({self.root_head}) ''' def __call__(self, graph: rdflib.Graph) -> rdflib.query.Result: diff --git a/test/triple_store/sparql/test_utils.py b/test/triple_store/sparql/test_utils.py index 073b8f8..edcf6d7 100644 --- a/test/triple_store/sparql/test_utils.py +++ b/test/triple_store/sparql/test_utils.py @@ -118,13 +118,13 @@ class TestQuery(unittest.TestCase): return value # query composes a valid query q = Query(self.root_type, self.root_head, self.select, self.where) - self.assertEqual(normalize(q.query), normalize(f'select ?root (?head as ?name) where {{ ?root <{ns.rdf.type}>/<{ns.rdfs.subClassOf}>* . ?root <{ns.bse.tag}> ?head }}')) + self.assertEqual(normalize(q.query), normalize(f'select distinct ?root (?head as ?name) where {{ ?root <{ns.rdf.type}>/<{ns.rdfs.subClassOf}>* . ?root <{ns.bse.tag}> ?head }} order by str(?root)')) # select and where are optional q = Query(self.root_type, self.root_head) - self.assertEqual(normalize(q.query), normalize(f'select ?root where {{ ?root <{ns.rdf.type}>/<{ns.rdfs.subClassOf}>* . }}')) + self.assertEqual(normalize(q.query), normalize(f'select distinct ?root where {{ ?root <{ns.rdf.type}>/<{ns.rdfs.subClassOf}>* . }} order by str(?root)')) # select and where need not to correspond q = Query(self.root_type, self.root_head, (('?head', 'name'), )) - self.assertEqual(normalize(q.query), normalize(f'select ?root (?head as ?name) where {{ ?root <{ns.rdf.type}>/<{ns.rdfs.subClassOf}>* . }}')) + self.assertEqual(normalize(q.query), normalize(f'select distinct ?root (?head as ?name) where {{ ?root <{ns.rdf.type}>/<{ns.rdfs.subClassOf}>* . 
}} order by str(?root)')) # query is used for string representation self.assertEqual(str(q), q.query) -- cgit v1.2.3 From 36d07cc6e0ec0f53001bfc5045437a374ebb895f Mon Sep 17 00:00:00 2001 From: Matthias Baumgartner Date: Thu, 2 Mar 2023 14:55:57 +0100 Subject: empty fetch result in Nodes --- bsfs/graph/nodes.py | 58 ++++++++++++++++++++++++-------------------- test/graph/ac/test_null.py | 9 +++++++ test/graph/test_nodes.py | 60 ++++++++++++++++++++++++++++++++++++++++++---- 3 files changed, 97 insertions(+), 30 deletions(-) diff --git a/bsfs/graph/nodes.py b/bsfs/graph/nodes.py index 91cbb5d..c6bd5d8 100644 --- a/bsfs/graph/nodes.py +++ b/bsfs/graph/nodes.py @@ -249,32 +249,38 @@ class Nodes(): # add access controls to fetch fetch = self._ac.fetch_read(self.node_type, fetch) - # compose filter ast - filter = ast.filter.IsIn(self.guids) # pylint: disable=redefined-builtin - # add access controls to filter - filter = self._ac.filter_read(self.node_type, filter) - - # validate queries - validate.Filter(self._backend.schema)(self.node_type, filter) - validate.Fetch(self._backend.schema)(self.node_type, fetch) - - # process results, convert if need be - def triple_iter(): - # query the backend - triples = self._backend.fetch(self.node_type, filter, fetch) - # process triples - for root, name, raw in triples: - # get node - node = Nodes(self._backend, self._ac, self.node_type, {root}) - # get path - path, tail = name2path[name] - # covert raw to value - if isinstance(tail.range, bsc.Node): - value = Nodes(self._backend, self._ac, tail.range, {raw}) - else: - value = raw - # emit triple - yield node, path, value + if len(self._guids) == 0: + # shortcut: no need to query; no triples + # FIXME: if the Fetch query can given by the user, we might want to check its validity + def triple_iter(): + return [] + else: + # compose filter ast + filter = ast.filter.IsIn(self.guids) # pylint: disable=redefined-builtin + # add access controls to filter + filter = self._ac.filter_read(self.node_type, filter) # type: ignore [assignment] + + # validate queries + validate.Filter(self._backend.schema)(self.node_type, filter) + validate.Fetch(self._backend.schema)(self.node_type, fetch) + + # process results, convert if need be + def triple_iter(): + # query the backend + triples = self._backend.fetch(self.node_type, filter, fetch) + # process triples + for root, name, raw in triples: + # get node + node = Nodes(self._backend, self._ac, self.node_type, {root}) + # get path + path, tail = name2path[name] + # covert raw to value + if isinstance(tail.range, bsc.Node): + value = Nodes(self._backend, self._ac, tail.range, {raw}) + else: + value = raw + # emit triple + yield node, path, value # simplify by default view_kwargs['node'] = view_kwargs.get('node', len(self._guids) != 1) diff --git a/test/graph/ac/test_null.py b/test/graph/ac/test_null.py index 544a01e..6053f81 100644 --- a/test/graph/ac/test_null.py +++ b/test/graph/ac/test_null.py @@ -136,6 +136,15 @@ class TestNullAC(unittest.TestCase): # query can be none self.assertIsNone(ac.filter_read(self.ent_type, None)) + def test_fetch_read(self): + query = ast.fetch.All( + ast.fetch.Fetch(ns.bse.tag, ast.fetch.Value(ns.bse.label, 'tag_label')), + ast.fetch.Node(ns.bse.tag, 'tag_node'), + ast.fetch.Value(ns.bse.iso, 'iso')) + ac = NullAC(self.backend, self.user) + # NullAC returns query + self.assertEqual(query, ac.fetch_read(self.ent_type, query)) + ## main ## diff --git a/test/graph/test_nodes.py b/test/graph/test_nodes.py index 9541656..bf73e6e 100644 --- 
a/test/graph/test_nodes.py +++ b/test/graph/test_nodes.py @@ -346,6 +346,10 @@ class TestNodes(unittest.TestCase): Nodes(self.backend, self.ac, self.user_type, {URI('http://example.com/me/user#1234'), URI('http://example.com/me/user#4321')})) self.assertSetEqual(curr, set(self.backend._graph)) + # can set on empty nodes + nodes = Nodes(self.backend, self.ac, self.ent_type, {}) + self.assertEqual(nodes, nodes.set(self.p_filesize.uri, 1234)) + def test_set_from_iterable(self): self.assertSetEqual(set(self.backend._graph), self.schema_triples | set()) @@ -388,6 +392,11 @@ class TestNodes(unittest.TestCase): (self.p_author.uri, Nodes(self.backend, self.ac, self.user_type, {URI('http://example.com/me/user#1234'), URI('http://example.com/me/user#4321')})))) self.assertSetEqual(curr, set(self.backend._graph)) + # can set on empty nodes + nodes = Nodes(self.backend, self.ac, self.ent_type, {}) + self.assertEqual(nodes, nodes.set_from_iterable([(self.p_filesize.uri, 1234)])) + + def test_get(self): # setup: add some instances Nodes(self.backend, self.ac, self.ent_type, {'http://example.com/me/entity#1234'}) \ @@ -403,6 +412,7 @@ class TestNodes(unittest.TestCase): .set(bst.label, 'tag_label_4321') # setup: get nodes instance nodes = Nodes(self.backend, self.ac, self.ent_type, self.ent_ids) + # must pass at least one path self.assertRaises(AttributeError, nodes.get) # view must be list or dict @@ -467,10 +477,52 @@ class TestNodes(unittest.TestCase): Nodes(self.backend, self.ac, self.ent_type, {'http://example.com/me/entity#1234'}): {'hello world'}, }) - # FIXME: What if I call `get` with a single predicate and a single node, but - # that node has no value for that predicate? - # so, essentially, what if triples is empty? -> Also check in test_result! - raise NotImplementedError() + # results can be empty + nodes = Nodes(self.backend, self.ac, self.ent_type, {'http://example.com/me/entity#4321'}) # has filesize, tag but no comment + # unique paths return the default value + self.assertIsNone(nodes.get(ns.bse.author)) + self.assertEqual(nodes.get(ns.bse.author, default=1234), 1234) + # non-unique paths return an empty set + self.assertSetEqual(nodes.get(ns.bse.comment), set()) + + # nodes can have no guids + nodes = Nodes(self.backend, self.ac, self.ent_type, set()) + # empty nodes does not excuse an invalid request + self.assertRaises(TypeError, nodes.get, 1234) + self.assertRaises(errors.ConsistencyError, nodes.get, ns.bse.invalid) + # list view always returns an empty list + self.assertListEqual(list(nodes.get(ns.bse.comment, view=list)), []) + self.assertListEqual(list(nodes.get(ns.bse.comment, view=list, node=True)), []) + self.assertListEqual(list(nodes.get(ns.bse.comment, view=list, path=True)), []) + self.assertListEqual(list(nodes.get(ns.bse.comment, view=list, node=True, path=True, value=True)), []) + self.assertListEqual(list(nodes.get(ns.bse.comment, view=list, node=False)), []) + self.assertListEqual(list(nodes.get(ns.bse.comment, view=list, path=False)), []) + self.assertListEqual(list(nodes.get(ns.bse.comment, view=list, node=False, path=False, value=False)), []) + self.assertListEqual(list(nodes.get(ns.bse.comment, ns.bse.filesize, view=list)), []) + self.assertListEqual(list(nodes.get(ns.bse.comment, ns.bse.filesize, view=list, node=True)), []) + self.assertListEqual(list(nodes.get(ns.bse.comment, ns.bse.filesize, view=list, path=True)), []) + self.assertListEqual(list(nodes.get(ns.bse.comment, ns.bse.filesize, view=list, node=True, path=True, value=True)), []) + 
self.assertListEqual(list(nodes.get(ns.bse.comment, ns.bse.filesize, view=list, node=False)), []) + self.assertListEqual(list(nodes.get(ns.bse.comment, ns.bse.filesize, view=list, path=False)), []) + self.assertListEqual(list(nodes.get(ns.bse.comment, ns.bse.filesize, view=list, node=False, path=False, value=False)), []) + # dict view returns an empty dict or an empty set + self.assertDictEqual(nodes.get(ns.bse.comment, view=dict), {}) + self.assertDictEqual(nodes.get(ns.bse.comment, view=dict, node=True), {}) + self.assertDictEqual(nodes.get(ns.bse.comment, view=dict, path=True), {}) + self.assertDictEqual(nodes.get(ns.bse.comment, view=dict, node=True, path=True, value=True, default=None), {}) + self.assertSetEqual(nodes.get(ns.bse.comment, view=dict, node=False), set()) + self.assertDictEqual(nodes.get(ns.bse.comment, view=dict, path=False), {}) + self.assertSetEqual(nodes.get(ns.bse.comment, view=dict, node=False, path=False), set()) + self.assertSetEqual(nodes.get(ns.bse.comment, view=dict, node=False, path=False, value=False, default=None), set()) + self.assertDictEqual(nodes.get(ns.bse.comment, ns.bse.filesize, view=dict), {}) + self.assertDictEqual(nodes.get(ns.bse.comment, ns.bse.filesize, view=dict, node=True), {}) + self.assertDictEqual(nodes.get(ns.bse.comment, ns.bse.filesize, view=dict, path=True), {}) + self.assertDictEqual(nodes.get(ns.bse.comment, ns.bse.filesize, view=dict, node=True, path=True, value=True, default=None), {}) + self.assertDictEqual(nodes.get(ns.bse.comment, ns.bse.filesize, view=dict, node=False), {}) + self.assertDictEqual(nodes.get(ns.bse.comment, ns.bse.filesize, view=dict, path=False), {}) + self.assertSetEqual(nodes.get(ns.bse.comment, ns.bse.filesize, view=dict, node=False, path=False), set()) + self.assertSetEqual(nodes.get(ns.bse.comment, ns.bse.filesize, view=dict, node=False, path=False, value=False, default=None), set()) + def test_getattr(self): nodes = Nodes(self.backend, self.ac, self.ent_type, {'http://example.com/me/entity#1234'}) -- cgit v1.2.3 From 48fd909f502d25cbe7ef7732c44734f593c6e022 Mon Sep 17 00:00:00 2001 From: Matthias Baumgartner Date: Thu, 2 Mar 2023 14:57:49 +0100 Subject: README, CHANGELOG, and minor style fixes --- .pylintrc | 13 +++++++++++ CHANGELOG.md | 28 ++++++++++++++++++++++ README.md | 64 ++++++++++++++++++++++++--------------------------- bsfs/apps/__init__.py | 2 +- 4 files changed, 72 insertions(+), 35 deletions(-) diff --git a/.pylintrc b/.pylintrc index 6b7f471..418a728 100644 --- a/.pylintrc +++ b/.pylintrc @@ -144,6 +144,19 @@ allow-wildcard-with-all=no logging-format-style=old +[MESSAGES CONTROL] + +# disable similarities check +disable=raw-checker-failed, + bad-inline-option, + locally-disabled, + file-ignored, + suppressed-message, + useless-suppression, + deprecated-pragma, + use-symbolic-message-instead, + duplicate-code + [MISCELLANEOUS] diff --git a/CHANGELOG.md b/CHANGELOG.md index e69de29..fb66c1e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -0,0 +1,28 @@ + +# Changelog + +## 0.23.03 (Initial release) + +### Added + +- File graph protocol + - Graph access and navigation + - Syntactic sugar + - Fetch result shortcuts +- Filter and Fetch Queries + - Syntax trees + - Validation + - Matching +- Infrastructure to Open a storage +- Storage schema +- Backend + - Basic interface + - Sparql triple store: Manage triples via rdflib and sparql. +- Access controls + - Basic interface + - NullAC: A dummy access control mechanism. 
+- Essential utilities + - URI + - uuid + - namespaces + diff --git a/README.md b/README.md index da066f6..1956752 100644 --- a/README.md +++ b/README.md @@ -1,57 +1,53 @@ -The Black Star File System -========================== +# The Black Star File System +The Black Star File System (BSFS) is a semantic file system, meaning that it organizes files +by association, and can record files, their metadata and content in a structured manner. -### Developer tools setup -#### Test coverage (coverage) +## Installation -Resources: -* https://coverage.readthedocs.io/en/6.5.0/index.html -* https://nedbatchelder.com/blog/200710/flaws_in_coverage_measurement.html +You can install BSFS via pip: -Commands: -$ pip install coverage -$ coverage run ; coverage html ; xdg-open .htmlcov/index.html + $ pip install --extra-index-url https://pip.bsfs.io bsfs +## Development -#### Static code analysis (pylint) +Set up a virtual environment: -Resources: -* https://github.com/PyCQA/pylint -* https://pylint.org/ -* https://pylint.pycqa.org/en/latest/user_guide/messages/messages_overview.html#messages-overview + $ virtualenv env + $ source env/bin/activate -Commands: -$ pip install pylint -$ pylint bsfs +Install bsfs as editable from the git repository: + $ git clone https://git.bsfs.io/bsfs.git + $ cd bsfs + $ pip install -e . +Install the following additional packages besides bsfs: -#### Type analysis (mypy) + $ pip install coverage mypy pylint + $ pip install sphinx sphinx-copybutton furo + $ pip install build -Resources: -* https://github.com/python/mypy -* https://mypy.readthedocs.io/en/stable/ +To ensure code style discipline, run the following commands: -Commands: -$ pip install mypy -$ mypy + $ coverage run ; coverage html ; xdg-open .htmlcov/index.html + $ pylint bsfs + $ mypy +To build the package, do: + $ python -m build -#### Documentation (sphinx) - -Resources: -* -* - -Commands: -$ pip install ... 
-$ +To run only the tests (without coverage), run the following command from the **test folder**: + $ python -m unittest +To build the documentation, run the following commands from the **doc folder**: + $ sphinx-apidoc -f -o source/api ../bsfs/ --module-first -d 1 --separate + $ make html + $ xdg-open build/html/index.html diff --git a/bsfs/apps/__init__.py b/bsfs/apps/__init__.py index 3dec9ad..a85d5db 100644 --- a/bsfs/apps/__init__.py +++ b/bsfs/apps/__init__.py @@ -37,7 +37,7 @@ def main(argv=None): parser = argparse.ArgumentParser(description=main.__doc__, prog='bsfs') # version parser.add_argument('--version', action='version', - version='%(prog)s version {}.{}.{}'.format(*bsfs.version_info)) + version='%(prog)s version {}.{}.{}'.format(*bsfs.version_info)) # pylint: disable=C0209 # application selection parser.add_argument('app', choices=apps.keys(), help='Select the application to run.') -- cgit v1.2.3 From 2e07f33314c238e42bfadc5f39805f93ffbc622e Mon Sep 17 00:00:00 2001 From: Matthias Baumgartner Date: Thu, 2 Mar 2023 15:10:05 +0100 Subject: removed author and license notices from individual files --- bsfs/__init__.py | 5 ----- bsfs/apps/__init__.py | 5 ----- bsfs/apps/init.py | 5 ----- bsfs/apps/migrate.py | 5 ----- bsfs/front/__init__.py | 5 ----- bsfs/front/bsfs.py | 5 ----- bsfs/front/builder.py | 5 ----- bsfs/graph/__init__.py | 5 ----- bsfs/graph/ac/__init__.py | 5 ----- bsfs/graph/ac/base.py | 5 ----- bsfs/graph/ac/null.py | 5 ----- bsfs/graph/graph.py | 5 ----- bsfs/graph/nodes.py | 5 ----- bsfs/graph/resolve.py | 5 ----- bsfs/graph/result.py | 5 ----- bsfs/graph/walk.py | 5 ----- bsfs/namespace/__init__.py | 5 ----- bsfs/namespace/namespace.py | 5 ----- bsfs/namespace/predefined.py | 5 ----- bsfs/query/__init__.py | 5 ----- bsfs/query/ast/__init__.py | 3 --- bsfs/query/ast/fetch.py | 5 ----- bsfs/query/ast/filter_.py | 3 --- bsfs/query/matcher.py | 5 ----- bsfs/query/validator.py | 5 ----- bsfs/schema/__init__.py | 5 ----- bsfs/schema/schema.py | 5 ----- bsfs/schema/serialize.py | 5 ----- bsfs/schema/types.py | 5 ----- bsfs/triple_store/__init__.py | 5 ----- bsfs/triple_store/base.py | 5 ----- bsfs/triple_store/sparql/__init__.py | 5 ----- bsfs/triple_store/sparql/distance.py | 5 ----- bsfs/triple_store/sparql/parse_fetch.py | 5 ----- bsfs/triple_store/sparql/parse_filter.py | 5 ----- bsfs/triple_store/sparql/sparql.py | 5 ----- bsfs/triple_store/sparql/utils.py | 5 ----- bsfs/utils/__init__.py | 5 ----- bsfs/utils/commons.py | 5 ----- bsfs/utils/errors.py | 5 ----- bsfs/utils/uri.py | 5 ----- bsfs/utils/uuid.py | 5 ----- test/apps/test_init.py | 5 ----- test/apps/test_main.py | 5 ----- test/apps/test_migrate.py | 5 ----- test/front/test_bsfs.py | 5 ----- test/front/test_builder.py | 5 ----- test/graph/ac/test_base.py | 5 ----- test/graph/ac/test_null.py | 5 ----- test/graph/test_graph.py | 5 ----- test/graph/test_nodes.py | 5 ----- test/graph/test_resolve.py | 5 ----- test/graph/test_result.py | 5 ----- test/graph/test_walk.py | 5 ----- test/namespace/test_namespace.py | 5 ----- test/query/ast_test/test_fetch.py | 5 ----- test/query/ast_test/test_filter_.py | 5 ----- test/query/test_matcher.py | 5 ----- test/query/test_validator.py | 5 ----- test/schema/test_schema.py | 5 ----- test/schema/test_serialize.py | 5 ----- test/schema/test_types.py | 5 ----- test/triple_store/sparql/test_distance.py | 5 ----- test/triple_store/sparql/test_parse_fetch.py | 5 ----- test/triple_store/sparql/test_parse_filter.py | 5 ----- test/triple_store/sparql/test_sparql.py | 5 ----- 
test/triple_store/sparql/test_utils.py | 5 ----- test/triple_store/test_base.py | 5 ----- test/utils/test_commons.py | 5 ----- test/utils/test_uri.py | 5 ----- test/utils/test_uuid.py | 5 ----- 71 files changed, 351 deletions(-) diff --git a/bsfs/__init__.py b/bsfs/__init__.py index 079ffaf..cf08d64 100644 --- a/bsfs/__init__.py +++ b/bsfs/__init__.py @@ -1,9 +1,4 @@ -""" -Part of the BlackStar filesystem (bsfs) module. -A copy of the license is provided with the project. -Author: Matthias Baumgartner, 2022 -""" # imports import collections import typing diff --git a/bsfs/apps/__init__.py b/bsfs/apps/__init__.py index a85d5db..62dc5b5 100644 --- a/bsfs/apps/__init__.py +++ b/bsfs/apps/__init__.py @@ -1,10 +1,5 @@ #!/usr/bin/env python3 -""" -Part of the BlackStar filesystem (bsfs) module. -A copy of the license is provided with the project. -Author: Matthias Baumgartner, 2022 -""" # imports import argparse import typing diff --git a/bsfs/apps/init.py b/bsfs/apps/init.py index ec48525..9afbdd5 100644 --- a/bsfs/apps/init.py +++ b/bsfs/apps/init.py @@ -1,10 +1,5 @@ #!/usr/bin/env python3 -""" -Part of the BlackStar filesystem (bsfs) module. -A copy of the license is provided with the project. -Author: Matthias Baumgartner, 2022 -""" # imports import argparse import json diff --git a/bsfs/apps/migrate.py b/bsfs/apps/migrate.py index cb62542..34ea2e7 100644 --- a/bsfs/apps/migrate.py +++ b/bsfs/apps/migrate.py @@ -1,10 +1,5 @@ #!/usr/bin/env python3 -""" -Part of the BlackStar filesystem (bsfs) module. -A copy of the license is provided with the project. -Author: Matthias Baumgartner, 2022 -""" # imports import argparse import json diff --git a/bsfs/front/__init__.py b/bsfs/front/__init__.py index 92886ab..cedcd7f 100644 --- a/bsfs/front/__init__.py +++ b/bsfs/front/__init__.py @@ -1,9 +1,4 @@ -""" -Part of the BlackStar filesystem (bsfs) module. -A copy of the license is provided with the project. -Author: Matthias Baumgartner, 2022 -""" # imports import typing diff --git a/bsfs/front/bsfs.py b/bsfs/front/bsfs.py index 968b3f5..f437212 100644 --- a/bsfs/front/bsfs.py +++ b/bsfs/front/bsfs.py @@ -1,9 +1,4 @@ -""" -Part of the BlackStar filesystem (bsfs) module. -A copy of the license is provided with the project. -Author: Matthias Baumgartner, 2022 -""" # imports import typing diff --git a/bsfs/front/builder.py b/bsfs/front/builder.py index ecdc768..b1d488b 100644 --- a/bsfs/front/builder.py +++ b/bsfs/front/builder.py @@ -1,9 +1,4 @@ -""" -Part of the BlackStar filesystem (bsfs) module. -A copy of the license is provided with the project. -Author: Matthias Baumgartner, 2022 -""" # imports import typing diff --git a/bsfs/graph/__init__.py b/bsfs/graph/__init__.py index 82d2235..8d38d23 100644 --- a/bsfs/graph/__init__.py +++ b/bsfs/graph/__init__.py @@ -1,9 +1,4 @@ -""" -Part of the BlackStar filesystem (bsfs) module. -A copy of the license is provided with the project. -Author: Matthias Baumgartner, 2022 -""" # imports import typing diff --git a/bsfs/graph/ac/__init__.py b/bsfs/graph/ac/__init__.py index 420de01..11b45df 100644 --- a/bsfs/graph/ac/__init__.py +++ b/bsfs/graph/ac/__init__.py @@ -1,9 +1,4 @@ -""" -Part of the BlackStar filesystem (bsfs) module. -A copy of the license is provided with the project. -Author: Matthias Baumgartner, 2022 -""" # imports import typing diff --git a/bsfs/graph/ac/base.py b/bsfs/graph/ac/base.py index 2759557..e85c1dd 100644 --- a/bsfs/graph/ac/base.py +++ b/bsfs/graph/ac/base.py @@ -1,9 +1,4 @@ -""" -Part of the BlackStar filesystem (bsfs) module. 
-A copy of the license is provided with the project. -Author: Matthias Baumgartner, 2022 -""" # imports import abc import typing diff --git a/bsfs/graph/ac/null.py b/bsfs/graph/ac/null.py index e67b55d..3a391aa 100644 --- a/bsfs/graph/ac/null.py +++ b/bsfs/graph/ac/null.py @@ -1,9 +1,4 @@ -""" -Part of the BlackStar filesystem (bsfs) module. -A copy of the license is provided with the project. -Author: Matthias Baumgartner, 2022 -""" # imports import typing diff --git a/bsfs/graph/graph.py b/bsfs/graph/graph.py index 11fe835..ade51a5 100644 --- a/bsfs/graph/graph.py +++ b/bsfs/graph/graph.py @@ -1,9 +1,4 @@ -""" -Part of the BlackStar filesystem (bsfs) module. -A copy of the license is provided with the project. -Author: Matthias Baumgartner, 2022 -""" # imports import os import typing diff --git a/bsfs/graph/nodes.py b/bsfs/graph/nodes.py index c6bd5d8..c3530c1 100644 --- a/bsfs/graph/nodes.py +++ b/bsfs/graph/nodes.py @@ -1,9 +1,4 @@ -""" -Part of the BlackStar filesystem (bsfs) module. -A copy of the license is provided with the project. -Author: Matthias Baumgartner, 2022 -""" # imports from collections import abc import time diff --git a/bsfs/graph/resolve.py b/bsfs/graph/resolve.py index b3ab001..213ac4c 100644 --- a/bsfs/graph/resolve.py +++ b/bsfs/graph/resolve.py @@ -1,9 +1,4 @@ -""" -Part of the BlackStar filesystem (bsfs) module. -A copy of the license is provided with the project. -Author: Matthias Baumgartner, 2022 -""" # imports import typing diff --git a/bsfs/graph/result.py b/bsfs/graph/result.py index 31822f1..0fcbb13 100644 --- a/bsfs/graph/result.py +++ b/bsfs/graph/result.py @@ -1,9 +1,4 @@ -""" -Part of the BlackStar filesystem (bsfs) module. -A copy of the license is provided with the project. -Author: Matthias Baumgartner, 2022 -""" # imports from collections import defaultdict import typing diff --git a/bsfs/graph/walk.py b/bsfs/graph/walk.py index 1b1cfa0..6415c9b 100644 --- a/bsfs/graph/walk.py +++ b/bsfs/graph/walk.py @@ -1,9 +1,4 @@ -""" -Part of the BlackStar filesystem (bsfs) module. -A copy of the license is provided with the project. -Author: Matthias Baumgartner, 2022 -""" # imports from collections import abc import typing diff --git a/bsfs/namespace/__init__.py b/bsfs/namespace/__init__.py index 98d472f..1784808 100644 --- a/bsfs/namespace/__init__.py +++ b/bsfs/namespace/__init__.py @@ -1,9 +1,4 @@ -""" -Part of the BlackStar filesystem (bsfs) module. -A copy of the license is provided with the project. -Author: Matthias Baumgartner, 2022 -""" # imports import typing diff --git a/bsfs/namespace/namespace.py b/bsfs/namespace/namespace.py index 1d443c1..0a62b78 100644 --- a/bsfs/namespace/namespace.py +++ b/bsfs/namespace/namespace.py @@ -1,9 +1,4 @@ -""" -Part of the BlackStar filesystem (bsfs) module. -A copy of the license is provided with the project. -Author: Matthias Baumgartner, 2022 -""" # imports import typing diff --git a/bsfs/namespace/predefined.py b/bsfs/namespace/predefined.py index cd48a46..15f12ac 100644 --- a/bsfs/namespace/predefined.py +++ b/bsfs/namespace/predefined.py @@ -1,9 +1,4 @@ -""" -Part of the BlackStar filesystem (bsfs) module. -A copy of the license is provided with the project. -Author: Matthias Baumgartner, 2022 -""" # imports import typing diff --git a/bsfs/query/__init__.py b/bsfs/query/__init__.py index 21c7389..58ff03a 100644 --- a/bsfs/query/__init__.py +++ b/bsfs/query/__init__.py @@ -1,9 +1,4 @@ -""" -Part of the BlackStar filesystem (bsfs) module. -A copy of the license is provided with the project. 
-Author: Matthias Baumgartner, 2022 -""" # imports import typing diff --git a/bsfs/query/ast/__init__.py b/bsfs/query/ast/__init__.py index 66b097d..bceaac0 100644 --- a/bsfs/query/ast/__init__.py +++ b/bsfs/query/ast/__init__.py @@ -6,9 +6,6 @@ Classes beginning with an underscore (_) represent internal type hierarchies and should not be used for parsing. Note that the AST structures do not (and cannot) check semantic validity or consistency with a given schema. -Part of the BlackStar filesystem (bsfs) module. -A copy of the license is provided with the project. -Author: Matthias Baumgartner, 2022 """ # imports import typing diff --git a/bsfs/query/ast/fetch.py b/bsfs/query/ast/fetch.py index d653a8a..66d94e1 100644 --- a/bsfs/query/ast/fetch.py +++ b/bsfs/query/ast/fetch.py @@ -1,9 +1,4 @@ -""" -Part of the BlackStar filesystem (bsfs) module. -A copy of the license is provided with the project. -Author: Matthias Baumgartner, 2022 -""" # imports from collections import abc import typing diff --git a/bsfs/query/ast/filter_.py b/bsfs/query/ast/filter_.py index b29d89e..56c982e 100644 --- a/bsfs/query/ast/filter_.py +++ b/bsfs/query/ast/filter_.py @@ -22,9 +22,6 @@ This AST has multiple issues that are not verified upon its creation: * Conditions exclude each other * The predicate along the branch have incompatible domains and ranges. -Part of the BlackStar filesystem (bsfs) module. -A copy of the license is provided with the project. -Author: Matthias Baumgartner, 2022 """ # imports from collections import abc diff --git a/bsfs/query/matcher.py b/bsfs/query/matcher.py index a910756..5f3b07e 100644 --- a/bsfs/query/matcher.py +++ b/bsfs/query/matcher.py @@ -1,9 +1,4 @@ -""" -Part of the BlackStar filesystem (bsfs) module. -A copy of the license is provided with the project. -Author: Matthias Baumgartner, 2022 -""" # imports from collections import defaultdict from itertools import product diff --git a/bsfs/query/validator.py b/bsfs/query/validator.py index f0aa795..6e3afa1 100644 --- a/bsfs/query/validator.py +++ b/bsfs/query/validator.py @@ -1,9 +1,4 @@ -""" -Part of the BlackStar filesystem (bsfs) module. -A copy of the license is provided with the project. -Author: Matthias Baumgartner, 2022 -""" # imports import typing diff --git a/bsfs/schema/__init__.py b/bsfs/schema/__init__.py index f53512e..ca2e0cd 100644 --- a/bsfs/schema/__init__.py +++ b/bsfs/schema/__init__.py @@ -1,9 +1,4 @@ -""" -Part of the BlackStar filesystem (bsfs) module. -A copy of the license is provided with the project. -Author: Matthias Baumgartner, 2022 -""" # imports import typing diff --git a/bsfs/schema/schema.py b/bsfs/schema/schema.py index 0de4203..c104436 100644 --- a/bsfs/schema/schema.py +++ b/bsfs/schema/schema.py @@ -1,9 +1,4 @@ -""" -Part of the BlackStar filesystem (bsfs) module. -A copy of the license is provided with the project. -Author: Matthias Baumgartner, 2022 -""" # imports from collections import abc, namedtuple import typing diff --git a/bsfs/schema/serialize.py b/bsfs/schema/serialize.py index acc009a..b05b289 100644 --- a/bsfs/schema/serialize.py +++ b/bsfs/schema/serialize.py @@ -1,9 +1,4 @@ -""" -Part of the BlackStar filesystem (bsfs) module. -A copy of the license is provided with the project. 
-Author: Matthias Baumgartner, 2022 -""" # standard imports import itertools import typing diff --git a/bsfs/schema/types.py b/bsfs/schema/types.py index 12e7e94..54adffb 100644 --- a/bsfs/schema/types.py +++ b/bsfs/schema/types.py @@ -1,9 +1,4 @@ -""" -Part of the BlackStar filesystem (bsfs) module. -A copy of the license is provided with the project. -Author: Matthias Baumgartner, 2022 -""" # imports import typing diff --git a/bsfs/triple_store/__init__.py b/bsfs/triple_store/__init__.py index fb5a8a9..79a2887 100644 --- a/bsfs/triple_store/__init__.py +++ b/bsfs/triple_store/__init__.py @@ -1,9 +1,4 @@ -""" -Part of the BlackStar filesystem (bsfs) module. -A copy of the license is provided with the project. -Author: Matthias Baumgartner, 2022 -""" # imports import typing diff --git a/bsfs/triple_store/base.py b/bsfs/triple_store/base.py index 1baa63b..58b5670 100644 --- a/bsfs/triple_store/base.py +++ b/bsfs/triple_store/base.py @@ -1,9 +1,4 @@ -""" -Part of the BlackStar filesystem (bsfs) module. -A copy of the license is provided with the project. -Author: Matthias Baumgartner, 2022 -""" # imports import abc import typing diff --git a/bsfs/triple_store/sparql/__init__.py b/bsfs/triple_store/sparql/__init__.py index 285334a..cfa2732 100644 --- a/bsfs/triple_store/sparql/__init__.py +++ b/bsfs/triple_store/sparql/__init__.py @@ -1,9 +1,4 @@ -""" -Part of the BlackStar filesystem (bsfs) module. -A copy of the license is provided with the project. -Author: Matthias Baumgartner, 2022 -""" # imports import typing diff --git a/bsfs/triple_store/sparql/distance.py b/bsfs/triple_store/sparql/distance.py index 2f5387a..9b58088 100644 --- a/bsfs/triple_store/sparql/distance.py +++ b/bsfs/triple_store/sparql/distance.py @@ -1,9 +1,4 @@ -""" -Part of the BlackStar filesystem (bsfs) module. -A copy of the license is provided with the project. -Author: Matthias Baumgartner, 2022 -""" # standard imports import typing diff --git a/bsfs/triple_store/sparql/parse_fetch.py b/bsfs/triple_store/sparql/parse_fetch.py index 20d4e74..fab8173 100644 --- a/bsfs/triple_store/sparql/parse_fetch.py +++ b/bsfs/triple_store/sparql/parse_fetch.py @@ -1,9 +1,4 @@ -""" -Part of the BlackStar filesystem (bsfs) module. -A copy of the license is provided with the project. -Author: Matthias Baumgartner, 2022 -""" # standard imports import typing diff --git a/bsfs/triple_store/sparql/parse_filter.py b/bsfs/triple_store/sparql/parse_filter.py index dca0aea..ff22de2 100644 --- a/bsfs/triple_store/sparql/parse_filter.py +++ b/bsfs/triple_store/sparql/parse_filter.py @@ -1,9 +1,4 @@ -""" -Part of the BlackStar filesystem (bsfs) module. -A copy of the license is provided with the project. -Author: Matthias Baumgartner, 2022 -""" # imports import operator import typing diff --git a/bsfs/triple_store/sparql/sparql.py b/bsfs/triple_store/sparql/sparql.py index dbf9d45..5890bcc 100644 --- a/bsfs/triple_store/sparql/sparql.py +++ b/bsfs/triple_store/sparql/sparql.py @@ -1,9 +1,4 @@ -""" -Part of the BlackStar filesystem (bsfs) module. -A copy of the license is provided with the project. -Author: Matthias Baumgartner, 2022 -""" # imports import base64 import itertools diff --git a/bsfs/triple_store/sparql/utils.py b/bsfs/triple_store/sparql/utils.py index 51de893..38062c2 100644 --- a/bsfs/triple_store/sparql/utils.py +++ b/bsfs/triple_store/sparql/utils.py @@ -1,9 +1,4 @@ -""" -Part of the BlackStar filesystem (bsfs) module. -A copy of the license is provided with the project. 
-Author: Matthias Baumgartner, 2022 -""" # standard imports import typing diff --git a/bsfs/utils/__init__.py b/bsfs/utils/__init__.py index 6737cef..d497645 100644 --- a/bsfs/utils/__init__.py +++ b/bsfs/utils/__init__.py @@ -1,9 +1,4 @@ -""" -Part of the BlackStar filesystem (bsfs) module. -A copy of the license is provided with the project. -Author: Matthias Baumgartner, 2022 -""" # imports import typing diff --git a/bsfs/utils/commons.py b/bsfs/utils/commons.py index e9f0b7f..a7092ae 100644 --- a/bsfs/utils/commons.py +++ b/bsfs/utils/commons.py @@ -1,9 +1,4 @@ -""" -Part of the BlackStar filesystem (bsfs) module. -A copy of the license is provided with the project. -Author: Matthias Baumgartner, 2022 -""" # imports from collections import abc import typing diff --git a/bsfs/utils/errors.py b/bsfs/utils/errors.py index 6ae6484..b82e6e2 100644 --- a/bsfs/utils/errors.py +++ b/bsfs/utils/errors.py @@ -1,9 +1,4 @@ -""" -Part of the BlackStar filesystem (bsfs) module. -A copy of the license is provided with the project. -Author: Matthias Baumgartner, 2022 -""" # imports import typing diff --git a/bsfs/utils/uri.py b/bsfs/utils/uri.py index 84854a4..0693017 100644 --- a/bsfs/utils/uri.py +++ b/bsfs/utils/uri.py @@ -1,9 +1,4 @@ -""" -Part of the BlackStar filesystem (bsfs) module. -A copy of the license is provided with the project. -Author: Matthias Baumgartner, 2022 -""" # imports import re import typing diff --git a/bsfs/utils/uuid.py b/bsfs/utils/uuid.py index 70e1656..ad7fc1c 100644 --- a/bsfs/utils/uuid.py +++ b/bsfs/utils/uuid.py @@ -1,9 +1,4 @@ -""" -Part of the BlackStar filesystem (bsfs) module. -A copy of the license is provided with the project. -Author: Matthias Baumgartner, 2022 -""" # imports from collections import abc import hashlib diff --git a/test/apps/test_init.py b/test/apps/test_init.py index bae6a68..59e10eb 100644 --- a/test/apps/test_init.py +++ b/test/apps/test_init.py @@ -1,9 +1,4 @@ -""" -Part of the bsfs test suite. -A copy of the license is provided with the project. -Author: Matthias Baumgartner, 2022 -""" # imports import contextlib import io diff --git a/test/apps/test_main.py b/test/apps/test_main.py index ae19b5e..d61372f 100644 --- a/test/apps/test_main.py +++ b/test/apps/test_main.py @@ -1,9 +1,4 @@ -""" -Part of the bsfs test suite. -A copy of the license is provided with the project. -Author: Matthias Baumgartner, 2022 -""" # imports import contextlib import io diff --git a/test/apps/test_migrate.py b/test/apps/test_migrate.py index 230c032..618cb37 100644 --- a/test/apps/test_migrate.py +++ b/test/apps/test_migrate.py @@ -1,9 +1,4 @@ -""" -Part of the bsfs test suite. -A copy of the license is provided with the project. -Author: Matthias Baumgartner, 2022 -""" # imports import contextlib import io diff --git a/test/front/test_bsfs.py b/test/front/test_bsfs.py index 4eb36c3..8905bf8 100644 --- a/test/front/test_bsfs.py +++ b/test/front/test_bsfs.py @@ -1,9 +1,4 @@ -""" -Part of the bsfs test suite. -A copy of the license is provided with the project. -Author: Matthias Baumgartner, 2022 -""" # imports import unittest diff --git a/test/front/test_builder.py b/test/front/test_builder.py index 0328a0a..875fa8a 100644 --- a/test/front/test_builder.py +++ b/test/front/test_builder.py @@ -1,9 +1,4 @@ -""" -Part of the bsfs test suite. -A copy of the license is provided with the project. 
-Author: Matthias Baumgartner, 2022 -""" # imports import unittest diff --git a/test/graph/ac/test_base.py b/test/graph/ac/test_base.py index ad24e3d..addecd4 100644 --- a/test/graph/ac/test_base.py +++ b/test/graph/ac/test_base.py @@ -1,9 +1,4 @@ -""" -Part of the bsfs test suite. -A copy of the license is provided with the project. -Author: Matthias Baumgartner, 2022 -""" # imports import unittest diff --git a/test/graph/ac/test_null.py b/test/graph/ac/test_null.py index 6053f81..b695e7e 100644 --- a/test/graph/ac/test_null.py +++ b/test/graph/ac/test_null.py @@ -1,9 +1,4 @@ -""" -Part of the bsfs test suite. -A copy of the license is provided with the project. -Author: Matthias Baumgartner, 2022 -""" # imports import unittest diff --git a/test/graph/test_graph.py b/test/graph/test_graph.py index 93f8db7..e6d5ae4 100644 --- a/test/graph/test_graph.py +++ b/test/graph/test_graph.py @@ -1,9 +1,4 @@ -""" -Part of the bsfs test suite. -A copy of the license is provided with the project. -Author: Matthias Baumgartner, 2022 -""" # imports from functools import reduce import operator diff --git a/test/graph/test_nodes.py b/test/graph/test_nodes.py index bf73e6e..083b2d8 100644 --- a/test/graph/test_nodes.py +++ b/test/graph/test_nodes.py @@ -1,9 +1,4 @@ -""" -Part of the bsfs test suite. -A copy of the license is provided with the project. -Author: Matthias Baumgartner, 2022 -""" # standard imports from functools import partial import operator diff --git a/test/graph/test_resolve.py b/test/graph/test_resolve.py index b4d76c7..0223c49 100644 --- a/test/graph/test_resolve.py +++ b/test/graph/test_resolve.py @@ -1,9 +1,4 @@ -""" -Part of the bsfs test suite. -A copy of the license is provided with the project. -Author: Matthias Baumgartner, 2022 -""" # imports import unittest diff --git a/test/graph/test_result.py b/test/graph/test_result.py index 749b8ad..099234a 100644 --- a/test/graph/test_result.py +++ b/test/graph/test_result.py @@ -1,9 +1,4 @@ -""" -Part of the bsfs test suite. -A copy of the license is provided with the project. -Author: Matthias Baumgartner, 2022 -""" # imports import unittest diff --git a/test/graph/test_walk.py b/test/graph/test_walk.py index e5c8981..346896b 100644 --- a/test/graph/test_walk.py +++ b/test/graph/test_walk.py @@ -1,9 +1,4 @@ -""" -Part of the bsfs test suite. -A copy of the license is provided with the project. -Author: Matthias Baumgartner, 2022 -""" # imports import unittest diff --git a/test/namespace/test_namespace.py b/test/namespace/test_namespace.py index 2536203..ec2f393 100644 --- a/test/namespace/test_namespace.py +++ b/test/namespace/test_namespace.py @@ -1,9 +1,4 @@ -""" -Part of the tagit test suite. -A copy of the license is provided with the project. -Author: Matthias Baumgartner, 2022 -""" # imports import operator import unittest diff --git a/test/query/ast_test/test_fetch.py b/test/query/ast_test/test_fetch.py index 0c48a1f..ccb680e 100644 --- a/test/query/ast_test/test_fetch.py +++ b/test/query/ast_test/test_fetch.py @@ -1,9 +1,4 @@ -""" -Part of the tagit test suite. -A copy of the license is provided with the project. -Author: Matthias Baumgartner, 2022 -""" # imports import unittest diff --git a/test/query/ast_test/test_filter_.py b/test/query/ast_test/test_filter_.py index 39b98f8..cdc530c 100644 --- a/test/query/ast_test/test_filter_.py +++ b/test/query/ast_test/test_filter_.py @@ -1,9 +1,4 @@ -""" -Part of the tagit test suite. -A copy of the license is provided with the project. 
-Author: Matthias Baumgartner, 2022 -""" # imports import unittest diff --git a/test/query/test_matcher.py b/test/query/test_matcher.py index e830cf8..6b975b2 100644 --- a/test/query/test_matcher.py +++ b/test/query/test_matcher.py @@ -1,9 +1,4 @@ -""" -Part of the tagit test suite. -A copy of the license is provided with the project. -Author: Matthias Baumgartner, 2022 -""" # imports import operator import unittest diff --git a/test/query/test_validator.py b/test/query/test_validator.py index fec3d23..ca93118 100644 --- a/test/query/test_validator.py +++ b/test/query/test_validator.py @@ -1,9 +1,4 @@ -""" -Part of the tagit test suite. -A copy of the license is provided with the project. -Author: Matthias Baumgartner, 2022 -""" # imports import unittest diff --git a/test/schema/test_schema.py b/test/schema/test_schema.py index 414e542..f9ddb68 100644 --- a/test/schema/test_schema.py +++ b/test/schema/test_schema.py @@ -1,9 +1,4 @@ -""" -Part of the tagit test suite. -A copy of the license is provided with the project. -Author: Matthias Baumgartner, 2022 -""" # imports import operator import unittest diff --git a/test/schema/test_serialize.py b/test/schema/test_serialize.py index fc6b20a..84512e9 100644 --- a/test/schema/test_serialize.py +++ b/test/schema/test_serialize.py @@ -1,9 +1,4 @@ -""" -Part of the tagit test suite. -A copy of the license is provided with the project. -Author: Matthias Baumgartner, 2022 -""" # imports import re import unittest diff --git a/test/schema/test_types.py b/test/schema/test_types.py index c5895d2..f87d857 100644 --- a/test/schema/test_types.py +++ b/test/schema/test_types.py @@ -1,9 +1,4 @@ -""" -Part of the tagit test suite. -A copy of the license is provided with the project. -Author: Matthias Baumgartner, 2022 -""" # imports import operator import unittest diff --git a/test/triple_store/sparql/test_distance.py b/test/triple_store/sparql/test_distance.py index 0659459..e95be5a 100644 --- a/test/triple_store/sparql/test_distance.py +++ b/test/triple_store/sparql/test_distance.py @@ -1,9 +1,4 @@ -""" -Part of the bsfs test suite. -A copy of the license is provided with the project. -Author: Matthias Baumgartner, 2022 -""" # imports import numpy as np import unittest diff --git a/test/triple_store/sparql/test_parse_fetch.py b/test/triple_store/sparql/test_parse_fetch.py index 0961789..9284608 100644 --- a/test/triple_store/sparql/test_parse_fetch.py +++ b/test/triple_store/sparql/test_parse_fetch.py @@ -1,9 +1,4 @@ -""" -Part of the bsfs test suite. -A copy of the license is provided with the project. -Author: Matthias Baumgartner, 2022 -""" # imports import rdflib import unittest diff --git a/test/triple_store/sparql/test_parse_filter.py b/test/triple_store/sparql/test_parse_filter.py index 6fa0cd3..8a9940e 100644 --- a/test/triple_store/sparql/test_parse_filter.py +++ b/test/triple_store/sparql/test_parse_filter.py @@ -1,9 +1,4 @@ -""" -Part of the bsfs test suite. -A copy of the license is provided with the project. -Author: Matthias Baumgartner, 2022 -""" # imports import rdflib import unittest diff --git a/test/triple_store/sparql/test_sparql.py b/test/triple_store/sparql/test_sparql.py index 30876f2..b1d99ac 100644 --- a/test/triple_store/sparql/test_sparql.py +++ b/test/triple_store/sparql/test_sparql.py @@ -1,9 +1,4 @@ -""" -Part of the bsfs test suite. -A copy of the license is provided with the project. 
-Author: Matthias Baumgartner, 2022 -""" # imports import rdflib import unittest diff --git a/test/triple_store/sparql/test_utils.py b/test/triple_store/sparql/test_utils.py index edcf6d7..8f894bb 100644 --- a/test/triple_store/sparql/test_utils.py +++ b/test/triple_store/sparql/test_utils.py @@ -1,9 +1,4 @@ -""" -Part of the bsfs test suite. -A copy of the license is provided with the project. -Author: Matthias Baumgartner, 2022 -""" # standard imports import operator import re diff --git a/test/triple_store/test_base.py b/test/triple_store/test_base.py index 56a2539..4c4a9b6 100644 --- a/test/triple_store/test_base.py +++ b/test/triple_store/test_base.py @@ -1,9 +1,4 @@ -""" -Part of the bsfs test suite. -A copy of the license is provided with the project. -Author: Matthias Baumgartner, 2022 -""" # imports import unittest diff --git a/test/utils/test_commons.py b/test/utils/test_commons.py index 3ad6dea..29e3046 100644 --- a/test/utils/test_commons.py +++ b/test/utils/test_commons.py @@ -1,9 +1,4 @@ -""" -Part of the tagit test suite. -A copy of the license is provided with the project. -Author: Matthias Baumgartner, 2022 -""" # imports import unittest diff --git a/test/utils/test_uri.py b/test/utils/test_uri.py index 770e65a..6ee2ef7 100644 --- a/test/utils/test_uri.py +++ b/test/utils/test_uri.py @@ -1,9 +1,4 @@ -""" -Part of the tagit test suite. -A copy of the license is provided with the project. -Author: Matthias Baumgartner, 2022 -""" # imports import operator import unittest diff --git a/test/utils/test_uuid.py b/test/utils/test_uuid.py index 804b063..8f519d9 100644 --- a/test/utils/test_uuid.py +++ b/test/utils/test_uuid.py @@ -1,9 +1,4 @@ -""" -Part of the tagit test suite. -A copy of the license is provided with the project. -Author: Matthias Baumgartner, 2022 -""" # imports import os import re -- cgit v1.2.3 From b66ed641d5cbb4cb83f4a571223e4d65d80ed05c Mon Sep 17 00:00:00 2001 From: Matthias Baumgartner Date: Thu, 2 Mar 2023 15:29:12 +0100 Subject: check non-serializable URIs in the sparql store --- bsfs/triple_store/sparql/parse_filter.py | 2 ++ bsfs/triple_store/sparql/sparql.py | 6 ++++++ test/triple_store/sparql/test_parse_filter.py | 2 ++ test/triple_store/sparql/test_sparql.py | 6 ++++++ 4 files changed, 16 insertions(+) diff --git a/bsfs/triple_store/sparql/parse_filter.py b/bsfs/triple_store/sparql/parse_filter.py index ff22de2..8959b2c 100644 --- a/bsfs/triple_store/sparql/parse_filter.py +++ b/bsfs/triple_store/sparql/parse_filter.py @@ -267,6 +267,8 @@ class Filter(): """ if not isinstance(node_type, bsc.Node): raise errors.BackendError(f'expected Node, found {node_type}') + if not rdflib.term._is_valid_uri(node.value): # pylint: disable=protected-access + raise errors.BackendError(f'<{node.value}> is not a serializable uri') return f'VALUES {head} {{ <{node.value}> }}' def _equals(self, node_type: bsc.Vertex, node: ast.filter.Equals, head: str) -> str: diff --git a/bsfs/triple_store/sparql/sparql.py b/bsfs/triple_store/sparql/sparql.py index 5890bcc..bd98f46 100644 --- a/bsfs/triple_store/sparql/sparql.py +++ b/bsfs/triple_store/sparql/sparql.py @@ -284,6 +284,9 @@ class SparqlStore(base.TripleStoreBase): raise errors.ConsistencyError(f'{node_type} is not defined in the schema') # check and create guids for guid in guids: + # check convert to rdflib.URIRef + if not rdflib.term._is_valid_uri(guid): # pylint: disable=protected-access + raise ValueError(guids) subject = rdflib.URIRef(guid) # check node existence if (subject, rdflib.RDF.type, None) in self._graph: @@ 
-324,6 +327,9 @@ class SparqlStore(base.TripleStoreBase): # check guids # FIXME: Fail or skip inexistent nodes? guids = set(guids) + invalid = {guid for guid in guids if not rdflib.term._is_valid_uri(guid)} # pylint: disable=protected-access + if len(invalid) > 0: + raise ValueError(invalid) inconsistent = {guid for guid in guids if not self._has_type(guid, node_type)} if len(inconsistent) > 0: raise errors.InstanceError(inconsistent) diff --git a/test/triple_store/sparql/test_parse_filter.py b/test/triple_store/sparql/test_parse_filter.py index 8a9940e..6db9224 100644 --- a/test/triple_store/sparql/test_parse_filter.py +++ b/test/triple_store/sparql/test_parse_filter.py @@ -157,6 +157,8 @@ class TestParseFilter(unittest.TestCase): def test_is(self): # _is requires a node self.assertRaises(errors.BackendError, self.parser._is, self.schema.literal(ns.bsfs.Literal), ast.filter.Is('http://example.com/entity#1234'), '?ent') + # _is requires a serializable guid + self.assertRaises(errors.BackendError, self.parser._is, self.schema.node(ns.bsfs.Entity), ast.filter.Is('http://example.com/entity#foo and bar'), '?ent') # a single Is statement q = self.parser(self.schema.node(ns.bsfs.Entity), ast.filter.Is('http://example.com/entity#1234')) self.assertSetEqual({str(guid) for guid, in q(self.graph)}, diff --git a/test/triple_store/sparql/test_sparql.py b/test/triple_store/sparql/test_sparql.py index b1d99ac..d880082 100644 --- a/test/triple_store/sparql/test_sparql.py +++ b/test/triple_store/sparql/test_sparql.py @@ -678,6 +678,9 @@ class TestSparqlStore(unittest.TestCase): self.assertRaises(errors.ConsistencyError, store.create, self.schema.node(ns.bsfs.Entity).child(ns.bsfs.invalid), { URI('http://example.com/me/entity#1234'), URI('http://example.com/me/entity#4321')}) + # guid must be valid + self.assertRaises(ValueError, store.create, self.schema.node(ns.bsfs.Entity), {URI('http://example.com/me/foo and bar')}) + # can create some nodes ent_type = store.schema.node(ns.bsfs.Entity) store.create(ent_type, {URI('http://example.com/me/entity#1234'), URI('http://example.com/me/entity#4321')}) @@ -766,6 +769,9 @@ class TestSparqlStore(unittest.TestCase): # invalid predicate is not permitted self.assertRaises(errors.ConsistencyError, store.set, ent_type, ent_ids, p_invalid, {'http://example.com/me/tag#1234'}) + # invalid guid is not permitted + self.assertRaises(ValueError, store.set, ent_type, {URI('http://example.com/me/foo and bar')}, p_filesize, {1234}) + # predicate must match node_type self.assertRaises(errors.ConsistencyError, store.set, tag_type, tag_ids, p_filesize, {1234}) -- cgit v1.2.3 From 28a021483c13e974e00b6159f0653b0727df9d10 Mon Sep 17 00:00:00 2001 From: Matthias Baumgartner Date: Thu, 2 Mar 2023 16:40:00 +0100 Subject: prohibit certain characters in URI and ensure URIs in bsfs.graph --- bsfs/graph/nodes.py | 5 ++--- bsfs/schema/types.py | 2 +- bsfs/triple_store/sparql/parse_filter.py | 6 ++---- bsfs/triple_store/sparql/sparql.py | 10 ++-------- bsfs/utils/uri.py | 8 +++++--- test/graph/test_nodes.py | 19 ++++++++++++------- test/query/ast_test/test_filter_.py | 2 +- test/triple_store/sparql/test_parse_filter.py | 2 +- test/triple_store/sparql/test_sparql.py | 4 ++-- test/utils/test_uri.py | 19 ++++++++++++++----- 10 files changed, 42 insertions(+), 35 deletions(-) diff --git a/bsfs/graph/nodes.py b/bsfs/graph/nodes.py index c3530c1..84996c7 100644 --- a/bsfs/graph/nodes.py +++ b/bsfs/graph/nodes.py @@ -52,9 +52,8 @@ class Nodes(): self._backend = backend self._ac = access_control 
self._node_type = node_type - self._guids = set(guids) - # create helper instances - # FIXME: Assumes that the schema does not change while the instance is in use! + # convert to URI since this is not guaranteed by Graph + self._guids = {URI(guid) for guid in guids} def __eq__(self, other: typing.Any) -> bool: return isinstance(other, Nodes) \ diff --git a/bsfs/schema/types.py b/bsfs/schema/types.py index 54adffb..104580d 100644 --- a/bsfs/schema/types.py +++ b/bsfs/schema/types.py @@ -98,7 +98,7 @@ class _Type(): parent: typing.Optional['_Type'] = None, **annotations: typing.Any, ): - self.uri = uri + self.uri = URI(uri) self.parent = parent self.annotations = annotations diff --git a/bsfs/triple_store/sparql/parse_filter.py b/bsfs/triple_store/sparql/parse_filter.py index 8959b2c..bf19a02 100644 --- a/bsfs/triple_store/sparql/parse_filter.py +++ b/bsfs/triple_store/sparql/parse_filter.py @@ -154,7 +154,7 @@ class Filter(): puri = f'<{puri}>' # type: ignore [assignment] # variable re-use confuses mypy # apply reverse flag if node.reverse: - puri = URI('^' + puri) + puri = '^' + puri dom, rng = rng, dom # type: ignore [assignment] # variable re-use confuses mypy # check path consistency if not node_type <= dom: @@ -267,9 +267,7 @@ class Filter(): """ if not isinstance(node_type, bsc.Node): raise errors.BackendError(f'expected Node, found {node_type}') - if not rdflib.term._is_valid_uri(node.value): # pylint: disable=protected-access - raise errors.BackendError(f'<{node.value}> is not a serializable uri') - return f'VALUES {head} {{ <{node.value}> }}' + return f'VALUES {head} {{ <{URI(node.value)}> }}' def _equals(self, node_type: bsc.Vertex, node: ast.filter.Equals, head: str) -> str: """ diff --git a/bsfs/triple_store/sparql/sparql.py b/bsfs/triple_store/sparql/sparql.py index bd98f46..68c0027 100644 --- a/bsfs/triple_store/sparql/sparql.py +++ b/bsfs/triple_store/sparql/sparql.py @@ -284,10 +284,7 @@ class SparqlStore(base.TripleStoreBase): raise errors.ConsistencyError(f'{node_type} is not defined in the schema') # check and create guids for guid in guids: - # check convert to rdflib.URIRef - if not rdflib.term._is_valid_uri(guid): # pylint: disable=protected-access - raise ValueError(guids) - subject = rdflib.URIRef(guid) + subject = rdflib.URIRef(URI(guid)) # check node existence if (subject, rdflib.RDF.type, None) in self._graph: # FIXME: node exists and may have a different type! ignore? raise? report? @@ -326,10 +323,7 @@ class SparqlStore(base.TripleStoreBase): raise errors.InstanceError(inconsistent) # check guids # FIXME: Fail or skip inexistent nodes? - guids = set(guids) - invalid = {guid for guid in guids if not rdflib.term._is_valid_uri(guid)} # pylint: disable=protected-access - if len(invalid) > 0: - raise ValueError(invalid) + guids = {URI(guid) for guid in guids} inconsistent = {guid for guid in guids if not self._has_type(guid, node_type)} if len(inconsistent) > 0: raise errors.InstanceError(inconsistent) diff --git a/bsfs/utils/uri.py b/bsfs/utils/uri.py index 0693017..5755a6e 100644 --- a/bsfs/utils/uri.py +++ b/bsfs/utils/uri.py @@ -4,6 +4,8 @@ import re import typing # constants +RX_CHARS = re.compile(r'[<>" {}|\\^]') + RX_URI = re.compile(r''' ^ (?:(?P[^:/?#]+):)? # scheme, ://-delimited @@ -77,6 +79,9 @@ class URI(str): no claim about the validity of an URI! 
""" + # check characters + if RX_CHARS.search(query) is not None: + return False # check uri parts = RX_URI.match(query) if parts is not None: @@ -227,9 +232,6 @@ class URI(str): # overload formatting methods - def format(self, *args, **kwargs) -> 'URI': - return URI(super().format(*args, **kwargs)) - def __mod__(self, *args) -> 'URI': return URI(super().__mod__(*args)) diff --git a/test/graph/test_nodes.py b/test/graph/test_nodes.py index 083b2d8..dca887a 100644 --- a/test/graph/test_nodes.py +++ b/test/graph/test_nodes.py @@ -123,15 +123,19 @@ class TestNodes(unittest.TestCase): URI('http://example.com/me/tag#4321'), } + def test_construct(self): + self.assertIsInstance(Nodes(self.backend, self.ac, self.ent_type, {'http://example.com/me-and-you'}), Nodes) + self.assertRaises(ValueError, Nodes, self.backend, self.ac, self.ent_type, {'http://example.com/me and you'}) + def test_str(self): # str baseline - nodes = Nodes(self.backend, self.ac, self.ent_type, self.ent_ids) - self.assertEqual(str(nodes), f'Nodes({self.ent_type}, {self.ent_ids})') - self.assertEqual(repr(nodes), f'Nodes({self.backend}, {self.ac}, {self.ent_type}, {self.ent_ids})') + nodes = Nodes(self.backend, self.ac, self.ent_type, {URI('http://example.com/me/entity#1234')}) + self.assertEqual(str(nodes), f"Nodes({self.ent_type}, {{'http://example.com/me/entity#1234'}})") + self.assertEqual(repr(nodes), f"Nodes({self.backend}, {self.ac}, {self.ent_type}, {{'http://example.com/me/entity#1234'}})") # str respects node_type - nodes = Nodes(self.backend, self.ac, self.tag_type, self.tag_ids) - self.assertEqual(str(nodes), f'Nodes({self.tag_type}, {self.tag_ids})') - self.assertEqual(repr(nodes), f'Nodes({self.backend}, {self.ac}, {self.tag_type}, {self.tag_ids})') + nodes = Nodes(self.backend, self.ac, self.tag_type, {URI('http://example.com/me/tag#1234')}) + self.assertEqual(str(nodes), f"Nodes({self.tag_type}, {{'http://example.com/me/tag#1234'}})") + self.assertEqual(repr(nodes), f"Nodes({self.backend}, {self.ac}, {self.tag_type}, {{'http://example.com/me/tag#1234'}})") # str respects guids nodes = Nodes(self.backend, self.ac, self.ent_type, {URI('http://example.com/me/entity#foo')}) self.assertEqual(str(nodes), f'Nodes({self.ent_type}, {{\'http://example.com/me/entity#foo\'}})') @@ -437,7 +441,8 @@ class TestNodes(unittest.TestCase): self.assertRaises(TypeError, nodes.get, 1234) self.assertRaises(TypeError, nodes.get, (ns.bse.tag, 1234)) self.assertRaises(TypeError, nodes.get, (1234, ns.bse.tag)) - self.assertRaises(errors.ConsistencyError, nodes.get, 'hello world') + self.assertRaises(ValueError, nodes.get, 'hello world') + self.assertRaises(errors.ConsistencyError, nodes.get, 'hello_world') self.assertRaises(errors.ConsistencyError, nodes.get, ns.bse.invalid) self.assertRaises(errors.ConsistencyError, nodes.get, (ns.bse.tag, bst.invalid)) # can pass multiple paths diff --git a/test/query/ast_test/test_filter_.py b/test/query/ast_test/test_filter_.py index cdc530c..d0d42ea 100644 --- a/test/query/ast_test/test_filter_.py +++ b/test/query/ast_test/test_filter_.py @@ -382,7 +382,7 @@ class TestPredicate(unittest.TestCase): # member returns predicate # predicate must be an URI self.assertEqual(Predicate(ns.bse.filesize).predicate, ns.bse.filesize) - self.assertEqual(Predicate(URI('hello world')).predicate, URI('hello world')) + self.assertEqual(Predicate(URI('hello_world')).predicate, URI('hello_world')) self.assertRaises(TypeError, Predicate, 1234) self.assertRaises(TypeError, Predicate, FilterExpression()) 
self.assertRaises(TypeError, Predicate, FilterExpression()) diff --git a/test/triple_store/sparql/test_parse_filter.py b/test/triple_store/sparql/test_parse_filter.py index 6db9224..5b6ca8a 100644 --- a/test/triple_store/sparql/test_parse_filter.py +++ b/test/triple_store/sparql/test_parse_filter.py @@ -158,7 +158,7 @@ class TestParseFilter(unittest.TestCase): # _is requires a node self.assertRaises(errors.BackendError, self.parser._is, self.schema.literal(ns.bsfs.Literal), ast.filter.Is('http://example.com/entity#1234'), '?ent') # _is requires a serializable guid - self.assertRaises(errors.BackendError, self.parser._is, self.schema.node(ns.bsfs.Entity), ast.filter.Is('http://example.com/entity#foo and bar'), '?ent') + self.assertRaises(ValueError, self.parser._is, self.schema.node(ns.bsfs.Entity), ast.filter.Is('http://example.com/entity#foo and bar'), '?ent') # a single Is statement q = self.parser(self.schema.node(ns.bsfs.Entity), ast.filter.Is('http://example.com/entity#1234')) self.assertSetEqual({str(guid) for guid, in q(self.graph)}, diff --git a/test/triple_store/sparql/test_sparql.py b/test/triple_store/sparql/test_sparql.py index d880082..f45ca37 100644 --- a/test/triple_store/sparql/test_sparql.py +++ b/test/triple_store/sparql/test_sparql.py @@ -679,7 +679,7 @@ class TestSparqlStore(unittest.TestCase): URI('http://example.com/me/entity#1234'), URI('http://example.com/me/entity#4321')}) # guid must be valid - self.assertRaises(ValueError, store.create, self.schema.node(ns.bsfs.Entity), {URI('http://example.com/me/foo and bar')}) + self.assertRaises(ValueError, store.create, self.schema.node(ns.bsfs.Entity), {'http://example.com/me/foo and bar'}) # can create some nodes ent_type = store.schema.node(ns.bsfs.Entity) @@ -770,7 +770,7 @@ class TestSparqlStore(unittest.TestCase): self.assertRaises(errors.ConsistencyError, store.set, ent_type, ent_ids, p_invalid, {'http://example.com/me/tag#1234'}) # invalid guid is not permitted - self.assertRaises(ValueError, store.set, ent_type, {URI('http://example.com/me/foo and bar')}, p_filesize, {1234}) + self.assertRaises(ValueError, store.set, ent_type, {'http://example.com/me/foo and bar'}, p_filesize, {1234}) # predicate must match node_type self.assertRaises(errors.ConsistencyError, store.set, tag_type, tag_ids, p_filesize, {1234}) diff --git a/test/utils/test_uri.py b/test/utils/test_uri.py index 6ee2ef7..1c4c9f9 100644 --- a/test/utils/test_uri.py +++ b/test/utils/test_uri.py @@ -35,6 +35,16 @@ class TestURI(unittest.TestCase): self.assertTrue(URI.is_parseable('telnet://192.0.2.16:80/')) self.assertTrue(URI.is_parseable('urn:oasis:names:specification:docbook:dtd:xml:4.1.2')) + # some characters are prohibited + self.assertFalse(URI.is_parseable('http://example.com/foobar')) + self.assertFalse(URI.is_parseable('http://example.com/foo bar')) + self.assertFalse(URI.is_parseable('http://example.com/foo{bar')) + self.assertFalse(URI.is_parseable('http://example.com/foo}bar')) + self.assertFalse(URI.is_parseable('http://example.com/foo|bar')) + self.assertFalse(URI.is_parseable('http://example.com/foo^bar')) + self.assertFalse(URI.is_parseable('http://example.com/foo\\bar')) + # uri cannot end with a scheme delimiter self.assertFalse(URI.is_parseable('http://')) # port must be a number @@ -159,10 +169,10 @@ class TestURI(unittest.TestCase): def test_overloaded(self): # composition - self.assertIsInstance(URI('http://user@www.example.com:1234/{}/path1?{}#fragment') + 'hello', URI) - 
self.assertIsInstance(URI('http://user@www.example.com:1234/{}/path1?{}#fragment') * 2, URI) - self.assertIsInstance(2 * URI('http://user@www.example.com:1234/{}/path1?{}#fragment'), URI) # rmul - self.assertIsInstance(URI('http://user@www.example.com:1234/{}/path1?{}#fragment').join(['hello', 'world']) , URI) + self.assertIsInstance(URI('http://user@www.example.com:1234/path0/path1?query#fragment') + 'hello', URI) + self.assertIsInstance(URI('http://user@www.example.com:1234/path0/path1?query#fragment') * 2, URI) + self.assertIsInstance(2 * URI('http://user@www.example.com:1234/path0/path1?query#fragment'), URI) # rmul + self.assertIsInstance(URI('http://user@www.example.com:1234/path0/path1?query#fragment').join(['hello', 'world']) , URI) # stripping self.assertIsInstance(URI('http://user@www.example.com:1234/path0/path1?query#fragment').strip(), URI) self.assertIsInstance(URI('http://user@www.example.com:1234/path0/path1?query#fragment').lstrip(), URI) @@ -171,7 +181,6 @@ class TestURI(unittest.TestCase): self.assertIsInstance(URI('http://user@www.example.com:1234/path0/path1?query#fragment').lower(), URI) self.assertIsInstance(URI('http://user@www.example.com:1234/path0/path1?query#fragment').upper(), URI) # formatting - self.assertIsInstance(URI('http://user@www.example.com:1234/{}/path1?{}#fragment').format('hello', 'world'), URI) self.assertIsInstance(URI('http://user@www.example.com:1234/%s/path1?%s#fragment') % ('hello', 'world'), URI) self.assertIsInstance(URI('http://user@www.example.com:1234/path0/path1?query#fragment').replace('path0', 'pathX'), URI) -- cgit v1.2.3 From 6b9379d75198082054c35e44bc2cd880353a7485 Mon Sep 17 00:00:00 2001 From: Matthias Baumgartner Date: Thu, 2 Mar 2023 16:40:43 +0100 Subject: hardening --- bsfs/graph/graph.py | 16 ++-------------- bsfs/graph/nodes.py | 8 +++++--- bsfs/graph/resolve.py | 8 ++++++++ bsfs/query/validator.py | 10 +++++++++- test/graph/test_resolve.py | 2 +- test/query/test_validator.py | 4 ++-- 6 files changed, 27 insertions(+), 21 deletions(-) diff --git a/bsfs/graph/graph.py b/bsfs/graph/graph.py index ade51a5..1b4c212 100644 --- a/bsfs/graph/graph.py +++ b/bsfs/graph/graph.py @@ -36,12 +36,6 @@ class Graph(): # access controls. _ac: ac.AccessControlBase - # query resolver. - _resolver: resolve.Filter - - # query validator. - _validate: validate.Filter - def __init__( self, backend: TripleStoreBase, @@ -50,9 +44,6 @@ class Graph(): # store members self._backend = backend self._ac = access_control - # helper classes - self._resolver = resolve.Filter(self._backend.schema) - self._validate = validate.Filter(self._backend.schema) # ensure Graph schema requirements self.migrate(self._backend.schema) @@ -94,9 +85,6 @@ class Graph(): # migrate schema in backend # FIXME: consult access controls! 
self._backend.schema = schema - # re-initialize members - self._resolver.schema = self.schema - self._validate.schema = self.schema # return self return self @@ -164,12 +152,12 @@ class Graph(): # get node type type_ = self.schema.node(node_type) # resolve Nodes instances - query = self._resolver(type_, query) + query = resolve.Filter(self._backend.schema).resolve(type_, query) # add access controls to query query = self._ac.filter_read(type_, query) # validate query if query is not None: - self._validate(type_, query) + validate.Filter(self._backend.schema).validate(type_, query) # query the backend and return the (non-materialized) result return self._backend.get(type_, query) diff --git a/bsfs/graph/nodes.py b/bsfs/graph/nodes.py index 84996c7..74f4c4f 100644 --- a/bsfs/graph/nodes.py +++ b/bsfs/graph/nodes.py @@ -25,7 +25,9 @@ __all__: typing.Sequence[str] = ( ## code ## class Nodes(): - """ + """Container for graph nodes, provides operations on nodes. + + NOTE: Should not be created directly but only via `bsfs.graph.Graph`. NOTE: guids may or may not exist. This is not verified as nodes are created on demand. """ @@ -255,8 +257,8 @@ class Nodes(): filter = self._ac.filter_read(self.node_type, filter) # type: ignore [assignment] # validate queries - validate.Filter(self._backend.schema)(self.node_type, filter) - validate.Fetch(self._backend.schema)(self.node_type, fetch) + validate.Filter(self._backend.schema).validate(self.node_type, filter) + validate.Fetch(self._backend.schema).validate(self.node_type, fetch) # process results, convert if need be def triple_iter(): diff --git a/bsfs/graph/resolve.py b/bsfs/graph/resolve.py index 213ac4c..0ba1e36 100644 --- a/bsfs/graph/resolve.py +++ b/bsfs/graph/resolve.py @@ -40,6 +40,14 @@ class Filter(): root_type: bsc.Node, node: typing.Optional[ast.filter.FilterExpression], ): + """Alias for `Resolve.resolve`.""" + return self.resolve(root_type, node) + + def resolve( + self, + root_type: bsc.Node, + node: typing.Optional[ast.filter.FilterExpression], + ): if node is None: return None return self._parse_filter_expression(root_type, node) diff --git a/bsfs/query/validator.py b/bsfs/query/validator.py index 6e3afa1..b259ea0 100644 --- a/bsfs/query/validator.py +++ b/bsfs/query/validator.py @@ -37,6 +37,10 @@ class Filter(): self.schema = schema def __call__(self, root_type: bsc.Node, query: ast.filter.FilterExpression): + """Alias for `Filter.validate`.""" + return self.validate(root_type, query) + + def validate(self, root_type: bsc.Node, query: ast.filter.FilterExpression): """Validate a filter *query*, assuming the subject having *root_type*. Raises a `bsfs.utils.errors.ConsistencyError` if the query violates the schema. @@ -237,7 +241,11 @@ class Fetch(): def __init__(self, schema: bsc.Schema): self.schema = schema - def __call__(self, root_type: bsc.Node, query: ast.fetch.FetchExpression): + def __call__(self, root_type: bsc.Node, query: ast.filter.FilterExpression): + """Alias for `Fetch.validate`.""" + return self.validate(root_type, query) + + def validate(self, root_type: bsc.Node, query: ast.fetch.FetchExpression): """Validate a fetch *query*, assuming the subject having *root_type*. Raises a `bsfs.utils.errors.ConsistencyError` if the query violates the schema. 
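
The hardening change above drops the cached resolver and validator from Graph and instead builds both against the backend's current schema on every call, so a schema migration can no longer leave stale helpers behind. The sketch below restates the resulting Graph.get pipeline outside the class; it is illustrative only: `backend`, `ac`, `node_type_uri` and `query` are placeholders, and the import paths are assumed to match the module layout shown in these diffs.

    # Minimal sketch of the hardened Graph.get pipeline (illustrative only).
    from bsfs.graph import resolve
    from bsfs.query import validate

    def get_nodes(backend, ac, node_type_uri, query=None):
        """Resolve, access-filter, validate, then fetch from the backend."""
        type_ = backend.schema.node(node_type_uri)
        # replace Nodes instances in the query by plain filter expressions
        query = resolve.Filter(backend.schema).resolve(type_, query)
        # narrow the query according to the access controls
        query = ac.filter_read(type_, query)
        # validate against the current schema; raises a ConsistencyError on violation
        if query is not None:
            validate.Filter(backend.schema).validate(type_, query)
        # return the (non-materialized) guid iterator
        return backend.get(type_, query)
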
diff --git a/test/graph/test_resolve.py b/test/graph/test_resolve.py index 0223c49..accb565 100644 --- a/test/graph/test_resolve.py +++ b/test/graph/test_resolve.py @@ -25,7 +25,7 @@ class TestFilter(unittest.TestCase): """ - def test_call(self): + def test_call(self): # tests resolve implicitly schema = bsc.from_string(''' prefix rdfs: prefix xsd: diff --git a/test/query/test_validator.py b/test/query/test_validator.py index ca93118..bbfd2e6 100644 --- a/test/query/test_validator.py +++ b/test/query/test_validator.py @@ -70,7 +70,7 @@ class TestFilter(unittest.TestCase): ''') self.validate = Filter(self.schema) - def test_call(self): + def test_call(self): # tests validate implicitly # root_type must be a _schema.Node self.assertRaises(TypeError, self.validate, 1234, None) self.assertRaises(TypeError, self.validate, '1234', None) @@ -309,7 +309,7 @@ class TestFetch(unittest.TestCase): ''') self.validate = Fetch(self.schema) - def test_call(self): + def test_call(self): # tests validate implicitly # call accepts correct expressions self.assertTrue(self.validate(self.schema.node(ns.bsfs.Entity), ast.fetch.Fetch(ns.bse.tag, ast.fetch.Value(ns.bse.label, 'value')))) -- cgit v1.2.3 From 2c6c23f85e7f2123c508f9ff8a4aa776948bb589 Mon Sep 17 00:00:00 2001 From: Matthias Baumgartner Date: Thu, 2 Mar 2023 16:46:11 +0100 Subject: minor style fixes --- bsfs/graph/resolve.py | 1 + bsfs/query/validator.py | 8 ++++---- bsfs/triple_store/sparql/parse_filter.py | 6 +++--- 3 files changed, 8 insertions(+), 7 deletions(-) diff --git a/bsfs/graph/resolve.py b/bsfs/graph/resolve.py index 0ba1e36..95dcfc1 100644 --- a/bsfs/graph/resolve.py +++ b/bsfs/graph/resolve.py @@ -48,6 +48,7 @@ class Filter(): root_type: bsc.Node, node: typing.Optional[ast.filter.FilterExpression], ): + """Resolve Nodes instances of a *node* query starting at *root_type*.""" if node is None: return None return self._parse_filter_expression(root_type, node) diff --git a/bsfs/query/validator.py b/bsfs/query/validator.py index b259ea0..1ce44e9 100644 --- a/bsfs/query/validator.py +++ b/bsfs/query/validator.py @@ -36,11 +36,11 @@ class Filter(): def __init__(self, schema: bsc.Schema): self.schema = schema - def __call__(self, root_type: bsc.Node, query: ast.filter.FilterExpression): + def __call__(self, root_type: bsc.Node, query: ast.filter.FilterExpression) -> bool: """Alias for `Filter.validate`.""" return self.validate(root_type, query) - def validate(self, root_type: bsc.Node, query: ast.filter.FilterExpression): + def validate(self, root_type: bsc.Node, query: ast.filter.FilterExpression) -> bool: """Validate a filter *query*, assuming the subject having *root_type*. Raises a `bsfs.utils.errors.ConsistencyError` if the query violates the schema. @@ -241,11 +241,11 @@ class Fetch(): def __init__(self, schema: bsc.Schema): self.schema = schema - def __call__(self, root_type: bsc.Node, query: ast.filter.FilterExpression): + def __call__(self, root_type: bsc.Node, query: ast.fetch.FetchExpression) -> bool: """Alias for `Fetch.validate`.""" return self.validate(root_type, query) - def validate(self, root_type: bsc.Node, query: ast.fetch.FetchExpression): + def validate(self, root_type: bsc.Node, query: ast.fetch.FetchExpression) -> bool: """Validate a fetch *query*, assuming the subject having *root_type*. Raises a `bsfs.utils.errors.ConsistencyError` if the query violates the schema. 
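
With the style fixes above, both validators expose validate() with __call__ as a thin alias and annotate a bool return, while schema violations still surface as bsfs.utils.errors.ConsistencyError. A hedged usage sketch follows; `sch` is a placeholder for a bsfs Schema that already defines the Entity node and the bse:tag predicate used here.

    # Illustrative only: `sch` and the Entity/tag definitions are assumed to
    # come from the surrounding application schema.
    from bsfs.namespace import ns
    from bsfs.query import ast, validate

    checker = validate.Filter(sch)
    root = sch.node(ns.bsfs.Entity)
    query = ast.filter.Any(ns.bse.tag, ast.filter.Is('http://example.com/me/tag#1234'))
    checker.validate(root, query)   # expected to return True for a consistent query
    checker(root, query)            # __call__ is just an alias for validate()
    # a query that violates the schema raises errors.ConsistencyError instead
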
diff --git a/bsfs/triple_store/sparql/parse_filter.py b/bsfs/triple_store/sparql/parse_filter.py index bf19a02..2f5a25b 100644 --- a/bsfs/triple_store/sparql/parse_filter.py +++ b/bsfs/triple_store/sparql/parse_filter.py @@ -151,16 +151,16 @@ class Filter(): raise errors.BackendError(f'the range of predicate {pred} is undefined') dom, rng = pred.domain, pred.range # encapsulate predicate uri - puri = f'<{puri}>' # type: ignore [assignment] # variable re-use confuses mypy + uri_str = f'<{puri}>' # apply reverse flag if node.reverse: - puri = '^' + puri + uri_str = '^' + uri_str dom, rng = rng, dom # type: ignore [assignment] # variable re-use confuses mypy # check path consistency if not node_type <= dom: raise errors.ConsistencyError(f'expected type {dom} or subtype thereof, found {node_type}') # return predicate URI and next node type - return puri, rng + return uri_str, rng def _any(self, node_type: bsc.Vertex, node: ast.filter.Any, head: str) -> str: """ -- cgit v1.2.3 From 4fead04055be4967d9ea3b24ff61fe37a93108dd Mon Sep 17 00:00:00 2001 From: Matthias Baumgartner Date: Sat, 4 Mar 2023 13:31:11 +0100 Subject: namespace refactoring and cleanup --- bsfs/graph/ac/null.py | 2 +- bsfs/graph/nodes.py | 4 +- bsfs/graph/resolve.py | 4 +- bsfs/graph/schema.nt | 11 +- bsfs/namespace/__init__.py | 3 +- bsfs/namespace/namespace.py | 97 ++------ bsfs/namespace/predefined.py | 27 +-- bsfs/query/ast/filter_.py | 3 +- bsfs/query/matcher.py | 4 +- bsfs/query/validator.py | 4 +- bsfs/schema/serialize.py | 15 +- bsfs/schema/types.py | 14 +- bsfs/triple_store/sparql/distance.py | 6 +- bsfs/triple_store/sparql/sparql.py | 6 +- test/apps/schema-1.nt | 4 +- test/apps/schema-2.nt | 4 +- test/graph/ac/test_null.py | 17 +- test/graph/test_graph.py | 52 ++-- test/graph/test_nodes.py | 124 +++++----- test/graph/test_resolve.py | 18 +- test/graph/test_result.py | 2 + test/graph/test_walk.py | 48 ++-- test/namespace/test_namespace.py | 126 +++------- test/query/test_validator.py | 25 +- test/schema/test_schema.py | 13 +- test/schema/test_serialize.py | 335 ++++++++++++++------------ test/schema/test_types.py | 2 + test/triple_store/sparql/test_parse_fetch.py | 95 ++++---- test/triple_store/sparql/test_parse_filter.py | 39 +-- test/triple_store/sparql/test_sparql.py | 104 ++++---- test/triple_store/sparql/test_utils.py | 24 +- 31 files changed, 594 insertions(+), 638 deletions(-) diff --git a/bsfs/graph/ac/null.py b/bsfs/graph/ac/null.py index 3a391aa..c9ec7d0 100644 --- a/bsfs/graph/ac/null.py +++ b/bsfs/graph/ac/null.py @@ -24,7 +24,7 @@ class NullAC(base.AccessControlBase): def is_protected_predicate(self, pred: schema.Predicate) -> bool: """Return True if a predicate cannot be modified manually.""" - return pred.uri == ns.bsm.t_created + return pred.uri == ns.bsn.t_created def create(self, node_type: schema.Node, guids: typing.Iterable[URI]): """Perform post-creation operations on nodes, e.g. 
ownership information.""" diff --git a/bsfs/graph/nodes.py b/bsfs/graph/nodes.py index 74f4c4f..47b0217 100644 --- a/bsfs/graph/nodes.py +++ b/bsfs/graph/nodes.py @@ -170,7 +170,7 @@ class Nodes(): self._backend.commit() except ( - errors.PermissionDeniedError, # tried to set a protected predicate (ns.bsm.t_created) + errors.PermissionDeniedError, # tried to set a protected predicate errors.ConsistencyError, # node types are not in the schema or don't match the predicate errors.InstanceError, # guids/values don't have the correct type TypeError, # value is supposed to be a Nodes instance @@ -394,7 +394,7 @@ class Nodes(): self._backend.create(node_type, missing) # add bookkeeping triples self._backend.set(node_type, missing, - self._backend.schema.predicate(ns.bsm.t_created), [time.time()]) + self._backend.schema.predicate(ns.bsn.t_created), [time.time()]) # add permission triples self._ac.create(node_type, missing) # return available nodes diff --git a/bsfs/graph/resolve.py b/bsfs/graph/resolve.py index 95dcfc1..a58eb67 100644 --- a/bsfs/graph/resolve.py +++ b/bsfs/graph/resolve.py @@ -27,8 +27,8 @@ class Filter(): input: Any(ns.bse.tag, Is(Nodes(...))) output: Any(ns.bse.tag, Or(Is(...), Is(...), ...))) - >>> tags = graph.node(ns.bsfs.Tag, 'http://example.com/me/tag#1234') - >>> graph.get(ns.bsfs.Entity, ast.filter.Any(ns.bse.tag, ast.filter.Is(tags))) + >>> tags = graph.node(ns.bsn.Tag, 'http://example.com/me/tag#1234') + >>> graph.get(ns.bsn.Entity, ast.filter.Any(ns.bse.tag, ast.filter.Is(tags))) """ diff --git a/bsfs/graph/schema.nt b/bsfs/graph/schema.nt index cba5e80..37bba5e 100644 --- a/bsfs/graph/schema.nt +++ b/bsfs/graph/schema.nt @@ -4,15 +4,16 @@ prefix rdfs: prefix xsd: # bsfs prefixes -prefix bsfs: -prefix bsm: +prefix bsfs: +prefix bsl: +prefix bsn: # literals -bsfs:Number rdfs:subClassOf bsfs:Literal . -xsd:float rdfs:subClassOf bsfs:Number . +bsl:Number rdfs:subClassOf bsfs:Literal . +xsd:float rdfs:subClassOf bsl:Number . # predicates -bsm:t_created rdfs:subClassOf bsfs:Predicate ; +bsn:t_created rdfs:subClassOf bsfs:Predicate ; rdfs:domain bsfs:Node ; rdfs:range xsd:float ; bsfs:unique "true"^^xsd:boolean . diff --git a/bsfs/namespace/__init__.py b/bsfs/namespace/__init__.py index 1784808..76f39a2 100644 --- a/bsfs/namespace/__init__.py +++ b/bsfs/namespace/__init__.py @@ -4,11 +4,10 @@ import typing # inner-module imports from . import predefined as ns -from .namespace import ClosedNamespace, Namespace +from .namespace import Namespace # exports __all__: typing.Sequence[str] = ( - 'ClosedNamespace', 'Namespace', 'ns', ) diff --git a/bsfs/namespace/namespace.py b/bsfs/namespace/namespace.py index 0a62b78..b388f53 100644 --- a/bsfs/namespace/namespace.py +++ b/bsfs/namespace/namespace.py @@ -3,97 +3,52 @@ import typing # bsfs imports -from bsfs.utils import URI, typename +from bsfs.utils import URI # exports __all__: typing.Sequence[str] = ( - 'ClosedNamespace', 'Namespace', + 'FinalNamespace', ) ## code ## -class Namespace(): - """A namespace consists of a common prefix that is used in a set of URIs. - Note that the prefix must include the separator between - path and fragment (typically a '#' or a '/'). - """ - - # namespace prefix. - prefix: URI - - # fragment separator. - fsep: str - - # path separator. 
- psep: str - - def __init__(self, prefix: URI, fsep: str = '#', psep: str = '/'): - # ensure prefix type - prefix = URI(prefix) - # truncate fragment separator - while prefix.endswith(fsep): - prefix = URI(prefix[:-1]) - # truncate path separator - while prefix.endswith(psep): - prefix = URI(prefix[:-1]) - # store members - self.prefix = prefix - self.fsep = fsep - self.psep = psep - - def __eq__(self, other: typing.Any) -> bool: - return isinstance(other, type(self)) \ - and self.prefix == other.prefix \ - and self.fsep == other.fsep \ - and self.psep == other.psep +class Namespace(URI): + """The Namespace allows you to incrementally append path segments to an URI. - def __hash__(self) -> int: - return hash((type(self), self.prefix, self.fsep, self.psep)) + Segments are separated by `Namespace.sep` ('/'). + The `__call__` method signals that the URI is complete until the query part. - def __str__(self) -> str: - return str(self.prefix) - - def __repr__(self) -> str: - return f'{typename(self)}({self.prefix}, {self.fsep}, {self.psep})' - - def __getattr__(self, fragment: str) -> URI: - """Return prefix + fragment.""" - return URI(self.prefix + self.fsep + fragment) - - def __getitem__(self, fragment: str) -> URI: - """Alias for getattr(self, fragment).""" - return self.__getattr__(fragment) + """ - def __add__(self, value: typing.Any) -> 'Namespace': - """Concatenate another namespace to this one.""" - if not isinstance(value, str): - return NotImplemented - return Namespace(self.prefix + self.psep + value, self.fsep, self.psep) + # path separator + sep: str = '/' + def __getattr__(self, query: str) -> 'Namespace': + """Append the *query* to the current value and return as Namespace.""" + return Namespace(self + self.sep + query) -class ClosedNamespace(Namespace): - """Namespace that covers a restricted set of URIs.""" + def __call__(self, sep: str = '#') -> 'FinalNamespace': + """Finalize the namespace.""" + return FinalNamespace(self, sep) - # set of permissible fragments. - fragments: typing.Set[str] - def __init__(self, prefix: URI, *args: str, fsep: str = '#', psep: str = '/'): - super().__init__(prefix, fsep, psep) - self.fragments = set(args) +# FIXME: Integrate FinalNamespace into Namespace? Do we need to have both? +class FinalNamespace(URI): + """The FinalNamespace allows you to append a fragment to an URI.""" - def __eq__(self, other: typing.Any) -> bool: - return super().__eq__(other) and self.fragments == other.fragments + # fragment separator + sep: str - def __hash__(self) -> int: - return hash((type(self), self.prefix, tuple(sorted(self.fragments)))) + def __new__(cls, value: str, sep: str = '#'): + inst = URI.__new__(cls, value) + inst.sep = sep + return inst def __getattr__(self, fragment: str) -> URI: - """Return prefix + fragment or raise a KeyError if the fragment is not part of this namespace.""" - if fragment not in self.fragments: - raise KeyError(f'{fragment} is not a valid fragment of namespace {self.prefix}') - return super().__getattr__(fragment) + """Append the *fragment* to the current value and return as URI.""" + return URI(self + self.sep + fragment) ## EOF ## diff --git a/bsfs/namespace/predefined.py b/bsfs/namespace/predefined.py index 15f12ac..8b60d39 100644 --- a/bsfs/namespace/predefined.py +++ b/bsfs/namespace/predefined.py @@ -2,29 +2,28 @@ # imports import typing -# bsfs imports -from bsfs.utils import URI - # inner-module imports -from . 
import namespace +from .namespace import Namespace, FinalNamespace # essential bsfs namespaces -bsfs: namespace.Namespace = namespace.Namespace(URI('http://bsfs.ai/schema'), fsep='/') - +bsfs = Namespace('https://schema.bsfs.io/core') # additional bsfs namespaces -bse: namespace.Namespace = namespace.Namespace(URI('http://bsfs.ai/schema/Entity')) -bsm: namespace.Namespace = namespace.Namespace(URI('http://bsfs.ai/schema/Meta')) +bsd = bsfs.distance() +bsl = bsfs.Literal +bsn = bsfs.Node() # generic namespaces -rdf: namespace.Namespace = namespace.Namespace(URI('http://www.w3.org/1999/02/22-rdf-syntax-ns')) -rdfs: namespace.Namespace = namespace.Namespace(URI('http://www.w3.org/2000/01/rdf-schema')) -schema: namespace.Namespace = namespace.Namespace(URI('http://schema.org'), fsep='/') -xsd: namespace.Namespace = namespace.Namespace(URI('http://www.w3.org/2001/XMLSchema')) +rdf = FinalNamespace('http://www.w3.org/1999/02/22-rdf-syntax-ns') +rdfs = FinalNamespace('http://www.w3.org/2000/01/rdf-schema') +xsd = FinalNamespace('http://www.w3.org/2001/XMLSchema') +schema = FinalNamespace('http://schema.org', sep='/') +# exports __all__: typing.Sequence[str] = ( - 'bse', + 'bsd', 'bsfs', - 'bsm', + 'bsl', + 'bsn', 'rdf', 'rdfs', 'schema', diff --git a/bsfs/query/ast/filter_.py b/bsfs/query/ast/filter_.py index 56c982e..610fdb4 100644 --- a/bsfs/query/ast/filter_.py +++ b/bsfs/query/ast/filter_.py @@ -10,7 +10,8 @@ For example, consider the following AST: >>> Any(ns.bse.collection, ... And( ... Equals('hello'), -... Any(ns.bsm.guid, Any(ns.bsm.guid, Equals('hello'))), +... Is('hello world'), +... Any(ns.bse.tag, Equals('world')), ... Any(ns.bst.label, Equals('world')), ... All(ns.bst.label, Not(Equals('world'))), ... ) diff --git a/bsfs/query/matcher.py b/bsfs/query/matcher.py index 5f3b07e..17c9c8e 100644 --- a/bsfs/query/matcher.py +++ b/bsfs/query/matcher.py @@ -215,8 +215,8 @@ class Filter(): two following queries are semantically identical, but structurally different, and would therefore not match: - >>> ast.filter.OneOf(ast.filter.Predicate(ns.bse.filename)) - >>> ast.filter.Predicate(ns.bse.filename) + >>> ast.filter.OneOf(ast.filter.Predicate(ns.bse.name)) + >>> ast.filter.Predicate(ns.bse.name) """ diff --git a/bsfs/query/validator.py b/bsfs/query/validator.py index 1ce44e9..10ca492 100644 --- a/bsfs/query/validator.py +++ b/bsfs/query/validator.py @@ -177,7 +177,7 @@ class Filter(): if not type_ <= dom: raise errors.ConsistencyError(f'expected type {dom}, found {type_}') # node.count is a numerical expression - self._parse_filter_expression(self.schema.literal(ns.bsfs.Number), node.count) + self._parse_filter_expression(self.schema.literal(ns.bsl.Number), node.count) def _distance(self, type_: bsc.Vertex, node: ast.filter.Distance): # type is a Literal @@ -218,7 +218,7 @@ class Filter(): if type_ not in self.schema.literals(): raise errors.ConsistencyError(f'literal {type_} is not in the schema') # type must be a numerical - if not type_ <= self.schema.literal(ns.bsfs.Number): + if not type_ <= self.schema.literal(ns.bsl.Number): raise errors.ConsistencyError(f'expected a number type, found {type_}') # FIXME: Check if node.value corresponds to type_ diff --git a/bsfs/schema/serialize.py b/bsfs/schema/serialize.py index b05b289..ea8b2f4 100644 --- a/bsfs/schema/serialize.py +++ b/bsfs/schema/serialize.py @@ -241,13 +241,14 @@ def to_string(schema_inst: schema.Schema, fmt: str = 'turtle') -> str: graph.add(triple) # add known namespaces for readability # FIXME: more generically? 
- graph.bind('bse', rdflib.URIRef(ns.bse[''])) - graph.bind('bsfs', rdflib.URIRef(ns.bsfs[''])) - graph.bind('bsm', rdflib.URIRef(ns.bsm[''])) - graph.bind('rdf', rdflib.URIRef(ns.rdf[''])) - graph.bind('rdfs', rdflib.URIRef(ns.rdfs[''])) - graph.bind('schema', rdflib.URIRef(ns.schema[''])) - graph.bind('xsd', rdflib.URIRef(ns.xsd[''])) + graph.bind('bsfs', rdflib.URIRef(ns.bsfs + '/')) + graph.bind('bsl', rdflib.URIRef(ns.bsl + '/')) + graph.bind('bsn', rdflib.URIRef(ns.bsn + '#')) + graph.bind('bse', rdflib.URIRef(ns.bsfs.Entity() + '#')) + graph.bind('rdf', rdflib.URIRef(ns.rdf)) + graph.bind('rdfs', rdflib.URIRef(ns.rdfs)) + graph.bind('schema', rdflib.URIRef(ns.schema)) + graph.bind('xsd', rdflib.URIRef(ns.xsd)) # serialize to turtle return graph.serialize(format=fmt) diff --git a/bsfs/schema/types.py b/bsfs/schema/types.py index 104580d..5834df8 100644 --- a/bsfs/schema/types.py +++ b/bsfs/schema/types.py @@ -376,31 +376,31 @@ ROOT_LITERAL = Literal( ) ROOT_BLOB = Literal( - uri=ns.bsfs.BinaryBlob, + uri=ns.bsl.BinaryBlob, parent=ROOT_LITERAL, ) ROOT_NUMBER = Literal( - uri=ns.bsfs.Number, + uri=ns.bsl.Number, parent=ROOT_LITERAL, ) ROOT_TIME = Literal( - uri=ns.bsfs.Time, + uri=ns.bsl.Time, parent=ROOT_LITERAL, ) ROOT_ARRAY = Literal( - uri=ns.bsfs.Array, + uri=ns.bsl.Array, parent=ROOT_LITERAL, ) ROOT_FEATURE = Feature( - uri=ns.bsfs.Feature, + uri=ns.bsl.Array.Feature, parent=ROOT_ARRAY, dimension=1, - dtype=ns.bsfs.f16, - distance=ns.bsfs.euclidean, + dtype=ns.bsfs.dtype().f16, + distance=ns.bsd.euclidean, ) # essential predicates diff --git a/bsfs/triple_store/sparql/distance.py b/bsfs/triple_store/sparql/distance.py index 9b58088..2c2f355 100644 --- a/bsfs/triple_store/sparql/distance.py +++ b/bsfs/triple_store/sparql/distance.py @@ -43,9 +43,9 @@ def manhatten(fst, snd) -> float: # Known distance functions. DISTANCE_FU = { - ns.bsfs.euclidean: euclid, - ns.bsfs.cosine: cosine, - ns.bsfs.manhatten: manhatten, + ns.bsd.euclidean: euclid, + ns.bsd.cosine: cosine, + ns.bsd.manhatten: manhatten, } ## EOF ## diff --git a/bsfs/triple_store/sparql/sparql.py b/bsfs/triple_store/sparql/sparql.py index 68c0027..99e67d6 100644 --- a/bsfs/triple_store/sparql/sparql.py +++ b/bsfs/triple_store/sparql/sparql.py @@ -28,7 +28,7 @@ __all__: typing.Sequence[str] = ( ## code ## -rdflib.term.bind(ns.bsfs.BinaryBlob, bytes, constructor=base64.b64decode) +rdflib.term.bind(ns.bsl.BinaryBlob, bytes, constructor=base64.b64decode) class _Transaction(): """Lightweight rdflib transactions for in-memory databases.""" @@ -335,8 +335,8 @@ class SparqlStore(base.TripleStoreBase): # convert value if isinstance(predicate.range, bsc.Literal): dtype = rdflib.URIRef(predicate.range.uri) - if predicate.range <= self.schema.literal(ns.bsfs.BinaryBlob): - dtype = rdflib.URIRef(ns.bsfs.BinaryBlob) + if predicate.range <= self.schema.literal(ns.bsl.BinaryBlob): + dtype = rdflib.URIRef(ns.bsl.BinaryBlob) value = base64.b64encode(value) value = rdflib.Literal(value, datatype=dtype) elif isinstance(predicate.range, bsc.Node): diff --git a/test/apps/schema-1.nt b/test/apps/schema-1.nt index e57146d..4daf0ad 100644 --- a/test/apps/schema-1.nt +++ b/test/apps/schema-1.nt @@ -3,8 +3,8 @@ prefix rdfs: prefix xsd: # common bsfs prefixes -prefix bsfs: -prefix bse: +prefix bsfs: +prefix bse: # essential nodes bsfs:Entity rdfs:subClassOf bsfs:Node . 
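
Taken together, the namespace refactoring replaces the prefix/fragment bookkeeping of the old Namespace with plain URI subclasses: attribute access on a Namespace appends a '/'-separated path segment, and calling it yields a FinalNamespace whose attribute access appends a single '#' fragment (or another separator, as with schema.org). The doctest-style lines below illustrate the predefined namespaces from the refactoring above; the printed values follow from the definitions in bsfs/namespace/predefined.py and the expectations in the updated tests.

    >>> from bsfs.namespace import ns
    >>> ns.bsfs
    'https://schema.bsfs.io/core'
    >>> ns.bsfs.Entity                 # path segment: still an open Namespace
    'https://schema.bsfs.io/core/Entity'
    >>> ns.bsfs.Entity().tag           # __call__ finalizes, then '#' appends a fragment
    'https://schema.bsfs.io/core/Entity#tag'
    >>> ns.bsn.t_created               # bsn = bsfs.Node(), as used in graph/schema.nt
    'https://schema.bsfs.io/core/Node#t_created'
    >>> ns.bsl.Number                  # bsl = bsfs.Literal, still path-separated
    'https://schema.bsfs.io/core/Literal/Number'
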
diff --git a/test/apps/schema-2.nt b/test/apps/schema-2.nt index 4c5468f..4eb2467 100644 --- a/test/apps/schema-2.nt +++ b/test/apps/schema-2.nt @@ -3,8 +3,8 @@ prefix rdfs: prefix xsd: # common bsfs prefixes -prefix bsfs: -prefix bse: +prefix bsfs: +prefix bse: # essential nodes bsfs:Entity rdfs:subClassOf bsfs:Node . diff --git a/test/graph/ac/test_null.py b/test/graph/ac/test_null.py index b695e7e..142bc23 100644 --- a/test/graph/ac/test_null.py +++ b/test/graph/ac/test_null.py @@ -15,6 +15,8 @@ from bsfs.graph.ac.null import NullAC ## code ## +ns.bse = ns.bsfs.Entity() + class TestNullAC(unittest.TestCase): def setUp(self): self.backend = SparqlStore() @@ -22,18 +24,19 @@ class TestNullAC(unittest.TestCase): prefix rdfs: prefix xsd: - prefix bsfs: - prefix bsm: - prefix bse: + prefix bsfs: + prefix bsl: + prefix bsn: + prefix bse: bsfs:Entity rdfs:subClassOf bsfs:Node . bsfs:Tag rdfs:subClassOf bsfs:Node . xsd:string rdfs:subClassOf bsfs:Literal . - bsfs:Number rdfs:subClassOf bsfs:Literal . - xsd:integer rdfs:subClassOf bsfs:Number . + bsl:Number rdfs:subClassOf bsfs:Literal . + xsd:integer rdfs:subClassOf bsl:Number . # predicates mandated by Nodes - bsm:t_created rdfs:subClassOf bsfs:Predicate ; + bsn:t_created rdfs:subClassOf bsfs:Predicate ; rdfs:domain bsfs:Node ; rdfs:range xsd:integer ; bsfs:unique "true"^^xsd:boolean . @@ -59,7 +62,7 @@ class TestNullAC(unittest.TestCase): self.p_author = self.backend.schema.predicate(ns.bse.author) self.p_filesize = self.backend.schema.predicate(ns.bse.filesize) self.p_tag = self.backend.schema.predicate(ns.bse.tag) - self.p_created = self.backend.schema.predicate(ns.bsm.t_created) + self.p_created = self.backend.schema.predicate(ns.bsn.t_created) self.ent_type = self.backend.schema.node(ns.bsfs.Entity) self.ent_ids = {URI('http://www.example.com/me/entity#1234'), URI('http://www.example.com/me/entity#4321')} diff --git a/test/graph/test_graph.py b/test/graph/test_graph.py index e6d5ae4..167168d 100644 --- a/test/graph/test_graph.py +++ b/test/graph/test_graph.py @@ -19,12 +19,14 @@ from bsfs.graph.graph import Graph ## code ## +ns.bse = ns.bsfs.Entity() + class TestGraph(unittest.TestCase): def setUp(self): self.backend = SparqlStore.Open() self.backend.schema = schema.from_string(''' prefix rdfs: - prefix bsfs: + prefix bsfs: bsfs:Entity rdfs:subClassOf bsfs:Node . ''') self.user = URI('http://example.com/me') @@ -129,12 +131,13 @@ class TestGraph(unittest.TestCase): target_1 = schema.from_string(''' prefix rdfs: prefix xsd: - prefix bsfs: - prefix bse: + prefix bsfs: + prefix bse: + prefix bsl: bsfs:Entity rdfs:subClassOf bsfs:Node . xsd:string rdfs:subClassOf bsfs:Literal . - bsfs:Number rdfs:subClassOf bsfs:Literal . - xsd:integer rdfs:subClassOf bsfs:Number . + bsl:Number rdfs:subClassOf bsfs:Literal . + xsd:integer rdfs:subClassOf bsl:Number . bse:filename rdfs:subClassOf bsfs:Predicate ; rdfs:domain bsfs:Entity ; @@ -154,11 +157,12 @@ class TestGraph(unittest.TestCase): self.assertEqual(graph.schema, target_1 + schema.from_string(''' prefix rdfs: prefix xsd: - prefix bsfs: - prefix bsm: - bsfs:Number rdfs:subClassOf bsfs:Literal . - xsd:float rdfs:subClassOf bsfs:Number . - bsm:t_created rdfs:subClassOf bsfs:Predicate ; + prefix bsfs: + prefix bsn: + prefix bsl: + bsl:Number rdfs:subClassOf bsfs:Literal . + xsd:float rdfs:subClassOf bsl:Number . + bsn:t_created rdfs:subClassOf bsfs:Predicate ; rdfs:domain bsfs:Node ; rdfs:range xsd:float ; bsfs:unique "true"^^xsd:boolean . 
@@ -168,12 +172,13 @@ class TestGraph(unittest.TestCase): target_2 = schema.from_string(''' prefix rdfs: prefix xsd: - prefix bsfs: - prefix bse: + prefix bsfs: + prefix bse: + prefix bsl: bsfs:Entity rdfs:subClassOf bsfs:Node . xsd:string rdfs:subClassOf bsfs:Literal . - bsfs:Number rdfs:subClassOf bsfs:Literal . - xsd:integer rdfs:subClassOf bsfs:Number . + bsl:Number rdfs:subClassOf bsfs:Literal . + xsd:integer rdfs:subClassOf bsl:Number . bse:filename rdfs:subClassOf bsfs:Predicate ; rdfs:domain bsfs:Entity ; @@ -195,11 +200,12 @@ class TestGraph(unittest.TestCase): self.assertEqual(graph.schema, target_2 + schema.from_string(''' prefix rdfs: prefix xsd: - prefix bsfs: - prefix bsm: - bsfs:Number rdfs:subClassOf bsfs:Literal . - xsd:float rdfs:subClassOf bsfs:Number . - bsm:t_created rdfs:subClassOf bsfs:Predicate ; + prefix bsfs: + prefix bsn: + prefix bsl: + bsl:Number rdfs:subClassOf bsfs:Literal . + xsd:float rdfs:subClassOf bsl:Number . + bsn:t_created rdfs:subClassOf bsfs:Predicate ; rdfs:domain bsfs:Node ; rdfs:range xsd:float ; bsfs:unique "true"^^xsd:boolean . @@ -211,8 +217,8 @@ class TestGraph(unittest.TestCase): graph.migrate(schema.from_string(''' prefix rdfs: prefix xsd: - prefix bsfs: - prefix bse: + prefix bsfs: + prefix bse: bsfs:Entity rdfs:subClassOf bsfs:Node . bsfs:Tag rdfs:subClassOf bsfs:Node . @@ -264,8 +270,8 @@ class TestGraph(unittest.TestCase): graph.migrate(schema.from_string(''' prefix rdfs: prefix xsd: - prefix bsfs: - prefix bse: + prefix bsfs: + prefix bse: bsfs:Entity rdfs:subClassOf bsfs:Node . bsfs:Tag rdfs:subClassOf bsfs:Node . diff --git a/test/graph/test_nodes.py b/test/graph/test_nodes.py index dca887a..afe7522 100644 --- a/test/graph/test_nodes.py +++ b/test/graph/test_nodes.py @@ -21,7 +21,8 @@ from bsfs.graph.nodes import Nodes ## code ## -bst = Namespace('http://bsfs.ai/schema/Tag') +ns.bse = ns.bsfs.Entity() +ns.bst = ns.bsfs.Tag() class TestNodes(unittest.TestCase): def setUp(self): @@ -31,20 +32,21 @@ class TestNodes(unittest.TestCase): prefix rdfs: prefix xsd: - prefix bsfs: - prefix bsm: - prefix bse: - prefix bst: + prefix bsfs: + prefix bsl: + prefix bsn: + prefix bse: + prefix bst: bsfs:Entity rdfs:subClassOf bsfs:Node . bsfs:Tag rdfs:subClassOf bsfs:Node . bsfs:User rdfs:subClassOf bsfs:Node . xsd:string rdfs:subClassOf bsfs:Literal . - bsfs:Number rdfs:subClassOf bsfs:Literal . - xsd:integer rdfs:subClassOf bsfs:Number . + bsl:Number rdfs:subClassOf bsfs:Literal . + xsd:integer rdfs:subClassOf bsl:Number . # predicates mandated by Nodes - bsm:t_created rdfs:subClassOf bsfs:Predicate ; + bsn:t_created rdfs:subClassOf bsfs:Predicate ; rdfs:domain bsfs:Node ; rdfs:range xsd:integer ; bsfs:unique "true"^^xsd:boolean . 
@@ -87,19 +89,19 @@ class TestNodes(unittest.TestCase): (rdflib.URIRef(ns.bsfs.Tag), rdflib.RDFS.subClassOf, rdflib.URIRef(ns.bsfs.Node)), (rdflib.URIRef(ns.bsfs.User), rdflib.RDFS.subClassOf, rdflib.URIRef(ns.bsfs.Node)), (rdflib.URIRef(ns.xsd.string), rdflib.RDFS.subClassOf, rdflib.URIRef(ns.bsfs.Literal)), - (rdflib.URIRef(ns.bsfs.Array), rdflib.RDFS.subClassOf, rdflib.URIRef(ns.bsfs.Literal)), - (rdflib.URIRef(ns.bsfs.BinaryBlob), rdflib.RDFS.subClassOf, rdflib.URIRef(ns.bsfs.Literal)), - (rdflib.URIRef(ns.bsfs.Feature), rdflib.RDFS.subClassOf, rdflib.URIRef(ns.bsfs.Array)), - (rdflib.URIRef(ns.bsfs.Number), rdflib.RDFS.subClassOf, rdflib.URIRef(ns.bsfs.Literal)), - (rdflib.URIRef(ns.bsfs.Time), rdflib.RDFS.subClassOf, rdflib.URIRef(ns.bsfs.Literal)), - (rdflib.URIRef(ns.xsd.integer), rdflib.RDFS.subClassOf, rdflib.URIRef(ns.bsfs.Number)), - (rdflib.URIRef(ns.bsm.t_created), rdflib.RDFS.subClassOf, rdflib.URIRef(ns.bsfs.Predicate)), + (rdflib.URIRef(ns.bsl.Array), rdflib.RDFS.subClassOf, rdflib.URIRef(ns.bsfs.Literal)), + (rdflib.URIRef(ns.bsl.BinaryBlob), rdflib.RDFS.subClassOf, rdflib.URIRef(ns.bsfs.Literal)), + (rdflib.URIRef(ns.bsl.Array.Feature), rdflib.RDFS.subClassOf, rdflib.URIRef(ns.bsl.Array)), + (rdflib.URIRef(ns.bsl.Number), rdflib.RDFS.subClassOf, rdflib.URIRef(ns.bsfs.Literal)), + (rdflib.URIRef(ns.bsl.Time), rdflib.RDFS.subClassOf, rdflib.URIRef(ns.bsfs.Literal)), + (rdflib.URIRef(ns.xsd.integer), rdflib.RDFS.subClassOf, rdflib.URIRef(ns.bsl.Number)), + (rdflib.URIRef(ns.bsn.t_created), rdflib.RDFS.subClassOf, rdflib.URIRef(ns.bsfs.Predicate)), (rdflib.URIRef(ns.bse.comment), rdflib.RDFS.subClassOf, rdflib.URIRef(ns.bsfs.Predicate)), (rdflib.URIRef(ns.bse.filesize), rdflib.RDFS.subClassOf, rdflib.URIRef(ns.bsfs.Predicate)), (rdflib.URIRef(ns.bse.tag), rdflib.RDFS.subClassOf, rdflib.URIRef(ns.bsfs.Predicate)), (rdflib.URIRef(ns.bse.author), rdflib.RDFS.subClassOf, rdflib.URIRef(ns.bsfs.Predicate)), - (rdflib.URIRef(bst.representative), rdflib.RDFS.subClassOf, rdflib.URIRef(ns.bsfs.Predicate)), - (rdflib.URIRef(bst.label), rdflib.RDFS.subClassOf, rdflib.URIRef(ns.bsfs.Predicate)), + (rdflib.URIRef(ns.bst.representative), rdflib.RDFS.subClassOf, rdflib.URIRef(ns.bsfs.Predicate)), + (rdflib.URIRef(ns.bst.label), rdflib.RDFS.subClassOf, rdflib.URIRef(ns.bsfs.Predicate)), } # Nodes constructor args self.user = URI('http://example.com/me') @@ -111,9 +113,9 @@ class TestNodes(unittest.TestCase): self.p_filesize = self.backend.schema.predicate(ns.bse.filesize) self.p_author = self.backend.schema.predicate(ns.bse.author) self.p_tag = self.backend.schema.predicate(ns.bse.tag) - self.p_representative = self.backend.schema.predicate(bst.representative) - self.p_label = self.backend.schema.predicate(bst.label) - self.t_created = self.backend.schema.predicate(ns.bsm.t_created) + self.p_representative = self.backend.schema.predicate(ns.bst.representative) + self.p_label = self.backend.schema.predicate(ns.bst.label) + self.t_created = self.backend.schema.predicate(ns.bsn.t_created) self.ent_ids = { URI('http://example.com/me/entity#1234'), URI('http://example.com/me/entity#4321'), @@ -144,11 +146,11 @@ class TestNodes(unittest.TestCase): class Foo(SparqlStore): pass backend = Foo.Open() backend.schema = self.backend.schema - nodes = Nodes(backend, self.ac, self.ent_type, self.ent_ids) - self.assertEqual(repr(nodes), f'Nodes({backend}, {self.ac}, {self.ent_type}, {self.ent_ids})') + nodes = Nodes(backend, self.ac, self.ent_type, {URI('http://example.com/me/entity#1234')}) + 
self.assertEqual(repr(nodes), f"Nodes({backend}, {self.ac}, {self.ent_type}, {{'http://example.com/me/entity#1234'}})") # repr respects user - nodes = Nodes(self.backend, NullAC(self.backend, URI('http://example.com/you')), self.ent_type, self.ent_ids) - self.assertEqual(repr(nodes), f'Nodes({self.backend}, NullAC(http://example.com/you), {self.ent_type}, {self.ent_ids})') + nodes = Nodes(self.backend, NullAC(self.backend, URI('http://example.com/you')), self.ent_type, {URI('http://example.com/me/entity#1234')}) + self.assertEqual(repr(nodes), f"Nodes({self.backend}, NullAC(http://example.com/you), {self.ent_type}, {{'http://example.com/me/entity#1234'}})") def test_equality(self): nodes = Nodes(self.backend, self.ac, self.ent_type, self.ent_ids) @@ -197,8 +199,8 @@ class TestNodes(unittest.TestCase): # check triples self.assertSetEqual(set(self.backend._graph), self.schema_triples | { # entity definitions - (rdflib.URIRef('http://example.com/me/entity#1234'), rdflib.RDF.type, rdflib.URIRef('http://bsfs.ai/schema/Entity')), - (rdflib.URIRef('http://example.com/me/entity#4321'), rdflib.RDF.type, rdflib.URIRef('http://bsfs.ai/schema/Entity')), + (rdflib.URIRef('http://example.com/me/entity#1234'), rdflib.RDF.type, rdflib.URIRef('https://schema.bsfs.io/core/Entity')), + (rdflib.URIRef('http://example.com/me/entity#4321'), rdflib.RDF.type, rdflib.URIRef('https://schema.bsfs.io/core/Entity')), # bookkeeping (rdflib.URIRef('http://example.com/me/entity#1234'), rdflib.URIRef(self.t_created.uri), rdflib.Literal(t_ent_created, datatype=rdflib.XSD.integer)), (rdflib.URIRef('http://example.com/me/entity#4321'), rdflib.URIRef(self.t_created.uri), rdflib.Literal(t_ent_created, datatype=rdflib.XSD.integer)), @@ -208,8 +210,8 @@ class TestNodes(unittest.TestCase): self.assertSetEqual(self.ent_ids, nodes._ensure_nodes(self.ent_type, self.ent_ids)) self.assertSetEqual(set(self.backend._graph), self.schema_triples | { # entity definitions - (rdflib.URIRef('http://example.com/me/entity#1234'), rdflib.RDF.type, rdflib.URIRef('http://bsfs.ai/schema/Entity')), - (rdflib.URIRef('http://example.com/me/entity#4321'), rdflib.RDF.type, rdflib.URIRef('http://bsfs.ai/schema/Entity')), + (rdflib.URIRef('http://example.com/me/entity#1234'), rdflib.RDF.type, rdflib.URIRef('https://schema.bsfs.io/core/Entity')), + (rdflib.URIRef('http://example.com/me/entity#4321'), rdflib.RDF.type, rdflib.URIRef('https://schema.bsfs.io/core/Entity')), # bookkeeping (rdflib.URIRef('http://example.com/me/entity#1234'), rdflib.URIRef(self.t_created.uri), rdflib.Literal(t_ent_created, datatype=rdflib.XSD.integer)), (rdflib.URIRef('http://example.com/me/entity#4321'), rdflib.URIRef(self.t_created.uri), rdflib.Literal(t_ent_created, datatype=rdflib.XSD.integer)), @@ -223,13 +225,13 @@ class TestNodes(unittest.TestCase): # check triples self.assertSetEqual(set(self.backend._graph), self.schema_triples | { # previous triples - (rdflib.URIRef('http://example.com/me/entity#1234'), rdflib.RDF.type, rdflib.URIRef('http://bsfs.ai/schema/Entity')), - (rdflib.URIRef('http://example.com/me/entity#4321'), rdflib.RDF.type, rdflib.URIRef('http://bsfs.ai/schema/Entity')), + (rdflib.URIRef('http://example.com/me/entity#1234'), rdflib.RDF.type, rdflib.URIRef('https://schema.bsfs.io/core/Entity')), + (rdflib.URIRef('http://example.com/me/entity#4321'), rdflib.RDF.type, rdflib.URIRef('https://schema.bsfs.io/core/Entity')), (rdflib.URIRef('http://example.com/me/entity#1234'), rdflib.URIRef(self.t_created.uri), rdflib.Literal(t_ent_created, 
datatype=rdflib.XSD.integer)), (rdflib.URIRef('http://example.com/me/entity#4321'), rdflib.URIRef(self.t_created.uri), rdflib.Literal(t_ent_created, datatype=rdflib.XSD.integer)), # new triples - (rdflib.URIRef('http://example.com/me/tag#1234'), rdflib.RDF.type, rdflib.URIRef('http://bsfs.ai/schema/Tag')), - (rdflib.URIRef('http://example.com/me/tag#4321'), rdflib.RDF.type, rdflib.URIRef('http://bsfs.ai/schema/Tag')), + (rdflib.URIRef('http://example.com/me/tag#1234'), rdflib.RDF.type, rdflib.URIRef('https://schema.bsfs.io/core/Tag')), + (rdflib.URIRef('http://example.com/me/tag#4321'), rdflib.RDF.type, rdflib.URIRef('https://schema.bsfs.io/core/Tag')), (rdflib.URIRef('http://example.com/me/tag#1234'), rdflib.URIRef(self.t_created.uri), rdflib.Literal(t_tag_created, datatype=rdflib.XSD.integer)), (rdflib.URIRef('http://example.com/me/tag#4321'), rdflib.URIRef(self.t_created.uri), rdflib.Literal(t_tag_created, datatype=rdflib.XSD.integer)), }) @@ -254,8 +256,8 @@ class TestNodes(unittest.TestCase): # verify triples self.assertSetEqual(set(self.backend._graph), self.schema_triples | { # entity definitions - (rdflib.URIRef('http://example.com/me/entity#1234'), rdflib.RDF.type, rdflib.URIRef('http://bsfs.ai/schema/Entity')), - (rdflib.URIRef('http://example.com/me/entity#4321'), rdflib.RDF.type, rdflib.URIRef('http://bsfs.ai/schema/Entity')), + (rdflib.URIRef('http://example.com/me/entity#1234'), rdflib.RDF.type, rdflib.URIRef('https://schema.bsfs.io/core/Entity')), + (rdflib.URIRef('http://example.com/me/entity#4321'), rdflib.RDF.type, rdflib.URIRef('https://schema.bsfs.io/core/Entity')), # bookkeeping (rdflib.URIRef('http://example.com/me/entity#1234'), rdflib.URIRef(self.t_created.uri), rdflib.Literal(t_ent_created, datatype=rdflib.XSD.integer)), (rdflib.URIRef('http://example.com/me/entity#4321'), rdflib.URIRef(self.t_created.uri), rdflib.Literal(t_ent_created, datatype=rdflib.XSD.integer)), @@ -273,15 +275,15 @@ class TestNodes(unittest.TestCase): # verify triples self.assertSetEqual(set(self.backend._graph), self.schema_triples | { # previous values - (rdflib.URIRef('http://example.com/me/entity#1234'), rdflib.RDF.type, rdflib.URIRef('http://bsfs.ai/schema/Entity')), - (rdflib.URIRef('http://example.com/me/entity#4321'), rdflib.RDF.type, rdflib.URIRef('http://bsfs.ai/schema/Entity')), + (rdflib.URIRef('http://example.com/me/entity#1234'), rdflib.RDF.type, rdflib.URIRef('https://schema.bsfs.io/core/Entity')), + (rdflib.URIRef('http://example.com/me/entity#4321'), rdflib.RDF.type, rdflib.URIRef('https://schema.bsfs.io/core/Entity')), (rdflib.URIRef('http://example.com/me/entity#1234'), rdflib.URIRef(self.p_filesize.uri), rdflib.Literal(1234, datatype=rdflib.XSD.integer)), (rdflib.URIRef('http://example.com/me/entity#4321'), rdflib.URIRef(self.p_filesize.uri), rdflib.Literal(1234, datatype=rdflib.XSD.integer)), (rdflib.URIRef('http://example.com/me/entity#1234'), rdflib.URIRef(self.t_created.uri), rdflib.Literal(t_ent_created, datatype=rdflib.XSD.integer)), (rdflib.URIRef('http://example.com/me/entity#4321'), rdflib.URIRef(self.t_created.uri), rdflib.Literal(t_ent_created, datatype=rdflib.XSD.integer)), # tag definitions - (rdflib.URIRef('http://example.com/me/tag#1234'), rdflib.RDF.type, rdflib.URIRef('http://bsfs.ai/schema/Tag')), - (rdflib.URIRef('http://example.com/me/tag#4321'), rdflib.RDF.type, rdflib.URIRef('http://bsfs.ai/schema/Tag')), + (rdflib.URIRef('http://example.com/me/tag#1234'), rdflib.RDF.type, rdflib.URIRef('https://schema.bsfs.io/core/Tag')), + 
(rdflib.URIRef('http://example.com/me/tag#4321'), rdflib.RDF.type, rdflib.URIRef('https://schema.bsfs.io/core/Tag')), # tag bookkeeping (rdflib.URIRef('http://example.com/me/tag#1234'), rdflib.URIRef(self.t_created.uri), rdflib.Literal(t_tag_created, datatype=rdflib.XSD.integer)), (rdflib.URIRef('http://example.com/me/tag#4321'), rdflib.URIRef(self.t_created.uri), rdflib.Literal(t_tag_created, datatype=rdflib.XSD.integer)), @@ -306,8 +308,8 @@ class TestNodes(unittest.TestCase): self.assertEqual(nodes, nodes.set(self.p_filesize.uri, 1234)) self.assertTrue(set(self.backend._graph).issuperset({ # nodes exist - (rdflib.URIRef('http://example.com/me/entity#1234'), rdflib.RDF.type, rdflib.URIRef('http://bsfs.ai/schema/Entity')), - (rdflib.URIRef('http://example.com/me/entity#4321'), rdflib.RDF.type, rdflib.URIRef('http://bsfs.ai/schema/Entity')), + (rdflib.URIRef('http://example.com/me/entity#1234'), rdflib.RDF.type, rdflib.URIRef('https://schema.bsfs.io/core/Entity')), + (rdflib.URIRef('http://example.com/me/entity#4321'), rdflib.RDF.type, rdflib.URIRef('https://schema.bsfs.io/core/Entity')), # links exist (rdflib.URIRef('http://example.com/me/entity#1234'), rdflib.URIRef(self.p_filesize.uri), rdflib.Literal(1234, datatype=rdflib.XSD.integer)), (rdflib.URIRef('http://example.com/me/entity#4321'), rdflib.URIRef(self.p_filesize.uri), rdflib.Literal(1234, datatype=rdflib.XSD.integer)), @@ -316,10 +318,10 @@ class TestNodes(unittest.TestCase): self.assertEqual(nodes, nodes.set(self.p_tag.uri, Nodes(self.backend, self.ac, self.tag_type, self.tag_ids))) self.assertTrue(set(self.backend._graph).issuperset({ # nodes exist - (rdflib.URIRef('http://example.com/me/entity#1234'), rdflib.RDF.type, rdflib.URIRef('http://bsfs.ai/schema/Entity')), - (rdflib.URIRef('http://example.com/me/entity#4321'), rdflib.RDF.type, rdflib.URIRef('http://bsfs.ai/schema/Entity')), - (rdflib.URIRef('http://example.com/me/tag#1234'), rdflib.RDF.type, rdflib.URIRef('http://bsfs.ai/schema/Tag')), - (rdflib.URIRef('http://example.com/me/tag#4321'), rdflib.RDF.type, rdflib.URIRef('http://bsfs.ai/schema/Tag')), + (rdflib.URIRef('http://example.com/me/entity#1234'), rdflib.RDF.type, rdflib.URIRef('https://schema.bsfs.io/core/Entity')), + (rdflib.URIRef('http://example.com/me/entity#4321'), rdflib.RDF.type, rdflib.URIRef('https://schema.bsfs.io/core/Entity')), + (rdflib.URIRef('http://example.com/me/tag#1234'), rdflib.RDF.type, rdflib.URIRef('https://schema.bsfs.io/core/Tag')), + (rdflib.URIRef('http://example.com/me/tag#4321'), rdflib.RDF.type, rdflib.URIRef('https://schema.bsfs.io/core/Tag')), # links exist (rdflib.URIRef('http://example.com/me/entity#1234'), rdflib.URIRef(self.p_tag.uri), rdflib.URIRef('http://example.com/me/tag#1234')), (rdflib.URIRef('http://example.com/me/entity#1234'), rdflib.URIRef(self.p_tag.uri), rdflib.URIRef('http://example.com/me/tag#4321')), @@ -360,10 +362,10 @@ class TestNodes(unittest.TestCase): }.items())) self.assertTrue(set(self.backend._graph).issuperset({ # nodes exist - (rdflib.URIRef('http://example.com/me/entity#1234'), rdflib.RDF.type, rdflib.URIRef('http://bsfs.ai/schema/Entity')), - (rdflib.URIRef('http://example.com/me/entity#4321'), rdflib.RDF.type, rdflib.URIRef('http://bsfs.ai/schema/Entity')), - (rdflib.URIRef('http://example.com/me/tag#1234'), rdflib.RDF.type, rdflib.URIRef('http://bsfs.ai/schema/Tag')), - (rdflib.URIRef('http://example.com/me/tag#4321'), rdflib.RDF.type, rdflib.URIRef('http://bsfs.ai/schema/Tag')), + (rdflib.URIRef('http://example.com/me/entity#1234'), 
rdflib.RDF.type, rdflib.URIRef('https://schema.bsfs.io/core/Entity')), + (rdflib.URIRef('http://example.com/me/entity#4321'), rdflib.RDF.type, rdflib.URIRef('https://schema.bsfs.io/core/Entity')), + (rdflib.URIRef('http://example.com/me/tag#1234'), rdflib.RDF.type, rdflib.URIRef('https://schema.bsfs.io/core/Tag')), + (rdflib.URIRef('http://example.com/me/tag#4321'), rdflib.RDF.type, rdflib.URIRef('https://schema.bsfs.io/core/Tag')), # links exist (rdflib.URIRef('http://example.com/me/entity#1234'), rdflib.URIRef(self.p_filesize.uri), rdflib.Literal(1234, datatype=rdflib.XSD.integer)), (rdflib.URIRef('http://example.com/me/entity#4321'), rdflib.URIRef(self.p_filesize.uri), rdflib.Literal(1234, datatype=rdflib.XSD.integer)), @@ -406,9 +408,9 @@ class TestNodes(unittest.TestCase): .set(ns.bse.filesize, 4321) \ .set(ns.bse.tag, Nodes(self.backend, self.ac, self.tag_type, {'http://example.com/me/tag#4321'})) Nodes(self.backend, self.ac, self.tag_type, {'http://example.com/me/tag#1234'}) \ - .set(bst.label, 'tag_label_1234') + .set(ns.bst.label, 'tag_label_1234') Nodes(self.backend, self.ac, self.tag_type, {'http://example.com/me/tag#4321'}) \ - .set(bst.label, 'tag_label_4321') + .set(ns.bst.label, 'tag_label_4321') # setup: get nodes instance nodes = Nodes(self.backend, self.ac, self.ent_type, self.ent_ids) @@ -424,18 +426,18 @@ class TestNodes(unittest.TestCase): Nodes(self.backend, self.ac, self.ent_type, {'http://example.com/me/entity#4321'}): 4321, }) # can pass path as sequence of URI - self.assertDictEqual(nodes.get((ns.bse.tag, bst.label)), { + self.assertDictEqual(nodes.get((ns.bse.tag, ns.bst.label)), { Nodes(self.backend, self.ac, self.ent_type, {'http://example.com/me/entity#1234'}): {'tag_label_1234'}, Nodes(self.backend, self.ac, self.ent_type, {'http://example.com/me/entity#4321'}): {'tag_label_4321'}, }) # get returns the same path that was passed - self.assertCountEqual(list(nodes.get((ns.bse.tag, bst.label), path=True, view=list)), [ - (Nodes(self.backend, self.ac, self.ent_type, {'http://example.com/me/entity#1234'}), (ns.bse.tag, bst.label), 'tag_label_1234'), - (Nodes(self.backend, self.ac, self.ent_type, {'http://example.com/me/entity#4321'}), (ns.bse.tag, bst.label), 'tag_label_4321'), + self.assertCountEqual(list(nodes.get((ns.bse.tag, ns.bst.label), path=True, view=list)), [ + (Nodes(self.backend, self.ac, self.ent_type, {'http://example.com/me/entity#1234'}), (ns.bse.tag, ns.bst.label), 'tag_label_1234'), + (Nodes(self.backend, self.ac, self.ent_type, {'http://example.com/me/entity#4321'}), (ns.bse.tag, ns.bst.label), 'tag_label_4321'), ]) - self.assertCountEqual(list(nodes.get([ns.bse.tag, bst.label], path=True, view=list)), [ - (Nodes(self.backend, self.ac, self.ent_type, {'http://example.com/me/entity#1234'}), [ns.bse.tag, bst.label], 'tag_label_1234'), - (Nodes(self.backend, self.ac, self.ent_type, {'http://example.com/me/entity#4321'}), [ns.bse.tag, bst.label], 'tag_label_4321'), + self.assertCountEqual(list(nodes.get([ns.bse.tag, ns.bst.label], path=True, view=list)), [ + (Nodes(self.backend, self.ac, self.ent_type, {'http://example.com/me/entity#1234'}), [ns.bse.tag, ns.bst.label], 'tag_label_1234'), + (Nodes(self.backend, self.ac, self.ent_type, {'http://example.com/me/entity#4321'}), [ns.bse.tag, ns.bst.label], 'tag_label_4321'), ]) # paths must be URI or sequence thereof self.assertRaises(TypeError, nodes.get, 1234) @@ -444,16 +446,16 @@ class TestNodes(unittest.TestCase): self.assertRaises(ValueError, nodes.get, 'hello world') 
self.assertRaises(errors.ConsistencyError, nodes.get, 'hello_world') self.assertRaises(errors.ConsistencyError, nodes.get, ns.bse.invalid) - self.assertRaises(errors.ConsistencyError, nodes.get, (ns.bse.tag, bst.invalid)) + self.assertRaises(errors.ConsistencyError, nodes.get, (ns.bse.tag, ns.bst.invalid)) # can pass multiple paths - self.assertDictEqual(nodes.get(ns.bse.filesize, (ns.bse.tag, bst.label)), { + self.assertDictEqual(nodes.get(ns.bse.filesize, (ns.bse.tag, ns.bst.label)), { Nodes(self.backend, self.ac, self.ent_type, {'http://example.com/me/entity#1234'}): { ns.bse.filesize: 1234, - (ns.bse.tag, bst.label): {'tag_label_1234'}, + (ns.bse.tag, ns.bst.label): {'tag_label_1234'}, }, Nodes(self.backend, self.ac, self.ent_type, {'http://example.com/me/entity#4321'}): { ns.bse.filesize: 4321, - (ns.bse.tag, bst.label): {'tag_label_4321'}, + (ns.bse.tag, ns.bst.label): {'tag_label_4321'}, }, }) # get respects view diff --git a/test/graph/test_resolve.py b/test/graph/test_resolve.py index accb565..e09b1cc 100644 --- a/test/graph/test_resolve.py +++ b/test/graph/test_resolve.py @@ -16,6 +16,8 @@ from bsfs.graph.resolve import Filter ## code ## +ns.bse = ns.bsfs.Entity() + class TestFilter(unittest.TestCase): """ @@ -30,18 +32,20 @@ class TestFilter(unittest.TestCase): prefix rdfs: prefix xsd: - prefix bsfs: - prefix bse: + prefix bsfs: + prefix bse: + prefix bsl: + prefix bsa: bsfs:Entity rdfs:subClassOf bsfs:Node . bsfs:Tag rdfs:subClassOf bsfs:Node . xsd:string rdfs:subClassOf bsfs:Literal . - bsfs:Number rdfs:subClassOf bsfs:Literal . - bsfs:Array rdfs:subClassOf bsfs:Literal . - bsfs:Feature rdfs:subClassOf bsfs:Array . - xsd:integer rdfs:subClassOf bsfs:Number . + bsl:Number rdfs:subClassOf bsfs:Literal . + bsl:Array rdfs:subClassOf bsfs:Literal . + bsa:Feature rdfs:subClassOf bsl:Array . + xsd:integer rdfs:subClassOf bsl:Number . - bsfs:Colors rdfs:subClassOf bsfs:Feature ; + bsfs:Colors rdfs:subClassOf bsa:Feature ; bsfs:dimension "5"^^xsd:integer . bse:colors rdfs:subClassOf bsfs:Predicate ; diff --git a/test/graph/test_result.py b/test/graph/test_result.py index 099234a..8960ef6 100644 --- a/test/graph/test_result.py +++ b/test/graph/test_result.py @@ -13,6 +13,8 @@ from bsfs.graph.result import to_list_view, to_dict_view ## code ## +ns.bse = ns.bsfs.Entity() + class TestListView(unittest.TestCase): def setUp(self): self.triples_111 = [('ent#1234', ns.bse.iso, 123)] diff --git a/test/graph/test_walk.py b/test/graph/test_walk.py index 346896b..4b844da 100644 --- a/test/graph/test_walk.py +++ b/test/graph/test_walk.py @@ -15,8 +15,8 @@ from bsfs.graph.walk import Walk ## code ## -bse = ns.bse -bst = Namespace('http://bsfs.ai/schema/Tag') +ns.bse = ns.bsfs.Entity() +ns.bst = ns.bsfs.Tag() class TestWalk(unittest.TestCase): def setUp(self): @@ -24,9 +24,9 @@ class TestWalk(unittest.TestCase): self.schema = bsc.from_string(''' prefix rdfs: prefix xsd: - prefix bsfs: - prefix bse: - prefix bst: + prefix bsfs: + prefix bse: + prefix bst: bsfs:Entity rdfs:subClassOf bsfs:Node . bsfs:Tag rdfs:subClassOf bsfs:Node . 
@@ -73,14 +73,14 @@ class TestWalk(unittest.TestCase): URI('http://example.com/me/tag#1234'), URI('http://example.com/me/tag#4321')}) # add some instances - self.ents.set(bse.tag, self.tags) - self.graph.node(ns.bsfs.Tag, URI('http://example.com/me/tag#1234')).set(bst.label, 'hello') - self.graph.node(ns.bsfs.Tag, URI('http://example.com/me/tag#4321')).set(bst.label, 'world') + self.ents.set(ns.bse.tag, self.tags) + self.graph.node(ns.bsfs.Tag, URI('http://example.com/me/tag#1234')).set(ns.bst.label, 'hello') + self.graph.node(ns.bsfs.Tag, URI('http://example.com/me/tag#4321')).set(ns.bst.label, 'world') def test_essentials(self): # __eq__, __hash__, __str__, __repr__ - p_author = self.schema.predicate(bse.author) - p_tag = self.schema.predicate(bse.tag) - p_main = self.schema.predicate(bst.main) + p_author = self.schema.predicate(ns.bse.author) + p_tag = self.schema.predicate(ns.bse.tag) + p_main = self.schema.predicate(ns.bst.main) # comparison self.assertEqual(Walk(self.ents, [p_tag]), Walk(self.ents, [p_tag])) self.assertEqual(hash(Walk(self.ents, [p_tag])), hash(Walk(self.ents, [p_tag]))) @@ -96,18 +96,18 @@ class TestWalk(unittest.TestCase): self.assertNotEqual(hash(Walk(self.tags, [p_author])), hash(Walk(self.tags, [p_main]))) # string conversion self.assertEqual(str(Walk(self.ents, [p_tag, p_main])), - 'Walk(@http://bsfs.ai/schema/Entity: http://bsfs.ai/schema/Entity#tag, http://bsfs.ai/schema/Tag#main)') + 'Walk(@https://schema.bsfs.io/core/Entity: https://schema.bsfs.io/core/Entity#tag, https://schema.bsfs.io/core/Tag#main)') self.assertEqual(repr(Walk(self.ents, [p_tag, p_main])), - 'Walk(http://bsfs.ai/schema/Entity, (http://bsfs.ai/schema/Entity#tag, http://bsfs.ai/schema/Tag#main))') + 'Walk(https://schema.bsfs.io/core/Entity, (https://schema.bsfs.io/core/Entity#tag, https://schema.bsfs.io/core/Tag#main))') def test_tail(self): self.assertEqual(Walk(self.ents, ( - self.schema.predicate(bse.tag), + self.schema.predicate(ns.bse.tag), )).tail, self.schema.node(ns.bsfs.Tag)) self.assertEqual(Walk(self.ents, ( - self.schema.predicate(bse.tag), - self.schema.predicate(bst.main), + self.schema.predicate(ns.bse.tag), + self.schema.predicate(ns.bst.main), )).tail, self.schema.node(ns.bsfs.Entity)) @@ -115,24 +115,24 @@ class TestWalk(unittest.TestCase): tag_type = self.schema.node(ns.bsfs.Tag) # step returns a predicate self.assertEqual(Walk.step(self.schema, tag_type, 'subTagOf'), - (self.schema.predicate(bst.subTagOf), )) + (self.schema.predicate(ns.bst.subTagOf), )) # invalid step raises an error self.assertRaises(ValueError, Walk.step, self.schema, tag_type, 'foobar') # ambiguous step raises an error self.assertRaises(ValueError, Walk.step, self.schema, tag_type, 'author') def test_getattr(self): # __getattr__ - walk = Walk(self.ents, (self.schema.predicate(bse.tag), )) + walk = Walk(self.ents, (self.schema.predicate(ns.bse.tag), )) # first step self.assertEqual(walk.subTagOf, Walk(self.ents, ( - self.schema.predicate(bse.tag), - self.schema.predicate(bst.subTagOf), + self.schema.predicate(ns.bse.tag), + self.schema.predicate(ns.bst.subTagOf), ))) # second step self.assertEqual(walk.subTagOf.main, Walk(self.ents, ( - self.schema.predicate(bse.tag), - self.schema.predicate(bst.subTagOf), - self.schema.predicate(bst.main), + self.schema.predicate(ns.bse.tag), + self.schema.predicate(ns.bst.subTagOf), + self.schema.predicate(ns.bst.main), ))) # invalid step raises an error self.assertRaises(ValueError, getattr, walk, 'foobar') @@ -140,7 +140,7 @@ class TestWalk(unittest.TestCase): 
self.assertRaises(ValueError, getattr, walk, 'author') def test_get(self): # get, __call__ - walk = Walk(self.ents, (self.schema.predicate(bse.tag), )) + walk = Walk(self.ents, (self.schema.predicate(ns.bse.tag), )) tags = { self.graph.node(ns.bsfs.Tag, URI('http://example.com/me/tag#1234')), self.graph.node(ns.bsfs.Tag, URI('http://example.com/me/tag#4321'))} diff --git a/test/namespace/test_namespace.py b/test/namespace/test_namespace.py index ec2f393..f7bf02a 100644 --- a/test/namespace/test_namespace.py +++ b/test/namespace/test_namespace.py @@ -7,7 +7,7 @@ import unittest from bsfs.utils import URI # objects to test -from bsfs.namespace.namespace import Namespace, ClosedNamespace +from bsfs.namespace.namespace import Namespace, FinalNamespace ## code ## @@ -15,108 +15,48 @@ from bsfs.namespace.namespace import Namespace, ClosedNamespace class TestNamespace(unittest.TestCase): def test_essentials(self): # string conversion - self.assertEqual(str(Namespace('http://example.org/')), 'http://example.org') - self.assertEqual(str(Namespace('http://example.org#')), 'http://example.org') - self.assertEqual(repr(Namespace('http://example.org/')), 'Namespace(http://example.org, #, /)') - self.assertEqual(repr(Namespace('http://example.org#')), 'Namespace(http://example.org, #, /)') - self.assertEqual(repr(Namespace('http://example.org', fsep='.')), 'Namespace(http://example.org, ., /)') - self.assertEqual(repr(Namespace('http://example.org', psep='.')), 'Namespace(http://example.org, #, .)') - # repeated separators are truncated - self.assertEqual(str(Namespace('http://example.org////')), 'http://example.org') - self.assertEqual(str(Namespace('http://example.org####')), 'http://example.org') - self.assertEqual(repr(Namespace('http://example.org///##')), 'Namespace(http://example.org, #, /)') + self.assertEqual(str(Namespace('http://example.org')), 'http://example.org') + self.assertEqual(repr(Namespace('http://example.org')), "'http://example.org'") # comparison - class Foo(Namespace): pass - self.assertEqual(Namespace('http://example.org/'), Namespace('http://example.org/')) - self.assertEqual(Namespace('http://example.org/'), Namespace('http://example.org')) - self.assertEqual(Namespace('http://example.org/'), Namespace('http://example.org#')) - self.assertNotEqual(Namespace('http://example.org'), Namespace('http://example.org', fsep='.')) - self.assertNotEqual(Namespace('http://example.org'), Namespace('http://example.org', psep='.')) - self.assertNotEqual(Namespace('http://example.org/'), Foo('http://example.org/')) - self.assertNotEqual(Foo('http://example.org/'), Namespace('http://example.org/')) - # hashing - self.assertEqual(hash(Namespace('http://example.org/')), hash(Namespace('http://example.org/'))) - self.assertEqual(hash(Namespace('http://example.org/')), hash(Namespace('http://example.org'))) - self.assertEqual(hash(Namespace('http://example.org/')), hash(Namespace('http://example.org#'))) + self.assertEqual(Namespace('http://example.org'), Namespace('http://example.org')) + self.assertEqual(hash(Namespace('http://example.org')), hash(Namespace('http://example.org'))) + # Namespace compares to string + self.assertEqual(Namespace('http://example.org'), 'http://example.org') + self.assertEqual(hash(Namespace('http://example.org')), hash('http://example.org')) + # URI must match + self.assertNotEqual(Namespace('http://example.org'), Namespace('http://example.com')) self.assertNotEqual(hash(Namespace('http://example.org')), hash(Namespace('http://example.com'))) - 
self.assertNotEqual(hash(Namespace('http://example.org')), hash(Namespace('http://example.org', fsep='.'))) - self.assertNotEqual(hash(Namespace('http://example.org')), hash(Namespace('http://example.org', psep='.'))) - self.assertNotEqual(hash(Namespace('http://example.org/')), hash(Foo('http://example.org/'))) - self.assertNotEqual(hash(Foo('http://example.org/')), hash(Namespace('http://example.org/'))) def test_getattr(self): - self.assertEqual(Namespace('http://example.org/').foo, 'http://example.org#foo') - self.assertEqual(Namespace('http://example.org/').bar, 'http://example.org#bar') - self.assertEqual(Namespace('http://example.org/', fsep='/').foo, 'http://example.org/foo') - self.assertEqual(Namespace('http://example.org/', fsep='/').bar, 'http://example.org/bar') - self.assertEqual(Namespace('http://example.org', fsep='/').foo, 'http://example.org/foo') - self.assertEqual(Namespace('http://example.org', fsep='/').bar, 'http://example.org/bar') - self.assertEqual(Namespace('http://example.org#', fsep='/').foo, 'http://example.org#/foo') - self.assertEqual(Namespace('http://example.org#', fsep='/').bar, 'http://example.org#/bar') - self.assertEqual(Namespace('http://example.org/me#').foo, 'http://example.org/me#foo') - self.assertEqual(Namespace('http://example.org/me#').bar, 'http://example.org/me#bar') + self.assertEqual(Namespace('http://example.org').foo, Namespace('http://example.org/foo')) + self.assertEqual(Namespace('http://example.org').bar, Namespace('http://example.org/bar')) - def test_getitem(self): - self.assertEqual(Namespace('http://example.org')['foo'], 'http://example.org#foo') - self.assertEqual(Namespace('http://example.org')['bar'], 'http://example.org#bar') - self.assertEqual(Namespace('http://example.org', fsep='/')['foo'], 'http://example.org/foo') - self.assertEqual(Namespace('http://example.org', fsep='/')['bar'], 'http://example.org/bar') - self.assertEqual(Namespace('http://example.org/me#')['foo'], 'http://example.org/me#foo') - self.assertEqual(Namespace('http://example.org/me#')['bar'], 'http://example.org/me#bar') + def test_call(self): + self.assertEqual(Namespace('http://example.org')(), FinalNamespace('http://example.org', sep='#')) + self.assertEqual(Namespace('http://example.org').foo(), FinalNamespace('http://example.org/foo', sep='#')) - def test_add(self): - self.assertEqual(Namespace('http://example.org') + 'foo', Namespace('http://example.org/foo')) - self.assertEqual(Namespace('http://example.org', psep='.') + 'foo', Namespace('http://example.org.foo', psep='.')) - self.assertEqual(Namespace('http://example.org') + 'foo' + 'bar', Namespace('http://example.org/foo/bar')) - # can add URIs - self.assertEqual(Namespace('http://example.org') + URI('foo'), Namespace('http://example.org/foo')) - # can only add strings - self.assertRaises(TypeError, operator.add, Namespace('http://example.org'), 1234) - self.assertRaises(TypeError, operator.add, Namespace('http://example.org'), Namespace('http://example.com')) - - -class TestClosedNamespace(unittest.TestCase): +class TestFinalNamespace(unittest.TestCase): def test_essentials(self): - # string conversion - self.assertEqual(str(ClosedNamespace('http://example.org/')), 'http://example.org') - self.assertEqual(str(ClosedNamespace('http://example.org#')), 'http://example.org') - self.assertEqual(repr(ClosedNamespace('http://example.org/')), 'ClosedNamespace(http://example.org, #, /)') - self.assertEqual(repr(ClosedNamespace('http://example.org#')), 'ClosedNamespace(http://example.org, #, /)') - 
self.assertEqual(repr(ClosedNamespace('http://example.org', fsep='.')), 'ClosedNamespace(http://example.org, ., /)') - self.assertEqual(repr(ClosedNamespace('http://example.org', psep='.')), 'ClosedNamespace(http://example.org, #, .)') + # string conversion + self.assertEqual(str(FinalNamespace('http://example.org')), 'http://example.org') + self.assertEqual(repr(FinalNamespace('http://example.org')), "'http://example.org'") # comparison - class Foo(ClosedNamespace): pass - self.assertEqual(ClosedNamespace('http://example.org'), ClosedNamespace('http://example.org#')) - self.assertEqual(ClosedNamespace('http://example.org'), ClosedNamespace('http://example.org')) - self.assertEqual(ClosedNamespace('http://example.org'), ClosedNamespace('http://example.org/')) - self.assertEqual(ClosedNamespace('http://example.org/', 'foo', 'bar'), ClosedNamespace('http://example.org/', 'foo', 'bar')) - self.assertNotEqual(ClosedNamespace('http://example.org/', 'foo'), ClosedNamespace('http://example.org/', 'bar')) - self.assertNotEqual(ClosedNamespace('http://example.org/'), Foo('http://example.org/')) - self.assertNotEqual(Foo('http://example.org/'), ClosedNamespace('http://example.org/')) - # hashing - self.assertEqual(hash(ClosedNamespace('http://example.org')), hash(ClosedNamespace('http://example.org'))) - self.assertEqual(hash(ClosedNamespace('http://example.org')), hash(ClosedNamespace('http://example.org/'))) - self.assertEqual(hash(ClosedNamespace('http://example.org')), hash(ClosedNamespace('http://example.org#'))) - self.assertEqual(hash(ClosedNamespace('http://example.org/', 'foo', 'bar')), hash(ClosedNamespace('http://example.org/', 'foo', 'bar'))) - self.assertNotEqual(hash(ClosedNamespace('http://example.org/', 'foo')), hash(ClosedNamespace('http://example.org/', 'bar'))) - self.assertNotEqual(hash(ClosedNamespace('http://example.org/')), hash(Foo('http://example.org/'))) - self.assertNotEqual(hash(Foo('http://example.org/')), hash(ClosedNamespace('http://example.org/'))) + self.assertEqual(FinalNamespace('http://example.org'), FinalNamespace('http://example.org')) + self.assertEqual(hash(FinalNamespace('http://example.org')), hash(FinalNamespace('http://example.org'))) + # FinalNamespace compares to string + self.assertEqual(FinalNamespace('http://example.org'), 'http://example.org') + self.assertEqual(hash(FinalNamespace('http://example.org')), hash('http://example.org')) + # URI must match + self.assertNotEqual(FinalNamespace('http://example.org'), FinalNamespace('http://example.com')) + self.assertNotEqual(hash(FinalNamespace('http://example.org')), hash(FinalNamespace('http://example.com'))) + # separator is ignored + self.assertEqual(FinalNamespace('http://example.org'), FinalNamespace('http://example.org', sep='/')) + self.assertEqual(hash(FinalNamespace('http://example.org')), hash(FinalNamespace('http://example.org', sep='/'))) def test_getattr(self): - self.assertEqual(ClosedNamespace('http://example.org/', 'foo', 'bar').foo, 'http://example.org#foo') - self.assertEqual(ClosedNamespace('http://example.org/', 'bar', 'bar').bar, 'http://example.org#bar') - self.assertEqual(ClosedNamespace('http://example.org/me#', 'foo', 'bar').foo, 'http://example.org/me#foo') - self.assertEqual(ClosedNamespace('http://example.org/me#', 'foo', 'bar').bar, 'http://example.org/me#bar') - self.assertRaises(KeyError, getattr, ClosedNamespace('http://example.org/', 'bar', 'bar'), 'foobar') - self.assertRaises(KeyError, getattr, ClosedNamespace('http://example.org#', 'bar', 'bar'), 'foobar') - - def 
test_getitem(self): - self.assertEqual(ClosedNamespace('http://example.org/', 'foo', 'bar')['foo'], 'http://example.org#foo') - self.assertEqual(ClosedNamespace('http://example.org/', 'foo', 'bar')['bar'], 'http://example.org#bar') - self.assertEqual(ClosedNamespace('http://example.org/me#', 'foo', 'bar')['foo'], 'http://example.org/me#foo') - self.assertEqual(ClosedNamespace('http://example.org/me#', 'foo', 'bar')['bar'], 'http://example.org/me#bar') - self.assertRaises(KeyError, ClosedNamespace('http://example.org/', 'bar', 'bar').__getitem__, 'foobar') - self.assertRaises(KeyError, ClosedNamespace('http://example.org#', 'bar', 'bar').__getitem__, 'foobar') + self.assertEqual(FinalNamespace('http://example.org').foo, FinalNamespace('http://example.org#foo')) + self.assertEqual(FinalNamespace('http://example.org').bar, FinalNamespace('http://example.org#bar')) + self.assertEqual(FinalNamespace('http://example.org', sep='/').bar, FinalNamespace('http://example.org/bar')) ## main ## diff --git a/test/query/test_validator.py b/test/query/test_validator.py index bbfd2e6..418463e 100644 --- a/test/query/test_validator.py +++ b/test/query/test_validator.py @@ -14,26 +14,29 @@ from bsfs.query.validator import Filter, Fetch ## code ## +ns.bse = ns.bsfs.Entity() + class TestFilter(unittest.TestCase): def setUp(self): self.schema = _schema.from_string(''' prefix rdfs: prefix xsd: - prefix bsfs: - prefix bse: + prefix bsfs: + prefix bse: + prefix bsl: bsfs:Entity rdfs:subClassOf bsfs:Node . bsfs:URI rdfs:subClassOf bsfs:Literal . bsfs:Tag rdfs:subClassOf bsfs:Node . xsd:string rdfs:subClassOf bsfs:Literal . - bsfs:Number rdfs:subClassOf bsfs:Literal . - bsfs:Array rdfs:subClassOf bsfs:Literal . - bsfs:Feature rdfs:subClassOf bsfs:Array . - xsd:integer rdfs:subClassOf bsfs:Number . + bsl:Number rdfs:subClassOf bsfs:Literal . + bsl:Array rdfs:subClassOf bsfs:Literal . + rdfs:subClassOf bsl:Array . + xsd:integer rdfs:subClassOf bsl:Number . - bsfs:Colors rdfs:subClassOf bsfs:Feature ; + bsfs:Colors rdfs:subClassOf ; bsfs:dimension "5"^^xsd:integer ; bsfs:dtype bsfs:f32 . @@ -267,10 +270,10 @@ class TestFilter(unittest.TestCase): self.assertRaises(errors.ConsistencyError, self.validate._distance, self.schema.node(ns.bsfs.Node), ast.filter.Distance([1,2,3], 1, False)) # type must be a feature - self.assertRaises(errors.ConsistencyError, self.validate._distance, self.schema.literal(ns.bsfs.Array), + self.assertRaises(errors.ConsistencyError, self.validate._distance, self.schema.literal(ns.bsl.Array), ast.filter.Distance([1,2,3], 1, False)) # type must be in the schema - self.assertRaises(errors.ConsistencyError, self.validate._distance, self.schema.literal(ns.bsfs.Feature).child(ns.bsfs.Invalid), + self.assertRaises(errors.ConsistencyError, self.validate._distance, self.schema.literal(ns.bsl.Array.Feature).child(ns.bsfs.Invalid), ast.filter.Distance([1,2,3], 1, False)) # FIXME: reference must be a numpy array # reference must have the correct dimension @@ -287,8 +290,8 @@ class TestFetch(unittest.TestCase): prefix rdfs: prefix xsd: - prefix bsfs: - prefix bse: + prefix bsfs: + prefix bse: bsfs:Entity rdfs:subClassOf bsfs:Node . bsfs:Tag rdfs:subClassOf bsfs:Node . 
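
A minimal usage sketch (outside the diff itself) of the Namespace/FinalNamespace contract that the rewritten test_namespace.py above asserts: attribute access on a Namespace extends the path with '/', calling it yields a FinalNamespace whose attributes append a '#' fragment, and both types compare and hash equal to their plain string URI. It assumes the bsfs package with this patch applied; names and values mirror the test cases above.

    # Mirrors the assertions in test/namespace/test_namespace.py above.
    from bsfs.namespace.namespace import Namespace, FinalNamespace

    org = Namespace('http://example.org')
    assert org.foo == Namespace('http://example.org/foo')                  # '/' extends the path
    assert org == 'http://example.org'                                     # compares to the plain string URI
    assert org.foo() == FinalNamespace('http://example.org/foo', sep='#')  # call "closes" the namespace
    assert org.foo().bar == 'http://example.org/foo#bar'                   # FinalNamespace appends fragments
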
diff --git a/test/schema/test_schema.py b/test/schema/test_schema.py index f9ddb68..f52cf95 100644 --- a/test/schema/test_schema.py +++ b/test/schema/test_schema.py @@ -14,6 +14,8 @@ from bsfs.schema.schema import Schema ## code ## +ns.bse = ns.bsfs.Entity() + class TestSchema(unittest.TestCase): def setUp(self): @@ -21,8 +23,9 @@ class TestSchema(unittest.TestCase): prefix rdfs: prefix xsd: - prefix bsfs: - prefix bse: + prefix bsfs: + prefix bse: + prefix bsl: bsfs:Entity rdfs:subClassOf bsfs:Node . bsfs:Tag rdfs:subClassOf bsfs:Node . @@ -31,7 +34,7 @@ class TestSchema(unittest.TestCase): xsd:string rdfs:subClassOf bsfs:Literal . bsfs:Number rdfs:subClassOf bsfs:Literal . - xsd:integer rdfs:subClassOf bsfs:Number . + xsd:integer rdfs:subClassOf bsl:Number . xsd:boolean rdfs:subClassOf bsfs:Literal . bse:tag rdfs:subClassOf bsfs:Predicate ; @@ -174,13 +177,13 @@ class TestSchema(unittest.TestCase): self.assertEqual(str(Schema(self.predicates, self.nodes, self.literals)), 'Schema()') # repr conversion with only default nodes, literals, and predicates n = [ns.bsfs.Node] - l = [ns.bsfs.Array, ns.bsfs.BinaryBlob, ns.bsfs.Feature, ns.bsfs.Literal, ns.bsfs.Number, ns.bsfs.Time] + l = [ns.bsfs.Literal, ns.bsl.Array, ns.bsl.Array.Feature, ns.bsl.BinaryBlob, ns.bsl.Number, ns.bsl.Time] p = [ns.bsfs.Predicate] self.assertEqual(repr(Schema()), f'Schema({n}, {l}, {p})') self.assertEqual(repr(Schema([], [], [])), f'Schema({n}, {l}, {p})') # repr conversion n = [ns.bsfs.Entity, ns.bsfs.Image, ns.bsfs.Node, ns.bsfs.Tag, ns.bsfs.Unused] - l = [ns.bsfs.Array, ns.bsfs.BinaryBlob, ns.bsfs.Feature, ns.bsfs.Literal, ns.bsfs.Number, ns.bsfs.Time, ns.xsd.boolean, ns.xsd.integer, ns.xsd.string] + l = [ns.xsd.boolean, ns.xsd.integer, ns.xsd.string, ns.bsfs.Literal, ns.bsl.Array, ns.bsl.Array.Feature, ns.bsl.BinaryBlob, ns.bsl.Number, ns.bsl.Time] p = [ns.bse.comment, ns.bse.group, ns.bse.tag, ns.bsfs.Predicate] self.assertEqual(repr(Schema(self.predicates, self.nodes, self.literals)), f'Schema({n}, {l}, {p})') diff --git a/test/schema/test_serialize.py b/test/schema/test_serialize.py index 84512e9..7d5d3ae 100644 --- a/test/schema/test_serialize.py +++ b/test/schema/test_serialize.py @@ -14,6 +14,8 @@ from bsfs.schema.serialize import from_string, to_string ## code ## +ns.bse = ns.bsfs.Entity() + class TestFromString(unittest.TestCase): def test_empty(self): @@ -25,7 +27,7 @@ class TestFromString(unittest.TestCase): # must not have circular dependencies self.assertRaises(errors.ConsistencyError, from_string, ''' prefix rdfs: - prefix bsfs: + prefix bsfs: bsfs:Entity rdfs:subClassOf bsfs:Node . # ah, a nice circular dependency bsfs:Entity rdfs:subClassOf bsfs:Document . @@ -39,8 +41,8 @@ class TestFromString(unittest.TestCase): self.assertRaises(errors.ConsistencyError, from_string, ''' prefix rdfs: prefix xsd: - prefix bsfs: - prefix bse: + prefix bsfs: + prefix bse: xsd:string rdfs:subClassOf bsfs:Literal . @@ -54,7 +56,7 @@ class TestFromString(unittest.TestCase): self.assertRaises(errors.ConsistencyError, from_string, ''' prefix rdfs: prefix xsd: - prefix bsfs: + prefix bsfs: bsfs:Entity rdfs:subClassOf bsfs:Node . bsfs:Document rdfs:subClassOf bsfs:Node . @@ -66,8 +68,8 @@ class TestFromString(unittest.TestCase): self.assertEqual(Schema({}, {n_unused}), from_string(''' prefix rdfs: prefix xsd: - prefix bsfs: - prefix bse: + prefix bsfs: + prefix bse: bsfs:unused rdfs:subClassOf bsfs:Node . 
# unused symbol ''')) @@ -80,8 +82,8 @@ class TestFromString(unittest.TestCase): self.assertEqual(Schema({}, {n_ent, n_tag, n_doc, n_image}), from_string(''' prefix rdfs: prefix xsd: - prefix bsfs: - prefix bse: + prefix bsfs: + prefix bse: # nodes inherit from same parent bsfs:Entity rdfs:subClassOf bsfs:Node . @@ -100,8 +102,8 @@ class TestFromString(unittest.TestCase): self.assertEqual(Schema({p_filename}), from_string(''' prefix rdfs: prefix xsd: - prefix bsfs: - prefix bse: + prefix bsfs: + prefix bse: bsfs:Entity rdfs:subClassOf bsfs:Node . xsd:string rdfs:subClassOf bsfs:Literal . @@ -116,7 +118,7 @@ class TestFromString(unittest.TestCase): self.assertDictEqual(from_string(''' prefix rdfs: prefix xsd: - prefix bsfs: + prefix bsfs: bsfs:Entity rdfs:subClassOf bsfs:Node . @@ -124,7 +126,7 @@ class TestFromString(unittest.TestCase): self.assertDictEqual(from_string(''' prefix rdfs: prefix xsd: - prefix bsfs: + prefix bsfs: bsfs:Entity rdfs:subClassOf bsfs:Node ; rdfs:label "hello world"^^xsd:string ; @@ -141,8 +143,8 @@ class TestFromString(unittest.TestCase): self.assertRaises(errors.ConsistencyError, from_string, ''' prefix rdfs: prefix xsd: - prefix bsfs: - prefix bse: + prefix bsfs: + prefix bse: bsfs:Entity rdfs:subClassOf bsfs:Node . @@ -156,7 +158,7 @@ class TestFromString(unittest.TestCase): self.assertRaises(errors.ConsistencyError, from_string, ''' prefix rdfs: prefix xsd: - prefix bsfs: + prefix bsfs: xsd:string rdfs:subClassOf bsfs:Literal . xsd:name rdfs:subClassOf bsfs:Literal . @@ -168,8 +170,8 @@ class TestFromString(unittest.TestCase): self.assertEqual(Schema({}, {}, {l_unused}), from_string(''' prefix rdfs: prefix xsd: - prefix bsfs: - prefix bse: + prefix bsfs: + prefix bse: xsd:unused rdfs:subClassOf bsfs:Literal . # unused symbol ''')) @@ -182,13 +184,14 @@ class TestFromString(unittest.TestCase): self.assertEqual(Schema({}, {}, {l_string, l_integer, l_unsigned, l_signed}), from_string(''' prefix rdfs: prefix xsd: - prefix bsfs: - prefix bse: + prefix bsfs: + prefix bse: + prefix bsl: # literals inherit from same parent xsd:string rdfs:subClassOf bsfs:Literal . - bsfs:Number rdfs:subClassOf bsfs:Literal . - xsd:integer rdfs:subClassOf bsfs:Number . + bsl:Number rdfs:subClassOf bsfs:Literal . + xsd:integer rdfs:subClassOf bsl:Number . # literals inherit from same parent xsd:unsigned rdfs:subClassOf xsd:integer . @@ -203,8 +206,8 @@ class TestFromString(unittest.TestCase): self.assertEqual(Schema({p_filename}), from_string(''' prefix rdfs: prefix xsd: - prefix bsfs: - prefix bse: + prefix bsfs: + prefix bse: bsfs:Entity rdfs:subClassOf bsfs:Node . xsd:string rdfs:subClassOf bsfs:Literal . @@ -219,7 +222,7 @@ class TestFromString(unittest.TestCase): self.assertDictEqual(from_string(''' prefix rdfs: prefix xsd: - prefix bsfs: + prefix bsfs: xsd:string rdfs:subClassOf bsfs:Literal . @@ -227,7 +230,7 @@ class TestFromString(unittest.TestCase): self.assertDictEqual(from_string(''' prefix rdfs: prefix xsd: - prefix bsfs: + prefix bsfs: xsd:string rdfs:subClassOf bsfs:Literal ; rdfs:label "hello world"^^xsd:string ; @@ -244,8 +247,8 @@ class TestFromString(unittest.TestCase): self.assertRaises(errors.ConsistencyError, from_string, ''' prefix rdfs: prefix xsd: - prefix bsfs: - prefix bse: + prefix bsfs: + prefix bse: xsd:string rdfs:subClassOf bsfs:Literal . 
@@ -258,8 +261,8 @@ class TestFromString(unittest.TestCase): self.assertRaises(errors.ConsistencyError, from_string, ''' prefix rdfs: prefix xsd: - prefix bsfs: - prefix bse: + prefix bsfs: + prefix bse: bsfs:Entity rdfs:subClassOf bsfs:Literal . xsd:string rdfs:subClassOf bsfs:Literal . @@ -274,8 +277,8 @@ class TestFromString(unittest.TestCase): self.assertRaises(errors.ConsistencyError, from_string, ''' prefix rdfs: prefix xsd: - prefix bsfs: - prefix bse: + prefix bsfs: + prefix bse: bsfs:Entity rdfs:subClassOf bsfs:Node . @@ -288,8 +291,8 @@ class TestFromString(unittest.TestCase): self.assertRaises(errors.ConsistencyError, from_string, ''' prefix rdfs: prefix xsd: - prefix bsfs: - prefix bse: + prefix bsfs: + prefix bse: bsfs:Entity rdfs:subClassOf bsfs:Node . @@ -302,8 +305,8 @@ class TestFromString(unittest.TestCase): self.assertRaises(errors.ConsistencyError, from_string, ''' prefix rdfs: prefix xsd: - prefix bsfs: - prefix bse: + prefix bsfs: + prefix bse: bsfs:Entity rdfs:subClassOf bsfs:Node . @@ -320,8 +323,8 @@ class TestFromString(unittest.TestCase): self.assertEqual(Schema({p_comment}), from_string(''' prefix rdfs: prefix xsd: - prefix bsfs: - prefix bse: + prefix bsfs: + prefix bse: bsfs:Entity rdfs:subClassOf bsfs:Node . xsd:string rdfs:subClassOf bsfs:Literal . @@ -340,8 +343,8 @@ class TestFromString(unittest.TestCase): self.assertEqual(Schema({p_comment}), from_string(''' prefix rdfs: prefix xsd: - prefix bsfs: - prefix bse: + prefix bsfs: + prefix bse: bsfs:Entity rdfs:subClassOf bsfs:Node . xsd:string rdfs:subClassOf bsfs:Literal . @@ -362,8 +365,8 @@ class TestFromString(unittest.TestCase): self.assertEqual(Schema({p_comment}), from_string(''' prefix rdfs: prefix xsd: - prefix bsfs: - prefix bse: + prefix bsfs: + prefix bse: bsfs:Entity rdfs:subClassOf bsfs:Node . xsd:string rdfs:subClassOf bsfs:Literal . @@ -383,8 +386,8 @@ class TestFromString(unittest.TestCase): self.assertEqual(Schema({p_foo}), from_string(''' prefix rdfs: prefix xsd: - prefix bsfs: - prefix bse: + prefix bsfs: + prefix bse: bsfs:Entity rdfs:subClassOf bsfs:Node . @@ -405,8 +408,8 @@ class TestFromString(unittest.TestCase): self.assertEqual(Schema({p_foobar}), from_string(''' prefix rdfs: prefix xsd: - prefix bsfs: - prefix bse: + prefix bsfs: + prefix bse: bsfs:Entity rdfs:subClassOf bsfs:Node . bsfs:Image rdfs:subClassOf bsfs:Entity . @@ -421,8 +424,8 @@ class TestFromString(unittest.TestCase): self.assertRaises(errors.ConsistencyError, from_string, ''' prefix rdfs: prefix xsd: - prefix bsfs: - prefix bse: + prefix bsfs: + prefix bse: bsfs:Entity rdfs:subClassOf bsfs:Node . bsfs:Image rdfs:subClassOf bsfs:Entity . @@ -444,8 +447,8 @@ class TestFromString(unittest.TestCase): self.assertEqual(Schema({p_foobar}), from_string(''' prefix rdfs: prefix xsd: - prefix bsfs: - prefix bse: + prefix bsfs: + prefix bse: bsfs:Entity rdfs:subClassOf bsfs:Node . bsfs:Image rdfs:subClassOf bsfs:Entity . @@ -460,8 +463,8 @@ class TestFromString(unittest.TestCase): self.assertRaises(errors.ConsistencyError, from_string, ''' prefix rdfs: prefix xsd: - prefix bsfs: - prefix bse: + prefix bsfs: + prefix bse: bsfs:Entity rdfs:subClassOf bsfs:Node . bsfs:Image rdfs:subClassOf bsfs:Entity . @@ -478,8 +481,8 @@ class TestFromString(unittest.TestCase): self.assertRaises(errors.ConsistencyError, from_string, ''' prefix rdfs: prefix xsd: - prefix bsfs: - prefix bse: + prefix bsfs: + prefix bse: bsfs:Entity rdfs:subClassOf bsfs:Node . bsfs:Annotation rdfs:subClassOf bsfs:Predicate . 
@@ -499,8 +502,8 @@ class TestFromString(unittest.TestCase): self.assertRaises(errors.ConsistencyError, from_string, ''' prefix rdfs: prefix xsd: - prefix bsfs: - prefix bse: + prefix bsfs: + prefix bse: bsfs:Entity rdfs:subClassOf bsfs:Node . @@ -516,8 +519,8 @@ class TestFromString(unittest.TestCase): self.assertRaises(errors.ConsistencyError, from_string, ''' prefix rdfs: prefix xsd: - prefix bsfs: - prefix bse: + prefix bsfs: + prefix bse: bsfs:Entity rdfs:subClassOf bsfs:Node . @@ -533,8 +536,8 @@ class TestFromString(unittest.TestCase): self.assertRaises(errors.ConsistencyError, from_string, ''' prefix rdfs: prefix xsd: - prefix bsfs: - prefix bse: + prefix bsfs: + prefix bse: bsfs:Entity rdfs:subClassOf bsfs:Node . @@ -551,8 +554,8 @@ class TestFromString(unittest.TestCase): self.assertDictEqual(from_string(''' prefix rdfs: prefix xsd: - prefix bsfs: - prefix bse: + prefix bsfs: + prefix bse: bse:comment rdfs:subClassOf bsfs:Predicate ; rdfs:range bsfs:Node . @@ -561,8 +564,8 @@ class TestFromString(unittest.TestCase): self.assertDictEqual(from_string(''' prefix rdfs: prefix xsd: - prefix bsfs: - prefix bse: + prefix bsfs: + prefix bse: bse:comment rdfs:subClassOf bsfs:Predicate ; rdfs:range bsfs:Node ; @@ -581,70 +584,78 @@ class TestFromString(unittest.TestCase): self.assertEqual(Schema(literals={f_colors}), from_string(''' prefix rdfs: prefix xsd: - prefix bsfs: - prefix bse: + prefix bsfs: + prefix bse: + prefix bsl: + prefix bsa: - bsfs:Array rdfs:subClassOf bsfs:Literal . - bsfs:Feature rdfs:subClassOf bsfs:Array. + bsl:Array rdfs:subClassOf bsfs:Literal . + bsa:Feature rdfs:subClassOf bsl:Array. - bsfs:Colors rdfs:subClassOf bsfs:Feature . + bsfs:Colors rdfs:subClassOf bsa:Feature . ''')) # features inherit properties from parents - f_colors = types.ROOT_FEATURE.child(ns.bsfs.Colors, dimension=1234, dtype=ns.bsfs.i32) - f_main_colors = f_colors.child(ns.bsfs.MainColor, distance=ns.bsfs.cosine, dtype=ns.bsfs.f16) + f_colors = types.ROOT_FEATURE.child(ns.bsfs.Colors, dimension=1234, dtype=ns.bsfs.dtype().i32) + f_main_colors = f_colors.child(ns.bsfs.MainColor, distance=ns.bsfs.cosine, dtype=ns.bsfs.dtype().f16) self.assertEqual(Schema(literals={f_colors, f_main_colors}), from_string(''' prefix rdfs: prefix xsd: - prefix bsfs: - prefix bse: + prefix bsfs: + prefix bse: + prefix bsl: + prefix bsa: - bsfs:Array rdfs:subClassOf bsfs:Literal . - bsfs:Feature rdfs:subClassOf bsfs:Array. + bsl:Array rdfs:subClassOf bsfs:Literal . + bsa:Feature rdfs:subClassOf bsl:Array. - bsfs:Colors rdfs:subClassOf bsfs:Feature ; # inherits distance from bsfs:Feature - bsfs:dimension "1234"^^xsd:integer ; # overwrites bsfs:Feature - bsfs:dtype bsfs:i32 . # overwrites bsfs:Feature + bsfs:Colors rdfs:subClassOf bsa:Feature ; # inherits distance from bsa:Feature + bsfs:dimension "1234"^^xsd:integer ; # overwrites bsa:Feature + bsfs:dtype . # overwrites bsa:Feature bsfs:MainColor rdfs:subClassOf bsfs:Colors ; # inherits dimension from bsfs:Colors - bsfs:distance bsfs:cosine ; # overwrites bsfs:Feature - bsfs:dtype bsfs:f16 . # overwrites bsfs:Colors + bsfs:distance bsfs:cosine ; # overwrites bsa:Feature + bsfs:dtype . # overwrites bsfs:Colors ''')) # feature definition can be split across multiple statements. 
# statements can be repeated - f_colors = types.ROOT_FEATURE.child(ns.bsfs.Colors, dimension=1234, dtype=ns.bsfs.f32) + f_colors = types.ROOT_FEATURE.child(ns.bsfs.Colors, dimension=1234, dtype=ns.bsfs.dtype().f32) self.assertEqual(Schema(literals={f_colors}), from_string(''' prefix rdfs: prefix xsd: - prefix bsfs: - prefix bse: + prefix bsfs: + prefix bse: + prefix bsl: + prefix bsa: - bsfs:Array rdfs:subClassOf bsfs:Literal . - bsfs:Feature rdfs:subClassOf bsfs:Array. + bsl:Array rdfs:subClassOf bsfs:Literal . + bsa:Feature rdfs:subClassOf bsl:Array. - bsfs:Colors rdfs:subClassOf bsfs:Feature ; + bsfs:Colors rdfs:subClassOf bsa:Feature ; bsfs:dimension "1234"^^xsd:integer . - bsfs:Colors rdfs:subClassOf bsfs:Feature ; + bsfs:Colors rdfs:subClassOf bsa:Feature ; bsfs:dimension "1234"^^xsd:integer ; # non-conflicting repetition - bsfs:dtype bsfs:f32 . + bsfs:dtype . ''')) # cannot define the same feature from multiple parents self.assertRaises(errors.ConsistencyError, from_string, ''' prefix rdfs: prefix xsd: - prefix bsfs: - prefix bse: + prefix bsfs: + prefix bse: + prefix bsl: + prefix bsa: - bsfs:Array rdfs:subClassOf bsfs:Literal . - bsfs:Feature rdfs:subClassOf bsfs:Array. - bsfs:ColorSpace rdfs:subClassOf bsfs:Feature . + bsl:Array rdfs:subClassOf bsfs:Literal . + bsa:Feature rdfs:subClassOf bsl:Array. + bsfs:ColorSpace rdfs:subClassOf bsa:Feature . - bsfs:Colors rdfs:subClassOf bsfs:Feature . + bsfs:Colors rdfs:subClassOf bsa:Feature . bsfs:Colors rdfs:subClassOf bsfs:ColorSpace . ''') @@ -652,16 +663,18 @@ class TestFromString(unittest.TestCase): self.assertRaises(errors.ConsistencyError, from_string, ''' prefix rdfs: prefix xsd: - prefix bsfs: - prefix bse: + prefix bsfs: + prefix bse: + prefix bsl: + prefix bsa: - bsfs:Array rdfs:subClassOf bsfs:Literal . - bsfs:Feature rdfs:subClassOf bsfs:Array. + bsl:Array rdfs:subClassOf bsfs:Literal . + bsa:Feature rdfs:subClassOf bsl:Array. - bsfs:Colors rdfs:subClassOf bsfs:Feature ; + bsfs:Colors rdfs:subClassOf bsa:Feature ; bsfs:dimension "1234"^^xsd:integer . - bsfs:Colors rdfs:subClassOf bsfs:Feature ; + bsfs:Colors rdfs:subClassOf bsa:Feature ; bsfs:dimension "4321"^^xsd:integer . # conflicting dimension ''') @@ -669,32 +682,36 @@ class TestFromString(unittest.TestCase): self.assertRaises(errors.ConsistencyError, from_string, ''' prefix rdfs: prefix xsd: - prefix bsfs: - prefix bse: + prefix bsfs: + prefix bse: + prefix bsl: + prefix bsa: - bsfs:Array rdfs:subClassOf bsfs:Literal . - bsfs:Feature rdfs:subClassOf bsfs:Array. + bsl:Array rdfs:subClassOf bsfs:Literal . + bsa:Feature rdfs:subClassOf bsl:Array. - bsfs:Colors rdfs:subClassOf bsfs:Feature ; - bsfs:dtype bsfs:f32 . + bsfs:Colors rdfs:subClassOf bsa:Feature ; + bsfs:dtype . - bsfs:Colors rdfs:subClassOf bsfs:Feature ; - bsfs:dtype bsfs:f16 . # conflicting dtype + bsfs:Colors rdfs:subClassOf bsa:Feature ; + bsfs:dtype . # conflicting dtype ''') # cannot assign multiple conflicting distance metrics to the same feature self.assertRaises(errors.ConsistencyError, from_string, ''' prefix rdfs: prefix xsd: - prefix bsfs: - prefix bse: + prefix bsfs: + prefix bse: + prefix bsl: + prefix bsa: - bsfs:Array rdfs:subClassOf bsfs:Literal . - bsfs:Feature rdfs:subClassOf bsfs:Array. + bsl:Array rdfs:subClassOf bsfs:Literal . + bsa:Feature rdfs:subClassOf bsl:Array. - bsfs:Colors rdfs:subClassOf bsfs:Feature ; + bsfs:Colors rdfs:subClassOf bsa:Feature ; bsfs:distance bsfs:euclidean . 
- bsfs:Colors rdfs:subClassOf bsfs:Feature ; + bsfs:Colors rdfs:subClassOf bsa:Feature ; bsfs:distance bsfs:cosine . # conflicting distance ''') @@ -702,26 +719,30 @@ class TestFromString(unittest.TestCase): self.assertDictEqual(from_string(''' prefix rdfs: prefix xsd: - prefix bsfs: - prefix bse: + prefix bsfs: + prefix bse: + prefix bsl: + prefix bsa: - bsfs:Array rdfs:subClassOf bsfs:Literal . - bsfs:Feature rdfs:subClassOf bsfs:Array. + bsl:Array rdfs:subClassOf bsfs:Literal . + bsa:Feature rdfs:subClassOf bsl:Array. - bsfs:Colors rdfs:subClassOf bsfs:Feature ; + bsfs:Colors rdfs:subClassOf bsa:Feature ; bsfs:dimension "1234"^^xsd:integer . ''').literal(ns.bsfs.Colors).annotations, {}) self.assertDictEqual(from_string(''' prefix rdfs: prefix xsd: - prefix bsfs: - prefix bse: + prefix bsfs: + prefix bse: + prefix bsl: + prefix bsa: - bsfs:Array rdfs:subClassOf bsfs:Literal . - bsfs:Feature rdfs:subClassOf bsfs:Array. + bsl:Array rdfs:subClassOf bsfs:Literal . + bsa:Feature rdfs:subClassOf bsl:Array. - bsfs:Colors rdfs:subClassOf bsfs:Feature ; + bsfs:Colors rdfs:subClassOf bsa:Feature ; bsfs:dimension "1234"^^xsd:integer ; rdfs:label "hello world"^^xsd:string ; bsfs:foo "1234"^^xsd:integer . @@ -748,10 +769,10 @@ class TestFromString(unittest.TestCase): p_group = p_tag.child(ns.bse.group, domain=n_image, unique=True) p_comment = p_annotation.child(ns.bse.comment, range=l_string) # features - f_colors = types.ROOT_FEATURE.child(URI('http://bsfs.ai/schema/Feature/colors_spatial'), - dtype=ns.bsfs.f16, distance=ns.bsfs.euclidean) - f_colors1234 = f_colors.child(URI('http://bsfs.ai/schema/Feature/colors_spatial#1234'), dimension=1024) - f_colors4321 = f_colors.child(URI('http://bsfs.ai/schema/Feature/colors_spatial#4321'), dimension=2048) + f_colors = types.ROOT_FEATURE.child(URI('https://schema.bsfs.io/core/Feature/colors_spatial'), + dtype=ns.bsfs.dtype().f16, distance=ns.bsfs.euclidean) + f_colors1234 = f_colors.child(URI('https://schema.bsfs.io/core/Feature/colors_spatial#1234'), dimension=1024) + f_colors4321 = f_colors.child(URI('https://schema.bsfs.io/core/Feature/colors_spatial#4321'), dimension=2048) # schema ref = Schema( {p_annotation, p_tag, p_group, p_comment}, @@ -764,8 +785,10 @@ class TestFromString(unittest.TestCase): prefix xsd: # bsfs prefixes - prefix bsfs: - prefix bse: + prefix bsfs: + prefix bse: + prefix bsl: + prefix bsa: # nodes bsfs:Entity rdfs:subClassOf bsfs:Node ; @@ -777,10 +800,10 @@ class TestFromString(unittest.TestCase): # literals xsd:string rdfs:subClassOf bsfs:Literal ; rdfs:label "A sequence of characters"^^xsd:string . - bsfs:Array rdfs:subClassOf bsfs:Literal . - bsfs:Feature rdfs:subClassOf bsfs:Array. - bsfs:Number rdfs:subClassOf bsfs:Literal . - xsd:integer rdfs:subClassOf bsfs:Number . + bsl:Array rdfs:subClassOf bsfs:Literal . + bsa:Feature rdfs:subClassOf bsl:Array. + bsl:Number rdfs:subClassOf bsfs:Literal . + xsd:integer rdfs:subClassOf bsl:Number . xsd:boolean rdfs:subClassOf bsfs:Literal . @@ -789,19 +812,19 @@ class TestFromString(unittest.TestCase): rdfs:label "node annotation"^^xsd:string . # feature instances - rdfs:subClassOf bsfs:Feature ; - bsfs:dtype bsfs:f16 ; + rdfs:subClassOf bsa:Feature ; + bsfs:dtype ; bsfs:distance bsfs:euclidean ; # annotations rdfs:label "ColorsSpatial instances. Dimension depends on instance."^^xsd:string ; bsfs:first_arg "1234"^^xsd:integer ; bsfs:second_arg "hello world"^^xsd:string . 
- rdfs:subClassOf ; + rdfs:subClassOf ; bsfs:dimension "1024"^^xsd:integer ; rdfs:label "Main colors spatial instance"^^xsd:string . - rdfs:subClassOf ; + rdfs:subClassOf ; bsfs:dimension "2048"^^xsd:integer . # predicate instances @@ -829,19 +852,19 @@ class TestFromString(unittest.TestCase): self.assertDictEqual(gen.node(ns.bsfs.Tag).annotations, {ns.rdfs.label: 'Tag'}) self.assertDictEqual(gen.literal(ns.xsd.string).annotations, {ns.rdfs.label: 'A sequence of characters'}) self.assertDictEqual(gen.predicate(ns.bsfs.Annotation).annotations, {ns.rdfs.label: 'node annotation'}) - self.assertDictEqual(gen.literal(URI('http://bsfs.ai/schema/Feature/colors_spatial')).annotations, { + self.assertDictEqual(gen.literal(URI('https://schema.bsfs.io/core/Feature/colors_spatial')).annotations, { ns.rdfs.label: 'ColorsSpatial instances. Dimension depends on instance.', ns.bsfs.first_arg: 1234, ns.bsfs.second_arg: 'hello world', }) - self.assertDictEqual(gen.literal(URI('http://bsfs.ai/schema/Feature/colors_spatial#1234')).annotations, { + self.assertDictEqual(gen.literal(URI('https://schema.bsfs.io/core/Feature/colors_spatial#1234')).annotations, { ns.rdfs.label: 'Main colors spatial instance'}) self.assertDictEqual(gen.predicate(ns.bse.tag).annotations, {ns.rdfs.label: 'connect entity to a tag'}) # blank nodes result in an error self.assertRaises(errors.BackendError, from_string, ''' prefix rdfs: - prefix bsfs: + prefix bsfs: bsfs:Entity rdfs:subClassOf bsfs:Node ; bsfs:foo _:bar . ''') @@ -976,29 +999,29 @@ class TestToString(unittest.TestCase): def test_feature(self): # root features - f_colors = types.ROOT_FEATURE.child(URI('http://bsfs.ai/schema/Feature/colors'), + f_colors = types.ROOT_FEATURE.child(URI('https://schema.bsfs.io/core/Feature/colors'), distance=ns.bsfs.cosine) # derived features - f_colors1234 = f_colors.child(URI('http://bsfs.ai/schema/Feature/colors#1234'), + f_colors1234 = f_colors.child(URI('https://schema.bsfs.io/core/Feature/colors#1234'), dimension=1024) # inherits dtype, distance - f_colors4321 = f_colors.child(URI('http://bsfs.ai/schema/Feature/colors#4321'), + f_colors4321 = f_colors.child(URI('https://schema.bsfs.io/core/Feature/colors#4321'), dimension=2048, distance=ns.bsfs.euclidean) # inherits dtype # create schema schema = Schema(literals={f_colors, f_colors1234, f_colors4321}) schema_str = to_string(schema) # all symbols are serialized - self.assertIn('bsfs:Array', schema_str) - self.assertIn('[^\.]*bsfs:dimension[^\.]', schema_str)) - self.assertIsNone(re.search(r'[^\.]*bsfs:dtype[^\.]', schema_str)) - self.assertIsNone(re.search(r'[^\.]*bsfs:distance[^\.]', schema_str)) - self.assertIsNotNone(re.search(r'[^\.]*bsfs:dimension[^\.]', schema_str)) - self.assertIsNotNone(re.search(r'[^\.]*bsfs:distance[^\.]', schema_str)) - self.assertIsNone(re.search(r'[^\.]*bsfs:dtype[^\.]', schema_str)) + self.assertIsNotNone(re.search(r'.*[^\.]*bsfs:dimension[^\.]', schema_str)) + self.assertIsNone(re.search(r'.*[^\.]*bsfs:dtype[^\.]', schema_str)) + self.assertIsNone(re.search(r'.*[^\.]*bsfs:distance[^\.]', schema_str)) + self.assertIsNotNone(re.search(r'.*[^\.]*bsfs:dimension[^\.]', schema_str)) + self.assertIsNotNone(re.search(r'.*[^\.]*bsfs:distance[^\.]', schema_str)) + self.assertIsNone(re.search(r'.*[^\.]*bsfs:dtype[^\.]', schema_str)) # unserialize yields the original schema self.assertEqual(schema, from_string(schema_str)) @@ -1009,12 +1032,12 @@ class TestToString(unittest.TestCase): ns.bsfs.foo: 1234, ns.bsfs.bar: False, } - f_colors = 
types.ROOT_FEATURE.child(URI('http://bsfs.ai/schema/Feature/colors'), - dtype=ns.bsfs.f16, distance=ns.bsfs.euclidean, + f_colors = types.ROOT_FEATURE.child(URI('https://schema.bsfs.io/core/Feature/colors'), + dtype=ns.bsfs.dtype().f16, distance=ns.bsfs.euclidean, **annotations) self.assertDictEqual( annotations, - from_string(to_string(Schema(literals={f_colors}))).literal(URI('http://bsfs.ai/schema/Feature/colors')).annotations) + from_string(to_string(Schema(literals={f_colors}))).literal(URI('https://schema.bsfs.io/core/Feature/colors')).annotations) ## main ## diff --git a/test/schema/test_types.py b/test/schema/test_types.py index f87d857..9bfa8c5 100644 --- a/test/schema/test_types.py +++ b/test/schema/test_types.py @@ -14,6 +14,8 @@ from bsfs.schema.types import _Type, Vertex, Node, Literal, Predicate, Feature ## code ## +ns.bse = ns.bsfs.Entity() + class TestType(unittest.TestCase): def test_parents(self): diff --git a/test/triple_store/sparql/test_parse_fetch.py b/test/triple_store/sparql/test_parse_fetch.py index 9284608..1d793e7 100644 --- a/test/triple_store/sparql/test_parse_fetch.py +++ b/test/triple_store/sparql/test_parse_fetch.py @@ -15,10 +15,9 @@ from bsfs.triple_store.sparql.parse_fetch import Fetch ## code ## -bsfs = Namespace('http://bsfs.ai/schema', fsep='/') -bse = Namespace('http://bsfs.ai/schema/Entity') -bst = Namespace('http://bsfs.ai/schema/Tag') -bsc = Namespace('http://bsfs.ai/schema/Collection') +ns.bse = ns.bsfs.Entity() +ns.bst = ns.bsfs.Tag() +ns.bsc = ns.bsfs.Collection() class TestParseFetch(unittest.TestCase): @@ -27,10 +26,10 @@ class TestParseFetch(unittest.TestCase): prefix rdfs: prefix xsd: - prefix bsfs: - prefix bse: - prefix bst: - prefix bsc: + prefix bsfs: + prefix bse: + prefix bst: + prefix bsc: # nodes bsfs:Entity rdfs:subClassOf bsfs:Node . 
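
The hunks above swap the hand-built bse/bst/bsc Namespace objects for aliases derived from ns.bsfs, and switch feature dtypes to the ns.bsfs.dtype() accessor. A short sketch of what those aliases expand to, assuming `ns` is the registry these tests import and that ns.bsfs resolves to https://schema.bsfs.io/core, as the expected Walk() string representations earlier in this patch suggest.

    from bsfs.namespace import ns   # assumption: the registry imported as `ns` by these tests

    ns.bse = ns.bsfs.Entity()       # FinalNamespace over .../core/Entity, '#' fragments
    ns.bst = ns.bsfs.Tag()
    ns.bsc = ns.bsfs.Collection()

    assert ns.bse.tag == 'https://schema.bsfs.io/core/Entity#tag'      # per the Walk() reprs above
    assert ns.bst.label == 'https://schema.bsfs.io/core/Tag#label'
    # dtype constants follow the same pattern (assumed expansion):
    assert ns.bsfs.dtype().f16 == 'https://schema.bsfs.io/core/dtype#f16'
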
@@ -83,43 +82,43 @@ class TestParseFetch(unittest.TestCase): # graph to test queries self.graph = rdflib.Graph() # schema hierarchies - self.graph.add((rdflib.URIRef('http://bsfs.ai/schema/Entity'), rdflib.RDFS.subClassOf, rdflib.URIRef('http://bsfs.ai/schema/Node'))) - self.graph.add((rdflib.URIRef('http://bsfs.ai/schema/Collection'), rdflib.RDFS.subClassOf, rdflib.URIRef('http://bsfs.ai/schema/Node'))) - self.graph.add((rdflib.URIRef('http://bsfs.ai/schema/Tag'), rdflib.RDFS.subClassOf, rdflib.URIRef('http://bsfs.ai/schema/Node'))) + self.graph.add((rdflib.URIRef('https://schema.bsfs.io/core/Entity'), rdflib.RDFS.subClassOf, rdflib.URIRef('https://schema.bsfs.io/core/Node'))) + self.graph.add((rdflib.URIRef('https://schema.bsfs.io/core/Collection'), rdflib.RDFS.subClassOf, rdflib.URIRef('https://schema.bsfs.io/core/Node'))) + self.graph.add((rdflib.URIRef('https://schema.bsfs.io/core/Tag'), rdflib.RDFS.subClassOf, rdflib.URIRef('https://schema.bsfs.io/core/Node'))) # entities - self.graph.add((rdflib.URIRef('http://example.com/entity#1234'), rdflib.RDF.type, rdflib.URIRef('http://bsfs.ai/schema/Entity'))) - self.graph.add((rdflib.URIRef('http://example.com/entity#4321'), rdflib.RDF.type, rdflib.URIRef('http://bsfs.ai/schema/Entity'))) + self.graph.add((rdflib.URIRef('http://example.com/entity#1234'), rdflib.RDF.type, rdflib.URIRef('https://schema.bsfs.io/core/Entity'))) + self.graph.add((rdflib.URIRef('http://example.com/entity#4321'), rdflib.RDF.type, rdflib.URIRef('https://schema.bsfs.io/core/Entity'))) # tags - self.graph.add((rdflib.URIRef('http://example.com/tag#1234'), rdflib.RDF.type, rdflib.URIRef('http://bsfs.ai/schema/Tag'))) - self.graph.add((rdflib.URIRef('http://example.com/tag#4321'), rdflib.RDF.type, rdflib.URIRef('http://bsfs.ai/schema/Tag'))) + self.graph.add((rdflib.URIRef('http://example.com/tag#1234'), rdflib.RDF.type, rdflib.URIRef('https://schema.bsfs.io/core/Tag'))) + self.graph.add((rdflib.URIRef('http://example.com/tag#4321'), rdflib.RDF.type, rdflib.URIRef('https://schema.bsfs.io/core/Tag'))) # collections - self.graph.add((rdflib.URIRef('http://example.com/collection#1234'), rdflib.RDF.type, rdflib.URIRef('http://bsfs.ai/schema/Collection'))) - self.graph.add((rdflib.URIRef('http://example.com/collection#4321'), rdflib.RDF.type, rdflib.URIRef('http://bsfs.ai/schema/Collection'))) + self.graph.add((rdflib.URIRef('http://example.com/collection#1234'), rdflib.RDF.type, rdflib.URIRef('https://schema.bsfs.io/core/Collection'))) + self.graph.add((rdflib.URIRef('http://example.com/collection#4321'), rdflib.RDF.type, rdflib.URIRef('https://schema.bsfs.io/core/Collection'))) # entity literals - self.graph.add((rdflib.URIRef('http://example.com/entity#1234'), rdflib.URIRef(bse.rank), rdflib.Literal('1234', datatype=rdflib.XSD.integer))) - self.graph.add((rdflib.URIRef('http://example.com/entity#1234'), rdflib.URIRef(bse.filename), rdflib.Literal('filename_1234', datatype=rdflib.XSD.string))) - #self.graph.add((rdflib.URIRef('http://example.com/entity#4321'), rdflib.URIRef(bse.rank), rdflib.Literal('4321', datatype=rdflib.XSD.integer))) - self.graph.add((rdflib.URIRef('http://example.com/entity#4321'), rdflib.URIRef(bse.filename), rdflib.Literal('filename_4321', datatype=rdflib.XSD.string))) + self.graph.add((rdflib.URIRef('http://example.com/entity#1234'), rdflib.URIRef(ns.bse.rank), rdflib.Literal('1234', datatype=rdflib.XSD.integer))) + self.graph.add((rdflib.URIRef('http://example.com/entity#1234'), rdflib.URIRef(ns.bse.filename), rdflib.Literal('filename_1234', 
datatype=rdflib.XSD.string))) + #self.graph.add((rdflib.URIRef('http://example.com/entity#4321'), rdflib.URIRef(ns.bse.rank), rdflib.Literal('4321', datatype=rdflib.XSD.integer))) + self.graph.add((rdflib.URIRef('http://example.com/entity#4321'), rdflib.URIRef(ns.bse.filename), rdflib.Literal('filename_4321', datatype=rdflib.XSD.string))) # tag literals - self.graph.add((rdflib.URIRef('http://example.com/tag#1234'), rdflib.URIRef(bst.label), rdflib.Literal('tag_label_1234', datatype=rdflib.XSD.string))) - self.graph.add((rdflib.URIRef('http://example.com/tag#4321'), rdflib.URIRef(bst.label), rdflib.Literal('tag_label_4321', datatype=rdflib.XSD.string))) + self.graph.add((rdflib.URIRef('http://example.com/tag#1234'), rdflib.URIRef(ns.bst.label), rdflib.Literal('tag_label_1234', datatype=rdflib.XSD.string))) + self.graph.add((rdflib.URIRef('http://example.com/tag#4321'), rdflib.URIRef(ns.bst.label), rdflib.Literal('tag_label_4321', datatype=rdflib.XSD.string))) # collection literals - self.graph.add((rdflib.URIRef('http://example.com/collection#1234'), rdflib.URIRef(bsc.label), rdflib.Literal('collection_label_1234', datatype=rdflib.XSD.string))) - self.graph.add((rdflib.URIRef('http://example.com/collection#1234'), rdflib.URIRef(bsc.rating), rdflib.Literal('1234', datatype=rdflib.XSD.integer))) - self.graph.add((rdflib.URIRef('http://example.com/collection#4321'), rdflib.URIRef(bsc.label), rdflib.Literal('collection_label_4321', datatype=rdflib.XSD.string))) - self.graph.add((rdflib.URIRef('http://example.com/collection#4321'), rdflib.URIRef(bsc.rating), rdflib.Literal('4321', datatype=rdflib.XSD.integer))) + self.graph.add((rdflib.URIRef('http://example.com/collection#1234'), rdflib.URIRef(ns.bsc.label), rdflib.Literal('collection_label_1234', datatype=rdflib.XSD.string))) + self.graph.add((rdflib.URIRef('http://example.com/collection#1234'), rdflib.URIRef(ns.bsc.rating), rdflib.Literal('1234', datatype=rdflib.XSD.integer))) + self.graph.add((rdflib.URIRef('http://example.com/collection#4321'), rdflib.URIRef(ns.bsc.label), rdflib.Literal('collection_label_4321', datatype=rdflib.XSD.string))) + self.graph.add((rdflib.URIRef('http://example.com/collection#4321'), rdflib.URIRef(ns.bsc.rating), rdflib.Literal('4321', datatype=rdflib.XSD.integer))) # entity-tag links - self.graph.add((rdflib.URIRef('http://example.com/entity#1234'), rdflib.URIRef(bse.tag), rdflib.URIRef('http://example.com/tag#1234'))) - self.graph.add((rdflib.URIRef('http://example.com/entity#4321'), rdflib.URIRef(bse.tag), rdflib.URIRef('http://example.com/tag#4321'))) + self.graph.add((rdflib.URIRef('http://example.com/entity#1234'), rdflib.URIRef(ns.bse.tag), rdflib.URIRef('http://example.com/tag#1234'))) + self.graph.add((rdflib.URIRef('http://example.com/entity#4321'), rdflib.URIRef(ns.bse.tag), rdflib.URIRef('http://example.com/tag#4321'))) # entity-collection links - self.graph.add((rdflib.URIRef('http://example.com/entity#1234'), rdflib.URIRef(bse.collection), rdflib.URIRef('http://example.com/collection#1234'))) - self.graph.add((rdflib.URIRef('http://example.com/entity#4321'), rdflib.URIRef(bse.collection), rdflib.URIRef('http://example.com/collection#4321'))) + self.graph.add((rdflib.URIRef('http://example.com/entity#1234'), rdflib.URIRef(ns.bse.collection), rdflib.URIRef('http://example.com/collection#1234'))) + self.graph.add((rdflib.URIRef('http://example.com/entity#4321'), rdflib.URIRef(ns.bse.collection), rdflib.URIRef('http://example.com/collection#4321'))) # collection-tag links - 
self.graph.add((rdflib.URIRef('http://example.com/collection#1234'), rdflib.URIRef(bsc.tag), rdflib.URIRef('http://example.com/tag#1234'))) - self.graph.add((rdflib.URIRef('http://example.com/collection#4321'), rdflib.URIRef(bsc.tag), rdflib.URIRef('http://example.com/tag#4321'))) + self.graph.add((rdflib.URIRef('http://example.com/collection#1234'), rdflib.URIRef(ns.bsc.tag), rdflib.URIRef('http://example.com/tag#1234'))) + self.graph.add((rdflib.URIRef('http://example.com/collection#4321'), rdflib.URIRef(ns.bsc.tag), rdflib.URIRef('http://example.com/tag#4321'))) # tag-entity links # NOTE: cross-over - self.graph.add((rdflib.URIRef('http://example.com/tag#1234'), rdflib.URIRef(bst.main), rdflib.URIRef('http://example.com/entity#4321'))) - self.graph.add((rdflib.URIRef('http://example.com/tag#4321'), rdflib.URIRef(bst.main), rdflib.URIRef('http://example.com/entity#1234'))) + self.graph.add((rdflib.URIRef('http://example.com/tag#1234'), rdflib.URIRef(ns.bst.main), rdflib.URIRef('http://example.com/entity#4321'))) + self.graph.add((rdflib.URIRef('http://example.com/tag#4321'), rdflib.URIRef(ns.bst.main), rdflib.URIRef('http://example.com/entity#1234'))) # default parser self.parser = Fetch(self.schema) @@ -135,7 +134,7 @@ class TestParseFetch(unittest.TestCase): # __call__ requires a parseable root self.assertRaises(errors.BackendError, self.parser, self.ent, ast.filter.FilterExpression()) # __call__ returns an executable query - q = self.parser(self.ent, ast.fetch.Fetch(bse.tag, ast.fetch.Value(bst.label, 'label'))) + q = self.parser(self.ent, ast.fetch.Fetch(ns.bse.tag, ast.fetch.Value(ns.bst.label, 'label'))) self.assertSetEqual(set(q(self.graph)), { (rdflib.URIRef('http://example.com/entity#1234'), rdflib.Literal('tag_label_1234', datatype=rdflib.XSD.string)), (rdflib.URIRef('http://example.com/entity#4321'), rdflib.Literal('tag_label_4321', datatype=rdflib.XSD.string)), @@ -149,8 +148,8 @@ class TestParseFetch(unittest.TestCase): def test_all(self): # multiple values query q = self.parser(self.ent, ast.fetch.All( - ast.fetch.Value(bse.filename, name='filename'), - ast.fetch.Value(bse.rank, name='rank')), + ast.fetch.Value(ns.bse.filename, name='filename'), + ast.fetch.Value(ns.bse.rank, name='rank')), ) self.assertSetEqual(set(q.names), {'filename', 'rank'}) if q.names == ('filename', 'rank'): @@ -165,8 +164,8 @@ class TestParseFetch(unittest.TestCase): }) # mixed values and node query q = self.parser(self.ent, ast.fetch.All( - ast.fetch.Value(bse.filename, name='filename'), - ast.fetch.Node(bse.tag, name='tag'), + ast.fetch.Value(ns.bse.filename, name='filename'), + ast.fetch.Node(ns.bse.tag, name='tag'), )) self.assertSetEqual(set(q.names), {'filename', 'tag'}) if q.names == ('filename', 'tag'): @@ -180,9 +179,9 @@ class TestParseFetch(unittest.TestCase): (rdflib.URIRef('http://example.com/entity#4321'), rdflib.URIRef('http://example.com/tag#4321'), rdflib.Literal('filename_4321', datatype=rdflib.XSD.string)), }) # multiple values and second hop - q = self.parser(self.ent, ast.fetch.Fetch(bse.tag, ast.fetch.All( + q = self.parser(self.ent, ast.fetch.Fetch(ns.bse.tag, ast.fetch.All( ast.fetch.This(name='tag'), - ast.fetch.Value(bst.label, name='label'), + ast.fetch.Value(ns.bst.label, name='label'), ))) self.assertSetEqual(set(q.names), {'tag', 'label'}) if q.names == ('tag', 'label'): @@ -200,13 +199,13 @@ class TestParseFetch(unittest.TestCase): def test_fetch(self): # two-hop query - q = self.parser(self.ent, ast.fetch.Fetch(bse.tag, ast.fetch.Value(bst.label, 'tag_label'))) + q = 
self.parser(self.ent, ast.fetch.Fetch(ns.bse.tag, ast.fetch.Value(ns.bst.label, 'tag_label'))) self.assertSetEqual(set(q(self.graph)), { (rdflib.URIRef('http://example.com/entity#1234'), rdflib.Literal('tag_label_1234', datatype=rdflib.XSD.string)), (rdflib.URIRef('http://example.com/entity#4321'), rdflib.Literal('tag_label_4321', datatype=rdflib.XSD.string)), }) # three-hop-query - q = self.parser(self.ent, ast.fetch.Fetch(bse.tag, ast.fetch.Fetch(bst.main, ast.fetch.Value(bse.rank, 'entity_rank')))) + q = self.parser(self.ent, ast.fetch.Fetch(ns.bse.tag, ast.fetch.Fetch(ns.bst.main, ast.fetch.Value(ns.bse.rank, 'entity_rank')))) self.assertSetEqual(set(q(self.graph)), { (rdflib.URIRef('http://example.com/entity#1234'), None), (rdflib.URIRef('http://example.com/entity#4321'), rdflib.Literal('1234', datatype=rdflib.XSD.integer)), @@ -215,9 +214,9 @@ class TestParseFetch(unittest.TestCase): def test_node(self): # cannot use the internal hop name - self.assertRaises(errors.BackendError, self.parser, self.ent, ast.fetch.Node(bse.tag, self.parser.ngen.prefix[1:] + '123')) + self.assertRaises(errors.BackendError, self.parser, self.ent, ast.fetch.Node(ns.bse.tag, self.parser.ngen.prefix[1:] + '123')) # a simple Node statement - q = self.parser(self.ent, ast.fetch.Node(bse.tag, 'tag')) + q = self.parser(self.ent, ast.fetch.Node(ns.bse.tag, 'tag')) self.assertSetEqual(set(q.names), {'tag'}) self.assertSetEqual(set(q(self.graph)), { (rdflib.URIRef('http://example.com/entity#1234'), rdflib.URIRef('http://example.com/tag#1234')), @@ -227,9 +226,9 @@ class TestParseFetch(unittest.TestCase): def test_value(self): # cannot use the internal hop name - self.assertRaises(errors.BackendError, self.parser, self.schema.node(ns.bsfs.Entity), ast.fetch.Value(bse.filename, self.parser.ngen.prefix[1:] + '123')) + self.assertRaises(errors.BackendError, self.parser, self.schema.node(ns.bsfs.Entity), ast.fetch.Value(ns.bse.filename, self.parser.ngen.prefix[1:] + '123')) # a simple Value statement - q = self.parser(self.ent, ast.fetch.Value(bse.filename, 'filename')) + q = self.parser(self.ent, ast.fetch.Value(ns.bse.filename, 'filename')) self.assertSetEqual(set(q.names), {'filename'}) self.assertSetEqual(set(q(self.graph)), { (rdflib.URIRef('http://example.com/entity#1234'), rdflib.Literal('filename_1234', datatype=rdflib.XSD.string)), diff --git a/test/triple_store/sparql/test_parse_filter.py b/test/triple_store/sparql/test_parse_filter.py index 5b6ca8a..a45f2ef 100644 --- a/test/triple_store/sparql/test_parse_filter.py +++ b/test/triple_store/sparql/test_parse_filter.py @@ -15,6 +15,8 @@ from bsfs.triple_store.sparql.parse_filter import Filter ## code ## +ns.bse = ns.bsfs.Entity() + class TestParseFilter(unittest.TestCase): def setUp(self): # schema @@ -22,25 +24,28 @@ class TestParseFilter(unittest.TestCase): prefix rdfs: prefix xsd: - prefix bsfs: - prefix bse: + prefix bsfs: + prefix bse: + prefix bsl: + prefix bsd: + prefix bsa: - bsfs:Array rdfs:subClassOf bsfs:Literal . - bsfs:Feature rdfs:subClassOf bsfs:Array . - bsfs:Number rdfs:subClassOf bsfs:Literal . + bsl:Array rdfs:subClassOf bsfs:Literal . + bsa:Feature rdfs:subClassOf bsl:Array . + bsl:Number rdfs:subClassOf bsfs:Literal . bsfs:Entity rdfs:subClassOf bsfs:Node . bsfs:Image rdfs:subClassOf bsfs:Entity . bsfs:Tag rdfs:subClassOf bsfs:Node . xsd:string rdfs:subClassOf bsfs:Literal . - xsd:integer rdfs:subClassOf bsfs:Number . + xsd:integer rdfs:subClassOf bsl:Number . bsfs:URI rdfs:subClassOf bsfs:Literal . 
- bsfs:Colors rdfs:subClassOf bsfs:Feature ; + bsfs:Colors rdfs:subClassOf bsa:Feature ; bsfs:dimension "4"^^xsd:integer ; bsfs:dtype xsd:integer ; - bsfs:distance bsfs:euclidean . + bsfs:distance bsd:euclidean . bse:colors rdfs:subClassOf bsfs:Predicate ; rdfs:domain bsfs:Entity ; @@ -81,18 +86,18 @@ class TestParseFilter(unittest.TestCase): # graph to test queries self.graph = rdflib.Graph() # schema hierarchies - self.graph.add((rdflib.URIRef('http://bsfs.ai/schema/Entity'), rdflib.RDFS.subClassOf, rdflib.URIRef('http://bsfs.ai/schema/Node'))) - self.graph.add((rdflib.URIRef('http://bsfs.ai/schema/Image'), rdflib.RDFS.subClassOf, rdflib.URIRef('http://bsfs.ai/schema/Entity'))) - self.graph.add((rdflib.URIRef('http://bsfs.ai/schema/Tag'), rdflib.RDFS.subClassOf, rdflib.URIRef('http://bsfs.ai/schema/Node'))) + self.graph.add((rdflib.URIRef('https://schema.bsfs.io/core/Entity'), rdflib.RDFS.subClassOf, rdflib.URIRef('https://schema.bsfs.io/core/Node'))) + self.graph.add((rdflib.URIRef('https://schema.bsfs.io/core/Image'), rdflib.RDFS.subClassOf, rdflib.URIRef('https://schema.bsfs.io/core/Entity'))) + self.graph.add((rdflib.URIRef('https://schema.bsfs.io/core/Tag'), rdflib.RDFS.subClassOf, rdflib.URIRef('https://schema.bsfs.io/core/Node'))) # entities - self.graph.add((rdflib.URIRef('http://example.com/entity#1234'), rdflib.RDF.type, rdflib.URIRef('http://bsfs.ai/schema/Entity'))) - self.graph.add((rdflib.URIRef('http://example.com/entity#4321'), rdflib.RDF.type, rdflib.URIRef('http://bsfs.ai/schema/Entity'))) + self.graph.add((rdflib.URIRef('http://example.com/entity#1234'), rdflib.RDF.type, rdflib.URIRef('https://schema.bsfs.io/core/Entity'))) + self.graph.add((rdflib.URIRef('http://example.com/entity#4321'), rdflib.RDF.type, rdflib.URIRef('https://schema.bsfs.io/core/Entity'))) # tags - self.graph.add((rdflib.URIRef('http://example.com/tag#1234'), rdflib.RDF.type, rdflib.URIRef('http://bsfs.ai/schema/Tag'))) - self.graph.add((rdflib.URIRef('http://example.com/tag#4321'), rdflib.RDF.type, rdflib.URIRef('http://bsfs.ai/schema/Tag'))) + self.graph.add((rdflib.URIRef('http://example.com/tag#1234'), rdflib.RDF.type, rdflib.URIRef('https://schema.bsfs.io/core/Tag'))) + self.graph.add((rdflib.URIRef('http://example.com/tag#4321'), rdflib.RDF.type, rdflib.URIRef('https://schema.bsfs.io/core/Tag'))) # images - self.graph.add((rdflib.URIRef('http://example.com/image#1234'), rdflib.RDF.type, rdflib.URIRef('http://bsfs.ai/schema/Image'))) - self.graph.add((rdflib.URIRef('http://example.com/image#4321'), rdflib.RDF.type, rdflib.URIRef('http://bsfs.ai/schema/Image'))) + self.graph.add((rdflib.URIRef('http://example.com/image#1234'), rdflib.RDF.type, rdflib.URIRef('https://schema.bsfs.io/core/Image'))) + self.graph.add((rdflib.URIRef('http://example.com/image#4321'), rdflib.RDF.type, rdflib.URIRef('https://schema.bsfs.io/core/Image'))) # node comments self.graph.add((rdflib.URIRef('http://example.com/entity#1234'), rdflib.URIRef(ns.bse.comment), rdflib.Literal('Me, Myself, and I', datatype=rdflib.XSD.string))) self.graph.add((rdflib.URIRef('http://example.com/entity#1234'), rdflib.URIRef(ns.bse.comment), rdflib.Literal('hello world', datatype=rdflib.XSD.string))) diff --git a/test/triple_store/sparql/test_sparql.py b/test/triple_store/sparql/test_sparql.py index f45ca37..a7e7d37 100644 --- a/test/triple_store/sparql/test_sparql.py +++ b/test/triple_store/sparql/test_sparql.py @@ -15,22 +15,25 @@ from bsfs.triple_store.sparql.sparql import SparqlStore ## code ## +ns.bse = ns.bsfs.Entity() + class 
TestSparqlStore(unittest.TestCase): def setUp(self): self.schema = bsc.from_string(''' prefix rdfs: prefix xsd: - prefix bsfs: - prefix bse: + prefix bsfs: + prefix bse: + prefix bsl: bsfs:Entity rdfs:subClassOf bsfs:Node . bsfs:Tag rdfs:subClassOf bsfs:Node . bsfs:User rdfs:subClassOf bsfs:Node . xsd:string rdfs:subClassOf bsfs:Literal . - bsfs:Number rdfs:subClassOf bsfs:Literal . - bsfs:BinaryBlob rdfs:subClassOf bsfs:Literal . - xsd:integer rdfs:subClassOf bsfs:Number . + bsl:Number rdfs:subClassOf bsfs:Literal . + bsl:BinaryBlob rdfs:subClassOf bsfs:Literal . + xsd:integer rdfs:subClassOf bsl:Number . # non-unique literal bse:comment rdfs:subClassOf bsfs:Predicate ; @@ -59,7 +62,7 @@ class TestSparqlStore(unittest.TestCase): # binary range bse:asset rdfs:subClassOf bsfs:Predicate ; rdfs:domain bsfs:Entity ; - rdfs:range bsfs:BinaryBlob . + rdfs:range bsl:BinaryBlob . ''') self.schema_triples = { @@ -68,12 +71,12 @@ class TestSparqlStore(unittest.TestCase): (rdflib.URIRef(ns.bsfs.Tag), rdflib.RDFS.subClassOf, rdflib.URIRef(ns.bsfs.Node)), (rdflib.URIRef(ns.bsfs.User), rdflib.RDFS.subClassOf, rdflib.URIRef(ns.bsfs.Node)), (rdflib.URIRef(ns.xsd.string), rdflib.RDFS.subClassOf, rdflib.URIRef(ns.bsfs.Literal)), - (rdflib.URIRef(ns.bsfs.Array), rdflib.RDFS.subClassOf, rdflib.URIRef(ns.bsfs.Literal)), - (rdflib.URIRef(ns.bsfs.BinaryBlob), rdflib.RDFS.subClassOf, rdflib.URIRef(ns.bsfs.Literal)), - (rdflib.URIRef(ns.bsfs.Feature), rdflib.RDFS.subClassOf, rdflib.URIRef(ns.bsfs.Array)), - (rdflib.URIRef(ns.bsfs.Number), rdflib.RDFS.subClassOf, rdflib.URIRef(ns.bsfs.Literal)), - (rdflib.URIRef(ns.bsfs.Time), rdflib.RDFS.subClassOf, rdflib.URIRef(ns.bsfs.Literal)), - (rdflib.URIRef(ns.xsd.integer), rdflib.RDFS.subClassOf, rdflib.URIRef(ns.bsfs.Number)), + (rdflib.URIRef(ns.bsl.Array), rdflib.RDFS.subClassOf, rdflib.URIRef(ns.bsfs.Literal)), + (rdflib.URIRef(ns.bsl.BinaryBlob), rdflib.RDFS.subClassOf, rdflib.URIRef(ns.bsfs.Literal)), + (rdflib.URIRef(ns.bsl.Array.Feature), rdflib.RDFS.subClassOf, rdflib.URIRef(ns.bsl.Array)), + (rdflib.URIRef(ns.bsl.Number), rdflib.RDFS.subClassOf, rdflib.URIRef(ns.bsfs.Literal)), + (rdflib.URIRef(ns.bsl.Time), rdflib.RDFS.subClassOf, rdflib.URIRef(ns.bsfs.Literal)), + (rdflib.URIRef(ns.xsd.integer), rdflib.RDFS.subClassOf, rdflib.URIRef(ns.bsl.Number)), (rdflib.URIRef(ns.bse.comment), rdflib.RDFS.subClassOf, rdflib.URIRef(ns.bsfs.Predicate)), (rdflib.URIRef(ns.bse.filesize), rdflib.RDFS.subClassOf, rdflib.URIRef(ns.bsfs.Predicate)), (rdflib.URIRef(ns.bse.tag), rdflib.RDFS.subClassOf, rdflib.URIRef(ns.bsfs.Predicate)), @@ -101,7 +104,7 @@ class TestSparqlStore(unittest.TestCase): store.schema = bsc.from_string(''' prefix rdfs: prefix xsd: - prefix bsfs: + prefix bsfs: bsfs:Entity rdfs:subClassOf bsfs:Node . bsfs:Document rdfs:subClassOf bsfs:Entity . @@ -206,10 +209,10 @@ class TestSparqlStore(unittest.TestCase): curr = curr + bsc.from_string(''' prefix rdfs: prefix xsd: - prefix bsfs: - prefix bse: - prefix bst: - prefix bsc: + prefix bsfs: + prefix bse: + prefix bst: + prefix bsc: bsfs:Entity rdfs:subClassOf bsfs:Node . bsfs:Tag rdfs:subClassOf bsfs:Node . 
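
The schema_triples expectations above encode the reorganized built-in literal hierarchy: the generic literal classes move out of bsfs: into the bsl: namespace, with Feature now nested under Array (ns.bsl.Array.Feature). A compact restatement of the expected subclass edges, under the same `ns` assumption as in the sketch above.

    from bsfs.namespace import ns   # assumed import path, as above

    # child class -> parent class, as asserted by the updated schema_triples sets
    expected_literal_tree = {
        ns.bsl.Array:         ns.bsfs.Literal,
        ns.bsl.BinaryBlob:    ns.bsfs.Literal,
        ns.bsl.Number:        ns.bsfs.Literal,
        ns.bsl.Time:          ns.bsfs.Literal,
        ns.bsl.Array.Feature: ns.bsl.Array,
        ns.xsd.integer:       ns.bsl.Number,
        ns.xsd.string:        ns.bsfs.Literal,
    }
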
@@ -255,16 +258,16 @@ class TestSparqlStore(unittest.TestCase): (rdflib.URIRef(ns.xsd.boolean), rdflib.RDFS.subClassOf, rdflib.URIRef(ns.bsfs.Literal)), (rdflib.URIRef(ns.bse.shared), rdflib.RDFS.subClassOf, rdflib.URIRef(ns.bsfs.Predicate)), (rdflib.URIRef(ns.bse.partOf), rdflib.RDFS.subClassOf, rdflib.URIRef(ns.bsfs.Predicate)), - (rdflib.URIRef('http://bsfs.ai/schema/Tag#usedIn'), rdflib.RDFS.subClassOf, rdflib.URIRef(ns.bsfs.Predicate)), - (rdflib.URIRef('http://bsfs.ai/schema/Collection#tag'), rdflib.RDFS.subClassOf, rdflib.URIRef(ns.bsfs.Predicate)), - (rdflib.URIRef('http://bsfs.ai/schema/Tag#principal'), rdflib.RDFS.subClassOf, rdflib.URIRef(ns.bsfs.Predicate)), + (rdflib.URIRef('https://schema.bsfs.io/core/Tag#usedIn'), rdflib.RDFS.subClassOf, rdflib.URIRef(ns.bsfs.Predicate)), + (rdflib.URIRef('https://schema.bsfs.io/core/Collection#tag'), rdflib.RDFS.subClassOf, rdflib.URIRef(ns.bsfs.Predicate)), + (rdflib.URIRef('https://schema.bsfs.io/core/Tag#principal'), rdflib.RDFS.subClassOf, rdflib.URIRef(ns.bsfs.Predicate)), }) # add some instances of the new classes p_partOf = curr.predicate(ns.bse.partOf) p_shared = curr.predicate(ns.bse.shared) - p_usedIn = curr.predicate('http://bsfs.ai/schema/Tag#usedIn') - p_ctag = curr.predicate('http://bsfs.ai/schema/Collection#tag') - p_principal = curr.predicate('http://bsfs.ai/schema/Tag#principal') + p_usedIn = curr.predicate('https://schema.bsfs.io/core/Tag#usedIn') + p_ctag = curr.predicate('https://schema.bsfs.io/core/Collection#tag') + p_principal = curr.predicate('https://schema.bsfs.io/core/Tag#principal') store.create(curr.node(ns.bsfs.Collection), {URI('http://example.com/me/collection#1234'), URI('http://example.com/me/collection#4321')}) # add some more triples store.set(curr.node(ns.bsfs.Entity), ent_ids, p_shared, {True}) @@ -283,9 +286,9 @@ class TestSparqlStore(unittest.TestCase): (rdflib.URIRef(ns.xsd.boolean), rdflib.RDFS.subClassOf, rdflib.URIRef(ns.bsfs.Literal)), (rdflib.URIRef(ns.bse.shared), rdflib.RDFS.subClassOf, rdflib.URIRef(ns.bsfs.Predicate)), (rdflib.URIRef(ns.bse.partOf), rdflib.RDFS.subClassOf, rdflib.URIRef(ns.bsfs.Predicate)), - (rdflib.URIRef('http://bsfs.ai/schema/Tag#usedIn'), rdflib.RDFS.subClassOf, rdflib.URIRef(ns.bsfs.Predicate)), - (rdflib.URIRef('http://bsfs.ai/schema/Collection#tag'), rdflib.RDFS.subClassOf, rdflib.URIRef(ns.bsfs.Predicate)), - (rdflib.URIRef('http://bsfs.ai/schema/Tag#principal'), rdflib.RDFS.subClassOf, rdflib.URIRef(ns.bsfs.Predicate)), + (rdflib.URIRef('https://schema.bsfs.io/core/Tag#usedIn'), rdflib.RDFS.subClassOf, rdflib.URIRef(ns.bsfs.Predicate)), + (rdflib.URIRef('https://schema.bsfs.io/core/Collection#tag'), rdflib.RDFS.subClassOf, rdflib.URIRef(ns.bsfs.Predicate)), + (rdflib.URIRef('https://schema.bsfs.io/core/Tag#principal'), rdflib.RDFS.subClassOf, rdflib.URIRef(ns.bsfs.Predicate)), # collections (rdflib.URIRef('http://example.com/me/collection#1234'), rdflib.RDF.type, rdflib.URIRef(ns.bsfs.Collection)), (rdflib.URIRef('http://example.com/me/collection#4321'), rdflib.RDF.type, rdflib.URIRef(ns.bsfs.Collection)), @@ -309,17 +312,18 @@ class TestSparqlStore(unittest.TestCase): curr = bsc.from_string(''' prefix rdfs: prefix xsd: - prefix bsfs: - prefix bse: - prefix bst: + prefix bsfs: + prefix bse: + prefix bst: + prefix bsl: bsfs:Entity rdfs:subClassOf bsfs:Node . bsfs:Tag rdfs:subClassOf bsfs:Node . bsfs:User rdfs:subClassOf bsfs:Node . xsd:boolean rdfs:subClassOf bsfs:Literal . - bsfs:Number rdfs:subClassOf bsfs:Literal . - xsd:integer rdfs:subClassOf bsfs:Number . 
+ bsl:Number rdfs:subClassOf bsfs:Literal . + xsd:integer rdfs:subClassOf bsl:Number . bse:filesize rdfs:subClassOf bsfs:Predicate ; rdfs:domain bsfs:Entity ; @@ -360,16 +364,16 @@ class TestSparqlStore(unittest.TestCase): (rdflib.URIRef(ns.bsfs.Tag), rdflib.RDFS.subClassOf, rdflib.URIRef(ns.bsfs.Node)), (rdflib.URIRef(ns.bsfs.User), rdflib.RDFS.subClassOf, rdflib.URIRef(ns.bsfs.Node)), (rdflib.URIRef(ns.xsd.boolean), rdflib.RDFS.subClassOf, rdflib.URIRef(ns.bsfs.Literal)), - (rdflib.URIRef(ns.bsfs.Array), rdflib.RDFS.subClassOf, rdflib.URIRef(ns.bsfs.Literal)), - (rdflib.URIRef(ns.bsfs.BinaryBlob), rdflib.RDFS.subClassOf, rdflib.URIRef(ns.bsfs.Literal)), - (rdflib.URIRef(ns.bsfs.Feature), rdflib.RDFS.subClassOf, rdflib.URIRef(ns.bsfs.Array)), - (rdflib.URIRef(ns.bsfs.Number), rdflib.RDFS.subClassOf, rdflib.URIRef(ns.bsfs.Literal)), - (rdflib.URIRef(ns.bsfs.Time), rdflib.RDFS.subClassOf, rdflib.URIRef(ns.bsfs.Literal)), - (rdflib.URIRef(ns.xsd.integer), rdflib.RDFS.subClassOf, rdflib.URIRef(ns.bsfs.Number)), + (rdflib.URIRef(ns.bsl.Array), rdflib.RDFS.subClassOf, rdflib.URIRef(ns.bsfs.Literal)), + (rdflib.URIRef(ns.bsl.BinaryBlob), rdflib.RDFS.subClassOf, rdflib.URIRef(ns.bsfs.Literal)), + (rdflib.URIRef(ns.bsl.Array.Feature), rdflib.RDFS.subClassOf, rdflib.URIRef(ns.bsl.Array)), + (rdflib.URIRef(ns.bsl.Number), rdflib.RDFS.subClassOf, rdflib.URIRef(ns.bsfs.Literal)), + (rdflib.URIRef(ns.bsl.Time), rdflib.RDFS.subClassOf, rdflib.URIRef(ns.bsfs.Literal)), + (rdflib.URIRef(ns.xsd.integer), rdflib.RDFS.subClassOf, rdflib.URIRef(ns.bsl.Number)), (rdflib.URIRef(ns.bse.shared), rdflib.RDFS.subClassOf, rdflib.URIRef(ns.bsfs.Predicate)), (rdflib.URIRef(ns.bse.tag), rdflib.RDFS.subClassOf, rdflib.URIRef(ns.bsfs.Predicate)), (rdflib.URIRef(ns.bse.filesize), rdflib.RDFS.subClassOf, rdflib.URIRef(ns.bsfs.Predicate)), - (rdflib.URIRef('http://bsfs.ai/schema/Tag#principal'), rdflib.RDFS.subClassOf, rdflib.URIRef(ns.bsfs.Predicate)), + (rdflib.URIRef('https://schema.bsfs.io/core/Tag#principal'), rdflib.RDFS.subClassOf, rdflib.URIRef(ns.bsfs.Predicate)), # node instances (rdflib.URIRef('http://example.com/me/entity#1234'), rdflib.RDF.type, rdflib.URIRef(ns.bsfs.Entity)), (rdflib.URIRef('http://example.com/me/entity#4321'), rdflib.RDF.type, rdflib.URIRef(ns.bsfs.Entity)), @@ -400,13 +404,15 @@ class TestSparqlStore(unittest.TestCase): invalid = bsc.from_string(''' prefix rdfs: prefix xsd: - prefix bsfs: - prefix bse: + prefix bsfs: + prefix bse: + prefix bsl: + prefix bsa: - bsfs:Array rdfs:subClassOf bsfs:Literal . - bsfs:Feature rdfs:subClassOf bsfs:Array . + bsl:Array rdfs:subClassOf bsfs:Literal . + bsa:Feature rdfs:subClassOf bsl:Array . - bsfs:Colors rdfs:subClassOf bsfs:Feature ; + bsfs:Colors rdfs:subClassOf bsa:Feature ; bsfs:dimension "4"^^xsd:integer ; bsfs:distance bsfs:foobar . @@ -417,8 +423,8 @@ class TestSparqlStore(unittest.TestCase): invalid = bsc.from_string(''' prefix rdfs: prefix xsd: - prefix bsfs: - prefix bse: + prefix bsfs: + prefix bse: bsfs:Entity rdfs:subClassOf bsfs:Node . bsfs:Tag rdfs:subClassOf bsfs:Entity . # inconsistent with previous tag definition @@ -433,8 +439,8 @@ class TestSparqlStore(unittest.TestCase): invalid = bsc.from_string(''' prefix rdfs: prefix xsd: - prefix bsfs: - prefix bse: + prefix bsfs: + prefix bse: bsfs:Entity rdfs:subClassOf bsfs:Node . bsfs:User rdfs:subClassOf bsfs:Node . 
@@ -945,9 +951,9 @@ class TestSparqlStore(unittest.TestCase): p_asset = store.schema.predicate(ns.bse.asset) store.set(ent_type, ent_ids, p_asset, {bytes(range(128)), bytes(range(128, 256))}) blob1 = rdflib.Literal('AAECAwQFBgcICQoLDA0ODxAREhMUFRYXGBkaGxwdHh8gISIjJCUmJygpKissLS4vMDEyMzQ1Njc4OTo7PD0+P0BBQkNERUZHSElKS0xNTk9QUVJTVFVWV1hZWltcXV5fYGFiY2RlZmdoaWprbG1ub3BxcnN0dXZ3eHl6e3x9fn8=', - datatype=rdflib.URIRef(ns.bsfs.BinaryBlob)) + datatype=rdflib.URIRef(ns.bsl.BinaryBlob)) blob2 = rdflib.Literal('gIGCg4SFhoeIiYqLjI2Oj5CRkpOUlZaXmJmam5ydnp+goaKjpKWmp6ipqqusra6vsLGys7S1tre4ubq7vL2+v8DBwsPExcbHyMnKy8zNzs/Q0dLT1NXW19jZ2tvc3d7f4OHi4+Tl5ufo6err7O3u7/Dx8vP09fb3+Pn6+/z9/v8=', - datatype=rdflib.URIRef(ns.bsfs.BinaryBlob)) + datatype=rdflib.URIRef(ns.bsl.BinaryBlob)) self.assertTrue(set(store._graph).issuperset({ (rdflib.URIRef('http://example.com/me/entity#1234'), rdflib.URIRef(p_asset.uri), blob1), (rdflib.URIRef('http://example.com/me/entity#1234'), rdflib.URIRef(p_asset.uri), blob2), diff --git a/test/triple_store/sparql/test_utils.py b/test/triple_store/sparql/test_utils.py index 8f894bb..44a1299 100644 --- a/test/triple_store/sparql/test_utils.py +++ b/test/triple_store/sparql/test_utils.py @@ -16,6 +16,8 @@ from bsfs.triple_store.sparql.utils import GenHopName, Query ## code ## +ns.bse = ns.bsfs.Entity() + class TestGenHopName(unittest.TestCase): def test_next(self): # baseline @@ -40,7 +42,7 @@ class TestGenHopName(unittest.TestCase): class TestQuery(unittest.TestCase): def setUp(self): - self.root_type = 'http://bsfs.ai/schema/Entity' + self.root_type = 'https://schema.bsfs.io/core/Entity' self.root_head = '?root' self.select = (('?head', 'name'), ) self.where = f'?root <{ns.bse.tag}> ?head' @@ -56,8 +58,8 @@ class TestQuery(unittest.TestCase): self.assertEqual(q, Query(self.root_type, self.root_head, self.select, self.where)) self.assertEqual(hash(q), hash(Query(self.root_type, self.root_head, self.select, self.where))) # comparison respects root_type - self.assertNotEqual(q, Query('http://bsfs.ai/schema/Tag', self.root_head, self.select, self.where)) - self.assertNotEqual(hash(q), hash(Query('http://bsfs.ai/schema/Tag', self.root_head, self.select, self.where))) + self.assertNotEqual(q, Query('https://schema.bsfs.io/core/Tag', self.root_head, self.select, self.where)) + self.assertNotEqual(hash(q), hash(Query('https://schema.bsfs.io/core/Tag', self.root_head, self.select, self.where))) # comparison respects root_head self.assertNotEqual(q, Query(self.root_type, '?foo', self.select, self.where)) self.assertNotEqual(hash(q), hash(Query(self.root_type, '?foo', self.select, self.where))) @@ -69,7 +71,7 @@ class TestQuery(unittest.TestCase): self.assertNotEqual(hash(q), hash(Query(self.root_type, self.root_head, self.select, '?root bse:filename ?head'))) # string conversion self.assertEqual(str(q), q.query) - self.assertEqual(repr(q), "Query(http://bsfs.ai/schema/Entity, ?root, (('?head', 'name'),), ?root ?head)") + self.assertEqual(repr(q), "Query(https://schema.bsfs.io/core/Entity, ?root, (('?head', 'name'),), ?root ?head)") def test_add(self): q = Query(self.root_type, self.root_head, self.select, self.where) @@ -77,7 +79,7 @@ class TestQuery(unittest.TestCase): self.assertRaises(TypeError, operator.add, q, 1234) self.assertRaises(TypeError, operator.add, q, 'foobar') # root type and head must match - self.assertRaises(ValueError, operator.add, q, Query('http://bsfs.ai/schema/Tag', self.root_head)) + self.assertRaises(ValueError, operator.add, q, 
Query('https://schema.bsfs.io/core/Node/Tag', self.root_head)) self.assertRaises(ValueError, operator.add, q, Query(self.root_type, '?foobar')) # select and were are combined combo = q + Query(self.root_type, self.root_head, (('?foo', 'bar'), ), f'?root <{ns.bse.filename}> ?foo') @@ -113,23 +115,23 @@ class TestQuery(unittest.TestCase): return value # query composes a valid query q = Query(self.root_type, self.root_head, self.select, self.where) - self.assertEqual(normalize(q.query), normalize(f'select distinct ?root (?head as ?name) where {{ ?root <{ns.rdf.type}>/<{ns.rdfs.subClassOf}>* . ?root <{ns.bse.tag}> ?head }} order by str(?root)')) + self.assertEqual(normalize(q.query), normalize(f'select distinct ?root (?head as ?name) where {{ ?root <{ns.rdf.type}>/<{ns.rdfs.subClassOf}>* . ?root <{ns.bse.tag}> ?head }} order by str(?root)')) # select and where are optional q = Query(self.root_type, self.root_head) - self.assertEqual(normalize(q.query), normalize(f'select distinct ?root where {{ ?root <{ns.rdf.type}>/<{ns.rdfs.subClassOf}>* . }} order by str(?root)')) + self.assertEqual(normalize(q.query), normalize(f'select distinct ?root where {{ ?root <{ns.rdf.type}>/<{ns.rdfs.subClassOf}>* . }} order by str(?root)')) # select and where need not to correspond q = Query(self.root_type, self.root_head, (('?head', 'name'), )) - self.assertEqual(normalize(q.query), normalize(f'select distinct ?root (?head as ?name) where {{ ?root <{ns.rdf.type}>/<{ns.rdfs.subClassOf}>* . }} order by str(?root)')) + self.assertEqual(normalize(q.query), normalize(f'select distinct ?root (?head as ?name) where {{ ?root <{ns.rdf.type}>/<{ns.rdfs.subClassOf}>* . }} order by str(?root)')) # query is used for string representation self.assertEqual(str(q), q.query) def test_call(self): graph = rdflib.Graph() # schema - graph.add((rdflib.URIRef('http://bsfs.ai/schema/Document'), rdflib.URIRef(ns.rdfs.subClassOf), rdflib.URIRef('http://bsfs.ai/schema/Entity'))) + graph.add((rdflib.URIRef('https://schema.bsfs.io/core/Document'), rdflib.URIRef(ns.rdfs.subClassOf), rdflib.URIRef('https://schema.bsfs.io/core/Entity'))) # nodes - graph.add((rdflib.URIRef('http://example.com/entity#1234'), rdflib.URIRef(ns.rdf.type), rdflib.URIRef('http://bsfs.ai/schema/Entity'))) - graph.add((rdflib.URIRef('http://example.com/doc#1234'), rdflib.URIRef(ns.rdf.type), rdflib.URIRef('http://bsfs.ai/schema/Document'))) + graph.add((rdflib.URIRef('http://example.com/entity#1234'), rdflib.URIRef(ns.rdf.type), rdflib.URIRef('https://schema.bsfs.io/core/Entity'))) + graph.add((rdflib.URIRef('http://example.com/doc#1234'), rdflib.URIRef(ns.rdf.type), rdflib.URIRef('https://schema.bsfs.io/core/Document'))) # links graph.add((rdflib.URIRef('http://example.com/entity#1234'), rdflib.URIRef(ns.bse.tag), rdflib.Literal('tag#1234', datatype=rdflib.XSD.string))) graph.add((rdflib.URIRef('http://example.com/doc#1234'), rdflib.URIRef(ns.bse.tag), rdflib.Literal('tag#1234', datatype=rdflib.XSD.string))) -- cgit v1.2.3 From 87004fa65cc4833cfdbd9a24ba149123c7020edb Mon Sep 17 00:00:00 2001 From: Matthias Baumgartner Date: Sat, 4 Mar 2023 17:05:47 +0100 Subject: documentation --- doc/source/architecture.rst | 87 ++++++++++++++++++++++++++++++++++++++++ doc/source/concepts.rst | 98 +++++++++++++++++++++++++++++++++++++++++++++ doc/source/index.rst | 75 ++++++++++++++++++++++++++++++++++ doc/source/installation.rst | 46 +++++++++++++++++++++ 4 files changed, 306 insertions(+) create mode 100644 doc/source/architecture.rst create mode 100644 doc/source/concepts.rst 
 create mode 100644 doc/source/index.rst
 create mode 100644 doc/source/installation.rst

diff --git a/doc/source/architecture.rst b/doc/source/architecture.rst
new file mode 100644
index 0000000..4cca49a
--- /dev/null
+++ b/doc/source/architecture.rst
@@ -0,0 +1,87 @@
+
+Architecture
+============
+
+The BSFS stack can be coarsely divided into four parts (see the image below).
+
+* Envelope: Essentials and utils used throughout the whole codebase.
+* Front: End-user applications and APIs.
+* Center: The core interfaces and functionality.
+* Back: The triple store backends.
+
+Details of these components are given in the sections below.
+
+
+.. image:: _static/arch_light.png
+   :class: only-light
+
+.. image:: _static/arch_dark.png
+   :class: only-dark
+
+
+Envelope
+--------
+
+Most notably, the envelope covers the :class:`Schema ` and the :mod:`Query syntax trees (AST) `.
+Both of them are essential for all parts of the BSFS stack.
+For example, the schema is specified by the user via the :func:`Migrate ` command, checked and extended by the :class:`Graph `, and ultimately stored by a :class:`Triple Store backend `.
+Similarly, the Query AST may be provided by a caller and is translated to a database query by a backend.
+In addition, the envelope also contains some classes to handle URIs:
+:class:`URI ` defines the URI base class,
+:class:`Namespace ` provides shortcuts to generate URIs, and
+:mod:`UUID ` is used to generate unique URIs.
+
+
+Front
+-----
+
+The front consists of exposed interfaces such as end-user applications or APIs,
+and all utils needed to offer this functionality.
+See :mod:`bsfs.apps` and :mod:`bsfs.front`.
+
+
+Center
+------
+
+The heart of BSFS is grouped around the :mod:`bsfs.graph` module.
+These classes provide the interface to navigate and manipulate the file graph
+in a safe and programmer-friendly manner.
+Some of them are indirectly exposed through the higher-level APIs.
+
+The two core design principles of BSFS are the focus on nodes and batch processing.
+They are realized in the Graph and Nodes classes.
+The :class:`Graph class ` manages the graph as a whole,
+and offers methods to get a specific set of Nodes.
+In turn, the :class:`Nodes class ` represents such a set of nodes,
+and performs operations on the whole node set at once.
+The :mod:`bsfs.graph` module also comes with some syntactic sugar.
+
+Example::
+
+    # Open a file graph.
+    from bsfs import Open, ns
+    graph = Open(...)
+    # Get all nodes of type File.
+    nodes = graph.all(ns.bsfs.File)
+    # Set the author of all nodes at once.
+    nodes.set(ns.bse.author, 'Myself')
+    # Retrieve the author of all nodes at once.
+    set(nodes.get(ns.bse.author, node=False))
+    # Same as above, but shorter.
+    set(nodes.author(node=False))
+
+
+Back
+----
+
+There are various graph databases (e.g., `RDFLib`_, `Blazegraph`_, or `Titan`_),
+and it would be foolish to replicate the work that others have done.
+Instead, we use third-party stores that take care of how to store and manage the data.
+The :class:`Backend base class ` defines the
+interface to integrate any such third-party store into BSFS.
+Besides storing the data, a triple store backend also needs to track the current schema.
+
+
+.. _RDFLib: https://rdflib.readthedocs.io/en/stable/index.html
+.. _Blazegraph: https://blazegraph.com/
+.. _Titan: http://titan.thinkaurelius.com/
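The ``Back`` section above describes the backend contract only in prose. The following is a minimal, hypothetical sketch of what integrating a custom store could look like. It assumes only the abstract ``get`` entry point declared in ``bsfs/triple_store/base.py``; a complete backend would also have to implement the remaining abstract members (schema handling, ``exists``, and so on), which are omitted here::

    # Illustrative skeleton only; not a working backend.
    import typing

    from bsfs import schema as bsc
    from bsfs.query import ast
    from bsfs.triple_store import TripleStoreBase
    from bsfs.utils import URI

    class MyStore(TripleStoreBase):
        """Toy backend stub; every other abstract member is left out."""

        def get(
                self,
                node_type: bsc.Node,
                query: ast.filter.FilterExpression,
                ) -> typing.Iterator[URI]:
            # Translate the Filter AST into the store's native query language,
            # evaluate it, and yield the URIs of all matching nodes.
            raise NotImplementedError()

``SparqlStore``, which this series reorganizes into its own package, is one such backend built on rdflib.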
diff --git a/doc/source/concepts.rst b/doc/source/concepts.rst
new file mode 100644
index 0000000..9c2ed43
--- /dev/null
+++ b/doc/source/concepts.rst
@@ -0,0 +1,98 @@
+
+Core concepts
+=============
+
+In the following, we present a few core concepts that should help in understanding the BSFS operations and codebase.
+
+
+Graph storage
+-------------
+
+`RDF`_ describes a network or graph like the file graph as a set of
+*(subject, predicate, object)* triples.
+*Subject* is the identifier of the source node,
+*object* is the identifier of the target node (or a literal value),
+and *predicate* is the type of relation between the source node and the target.
+As suggested by `RDF`_, we use URIs to identify nodes and predicates.
+For example, a triple that assigns me as the author of a file could look like this::
+
+
+Note that alternatively, the *object* could also be a literal value ("me")::
+
+    "me"
+
+There are a number of graph databases that support this or an analogous paradigm,
+such as `RDFLib`_, `Blazegraph`_, `TypeDB`_, `Titan`_,
+and `many more `_.
+BSFS uses such a third-party graph database to store its file graph.
+
+As usual in database systems,
+we have to distinguish schema data (that governs the structure of the storage)
+from instance data (the actual database content).
+Similar to relational database systems,
+both kinds of data can be represented as triples,
+and subsequently stored within the same graph storage
+(although one might need to separate them logically).
+In BSFS, we employ an explicit schema (see next section) that is managed alongside the data.
+
+
+
+Schema
+------
+
+BSFS ensures consistency across multiple distributed client applications
+by maintaining an explicit schema that governs node types and predicates.
+Furthermore, exposing the schema allows clients to run a number of compatibility and validity checks
+locally, and a graph database may use the schema to optimize its storage or operations.
+
+In BSFS, the schema is initially provided by the system administrator
+(usually in the `Turtle`_ format)
+and subsequently stored by the backend.
+The default schema defines three root types
+(``bsfs:Node``, ``bsfs:Predicate``, and ``bsfs:Literal``),
+and BSFS expects any node, literal, or predicate to be derived from these roots.
+
+For example, a new predicate can be defined like so::
+
+    # define some abbreviations
+    prefix rdfs: 
+    prefix bsfs: 
+    prefix bse: 
+
+    # define a node type
+    bsfs:Entity rdfs:subClassOf bsfs:Node .
+
+    # define a literal type
+    xsd:string rdfs:subClassOf bsfs:Literal .
+
+    # define a predicate ("author of a node")
+    bse:author rdfs:subClassOf bsfs:Predicate ;
+        rdfs:domain bsfs:Entity ;
+        rdfs:range xsd:string .
+
+BSFS checks all requests and rejects queries or operations that violate the schema.
+
+
+Querying
+--------
+
+BSFS at its core is not much more than a translator from a user query into a graph database query.
+It operates directly on three abstract syntax trees (ASTs)
+to run fetch, search, and sort queries, respectively.
+By not using an existing query language,
+we avoid an unnecessary and possibly expensive parsing step.
+Some routines create an AST internally (e.g., :func:`bsfs.graph.graph.Graph.all`),
+others accept a user-defined AST (e.g., :func:`bsfs.graph.graph.Graph.get`).
+One way or another, the AST is validated against the schema,
+and access control conditions are added.
+
+
+.. _RDF: https://www.w3.org/RDF/
+.. _RDFLib: https://rdflib.readthedocs.io/en/stable/index.html
+.. _Blazegraph: https://blazegraph.com/
+.. _Titan: http://titan.thinkaurelius.com/
+.. _TypeDB: https://vaticle.com/
+.. _Turtle: https://www.w3.org/TR/turtle/
+
+
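The schema snippet in the ``Schema`` section above is exactly the kind of string that BSFS parses at runtime. Below is a minimal sketch of loading such a definition programmatically, following the ``bsfs.schema.from_string``, ``schema.node``, and ``schema.predicate`` calls used by the test suite in this series; the ``bsfs:`` and ``bse:`` namespace URIs are assumptions inferred from URIs that appear elsewhere in these patches::

    from bsfs import schema as bsc

    # Parse a small schema from a Turtle string.
    # The rdfs/xsd URIs are the standard W3C namespaces; the bsfs/bse URIs
    # are assumed here and may differ from the project's actual namespaces.
    schema = bsc.from_string('''
        prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#>
        prefix xsd:  <http://www.w3.org/2001/XMLSchema#>
        prefix bsfs: <https://schema.bsfs.io/core/>
        prefix bse:  <https://schema.bsfs.io/core/Entity#>

        bsfs:Entity rdfs:subClassOf bsfs:Node .
        xsd:string  rdfs:subClassOf bsfs:Literal .

        bse:author rdfs:subClassOf bsfs:Predicate ;
            rdfs:domain bsfs:Entity ;
            rdfs:range  xsd:string .
        ''')

    # Look up the node and predicate classes that were just defined.
    entity = schema.node('https://schema.bsfs.io/core/Entity')
    author = schema.predicate('https://schema.bsfs.io/core/Entity#author')

    # Schemas can also be combined with `+` to extend an existing schema,
    # and a SparqlStore accepts such a schema via its `schema` setter.

The same pattern appears throughout ``test/triple_store/sparql/test_sparql.py``.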
diff --git a/doc/source/index.rst b/doc/source/index.rst
new file mode 100644
index 0000000..91d53f6
--- /dev/null
+++ b/doc/source/index.rst
@@ -0,0 +1,75 @@
+
+The Black Star File System
+==========================
+
+A file system has two roles: it has to specify how to write files to a medium, and it has to define how a user can access files.
+Most file systems focus on the first role and adopt the standard directory tree approach for the second role.
+It is of course necessary to solve the challenges of medium access, but we should not neglect the user's perspective.
+As a user, I mostly care about how conveniently I can organize my data, and how quickly I can access relevant information.
+The hierarchical approach is rather restrictive in this regard:
+you can only organize files in a directory tree [#f1]_, and search tasks often require third-party tools like `find`_ or `locate`_.
+
+Tagging file systems proposed an alternative file organization model.
+Instead of placing files in directories, they assign one or more (user-defined) tags to each file.
+This increases the flexibility over a hierarchical data model,
+because you can group any combination of files, and each file can be a part of various groups.
+Semantic file systems push this idea one step further by trying to understand
+the data they're dealing with.
+For example, files can be grouped by their data type (documents), file format (odt),
+author (yourself), topic (information management), etc.
+The benefit for the user is that they can browse their files by association rather than by location --- similar to how we navigate the Web.
+
+Clearly, the hierarchical approach is insufficient to organize this variety of information.
+Instead, we need a network of files,
+where files can be connected to each other, to their properties, or to auxiliary nodes
+(such as tags, collections, etc.) under a given relationship.
+We call this the file graph.
+With the *Black Star File System (BSFS)*, you can store, manage, and query such a file graph.
+
+..
+    TODO: Clarify
+    * Different relationships
+    * Properties and auxiliary nodes
+
+    TODO: File graph image
+    TODO: SFS/TFS references
+
+    TODO: BSFS features
+    Within BSFS, you can store the file content, file metadata,
+    and content-derived information (e.g., features) alike.
+
+    Within the file graph, we link files directly,
+    through properties, or through intermediate nodes.
+
+The Black Star File System is designed with three query patterns in mind:
+navigation, search, and browsing.
+
+The **navigation** pattern describes the case when the user knows exactly what they want,
+and they already have an address or id of the target file.
+BSFS identifies each file with a unique URI,
+or you can quickly navigate to a file via its name or other file properties.
+
+A **search** occurs when the user lacks the specific address or identifier of a target file,
+but they have relatively clear and narrow search criteria.
+With BSFS, you can search by file properties (name, size), content (keywords, features),
+or associations to other files and auxiliary nodes (tags, collections).
+
+**Browsing** takes place when the user has only vague query criteria but wants to quickly scan and compare many files.
+In BSFS, you can browse along file associations and rank results by a variety of similarity metrics.
+
+.. toctree::
+   :maxdepth: 1
+
+   installation
+   concepts
+   architecture
+   api/modules
+
+
+.. [#f1] although links and similar techniques allow some deviation from this principle
+
+.. _find: https://www.gnu.org/software/findutils/manual/html_node/find_html/Invoking-find.html#Invoking-find
+
+.. _locate: https://www.gnu.org/software/findutils/manual/html_node/find_html/Invoking-locate.html
+
+
diff --git a/doc/source/installation.rst b/doc/source/installation.rst
new file mode 100644
index 0000000..c7d8fba
--- /dev/null
+++ b/doc/source/installation.rst
@@ -0,0 +1,46 @@
+
+Installation
+============
+
+Installation
+------------
+
+Install *BSFS* via pip::
+
+    pip install --extra-index-url https://pip.bsfs.io bsfs
+
+This installs the `bsfs` python package as well as the `bsfs.app` command.
+It is recommended to install *bsfs* in a virtual environment (via `virtualenv`).
+
+
+License
+-------
+
+This project is released under the terms of the 3-clause BSD License.
+By downloading or using the application, you agree to the license's terms and conditions.
+
+.. literalinclude:: ../../LICENSE
+
+
+Source
+------
+
+Check out our git repository::
+
+    git clone https://git.bsfs.io/bsfs.git
+
+You can further install *bsfs* via the usual `setuptools `_ commands from your bsfs source directory::
+
+    python setup.py develop
+
+For development, you also need to install some additional dependencies::
+
+    # code style discipline
+    pip install mypy coverage pylint
+
+    # documentation
+    pip install sphinx sphinx-copybutton furo
+
+    # packaging
+    pip install build
+
-- 
cgit v1.2.3

From 3ae93a405724ca6b5ddeb0b458fcc95685f83f09 Mon Sep 17 00:00:00 2001
From: Matthias Baumgartner
Date: Sun, 5 Mar 2023 19:12:57 +0100
Subject: build fixes

---
 README.md                   |  8 +++++++-
 doc/source/installation.rst |  3 ---
 setup.py                    | 13 ++++++++++---
 3 files changed, 17 insertions(+), 7 deletions(-)

diff --git a/README.md b/README.md
index 1956752..796c198 100644
--- a/README.md
+++ b/README.md
@@ -25,7 +25,13 @@ Install bsfs as editable from the git repository:
     $ cd bsfs
     $ pip install -e .
-Install the following additional packages besides bsfs:
+If you want to develop (*dev*), run the tests (*test*), edit the
+documentation (*doc*), or build a distributable (*build*),
+install bsfs with the respective extras:
+
+    $ pip install -e .[dev,doc,build,test]
+
+Or, you can manually install the following packages besides BSFS:
 
     $ pip install coverage mypy pylint
     $ pip install sphinx sphinx-copybutton furo
diff --git a/doc/source/installation.rst b/doc/source/installation.rst
index c7d8fba..4316136 100644
--- a/doc/source/installation.rst
+++ b/doc/source/installation.rst
@@ -2,9 +2,6 @@
 Installation
 ============
 
-Installation
-------------
-
 Install *BSFS* via pip::
 
     pip install --extra-index-url https://pip.bsfs.io bsfs
diff --git a/setup.py b/setup.py
index 747e853..f6bd3e8 100644
--- a/setup.py
+++ b/setup.py
@@ -9,14 +9,15 @@ setup(
     author='Matthias Baumgartner',
     author_email='dev@bsfs.io',
     description='A content-aware graph file system.',
-    long_description=open(os.path.join(os.path.dirname(__file__), 'README')).read(),
+    long_description=open(os.path.join(os.path.dirname(__file__), 'README.md')).read(),
     license='BSD',
     license_files=('LICENSE', ),
     url='https://www.bsfs.io/bsfs/',
     download_url='https://pip.bsfs.io',
 
     # packages
-    packages=[p for p in find_packages() if p.startswith('bsfs')],
+    packages=find_packages(include=['bsfs']),
+    package_dir={'bsfs': 'bsfs'},
 
     # data files are included if mentioned in MANIFEST.in
     include_package_data=True,
@@ -28,10 +29,16 @@ setup(
     },
 
     # dependencies
+    python_requires=">=3.7",
     install_requires=(
         'rdflib', # schema and sparql storage
         'hopcroftkarp', # ast matching
         'numpy', # distance functions for sparql store
     ),
-    python_requires=">=3.7",
+    extras_require={
+        'dev': ['coverage', 'mypy', 'pylint'],
+        'doc': ['sphinx', 'furo', 'sphinx-copybutton'],
+        'test': [],
+        'build': ['build'],
+    },
 )
-- 
cgit v1.2.3