From 791918039979d0743fd2ea4b9a5e74593ff96fd0 Mon Sep 17 00:00:00 2001 From: Matthias Baumgartner Date: Mon, 19 Dec 2022 13:32:34 +0100 Subject: query ast file structures and essential interfaces --- bsfs/graph/graph.py | 5 + bsfs/query/__init__.py | 20 +++ bsfs/query/ast/__init__.py | 24 ++++ bsfs/query/ast/filter_.py | 30 ++++ bsfs/query/validator.py | 35 +++++ bsfs/triple_store/base.py | 8 ++ bsfs/triple_store/sparql.py | 253 ---------------------------------- bsfs/triple_store/sparql/__init__.py | 18 +++ bsfs/triple_store/sparql/sparql.py | 256 +++++++++++++++++++++++++++++++++++ 9 files changed, 396 insertions(+), 253 deletions(-) create mode 100644 bsfs/query/__init__.py create mode 100644 bsfs/query/ast/__init__.py create mode 100644 bsfs/query/ast/filter_.py create mode 100644 bsfs/query/validator.py delete mode 100644 bsfs/triple_store/sparql.py create mode 100644 bsfs/triple_store/sparql/__init__.py create mode 100644 bsfs/triple_store/sparql/sparql.py (limited to 'bsfs') diff --git a/bsfs/graph/graph.py b/bsfs/graph/graph.py index b7b9f1c..10e5904 100644 --- a/bsfs/graph/graph.py +++ b/bsfs/graph/graph.py @@ -9,6 +9,7 @@ import os import typing # bsfs imports +from bsfs.query import ast from bsfs.schema import Schema from bsfs.triple_store import TripleStoreBase from bsfs.utils import URI, typename @@ -110,4 +111,8 @@ class Graph(): type_ = self.schema.node(node_type) return _nodes.Nodes(self._backend, self._user, type_, {guid}) + def get(self, node_type: URI, subject: ast.filter.FilterExpression) -> Nodes: + """Return a `Nodes` instance over all nodes of type *node_type* that match the *subject* query.""" + raise NotImplementedError() + ## EOF ## diff --git a/bsfs/query/__init__.py b/bsfs/query/__init__.py new file mode 100644 index 0000000..21c7389 --- /dev/null +++ b/bsfs/query/__init__.py @@ -0,0 +1,20 @@ +""" + +Part of the BlackStar filesystem (bsfs) module. +A copy of the license is provided with the project. +Author: Matthias Baumgartner, 2022 +""" +# imports +import typing + +# inner-module imports +from . import ast +from . import validator as validate + +# exports +__all__: typing.Sequence[str] = ( + 'ast', + 'validate', + ) + +## EOF ## diff --git a/bsfs/query/ast/__init__.py b/bsfs/query/ast/__init__.py new file mode 100644 index 0000000..0ee7385 --- /dev/null +++ b/bsfs/query/ast/__init__.py @@ -0,0 +1,24 @@ +"""Query AST components. + +The query AST consists of a Filter syntax tree. + +Classes beginning with an underscore (_) represent internal type hierarchies +and should not be used for parsing. Note that the AST structures do not +(and cannot) check semantic validity or consistency with a given schema. + +Part of the BlackStar filesystem (bsfs) module. +A copy of the license is provided with the project. +Author: Matthias Baumgartner, 2022 +""" +# imports +import typing + +# inner-module imports +from . import filter_ as filter + +# exports +__all__: typing.Sequence[str] = ( + 'filter', + ) + +## EOF ## diff --git a/bsfs/query/ast/filter_.py b/bsfs/query/ast/filter_.py new file mode 100644 index 0000000..4086fc1 --- /dev/null +++ b/bsfs/query/ast/filter_.py @@ -0,0 +1,30 @@ +"""Filter AST. + +Part of the BlackStar filesystem (bsfs) module. +A copy of the license is provided with the project. +Author: Matthias Baumgartner, 2022 +""" +# imports +from collections import abc +import typing + +# exports +__all__ : typing.Sequence[str] = [] + + +## code ## + +class _Expression(abc.Hashable): + def __repr__(self) -> str: + """Return the expressions's string representation.""" + return f'{typename(self)}()' + + def __hash__(self) -> int: + """Return the expression's integer representation.""" + return hash(type(self)) + + def __eq__(self, other: typing.Any) -> bool: + """Return True if *self* and *other* are equivalent.""" + return isinstance(other, type(self)) + +## EOF ## diff --git a/bsfs/query/validator.py b/bsfs/query/validator.py new file mode 100644 index 0000000..ac3789a --- /dev/null +++ b/bsfs/query/validator.py @@ -0,0 +1,35 @@ +""" + +Part of the BlackStar filesystem (bsfs) module. +A copy of the license is provided with the project. +Author: Matthias Baumgartner, 2022 +""" +# imports +import typing + +# bsfs imports +from bsfs import schema as bsc + +# inner-module imports +from . import ast + +# exports +__all__ : typing.Sequence[str] = ( + 'Filter', + ) + + +## code ## + +class Filter(): + + # schema to validate against. + schema: bsc.Schema + + def __init__(self, schema: bsc.Schema): + self.schema = schema + + def parse(self, node: ast.filter.FilterExpression): + raise NotImplementedError() + +## EOF ## diff --git a/bsfs/triple_store/base.py b/bsfs/triple_store/base.py index 6561262..28ebb86 100644 --- a/bsfs/triple_store/base.py +++ b/bsfs/triple_store/base.py @@ -108,6 +108,14 @@ class TripleStoreBase(abc.ABC): """ + @abc.abstractmethod + def get( + self, + node_type: bsc.Node, + query: ast.filter.FilterExpression, + ) -> typing.Iterator[URI]: + """Return guids of nodes of type *node_type* that match the *query*.""" + @abc.abstractmethod def exists( self, diff --git a/bsfs/triple_store/sparql.py b/bsfs/triple_store/sparql.py deleted file mode 100644 index 7516dff..0000000 --- a/bsfs/triple_store/sparql.py +++ /dev/null @@ -1,253 +0,0 @@ -""" - -Part of the BlackStar filesystem (bsfs) module. -A copy of the license is provided with the project. -Author: Matthias Baumgartner, 2022 -""" -# imports -import itertools -import typing -import rdflib - -# bsfs imports -from bsfs import schema as bsc -from bsfs.utils import errors, URI - -# inner-module imports -from . import base - - -# exports -__all__: typing.Sequence[str] = ( - 'SparqlStore', - ) - - -## code ## - -class _Transaction(): - """Lightweight rdflib transactions for in-memory databases.""" - - # graph instance. - _graph: rdflib.Graph - - # current log of added triples. - _added: typing.List[typing.Any] - - # current log of removed triples. - _removed: typing.List[typing.Any] - - def __init__(self, graph: rdflib.Graph): - self._graph = graph - # initialize internal structures - self.commit() - - def commit(self): - """Commit temporary changes.""" - self._added = [] - self._removed = [] - - def rollback(self): - """Undo changes since the last commit.""" - for triple in self._added: - self._graph.remove(triple) - for triple in self._removed: - self._graph.add(triple) - - def add(self, triple: typing.Any): - """Add a triple to the graph.""" - if triple not in self._graph: - self._added.append(triple) - self._graph.add(triple) - - def remove(self, triple: typing.Any): - """Remove a triple from the graph.""" - if triple in self._graph: - self._removed.append(triple) - self._graph.remove(triple) - - -class SparqlStore(base.TripleStoreBase): - """Sparql-based triple store. - - The sparql triple store uses a third-party backend - (currently rdflib) to store triples and manages them via - the Sparql query language. - - """ - - # The rdflib graph. - _graph: rdflib.Graph - - # Current transaction. - _transaction: _Transaction - - # The local schema. - _schema: bsc.Schema - - def __init__(self): - super().__init__(None) - self._graph = rdflib.Graph() - self._transaction = _Transaction(self._graph) - self._schema = bsc.Schema.Empty() - - # NOTE: mypy and pylint complain about the **kwargs not being listed (contrasting super) - # However, not having it here is clearer since it's explicit that there are no arguments. - @classmethod - def Open(cls) -> 'SparqlStore': # type: ignore [override] # pylint: disable=arguments-differ - return cls() - - def commit(self): - self._transaction.commit() - - def rollback(self): - self._transaction.rollback() - - @property - def schema(self) -> bsc.Schema: - return self._schema - - @schema.setter - def schema(self, schema: bsc.Schema): - # check args: Schema instanace - if not isinstance(schema, bsc.Schema): - raise TypeError(schema) - # check compatibility: No contradicting definitions - if not self.schema.consistent_with(schema): - raise errors.ConsistencyError(f'{schema} is inconsistent with {self.schema}') - - # commit the current transaction - self.commit() - - # adjust instances: - # nothing to do for added classes - # delete instances of removed classes - - # get deleted classes - sub = self.schema - schema - - # remove predicate instances - for pred in sub.predicates: - for src, trg in self._graph.subject_objects(rdflib.URIRef(pred.uri)): - self._transaction.remove((src, rdflib.URIRef(pred.uri), trg)) - - # remove node instances - for node in sub.nodes: - # iterate through node instances - for inst in self._graph.subjects(rdflib.RDF.type, rdflib.URIRef(node.uri)): - # remove triples where the instance is in the object position - for src, pred in self._graph.subject_predicates(inst): - self._transaction.remove((src, pred, inst)) - # remove triples where the instance is in the subject position - for pred, trg in self._graph.predicate_objects(inst): - self._transaction.remove((inst, pred, trg)) - # remove instance - self._transaction.remove((inst, rdflib.RDF.type, rdflib.URIRef(node.uri))) - - # NOTE: Nothing to do for literals - - # commit instance changes - self.commit() - - # migrate schema - self._schema = schema - - - def _has_type(self, subject: URI, node_type: bsc.Node) -> bool: - """Return True if *subject* is a node of class *node_type* or a subclass thereof.""" - if node_type not in self.schema.nodes(): - raise errors.ConsistencyError(f'{node_type} is not defined in the schema') - - subject_types = list(self._graph.objects(rdflib.URIRef(subject), rdflib.RDF.type)) - if len(subject_types) == 0: - return False - if len(subject_types) == 1: - node = self.schema.node(URI(subject_types[0])) # type: ignore [arg-type] # URI is a subtype of str - if node == node_type: - return True - if node_type in node.parents(): - return True - return False - raise errors.UnreachableError() - - def exists( - self, - node_type: bsc.Node, - guids: typing.Iterable[URI], - ) -> typing.Iterable[URI]: - return (subj for subj in guids if self._has_type(subj, node_type)) - - def create( - self, - node_type: bsc.Node, - guids: typing.Iterable[URI], - ): - # check node_type - if node_type not in self.schema.nodes(): - raise errors.ConsistencyError(f'{node_type} is not defined in the schema') - # check and create guids - for guid in guids: - subject = rdflib.URIRef(guid) - # check node existence - if (subject, rdflib.RDF.type, None) in self._graph: - # FIXME: node exists and may have a different type! ignore? raise? report? - continue - # add node - self._transaction.add((subject, rdflib.RDF.type, rdflib.URIRef(node_type.uri))) - - def set( - self, - node_type: bsc.Node, - guids: typing.Iterable[URI], - predicate: bsc.Predicate, - values: typing.Iterable[typing.Any], - ): - # check node_type - if node_type not in self.schema.nodes(): - raise errors.ConsistencyError(f'{node_type} is not defined in the schema') - # check predicate - if predicate not in self.schema.predicates(): - raise errors.ConsistencyError(f'{predicate} is not defined in the schema') - if not node_type <= predicate.domain: - raise errors.ConsistencyError(f'{node_type} must be a subclass of {predicate.domain}') - # NOTE: predicate.range is in the schema since predicate is in the schema. - # materialize values - values = set(values) - # check values - if len(values) == 0: - return - if predicate.unique and len(values) != 1: - raise ValueError(values) - if isinstance(predicate.range, bsc.Node): - values = set(values) # materialize to safeguard against iterators passed as argument - inconsistent = {val for val in values if not self._has_type(val, predicate.range)} - # catches nodes that don't exist and nodes that have an inconsistent type - if len(inconsistent) > 0: - raise errors.InstanceError(inconsistent) - # check guids - # FIXME: Fail or skip inexistent nodes? - guids = set(guids) - inconsistent = {guid for guid in guids if not self._has_type(guid, node_type)} - if len(inconsistent) > 0: - raise errors.InstanceError(inconsistent) - - # add triples - pred = rdflib.URIRef(predicate.uri) - for guid, value in itertools.product(guids, values): - guid = rdflib.URIRef(guid) - # convert value - if isinstance(predicate.range, bsc.Literal): - value = rdflib.Literal(value, datatype=rdflib.URIRef(predicate.range.uri)) - elif isinstance(predicate.range, bsc.Node): - value = rdflib.URIRef(value) - else: - raise errors.UnreachableError() - # clear triples for unique predicates - if predicate.unique: - for obj in self._graph.objects(guid, pred): - if obj != value: - self._transaction.remove((guid, pred, obj)) - # add triple - self._transaction.add((guid, pred, value)) - -## EOF ## diff --git a/bsfs/triple_store/sparql/__init__.py b/bsfs/triple_store/sparql/__init__.py new file mode 100644 index 0000000..285334a --- /dev/null +++ b/bsfs/triple_store/sparql/__init__.py @@ -0,0 +1,18 @@ +""" + +Part of the BlackStar filesystem (bsfs) module. +A copy of the license is provided with the project. +Author: Matthias Baumgartner, 2022 +""" +# imports +import typing + +# inner-module imports +from .sparql import SparqlStore + +# exports +__all__: typing.Sequence[str] = ( + 'SparqlStore', + ) + +## EOF ## diff --git a/bsfs/triple_store/sparql/sparql.py b/bsfs/triple_store/sparql/sparql.py new file mode 100644 index 0000000..fff540a --- /dev/null +++ b/bsfs/triple_store/sparql/sparql.py @@ -0,0 +1,256 @@ +""" + +Part of the BlackStar filesystem (bsfs) module. +A copy of the license is provided with the project. +Author: Matthias Baumgartner, 2022 +""" +# imports +import itertools +import typing +import rdflib + +# bsfs imports +from bsfs import schema as bsc +from bsfs.query import ast +from bsfs.utils import errors, URI + +# inner-module imports +from . import base + + +# exports +__all__: typing.Sequence[str] = ( + 'SparqlStore', + ) + + +## code ## + +class _Transaction(): + """Lightweight rdflib transactions for in-memory databases.""" + + # graph instance. + _graph: rdflib.Graph + + # current log of added triples. + _added: typing.List[typing.Any] + + # current log of removed triples. + _removed: typing.List[typing.Any] + + def __init__(self, graph: rdflib.Graph): + self._graph = graph + # initialize internal structures + self.commit() + + def commit(self): + """Commit temporary changes.""" + self._added = [] + self._removed = [] + + def rollback(self): + """Undo changes since the last commit.""" + for triple in self._added: + self._graph.remove(triple) + for triple in self._removed: + self._graph.add(triple) + + def add(self, triple: typing.Any): + """Add a triple to the graph.""" + if triple not in self._graph: + self._added.append(triple) + self._graph.add(triple) + + def remove(self, triple: typing.Any): + """Remove a triple from the graph.""" + if triple in self._graph: + self._removed.append(triple) + self._graph.remove(triple) + + +class SparqlStore(base.TripleStoreBase): + """Sparql-based triple store. + + The sparql triple store uses a third-party backend + (currently rdflib) to store triples and manages them via + the Sparql query language. + + """ + + # The rdflib graph. + _graph: rdflib.Graph + + # Current transaction. + _transaction: _Transaction + + # The local schema. + _schema: bsc.Schema + + def __init__(self): + super().__init__(None) + self._graph = rdflib.Graph() + self._transaction = _Transaction(self._graph) + self._schema = bsc.Schema.Empty() + + # NOTE: mypy and pylint complain about the **kwargs not being listed (contrasting super) + # However, not having it here is clearer since it's explicit that there are no arguments. + @classmethod + def Open(cls) -> 'SparqlStore': # type: ignore [override] # pylint: disable=arguments-differ + return cls() + + def commit(self): + self._transaction.commit() + + def rollback(self): + self._transaction.rollback() + + @property + def schema(self) -> bsc.Schema: + return self._schema + + @schema.setter + def schema(self, schema: bsc.Schema): + # check args: Schema instanace + if not isinstance(schema, bsc.Schema): + raise TypeError(schema) + # check compatibility: No contradicting definitions + if not self.schema.consistent_with(schema): + raise errors.ConsistencyError(f'{schema} is inconsistent with {self.schema}') + + # commit the current transaction + self.commit() + + # adjust instances: + # nothing to do for added classes + # delete instances of removed classes + + # get deleted classes + sub = self.schema - schema + + # remove predicate instances + for pred in sub.predicates: + for src, trg in self._graph.subject_objects(rdflib.URIRef(pred.uri)): + self._transaction.remove((src, rdflib.URIRef(pred.uri), trg)) + + # remove node instances + for node in sub.nodes: + # iterate through node instances + for inst in self._graph.subjects(rdflib.RDF.type, rdflib.URIRef(node.uri)): + # remove triples where the instance is in the object position + for src, pred in self._graph.subject_predicates(inst): + self._transaction.remove((src, pred, inst)) + # remove triples where the instance is in the subject position + for pred, trg in self._graph.predicate_objects(inst): + self._transaction.remove((inst, pred, trg)) + # remove instance + self._transaction.remove((inst, rdflib.RDF.type, rdflib.URIRef(node.uri))) + + # NOTE: Nothing to do for literals + + # commit instance changes + self.commit() + + # migrate schema + self._schema = schema + + def get(self, node_type: bsc.Node, query: ast.filter.FilterExpression) -> typing.Iterator[URI]: + raise NotImplementedError() + + def _has_type(self, subject: URI, node_type: bsc.Node) -> bool: + """Return True if *subject* is a node of class *node_type* or a subclass thereof.""" + if node_type not in self.schema.nodes(): + raise errors.ConsistencyError(f'{node_type} is not defined in the schema') + + subject_types = list(self._graph.objects(rdflib.URIRef(subject), rdflib.RDF.type)) + if len(subject_types) == 0: + return False + if len(subject_types) == 1: + node = self.schema.node(URI(subject_types[0])) # type: ignore [arg-type] # URI is a subtype of str + if node == node_type: + return True + if node_type in node.parents(): + return True + return False + raise errors.UnreachableError() + + def exists( + self, + node_type: bsc.Node, + guids: typing.Iterable[URI], + ) -> typing.Iterable[URI]: + return (subj for subj in guids if self._has_type(subj, node_type)) + + def create( + self, + node_type: bsc.Node, + guids: typing.Iterable[URI], + ): + # check node_type + if node_type not in self.schema.nodes(): + raise errors.ConsistencyError(f'{node_type} is not defined in the schema') + # check and create guids + for guid in guids: + subject = rdflib.URIRef(guid) + # check node existence + if (subject, rdflib.RDF.type, None) in self._graph: + # FIXME: node exists and may have a different type! ignore? raise? report? + continue + # add node + self._transaction.add((subject, rdflib.RDF.type, rdflib.URIRef(node_type.uri))) + + def set( + self, + node_type: bsc.Node, + guids: typing.Iterable[URI], + predicate: bsc.Predicate, + values: typing.Iterable[typing.Any], + ): + # check node_type + if node_type not in self.schema.nodes(): + raise errors.ConsistencyError(f'{node_type} is not defined in the schema') + # check predicate + if predicate not in self.schema.predicates(): + raise errors.ConsistencyError(f'{predicate} is not defined in the schema') + if not node_type <= predicate.domain: + raise errors.ConsistencyError(f'{node_type} must be a subclass of {predicate.domain}') + # NOTE: predicate.range is in the schema since predicate is in the schema. + # materialize values + values = set(values) + # check values + if len(values) == 0: + return + if predicate.unique and len(values) != 1: + raise ValueError(values) + if isinstance(predicate.range, bsc.Node): + values = set(values) # materialize to safeguard against iterators passed as argument + inconsistent = {val for val in values if not self._has_type(val, predicate.range)} + # catches nodes that don't exist and nodes that have an inconsistent type + if len(inconsistent) > 0: + raise errors.InstanceError(inconsistent) + # check guids + # FIXME: Fail or skip inexistent nodes? + guids = set(guids) + inconsistent = {guid for guid in guids if not self._has_type(guid, node_type)} + if len(inconsistent) > 0: + raise errors.InstanceError(inconsistent) + + # add triples + pred = rdflib.URIRef(predicate.uri) + for guid, value in itertools.product(guids, values): + guid = rdflib.URIRef(guid) + # convert value + if isinstance(predicate.range, bsc.Literal): + value = rdflib.Literal(value, datatype=rdflib.URIRef(predicate.range.uri)) + elif isinstance(predicate.range, bsc.Node): + value = rdflib.URIRef(value) + else: + raise errors.UnreachableError() + # clear triples for unique predicates + if predicate.unique: + for obj in self._graph.objects(guid, pred): + if obj != value: + self._transaction.remove((guid, pred, obj)) + # add triple + self._transaction.add((guid, pred, value)) + +## EOF ## -- cgit v1.2.3 From a0f2308adcb226d28de3355bc7115a6d9b669462 Mon Sep 17 00:00:00 2001 From: Matthias Baumgartner Date: Mon, 19 Dec 2022 13:40:02 +0100 Subject: import fixes --- bsfs/graph/graph.py | 2 +- bsfs/query/validator.py | 177 ++++++++++++++++++++++++++++++++++++- bsfs/triple_store/base.py | 3 +- bsfs/triple_store/sparql/sparql.py | 2 +- 4 files changed, 179 insertions(+), 5 deletions(-) (limited to 'bsfs') diff --git a/bsfs/graph/graph.py b/bsfs/graph/graph.py index 10e5904..51fe75d 100644 --- a/bsfs/graph/graph.py +++ b/bsfs/graph/graph.py @@ -111,7 +111,7 @@ class Graph(): type_ = self.schema.node(node_type) return _nodes.Nodes(self._backend, self._user, type_, {guid}) - def get(self, node_type: URI, subject: ast.filter.FilterExpression) -> Nodes: + def get(self, node_type: URI, subject: ast.filter.FilterExpression) -> _nodes.Nodes: """Return a `Nodes` instance over all nodes of type *node_type* that match the *subject* query.""" raise NotImplementedError() diff --git a/bsfs/query/validator.py b/bsfs/query/validator.py index ac3789a..123b947 100644 --- a/bsfs/query/validator.py +++ b/bsfs/query/validator.py @@ -29,7 +29,180 @@ class Filter(): def __init__(self, schema: bsc.Schema): self.schema = schema - def parse(self, node: ast.filter.FilterExpression): - raise NotImplementedError() + def parse(self, node: ast.filter.FilterExpression, subject: bsc.types._Vertex): + # subject is a node type + if not isinstance(subject, bsc.Node): + raise errors.ConsistencyError(f'Expected a node, found {subject}') + # subject exists in the schema + if subject not in self.schema.nodes: + raise errors.ConsistencyError(f'Invalid node type {subject}') + # root expression is valid + self._parse(node, subject) + # all tests passed + return True + + + def _parse_numerical_expression(self, node: ast.filter.FilterExpression, subject: bsc.types._Vertex): + if isinstance(node, ast.filter.And): + return self._and(node, subject) + elif isinstance(node, ast.filter.Or): + return self._or(node, subject) + elif isinstance(node, ast.filter.LessThan): + return self._lessThan(node, subject) + elif isinstance(node, ast.filter.GreaterThan): + return self._greaterThan(node, subject) + elif isinstance(node, ast.filter.Equals): + return self._equals(node, subject, numerical=True) + else: + raise errors.ConsistencyError(f'Expected a numerical expression, found {node}') + + + def __branch(self, node: typing.Union[ast.filter.Any, ast.filter.And], subject: bsc.types._Vertex): + # subject is a node type + if not isinstance(subject, bsc.Node): + raise errors.ConsistencyError(f'Expected a node, found {subject}') + # subject exists in the schema + if subject not in self.schema.nodes: + raise errors.ConsistencyError(f'Invalid node type {subject}') + # predicate is valid + dom, rng = self._parse_predicate_expression(node.predicate) + # subject is a subtype of the predicate's domain + if not subject <= dom: + raise errors.ConsistencyError(f'Expected type {dom}, found {subject}') + # child expression is valid + self._parse_filter_expression(node.expr, rng) + + def _any(self, node: ast.filter.Any, subject: bsc.types._Vertex): + return self.__branch(node, subject) + + def _all(self, node: ast.filter.All, subject: bsc.types._Vertex): + return self.__branch(node, subject) + + + def __agg(self, node: typing.Union[ast.filter.And, ast.filter.Or], subject: bsc.types._Vertex): + for expr in node: + # child expression is valid + self._parse_filter_expression(expr, subject) + + def _and(self, node: ast.filter.And, subject: bsc.types._Vertex): + return self.__agg(node, subject) + + def _or(self, node: ast.filter.Or, subject: bsc.types._Vertex): + return self.__agg(node, subject) + + + def _not(self, node: ast.filter.Not, subject: bsc.types._Vertex): + # child expression is valid + self._parse_filter_expression(node.expr, subject) + + + def _has(self, node: ast.filter.Has, subject: bsc.types._Vertex): + # subject is a node type + if not isinstance(subject, bsc.Node): + raise errors.ConsistencyError(f'Expected a node, found {subject}') + # subject exists in the schema + if subject not in self.schema.nodes: + raise errors.ConsistencyError(f'Invalid node type {subject}') + # predicate is valid + dom, rng = self._parse_predicate_expression(node.predicate) + # subject is a subtype of the predicate's domain + if not subject <= dom: + raise errors.ConsistencyError(f'Expected type {dom}, found {subject}') + # node.count is a numerical expression + self._parse_numerical_expression(node.count, self.schema.literal(ns.xsd.numerical)) + + + def _equals(self, node: ast.filter.Equals, subject: bsc.types._Vertex, numerical: bool = False): + # subject is a literal + #if not isinstance(subject, bsc.Literal): + # raise errors.ConsistencyError(f'Expected a literal, found {subject}') + if isinstance(subject, bsc.Node): + # FIXME: How to handle this case? + # FIXME: How to check if a NodeType is acceptable? + # FIXME: Maybe use flags to control what is expected as node identifiers? + from bsfs.graph.nodes import Nodes # FIXME + if not isinstance(node.value, Nodes) and not isinstance(node.value, URI): + raise errors.ConsistencyError(f'Expected a Nodes or URI, found {node.value}') + elif isinstance(subject, bsc.Literal): + # literal exists in the schema + if subject not in self.schema.literals: + raise errors.ConsistencyError(f'Invalid literal type {subject}') + else: + # FIXME: + raise errors.ConsistencyError(f'Expected a literal, found {subject}') + # node.value is numeric (if requested) + if numerical and not isinstance(node.value, float) and not isinstance(node.value, int): + raise errors.ConsistencyError(f'Expected a numerical value (int or float), found {node.value}') + # NOTE: We cannot check if node.value agrees with the subject since we don't know + # all literal types, their hierarchy, and how the backend converts datatypes. + + + def _substring(self, node: ast.filter.Substring, subject: bsc.types._Vertex): + # subject is a literal + if not isinstance(subject, bsc.Literal): + raise errors.ConsistencyError(f'Expected a literal, found {subject}') + # literal exists in the schema + if subject not in self.schema.literals: + raise errors.ConsistencyError(f'Invalid literal type {subject}') + # node.value matches literal datatype + if not subject.is_a(ns.xsd.string): + raise errors.ConsistencyError(f'Expected a string literal, found {subject}') + + + def _lessThan(self, node: ast.filter.LessThan, subject: bsc.types._Vertex): + # subject is a literal + if not isinstance(subject, bsc.Literal): + raise errors.ConsistencyError(f'Expected a literal, found {subject}') + # literal exists in the schema + if subject not in self.schema.literals: + raise errors.ConsistencyError(f'Invalid literal type {subject}') + # subject is numerical + if not subject.is_a(ns.xsd.numerical): + raise errors.ConsistencyError(f'Expected a numerical literal, found {subject}') + + + def _greaterThan(self, node: ast.filter.GreaterThan, subject: bsc.types._Vertex): + # subject is a literal + if not isinstance(subject, bsc.Literal): + raise errors.ConsistencyError(f'Expected a literal, found {subject}') + # literal exists in the schema + if subject not in self.schema.literals: + raise errors.ConsistencyError(f'Invalid literal type {subject}') + # subject is numerical + if not subject.is_a(ns.xsd.numerical): + raise errors.ConsistencyError(f'Expected a numerical literal, found {subject}') + + + def _predicate(self, node: ast.filter.Predicate): + try: + # predicate exists in the schema + pred = self.schema.predicate(node.predicate) + except KeyError: + raise errors.ConsistencyError(f'') # FIXME + if node.reverse: + return pred.range, pred.domain + else: + return pred.domain, pred.range + + + def _oneOf(self, node: ast.filter.OneOf): + dom, rng = None, None + for pred in node: + try: + # parse child expression + subdom, subrng = self._parse_predicate_expression(pred) + # domain and range must be related across all child expressions + if not subdom <= dom and not subdom >= dom: + raise errors.ConsistencyError(f'') # FIXME + if not subrng <= rng and not subrng >= rng: + raise errors.ConsistencyError(f'') # FIXME + # determine overall domain and range + if dom is None or subdom < dom: # pick most specific domain + dom = subdom + if rng is None or subrng > rng: # pick most generic range + rng = subrng + except KeyError: + raise errors.ConsistencyError(f'') + return dom, rng ## EOF ## diff --git a/bsfs/triple_store/base.py b/bsfs/triple_store/base.py index 28ebb86..5ff9523 100644 --- a/bsfs/triple_store/base.py +++ b/bsfs/triple_store/base.py @@ -9,6 +9,7 @@ import abc import typing # inner-module imports +from bsfs.query import ast from bsfs.utils import URI, typename import bsfs.schema as _schema @@ -111,7 +112,7 @@ class TripleStoreBase(abc.ABC): @abc.abstractmethod def get( self, - node_type: bsc.Node, + node_type: _schema.Node, query: ast.filter.FilterExpression, ) -> typing.Iterator[URI]: """Return guids of nodes of type *node_type* that match the *query*.""" diff --git a/bsfs/triple_store/sparql/sparql.py b/bsfs/triple_store/sparql/sparql.py index fff540a..7172f34 100644 --- a/bsfs/triple_store/sparql/sparql.py +++ b/bsfs/triple_store/sparql/sparql.py @@ -15,7 +15,7 @@ from bsfs.query import ast from bsfs.utils import errors, URI # inner-module imports -from . import base +from .. import base # exports -- cgit v1.2.3 From 383fa8fd5c2e4b67089b4c5b654ebade51382f2c Mon Sep 17 00:00:00 2001 From: Matthias Baumgartner Date: Thu, 22 Dec 2022 20:27:49 +0100 Subject: filter ast definition and validation --- bsfs/query/ast/__init__.py | 2 +- bsfs/query/ast/filter_.py | 405 ++++++++++++++++++++++++++++++++++++++++++++- bsfs/query/validator.py | 336 +++++++++++++++++++------------------ bsfs/utils/__init__.py | 3 +- bsfs/utils/commons.py | 34 ++++ bsfs/utils/errors.py | 3 + 6 files changed, 620 insertions(+), 163 deletions(-) (limited to 'bsfs') diff --git a/bsfs/query/ast/__init__.py b/bsfs/query/ast/__init__.py index 0ee7385..704d051 100644 --- a/bsfs/query/ast/__init__.py +++ b/bsfs/query/ast/__init__.py @@ -14,7 +14,7 @@ Author: Matthias Baumgartner, 2022 import typing # inner-module imports -from . import filter_ as filter +from . import filter_ as filter # pylint: disable=redefined-builtin # exports __all__: typing.Sequence[str] = ( diff --git a/bsfs/query/ast/filter_.py b/bsfs/query/ast/filter_.py index 4086fc1..b129ded 100644 --- a/bsfs/query/ast/filter_.py +++ b/bsfs/query/ast/filter_.py @@ -1,5 +1,27 @@ """Filter AST. +Note that it is easily possible to construct an AST that is inconsistent with +a given schema. Furthermore, it is possible to construct a semantically invalid +AST which that cannot be parsed correctly or includes contradicting statements. +The AST nodes do not (and cannot) check such issues. + +For example, consider the following AST: + +>>> Any(ns.bse.collection, +... And( +... Equals('hello'), +... Any(ns.bsm.guid, Any(ns.bsm.guid, Equals('hello'))), +... Any(ns.bst.label, Equals('world')), +... All(ns.bst.label, Not(Equals('world'))), +... ) +... ) + +This AST has multiple issues that are not verified upon its creation: +* A condition on a non-literal. +* A Filter on a literal. +* Conditions exclude each other +* The predicate along the branch have incompatible domains and ranges. + Part of the BlackStar filesystem (bsfs) module. A copy of the license is provided with the project. Author: Matthias Baumgartner, 2022 @@ -8,12 +30,45 @@ Author: Matthias Baumgartner, 2022 from collections import abc import typing +# bsfs imports +from bsfs.utils import URI, typename, normalize_args + +# inner-module imports +#from . import utils + # exports -__all__ : typing.Sequence[str] = [] +__all__ : typing.Sequence[str] = ( + # base classes + 'FilterExpression', + 'PredicateExpression', + # predicate expressions + 'OneOf', + 'Predicate', + # branching + 'All', + 'Any', + # aggregators + 'And', + 'Or', + # value matchers + 'Equals', + 'Substring', + 'EndsWith', + 'StartsWith', + # range matchers + 'GreaterThan', + 'LessThan', + # misc + 'Has', + 'Is', + 'Not', + ) ## code ## +# pylint: disable=too-few-public-methods # Many expressions use mostly magic methods + class _Expression(abc.Hashable): def __repr__(self) -> str: """Return the expressions's string representation.""" @@ -27,4 +82,352 @@ class _Expression(abc.Hashable): """Return True if *self* and *other* are equivalent.""" return isinstance(other, type(self)) + +class FilterExpression(_Expression): + """Generic Filter expression.""" + + +class PredicateExpression(_Expression): + """Generic Predicate expression.""" + + +class _Branch(FilterExpression): + """Branch the filter along a predicate.""" + + # predicate to follow. + predicate: PredicateExpression + + # child expression to evaluate. + expr: FilterExpression + + def __init__( + self, + predicate: typing.Union[PredicateExpression, URI], + expr: FilterExpression, + ): + # process predicate argument + if isinstance(predicate, URI): + predicate = Predicate(predicate) + elif not isinstance(predicate, PredicateExpression): + raise TypeError(predicate) + # process expression argument + if not isinstance(expr, FilterExpression): + raise TypeError(expr) + # assign members + self.predicate = predicate + self.expr = expr + + def __repr__(self) -> str: + return f'{typename(self)}({self.predicate}, {self.expr})' + + def __hash__(self) -> int: + return hash((super().__hash__(), self.predicate, self.expr)) + + def __eq__(self, other) -> bool: + return super().__eq__(other) \ + and self.predicate == other.predicate \ + and self.expr == other.expr + +class Any(_Branch): + """Any (and at least one) triple matches.""" + + +class All(_Branch): + """All (and at least one) triples match.""" + + +class _Agg(FilterExpression, abc.Collection): + """Combine multiple expressions.""" + + # child expressions + expr: typing.Set[FilterExpression] + + def __init__( + self, + *expr: typing.Union[FilterExpression, + typing.Iterable[FilterExpression], + typing.Iterator[FilterExpression]] + ): + # unfold arguments + unfolded = set(normalize_args(*expr)) + # check type + if not all(isinstance(e, FilterExpression) for e in unfolded): + raise TypeError(expr) + # assign member + self.expr = unfolded + + def __contains__(self, expr: typing.Any) -> bool: + """Return True if *expr* is among the child expressions.""" + return expr in self.expr + + def __iter__(self) -> typing.Iterator[FilterExpression]: + """Iterator over child expressions.""" + return iter(self.expr) + + def __len__(self) -> int: + """Number of child expressions.""" + return len(self.expr) + + def __repr__(self) -> str: + return f'{typename(self)}({self.expr})' + + def __hash__(self) -> int: + return hash((super().__hash__(), tuple(self.expr))) # FIXME: Unique hash of different orders over self.expr + + def __eq__(self, other) -> bool: + return super().__eq__(other) and self.expr == other.expr + + +class And(_Agg): + """All conditions match.""" + + +class Or(_Agg): + """At least one condition matches.""" + + +class Not(FilterExpression): + """Invert a statement.""" + + # child expression + expr: FilterExpression + + def __init__(self, expr: FilterExpression): + # check argument + if not isinstance(expr, FilterExpression): + raise TypeError(expr) + # assign member + self.expr = expr + + def __repr__(self) -> str: + return f'{typename(self)}({self.expr})' + + def __hash__(self) -> int: + return hash((super().__hash__(), self.expr)) + + def __eq__(self, other: typing.Any) -> bool: + return super().__eq__(other) and self.expr == other.expr + + +class Has(FilterExpression): + """Has predicate N times""" + + # predicate to follow. + predicate: PredicateExpression + + # target count + count: FilterExpression + + def __init__( + self, + predicate: typing.Union[PredicateExpression, URI], + count: typing.Optional[typing.Union[FilterExpression, int]] = None, + ): + # check predicate + if isinstance(predicate, URI): + predicate = Predicate(predicate) + elif not isinstance(predicate, PredicateExpression): + raise TypeError(predicate) + # check count + if count is None: + count = GreaterThan(1, strict=False) + elif isinstance(count, int): + count = Equals(count) + elif not isinstance(count, FilterExpression): + raise TypeError(count) + # assign members + self.predicate = predicate + self.count = count + + def __repr__(self) -> str: + return f'{typename(self)}({self.predicate}, {self.count})' + + def __hash__(self) -> int: + return hash((super().__hash__(), self.predicate, self.count)) + + def __eq__(self, other) -> bool: + return super().__eq__(other) \ + and self.predicate == other.predicate \ + and self.count == other.count + + +class _Value(FilterExpression): + """ + """ + + # target value. + value: typing.Any + + def __init__(self, value: typing.Any): + self.value = value + + def __repr__(self) -> str: + return f'{typename(self)}({self.value})' + + def __hash__(self) -> int: + return hash((super().__hash__(), self.value)) + + def __eq__(self, other) -> bool: + return super().__eq__(other) and self.value == other.value + + +class Is(_Value): + """Match the URI of a node.""" + + +class Equals(_Value): + """Value matches exactly. + NOTE: Value format must correspond to literal type; can be a string, a number, or a Node + """ + + +class Substring(_Value): + """Value matches a substring + NOTE: value format must be a string + """ + + +class StartsWith(_Value): + """Value begins with a given string.""" + + +class EndsWith(_Value): + """Value ends with a given string.""" + + +class _Bounded(FilterExpression): + """ + """ + + # bound. + threshold: float + + # closed (True) or open (False) bound. + strict: bool + + def __init__( + self, + threshold: float, + strict: bool = True, + ): + self.threshold = float(threshold) + self.strict = bool(strict) + + def __repr__(self) -> str: + return f'{typename(self)}({self.threshold}, {self.strict})' + + def __hash__(self) -> int: + return hash((super().__hash__(), self.threshold, self.strict)) + + def __eq__(self, other) -> bool: + return super().__eq__(other) \ + and self.threshold == other.threshold \ + and self.strict == other.strict + + + +class LessThan(_Bounded): + """Value is (strictly) smaller than threshold. + NOTE: only on numerical literals + """ + + +class GreaterThan(_Bounded): + """Value is (strictly) larger than threshold + NOTE: only on numerical literals + """ + + +class Predicate(PredicateExpression): + """A single predicate.""" + + # predicate URI + predicate: URI + + # reverse the predicate's direction + reverse: bool + + def __init__( + self, + predicate: URI, + reverse: typing.Optional[bool] = False, + ): + # check arguments + if not isinstance(predicate, URI): + raise TypeError(predicate) + # assign members + self.predicate = predicate + self.reverse = bool(reverse) + + def __repr__(self) -> str: + return f'{typename(self)}({self.predicate}, {self.reverse})' + + def __hash__(self) -> int: + return hash((super().__hash__(), self.predicate, self.reverse)) + + def __eq__(self, other) -> bool: + return super().__eq__(other) \ + and self.predicate == other.predicate \ + and self.reverse == other.reverse + + +class OneOf(PredicateExpression, abc.Collection): + """A set of predicate alternatives. + + The predicates' domains must be ascendants or descendants of each other. + The overall domain is the most specific one. + + The predicate's domains must be ascendants or descendants of each other. + The overall range is the most generic one. + """ + + # predicate alternatives + expr: typing.Set[PredicateExpression] + + def __init__(self, *expr: typing.Union[PredicateExpression, URI]): + # unfold arguments + unfolded = set(normalize_args(*expr)) # type: ignore [arg-type] # this is getting too complex... + # check arguments + if len(unfolded) == 0: + raise AttributeError('expected at least one expression, found none') + # ensure PredicateExpression + unfolded = {Predicate(e) if isinstance(e, URI) else e for e in unfolded} + # check type + if not all(isinstance(e, PredicateExpression) for e in unfolded): + raise TypeError(expr) + # assign member + self.expr = unfolded + + def __contains__(self, expr: typing.Any) -> bool: + """Return True if *expr* is among the child expressions.""" + return expr in self.expr + + def __iter__(self) -> typing.Iterator[PredicateExpression]: + """Iterator over child expressions.""" + return iter(self.expr) + + def __len__(self) -> int: + """Number of child expressions.""" + return len(self.expr) + + def __repr__(self) -> str: + return f'{typename(self)}({self.expr})' + + def __hash__(self) -> int: + return hash((super().__hash__(), tuple(self.expr))) # FIXME: Unique hash of different orders over self.expr + + def __eq__(self, other) -> bool: + return super().__eq__(other) and self.expr == other.expr + + +# Helpers + +def IsIn(*values): # pylint: disable=invalid-name # explicitly mimics an expression + """Match any of the given URIs.""" + return Or(Is(value) for value in normalize_args(*values)) + +def IsNotIn(*values): # pylint: disable=invalid-name # explicitly mimics an expression + """Match none of the given URIs.""" + return Not(IsIn(*values)) + ## EOF ## diff --git a/bsfs/query/validator.py b/bsfs/query/validator.py index 123b947..352203a 100644 --- a/bsfs/query/validator.py +++ b/bsfs/query/validator.py @@ -9,6 +9,8 @@ import typing # bsfs imports from bsfs import schema as bsc +from bsfs.namespace import ns +from bsfs.utils import errors, typename # inner-module imports from . import ast @@ -22,6 +24,18 @@ __all__ : typing.Sequence[str] = ( ## code ## class Filter(): + """Validate a `bsfs.query.ast.filter` query's structure and schema compliance. + + * Conditions (Bounded, Value) can only be applied on literals + * Branches, Id, and Has can only be applied on nodes + * Predicates' domain and range must match + * Predicate paths must follow the schema + * Referenced types are present in the schema + + """ + + # vertex types + T_VERTEX = typing.Union[bsc.Node, bsc.Literal] # FIXME: Shouldn't this be in the schema? # schema to validate against. schema: bsc.Schema @@ -29,180 +43,182 @@ class Filter(): def __init__(self, schema: bsc.Schema): self.schema = schema - def parse(self, node: ast.filter.FilterExpression, subject: bsc.types._Vertex): - # subject is a node type - if not isinstance(subject, bsc.Node): - raise errors.ConsistencyError(f'Expected a node, found {subject}') - # subject exists in the schema - if subject not in self.schema.nodes: - raise errors.ConsistencyError(f'Invalid node type {subject}') - # root expression is valid - self._parse(node, subject) + def __call__(self, root_type: bsc.Node, query: ast.filter.FilterExpression): + """Validate a filter *query*, assuming the subject having *root_type*. + + Raises a `bsfs.utils.errors.ConsistencyError` if the query violates the schema. + Raises a `bsfs.utils.errors.BackendError` if the query structure is invalid. + + """ + # root_type must be a schema.Node + if not isinstance(root_type, bsc.Node): + raise TypeError(f'Expected a node, found {typename(root_type)}') + # root_type must exist in the schema + if root_type not in self.schema.nodes(): + raise errors.ConsistencyError(f'{root_type} is not defined in the schema') + # check root expression + self._parse_filter_expression(root_type, query) # all tests passed return True - def _parse_numerical_expression(self, node: ast.filter.FilterExpression, subject: bsc.types._Vertex): - if isinstance(node, ast.filter.And): - return self._and(node, subject) - elif isinstance(node, ast.filter.Or): - return self._or(node, subject) - elif isinstance(node, ast.filter.LessThan): - return self._lessThan(node, subject) - elif isinstance(node, ast.filter.GreaterThan): - return self._greaterThan(node, subject) - elif isinstance(node, ast.filter.Equals): - return self._equals(node, subject, numerical=True) - else: - raise errors.ConsistencyError(f'Expected a numerical expression, found {node}') - - - def __branch(self, node: typing.Union[ast.filter.Any, ast.filter.And], subject: bsc.types._Vertex): - # subject is a node type - if not isinstance(subject, bsc.Node): - raise errors.ConsistencyError(f'Expected a node, found {subject}') - # subject exists in the schema - if subject not in self.schema.nodes: - raise errors.ConsistencyError(f'Invalid node type {subject}') - # predicate is valid - dom, rng = self._parse_predicate_expression(node.predicate) - # subject is a subtype of the predicate's domain - if not subject <= dom: - raise errors.ConsistencyError(f'Expected type {dom}, found {subject}') - # child expression is valid - self._parse_filter_expression(node.expr, rng) + ## routing methods + + def _parse_filter_expression(self, type_: T_VERTEX, node: ast.filter.FilterExpression): + """Route *node* to the handler of the respective FilterExpression subclass.""" + if isinstance(node, ast.filter.Is): + return self._is(type_, node) + if isinstance(node, ast.filter.Not): + return self._not(type_, node) + if isinstance(node, ast.filter.Has): + return self._has(type_, node) + if isinstance(node, (ast.filter.Any, ast.filter.All)): + return self._branch(type_, node) + if isinstance(node, (ast.filter.And, ast.filter.Or)): + return self._agg(type_, node) + if isinstance(node, (ast.filter.Equals, ast.filter.Substring, ast.filter.StartsWith, ast.filter.EndsWith)): + return self._value(type_, node) + if isinstance(node, (ast.filter.LessThan, ast.filter.GreaterThan)): + return self._bounded(type_, node) + # invalid node + raise errors.BackendError(f'expected filter expression, found {node}') + + def _parse_predicate_expression(self, node: ast.filter.PredicateExpression) -> typing.Tuple[T_VERTEX, T_VERTEX]: + """Route *node* to the handler of the respective PredicateExpression subclass.""" + if isinstance(node, ast.filter.Predicate): + return self._predicate(node) + if isinstance(node, ast.filter.OneOf): + return self._one_of(node) + # invalid node + raise errors.BackendError(f'expected predicate expression, found {node}') + + + ## predicate expressions + + def _predicate(self, node: ast.filter.Predicate) -> typing.Tuple[T_VERTEX, T_VERTEX]: + # predicate exists in the schema + if not self.schema.has_predicate(node.predicate): + raise errors.ConsistencyError(f'predicate {node.predicate} is not in the schema') + # determine domain and range + pred = self.schema.predicate(node.predicate) + dom, rng = pred.domain, pred.range + if rng is None: + # FIXME: It is a design error that Predicates can have a None range... + raise errors.BackendError(f'predicate {pred} has no range') + if node.reverse: + dom, rng = rng, dom # type: ignore [assignment] # variable re-use confuses mypy + # return domain and range + return dom, rng - def _any(self, node: ast.filter.Any, subject: bsc.types._Vertex): - return self.__branch(node, subject) + def _one_of(self, node: ast.filter.OneOf) -> typing.Tuple[T_VERTEX, T_VERTEX]: + # determine domain and range types + # NOTE: select the most specific domain and the most generic range + dom, rng = None, None + for pred in node: + # parse child expression + subdom, subrng = self._parse_predicate_expression(pred) + try: + # determine overall domain + if dom is None or subdom < dom: # pick most specific domain + dom = subdom + # domains must be related across all child expressions + if not subdom <= dom and not subdom >= dom: + raise errors.ConsistencyError(f'domains {subdom} and {dom} are not related') + except TypeError as err: # compared literal vs. node + raise errors.ConsistencyError(f'domains {subdom} and {dom} are not of the same type') from err - def _all(self, node: ast.filter.All, subject: bsc.types._Vertex): - return self.__branch(node, subject) + try: + # determine overall range + if rng is None or subrng > rng: # pick most generic range + rng = subrng + # ranges must be related across all child expressions + if not subrng <= rng and not subrng >= rng: + raise errors.ConsistencyError(f'ranges {subrng} and {rng} are not related') + except TypeError as err: # compared literal vs. node + raise errors.ConsistencyError(f'ranges {subrng} and {rng} are not of the same type') from err + # check domain and range + if dom is None or rng is None: + # OneOf guarantees at least one expression, these two cases cannot happen + raise errors.UnreachableError() + # return domain and range + return dom, rng - def __agg(self, node: typing.Union[ast.filter.And, ast.filter.Or], subject: bsc.types._Vertex): + ## intermediates + + def _branch(self, type_: T_VERTEX, node: ast.filter._Branch): + # type is a Node + if not isinstance(type_, bsc.Node): + raise errors.ConsistencyError(f'expected a Node, found {type_}') + # type exists in the schema + # FIXME: Isn't it actually guaranteed that the type (except the root type) is part of the schema? + # all types can be traced back to (a) root_type, (b) predicate, or (c) manually set (e.g. in _is). + # For (a), we do (and have to) perform a check. For (c), the code base should be consistent throughout + # the module, so this is an assumption that has to be ensured in schema.Schema. For (b), we know (and + # check) that the predicate is in the schema, hence all node/literals derived from it are also in the + # schema by construction of the schema.Schema class. So, why do we check this every time? + if type_ not in self.schema.nodes(): + raise errors.ConsistencyError(f'node {type_} is not in the schema') + # predicate is valid + dom, rng = self._parse_predicate_expression(node.predicate) + # type_ is a subtype of the predicate's domain + if not type_ <= dom: + raise errors.ConsistencyError(f'expected type {dom} or subtype thereof, found {type_}') + # child expression is valid + self._parse_filter_expression(rng, node.expr) + + def _agg(self, type_: T_VERTEX, node: ast.filter._Agg): for expr in node: # child expression is valid - self._parse_filter_expression(expr, subject) - - def _and(self, node: ast.filter.And, subject: bsc.types._Vertex): - return self.__agg(node, subject) - - def _or(self, node: ast.filter.Or, subject: bsc.types._Vertex): - return self.__agg(node, subject) - + self._parse_filter_expression(type_, expr) - def _not(self, node: ast.filter.Not, subject: bsc.types._Vertex): + def _not(self, type_: T_VERTEX, node: ast.filter.Not): # child expression is valid - self._parse_filter_expression(node.expr, subject) - - - def _has(self, node: ast.filter.Has, subject: bsc.types._Vertex): - # subject is a node type - if not isinstance(subject, bsc.Node): - raise errors.ConsistencyError(f'Expected a node, found {subject}') - # subject exists in the schema - if subject not in self.schema.nodes: - raise errors.ConsistencyError(f'Invalid node type {subject}') + self._parse_filter_expression(type_, node.expr) + + def _has(self, type_: T_VERTEX, node: ast.filter.Has): + # type is a Node + if not isinstance(type_, bsc.Node): + raise errors.ConsistencyError(f'expected a Node, found {type_}') + # type exists in the schema + if type_ not in self.schema.nodes(): + raise errors.ConsistencyError(f'node {type_} is not in the schema') # predicate is valid - dom, rng = self._parse_predicate_expression(node.predicate) - # subject is a subtype of the predicate's domain - if not subject <= dom: - raise errors.ConsistencyError(f'Expected type {dom}, found {subject}') + dom, _= self._parse_predicate_expression(node.predicate) + # type_ is a subtype of the predicate's domain + if not type_ <= dom: + raise errors.ConsistencyError(f'expected type {dom}, found {type_}') # node.count is a numerical expression - self._parse_numerical_expression(node.count, self.schema.literal(ns.xsd.numerical)) - - - def _equals(self, node: ast.filter.Equals, subject: bsc.types._Vertex, numerical: bool = False): - # subject is a literal - #if not isinstance(subject, bsc.Literal): - # raise errors.ConsistencyError(f'Expected a literal, found {subject}') - if isinstance(subject, bsc.Node): - # FIXME: How to handle this case? - # FIXME: How to check if a NodeType is acceptable? - # FIXME: Maybe use flags to control what is expected as node identifiers? - from bsfs.graph.nodes import Nodes # FIXME - if not isinstance(node.value, Nodes) and not isinstance(node.value, URI): - raise errors.ConsistencyError(f'Expected a Nodes or URI, found {node.value}') - elif isinstance(subject, bsc.Literal): - # literal exists in the schema - if subject not in self.schema.literals: - raise errors.ConsistencyError(f'Invalid literal type {subject}') - else: - # FIXME: - raise errors.ConsistencyError(f'Expected a literal, found {subject}') - # node.value is numeric (if requested) - if numerical and not isinstance(node.value, float) and not isinstance(node.value, int): - raise errors.ConsistencyError(f'Expected a numerical value (int or float), found {node.value}') - # NOTE: We cannot check if node.value agrees with the subject since we don't know - # all literal types, their hierarchy, and how the backend converts datatypes. - - - def _substring(self, node: ast.filter.Substring, subject: bsc.types._Vertex): - # subject is a literal - if not isinstance(subject, bsc.Literal): - raise errors.ConsistencyError(f'Expected a literal, found {subject}') - # literal exists in the schema - if subject not in self.schema.literals: - raise errors.ConsistencyError(f'Invalid literal type {subject}') - # node.value matches literal datatype - if not subject.is_a(ns.xsd.string): - raise errors.ConsistencyError(f'Expected a string literal, found {subject}') - - - def _lessThan(self, node: ast.filter.LessThan, subject: bsc.types._Vertex): - # subject is a literal - if not isinstance(subject, bsc.Literal): - raise errors.ConsistencyError(f'Expected a literal, found {subject}') - # literal exists in the schema - if subject not in self.schema.literals: - raise errors.ConsistencyError(f'Invalid literal type {subject}') - # subject is numerical - if not subject.is_a(ns.xsd.numerical): - raise errors.ConsistencyError(f'Expected a numerical literal, found {subject}') - - - def _greaterThan(self, node: ast.filter.GreaterThan, subject: bsc.types._Vertex): - # subject is a literal - if not isinstance(subject, bsc.Literal): - raise errors.ConsistencyError(f'Expected a literal, found {subject}') - # literal exists in the schema - if subject not in self.schema.literals: - raise errors.ConsistencyError(f'Invalid literal type {subject}') - # subject is numerical - if not subject.is_a(ns.xsd.numerical): - raise errors.ConsistencyError(f'Expected a numerical literal, found {subject}') - - - def _predicate(self, node: ast.filter.Predicate): - try: - # predicate exists in the schema - pred = self.schema.predicate(node.predicate) - except KeyError: - raise errors.ConsistencyError(f'') # FIXME - if node.reverse: - return pred.range, pred.domain - else: - return pred.domain, pred.range - + # FIXME: We have to ensure that ns.xsd.integer is always known in the schema! + self._parse_filter_expression(self.schema.literal(ns.xsd.integer), node.count) + + + ## conditions + + def _is(self, type_: T_VERTEX, node: ast.filter.Is): # pylint: disable=unused-argument # (node) + if not isinstance(type_, bsc.Node): + raise errors.ConsistencyError(f'expected a Node, found {type_}') + if type_ not in self.schema.nodes(): + raise errors.ConsistencyError(f'node {type_} is not in the schema') + + def _value(self, type_: T_VERTEX, node: ast.filter._Value): # pylint: disable=unused-argument # (node) + # type is a literal + if not isinstance(type_, bsc.Literal): + raise errors.ConsistencyError(f'expected a Literal, found {type_}') + # type exists in the schema + if type_ not in self.schema.literals(): + raise errors.ConsistencyError(f'literal {type_} is not in the schema') + # FIXME: Check if node.value corresponds to type_ + # FIXME: A specific literal might be requested (i.e., a numeric type when used in Has) + + def _bounded(self, type_: T_VERTEX, node: ast.filter._Bounded): # pylint: disable=unused-argument # (node) + # type is a literal + if not isinstance(type_, bsc.Literal): + raise errors.ConsistencyError(f'expected a Literal, found {type_}') + # type exists in the schema + if type_ not in self.schema.literals(): + raise errors.ConsistencyError(f'literal {type_} is not in the schema') + # FIXME: Check if node.value corresponds to type_ - def _oneOf(self, node: ast.filter.OneOf): - dom, rng = None, None - for pred in node: - try: - # parse child expression - subdom, subrng = self._parse_predicate_expression(pred) - # domain and range must be related across all child expressions - if not subdom <= dom and not subdom >= dom: - raise errors.ConsistencyError(f'') # FIXME - if not subrng <= rng and not subrng >= rng: - raise errors.ConsistencyError(f'') # FIXME - # determine overall domain and range - if dom is None or subdom < dom: # pick most specific domain - dom = subdom - if rng is None or subrng > rng: # pick most generic range - rng = subrng - except KeyError: - raise errors.ConsistencyError(f'') - return dom, rng ## EOF ## diff --git a/bsfs/utils/__init__.py b/bsfs/utils/__init__.py index 94680ee..6737cef 100644 --- a/bsfs/utils/__init__.py +++ b/bsfs/utils/__init__.py @@ -9,7 +9,7 @@ import typing # inner-module imports from . import errors -from .commons import typename +from .commons import typename, normalize_args from .uri import URI from .uuid import UUID, UCID @@ -19,6 +19,7 @@ __all__ : typing.Sequence[str] = ( 'URI', 'UUID', 'errors', + 'normalize_args', 'typename', ) diff --git a/bsfs/utils/commons.py b/bsfs/utils/commons.py index bad2fe0..e9f0b7f 100644 --- a/bsfs/utils/commons.py +++ b/bsfs/utils/commons.py @@ -5,10 +5,12 @@ A copy of the license is provided with the project. Author: Matthias Baumgartner, 2022 """ # imports +from collections import abc import typing # exports __all__: typing.Sequence[str] = ( + 'normalize_args', 'typename', ) @@ -19,5 +21,37 @@ def typename(obj) -> str: """Return the type name of *obj*.""" return type(obj).__name__ +# argument type in `normalize_args`. +ArgType = typing.TypeVar('ArgType') # pylint: disable=invalid-name # type vars don't follow the usual convention + +def normalize_args( + *args: typing.Union[ArgType, typing.Iterable[ArgType], typing.Iterator[ArgType]] + ) -> typing.Tuple[ArgType, ...]: + """Arguments to a function can be passed as individual arguments, list-like + structures, or iterables. This function processes any of these styles and + returns a tuple of the respective items. Typically used within a function + provide a flexible interface but sill have parameters in a normalized form. + + Examples: + + >>> normalize_args(0,1,2) + (1,2,3) + >>> normalize_args([0,1,2]) + (1,2,3) + >>> normalize_args(range(3)) + (1,2,3) + + """ + if len(args) == 0: # foo() + return tuple() + if len(args) > 1: # foo(0, 1, 2) + return tuple(args) # type: ignore [arg-type] # we assume that argument styles (arg vs. iterable) are not mixed. + if isinstance(args[0], abc.Iterator): # foo(iter([0,1,2])) + return tuple(args[0]) + if isinstance(args[0], abc.Iterable) and not isinstance(args[0], str): # foo([0, 1, 2]) + return tuple(args[0]) + # foo(0) + return (args[0], ) # type: ignore [return-value] # if args[0] is a str, we assume that ArgType was str. + ## EOF ## diff --git a/bsfs/utils/errors.py b/bsfs/utils/errors.py index c5e8e16..be9d40e 100644 --- a/bsfs/utils/errors.py +++ b/bsfs/utils/errors.py @@ -38,4 +38,7 @@ class UnreachableError(ProgrammingError): class ConfigError(_BSFSError): """User config issue.""" +class BackendError(_BSFSError): + """Could not parse an AST structure.""" + ## EOF ## -- cgit v1.2.3 From 73e39cb4967949025aefe874f401e27b0abb772c Mon Sep 17 00:00:00 2001 From: Matthias Baumgartner Date: Thu, 22 Dec 2022 20:29:57 +0100 Subject: filter ast parser and get method in sparql store --- bsfs/triple_store/base.py | 6 +- bsfs/triple_store/sparql/parse_filter.py | 307 +++++++++++++++++++++++++++++++ bsfs/triple_store/sparql/sparql.py | 51 ++++- 3 files changed, 357 insertions(+), 7 deletions(-) create mode 100644 bsfs/triple_store/sparql/parse_filter.py (limited to 'bsfs') diff --git a/bsfs/triple_store/base.py b/bsfs/triple_store/base.py index 5ff9523..7e03714 100644 --- a/bsfs/triple_store/base.py +++ b/bsfs/triple_store/base.py @@ -113,9 +113,11 @@ class TripleStoreBase(abc.ABC): def get( self, node_type: _schema.Node, - query: ast.filter.FilterExpression, + query: typing.Optional[ast.filter.FilterExpression] = None, ) -> typing.Iterator[URI]: - """Return guids of nodes of type *node_type* that match the *query*.""" + """Return guids of nodes of type *node_type* that match the *query*. + Return all guids of the respective type if *query* is None. + """ @abc.abstractmethod def exists( diff --git a/bsfs/triple_store/sparql/parse_filter.py b/bsfs/triple_store/sparql/parse_filter.py new file mode 100644 index 0000000..d4db0aa --- /dev/null +++ b/bsfs/triple_store/sparql/parse_filter.py @@ -0,0 +1,307 @@ +""" + +Part of the BlackStar filesystem (bsfs) module. +A copy of the license is provided with the project. +Author: Matthias Baumgartner, 2022 +""" +# imports +import typing + +# bsfs imports +from bsfs import schema as bsc +from bsfs.namespace import ns +from bsfs.query import ast +from bsfs.utils import URI, errors + +# exports +__all__: typing.Sequence[str] = ( + 'Filter', + ) + +class _GenHopName(): + """Generator that produces a new unique symbol name with each iteration.""" + + # Symbol name prefix. + prefix: str + + # Current counter. + curr: int + + def __init__(self, prefix: str = '?hop', start: int = 0): + self.prefix = prefix + self.curr = start - 1 + + def __next__(self): + """Generate and return the next unique name.""" + self.curr += 1 + return self.prefix + str(self.curr) + + +class Filter(): + """Translate `bsfs.query.ast.filter` structures into Sparql queries.""" + + # Current schema to validate against. + schema: bsc.Schema + + # Generator that produces unique symbol names. + ngen: _GenHopName + + # Vertex type. + T_VERTEX = typing.Union[bsc.Node, bsc.Literal] + + def __init__(self, schema): + self.schema = schema + self.ngen = _GenHopName() + + def __call__( + self, + root_type: bsc.Node, + root: typing.Optional[ast.filter.FilterExpression] = None, + ) -> str: + """ + """ + # check root_type + if not isinstance(root_type, bsc.Node): + raise errors.BackendError(f'expected Node, found {root_type}') + if root_type not in self.schema.nodes(): + raise errors.ConsistencyError(f'node {root_type} is not in the schema') + # parse root + if root is None: + cond = '' + else: + cond = self._parse_filter_expression(root_type, root, '?ent') + # assemble query + return f''' + SELECT ?ent + WHERE {{ + ?ent <{ns.rdf.type}>/<{ns.rdfs.subClassOf}>* <{root_type.uri}> . + {cond} + }} + ''' + + def _parse_filter_expression(self, type_: T_VERTEX, node: ast.filter.FilterExpression, head: str) -> str: + """Route *node* to the handler of the respective FilterExpression subclass.""" + if isinstance(node, ast.filter.Is): + return self._is(type_, node, head) + if isinstance(node, ast.filter.Not): + return self._not(type_, node, head) + if isinstance(node, ast.filter.Has): + return self._has(type_, node, head) + if isinstance(node, ast.filter.Any): + return self._any(type_, node, head) + if isinstance(node, ast.filter.All): + return self._all(type_, node, head) + if isinstance(node, ast.filter.And): + return self._and(type_, node, head) + if isinstance(node, ast.filter.Or): + return self._or(type_, node, head) + if isinstance(node, ast.filter.Equals): + return self._equals(type_, node, head) + if isinstance(node, ast.filter.Substring): + return self._substring(type_, node, head) + if isinstance(node, ast.filter.StartsWith): + return self._starts_with(type_, node, head) + if isinstance(node, ast.filter.EndsWith): + return self._ends_with(type_, node, head) + if isinstance(node, ast.filter.LessThan): + return self._less_than(type_, node, head) + if isinstance(node, ast.filter.GreaterThan): + return self._greater_than(type_, node, head) + # invalid node + raise errors.BackendError(f'expected filter expression, found {node}') + + def _parse_predicate_expression( + self, + type_: T_VERTEX, + node: ast.filter.PredicateExpression + ) -> typing.Tuple[str, T_VERTEX]: + """Route *node* to the handler of the respective PredicateExpression subclass.""" + if isinstance(node, ast.filter.Predicate): + return self._predicate(type_, node) + if isinstance(node, ast.filter.OneOf): + return self._one_of(type_, node) + # invalid node + raise errors.BackendError(f'expected predicate expression, found {node}') + + def _one_of(self, node_type: T_VERTEX, node: ast.filter.OneOf) -> typing.Tuple[str, T_VERTEX]: + """ + """ + if not isinstance(node_type, bsc.Node): + raise errors.BackendError(f'expected Node, found {node_type}') + # walk through predicates + suburi, rng = set(), None + for pred in node: # OneOf guarantees at least one expression + puri, subrng = self._parse_predicate_expression(node_type, pred) + # track predicate uris + suburi.add(puri) + try: + # check for more generic range + if rng is None or subrng > rng: + rng = subrng + # check range consistency + if not subrng <= rng and not subrng >= rng: + raise errors.ConsistencyError(f'ranges {subrng} and {rng} are not related') + except TypeError as err: # subrng and rng are not comparable + raise errors.ConsistencyError(f'ranges {subrng} and {rng} are not related') from err + if rng is None: + # for mypy to be certain of the rng type + # if rng were None, we'd have gotten a TypeError above (None > None) + raise errors.UnreachableError() + # return joint predicate expression and next range + return '|'.join(suburi), rng + + def _predicate(self, node_type: T_VERTEX, node: ast.filter.Predicate) -> typing.Tuple[str, T_VERTEX]: + """ + """ + # check node_type + if not isinstance(node_type, bsc.Node): + raise errors.BackendError(f'expected Node, found {node_type}') + # fetch predicate and its uri + puri = node.predicate + # get and check predicate, domain, and range + if not self.schema.has_predicate(puri): + raise errors.ConsistencyError(f'predicate {puri} is not in the schema') + pred = self.schema.predicate(puri) + if pred.range is None: + # FIXME: It is a design error that Predicates can have a None range... + raise errors.BackendError(f'predicate {pred} has no range') + dom, rng = pred.domain, pred.range + # encapsulate predicate uri + puri = f'<{puri}>' # type: ignore [assignment] # variable re-use confuses mypy + # apply reverse flag + if node.reverse: + puri = URI('^' + puri) + dom, rng = rng, dom # type: ignore [assignment] # variable re-use confuses mypy + # check path consistency + if not node_type <= dom: + raise errors.ConsistencyError(f'expected type {dom} or subtype thereof, found {node_type}') + # return predicate URI and next node type + return puri, rng + + def _any(self, node_type: T_VERTEX, node: ast.filter.Any, head: str) -> str: + """ + """ + if not isinstance(node_type, bsc.Node): + raise errors.BackendError(f'expected Node, found {node_type}') + # parse predicate + pred, next_type = self._parse_predicate_expression(node_type, node.predicate) + # parse expression + nexthead = next(self.ngen) + expr = self._parse_filter_expression(next_type, node.expr, nexthead) + # combine results + return f'{head} {pred} {nexthead} . {expr}' + + def _all(self, node_type: T_VERTEX, node: ast.filter.All, head: str) -> str: + """ + """ + # NOTE: All(P, E) := Not(Any(P, Not(E))) and EXISTS(P, ?) + if not isinstance(node_type, bsc.Node): + raise errors.BackendError(f'expected Node, found {node_type}') + # parse rewritten ast + expr = self._parse_filter_expression(node_type, + ast.filter.Not( + ast.filter.Any(node.predicate, + ast.filter.Not(node.expr))), head) + # parse predicate for existence constraint + pred, _ = self._parse_predicate_expression(node_type, node.predicate) + temphead = next(self.ngen) + # return existence and rewritten expression + return f'FILTER EXISTS {{ {head} {pred} {temphead} }} . ' + expr + + def _and(self, node_type: T_VERTEX, node: ast.filter.And, head: str) -> str: + """ + """ + sub = [self._parse_filter_expression(node_type, expr, head) for expr in node] + return ' . '.join(sub) + + def _or(self, node_type: T_VERTEX, node: ast.filter.Or, head: str) -> str: + """ + """ + # potential special case optimization: + # * ast: Or(Equals('foo'), Equals('bar'), ...) + # * query: VALUES ?head { "value1"^^<...> "value2"^^<...> "value3"^<...> ... } + sub = [self._parse_filter_expression(node_type, expr, head) for expr in node] + sub = ['{' + expr + '}' for expr in sub] + return ' UNION '.join(sub) + + def _not(self, node_type: T_VERTEX, node: ast.filter.Not, head: str) -> str: + """ + """ + expr = self._parse_filter_expression(node_type, node.expr, head) + if isinstance(node_type, bsc.Literal): + return f'MINUS {{ {expr} }}' + # NOTE: for bsc.Node types, we must include at least one expression in the body of MINUS, + # otherwise the connection between the context and body of MINUS is lost. + # The simplest (and non-interfering) choice is a type statement. + return f'MINUS {{ {head} <{ns.rdf.type}>/<{ns.rdfs.subClassOf}>* <{node_type.uri}> . {expr} }}' + + def _has(self, node_type: T_VERTEX, node: ast.filter.Has, head: str) -> str: + """ + """ + if not isinstance(node_type, bsc.Node): + raise errors.BackendError(f'expected Node, found {node_type}') + # parse predicate + pred, _ = self._parse_predicate_expression(node_type, node.predicate) + # get new heads + inner = next(self.ngen) + outer = next(self.ngen) + # predicate count expression (fetch number of predicates at *head*) + num_preds = f'{{ SELECT (COUNT(distinct {inner}) as {outer}) WHERE {{ {head} {pred} {inner} }} }}' + # count expression + # FIXME: We have to ensure that ns.xsd.integer is always known in the schema! + count_bounds = self._parse_filter_expression(self.schema.literal(ns.xsd.integer), node.count, outer) + # combine + return num_preds + ' . ' + count_bounds + + def _is(self, node_type: T_VERTEX, node: ast.filter.Is, head: str) -> str: + """ + """ + if not isinstance(node_type, bsc.Node): + raise errors.BackendError(f'expected Node, found {node_type}') + return f'VALUES {head} {{ <{node.value}> }}' + + def _equals(self, node_type: T_VERTEX, node: ast.filter.Equals, head: str) -> str: + """ + """ + if not isinstance(node_type, bsc.Literal): + raise errors.BackendError(f'expected Literal, found {node}') + return f'VALUES {head} {{ "{node.value}"^^<{node_type.uri}> }}' + + def _substring(self, node_type: T_VERTEX, node: ast.filter.Substring, head: str) -> str: + """ + """ + if not isinstance(node_type, bsc.Literal): + raise errors.BackendError(f'expected Literal, found {node_type}') + return f'FILTER contains(str({head}), "{node.value}")' + + def _starts_with(self, node_type: T_VERTEX, node: ast.filter.StartsWith, head: str) -> str: + """ + """ + if not isinstance(node_type, bsc.Literal): + raise errors.BackendError(f'expected Literal, found {node_type}') + return f'FILTER strstarts(str({head}), "{node.value}")' + + def _ends_with(self, node_type: T_VERTEX, node: ast.filter.EndsWith, head: str) -> str: + """ + """ + if not isinstance(node_type, bsc.Literal): + raise errors.BackendError(f'expected Literal, found {node_type}') + return f'FILTER strends(str({head}), "{node.value}")' + + def _less_than(self, node_type: T_VERTEX, node: ast.filter.LessThan, head: str) -> str: + """ + """ + if not isinstance(node_type, bsc.Literal): + raise errors.BackendError(f'expected Literal, found {node_type}') + equality = '=' if not node.strict else '' + return f'FILTER ({head} <{equality} {float(node.threshold)})' + + def _greater_than(self, node_type: T_VERTEX, node: ast.filter.GreaterThan, head: str) -> str: + """ + """ + if not isinstance(node_type, bsc.Literal): + raise errors.BackendError(f'expected Literal, found {node_type}') + equality = '=' if not node.strict else '' + return f'FILTER ({head} >{equality} {float(node.threshold)})' + +## EOF ## diff --git a/bsfs/triple_store/sparql/sparql.py b/bsfs/triple_store/sparql/sparql.py index 7172f34..c3cbff6 100644 --- a/bsfs/triple_store/sparql/sparql.py +++ b/bsfs/triple_store/sparql/sparql.py @@ -15,6 +15,7 @@ from bsfs.query import ast from bsfs.utils import errors, URI # inner-module imports +from . import parse_filter from .. import base @@ -86,11 +87,15 @@ class SparqlStore(base.TripleStoreBase): # The local schema. _schema: bsc.Schema + # Filter parser + _filter_parser: parse_filter.Filter + def __init__(self): super().__init__(None) self._graph = rdflib.Graph() self._transaction = _Transaction(self._graph) self._schema = bsc.Schema.Empty() + self._filter_parser = parse_filter.Filter(self._schema) # NOTE: mypy and pylint complain about the **kwargs not being listed (contrasting super) # However, not having it here is clearer since it's explicit that there are no arguments. @@ -127,10 +132,17 @@ class SparqlStore(base.TripleStoreBase): # get deleted classes sub = self.schema - schema - # remove predicate instances for pred in sub.predicates: + # remove predicate instances for src, trg in self._graph.subject_objects(rdflib.URIRef(pred.uri)): self._transaction.remove((src, rdflib.URIRef(pred.uri), trg)) + # remove predicate definition + if pred.parent is not None: + self._transaction.remove(( + rdflib.URIRef(pred.uri), + rdflib.RDFS.subClassOf, + rdflib.URIRef(pred.parent.uri), + )) # remove node instances for node in sub.nodes: @@ -144,17 +156,46 @@ class SparqlStore(base.TripleStoreBase): self._transaction.remove((inst, pred, trg)) # remove instance self._transaction.remove((inst, rdflib.RDF.type, rdflib.URIRef(node.uri))) - - # NOTE: Nothing to do for literals + # remove node definition + if node.parent is not None: + self._transaction.remove(( + rdflib.URIRef(node.uri), + rdflib.RDFS.subClassOf, + rdflib.URIRef(node.parent.uri), + )) + + for lit in sub.literals: + # remove literal definition + if lit.parent is not None: + self._transaction.remove(( + rdflib.URIRef(lit.uri), + rdflib.RDFS.subClassOf, + rdflib.URIRef(lit.parent.uri), + )) + + # add predicate, node, and literal hierarchies to the graph + for itm in itertools.chain(schema.predicates(), schema.nodes(), schema.literals()): + if itm.parent is not None: + self._transaction.add((rdflib.URIRef(itm.uri), rdflib.RDFS.subClassOf, rdflib.URIRef(itm.parent.uri))) # commit instance changes self.commit() # migrate schema self._schema = schema + self._filter_parser.schema = schema - def get(self, node_type: bsc.Node, query: ast.filter.FilterExpression) -> typing.Iterator[URI]: - raise NotImplementedError() + def get( + self, + node_type: bsc.Node, + query: typing.Optional[ast.filter.FilterExpression] = None, + ) -> typing.Iterator[URI]: + if node_type not in self.schema.nodes(): + raise errors.ConsistencyError(f'{node_type} is not defined in the schema') + if not isinstance(query, ast.filter.FilterExpression): + raise TypeError(query) + for guid, in self._graph.query(self._filter_parser(node_type, query)): + yield URI(guid) def _has_type(self, subject: URI, node_type: bsc.Node) -> bool: """Return True if *subject* is a node of class *node_type* or a subclass thereof.""" -- cgit v1.2.3 From ca7ee6c59d2eb3f4ec4d16e392d12d946cd85e4d Mon Sep 17 00:00:00 2001 From: Matthias Baumgartner Date: Thu, 22 Dec 2022 20:33:00 +0100 Subject: filter-ast based get interface in graph. * Graph interface: Graph.get added * Node instance resolver so that Nodes can be used in a filter ast * AC interface: filter_read added to interface * upstream test adjustments of previous sparql store changes --- bsfs/graph/ac/base.py | 4 ++ bsfs/graph/ac/null.py | 5 ++ bsfs/graph/graph.py | 28 +++++++-- bsfs/graph/resolve.py | 161 ++++++++++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 193 insertions(+), 5 deletions(-) create mode 100644 bsfs/graph/resolve.py (limited to 'bsfs') diff --git a/bsfs/graph/ac/base.py b/bsfs/graph/ac/base.py index bc9aeb3..0703e2e 100644 --- a/bsfs/graph/ac/base.py +++ b/bsfs/graph/ac/base.py @@ -10,6 +10,7 @@ import typing # bsfs imports from bsfs import schema +from bsfs.query import ast from bsfs.triple_store import TripleStoreBase from bsfs.utils import URI @@ -67,5 +68,8 @@ class AccessControlBase(abc.ABC): def createable(self, node_type: schema.Node, guids: typing.Iterable[URI]) -> typing.Iterable[URI]: """Return nodes that are allowed to be created.""" + @abc.abstractmethod + def filter_read(self, node_type: schema.Node, query: ast.filter.FilterExpression) -> ast.filter.FilterExpression: + """Re-write a filter *query* to get (i.e., read) *node_type* nodes.""" ## EOF ## diff --git a/bsfs/graph/ac/null.py b/bsfs/graph/ac/null.py index 36838bd..12b4e87 100644 --- a/bsfs/graph/ac/null.py +++ b/bsfs/graph/ac/null.py @@ -10,6 +10,7 @@ import typing # bsfs imports from bsfs import schema from bsfs.namespace import ns +from bsfs.query import ast from bsfs.utils import URI # inner-module imports @@ -49,4 +50,8 @@ class NullAC(base.AccessControlBase): """Return nodes that are allowed to be created.""" return guids + def filter_read(self, node_type: schema.Node, query: ast.filter.FilterExpression) -> ast.filter.FilterExpression: + """Re-write a filter *query* to get (i.e., read) *node_type* nodes.""" + return query + ## EOF ## diff --git a/bsfs/graph/graph.py b/bsfs/graph/graph.py index 51fe75d..f030fed 100644 --- a/bsfs/graph/graph.py +++ b/bsfs/graph/graph.py @@ -9,13 +9,15 @@ import os import typing # bsfs imports -from bsfs.query import ast +from bsfs.query import ast, validate from bsfs.schema import Schema from bsfs.triple_store import TripleStoreBase from bsfs.utils import URI, typename # inner-module imports +from . import ac from . import nodes as _nodes +from . import resolve # exports __all__: typing.Sequence[str] = ( @@ -44,6 +46,9 @@ class Graph(): def __init__(self, backend: TripleStoreBase, user: URI): self._backend = backend self._user = user + self._resolver = resolve.Filter(self._backend.schema) + self._validate = validate.Filter(self._backend.schema) + self._ac = ac.NullAC(self._backend, self._user) # ensure Graph schema requirements self.migrate(self._backend.schema) @@ -85,6 +90,9 @@ class Graph(): # migrate schema in backend # FIXME: consult access controls! self._backend.schema = schema + # re-initialize members + self._resolver.schema = self.schema + self._validate.schema = self.schema # return self return self @@ -108,11 +116,21 @@ class Graph(): *node_type*) once some data is assigned to them. """ - type_ = self.schema.node(node_type) - return _nodes.Nodes(self._backend, self._user, type_, {guid}) + return self.nodes(node_type, {guid}) - def get(self, node_type: URI, subject: ast.filter.FilterExpression) -> _nodes.Nodes: + def get(self, node_type: URI, query: ast.filter.FilterExpression) -> _nodes.Nodes: # FIXME: How about empty query? """Return a `Nodes` instance over all nodes of type *node_type* that match the *subject* query.""" - raise NotImplementedError() + # get node type + type_ = self.schema.node(node_type) + # resolve Nodes instances + query = self._resolver(type_, query) + # add access controls to query + query = self._ac.filter_read(type_, query) + # validate query + self._validate(type_, query) + # query the backend + guids = self._backend.get(type_, query) # no need to materialize + # return Nodes instance + return _nodes.Nodes(self._backend, self._user, type_, guids) ## EOF ## diff --git a/bsfs/graph/resolve.py b/bsfs/graph/resolve.py new file mode 100644 index 0000000..feb0855 --- /dev/null +++ b/bsfs/graph/resolve.py @@ -0,0 +1,161 @@ +""" + +Part of the BlackStar filesystem (bsfs) module. +A copy of the license is provided with the project. +Author: Matthias Baumgartner, 2022 +""" +# imports +import typing + +# bsfs imports +from bsfs import schema as bsc +from bsfs.query import ast +from bsfs.utils import errors + +# inner-module imports +from . import nodes + +# exports +__all__: typing.Sequence[str] = ( + 'Filter', + ) + + +## code ## + +class Filter(): + """Rewrites the query to replace `bsfs.graph.nodes.Nodes` instances with the respective URI. + Does only limited type checking and schema validation. + Use `bsfs.schema.validate.Filter` to do so. + + Example: + input: Any(ns.bse.tag, Is(Nodes(...))) + output: Any(ns.bse.tag, Or(Is(...), Is(...), ...))) + + >>> tags = graph.node(ns.bsfs.Tag, 'http://example.com/me/tag#1234') + >>> graph.get(ns.bsfs.Entity, ast.filter.Any(ns.bse.tag, ast.filter.Is(tags))) + + """ + + T_VERTEX = typing.Union[bsc.Node, bsc.Literal] + + def __init__(self, schema): + self.schema = schema + + def __call__(self, root_type: bsc.Node, node: ast.filter.FilterExpression): + return self._parse_filter_expression(root_type, node) + + def _parse_filter_expression( + self, + type_: T_VERTEX, + node: ast.filter.FilterExpression, + ) -> ast.filter.FilterExpression: + """Route *node* to the handler of the respective FilterExpression subclass.""" + if isinstance(node, ast.filter.Is): + return self._is(type_, node) + if isinstance(node, ast.filter.Not): + return self._not(type_, node) + if isinstance(node, ast.filter.Has): + return self._has(type_, node) + if isinstance(node, ast.filter.Any): + return self._any(type_, node) + if isinstance(node, ast.filter.All): + return self._all(type_, node) + if isinstance(node, ast.filter.And): + return self._and(type_, node) + if isinstance(node, ast.filter.Or): + return self._or(type_, node) + if isinstance(node, (ast.filter.Equals, ast.filter.Substring, \ + ast.filter.StartsWith, ast.filter.EndsWith)): + return self._value(type_, node) + if isinstance(node, (ast.filter.LessThan, ast.filter.GreaterThan)): + return self._bounded(type_, node) + # invalid node + raise errors.BackendError(f'expected filter expression, found {node}') + + def _parse_predicate_expression(self, node: ast.filter.PredicateExpression) -> T_VERTEX: + """Route *node* to the handler of the respective PredicateExpression subclass.""" + if isinstance(node, ast.filter.Predicate): + return self._predicate(node) + if isinstance(node, ast.filter.OneOf): + return self._one_of(node) + # invalid node + raise errors.BackendError(f'expected predicate expression, found {node}') + + def _predicate(self, node: ast.filter.Predicate) -> T_VERTEX: + if not self.schema.has_predicate(node.predicate): + raise errors.ConsistencyError(f'predicate {node.predicate} is not in the schema') + pred = self.schema.predicate(node.predicate) + dom, rng = pred.domain, pred.range + if node.reverse: + dom, rng = rng, dom + return rng + + def _one_of(self, node: ast.filter.OneOf) -> T_VERTEX: + # determine domain and range types + rng = None + for pred in node: + # parse child expression + subrng = self._parse_predicate_expression(pred) + # determine the next type + try: + if rng is None or subrng > rng: # pick most generic range + rng = subrng + except TypeError as err: + raise errors.ConsistencyError(f'ranges {subrng} and {rng} are not related') from err + if rng is None: + raise errors.UnreachableError() + return rng + + def _any(self, type_: T_VERTEX, node: ast.filter.Any) -> ast.filter.Any: # pylint: disable=unused-argument + next_type = self._parse_predicate_expression(node.predicate) + return ast.filter.Any(node.predicate, self._parse_filter_expression(next_type, node.expr)) + + def _all(self, type_: T_VERTEX, node: ast.filter.All) -> ast.filter.All: # pylint: disable=unused-argument + next_type = self._parse_predicate_expression(node.predicate) + return ast.filter.All(node.predicate, self._parse_filter_expression(next_type, node.expr)) + + def _and(self, type_: T_VERTEX, node: ast.filter.And) -> ast.filter.And: + return ast.filter.And({self._parse_filter_expression(type_, expr) for expr in node}) + + def _or(self, type_: T_VERTEX, node: ast.filter.Or) -> ast.filter.Or: + return ast.filter.Or({self._parse_filter_expression(type_, expr) for expr in node}) + + def _not(self, type_: T_VERTEX, node: ast.filter.Not) -> ast.filter.Not: + return ast.filter.Not(self._parse_filter_expression(type_, node.expr)) + + def _has(self, type_: T_VERTEX, node: ast.filter.Has) -> ast.filter.Has: # pylint: disable=unused-argument + return node + + def _value(self, type_: T_VERTEX, node: ast.filter._Value) -> ast.filter._Value: # pylint: disable=unused-argument + return node + + def _bounded(self, type_: T_VERTEX, node: ast.filter._Bounded) -> ast.filter._Bounded: # pylint: disable=unused-argument + return node + + def _is(self, type_: T_VERTEX, node: ast.filter.Is) -> typing.Union[ast.filter.Or, ast.filter.Is]: + # check if action is needed + if not isinstance(node.value, nodes.Nodes): + return node + # check schema consistency + if node.value.node_type not in self.schema.nodes(): + raise errors.ConsistencyError(f'node {node.value.node_type} is not in the schema') + # check type compatibility + if not isinstance(type_, bsc.Node): + raise errors.ConsistencyError(f'expected a node, found {type_}') + if not node.value.node_type <= type_: + raise errors.ConsistencyError(f'expected type {type_} or subtype thereof, found {node.value.node_type}') + # NOTE: We assume that the node type is checked when writing to the backend. + # Links to any of the guids can therefore only exist if the type matches. + # Hence, we don't add a type check/constrain here. + return ast.filter.Or(ast.filter.Is(guid) for guid in node.value.guids) + # optimized code, removing unnecessary ast.filter.Or + #guids = set(node.value.guids) + #if len(guids) == 0: + # raise errors.BackendError(f'') + #if len(guids) == 1: + # return ast.filter.Nodeid(next(iter(guids))) + #return ast.filter.Or(ast.filter.Is(guid) for guid in guids) + + +## EOF ## -- cgit v1.2.3 From 7f5a2920ef311b2077300714d7700313077a0bf6 Mon Sep 17 00:00:00 2001 From: Matthias Baumgartner Date: Thu, 22 Dec 2022 20:35:38 +0100 Subject: cosmetic changes --- bsfs/graph/nodes.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) (limited to 'bsfs') diff --git a/bsfs/graph/nodes.py b/bsfs/graph/nodes.py index c417a0e..5a93f77 100644 --- a/bsfs/graph/nodes.py +++ b/bsfs/graph/nodes.py @@ -53,7 +53,7 @@ class Nodes(): self._user = user self._node_type = node_type self._guids = set(guids) - self.__ac = ac.NullAC(self._backend, self._user) + self._ac = ac.NullAC(self._backend, self._user) def __eq__(self, other: typing.Any) -> bool: return isinstance(other, Nodes) \ @@ -135,7 +135,7 @@ class Nodes(): # FIXME: Needed? Could be integrated into other AC methods (by passing the predicate!) # This could allow more fine-grained predicate control (e.g. based on ownership) # rather than a global approach like this. - if self.__ac.is_protected_predicate(pred): + if self._ac.is_protected_predicate(pred): raise errors.PermissionDeniedError(pred) # set operation affects all nodes (if possible) @@ -149,7 +149,7 @@ class Nodes(): # check write permissions on existing nodes # As long as the user has write permissions, we don't restrict # the creation or modification of literal values. - guids = set(self.__ac.write_literal(node_type, guids)) + guids = set(self._ac.write_literal(node_type, guids)) # insert literals # TODO: Support passing iterators as values for non-unique predicates @@ -172,14 +172,14 @@ class Nodes(): # Link permissions cover adding and removing links on the source node. # Specifically, link permissions also allow to remove links to other # nodes if needed (e.g. for unique predicates). - guids = set(self.__ac.link_from_node(node_type, guids)) + guids = set(self._ac.link_from_node(node_type, guids)) # get link targets targets = set(value.guids) # ensure existence of value nodes; create nodes if need be targets = set(self._ensure_nodes(value.node_type, targets)) # check link permissions on target nodes - targets = set(self.__ac.link_to_node(value.node_type, targets)) + targets = set(self._ac.link_to_node(value.node_type, targets)) # insert node links self._backend.set( @@ -203,14 +203,14 @@ class Nodes(): # create nodes if need be if len(missing) > 0: # check which missing nodes can be created - missing = set(self.__ac.createable(node_type, missing)) + missing = set(self._ac.createable(node_type, missing)) # create nodes self._backend.create(node_type, missing) # add bookkeeping triples self._backend.set(node_type, missing, self._backend.schema.predicate(ns.bsm.t_created), [time.time()]) # add permission triples - self.__ac.create(node_type, missing) + self._ac.create(node_type, missing) # return available nodes return existing | missing -- cgit v1.2.3 From c664d19e7d4a0aa0762c30a72ae238cf818891ab Mon Sep 17 00:00:00 2001 From: Matthias Baumgartner Date: Wed, 11 Jan 2023 21:20:47 +0100 Subject: Feature support in the schema * Type annotations * Feature type * Moved from_string from Schema to its own file/function * Root predicate has a valid (not-None) range * ROOT_... export in schema.types * Empty as the default Schema constructor * Schema loads some additional default symbols * _Type instances compare along class hierarchy --- bsfs/schema/__init__.py | 5 +- bsfs/schema/schema.py | 111 +++++----------------------- bsfs/schema/serialize.py | 143 ++++++++++++++++++++++++++++++++++++ bsfs/schema/types.py | 183 ++++++++++++++++++++++++++++++++++++++++------- 4 files changed, 325 insertions(+), 117 deletions(-) create mode 100644 bsfs/schema/serialize.py (limited to 'bsfs') diff --git a/bsfs/schema/__init__.py b/bsfs/schema/__init__.py index ad4d456..dc24313 100644 --- a/bsfs/schema/__init__.py +++ b/bsfs/schema/__init__.py @@ -9,7 +9,8 @@ import typing # inner-module imports from .schema import Schema -from .types import Literal, Node, Predicate +from .serialize import from_string, to_string +from .types import Literal, Node, Predicate, _Vertex # FIXME: _Vertex # exports __all__: typing.Sequence[str] = ( @@ -17,6 +18,8 @@ __all__: typing.Sequence[str] = ( 'Node', 'Predicate', 'Schema', + 'from_string', + 'to_string', ) ## EOF ## diff --git a/bsfs/schema/schema.py b/bsfs/schema/schema.py index c5d4571..1c4c807 100644 --- a/bsfs/schema/schema.py +++ b/bsfs/schema/schema.py @@ -51,11 +51,13 @@ class Schema(): def __init__( self, - predicates: typing.Iterable[types.Predicate], + predicates: typing.Optional[typing.Iterable[types.Predicate]] = None, nodes: typing.Optional[typing.Iterable[types.Node]] = None, literals: typing.Optional[typing.Iterable[types.Literal]] = None, ): # materialize arguments + if predicates is None: + predicates = set() if nodes is None: nodes = set() if literals is None: @@ -63,24 +65,36 @@ class Schema(): nodes = set(nodes) literals = set(literals) predicates = set(predicates) + + # add root types to the schema + nodes.add(types.ROOT_NODE) + literals.add(types.ROOT_LITERAL) + predicates.add(types.ROOT_PREDICATE) + # add minimally necessary types to the schema + literals.add(types.ROOT_NUMBER) + predicates.add(types.ROOT_FEATURE) + # include parents in predicates set # TODO: review type annotations and ignores for python >= 3.11 (parents is _Type but should be typing.Self) predicates |= {par for pred in predicates for par in pred.parents()} # type: ignore [misc] # include predicate domain in nodes set nodes |= {pred.domain for pred in predicates} # include predicate range in nodes and literals sets - prange = {pred.range for pred in predicates if pred.range is not None} + prange = {pred.range for pred in predicates} nodes |= {vert for vert in prange if isinstance(vert, types.Node)} literals |= {vert for vert in prange if isinstance(vert, types.Literal)} + # NOTE: ROOT_PREDICATE has a _Vertex as range which is neither in nodes nor literals + # FIXME: with the ROOT_VERTEX missing, the schema is not complete anymore! + # include parents in nodes and literals sets - # NOTE: Must be done after predicate domain/range was handled - # so that their parents are included as well. + # NOTE: Must come after predicate domain/range was handled to have their parents as well. nodes |= {par for node in nodes for par in node.parents()} # type: ignore [misc] literals |= {par for lit in literals for par in lit.parents()} # type: ignore [misc] # assign members self._nodes = {node.uri: node for node in nodes} self._literals = {lit.uri: lit for lit in literals} self._predicates = {pred.uri: pred for pred in predicates} + # verify unique uris if len(nodes) != len(self._nodes): raise errors.ConsistencyError('inconsistent nodes') @@ -214,6 +228,7 @@ class Schema(): >>> Schema.Union([a, b, c]) """ + # FIXME: copy type annotations? if len(args) == 0: raise TypeError('Schema.Union requires at least one argument (Schema or Iterable)') if isinstance(args[0], cls): # args is sequence of Schema instances @@ -295,92 +310,4 @@ class Schema(): """Return the Literal matching the *uri*.""" return self._literals[uri] - - ## constructors ## - - - @classmethod - def Empty(cls) -> 'Schema': # pylint: disable=invalid-name # capitalized classmethod - """Return a minimal Schema.""" - node = types.Node(ns.bsfs.Node, None) - literal = types.Literal(ns.bsfs.Literal, None) - predicate = types.Predicate( - uri=ns.bsfs.Predicate, - parent=None, - domain=node, - range=None, - unique=False, - ) - return cls((predicate, ), (node, ), (literal, )) - - - @classmethod - def from_string(cls, schema: str) -> 'Schema': # pylint: disable=invalid-name # capitalized classmethod - """Load and return a Schema from a string.""" - # parse string into rdf graph - graph = rdflib.Graph() - graph.parse(data=schema, format='turtle') - - def _fetch_hierarchically(factory, curr): - # emit current node - yield curr - # walk through childs - for child in graph.subjects(rdflib.URIRef(ns.rdfs.subClassOf), rdflib.URIRef(curr.uri)): - # convert to URI - child = URI(child) - # check circular dependency - if child == curr.uri or child in {node.uri for node in curr.parents()}: - raise errors.ConsistencyError('circular dependency') - # recurse and emit (sub*)childs - yield from _fetch_hierarchically(factory, factory(child, curr)) - - # fetch nodes - nodes = set(_fetch_hierarchically(types.Node, types.Node(ns.bsfs.Node, None))) - nodes_lut = {node.uri: node for node in nodes} - if len(nodes_lut) != len(nodes): - raise errors.ConsistencyError('inconsistent nodes') - - # fetch literals - literals = set(_fetch_hierarchically(types.Literal, types.Literal(ns.bsfs.Literal, None))) - literals_lut = {lit.uri: lit for lit in literals} - if len(literals_lut) != len(literals): - raise errors.ConsistencyError('inconsistent literals') - - # fetch predicates - def build_predicate(uri, parent): - uri = rdflib.URIRef(uri) - # get domain - domains = set(graph.objects(uri, rdflib.RDFS.domain)) - if len(domains) != 1: - raise errors.ConsistencyError(f'inconsistent domain: {domains}') - dom = nodes_lut.get(next(iter(domains))) - if dom is None: - raise errors.ConsistencyError('missing domain') - # get range - ranges = set(graph.objects(uri, rdflib.RDFS.range)) - if len(ranges) != 1: - raise errors.ConsistencyError(f'inconsistent range: {ranges}') - rng = next(iter(ranges)) - rng = nodes_lut.get(rng, literals_lut.get(rng)) - if rng is None: - raise errors.ConsistencyError('missing range') - # get unique flag - uniques = set(graph.objects(uri, rdflib.URIRef(ns.bsfs.unique))) - if len(uniques) != 1: - raise errors.ConsistencyError(f'inconsistent unique flags: {uniques}') - unique = bool(next(iter(uniques))) - # build Predicate - return types.Predicate(URI(uri), parent, dom, rng, unique) - - root_predicate = types.Predicate( - uri=ns.bsfs.Predicate, - parent=None, - domain=nodes_lut[ns.bsfs.Node], - range=None, # FIXME: Unclear how to handle this! Can be either a Literal or a Node - unique=False, - ) - predicates = _fetch_hierarchically(build_predicate, root_predicate) - # return Schema - return cls(predicates, nodes, literals) - ## EOF ## diff --git a/bsfs/schema/serialize.py b/bsfs/schema/serialize.py new file mode 100644 index 0000000..1222aa6 --- /dev/null +++ b/bsfs/schema/serialize.py @@ -0,0 +1,143 @@ +""" + +Part of the BlackStar filesystem (bsfs) module. +A copy of the license is provided with the project. +Author: Matthias Baumgartner, 2022 +""" +# standard imports +from collections import abc +import itertools +import typing + +# external imports +import rdflib + +# bsfs imports +from bsfs.namespace import ns +from bsfs.utils import errors, URI, typename + +# inner-module imports +from . import types +from . import schema + +# exports +__all__: typing.Sequence[str] = ( + 'to_string', + 'from_string', + ) + + +## code ## + +def from_string(schema_str: str) -> schema.Schema: + """Load and return a Schema from a string.""" + # parse string into rdf graph + graph = rdflib.Graph() + graph.parse(data=schema_str, format='turtle') + + # helper functions + def _convert(value): + """Convert the subject type from rdflib to a bsfs native type.""" + if isinstance(value, rdflib.Literal): + return value.value + if isinstance(value, rdflib.URIRef): + return URI(value) + raise errors.BackendError(f'expected Literal or URIRef, found {typename(value)}') + + def _fetch_hierarchically(factory, curr): + """Walk through a rdfs:subClassOf hierarchy, creating symbols along the way.""" + # emit current node + yield curr + # walk through childs + for child in graph.subjects(rdflib.URIRef(ns.rdfs.subClassOf), rdflib.URIRef(curr.uri)): + # fetch annotations + annotations = { + URI(pred): _convert(value) + for pred, value # FIXME: preserve datatype of value?! + in graph.predicate_objects(child) + if URI(pred) != ns.rdfs.subClassOf + } + # convert child to URI + child = URI(child) + # check circular dependency + if child == curr.uri or child in {node.uri for node in curr.parents()}: + raise errors.ConsistencyError('circular dependency') + # recurse and emit (sub*)childs + yield from _fetch_hierarchically(factory, factory(child, curr, **annotations)) + + # fetch nodes + nodes = set(_fetch_hierarchically(types.Node, types.ROOT_NODE)) + nodes_lut = {node.uri: node for node in nodes} + if len(nodes_lut) != len(nodes): + raise errors.ConsistencyError('inconsistent nodes') + + # fetch literals + literals = set(_fetch_hierarchically(types.Literal, types.ROOT_LITERAL)) + literals_lut = {lit.uri: lit for lit in literals} + if len(literals_lut) != len(literals): + raise errors.ConsistencyError('inconsistent literals') + + # fetch predicates + # FIXME: type annotation + def _fetch_value(subject: URI, predicate: rdflib.URIRef, value_factory) -> typing.Optional[typing.Any]: + """Fetch the object of a given subject and predicate. Raises a `errors.ConsistencyError` if multiple objects match.""" + values = list(graph.objects(rdflib.URIRef(subject), predicate)) + if len(values) == 0: + return None + elif len(values) == 1: + return value_factory(values[0]) + else: + raise errors.ConsistencyError(f'{subject} has multiple values for predicate {str(predicate)}, expected zero or one') + + def _build_predicate(uri, parent, **annotations): + """Predicate factory.""" + # break out on root feature type + if uri == types.ROOT_FEATURE.uri: + return types.ROOT_FEATURE + # clean annotations + annotations.pop(ns.rdfs.domain, None) + annotations.pop(ns.rdfs.range, None) + annotations.pop(ns.bsfs.unique, None) + # get domain + dom = _fetch_value(uri, rdflib.RDFS.domain, URI) + if dom is not None and dom not in nodes_lut: + raise errors.ConsistencyError(f'predicate {uri} has undefined domain {dom}') + elif dom is not None: + dom = nodes_lut[dom] + # get range + rng = _fetch_value(uri, rdflib.RDFS.range, URI) + if rng is not None and rng not in nodes_lut and rng not in literals_lut: + raise errors.ConsistencyError(f'predicate {uri} has undefined range {rng}') + elif rng is not None: + rng = nodes_lut.get(rng, literals_lut.get(rng)) + # get unique + unique = _fetch_value(uri, rdflib.URIRef(ns.bsfs.unique), bool) + # handle feature types + if isinstance(parent, types.Feature): + # clean annotations + annotations.pop(ns.bsfs.dimension, None) + annotations.pop(ns.bsfs.dtype, None) + annotations.pop(ns.bsfs.distance, None) + # get dimension + dimension = _fetch_value(uri, rdflib.URIRef(ns.bsfs.dimension), int) + # get dtype + dtype = _fetch_value(uri, rdflib.URIRef(ns.bsfs.dtype), URI) + # get distance + distance = _fetch_value(uri, rdflib.URIRef(ns.bsfs.distance), URI) + # return feature + return parent.get_child(URI(uri), domain=dom, range=rng, unique=unique, + dtype=dtype, dimension=dimension, distance=distance, **annotations) + # handle non-feature predicate + return parent.get_child(URI(uri), domain=dom, range=rng, unique=unique, **annotations) + predicates = _fetch_hierarchically(_build_predicate, types.ROOT_PREDICATE) + + return schema.Schema(predicates, nodes, literals) + + + +def to_string(schema_inst: schema.Schema) -> str: + """ + """ + raise NotImplementedError() + +## EOF ## diff --git a/bsfs/schema/types.py b/bsfs/schema/types.py index 54a7e99..e737263 100644 --- a/bsfs/schema/types.py +++ b/bsfs/schema/types.py @@ -8,6 +8,7 @@ Author: Matthias Baumgartner, 2022 import typing # bsfs imports +from bsfs.namespace import ns from bsfs.utils import errors, URI, typename # exports @@ -15,6 +16,7 @@ __all__: typing.Sequence[str] = ( 'Literal', 'Node', 'Predicate', + 'Feature', ) @@ -99,9 +101,11 @@ class _Type(): self, uri: URI, parent: typing.Optional['_Type'] = None, + **annotations: typing.Any, ): self.uri = uri self.parent = parent + self.annotations = annotations def parents(self) -> typing.Generator['_Type', None, None]: """Generate a list of parent nodes.""" @@ -110,9 +114,17 @@ class _Type(): yield curr curr = curr.parent - def get_child(self, uri: URI, **kwargs): + def get_child( + self, + uri: URI, + **kwargs, + ): """Return a child of the current class.""" - return type(self)(uri, self, **kwargs) + return type(self)( + uri=uri, + parent=self, + **kwargs + ) def __str__(self) -> str: return f'{typename(self)}({self.uri})' @@ -138,7 +150,7 @@ class _Type(): def __lt__(self, other: typing.Any) -> bool: """Return True iff *self* is a true subclass of *other*.""" - if not type(self) is type(other): # type mismatch # pylint: disable=unidiomatic-typecheck + if not isinstance(other, type(self)): return NotImplemented if self.uri == other.uri: # equivalence return False @@ -151,7 +163,7 @@ class _Type(): def __le__(self, other: typing.Any) -> bool: """Return True iff *self* is equivalent or a subclass of *other*.""" - if not type(self) is type(other): # type mismatch # pylint: disable=unidiomatic-typecheck + if not isinstance(other, type(self)): return NotImplemented if self.uri == other.uri: # equivalence return True @@ -164,7 +176,7 @@ class _Type(): def __gt__(self, other: typing.Any) -> bool: """Return True iff *self* is a true superclass of *other*.""" - if not type(self) is type(other): # type mismatch # pylint: disable=unidiomatic-typecheck + if not isinstance(other, type(self)): return NotImplemented if self.uri == other.uri: # equivalence return False @@ -177,7 +189,7 @@ class _Type(): def __ge__(self, other: typing.Any) -> bool: """Return True iff *self* is eqiuvalent or a superclass of *other*.""" - if not type(self) is type(other): # type mismatch # pylint: disable=unidiomatic-typecheck + if not isinstance(other, type(self)): return NotImplemented if self.uri == other.uri: # equivalence return True @@ -191,30 +203,33 @@ class _Type(): class _Vertex(_Type): """Graph vertex types. Can be a Node or a Literal.""" - def __init__(self, uri: URI, parent: typing.Optional['_Vertex']): - super().__init__(uri, parent) + parent: typing.Optional['_Vertex'] + def __init__(self, uri: URI, parent: typing.Optional['_Vertex'], **kwargs): + super().__init__(uri, parent, **kwargs) class Node(_Vertex): """Node type.""" - def __init__(self, uri: URI, parent: typing.Optional['Node']): - super().__init__(uri, parent) + parent: typing.Optional['Node'] + def __init__(self, uri: URI, parent: typing.Optional['Node'], **kwargs): + super().__init__(uri, parent, **kwargs) class Literal(_Vertex): """Literal type.""" - def __init__(self, uri: URI, parent: typing.Optional['Literal']): - super().__init__(uri, parent) + parent: typing.Optional['Literal'] + def __init__(self, uri: URI, parent: typing.Optional['Literal'] ,**kwargs): + super().__init__(uri, parent, **kwargs) class Predicate(_Type): - """Predicate type.""" + """Predicate base type.""" # source type. domain: Node # destination type. - range: typing.Optional[typing.Union[Node, Literal]] + range: _Vertex # maximum cardinality of type. unique: bool @@ -223,25 +238,26 @@ class Predicate(_Type): self, # Type members uri: URI, - parent: typing.Optional['Predicate'], + parent: '_PredicateBase', # Predicate members domain: Node, - range: typing.Optional[typing.Union[Node, Literal]], # pylint: disable=redefined-builtin + range: _Vertex, # pylint: disable=redefined-builtin unique: bool, + **kwargs, ): # check arguments if not isinstance(domain, Node): raise TypeError(domain) - if range is not None and not isinstance(range, Node) and not isinstance(range, Literal): + if range != ROOT_VERTEX and not isinstance(range, (Node, Literal)): raise TypeError(range) # initialize - super().__init__(uri, parent) + super().__init__(uri, parent, **kwargs) self.domain = domain self.range = range - self.unique = unique + self.unique = bool(unique) def __hash__(self) -> int: - return hash((super().__hash__(), self.domain, self.range, self.unique)) + return hash((super().__hash__(), self.domain, self.unique, self.range)) def __eq__(self, other: typing.Any) -> bool: return super().__eq__(other) \ @@ -264,13 +280,132 @@ class Predicate(_Type): raise errors.ConsistencyError(f'{domain} must be a subclass of {self.domain}') if range is None: range = self.range - if range is None: # inherited range from ns.bsfs.Predicate - raise ValueError('range must be defined by the parent or argument') - if self.range is not None and not range <= self.range: + # NOTE: The root predicate has a Vertex as range, which is neither a parent of the root + # Node nor Literal. Hence, that test is skipped since a child should be allowed to + # specialize from Vertex to anything. + if self.range != ROOT_VERTEX and not range <= self.range: raise errors.ConsistencyError(f'{range} must be a subclass of {self.range}') if unique is None: unique = self.unique - return super().get_child(uri, domain=domain, range=range, unique=unique, **kwargs) + return super().get_child( + uri=uri, + domain=domain, + range=range, + unique=unique, + **kwargs + ) + + +class Feature(Predicate): + """Feature base type.""" + + # Number of feature vector dimensions. + dimension: int + + # Feature vector datatype. + dtype: URI + + # Distance measure to compare feature vectors. + distance: URI + + def __init__( + self, + # Type members + uri: URI, + parent: Predicate, + # Predicate members + domain: Node, + range: Literal, + unique: bool, + # Feature members + dimension: int, + dtype: URI, + distance: URI, + **kwargs, + ): + super().__init__(uri, parent, domain, range, unique, **kwargs) + self.dimension = int(dimension) + self.dtype = URI(dtype) + self.distance = URI(distance) + + def __hash__(self) -> int: + return hash((super().__hash__(), self.dimension, self.dtype, self.distance)) + + def __eq__(self, other: typing.Any) -> bool: + return super().__eq__(other) \ + and self.dimension == other.dimension \ + and self.dtype == other.dtype \ + and self.distance == other.distance + + def get_child( + self, + uri: URI, + domain: typing.Optional[Node] = None, + range: typing.Optional[Literal] = None, # pylint: disable=redefined-builtin + unique: typing.Optional[bool] = None, + dimension: typing.Optional[int] = None, + dtype: typing.Optional[URI] = None, + distance: typing.Optional[URI] = None, + **kwargs, + ): + """Return a child of the current class.""" + if dimension is None: + dimension = self.dimension + if dtype is None: + dtype = self.dtype + if distance is None: + distance = self.distance + return super().get_child( + uri=uri, + domain=domain, + range=range, + unique=unique, + dimension=dimension, + dtype=dtype, + distance=distance, + **kwargs, + ) +# essential vertices +ROOT_VERTEX = _Vertex( + uri=ns.bsfs.Vertex, + parent=None, + ) + +ROOT_NODE = Node( + uri=ns.bsfs.Node, + parent=None, + ) + +ROOT_LITERAL = Literal( + uri=ns.bsfs.Literal, + parent=None, + ) + +ROOT_NUMBER = Literal( + uri=ns.bsfs.Number, + parent=ROOT_LITERAL, + ) + +# essential predicates +ROOT_PREDICATE = Predicate( + uri=ns.bsfs.Predicate, + parent=None, + domain=ROOT_NODE, + range=ROOT_VERTEX, + unique=False, + ) + +ROOT_FEATURE = Feature( + uri=ns.bsfs.Feature, + parent=ROOT_PREDICATE, + domain=ROOT_NODE, + range=ROOT_LITERAL, + unique=False, + dimension=1, + dtype=ns.bsfs.f16, + distance=ns.bsfs.euclidean, + ) + ## EOF ## -- cgit v1.2.3 From 6fd984e694b0a7b749ab947211d792f5b011ee6f Mon Sep 17 00:00:00 2001 From: Matthias Baumgartner Date: Thu, 12 Jan 2023 08:44:25 +0100 Subject: renamed get_child to child in schema.types._Type and _Vertex to Vertex in schema.types --- bsfs/schema/__init__.py | 2 +- bsfs/schema/schema.py | 2 +- bsfs/schema/serialize.py | 4 ++-- bsfs/schema/types.py | 28 ++++++++++++++-------------- 4 files changed, 18 insertions(+), 18 deletions(-) (limited to 'bsfs') diff --git a/bsfs/schema/__init__.py b/bsfs/schema/__init__.py index dc24313..5162a01 100644 --- a/bsfs/schema/__init__.py +++ b/bsfs/schema/__init__.py @@ -10,7 +10,7 @@ import typing # inner-module imports from .schema import Schema from .serialize import from_string, to_string -from .types import Literal, Node, Predicate, _Vertex # FIXME: _Vertex +from .types import Literal, Node, Predicate, Vertex, ROOT_FEATURE, ROOT_LITERAL, ROOT_NODE, ROOT_NUMBER, ROOT_PREDICATE, ROOT_VERTEX # exports __all__: typing.Sequence[str] = ( diff --git a/bsfs/schema/schema.py b/bsfs/schema/schema.py index 1c4c807..80cb58a 100644 --- a/bsfs/schema/schema.py +++ b/bsfs/schema/schema.py @@ -83,7 +83,7 @@ class Schema(): prange = {pred.range for pred in predicates} nodes |= {vert for vert in prange if isinstance(vert, types.Node)} literals |= {vert for vert in prange if isinstance(vert, types.Literal)} - # NOTE: ROOT_PREDICATE has a _Vertex as range which is neither in nodes nor literals + # NOTE: ROOT_PREDICATE has a Vertex as range which is neither in nodes nor literals # FIXME: with the ROOT_VERTEX missing, the schema is not complete anymore! # include parents in nodes and literals sets diff --git a/bsfs/schema/serialize.py b/bsfs/schema/serialize.py index 1222aa6..c1ac9a9 100644 --- a/bsfs/schema/serialize.py +++ b/bsfs/schema/serialize.py @@ -125,10 +125,10 @@ def from_string(schema_str: str) -> schema.Schema: # get distance distance = _fetch_value(uri, rdflib.URIRef(ns.bsfs.distance), URI) # return feature - return parent.get_child(URI(uri), domain=dom, range=rng, unique=unique, + return parent.child(URI(uri), domain=dom, range=rng, unique=unique, dtype=dtype, dimension=dimension, distance=distance, **annotations) # handle non-feature predicate - return parent.get_child(URI(uri), domain=dom, range=rng, unique=unique, **annotations) + return parent.child(URI(uri), domain=dom, range=rng, unique=unique, **annotations) predicates = _fetch_hierarchically(_build_predicate, types.ROOT_PREDICATE) return schema.Schema(predicates, nodes, literals) diff --git a/bsfs/schema/types.py b/bsfs/schema/types.py index e737263..4f49efe 100644 --- a/bsfs/schema/types.py +++ b/bsfs/schema/types.py @@ -114,7 +114,7 @@ class _Type(): yield curr curr = curr.parent - def get_child( + def child( self, uri: URI, **kwargs, @@ -201,21 +201,21 @@ class _Type(): return False -class _Vertex(_Type): +class Vertex(_Type): """Graph vertex types. Can be a Node or a Literal.""" - parent: typing.Optional['_Vertex'] - def __init__(self, uri: URI, parent: typing.Optional['_Vertex'], **kwargs): + parent: typing.Optional['Vertex'] + def __init__(self, uri: URI, parent: typing.Optional['Vertex'], **kwargs): super().__init__(uri, parent, **kwargs) -class Node(_Vertex): +class Node(Vertex): """Node type.""" parent: typing.Optional['Node'] def __init__(self, uri: URI, parent: typing.Optional['Node'], **kwargs): super().__init__(uri, parent, **kwargs) -class Literal(_Vertex): +class Literal(Vertex): """Literal type.""" parent: typing.Optional['Literal'] def __init__(self, uri: URI, parent: typing.Optional['Literal'] ,**kwargs): @@ -229,7 +229,7 @@ class Predicate(_Type): domain: Node # destination type. - range: _Vertex + range: Vertex # maximum cardinality of type. unique: bool @@ -241,7 +241,7 @@ class Predicate(_Type): parent: '_PredicateBase', # Predicate members domain: Node, - range: _Vertex, # pylint: disable=redefined-builtin + range: Vertex, # pylint: disable=redefined-builtin unique: bool, **kwargs, ): @@ -265,11 +265,11 @@ class Predicate(_Type): and self.range == other.range \ and self.unique == other.unique - def get_child( + def child( self, uri: URI, domain: typing.Optional[Node] = None, - range: typing.Optional[_Vertex] = None, # pylint: disable=redefined-builtin + range: typing.Optional[Vertex] = None, # pylint: disable=redefined-builtin unique: typing.Optional[bool] = None, **kwargs, ): @@ -287,7 +287,7 @@ class Predicate(_Type): raise errors.ConsistencyError(f'{range} must be a subclass of {self.range}') if unique is None: unique = self.unique - return super().get_child( + return super().child( uri=uri, domain=domain, range=range, @@ -337,7 +337,7 @@ class Feature(Predicate): and self.dtype == other.dtype \ and self.distance == other.distance - def get_child( + def child( self, uri: URI, domain: typing.Optional[Node] = None, @@ -355,7 +355,7 @@ class Feature(Predicate): dtype = self.dtype if distance is None: distance = self.distance - return super().get_child( + return super().child( uri=uri, domain=domain, range=range, @@ -368,7 +368,7 @@ class Feature(Predicate): # essential vertices -ROOT_VERTEX = _Vertex( +ROOT_VERTEX = Vertex( uri=ns.bsfs.Vertex, parent=None, ) -- cgit v1.2.3 From 3940cb3c79937a431ba2ae3b57fd0c6c2ccfff33 Mon Sep 17 00:00:00 2001 From: Matthias Baumgartner Date: Thu, 12 Jan 2023 10:12:43 +0100 Subject: use Vertex in type annotations --- bsfs/graph/resolve.py | 28 +++++++++++------------ bsfs/query/validator.py | 25 +++++++++----------- bsfs/triple_store/sparql/parse_filter.py | 39 +++++++++++++++----------------- 3 files changed, 42 insertions(+), 50 deletions(-) (limited to 'bsfs') diff --git a/bsfs/graph/resolve.py b/bsfs/graph/resolve.py index feb0855..e398a5e 100644 --- a/bsfs/graph/resolve.py +++ b/bsfs/graph/resolve.py @@ -37,8 +37,6 @@ class Filter(): """ - T_VERTEX = typing.Union[bsc.Node, bsc.Literal] - def __init__(self, schema): self.schema = schema @@ -47,7 +45,7 @@ class Filter(): def _parse_filter_expression( self, - type_: T_VERTEX, + type_: bsc.Vertex, node: ast.filter.FilterExpression, ) -> ast.filter.FilterExpression: """Route *node* to the handler of the respective FilterExpression subclass.""" @@ -73,7 +71,7 @@ class Filter(): # invalid node raise errors.BackendError(f'expected filter expression, found {node}') - def _parse_predicate_expression(self, node: ast.filter.PredicateExpression) -> T_VERTEX: + def _parse_predicate_expression(self, node: ast.filter.PredicateExpression) -> bsc.Vertex: """Route *node* to the handler of the respective PredicateExpression subclass.""" if isinstance(node, ast.filter.Predicate): return self._predicate(node) @@ -82,7 +80,7 @@ class Filter(): # invalid node raise errors.BackendError(f'expected predicate expression, found {node}') - def _predicate(self, node: ast.filter.Predicate) -> T_VERTEX: + def _predicate(self, node: ast.filter.Predicate) -> bsc.Vertex: if not self.schema.has_predicate(node.predicate): raise errors.ConsistencyError(f'predicate {node.predicate} is not in the schema') pred = self.schema.predicate(node.predicate) @@ -91,7 +89,7 @@ class Filter(): dom, rng = rng, dom return rng - def _one_of(self, node: ast.filter.OneOf) -> T_VERTEX: + def _one_of(self, node: ast.filter.OneOf) -> bsc.Vertex: # determine domain and range types rng = None for pred in node: @@ -107,33 +105,33 @@ class Filter(): raise errors.UnreachableError() return rng - def _any(self, type_: T_VERTEX, node: ast.filter.Any) -> ast.filter.Any: # pylint: disable=unused-argument + def _any(self, type_: bsc.Vertex, node: ast.filter.Any) -> ast.filter.Any: # pylint: disable=unused-argument next_type = self._parse_predicate_expression(node.predicate) return ast.filter.Any(node.predicate, self._parse_filter_expression(next_type, node.expr)) - def _all(self, type_: T_VERTEX, node: ast.filter.All) -> ast.filter.All: # pylint: disable=unused-argument + def _all(self, type_: bsc.Vertex, node: ast.filter.All) -> ast.filter.All: # pylint: disable=unused-argument next_type = self._parse_predicate_expression(node.predicate) return ast.filter.All(node.predicate, self._parse_filter_expression(next_type, node.expr)) - def _and(self, type_: T_VERTEX, node: ast.filter.And) -> ast.filter.And: + def _and(self, type_: bsc.Vertex, node: ast.filter.And) -> ast.filter.And: return ast.filter.And({self._parse_filter_expression(type_, expr) for expr in node}) - def _or(self, type_: T_VERTEX, node: ast.filter.Or) -> ast.filter.Or: + def _or(self, type_: bsc.Vertex, node: ast.filter.Or) -> ast.filter.Or: return ast.filter.Or({self._parse_filter_expression(type_, expr) for expr in node}) - def _not(self, type_: T_VERTEX, node: ast.filter.Not) -> ast.filter.Not: + def _not(self, type_: bsc.Vertex, node: ast.filter.Not) -> ast.filter.Not: return ast.filter.Not(self._parse_filter_expression(type_, node.expr)) - def _has(self, type_: T_VERTEX, node: ast.filter.Has) -> ast.filter.Has: # pylint: disable=unused-argument + def _has(self, type_: bsc.Vertex, node: ast.filter.Has) -> ast.filter.Has: # pylint: disable=unused-argument return node - def _value(self, type_: T_VERTEX, node: ast.filter._Value) -> ast.filter._Value: # pylint: disable=unused-argument + def _value(self, type_: bsc.Vertex, node: ast.filter._Value) -> ast.filter._Value: # pylint: disable=unused-argument return node - def _bounded(self, type_: T_VERTEX, node: ast.filter._Bounded) -> ast.filter._Bounded: # pylint: disable=unused-argument + def _bounded(self, type_: bsc.Vertex, node: ast.filter._Bounded) -> ast.filter._Bounded: # pylint: disable=unused-argument return node - def _is(self, type_: T_VERTEX, node: ast.filter.Is) -> typing.Union[ast.filter.Or, ast.filter.Is]: + def _is(self, type_: bsc.Vertex, node: ast.filter.Is) -> typing.Union[ast.filter.Or, ast.filter.Is]: # check if action is needed if not isinstance(node.value, nodes.Nodes): return node diff --git a/bsfs/query/validator.py b/bsfs/query/validator.py index 352203a..6bf1b72 100644 --- a/bsfs/query/validator.py +++ b/bsfs/query/validator.py @@ -34,9 +34,6 @@ class Filter(): """ - # vertex types - T_VERTEX = typing.Union[bsc.Node, bsc.Literal] # FIXME: Shouldn't this be in the schema? - # schema to validate against. schema: bsc.Schema @@ -64,7 +61,7 @@ class Filter(): ## routing methods - def _parse_filter_expression(self, type_: T_VERTEX, node: ast.filter.FilterExpression): + def _parse_filter_expression(self, type_: bsc.Vertex, node: ast.filter.FilterExpression): """Route *node* to the handler of the respective FilterExpression subclass.""" if isinstance(node, ast.filter.Is): return self._is(type_, node) @@ -83,7 +80,7 @@ class Filter(): # invalid node raise errors.BackendError(f'expected filter expression, found {node}') - def _parse_predicate_expression(self, node: ast.filter.PredicateExpression) -> typing.Tuple[T_VERTEX, T_VERTEX]: + def _parse_predicate_expression(self, node: ast.filter.PredicateExpression) -> typing.Tuple[bsc.Vertex, bsc.Vertex]: """Route *node* to the handler of the respective PredicateExpression subclass.""" if isinstance(node, ast.filter.Predicate): return self._predicate(node) @@ -95,7 +92,7 @@ class Filter(): ## predicate expressions - def _predicate(self, node: ast.filter.Predicate) -> typing.Tuple[T_VERTEX, T_VERTEX]: + def _predicate(self, node: ast.filter.Predicate) -> typing.Tuple[bsc.Vertex, bsc.Vertex]: # predicate exists in the schema if not self.schema.has_predicate(node.predicate): raise errors.ConsistencyError(f'predicate {node.predicate} is not in the schema') @@ -110,7 +107,7 @@ class Filter(): # return domain and range return dom, rng - def _one_of(self, node: ast.filter.OneOf) -> typing.Tuple[T_VERTEX, T_VERTEX]: + def _one_of(self, node: ast.filter.OneOf) -> typing.Tuple[bsc.Vertex, bsc.Vertex]: # determine domain and range types # NOTE: select the most specific domain and the most generic range dom, rng = None, None @@ -146,7 +143,7 @@ class Filter(): ## intermediates - def _branch(self, type_: T_VERTEX, node: ast.filter._Branch): + def _branch(self, type_: bsc.Vertex, node: ast.filter._Branch): # type is a Node if not isinstance(type_, bsc.Node): raise errors.ConsistencyError(f'expected a Node, found {type_}') @@ -167,16 +164,16 @@ class Filter(): # child expression is valid self._parse_filter_expression(rng, node.expr) - def _agg(self, type_: T_VERTEX, node: ast.filter._Agg): + def _agg(self, type_: bsc.Vertex, node: ast.filter._Agg): for expr in node: # child expression is valid self._parse_filter_expression(type_, expr) - def _not(self, type_: T_VERTEX, node: ast.filter.Not): + def _not(self, type_: bsc.Vertex, node: ast.filter.Not): # child expression is valid self._parse_filter_expression(type_, node.expr) - def _has(self, type_: T_VERTEX, node: ast.filter.Has): + def _has(self, type_: bsc.Vertex, node: ast.filter.Has): # type is a Node if not isinstance(type_, bsc.Node): raise errors.ConsistencyError(f'expected a Node, found {type_}') @@ -195,13 +192,13 @@ class Filter(): ## conditions - def _is(self, type_: T_VERTEX, node: ast.filter.Is): # pylint: disable=unused-argument # (node) + def _is(self, type_: bsc.Vertex, node: ast.filter.Is): # pylint: disable=unused-argument # (node) if not isinstance(type_, bsc.Node): raise errors.ConsistencyError(f'expected a Node, found {type_}') if type_ not in self.schema.nodes(): raise errors.ConsistencyError(f'node {type_} is not in the schema') - def _value(self, type_: T_VERTEX, node: ast.filter._Value): # pylint: disable=unused-argument # (node) + def _value(self, type_: bsc.Vertex, node: ast.filter._Value): # pylint: disable=unused-argument # (node) # type is a literal if not isinstance(type_, bsc.Literal): raise errors.ConsistencyError(f'expected a Literal, found {type_}') @@ -211,7 +208,7 @@ class Filter(): # FIXME: Check if node.value corresponds to type_ # FIXME: A specific literal might be requested (i.e., a numeric type when used in Has) - def _bounded(self, type_: T_VERTEX, node: ast.filter._Bounded): # pylint: disable=unused-argument # (node) + def _bounded(self, type_: bsc.Vertex, node: ast.filter._Bounded): # pylint: disable=unused-argument # (node) # type is a literal if not isinstance(type_, bsc.Literal): raise errors.ConsistencyError(f'expected a Literal, found {type_}') diff --git a/bsfs/triple_store/sparql/parse_filter.py b/bsfs/triple_store/sparql/parse_filter.py index d4db0aa..a851888 100644 --- a/bsfs/triple_store/sparql/parse_filter.py +++ b/bsfs/triple_store/sparql/parse_filter.py @@ -46,9 +46,6 @@ class Filter(): # Generator that produces unique symbol names. ngen: _GenHopName - # Vertex type. - T_VERTEX = typing.Union[bsc.Node, bsc.Literal] - def __init__(self, schema): self.schema = schema self.ngen = _GenHopName() @@ -79,7 +76,7 @@ class Filter(): }} ''' - def _parse_filter_expression(self, type_: T_VERTEX, node: ast.filter.FilterExpression, head: str) -> str: + def _parse_filter_expression(self, type_: bsc.Vertex, node: ast.filter.FilterExpression, head: str) -> str: """Route *node* to the handler of the respective FilterExpression subclass.""" if isinstance(node, ast.filter.Is): return self._is(type_, node, head) @@ -112,9 +109,9 @@ class Filter(): def _parse_predicate_expression( self, - type_: T_VERTEX, + type_: bsc.Vertex, node: ast.filter.PredicateExpression - ) -> typing.Tuple[str, T_VERTEX]: + ) -> typing.Tuple[str, bsc.Vertex]: """Route *node* to the handler of the respective PredicateExpression subclass.""" if isinstance(node, ast.filter.Predicate): return self._predicate(type_, node) @@ -123,7 +120,7 @@ class Filter(): # invalid node raise errors.BackendError(f'expected predicate expression, found {node}') - def _one_of(self, node_type: T_VERTEX, node: ast.filter.OneOf) -> typing.Tuple[str, T_VERTEX]: + def _one_of(self, node_type: bsc.Vertex, node: ast.filter.OneOf) -> typing.Tuple[str, bsc.Vertex]: """ """ if not isinstance(node_type, bsc.Node): @@ -150,7 +147,7 @@ class Filter(): # return joint predicate expression and next range return '|'.join(suburi), rng - def _predicate(self, node_type: T_VERTEX, node: ast.filter.Predicate) -> typing.Tuple[str, T_VERTEX]: + def _predicate(self, node_type: bsc.Vertex, node: ast.filter.Predicate) -> typing.Tuple[str, bsc.Vertex]: """ """ # check node_type @@ -178,7 +175,7 @@ class Filter(): # return predicate URI and next node type return puri, rng - def _any(self, node_type: T_VERTEX, node: ast.filter.Any, head: str) -> str: + def _any(self, node_type: bsc.Vertex, node: ast.filter.Any, head: str) -> str: """ """ if not isinstance(node_type, bsc.Node): @@ -191,7 +188,7 @@ class Filter(): # combine results return f'{head} {pred} {nexthead} . {expr}' - def _all(self, node_type: T_VERTEX, node: ast.filter.All, head: str) -> str: + def _all(self, node_type: bsc.Vertex, node: ast.filter.All, head: str) -> str: """ """ # NOTE: All(P, E) := Not(Any(P, Not(E))) and EXISTS(P, ?) @@ -208,13 +205,13 @@ class Filter(): # return existence and rewritten expression return f'FILTER EXISTS {{ {head} {pred} {temphead} }} . ' + expr - def _and(self, node_type: T_VERTEX, node: ast.filter.And, head: str) -> str: + def _and(self, node_type: bsc.Vertex, node: ast.filter.And, head: str) -> str: """ """ sub = [self._parse_filter_expression(node_type, expr, head) for expr in node] return ' . '.join(sub) - def _or(self, node_type: T_VERTEX, node: ast.filter.Or, head: str) -> str: + def _or(self, node_type: bsc.Vertex, node: ast.filter.Or, head: str) -> str: """ """ # potential special case optimization: @@ -224,7 +221,7 @@ class Filter(): sub = ['{' + expr + '}' for expr in sub] return ' UNION '.join(sub) - def _not(self, node_type: T_VERTEX, node: ast.filter.Not, head: str) -> str: + def _not(self, node_type: bsc.Vertex, node: ast.filter.Not, head: str) -> str: """ """ expr = self._parse_filter_expression(node_type, node.expr, head) @@ -235,7 +232,7 @@ class Filter(): # The simplest (and non-interfering) choice is a type statement. return f'MINUS {{ {head} <{ns.rdf.type}>/<{ns.rdfs.subClassOf}>* <{node_type.uri}> . {expr} }}' - def _has(self, node_type: T_VERTEX, node: ast.filter.Has, head: str) -> str: + def _has(self, node_type: bsc.Vertex, node: ast.filter.Has, head: str) -> str: """ """ if not isinstance(node_type, bsc.Node): @@ -253,42 +250,42 @@ class Filter(): # combine return num_preds + ' . ' + count_bounds - def _is(self, node_type: T_VERTEX, node: ast.filter.Is, head: str) -> str: + def _is(self, node_type: bsc.Vertex, node: ast.filter.Is, head: str) -> str: """ """ if not isinstance(node_type, bsc.Node): raise errors.BackendError(f'expected Node, found {node_type}') return f'VALUES {head} {{ <{node.value}> }}' - def _equals(self, node_type: T_VERTEX, node: ast.filter.Equals, head: str) -> str: + def _equals(self, node_type: bsc.Vertex, node: ast.filter.Equals, head: str) -> str: """ """ if not isinstance(node_type, bsc.Literal): raise errors.BackendError(f'expected Literal, found {node}') return f'VALUES {head} {{ "{node.value}"^^<{node_type.uri}> }}' - def _substring(self, node_type: T_VERTEX, node: ast.filter.Substring, head: str) -> str: + def _substring(self, node_type: bsc.Vertex, node: ast.filter.Substring, head: str) -> str: """ """ if not isinstance(node_type, bsc.Literal): raise errors.BackendError(f'expected Literal, found {node_type}') return f'FILTER contains(str({head}), "{node.value}")' - def _starts_with(self, node_type: T_VERTEX, node: ast.filter.StartsWith, head: str) -> str: + def _starts_with(self, node_type: bsc.Vertex, node: ast.filter.StartsWith, head: str) -> str: """ """ if not isinstance(node_type, bsc.Literal): raise errors.BackendError(f'expected Literal, found {node_type}') return f'FILTER strstarts(str({head}), "{node.value}")' - def _ends_with(self, node_type: T_VERTEX, node: ast.filter.EndsWith, head: str) -> str: + def _ends_with(self, node_type: bsc.Vertex, node: ast.filter.EndsWith, head: str) -> str: """ """ if not isinstance(node_type, bsc.Literal): raise errors.BackendError(f'expected Literal, found {node_type}') return f'FILTER strends(str({head}), "{node.value}")' - def _less_than(self, node_type: T_VERTEX, node: ast.filter.LessThan, head: str) -> str: + def _less_than(self, node_type: bsc.Vertex, node: ast.filter.LessThan, head: str) -> str: """ """ if not isinstance(node_type, bsc.Literal): @@ -296,7 +293,7 @@ class Filter(): equality = '=' if not node.strict else '' return f'FILTER ({head} <{equality} {float(node.threshold)})' - def _greater_than(self, node_type: T_VERTEX, node: ast.filter.GreaterThan, head: str) -> str: + def _greater_than(self, node_type: bsc.Vertex, node: ast.filter.GreaterThan, head: str) -> str: """ """ if not isinstance(node_type, bsc.Literal): -- cgit v1.2.3 From 6b3e32b29799a8143e8ce9d20c5f27e3e166b9bb Mon Sep 17 00:00:00 2001 From: Matthias Baumgartner Date: Thu, 12 Jan 2023 10:17:07 +0100 Subject: changed path to from_string in clients --- bsfs/apps/migrate.py | 6 +++--- bsfs/graph/graph.py | 10 +++++----- bsfs/triple_store/sparql/sparql.py | 2 +- 3 files changed, 9 insertions(+), 9 deletions(-) (limited to 'bsfs') diff --git a/bsfs/apps/migrate.py b/bsfs/apps/migrate.py index 91c1661..b9d019f 100644 --- a/bsfs/apps/migrate.py +++ b/bsfs/apps/migrate.py @@ -42,15 +42,15 @@ def main(argv): graph = bsfs.Open(config) # initialize schema - schema = bsfs.schema.Schema.Empty() + schema = bsfs.schema.Schema() if len(args.schema) == 0: # assemble schema from standard input - schema = schema + bsfs.schema.Schema.from_string(sys.stdin.read()) + schema = schema + bsfs.schema.from_string(sys.stdin.read()) else: # assemble schema from input files for pth in args.schema: with open(pth, mode='rt', encoding='UTF-8') as ifile: - schema = schema + bsfs.schema.Schema.from_string(ifile.read()) + schema = schema + bsfs.schema.from_string(ifile.read()) # migrate schema graph.migrate(schema, not args.remove) diff --git a/bsfs/graph/graph.py b/bsfs/graph/graph.py index f030fed..2210755 100644 --- a/bsfs/graph/graph.py +++ b/bsfs/graph/graph.py @@ -10,7 +10,7 @@ import typing # bsfs imports from bsfs.query import ast, validate -from bsfs.schema import Schema +from bsfs import schema as bsc from bsfs.triple_store import TripleStoreBase from bsfs.utils import URI, typename @@ -67,11 +67,11 @@ class Graph(): return f'{typename(self)}({str(self._backend)}, {self._user})' @property - def schema(self) -> Schema: + def schema(self) -> bsc.Schema: """Return the store's local schema.""" return self._backend.schema - def migrate(self, schema: Schema, append: bool = True) -> 'Graph': + def migrate(self, schema: bsc.Schema, append: bool = True) -> 'Graph': """Migrate the current schema to a new *schema*. Appends to the current schema by default; control this via *append*. @@ -79,14 +79,14 @@ class Graph(): """ # check args - if not isinstance(schema, Schema): + if not isinstance(schema, bsc.Schema): raise TypeError(schema) # append to current schema if append: schema = schema + self._backend.schema # add Graph schema requirements with open(os.path.join(os.path.dirname(__file__), 'schema.nt'), mode='rt', encoding='UTF-8') as ifile: - schema = schema + Schema.from_string(ifile.read()) + schema = schema + bsc.from_string(ifile.read()) # migrate schema in backend # FIXME: consult access controls! self._backend.schema = schema diff --git a/bsfs/triple_store/sparql/sparql.py b/bsfs/triple_store/sparql/sparql.py index c3cbff6..ddace35 100644 --- a/bsfs/triple_store/sparql/sparql.py +++ b/bsfs/triple_store/sparql/sparql.py @@ -94,7 +94,7 @@ class SparqlStore(base.TripleStoreBase): super().__init__(None) self._graph = rdflib.Graph() self._transaction = _Transaction(self._graph) - self._schema = bsc.Schema.Empty() + self._schema = bsc.Schema() self._filter_parser = parse_filter.Filter(self._schema) # NOTE: mypy and pylint complain about the **kwargs not being listed (contrasting super) -- cgit v1.2.3 From 7e7284d5fc01c0a081aa79d67736f51069864a7d Mon Sep 17 00:00:00 2001 From: Matthias Baumgartner Date: Thu, 12 Jan 2023 10:22:59 +0100 Subject: adapt to non-optional range in query checks --- bsfs/graph/resolve.py | 4 ++-- bsfs/query/validator.py | 14 +++++--------- bsfs/triple_store/sparql/parse_filter.py | 13 +++++-------- 3 files changed, 12 insertions(+), 19 deletions(-) (limited to 'bsfs') diff --git a/bsfs/graph/resolve.py b/bsfs/graph/resolve.py index e398a5e..9b5f631 100644 --- a/bsfs/graph/resolve.py +++ b/bsfs/graph/resolve.py @@ -101,8 +101,8 @@ class Filter(): rng = subrng except TypeError as err: raise errors.ConsistencyError(f'ranges {subrng} and {rng} are not related') from err - if rng is None: - raise errors.UnreachableError() + if not isinstance(rng, (bsc.Node, bsc.Literal)): + raise errors.BackendError(f'the range of node {node} is undefined') return rng def _any(self, type_: bsc.Vertex, node: ast.filter.Any) -> ast.filter.Any: # pylint: disable=unused-argument diff --git a/bsfs/query/validator.py b/bsfs/query/validator.py index 6bf1b72..b04a9bf 100644 --- a/bsfs/query/validator.py +++ b/bsfs/query/validator.py @@ -98,10 +98,9 @@ class Filter(): raise errors.ConsistencyError(f'predicate {node.predicate} is not in the schema') # determine domain and range pred = self.schema.predicate(node.predicate) + if not isinstance(pred.range, (bsc.Node, bsc.Literal)): + raise errors.BackendError(f'the range of predicate {pred} is undefined') dom, rng = pred.domain, pred.range - if rng is None: - # FIXME: It is a design error that Predicates can have a None range... - raise errors.BackendError(f'predicate {pred} has no range') if node.reverse: dom, rng = rng, dom # type: ignore [assignment] # variable re-use confuses mypy # return domain and range @@ -133,12 +132,9 @@ class Filter(): raise errors.ConsistencyError(f'ranges {subrng} and {rng} are not related') except TypeError as err: # compared literal vs. node raise errors.ConsistencyError(f'ranges {subrng} and {rng} are not of the same type') from err - # check domain and range - if dom is None or rng is None: - # OneOf guarantees at least one expression, these two cases cannot happen - raise errors.UnreachableError() - # return domain and range - return dom, rng + # OneOf guarantees at least one expression, dom and rng are always bsc.Vertex. + # mypy does not realize this, hence we ignore the warning. + return dom, rng # type: ignore [return-value] ## intermediates diff --git a/bsfs/triple_store/sparql/parse_filter.py b/bsfs/triple_store/sparql/parse_filter.py index a851888..0297cbc 100644 --- a/bsfs/triple_store/sparql/parse_filter.py +++ b/bsfs/triple_store/sparql/parse_filter.py @@ -140,12 +140,10 @@ class Filter(): raise errors.ConsistencyError(f'ranges {subrng} and {rng} are not related') except TypeError as err: # subrng and rng are not comparable raise errors.ConsistencyError(f'ranges {subrng} and {rng} are not related') from err - if rng is None: - # for mypy to be certain of the rng type - # if rng were None, we'd have gotten a TypeError above (None > None) - raise errors.UnreachableError() # return joint predicate expression and next range - return '|'.join(suburi), rng + # OneOf guarantees at least one expression, rng is always a bsc.Vertex. + # mypy does not realize this, hence we ignore the warning. + return '|'.join(suburi), rng # type: ignore [return-value] def _predicate(self, node_type: bsc.Vertex, node: ast.filter.Predicate) -> typing.Tuple[str, bsc.Vertex]: """ @@ -159,9 +157,8 @@ class Filter(): if not self.schema.has_predicate(puri): raise errors.ConsistencyError(f'predicate {puri} is not in the schema') pred = self.schema.predicate(puri) - if pred.range is None: - # FIXME: It is a design error that Predicates can have a None range... - raise errors.BackendError(f'predicate {pred} has no range') + if not isinstance(pred.range, (bsc.Node, bsc.Literal)): + raise errors.BackendError(f'the range of predicate {pred} is undefined') dom, rng = pred.domain, pred.range # encapsulate predicate uri puri = f'<{puri}>' # type: ignore [assignment] # variable re-use confuses mypy -- cgit v1.2.3 From b0ff4ed674ad78bf113c3cc0c2ccd187ccb91048 Mon Sep 17 00:00:00 2001 From: Matthias Baumgartner Date: Thu, 12 Jan 2023 10:26:30 +0100 Subject: number literal adaptions --- bsfs/graph/schema.nt | 3 ++- bsfs/query/validator.py | 6 ++++-- bsfs/triple_store/sparql/parse_filter.py | 1 - bsfs/triple_store/sparql/sparql.py | 4 +++- 4 files changed, 9 insertions(+), 5 deletions(-) (limited to 'bsfs') diff --git a/bsfs/graph/schema.nt b/bsfs/graph/schema.nt index 8612681..f619746 100644 --- a/bsfs/graph/schema.nt +++ b/bsfs/graph/schema.nt @@ -8,7 +8,8 @@ prefix bsfs: prefix bsm: # literals -xsd:integer rdfs:subClassOf bsfs:Literal . +bsfs:Number rdfs:subClassOf bsfs:Literal . +xsd:integer rdfs:subClassOf bsfs:Number . # predicates bsm:t_created rdfs:subClassOf bsfs:Predicate ; diff --git a/bsfs/query/validator.py b/bsfs/query/validator.py index b04a9bf..75b51ca 100644 --- a/bsfs/query/validator.py +++ b/bsfs/query/validator.py @@ -182,8 +182,7 @@ class Filter(): if not type_ <= dom: raise errors.ConsistencyError(f'expected type {dom}, found {type_}') # node.count is a numerical expression - # FIXME: We have to ensure that ns.xsd.integer is always known in the schema! - self._parse_filter_expression(self.schema.literal(ns.xsd.integer), node.count) + self._parse_filter_expression(self.schema.literal(ns.bsfs.Number), node.count) ## conditions @@ -211,6 +210,9 @@ class Filter(): # type exists in the schema if type_ not in self.schema.literals(): raise errors.ConsistencyError(f'literal {type_} is not in the schema') + # type must be a numerical + if not type_ <= self.schema.literal(ns.bsfs.Number): + raise errors.ConsistencyError(f'expected a number type, found {type_}') # FIXME: Check if node.value corresponds to type_ diff --git a/bsfs/triple_store/sparql/parse_filter.py b/bsfs/triple_store/sparql/parse_filter.py index 0297cbc..18a3288 100644 --- a/bsfs/triple_store/sparql/parse_filter.py +++ b/bsfs/triple_store/sparql/parse_filter.py @@ -242,7 +242,6 @@ class Filter(): # predicate count expression (fetch number of predicates at *head*) num_preds = f'{{ SELECT (COUNT(distinct {inner}) as {outer}) WHERE {{ {head} {pred} {inner} }} }}' # count expression - # FIXME: We have to ensure that ns.xsd.integer is always known in the schema! count_bounds = self._parse_filter_expression(self.schema.literal(ns.xsd.integer), node.count, outer) # combine return num_preds + ' . ' + count_bounds diff --git a/bsfs/triple_store/sparql/sparql.py b/bsfs/triple_store/sparql/sparql.py index ddace35..87467ff 100644 --- a/bsfs/triple_store/sparql/sparql.py +++ b/bsfs/triple_store/sparql/sparql.py @@ -11,6 +11,7 @@ import rdflib # bsfs imports from bsfs import schema as bsc +from bsfs.namespace import ns from bsfs.query import ast from bsfs.utils import errors, URI @@ -94,7 +95,8 @@ class SparqlStore(base.TripleStoreBase): super().__init__(None) self._graph = rdflib.Graph() self._transaction = _Transaction(self._graph) - self._schema = bsc.Schema() + # NOTE: parsing bsfs.query.ast.filter.Has requires xsd:integer. + self._schema = bsc.Schema(literals={bsc.ROOT_NUMBER.child(ns.xsd.integer)}) self._filter_parser = parse_filter.Filter(self._schema) # NOTE: mypy and pylint complain about the **kwargs not being listed (contrasting super) -- cgit v1.2.3 From e708016ae366e96051281f3a744af35a8c06d98b Mon Sep 17 00:00:00 2001 From: Matthias Baumgartner Date: Thu, 12 Jan 2023 10:28:16 +0100 Subject: cleanup and cosmetic changes --- bsfs/schema/__init__.py | 3 ++- bsfs/schema/schema.py | 2 -- bsfs/schema/serialize.py | 17 +++++++++-------- bsfs/schema/types.py | 8 ++++---- bsfs/triple_store/sparql/sparql.py | 6 +++--- 5 files changed, 18 insertions(+), 18 deletions(-) (limited to 'bsfs') diff --git a/bsfs/schema/__init__.py b/bsfs/schema/__init__.py index 5162a01..31d7d61 100644 --- a/bsfs/schema/__init__.py +++ b/bsfs/schema/__init__.py @@ -10,7 +10,8 @@ import typing # inner-module imports from .schema import Schema from .serialize import from_string, to_string -from .types import Literal, Node, Predicate, Vertex, ROOT_FEATURE, ROOT_LITERAL, ROOT_NODE, ROOT_NUMBER, ROOT_PREDICATE, ROOT_VERTEX +from .types import Literal, Node, Predicate, Vertex, \ + ROOT_FEATURE, ROOT_LITERAL, ROOT_NODE, ROOT_NUMBER, ROOT_PREDICATE, ROOT_VERTEX # exports __all__: typing.Sequence[str] = ( diff --git a/bsfs/schema/schema.py b/bsfs/schema/schema.py index 80cb58a..52ad191 100644 --- a/bsfs/schema/schema.py +++ b/bsfs/schema/schema.py @@ -7,10 +7,8 @@ Author: Matthias Baumgartner, 2022 # imports from collections import abc, namedtuple import typing -import rdflib # bsfs imports -from bsfs.namespace import ns from bsfs.utils import errors, URI, typename # inner-module imports diff --git a/bsfs/schema/serialize.py b/bsfs/schema/serialize.py index c1ac9a9..0eb6628 100644 --- a/bsfs/schema/serialize.py +++ b/bsfs/schema/serialize.py @@ -5,7 +5,6 @@ A copy of the license is provided with the project. Author: Matthias Baumgartner, 2022 """ # standard imports -from collections import abc import itertools import typing @@ -42,7 +41,7 @@ def from_string(schema_str: str) -> schema.Schema: return value.value if isinstance(value, rdflib.URIRef): return URI(value) - raise errors.BackendError(f'expected Literal or URIRef, found {typename(value)}') + raise errors.UnreachableError(f'expected Literal or URIRef, found {typename(value)}') def _fetch_hierarchically(factory, curr): """Walk through a rdfs:subClassOf hierarchy, creating symbols along the way.""" @@ -80,14 +79,16 @@ def from_string(schema_str: str) -> schema.Schema: # fetch predicates # FIXME: type annotation def _fetch_value(subject: URI, predicate: rdflib.URIRef, value_factory) -> typing.Optional[typing.Any]: - """Fetch the object of a given subject and predicate. Raises a `errors.ConsistencyError` if multiple objects match.""" + """Fetch the object of a given subject and predicate. + Raises a `errors.ConsistencyError` if multiple objects match. + """ values = list(graph.objects(rdflib.URIRef(subject), predicate)) if len(values) == 0: return None - elif len(values) == 1: + if len(values) == 1: return value_factory(values[0]) - else: - raise errors.ConsistencyError(f'{subject} has multiple values for predicate {str(predicate)}, expected zero or one') + raise errors.ConsistencyError( + f'{subject} has multiple values for predicate {str(predicate)}, expected zero or one') def _build_predicate(uri, parent, **annotations): """Predicate factory.""" @@ -102,13 +103,13 @@ def from_string(schema_str: str) -> schema.Schema: dom = _fetch_value(uri, rdflib.RDFS.domain, URI) if dom is not None and dom not in nodes_lut: raise errors.ConsistencyError(f'predicate {uri} has undefined domain {dom}') - elif dom is not None: + if dom is not None: dom = nodes_lut[dom] # get range rng = _fetch_value(uri, rdflib.RDFS.range, URI) if rng is not None and rng not in nodes_lut and rng not in literals_lut: raise errors.ConsistencyError(f'predicate {uri} has undefined range {rng}') - elif rng is not None: + if rng is not None: rng = nodes_lut.get(rng, literals_lut.get(rng)) # get unique unique = _fetch_value(uri, rdflib.URIRef(ns.bsfs.unique), bool) diff --git a/bsfs/schema/types.py b/bsfs/schema/types.py index 4f49efe..6257dee 100644 --- a/bsfs/schema/types.py +++ b/bsfs/schema/types.py @@ -238,7 +238,7 @@ class Predicate(_Type): self, # Type members uri: URI, - parent: '_PredicateBase', + parent: typing.Optional['Predicate'], # Predicate members domain: Node, range: Vertex, # pylint: disable=redefined-builtin @@ -312,10 +312,10 @@ class Feature(Predicate): self, # Type members uri: URI, - parent: Predicate, + parent: typing.Optional[Predicate], # Predicate members domain: Node, - range: Literal, + range: Literal, # pylint: disable=redefined-builtin unique: bool, # Feature members dimension: int, @@ -341,7 +341,7 @@ class Feature(Predicate): self, uri: URI, domain: typing.Optional[Node] = None, - range: typing.Optional[Literal] = None, # pylint: disable=redefined-builtin + range: typing.Optional[Vertex] = None, # pylint: disable=redefined-builtin unique: typing.Optional[bool] = None, dimension: typing.Optional[int] = None, dtype: typing.Optional[URI] = None, diff --git a/bsfs/triple_store/sparql/sparql.py b/bsfs/triple_store/sparql/sparql.py index 87467ff..3877d1a 100644 --- a/bsfs/triple_store/sparql/sparql.py +++ b/bsfs/triple_store/sparql/sparql.py @@ -139,7 +139,7 @@ class SparqlStore(base.TripleStoreBase): for src, trg in self._graph.subject_objects(rdflib.URIRef(pred.uri)): self._transaction.remove((src, rdflib.URIRef(pred.uri), trg)) # remove predicate definition - if pred.parent is not None: + if pred.parent is not None: # NOTE: there shouldn't be any predicate w/o parent self._transaction.remove(( rdflib.URIRef(pred.uri), rdflib.RDFS.subClassOf, @@ -159,7 +159,7 @@ class SparqlStore(base.TripleStoreBase): # remove instance self._transaction.remove((inst, rdflib.RDF.type, rdflib.URIRef(node.uri))) # remove node definition - if node.parent is not None: + if node.parent is not None: # NOTE: there shouldn't be any node w/o parent self._transaction.remove(( rdflib.URIRef(node.uri), rdflib.RDFS.subClassOf, @@ -168,7 +168,7 @@ class SparqlStore(base.TripleStoreBase): for lit in sub.literals: # remove literal definition - if lit.parent is not None: + if lit.parent is not None: # NOTE: there shouldn't be any literal w/o parent self._transaction.remove(( rdflib.URIRef(lit.uri), rdflib.RDFS.subClassOf, -- cgit v1.2.3 From 1b7ef16c3795bb7112683662b8c22a774e219269 Mon Sep 17 00:00:00 2001 From: Matthias Baumgartner Date: Thu, 12 Jan 2023 16:57:58 +0100 Subject: schema to string --- bsfs/schema/schema.py | 2 + bsfs/schema/serialize.py | 104 +++++++++++++++++++++++++++++++++++++++++++++-- 2 files changed, 103 insertions(+), 3 deletions(-) (limited to 'bsfs') diff --git a/bsfs/schema/schema.py b/bsfs/schema/schema.py index 52ad191..bc50d4e 100644 --- a/bsfs/schema/schema.py +++ b/bsfs/schema/schema.py @@ -72,6 +72,8 @@ class Schema(): literals.add(types.ROOT_NUMBER) predicates.add(types.ROOT_FEATURE) + # FIXME: ensure that types derive from the right root? + # include parents in predicates set # TODO: review type annotations and ignores for python >= 3.11 (parents is _Type but should be typing.Self) predicates |= {par for pred in predicates for par in pred.parents()} # type: ignore [misc] diff --git a/bsfs/schema/serialize.py b/bsfs/schema/serialize.py index 0eb6628..a566d65 100644 --- a/bsfs/schema/serialize.py +++ b/bsfs/schema/serialize.py @@ -136,9 +136,107 @@ def from_string(schema_str: str) -> schema.Schema: -def to_string(schema_inst: schema.Schema) -> str: +def to_string(schema_inst: schema.Schema, fmt: str = 'turtle') -> str: + """Serialize a `bsfs.schema.Schema` to a string. + See `rdflib.Graph.serialize` for viable formats (default: turtle). """ - """ - raise NotImplementedError() + + # type of emitted triples. + T_TRIPLE = typing.Iterator[typing.Tuple[rdflib.URIRef, rdflib.URIRef, rdflib.term.Identifier]] + + def _type(tpe: types._Type) -> T_TRIPLE : + """Emit _Type properties (parent, annotations).""" + # emit parent + if tpe.parent is not None: + yield ( + rdflib.URIRef(tpe.uri), + rdflib.URIRef(ns.rdfs.subClassOf), + rdflib.URIRef(tpe.parent.uri), + ) + # emit annotations + for prop, value in tpe.annotations.items(): + yield ( + rdflib.URIRef(tpe.uri), + rdflib.URIRef(prop), + rdflib.Literal(value), # FIXME: datatype?! + ) + + def _predicate(pred: types.Predicate) -> T_TRIPLE: + """Emit Predicate properties (domain, range, unique).""" + # no need to emit anything for the root predicate + if pred == types.ROOT_PREDICATE: + return + # emit domain + if pred.domain != getattr(pred.parent, 'domain', None): + yield ( + rdflib.URIRef(pred.uri), + rdflib.URIRef(ns.rdfs.domain), + rdflib.URIRef(pred.domain.uri), + ) + # emit range + if pred.range != getattr(pred.parent, 'range', None): + yield ( + rdflib.URIRef(pred.uri), + rdflib.URIRef(ns.rdfs.range), + rdflib.URIRef(pred.range.uri), + ) + # emit cardinality + if pred.unique != getattr(pred.parent, 'unique', None): + yield ( + rdflib.URIRef(pred.uri), + rdflib.URIRef(ns.bsfs.unique), + rdflib.Literal(pred.unique, datatype=rdflib.XSD.boolean), + ) + + def _feature(feat: types.Feature) -> T_TRIPLE: + """Emit Feature properties (dimension, dtype, distance).""" + # emit size + if feat.dimension != getattr(feat.parent, 'dimension', None): + yield ( + rdflib.URIRef(feat.uri), + rdflib.URIRef(ns.bsfs.dimension), + rdflib.Literal(feat.dimension, datatype=rdflib.XSD.integer), + ) + # emit dtype + if feat.dtype != getattr(feat.parent, 'dtype', None): + yield ( + rdflib.URIRef(feat.uri), + rdflib.URIRef(ns.bsfs.dtype), + rdflib.URIRef(feat.dtype), + ) + # emit distance + if feat.distance != getattr(feat.parent, 'distance', None): + yield ( + rdflib.URIRef(feat.uri), + rdflib.URIRef(ns.bsfs.distance), + rdflib.URIRef(feat.distance), + ) + + def _parse(node: types._Type) -> T_TRIPLE: + """Emit all properties of a type.""" + if isinstance(node, types._Type): # pylint: disable=protected-access + # NOTE: all nodes are _Type + yield from _type(node) + if isinstance(node, types.Predicate): + yield from _predicate(node) + if isinstance(node, types.Feature): + yield from _feature(node) + + # create graph + graph = rdflib.Graph() + # add triples to graph + nodes = itertools.chain( + schema_inst.nodes(), + schema_inst.literals(), + schema_inst.predicates()) + for node in nodes: + for triple in _parse(node): + graph.add(triple) + # add known namespaces for readability + # FIXME: more systematically (e.g. for all in ns?) + graph.bind('bsfs', rdflib.URIRef('http://bsfs.ai/schema/')) + graph.bind('bse', rdflib.URIRef('http://bsfs.ai/schema/Entity#')) + # serialize to turtle + return graph.serialize(format=fmt) ## EOF ## -- cgit v1.2.3 From 60257ed3c2aa6ea2891f362a691bde9d7ef17831 Mon Sep 17 00:00:00 2001 From: Matthias Baumgartner Date: Fri, 13 Jan 2023 12:22:34 +0100 Subject: schema type comparison across classes --- bsfs/graph/resolve.py | 10 +++++----- bsfs/query/validator.py | 31 ++++++++++++------------------- bsfs/schema/types.py | 16 ++++++++++++---- bsfs/triple_store/sparql/parse_filter.py | 15 ++++++--------- 4 files changed, 35 insertions(+), 37 deletions(-) (limited to 'bsfs') diff --git a/bsfs/graph/resolve.py b/bsfs/graph/resolve.py index 9b5f631..b671204 100644 --- a/bsfs/graph/resolve.py +++ b/bsfs/graph/resolve.py @@ -96,11 +96,11 @@ class Filter(): # parse child expression subrng = self._parse_predicate_expression(pred) # determine the next type - try: - if rng is None or subrng > rng: # pick most generic range - rng = subrng - except TypeError as err: - raise errors.ConsistencyError(f'ranges {subrng} and {rng} are not related') from err + if rng is None or subrng > rng: # pick most generic range + rng = subrng + # check range consistency + if not subrng <= rng and not subrng >= rng: + raise errors.ConsistencyError(f'ranges {subrng} and {rng} are not related') if not isinstance(rng, (bsc.Node, bsc.Literal)): raise errors.BackendError(f'the range of node {node} is undefined') return rng diff --git a/bsfs/query/validator.py b/bsfs/query/validator.py index 75b51ca..ecea951 100644 --- a/bsfs/query/validator.py +++ b/bsfs/query/validator.py @@ -113,25 +113,18 @@ class Filter(): for pred in node: # parse child expression subdom, subrng = self._parse_predicate_expression(pred) - try: - # determine overall domain - if dom is None or subdom < dom: # pick most specific domain - dom = subdom - # domains must be related across all child expressions - if not subdom <= dom and not subdom >= dom: - raise errors.ConsistencyError(f'domains {subdom} and {dom} are not related') - except TypeError as err: # compared literal vs. node - raise errors.ConsistencyError(f'domains {subdom} and {dom} are not of the same type') from err - - try: - # determine overall range - if rng is None or subrng > rng: # pick most generic range - rng = subrng - # ranges must be related across all child expressions - if not subrng <= rng and not subrng >= rng: - raise errors.ConsistencyError(f'ranges {subrng} and {rng} are not related') - except TypeError as err: # compared literal vs. node - raise errors.ConsistencyError(f'ranges {subrng} and {rng} are not of the same type') from err + # determine overall domain + if dom is None or subdom < dom: # pick most specific domain + dom = subdom + # domains must be related across all child expressions + if not subdom <= dom and not subdom >= dom: + raise errors.ConsistencyError(f'domains {subdom} and {dom} are not related') + # determine overall range + if rng is None or subrng > rng: # pick most generic range + rng = subrng + # ranges must be related across all child expressions + if not subrng <= rng and not subrng >= rng: + raise errors.ConsistencyError(f'ranges {subrng} and {rng} are not related') # OneOf guarantees at least one expression, dom and rng are always bsc.Vertex. # mypy does not realize this, hence we ignore the warning. return dom, rng # type: ignore [return-value] diff --git a/bsfs/schema/types.py b/bsfs/schema/types.py index 6257dee..95dc66a 100644 --- a/bsfs/schema/types.py +++ b/bsfs/schema/types.py @@ -150,8 +150,10 @@ class _Type(): def __lt__(self, other: typing.Any) -> bool: """Return True iff *self* is a true subclass of *other*.""" - if not isinstance(other, type(self)): + if not isinstance(other, _Type): return NotImplemented + if not isinstance(other, type(self)): # FIXME: necessary? + return False if self.uri == other.uri: # equivalence return False if self in other.parents(): # superclass @@ -163,8 +165,10 @@ class _Type(): def __le__(self, other: typing.Any) -> bool: """Return True iff *self* is equivalent or a subclass of *other*.""" - if not isinstance(other, type(self)): + if not isinstance(other, _Type): return NotImplemented + if not isinstance(other, type(self)): # FIXME: necessary? + return False if self.uri == other.uri: # equivalence return True if self in other.parents(): # superclass @@ -176,8 +180,10 @@ class _Type(): def __gt__(self, other: typing.Any) -> bool: """Return True iff *self* is a true superclass of *other*.""" - if not isinstance(other, type(self)): + if not isinstance(other, _Type): return NotImplemented + if not isinstance(other, type(self)): # FIXME: necessary? + return False if self.uri == other.uri: # equivalence return False if self in other.parents(): # superclass @@ -189,8 +195,10 @@ class _Type(): def __ge__(self, other: typing.Any) -> bool: """Return True iff *self* is eqiuvalent or a superclass of *other*.""" - if not isinstance(other, type(self)): + if not isinstance(other, _Type): return NotImplemented + if not isinstance(other, type(self)): # FIXME: necessary? + return False if self.uri == other.uri: # equivalence return True if self in other.parents(): # superclass diff --git a/bsfs/triple_store/sparql/parse_filter.py b/bsfs/triple_store/sparql/parse_filter.py index 18a3288..5d8a2d9 100644 --- a/bsfs/triple_store/sparql/parse_filter.py +++ b/bsfs/triple_store/sparql/parse_filter.py @@ -131,15 +131,12 @@ class Filter(): puri, subrng = self._parse_predicate_expression(node_type, pred) # track predicate uris suburi.add(puri) - try: - # check for more generic range - if rng is None or subrng > rng: - rng = subrng - # check range consistency - if not subrng <= rng and not subrng >= rng: - raise errors.ConsistencyError(f'ranges {subrng} and {rng} are not related') - except TypeError as err: # subrng and rng are not comparable - raise errors.ConsistencyError(f'ranges {subrng} and {rng} are not related') from err + # check for more generic range + if rng is None or subrng > rng: + rng = subrng + # check range consistency + if not subrng <= rng and not subrng >= rng: + raise errors.ConsistencyError(f'ranges {subrng} and {rng} are not related') # return joint predicate expression and next range # OneOf guarantees at least one expression, rng is always a bsc.Vertex. # mypy does not realize this, hence we ignore the warning. -- cgit v1.2.3 From ccaee71e2b6135d3b324fe551c8652940b67aab3 Mon Sep 17 00:00:00 2001 From: Matthias Baumgartner Date: Sun, 15 Jan 2023 20:57:42 +0100 Subject: Feature as Literal instead of Predicate subtype --- bsfs/schema/__init__.py | 7 +- bsfs/schema/schema.py | 4 +- bsfs/schema/serialize.py | 83 +++++++++++++----------- bsfs/schema/types.py | 162 +++++++++++++++++++++++------------------------ 4 files changed, 133 insertions(+), 123 deletions(-) (limited to 'bsfs') diff --git a/bsfs/schema/__init__.py b/bsfs/schema/__init__.py index 31d7d61..f53512e 100644 --- a/bsfs/schema/__init__.py +++ b/bsfs/schema/__init__.py @@ -10,8 +10,11 @@ import typing # inner-module imports from .schema import Schema from .serialize import from_string, to_string -from .types import Literal, Node, Predicate, Vertex, \ - ROOT_FEATURE, ROOT_LITERAL, ROOT_NODE, ROOT_NUMBER, ROOT_PREDICATE, ROOT_VERTEX +from .types import Literal, Node, Predicate, Vertex, Feature, \ + ROOT_VERTEX, ROOT_NODE, ROOT_LITERAL, \ + ROOT_NUMBER, ROOT_TIME, \ + ROOT_ARRAY, ROOT_FEATURE, \ + ROOT_PREDICATE # exports __all__: typing.Sequence[str] = ( diff --git a/bsfs/schema/schema.py b/bsfs/schema/schema.py index bc50d4e..8d9a821 100644 --- a/bsfs/schema/schema.py +++ b/bsfs/schema/schema.py @@ -70,7 +70,9 @@ class Schema(): predicates.add(types.ROOT_PREDICATE) # add minimally necessary types to the schema literals.add(types.ROOT_NUMBER) - predicates.add(types.ROOT_FEATURE) + literals.add(types.ROOT_TIME) + literals.add(types.ROOT_ARRAY) + literals.add(types.ROOT_FEATURE) # FIXME: ensure that types derive from the right root? diff --git a/bsfs/schema/serialize.py b/bsfs/schema/serialize.py index a566d65..8b31737 100644 --- a/bsfs/schema/serialize.py +++ b/bsfs/schema/serialize.py @@ -35,13 +35,27 @@ def from_string(schema_str: str) -> schema.Schema: graph.parse(data=schema_str, format='turtle') # helper functions + # FIXME: type annotation + def _fetch_value(subject: URI, predicate: rdflib.URIRef, value_factory) -> typing.Optional[typing.Any]: + """Fetch the object of a given subject and predicate. + Raises a `errors.ConsistencyError` if multiple objects match. + """ + values = list(graph.objects(rdflib.URIRef(subject), predicate)) + if len(values) == 0: + return None + if len(values) == 1: + return value_factory(values[0]) + raise errors.ConsistencyError( + f'{subject} has multiple values for predicate {str(predicate)}, expected zero or one') + def _convert(value): """Convert the subject type from rdflib to a bsfs native type.""" if isinstance(value, rdflib.Literal): return value.value if isinstance(value, rdflib.URIRef): return URI(value) - raise errors.UnreachableError(f'expected Literal or URIRef, found {typename(value)}') + # value is neither a node nor a literal, but e.g. a blank node + raise errors.BackendError(f'expected Literal or URIRef, found {typename(value)}') def _fetch_hierarchically(factory, curr): """Walk through a rdfs:subClassOf hierarchy, creating symbols along the way.""" @@ -71,30 +85,36 @@ def from_string(schema_str: str) -> schema.Schema: raise errors.ConsistencyError('inconsistent nodes') # fetch literals - literals = set(_fetch_hierarchically(types.Literal, types.ROOT_LITERAL)) + def _build_literal(uri, parent, **annotations): + """Literal factory.""" + # break out on root feature type + if uri == types.ROOT_FEATURE.uri: + return types.ROOT_FEATURE + # handle feature types + if isinstance(parent, types.Feature): + # clean annotations + annotations.pop(ns.bsfs.dimension, None) + annotations.pop(ns.bsfs.dtype, None) + annotations.pop(ns.bsfs.distance, None) + # get dimension + dimension = _fetch_value(uri, rdflib.URIRef(ns.bsfs.dimension), int) + # get dtype + dtype = _fetch_value(uri, rdflib.URIRef(ns.bsfs.dtype), URI) + # get distance + distance = _fetch_value(uri, rdflib.URIRef(ns.bsfs.distance), URI) + # return feature + return parent.child(URI(uri), dtype=dtype, dimension=dimension, distance=distance, **annotations) + # handle non-feature types + return parent.child(URI(uri), **annotations) + + literals = set(_fetch_hierarchically(_build_literal, types.ROOT_LITERAL)) literals_lut = {lit.uri: lit for lit in literals} if len(literals_lut) != len(literals): raise errors.ConsistencyError('inconsistent literals') # fetch predicates - # FIXME: type annotation - def _fetch_value(subject: URI, predicate: rdflib.URIRef, value_factory) -> typing.Optional[typing.Any]: - """Fetch the object of a given subject and predicate. - Raises a `errors.ConsistencyError` if multiple objects match. - """ - values = list(graph.objects(rdflib.URIRef(subject), predicate)) - if len(values) == 0: - return None - if len(values) == 1: - return value_factory(values[0]) - raise errors.ConsistencyError( - f'{subject} has multiple values for predicate {str(predicate)}, expected zero or one') - def _build_predicate(uri, parent, **annotations): """Predicate factory.""" - # break out on root feature type - if uri == types.ROOT_FEATURE.uri: - return types.ROOT_FEATURE # clean annotations annotations.pop(ns.rdfs.domain, None) annotations.pop(ns.rdfs.range, None) @@ -113,23 +133,9 @@ def from_string(schema_str: str) -> schema.Schema: rng = nodes_lut.get(rng, literals_lut.get(rng)) # get unique unique = _fetch_value(uri, rdflib.URIRef(ns.bsfs.unique), bool) - # handle feature types - if isinstance(parent, types.Feature): - # clean annotations - annotations.pop(ns.bsfs.dimension, None) - annotations.pop(ns.bsfs.dtype, None) - annotations.pop(ns.bsfs.distance, None) - # get dimension - dimension = _fetch_value(uri, rdflib.URIRef(ns.bsfs.dimension), int) - # get dtype - dtype = _fetch_value(uri, rdflib.URIRef(ns.bsfs.dtype), URI) - # get distance - distance = _fetch_value(uri, rdflib.URIRef(ns.bsfs.distance), URI) - # return feature - return parent.child(URI(uri), domain=dom, range=rng, unique=unique, - dtype=dtype, dimension=dimension, distance=distance, **annotations) - # handle non-feature predicate + # build predicate return parent.child(URI(uri), domain=dom, range=rng, unique=unique, **annotations) + predicates = _fetch_hierarchically(_build_predicate, types.ROOT_PREDICATE) return schema.Schema(predicates, nodes, literals) @@ -214,9 +220,12 @@ def to_string(schema_inst: schema.Schema, fmt: str = 'turtle') -> str: def _parse(node: types._Type) -> T_TRIPLE: """Emit all properties of a type.""" - if isinstance(node, types._Type): # pylint: disable=protected-access - # NOTE: all nodes are _Type - yield from _type(node) + # check arg + if not isinstance(node, types._Type): # pylint: disable=protected-access + raise TypeError(node) + # emit _Type essentials + yield from _type(node) + # emit properties of derived types if isinstance(node, types.Predicate): yield from _predicate(node) if isinstance(node, types.Feature): diff --git a/bsfs/schema/types.py b/bsfs/schema/types.py index 95dc66a..3a2e10c 100644 --- a/bsfs/schema/types.py +++ b/bsfs/schema/types.py @@ -226,10 +226,70 @@ class Node(Vertex): class Literal(Vertex): """Literal type.""" parent: typing.Optional['Literal'] - def __init__(self, uri: URI, parent: typing.Optional['Literal'] ,**kwargs): + def __init__(self, uri: URI, parent: typing.Optional['Literal'], **kwargs): super().__init__(uri, parent, **kwargs) +class Feature(Literal): + """Feature type.""" + + # Number of feature vector dimensions. + dimension: int + + # Feature vector datatype. + dtype: URI + + # Distance measure to compare feature vectors. + distance: URI + + def __init__( + self, + # Type members + uri: URI, + parent: typing.Optional[Literal], + # Feature members + dimension: int, + dtype: URI, + distance: URI, + **kwargs, + ): + super().__init__(uri, parent, **kwargs) + self.dimension = int(dimension) + self.dtype = URI(dtype) + self.distance = URI(distance) + + def __hash__(self) -> int: + return hash((super().__hash__(), self.dimension, self.dtype, self.distance)) + + def __eq__(self, other: typing.Any) -> bool: + return super().__eq__(other) \ + and self.dimension == other.dimension \ + and self.dtype == other.dtype \ + and self.distance == other.distance + + def child( + self, + uri: URI, + dimension: typing.Optional[int] = None, + dtype: typing.Optional[URI] = None, + distance: typing.Optional[URI] = None, + **kwargs, + ): + """Return a child of the current class.""" + if dimension is None: + dimension = self.dimension + if dtype is None: + dtype = self.dtype + if distance is None: + distance = self.distance + return super().child( + uri=uri, + dimension=dimension, + dtype=dtype, + distance=distance, + **kwargs, + ) + class Predicate(_Type): """Predicate base type.""" @@ -304,77 +364,6 @@ class Predicate(_Type): ) -class Feature(Predicate): - """Feature base type.""" - - # Number of feature vector dimensions. - dimension: int - - # Feature vector datatype. - dtype: URI - - # Distance measure to compare feature vectors. - distance: URI - - def __init__( - self, - # Type members - uri: URI, - parent: typing.Optional[Predicate], - # Predicate members - domain: Node, - range: Literal, # pylint: disable=redefined-builtin - unique: bool, - # Feature members - dimension: int, - dtype: URI, - distance: URI, - **kwargs, - ): - super().__init__(uri, parent, domain, range, unique, **kwargs) - self.dimension = int(dimension) - self.dtype = URI(dtype) - self.distance = URI(distance) - - def __hash__(self) -> int: - return hash((super().__hash__(), self.dimension, self.dtype, self.distance)) - - def __eq__(self, other: typing.Any) -> bool: - return super().__eq__(other) \ - and self.dimension == other.dimension \ - and self.dtype == other.dtype \ - and self.distance == other.distance - - def child( - self, - uri: URI, - domain: typing.Optional[Node] = None, - range: typing.Optional[Vertex] = None, # pylint: disable=redefined-builtin - unique: typing.Optional[bool] = None, - dimension: typing.Optional[int] = None, - dtype: typing.Optional[URI] = None, - distance: typing.Optional[URI] = None, - **kwargs, - ): - """Return a child of the current class.""" - if dimension is None: - dimension = self.dimension - if dtype is None: - dtype = self.dtype - if distance is None: - distance = self.distance - return super().child( - uri=uri, - domain=domain, - range=range, - unique=unique, - dimension=dimension, - dtype=dtype, - distance=distance, - **kwargs, - ) - - # essential vertices ROOT_VERTEX = Vertex( uri=ns.bsfs.Vertex, @@ -396,24 +385,31 @@ ROOT_NUMBER = Literal( parent=ROOT_LITERAL, ) -# essential predicates -ROOT_PREDICATE = Predicate( - uri=ns.bsfs.Predicate, - parent=None, - domain=ROOT_NODE, - range=ROOT_VERTEX, - unique=False, +ROOT_TIME = Literal( + uri=ns.bsfs.Time, + parent=ROOT_LITERAL, + ) + +ROOT_ARRAY = Literal( + uri=ns.bsfs.Array, + parent=ROOT_LITERAL, ) ROOT_FEATURE = Feature( uri=ns.bsfs.Feature, - parent=ROOT_PREDICATE, - domain=ROOT_NODE, - range=ROOT_LITERAL, - unique=False, + parent=ROOT_ARRAY, dimension=1, dtype=ns.bsfs.f16, distance=ns.bsfs.euclidean, ) +# essential predicates +ROOT_PREDICATE = Predicate( + uri=ns.bsfs.Predicate, + parent=None, + domain=ROOT_NODE, + range=ROOT_VERTEX, + unique=False, + ) + ## EOF ## -- cgit v1.2.3 From 80a97bfa9f22d0d6dd25928fe1754a3a0d1de78a Mon Sep 17 00:00:00 2001 From: Matthias Baumgartner Date: Sun, 15 Jan 2023 21:00:12 +0100 Subject: Distance filter ast node --- bsfs/graph/resolve.py | 5 +++ bsfs/query/ast/filter_.py | 59 +++++++++++++++++++++++++------- bsfs/query/validator.py | 16 +++++++++ bsfs/triple_store/sparql/distance.py | 56 ++++++++++++++++++++++++++++++ bsfs/triple_store/sparql/parse_filter.py | 41 +++++++++++++++++++++- bsfs/triple_store/sparql/sparql.py | 13 ++++++- 6 files changed, 176 insertions(+), 14 deletions(-) create mode 100644 bsfs/triple_store/sparql/distance.py (limited to 'bsfs') diff --git a/bsfs/graph/resolve.py b/bsfs/graph/resolve.py index b671204..00b778b 100644 --- a/bsfs/graph/resolve.py +++ b/bsfs/graph/resolve.py @@ -63,6 +63,8 @@ class Filter(): return self._and(type_, node) if isinstance(node, ast.filter.Or): return self._or(type_, node) + if isinstance(node, ast.filter.Distance): + return self._distance(type_, node) if isinstance(node, (ast.filter.Equals, ast.filter.Substring, \ ast.filter.StartsWith, ast.filter.EndsWith)): return self._value(type_, node) @@ -125,6 +127,9 @@ class Filter(): def _has(self, type_: bsc.Vertex, node: ast.filter.Has) -> ast.filter.Has: # pylint: disable=unused-argument return node + def _distance(self, type_: bsc.Vertex, node: ast.filter.Distance): # pylint: disable=unused-argument + return node + def _value(self, type_: bsc.Vertex, node: ast.filter._Value) -> ast.filter._Value: # pylint: disable=unused-argument return node diff --git a/bsfs/query/ast/filter_.py b/bsfs/query/ast/filter_.py index b129ded..2f0270c 100644 --- a/bsfs/query/ast/filter_.py +++ b/bsfs/query/ast/filter_.py @@ -252,8 +252,7 @@ class Has(FilterExpression): class _Value(FilterExpression): - """ - """ + """Matches some value.""" # target value. value: typing.Any @@ -277,13 +276,13 @@ class Is(_Value): class Equals(_Value): """Value matches exactly. - NOTE: Value format must correspond to literal type; can be a string, a number, or a Node + NOTE: Value must correspond to literal type. """ class Substring(_Value): """Value matches a substring - NOTE: value format must be a string + NOTE: value must be a string. """ @@ -295,9 +294,49 @@ class EndsWith(_Value): """Value ends with a given string.""" +class Distance(FilterExpression): + """Distance to a reference is (strictly) below a threshold. Assumes a Feature literal.""" + + # FIXME: + # (a) pass a node/predicate as anchor instead of a value. + # Then we don't need to materialize the reference. + # (b) pass a FilterExpression (_Bounded) instead of a threshold. + # Then, we could also query values greater than a threshold. + + # reference value. + reference: typing.Any + + # distance threshold. + threshold: float + + # closed (True) or open (False) bound. + strict: bool + + def __init__( + self, + reference: typing.Any, + threshold: float, + strict: bool = False, + ): + self.reference = reference + self.threshold = float(threshold) + self.strict = bool(strict) + + def __repr__(self) -> str: + return f'{typename(self)}({self.reference}, {self.threshold}, {self.strict})' + + def __hash__(self) -> int: + return hash((super().__hash__(), tuple(self.reference), self.threshold, self.strict)) + + def __eq__(self, other) -> bool: + return super().__eq__(other) \ + and self.reference == other.reference \ + and self.threshold == other.threshold \ + and self.strict == other.strict + + class _Bounded(FilterExpression): - """ - """ + """Value is bounded by a threshold. Assumes a Number literal.""" # bound. threshold: float @@ -327,15 +366,11 @@ class _Bounded(FilterExpression): class LessThan(_Bounded): - """Value is (strictly) smaller than threshold. - NOTE: only on numerical literals - """ + """Value is (strictly) smaller than threshold. Assumes a Number literal.""" class GreaterThan(_Bounded): - """Value is (strictly) larger than threshold - NOTE: only on numerical literals - """ + """Value is (strictly) larger than threshold. Assumes a Number literal.""" class Predicate(PredicateExpression): diff --git a/bsfs/query/validator.py b/bsfs/query/validator.py index ecea951..1b7f688 100644 --- a/bsfs/query/validator.py +++ b/bsfs/query/validator.py @@ -69,6 +69,8 @@ class Filter(): return self._not(type_, node) if isinstance(node, ast.filter.Has): return self._has(type_, node) + if isinstance(node, ast.filter.Distance): + return self._distance(type_, node) if isinstance(node, (ast.filter.Any, ast.filter.All)): return self._branch(type_, node) if isinstance(node, (ast.filter.And, ast.filter.Or)): @@ -177,6 +179,20 @@ class Filter(): # node.count is a numerical expression self._parse_filter_expression(self.schema.literal(ns.bsfs.Number), node.count) + def _distance(self, type_: bsc.Vertex, node: ast.filter.Distance): + # type is a Literal + if not isinstance(type_, bsc.Feature): + raise errors.ConsistencyError(f'expected a Feature, found {type_}') + # type exists in the schema + if type_ not in self.schema.literals(): + raise errors.ConsistencyError(f'literal {type_} is not in the schema') + # reference matches type_ + if len(node.reference) != type_.dimension: + raise errors.ConsistencyError(f'reference has dimension {len(node.reference)}, expected {type_.dimension}') + # FIXME: + #if node.reference.dtype != type_.dtype: + # raise errors.ConsistencyError(f'') + ## conditions diff --git a/bsfs/triple_store/sparql/distance.py b/bsfs/triple_store/sparql/distance.py new file mode 100644 index 0000000..2f5387a --- /dev/null +++ b/bsfs/triple_store/sparql/distance.py @@ -0,0 +1,56 @@ +""" + +Part of the BlackStar filesystem (bsfs) module. +A copy of the license is provided with the project. +Author: Matthias Baumgartner, 2022 +""" +# standard imports +import typing + +# external imports +import numpy as np + +# bsfs imports +from bsfs.namespace import ns + +# constants +EPS = 1e-9 + +# exports +__all__: typing.Sequence[str] = ( + 'DISTANCE_FU', + ) + + +## code ## + +def euclid(fst, snd) -> float: + """Euclidean distance (l2 norm).""" + fst = np.array(fst) + snd = np.array(snd) + return float(np.linalg.norm(fst - snd)) + +def cosine(fst, snd) -> float: + """Cosine distance.""" + fst = np.array(fst) + snd = np.array(snd) + if (fst == snd).all(): + return 0.0 + nrm0 = np.linalg.norm(fst) + nrm1 = np.linalg.norm(snd) + return float(1.0 - np.dot(fst, snd) / (nrm0 * nrm1 + EPS)) + +def manhatten(fst, snd) -> float: + """Manhatten (cityblock) distance (l1 norm).""" + fst = np.array(fst) + snd = np.array(snd) + return float(np.abs(fst - snd).sum()) + +# Known distance functions. +DISTANCE_FU = { + ns.bsfs.euclidean: euclid, + ns.bsfs.cosine: cosine, + ns.bsfs.manhatten: manhatten, +} + +## EOF ## diff --git a/bsfs/triple_store/sparql/parse_filter.py b/bsfs/triple_store/sparql/parse_filter.py index 5d8a2d9..8b6b976 100644 --- a/bsfs/triple_store/sparql/parse_filter.py +++ b/bsfs/triple_store/sparql/parse_filter.py @@ -5,19 +5,29 @@ A copy of the license is provided with the project. Author: Matthias Baumgartner, 2022 """ # imports +import operator import typing +# external imports +import rdflib + # bsfs imports from bsfs import schema as bsc from bsfs.namespace import ns from bsfs.query import ast from bsfs.utils import URI, errors +# inner-module imports +from .distance import DISTANCE_FU + # exports __all__: typing.Sequence[str] = ( 'Filter', ) + +## code ## + class _GenHopName(): """Generator that produces a new unique symbol name with each iteration.""" @@ -46,7 +56,8 @@ class Filter(): # Generator that produces unique symbol names. ngen: _GenHopName - def __init__(self, schema): + def __init__(self, graph, schema): + self.graph = graph self.schema = schema self.ngen = _GenHopName() @@ -84,6 +95,8 @@ class Filter(): return self._not(type_, node, head) if isinstance(node, ast.filter.Has): return self._has(type_, node, head) + if isinstance(node, ast.filter.Distance): + return self._distance(type_, node, head) if isinstance(node, ast.filter.Any): return self._any(type_, node, head) if isinstance(node, ast.filter.All): @@ -243,6 +256,32 @@ class Filter(): # combine return num_preds + ' . ' + count_bounds + def _distance(self, node_type: bsc.Vertex, node: ast.filter.Distance, head: str) -> str: + """ + """ + if not isinstance(node_type, bsc.Feature): + raise errors.BackendError(f'expected Feature, found {node_type}') + if len(node.reference) != node_type.dimension: + raise errors.ConsistencyError( + f'reference has dimension {len(node.reference)}, expected {node_type.dimension}') + # get distance metric + dist = DISTANCE_FU[node_type.distance] + # get operator + cmp = operator.lt if node.strict else operator.le + # get candidate values + candidates = { + f'"{cand}"^^<{node_type.uri}>' + for cand + in self.graph.objects() + if isinstance(cand, rdflib.Literal) + and cand.datatype == rdflib.URIRef(node_type.uri) + and cmp(dist(cand.value, node.reference), node.threshold) + } + # combine candidate values + values = ' '.join(candidates) if len(candidates) else f'"impossible value"^^<{ns.xsd.string}>' + # return sparql fragment + return f'VALUES {head} {{ {values} }}' + def _is(self, node_type: bsc.Vertex, node: ast.filter.Is, head: str) -> str: """ """ diff --git a/bsfs/triple_store/sparql/sparql.py b/bsfs/triple_store/sparql/sparql.py index 3877d1a..dfd9871 100644 --- a/bsfs/triple_store/sparql/sparql.py +++ b/bsfs/triple_store/sparql/sparql.py @@ -18,6 +18,7 @@ from bsfs.utils import errors, URI # inner-module imports from . import parse_filter from .. import base +from .distance import DISTANCE_FU # exports @@ -97,7 +98,7 @@ class SparqlStore(base.TripleStoreBase): self._transaction = _Transaction(self._graph) # NOTE: parsing bsfs.query.ast.filter.Has requires xsd:integer. self._schema = bsc.Schema(literals={bsc.ROOT_NUMBER.child(ns.xsd.integer)}) - self._filter_parser = parse_filter.Filter(self._schema) + self._filter_parser = parse_filter.Filter(self._graph, self._schema) # NOTE: mypy and pylint complain about the **kwargs not being listed (contrasting super) # However, not having it here is clearer since it's explicit that there are no arguments. @@ -123,6 +124,16 @@ class SparqlStore(base.TripleStoreBase): # check compatibility: No contradicting definitions if not self.schema.consistent_with(schema): raise errors.ConsistencyError(f'{schema} is inconsistent with {self.schema}') + # check distance functions of features + invalid = { + (cand.uri, cand.distance) + for cand + in schema.literals() + if isinstance(cand, bsc.Feature) and cand.distance not in DISTANCE_FU} + if len(invalid) > 0: + cand, dist = zip(*invalid) + raise ValueError( + f'unknown distance function {",".join(dist)} in feature {", ".join(cand)}') # commit the current transaction self.commit() -- cgit v1.2.3 From afdfc25408c3b9d2c779c83e2e193d68a973810b Mon Sep 17 00:00:00 2001 From: Matthias Baumgartner Date: Mon, 16 Jan 2023 21:38:55 +0100 Subject: namespace to string plain uri --- bsfs/namespace/namespace.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'bsfs') diff --git a/bsfs/namespace/namespace.py b/bsfs/namespace/namespace.py index f652dcd..1d443c1 100644 --- a/bsfs/namespace/namespace.py +++ b/bsfs/namespace/namespace.py @@ -59,7 +59,7 @@ class Namespace(): return hash((type(self), self.prefix, self.fsep, self.psep)) def __str__(self) -> str: - return f'{typename(self)}({self.prefix})' + return str(self.prefix) def __repr__(self) -> str: return f'{typename(self)}({self.prefix}, {self.fsep}, {self.psep})' -- cgit v1.2.3 From 76fa694911f54e293ddf517246c6c4a1e8e745fd Mon Sep 17 00:00:00 2001 From: Matthias Baumgartner Date: Mon, 16 Jan 2023 21:39:47 +0100 Subject: uuid from dict --- bsfs/utils/uuid.py | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'bsfs') diff --git a/bsfs/utils/uuid.py b/bsfs/utils/uuid.py index 6366b18..ba5cf52 100644 --- a/bsfs/utils/uuid.py +++ b/bsfs/utils/uuid.py @@ -7,6 +7,7 @@ Author: Matthias Baumgartner, 2022 # imports from collections import abc import hashlib +import json import os import platform import random @@ -105,4 +106,10 @@ class UCID(): with open(path, 'rb') as ifile: return HASH(ifile.read()).hexdigest() + + @staticmethod + def from_dict(content: dict) -> str: + """Get the content from a dict.""" + return HASH(json.dumps(content).encode('ascii', 'ignore')).hexdigest() + ## EOF ## -- cgit v1.2.3 From 3504609e1ba1f7f653fa79910474bebd3ec24d8a Mon Sep 17 00:00:00 2001 From: Matthias Baumgartner Date: Mon, 16 Jan 2023 21:41:20 +0100 Subject: various minor fixes --- bsfs/query/validator.py | 4 +--- bsfs/schema/serialize.py | 18 +++++++++++++----- bsfs/triple_store/sparql/sparql.py | 2 +- bsfs/utils/errors.py | 3 +++ 4 files changed, 18 insertions(+), 9 deletions(-) (limited to 'bsfs') diff --git a/bsfs/query/validator.py b/bsfs/query/validator.py index 1b7f688..904ac14 100644 --- a/bsfs/query/validator.py +++ b/bsfs/query/validator.py @@ -189,9 +189,7 @@ class Filter(): # reference matches type_ if len(node.reference) != type_.dimension: raise errors.ConsistencyError(f'reference has dimension {len(node.reference)}, expected {type_.dimension}') - # FIXME: - #if node.reference.dtype != type_.dtype: - # raise errors.ConsistencyError(f'') + # FIXME: test dtype ## conditions diff --git a/bsfs/schema/serialize.py b/bsfs/schema/serialize.py index 8b31737..acc009a 100644 --- a/bsfs/schema/serialize.py +++ b/bsfs/schema/serialize.py @@ -35,8 +35,11 @@ def from_string(schema_str: str) -> schema.Schema: graph.parse(data=schema_str, format='turtle') # helper functions - # FIXME: type annotation - def _fetch_value(subject: URI, predicate: rdflib.URIRef, value_factory) -> typing.Optional[typing.Any]: + def _fetch_value( + subject: URI, + predicate: rdflib.URIRef, + value_factory: typing.Callable[[typing.Any], typing.Any], + ) -> typing.Optional[typing.Any]: """Fetch the object of a given subject and predicate. Raises a `errors.ConsistencyError` if multiple objects match. """ @@ -242,9 +245,14 @@ def to_string(schema_inst: schema.Schema, fmt: str = 'turtle') -> str: for triple in _parse(node): graph.add(triple) # add known namespaces for readability - # FIXME: more systematically (e.g. for all in ns?) - graph.bind('bsfs', rdflib.URIRef('http://bsfs.ai/schema/')) - graph.bind('bse', rdflib.URIRef('http://bsfs.ai/schema/Entity#')) + # FIXME: more generically? + graph.bind('bse', rdflib.URIRef(ns.bse[''])) + graph.bind('bsfs', rdflib.URIRef(ns.bsfs[''])) + graph.bind('bsm', rdflib.URIRef(ns.bsm[''])) + graph.bind('rdf', rdflib.URIRef(ns.rdf[''])) + graph.bind('rdfs', rdflib.URIRef(ns.rdfs[''])) + graph.bind('schema', rdflib.URIRef(ns.schema[''])) + graph.bind('xsd', rdflib.URIRef(ns.xsd[''])) # serialize to turtle return graph.serialize(format=fmt) diff --git a/bsfs/triple_store/sparql/sparql.py b/bsfs/triple_store/sparql/sparql.py index dfd9871..fedd227 100644 --- a/bsfs/triple_store/sparql/sparql.py +++ b/bsfs/triple_store/sparql/sparql.py @@ -132,7 +132,7 @@ class SparqlStore(base.TripleStoreBase): if isinstance(cand, bsc.Feature) and cand.distance not in DISTANCE_FU} if len(invalid) > 0: cand, dist = zip(*invalid) - raise ValueError( + raise errors.UnsupportedError( f'unknown distance function {",".join(dist)} in feature {", ".join(cand)}') # commit the current transaction diff --git a/bsfs/utils/errors.py b/bsfs/utils/errors.py index be9d40e..6ae6484 100644 --- a/bsfs/utils/errors.py +++ b/bsfs/utils/errors.py @@ -41,4 +41,7 @@ class ConfigError(_BSFSError): class BackendError(_BSFSError): """Could not parse an AST structure.""" +class UnsupportedError(_BSFSError): + """Some requested functionality is not supported by an implementation.""" + ## EOF ## -- cgit v1.2.3 From a4789394e40aaa3152ad6009955709a6c7d277c2 Mon Sep 17 00:00:00 2001 From: Matthias Baumgartner Date: Fri, 20 Jan 2023 14:36:11 +0100 Subject: fetch AST --- bsfs/query/ast/__init__.py | 4 +- bsfs/query/ast/fetch.py | 175 +++++++++++++++++++++++++++++++++++++++++++++ bsfs/query/ast/filter_.py | 1 + 3 files changed, 179 insertions(+), 1 deletion(-) create mode 100644 bsfs/query/ast/fetch.py (limited to 'bsfs') diff --git a/bsfs/query/ast/__init__.py b/bsfs/query/ast/__init__.py index 704d051..66b097d 100644 --- a/bsfs/query/ast/__init__.py +++ b/bsfs/query/ast/__init__.py @@ -1,6 +1,6 @@ """Query AST components. -The query AST consists of a Filter syntax tree. +The query AST consists of a Filter and a Fetch syntax trees. Classes beginning with an underscore (_) represent internal type hierarchies and should not be used for parsing. Note that the AST structures do not @@ -14,10 +14,12 @@ Author: Matthias Baumgartner, 2022 import typing # inner-module imports +from . import fetch from . import filter_ as filter # pylint: disable=redefined-builtin # exports __all__: typing.Sequence[str] = ( + 'fetch', 'filter', ) diff --git a/bsfs/query/ast/fetch.py b/bsfs/query/ast/fetch.py new file mode 100644 index 0000000..5e603a1 --- /dev/null +++ b/bsfs/query/ast/fetch.py @@ -0,0 +1,175 @@ +""" + +Part of the BlackStar filesystem (bsfs) module. +A copy of the license is provided with the project. +Author: Matthias Baumgartner, 2022 +""" +# imports +from collections import abc +import typing + +# bsfs imports +from bsfs.utils import URI, typename, normalize_args + +# exports +__all__ : typing.Sequence[str] = ( + 'All', + 'Fetch', + 'FetchExpression', + 'Node', + 'This', + 'Value', + ) + + +## code ## + +class FetchExpression(abc.Hashable): + """Generic Fetch expression.""" + + def __repr__(self) -> str: + """Return the expressions's string representation.""" + return f'{typename(self)}()' + + def __hash__(self) -> int: + """Return the expression's integer representation.""" + return hash(type(self)) + + def __eq__(self, other: typing.Any) -> bool: + """Return True if *self* and *other* are equivalent.""" + return isinstance(other, type(self)) + + +class All(FetchExpression): + """Fetch all child expressions.""" + + # child expressions. + expr: typing.Set[FetchExpression] + + def __init__(self, *expr): + # unpack child expressions + unfolded = set(normalize_args(*expr)) + # check child expressions + if len(unfolded) == 0: + raise AttributeError('expected at least one expression, found none') + if not all(isinstance(itm, FetchExpression) for itm in unfolded): + raise TypeError(expr) + # initialize + super().__init__() + # assign members + self.expr = unfolded + + def __iter__(self) -> typing.Iterator[FetchExpression]: + return iter(self.expr) + + def __len__(self) -> int: + return len(self.expr) + + def __repr__(self) -> str: + return f'{typename(self)}({self.expr})' + + def __hash__(self) -> int: + # FIXME: Produces different hashes for different orders of self.expr + return hash((super().__hash__(), tuple(self.expr))) + + def __eq__(self, other: typing.Any) -> bool: + return super().__eq__(other) and self.expr == other.expr + + +class _Branch(FetchExpression): + """Branch along a predicate.""" + + # FIXME: Use a Predicate (like in ast.filter) so that we can also reverse them! + + # predicate to follow. + predicate: URI + + def __init__(self, predicate: URI): + if not isinstance(predicate, URI): + raise TypeError(predicate) + self.predicate = predicate + + def __repr__(self) -> str: + return f'{typename(self)}({self.predicate})' + + def __hash__(self) -> int: + return hash((super().__hash__(), self.predicate)) + + def __eq__(self, other: typing.Any) -> bool: + return super().__eq__(other) and self.predicate == other.predicate + + +class Fetch(_Branch): + """Follow a predicate before evaluating a child epxression.""" + + # child expression. + expr: FetchExpression + + def __init__(self, predicate: URI, expr: FetchExpression): + # check child expressions + if not isinstance(expr, FetchExpression): + raise TypeError(expr) + # initialize + super().__init__(predicate) + # assign members + self.expr = expr + + def __repr__(self) -> str: + return f'{typename(self)}({self.predicate}, {self.expr})' + + def __hash__(self) -> int: + return hash((super().__hash__(), self.expr)) + + def __eq__(self, other: typing.Any) -> bool: + return super().__eq__(other) and self.expr == other.expr + + +class _Named(_Branch): + """Fetch a (named) symbol at a predicate.""" + + # symbol name. + name: str + + def __init__(self, predicate: URI, name: str): + super().__init__(predicate) + self.name = str(name) + + def __repr__(self) -> str: + return f'{typename(self)}({self.predicate}, {self.name})' + + def __hash__(self) -> int: + return hash((super().__hash__(), self.name)) + + def __eq__(self, other: typing.Any) -> bool: + return super().__eq__(other) and self.name == other.name + + +class Node(_Named): # pylint: disable=too-few-public-methods + """Fetch a Node at a predicate.""" + # FIXME: Is this actually needed? + + +class Value(_Named): # pylint: disable=too-few-public-methods + """Fetch a Literal at a predicate.""" + + +class This(FetchExpression): + """Fetch the current Node.""" + + # symbol name. + name: str + + def __init__(self, name: str): + super().__init__() + self.name = str(name) + + def __repr__(self) -> str: + return f'{typename(self)}({self.name})' + + def __hash__(self) -> int: + return hash((super().__hash__(), self.name)) + + def __eq__(self, other: typing.Any) -> bool: + return super().__eq__(other) and self.name == other.name + +## EOF ## diff --git a/bsfs/query/ast/filter_.py b/bsfs/query/ast/filter_.py index 2f0270c..81b0de2 100644 --- a/bsfs/query/ast/filter_.py +++ b/bsfs/query/ast/filter_.py @@ -153,6 +153,7 @@ class _Agg(FilterExpression, abc.Collection): # check type if not all(isinstance(e, FilterExpression) for e in unfolded): raise TypeError(expr) + # FIXME: Require at least one child expression? # assign member self.expr = unfolded -- cgit v1.2.3 From e2f08efc0d8a3c875994bdb69623c30cce5079d9 Mon Sep 17 00:00:00 2001 From: Matthias Baumgartner Date: Fri, 20 Jan 2023 18:01:17 +0100 Subject: fetch AST validation --- bsfs/query/validator.py | 123 +++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 122 insertions(+), 1 deletion(-) (limited to 'bsfs') diff --git a/bsfs/query/validator.py b/bsfs/query/validator.py index 904ac14..9fbff12 100644 --- a/bsfs/query/validator.py +++ b/bsfs/query/validator.py @@ -49,7 +49,7 @@ class Filter(): """ # root_type must be a schema.Node if not isinstance(root_type, bsc.Node): - raise TypeError(f'Expected a node, found {typename(root_type)}') + raise TypeError(f'expected a node, found {typename(root_type)}') # root_type must exist in the schema if root_type not in self.schema.nodes(): raise errors.ConsistencyError(f'{root_type} is not defined in the schema') @@ -223,4 +223,125 @@ class Filter(): # FIXME: Check if node.value corresponds to type_ +class Fetch(): + """Validate a `bsfs.query.ast.fetch` query's structure and schema compliance. + + * Value can only be applied on literals + * Node can only be applied on nodes + * Names must be non-empty + * Branching nodes' predicates must match the type + * Symbols must be in the schema + * Predicates must follow the schema + + """ + + # schema to validate against. + schema: bsc.Schema + + def __init__(self, schema: bsc.Schema): + self.schema = schema + + def __call__(self, root_type: bsc.Node, query: ast.fetch.FetchExpression): + """Validate a fetch *query*, assuming the subject having *root_type*. + + Raises a `bsfs.utils.errors.ConsistencyError` if the query violates the schema. + Raises a `bsfs.utils.errors.BackendError` if the query structure is invalid. + + """ + # root_type must be a schema.Node + if not isinstance(root_type, bsc.Node): + raise TypeError(f'expected a node, found {typename(root_type)}') + # root_type must exist in the schema + if root_type not in self.schema.nodes(): + raise errors.ConsistencyError(f'{root_type} is not defined in the schema') + # query must be a FetchExpression + if not isinstance(query, ast.fetch.FetchExpression): + raise TypeError(f'expected a fetch expression, found {typename(query)}') + # check root expression + self._parse_fetch_expression(root_type, query) + # all tests passed + return True + + def _parse_fetch_expression(self, type_: bsc.Vertex, node: ast.fetch.FetchExpression): + """Route *node* to the handler of the respective FetchExpression subclass.""" + if isinstance(node, (ast.fetch.Fetch, ast.fetch.Value, ast.fetch.Node)): + # NOTE: don't return so that checks below are executed + self._branch(type_, node) + if isinstance(node, (ast.fetch.Value, ast.fetch.Node)): + # NOTE: don't return so that checks below are executed + self._named(type_, node) + if isinstance(node, ast.fetch.All): + return self._all(type_, node) + if isinstance(node, ast.fetch.Fetch): + return self._fetch(type_, node) + if isinstance(node, ast.fetch.Value): + return self._value(type_, node) + if isinstance(node, ast.fetch.Node): + return self._node(type_, node) + if isinstance(node, ast.fetch.This): + return self._this(type_, node) + # invalid node + raise errors.BackendError(f'expected fetch expression, found {node}') + + def _all(self, type_: bsc.Vertex, node: ast.fetch.All): + # check child expressions + for expr in node: + self._parse_fetch_expression(type_, expr) + + def _branch(self, type_: bsc.Vertex, node: ast.fetch._Branch): + # type is a node + if not isinstance(type_, bsc.Node): + raise errors.ConsistencyError(f'expected a Node, found {type_}') + # node exists in the schema + if type_ not in self.schema.nodes(): + raise errors.ConsistencyError(f'node {type_} is not in the schema') + # predicate exists in the schema + if not self.schema.has_predicate(node.predicate): + raise errors.ConsistencyError(f'predicate {node.predicate} is not in the schema') + pred = self.schema.predicate(node.predicate) + # type_ must be a subclass of domain + if not type_ <= pred.domain: + raise errors.ConsistencyError( + f'expected type {pred.domain} or subtype thereof, found {type_}') + + def _fetch(self, type_: bsc.Vertex, node: ast.fetch.Fetch): # pylint: disable=unused-argument # type_ was considered in _branch + # range must be a node + rng = self.schema.predicate(node.predicate).range + if not isinstance(rng, bsc.Node): + raise errors.ConsistencyError( + f'expected the predicate\'s range to be a Node, found {rng}') + # child expression must be valid + self._parse_fetch_expression(rng, node.expr) + + def _named(self, type_: bsc.Vertex, node: ast.fetch._Named): # pylint: disable=unused-argument # type_ was considered in _branch + # name must be set + if node.name.strip() == '': + raise errors.BackendError('node name cannot be empty') + # FIXME: check for double name use? + + def _node(self, type_: bsc.Vertex, node: ast.fetch.Node): # pylint: disable=unused-argument # type_ was considered in _branch + # range must be a node + rng = self.schema.predicate(node.predicate).range + if not isinstance(rng, bsc.Node): + raise errors.ConsistencyError( + f'expected the predicate\'s range to be a Node, found {rng}') + + def _value(self, type_: bsc.Vertex, node: ast.fetch.Value): # pylint: disable=unused-argument # type_ was considered in _branch + # range must be a literal + rng = self.schema.predicate(node.predicate).range + if not isinstance(rng, bsc.Literal): + raise errors.ConsistencyError( + f'expected the predicate\'s range to be a Literal, found {rng}') + + def _this(self, type_: bsc.Vertex, node: ast.fetch.This): + # type is a node + if not isinstance(type_, bsc.Node): + raise errors.ConsistencyError(f'expected a Node, found {type_}') + # node exists in the schema + if type_ not in self.schema.nodes(): + raise errors.ConsistencyError(f'node {type_} is not in the schema') + # name must be set + if node.name.strip() == '': + raise errors.BackendError('node name cannot be empty') + ## EOF ## -- cgit v1.2.3 From 965f4dfe41afd552ed6477c75e1286c14e3580f6 Mon Sep 17 00:00:00 2001 From: Matthias Baumgartner Date: Sat, 21 Jan 2023 16:31:08 +0100 Subject: Fetch in triple stores: * fetch interface * sparql fetch ast parser * sparql fetch implementation --- bsfs/triple_store/base.py | 33 +++++--- bsfs/triple_store/sparql/parse_fetch.py | 109 ++++++++++++++++++++++++ bsfs/triple_store/sparql/parse_filter.py | 45 ++++------ bsfs/triple_store/sparql/sparql.py | 50 +++++++++-- bsfs/triple_store/sparql/utils.py | 141 +++++++++++++++++++++++++++++++ 5 files changed, 332 insertions(+), 46 deletions(-) create mode 100644 bsfs/triple_store/sparql/parse_fetch.py create mode 100644 bsfs/triple_store/sparql/utils.py (limited to 'bsfs') diff --git a/bsfs/triple_store/base.py b/bsfs/triple_store/base.py index 7e03714..1baa63b 100644 --- a/bsfs/triple_store/base.py +++ b/bsfs/triple_store/base.py @@ -11,7 +11,7 @@ import typing # inner-module imports from bsfs.query import ast from bsfs.utils import URI, typename -import bsfs.schema as _schema +import bsfs.schema as bsc # exports __all__: typing.Sequence[str] = ( @@ -82,12 +82,12 @@ class TripleStoreBase(abc.ABC): @property @abc.abstractmethod - def schema(self) -> _schema.Schema: + def schema(self) -> bsc.Schema: """Return the store's local schema.""" @schema.setter @abc.abstractmethod - def schema(self, schema: _schema.Schema): + def schema(self, schema: bsc.Schema): """Migrate to new schema by adding or removing class definitions. Commits before and after the migration. @@ -112,17 +112,28 @@ class TripleStoreBase(abc.ABC): @abc.abstractmethod def get( self, - node_type: _schema.Node, - query: typing.Optional[ast.filter.FilterExpression] = None, + node_type: bsc.Node, + filter: typing.Optional[ast.filter.FilterExpression] = None, # pylint: disable=redefined-builtin ) -> typing.Iterator[URI]: - """Return guids of nodes of type *node_type* that match the *query*. - Return all guids of the respective type if *query* is None. + """Return guids of nodes of type *node_type* that match the *filter*. + Return all guids of the respective type if *filter* is None. + """ + + @abc.abstractmethod + def fetch( + self, + node_type: bsc.Node, + filter: ast.filter.FilterExpression, # pylint: disable=redefined-builtin + fetch: ast.fetch.FetchExpression, + ) -> typing.Iterator[typing.Tuple[URI, str, typing.Any]]: + """Return (guid, name, value) triples where the guid is determined by the *filter* + query and the name matches the *fetch* query. """ @abc.abstractmethod def exists( self, - node_type: _schema.Node, + node_type: bsc.Node, guids: typing.Iterable[URI], ) -> typing.Iterable[URI]: """Return those *guids* that exist and have type *node_type* or a subclass thereof.""" @@ -130,7 +141,7 @@ class TripleStoreBase(abc.ABC): @abc.abstractmethod def create( self, - node_type: _schema.Node, + node_type: bsc.Node, guids: typing.Iterable[URI], ): """Create *guid* nodes with type *subject*.""" @@ -138,9 +149,9 @@ class TripleStoreBase(abc.ABC): @abc.abstractmethod def set( self, - node_type: _schema.Node, # FIXME: is the node_type even needed? Couldn't I infer from the predicate? + node_type: bsc.Node, # FIXME: is the node_type even needed? Couldn't I infer from the predicate? guids: typing.Iterable[URI], - predicate: _schema.Predicate, + predicate: bsc.Predicate, values: typing.Iterable[typing.Any], ): """Add triples to the graph. diff --git a/bsfs/triple_store/sparql/parse_fetch.py b/bsfs/triple_store/sparql/parse_fetch.py new file mode 100644 index 0000000..20d4e74 --- /dev/null +++ b/bsfs/triple_store/sparql/parse_fetch.py @@ -0,0 +1,109 @@ +""" + +Part of the BlackStar filesystem (bsfs) module. +A copy of the license is provided with the project. +Author: Matthias Baumgartner, 2022 +""" +# standard imports +import typing + +# bsfs imports +from bsfs import schema as bsc +from bsfs.query import ast +from bsfs.utils import errors + +# inner-module imports +from .utils import GenHopName, Query + +# exports +__all__: typing.Sequence[str] = ( + 'Fetch', + ) + + +## code ## + +class Fetch(): + """Translate `bsfs.query.ast.fetch` structures into Sparql queries.""" + + def __init__(self, schema): + self.schema = schema + self.ngen = GenHopName(prefix='?fch') + + def __call__( + self, + root_type: bsc.Node, + root: ast.fetch.FetchExpression, + ) -> Query: + """ + """ + # check root_type + if not isinstance(root_type, bsc.Node): + raise errors.BackendError(f'expected Node, found {root_type}') + if root_type not in self.schema.nodes(): + raise errors.ConsistencyError(f'node {root_type} is not in the schema') + # parse root + terms, expr = self._parse_fetch_expression(root_type, root, '?ent') + # assemble query + return Query( + root_type=root_type.uri, + root_head='?ent', + select=terms, + where=expr, + ) + + def _parse_fetch_expression( + self, + node_type: bsc.Vertex, + node: ast.fetch.FetchExpression, + head: str, + ): + """Route *node* to the handler of the respective FetchExpression subclass.""" + if isinstance(node, ast.fetch.All): + return self._all(node_type, node, head) + if isinstance(node, ast.fetch.Fetch): + return self._fetch(node_type, node, head) + if isinstance(node, ast.fetch.Node): + return self._node(node_type, node, head) + if isinstance(node, ast.fetch.Value): + return self._value(node_type, node, head) + if isinstance(node, ast.fetch.This): + return self._this(node_type, node, head) + # invalid node + raise errors.BackendError(f'expected fetch expression, found {node}') + + def _all(self, node_type: bsc.Vertex, node: ast.fetch.All, head: str): + # child expressions + terms, exprs = zip(*[self._parse_fetch_expression(node_type, expr, head) for expr in node]) + terms = {term for sub in terms for term in sub} + exprs = ' .\n'.join({expr for expr in exprs if len(expr.strip()) > 0}) + return terms, exprs + + def _fetch(self, node_type: bsc.Vertex, node: ast.fetch.Fetch, head: str): # pylint: disable=unused-argument # (node_type) + # child expressions + rng = self.schema.predicate(node.predicate).range + nexthead = next(self.ngen) + terms, expr = self._parse_fetch_expression(rng, node.expr, nexthead) + return terms, f'OPTIONAL{{ {head} <{node.predicate}> {nexthead} .\n {expr} }}' + + def _node(self, node_type: bsc.Vertex, node: ast.fetch.Node, head: str): # pylint: disable=unused-argument # (node_type) + if f'?{node.name}'.startswith(self.ngen.prefix): + raise errors.BackendError(f'Node name must start with {self.ngen.prefix}') + # compose and return statement + term = next(self.ngen) + return {(term, node.name)}, f'OPTIONAL{{ {head} <{node.predicate}> {term} }}' + + def _value(self, node_type: bsc.Vertex, node: ast.fetch.Value, head: str): # pylint: disable=unused-argument # (node_type) + if f'?{node.name}'.startswith(self.ngen.prefix): + raise errors.BackendError(f'Value name must start with {self.ngen.prefix}') + # compose and return statement + term = next(self.ngen) + return {(term, node.name)}, f'OPTIONAL{{ {head} <{node.predicate}> {term} }}' + + def _this(self, node_type: bsc.Vertex, node: ast.fetch.This, head: str): # pylint: disable=unused-argument # (node_type) + if f'?{node.name}'.startswith(self.ngen.prefix): + raise errors.BackendError(f'This name must start with {self.ngen.prefix}') + # compose and return statement + return {(head, node.name)}, '' + +## EOF ## diff --git a/bsfs/triple_store/sparql/parse_filter.py b/bsfs/triple_store/sparql/parse_filter.py index 8b6b976..dca0aea 100644 --- a/bsfs/triple_store/sparql/parse_filter.py +++ b/bsfs/triple_store/sparql/parse_filter.py @@ -19,6 +19,7 @@ from bsfs.utils import URI, errors # inner-module imports from .distance import DISTANCE_FU +from .utils import GenHopName, Query # exports __all__: typing.Sequence[str] = ( @@ -28,25 +29,6 @@ __all__: typing.Sequence[str] = ( ## code ## -class _GenHopName(): - """Generator that produces a new unique symbol name with each iteration.""" - - # Symbol name prefix. - prefix: str - - # Current counter. - curr: int - - def __init__(self, prefix: str = '?hop', start: int = 0): - self.prefix = prefix - self.curr = start - 1 - - def __next__(self): - """Generate and return the next unique name.""" - self.curr += 1 - return self.prefix + str(self.curr) - - class Filter(): """Translate `bsfs.query.ast.filter` structures into Sparql queries.""" @@ -54,18 +36,18 @@ class Filter(): schema: bsc.Schema # Generator that produces unique symbol names. - ngen: _GenHopName + ngen: GenHopName def __init__(self, graph, schema): self.graph = graph self.schema = schema - self.ngen = _GenHopName() + self.ngen = GenHopName(prefix='?flt') def __call__( self, root_type: bsc.Node, root: typing.Optional[ast.filter.FilterExpression] = None, - ) -> str: + ) -> Query: """ """ # check root_type @@ -79,15 +61,18 @@ class Filter(): else: cond = self._parse_filter_expression(root_type, root, '?ent') # assemble query - return f''' - SELECT ?ent - WHERE {{ - ?ent <{ns.rdf.type}>/<{ns.rdfs.subClassOf}>* <{root_type.uri}> . - {cond} - }} - ''' + return Query( + root_type=root_type.uri, + root_head='?ent', + where=cond, + ) - def _parse_filter_expression(self, type_: bsc.Vertex, node: ast.filter.FilterExpression, head: str) -> str: + def _parse_filter_expression( + self, + type_: bsc.Vertex, + node: ast.filter.FilterExpression, + head: str, + ) -> str: """Route *node* to the handler of the respective FilterExpression subclass.""" if isinstance(node, ast.filter.Is): return self._is(type_, node, head) diff --git a/bsfs/triple_store/sparql/sparql.py b/bsfs/triple_store/sparql/sparql.py index fedd227..a0dd12e 100644 --- a/bsfs/triple_store/sparql/sparql.py +++ b/bsfs/triple_store/sparql/sparql.py @@ -16,6 +16,7 @@ from bsfs.query import ast from bsfs.utils import errors, URI # inner-module imports +from . import parse_fetch from . import parse_filter from .. import base from .distance import DISTANCE_FU @@ -92,13 +93,16 @@ class SparqlStore(base.TripleStoreBase): # Filter parser _filter_parser: parse_filter.Filter + # Fetch parser + _fetch_parser: parse_fetch.Fetch + def __init__(self): super().__init__(None) self._graph = rdflib.Graph() self._transaction = _Transaction(self._graph) - # NOTE: parsing bsfs.query.ast.filter.Has requires xsd:integer. self._schema = bsc.Schema(literals={bsc.ROOT_NUMBER.child(ns.xsd.integer)}) self._filter_parser = parse_filter.Filter(self._graph, self._schema) + self._fetch_parser = parse_fetch.Fetch(self._schema) # NOTE: mypy and pylint complain about the **kwargs not being listed (contrasting super) # However, not having it here is clearer since it's explicit that there are no arguments. @@ -197,17 +201,53 @@ class SparqlStore(base.TripleStoreBase): # migrate schema self._schema = schema self._filter_parser.schema = schema + self._fetch_parser.schema = schema + + def fetch( + self, + node_type: bsc.Node, + filter: ast.filter.FilterExpression, # pylint: disable=redefined-builtin + fetch: ast.fetch.FetchExpression, + ) -> typing.Iterator[typing.Tuple[URI, str, typing.Any]]: + if node_type not in self.schema.nodes(): + raise errors.ConsistencyError(f'{node_type} is not defined in the schema') + if not isinstance(filter, ast.filter.FilterExpression): + raise TypeError(filter) + if not isinstance(fetch, ast.fetch.FetchExpression): + raise TypeError(fetch) + # compose a query from fetch and filter ast + query = self._filter_parser(node_type, filter) + query += self._fetch_parser(node_type, fetch) + # run query + emitted = set() + for result in query(self._graph): + guid = URI(result[0]) + for name, raw in zip(query.names, result[1:]): + if raw is None: # undefined value + continue + if isinstance(raw, rdflib.Literal): + value = raw.value + else: + value = URI(raw) + # emit triple + triple = (guid, name, value) + if triple not in emitted: # FIXME: needs a better solution! + emitted.add(triple) + yield guid, name, value def get( self, node_type: bsc.Node, - query: typing.Optional[ast.filter.FilterExpression] = None, + filter: typing.Optional[ast.filter.FilterExpression] = None, # pylint: disable=redefined-builtin ) -> typing.Iterator[URI]: if node_type not in self.schema.nodes(): raise errors.ConsistencyError(f'{node_type} is not defined in the schema') - if not isinstance(query, ast.filter.FilterExpression): - raise TypeError(query) - for guid, in self._graph.query(self._filter_parser(node_type, query)): + if not isinstance(filter, ast.filter.FilterExpression): + raise TypeError(filter) + # compose query + query = self._filter_parser(node_type, filter) + # run query + for guid, in query(self._graph): yield URI(guid) def _has_type(self, subject: URI, node_type: bsc.Node) -> bool: diff --git a/bsfs/triple_store/sparql/utils.py b/bsfs/triple_store/sparql/utils.py new file mode 100644 index 0000000..deca4d8 --- /dev/null +++ b/bsfs/triple_store/sparql/utils.py @@ -0,0 +1,141 @@ +""" + +Part of the BlackStar filesystem (bsfs) module. +A copy of the license is provided with the project. +Author: Matthias Baumgartner, 2022 +""" +# standard imports +import typing + +# external imports +import rdflib + +# bsfs imports +from bsfs.namespace import ns +from bsfs.utils import typename + +# exports +__all__: typing.Sequence[str] = ( + 'GenHopName', + 'Query', + ) + + +## code ## + +class GenHopName(): + """Generator that produces a new unique symbol name with each iteration.""" + + # Symbol name prefix. + prefix: str + + # Current counter. + curr: int + + def __init__(self, prefix: str = '?hop', start: int = 0): + self.prefix = prefix + self.curr = start - 1 + + def __next__(self): + """Generate and return the next unique name.""" + self.curr += 1 + return self.prefix + str(self.curr) + + +class Query(): + """Hold, manage, and complete partial Sparql queries.""" + + # root node type URI. + root_type: str + + # root node variable name. + root_head: str + + # (head, name) tuples (w/o root) + select: typing.Tuple[typing.Tuple[str, str], ...] + + # where statements. + where: str + + def __init__( + self, + root_type: str, + root_head: str = '?ent', + select: typing.Optional[typing.Iterable[typing.Tuple[str, str]]] = None, + where: typing.Optional[str] = None, + ): + # check arguments + if select is None: + select = [] + if where is None: + where = '' + # set members + self.root_type = root_type + self.root_head = root_head + self.select = tuple(select) # tuple ensures presistent order + self.where = where.strip() + + def __str__(self) -> str: + return self.query + + def __repr__(self) -> str: + return f'{typename(self)}({self.root_type}, {self.root_head}, {self.select}, {self.where})' + + def __eq__(self, other: typing.Any) -> bool: + return isinstance(other, type(self)) \ + and self.root_type == other.root_type \ + and self.root_head == other.root_head \ + and self.select == other.select \ + and self.where == other.where + + def __hash__(self) -> int: + return hash((type(self), self.root_type, self.root_head, self.select, self.where)) + + def __add__(self, other: typing.Any) -> 'Query': + # check other's type + if not isinstance(other, type(self)): + return NotImplemented + # check query compatibility + if not self.root_type == other.root_type: + raise ValueError(other) + if not self.root_head == other.root_head: + raise ValueError(other) + # combine selections + select = self.select + other.select + # combine conditions + conds = [] + if self.where != '': + conds.append(self.where) + if other.where != '': + conds.append(other.where) + where = ' . '.join(conds) + # return new query + return Query( + root_type=self.root_type, + root_head=self.root_head, + select=select, + where=where, + ) + + @property + def names(self) -> typing.Tuple[str, ...]: + """Return a tuple of selected variable names, excluding the root.""" + return tuple(name for _, name in self.select) + + @property + def query(self) -> str: + """Return an executable sparql query.""" + select = ' '.join(f'({head} as ?{name})' for head, name in self.select) + return f''' + SELECT {self.root_head} {select} + WHERE {{ + {self.root_head} <{ns.rdf.type}>/<{ns.rdfs.subClassOf}>* <{self.root_type}> . + {self.where} + }} + ''' + + def __call__(self, graph: rdflib.Graph) -> rdflib.query.Result: + """Execute the query on a *graph* and return the query result.""" + return graph.query(self.query) + +## EOF ## -- cgit v1.2.3 From c196d2ce73d8351a18c19bcddd4b06d224e644fc Mon Sep 17 00:00:00 2001 From: Matthias Baumgartner Date: Sat, 21 Jan 2023 18:27:22 +0100 Subject: Fetch in graph including results view --- bsfs/graph/ac/base.py | 4 ++ bsfs/graph/ac/null.py | 4 ++ bsfs/graph/nodes.py | 137 ++++++++++++++++++++++++++++++++++++++++++++++---- bsfs/graph/result.py | 112 +++++++++++++++++++++++++++++++++++++++++ 4 files changed, 248 insertions(+), 9 deletions(-) create mode 100644 bsfs/graph/result.py (limited to 'bsfs') diff --git a/bsfs/graph/ac/base.py b/bsfs/graph/ac/base.py index 0703e2e..79b09e5 100644 --- a/bsfs/graph/ac/base.py +++ b/bsfs/graph/ac/base.py @@ -72,4 +72,8 @@ class AccessControlBase(abc.ABC): def filter_read(self, node_type: schema.Node, query: ast.filter.FilterExpression) -> ast.filter.FilterExpression: """Re-write a filter *query* to get (i.e., read) *node_type* nodes.""" + @abc.abstractmethod + def fetch_read(self, node_type: schema.Node, query: ast.fetch.FetchExpression) -> ast.fetch.FetchExpression: + """Re-write a fetch *query* to get (i.e, read) values for *node_type* nodes.""" + ## EOF ## diff --git a/bsfs/graph/ac/null.py b/bsfs/graph/ac/null.py index 12b4e87..6a923a5 100644 --- a/bsfs/graph/ac/null.py +++ b/bsfs/graph/ac/null.py @@ -54,4 +54,8 @@ class NullAC(base.AccessControlBase): """Re-write a filter *query* to get (i.e., read) *node_type* nodes.""" return query + def fetch_read(self, node_type: schema.Node, query: ast.fetch.FetchExpression) -> ast.fetch.FetchExpression: + """Re-write a fetch *query* to get (i.e, read) values for *node_type* nodes.""" + return query + ## EOF ## diff --git a/bsfs/graph/nodes.py b/bsfs/graph/nodes.py index 5a93f77..a4ba45f 100644 --- a/bsfs/graph/nodes.py +++ b/bsfs/graph/nodes.py @@ -5,17 +5,20 @@ A copy of the license is provided with the project. Author: Matthias Baumgartner, 2022 """ # imports +from collections import abc import time import typing # bsfs imports -from bsfs import schema as _schema +from bsfs import schema as bsc from bsfs.namespace import ns +from bsfs.query import ast, validate from bsfs.triple_store import TripleStoreBase from bsfs.utils import errors, URI, typename # inner-module imports from . import ac +from . import result # exports __all__: typing.Sequence[str] = ( @@ -37,7 +40,7 @@ class Nodes(): _user: URI # node type. - _node_type: _schema.Node + _node_type: bsc.Node # guids of nodes. Can be empty. _guids: typing.Set[URI] @@ -46,13 +49,16 @@ class Nodes(): self, backend: TripleStoreBase, user: URI, - node_type: _schema.Node, + node_type: bsc.Node, guids: typing.Iterable[URI], ): + # set main members self._backend = backend self._user = user self._node_type = node_type self._guids = set(guids) + # create helper instances + # FIXME: Assumes that the schema does not change while the instance is in use! self._ac = ac.NullAC(self._backend, self._user) def __eq__(self, other: typing.Any) -> bool: @@ -72,7 +78,7 @@ class Nodes(): return f'{typename(self)}({self._node_type}, {self._guids})' @property - def node_type(self) -> _schema.Node: + def node_type(self) -> bsc.Node: """Return the node's type.""" return self._node_type @@ -83,7 +89,7 @@ class Nodes(): def set( self, - pred: URI, # FIXME: URI or _schema.Predicate? + pred: URI, # FIXME: URI or bsc.Predicate? value: typing.Any, ) -> 'Nodes': """Set predicate *pred* to *value*.""" @@ -91,7 +97,7 @@ class Nodes(): def set_from_iterable( self, - predicate_values: typing.Iterable[typing.Tuple[URI, typing.Any]], # FIXME: URI or _schema.Predicate? + predicate_values: typing.Iterable[typing.Tuple[URI, typing.Any]], # FIXME: URI or bsc.Predicate? ) -> 'Nodes': """Set mutliple predicate-value pairs at once.""" # TODO: Could group predicate_values by predicate to gain some efficiency @@ -120,6 +126,119 @@ class Nodes(): return self + def get( + self, + *paths: typing.Union[URI, typing.Iterable[URI]], + view: typing.Union[typing.Type[list], typing.Type[dict]] = dict, + **view_kwargs, + ) -> typing.Any: + """Get values or nodes at *paths*. + Return an iterator (view=list) or a dict (view=dict) over the results. + """ + # check args + if len(paths) == 0: + raise AttributeError('expected at least one path, found none') + if view not in (dict, list): + raise ValueError(f'expected dict or list, found {view}') + # process paths: create fetch ast, build name mapping, and find unique paths + schema = self._backend.schema + statements = set() + name2path = {} + unique_paths = set() # paths that result in a single (unique) value + normpath: typing.Tuple[URI, ...] + for idx, path in enumerate(paths): + # normalize path + if isinstance(path, str): + normpath = (URI(path), ) + elif isinstance(path, abc.Iterable): + if not all(isinstance(step, str) for step in path): + raise TypeError(path) + normpath = tuple(URI(step) for step in path) + else: + raise TypeError(path) + # check path's schema consistency + if not all(schema.has_predicate(pred) for pred in normpath): + raise errors.ConsistencyError(f'path is not fully covered by the schema: {path}') + # check path's uniqueness + if all(schema.predicate(pred).unique for pred in normpath): + unique_paths.add(path) + # fetch tail predicate + tail = schema.predicate(normpath[-1]) + # determine tail ast node type + factory = ast.fetch.Node if isinstance(tail.range, bsc.Node) else ast.fetch.Value + # assign name + name = f'fetch{idx}' + name2path[name] = (path, tail) + # create tail ast node + curr: ast.fetch.FetchExpression = factory(tail.uri, name) + # walk towards front + hop: URI + for hop in normpath[-2::-1]: + curr = ast.fetch.Fetch(hop, curr) + # add to fetch query + statements.add(curr) + # aggregate fetch statements + if len(statements) == 1: + fetch = next(iter(statements)) + else: + fetch = ast.fetch.All(*statements) + # add access controls to fetch + fetch = self._ac.fetch_read(self.node_type, fetch) + + # compose filter ast + filter = ast.filter.IsIn(self.guids) # pylint: disable=redefined-builtin + # add access controls to filter + filter = self._ac.filter_read(self.node_type, filter) + + # validate queries + validate.Filter(self._backend.schema)(self.node_type, filter) + validate.Fetch(self._backend.schema)(self.node_type, fetch) + + # process results, convert if need be + def triple_iter(): + # query the backend + triples = self._backend.fetch(self.node_type, filter, fetch) + # process triples + for root, name, raw in triples: + # get node + node = Nodes(self._backend, self._user, self.node_type, {root}) + # get path + path, tail = name2path[name] + # covert raw to value + if isinstance(tail.range, bsc.Node): + value = Nodes(self._backend, self._user, tail.range, {raw}) + else: + value = raw + # emit triple + yield node, path, value + + # simplify by default + view_kwargs['node'] = view_kwargs.get('node', len(self._guids) == 1) + view_kwargs['path'] = view_kwargs.get('path', len(paths) == 1) + view_kwargs['value'] = view_kwargs.get('value', True) + + # return results view + if view == list: + return result.to_list_view( + triple_iter(), + # aggregation args + **view_kwargs, + ) + + if view == dict: + return result.to_dict_view( + triple_iter(), + # context + len(self._guids) == 1, + len(paths) == 1, + unique_paths, + # aggregation args + **view_kwargs, + ) + + raise errors.UnreachableError() # view was already checked + + def __set(self, predicate: URI, value: typing.Any): """ """ @@ -145,7 +264,7 @@ class Nodes(): guids = set(self._ensure_nodes(node_type, guids)) # check value - if isinstance(pred.range, _schema.Literal): + if isinstance(pred.range, bsc.Literal): # check write permissions on existing nodes # As long as the user has write permissions, we don't restrict # the creation or modification of literal values. @@ -160,7 +279,7 @@ class Nodes(): [value], ) - elif isinstance(pred.range, _schema.Node): + elif isinstance(pred.range, bsc.Node): # check value type if not isinstance(value, Nodes): raise TypeError(value) @@ -192,7 +311,7 @@ class Nodes(): else: raise errors.UnreachableError() - def _ensure_nodes(self, node_type: _schema.Node, guids: typing.Iterable[URI]): + def _ensure_nodes(self, node_type: bsc.Node, guids: typing.Iterable[URI]): """ """ # check node existence diff --git a/bsfs/graph/result.py b/bsfs/graph/result.py new file mode 100644 index 0000000..3009801 --- /dev/null +++ b/bsfs/graph/result.py @@ -0,0 +1,112 @@ +""" + +Part of the BlackStar filesystem (bsfs) module. +A copy of the license is provided with the project. +Author: Matthias Baumgartner, 2022 +""" +# imports +from collections import defaultdict +import typing + +# bsfs imports +from bsfs.utils import URI + +# exports +__all__: typing.Sequence[str] = ( + 'to_list_view', + 'to_dict_view', + ) + + +## code ## + +def to_list_view( + triples, + # aggregators + node: bool, + path: bool, + value: bool, # pylint: disable=unused-argument + ): + """Return an iterator over results. + + Dependent on the *node*, *path*, and *value* flags, + the respective component is omitted. + + """ + if node and path: + return iter(val for _, _, val in triples) + if node: + return iter((pred, val) for _, pred, val in triples) + if path: + return iter((subj, val) for subj, _, val in triples) + return iter((subj, pred, val) for subj, pred, val in triples) + + +def to_dict_view( + triples, + # context + one_node: bool, + one_path: bool, + unique_paths: typing.Set[typing.Union[URI, typing.Iterable[URI]]], + # aggregators + node: bool, + path: bool, + value: bool, + ) -> typing.Any: + """Return a dict of results. + + Note that triples are materialized to create this view. + + The returned structure depends on the *node*, *path*, and *value* flags. + If all flags are set to False, returns a dict(node -> dict(path -> set(values))). + Setting a flag to true omits or simplifies the respective component (if possible). + + """ + # NOTE: To create a dict, we need to materialize or make further assumptions + # (e.g., sorted in a specific order). + + data: typing.Any # disable type checks on data since it's very flexibly typed. + + # FIXME: type of data can be overwritten later on (if value) + + if node and path: + data = set() + elif node ^ path: + data = defaultdict(set) + else: + data = defaultdict(lambda: defaultdict(set)) + + for subj, pred, val in triples: + unique = pred in unique_paths + if node and path: + if value and unique and one_node and one_path: + return val + data.add(val) + elif node: + # remove node from result, group by predicate + if value and unique and one_node: + data[pred] = val + else: + data[pred].add(val) + elif path: + # remove predicate from result, group by node + if value and unique and one_path: + data[subj] = val + else: + data[subj].add(val) + else: + if value and unique: + data[subj][pred] = val + else: + data[subj][pred].add(val) + + # FIXME: Combine multiple Nodes instances into one? + + # convert defaultdict to ordinary dict + if node and path: + return data + if node ^ path: + return dict(data) + return {key: dict(val) for key, val in data.items()} + +## EOF ## -- cgit v1.2.3 From 9310610a7edf4dcbb934aedcecff1d11348197bb Mon Sep 17 00:00:00 2001 From: Matthias Baumgartner Date: Sat, 21 Jan 2023 22:32:33 +0100 Subject: nodes predicate walk sugar --- bsfs/graph/nodes.py | 15 ++++++- bsfs/graph/walk.py | 120 ++++++++++++++++++++++++++++++++++++++++++++++++++ bsfs/schema/schema.py | 4 ++ 3 files changed, 138 insertions(+), 1 deletion(-) create mode 100644 bsfs/graph/walk.py (limited to 'bsfs') diff --git a/bsfs/graph/nodes.py b/bsfs/graph/nodes.py index a4ba45f..18ab30d 100644 --- a/bsfs/graph/nodes.py +++ b/bsfs/graph/nodes.py @@ -19,6 +19,7 @@ from bsfs.utils import errors, URI, typename # inner-module imports from . import ac from . import result +from . import walk # exports __all__: typing.Sequence[str] = ( @@ -87,6 +88,18 @@ class Nodes(): """Return all node guids.""" return iter(self._guids) + @property + def schema(self) -> bsc.Schema: + """Return the store's local schema.""" + return self._backend.schema + + def __getattr__(self, name: str): + try: + return super().__getattr__(name) # type: ignore [misc] # parent has no getattr + except AttributeError: + pass + return walk.Walk(self, walk.Walk.step(self.schema, self.node_type, name)) + def set( self, pred: URI, # FIXME: URI or bsc.Predicate? @@ -141,7 +154,7 @@ class Nodes(): if view not in (dict, list): raise ValueError(f'expected dict or list, found {view}') # process paths: create fetch ast, build name mapping, and find unique paths - schema = self._backend.schema + schema = self.schema statements = set() name2path = {} unique_paths = set() # paths that result in a single (unique) value diff --git a/bsfs/graph/walk.py b/bsfs/graph/walk.py new file mode 100644 index 0000000..63ef5e9 --- /dev/null +++ b/bsfs/graph/walk.py @@ -0,0 +1,120 @@ +""" + +Part of the BlackStar filesystem (bsfs) module. +A copy of the license is provided with the project. +Author: Matthias Baumgartner, 2022 +""" +# imports +from collections import abc +import typing + +# bsfs imports +from bsfs import schema as bsc + +# inner-module imports +# NOTE: circular import! OK as long as only used for type annotations. +from . import nodes # pylint: disable=cyclic-import + +# exports +__all__: typing.Sequence[str] = ( + 'Walk', + ) + + +## code ## + +class Walk(abc.Hashable, abc.Callable): # type: ignore [misc] # invalid base class (Callable) + """Syntactic sugar for `Nodes` to build and act on predicate paths via members.""" + + # Link to Nodes instance. + _root: 'nodes.Nodes' + + # Current predicate path. + _path: typing.Tuple[bsc.Predicate, ...] + + def __init__( + self, + root: 'nodes.Nodes', + path: typing.Sequence[bsc.Predicate], + ): + self._root = root + self._path = tuple(path) + + @property + def tail(self): + """Return the node type at the end of the path.""" + return self._path[-1].range + + + ## comparison + + def __hash__(self) -> int: + """Return an integer hash that identifies the instance.""" + return hash((type(self), self._root, self._path)) + + def __eq__(self, other) -> bool: + """Compare against *other* backend.""" + return isinstance(other, type(self)) \ + and self._root == other._root \ + and self._path == other._path + + + ## representation + + def __repr__(self) -> str: + """Return a formal string representation.""" + path = ', '.join(pred.uri for pred in self._path) + return f'Walk({self._root.node_type.uri}, ({path}))' + + def __str__(self) -> str: + """Return an informal string representation.""" + path = ', '.join(pred.uri for pred in self._path) + return f'Walk(@{self._root.node_type.uri}: {path})' + + + ## walk + + @staticmethod + def step( + schema: bsc.Schema, + node: bsc.Node, + name: str, + ) -> typing.Tuple[bsc.Predicate]: + """Get an predicate at *node* whose fragment matches *name*.""" + predicates = tuple( + pred + for pred + in schema.predicates_at(node) + if pred.uri.get('fragment', None) == name + ) + if len(predicates) == 0: # no fragment found for name + raise ValueError(f'no available predicate matches {name}') + if len(predicates) > 1: # ambiguous name + raise ValueError(f'{name} matches multiple predicates') + # append predicate to walk + return predicates # type: ignore [return-value] # size is one + + def __getattr__(self, name: str) -> 'Walk': + """Alias for `Walk.step(name)`.""" + try: + return super().__getattr__(name) + except AttributeError: + pass + # get predicate + pred = self.step(self._root.schema, self.tail, name) + # append predicate to walk + return Walk(self._root, self._path + pred) + + + ## get paths ## + + def get(self, **kwargs) -> typing.Any: + """Alias for `Nodes.get(..)`.""" + return self._root.get(tuple(pred.uri for pred in self._path), **kwargs) + + def __call__(self, **kwargs) -> typing.Any: # pylint: disable=arguments-differ + """Alias for `Walk.get(...)`.""" + return self.get(**kwargs) + + +## EOF ## diff --git a/bsfs/schema/schema.py b/bsfs/schema/schema.py index 8d9a821..1644926 100644 --- a/bsfs/schema/schema.py +++ b/bsfs/schema/schema.py @@ -312,4 +312,8 @@ class Schema(): """Return the Literal matching the *uri*.""" return self._literals[uri] + def predicates_at(self, node: types.Node) -> typing.Iterator[types.Predicate]: + """Return predicates that have domain *node* (or superclass thereof).""" + return iter(pred for pred in self._predicates.values() if node <= pred.domain) + ## EOF ## -- cgit v1.2.3 From 04bb201c6162e81dbdefcb1cff9595180fa66917 Mon Sep 17 00:00:00 2001 From: Matthias Baumgartner Date: Sat, 21 Jan 2023 22:34:49 +0100 Subject: minor notes --- bsfs/graph/result.py | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'bsfs') diff --git a/bsfs/graph/result.py b/bsfs/graph/result.py index 3009801..688929b 100644 --- a/bsfs/graph/result.py +++ b/bsfs/graph/result.py @@ -20,6 +20,11 @@ __all__: typing.Sequence[str] = ( ## code ## +# FIXME: node, path, value seem counter-intuitive: +# node.get(..., node=True) removes the node part. +# wouldn't it make more sense if node=True keeps the node part +# and node=False drops it? + def to_list_view( triples, # aggregators -- cgit v1.2.3 From 1392951dfc82af05e7a5999baa1c0a4fc72083b8 Mon Sep 17 00:00:00 2001 From: Matthias Baumgartner Date: Sat, 21 Jan 2023 23:03:50 +0100 Subject: Nodes magic methods for convenience --- bsfs/graph/nodes.py | 51 +++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 51 insertions(+) (limited to 'bsfs') diff --git a/bsfs/graph/nodes.py b/bsfs/graph/nodes.py index 18ab30d..85e5fdb 100644 --- a/bsfs/graph/nodes.py +++ b/bsfs/graph/nodes.py @@ -93,6 +93,57 @@ class Nodes(): """Return the store's local schema.""" return self._backend.schema + def __add__(self, other: typing.Any) -> 'Nodes': + """Concatenate guids. Backend, user, and node type must match.""" + if not isinstance(other, type(self)): + return NotImplemented + if self._backend != other._backend: + raise ValueError(other) + if self._user != other._user: + raise ValueError(other) + if self.node_type != other.node_type: + raise ValueError(other) + return Nodes(self._backend, self._user, self.node_type, self._guids | other._guids) + + def __or__(self, other: typing.Any) -> 'Nodes': + """Concatenate guids. Backend, user, and node type must match.""" + return self.__add__(other) + + def __sub__(self, other: typing.Any) -> 'Nodes': + """Subtract guids. Backend, user, and node type must match.""" + if not isinstance(other, type(self)): + return NotImplemented + if self._backend != other._backend: + raise ValueError(other) + if self._user != other._user: + raise ValueError(other) + if self.node_type != other.node_type: + raise ValueError(other) + return Nodes(self._backend, self._user, self.node_type, self._guids - other._guids) + + def __and__(self, other: typing.Any) -> 'Nodes': + """Intersect guids. Backend, user, and node type must match.""" + if not isinstance(other, type(self)): + return NotImplemented + if self._backend != other._backend: + raise ValueError(other) + if self._user != other._user: + raise ValueError(other) + if self.node_type != other.node_type: + raise ValueError(other) + return Nodes(self._backend, self._user, self.node_type, self._guids & other._guids) + + def __len__(self) -> int: + """Return the number of guids.""" + return len(self._guids) + + def __iter__(self) -> typing.Iterator['Nodes']: + """Iterate over individual guids. Returns `Nodes` instances.""" + return iter( + Nodes(self._backend, self._user, self.node_type, {guid}) + for guid in self._guids + ) + def __getattr__(self, name: str): try: return super().__getattr__(name) # type: ignore [misc] # parent has no getattr -- cgit v1.2.3 From 72e0bd78dc9cc1d74c3061b028040b64c0efcf9f Mon Sep 17 00:00:00 2001 From: Matthias Baumgartner Date: Mon, 30 Jan 2023 09:52:18 +0100 Subject: flip graph fethc result flags --- bsfs/graph/nodes.py | 6 +++--- bsfs/graph/result.py | 29 +++++++++++++++++------------ 2 files changed, 20 insertions(+), 15 deletions(-) (limited to 'bsfs') diff --git a/bsfs/graph/nodes.py b/bsfs/graph/nodes.py index 85e5fdb..9990714 100644 --- a/bsfs/graph/nodes.py +++ b/bsfs/graph/nodes.py @@ -277,9 +277,9 @@ class Nodes(): yield node, path, value # simplify by default - view_kwargs['node'] = view_kwargs.get('node', len(self._guids) == 1) - view_kwargs['path'] = view_kwargs.get('path', len(paths) == 1) - view_kwargs['value'] = view_kwargs.get('value', True) + view_kwargs['node'] = view_kwargs.get('node', len(self._guids) != 1) + view_kwargs['path'] = view_kwargs.get('path', len(paths) != 1) + view_kwargs['value'] = view_kwargs.get('value', False) # return results view if view == list: diff --git a/bsfs/graph/result.py b/bsfs/graph/result.py index 688929b..00607f4 100644 --- a/bsfs/graph/result.py +++ b/bsfs/graph/result.py @@ -38,11 +38,11 @@ def to_list_view( the respective component is omitted. """ - if node and path: + if not node and not path: return iter(val for _, _, val in triples) - if node: + if not node: return iter((pred, val) for _, pred, val in triples) - if path: + if not path: return iter((subj, val) for subj, _, val in triples) return iter((subj, pred, val) for subj, pred, val in triples) @@ -57,6 +57,7 @@ def to_dict_view( node: bool, path: bool, value: bool, + default: typing.Optional[typing.Any] = None, ) -> typing.Any: """Return a dict of results. @@ -74,7 +75,7 @@ def to_dict_view( # FIXME: type of data can be overwritten later on (if value) - if node and path: + if not node and not path: data = set() elif node ^ path: data = defaultdict(set) @@ -83,24 +84,24 @@ def to_dict_view( for subj, pred, val in triples: unique = pred in unique_paths - if node and path: - if value and unique and one_node and one_path: + if not node and not path: + if not value and unique and one_node and one_path: return val data.add(val) - elif node: + elif not node: # remove node from result, group by predicate - if value and unique and one_node: + if not value and unique and one_node: data[pred] = val else: data[pred].add(val) - elif path: + elif not path: # remove predicate from result, group by node - if value and unique and one_path: + if not value and unique and one_path: data[subj] = val else: data[subj].add(val) else: - if value and unique: + if not value and unique: data[subj][pred] = val else: data[subj][pred].add(val) @@ -108,7 +109,11 @@ def to_dict_view( # FIXME: Combine multiple Nodes instances into one? # convert defaultdict to ordinary dict - if node and path: + if not node and not path and not value \ + and len(unique_paths) > 0 and one_node and one_path \ + and len(data) == 0: + return default + if not node and not path: return data if node ^ path: return dict(data) -- cgit v1.2.3 From 7e0987bcda136a17baea45b8eb22eb5ea668abc0 Mon Sep 17 00:00:00 2001 From: Matthias Baumgartner Date: Mon, 30 Jan 2023 14:35:32 +0100 Subject: filter ast comparison --- bsfs/query/matcher.py | 366 ++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 366 insertions(+) create mode 100644 bsfs/query/matcher.py (limited to 'bsfs') diff --git a/bsfs/query/matcher.py b/bsfs/query/matcher.py new file mode 100644 index 0000000..a910756 --- /dev/null +++ b/bsfs/query/matcher.py @@ -0,0 +1,366 @@ +""" + +Part of the BlackStar filesystem (bsfs) module. +A copy of the license is provided with the project. +Author: Matthias Baumgartner, 2022 +""" +# imports +from collections import defaultdict +from itertools import product +from time import time +import random +import threading +import typing + +# external imports +from hopcroftkarp import HopcroftKarp + +# bsfs imports +from bsfs.utils import errors, typename + +# inner-module imports +from . import ast + +# exports +__all__ : typing.Sequence[str] = ( + 'Filter', + ) + + +## code ## + +class Any(ast.filter.FilterExpression, ast.filter.PredicateExpression): + """Match any ast class. + + Note that Any instances are unique, i.e. they do not compare, and + can hence be repeated in a set: + >>> Any() == Any() + False + >>> len({Any(), Any(), Any(), Any()}) + 4 + + """ + + # unique instance id + _uid: typing.Tuple[int, int, float, float] + + def __init__(self): + self._uid = ( + id(self), + id(threading.current_thread()), + time(), + random.random(), + ) + + def __eq__(self, other: typing.Any): + return super().__eq__(other) and self._uid == other._uid + + def __hash__(self): + return hash((super().__hash__(), self._uid)) + + +class Rest(ast.filter.FilterExpression, ast.filter.PredicateExpression): + """Match the leftovers in a set of items to be compared. + + Rest can be used in junction with aggregating expressions such as ast.filter.And, + ast.filter.Or, ast.filter.OneOf. It controls childs expressions that were not yet + consumed by other matching rules. Rest may match to only a specific expression. + The expresssion defaults to Any(). + + For example, the following to ast structures would match since Rest + allows an arbitrary repetition of ast.filter.Equals statements. + + >>> And(Equals('hello'), Equals('world'), Equals('foobar')) + >>> And(Equals('world'), Rest(Partial(Equals))) + + """ + + # child expression for the Rest. + expr: typing.Union[ast.filter.FilterExpression, ast.filter.PredicateExpression] + + def __init__( + self, + expr: typing.Optional[typing.Union[ast.filter.FilterExpression, ast.filter.PredicateExpression]] = None, + ): + if expr is None: + expr = Any() + self.expr = expr + + def __repr__(self) -> str: + return f'{typename(self)}({self.expr})' + + def __hash__(self) -> int: + return hash((super().__hash__(), self.expr)) + + def __eq__(self, other: typing.Any) -> bool: + return super().__eq__(other) and self.expr == other.expr + + +class Partial(ast.filter.FilterExpression, ast.filter.PredicateExpression): + """Match a partially defined ast expression. + + Literal values might be irrelevant or unknown when comparing two ast + structures. Partial allows to constrain the matcher to a certain + ast class, while leaving some of its members unspecified. + + Pass the class (not instance) and its members as keyword arguments + to Partial. Note that the arguments are not validated. + + For example, the following instance matches any ast.filter.Equals, + irrespective of its value: + + >>> Partial(ast.filter.Equals) + + Likewise, the following instance matches any ast.filter.LessThan + that has a strict bounds, but makes no claim about the threshold: + + >>> Partial(ast.filter.LessThan, strict=False) + + """ + + # target node type. + node: typing.Type + + # node construction args. + kwargs: typing.Dict[str, typing.Any] + + def __init__( + self, + node: typing.Type, + **kwargs, + ): + self.node = node + self.kwargs = kwargs + + def __repr__(self) -> str: + return f'{typename(self)}({self.node.__name__}, {self.kwargs})' + + def __hash__(self) -> int: + kwargs = tuple((key, self.kwargs[key]) for key in sorted(self.kwargs)) + return hash((super().__hash__(), self.node, kwargs)) + + def __eq__(self, other: typing.Any) -> bool: + return super().__eq__(other) \ + and self.node == other.node \ + and self.kwargs == other.kwargs + + def match( + self, + name: str, + value: typing.Any, + ) -> bool: + """Return True if *name* is unspecified or matches *value*.""" + return name not in self.kwargs or self.kwargs[name] == value + + +T_ITEM_TYPE = typing.TypeVar('T_ITEM_TYPE') # pylint: disable=invalid-name + +def _set_matcher( + query: typing.Collection[T_ITEM_TYPE], + reference: typing.Collection[T_ITEM_TYPE], + cmp: typing.Callable[[T_ITEM_TYPE, T_ITEM_TYPE], bool], + ) -> bool: + """Compare two sets of child expressions. + + This check has a best-case complexity of O(|N|**2) and worst-case + complexity of O(|N|**3), with N the number of child expressions. + """ + # get reference items + r_items = list(reference) + # deal with Rest + r_rest = {itm for itm in r_items if isinstance(itm, Rest)} + if len(r_rest) > 1: + raise errors.BackendError(f'there must be at most one Rest instance per set, found {len(r_rest)}') + if len(r_rest) == 1: + # replace Rest by filling the reference up with rest's expression + # NOTE: convert r_items to list so that items can be repeated + expr = next(iter(r_rest)).expr # type: ignore [attr-defined] + r_items = [itm for itm in r_items if not isinstance(itm, Rest)] + r_items += [expr for _ in range(len(query) - len(r_items))] # type: ignore [misc] + # sanity check: cannot match if the item sizes differ: + # either a reference item is unmatched (len(r_items) > len(query)) + # or a query item is unmatched (len(r_items) < len(query)) + if len(query) != len(r_items): + return False + + # To have a positive match between the query and the reference, + # each query expr has to match any reference expr. + # However, each reference expr can only be "consumed" once even + # if it matches multiple query exprs (e.g., the Any expression matches + # every query expr). + # This is a bipartide matching problem (Hall's marriage problem) + # and the Hopcroft-Karp-Karzanov algorithm finds a maximum + # matching. While there might be multiple maximum matchings, + # we only need to know whether (at least) one complete matching + # exists. The hopcroftkarp module provides this functionality. + # The HKK algorithm has worst-case complexity of O(|N|**2 * sqrt(|N|)) + # and we also need to compare expressions pairwise, hence O(|N|**2). + num_items = len(r_items) + graph = defaultdict(set) + # build the bipartide graph as {lhs: {rhs}, ...} + # lhs and rhs must be disjoint identifiers. + for (ridx, ref), (nidx, node) in product(enumerate(r_items), enumerate(query)): + # add edges for equal expressions + if cmp(node, ref): + graph[ridx].add(num_items + nidx) + + # maximum_matching returns the matches for all nodes in the graph + # ({ref_itm: node_itm}), hence a complete matching's size is + # the number of reference's child expressions. + return len(HopcroftKarp(graph).maximum_matching(keys_only=True)) == num_items + + +class Filter(): + """Compare a bsfs.query.ast.filter` query's structure to a reference ast. + + The reference ast may include `Rest`, `Partial`, or `Any` to account for irrelevant + or unknown ast pieces. + + This is only a structural comparison, not a semantic one. For example, the + two following queries are semantically identical, but structurally different, + and would therefore not match: + + >>> ast.filter.OneOf(ast.filter.Predicate(ns.bse.filename)) + >>> ast.filter.Predicate(ns.bse.filename) + + """ + + def __call__(self, query: ast.filter.FilterExpression, reference: ast.filter.FilterExpression) -> bool: + """Compare a *query* to a *reference* ast structure. + Return True if both are structurally equivalent. + """ + if not isinstance(query, ast.filter.FilterExpression): + raise errors.BackendError(f'expected filter expression, found {query}') + if not isinstance(reference, ast.filter.FilterExpression): + raise errors.BackendError(f'expected filter expression, found {reference}') + return self._parse_filter_expression(query, reference) + + def _parse_filter_expression( + self, + node: ast.filter.FilterExpression, + reference: ast.filter.FilterExpression, + ) -> bool: + """Route *node* to the handler of the respective FilterExpression subclass.""" + # generic checks: reference type must be Any or match node type + if isinstance(reference, Any): + return True + # node-specific checks + if isinstance(node, ast.filter.Not): + return self._not(node, reference) + if isinstance(node, ast.filter.Has): + return self._has(node, reference) + if isinstance(node, ast.filter.Distance): + return self._distance(node, reference) + if isinstance(node, (ast.filter.Any, ast.filter.All)): + return self._branch(node, reference) + if isinstance(node, (ast.filter.And, ast.filter.Or)): + return self._agg(node, reference) + if isinstance(node, (ast.filter.Is, ast.filter.Equals, ast.filter.Substring, + ast.filter.StartsWith, ast.filter.EndsWith)): + return self._value(node, reference) + if isinstance(node, (ast.filter.LessThan, ast.filter.GreaterThan)): + return self._bounded(node, reference) + # invalid node + raise errors.BackendError(f'expected filter expression, found {node}') + + def _parse_predicate_expression( + self, + node: ast.filter.PredicateExpression, + reference: ast.filter.PredicateExpression, + ) -> bool: + """Route *node* to the handler of the respective PredicateExpression subclass.""" + if isinstance(reference, Any): + return True + if isinstance(node, ast.filter.Predicate): + return self._predicate(node, reference) + if isinstance(node, ast.filter.OneOf): + return self._one_of(node, reference) + # invalid node + raise errors.BackendError(f'expected predicate expression, found {node}') + + def _one_of(self, node: ast.filter.OneOf, reference: ast.filter.PredicateExpression) -> bool: + if not isinstance(reference, type(node)): + return False + return _set_matcher(node, reference, self._parse_predicate_expression) + + def _predicate(self, node: ast.filter.Predicate, reference: ast.filter.PredicateExpression) -> bool: + if not isinstance(reference, (Partial, type(node))): + return False + # partial check + if isinstance(reference, Partial): + if not isinstance(node, reference.node): + return False + return reference.match('predicate', node.predicate) \ + and reference.match('reverse', node.reverse) + # full check + return node.predicate == reference.predicate \ + and node.reverse == reference.reverse + + def _branch(self, + node: typing.Union[ast.filter.Any, ast.filter.All], + reference: ast.filter.FilterExpression, + ) -> bool: + if not isinstance(reference, type(node)): + return False + if not self._parse_predicate_expression(node.predicate, reference.predicate): # type: ignore [attr-defined] + return False + if not self._parse_filter_expression(node.expr, reference.expr): # type: ignore [attr-defined] + return False + return True + + def _agg(self, node: typing.Union[ast.filter.And, ast.filter.Or], reference: ast.filter.FilterExpression) -> bool: + if not isinstance(reference, type(node)): + return False + return _set_matcher(node, reference, self._parse_filter_expression) # type: ignore [arg-type] + + def _not(self, node: ast.filter.Not, reference: ast.filter.FilterExpression) -> bool: + if not isinstance(reference, type(node)): + return False + return self._parse_filter_expression(node.expr, reference.expr) + + def _has(self, node: ast.filter.Has, reference: ast.filter.FilterExpression) -> bool: + if not isinstance(reference, type(node)): + return False + return self._parse_predicate_expression(node.predicate, reference.predicate) \ + and self._parse_filter_expression(node.count, reference.count) + + def _distance(self, node: ast.filter.Distance, reference: ast.filter.FilterExpression) -> bool: + if not isinstance(reference, (Partial, type(node))): + return False + # partial check + if isinstance(reference, Partial): + if not isinstance(node, reference.node): + return False + return reference.match('reference', node.reference) \ + and reference.match('threshold', node.threshold) \ + and reference.match('strict', node.strict) + # full check + return node.reference == reference.reference \ + and node.threshold == reference.threshold \ + and node.strict == reference.strict + + def _value(self, node: ast.filter._Value, reference: ast.filter.FilterExpression) -> bool: + if not isinstance(reference, (Partial, type(node))): + return False + # partial check + if isinstance(reference, Partial): + if not isinstance(node, reference.node): + return False + return reference.match('value', node.value) + # full ckeck + return node.value == reference.value + + def _bounded(self, node: ast.filter._Bounded, reference: ast.filter.FilterExpression) -> bool: + if not isinstance(reference, (Partial, type(node))): + return False + # partial check + if isinstance(reference, Partial): + if not isinstance(node, reference.node): + return False + return reference.match('threshold', node.threshold) \ + and reference.match('strict', node.strict) + # full check + return node.threshold == reference.threshold \ + and node.strict == reference.strict + +## EOF ## -- cgit v1.2.3 From 1365e21ab9f13597d7fdb4feb0825453f32cae4b Mon Sep 17 00:00:00 2001 From: Matthias Baumgartner Date: Wed, 8 Feb 2023 19:51:54 +0100 Subject: ucid from buffer and bytes --- bsfs/utils/uuid.py | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'bsfs') diff --git a/bsfs/utils/uuid.py b/bsfs/utils/uuid.py index ba5cf52..70e1656 100644 --- a/bsfs/utils/uuid.py +++ b/bsfs/utils/uuid.py @@ -7,6 +7,7 @@ Author: Matthias Baumgartner, 2022 # imports from collections import abc import hashlib +import io import json import os import platform @@ -106,6 +107,17 @@ class UCID(): with open(path, 'rb') as ifile: return HASH(ifile.read()).hexdigest() + @staticmethod + def from_buffer(buffer: io.IOBase) -> str: + """Read the content from a buffer.""" + if isinstance(buffer, io.TextIOBase): + return HASH(buffer.read().encode('utf-8', errors='ignore')).hexdigest() + return HASH(buffer.read()).hexdigest() + + @staticmethod + def from_bytes(content: bytes) -> str: + """Get the content from as bytes.""" + return HASH(content).hexdigest() @staticmethod def from_dict(content: dict) -> str: -- cgit v1.2.3 From c8fdaaa676afbdcf33344d72bd92b3ccb981cbf8 Mon Sep 17 00:00:00 2001 From: Matthias Baumgartner Date: Wed, 8 Feb 2023 19:54:24 +0100 Subject: ast fixes --- bsfs/query/ast/fetch.py | 3 +-- bsfs/query/ast/filter_.py | 4 ++-- 2 files changed, 3 insertions(+), 4 deletions(-) (limited to 'bsfs') diff --git a/bsfs/query/ast/fetch.py b/bsfs/query/ast/fetch.py index 5e603a1..d653a8a 100644 --- a/bsfs/query/ast/fetch.py +++ b/bsfs/query/ast/fetch.py @@ -69,8 +69,7 @@ class All(FetchExpression): return f'{typename(self)}({self.expr})' def __hash__(self) -> int: - # FIXME: Produces different hashes for different orders of self.expr - return hash((super().__hash__(), tuple(self.expr))) + return hash((super().__hash__(), tuple(sorted(self.expr, key=repr)))) def __eq__(self, other: typing.Any) -> bool: return super().__eq__(other) and self.expr == other.expr diff --git a/bsfs/query/ast/filter_.py b/bsfs/query/ast/filter_.py index 81b0de2..798d37f 100644 --- a/bsfs/query/ast/filter_.py +++ b/bsfs/query/ast/filter_.py @@ -173,7 +173,7 @@ class _Agg(FilterExpression, abc.Collection): return f'{typename(self)}({self.expr})' def __hash__(self) -> int: - return hash((super().__hash__(), tuple(self.expr))) # FIXME: Unique hash of different orders over self.expr + return hash((super().__hash__(), tuple(sorted(self.expr, key=repr)))) def __eq__(self, other) -> bool: return super().__eq__(other) and self.expr == other.expr @@ -450,7 +450,7 @@ class OneOf(PredicateExpression, abc.Collection): return f'{typename(self)}({self.expr})' def __hash__(self) -> int: - return hash((super().__hash__(), tuple(self.expr))) # FIXME: Unique hash of different orders over self.expr + return hash((super().__hash__(), tuple(sorted(self.expr, key=repr)))) def __eq__(self, other) -> bool: return super().__eq__(other) and self.expr == other.expr -- cgit v1.2.3 From cb819b8c268908b5f6cc680173db86e172847c46 Mon Sep 17 00:00:00 2001 From: Matthias Baumgartner Date: Wed, 8 Feb 2023 20:15:41 +0100 Subject: binary blob in schema and sparql triple store --- bsfs/schema/schema.py | 1 + bsfs/schema/types.py | 5 +++++ bsfs/triple_store/sparql/sparql.py | 13 +++++++++++-- 3 files changed, 17 insertions(+), 2 deletions(-) (limited to 'bsfs') diff --git a/bsfs/schema/schema.py b/bsfs/schema/schema.py index 1644926..0de4203 100644 --- a/bsfs/schema/schema.py +++ b/bsfs/schema/schema.py @@ -69,6 +69,7 @@ class Schema(): literals.add(types.ROOT_LITERAL) predicates.add(types.ROOT_PREDICATE) # add minimally necessary types to the schema + literals.add(types.ROOT_BLOB) literals.add(types.ROOT_NUMBER) literals.add(types.ROOT_TIME) literals.add(types.ROOT_ARRAY) diff --git a/bsfs/schema/types.py b/bsfs/schema/types.py index 3a2e10c..12e7e94 100644 --- a/bsfs/schema/types.py +++ b/bsfs/schema/types.py @@ -380,6 +380,11 @@ ROOT_LITERAL = Literal( parent=None, ) +ROOT_BLOB = Literal( + uri=ns.bsfs.BinaryBlob, + parent=ROOT_LITERAL, + ) + ROOT_NUMBER = Literal( uri=ns.bsfs.Number, parent=ROOT_LITERAL, diff --git a/bsfs/triple_store/sparql/sparql.py b/bsfs/triple_store/sparql/sparql.py index a0dd12e..dbf9d45 100644 --- a/bsfs/triple_store/sparql/sparql.py +++ b/bsfs/triple_store/sparql/sparql.py @@ -5,8 +5,11 @@ A copy of the license is provided with the project. Author: Matthias Baumgartner, 2022 """ # imports +import base64 import itertools import typing + +# external imports import rdflib # bsfs imports @@ -30,6 +33,8 @@ __all__: typing.Sequence[str] = ( ## code ## +rdflib.term.bind(ns.bsfs.BinaryBlob, bytes, constructor=base64.b64decode) + class _Transaction(): """Lightweight rdflib transactions for in-memory databases.""" @@ -242,7 +247,7 @@ class SparqlStore(base.TripleStoreBase): ) -> typing.Iterator[URI]: if node_type not in self.schema.nodes(): raise errors.ConsistencyError(f'{node_type} is not defined in the schema') - if not isinstance(filter, ast.filter.FilterExpression): + if filter is not None and not isinstance(filter, ast.filter.FilterExpression): raise TypeError(filter) # compose query query = self._filter_parser(node_type, filter) @@ -334,7 +339,11 @@ class SparqlStore(base.TripleStoreBase): guid = rdflib.URIRef(guid) # convert value if isinstance(predicate.range, bsc.Literal): - value = rdflib.Literal(value, datatype=rdflib.URIRef(predicate.range.uri)) + dtype = rdflib.URIRef(predicate.range.uri) + if predicate.range <= self.schema.literal(ns.bsfs.BinaryBlob): + dtype = rdflib.URIRef(ns.bsfs.BinaryBlob) + value = base64.b64encode(value) + value = rdflib.Literal(value, datatype=dtype) elif isinstance(predicate.range, bsc.Node): value = rdflib.URIRef(value) else: -- cgit v1.2.3 From 64f3ac76a2f8d6b51380c06233accfcc19dca228 Mon Sep 17 00:00:00 2001 From: Matthias Baumgartner Date: Wed, 8 Feb 2023 20:47:18 +0100 Subject: filter query convenience functions --- bsfs/query/ast/filter_.py | 58 +++++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 53 insertions(+), 5 deletions(-) (limited to 'bsfs') diff --git a/bsfs/query/ast/filter_.py b/bsfs/query/ast/filter_.py index 798d37f..44490fc 100644 --- a/bsfs/query/ast/filter_.py +++ b/bsfs/query/ast/filter_.py @@ -31,10 +31,7 @@ from collections import abc import typing # bsfs imports -from bsfs.utils import URI, typename, normalize_args - -# inner-module imports -#from . import utils +from bsfs.utils import URI, errors, typename, normalize_args # exports __all__ : typing.Sequence[str] = ( @@ -460,10 +457,61 @@ class OneOf(PredicateExpression, abc.Collection): def IsIn(*values): # pylint: disable=invalid-name # explicitly mimics an expression """Match any of the given URIs.""" - return Or(Is(value) for value in normalize_args(*values)) + args = normalize_args(*values) + if len(args) == 0: + raise AttributeError('expected at least one value, found none') + if len(args) == 1: + return Is(args[0]) + return Or(Is(value) for value in args) def IsNotIn(*values): # pylint: disable=invalid-name # explicitly mimics an expression """Match none of the given URIs.""" return Not(IsIn(*values)) + +def Between( + lo: float = float('-inf'), + hi: float = float('inf'), + lo_strict: bool = True, + hi_strict: bool = True, + ): + """Match numerical values between *lo* and *hi*. Include bounds if strict is False.""" + if abs(lo) == hi == float('inf'): + raise ValueError('range cannot be INF on both sides') + if lo > hi: + raise ValueError(f'lower bound ({lo}) cannot be less than upper bound ({hi})') + if lo == hi and not lo_strict and not hi_strict: + return Equals(lo) + if lo == hi: # either bound is strict + raise ValueError(f'bounds cannot be equal when either is strict') + if lo != float('-inf') and hi != float('inf'): + return And(GreaterThan(lo, lo_strict), LessThan(hi, hi_strict)) + if lo != float('-inf'): + return GreaterThan(lo, lo_strict) + # hi != float('inf'): + return LessThan(hi, hi_strict) + + +def Includes(*values, approx: bool = False): + """Match any of the given *values*. Uses `Substring` if *approx* is set.""" + args = normalize_args(*values) + cls = Substring if approx else Equals + if len(args) == 0: + raise AttributeError('expected at least one value, found none') + if len(args) == 1: + return cls(args[0]) + return Or(cls(v) for v in args) + + +def Excludes(*values, approx: bool = False): + """Match none of the given *values*. Uses `Substring` if *approx* is set.""" + args = normalize_args(*values) + cls = Substring if approx else Equals + if len(args) == 0: + raise AttributeError('expected at least one value, found none') + if len(args) == 1: + return Not(cls(args[0])) + return Not(Or(cls(v) for v in args)) + + ## EOF ## -- cgit v1.2.3 From f31a0d005785d474a37ec769c1f7f5e27aa08a57 Mon Sep 17 00:00:00 2001 From: Matthias Baumgartner Date: Wed, 8 Feb 2023 21:08:24 +0100 Subject: minor comments --- bsfs/graph/nodes.py | 2 ++ bsfs/graph/resolve.py | 1 + bsfs/graph/result.py | 2 ++ bsfs/graph/walk.py | 4 ++-- bsfs/query/ast/filter_.py | 17 +++++++++-------- bsfs/query/validator.py | 1 + 6 files changed, 17 insertions(+), 10 deletions(-) (limited to 'bsfs') diff --git a/bsfs/graph/nodes.py b/bsfs/graph/nodes.py index 9990714..bc71a32 100644 --- a/bsfs/graph/nodes.py +++ b/bsfs/graph/nodes.py @@ -199,6 +199,7 @@ class Nodes(): """Get values or nodes at *paths*. Return an iterator (view=list) or a dict (view=dict) over the results. """ + # FIXME: user-provided Fetch query AST? # check args if len(paths) == 0: raise AttributeError('expected at least one path, found none') @@ -345,6 +346,7 @@ class Nodes(): elif isinstance(pred.range, bsc.Node): # check value type + # FIXME: value could be a set of Nodes if not isinstance(value, Nodes): raise TypeError(value) # value's node_type must be a subclass of the predicate's range diff --git a/bsfs/graph/resolve.py b/bsfs/graph/resolve.py index 00b778b..4677401 100644 --- a/bsfs/graph/resolve.py +++ b/bsfs/graph/resolve.py @@ -41,6 +41,7 @@ class Filter(): self.schema = schema def __call__(self, root_type: bsc.Node, node: ast.filter.FilterExpression): + # FIXME: node can be None! return self._parse_filter_expression(root_type, node) def _parse_filter_expression( diff --git a/bsfs/graph/result.py b/bsfs/graph/result.py index 00607f4..31822f1 100644 --- a/bsfs/graph/result.py +++ b/bsfs/graph/result.py @@ -109,10 +109,12 @@ def to_dict_view( # FIXME: Combine multiple Nodes instances into one? # convert defaultdict to ordinary dict + # pylint: disable=too-many-boolean-expressions if not node and not path and not value \ and len(unique_paths) > 0 and one_node and one_path \ and len(data) == 0: return default + # pylint: enable=too-many-boolean-expressions if not node and not path: return data if node ^ path: diff --git a/bsfs/graph/walk.py b/bsfs/graph/walk.py index 63ef5e9..1b1cfa0 100644 --- a/bsfs/graph/walk.py +++ b/bsfs/graph/walk.py @@ -88,9 +88,9 @@ class Walk(abc.Hashable, abc.Callable): # type: ignore [misc] # invalid base cla if pred.uri.get('fragment', None) == name ) if len(predicates) == 0: # no fragment found for name - raise ValueError(f'no available predicate matches {name}') + raise ValueError(f'no available predicate matches {name}') # FIXME: Custom exception if len(predicates) > 1: # ambiguous name - raise ValueError(f'{name} matches multiple predicates') + raise ValueError(f'{name} matches multiple predicates') # FIXME: Custom exception # append predicate to walk return predicates # type: ignore [return-value] # size is one diff --git a/bsfs/query/ast/filter_.py b/bsfs/query/ast/filter_.py index 44490fc..b29d89e 100644 --- a/bsfs/query/ast/filter_.py +++ b/bsfs/query/ast/filter_.py @@ -31,7 +31,7 @@ from collections import abc import typing # bsfs imports -from bsfs.utils import URI, errors, typename, normalize_args +from bsfs.utils import URI, typename, normalize_args # exports __all__ : typing.Sequence[str] = ( @@ -454,8 +454,9 @@ class OneOf(PredicateExpression, abc.Collection): # Helpers +# invalid-name is disabled since they explicitly mimic an expression -def IsIn(*values): # pylint: disable=invalid-name # explicitly mimics an expression +def IsIn(*values) -> FilterExpression: # pylint: disable=invalid-name """Match any of the given URIs.""" args = normalize_args(*values) if len(args) == 0: @@ -464,17 +465,17 @@ def IsIn(*values): # pylint: disable=invalid-name # explicitly mimics an express return Is(args[0]) return Or(Is(value) for value in args) -def IsNotIn(*values): # pylint: disable=invalid-name # explicitly mimics an expression +def IsNotIn(*values) -> FilterExpression: # pylint: disable=invalid-name """Match none of the given URIs.""" return Not(IsIn(*values)) -def Between( +def Between( # pylint: disable=invalid-name lo: float = float('-inf'), hi: float = float('inf'), lo_strict: bool = True, hi_strict: bool = True, - ): + ) -> FilterExpression : """Match numerical values between *lo* and *hi*. Include bounds if strict is False.""" if abs(lo) == hi == float('inf'): raise ValueError('range cannot be INF on both sides') @@ -483,7 +484,7 @@ def Between( if lo == hi and not lo_strict and not hi_strict: return Equals(lo) if lo == hi: # either bound is strict - raise ValueError(f'bounds cannot be equal when either is strict') + raise ValueError('bounds cannot be equal when either is strict') if lo != float('-inf') and hi != float('inf'): return And(GreaterThan(lo, lo_strict), LessThan(hi, hi_strict)) if lo != float('-inf'): @@ -492,7 +493,7 @@ def Between( return LessThan(hi, hi_strict) -def Includes(*values, approx: bool = False): +def Includes(*values, approx: bool = False) -> FilterExpression: # pylint: disable=invalid-name """Match any of the given *values*. Uses `Substring` if *approx* is set.""" args = normalize_args(*values) cls = Substring if approx else Equals @@ -503,7 +504,7 @@ def Includes(*values, approx: bool = False): return Or(cls(v) for v in args) -def Excludes(*values, approx: bool = False): +def Excludes(*values, approx: bool = False) -> FilterExpression: # pylint: disable=invalid-name """Match none of the given *values*. Uses `Substring` if *approx* is set.""" args = normalize_args(*values) cls = Substring if approx else Equals diff --git a/bsfs/query/validator.py b/bsfs/query/validator.py index 9fbff12..f0aa795 100644 --- a/bsfs/query/validator.py +++ b/bsfs/query/validator.py @@ -20,6 +20,7 @@ __all__ : typing.Sequence[str] = ( 'Filter', ) +# FIXME: Split into a submodule and the two classes into their own respective files. ## code ## -- cgit v1.2.3 From c0218a8dffcdc3a7a5568f66bb959139fe514ad5 Mon Sep 17 00:00:00 2001 From: Matthias Baumgartner Date: Wed, 8 Feb 2023 21:14:36 +0100 Subject: Graph.all to retrieve all nodes --- bsfs/graph/graph.py | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'bsfs') diff --git a/bsfs/graph/graph.py b/bsfs/graph/graph.py index 2210755..df2e3a5 100644 --- a/bsfs/graph/graph.py +++ b/bsfs/graph/graph.py @@ -133,4 +133,12 @@ class Graph(): # return Nodes instance return _nodes.Nodes(self._backend, self._user, type_, guids) + def all(self, node_type: URI) -> _nodes.Nodes: + """Return all instances of type *node_type*.""" + # get node type + type_ = self.schema.node(node_type) + guids = self._backend.get(type_, None) # no need to materialize + return _nodes.Nodes(self._backend, self._user, type_, guids) + + ## EOF ## -- cgit v1.2.3 From f9eec185bf3d857c220e5d78de75ec6713437330 Mon Sep 17 00:00:00 2001 From: Matthias Baumgartner Date: Wed, 1 Mar 2023 12:39:42 +0100 Subject: Construct Graph and Nodes with AC instead of user --- bsfs/front/builder.py | 6 ++++-- bsfs/graph/ac/base.py | 16 +++++++++++++++- bsfs/graph/graph.py | 35 +++++++++++++++++++++++------------ bsfs/graph/nodes.py | 41 ++++++++++++++++++++--------------------- 4 files changed, 62 insertions(+), 36 deletions(-) (limited to 'bsfs') diff --git a/bsfs/front/builder.py b/bsfs/front/builder.py index 73f1703..ecdc768 100644 --- a/bsfs/front/builder.py +++ b/bsfs/front/builder.py @@ -8,7 +8,7 @@ Author: Matthias Baumgartner, 2022 import typing # bsfs imports -from bsfs.graph import Graph +from bsfs.graph import Graph, ac from bsfs.triple_store import TripleStoreBase, SparqlStore from bsfs.utils import URI, errors @@ -68,8 +68,10 @@ def build_graph(cfg: typing.Any) -> Graph: if 'backend' not in args: raise errors.ConfigError('required argument "backend" is not provided') backend = build_backend(args['backend']) + # build access controls + access_controls = ac.NullAC(backend, user) # build and return graph cls = _graph_classes[name] - return cls(backend, user) + return cls(backend, access_controls) ## EOF ## diff --git a/bsfs/graph/ac/base.py b/bsfs/graph/ac/base.py index 79b09e5..0b9f988 100644 --- a/bsfs/graph/ac/base.py +++ b/bsfs/graph/ac/base.py @@ -12,7 +12,7 @@ import typing from bsfs import schema from bsfs.query import ast from bsfs.triple_store import TripleStoreBase -from bsfs.utils import URI +from bsfs.utils import URI, typename # exports __all__: typing.Sequence[str] = ( @@ -44,6 +44,20 @@ class AccessControlBase(abc.ABC): self._backend = backend self._user = URI(user) + def __str__(self) -> str: + return f'{typename(self)}({self._user})' + + def __repr__(self) -> str: + return f'{typename(self)}({self._user})' + + def __eq__(self, other: typing.Any) -> bool: + return isinstance(other, type(self)) \ + and self._backend == other._backend \ + and self._user == other._user + + def __hash__(self) -> int: + return hash((type(self), self._backend, self._user)) + @abc.abstractmethod def is_protected_predicate(self, pred: schema.Predicate) -> bool: """Return True if a predicate cannot be modified manually.""" diff --git a/bsfs/graph/graph.py b/bsfs/graph/graph.py index df2e3a5..a74da01 100644 --- a/bsfs/graph/graph.py +++ b/bsfs/graph/graph.py @@ -40,31 +40,42 @@ class Graph(): # link to the triple storage backend. _backend: TripleStoreBase - # user uri. - _user: URI + # access controls. + _ac: ac.AccessControlBase - def __init__(self, backend: TripleStoreBase, user: URI): + # query resolver. + _resolver: resolve.Filter + + # query validator. + _validate: validate.Filter + + def __init__( + self, + backend: TripleStoreBase, + access_control: ac.AccessControlBase, + ): + # store members self._backend = backend - self._user = user + self._ac = access_control + # helper classes self._resolver = resolve.Filter(self._backend.schema) self._validate = validate.Filter(self._backend.schema) - self._ac = ac.NullAC(self._backend, self._user) # ensure Graph schema requirements self.migrate(self._backend.schema) def __hash__(self) -> int: - return hash((type(self), self._backend, self._user)) + return hash((type(self), self._backend, self._ac)) def __eq__(self, other) -> bool: return isinstance(other, type(self)) \ and self._backend == other._backend \ - and self._user == other._user + and self._ac == other._ac def __repr__(self) -> str: - return f'{typename(self)}(backend={repr(self._backend)}, user={self._user})' + return f'{typename(self)}({repr(self._backend)}, {self._ac})' def __str__(self) -> str: - return f'{typename(self)}({str(self._backend)}, {self._user})' + return f'{typename(self)}({str(self._backend)})' @property def schema(self) -> bsc.Schema: @@ -106,7 +117,7 @@ class Graph(): """ type_ = self.schema.node(node_type) # NOTE: Nodes constructor materializes guids. - return _nodes.Nodes(self._backend, self._user, type_, guids) + return _nodes.Nodes(self._backend, self._ac, type_, guids) def node(self, node_type: URI, guid: URI) -> _nodes.Nodes: """Return node *guid* of type *node_type* as a `bsfs.graph.Nodes` instance. @@ -131,14 +142,14 @@ class Graph(): # query the backend guids = self._backend.get(type_, query) # no need to materialize # return Nodes instance - return _nodes.Nodes(self._backend, self._user, type_, guids) + return _nodes.Nodes(self._backend, self._ac, type_, guids) def all(self, node_type: URI) -> _nodes.Nodes: """Return all instances of type *node_type*.""" # get node type type_ = self.schema.node(node_type) guids = self._backend.get(type_, None) # no need to materialize - return _nodes.Nodes(self._backend, self._user, type_, guids) + return _nodes.Nodes(self._backend, self._ac, type_, guids) ## EOF ## diff --git a/bsfs/graph/nodes.py b/bsfs/graph/nodes.py index bc71a32..91cbb5d 100644 --- a/bsfs/graph/nodes.py +++ b/bsfs/graph/nodes.py @@ -37,8 +37,8 @@ class Nodes(): # triple store backend. _backend: TripleStoreBase - # user uri. - _user: URI + # access controls. + _ac: ac.AccessControlBase # node type. _node_type: bsc.Node @@ -49,31 +49,30 @@ class Nodes(): def __init__( self, backend: TripleStoreBase, - user: URI, + access_control: ac.AccessControlBase, node_type: bsc.Node, guids: typing.Iterable[URI], ): # set main members self._backend = backend - self._user = user + self._ac = access_control self._node_type = node_type self._guids = set(guids) # create helper instances # FIXME: Assumes that the schema does not change while the instance is in use! - self._ac = ac.NullAC(self._backend, self._user) def __eq__(self, other: typing.Any) -> bool: return isinstance(other, Nodes) \ and self._backend == other._backend \ - and self._user == other._user \ + and self._ac == other._ac \ and self._node_type == other._node_type \ and self._guids == other._guids def __hash__(self) -> int: - return hash((type(self), self._backend, self._user, self._node_type, tuple(sorted(self._guids)))) + return hash((type(self), self._backend, self._ac, self._node_type, tuple(sorted(self._guids)))) def __repr__(self) -> str: - return f'{typename(self)}({self._backend}, {self._user}, {self._node_type}, {self._guids})' + return f'{typename(self)}({self._backend}, {self._ac}, {self._node_type}, {self._guids})' def __str__(self) -> str: return f'{typename(self)}({self._node_type}, {self._guids})' @@ -94,44 +93,44 @@ class Nodes(): return self._backend.schema def __add__(self, other: typing.Any) -> 'Nodes': - """Concatenate guids. Backend, user, and node type must match.""" + """Concatenate guids. Backend, AC, and node type must match.""" if not isinstance(other, type(self)): return NotImplemented if self._backend != other._backend: raise ValueError(other) - if self._user != other._user: + if self._ac != other._ac: raise ValueError(other) if self.node_type != other.node_type: raise ValueError(other) - return Nodes(self._backend, self._user, self.node_type, self._guids | other._guids) + return Nodes(self._backend, self._ac, self.node_type, self._guids | other._guids) def __or__(self, other: typing.Any) -> 'Nodes': - """Concatenate guids. Backend, user, and node type must match.""" + """Concatenate guids. Backend, AC, and node type must match.""" return self.__add__(other) def __sub__(self, other: typing.Any) -> 'Nodes': - """Subtract guids. Backend, user, and node type must match.""" + """Subtract guids. Backend, AC, and node type must match.""" if not isinstance(other, type(self)): return NotImplemented if self._backend != other._backend: raise ValueError(other) - if self._user != other._user: + if self._ac != other._ac: raise ValueError(other) if self.node_type != other.node_type: raise ValueError(other) - return Nodes(self._backend, self._user, self.node_type, self._guids - other._guids) + return Nodes(self._backend, self._ac, self.node_type, self._guids - other._guids) def __and__(self, other: typing.Any) -> 'Nodes': - """Intersect guids. Backend, user, and node type must match.""" + """Intersect guids. Backend, AC, and node type must match.""" if not isinstance(other, type(self)): return NotImplemented if self._backend != other._backend: raise ValueError(other) - if self._user != other._user: + if self._ac != other._ac: raise ValueError(other) if self.node_type != other.node_type: raise ValueError(other) - return Nodes(self._backend, self._user, self.node_type, self._guids & other._guids) + return Nodes(self._backend, self._ac, self.node_type, self._guids & other._guids) def __len__(self) -> int: """Return the number of guids.""" @@ -140,7 +139,7 @@ class Nodes(): def __iter__(self) -> typing.Iterator['Nodes']: """Iterate over individual guids. Returns `Nodes` instances.""" return iter( - Nodes(self._backend, self._user, self.node_type, {guid}) + Nodes(self._backend, self._ac, self.node_type, {guid}) for guid in self._guids ) @@ -266,12 +265,12 @@ class Nodes(): # process triples for root, name, raw in triples: # get node - node = Nodes(self._backend, self._user, self.node_type, {root}) + node = Nodes(self._backend, self._ac, self.node_type, {root}) # get path path, tail = name2path[name] # covert raw to value if isinstance(tail.range, bsc.Node): - value = Nodes(self._backend, self._user, tail.range, {raw}) + value = Nodes(self._backend, self._ac, tail.range, {raw}) else: value = raw # emit triple -- cgit v1.2.3 From d822df3dad0525f35dbacda9d5a66f4756f079ff Mon Sep 17 00:00:00 2001 From: Matthias Baumgartner Date: Wed, 1 Mar 2023 12:49:47 +0100 Subject: Integrate main app into package --- bsfs/apps/__init__.py | 38 ++++++++++++++++++++++++++++++++++++++ bsfs/apps/init.py | 6 ++++-- bsfs/apps/migrate.py | 1 + 3 files changed, 43 insertions(+), 2 deletions(-) (limited to 'bsfs') diff --git a/bsfs/apps/__init__.py b/bsfs/apps/__init__.py index 7efaa87..3dec9ad 100644 --- a/bsfs/apps/__init__.py +++ b/bsfs/apps/__init__.py @@ -1,3 +1,4 @@ +#!/usr/bin/env python3 """ Part of the BlackStar filesystem (bsfs) module. @@ -5,16 +6,53 @@ A copy of the license is provided with the project. Author: Matthias Baumgartner, 2022 """ # imports +import argparse import typing +# bsfs imports +import bsfs + # inner-module imports from .init import main as init from .migrate import main as migrate # exports __all__: typing.Sequence[str] = ( + 'main', 'init', 'migrate', ) +# config +apps = { + 'init' : init, + 'migrate' : migrate, + } + + +## code ## + +def main(argv=None): + """Black Star File System maintenance tools.""" + parser = argparse.ArgumentParser(description=main.__doc__, prog='bsfs') + # version + parser.add_argument('--version', action='version', + version='%(prog)s version {}.{}.{}'.format(*bsfs.version_info)) + # application selection + parser.add_argument('app', choices=apps.keys(), + help='Select the application to run.') + # dangling args + parser.add_argument('rest', nargs=argparse.REMAINDER) + # parse + args = parser.parse_args(argv) + # run application + apps[args.app](args.rest) + + +## main ## + +if __name__ == '__main__': + import sys + main(sys.argv[1:]) + ## EOF ## diff --git a/bsfs/apps/init.py b/bsfs/apps/init.py index 3e2ef37..ec48525 100644 --- a/bsfs/apps/init.py +++ b/bsfs/apps/init.py @@ -1,3 +1,4 @@ +#!/usr/bin/env python3 """ Part of the BlackStar filesystem (bsfs) module. @@ -60,9 +61,10 @@ def main(argv): # print config if args.output is not None: with open(args.output, mode='wt', encoding='UTF-8') as ofile: - json.dump(config, ofile) + json.dump(config, ofile, indent=4) else: - json.dump(config, sys.stdout) + json.dump(config, sys.stdout, indent=4) + print('') ## main ## diff --git a/bsfs/apps/migrate.py b/bsfs/apps/migrate.py index b9d019f..cb62542 100644 --- a/bsfs/apps/migrate.py +++ b/bsfs/apps/migrate.py @@ -1,3 +1,4 @@ +#!/usr/bin/env python3 """ Part of the BlackStar filesystem (bsfs) module. -- cgit v1.2.3 From a5695dcc4e1be8fccd0b35ba4672cdb3c2c119d7 Mon Sep 17 00:00:00 2001 From: Matthias Baumgartner Date: Wed, 1 Mar 2023 13:06:59 +0100 Subject: minor fixes --- bsfs/graph/graph.py | 4 +--- bsfs/graph/schema.nt | 4 ++-- 2 files changed, 3 insertions(+), 5 deletions(-) (limited to 'bsfs') diff --git a/bsfs/graph/graph.py b/bsfs/graph/graph.py index a74da01..a356533 100644 --- a/bsfs/graph/graph.py +++ b/bsfs/graph/graph.py @@ -28,9 +28,7 @@ __all__: typing.Sequence[str] = ( ## code ## class Graph(): - """The Graph class is - - The Graph class provides a convenient interface to query and access a graph. + """The Graph class provides a convenient interface to query and access a graph. Since it logically builds on the concept of graphs it is easier to navigate than raw triple stores. Naturally, it uses a triple store as *backend*. It also controls actions via access permissions to a *user*. diff --git a/bsfs/graph/schema.nt b/bsfs/graph/schema.nt index f619746..cba5e80 100644 --- a/bsfs/graph/schema.nt +++ b/bsfs/graph/schema.nt @@ -9,11 +9,11 @@ prefix bsm: # literals bsfs:Number rdfs:subClassOf bsfs:Literal . -xsd:integer rdfs:subClassOf bsfs:Number . +xsd:float rdfs:subClassOf bsfs:Number . # predicates bsm:t_created rdfs:subClassOf bsfs:Predicate ; rdfs:domain bsfs:Node ; - rdfs:range xsd:integer ; + rdfs:range xsd:float ; bsfs:unique "true"^^xsd:boolean . -- cgit v1.2.3 From 87f437380c1dd8f420437cddc028c0f3174ee1c9 Mon Sep 17 00:00:00 2001 From: Matthias Baumgartner Date: Thu, 2 Mar 2023 12:19:58 +0100 Subject: Node getters in bsfs.Graph: * Empty nodes instance (Graph.empty) * Order-preserving get query (Graph.sorted) * Collect common code in private Graph.__get * Empty query in Graph.get * Empty query in Graph.resolve.Filter * Empty query in AC: filter_read --- bsfs/graph/ac/base.py | 6 +++++- bsfs/graph/ac/null.py | 6 +++++- bsfs/graph/graph.py | 60 +++++++++++++++++++++++++++++++++++++-------------- bsfs/graph/resolve.py | 9 ++++++-- 4 files changed, 61 insertions(+), 20 deletions(-) (limited to 'bsfs') diff --git a/bsfs/graph/ac/base.py b/bsfs/graph/ac/base.py index 0b9f988..2759557 100644 --- a/bsfs/graph/ac/base.py +++ b/bsfs/graph/ac/base.py @@ -83,7 +83,11 @@ class AccessControlBase(abc.ABC): """Return nodes that are allowed to be created.""" @abc.abstractmethod - def filter_read(self, node_type: schema.Node, query: ast.filter.FilterExpression) -> ast.filter.FilterExpression: + def filter_read( + self, + node_type: schema.Node, + query: typing.Optional[ast.filter.FilterExpression], + ) -> typing.Optional[ast.filter.FilterExpression]: """Re-write a filter *query* to get (i.e., read) *node_type* nodes.""" @abc.abstractmethod diff --git a/bsfs/graph/ac/null.py b/bsfs/graph/ac/null.py index 6a923a5..e67b55d 100644 --- a/bsfs/graph/ac/null.py +++ b/bsfs/graph/ac/null.py @@ -50,7 +50,11 @@ class NullAC(base.AccessControlBase): """Return nodes that are allowed to be created.""" return guids - def filter_read(self, node_type: schema.Node, query: ast.filter.FilterExpression) -> ast.filter.FilterExpression: + def filter_read( + self, + node_type: schema.Node, + query: typing.Optional[ast.filter.FilterExpression] + ) -> typing.Optional[ast.filter.FilterExpression]: """Re-write a filter *query* to get (i.e., read) *node_type* nodes.""" return query diff --git a/bsfs/graph/graph.py b/bsfs/graph/graph.py index a356533..11fe835 100644 --- a/bsfs/graph/graph.py +++ b/bsfs/graph/graph.py @@ -113,6 +113,7 @@ class Graph(): *node_type*) once some data is assigned to them. """ + # get node type type_ = self.schema.node(node_type) # NOTE: Nodes constructor materializes guids. return _nodes.Nodes(self._backend, self._ac, type_, guids) @@ -120,15 +121,51 @@ class Graph(): def node(self, node_type: URI, guid: URI) -> _nodes.Nodes: """Return node *guid* of type *node_type* as a `bsfs.graph.Nodes` instance. - Note that the *guids* need not to exist (however, the *node_type* has + Note that the *guid* need not to exist (however, the *node_type* has to be part of the schema). An inexistent guid will be created (using *node_type*) once some data is assigned to them. """ return self.nodes(node_type, {guid}) - def get(self, node_type: URI, query: ast.filter.FilterExpression) -> _nodes.Nodes: # FIXME: How about empty query? - """Return a `Nodes` instance over all nodes of type *node_type* that match the *subject* query.""" + def empty(self, node_type: URI) -> _nodes.Nodes: + """Return a `Nodes` instance with type *node_type* but no nodes.""" + return self.nodes(node_type, set()) + + def get( + self, + node_type: URI, + query: typing.Optional[ast.filter.FilterExpression], + ) -> _nodes.Nodes: + """Return a `Nodes` instance over all nodes of type *node_type* that match the *query*.""" + # return Nodes instance + type_ = self.schema.node(node_type) + return _nodes.Nodes(self._backend, self._ac, type_, self.__get(node_type, query)) + + def sorted( + self, + node_type: URI, + query: typing.Optional[ast.filter.FilterExpression], + # FIXME: sort ast + ) -> typing.Iterator[_nodes.Nodes]: + """Return a iterator over `Nodes` instances over all nodes of type *node_type* that match the *query*.""" + # FIXME: Order should be a parameter + # return iterator over Nodes instances + type_ = self.schema.node(node_type) + for guid in self.__get(node_type, query): + yield _nodes.Nodes(self._backend, self._ac, type_, {guid}) + + def all(self, node_type: URI) -> _nodes.Nodes: + """Return all instances of type *node_type*.""" + type_ = self.schema.node(node_type) + return _nodes.Nodes(self._backend, self._ac, type_, self.__get(node_type, None)) + + def __get( + self, + node_type: URI, + query: typing.Optional[ast.filter.FilterExpression], + ) -> typing.Iterator[URI]: + """Build and execute a get query.""" # get node type type_ = self.schema.node(node_type) # resolve Nodes instances @@ -136,18 +173,9 @@ class Graph(): # add access controls to query query = self._ac.filter_read(type_, query) # validate query - self._validate(type_, query) - # query the backend - guids = self._backend.get(type_, query) # no need to materialize - # return Nodes instance - return _nodes.Nodes(self._backend, self._ac, type_, guids) - - def all(self, node_type: URI) -> _nodes.Nodes: - """Return all instances of type *node_type*.""" - # get node type - type_ = self.schema.node(node_type) - guids = self._backend.get(type_, None) # no need to materialize - return _nodes.Nodes(self._backend, self._ac, type_, guids) - + if query is not None: + self._validate(type_, query) + # query the backend and return the (non-materialized) result + return self._backend.get(type_, query) ## EOF ## diff --git a/bsfs/graph/resolve.py b/bsfs/graph/resolve.py index 4677401..b3ab001 100644 --- a/bsfs/graph/resolve.py +++ b/bsfs/graph/resolve.py @@ -40,8 +40,13 @@ class Filter(): def __init__(self, schema): self.schema = schema - def __call__(self, root_type: bsc.Node, node: ast.filter.FilterExpression): - # FIXME: node can be None! + def __call__( + self, + root_type: bsc.Node, + node: typing.Optional[ast.filter.FilterExpression], + ): + if node is None: + return None return self._parse_filter_expression(root_type, node) def _parse_filter_expression( -- cgit v1.2.3 From cd27775b406482b11f44575ab196501a30d9b075 Mon Sep 17 00:00:00 2001 From: Matthias Baumgartner Date: Thu, 2 Mar 2023 12:23:49 +0100 Subject: default sort order in sparql backend --- bsfs/triple_store/sparql/utils.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'bsfs') diff --git a/bsfs/triple_store/sparql/utils.py b/bsfs/triple_store/sparql/utils.py index deca4d8..51de893 100644 --- a/bsfs/triple_store/sparql/utils.py +++ b/bsfs/triple_store/sparql/utils.py @@ -127,11 +127,12 @@ class Query(): """Return an executable sparql query.""" select = ' '.join(f'({head} as ?{name})' for head, name in self.select) return f''' - SELECT {self.root_head} {select} + SELECT DISTINCT {self.root_head} {select} WHERE {{ {self.root_head} <{ns.rdf.type}>/<{ns.rdfs.subClassOf}>* <{self.root_type}> . {self.where} }} + ORDER BY str({self.root_head}) ''' def __call__(self, graph: rdflib.Graph) -> rdflib.query.Result: -- cgit v1.2.3 From 36d07cc6e0ec0f53001bfc5045437a374ebb895f Mon Sep 17 00:00:00 2001 From: Matthias Baumgartner Date: Thu, 2 Mar 2023 14:55:57 +0100 Subject: empty fetch result in Nodes --- bsfs/graph/nodes.py | 58 +++++++++++++++++++++++++++++------------------------ 1 file changed, 32 insertions(+), 26 deletions(-) (limited to 'bsfs') diff --git a/bsfs/graph/nodes.py b/bsfs/graph/nodes.py index 91cbb5d..c6bd5d8 100644 --- a/bsfs/graph/nodes.py +++ b/bsfs/graph/nodes.py @@ -249,32 +249,38 @@ class Nodes(): # add access controls to fetch fetch = self._ac.fetch_read(self.node_type, fetch) - # compose filter ast - filter = ast.filter.IsIn(self.guids) # pylint: disable=redefined-builtin - # add access controls to filter - filter = self._ac.filter_read(self.node_type, filter) - - # validate queries - validate.Filter(self._backend.schema)(self.node_type, filter) - validate.Fetch(self._backend.schema)(self.node_type, fetch) - - # process results, convert if need be - def triple_iter(): - # query the backend - triples = self._backend.fetch(self.node_type, filter, fetch) - # process triples - for root, name, raw in triples: - # get node - node = Nodes(self._backend, self._ac, self.node_type, {root}) - # get path - path, tail = name2path[name] - # covert raw to value - if isinstance(tail.range, bsc.Node): - value = Nodes(self._backend, self._ac, tail.range, {raw}) - else: - value = raw - # emit triple - yield node, path, value + if len(self._guids) == 0: + # shortcut: no need to query; no triples + # FIXME: if the Fetch query can given by the user, we might want to check its validity + def triple_iter(): + return [] + else: + # compose filter ast + filter = ast.filter.IsIn(self.guids) # pylint: disable=redefined-builtin + # add access controls to filter + filter = self._ac.filter_read(self.node_type, filter) # type: ignore [assignment] + + # validate queries + validate.Filter(self._backend.schema)(self.node_type, filter) + validate.Fetch(self._backend.schema)(self.node_type, fetch) + + # process results, convert if need be + def triple_iter(): + # query the backend + triples = self._backend.fetch(self.node_type, filter, fetch) + # process triples + for root, name, raw in triples: + # get node + node = Nodes(self._backend, self._ac, self.node_type, {root}) + # get path + path, tail = name2path[name] + # covert raw to value + if isinstance(tail.range, bsc.Node): + value = Nodes(self._backend, self._ac, tail.range, {raw}) + else: + value = raw + # emit triple + yield node, path, value # simplify by default view_kwargs['node'] = view_kwargs.get('node', len(self._guids) != 1) -- cgit v1.2.3 From 48fd909f502d25cbe7ef7732c44734f593c6e022 Mon Sep 17 00:00:00 2001 From: Matthias Baumgartner Date: Thu, 2 Mar 2023 14:57:49 +0100 Subject: README, CHANGELOG, and minor style fixes --- bsfs/apps/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'bsfs') diff --git a/bsfs/apps/__init__.py b/bsfs/apps/__init__.py index 3dec9ad..a85d5db 100644 --- a/bsfs/apps/__init__.py +++ b/bsfs/apps/__init__.py @@ -37,7 +37,7 @@ def main(argv=None): parser = argparse.ArgumentParser(description=main.__doc__, prog='bsfs') # version parser.add_argument('--version', action='version', - version='%(prog)s version {}.{}.{}'.format(*bsfs.version_info)) + version='%(prog)s version {}.{}.{}'.format(*bsfs.version_info)) # pylint: disable=C0209 # application selection parser.add_argument('app', choices=apps.keys(), help='Select the application to run.') -- cgit v1.2.3 From 2e07f33314c238e42bfadc5f39805f93ffbc622e Mon Sep 17 00:00:00 2001 From: Matthias Baumgartner Date: Thu, 2 Mar 2023 15:10:05 +0100 Subject: removed author and license notices from individual files --- bsfs/__init__.py | 5 ----- bsfs/apps/__init__.py | 5 ----- bsfs/apps/init.py | 5 ----- bsfs/apps/migrate.py | 5 ----- bsfs/front/__init__.py | 5 ----- bsfs/front/bsfs.py | 5 ----- bsfs/front/builder.py | 5 ----- bsfs/graph/__init__.py | 5 ----- bsfs/graph/ac/__init__.py | 5 ----- bsfs/graph/ac/base.py | 5 ----- bsfs/graph/ac/null.py | 5 ----- bsfs/graph/graph.py | 5 ----- bsfs/graph/nodes.py | 5 ----- bsfs/graph/resolve.py | 5 ----- bsfs/graph/result.py | 5 ----- bsfs/graph/walk.py | 5 ----- bsfs/namespace/__init__.py | 5 ----- bsfs/namespace/namespace.py | 5 ----- bsfs/namespace/predefined.py | 5 ----- bsfs/query/__init__.py | 5 ----- bsfs/query/ast/__init__.py | 3 --- bsfs/query/ast/fetch.py | 5 ----- bsfs/query/ast/filter_.py | 3 --- bsfs/query/matcher.py | 5 ----- bsfs/query/validator.py | 5 ----- bsfs/schema/__init__.py | 5 ----- bsfs/schema/schema.py | 5 ----- bsfs/schema/serialize.py | 5 ----- bsfs/schema/types.py | 5 ----- bsfs/triple_store/__init__.py | 5 ----- bsfs/triple_store/base.py | 5 ----- bsfs/triple_store/sparql/__init__.py | 5 ----- bsfs/triple_store/sparql/distance.py | 5 ----- bsfs/triple_store/sparql/parse_fetch.py | 5 ----- bsfs/triple_store/sparql/parse_filter.py | 5 ----- bsfs/triple_store/sparql/sparql.py | 5 ----- bsfs/triple_store/sparql/utils.py | 5 ----- bsfs/utils/__init__.py | 5 ----- bsfs/utils/commons.py | 5 ----- bsfs/utils/errors.py | 5 ----- bsfs/utils/uri.py | 5 ----- bsfs/utils/uuid.py | 5 ----- 42 files changed, 206 deletions(-) (limited to 'bsfs') diff --git a/bsfs/__init__.py b/bsfs/__init__.py index 079ffaf..cf08d64 100644 --- a/bsfs/__init__.py +++ b/bsfs/__init__.py @@ -1,9 +1,4 @@ -""" -Part of the BlackStar filesystem (bsfs) module. -A copy of the license is provided with the project. -Author: Matthias Baumgartner, 2022 -""" # imports import collections import typing diff --git a/bsfs/apps/__init__.py b/bsfs/apps/__init__.py index a85d5db..62dc5b5 100644 --- a/bsfs/apps/__init__.py +++ b/bsfs/apps/__init__.py @@ -1,10 +1,5 @@ #!/usr/bin/env python3 -""" -Part of the BlackStar filesystem (bsfs) module. -A copy of the license is provided with the project. -Author: Matthias Baumgartner, 2022 -""" # imports import argparse import typing diff --git a/bsfs/apps/init.py b/bsfs/apps/init.py index ec48525..9afbdd5 100644 --- a/bsfs/apps/init.py +++ b/bsfs/apps/init.py @@ -1,10 +1,5 @@ #!/usr/bin/env python3 -""" -Part of the BlackStar filesystem (bsfs) module. -A copy of the license is provided with the project. -Author: Matthias Baumgartner, 2022 -""" # imports import argparse import json diff --git a/bsfs/apps/migrate.py b/bsfs/apps/migrate.py index cb62542..34ea2e7 100644 --- a/bsfs/apps/migrate.py +++ b/bsfs/apps/migrate.py @@ -1,10 +1,5 @@ #!/usr/bin/env python3 -""" -Part of the BlackStar filesystem (bsfs) module. -A copy of the license is provided with the project. -Author: Matthias Baumgartner, 2022 -""" # imports import argparse import json diff --git a/bsfs/front/__init__.py b/bsfs/front/__init__.py index 92886ab..cedcd7f 100644 --- a/bsfs/front/__init__.py +++ b/bsfs/front/__init__.py @@ -1,9 +1,4 @@ -""" -Part of the BlackStar filesystem (bsfs) module. -A copy of the license is provided with the project. -Author: Matthias Baumgartner, 2022 -""" # imports import typing diff --git a/bsfs/front/bsfs.py b/bsfs/front/bsfs.py index 968b3f5..f437212 100644 --- a/bsfs/front/bsfs.py +++ b/bsfs/front/bsfs.py @@ -1,9 +1,4 @@ -""" -Part of the BlackStar filesystem (bsfs) module. -A copy of the license is provided with the project. -Author: Matthias Baumgartner, 2022 -""" # imports import typing diff --git a/bsfs/front/builder.py b/bsfs/front/builder.py index ecdc768..b1d488b 100644 --- a/bsfs/front/builder.py +++ b/bsfs/front/builder.py @@ -1,9 +1,4 @@ -""" -Part of the BlackStar filesystem (bsfs) module. -A copy of the license is provided with the project. -Author: Matthias Baumgartner, 2022 -""" # imports import typing diff --git a/bsfs/graph/__init__.py b/bsfs/graph/__init__.py index 82d2235..8d38d23 100644 --- a/bsfs/graph/__init__.py +++ b/bsfs/graph/__init__.py @@ -1,9 +1,4 @@ -""" -Part of the BlackStar filesystem (bsfs) module. -A copy of the license is provided with the project. -Author: Matthias Baumgartner, 2022 -""" # imports import typing diff --git a/bsfs/graph/ac/__init__.py b/bsfs/graph/ac/__init__.py index 420de01..11b45df 100644 --- a/bsfs/graph/ac/__init__.py +++ b/bsfs/graph/ac/__init__.py @@ -1,9 +1,4 @@ -""" -Part of the BlackStar filesystem (bsfs) module. -A copy of the license is provided with the project. -Author: Matthias Baumgartner, 2022 -""" # imports import typing diff --git a/bsfs/graph/ac/base.py b/bsfs/graph/ac/base.py index 2759557..e85c1dd 100644 --- a/bsfs/graph/ac/base.py +++ b/bsfs/graph/ac/base.py @@ -1,9 +1,4 @@ -""" -Part of the BlackStar filesystem (bsfs) module. -A copy of the license is provided with the project. -Author: Matthias Baumgartner, 2022 -""" # imports import abc import typing diff --git a/bsfs/graph/ac/null.py b/bsfs/graph/ac/null.py index e67b55d..3a391aa 100644 --- a/bsfs/graph/ac/null.py +++ b/bsfs/graph/ac/null.py @@ -1,9 +1,4 @@ -""" -Part of the BlackStar filesystem (bsfs) module. -A copy of the license is provided with the project. -Author: Matthias Baumgartner, 2022 -""" # imports import typing diff --git a/bsfs/graph/graph.py b/bsfs/graph/graph.py index 11fe835..ade51a5 100644 --- a/bsfs/graph/graph.py +++ b/bsfs/graph/graph.py @@ -1,9 +1,4 @@ -""" -Part of the BlackStar filesystem (bsfs) module. -A copy of the license is provided with the project. -Author: Matthias Baumgartner, 2022 -""" # imports import os import typing diff --git a/bsfs/graph/nodes.py b/bsfs/graph/nodes.py index c6bd5d8..c3530c1 100644 --- a/bsfs/graph/nodes.py +++ b/bsfs/graph/nodes.py @@ -1,9 +1,4 @@ -""" -Part of the BlackStar filesystem (bsfs) module. -A copy of the license is provided with the project. -Author: Matthias Baumgartner, 2022 -""" # imports from collections import abc import time diff --git a/bsfs/graph/resolve.py b/bsfs/graph/resolve.py index b3ab001..213ac4c 100644 --- a/bsfs/graph/resolve.py +++ b/bsfs/graph/resolve.py @@ -1,9 +1,4 @@ -""" -Part of the BlackStar filesystem (bsfs) module. -A copy of the license is provided with the project. -Author: Matthias Baumgartner, 2022 -""" # imports import typing diff --git a/bsfs/graph/result.py b/bsfs/graph/result.py index 31822f1..0fcbb13 100644 --- a/bsfs/graph/result.py +++ b/bsfs/graph/result.py @@ -1,9 +1,4 @@ -""" -Part of the BlackStar filesystem (bsfs) module. -A copy of the license is provided with the project. -Author: Matthias Baumgartner, 2022 -""" # imports from collections import defaultdict import typing diff --git a/bsfs/graph/walk.py b/bsfs/graph/walk.py index 1b1cfa0..6415c9b 100644 --- a/bsfs/graph/walk.py +++ b/bsfs/graph/walk.py @@ -1,9 +1,4 @@ -""" -Part of the BlackStar filesystem (bsfs) module. -A copy of the license is provided with the project. -Author: Matthias Baumgartner, 2022 -""" # imports from collections import abc import typing diff --git a/bsfs/namespace/__init__.py b/bsfs/namespace/__init__.py index 98d472f..1784808 100644 --- a/bsfs/namespace/__init__.py +++ b/bsfs/namespace/__init__.py @@ -1,9 +1,4 @@ -""" -Part of the BlackStar filesystem (bsfs) module. -A copy of the license is provided with the project. -Author: Matthias Baumgartner, 2022 -""" # imports import typing diff --git a/bsfs/namespace/namespace.py b/bsfs/namespace/namespace.py index 1d443c1..0a62b78 100644 --- a/bsfs/namespace/namespace.py +++ b/bsfs/namespace/namespace.py @@ -1,9 +1,4 @@ -""" -Part of the BlackStar filesystem (bsfs) module. -A copy of the license is provided with the project. -Author: Matthias Baumgartner, 2022 -""" # imports import typing diff --git a/bsfs/namespace/predefined.py b/bsfs/namespace/predefined.py index cd48a46..15f12ac 100644 --- a/bsfs/namespace/predefined.py +++ b/bsfs/namespace/predefined.py @@ -1,9 +1,4 @@ -""" -Part of the BlackStar filesystem (bsfs) module. -A copy of the license is provided with the project. -Author: Matthias Baumgartner, 2022 -""" # imports import typing diff --git a/bsfs/query/__init__.py b/bsfs/query/__init__.py index 21c7389..58ff03a 100644 --- a/bsfs/query/__init__.py +++ b/bsfs/query/__init__.py @@ -1,9 +1,4 @@ -""" -Part of the BlackStar filesystem (bsfs) module. -A copy of the license is provided with the project. -Author: Matthias Baumgartner, 2022 -""" # imports import typing diff --git a/bsfs/query/ast/__init__.py b/bsfs/query/ast/__init__.py index 66b097d..bceaac0 100644 --- a/bsfs/query/ast/__init__.py +++ b/bsfs/query/ast/__init__.py @@ -6,9 +6,6 @@ Classes beginning with an underscore (_) represent internal type hierarchies and should not be used for parsing. Note that the AST structures do not (and cannot) check semantic validity or consistency with a given schema. -Part of the BlackStar filesystem (bsfs) module. -A copy of the license is provided with the project. -Author: Matthias Baumgartner, 2022 """ # imports import typing diff --git a/bsfs/query/ast/fetch.py b/bsfs/query/ast/fetch.py index d653a8a..66d94e1 100644 --- a/bsfs/query/ast/fetch.py +++ b/bsfs/query/ast/fetch.py @@ -1,9 +1,4 @@ -""" -Part of the BlackStar filesystem (bsfs) module. -A copy of the license is provided with the project. -Author: Matthias Baumgartner, 2022 -""" # imports from collections import abc import typing diff --git a/bsfs/query/ast/filter_.py b/bsfs/query/ast/filter_.py index b29d89e..56c982e 100644 --- a/bsfs/query/ast/filter_.py +++ b/bsfs/query/ast/filter_.py @@ -22,9 +22,6 @@ This AST has multiple issues that are not verified upon its creation: * Conditions exclude each other * The predicate along the branch have incompatible domains and ranges. -Part of the BlackStar filesystem (bsfs) module. -A copy of the license is provided with the project. -Author: Matthias Baumgartner, 2022 """ # imports from collections import abc diff --git a/bsfs/query/matcher.py b/bsfs/query/matcher.py index a910756..5f3b07e 100644 --- a/bsfs/query/matcher.py +++ b/bsfs/query/matcher.py @@ -1,9 +1,4 @@ -""" -Part of the BlackStar filesystem (bsfs) module. -A copy of the license is provided with the project. -Author: Matthias Baumgartner, 2022 -""" # imports from collections import defaultdict from itertools import product diff --git a/bsfs/query/validator.py b/bsfs/query/validator.py index f0aa795..6e3afa1 100644 --- a/bsfs/query/validator.py +++ b/bsfs/query/validator.py @@ -1,9 +1,4 @@ -""" -Part of the BlackStar filesystem (bsfs) module. -A copy of the license is provided with the project. -Author: Matthias Baumgartner, 2022 -""" # imports import typing diff --git a/bsfs/schema/__init__.py b/bsfs/schema/__init__.py index f53512e..ca2e0cd 100644 --- a/bsfs/schema/__init__.py +++ b/bsfs/schema/__init__.py @@ -1,9 +1,4 @@ -""" -Part of the BlackStar filesystem (bsfs) module. -A copy of the license is provided with the project. -Author: Matthias Baumgartner, 2022 -""" # imports import typing diff --git a/bsfs/schema/schema.py b/bsfs/schema/schema.py index 0de4203..c104436 100644 --- a/bsfs/schema/schema.py +++ b/bsfs/schema/schema.py @@ -1,9 +1,4 @@ -""" -Part of the BlackStar filesystem (bsfs) module. -A copy of the license is provided with the project. -Author: Matthias Baumgartner, 2022 -""" # imports from collections import abc, namedtuple import typing diff --git a/bsfs/schema/serialize.py b/bsfs/schema/serialize.py index acc009a..b05b289 100644 --- a/bsfs/schema/serialize.py +++ b/bsfs/schema/serialize.py @@ -1,9 +1,4 @@ -""" -Part of the BlackStar filesystem (bsfs) module. -A copy of the license is provided with the project. -Author: Matthias Baumgartner, 2022 -""" # standard imports import itertools import typing diff --git a/bsfs/schema/types.py b/bsfs/schema/types.py index 12e7e94..54adffb 100644 --- a/bsfs/schema/types.py +++ b/bsfs/schema/types.py @@ -1,9 +1,4 @@ -""" -Part of the BlackStar filesystem (bsfs) module. -A copy of the license is provided with the project. -Author: Matthias Baumgartner, 2022 -""" # imports import typing diff --git a/bsfs/triple_store/__init__.py b/bsfs/triple_store/__init__.py index fb5a8a9..79a2887 100644 --- a/bsfs/triple_store/__init__.py +++ b/bsfs/triple_store/__init__.py @@ -1,9 +1,4 @@ -""" -Part of the BlackStar filesystem (bsfs) module. -A copy of the license is provided with the project. -Author: Matthias Baumgartner, 2022 -""" # imports import typing diff --git a/bsfs/triple_store/base.py b/bsfs/triple_store/base.py index 1baa63b..58b5670 100644 --- a/bsfs/triple_store/base.py +++ b/bsfs/triple_store/base.py @@ -1,9 +1,4 @@ -""" -Part of the BlackStar filesystem (bsfs) module. -A copy of the license is provided with the project. -Author: Matthias Baumgartner, 2022 -""" # imports import abc import typing diff --git a/bsfs/triple_store/sparql/__init__.py b/bsfs/triple_store/sparql/__init__.py index 285334a..cfa2732 100644 --- a/bsfs/triple_store/sparql/__init__.py +++ b/bsfs/triple_store/sparql/__init__.py @@ -1,9 +1,4 @@ -""" -Part of the BlackStar filesystem (bsfs) module. -A copy of the license is provided with the project. -Author: Matthias Baumgartner, 2022 -""" # imports import typing diff --git a/bsfs/triple_store/sparql/distance.py b/bsfs/triple_store/sparql/distance.py index 2f5387a..9b58088 100644 --- a/bsfs/triple_store/sparql/distance.py +++ b/bsfs/triple_store/sparql/distance.py @@ -1,9 +1,4 @@ -""" -Part of the BlackStar filesystem (bsfs) module. -A copy of the license is provided with the project. -Author: Matthias Baumgartner, 2022 -""" # standard imports import typing diff --git a/bsfs/triple_store/sparql/parse_fetch.py b/bsfs/triple_store/sparql/parse_fetch.py index 20d4e74..fab8173 100644 --- a/bsfs/triple_store/sparql/parse_fetch.py +++ b/bsfs/triple_store/sparql/parse_fetch.py @@ -1,9 +1,4 @@ -""" -Part of the BlackStar filesystem (bsfs) module. -A copy of the license is provided with the project. -Author: Matthias Baumgartner, 2022 -""" # standard imports import typing diff --git a/bsfs/triple_store/sparql/parse_filter.py b/bsfs/triple_store/sparql/parse_filter.py index dca0aea..ff22de2 100644 --- a/bsfs/triple_store/sparql/parse_filter.py +++ b/bsfs/triple_store/sparql/parse_filter.py @@ -1,9 +1,4 @@ -""" -Part of the BlackStar filesystem (bsfs) module. -A copy of the license is provided with the project. -Author: Matthias Baumgartner, 2022 -""" # imports import operator import typing diff --git a/bsfs/triple_store/sparql/sparql.py b/bsfs/triple_store/sparql/sparql.py index dbf9d45..5890bcc 100644 --- a/bsfs/triple_store/sparql/sparql.py +++ b/bsfs/triple_store/sparql/sparql.py @@ -1,9 +1,4 @@ -""" -Part of the BlackStar filesystem (bsfs) module. -A copy of the license is provided with the project. -Author: Matthias Baumgartner, 2022 -""" # imports import base64 import itertools diff --git a/bsfs/triple_store/sparql/utils.py b/bsfs/triple_store/sparql/utils.py index 51de893..38062c2 100644 --- a/bsfs/triple_store/sparql/utils.py +++ b/bsfs/triple_store/sparql/utils.py @@ -1,9 +1,4 @@ -""" -Part of the BlackStar filesystem (bsfs) module. -A copy of the license is provided with the project. -Author: Matthias Baumgartner, 2022 -""" # standard imports import typing diff --git a/bsfs/utils/__init__.py b/bsfs/utils/__init__.py index 6737cef..d497645 100644 --- a/bsfs/utils/__init__.py +++ b/bsfs/utils/__init__.py @@ -1,9 +1,4 @@ -""" -Part of the BlackStar filesystem (bsfs) module. -A copy of the license is provided with the project. -Author: Matthias Baumgartner, 2022 -""" # imports import typing diff --git a/bsfs/utils/commons.py b/bsfs/utils/commons.py index e9f0b7f..a7092ae 100644 --- a/bsfs/utils/commons.py +++ b/bsfs/utils/commons.py @@ -1,9 +1,4 @@ -""" -Part of the BlackStar filesystem (bsfs) module. -A copy of the license is provided with the project. -Author: Matthias Baumgartner, 2022 -""" # imports from collections import abc import typing diff --git a/bsfs/utils/errors.py b/bsfs/utils/errors.py index 6ae6484..b82e6e2 100644 --- a/bsfs/utils/errors.py +++ b/bsfs/utils/errors.py @@ -1,9 +1,4 @@ -""" -Part of the BlackStar filesystem (bsfs) module. -A copy of the license is provided with the project. -Author: Matthias Baumgartner, 2022 -""" # imports import typing diff --git a/bsfs/utils/uri.py b/bsfs/utils/uri.py index 84854a4..0693017 100644 --- a/bsfs/utils/uri.py +++ b/bsfs/utils/uri.py @@ -1,9 +1,4 @@ -""" -Part of the BlackStar filesystem (bsfs) module. -A copy of the license is provided with the project. -Author: Matthias Baumgartner, 2022 -""" # imports import re import typing diff --git a/bsfs/utils/uuid.py b/bsfs/utils/uuid.py index 70e1656..ad7fc1c 100644 --- a/bsfs/utils/uuid.py +++ b/bsfs/utils/uuid.py @@ -1,9 +1,4 @@ -""" -Part of the BlackStar filesystem (bsfs) module. -A copy of the license is provided with the project. -Author: Matthias Baumgartner, 2022 -""" # imports from collections import abc import hashlib -- cgit v1.2.3 From b66ed641d5cbb4cb83f4a571223e4d65d80ed05c Mon Sep 17 00:00:00 2001 From: Matthias Baumgartner Date: Thu, 2 Mar 2023 15:29:12 +0100 Subject: check non-serializable URIs in the sparql store --- bsfs/triple_store/sparql/parse_filter.py | 2 ++ bsfs/triple_store/sparql/sparql.py | 6 ++++++ 2 files changed, 8 insertions(+) (limited to 'bsfs') diff --git a/bsfs/triple_store/sparql/parse_filter.py b/bsfs/triple_store/sparql/parse_filter.py index ff22de2..8959b2c 100644 --- a/bsfs/triple_store/sparql/parse_filter.py +++ b/bsfs/triple_store/sparql/parse_filter.py @@ -267,6 +267,8 @@ class Filter(): """ if not isinstance(node_type, bsc.Node): raise errors.BackendError(f'expected Node, found {node_type}') + if not rdflib.term._is_valid_uri(node.value): # pylint: disable=protected-access + raise errors.BackendError(f'<{node.value}> is not a serializable uri') return f'VALUES {head} {{ <{node.value}> }}' def _equals(self, node_type: bsc.Vertex, node: ast.filter.Equals, head: str) -> str: diff --git a/bsfs/triple_store/sparql/sparql.py b/bsfs/triple_store/sparql/sparql.py index 5890bcc..bd98f46 100644 --- a/bsfs/triple_store/sparql/sparql.py +++ b/bsfs/triple_store/sparql/sparql.py @@ -284,6 +284,9 @@ class SparqlStore(base.TripleStoreBase): raise errors.ConsistencyError(f'{node_type} is not defined in the schema') # check and create guids for guid in guids: + # check convert to rdflib.URIRef + if not rdflib.term._is_valid_uri(guid): # pylint: disable=protected-access + raise ValueError(guids) subject = rdflib.URIRef(guid) # check node existence if (subject, rdflib.RDF.type, None) in self._graph: @@ -324,6 +327,9 @@ class SparqlStore(base.TripleStoreBase): # check guids # FIXME: Fail or skip inexistent nodes? guids = set(guids) + invalid = {guid for guid in guids if not rdflib.term._is_valid_uri(guid)} # pylint: disable=protected-access + if len(invalid) > 0: + raise ValueError(invalid) inconsistent = {guid for guid in guids if not self._has_type(guid, node_type)} if len(inconsistent) > 0: raise errors.InstanceError(inconsistent) -- cgit v1.2.3 From 28a021483c13e974e00b6159f0653b0727df9d10 Mon Sep 17 00:00:00 2001 From: Matthias Baumgartner Date: Thu, 2 Mar 2023 16:40:00 +0100 Subject: prohibit certain characters in URI and ensure URIs in bsfs.graph --- bsfs/graph/nodes.py | 5 ++--- bsfs/schema/types.py | 2 +- bsfs/triple_store/sparql/parse_filter.py | 6 ++---- bsfs/triple_store/sparql/sparql.py | 10 ++-------- bsfs/utils/uri.py | 8 +++++--- 5 files changed, 12 insertions(+), 19 deletions(-) (limited to 'bsfs') diff --git a/bsfs/graph/nodes.py b/bsfs/graph/nodes.py index c3530c1..84996c7 100644 --- a/bsfs/graph/nodes.py +++ b/bsfs/graph/nodes.py @@ -52,9 +52,8 @@ class Nodes(): self._backend = backend self._ac = access_control self._node_type = node_type - self._guids = set(guids) - # create helper instances - # FIXME: Assumes that the schema does not change while the instance is in use! + # convert to URI since this is not guaranteed by Graph + self._guids = {URI(guid) for guid in guids} def __eq__(self, other: typing.Any) -> bool: return isinstance(other, Nodes) \ diff --git a/bsfs/schema/types.py b/bsfs/schema/types.py index 54adffb..104580d 100644 --- a/bsfs/schema/types.py +++ b/bsfs/schema/types.py @@ -98,7 +98,7 @@ class _Type(): parent: typing.Optional['_Type'] = None, **annotations: typing.Any, ): - self.uri = uri + self.uri = URI(uri) self.parent = parent self.annotations = annotations diff --git a/bsfs/triple_store/sparql/parse_filter.py b/bsfs/triple_store/sparql/parse_filter.py index 8959b2c..bf19a02 100644 --- a/bsfs/triple_store/sparql/parse_filter.py +++ b/bsfs/triple_store/sparql/parse_filter.py @@ -154,7 +154,7 @@ class Filter(): puri = f'<{puri}>' # type: ignore [assignment] # variable re-use confuses mypy # apply reverse flag if node.reverse: - puri = URI('^' + puri) + puri = '^' + puri dom, rng = rng, dom # type: ignore [assignment] # variable re-use confuses mypy # check path consistency if not node_type <= dom: @@ -267,9 +267,7 @@ class Filter(): """ if not isinstance(node_type, bsc.Node): raise errors.BackendError(f'expected Node, found {node_type}') - if not rdflib.term._is_valid_uri(node.value): # pylint: disable=protected-access - raise errors.BackendError(f'<{node.value}> is not a serializable uri') - return f'VALUES {head} {{ <{node.value}> }}' + return f'VALUES {head} {{ <{URI(node.value)}> }}' def _equals(self, node_type: bsc.Vertex, node: ast.filter.Equals, head: str) -> str: """ diff --git a/bsfs/triple_store/sparql/sparql.py b/bsfs/triple_store/sparql/sparql.py index bd98f46..68c0027 100644 --- a/bsfs/triple_store/sparql/sparql.py +++ b/bsfs/triple_store/sparql/sparql.py @@ -284,10 +284,7 @@ class SparqlStore(base.TripleStoreBase): raise errors.ConsistencyError(f'{node_type} is not defined in the schema') # check and create guids for guid in guids: - # check convert to rdflib.URIRef - if not rdflib.term._is_valid_uri(guid): # pylint: disable=protected-access - raise ValueError(guids) - subject = rdflib.URIRef(guid) + subject = rdflib.URIRef(URI(guid)) # check node existence if (subject, rdflib.RDF.type, None) in self._graph: # FIXME: node exists and may have a different type! ignore? raise? report? @@ -326,10 +323,7 @@ class SparqlStore(base.TripleStoreBase): raise errors.InstanceError(inconsistent) # check guids # FIXME: Fail or skip inexistent nodes? - guids = set(guids) - invalid = {guid for guid in guids if not rdflib.term._is_valid_uri(guid)} # pylint: disable=protected-access - if len(invalid) > 0: - raise ValueError(invalid) + guids = {URI(guid) for guid in guids} inconsistent = {guid for guid in guids if not self._has_type(guid, node_type)} if len(inconsistent) > 0: raise errors.InstanceError(inconsistent) diff --git a/bsfs/utils/uri.py b/bsfs/utils/uri.py index 0693017..5755a6e 100644 --- a/bsfs/utils/uri.py +++ b/bsfs/utils/uri.py @@ -4,6 +4,8 @@ import re import typing # constants +RX_CHARS = re.compile(r'[<>" {}|\\^]') + RX_URI = re.compile(r''' ^ (?:(?P[^:/?#]+):)? # scheme, ://-delimited @@ -77,6 +79,9 @@ class URI(str): no claim about the validity of an URI! """ + # check characters + if RX_CHARS.search(query) is not None: + return False # check uri parts = RX_URI.match(query) if parts is not None: @@ -227,9 +232,6 @@ class URI(str): # overload formatting methods - def format(self, *args, **kwargs) -> 'URI': - return URI(super().format(*args, **kwargs)) - def __mod__(self, *args) -> 'URI': return URI(super().__mod__(*args)) -- cgit v1.2.3 From 6b9379d75198082054c35e44bc2cd880353a7485 Mon Sep 17 00:00:00 2001 From: Matthias Baumgartner Date: Thu, 2 Mar 2023 16:40:43 +0100 Subject: hardening --- bsfs/graph/graph.py | 16 ++-------------- bsfs/graph/nodes.py | 8 +++++--- bsfs/graph/resolve.py | 8 ++++++++ bsfs/query/validator.py | 10 +++++++++- 4 files changed, 24 insertions(+), 18 deletions(-) (limited to 'bsfs') diff --git a/bsfs/graph/graph.py b/bsfs/graph/graph.py index ade51a5..1b4c212 100644 --- a/bsfs/graph/graph.py +++ b/bsfs/graph/graph.py @@ -36,12 +36,6 @@ class Graph(): # access controls. _ac: ac.AccessControlBase - # query resolver. - _resolver: resolve.Filter - - # query validator. - _validate: validate.Filter - def __init__( self, backend: TripleStoreBase, @@ -50,9 +44,6 @@ class Graph(): # store members self._backend = backend self._ac = access_control - # helper classes - self._resolver = resolve.Filter(self._backend.schema) - self._validate = validate.Filter(self._backend.schema) # ensure Graph schema requirements self.migrate(self._backend.schema) @@ -94,9 +85,6 @@ class Graph(): # migrate schema in backend # FIXME: consult access controls! self._backend.schema = schema - # re-initialize members - self._resolver.schema = self.schema - self._validate.schema = self.schema # return self return self @@ -164,12 +152,12 @@ class Graph(): # get node type type_ = self.schema.node(node_type) # resolve Nodes instances - query = self._resolver(type_, query) + query = resolve.Filter(self._backend.schema).resolve(type_, query) # add access controls to query query = self._ac.filter_read(type_, query) # validate query if query is not None: - self._validate(type_, query) + validate.Filter(self._backend.schema).validate(type_, query) # query the backend and return the (non-materialized) result return self._backend.get(type_, query) diff --git a/bsfs/graph/nodes.py b/bsfs/graph/nodes.py index 84996c7..74f4c4f 100644 --- a/bsfs/graph/nodes.py +++ b/bsfs/graph/nodes.py @@ -25,7 +25,9 @@ __all__: typing.Sequence[str] = ( ## code ## class Nodes(): - """ + """Container for graph nodes, provides operations on nodes. + + NOTE: Should not be created directly but only via `bsfs.graph.Graph`. NOTE: guids may or may not exist. This is not verified as nodes are created on demand. """ @@ -255,8 +257,8 @@ class Nodes(): filter = self._ac.filter_read(self.node_type, filter) # type: ignore [assignment] # validate queries - validate.Filter(self._backend.schema)(self.node_type, filter) - validate.Fetch(self._backend.schema)(self.node_type, fetch) + validate.Filter(self._backend.schema).validate(self.node_type, filter) + validate.Fetch(self._backend.schema).validate(self.node_type, fetch) # process results, convert if need be def triple_iter(): diff --git a/bsfs/graph/resolve.py b/bsfs/graph/resolve.py index 213ac4c..0ba1e36 100644 --- a/bsfs/graph/resolve.py +++ b/bsfs/graph/resolve.py @@ -40,6 +40,14 @@ class Filter(): root_type: bsc.Node, node: typing.Optional[ast.filter.FilterExpression], ): + """Alias for `Resolve.resolve`.""" + return self.resolve(root_type, node) + + def resolve( + self, + root_type: bsc.Node, + node: typing.Optional[ast.filter.FilterExpression], + ): if node is None: return None return self._parse_filter_expression(root_type, node) diff --git a/bsfs/query/validator.py b/bsfs/query/validator.py index 6e3afa1..b259ea0 100644 --- a/bsfs/query/validator.py +++ b/bsfs/query/validator.py @@ -37,6 +37,10 @@ class Filter(): self.schema = schema def __call__(self, root_type: bsc.Node, query: ast.filter.FilterExpression): + """Alias for `Filter.validate`.""" + return self.validate(root_type, query) + + def validate(self, root_type: bsc.Node, query: ast.filter.FilterExpression): """Validate a filter *query*, assuming the subject having *root_type*. Raises a `bsfs.utils.errors.ConsistencyError` if the query violates the schema. @@ -237,7 +241,11 @@ class Fetch(): def __init__(self, schema: bsc.Schema): self.schema = schema - def __call__(self, root_type: bsc.Node, query: ast.fetch.FetchExpression): + def __call__(self, root_type: bsc.Node, query: ast.filter.FilterExpression): + """Alias for `Fetch.validate`.""" + return self.validate(root_type, query) + + def validate(self, root_type: bsc.Node, query: ast.fetch.FetchExpression): """Validate a fetch *query*, assuming the subject having *root_type*. Raises a `bsfs.utils.errors.ConsistencyError` if the query violates the schema. -- cgit v1.2.3 From 2c6c23f85e7f2123c508f9ff8a4aa776948bb589 Mon Sep 17 00:00:00 2001 From: Matthias Baumgartner Date: Thu, 2 Mar 2023 16:46:11 +0100 Subject: minor style fixes --- bsfs/graph/resolve.py | 1 + bsfs/query/validator.py | 8 ++++---- bsfs/triple_store/sparql/parse_filter.py | 6 +++--- 3 files changed, 8 insertions(+), 7 deletions(-) (limited to 'bsfs') diff --git a/bsfs/graph/resolve.py b/bsfs/graph/resolve.py index 0ba1e36..95dcfc1 100644 --- a/bsfs/graph/resolve.py +++ b/bsfs/graph/resolve.py @@ -48,6 +48,7 @@ class Filter(): root_type: bsc.Node, node: typing.Optional[ast.filter.FilterExpression], ): + """Resolve Nodes instances of a *node* query starting at *root_type*.""" if node is None: return None return self._parse_filter_expression(root_type, node) diff --git a/bsfs/query/validator.py b/bsfs/query/validator.py index b259ea0..1ce44e9 100644 --- a/bsfs/query/validator.py +++ b/bsfs/query/validator.py @@ -36,11 +36,11 @@ class Filter(): def __init__(self, schema: bsc.Schema): self.schema = schema - def __call__(self, root_type: bsc.Node, query: ast.filter.FilterExpression): + def __call__(self, root_type: bsc.Node, query: ast.filter.FilterExpression) -> bool: """Alias for `Filter.validate`.""" return self.validate(root_type, query) - def validate(self, root_type: bsc.Node, query: ast.filter.FilterExpression): + def validate(self, root_type: bsc.Node, query: ast.filter.FilterExpression) -> bool: """Validate a filter *query*, assuming the subject having *root_type*. Raises a `bsfs.utils.errors.ConsistencyError` if the query violates the schema. @@ -241,11 +241,11 @@ class Fetch(): def __init__(self, schema: bsc.Schema): self.schema = schema - def __call__(self, root_type: bsc.Node, query: ast.filter.FilterExpression): + def __call__(self, root_type: bsc.Node, query: ast.fetch.FetchExpression) -> bool: """Alias for `Fetch.validate`.""" return self.validate(root_type, query) - def validate(self, root_type: bsc.Node, query: ast.fetch.FetchExpression): + def validate(self, root_type: bsc.Node, query: ast.fetch.FetchExpression) -> bool: """Validate a fetch *query*, assuming the subject having *root_type*. Raises a `bsfs.utils.errors.ConsistencyError` if the query violates the schema. diff --git a/bsfs/triple_store/sparql/parse_filter.py b/bsfs/triple_store/sparql/parse_filter.py index bf19a02..2f5a25b 100644 --- a/bsfs/triple_store/sparql/parse_filter.py +++ b/bsfs/triple_store/sparql/parse_filter.py @@ -151,16 +151,16 @@ class Filter(): raise errors.BackendError(f'the range of predicate {pred} is undefined') dom, rng = pred.domain, pred.range # encapsulate predicate uri - puri = f'<{puri}>' # type: ignore [assignment] # variable re-use confuses mypy + uri_str = f'<{puri}>' # apply reverse flag if node.reverse: - puri = '^' + puri + uri_str = '^' + uri_str dom, rng = rng, dom # type: ignore [assignment] # variable re-use confuses mypy # check path consistency if not node_type <= dom: raise errors.ConsistencyError(f'expected type {dom} or subtype thereof, found {node_type}') # return predicate URI and next node type - return puri, rng + return uri_str, rng def _any(self, node_type: bsc.Vertex, node: ast.filter.Any, head: str) -> str: """ -- cgit v1.2.3 From 4fead04055be4967d9ea3b24ff61fe37a93108dd Mon Sep 17 00:00:00 2001 From: Matthias Baumgartner Date: Sat, 4 Mar 2023 13:31:11 +0100 Subject: namespace refactoring and cleanup --- bsfs/graph/ac/null.py | 2 +- bsfs/graph/nodes.py | 4 +- bsfs/graph/resolve.py | 4 +- bsfs/graph/schema.nt | 11 ++-- bsfs/namespace/__init__.py | 3 +- bsfs/namespace/namespace.py | 97 ++++++++++-------------------------- bsfs/namespace/predefined.py | 27 +++++----- bsfs/query/ast/filter_.py | 3 +- bsfs/query/matcher.py | 4 +- bsfs/query/validator.py | 4 +- bsfs/schema/serialize.py | 15 +++--- bsfs/schema/types.py | 14 +++--- bsfs/triple_store/sparql/distance.py | 6 +-- bsfs/triple_store/sparql/sparql.py | 6 +-- 14 files changed, 78 insertions(+), 122 deletions(-) (limited to 'bsfs') diff --git a/bsfs/graph/ac/null.py b/bsfs/graph/ac/null.py index 3a391aa..c9ec7d0 100644 --- a/bsfs/graph/ac/null.py +++ b/bsfs/graph/ac/null.py @@ -24,7 +24,7 @@ class NullAC(base.AccessControlBase): def is_protected_predicate(self, pred: schema.Predicate) -> bool: """Return True if a predicate cannot be modified manually.""" - return pred.uri == ns.bsm.t_created + return pred.uri == ns.bsn.t_created def create(self, node_type: schema.Node, guids: typing.Iterable[URI]): """Perform post-creation operations on nodes, e.g. ownership information.""" diff --git a/bsfs/graph/nodes.py b/bsfs/graph/nodes.py index 74f4c4f..47b0217 100644 --- a/bsfs/graph/nodes.py +++ b/bsfs/graph/nodes.py @@ -170,7 +170,7 @@ class Nodes(): self._backend.commit() except ( - errors.PermissionDeniedError, # tried to set a protected predicate (ns.bsm.t_created) + errors.PermissionDeniedError, # tried to set a protected predicate errors.ConsistencyError, # node types are not in the schema or don't match the predicate errors.InstanceError, # guids/values don't have the correct type TypeError, # value is supposed to be a Nodes instance @@ -394,7 +394,7 @@ class Nodes(): self._backend.create(node_type, missing) # add bookkeeping triples self._backend.set(node_type, missing, - self._backend.schema.predicate(ns.bsm.t_created), [time.time()]) + self._backend.schema.predicate(ns.bsn.t_created), [time.time()]) # add permission triples self._ac.create(node_type, missing) # return available nodes diff --git a/bsfs/graph/resolve.py b/bsfs/graph/resolve.py index 95dcfc1..a58eb67 100644 --- a/bsfs/graph/resolve.py +++ b/bsfs/graph/resolve.py @@ -27,8 +27,8 @@ class Filter(): input: Any(ns.bse.tag, Is(Nodes(...))) output: Any(ns.bse.tag, Or(Is(...), Is(...), ...))) - >>> tags = graph.node(ns.bsfs.Tag, 'http://example.com/me/tag#1234') - >>> graph.get(ns.bsfs.Entity, ast.filter.Any(ns.bse.tag, ast.filter.Is(tags))) + >>> tags = graph.node(ns.bsn.Tag, 'http://example.com/me/tag#1234') + >>> graph.get(ns.bsn.Entity, ast.filter.Any(ns.bse.tag, ast.filter.Is(tags))) """ diff --git a/bsfs/graph/schema.nt b/bsfs/graph/schema.nt index cba5e80..37bba5e 100644 --- a/bsfs/graph/schema.nt +++ b/bsfs/graph/schema.nt @@ -4,15 +4,16 @@ prefix rdfs: prefix xsd: # bsfs prefixes -prefix bsfs: -prefix bsm: +prefix bsfs: +prefix bsl: +prefix bsn: # literals -bsfs:Number rdfs:subClassOf bsfs:Literal . -xsd:float rdfs:subClassOf bsfs:Number . +bsl:Number rdfs:subClassOf bsfs:Literal . +xsd:float rdfs:subClassOf bsl:Number . # predicates -bsm:t_created rdfs:subClassOf bsfs:Predicate ; +bsn:t_created rdfs:subClassOf bsfs:Predicate ; rdfs:domain bsfs:Node ; rdfs:range xsd:float ; bsfs:unique "true"^^xsd:boolean . diff --git a/bsfs/namespace/__init__.py b/bsfs/namespace/__init__.py index 1784808..76f39a2 100644 --- a/bsfs/namespace/__init__.py +++ b/bsfs/namespace/__init__.py @@ -4,11 +4,10 @@ import typing # inner-module imports from . import predefined as ns -from .namespace import ClosedNamespace, Namespace +from .namespace import Namespace # exports __all__: typing.Sequence[str] = ( - 'ClosedNamespace', 'Namespace', 'ns', ) diff --git a/bsfs/namespace/namespace.py b/bsfs/namespace/namespace.py index 0a62b78..b388f53 100644 --- a/bsfs/namespace/namespace.py +++ b/bsfs/namespace/namespace.py @@ -3,97 +3,52 @@ import typing # bsfs imports -from bsfs.utils import URI, typename +from bsfs.utils import URI # exports __all__: typing.Sequence[str] = ( - 'ClosedNamespace', 'Namespace', + 'FinalNamespace', ) ## code ## -class Namespace(): - """A namespace consists of a common prefix that is used in a set of URIs. - Note that the prefix must include the separator between - path and fragment (typically a '#' or a '/'). - """ - - # namespace prefix. - prefix: URI - - # fragment separator. - fsep: str - - # path separator. - psep: str - - def __init__(self, prefix: URI, fsep: str = '#', psep: str = '/'): - # ensure prefix type - prefix = URI(prefix) - # truncate fragment separator - while prefix.endswith(fsep): - prefix = URI(prefix[:-1]) - # truncate path separator - while prefix.endswith(psep): - prefix = URI(prefix[:-1]) - # store members - self.prefix = prefix - self.fsep = fsep - self.psep = psep - - def __eq__(self, other: typing.Any) -> bool: - return isinstance(other, type(self)) \ - and self.prefix == other.prefix \ - and self.fsep == other.fsep \ - and self.psep == other.psep +class Namespace(URI): + """The Namespace allows you to incrementally append path segments to an URI. - def __hash__(self) -> int: - return hash((type(self), self.prefix, self.fsep, self.psep)) + Segments are separated by `Namespace.sep` ('/'). + The `__call__` method signals that the URI is complete until the query part. - def __str__(self) -> str: - return str(self.prefix) - - def __repr__(self) -> str: - return f'{typename(self)}({self.prefix}, {self.fsep}, {self.psep})' - - def __getattr__(self, fragment: str) -> URI: - """Return prefix + fragment.""" - return URI(self.prefix + self.fsep + fragment) - - def __getitem__(self, fragment: str) -> URI: - """Alias for getattr(self, fragment).""" - return self.__getattr__(fragment) + """ - def __add__(self, value: typing.Any) -> 'Namespace': - """Concatenate another namespace to this one.""" - if not isinstance(value, str): - return NotImplemented - return Namespace(self.prefix + self.psep + value, self.fsep, self.psep) + # path separator + sep: str = '/' + def __getattr__(self, query: str) -> 'Namespace': + """Append the *query* to the current value and return as Namespace.""" + return Namespace(self + self.sep + query) -class ClosedNamespace(Namespace): - """Namespace that covers a restricted set of URIs.""" + def __call__(self, sep: str = '#') -> 'FinalNamespace': + """Finalize the namespace.""" + return FinalNamespace(self, sep) - # set of permissible fragments. - fragments: typing.Set[str] - def __init__(self, prefix: URI, *args: str, fsep: str = '#', psep: str = '/'): - super().__init__(prefix, fsep, psep) - self.fragments = set(args) +# FIXME: Integrate FinalNamespace into Namespace? Do we need to have both? +class FinalNamespace(URI): + """The FinalNamespace allows you to append a fragment to an URI.""" - def __eq__(self, other: typing.Any) -> bool: - return super().__eq__(other) and self.fragments == other.fragments + # fragment separator + sep: str - def __hash__(self) -> int: - return hash((type(self), self.prefix, tuple(sorted(self.fragments)))) + def __new__(cls, value: str, sep: str = '#'): + inst = URI.__new__(cls, value) + inst.sep = sep + return inst def __getattr__(self, fragment: str) -> URI: - """Return prefix + fragment or raise a KeyError if the fragment is not part of this namespace.""" - if fragment not in self.fragments: - raise KeyError(f'{fragment} is not a valid fragment of namespace {self.prefix}') - return super().__getattr__(fragment) + """Append the *fragment* to the current value and return as URI.""" + return URI(self + self.sep + fragment) ## EOF ## diff --git a/bsfs/namespace/predefined.py b/bsfs/namespace/predefined.py index 15f12ac..8b60d39 100644 --- a/bsfs/namespace/predefined.py +++ b/bsfs/namespace/predefined.py @@ -2,29 +2,28 @@ # imports import typing -# bsfs imports -from bsfs.utils import URI - # inner-module imports -from . import namespace +from .namespace import Namespace, FinalNamespace # essential bsfs namespaces -bsfs: namespace.Namespace = namespace.Namespace(URI('http://bsfs.ai/schema'), fsep='/') - +bsfs = Namespace('https://schema.bsfs.io/core') # additional bsfs namespaces -bse: namespace.Namespace = namespace.Namespace(URI('http://bsfs.ai/schema/Entity')) -bsm: namespace.Namespace = namespace.Namespace(URI('http://bsfs.ai/schema/Meta')) +bsd = bsfs.distance() +bsl = bsfs.Literal +bsn = bsfs.Node() # generic namespaces -rdf: namespace.Namespace = namespace.Namespace(URI('http://www.w3.org/1999/02/22-rdf-syntax-ns')) -rdfs: namespace.Namespace = namespace.Namespace(URI('http://www.w3.org/2000/01/rdf-schema')) -schema: namespace.Namespace = namespace.Namespace(URI('http://schema.org'), fsep='/') -xsd: namespace.Namespace = namespace.Namespace(URI('http://www.w3.org/2001/XMLSchema')) +rdf = FinalNamespace('http://www.w3.org/1999/02/22-rdf-syntax-ns') +rdfs = FinalNamespace('http://www.w3.org/2000/01/rdf-schema') +xsd = FinalNamespace('http://www.w3.org/2001/XMLSchema') +schema = FinalNamespace('http://schema.org', sep='/') +# exports __all__: typing.Sequence[str] = ( - 'bse', + 'bsd', 'bsfs', - 'bsm', + 'bsl', + 'bsn', 'rdf', 'rdfs', 'schema', diff --git a/bsfs/query/ast/filter_.py b/bsfs/query/ast/filter_.py index 56c982e..610fdb4 100644 --- a/bsfs/query/ast/filter_.py +++ b/bsfs/query/ast/filter_.py @@ -10,7 +10,8 @@ For example, consider the following AST: >>> Any(ns.bse.collection, ... And( ... Equals('hello'), -... Any(ns.bsm.guid, Any(ns.bsm.guid, Equals('hello'))), +... Is('hello world'), +... Any(ns.bse.tag, Equals('world')), ... Any(ns.bst.label, Equals('world')), ... All(ns.bst.label, Not(Equals('world'))), ... ) diff --git a/bsfs/query/matcher.py b/bsfs/query/matcher.py index 5f3b07e..17c9c8e 100644 --- a/bsfs/query/matcher.py +++ b/bsfs/query/matcher.py @@ -215,8 +215,8 @@ class Filter(): two following queries are semantically identical, but structurally different, and would therefore not match: - >>> ast.filter.OneOf(ast.filter.Predicate(ns.bse.filename)) - >>> ast.filter.Predicate(ns.bse.filename) + >>> ast.filter.OneOf(ast.filter.Predicate(ns.bse.name)) + >>> ast.filter.Predicate(ns.bse.name) """ diff --git a/bsfs/query/validator.py b/bsfs/query/validator.py index 1ce44e9..10ca492 100644 --- a/bsfs/query/validator.py +++ b/bsfs/query/validator.py @@ -177,7 +177,7 @@ class Filter(): if not type_ <= dom: raise errors.ConsistencyError(f'expected type {dom}, found {type_}') # node.count is a numerical expression - self._parse_filter_expression(self.schema.literal(ns.bsfs.Number), node.count) + self._parse_filter_expression(self.schema.literal(ns.bsl.Number), node.count) def _distance(self, type_: bsc.Vertex, node: ast.filter.Distance): # type is a Literal @@ -218,7 +218,7 @@ class Filter(): if type_ not in self.schema.literals(): raise errors.ConsistencyError(f'literal {type_} is not in the schema') # type must be a numerical - if not type_ <= self.schema.literal(ns.bsfs.Number): + if not type_ <= self.schema.literal(ns.bsl.Number): raise errors.ConsistencyError(f'expected a number type, found {type_}') # FIXME: Check if node.value corresponds to type_ diff --git a/bsfs/schema/serialize.py b/bsfs/schema/serialize.py index b05b289..ea8b2f4 100644 --- a/bsfs/schema/serialize.py +++ b/bsfs/schema/serialize.py @@ -241,13 +241,14 @@ def to_string(schema_inst: schema.Schema, fmt: str = 'turtle') -> str: graph.add(triple) # add known namespaces for readability # FIXME: more generically? - graph.bind('bse', rdflib.URIRef(ns.bse[''])) - graph.bind('bsfs', rdflib.URIRef(ns.bsfs[''])) - graph.bind('bsm', rdflib.URIRef(ns.bsm[''])) - graph.bind('rdf', rdflib.URIRef(ns.rdf[''])) - graph.bind('rdfs', rdflib.URIRef(ns.rdfs[''])) - graph.bind('schema', rdflib.URIRef(ns.schema[''])) - graph.bind('xsd', rdflib.URIRef(ns.xsd[''])) + graph.bind('bsfs', rdflib.URIRef(ns.bsfs + '/')) + graph.bind('bsl', rdflib.URIRef(ns.bsl + '/')) + graph.bind('bsn', rdflib.URIRef(ns.bsn + '#')) + graph.bind('bse', rdflib.URIRef(ns.bsfs.Entity() + '#')) + graph.bind('rdf', rdflib.URIRef(ns.rdf)) + graph.bind('rdfs', rdflib.URIRef(ns.rdfs)) + graph.bind('schema', rdflib.URIRef(ns.schema)) + graph.bind('xsd', rdflib.URIRef(ns.xsd)) # serialize to turtle return graph.serialize(format=fmt) diff --git a/bsfs/schema/types.py b/bsfs/schema/types.py index 104580d..5834df8 100644 --- a/bsfs/schema/types.py +++ b/bsfs/schema/types.py @@ -376,31 +376,31 @@ ROOT_LITERAL = Literal( ) ROOT_BLOB = Literal( - uri=ns.bsfs.BinaryBlob, + uri=ns.bsl.BinaryBlob, parent=ROOT_LITERAL, ) ROOT_NUMBER = Literal( - uri=ns.bsfs.Number, + uri=ns.bsl.Number, parent=ROOT_LITERAL, ) ROOT_TIME = Literal( - uri=ns.bsfs.Time, + uri=ns.bsl.Time, parent=ROOT_LITERAL, ) ROOT_ARRAY = Literal( - uri=ns.bsfs.Array, + uri=ns.bsl.Array, parent=ROOT_LITERAL, ) ROOT_FEATURE = Feature( - uri=ns.bsfs.Feature, + uri=ns.bsl.Array.Feature, parent=ROOT_ARRAY, dimension=1, - dtype=ns.bsfs.f16, - distance=ns.bsfs.euclidean, + dtype=ns.bsfs.dtype().f16, + distance=ns.bsd.euclidean, ) # essential predicates diff --git a/bsfs/triple_store/sparql/distance.py b/bsfs/triple_store/sparql/distance.py index 9b58088..2c2f355 100644 --- a/bsfs/triple_store/sparql/distance.py +++ b/bsfs/triple_store/sparql/distance.py @@ -43,9 +43,9 @@ def manhatten(fst, snd) -> float: # Known distance functions. DISTANCE_FU = { - ns.bsfs.euclidean: euclid, - ns.bsfs.cosine: cosine, - ns.bsfs.manhatten: manhatten, + ns.bsd.euclidean: euclid, + ns.bsd.cosine: cosine, + ns.bsd.manhatten: manhatten, } ## EOF ## diff --git a/bsfs/triple_store/sparql/sparql.py b/bsfs/triple_store/sparql/sparql.py index 68c0027..99e67d6 100644 --- a/bsfs/triple_store/sparql/sparql.py +++ b/bsfs/triple_store/sparql/sparql.py @@ -28,7 +28,7 @@ __all__: typing.Sequence[str] = ( ## code ## -rdflib.term.bind(ns.bsfs.BinaryBlob, bytes, constructor=base64.b64decode) +rdflib.term.bind(ns.bsl.BinaryBlob, bytes, constructor=base64.b64decode) class _Transaction(): """Lightweight rdflib transactions for in-memory databases.""" @@ -335,8 +335,8 @@ class SparqlStore(base.TripleStoreBase): # convert value if isinstance(predicate.range, bsc.Literal): dtype = rdflib.URIRef(predicate.range.uri) - if predicate.range <= self.schema.literal(ns.bsfs.BinaryBlob): - dtype = rdflib.URIRef(ns.bsfs.BinaryBlob) + if predicate.range <= self.schema.literal(ns.bsl.BinaryBlob): + dtype = rdflib.URIRef(ns.bsl.BinaryBlob) value = base64.b64encode(value) value = rdflib.Literal(value, datatype=dtype) elif isinstance(predicate.range, bsc.Node): -- cgit v1.2.3