author    Matthias Baumgartner <dev@igsor.net>    2023-03-05 19:25:29 +0100
committer Matthias Baumgartner <dev@igsor.net>    2023-03-05 19:25:29 +0100
commit    48b6081d0092e9c5a1b0ad79bdde2e51649bf61a (patch)
tree      634198c34aae3c0306ce30ac7452abd7b53a14e8 /bsfs
parent    91437ba89d35bf482f3d9671bb99ef2fc69f5985 (diff)
parent    e4845c627e97a6d125bf33d9e7a4a8d373d7fc4a (diff)
download  bsfs-0.23.03.tar.gz, bsfs-0.23.03.tar.bz2, bsfs-0.23.03.zip

Merge branch 'develop' (tag: v0.23.03)
Diffstat (limited to 'bsfs')
-rw-r--r--  bsfs/__init__.py                                  5
-rw-r--r--  bsfs/apps/__init__.py                            43
-rw-r--r--  bsfs/apps/init.py                                11
-rw-r--r--  bsfs/apps/migrate.py                             12
-rw-r--r--  bsfs/front/__init__.py                            5
-rw-r--r--  bsfs/front/bsfs.py                                5
-rw-r--r--  bsfs/front/builder.py                            11
-rw-r--r--  bsfs/graph/__init__.py                            5
-rw-r--r--  bsfs/graph/ac/__init__.py                         5
-rw-r--r--  bsfs/graph/ac/base.py                            33
-rw-r--r--  bsfs/graph/ac/null.py                            20
-rw-r--r--  bsfs/graph/graph.py                              99
-rw-r--r--  bsfs/graph/nodes.py                             250
-rw-r--r--  bsfs/graph/resolve.py                           174
-rw-r--r--  bsfs/graph/result.py                            119
-rw-r--r--  bsfs/graph/schema.nt                             12
-rw-r--r--  bsfs/graph/walk.py                              115
-rw-r--r--  bsfs/namespace/__init__.py                        8
-rw-r--r--  bsfs/namespace/namespace.py                     102
-rw-r--r--  bsfs/namespace/predefined.py                     32
-rw-r--r--  bsfs/query/__init__.py                           15
-rw-r--r--  bsfs/query/ast/__init__.py                       23
-rw-r--r--  bsfs/query/ast/fetch.py                         169
-rw-r--r--  bsfs/query/ast/filter_.py                       516
-rw-r--r--  bsfs/query/matcher.py                           361
-rw-r--r--  bsfs/query/validator.py                         351
-rw-r--r--  bsfs/schema/__init__.py                          14
-rw-r--r--  bsfs/schema/schema.py                           125
-rw-r--r--  bsfs/schema/serialize.py                        255
-rw-r--r--  bsfs/schema/types.py                            207
-rw-r--r--  bsfs/triple_store/__init__.py                     5
-rw-r--r--  bsfs/triple_store/base.py                        41
-rw-r--r--  bsfs/triple_store/sparql/__init__.py             13
-rw-r--r--  bsfs/triple_store/sparql/distance.py             51
-rw-r--r--  bsfs/triple_store/sparql/parse_fetch.py         104
-rw-r--r--  bsfs/triple_store/sparql/parse_filter.py        316
-rw-r--r--  bsfs/triple_store/sparql/sparql.py (renamed from bsfs/triple_store/sparql.py)  127
-rw-r--r--  bsfs/triple_store/sparql/utils.py               137
-rw-r--r--  bsfs/utils/__init__.py                            8
-rw-r--r--  bsfs/utils/commons.py                            39
-rw-r--r--  bsfs/utils/errors.py                             11
-rw-r--r--  bsfs/utils/uri.py                                13
-rw-r--r--  bsfs/utils/uuid.py                               24
43 files changed, 3567 insertions, 424 deletions
diff --git a/bsfs/__init__.py b/bsfs/__init__.py
index 079ffaf..cf08d64 100644
--- a/bsfs/__init__.py
+++ b/bsfs/__init__.py
@@ -1,9 +1,4 @@
-"""
-Part of the BlackStar filesystem (bsfs) module.
-A copy of the license is provided with the project.
-Author: Matthias Baumgartner, 2022
-"""
# imports
import collections
import typing
diff --git a/bsfs/apps/__init__.py b/bsfs/apps/__init__.py
index 7efaa87..62dc5b5 100644
--- a/bsfs/apps/__init__.py
+++ b/bsfs/apps/__init__.py
@@ -1,20 +1,53 @@
-"""
+#!/usr/bin/env python3
-Part of the BlackStar filesystem (bsfs) module.
-A copy of the license is provided with the project.
-Author: Matthias Baumgartner, 2022
-"""
# imports
+import argparse
import typing
+# bsfs imports
+import bsfs
+
# inner-module imports
from .init import main as init
from .migrate import main as migrate
# exports
__all__: typing.Sequence[str] = (
+ 'main',
'init',
'migrate',
)
+# config
+apps = {
+ 'init' : init,
+ 'migrate' : migrate,
+ }
+
+
+## code ##
+
+def main(argv=None):
+ """Black Star File System maintenance tools."""
+ parser = argparse.ArgumentParser(description=main.__doc__, prog='bsfs')
+ # version
+ parser.add_argument('--version', action='version',
+ version='%(prog)s version {}.{}.{}'.format(*bsfs.version_info)) # pylint: disable=C0209
+ # application selection
+ parser.add_argument('app', choices=apps.keys(),
+ help='Select the application to run.')
+ # dangling args
+ parser.add_argument('rest', nargs=argparse.REMAINDER)
+ # parse
+ args = parser.parse_args(argv)
+ # run application
+ apps[args.app](args.rest)
+
+
+## main ##
+
+if __name__ == '__main__':
+ import sys
+ main(sys.argv[1:])
+
## EOF ##
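
A short usage sketch of the dispatcher added above: the application name must be one of the keys in the `apps` mapping, and the remaining arguments are forwarded verbatim to the selected app (the flag and file names below are illustrative, not taken from this diff):

>>> from bsfs.apps import main
>>> main(['init', '--output', 'config.json'])   # dispatches to bsfs.apps.init.main
>>> main(['migrate', 'schema.nt'])              # dispatches to bsfs.apps.migrate.main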
diff --git a/bsfs/apps/init.py b/bsfs/apps/init.py
index 3e2ef37..9afbdd5 100644
--- a/bsfs/apps/init.py
+++ b/bsfs/apps/init.py
@@ -1,9 +1,5 @@
-"""
+#!/usr/bin/env python3
-Part of the BlackStar filesystem (bsfs) module.
-A copy of the license is provided with the project.
-Author: Matthias Baumgartner, 2022
-"""
# imports
import argparse
import json
@@ -60,9 +56,10 @@ def main(argv):
# print config
if args.output is not None:
with open(args.output, mode='wt', encoding='UTF-8') as ofile:
- json.dump(config, ofile)
+ json.dump(config, ofile, indent=4)
else:
- json.dump(config, sys.stdout)
+ json.dump(config, sys.stdout, indent=4)
+ print('')
## main ##
diff --git a/bsfs/apps/migrate.py b/bsfs/apps/migrate.py
index 91c1661..34ea2e7 100644
--- a/bsfs/apps/migrate.py
+++ b/bsfs/apps/migrate.py
@@ -1,9 +1,5 @@
-"""
+#!/usr/bin/env python3
-Part of the BlackStar filesystem (bsfs) module.
-A copy of the license is provided with the project.
-Author: Matthias Baumgartner, 2022
-"""
# imports
import argparse
import json
@@ -42,15 +38,15 @@ def main(argv):
graph = bsfs.Open(config)
# initialize schema
- schema = bsfs.schema.Schema.Empty()
+ schema = bsfs.schema.Schema()
if len(args.schema) == 0:
# assemble schema from standard input
- schema = schema + bsfs.schema.Schema.from_string(sys.stdin.read())
+ schema = schema + bsfs.schema.from_string(sys.stdin.read())
else:
# assemble schema from input files
for pth in args.schema:
with open(pth, mode='rt', encoding='UTF-8') as ifile:
- schema = schema + bsfs.schema.Schema.from_string(ifile.read())
+ schema = schema + bsfs.schema.from_string(ifile.read())
# migrate schema
graph.migrate(schema, not args.remove)
diff --git a/bsfs/front/__init__.py b/bsfs/front/__init__.py
index 92886ab..cedcd7f 100644
--- a/bsfs/front/__init__.py
+++ b/bsfs/front/__init__.py
@@ -1,9 +1,4 @@
-"""
-Part of the BlackStar filesystem (bsfs) module.
-A copy of the license is provided with the project.
-Author: Matthias Baumgartner, 2022
-"""
# imports
import typing
diff --git a/bsfs/front/bsfs.py b/bsfs/front/bsfs.py
index 968b3f5..f437212 100644
--- a/bsfs/front/bsfs.py
+++ b/bsfs/front/bsfs.py
@@ -1,9 +1,4 @@
-"""
-Part of the BlackStar filesystem (bsfs) module.
-A copy of the license is provided with the project.
-Author: Matthias Baumgartner, 2022
-"""
# imports
import typing
diff --git a/bsfs/front/builder.py b/bsfs/front/builder.py
index 73f1703..b1d488b 100644
--- a/bsfs/front/builder.py
+++ b/bsfs/front/builder.py
@@ -1,14 +1,9 @@
-"""
-Part of the BlackStar filesystem (bsfs) module.
-A copy of the license is provided with the project.
-Author: Matthias Baumgartner, 2022
-"""
# imports
import typing
# bsfs imports
-from bsfs.graph import Graph
+from bsfs.graph import Graph, ac
from bsfs.triple_store import TripleStoreBase, SparqlStore
from bsfs.utils import URI, errors
@@ -68,8 +63,10 @@ def build_graph(cfg: typing.Any) -> Graph:
if 'backend' not in args:
raise errors.ConfigError('required argument "backend" is not provided')
backend = build_backend(args['backend'])
+ # build access controls
+ access_controls = ac.NullAC(backend, user)
# build and return graph
cls = _graph_classes[name]
- return cls(backend, user)
+ return cls(backend, access_controls)
## EOF ##
diff --git a/bsfs/graph/__init__.py b/bsfs/graph/__init__.py
index 82d2235..8d38d23 100644
--- a/bsfs/graph/__init__.py
+++ b/bsfs/graph/__init__.py
@@ -1,9 +1,4 @@
-"""
-Part of the BlackStar filesystem (bsfs) module.
-A copy of the license is provided with the project.
-Author: Matthias Baumgartner, 2022
-"""
# imports
import typing
diff --git a/bsfs/graph/ac/__init__.py b/bsfs/graph/ac/__init__.py
index 420de01..11b45df 100644
--- a/bsfs/graph/ac/__init__.py
+++ b/bsfs/graph/ac/__init__.py
@@ -1,9 +1,4 @@
-"""
-Part of the BlackStar filesystem (bsfs) module.
-A copy of the license is provided with the project.
-Author: Matthias Baumgartner, 2022
-"""
# imports
import typing
diff --git a/bsfs/graph/ac/base.py b/bsfs/graph/ac/base.py
index bc9aeb3..e85c1dd 100644
--- a/bsfs/graph/ac/base.py
+++ b/bsfs/graph/ac/base.py
@@ -1,17 +1,13 @@
-"""
-Part of the BlackStar filesystem (bsfs) module.
-A copy of the license is provided with the project.
-Author: Matthias Baumgartner, 2022
-"""
# imports
import abc
import typing
# bsfs imports
from bsfs import schema
+from bsfs.query import ast
from bsfs.triple_store import TripleStoreBase
-from bsfs.utils import URI
+from bsfs.utils import URI, typename
# exports
__all__: typing.Sequence[str] = (
@@ -43,6 +39,20 @@ class AccessControlBase(abc.ABC):
self._backend = backend
self._user = URI(user)
+ def __str__(self) -> str:
+ return f'{typename(self)}({self._user})'
+
+ def __repr__(self) -> str:
+ return f'{typename(self)}({self._user})'
+
+ def __eq__(self, other: typing.Any) -> bool:
+ return isinstance(other, type(self)) \
+ and self._backend == other._backend \
+ and self._user == other._user
+
+ def __hash__(self) -> int:
+ return hash((type(self), self._backend, self._user))
+
@abc.abstractmethod
def is_protected_predicate(self, pred: schema.Predicate) -> bool:
"""Return True if a predicate cannot be modified manually."""
@@ -67,5 +77,16 @@ class AccessControlBase(abc.ABC):
def createable(self, node_type: schema.Node, guids: typing.Iterable[URI]) -> typing.Iterable[URI]:
"""Return nodes that are allowed to be created."""
+ @abc.abstractmethod
+ def filter_read(
+ self,
+ node_type: schema.Node,
+ query: typing.Optional[ast.filter.FilterExpression],
+ ) -> typing.Optional[ast.filter.FilterExpression]:
+ """Re-write a filter *query* to get (i.e., read) *node_type* nodes."""
+
+ @abc.abstractmethod
+ def fetch_read(self, node_type: schema.Node, query: ast.fetch.FetchExpression) -> ast.fetch.FetchExpression:
+ """Re-write a fetch *query* to get (i.e, read) values for *node_type* nodes."""
## EOF ##
diff --git a/bsfs/graph/ac/null.py b/bsfs/graph/ac/null.py
index 36838bd..c9ec7d0 100644
--- a/bsfs/graph/ac/null.py
+++ b/bsfs/graph/ac/null.py
@@ -1,15 +1,11 @@
-"""
-Part of the BlackStar filesystem (bsfs) module.
-A copy of the license is provided with the project.
-Author: Matthias Baumgartner, 2022
-"""
# imports
import typing
# bsfs imports
from bsfs import schema
from bsfs.namespace import ns
+from bsfs.query import ast
from bsfs.utils import URI
# inner-module imports
@@ -28,7 +24,7 @@ class NullAC(base.AccessControlBase):
def is_protected_predicate(self, pred: schema.Predicate) -> bool:
"""Return True if a predicate cannot be modified manually."""
- return pred.uri == ns.bsm.t_created
+ return pred.uri == ns.bsn.t_created
def create(self, node_type: schema.Node, guids: typing.Iterable[URI]):
"""Perform post-creation operations on nodes, e.g. ownership information."""
@@ -49,4 +45,16 @@ class NullAC(base.AccessControlBase):
"""Return nodes that are allowed to be created."""
return guids
+ def filter_read(
+ self,
+ node_type: schema.Node,
+ query: typing.Optional[ast.filter.FilterExpression]
+ ) -> typing.Optional[ast.filter.FilterExpression]:
+ """Re-write a filter *query* to get (i.e., read) *node_type* nodes."""
+ return query
+
+ def fetch_read(self, node_type: schema.Node, query: ast.fetch.FetchExpression) -> ast.fetch.FetchExpression:
+ """Re-write a fetch *query* to get (i.e, read) values for *node_type* nodes."""
+ return query
+
## EOF ##
diff --git a/bsfs/graph/graph.py b/bsfs/graph/graph.py
index b7b9f1c..1b4c212 100644
--- a/bsfs/graph/graph.py
+++ b/bsfs/graph/graph.py
@@ -1,20 +1,18 @@
-"""
-Part of the BlackStar filesystem (bsfs) module.
-A copy of the license is provided with the project.
-Author: Matthias Baumgartner, 2022
-"""
# imports
import os
import typing
# bsfs imports
-from bsfs.schema import Schema
+from bsfs.query import ast, validate
+from bsfs import schema as bsc
from bsfs.triple_store import TripleStoreBase
from bsfs.utils import URI, typename
# inner-module imports
+from . import ac
from . import nodes as _nodes
+from . import resolve
# exports
__all__: typing.Sequence[str] = (
@@ -25,9 +23,7 @@ __all__: typing.Sequence[str] = (
## code ##
class Graph():
- """The Graph class is
-
- The Graph class provides a convenient interface to query and access a graph.
+ """The Graph class provides a convenient interface to query and access a graph.
Since it logically builds on the concept of graphs it is easier to
navigate than raw triple stores. Naturally, it uses a triple store
as *backend*. It also controls actions via access permissions to a *user*.
@@ -37,35 +33,40 @@ class Graph():
# link to the triple storage backend.
_backend: TripleStoreBase
- # user uri.
- _user: URI
+ # access controls.
+ _ac: ac.AccessControlBase
- def __init__(self, backend: TripleStoreBase, user: URI):
+ def __init__(
+ self,
+ backend: TripleStoreBase,
+ access_control: ac.AccessControlBase,
+ ):
+ # store members
self._backend = backend
- self._user = user
+ self._ac = access_control
# ensure Graph schema requirements
self.migrate(self._backend.schema)
def __hash__(self) -> int:
- return hash((type(self), self._backend, self._user))
+ return hash((type(self), self._backend, self._ac))
def __eq__(self, other) -> bool:
return isinstance(other, type(self)) \
and self._backend == other._backend \
- and self._user == other._user
+ and self._ac == other._ac
def __repr__(self) -> str:
- return f'{typename(self)}(backend={repr(self._backend)}, user={self._user})'
+ return f'{typename(self)}({repr(self._backend)}, {self._ac})'
def __str__(self) -> str:
- return f'{typename(self)}({str(self._backend)}, {self._user})'
+ return f'{typename(self)}({str(self._backend)})'
@property
- def schema(self) -> Schema:
+ def schema(self) -> bsc.Schema:
"""Return the store's local schema."""
return self._backend.schema
- def migrate(self, schema: Schema, append: bool = True) -> 'Graph':
+ def migrate(self, schema: bsc.Schema, append: bool = True) -> 'Graph':
"""Migrate the current schema to a new *schema*.
Appends to the current schema by default; control this via *append*.
@@ -73,14 +74,14 @@ class Graph():
"""
# check args
- if not isinstance(schema, Schema):
+ if not isinstance(schema, bsc.Schema):
raise TypeError(schema)
# append to current schema
if append:
schema = schema + self._backend.schema
# add Graph schema requirements
with open(os.path.join(os.path.dirname(__file__), 'schema.nt'), mode='rt', encoding='UTF-8') as ifile:
- schema = schema + Schema.from_string(ifile.read())
+ schema = schema + bsc.from_string(ifile.read())
# migrate schema in backend
# FIXME: consult access controls!
self._backend.schema = schema
@@ -95,19 +96,69 @@ class Graph():
*node_type*) once some data is assigned to them.
"""
+ # get node type
type_ = self.schema.node(node_type)
# NOTE: Nodes constructor materializes guids.
- return _nodes.Nodes(self._backend, self._user, type_, guids)
+ return _nodes.Nodes(self._backend, self._ac, type_, guids)
def node(self, node_type: URI, guid: URI) -> _nodes.Nodes:
"""Return node *guid* of type *node_type* as a `bsfs.graph.Nodes` instance.
- Note that the *guids* need not to exist (however, the *node_type* has
+ Note that the *guid* need not exist (however, the *node_type* has
to be part of the schema). An inexistent guid will be created (using
*node_type*) once some data is assigned to them.
"""
+ return self.nodes(node_type, {guid})
+
+ def empty(self, node_type: URI) -> _nodes.Nodes:
+ """Return a `Nodes` instance with type *node_type* but no nodes."""
+ return self.nodes(node_type, set())
+
+ def get(
+ self,
+ node_type: URI,
+ query: typing.Optional[ast.filter.FilterExpression],
+ ) -> _nodes.Nodes:
+ """Return a `Nodes` instance over all nodes of type *node_type* that match the *query*."""
+ # return Nodes instance
+ type_ = self.schema.node(node_type)
+ return _nodes.Nodes(self._backend, self._ac, type_, self.__get(node_type, query))
+
+ def sorted(
+ self,
+ node_type: URI,
+ query: typing.Optional[ast.filter.FilterExpression],
+ # FIXME: sort ast
+ ) -> typing.Iterator[_nodes.Nodes]:
+ """Return a iterator over `Nodes` instances over all nodes of type *node_type* that match the *query*."""
+ # FIXME: Order should be a parameter
+ # return iterator over Nodes instances
+ type_ = self.schema.node(node_type)
+ for guid in self.__get(node_type, query):
+ yield _nodes.Nodes(self._backend, self._ac, type_, {guid})
+
+ def all(self, node_type: URI) -> _nodes.Nodes:
+ """Return all instances of type *node_type*."""
+ type_ = self.schema.node(node_type)
+ return _nodes.Nodes(self._backend, self._ac, type_, self.__get(node_type, None))
+
+ def __get(
+ self,
+ node_type: URI,
+ query: typing.Optional[ast.filter.FilterExpression],
+ ) -> typing.Iterator[URI]:
+ """Build and execute a get query."""
+ # get node type
type_ = self.schema.node(node_type)
- return _nodes.Nodes(self._backend, self._user, type_, {guid})
+ # resolve Nodes instances
+ query = resolve.Filter(self._backend.schema).resolve(type_, query)
+ # add access controls to query
+ query = self._ac.filter_read(type_, query)
+ # validate query
+ if query is not None:
+ validate.Filter(self._backend.schema).validate(type_, query)
+ # query the backend and return the (non-materialized) result
+ return self._backend.get(type_, query)
## EOF ##
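
A minimal sketch of the query entry points introduced above, assuming an opened graph and the namespaces used elsewhere in this merge (the tag URI is illustrative):

>>> from bsfs.namespace import ns
>>> from bsfs.query import ast
>>> tags = graph.node(ns.bsn.Tag, 'http://example.com/me/tag#1234')
>>> graph.all(ns.bsn.Entity)      # every Entity node
>>> graph.empty(ns.bsn.Entity)    # typed, but without any nodes
>>> graph.get(ns.bsn.Entity,      # Entity nodes carrying the given tag
...     ast.filter.Any(ns.bse.tag, ast.filter.Is(tags)))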
diff --git a/bsfs/graph/nodes.py b/bsfs/graph/nodes.py
index c417a0e..47b0217 100644
--- a/bsfs/graph/nodes.py
+++ b/bsfs/graph/nodes.py
@@ -1,21 +1,20 @@
-"""
-Part of the BlackStar filesystem (bsfs) module.
-A copy of the license is provided with the project.
-Author: Matthias Baumgartner, 2022
-"""
# imports
+from collections import abc
import time
import typing
# bsfs imports
-from bsfs import schema as _schema
+from bsfs import schema as bsc
from bsfs.namespace import ns
+from bsfs.query import ast, validate
from bsfs.triple_store import TripleStoreBase
from bsfs.utils import errors, URI, typename
# inner-module imports
from . import ac
+from . import result
+from . import walk
# exports
__all__: typing.Sequence[str] = (
@@ -26,18 +25,20 @@ __all__: typing.Sequence[str] = (
## code ##
class Nodes():
- """
+ """Container for graph nodes, provides operations on nodes.
+
+ NOTE: Should not be created directly but only via `bsfs.graph.Graph`.
NOTE: guids may or may not exist. This is not verified as nodes are created on demand.
"""
# triple store backend.
_backend: TripleStoreBase
- # user uri.
- _user: URI
+ # access controls.
+ _ac: ac.AccessControlBase
# node type.
- _node_type: _schema.Node
+ _node_type: bsc.Node
# guids of nodes. Can be empty.
_guids: typing.Set[URI]
@@ -45,34 +46,35 @@ class Nodes():
def __init__(
self,
backend: TripleStoreBase,
- user: URI,
- node_type: _schema.Node,
+ access_control: ac.AccessControlBase,
+ node_type: bsc.Node,
guids: typing.Iterable[URI],
):
+ # set main members
self._backend = backend
- self._user = user
+ self._ac = access_control
self._node_type = node_type
- self._guids = set(guids)
- self.__ac = ac.NullAC(self._backend, self._user)
+ # convert to URI since this is not guaranteed by Graph
+ self._guids = {URI(guid) for guid in guids}
def __eq__(self, other: typing.Any) -> bool:
return isinstance(other, Nodes) \
and self._backend == other._backend \
- and self._user == other._user \
+ and self._ac == other._ac \
and self._node_type == other._node_type \
and self._guids == other._guids
def __hash__(self) -> int:
- return hash((type(self), self._backend, self._user, self._node_type, tuple(sorted(self._guids))))
+ return hash((type(self), self._backend, self._ac, self._node_type, tuple(sorted(self._guids))))
def __repr__(self) -> str:
- return f'{typename(self)}({self._backend}, {self._user}, {self._node_type}, {self._guids})'
+ return f'{typename(self)}({self._backend}, {self._ac}, {self._node_type}, {self._guids})'
def __str__(self) -> str:
return f'{typename(self)}({self._node_type}, {self._guids})'
@property
- def node_type(self) -> _schema.Node:
+ def node_type(self) -> bsc.Node:
"""Return the node's type."""
return self._node_type
@@ -81,9 +83,72 @@ class Nodes():
"""Return all node guids."""
return iter(self._guids)
+ @property
+ def schema(self) -> bsc.Schema:
+ """Return the store's local schema."""
+ return self._backend.schema
+
+ def __add__(self, other: typing.Any) -> 'Nodes':
+ """Concatenate guids. Backend, AC, and node type must match."""
+ if not isinstance(other, type(self)):
+ return NotImplemented
+ if self._backend != other._backend:
+ raise ValueError(other)
+ if self._ac != other._ac:
+ raise ValueError(other)
+ if self.node_type != other.node_type:
+ raise ValueError(other)
+ return Nodes(self._backend, self._ac, self.node_type, self._guids | other._guids)
+
+ def __or__(self, other: typing.Any) -> 'Nodes':
+ """Concatenate guids. Backend, AC, and node type must match."""
+ return self.__add__(other)
+
+ def __sub__(self, other: typing.Any) -> 'Nodes':
+ """Subtract guids. Backend, AC, and node type must match."""
+ if not isinstance(other, type(self)):
+ return NotImplemented
+ if self._backend != other._backend:
+ raise ValueError(other)
+ if self._ac != other._ac:
+ raise ValueError(other)
+ if self.node_type != other.node_type:
+ raise ValueError(other)
+ return Nodes(self._backend, self._ac, self.node_type, self._guids - other._guids)
+
+ def __and__(self, other: typing.Any) -> 'Nodes':
+ """Intersect guids. Backend, AC, and node type must match."""
+ if not isinstance(other, type(self)):
+ return NotImplemented
+ if self._backend != other._backend:
+ raise ValueError(other)
+ if self._ac != other._ac:
+ raise ValueError(other)
+ if self.node_type != other.node_type:
+ raise ValueError(other)
+ return Nodes(self._backend, self._ac, self.node_type, self._guids & other._guids)
+
+ def __len__(self) -> int:
+ """Return the number of guids."""
+ return len(self._guids)
+
+ def __iter__(self) -> typing.Iterator['Nodes']:
+ """Iterate over individual guids. Returns `Nodes` instances."""
+ return iter(
+ Nodes(self._backend, self._ac, self.node_type, {guid})
+ for guid in self._guids
+ )
+
+ def __getattr__(self, name: str):
+ try:
+ return super().__getattr__(name) # type: ignore [misc] # parent has no getattr
+ except AttributeError:
+ pass
+ return walk.Walk(self, walk.Walk.step(self.schema, self.node_type, name))
+
def set(
self,
- pred: URI, # FIXME: URI or _schema.Predicate?
+ pred: URI, # FIXME: URI or bsc.Predicate?
value: typing.Any,
) -> 'Nodes':
"""Set predicate *pred* to *value*."""
@@ -91,7 +156,7 @@ class Nodes():
def set_from_iterable(
self,
- predicate_values: typing.Iterable[typing.Tuple[URI, typing.Any]], # FIXME: URI or _schema.Predicate?
+ predicate_values: typing.Iterable[typing.Tuple[URI, typing.Any]], # FIXME: URI or bsc.Predicate?
) -> 'Nodes':
"""Set mutliple predicate-value pairs at once."""
# TODO: Could group predicate_values by predicate to gain some efficiency
@@ -105,7 +170,7 @@ class Nodes():
self._backend.commit()
except (
- errors.PermissionDeniedError, # tried to set a protected predicate (ns.bsm.t_created)
+ errors.PermissionDeniedError, # tried to set a protected predicate
errors.ConsistencyError, # node types are not in the schema or don't match the predicate
errors.InstanceError, # guids/values don't have the correct type
TypeError, # value is supposed to be a Nodes instance
@@ -120,6 +185,126 @@ class Nodes():
return self
+ def get(
+ self,
+ *paths: typing.Union[URI, typing.Iterable[URI]],
+ view: typing.Union[typing.Type[list], typing.Type[dict]] = dict,
+ **view_kwargs,
+ ) -> typing.Any:
+ """Get values or nodes at *paths*.
+ Return an iterator (view=list) or a dict (view=dict) over the results.
+ """
+ # FIXME: user-provided Fetch query AST?
+ # check args
+ if len(paths) == 0:
+ raise AttributeError('expected at least one path, found none')
+ if view not in (dict, list):
+ raise ValueError(f'expected dict or list, found {view}')
+ # process paths: create fetch ast, build name mapping, and find unique paths
+ schema = self.schema
+ statements = set()
+ name2path = {}
+ unique_paths = set() # paths that result in a single (unique) value
+ normpath: typing.Tuple[URI, ...]
+ for idx, path in enumerate(paths):
+ # normalize path
+ if isinstance(path, str):
+ normpath = (URI(path), )
+ elif isinstance(path, abc.Iterable):
+ if not all(isinstance(step, str) for step in path):
+ raise TypeError(path)
+ normpath = tuple(URI(step) for step in path)
+ else:
+ raise TypeError(path)
+ # check path's schema consistency
+ if not all(schema.has_predicate(pred) for pred in normpath):
+ raise errors.ConsistencyError(f'path is not fully covered by the schema: {path}')
+ # check path's uniqueness
+ if all(schema.predicate(pred).unique for pred in normpath):
+ unique_paths.add(path)
+ # fetch tail predicate
+ tail = schema.predicate(normpath[-1])
+ # determine tail ast node type
+ factory = ast.fetch.Node if isinstance(tail.range, bsc.Node) else ast.fetch.Value
+ # assign name
+ name = f'fetch{idx}'
+ name2path[name] = (path, tail)
+ # create tail ast node
+ curr: ast.fetch.FetchExpression = factory(tail.uri, name)
+ # walk towards front
+ hop: URI
+ for hop in normpath[-2::-1]:
+ curr = ast.fetch.Fetch(hop, curr)
+ # add to fetch query
+ statements.add(curr)
+ # aggregate fetch statements
+ if len(statements) == 1:
+ fetch = next(iter(statements))
+ else:
+ fetch = ast.fetch.All(*statements)
+ # add access controls to fetch
+ fetch = self._ac.fetch_read(self.node_type, fetch)
+
+ if len(self._guids) == 0:
+ # shortcut: no need to query; no triples
+ # FIXME: if the Fetch query can be given by the user, we might want to check its validity
+ def triple_iter():
+ return []
+ else:
+ # compose filter ast
+ filter = ast.filter.IsIn(self.guids) # pylint: disable=redefined-builtin
+ # add access controls to filter
+ filter = self._ac.filter_read(self.node_type, filter) # type: ignore [assignment]
+
+ # validate queries
+ validate.Filter(self._backend.schema).validate(self.node_type, filter)
+ validate.Fetch(self._backend.schema).validate(self.node_type, fetch)
+
+ # process results, convert if need be
+ def triple_iter():
+ # query the backend
+ triples = self._backend.fetch(self.node_type, filter, fetch)
+ # process triples
+ for root, name, raw in triples:
+ # get node
+ node = Nodes(self._backend, self._ac, self.node_type, {root})
+ # get path
+ path, tail = name2path[name]
+ # convert raw to value
+ if isinstance(tail.range, bsc.Node):
+ value = Nodes(self._backend, self._ac, tail.range, {raw})
+ else:
+ value = raw
+ # emit triple
+ yield node, path, value
+
+ # simplify by default
+ view_kwargs['node'] = view_kwargs.get('node', len(self._guids) != 1)
+ view_kwargs['path'] = view_kwargs.get('path', len(paths) != 1)
+ view_kwargs['value'] = view_kwargs.get('value', False)
+
+ # return results view
+ if view == list:
+ return result.to_list_view(
+ triple_iter(),
+ # aggregation args
+ **view_kwargs,
+ )
+
+ if view == dict:
+ return result.to_dict_view(
+ triple_iter(),
+ # context
+ len(self._guids) == 1,
+ len(paths) == 1,
+ unique_paths,
+ # aggregation args
+ **view_kwargs,
+ )
+
+ raise errors.UnreachableError() # view was already checked
+
+
def __set(self, predicate: URI, value: typing.Any):
"""
"""
@@ -135,7 +320,7 @@ class Nodes():
# FIXME: Needed? Could be integrated into other AC methods (by passing the predicate!)
# This could allow more fine-grained predicate control (e.g. based on ownership)
# rather than a global approach like this.
- if self.__ac.is_protected_predicate(pred):
+ if self._ac.is_protected_predicate(pred):
raise errors.PermissionDeniedError(pred)
# set operation affects all nodes (if possible)
@@ -145,11 +330,11 @@ class Nodes():
guids = set(self._ensure_nodes(node_type, guids))
# check value
- if isinstance(pred.range, _schema.Literal):
+ if isinstance(pred.range, bsc.Literal):
# check write permissions on existing nodes
# As long as the user has write permissions, we don't restrict
# the creation or modification of literal values.
- guids = set(self.__ac.write_literal(node_type, guids))
+ guids = set(self._ac.write_literal(node_type, guids))
# insert literals
# TODO: Support passing iterators as values for non-unique predicates
@@ -160,8 +345,9 @@ class Nodes():
[value],
)
- elif isinstance(pred.range, _schema.Node):
+ elif isinstance(pred.range, bsc.Node):
# check value type
+ # FIXME: value could be a set of Nodes
if not isinstance(value, Nodes):
raise TypeError(value)
# value's node_type must be a subclass of the predicate's range
@@ -172,14 +358,14 @@ class Nodes():
# Link permissions cover adding and removing links on the source node.
# Specifically, link permissions also allow to remove links to other
# nodes if needed (e.g. for unique predicates).
- guids = set(self.__ac.link_from_node(node_type, guids))
+ guids = set(self._ac.link_from_node(node_type, guids))
# get link targets
targets = set(value.guids)
# ensure existence of value nodes; create nodes if need be
targets = set(self._ensure_nodes(value.node_type, targets))
# check link permissions on target nodes
- targets = set(self.__ac.link_to_node(value.node_type, targets))
+ targets = set(self._ac.link_to_node(value.node_type, targets))
# insert node links
self._backend.set(
@@ -192,7 +378,7 @@ class Nodes():
else:
raise errors.UnreachableError()
- def _ensure_nodes(self, node_type: _schema.Node, guids: typing.Iterable[URI]):
+ def _ensure_nodes(self, node_type: bsc.Node, guids: typing.Iterable[URI]):
"""
"""
# check node existence
@@ -203,14 +389,14 @@ class Nodes():
# create nodes if need be
if len(missing) > 0:
# check which missing nodes can be created
- missing = set(self.__ac.createable(node_type, missing))
+ missing = set(self._ac.createable(node_type, missing))
# create nodes
self._backend.create(node_type, missing)
# add bookkeeping triples
self._backend.set(node_type, missing,
- self._backend.schema.predicate(ns.bsm.t_created), [time.time()])
+ self._backend.schema.predicate(ns.bsn.t_created), [time.time()])
# add permission triples
- self.__ac.create(node_type, missing)
+ self._ac.create(node_type, missing)
# return available nodes
return existing | missing
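
A sketch of the set-style operators and the fetch interface added to `Nodes` above, assuming *a* and *b* share backend, access controls, and node type (the predicate paths are illustrative):

>>> c = a | b                            # union of guids
>>> d = (a - b) & c                      # difference, then intersection
>>> a.get(ns.bse.tag)                    # dict view (the default)
>>> a.get(ns.bse.tag, view=list)         # iterator over the results
>>> a.get((ns.bse.tag, ns.bst.label))    # a multi-hop path: tag, then label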
diff --git a/bsfs/graph/resolve.py b/bsfs/graph/resolve.py
new file mode 100644
index 0000000..a58eb67
--- /dev/null
+++ b/bsfs/graph/resolve.py
@@ -0,0 +1,174 @@
+
+# imports
+import typing
+
+# bsfs imports
+from bsfs import schema as bsc
+from bsfs.query import ast
+from bsfs.utils import errors
+
+# inner-module imports
+from . import nodes
+
+# exports
+__all__: typing.Sequence[str] = (
+ 'Filter',
+ )
+
+
+## code ##
+
+class Filter():
+ """Rewrites the query to replace `bsfs.graph.nodes.Nodes` instances with the respective URI.
+ Performs only limited type checking and schema validation;
+ use `bsfs.query.validate.Filter` for full validation.
+
+ Example:
+ input: Any(ns.bse.tag, Is(Nodes(...)))
+ output: Any(ns.bse.tag, Or(Is(...), Is(...), ...))
+
+ >>> tags = graph.node(ns.bsn.Tag, 'http://example.com/me/tag#1234')
+ >>> graph.get(ns.bsn.Entity, ast.filter.Any(ns.bse.tag, ast.filter.Is(tags)))
+
+ """
+
+ def __init__(self, schema):
+ self.schema = schema
+
+ def __call__(
+ self,
+ root_type: bsc.Node,
+ node: typing.Optional[ast.filter.FilterExpression],
+ ):
+ """Alias for `Resolve.resolve`."""
+ return self.resolve(root_type, node)
+
+ def resolve(
+ self,
+ root_type: bsc.Node,
+ node: typing.Optional[ast.filter.FilterExpression],
+ ):
+ """Resolve Nodes instances of a *node* query starting at *root_type*."""
+ if node is None:
+ return None
+ return self._parse_filter_expression(root_type, node)
+
+ def _parse_filter_expression(
+ self,
+ type_: bsc.Vertex,
+ node: ast.filter.FilterExpression,
+ ) -> ast.filter.FilterExpression:
+ """Route *node* to the handler of the respective FilterExpression subclass."""
+ if isinstance(node, ast.filter.Is):
+ return self._is(type_, node)
+ if isinstance(node, ast.filter.Not):
+ return self._not(type_, node)
+ if isinstance(node, ast.filter.Has):
+ return self._has(type_, node)
+ if isinstance(node, ast.filter.Any):
+ return self._any(type_, node)
+ if isinstance(node, ast.filter.All):
+ return self._all(type_, node)
+ if isinstance(node, ast.filter.And):
+ return self._and(type_, node)
+ if isinstance(node, ast.filter.Or):
+ return self._or(type_, node)
+ if isinstance(node, ast.filter.Distance):
+ return self._distance(type_, node)
+ if isinstance(node, (ast.filter.Equals, ast.filter.Substring, \
+ ast.filter.StartsWith, ast.filter.EndsWith)):
+ return self._value(type_, node)
+ if isinstance(node, (ast.filter.LessThan, ast.filter.GreaterThan)):
+ return self._bounded(type_, node)
+ # invalid node
+ raise errors.BackendError(f'expected filter expression, found {node}')
+
+ def _parse_predicate_expression(self, node: ast.filter.PredicateExpression) -> bsc.Vertex:
+ """Route *node* to the handler of the respective PredicateExpression subclass."""
+ if isinstance(node, ast.filter.Predicate):
+ return self._predicate(node)
+ if isinstance(node, ast.filter.OneOf):
+ return self._one_of(node)
+ # invalid node
+ raise errors.BackendError(f'expected predicate expression, found {node}')
+
+ def _predicate(self, node: ast.filter.Predicate) -> bsc.Vertex:
+ if not self.schema.has_predicate(node.predicate):
+ raise errors.ConsistencyError(f'predicate {node.predicate} is not in the schema')
+ pred = self.schema.predicate(node.predicate)
+ dom, rng = pred.domain, pred.range
+ if node.reverse:
+ dom, rng = rng, dom
+ return rng
+
+ def _one_of(self, node: ast.filter.OneOf) -> bsc.Vertex:
+ # determine domain and range types
+ rng = None
+ for pred in node:
+ # parse child expression
+ subrng = self._parse_predicate_expression(pred)
+ # determine the next type
+ if rng is None or subrng > rng: # pick most generic range
+ rng = subrng
+ # check range consistency
+ if not subrng <= rng and not subrng >= rng:
+ raise errors.ConsistencyError(f'ranges {subrng} and {rng} are not related')
+ if not isinstance(rng, (bsc.Node, bsc.Literal)):
+ raise errors.BackendError(f'the range of node {node} is undefined')
+ return rng
+
+ def _any(self, type_: bsc.Vertex, node: ast.filter.Any) -> ast.filter.Any: # pylint: disable=unused-argument
+ next_type = self._parse_predicate_expression(node.predicate)
+ return ast.filter.Any(node.predicate, self._parse_filter_expression(next_type, node.expr))
+
+ def _all(self, type_: bsc.Vertex, node: ast.filter.All) -> ast.filter.All: # pylint: disable=unused-argument
+ next_type = self._parse_predicate_expression(node.predicate)
+ return ast.filter.All(node.predicate, self._parse_filter_expression(next_type, node.expr))
+
+ def _and(self, type_: bsc.Vertex, node: ast.filter.And) -> ast.filter.And:
+ return ast.filter.And({self._parse_filter_expression(type_, expr) for expr in node})
+
+ def _or(self, type_: bsc.Vertex, node: ast.filter.Or) -> ast.filter.Or:
+ return ast.filter.Or({self._parse_filter_expression(type_, expr) for expr in node})
+
+ def _not(self, type_: bsc.Vertex, node: ast.filter.Not) -> ast.filter.Not:
+ return ast.filter.Not(self._parse_filter_expression(type_, node.expr))
+
+ def _has(self, type_: bsc.Vertex, node: ast.filter.Has) -> ast.filter.Has: # pylint: disable=unused-argument
+ return node
+
+ def _distance(self, type_: bsc.Vertex, node: ast.filter.Distance): # pylint: disable=unused-argument
+ return node
+
+ def _value(self, type_: bsc.Vertex, node: ast.filter._Value) -> ast.filter._Value: # pylint: disable=unused-argument
+ return node
+
+ def _bounded(self, type_: bsc.Vertex, node: ast.filter._Bounded) -> ast.filter._Bounded: # pylint: disable=unused-argument
+ return node
+
+ def _is(self, type_: bsc.Vertex, node: ast.filter.Is) -> typing.Union[ast.filter.Or, ast.filter.Is]:
+ # check if action is needed
+ if not isinstance(node.value, nodes.Nodes):
+ return node
+ # check schema consistency
+ if node.value.node_type not in self.schema.nodes():
+ raise errors.ConsistencyError(f'node {node.value.node_type} is not in the schema')
+ # check type compatibility
+ if not isinstance(type_, bsc.Node):
+ raise errors.ConsistencyError(f'expected a node, found {type_}')
+ if not node.value.node_type <= type_:
+ raise errors.ConsistencyError(f'expected type {type_} or subtype thereof, found {node.value.node_type}')
+ # NOTE: We assume that the node type is checked when writing to the backend.
+ # Links to any of the guids can therefore only exist if the type matches.
+ # Hence, we don't add a type check/constrain here.
+ return ast.filter.Or(ast.filter.Is(guid) for guid in node.value.guids)
+ # optimized code, removing unnecessary ast.filter.Or
+ #guids = set(node.value.guids)
+ #if len(guids) == 0:
+ # raise errors.BackendError(f'')
+ #if len(guids) == 1:
+ # return ast.filter.Nodeid(next(iter(guids)))
+ #return ast.filter.Or(ast.filter.Is(guid) for guid in guids)
+
+
+## EOF ##
diff --git a/bsfs/graph/result.py b/bsfs/graph/result.py
new file mode 100644
index 0000000..0fcbb13
--- /dev/null
+++ b/bsfs/graph/result.py
@@ -0,0 +1,119 @@
+
+# imports
+from collections import defaultdict
+import typing
+
+# bsfs imports
+from bsfs.utils import URI
+
+# exports
+__all__: typing.Sequence[str] = (
+ 'to_list_view',
+ 'to_dict_view',
+ )
+
+
+## code ##
+
+# FIXME: node, path, value seem counter-intuitive:
+# node.get(..., node=True) removes the node part.
+# wouldn't it make more sense if node=True keeps the node part
+# and node=False drops it?
+
+def to_list_view(
+ triples,
+ # aggregators
+ node: bool,
+ path: bool,
+ value: bool, # pylint: disable=unused-argument
+ ):
+ """Return an iterator over results.
+
+ The *node* and *path* components are omitted from each result
+ if the respective flag is False (*value* is currently unused).
+
+ """
+ if not node and not path:
+ return iter(val for _, _, val in triples)
+ if not node:
+ return iter((pred, val) for _, pred, val in triples)
+ if not path:
+ return iter((subj, val) for subj, _, val in triples)
+ return iter((subj, pred, val) for subj, pred, val in triples)
+
+
+def to_dict_view(
+ triples,
+ # context
+ one_node: bool,
+ one_path: bool,
+ unique_paths: typing.Set[typing.Union[URI, typing.Iterable[URI]]],
+ # aggregators
+ node: bool,
+ path: bool,
+ value: bool,
+ default: typing.Optional[typing.Any] = None,
+ ) -> typing.Any:
+ """Return a dict of results.
+
+ Note that triples are materialized to create this view.
+
+ The returned structure depends on the *node*, *path*, and *value* flags.
+ If all flags are set to True, returns a dict(node -> dict(path -> set(values))).
+ Setting a flag to False omits or simplifies the respective component (if possible).
+
+ """
+ # NOTE: To create a dict, we need to materialize or make further assumptions
+ # (e.g., sorted in a specific order).
+
+ data: typing.Any # disable type checks on data since it's very flexibly typed.
+
+ # FIXME: type of data can be overwritten later on (if value)
+
+ if not node and not path:
+ data = set()
+ elif node ^ path:
+ data = defaultdict(set)
+ else:
+ data = defaultdict(lambda: defaultdict(set))
+
+ for subj, pred, val in triples:
+ unique = pred in unique_paths
+ if not node and not path:
+ if not value and unique and one_node and one_path:
+ return val
+ data.add(val)
+ elif not node:
+ # remove node from result, group by predicate
+ if not value and unique and one_node:
+ data[pred] = val
+ else:
+ data[pred].add(val)
+ elif not path:
+ # remove predicate from result, group by node
+ if not value and unique and one_path:
+ data[subj] = val
+ else:
+ data[subj].add(val)
+ else:
+ if not value and unique:
+ data[subj][pred] = val
+ else:
+ data[subj][pred].add(val)
+
+ # FIXME: Combine multiple Nodes instances into one?
+
+ # convert defaultdict to ordinary dict
+ # pylint: disable=too-many-boolean-expressions
+ if not node and not path and not value \
+ and len(unique_paths) > 0 and one_node and one_path \
+ and len(data) == 0:
+ return default
+ # pylint: enable=too-many-boolean-expressions
+ if not node and not path:
+ return data
+ if node ^ path:
+ return dict(data)
+ return {key: dict(val) for key, val in data.items()}
+
+## EOF ##
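
A sketch of the flag semantics as implemented above, where a False flag drops the respective component (the triple below is illustrative):

>>> rows = [('node0', 'path0', 'value0')]
>>> list(to_list_view(iter(rows), node=True, path=True, value=False))
[('node0', 'path0', 'value0')]
>>> list(to_list_view(iter(rows), node=False, path=False, value=False))
['value0']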
diff --git a/bsfs/graph/schema.nt b/bsfs/graph/schema.nt
index 8612681..37bba5e 100644
--- a/bsfs/graph/schema.nt
+++ b/bsfs/graph/schema.nt
@@ -4,15 +4,17 @@ prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#>
prefix xsd: <http://www.w3.org/2001/XMLSchema#>
# bsfs prefixes
-prefix bsfs: <http://bsfs.ai/schema/>
-prefix bsm: <http://bsfs.ai/schema/Meta#>
+prefix bsfs: <https://schema.bsfs.io/core/>
+prefix bsl: <https://schema.bsfs.io/core/Literal/>
+prefix bsn: <https://schema.bsfs.io/core/Node#>
# literals
-xsd:integer rdfs:subClassOf bsfs:Literal .
+bsl:Number rdfs:subClassOf bsfs:Literal .
+xsd:float rdfs:subClassOf bsl:Number .
# predicates
-bsm:t_created rdfs:subClassOf bsfs:Predicate ;
+bsn:t_created rdfs:subClassOf bsfs:Predicate ;
rdfs:domain bsfs:Node ;
- rdfs:range xsd:integer ;
+ rdfs:range xsd:float ;
bsfs:unique "true"^^xsd:boolean .
diff --git a/bsfs/graph/walk.py b/bsfs/graph/walk.py
new file mode 100644
index 0000000..6415c9b
--- /dev/null
+++ b/bsfs/graph/walk.py
@@ -0,0 +1,115 @@
+
+# imports
+from collections import abc
+import typing
+
+# bsfs imports
+from bsfs import schema as bsc
+
+# inner-module imports
+# NOTE: circular import! OK as long as only used for type annotations.
+from . import nodes # pylint: disable=cyclic-import
+
+# exports
+__all__: typing.Sequence[str] = (
+ 'Walk',
+ )
+
+
+## code ##
+
+class Walk(abc.Hashable, abc.Callable): # type: ignore [misc] # invalid base class (Callable)
+ """Syntactic sugar for `Nodes` to build and act on predicate paths via members."""
+
+ # Link to Nodes instance.
+ _root: 'nodes.Nodes'
+
+ # Current predicate path.
+ _path: typing.Tuple[bsc.Predicate, ...]
+
+ def __init__(
+ self,
+ root: 'nodes.Nodes',
+ path: typing.Sequence[bsc.Predicate],
+ ):
+ self._root = root
+ self._path = tuple(path)
+
+ @property
+ def tail(self):
+ """Return the node type at the end of the path."""
+ return self._path[-1].range
+
+
+ ## comparison
+
+ def __hash__(self) -> int:
+ """Return an integer hash that identifies the instance."""
+ return hash((type(self), self._root, self._path))
+
+ def __eq__(self, other) -> bool:
+ """Compare against *other* backend."""
+ return isinstance(other, type(self)) \
+ and self._root == other._root \
+ and self._path == other._path
+
+
+ ## representation
+
+ def __repr__(self) -> str:
+ """Return a formal string representation."""
+ path = ', '.join(pred.uri for pred in self._path)
+ return f'Walk({self._root.node_type.uri}, ({path}))'
+
+ def __str__(self) -> str:
+ """Return an informal string representation."""
+ path = ', '.join(pred.uri for pred in self._path)
+ return f'Walk(@{self._root.node_type.uri}: {path})'
+
+
+ ## walk
+
+ @staticmethod
+ def step(
+ schema: bsc.Schema,
+ node: bsc.Node,
+ name: str,
+ ) -> typing.Tuple[bsc.Predicate]:
+ """Get an predicate at *node* whose fragment matches *name*."""
+ predicates = tuple(
+ pred
+ for pred
+ in schema.predicates_at(node)
+ if pred.uri.get('fragment', None) == name
+ )
+ if len(predicates) == 0: # no fragment found for name
+ raise ValueError(f'no available predicate matches {name}') # FIXME: Custom exception
+ if len(predicates) > 1: # ambiguous name
+ raise ValueError(f'{name} matches multiple predicates') # FIXME: Custom exception
+ # return the single matching predicate
+ return predicates # type: ignore [return-value] # size is one
+
+ def __getattr__(self, name: str) -> 'Walk':
+ """Alias for `Walk.step(name)`."""
+ try:
+ return super().__getattr__(name)
+ except AttributeError:
+ pass
+ # get predicate
+ pred = self.step(self._root.schema, self.tail, name)
+ # append predicate to walk
+ return Walk(self._root, self._path + pred)
+
+
+ ## get paths ##
+
+ def get(self, **kwargs) -> typing.Any:
+ """Alias for `Nodes.get(..)`."""
+ return self._root.get(tuple(pred.uri for pred in self._path), **kwargs)
+
+ def __call__(self, **kwargs) -> typing.Any: # pylint: disable=arguments-differ
+ """Alias for `Walk.get(...)`."""
+ return self.get(**kwargs)
+
+
+## EOF ##
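
A sketch of the attribute-walk sugar, assuming an entity whose schema offers predicates with the URI fragments 'tag' and 'label' (as in the examples elsewhere in this merge):

>>> ent = graph.node(ns.bsn.Entity, 'http://example.com/me/entity#1234')
>>> ent.tag           # a Walk along the tag predicate
>>> ent.tag.label()   # fetch the labels of the entity's tags via Walk.get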
diff --git a/bsfs/namespace/__init__.py b/bsfs/namespace/__init__.py
index 98d472f..76f39a2 100644
--- a/bsfs/namespace/__init__.py
+++ b/bsfs/namespace/__init__.py
@@ -1,19 +1,13 @@
-"""
-Part of the BlackStar filesystem (bsfs) module.
-A copy of the license is provided with the project.
-Author: Matthias Baumgartner, 2022
-"""
# imports
import typing
# inner-module imports
from . import predefined as ns
-from .namespace import ClosedNamespace, Namespace
+from .namespace import Namespace
# exports
__all__: typing.Sequence[str] = (
- 'ClosedNamespace',
'Namespace',
'ns',
)
diff --git a/bsfs/namespace/namespace.py b/bsfs/namespace/namespace.py
index f652dcd..b388f53 100644
--- a/bsfs/namespace/namespace.py
+++ b/bsfs/namespace/namespace.py
@@ -1,104 +1,54 @@
-"""
-Part of the BlackStar filesystem (bsfs) module.
-A copy of the license is provided with the project.
-Author: Matthias Baumgartner, 2022
-"""
# imports
import typing
# bsfs imports
-from bsfs.utils import URI, typename
+from bsfs.utils import URI
# exports
__all__: typing.Sequence[str] = (
- 'ClosedNamespace',
'Namespace',
+ 'FinalNamespace',
)
## code ##
-class Namespace():
- """A namespace consists of a common prefix that is used in a set of URIs.
- Note that the prefix must include the separator between
- path and fragment (typically a '#' or a '/').
- """
-
- # namespace prefix.
- prefix: URI
-
- # fragment separator.
- fsep: str
-
- # path separator.
- psep: str
-
- def __init__(self, prefix: URI, fsep: str = '#', psep: str = '/'):
- # ensure prefix type
- prefix = URI(prefix)
- # truncate fragment separator
- while prefix.endswith(fsep):
- prefix = URI(prefix[:-1])
- # truncate path separator
- while prefix.endswith(psep):
- prefix = URI(prefix[:-1])
- # store members
- self.prefix = prefix
- self.fsep = fsep
- self.psep = psep
-
- def __eq__(self, other: typing.Any) -> bool:
- return isinstance(other, type(self)) \
- and self.prefix == other.prefix \
- and self.fsep == other.fsep \
- and self.psep == other.psep
+class Namespace(URI):
+ """The Namespace allows you to incrementally append path segments to an URI.
- def __hash__(self) -> int:
- return hash((type(self), self.prefix, self.fsep, self.psep))
+ Segments are separated by `Namespace.sep` ('/').
+ The `__call__` method signals that the URI is complete until the query part.
- def __str__(self) -> str:
- return f'{typename(self)}({self.prefix})'
-
- def __repr__(self) -> str:
- return f'{typename(self)}({self.prefix}, {self.fsep}, {self.psep})'
-
- def __getattr__(self, fragment: str) -> URI:
- """Return prefix + fragment."""
- return URI(self.prefix + self.fsep + fragment)
-
- def __getitem__(self, fragment: str) -> URI:
- """Alias for getattr(self, fragment)."""
- return self.__getattr__(fragment)
+ """
- def __add__(self, value: typing.Any) -> 'Namespace':
- """Concatenate another namespace to this one."""
- if not isinstance(value, str):
- return NotImplemented
- return Namespace(self.prefix + self.psep + value, self.fsep, self.psep)
+ # path separator
+ sep: str = '/'
+ def __getattr__(self, query: str) -> 'Namespace':
+ """Append the *query* to the current value and return as Namespace."""
+ return Namespace(self + self.sep + query)
-class ClosedNamespace(Namespace):
- """Namespace that covers a restricted set of URIs."""
+ def __call__(self, sep: str = '#') -> 'FinalNamespace':
+ """Finalize the namespace."""
+ return FinalNamespace(self, sep)
- # set of permissible fragments.
- fragments: typing.Set[str]
- def __init__(self, prefix: URI, *args: str, fsep: str = '#', psep: str = '/'):
- super().__init__(prefix, fsep, psep)
- self.fragments = set(args)
+# FIXME: Integrate FinalNamespace into Namespace? Do we need to have both?
+class FinalNamespace(URI):
+ """The FinalNamespace allows you to append a fragment to an URI."""
- def __eq__(self, other: typing.Any) -> bool:
- return super().__eq__(other) and self.fragments == other.fragments
+ # fragment separator
+ sep: str
- def __hash__(self) -> int:
- return hash((type(self), self.prefix, tuple(sorted(self.fragments))))
+ def __new__(cls, value: str, sep: str = '#'):
+ inst = URI.__new__(cls, value)
+ inst.sep = sep
+ return inst
def __getattr__(self, fragment: str) -> URI:
- """Return prefix + fragment or raise a KeyError if the fragment is not part of this namespace."""
- if fragment not in self.fragments:
- raise KeyError(f'{fragment} is not a valid fragment of namespace {self.prefix}')
- return super().__getattr__(fragment)
+ """Append the *fragment* to the current value and return as URI."""
+ return URI(self + self.sep + fragment)
## EOF ##
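
A sketch of the rewritten namespace classes, mirroring their use in predefined.py below: attribute access appends path segments, calling the namespace finalizes it, and attribute access on the result yields a plain URI:

>>> core = Namespace('https://schema.bsfs.io/core')
>>> bsn = core.Node()    # FinalNamespace with fragment separator '#'
>>> bsn.t_created
'https://schema.bsfs.io/core/Node#t_created'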
diff --git a/bsfs/namespace/predefined.py b/bsfs/namespace/predefined.py
index cd48a46..8b60d39 100644
--- a/bsfs/namespace/predefined.py
+++ b/bsfs/namespace/predefined.py
@@ -1,35 +1,29 @@
-"""
-Part of the BlackStar filesystem (bsfs) module.
-A copy of the license is provided with the project.
-Author: Matthias Baumgartner, 2022
-"""
# imports
import typing
-# bsfs imports
-from bsfs.utils import URI
-
# inner-module imports
-from . import namespace
+from .namespace import Namespace, FinalNamespace
# essential bsfs namespaces
-bsfs: namespace.Namespace = namespace.Namespace(URI('http://bsfs.ai/schema'), fsep='/')
-
+bsfs = Namespace('https://schema.bsfs.io/core')
# additional bsfs namespaces
-bse: namespace.Namespace = namespace.Namespace(URI('http://bsfs.ai/schema/Entity'))
-bsm: namespace.Namespace = namespace.Namespace(URI('http://bsfs.ai/schema/Meta'))
+bsd = bsfs.distance()
+bsl = bsfs.Literal
+bsn = bsfs.Node()
# generic namespaces
-rdf: namespace.Namespace = namespace.Namespace(URI('http://www.w3.org/1999/02/22-rdf-syntax-ns'))
-rdfs: namespace.Namespace = namespace.Namespace(URI('http://www.w3.org/2000/01/rdf-schema'))
-schema: namespace.Namespace = namespace.Namespace(URI('http://schema.org'), fsep='/')
-xsd: namespace.Namespace = namespace.Namespace(URI('http://www.w3.org/2001/XMLSchema'))
+rdf = FinalNamespace('http://www.w3.org/1999/02/22-rdf-syntax-ns')
+rdfs = FinalNamespace('http://www.w3.org/2000/01/rdf-schema')
+xsd = FinalNamespace('http://www.w3.org/2001/XMLSchema')
+schema = FinalNamespace('http://schema.org', sep='/')
+# exports
__all__: typing.Sequence[str] = (
- 'bse',
+ 'bsd',
'bsfs',
- 'bsm',
+ 'bsl',
+ 'bsn',
'rdf',
'rdfs',
'schema',
diff --git a/bsfs/query/__init__.py b/bsfs/query/__init__.py
new file mode 100644
index 0000000..58ff03a
--- /dev/null
+++ b/bsfs/query/__init__.py
@@ -0,0 +1,15 @@
+
+# imports
+import typing
+
+# inner-module imports
+from . import ast
+from . import validator as validate
+
+# exports
+__all__: typing.Sequence[str] = (
+ 'ast',
+ 'validate',
+ )
+
+## EOF ##
diff --git a/bsfs/query/ast/__init__.py b/bsfs/query/ast/__init__.py
new file mode 100644
index 0000000..bceaac0
--- /dev/null
+++ b/bsfs/query/ast/__init__.py
@@ -0,0 +1,23 @@
+"""Query AST components.
+
+The query AST consists of Filter and Fetch syntax trees.
+
+Classes beginning with an underscore (_) represent internal type hierarchies
+and should not be used for parsing. Note that the AST structures do not
+(and cannot) check semantic validity or consistency with a given schema.
+
+"""
+# imports
+import typing
+
+# inner-module imports
+from . import fetch
+from . import filter_ as filter # pylint: disable=redefined-builtin
+
+# exports
+__all__: typing.Sequence[str] = (
+ 'fetch',
+ 'filter',
+ )
+
+## EOF ##
diff --git a/bsfs/query/ast/fetch.py b/bsfs/query/ast/fetch.py
new file mode 100644
index 0000000..66d94e1
--- /dev/null
+++ b/bsfs/query/ast/fetch.py
@@ -0,0 +1,169 @@
+
+# imports
+from collections import abc
+import typing
+
+# bsfs imports
+from bsfs.utils import URI, typename, normalize_args
+
+# exports
+__all__ : typing.Sequence[str] = (
+ 'All',
+ 'Fetch',
+ 'FetchExpression',
+ 'Node',
+ 'This',
+ 'Value',
+ )
+
+
+## code ##
+
+class FetchExpression(abc.Hashable):
+ """Generic Fetch expression."""
+
+ def __repr__(self) -> str:
+ """Return the expressions's string representation."""
+ return f'{typename(self)}()'
+
+ def __hash__(self) -> int:
+ """Return the expression's integer representation."""
+ return hash(type(self))
+
+ def __eq__(self, other: typing.Any) -> bool:
+ """Return True if *self* and *other* are equivalent."""
+ return isinstance(other, type(self))
+
+
+class All(FetchExpression):
+ """Fetch all child expressions."""
+
+ # child expressions.
+ expr: typing.Set[FetchExpression]
+
+ def __init__(self, *expr):
+ # unpack child expressions
+ unfolded = set(normalize_args(*expr))
+ # check child expressions
+ if len(unfolded) == 0:
+ raise AttributeError('expected at least one expression, found none')
+ if not all(isinstance(itm, FetchExpression) for itm in unfolded):
+ raise TypeError(expr)
+ # initialize
+ super().__init__()
+ # assign members
+ self.expr = unfolded
+
+ def __iter__(self) -> typing.Iterator[FetchExpression]:
+ return iter(self.expr)
+
+ def __len__(self) -> int:
+ return len(self.expr)
+
+ def __repr__(self) -> str:
+ return f'{typename(self)}({self.expr})'
+
+ def __hash__(self) -> int:
+ return hash((super().__hash__(), tuple(sorted(self.expr, key=repr))))
+
+ def __eq__(self, other: typing.Any) -> bool:
+ return super().__eq__(other) and self.expr == other.expr
+
+
+class _Branch(FetchExpression):
+ """Branch along a predicate."""
+
+ # FIXME: Use a Predicate (like in ast.filter) so that we can also reverse them!
+
+ # predicate to follow.
+ predicate: URI
+
+ def __init__(self, predicate: URI):
+ if not isinstance(predicate, URI):
+ raise TypeError(predicate)
+ self.predicate = predicate
+
+ def __repr__(self) -> str:
+ return f'{typename(self)}({self.predicate})'
+
+ def __hash__(self) -> int:
+ return hash((super().__hash__(), self.predicate))
+
+ def __eq__(self, other: typing.Any) -> bool:
+ return super().__eq__(other) and self.predicate == other.predicate
+
+
+class Fetch(_Branch):
+ """Follow a predicate before evaluating a child epxression."""
+
+ # child expression.
+ expr: FetchExpression
+
+ def __init__(self, predicate: URI, expr: FetchExpression):
+ # check child expressions
+ if not isinstance(expr, FetchExpression):
+ raise TypeError(expr)
+ # initialize
+ super().__init__(predicate)
+ # assign members
+ self.expr = expr
+
+ def __repr__(self) -> str:
+ return f'{typename(self)}({self.predicate}, {self.expr})'
+
+ def __hash__(self) -> int:
+ return hash((super().__hash__(), self.expr))
+
+ def __eq__(self, other: typing.Any) -> bool:
+ return super().__eq__(other) and self.expr == other.expr
+
+
+class _Named(_Branch):
+ """Fetch a (named) symbol at a predicate."""
+
+ # symbol name.
+ name: str
+
+ def __init__(self, predicate: URI, name: str):
+ super().__init__(predicate)
+ self.name = str(name)
+
+ def __repr__(self) -> str:
+ return f'{typename(self)}({self.predicate}, {self.name})'
+
+ def __hash__(self) -> int:
+ return hash((super().__hash__(), self.name))
+
+ def __eq__(self, other: typing.Any) -> bool:
+ return super().__eq__(other) and self.name == other.name
+
+
+class Node(_Named): # pylint: disable=too-few-public-methods
+ """Fetch a Node at a predicate."""
+ # FIXME: Is this actually needed?
+
+
+class Value(_Named): # pylint: disable=too-few-public-methods
+ """Fetch a Literal at a predicate."""
+
+
+class This(FetchExpression):
+ """Fetch the current Node."""
+
+ # symbol name.
+ name: str
+
+ def __init__(self, name: str):
+ super().__init__()
+ self.name = str(name)
+
+ def __repr__(self) -> str:
+ return f'{typename(self)}({self.name})'
+
+ def __hash__(self) -> int:
+ return hash((super().__hash__(), self.name))
+
+ def __eq__(self, other: typing.Any) -> bool:
+ return super().__eq__(other) and self.name == other.name
+
+## EOF ##
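
A sketch of hand-assembling a fetch query equivalent to what `Nodes.get` builds in nodes.py above (the predicate URIs and symbol names are illustrative):

>>> query = All(
...     Fetch(ns.bse.tag, Value(ns.bst.label, 'fetch0')),  # labels of the node's tags
...     Node(ns.bse.tag, 'fetch1'),                        # the tag nodes themselves
... )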
diff --git a/bsfs/query/ast/filter_.py b/bsfs/query/ast/filter_.py
new file mode 100644
index 0000000..610fdb4
--- /dev/null
+++ b/bsfs/query/ast/filter_.py
@@ -0,0 +1,516 @@
+"""Filter AST.
+
+Note that it is easily possible to construct an AST that is inconsistent with
+a given schema. Furthermore, it is possible to construct a semantically invalid
+AST that cannot be parsed correctly or that includes contradicting statements.
+The AST nodes do not (and cannot) check such issues.
+
+For example, consider the following AST:
+
+>>> Any(ns.bse.collection,
+... And(
+... Equals('hello'),
+... Is('hello world'),
+... Any(ns.bse.tag, Equals('world')),
+... Any(ns.bst.label, Equals('world')),
+... All(ns.bst.label, Not(Equals('world'))),
+... )
+... )
+
+This AST has multiple issues that are not detected upon its creation:
+* A condition on a non-literal.
+* A Filter on a literal.
+* Conditions that exclude each other.
+* Predicates along the branch with incompatible domains and ranges.
+
+"""
+# imports
+from collections import abc
+import typing
+
+# bsfs imports
+from bsfs.utils import URI, typename, normalize_args
+
+# exports
+__all__ : typing.Sequence[str] = (
+ # base classes
+ 'FilterExpression',
+ 'PredicateExpression',
+ # predicate expressions
+ 'OneOf',
+ 'Predicate',
+ # branching
+ 'All',
+ 'Any',
+ # aggregators
+ 'And',
+ 'Or',
+ # value matchers
+ 'Equals',
+ 'Substring',
+ 'EndsWith',
+ 'StartsWith',
+ # range matchers
+ 'GreaterThan',
+ 'LessThan',
+ # misc
+ 'Has',
+ 'Is',
+ 'Not',
+ )
+
+
+## code ##
+
+# pylint: disable=too-few-public-methods # Many expressions use mostly magic methods
+
+class _Expression(abc.Hashable):
+ def __repr__(self) -> str:
+ """Return the expressions's string representation."""
+ return f'{typename(self)}()'
+
+ def __hash__(self) -> int:
+ """Return the expression's integer representation."""
+ return hash(type(self))
+
+ def __eq__(self, other: typing.Any) -> bool:
+ """Return True if *self* and *other* are equivalent."""
+ return isinstance(other, type(self))
+
+
+class FilterExpression(_Expression):
+ """Generic Filter expression."""
+
+
+class PredicateExpression(_Expression):
+ """Generic Predicate expression."""
+
+
+class _Branch(FilterExpression):
+ """Branch the filter along a predicate."""
+
+ # predicate to follow.
+ predicate: PredicateExpression
+
+ # child expression to evaluate.
+ expr: FilterExpression
+
+ def __init__(
+ self,
+ predicate: typing.Union[PredicateExpression, URI],
+ expr: FilterExpression,
+ ):
+ # process predicate argument
+ if isinstance(predicate, URI):
+ predicate = Predicate(predicate)
+ elif not isinstance(predicate, PredicateExpression):
+ raise TypeError(predicate)
+ # process expression argument
+ if not isinstance(expr, FilterExpression):
+ raise TypeError(expr)
+ # assign members
+ self.predicate = predicate
+ self.expr = expr
+
+ def __repr__(self) -> str:
+ return f'{typename(self)}({self.predicate}, {self.expr})'
+
+ def __hash__(self) -> int:
+ return hash((super().__hash__(), self.predicate, self.expr))
+
+ def __eq__(self, other) -> bool:
+ return super().__eq__(other) \
+ and self.predicate == other.predicate \
+ and self.expr == other.expr
+
+class Any(_Branch):
+ """Any (and at least one) triple matches."""
+
+
+class All(_Branch):
+ """All (and at least one) triples match."""
+
+
+class _Agg(FilterExpression, abc.Collection):
+ """Combine multiple expressions."""
+
+ # child expressions
+ expr: typing.Set[FilterExpression]
+
+ def __init__(
+ self,
+ *expr: typing.Union[FilterExpression,
+ typing.Iterable[FilterExpression],
+ typing.Iterator[FilterExpression]]
+ ):
+ # unfold arguments
+ unfolded = set(normalize_args(*expr))
+ # check type
+ if not all(isinstance(e, FilterExpression) for e in unfolded):
+ raise TypeError(expr)
+ # FIXME: Require at least one child expression?
+ # assign member
+ self.expr = unfolded
+
+ def __contains__(self, expr: typing.Any) -> bool:
+ """Return True if *expr* is among the child expressions."""
+ return expr in self.expr
+
+ def __iter__(self) -> typing.Iterator[FilterExpression]:
+ """Iterator over child expressions."""
+ return iter(self.expr)
+
+ def __len__(self) -> int:
+ """Number of child expressions."""
+ return len(self.expr)
+
+ def __repr__(self) -> str:
+ return f'{typename(self)}({self.expr})'
+
+ def __hash__(self) -> int:
+ return hash((super().__hash__(), tuple(sorted(self.expr, key=repr))))
+
+ def __eq__(self, other) -> bool:
+ return super().__eq__(other) and self.expr == other.expr
+
+
+class And(_Agg):
+ """All conditions match."""
+
+
+class Or(_Agg):
+ """At least one condition matches."""
+
+
+class Not(FilterExpression):
+ """Invert a statement."""
+
+ # child expression
+ expr: FilterExpression
+
+ def __init__(self, expr: FilterExpression):
+ # check argument
+ if not isinstance(expr, FilterExpression):
+ raise TypeError(expr)
+ # assign member
+ self.expr = expr
+
+ def __repr__(self) -> str:
+ return f'{typename(self)}({self.expr})'
+
+ def __hash__(self) -> int:
+ return hash((super().__hash__(), self.expr))
+
+ def __eq__(self, other: typing.Any) -> bool:
+ return super().__eq__(other) and self.expr == other.expr
+
+
+class Has(FilterExpression):
+ """Has predicate N times"""
+
+ # predicate to follow.
+ predicate: PredicateExpression
+
+ # target count
+ count: FilterExpression
+
+ def __init__(
+ self,
+ predicate: typing.Union[PredicateExpression, URI],
+ count: typing.Optional[typing.Union[FilterExpression, int]] = None,
+ ):
+ # check predicate
+ if isinstance(predicate, URI):
+ predicate = Predicate(predicate)
+ elif not isinstance(predicate, PredicateExpression):
+ raise TypeError(predicate)
+ # check count
+ if count is None:
+ count = GreaterThan(1, strict=False)
+ elif isinstance(count, int):
+ count = Equals(count)
+ elif not isinstance(count, FilterExpression):
+ raise TypeError(count)
+ # assign members
+ self.predicate = predicate
+ self.count = count
+
+ def __repr__(self) -> str:
+ return f'{typename(self)}({self.predicate}, {self.count})'
+
+ def __hash__(self) -> int:
+ return hash((super().__hash__(), self.predicate, self.count))
+
+ def __eq__(self, other) -> bool:
+ return super().__eq__(other) \
+ and self.predicate == other.predicate \
+ and self.count == other.count
+
+
+class _Value(FilterExpression):
+ """Matches some value."""
+
+ # target value.
+ value: typing.Any
+
+ def __init__(self, value: typing.Any):
+ self.value = value
+
+ def __repr__(self) -> str:
+ return f'{typename(self)}({self.value})'
+
+ def __hash__(self) -> int:
+ return hash((super().__hash__(), self.value))
+
+ def __eq__(self, other) -> bool:
+ return super().__eq__(other) and self.value == other.value
+
+
+class Is(_Value):
+ """Match the URI of a node."""
+
+
+class Equals(_Value):
+ """Value matches exactly.
+    NOTE: Value must correspond to the literal's type.
+ """
+
+
+class Substring(_Value):
+ """Value matches a substring
+ NOTE: value must be a string.
+ """
+
+
+class StartsWith(_Value):
+ """Value begins with a given string."""
+
+
+class EndsWith(_Value):
+ """Value ends with a given string."""
+
+
+class Distance(FilterExpression):
+ """Distance to a reference is (strictly) below a threshold. Assumes a Feature literal."""
+
+ # FIXME:
+ # (a) pass a node/predicate as anchor instead of a value.
+ # Then we don't need to materialize the reference.
+ # (b) pass a FilterExpression (_Bounded) instead of a threshold.
+ # Then, we could also query values greater than a threshold.
+
+ # reference value.
+ reference: typing.Any
+
+ # distance threshold.
+ threshold: float
+
+    # open (True) or closed (False) bound.
+ strict: bool
+
+ def __init__(
+ self,
+ reference: typing.Any,
+ threshold: float,
+ strict: bool = False,
+ ):
+ self.reference = reference
+ self.threshold = float(threshold)
+ self.strict = bool(strict)
+
+ def __repr__(self) -> str:
+ return f'{typename(self)}({self.reference}, {self.threshold}, {self.strict})'
+
+ def __hash__(self) -> int:
+ return hash((super().__hash__(), tuple(self.reference), self.threshold, self.strict))
+
+ def __eq__(self, other) -> bool:
+ return super().__eq__(other) \
+ and self.reference == other.reference \
+ and self.threshold == other.threshold \
+ and self.strict == other.strict
+
+
+class _Bounded(FilterExpression):
+ """Value is bounded by a threshold. Assumes a Number literal."""
+
+ # bound.
+ threshold: float
+
+    # open (True) or closed (False) bound.
+ strict: bool
+
+ def __init__(
+ self,
+ threshold: float,
+ strict: bool = True,
+ ):
+ self.threshold = float(threshold)
+ self.strict = bool(strict)
+
+ def __repr__(self) -> str:
+ return f'{typename(self)}({self.threshold}, {self.strict})'
+
+ def __hash__(self) -> int:
+ return hash((super().__hash__(), self.threshold, self.strict))
+
+ def __eq__(self, other) -> bool:
+ return super().__eq__(other) \
+ and self.threshold == other.threshold \
+ and self.strict == other.strict
+
+
+
+class LessThan(_Bounded):
+ """Value is (strictly) smaller than threshold. Assumes a Number literal."""
+
+
+class GreaterThan(_Bounded):
+ """Value is (strictly) larger than threshold. Assumes a Number literal."""
+
+
+class Predicate(PredicateExpression):
+ """A single predicate."""
+
+ # predicate URI
+ predicate: URI
+
+ # reverse the predicate's direction
+ reverse: bool
+
+ def __init__(
+ self,
+ predicate: URI,
+ reverse: typing.Optional[bool] = False,
+ ):
+ # check arguments
+ if not isinstance(predicate, URI):
+ raise TypeError(predicate)
+ # assign members
+ self.predicate = predicate
+ self.reverse = bool(reverse)
+
+ def __repr__(self) -> str:
+ return f'{typename(self)}({self.predicate}, {self.reverse})'
+
+ def __hash__(self) -> int:
+ return hash((super().__hash__(), self.predicate, self.reverse))
+
+ def __eq__(self, other) -> bool:
+ return super().__eq__(other) \
+ and self.predicate == other.predicate \
+ and self.reverse == other.reverse
+
+
+class OneOf(PredicateExpression, abc.Collection):
+ """A set of predicate alternatives.
+
+ The predicates' domains must be ascendants or descendants of each other.
+ The overall domain is the most specific one.
+
+    The predicates' ranges must be ascendants or descendants of each other.
+    The overall range is the most generic one.
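+
+    For example (a sketch with hypothetical predicates):
+
+    >>> OneOf(ns.bse.tag, ns.bse.collection)
+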
+ """
+
+ # predicate alternatives
+ expr: typing.Set[PredicateExpression]
+
+ def __init__(self, *expr: typing.Union[PredicateExpression, URI]):
+ # unfold arguments
+ unfolded = set(normalize_args(*expr)) # type: ignore [arg-type] # this is getting too complex...
+ # check arguments
+ if len(unfolded) == 0:
+ raise AttributeError('expected at least one expression, found none')
+ # ensure PredicateExpression
+ unfolded = {Predicate(e) if isinstance(e, URI) else e for e in unfolded}
+ # check type
+ if not all(isinstance(e, PredicateExpression) for e in unfolded):
+ raise TypeError(expr)
+ # assign member
+ self.expr = unfolded
+
+ def __contains__(self, expr: typing.Any) -> bool:
+ """Return True if *expr* is among the child expressions."""
+ return expr in self.expr
+
+ def __iter__(self) -> typing.Iterator[PredicateExpression]:
+ """Iterator over child expressions."""
+ return iter(self.expr)
+
+ def __len__(self) -> int:
+ """Number of child expressions."""
+ return len(self.expr)
+
+ def __repr__(self) -> str:
+ return f'{typename(self)}({self.expr})'
+
+ def __hash__(self) -> int:
+ return hash((super().__hash__(), tuple(sorted(self.expr, key=repr))))
+
+ def __eq__(self, other) -> bool:
+ return super().__eq__(other) and self.expr == other.expr
+
+
+# Helpers
+# invalid-name is disabled since they explicitly mimic an expression
+
+def IsIn(*values) -> FilterExpression: # pylint: disable=invalid-name
+ """Match any of the given URIs."""
+ args = normalize_args(*values)
+ if len(args) == 0:
+ raise AttributeError('expected at least one value, found none')
+ if len(args) == 1:
+ return Is(args[0])
+ return Or(Is(value) for value in args)
+
+def IsNotIn(*values) -> FilterExpression: # pylint: disable=invalid-name
+ """Match none of the given URIs."""
+ return Not(IsIn(*values))
+
+
+def Between( # pylint: disable=invalid-name
+ lo: float = float('-inf'),
+ hi: float = float('inf'),
+ lo_strict: bool = True,
+ hi_strict: bool = True,
+ ) -> FilterExpression :
+ """Match numerical values between *lo* and *hi*. Include bounds if strict is False."""
+ if abs(lo) == hi == float('inf'):
+ raise ValueError('range cannot be INF on both sides')
+ if lo > hi:
+        raise ValueError(f'lower bound ({lo}) cannot be greater than upper bound ({hi})')
+ if lo == hi and not lo_strict and not hi_strict:
+ return Equals(lo)
+ if lo == hi: # either bound is strict
+ raise ValueError('bounds cannot be equal when either is strict')
+ if lo != float('-inf') and hi != float('inf'):
+ return And(GreaterThan(lo, lo_strict), LessThan(hi, hi_strict))
+ if lo != float('-inf'):
+ return GreaterThan(lo, lo_strict)
+ # hi != float('inf'):
+ return LessThan(hi, hi_strict)
+
+
+def Includes(*values, approx: bool = False) -> FilterExpression: # pylint: disable=invalid-name
+ """Match any of the given *values*. Uses `Substring` if *approx* is set."""
+ args = normalize_args(*values)
+ cls = Substring if approx else Equals
+ if len(args) == 0:
+ raise AttributeError('expected at least one value, found none')
+ if len(args) == 1:
+ return cls(args[0])
+ return Or(cls(v) for v in args)
+
+
+def Excludes(*values, approx: bool = False) -> FilterExpression: # pylint: disable=invalid-name
+ """Match none of the given *values*. Uses `Substring` if *approx* is set."""
+ args = normalize_args(*values)
+ cls = Substring if approx else Equals
+ if len(args) == 0:
+ raise AttributeError('expected at least one value, found none')
+ if len(args) == 1:
+ return Not(cls(args[0]))
+ return Not(Or(cls(v) for v in args))
+
+
+## EOF ##
diff --git a/bsfs/query/matcher.py b/bsfs/query/matcher.py
new file mode 100644
index 0000000..17c9c8e
--- /dev/null
+++ b/bsfs/query/matcher.py
@@ -0,0 +1,361 @@
+
+# imports
+from collections import defaultdict
+from itertools import product
+from time import time
+import random
+import threading
+import typing
+
+# external imports
+from hopcroftkarp import HopcroftKarp
+
+# bsfs imports
+from bsfs.utils import errors, typename
+
+# inner-module imports
+from . import ast
+
+# exports
+__all__ : typing.Sequence[str] = (
+ 'Filter',
+ )
+
+
+## code ##
+
+class Any(ast.filter.FilterExpression, ast.filter.PredicateExpression):
+ """Match any ast class.
+
+ Note that Any instances are unique, i.e. they do not compare, and
+ can hence be repeated in a set:
+ >>> Any() == Any()
+ False
+ >>> len({Any(), Any(), Any(), Any()})
+ 4
+
+ """
+
+ # unique instance id
+ _uid: typing.Tuple[int, int, float, float]
+
+ def __init__(self):
+ self._uid = (
+ id(self),
+ id(threading.current_thread()),
+ time(),
+ random.random(),
+ )
+
+ def __eq__(self, other: typing.Any):
+ return super().__eq__(other) and self._uid == other._uid
+
+ def __hash__(self):
+ return hash((super().__hash__(), self._uid))
+
+
+class Rest(ast.filter.FilterExpression, ast.filter.PredicateExpression):
+ """Match the leftovers in a set of items to be compared.
+
+    Rest can be used in conjunction with aggregating expressions such as
+    ast.filter.And, ast.filter.Or, or ast.filter.OneOf. It matches child
+    expressions that were not consumed by other matching rules. Rest may
+    match only a specific expression, which defaults to Any().
+
+    For example, the following two ast structures match since Rest allows
+    an arbitrary repetition of ast.filter.Equals statements:
+
+ >>> And(Equals('hello'), Equals('world'), Equals('foobar'))
+ >>> And(Equals('world'), Rest(Partial(Equals)))
+
+ """
+
+ # child expression for the Rest.
+ expr: typing.Union[ast.filter.FilterExpression, ast.filter.PredicateExpression]
+
+ def __init__(
+ self,
+ expr: typing.Optional[typing.Union[ast.filter.FilterExpression, ast.filter.PredicateExpression]] = None,
+ ):
+ if expr is None:
+ expr = Any()
+ self.expr = expr
+
+ def __repr__(self) -> str:
+ return f'{typename(self)}({self.expr})'
+
+ def __hash__(self) -> int:
+ return hash((super().__hash__(), self.expr))
+
+ def __eq__(self, other: typing.Any) -> bool:
+ return super().__eq__(other) and self.expr == other.expr
+
+
+class Partial(ast.filter.FilterExpression, ast.filter.PredicateExpression):
+ """Match a partially defined ast expression.
+
+ Literal values might be irrelevant or unknown when comparing two ast
+    structures. Partial allows constraining the matcher to a certain
+    ast class while leaving some of its members unspecified.
+
+ Pass the class (not instance) and its members as keyword arguments
+ to Partial. Note that the arguments are not validated.
+
+ For example, the following instance matches any ast.filter.Equals,
+ irrespective of its value:
+
+ >>> Partial(ast.filter.Equals)
+
+ Likewise, the following instance matches any ast.filter.LessThan
+    that has a non-strict bound, but makes no claim about the threshold:
+
+ >>> Partial(ast.filter.LessThan, strict=False)
+
+ """
+
+ # target node type.
+ node: typing.Type
+
+ # node construction args.
+ kwargs: typing.Dict[str, typing.Any]
+
+ def __init__(
+ self,
+ node: typing.Type,
+ **kwargs,
+ ):
+ self.node = node
+ self.kwargs = kwargs
+
+ def __repr__(self) -> str:
+ return f'{typename(self)}({self.node.__name__}, {self.kwargs})'
+
+ def __hash__(self) -> int:
+ kwargs = tuple((key, self.kwargs[key]) for key in sorted(self.kwargs))
+ return hash((super().__hash__(), self.node, kwargs))
+
+ def __eq__(self, other: typing.Any) -> bool:
+ return super().__eq__(other) \
+ and self.node == other.node \
+ and self.kwargs == other.kwargs
+
+ def match(
+ self,
+ name: str,
+ value: typing.Any,
+ ) -> bool:
+ """Return True if *name* is unspecified or matches *value*."""
+ return name not in self.kwargs or self.kwargs[name] == value
+
+
+T_ITEM_TYPE = typing.TypeVar('T_ITEM_TYPE') # pylint: disable=invalid-name
+
+def _set_matcher(
+ query: typing.Collection[T_ITEM_TYPE],
+ reference: typing.Collection[T_ITEM_TYPE],
+ cmp: typing.Callable[[T_ITEM_TYPE, T_ITEM_TYPE], bool],
+ ) -> bool:
+ """Compare two sets of child expressions.
+
+ This check has a best-case complexity of O(|N|**2) and worst-case
+ complexity of O(|N|**3), with N the number of child expressions.
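+
+    For example, comparing plain integers with an equality check
+    (a sketch; in practice the items are ast expressions and *cmp*
+    a parser method):
+
+    >>> _set_matcher({1, 2}, {2, 1}, lambda a, b: a == b)
+    True
+    >>> _set_matcher({1, 2}, {1, 3}, lambda a, b: a == b)
+    False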
+ """
+ # get reference items
+ r_items = list(reference)
+ # deal with Rest
+ r_rest = {itm for itm in r_items if isinstance(itm, Rest)}
+ if len(r_rest) > 1:
+ raise errors.BackendError(f'there must be at most one Rest instance per set, found {len(r_rest)}')
+ if len(r_rest) == 1:
+ # replace Rest by filling the reference up with rest's expression
+        # NOTE: r_items is a list (not a set) so that the expression can be repeated
+ expr = next(iter(r_rest)).expr # type: ignore [attr-defined]
+ r_items = [itm for itm in r_items if not isinstance(itm, Rest)]
+ r_items += [expr for _ in range(len(query) - len(r_items))] # type: ignore [misc]
+ # sanity check: cannot match if the item sizes differ:
+ # either a reference item is unmatched (len(r_items) > len(query))
+ # or a query item is unmatched (len(r_items) < len(query))
+ if len(query) != len(r_items):
+ return False
+
+ # To have a positive match between the query and the reference,
+ # each query expr has to match any reference expr.
+ # However, each reference expr can only be "consumed" once even
+ # if it matches multiple query exprs (e.g., the Any expression matches
+ # every query expr).
+    # This is a bipartite matching problem (Hall's marriage problem)
+ # and the Hopcroft-Karp-Karzanov algorithm finds a maximum
+ # matching. While there might be multiple maximum matchings,
+ # we only need to know whether (at least) one complete matching
+ # exists. The hopcroftkarp module provides this functionality.
+ # The HKK algorithm has worst-case complexity of O(|N|**2 * sqrt(|N|))
+ # and we also need to compare expressions pairwise, hence O(|N|**2).
+ num_items = len(r_items)
+ graph = defaultdict(set)
+    # build the bipartite graph as {lhs: {rhs}, ...}
+ # lhs and rhs must be disjoint identifiers.
+ for (ridx, ref), (nidx, node) in product(enumerate(r_items), enumerate(query)):
+ # add edges for equal expressions
+ if cmp(node, ref):
+ graph[ridx].add(num_items + nidx)
+
+ # maximum_matching returns the matches for all nodes in the graph
+ # ({ref_itm: node_itm}), hence a complete matching's size is
+    # the number of the reference's child expressions.
+ return len(HopcroftKarp(graph).maximum_matching(keys_only=True)) == num_items
+
+
+class Filter():
+ """Compare a bsfs.query.ast.filter` query's structure to a reference ast.
+
+ The reference ast may include `Rest`, `Partial`, or `Any` to account for irrelevant
+ or unknown ast pieces.
+
+    This is only a structural comparison, not a semantic one. For example, the
+    following two queries are semantically identical, but structurally different,
+ and would therefore not match:
+
+ >>> ast.filter.OneOf(ast.filter.Predicate(ns.bse.name))
+ >>> ast.filter.Predicate(ns.bse.name)
+
+ """
+
+ def __call__(self, query: ast.filter.FilterExpression, reference: ast.filter.FilterExpression) -> bool:
+ """Compare a *query* to a *reference* ast structure.
+ Return True if both are structurally equivalent.
+ """
+ if not isinstance(query, ast.filter.FilterExpression):
+ raise errors.BackendError(f'expected filter expression, found {query}')
+ if not isinstance(reference, ast.filter.FilterExpression):
+ raise errors.BackendError(f'expected filter expression, found {reference}')
+ return self._parse_filter_expression(query, reference)
+
+ def _parse_filter_expression(
+ self,
+ node: ast.filter.FilterExpression,
+ reference: ast.filter.FilterExpression,
+ ) -> bool:
+ """Route *node* to the handler of the respective FilterExpression subclass."""
+ # generic checks: reference type must be Any or match node type
+ if isinstance(reference, Any):
+ return True
+ # node-specific checks
+ if isinstance(node, ast.filter.Not):
+ return self._not(node, reference)
+ if isinstance(node, ast.filter.Has):
+ return self._has(node, reference)
+ if isinstance(node, ast.filter.Distance):
+ return self._distance(node, reference)
+ if isinstance(node, (ast.filter.Any, ast.filter.All)):
+ return self._branch(node, reference)
+ if isinstance(node, (ast.filter.And, ast.filter.Or)):
+ return self._agg(node, reference)
+ if isinstance(node, (ast.filter.Is, ast.filter.Equals, ast.filter.Substring,
+ ast.filter.StartsWith, ast.filter.EndsWith)):
+ return self._value(node, reference)
+ if isinstance(node, (ast.filter.LessThan, ast.filter.GreaterThan)):
+ return self._bounded(node, reference)
+ # invalid node
+ raise errors.BackendError(f'expected filter expression, found {node}')
+
+ def _parse_predicate_expression(
+ self,
+ node: ast.filter.PredicateExpression,
+ reference: ast.filter.PredicateExpression,
+ ) -> bool:
+ """Route *node* to the handler of the respective PredicateExpression subclass."""
+ if isinstance(reference, Any):
+ return True
+ if isinstance(node, ast.filter.Predicate):
+ return self._predicate(node, reference)
+ if isinstance(node, ast.filter.OneOf):
+ return self._one_of(node, reference)
+ # invalid node
+ raise errors.BackendError(f'expected predicate expression, found {node}')
+
+ def _one_of(self, node: ast.filter.OneOf, reference: ast.filter.PredicateExpression) -> bool:
+ if not isinstance(reference, type(node)):
+ return False
+ return _set_matcher(node, reference, self._parse_predicate_expression)
+
+ def _predicate(self, node: ast.filter.Predicate, reference: ast.filter.PredicateExpression) -> bool:
+ if not isinstance(reference, (Partial, type(node))):
+ return False
+ # partial check
+ if isinstance(reference, Partial):
+ if not isinstance(node, reference.node):
+ return False
+ return reference.match('predicate', node.predicate) \
+ and reference.match('reverse', node.reverse)
+ # full check
+ return node.predicate == reference.predicate \
+ and node.reverse == reference.reverse
+
+ def _branch(self,
+ node: typing.Union[ast.filter.Any, ast.filter.All],
+ reference: ast.filter.FilterExpression,
+ ) -> bool:
+ if not isinstance(reference, type(node)):
+ return False
+ if not self._parse_predicate_expression(node.predicate, reference.predicate): # type: ignore [attr-defined]
+ return False
+ if not self._parse_filter_expression(node.expr, reference.expr): # type: ignore [attr-defined]
+ return False
+ return True
+
+ def _agg(self, node: typing.Union[ast.filter.And, ast.filter.Or], reference: ast.filter.FilterExpression) -> bool:
+ if not isinstance(reference, type(node)):
+ return False
+ return _set_matcher(node, reference, self._parse_filter_expression) # type: ignore [arg-type]
+
+ def _not(self, node: ast.filter.Not, reference: ast.filter.FilterExpression) -> bool:
+ if not isinstance(reference, type(node)):
+ return False
+ return self._parse_filter_expression(node.expr, reference.expr)
+
+ def _has(self, node: ast.filter.Has, reference: ast.filter.FilterExpression) -> bool:
+ if not isinstance(reference, type(node)):
+ return False
+ return self._parse_predicate_expression(node.predicate, reference.predicate) \
+ and self._parse_filter_expression(node.count, reference.count)
+
+ def _distance(self, node: ast.filter.Distance, reference: ast.filter.FilterExpression) -> bool:
+ if not isinstance(reference, (Partial, type(node))):
+ return False
+ # partial check
+ if isinstance(reference, Partial):
+ if not isinstance(node, reference.node):
+ return False
+ return reference.match('reference', node.reference) \
+ and reference.match('threshold', node.threshold) \
+ and reference.match('strict', node.strict)
+ # full check
+ return node.reference == reference.reference \
+ and node.threshold == reference.threshold \
+ and node.strict == reference.strict
+
+ def _value(self, node: ast.filter._Value, reference: ast.filter.FilterExpression) -> bool:
+ if not isinstance(reference, (Partial, type(node))):
+ return False
+ # partial check
+ if isinstance(reference, Partial):
+ if not isinstance(node, reference.node):
+ return False
+ return reference.match('value', node.value)
+        # full check
+ return node.value == reference.value
+
+ def _bounded(self, node: ast.filter._Bounded, reference: ast.filter.FilterExpression) -> bool:
+ if not isinstance(reference, (Partial, type(node))):
+ return False
+ # partial check
+ if isinstance(reference, Partial):
+ if not isinstance(node, reference.node):
+ return False
+ return reference.match('threshold', node.threshold) \
+ and reference.match('strict', node.strict)
+ # full check
+ return node.threshold == reference.threshold \
+ and node.strict == reference.strict
+
+## EOF ##
diff --git a/bsfs/query/validator.py b/bsfs/query/validator.py
new file mode 100644
index 0000000..10ca492
--- /dev/null
+++ b/bsfs/query/validator.py
@@ -0,0 +1,351 @@
+
+# imports
+import typing
+
+# bsfs imports
+from bsfs import schema as bsc
+from bsfs.namespace import ns
+from bsfs.utils import errors, typename
+
+# inner-module imports
+from . import ast
+
+# exports
+__all__ : typing.Sequence[str] = (
+ 'Filter',
+ )
+
+# FIXME: Split into a submodule and the two classes into their own respective files.
+
+## code ##
+
+class Filter():
+ """Validate a `bsfs.query.ast.filter` query's structure and schema compliance.
+
+ * Conditions (Bounded, Value) can only be applied on literals
+ * Branches, Id, and Has can only be applied on nodes
+ * Predicates' domain and range must match
+ * Predicate paths must follow the schema
+ * Referenced types are present in the schema
+
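+    For example (a sketch; assumes a `Schema.node` accessor analogous to
+    `Schema.literal`, and an entity type with a filename predicate in the
+    schema):
+
+    >>> Filter(schema).validate(
+    ...     schema.node(ns.bsfs.Entity),
+    ...     ast.filter.Any(ns.bse.filename, ast.filter.Equals('hello.txt')))
+    True
+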
+ """
+
+ # schema to validate against.
+ schema: bsc.Schema
+
+ def __init__(self, schema: bsc.Schema):
+ self.schema = schema
+
+ def __call__(self, root_type: bsc.Node, query: ast.filter.FilterExpression) -> bool:
+ """Alias for `Filter.validate`."""
+ return self.validate(root_type, query)
+
+ def validate(self, root_type: bsc.Node, query: ast.filter.FilterExpression) -> bool:
+ """Validate a filter *query*, assuming the subject having *root_type*.
+
+ Raises a `bsfs.utils.errors.ConsistencyError` if the query violates the schema.
+ Raises a `bsfs.utils.errors.BackendError` if the query structure is invalid.
+
+ """
+ # root_type must be a schema.Node
+ if not isinstance(root_type, bsc.Node):
+ raise TypeError(f'expected a node, found {typename(root_type)}')
+ # root_type must exist in the schema
+ if root_type not in self.schema.nodes():
+ raise errors.ConsistencyError(f'{root_type} is not defined in the schema')
+ # check root expression
+ self._parse_filter_expression(root_type, query)
+ # all tests passed
+ return True
+
+
+ ## routing methods
+
+ def _parse_filter_expression(self, type_: bsc.Vertex, node: ast.filter.FilterExpression):
+ """Route *node* to the handler of the respective FilterExpression subclass."""
+ if isinstance(node, ast.filter.Is):
+ return self._is(type_, node)
+ if isinstance(node, ast.filter.Not):
+ return self._not(type_, node)
+ if isinstance(node, ast.filter.Has):
+ return self._has(type_, node)
+ if isinstance(node, ast.filter.Distance):
+ return self._distance(type_, node)
+ if isinstance(node, (ast.filter.Any, ast.filter.All)):
+ return self._branch(type_, node)
+ if isinstance(node, (ast.filter.And, ast.filter.Or)):
+ return self._agg(type_, node)
+ if isinstance(node, (ast.filter.Equals, ast.filter.Substring, ast.filter.StartsWith, ast.filter.EndsWith)):
+ return self._value(type_, node)
+ if isinstance(node, (ast.filter.LessThan, ast.filter.GreaterThan)):
+ return self._bounded(type_, node)
+ # invalid node
+ raise errors.BackendError(f'expected filter expression, found {node}')
+
+ def _parse_predicate_expression(self, node: ast.filter.PredicateExpression) -> typing.Tuple[bsc.Vertex, bsc.Vertex]:
+ """Route *node* to the handler of the respective PredicateExpression subclass."""
+ if isinstance(node, ast.filter.Predicate):
+ return self._predicate(node)
+ if isinstance(node, ast.filter.OneOf):
+ return self._one_of(node)
+ # invalid node
+ raise errors.BackendError(f'expected predicate expression, found {node}')
+
+
+ ## predicate expressions
+
+ def _predicate(self, node: ast.filter.Predicate) -> typing.Tuple[bsc.Vertex, bsc.Vertex]:
+ # predicate exists in the schema
+ if not self.schema.has_predicate(node.predicate):
+ raise errors.ConsistencyError(f'predicate {node.predicate} is not in the schema')
+ # determine domain and range
+ pred = self.schema.predicate(node.predicate)
+ if not isinstance(pred.range, (bsc.Node, bsc.Literal)):
+ raise errors.BackendError(f'the range of predicate {pred} is undefined')
+ dom, rng = pred.domain, pred.range
+ if node.reverse:
+ dom, rng = rng, dom # type: ignore [assignment] # variable re-use confuses mypy
+ # return domain and range
+ return dom, rng
+
+ def _one_of(self, node: ast.filter.OneOf) -> typing.Tuple[bsc.Vertex, bsc.Vertex]:
+ # determine domain and range types
+ # NOTE: select the most specific domain and the most generic range
+ dom, rng = None, None
+ for pred in node:
+ # parse child expression
+ subdom, subrng = self._parse_predicate_expression(pred)
+ # determine overall domain
+ if dom is None or subdom < dom: # pick most specific domain
+ dom = subdom
+ # domains must be related across all child expressions
+ if not subdom <= dom and not subdom >= dom:
+ raise errors.ConsistencyError(f'domains {subdom} and {dom} are not related')
+ # determine overall range
+ if rng is None or subrng > rng: # pick most generic range
+ rng = subrng
+ # ranges must be related across all child expressions
+ if not subrng <= rng and not subrng >= rng:
+ raise errors.ConsistencyError(f'ranges {subrng} and {rng} are not related')
+        # OneOf guarantees at least one expression, so dom and rng are always bsc.Vertex.
+        # mypy does not realize this, hence we ignore the warning.
+ return dom, rng # type: ignore [return-value]
+
+
+ ## intermediates
+
+ def _branch(self, type_: bsc.Vertex, node: ast.filter._Branch):
+ # type is a Node
+ if not isinstance(type_, bsc.Node):
+ raise errors.ConsistencyError(f'expected a Node, found {type_}')
+ # type exists in the schema
+ # FIXME: Isn't it actually guaranteed that the type (except the root type) is part of the schema?
+ # all types can be traced back to (a) root_type, (b) predicate, or (c) manually set (e.g. in _is).
+ # For (a), we do (and have to) perform a check. For (c), the code base should be consistent throughout
+ # the module, so this is an assumption that has to be ensured in schema.Schema. For (b), we know (and
+ # check) that the predicate is in the schema, hence all node/literals derived from it are also in the
+ # schema by construction of the schema.Schema class. So, why do we check this every time?
+ if type_ not in self.schema.nodes():
+ raise errors.ConsistencyError(f'node {type_} is not in the schema')
+ # predicate is valid
+ dom, rng = self._parse_predicate_expression(node.predicate)
+ # type_ is a subtype of the predicate's domain
+ if not type_ <= dom:
+ raise errors.ConsistencyError(f'expected type {dom} or subtype thereof, found {type_}')
+ # child expression is valid
+ self._parse_filter_expression(rng, node.expr)
+
+ def _agg(self, type_: bsc.Vertex, node: ast.filter._Agg):
+ for expr in node:
+ # child expression is valid
+ self._parse_filter_expression(type_, expr)
+
+ def _not(self, type_: bsc.Vertex, node: ast.filter.Not):
+ # child expression is valid
+ self._parse_filter_expression(type_, node.expr)
+
+ def _has(self, type_: bsc.Vertex, node: ast.filter.Has):
+ # type is a Node
+ if not isinstance(type_, bsc.Node):
+ raise errors.ConsistencyError(f'expected a Node, found {type_}')
+ # type exists in the schema
+ if type_ not in self.schema.nodes():
+ raise errors.ConsistencyError(f'node {type_} is not in the schema')
+ # predicate is valid
+        dom, _ = self._parse_predicate_expression(node.predicate)
+ # type_ is a subtype of the predicate's domain
+ if not type_ <= dom:
+            raise errors.ConsistencyError(f'expected type {dom} or subtype thereof, found {type_}')
+ # node.count is a numerical expression
+ self._parse_filter_expression(self.schema.literal(ns.bsl.Number), node.count)
+
+ def _distance(self, type_: bsc.Vertex, node: ast.filter.Distance):
+        # type is a Feature
+ if not isinstance(type_, bsc.Feature):
+ raise errors.ConsistencyError(f'expected a Feature, found {type_}')
+ # type exists in the schema
+ if type_ not in self.schema.literals():
+ raise errors.ConsistencyError(f'literal {type_} is not in the schema')
+ # reference matches type_
+ if len(node.reference) != type_.dimension:
+ raise errors.ConsistencyError(f'reference has dimension {len(node.reference)}, expected {type_.dimension}')
+ # FIXME: test dtype
+
+
+ ## conditions
+
+ def _is(self, type_: bsc.Vertex, node: ast.filter.Is): # pylint: disable=unused-argument # (node)
+ if not isinstance(type_, bsc.Node):
+ raise errors.ConsistencyError(f'expected a Node, found {type_}')
+ if type_ not in self.schema.nodes():
+ raise errors.ConsistencyError(f'node {type_} is not in the schema')
+
+ def _value(self, type_: bsc.Vertex, node: ast.filter._Value): # pylint: disable=unused-argument # (node)
+ # type is a literal
+ if not isinstance(type_, bsc.Literal):
+ raise errors.ConsistencyError(f'expected a Literal, found {type_}')
+ # type exists in the schema
+ if type_ not in self.schema.literals():
+ raise errors.ConsistencyError(f'literal {type_} is not in the schema')
+ # FIXME: Check if node.value corresponds to type_
+ # FIXME: A specific literal might be requested (i.e., a numeric type when used in Has)
+
+ def _bounded(self, type_: bsc.Vertex, node: ast.filter._Bounded): # pylint: disable=unused-argument # (node)
+ # type is a literal
+ if not isinstance(type_, bsc.Literal):
+ raise errors.ConsistencyError(f'expected a Literal, found {type_}')
+ # type exists in the schema
+ if type_ not in self.schema.literals():
+ raise errors.ConsistencyError(f'literal {type_} is not in the schema')
+ # type must be a numerical
+ if not type_ <= self.schema.literal(ns.bsl.Number):
+ raise errors.ConsistencyError(f'expected a number type, found {type_}')
+ # FIXME: Check if node.value corresponds to type_
+
+
+class Fetch():
+ """Validate a `bsfs.query.ast.fetch` query's structure and schema compliance.
+
+ * Value can only be applied on literals
+ * Node can only be applied on nodes
+ * Names must be non-empty
+ * Branching nodes' predicates must match the type
+ * Symbols must be in the schema
+ * Predicates must follow the schema
+
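+    For example (a sketch; assumes a `Schema.node` accessor and a
+    literal-valued filename predicate in the schema):
+
+    >>> Fetch(schema).validate(
+    ...     schema.node(ns.bsfs.Entity),
+    ...     ast.fetch.Value(ns.bse.filename, 'name'))
+    True
+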
+ """
+
+ # schema to validate against.
+ schema: bsc.Schema
+
+ def __init__(self, schema: bsc.Schema):
+ self.schema = schema
+
+ def __call__(self, root_type: bsc.Node, query: ast.fetch.FetchExpression) -> bool:
+ """Alias for `Fetch.validate`."""
+ return self.validate(root_type, query)
+
+ def validate(self, root_type: bsc.Node, query: ast.fetch.FetchExpression) -> bool:
+ """Validate a fetch *query*, assuming the subject having *root_type*.
+
+ Raises a `bsfs.utils.errors.ConsistencyError` if the query violates the schema.
+ Raises a `bsfs.utils.errors.BackendError` if the query structure is invalid.
+
+ """
+ # root_type must be a schema.Node
+ if not isinstance(root_type, bsc.Node):
+ raise TypeError(f'expected a node, found {typename(root_type)}')
+ # root_type must exist in the schema
+ if root_type not in self.schema.nodes():
+ raise errors.ConsistencyError(f'{root_type} is not defined in the schema')
+ # query must be a FetchExpression
+ if not isinstance(query, ast.fetch.FetchExpression):
+ raise TypeError(f'expected a fetch expression, found {typename(query)}')
+ # check root expression
+ self._parse_fetch_expression(root_type, query)
+ # all tests passed
+ return True
+
+ def _parse_fetch_expression(self, type_: bsc.Vertex, node: ast.fetch.FetchExpression):
+ """Route *node* to the handler of the respective FetchExpression subclass."""
+ if isinstance(node, (ast.fetch.Fetch, ast.fetch.Value, ast.fetch.Node)):
+ # NOTE: don't return so that checks below are executed
+ self._branch(type_, node)
+ if isinstance(node, (ast.fetch.Value, ast.fetch.Node)):
+ # NOTE: don't return so that checks below are executed
+ self._named(type_, node)
+ if isinstance(node, ast.fetch.All):
+ return self._all(type_, node)
+ if isinstance(node, ast.fetch.Fetch):
+ return self._fetch(type_, node)
+ if isinstance(node, ast.fetch.Value):
+ return self._value(type_, node)
+ if isinstance(node, ast.fetch.Node):
+ return self._node(type_, node)
+ if isinstance(node, ast.fetch.This):
+ return self._this(type_, node)
+ # invalid node
+ raise errors.BackendError(f'expected fetch expression, found {node}')
+
+ def _all(self, type_: bsc.Vertex, node: ast.fetch.All):
+ # check child expressions
+ for expr in node:
+ self._parse_fetch_expression(type_, expr)
+
+ def _branch(self, type_: bsc.Vertex, node: ast.fetch._Branch):
+ # type is a node
+ if not isinstance(type_, bsc.Node):
+ raise errors.ConsistencyError(f'expected a Node, found {type_}')
+ # node exists in the schema
+ if type_ not in self.schema.nodes():
+ raise errors.ConsistencyError(f'node {type_} is not in the schema')
+ # predicate exists in the schema
+ if not self.schema.has_predicate(node.predicate):
+ raise errors.ConsistencyError(f'predicate {node.predicate} is not in the schema')
+ pred = self.schema.predicate(node.predicate)
+ # type_ must be a subclass of domain
+ if not type_ <= pred.domain:
+ raise errors.ConsistencyError(
+ f'expected type {pred.domain} or subtype thereof, found {type_}')
+
+ def _fetch(self, type_: bsc.Vertex, node: ast.fetch.Fetch): # pylint: disable=unused-argument # type_ was considered in _branch
+ # range must be a node
+ rng = self.schema.predicate(node.predicate).range
+ if not isinstance(rng, bsc.Node):
+ raise errors.ConsistencyError(
+ f'expected the predicate\'s range to be a Node, found {rng}')
+ # child expression must be valid
+ self._parse_fetch_expression(rng, node.expr)
+
+ def _named(self, type_: bsc.Vertex, node: ast.fetch._Named): # pylint: disable=unused-argument # type_ was considered in _branch
+ # name must be set
+ if node.name.strip() == '':
+ raise errors.BackendError('node name cannot be empty')
+ # FIXME: check for double name use?
+
+ def _node(self, type_: bsc.Vertex, node: ast.fetch.Node): # pylint: disable=unused-argument # type_ was considered in _branch
+ # range must be a node
+ rng = self.schema.predicate(node.predicate).range
+ if not isinstance(rng, bsc.Node):
+ raise errors.ConsistencyError(
+ f'expected the predicate\'s range to be a Node, found {rng}')
+
+ def _value(self, type_: bsc.Vertex, node: ast.fetch.Value): # pylint: disable=unused-argument # type_ was considered in _branch
+ # range must be a literal
+ rng = self.schema.predicate(node.predicate).range
+ if not isinstance(rng, bsc.Literal):
+ raise errors.ConsistencyError(
+ f'expected the predicate\'s range to be a Literal, found {rng}')
+
+ def _this(self, type_: bsc.Vertex, node: ast.fetch.This):
+ # type is a node
+ if not isinstance(type_, bsc.Node):
+ raise errors.ConsistencyError(f'expected a Node, found {type_}')
+ # node exists in the schema
+ if type_ not in self.schema.nodes():
+ raise errors.ConsistencyError(f'node {type_} is not in the schema')
+ # name must be set
+ if node.name.strip() == '':
+ raise errors.BackendError('node name cannot be empty')
+
+## EOF ##
diff --git a/bsfs/schema/__init__.py b/bsfs/schema/__init__.py
index ad4d456..ca2e0cd 100644
--- a/bsfs/schema/__init__.py
+++ b/bsfs/schema/__init__.py
@@ -1,15 +1,15 @@
-"""
-Part of the BlackStar filesystem (bsfs) module.
-A copy of the license is provided with the project.
-Author: Matthias Baumgartner, 2022
-"""
# imports
import typing
# inner-module imports
from .schema import Schema
-from .types import Literal, Node, Predicate
+from .serialize import from_string, to_string
+from .types import Literal, Node, Predicate, Vertex, Feature, \
+ ROOT_VERTEX, ROOT_NODE, ROOT_LITERAL, \
+ ROOT_NUMBER, ROOT_TIME, \
+ ROOT_ARRAY, ROOT_FEATURE, \
+ ROOT_PREDICATE
# exports
__all__: typing.Sequence[str] = (
@@ -17,6 +17,8 @@ __all__: typing.Sequence[str] = (
'Node',
'Predicate',
'Schema',
+ 'from_string',
+ 'to_string',
)
## EOF ##
diff --git a/bsfs/schema/schema.py b/bsfs/schema/schema.py
index c5d4571..c104436 100644
--- a/bsfs/schema/schema.py
+++ b/bsfs/schema/schema.py
@@ -1,16 +1,9 @@
-"""
-Part of the BlackStar filesystem (bsfs) module.
-A copy of the license is provided with the project.
-Author: Matthias Baumgartner, 2022
-"""
# imports
from collections import abc, namedtuple
import typing
-import rdflib
# bsfs imports
-from bsfs.namespace import ns
from bsfs.utils import errors, URI, typename
# inner-module imports
@@ -51,11 +44,13 @@ class Schema():
def __init__(
self,
- predicates: typing.Iterable[types.Predicate],
+ predicates: typing.Optional[typing.Iterable[types.Predicate]] = None,
nodes: typing.Optional[typing.Iterable[types.Node]] = None,
literals: typing.Optional[typing.Iterable[types.Literal]] = None,
):
# materialize arguments
+ if predicates is None:
+ predicates = set()
if nodes is None:
nodes = set()
if literals is None:
@@ -63,24 +58,41 @@ class Schema():
nodes = set(nodes)
literals = set(literals)
predicates = set(predicates)
+
+ # add root types to the schema
+ nodes.add(types.ROOT_NODE)
+ literals.add(types.ROOT_LITERAL)
+ predicates.add(types.ROOT_PREDICATE)
+ # add minimally necessary types to the schema
+ literals.add(types.ROOT_BLOB)
+ literals.add(types.ROOT_NUMBER)
+ literals.add(types.ROOT_TIME)
+ literals.add(types.ROOT_ARRAY)
+ literals.add(types.ROOT_FEATURE)
+
+ # FIXME: ensure that types derive from the right root?
+
# include parents in predicates set
# TODO: review type annotations and ignores for python >= 3.11 (parents is _Type but should be typing.Self)
predicates |= {par for pred in predicates for par in pred.parents()} # type: ignore [misc]
# include predicate domain in nodes set
nodes |= {pred.domain for pred in predicates}
# include predicate range in nodes and literals sets
- prange = {pred.range for pred in predicates if pred.range is not None}
+ prange = {pred.range for pred in predicates}
nodes |= {vert for vert in prange if isinstance(vert, types.Node)}
literals |= {vert for vert in prange if isinstance(vert, types.Literal)}
+ # NOTE: ROOT_PREDICATE has a Vertex as range which is neither in nodes nor literals
+ # FIXME: with the ROOT_VERTEX missing, the schema is not complete anymore!
+
# include parents in nodes and literals sets
- # NOTE: Must be done after predicate domain/range was handled
- # so that their parents are included as well.
+ # NOTE: Must come after predicate domain/range was handled to have their parents as well.
nodes |= {par for node in nodes for par in node.parents()} # type: ignore [misc]
literals |= {par for lit in literals for par in lit.parents()} # type: ignore [misc]
# assign members
self._nodes = {node.uri: node for node in nodes}
self._literals = {lit.uri: lit for lit in literals}
self._predicates = {pred.uri: pred for pred in predicates}
+
# verify unique uris
if len(nodes) != len(self._nodes):
raise errors.ConsistencyError('inconsistent nodes')
@@ -214,6 +226,7 @@ class Schema():
>>> Schema.Union([a, b, c])
"""
+ # FIXME: copy type annotations?
if len(args) == 0:
raise TypeError('Schema.Union requires at least one argument (Schema or Iterable)')
if isinstance(args[0], cls): # args is sequence of Schema instances
@@ -295,92 +308,8 @@ class Schema():
"""Return the Literal matching the *uri*."""
return self._literals[uri]
-
- ## constructors ##
-
-
- @classmethod
- def Empty(cls) -> 'Schema': # pylint: disable=invalid-name # capitalized classmethod
- """Return a minimal Schema."""
- node = types.Node(ns.bsfs.Node, None)
- literal = types.Literal(ns.bsfs.Literal, None)
- predicate = types.Predicate(
- uri=ns.bsfs.Predicate,
- parent=None,
- domain=node,
- range=None,
- unique=False,
- )
- return cls((predicate, ), (node, ), (literal, ))
-
-
- @classmethod
- def from_string(cls, schema: str) -> 'Schema': # pylint: disable=invalid-name # capitalized classmethod
- """Load and return a Schema from a string."""
- # parse string into rdf graph
- graph = rdflib.Graph()
- graph.parse(data=schema, format='turtle')
-
- def _fetch_hierarchically(factory, curr):
- # emit current node
- yield curr
- # walk through childs
- for child in graph.subjects(rdflib.URIRef(ns.rdfs.subClassOf), rdflib.URIRef(curr.uri)):
- # convert to URI
- child = URI(child)
- # check circular dependency
- if child == curr.uri or child in {node.uri for node in curr.parents()}:
- raise errors.ConsistencyError('circular dependency')
- # recurse and emit (sub*)childs
- yield from _fetch_hierarchically(factory, factory(child, curr))
-
- # fetch nodes
- nodes = set(_fetch_hierarchically(types.Node, types.Node(ns.bsfs.Node, None)))
- nodes_lut = {node.uri: node for node in nodes}
- if len(nodes_lut) != len(nodes):
- raise errors.ConsistencyError('inconsistent nodes')
-
- # fetch literals
- literals = set(_fetch_hierarchically(types.Literal, types.Literal(ns.bsfs.Literal, None)))
- literals_lut = {lit.uri: lit for lit in literals}
- if len(literals_lut) != len(literals):
- raise errors.ConsistencyError('inconsistent literals')
-
- # fetch predicates
- def build_predicate(uri, parent):
- uri = rdflib.URIRef(uri)
- # get domain
- domains = set(graph.objects(uri, rdflib.RDFS.domain))
- if len(domains) != 1:
- raise errors.ConsistencyError(f'inconsistent domain: {domains}')
- dom = nodes_lut.get(next(iter(domains)))
- if dom is None:
- raise errors.ConsistencyError('missing domain')
- # get range
- ranges = set(graph.objects(uri, rdflib.RDFS.range))
- if len(ranges) != 1:
- raise errors.ConsistencyError(f'inconsistent range: {ranges}')
- rng = next(iter(ranges))
- rng = nodes_lut.get(rng, literals_lut.get(rng))
- if rng is None:
- raise errors.ConsistencyError('missing range')
- # get unique flag
- uniques = set(graph.objects(uri, rdflib.URIRef(ns.bsfs.unique)))
- if len(uniques) != 1:
- raise errors.ConsistencyError(f'inconsistent unique flags: {uniques}')
- unique = bool(next(iter(uniques)))
- # build Predicate
- return types.Predicate(URI(uri), parent, dom, rng, unique)
-
- root_predicate = types.Predicate(
- uri=ns.bsfs.Predicate,
- parent=None,
- domain=nodes_lut[ns.bsfs.Node],
- range=None, # FIXME: Unclear how to handle this! Can be either a Literal or a Node
- unique=False,
- )
- predicates = _fetch_hierarchically(build_predicate, root_predicate)
- # return Schema
- return cls(predicates, nodes, literals)
+ def predicates_at(self, node: types.Node) -> typing.Iterator[types.Predicate]:
+ """Return predicates that have domain *node* (or superclass thereof)."""
+ return iter(pred for pred in self._predicates.values() if node <= pred.domain)
## EOF ##
diff --git a/bsfs/schema/serialize.py b/bsfs/schema/serialize.py
new file mode 100644
index 0000000..ea8b2f4
--- /dev/null
+++ b/bsfs/schema/serialize.py
@@ -0,0 +1,255 @@
+
+# standard imports
+import itertools
+import typing
+
+# external imports
+import rdflib
+
+# bsfs imports
+from bsfs.namespace import ns
+from bsfs.utils import errors, URI, typename
+
+# inner-module imports
+from . import types
+from . import schema
+
+# exports
+__all__: typing.Sequence[str] = (
+ 'to_string',
+ 'from_string',
+ )
+
+
+## code ##
+
+def from_string(schema_str: str) -> schema.Schema:
+ """Load and return a Schema from a string."""
+ # parse string into rdf graph
+ graph = rdflib.Graph()
+ graph.parse(data=schema_str, format='turtle')
+
+ # helper functions
+ def _fetch_value(
+ subject: URI,
+ predicate: rdflib.URIRef,
+ value_factory: typing.Callable[[typing.Any], typing.Any],
+ ) -> typing.Optional[typing.Any]:
+ """Fetch the object of a given subject and predicate.
+        Raises an `errors.ConsistencyError` if multiple objects match.
+ """
+ values = list(graph.objects(rdflib.URIRef(subject), predicate))
+ if len(values) == 0:
+ return None
+ if len(values) == 1:
+ return value_factory(values[0])
+ raise errors.ConsistencyError(
+ f'{subject} has multiple values for predicate {str(predicate)}, expected zero or one')
+
+ def _convert(value):
+ """Convert the subject type from rdflib to a bsfs native type."""
+ if isinstance(value, rdflib.Literal):
+ return value.value
+ if isinstance(value, rdflib.URIRef):
+ return URI(value)
+ # value is neither a node nor a literal, but e.g. a blank node
+ raise errors.BackendError(f'expected Literal or URIRef, found {typename(value)}')
+
+ def _fetch_hierarchically(factory, curr):
+ """Walk through a rdfs:subClassOf hierarchy, creating symbols along the way."""
+ # emit current node
+ yield curr
+        # walk through children
+ for child in graph.subjects(rdflib.URIRef(ns.rdfs.subClassOf), rdflib.URIRef(curr.uri)):
+ # fetch annotations
+ annotations = {
+ URI(pred): _convert(value)
+ for pred, value # FIXME: preserve datatype of value?!
+ in graph.predicate_objects(child)
+ if URI(pred) != ns.rdfs.subClassOf
+ }
+ # convert child to URI
+ child = URI(child)
+ # check circular dependency
+ if child == curr.uri or child in {node.uri for node in curr.parents()}:
+ raise errors.ConsistencyError('circular dependency')
+            # recurse and emit (sub*)children
+ yield from _fetch_hierarchically(factory, factory(child, curr, **annotations))
+
+ # fetch nodes
+ nodes = set(_fetch_hierarchically(types.Node, types.ROOT_NODE))
+ nodes_lut = {node.uri: node for node in nodes}
+ if len(nodes_lut) != len(nodes):
+ raise errors.ConsistencyError('inconsistent nodes')
+
+ # fetch literals
+ def _build_literal(uri, parent, **annotations):
+ """Literal factory."""
+ # break out on root feature type
+ if uri == types.ROOT_FEATURE.uri:
+ return types.ROOT_FEATURE
+ # handle feature types
+ if isinstance(parent, types.Feature):
+ # clean annotations
+ annotations.pop(ns.bsfs.dimension, None)
+ annotations.pop(ns.bsfs.dtype, None)
+ annotations.pop(ns.bsfs.distance, None)
+ # get dimension
+ dimension = _fetch_value(uri, rdflib.URIRef(ns.bsfs.dimension), int)
+ # get dtype
+ dtype = _fetch_value(uri, rdflib.URIRef(ns.bsfs.dtype), URI)
+ # get distance
+ distance = _fetch_value(uri, rdflib.URIRef(ns.bsfs.distance), URI)
+ # return feature
+ return parent.child(URI(uri), dtype=dtype, dimension=dimension, distance=distance, **annotations)
+ # handle non-feature types
+ return parent.child(URI(uri), **annotations)
+
+ literals = set(_fetch_hierarchically(_build_literal, types.ROOT_LITERAL))
+ literals_lut = {lit.uri: lit for lit in literals}
+ if len(literals_lut) != len(literals):
+ raise errors.ConsistencyError('inconsistent literals')
+
+ # fetch predicates
+ def _build_predicate(uri, parent, **annotations):
+ """Predicate factory."""
+ # clean annotations
+ annotations.pop(ns.rdfs.domain, None)
+ annotations.pop(ns.rdfs.range, None)
+ annotations.pop(ns.bsfs.unique, None)
+ # get domain
+ dom = _fetch_value(uri, rdflib.RDFS.domain, URI)
+ if dom is not None and dom not in nodes_lut:
+ raise errors.ConsistencyError(f'predicate {uri} has undefined domain {dom}')
+ if dom is not None:
+ dom = nodes_lut[dom]
+ # get range
+ rng = _fetch_value(uri, rdflib.RDFS.range, URI)
+ if rng is not None and rng not in nodes_lut and rng not in literals_lut:
+ raise errors.ConsistencyError(f'predicate {uri} has undefined range {rng}')
+ if rng is not None:
+ rng = nodes_lut.get(rng, literals_lut.get(rng))
+ # get unique
+ unique = _fetch_value(uri, rdflib.URIRef(ns.bsfs.unique), bool)
+ # build predicate
+ return parent.child(URI(uri), domain=dom, range=rng, unique=unique, **annotations)
+
+ predicates = _fetch_hierarchically(_build_predicate, types.ROOT_PREDICATE)
+
+ return schema.Schema(predicates, nodes, literals)
+
+
+
+def to_string(schema_inst: schema.Schema, fmt: str = 'turtle') -> str:
+ """Serialize a `bsfs.schema.Schema` to a string.
+ See `rdflib.Graph.serialize` for viable formats (default: turtle).
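+
+    A serialized schema can be loaded back via `from_string` (a sketch):
+
+    >>> restored = from_string(to_string(schema_inst))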
+ """
+
+ # type of emitted triples.
+ T_TRIPLE = typing.Iterator[typing.Tuple[rdflib.URIRef, rdflib.URIRef, rdflib.term.Identifier]]
+
+    def _type(tpe: types._Type) -> T_TRIPLE:
+ """Emit _Type properties (parent, annotations)."""
+ # emit parent
+ if tpe.parent is not None:
+ yield (
+ rdflib.URIRef(tpe.uri),
+ rdflib.URIRef(ns.rdfs.subClassOf),
+ rdflib.URIRef(tpe.parent.uri),
+ )
+ # emit annotations
+ for prop, value in tpe.annotations.items():
+ yield (
+ rdflib.URIRef(tpe.uri),
+ rdflib.URIRef(prop),
+ rdflib.Literal(value), # FIXME: datatype?!
+ )
+
+ def _predicate(pred: types.Predicate) -> T_TRIPLE:
+ """Emit Predicate properties (domain, range, unique)."""
+ # no need to emit anything for the root predicate
+ if pred == types.ROOT_PREDICATE:
+ return
+ # emit domain
+ if pred.domain != getattr(pred.parent, 'domain', None):
+ yield (
+ rdflib.URIRef(pred.uri),
+ rdflib.URIRef(ns.rdfs.domain),
+ rdflib.URIRef(pred.domain.uri),
+ )
+ # emit range
+ if pred.range != getattr(pred.parent, 'range', None):
+ yield (
+ rdflib.URIRef(pred.uri),
+ rdflib.URIRef(ns.rdfs.range),
+ rdflib.URIRef(pred.range.uri),
+ )
+ # emit cardinality
+ if pred.unique != getattr(pred.parent, 'unique', None):
+ yield (
+ rdflib.URIRef(pred.uri),
+ rdflib.URIRef(ns.bsfs.unique),
+ rdflib.Literal(pred.unique, datatype=rdflib.XSD.boolean),
+ )
+
+ def _feature(feat: types.Feature) -> T_TRIPLE:
+ """Emit Feature properties (dimension, dtype, distance)."""
+        # emit dimension
+ if feat.dimension != getattr(feat.parent, 'dimension', None):
+ yield (
+ rdflib.URIRef(feat.uri),
+ rdflib.URIRef(ns.bsfs.dimension),
+ rdflib.Literal(feat.dimension, datatype=rdflib.XSD.integer),
+ )
+ # emit dtype
+ if feat.dtype != getattr(feat.parent, 'dtype', None):
+ yield (
+ rdflib.URIRef(feat.uri),
+ rdflib.URIRef(ns.bsfs.dtype),
+ rdflib.URIRef(feat.dtype),
+ )
+ # emit distance
+ if feat.distance != getattr(feat.parent, 'distance', None):
+ yield (
+ rdflib.URIRef(feat.uri),
+ rdflib.URIRef(ns.bsfs.distance),
+ rdflib.URIRef(feat.distance),
+ )
+
+ def _parse(node: types._Type) -> T_TRIPLE:
+ """Emit all properties of a type."""
+ # check arg
+ if not isinstance(node, types._Type): # pylint: disable=protected-access
+ raise TypeError(node)
+ # emit _Type essentials
+ yield from _type(node)
+ # emit properties of derived types
+ if isinstance(node, types.Predicate):
+ yield from _predicate(node)
+ if isinstance(node, types.Feature):
+ yield from _feature(node)
+
+ # create graph
+ graph = rdflib.Graph()
+ # add triples to graph
+ nodes = itertools.chain(
+ schema_inst.nodes(),
+ schema_inst.literals(),
+ schema_inst.predicates())
+ for node in nodes:
+ for triple in _parse(node):
+ graph.add(triple)
+ # add known namespaces for readability
+ # FIXME: more generically?
+ graph.bind('bsfs', rdflib.URIRef(ns.bsfs + '/'))
+ graph.bind('bsl', rdflib.URIRef(ns.bsl + '/'))
+ graph.bind('bsn', rdflib.URIRef(ns.bsn + '#'))
+ graph.bind('bse', rdflib.URIRef(ns.bsfs.Entity() + '#'))
+ graph.bind('rdf', rdflib.URIRef(ns.rdf))
+ graph.bind('rdfs', rdflib.URIRef(ns.rdfs))
+ graph.bind('schema', rdflib.URIRef(ns.schema))
+ graph.bind('xsd', rdflib.URIRef(ns.xsd))
+ # serialize to turtle
+ return graph.serialize(format=fmt)
+
+## EOF ##
diff --git a/bsfs/schema/types.py b/bsfs/schema/types.py
index 54a7e99..5834df8 100644
--- a/bsfs/schema/types.py
+++ b/bsfs/schema/types.py
@@ -1,13 +1,9 @@
-"""
-Part of the BlackStar filesystem (bsfs) module.
-A copy of the license is provided with the project.
-Author: Matthias Baumgartner, 2022
-"""
# imports
import typing
# bsfs imports
+from bsfs.namespace import ns
from bsfs.utils import errors, URI, typename
# exports
@@ -15,6 +11,7 @@ __all__: typing.Sequence[str] = (
'Literal',
'Node',
'Predicate',
+ 'Feature',
)
@@ -99,9 +96,11 @@ class _Type():
self,
uri: URI,
parent: typing.Optional['_Type'] = None,
+ **annotations: typing.Any,
):
- self.uri = uri
+ self.uri = URI(uri)
self.parent = parent
+ self.annotations = annotations
def parents(self) -> typing.Generator['_Type', None, None]:
"""Generate a list of parent nodes."""
@@ -110,9 +109,17 @@ class _Type():
yield curr
curr = curr.parent
- def get_child(self, uri: URI, **kwargs):
+ def child(
+ self,
+ uri: URI,
+ **kwargs,
+ ):
"""Return a child of the current class."""
- return type(self)(uri, self, **kwargs)
+ return type(self)(
+ uri=uri,
+ parent=self,
+ **kwargs
+ )
def __str__(self) -> str:
return f'{typename(self)}({self.uri})'
@@ -138,8 +145,10 @@ class _Type():
def __lt__(self, other: typing.Any) -> bool:
"""Return True iff *self* is a true subclass of *other*."""
- if not type(self) is type(other): # type mismatch # pylint: disable=unidiomatic-typecheck
+ if not isinstance(other, _Type):
return NotImplemented
+ if not isinstance(other, type(self)): # FIXME: necessary?
+ return False
if self.uri == other.uri: # equivalence
return False
if self in other.parents(): # superclass
@@ -151,8 +160,10 @@ class _Type():
def __le__(self, other: typing.Any) -> bool:
"""Return True iff *self* is equivalent or a subclass of *other*."""
- if not type(self) is type(other): # type mismatch # pylint: disable=unidiomatic-typecheck
+ if not isinstance(other, _Type):
return NotImplemented
+ if not isinstance(other, type(self)): # FIXME: necessary?
+ return False
if self.uri == other.uri: # equivalence
return True
if self in other.parents(): # superclass
@@ -164,8 +175,10 @@ class _Type():
def __gt__(self, other: typing.Any) -> bool:
"""Return True iff *self* is a true superclass of *other*."""
- if not type(self) is type(other): # type mismatch # pylint: disable=unidiomatic-typecheck
+ if not isinstance(other, _Type):
return NotImplemented
+ if not isinstance(other, type(self)): # FIXME: necessary?
+ return False
if self.uri == other.uri: # equivalence
return False
if self in other.parents(): # superclass
@@ -177,8 +190,10 @@ class _Type():
def __ge__(self, other: typing.Any) -> bool:
"""Return True iff *self* is eqiuvalent or a superclass of *other*."""
- if not type(self) is type(other): # type mismatch # pylint: disable=unidiomatic-typecheck
+ if not isinstance(other, _Type):
return NotImplemented
+ if not isinstance(other, type(self)): # FIXME: necessary?
+ return False
if self.uri == other.uri: # equivalence
return True
if self in other.parents(): # superclass
@@ -189,32 +204,95 @@ class _Type():
return False
-class _Vertex(_Type):
+class Vertex(_Type):
"""Graph vertex types. Can be a Node or a Literal."""
- def __init__(self, uri: URI, parent: typing.Optional['_Vertex']):
- super().__init__(uri, parent)
+ parent: typing.Optional['Vertex']
+ def __init__(self, uri: URI, parent: typing.Optional['Vertex'], **kwargs):
+ super().__init__(uri, parent, **kwargs)
-class Node(_Vertex):
+class Node(Vertex):
"""Node type."""
- def __init__(self, uri: URI, parent: typing.Optional['Node']):
- super().__init__(uri, parent)
+ parent: typing.Optional['Node']
+ def __init__(self, uri: URI, parent: typing.Optional['Node'], **kwargs):
+ super().__init__(uri, parent, **kwargs)
-class Literal(_Vertex):
+class Literal(Vertex):
"""Literal type."""
- def __init__(self, uri: URI, parent: typing.Optional['Literal']):
- super().__init__(uri, parent)
+ parent: typing.Optional['Literal']
+ def __init__(self, uri: URI, parent: typing.Optional['Literal'], **kwargs):
+ super().__init__(uri, parent, **kwargs)
+
+
+class Feature(Literal):
+ """Feature type."""
+
+ # Number of feature vector dimensions.
+ dimension: int
+
+ # Feature vector datatype.
+ dtype: URI
+
+ # Distance measure to compare feature vectors.
+ distance: URI
+
+ def __init__(
+ self,
+ # Type members
+ uri: URI,
+ parent: typing.Optional[Literal],
+ # Feature members
+ dimension: int,
+ dtype: URI,
+ distance: URI,
+ **kwargs,
+ ):
+ super().__init__(uri, parent, **kwargs)
+ self.dimension = int(dimension)
+ self.dtype = URI(dtype)
+ self.distance = URI(distance)
+
+ def __hash__(self) -> int:
+ return hash((super().__hash__(), self.dimension, self.dtype, self.distance))
+
+ def __eq__(self, other: typing.Any) -> bool:
+ return super().__eq__(other) \
+ and self.dimension == other.dimension \
+ and self.dtype == other.dtype \
+ and self.distance == other.distance
+ def child(
+ self,
+ uri: URI,
+ dimension: typing.Optional[int] = None,
+ dtype: typing.Optional[URI] = None,
+ distance: typing.Optional[URI] = None,
+ **kwargs,
+ ):
+ """Return a child of the current class."""
+ if dimension is None:
+ dimension = self.dimension
+ if dtype is None:
+ dtype = self.dtype
+ if distance is None:
+ distance = self.distance
+ return super().child(
+ uri=uri,
+ dimension=dimension,
+ dtype=dtype,
+ distance=distance,
+ **kwargs,
+ )
class Predicate(_Type):
- """Predicate type."""
+ """Predicate base type."""
# source type.
domain: Node
# destination type.
- range: typing.Optional[typing.Union[Node, Literal]]
+ range: Vertex
# maximum cardinality of type.
unique: bool
@@ -226,22 +304,23 @@ class Predicate(_Type):
parent: typing.Optional['Predicate'],
# Predicate members
domain: Node,
- range: typing.Optional[typing.Union[Node, Literal]], # pylint: disable=redefined-builtin
+ range: Vertex, # pylint: disable=redefined-builtin
unique: bool,
+ **kwargs,
):
# check arguments
if not isinstance(domain, Node):
raise TypeError(domain)
- if range is not None and not isinstance(range, Node) and not isinstance(range, Literal):
+ if range != ROOT_VERTEX and not isinstance(range, (Node, Literal)):
raise TypeError(range)
# initialize
- super().__init__(uri, parent)
+ super().__init__(uri, parent, **kwargs)
self.domain = domain
self.range = range
- self.unique = unique
+ self.unique = bool(unique)
def __hash__(self) -> int:
- return hash((super().__hash__(), self.domain, self.range, self.unique))
+ return hash((super().__hash__(), self.domain, self.unique, self.range))
def __eq__(self, other: typing.Any) -> bool:
return super().__eq__(other) \
@@ -249,11 +328,11 @@ class Predicate(_Type):
and self.range == other.range \
and self.unique == other.unique
- def get_child(
+ def child(
self,
uri: URI,
domain: typing.Optional[Node] = None,
- range: typing.Optional[_Vertex] = None, # pylint: disable=redefined-builtin
+ range: typing.Optional[Vertex] = None, # pylint: disable=redefined-builtin
unique: typing.Optional[bool] = None,
**kwargs,
):
@@ -264,13 +343,73 @@ class Predicate(_Type):
raise errors.ConsistencyError(f'{domain} must be a subclass of {self.domain}')
if range is None:
range = self.range
- if range is None: # inherited range from ns.bsfs.Predicate
- raise ValueError('range must be defined by the parent or argument')
- if self.range is not None and not range <= self.range:
+ # NOTE: The root predicate has a Vertex as range, which is neither a parent of the root
+ # Node nor Literal. Hence, that test is skipped since a child should be allowed to
+ # specialize from Vertex to anything.
+ if self.range != ROOT_VERTEX and not range <= self.range:
raise errors.ConsistencyError(f'{range} must be a subclass of {self.range}')
if unique is None:
unique = self.unique
- return super().get_child(uri, domain=domain, range=range, unique=unique, **kwargs)
+ return super().child(
+ uri=uri,
+ domain=domain,
+ range=range,
+ unique=unique,
+ **kwargs
+ )
+
+
+# essential vertices
+ROOT_VERTEX = Vertex(
+ uri=ns.bsfs.Vertex,
+ parent=None,
+ )
+
+ROOT_NODE = Node(
+ uri=ns.bsfs.Node,
+ parent=None,
+ )
+
+ROOT_LITERAL = Literal(
+ uri=ns.bsfs.Literal,
+ parent=None,
+ )
+
+ROOT_BLOB = Literal(
+ uri=ns.bsl.BinaryBlob,
+ parent=ROOT_LITERAL,
+ )
+
+ROOT_NUMBER = Literal(
+ uri=ns.bsl.Number,
+ parent=ROOT_LITERAL,
+ )
+
+ROOT_TIME = Literal(
+ uri=ns.bsl.Time,
+ parent=ROOT_LITERAL,
+ )
+
+ROOT_ARRAY = Literal(
+ uri=ns.bsl.Array,
+ parent=ROOT_LITERAL,
+ )
+ROOT_FEATURE = Feature(
+ uri=ns.bsl.Array.Feature,
+ parent=ROOT_ARRAY,
+ dimension=1,
+ dtype=ns.bsfs.dtype().f16,
+ distance=ns.bsd.euclidean,
+ )
+
+# essential predicates
+ROOT_PREDICATE = Predicate(
+ uri=ns.bsfs.Predicate,
+ parent=None,
+ domain=ROOT_NODE,
+ range=ROOT_VERTEX,
+ unique=False,
+ )
## EOF ##
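
A usage sketch of the new Feature type and its child() specialization (the
URI is illustrative; overridden members replace the parent's values, omitted
ones are inherited):

    from bsfs.schema.types import ROOT_FEATURE

    # Specialize the root feature: override the dimension, inherit dtype/distance.
    colors = ROOT_FEATURE.child('http://example.com/colors', dimension=64)
    assert colors.dimension == 64
    assert colors.dtype == ROOT_FEATURE.dtype   # inherited
    assert colors.parent is ROOT_FEATURE        # subtype relation via parent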
diff --git a/bsfs/triple_store/__init__.py b/bsfs/triple_store/__init__.py
index fb5a8a9..79a2887 100644
--- a/bsfs/triple_store/__init__.py
+++ b/bsfs/triple_store/__init__.py
@@ -1,9 +1,4 @@
-"""
-Part of the BlackStar filesystem (bsfs) module.
-A copy of the license is provided with the project.
-Author: Matthias Baumgartner, 2022
-"""
# imports
import typing
diff --git a/bsfs/triple_store/base.py b/bsfs/triple_store/base.py
index 6561262..58b5670 100644
--- a/bsfs/triple_store/base.py
+++ b/bsfs/triple_store/base.py
@@ -1,16 +1,12 @@
-"""
-Part of the BlackStar filesystem (bsfs) module.
-A copy of the license is provided with the project.
-Author: Matthias Baumgartner, 2022
-"""
# imports
import abc
import typing
# inner-module imports
+from bsfs.query import ast
from bsfs.utils import URI, typename
-import bsfs.schema as _schema
+import bsfs.schema as bsc
# exports
__all__: typing.Sequence[str] = (
@@ -81,12 +77,12 @@ class TripleStoreBase(abc.ABC):
@property
@abc.abstractmethod
- def schema(self) -> _schema.Schema:
+ def schema(self) -> bsc.Schema:
"""Return the store's local schema."""
@schema.setter
@abc.abstractmethod
- def schema(self, schema: _schema.Schema):
+ def schema(self, schema: bsc.Schema):
"""Migrate to new schema by adding or removing class definitions.
Commits before and after the migration.
@@ -109,9 +105,30 @@ class TripleStoreBase(abc.ABC):
"""
@abc.abstractmethod
+ def get(
+ self,
+ node_type: bsc.Node,
+ filter: typing.Optional[ast.filter.FilterExpression] = None, # pylint: disable=redefined-builtin
+ ) -> typing.Iterator[URI]:
+ """Return guids of nodes of type *node_type* that match the *filter*.
+ Return all guids of the respective type if *filter* is None.
+ """
+
+ @abc.abstractmethod
+ def fetch(
+ self,
+ node_type: bsc.Node,
+ filter: ast.filter.FilterExpression, # pylint: disable=redefined-builtin
+ fetch: ast.fetch.FetchExpression,
+ ) -> typing.Iterator[typing.Tuple[URI, str, typing.Any]]:
+ """Return (guid, name, value) triples where the guid is determined by the *filter*
+ query and the name matches the *fetch* query.
+ """
+
+ @abc.abstractmethod
def exists(
self,
- node_type: _schema.Node,
+ node_type: bsc.Node,
guids: typing.Iterable[URI],
) -> typing.Iterable[URI]:
"""Return those *guids* that exist and have type *node_type* or a subclass thereof."""
@@ -119,7 +136,7 @@ class TripleStoreBase(abc.ABC):
@abc.abstractmethod
def create(
self,
- node_type: _schema.Node,
+ node_type: bsc.Node,
guids: typing.Iterable[URI],
):
"""Create *guid* nodes with type *subject*."""
@@ -127,9 +144,9 @@ class TripleStoreBase(abc.ABC):
@abc.abstractmethod
def set(
self,
- node_type: _schema.Node, # FIXME: is the node_type even needed? Couldn't I infer from the predicate?
+ node_type: bsc.Node, # FIXME: is the node_type even needed? Couldn't I infer from the predicate?
guids: typing.Iterable[URI],
- predicate: _schema.Predicate,
+ predicate: bsc.Predicate,
values: typing.Iterable[typing.Any],
):
"""Add triples to the graph.
diff --git a/bsfs/triple_store/sparql/__init__.py b/bsfs/triple_store/sparql/__init__.py
new file mode 100644
index 0000000..cfa2732
--- /dev/null
+++ b/bsfs/triple_store/sparql/__init__.py
@@ -0,0 +1,13 @@
+
+# imports
+import typing
+
+# inner-module imports
+from .sparql import SparqlStore
+
+# exports
+__all__: typing.Sequence[str] = (
+ 'SparqlStore',
+ )
+
+## EOF ##
diff --git a/bsfs/triple_store/sparql/distance.py b/bsfs/triple_store/sparql/distance.py
new file mode 100644
index 0000000..2c2f355
--- /dev/null
+++ b/bsfs/triple_store/sparql/distance.py
@@ -0,0 +1,51 @@
+
+# standard imports
+import typing
+
+# external imports
+import numpy as np
+
+# bsfs imports
+from bsfs.namespace import ns
+
+# constants
+EPS = 1e-9
+
+# exports
+__all__: typing.Sequence[str] = (
+ 'DISTANCE_FU',
+ )
+
+
+## code ##
+
+def euclid(fst, snd) -> float:
+ """Euclidean distance (l2 norm)."""
+ fst = np.array(fst)
+ snd = np.array(snd)
+ return float(np.linalg.norm(fst - snd))
+
+def cosine(fst, snd) -> float:
+ """Cosine distance."""
+ fst = np.array(fst)
+ snd = np.array(snd)
+ if (fst == snd).all():
+ return 0.0
+ nrm0 = np.linalg.norm(fst)
+ nrm1 = np.linalg.norm(snd)
+ return float(1.0 - np.dot(fst, snd) / (nrm0 * nrm1 + EPS))
+
+def manhatten(fst, snd) -> float:
+ """Manhatten (cityblock) distance (l1 norm)."""
+ fst = np.array(fst)
+ snd = np.array(snd)
+ return float(np.abs(fst - snd).sum())
+
+# Known distance functions.
+DISTANCE_FU = {
+ ns.bsd.euclidean: euclid,
+ ns.bsd.cosine: cosine,
+ ns.bsd.manhatten: manhatten,
+}
+
+## EOF ##
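
A quick sanity check of the three metrics; DISTANCE_FU maps the ns.bsd URIs
onto these callables so the sparql backend can look them up by URI:

    from bsfs.triple_store.sparql.distance import cosine, euclid, manhatten

    a, b = [0.0, 3.0], [4.0, 0.0]
    print(euclid(a, b))     # 5.0 (l2 norm of the difference)
    print(manhatten(a, b))  # 7.0 (l1 norm of the difference)
    print(cosine(a, a))     # 0.0 (identical vectors short-circuit)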
diff --git a/bsfs/triple_store/sparql/parse_fetch.py b/bsfs/triple_store/sparql/parse_fetch.py
new file mode 100644
index 0000000..fab8173
--- /dev/null
+++ b/bsfs/triple_store/sparql/parse_fetch.py
@@ -0,0 +1,104 @@
+
+# standard imports
+import typing
+
+# bsfs imports
+from bsfs import schema as bsc
+from bsfs.query import ast
+from bsfs.utils import errors
+
+# inner-module imports
+from .utils import GenHopName, Query
+
+# exports
+__all__: typing.Sequence[str] = (
+ 'Fetch',
+ )
+
+
+## code ##
+
+class Fetch():
+ """Translate `bsfs.query.ast.fetch` structures into Sparql queries."""
+
+ def __init__(self, schema):
+ self.schema = schema
+ self.ngen = GenHopName(prefix='?fch')
+
+ def __call__(
+ self,
+ root_type: bsc.Node,
+ root: ast.fetch.FetchExpression,
+ ) -> Query:
+ """
+ """
+ # check root_type
+ if not isinstance(root_type, bsc.Node):
+ raise errors.BackendError(f'expected Node, found {root_type}')
+ if root_type not in self.schema.nodes():
+ raise errors.ConsistencyError(f'node {root_type} is not in the schema')
+ # parse root
+ terms, expr = self._parse_fetch_expression(root_type, root, '?ent')
+ # assemble query
+ return Query(
+ root_type=root_type.uri,
+ root_head='?ent',
+ select=terms,
+ where=expr,
+ )
+
+ def _parse_fetch_expression(
+ self,
+ node_type: bsc.Vertex,
+ node: ast.fetch.FetchExpression,
+ head: str,
+ ):
+ """Route *node* to the handler of the respective FetchExpression subclass."""
+ if isinstance(node, ast.fetch.All):
+ return self._all(node_type, node, head)
+ if isinstance(node, ast.fetch.Fetch):
+ return self._fetch(node_type, node, head)
+ if isinstance(node, ast.fetch.Node):
+ return self._node(node_type, node, head)
+ if isinstance(node, ast.fetch.Value):
+ return self._value(node_type, node, head)
+ if isinstance(node, ast.fetch.This):
+ return self._this(node_type, node, head)
+ # invalid node
+ raise errors.BackendError(f'expected fetch expression, found {node}')
+
+ def _all(self, node_type: bsc.Vertex, node: ast.fetch.All, head: str):
+ # child expressions
+ terms, exprs = zip(*[self._parse_fetch_expression(node_type, expr, head) for expr in node])
+ terms = {term for sub in terms for term in sub}
+ exprs = ' .\n'.join({expr for expr in exprs if len(expr.strip()) > 0})
+ return terms, exprs
+
+ def _fetch(self, node_type: bsc.Vertex, node: ast.fetch.Fetch, head: str): # pylint: disable=unused-argument # (node_type)
+ # child expressions
+ rng = self.schema.predicate(node.predicate).range
+ nexthead = next(self.ngen)
+ terms, expr = self._parse_fetch_expression(rng, node.expr, nexthead)
+ return terms, f'OPTIONAL{{ {head} <{node.predicate}> {nexthead} .\n {expr} }}'
+
+ def _node(self, node_type: bsc.Vertex, node: ast.fetch.Node, head: str): # pylint: disable=unused-argument # (node_type)
+ if f'?{node.name}'.startswith(self.ngen.prefix):
+            raise errors.BackendError(f'Node name must not start with {self.ngen.prefix}')
+ # compose and return statement
+ term = next(self.ngen)
+ return {(term, node.name)}, f'OPTIONAL{{ {head} <{node.predicate}> {term} }}'
+
+ def _value(self, node_type: bsc.Vertex, node: ast.fetch.Value, head: str): # pylint: disable=unused-argument # (node_type)
+ if f'?{node.name}'.startswith(self.ngen.prefix):
+            raise errors.BackendError(f'Value name must not start with {self.ngen.prefix}')
+ # compose and return statement
+ term = next(self.ngen)
+ return {(term, node.name)}, f'OPTIONAL{{ {head} <{node.predicate}> {term} }}'
+
+ def _this(self, node_type: bsc.Vertex, node: ast.fetch.This, head: str): # pylint: disable=unused-argument # (node_type)
+ if f'?{node.name}'.startswith(self.ngen.prefix):
+            raise errors.BackendError(f'This name must not start with {self.ngen.prefix}')
+ # compose and return statement
+ return {(head, node.name)}, ''
+
+## EOF ##
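
Each hop is wrapped in OPTIONAL so that nodes lacking a value still appear in
the result rows, and fresh variable names are drawn from a GenHopName counter.
A minimal, self-contained illustration of the emitted fragment shape (the
predicate URI is illustrative):

    class Names:
        """Counter in the spirit of .utils.GenHopName: ?fch0, ?fch1, ..."""
        def __init__(self, prefix: str = '?fch'):
            self.prefix, self.curr = prefix, -1
        def __next__(self) -> str:
            self.curr += 1
            return self.prefix + str(self.curr)

    ngen = Names()
    head, term = '?ent', next(ngen)
    print(f'OPTIONAL{{ {head} <http://example.com/tag> {term} }}')
    # OPTIONAL{ ?ent <http://example.com/tag> ?fch0 }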
diff --git a/bsfs/triple_store/sparql/parse_filter.py b/bsfs/triple_store/sparql/parse_filter.py
new file mode 100644
index 0000000..2f5a25b
--- /dev/null
+++ b/bsfs/triple_store/sparql/parse_filter.py
@@ -0,0 +1,316 @@
+
+# imports
+import operator
+import typing
+
+# external imports
+import rdflib
+
+# bsfs imports
+from bsfs import schema as bsc
+from bsfs.namespace import ns
+from bsfs.query import ast
+from bsfs.utils import URI, errors
+
+# inner-module imports
+from .distance import DISTANCE_FU
+from .utils import GenHopName, Query
+
+# exports
+__all__: typing.Sequence[str] = (
+ 'Filter',
+ )
+
+
+## code ##
+
+class Filter():
+ """Translate `bsfs.query.ast.filter` structures into Sparql queries."""
+
+ # Current schema to validate against.
+ schema: bsc.Schema
+
+ # Generator that produces unique symbol names.
+ ngen: GenHopName
+
+ def __init__(self, graph, schema):
+ self.graph = graph
+ self.schema = schema
+ self.ngen = GenHopName(prefix='?flt')
+
+ def __call__(
+ self,
+ root_type: bsc.Node,
+ root: typing.Optional[ast.filter.FilterExpression] = None,
+ ) -> Query:
+ """
+ """
+ # check root_type
+ if not isinstance(root_type, bsc.Node):
+ raise errors.BackendError(f'expected Node, found {root_type}')
+ if root_type not in self.schema.nodes():
+ raise errors.ConsistencyError(f'node {root_type} is not in the schema')
+ # parse root
+ if root is None:
+ cond = ''
+ else:
+ cond = self._parse_filter_expression(root_type, root, '?ent')
+ # assemble query
+ return Query(
+ root_type=root_type.uri,
+ root_head='?ent',
+ where=cond,
+ )
+
+ def _parse_filter_expression(
+ self,
+ type_: bsc.Vertex,
+ node: ast.filter.FilterExpression,
+ head: str,
+ ) -> str:
+ """Route *node* to the handler of the respective FilterExpression subclass."""
+ if isinstance(node, ast.filter.Is):
+ return self._is(type_, node, head)
+ if isinstance(node, ast.filter.Not):
+ return self._not(type_, node, head)
+ if isinstance(node, ast.filter.Has):
+ return self._has(type_, node, head)
+ if isinstance(node, ast.filter.Distance):
+ return self._distance(type_, node, head)
+ if isinstance(node, ast.filter.Any):
+ return self._any(type_, node, head)
+ if isinstance(node, ast.filter.All):
+ return self._all(type_, node, head)
+ if isinstance(node, ast.filter.And):
+ return self._and(type_, node, head)
+ if isinstance(node, ast.filter.Or):
+ return self._or(type_, node, head)
+ if isinstance(node, ast.filter.Equals):
+ return self._equals(type_, node, head)
+ if isinstance(node, ast.filter.Substring):
+ return self._substring(type_, node, head)
+ if isinstance(node, ast.filter.StartsWith):
+ return self._starts_with(type_, node, head)
+ if isinstance(node, ast.filter.EndsWith):
+ return self._ends_with(type_, node, head)
+ if isinstance(node, ast.filter.LessThan):
+ return self._less_than(type_, node, head)
+ if isinstance(node, ast.filter.GreaterThan):
+ return self._greater_than(type_, node, head)
+ # invalid node
+ raise errors.BackendError(f'expected filter expression, found {node}')
+
+ def _parse_predicate_expression(
+ self,
+ type_: bsc.Vertex,
+ node: ast.filter.PredicateExpression
+ ) -> typing.Tuple[str, bsc.Vertex]:
+ """Route *node* to the handler of the respective PredicateExpression subclass."""
+ if isinstance(node, ast.filter.Predicate):
+ return self._predicate(type_, node)
+ if isinstance(node, ast.filter.OneOf):
+ return self._one_of(type_, node)
+ # invalid node
+ raise errors.BackendError(f'expected predicate expression, found {node}')
+
+ def _one_of(self, node_type: bsc.Vertex, node: ast.filter.OneOf) -> typing.Tuple[str, bsc.Vertex]:
+ """
+ """
+ if not isinstance(node_type, bsc.Node):
+ raise errors.BackendError(f'expected Node, found {node_type}')
+ # walk through predicates
+ suburi, rng = set(), None
+ for pred in node: # OneOf guarantees at least one expression
+ puri, subrng = self._parse_predicate_expression(node_type, pred)
+ # track predicate uris
+ suburi.add(puri)
+ # check for more generic range
+ if rng is None or subrng > rng:
+ rng = subrng
+ # check range consistency
+ if not subrng <= rng and not subrng >= rng:
+ raise errors.ConsistencyError(f'ranges {subrng} and {rng} are not related')
+ # return joint predicate expression and next range
+ # OneOf guarantees at least one expression, rng is always a bsc.Vertex.
+ # mypy does not realize this, hence we ignore the warning.
+ return '|'.join(suburi), rng # type: ignore [return-value]
+
+ def _predicate(self, node_type: bsc.Vertex, node: ast.filter.Predicate) -> typing.Tuple[str, bsc.Vertex]:
+ """
+ """
+ # check node_type
+ if not isinstance(node_type, bsc.Node):
+ raise errors.BackendError(f'expected Node, found {node_type}')
+ # fetch predicate and its uri
+ puri = node.predicate
+ # get and check predicate, domain, and range
+ if not self.schema.has_predicate(puri):
+ raise errors.ConsistencyError(f'predicate {puri} is not in the schema')
+ pred = self.schema.predicate(puri)
+ if not isinstance(pred.range, (bsc.Node, bsc.Literal)):
+ raise errors.BackendError(f'the range of predicate {pred} is undefined')
+ dom, rng = pred.domain, pred.range
+ # encapsulate predicate uri
+ uri_str = f'<{puri}>'
+ # apply reverse flag
+ if node.reverse:
+ uri_str = '^' + uri_str
+ dom, rng = rng, dom # type: ignore [assignment] # variable re-use confuses mypy
+ # check path consistency
+ if not node_type <= dom:
+ raise errors.ConsistencyError(f'expected type {dom} or subtype thereof, found {node_type}')
+ # return predicate URI and next node type
+ return uri_str, rng
+
+ def _any(self, node_type: bsc.Vertex, node: ast.filter.Any, head: str) -> str:
+ """
+ """
+ if not isinstance(node_type, bsc.Node):
+ raise errors.BackendError(f'expected Node, found {node_type}')
+ # parse predicate
+ pred, next_type = self._parse_predicate_expression(node_type, node.predicate)
+ # parse expression
+ nexthead = next(self.ngen)
+ expr = self._parse_filter_expression(next_type, node.expr, nexthead)
+ # combine results
+ return f'{head} {pred} {nexthead} . {expr}'
+
+ def _all(self, node_type: bsc.Vertex, node: ast.filter.All, head: str) -> str:
+ """
+ """
+ # NOTE: All(P, E) := Not(Any(P, Not(E))) and EXISTS(P, ?)
+ if not isinstance(node_type, bsc.Node):
+ raise errors.BackendError(f'expected Node, found {node_type}')
+ # parse rewritten ast
+ expr = self._parse_filter_expression(node_type,
+ ast.filter.Not(
+ ast.filter.Any(node.predicate,
+ ast.filter.Not(node.expr))), head)
+ # parse predicate for existence constraint
+ pred, _ = self._parse_predicate_expression(node_type, node.predicate)
+ temphead = next(self.ngen)
+ # return existence and rewritten expression
+ return f'FILTER EXISTS {{ {head} {pred} {temphead} }} . ' + expr
+
+ def _and(self, node_type: bsc.Vertex, node: ast.filter.And, head: str) -> str:
+ """
+ """
+ sub = [self._parse_filter_expression(node_type, expr, head) for expr in node]
+ return ' . '.join(sub)
+
+ def _or(self, node_type: bsc.Vertex, node: ast.filter.Or, head: str) -> str:
+ """
+ """
+ # potential special case optimization:
+ # * ast: Or(Equals('foo'), Equals('bar'), ...)
+        #   * query: VALUES ?head { "value1"^^<...> "value2"^^<...> "value3"^^<...> ... }
+ sub = [self._parse_filter_expression(node_type, expr, head) for expr in node]
+ sub = ['{' + expr + '}' for expr in sub]
+ return ' UNION '.join(sub)
+
+ def _not(self, node_type: bsc.Vertex, node: ast.filter.Not, head: str) -> str:
+ """
+ """
+ expr = self._parse_filter_expression(node_type, node.expr, head)
+ if isinstance(node_type, bsc.Literal):
+ return f'MINUS {{ {expr} }}'
+ # NOTE: for bsc.Node types, we must include at least one expression in the body of MINUS,
+ # otherwise the connection between the context and body of MINUS is lost.
+ # The simplest (and non-interfering) choice is a type statement.
+ return f'MINUS {{ {head} <{ns.rdf.type}>/<{ns.rdfs.subClassOf}>* <{node_type.uri}> . {expr} }}'
+
+ def _has(self, node_type: bsc.Vertex, node: ast.filter.Has, head: str) -> str:
+ """
+ """
+ if not isinstance(node_type, bsc.Node):
+ raise errors.BackendError(f'expected Node, found {node_type}')
+ # parse predicate
+ pred, _ = self._parse_predicate_expression(node_type, node.predicate)
+ # get new heads
+ inner = next(self.ngen)
+ outer = next(self.ngen)
+ # predicate count expression (fetch number of predicates at *head*)
+ num_preds = f'{{ SELECT (COUNT(distinct {inner}) as {outer}) WHERE {{ {head} {pred} {inner} }} }}'
+ # count expression
+ count_bounds = self._parse_filter_expression(self.schema.literal(ns.xsd.integer), node.count, outer)
+ # combine
+ return num_preds + ' . ' + count_bounds
+
+ def _distance(self, node_type: bsc.Vertex, node: ast.filter.Distance, head: str) -> str:
+ """
+ """
+ if not isinstance(node_type, bsc.Feature):
+ raise errors.BackendError(f'expected Feature, found {node_type}')
+ if len(node.reference) != node_type.dimension:
+ raise errors.ConsistencyError(
+ f'reference has dimension {len(node.reference)}, expected {node_type.dimension}')
+ # get distance metric
+ dist = DISTANCE_FU[node_type.distance]
+ # get operator
+ cmp = operator.lt if node.strict else operator.le
+ # get candidate values
+ candidates = {
+ f'"{cand}"^^<{node_type.uri}>'
+ for cand
+ in self.graph.objects()
+ if isinstance(cand, rdflib.Literal)
+ and cand.datatype == rdflib.URIRef(node_type.uri)
+ and cmp(dist(cand.value, node.reference), node.threshold)
+ }
+ # combine candidate values
+ values = ' '.join(candidates) if len(candidates) else f'"impossible value"^^<{ns.xsd.string}>'
+ # return sparql fragment
+ return f'VALUES {head} {{ {values} }}'
+
+ def _is(self, node_type: bsc.Vertex, node: ast.filter.Is, head: str) -> str:
+ """
+ """
+ if not isinstance(node_type, bsc.Node):
+ raise errors.BackendError(f'expected Node, found {node_type}')
+ return f'VALUES {head} {{ <{URI(node.value)}> }}'
+
+ def _equals(self, node_type: bsc.Vertex, node: ast.filter.Equals, head: str) -> str:
+ """
+ """
+ if not isinstance(node_type, bsc.Literal):
+            raise errors.BackendError(f'expected Literal, found {node_type}')
+ return f'VALUES {head} {{ "{node.value}"^^<{node_type.uri}> }}'
+
+ def _substring(self, node_type: bsc.Vertex, node: ast.filter.Substring, head: str) -> str:
+ """
+ """
+ if not isinstance(node_type, bsc.Literal):
+ raise errors.BackendError(f'expected Literal, found {node_type}')
+ return f'FILTER contains(str({head}), "{node.value}")'
+
+ def _starts_with(self, node_type: bsc.Vertex, node: ast.filter.StartsWith, head: str) -> str:
+ """
+ """
+ if not isinstance(node_type, bsc.Literal):
+ raise errors.BackendError(f'expected Literal, found {node_type}')
+ return f'FILTER strstarts(str({head}), "{node.value}")'
+
+ def _ends_with(self, node_type: bsc.Vertex, node: ast.filter.EndsWith, head: str) -> str:
+ """
+ """
+ if not isinstance(node_type, bsc.Literal):
+ raise errors.BackendError(f'expected Literal, found {node_type}')
+ return f'FILTER strends(str({head}), "{node.value}")'
+
+ def _less_than(self, node_type: bsc.Vertex, node: ast.filter.LessThan, head: str) -> str:
+ """
+ """
+ if not isinstance(node_type, bsc.Literal):
+ raise errors.BackendError(f'expected Literal, found {node_type}')
+ equality = '=' if not node.strict else ''
+ return f'FILTER ({head} <{equality} {float(node.threshold)})'
+
+ def _greater_than(self, node_type: bsc.Vertex, node: ast.filter.GreaterThan, head: str) -> str:
+ """
+ """
+ if not isinstance(node_type, bsc.Literal):
+ raise errors.BackendError(f'expected Literal, found {node_type}')
+ equality = '=' if not node.strict else ''
+ return f'FILTER ({head} >{equality} {float(node.threshold)})'
+
+## EOF ##
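
The _all handler leans on the rewrite All(P, E) := Not(Any(P, Not(E)))
combined with an existence constraint. The equivalence is easy to verify on
plain Python sets:

    def all_via_rewrite(values, cond) -> bool:
        # "at least one value exists, and no value violates cond"
        return len(values) > 0 and not any(not cond(v) for v in values)

    def all_direct(values, cond) -> bool:
        return len(values) > 0 and all(cond(v) for v in values)

    for vals in (set(), {1}, {1, 2, 3}, {-1, 2}):
        assert all_via_rewrite(vals, lambda v: v > 0) == all_direct(vals, lambda v: v > 0)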
diff --git a/bsfs/triple_store/sparql.py b/bsfs/triple_store/sparql/sparql.py
index 7516dff..99e67d6 100644
--- a/bsfs/triple_store/sparql.py
+++ b/bsfs/triple_store/sparql/sparql.py
@@ -1,20 +1,23 @@
-"""
-Part of the BlackStar filesystem (bsfs) module.
-A copy of the license is provided with the project.
-Author: Matthias Baumgartner, 2022
-"""
# imports
+import base64
import itertools
import typing
+
+# external imports
import rdflib
# bsfs imports
from bsfs import schema as bsc
+from bsfs.namespace import ns
+from bsfs.query import ast
from bsfs.utils import errors, URI
# inner-module imports
-from . import base
+from . import parse_fetch
+from . import parse_filter
+from .. import base
+from .distance import DISTANCE_FU
# exports
@@ -25,6 +28,8 @@ __all__: typing.Sequence[str] = (
## code ##
+rdflib.term.bind(ns.bsl.BinaryBlob, bytes, constructor=base64.b64decode)
+
class _Transaction():
"""Lightweight rdflib transactions for in-memory databases."""
@@ -85,11 +90,19 @@ class SparqlStore(base.TripleStoreBase):
# The local schema.
_schema: bsc.Schema
+ # Filter parser
+ _filter_parser: parse_filter.Filter
+
+ # Fetch parser
+ _fetch_parser: parse_fetch.Fetch
+
def __init__(self):
super().__init__(None)
self._graph = rdflib.Graph()
self._transaction = _Transaction(self._graph)
- self._schema = bsc.Schema.Empty()
+ self._schema = bsc.Schema(literals={bsc.ROOT_NUMBER.child(ns.xsd.integer)})
+ self._filter_parser = parse_filter.Filter(self._graph, self._schema)
+ self._fetch_parser = parse_fetch.Fetch(self._schema)
# NOTE: mypy and pylint complain about the **kwargs not being listed (contrasting super)
# However, not having it here is clearer since it's explicit that there are no arguments.
@@ -115,6 +128,16 @@ class SparqlStore(base.TripleStoreBase):
# check compatibility: No contradicting definitions
if not self.schema.consistent_with(schema):
raise errors.ConsistencyError(f'{schema} is inconsistent with {self.schema}')
+ # check distance functions of features
+ invalid = {
+ (cand.uri, cand.distance)
+ for cand
+ in schema.literals()
+ if isinstance(cand, bsc.Feature) and cand.distance not in DISTANCE_FU}
+ if len(invalid) > 0:
+ cand, dist = zip(*invalid)
+ raise errors.UnsupportedError(
+ f'unknown distance function {",".join(dist)} in feature {", ".join(cand)}')
# commit the current transaction
self.commit()
@@ -126,10 +149,17 @@ class SparqlStore(base.TripleStoreBase):
# get deleted classes
sub = self.schema - schema
- # remove predicate instances
for pred in sub.predicates:
+ # remove predicate instances
for src, trg in self._graph.subject_objects(rdflib.URIRef(pred.uri)):
self._transaction.remove((src, rdflib.URIRef(pred.uri), trg))
+ # remove predicate definition
+ if pred.parent is not None: # NOTE: there shouldn't be any predicate w/o parent
+ self._transaction.remove((
+ rdflib.URIRef(pred.uri),
+ rdflib.RDFS.subClassOf,
+ rdflib.URIRef(pred.parent.uri),
+ ))
# remove node instances
for node in sub.nodes:
@@ -143,15 +173,82 @@ class SparqlStore(base.TripleStoreBase):
self._transaction.remove((inst, pred, trg))
# remove instance
self._transaction.remove((inst, rdflib.RDF.type, rdflib.URIRef(node.uri)))
-
- # NOTE: Nothing to do for literals
+ # remove node definition
+ if node.parent is not None: # NOTE: there shouldn't be any node w/o parent
+ self._transaction.remove((
+ rdflib.URIRef(node.uri),
+ rdflib.RDFS.subClassOf,
+ rdflib.URIRef(node.parent.uri),
+ ))
+
+ for lit in sub.literals:
+ # remove literal definition
+ if lit.parent is not None: # NOTE: there shouldn't be any literal w/o parent
+ self._transaction.remove((
+ rdflib.URIRef(lit.uri),
+ rdflib.RDFS.subClassOf,
+ rdflib.URIRef(lit.parent.uri),
+ ))
+
+ # add predicate, node, and literal hierarchies to the graph
+ for itm in itertools.chain(schema.predicates(), schema.nodes(), schema.literals()):
+ if itm.parent is not None:
+ self._transaction.add((rdflib.URIRef(itm.uri), rdflib.RDFS.subClassOf, rdflib.URIRef(itm.parent.uri)))
# commit instance changes
self.commit()
# migrate schema
self._schema = schema
+ self._filter_parser.schema = schema
+ self._fetch_parser.schema = schema
+ def fetch(
+ self,
+ node_type: bsc.Node,
+ filter: ast.filter.FilterExpression, # pylint: disable=redefined-builtin
+ fetch: ast.fetch.FetchExpression,
+ ) -> typing.Iterator[typing.Tuple[URI, str, typing.Any]]:
+ if node_type not in self.schema.nodes():
+ raise errors.ConsistencyError(f'{node_type} is not defined in the schema')
+ if not isinstance(filter, ast.filter.FilterExpression):
+ raise TypeError(filter)
+ if not isinstance(fetch, ast.fetch.FetchExpression):
+ raise TypeError(fetch)
+ # compose a query from fetch and filter ast
+ query = self._filter_parser(node_type, filter)
+ query += self._fetch_parser(node_type, fetch)
+ # run query
+ emitted = set()
+ for result in query(self._graph):
+ guid = URI(result[0])
+ for name, raw in zip(query.names, result[1:]):
+ if raw is None: # undefined value
+ continue
+ if isinstance(raw, rdflib.Literal):
+ value = raw.value
+ else:
+ value = URI(raw)
+ # emit triple
+ triple = (guid, name, value)
+ if triple not in emitted: # FIXME: needs a better solution!
+ emitted.add(triple)
+ yield guid, name, value
+
+ def get(
+ self,
+ node_type: bsc.Node,
+ filter: typing.Optional[ast.filter.FilterExpression] = None, # pylint: disable=redefined-builtin
+ ) -> typing.Iterator[URI]:
+ if node_type not in self.schema.nodes():
+ raise errors.ConsistencyError(f'{node_type} is not defined in the schema')
+ if filter is not None and not isinstance(filter, ast.filter.FilterExpression):
+ raise TypeError(filter)
+ # compose query
+ query = self._filter_parser(node_type, filter)
+ # run query
+ for guid, in query(self._graph):
+ yield URI(guid)
def _has_type(self, subject: URI, node_type: bsc.Node) -> bool:
"""Return True if *subject* is a node of class *node_type* or a subclass thereof."""
@@ -187,7 +284,7 @@ class SparqlStore(base.TripleStoreBase):
raise errors.ConsistencyError(f'{node_type} is not defined in the schema')
# check and create guids
for guid in guids:
- subject = rdflib.URIRef(guid)
+ subject = rdflib.URIRef(URI(guid))
# check node existence
if (subject, rdflib.RDF.type, None) in self._graph:
# FIXME: node exists and may have a different type! ignore? raise? report?
@@ -226,7 +323,7 @@ class SparqlStore(base.TripleStoreBase):
raise errors.InstanceError(inconsistent)
# check guids
# FIXME: Fail or skip inexistent nodes?
- guids = set(guids)
+ guids = {URI(guid) for guid in guids}
inconsistent = {guid for guid in guids if not self._has_type(guid, node_type)}
if len(inconsistent) > 0:
raise errors.InstanceError(inconsistent)
@@ -237,7 +334,11 @@ class SparqlStore(base.TripleStoreBase):
guid = rdflib.URIRef(guid)
# convert value
if isinstance(predicate.range, bsc.Literal):
- value = rdflib.Literal(value, datatype=rdflib.URIRef(predicate.range.uri))
+ dtype = rdflib.URIRef(predicate.range.uri)
+ if predicate.range <= self.schema.literal(ns.bsl.BinaryBlob):
+ dtype = rdflib.URIRef(ns.bsl.BinaryBlob)
+ value = base64.b64encode(value)
+ value = rdflib.Literal(value, datatype=dtype)
elif isinstance(predicate.range, bsc.Node):
value = rdflib.URIRef(value)
else:
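
Binary blobs are stored base64-encoded: set() encodes values before writing,
and the rdflib.term.bind call at the top of the module registers
base64.b64decode as the constructor that recovers them on read. The round
trip in isolation:

    import base64

    raw = b'\x00\x01payload'
    stored = base64.b64encode(raw)          # what set() writes into the store
    assert base64.b64decode(stored) == raw  # what the bound constructor recovers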
diff --git a/bsfs/triple_store/sparql/utils.py b/bsfs/triple_store/sparql/utils.py
new file mode 100644
index 0000000..38062c2
--- /dev/null
+++ b/bsfs/triple_store/sparql/utils.py
@@ -0,0 +1,137 @@
+
+# standard imports
+import typing
+
+# external imports
+import rdflib
+
+# bsfs imports
+from bsfs.namespace import ns
+from bsfs.utils import typename
+
+# exports
+__all__: typing.Sequence[str] = (
+ 'GenHopName',
+ 'Query',
+ )
+
+
+## code ##
+
+class GenHopName():
+ """Generator that produces a new unique symbol name with each iteration."""
+
+ # Symbol name prefix.
+ prefix: str
+
+ # Current counter.
+ curr: int
+
+ def __init__(self, prefix: str = '?hop', start: int = 0):
+ self.prefix = prefix
+ self.curr = start - 1
+
+ def __next__(self):
+ """Generate and return the next unique name."""
+ self.curr += 1
+ return self.prefix + str(self.curr)
+
+
+class Query():
+ """Hold, manage, and complete partial Sparql queries."""
+
+ # root node type URI.
+ root_type: str
+
+ # root node variable name.
+ root_head: str
+
+ # (head, name) tuples (w/o root)
+ select: typing.Tuple[typing.Tuple[str, str], ...]
+
+ # where statements.
+ where: str
+
+ def __init__(
+ self,
+ root_type: str,
+ root_head: str = '?ent',
+ select: typing.Optional[typing.Iterable[typing.Tuple[str, str]]] = None,
+ where: typing.Optional[str] = None,
+ ):
+ # check arguments
+ if select is None:
+ select = []
+ if where is None:
+ where = ''
+ # set members
+ self.root_type = root_type
+ self.root_head = root_head
+        self.select = tuple(select) # tuple ensures persistent order
+ self.where = where.strip()
+
+ def __str__(self) -> str:
+ return self.query
+
+ def __repr__(self) -> str:
+ return f'{typename(self)}({self.root_type}, {self.root_head}, {self.select}, {self.where})'
+
+ def __eq__(self, other: typing.Any) -> bool:
+ return isinstance(other, type(self)) \
+ and self.root_type == other.root_type \
+ and self.root_head == other.root_head \
+ and self.select == other.select \
+ and self.where == other.where
+
+ def __hash__(self) -> int:
+ return hash((type(self), self.root_type, self.root_head, self.select, self.where))
+
+ def __add__(self, other: typing.Any) -> 'Query':
+ # check other's type
+ if not isinstance(other, type(self)):
+ return NotImplemented
+ # check query compatibility
+ if not self.root_type == other.root_type:
+ raise ValueError(other)
+ if not self.root_head == other.root_head:
+ raise ValueError(other)
+ # combine selections
+ select = self.select + other.select
+ # combine conditions
+ conds = []
+ if self.where != '':
+ conds.append(self.where)
+ if other.where != '':
+ conds.append(other.where)
+ where = ' . '.join(conds)
+ # return new query
+ return Query(
+ root_type=self.root_type,
+ root_head=self.root_head,
+ select=select,
+ where=where,
+ )
+
+ @property
+ def names(self) -> typing.Tuple[str, ...]:
+ """Return a tuple of selected variable names, excluding the root."""
+ return tuple(name for _, name in self.select)
+
+ @property
+ def query(self) -> str:
+ """Return an executable sparql query."""
+ select = ' '.join(f'({head} as ?{name})' for head, name in self.select)
+ return f'''
+ SELECT DISTINCT {self.root_head} {select}
+ WHERE {{
+ {self.root_head} <{ns.rdf.type}>/<{ns.rdfs.subClassOf}>* <{self.root_type}> .
+ {self.where}
+ }}
+ ORDER BY str({self.root_head})
+ '''
+
+ def __call__(self, graph: rdflib.Graph) -> rdflib.query.Result:
+ """Execute the query on a *graph* and return the query result."""
+ return graph.query(self.query)
+
+## EOF ##
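
A usage sketch of Query composition as fetch() uses it: the filter query
contributes WHERE conditions, the fetch query contributes selected terms, and
both must share the same root (URIs are illustrative):

    from bsfs.triple_store.sparql.utils import Query

    flt = Query('http://example.com/Entity',
                where='?ent <http://example.com/tag> ?flt0')
    fch = Query('http://example.com/Entity',
                select=[('?fch0', 'tag')],
                where='OPTIONAL{ ?ent <http://example.com/tag> ?fch0 }')
    combined = flt + fch
    print(combined.names)   # ('tag',)
    print(combined.query)   # executable sparql, ready for graph.query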
diff --git a/bsfs/utils/__init__.py b/bsfs/utils/__init__.py
index 94680ee..d497645 100644
--- a/bsfs/utils/__init__.py
+++ b/bsfs/utils/__init__.py
@@ -1,15 +1,10 @@
-"""
-Part of the BlackStar filesystem (bsfs) module.
-A copy of the license is provided with the project.
-Author: Matthias Baumgartner, 2022
-"""
# imports
import typing
# inner-module imports
from . import errors
-from .commons import typename
+from .commons import typename, normalize_args
from .uri import URI
from .uuid import UUID, UCID
@@ -19,6 +14,7 @@ __all__ : typing.Sequence[str] = (
'URI',
'UUID',
'errors',
+ 'normalize_args',
'typename',
)
diff --git a/bsfs/utils/commons.py b/bsfs/utils/commons.py
index bad2fe0..a7092ae 100644
--- a/bsfs/utils/commons.py
+++ b/bsfs/utils/commons.py
@@ -1,14 +1,11 @@
-"""
-Part of the BlackStar filesystem (bsfs) module.
-A copy of the license is provided with the project.
-Author: Matthias Baumgartner, 2022
-"""
# imports
+from collections import abc
import typing
# exports
__all__: typing.Sequence[str] = (
+ 'normalize_args',
'typename',
)
@@ -19,5 +16,37 @@ def typename(obj) -> str:
"""Return the type name of *obj*."""
return type(obj).__name__
+# argument type in `normalize_args`.
+ArgType = typing.TypeVar('ArgType') # pylint: disable=invalid-name # type vars don't follow the usual convention
+
+def normalize_args(
+ *args: typing.Union[ArgType, typing.Iterable[ArgType], typing.Iterator[ArgType]]
+ ) -> typing.Tuple[ArgType, ...]:
+ """Arguments to a function can be passed as individual arguments, list-like
+ structures, or iterables. This function processes any of these styles and
+    returns a tuple of the respective items. Typically used within a function
+    to provide a flexible interface while still keeping parameters in a
+    normalized form.
+
+    Examples:
+
+    >>> normalize_args(0, 1, 2)
+    (0, 1, 2)
+    >>> normalize_args([0, 1, 2])
+    (0, 1, 2)
+    >>> normalize_args(range(3))
+    (0, 1, 2)
+
+ """
+ if len(args) == 0: # foo()
+ return tuple()
+ if len(args) > 1: # foo(0, 1, 2)
+ return tuple(args) # type: ignore [arg-type] # we assume that argument styles (arg vs. iterable) are not mixed.
+ if isinstance(args[0], abc.Iterator): # foo(iter([0,1,2]))
+ return tuple(args[0])
+ if isinstance(args[0], abc.Iterable) and not isinstance(args[0], str): # foo([0, 1, 2])
+ return tuple(args[0])
+ # foo(0)
+ return (args[0], ) # type: ignore [return-value] # if args[0] is a str, we assume that ArgType was str.
+
## EOF ##
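
Note the str special case at the bottom: strings are iterable, but are
deliberately treated as single values rather than unpacked. A quick check:

    from bsfs.utils import normalize_args

    assert normalize_args('abc') == ('abc', )        # str is not unpacked
    assert normalize_args(['a', 'b']) == ('a', 'b')  # lists are
    assert normalize_args(iter('ab')) == ('a', 'b')  # iterators are consumed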
diff --git a/bsfs/utils/errors.py b/bsfs/utils/errors.py
index c5e8e16..b82e6e2 100644
--- a/bsfs/utils/errors.py
+++ b/bsfs/utils/errors.py
@@ -1,9 +1,4 @@
-"""
-Part of the BlackStar filesystem (bsfs) module.
-A copy of the license is provided with the project.
-Author: Matthias Baumgartner, 2022
-"""
# imports
import typing
@@ -38,4 +33,10 @@ class UnreachableError(ProgrammingError):
class ConfigError(_BSFSError):
"""User config issue."""
+class BackendError(_BSFSError):
+ """Could not parse an AST structure."""
+
+class UnsupportedError(_BSFSError):
+ """Some requested functionality is not supported by an implementation."""
+
## EOF ##
diff --git a/bsfs/utils/uri.py b/bsfs/utils/uri.py
index 84854a4..5755a6e 100644
--- a/bsfs/utils/uri.py
+++ b/bsfs/utils/uri.py
@@ -1,14 +1,11 @@
-"""
-Part of the BlackStar filesystem (bsfs) module.
-A copy of the license is provided with the project.
-Author: Matthias Baumgartner, 2022
-"""
# imports
import re
import typing
# constants
+RX_CHARS = re.compile(r'[<>" {}|\\^]')
+
RX_URI = re.compile(r'''
^
(?:(?P<scheme>[^:/?#]+):)? # scheme, ://-delimited
@@ -82,6 +79,9 @@ class URI(str):
no claim about the validity of an URI!
"""
+ # check characters
+ if RX_CHARS.search(query) is not None:
+ return False
# check uri
parts = RX_URI.match(query)
if parts is not None:
@@ -232,9 +232,6 @@ class URI(str):
# overload formatting methods
- def format(self, *args, **kwargs) -> 'URI':
- return URI(super().format(*args, **kwargs))
-
def __mod__(self, *args) -> 'URI':
return URI(super().__mod__(*args))
diff --git a/bsfs/utils/uuid.py b/bsfs/utils/uuid.py
index 6366b18..ad7fc1c 100644
--- a/bsfs/utils/uuid.py
+++ b/bsfs/utils/uuid.py
@@ -1,12 +1,9 @@
-"""
-Part of the BlackStar filesystem (bsfs) module.
-A copy of the license is provided with the project.
-Author: Matthias Baumgartner, 2022
-"""
# imports
from collections import abc
import hashlib
+import io
+import json
import os
import platform
import random
@@ -105,4 +102,21 @@ class UCID():
with open(path, 'rb') as ifile:
return HASH(ifile.read()).hexdigest()
+ @staticmethod
+ def from_buffer(buffer: io.IOBase) -> str:
+ """Read the content from a buffer."""
+ if isinstance(buffer, io.TextIOBase):
+ return HASH(buffer.read().encode('utf-8', errors='ignore')).hexdigest()
+ return HASH(buffer.read()).hexdigest()
+
+ @staticmethod
+ def from_bytes(content: bytes) -> str:
+ """Get the content from as bytes."""
+ return HASH(content).hexdigest()
+
+ @staticmethod
+ def from_dict(content: dict) -> str:
+ """Get the content from a dict."""
+ return HASH(json.dumps(content).encode('ascii', 'ignore')).hexdigest()
+
## EOF ##
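
The new helpers hash content the same way from_path does; from_buffer
additionally encodes text streams as utf-8 first. A short consistency check:

    import io
    from bsfs.utils import UCID

    data = b'hello world'
    assert UCID.from_bytes(data) == UCID.from_buffer(io.BytesIO(data))
    assert UCID.from_bytes(data) == UCID.from_buffer(io.StringIO('hello world'))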