aboutsummaryrefslogtreecommitdiffstats
path: root/bsfs
diff options
context:
space:
mode:
authorMatthias Baumgartner <dev@igsor.net>2023-01-16 21:43:38 +0100
committerMatthias Baumgartner <dev@igsor.net>2023-01-16 21:43:38 +0100
commite12cd52ad267563c8046a593ad551b1dd089a702 (patch)
treed94cdf7ac540eb82630f78cbf564682b66007f51 /bsfs
parent7f5a2920ef311b2077300714d7700313077a0bf6 (diff)
parent3504609e1ba1f7f653fa79910474bebd3ec24d8a (diff)
downloadbsfs-e12cd52ad267563c8046a593ad551b1dd089a702.tar.gz
bsfs-e12cd52ad267563c8046a593ad551b1dd089a702.tar.bz2
bsfs-e12cd52ad267563c8046a593ad551b1dd089a702.zip
Merge branch 'mb/features' into develop
Diffstat (limited to 'bsfs')
-rw-r--r--bsfs/apps/migrate.py6
-rw-r--r--bsfs/graph/graph.py10
-rw-r--r--bsfs/graph/resolve.py47
-rw-r--r--bsfs/graph/schema.nt3
-rw-r--r--bsfs/namespace/namespace.py2
-rw-r--r--bsfs/query/ast/filter_.py59
-rw-r--r--bsfs/query/validator.py90
-rw-r--r--bsfs/schema/__init__.py9
-rw-r--r--bsfs/schema/schema.py117
-rw-r--r--bsfs/schema/serialize.py259
-rw-r--r--bsfs/schema/types.py195
-rw-r--r--bsfs/triple_store/sparql/distance.py56
-rw-r--r--bsfs/triple_store/sparql/parse_filter.py109
-rw-r--r--bsfs/triple_store/sparql/sparql.py23
-rw-r--r--bsfs/utils/errors.py3
-rw-r--r--bsfs/utils/uuid.py7
16 files changed, 739 insertions, 256 deletions
diff --git a/bsfs/apps/migrate.py b/bsfs/apps/migrate.py
index 91c1661..b9d019f 100644
--- a/bsfs/apps/migrate.py
+++ b/bsfs/apps/migrate.py
@@ -42,15 +42,15 @@ def main(argv):
graph = bsfs.Open(config)
# initialize schema
- schema = bsfs.schema.Schema.Empty()
+ schema = bsfs.schema.Schema()
if len(args.schema) == 0:
# assemble schema from standard input
- schema = schema + bsfs.schema.Schema.from_string(sys.stdin.read())
+ schema = schema + bsfs.schema.from_string(sys.stdin.read())
else:
# assemble schema from input files
for pth in args.schema:
with open(pth, mode='rt', encoding='UTF-8') as ifile:
- schema = schema + bsfs.schema.Schema.from_string(ifile.read())
+ schema = schema + bsfs.schema.from_string(ifile.read())
# migrate schema
graph.migrate(schema, not args.remove)
diff --git a/bsfs/graph/graph.py b/bsfs/graph/graph.py
index f030fed..2210755 100644
--- a/bsfs/graph/graph.py
+++ b/bsfs/graph/graph.py
@@ -10,7 +10,7 @@ import typing
# bsfs imports
from bsfs.query import ast, validate
-from bsfs.schema import Schema
+from bsfs import schema as bsc
from bsfs.triple_store import TripleStoreBase
from bsfs.utils import URI, typename
@@ -67,11 +67,11 @@ class Graph():
return f'{typename(self)}({str(self._backend)}, {self._user})'
@property
- def schema(self) -> Schema:
+ def schema(self) -> bsc.Schema:
"""Return the store's local schema."""
return self._backend.schema
- def migrate(self, schema: Schema, append: bool = True) -> 'Graph':
+ def migrate(self, schema: bsc.Schema, append: bool = True) -> 'Graph':
"""Migrate the current schema to a new *schema*.
Appends to the current schema by default; control this via *append*.
@@ -79,14 +79,14 @@ class Graph():
"""
# check args
- if not isinstance(schema, Schema):
+ if not isinstance(schema, bsc.Schema):
raise TypeError(schema)
# append to current schema
if append:
schema = schema + self._backend.schema
# add Graph schema requirements
with open(os.path.join(os.path.dirname(__file__), 'schema.nt'), mode='rt', encoding='UTF-8') as ifile:
- schema = schema + Schema.from_string(ifile.read())
+ schema = schema + bsc.from_string(ifile.read())
# migrate schema in backend
# FIXME: consult access controls!
self._backend.schema = schema
diff --git a/bsfs/graph/resolve.py b/bsfs/graph/resolve.py
index feb0855..00b778b 100644
--- a/bsfs/graph/resolve.py
+++ b/bsfs/graph/resolve.py
@@ -37,8 +37,6 @@ class Filter():
"""
- T_VERTEX = typing.Union[bsc.Node, bsc.Literal]
-
def __init__(self, schema):
self.schema = schema
@@ -47,7 +45,7 @@ class Filter():
def _parse_filter_expression(
self,
- type_: T_VERTEX,
+ type_: bsc.Vertex,
node: ast.filter.FilterExpression,
) -> ast.filter.FilterExpression:
"""Route *node* to the handler of the respective FilterExpression subclass."""
@@ -65,6 +63,8 @@ class Filter():
return self._and(type_, node)
if isinstance(node, ast.filter.Or):
return self._or(type_, node)
+ if isinstance(node, ast.filter.Distance):
+ return self._distance(type_, node)
if isinstance(node, (ast.filter.Equals, ast.filter.Substring, \
ast.filter.StartsWith, ast.filter.EndsWith)):
return self._value(type_, node)
@@ -73,7 +73,7 @@ class Filter():
# invalid node
raise errors.BackendError(f'expected filter expression, found {node}')
- def _parse_predicate_expression(self, node: ast.filter.PredicateExpression) -> T_VERTEX:
+ def _parse_predicate_expression(self, node: ast.filter.PredicateExpression) -> bsc.Vertex:
"""Route *node* to the handler of the respective PredicateExpression subclass."""
if isinstance(node, ast.filter.Predicate):
return self._predicate(node)
@@ -82,7 +82,7 @@ class Filter():
# invalid node
raise errors.BackendError(f'expected predicate expression, found {node}')
- def _predicate(self, node: ast.filter.Predicate) -> T_VERTEX:
+ def _predicate(self, node: ast.filter.Predicate) -> bsc.Vertex:
if not self.schema.has_predicate(node.predicate):
raise errors.ConsistencyError(f'predicate {node.predicate} is not in the schema')
pred = self.schema.predicate(node.predicate)
@@ -91,49 +91,52 @@ class Filter():
dom, rng = rng, dom
return rng
- def _one_of(self, node: ast.filter.OneOf) -> T_VERTEX:
+ def _one_of(self, node: ast.filter.OneOf) -> bsc.Vertex:
# determine domain and range types
rng = None
for pred in node:
# parse child expression
subrng = self._parse_predicate_expression(pred)
# determine the next type
- try:
- if rng is None or subrng > rng: # pick most generic range
- rng = subrng
- except TypeError as err:
- raise errors.ConsistencyError(f'ranges {subrng} and {rng} are not related') from err
- if rng is None:
- raise errors.UnreachableError()
+ if rng is None or subrng > rng: # pick most generic range
+ rng = subrng
+ # check range consistency
+ if not subrng <= rng and not subrng >= rng:
+ raise errors.ConsistencyError(f'ranges {subrng} and {rng} are not related')
+ if not isinstance(rng, (bsc.Node, bsc.Literal)):
+ raise errors.BackendError(f'the range of node {node} is undefined')
return rng
- def _any(self, type_: T_VERTEX, node: ast.filter.Any) -> ast.filter.Any: # pylint: disable=unused-argument
+ def _any(self, type_: bsc.Vertex, node: ast.filter.Any) -> ast.filter.Any: # pylint: disable=unused-argument
next_type = self._parse_predicate_expression(node.predicate)
return ast.filter.Any(node.predicate, self._parse_filter_expression(next_type, node.expr))
- def _all(self, type_: T_VERTEX, node: ast.filter.All) -> ast.filter.All: # pylint: disable=unused-argument
+ def _all(self, type_: bsc.Vertex, node: ast.filter.All) -> ast.filter.All: # pylint: disable=unused-argument
next_type = self._parse_predicate_expression(node.predicate)
return ast.filter.All(node.predicate, self._parse_filter_expression(next_type, node.expr))
- def _and(self, type_: T_VERTEX, node: ast.filter.And) -> ast.filter.And:
+ def _and(self, type_: bsc.Vertex, node: ast.filter.And) -> ast.filter.And:
return ast.filter.And({self._parse_filter_expression(type_, expr) for expr in node})
- def _or(self, type_: T_VERTEX, node: ast.filter.Or) -> ast.filter.Or:
+ def _or(self, type_: bsc.Vertex, node: ast.filter.Or) -> ast.filter.Or:
return ast.filter.Or({self._parse_filter_expression(type_, expr) for expr in node})
- def _not(self, type_: T_VERTEX, node: ast.filter.Not) -> ast.filter.Not:
+ def _not(self, type_: bsc.Vertex, node: ast.filter.Not) -> ast.filter.Not:
return ast.filter.Not(self._parse_filter_expression(type_, node.expr))
- def _has(self, type_: T_VERTEX, node: ast.filter.Has) -> ast.filter.Has: # pylint: disable=unused-argument
+ def _has(self, type_: bsc.Vertex, node: ast.filter.Has) -> ast.filter.Has: # pylint: disable=unused-argument
+ return node
+
+ def _distance(self, type_: bsc.Vertex, node: ast.filter.Distance): # pylint: disable=unused-argument
return node
- def _value(self, type_: T_VERTEX, node: ast.filter._Value) -> ast.filter._Value: # pylint: disable=unused-argument
+ def _value(self, type_: bsc.Vertex, node: ast.filter._Value) -> ast.filter._Value: # pylint: disable=unused-argument
return node
- def _bounded(self, type_: T_VERTEX, node: ast.filter._Bounded) -> ast.filter._Bounded: # pylint: disable=unused-argument
+ def _bounded(self, type_: bsc.Vertex, node: ast.filter._Bounded) -> ast.filter._Bounded: # pylint: disable=unused-argument
return node
- def _is(self, type_: T_VERTEX, node: ast.filter.Is) -> typing.Union[ast.filter.Or, ast.filter.Is]:
+ def _is(self, type_: bsc.Vertex, node: ast.filter.Is) -> typing.Union[ast.filter.Or, ast.filter.Is]:
# check if action is needed
if not isinstance(node.value, nodes.Nodes):
return node
diff --git a/bsfs/graph/schema.nt b/bsfs/graph/schema.nt
index 8612681..f619746 100644
--- a/bsfs/graph/schema.nt
+++ b/bsfs/graph/schema.nt
@@ -8,7 +8,8 @@ prefix bsfs: <http://bsfs.ai/schema/>
prefix bsm: <http://bsfs.ai/schema/Meta#>
# literals
-xsd:integer rdfs:subClassOf bsfs:Literal .
+bsfs:Number rdfs:subClassOf bsfs:Literal .
+xsd:integer rdfs:subClassOf bsfs:Number .
# predicates
bsm:t_created rdfs:subClassOf bsfs:Predicate ;
diff --git a/bsfs/namespace/namespace.py b/bsfs/namespace/namespace.py
index f652dcd..1d443c1 100644
--- a/bsfs/namespace/namespace.py
+++ b/bsfs/namespace/namespace.py
@@ -59,7 +59,7 @@ class Namespace():
return hash((type(self), self.prefix, self.fsep, self.psep))
def __str__(self) -> str:
- return f'{typename(self)}({self.prefix})'
+ return str(self.prefix)
def __repr__(self) -> str:
return f'{typename(self)}({self.prefix}, {self.fsep}, {self.psep})'
diff --git a/bsfs/query/ast/filter_.py b/bsfs/query/ast/filter_.py
index b129ded..2f0270c 100644
--- a/bsfs/query/ast/filter_.py
+++ b/bsfs/query/ast/filter_.py
@@ -252,8 +252,7 @@ class Has(FilterExpression):
class _Value(FilterExpression):
- """
- """
+ """Matches some value."""
# target value.
value: typing.Any
@@ -277,13 +276,13 @@ class Is(_Value):
class Equals(_Value):
"""Value matches exactly.
- NOTE: Value format must correspond to literal type; can be a string, a number, or a Node
+ NOTE: Value must correspond to literal type.
"""
class Substring(_Value):
"""Value matches a substring
- NOTE: value format must be a string
+ NOTE: value must be a string.
"""
@@ -295,9 +294,49 @@ class EndsWith(_Value):
"""Value ends with a given string."""
+class Distance(FilterExpression):
+ """Distance to a reference is (strictly) below a threshold. Assumes a Feature literal."""
+
+ # FIXME:
+ # (a) pass a node/predicate as anchor instead of a value.
+ # Then we don't need to materialize the reference.
+ # (b) pass a FilterExpression (_Bounded) instead of a threshold.
+ # Then, we could also query values greater than a threshold.
+
+ # reference value.
+ reference: typing.Any
+
+ # distance threshold.
+ threshold: float
+
+ # closed (True) or open (False) bound.
+ strict: bool
+
+ def __init__(
+ self,
+ reference: typing.Any,
+ threshold: float,
+ strict: bool = False,
+ ):
+ self.reference = reference
+ self.threshold = float(threshold)
+ self.strict = bool(strict)
+
+ def __repr__(self) -> str:
+ return f'{typename(self)}({self.reference}, {self.threshold}, {self.strict})'
+
+ def __hash__(self) -> int:
+ return hash((super().__hash__(), tuple(self.reference), self.threshold, self.strict))
+
+ def __eq__(self, other) -> bool:
+ return super().__eq__(other) \
+ and self.reference == other.reference \
+ and self.threshold == other.threshold \
+ and self.strict == other.strict
+
+
class _Bounded(FilterExpression):
- """
- """
+ """Value is bounded by a threshold. Assumes a Number literal."""
# bound.
threshold: float
@@ -327,15 +366,11 @@ class _Bounded(FilterExpression):
class LessThan(_Bounded):
- """Value is (strictly) smaller than threshold.
- NOTE: only on numerical literals
- """
+ """Value is (strictly) smaller than threshold. Assumes a Number literal."""
class GreaterThan(_Bounded):
- """Value is (strictly) larger than threshold
- NOTE: only on numerical literals
- """
+ """Value is (strictly) larger than threshold. Assumes a Number literal."""
class Predicate(PredicateExpression):
diff --git a/bsfs/query/validator.py b/bsfs/query/validator.py
index 352203a..904ac14 100644
--- a/bsfs/query/validator.py
+++ b/bsfs/query/validator.py
@@ -34,9 +34,6 @@ class Filter():
"""
- # vertex types
- T_VERTEX = typing.Union[bsc.Node, bsc.Literal] # FIXME: Shouldn't this be in the schema?
-
# schema to validate against.
schema: bsc.Schema
@@ -64,7 +61,7 @@ class Filter():
## routing methods
- def _parse_filter_expression(self, type_: T_VERTEX, node: ast.filter.FilterExpression):
+ def _parse_filter_expression(self, type_: bsc.Vertex, node: ast.filter.FilterExpression):
"""Route *node* to the handler of the respective FilterExpression subclass."""
if isinstance(node, ast.filter.Is):
return self._is(type_, node)
@@ -72,6 +69,8 @@ class Filter():
return self._not(type_, node)
if isinstance(node, ast.filter.Has):
return self._has(type_, node)
+ if isinstance(node, ast.filter.Distance):
+ return self._distance(type_, node)
if isinstance(node, (ast.filter.Any, ast.filter.All)):
return self._branch(type_, node)
if isinstance(node, (ast.filter.And, ast.filter.Or)):
@@ -83,7 +82,7 @@ class Filter():
# invalid node
raise errors.BackendError(f'expected filter expression, found {node}')
- def _parse_predicate_expression(self, node: ast.filter.PredicateExpression) -> typing.Tuple[T_VERTEX, T_VERTEX]:
+ def _parse_predicate_expression(self, node: ast.filter.PredicateExpression) -> typing.Tuple[bsc.Vertex, bsc.Vertex]:
"""Route *node* to the handler of the respective PredicateExpression subclass."""
if isinstance(node, ast.filter.Predicate):
return self._predicate(node)
@@ -95,58 +94,47 @@ class Filter():
## predicate expressions
- def _predicate(self, node: ast.filter.Predicate) -> typing.Tuple[T_VERTEX, T_VERTEX]:
+ def _predicate(self, node: ast.filter.Predicate) -> typing.Tuple[bsc.Vertex, bsc.Vertex]:
# predicate exists in the schema
if not self.schema.has_predicate(node.predicate):
raise errors.ConsistencyError(f'predicate {node.predicate} is not in the schema')
# determine domain and range
pred = self.schema.predicate(node.predicate)
+ if not isinstance(pred.range, (bsc.Node, bsc.Literal)):
+ raise errors.BackendError(f'the range of predicate {pred} is undefined')
dom, rng = pred.domain, pred.range
- if rng is None:
- # FIXME: It is a design error that Predicates can have a None range...
- raise errors.BackendError(f'predicate {pred} has no range')
if node.reverse:
dom, rng = rng, dom # type: ignore [assignment] # variable re-use confuses mypy
# return domain and range
return dom, rng
- def _one_of(self, node: ast.filter.OneOf) -> typing.Tuple[T_VERTEX, T_VERTEX]:
+ def _one_of(self, node: ast.filter.OneOf) -> typing.Tuple[bsc.Vertex, bsc.Vertex]:
# determine domain and range types
# NOTE: select the most specific domain and the most generic range
dom, rng = None, None
for pred in node:
# parse child expression
subdom, subrng = self._parse_predicate_expression(pred)
- try:
- # determine overall domain
- if dom is None or subdom < dom: # pick most specific domain
- dom = subdom
- # domains must be related across all child expressions
- if not subdom <= dom and not subdom >= dom:
- raise errors.ConsistencyError(f'domains {subdom} and {dom} are not related')
- except TypeError as err: # compared literal vs. node
- raise errors.ConsistencyError(f'domains {subdom} and {dom} are not of the same type') from err
-
- try:
- # determine overall range
- if rng is None or subrng > rng: # pick most generic range
- rng = subrng
- # ranges must be related across all child expressions
- if not subrng <= rng and not subrng >= rng:
- raise errors.ConsistencyError(f'ranges {subrng} and {rng} are not related')
- except TypeError as err: # compared literal vs. node
- raise errors.ConsistencyError(f'ranges {subrng} and {rng} are not of the same type') from err
- # check domain and range
- if dom is None or rng is None:
- # OneOf guarantees at least one expression, these two cases cannot happen
- raise errors.UnreachableError()
- # return domain and range
- return dom, rng
+ # determine overall domain
+ if dom is None or subdom < dom: # pick most specific domain
+ dom = subdom
+ # domains must be related across all child expressions
+ if not subdom <= dom and not subdom >= dom:
+ raise errors.ConsistencyError(f'domains {subdom} and {dom} are not related')
+ # determine overall range
+ if rng is None or subrng > rng: # pick most generic range
+ rng = subrng
+ # ranges must be related across all child expressions
+ if not subrng <= rng and not subrng >= rng:
+ raise errors.ConsistencyError(f'ranges {subrng} and {rng} are not related')
+ # OneOf guarantees at least one expression, dom and rng are always bsc.Vertex.
+ # mypy does not realize this, hence we ignore the warning.
+ return dom, rng # type: ignore [return-value]
## intermediates
- def _branch(self, type_: T_VERTEX, node: ast.filter._Branch):
+ def _branch(self, type_: bsc.Vertex, node: ast.filter._Branch):
# type is a Node
if not isinstance(type_, bsc.Node):
raise errors.ConsistencyError(f'expected a Node, found {type_}')
@@ -167,16 +155,16 @@ class Filter():
# child expression is valid
self._parse_filter_expression(rng, node.expr)
- def _agg(self, type_: T_VERTEX, node: ast.filter._Agg):
+ def _agg(self, type_: bsc.Vertex, node: ast.filter._Agg):
for expr in node:
# child expression is valid
self._parse_filter_expression(type_, expr)
- def _not(self, type_: T_VERTEX, node: ast.filter.Not):
+ def _not(self, type_: bsc.Vertex, node: ast.filter.Not):
# child expression is valid
self._parse_filter_expression(type_, node.expr)
- def _has(self, type_: T_VERTEX, node: ast.filter.Has):
+ def _has(self, type_: bsc.Vertex, node: ast.filter.Has):
# type is a Node
if not isinstance(type_, bsc.Node):
raise errors.ConsistencyError(f'expected a Node, found {type_}')
@@ -189,19 +177,30 @@ class Filter():
if not type_ <= dom:
raise errors.ConsistencyError(f'expected type {dom}, found {type_}')
# node.count is a numerical expression
- # FIXME: We have to ensure that ns.xsd.integer is always known in the schema!
- self._parse_filter_expression(self.schema.literal(ns.xsd.integer), node.count)
+ self._parse_filter_expression(self.schema.literal(ns.bsfs.Number), node.count)
+
+ def _distance(self, type_: bsc.Vertex, node: ast.filter.Distance):
+ # type is a Literal
+ if not isinstance(type_, bsc.Feature):
+ raise errors.ConsistencyError(f'expected a Feature, found {type_}')
+ # type exists in the schema
+ if type_ not in self.schema.literals():
+ raise errors.ConsistencyError(f'literal {type_} is not in the schema')
+ # reference matches type_
+ if len(node.reference) != type_.dimension:
+ raise errors.ConsistencyError(f'reference has dimension {len(node.reference)}, expected {type_.dimension}')
+ # FIXME: test dtype
## conditions
- def _is(self, type_: T_VERTEX, node: ast.filter.Is): # pylint: disable=unused-argument # (node)
+ def _is(self, type_: bsc.Vertex, node: ast.filter.Is): # pylint: disable=unused-argument # (node)
if not isinstance(type_, bsc.Node):
raise errors.ConsistencyError(f'expected a Node, found {type_}')
if type_ not in self.schema.nodes():
raise errors.ConsistencyError(f'node {type_} is not in the schema')
- def _value(self, type_: T_VERTEX, node: ast.filter._Value): # pylint: disable=unused-argument # (node)
+ def _value(self, type_: bsc.Vertex, node: ast.filter._Value): # pylint: disable=unused-argument # (node)
# type is a literal
if not isinstance(type_, bsc.Literal):
raise errors.ConsistencyError(f'expected a Literal, found {type_}')
@@ -211,13 +210,16 @@ class Filter():
# FIXME: Check if node.value corresponds to type_
# FIXME: A specific literal might be requested (i.e., a numeric type when used in Has)
- def _bounded(self, type_: T_VERTEX, node: ast.filter._Bounded): # pylint: disable=unused-argument # (node)
+ def _bounded(self, type_: bsc.Vertex, node: ast.filter._Bounded): # pylint: disable=unused-argument # (node)
# type is a literal
if not isinstance(type_, bsc.Literal):
raise errors.ConsistencyError(f'expected a Literal, found {type_}')
# type exists in the schema
if type_ not in self.schema.literals():
raise errors.ConsistencyError(f'literal {type_} is not in the schema')
+ # type must be a numerical
+ if not type_ <= self.schema.literal(ns.bsfs.Number):
+ raise errors.ConsistencyError(f'expected a number type, found {type_}')
# FIXME: Check if node.value corresponds to type_
diff --git a/bsfs/schema/__init__.py b/bsfs/schema/__init__.py
index ad4d456..f53512e 100644
--- a/bsfs/schema/__init__.py
+++ b/bsfs/schema/__init__.py
@@ -9,7 +9,12 @@ import typing
# inner-module imports
from .schema import Schema
-from .types import Literal, Node, Predicate
+from .serialize import from_string, to_string
+from .types import Literal, Node, Predicate, Vertex, Feature, \
+ ROOT_VERTEX, ROOT_NODE, ROOT_LITERAL, \
+ ROOT_NUMBER, ROOT_TIME, \
+ ROOT_ARRAY, ROOT_FEATURE, \
+ ROOT_PREDICATE
# exports
__all__: typing.Sequence[str] = (
@@ -17,6 +22,8 @@ __all__: typing.Sequence[str] = (
'Node',
'Predicate',
'Schema',
+ 'from_string',
+ 'to_string',
)
## EOF ##
diff --git a/bsfs/schema/schema.py b/bsfs/schema/schema.py
index c5d4571..8d9a821 100644
--- a/bsfs/schema/schema.py
+++ b/bsfs/schema/schema.py
@@ -7,10 +7,8 @@ Author: Matthias Baumgartner, 2022
# imports
from collections import abc, namedtuple
import typing
-import rdflib
# bsfs imports
-from bsfs.namespace import ns
from bsfs.utils import errors, URI, typename
# inner-module imports
@@ -51,11 +49,13 @@ class Schema():
def __init__(
self,
- predicates: typing.Iterable[types.Predicate],
+ predicates: typing.Optional[typing.Iterable[types.Predicate]] = None,
nodes: typing.Optional[typing.Iterable[types.Node]] = None,
literals: typing.Optional[typing.Iterable[types.Literal]] = None,
):
# materialize arguments
+ if predicates is None:
+ predicates = set()
if nodes is None:
nodes = set()
if literals is None:
@@ -63,24 +63,40 @@ class Schema():
nodes = set(nodes)
literals = set(literals)
predicates = set(predicates)
+
+ # add root types to the schema
+ nodes.add(types.ROOT_NODE)
+ literals.add(types.ROOT_LITERAL)
+ predicates.add(types.ROOT_PREDICATE)
+ # add minimally necessary types to the schema
+ literals.add(types.ROOT_NUMBER)
+ literals.add(types.ROOT_TIME)
+ literals.add(types.ROOT_ARRAY)
+ literals.add(types.ROOT_FEATURE)
+
+ # FIXME: ensure that types derive from the right root?
+
# include parents in predicates set
# TODO: review type annotations and ignores for python >= 3.11 (parents is _Type but should be typing.Self)
predicates |= {par for pred in predicates for par in pred.parents()} # type: ignore [misc]
# include predicate domain in nodes set
nodes |= {pred.domain for pred in predicates}
# include predicate range in nodes and literals sets
- prange = {pred.range for pred in predicates if pred.range is not None}
+ prange = {pred.range for pred in predicates}
nodes |= {vert for vert in prange if isinstance(vert, types.Node)}
literals |= {vert for vert in prange if isinstance(vert, types.Literal)}
+ # NOTE: ROOT_PREDICATE has a Vertex as range which is neither in nodes nor literals
+ # FIXME: with the ROOT_VERTEX missing, the schema is not complete anymore!
+
# include parents in nodes and literals sets
- # NOTE: Must be done after predicate domain/range was handled
- # so that their parents are included as well.
+ # NOTE: Must come after predicate domain/range was handled to have their parents as well.
nodes |= {par for node in nodes for par in node.parents()} # type: ignore [misc]
literals |= {par for lit in literals for par in lit.parents()} # type: ignore [misc]
# assign members
self._nodes = {node.uri: node for node in nodes}
self._literals = {lit.uri: lit for lit in literals}
self._predicates = {pred.uri: pred for pred in predicates}
+
# verify unique uris
if len(nodes) != len(self._nodes):
raise errors.ConsistencyError('inconsistent nodes')
@@ -214,6 +230,7 @@ class Schema():
>>> Schema.Union([a, b, c])
"""
+ # FIXME: copy type annotations?
if len(args) == 0:
raise TypeError('Schema.Union requires at least one argument (Schema or Iterable)')
if isinstance(args[0], cls): # args is sequence of Schema instances
@@ -295,92 +312,4 @@ class Schema():
"""Return the Literal matching the *uri*."""
return self._literals[uri]
-
- ## constructors ##
-
-
- @classmethod
- def Empty(cls) -> 'Schema': # pylint: disable=invalid-name # capitalized classmethod
- """Return a minimal Schema."""
- node = types.Node(ns.bsfs.Node, None)
- literal = types.Literal(ns.bsfs.Literal, None)
- predicate = types.Predicate(
- uri=ns.bsfs.Predicate,
- parent=None,
- domain=node,
- range=None,
- unique=False,
- )
- return cls((predicate, ), (node, ), (literal, ))
-
-
- @classmethod
- def from_string(cls, schema: str) -> 'Schema': # pylint: disable=invalid-name # capitalized classmethod
- """Load and return a Schema from a string."""
- # parse string into rdf graph
- graph = rdflib.Graph()
- graph.parse(data=schema, format='turtle')
-
- def _fetch_hierarchically(factory, curr):
- # emit current node
- yield curr
- # walk through childs
- for child in graph.subjects(rdflib.URIRef(ns.rdfs.subClassOf), rdflib.URIRef(curr.uri)):
- # convert to URI
- child = URI(child)
- # check circular dependency
- if child == curr.uri or child in {node.uri for node in curr.parents()}:
- raise errors.ConsistencyError('circular dependency')
- # recurse and emit (sub*)childs
- yield from _fetch_hierarchically(factory, factory(child, curr))
-
- # fetch nodes
- nodes = set(_fetch_hierarchically(types.Node, types.Node(ns.bsfs.Node, None)))
- nodes_lut = {node.uri: node for node in nodes}
- if len(nodes_lut) != len(nodes):
- raise errors.ConsistencyError('inconsistent nodes')
-
- # fetch literals
- literals = set(_fetch_hierarchically(types.Literal, types.Literal(ns.bsfs.Literal, None)))
- literals_lut = {lit.uri: lit for lit in literals}
- if len(literals_lut) != len(literals):
- raise errors.ConsistencyError('inconsistent literals')
-
- # fetch predicates
- def build_predicate(uri, parent):
- uri = rdflib.URIRef(uri)
- # get domain
- domains = set(graph.objects(uri, rdflib.RDFS.domain))
- if len(domains) != 1:
- raise errors.ConsistencyError(f'inconsistent domain: {domains}')
- dom = nodes_lut.get(next(iter(domains)))
- if dom is None:
- raise errors.ConsistencyError('missing domain')
- # get range
- ranges = set(graph.objects(uri, rdflib.RDFS.range))
- if len(ranges) != 1:
- raise errors.ConsistencyError(f'inconsistent range: {ranges}')
- rng = next(iter(ranges))
- rng = nodes_lut.get(rng, literals_lut.get(rng))
- if rng is None:
- raise errors.ConsistencyError('missing range')
- # get unique flag
- uniques = set(graph.objects(uri, rdflib.URIRef(ns.bsfs.unique)))
- if len(uniques) != 1:
- raise errors.ConsistencyError(f'inconsistent unique flags: {uniques}')
- unique = bool(next(iter(uniques)))
- # build Predicate
- return types.Predicate(URI(uri), parent, dom, rng, unique)
-
- root_predicate = types.Predicate(
- uri=ns.bsfs.Predicate,
- parent=None,
- domain=nodes_lut[ns.bsfs.Node],
- range=None, # FIXME: Unclear how to handle this! Can be either a Literal or a Node
- unique=False,
- )
- predicates = _fetch_hierarchically(build_predicate, root_predicate)
- # return Schema
- return cls(predicates, nodes, literals)
-
## EOF ##
diff --git a/bsfs/schema/serialize.py b/bsfs/schema/serialize.py
new file mode 100644
index 0000000..acc009a
--- /dev/null
+++ b/bsfs/schema/serialize.py
@@ -0,0 +1,259 @@
+"""
+
+Part of the BlackStar filesystem (bsfs) module.
+A copy of the license is provided with the project.
+Author: Matthias Baumgartner, 2022
+"""
+# standard imports
+import itertools
+import typing
+
+# external imports
+import rdflib
+
+# bsfs imports
+from bsfs.namespace import ns
+from bsfs.utils import errors, URI, typename
+
+# inner-module imports
+from . import types
+from . import schema
+
+# exports
+__all__: typing.Sequence[str] = (
+ 'to_string',
+ 'from_string',
+ )
+
+
+## code ##
+
+def from_string(schema_str: str) -> schema.Schema:
+ """Load and return a Schema from a string."""
+ # parse string into rdf graph
+ graph = rdflib.Graph()
+ graph.parse(data=schema_str, format='turtle')
+
+ # helper functions
+ def _fetch_value(
+ subject: URI,
+ predicate: rdflib.URIRef,
+ value_factory: typing.Callable[[typing.Any], typing.Any],
+ ) -> typing.Optional[typing.Any]:
+ """Fetch the object of a given subject and predicate.
+ Raises a `errors.ConsistencyError` if multiple objects match.
+ """
+ values = list(graph.objects(rdflib.URIRef(subject), predicate))
+ if len(values) == 0:
+ return None
+ if len(values) == 1:
+ return value_factory(values[0])
+ raise errors.ConsistencyError(
+ f'{subject} has multiple values for predicate {str(predicate)}, expected zero or one')
+
+ def _convert(value):
+ """Convert the subject type from rdflib to a bsfs native type."""
+ if isinstance(value, rdflib.Literal):
+ return value.value
+ if isinstance(value, rdflib.URIRef):
+ return URI(value)
+ # value is neither a node nor a literal, but e.g. a blank node
+ raise errors.BackendError(f'expected Literal or URIRef, found {typename(value)}')
+
+ def _fetch_hierarchically(factory, curr):
+ """Walk through a rdfs:subClassOf hierarchy, creating symbols along the way."""
+ # emit current node
+ yield curr
+ # walk through childs
+ for child in graph.subjects(rdflib.URIRef(ns.rdfs.subClassOf), rdflib.URIRef(curr.uri)):
+ # fetch annotations
+ annotations = {
+ URI(pred): _convert(value)
+ for pred, value # FIXME: preserve datatype of value?!
+ in graph.predicate_objects(child)
+ if URI(pred) != ns.rdfs.subClassOf
+ }
+ # convert child to URI
+ child = URI(child)
+ # check circular dependency
+ if child == curr.uri or child in {node.uri for node in curr.parents()}:
+ raise errors.ConsistencyError('circular dependency')
+ # recurse and emit (sub*)childs
+ yield from _fetch_hierarchically(factory, factory(child, curr, **annotations))
+
+ # fetch nodes
+ nodes = set(_fetch_hierarchically(types.Node, types.ROOT_NODE))
+ nodes_lut = {node.uri: node for node in nodes}
+ if len(nodes_lut) != len(nodes):
+ raise errors.ConsistencyError('inconsistent nodes')
+
+ # fetch literals
+ def _build_literal(uri, parent, **annotations):
+ """Literal factory."""
+ # break out on root feature type
+ if uri == types.ROOT_FEATURE.uri:
+ return types.ROOT_FEATURE
+ # handle feature types
+ if isinstance(parent, types.Feature):
+ # clean annotations
+ annotations.pop(ns.bsfs.dimension, None)
+ annotations.pop(ns.bsfs.dtype, None)
+ annotations.pop(ns.bsfs.distance, None)
+ # get dimension
+ dimension = _fetch_value(uri, rdflib.URIRef(ns.bsfs.dimension), int)
+ # get dtype
+ dtype = _fetch_value(uri, rdflib.URIRef(ns.bsfs.dtype), URI)
+ # get distance
+ distance = _fetch_value(uri, rdflib.URIRef(ns.bsfs.distance), URI)
+ # return feature
+ return parent.child(URI(uri), dtype=dtype, dimension=dimension, distance=distance, **annotations)
+ # handle non-feature types
+ return parent.child(URI(uri), **annotations)
+
+ literals = set(_fetch_hierarchically(_build_literal, types.ROOT_LITERAL))
+ literals_lut = {lit.uri: lit for lit in literals}
+ if len(literals_lut) != len(literals):
+ raise errors.ConsistencyError('inconsistent literals')
+
+ # fetch predicates
+ def _build_predicate(uri, parent, **annotations):
+ """Predicate factory."""
+ # clean annotations
+ annotations.pop(ns.rdfs.domain, None)
+ annotations.pop(ns.rdfs.range, None)
+ annotations.pop(ns.bsfs.unique, None)
+ # get domain
+ dom = _fetch_value(uri, rdflib.RDFS.domain, URI)
+ if dom is not None and dom not in nodes_lut:
+ raise errors.ConsistencyError(f'predicate {uri} has undefined domain {dom}')
+ if dom is not None:
+ dom = nodes_lut[dom]
+ # get range
+ rng = _fetch_value(uri, rdflib.RDFS.range, URI)
+ if rng is not None and rng not in nodes_lut and rng not in literals_lut:
+ raise errors.ConsistencyError(f'predicate {uri} has undefined range {rng}')
+ if rng is not None:
+ rng = nodes_lut.get(rng, literals_lut.get(rng))
+ # get unique
+ unique = _fetch_value(uri, rdflib.URIRef(ns.bsfs.unique), bool)
+ # build predicate
+ return parent.child(URI(uri), domain=dom, range=rng, unique=unique, **annotations)
+
+ predicates = _fetch_hierarchically(_build_predicate, types.ROOT_PREDICATE)
+
+ return schema.Schema(predicates, nodes, literals)
+
+
+
+def to_string(schema_inst: schema.Schema, fmt: str = 'turtle') -> str:
+ """Serialize a `bsfs.schema.Schema` to a string.
+ See `rdflib.Graph.serialize` for viable formats (default: turtle).
+ """
+
+ # type of emitted triples.
+ T_TRIPLE = typing.Iterator[typing.Tuple[rdflib.URIRef, rdflib.URIRef, rdflib.term.Identifier]]
+
+ def _type(tpe: types._Type) -> T_TRIPLE :
+ """Emit _Type properties (parent, annotations)."""
+ # emit parent
+ if tpe.parent is not None:
+ yield (
+ rdflib.URIRef(tpe.uri),
+ rdflib.URIRef(ns.rdfs.subClassOf),
+ rdflib.URIRef(tpe.parent.uri),
+ )
+ # emit annotations
+ for prop, value in tpe.annotations.items():
+ yield (
+ rdflib.URIRef(tpe.uri),
+ rdflib.URIRef(prop),
+ rdflib.Literal(value), # FIXME: datatype?!
+ )
+
+ def _predicate(pred: types.Predicate) -> T_TRIPLE:
+ """Emit Predicate properties (domain, range, unique)."""
+ # no need to emit anything for the root predicate
+ if pred == types.ROOT_PREDICATE:
+ return
+ # emit domain
+ if pred.domain != getattr(pred.parent, 'domain', None):
+ yield (
+ rdflib.URIRef(pred.uri),
+ rdflib.URIRef(ns.rdfs.domain),
+ rdflib.URIRef(pred.domain.uri),
+ )
+ # emit range
+ if pred.range != getattr(pred.parent, 'range', None):
+ yield (
+ rdflib.URIRef(pred.uri),
+ rdflib.URIRef(ns.rdfs.range),
+ rdflib.URIRef(pred.range.uri),
+ )
+ # emit cardinality
+ if pred.unique != getattr(pred.parent, 'unique', None):
+ yield (
+ rdflib.URIRef(pred.uri),
+ rdflib.URIRef(ns.bsfs.unique),
+ rdflib.Literal(pred.unique, datatype=rdflib.XSD.boolean),
+ )
+
+ def _feature(feat: types.Feature) -> T_TRIPLE:
+ """Emit Feature properties (dimension, dtype, distance)."""
+ # emit size
+ if feat.dimension != getattr(feat.parent, 'dimension', None):
+ yield (
+ rdflib.URIRef(feat.uri),
+ rdflib.URIRef(ns.bsfs.dimension),
+ rdflib.Literal(feat.dimension, datatype=rdflib.XSD.integer),
+ )
+ # emit dtype
+ if feat.dtype != getattr(feat.parent, 'dtype', None):
+ yield (
+ rdflib.URIRef(feat.uri),
+ rdflib.URIRef(ns.bsfs.dtype),
+ rdflib.URIRef(feat.dtype),
+ )
+ # emit distance
+ if feat.distance != getattr(feat.parent, 'distance', None):
+ yield (
+ rdflib.URIRef(feat.uri),
+ rdflib.URIRef(ns.bsfs.distance),
+ rdflib.URIRef(feat.distance),
+ )
+
+ def _parse(node: types._Type) -> T_TRIPLE:
+ """Emit all properties of a type."""
+ # check arg
+ if not isinstance(node, types._Type): # pylint: disable=protected-access
+ raise TypeError(node)
+ # emit _Type essentials
+ yield from _type(node)
+ # emit properties of derived types
+ if isinstance(node, types.Predicate):
+ yield from _predicate(node)
+ if isinstance(node, types.Feature):
+ yield from _feature(node)
+
+ # create graph
+ graph = rdflib.Graph()
+ # add triples to graph
+ nodes = itertools.chain(
+ schema_inst.nodes(),
+ schema_inst.literals(),
+ schema_inst.predicates())
+ for node in nodes:
+ for triple in _parse(node):
+ graph.add(triple)
+ # add known namespaces for readability
+ # FIXME: more generically?
+ graph.bind('bse', rdflib.URIRef(ns.bse['']))
+ graph.bind('bsfs', rdflib.URIRef(ns.bsfs['']))
+ graph.bind('bsm', rdflib.URIRef(ns.bsm['']))
+ graph.bind('rdf', rdflib.URIRef(ns.rdf['']))
+ graph.bind('rdfs', rdflib.URIRef(ns.rdfs['']))
+ graph.bind('schema', rdflib.URIRef(ns.schema['']))
+ graph.bind('xsd', rdflib.URIRef(ns.xsd['']))
+ # serialize to turtle
+ return graph.serialize(format=fmt)
+
+## EOF ##
diff --git a/bsfs/schema/types.py b/bsfs/schema/types.py
index 54a7e99..3a2e10c 100644
--- a/bsfs/schema/types.py
+++ b/bsfs/schema/types.py
@@ -8,6 +8,7 @@ Author: Matthias Baumgartner, 2022
import typing
# bsfs imports
+from bsfs.namespace import ns
from bsfs.utils import errors, URI, typename
# exports
@@ -15,6 +16,7 @@ __all__: typing.Sequence[str] = (
'Literal',
'Node',
'Predicate',
+ 'Feature',
)
@@ -99,9 +101,11 @@ class _Type():
self,
uri: URI,
parent: typing.Optional['_Type'] = None,
+ **annotations: typing.Any,
):
self.uri = uri
self.parent = parent
+ self.annotations = annotations
def parents(self) -> typing.Generator['_Type', None, None]:
"""Generate a list of parent nodes."""
@@ -110,9 +114,17 @@ class _Type():
yield curr
curr = curr.parent
- def get_child(self, uri: URI, **kwargs):
+ def child(
+ self,
+ uri: URI,
+ **kwargs,
+ ):
"""Return a child of the current class."""
- return type(self)(uri, self, **kwargs)
+ return type(self)(
+ uri=uri,
+ parent=self,
+ **kwargs
+ )
def __str__(self) -> str:
return f'{typename(self)}({self.uri})'
@@ -138,8 +150,10 @@ class _Type():
def __lt__(self, other: typing.Any) -> bool:
"""Return True iff *self* is a true subclass of *other*."""
- if not type(self) is type(other): # type mismatch # pylint: disable=unidiomatic-typecheck
+ if not isinstance(other, _Type):
return NotImplemented
+ if not isinstance(other, type(self)): # FIXME: necessary?
+ return False
if self.uri == other.uri: # equivalence
return False
if self in other.parents(): # superclass
@@ -151,8 +165,10 @@ class _Type():
def __le__(self, other: typing.Any) -> bool:
"""Return True iff *self* is equivalent or a subclass of *other*."""
- if not type(self) is type(other): # type mismatch # pylint: disable=unidiomatic-typecheck
+ if not isinstance(other, _Type):
return NotImplemented
+ if not isinstance(other, type(self)): # FIXME: necessary?
+ return False
if self.uri == other.uri: # equivalence
return True
if self in other.parents(): # superclass
@@ -164,8 +180,10 @@ class _Type():
def __gt__(self, other: typing.Any) -> bool:
"""Return True iff *self* is a true superclass of *other*."""
- if not type(self) is type(other): # type mismatch # pylint: disable=unidiomatic-typecheck
+ if not isinstance(other, _Type):
return NotImplemented
+ if not isinstance(other, type(self)): # FIXME: necessary?
+ return False
if self.uri == other.uri: # equivalence
return False
if self in other.parents(): # superclass
@@ -177,8 +195,10 @@ class _Type():
def __ge__(self, other: typing.Any) -> bool:
"""Return True iff *self* is eqiuvalent or a superclass of *other*."""
- if not type(self) is type(other): # type mismatch # pylint: disable=unidiomatic-typecheck
+ if not isinstance(other, _Type):
return NotImplemented
+ if not isinstance(other, type(self)): # FIXME: necessary?
+ return False
if self.uri == other.uri: # equivalence
return True
if self in other.parents(): # superclass
@@ -189,32 +209,95 @@ class _Type():
return False
-class _Vertex(_Type):
+class Vertex(_Type):
"""Graph vertex types. Can be a Node or a Literal."""
- def __init__(self, uri: URI, parent: typing.Optional['_Vertex']):
- super().__init__(uri, parent)
+ parent: typing.Optional['Vertex']
+ def __init__(self, uri: URI, parent: typing.Optional['Vertex'], **kwargs):
+ super().__init__(uri, parent, **kwargs)
-class Node(_Vertex):
+class Node(Vertex):
"""Node type."""
- def __init__(self, uri: URI, parent: typing.Optional['Node']):
- super().__init__(uri, parent)
+ parent: typing.Optional['Node']
+ def __init__(self, uri: URI, parent: typing.Optional['Node'], **kwargs):
+ super().__init__(uri, parent, **kwargs)
-class Literal(_Vertex):
+class Literal(Vertex):
"""Literal type."""
- def __init__(self, uri: URI, parent: typing.Optional['Literal']):
- super().__init__(uri, parent)
+ parent: typing.Optional['Literal']
+ def __init__(self, uri: URI, parent: typing.Optional['Literal'], **kwargs):
+ super().__init__(uri, parent, **kwargs)
+
+
+class Feature(Literal):
+ """Feature type."""
+
+ # Number of feature vector dimensions.
+ dimension: int
+
+ # Feature vector datatype.
+ dtype: URI
+
+ # Distance measure to compare feature vectors.
+ distance: URI
+
+ def __init__(
+ self,
+ # Type members
+ uri: URI,
+ parent: typing.Optional[Literal],
+ # Feature members
+ dimension: int,
+ dtype: URI,
+ distance: URI,
+ **kwargs,
+ ):
+ super().__init__(uri, parent, **kwargs)
+ self.dimension = int(dimension)
+ self.dtype = URI(dtype)
+ self.distance = URI(distance)
+
+ def __hash__(self) -> int:
+ return hash((super().__hash__(), self.dimension, self.dtype, self.distance))
+
+ def __eq__(self, other: typing.Any) -> bool:
+ return super().__eq__(other) \
+ and self.dimension == other.dimension \
+ and self.dtype == other.dtype \
+ and self.distance == other.distance
+ def child(
+ self,
+ uri: URI,
+ dimension: typing.Optional[int] = None,
+ dtype: typing.Optional[URI] = None,
+ distance: typing.Optional[URI] = None,
+ **kwargs,
+ ):
+ """Return a child of the current class."""
+ if dimension is None:
+ dimension = self.dimension
+ if dtype is None:
+ dtype = self.dtype
+ if distance is None:
+ distance = self.distance
+ return super().child(
+ uri=uri,
+ dimension=dimension,
+ dtype=dtype,
+ distance=distance,
+ **kwargs,
+ )
class Predicate(_Type):
- """Predicate type."""
+ """Predicate base type."""
# source type.
domain: Node
# destination type.
- range: typing.Optional[typing.Union[Node, Literal]]
+ range: Vertex
# maximum cardinality of type.
unique: bool
@@ -226,22 +309,23 @@ class Predicate(_Type):
parent: typing.Optional['Predicate'],
# Predicate members
domain: Node,
- range: typing.Optional[typing.Union[Node, Literal]], # pylint: disable=redefined-builtin
+ range: Vertex, # pylint: disable=redefined-builtin
unique: bool,
+ **kwargs,
):
# check arguments
if not isinstance(domain, Node):
raise TypeError(domain)
- if range is not None and not isinstance(range, Node) and not isinstance(range, Literal):
+ if range != ROOT_VERTEX and not isinstance(range, (Node, Literal)):
raise TypeError(range)
# initialize
- super().__init__(uri, parent)
+ super().__init__(uri, parent, **kwargs)
self.domain = domain
self.range = range
- self.unique = unique
+ self.unique = bool(unique)
def __hash__(self) -> int:
- return hash((super().__hash__(), self.domain, self.range, self.unique))
+ return hash((super().__hash__(), self.domain, self.unique, self.range))
def __eq__(self, other: typing.Any) -> bool:
return super().__eq__(other) \
@@ -249,11 +333,11 @@ class Predicate(_Type):
and self.range == other.range \
and self.unique == other.unique
- def get_child(
+ def child(
self,
uri: URI,
domain: typing.Optional[Node] = None,
- range: typing.Optional[_Vertex] = None, # pylint: disable=redefined-builtin
+ range: typing.Optional[Vertex] = None, # pylint: disable=redefined-builtin
unique: typing.Optional[bool] = None,
**kwargs,
):
@@ -264,13 +348,68 @@ class Predicate(_Type):
raise errors.ConsistencyError(f'{domain} must be a subclass of {self.domain}')
if range is None:
range = self.range
- if range is None: # inherited range from ns.bsfs.Predicate
- raise ValueError('range must be defined by the parent or argument')
- if self.range is not None and not range <= self.range:
+ # NOTE: The root predicate has a Vertex as range, which is neither a parent of the root
+ # Node nor Literal. Hence, that test is skipped since a child should be allowed to
+ # specialize from Vertex to anything.
+ if self.range != ROOT_VERTEX and not range <= self.range:
raise errors.ConsistencyError(f'{range} must be a subclass of {self.range}')
if unique is None:
unique = self.unique
- return super().get_child(uri, domain=domain, range=range, unique=unique, **kwargs)
+ return super().child(
+ uri=uri,
+ domain=domain,
+ range=range,
+ unique=unique,
+ **kwargs
+ )
+
+
+# essential vertices
+ROOT_VERTEX = Vertex(
+ uri=ns.bsfs.Vertex,
+ parent=None,
+ )
+
+ROOT_NODE = Node(
+ uri=ns.bsfs.Node,
+ parent=None,
+ )
+ROOT_LITERAL = Literal(
+ uri=ns.bsfs.Literal,
+ parent=None,
+ )
+
+ROOT_NUMBER = Literal(
+ uri=ns.bsfs.Number,
+ parent=ROOT_LITERAL,
+ )
+
+ROOT_TIME = Literal(
+ uri=ns.bsfs.Time,
+ parent=ROOT_LITERAL,
+ )
+
+ROOT_ARRAY = Literal(
+ uri=ns.bsfs.Array,
+ parent=ROOT_LITERAL,
+ )
+
+ROOT_FEATURE = Feature(
+ uri=ns.bsfs.Feature,
+ parent=ROOT_ARRAY,
+ dimension=1,
+ dtype=ns.bsfs.f16,
+ distance=ns.bsfs.euclidean,
+ )
+
+# essential predicates
+ROOT_PREDICATE = Predicate(
+ uri=ns.bsfs.Predicate,
+ parent=None,
+ domain=ROOT_NODE,
+ range=ROOT_VERTEX,
+ unique=False,
+ )
## EOF ##
diff --git a/bsfs/triple_store/sparql/distance.py b/bsfs/triple_store/sparql/distance.py
new file mode 100644
index 0000000..2f5387a
--- /dev/null
+++ b/bsfs/triple_store/sparql/distance.py
@@ -0,0 +1,56 @@
+"""
+
+Part of the BlackStar filesystem (bsfs) module.
+A copy of the license is provided with the project.
+Author: Matthias Baumgartner, 2022
+"""
+# standard imports
+import typing
+
+# external imports
+import numpy as np
+
+# bsfs imports
+from bsfs.namespace import ns
+
+# constants
+EPS = 1e-9
+
+# exports
+__all__: typing.Sequence[str] = (
+ 'DISTANCE_FU',
+ )
+
+
+## code ##
+
+def euclid(fst, snd) -> float:
+ """Euclidean distance (l2 norm)."""
+ fst = np.array(fst)
+ snd = np.array(snd)
+ return float(np.linalg.norm(fst - snd))
+
+def cosine(fst, snd) -> float:
+ """Cosine distance."""
+ fst = np.array(fst)
+ snd = np.array(snd)
+ if (fst == snd).all():
+ return 0.0
+ nrm0 = np.linalg.norm(fst)
+ nrm1 = np.linalg.norm(snd)
+ return float(1.0 - np.dot(fst, snd) / (nrm0 * nrm1 + EPS))
+
+def manhatten(fst, snd) -> float:
+    """Manhattan (cityblock) distance (l1 norm)."""
+ fst = np.array(fst)
+ snd = np.array(snd)
+ return float(np.abs(fst - snd).sum())
+
+# Known distance functions.
+DISTANCE_FU = {
+ ns.bsfs.euclidean: euclid,
+ ns.bsfs.cosine: cosine,
+ ns.bsfs.manhatten: manhatten,
+}
+
+## EOF ##
diff --git a/bsfs/triple_store/sparql/parse_filter.py b/bsfs/triple_store/sparql/parse_filter.py
index d4db0aa..8b6b976 100644
--- a/bsfs/triple_store/sparql/parse_filter.py
+++ b/bsfs/triple_store/sparql/parse_filter.py
@@ -5,19 +5,29 @@ A copy of the license is provided with the project.
Author: Matthias Baumgartner, 2022
"""
# imports
+import operator
import typing
+# external imports
+import rdflib
+
# bsfs imports
from bsfs import schema as bsc
from bsfs.namespace import ns
from bsfs.query import ast
from bsfs.utils import URI, errors
+# inner-module imports
+from .distance import DISTANCE_FU
+
# exports
__all__: typing.Sequence[str] = (
'Filter',
)
+
+## code ##
+
class _GenHopName():
"""Generator that produces a new unique symbol name with each iteration."""
@@ -46,10 +56,8 @@ class Filter():
# Generator that produces unique symbol names.
ngen: _GenHopName
- # Vertex type.
- T_VERTEX = typing.Union[bsc.Node, bsc.Literal]
-
- def __init__(self, schema):
+ def __init__(self, graph, schema):
+ self.graph = graph
self.schema = schema
self.ngen = _GenHopName()
@@ -79,7 +87,7 @@ class Filter():
}}
'''
- def _parse_filter_expression(self, type_: T_VERTEX, node: ast.filter.FilterExpression, head: str) -> str:
+ def _parse_filter_expression(self, type_: bsc.Vertex, node: ast.filter.FilterExpression, head: str) -> str:
"""Route *node* to the handler of the respective FilterExpression subclass."""
if isinstance(node, ast.filter.Is):
return self._is(type_, node, head)
@@ -87,6 +95,8 @@ class Filter():
return self._not(type_, node, head)
if isinstance(node, ast.filter.Has):
return self._has(type_, node, head)
+ if isinstance(node, ast.filter.Distance):
+ return self._distance(type_, node, head)
if isinstance(node, ast.filter.Any):
return self._any(type_, node, head)
if isinstance(node, ast.filter.All):
@@ -112,9 +122,9 @@ class Filter():
def _parse_predicate_expression(
self,
- type_: T_VERTEX,
+ type_: bsc.Vertex,
node: ast.filter.PredicateExpression
- ) -> typing.Tuple[str, T_VERTEX]:
+ ) -> typing.Tuple[str, bsc.Vertex]:
"""Route *node* to the handler of the respective PredicateExpression subclass."""
if isinstance(node, ast.filter.Predicate):
return self._predicate(type_, node)
@@ -123,7 +133,7 @@ class Filter():
# invalid node
raise errors.BackendError(f'expected predicate expression, found {node}')
- def _one_of(self, node_type: T_VERTEX, node: ast.filter.OneOf) -> typing.Tuple[str, T_VERTEX]:
+ def _one_of(self, node_type: bsc.Vertex, node: ast.filter.OneOf) -> typing.Tuple[str, bsc.Vertex]:
"""
"""
if not isinstance(node_type, bsc.Node):
@@ -134,23 +144,18 @@ class Filter():
puri, subrng = self._parse_predicate_expression(node_type, pred)
# track predicate uris
suburi.add(puri)
- try:
- # check for more generic range
- if rng is None or subrng > rng:
- rng = subrng
- # check range consistency
- if not subrng <= rng and not subrng >= rng:
- raise errors.ConsistencyError(f'ranges {subrng} and {rng} are not related')
- except TypeError as err: # subrng and rng are not comparable
- raise errors.ConsistencyError(f'ranges {subrng} and {rng} are not related') from err
- if rng is None:
- # for mypy to be certain of the rng type
- # if rng were None, we'd have gotten a TypeError above (None > None)
- raise errors.UnreachableError()
+ # check for more generic range
+ if rng is None or subrng > rng:
+ rng = subrng
+ # check range consistency
+ if not subrng <= rng and not subrng >= rng:
+ raise errors.ConsistencyError(f'ranges {subrng} and {rng} are not related')
# return joint predicate expression and next range
- return '|'.join(suburi), rng
+ # OneOf guarantees at least one expression, rng is always a bsc.Vertex.
+ # mypy does not realize this, hence we ignore the warning.
+ return '|'.join(suburi), rng # type: ignore [return-value]
- def _predicate(self, node_type: T_VERTEX, node: ast.filter.Predicate) -> typing.Tuple[str, T_VERTEX]:
+ def _predicate(self, node_type: bsc.Vertex, node: ast.filter.Predicate) -> typing.Tuple[str, bsc.Vertex]:
"""
"""
# check node_type
@@ -162,9 +167,8 @@ class Filter():
if not self.schema.has_predicate(puri):
raise errors.ConsistencyError(f'predicate {puri} is not in the schema')
pred = self.schema.predicate(puri)
- if pred.range is None:
- # FIXME: It is a design error that Predicates can have a None range...
- raise errors.BackendError(f'predicate {pred} has no range')
+ if not isinstance(pred.range, (bsc.Node, bsc.Literal)):
+ raise errors.BackendError(f'the range of predicate {pred} is undefined')
dom, rng = pred.domain, pred.range
# encapsulate predicate uri
puri = f'<{puri}>' # type: ignore [assignment] # variable re-use confuses mypy
@@ -178,7 +182,7 @@ class Filter():
# return predicate URI and next node type
return puri, rng
- def _any(self, node_type: T_VERTEX, node: ast.filter.Any, head: str) -> str:
+ def _any(self, node_type: bsc.Vertex, node: ast.filter.Any, head: str) -> str:
"""
"""
if not isinstance(node_type, bsc.Node):
@@ -191,7 +195,7 @@ class Filter():
# combine results
return f'{head} {pred} {nexthead} . {expr}'
- def _all(self, node_type: T_VERTEX, node: ast.filter.All, head: str) -> str:
+ def _all(self, node_type: bsc.Vertex, node: ast.filter.All, head: str) -> str:
"""
"""
# NOTE: All(P, E) := Not(Any(P, Not(E))) and EXISTS(P, ?)
@@ -208,13 +212,13 @@ class Filter():
# return existence and rewritten expression
return f'FILTER EXISTS {{ {head} {pred} {temphead} }} . ' + expr
- def _and(self, node_type: T_VERTEX, node: ast.filter.And, head: str) -> str:
+ def _and(self, node_type: bsc.Vertex, node: ast.filter.And, head: str) -> str:
"""
"""
sub = [self._parse_filter_expression(node_type, expr, head) for expr in node]
return ' . '.join(sub)
- def _or(self, node_type: T_VERTEX, node: ast.filter.Or, head: str) -> str:
+ def _or(self, node_type: bsc.Vertex, node: ast.filter.Or, head: str) -> str:
"""
"""
# potential special case optimization:
@@ -224,7 +228,7 @@ class Filter():
sub = ['{' + expr + '}' for expr in sub]
return ' UNION '.join(sub)
- def _not(self, node_type: T_VERTEX, node: ast.filter.Not, head: str) -> str:
+ def _not(self, node_type: bsc.Vertex, node: ast.filter.Not, head: str) -> str:
"""
"""
expr = self._parse_filter_expression(node_type, node.expr, head)
@@ -235,7 +239,7 @@ class Filter():
# The simplest (and non-interfering) choice is a type statement.
return f'MINUS {{ {head} <{ns.rdf.type}>/<{ns.rdfs.subClassOf}>* <{node_type.uri}> . {expr} }}'
- def _has(self, node_type: T_VERTEX, node: ast.filter.Has, head: str) -> str:
+ def _has(self, node_type: bsc.Vertex, node: ast.filter.Has, head: str) -> str:
"""
"""
if not isinstance(node_type, bsc.Node):
@@ -248,47 +252,72 @@ class Filter():
# predicate count expression (fetch number of predicates at *head*)
num_preds = f'{{ SELECT (COUNT(distinct {inner}) as {outer}) WHERE {{ {head} {pred} {inner} }} }}'
# count expression
- # FIXME: We have to ensure that ns.xsd.integer is always known in the schema!
count_bounds = self._parse_filter_expression(self.schema.literal(ns.xsd.integer), node.count, outer)
# combine
return num_preds + ' . ' + count_bounds
- def _is(self, node_type: T_VERTEX, node: ast.filter.Is, head: str) -> str:
+ def _distance(self, node_type: bsc.Vertex, node: ast.filter.Distance, head: str) -> str:
+ """
+ """
+ if not isinstance(node_type, bsc.Feature):
+ raise errors.BackendError(f'expected Feature, found {node_type}')
+ if len(node.reference) != node_type.dimension:
+ raise errors.ConsistencyError(
+ f'reference has dimension {len(node.reference)}, expected {node_type.dimension}')
+ # get distance metric
+ dist = DISTANCE_FU[node_type.distance]
+ # get operator
+ cmp = operator.lt if node.strict else operator.le
+ # get candidate values
+ candidates = {
+ f'"{cand}"^^<{node_type.uri}>'
+ for cand
+ in self.graph.objects()
+ if isinstance(cand, rdflib.Literal)
+ and cand.datatype == rdflib.URIRef(node_type.uri)
+ and cmp(dist(cand.value, node.reference), node.threshold)
+ }
+ # combine candidate values
+ values = ' '.join(candidates) if len(candidates) else f'"impossible value"^^<{ns.xsd.string}>'
+ # return sparql fragment
+ return f'VALUES {head} {{ {values} }}'
+
+ def _is(self, node_type: bsc.Vertex, node: ast.filter.Is, head: str) -> str:
"""
"""
if not isinstance(node_type, bsc.Node):
raise errors.BackendError(f'expected Node, found {node_type}')
return f'VALUES {head} {{ <{node.value}> }}'
- def _equals(self, node_type: T_VERTEX, node: ast.filter.Equals, head: str) -> str:
+ def _equals(self, node_type: bsc.Vertex, node: ast.filter.Equals, head: str) -> str:
"""
"""
if not isinstance(node_type, bsc.Literal):
raise errors.BackendError(f'expected Literal, found {node}')
return f'VALUES {head} {{ "{node.value}"^^<{node_type.uri}> }}'
- def _substring(self, node_type: T_VERTEX, node: ast.filter.Substring, head: str) -> str:
+ def _substring(self, node_type: bsc.Vertex, node: ast.filter.Substring, head: str) -> str:
"""
"""
if not isinstance(node_type, bsc.Literal):
raise errors.BackendError(f'expected Literal, found {node_type}')
return f'FILTER contains(str({head}), "{node.value}")'
- def _starts_with(self, node_type: T_VERTEX, node: ast.filter.StartsWith, head: str) -> str:
+ def _starts_with(self, node_type: bsc.Vertex, node: ast.filter.StartsWith, head: str) -> str:
"""
"""
if not isinstance(node_type, bsc.Literal):
raise errors.BackendError(f'expected Literal, found {node_type}')
return f'FILTER strstarts(str({head}), "{node.value}")'
- def _ends_with(self, node_type: T_VERTEX, node: ast.filter.EndsWith, head: str) -> str:
+ def _ends_with(self, node_type: bsc.Vertex, node: ast.filter.EndsWith, head: str) -> str:
"""
"""
if not isinstance(node_type, bsc.Literal):
raise errors.BackendError(f'expected Literal, found {node_type}')
return f'FILTER strends(str({head}), "{node.value}")'
- def _less_than(self, node_type: T_VERTEX, node: ast.filter.LessThan, head: str) -> str:
+ def _less_than(self, node_type: bsc.Vertex, node: ast.filter.LessThan, head: str) -> str:
"""
"""
if not isinstance(node_type, bsc.Literal):
@@ -296,7 +325,7 @@ class Filter():
equality = '=' if not node.strict else ''
return f'FILTER ({head} <{equality} {float(node.threshold)})'
- def _greater_than(self, node_type: T_VERTEX, node: ast.filter.GreaterThan, head: str) -> str:
+ def _greater_than(self, node_type: bsc.Vertex, node: ast.filter.GreaterThan, head: str) -> str:
"""
"""
if not isinstance(node_type, bsc.Literal):
diff --git a/bsfs/triple_store/sparql/sparql.py b/bsfs/triple_store/sparql/sparql.py
index c3cbff6..fedd227 100644
--- a/bsfs/triple_store/sparql/sparql.py
+++ b/bsfs/triple_store/sparql/sparql.py
@@ -11,12 +11,14 @@ import rdflib
# bsfs imports
from bsfs import schema as bsc
+from bsfs.namespace import ns
from bsfs.query import ast
from bsfs.utils import errors, URI
# inner-module imports
from . import parse_filter
from .. import base
+from .distance import DISTANCE_FU
# exports
@@ -94,8 +96,9 @@ class SparqlStore(base.TripleStoreBase):
super().__init__(None)
self._graph = rdflib.Graph()
self._transaction = _Transaction(self._graph)
- self._schema = bsc.Schema.Empty()
- self._filter_parser = parse_filter.Filter(self._schema)
+ # NOTE: parsing bsfs.query.ast.filter.Has requires xsd:integer.
+ self._schema = bsc.Schema(literals={bsc.ROOT_NUMBER.child(ns.xsd.integer)})
+ self._filter_parser = parse_filter.Filter(self._graph, self._schema)
# NOTE: mypy and pylint complain about the **kwargs not being listed (contrasting super)
# However, not having it here is clearer since it's explicit that there are no arguments.
@@ -121,6 +124,16 @@ class SparqlStore(base.TripleStoreBase):
# check compatibility: No contradicting definitions
if not self.schema.consistent_with(schema):
raise errors.ConsistencyError(f'{schema} is inconsistent with {self.schema}')
+ # check distance functions of features
+ invalid = {
+ (cand.uri, cand.distance)
+ for cand
+ in schema.literals()
+ if isinstance(cand, bsc.Feature) and cand.distance not in DISTANCE_FU}
+ if len(invalid) > 0:
+ cand, dist = zip(*invalid)
+ raise errors.UnsupportedError(
+ f'unknown distance function {",".join(dist)} in feature {", ".join(cand)}')
# commit the current transaction
self.commit()
@@ -137,7 +150,7 @@ class SparqlStore(base.TripleStoreBase):
for src, trg in self._graph.subject_objects(rdflib.URIRef(pred.uri)):
self._transaction.remove((src, rdflib.URIRef(pred.uri), trg))
# remove predicate definition
- if pred.parent is not None:
+ if pred.parent is not None: # NOTE: there shouldn't be any predicate w/o parent
self._transaction.remove((
rdflib.URIRef(pred.uri),
rdflib.RDFS.subClassOf,
@@ -157,7 +170,7 @@ class SparqlStore(base.TripleStoreBase):
# remove instance
self._transaction.remove((inst, rdflib.RDF.type, rdflib.URIRef(node.uri)))
# remove node definition
- if node.parent is not None:
+ if node.parent is not None: # NOTE: there shouldn't be any node w/o parent
self._transaction.remove((
rdflib.URIRef(node.uri),
rdflib.RDFS.subClassOf,
@@ -166,7 +179,7 @@ class SparqlStore(base.TripleStoreBase):
for lit in sub.literals:
# remove literal definition
- if lit.parent is not None:
+ if lit.parent is not None: # NOTE: there shouldn't be any literal w/o parent
self._transaction.remove((
rdflib.URIRef(lit.uri),
rdflib.RDFS.subClassOf,
diff --git a/bsfs/utils/errors.py b/bsfs/utils/errors.py
index be9d40e..6ae6484 100644
--- a/bsfs/utils/errors.py
+++ b/bsfs/utils/errors.py
@@ -41,4 +41,7 @@ class ConfigError(_BSFSError):
class BackendError(_BSFSError):
"""Could not parse an AST structure."""
+class UnsupportedError(_BSFSError):
+ """Some requested functionality is not supported by an implementation."""
+
## EOF ##
diff --git a/bsfs/utils/uuid.py b/bsfs/utils/uuid.py
index 6366b18..ba5cf52 100644
--- a/bsfs/utils/uuid.py
+++ b/bsfs/utils/uuid.py
@@ -7,6 +7,7 @@ Author: Matthias Baumgartner, 2022
# imports
from collections import abc
import hashlib
+import json
import os
import platform
import random
@@ -105,4 +106,10 @@ class UCID():
with open(path, 'rb') as ifile:
return HASH(ifile.read()).hexdigest()
+
+ @staticmethod
+ def from_dict(content: dict) -> str:
+        """Return the content hash (hex digest) of a dict's JSON serialization."""
+ return HASH(json.dumps(content).encode('ascii', 'ignore')).hexdigest()
+
## EOF ##