about summary refs log tree commit diff stats
path: root/bsfs/schema/serialize.py
diff options
context:
space:
mode:
Diffstat (limited to 'bsfs/schema/serialize.py')
-rw-r--r-- bsfs/schema/serialize.py 143
1 files changed, 143 insertions, 0 deletions
diff --git a/bsfs/schema/serialize.py b/bsfs/schema/serialize.py
new file mode 100644
index 0000000..1222aa6
--- /dev/null
+++ b/bsfs/schema/serialize.py
@@ -0,0 +1,143 @@
+"""
+
+Part of the BlackStar filesystem (bsfs) module.
+A copy of the license is provided with the project.
+Author: Matthias Baumgartner, 2022
+"""
+# standard imports
+from collections import abc
+import itertools
+import typing
+
+# external imports
+import rdflib
+
+# bsfs imports
+from bsfs.namespace import ns
+from bsfs.utils import errors, URI, typename
+
+# inner-module imports
+from . import types
+from . import schema
+
+# names made available by `from bsfs.schema.serialize import *`
+__all__: typing.Sequence[str] = ('to_string', 'from_string')
+
+
+## code ##
+
+def from_string(schema_str: str) -> schema.Schema:
+    """Build a Schema from its turtle representation.
+
+    The string is parsed into an rdflib graph. Nodes, literals, and predicates
+    are discovered by following rdfs:subClassOf statements downwards from
+    `types.ROOT_NODE`, `types.ROOT_LITERAL`, and `types.ROOT_PREDICATE`,
+    respectively. Every other statement about a symbol is passed to the
+    symbol's factory as an annotation (keyword argument).
+
+    Nodes and literals are materialized here. The predicate hierarchy is
+    handed to `schema.Schema` as a generator, so errors caused by predicate
+    definitions are only raised once that generator is consumed.
+
+    Raises:
+        errors.ConsistencyError: on a circular subclass relation, if distinct
+            nodes (or literals) share the same uri, if a predicate refers to
+            an undefined domain or range, or if a predicate has more than one
+            value for a property that is read at most once.
+        errors.BackendError: if an annotation value is neither an rdflib
+            Literal nor an rdflib URIRef.
+    """
+    # load the turtle document into an rdf graph
+    graph = rdflib.Graph()
+    graph.parse(data=schema_str, format='turtle')
+
+    # helper functions
+    def _convert(value):
+        """Translate an rdflib term (URIRef or Literal) into a bsfs native value."""
+        if isinstance(value, rdflib.URIRef):
+            return URI(value)
+        if isinstance(value, rdflib.Literal):
+            return value.value
+        raise errors.BackendError(f'expected Literal or URIRef, found {typename(value)}')
+
+    def _fetch_hierarchically(factory, curr):
+        """Yield `curr`, then recursively all symbols below it in the rdfs:subClassOf hierarchy.
+
+        Child symbols are created via `factory(uri, parent, **annotations)`.
+        """
+        # emit the current symbol before descending
+        yield curr
+        # visit everything that is declared a direct subclass of the current symbol
+        subclass_of = rdflib.URIRef(ns.rdfs.subClassOf)
+        for child_ref in graph.subjects(subclass_of, rdflib.URIRef(curr.uri)):
+            # annotations are all statements about the child, except for its subclass relation
+            annotations = {}
+            for pred, value in graph.predicate_objects(child_ref): # FIXME: preserve datatype of value?!
+                key = URI(pred)
+                if key != ns.rdfs.subClassOf:
+                    annotations[key] = _convert(value)
+            # switch to the bsfs native uri type
+            child_uri = URI(child_ref)
+            # the child must be neither the current symbol nor one of its ancestors
+            if child_uri == curr.uri or child_uri in {anc.uri for anc in curr.parents()}:
+                raise errors.ConsistencyError('circular dependency')
+            # create the child symbol, then emit it and its (sub*)childs
+            yield from _fetch_hierarchically(factory, factory(child_uri, curr, **annotations))
+
+    def _fetch_symbols(factory, root, message):
+        """Return all symbols at or below `root`, as a set and as a lookup table indexed by uri."""
+        symbols = set(_fetch_hierarchically(factory, root))
+        lut = {symbol.uri: symbol for symbol in symbols}
+        # distinct symbols that share a uri collapse into a single lookup entry
+        if len(lut) != len(symbols):
+            raise errors.ConsistencyError(message)
+        return symbols, lut
+
+    # fetch nodes
+    nodes, nodes_lut = _fetch_symbols(types.Node, types.ROOT_NODE, 'inconsistent nodes')
+
+    # fetch literals
+    literals, literals_lut = _fetch_symbols(types.Literal, types.ROOT_LITERAL, 'inconsistent literals')
+
+    # fetch predicates
+    # FIXME: type annotation
+    def _fetch_value(subject: URI, predicate: rdflib.URIRef, value_factory) -> typing.Optional[typing.Any]:
+        """Return the object stored for `subject` and `predicate`, passed through `value_factory`.
+
+        Returns None if there is no such object.
+        Raises a `errors.ConsistencyError` if multiple objects match.
+        """
+        matches = list(graph.objects(rdflib.URIRef(subject), predicate))
+        if len(matches) > 1:
+            raise errors.ConsistencyError(f'{subject} has multiple values for predicate {str(predicate)}, expected zero or one')
+        if not matches:
+            return None
+        return value_factory(matches[0])
+
+    def _build_predicate(uri, parent, **annotations):
+        """Predicate factory: derive the predicate (or feature) `uri` from `parent`."""
+        # the root feature type is predefined and returned as-is
+        if uri == types.ROOT_FEATURE.uri:
+            return types.ROOT_FEATURE
+        # domain, range, and unique are passed explicitly, hence not as annotations
+        for key in (ns.rdfs.domain, ns.rdfs.range, ns.bsfs.unique):
+            annotations.pop(key, None)
+        # get domain: must be a known node, if specified
+        dom = _fetch_value(uri, rdflib.RDFS.domain, URI)
+        if dom is not None:
+            if dom not in nodes_lut:
+                raise errors.ConsistencyError(f'predicate {uri} has undefined domain {dom}')
+            dom = nodes_lut[dom]
+        # get range: must be a known node or literal, if specified (nodes take precedence)
+        rng = _fetch_value(uri, rdflib.RDFS.range, URI)
+        if rng is not None:
+            if rng in nodes_lut:
+                rng = nodes_lut[rng]
+            elif rng in literals_lut:
+                rng = literals_lut[rng]
+            else:
+                raise errors.ConsistencyError(f'predicate {uri} has undefined range {rng}')
+        # get unique
+        unique = _fetch_value(uri, rdflib.URIRef(ns.bsfs.unique), bool)
+        # handle non-feature predicate
+        if not isinstance(parent, types.Feature):
+            return parent.get_child(URI(uri), domain=dom, range=rng, unique=unique, **annotations)
+        # handle feature types: dimension, dtype, and distance are passed explicitly as well
+        for key in (ns.bsfs.dimension, ns.bsfs.dtype, ns.bsfs.distance):
+            annotations.pop(key, None)
+        # get dimension
+        dimension = _fetch_value(uri, rdflib.URIRef(ns.bsfs.dimension), int)
+        # get dtype
+        dtype = _fetch_value(uri, rdflib.URIRef(ns.bsfs.dtype), URI)
+        # get distance
+        distance = _fetch_value(uri, rdflib.URIRef(ns.bsfs.distance), URI)
+        # return feature
+        return parent.get_child(URI(uri), domain=dom, range=rng, unique=unique,
+            dtype=dtype, dimension=dimension, distance=distance, **annotations)
+
+    # predicates are passed on as generator, i.e., they are built lazily
+    return schema.Schema(
+        _fetch_hierarchically(_build_predicate, types.ROOT_PREDICATE),
+        nodes,
+        literals,
+        )
+
+
+
+def to_string(schema_inst: schema.Schema) -> str:
+    """Serialize a Schema into a string. Not implemented yet.
+
+    Presumably the counterpart of `from_string`, i.e., expected to
+    produce turtle -- TODO confirm once implemented.
+
+    Raises:
+        NotImplementedError: always.
+    """
+    raise NotImplementedError
+
+## EOF ##