diff options
Diffstat (limited to 'bsfs/schema/schema.py')
-rw-r--r-- | bsfs/schema/schema.py | 117 |
1 files changed, 23 insertions, 94 deletions
diff --git a/bsfs/schema/schema.py b/bsfs/schema/schema.py index c5d4571..8d9a821 100644 --- a/bsfs/schema/schema.py +++ b/bsfs/schema/schema.py @@ -7,10 +7,8 @@ Author: Matthias Baumgartner, 2022 # imports from collections import abc, namedtuple import typing -import rdflib # bsfs imports -from bsfs.namespace import ns from bsfs.utils import errors, URI, typename # inner-module imports @@ -51,11 +49,13 @@ class Schema(): def __init__( self, - predicates: typing.Iterable[types.Predicate], + predicates: typing.Optional[typing.Iterable[types.Predicate]] = None, nodes: typing.Optional[typing.Iterable[types.Node]] = None, literals: typing.Optional[typing.Iterable[types.Literal]] = None, ): # materialize arguments + if predicates is None: + predicates = set() if nodes is None: nodes = set() if literals is None: @@ -63,24 +63,40 @@ class Schema(): nodes = set(nodes) literals = set(literals) predicates = set(predicates) + + # add root types to the schema + nodes.add(types.ROOT_NODE) + literals.add(types.ROOT_LITERAL) + predicates.add(types.ROOT_PREDICATE) + # add minimally necessary types to the schema + literals.add(types.ROOT_NUMBER) + literals.add(types.ROOT_TIME) + literals.add(types.ROOT_ARRAY) + literals.add(types.ROOT_FEATURE) + + # FIXME: ensure that types derive from the right root? + # include parents in predicates set # TODO: review type annotations and ignores for python >= 3.11 (parents is _Type but should be typing.Self) predicates |= {par for pred in predicates for par in pred.parents()} # type: ignore [misc] # include predicate domain in nodes set nodes |= {pred.domain for pred in predicates} # include predicate range in nodes and literals sets - prange = {pred.range for pred in predicates if pred.range is not None} + prange = {pred.range for pred in predicates} nodes |= {vert for vert in prange if isinstance(vert, types.Node)} literals |= {vert for vert in prange if isinstance(vert, types.Literal)} + # NOTE: ROOT_PREDICATE has a Vertex as range which is neither in nodes nor literals + # FIXME: with the ROOT_VERTEX missing, the schema is not complete anymore! + # include parents in nodes and literals sets - # NOTE: Must be done after predicate domain/range was handled - # so that their parents are included as well. + # NOTE: Must come after predicate domain/range was handled to have their parents as well. nodes |= {par for node in nodes for par in node.parents()} # type: ignore [misc] literals |= {par for lit in literals for par in lit.parents()} # type: ignore [misc] # assign members self._nodes = {node.uri: node for node in nodes} self._literals = {lit.uri: lit for lit in literals} self._predicates = {pred.uri: pred for pred in predicates} + # verify unique uris if len(nodes) != len(self._nodes): raise errors.ConsistencyError('inconsistent nodes') @@ -214,6 +230,7 @@ class Schema(): >>> Schema.Union([a, b, c]) """ + # FIXME: copy type annotations? if len(args) == 0: raise TypeError('Schema.Union requires at least one argument (Schema or Iterable)') if isinstance(args[0], cls): # args is sequence of Schema instances @@ -295,92 +312,4 @@ class Schema(): """Return the Literal matching the *uri*.""" return self._literals[uri] - - ## constructors ## - - - @classmethod - def Empty(cls) -> 'Schema': # pylint: disable=invalid-name # capitalized classmethod - """Return a minimal Schema.""" - node = types.Node(ns.bsfs.Node, None) - literal = types.Literal(ns.bsfs.Literal, None) - predicate = types.Predicate( - uri=ns.bsfs.Predicate, - parent=None, - domain=node, - range=None, - unique=False, - ) - return cls((predicate, ), (node, ), (literal, )) - - - @classmethod - def from_string(cls, schema: str) -> 'Schema': # pylint: disable=invalid-name # capitalized classmethod - """Load and return a Schema from a string.""" - # parse string into rdf graph - graph = rdflib.Graph() - graph.parse(data=schema, format='turtle') - - def _fetch_hierarchically(factory, curr): - # emit current node - yield curr - # walk through childs - for child in graph.subjects(rdflib.URIRef(ns.rdfs.subClassOf), rdflib.URIRef(curr.uri)): - # convert to URI - child = URI(child) - # check circular dependency - if child == curr.uri or child in {node.uri for node in curr.parents()}: - raise errors.ConsistencyError('circular dependency') - # recurse and emit (sub*)childs - yield from _fetch_hierarchically(factory, factory(child, curr)) - - # fetch nodes - nodes = set(_fetch_hierarchically(types.Node, types.Node(ns.bsfs.Node, None))) - nodes_lut = {node.uri: node for node in nodes} - if len(nodes_lut) != len(nodes): - raise errors.ConsistencyError('inconsistent nodes') - - # fetch literals - literals = set(_fetch_hierarchically(types.Literal, types.Literal(ns.bsfs.Literal, None))) - literals_lut = {lit.uri: lit for lit in literals} - if len(literals_lut) != len(literals): - raise errors.ConsistencyError('inconsistent literals') - - # fetch predicates - def build_predicate(uri, parent): - uri = rdflib.URIRef(uri) - # get domain - domains = set(graph.objects(uri, rdflib.RDFS.domain)) - if len(domains) != 1: - raise errors.ConsistencyError(f'inconsistent domain: {domains}') - dom = nodes_lut.get(next(iter(domains))) - if dom is None: - raise errors.ConsistencyError('missing domain') - # get range - ranges = set(graph.objects(uri, rdflib.RDFS.range)) - if len(ranges) != 1: - raise errors.ConsistencyError(f'inconsistent range: {ranges}') - rng = next(iter(ranges)) - rng = nodes_lut.get(rng, literals_lut.get(rng)) - if rng is None: - raise errors.ConsistencyError('missing range') - # get unique flag - uniques = set(graph.objects(uri, rdflib.URIRef(ns.bsfs.unique))) - if len(uniques) != 1: - raise errors.ConsistencyError(f'inconsistent unique flags: {uniques}') - unique = bool(next(iter(uniques))) - # build Predicate - return types.Predicate(URI(uri), parent, dom, rng, unique) - - root_predicate = types.Predicate( - uri=ns.bsfs.Predicate, - parent=None, - domain=nodes_lut[ns.bsfs.Node], - range=None, # FIXME: Unclear how to handle this! Can be either a Literal or a Node - unique=False, - ) - predicates = _fetch_hierarchically(build_predicate, root_predicate) - # return Schema - return cls(predicates, nodes, literals) - ## EOF ## |