about | summary | refs | log | tree | commit | diff | stats
path: root/bsfs/schema/schema.py
diff options
context:
space:
mode:
author: Matthias Baumgartner <dev@igsor.net> 2023-01-11 21:20:47 +0100
committer: Matthias Baumgartner <dev@igsor.net> 2023-01-11 21:20:47 +0100
commit: c664d19e7d4a0aa0762c30a72ae238cf818891ab (patch)
tree: 93349de711a18cff8329745af22710738b933cdc /bsfs/schema/schema.py
parent: 7f5a2920ef311b2077300714d7700313077a0bf6 (diff)
download: bsfs-c664d19e7d4a0aa0762c30a72ae238cf818891ab.tar.gz
bsfs-c664d19e7d4a0aa0762c30a72ae238cf818891ab.tar.bz2
bsfs-c664d19e7d4a0aa0762c30a72ae238cf818891ab.zip
Feature support in the schema
* Type annotations
* Feature type
* Moved from_string from Schema to its own file/function
* Root predicate has a valid (not-None) range
* ROOT_... export in schema.types
* Empty as the default Schema constructor
* Schema loads some additional default symbols
* _Type instances compare along class hierarchy
Diffstat (limited to 'bsfs/schema/schema.py')
-rw-r--r-- bsfs/schema/schema.py 111
1 files changed, 19 insertions, 92 deletions
diff --git a/bsfs/schema/schema.py b/bsfs/schema/schema.py
index c5d4571..1c4c807 100644
--- a/bsfs/schema/schema.py
+++ b/bsfs/schema/schema.py
@@ -51,11 +51,13 @@ class Schema():
def __init__(
self,
- predicates: typing.Iterable[types.Predicate],
+ predicates: typing.Optional[typing.Iterable[types.Predicate]] = None,
nodes: typing.Optional[typing.Iterable[types.Node]] = None,
literals: typing.Optional[typing.Iterable[types.Literal]] = None,
):
# materialize arguments
+ if predicates is None:
+ predicates = set()
if nodes is None:
nodes = set()
if literals is None:
@@ -63,24 +65,36 @@ class Schema():
nodes = set(nodes)
literals = set(literals)
predicates = set(predicates)
+
+ # add root types to the schema
+ nodes.add(types.ROOT_NODE)
+ literals.add(types.ROOT_LITERAL)
+ predicates.add(types.ROOT_PREDICATE)
+ # add minimally necessary types to the schema
+ literals.add(types.ROOT_NUMBER)
+ predicates.add(types.ROOT_FEATURE)
+
# include parents in predicates set
# TODO: review type annotations and ignores for python >= 3.11 (parents is _Type but should be typing.Self)
predicates |= {par for pred in predicates for par in pred.parents()} # type: ignore [misc]
# include predicate domain in nodes set
nodes |= {pred.domain for pred in predicates}
# include predicate range in nodes and literals sets
- prange = {pred.range for pred in predicates if pred.range is not None}
+ prange = {pred.range for pred in predicates}
nodes |= {vert for vert in prange if isinstance(vert, types.Node)}
literals |= {vert for vert in prange if isinstance(vert, types.Literal)}
+ # NOTE: ROOT_PREDICATE has a _Vertex as range which is neither in nodes nor literals
+ # FIXME: with the ROOT_VERTEX missing, the schema is not complete anymore!
+
# include parents in nodes and literals sets
- # NOTE: Must be done after predicate domain/range was handled
- # so that their parents are included as well.
+ # NOTE: Must come after predicate domain/range was handled to have their parents as well.
nodes |= {par for node in nodes for par in node.parents()} # type: ignore [misc]
literals |= {par for lit in literals for par in lit.parents()} # type: ignore [misc]
# assign members
self._nodes = {node.uri: node for node in nodes}
self._literals = {lit.uri: lit for lit in literals}
self._predicates = {pred.uri: pred for pred in predicates}
+
# verify unique uris
if len(nodes) != len(self._nodes):
raise errors.ConsistencyError('inconsistent nodes')
@@ -214,6 +228,7 @@ class Schema():
>>> Schema.Union([a, b, c])
"""
+ # FIXME: copy type annotations?
if len(args) == 0:
raise TypeError('Schema.Union requires at least one argument (Schema or Iterable)')
if isinstance(args[0], cls): # args is sequence of Schema instances
@@ -295,92 +310,4 @@ class Schema():
"""Return the Literal matching the *uri*."""
return self._literals[uri]
-
- ## constructors ##
-
-
- @classmethod
- def Empty(cls) -> 'Schema': # pylint: disable=invalid-name # capitalized classmethod
- """Return a minimal Schema."""
- node = types.Node(ns.bsfs.Node, None)
- literal = types.Literal(ns.bsfs.Literal, None)
- predicate = types.Predicate(
- uri=ns.bsfs.Predicate,
- parent=None,
- domain=node,
- range=None,
- unique=False,
- )
- return cls((predicate, ), (node, ), (literal, ))
-
-
- @classmethod
- def from_string(cls, schema: str) -> 'Schema': # pylint: disable=invalid-name # capitalized classmethod
- """Load and return a Schema from a string."""
- # parse string into rdf graph
- graph = rdflib.Graph()
- graph.parse(data=schema, format='turtle')
-
- def _fetch_hierarchically(factory, curr):
- # emit current node
- yield curr
- # walk through childs
- for child in graph.subjects(rdflib.URIRef(ns.rdfs.subClassOf), rdflib.URIRef(curr.uri)):
- # convert to URI
- child = URI(child)
- # check circular dependency
- if child == curr.uri or child in {node.uri for node in curr.parents()}:
- raise errors.ConsistencyError('circular dependency')
- # recurse and emit (sub*)childs
- yield from _fetch_hierarchically(factory, factory(child, curr))
-
- # fetch nodes
- nodes = set(_fetch_hierarchically(types.Node, types.Node(ns.bsfs.Node, None)))
- nodes_lut = {node.uri: node for node in nodes}
- if len(nodes_lut) != len(nodes):
- raise errors.ConsistencyError('inconsistent nodes')
-
- # fetch literals
- literals = set(_fetch_hierarchically(types.Literal, types.Literal(ns.bsfs.Literal, None)))
- literals_lut = {lit.uri: lit for lit in literals}
- if len(literals_lut) != len(literals):
- raise errors.ConsistencyError('inconsistent literals')
-
- # fetch predicates
- def build_predicate(uri, parent):
- uri = rdflib.URIRef(uri)
- # get domain
- domains = set(graph.objects(uri, rdflib.RDFS.domain))
- if len(domains) != 1:
- raise errors.ConsistencyError(f'inconsistent domain: {domains}')
- dom = nodes_lut.get(next(iter(domains)))
- if dom is None:
- raise errors.ConsistencyError('missing domain')
- # get range
- ranges = set(graph.objects(uri, rdflib.RDFS.range))
- if len(ranges) != 1:
- raise errors.ConsistencyError(f'inconsistent range: {ranges}')
- rng = next(iter(ranges))
- rng = nodes_lut.get(rng, literals_lut.get(rng))
- if rng is None:
- raise errors.ConsistencyError('missing range')
- # get unique flag
- uniques = set(graph.objects(uri, rdflib.URIRef(ns.bsfs.unique)))
- if len(uniques) != 1:
- raise errors.ConsistencyError(f'inconsistent unique flags: {uniques}')
- unique = bool(next(iter(uniques)))
- # build Predicate
- return types.Predicate(URI(uri), parent, dom, rng, unique)
-
- root_predicate = types.Predicate(
- uri=ns.bsfs.Predicate,
- parent=None,
- domain=nodes_lut[ns.bsfs.Node],
- range=None, # FIXME: Unclear how to handle this! Can be either a Literal or a Node
- unique=False,
- )
- predicates = _fetch_hierarchically(build_predicate, root_predicate)
- # return Schema
- return cls(predicates, nodes, literals)
-
## EOF ##