From 1b7ef16c3795bb7112683662b8c22a774e219269 Mon Sep 17 00:00:00 2001 From: Matthias Baumgartner Date: Thu, 12 Jan 2023 16:57:58 +0100 Subject: schema to string --- bsfs/schema/schema.py | 2 + bsfs/schema/serialize.py | 104 +++++++++++++++++++++++++++++++++++++++++++++-- 2 files changed, 103 insertions(+), 3 deletions(-) (limited to 'bsfs') diff --git a/bsfs/schema/schema.py b/bsfs/schema/schema.py index 52ad191..bc50d4e 100644 --- a/bsfs/schema/schema.py +++ b/bsfs/schema/schema.py @@ -72,6 +72,8 @@ class Schema(): literals.add(types.ROOT_NUMBER) predicates.add(types.ROOT_FEATURE) + # FIXME: ensure that types derive from the right root? + # include parents in predicates set # TODO: review type annotations and ignores for python >= 3.11 (parents is _Type but should be typing.Self) predicates |= {par for pred in predicates for par in pred.parents()} # type: ignore [misc] diff --git a/bsfs/schema/serialize.py b/bsfs/schema/serialize.py index 0eb6628..a566d65 100644 --- a/bsfs/schema/serialize.py +++ b/bsfs/schema/serialize.py @@ -136,9 +136,107 @@ def from_string(schema_str: str) -> schema.Schema: -def to_string(schema_inst: schema.Schema) -> str: +def to_string(schema_inst: schema.Schema, fmt: str = 'turtle') -> str: + """Serialize a `bsfs.schema.Schema` to a string. + See `rdflib.Graph.serialize` for viable formats (default: turtle). """ - """ - raise NotImplementedError() + + # type of emitted triples. + T_TRIPLE = typing.Iterator[typing.Tuple[rdflib.URIRef, rdflib.URIRef, rdflib.term.Identifier]] + + def _type(tpe: types._Type) -> T_TRIPLE : + """Emit _Type properties (parent, annotations).""" + # emit parent + if tpe.parent is not None: + yield ( + rdflib.URIRef(tpe.uri), + rdflib.URIRef(ns.rdfs.subClassOf), + rdflib.URIRef(tpe.parent.uri), + ) + # emit annotations + for prop, value in tpe.annotations.items(): + yield ( + rdflib.URIRef(tpe.uri), + rdflib.URIRef(prop), + rdflib.Literal(value), # FIXME: datatype?! + ) + + def _predicate(pred: types.Predicate) -> T_TRIPLE: + """Emit Predicate properties (domain, range, unique).""" + # no need to emit anything for the root predicate + if pred == types.ROOT_PREDICATE: + return + # emit domain + if pred.domain != getattr(pred.parent, 'domain', None): + yield ( + rdflib.URIRef(pred.uri), + rdflib.URIRef(ns.rdfs.domain), + rdflib.URIRef(pred.domain.uri), + ) + # emit range + if pred.range != getattr(pred.parent, 'range', None): + yield ( + rdflib.URIRef(pred.uri), + rdflib.URIRef(ns.rdfs.range), + rdflib.URIRef(pred.range.uri), + ) + # emit cardinality + if pred.unique != getattr(pred.parent, 'unique', None): + yield ( + rdflib.URIRef(pred.uri), + rdflib.URIRef(ns.bsfs.unique), + rdflib.Literal(pred.unique, datatype=rdflib.XSD.boolean), + ) + + def _feature(feat: types.Feature) -> T_TRIPLE: + """Emit Feature properties (dimension, dtype, distance).""" + # emit size + if feat.dimension != getattr(feat.parent, 'dimension', None): + yield ( + rdflib.URIRef(feat.uri), + rdflib.URIRef(ns.bsfs.dimension), + rdflib.Literal(feat.dimension, datatype=rdflib.XSD.integer), + ) + # emit dtype + if feat.dtype != getattr(feat.parent, 'dtype', None): + yield ( + rdflib.URIRef(feat.uri), + rdflib.URIRef(ns.bsfs.dtype), + rdflib.URIRef(feat.dtype), + ) + # emit distance + if feat.distance != getattr(feat.parent, 'distance', None): + yield ( + rdflib.URIRef(feat.uri), + rdflib.URIRef(ns.bsfs.distance), + rdflib.URIRef(feat.distance), + ) + + def _parse(node: types._Type) -> T_TRIPLE: + """Emit all properties of a type.""" + if isinstance(node, types._Type): # pylint: disable=protected-access + # NOTE: all nodes are _Type + yield from _type(node) + if isinstance(node, types.Predicate): + yield from _predicate(node) + if isinstance(node, types.Feature): + yield from _feature(node) + + # create graph + graph = rdflib.Graph() + # add triples to graph + nodes = itertools.chain( + schema_inst.nodes(), + schema_inst.literals(), + schema_inst.predicates()) + for node in nodes: + for triple in _parse(node): + graph.add(triple) + # add known namespaces for readability + # FIXME: more systematically (e.g. for all in ns?) + graph.bind('bsfs', rdflib.URIRef('http://bsfs.ai/schema/')) + graph.bind('bse', rdflib.URIRef('http://bsfs.ai/schema/Entity#')) + # serialize to turtle + return graph.serialize(format=fmt) ## EOF ## -- cgit v1.2.3