diff options
-rw-r--r-- | bsfs/schema/schema.py | 2 | ||||
-rw-r--r-- | bsfs/schema/serialize.py | 104 | ||||
-rw-r--r-- | test/schema/test_serialize.py | 173 |
3 files changed, 274 insertions, 5 deletions
diff --git a/bsfs/schema/schema.py b/bsfs/schema/schema.py index 52ad191..bc50d4e 100644 --- a/bsfs/schema/schema.py +++ b/bsfs/schema/schema.py @@ -72,6 +72,8 @@ class Schema(): literals.add(types.ROOT_NUMBER) predicates.add(types.ROOT_FEATURE) + # FIXME: ensure that types derive from the right root? + # include parents in predicates set # TODO: review type annotations and ignores for python >= 3.11 (parents is _Type but should be typing.Self) predicates |= {par for pred in predicates for par in pred.parents()} # type: ignore [misc] diff --git a/bsfs/schema/serialize.py b/bsfs/schema/serialize.py index 0eb6628..a566d65 100644 --- a/bsfs/schema/serialize.py +++ b/bsfs/schema/serialize.py @@ -136,9 +136,107 @@ def from_string(schema_str: str) -> schema.Schema: -def to_string(schema_inst: schema.Schema) -> str: +def to_string(schema_inst: schema.Schema, fmt: str = 'turtle') -> str: + """Serialize a `bsfs.schema.Schema` to a string. + See `rdflib.Graph.serialize` for viable formats (default: turtle). """ - """ - raise NotImplementedError() + + # type of emitted triples. + T_TRIPLE = typing.Iterator[typing.Tuple[rdflib.URIRef, rdflib.URIRef, rdflib.term.Identifier]] + + def _type(tpe: types._Type) -> T_TRIPLE : + """Emit _Type properties (parent, annotations).""" + # emit parent + if tpe.parent is not None: + yield ( + rdflib.URIRef(tpe.uri), + rdflib.URIRef(ns.rdfs.subClassOf), + rdflib.URIRef(tpe.parent.uri), + ) + # emit annotations + for prop, value in tpe.annotations.items(): + yield ( + rdflib.URIRef(tpe.uri), + rdflib.URIRef(prop), + rdflib.Literal(value), # FIXME: datatype?! + ) + + def _predicate(pred: types.Predicate) -> T_TRIPLE: + """Emit Predicate properties (domain, range, unique).""" + # no need to emit anything for the root predicate + if pred == types.ROOT_PREDICATE: + return + # emit domain + if pred.domain != getattr(pred.parent, 'domain', None): + yield ( + rdflib.URIRef(pred.uri), + rdflib.URIRef(ns.rdfs.domain), + rdflib.URIRef(pred.domain.uri), + ) + # emit range + if pred.range != getattr(pred.parent, 'range', None): + yield ( + rdflib.URIRef(pred.uri), + rdflib.URIRef(ns.rdfs.range), + rdflib.URIRef(pred.range.uri), + ) + # emit cardinality + if pred.unique != getattr(pred.parent, 'unique', None): + yield ( + rdflib.URIRef(pred.uri), + rdflib.URIRef(ns.bsfs.unique), + rdflib.Literal(pred.unique, datatype=rdflib.XSD.boolean), + ) + + def _feature(feat: types.Feature) -> T_TRIPLE: + """Emit Feature properties (dimension, dtype, distance).""" + # emit size + if feat.dimension != getattr(feat.parent, 'dimension', None): + yield ( + rdflib.URIRef(feat.uri), + rdflib.URIRef(ns.bsfs.dimension), + rdflib.Literal(feat.dimension, datatype=rdflib.XSD.integer), + ) + # emit dtype + if feat.dtype != getattr(feat.parent, 'dtype', None): + yield ( + rdflib.URIRef(feat.uri), + rdflib.URIRef(ns.bsfs.dtype), + rdflib.URIRef(feat.dtype), + ) + # emit distance + if feat.distance != getattr(feat.parent, 'distance', None): + yield ( + rdflib.URIRef(feat.uri), + rdflib.URIRef(ns.bsfs.distance), + rdflib.URIRef(feat.distance), + ) + + def _parse(node: types._Type) -> T_TRIPLE: + """Emit all properties of a type.""" + if isinstance(node, types._Type): # pylint: disable=protected-access + # NOTE: all nodes are _Type + yield from _type(node) + if isinstance(node, types.Predicate): + yield from _predicate(node) + if isinstance(node, types.Feature): + yield from _feature(node) + + # create graph + graph = rdflib.Graph() + # add triples to graph + nodes = itertools.chain( + schema_inst.nodes(), + schema_inst.literals(), + schema_inst.predicates()) + for node in nodes: + for triple in _parse(node): + graph.add(triple) + # add known namespaces for readability + # FIXME: more systematically (e.g. for all in ns?) + graph.bind('bsfs', rdflib.URIRef('http://bsfs.ai/schema/')) + graph.bind('bse', rdflib.URIRef('http://bsfs.ai/schema/Entity#')) + # serialize to turtle + return graph.serialize(format=fmt) ## EOF ## diff --git a/test/schema/test_serialize.py b/test/schema/test_serialize.py index f46b3a4..205150a 100644 --- a/test/schema/test_serialize.py +++ b/test/schema/test_serialize.py @@ -5,6 +5,7 @@ A copy of the license is provided with the project. Author: Matthias Baumgartner, 2022 """ # imports +import re import unittest # bsfs imports @@ -997,8 +998,176 @@ class TestFromString(unittest.TestCase): class TestToString(unittest.TestCase): - def test_stub(self): - raise NotImplementedError() + + def test_empty(self): + self.assertEqual(Schema(), from_string(to_string(Schema()))) + + def test_literal(self): + # root literals + l_str = types.ROOT_LITERAL.child(ns.xsd.string) + # derived literals + l_int = types.ROOT_NUMBER.child(ns.xsd.integer) + l_unsigned = l_int.child(ns.xsd.unsigned) + # create schema + schema = Schema(literals={l_int, l_str, l_unsigned}) + + schema_str = to_string(schema) + # all symbols are serialized + self.assertIn('xsd:string', schema_str) + self.assertIn('xsd:integer', schema_str) + self.assertIn('xsd:unsigned', schema_str) + # unserialize yields the original schema + self.assertEqual(schema, from_string(schema_str)) + + # literals that have no parent are ignored + schema = Schema(literals={types.Literal(ns.bsfs.Invalid, None)}) + self.assertEqual(Schema(), from_string(to_string(schema))) + self.assertNotIn('Invalid', to_string(schema)) + + # literal annotations are serialized + annotations = { + ns.rdfs.label: 'hello world', + ns.schema.description: 'some text', + ns.bsfs.foo: 1234, + ns.bsfs.bar: True, + } + l_str = types.ROOT_LITERAL.child(ns.xsd.string, **annotations) + self.assertDictEqual( + annotations, + from_string(to_string(Schema(literals={l_str}))).literal(ns.xsd.string).annotations) + + + def test_node(self): + # root nodes + n_ent = types.ROOT_NODE.child(ns.bsfs.Entity) + n_tag = types.ROOT_NODE.child(ns.bsfs.Tag) + # derived nodes + n_img = n_ent.child(ns.bsfs.Image) + n_doc = n_ent.child(ns.bsfs.Document) + n_grp = n_tag.child(ns.bsfs.Group) + # create schema + schema = Schema(nodes={n_ent, n_img, n_doc, n_tag, n_grp}) + + schema_str = to_string(schema) + # all symbols are serialized + self.assertIn('bsfs:Entity', schema_str) + self.assertIn('bsfs:Tag', schema_str) + self.assertIn('bsfs:Image', schema_str) + self.assertIn('bsfs:Document', schema_str) + self.assertIn('bsfs:Group', schema_str) + # unserialize yields the original schema + self.assertEqual(schema, from_string(schema_str)) + + # nodes that have no parent are ignored + schema = Schema(nodes={types.Node(ns.bsfs.Invalid, None)}) + self.assertEqual(Schema(), from_string(to_string(schema))) + self.assertNotIn('Invalid', to_string(schema)) + + # node annotations are serialized + annotations = { + ns.rdfs.label: 'hello world', + ns.schema.description: 'some text', + ns.bsfs.foo: 1234, + ns.bsfs.bar: True, + } + n_ent = types.ROOT_NODE.child(ns.bsfs.Entity, **annotations) + self.assertDictEqual( + annotations, + from_string(to_string(Schema(nodes={n_ent}))).node(ns.bsfs.Entity).annotations) + + + def test_predicate(self): + # auxiliary types + n_ent = types.ROOT_NODE.child(ns.bsfs.Entity) + l_str = types.ROOT_LITERAL.child(ns.xsd.string) + # root predicates + p_annotation = types.ROOT_PREDICATE.child(ns.bsfs.Annotation, domain=n_ent) + p_owner = types.ROOT_PREDICATE.child(ns.bse.owner, range=l_str, unique=True) + # derived predicates + p_comment = p_annotation.child(ns.bse.comment, range=l_str) # inherits domain + p_note = p_comment.child(ns.bse.note, unique=True) # inherits domain/range + # create schema + schema = Schema({p_owner, p_comment, p_note}) + + schema_str = to_string(schema) + # all symbols are serialized + self.assertIn('bsfs:Entity', schema_str) + self.assertIn('xsd:string', schema_str) + self.assertIn('bsfs:Annotation', schema_str) + self.assertIn('bse:comment', schema_str) + self.assertIn('bse:owner', schema_str) + self.assertIn('bse:note', schema_str) + # inherited properties are not serialized + self.assertIsNotNone(re.search(r'bse:comment[^\.]*rdfs:range[^\.]', schema_str)) + self.assertIsNone(re.search(r'bse:comment[^\.]*rdfs:domain[^\.]', schema_str)) + #p_note has no domain/range + self.assertIsNone(re.search(r'bse:note[^\.]*rdfs:domain[^\.]', schema_str)) + self.assertIsNone(re.search(r'bse:note[^\.]*rdfs:range[^\.]', schema_str)) + # unserialize yields the original schema + self.assertEqual(schema, from_string(schema_str)) + + # predicate annotations are serialized + annotations = { + ns.rdfs.label: 'hello world', + ns.schema.description: 'some text', + ns.bsfs.foo: 1234, + ns.bsfs.bar: False, + } + p_annotation = types.ROOT_PREDICATE.child(ns.bsfs.Annotation, **annotations) + self.assertDictEqual( + annotations, + from_string(to_string(Schema({p_annotation}))).predicate(ns.bsfs.Annotation).annotations) + + + def test_feature(self): + # auxiliary types + n_ent = types.ROOT_NODE.child(ns.bsfs.Entity) + l_array = types.ROOT_LITERAL.child(ns.bsfs.array) + # root features + f_colors = types.ROOT_FEATURE.child(URI('http://bsfs.ai/schema/Feature/colors'), + range=l_array, unique=True, distance=ns.bsfs.cosine) + # derived features + f_colors1234 = f_colors.child(URI('http://bsfs.ai/schema/Feature/colors#1234'), + dimension=1024, domain=n_ent) # inherits range/dtype/distance + f_colors4321 = f_colors.child(URI('http://bsfs.ai/schema/Feature/colors#4321'), + dimension=2048, distance=ns.bsfs.euclidean) # inherits domain/range/dtype + # create schema + schema = Schema({f_colors, f_colors1234, f_colors4321}) + + schema_str = to_string(schema) + # all symbols are serialized + self.assertIn('bsfs:Entity', schema_str) + self.assertIn('bsfs:array', schema_str) + self.assertIn('<http://bsfs.ai/schema/Feature/colors', schema_str) + self.assertIn('<http://bsfs.ai/schema/Feature/colors#1234', schema_str) + self.assertIn('<http://bsfs.ai/schema/Feature/colors#4321', schema_str) + # inherited properties are not serialized + self.assertIsNotNone(re.search(r'<http://bsfs\.ai/schema/Feature/colors#1234>[^\.]*rdfs:domain[^\.]', schema_str)) + self.assertIsNotNone(re.search(r'<http://bsfs\.ai/schema/Feature/colors#1234>[^\.]*bsfs:dimension[^\.]', schema_str)) + self.assertIsNone(re.search(r'<http://bsfs\.ai/schema/Feature/colors#1234>[^\.]*rdfs:range[^\.]', schema_str)) + self.assertIsNone(re.search(r'<http://bsfs\.ai/schema/Feature/colors#1234>[^\.]*bsfs:dtype[^\.]', schema_str)) + self.assertIsNone(re.search(r'<http://bsfs\.ai/schema/Feature/colors#1234>[^\.]*bsfs:distance[^\.]', schema_str)) + self.assertIsNotNone(re.search(r'<http://bsfs\.ai/schema/Feature/colors#4321>[^\.]*bsfs:dimension[^\.]', schema_str)) + self.assertIsNotNone(re.search(r'<http://bsfs\.ai/schema/Feature/colors#4321>[^\.]*bsfs:distance[^\.]', schema_str)) + self.assertIsNone(re.search(r'<http://bsfs\.ai/schema/Feature/colors#4321>[^\.]*rdfs:domain[^\.]', schema_str)) + self.assertIsNone(re.search(r'<http://bsfs\.ai/schema/Feature/colors#4321>[^\.]*rdfs:range[^\.]', schema_str)) + self.assertIsNone(re.search(r'<http://bsfs\.ai/schema/Feature/colors#4321>[^\.]*bsfs:dtype[^\.]', schema_str)) + # unserialize yields the original schema + self.assertEqual(schema, from_string(schema_str)) + + # predicate annotations are serialized + annotations = { + ns.rdfs.label: 'hello world', + ns.schema.description: 'some text', + ns.bsfs.foo: 1234, + ns.bsfs.bar: False, + } + f_colors = types.ROOT_FEATURE.child(URI('http://bsfs.ai/schema/Feature/colors'), + domain=n_ent, range=l_array, unique=True, dtype=ns.bsfs.f16, distance=ns.bsfs.euclidean, + **annotations) + self.assertDictEqual( + annotations, + from_string(to_string(Schema({f_colors}))).predicate(URI('http://bsfs.ai/schema/Feature/colors')).annotations) ## main ## |