about summary refs log tree commit diff stats
diff options
context:
space:
mode:
author Matthias Baumgartner <dev@igsor.net> 2023-01-12 16:57:58 +0100
committer Matthias Baumgartner <dev@igsor.net> 2023-01-12 16:57:58 +0100
commit 1b7ef16c3795bb7112683662b8c22a774e219269 (patch)
tree b460e60adb8eb2f93070beb153be15fda6f9cca0
parent e708016ae366e96051281f3a744af35a8c06d98b (diff)
download bsfs-1b7ef16c3795bb7112683662b8c22a774e219269.tar.gz
bsfs-1b7ef16c3795bb7112683662b8c22a774e219269.tar.bz2
bsfs-1b7ef16c3795bb7112683662b8c22a774e219269.zip
schema to string
-rw-r--r-- bsfs/schema/schema.py 2
-rw-r--r-- bsfs/schema/serialize.py 104
-rw-r--r-- test/schema/test_serialize.py 173
3 files changed, 274 insertions, 5 deletions
diff --git a/bsfs/schema/schema.py b/bsfs/schema/schema.py
index 52ad191..bc50d4e 100644
--- a/bsfs/schema/schema.py
+++ b/bsfs/schema/schema.py
@@ -72,6 +72,8 @@ class Schema():
literals.add(types.ROOT_NUMBER)
predicates.add(types.ROOT_FEATURE)
+ # FIXME: ensure that types derive from the right root?
+
# include parents in predicates set
# TODO: review type annotations and ignores for python >= 3.11 (parents is _Type but should be typing.Self)
predicates |= {par for pred in predicates for par in pred.parents()} # type: ignore [misc]
diff --git a/bsfs/schema/serialize.py b/bsfs/schema/serialize.py
index 0eb6628..a566d65 100644
--- a/bsfs/schema/serialize.py
+++ b/bsfs/schema/serialize.py
@@ -136,9 +136,107 @@ def from_string(schema_str: str) -> schema.Schema:
-def to_string(schema_inst: schema.Schema) -> str:
+def to_string(schema_inst: schema.Schema, fmt: str = 'turtle') -> str:
+ """Serialize a `bsfs.schema.Schema` to a string.
+ See `rdflib.Graph.serialize` for viable formats (default: turtle).
"""
- """
- raise NotImplementedError()
+
+ # type of emitted triples.
+ T_TRIPLE = typing.Iterator[typing.Tuple[rdflib.URIRef, rdflib.URIRef, rdflib.term.Identifier]]
+
+ def _type(tpe: types._Type) -> T_TRIPLE :
+ """Emit _Type properties (parent, annotations)."""
+ # emit parent
+ if tpe.parent is not None:
+ yield (
+ rdflib.URIRef(tpe.uri),
+ rdflib.URIRef(ns.rdfs.subClassOf),
+ rdflib.URIRef(tpe.parent.uri),
+ )
+ # emit annotations
+ for prop, value in tpe.annotations.items():
+ yield (
+ rdflib.URIRef(tpe.uri),
+ rdflib.URIRef(prop),
+ rdflib.Literal(value), # FIXME: datatype?!
+ )
+
+ def _predicate(pred: types.Predicate) -> T_TRIPLE:
+ """Emit Predicate properties (domain, range, unique)."""
+ # no need to emit anything for the root predicate
+ if pred == types.ROOT_PREDICATE:
+ return
+ # emit domain
+ if pred.domain != getattr(pred.parent, 'domain', None):
+ yield (
+ rdflib.URIRef(pred.uri),
+ rdflib.URIRef(ns.rdfs.domain),
+ rdflib.URIRef(pred.domain.uri),
+ )
+ # emit range
+ if pred.range != getattr(pred.parent, 'range', None):
+ yield (
+ rdflib.URIRef(pred.uri),
+ rdflib.URIRef(ns.rdfs.range),
+ rdflib.URIRef(pred.range.uri),
+ )
+ # emit cardinality
+ if pred.unique != getattr(pred.parent, 'unique', None):
+ yield (
+ rdflib.URIRef(pred.uri),
+ rdflib.URIRef(ns.bsfs.unique),
+ rdflib.Literal(pred.unique, datatype=rdflib.XSD.boolean),
+ )
+
+ def _feature(feat: types.Feature) -> T_TRIPLE:
+ """Emit Feature properties (dimension, dtype, distance)."""
+ # emit dimension
+ if feat.dimension != getattr(feat.parent, 'dimension', None):
+ yield (
+ rdflib.URIRef(feat.uri),
+ rdflib.URIRef(ns.bsfs.dimension),
+ rdflib.Literal(feat.dimension, datatype=rdflib.XSD.integer),
+ )
+ # emit dtype
+ if feat.dtype != getattr(feat.parent, 'dtype', None):
+ yield (
+ rdflib.URIRef(feat.uri),
+ rdflib.URIRef(ns.bsfs.dtype),
+ rdflib.URIRef(feat.dtype),
+ )
+ # emit distance
+ if feat.distance != getattr(feat.parent, 'distance', None):
+ yield (
+ rdflib.URIRef(feat.uri),
+ rdflib.URIRef(ns.bsfs.distance),
+ rdflib.URIRef(feat.distance),
+ )
+
+ def _parse(node: types._Type) -> T_TRIPLE:
+ """Emit all properties of a type."""
+ if isinstance(node, types._Type): # pylint: disable=protected-access
+ # NOTE: all nodes are _Type
+ yield from _type(node)
+ if isinstance(node, types.Predicate):
+ yield from _predicate(node)
+ if isinstance(node, types.Feature):
+ yield from _feature(node)
+
+ # create graph
+ graph = rdflib.Graph()
+ # add triples to graph
+ nodes = itertools.chain(
+ schema_inst.nodes(),
+ schema_inst.literals(),
+ schema_inst.predicates())
+ for node in nodes:
+ for triple in _parse(node):
+ graph.add(triple)
+ # add known namespaces for readability
+ # FIXME: more systematically (e.g. for all in ns?)
+ graph.bind('bsfs', rdflib.URIRef('http://bsfs.ai/schema/'))
+ graph.bind('bse', rdflib.URIRef('http://bsfs.ai/schema/Entity#'))
+ # serialize to the requested format (default: turtle)
+ return graph.serialize(format=fmt)
## EOF ##
diff --git a/test/schema/test_serialize.py b/test/schema/test_serialize.py
index f46b3a4..205150a 100644
--- a/test/schema/test_serialize.py
+++ b/test/schema/test_serialize.py
@@ -5,6 +5,7 @@ A copy of the license is provided with the project.
Author: Matthias Baumgartner, 2022
"""
# imports
+import re
import unittest
# bsfs imports
@@ -997,8 +998,176 @@ class TestFromString(unittest.TestCase):
class TestToString(unittest.TestCase):
- def test_stub(self):
- raise NotImplementedError()
+
+ def test_empty(self):
+ self.assertEqual(Schema(), from_string(to_string(Schema())))
+
+ def test_literal(self):
+ # root literals
+ l_str = types.ROOT_LITERAL.child(ns.xsd.string)
+ # derived literals
+ l_int = types.ROOT_NUMBER.child(ns.xsd.integer)
+ l_unsigned = l_int.child(ns.xsd.unsigned)
+ # create schema
+ schema = Schema(literals={l_int, l_str, l_unsigned})
+
+ schema_str = to_string(schema)
+ # all symbols are serialized
+ self.assertIn('xsd:string', schema_str)
+ self.assertIn('xsd:integer', schema_str)
+ self.assertIn('xsd:unsigned', schema_str)
+ # unserialize yields the original schema
+ self.assertEqual(schema, from_string(schema_str))
+
+ # literals that have no parent are ignored
+ schema = Schema(literals={types.Literal(ns.bsfs.Invalid, None)})
+ self.assertEqual(Schema(), from_string(to_string(schema)))
+ self.assertNotIn('Invalid', to_string(schema))
+
+ # literal annotations are serialized
+ annotations = {
+ ns.rdfs.label: 'hello world',
+ ns.schema.description: 'some text',
+ ns.bsfs.foo: 1234,
+ ns.bsfs.bar: True,
+ }
+ l_str = types.ROOT_LITERAL.child(ns.xsd.string, **annotations)
+ self.assertDictEqual(
+ annotations,
+ from_string(to_string(Schema(literals={l_str}))).literal(ns.xsd.string).annotations)
+
+
+ def test_node(self):
+ # root nodes
+ n_ent = types.ROOT_NODE.child(ns.bsfs.Entity)
+ n_tag = types.ROOT_NODE.child(ns.bsfs.Tag)
+ # derived nodes
+ n_img = n_ent.child(ns.bsfs.Image)
+ n_doc = n_ent.child(ns.bsfs.Document)
+ n_grp = n_tag.child(ns.bsfs.Group)
+ # create schema
+ schema = Schema(nodes={n_ent, n_img, n_doc, n_tag, n_grp})
+
+ schema_str = to_string(schema)
+ # all symbols are serialized
+ self.assertIn('bsfs:Entity', schema_str)
+ self.assertIn('bsfs:Tag', schema_str)
+ self.assertIn('bsfs:Image', schema_str)
+ self.assertIn('bsfs:Document', schema_str)
+ self.assertIn('bsfs:Group', schema_str)
+ # unserialize yields the original schema
+ self.assertEqual(schema, from_string(schema_str))
+
+ # nodes that have no parent are ignored
+ schema = Schema(nodes={types.Node(ns.bsfs.Invalid, None)})
+ self.assertEqual(Schema(), from_string(to_string(schema)))
+ self.assertNotIn('Invalid', to_string(schema))
+
+ # node annotations are serialized
+ annotations = {
+ ns.rdfs.label: 'hello world',
+ ns.schema.description: 'some text',
+ ns.bsfs.foo: 1234,
+ ns.bsfs.bar: True,
+ }
+ n_ent = types.ROOT_NODE.child(ns.bsfs.Entity, **annotations)
+ self.assertDictEqual(
+ annotations,
+ from_string(to_string(Schema(nodes={n_ent}))).node(ns.bsfs.Entity).annotations)
+
+
+ def test_predicate(self):
+ # auxiliary types
+ n_ent = types.ROOT_NODE.child(ns.bsfs.Entity)
+ l_str = types.ROOT_LITERAL.child(ns.xsd.string)
+ # root predicates
+ p_annotation = types.ROOT_PREDICATE.child(ns.bsfs.Annotation, domain=n_ent)
+ p_owner = types.ROOT_PREDICATE.child(ns.bse.owner, range=l_str, unique=True)
+ # derived predicates
+ p_comment = p_annotation.child(ns.bse.comment, range=l_str) # inherits domain
+ p_note = p_comment.child(ns.bse.note, unique=True) # inherits domain/range
+ # create schema
+ schema = Schema({p_owner, p_comment, p_note})
+
+ schema_str = to_string(schema)
+ # all symbols are serialized
+ self.assertIn('bsfs:Entity', schema_str)
+ self.assertIn('xsd:string', schema_str)
+ self.assertIn('bsfs:Annotation', schema_str)
+ self.assertIn('bse:comment', schema_str)
+ self.assertIn('bse:owner', schema_str)
+ self.assertIn('bse:note', schema_str)
+ # own properties are serialized, inherited ones are not
+ self.assertIsNotNone(re.search(r'bse:comment[^\.]*rdfs:range[^\.]', schema_str))
+ self.assertIsNone(re.search(r'bse:comment[^\.]*rdfs:domain[^\.]', schema_str))
+ # p_note inherits domain and range, so neither is serialized for it
+ self.assertIsNone(re.search(r'bse:note[^\.]*rdfs:domain[^\.]', schema_str))
+ self.assertIsNone(re.search(r'bse:note[^\.]*rdfs:range[^\.]', schema_str))
+ # unserialize yields the original schema
+ self.assertEqual(schema, from_string(schema_str))
+
+ # predicate annotations are serialized
+ annotations = {
+ ns.rdfs.label: 'hello world',
+ ns.schema.description: 'some text',
+ ns.bsfs.foo: 1234,
+ ns.bsfs.bar: False,
+ }
+ p_annotation = types.ROOT_PREDICATE.child(ns.bsfs.Annotation, **annotations)
+ self.assertDictEqual(
+ annotations,
+ from_string(to_string(Schema({p_annotation}))).predicate(ns.bsfs.Annotation).annotations)
+
+
+ def test_feature(self):
+ # auxiliary types
+ n_ent = types.ROOT_NODE.child(ns.bsfs.Entity)
+ l_array = types.ROOT_LITERAL.child(ns.bsfs.array)
+ # root features
+ f_colors = types.ROOT_FEATURE.child(URI('http://bsfs.ai/schema/Feature/colors'),
+ range=l_array, unique=True, distance=ns.bsfs.cosine)
+ # derived features
+ f_colors1234 = f_colors.child(URI('http://bsfs.ai/schema/Feature/colors#1234'),
+ dimension=1024, domain=n_ent) # inherits range/dtype/distance
+ f_colors4321 = f_colors.child(URI('http://bsfs.ai/schema/Feature/colors#4321'),
+ dimension=2048, distance=ns.bsfs.euclidean) # inherits domain/range/dtype
+ # create schema
+ schema = Schema({f_colors, f_colors1234, f_colors4321})
+
+ schema_str = to_string(schema)
+ # all symbols are serialized
+ self.assertIn('bsfs:Entity', schema_str)
+ self.assertIn('bsfs:array', schema_str)
+ self.assertIn('<http://bsfs.ai/schema/Feature/colors', schema_str)
+ self.assertIn('<http://bsfs.ai/schema/Feature/colors#1234', schema_str)
+ self.assertIn('<http://bsfs.ai/schema/Feature/colors#4321', schema_str)
+ # own properties are serialized, inherited ones are not
+ self.assertIsNotNone(re.search(r'<http://bsfs\.ai/schema/Feature/colors#1234>[^\.]*rdfs:domain[^\.]', schema_str))
+ self.assertIsNotNone(re.search(r'<http://bsfs\.ai/schema/Feature/colors#1234>[^\.]*bsfs:dimension[^\.]', schema_str))
+ self.assertIsNone(re.search(r'<http://bsfs\.ai/schema/Feature/colors#1234>[^\.]*rdfs:range[^\.]', schema_str))
+ self.assertIsNone(re.search(r'<http://bsfs\.ai/schema/Feature/colors#1234>[^\.]*bsfs:dtype[^\.]', schema_str))
+ self.assertIsNone(re.search(r'<http://bsfs\.ai/schema/Feature/colors#1234>[^\.]*bsfs:distance[^\.]', schema_str))
+ self.assertIsNotNone(re.search(r'<http://bsfs\.ai/schema/Feature/colors#4321>[^\.]*bsfs:dimension[^\.]', schema_str))
+ self.assertIsNotNone(re.search(r'<http://bsfs\.ai/schema/Feature/colors#4321>[^\.]*bsfs:distance[^\.]', schema_str))
+ self.assertIsNone(re.search(r'<http://bsfs\.ai/schema/Feature/colors#4321>[^\.]*rdfs:domain[^\.]', schema_str))
+ self.assertIsNone(re.search(r'<http://bsfs\.ai/schema/Feature/colors#4321>[^\.]*rdfs:range[^\.]', schema_str))
+ self.assertIsNone(re.search(r'<http://bsfs\.ai/schema/Feature/colors#4321>[^\.]*bsfs:dtype[^\.]', schema_str))
+ # unserialize yields the original schema
+ self.assertEqual(schema, from_string(schema_str))
+
+ # feature annotations are serialized
+ annotations = {
+ ns.rdfs.label: 'hello world',
+ ns.schema.description: 'some text',
+ ns.bsfs.foo: 1234,
+ ns.bsfs.bar: False,
+ }
+ f_colors = types.ROOT_FEATURE.child(URI('http://bsfs.ai/schema/Feature/colors'),
+ domain=n_ent, range=l_array, unique=True, dtype=ns.bsfs.f16, distance=ns.bsfs.euclidean,
+ **annotations)
+ self.assertDictEqual(
+ annotations,
+ from_string(to_string(Schema({f_colors}))).predicate(URI('http://bsfs.ai/schema/Feature/colors')).annotations)
## main ##