From cb819b8c268908b5f6cc680173db86e172847c46 Mon Sep 17 00:00:00 2001 From: Matthias Baumgartner Date: Wed, 8 Feb 2023 20:15:41 +0100 Subject: binary blob in schema and sparql triple store --- bsfs/schema/schema.py | 1 + bsfs/schema/types.py | 5 +++++ bsfs/triple_store/sparql/sparql.py | 13 +++++++++++-- test/graph/test_nodes.py | 1 + test/schema/test_schema.py | 19 ++++++++++--------- test/triple_store/sparql/test_sparql.py | 26 ++++++++++++++++++++++++++ 6 files changed, 54 insertions(+), 11 deletions(-) diff --git a/bsfs/schema/schema.py b/bsfs/schema/schema.py index 1644926..0de4203 100644 --- a/bsfs/schema/schema.py +++ b/bsfs/schema/schema.py @@ -69,6 +69,7 @@ class Schema(): literals.add(types.ROOT_LITERAL) predicates.add(types.ROOT_PREDICATE) # add minimally necessary types to the schema + literals.add(types.ROOT_BLOB) literals.add(types.ROOT_NUMBER) literals.add(types.ROOT_TIME) literals.add(types.ROOT_ARRAY) diff --git a/bsfs/schema/types.py b/bsfs/schema/types.py index 3a2e10c..12e7e94 100644 --- a/bsfs/schema/types.py +++ b/bsfs/schema/types.py @@ -380,6 +380,11 @@ ROOT_LITERAL = Literal( parent=None, ) +ROOT_BLOB = Literal( + uri=ns.bsfs.BinaryBlob, + parent=ROOT_LITERAL, + ) + ROOT_NUMBER = Literal( uri=ns.bsfs.Number, parent=ROOT_LITERAL, diff --git a/bsfs/triple_store/sparql/sparql.py b/bsfs/triple_store/sparql/sparql.py index a0dd12e..dbf9d45 100644 --- a/bsfs/triple_store/sparql/sparql.py +++ b/bsfs/triple_store/sparql/sparql.py @@ -5,8 +5,11 @@ A copy of the license is provided with the project. Author: Matthias Baumgartner, 2022 """ # imports +import base64 import itertools import typing + +# external imports import rdflib # bsfs imports @@ -30,6 +33,8 @@ __all__: typing.Sequence[str] = ( ## code ## +rdflib.term.bind(ns.bsfs.BinaryBlob, bytes, constructor=base64.b64decode) + class _Transaction(): """Lightweight rdflib transactions for in-memory databases.""" @@ -242,7 +247,7 @@ class SparqlStore(base.TripleStoreBase): ) -> typing.Iterator[URI]: if node_type not in self.schema.nodes(): raise errors.ConsistencyError(f'{node_type} is not defined in the schema') - if not isinstance(filter, ast.filter.FilterExpression): + if filter is not None and not isinstance(filter, ast.filter.FilterExpression): raise TypeError(filter) # compose query query = self._filter_parser(node_type, filter) @@ -334,7 +339,11 @@ class SparqlStore(base.TripleStoreBase): guid = rdflib.URIRef(guid) # convert value if isinstance(predicate.range, bsc.Literal): - value = rdflib.Literal(value, datatype=rdflib.URIRef(predicate.range.uri)) + dtype = rdflib.URIRef(predicate.range.uri) + if predicate.range <= self.schema.literal(ns.bsfs.BinaryBlob): + dtype = rdflib.URIRef(ns.bsfs.BinaryBlob) + value = base64.b64encode(value) + value = rdflib.Literal(value, datatype=dtype) elif isinstance(predicate.range, bsc.Node): value = rdflib.URIRef(value) else: diff --git a/test/graph/test_nodes.py b/test/graph/test_nodes.py index dabe794..6bb3ef3 100644 --- a/test/graph/test_nodes.py +++ b/test/graph/test_nodes.py @@ -92,6 +92,7 @@ class TestNodes(unittest.TestCase): (rdflib.URIRef(ns.bsfs.User), rdflib.RDFS.subClassOf, rdflib.URIRef(ns.bsfs.Node)), (rdflib.URIRef(ns.xsd.string), rdflib.RDFS.subClassOf, rdflib.URIRef(ns.bsfs.Literal)), (rdflib.URIRef(ns.bsfs.Array), rdflib.RDFS.subClassOf, rdflib.URIRef(ns.bsfs.Literal)), + (rdflib.URIRef(ns.bsfs.BinaryBlob), rdflib.RDFS.subClassOf, rdflib.URIRef(ns.bsfs.Literal)), (rdflib.URIRef(ns.bsfs.Feature), rdflib.RDFS.subClassOf, rdflib.URIRef(ns.bsfs.Array)), (rdflib.URIRef(ns.bsfs.Number), rdflib.RDFS.subClassOf, rdflib.URIRef(ns.bsfs.Literal)), (rdflib.URIRef(ns.bsfs.Time), rdflib.RDFS.subClassOf, rdflib.URIRef(ns.bsfs.Literal)), diff --git a/test/schema/test_schema.py b/test/schema/test_schema.py index 32dbc93..414e542 100644 --- a/test/schema/test_schema.py +++ b/test/schema/test_schema.py @@ -66,13 +66,14 @@ class TestSchema(unittest.TestCase): # literals self.l_root = types.ROOT_LITERAL self.l_number = types.ROOT_NUMBER + self.l_blob = types.ROOT_BLOB self.l_array = types.ROOT_ARRAY self.l_time = types.ROOT_TIME self.l_string = self.l_root.child(ns.xsd.string) self.l_integer = self.l_root.child(ns.xsd.integer) self.l_unused = self.l_root.child(ns.xsd.boolean) self.f_root = types.ROOT_FEATURE - self.literals = [self.l_root, self.l_array, self.f_root, self.l_number, self.l_time, self.l_string, self.l_integer, self.l_unused] + self.literals = [self.l_root, self.l_array, self.f_root, self.l_number, self.l_time, self.l_string, self.l_integer, self.l_unused, self.l_blob] # predicates self.p_root = types.ROOT_PREDICATE @@ -85,13 +86,13 @@ class TestSchema(unittest.TestCase): # no args yields a minimal schema schema = Schema() self.assertSetEqual(set(schema.nodes()), {self.n_root}) - self.assertSetEqual(set(schema.literals()), {self.l_root, self.l_number, self.l_array, self.l_time, self.f_root}) + self.assertSetEqual(set(schema.literals()), {self.l_root, self.l_number, self.l_array, self.l_time, self.f_root, self.l_blob}) self.assertSetEqual(set(schema.predicates()), {self.p_root}) # nodes and literals are optional schema = Schema(self.predicates) self.assertSetEqual(set(schema.nodes()), {self.n_root, self.n_ent, self.n_img, self.n_tag}) - self.assertSetEqual(set(schema.literals()), {self.l_root, self.l_string, self.l_number, self.l_time, self.l_array, self.f_root}) + self.assertSetEqual(set(schema.literals()), {self.l_root, self.l_string, self.l_number, self.l_time, self.l_array, self.f_root, self.l_blob}) self.assertSetEqual(set(schema.predicates()), set(self.predicates)) # predicates, nodes, and literals are respected @@ -112,13 +113,13 @@ class TestSchema(unittest.TestCase): # literals are complete schema = Schema(self.predicates, self.nodes, None) - self.assertSetEqual(set(schema.literals()), {self.l_root, self.l_string, self.l_number, self.l_array, self.l_time, self.f_root}) + self.assertSetEqual(set(schema.literals()), {self.l_root, self.l_string, self.l_number, self.l_array, self.l_time, self.f_root, self.l_blob}) schema = Schema(self.predicates, self.nodes, []) - self.assertSetEqual(set(schema.literals()), {self.l_root, self.l_string, self.l_number, self.l_array, self.l_time, self.f_root}) + self.assertSetEqual(set(schema.literals()), {self.l_root, self.l_string, self.l_number, self.l_array, self.l_time, self.f_root, self.l_blob}) schema = Schema(self.predicates, self.nodes, [self.l_string]) - self.assertSetEqual(set(schema.literals()), {self.l_root, self.l_string, self.l_number, self.l_array, self.l_time, self.f_root}) + self.assertSetEqual(set(schema.literals()), {self.l_root, self.l_string, self.l_number, self.l_array, self.l_time, self.f_root, self.l_blob}) schema = Schema(self.predicates, self.nodes, [self.l_integer]) - self.assertSetEqual(set(schema.literals()), {self.l_root, self.l_string, self.l_integer, self.l_number, self.l_array, self.l_time, self.f_root}) + self.assertSetEqual(set(schema.literals()), {self.l_root, self.l_string, self.l_integer, self.l_number, self.l_array, self.l_time, self.f_root, self.l_blob}) schema = Schema(self.predicates, self.nodes, [self.l_integer, self.l_unused]) self.assertSetEqual(set(schema.literals()), set(self.literals)) @@ -178,13 +179,13 @@ class TestSchema(unittest.TestCase): self.assertEqual(str(Schema(self.predicates, self.nodes, self.literals)), 'Schema()') # repr conversion with only default nodes, literals, and predicates n = [ns.bsfs.Node] - l = [ns.bsfs.Array, ns.bsfs.Feature, ns.bsfs.Literal, ns.bsfs.Number, ns.bsfs.Time] + l = [ns.bsfs.Array, ns.bsfs.BinaryBlob, ns.bsfs.Feature, ns.bsfs.Literal, ns.bsfs.Number, ns.bsfs.Time] p = [ns.bsfs.Predicate] self.assertEqual(repr(Schema()), f'Schema({n}, {l}, {p})') self.assertEqual(repr(Schema([], [], [])), f'Schema({n}, {l}, {p})') # repr conversion n = [ns.bsfs.Entity, ns.bsfs.Image, ns.bsfs.Node, ns.bsfs.Tag, ns.bsfs.Unused] - l = [ns.bsfs.Array, ns.bsfs.Feature, ns.bsfs.Literal, ns.bsfs.Number, ns.bsfs.Time, ns.xsd.boolean, ns.xsd.integer, ns.xsd.string] + l = [ns.bsfs.Array, ns.bsfs.BinaryBlob, ns.bsfs.Feature, ns.bsfs.Literal, ns.bsfs.Number, ns.bsfs.Time, ns.xsd.boolean, ns.xsd.integer, ns.xsd.string] p = [ns.bse.comment, ns.bse.group, ns.bse.tag, ns.bsfs.Predicate] self.assertEqual(repr(Schema(self.predicates, self.nodes, self.literals)), f'Schema({n}, {l}, {p})') diff --git a/test/triple_store/sparql/test_sparql.py b/test/triple_store/sparql/test_sparql.py index c58fae3..30876f2 100644 --- a/test/triple_store/sparql/test_sparql.py +++ b/test/triple_store/sparql/test_sparql.py @@ -34,6 +34,7 @@ class TestSparqlStore(unittest.TestCase): bsfs:User rdfs:subClassOf bsfs:Node . xsd:string rdfs:subClassOf bsfs:Literal . bsfs:Number rdfs:subClassOf bsfs:Literal . + bsfs:BinaryBlob rdfs:subClassOf bsfs:Literal . xsd:integer rdfs:subClassOf bsfs:Number . # non-unique literal @@ -60,6 +61,11 @@ class TestSparqlStore(unittest.TestCase): rdfs:range bsfs:User ; bsfs:unique "true"^^xsd:boolean . + # binary range + bse:asset rdfs:subClassOf bsfs:Predicate ; + rdfs:domain bsfs:Entity ; + rdfs:range bsfs:BinaryBlob . + ''') self.schema_triples = { # schema hierarchy @@ -68,6 +74,7 @@ class TestSparqlStore(unittest.TestCase): (rdflib.URIRef(ns.bsfs.User), rdflib.RDFS.subClassOf, rdflib.URIRef(ns.bsfs.Node)), (rdflib.URIRef(ns.xsd.string), rdflib.RDFS.subClassOf, rdflib.URIRef(ns.bsfs.Literal)), (rdflib.URIRef(ns.bsfs.Array), rdflib.RDFS.subClassOf, rdflib.URIRef(ns.bsfs.Literal)), + (rdflib.URIRef(ns.bsfs.BinaryBlob), rdflib.RDFS.subClassOf, rdflib.URIRef(ns.bsfs.Literal)), (rdflib.URIRef(ns.bsfs.Feature), rdflib.RDFS.subClassOf, rdflib.URIRef(ns.bsfs.Array)), (rdflib.URIRef(ns.bsfs.Number), rdflib.RDFS.subClassOf, rdflib.URIRef(ns.bsfs.Literal)), (rdflib.URIRef(ns.bsfs.Time), rdflib.RDFS.subClassOf, rdflib.URIRef(ns.bsfs.Literal)), @@ -76,6 +83,7 @@ class TestSparqlStore(unittest.TestCase): (rdflib.URIRef(ns.bse.filesize), rdflib.RDFS.subClassOf, rdflib.URIRef(ns.bsfs.Predicate)), (rdflib.URIRef(ns.bse.tag), rdflib.RDFS.subClassOf, rdflib.URIRef(ns.bsfs.Predicate)), (rdflib.URIRef(ns.bse.author), rdflib.RDFS.subClassOf, rdflib.URIRef(ns.bsfs.Predicate)), + (rdflib.URIRef(ns.bse.asset), rdflib.RDFS.subClassOf, rdflib.URIRef(ns.bsfs.Predicate)), } def test_essentials(self): @@ -358,6 +366,7 @@ class TestSparqlStore(unittest.TestCase): (rdflib.URIRef(ns.bsfs.User), rdflib.RDFS.subClassOf, rdflib.URIRef(ns.bsfs.Node)), (rdflib.URIRef(ns.xsd.boolean), rdflib.RDFS.subClassOf, rdflib.URIRef(ns.bsfs.Literal)), (rdflib.URIRef(ns.bsfs.Array), rdflib.RDFS.subClassOf, rdflib.URIRef(ns.bsfs.Literal)), + (rdflib.URIRef(ns.bsfs.BinaryBlob), rdflib.RDFS.subClassOf, rdflib.URIRef(ns.bsfs.Literal)), (rdflib.URIRef(ns.bsfs.Feature), rdflib.RDFS.subClassOf, rdflib.URIRef(ns.bsfs.Array)), (rdflib.URIRef(ns.bsfs.Number), rdflib.RDFS.subClassOf, rdflib.URIRef(ns.bsfs.Literal)), (rdflib.URIRef(ns.bsfs.Time), rdflib.RDFS.subClassOf, rdflib.URIRef(ns.bsfs.Literal)), @@ -931,6 +940,23 @@ class TestSparqlStore(unittest.TestCase): # inexistent guids self.assertRaises(errors.InstanceError, store.set, ent_type, {URI('http://example.com/me/entity#foobar')}, p_comment, {'xyz'}) + # BinaryBlob values are base64 encoded + p_asset = store.schema.predicate(ns.bse.asset) + store.set(ent_type, ent_ids, p_asset, {bytes(range(128)), bytes(range(128, 256))}) + blob1 = rdflib.Literal('AAECAwQFBgcICQoLDA0ODxAREhMUFRYXGBkaGxwdHh8gISIjJCUmJygpKissLS4vMDEyMzQ1Njc4OTo7PD0+P0BBQkNERUZHSElKS0xNTk9QUVJTVFVWV1hZWltcXV5fYGFiY2RlZmdoaWprbG1ub3BxcnN0dXZ3eHl6e3x9fn8=', + datatype=rdflib.URIRef(ns.bsfs.BinaryBlob)) + blob2 = rdflib.Literal('gIGCg4SFhoeIiYqLjI2Oj5CRkpOUlZaXmJmam5ydnp+goaKjpKWmp6ipqqusra6vsLGys7S1tre4ubq7vL2+v8DBwsPExcbHyMnKy8zNzs/Q0dLT1NXW19jZ2tvc3d7f4OHi4+Tl5ufo6err7O3u7/Dx8vP09fb3+Pn6+/z9/v8=', + datatype=rdflib.URIRef(ns.bsfs.BinaryBlob)) + self.assertTrue(set(store._graph).issuperset({ + (rdflib.URIRef('http://example.com/me/entity#1234'), rdflib.URIRef(p_asset.uri), blob1), + (rdflib.URIRef('http://example.com/me/entity#1234'), rdflib.URIRef(p_asset.uri), blob2), + (rdflib.URIRef('http://example.com/me/entity#4321'), rdflib.URIRef(p_asset.uri), blob1), + (rdflib.URIRef('http://example.com/me/entity#4321'), rdflib.URIRef(p_asset.uri), blob2), + })) + # lit.value returns the original bytes value + self.assertSetEqual({lit.value for lit in store._graph.objects(None, rdflib.URIRef(p_asset.uri))}, + {bytes(range(128)), bytes(range(128, 256))}) + ## main ## -- cgit v1.2.3