From 791918039979d0743fd2ea4b9a5e74593ff96fd0 Mon Sep 17 00:00:00 2001 From: Matthias Baumgartner Date: Mon, 19 Dec 2022 13:32:34 +0100 Subject: query ast file structures and essential interfaces --- bsfs/triple_store/sparql/sparql.py | 256 +++++++++++++++++++++++++++++++++++++ 1 file changed, 256 insertions(+) create mode 100644 bsfs/triple_store/sparql/sparql.py (limited to 'bsfs/triple_store/sparql/sparql.py') diff --git a/bsfs/triple_store/sparql/sparql.py b/bsfs/triple_store/sparql/sparql.py new file mode 100644 index 0000000..fff540a --- /dev/null +++ b/bsfs/triple_store/sparql/sparql.py @@ -0,0 +1,256 @@ +""" + +Part of the BlackStar filesystem (bsfs) module. +A copy of the license is provided with the project. +Author: Matthias Baumgartner, 2022 +""" +# imports +import itertools +import typing +import rdflib + +# bsfs imports +from bsfs import schema as bsc +from bsfs.query import ast +from bsfs.utils import errors, URI + +# inner-module imports +from . import base + + +# exports +__all__: typing.Sequence[str] = ( + 'SparqlStore', + ) + + +## code ## + +class _Transaction(): + """Lightweight rdflib transactions for in-memory databases.""" + + # graph instance. + _graph: rdflib.Graph + + # current log of added triples. + _added: typing.List[typing.Any] + + # current log of removed triples. + _removed: typing.List[typing.Any] + + def __init__(self, graph: rdflib.Graph): + self._graph = graph + # initialize internal structures + self.commit() + + def commit(self): + """Commit temporary changes.""" + self._added = [] + self._removed = [] + + def rollback(self): + """Undo changes since the last commit.""" + for triple in self._added: + self._graph.remove(triple) + for triple in self._removed: + self._graph.add(triple) + + def add(self, triple: typing.Any): + """Add a triple to the graph.""" + if triple not in self._graph: + self._added.append(triple) + self._graph.add(triple) + + def remove(self, triple: typing.Any): + """Remove a triple from the graph.""" + if triple in self._graph: + self._removed.append(triple) + self._graph.remove(triple) + + +class SparqlStore(base.TripleStoreBase): + """Sparql-based triple store. + + The sparql triple store uses a third-party backend + (currently rdflib) to store triples and manages them via + the Sparql query language. + + """ + + # The rdflib graph. + _graph: rdflib.Graph + + # Current transaction. + _transaction: _Transaction + + # The local schema. + _schema: bsc.Schema + + def __init__(self): + super().__init__(None) + self._graph = rdflib.Graph() + self._transaction = _Transaction(self._graph) + self._schema = bsc.Schema.Empty() + + # NOTE: mypy and pylint complain about the **kwargs not being listed (contrasting super) + # However, not having it here is clearer since it's explicit that there are no arguments. + @classmethod + def Open(cls) -> 'SparqlStore': # type: ignore [override] # pylint: disable=arguments-differ + return cls() + + def commit(self): + self._transaction.commit() + + def rollback(self): + self._transaction.rollback() + + @property + def schema(self) -> bsc.Schema: + return self._schema + + @schema.setter + def schema(self, schema: bsc.Schema): + # check args: Schema instanace + if not isinstance(schema, bsc.Schema): + raise TypeError(schema) + # check compatibility: No contradicting definitions + if not self.schema.consistent_with(schema): + raise errors.ConsistencyError(f'{schema} is inconsistent with {self.schema}') + + # commit the current transaction + self.commit() + + # adjust instances: + # nothing to do for added classes + # delete instances of removed classes + + # get deleted classes + sub = self.schema - schema + + # remove predicate instances + for pred in sub.predicates: + for src, trg in self._graph.subject_objects(rdflib.URIRef(pred.uri)): + self._transaction.remove((src, rdflib.URIRef(pred.uri), trg)) + + # remove node instances + for node in sub.nodes: + # iterate through node instances + for inst in self._graph.subjects(rdflib.RDF.type, rdflib.URIRef(node.uri)): + # remove triples where the instance is in the object position + for src, pred in self._graph.subject_predicates(inst): + self._transaction.remove((src, pred, inst)) + # remove triples where the instance is in the subject position + for pred, trg in self._graph.predicate_objects(inst): + self._transaction.remove((inst, pred, trg)) + # remove instance + self._transaction.remove((inst, rdflib.RDF.type, rdflib.URIRef(node.uri))) + + # NOTE: Nothing to do for literals + + # commit instance changes + self.commit() + + # migrate schema + self._schema = schema + + def get(self, node_type: bsc.Node, query: ast.filter.FilterExpression) -> typing.Iterator[URI]: + raise NotImplementedError() + + def _has_type(self, subject: URI, node_type: bsc.Node) -> bool: + """Return True if *subject* is a node of class *node_type* or a subclass thereof.""" + if node_type not in self.schema.nodes(): + raise errors.ConsistencyError(f'{node_type} is not defined in the schema') + + subject_types = list(self._graph.objects(rdflib.URIRef(subject), rdflib.RDF.type)) + if len(subject_types) == 0: + return False + if len(subject_types) == 1: + node = self.schema.node(URI(subject_types[0])) # type: ignore [arg-type] # URI is a subtype of str + if node == node_type: + return True + if node_type in node.parents(): + return True + return False + raise errors.UnreachableError() + + def exists( + self, + node_type: bsc.Node, + guids: typing.Iterable[URI], + ) -> typing.Iterable[URI]: + return (subj for subj in guids if self._has_type(subj, node_type)) + + def create( + self, + node_type: bsc.Node, + guids: typing.Iterable[URI], + ): + # check node_type + if node_type not in self.schema.nodes(): + raise errors.ConsistencyError(f'{node_type} is not defined in the schema') + # check and create guids + for guid in guids: + subject = rdflib.URIRef(guid) + # check node existence + if (subject, rdflib.RDF.type, None) in self._graph: + # FIXME: node exists and may have a different type! ignore? raise? report? + continue + # add node + self._transaction.add((subject, rdflib.RDF.type, rdflib.URIRef(node_type.uri))) + + def set( + self, + node_type: bsc.Node, + guids: typing.Iterable[URI], + predicate: bsc.Predicate, + values: typing.Iterable[typing.Any], + ): + # check node_type + if node_type not in self.schema.nodes(): + raise errors.ConsistencyError(f'{node_type} is not defined in the schema') + # check predicate + if predicate not in self.schema.predicates(): + raise errors.ConsistencyError(f'{predicate} is not defined in the schema') + if not node_type <= predicate.domain: + raise errors.ConsistencyError(f'{node_type} must be a subclass of {predicate.domain}') + # NOTE: predicate.range is in the schema since predicate is in the schema. + # materialize values + values = set(values) + # check values + if len(values) == 0: + return + if predicate.unique and len(values) != 1: + raise ValueError(values) + if isinstance(predicate.range, bsc.Node): + values = set(values) # materialize to safeguard against iterators passed as argument + inconsistent = {val for val in values if not self._has_type(val, predicate.range)} + # catches nodes that don't exist and nodes that have an inconsistent type + if len(inconsistent) > 0: + raise errors.InstanceError(inconsistent) + # check guids + # FIXME: Fail or skip inexistent nodes? + guids = set(guids) + inconsistent = {guid for guid in guids if not self._has_type(guid, node_type)} + if len(inconsistent) > 0: + raise errors.InstanceError(inconsistent) + + # add triples + pred = rdflib.URIRef(predicate.uri) + for guid, value in itertools.product(guids, values): + guid = rdflib.URIRef(guid) + # convert value + if isinstance(predicate.range, bsc.Literal): + value = rdflib.Literal(value, datatype=rdflib.URIRef(predicate.range.uri)) + elif isinstance(predicate.range, bsc.Node): + value = rdflib.URIRef(value) + else: + raise errors.UnreachableError() + # clear triples for unique predicates + if predicate.unique: + for obj in self._graph.objects(guid, pred): + if obj != value: + self._transaction.remove((guid, pred, obj)) + # add triple + self._transaction.add((guid, pred, value)) + +## EOF ## -- cgit v1.2.3 From a0f2308adcb226d28de3355bc7115a6d9b669462 Mon Sep 17 00:00:00 2001 From: Matthias Baumgartner Date: Mon, 19 Dec 2022 13:40:02 +0100 Subject: import fixes --- bsfs/triple_store/sparql/sparql.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'bsfs/triple_store/sparql/sparql.py') diff --git a/bsfs/triple_store/sparql/sparql.py b/bsfs/triple_store/sparql/sparql.py index fff540a..7172f34 100644 --- a/bsfs/triple_store/sparql/sparql.py +++ b/bsfs/triple_store/sparql/sparql.py @@ -15,7 +15,7 @@ from bsfs.query import ast from bsfs.utils import errors, URI # inner-module imports -from . import base +from .. import base # exports -- cgit v1.2.3 From 73e39cb4967949025aefe874f401e27b0abb772c Mon Sep 17 00:00:00 2001 From: Matthias Baumgartner Date: Thu, 22 Dec 2022 20:29:57 +0100 Subject: filter ast parser and get method in sparql store --- bsfs/triple_store/sparql/sparql.py | 51 ++++++++++++++++++++++++++++++++++---- 1 file changed, 46 insertions(+), 5 deletions(-) (limited to 'bsfs/triple_store/sparql/sparql.py') diff --git a/bsfs/triple_store/sparql/sparql.py b/bsfs/triple_store/sparql/sparql.py index 7172f34..c3cbff6 100644 --- a/bsfs/triple_store/sparql/sparql.py +++ b/bsfs/triple_store/sparql/sparql.py @@ -15,6 +15,7 @@ from bsfs.query import ast from bsfs.utils import errors, URI # inner-module imports +from . import parse_filter from .. import base @@ -86,11 +87,15 @@ class SparqlStore(base.TripleStoreBase): # The local schema. _schema: bsc.Schema + # Filter parser + _filter_parser: parse_filter.Filter + def __init__(self): super().__init__(None) self._graph = rdflib.Graph() self._transaction = _Transaction(self._graph) self._schema = bsc.Schema.Empty() + self._filter_parser = parse_filter.Filter(self._schema) # NOTE: mypy and pylint complain about the **kwargs not being listed (contrasting super) # However, not having it here is clearer since it's explicit that there are no arguments. @@ -127,10 +132,17 @@ class SparqlStore(base.TripleStoreBase): # get deleted classes sub = self.schema - schema - # remove predicate instances for pred in sub.predicates: + # remove predicate instances for src, trg in self._graph.subject_objects(rdflib.URIRef(pred.uri)): self._transaction.remove((src, rdflib.URIRef(pred.uri), trg)) + # remove predicate definition + if pred.parent is not None: + self._transaction.remove(( + rdflib.URIRef(pred.uri), + rdflib.RDFS.subClassOf, + rdflib.URIRef(pred.parent.uri), + )) # remove node instances for node in sub.nodes: @@ -144,17 +156,46 @@ class SparqlStore(base.TripleStoreBase): self._transaction.remove((inst, pred, trg)) # remove instance self._transaction.remove((inst, rdflib.RDF.type, rdflib.URIRef(node.uri))) - - # NOTE: Nothing to do for literals + # remove node definition + if node.parent is not None: + self._transaction.remove(( + rdflib.URIRef(node.uri), + rdflib.RDFS.subClassOf, + rdflib.URIRef(node.parent.uri), + )) + + for lit in sub.literals: + # remove literal definition + if lit.parent is not None: + self._transaction.remove(( + rdflib.URIRef(lit.uri), + rdflib.RDFS.subClassOf, + rdflib.URIRef(lit.parent.uri), + )) + + # add predicate, node, and literal hierarchies to the graph + for itm in itertools.chain(schema.predicates(), schema.nodes(), schema.literals()): + if itm.parent is not None: + self._transaction.add((rdflib.URIRef(itm.uri), rdflib.RDFS.subClassOf, rdflib.URIRef(itm.parent.uri))) # commit instance changes self.commit() # migrate schema self._schema = schema + self._filter_parser.schema = schema - def get(self, node_type: bsc.Node, query: ast.filter.FilterExpression) -> typing.Iterator[URI]: - raise NotImplementedError() + def get( + self, + node_type: bsc.Node, + query: typing.Optional[ast.filter.FilterExpression] = None, + ) -> typing.Iterator[URI]: + if node_type not in self.schema.nodes(): + raise errors.ConsistencyError(f'{node_type} is not defined in the schema') + if not isinstance(query, ast.filter.FilterExpression): + raise TypeError(query) + for guid, in self._graph.query(self._filter_parser(node_type, query)): + yield URI(guid) def _has_type(self, subject: URI, node_type: bsc.Node) -> bool: """Return True if *subject* is a node of class *node_type* or a subclass thereof.""" -- cgit v1.2.3 From 6b3e32b29799a8143e8ce9d20c5f27e3e166b9bb Mon Sep 17 00:00:00 2001 From: Matthias Baumgartner Date: Thu, 12 Jan 2023 10:17:07 +0100 Subject: changed path to from_string in clients --- bsfs/triple_store/sparql/sparql.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'bsfs/triple_store/sparql/sparql.py') diff --git a/bsfs/triple_store/sparql/sparql.py b/bsfs/triple_store/sparql/sparql.py index c3cbff6..ddace35 100644 --- a/bsfs/triple_store/sparql/sparql.py +++ b/bsfs/triple_store/sparql/sparql.py @@ -94,7 +94,7 @@ class SparqlStore(base.TripleStoreBase): super().__init__(None) self._graph = rdflib.Graph() self._transaction = _Transaction(self._graph) - self._schema = bsc.Schema.Empty() + self._schema = bsc.Schema() self._filter_parser = parse_filter.Filter(self._schema) # NOTE: mypy and pylint complain about the **kwargs not being listed (contrasting super) -- cgit v1.2.3 From b0ff4ed674ad78bf113c3cc0c2ccd187ccb91048 Mon Sep 17 00:00:00 2001 From: Matthias Baumgartner Date: Thu, 12 Jan 2023 10:26:30 +0100 Subject: number literal adaptions --- bsfs/triple_store/sparql/sparql.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'bsfs/triple_store/sparql/sparql.py') diff --git a/bsfs/triple_store/sparql/sparql.py b/bsfs/triple_store/sparql/sparql.py index ddace35..87467ff 100644 --- a/bsfs/triple_store/sparql/sparql.py +++ b/bsfs/triple_store/sparql/sparql.py @@ -11,6 +11,7 @@ import rdflib # bsfs imports from bsfs import schema as bsc +from bsfs.namespace import ns from bsfs.query import ast from bsfs.utils import errors, URI @@ -94,7 +95,8 @@ class SparqlStore(base.TripleStoreBase): super().__init__(None) self._graph = rdflib.Graph() self._transaction = _Transaction(self._graph) - self._schema = bsc.Schema() + # NOTE: parsing bsfs.query.ast.filter.Has requires xsd:integer. + self._schema = bsc.Schema(literals={bsc.ROOT_NUMBER.child(ns.xsd.integer)}) self._filter_parser = parse_filter.Filter(self._schema) # NOTE: mypy and pylint complain about the **kwargs not being listed (contrasting super) -- cgit v1.2.3 From e708016ae366e96051281f3a744af35a8c06d98b Mon Sep 17 00:00:00 2001 From: Matthias Baumgartner Date: Thu, 12 Jan 2023 10:28:16 +0100 Subject: cleanup and cosmetic changes --- bsfs/triple_store/sparql/sparql.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'bsfs/triple_store/sparql/sparql.py') diff --git a/bsfs/triple_store/sparql/sparql.py b/bsfs/triple_store/sparql/sparql.py index 87467ff..3877d1a 100644 --- a/bsfs/triple_store/sparql/sparql.py +++ b/bsfs/triple_store/sparql/sparql.py @@ -139,7 +139,7 @@ class SparqlStore(base.TripleStoreBase): for src, trg in self._graph.subject_objects(rdflib.URIRef(pred.uri)): self._transaction.remove((src, rdflib.URIRef(pred.uri), trg)) # remove predicate definition - if pred.parent is not None: + if pred.parent is not None: # NOTE: there shouldn't be any predicate w/o parent self._transaction.remove(( rdflib.URIRef(pred.uri), rdflib.RDFS.subClassOf, @@ -159,7 +159,7 @@ class SparqlStore(base.TripleStoreBase): # remove instance self._transaction.remove((inst, rdflib.RDF.type, rdflib.URIRef(node.uri))) # remove node definition - if node.parent is not None: + if node.parent is not None: # NOTE: there shouldn't be any node w/o parent self._transaction.remove(( rdflib.URIRef(node.uri), rdflib.RDFS.subClassOf, @@ -168,7 +168,7 @@ class SparqlStore(base.TripleStoreBase): for lit in sub.literals: # remove literal definition - if lit.parent is not None: + if lit.parent is not None: # NOTE: there shouldn't be any literal w/o parent self._transaction.remove(( rdflib.URIRef(lit.uri), rdflib.RDFS.subClassOf, -- cgit v1.2.3 From 80a97bfa9f22d0d6dd25928fe1754a3a0d1de78a Mon Sep 17 00:00:00 2001 From: Matthias Baumgartner Date: Sun, 15 Jan 2023 21:00:12 +0100 Subject: Distance filter ast node --- bsfs/triple_store/sparql/sparql.py | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) (limited to 'bsfs/triple_store/sparql/sparql.py') diff --git a/bsfs/triple_store/sparql/sparql.py b/bsfs/triple_store/sparql/sparql.py index 3877d1a..dfd9871 100644 --- a/bsfs/triple_store/sparql/sparql.py +++ b/bsfs/triple_store/sparql/sparql.py @@ -18,6 +18,7 @@ from bsfs.utils import errors, URI # inner-module imports from . import parse_filter from .. import base +from .distance import DISTANCE_FU # exports @@ -97,7 +98,7 @@ class SparqlStore(base.TripleStoreBase): self._transaction = _Transaction(self._graph) # NOTE: parsing bsfs.query.ast.filter.Has requires xsd:integer. self._schema = bsc.Schema(literals={bsc.ROOT_NUMBER.child(ns.xsd.integer)}) - self._filter_parser = parse_filter.Filter(self._schema) + self._filter_parser = parse_filter.Filter(self._graph, self._schema) # NOTE: mypy and pylint complain about the **kwargs not being listed (contrasting super) # However, not having it here is clearer since it's explicit that there are no arguments. @@ -123,6 +124,16 @@ class SparqlStore(base.TripleStoreBase): # check compatibility: No contradicting definitions if not self.schema.consistent_with(schema): raise errors.ConsistencyError(f'{schema} is inconsistent with {self.schema}') + # check distance functions of features + invalid = { + (cand.uri, cand.distance) + for cand + in schema.literals() + if isinstance(cand, bsc.Feature) and cand.distance not in DISTANCE_FU} + if len(invalid) > 0: + cand, dist = zip(*invalid) + raise ValueError( + f'unknown distance function {",".join(dist)} in feature {", ".join(cand)}') # commit the current transaction self.commit() -- cgit v1.2.3 From 3504609e1ba1f7f653fa79910474bebd3ec24d8a Mon Sep 17 00:00:00 2001 From: Matthias Baumgartner Date: Mon, 16 Jan 2023 21:41:20 +0100 Subject: various minor fixes --- bsfs/triple_store/sparql/sparql.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'bsfs/triple_store/sparql/sparql.py') diff --git a/bsfs/triple_store/sparql/sparql.py b/bsfs/triple_store/sparql/sparql.py index dfd9871..fedd227 100644 --- a/bsfs/triple_store/sparql/sparql.py +++ b/bsfs/triple_store/sparql/sparql.py @@ -132,7 +132,7 @@ class SparqlStore(base.TripleStoreBase): if isinstance(cand, bsc.Feature) and cand.distance not in DISTANCE_FU} if len(invalid) > 0: cand, dist = zip(*invalid) - raise ValueError( + raise errors.UnsupportedError( f'unknown distance function {",".join(dist)} in feature {", ".join(cand)}') # commit the current transaction -- cgit v1.2.3 From 965f4dfe41afd552ed6477c75e1286c14e3580f6 Mon Sep 17 00:00:00 2001 From: Matthias Baumgartner Date: Sat, 21 Jan 2023 16:31:08 +0100 Subject: Fetch in triple stores: * fetch interface * sparql fetch ast parser * sparql fetch implementation --- bsfs/triple_store/sparql/sparql.py | 50 ++++++++++++++++++++++++++++++++++---- 1 file changed, 45 insertions(+), 5 deletions(-) (limited to 'bsfs/triple_store/sparql/sparql.py') diff --git a/bsfs/triple_store/sparql/sparql.py b/bsfs/triple_store/sparql/sparql.py index fedd227..a0dd12e 100644 --- a/bsfs/triple_store/sparql/sparql.py +++ b/bsfs/triple_store/sparql/sparql.py @@ -16,6 +16,7 @@ from bsfs.query import ast from bsfs.utils import errors, URI # inner-module imports +from . import parse_fetch from . import parse_filter from .. import base from .distance import DISTANCE_FU @@ -92,13 +93,16 @@ class SparqlStore(base.TripleStoreBase): # Filter parser _filter_parser: parse_filter.Filter + # Fetch parser + _fetch_parser: parse_fetch.Fetch + def __init__(self): super().__init__(None) self._graph = rdflib.Graph() self._transaction = _Transaction(self._graph) - # NOTE: parsing bsfs.query.ast.filter.Has requires xsd:integer. self._schema = bsc.Schema(literals={bsc.ROOT_NUMBER.child(ns.xsd.integer)}) self._filter_parser = parse_filter.Filter(self._graph, self._schema) + self._fetch_parser = parse_fetch.Fetch(self._schema) # NOTE: mypy and pylint complain about the **kwargs not being listed (contrasting super) # However, not having it here is clearer since it's explicit that there are no arguments. @@ -197,17 +201,53 @@ class SparqlStore(base.TripleStoreBase): # migrate schema self._schema = schema self._filter_parser.schema = schema + self._fetch_parser.schema = schema + + def fetch( + self, + node_type: bsc.Node, + filter: ast.filter.FilterExpression, # pylint: disable=redefined-builtin + fetch: ast.fetch.FetchExpression, + ) -> typing.Iterator[typing.Tuple[URI, str, typing.Any]]: + if node_type not in self.schema.nodes(): + raise errors.ConsistencyError(f'{node_type} is not defined in the schema') + if not isinstance(filter, ast.filter.FilterExpression): + raise TypeError(filter) + if not isinstance(fetch, ast.fetch.FetchExpression): + raise TypeError(fetch) + # compose a query from fetch and filter ast + query = self._filter_parser(node_type, filter) + query += self._fetch_parser(node_type, fetch) + # run query + emitted = set() + for result in query(self._graph): + guid = URI(result[0]) + for name, raw in zip(query.names, result[1:]): + if raw is None: # undefined value + continue + if isinstance(raw, rdflib.Literal): + value = raw.value + else: + value = URI(raw) + # emit triple + triple = (guid, name, value) + if triple not in emitted: # FIXME: needs a better solution! + emitted.add(triple) + yield guid, name, value def get( self, node_type: bsc.Node, - query: typing.Optional[ast.filter.FilterExpression] = None, + filter: typing.Optional[ast.filter.FilterExpression] = None, # pylint: disable=redefined-builtin ) -> typing.Iterator[URI]: if node_type not in self.schema.nodes(): raise errors.ConsistencyError(f'{node_type} is not defined in the schema') - if not isinstance(query, ast.filter.FilterExpression): - raise TypeError(query) - for guid, in self._graph.query(self._filter_parser(node_type, query)): + if not isinstance(filter, ast.filter.FilterExpression): + raise TypeError(filter) + # compose query + query = self._filter_parser(node_type, filter) + # run query + for guid, in query(self._graph): yield URI(guid) def _has_type(self, subject: URI, node_type: bsc.Node) -> bool: -- cgit v1.2.3 From cb819b8c268908b5f6cc680173db86e172847c46 Mon Sep 17 00:00:00 2001 From: Matthias Baumgartner Date: Wed, 8 Feb 2023 20:15:41 +0100 Subject: binary blob in schema and sparql triple store --- bsfs/triple_store/sparql/sparql.py | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) (limited to 'bsfs/triple_store/sparql/sparql.py') diff --git a/bsfs/triple_store/sparql/sparql.py b/bsfs/triple_store/sparql/sparql.py index a0dd12e..dbf9d45 100644 --- a/bsfs/triple_store/sparql/sparql.py +++ b/bsfs/triple_store/sparql/sparql.py @@ -5,8 +5,11 @@ A copy of the license is provided with the project. Author: Matthias Baumgartner, 2022 """ # imports +import base64 import itertools import typing + +# external imports import rdflib # bsfs imports @@ -30,6 +33,8 @@ __all__: typing.Sequence[str] = ( ## code ## +rdflib.term.bind(ns.bsfs.BinaryBlob, bytes, constructor=base64.b64decode) + class _Transaction(): """Lightweight rdflib transactions for in-memory databases.""" @@ -242,7 +247,7 @@ class SparqlStore(base.TripleStoreBase): ) -> typing.Iterator[URI]: if node_type not in self.schema.nodes(): raise errors.ConsistencyError(f'{node_type} is not defined in the schema') - if not isinstance(filter, ast.filter.FilterExpression): + if filter is not None and not isinstance(filter, ast.filter.FilterExpression): raise TypeError(filter) # compose query query = self._filter_parser(node_type, filter) @@ -334,7 +339,11 @@ class SparqlStore(base.TripleStoreBase): guid = rdflib.URIRef(guid) # convert value if isinstance(predicate.range, bsc.Literal): - value = rdflib.Literal(value, datatype=rdflib.URIRef(predicate.range.uri)) + dtype = rdflib.URIRef(predicate.range.uri) + if predicate.range <= self.schema.literal(ns.bsfs.BinaryBlob): + dtype = rdflib.URIRef(ns.bsfs.BinaryBlob) + value = base64.b64encode(value) + value = rdflib.Literal(value, datatype=dtype) elif isinstance(predicate.range, bsc.Node): value = rdflib.URIRef(value) else: -- cgit v1.2.3 From 2e07f33314c238e42bfadc5f39805f93ffbc622e Mon Sep 17 00:00:00 2001 From: Matthias Baumgartner Date: Thu, 2 Mar 2023 15:10:05 +0100 Subject: removed author and license notices from individual files --- bsfs/triple_store/sparql/sparql.py | 5 ----- 1 file changed, 5 deletions(-) (limited to 'bsfs/triple_store/sparql/sparql.py') diff --git a/bsfs/triple_store/sparql/sparql.py b/bsfs/triple_store/sparql/sparql.py index dbf9d45..5890bcc 100644 --- a/bsfs/triple_store/sparql/sparql.py +++ b/bsfs/triple_store/sparql/sparql.py @@ -1,9 +1,4 @@ -""" -Part of the BlackStar filesystem (bsfs) module. -A copy of the license is provided with the project. -Author: Matthias Baumgartner, 2022 -""" # imports import base64 import itertools -- cgit v1.2.3 From b66ed641d5cbb4cb83f4a571223e4d65d80ed05c Mon Sep 17 00:00:00 2001 From: Matthias Baumgartner Date: Thu, 2 Mar 2023 15:29:12 +0100 Subject: check non-serializable URIs in the sparql store --- bsfs/triple_store/sparql/sparql.py | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'bsfs/triple_store/sparql/sparql.py') diff --git a/bsfs/triple_store/sparql/sparql.py b/bsfs/triple_store/sparql/sparql.py index 5890bcc..bd98f46 100644 --- a/bsfs/triple_store/sparql/sparql.py +++ b/bsfs/triple_store/sparql/sparql.py @@ -284,6 +284,9 @@ class SparqlStore(base.TripleStoreBase): raise errors.ConsistencyError(f'{node_type} is not defined in the schema') # check and create guids for guid in guids: + # check convert to rdflib.URIRef + if not rdflib.term._is_valid_uri(guid): # pylint: disable=protected-access + raise ValueError(guids) subject = rdflib.URIRef(guid) # check node existence if (subject, rdflib.RDF.type, None) in self._graph: @@ -324,6 +327,9 @@ class SparqlStore(base.TripleStoreBase): # check guids # FIXME: Fail or skip inexistent nodes? guids = set(guids) + invalid = {guid for guid in guids if not rdflib.term._is_valid_uri(guid)} # pylint: disable=protected-access + if len(invalid) > 0: + raise ValueError(invalid) inconsistent = {guid for guid in guids if not self._has_type(guid, node_type)} if len(inconsistent) > 0: raise errors.InstanceError(inconsistent) -- cgit v1.2.3 From 28a021483c13e974e00b6159f0653b0727df9d10 Mon Sep 17 00:00:00 2001 From: Matthias Baumgartner Date: Thu, 2 Mar 2023 16:40:00 +0100 Subject: prohibit certain characters in URI and ensure URIs in bsfs.graph --- bsfs/triple_store/sparql/sparql.py | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) (limited to 'bsfs/triple_store/sparql/sparql.py') diff --git a/bsfs/triple_store/sparql/sparql.py b/bsfs/triple_store/sparql/sparql.py index bd98f46..68c0027 100644 --- a/bsfs/triple_store/sparql/sparql.py +++ b/bsfs/triple_store/sparql/sparql.py @@ -284,10 +284,7 @@ class SparqlStore(base.TripleStoreBase): raise errors.ConsistencyError(f'{node_type} is not defined in the schema') # check and create guids for guid in guids: - # check convert to rdflib.URIRef - if not rdflib.term._is_valid_uri(guid): # pylint: disable=protected-access - raise ValueError(guids) - subject = rdflib.URIRef(guid) + subject = rdflib.URIRef(URI(guid)) # check node existence if (subject, rdflib.RDF.type, None) in self._graph: # FIXME: node exists and may have a different type! ignore? raise? report? @@ -326,10 +323,7 @@ class SparqlStore(base.TripleStoreBase): raise errors.InstanceError(inconsistent) # check guids # FIXME: Fail or skip inexistent nodes? - guids = set(guids) - invalid = {guid for guid in guids if not rdflib.term._is_valid_uri(guid)} # pylint: disable=protected-access - if len(invalid) > 0: - raise ValueError(invalid) + guids = {URI(guid) for guid in guids} inconsistent = {guid for guid in guids if not self._has_type(guid, node_type)} if len(inconsistent) > 0: raise errors.InstanceError(inconsistent) -- cgit v1.2.3 From 4fead04055be4967d9ea3b24ff61fe37a93108dd Mon Sep 17 00:00:00 2001 From: Matthias Baumgartner Date: Sat, 4 Mar 2023 13:31:11 +0100 Subject: namespace refactoring and cleanup --- bsfs/triple_store/sparql/sparql.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'bsfs/triple_store/sparql/sparql.py') diff --git a/bsfs/triple_store/sparql/sparql.py b/bsfs/triple_store/sparql/sparql.py index 68c0027..99e67d6 100644 --- a/bsfs/triple_store/sparql/sparql.py +++ b/bsfs/triple_store/sparql/sparql.py @@ -28,7 +28,7 @@ __all__: typing.Sequence[str] = ( ## code ## -rdflib.term.bind(ns.bsfs.BinaryBlob, bytes, constructor=base64.b64decode) +rdflib.term.bind(ns.bsl.BinaryBlob, bytes, constructor=base64.b64decode) class _Transaction(): """Lightweight rdflib transactions for in-memory databases.""" @@ -335,8 +335,8 @@ class SparqlStore(base.TripleStoreBase): # convert value if isinstance(predicate.range, bsc.Literal): dtype = rdflib.URIRef(predicate.range.uri) - if predicate.range <= self.schema.literal(ns.bsfs.BinaryBlob): - dtype = rdflib.URIRef(ns.bsfs.BinaryBlob) + if predicate.range <= self.schema.literal(ns.bsl.BinaryBlob): + dtype = rdflib.URIRef(ns.bsl.BinaryBlob) value = base64.b64encode(value) value = rdflib.Literal(value, datatype=dtype) elif isinstance(predicate.range, bsc.Node): -- cgit v1.2.3