diff options
author | Matthias Baumgartner <dev@igsor.net> | 2022-12-18 14:15:18 +0100 |
---|---|---|
committer | Matthias Baumgartner <dev@igsor.net> | 2022-12-18 14:15:18 +0100 |
commit | e19c8f9d0818a147832df0945188ea14de9c7690 (patch) | |
tree | 6a1e388af7ace081fbe516b0c56e28ff1f1e48db /bsfs/triple_store | |
parent | 58496960926a56149c10d64e01b6df7d048eed0e (diff) | |
download | bsfs-e19c8f9d0818a147832df0945188ea14de9c7690.tar.gz bsfs-e19c8f9d0818a147832df0945188ea14de9c7690.tar.bz2 bsfs-e19c8f9d0818a147832df0945188ea14de9c7690.zip |
documentation, types, and style fixes
Diffstat (limited to 'bsfs/triple_store')
-rw-r--r-- | bsfs/triple_store/base.py | 33 | ||||
-rw-r--r-- | bsfs/triple_store/sparql.py | 73 |
2 files changed, 63 insertions, 43 deletions
diff --git a/bsfs/triple_store/base.py b/bsfs/triple_store/base.py index 942a16b..6561262 100644 --- a/bsfs/triple_store/base.py +++ b/bsfs/triple_store/base.py @@ -21,7 +21,21 @@ __all__: typing.Sequence[str] = ( ## code ## class TripleStoreBase(abc.ABC): - """ + """TripleStore base class. + + Use the `Open` method to create a new instance and to initialize + the required structures. + + Triple stores express a graph via its (subject, predicate, object) triples. + They provide methods to add and remove triples, and to query the storage + for given graph structures. The subject is always a node in the graph, + whereas nodes are identifiable by a unique URI. Note that blank nodes + (without an explicit URI) are not supported. The object can be another + Node or a Literal value. The relation between a subject and an object + is expressed via a Predicate. The graph structures are governed by a + schema that defines which Node, Literal, and Predicate classes exist + and how they can interact (see `bsfs.schema.Schema`). + """ # storage's URI. None implies a temporary location. @@ -99,9 +113,8 @@ class TripleStoreBase(abc.ABC): self, node_type: _schema.Node, guids: typing.Iterable[URI], - ): - """ - """ + ) -> typing.Iterable[URI]: + """Return those *guids* that exist and have type *node_type* or a subclass thereof.""" @abc.abstractmethod def create( @@ -119,7 +132,17 @@ class TripleStoreBase(abc.ABC): predicate: _schema.Predicate, values: typing.Iterable[typing.Any], ): - """ + """Add triples to the graph. + + It is assumed that all of *guids* exist and have *node_type*. + This method adds a triple (guid, predicate, value) for every guid in + *guids* and each value in *values* (cartesian product). Note that + *values* must have length one for unique predicates, and that + currently existing values will be overwritten in this case. + It also verifies that all symbols are part of the schema and that + the *predicate* matches the *node_type*. 
+ Raises `bsfs.errors.ConsistencyError` if these assumptions are violated. + """ ## EOF ## diff --git a/bsfs/triple_store/sparql.py b/bsfs/triple_store/sparql.py index fc161b3..23059f7 100644 --- a/bsfs/triple_store/sparql.py +++ b/bsfs/triple_store/sparql.py @@ -28,33 +28,52 @@ __all__: typing.Sequence[str] = ( class _Transaction(): """Lightweight rdflib transactions for in-memory databases.""" - def __init__(self, graph): + # graph instance. + _graph: rdflib.Graph + + # current log of added triples. + _added: typing.List[typing.Any] + + # current log of removed triples. + _removed: typing.List[typing.Any] + + def __init__(self, graph: rdflib.Graph): self._graph = graph - self.commit() # initialize + # initialize internal structures + self.commit() def commit(self): + """Commit temporary changes.""" self._added = [] self._removed = [] def rollback(self): + """Undo changes since the last commit.""" for triple in self._added: self._graph.remove(triple) for triple in self._removed: self._graph.add(triple) - def add(self, triple): + def add(self, triple: typing.Any): + """Add a triple to the graph.""" if triple not in self._graph: self._added.append(triple) self._graph.add(triple) - def remove(self, triple): + def remove(self, triple: typing.Any): + """Remove a triple from the graph.""" if triple in self._graph: self._removed.append(triple) self._graph.remove(triple) class SparqlStore(base.TripleStoreBase): - """ + """Sparql-based triple store. + + The sparql triple store uses a third-party backend + (currently rdflib) to store triples and manages them via + the Sparql query language. + """ # The rdflib graph. @@ -89,27 +108,7 @@ class SparqlStore(base.TripleStoreBase): return self._schema @schema.setter - def schema(self, schema: _schema.Schema): - """Migrate to new schema by adding or removing class definitions. - - Commits before and after the migration. - - Instances of removed classes will be deleted irreversably. 
- Note that modifying an existing class is not directly supported. - Also, it is generally discouraged, since changing definitions may - lead to inconsistencies across multiple clients in a distributed - setting. Instead, consider introducing a new class under its own - uri. Such a migration would look as follows: - - 1. Add new class definitions. - 2. Create instances of the new classes and copy relevant data. - 3. Remove the old definitions. - - To modify a class, i.e., re-use a previous uri with a new - class definition, you would have to migrate via temporary - class definitions, and thus repeat the above procedure two times. - - """ + def schema(self, schema: bsc.Schema): # check args: Schema instanace if not isinstance(schema, bsc.Schema): raise TypeError(schema) @@ -162,16 +161,14 @@ class SparqlStore(base.TripleStoreBase): subject_types = list(self._graph.objects(rdflib.URIRef(subject), rdflib.RDF.type)) if len(subject_types) == 0: return False - elif len(subject_types) == 1: - node = self.schema.node(URI(subject_types[0])) + if len(subject_types) == 1: + node = self.schema.node(URI(subject_types[0])) # type: ignore [arg-type] # URI is a subtype of str if node == node_type: return True - elif node_type in node.parents(): + if node_type in node.parents(): return True - else: - return False - else: - raise errors.UnreachableError() + return False + raise errors.UnreachableError() def exists( self, @@ -187,20 +184,18 @@ class SparqlStore(base.TripleStoreBase): node_type: bsc.Node, guids: typing.Iterable[URI], ): - """ - """ # check node_type if node_type not in self.schema.nodes(): raise errors.ConsistencyError(f'{node_type} is not defined in the schema') # check and create guids for guid in guids: - guid = rdflib.URIRef(guid) + subject = rdflib.URIRef(guid) # check node existence - if (guid, rdflib.RDF.type, None) in self.graph: + if (subject, rdflib.RDF.type, None) in self._graph: # FIXME: node exists and may have a different type! ignore? raise? report? 
continue # add node - self._transaction.add((guid, rdflib.RDF.type, rdflib.URIRef(node_type.uri))) + self._transaction.add((subject, rdflib.RDF.type, rdflib.URIRef(node_type.uri))) def set( self, @@ -218,6 +213,8 @@ class SparqlStore(base.TripleStoreBase): if not node_type <= predicate.domain: raise errors.ConsistencyError(f'{node_type} must be a subclass of {predicate.domain}') # NOTE: predicate.range is in the schema since predicate is in the schema. + # materialize values + values = set(values) # check values if len(values) == 0: return |