diff options
author | Matthias Baumgartner <dev@igsor.net> | 2022-12-18 14:15:18 +0100 |
---|---|---|
committer | Matthias Baumgartner <dev@igsor.net> | 2022-12-18 14:15:18 +0100 |
commit | e19c8f9d0818a147832df0945188ea14de9c7690 (patch) | |
tree | 6a1e388af7ace081fbe516b0c56e28ff1f1e48db /bsfs | |
parent | 58496960926a56149c10d64e01b6df7d048eed0e (diff) | |
download | bsfs-e19c8f9d0818a147832df0945188ea14de9c7690.tar.gz bsfs-e19c8f9d0818a147832df0945188ea14de9c7690.tar.bz2 bsfs-e19c8f9d0818a147832df0945188ea14de9c7690.zip |
documentation, types, and style fixes
Diffstat (limited to 'bsfs')
-rw-r--r-- | bsfs/graph/ac/base.py | 6 | ||||
-rw-r--r-- | bsfs/graph/ac/null.py | 3 | ||||
-rw-r--r-- | bsfs/graph/graph.py | 17 | ||||
-rw-r--r-- | bsfs/graph/nodes.py | 44 | ||||
-rw-r--r-- | bsfs/schema/schema.py | 56 | ||||
-rw-r--r-- | bsfs/schema/types.py | 65 | ||||
-rw-r--r-- | bsfs/triple_store/base.py | 33 | ||||
-rw-r--r-- | bsfs/triple_store/sparql.py | 73 |
8 files changed, 170 insertions, 127 deletions
diff --git a/bsfs/graph/ac/base.py b/bsfs/graph/ac/base.py index 80742d7..bc9aeb3 100644 --- a/bsfs/graph/ac/base.py +++ b/bsfs/graph/ac/base.py @@ -22,7 +22,11 @@ __all__: typing.Sequence[str] = ( ## code ## class AccessControlBase(abc.ABC): - """ + """Defines the interface for access control policies. + + An access control policy governs which actions a user may take to query + or to manipulate a graph. + """ # The triple store backend. diff --git a/bsfs/graph/ac/null.py b/bsfs/graph/ac/null.py index 288a0da..36838bd 100644 --- a/bsfs/graph/ac/null.py +++ b/bsfs/graph/ac/null.py @@ -24,8 +24,7 @@ __all__: typing.Sequence[str] = ( ## code ## class NullAC(base.AccessControlBase): - """ - """ + """The NULL access control implements a dummy policy that allows any action to any user.""" def is_protected_predicate(self, pred: schema.Predicate) -> bool: """Return True if a predicate cannot be modified manually.""" diff --git a/bsfs/graph/graph.py b/bsfs/graph/graph.py index 4a36ff6..87f7a31 100644 --- a/bsfs/graph/graph.py +++ b/bsfs/graph/graph.py @@ -25,8 +25,15 @@ __all__: typing.Sequence[str] = ( ## code ## class Graph(): + """The Graph class is a high-level interface to a triple store. + + The Graph class provides a convenient interface to query and access a graph. + Since it logically builds on the concept of graphs it is easier to + navigate than raw triple stores. Naturally, it uses a triple store + as *backend*. It also controls actions via access permissions to a *user*. + """ - """ + # link to the triple storage backend. _backend: TripleStoreBase @@ -81,8 +88,14 @@ class Graph(): return self def nodes(self, node_type: URI, guids: typing.Iterable[URI]) -> _nodes.Nodes: + """Return nodes *guids* of type *node_type* as a `bsfs.graph.Nodes` instance. + + Note that the *guids* need not exist (however, the *node_type* has + to be part of the schema). Nonexistent guids will be created (using + *node_type*) once some data is assigned to them.
+ """ - node_type = self.schema.node(node_type) + type_ = self.schema.node(node_type) # NOTE: Nodes constructor materializes guids. return _nodes.Nodes(self._backend, self._user, type_, guids) diff --git a/bsfs/graph/nodes.py b/bsfs/graph/nodes.py index 7b0e8f4..c417a0e 100644 --- a/bsfs/graph/nodes.py +++ b/bsfs/graph/nodes.py @@ -5,7 +5,6 @@ A copy of the license is provided with the project. Author: Matthias Baumgartner, 2022 """ # imports -import itertools import time import typing @@ -87,34 +86,14 @@ class Nodes(): pred: URI, # FIXME: URI or _schema.Predicate? value: typing.Any, ) -> 'Nodes': - """ - """ - try: - # insert triples - self.__set(pred, value) - # save changes - self._backend.commit() - - except ( - errors.PermissionDeniedError, # tried to set a protected predicate (ns.bsm.t_created) - errors.ConsistencyError, # node types are not in the schema or don't match the predicate - errors.InstanceError, # guids/values don't have the correct type - TypeError, # value is supposed to be a Nodes instance - ValueError, # multiple values passed to unique predicate - ): - # revert changes - self._backend.rollback() - # notify the client - raise - - return self + """Set predicate *pred* to *value*.""" + return self.set_from_iterable([(pred, value)]) def set_from_iterable( self, predicate_values: typing.Iterable[typing.Tuple[URI, typing.Any]], # FIXME: URI or _schema.Predicate? ) -> 'Nodes': - """ - """ + """Set multiple predicate-value pairs at once.""" # TODO: Could group predicate_values by predicate to gain some efficiency # TODO: ignore errors on some predicates; For now this could leave residual # data (e.g. some nodes were created, some not). @@ -137,14 +116,11 @@ class Nodes(): # notify the client raise + # FIXME: How about other errors? Shouldn't I then rollback as well?!
+ return self - def __set( - self, - predicate: URI, - value: typing.Any, - #on_error: str = 'ignore', # ignore, rollback - ): + def __set(self, predicate: URI, value: typing.Any): """ """ # get normalized predicate. Raises KeyError if *pred* not in the schema. @@ -216,11 +192,9 @@ class Nodes(): else: raise errors.UnreachableError() - def _ensure_nodes( - self, - node_type: _schema.Node, - guids: typing.Iterable[URI], - ): + def _ensure_nodes(self, node_type: _schema.Node, guids: typing.Iterable[URI]): + """ + """ # check node existence guids = set(guids) existing = set(self._backend.exists(node_type, guids)) diff --git a/bsfs/schema/schema.py b/bsfs/schema/schema.py index b6f37a7..c5d4571 100644 --- a/bsfs/schema/schema.py +++ b/bsfs/schema/schema.py @@ -25,11 +25,28 @@ __all__: typing.Sequence[str] = ( ## code ## class Schema(): - """ + """Graph schema. + + Use `Schema.Empty()` to create a new, empty Schema rather than constructing + it directly. + + The schema is defined by three sets: Predicates, Nodes, and Literals. + + The Schema class guarantees two properties: completeness and consistency. + Completeness means that the schema covers all classes that are referred to + by any other class in the schema. Consistency means that each class is + identified by a unique URI and all classes that use that URI consequently + use the same definition. + """ + # node classes. _nodes: typing.Dict[URI, types.Node] + + # literal classes. _literals: typing.Dict[URI, types.Literal] + + # predicate classes.
_predicates: typing.Dict[URI, types.Predicate] def __init__( @@ -47,7 +64,8 @@ class Schema(): literals = set(literals) predicates = set(predicates) # include parents in predicates set - predicates |= {par for pred in predicates for par in pred.parents()} + # TODO: review type annotations and ignores for python >= 3.11 (parents is _Type but should be typing.Self) + predicates |= {par for pred in predicates for par in pred.parents()} # type: ignore [misc] # include predicate domain in nodes set nodes |= {pred.domain for pred in predicates} # include predicate range in nodes and literals sets @@ -57,8 +75,8 @@ class Schema(): # include parents in nodes and literals sets # NOTE: Must be done after predicate domain/range was handled # so that their parents are included as well. - nodes |= {par for node in nodes for par in node.parents()} - literals |= {par for lit in literals for par in lit.parents()} + nodes |= {par for node in nodes for par in node.parents()} # type: ignore [misc] + literals |= {par for lit in literals for par in lit.parents()} # type: ignore [misc] # assign members self._nodes = {node.uri: node for node in nodes} self._literals = {lit.uri: lit for lit in literals} @@ -153,9 +171,7 @@ class Schema(): return self.diff(other) def consistent_with(self, other: 'Schema') -> bool: - """Checks if two schemas have different definitions for the same uri. - Tests nodes, literals, and predicates. - """ + """Checks if two schemas have different predicate, node, or literal definitions for the same uri.""" # check arg if not isinstance(other, Schema): raise TypeError(other) @@ -181,7 +197,10 @@ class Schema(): return True @classmethod - def Union(cls, *args: typing.Union['Schema', typing.Iterable['Schema']]) -> 'Schema': + def Union( # pylint: disable=invalid-name # capitalized classmethod + cls, + *args: typing.Union['Schema', typing.Iterable['Schema']] + ) -> 'Schema': """Combine multiple Schema instances into a single one. 
As argument, you can either pass multiple Schema instances, or a single iterable over Schema instances. Any abc.Iterable will be accepted. @@ -200,7 +219,7 @@ class Schema(): if isinstance(args[0], cls): # args is sequence of Schema instances pass elif len(args) == 1 and isinstance(args[0], abc.Iterable): # args is a single iterable - args = args[0] + args = args[0] # type: ignore [assignment] # we checked and thus know that args[0] is an iterable else: raise TypeError(f'expected multiple Schema instances or a single Iterable, found {args}') @@ -237,25 +256,31 @@ class Schema(): ## getters ## - # FIXME: which of the getters below are actually needed? + # FIXME: nodes, predicates, literals could be properties # FIXME: interchangeability of URI and _Type?! def has_node(self, node: URI) -> bool: + """Return True if a Node with URI *node* is part of the schema.""" return node in self._nodes def has_literal(self, lit: URI) -> bool: + """Return True if a Literal with URI *lit* is part of the schema.""" return lit in self._literals def has_predicate(self, pred: URI) -> bool: + """Return True if a Predicate with URI *pred* is part of the schema.""" return pred in self._predicates - def nodes(self) -> typing.Iterator[types.Node]: # FIXME: type annotation + def nodes(self) -> typing.Iterable[types.Node]: + """Return an iterator over Node classes.""" return self._nodes.values() - def literals(self) -> typing.Iterator[types.Literal]: # FIXME: type annotation + def literals(self) -> typing.Iterable[types.Literal]: + """Return an iterator over Literal classes.""" return self._literals.values() - def predicates(self) -> typing.Iterator[types.Predicate]: # FIXME: type annotation + def predicates(self) -> typing.Iterable[types.Predicate]: + """Return an iterator over Predicate classes.""" return self._predicates.values() def node(self, uri: URI) -> types.Node: @@ -275,7 +300,8 @@ class Schema(): @classmethod - def Empty(cls) -> 'Schema': + def Empty(cls) -> 'Schema': # pylint: 
disable=invalid-name # capitalized classmethod + """Return a minimal Schema.""" node = types.Node(ns.bsfs.Node, None) literal = types.Literal(ns.bsfs.Literal, None) predicate = types.Predicate( @@ -289,7 +315,7 @@ class Schema(): @classmethod - def from_string(cls, schema: str) -> 'Schema': + def from_string(cls, schema: str) -> 'Schema': # pylint: disable=invalid-name # capitalized classmethod """Load and return a Schema from a string.""" # parse string into rdf graph graph = rdflib.Graph() diff --git a/bsfs/schema/types.py b/bsfs/schema/types.py index 6e257e3..54a7e99 100644 --- a/bsfs/schema/types.py +++ b/bsfs/schema/types.py @@ -93,7 +93,7 @@ class _Type(): uri: URI # parent's class uris. - parent: typing.Optional['_Type'] + parent: typing.Optional['_Type'] # TODO: for python >=3.11: use typing.Self def __init__( self, @@ -123,63 +123,70 @@ class _Type(): def __hash__(self) -> int: return hash((type(self), self.uri, self.parent)) + # NOTE: For equality and order functions (lt, gt, le, ge) we explicitly want type equality! + # Consider the statements below, with class Vehicle(_Type) and class TwoWheel(Vehicle): + # * Vehicle('foo', None) == TwoWheel('foo', None): Instances of different types cannot be equivalent. + # * Vehicle('foo', None) <= TwoWheel('foo', None): Cannot compare the different types Vehicles and TwoWheel. 
+ def __eq__(self, other: typing.Any) -> bool: """Return True iff *self* is equivalent to *other*.""" - return type(self) == type(other) \ + # pylint: disable=unidiomatic-typecheck + return type(other) is type(self) \ and self.uri == other.uri \ and self.parent == other.parent + def __lt__(self, other: typing.Any) -> bool: """Return True iff *self* is a true subclass of *other*.""" - if not type(self) == type(other): # type mismatch + if not type(self) is type(other): # type mismatch # pylint: disable=unidiomatic-typecheck return NotImplemented - elif self.uri == other.uri: # equivalence + if self.uri == other.uri: # equivalence return False - elif self in other.parents(): # superclass + if self in other.parents(): # superclass return False - elif other in self.parents(): # subclass + if other in self.parents(): # subclass return True - else: # not related - return False + # not related + return False def __le__(self, other: typing.Any) -> bool: """Return True iff *self* is equivalent or a subclass of *other*.""" - if not type(self) == type(other): # type mismatch + if not type(self) is type(other): # type mismatch # pylint: disable=unidiomatic-typecheck return NotImplemented - elif self.uri == other.uri: # equivalence + if self.uri == other.uri: # equivalence return True - elif self in other.parents(): # superclass + if self in other.parents(): # superclass return False - elif other in self.parents(): # subclass + if other in self.parents(): # subclass return True - else: # not related - return False + # not related + return False def __gt__(self, other: typing.Any) -> bool: """Return True iff *self* is a true superclass of *other*.""" - if not type(self) == type(other): # type mismatch + if not type(self) is type(other): # type mismatch # pylint: disable=unidiomatic-typecheck return NotImplemented - elif self.uri == other.uri: # equivalence + if self.uri == other.uri: # equivalence return False - elif self in other.parents(): # superclass + if self in 
other.parents(): # superclass return True - elif other in self.parents(): # subclass - return False - else: # not related + if other in self.parents(): # subclass return False + # not related + return False def __ge__(self, other: typing.Any) -> bool: """Return True iff *self* is eqiuvalent or a superclass of *other*.""" - if not type(self) == type(other): # type mismatch + if not type(self) is type(other): # type mismatch # pylint: disable=unidiomatic-typecheck return NotImplemented - elif self.uri == other.uri: # equivalence + if self.uri == other.uri: # equivalence return True - elif self in other.parents(): # superclass + if self in other.parents(): # superclass return True - elif other in self.parents(): # subclass - return False - else: # not related + if other in self.parents(): # subclass return False + # not related + return False class _Vertex(_Type): @@ -216,10 +223,10 @@ class Predicate(_Type): self, # Type members uri: URI, - parent: 'Predicate', + parent: typing.Optional['Predicate'], # Predicate members domain: Node, - range: typing.Optional[typing.Union[Node, Literal]], + range: typing.Optional[typing.Union[Node, Literal]], # pylint: disable=redefined-builtin unique: bool, ): # check arguments @@ -246,7 +253,7 @@ class Predicate(_Type): self, uri: URI, domain: typing.Optional[Node] = None, - range: typing.Optional[_Vertex] = None, + range: typing.Optional[_Vertex] = None, # pylint: disable=redefined-builtin unique: typing.Optional[bool] = None, **kwargs, ): diff --git a/bsfs/triple_store/base.py b/bsfs/triple_store/base.py index 942a16b..6561262 100644 --- a/bsfs/triple_store/base.py +++ b/bsfs/triple_store/base.py @@ -21,7 +21,21 @@ __all__: typing.Sequence[str] = ( ## code ## class TripleStoreBase(abc.ABC): - """ + """TripleStore base class. + + Use the `Open` method to create a new instance and to initialize + the required structures. + + Triple stores express a graph via its (subject, predicate, object) triples. 
+ They provide methods to add and remove triples, and to query the storage + for given graph structures. The subject is always a node in the graph, + where nodes are identifiable by a unique URI. Note that blank nodes + (without an explicit URI) are not supported. The object can be another + Node or a Literal value. The relation between a subject and an object + is expressed via a Predicate. The graph structures are governed by a + schema that defines which Node, Literal, and Predicate classes exist + and how they can interact (see `bsfs.schema.Schema`). + """ # storage's URI. None implies a temporary location. @@ -99,9 +113,8 @@ class TripleStoreBase(abc.ABC): self, node_type: _schema.Node, guids: typing.Iterable[URI], - ): - """ - """ + ) -> typing.Iterable[URI]: + """Return those *guids* that exist and have type *node_type* or a subclass thereof.""" @abc.abstractmethod def create( @@ -119,7 +132,17 @@ class TripleStoreBase(abc.ABC): predicate: _schema.Predicate, values: typing.Iterable[typing.Any], ): - """ + """Add triples to the graph. + + It is assumed that all of *guids* exist and have *node_type*. + This method adds a triple (guid, predicate, value) for every guid in + *guids* and each value in *values* (cartesian product). Note that + *values* must have length one for unique predicates, and that + currently existing values will be overwritten in this case. + It also verifies that all symbols are part of the schema and that + the *predicate* matches the *node_type*. + Raises `bsfs.errors.ConsistencyError` if these assumptions are violated. + """ ## EOF ## diff --git a/bsfs/triple_store/sparql.py b/bsfs/triple_store/sparql.py index fc161b3..23059f7 100644 --- a/bsfs/triple_store/sparql.py +++ b/bsfs/triple_store/sparql.py @@ -28,33 +28,52 @@ __all__: typing.Sequence[str] = ( class _Transaction(): """Lightweight rdflib transactions for in-memory databases.""" - def __init__(self, graph): + # graph instance.
+ _graph: rdflib.Graph + + # current log of added triples. + _added: typing.List[typing.Any] + + # current log of removed triples. + _removed: typing.List[typing.Any] + + def __init__(self, graph: rdflib.Graph): self._graph = graph - self.commit() # initialize + # initialize internal structures + self.commit() def commit(self): + """Commit temporary changes.""" self._added = [] self._removed = [] def rollback(self): + """Undo changes since the last commit.""" for triple in self._added: self._graph.remove(triple) for triple in self._removed: self._graph.add(triple) - def add(self, triple): + def add(self, triple: typing.Any): + """Add a triple to the graph.""" if triple not in self._graph: self._added.append(triple) self._graph.add(triple) - def remove(self, triple): + def remove(self, triple: typing.Any): + """Remove a triple from the graph.""" if triple in self._graph: self._removed.append(triple) self._graph.remove(triple) class SparqlStore(base.TripleStoreBase): - """ + """Sparql-based triple store. + + The sparql triple store uses a third-party backend + (currently rdflib) to store triples and manages them via + the Sparql query language. + """ # The rdflib graph. @@ -89,27 +108,7 @@ class SparqlStore(base.TripleStoreBase): return self._schema @schema.setter - def schema(self, schema: _schema.Schema): - """Migrate to new schema by adding or removing class definitions. - - Commits before and after the migration. - - Instances of removed classes will be deleted irreversably. - Note that modifying an existing class is not directly supported. - Also, it is generally discouraged, since changing definitions may - lead to inconsistencies across multiple clients in a distributed - setting. Instead, consider introducing a new class under its own - uri. Such a migration would look as follows: - - 1. Add new class definitions. - 2. Create instances of the new classes and copy relevant data. - 3. Remove the old definitions. 
- - To modify a class, i.e., re-use a previous uri with a new - class definition, you would have to migrate via temporary - class definitions, and thus repeat the above procedure two times. - - """ + def schema(self, schema: bsc.Schema): # check args: Schema instanace if not isinstance(schema, bsc.Schema): raise TypeError(schema) @@ -162,16 +161,14 @@ class SparqlStore(base.TripleStoreBase): subject_types = list(self._graph.objects(rdflib.URIRef(subject), rdflib.RDF.type)) if len(subject_types) == 0: return False - elif len(subject_types) == 1: - node = self.schema.node(URI(subject_types[0])) + if len(subject_types) == 1: + node = self.schema.node(URI(subject_types[0])) # type: ignore [arg-type] # URI is a subtype of str if node == node_type: return True - elif node_type in node.parents(): + if node_type in node.parents(): return True - else: - return False - else: - raise errors.UnreachableError() + return False + raise errors.UnreachableError() def exists( self, @@ -187,20 +184,18 @@ class SparqlStore(base.TripleStoreBase): node_type: bsc.Node, guids: typing.Iterable[URI], ): - """ - """ # check node_type if node_type not in self.schema.nodes(): raise errors.ConsistencyError(f'{node_type} is not defined in the schema') # check and create guids for guid in guids: - guid = rdflib.URIRef(guid) + subject = rdflib.URIRef(guid) # check node existence - if (guid, rdflib.RDF.type, None) in self.graph: + if (subject, rdflib.RDF.type, None) in self._graph: # FIXME: node exists and may have a different type! ignore? raise? report? 
continue # add node - self._transaction.add((guid, rdflib.RDF.type, rdflib.URIRef(node_type.uri))) + self._transaction.add((subject, rdflib.RDF.type, rdflib.URIRef(node_type.uri))) def set( self, @@ -218,6 +213,8 @@ class SparqlStore(base.TripleStoreBase): if not node_type <= predicate.domain: raise errors.ConsistencyError(f'{node_type} must be a subclass of {predicate.domain}') # NOTE: predicate.range is in the schema since predicate is in the schema. + # materialize values + values = set(values) # check values if len(values) == 0: return |