about summary refs log tree commit diff stats
path: root/bsfs
diff options
context:
space:
mode:
author Matthias Baumgartner <dev@igsor.net> 2022-12-18 14:15:18 +0100
committer Matthias Baumgartner <dev@igsor.net> 2022-12-18 14:15:18 +0100
commit e19c8f9d0818a147832df0945188ea14de9c7690 (patch)
tree 6a1e388af7ace081fbe516b0c56e28ff1f1e48db /bsfs
parent 58496960926a56149c10d64e01b6df7d048eed0e (diff)
download bsfs-e19c8f9d0818a147832df0945188ea14de9c7690.tar.gz
bsfs-e19c8f9d0818a147832df0945188ea14de9c7690.tar.bz2
bsfs-e19c8f9d0818a147832df0945188ea14de9c7690.zip
documentation, types, and style fixes
Diffstat (limited to 'bsfs')
-rw-r--r-- bsfs/graph/ac/base.py | 6
-rw-r--r-- bsfs/graph/ac/null.py | 3
-rw-r--r-- bsfs/graph/graph.py | 17
-rw-r--r-- bsfs/graph/nodes.py | 44
-rw-r--r-- bsfs/schema/schema.py | 56
-rw-r--r-- bsfs/schema/types.py | 65
-rw-r--r-- bsfs/triple_store/base.py | 33
-rw-r--r-- bsfs/triple_store/sparql.py | 73
8 files changed, 170 insertions, 127 deletions
diff --git a/bsfs/graph/ac/base.py b/bsfs/graph/ac/base.py
index 80742d7..bc9aeb3 100644
--- a/bsfs/graph/ac/base.py
+++ b/bsfs/graph/ac/base.py
@@ -22,7 +22,11 @@ __all__: typing.Sequence[str] = (
## code ##
class AccessControlBase(abc.ABC):
- """
+ """Defines the interface for access control policies.
+
+ An access control policy governs which actions a user may take to query
+ or to manipulate a graph.
+
"""
# The triple store backend.
diff --git a/bsfs/graph/ac/null.py b/bsfs/graph/ac/null.py
index 288a0da..36838bd 100644
--- a/bsfs/graph/ac/null.py
+++ b/bsfs/graph/ac/null.py
@@ -24,8 +24,7 @@ __all__: typing.Sequence[str] = (
## code ##
class NullAC(base.AccessControlBase):
- """
- """
+ """The NULL access control implements a dummy policy that allows any action to any user."""
def is_protected_predicate(self, pred: schema.Predicate) -> bool:
"""Return True if a predicate cannot be modified manually."""
diff --git a/bsfs/graph/graph.py b/bsfs/graph/graph.py
index 4a36ff6..87f7a31 100644
--- a/bsfs/graph/graph.py
+++ b/bsfs/graph/graph.py
@@ -25,8 +25,15 @@ __all__: typing.Sequence[str] = (
## code ##
class Graph():
+ """The Graph class is a graph-level interface on top of a triple store.
+
+ The Graph class provides a convenient interface to query and access a graph.
+ Since it logically builds on the concept of graphs it is easier to
+ navigate than raw triple stores. Naturally, it uses a triple store
+ as *backend*. It also controls actions via access permissions to a *user*.
+
"""
- """
+
# link to the triple storage backend.
_backend: TripleStoreBase
@@ -81,8 +88,14 @@ class Graph():
return self
def nodes(self, node_type: URI, guids: typing.Iterable[URI]) -> _nodes.Nodes:
+ """Return nodes *guids* of type *node_type* as a `bsfs.graph.Nodes` instance.
+
+ Note that the *guids* need not exist (however, the *node_type* has
+ to be part of the schema). Nonexistent guids will be created (using
+ *node_type*) once some data is assigned to them.
+
"""
- node_type = self.schema.node(node_type)
+ type_ = self.schema.node(node_type)
# NOTE: Nodes constructor materializes guids.
return _nodes.Nodes(self._backend, self._user, type_, guids)
diff --git a/bsfs/graph/nodes.py b/bsfs/graph/nodes.py
index 7b0e8f4..c417a0e 100644
--- a/bsfs/graph/nodes.py
+++ b/bsfs/graph/nodes.py
@@ -5,7 +5,6 @@ A copy of the license is provided with the project.
Author: Matthias Baumgartner, 2022
"""
# imports
-import itertools
import time
import typing
@@ -87,34 +86,14 @@ class Nodes():
pred: URI, # FIXME: URI or _schema.Predicate?
value: typing.Any,
) -> 'Nodes':
- """
- """
- try:
- # insert triples
- self.__set(pred, value)
- # save changes
- self._backend.commit()
-
- except (
- errors.PermissionDeniedError, # tried to set a protected predicate (ns.bsm.t_created)
- errors.ConsistencyError, # node types are not in the schema or don't match the predicate
- errors.InstanceError, # guids/values don't have the correct type
- TypeError, # value is supposed to be a Nodes instance
- ValueError, # multiple values passed to unique predicate
- ):
- # revert changes
- self._backend.rollback()
- # notify the client
- raise
-
- return self
+ """Set predicate *pred* to *value*."""
+ return self.set_from_iterable([(pred, value)])
def set_from_iterable(
self,
predicate_values: typing.Iterable[typing.Tuple[URI, typing.Any]], # FIXME: URI or _schema.Predicate?
) -> 'Nodes':
- """
- """
+ """Set multiple predicate-value pairs at once."""
# TODO: Could group predicate_values by predicate to gain some efficiency
# TODO: ignore errors on some predicates; For now this could leave residual
# data (e.g. some nodes were created, some not).
@@ -137,14 +116,11 @@ class Nodes():
# notify the client
raise
+ # FIXME: How about other errors? Shouldn't I then rollback as well?!
+
return self
- def __set(
- self,
- predicate: URI,
- value: typing.Any,
- #on_error: str = 'ignore', # ignore, rollback
- ):
+ def __set(self, predicate: URI, value: typing.Any):
"""
"""
# get normalized predicate. Raises KeyError if *pred* not in the schema.
@@ -216,11 +192,9 @@ class Nodes():
else:
raise errors.UnreachableError()
- def _ensure_nodes(
- self,
- node_type: _schema.Node,
- guids: typing.Iterable[URI],
- ):
+ def _ensure_nodes(self, node_type: _schema.Node, guids: typing.Iterable[URI]):
+ """
+ """
# check node existence
guids = set(guids)
existing = set(self._backend.exists(node_type, guids))
diff --git a/bsfs/schema/schema.py b/bsfs/schema/schema.py
index b6f37a7..c5d4571 100644
--- a/bsfs/schema/schema.py
+++ b/bsfs/schema/schema.py
@@ -25,11 +25,28 @@ __all__: typing.Sequence[str] = (
## code ##
class Schema():
- """
+ """Graph schema.
+
+ Use `Schema.Empty()` to create a new, empty Schema rather than construct
+ it directly.
+
+ The schema is defined by three sets: Predicates, Nodes, and Literals.
+
+ The Schema class guarantees two properties: completeness and consistency.
+ Completeness means that the schema covers all classes that are referred to
+ by any other class in the schema. Consistency means that each class is
+ identified by a unique URI and all classes that use that URI consequently
+ use the same definition.
+
"""
+ # node classes.
_nodes: typing.Dict[URI, types.Node]
+
+ # literal classes.
_literals: typing.Dict[URI, types.Literal]
+
+ # predicate classes.
_predicates: typing.Dict[URI, types.Predicate]
def __init__(
@@ -47,7 +64,8 @@ class Schema():
literals = set(literals)
predicates = set(predicates)
# include parents in predicates set
- predicates |= {par for pred in predicates for par in pred.parents()}
+ # TODO: review type annotations and ignores for python >= 3.11 (parents is _Type but should be typing.Self)
+ predicates |= {par for pred in predicates for par in pred.parents()} # type: ignore [misc]
# include predicate domain in nodes set
nodes |= {pred.domain for pred in predicates}
# include predicate range in nodes and literals sets
@@ -57,8 +75,8 @@ class Schema():
# include parents in nodes and literals sets
# NOTE: Must be done after predicate domain/range was handled
# so that their parents are included as well.
- nodes |= {par for node in nodes for par in node.parents()}
- literals |= {par for lit in literals for par in lit.parents()}
+ nodes |= {par for node in nodes for par in node.parents()} # type: ignore [misc]
+ literals |= {par for lit in literals for par in lit.parents()} # type: ignore [misc]
# assign members
self._nodes = {node.uri: node for node in nodes}
self._literals = {lit.uri: lit for lit in literals}
@@ -153,9 +171,7 @@ class Schema():
return self.diff(other)
def consistent_with(self, other: 'Schema') -> bool:
- """Checks if two schemas have different definitions for the same uri.
- Tests nodes, literals, and predicates.
- """
+ """Checks if two schemas have different predicate, node, or literal definitions for the same uri."""
# check arg
if not isinstance(other, Schema):
raise TypeError(other)
@@ -181,7 +197,10 @@ class Schema():
return True
@classmethod
- def Union(cls, *args: typing.Union['Schema', typing.Iterable['Schema']]) -> 'Schema':
+ def Union( # pylint: disable=invalid-name # capitalized classmethod
+ cls,
+ *args: typing.Union['Schema', typing.Iterable['Schema']]
+ ) -> 'Schema':
"""Combine multiple Schema instances into a single one.
As argument, you can either pass multiple Schema instances, or a single
iterable over Schema instances. Any abc.Iterable will be accepted.
@@ -200,7 +219,7 @@ class Schema():
if isinstance(args[0], cls): # args is sequence of Schema instances
pass
elif len(args) == 1 and isinstance(args[0], abc.Iterable): # args is a single iterable
- args = args[0]
+ args = args[0] # type: ignore [assignment] # we checked and thus know that args[0] is an iterable
else:
raise TypeError(f'expected multiple Schema instances or a single Iterable, found {args}')
@@ -237,25 +256,31 @@ class Schema():
## getters ##
- # FIXME: which of the getters below are actually needed?
+ # FIXME: nodes, predicates, literals could be properties
# FIXME: interchangeability of URI and _Type?!
def has_node(self, node: URI) -> bool:
+ """Return True if a Node with URI *node* is part of the schema."""
return node in self._nodes
def has_literal(self, lit: URI) -> bool:
+ """Return True if a Literal with URI *lit* is part of the schema."""
return lit in self._literals
def has_predicate(self, pred: URI) -> bool:
+ """Return True if a Predicate with URI *pred* is part of the schema."""
return pred in self._predicates
- def nodes(self) -> typing.Iterator[types.Node]: # FIXME: type annotation
+ def nodes(self) -> typing.Iterable[types.Node]:
+ """Return an iterable over Node classes."""
return self._nodes.values()
- def literals(self) -> typing.Iterator[types.Literal]: # FIXME: type annotation
+ def literals(self) -> typing.Iterable[types.Literal]:
+ """Return an iterable over Literal classes."""
return self._literals.values()
- def predicates(self) -> typing.Iterator[types.Predicate]: # FIXME: type annotation
+ def predicates(self) -> typing.Iterable[types.Predicate]:
+ """Return an iterable over Predicate classes."""
return self._predicates.values()
def node(self, uri: URI) -> types.Node:
@@ -275,7 +300,8 @@ class Schema():
@classmethod
- def Empty(cls) -> 'Schema':
+ def Empty(cls) -> 'Schema': # pylint: disable=invalid-name # capitalized classmethod
+ """Return a minimal Schema."""
node = types.Node(ns.bsfs.Node, None)
literal = types.Literal(ns.bsfs.Literal, None)
predicate = types.Predicate(
@@ -289,7 +315,7 @@ class Schema():
@classmethod
- def from_string(cls, schema: str) -> 'Schema':
+ def from_string(cls, schema: str) -> 'Schema': # pylint: disable=invalid-name # capitalized classmethod
"""Load and return a Schema from a string."""
# parse string into rdf graph
graph = rdflib.Graph()
diff --git a/bsfs/schema/types.py b/bsfs/schema/types.py
index 6e257e3..54a7e99 100644
--- a/bsfs/schema/types.py
+++ b/bsfs/schema/types.py
@@ -93,7 +93,7 @@ class _Type():
uri: URI
# parent's class uris.
- parent: typing.Optional['_Type']
+ parent: typing.Optional['_Type'] # TODO: for python >=3.11: use typing.Self
def __init__(
self,
@@ -123,63 +123,70 @@ class _Type():
def __hash__(self) -> int:
return hash((type(self), self.uri, self.parent))
+ # NOTE: For equality and order functions (lt, gt, le, ge) we explicitly want type equality!
+ # Consider the statements below, with class Vehicle(_Type) and class TwoWheel(Vehicle):
+ # * Vehicle('foo', None) == TwoWheel('foo', None): Instances of different types cannot be equivalent.
+ # * Vehicle('foo', None) <= TwoWheel('foo', None): Cannot compare the different types Vehicle and TwoWheel.
+
def __eq__(self, other: typing.Any) -> bool:
"""Return True iff *self* is equivalent to *other*."""
- return type(self) == type(other) \
+ # pylint: disable=unidiomatic-typecheck
+ return type(other) is type(self) \
and self.uri == other.uri \
and self.parent == other.parent
+
def __lt__(self, other: typing.Any) -> bool:
"""Return True iff *self* is a true subclass of *other*."""
- if not type(self) == type(other): # type mismatch
+ if not type(self) is type(other): # type mismatch # pylint: disable=unidiomatic-typecheck
return NotImplemented
- elif self.uri == other.uri: # equivalence
+ if self.uri == other.uri: # equivalence
return False
- elif self in other.parents(): # superclass
+ if self in other.parents(): # superclass
return False
- elif other in self.parents(): # subclass
+ if other in self.parents(): # subclass
return True
- else: # not related
- return False
+ # not related
+ return False
def __le__(self, other: typing.Any) -> bool:
"""Return True iff *self* is equivalent or a subclass of *other*."""
- if not type(self) == type(other): # type mismatch
+ if not type(self) is type(other): # type mismatch # pylint: disable=unidiomatic-typecheck
return NotImplemented
- elif self.uri == other.uri: # equivalence
+ if self.uri == other.uri: # equivalence
return True
- elif self in other.parents(): # superclass
+ if self in other.parents(): # superclass
return False
- elif other in self.parents(): # subclass
+ if other in self.parents(): # subclass
return True
- else: # not related
- return False
+ # not related
+ return False
def __gt__(self, other: typing.Any) -> bool:
"""Return True iff *self* is a true superclass of *other*."""
- if not type(self) == type(other): # type mismatch
+ if not type(self) is type(other): # type mismatch # pylint: disable=unidiomatic-typecheck
return NotImplemented
- elif self.uri == other.uri: # equivalence
+ if self.uri == other.uri: # equivalence
return False
- elif self in other.parents(): # superclass
+ if self in other.parents(): # superclass
return True
- elif other in self.parents(): # subclass
- return False
- else: # not related
+ if other in self.parents(): # subclass
return False
+ # not related
+ return False
def __ge__(self, other: typing.Any) -> bool:
"""Return True iff *self* is eqiuvalent or a superclass of *other*."""
- if not type(self) == type(other): # type mismatch
+ if not type(self) is type(other): # type mismatch # pylint: disable=unidiomatic-typecheck
return NotImplemented
- elif self.uri == other.uri: # equivalence
+ if self.uri == other.uri: # equivalence
return True
- elif self in other.parents(): # superclass
+ if self in other.parents(): # superclass
return True
- elif other in self.parents(): # subclass
- return False
- else: # not related
+ if other in self.parents(): # subclass
return False
+ # not related
+ return False
class _Vertex(_Type):
@@ -216,10 +223,10 @@ class Predicate(_Type):
self,
# Type members
uri: URI,
- parent: 'Predicate',
+ parent: typing.Optional['Predicate'],
# Predicate members
domain: Node,
- range: typing.Optional[typing.Union[Node, Literal]],
+ range: typing.Optional[typing.Union[Node, Literal]], # pylint: disable=redefined-builtin
unique: bool,
):
# check arguments
@@ -246,7 +253,7 @@ class Predicate(_Type):
self,
uri: URI,
domain: typing.Optional[Node] = None,
- range: typing.Optional[_Vertex] = None,
+ range: typing.Optional[_Vertex] = None, # pylint: disable=redefined-builtin
unique: typing.Optional[bool] = None,
**kwargs,
):
diff --git a/bsfs/triple_store/base.py b/bsfs/triple_store/base.py
index 942a16b..6561262 100644
--- a/bsfs/triple_store/base.py
+++ b/bsfs/triple_store/base.py
@@ -21,7 +21,21 @@ __all__: typing.Sequence[str] = (
## code ##
class TripleStoreBase(abc.ABC):
- """
+ """TripleStore base class.
+
+ Use the `Open` method to create a new instance and to initialize
+ the required structures.
+
+ Triple stores express a graph via its (subject, predicate, object) triples.
+ They provides methods to add and remove triples, and to query the storage
+ for given graph structures. The subject is always a node in the graph,
+ whereas nodes are identifiable by a unique URI. Note that blank nodes
+ (without an explicit URI) are not supported. The object can be another
+ Node or a Literal value. The relation between a subject and an object
+ is expressed via a Predicate. The graph structures are governed by a
+ schema that defines which Node, Literal, and Predicate classes exist
+ and how they can interact (see `bsfs.schema.Schema`).
+
"""
# storage's URI. None implies a temporary location.
@@ -99,9 +113,8 @@ class TripleStoreBase(abc.ABC):
self,
node_type: _schema.Node,
guids: typing.Iterable[URI],
- ):
- """
- """
+ ) -> typing.Iterable[URI]:
+ """Return those *guids* that exist and have type *node_type* or a subclass thereof."""
@abc.abstractmethod
def create(
@@ -119,7 +132,17 @@ class TripleStoreBase(abc.ABC):
predicate: _schema.Predicate,
values: typing.Iterable[typing.Any],
):
- """
+ """Add triples to the graph.
+
+ It is assumed that all of *guids* exist and have *node_type*.
+ This method adds a triple (guid, predicate, value) for every guid in
+ *guids* and each value in *values* (cartesian product). Note that
+ *values* must have length one for unique predicates, and that
+ currently existing values will be overwritten in this case.
+ It also verifies that all symbols are part of the schema and that
+ the *predicate* matches the *node_type*.
+ Raises `bsfs.errors.ConsistencyError` if these assumptions are violated.
+
"""
## EOF ##
diff --git a/bsfs/triple_store/sparql.py b/bsfs/triple_store/sparql.py
index fc161b3..23059f7 100644
--- a/bsfs/triple_store/sparql.py
+++ b/bsfs/triple_store/sparql.py
@@ -28,33 +28,52 @@ __all__: typing.Sequence[str] = (
class _Transaction():
"""Lightweight rdflib transactions for in-memory databases."""
- def __init__(self, graph):
+ # graph instance.
+ _graph: rdflib.Graph
+
+ # current log of added triples.
+ _added: typing.List[typing.Any]
+
+ # current log of removed triples.
+ _removed: typing.List[typing.Any]
+
+ def __init__(self, graph: rdflib.Graph):
self._graph = graph
- self.commit() # initialize
+ # initialize internal structures
+ self.commit()
def commit(self):
+ """Commit temporary changes."""
self._added = []
self._removed = []
def rollback(self):
+ """Undo changes since the last commit."""
for triple in self._added:
self._graph.remove(triple)
for triple in self._removed:
self._graph.add(triple)
- def add(self, triple):
+ def add(self, triple: typing.Any):
+ """Add a triple to the graph."""
if triple not in self._graph:
self._added.append(triple)
self._graph.add(triple)
- def remove(self, triple):
+ def remove(self, triple: typing.Any):
+ """Remove a triple from the graph."""
if triple in self._graph:
self._removed.append(triple)
self._graph.remove(triple)
class SparqlStore(base.TripleStoreBase):
- """
+ """Sparql-based triple store.
+
+ The sparql triple store uses a third-party backend
+ (currently rdflib) to store triples and manages them via
+ the Sparql query language.
+
"""
# The rdflib graph.
@@ -89,27 +108,7 @@ class SparqlStore(base.TripleStoreBase):
return self._schema
@schema.setter
- def schema(self, schema: _schema.Schema):
- """Migrate to new schema by adding or removing class definitions.
-
- Commits before and after the migration.
-
- Instances of removed classes will be deleted irreversably.
- Note that modifying an existing class is not directly supported.
- Also, it is generally discouraged, since changing definitions may
- lead to inconsistencies across multiple clients in a distributed
- setting. Instead, consider introducing a new class under its own
- uri. Such a migration would look as follows:
-
- 1. Add new class definitions.
- 2. Create instances of the new classes and copy relevant data.
- 3. Remove the old definitions.
-
- To modify a class, i.e., re-use a previous uri with a new
- class definition, you would have to migrate via temporary
- class definitions, and thus repeat the above procedure two times.
-
- """
+ def schema(self, schema: bsc.Schema):
# check args: Schema instance
if not isinstance(schema, bsc.Schema):
raise TypeError(schema)
@@ -162,16 +161,14 @@ class SparqlStore(base.TripleStoreBase):
subject_types = list(self._graph.objects(rdflib.URIRef(subject), rdflib.RDF.type))
if len(subject_types) == 0:
return False
- elif len(subject_types) == 1:
- node = self.schema.node(URI(subject_types[0]))
+ if len(subject_types) == 1:
+ node = self.schema.node(URI(subject_types[0])) # type: ignore [arg-type] # URI is a subtype of str
if node == node_type:
return True
- elif node_type in node.parents():
+ if node_type in node.parents():
return True
- else:
- return False
- else:
- raise errors.UnreachableError()
+ return False
+ raise errors.UnreachableError()
def exists(
self,
@@ -187,20 +184,18 @@ class SparqlStore(base.TripleStoreBase):
node_type: bsc.Node,
guids: typing.Iterable[URI],
):
- """
- """
# check node_type
if node_type not in self.schema.nodes():
raise errors.ConsistencyError(f'{node_type} is not defined in the schema')
# check and create guids
for guid in guids:
- guid = rdflib.URIRef(guid)
+ subject = rdflib.URIRef(guid)
# check node existence
- if (guid, rdflib.RDF.type, None) in self.graph:
+ if (subject, rdflib.RDF.type, None) in self._graph:
# FIXME: node exists and may have a different type! ignore? raise? report?
continue
# add node
- self._transaction.add((guid, rdflib.RDF.type, rdflib.URIRef(node_type.uri)))
+ self._transaction.add((subject, rdflib.RDF.type, rdflib.URIRef(node_type.uri)))
def set(
self,
@@ -218,6 +213,8 @@ class SparqlStore(base.TripleStoreBase):
if not node_type <= predicate.domain:
raise errors.ConsistencyError(f'{node_type} must be a subclass of {predicate.domain}')
# NOTE: predicate.range is in the schema since predicate is in the schema.
+ # materialize values
+ values = set(values)
# check values
if len(values) == 0:
return