aboutsummaryrefslogtreecommitdiffstats
path: root/bsfs
diff options
context:
space:
mode:
authorMatthias Baumgartner <dev@igsor.net>2022-12-08 16:34:13 +0100
committerMatthias Baumgartner <dev@igsor.net>2022-12-08 16:34:13 +0100
commit7eb61d117a995b076d36c55d2c7c268665360813 (patch)
treec47a0fe523349dd3a14c1501281d42347c3b8954 /bsfs
parent729f025f392d45b621941da9d052834e0d81506e (diff)
downloadbsfs-7eb61d117a995b076d36c55d2c7c268665360813.tar.gz
bsfs-7eb61d117a995b076d36c55d2c7c268665360813.tar.bz2
bsfs-7eb61d117a995b076d36c55d2c7c268665360813.zip
schema
Diffstat (limited to 'bsfs')
-rw-r--r--bsfs/schema/__init__.py24
-rw-r--r--bsfs/schema/schema.py325
-rw-r--r--bsfs/schema/types.py269
3 files changed, 618 insertions, 0 deletions
diff --git a/bsfs/schema/__init__.py b/bsfs/schema/__init__.py
new file mode 100644
index 0000000..ce381ec
--- /dev/null
+++ b/bsfs/schema/__init__.py
@@ -0,0 +1,24 @@
+"""
+
+Part of the BlackStar filesystem (bsfs) module.
+A copy of the license is provided with the project.
+Author: Matthias Baumgartner, 2022
+"""
+# imports
+import typing
+
+# inner-module imports
+#from . import types
+from .schema import Schema
+from .types import Literal, Node, Predicate
+
+# exports
+__all__: typing.Sequence[str] = (
+ 'Literal',
+ 'Node',
+ 'Predicate',
+ 'Schema',
+ #'types',
+ )
+
+## EOF ##
diff --git a/bsfs/schema/schema.py b/bsfs/schema/schema.py
new file mode 100644
index 0000000..0e053c0
--- /dev/null
+++ b/bsfs/schema/schema.py
@@ -0,0 +1,325 @@
+"""
+
+Part of the BlackStar filesystem (bsfs) module.
+A copy of the license is provided with the project.
+Author: Matthias Baumgartner, 2022
+"""
+# imports
+from collections import abc, namedtuple
+import typing
+import rdflib
+
+# bsfs imports
+from bsfs.namespace import ns
+from bsfs.utils import errors, URI, typename
+
+# inner-module imports
+from . import types
+
+# exports
+__all__: typing.Sequence[str] = (
+ 'Schema',
+ )
+
+
+## code ##
+
+class Schema():
+ """
+ """
+
+ _nodes: typing.Dict[URI, types.Node]
+ _literals: typing.Dict[URI, types.Literal]
+ _predicates: typing.Dict[URI, types.Predicate]
+
+ def __init__(
+ self,
+ predicates: typing.Iterable[types.Predicate],
+ nodes: typing.Optional[typing.Iterable[types.Node]] = None,
+ literals: typing.Optional[typing.Iterable[types.Literal]] = None,
+ ):
+ # materialize arguments
+ if nodes is None:
+ nodes = set()
+ if literals is None:
+ literals = set()
+ nodes = set(nodes)
+ literals = set(literals)
+ predicates = set(predicates)
+ # include parents in predicates set
+ predicates |= {par for pred in predicates for par in pred.parents()}
+ # include predicate domain in nodes set
+ nodes |= {pred.domain for pred in predicates}
+ # include predicate range in nodes and literals sets
+ prange = {pred.range for pred in predicates if pred.range is not None}
+ nodes |= {vert for vert in prange if isinstance(vert, types.Node)}
+ literals |= {vert for vert in prange if isinstance(vert, types.Literal)}
+ # include parents in nodes and literals sets
+ # NOTE: Must be done after predicate domain/range was handled
+ # so that their parents are included as well.
+ nodes |= {par for node in nodes for par in node.parents()}
+ literals |= {par for lit in literals for par in lit.parents()}
+ # assign members
+ self._nodes = {node.uri: node for node in nodes}
+ self._literals = {lit.uri: lit for lit in literals}
+ self._predicates = {pred.uri: pred for pred in predicates}
+ # verify unique uris
+ if len(nodes) != len(self._nodes):
+ raise errors.ConsistencyError('inconsistent nodes')
+ if len(literals) != len(self._literals):
+ raise errors.ConsistencyError('inconsistent literals')
+ if len(predicates) != len(self._predicates):
+ raise errors.ConsistencyError('inconsistent predicates')
+ # verify globally unique uris
+ n_uris = len(set(self._nodes) | set(self._literals) | set(self._predicates))
+ if n_uris != len(self._nodes) + len(self._literals) + len(self._predicates):
+ raise errors.ConsistencyError('URI dual use')
+
+
+ ## essentials ##
+
+ def __str__(self) -> str:
+ return f'{typename(self)}()'
+
+ def __repr__(self) -> str:
+ return f'{typename(self)}({sorted(self._nodes)}, {sorted(self._literals)}, {sorted(self._predicates)})'
+
+ def __hash__(self) -> int:
+ return hash((
+ type(self),
+ tuple(sorted(self._nodes.values())),
+ tuple(sorted(self._literals.values())),
+ tuple(sorted(self._predicates.values())),
+ ))
+
+ def __eq__(self, other: typing.Any) -> bool:
+ return isinstance(other, type(self)) \
+ and self._nodes == other._nodes \
+ and self._literals == other._literals \
+ and self._predicates == other._predicates
+
+
+ ## operators ##
+
+ SchemaDiff = namedtuple('SchemaDiff', ['nodes', 'literals', 'predicates'])
+
+ def diff(self, other: 'Schema') -> SchemaDiff:
+ """Return node, literals, and predicates that are in *self* but not in *other*."""
+ return self.SchemaDiff(
+ nodes=set(self.nodes()) - set(other.nodes()),
+ literals=set(self.literals()) - set(other.literals()),
+ predicates=set(self.predicates()) - set(other.predicates()),
+ )
+
+ def __sub__(self, other: typing.Any) -> SchemaDiff:
+ """Alias for `Schema.diff`."""
+ if not isinstance(other, Schema):
+ return NotImplemented
+ return self.diff(other)
+
+ def consistent_with(self, other: 'Schema') -> bool:
+ """Checks if two schemas have different definitions for the same uri.
+ Tests nodes, literals, and predicates.
+ """
+ # check arg
+ if not isinstance(other, Schema):
+ raise TypeError(other)
+ # node consistency
+ nodes = set(self.nodes()) | set(other.nodes())
+ nuris = {node.uri for node in nodes}
+ if len(nodes) != len(nuris):
+ return False
+ # literal consistency
+ literals = set(self.literals()) | set(other.literals())
+ luris = {lit.uri for lit in literals}
+ if len(literals) != len(luris):
+ return False
+ # predicate consistency
+ predicates = set(self.predicates()) | set(other.predicates())
+ puris = {pred.uri for pred in predicates}
+ if len(predicates) != len(puris):
+ return False
+ # global consistency
+ if len(puris | luris | nuris) != len(nodes) + len(literals) + len(predicates):
+ return False
+ # all checks passed
+ return True
+
+ @classmethod
+ def Union(cls, *args: typing.Union['Schema', typing.Iterable['Schema']]) -> 'Schema':
+ """Combine multiple Schema instances into a single one.
+ As argument, you can either pass multiple Schema instances, or a single
+ iterable over Schema instances. Any abc.Iterable will be accepted.
+
+ Example:
+
+ >>> a, b, c = Schema.Empty(), Schema.Empty(), Schema.Empty()
+ >>> # multiple Schema instances
+ >>> Schema.Union(a, b, c)
+ >>> # A single iterable over Schema instances
+ >>> Schema.Union([a, b, c])
+
+ """
+ if len(args) == 0:
+ raise TypeError('Schema.Union requires at least one argument (Schema or Iterable)')
+ if isinstance(args[0], cls): # args is sequence of Schema instances
+ pass
+ elif len(args) == 1 and isinstance(args[0], abc.Iterable): # args is a single iterable
+ args = args[0]
+ else:
+ raise TypeError(f'expected multiple Schema instances or a single Iterable, found {args}')
+
+ nodes, literals, predicates = set(), set(), set()
+ for schema in args:
+ # check argument
+ if not isinstance(schema, cls):
+ raise TypeError(schema)
+ # merge with previous schemas
+ nodes |= set(schema.nodes())
+ literals |= set(schema.literals())
+ predicates |= set(schema.predicates())
+ # return new Schema instance
+ return cls(predicates, nodes, literals)
+
+ def union(self, other: 'Schema') -> 'Schema':
+ """Merge *other* and *self* into a new Schema. *self* takes precedence."""
+ # check type
+ if not isinstance(other, type(self)):
+ raise TypeError(other)
+ # return combined schemas
+ return self.Union(self, other)
+
+ def __add__(self, other: typing.Any) -> 'Schema':
+ """Alias for Schema.union."""
+ try: # return merged schemas
+ return self.union(other)
+ except TypeError:
+ return NotImplemented
+
+ def __or__(self, other: typing.Any) -> 'Schema':
+ """Alias for Schema.union."""
+ return self.__add__(other)
+
+
+ ## getters ##
+ # FIXME: which of the getters below are actually needed?
+ # FIXME: interchangeability of URI and _Type?!
+
+ def has_node(self, node: URI) -> bool:
+ return node in self._nodes
+
+ def has_literal(self, lit: URI) -> bool:
+ return lit in self._literals
+
+ def has_predicate(self, pred: URI) -> bool:
+ return pred in self._predicates
+
+ def nodes(self) -> typing.Iterator[types.Node]: # FIXME: type annotation
+ return self._nodes.values()
+
+ def literals(self) -> typing.Iterator[types.Literal]: # FIXME: type annotation
+ return self._literals.values()
+
+ def predicates(self) -> typing.Iterator[types.Predicate]: # FIXME: type annotation
+ return self._predicates.values()
+
+ def node(self, uri: URI) -> types.Node:
+ """Return the Node matching the *uri*."""
+ return self._nodes[uri]
+
+ def predicate(self, uri: URI) -> types.Predicate:
+ """Return the Predicate matching the *uri*."""
+ return self._predicates[uri]
+
+ def literal(self, uri: URI) -> types.Literal:
+ """Return the Literal matching the *uri*."""
+ return self._literals[uri]
+
+
+ ## constructors ##
+
+
+ @classmethod
+ def Empty(cls) -> 'Schema':
+ node = types.Node(ns.bsfs.Node, None)
+ literal = types.Literal(ns.bsfs.Literal, None)
+ predicate = types.Predicate(
+ uri=ns.bsfs.Predicate,
+ parent=None,
+ domain=node,
+ range=None,
+ unique=False,
+ )
+ return cls((predicate, ), (node, ), (literal, ))
+
+
+ @classmethod
+ def from_string(cls, schema: str) -> 'Schema':
+ """Load and return a Schema from a string."""
+ # parse string into rdf graph
+ graph = rdflib.Graph()
+ graph.parse(data=schema, format='turtle')
+
+ def _fetch_hierarchically(factory, curr):
+ # emit current node
+ yield curr
+ # walk through childs
+ for child in graph.subjects(rdflib.URIRef(ns.rdfs.subClassOf), rdflib.URIRef(curr.uri)):
+ # convert to URI
+ child = URI(child)
+ # check circular dependency
+ if child == curr.uri or child in {node.uri for node in curr.parents()}:
+ raise errors.ConsistencyError('circular dependency')
+ # recurse and emit (sub*)childs
+ yield from _fetch_hierarchically(factory, factory(child, curr))
+
+ # fetch nodes
+ nodes = set(_fetch_hierarchically(types.Node, types.Node(ns.bsfs.Node, None)))
+ nodes_lut = {node.uri: node for node in nodes}
+ if len(nodes_lut) != len(nodes):
+ raise errors.ConsistencyError('inconsistent nodes')
+
+ # fetch literals
+ literals = set(_fetch_hierarchically(types.Literal, types.Literal(ns.bsfs.Literal, None)))
+ literals_lut = {lit.uri: lit for lit in literals}
+ if len(literals_lut) != len(literals):
+ raise errors.ConsistencyError('inconsistent literals')
+
+ # fetch predicates
+ def build_predicate(uri, parent):
+ uri = rdflib.URIRef(uri)
+ # get domain
+ domains = set(graph.objects(uri, rdflib.RDFS.domain))
+ if len(domains) != 1:
+ raise errors.ConsistencyError(f'inconsistent domain: {domains}')
+ dom = nodes_lut.get(next(iter(domains)))
+ if dom is None:
+ raise errors.ConsistencyError('missing domain')
+ # get range
+ ranges = set(graph.objects(uri, rdflib.RDFS.range))
+ if len(ranges) != 1:
+ raise errors.ConsistencyError(f'inconsistent range: {ranges}')
+ rng = next(iter(ranges))
+ rng = nodes_lut.get(rng, literals_lut.get(rng))
+ if rng is None:
+ raise errors.ConsistencyError('missing range')
+ # get unique flag
+ uniques = set(graph.objects(uri, rdflib.URIRef(ns.bsfs.unique)))
+ if len(uniques) != 1:
+ raise errors.ConsistencyError(f'inconsistent unique flags: {uniques}')
+ unique = bool(next(iter(uniques)))
+ # build Predicate
+ return types.Predicate(URI(uri), parent, dom, rng, unique)
+
+ root_predicate = types.Predicate(
+ uri=ns.bsfs.Predicate,
+ parent=None,
+ domain=nodes_lut[ns.bsfs.Node],
+ range=None, # FIXME: Unclear how to handle this! Can be either a Literal or a Node
+ unique=False,
+ )
+ predicates = _fetch_hierarchically(build_predicate, root_predicate)
+ # return Schema
+ return cls(predicates, nodes, literals)
+
+## EOF ##
diff --git a/bsfs/schema/types.py b/bsfs/schema/types.py
new file mode 100644
index 0000000..6e257e3
--- /dev/null
+++ b/bsfs/schema/types.py
@@ -0,0 +1,269 @@
+"""
+
+Part of the BlackStar filesystem (bsfs) module.
+A copy of the license is provided with the project.
+Author: Matthias Baumgartner, 2022
+"""
+# imports
+import typing
+
+# bsfs imports
+from bsfs.utils import errors, URI, typename
+
+# exports
+__all__: typing.Sequence[str] = (
+ 'Literal',
+ 'Node',
+ 'Predicate',
+ )
+
+
+## code ##
+
+class _Type():
+ """A class is defined via its uri.
+
+ Classes define a partial order.
+ The order operators indicate whether some class is a
+ superclass (greater-than) or a subclass (less-than) of another.
+ Comparisons are only supported within the same type.
+
+ For example, consider the class hierarchy below:
+
+ Vehicle
+ Two-wheel
+ Bike
+ Bicycle
+
+ >>> vehicle = _Type('Vehicle')
+ >>> twowheel = _Type('Two-wheel', vehicle)
+ >>> bike = _Type('Bike', twowheel)
+ >>> bicycle = _Type('Bicycle', twowheel)
+
+ Two-wheel is equivalent to itself
+ >>> twowheel == vehicle
+ False
+ >>> twowheel == twowheel
+ True
+ >>> twowheel == bicycle
+ False
+
+ Two-wheel is a true subclass of Vehicle
+ >>> twowheel < vehicle
+ True
+ >>> twowheel < twowheel
+ False
+ >>> twowheel < bicycle
+ False
+
+ Two-wheel is a subclass of itself and Vehicle
+ >>> twowheel <= vehicle
+ True
+ >>> twowheel <= twowheel
+ True
+ >>> twowheel <= bicycle
+ False
+
+ Two-wheel is a true superclass of Bicycle
+ >>> twowheel > vehicle
+ False
+ >>> twowheel > twowheel
+ False
+ >>> twowheel > bicycle
+ True
+
+ Two-wheel is a superclass of itself and Bicycle
+ >>> twowheel >= vehicle
+ False
+ >>> twowheel >= twowheel
+ True
+ >>> twowheel >= bicycle
+ True
+
+ Analoguous to sets, this is not a total order:
+ >>> bike < bicycle
+ False
+ >>> bike > bicycle
+ False
+ >>> bike == bicycle
+ False
+ """
+
+ # class uri.
+ uri: URI
+
+ # parent's class uris.
+ parent: typing.Optional['_Type']
+
+ def __init__(
+ self,
+ uri: URI,
+ parent: typing.Optional['_Type'] = None,
+ ):
+ self.uri = uri
+ self.parent = parent
+
+ def parents(self) -> typing.Generator['_Type', None, None]:
+ """Generate a list of parent nodes."""
+ curr = self.parent
+ while curr is not None:
+ yield curr
+ curr = curr.parent
+
+ def get_child(self, uri: URI, **kwargs):
+ """Return a child of the current class."""
+ return type(self)(uri, self, **kwargs)
+
+ def __str__(self) -> str:
+ return f'{typename(self)}({self.uri})'
+
+ def __repr__(self) -> str:
+ return f'{typename(self)}({self.uri}, {repr(self.parent)})'
+
+ def __hash__(self) -> int:
+ return hash((type(self), self.uri, self.parent))
+
+ def __eq__(self, other: typing.Any) -> bool:
+ """Return True iff *self* is equivalent to *other*."""
+ return type(self) == type(other) \
+ and self.uri == other.uri \
+ and self.parent == other.parent
+
+ def __lt__(self, other: typing.Any) -> bool:
+ """Return True iff *self* is a true subclass of *other*."""
+ if not type(self) == type(other): # type mismatch
+ return NotImplemented
+ elif self.uri == other.uri: # equivalence
+ return False
+ elif self in other.parents(): # superclass
+ return False
+ elif other in self.parents(): # subclass
+ return True
+ else: # not related
+ return False
+
+ def __le__(self, other: typing.Any) -> bool:
+ """Return True iff *self* is equivalent or a subclass of *other*."""
+ if not type(self) == type(other): # type mismatch
+ return NotImplemented
+ elif self.uri == other.uri: # equivalence
+ return True
+ elif self in other.parents(): # superclass
+ return False
+ elif other in self.parents(): # subclass
+ return True
+ else: # not related
+ return False
+
+ def __gt__(self, other: typing.Any) -> bool:
+ """Return True iff *self* is a true superclass of *other*."""
+ if not type(self) == type(other): # type mismatch
+ return NotImplemented
+ elif self.uri == other.uri: # equivalence
+ return False
+ elif self in other.parents(): # superclass
+ return True
+ elif other in self.parents(): # subclass
+ return False
+ else: # not related
+ return False
+
+ def __ge__(self, other: typing.Any) -> bool:
+ """Return True iff *self* is eqiuvalent or a superclass of *other*."""
+ if not type(self) == type(other): # type mismatch
+ return NotImplemented
+ elif self.uri == other.uri: # equivalence
+ return True
+ elif self in other.parents(): # superclass
+ return True
+ elif other in self.parents(): # subclass
+ return False
+ else: # not related
+ return False
+
+
+class _Vertex(_Type):
+ """Graph vertex types. Can be a Node or a Literal."""
+ def __init__(self, uri: URI, parent: typing.Optional['_Vertex']):
+ super().__init__(uri, parent)
+
+
+class Node(_Vertex):
+ """Node type."""
+ def __init__(self, uri: URI, parent: typing.Optional['Node']):
+ super().__init__(uri, parent)
+
+
+class Literal(_Vertex):
+ """Literal type."""
+ def __init__(self, uri: URI, parent: typing.Optional['Literal']):
+ super().__init__(uri, parent)
+
+
+class Predicate(_Type):
+ """Predicate type."""
+
+ # source type.
+ domain: Node
+
+ # destination type.
+ range: typing.Optional[typing.Union[Node, Literal]]
+
+ # maximum cardinality of type.
+ unique: bool
+
+ def __init__(
+ self,
+ # Type members
+ uri: URI,
+ parent: 'Predicate',
+ # Predicate members
+ domain: Node,
+ range: typing.Optional[typing.Union[Node, Literal]],
+ unique: bool,
+ ):
+ # check arguments
+ if not isinstance(domain, Node):
+ raise TypeError(domain)
+ if range is not None and not isinstance(range, Node) and not isinstance(range, Literal):
+ raise TypeError(range)
+ # initialize
+ super().__init__(uri, parent)
+ self.domain = domain
+ self.range = range
+ self.unique = unique
+
+ def __hash__(self) -> int:
+ return hash((super().__hash__(), self.domain, self.range, self.unique))
+
+ def __eq__(self, other: typing.Any) -> bool:
+ return super().__eq__(other) \
+ and self.domain == other.domain \
+ and self.range == other.range \
+ and self.unique == other.unique
+
+ def get_child(
+ self,
+ uri: URI,
+ domain: typing.Optional[Node] = None,
+ range: typing.Optional[_Vertex] = None,
+ unique: typing.Optional[bool] = None,
+ **kwargs,
+ ):
+ """Return a child of the current class."""
+ if domain is None:
+ domain = self.domain
+ if not domain <= self.domain:
+ raise errors.ConsistencyError(f'{domain} must be a subclass of {self.domain}')
+ if range is None:
+ range = self.range
+ if range is None: # inherited range from ns.bsfs.Predicate
+ raise ValueError('range must be defined by the parent or argument')
+ if self.range is not None and not range <= self.range:
+ raise errors.ConsistencyError(f'{range} must be a subclass of {self.range}')
+ if unique is None:
+ unique = self.unique
+ return super().get_child(uri, domain=domain, range=range, unique=unique, **kwargs)
+
+
+## EOF ##