diff options
author | Matthias Baumgartner <dev@igsor.net> | 2022-12-08 16:34:13 +0100 |
---|---|---|
committer | Matthias Baumgartner <dev@igsor.net> | 2022-12-08 16:34:13 +0100 |
commit | 7eb61d117a995b076d36c55d2c7c268665360813 (patch) | |
tree | c47a0fe523349dd3a14c1501281d42347c3b8954 /bsfs | |
parent | 729f025f392d45b621941da9d052834e0d81506e (diff) | |
download | bsfs-7eb61d117a995b076d36c55d2c7c268665360813.tar.gz bsfs-7eb61d117a995b076d36c55d2c7c268665360813.tar.bz2 bsfs-7eb61d117a995b076d36c55d2c7c268665360813.zip |
schema
Diffstat (limited to 'bsfs')
-rw-r--r-- | bsfs/schema/__init__.py | 24 | ||||
-rw-r--r-- | bsfs/schema/schema.py | 325 | ||||
-rw-r--r-- | bsfs/schema/types.py | 269 |
3 files changed, 618 insertions, 0 deletions
diff --git a/bsfs/schema/__init__.py b/bsfs/schema/__init__.py new file mode 100644 index 0000000..ce381ec --- /dev/null +++ b/bsfs/schema/__init__.py @@ -0,0 +1,24 @@ +""" + +Part of the BlackStar filesystem (bsfs) module. +A copy of the license is provided with the project. +Author: Matthias Baumgartner, 2022 +""" +# imports +import typing + +# inner-module imports +#from . import types +from .schema import Schema +from .types import Literal, Node, Predicate + +# exports +__all__: typing.Sequence[str] = ( + 'Literal', + 'Node', + 'Predicate', + 'Schema', + #'types', + ) + +## EOF ## diff --git a/bsfs/schema/schema.py b/bsfs/schema/schema.py new file mode 100644 index 0000000..0e053c0 --- /dev/null +++ b/bsfs/schema/schema.py @@ -0,0 +1,325 @@ +""" + +Part of the BlackStar filesystem (bsfs) module. +A copy of the license is provided with the project. +Author: Matthias Baumgartner, 2022 +""" +# imports +from collections import abc, namedtuple +import typing +import rdflib + +# bsfs imports +from bsfs.namespace import ns +from bsfs.utils import errors, URI, typename + +# inner-module imports +from . import types + +# exports +__all__: typing.Sequence[str] = ( + 'Schema', + ) + + +## code ## + +class Schema(): + """ + """ + + _nodes: typing.Dict[URI, types.Node] + _literals: typing.Dict[URI, types.Literal] + _predicates: typing.Dict[URI, types.Predicate] + + def __init__( + self, + predicates: typing.Iterable[types.Predicate], + nodes: typing.Optional[typing.Iterable[types.Node]] = None, + literals: typing.Optional[typing.Iterable[types.Literal]] = None, + ): + # materialize arguments + if nodes is None: + nodes = set() + if literals is None: + literals = set() + nodes = set(nodes) + literals = set(literals) + predicates = set(predicates) + # include parents in predicates set + predicates |= {par for pred in predicates for par in pred.parents()} + # include predicate domain in nodes set + nodes |= {pred.domain for pred in predicates} + # include predicate range in nodes and literals sets + prange = {pred.range for pred in predicates if pred.range is not None} + nodes |= {vert for vert in prange if isinstance(vert, types.Node)} + literals |= {vert for vert in prange if isinstance(vert, types.Literal)} + # include parents in nodes and literals sets + # NOTE: Must be done after predicate domain/range was handled + # so that their parents are included as well. + nodes |= {par for node in nodes for par in node.parents()} + literals |= {par for lit in literals for par in lit.parents()} + # assign members + self._nodes = {node.uri: node for node in nodes} + self._literals = {lit.uri: lit for lit in literals} + self._predicates = {pred.uri: pred for pred in predicates} + # verify unique uris + if len(nodes) != len(self._nodes): + raise errors.ConsistencyError('inconsistent nodes') + if len(literals) != len(self._literals): + raise errors.ConsistencyError('inconsistent literals') + if len(predicates) != len(self._predicates): + raise errors.ConsistencyError('inconsistent predicates') + # verify globally unique uris + n_uris = len(set(self._nodes) | set(self._literals) | set(self._predicates)) + if n_uris != len(self._nodes) + len(self._literals) + len(self._predicates): + raise errors.ConsistencyError('URI dual use') + + + ## essentials ## + + def __str__(self) -> str: + return f'{typename(self)}()' + + def __repr__(self) -> str: + return f'{typename(self)}({sorted(self._nodes)}, {sorted(self._literals)}, {sorted(self._predicates)})' + + def __hash__(self) -> int: + return hash(( + type(self), + tuple(sorted(self._nodes.values())), + tuple(sorted(self._literals.values())), + tuple(sorted(self._predicates.values())), + )) + + def __eq__(self, other: typing.Any) -> bool: + return isinstance(other, type(self)) \ + and self._nodes == other._nodes \ + and self._literals == other._literals \ + and self._predicates == other._predicates + + + ## operators ## + + SchemaDiff = namedtuple('SchemaDiff', ['nodes', 'literals', 'predicates']) + + def diff(self, other: 'Schema') -> SchemaDiff: + """Return node, literals, and predicates that are in *self* but not in *other*.""" + return self.SchemaDiff( + nodes=set(self.nodes()) - set(other.nodes()), + literals=set(self.literals()) - set(other.literals()), + predicates=set(self.predicates()) - set(other.predicates()), + ) + + def __sub__(self, other: typing.Any) -> SchemaDiff: + """Alias for `Schema.diff`.""" + if not isinstance(other, Schema): + return NotImplemented + return self.diff(other) + + def consistent_with(self, other: 'Schema') -> bool: + """Checks if two schemas have different definitions for the same uri. + Tests nodes, literals, and predicates. + """ + # check arg + if not isinstance(other, Schema): + raise TypeError(other) + # node consistency + nodes = set(self.nodes()) | set(other.nodes()) + nuris = {node.uri for node in nodes} + if len(nodes) != len(nuris): + return False + # literal consistency + literals = set(self.literals()) | set(other.literals()) + luris = {lit.uri for lit in literals} + if len(literals) != len(luris): + return False + # predicate consistency + predicates = set(self.predicates()) | set(other.predicates()) + puris = {pred.uri for pred in predicates} + if len(predicates) != len(puris): + return False + # global consistency + if len(puris | luris | nuris) != len(nodes) + len(literals) + len(predicates): + return False + # all checks passed + return True + + @classmethod + def Union(cls, *args: typing.Union['Schema', typing.Iterable['Schema']]) -> 'Schema': + """Combine multiple Schema instances into a single one. + As argument, you can either pass multiple Schema instances, or a single + iterable over Schema instances. Any abc.Iterable will be accepted. + + Example: + + >>> a, b, c = Schema.Empty(), Schema.Empty(), Schema.Empty() + >>> # multiple Schema instances + >>> Schema.Union(a, b, c) + >>> # A single iterable over Schema instances + >>> Schema.Union([a, b, c]) + + """ + if len(args) == 0: + raise TypeError('Schema.Union requires at least one argument (Schema or Iterable)') + if isinstance(args[0], cls): # args is sequence of Schema instances + pass + elif len(args) == 1 and isinstance(args[0], abc.Iterable): # args is a single iterable + args = args[0] + else: + raise TypeError(f'expected multiple Schema instances or a single Iterable, found {args}') + + nodes, literals, predicates = set(), set(), set() + for schema in args: + # check argument + if not isinstance(schema, cls): + raise TypeError(schema) + # merge with previous schemas + nodes |= set(schema.nodes()) + literals |= set(schema.literals()) + predicates |= set(schema.predicates()) + # return new Schema instance + return cls(predicates, nodes, literals) + + def union(self, other: 'Schema') -> 'Schema': + """Merge *other* and *self* into a new Schema. *self* takes precedence.""" + # check type + if not isinstance(other, type(self)): + raise TypeError(other) + # return combined schemas + return self.Union(self, other) + + def __add__(self, other: typing.Any) -> 'Schema': + """Alias for Schema.union.""" + try: # return merged schemas + return self.union(other) + except TypeError: + return NotImplemented + + def __or__(self, other: typing.Any) -> 'Schema': + """Alias for Schema.union.""" + return self.__add__(other) + + + ## getters ## + # FIXME: which of the getters below are actually needed? + # FIXME: interchangeability of URI and _Type?! + + def has_node(self, node: URI) -> bool: + return node in self._nodes + + def has_literal(self, lit: URI) -> bool: + return lit in self._literals + + def has_predicate(self, pred: URI) -> bool: + return pred in self._predicates + + def nodes(self) -> typing.Iterator[types.Node]: # FIXME: type annotation + return self._nodes.values() + + def literals(self) -> typing.Iterator[types.Literal]: # FIXME: type annotation + return self._literals.values() + + def predicates(self) -> typing.Iterator[types.Predicate]: # FIXME: type annotation + return self._predicates.values() + + def node(self, uri: URI) -> types.Node: + """Return the Node matching the *uri*.""" + return self._nodes[uri] + + def predicate(self, uri: URI) -> types.Predicate: + """Return the Predicate matching the *uri*.""" + return self._predicates[uri] + + def literal(self, uri: URI) -> types.Literal: + """Return the Literal matching the *uri*.""" + return self._literals[uri] + + + ## constructors ## + + + @classmethod + def Empty(cls) -> 'Schema': + node = types.Node(ns.bsfs.Node, None) + literal = types.Literal(ns.bsfs.Literal, None) + predicate = types.Predicate( + uri=ns.bsfs.Predicate, + parent=None, + domain=node, + range=None, + unique=False, + ) + return cls((predicate, ), (node, ), (literal, )) + + + @classmethod + def from_string(cls, schema: str) -> 'Schema': + """Load and return a Schema from a string.""" + # parse string into rdf graph + graph = rdflib.Graph() + graph.parse(data=schema, format='turtle') + + def _fetch_hierarchically(factory, curr): + # emit current node + yield curr + # walk through childs + for child in graph.subjects(rdflib.URIRef(ns.rdfs.subClassOf), rdflib.URIRef(curr.uri)): + # convert to URI + child = URI(child) + # check circular dependency + if child == curr.uri or child in {node.uri for node in curr.parents()}: + raise errors.ConsistencyError('circular dependency') + # recurse and emit (sub*)childs + yield from _fetch_hierarchically(factory, factory(child, curr)) + + # fetch nodes + nodes = set(_fetch_hierarchically(types.Node, types.Node(ns.bsfs.Node, None))) + nodes_lut = {node.uri: node for node in nodes} + if len(nodes_lut) != len(nodes): + raise errors.ConsistencyError('inconsistent nodes') + + # fetch literals + literals = set(_fetch_hierarchically(types.Literal, types.Literal(ns.bsfs.Literal, None))) + literals_lut = {lit.uri: lit for lit in literals} + if len(literals_lut) != len(literals): + raise errors.ConsistencyError('inconsistent literals') + + # fetch predicates + def build_predicate(uri, parent): + uri = rdflib.URIRef(uri) + # get domain + domains = set(graph.objects(uri, rdflib.RDFS.domain)) + if len(domains) != 1: + raise errors.ConsistencyError(f'inconsistent domain: {domains}') + dom = nodes_lut.get(next(iter(domains))) + if dom is None: + raise errors.ConsistencyError('missing domain') + # get range + ranges = set(graph.objects(uri, rdflib.RDFS.range)) + if len(ranges) != 1: + raise errors.ConsistencyError(f'inconsistent range: {ranges}') + rng = next(iter(ranges)) + rng = nodes_lut.get(rng, literals_lut.get(rng)) + if rng is None: + raise errors.ConsistencyError('missing range') + # get unique flag + uniques = set(graph.objects(uri, rdflib.URIRef(ns.bsfs.unique))) + if len(uniques) != 1: + raise errors.ConsistencyError(f'inconsistent unique flags: {uniques}') + unique = bool(next(iter(uniques))) + # build Predicate + return types.Predicate(URI(uri), parent, dom, rng, unique) + + root_predicate = types.Predicate( + uri=ns.bsfs.Predicate, + parent=None, + domain=nodes_lut[ns.bsfs.Node], + range=None, # FIXME: Unclear how to handle this! Can be either a Literal or a Node + unique=False, + ) + predicates = _fetch_hierarchically(build_predicate, root_predicate) + # return Schema + return cls(predicates, nodes, literals) + +## EOF ## diff --git a/bsfs/schema/types.py b/bsfs/schema/types.py new file mode 100644 index 0000000..6e257e3 --- /dev/null +++ b/bsfs/schema/types.py @@ -0,0 +1,269 @@ +""" + +Part of the BlackStar filesystem (bsfs) module. +A copy of the license is provided with the project. +Author: Matthias Baumgartner, 2022 +""" +# imports +import typing + +# bsfs imports +from bsfs.utils import errors, URI, typename + +# exports +__all__: typing.Sequence[str] = ( + 'Literal', + 'Node', + 'Predicate', + ) + + +## code ## + +class _Type(): + """A class is defined via its uri. + + Classes define a partial order. + The order operators indicate whether some class is a + superclass (greater-than) or a subclass (less-than) of another. + Comparisons are only supported within the same type. + + For example, consider the class hierarchy below: + + Vehicle + Two-wheel + Bike + Bicycle + + >>> vehicle = _Type('Vehicle') + >>> twowheel = _Type('Two-wheel', vehicle) + >>> bike = _Type('Bike', twowheel) + >>> bicycle = _Type('Bicycle', twowheel) + + Two-wheel is equivalent to itself + >>> twowheel == vehicle + False + >>> twowheel == twowheel + True + >>> twowheel == bicycle + False + + Two-wheel is a true subclass of Vehicle + >>> twowheel < vehicle + True + >>> twowheel < twowheel + False + >>> twowheel < bicycle + False + + Two-wheel is a subclass of itself and Vehicle + >>> twowheel <= vehicle + True + >>> twowheel <= twowheel + True + >>> twowheel <= bicycle + False + + Two-wheel is a true superclass of Bicycle + >>> twowheel > vehicle + False + >>> twowheel > twowheel + False + >>> twowheel > bicycle + True + + Two-wheel is a superclass of itself and Bicycle + >>> twowheel >= vehicle + False + >>> twowheel >= twowheel + True + >>> twowheel >= bicycle + True + + Analoguous to sets, this is not a total order: + >>> bike < bicycle + False + >>> bike > bicycle + False + >>> bike == bicycle + False + """ + + # class uri. + uri: URI + + # parent's class uris. + parent: typing.Optional['_Type'] + + def __init__( + self, + uri: URI, + parent: typing.Optional['_Type'] = None, + ): + self.uri = uri + self.parent = parent + + def parents(self) -> typing.Generator['_Type', None, None]: + """Generate a list of parent nodes.""" + curr = self.parent + while curr is not None: + yield curr + curr = curr.parent + + def get_child(self, uri: URI, **kwargs): + """Return a child of the current class.""" + return type(self)(uri, self, **kwargs) + + def __str__(self) -> str: + return f'{typename(self)}({self.uri})' + + def __repr__(self) -> str: + return f'{typename(self)}({self.uri}, {repr(self.parent)})' + + def __hash__(self) -> int: + return hash((type(self), self.uri, self.parent)) + + def __eq__(self, other: typing.Any) -> bool: + """Return True iff *self* is equivalent to *other*.""" + return type(self) == type(other) \ + and self.uri == other.uri \ + and self.parent == other.parent + + def __lt__(self, other: typing.Any) -> bool: + """Return True iff *self* is a true subclass of *other*.""" + if not type(self) == type(other): # type mismatch + return NotImplemented + elif self.uri == other.uri: # equivalence + return False + elif self in other.parents(): # superclass + return False + elif other in self.parents(): # subclass + return True + else: # not related + return False + + def __le__(self, other: typing.Any) -> bool: + """Return True iff *self* is equivalent or a subclass of *other*.""" + if not type(self) == type(other): # type mismatch + return NotImplemented + elif self.uri == other.uri: # equivalence + return True + elif self in other.parents(): # superclass + return False + elif other in self.parents(): # subclass + return True + else: # not related + return False + + def __gt__(self, other: typing.Any) -> bool: + """Return True iff *self* is a true superclass of *other*.""" + if not type(self) == type(other): # type mismatch + return NotImplemented + elif self.uri == other.uri: # equivalence + return False + elif self in other.parents(): # superclass + return True + elif other in self.parents(): # subclass + return False + else: # not related + return False + + def __ge__(self, other: typing.Any) -> bool: + """Return True iff *self* is eqiuvalent or a superclass of *other*.""" + if not type(self) == type(other): # type mismatch + return NotImplemented + elif self.uri == other.uri: # equivalence + return True + elif self in other.parents(): # superclass + return True + elif other in self.parents(): # subclass + return False + else: # not related + return False + + +class _Vertex(_Type): + """Graph vertex types. Can be a Node or a Literal.""" + def __init__(self, uri: URI, parent: typing.Optional['_Vertex']): + super().__init__(uri, parent) + + +class Node(_Vertex): + """Node type.""" + def __init__(self, uri: URI, parent: typing.Optional['Node']): + super().__init__(uri, parent) + + +class Literal(_Vertex): + """Literal type.""" + def __init__(self, uri: URI, parent: typing.Optional['Literal']): + super().__init__(uri, parent) + + +class Predicate(_Type): + """Predicate type.""" + + # source type. + domain: Node + + # destination type. + range: typing.Optional[typing.Union[Node, Literal]] + + # maximum cardinality of type. + unique: bool + + def __init__( + self, + # Type members + uri: URI, + parent: 'Predicate', + # Predicate members + domain: Node, + range: typing.Optional[typing.Union[Node, Literal]], + unique: bool, + ): + # check arguments + if not isinstance(domain, Node): + raise TypeError(domain) + if range is not None and not isinstance(range, Node) and not isinstance(range, Literal): + raise TypeError(range) + # initialize + super().__init__(uri, parent) + self.domain = domain + self.range = range + self.unique = unique + + def __hash__(self) -> int: + return hash((super().__hash__(), self.domain, self.range, self.unique)) + + def __eq__(self, other: typing.Any) -> bool: + return super().__eq__(other) \ + and self.domain == other.domain \ + and self.range == other.range \ + and self.unique == other.unique + + def get_child( + self, + uri: URI, + domain: typing.Optional[Node] = None, + range: typing.Optional[_Vertex] = None, + unique: typing.Optional[bool] = None, + **kwargs, + ): + """Return a child of the current class.""" + if domain is None: + domain = self.domain + if not domain <= self.domain: + raise errors.ConsistencyError(f'{domain} must be a subclass of {self.domain}') + if range is None: + range = self.range + if range is None: # inherited range from ns.bsfs.Predicate + raise ValueError('range must be defined by the parent or argument') + if self.range is not None and not range <= self.range: + raise errors.ConsistencyError(f'{range} must be a subclass of {self.range}') + if unique is None: + unique = self.unique + return super().get_child(uri, domain=domain, range=range, unique=unique, **kwargs) + + +## EOF ## |