From bbfcee4fffc553b5dd08f37a79dd6ccddbf340f8 Mon Sep 17 00:00:00 2001 From: Matthias Baumgartner Date: Thu, 8 Dec 2022 16:32:52 +0100 Subject: uri and some utils --- bsfs/__init__.py | 8 ++ bsfs/utils/__init__.py | 20 +++++ bsfs/utils/commons.py | 23 ++++++ bsfs/utils/uri.py | 196 +++++++++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 247 insertions(+) create mode 100644 bsfs/__init__.py create mode 100644 bsfs/utils/__init__.py create mode 100644 bsfs/utils/commons.py create mode 100644 bsfs/utils/uri.py (limited to 'bsfs') diff --git a/bsfs/__init__.py b/bsfs/__init__.py new file mode 100644 index 0000000..f5f5cbc --- /dev/null +++ b/bsfs/__init__.py @@ -0,0 +1,8 @@ +""" + +Part of the BlackStar filesystem (bsfs) module. +A copy of the license is provided with the project. +Author: Matthias Baumgartner, 2022 +""" + +## EOF ## diff --git a/bsfs/utils/__init__.py b/bsfs/utils/__init__.py new file mode 100644 index 0000000..56a9323 --- /dev/null +++ b/bsfs/utils/__init__.py @@ -0,0 +1,20 @@ +""" + +Part of the BlackStar filesystem (bsfs) module. +A copy of the license is provided with the project. +Author: Matthias Baumgartner, 2022 +""" +# imports +import typing + +# inner-module imports +from .commons import typename +from .uri import URI + +# exports +__all__ : typing.Sequence[str] = ( + 'URI', + 'typename', + ) + +## EOF ## diff --git a/bsfs/utils/commons.py b/bsfs/utils/commons.py new file mode 100644 index 0000000..bad2fe0 --- /dev/null +++ b/bsfs/utils/commons.py @@ -0,0 +1,23 @@ +""" + +Part of the BlackStar filesystem (bsfs) module. +A copy of the license is provided with the project. 
# imports
import re
import typing


## code (bsfs/utils/commons.py) ##

def typename(obj) -> str:
    """Return the type name of *obj*."""
    return type(obj).__name__


## code (bsfs/utils/uri.py) ##

# constants
# NOTE: The group names are the ones requested by the URI properties and by
# URI.is_parseable below ('scheme', 'authority', 'path', ...).
RX_URI = re.compile(r'''
    ^
    (?:(?P<scheme>[^:/?#]+):)?      # scheme, ://-delimited
    (?://(?P<authority>[^/?#]*))?   # authority (user@host:port), [/#?]-delimited
    (?P<path>[^?#]*)                # path, [#?]-delimited
    (?:\?(?P<query>[^#]*))?         # query, [#]-delimited
    (?:\#(?P<fragment>.*))?         # fragment, remaining characters
    $
    ''', re.VERBOSE + re.IGNORECASE)

RX_HOST = re.compile(r'''
    ^
    (?:(?P<userinfo>[^@]*)@)?       # userinfo
    (?P<host>
        (?:\[[^\]]+\]) |            # IPv6 address
        (?:[^:]+)                   # IPv4 address or regname
    )
    (?::(?P<port>\d*))?             # port
    $
    ''', re.VERBOSE + re.IGNORECASE)


def _get_optional(
        regexp: re.Pattern,
        query: str,
        grp: str
        ) -> str:
    """Return the regular expression *regexp*'s group *grp* of *query*.

    Raises a `ValueError` if the *query* doesn't match the expression
    or if the group did not take part in the match.
    """
    parts = regexp.search(query)
    value = parts.group(grp) if parts is not None else None
    if value is None:
        raise ValueError(query)
    return value


class URI(str):
    """URI additions to built-in strings.

    Provides properties to access the different components of an URI,
    according to RFC 3986 (https://datatracker.ietf.org/doc/html/rfc3986).

    Note that this class does not actually validate an URI but only offers
    access to components of a *well-formed* URI. Use `urllib.parse` for
    more advanced purposes.

    Every component property raises a `ValueError` if the component is
    not present in the URI; use `URI.get` to obtain a default instead.

    """

    def __new__(cls, value: str):
        """Create a new URI instance.
        Raises a `ValueError` if the (supposed) URI is malformatted.
        """
        if not cls.is_parseable(value):
            raise ValueError(value)
        return str.__new__(cls, value)

    @staticmethod
    def is_parseable(query: str) -> bool:
        """Return True if the *query* can be decomposed into the URI components.

        Note that a valid URI is always parseable, however, an invalid URI
        might be parseable as well. The return value of this method makes
        no claim about the validity of an URI!

        """
        # check uri
        parts = RX_URI.match(query)
        if parts is None:
            return False
        # check authority: absent is fine, otherwise it must decompose
        authority = parts.group('authority')
        return authority is None or RX_HOST.match(authority) is not None

    @staticmethod
    def compose(
            path: str,
            scheme: typing.Optional[str] = None,
            authority: typing.Optional[str] = None,
            user: typing.Optional[str] = None,
            host: typing.Optional[str] = None,
            port: typing.Optional[int] = None,
            query: typing.Optional[str] = None,
            fragment: typing.Optional[str] = None,
            ) -> 'URI':
        """URI composition from components.

        If the *host* argument is supplied, the authority is composed of *user*,
        *host*, and *port* arguments, and the *authority* argument is ignored.
        Note that if the *host* is an IPv6 address, it must be enclosed in brackets.

        Surrounding whitespace is stripped from *path* and a leading '/' is
        added if it is missing; an empty *path* hence becomes '/'.
        Raises a `ValueError` if the composed string is not parseable.
        """
        # strip whitespaces
        path = path.strip()

        # compose authority
        if host is not None:
            authority = ''
            if user is not None:
                authority += user + '@'
            authority += host
            if port is not None:
                authority += ':' + str(port)

        # ensure root on path
        # NOTE: startswith also covers the empty path, where indexing would fail.
        if not path.startswith('/'):
            path = '/' + path

        # compose uri
        uri = ''
        if scheme is not None:
            uri += scheme + ':'
        if authority is not None:
            uri += '//' + authority
        uri += path
        if query is not None:
            uri += '?' + query
        if fragment is not None:
            uri += '#' + fragment

        # return as URI
        return URI(uri)

    @property
    def scheme(self) -> str:
        """Return the protocol/scheme part of the URI."""
        return _get_optional(RX_URI, self, 'scheme')

    @property
    def authority(self) -> str:
        """Return the authority part of the URI, including userinfo and port."""
        return _get_optional(RX_URI, self, 'authority')

    @property
    def userinfo(self) -> str:
        """Return the userinfo part of the URI."""
        return _get_optional(RX_HOST, self.authority, 'userinfo')

    @property
    def host(self) -> str:
        """Return the host part of the URI."""
        return _get_optional(RX_HOST, self.authority, 'host')

    @property
    def port(self) -> int:
        """Return the port part of the URI."""
        return int(_get_optional(RX_HOST, self.authority, 'port'))

    @property
    def path(self) -> str:
        """Return the path part of the URI."""
        return _get_optional(RX_URI, self, 'path')

    @property
    def query(self) -> str:
        """Return the query part of the URI."""
        return _get_optional(RX_URI, self, 'query')

    @property
    def fragment(self) -> str:
        """Return the fragment part of the URI."""
        return _get_optional(RX_URI, self, 'fragment')

    def get(self, component: str, default: typing.Optional[typing.Any] = None) -> typing.Optional[typing.Any]:
        """Return the component or a default value.

        Raises a `ValueError` if *component* is not the name of an URI component.
        """
        # check args
        if component not in ('scheme', 'authority', 'userinfo', 'host',
                             'port', 'path', 'query', 'fragment'):
            raise ValueError(component)
        try:
            # return component's value
            return getattr(self, component)
        except ValueError:
            # the component is missing: return the default value
            return default

## EOF ##
# imports
import typing

# bsfs imports
from bsfs.utils import URI, typename

# exports
__all__: typing.Sequence[str] = (
    'ClosedNamespace',
    'Namespace',
    )


## code (bsfs/namespace/namespace.py) ##

class Namespace():
    """A namespace consists of a common prefix that is used in a set of URIs.

    Note that the prefix must include the separator between
    path and fragment (typically a '#' or a '/').

    A URI within the namespace is obtained via attribute access
    (`ns.fragment`) or via item access (`ns['fragment']`).
    """

    # namespace prefix.
    prefix: URI

    def __init__(self, prefix: URI):
        self.prefix = URI(prefix)

    def __eq__(self, other: typing.Any) -> bool:
        return isinstance(other, type(self)) and self.prefix == other.prefix

    def __hash__(self) -> int:
        return hash((type(self), self.prefix))

    def __str__(self) -> str:
        return f'{typename(self)}({self.prefix})'

    def __repr__(self) -> str:
        return f'{typename(self)}({self.prefix})'

    def __getattr__(self, fragment: str) -> URI:
        """Return prefix + fragment."""
        # NOTE: Only invoked for names that are not found through the
        # regular attribute lookup.
        return URI(self.prefix + fragment)

    def __getitem__(self, fragment: str) -> URI:
        """Alias for getattr(self, fragment)."""
        return self.__getattr__(fragment)


class ClosedNamespace(Namespace):
    """Namespace that covers a restricted set of URIs."""

    # set of permissible fragments.
    fragments: typing.Set[str]

    def __init__(self, prefix: URI, *args: str):
        super().__init__(prefix)
        self.fragments = set(args)

    def __eq__(self, other: typing.Any) -> bool:
        return super().__eq__(other) and self.fragments == other.fragments

    def __hash__(self) -> int:
        return hash((type(self), self.prefix, tuple(sorted(self.fragments))))

    def __getattr__(self, fragment: str) -> URI:
        """Return prefix + fragment.
        Raises a KeyError if the fragment is not allowed in this namespace.
        """
        if fragment not in self.fragments:
            # report the offending fragment, not a fixed string
            raise KeyError(fragment)
        return super().__getattr__(fragment)

## EOF ##
import namespace + +# essential bsfs namespaces +bsfs: namespace.Namespace = namespace.Namespace(URI('http://bsfs.ai/schema/')) + +# additional bsfs namespaces +bse: namespace.Namespace = namespace.Namespace(URI('http://bsfs.ai/schema/Entity#')) +bsm: namespace.Namespace = namespace.Namespace(URI('http://bsfs.ai/schema/Meta#')) + +# generic namespaces +rdf: namespace.Namespace = namespace.Namespace(URI('http://www.w3.org/1999/02/22-rdf-syntax-ns#')) +rdfs: namespace.Namespace = namespace.Namespace(URI('http://www.w3.org/2000/01/rdf-schema#')) +schema: namespace.Namespace = namespace.Namespace(URI('http://schema.org/')) +xsd: namespace.Namespace = namespace.Namespace(URI('http://www.w3.org/2001/XMLSchema#')) + +__all__: typing.Sequence[str] = ( + 'bse', + 'bsfs', + 'bsm', + 'rdf', + 'rdfs', + 'schema', + 'xsd', + ) + +## EOF ## -- cgit v1.2.3 From 7eb61d117a995b076d36c55d2c7c268665360813 Mon Sep 17 00:00:00 2001 From: Matthias Baumgartner Date: Thu, 8 Dec 2022 16:34:13 +0100 Subject: schema --- bsfs/schema/__init__.py | 24 ++++ bsfs/schema/schema.py | 325 ++++++++++++++++++++++++++++++++++++++++++++++++ bsfs/schema/types.py | 269 +++++++++++++++++++++++++++++++++++++++ 3 files changed, 618 insertions(+) create mode 100644 bsfs/schema/__init__.py create mode 100644 bsfs/schema/schema.py create mode 100644 bsfs/schema/types.py (limited to 'bsfs') diff --git a/bsfs/schema/__init__.py b/bsfs/schema/__init__.py new file mode 100644 index 0000000..ce381ec --- /dev/null +++ b/bsfs/schema/__init__.py @@ -0,0 +1,24 @@ +""" + +Part of the BlackStar filesystem (bsfs) module. +A copy of the license is provided with the project. +Author: Matthias Baumgartner, 2022 +""" +# imports +import typing + +# inner-module imports +#from . 
# imports
from collections import abc, namedtuple
import typing
import rdflib

# bsfs imports
from bsfs.namespace import ns
from bsfs.utils import errors, URI, typename

# inner-module imports
from . import types

# exports
__all__: typing.Sequence[str] = (
    'Schema',
    )


## code (bsfs/schema/schema.py) ##

class Schema():
    """Collection of node, literal, and predicate types, indexed by their URI.

    The constructor completes the passed sets: it adds the parents of all
    types as well as the domain and range of all predicates. Every URI may
    identify at most one type across nodes, literals, and predicates.
    """

    # node types by uri.
    _nodes: typing.Dict[URI, types.Node]

    # literal types by uri.
    _literals: typing.Dict[URI, types.Literal]

    # predicate types by uri.
    _predicates: typing.Dict[URI, types.Predicate]

    def __init__(
            self,
            predicates: typing.Iterable[types.Predicate],
            nodes: typing.Optional[typing.Iterable[types.Node]] = None,
            literals: typing.Optional[typing.Iterable[types.Literal]] = None,
            ):
        """Create a schema from *predicates*, *nodes*, and *literals*.

        Raises a `ConsistencyError` if two distinct types share a URI.
        """
        # materialize arguments
        nodes = set(nodes) if nodes is not None else set()
        literals = set(literals) if literals is not None else set()
        predicates = set(predicates)
        # include parents in predicates set
        predicates |= {par for pred in predicates for par in pred.parents()}
        # include predicate domain in nodes set
        nodes |= {pred.domain for pred in predicates}
        # include predicate range in nodes and literals sets
        prange = {pred.range for pred in predicates if pred.range is not None}
        nodes |= {vert for vert in prange if isinstance(vert, types.Node)}
        literals |= {vert for vert in prange if isinstance(vert, types.Literal)}
        # include parents in nodes and literals sets
        # NOTE: Must be done after predicate domain/range was handled
        # so that their parents are included as well.
        nodes |= {par for node in nodes for par in node.parents()}
        literals |= {par for lit in literals for par in lit.parents()}
        # assign members
        self._nodes = {node.uri: node for node in nodes}
        self._literals = {lit.uri: lit for lit in literals}
        self._predicates = {pred.uri: pred for pred in predicates}
        # verify unique uris
        if len(nodes) != len(self._nodes):
            raise errors.ConsistencyError('inconsistent nodes')
        if len(literals) != len(self._literals):
            raise errors.ConsistencyError('inconsistent literals')
        if len(predicates) != len(self._predicates):
            raise errors.ConsistencyError('inconsistent predicates')
        # verify globally unique uris
        n_uris = len(set(self._nodes) | set(self._literals) | set(self._predicates))
        if n_uris != len(self._nodes) + len(self._literals) + len(self._predicates):
            raise errors.ConsistencyError('URI dual use')


    ## essentials ##

    def __str__(self) -> str:
        return f'{typename(self)}()'

    def __repr__(self) -> str:
        return f'{typename(self)}({sorted(self._nodes)}, {sorted(self._literals)}, {sorted(self._predicates)})'

    def __hash__(self) -> int:
        # NOTE: The members are ordered by their uri (the dict key). Sorting
        # the type objects themselves relies on their `<`, which tests the
        # subclass relation; unrelated types compare False both ways, so the
        # result would depend on the input order and equal schemas could
        # produce different hashes.
        return hash((
            type(self),
            tuple(self._nodes[uri] for uri in sorted(self._nodes)),
            tuple(self._literals[uri] for uri in sorted(self._literals)),
            tuple(self._predicates[uri] for uri in sorted(self._predicates)),
            ))

    def __eq__(self, other: typing.Any) -> bool:
        return isinstance(other, type(self)) \
           and self._nodes == other._nodes \
           and self._literals == other._literals \
           and self._predicates == other._predicates


    ## operators ##

    SchemaDiff = namedtuple('SchemaDiff', ['nodes', 'literals', 'predicates'])

    def diff(self, other: 'Schema') -> SchemaDiff:
        """Return node, literals, and predicates that are in *self* but not in *other*."""
        return self.SchemaDiff(
            nodes=set(self.nodes()) - set(other.nodes()),
            literals=set(self.literals()) - set(other.literals()),
            predicates=set(self.predicates()) - set(other.predicates()),
            )

    def __sub__(self, other: typing.Any) -> SchemaDiff:
        """Alias for `Schema.diff`."""
        if not isinstance(other, Schema):
            return NotImplemented
        return self.diff(other)

    def consistent_with(self, other: 'Schema') -> bool:
        """Checks if two schemas have different definitions for the same uri.
        Tests nodes, literals, and predicates.

        Returns True if no uri is defined differently in the two schemas.
        Raises a `TypeError` if *other* is not a Schema.
        """
        # check arg
        if not isinstance(other, Schema):
            raise TypeError(other)
        # node consistency
        nodes = set(self.nodes()) | set(other.nodes())
        nuris = {node.uri for node in nodes}
        if len(nodes) != len(nuris):
            return False
        # literal consistency
        literals = set(self.literals()) | set(other.literals())
        luris = {lit.uri for lit in literals}
        if len(literals) != len(luris):
            return False
        # predicate consistency
        predicates = set(self.predicates()) | set(other.predicates())
        puris = {pred.uri for pred in predicates}
        if len(predicates) != len(puris):
            return False
        # global consistency
        if len(puris | luris | nuris) != len(nodes) + len(literals) + len(predicates):
            return False
        # all checks passed
        return True

    @classmethod
    def Union(cls, *args: typing.Union['Schema', typing.Iterable['Schema']]) -> 'Schema':
        """Combine multiple Schema instances into a single one.
        As argument, you can either pass multiple Schema instances, or a single
        iterable over Schema instances. Any abc.Iterable will be accepted.

        Example:

        >>> a, b, c = Schema.Empty(), Schema.Empty(), Schema.Empty()
        >>> # multiple Schema instances
        >>> Schema.Union(a, b, c)
        >>> # A single iterable over Schema instances
        >>> Schema.Union([a, b, c])

        Raises a `TypeError` if no argument is passed or if an argument
        is not a Schema.
        """
        if len(args) == 0:
            raise TypeError('Schema.Union requires at least one argument (Schema or Iterable)')
        if isinstance(args[0], cls): # args is sequence of Schema instances
            schemas = args
        elif len(args) == 1 and isinstance(args[0], abc.Iterable): # args is a single iterable
            schemas = args[0]
        else:
            raise TypeError(f'expected multiple Schema instances or a single Iterable, found {args}')

        nodes, literals, predicates = set(), set(), set()
        for schema in schemas:
            # check argument
            if not isinstance(schema, cls):
                raise TypeError(schema)
            # merge with previous schemas
            nodes |= set(schema.nodes())
            literals |= set(schema.literals())
            predicates |= set(schema.predicates())
        # return new Schema instance
        return cls(predicates, nodes, literals)

    def union(self, other: 'Schema') -> 'Schema':
        """Merge *other* and *self* into a new Schema. *self* takes precedence."""
        # check type
        if not isinstance(other, type(self)):
            raise TypeError(other)
        # return combined schemas
        return self.Union(self, other)

    def __add__(self, other: typing.Any) -> 'Schema':
        """Alias for Schema.union."""
        try: # return merged schemas
            return self.union(other)
        except TypeError:
            return NotImplemented

    def __or__(self, other: typing.Any) -> 'Schema':
        """Alias for Schema.union."""
        return self.__add__(other)


    ## getters ##
    # FIXME: which of the getters below are actually needed?
    # FIXME: interchangeability of URI and _Type?!

    def has_node(self, node: URI) -> bool:
        """Return True if a Node with uri *node* is part of the schema."""
        return node in self._nodes

    def has_literal(self, lit: URI) -> bool:
        """Return True if a Literal with uri *lit* is part of the schema."""
        return lit in self._literals

    def has_predicate(self, pred: URI) -> bool:
        """Return True if a Predicate with uri *pred* is part of the schema."""
        return pred in self._predicates

    def nodes(self) -> typing.Iterable[types.Node]:
        """Return the schema's Node types (a view on the internal dict)."""
        return self._nodes.values()

    def literals(self) -> typing.Iterable[types.Literal]:
        """Return the schema's Literal types (a view on the internal dict)."""
        return self._literals.values()

    def predicates(self) -> typing.Iterable[types.Predicate]:
        """Return the schema's Predicate types (a view on the internal dict)."""
        return self._predicates.values()

    def node(self, uri: URI) -> types.Node:
        """Return the Node matching the *uri*."""
        return self._nodes[uri]

    def predicate(self, uri: URI) -> types.Predicate:
        """Return the Predicate matching the *uri*."""
        return self._predicates[uri]

    def literal(self, uri: URI) -> types.Literal:
        """Return the Literal matching the *uri*."""
        return self._literals[uri]


    ## constructors ##

    @classmethod
    def Empty(cls) -> 'Schema':
        """Return a Schema that holds only the root Node, Literal, and Predicate."""
        node = types.Node(ns.bsfs.Node, None)
        literal = types.Literal(ns.bsfs.Literal, None)
        predicate = types.Predicate(
            uri=ns.bsfs.Predicate,
            parent=None,
            domain=node,
            range=None,
            unique=False,
            )
        return cls((predicate, ), (node, ), (literal, ))

    @classmethod
    def from_string(cls, schema: str) -> 'Schema':
        """Load and return a Schema from a string.

        The string is parsed as turtle. Type hierarchies are discovered by
        following rdfs:subClassOf statements downwards from the root Node,
        Literal, and Predicate. Raises a `ConsistencyError` on circular
        dependencies, on duplicate uris, or if a predicate does not have
        exactly one known domain, one known range, and one unique flag.
        """
        # parse string into rdf graph
        graph = rdflib.Graph()
        graph.parse(data=schema, format='turtle')

        def _fetch_hierarchically(factory, curr):
            # emit current node
            yield curr
            # walk through childs
            for child in graph.subjects(rdflib.URIRef(ns.rdfs.subClassOf), rdflib.URIRef(curr.uri)):
                # convert to URI
                child = URI(child)
                # check circular dependency
                if child == curr.uri or child in {node.uri for node in curr.parents()}:
                    raise errors.ConsistencyError('circular dependency')
                # recurse and emit (sub*)childs
                yield from _fetch_hierarchically(factory, factory(child, curr))

        # fetch nodes
        nodes = set(_fetch_hierarchically(types.Node, types.Node(ns.bsfs.Node, None)))
        nodes_lut = {node.uri: node for node in nodes}
        if len(nodes_lut) != len(nodes):
            raise errors.ConsistencyError('inconsistent nodes')

        # fetch literals
        literals = set(_fetch_hierarchically(types.Literal, types.Literal(ns.bsfs.Literal, None)))
        literals_lut = {lit.uri: lit for lit in literals}
        if len(literals_lut) != len(literals):
            raise errors.ConsistencyError('inconsistent literals')

        # fetch predicates
        def build_predicate(uri, parent):
            uri = rdflib.URIRef(uri)
            # get domain
            domains = set(graph.objects(uri, rdflib.RDFS.domain))
            if len(domains) != 1:
                raise errors.ConsistencyError(f'inconsistent domain: {domains}')
            dom = nodes_lut.get(next(iter(domains)))
            if dom is None:
                raise errors.ConsistencyError('missing domain')
            # get range
            ranges = set(graph.objects(uri, rdflib.RDFS.range))
            if len(ranges) != 1:
                raise errors.ConsistencyError(f'inconsistent range: {ranges}')
            rng = next(iter(ranges))
            rng = nodes_lut.get(rng, literals_lut.get(rng))
            if rng is None:
                raise errors.ConsistencyError('missing range')
            # get unique flag
            uniques = set(graph.objects(uri, rdflib.URIRef(ns.bsfs.unique)))
            if len(uniques) != 1:
                raise errors.ConsistencyError(f'inconsistent unique flags: {uniques}')
            unique = bool(next(iter(uniques)))
            # build Predicate
            return types.Predicate(URI(uri), parent, dom, rng, unique)

        root_predicate = types.Predicate(
            uri=ns.bsfs.Predicate,
            parent=None,
            domain=nodes_lut[ns.bsfs.Node],
            range=None, # FIXME: Unclear how to handle this! Can be either a Literal or a Node
            unique=False,
            )
        predicates = _fetch_hierarchically(build_predicate, root_predicate)
        # return Schema
        return cls(predicates, nodes, literals)

## EOF ##
# imports
import typing

# bsfs imports
from bsfs.utils import errors, URI, typename

# exports
__all__: typing.Sequence[str] = (
    'Literal',
    'Node',
    'Predicate',
    )


## code (bsfs/schema/types.py) ##

class _Type():
    """A class is defined via its uri.

    Classes define a partial order.
    The order operators indicate whether some class is a
    superclass (greater-than) or a subclass (less-than) of another.
    Comparisons are only supported within the same type.

    For example, consider the class hierarchy below:

    Vehicle
        Two-wheel
            Bike
            Bicycle

    >>> vehicle = _Type('Vehicle')
    >>> twowheel = _Type('Two-wheel', vehicle)
    >>> bike = _Type('Bike', twowheel)
    >>> bicycle = _Type('Bicycle', twowheel)

    Two-wheel is equivalent to itself
    >>> twowheel == vehicle
    False
    >>> twowheel == twowheel
    True
    >>> twowheel == bicycle
    False

    Two-wheel is a true subclass of Vehicle
    >>> twowheel < vehicle
    True
    >>> twowheel < twowheel
    False
    >>> twowheel < bicycle
    False

    Two-wheel is a subclass of itself and Vehicle
    >>> twowheel <= vehicle
    True
    >>> twowheel <= twowheel
    True
    >>> twowheel <= bicycle
    False

    Two-wheel is a true superclass of Bicycle
    >>> twowheel > vehicle
    False
    >>> twowheel > twowheel
    False
    >>> twowheel > bicycle
    True

    Two-wheel is a superclass of itself and Bicycle
    >>> twowheel >= vehicle
    False
    >>> twowheel >= twowheel
    True
    >>> twowheel >= bicycle
    True

    Analogous to sets, this is not a total order:
    >>> bike < bicycle
    False
    >>> bike > bicycle
    False
    >>> bike == bicycle
    False
    """

    # class uri.
    uri: URI

    # parent class (None for a root class).
    parent: typing.Optional['_Type']

    def __init__(
            self,
            uri: URI,
            parent: typing.Optional['_Type'] = None,
            ):
        self.uri = uri
        self.parent = parent

    def parents(self) -> typing.Generator['_Type', None, None]:
        """Generate the parent classes, from the direct parent up to the root."""
        curr = self.parent
        while curr is not None:
            yield curr
            curr = curr.parent

    def get_child(self, uri: URI, **kwargs):
        """Return a child of the current class."""
        return type(self)(uri, self, **kwargs)

    def __str__(self) -> str:
        return f'{typename(self)}({self.uri})'

    def __repr__(self) -> str:
        return f'{typename(self)}({self.uri}, {repr(self.parent)})'

    def __hash__(self) -> int:
        return hash((type(self), self.uri, self.parent))

    def __eq__(self, other: typing.Any) -> bool:
        """Return True iff *self* is equivalent to *other*."""
        return type(self) is type(other) \
           and self.uri == other.uri \
           and self.parent == other.parent

    def __lt__(self, other: typing.Any) -> bool:
        """Return True iff *self* is a true subclass of *other*."""
        if type(self) is not type(other): # type mismatch
            return NotImplemented
        if self.uri == other.uri: # equivalence
            return False
        if self in other.parents(): # superclass
            return False
        # subclass (True) or not related (False)
        return other in self.parents()

    def __le__(self, other: typing.Any) -> bool:
        """Return True iff *self* is equivalent or a subclass of *other*."""
        if type(self) is not type(other): # type mismatch
            return NotImplemented
        if self.uri == other.uri: # equivalence
            return True
        if self in other.parents(): # superclass
            return False
        # subclass (True) or not related (False)
        return other in self.parents()

    def __gt__(self, other: typing.Any) -> bool:
        """Return True iff *self* is a true superclass of *other*."""
        if type(self) is not type(other): # type mismatch
            return NotImplemented
        if self.uri == other.uri: # equivalence
            return False
        # superclass (True), subclass or not related (False)
        return self in other.parents()

    def __ge__(self, other: typing.Any) -> bool:
        """Return True iff *self* is equivalent or a superclass of *other*."""
        if type(self) is not type(other): # type mismatch
            return NotImplemented
        if self.uri == other.uri: # equivalence
            return True
        # superclass (True), subclass or not related (False)
        return self in other.parents()


class _Vertex(_Type):
    """Graph vertex types. Can be a Node or a Literal."""
    def __init__(self, uri: URI, parent: typing.Optional['_Vertex']):
        super().__init__(uri, parent)


class Node(_Vertex):
    """Node type."""
    def __init__(self, uri: URI, parent: typing.Optional['Node']):
        super().__init__(uri, parent)


class Literal(_Vertex):
    """Literal type."""
    def __init__(self, uri: URI, parent: typing.Optional['Literal']):
        super().__init__(uri, parent)


class Predicate(_Type):
    """Predicate type."""

    # source type.
    domain: Node

    # destination type.
    range: typing.Optional[typing.Union[Node, Literal]]

    # maximum cardinality of type.
    unique: bool

    def __init__(
            self,
            # Type members
            uri: URI,
            parent: typing.Optional['Predicate'],
            # Predicate members
            domain: Node,
            range: typing.Optional[typing.Union[Node, Literal]],
            unique: bool,
            ):
        """Create a predicate.

        Raises a `TypeError` if *domain* is not a Node or if *range* is
        neither None, a Node, nor a Literal.
        """
        # check arguments
        if not isinstance(domain, Node):
            raise TypeError(domain)
        if range is not None and not isinstance(range, (Node, Literal)):
            raise TypeError(range)
        # initialize
        super().__init__(uri, parent)
        self.domain = domain
        self.range = range
        self.unique = unique

    def __hash__(self) -> int:
        return hash((super().__hash__(), self.domain, self.range, self.unique))

    def __eq__(self, other: typing.Any) -> bool:
        return super().__eq__(other) \
           and self.domain == other.domain \
           and self.range == other.range \
           and self.unique == other.unique

    def get_child(
            self,
            uri: URI,
            domain: typing.Optional[Node] = None,
            range: typing.Optional[_Vertex] = None,
            unique: typing.Optional[bool] = None,
            **kwargs,
            ):
        """Return a child of the current class.

        Arguments that are None are inherited from *self*.
        Raises a `ConsistencyError` if *domain* or *range* is not a subclass
        of the respective member of *self*, and a `ValueError` if the range
        is defined neither by the argument nor by *self*.
        """
        if domain is None:
            domain = self.domain
        if not domain <= self.domain:
            raise errors.ConsistencyError(f'{domain} must be a subclass of {self.domain}')
        if range is None:
            range = self.range
        if range is None: # inherited range from ns.bsfs.Predicate
            raise ValueError('range must be defined by the parent or argument')
        if self.range is not None and not range <= self.range:
            raise errors.ConsistencyError(f'{range} must be a subclass of {self.range}')
        if unique is None:
            unique = self.unique
        return super().get_child(uri, domain=domain, range=range, unique=unique, **kwargs)


## EOF ##
# imports
import typing

# exports
__all__: typing.Sequence[str] = (
    )


## code (bsfs/utils/errors.py) ##

class _BSFSError(Exception):
    """Generic bsfs error."""

class SchemaError(_BSFSError):
    """Generic schema errors."""

class ConsistencyError(SchemaError):
    """A requested operation is inconsistent with the schema."""

class InstanceError(SchemaError):
    """An instance affected by some operation is inconsistent with the schema."""

class PermissionDeniedError(_BSFSError):
    """An operation was aborted due to access control restrictions."""

class ProgrammingError(_BSFSError):
    """An assertion-like error that indicates a code-base issue."""

class UnreachableError(ProgrammingError):
    """Bravo, you've reached a point in code that should logically not be reachable."""

## EOF ##
+Author: Matthias Baumgartner, 2022 +""" +# imports +from collections import abc +import hashlib +import os +import platform +import random +import threading +import time +import typing +import uuid + +# constants +HASH = hashlib.sha256 + +# exports +__all__: typing.Sequence[str] = [ + 'UCID', + 'UUID', + ] + + +## code ## + +class UUID(abc.Iterator, abc.Callable): + """Generate 256-bit universally unique IDs. + + This is a 'best-effort' kind of implementation that tries to ensure global + uniqueness, even tough actual uniqueness cannot be guaranteed. + The approach is different from python's uuid module (which implements + RFC 4122) in that it generates longer UUIDs and in that it cannot be + reconstructed whether two UUIDs were generated on the same system. + + The ID is a cryptographic hash over several components: + * host + * system + * process + * thread + * random + * time + * cpu cycles + * content (if available) + + """ + + # host identifier + host: str + + # system identifier + system: str + + # process identifier + process: str + + # thread identifier + thread: str + + def __init__(self, seed: typing.Optional[int] = None): + # initialize static components + self.host = str(uuid.getnode()) + self.system = '-'.join(platform.uname()) + self.process = str(os.getpid()) + self.thread = str(threading.get_ident()) + # initialize random component + random.seed(seed) + + def __call__(self, content: typing.Optional[str] = None) -> str: + """Return a globally unique ID.""" + # content component + content = str(content) if content is not None else '' + # time component + now = str(time.time()) + # clock component + clk = str(time.perf_counter()) + # random component + rnd = str(random.random()) + # build the token from all available components + token = self.host + self.system + self.process + self.thread + rnd + now + clk + content + # return the token's hash + return HASH(token.encode('ascii', 'ignore')).hexdigest() + + def __iter__(self) -> typing.Iterator[str]: + 
"""Iterate indefinitely over universally unique IDs.""" + return self + + def __next__(self) -> str: + """Generate universally unique IDs.""" + return self() + + +class UCID(abc.Callable): + """Generate 256-bit content IDs. + + Effectively computes a cryptographic hash over the content. + + """ + @staticmethod + def from_path(path: str) -> str: + """Read the content from a file.""" + with open(path, 'rb') as ifile: + return HASH(ifile.read()).hexdigest() + +## EOF ## -- cgit v1.2.3 From e8492489098ef5f8566214e083cd2c2d1d449f5a Mon Sep 17 00:00:00 2001 From: Matthias Baumgartner Date: Thu, 8 Dec 2022 16:36:19 +0100 Subject: sparql triple store and graph (nodes, mostly) --- bsfs/graph/__init__.py | 15 +++ bsfs/graph/ac/__init__.py | 20 ++++ bsfs/graph/ac/base.py | 67 +++++++++++ bsfs/graph/ac/null.py | 53 +++++++++ bsfs/graph/graph.py | 65 +++++++++++ bsfs/graph/nodes.py | 243 ++++++++++++++++++++++++++++++++++++++++ bsfs/triple_store/__init__.py | 20 ++++ bsfs/triple_store/base.py | 128 +++++++++++++++++++++ bsfs/triple_store/sparql.py | 253 ++++++++++++++++++++++++++++++++++++++++++ 9 files changed, 864 insertions(+) create mode 100644 bsfs/graph/__init__.py create mode 100644 bsfs/graph/ac/__init__.py create mode 100644 bsfs/graph/ac/base.py create mode 100644 bsfs/graph/ac/null.py create mode 100644 bsfs/graph/graph.py create mode 100644 bsfs/graph/nodes.py create mode 100644 bsfs/triple_store/__init__.py create mode 100644 bsfs/triple_store/base.py create mode 100644 bsfs/triple_store/sparql.py (limited to 'bsfs') diff --git a/bsfs/graph/__init__.py b/bsfs/graph/__init__.py new file mode 100644 index 0000000..3a131e9 --- /dev/null +++ b/bsfs/graph/__init__.py @@ -0,0 +1,15 @@ +""" + +Part of the BlackStar filesystem (bsfs) module. +A copy of the license is provided with the project. 
+Author: Matthias Baumgartner, 2022 +""" +# imports +import typing + +# inner-module imports + +# exports +__all__: typing.Sequence[str] = [] + +## EOF ## diff --git a/bsfs/graph/ac/__init__.py b/bsfs/graph/ac/__init__.py new file mode 100644 index 0000000..420de01 --- /dev/null +++ b/bsfs/graph/ac/__init__.py @@ -0,0 +1,20 @@ +""" + +Part of the BlackStar filesystem (bsfs) module. +A copy of the license is provided with the project. +Author: Matthias Baumgartner, 2022 +""" +# imports +import typing + +# inner-module imports +from .base import AccessControlBase +from .null import NullAC + +# exports +__all__: typing.Sequence[str] = ( + 'AccessControlBase', + 'NullAC', + ) + +## EOF ## diff --git a/bsfs/graph/ac/base.py b/bsfs/graph/ac/base.py new file mode 100644 index 0000000..70475d2 --- /dev/null +++ b/bsfs/graph/ac/base.py @@ -0,0 +1,67 @@ +""" + +Part of the BlackStar filesystem (bsfs) module. +A copy of the license is provided with the project. +Author: Matthias Baumgartner, 2022 +""" +# imports +import abc +import typing + +# bsfs imports +from bsfs import schema as _schema +from bsfs.triple_store import TripleStoreBase +from bsfs.utils import URI + +# exports +__all__: typing.Sequence[str] = ( + 'AccessControlBase', + ) + + +## code ## + +class AccessControlBase(abc.ABC): + """ + """ + + # + __backend: TripleStoreBase + + # + __user: URI + + def __init__( + self, + backend: TripleStoreBase, + user: URI, + ): + self.__backend = backend + self.__user = URI(user) + + @abc.abstractmethod + def is_protected_predicate(self, pred: _schema.Predicate) -> bool: + """Return True if a predicate cannot be modified manually.""" + + @abc.abstractmethod + def create(self, node_type: _schema.Node, guids: typing.Iterable[URI]): + """Perform post-creation operations on nodes, e.g. 
ownership information.""" + + @abc.abstractmethod + def link_from_node(self, node_type: _schema.Node, guids: typing.Iterable[URI]) -> typing.Iterable[URI]: + """Return nodes for which outbound links can be written.""" + + @abc.abstractmethod + def link_to_node(self, node_type: _schema.Node, guids: typing.Iterable[URI]) -> typing.Iterable[URI]: + """Return nodes for which inbound links can be written.""" + + @abc.abstractmethod + def write_literal(self, node_type: _schema.Node, guids: typing.Iterable[URI]) -> typing.Iterable[URI]: + """Return nodes to which literals can be attached.""" + + @abc.abstractmethod + def createable(self, node_type: _schema.Node, guids: typing.Iterable[URI]) -> typing.Iterable[URI]: + """Return nodes that are allowed to be created.""" + + +## EOF ## diff --git a/bsfs/graph/ac/null.py b/bsfs/graph/ac/null.py new file mode 100644 index 0000000..a39b7b9 --- /dev/null +++ b/bsfs/graph/ac/null.py @@ -0,0 +1,53 @@ +""" + +Part of the BlackStar filesystem (bsfs) module. +A copy of the license is provided with the project. +Author: Matthias Baumgartner, 2022 +""" +# imports +import typing + +# bsfs imports +from bsfs import schema as _schema +from bsfs.namespace import ns +from bsfs.utils import URI + +# inner-module imports +from . import base + +# exports +__all__: typing.Sequence[str] = ( + 'NullAC', + ) + + +## code ## + +class NullAC(base.AccessControlBase): + """ + """ + + def is_protected_predicate(self, pred: _schema.Predicate) -> bool: + """Return True if a predicate cannot be modified manually.""" + return pred.uri == ns.bsm.t_created + + def create(self, node_type: _schema.Node, guids: typing.Iterable[URI]): + """Perform post-creation operations on nodes, e.g. 
ownership information.""" + + def link_from_node(self, node_type: _schema.Node, guids: typing.Iterable[URI]) -> typing.Iterable[URI]: + """Return nodes for which outbound links can be written.""" + return guids + + def link_to_node(self, node_type: _schema.Node, guids: typing.Iterable[URI]) -> typing.Iterable[URI]: + """Return nodes for which inbound links can be written.""" + return guids + + def write_literal(self, node_type: _schema.Node, guids: typing.Iterable[URI]) -> typing.Iterable[URI]: + """Return nodes to which literals can be attached.""" + return guids + + def createable(self, node_type: _schema.Node, guids: typing.Iterable[URI]) -> typing.Iterable[URI]: + """Return nodes that are allowed to be created.""" + return guids + +## EOF ## diff --git a/bsfs/graph/graph.py b/bsfs/graph/graph.py new file mode 100644 index 0000000..06271f6 --- /dev/null +++ b/bsfs/graph/graph.py @@ -0,0 +1,65 @@ +""" + +Part of the BlackStar filesystem (bsfs) module. +A copy of the license is provided with the project. +Author: Matthias Baumgartner, 2022 +""" +# imports +import typing + +# bsfs imports +from bsfs.schema import Schema +from bsfs.triple_store import TripleStoreBase +from bsfs.utils import URI, typename + +# inner-module imports +from . import nodes + +# exports +__all__: typing.Sequence[str] = ( + 'Graph', + ) + + +## code ## + +class Graph(): + """ + """ + # link to the triple storage backend. + __backend: TripleStoreBase + + # user uri. 
+ __user: URI + + def __init__(self, backend: TripleStoreBase, user: URI): + self.__backend = backend + self.__user = user + + def __hash__(self) -> int: + return hash((type(self), self.__backend, self.__user)) + + def __eq__(self, other) -> bool: + return isinstance(other, type(self)) \ + and self.__backend == other.__backend \ + and self.__user == other.__user + + def __repr__(self) -> str: + return f'{typename(self)}(backend={repr(self.__backend)}, user={self.__user})' + + def __str__(self) -> str: + return f'{typename(self)}({str(self.__backend)}, {self.__user})' + + @property + def schema(self) -> Schema: + """Return the store's local schema.""" + return self.__backend.schema + + def nodes(self, node_type: URI, guids: typing.Iterable[URI]) -> nodes.Nodes: + """ + """ + node_type = self.schema.node(node_type) + # NOTE: Nodes constructor materializes guids. + return nodes.Nodes(self.__backend, self.__user, node_type, guids) + +## EOF ## diff --git a/bsfs/graph/nodes.py b/bsfs/graph/nodes.py new file mode 100644 index 0000000..7d2e9b3 --- /dev/null +++ b/bsfs/graph/nodes.py @@ -0,0 +1,243 @@ +""" + +Part of the BlackStar filesystem (bsfs) module. +A copy of the license is provided with the project. +Author: Matthias Baumgartner, 2022 +""" +# imports +import itertools +import time +import typing + +# bsfs imports +from bsfs import schema as _schema +from bsfs.namespace import ns +from bsfs.triple_store import TripleStoreBase +from bsfs.utils import errors, URI, typename + +# inner-module imports +from . import ac + +# exports +__all__: typing.Sequence[str] = ( + 'Nodes', + ) + + +## code ## + +class Nodes(): + """ + NOTE: guids may or may not exist. This is not verified as nodes are created on demand. + """ + + # triple store backend. + __backend: TripleStoreBase + + # user uri. + __user: URI + + # node type. + __node_type: _schema.Node + + # guids of nodes. Can be empty. 
+ __guids: typing.Set[URI] + + def __init__( + self, + backend: TripleStoreBase, + user: URI, + node_type: _schema.Node, + guids: typing.Iterable[URI], + ): + self.__backend = backend + self.__user = user + self.__node_type = node_type + self.__guids = set(guids) + self.__ac = ac.NullAC(self.__backend, self.__user) + + def __eq__(self, other: typing.Any) -> bool: + return isinstance(other, Nodes) \ + and self.__backend == other.__backend \ + and self.__user == other.__user \ + and self.__node_type == other.__node_type \ + and self.__guids == other.__guids + + def __hash__(self) -> int: + return hash((type(self), self.__backend, self.__user, self.__node_type, tuple(sorted(self.__guids)))) + + def __repr__(self) -> str: + return f'{typename(self)}({self.__backend}, {self.__user}, {self.__node_type}, {self.__guids})' + + def __str__(self) -> str: + return f'{typename(self)}({self.__node_type}, {self.__guids})' + + @property + def node_type(self) -> _schema.Node: + """Return the node's type.""" + return self.__node_type + + @property + def guids(self) -> typing.Iterator[URI]: + """Return all node guids.""" + return iter(self.__guids) + + def set( + self, + pred: URI, # FIXME: URI or _schema.Predicate? + value: typing.Any, + ) -> 'Nodes': + """ + """ + try: + # insert triples + self.__set(pred, value) + # save changes + self.__backend.commit() + + except ( + errors.PermissionDeniedError, # tried to set a protected predicate (ns.bsm.t_created) + errors.ConsistencyError, # node types are not in the schema or don't match the predicate + errors.InstanceError, # guids/values don't have the correct type + TypeError, # value is supposed to be a Nodes instance + ValueError, # multiple values passed to unique predicate + ): + # revert changes + self.__backend.rollback() + # notify the client + raise + + return self + + def set_from_iterable( + self, + predicate_values: typing.Iterable[typing.Tuple[URI, typing.Any]], # FIXME: URI or _schema.Predicate? 
+ ) -> 'Nodes': + """ + """ + # TODO: Could group predicate_values by predicate to gain some efficiency + # TODO: ignore errors on some predicates; For now this could leave residual + # data (e.g. some nodes were created, some not). + try: + # insert triples + for pred, value in predicate_values: + self.__set(pred, value) + # save changes + self.__backend.commit() + + except ( + errors.PermissionDeniedError, # tried to set a protected predicate (ns.bsm.t_created) + errors.ConsistencyError, # node types are not in the schema or don't match the predicate + errors.InstanceError, # guids/values don't have the correct type + TypeError, # value is supposed to be a Nodes instance + ValueError, # multiple values passed to unique predicate + ): + # revert changes + self.__backend.rollback() + # notify the client + raise + + return self + + def __set( + self, + predicate: URI, + value: typing.Any, + #on_error: str = 'ignore', # ignore, rollback + ): + """ + """ + # get normalized predicate. Raises KeyError if *pred* not in the schema. + pred = self.__backend.schema.predicate(predicate) + + # node_type must be a subclass of the predicate's domain + node_type = self.node_type + if not node_type <= pred.domain: + raise errors.ConsistencyError(f'{node_type} must be a subclass of {pred.domain}') + + # check reserved predicates (access controls, metadata, internal structures) + # FIXME: Needed? Could be integrated into other AC methods (by passing the predicate!) + # This could allow more fine-grained predicate control (e.g. based on ownership) + # rather than a global approach like this. 
+ if self.__ac.is_protected_predicate(pred): + raise errors.PermissionDeniedError(pred) + + # set operation affects all nodes (if possible) + guids = set(self.guids) + + # ensure subject node existence; create nodes if need be + guids = set(self._ensure_nodes(node_type, guids)) + + # check value + if isinstance(pred.range, _schema.Literal): + # check write permissions on existing nodes + # As long as the user has write permissions, we don't restrict + # the creation or modification of literal values. + guids = set(self.__ac.write_literal(node_type, guids)) + + # insert literals + # TODO: Support passing iterators as values for non-unique predicates + self.__backend.set( + node_type, + guids, + pred, + [value], + ) + + elif isinstance(pred.range, _schema.Node): + # check value type + if not isinstance(value, Nodes): + raise TypeError(value) + # value's node_type must be a subclass of the predicate's range + if not value.node_type <= pred.range: + raise errors.ConsistencyError(f'{value.node_type} must be a subclass of {pred.range}') + + # check link permissions on source nodes + # Link permissions cover adding and removing links on the source node. + # Specifically, link permissions also allow to remove links to other + # nodes if needed (e.g. for unique predicates). 
+ guids = set(self.__ac.link_from_node(node_type, guids)) + + # get link targets + targets = set(value.guids) + # ensure existence of value nodes; create nodes if need be + targets = set(self._ensure_nodes(value.node_type, targets)) + # check link permissions on target nodes + targets = set(self.__ac.link_to_node(value.node_type, targets)) + + # insert node links + self.__backend.set( + node_type, + guids, + pred, + targets, + ) + + else: + raise errors.UnreachableError() + + def _ensure_nodes( + self, + node_type: _schema.Node, + guids: typing.Iterable[URI], + ): + # check node existence + guids = set(guids) + existing = set(self.__backend.exists(node_type, guids)) + # get nodes to be created + missing = guids - existing + # create nodes if need be + if len(missing) > 0: + # check which missing nodes can be created + missing = set(self.__ac.createable(node_type, missing)) + # create nodes + self.__backend.create(node_type, missing) + # add bookkeeping triples + self.__backend.set(node_type, missing, + self.__backend.schema.predicate(ns.bsm.t_created), [time.time()]) + # add permission triples + self.__ac.create(node_type, missing) + # return available nodes + return existing | missing + +## EOF ## diff --git a/bsfs/triple_store/__init__.py b/bsfs/triple_store/__init__.py new file mode 100644 index 0000000..fb5a8a9 --- /dev/null +++ b/bsfs/triple_store/__init__.py @@ -0,0 +1,20 @@ +""" + +Part of the BlackStar filesystem (bsfs) module. +A copy of the license is provided with the project. 
+Author: Matthias Baumgartner, 2022 +""" +# imports +import typing + +# inner-module imports +from .base import TripleStoreBase +from .sparql import SparqlStore + +# exports +__all__: typing.Sequence[str] = ( + 'SparqlStore', + 'TripleStoreBase', + ) + +## EOF ## diff --git a/bsfs/triple_store/base.py b/bsfs/triple_store/base.py new file mode 100644 index 0000000..a2668c3 --- /dev/null +++ b/bsfs/triple_store/base.py @@ -0,0 +1,128 @@ +""" + +Part of the BlackStar filesystem (bsfs) module. +A copy of the license is provided with the project. +Author: Matthias Baumgartner, 2022 +""" +# imports +import abc +import typing + +# inner-module imports +from bsfs.utils import URI, typename +import bsfs.schema as _schema + +# exports +__all__: typing.Sequence[str] = ( + 'TripleStoreBase', + ) + + +## code ## + +class TripleStoreBase(abc.ABC): + """ + """ + + # storage's URI. None implies a temporary location. + uri: typing.Optional[URI] = None + + def __init__(self, uri: typing.Optional[URI] = None): + self.uri = uri + + def __hash__(self) -> int: + uri = self.uri if self.uri is not None else id(self) + return hash((type(self), uri)) + + def __eq__(self, other) -> bool: + return isinstance(other, type(self)) \ + and (( self.uri is not None \ + and other.uri is not None \ + and self.uri == other.uri ) \ + or id(self) == id(other)) + + def __repr__(self) -> str: + return f'{typename(self)}(uri={self.uri})' + + def __str__(self) -> str: + return f'{typename(self)}(uri={self.uri})' + + def is_persistent(self) -> bool: + """Return True if data is stored persistently.""" + return self.uri is not None + + + @classmethod + @abc.abstractmethod + def Open( + cls, + uri: str, + **kwargs: typing.Any, + ) -> 'TripleStoreBase': + """Return a TripleStoreBase instance connected to *uri*.""" + + @abc.abstractmethod + def commit(self): + """Commit the current transaction.""" + + @abc.abstractmethod + def rollback(self): + """Undo changes since the last commit.""" + + @property + 
@abc.abstractmethod + def schema(self) -> _schema.Schema: + """Return the store's local schema.""" + + @schema.setter + def schema(self, schema: _schema.Schema): + """Migrate to new schema by adding or removing class definitions. + + Commits before and after the migration. + + Instances of removed classes will be deleted irreversably. + Note that modifying an existing class is not directly supported. + Also, it is generally discouraged, since changing definitions may + lead to inconsistencies across multiple clients in a distributed + setting. Instead, consider introducing a new class under its own + uri. Such a migration would look as follows: + + 1. Add new class definitions. + 2. Create instances of the new classes and copy relevant data. + 3. Remove the old definitions. + + To modify a class, i.e., re-use a previous uri with a new + class definition, you would have to migrate via temporary + class definitions, and thus repeat the above procedure two times. + + """ + + @abc.abstractmethod + def exists( + self, + node_type: _schema.Node, + guids: typing.Iterable[URI], + ): + """ + """ + + @abc.abstractmethod + def create( + self, + node_type: _schema.Node, + guids: typing.Iterable[URI], + ): + """Create *guid* nodes with type *subject*.""" + + @abc.abstractmethod + def set( + self, + node_type: _schema.Node, # FIXME: is the node_type even needed? Couldn't I infer from the predicate? + guids: typing.Iterable[URI], + predicate: _schema.Predicate, + values: typing.Iterable[typing.Any], + ): + """ + """ + +## EOF ## diff --git a/bsfs/triple_store/sparql.py b/bsfs/triple_store/sparql.py new file mode 100644 index 0000000..3eab869 --- /dev/null +++ b/bsfs/triple_store/sparql.py @@ -0,0 +1,253 @@ +""" + +Part of the BlackStar filesystem (bsfs) module. +A copy of the license is provided with the project. 
+Author: Matthias Baumgartner, 2022 +""" +# imports +import itertools +import typing +import rdflib + +# bsfs imports +from bsfs.utils import URI +from bsfs.utils import errors +import bsfs.schema as _schema + +# inner-module imports +from . import base + + +# exports +__all__: typing.Sequence[str] = ( + 'SparqlStore', + ) + + +## code ## + +class Transaction(): + """Lightweight rdflib transactions for in-memory databases.""" + + def __init__(self, graph): + self._graph = graph + self.commit() # initialize + + def commit(self): + self._added = [] + self._removed = [] + + def rollback(self): + for triple in self._added: + self._graph.remove(triple) + for triple in self._removed: + self._graph.add(triple) + + def add(self, triple): + if triple not in self._graph: + self._added.append(triple) + self._graph.add(triple) + + def remove(self, triple): + if triple in self._graph: + self._removed.append(triple) + self._graph.remove(triple) + + +class SparqlStore(base.TripleStoreBase): + """ + """ + + def __init__(self, uri: typing.Optional[URI] = None): + super().__init__(uri) + self.graph = rdflib.Graph() + self.transaction = Transaction(self.graph) + self.__schema = _schema.Schema.Empty() + + @classmethod + def Open( + cls, + uri: str, + **kwargs: typing.Any, + ) -> 'SparqlStore': + return cls(None) + + def commit(self): + self.transaction.commit() + + def rollback(self): + self.transaction.rollback() + + @property + def schema(self) -> _schema.Schema: + """Return the current schema.""" + return self.__schema + + @schema.setter + def schema(self, schema: _schema.Schema): + """Migrate to new schema by adding or removing class definitions. + + Commits before and after the migration. + + Instances of removed classes will be deleted irreversably. + Note that modifying an existing class is not directly supported. + Also, it is generally discouraged, since changing definitions may + lead to inconsistencies across multiple clients in a distributed + setting. 
Instead, consider introducing a new class under its own + uri. Such a migration would look as follows: + + 1. Add new class definitions. + 2. Create instances of the new classes and copy relevant data. + 3. Remove the old definitions. + + To modify a class, i.e., re-use a previous uri with a new + class definition, you would have to migrate via temporary + class definitions, and thus repeat the above procedure two times. + + """ + # check args: Schema instanace + if not isinstance(schema, _schema.Schema): + raise TypeError(schema) + # check compatibility: No contradicting definitions + if not self.schema.consistent_with(schema): + raise errors.ConsistencyError(f'{schema} is inconsistent with {self.schema}') + + # commit the current transaction + self.commit() + + # adjust instances: + # nothing to do for added classes + # delete instances of removed classes + + # get deleted classes + sub = self.schema - schema + + # remove predicate instances + for pred in sub.predicates: + for src, trg in self.graph.subject_objects(rdflib.URIRef(pred.uri)): + self.transaction.remove((src, rdflib.URIRef(pred.uri), trg)) + + # remove node instances + for node in sub.nodes: + # iterate through node instances + for inst in self.graph.subjects(rdflib.RDF.type, rdflib.URIRef(node.uri)): + # remove triples where the instance is in the object position + for src, pred in self.graph.subject_predicates(inst): + self.transaction.remove((src, pred, inst)) + # remove triples where the instance is in the subject position + for pred, trg in self.graph.predicate_objects(inst): + self.transaction.remove((inst, pred, trg)) + # remove instance + self.transaction.remove((inst, rdflib.RDF.type, rdflib.URIRef(node.uri))) + + # NOTE: Nothing to do for literals + + # commit instance changes + self.commit() + + # migrate schema + self.__schema = schema + + + def _has_type(self, subject: URI, node_type: _schema.Node) -> bool: + """Return True if *subject* is a node of class *node_type* or a subclass 
thereof.""" + if node_type not in self.schema.nodes(): + raise errors.ConsistencyError(f'{node_type} is not defined in the schema') + + subject_types = list(self.graph.objects(rdflib.URIRef(subject), rdflib.RDF.type)) + if len(subject_types) == 0: + return False + elif len(subject_types) == 1: + node = self.schema.node(URI(subject_types[0])) + if node == node_type: + return True + elif node_type in node.parents(): + return True + else: + return False + else: + raise errors.UnreachableError() + + def exists( + self, + node_type: _schema.Node, + guids: typing.Iterable[URI], + ): + """ + """ + return {subj for subj in guids if self._has_type(subj, node_type)} + + def create( + self, + node_type: _schema.Node, + guids: typing.Iterable[URI], + ): + """ + """ + # check node_type + if node_type not in self.schema.nodes(): + raise errors.ConsistencyError(f'{node_type} is not defined in the schema') + # check and create guids + for guid in guids: + guid = rdflib.URIRef(guid) + # check node existence + if (guid, rdflib.RDF.type, None) in self.graph: + # FIXME: node exists and may have a different type! ignore? raise? report? + continue + # add node + self.transaction.add((guid, rdflib.RDF.type, rdflib.URIRef(node_type.uri))) + + def set( + self, + node_type: _schema.Node, # FIXME: is the node_type even needed? Couldn't I infer from the predicate? + guids: typing.Iterable[URI], + predicate: _schema.Predicate, + values: typing.Iterable[typing.Any], + ): + # check node_type + if node_type not in self.schema.nodes(): + raise errors.ConsistencyError(f'{node_type} is not defined in the schema') + # check predicate + if predicate not in self.schema.predicates(): + raise errors.ConsistencyError(f'{predicate} is not defined in the schema') + if not node_type <= predicate.domain: + raise errors.ConsistencyError(f'{node_type} must be a subclass of {predicate.domain}') + # NOTE: predicate.range is in the schema since predicate is in the schema. 
+ # check values + if len(values) == 0: + return + if predicate.unique and len(values) != 1: + raise ValueError(values) + if isinstance(predicate.range, _schema.Node): + values = set(values) # materialize to safeguard against iterators passed as argument + inconsistent = {val for val in values if not self._has_type(val, predicate.range)} + # catches nodes that don't exist and nodes that have an inconsistent type + if len(inconsistent) > 0: + raise errors.InstanceError(inconsistent) + # check guids + # FIXME: Fail or skip inexistent nodes? + guids = set(guids) + inconsistent = {guid for guid in guids if not self._has_type(guid, node_type)} + if len(inconsistent) > 0: + raise errors.InstanceError(inconsistent) + + # add triples + pred = rdflib.URIRef(predicate.uri) + for guid, value in itertools.product(guids, values): + guid = rdflib.URIRef(guid) + # convert value + if isinstance(predicate.range, _schema.Literal): + value = rdflib.Literal(value, datatype=rdflib.URIRef(predicate.range.uri)) + elif isinstance(predicate.range, _schema.Node): + value = rdflib.URIRef(value) + else: + raise errors.UnreachableError() + # clear triples for unique predicates + if predicate.unique: + for obj in self.graph.objects(guid, pred): + if obj != value: + self.transaction.remove((guid, pred, obj)) + # add triple + self.transaction.add((guid, pred, value)) + +## EOF ## -- cgit v1.2.3 From 21a02197d73f263ae222f2ccc49248d8617e2d7d Mon Sep 17 00:00:00 2001 From: Matthias Baumgartner Date: Sun, 18 Dec 2022 13:40:49 +0100 Subject: project cosmetics --- bsfs/utils/uuid.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'bsfs') diff --git a/bsfs/utils/uuid.py b/bsfs/utils/uuid.py index 7c39128..6366b18 100644 --- a/bsfs/utils/uuid.py +++ b/bsfs/utils/uuid.py @@ -27,7 +27,7 @@ __all__: typing.Sequence[str] = [ ## code ## -class UUID(abc.Iterator, abc.Callable): +class UUID(abc.Iterator, abc.Callable): # type: ignore [misc] # abc.Callable "is an invalid base class" 
"""Generate 256-bit universally unique IDs. This is a 'best-effort' kind of implementation that tries to ensure global @@ -69,7 +69,7 @@ class UUID(abc.Iterator, abc.Callable): # initialize random component random.seed(seed) - def __call__(self, content: typing.Optional[str] = None) -> str: + def __call__(self, content: typing.Optional[str] = None) -> str: # pylint: disable=arguments-differ """Return a globally unique ID.""" # content component content = str(content) if content is not None else '' @@ -93,7 +93,7 @@ class UUID(abc.Iterator, abc.Callable): return self() -class UCID(abc.Callable): +class UCID(): """Generate 256-bit content IDs. Effectively computes a cryptographic hash over the content. -- cgit v1.2.3 From 0e52514639b043454425a9cc2317d27e628a1027 Mon Sep 17 00:00:00 2001 From: Matthias Baumgartner Date: Sun, 18 Dec 2022 13:42:34 +0100 Subject: namespace and uri extensions --- bsfs/namespace/namespace.py | 48 +++++++++++++++++++++++++++++++----------- bsfs/namespace/predefined.py | 14 ++++++------- bsfs/utils/uri.py | 50 ++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 93 insertions(+), 19 deletions(-) (limited to 'bsfs') diff --git a/bsfs/namespace/namespace.py b/bsfs/namespace/namespace.py index 8080f5d..f652dcd 100644 --- a/bsfs/namespace/namespace.py +++ b/bsfs/namespace/namespace.py @@ -29,29 +29,55 @@ class Namespace(): # namespace prefix. prefix: URI - def __init__(self, prefix: URI): - self.prefix = URI(prefix) + # fragment separator. + fsep: str + + # path separator. 
+ psep: str + + def __init__(self, prefix: URI, fsep: str = '#', psep: str = '/'): + # ensure prefix type + prefix = URI(prefix) + # truncate fragment separator + while prefix.endswith(fsep): + prefix = URI(prefix[:-1]) + # truncate path separator + while prefix.endswith(psep): + prefix = URI(prefix[:-1]) + # store members + self.prefix = prefix + self.fsep = fsep + self.psep = psep def __eq__(self, other: typing.Any) -> bool: - return isinstance(other, type(self)) and self.prefix == other.prefix + return isinstance(other, type(self)) \ + and self.prefix == other.prefix \ + and self.fsep == other.fsep \ + and self.psep == other.psep def __hash__(self) -> int: - return hash((type(self), self.prefix)) + return hash((type(self), self.prefix, self.fsep, self.psep)) def __str__(self) -> str: return f'{typename(self)}({self.prefix})' def __repr__(self) -> str: - return f'{typename(self)}({self.prefix})' + return f'{typename(self)}({self.prefix}, {self.fsep}, {self.psep})' def __getattr__(self, fragment: str) -> URI: """Return prefix + fragment.""" - return URI(self.prefix + fragment) + return URI(self.prefix + self.fsep + fragment) def __getitem__(self, fragment: str) -> URI: """Alias for getattr(self, fragment).""" return self.__getattr__(fragment) + def __add__(self, value: typing.Any) -> 'Namespace': + """Concatenate another namespace to this one.""" + if not isinstance(value, str): + return NotImplemented + return Namespace(self.prefix + self.psep + value, self.fsep, self.psep) + class ClosedNamespace(Namespace): """Namespace that covers a restricted set of URIs.""" @@ -59,8 +85,8 @@ class ClosedNamespace(Namespace): # set of permissible fragments. 
fragments: typing.Set[str] - def __init__(self, prefix: URI, *args: str): - super().__init__(prefix) + def __init__(self, prefix: URI, *args: str, fsep: str = '#', psep: str = '/'): + super().__init__(prefix, fsep, psep) self.fragments = set(args) def __eq__(self, other: typing.Any) -> bool: @@ -70,11 +96,9 @@ class ClosedNamespace(Namespace): return hash((type(self), self.prefix, tuple(sorted(self.fragments)))) def __getattr__(self, fragment: str) -> URI: - """Return prefix + fragment. - Raises a KeyError if the fragment is not allowed in this namespace. - """ + """Return prefix + fragment or raise a KeyError if the fragment is not part of this namespace.""" if fragment not in self.fragments: - raise KeyError('fragment') + raise KeyError(f'{fragment} is not a valid fragment of namespace {self.prefix}') return super().__getattr__(fragment) ## EOF ## diff --git a/bsfs/namespace/predefined.py b/bsfs/namespace/predefined.py index 21ca560..cd48a46 100644 --- a/bsfs/namespace/predefined.py +++ b/bsfs/namespace/predefined.py @@ -14,17 +14,17 @@ from bsfs.utils import URI from . 
import namespace # essential bsfs namespaces -bsfs: namespace.Namespace = namespace.Namespace(URI('http://bsfs.ai/schema/')) +bsfs: namespace.Namespace = namespace.Namespace(URI('http://bsfs.ai/schema'), fsep='/') # additional bsfs namespaces -bse: namespace.Namespace = namespace.Namespace(URI('http://bsfs.ai/schema/Entity#')) -bsm: namespace.Namespace = namespace.Namespace(URI('http://bsfs.ai/schema/Meta#')) +bse: namespace.Namespace = namespace.Namespace(URI('http://bsfs.ai/schema/Entity')) +bsm: namespace.Namespace = namespace.Namespace(URI('http://bsfs.ai/schema/Meta')) # generic namespaces -rdf: namespace.Namespace = namespace.Namespace(URI('http://www.w3.org/1999/02/22-rdf-syntax-ns#')) -rdfs: namespace.Namespace = namespace.Namespace(URI('http://www.w3.org/2000/01/rdf-schema#')) -schema: namespace.Namespace = namespace.Namespace(URI('http://schema.org/')) -xsd: namespace.Namespace = namespace.Namespace(URI('http://www.w3.org/2001/XMLSchema#')) +rdf: namespace.Namespace = namespace.Namespace(URI('http://www.w3.org/1999/02/22-rdf-syntax-ns')) +rdfs: namespace.Namespace = namespace.Namespace(URI('http://www.w3.org/2000/01/rdf-schema')) +schema: namespace.Namespace = namespace.Namespace(URI('http://schema.org'), fsep='/') +xsd: namespace.Namespace = namespace.Namespace(URI('http://www.w3.org/2001/XMLSchema')) __all__: typing.Sequence[str] = ( 'bse', diff --git a/bsfs/utils/uri.py b/bsfs/utils/uri.py index a56423a..84854a4 100644 --- a/bsfs/utils/uri.py +++ b/bsfs/utils/uri.py @@ -193,4 +193,54 @@ class URI(str): # return the default value return default + + # overload composition methods + + def __add__(self, *args) -> 'URI': + return URI(super().__add__(*args)) + + def join(self, *args) -> 'URI': + return URI(super().join(*args)) + + def __mul__(self, *args) -> 'URI': + return URI(super().__mul__(*args)) + + def __rmul__(self, *args) -> 'URI': + return URI(super().__rmul__(*args)) + + + # overload casefold methods + + def lower(self, *args) -> 'URI': + return 
URI(super().lower(*args)) + + def upper(self, *args) -> 'URI': + return URI(super().upper(*args)) + + + # overload stripping methods + + def strip(self, *args) -> 'URI': + return URI(super().strip(*args)) + + def lstrip(self, *args) -> 'URI': + return URI(super().lstrip(*args)) + + def rstrip(self, *args) -> 'URI': + return URI(super().rstrip(*args)) + + + # overload formatting methods + + def format(self, *args, **kwargs) -> 'URI': + return URI(super().format(*args, **kwargs)) + + def __mod__(self, *args) -> 'URI': + return URI(super().__mod__(*args)) + + def replace(self, *args) -> 'URI': + return URI(super().replace(*args)) + + + ## EOF ## -- cgit v1.2.3 From 1b570e45a4e99a4e7f9ad9d01b4fa93e38fbff38 Mon Sep 17 00:00:00 2001 From: Matthias Baumgartner Date: Sun, 18 Dec 2022 13:45:35 +0100 Subject: schema ordering --- bsfs/schema/schema.py | 35 +++++++++++++++++++++++++++++++++++ 1 file changed, 35 insertions(+) (limited to 'bsfs') diff --git a/bsfs/schema/schema.py b/bsfs/schema/schema.py index 0e053c0..b6f37a7 100644 --- a/bsfs/schema/schema.py +++ b/bsfs/schema/schema.py @@ -103,6 +103,41 @@ class Schema(): SchemaDiff = namedtuple('SchemaDiff', ['nodes', 'literals', 'predicates']) + def _issubset(self, other: 'Schema') -> bool: + # inconsistent schema can't be ordered. + if not self.consistent_with(other): + return False + # since schemas are consistent, it's sufficient to compare their URIs. 
+ # self's sets are fully contained in other's sets + # pylint: disable=protected-access + return set(self._predicates) <= set(other._predicates) \ + and set(self._nodes) <= set(other._nodes) \ + and set(self._literals) <= set(other._literals) + + def __lt__(self, other: typing.Any) -> bool: + """Return True if *self* is a true subset of *other*.""" + if not isinstance(other, Schema): # other is not a Schema + return NotImplemented + return self != other and self._issubset(other) + + def __le__(self, other: typing.Any) -> bool: + """Return True if *self* is a subset of *other*.""" + if not isinstance(other, Schema): # other is not a Schema + return NotImplemented + return self == other or self._issubset(other) + + def __gt__(self, other: typing.Any) -> bool: + """Return True if *self* is a true superset of *other*.""" + if not isinstance(other, Schema): # other is not a Schema + return NotImplemented + return self != other and other._issubset(self) + + def __ge__(self, other: typing.Any) -> bool: + """Return True if *self* is a superset of *other*.""" + if not isinstance(other, Schema): # other is not a Schema + return NotImplemented + return self == other or other._issubset(self) + def diff(self, other: 'Schema') -> SchemaDiff: """Return node, literals, and predicates that are in *self* but not in *other*.""" return self.SchemaDiff( -- cgit v1.2.3 From ebc3ccb5fdce950649bfcbf18f88ecb4a9dbcad0 Mon Sep 17 00:00:00 2001 From: Matthias Baumgartner Date: Sun, 18 Dec 2022 13:53:34 +0100 Subject: import fixes --- bsfs/graph/__init__.py | 5 ++++- bsfs/graph/ac/base.py | 14 +++++++------- bsfs/graph/ac/null.py | 14 +++++++------- bsfs/graph/graph.py | 6 +++--- bsfs/schema/__init__.py | 2 -- 5 files changed, 21 insertions(+), 20 deletions(-) (limited to 'bsfs') diff --git a/bsfs/graph/__init__.py b/bsfs/graph/__init__.py index 3a131e9..82d2235 100644 --- a/bsfs/graph/__init__.py +++ b/bsfs/graph/__init__.py @@ -8,8 +8,11 @@ Author: Matthias Baumgartner, 2022 import typing # 
inner-module imports +from .graph import Graph # exports -__all__: typing.Sequence[str] = [] +__all__: typing.Sequence[str] = ( + 'Graph', + ) ## EOF ## diff --git a/bsfs/graph/ac/base.py b/bsfs/graph/ac/base.py index 70475d2..eef444b 100644 --- a/bsfs/graph/ac/base.py +++ b/bsfs/graph/ac/base.py @@ -9,7 +9,7 @@ import abc import typing # bsfs imports -from bsfs import schema as _schema +from bsfs import schema from bsfs.triple_store import TripleStoreBase from bsfs.utils import URI @@ -40,27 +40,27 @@ class AccessControlBase(abc.ABC): self.__user = URI(user) @abc.abstractmethod - def is_protected_predicate(self, pred: _schema.Predicate) -> bool: + def is_protected_predicate(self, pred: schema.Predicate) -> bool: """Return True if a predicate cannot be modified manually.""" @abc.abstractmethod - def create(self, node_type: _schema.Node, guids: typing.Iterable[URI]): + def create(self, node_type: schema.Node, guids: typing.Iterable[URI]): """Perform post-creation operations on nodes, e.g. 
ownership information.""" @abc.abstractmethod - def link_from_node(self, node_type: _schema.Node, guids: typing.Iterable[URI]) -> typing.Iterable[URI]: + def link_from_node(self, node_type: schema.Node, guids: typing.Iterable[URI]) -> typing.Iterable[URI]: """Return nodes for which outbound links can be written.""" @abc.abstractmethod - def link_to_node(self, node_type: _schema.Node, guids: typing.Iterable[URI]) -> typing.Iterable[URI]: + def link_to_node(self, node_type: schema.Node, guids: typing.Iterable[URI]) -> typing.Iterable[URI]: """Return nodes for which inbound links can be written.""" @abc.abstractmethod - def write_literal(self, node_type: _schema.Node, guids: typing.Iterable[URI]) -> typing.Iterable[URI]: + def write_literal(self, node_type: schema.Node, guids: typing.Iterable[URI]) -> typing.Iterable[URI]: """Return nodes to which literals can be attached.""" @abc.abstractmethod - def createable(self, node_type: _schema.Node, guids: typing.Iterable[URI]) -> typing.Iterable[URI]: + def createable(self, node_type: schema.Node, guids: typing.Iterable[URI]) -> typing.Iterable[URI]: """Return nodes that are allowed to be created.""" diff --git a/bsfs/graph/ac/null.py b/bsfs/graph/ac/null.py index a39b7b9..288a0da 100644 --- a/bsfs/graph/ac/null.py +++ b/bsfs/graph/ac/null.py @@ -8,7 +8,7 @@ Author: Matthias Baumgartner, 2022 import typing # bsfs imports -from bsfs import schema as _schema +from bsfs import schema from bsfs.namespace import ns from bsfs.utils import URI @@ -27,26 +27,26 @@ class NullAC(base.AccessControlBase): """ """ - def is_protected_predicate(self, pred: _schema.Predicate) -> bool: + def is_protected_predicate(self, pred: schema.Predicate) -> bool: """Return True if a predicate cannot be modified manually.""" return pred.uri == ns.bsm.t_created - def create(self, node_type: _schema.Node, guids: typing.Iterable[URI]): + def create(self, node_type: schema.Node, guids: typing.Iterable[URI]): """Perform post-creation operations on nodes, 
e.g. ownership information.""" - def link_from_node(self, node_type: _schema.Node, guids: typing.Iterable[URI]) -> typing.Iterable[URI]: + def link_from_node(self, node_type: schema.Node, guids: typing.Iterable[URI]) -> typing.Iterable[URI]: """Return nodes for which outbound links can be written.""" return guids - def link_to_node(self, node_type: _schema.Node, guids: typing.Iterable[URI]) -> typing.Iterable[URI]: + def link_to_node(self, node_type: schema.Node, guids: typing.Iterable[URI]) -> typing.Iterable[URI]: """Return nodes for which inbound links can be written.""" return guids - def write_literal(self, node_type: _schema.Node, guids: typing.Iterable[URI]) -> typing.Iterable[URI]: + def write_literal(self, node_type: schema.Node, guids: typing.Iterable[URI]) -> typing.Iterable[URI]: """Return nodes to which literals can be attached.""" return guids - def createable(self, node_type: _schema.Node, guids: typing.Iterable[URI]) -> typing.Iterable[URI]: + def createable(self, node_type: schema.Node, guids: typing.Iterable[URI]) -> typing.Iterable[URI]: """Return nodes that are allowed to be created.""" return guids diff --git a/bsfs/graph/graph.py b/bsfs/graph/graph.py index 06271f6..d5e1b88 100644 --- a/bsfs/graph/graph.py +++ b/bsfs/graph/graph.py @@ -13,7 +13,7 @@ from bsfs.triple_store import TripleStoreBase from bsfs.utils import URI, typename # inner-module imports -from . import nodes +from . import nodes as _nodes # exports __all__: typing.Sequence[str] = ( @@ -55,11 +55,11 @@ class Graph(): """Return the store's local schema.""" return self.__backend.schema - def nodes(self, node_type: URI, guids: typing.Iterable[URI]) -> nodes.Nodes: """ + def nodes(self, node_type: URI, guids: typing.Iterable[URI]) -> _nodes.Nodes: """ node_type = self.schema.node(node_type) # NOTE: Nodes constructor materializes guids. 
- return nodes.Nodes(self.__backend, self.__user, node_type, guids) + return _nodes.Nodes(self._backend, self._user, type_, guids) ## EOF ## diff --git a/bsfs/schema/__init__.py b/bsfs/schema/__init__.py index ce381ec..ad4d456 100644 --- a/bsfs/schema/__init__.py +++ b/bsfs/schema/__init__.py @@ -8,7 +8,6 @@ Author: Matthias Baumgartner, 2022 import typing # inner-module imports -#from . import types from .schema import Schema from .types import Literal, Node, Predicate @@ -18,7 +17,6 @@ __all__: typing.Sequence[str] = ( 'Node', 'Predicate', 'Schema', - #'types', ) ## EOF ## -- cgit v1.2.3 From edd5390b6db1550f6a80a46f0eaf5f3916997532 Mon Sep 17 00:00:00 2001 From: Matthias Baumgartner Date: Sun, 18 Dec 2022 14:06:58 +0100 Subject: information hiding --- bsfs/graph/ac/base.py | 12 +++---- bsfs/graph/graph.py | 20 +++++------ bsfs/graph/nodes.py | 58 ++++++++++++++++---------------- bsfs/triple_store/sparql.py | 81 ++++++++++++++++++++++++--------------------- 4 files changed, 89 insertions(+), 82 deletions(-) (limited to 'bsfs') diff --git a/bsfs/graph/ac/base.py b/bsfs/graph/ac/base.py index eef444b..80742d7 100644 --- a/bsfs/graph/ac/base.py +++ b/bsfs/graph/ac/base.py @@ -25,19 +25,19 @@ class AccessControlBase(abc.ABC): """ """ - # - __backend: TripleStoreBase + # The triple store backend. + _backend: TripleStoreBase - # - __user: URI + # The current user. + _user: URI def __init__( self, backend: TripleStoreBase, user: URI, ): - self.__backend = backend - self.__user = URI(user) + self._backend = backend + self._user = URI(user) @abc.abstractmethod def is_protected_predicate(self, pred: schema.Predicate) -> bool: diff --git a/bsfs/graph/graph.py b/bsfs/graph/graph.py index d5e1b88..71973c2 100644 --- a/bsfs/graph/graph.py +++ b/bsfs/graph/graph.py @@ -27,33 +27,33 @@ class Graph(): """ """ # link to the triple storage backend. - __backend: TripleStoreBase + _backend: TripleStoreBase # user uri. 
- __user: URI + _user: URI def __init__(self, backend: TripleStoreBase, user: URI): - self.__backend = backend - self.__user = user + self._backend = backend + self._user = user def __hash__(self) -> int: - return hash((type(self), self.__backend, self.__user)) + return hash((type(self), self._backend, self._user)) def __eq__(self, other) -> bool: return isinstance(other, type(self)) \ - and self.__backend == other.__backend \ - and self.__user == other.__user + and self._backend == other._backend \ + and self._user == other._user def __repr__(self) -> str: - return f'{typename(self)}(backend={repr(self.__backend)}, user={self.__user})' + return f'{typename(self)}(backend={repr(self._backend)}, user={self._user})' def __str__(self) -> str: - return f'{typename(self)}({str(self.__backend)}, {self.__user})' + return f'{typename(self)}({str(self._backend)}, {self._user})' @property def schema(self) -> Schema: """Return the store's local schema.""" - return self.__backend.schema + return self._backend.schema """ def nodes(self, node_type: URI, guids: typing.Iterable[URI]) -> _nodes.Nodes: diff --git a/bsfs/graph/nodes.py b/bsfs/graph/nodes.py index 7d2e9b3..7b0e8f4 100644 --- a/bsfs/graph/nodes.py +++ b/bsfs/graph/nodes.py @@ -32,16 +32,16 @@ class Nodes(): """ # triple store backend. - __backend: TripleStoreBase + _backend: TripleStoreBase # user uri. - __user: URI + _user: URI # node type. - __node_type: _schema.Node + _node_type: _schema.Node # guids of nodes. Can be empty. 
- __guids: typing.Set[URI] + _guids: typing.Set[URI] def __init__( self, @@ -50,37 +50,37 @@ class Nodes(): node_type: _schema.Node, guids: typing.Iterable[URI], ): - self.__backend = backend - self.__user = user - self.__node_type = node_type - self.__guids = set(guids) - self.__ac = ac.NullAC(self.__backend, self.__user) + self._backend = backend + self._user = user + self._node_type = node_type + self._guids = set(guids) + self.__ac = ac.NullAC(self._backend, self._user) def __eq__(self, other: typing.Any) -> bool: return isinstance(other, Nodes) \ - and self.__backend == other.__backend \ - and self.__user == other.__user \ - and self.__node_type == other.__node_type \ - and self.__guids == other.__guids + and self._backend == other._backend \ + and self._user == other._user \ + and self._node_type == other._node_type \ + and self._guids == other._guids def __hash__(self) -> int: - return hash((type(self), self.__backend, self.__user, self.__node_type, tuple(sorted(self.__guids)))) + return hash((type(self), self._backend, self._user, self._node_type, tuple(sorted(self._guids)))) def __repr__(self) -> str: - return f'{typename(self)}({self.__backend}, {self.__user}, {self.__node_type}, {self.__guids})' + return f'{typename(self)}({self._backend}, {self._user}, {self._node_type}, {self._guids})' def __str__(self) -> str: - return f'{typename(self)}({self.__node_type}, {self.__guids})' + return f'{typename(self)}({self._node_type}, {self._guids})' @property def node_type(self) -> _schema.Node: """Return the node's type.""" - return self.__node_type + return self._node_type @property def guids(self) -> typing.Iterator[URI]: """Return all node guids.""" - return iter(self.__guids) + return iter(self._guids) def set( self, @@ -93,7 +93,7 @@ class Nodes(): # insert triples self.__set(pred, value) # save changes - self.__backend.commit() + self._backend.commit() except ( errors.PermissionDeniedError, # tried to set a protected predicate (ns.bsm.t_created) @@ -103,7 
+103,7 @@ class Nodes(): ValueError, # multiple values passed to unique predicate ): # revert changes - self.__backend.rollback() + self._backend.rollback() # notify the client raise @@ -123,7 +123,7 @@ class Nodes(): for pred, value in predicate_values: self.__set(pred, value) # save changes - self.__backend.commit() + self._backend.commit() except ( errors.PermissionDeniedError, # tried to set a protected predicate (ns.bsm.t_created) @@ -133,7 +133,7 @@ class Nodes(): ValueError, # multiple values passed to unique predicate ): # revert changes - self.__backend.rollback() + self._backend.rollback() # notify the client raise @@ -148,7 +148,7 @@ class Nodes(): """ """ # get normalized predicate. Raises KeyError if *pred* not in the schema. - pred = self.__backend.schema.predicate(predicate) + pred = self._backend.schema.predicate(predicate) # node_type must be a subclass of the predicate's domain node_type = self.node_type @@ -177,7 +177,7 @@ class Nodes(): # insert literals # TODO: Support passing iterators as values for non-unique predicates - self.__backend.set( + self._backend.set( node_type, guids, pred, @@ -206,7 +206,7 @@ class Nodes(): targets = set(self.__ac.link_to_node(value.node_type, targets)) # insert node links - self.__backend.set( + self._backend.set( node_type, guids, pred, @@ -223,7 +223,7 @@ class Nodes(): ): # check node existence guids = set(guids) - existing = set(self.__backend.exists(node_type, guids)) + existing = set(self._backend.exists(node_type, guids)) # get nodes to be created missing = guids - existing # create nodes if need be @@ -231,10 +231,10 @@ class Nodes(): # check which missing nodes can be created missing = set(self.__ac.createable(node_type, missing)) # create nodes - self.__backend.create(node_type, missing) + self._backend.create(node_type, missing) # add bookkeeping triples - self.__backend.set(node_type, missing, - self.__backend.schema.predicate(ns.bsm.t_created), [time.time()]) + self._backend.set(node_type, missing, 
+ self._backend.schema.predicate(ns.bsm.t_created), [time.time()]) # add permission triples self.__ac.create(node_type, missing) # return available nodes diff --git a/bsfs/triple_store/sparql.py b/bsfs/triple_store/sparql.py index 3eab869..d9ed55a 100644 --- a/bsfs/triple_store/sparql.py +++ b/bsfs/triple_store/sparql.py @@ -10,9 +10,8 @@ import typing import rdflib # bsfs imports -from bsfs.utils import URI -from bsfs.utils import errors -import bsfs.schema as _schema +from bsfs import schema as bsc +from bsfs.utils import errors, URI # inner-module imports from . import base @@ -26,7 +25,7 @@ __all__: typing.Sequence[str] = ( ## code ## -class Transaction(): +class _Transaction(): """Lightweight rdflib transactions for in-memory databases.""" def __init__(self, graph): @@ -58,11 +57,20 @@ class SparqlStore(base.TripleStoreBase): """ """ - def __init__(self, uri: typing.Optional[URI] = None): - super().__init__(uri) - self.graph = rdflib.Graph() - self.transaction = Transaction(self.graph) - self.__schema = _schema.Schema.Empty() + # The rdflib graph. + _graph: rdflib.Graph + + # Current transaction. + _transaction: _Transaction + + # The local schema. 
+ _schema: bsc.Schema + + def __init__(self): + super().__init__(None) + self._graph = rdflib.Graph() + self._transaction = _Transaction(self._graph) + self._schema = bsc.Schema.Empty() @classmethod def Open( @@ -73,15 +81,14 @@ class SparqlStore(base.TripleStoreBase): return cls(None) def commit(self): - self.transaction.commit() + self._transaction.commit() def rollback(self): - self.transaction.rollback() + self._transaction.rollback() @property - def schema(self) -> _schema.Schema: - """Return the current schema.""" - return self.__schema + def schema(self) -> bsc.Schema: + return self._schema @schema.setter def schema(self, schema: _schema.Schema): @@ -106,7 +113,7 @@ class SparqlStore(base.TripleStoreBase): """ # check args: Schema instanace - if not isinstance(schema, _schema.Schema): + if not isinstance(schema, bsc.Schema): raise TypeError(schema) # check compatibility: No contradicting definitions if not self.schema.consistent_with(schema): @@ -124,21 +131,21 @@ class SparqlStore(base.TripleStoreBase): # remove predicate instances for pred in sub.predicates: - for src, trg in self.graph.subject_objects(rdflib.URIRef(pred.uri)): - self.transaction.remove((src, rdflib.URIRef(pred.uri), trg)) + for src, trg in self._graph.subject_objects(rdflib.URIRef(pred.uri)): + self._transaction.remove((src, rdflib.URIRef(pred.uri), trg)) # remove node instances for node in sub.nodes: # iterate through node instances - for inst in self.graph.subjects(rdflib.RDF.type, rdflib.URIRef(node.uri)): + for inst in self._graph.subjects(rdflib.RDF.type, rdflib.URIRef(node.uri)): # remove triples where the instance is in the object position - for src, pred in self.graph.subject_predicates(inst): - self.transaction.remove((src, pred, inst)) + for src, pred in self._graph.subject_predicates(inst): + self._transaction.remove((src, pred, inst)) # remove triples where the instance is in the subject position - for pred, trg in self.graph.predicate_objects(inst): - 
self.transaction.remove((inst, pred, trg)) + for pred, trg in self._graph.predicate_objects(inst): + self._transaction.remove((inst, pred, trg)) # remove instance - self.transaction.remove((inst, rdflib.RDF.type, rdflib.URIRef(node.uri))) + self._transaction.remove((inst, rdflib.RDF.type, rdflib.URIRef(node.uri))) # NOTE: Nothing to do for literals @@ -146,15 +153,15 @@ class SparqlStore(base.TripleStoreBase): self.commit() # migrate schema - self.__schema = schema + self._schema = schema - def _has_type(self, subject: URI, node_type: _schema.Node) -> bool: + def _has_type(self, subject: URI, node_type: bsc.Node) -> bool: """Return True if *subject* is a node of class *node_type* or a subclass thereof.""" if node_type not in self.schema.nodes(): raise errors.ConsistencyError(f'{node_type} is not defined in the schema') - subject_types = list(self.graph.objects(rdflib.URIRef(subject), rdflib.RDF.type)) + subject_types = list(self._graph.objects(rdflib.URIRef(subject), rdflib.RDF.type)) if len(subject_types) == 0: return False elif len(subject_types) == 1: @@ -170,7 +177,7 @@ class SparqlStore(base.TripleStoreBase): def exists( self, - node_type: _schema.Node, + node_type: bsc.Node, guids: typing.Iterable[URI], ): """ @@ -179,7 +186,7 @@ class SparqlStore(base.TripleStoreBase): def create( self, - node_type: _schema.Node, + node_type: bsc.Node, guids: typing.Iterable[URI], ): """ @@ -195,13 +202,13 @@ class SparqlStore(base.TripleStoreBase): # FIXME: node exists and may have a different type! ignore? raise? report? continue # add node - self.transaction.add((guid, rdflib.RDF.type, rdflib.URIRef(node_type.uri))) + self._transaction.add((guid, rdflib.RDF.type, rdflib.URIRef(node_type.uri))) def set( self, - node_type: _schema.Node, # FIXME: is the node_type even needed? Couldn't I infer from the predicate? 
+ node_type: bsc.Node, guids: typing.Iterable[URI], - predicate: _schema.Predicate, + predicate: bsc.Predicate, values: typing.Iterable[typing.Any], ): # check node_type @@ -218,7 +225,7 @@ class SparqlStore(base.TripleStoreBase): return if predicate.unique and len(values) != 1: raise ValueError(values) - if isinstance(predicate.range, _schema.Node): + if isinstance(predicate.range, bsc.Node): values = set(values) # materialize to safeguard against iterators passed as argument inconsistent = {val for val in values if not self._has_type(val, predicate.range)} # catches nodes that don't exist and nodes that have an inconsistent type @@ -236,18 +243,18 @@ class SparqlStore(base.TripleStoreBase): for guid, value in itertools.product(guids, values): guid = rdflib.URIRef(guid) # convert value - if isinstance(predicate.range, _schema.Literal): + if isinstance(predicate.range, bsc.Literal): value = rdflib.Literal(value, datatype=rdflib.URIRef(predicate.range.uri)) - elif isinstance(predicate.range, _schema.Node): + elif isinstance(predicate.range, bsc.Node): value = rdflib.URIRef(value) else: raise errors.UnreachableError() # clear triples for unique predicates if predicate.unique: - for obj in self.graph.objects(guid, pred): + for obj in self._graph.objects(guid, pred): if obj != value: - self.transaction.remove((guid, pred, obj)) + self._transaction.remove((guid, pred, obj)) # add triple - self.transaction.add((guid, pred, value)) + self._transaction.add((guid, pred, value)) ## EOF ## -- cgit v1.2.3 From 3165c3609a5061135ff7393747f8dc3f7f7abe0c Mon Sep 17 00:00:00 2001 From: Matthias Baumgartner Date: Sun, 18 Dec 2022 14:07:56 +0100 Subject: graph schema migration --- bsfs/graph/graph.py | 24 ++++++++++++++++++++++++ bsfs/graph/schema.nt | 18 ++++++++++++++++++ 2 files changed, 42 insertions(+) create mode 100644 bsfs/graph/schema.nt (limited to 'bsfs') diff --git a/bsfs/graph/graph.py b/bsfs/graph/graph.py index 71973c2..4a36ff6 100644 --- a/bsfs/graph/graph.py +++ 
b/bsfs/graph/graph.py @@ -5,6 +5,7 @@ A copy of the license is provided with the project. Author: Matthias Baumgartner, 2022 """ # imports +import os import typing # bsfs imports @@ -35,6 +36,8 @@ class Graph(): def __init__(self, backend: TripleStoreBase, user: URI): self._backend = backend self._user = user + # ensure Graph schema requirements + self.migrate(self._backend.schema) def __hash__(self) -> int: return hash((type(self), self._backend, self._user)) @@ -55,7 +58,28 @@ class Graph(): """Return the store's local schema.""" return self._backend.schema + def migrate(self, schema: Schema, append: bool = True) -> 'Graph': + """Migrate the current schema to a new *schema*. + + Appends to the current schema by default; control this via *append*. + The `Graph` may add additional classes to the schema that are required for its internals. + """ + # check args + if not isinstance(schema, Schema): + raise TypeError(schema) + # append to current schema + if append: + schema = schema + self._backend.schema + # add Graph schema requirements + with open(os.path.join(os.path.dirname(__file__), 'schema.nt'), mode='rt', encoding='UTF-8') as ifile: + schema = schema + Schema.from_string(ifile.read()) + # migrate schema in backend + # FIXME: consult access controls! + self._backend.schema = schema + # return self + return self + def nodes(self, node_type: URI, guids: typing.Iterable[URI]) -> _nodes.Nodes: """ node_type = self.schema.node(node_type) diff --git a/bsfs/graph/schema.nt b/bsfs/graph/schema.nt new file mode 100644 index 0000000..8612681 --- /dev/null +++ b/bsfs/graph/schema.nt @@ -0,0 +1,18 @@ + +# generic prefixes +prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> +prefix xsd: <http://www.w3.org/2001/XMLSchema#> + +# bsfs prefixes +prefix bsfs: <http://bsfs.ai/schema/> +prefix bsm: <http://bsfs.ai/schema/Meta#> + +# literals +xsd:integer rdfs:subClassOf bsfs:Literal . + +# predicates +bsm:t_created rdfs:subClassOf bsfs:Predicate ; + rdfs:domain bsfs:Node ; + rdfs:range xsd:integer ; + bsfs:unique "true"^^xsd:boolean . 
+ -- cgit v1.2.3 From 58496960926a56149c10d64e01b6df7d048eed0e Mon Sep 17 00:00:00 2001 From: Matthias Baumgartner Date: Sun, 18 Dec 2022 14:11:27 +0100 Subject: triple store Open interface --- bsfs/triple_store/base.py | 7 ++----- bsfs/triple_store/sparql.py | 10 ++++------ 2 files changed, 6 insertions(+), 11 deletions(-) (limited to 'bsfs') diff --git a/bsfs/triple_store/base.py b/bsfs/triple_store/base.py index a2668c3..942a16b 100644 --- a/bsfs/triple_store/base.py +++ b/bsfs/triple_store/base.py @@ -54,11 +54,7 @@ class TripleStoreBase(abc.ABC): @classmethod @abc.abstractmethod - def Open( - cls, - uri: str, - **kwargs: typing.Any, - ) -> 'TripleStoreBase': + def Open(cls, **kwargs: typing.Any) -> 'TripleStoreBase': # pylint: disable=invalid-name # capitalized classmethod """Return a TripleStoreBase instance connected to *uri*.""" @abc.abstractmethod @@ -75,6 +71,7 @@ class TripleStoreBase(abc.ABC): """Return the store's local schema.""" @schema.setter + @abc.abstractmethod def schema(self, schema: _schema.Schema): """Migrate to new schema by adding or removing class definitions. diff --git a/bsfs/triple_store/sparql.py b/bsfs/triple_store/sparql.py index d9ed55a..fc161b3 100644 --- a/bsfs/triple_store/sparql.py +++ b/bsfs/triple_store/sparql.py @@ -72,13 +72,11 @@ class SparqlStore(base.TripleStoreBase): self._transaction = _Transaction(self._graph) self._schema = bsc.Schema.Empty() + # NOTE: mypy and pylint complain about the **kwargs not being listed (contrasting super) + # However, not having it here is clearer since it's explicit that there are no arguments. 
@classmethod - def Open( - cls, - uri: str, - **kwargs: typing.Any, - ) -> 'SparqlStore': - return cls(None) + def Open(cls) -> 'SparqlStore': # type: ignore [override] # pylint: disable=arguments-differ + return cls() def commit(self): self._transaction.commit() -- cgit v1.2.3 From e19c8f9d0818a147832df0945188ea14de9c7690 Mon Sep 17 00:00:00 2001 From: Matthias Baumgartner Date: Sun, 18 Dec 2022 14:15:18 +0100 Subject: documentation, types, and style fixes --- bsfs/graph/ac/base.py | 6 +++- bsfs/graph/ac/null.py | 3 +- bsfs/graph/graph.py | 17 +++++++++-- bsfs/graph/nodes.py | 44 ++++++--------------------- bsfs/schema/schema.py | 56 ++++++++++++++++++++++++---------- bsfs/schema/types.py | 65 ++++++++++++++++++++++------------------ bsfs/triple_store/base.py | 33 ++++++++++++++++---- bsfs/triple_store/sparql.py | 73 ++++++++++++++++++++++----------------------- 8 files changed, 170 insertions(+), 127 deletions(-) (limited to 'bsfs') diff --git a/bsfs/graph/ac/base.py b/bsfs/graph/ac/base.py index 80742d7..bc9aeb3 100644 --- a/bsfs/graph/ac/base.py +++ b/bsfs/graph/ac/base.py @@ -22,7 +22,11 @@ __all__: typing.Sequence[str] = ( ## code ## class AccessControlBase(abc.ABC): - """ + """Defines the interface for access control policies. + + An access control policy governs which actions a user may take to query + or to manipulate a graph. + """ # The triple store backend. 
diff --git a/bsfs/graph/ac/null.py b/bsfs/graph/ac/null.py index 288a0da..36838bd 100644 --- a/bsfs/graph/ac/null.py +++ b/bsfs/graph/ac/null.py @@ -24,8 +24,7 @@ __all__: typing.Sequence[str] = ( ## code ## class NullAC(base.AccessControlBase): - """ - """ + """The NULL access control implements a dummy policy that allows any action to any user.""" def is_protected_predicate(self, pred: schema.Predicate) -> bool: """Return True if a predicate cannot be modified manually.""" diff --git a/bsfs/graph/graph.py b/bsfs/graph/graph.py index 4a36ff6..87f7a31 100644 --- a/bsfs/graph/graph.py +++ b/bsfs/graph/graph.py @@ -25,8 +25,15 @@ __all__: typing.Sequence[str] = ( ## code ## class Graph(): + """The Graph class is a high-level interface to a graph stored in a triple store. + + The Graph class provides a convenient interface to query and access a graph. + Since it logically builds on the concept of graphs it is easier to + navigate than raw triple stores. Naturally, it uses a triple store + as *backend*. It also controls actions via access permissions to a *user*. + """ - """ + # link to the triple storage backend. _backend: TripleStoreBase @@ -81,8 +88,14 @@ class Graph(): return self def nodes(self, node_type: URI, guids: typing.Iterable[URI]) -> _nodes.Nodes: + """Return nodes *guids* of type *node_type* as a `bsfs.graph.Nodes` instance. + + Note that the *guids* need not exist (however, the *node_type* has + to be part of the schema). Nonexistent guids will be created (using + *node_type*) once some data is assigned to them. + """ - node_type = self.schema.node(node_type) + type_ = self.schema.node(node_type) # NOTE: Nodes constructor materializes guids. return _nodes.Nodes(self._backend, self._user, type_, guids) diff --git a/bsfs/graph/nodes.py b/bsfs/graph/nodes.py index 7b0e8f4..c417a0e 100644 --- a/bsfs/graph/nodes.py +++ b/bsfs/graph/nodes.py @@ -5,7 +5,6 @@ A copy of the license is provided with the project. 
Author: Matthias Baumgartner, 2022 """ # imports -import itertools import time import typing @@ -87,34 +86,14 @@ class Nodes(): pred: URI, # FIXME: URI or _schema.Predicate? value: typing.Any, ) -> 'Nodes': - """ - """ - try: - # insert triples - self.__set(pred, value) - # save changes - self._backend.commit() - - except ( - errors.PermissionDeniedError, # tried to set a protected predicate (ns.bsm.t_created) - errors.ConsistencyError, # node types are not in the schema or don't match the predicate - errors.InstanceError, # guids/values don't have the correct type - TypeError, # value is supposed to be a Nodes instance - ValueError, # multiple values passed to unique predicate - ): - # revert changes - self._backend.rollback() - # notify the client - raise - - return self + """Set predicate *pred* to *value*.""" + return self.set_from_iterable([(pred, value)]) def set_from_iterable( self, predicate_values: typing.Iterable[typing.Tuple[URI, typing.Any]], # FIXME: URI or _schema.Predicate? ) -> 'Nodes': - """ - """ + """Set multiple predicate-value pairs at once.""" # TODO: Could group predicate_values by predicate to gain some efficiency # TODO: ignore errors on some predicates; For now this could leave residual # data (e.g. some nodes were created, some not). @@ -137,14 +116,11 @@ class Nodes(): # notify the client raise + # FIXME: How about other errors? Shouldn't I then rollback as well?! + return self - def __set( - self, - predicate: URI, - value: typing.Any, - #on_error: str = 'ignore', # ignore, rollback - ): + def __set(self, predicate: URI, value: typing.Any): """ """ # get normalized predicate. Raises KeyError if *pred* not in the schema. 
@@ -216,11 +192,9 @@ class Nodes(): else: raise errors.UnreachableError() - def _ensure_nodes( - self, - node_type: _schema.Node, - guids: typing.Iterable[URI], - ): + def _ensure_nodes(self, node_type: _schema.Node, guids: typing.Iterable[URI]): + """ + """ # check node existence guids = set(guids) existing = set(self._backend.exists(node_type, guids)) diff --git a/bsfs/schema/schema.py b/bsfs/schema/schema.py index b6f37a7..c5d4571 100644 --- a/bsfs/schema/schema.py +++ b/bsfs/schema/schema.py @@ -25,11 +25,28 @@ __all__: typing.Sequence[str] = ( ## code ## class Schema(): - """ + """Graph schema. + + Use `Schema.Empty()` to create a new, empty Schema rather than construct + it directly. + + The schema is defined by three sets: Predicates, Nodes, and Literals. + + The Schema class guarantees two properties: completeness and consistency. + Completeness means that the schema covers all classes that are referred to + by any other class in the schema. Consistency means that each class is + identified by a unique URI and all classes that use that URI consequently + use the same definition. + """ + # node classes. _nodes: typing.Dict[URI, types.Node] + + # literal classes. _literals: typing.Dict[URI, types.Literal] + + # predicate classes.
_predicates: typing.Dict[URI, types.Predicate] def __init__( @@ -47,7 +64,8 @@ class Schema(): literals = set(literals) predicates = set(predicates) # include parents in predicates set - predicates |= {par for pred in predicates for par in pred.parents()} + # TODO: review type annotations and ignores for python >= 3.11 (parents is _Type but should be typing.Self) + predicates |= {par for pred in predicates for par in pred.parents()} # type: ignore [misc] # include predicate domain in nodes set nodes |= {pred.domain for pred in predicates} # include predicate range in nodes and literals sets @@ -57,8 +75,8 @@ class Schema(): # include parents in nodes and literals sets # NOTE: Must be done after predicate domain/range was handled # so that their parents are included as well. - nodes |= {par for node in nodes for par in node.parents()} - literals |= {par for lit in literals for par in lit.parents()} + nodes |= {par for node in nodes for par in node.parents()} # type: ignore [misc] + literals |= {par for lit in literals for par in lit.parents()} # type: ignore [misc] # assign members self._nodes = {node.uri: node for node in nodes} self._literals = {lit.uri: lit for lit in literals} @@ -153,9 +171,7 @@ class Schema(): return self.diff(other) def consistent_with(self, other: 'Schema') -> bool: - """Checks if two schemas have different definitions for the same uri. - Tests nodes, literals, and predicates. - """ + """Checks if two schemas have different predicate, node, or literal definitions for the same uri.""" # check arg if not isinstance(other, Schema): raise TypeError(other) @@ -181,7 +197,10 @@ class Schema(): return True @classmethod - def Union(cls, *args: typing.Union['Schema', typing.Iterable['Schema']]) -> 'Schema': + def Union( # pylint: disable=invalid-name # capitalized classmethod + cls, + *args: typing.Union['Schema', typing.Iterable['Schema']] + ) -> 'Schema': """Combine multiple Schema instances into a single one. 
As argument, you can either pass multiple Schema instances, or a single iterable over Schema instances. Any abc.Iterable will be accepted. @@ -200,7 +219,7 @@ class Schema(): if isinstance(args[0], cls): # args is sequence of Schema instances pass elif len(args) == 1 and isinstance(args[0], abc.Iterable): # args is a single iterable - args = args[0] + args = args[0] # type: ignore [assignment] # we checked and thus know that args[0] is an iterable else: raise TypeError(f'expected multiple Schema instances or a single Iterable, found {args}') @@ -237,25 +256,31 @@ class Schema(): ## getters ## - # FIXME: which of the getters below are actually needed? + # FIXME: nodes, predicates, literals could be properties # FIXME: interchangeability of URI and _Type?! def has_node(self, node: URI) -> bool: + """Return True if a Node with URI *node* is part of the schema.""" return node in self._nodes def has_literal(self, lit: URI) -> bool: + """Return True if a Literal with URI *lit* is part of the schema.""" return lit in self._literals def has_predicate(self, pred: URI) -> bool: + """Return True if a Predicate with URI *pred* is part of the schema.""" return pred in self._predicates - def nodes(self) -> typing.Iterator[types.Node]: # FIXME: type annotation + def nodes(self) -> typing.Iterable[types.Node]: + """Return an iterator over Node classes.""" return self._nodes.values() - def literals(self) -> typing.Iterator[types.Literal]: # FIXME: type annotation + def literals(self) -> typing.Iterable[types.Literal]: + """Return an iterator over Literal classes.""" return self._literals.values() - def predicates(self) -> typing.Iterator[types.Predicate]: # FIXME: type annotation + def predicates(self) -> typing.Iterable[types.Predicate]: + """Return an iterator over Predicate classes.""" return self._predicates.values() def node(self, uri: URI) -> types.Node: @@ -275,7 +300,8 @@ class Schema(): @classmethod - def Empty(cls) -> 'Schema': + def Empty(cls) -> 'Schema': # pylint: 
disable=invalid-name # capitalized classmethod + """Return a minimal Schema.""" node = types.Node(ns.bsfs.Node, None) literal = types.Literal(ns.bsfs.Literal, None) predicate = types.Predicate( @@ -289,7 +315,7 @@ class Schema(): @classmethod - def from_string(cls, schema: str) -> 'Schema': + def from_string(cls, schema: str) -> 'Schema': # pylint: disable=invalid-name # capitalized classmethod """Load and return a Schema from a string.""" # parse string into rdf graph graph = rdflib.Graph() diff --git a/bsfs/schema/types.py b/bsfs/schema/types.py index 6e257e3..54a7e99 100644 --- a/bsfs/schema/types.py +++ b/bsfs/schema/types.py @@ -93,7 +93,7 @@ class _Type(): uri: URI # parent's class uris. - parent: typing.Optional['_Type'] + parent: typing.Optional['_Type'] # TODO: for python >=3.11: use typing.Self def __init__( self, @@ -123,63 +123,70 @@ class _Type(): def __hash__(self) -> int: return hash((type(self), self.uri, self.parent)) + # NOTE: For equality and order functions (lt, gt, le, ge) we explicitly want type equality! + # Consider the statements below, with class Vehicle(_Type) and class TwoWheel(Vehicle): + # * Vehicle('foo', None) == TwoWheel('foo', None): Instances of different types cannot be equivalent. + # * Vehicle('foo', None) <= TwoWheel('foo', None): Cannot compare the different types Vehicles and TwoWheel. 
+ def __eq__(self, other: typing.Any) -> bool: """Return True iff *self* is equivalent to *other*.""" - return type(self) == type(other) \ + # pylint: disable=unidiomatic-typecheck + return type(other) is type(self) \ and self.uri == other.uri \ and self.parent == other.parent + def __lt__(self, other: typing.Any) -> bool: """Return True iff *self* is a true subclass of *other*.""" - if not type(self) == type(other): # type mismatch + if not type(self) is type(other): # type mismatch # pylint: disable=unidiomatic-typecheck return NotImplemented - elif self.uri == other.uri: # equivalence + if self.uri == other.uri: # equivalence return False - elif self in other.parents(): # superclass + if self in other.parents(): # superclass return False - elif other in self.parents(): # subclass + if other in self.parents(): # subclass return True - else: # not related - return False + # not related + return False def __le__(self, other: typing.Any) -> bool: """Return True iff *self* is equivalent or a subclass of *other*.""" - if not type(self) == type(other): # type mismatch + if not type(self) is type(other): # type mismatch # pylint: disable=unidiomatic-typecheck return NotImplemented - elif self.uri == other.uri: # equivalence + if self.uri == other.uri: # equivalence return True - elif self in other.parents(): # superclass + if self in other.parents(): # superclass return False - elif other in self.parents(): # subclass + if other in self.parents(): # subclass return True - else: # not related - return False + # not related + return False def __gt__(self, other: typing.Any) -> bool: """Return True iff *self* is a true superclass of *other*.""" - if not type(self) == type(other): # type mismatch + if not type(self) is type(other): # type mismatch # pylint: disable=unidiomatic-typecheck return NotImplemented - elif self.uri == other.uri: # equivalence + if self.uri == other.uri: # equivalence return False - elif self in other.parents(): # superclass + if self in 
other.parents(): # superclass return True - elif other in self.parents(): # subclass - return False - else: # not related + if other in self.parents(): # subclass return False + # not related + return False def __ge__(self, other: typing.Any) -> bool: """Return True iff *self* is eqiuvalent or a superclass of *other*.""" - if not type(self) == type(other): # type mismatch + if not type(self) is type(other): # type mismatch # pylint: disable=unidiomatic-typecheck return NotImplemented - elif self.uri == other.uri: # equivalence + if self.uri == other.uri: # equivalence return True - elif self in other.parents(): # superclass + if self in other.parents(): # superclass return True - elif other in self.parents(): # subclass - return False - else: # not related + if other in self.parents(): # subclass return False + # not related + return False class _Vertex(_Type): @@ -216,10 +223,10 @@ class Predicate(_Type): self, # Type members uri: URI, - parent: 'Predicate', + parent: typing.Optional['Predicate'], # Predicate members domain: Node, - range: typing.Optional[typing.Union[Node, Literal]], + range: typing.Optional[typing.Union[Node, Literal]], # pylint: disable=redefined-builtin unique: bool, ): # check arguments @@ -246,7 +253,7 @@ class Predicate(_Type): self, uri: URI, domain: typing.Optional[Node] = None, - range: typing.Optional[_Vertex] = None, + range: typing.Optional[_Vertex] = None, # pylint: disable=redefined-builtin unique: typing.Optional[bool] = None, **kwargs, ): diff --git a/bsfs/triple_store/base.py b/bsfs/triple_store/base.py index 942a16b..6561262 100644 --- a/bsfs/triple_store/base.py +++ b/bsfs/triple_store/base.py @@ -21,7 +21,21 @@ __all__: typing.Sequence[str] = ( ## code ## class TripleStoreBase(abc.ABC): - """ + """TripleStore base class. + + Use the `Open` method to create a new instance and to initialize + the required structures. + + Triple stores express a graph via its (subject, predicate, object) triples. 
+ They provide methods to add and remove triples, and to query the storage + for given graph structures. The subject is always a node in the graph, + whereas nodes are identifiable by a unique URI. Note that blank nodes + (without an explicit URI) are not supported. The object can be another + Node or a Literal value. The relation between a subject and an object + is expressed via a Predicate. The graph structures are governed by a + schema that defines which Node, Literal, and Predicate classes exist + and how they can interact (see `bsfs.schema.Schema`). + """ # storage's URI. None implies a temporary location. @@ -99,9 +113,8 @@ class TripleStoreBase(abc.ABC): self, node_type: _schema.Node, guids: typing.Iterable[URI], - ): - """ - """ + ) -> typing.Iterable[URI]: + """Return those *guids* that exist and have type *node_type* or a subclass thereof.""" @abc.abstractmethod def create( @@ -119,7 +132,17 @@ class TripleStoreBase(abc.ABC): predicate: _schema.Predicate, values: typing.Iterable[typing.Any], ): - """ + """Add triples to the graph. + + It is assumed that all of *guids* exist and have *node_type*. + This method adds a triple (guid, predicate, value) for every guid in + *guids* and each value in *values* (cartesian product). Note that + *values* must have length one for unique predicates, and that + currently existing values will be overwritten in this case. + It also verifies that all symbols are part of the schema and that + the *predicate* matches the *node_type*. + Raises `bsfs.errors.ConsistencyError` if these assumptions are violated. + """ ## EOF ## diff --git a/bsfs/triple_store/sparql.py b/bsfs/triple_store/sparql.py index fc161b3..23059f7 100644 --- a/bsfs/triple_store/sparql.py +++ b/bsfs/triple_store/sparql.py @@ -28,33 +28,52 @@ __all__: typing.Sequence[str] = ( class _Transaction(): """Lightweight rdflib transactions for in-memory databases.""" - def __init__(self, graph): + # graph instance.
+ _graph: rdflib.Graph + + # current log of added triples. + _added: typing.List[typing.Any] + + # current log of removed triples. + _removed: typing.List[typing.Any] + + def __init__(self, graph: rdflib.Graph): self._graph = graph - self.commit() # initialize + # initialize internal structures + self.commit() def commit(self): + """Commit temporary changes.""" self._added = [] self._removed = [] def rollback(self): + """Undo changes since the last commit.""" for triple in self._added: self._graph.remove(triple) for triple in self._removed: self._graph.add(triple) - def add(self, triple): + def add(self, triple: typing.Any): + """Add a triple to the graph.""" if triple not in self._graph: self._added.append(triple) self._graph.add(triple) - def remove(self, triple): + def remove(self, triple: typing.Any): + """Remove a triple from the graph.""" if triple in self._graph: self._removed.append(triple) self._graph.remove(triple) class SparqlStore(base.TripleStoreBase): - """ + """Sparql-based triple store. + + The sparql triple store uses a third-party backend + (currently rdflib) to store triples and manages them via + the Sparql query language. + """ # The rdflib graph. @@ -89,27 +108,7 @@ class SparqlStore(base.TripleStoreBase): return self._schema @schema.setter - def schema(self, schema: _schema.Schema): - """Migrate to new schema by adding or removing class definitions. - - Commits before and after the migration. - - Instances of removed classes will be deleted irreversably. - Note that modifying an existing class is not directly supported. - Also, it is generally discouraged, since changing definitions may - lead to inconsistencies across multiple clients in a distributed - setting. Instead, consider introducing a new class under its own - uri. Such a migration would look as follows: - - 1. Add new class definitions. - 2. Create instances of the new classes and copy relevant data. - 3. Remove the old definitions. 
- - To modify a class, i.e., re-use a previous uri with a new - class definition, you would have to migrate via temporary - class definitions, and thus repeat the above procedure two times. - - """ + def schema(self, schema: bsc.Schema): # check args: Schema instanace if not isinstance(schema, bsc.Schema): raise TypeError(schema) @@ -162,16 +161,14 @@ class SparqlStore(base.TripleStoreBase): subject_types = list(self._graph.objects(rdflib.URIRef(subject), rdflib.RDF.type)) if len(subject_types) == 0: return False - elif len(subject_types) == 1: - node = self.schema.node(URI(subject_types[0])) + if len(subject_types) == 1: + node = self.schema.node(URI(subject_types[0])) # type: ignore [arg-type] # URI is a subtype of str if node == node_type: return True - elif node_type in node.parents(): + if node_type in node.parents(): return True - else: - return False - else: - raise errors.UnreachableError() + return False + raise errors.UnreachableError() def exists( self, @@ -187,20 +184,18 @@ class SparqlStore(base.TripleStoreBase): node_type: bsc.Node, guids: typing.Iterable[URI], ): - """ - """ # check node_type if node_type not in self.schema.nodes(): raise errors.ConsistencyError(f'{node_type} is not defined in the schema') # check and create guids for guid in guids: - guid = rdflib.URIRef(guid) + subject = rdflib.URIRef(guid) # check node existence - if (guid, rdflib.RDF.type, None) in self.graph: + if (subject, rdflib.RDF.type, None) in self._graph: # FIXME: node exists and may have a different type! ignore? raise? report? 
continue # add node - self._transaction.add((guid, rdflib.RDF.type, rdflib.URIRef(node_type.uri))) + self._transaction.add((subject, rdflib.RDF.type, rdflib.URIRef(node_type.uri))) def set( self, @@ -218,6 +213,8 @@ class SparqlStore(base.TripleStoreBase): if not node_type <= predicate.domain: raise errors.ConsistencyError(f'{node_type} must be a subclass of {predicate.domain}') # NOTE: predicate.range is in the schema since predicate is in the schema. + # materialize values + values = set(values) # check values if len(values) == 0: return -- cgit v1.2.3 From a5ce14c8bbd55f4a078ceea9384cda56bf42a18b Mon Sep 17 00:00:00 2001 From: Matthias Baumgartner Date: Sun, 18 Dec 2022 14:16:06 +0100 Subject: SparqlStore.exists bugfix --- bsfs/triple_store/sparql.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) (limited to 'bsfs') diff --git a/bsfs/triple_store/sparql.py b/bsfs/triple_store/sparql.py index 23059f7..7516dff 100644 --- a/bsfs/triple_store/sparql.py +++ b/bsfs/triple_store/sparql.py @@ -174,10 +174,8 @@ class SparqlStore(base.TripleStoreBase): self, node_type: bsc.Node, guids: typing.Iterable[URI], - ): - """ - """ - return {subj for subj in guids if self._has_type(subj, node_type)} + ) -> typing.Iterable[URI]: + return (subj for subj in guids if self._has_type(subj, node_type)) def create( self, -- cgit v1.2.3 From 8ed8dbb4010a9a75cf6e61d185327825fe783776 Mon Sep 17 00:00:00 2001 From: Matthias Baumgartner Date: Sun, 18 Dec 2022 14:16:40 +0100 Subject: Graph.node interface --- bsfs/graph/graph.py | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'bsfs') diff --git a/bsfs/graph/graph.py b/bsfs/graph/graph.py index 87f7a31..b7b9f1c 100644 --- a/bsfs/graph/graph.py +++ b/bsfs/graph/graph.py @@ -99,4 +99,15 @@ class Graph(): # NOTE: Nodes constructor materializes guids. 
return _nodes.Nodes(self._backend, self._user, type_, guids) + def node(self, node_type: URI, guid: URI) -> _nodes.Nodes: + """Return node *guid* of type *node_type* as a `bsfs.graph.Nodes` instance. + + Note that the *guid* need not exist (however, the *node_type* has + to be part of the schema). An inexistent guid will be created (using + *node_type*) once some data is assigned to it. + + """ + type_ = self.schema.node(node_type) + return _nodes.Nodes(self._backend, self._user, type_, {guid}) + ## EOF ## -- cgit v1.2.3 From 12d95ed8bda18f2ef9d36190919cb838bfb5efcf Mon Sep 17 00:00:00 2001 From: Matthias Baumgartner Date: Sun, 18 Dec 2022 14:17:44 +0100 Subject: bsfs lib and builders --- bsfs/__init__.py | 15 ++++++++++ bsfs/front/__init__.py | 20 ++++++++++++++ bsfs/front/bsfs.py | 29 +++++++++++++++++++ bsfs/front/builder.py | 75 ++++++++++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 139 insertions(+) create mode 100644 bsfs/front/__init__.py create mode 100644 bsfs/front/bsfs.py create mode 100644 bsfs/front/builder.py (limited to 'bsfs') diff --git a/bsfs/__init__.py b/bsfs/__init__.py index f5f5cbc..079ffaf 100644 --- a/bsfs/__init__.py +++ b/bsfs/__init__.py @@ -4,5 +4,20 @@ Part of the BlackStar filesystem (bsfs) module. A copy of the license is provided with the project.
+Author: Matthias Baumgartner, 2022 +""" +# imports +import typing + +# inner-module imports +from .bsfs import Open +from .builder import build_graph + +# exports +__all__: typing.Sequence[str] = ( + 'Open', + 'build_graph', + ) + +## EOF ## diff --git a/bsfs/front/bsfs.py b/bsfs/front/bsfs.py new file mode 100644 index 0000000..968b3f5 --- /dev/null +++ b/bsfs/front/bsfs.py @@ -0,0 +1,29 @@ +""" + +Part of the BlackStar filesystem (bsfs) module. +A copy of the license is provided with the project. +Author: Matthias Baumgartner, 2022 +""" +# imports +import typing + +# bsfs imports +from bsfs.graph import Graph + +# inner-module imports +from . import builder + +# exports +__all__: typing.Sequence[str] = ( + 'Open', + ) + + +## code ## + +# NOTE: Capitalized to mark entry point and to separate from builtin open. +def Open(cfg: typing.Any) -> Graph: # pylint: disable=invalid-name + """Open a BSFS storage and return a `bsfs.graph.Graph` instance.""" + return builder.build_graph(cfg) + +## EOF ## diff --git a/bsfs/front/builder.py b/bsfs/front/builder.py new file mode 100644 index 0000000..73f1703 --- /dev/null +++ b/bsfs/front/builder.py @@ -0,0 +1,75 @@ +""" + +Part of the BlackStar filesystem (bsfs) module. +A copy of the license is provided with the project. 
+Author: Matthias Baumgartner, 2022 +""" +# imports +import typing + +# bsfs imports +from bsfs.graph import Graph +from bsfs.triple_store import TripleStoreBase, SparqlStore +from bsfs.utils import URI, errors + +# exports +__all__: typing.Sequence[str] = ( + 'build_graph', + ) + +# constants +_graph_classes = { + 'Graph': Graph, + } + +_backend_classes = { + 'SparqlStore': SparqlStore, + } + + +## code ## + +def build_backend(cfg: typing.Any) -> TripleStoreBase: + """Build and return a backend from user-provided config.""" + # essential checks + if not isinstance(cfg, dict): + raise TypeError(cfg) + if len(cfg) != 1: + raise errors.ConfigError(f'expected a single key that identifies the backend class, found {list(cfg)}') + # unpack from config + name = next(iter(cfg)) + args = cfg[name] + # check name + if name not in _backend_classes: + raise errors.ConfigError(f'{name} is not a valid triple store class name') + # build and return backend + cls = _backend_classes[name] + return cls.Open(**args) + + +def build_graph(cfg: typing.Any) -> Graph: + """Build and return a Graph from user-provided config.""" + # essential checks + if not isinstance(cfg, dict): + raise TypeError(cfg) + if len(cfg) != 1: + raise errors.ConfigError(f'expected a single key that identifies the graph class, found {list(cfg)}') + # unpack from config + name = next(iter(cfg)) + args = cfg[name] + # check name + if name not in _graph_classes: + raise errors.ConfigError(f'{name} is not a valid graph class name') + # check user argument + if 'user' not in args: + raise errors.ConfigError('required argument "user" is not provided') + user = URI(args['user']) + # check backend argument + if 'backend' not in args: + raise errors.ConfigError('required argument "backend" is not provided') + backend = build_backend(args['backend']) + # build and return graph + cls = _graph_classes[name] + return cls(backend, user) + +## EOF ## -- cgit v1.2.3 From e94368c75468e3e94382b12705e55d396249eaca Mon Sep 17 
00:00:00 2001 From: Matthias Baumgartner Date: Sun, 18 Dec 2022 14:20:25 +0100 Subject: bsfs applications --- bsfs/apps/__init__.py | 20 ++++++++++++++ bsfs/apps/init.py | 73 +++++++++++++++++++++++++++++++++++++++++++++++++++ bsfs/apps/migrate.py | 67 ++++++++++++++++++++++++++++++++++++++++++++++ bsfs/utils/errors.py | 3 +++ 4 files changed, 163 insertions(+) create mode 100644 bsfs/apps/__init__.py create mode 100644 bsfs/apps/init.py create mode 100644 bsfs/apps/migrate.py (limited to 'bsfs') diff --git a/bsfs/apps/__init__.py b/bsfs/apps/__init__.py new file mode 100644 index 0000000..7efaa87 --- /dev/null +++ b/bsfs/apps/__init__.py @@ -0,0 +1,20 @@ +""" + +Part of the BlackStar filesystem (bsfs) module. +A copy of the license is provided with the project. +Author: Matthias Baumgartner, 2022 +""" +# imports +import typing + +# inner-module imports +from .init import main as init +from .migrate import main as migrate + +# exports +__all__: typing.Sequence[str] = ( + 'init', + 'migrate', + ) + +## EOF ## diff --git a/bsfs/apps/init.py b/bsfs/apps/init.py new file mode 100644 index 0000000..3e2ef37 --- /dev/null +++ b/bsfs/apps/init.py @@ -0,0 +1,73 @@ +""" + +Part of the BlackStar filesystem (bsfs) module. +A copy of the license is provided with the project. +Author: Matthias Baumgartner, 2022 +""" +# imports +import argparse +import json +import sys +import typing + +# bsfs imports +from bsfs.utils import errors + +# exports +__all__: typing.Sequence[str] = ( + 'main', + ) + +## code ## + +def init_sparql_store(user) -> typing.Any: + """Initialize a SparqlStore backend. 
Returns a configuration to load it.""" + # nothing to do for non-persistent store + # return config to storage + return { + 'Graph': { + 'user': user, + 'backend': { + 'SparqlStore': {}, + }, + } + } + + +def main(argv): + """Create a new bsfs storage structure.""" + parser = argparse.ArgumentParser(description=main.__doc__, prog='init') + # global arguments + parser.add_argument('--user', type=str, default='http://example.com/me', + help='Default user.') + parser.add_argument('--output', type=str, default=None, + help='Write the config to a file instead of standard output.') + #parser.add_argument('--schema', type=str, default=None, + # help='Initial schema.') + # storage selection + parser.add_argument('store', choices=('sparql', ), + help='Which storage to initialize.') + # storage args + # parse args + args = parser.parse_args(argv) + + # initialize selected storage + if args.store == 'sparql': + config = init_sparql_store(args.user) + else: + raise errors.UnreachableError() + + # print config + if args.output is not None: + with open(args.output, mode='wt', encoding='UTF-8') as ofile: + json.dump(config, ofile) + else: + json.dump(config, sys.stdout) + + +## main ## + +if __name__ == '__main__': + main(sys.argv[1:]) + +## EOF ## diff --git a/bsfs/apps/migrate.py b/bsfs/apps/migrate.py new file mode 100644 index 0000000..91c1661 --- /dev/null +++ b/bsfs/apps/migrate.py @@ -0,0 +1,67 @@ +""" + +Part of the BlackStar filesystem (bsfs) module. +A copy of the license is provided with the project. 
+Author: Matthias Baumgartner, 2022 +""" +# imports +import argparse +import json +import logging +import sys +import typing + +# bsfs imports +import bsfs + +# exports +__all__: typing.Sequence[str] = ( + 'main', + ) + + +## code ## + +logger = logging.getLogger(__name__) + +def main(argv): + """Migrate a storage structure to a modified schema.""" + parser = argparse.ArgumentParser(description=main.__doc__, prog='migrate') + parser.add_argument('--remove', action='store_true', default=False, + help='Remove classes that are not specified in the provided schema.') + parser.add_argument('config', type=str, default=None, + help='Path to the storage config file.') + parser.add_argument('schema', nargs=argparse.REMAINDER, + help='Paths to schema files. Reads from standard input if no file is supplied.') + args = parser.parse_args(argv) + + # load storage config + with open(args.config, mode='rt', encoding='UTF-8') as ifile: + config = json.load(ifile) + # open bsfs storage + graph = bsfs.Open(config) + + # initialize schema + schema = bsfs.schema.Schema.Empty() + if len(args.schema) == 0: + # assemble schema from standard input + schema = schema + bsfs.schema.Schema.from_string(sys.stdin.read()) + else: + # assemble schema from input files + for pth in args.schema: + with open(pth, mode='rt', encoding='UTF-8') as ifile: + schema = schema + bsfs.schema.Schema.from_string(ifile.read()) + + # migrate schema + graph.migrate(schema, not args.remove) + + # return the migrated storage + return graph + + +## main ## + +if __name__ == '__main__': + main(sys.argv[1:]) + +## EOF ## diff --git a/bsfs/utils/errors.py b/bsfs/utils/errors.py index 04561a2..c5e8e16 100644 --- a/bsfs/utils/errors.py +++ b/bsfs/utils/errors.py @@ -35,4 +35,7 @@ class ProgrammingError(_BSFSError): class UnreachableError(ProgrammingError): """Bravo, you've reached a point in code that should logically not be reachable.""" +class ConfigError(_BSFSError): + """User config issue.""" + ## EOF ## -- cgit v1.2.3