diff options
65 files changed, 6182 insertions, 1 deletions
diff --git a/.coveragerc b/.coveragerc new file mode 100644 index 0000000..ff789f3 --- /dev/null +++ b/.coveragerc @@ -0,0 +1,15 @@ +[run] +dynamic_context = test_function +branch = True +source = bsfs +data_file = .coverage +command_line = -m unittest + +[report] +show_missing = True +skip_empty = True + +[html] +directory = .htmlcov +show_contexts = True + @@ -12,6 +12,7 @@ __pycache__ bsfs.egg-info htmlcov tags +dev/ env # dist builds diff --git a/.mypy.ini b/.mypy.ini new file mode 100644 index 0000000..a1f7fca --- /dev/null +++ b/.mypy.ini @@ -0,0 +1,3 @@ +[mypy] +ignore_missing_imports = True +packages=bsfs diff --git a/.pylintrc b/.pylintrc new file mode 100644 index 0000000..7885c4e --- /dev/null +++ b/.pylintrc @@ -0,0 +1,193 @@ +[MAIN] + +# Pickle collected data for later comparisons. +persistent=no + +# Minimum Python version to use for version dependent checks. Will default to +# the version used to run pylint. +py-version=3.8 + +# Discover python modules and packages in the file system subtree. +recursive=yes + +# When enabled, pylint would attempt to guess common misconfiguration and emit +# user-friendly hints instead of false-positive error messages. +suggestion-mode=yes + + +[BASIC] + +# Minimum line length for functions/classes that require docstrings, shorter +# ones are exempt. +docstring-min-length=-1 + +# Bad variable names which should always be refused, separated by a comma. +bad-names=foo,bar,abc,cba,xyz,zyx,foobar,hello,world + +# Good variable names which should always be accepted, separated by a comma. +good-names=i,j,k,n,_,rx + +# Naming style matching correct argument names. +argument-naming-style=snake_case + +# Naming style matching correct attribute names. +attr-naming-style=snake_case + +# Naming style matching correct class attribute names. +class-attribute-naming-style=any + +# Naming style matching correct class constant names. +class-const-naming-style=UPPER_CASE + +# Naming style matching correct class names. 
+class-naming-style=PascalCase + +# Naming style matching correct constant names. +const-naming-style=UPPER_CASE + +# Naming style matching correct function names. +function-naming-style=snake_case + +# Include a hint for the correct naming format with invalid-name. +include-naming-hint=yes + +# Naming style matching correct inline iteration names. +inlinevar-naming-style=any + +# Naming style matching correct method names. +method-naming-style=snake_case + +# Naming style matching correct module names. +module-naming-style=snake_case + +# Naming style matching correct variable names. +variable-naming-style=snake_case + + +[DESIGN] + +# Maximum number of arguments for function / method. +max-args=10 + +# Maximum number of attributes for a class (see R0902). +max-attributes=7 + +# Maximum number of boolean expressions in an if statement (see R0916). +max-bool-expr=5 + +# Maximum number of branch for function / method body. +max-branches=15 + +# Maximum number of locals for function / method body. +max-locals=15 + +# Maximum number of parents for a class (see R0901). +max-parents=7 + +# Maximum number of public methods for a class (see R0904). +max-public-methods=20 + +# Maximum number of return / yield for function / method body. +max-returns=6 + +# Maximum number of statements in function / method body. +max-statements=50 + +# Minimum number of public methods for a class (see R0903). +min-public-methods=1 + + +[FORMAT] + +# Expected format of line ending, e.g. empty (any line ending), LF or CRLF. +expected-line-ending-format= + +# Regexp for a line that is allowed to be longer than the limit. +ignore-long-lines=^\s*(# )?<?https?://\S+>?$ + +# Number of spaces of indent required inside a hanging or continued line. +indent-after-paren=4 + +# String used as indentation unit. This is usually " " (4 spaces) or "\t" (1 +# tab). +indent-string=' ' + +# Maximum number of characters on a single line. +max-line-length=120 + +# Maximum number of lines in a module. 
+max-module-lines=1000 + +# Allow the body of a class to be on the same line as the declaration if body +# contains single statement. +single-line-class-stmt=no + +# Allow the body of an if to be on the same line as the test if there is no +# else. +single-line-if-stmt=no + + +[IMPORTS] + +# List of modules that can be imported at any level, not just the top level +# one. +allow-any-import-level= + +# Allow wildcard imports from modules that define __all__. +allow-wildcard-with-all=no + + +[LOGGING] + +# The type of string formatting that logging methods do. `old` means using % +# formatting, `new` is for `{}` formatting. +logging-format-style=old + + + +[MISCELLANEOUS] + +# List of note tags to take in consideration, separated by a comma. +notes=FIXME,TODO + + + +[REPORTS] + +# Tells whether to display a full report or only the messages. +reports=yes + +# Activate the evaluation score. +score=yes + + +[SIMILARITIES] + +# Minimum lines number of a similarity. +min-similarity-lines=4 + + +[STRING] + +# This flag controls whether inconsistent-quotes generates a warning when the +# character used as a quote delimiter is used inconsistently within a module. +check-quote-consistency=yes + + +[TYPECHECK] + +# Tells whether to warn about missing members when the owner of the attribute +# is inferred to be None. +ignore-none=no + + +[VARIABLES] + +# List of strings which can identify a callback function by name. A callback +# name must start or end with one of those strings. +callbacks=clbk,callback + + + + +# Disable: R1735 (use-dict-literal) @@ -1,4 +1,4 @@ -Copyright (c) 2021, Matthias Baumgartner +Copyright (c) 2022, Matthias Baumgartner All rights reserved. 
Redistribution and use in source and binary forms, with or without @@ -3,3 +3,55 @@ The Black Star File System ========================== +### Developer tools setup + +#### Test coverage (coverage) + +Resources: +* https://coverage.readthedocs.io/en/6.5.0/index.html +* https://nedbatchelder.com/blog/200710/flaws_in_coverage_measurement.html + +Commands: +$ pip install coverage +$ coverage run ; coverage html ; xdg-open .htmlcov/index.html + + + +#### Static code analysis (pylint) + +Resources: +* https://github.com/PyCQA/pylint +* https://pylint.org/ +* https://pylint.pycqa.org/en/latest/user_guide/messages/messages_overview.html#messages-overview + +Commands: +$ pip install pylint +$ pylint bsfs + + + +#### Type analysis (mypy) + +Resources: +* https://github.com/python/mypy +* https://mypy.readthedocs.io/en/stable/ + +Commands: +$ pip install mypy +$ mypy + + + +#### Documentation (sphinx) + +Resources: +* https://github.com/sphinx-doc/sphinx +* https://www.sphinx-doc.org/en/master/ + +Commands: +$ pip install sphinx +$ sphinx-build -b html docs docs/_build/html + + + + diff --git a/bsfs.app b/bsfs.app new file mode 100755 index 0000000..babacbb --- /dev/null +++ b/bsfs.app @@ -0,0 +1,52 @@ +"""BSFS tools. + +Part of the BlackStar filesystem (bsfs) module. +A copy of the license is provided with the project. 
+Author: Matthias Baumgartner, 2022 +""" +# imports +import argparse +import typing + +# module imports +import bsfs +import bsfs.apps + +# exports +__all__: typing.Sequence[str] = ( + 'main', + ) + +# config +apps = { + 'init' : bsfs.apps.init, + 'migrate' : bsfs.apps.migrate, + } + + +## code ## + +def main(argv): + """Black Star File System maintenance tools.""" + parser = argparse.ArgumentParser(description=main.__doc__, prog='bsfs') + # version + parser.add_argument('--version', action='version', + version='%(prog)s version {}.{}.{}'.format(*bsfs.version_info)) + # application selection + parser.add_argument('app', choices=apps.keys(), + help='Select the application to run.') + # dangling args + parser.add_argument('rest', nargs=argparse.REMAINDER) + # parse. NOTE: pass *argv* explicitly; parse_args() would silently ignore + # the argument that main() received (cmp. bsfs/apps/init.py and migrate.py). + args = parser.parse_args(argv) + # run application + apps[args.app](args.rest) + + +## main ## + +if __name__ == '__main__': + import sys + main(sys.argv[1:]) + +## EOF ## diff --git a/bsfs.toml b/bsfs.toml new file mode 100644 index 0000000..45bf1c9 --- /dev/null +++ b/bsfs.toml @@ -0,0 +1,11 @@ +[project] +name = "bsfs" +description = "A content aware graph file system." +version = "0.0.1" +license = {text = "BSD 3-Clause License"} +authors = [{name='Matthias Baumgartner', email="dev@igsor.net"}] +dependencies = [ + "rdflib", +] +requires-python = ">=3.7" + diff --git a/bsfs/__init__.py b/bsfs/__init__.py new file mode 100644 index 0000000..079ffaf --- /dev/null +++ b/bsfs/__init__.py @@ -0,0 +1,23 @@ +""" + +Part of the BlackStar filesystem (bsfs) module. +A copy of the license is provided with the project. 
+Author: Matthias Baumgartner, 2022 +""" +# imports +import collections +import typing + +# bsfs imports +from .front import Open + +# constants +T_VERSION_INFO = collections.namedtuple('T_VERSION_INFO', ('major', 'minor', 'micro')) # pylint: disable=invalid-name +version_info = T_VERSION_INFO(0, 0, 1) + +# exports +__all__: typing.Sequence[str] = ( + 'Open', + ) + +## EOF ## diff --git a/bsfs/apps/__init__.py b/bsfs/apps/__init__.py new file mode 100644 index 0000000..7efaa87 --- /dev/null +++ b/bsfs/apps/__init__.py @@ -0,0 +1,20 @@ +""" + +Part of the BlackStar filesystem (bsfs) module. +A copy of the license is provided with the project. +Author: Matthias Baumgartner, 2022 +""" +# imports +import typing + +# inner-module imports +from .init import main as init +from .migrate import main as migrate + +# exports +__all__: typing.Sequence[str] = ( + 'init', + 'migrate', + ) + +## EOF ## diff --git a/bsfs/apps/init.py b/bsfs/apps/init.py new file mode 100644 index 0000000..3e2ef37 --- /dev/null +++ b/bsfs/apps/init.py @@ -0,0 +1,73 @@ +""" + +Part of the BlackStar filesystem (bsfs) module. +A copy of the license is provided with the project. +Author: Matthias Baumgartner, 2022 +""" +# imports +import argparse +import json +import sys +import typing + +# bsfs imports +from bsfs.utils import errors + +# exports +__all__: typing.Sequence[str] = ( + 'main', + ) + +## code ## + +def init_sparql_store(user) -> typing.Any: + """Initialize a SparqlStore backend. 
Returns a configuration to load it.""" + # nothing to do for non-persistent store + # return config to storage + return { + 'Graph': { + 'user': user, + 'backend': { + 'SparqlStore': {}, + }, + } + } + + +def main(argv): + """Create a new bsfs storage structure.""" + parser = argparse.ArgumentParser(description=main.__doc__, prog='init') + # global arguments + parser.add_argument('--user', type=str, default='http://example.com/me', + help='Default user.') + parser.add_argument('--output', type=str, default=None, + help='Write the config to a file instead of standard output.') + #parser.add_argument('--schema', type=str, default=None, + # help='Initial schema.') + # storage selection + parser.add_argument('store', choices=('sparql', ), + help='Which storage to initialize.') + # storage args + # parse args + args = parser.parse_args(argv) + + # initialize selected storage + if args.store == 'sparql': + config = init_sparql_store(args.user) + else: + raise errors.UnreachableError() + + # print config + if args.output is not None: + with open(args.output, mode='wt', encoding='UTF-8') as ofile: + json.dump(config, ofile) + else: + json.dump(config, sys.stdout) + + +## main ## + +if __name__ == '__main__': + main(sys.argv[1:]) + +## EOF ## diff --git a/bsfs/apps/migrate.py b/bsfs/apps/migrate.py new file mode 100644 index 0000000..91c1661 --- /dev/null +++ b/bsfs/apps/migrate.py @@ -0,0 +1,67 @@ +""" + +Part of the BlackStar filesystem (bsfs) module. +A copy of the license is provided with the project. 
+Author: Matthias Baumgartner, 2022 +""" +# imports +import argparse +import json +import logging +import sys +import typing + +# bsfs imports +import bsfs + +# exports +__all__: typing.Sequence[str] = ( + 'main', + ) + + +## code ## + +logger = logging.getLogger(__name__) + +def main(argv): + """Migrate a storage structure to a modified schema.""" + parser = argparse.ArgumentParser(description=main.__doc__, prog='migrate') + parser.add_argument('--remove', action='store_true', default=False, + help='Remove classes that are not specified in the provided schema.') + parser.add_argument('config', type=str, default=None, + help='Path to the storage config file.') + parser.add_argument('schema', nargs=argparse.REMAINDER, + help='Paths to schema files. Reads from standard input if no file is supplied.') + args = parser.parse_args(argv) + + # load storage config + with open(args.config, mode='rt', encoding='UTF-8') as ifile: + config = json.load(ifile) + # open bsfs storage + graph = bsfs.Open(config) + + # initialize schema + schema = bsfs.schema.Schema.Empty() + if len(args.schema) == 0: + # assemble schema from standard input + schema = schema + bsfs.schema.Schema.from_string(sys.stdin.read()) + else: + # assemble schema from input files + for pth in args.schema: + with open(pth, mode='rt', encoding='UTF-8') as ifile: + schema = schema + bsfs.schema.Schema.from_string(ifile.read()) + + # migrate schema + graph.migrate(schema, not args.remove) + + # return the migrated storage + return graph + + +## main ## + +if __name__ == '__main__': + main(sys.argv[1:]) + +## EOF ## diff --git a/bsfs/front/__init__.py b/bsfs/front/__init__.py new file mode 100644 index 0000000..92886ab --- /dev/null +++ b/bsfs/front/__init__.py @@ -0,0 +1,20 @@ +""" + +Part of the BlackStar filesystem (bsfs) module. +A copy of the license is provided with the project. 
+Author: Matthias Baumgartner, 2022 +""" +# imports +import typing + +# inner-module imports +from .bsfs import Open +from .builder import build_graph + +# exports +__all__: typing.Sequence[str] = ( + 'Open', + 'build_graph', + ) + +## EOF ## diff --git a/bsfs/front/bsfs.py b/bsfs/front/bsfs.py new file mode 100644 index 0000000..968b3f5 --- /dev/null +++ b/bsfs/front/bsfs.py @@ -0,0 +1,29 @@ +""" + +Part of the BlackStar filesystem (bsfs) module. +A copy of the license is provided with the project. +Author: Matthias Baumgartner, 2022 +""" +# imports +import typing + +# bsfs imports +from bsfs.graph import Graph + +# inner-module imports +from . import builder + +# exports +__all__: typing.Sequence[str] = ( + 'Open', + ) + + +## code ## + +# NOTE: Capitalized to mark entry point and to separate from builtin open. +def Open(cfg: typing.Any) -> Graph: # pylint: disable=invalid-name + """Open a BSFS storage and return a `bsfs.graph.Graph` instance.""" + return builder.build_graph(cfg) + +## EOF ## diff --git a/bsfs/front/builder.py b/bsfs/front/builder.py new file mode 100644 index 0000000..73f1703 --- /dev/null +++ b/bsfs/front/builder.py @@ -0,0 +1,75 @@ +""" + +Part of the BlackStar filesystem (bsfs) module. +A copy of the license is provided with the project. 
+Author: Matthias Baumgartner, 2022 +""" +# imports +import typing + +# bsfs imports +from bsfs.graph import Graph +from bsfs.triple_store import TripleStoreBase, SparqlStore +from bsfs.utils import URI, errors + +# exports +__all__: typing.Sequence[str] = ( + 'build_graph', + ) + +# constants +_graph_classes = { + 'Graph': Graph, + } + +_backend_classes = { + 'SparqlStore': SparqlStore, + } + + +## code ## + +def build_backend(cfg: typing.Any) -> TripleStoreBase: + """Build and return a backend from user-provided config.""" + # essential checks + if not isinstance(cfg, dict): + raise TypeError(cfg) + if len(cfg) != 1: + raise errors.ConfigError(f'expected a single key that identifies the backend class, found {list(cfg)}') + # unpack from config + name = next(iter(cfg)) + args = cfg[name] + # check name + if name not in _backend_classes: + raise errors.ConfigError(f'{name} is not a valid triple store class name') + # build and return backend + cls = _backend_classes[name] + return cls.Open(**args) + + +def build_graph(cfg: typing.Any) -> Graph: + """Build and return a Graph from user-provided config.""" + # essential checks + if not isinstance(cfg, dict): + raise TypeError(cfg) + if len(cfg) != 1: + raise errors.ConfigError(f'expected a single key that identifies the graph class, found {list(cfg)}') + # unpack from config + name = next(iter(cfg)) + args = cfg[name] + # check name + if name not in _graph_classes: + raise errors.ConfigError(f'{name} is not a valid graph class name') + # check user argument + if 'user' not in args: + raise errors.ConfigError('required argument "user" is not provided') + user = URI(args['user']) + # check backend argument + if 'backend' not in args: + raise errors.ConfigError('required argument "backend" is not provided') + backend = build_backend(args['backend']) + # build and return graph + cls = _graph_classes[name] + return cls(backend, user) + +## EOF ## diff --git a/bsfs/graph/__init__.py b/bsfs/graph/__init__.py new file mode 
100644 index 0000000..82d2235 --- /dev/null +++ b/bsfs/graph/__init__.py @@ -0,0 +1,18 @@ +""" + +Part of the BlackStar filesystem (bsfs) module. +A copy of the license is provided with the project. +Author: Matthias Baumgartner, 2022 +""" +# imports +import typing + +# inner-module imports +from .graph import Graph + +# exports +__all__: typing.Sequence[str] = ( + 'Graph', + ) + +## EOF ## diff --git a/bsfs/graph/ac/__init__.py b/bsfs/graph/ac/__init__.py new file mode 100644 index 0000000..420de01 --- /dev/null +++ b/bsfs/graph/ac/__init__.py @@ -0,0 +1,20 @@ +""" + +Part of the BlackStar filesystem (bsfs) module. +A copy of the license is provided with the project. +Author: Matthias Baumgartner, 2022 +""" +# imports +import typing + +# inner-module imports +from .base import AccessControlBase +from .null import NullAC + +# exports +__all__: typing.Sequence[str] = ( + 'AccessControlBase', + 'NullAC', + ) + +## EOF ## diff --git a/bsfs/graph/ac/base.py b/bsfs/graph/ac/base.py new file mode 100644 index 0000000..bc9aeb3 --- /dev/null +++ b/bsfs/graph/ac/base.py @@ -0,0 +1,71 @@ +""" + +Part of the BlackStar filesystem (bsfs) module. +A copy of the license is provided with the project. +Author: Matthias Baumgartner, 2022 +""" +# imports +import abc +import typing + +# bsfs imports +from bsfs import schema +from bsfs.triple_store import TripleStoreBase +from bsfs.utils import URI + +# exports +__all__: typing.Sequence[str] = ( + 'AccessControlBase', + ) + + +## code ## + +class AccessControlBase(abc.ABC): + """Defines the interface for access control policies. + + An access control policy governs which actions a user may take to query + or to manipulate a graph. + + """ + + # The triple store backend. + _backend: TripleStoreBase + + # The current user. 
+ _user: URI + + def __init__( + self, + backend: TripleStoreBase, + user: URI, + ): + self._backend = backend + self._user = URI(user) + + @abc.abstractmethod + def is_protected_predicate(self, pred: schema.Predicate) -> bool: + """Return True if a predicate cannot be modified manually.""" + + @abc.abstractmethod + def create(self, node_type: schema.Node, guids: typing.Iterable[URI]): + """Perform post-creation operations on nodes, e.g. ownership information.""" + + @abc.abstractmethod + def link_from_node(self, node_type: schema.Node, guids: typing.Iterable[URI]) -> typing.Iterable[URI]: + """Return nodes for which outbound links can be written.""" + + @abc.abstractmethod + def link_to_node(self, node_type: schema.Node, guids: typing.Iterable[URI]) -> typing.Iterable[URI]: + """Return nodes for which inbound links can be written.""" + + @abc.abstractmethod + def write_literal(self, node_type: schema.Node, guids: typing.Iterable[URI]) -> typing.Iterable[URI]: + """Return nodes to which literals can be attached.""" + + @abc.abstractmethod + def createable(self, node_type: schema.Node, guids: typing.Iterable[URI]) -> typing.Iterable[URI]: + """Return nodes that are allowed to be created.""" + + +## EOF ## diff --git a/bsfs/graph/ac/null.py b/bsfs/graph/ac/null.py new file mode 100644 index 0000000..36838bd --- /dev/null +++ b/bsfs/graph/ac/null.py @@ -0,0 +1,52 @@ +""" + +Part of the BlackStar filesystem (bsfs) module. +A copy of the license is provided with the project. +Author: Matthias Baumgartner, 2022 +""" +# imports +import typing + +# bsfs imports +from bsfs import schema +from bsfs.namespace import ns +from bsfs.utils import URI + +# inner-module imports +from . 
import base + +# exports +__all__: typing.Sequence[str] = ( + 'NullAC', + ) + + +## code ## + +class NullAC(base.AccessControlBase): + """The NULL access control implements a dummy policy that allows any action to any user.""" + + def is_protected_predicate(self, pred: schema.Predicate) -> bool: + """Return True if a predicate cannot be modified manually.""" + return pred.uri == ns.bsm.t_created + + def create(self, node_type: schema.Node, guids: typing.Iterable[URI]): + """Perform post-creation operations on nodes, e.g. ownership information.""" + + def link_from_node(self, node_type: schema.Node, guids: typing.Iterable[URI]) -> typing.Iterable[URI]: + """Return nodes for which outbound links can be written.""" + return guids + + def link_to_node(self, node_type: schema.Node, guids: typing.Iterable[URI]) -> typing.Iterable[URI]: + """Return nodes for which inbound links can be written.""" + return guids + + def write_literal(self, node_type: schema.Node, guids: typing.Iterable[URI]) -> typing.Iterable[URI]: + """Return nodes to which literals can be attached.""" + return guids + + def createable(self, node_type: schema.Node, guids: typing.Iterable[URI]) -> typing.Iterable[URI]: + """Return nodes that are allowed to be created.""" + return guids + +## EOF ## diff --git a/bsfs/graph/graph.py b/bsfs/graph/graph.py new file mode 100644 index 0000000..b7b9f1c --- /dev/null +++ b/bsfs/graph/graph.py @@ -0,0 +1,113 @@ +""" + +Part of the BlackStar filesystem (bsfs) module. +A copy of the license is provided with the project. +Author: Matthias Baumgartner, 2022 +""" +# imports +import os +import typing + +# bsfs imports +from bsfs.schema import Schema +from bsfs.triple_store import TripleStoreBase +from bsfs.utils import URI, typename + +# inner-module imports +from . 
import nodes as _nodes + +# exports +__all__: typing.Sequence[str] = ( + 'Graph', + ) + + +## code ## + +class Graph(): + """The Graph class is + + The Graph class provides a convenient interface to query and access a graph. + Since it logically builds on the concept of graphs it is easier to + navigate than raw triple stores. Naturally, it uses a triple store + as *backend*. It also controls actions via access permissions to a *user*. + + """ + + # link to the triple storage backend. + _backend: TripleStoreBase + + # user uri. + _user: URI + + def __init__(self, backend: TripleStoreBase, user: URI): + self._backend = backend + self._user = user + # ensure Graph schema requirements + self.migrate(self._backend.schema) + + def __hash__(self) -> int: + return hash((type(self), self._backend, self._user)) + + def __eq__(self, other) -> bool: + return isinstance(other, type(self)) \ + and self._backend == other._backend \ + and self._user == other._user + + def __repr__(self) -> str: + return f'{typename(self)}(backend={repr(self._backend)}, user={self._user})' + + def __str__(self) -> str: + return f'{typename(self)}({str(self._backend)}, {self._user})' + + @property + def schema(self) -> Schema: + """Return the store's local schema.""" + return self._backend.schema + + def migrate(self, schema: Schema, append: bool = True) -> 'Graph': + """Migrate the current schema to a new *schema*. + + Appends to the current schema by default; control this via *append*. + The `Graph` may add additional classes to the schema that are required for its interals. + + """ + # check args + if not isinstance(schema, Schema): + raise TypeError(schema) + # append to current schema + if append: + schema = schema + self._backend.schema + # add Graph schema requirements + with open(os.path.join(os.path.dirname(__file__), 'schema.nt'), mode='rt', encoding='UTF-8') as ifile: + schema = schema + Schema.from_string(ifile.read()) + # migrate schema in backend + # FIXME: consult access controls! 
+ self._backend.schema = schema + # return self + return self + + def nodes(self, node_type: URI, guids: typing.Iterable[URI]) -> _nodes.Nodes: + """Return nodes *guids* of type *node_type* as a `bsfs.graph.Nodes` instance. + + Note that the *guids* need not to exist (however, the *node_type* has + to be part of the schema). Inexistent guids will be created (using + *node_type*) once some data is assigned to them. + + """ + type_ = self.schema.node(node_type) + # NOTE: Nodes constructor materializes guids. + return _nodes.Nodes(self._backend, self._user, type_, guids) + + def node(self, node_type: URI, guid: URI) -> _nodes.Nodes: + """Return node *guid* of type *node_type* as a `bsfs.graph.Nodes` instance. + + Note that the *guids* need not to exist (however, the *node_type* has + to be part of the schema). An inexistent guid will be created (using + *node_type*) once some data is assigned to them. + + """ + type_ = self.schema.node(node_type) + return _nodes.Nodes(self._backend, self._user, type_, {guid}) + +## EOF ## diff --git a/bsfs/graph/nodes.py b/bsfs/graph/nodes.py new file mode 100644 index 0000000..c417a0e --- /dev/null +++ b/bsfs/graph/nodes.py @@ -0,0 +1,217 @@ +""" + +Part of the BlackStar filesystem (bsfs) module. +A copy of the license is provided with the project. +Author: Matthias Baumgartner, 2022 +""" +# imports +import time +import typing + +# bsfs imports +from bsfs import schema as _schema +from bsfs.namespace import ns +from bsfs.triple_store import TripleStoreBase +from bsfs.utils import errors, URI, typename + +# inner-module imports +from . import ac + +# exports +__all__: typing.Sequence[str] = ( + 'Nodes', + ) + + +## code ## + +class Nodes(): + """ + NOTE: guids may or may not exist. This is not verified as nodes are created on demand. + """ + + # triple store backend. + _backend: TripleStoreBase + + # user uri. + _user: URI + + # node type. + _node_type: _schema.Node + + # guids of nodes. Can be empty. 
+ _guids: typing.Set[URI] + + def __init__( + self, + backend: TripleStoreBase, + user: URI, + node_type: _schema.Node, + guids: typing.Iterable[URI], + ): + self._backend = backend + self._user = user + self._node_type = node_type + self._guids = set(guids) + self.__ac = ac.NullAC(self._backend, self._user) + + def __eq__(self, other: typing.Any) -> bool: + return isinstance(other, Nodes) \ + and self._backend == other._backend \ + and self._user == other._user \ + and self._node_type == other._node_type \ + and self._guids == other._guids + + def __hash__(self) -> int: + return hash((type(self), self._backend, self._user, self._node_type, tuple(sorted(self._guids)))) + + def __repr__(self) -> str: + return f'{typename(self)}({self._backend}, {self._user}, {self._node_type}, {self._guids})' + + def __str__(self) -> str: + return f'{typename(self)}({self._node_type}, {self._guids})' + + @property + def node_type(self) -> _schema.Node: + """Return the node's type.""" + return self._node_type + + @property + def guids(self) -> typing.Iterator[URI]: + """Return all node guids.""" + return iter(self._guids) + + def set( + self, + pred: URI, # FIXME: URI or _schema.Predicate? + value: typing.Any, + ) -> 'Nodes': + """Set predicate *pred* to *value*.""" + return self.set_from_iterable([(pred, value)]) + + def set_from_iterable( + self, + predicate_values: typing.Iterable[typing.Tuple[URI, typing.Any]], # FIXME: URI or _schema.Predicate? + ) -> 'Nodes': + """Set multiple predicate-value pairs at once.""" + # TODO: Could group predicate_values by predicate to gain some efficiency + # TODO: ignore errors on some predicates; For now this could leave residual + # data (e.g. some nodes were created, some not). 
+ try: + # insert triples + for pred, value in predicate_values: + self.__set(pred, value) + # save changes + self._backend.commit() + + except ( + errors.PermissionDeniedError, # tried to set a protected predicate (ns.bsm.t_created) + errors.ConsistencyError, # node types are not in the schema or don't match the predicate + errors.InstanceError, # guids/values don't have the correct type + TypeError, # value is supposed to be a Nodes instance + ValueError, # multiple values passed to unique predicate + ): + # revert changes + self._backend.rollback() + # notify the client + raise + + # FIXME: How about other errors? Shouldn't I then rollback as well?! + + return self + + def __set(self, predicate: URI, value: typing.Any): + """ + """ + # get normalized predicate. Raises KeyError if *pred* not in the schema. + pred = self._backend.schema.predicate(predicate) + + # node_type must be a subclass of the predicate's domain + node_type = self.node_type + if not node_type <= pred.domain: + raise errors.ConsistencyError(f'{node_type} must be a subclass of {pred.domain}') + + # check reserved predicates (access controls, metadata, internal structures) + # FIXME: Needed? Could be integrated into other AC methods (by passing the predicate!) + # This could allow more fine-grained predicate control (e.g. based on ownership) + # rather than a global approach like this. + if self.__ac.is_protected_predicate(pred): + raise errors.PermissionDeniedError(pred) + + # set operation affects all nodes (if possible) + guids = set(self.guids) + + # ensure subject node existence; create nodes if need be + guids = set(self._ensure_nodes(node_type, guids)) + + # check value + if isinstance(pred.range, _schema.Literal): + # check write permissions on existing nodes + # As long as the user has write permissions, we don't restrict + # the creation or modification of literal values. 
+ guids = set(self.__ac.write_literal(node_type, guids)) + + # insert literals + # TODO: Support passing iterators as values for non-unique predicates + self._backend.set( + node_type, + guids, + pred, + [value], + ) + + elif isinstance(pred.range, _schema.Node): + # check value type + if not isinstance(value, Nodes): + raise TypeError(value) + # value's node_type must be a subclass of the predicate's range + if not value.node_type <= pred.range: + raise errors.ConsistencyError(f'{value.node_type} must be a subclass of {pred.range}') + + # check link permissions on source nodes + # Link permissions cover adding and removing links on the source node. + # Specifically, link permissions also allow to remove links to other + # nodes if needed (e.g. for unique predicates). + guids = set(self.__ac.link_from_node(node_type, guids)) + + # get link targets + targets = set(value.guids) + # ensure existence of value nodes; create nodes if need be + targets = set(self._ensure_nodes(value.node_type, targets)) + # check link permissions on target nodes + targets = set(self.__ac.link_to_node(value.node_type, targets)) + + # insert node links + self._backend.set( + node_type, + guids, + pred, + targets, + ) + + else: + raise errors.UnreachableError() + + def _ensure_nodes(self, node_type: _schema.Node, guids: typing.Iterable[URI]): + """ + """ + # check node existence + guids = set(guids) + existing = set(self._backend.exists(node_type, guids)) + # get nodes to be created + missing = guids - existing + # create nodes if need be + if len(missing) > 0: + # check which missing nodes can be created + missing = set(self.__ac.createable(node_type, missing)) + # create nodes + self._backend.create(node_type, missing) + # add bookkeeping triples + self._backend.set(node_type, missing, + self._backend.schema.predicate(ns.bsm.t_created), [time.time()]) + # add permission triples + self.__ac.create(node_type, missing) + # return available nodes + return existing | missing + +## EOF ## diff 
--git a/bsfs/graph/schema.nt b/bsfs/graph/schema.nt new file mode 100644 index 0000000..8612681 --- /dev/null +++ b/bsfs/graph/schema.nt @@ -0,0 +1,18 @@ + +# generic prefixes +prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> +prefix xsd: <http://www.w3.org/2001/XMLSchema#> + +# bsfs prefixes +prefix bsfs: <http://bsfs.ai/schema/> +prefix bsm: <http://bsfs.ai/schema/Meta#> + +# literals +xsd:integer rdfs:subClassOf bsfs:Literal . + +# predicates +bsm:t_created rdfs:subClassOf bsfs:Predicate ; + rdfs:domain bsfs:Node ; + rdfs:range xsd:integer ; + bsfs:unique "true"^^xsd:boolean . + diff --git a/bsfs/namespace/__init__.py b/bsfs/namespace/__init__.py new file mode 100644 index 0000000..98d472f --- /dev/null +++ b/bsfs/namespace/__init__.py @@ -0,0 +1,21 @@ +""" + +Part of the BlackStar filesystem (bsfs) module. +A copy of the license is provided with the project. +Author: Matthias Baumgartner, 2022 +""" +# imports +import typing + +# inner-module imports +from . import predefined as ns +from .namespace import ClosedNamespace, Namespace + +# exports +__all__: typing.Sequence[str] = ( + 'ClosedNamespace', + 'Namespace', + 'ns', + ) + +## EOF ## diff --git a/bsfs/namespace/namespace.py b/bsfs/namespace/namespace.py new file mode 100644 index 0000000..f652dcd --- /dev/null +++ b/bsfs/namespace/namespace.py @@ -0,0 +1,104 @@ +""" + +Part of the BlackStar filesystem (bsfs) module. +A copy of the license is provided with the project. +Author: Matthias Baumgartner, 2022 +""" +# imports +import typing + +# bsfs imports +from bsfs.utils import URI, typename + +# exports +__all__: typing.Sequence[str] = ( + 'ClosedNamespace', + 'Namespace', + ) + + +## code ## + +class Namespace(): + """A namespace consists of a common prefix that is used in a set of URIs. + + Note that the prefix must include the separator between + path and fragment (typically a '#' or a '/'). + """ + + # namespace prefix. + prefix: URI + + # fragment separator. + fsep: str + + # path separator. 
+ psep: str + + def __init__(self, prefix: URI, fsep: str = '#', psep: str = '/'): + # ensure prefix type + prefix = URI(prefix) + # truncate fragment separator + while prefix.endswith(fsep): + prefix = URI(prefix[:-1]) + # truncate path separator + while prefix.endswith(psep): + prefix = URI(prefix[:-1]) + # store members + self.prefix = prefix + self.fsep = fsep + self.psep = psep + + def __eq__(self, other: typing.Any) -> bool: + return isinstance(other, type(self)) \ + and self.prefix == other.prefix \ + and self.fsep == other.fsep \ + and self.psep == other.psep + + def __hash__(self) -> int: + return hash((type(self), self.prefix, self.fsep, self.psep)) + + def __str__(self) -> str: + return f'{typename(self)}({self.prefix})' + + def __repr__(self) -> str: + return f'{typename(self)}({self.prefix}, {self.fsep}, {self.psep})' + + def __getattr__(self, fragment: str) -> URI: + """Return prefix + fragment.""" + return URI(self.prefix + self.fsep + fragment) + + def __getitem__(self, fragment: str) -> URI: + """Alias for getattr(self, fragment).""" + return self.__getattr__(fragment) + + def __add__(self, value: typing.Any) -> 'Namespace': + """Concatenate another namespace to this one.""" + if not isinstance(value, str): + return NotImplemented + return Namespace(self.prefix + self.psep + value, self.fsep, self.psep) + + +class ClosedNamespace(Namespace): + """Namespace that covers a restricted set of URIs.""" + + # set of permissible fragments. 
    fragments: typing.Set[str]

    def __init__(self, prefix: URI, *args: str, fsep: str = '#', psep: str = '/'):
        """Initialize with *prefix*; *args* enumerates the permissible fragments."""
        super().__init__(prefix, fsep, psep)
        self.fragments = set(args)

    def __eq__(self, other: typing.Any) -> bool:
        # the parent's __eq__ performs the type check, so accessing
        # other.fragments afterwards is safe (short-circuit on mismatch)
        return super().__eq__(other) and self.fragments == other.fragments

    def __hash__(self) -> int:
        # NOTE(review): unlike the parent class, fsep/psep are not hashed here.
        # Equal instances still hash equally, but the omission looks
        # unintentional — confirm.
        return hash((type(self), self.prefix, tuple(sorted(self.fragments))))

    def __getattr__(self, fragment: str) -> URI:
        """Return prefix + fragment or raise a KeyError if the fragment is not part of this namespace."""
        if fragment not in self.fragments:
            raise KeyError(f'{fragment} is not a valid fragment of namespace {self.prefix}')
        return super().__getattr__(fragment)

## EOF ## diff --git a/bsfs/namespace/predefined.py b/bsfs/namespace/predefined.py new file mode 100644 index 0000000..cd48a46 --- /dev/null +++ b/bsfs/namespace/predefined.py @@ -0,0 +1,39 @@
"""

Part of the BlackStar filesystem (bsfs) module.
A copy of the license is provided with the project.
Author: Matthias Baumgartner, 2022
"""
# imports
import typing

# bsfs imports
from bsfs.utils import URI

# inner-module imports
from .
import namespace + +# essential bsfs namespaces +bsfs: namespace.Namespace = namespace.Namespace(URI('http://bsfs.ai/schema'), fsep='/') + +# additional bsfs namespaces +bse: namespace.Namespace = namespace.Namespace(URI('http://bsfs.ai/schema/Entity')) +bsm: namespace.Namespace = namespace.Namespace(URI('http://bsfs.ai/schema/Meta')) + +# generic namespaces +rdf: namespace.Namespace = namespace.Namespace(URI('http://www.w3.org/1999/02/22-rdf-syntax-ns')) +rdfs: namespace.Namespace = namespace.Namespace(URI('http://www.w3.org/2000/01/rdf-schema')) +schema: namespace.Namespace = namespace.Namespace(URI('http://schema.org'), fsep='/') +xsd: namespace.Namespace = namespace.Namespace(URI('http://www.w3.org/2001/XMLSchema')) + +__all__: typing.Sequence[str] = ( + 'bse', + 'bsfs', + 'bsm', + 'rdf', + 'rdfs', + 'schema', + 'xsd', + ) + +## EOF ## diff --git a/bsfs/schema/__init__.py b/bsfs/schema/__init__.py new file mode 100644 index 0000000..ad4d456 --- /dev/null +++ b/bsfs/schema/__init__.py @@ -0,0 +1,22 @@ +""" + +Part of the BlackStar filesystem (bsfs) module. +A copy of the license is provided with the project. +Author: Matthias Baumgartner, 2022 +""" +# imports +import typing + +# inner-module imports +from .schema import Schema +from .types import Literal, Node, Predicate + +# exports +__all__: typing.Sequence[str] = ( + 'Literal', + 'Node', + 'Predicate', + 'Schema', + ) + +## EOF ## diff --git a/bsfs/schema/schema.py b/bsfs/schema/schema.py new file mode 100644 index 0000000..c5d4571 --- /dev/null +++ b/bsfs/schema/schema.py @@ -0,0 +1,386 @@ +""" + +Part of the BlackStar filesystem (bsfs) module. +A copy of the license is provided with the project. +Author: Matthias Baumgartner, 2022 +""" +# imports +from collections import abc, namedtuple +import typing +import rdflib + +# bsfs imports +from bsfs.namespace import ns +from bsfs.utils import errors, URI, typename + +# inner-module imports +from . 
import types + +# exports +__all__: typing.Sequence[str] = ( + 'Schema', + ) + + +## code ## + +class Schema(): + """Graph schema. + + Use `Schema.Empty()` to create a new, empty Schema rather than construct + it directly. + + The schema is defined by three sets: Predicates, Nodes, and Literals. + + The Schema class guarantees two properties: completeness and consistency. + Completeness means that the schema covers all class that are referred to + by any other class in the schema. Consistency means that each class is + identified by a unique URI and all classes that use that URI consequently + use the same definition. + + """ + + # node classes. + _nodes: typing.Dict[URI, types.Node] + + # literal classes. + _literals: typing.Dict[URI, types.Literal] + + # predicate classes. + _predicates: typing.Dict[URI, types.Predicate] + + def __init__( + self, + predicates: typing.Iterable[types.Predicate], + nodes: typing.Optional[typing.Iterable[types.Node]] = None, + literals: typing.Optional[typing.Iterable[types.Literal]] = None, + ): + # materialize arguments + if nodes is None: + nodes = set() + if literals is None: + literals = set() + nodes = set(nodes) + literals = set(literals) + predicates = set(predicates) + # include parents in predicates set + # TODO: review type annotations and ignores for python >= 3.11 (parents is _Type but should be typing.Self) + predicates |= {par for pred in predicates for par in pred.parents()} # type: ignore [misc] + # include predicate domain in nodes set + nodes |= {pred.domain for pred in predicates} + # include predicate range in nodes and literals sets + prange = {pred.range for pred in predicates if pred.range is not None} + nodes |= {vert for vert in prange if isinstance(vert, types.Node)} + literals |= {vert for vert in prange if isinstance(vert, types.Literal)} + # include parents in nodes and literals sets + # NOTE: Must be done after predicate domain/range was handled + # so that their parents are included as well. 
+ nodes |= {par for node in nodes for par in node.parents()} # type: ignore [misc] + literals |= {par for lit in literals for par in lit.parents()} # type: ignore [misc] + # assign members + self._nodes = {node.uri: node for node in nodes} + self._literals = {lit.uri: lit for lit in literals} + self._predicates = {pred.uri: pred for pred in predicates} + # verify unique uris + if len(nodes) != len(self._nodes): + raise errors.ConsistencyError('inconsistent nodes') + if len(literals) != len(self._literals): + raise errors.ConsistencyError('inconsistent literals') + if len(predicates) != len(self._predicates): + raise errors.ConsistencyError('inconsistent predicates') + # verify globally unique uris + n_uris = len(set(self._nodes) | set(self._literals) | set(self._predicates)) + if n_uris != len(self._nodes) + len(self._literals) + len(self._predicates): + raise errors.ConsistencyError('URI dual use') + + + ## essentials ## + + def __str__(self) -> str: + return f'{typename(self)}()' + + def __repr__(self) -> str: + return f'{typename(self)}({sorted(self._nodes)}, {sorted(self._literals)}, {sorted(self._predicates)})' + + def __hash__(self) -> int: + return hash(( + type(self), + tuple(sorted(self._nodes.values())), + tuple(sorted(self._literals.values())), + tuple(sorted(self._predicates.values())), + )) + + def __eq__(self, other: typing.Any) -> bool: + return isinstance(other, type(self)) \ + and self._nodes == other._nodes \ + and self._literals == other._literals \ + and self._predicates == other._predicates + + + ## operators ## + + SchemaDiff = namedtuple('SchemaDiff', ['nodes', 'literals', 'predicates']) + + def _issubset(self, other: 'Schema') -> bool: + # inconsistent schema can't be ordered. + if not self.consistent_with(other): + return False + # since schemas are consistent, it's sufficient to compare their URIs. 
        # self's sets are fully contained in other's sets
        # pylint: disable=protected-access
        return set(self._predicates) <= set(other._predicates) \
            and set(self._nodes) <= set(other._nodes) \
            and set(self._literals) <= set(other._literals)

    def __lt__(self, other: typing.Any) -> bool:
        """Return True if *self* is a true subset of *other*."""
        if not isinstance(other, Schema):  # other is not a Schema
            return NotImplemented
        return self != other and self._issubset(other)

    def __le__(self, other: typing.Any) -> bool:
        """Return True if *self* is a subset of *other*."""
        if not isinstance(other, Schema):  # other is not a Schema
            return NotImplemented
        return self == other or self._issubset(other)

    def __gt__(self, other: typing.Any) -> bool:
        """Return True if *self* is a true superset of *other*."""
        if not isinstance(other, Schema):  # other is not a Schema
            return NotImplemented
        return self != other and other._issubset(self)

    def __ge__(self, other: typing.Any) -> bool:
        """Return True if *self* is a superset of *other*."""
        if not isinstance(other, Schema):  # other is not a Schema
            return NotImplemented
        return self == other or other._issubset(self)

    def diff(self, other: 'Schema') -> SchemaDiff:
        """Return node, literals, and predicates that are in *self* but not in *other*."""
        return self.SchemaDiff(
            nodes=set(self.nodes()) - set(other.nodes()),
            literals=set(self.literals()) - set(other.literals()),
            predicates=set(self.predicates()) - set(other.predicates()),
            )

    def __sub__(self, other: typing.Any) -> SchemaDiff:
        """Alias for `Schema.diff`."""
        if not isinstance(other, Schema):
            return NotImplemented
        return self.diff(other)

    def consistent_with(self, other: 'Schema') -> bool:
        """Checks if two schemas have different predicate, node, or literal definitions for the same uri."""
        # check arg
        if not isinstance(other, Schema):
            raise TypeError(other)
        # node consistency
        nodes = set(self.nodes()) |
set(other.nodes()) + nuris = {node.uri for node in nodes} + if len(nodes) != len(nuris): + return False + # literal consistency + literals = set(self.literals()) | set(other.literals()) + luris = {lit.uri for lit in literals} + if len(literals) != len(luris): + return False + # predicate consistency + predicates = set(self.predicates()) | set(other.predicates()) + puris = {pred.uri for pred in predicates} + if len(predicates) != len(puris): + return False + # global consistency + if len(puris | luris | nuris) != len(nodes) + len(literals) + len(predicates): + return False + # all checks passed + return True + + @classmethod + def Union( # pylint: disable=invalid-name # capitalized classmethod + cls, + *args: typing.Union['Schema', typing.Iterable['Schema']] + ) -> 'Schema': + """Combine multiple Schema instances into a single one. + As argument, you can either pass multiple Schema instances, or a single + iterable over Schema instances. Any abc.Iterable will be accepted. + + Example: + + >>> a, b, c = Schema.Empty(), Schema.Empty(), Schema.Empty() + >>> # multiple Schema instances + >>> Schema.Union(a, b, c) + >>> # A single iterable over Schema instances + >>> Schema.Union([a, b, c]) + + """ + if len(args) == 0: + raise TypeError('Schema.Union requires at least one argument (Schema or Iterable)') + if isinstance(args[0], cls): # args is sequence of Schema instances + pass + elif len(args) == 1 and isinstance(args[0], abc.Iterable): # args is a single iterable + args = args[0] # type: ignore [assignment] # we checked and thus know that args[0] is an iterable + else: + raise TypeError(f'expected multiple Schema instances or a single Iterable, found {args}') + + nodes, literals, predicates = set(), set(), set() + for schema in args: + # check argument + if not isinstance(schema, cls): + raise TypeError(schema) + # merge with previous schemas + nodes |= set(schema.nodes()) + literals |= set(schema.literals()) + predicates |= set(schema.predicates()) + # return new 
Schema instance + return cls(predicates, nodes, literals) + + def union(self, other: 'Schema') -> 'Schema': + """Merge *other* and *self* into a new Schema. *self* takes precedence.""" + # check type + if not isinstance(other, type(self)): + raise TypeError(other) + # return combined schemas + return self.Union(self, other) + + def __add__(self, other: typing.Any) -> 'Schema': + """Alias for Schema.union.""" + try: # return merged schemas + return self.union(other) + except TypeError: + return NotImplemented + + def __or__(self, other: typing.Any) -> 'Schema': + """Alias for Schema.union.""" + return self.__add__(other) + + + ## getters ## + # FIXME: nodes, predicates, literals could be properties + # FIXME: interchangeability of URI and _Type?! + + def has_node(self, node: URI) -> bool: + """Return True if a Node with URI *node* is part of the schema.""" + return node in self._nodes + + def has_literal(self, lit: URI) -> bool: + """Return True if a Literal with URI *lit* is part of the schema.""" + return lit in self._literals + + def has_predicate(self, pred: URI) -> bool: + """Return True if a Predicate with URI *pred* is part of the schema.""" + return pred in self._predicates + + def nodes(self) -> typing.Iterable[types.Node]: + """Return an iterator over Node classes.""" + return self._nodes.values() + + def literals(self) -> typing.Iterable[types.Literal]: + """Return an iterator over Literal classes.""" + return self._literals.values() + + def predicates(self) -> typing.Iterable[types.Predicate]: + """Return an iterator over Predicate classes.""" + return self._predicates.values() + + def node(self, uri: URI) -> types.Node: + """Return the Node matching the *uri*.""" + return self._nodes[uri] + + def predicate(self, uri: URI) -> types.Predicate: + """Return the Predicate matching the *uri*.""" + return self._predicates[uri] + + def literal(self, uri: URI) -> types.Literal: + """Return the Literal matching the *uri*.""" + return self._literals[uri] + + + ## 
constructors ## + + + @classmethod + def Empty(cls) -> 'Schema': # pylint: disable=invalid-name # capitalized classmethod + """Return a minimal Schema.""" + node = types.Node(ns.bsfs.Node, None) + literal = types.Literal(ns.bsfs.Literal, None) + predicate = types.Predicate( + uri=ns.bsfs.Predicate, + parent=None, + domain=node, + range=None, + unique=False, + ) + return cls((predicate, ), (node, ), (literal, )) + + + @classmethod + def from_string(cls, schema: str) -> 'Schema': # pylint: disable=invalid-name # capitalized classmethod + """Load and return a Schema from a string.""" + # parse string into rdf graph + graph = rdflib.Graph() + graph.parse(data=schema, format='turtle') + + def _fetch_hierarchically(factory, curr): + # emit current node + yield curr + # walk through childs + for child in graph.subjects(rdflib.URIRef(ns.rdfs.subClassOf), rdflib.URIRef(curr.uri)): + # convert to URI + child = URI(child) + # check circular dependency + if child == curr.uri or child in {node.uri for node in curr.parents()}: + raise errors.ConsistencyError('circular dependency') + # recurse and emit (sub*)childs + yield from _fetch_hierarchically(factory, factory(child, curr)) + + # fetch nodes + nodes = set(_fetch_hierarchically(types.Node, types.Node(ns.bsfs.Node, None))) + nodes_lut = {node.uri: node for node in nodes} + if len(nodes_lut) != len(nodes): + raise errors.ConsistencyError('inconsistent nodes') + + # fetch literals + literals = set(_fetch_hierarchically(types.Literal, types.Literal(ns.bsfs.Literal, None))) + literals_lut = {lit.uri: lit for lit in literals} + if len(literals_lut) != len(literals): + raise errors.ConsistencyError('inconsistent literals') + + # fetch predicates + def build_predicate(uri, parent): + uri = rdflib.URIRef(uri) + # get domain + domains = set(graph.objects(uri, rdflib.RDFS.domain)) + if len(domains) != 1: + raise errors.ConsistencyError(f'inconsistent domain: {domains}') + dom = nodes_lut.get(next(iter(domains))) + if dom is None: + 
raise errors.ConsistencyError('missing domain') + # get range + ranges = set(graph.objects(uri, rdflib.RDFS.range)) + if len(ranges) != 1: + raise errors.ConsistencyError(f'inconsistent range: {ranges}') + rng = next(iter(ranges)) + rng = nodes_lut.get(rng, literals_lut.get(rng)) + if rng is None: + raise errors.ConsistencyError('missing range') + # get unique flag + uniques = set(graph.objects(uri, rdflib.URIRef(ns.bsfs.unique))) + if len(uniques) != 1: + raise errors.ConsistencyError(f'inconsistent unique flags: {uniques}') + unique = bool(next(iter(uniques))) + # build Predicate + return types.Predicate(URI(uri), parent, dom, rng, unique) + + root_predicate = types.Predicate( + uri=ns.bsfs.Predicate, + parent=None, + domain=nodes_lut[ns.bsfs.Node], + range=None, # FIXME: Unclear how to handle this! Can be either a Literal or a Node + unique=False, + ) + predicates = _fetch_hierarchically(build_predicate, root_predicate) + # return Schema + return cls(predicates, nodes, literals) + +## EOF ## diff --git a/bsfs/schema/types.py b/bsfs/schema/types.py new file mode 100644 index 0000000..54a7e99 --- /dev/null +++ b/bsfs/schema/types.py @@ -0,0 +1,276 @@ +""" + +Part of the BlackStar filesystem (bsfs) module. +A copy of the license is provided with the project. +Author: Matthias Baumgartner, 2022 +""" +# imports +import typing + +# bsfs imports +from bsfs.utils import errors, URI, typename + +# exports +__all__: typing.Sequence[str] = ( + 'Literal', + 'Node', + 'Predicate', + ) + + +## code ## + +class _Type(): + """A class is defined via its uri. + + Classes define a partial order. + The order operators indicate whether some class is a + superclass (greater-than) or a subclass (less-than) of another. + Comparisons are only supported within the same type. 
+ + For example, consider the class hierarchy below: + + Vehicle + Two-wheel + Bike + Bicycle + + >>> vehicle = _Type('Vehicle') + >>> twowheel = _Type('Two-wheel', vehicle) + >>> bike = _Type('Bike', twowheel) + >>> bicycle = _Type('Bicycle', twowheel) + + Two-wheel is equivalent to itself + >>> twowheel == vehicle + False + >>> twowheel == twowheel + True + >>> twowheel == bicycle + False + + Two-wheel is a true subclass of Vehicle + >>> twowheel < vehicle + True + >>> twowheel < twowheel + False + >>> twowheel < bicycle + False + + Two-wheel is a subclass of itself and Vehicle + >>> twowheel <= vehicle + True + >>> twowheel <= twowheel + True + >>> twowheel <= bicycle + False + + Two-wheel is a true superclass of Bicycle + >>> twowheel > vehicle + False + >>> twowheel > twowheel + False + >>> twowheel > bicycle + True + + Two-wheel is a superclass of itself and Bicycle + >>> twowheel >= vehicle + False + >>> twowheel >= twowheel + True + >>> twowheel >= bicycle + True + + Analoguous to sets, this is not a total order: + >>> bike < bicycle + False + >>> bike > bicycle + False + >>> bike == bicycle + False + """ + + # class uri. + uri: URI + + # parent's class uris. 
    parent: typing.Optional['_Type']  # TODO: for python >=3.11: use typing.Self

    def __init__(
            self,
            uri: URI,
            parent: typing.Optional['_Type'] = None,
            ):
        self.uri = uri
        self.parent = parent

    def parents(self) -> typing.Generator['_Type', None, None]:
        """Yield this class' ancestors, starting at the direct parent and walking up to the hierarchy's root."""
        curr = self.parent
        while curr is not None:
            yield curr
            curr = curr.parent

    def get_child(self, uri: URI, **kwargs):
        """Return a child of the current class. *kwargs* are forwarded to the subtype's constructor."""
        return type(self)(uri, self, **kwargs)

    def __str__(self) -> str:
        return f'{typename(self)}({self.uri})'

    def __repr__(self) -> str:
        return f'{typename(self)}({self.uri}, {repr(self.parent)})'

    def __hash__(self) -> int:
        # hashing the parent recursively covers the full ancestor chain
        return hash((type(self), self.uri, self.parent))

    # NOTE: For equality and order functions (lt, gt, le, ge) we explicitly want type equality!
    # Consider the statements below, with class Vehicle(_Type) and class TwoWheel(Vehicle):
    # * Vehicle('foo', None) == TwoWheel('foo', None): Instances of different types cannot be equivalent.
    # * Vehicle('foo', None) <= TwoWheel('foo', None): Cannot compare the different types Vehicles and TwoWheel.
+ + def __eq__(self, other: typing.Any) -> bool: + """Return True iff *self* is equivalent to *other*.""" + # pylint: disable=unidiomatic-typecheck + return type(other) is type(self) \ + and self.uri == other.uri \ + and self.parent == other.parent + + + def __lt__(self, other: typing.Any) -> bool: + """Return True iff *self* is a true subclass of *other*.""" + if not type(self) is type(other): # type mismatch # pylint: disable=unidiomatic-typecheck + return NotImplemented + if self.uri == other.uri: # equivalence + return False + if self in other.parents(): # superclass + return False + if other in self.parents(): # subclass + return True + # not related + return False + + def __le__(self, other: typing.Any) -> bool: + """Return True iff *self* is equivalent or a subclass of *other*.""" + if not type(self) is type(other): # type mismatch # pylint: disable=unidiomatic-typecheck + return NotImplemented + if self.uri == other.uri: # equivalence + return True + if self in other.parents(): # superclass + return False + if other in self.parents(): # subclass + return True + # not related + return False + + def __gt__(self, other: typing.Any) -> bool: + """Return True iff *self* is a true superclass of *other*.""" + if not type(self) is type(other): # type mismatch # pylint: disable=unidiomatic-typecheck + return NotImplemented + if self.uri == other.uri: # equivalence + return False + if self in other.parents(): # superclass + return True + if other in self.parents(): # subclass + return False + # not related + return False + + def __ge__(self, other: typing.Any) -> bool: + """Return True iff *self* is eqiuvalent or a superclass of *other*.""" + if not type(self) is type(other): # type mismatch # pylint: disable=unidiomatic-typecheck + return NotImplemented + if self.uri == other.uri: # equivalence + return True + if self in other.parents(): # superclass + return True + if other in self.parents(): # subclass + return False + # not related + return False + + +class 
_Vertex(_Type): + """Graph vertex types. Can be a Node or a Literal.""" + def __init__(self, uri: URI, parent: typing.Optional['_Vertex']): + super().__init__(uri, parent) + + +class Node(_Vertex): + """Node type.""" + def __init__(self, uri: URI, parent: typing.Optional['Node']): + super().__init__(uri, parent) + + +class Literal(_Vertex): + """Literal type.""" + def __init__(self, uri: URI, parent: typing.Optional['Literal']): + super().__init__(uri, parent) + + +class Predicate(_Type): + """Predicate type.""" + + # source type. + domain: Node + + # destination type. + range: typing.Optional[typing.Union[Node, Literal]] + + # maximum cardinality of type. + unique: bool + + def __init__( + self, + # Type members + uri: URI, + parent: typing.Optional['Predicate'], + # Predicate members + domain: Node, + range: typing.Optional[typing.Union[Node, Literal]], # pylint: disable=redefined-builtin + unique: bool, + ): + # check arguments + if not isinstance(domain, Node): + raise TypeError(domain) + if range is not None and not isinstance(range, Node) and not isinstance(range, Literal): + raise TypeError(range) + # initialize + super().__init__(uri, parent) + self.domain = domain + self.range = range + self.unique = unique + + def __hash__(self) -> int: + return hash((super().__hash__(), self.domain, self.range, self.unique)) + + def __eq__(self, other: typing.Any) -> bool: + return super().__eq__(other) \ + and self.domain == other.domain \ + and self.range == other.range \ + and self.unique == other.unique + + def get_child( + self, + uri: URI, + domain: typing.Optional[Node] = None, + range: typing.Optional[_Vertex] = None, # pylint: disable=redefined-builtin + unique: typing.Optional[bool] = None, + **kwargs, + ): + """Return a child of the current class.""" + if domain is None: + domain = self.domain + if not domain <= self.domain: + raise errors.ConsistencyError(f'{domain} must be a subclass of {self.domain}') + if range is None: + range = self.range + if range is 
None: # inherited range from ns.bsfs.Predicate + raise ValueError('range must be defined by the parent or argument') + if self.range is not None and not range <= self.range: + raise errors.ConsistencyError(f'{range} must be a subclass of {self.range}') + if unique is None: + unique = self.unique + return super().get_child(uri, domain=domain, range=range, unique=unique, **kwargs) + + +## EOF ## diff --git a/bsfs/triple_store/__init__.py b/bsfs/triple_store/__init__.py new file mode 100644 index 0000000..fb5a8a9 --- /dev/null +++ b/bsfs/triple_store/__init__.py @@ -0,0 +1,20 @@ +""" + +Part of the BlackStar filesystem (bsfs) module. +A copy of the license is provided with the project. +Author: Matthias Baumgartner, 2022 +""" +# imports +import typing + +# inner-module imports +from .base import TripleStoreBase +from .sparql import SparqlStore + +# exports +__all__: typing.Sequence[str] = ( + 'SparqlStore', + 'TripleStoreBase', + ) + +## EOF ## diff --git a/bsfs/triple_store/base.py b/bsfs/triple_store/base.py new file mode 100644 index 0000000..6561262 --- /dev/null +++ b/bsfs/triple_store/base.py @@ -0,0 +1,148 @@ +""" + +Part of the BlackStar filesystem (bsfs) module. +A copy of the license is provided with the project. +Author: Matthias Baumgartner, 2022 +""" +# imports +import abc +import typing + +# inner-module imports +from bsfs.utils import URI, typename +import bsfs.schema as _schema + +# exports +__all__: typing.Sequence[str] = ( + 'TripleStoreBase', + ) + + +## code ## + +class TripleStoreBase(abc.ABC): + """TripleStore base class. + + Use the `Open` method to create a new instance and to initialize + the required structures. + + Triple stores express a graph via its (subject, predicate, object) triples. + They provides methods to add and remove triples, and to query the storage + for given graph structures. The subject is always a node in the graph, + whereas nodes are identifiable by a unique URI. 
Note that blank nodes + (without an explicit URI) are not supported. The object can be another + Node or a Literal value. The relation between a subject and an object + is expressed via a Predicate. The graph structures are governed by a + schema that defines which Node, Literal, and Predicate classes exist + and how they can interact (see `bsfs.schema.Schema`). + + """ + + # storage's URI. None implies a temporary location. + uri: typing.Optional[URI] = None + + def __init__(self, uri: typing.Optional[URI] = None): + self.uri = uri + + def __hash__(self) -> int: + uri = self.uri if self.uri is not None else id(self) + return hash((type(self), uri)) + + def __eq__(self, other) -> bool: + return isinstance(other, type(self)) \ + and (( self.uri is not None \ + and other.uri is not None \ + and self.uri == other.uri ) \ + or id(self) == id(other)) + + def __repr__(self) -> str: + return f'{typename(self)}(uri={self.uri})' + + def __str__(self) -> str: + return f'{typename(self)}(uri={self.uri})' + + def is_persistent(self) -> bool: + """Return True if data is stored persistently.""" + return self.uri is not None + + + @classmethod + @abc.abstractmethod + def Open(cls, **kwargs: typing.Any) -> 'TripleStoreBase': # pylint: disable=invalid-name # capitalized classmethod + """Return a TripleStoreBase instance connected to *uri*.""" + + @abc.abstractmethod + def commit(self): + """Commit the current transaction.""" + + @abc.abstractmethod + def rollback(self): + """Undo changes since the last commit.""" + + @property + @abc.abstractmethod + def schema(self) -> _schema.Schema: + """Return the store's local schema.""" + + @schema.setter + @abc.abstractmethod + def schema(self, schema: _schema.Schema): + """Migrate to new schema by adding or removing class definitions. + + Commits before and after the migration. + + Instances of removed classes will be deleted irreversably. + Note that modifying an existing class is not directly supported. 
+ Also, it is generally discouraged, since changing definitions may + lead to inconsistencies across multiple clients in a distributed + setting. Instead, consider introducing a new class under its own + uri. Such a migration would look as follows: + + 1. Add new class definitions. + 2. Create instances of the new classes and copy relevant data. + 3. Remove the old definitions. + + To modify a class, i.e., re-use a previous uri with a new + class definition, you would have to migrate via temporary + class definitions, and thus repeat the above procedure two times. + + """ + + @abc.abstractmethod + def exists( + self, + node_type: _schema.Node, + guids: typing.Iterable[URI], + ) -> typing.Iterable[URI]: + """Return those *guids* that exist and have type *node_type* or a subclass thereof.""" + + @abc.abstractmethod + def create( + self, + node_type: _schema.Node, + guids: typing.Iterable[URI], + ): + """Create *guid* nodes with type *subject*.""" + + @abc.abstractmethod + def set( + self, + node_type: _schema.Node, # FIXME: is the node_type even needed? Couldn't I infer from the predicate? + guids: typing.Iterable[URI], + predicate: _schema.Predicate, + values: typing.Iterable[typing.Any], + ): + """Add triples to the graph. + + It is assumed that all of *guids* exist and have *node_type*. + This method adds a triple (guid, predicate, value) for every guid in + *guids* and each value in *values* (cartesian product). Note that + *values* must have length one for unique predicates, and that + currently existing values will be overwritten in this case. + It also verifies that all symbols are part of the schema and that + the *predicate* matches the *node_type*. + Raises `bsfs.errors.ConsistencyError` if these assumptions are violated. 
+ + """ + +## EOF ## diff --git a/bsfs/triple_store/sparql.py b/bsfs/triple_store/sparql.py new file mode 100644 index 0000000..7516dff --- /dev/null +++ b/bsfs/triple_store/sparql.py @@ -0,0 +1,253 @@ +""" + +Part of the BlackStar filesystem (bsfs) module. +A copy of the license is provided with the project. +Author: Matthias Baumgartner, 2022 +""" +# imports +import itertools +import typing +import rdflib + +# bsfs imports +from bsfs import schema as bsc +from bsfs.utils import errors, URI + +# inner-module imports +from . import base + + +# exports +__all__: typing.Sequence[str] = ( + 'SparqlStore', + ) + + +## code ## + +class _Transaction(): + """Lightweight rdflib transactions for in-memory databases.""" + + # graph instance. + _graph: rdflib.Graph + + # current log of added triples. + _added: typing.List[typing.Any] + + # current log of removed triples. + _removed: typing.List[typing.Any] + + def __init__(self, graph: rdflib.Graph): + self._graph = graph + # initialize internal structures + self.commit() + + def commit(self): + """Commit temporary changes.""" + self._added = [] + self._removed = [] + + def rollback(self): + """Undo changes since the last commit.""" + for triple in self._added: + self._graph.remove(triple) + for triple in self._removed: + self._graph.add(triple) + + def add(self, triple: typing.Any): + """Add a triple to the graph.""" + if triple not in self._graph: + self._added.append(triple) + self._graph.add(triple) + + def remove(self, triple: typing.Any): + """Remove a triple from the graph.""" + if triple in self._graph: + self._removed.append(triple) + self._graph.remove(triple) + + +class SparqlStore(base.TripleStoreBase): + """Sparql-based triple store. + + The sparql triple store uses a third-party backend + (currently rdflib) to store triples and manages them via + the Sparql query language. + + """ + + # The rdflib graph. + _graph: rdflib.Graph + + # Current transaction. + _transaction: _Transaction + + # The local schema. 
+ _schema: bsc.Schema + + def __init__(self): + super().__init__(None) + self._graph = rdflib.Graph() + self._transaction = _Transaction(self._graph) + self._schema = bsc.Schema.Empty() + + # NOTE: mypy and pylint complain about the **kwargs not being listed (contrasting super) + # However, not having it here is clearer since it's explicit that there are no arguments. + @classmethod + def Open(cls) -> 'SparqlStore': # type: ignore [override] # pylint: disable=arguments-differ + return cls() + + def commit(self): + self._transaction.commit() + + def rollback(self): + self._transaction.rollback() + + @property + def schema(self) -> bsc.Schema: + return self._schema + + @schema.setter + def schema(self, schema: bsc.Schema): + # check args: Schema instance + if not isinstance(schema, bsc.Schema): + raise TypeError(schema) + # check compatibility: No contradicting definitions + if not self.schema.consistent_with(schema): + raise errors.ConsistencyError(f'{schema} is inconsistent with {self.schema}') + + # commit the current transaction + self.commit() + + # adjust instances: + # nothing to do for added classes + # delete instances of removed classes + + # get deleted classes + sub = self.schema - schema + + # remove predicate instances + for pred in sub.predicates: + for src, trg in self._graph.subject_objects(rdflib.URIRef(pred.uri)): + self._transaction.remove((src, rdflib.URIRef(pred.uri), trg)) + + # remove node instances + for node in sub.nodes: + # iterate through node instances + for inst in self._graph.subjects(rdflib.RDF.type, rdflib.URIRef(node.uri)): + # remove triples where the instance is in the object position + for src, pred in self._graph.subject_predicates(inst): + self._transaction.remove((src, pred, inst)) + # remove triples where the instance is in the subject position + for pred, trg in self._graph.predicate_objects(inst): + self._transaction.remove((inst, pred, trg)) + # remove instance + self._transaction.remove((inst, rdflib.RDF.type, 
rdflib.URIRef(node.uri))) + + # NOTE: Nothing to do for literals + + # commit instance changes + self.commit() + + # migrate schema + self._schema = schema + + + def _has_type(self, subject: URI, node_type: bsc.Node) -> bool: + """Return True if *subject* is a node of class *node_type* or a subclass thereof.""" + if node_type not in self.schema.nodes(): + raise errors.ConsistencyError(f'{node_type} is not defined in the schema') + + subject_types = list(self._graph.objects(rdflib.URIRef(subject), rdflib.RDF.type)) + if len(subject_types) == 0: + return False + if len(subject_types) == 1: + node = self.schema.node(URI(subject_types[0])) # type: ignore [arg-type] # URI is a subtype of str + if node == node_type: + return True + if node_type in node.parents(): + return True + return False + raise errors.UnreachableError() + + def exists( + self, + node_type: bsc.Node, + guids: typing.Iterable[URI], + ) -> typing.Iterable[URI]: + return (subj for subj in guids if self._has_type(subj, node_type)) + + def create( + self, + node_type: bsc.Node, + guids: typing.Iterable[URI], + ): + # check node_type + if node_type not in self.schema.nodes(): + raise errors.ConsistencyError(f'{node_type} is not defined in the schema') + # check and create guids + for guid in guids: + subject = rdflib.URIRef(guid) + # check node existence + if (subject, rdflib.RDF.type, None) in self._graph: + # FIXME: node exists and may have a different type! ignore? raise? report? 
+ continue + # add node + self._transaction.add((subject, rdflib.RDF.type, rdflib.URIRef(node_type.uri))) + + def set( + self, + node_type: bsc.Node, + guids: typing.Iterable[URI], + predicate: bsc.Predicate, + values: typing.Iterable[typing.Any], + ): + # check node_type + if node_type not in self.schema.nodes(): + raise errors.ConsistencyError(f'{node_type} is not defined in the schema') + # check predicate + if predicate not in self.schema.predicates(): + raise errors.ConsistencyError(f'{predicate} is not defined in the schema') + if not node_type <= predicate.domain: + raise errors.ConsistencyError(f'{node_type} must be a subclass of {predicate.domain}') + # NOTE: predicate.range is in the schema since predicate is in the schema. + # materialize values + values = set(values) + # check values + if len(values) == 0: + return + if predicate.unique and len(values) != 1: + raise ValueError(values) + if isinstance(predicate.range, bsc.Node): + values = set(values) # materialize to safeguard against iterators passed as argument + inconsistent = {val for val in values if not self._has_type(val, predicate.range)} + # catches nodes that don't exist and nodes that have an inconsistent type + if len(inconsistent) > 0: + raise errors.InstanceError(inconsistent) + # check guids + # FIXME: Fail or skip inexistent nodes? 
+ guids = set(guids) + inconsistent = {guid for guid in guids if not self._has_type(guid, node_type)} + if len(inconsistent) > 0: + raise errors.InstanceError(inconsistent) + + # add triples + pred = rdflib.URIRef(predicate.uri) + for guid, value in itertools.product(guids, values): + guid = rdflib.URIRef(guid) + # convert value + if isinstance(predicate.range, bsc.Literal): + value = rdflib.Literal(value, datatype=rdflib.URIRef(predicate.range.uri)) + elif isinstance(predicate.range, bsc.Node): + value = rdflib.URIRef(value) + else: + raise errors.UnreachableError() + # clear triples for unique predicates + if predicate.unique: + for obj in self._graph.objects(guid, pred): + if obj != value: + self._transaction.remove((guid, pred, obj)) + # add triple + self._transaction.add((guid, pred, value)) + +## EOF ## diff --git a/bsfs/utils/__init__.py b/bsfs/utils/__init__.py new file mode 100644 index 0000000..94680ee --- /dev/null +++ b/bsfs/utils/__init__.py @@ -0,0 +1,25 @@ +""" + +Part of the BlackStar filesystem (bsfs) module. +A copy of the license is provided with the project. +Author: Matthias Baumgartner, 2022 +""" +# imports +import typing + +# inner-module imports +from . import errors +from .commons import typename +from .uri import URI +from .uuid import UUID, UCID + +# exports +__all__ : typing.Sequence[str] = ( + 'UCID', + 'URI', + 'UUID', + 'errors', + 'typename', + ) + +## EOF ## diff --git a/bsfs/utils/commons.py b/bsfs/utils/commons.py new file mode 100644 index 0000000..bad2fe0 --- /dev/null +++ b/bsfs/utils/commons.py @@ -0,0 +1,23 @@ +""" + +Part of the BlackStar filesystem (bsfs) module. +A copy of the license is provided with the project. 
+Author: Matthias Baumgartner, 2022 +""" +# imports +import typing + +# exports +__all__: typing.Sequence[str] = ( + 'typename', + ) + + +## code ## + +def typename(obj) -> str: + """Return the type name of *obj*.""" + return type(obj).__name__ + + +## EOF ## diff --git a/bsfs/utils/errors.py b/bsfs/utils/errors.py new file mode 100644 index 0000000..c5e8e16 --- /dev/null +++ b/bsfs/utils/errors.py @@ -0,0 +1,41 @@ +""" + +Part of the BlackStar filesystem (bsfs) module. +A copy of the license is provided with the project. +Author: Matthias Baumgartner, 2022 +""" +# imports +import typing + +# exports +__all__: typing.Sequence[str] = ( + ) + + +## code ## + +class _BSFSError(Exception): + """Generic bsfs error.""" + +class SchemaError(_BSFSError): + """Generic schema errors.""" + +class ConsistencyError(SchemaError): + """A requested operation is inconsistent with the schema.""" + +class InstanceError(SchemaError): + """An instance affected by some operation is inconsistent with the schema.""" + +class PermissionDeniedError(_BSFSError): + """An operation was aborted due to access control restrictions.""" + +class ProgrammingError(_BSFSError): + """An assertion-like error that indicates a code-base issue.""" + +class UnreachableError(ProgrammingError): + """Bravo, you've reached a point in code that should logically not be reachable.""" + +class ConfigError(_BSFSError): + """User config issue.""" + +## EOF ## diff --git a/bsfs/utils/uri.py b/bsfs/utils/uri.py new file mode 100644 index 0000000..84854a4 --- /dev/null +++ b/bsfs/utils/uri.py @@ -0,0 +1,246 @@ +""" + +Part of the BlackStar filesystem (bsfs) module. +A copy of the license is provided with the project. +Author: Matthias Baumgartner, 2022 +""" +# imports +import re +import typing + +# constants +RX_URI = re.compile(r''' + ^ + (?:(?P<scheme>[^:/?#]+):)? # scheme, ://-delimited + (?://(?P<authority>[^/?#]*))? 
# authority (user@host:port), [/#?]-delimited + (?P<path>[^?#]*) # path, [#?]-delimited + (?:\?(?P<query>[^#]*))? # query, [#]-delimited + (?:\#(?P<fragment>.*))? # fragment, remaining characters + $ + ''', re.VERBOSE + re.IGNORECASE) + +RX_HOST = re.compile(r''' + ^ + (?:(?P<userinfo>[^@]*)@)? # userinfo + (?P<host> + (?:\[[^\]]+\]) | # IPv6 address + (?:[^:]+) # IPv4 address or regname + ) + (?::(?P<port>\d*))? # port + $ + ''', re.VERBOSE + re.IGNORECASE) + +# exports +__all__: typing.Sequence[str] = ( + 'URI', + ) + + +## code ## + +def _get_optional( + regexp: re.Pattern, + query: str, + grp: str + ) -> str: + """Return the regular expression *regexp*'s group *grp* of *query* + or raise a `ValueError` if the *query* doesn't match the expression. + """ + parts = regexp.search(query) + if parts is not None: + if parts.group(grp) is not None: + return parts.group(grp) + raise ValueError(query) + + +class URI(str): + """URI additions to built-in strings. + + Provides properties to access the different components of an URI, + according to RFC 3986 (https://datatracker.ietf.org/doc/html/rfc3986). + + Note that this class does not actually validate an URI but only offers + access to components of a *well-formed* URI. Use `urllib.parse` for + more advanced purposes. + + """ + + def __new__(cls, value: str): + """Create a new URI instance. + Raises a `ValueError` if the (supposed) URI is malformatted. + """ + if not cls.is_parseable(value): + raise ValueError(value) + return str.__new__(cls, value) + + @staticmethod + def is_parseable(query: str) -> bool: + """Return True if the *query* can be decomposed into the URI components. + + Note that a valid URI is always parseable, however, an invalid URI + might be parseable as well. The return value of this method makes + no claim about the validity of an URI! 
+ + """ + # check uri + parts = RX_URI.match(query) + if parts is not None: + # check authority + authority = parts.group('authority') + if authority is None or RX_HOST.match(authority) is not None: + return True + # some check not passed + return False + + @staticmethod + def compose( + path: str, + scheme: typing.Optional[str] = None, + authority: typing.Optional[str] = None, + user: typing.Optional[str] = None, + host: typing.Optional[str] = None, + port: typing.Optional[int] = None, + query: typing.Optional[str] = None, + fragment: typing.Optional[str] = None, + ): + """URI composition from components. + + If the *host* argument is supplied, the authority is composed of *user*, + *host*, and *port* arguments, and the *authority* argument is ignored. + Note that if the *host* is an IPv6 address, it must be enclosed in brackets. + """ + # strip whitespaces + path = path.strip() + + # compose authority + if host is not None: + authority = '' + if user is not None: + authority += user + '@' + authority += host + if port is not None: + authority += ':' + str(port) + + # ensure root on path + if path[0] != '/': + path = '/' + path + + # compose uri + uri = '' + if scheme is not None: + uri += scheme + ':' + if authority is not None: + uri += '//' + authority + uri += path + if query is not None: + uri += '?' 
+ query + if fragment is not None: + uri += '#' + fragment + + # return as URI + return URI(uri) + + @property + def scheme(self) -> str: + """Return the protocol/scheme part of the URI.""" + return _get_optional(RX_URI, self, 'scheme') + + @property + def authority(self) -> str: + """Return the authority part of the URI, including userinfo and port.""" + return _get_optional(RX_URI, self, 'authority') + + @property + def userinfo(self) -> str: + """Return the userinfo part of the URI.""" + return _get_optional(RX_HOST, self.authority, 'userinfo') + + @property + def host(self) -> str: + """Return the host part of the URI.""" + return _get_optional(RX_HOST, self.authority, 'host') + + @property + def port(self) -> int: + """Return the port part of the URI.""" + return int(_get_optional(RX_HOST, self.authority, 'port')) + + @property + def path(self) -> str: + """Return the path part of the URI.""" + return _get_optional(RX_URI, self, 'path') + + @property + def query(self) -> str: + """Return the query part of the URI.""" + return _get_optional(RX_URI, self, 'query') + + @property + def fragment(self) -> str: + """Return the fragment part of the URI.""" + return _get_optional(RX_URI, self, 'fragment') + + def get(self, component: str, default: typing.Optional[typing.Any] = None) -> typing.Optional[typing.Any]: + """Return the component or a default value.""" + # check args + if component not in ('scheme', 'authority', 'userinfo', 'host', + 'port', 'path', 'query', 'fragment'): + raise ValueError(component) + try: + # return component's value + return getattr(self, component) + except ValueError: + # return the default value + return default + + + # overload composition methods + + def __add__(self, *args) -> 'URI': + return URI(super().__add__(*args)) + + def join(self, *args) -> 'URI': + return URI(super().join(*args)) + + def __mul__(self, *args) -> 'URI': + return URI(super().__mul__(*args)) + + def __rmul__(self, *args) -> 'URI': + return 
URI(super().__rmul__(*args)) + + + # overload casefold methods + + def lower(self, *args) -> 'URI': + return URI(super().lower(*args)) + + def upper(self, *args) -> 'URI': + return URI(super().upper(*args)) + + + # overload stripping methods + + def strip(self, *args) -> 'URI': + return URI(super().strip(*args)) + + def lstrip(self, *args) -> 'URI': + return URI(super().lstrip(*args)) + + def rstrip(self, *args) -> 'URI': + return URI(super().rstrip(*args)) + + + # overload formatting methods + + def format(self, *args, **kwargs) -> 'URI': + return URI(super().format(*args, **kwargs)) + + def __mod__(self, *args) -> 'URI': + return URI(super().__mod__(*args)) + + def replace(self, *args) -> 'URI': + return URI(super().replace(*args)) + + + +## EOF ## diff --git a/bsfs/utils/uuid.py b/bsfs/utils/uuid.py new file mode 100644 index 0000000..6366b18 --- /dev/null +++ b/bsfs/utils/uuid.py @@ -0,0 +1,108 @@ +""" + +Part of the BlackStar filesystem (bsfs) module. +A copy of the license is provided with the project. +Author: Matthias Baumgartner, 2022 +""" +# imports +from collections import abc +import hashlib +import os +import platform +import random +import threading +import time +import typing +import uuid + +# constants +HASH = hashlib.sha256 + +# exports +__all__: typing.Sequence[str] = [ + 'UCID', + 'UUID', + ] + + +## code ## + +class UUID(abc.Iterator, abc.Callable): # type: ignore [misc] # abc.Callable "is an invalid base class" + """Generate 256-bit universally unique IDs. + + This is a 'best-effort' kind of implementation that tries to ensure global + uniqueness, even tough actual uniqueness cannot be guaranteed. + The approach is different from python's uuid module (which implements + RFC 4122) in that it generates longer UUIDs and in that it cannot be + reconstructed whether two UUIDs were generated on the same system. 
+ + The ID is a cryptographic hash over several components: + * host + * system + * process + * thread + * random + * time + * cpu cycles + * content (if available) + + """ + + # host identifier + host: str + + # system identifier + system: str + + # process identifier + process: str + + # thread identifier + thread: str + + def __init__(self, seed: typing.Optional[int] = None): + # initialize static components + self.host = str(uuid.getnode()) + self.system = '-'.join(platform.uname()) + self.process = str(os.getpid()) + self.thread = str(threading.get_ident()) + # initialize random component + random.seed(seed) + + def __call__(self, content: typing.Optional[str] = None) -> str: # pylint: disable=arguments-differ + """Return a globally unique ID.""" + # content component + content = str(content) if content is not None else '' + # time component + now = str(time.time()) + # clock component + clk = str(time.perf_counter()) + # random component + rnd = str(random.random()) + # build the token from all available components + token = self.host + self.system + self.process + self.thread + rnd + now + clk + content + # return the token's hash + return HASH(token.encode('ascii', 'ignore')).hexdigest() + + def __iter__(self) -> typing.Iterator[str]: + """Iterate indefinitely over universally unique IDs.""" + return self + + def __next__(self) -> str: + """Generate universally unique IDs.""" + return self() + + +class UCID(): + """Generate 256-bit content IDs. + + Effectively computes a cryptographic hash over the content. 
+ + """ + @staticmethod + def from_path(path: str) -> str: + """Read the content from a file.""" + with open(path, 'rb') as ifile: + return HASH(ifile.read()).hexdigest() + +## EOF ## diff --git a/setup.py b/setup.py new file mode 100644 index 0000000..ab3864a --- /dev/null +++ b/setup.py @@ -0,0 +1,23 @@ + +from setuptools import setup +import os + +setup( + name='bsfs', + version='0.0.1', + author='Matthias Baumgartner', + author_email='dev@igsor.net', + description='A content aware graph file system.', + long_description=open(os.path.join(os.path.dirname(__file__), 'README')).read(), + license='BSD', + license_files=('LICENSE', ), + url='https://www.igsor.net/projects/blackstar/bsfs/', + download_url='https://pip.igsor.net', + packages=('bsfs', ), + install_requires=('rdflib', ), + python_requires=">=3.7", +) + +# FIXME: bsfs/graph/schema.nt +# FIXME: bsfs.app + diff --git a/test/__init__.py b/test/__init__.py new file mode 100644 index 0000000..e69de29 --- /dev/null +++ b/test/__init__.py diff --git a/test/apps/__init__.py b/test/apps/__init__.py new file mode 100644 index 0000000..e69de29 --- /dev/null +++ b/test/apps/__init__.py diff --git a/test/apps/config.json b/test/apps/config.json new file mode 100644 index 0000000..ffc5ef7 --- /dev/null +++ b/test/apps/config.json @@ -0,0 +1,8 @@ +{ + "Graph": { + "user": "http://example.com/me", + "backend": { + "SparqlStore": {} + } + } +} diff --git a/test/apps/schema-1.nt b/test/apps/schema-1.nt new file mode 100644 index 0000000..e57146d --- /dev/null +++ b/test/apps/schema-1.nt @@ -0,0 +1,19 @@ + +prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> +prefix xsd: <http://www.w3.org/2001/XMLSchema#> + +# common bsfs prefixes +prefix bsfs: <http://bsfs.ai/schema/> +prefix bse: <http://bsfs.ai/schema/Entity#> + +# essential nodes +bsfs:Entity rdfs:subClassOf bsfs:Node . + +# common definitions +xsd:string rdfs:subClassOf bsfs:Literal . 
+ +bse:filename rdfs:subClassOf bsfs:Predicate ; + rdfs:domain bsfs:Entity ; + rdfs:range xsd:string ; + bsfs:unique "false"^^xsd:boolean . + diff --git a/test/apps/schema-2.nt b/test/apps/schema-2.nt new file mode 100644 index 0000000..525ac99 --- /dev/null +++ b/test/apps/schema-2.nt @@ -0,0 +1,19 @@ + +prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> +prefix xsd: <http://www.w3.org/2001/XMLSchema#> + +# common bsfs prefixes +prefix bsfs: <http://bsfs.ai/schema/> +prefix bse: <http://bsfs.ai/schema/Entity#> + +# essential nodes +bsfs:Entity rdfs:subClassOf bsfs:Node . + +# common definitions +xsd:integer rdfs:subClassOf bsfs:Literal . + +bse:filesize rdfs:subClassOf bsfs:Predicate ; + rdfs:domain bsfs:Entity ; + rdfs:range xsd:integer ; + bsfs:unique "true"^^xsd:boolean . + diff --git a/test/apps/test_init.py b/test/apps/test_init.py new file mode 100644 index 0000000..bae6a68 --- /dev/null +++ b/test/apps/test_init.py @@ -0,0 +1,91 @@ +""" + +Part of the bsfs test suite. +A copy of the license is provided with the project. 
+Author: Matthias Baumgartner, 2022 +""" +# imports +import contextlib +import io +import json +import os +import tempfile +import unittest + +# bsie imports +from bsfs.front import build_graph +from bsfs.graph import Graph + +# objects to test +from bsfs.apps.init import main, init_sparql_store + + +## code ## + +class TestInit(unittest.TestCase): + def test_main(self): + + # cannot pass an invalid store + with contextlib.redirect_stderr(io.StringIO()): + self.assertRaises(SystemExit, main, ['--user', 'http://example.com/me', 'foobar']) + + # produces a config structure + outbuf = io.StringIO() + with contextlib.redirect_stdout(outbuf): + main(['--user', 'http://example.com/me', 'sparql']) + self.assertEqual(json.loads(outbuf.getvalue()), { + 'Graph': { + 'user': 'http://example.com/me', + 'backend': { + 'SparqlStore': {}}}}) + # config is valid + self.assertIsInstance(build_graph(json.loads(outbuf.getvalue())), Graph) + + # respects user flag + outbuf = io.StringIO() + with contextlib.redirect_stdout(outbuf): + main(['--user', 'http://example.com/you', 'sparql']) + self.assertEqual(json.loads(outbuf.getvalue()), { + 'Graph': { + 'user': 'http://example.com/you', + 'backend': { + 'SparqlStore': {}}}}) + + # respects output flag + _, path = tempfile.mkstemp(prefix='bsfs-test-', text=True) + outbuf = io.StringIO() + with contextlib.redirect_stdout(outbuf): + main(['--user', 'http://example.com/me', '--output', path, 'sparql']) + with open(path, 'rt') as ifile: + config = ifile.read() + os.unlink(path) + self.assertEqual(outbuf.getvalue(), '') + self.assertEqual(json.loads(config), { + 'Graph': { + 'user': 'http://example.com/me', + 'backend': { + 'SparqlStore': {}}}}) + + def test_init_sparql_store(self): + # returns a config structure + self.assertEqual(init_sparql_store('http://example.com/me'), { + 'Graph': { + 'user': 'http://example.com/me', + 'backend': { + 'SparqlStore': {}}}}) + # respects user + self.assertEqual(init_sparql_store('http://example.com/you'), 
{ + 'Graph': { + 'user': 'http://example.com/you', + 'backend': { + 'SparqlStore': {}}}}) + # the config is valid + self.assertIsInstance(build_graph(init_sparql_store('http://example.com/me')), Graph) + + +## main ## + +if __name__ == '__main__': + unittest.main() + +## EOF ## diff --git a/test/apps/test_migrate.py b/test/apps/test_migrate.py new file mode 100644 index 0000000..957509a --- /dev/null +++ b/test/apps/test_migrate.py @@ -0,0 +1,66 @@ +""" + +Part of the bsfs test suite. +A copy of the license is provided with the project. +Author: Matthias Baumgartner, 2022 +""" +# imports +import contextlib +import io +import os +import sys +import unittest +import unittest.mock + +# bsie imports +from bsfs.schema import Schema + +# objects to test +from bsfs.apps.migrate import main + + +## code ## + +class TestMigrate(unittest.TestCase): + def test_main(self): + config = os.path.join(os.path.dirname(__file__), 'config.json') + schema_1 = os.path.join(os.path.dirname(__file__), 'schema-1.nt') + schema_2 = os.path.join(os.path.dirname(__file__), 'schema-2.nt') + + # provide no config + with contextlib.redirect_stderr(io.StringIO()): + self.assertRaises(SystemExit, main, []) + + # read schema from file + with open(schema_1) as ifile: + target = Schema.from_string(ifile.read()) + graph = main([config, schema_1]) + self.assertTrue(target <= graph.schema) + + # read schema from multiple files + with open(schema_1) as ifile: + target = Schema.from_string(ifile.read()) + with open(schema_2) as ifile: + target = target + Schema.from_string(ifile.read()) + graph = main([config, schema_1, schema_2]) + self.assertTrue(target <= graph.schema) + + # read schema from stdin + with open(schema_1, 'rt') as ifile: + target = Schema.from_string(ifile.read()) + with open(schema_1, 'rt') as ifile: + with unittest.mock.patch('sys.stdin', ifile): + graph = main([config]) + self.assertTrue(target <= graph.schema) + + # remove predicates + # NOTE: cannot currently test this since there's 
nothing to remove in the loaded (empty) schema. + + +## main ## + +if __name__ == '__main__': + unittest.main() + +## EOF ## + diff --git a/test/front/__init__.py b/test/front/__init__.py new file mode 100644 index 0000000..e69de29 --- /dev/null +++ b/test/front/__init__.py diff --git a/test/front/test_bsfs.py b/test/front/test_bsfs.py new file mode 100644 index 0000000..0d7f383 --- /dev/null +++ b/test/front/test_bsfs.py @@ -0,0 +1,38 @@ +""" + +Part of the bsfs test suite. +A copy of the license is provided with the project. +Author: Matthias Baumgartner, 2022 +""" +# imports +import unittest + +# bsie imports +from bsfs.graph import Graph +from bsfs.triple_store import SparqlStore +from bsfs.utils import errors, URI + +# objects to test +from bsfs.front.bsfs import Open + + +## code ## + +class TestBSFS(unittest.TestCase): + def test_open(self): + # valid config produces a valid graph + config = {'Graph': {'backend': {'SparqlStore': {}}, 'user': 'http://example.com/me'}} + graph = Open(config) + self.assertIsInstance(graph, Graph) + self.assertIsInstance(graph._backend, SparqlStore) + self.assertEqual(graph._user, URI('http://example.com/me')) + # invalid config raises an error + self.assertRaises(errors.ConfigError, Open, {}) + + +## main ## + +if __name__ == '__main__': + unittest.main() + +## EOF ## diff --git a/test/front/test_builder.py b/test/front/test_builder.py new file mode 100644 index 0000000..08f2027 --- /dev/null +++ b/test/front/test_builder.py @@ -0,0 +1,64 @@ +""" + +Part of the bsfs test suite. +A copy of the license is provided with the project. 
+Author: Matthias Baumgartner, 2022 +""" +# imports +import unittest + +# bsie imports +from bsfs.graph import Graph +from bsfs.triple_store import SparqlStore +from bsfs.utils import errors, URI + +# objects to test +from bsfs.front.builder import build_backend, build_graph + + +## code ## + +class TestBuilder(unittest.TestCase): + def test_build_backend(self): + # valid config produces a valid store + store = build_backend({'SparqlStore': {}}) + self.assertIsInstance(store, SparqlStore) + self.assertIsNone(store.uri) + # cannot create an invalid store + self.assertRaises(errors.ConfigError, build_backend, {'MyStore': {}}) + # must pass a dict + self.assertRaises(TypeError, build_backend, 1234) + self.assertRaises(TypeError, build_backend, 'hello world') + self.assertRaises(TypeError, build_backend, [1,2,3]) + # cannot create a store from an invalid config + self.assertRaises(errors.ConfigError, build_backend, {}) + self.assertRaises(errors.ConfigError, build_backend, {'SparqlStore': {}, 'OtherStore': {}}) + self.assertRaises(TypeError, build_backend, {'SparqlStore': {'hello': 'world'}}) + + def test_build_graph(self): + # valid config produces a valid graph + graph = build_graph({'Graph': {'backend': {'SparqlStore': {}}, 'user': 'http://example.com/me'}}) + self.assertIsInstance(graph, Graph) + self.assertIsInstance(graph._backend, SparqlStore) + self.assertEqual(graph._user, URI('http://example.com/me')) + # cannot create an invalid graph + self.assertRaises(errors.ConfigError, build_graph, {'MyGraph': {}}) + # must pass a dict + self.assertRaises(TypeError, build_graph, 1234) + self.assertRaises(TypeError, build_graph, 'hello world') + self.assertRaises(TypeError, build_graph, [1,2,3]) + # cannot create a graph from an invalid config + self.assertRaises(errors.ConfigError, build_graph, {}) + self.assertRaises(errors.ConfigError, build_graph, {'Graph': {}, 'Graph2': {}}) + self.assertRaises(errors.ConfigError, build_graph, {'Graph': {}}) + 
self.assertRaises(errors.ConfigError, build_graph, {'Graph': {'user': 'http://example.com/me'}}) + self.assertRaises(errors.ConfigError, build_graph, {'Graph': {'backend': 'Hello world'}}) + self.assertRaises(TypeError, build_graph, {'Graph': {'user': 'http://example.com/me', 'backend': 'Hello world'}}) + + +## main ## + +if __name__ == '__main__': + unittest.main() + +## EOF ## diff --git a/test/graph/__init__.py b/test/graph/__init__.py new file mode 100644 index 0000000..e69de29 --- /dev/null +++ b/test/graph/__init__.py diff --git a/test/graph/ac/__init__.py b/test/graph/ac/__init__.py new file mode 100644 index 0000000..e69de29 --- /dev/null +++ b/test/graph/ac/__init__.py diff --git a/test/graph/ac/test_null.py b/test/graph/ac/test_null.py new file mode 100644 index 0000000..f39c9be --- /dev/null +++ b/test/graph/ac/test_null.py @@ -0,0 +1,102 @@ +""" + +Part of the bsfs test suite. +A copy of the license is provided with the project. +Author: Matthias Baumgartner, 2022 +""" +# imports +import unittest + +# bsie imports +from bsfs import schema as _schema +from bsfs.namespace import ns +from bsfs.triple_store import SparqlStore +from bsfs.utils import URI + +# objects to test +from bsfs.graph.ac.null import NullAC + + +## code ## + +class TestNullAC(unittest.TestCase): + def setUp(self): + self.backend = SparqlStore() + self.backend.schema = _schema.Schema.from_string(''' + prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> + prefix xsd: <http://www.w3.org/2001/XMLSchema#> + + prefix bsfs: <http://bsfs.ai/schema/> + prefix bsm: <http://bsfs.ai/schema/Meta#> + prefix bse: <http://bsfs.ai/schema/Entity#> + + bsfs:Entity rdfs:subClassOf bsfs:Node . + bsfs:Tag rdfs:subClassOf bsfs:Node . + xsd:string rdfs:subClassOf bsfs:Literal . + xsd:integer rdfs:subClassOf bsfs:Literal . + + # predicates mandated by Nodes + bsm:t_created rdfs:subClassOf bsfs:Predicate ; + rdfs:domain bsfs:Node ; + rdfs:range xsd:integer ; + bsfs:unique "true"^^xsd:boolean . 
+ + # additionally defined predicates + bse:tag rdfs:subClassOf bsfs:Predicate ; + rdfs:domain bsfs:Entity ; + rdfs:range bsfs:Tag ; + bsfs:unique "false"^^xsd:boolean . + + bse:author rdfs:subClassOf bsfs:Predicate ; + rdfs:domain bsfs:Entity ; + rdfs:range xsd:string ; + bsfs:unique "true"^^xsd:boolean . + + bse:filesize rdfs:subClassOf bsfs:Predicate ; + rdfs:domain bsfs:Entity ; + rdfs:range xsd:integer ; + bsfs:unique "false"^^xsd:boolean . + + ''') + self.user = URI('http://www.example.com/me') + self.p_author = self.backend.schema.predicate(ns.bse.author) + self.p_filesize = self.backend.schema.predicate(ns.bse.filesize) + self.p_tag = self.backend.schema.predicate(ns.bse.tag) + self.p_created = self.backend.schema.predicate(ns.bsm.t_created) + self.ent_type = self.backend.schema.node(ns.bsfs.Entity) + self.ent_ids = {URI('http://www.example.com/me/entity#1234'), URI('http://www.example.com/me/entity#4321')} + + def test_is_protected_predicate(self): + ac = NullAC(self.backend, self.user) + self.assertTrue(ac.is_protected_predicate(self.p_created)) + self.assertFalse(ac.is_protected_predicate(self.p_filesize)) + self.assertFalse(ac.is_protected_predicate(self.p_author)) + self.assertFalse(ac.is_protected_predicate(self.p_tag)) + + def test_create(self): + ac = NullAC(self.backend, self.user) + self.assertEqual(None, ac.create(self.ent_type, self.ent_ids)) + + def test_link_from_node(self): + ac = NullAC(self.backend, self.user) + self.assertSetEqual(self.ent_ids, ac.link_from_node(self.ent_type, self.ent_ids)) + + def test_link_to_node(self): + ac = NullAC(self.backend, self.user) + self.assertSetEqual(self.ent_ids, ac.link_to_node(self.ent_type, self.ent_ids)) + + def test_write_literal(self): + ac = NullAC(self.backend, self.user) + self.assertSetEqual(self.ent_ids, ac.write_literal(self.ent_type, self.ent_ids)) + + def test_createable(self): + ac = NullAC(self.backend, self.user) + self.assertSetEqual(self.ent_ids, ac.createable(self.ent_type, 
self.ent_ids)) + + +## main ## + +if __name__ == '__main__': + unittest.main() + +## EOF ## diff --git a/test/graph/test_graph.py b/test/graph/test_graph.py new file mode 100644 index 0000000..33cf6aa --- /dev/null +++ b/test/graph/test_graph.py @@ -0,0 +1,201 @@ +""" + +Part of the bsfs test suite. +A copy of the license is provided with the project. +Author: Matthias Baumgartner, 2022 +""" +# imports +import unittest + +# bsie imports +from bsfs import schema +from bsfs.namespace import ns +from bsfs.triple_store import SparqlStore +from bsfs.utils import URI, errors +from bsfs.graph.nodes import Nodes + +# objects to test +from bsfs.graph.graph import Graph + + +## code ## + +class TestGraph(unittest.TestCase): + def setUp(self): + self.user = URI('http://example.com/me') + self.backend = SparqlStore.Open() + self.backend.schema = schema.Schema.from_string(''' + prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> + prefix bsfs: <http://bsfs.ai/schema/> + bsfs:Entity rdfs:subClassOf bsfs:Node . 
+ ''') + + def test_str(self): + self.assertEqual(str(Graph(self.backend, self.user)), + 'Graph(SparqlStore(uri=None), http://example.com/me)') + self.assertEqual(repr(Graph(self.backend, self.user)), + 'Graph(backend=SparqlStore(uri=None), user=http://example.com/me)') + # str respects backend + class Foo(SparqlStore): pass + self.assertEqual(str(Graph(Foo.Open(), self.user)), + 'Graph(Foo(uri=None), http://example.com/me)') + self.assertEqual(repr(Graph(Foo.Open(), self.user)), + 'Graph(backend=Foo(uri=None), user=http://example.com/me)') + # str respect user + self.assertEqual(str(Graph(self.backend, URI('http://example.com/you'))), + 'Graph(SparqlStore(uri=None), http://example.com/you)') + self.assertEqual(repr(Graph(self.backend, URI('http://example.com/you'))), + 'Graph(backend=SparqlStore(uri=None), user=http://example.com/you)') + # str respects type + class Bar(Graph): pass + self.assertEqual(str(Bar(self.backend, self.user)), + 'Bar(SparqlStore(uri=None), http://example.com/me)') + self.assertEqual(repr(Bar(self.backend, self.user)), + 'Bar(backend=SparqlStore(uri=None), user=http://example.com/me)') + + def test_equality(self): + graph = Graph(self.backend, self.user) + # instance is equal to itself + self.assertEqual(graph, graph) + self.assertEqual(hash(graph), hash(graph)) + # instance is equal to a clone + self.assertEqual(graph, Graph(self.backend, self.user)) + self.assertEqual(hash(graph), hash(Graph(self.backend, self.user))) + # equality respects backend + self.assertNotEqual(graph, Graph(SparqlStore.Open(), self.user)) + self.assertNotEqual(hash(graph), hash(Graph(SparqlStore.Open(), self.user))) + # equality respects user + self.assertNotEqual(graph, Graph(self.backend, URI('http://example.com/you'))) + self.assertNotEqual(hash(graph), hash(Graph(self.backend, URI('http://example.com/you')))) + + def test_essentials(self): + graph = Graph(self.backend, self.user) + # schema + self.assertEqual(graph.schema, self.backend.schema) + 
self.assertRaises(AttributeError, setattr, graph, 'schema', None) + + def test_node(self): + graph = Graph(self.backend, self.user) + guid = URI('http://example.com/me/entity#1234') + # returns a Nodes instance + self.assertEqual( + graph.node(ns.bsfs.Entity, guid), + Nodes(self.backend, self.user, graph.schema.node(ns.bsfs.Entity), {guid})) + # node_type must be in the schema + self.assertRaises(KeyError, graph.node, ns.bsfs.Invalid, guid) + + def test_nodes(self): + graph = Graph(self.backend, self.user) + guids = {URI('http://example.com/me/entity#1234'), URI('http://example.com/me/entity#4321')} + # returns a Nodes instance + self.assertEqual( + graph.nodes(ns.bsfs.Entity, guids), + Nodes(self.backend, self.user, graph.schema.node(ns.bsfs.Entity), guids)) + # node_type must be in the schema + self.assertRaises(KeyError, graph.nodes, ns.bsfs.Invalid, guids) + + def test_migrate(self): + # setup + graph = Graph(self.backend, self.user) + + # argument must be a schema + class Foo(): pass + self.assertRaises(TypeError, graph.migrate, 'hello world') + self.assertRaises(TypeError, graph.migrate, 1234) + self.assertRaises(TypeError, graph.migrate, Foo()) + + # cannot append inconsistent schema + self.assertRaises(errors.ConsistencyError, graph.migrate, schema.Schema({}, { + schema.Node(ns.bsfs.Entity, + schema.Node(ns.bsfs.Intermediate, + schema.Node(ns.bsfs.Node, None)))}), append=True) + + # cannot migrate to inconsistent schema + self.assertRaises(errors.ConsistencyError, graph.migrate, schema.Schema({}, { + schema.Node(ns.bsfs.Entity, + schema.Node(ns.bsfs.Intermediate, + schema.Node(ns.bsfs.Node, None)))}), append=False) + + # can migrate to compatible schema + target_1 = schema.Schema.from_string(''' + prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> + prefix xsd: <http://www.w3.org/2001/XMLSchema#> + prefix bsfs: <http://bsfs.ai/schema/> + prefix bse: <http://bsfs.ai/schema/Entity#> + bsfs:Entity rdfs:subClassOf bsfs:Node . 
+ xsd:string rdfs:subClassOf bsfs:Literal . + xsd:integer rdfs:subClassOf bsfs:Literal . + + bse:filename rdfs:subClassOf bsfs:Predicate ; + rdfs:domain bsfs:Entity ; + rdfs:range xsd:string ; + bsfs:unique "false"^^xsd:boolean . + + bse:filesize rdfs:subClassOf bsfs:Predicate ; + rdfs:domain bsfs:Entity ; + rdfs:range xsd:integer; + bsfs:unique "false"^^xsd:boolean . + + ''') + graph.migrate(target_1) + # new schema is applied + self.assertLess(target_1, graph.schema) + # graph appends its predicates + self.assertEqual(graph.schema, target_1 + schema.Schema.from_string(''' + prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> + prefix xsd: <http://www.w3.org/2001/XMLSchema#> + prefix bsfs: <http://bsfs.ai/schema/> + prefix bsm: <http://bsfs.ai/schema/Meta#> + xsd:integer rdfs:subClassOf bsfs:Literal . + bsm:t_created rdfs:subClassOf bsfs:Predicate ; + rdfs:domain bsfs:Node ; + rdfs:range xsd:integer ; + bsfs:unique "true"^^xsd:boolean . + ''')) + + # can overwrite the current schema + target_2 = schema.Schema.from_string(''' + prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> + prefix xsd: <http://www.w3.org/2001/XMLSchema#> + prefix bsfs: <http://bsfs.ai/schema/> + prefix bse: <http://bsfs.ai/schema/Entity#> + bsfs:Entity rdfs:subClassOf bsfs:Node . + xsd:string rdfs:subClassOf bsfs:Literal . + xsd:integer rdfs:subClassOf bsfs:Literal . + + bse:filename rdfs:subClassOf bsfs:Predicate ; + rdfs:domain bsfs:Entity ; + rdfs:range xsd:string ; + bsfs:unique "false"^^xsd:boolean . + + bse:author rdfs:subClassOf bsfs:Predicate ; + rdfs:domain bsfs:Entity ; + rdfs:range xsd:string ; + bsfs:unique "true"^^xsd:boolean . 
+ + ''') + graph.migrate(target_2, append=False) + # append overwrites existing predicates + self.assertFalse(target_1 <= graph.schema) + # new schema is applied + self.assertLess(target_2, graph.schema) + # graph appends its predicates + self.assertEqual(graph.schema, target_2 + schema.Schema.from_string(''' + prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> + prefix xsd: <http://www.w3.org/2001/XMLSchema#> + prefix bsfs: <http://bsfs.ai/schema/> + prefix bsm: <http://bsfs.ai/schema/Meta#> + xsd:integer rdfs:subClassOf bsfs:Literal . + bsm:t_created rdfs:subClassOf bsfs:Predicate ; + rdfs:domain bsfs:Node ; + rdfs:range xsd:integer ; + bsfs:unique "true"^^xsd:boolean . + ''')) + + +## main ## + +if __name__ == '__main__': + unittest.main() + +## EOF ## diff --git a/test/graph/test_nodes.py b/test/graph/test_nodes.py new file mode 100644 index 0000000..43e7f6f --- /dev/null +++ b/test/graph/test_nodes.py @@ -0,0 +1,361 @@ +""" + +Part of the bsfs test suite. +A copy of the license is provided with the project. +Author: Matthias Baumgartner, 2022 +""" +# imports +import rdflib +import unittest + +# bsie imports +from bsfs import schema as _schema +from bsfs.namespace import ns +from bsfs.triple_store.sparql import SparqlStore +from bsfs.utils import errors, URI + +# objects to test +from bsfs.graph.nodes import Nodes + + +## code ## + +class TestNodes(unittest.TestCase): + def setUp(self): + # initialize backend + self.backend = SparqlStore() + self.backend.schema = _schema.Schema.from_string(''' + prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> + prefix xsd: <http://www.w3.org/2001/XMLSchema#> + + prefix bsfs: <http://bsfs.ai/schema/> + prefix bsm: <http://bsfs.ai/schema/Meta#> + prefix bse: <http://bsfs.ai/schema/Entity#> + prefix bst: <http://bsfs.ai/schema/Tag#> + + bsfs:Entity rdfs:subClassOf bsfs:Node . + bsfs:Tag rdfs:subClassOf bsfs:Node . + bsfs:User rdfs:subClassOf bsfs:Node . + xsd:string rdfs:subClassOf bsfs:Literal . 
+ xsd:integer rdfs:subClassOf bsfs:Literal . + + # predicates mandated by Nodes + bsm:t_created rdfs:subClassOf bsfs:Predicate ; + rdfs:domain bsfs:Node ; + rdfs:range xsd:integer ; + bsfs:unique "true"^^xsd:boolean . + + # additionally defined predicates + bse:comment rdfs:subClassOf bsfs:Predicate ; + rdfs:domain bsfs:Entity ; + rdfs:range xsd:string ; + bsfs:unique "false"^^xsd:boolean . + + bse:filesize rdfs:subClassOf bsfs:Predicate ; + rdfs:domain bsfs:Entity ; + rdfs:range xsd:integer ; + bsfs:unique "true"^^xsd:boolean . + + bse:tag rdfs:subClassOf bsfs:Predicate ; + rdfs:domain bsfs:Entity ; + rdfs:range bsfs:Tag ; + bsfs:unique "false"^^xsd:boolean . + + bse:author rdfs:subClassOf bsfs:Predicate ; + rdfs:domain bsfs:Entity ; + rdfs:range bsfs:User ; + bsfs:unique "true"^^xsd:boolean . + + bst:representative rdfs:subClassOf bsfs:Predicate ; + rdfs:domain bsfs:Tag ; + rdfs:range bsfs:Entity ; + bsfs:unique "true"^^xsd:boolean . + + ''') + # Nodes constructor args + self.user = URI('http://example.com/me') + # set args + self.tag_type = self.backend.schema.node(ns.bsfs.Tag) + self.ent_type = self.backend.schema.node(ns.bsfs.Entity) + self.user_type = self.backend.schema.node(ns.bsfs.User) + self.p_filesize = self.backend.schema.predicate(ns.bse.filesize) + self.p_author = self.backend.schema.predicate(ns.bse.author) + self.p_tag = self.backend.schema.predicate(ns.bse.tag) + self.p_representative = self.backend.schema.predicate(URI('http://bsfs.ai/schema/Tag#representative')) + self.t_created = self.backend.schema.predicate(ns.bsm.t_created) + self.ent_ids = { + URI('http://example.com/me/entity#1234'), + URI('http://example.com/me/entity#4321'), + } + self.tag_ids = { + URI('http://example.com/me/tag#1234'), + URI('http://example.com/me/tag#4321'), + } + + def test_str(self): + # str baseline + nodes = Nodes(self.backend, self.user, self.ent_type, self.ent_ids) + self.assertEqual(str(nodes), f'Nodes({self.ent_type}, {self.ent_ids})') + 
self.assertEqual(repr(nodes), f'Nodes({self.backend}, {self.user}, {self.ent_type}, {self.ent_ids})') + # str respects node_type + nodes = Nodes(self.backend, self.user, self.tag_type, self.tag_ids) + self.assertEqual(str(nodes), f'Nodes({self.tag_type}, {self.tag_ids})') + self.assertEqual(repr(nodes), f'Nodes({self.backend}, {self.user}, {self.tag_type}, {self.tag_ids})') + # str respects guids + nodes = Nodes(self.backend, self.user, self.ent_type, {URI('http://example.com/me/entity#foo')}) + self.assertEqual(str(nodes), f'Nodes({self.ent_type}, {{\'http://example.com/me/entity#foo\'}})') + self.assertEqual(repr(nodes), f'Nodes({self.backend}, {self.user}, {self.ent_type}, {{\'http://example.com/me/entity#foo\'}})') + # repr respects backend + class Foo(SparqlStore): pass + backend = Foo.Open() + backend.schema = self.backend.schema + nodes = Nodes(backend, self.user, self.ent_type, self.ent_ids) + self.assertEqual(repr(nodes), f'Nodes({backend}, {self.user}, {self.ent_type}, {self.ent_ids})') + # repr respects user + nodes = Nodes(self.backend, URI('http://example.com/you'), self.ent_type, self.ent_ids) + self.assertEqual(repr(nodes), f'Nodes({self.backend}, http://example.com/you, {self.ent_type}, {self.ent_ids})') + + def test_equality(self): + nodes = Nodes(self.backend, self.user, self.ent_type, self.ent_ids) + # instance is equal to itself + self.assertEqual(nodes, nodes) + self.assertEqual(hash(nodes), hash(nodes)) + # instance is equal to a clone + self.assertEqual(nodes, Nodes(self.backend, self.user, self.ent_type, self.ent_ids)) + self.assertEqual(Nodes(self.backend, self.user, self.ent_type, self.ent_ids), nodes) + self.assertEqual(hash(nodes), hash(Nodes(self.backend, self.user, self.ent_type, self.ent_ids))) + # equality respects backend + backend = SparqlStore.Open() + backend.schema = self.backend.schema + self.assertNotEqual(nodes, Nodes(backend, self.user, self.ent_type, self.ent_ids)) + self.assertNotEqual(hash(nodes), hash(Nodes(backend, 
self.user, self.ent_type, self.ent_ids))) + # equality respects user + self.assertNotEqual(nodes, Nodes(self.backend, URI('http://example.com/you'), self.ent_type, self.ent_ids)) + self.assertNotEqual(hash(nodes), hash(Nodes(self.backend, URI('http://example.com/you'), self.ent_type, self.ent_ids))) + # equality respects node_type + self.assertNotEqual(nodes, Nodes(self.backend, self.user, self.tag_type, self.ent_ids)) + self.assertNotEqual(hash(nodes), hash(Nodes(self.backend, self.user, self.tag_type, self.ent_ids))) + # equality respects guids + self.assertNotEqual(nodes, Nodes(self.backend, self.user, self.ent_type, self.tag_ids)) + self.assertNotEqual(hash(nodes), hash(Nodes(self.backend, self.user, self.ent_type, self.tag_ids))) + + def test_properties(self): + # node_type + self.assertEqual(self.ent_type, Nodes( + self.backend, self.user, self.ent_type, self.ent_ids).node_type) + self.assertEqual(self.tag_type, Nodes( + self.backend, self.user, self.tag_type, self.tag_ids).node_type) + # guids + self.assertSetEqual(self.ent_ids, set(Nodes( + self.backend, self.user, self.ent_type, self.ent_ids).guids)) + self.assertSetEqual(self.tag_ids, set(Nodes( + self.backend, self.user, self.tag_type, self.tag_ids).guids)) + + def test__ensure_nodes(self): + nodes = Nodes(self.backend, self.user, self.ent_type, self.ent_ids) + + # missing nodes are created + self.assertSetEqual(self.ent_ids, nodes._ensure_nodes(self.ent_type, self.ent_ids)) + # get creation time from backend manually + time_triples = list(self.backend._graph.objects(rdflib.URIRef('http://example.com/me/entity#1234'), rdflib.URIRef(self.t_created.uri))) + t_ent_created = float(time_triples[0]) if len(time_triples) > 0 else 0.0 + # check triples + self.assertSetEqual(set(self.backend._graph), { + # entity definitions + (rdflib.URIRef('http://example.com/me/entity#1234'), rdflib.RDF.type, rdflib.URIRef('http://bsfs.ai/schema/Entity')), + (rdflib.URIRef('http://example.com/me/entity#4321'), rdflib.RDF.type, 
rdflib.URIRef('http://bsfs.ai/schema/Entity')), + # bookkeeping + (rdflib.URIRef('http://example.com/me/entity#1234'), rdflib.URIRef(self.t_created.uri), rdflib.Literal(t_ent_created, datatype=rdflib.XSD.integer)), + (rdflib.URIRef('http://example.com/me/entity#4321'), rdflib.URIRef(self.t_created.uri), rdflib.Literal(t_ent_created, datatype=rdflib.XSD.integer)), + }) + + # existing nodes remain unchanged + self.assertSetEqual(self.ent_ids, nodes._ensure_nodes(self.ent_type, self.ent_ids)) + self.assertSetEqual(set(self.backend._graph), { + # entity definitions + (rdflib.URIRef('http://example.com/me/entity#1234'), rdflib.RDF.type, rdflib.URIRef('http://bsfs.ai/schema/Entity')), + (rdflib.URIRef('http://example.com/me/entity#4321'), rdflib.RDF.type, rdflib.URIRef('http://bsfs.ai/schema/Entity')), + # bookkeeping + (rdflib.URIRef('http://example.com/me/entity#1234'), rdflib.URIRef(self.t_created.uri), rdflib.Literal(t_ent_created, datatype=rdflib.XSD.integer)), + (rdflib.URIRef('http://example.com/me/entity#4321'), rdflib.URIRef(self.t_created.uri), rdflib.Literal(t_ent_created, datatype=rdflib.XSD.integer)), + }) + + # type and guids don't need to match the node instance's members + self.assertSetEqual(self.tag_ids, nodes._ensure_nodes(self.tag_type, self.tag_ids)) + # get creation time from backend manually + time_triples = list(self.backend._graph.objects(rdflib.URIRef('http://example.com/me/tag#1234'), rdflib.URIRef(self.t_created.uri))) + t_tag_created = float(time_triples[0]) if len(time_triples) > 0 else 0.0 + # check triples + self.assertSetEqual(set(self.backend._graph), { + # previous triples + (rdflib.URIRef('http://example.com/me/entity#1234'), rdflib.RDF.type, rdflib.URIRef('http://bsfs.ai/schema/Entity')), + (rdflib.URIRef('http://example.com/me/entity#4321'), rdflib.RDF.type, rdflib.URIRef('http://bsfs.ai/schema/Entity')), + (rdflib.URIRef('http://example.com/me/entity#1234'), rdflib.URIRef(self.t_created.uri), rdflib.Literal(t_ent_created, 
datatype=rdflib.XSD.integer)), + (rdflib.URIRef('http://example.com/me/entity#4321'), rdflib.URIRef(self.t_created.uri), rdflib.Literal(t_ent_created, datatype=rdflib.XSD.integer)), + # new triples + (rdflib.URIRef('http://example.com/me/tag#1234'), rdflib.RDF.type, rdflib.URIRef('http://bsfs.ai/schema/Tag')), + (rdflib.URIRef('http://example.com/me/tag#4321'), rdflib.RDF.type, rdflib.URIRef('http://bsfs.ai/schema/Tag')), + (rdflib.URIRef('http://example.com/me/tag#1234'), rdflib.URIRef(self.t_created.uri), rdflib.Literal(t_tag_created, datatype=rdflib.XSD.integer)), + (rdflib.URIRef('http://example.com/me/tag#4321'), rdflib.URIRef(self.t_created.uri), rdflib.Literal(t_tag_created, datatype=rdflib.XSD.integer)), + }) + + def test___set(self): + # setup + nodes = Nodes(self.backend, self.user, self.ent_type, {URI('http://example.com/me/entity#1234'), URI('http://example.com/me/entity#4321')}) + self.assertSetEqual(set(self.backend._graph), set()) + set_ = nodes._Nodes__set + + # node_type must match predicate's domain + self.assertRaises(errors.ConsistencyError, set_, self.p_representative.uri, self.ent_ids) + + # cannot set protected predicates + self.assertRaises(errors.PermissionDeniedError, set_, self.t_created.uri, 1234) + + # set literal value + set_(self.p_filesize.uri, 1234) + # get creation time from backend manually + time_triples = list(self.backend._graph.objects(rdflib.URIRef('http://example.com/me/entity#1234'), rdflib.URIRef(self.t_created.uri))) + t_ent_created = float(time_triples[0]) if len(time_triples) > 0 else 0.0 + # verify triples + self.assertSetEqual(set(self.backend._graph), { + # entity definitions + (rdflib.URIRef('http://example.com/me/entity#1234'), rdflib.RDF.type, rdflib.URIRef('http://bsfs.ai/schema/Entity')), + (rdflib.URIRef('http://example.com/me/entity#4321'), rdflib.RDF.type, rdflib.URIRef('http://bsfs.ai/schema/Entity')), + # bookkeeping + (rdflib.URIRef('http://example.com/me/entity#1234'), rdflib.URIRef(self.t_created.uri), 
rdflib.Literal(t_ent_created, datatype=rdflib.XSD.integer)), + (rdflib.URIRef('http://example.com/me/entity#4321'), rdflib.URIRef(self.t_created.uri), rdflib.Literal(t_ent_created, datatype=rdflib.XSD.integer)), + # literals + (rdflib.URIRef('http://example.com/me/entity#1234'), rdflib.URIRef(self.p_filesize.uri), rdflib.Literal(1234, datatype=rdflib.XSD.integer)), + (rdflib.URIRef('http://example.com/me/entity#4321'), rdflib.URIRef(self.p_filesize.uri), rdflib.Literal(1234, datatype=rdflib.XSD.integer)), + }) + + # set node value + tags = Nodes(self.backend, self.user, self.tag_type, {URI('http://example.com/me/tag#1234'), URI('http://example.com/me/tag#4321')}) + set_(self.p_tag.uri, tags) + # get creation time from backend manually + time_triples = list(self.backend._graph.objects(rdflib.URIRef('http://example.com/me/tag#1234'), rdflib.URIRef(self.t_created.uri))) + t_tag_created = float(time_triples[0]) if len(time_triples) > 0 else 0.0 + # verify triples + self.assertSetEqual(set(self.backend._graph), { + # previous values + (rdflib.URIRef('http://example.com/me/entity#1234'), rdflib.RDF.type, rdflib.URIRef('http://bsfs.ai/schema/Entity')), + (rdflib.URIRef('http://example.com/me/entity#4321'), rdflib.RDF.type, rdflib.URIRef('http://bsfs.ai/schema/Entity')), + (rdflib.URIRef('http://example.com/me/entity#1234'), rdflib.URIRef(self.p_filesize.uri), rdflib.Literal(1234, datatype=rdflib.XSD.integer)), + (rdflib.URIRef('http://example.com/me/entity#4321'), rdflib.URIRef(self.p_filesize.uri), rdflib.Literal(1234, datatype=rdflib.XSD.integer)), + (rdflib.URIRef('http://example.com/me/entity#1234'), rdflib.URIRef(self.t_created.uri), rdflib.Literal(t_ent_created, datatype=rdflib.XSD.integer)), + (rdflib.URIRef('http://example.com/me/entity#4321'), rdflib.URIRef(self.t_created.uri), rdflib.Literal(t_ent_created, datatype=rdflib.XSD.integer)), + # tag definitions + (rdflib.URIRef('http://example.com/me/tag#1234'), rdflib.RDF.type, 
rdflib.URIRef('http://bsfs.ai/schema/Tag')), + (rdflib.URIRef('http://example.com/me/tag#4321'), rdflib.RDF.type, rdflib.URIRef('http://bsfs.ai/schema/Tag')), + # tag bookkeeping + (rdflib.URIRef('http://example.com/me/tag#1234'), rdflib.URIRef(self.t_created.uri), rdflib.Literal(t_tag_created, datatype=rdflib.XSD.integer)), + (rdflib.URIRef('http://example.com/me/tag#4321'), rdflib.URIRef(self.t_created.uri), rdflib.Literal(t_tag_created, datatype=rdflib.XSD.integer)), + # entity -> tag links + (rdflib.URIRef('http://example.com/me/entity#1234'), rdflib.URIRef(self.p_tag.uri), rdflib.URIRef('http://example.com/me/tag#1234')), + (rdflib.URIRef('http://example.com/me/entity#4321'), rdflib.URIRef(self.p_tag.uri), rdflib.URIRef('http://example.com/me/tag#1234')), + (rdflib.URIRef('http://example.com/me/entity#1234'), rdflib.URIRef(self.p_tag.uri), rdflib.URIRef('http://example.com/me/tag#4321')), + (rdflib.URIRef('http://example.com/me/entity#4321'), rdflib.URIRef(self.p_tag.uri), rdflib.URIRef('http://example.com/me/tag#4321')), + }) + # value must be a nodes instance + self.assertRaises(TypeError, set_, self.p_tag.uri, 'foobar') + self.assertRaises(TypeError, set_, self.p_tag.uri, self.tag_ids) + self.assertRaises(TypeError, set_, self.p_tag.uri, URI('http://example.com/me/tag#1234')) + # value's node_type must match the predicate's range + self.assertRaises(errors.ConsistencyError, set_, self.p_tag.uri, + Nodes(self.backend, self.user, self.ent_type, self.ent_ids)) + + def test_set(self): + self.assertSetEqual(set(self.backend._graph), set()) + nodes = Nodes(self.backend, self.user, self.ent_type, self.ent_ids) + # can set literal values + self.assertEqual(nodes, nodes.set(self.p_filesize.uri, 1234)) + self.assertTrue(set(self.backend._graph).issuperset({ + # nodes exist + (rdflib.URIRef('http://example.com/me/entity#1234'), rdflib.RDF.type, rdflib.URIRef('http://bsfs.ai/schema/Entity')), + (rdflib.URIRef('http://example.com/me/entity#4321'), rdflib.RDF.type, 
rdflib.URIRef('http://bsfs.ai/schema/Entity')), + # links exist + (rdflib.URIRef('http://example.com/me/entity#1234'), rdflib.URIRef(self.p_filesize.uri), rdflib.Literal(1234, datatype=rdflib.XSD.integer)), + (rdflib.URIRef('http://example.com/me/entity#4321'), rdflib.URIRef(self.p_filesize.uri), rdflib.Literal(1234, datatype=rdflib.XSD.integer)), + })) + # can set node values + self.assertEqual(nodes, nodes.set(self.p_tag.uri, Nodes(self.backend, self.user, self.tag_type, self.tag_ids))) + self.assertTrue(set(self.backend._graph).issuperset({ + # nodes exist + (rdflib.URIRef('http://example.com/me/entity#1234'), rdflib.RDF.type, rdflib.URIRef('http://bsfs.ai/schema/Entity')), + (rdflib.URIRef('http://example.com/me/entity#4321'), rdflib.RDF.type, rdflib.URIRef('http://bsfs.ai/schema/Entity')), + (rdflib.URIRef('http://example.com/me/tag#1234'), rdflib.RDF.type, rdflib.URIRef('http://bsfs.ai/schema/Tag')), + (rdflib.URIRef('http://example.com/me/tag#4321'), rdflib.RDF.type, rdflib.URIRef('http://bsfs.ai/schema/Tag')), + # links exist + (rdflib.URIRef('http://example.com/me/entity#1234'), rdflib.URIRef(self.p_tag.uri), rdflib.URIRef('http://example.com/me/tag#1234')), + (rdflib.URIRef('http://example.com/me/entity#1234'), rdflib.URIRef(self.p_tag.uri), rdflib.URIRef('http://example.com/me/tag#4321')), + (rdflib.URIRef('http://example.com/me/entity#4321'), rdflib.URIRef(self.p_tag.uri), rdflib.URIRef('http://example.com/me/tag#1234')), + (rdflib.URIRef('http://example.com/me/entity#4321'), rdflib.URIRef(self.p_tag.uri), rdflib.URIRef('http://example.com/me/tag#4321')), + })) + + # cannot set protected predicate + curr = set(self.backend._graph) + self.assertRaises(errors.PermissionDeniedError, nodes.set, self.t_created.uri, 12345) + self.assertSetEqual(curr, set(self.backend._graph)) + # predicate.domain must match node_type + self.assertRaises(errors.ConsistencyError, nodes.set, self.p_representative.uri, nodes) + self.assertSetEqual(curr, set(self.backend._graph)) 
+ # value's node_type must match predicate's range + self.assertRaises(errors.ConsistencyError, nodes.set, self.p_tag.uri, nodes) + self.assertSetEqual(curr, set(self.backend._graph)) + # value type must match predicate's range type + self.assertRaises(TypeError, nodes.set, self.p_tag.uri, 'invalid') + self.assertSetEqual(curr, set(self.backend._graph)) + # cannot assing multiple values to unique predicate + self.assertRaises(ValueError, nodes.set, self.p_author.uri, + Nodes(self.backend, self.user, self.user_type, {URI('http://example.com/me/user#1234'), URI('http://example.com/me/user#4321')})) + self.assertSetEqual(curr, set(self.backend._graph)) + + + def test_set_from_iterable(self): + self.assertSetEqual(set(self.backend._graph), set()) + nodes = Nodes(self.backend, self.user, self.ent_type, self.ent_ids) + # can set literal and node values simultaneously + self.assertEqual(nodes, nodes.set_from_iterable({ + self.p_filesize.uri: 1234, + self.p_tag.uri: Nodes(self.backend, self.user, self.tag_type, self.tag_ids), + }.items())) + self.assertTrue(set(self.backend._graph).issuperset({ + # nodes exist + (rdflib.URIRef('http://example.com/me/entity#1234'), rdflib.RDF.type, rdflib.URIRef('http://bsfs.ai/schema/Entity')), + (rdflib.URIRef('http://example.com/me/entity#4321'), rdflib.RDF.type, rdflib.URIRef('http://bsfs.ai/schema/Entity')), + (rdflib.URIRef('http://example.com/me/tag#1234'), rdflib.RDF.type, rdflib.URIRef('http://bsfs.ai/schema/Tag')), + (rdflib.URIRef('http://example.com/me/tag#4321'), rdflib.RDF.type, rdflib.URIRef('http://bsfs.ai/schema/Tag')), + # links exist + (rdflib.URIRef('http://example.com/me/entity#1234'), rdflib.URIRef(self.p_filesize.uri), rdflib.Literal(1234, datatype=rdflib.XSD.integer)), + (rdflib.URIRef('http://example.com/me/entity#4321'), rdflib.URIRef(self.p_filesize.uri), rdflib.Literal(1234, datatype=rdflib.XSD.integer)), + (rdflib.URIRef('http://example.com/me/entity#1234'), rdflib.URIRef(self.p_tag.uri), 
rdflib.URIRef('http://example.com/me/tag#1234')), + (rdflib.URIRef('http://example.com/me/entity#1234'), rdflib.URIRef(self.p_tag.uri), rdflib.URIRef('http://example.com/me/tag#4321')), + (rdflib.URIRef('http://example.com/me/entity#4321'), rdflib.URIRef(self.p_tag.uri), rdflib.URIRef('http://example.com/me/tag#1234')), + (rdflib.URIRef('http://example.com/me/entity#4321'), rdflib.URIRef(self.p_tag.uri), rdflib.URIRef('http://example.com/me/tag#4321')), + })) + + # cannot set protected predicate + curr = set(self.backend._graph) + self.assertRaises(errors.PermissionDeniedError, nodes.set_from_iterable, ((self.p_filesize.uri, 1234), (self.t_created.uri, 12345))) + self.assertSetEqual(curr, set(self.backend._graph)) + # predicate.domain must match node_type + self.assertRaises(errors.ConsistencyError, nodes.set_from_iterable, ((self.p_filesize.uri, 1234), (self.p_representative.uri, nodes))) + self.assertSetEqual(curr, set(self.backend._graph)) + # value's node_type must match predicate's range + self.assertRaises(errors.ConsistencyError, nodes.set_from_iterable, ((self.p_filesize.uri, 1234), (self.p_tag.uri, nodes))) + self.assertSetEqual(curr, set(self.backend._graph)) + # value type must match predicate's range type + self.assertRaises(TypeError, nodes.set_from_iterable, ((self.p_filesize.uri, 1234), (self.p_tag.uri, 'invalid'))) + self.assertSetEqual(curr, set(self.backend._graph)) + # cannot assing multiple values to unique predicate + self.assertRaises(ValueError, nodes.set_from_iterable, ((self.p_filesize.uri, 1234), + (self.p_author.uri, Nodes(self.backend, self.user, self.user_type, {URI('http://example.com/me/user#1234'), URI('http://example.com/me/user#4321')})))) + self.assertSetEqual(curr, set(self.backend._graph)) + + +## main ## + +if __name__ == '__main__': + unittest.main() + +## EOF ## diff --git a/test/namespace/__init__.py b/test/namespace/__init__.py new file mode 100644 index 0000000..e69de29 --- /dev/null +++ b/test/namespace/__init__.py diff 
--git a/test/namespace/test_namespace.py b/test/namespace/test_namespace.py new file mode 100644 index 0000000..f109653 --- /dev/null +++ b/test/namespace/test_namespace.py @@ -0,0 +1,132 @@ +""" + +Part of the tagit test suite. +A copy of the license is provided with the project. +Author: Matthias Baumgartner, 2022 +""" +# imports +import operator +import unittest + +# bsfs imports +from bsfs.utils import URI + +# objects to test +from bsfs.namespace.namespace import Namespace, ClosedNamespace + + +## code ## + +class TestNamespace(unittest.TestCase): + def test_essentials(self): + # string conversion + self.assertEqual(str(Namespace('http://example.org/')), 'Namespace(http://example.org)') + self.assertEqual(str(Namespace('http://example.org#')), 'Namespace(http://example.org)') + self.assertEqual(repr(Namespace('http://example.org/')), 'Namespace(http://example.org, #, /)') + self.assertEqual(repr(Namespace('http://example.org#')), 'Namespace(http://example.org, #, /)') + self.assertEqual(repr(Namespace('http://example.org', fsep='.')), 'Namespace(http://example.org, ., /)') + self.assertEqual(repr(Namespace('http://example.org', psep='.')), 'Namespace(http://example.org, #, .)') + # repeated separators are truncated + self.assertEqual(str(Namespace('http://example.org////')), 'Namespace(http://example.org)') + self.assertEqual(str(Namespace('http://example.org####')), 'Namespace(http://example.org)') + self.assertEqual(repr(Namespace('http://example.org///##')), 'Namespace(http://example.org, #, /)') + # comparison + class Foo(Namespace): pass + self.assertEqual(Namespace('http://example.org/'), Namespace('http://example.org/')) + self.assertEqual(Namespace('http://example.org/'), Namespace('http://example.org')) + self.assertEqual(Namespace('http://example.org/'), Namespace('http://example.org#')) + self.assertNotEqual(Namespace('http://example.org'), Namespace('http://example.org', fsep='.')) + self.assertNotEqual(Namespace('http://example.org'), 
Namespace('http://example.org', psep='.')) + self.assertNotEqual(Namespace('http://example.org/'), Foo('http://example.org/')) + self.assertNotEqual(Foo('http://example.org/'), Namespace('http://example.org/')) + # hashing + self.assertEqual(hash(Namespace('http://example.org/')), hash(Namespace('http://example.org/'))) + self.assertEqual(hash(Namespace('http://example.org/')), hash(Namespace('http://example.org'))) + self.assertEqual(hash(Namespace('http://example.org/')), hash(Namespace('http://example.org#'))) + self.assertNotEqual(hash(Namespace('http://example.org')), hash(Namespace('http://example.com'))) + self.assertNotEqual(hash(Namespace('http://example.org')), hash(Namespace('http://example.org', fsep='.'))) + self.assertNotEqual(hash(Namespace('http://example.org')), hash(Namespace('http://example.org', psep='.'))) + self.assertNotEqual(hash(Namespace('http://example.org/')), hash(Foo('http://example.org/'))) + self.assertNotEqual(hash(Foo('http://example.org/')), hash(Namespace('http://example.org/'))) + + def test_getattr(self): + self.assertEqual(Namespace('http://example.org/').foo, 'http://example.org#foo') + self.assertEqual(Namespace('http://example.org/').bar, 'http://example.org#bar') + self.assertEqual(Namespace('http://example.org/', fsep='/').foo, 'http://example.org/foo') + self.assertEqual(Namespace('http://example.org/', fsep='/').bar, 'http://example.org/bar') + self.assertEqual(Namespace('http://example.org', fsep='/').foo, 'http://example.org/foo') + self.assertEqual(Namespace('http://example.org', fsep='/').bar, 'http://example.org/bar') + self.assertEqual(Namespace('http://example.org#', fsep='/').foo, 'http://example.org#/foo') + self.assertEqual(Namespace('http://example.org#', fsep='/').bar, 'http://example.org#/bar') + self.assertEqual(Namespace('http://example.org/me#').foo, 'http://example.org/me#foo') + self.assertEqual(Namespace('http://example.org/me#').bar, 'http://example.org/me#bar') + + def test_getitem(self): + 
self.assertEqual(Namespace('http://example.org')['foo'], 'http://example.org#foo') + self.assertEqual(Namespace('http://example.org')['bar'], 'http://example.org#bar') + self.assertEqual(Namespace('http://example.org', fsep='/')['foo'], 'http://example.org/foo') + self.assertEqual(Namespace('http://example.org', fsep='/')['bar'], 'http://example.org/bar') + self.assertEqual(Namespace('http://example.org/me#')['foo'], 'http://example.org/me#foo') + self.assertEqual(Namespace('http://example.org/me#')['bar'], 'http://example.org/me#bar') + + def test_add(self): + self.assertEqual(Namespace('http://example.org') + 'foo', Namespace('http://example.org/foo')) + self.assertEqual(Namespace('http://example.org', psep='.') + 'foo', Namespace('http://example.org.foo', psep='.')) + self.assertEqual(Namespace('http://example.org') + 'foo' + 'bar', Namespace('http://example.org/foo/bar')) + # can add URIs + self.assertEqual(Namespace('http://example.org') + URI('foo'), Namespace('http://example.org/foo')) + # can only add strings + self.assertRaises(TypeError, operator.add, Namespace('http://example.org'), 1234) + self.assertRaises(TypeError, operator.add, Namespace('http://example.org'), Namespace('http://example.com')) + + +class TestClosedNamespace(unittest.TestCase): + def test_essentials(self): + # string conversion + self.assertEqual(str(ClosedNamespace('http://example.org/')), 'ClosedNamespace(http://example.org)') + self.assertEqual(str(ClosedNamespace('http://example.org#')), 'ClosedNamespace(http://example.org)') + self.assertEqual(repr(ClosedNamespace('http://example.org/')), 'ClosedNamespace(http://example.org, #, /)') + self.assertEqual(repr(ClosedNamespace('http://example.org#')), 'ClosedNamespace(http://example.org, #, /)') + self.assertEqual(repr(ClosedNamespace('http://example.org', fsep='.')), 'ClosedNamespace(http://example.org, ., /)') + self.assertEqual(repr(ClosedNamespace('http://example.org', psep='.')), 'ClosedNamespace(http://example.org, #, .)') + # 
comparison + class Foo(ClosedNamespace): pass + self.assertEqual(ClosedNamespace('http://example.org'), ClosedNamespace('http://example.org#')) + self.assertEqual(ClosedNamespace('http://example.org'), ClosedNamespace('http://example.org')) + self.assertEqual(ClosedNamespace('http://example.org'), ClosedNamespace('http://example.org/')) + self.assertEqual(ClosedNamespace('http://example.org/', 'foo', 'bar'), ClosedNamespace('http://example.org/', 'foo', 'bar')) + self.assertNotEqual(ClosedNamespace('http://example.org/', 'foo'), ClosedNamespace('http://example.org/', 'bar')) + self.assertNotEqual(ClosedNamespace('http://example.org/'), Foo('http://example.org/')) + self.assertNotEqual(Foo('http://example.org/'), ClosedNamespace('http://example.org/')) + # hashing + self.assertEqual(hash(ClosedNamespace('http://example.org')), hash(ClosedNamespace('http://example.org'))) + self.assertEqual(hash(ClosedNamespace('http://example.org')), hash(ClosedNamespace('http://example.org/'))) + self.assertEqual(hash(ClosedNamespace('http://example.org')), hash(ClosedNamespace('http://example.org#'))) + self.assertEqual(hash(ClosedNamespace('http://example.org/', 'foo', 'bar')), hash(ClosedNamespace('http://example.org/', 'foo', 'bar'))) + self.assertNotEqual(hash(ClosedNamespace('http://example.org/', 'foo')), hash(ClosedNamespace('http://example.org/', 'bar'))) + self.assertNotEqual(hash(ClosedNamespace('http://example.org/')), hash(Foo('http://example.org/'))) + self.assertNotEqual(hash(Foo('http://example.org/')), hash(ClosedNamespace('http://example.org/'))) + + def test_getattr(self): + self.assertEqual(ClosedNamespace('http://example.org/', 'foo', 'bar').foo, 'http://example.org#foo') + self.assertEqual(ClosedNamespace('http://example.org/', 'bar', 'bar').bar, 'http://example.org#bar') + self.assertEqual(ClosedNamespace('http://example.org/me#', 'foo', 'bar').foo, 'http://example.org/me#foo') + self.assertEqual(ClosedNamespace('http://example.org/me#', 'foo', 'bar').bar, 
'http://example.org/me#bar') + self.assertRaises(KeyError, getattr, ClosedNamespace('http://example.org/', 'bar', 'bar'), 'foobar') + self.assertRaises(KeyError, getattr, ClosedNamespace('http://example.org#', 'bar', 'bar'), 'foobar') + + def test_getitem(self): + self.assertEqual(ClosedNamespace('http://example.org/', 'foo', 'bar')['foo'], 'http://example.org#foo') + self.assertEqual(ClosedNamespace('http://example.org/', 'foo', 'bar')['bar'], 'http://example.org#bar') + self.assertEqual(ClosedNamespace('http://example.org/me#', 'foo', 'bar')['foo'], 'http://example.org/me#foo') + self.assertEqual(ClosedNamespace('http://example.org/me#', 'foo', 'bar')['bar'], 'http://example.org/me#bar') + self.assertRaises(KeyError, ClosedNamespace('http://example.org/', 'bar', 'bar').__getitem__, 'foobar') + self.assertRaises(KeyError, ClosedNamespace('http://example.org#', 'bar', 'bar').__getitem__, 'foobar') + + +## main ## + +if __name__ == '__main__': + unittest.main() + +## EOF ## diff --git a/test/schema/__init__.py b/test/schema/__init__.py new file mode 100644 index 0000000..e69de29 --- /dev/null +++ b/test/schema/__init__.py diff --git a/test/schema/test_schema.py b/test/schema/test_schema.py new file mode 100644 index 0000000..888cdca --- /dev/null +++ b/test/schema/test_schema.py @@ -0,0 +1,745 @@ +""" + +Part of the tagit test suite. +A copy of the license is provided with the project. 
+Author: Matthias Baumgartner, 2022 +""" +# imports +import operator +import unittest + +# bsfs imports +from bsfs.namespace import ns +from bsfs.schema import types +from bsfs.utils import errors + +# objects to test +from bsfs.schema.schema import Schema + + +## code ## + +class TestSchema(unittest.TestCase): + + def setUp(self): + self.schema_str = ''' + prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> + prefix xsd: <http://www.w3.org/2001/XMLSchema#> + + prefix bsfs: <http://bsfs.ai/schema/> + prefix bse: <http://bsfs.ai/schema/Entity#> + + bsfs:Entity rdfs:subClassOf bsfs:Node . + bsfs:Tag rdfs:subClassOf bsfs:Node . + bsfs:Image rdfs:subClassOf bsfs:Entity . + bsfs:Unused rdfs:subClassOf bsfs:Node . + + xsd:string rdfs:subClassOf bsfs:Literal . + xsd:integer rdfs:subClassOf bsfs:Literal . + xsd:boolean rdfs:subClassOf bsfs:Literal . + + bse:tag rdfs:subClassOf bsfs:Predicate ; + rdfs:domain bsfs:Entity ; + rdfs:range bsfs:Tag ; + bsfs:unique "false"^^xsd:boolean . + + bse:group rdfs:subClassOf bse:tag ; + rdfs:domain bsfs:Image ; + rdfs:range bsfs:Tag ; + bsfs:unique "false"^^xsd:boolean . + + bse:comment rdfs:subClassOf bsfs:Predicate ; + rdfs:domain bsfs:Node ; + rdfs:range xsd:string ; + bsfs:unique "true"^^xsd:boolean . 
+ + ''' + # nodes + self.n_root = types.Node(ns.bsfs.Node, None) + self.n_ent = types.Node(ns.bsfs.Entity, types.Node(ns.bsfs.Node, None)) + self.n_img = types.Node(ns.bsfs.Image, types.Node(ns.bsfs.Entity, types.Node(ns.bsfs.Node, None))) + self.n_tag = types.Node(ns.bsfs.Tag, types.Node(ns.bsfs.Node, None)) + self.n_unused = types.Node(ns.bsfs.Unused, types.Node(ns.bsfs.Node, None)) + self.nodes = [self.n_root, self.n_ent, self.n_img, self.n_tag, self.n_unused] + + # literals + self.l_root = types.Literal(ns.bsfs.Literal, None) + self.l_string = types.Literal(ns.xsd.string, types.Literal(ns.bsfs.Literal, None)) + self.l_integer = types.Literal(ns.xsd.integer, types.Literal(ns.bsfs.Literal, None)) + self.l_unused = types.Literal(ns.xsd.boolean, types.Literal(ns.bsfs.Literal, None)) + self.literals = [self.l_root, self.l_string, self.l_integer, self.l_unused] + + # predicates + self.p_root = types.Predicate(ns.bsfs.Predicate, None, types.Node(ns.bsfs.Node, None), None, False) + self.p_tag = self.p_root.get_child(ns.bse.tag, self.n_ent, self.n_tag, False) + self.p_group = self.p_tag.get_child(ns.bse.group, self.n_img, self.n_tag, False) + self.p_comment = self.p_root.get_child(ns.bse.comment, self.n_root, self.l_string, True) + self.predicates = [self.p_root, self.p_tag, self.p_group, self.p_comment] + + def test_construction(self): + # nodes and literals are optional + schema = Schema(self.predicates) + self.assertSetEqual(set(schema.nodes()), {self.n_root, self.n_ent, self.n_img, self.n_tag}) + self.assertSetEqual(set(schema.literals()), {self.l_root, self.l_string}) + self.assertSetEqual(set(schema.predicates()), set(self.predicates)) + + # predicates, nodes, and literals are respected + schema = Schema(self.predicates, self.nodes, self.literals) + self.assertSetEqual(set(schema.nodes()), set(self.nodes)) + self.assertSetEqual(set(schema.literals()), set(self.literals)) + self.assertSetEqual(set(schema.predicates()), set(self.predicates)) + + # nodes are complete 
(w/o unused) + schema = Schema(self.predicates, None, self.literals) + self.assertSetEqual(set(schema.nodes()), {self.n_root, self.n_ent, self.n_img, self.n_tag}) + schema = Schema(self.predicates, [], self.literals) + self.assertSetEqual(set(schema.nodes()), {self.n_root, self.n_ent, self.n_img, self.n_tag}) + schema = Schema(self.predicates, [self.n_img, self.n_tag], self.literals) + self.assertSetEqual(set(schema.nodes()), {self.n_root, self.n_ent, self.n_img, self.n_tag}) + schema = Schema(self.predicates, [self.n_unused], self.literals) + self.assertSetEqual(set(schema.nodes()), set(self.nodes)) + + # literals are complete + schema = Schema(self.predicates, self.nodes, None) + self.assertSetEqual(set(schema.literals()), {self.l_root, self.l_string}) + schema = Schema(self.predicates, self.nodes, []) + self.assertSetEqual(set(schema.literals()), {self.l_root, self.l_string}) + schema = Schema(self.predicates, self.nodes, [self.l_string]) + self.assertSetEqual(set(schema.literals()), {self.l_root, self.l_string}) + schema = Schema(self.predicates, self.nodes, [self.l_integer]) + self.assertSetEqual(set(schema.literals()), {self.l_root, self.l_string, self.l_integer}) + schema = Schema(self.predicates, self.nodes, [self.l_integer, self.l_unused]) + self.assertSetEqual(set(schema.literals()), set(self.literals)) + + # predicates are complete + schema = Schema([], self.nodes, self.literals) + self.assertSetEqual(set(schema.predicates()), set()) + schema = Schema([self.p_group], self.nodes, self.literals) + self.assertSetEqual(set(schema.predicates()), {self.p_root, self.p_tag, self.p_group}) + schema = Schema([self.p_group, self.p_comment], self.nodes, self.literals) + self.assertSetEqual(set(schema.predicates()), set(self.predicates)) + + # node uris must be unique + self.assertRaises(errors.ConsistencyError, Schema, self.predicates, + self.nodes + [types.Node(ns.bsfs.Entity, None)], self.literals) + self.assertRaises(errors.ConsistencyError, Schema, 
self.predicates, + self.nodes + [types.Node(ns.bsfs.Entity, types.Node(ns.bsfs.Foo, None))], self.literals) + self.assertRaises(errors.ConsistencyError, Schema, self.predicates, + self.nodes + [types.Node(ns.bsfs.Entity, self.n_img)], self.literals) + self.assertRaises(errors.ConsistencyError, Schema, self.predicates, + [types.Node(ns.bsfs.Entity, self.n_img)], self.literals) + + # literal uris must be unique + self.assertRaises(errors.ConsistencyError, Schema, self.predicates, self.nodes, + self.literals + [types.Literal(ns.xsd.string, None)]) + self.assertRaises(errors.ConsistencyError, Schema, self.predicates, self.nodes, + self.literals + [types.Literal(ns.xsd.string, types.Literal(ns.bsfs.Foo, None))]) + self.assertRaises(errors.ConsistencyError, Schema, self.predicates, self.nodes, + self.literals + [types.Literal(ns.xsd.string, self.l_integer)]) + self.assertRaises(errors.ConsistencyError, Schema, self.predicates, self.nodes, + [types.Literal(ns.xsd.string, self.l_integer)]) + + # predicate uris must be unique + self.assertRaises(errors.ConsistencyError, Schema, + self.predicates + [types.Predicate(ns.bse.tag, self.p_root, self.n_root, self.n_tag, False)]) + self.assertRaises(errors.ConsistencyError, Schema, + self.predicates + [types.Predicate(ns.bse.tag, self.p_root, self.n_ent, self.n_img, False)]) + self.assertRaises(errors.ConsistencyError, Schema, + self.predicates + [types.Predicate(ns.bse.tag, self.p_root, self.n_ent, self.n_tag, True)]) + self.assertRaises(errors.ConsistencyError, Schema, + self.predicates + [types.Predicate(ns.bse.tag, None, self.n_ent, self.n_tag, False)]) + + # uris must be unique across nodes, literals, and predicates + self.assertRaises(errors.ConsistencyError, Schema, + {}, {types.Node(ns.bsfs.Foo, None)}, {types.Node(ns.bsfs.Foo, None)}) + self.assertRaises(errors.ConsistencyError, Schema, + {types.Predicate(ns.bsfs.Foo, None, types.Node(ns.bsfs.Node, None), None, False)}, {}, {types.Node(ns.bsfs.Foo, None)}) + 
self.assertRaises(errors.ConsistencyError, Schema, + {types.Predicate(ns.bsfs.Foo, None, types.Node(ns.bsfs.Node, None), None, False)}, {types.Node(ns.bsfs.Foo, None)}, {}) + self.assertRaises(errors.ConsistencyError, Schema, + {types.Predicate(ns.bsfs.Foo, None, types.Node(ns.bsfs.Node, None), None, False)}, {types.Node(ns.bsfs.Foo, None)}, {types.Node(ns.bsfs.Foo, None)}) + + def test_str(self): + self.assertEqual(str(Schema([])), 'Schema()') + self.assertEqual(str(Schema([], [], [])), 'Schema()') + self.assertEqual(str(Schema(self.predicates, self.nodes, self.literals)), 'Schema()') + self.assertEqual(repr(Schema([])), 'Schema([], [], [])') + self.assertEqual(repr(Schema([], [], [])), 'Schema([], [], [])') + n = [ns.bsfs.Entity, ns.bsfs.Image, ns.bsfs.Node, ns.bsfs.Tag, ns.bsfs.Unused] + l = [ns.bsfs.Literal, ns.xsd.boolean, ns.xsd.integer, ns.xsd.string] + p = [ns.bse.comment, ns.bse.group, ns.bse.tag, ns.bsfs.Predicate] + self.assertEqual(repr(Schema(self.predicates, self.nodes, self.literals)), f'Schema({n}, {l}, {p})') + + def test_equality(self): + schema = Schema(self.predicates, self.nodes, self.literals) + # instance is equal to itself + self.assertEqual(schema, schema) + self.assertEqual(hash(schema), hash(schema)) + # instance is equal to a clone + self.assertEqual(schema, Schema(self.predicates, self.nodes, self.literals)) + self.assertEqual(hash(schema), hash(Schema(self.predicates, self.nodes, self.literals))) + # equality respects nodes + self.assertNotEqual(schema, + Schema(self.predicates, [self.n_root, self.n_ent, self.n_img, self.n_tag], self.literals)) + self.assertNotEqual(hash(schema), + hash(Schema(self.predicates, [self.n_root, self.n_ent, self.n_img, self.n_tag], self.literals))) + self.assertNotEqual(schema, + Schema(self.predicates, self.nodes + [types.Node(ns.bsfs.Document, self.n_ent)], self.literals)) + self.assertNotEqual(hash(schema), + hash(Schema(self.predicates, self.nodes + [types.Node(ns.bsfs.Document, self.n_ent)], 
self.literals))) + # equality respects literals + self.assertNotEqual(schema, + Schema(self.predicates, self.nodes, [self.l_root, self.l_string, self.l_integer])) + self.assertNotEqual(hash(schema), + hash(Schema(self.predicates, self.nodes, [self.l_root, self.l_string, self.l_integer]))) + self.assertNotEqual(schema, + Schema(self.predicates, self.nodes, self.literals + [types.Literal(ns.xsd.number, self.l_root)])) + self.assertNotEqual(hash(schema), + hash(Schema(self.predicates, self.nodes, self.literals + [types.Literal(ns.xsd.number, self.l_root)]))) + # equality respects predicates + self.assertNotEqual(schema, + Schema([self.p_group, self.p_tag, self.p_root], self.nodes, self.literals)) + self.assertNotEqual(hash(schema), + hash(Schema([self.p_group, self.p_tag, self.p_root], self.nodes, self.literals))) + self.assertNotEqual(schema, + Schema(self.predicates + [self.p_root.get_child(ns.bse.filesize, self.n_ent, self.l_integer)], self.nodes, self.literals)) + self.assertNotEqual(hash(schema), + hash(Schema(self.predicates + [self.p_root.get_child(ns.bse.filesize, self.n_ent, self.l_integer)], self.nodes, self.literals))) + + def test_order(self): + # setup + class Foo(): pass + p_foo = self.p_root.get_child(ns.bse.foo, self.n_ent, self.l_string, True) + p_sub = p_foo.get_child(ns.bse.sub, self.n_ent, self.l_string, True) + p_bar = self.p_root.get_child(ns.bse.bar, self.n_ent, self.l_string, True) + + # can only compare schema to other schema + # < + self.assertRaises(TypeError, operator.lt, Schema({p_foo, p_bar}), 'hello world') + self.assertRaises(TypeError, operator.lt, Schema({p_foo, p_bar}), 1234) + self.assertRaises(TypeError, operator.lt, Schema({p_foo, p_bar}), p_foo) + self.assertRaises(TypeError, operator.lt, Schema({p_foo, p_bar}), Foo()) + # <= + self.assertRaises(TypeError, operator.le, Schema({p_foo, p_bar}), 'hello world') + self.assertRaises(TypeError, operator.le, Schema({p_foo, p_bar}), 1234) + self.assertRaises(TypeError, operator.le, 
Schema({p_foo, p_bar}), p_foo) + self.assertRaises(TypeError, operator.le, Schema({p_foo, p_bar}), Foo()) + # > + self.assertRaises(TypeError, operator.gt, Schema({p_foo, p_bar}), 'hello world') + self.assertRaises(TypeError, operator.gt, Schema({p_foo, p_bar}), 1234) + self.assertRaises(TypeError, operator.gt, Schema({p_foo, p_bar}), p_foo) + self.assertRaises(TypeError, operator.gt, Schema({p_foo, p_bar}), Foo()) + # >= + self.assertRaises(TypeError, operator.ge, Schema({p_foo, p_bar}), 'hello world') + self.assertRaises(TypeError, operator.ge, Schema({p_foo, p_bar}), 1234) + self.assertRaises(TypeError, operator.ge, Schema({p_foo, p_bar}), p_foo) + self.assertRaises(TypeError, operator.ge, Schema({p_foo, p_bar}), Foo()) + + # a schema is a subset of itself + self.assertTrue(operator.le(Schema({self.p_tag}), Schema({self.p_tag}))) + # a schema is a superset of itself + self.assertTrue(operator.ge(Schema({self.p_tag}), Schema({self.p_tag}))) + # a schema is not a true subset of itself + self.assertFalse(operator.lt(Schema({self.p_tag}), Schema({self.p_tag}))) + # a schema is not a true superset of itself + self.assertFalse(operator.gt(Schema({self.p_tag}), Schema({self.p_tag}))) + + # subset considers predicates + self.assertTrue(operator.lt(Schema({p_foo}), Schema({p_foo, p_bar}))) + self.assertTrue(operator.lt(Schema({p_foo}), Schema({p_sub}))) + self.assertFalse(operator.lt(Schema({p_foo}), Schema({p_bar}))) + # subset considers nodes + self.assertTrue(operator.lt(Schema({self.p_tag}), Schema({self.p_tag}, {self.n_unused}))) + self.assertFalse(operator.lt(Schema({self.p_tag}, {self.n_unused}), Schema({self.p_tag}))) + # subset considers literals + self.assertTrue(operator.lt(Schema({self.p_tag}), Schema({self.p_tag}, {}, {self.l_unused}))) + self.assertFalse(operator.lt(Schema({self.p_tag}, {}, {self.l_unused}), Schema({self.p_tag}))) + # subset considers differences in predicates and nodes + self.assertTrue(operator.lt(Schema({self.p_tag}), 
Schema({self.p_group}))) + self.assertTrue(operator.le(Schema({self.p_tag}), Schema({self.p_group}))) + # subset considers differences in predicates and literals + self.assertTrue(operator.lt(Schema.Empty(), Schema({self.p_comment}))) + # subset considers differences in predicates, nodes, and literals + self.assertTrue(operator.lt(Schema({}), Schema.Empty())) + self.assertTrue(operator.lt(Schema({self.p_tag}), Schema.from_string(self.schema_str))) + self.assertTrue(operator.le(Schema({self.p_tag}), Schema.from_string(self.schema_str))) + self.assertFalse(operator.lt(Schema({self.p_comment}), Schema({self.p_tag}))) + self.assertFalse(operator.le(Schema({self.p_comment}), Schema({self.p_tag}))) + + # superset considers predicates + self.assertTrue(operator.gt(Schema({p_foo, p_bar}), Schema({p_foo}))) + self.assertTrue(operator.gt(Schema({p_sub}), Schema({p_foo}))) + self.assertFalse(operator.gt(Schema({p_foo}), Schema({p_bar}))) + # superset considers nodes + self.assertTrue(operator.gt(Schema({self.p_tag}, {self.n_unused}), Schema({self.p_tag}))) + self.assertFalse(operator.gt(Schema({self.p_tag}), Schema({self.p_tag}, {self.n_unused}))) + # superset considers literals + self.assertTrue(operator.gt(Schema({self.p_tag}, {}, {self.l_unused}), Schema({self.p_tag}))) + self.assertFalse(operator.gt(Schema({self.p_tag}), Schema({self.p_tag}, {}, {self.l_unused}))) + # superset considers differences in predicates and nodes + self.assertTrue(operator.gt(Schema({self.p_group}), Schema({self.p_tag}))) + self.assertTrue(operator.ge(Schema({self.p_group}), Schema({self.p_tag}))) + # superset considers differences in predicates and literals + self.assertTrue(operator.gt(Schema({self.p_comment}), Schema.Empty())) + # superset considers differences in predicates, nodes, and literals + self.assertTrue(operator.gt(Schema.Empty(), Schema({}))) + self.assertTrue(operator.gt(Schema.from_string(self.schema_str), Schema({self.p_tag}))) + 
self.assertTrue(operator.ge(Schema.from_string(self.schema_str), Schema({self.p_tag}))) + self.assertFalse(operator.gt(Schema({self.p_tag}), Schema({self.p_comment}))) + self.assertFalse(operator.ge(Schema({self.p_tag}), Schema({self.p_comment}))) + + # inconsistent schema cannot be a subset + self.assertFalse(operator.le(Schema({p_foo}), Schema({ + self.p_root.get_child(ns.bse.foo, self.n_ent, self.l_integer, True)}))) # inconsistent w.r.t. literal + self.assertFalse(operator.le(Schema({p_foo}), Schema({ + self.p_root.get_child(ns.bse.foo, self.n_img, self.l_string, True)}))) # inconsistent w.r.t. node + self.assertFalse(operator.le(Schema({p_foo}), Schema({ + self.p_root.get_child(ns.bse.foo, self.n_ent, self.l_string, False)}))) # inconsistent w.r.t. unique + self.assertFalse(operator.le(Schema({}, {self.n_img}), Schema({}, { + types.Node(ns.bsfs.Image, types.Node(ns.bsfs.Node, None))}))) + self.assertFalse(operator.le(Schema({}, {}, {self.l_integer}), Schema({}, {}, { + types.Literal(ns.xsd.integer, types.Literal(ns.xsd.number, types.Literal(ns.bsfs.Literal, None)))}))) + # inconsistent schema cannot be a true subset + self.assertFalse(operator.lt(Schema({p_foo}), Schema({ + self.p_root.get_child(ns.bse.foo, self.n_ent, self.l_integer, True)}))) # inconsistent w.r.t. literal + self.assertFalse(operator.lt(Schema({p_foo}), Schema({ + self.p_root.get_child(ns.bse.foo, self.n_img, self.l_string, True)}))) # inconsistent w.r.t. node + self.assertFalse(operator.lt(Schema({p_foo}), Schema({ + self.p_root.get_child(ns.bse.foo, self.n_ent, self.l_string, False)}))) # inconsistent w.r.t. 
unique + self.assertFalse(operator.lt(Schema({}, {self.n_img}), Schema({}, { + types.Node(ns.bsfs.Image, types.Node(ns.bsfs.Node, None))}))) + self.assertFalse(operator.lt(Schema({}, {}, {self.l_integer}), Schema({}, {}, { + types.Literal(ns.xsd.integer, types.Literal(ns.xsd.number, types.Literal(ns.bsfs.Literal, None)))}))) + # inconsistent schema cannot be a superset + self.assertFalse(operator.ge(Schema({p_foo}), Schema({ + self.p_root.get_child(ns.bse.foo, self.n_ent, self.l_integer, True)}))) # inconsistent w.r.t. literal + self.assertFalse(operator.ge(Schema({p_foo}), Schema({ + self.p_root.get_child(ns.bse.foo, self.n_img, self.l_string, True)}))) # inconsistent w.r.t. node + self.assertFalse(operator.ge(Schema({p_foo}), Schema({ + self.p_root.get_child(ns.bse.foo, self.n_ent, self.l_string, False)}))) # inconsistent w.r.t. unique + self.assertFalse(operator.ge(Schema({}, {self.n_img}), Schema({}, { + types.Node(ns.bsfs.Image, types.Node(ns.bsfs.Node, None))}))) + self.assertFalse(operator.ge(Schema({}, {}, {self.l_integer}), Schema({}, {}, { + types.Literal(ns.xsd.integer, types.Literal(ns.xsd.number, types.Literal(ns.bsfs.Literal, None)))}))) + # inconsistent schema cannot be a true superset + self.assertFalse(operator.gt(Schema({p_foo}), Schema({ + self.p_root.get_child(ns.bse.foo, self.n_ent, self.l_integer, True)}))) # inconsistent w.r.t. literal + self.assertFalse(operator.gt(Schema({p_foo}), Schema({ + self.p_root.get_child(ns.bse.foo, self.n_img, self.l_string, True)}))) # inconsistent w.r.t. node + self.assertFalse(operator.gt(Schema({p_foo}), Schema({ + self.p_root.get_child(ns.bse.foo, self.n_ent, self.l_string, False)}))) # inconsistent w.r.t. 
unique + self.assertFalse(operator.gt(Schema({}, {self.n_img}), Schema({}, { + types.Node(ns.bsfs.Image, types.Node(ns.bsfs.Node, None))}))) + self.assertFalse(operator.gt(Schema({}, {}, {self.l_integer}), Schema({}, {}, { + types.Literal(ns.xsd.integer, types.Literal(ns.xsd.number, types.Literal(ns.bsfs.Literal, None)))}))) + + + + def test_diff(self): + # difference can be empty + diff = Schema({self.p_tag}).diff(Schema({self.p_group})) + self.assertSetEqual(set(diff.nodes), set()) + self.assertSetEqual(set(diff.literals), set()) + self.assertSetEqual(set(diff.predicates), set()) + + # difference contains predicates from the LHS + diff = Schema({self.p_group}).diff(Schema({self.p_tag})) + self.assertSetEqual(set(diff.nodes), {self.n_img}) + self.assertSetEqual(set(diff.literals), set()) + self.assertSetEqual(set(diff.predicates), {self.p_group}) + + # difference does not contain predicates from the RHS + diff = Schema({self.p_tag, self.p_comment}).diff(Schema({self.p_group})) + self.assertSetEqual(set(diff.nodes), set()) + self.assertSetEqual(set(diff.literals), {self.l_root, self.l_string}) + self.assertSetEqual(set(diff.predicates), {self.p_comment}) + + # difference considers extra nodes and literals + diff = Schema({self.p_tag}, {self.n_unused}, {self.l_unused}).diff(Schema({self.p_tag})) + self.assertSetEqual(set(diff.nodes), {self.n_unused}) + self.assertSetEqual(set(diff.literals), {self.l_root, self.l_unused}) + self.assertSetEqual(set(diff.predicates), set()) + + # difference considers inconsistent types + diff = Schema({self.p_tag}, {self.n_unused}, {self.l_unused}).diff( + Schema({self.p_tag}, {types.Node(ns.bsfs.Unused, None)}, {types.Literal(ns.xsd.boolean, None)})) + self.assertSetEqual(set(diff.nodes), {self.n_unused}) + self.assertSetEqual(set(diff.literals), {self.l_root, self.l_unused}) + self.assertSetEqual(set(diff.predicates), set()) + + # __sub__ is an alias for diff + diff = Schema({self.p_comment}, {self.n_unused}, {self.l_unused}) - 
Schema({self.p_group}) + self.assertSetEqual(set(diff.nodes), {self.n_unused}) + self.assertSetEqual(set(diff.literals), {self.l_root, self.l_string, self.l_unused}) + self.assertSetEqual(set(diff.predicates), {self.p_comment}) + # __sub__ only accepts Schema instances + class Foo(): pass + self.assertRaises(TypeError, operator.sub, Schema({self.p_comment}, {self.n_unused}, {self.l_unused}), 1234) + self.assertRaises(TypeError, operator.sub, Schema({self.p_comment}, {self.n_unused}, {self.l_unused}), 'hello world') + self.assertRaises(TypeError, operator.sub, Schema({self.p_comment}, {self.n_unused}, {self.l_unused}), Foo()) + + def test_consistent_with(self): + # argument must be a schema + class Foo(): pass + self.assertRaises(TypeError, Schema([]).consistent_with, 1234) + self.assertRaises(TypeError, Schema([]).consistent_with, 'hello world') + self.assertRaises(TypeError, Schema([]).consistent_with, Foo()) + + # node consistency + self.assertTrue(Schema([], {self.n_ent, self.n_tag, self.n_unused}).consistent_with( + Schema(self.predicates))) + self.assertFalse(Schema([], {types.Node(ns.bsfs.Entity, None)}).consistent_with( + Schema(self.predicates))) + # order doesn't matter + self.assertTrue(Schema(self.predicates).consistent_with( + Schema([], {self.n_ent, self.n_tag, self.n_unused}))) + + # literal consistency + self.assertTrue(Schema([], [], {self.l_string, self.l_unused}).consistent_with( + Schema(self.predicates))) + self.assertFalse(Schema([], [], {types.Literal(ns.xsd.string, None)}).consistent_with( + Schema(self.predicates))) + # order doesn't matter + self.assertTrue(Schema(self.predicates).consistent_with( + Schema([], [], {self.l_string, self.l_unused}))) + + # predicate consistency + self.assertTrue(Schema({self.p_tag}).consistent_with( + Schema(self.predicates))) + self.assertFalse(Schema({types.Predicate(ns.bse.tag, None, self.n_root, self.n_root, False)}).consistent_with( + Schema(self.predicates))) + # order doesn't matter + 
self.assertTrue(Schema(self.predicates).consistent_with( + Schema({self.p_tag}))) + + # global consistency + self.assertFalse(Schema({types.Predicate(ns.bsfs.Entity, None, self.n_root, self.n_root, False)}).consistent_with( + Schema(self.predicates))) + self.assertFalse(Schema([], {types.Node(ns.xsd.string, None)}).consistent_with( + Schema(self.predicates))) + self.assertFalse(Schema([], [], {types.Literal(ns.bsfs.Entity, None)}).consistent_with( + Schema(self.predicates))) + + + def test_union(self): + # must provide at least one schema + self.assertRaises(TypeError, Schema.Union) + + # can pass schemas as list + self.assertEqual(Schema.Union([Schema({self.p_tag})]), Schema({self.p_tag})) + self.assertEqual(Schema.Union([Schema({self.p_tag}), Schema({self.p_comment})]), + Schema({self.p_tag, self.p_comment})) + + # can pass schemas as arguments + self.assertEqual(Schema.Union(Schema({self.p_tag})), Schema({self.p_tag})) + self.assertEqual(Schema.Union(Schema({self.p_tag}), Schema({self.p_comment})), + Schema({self.p_tag, self.p_comment})) + + # cannot mix the two argument passing styles + self.assertRaises(TypeError, Schema.Union, [Schema(self.predicates)], Schema(self.predicates)) + + # all arguments must be Schema instances + self.assertRaises(TypeError, Schema.Union, Schema(self.predicates), 1234) + self.assertRaises(TypeError, Schema.Union, Schema(self.predicates), 1234, Schema(self.predicates)) + self.assertRaises(TypeError, Schema.Union, Schema(self.predicates), 'hello world') + + # Union merges predicates, nodes, and literals + self.assertEqual(Schema.Union( + Schema({self.p_comment}, {self.n_unused}, {}), + Schema({self.p_group}, {self.n_img}, {self.l_unused})), + Schema({self.p_comment, self.p_group}, {self.n_img, self.n_unused}, {self.l_unused})) + + # Union does not accept inconsistent nodes + self.assertRaises(errors.ConsistencyError, Schema.Union, Schema(self.predicates), + Schema({}, {types.Node(ns.bsfs.Entity, None)})) + 
self.assertRaises(errors.ConsistencyError, Schema.Union, Schema({}, {self.n_ent}), + Schema({}, {types.Node(ns.bsfs.Entity, None)})) + self.assertRaises(errors.ConsistencyError, Schema.Union, Schema({}, {self.n_ent}), + Schema({}, {}, {types.Literal(ns.bsfs.Entity, None)})) + + # Union does not accept inconsistent literals + self.assertRaises(errors.ConsistencyError, Schema.Union, Schema(self.predicates), + Schema({}, {}, {types.Literal(ns.xsd.string, None)})) + self.assertRaises(errors.ConsistencyError, Schema.Union, Schema({}, {}, {self.l_string}), + Schema({}, {}, {types.Literal(ns.xsd.string, None)})) + self.assertRaises(errors.ConsistencyError, Schema.Union, Schema({}, {}, {self.l_string}), + Schema({}, {types.Node(ns.xsd.string, None)})) + + # Union does not accept inconsistent predicates + self.assertRaises(errors.ConsistencyError, Schema.Union, Schema({self.p_tag}), + Schema({types.Predicate(ns.bse.tag, None, self.n_ent, self.n_tag, False)})) + self.assertRaises(errors.ConsistencyError, Schema.Union, Schema({self.p_tag}), + Schema({}, {types.Node(ns.bse.tag, None)})) + + # union is an alias for Union + self.assertEqual(Schema({self.p_comment}, {self.n_unused}, {}).union( + Schema({self.p_group}, {self.n_img}, {self.l_unused})), + Schema({self.p_comment, self.p_group}, {self.n_img, self.n_unused}, {self.l_unused})) + # union only accepts Schema instances + class Foo(): pass + self.assertRaises(TypeError, Schema({self.p_comment}, {self.n_unused}, {}).union, 1234) + self.assertRaises(TypeError, Schema({self.p_comment}, {self.n_unused}, {}).union, 'hello world') + self.assertRaises(TypeError, Schema({self.p_comment}, {self.n_unused}, {}).union, Foo()) + + # __add__ is an alias for Union + self.assertEqual(Schema({self.p_comment}, {self.n_unused}, {}) + Schema({self.p_group}, {self.n_img}, {self.l_unused}), + Schema({self.p_comment, self.p_group}, {self.n_img, self.n_unused}, {self.l_unused})) + # __add__ only accepts Schema instances + class Foo(): pass + 
self.assertRaises(TypeError, operator.add, Schema({self.p_comment}, {self.n_unused}, {}), 1234) + self.assertRaises(TypeError, operator.add, Schema({self.p_comment}, {self.n_unused}, {}), 'hello world') + self.assertRaises(TypeError, operator.add, Schema({self.p_comment}, {self.n_unused}, {}), Foo()) + + # __or__ is an alias for Union + self.assertEqual(Schema({self.p_comment}, {self.n_unused}, {}) | Schema({self.p_group}, {self.n_img}, {self.l_unused}), + Schema({self.p_comment, self.p_group}, {self.n_img, self.n_unused}, {self.l_unused})) + # __or__ only accepts Schema instances + class Foo(): pass + self.assertRaises(TypeError, operator.or_, Schema({self.p_comment}, {self.n_unused}, {}), 1234) + self.assertRaises(TypeError, operator.or_, Schema({self.p_comment}, {self.n_unused}, {}), 'hello world') + self.assertRaises(TypeError, operator.or_, Schema({self.p_comment}, {self.n_unused}, {}), Foo()) + + def test_type_getters(self): + schema = Schema(self.predicates, self.nodes, self.literals) + # nodes + self.assertEqual(self.n_root, schema.node(ns.bsfs.Node)) + self.assertEqual(self.n_ent, schema.node(ns.bsfs.Entity)) + self.assertEqual(self.n_img, schema.node(ns.bsfs.Image)) + self.assertRaises(KeyError, schema.node, ns.bsfs.Document) + self.assertRaises(KeyError, schema.node, self.n_root) + # literals + self.assertEqual(self.l_root, schema.literal(ns.bsfs.Literal)) + self.assertEqual(self.l_string, schema.literal(ns.xsd.string)) + self.assertEqual(self.l_integer, schema.literal(ns.xsd.integer)) + self.assertRaises(KeyError, schema.literal, ns.xsd.number) + self.assertRaises(KeyError, schema.literal, self.l_root) + # predicates + self.assertEqual(self.p_root, schema.predicate(ns.bsfs.Predicate)) + self.assertEqual(self.p_tag, schema.predicate(ns.bse.tag)) + self.assertEqual(self.p_group, schema.predicate(ns.bse.group)) + self.assertRaises(KeyError, schema.predicate, ns.bse.mimetype) + self.assertRaises(KeyError, schema.predicate, self.p_root) + + def 
test_list_getters(self): + schema = Schema(self.predicates, self.nodes, self.literals) + self.assertSetEqual(set(self.nodes), set(schema.nodes())) + self.assertSetEqual(set(self.literals), set(schema.literals())) + self.assertSetEqual(set(self.predicates), set(schema.predicates())) + + def test_has(self): + schema = Schema(self.predicates, self.nodes, self.literals) + # nodes + self.assertTrue(schema.has_node(ns.bsfs.Node)) + self.assertTrue(schema.has_node(ns.bsfs.Entity)) + self.assertTrue(schema.has_node(ns.bsfs.Image)) + self.assertFalse(schema.has_node(ns.bsfs.Document)) + self.assertFalse(schema.has_node(self.n_root)) + # literals + self.assertTrue(schema.has_literal(ns.bsfs.Literal)) + self.assertTrue(schema.has_literal(ns.xsd.string)) + self.assertTrue(schema.has_literal(ns.xsd.integer)) + self.assertFalse(schema.has_literal(ns.xsd.number)) + self.assertFalse(schema.has_literal(self.l_root)) + # predicates + self.assertTrue(schema.has_predicate(ns.bsfs.Predicate)) + self.assertTrue(schema.has_predicate(ns.bse.tag)) + self.assertTrue(schema.has_predicate(ns.bse.group)) + self.assertFalse(schema.has_predicate(ns.bse.mimetype)) + self.assertFalse(schema.has_predicate(self.p_root)) + + def test_empty(self): + self.assertEqual(Schema.Empty(), Schema( + [types.Predicate(ns.bsfs.Predicate, None, types.Node(ns.bsfs.Node, None), None, False)], + [types.Node(ns.bsfs.Node, None)], + [types.Literal(ns.bsfs.Literal, None)], + )) + + def test_from_string(self): + # from_string creates a schema + self.assertEqual( + Schema(self.predicates, self.nodes, self.literals), + Schema.from_string(self.schema_str)) + + # schema contains at least the root types + self.assertEqual(Schema.from_string(''), Schema({self.p_root}, {self.n_root}, {self.l_root})) + + # custom example + self.assertEqual( + Schema({types.Predicate(ns.bsfs.Predicate, None, self.n_root, None, False).get_child( + ns.bse.filename, self.n_ent, self.l_string, False)}), + Schema.from_string(''' + prefix rdfs: 
<http://www.w3.org/2000/01/rdf-schema#> + prefix xsd: <http://www.w3.org/2001/XMLSchema#> + prefix bsfs: <http://bsfs.ai/schema/> + prefix bse: <http://bsfs.ai/schema/Entity#> + + bsfs:Entity rdfs:subClassOf bsfs:Node . + xsd:string rdfs:subClassOf bsfs:Literal . + + bse:filename rdfs:subClassOf bsfs:Predicate ; + rdfs:domain bsfs:Entity ; + rdfs:range xsd:string ; + bsfs:unique "false"^^xsd:boolean . + ''')) + + # all nodes must be defined + self.assertRaises(errors.ConsistencyError, Schema.from_string, ''' + prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> + prefix xsd: <http://www.w3.org/2001/XMLSchema#> + prefix bsfs: <http://bsfs.ai/schema/> + prefix bse: <http://bsfs.ai/schema/Entity#> + + xsd:string rdfs:subClassOf bsfs:Literal . + + bse:filename rdfs:subClassOf bsfs:Predicate ; + rdfs:domain bsfs:Entity ; + rdfs:range xsd:string ; + bsfs:unique "false"^^xsd:boolean . + ''') + + # all literals must be defined + self.assertRaises(errors.ConsistencyError, Schema.from_string, ''' + prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> + prefix xsd: <http://www.w3.org/2001/XMLSchema#> + prefix bsfs: <http://bsfs.ai/schema/> + prefix bse: <http://bsfs.ai/schema/Entity#> + + bsfs:Entity rdfs:subClassOf bsfs:Node . + + bse:filename rdfs:subClassOf bsfs:Predicate ; + rdfs:domain bsfs:Entity ; + rdfs:range xsd:string ; + bsfs:unique "false"^^xsd:boolean . + ''') + + # must not have circular dependencies + self.assertRaises(errors.ConsistencyError, Schema.from_string, ''' + prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> + prefix bsfs: <http://bsfs.ai/schema/> + bsfs:Entity rdfs:subClassOf bsfs:Node . + # ah, a nice circular dependency + bsfs:Entity rdfs:subClassOf bsfs:Document . + bsfs:Document rdfs:subClassOf bsfs:Entity . + bsfs:PDF rdfs:subClassOf bsfs:Document . 
+ ''') + + # range must be a node or literal + self.assertRaises(errors.ConsistencyError, Schema.from_string, ''' + prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> + prefix xsd: <http://www.w3.org/2001/XMLSchema#> + prefix bsfs: <http://bsfs.ai/schema/> + prefix bse: <http://bsfs.ai/schema/Entity#> + + bsfs:Entity rdfs:subClassOf bsfs:Node . + + bse:filename rdfs:subClassOf bsfs:Predicate ; + rdfs:domain bsfs:Entity ; + rdfs:range xsd:string ; + bsfs:unique "false"^^xsd:boolean . + ''') + self.assertRaises(errors.ConsistencyError, Schema.from_string, ''' + prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> + prefix xsd: <http://www.w3.org/2001/XMLSchema#> + prefix bsfs: <http://bsfs.ai/schema/> + prefix bse: <http://bsfs.ai/schema/Entity#> + + bsfs:Entity rdfs:subClassOf bsfs:Node . + + bse:filename rdfs:subClassOf bsfs:Predicate ; + rdfs:domain bsfs:Entity ; + rdfs:range bsfs:Foo ; + bsfs:unique "false"^^xsd:boolean . + ''') + self.assertRaises(errors.ConsistencyError, Schema.from_string, ''' + prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> + prefix xsd: <http://www.w3.org/2001/XMLSchema#> + prefix bsfs: <http://bsfs.ai/schema/> + prefix bse: <http://bsfs.ai/schema/Entity#> + + bsfs:Entity rdfs:subClassOf bsfs:Node . + + bse:filename rdfs:subClassOf bsfs:Predicate ; + rdfs:domain bsfs:Entity ; + rdfs:range bsfs:Predicate ; + bsfs:unique "false"^^xsd:boolean . + ''') + + # must be consistent + self.assertRaises(errors.ConsistencyError, Schema.from_string, ''' + prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> + prefix xsd: <http://www.w3.org/2001/XMLSchema#> + prefix bsfs: <http://bsfs.ai/schema/> + + bsfs:Entity rdfs:subClassOf bsfs:Node . + bsfs:Document rdfs:subClassOf bsfs:Node . + bsfs:Document rdfs:subClassOf bsfs:Entity. 
+ ''') + self.assertRaises(errors.ConsistencyError, Schema.from_string, ''' + prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> + prefix xsd: <http://www.w3.org/2001/XMLSchema#> + prefix bsfs: <http://bsfs.ai/schema/> + + xsd:string rdfs:subClassOf bsfs:Literal . + xsd:name rdfs:subClassOf bsfs:Literal . + xsd:name rdfs:subClassOf xsd:string . + ''') + self.assertRaises(errors.ConsistencyError, Schema.from_string, ''' + prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> + prefix xsd: <http://www.w3.org/2001/XMLSchema#> + prefix bsfs: <http://bsfs.ai/schema/> + prefix bse: <http://bsfs.ai/schema/Entity#> + + bsfs:Entity rdfs:subClassOf bsfs:Node . + + bse:foo rdfs:subClassOf bsfs:Predicate ; + rdfs:domain bsfs:Node ; + rdfs:range bsfs:Node ; + bsfs:unique "false"^^xsd:boolean . + + bse:foo rdfs:subClassOf bsfs:Predicate ; + rdfs:domain bsfs:Entity . + + ''') + self.assertRaises(errors.ConsistencyError, Schema.from_string, ''' + prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> + prefix xsd: <http://www.w3.org/2001/XMLSchema#> + prefix bsfs: <http://bsfs.ai/schema/> + prefix bse: <http://bsfs.ai/schema/Entity#> + + bsfs:Entity rdfs:subClassOf bsfs:Node . + + bse:foo rdfs:subClassOf bsfs:Predicate ; + rdfs:domain bsfs:Node ; + rdfs:range bsfs:Node ; + bsfs:unique "false"^^xsd:boolean . + + bse:foo rdfs:subClassOf bsfs:Predicate ; + rdfs:range bsfs:Entity . + + ''') + self.assertRaises(errors.ConsistencyError, Schema.from_string, ''' + prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> + prefix xsd: <http://www.w3.org/2001/XMLSchema#> + prefix bsfs: <http://bsfs.ai/schema/> + prefix bse: <http://bsfs.ai/schema/Entity#> + + bsfs:Entity rdfs:subClassOf bsfs:Node . + + bse:foo rdfs:subClassOf bsfs:Predicate ; + rdfs:domain bsfs:Node ; + rdfs:range bsfs:Node ; + bsfs:unique "false"^^xsd:boolean . + + bse:foo rdfs:subClassOf bsfs:Predicate ; + bsfs:unique "true"^^xsd:boolean . 
+ + ''') + + + +## main ## + +if __name__ == '__main__': + unittest.main() + +## EOF ## diff --git a/test/schema/test_types.py b/test/schema/test_types.py new file mode 100644 index 0000000..4a49e6e --- /dev/null +++ b/test/schema/test_types.py @@ -0,0 +1,225 @@ +""" + +Part of the tagit test suite. +A copy of the license is provided with the project. +Author: Matthias Baumgartner, 2022 +""" +# imports +import operator +import unittest + +# bsfs imports +from bsfs.namespace import ns +from bsfs.utils import errors + +# objects to test +from bsfs.schema.types import _Type, _Vertex, Node, Literal, Predicate + + +## code ## + +class TestType(unittest.TestCase): + def test_parents(self): + # create some types + fst = _Type('First') + snd = _Type('Second', fst) + trd = _Type('Third', snd) + frd = _Type('Fourth', trd) + # check parents + self.assertListEqual(list(fst.parents()), []) + self.assertListEqual(list(snd.parents()), [fst]) + self.assertListEqual(list(trd.parents()), [snd, fst]) + self.assertListEqual(list(frd.parents()), [trd, snd, fst]) + + def test_essentials(self): + # type w/o parent + self.assertEqual(str(_Type('Foo')), '_Type(Foo)') + self.assertEqual(repr(_Type('Foo')), '_Type(Foo, None)') + # type w/ parent + self.assertEqual(str(_Type('Foo', _Type('Bar'))), '_Type(Foo)') + self.assertEqual(repr(_Type('Foo', _Type('Bar'))), '_Type(Foo, _Type(Bar, None))') + # subtype w/o parent + class SubType(_Type): pass + self.assertEqual(str(SubType('Foo')), 'SubType(Foo)') + self.assertEqual(repr(SubType('Foo')), 'SubType(Foo, None)') + # subtype w/ parent + self.assertEqual(str(SubType('Foo', SubType('Bar'))), 'SubType(Foo)') + self.assertEqual(repr(SubType('Foo', SubType('Bar'))), 'SubType(Foo, SubType(Bar, None))') + # subtype and type mixed + self.assertEqual(str(SubType('Foo', _Type('Bar'))), 'SubType(Foo)') + self.assertEqual(repr(SubType('Foo', _Type('Bar'))), 'SubType(Foo, _Type(Bar, None))') + self.assertEqual(str(_Type('Foo', SubType('Bar'))), 
'_Type(Foo)') + self.assertEqual(repr(_Type('Foo', SubType('Bar'))), '_Type(Foo, SubType(Bar, None))') + + def test_get_child(self): + # callee is used as parent + self.assertEqual(_Type('First').get_child('Second'), _Type('Second', _Type('First'))) + # works with multiple parents + self.assertEqual(_Type('First').get_child('Second').get_child('Third'), _Type('Third', _Type('Second', _Type('First')))) + # type persists + class Foo(_Type): pass + self.assertEqual(Foo('First').get_child('Second'), Foo('Second', Foo('First'))) + + def test_equality(self): + # equality depends on uri + self.assertEqual(_Type('Foo'), _Type('Foo')) + self.assertEqual(hash(_Type('Foo')), hash(_Type('Foo'))) + self.assertNotEqual(_Type('Foo'), _Type('Bar')) + self.assertNotEqual(hash(_Type('Foo')), hash(_Type('Bar'))) + # comparison is case-sensitive + self.assertNotEqual(_Type('FOO'), _Type('foo')) + self.assertNotEqual(hash(_Type('FOO')), hash(_Type('foo'))) + # comparison respects type + class Foo(_Type): pass + self.assertNotEqual(_Type('Foo'), Foo('Foo')) + self.assertNotEqual(hash(_Type('Foo')), hash(Foo('Foo'))) + # comparison respects parent + self.assertNotEqual(_Type('Foo', _Type('Bar')), _Type('Foo')) + self.assertNotEqual(hash(_Type('Foo', _Type('Bar'))), hash(_Type('Foo'))) + + def test_order(self): + # create some types. 
+ vehicle = _Type('Vehicle') + twowheel = _Type('Two-wheel', vehicle) + bike = _Type('Bike', twowheel) + bicycle = _Type('Bicycle', twowheel) + # two-wheel is equivalent to itself + self.assertFalse(twowheel == vehicle) + self.assertTrue(twowheel == twowheel) + self.assertFalse(twowheel == bicycle) + # two-wheel is a true subclass of Vehicle + self.assertTrue(twowheel < vehicle) + self.assertFalse(twowheel < twowheel) + self.assertFalse(twowheel < bicycle) + # two-wheel is a subclass of itself and Vehicle + self.assertTrue(twowheel <= vehicle) + self.assertTrue(twowheel <= twowheel) + self.assertFalse(twowheel <= bicycle) + # two-wheel is a true superclass of Bicycle + self.assertFalse(twowheel > vehicle) + self.assertFalse(twowheel > twowheel) + self.assertTrue(twowheel > bicycle) + # two-wheel is a superclass of itself and Bicycle + self.assertFalse(twowheel >= vehicle) + self.assertTrue(twowheel >= twowheel) + self.assertTrue(twowheel >= bicycle) + # analoguous to sets, this is not a total order + self.assertFalse(bike <= bicycle) + self.assertFalse(bike < bicycle) + self.assertFalse(bike > bicycle) + self.assertFalse(bike >= bicycle) + self.assertFalse(bike == bicycle) + class Foo(_Type): pass + foo = Foo(bike.uri, bike.parent) + # cannot compare different types + self.assertRaises(TypeError, operator.lt, foo, bike) + self.assertRaises(TypeError, operator.le, foo, bike) + self.assertRaises(TypeError, operator.gt, foo, bike) + self.assertRaises(TypeError, operator.ge, foo, bike) + # goes both ways + self.assertRaises(TypeError, operator.lt, bike, foo) + self.assertRaises(TypeError, operator.le, bike, foo) + self.assertRaises(TypeError, operator.gt, bike, foo) + self.assertRaises(TypeError, operator.ge, bike, foo) + +class TestPredicate(unittest.TestCase): + def test_construction(self): + # domain must be a node + self.assertRaises(TypeError, Predicate, ns.bse.foo, 1234, None, True) + self.assertRaises(TypeError, Predicate, ns.bse.foo, None, Literal(ns.bsfs.Foo, 
None), None, True) + # range must be None, a Literal, or a Node + self.assertRaises(TypeError, Predicate, ns.bse.foo, None, Node(ns.bsfs.Node, None), 1234, True) + self.assertRaises(TypeError, Predicate, ns.bse.foo, None, Node(ns.bsfs.Node, None), _Vertex(ns.bsfs.Foo, None), True) + self.assertRaises(TypeError, Predicate, ns.bse.foo, None, Node(ns.bsfs.Node, None), _Type(ns.bsfs.Foo, None), True) + class Foo(): pass + self.assertRaises(TypeError, Predicate, ns.bse.foo, None, Node(ns.bsfs.Node, None), Foo(), True) + + def test_equality(self): + n_root = Node(ns.bsfs.Node, None) + n_ent = Node(ns.bsfs.Entity, Node(ns.bsfs.Node, None)) + n_tag = Node(ns.bsfs.Tag, Node(ns.bsfs.Tag, None)) + root = Predicate( + uri=ns.bsfs.Predicate, + parent=None, + domain=n_root, + range=None, + unique=False, + ) + # instance is equal to itself + self.assertEqual(root, root) + self.assertEqual(hash(root), hash(root)) + # instance is equal to a clone + self.assertEqual(root, Predicate(ns.bsfs.Predicate, None, n_root, None, False)) + self.assertEqual(hash(root), hash(Predicate(ns.bsfs.Predicate, None, n_root, None, False))) + # equality respects uri + self.assertNotEqual(root, Predicate(ns.bsfs.Alternative, None, n_root, None, False)) + self.assertNotEqual(hash(root), hash(Predicate(ns.bsfs.Alternative, None, n_root, None, False))) + # equality respects parent + self.assertNotEqual(root, Predicate(ns.bsfs.Predicate, n_root, n_root, None, False)) + self.assertNotEqual(hash(root), hash(Predicate(ns.bsfs.Predicate, n_root, n_root, None, False))) + # equality respects domain + self.assertNotEqual(root, Predicate(ns.bsfs.Predicate, None, n_ent, None, False)) + self.assertNotEqual(hash(root), hash(Predicate(ns.bsfs.Predicate, None, n_ent, None, False))) + # equality respects range + self.assertNotEqual(root, Predicate(ns.bsfs.Predicate, None, n_root, n_root, False)) + self.assertNotEqual(hash(root), hash(Predicate(ns.bsfs.Predicate, None, n_root, n_root, False))) + # equality respects unique 
+ self.assertNotEqual(root, Predicate(ns.bsfs.Predicate, None, n_root, None, True)) + self.assertNotEqual(hash(root), hash(Predicate(ns.bsfs.Predicate, None, n_root, None, True))) + + def test_get_child(self): + n_root = Node(ns.bsfs.Node, None) + n_ent = Node(ns.bsfs.Entity, Node(ns.bsfs.Node, None)) + # NOTE(review): n_tag's parent is Tag itself (Tag under Tag) — presumably meant Node(ns.bsfs.Node, None); confirm + n_tag = Node(ns.bsfs.Tag, Node(ns.bsfs.Tag, None)) + root = Predicate( + uri=ns.bsfs.Predicate, + parent=None, + domain=n_root, + range=None, + unique=False, + ) + # NOTE(review): uri=ns.bsfs.Entity for a tag-like predicate looks like a copy-paste; harmless for these assertions — confirm + tag = Predicate( + uri=ns.bsfs.Entity, + parent=root, + domain=n_ent, + range=n_tag, + unique=False, + ) + + # uri is respected + self.assertEqual(ns.bse.foo, tag.get_child(ns.bse.foo).uri) + # domain is respected + dom = Node(ns.bsfs.Image, n_ent) + self.assertEqual(dom, tag.get_child(ns.bse.foo, domain=dom).domain) + # range is respected + rng = Node(ns.bsfs.Group, n_tag) + self.assertEqual(rng, tag.get_child(ns.bse.foo, range=rng).range) + # range=None does not unset the range; it falls back to the parent's range + self.assertEqual(n_tag, tag.get_child(ns.bse.foo, range=None).range) + # unique is respected + self.assertTrue(tag.get_child(ns.bse.foo, unique=True).unique) + + # domain is inherited from parent + self.assertEqual(n_ent, tag.get_child(ns.bse.foo).domain) + # range is inherited from parent + self.assertEqual(n_tag, tag.get_child(ns.bse.foo).range) + # uniqueness is inherited from parent + self.assertFalse(tag.get_child(ns.bse.foo).unique) + + # domain must be subtype of parent's domain + self.assertRaises(errors.ConsistencyError, tag.get_child, ns.bse.foo, domain=n_root) + self.assertRaises(errors.ConsistencyError, tag.get_child, ns.bse.foo, domain=Node(ns.bsfs.Image, n_root)) + # range cannot be None (root predicate has no range to inherit) + self.assertRaises(ValueError, root.get_child, ns.bse.foo) + # range must be subtype of parent's range + self.assertRaises(errors.ConsistencyError, tag.get_child, ns.bse.foo, range=n_root) + self.assertRaises(errors.ConsistencyError, tag.get_child, ns.bse.foo, range=Node(ns.bsfs.Image, n_root)) + + +## main ## + +if __name__ == '__main__': 
+ unittest.main() + +## EOF ## + diff --git a/test/triple_store/__init__.py b/test/triple_store/__init__.py new file mode 100644 index 0000000..e69de29 --- /dev/null +++ b/test/triple_store/__init__.py diff --git a/test/triple_store/test_base.py b/test/triple_store/test_base.py new file mode 100644 index 0000000..a4b0559 --- /dev/null +++ b/test/triple_store/test_base.py @@ -0,0 +1,150 @@ +""" + +Part of the bsfs test suite. +A copy of the license is provided with the project. +Author: Matthias Baumgartner, 2022 +""" +# imports +import unittest + +# bsie imports +from bsfs.utils import URI + +# objects to test +from bsfs.triple_store.base import TripleStoreBase + + +## code ## + +class DummyBase(TripleStoreBase): + @classmethod + def Open(cls, uri, **kwargs): + return cls(uri) + + def commit(self): + pass + + def rollback(self): + pass + + @property + def schema(self): + pass + + @schema.setter + def schema(self, schema): + pass + + def exists(self, node_type, guids): + pass + + def create(self, node_type, guids): + pass + + def set(self, node_type, guids, predicate, values): + pass + +class DummyStore(DummyBase): + pass + +class DummyAlternative(DummyBase): + pass + + +class TestTripleStoreBase(unittest.TestCase): + + def test_equality(self): + # identical instances are equal + store = DummyStore.Open(None) + self.assertEqual(store, store) + self.assertEqual(hash(store), hash(store)) + store = DummyStore.Open(URI('http://example.com/store')) + self.assertEqual(store, store) + self.assertEqual(hash(store), hash(store)) + # in-memory storages are not equal + # NOTE: Don't use + # >>> self.assertNotEqual(hash(DummyStore(None)), hash(DummyStore(None))) + # The two stores are created subsequently since each of them is deleted + # right after hashing. Because the two instances never exist at the same + # time, their id may (and typically will) be identical. + # This only matters when the `id` function is used, i.e. when uri=None. 
+ a, b = DummyStore.Open(None), DummyStore.Open(None) + self.assertNotEqual(a, b) + self.assertNotEqual(hash(a), hash(b)) + a, b = DummyStore.Open(None), DummyStore.Open(URI('http://example.com/store')) + self.assertNotEqual(a, b) + self.assertNotEqual(hash(a), hash(b)) + a, b = DummyStore.Open(URI('http://example.com/store')), DummyStore.Open(None) + self.assertNotEqual(a, b) + self.assertNotEqual(hash(a), hash(b)) + a, b = DummyStore.Open(None), DummyStore.Open(URI('http://example.com/alternative')) + self.assertNotEqual(a, b) + self.assertNotEqual(hash(a), hash(b)) + # equality respects uri + self.assertEqual( + DummyStore.Open(URI('http://example.com/store')), + DummyStore.Open(URI('http://example.com/store'))) + self.assertEqual( + hash(DummyStore.Open(URI('http://example.com/alternative'))), + hash(DummyStore.Open(URI('http://example.com/alternative')))) + self.assertNotEqual( + DummyStore.Open(URI('http://example.com/store')), + DummyStore.Open(URI('http://example.com/alternative'))) + self.assertNotEqual( + DummyStore.Open(URI('http://example.com/store')), + hash(DummyStore.Open(URI('http://example.com/alternative')))) + # equality respects type + self.assertNotEqual(DummyStore.Open(None), None) + self.assertNotEqual(hash(DummyStore.Open(None)), hash(None)) + self.assertNotEqual(DummyStore.Open(None), 'hello world') + self.assertNotEqual(hash(DummyStore.Open(None)), hash('hello world')) + self.assertNotEqual(DummyStore.Open(None), 1234) + self.assertNotEqual(hash(DummyStore.Open(None)), hash(1234)) + class Foo(): pass + f = Foo() + self.assertNotEqual(DummyStore.Open(None), f) + self.assertNotEqual(hash(DummyStore.Open(None)), hash(f)) + self.assertNotEqual( + DummyStore.Open(None), + DummyAlternative.Open(None)) + self.assertNotEqual( + hash(DummyStore.Open(None)), + hash(DummyAlternative.Open(None))) + + def test_string_conversion(self): + # string conversion respects uri + self.assertEqual('DummyStore(uri=http://example.com/store)', + 
str(DummyStore.Open(URI('http://example.com/store')))) + self.assertEqual('DummyStore(uri=http://example.com/store)', + repr(DummyStore.Open(URI('http://example.com/store')))) + self.assertEqual('DummyStore(uri=http://example.com/alternative)', + str(DummyStore.Open(URI('http://example.com/alternative')))) + self.assertEqual('DummyStore(uri=http://example.com/alternative)', + repr(DummyStore.Open(URI('http://example.com/alternative')))) + self.assertEqual('DummyStore(uri=None)', + str(DummyStore.Open(None))) + self.assertEqual('DummyStore(uri=None)', + repr(DummyStore.Open(None))) + # string conversion respects type + self.assertEqual('DummyAlternative(uri=http://example.com/store)', + str(DummyAlternative.Open(URI('http://example.com/store')))) + + def test_uri(self): + # uri returns correct value + self.assertEqual(None, + DummyStore.Open(None).uri) + self.assertEqual(URI('http://example.com/store'), + DummyStore.Open(URI('http://example.com/store')).uri) + self.assertEqual(URI('http://example.com/alternative'), + DummyStore.Open(URI('http://example.com/alternative')).uri) + # persistence respects uri + self.assertFalse(DummyStore.Open(None).is_persistent()) + self.assertTrue(DummyStore.Open(URI('http://example.com/store')).is_persistent()) + self.assertTrue(DummyStore.Open(URI('http://example.com/alternative')).is_persistent()) + +## main ## + +if __name__ == '__main__': + unittest.main() + +## EOF ## diff --git a/test/triple_store/test_sparql.py b/test/triple_store/test_sparql.py new file mode 100644 index 0000000..8d98749 --- /dev/null +++ b/test/triple_store/test_sparql.py @@ -0,0 +1,769 @@ +""" + +Part of the bsfs test suite. +A copy of the license is provided with the project. 
+Author: Matthias Baumgartner, 2022 +""" +# imports +import rdflib +import unittest + +# bsie imports +from bsfs import schema as _schema +from bsfs.namespace import ns +from bsfs.utils import errors, URI + +# objects to test +from bsfs.triple_store.sparql import SparqlStore + + +## code ## + +class TestSparqlStore(unittest.TestCase): + def setUp(self): + self.schema = _schema.Schema.from_string(''' + prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> + prefix xsd: <http://www.w3.org/2001/XMLSchema#> + + prefix bsfs: <http://bsfs.ai/schema/> + prefix bse: <http://bsfs.ai/schema/Entity#> + + bsfs:Entity rdfs:subClassOf bsfs:Node . + bsfs:Tag rdfs:subClassOf bsfs:Node . + bsfs:User rdfs:subClassOf bsfs:Node . + xsd:string rdfs:subClassOf bsfs:Literal . + xsd:integer rdfs:subClassOf bsfs:Literal . + + # non-unique literal + bse:comment rdfs:subClassOf bsfs:Predicate ; + rdfs:domain bsfs:Entity ; + rdfs:range xsd:string ; + bsfs:unique "false"^^xsd:boolean . + + # unique literal + bse:filesize rdfs:subClassOf bsfs:Predicate ; + rdfs:domain bsfs:Entity ; + rdfs:range xsd:integer ; + bsfs:unique "true"^^xsd:boolean . + + # non-unique node + bse:tag rdfs:subClassOf bsfs:Predicate ; + rdfs:domain bsfs:Entity ; + rdfs:range bsfs:Tag ; + bsfs:unique "false"^^xsd:boolean . + + # unique node + bse:author rdfs:subClassOf bsfs:Predicate ; + rdfs:domain bsfs:Entity ; + rdfs:range bsfs:User ; + bsfs:unique "true"^^xsd:boolean . 
+ + ''') + + def test_essentials(self): + store = SparqlStore.Open() + # equality + self.assertEqual(store, store) + self.assertEqual(hash(store), hash(store)) + self.assertNotEqual(store, SparqlStore.Open()) + self.assertNotEqual(hash(store), hash(SparqlStore.Open())) + # string conversion + self.assertEqual(str(store), 'SparqlStore(uri=None)') + self.assertEqual(repr(store), 'SparqlStore(uri=None)') + # open + self.assertIsInstance(SparqlStore.Open(), SparqlStore) + + + def test__has_type(self): + # setup store + store = SparqlStore.Open() + store.schema = _schema.Schema.from_string(''' + prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> + prefix xsd: <http://www.w3.org/2001/XMLSchema#> + prefix bsfs: <http://bsfs.ai/schema/> + + bsfs:Entity rdfs:subClassOf bsfs:Node . + bsfs:Document rdfs:subClassOf bsfs:Entity . + bsfs:Image rdfs:subClassOf bsfs:Entity . + bsfs:PDF rdfs:subClassOf bsfs:Document . + + ''') + # add some instances + store.create(store.schema.node(ns.bsfs.Entity), {URI('http://example.com/me/entity#1234')}) + store.create(store.schema.node(ns.bsfs.Document), {URI('http://example.com/me/document#1234')}) + store.create(store.schema.node(ns.bsfs.Image), {URI('http://example.com/me/image#1234')}) + store.create(store.schema.node(ns.bsfs.PDF), {URI('http://example.com/me/pdf#1234')}) + + # node_type must be in the schema + self.assertRaises(errors.ConsistencyError, store._has_type, URI('http://example.com/me/entity#1234'), store.schema.node(ns.bsfs.Node).get_child(ns.bsfs.invalid)) + + # returns False on inexistent nodes + self.assertFalse(store._has_type(URI('http://example.com/me/entity#4321'), store.schema.node(ns.bsfs.Entity))) + self.assertFalse(store._has_type(URI('http://example.com/me/document#4321'), store.schema.node(ns.bsfs.Document))) + self.assertFalse(store._has_type(URI('http://example.com/me/image#4321'), store.schema.node(ns.bsfs.Image))) + self.assertFalse(store._has_type(URI('http://example.com/me/pdf#4321'), 
store.schema.node(ns.bsfs.PDF))) + + # _has_type checks direct types + self.assertTrue(store._has_type(URI('http://example.com/me/entity#1234'), store.schema.node(ns.bsfs.Entity))) + self.assertTrue(store._has_type(URI('http://example.com/me/document#1234'), store.schema.node(ns.bsfs.Document))) + self.assertTrue(store._has_type(URI('http://example.com/me/image#1234'), store.schema.node(ns.bsfs.Image))) + self.assertTrue(store._has_type(URI('http://example.com/me/pdf#1234'), store.schema.node(ns.bsfs.PDF))) + + # _has_type checks type hierarchy + self.assertFalse(store._has_type(URI('http://example.com/me/entity#1234'), store.schema.node(ns.bsfs.Document))) + self.assertFalse(store._has_type(URI('http://example.com/me/entity#1234'), store.schema.node(ns.bsfs.Image))) + self.assertFalse(store._has_type(URI('http://example.com/me/entity#1234'), store.schema.node(ns.bsfs.PDF))) + + self.assertTrue(store._has_type(URI('http://example.com/me/document#1234'), store.schema.node(ns.bsfs.Entity))) + self.assertFalse(store._has_type(URI('http://example.com/me/document#1234'), store.schema.node(ns.bsfs.Image))) + self.assertFalse(store._has_type(URI('http://example.com/me/document#1234'), store.schema.node(ns.bsfs.PDF))) + + self.assertTrue(store._has_type(URI('http://example.com/me/image#1234'), store.schema.node(ns.bsfs.Entity))) + self.assertFalse(store._has_type(URI('http://example.com/me/image#1234'), store.schema.node(ns.bsfs.Document))) + self.assertFalse(store._has_type(URI('http://example.com/me/image#1234'), store.schema.node(ns.bsfs.PDF))) + + self.assertTrue(store._has_type(URI('http://example.com/me/pdf#1234'), store.schema.node(ns.bsfs.Entity))) + self.assertTrue(store._has_type(URI('http://example.com/me/pdf#1234'), store.schema.node(ns.bsfs.Document))) + self.assertFalse(store._has_type(URI('http://example.com/me/pdf#1234'), store.schema.node(ns.bsfs.Image))) + + + def test_schema(self): + # setup + store = SparqlStore.Open() + curr = self.schema + p_comment = 
curr.predicate(ns.bse.comment) + p_filesize = curr.predicate(ns.bse.filesize) + p_tag = curr.predicate(ns.bse.tag) + p_author = curr.predicate(ns.bse.author) + + # migrate to an initial schema + store.schema = curr + # store has migrated + self.assertEqual(store.schema, curr) + + # add some instances + ent_ids = {URI('http://example.com/me/entity#1234'), URI('http://example.com/me/entity#4321')} + tag_ids = {URI('http://example.com/me/tag#1234'), URI('http://example.com/me/tag#4321')} + store.create(curr.node(ns.bsfs.Entity), ent_ids) + store.create(curr.node(ns.bsfs.Tag), tag_ids) + store.create(curr.node(ns.bsfs.User), {URI('http://example.com/me')}) + # add some triples + store.set(curr.node(ns.bsfs.Entity), ent_ids, p_comment, {'foo', 'bar'}) + store.set(curr.node(ns.bsfs.Entity), ent_ids, p_filesize, {1234}) + store.set(curr.node(ns.bsfs.Entity), ent_ids, p_tag, + {URI('http://example.com/me/tag#1234'), URI('http://example.com/me/tag#4321')}) + store.set(curr.node(ns.bsfs.Entity), ent_ids, p_author, + {URI('http://example.com/me')}) + # check instances + instances = { + # node instances + (rdflib.URIRef('http://example.com/me/entity#1234'), rdflib.RDF.type, rdflib.URIRef(ns.bsfs.Entity)), + (rdflib.URIRef('http://example.com/me/entity#4321'), rdflib.RDF.type, rdflib.URIRef(ns.bsfs.Entity)), + (rdflib.URIRef('http://example.com/me/tag#1234'), rdflib.RDF.type, rdflib.URIRef(ns.bsfs.Tag)), + (rdflib.URIRef('http://example.com/me/tag#4321'), rdflib.RDF.type, rdflib.URIRef(ns.bsfs.Tag)), + (rdflib.URIRef('http://example.com/me'), rdflib.RDF.type, rdflib.URIRef(ns.bsfs.User)), + # comments + (rdflib.URIRef('http://example.com/me/entity#1234'), rdflib.URIRef(p_comment.uri), rdflib.Literal('foo', datatype=rdflib.XSD.string)), + (rdflib.URIRef('http://example.com/me/entity#1234'), rdflib.URIRef(p_comment.uri), rdflib.Literal('bar', datatype=rdflib.XSD.string)), + (rdflib.URIRef('http://example.com/me/entity#4321'), rdflib.URIRef(p_comment.uri), rdflib.Literal('foo', 
datatype=rdflib.XSD.string)), + (rdflib.URIRef('http://example.com/me/entity#4321'), rdflib.URIRef(p_comment.uri), rdflib.Literal('bar', datatype=rdflib.XSD.string)), + # filesize + (rdflib.URIRef('http://example.com/me/entity#1234'), rdflib.URIRef(p_filesize.uri), rdflib.Literal('1234', datatype=rdflib.XSD.integer)), + (rdflib.URIRef('http://example.com/me/entity#4321'), rdflib.URIRef(p_filesize.uri), rdflib.Literal('1234', datatype=rdflib.XSD.integer)), + # tags + (rdflib.URIRef('http://example.com/me/entity#1234'), rdflib.URIRef(p_tag.uri), rdflib.URIRef('http://example.com/me/tag#1234')), + (rdflib.URIRef('http://example.com/me/entity#1234'), rdflib.URIRef(p_tag.uri), rdflib.URIRef('http://example.com/me/tag#4321')), + (rdflib.URIRef('http://example.com/me/entity#4321'), rdflib.URIRef(p_tag.uri), rdflib.URIRef('http://example.com/me/tag#1234')), + (rdflib.URIRef('http://example.com/me/entity#4321'), rdflib.URIRef(p_tag.uri), rdflib.URIRef('http://example.com/me/tag#4321')), + # author + (rdflib.URIRef('http://example.com/me/entity#1234'), rdflib.URIRef(p_author.uri), rdflib.URIRef('http://example.com/me')), + (rdflib.URIRef('http://example.com/me/entity#4321'), rdflib.URIRef(p_author.uri), rdflib.URIRef('http://example.com/me')), + } + self.assertSetEqual(set(store._graph), instances) + + # add some classes to the schema + curr = curr + _schema.Schema.from_string(''' + prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> + prefix xsd: <http://www.w3.org/2001/XMLSchema#> + prefix bsfs: <http://bsfs.ai/schema/> + prefix bse: <http://bsfs.ai/schema/Entity#> + prefix bst: <http://bsfs.ai/schema/Tag#> + prefix bsc: <http://bsfs.ai/schema/Collection#> + + bsfs:Entity rdfs:subClassOf bsfs:Node . + bsfs:Tag rdfs:subClassOf bsfs:Node . + bsfs:Collection rdfs:subClassOf bsfs:Node . + xsd:boolean rdfs:subClassOf bsfs:Literal . 
+ + # literal + bse:shared rdfs:subClassOf bsfs:Predicate ; + rdfs:domain bsfs:Entity ; + rdfs:range xsd:boolean ; + bsfs:unique "true"^^xsd:boolean . + + # node + bse:partOf rdfs:subClassOf bsfs:Predicate ; + rdfs:domain bsfs:Entity ; + rdfs:range bsfs:Collection ; + bsfs:unique "false"^^xsd:boolean . + + # predicates across auxiliary node classes + bst:usedIn rdfs:subClassOf bsfs:Predicate ; + rdfs:domain bsfs:Tag ; + rdfs:range bsfs:Collection ; + bsfs:unique "false"^^xsd:boolean . + + bsc:tag rdfs:subClassOf bsfs:Predicate ; + rdfs:domain bsfs:Collection ; + rdfs:range bsfs:Tag ; + bsfs:unique "false"^^xsd:boolean . + + bst:principal rdfs:subClassOf bsfs:Predicate ; + rdfs:domain bsfs:Tag ; + rdfs:range bsfs:Node ; + bsfs:unique "true"^^xsd:boolean . + + ''') + # store migrated to the new schema + store.schema = curr + self.assertEqual(store.schema, curr) + # instances have not changed + self.assertSetEqual(set(store._graph), instances) + # add some instances of the new classes + p_partOf = curr.predicate(ns.bse.partOf) + p_shared = curr.predicate(ns.bse.shared) + p_usedIn = curr.predicate('http://bsfs.ai/schema/Tag#usedIn') + p_ctag = curr.predicate('http://bsfs.ai/schema/Collection#tag') + p_principal = curr.predicate('http://bsfs.ai/schema/Tag#principal') + store.create(curr.node(ns.bsfs.Collection), {URI('http://example.com/me/collection#1234'), URI('http://example.com/me/collection#4321')}) + # add some more triples + store.set(curr.node(ns.bsfs.Entity), ent_ids, p_shared, {True}) + store.set(curr.node(ns.bsfs.Entity), ent_ids, p_partOf, + {URI('http://example.com/me/collection#1234'), URI('http://example.com/me/collection#4321')}) + store.set(curr.node(ns.bsfs.Tag), {URI('http://example.com/me/tag#1234')}, p_usedIn, + {URI('http://example.com/me/collection#1234')}) + store.set(curr.node(ns.bsfs.Collection), {URI('http://example.com/me/collection#4321')}, p_ctag, + {URI('http://example.com/me/tag#1234'), URI('http://example.com/me/tag#4321')}) + 
store.set(curr.node(ns.bsfs.Tag), {URI('http://example.com/me/tag#1234')}, p_principal, + {URI('http://example.com/me/collection#1234')}) + # new instances are now in the graph + self.assertSetEqual(set(store._graph), instances | { + # collections + (rdflib.URIRef('http://example.com/me/collection#1234'), rdflib.RDF.type, rdflib.URIRef(ns.bsfs.Collection)), + (rdflib.URIRef('http://example.com/me/collection#4321'), rdflib.RDF.type, rdflib.URIRef(ns.bsfs.Collection)), + # partOf + (rdflib.URIRef('http://example.com/me/entity#1234'), rdflib.URIRef(p_partOf.uri), rdflib.URIRef('http://example.com/me/collection#1234')), + (rdflib.URIRef('http://example.com/me/entity#1234'), rdflib.URIRef(p_partOf.uri), rdflib.URIRef('http://example.com/me/collection#4321')), + (rdflib.URIRef('http://example.com/me/entity#4321'), rdflib.URIRef(p_partOf.uri), rdflib.URIRef('http://example.com/me/collection#1234')), + (rdflib.URIRef('http://example.com/me/entity#4321'), rdflib.URIRef(p_partOf.uri), rdflib.URIRef('http://example.com/me/collection#4321')), + # shared + (rdflib.URIRef('http://example.com/me/entity#1234'), rdflib.URIRef(p_shared.uri), rdflib.Literal('true', datatype=rdflib.XSD.boolean)), + (rdflib.URIRef('http://example.com/me/entity#4321'), rdflib.URIRef(p_shared.uri), rdflib.Literal('true', datatype=rdflib.XSD.boolean)), + # auxiliary node connections + (rdflib.URIRef('http://example.com/me/tag#1234'), rdflib.URIRef(p_usedIn.uri), rdflib.URIRef('http://example.com/me/collection#1234')), + (rdflib.URIRef('http://example.com/me/tag#1234'), rdflib.URIRef(p_principal.uri), rdflib.URIRef('http://example.com/me/collection#1234')), + (rdflib.URIRef('http://example.com/me/collection#4321'), rdflib.URIRef(p_ctag.uri), rdflib.URIRef('http://example.com/me/tag#1234')), + (rdflib.URIRef('http://example.com/me/collection#4321'), rdflib.URIRef(p_ctag.uri), rdflib.URIRef('http://example.com/me/tag#4321')), + }) + + + # remove some classes from the schema + curr = 
_schema.Schema.from_string(''' + prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> + prefix xsd: <http://www.w3.org/2001/XMLSchema#> + prefix bsfs: <http://bsfs.ai/schema/> + prefix bse: <http://bsfs.ai/schema/Entity#> + prefix bst: <http://bsfs.ai/schema/Tag#> + + bsfs:Entity rdfs:subClassOf bsfs:Node . + bsfs:Tag rdfs:subClassOf bsfs:Node . + bsfs:User rdfs:subClassOf bsfs:Node . + + xsd:boolean rdfs:subClassOf bsfs:Literal . + xsd:integer rdfs:subClassOf bsfs:Literal . + + bse:filesize rdfs:subClassOf bsfs:Predicate ; + rdfs:domain bsfs:Entity ; + rdfs:range xsd:integer ; + bsfs:unique "true"^^xsd:boolean . + + bse:tag rdfs:subClassOf bsfs:Predicate ; + rdfs:domain bsfs:Entity ; + rdfs:range bsfs:Tag ; + bsfs:unique "false"^^xsd:boolean . + + bse:shared rdfs:subClassOf bsfs:Predicate ; + rdfs:domain bsfs:Entity ; + rdfs:range xsd:boolean ; + bsfs:unique "true"^^xsd:boolean . + + bst:principal rdfs:subClassOf bsfs:Predicate ; + rdfs:domain bsfs:Tag ; + rdfs:range bsfs:Node ; + bsfs:unique "true"^^xsd:boolean . 
+ + # removed: bsfs:Collection + # removed: xsd:string + # removed: bse:comment (bsfs:Entity -> xsd:string) + # removed: bse:partOf (bsfs:Entity -> bsfs:Collection) + # removed: bse:author (bsfs:entity -> bsfs:User) + # removed: bst:usedIn (bsfs:Tag -> bsfs:Collection) + # removed: bsc:tag (bsfs:Collection -> bsfs:Tag) + + ''') + # store migrated to the new schema + store.schema = curr + self.assertEqual(store.schema, curr) + # instances of old classes were removed + self.assertSetEqual(set(store._graph), { + # node instances + (rdflib.URIRef('http://example.com/me/entity#1234'), rdflib.RDF.type, rdflib.URIRef(ns.bsfs.Entity)), + (rdflib.URIRef('http://example.com/me/entity#4321'), rdflib.RDF.type, rdflib.URIRef(ns.bsfs.Entity)), + (rdflib.URIRef('http://example.com/me/tag#1234'), rdflib.RDF.type, rdflib.URIRef(ns.bsfs.Tag)), + (rdflib.URIRef('http://example.com/me/tag#4321'), rdflib.RDF.type, rdflib.URIRef(ns.bsfs.Tag)), + (rdflib.URIRef('http://example.com/me'), rdflib.RDF.type, rdflib.URIRef(ns.bsfs.User)), + # filesize + (rdflib.URIRef('http://example.com/me/entity#1234'), rdflib.URIRef(p_filesize.uri), rdflib.Literal('1234', datatype=rdflib.XSD.integer)), + (rdflib.URIRef('http://example.com/me/entity#4321'), rdflib.URIRef(p_filesize.uri), rdflib.Literal('1234', datatype=rdflib.XSD.integer)), + # tags + (rdflib.URIRef('http://example.com/me/entity#1234'), rdflib.URIRef(p_tag.uri), rdflib.URIRef('http://example.com/me/tag#1234')), + (rdflib.URIRef('http://example.com/me/entity#1234'), rdflib.URIRef(p_tag.uri), rdflib.URIRef('http://example.com/me/tag#4321')), + (rdflib.URIRef('http://example.com/me/entity#4321'), rdflib.URIRef(p_tag.uri), rdflib.URIRef('http://example.com/me/tag#1234')), + (rdflib.URIRef('http://example.com/me/entity#4321'), rdflib.URIRef(p_tag.uri), rdflib.URIRef('http://example.com/me/tag#4321')), + # shared + (rdflib.URIRef('http://example.com/me/entity#1234'), rdflib.URIRef(p_shared.uri), rdflib.Literal('true', 
datatype=rdflib.XSD.boolean)), + (rdflib.URIRef('http://example.com/me/entity#4321'), rdflib.URIRef(p_shared.uri), rdflib.Literal('true', datatype=rdflib.XSD.boolean)), + }) + + # can only assign schema instances + self.assertRaises(TypeError, setattr, store, 'schema', None) + self.assertRaises(TypeError, setattr, store, 'schema', 1234) + self.assertRaises(TypeError, setattr, store, 'schema', 'foo') + class Foo(): pass + self.assertRaises(TypeError, setattr, store, 'schema', Foo()) + + # cannot migrate to incompatible schema + invalid = _schema.Schema.from_string(''' + prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> + prefix xsd: <http://www.w3.org/2001/XMLSchema#> + prefix bsfs: <http://bsfs.ai/schema/> + prefix bse: <http://bsfs.ai/schema/Entity#> + + bsfs:Entity rdfs:subClassOf bsfs:Node . + bsfs:Tag rdfs:subClassOf bsfs:Entity . # inconsistent with previous tag definition + + bse:tag rdfs:subClassOf bsfs:Predicate ; + rdfs:domain bsfs:Entity ; + rdfs:range bsfs:Tag ; + bsfs:unique "false"^^xsd:boolean . + + ''') + self.assertRaises(errors.ConsistencyError, setattr, store, 'schema', invalid) + invalid = _schema.Schema.from_string(''' + prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> + prefix xsd: <http://www.w3.org/2001/XMLSchema#> + prefix bsfs: <http://bsfs.ai/schema/> + prefix bse: <http://bsfs.ai/schema/Entity#> + + bsfs:Entity rdfs:subClassOf bsfs:Node . + bsfs:User rdfs:subClassOf bsfs:Node . + + # inconsistent predicate + bse:tag rdfs:subClassOf bsfs:Predicate ; + rdfs:domain bsfs:Entity ; + rdfs:range bsfs:User; + bsfs:unique "false"^^xsd:boolean . 
+ + ''') + self.assertRaises(errors.ConsistencyError, setattr, store, 'schema', invalid) + + + def test_transaction(self): + # store setup + store = SparqlStore.Open() + store.schema = self.schema + p_tag = store.schema.predicate(ns.bse.tag) + p_filesize = store.schema.predicate(ns.bse.filesize) + # prepare node types + ent_type = store.schema.node(ns.bsfs.Entity) + tag_type = store.schema.node(ns.bsfs.Tag) + ent_ids = {URI('http://example.com/me/entity#1234'), URI('http://example.com/me/entity#4321')} + tag_ids = {URI('http://example.com/me/tag#1234'), URI('http://example.com/me/tag#4321')} + # target instances + instances = { + # node instances + (rdflib.URIRef('http://example.com/me/entity#1234'), rdflib.RDF.type, rdflib.URIRef(ns.bsfs.Entity)), + (rdflib.URIRef('http://example.com/me/entity#4321'), rdflib.RDF.type, rdflib.URIRef(ns.bsfs.Entity)), + (rdflib.URIRef('http://example.com/me/tag#1234'), rdflib.RDF.type, rdflib.URIRef(ns.bsfs.Tag)), + (rdflib.URIRef('http://example.com/me/tag#4321'), rdflib.RDF.type, rdflib.URIRef(ns.bsfs.Tag)), + # links + (rdflib.URIRef('http://example.com/me/entity#1234'), rdflib.URIRef(p_tag.uri), rdflib.URIRef('http://example.com/me/tag#1234')), + (rdflib.URIRef('http://example.com/me/entity#1234'), rdflib.URIRef(p_tag.uri), rdflib.URIRef('http://example.com/me/tag#4321')), + (rdflib.URIRef('http://example.com/me/entity#4321'), rdflib.URIRef(p_tag.uri), rdflib.URIRef('http://example.com/me/tag#1234')), + (rdflib.URIRef('http://example.com/me/entity#4321'), rdflib.URIRef(p_tag.uri), rdflib.URIRef('http://example.com/me/tag#4321')), + } + + # add some data + store.create(ent_type, ent_ids) + store.create(tag_type, tag_ids) + store.set(ent_type, ent_ids, p_tag, tag_ids) + store.set(ent_type, ent_ids, p_filesize, {1234}) + # current transaction is visible + self.assertSetEqual(set(store._graph), instances | { + (rdflib.URIRef('http://example.com/me/entity#1234'), rdflib.URIRef(p_filesize.uri), rdflib.Literal(1234, 
datatype=rdflib.XSD.integer)), + (rdflib.URIRef('http://example.com/me/entity#4321'), rdflib.URIRef(p_filesize.uri), rdflib.Literal(1234, datatype=rdflib.XSD.integer)), + }) + + # rollback undoes previous changes + store.rollback() + self.assertSetEqual(set(store._graph), set()) + + # add some data once more + store.create(ent_type, ent_ids) + store.create(tag_type, tag_ids) + store.set(ent_type, ent_ids, p_tag, tag_ids) + store.set(ent_type, ent_ids, p_filesize, {1234}) + # current transaction is visible + self.assertSetEqual(set(store._graph), instances | { + (rdflib.URIRef('http://example.com/me/entity#1234'), rdflib.URIRef(p_filesize.uri), rdflib.Literal(1234, datatype=rdflib.XSD.integer)), + (rdflib.URIRef('http://example.com/me/entity#4321'), rdflib.URIRef(p_filesize.uri), rdflib.Literal(1234, datatype=rdflib.XSD.integer)), + }) + + # commit saves changes + store.commit() + self.assertSetEqual(set(store._graph), instances | { + (rdflib.URIRef('http://example.com/me/entity#1234'), rdflib.URIRef(p_filesize.uri), rdflib.Literal(1234, datatype=rdflib.XSD.integer)), + (rdflib.URIRef('http://example.com/me/entity#4321'), rdflib.URIRef(p_filesize.uri), rdflib.Literal(1234, datatype=rdflib.XSD.integer)), + }) + + # add additional data + store.create(ent_type, {URI('http://example.com/me/entity#hello')}) + store.set(ent_type, {URI('http://example.com/me/entity#hello')}, p_tag, tag_ids) + store.set(ent_type, ent_ids, p_filesize, {4321}) + self.assertSetEqual(set(store._graph), instances | { + (rdflib.URIRef('http://example.com/me/entity#hello'), rdflib.RDF.type, rdflib.URIRef(ns.bsfs.Entity)), + (rdflib.URIRef('http://example.com/me/entity#hello'), rdflib.URIRef(p_tag.uri), rdflib.URIRef('http://example.com/me/tag#1234')), + (rdflib.URIRef('http://example.com/me/entity#hello'), rdflib.URIRef(p_tag.uri), rdflib.URIRef('http://example.com/me/tag#4321')), + (rdflib.URIRef('http://example.com/me/entity#1234'), rdflib.URIRef(p_filesize.uri), rdflib.Literal(4321, 
datatype=rdflib.XSD.integer)), + (rdflib.URIRef('http://example.com/me/entity#4321'), rdflib.URIRef(p_filesize.uri), rdflib.Literal(4321, datatype=rdflib.XSD.integer)), + }) + + # rollback undoes only changes since last commit + store.rollback() + self.assertSetEqual(set(store._graph), instances | { + (rdflib.URIRef('http://example.com/me/entity#1234'), rdflib.URIRef(p_filesize.uri), rdflib.Literal(1234, datatype=rdflib.XSD.integer)), + (rdflib.URIRef('http://example.com/me/entity#4321'), rdflib.URIRef(p_filesize.uri), rdflib.Literal(1234, datatype=rdflib.XSD.integer)), + }) + + + def test_exists(self): + # store setup + store = SparqlStore.Open() + store.schema = self.schema + # prepare node types + ent_type = store.schema.node(ns.bsfs.Entity) + tag_type = store.schema.node(ns.bsfs.Tag) + # create node instances + ent_ids = { + URI('http://example.com/me/entity#1234'), + URI('http://example.com/me/entity#4321'), + } + tag_ids = { + URI('http://example.com/me/tag#1234'), + URI('http://example.com/me/tag#4321'), + } + store.create(ent_type, ent_ids) + store.create(tag_type, tag_ids) + + # exists returns all existing nodes of the correct type + self.assertSetEqual(ent_ids, set(store.exists(ent_type, ent_ids))) + self.assertSetEqual(tag_ids, set(store.exists(tag_type, tag_ids))) + # exists returns only nodes that match the type + self.assertSetEqual(set(), set(store.exists(ent_type, tag_ids))) + self.assertSetEqual({URI('http://example.com/me/entity#1234')}, set(store.exists(ent_type, { + URI('http://example.com/me/tag#1234'), + URI('http://example.com/me/entity#1234'), + }))) + # exists returns only nodes that exist + self.assertSetEqual(set(), set(store.exists(ent_type, { + URI('http://example.com/me/entity#foo'), + URI('http://example.com/me/entity#bar'), + }))) + self.assertSetEqual({URI('http://example.com/me/entity#1234')}, set(store.exists(ent_type, { + URI('http://example.com/me/entity#foo'), + URI('http://example.com/me/entity#1234'), + }))) + + + def 
test_create(self): + # setup + store = SparqlStore.Open() + store.schema = self.schema + + # node type must be valid + self.assertRaises(errors.ConsistencyError, store.create, self.schema.node(ns.bsfs.Entity).get_child(ns.bsfs.invalid), { + URI('http://example.com/me/entity#1234'), URI('http://example.com/me/entity#4321')}) + + # can create some nodes + ent_type = store.schema.node(ns.bsfs.Entity) + store.create(ent_type, {URI('http://example.com/me/entity#1234'), URI('http://example.com/me/entity#4321')}) + self.assertSetEqual(set(store._graph), { + (rdflib.URIRef('http://example.com/me/entity#1234'), rdflib.RDF.type, rdflib.URIRef(ns.bsfs.Entity)), + (rdflib.URIRef('http://example.com/me/entity#4321'), rdflib.RDF.type, rdflib.URIRef(ns.bsfs.Entity)), + }) + + # existing nodes are skipped + store.create(ent_type, {URI('http://example.com/me/entity#1234'), URI('http://example.com/me/entity#5678')}) + self.assertSetEqual(set(store._graph), { + # previous triples + (rdflib.URIRef('http://example.com/me/entity#1234'), rdflib.RDF.type, rdflib.URIRef(ns.bsfs.Entity)), + (rdflib.URIRef('http://example.com/me/entity#4321'), rdflib.RDF.type, rdflib.URIRef(ns.bsfs.Entity)), + # new triples + (rdflib.URIRef('http://example.com/me/entity#5678'), rdflib.RDF.type, rdflib.URIRef(ns.bsfs.Entity)), + }) + + # can create nodes of a different type + tag_type = store.schema.node(ns.bsfs.Tag) + store.create(tag_type, {URI('http://example.com/me/tag#1234'), URI('http://example.com/me/tag#4321')}) + self.assertSetEqual(set(store._graph), { + # previous triples + (rdflib.URIRef('http://example.com/me/entity#1234'), rdflib.RDF.type, rdflib.URIRef(ns.bsfs.Entity)), + (rdflib.URIRef('http://example.com/me/entity#4321'), rdflib.RDF.type, rdflib.URIRef(ns.bsfs.Entity)), + (rdflib.URIRef('http://example.com/me/entity#5678'), rdflib.RDF.type, rdflib.URIRef(ns.bsfs.Entity)), + # new triples + (rdflib.URIRef('http://example.com/me/tag#1234'), rdflib.RDF.type, rdflib.URIRef(ns.bsfs.Tag)), + 
(rdflib.URIRef('http://example.com/me/tag#4321'), rdflib.RDF.type, rdflib.URIRef(ns.bsfs.Tag)), + }) + + # creation does not change types of existing nodes + tag_type = store.schema.node(ns.bsfs.Tag) + store.create(tag_type, {URI('http://example.com/me/entity#1234'), URI('http://example.com/me/entity#4321')}) + self.assertSetEqual(set(store._graph), { + # previous triples + (rdflib.URIRef('http://example.com/me/entity#1234'), rdflib.RDF.type, rdflib.URIRef(ns.bsfs.Entity)), + (rdflib.URIRef('http://example.com/me/entity#4321'), rdflib.RDF.type, rdflib.URIRef(ns.bsfs.Entity)), + (rdflib.URIRef('http://example.com/me/entity#5678'), rdflib.RDF.type, rdflib.URIRef(ns.bsfs.Entity)), + # new triples + (rdflib.URIRef('http://example.com/me/tag#1234'), rdflib.RDF.type, rdflib.URIRef(ns.bsfs.Tag)), + (rdflib.URIRef('http://example.com/me/tag#4321'), rdflib.RDF.type, rdflib.URIRef(ns.bsfs.Tag)), + }) + + + def test_set(self): + # store setup + store = SparqlStore.Open() + store.schema = self.schema + # prepare node types + ent_type = store.schema.node(ns.bsfs.Entity) + user_type = store.schema.node(ns.bsfs.User) + tag_type = store.schema.node(ns.bsfs.Tag) + # prepare predicates + p_filesize = store.schema.predicate(ns.bse.filesize) + p_comment = store.schema.predicate(ns.bse.comment) + p_author = store.schema.predicate(ns.bse.author) + p_tag = store.schema.predicate(ns.bse.tag) + p_invalid = store.schema.predicate(ns.bsfs.Predicate).get_child(ns.bsfs.foo, range=store.schema.node(ns.bsfs.Tag)) + # create node instances + ent_ids = { + URI('http://example.com/me/entity#1234'), + URI('http://example.com/me/entity#4321'), + } + tag_ids = { + URI('http://example.com/me/tag#1234'), + URI('http://example.com/me/tag#4321'), + URI('http://example.com/me/tag#foo'), + URI('http://example.com/me/tag#bar'), + URI('http://example.com/me/tag#foobar'), + URI('http://example.com/me/tag#xyz'), + } + user_ids = { + URI('http://example.com/me/user#1234'), + 
URI('http://example.com/me/user#4321'), + } + store.create(ent_type, ent_ids) + store.create(tag_type, tag_ids) + store.create(user_type, user_ids) + + # invalid node_type is not permitted + self.assertRaises(errors.ConsistencyError, store.set, self.schema.node(ns.bsfs.Node).get_child(ns.bse.foo), + ent_ids, p_comment, {'hello world'}) + + # invalid predicate is not permitted + self.assertRaises(errors.ConsistencyError, store.set, ent_type, ent_ids, p_invalid, {'http://example.com/me/tag#1234'}) + + # predicate must match node_type + self.assertRaises(errors.ConsistencyError, store.set, tag_type, tag_ids, p_filesize, {1234}) + + # empty value does not change the graph + plen = len(store._graph) + store.set(ent_type, ent_ids, p_filesize, []) + store.set(ent_type, ent_ids, p_comment, []) + store.set(ent_type, ent_ids, p_author, []) + store.set(ent_type, ent_ids, p_tag, []) + self.assertEqual(plen, len(store._graph)) + + # cannot set multiple values on unique predicates + self.assertRaises(ValueError, store.set, ent_type, ent_ids, p_filesize, {1234, 4321}) + self.assertRaises(ValueError, store.set, ent_type, ent_ids, p_author, {URI('http://example.com/me/user#1234'), URI('http://example.com/me/user#4321')}) + + # value nodes must exist + self.assertRaises(errors.InstanceError, store.set, ent_type, ent_ids, p_author, {URI('http://example.com/me/user#invalid')}) + self.assertRaises(errors.InstanceError, store.set, ent_type, ent_ids, p_tag, {URI('http://example.com/me/tag#invalid')}) + + # value node types must be consistent with the predicate + self.assertRaises(errors.InstanceError, store.set, ent_type, ent_ids, p_author, {URI('http://example.com/me/entity#1234')}) + self.assertRaises(errors.InstanceError, store.set, ent_type, ent_ids, p_tag, {URI('http://example.com/me/entity#1234')}) + + # all value nodes must exist and be consistent + self.assertRaises(errors.InstanceError, store.set, ent_type, ent_ids, p_tag, { + URI('http://example.com/me/tag#1234'), 
URI('http://example.com/me/tag#invalid'), URI('http://example.com/me/entity#1234')}) + + + # set unique literal + store.set(ent_type, ent_ids, p_filesize, {1234}) + self.assertIn( + (rdflib.URIRef('http://example.com/me/entity#1234'), rdflib.URIRef(p_filesize.uri), rdflib.Literal('1234', datatype=rdflib.XSD.integer)), + set(store._graph)) + self.assertIn( + (rdflib.URIRef('http://example.com/me/entity#4321'), rdflib.URIRef(p_filesize.uri), rdflib.Literal('1234', datatype=rdflib.XSD.integer)), + set(store._graph)) + # re-assigning the same node changes nothing + store.set(ent_type, ent_ids, p_filesize, {1234}) + self.assertIn( + (rdflib.URIRef('http://example.com/me/entity#1234'), rdflib.URIRef(p_filesize.uri), rdflib.Literal('1234', datatype=rdflib.XSD.integer)), + set(store._graph)) + self.assertIn( + (rdflib.URIRef('http://example.com/me/entity#4321'), rdflib.URIRef(p_filesize.uri), rdflib.Literal('1234', datatype=rdflib.XSD.integer)), + set(store._graph)) + # cannot set multiple unique literals + self.assertRaises(ValueError, store.set, ent_type, ent_ids, p_filesize, {1234, 4321}) # same test as above + # unique literals are overwritten by set + store.set(ent_type, ent_ids, p_filesize, {4321}) + self.assertIn( + (rdflib.URIRef('http://example.com/me/entity#1234'), rdflib.URIRef(p_filesize.uri), rdflib.Literal('4321', datatype=rdflib.XSD.integer)), + set(store._graph)) + self.assertNotIn( + (rdflib.URIRef('http://example.com/me/entity#1234'), rdflib.URIRef(p_filesize.uri), rdflib.Literal('1234', datatype=rdflib.XSD.integer)), + set(store._graph)) + self.assertIn( + (rdflib.URIRef('http://example.com/me/entity#4321'), rdflib.URIRef(p_filesize.uri), rdflib.Literal('4321', datatype=rdflib.XSD.integer)), + set(store._graph)) + self.assertNotIn( + (rdflib.URIRef('http://example.com/me/entity#4321'), rdflib.URIRef(p_filesize.uri), rdflib.Literal('1234', datatype=rdflib.XSD.integer)), + set(store._graph)) + + # set non-unique literal + store.set(ent_type, ent_ids, 
p_comment, {'foobar'}) + self.assertTrue(set(store._graph).issuperset({ + (rdflib.URIRef('http://example.com/me/entity#1234'), rdflib.URIRef(p_comment.uri), rdflib.Literal('foobar', datatype=rdflib.XSD.string)), + (rdflib.URIRef('http://example.com/me/entity#4321'), rdflib.URIRef(p_comment.uri), rdflib.Literal('foobar', datatype=rdflib.XSD.string)), + })) + # re-assigning the same node changes nothing + store.set(ent_type, ent_ids, p_comment, {'foobar'}) + self.assertTrue(set(store._graph).issuperset({ + (rdflib.URIRef('http://example.com/me/entity#1234'), rdflib.URIRef(p_comment.uri), rdflib.Literal('foobar', datatype=rdflib.XSD.string)), + (rdflib.URIRef('http://example.com/me/entity#4321'), rdflib.URIRef(p_comment.uri), rdflib.Literal('foobar', datatype=rdflib.XSD.string)), + })) + # can set multiple non-unique literals at once + store.set(ent_type, ent_ids, p_comment, {'foo', 'bar'}) + self.assertTrue(set(store._graph).issuperset({ + (rdflib.URIRef('http://example.com/me/entity#1234'), rdflib.URIRef(p_comment.uri), rdflib.Literal('foo', datatype=rdflib.XSD.string)), + (rdflib.URIRef('http://example.com/me/entity#1234'), rdflib.URIRef(p_comment.uri), rdflib.Literal('bar', datatype=rdflib.XSD.string)), + (rdflib.URIRef('http://example.com/me/entity#4321'), rdflib.URIRef(p_comment.uri), rdflib.Literal('foo', datatype=rdflib.XSD.string)), + (rdflib.URIRef('http://example.com/me/entity#4321'), rdflib.URIRef(p_comment.uri), rdflib.Literal('bar', datatype=rdflib.XSD.string)), + })) + # non-unique literals are appended by set + store.set(ent_type, ent_ids, p_comment, {'hello world'}) + self.assertTrue(set(store._graph).issuperset({ + (rdflib.URIRef('http://example.com/me/entity#1234'), rdflib.URIRef(p_comment.uri), rdflib.Literal('foo', datatype=rdflib.XSD.string)), + (rdflib.URIRef('http://example.com/me/entity#1234'), rdflib.URIRef(p_comment.uri), rdflib.Literal('bar', datatype=rdflib.XSD.string)), + (rdflib.URIRef('http://example.com/me/entity#1234'), 
rdflib.URIRef(p_comment.uri), rdflib.Literal('hello world', datatype=rdflib.XSD.string)), + (rdflib.URIRef('http://example.com/me/entity#4321'), rdflib.URIRef(p_comment.uri), rdflib.Literal('foo', datatype=rdflib.XSD.string)), + (rdflib.URIRef('http://example.com/me/entity#4321'), rdflib.URIRef(p_comment.uri), rdflib.Literal('bar', datatype=rdflib.XSD.string)), + (rdflib.URIRef('http://example.com/me/entity#4321'), rdflib.URIRef(p_comment.uri), rdflib.Literal('hello world', datatype=rdflib.XSD.string)), + })) + + # set unique node + store.set(ent_type, ent_ids, p_author, {URI('http://example.com/me/user#1234')}) + self.assertIn( + (rdflib.URIRef('http://example.com/me/entity#1234'), rdflib.URIRef(p_author.uri), rdflib.URIRef('http://example.com/me/user#1234')), + set(store._graph)) + self.assertIn( + (rdflib.URIRef('http://example.com/me/entity#4321'), rdflib.URIRef(p_author.uri), rdflib.URIRef('http://example.com/me/user#1234')), + set(store._graph)) + # re-assigning the same node changes nothing + store.set(ent_type, ent_ids, p_author, {URI('http://example.com/me/user#1234')}) + self.assertIn( + (rdflib.URIRef('http://example.com/me/entity#1234'), rdflib.URIRef(p_author.uri), rdflib.URIRef('http://example.com/me/user#1234')), + set(store._graph)) + self.assertIn( + (rdflib.URIRef('http://example.com/me/entity#4321'), rdflib.URIRef(p_author.uri), rdflib.URIRef('http://example.com/me/user#1234')), + set(store._graph)) + # cannot set multiple unique nodes + self.assertRaises(ValueError, store.set, ent_type, ent_ids, p_author, {URI('http://example.com/me/user#1234'), URI('http://example.com/me/user#4321')}) + # unique nodes are overwritten by set + store.set(ent_type, ent_ids, p_author, {URI('http://example.com/me/user#4321')}) + self.assertIn( + (rdflib.URIRef('http://example.com/me/entity#1234'), rdflib.URIRef(p_author.uri), rdflib.URIRef('http://example.com/me/user#4321')), + set(store._graph)) + self.assertNotIn( + 
(rdflib.URIRef('http://example.com/me/entity#1234'), rdflib.URIRef(p_author.uri), rdflib.URIRef('http://example.com/me/user#1234')), + set(store._graph)) + self.assertIn( + (rdflib.URIRef('http://example.com/me/entity#4321'), rdflib.URIRef(p_author.uri), rdflib.URIRef('http://example.com/me/user#4321')), + set(store._graph)) + self.assertNotIn( + (rdflib.URIRef('http://example.com/me/entity#4321'), rdflib.URIRef(p_author.uri), rdflib.URIRef('http://example.com/me/user#1234')), + set(store._graph)) + + # set non-unique node + store.set(ent_type, ent_ids, p_tag, {'http://example.com/me/tag#foobar'}) + self.assertTrue(set(store._graph).issuperset({ + (rdflib.URIRef('http://example.com/me/entity#1234'), rdflib.URIRef(p_tag.uri), rdflib.URIRef('http://example.com/me/tag#foobar')), + (rdflib.URIRef('http://example.com/me/entity#4321'), rdflib.URIRef(p_tag.uri), rdflib.URIRef('http://example.com/me/tag#foobar')), + })) + # re-assigning the same node changes nothing + store.set(ent_type, ent_ids, p_tag, {'http://example.com/me/tag#foobar'}) + self.assertTrue(set(store._graph).issuperset({ + (rdflib.URIRef('http://example.com/me/entity#1234'), rdflib.URIRef(p_tag.uri), rdflib.URIRef('http://example.com/me/tag#foobar')), + (rdflib.URIRef('http://example.com/me/entity#4321'), rdflib.URIRef(p_tag.uri), rdflib.URIRef('http://example.com/me/tag#foobar')), + })) + # can set multiple non-unique literals at once + store.set(ent_type, ent_ids, p_tag, {'http://example.com/me/tag#1234', 'http://example.com/me/tag#4321'}) + self.assertTrue(set(store._graph).issuperset({ + (rdflib.URIRef('http://example.com/me/entity#1234'), rdflib.URIRef(p_tag.uri), rdflib.URIRef('http://example.com/me/tag#1234')), + (rdflib.URIRef('http://example.com/me/entity#1234'), rdflib.URIRef(p_tag.uri), rdflib.URIRef('http://example.com/me/tag#4321')), + (rdflib.URIRef('http://example.com/me/entity#4321'), rdflib.URIRef(p_tag.uri), rdflib.URIRef('http://example.com/me/tag#1234')), + 
(rdflib.URIRef('http://example.com/me/entity#4321'), rdflib.URIRef(p_tag.uri), rdflib.URIRef('http://example.com/me/tag#4321')), + })) + # non-unique nodes are appended by set + store.set(ent_type, ent_ids, p_tag, {'http://example.com/me/tag#foo', 'http://example.com/me/tag#bar'}) + self.assertTrue(set(store._graph).issuperset({ + (rdflib.URIRef('http://example.com/me/entity#1234'), rdflib.URIRef(p_tag.uri), rdflib.URIRef('http://example.com/me/tag#1234')), + (rdflib.URIRef('http://example.com/me/entity#1234'), rdflib.URIRef(p_tag.uri), rdflib.URIRef('http://example.com/me/tag#4321')), + (rdflib.URIRef('http://example.com/me/entity#1234'), rdflib.URIRef(p_tag.uri), rdflib.URIRef('http://example.com/me/tag#foo')), + (rdflib.URIRef('http://example.com/me/entity#1234'), rdflib.URIRef(p_tag.uri), rdflib.URIRef('http://example.com/me/tag#bar')), + (rdflib.URIRef('http://example.com/me/entity#4321'), rdflib.URIRef(p_tag.uri), rdflib.URIRef('http://example.com/me/tag#1234')), + (rdflib.URIRef('http://example.com/me/entity#4321'), rdflib.URIRef(p_tag.uri), rdflib.URIRef('http://example.com/me/tag#4321')), + (rdflib.URIRef('http://example.com/me/entity#4321'), rdflib.URIRef(p_tag.uri), rdflib.URIRef('http://example.com/me/tag#foo')), + (rdflib.URIRef('http://example.com/me/entity#4321'), rdflib.URIRef(p_tag.uri), rdflib.URIRef('http://example.com/me/tag#bar')), + })) + + # nothing happens when no guids are given + plen = len(store._graph) + store.set(ent_type, set(), p_comment, {'xyz'}) + store.set(ent_type, set(), p_tag, {URI('http://example.com/me/tag#xyz')}) + self.assertEqual(plen, len(store._graph)) + + # guids must be instances of node_type + self.assertRaises(errors.InstanceError, store.set, ent_type, tag_ids, p_comment, {'xyz'}) + # inexistent guids + self.assertRaises(errors.InstanceError, store.set, ent_type, {URI('http://example.com/me/entity#foobar')}, p_comment, {'xyz'}) + + +## main ## + +if __name__ == '__main__': + unittest.main() + +## EOF ## diff --git 
a/test/utils/__init__.py b/test/utils/__init__.py new file mode 100644 index 0000000..e69de29 --- /dev/null +++ b/test/utils/__init__.py diff --git a/test/utils/test_commons.py b/test/utils/test_commons.py new file mode 100644 index 0000000..ce73788 --- /dev/null +++ b/test/utils/test_commons.py @@ -0,0 +1,31 @@ +""" + +Part of the tagit test suite. +A copy of the license is provided with the project. +Author: Matthias Baumgartner, 2022 +""" +# imports +import unittest + +# objects to test +from bsfs.utils.commons import typename + + +## code ## + +class TestCommons(unittest.TestCase): + def test_typename(self): + class Foo(): pass + self.assertEqual(typename(Foo()), 'Foo') + self.assertEqual(typename('hello'), 'str') + self.assertEqual(typename(123), 'int') + self.assertEqual(typename(None), 'NoneType') + + + +## main ## + +if __name__ == '__main__': + unittest.main() + +## EOF ## diff --git a/test/utils/test_uri.py b/test/utils/test_uri.py new file mode 100644 index 0000000..770e65a --- /dev/null +++ b/test/utils/test_uri.py @@ -0,0 +1,189 @@ +""" + +Part of the tagit test suite. +A copy of the license is provided with the project. 
+Author: Matthias Baumgartner, 2022 +""" +# imports +import operator +import unittest + +# objects to test +from bsfs.utils.uri import URI + + +## code ## + +class TestURI(unittest.TestCase): + + def test_new(self): + # cannot create an unparseable URI + self.assertRaises(ValueError, URI, 'http://') + # returns URI otherwise + self.assertIsInstance(URI('http://user@www.example.com:1234/path0/path1?query#fragment'), URI) + + def test_is_parseable(self): + # empty string is a parseable uri + self.assertTrue(URI.is_parseable('')) + # examples from the RFC are parseable + self.assertTrue(URI.is_parseable('foo://example.com:8042/over/there?name=ferret#nose')) + self.assertTrue(URI.is_parseable('urn:example:animal:ferret:nose')) + self.assertTrue(URI.is_parseable('mailto:fred@xample.com')) + self.assertTrue(URI.is_parseable('www.w3.org/Addressing/')) + self.assertTrue(URI.is_parseable('ftp://cnn.example.com&store=breaking_news@10.0.0.1/top_story.htm')) + self.assertTrue(URI.is_parseable('ftp://ftp.is.co.za/rfc/rfc1808.txt')) + self.assertTrue(URI.is_parseable('http://www.ietf.org/rfc/rfc2396.txt')) + self.assertTrue(URI.is_parseable('ldap://[2001:db8::7]/c=GB?objectClass?one')) + self.assertTrue(URI.is_parseable('mailto:John.Doe@example.com')) + self.assertTrue(URI.is_parseable('news:comp.infosystems.www.servers.unix')) + self.assertTrue(URI.is_parseable('tel:+1-816-555-1212')) + self.assertTrue(URI.is_parseable('telnet://192.0.2.16:80/')) + self.assertTrue(URI.is_parseable('urn:oasis:names:specification:docbook:dtd:xml:4.1.2')) + + # uri cannot end with a scheme delimiter + self.assertFalse(URI.is_parseable('http://')) + # port must be a number + self.assertFalse(URI.is_parseable('http://example.com:foo/')) + # the double slash (//) implies a authority + self.assertFalse(URI.is_parseable('http:///path0/path1?query#fragment')) + + def test_compose(self): + self.assertEqual(URI.compose('path'), '/path') + self.assertEqual(URI.compose('/path'), '/path') # leading slash is 
not repeated + self.assertEqual(URI.compose('path', scheme='scheme'), 'scheme:/path') + self.assertEqual(URI.compose('path', authority='authority'), '//authority/path') + self.assertEqual(URI.compose('path', host='host'), '//host/path') + self.assertEqual(URI.compose('path', user='user'), '/path') # user w/o host is ignored + self.assertEqual(URI.compose('path', host='host', user='user'), '//user@host/path') + self.assertEqual(URI.compose('path', port='port'), '/path') # port w/o host is ignored + self.assertEqual(URI.compose('path', host='host', port=1234), '//host:1234/path') + self.assertEqual(URI.compose('path', host='host', port='1234'), '//host:1234/path') + self.assertRaises(ValueError, URI.compose, 'path', host='host', port='foo') # port must be a number + self.assertEqual(URI.compose('path', host='host', user='foo', port='1234'), '//foo@host:1234/path') + self.assertEqual(URI.compose('path', query='query'), '/path?query') + self.assertEqual(URI.compose('path', fragment='fragment'), '/path#fragment') + self.assertEqual(URI.compose('path', 'scheme', 'authority', 'user', 'host', 1234, 'query', 'fragment'), + 'scheme://user@host:1234/path?query#fragment') + + def test_get(self): + # get returns the respective component + self.assertEqual(URI('http://user@www.example.com:1234/path0/path1?query#fragment').get('scheme'), 'http') + self.assertEqual(URI('http://user@www.example.com:1234/path0/path1?query#fragment').get('authority'), 'user@www.example.com:1234') + self.assertEqual(URI('http://user@www.example.com:1234/path0/path1?query#fragment').get('userinfo'), 'user') + self.assertEqual(URI('http://user@www.example.com:1234/path0/path1?query#fragment').get('host'), 'www.example.com') + self.assertEqual(URI('http://user@www.example.com:1234/path0/path1?query#fragment').get('port'), 1234) + self.assertEqual(URI('http://user@www.example.com:1234/path0/path1?query#fragment').get('path'), '/path0/path1') + 
self.assertEqual(URI('http://user@www.example.com:1234/path0/path1?query#fragment').get('query'), 'query') + self.assertEqual(URI('http://user@www.example.com:1234/path0/path1?query#fragment').get('fragment'), 'fragment') + # get returns a default value if the component is missing + class Foo(): pass + foo = Foo() + self.assertEqual(URI('//user@www.example.com:1234/path0/path1?query#fragment').get('scheme', foo), foo) + self.assertEqual(URI('/path0/path1?query#fragment').get('authority', foo), foo) + self.assertEqual(URI('http://www.example.com:1234/path0/path1?query#fragment').get('userinfo', foo), foo) + self.assertEqual(URI('/path0/path1?query#fragment').get('host', foo), foo) + self.assertEqual(URI('http://user@www.example.com/path0/path1?query#fragment').get('port', foo), foo) + self.assertEqual(URI('http://user@www.example.com:1234/path0/path1#fragment').get('query', foo), foo) + self.assertEqual(URI('http://user@www.example.com:1234/path0/path1?query').get('fragment', foo), foo) + # can only get components + self.assertRaises(ValueError, URI('').get, 'foobar') + + def test_scheme(self): + # full URI + self.assertEqual(URI('http://user@www.example.com:1234/path0/path1?query#fragment').scheme, 'http') + self.assertEqual(URI('ftp://user@www.example.com:1234/path0/path1?query#fragment').scheme, 'ftp') + self.assertEqual(URI('myown://user@www.example.com:1234/path0/path1?query#fragment').scheme, 'myown') + # empty scheme + self.assertRaises(ValueError, getattr, URI('www.example.com/path0/path1?query#fragment'), 'scheme') + # empty URI + self.assertRaises(ValueError, getattr, URI(''), 'scheme') + + def test_authority(self): + # full URI + self.assertEqual(URI('http://user@www.example.com:1234/path0/path1?query#fragment').authority, 'user@www.example.com:1234') + # empty authority + self.assertRaises(ValueError, getattr, URI('http/path0/path1?query#fragment'), 'authority') + # empty URI + self.assertRaises(ValueError, getattr, URI(''), 'authority') + + def 
test_userinfo(self): + # full URI + self.assertEqual(URI('http://user@www.example.com:1234/path0/path1?query#fragment').scheme, 'http') + # empty authority + self.assertRaises(ValueError, getattr, URI('http/path0/path1?query#fragment'), 'userinfo') + # empty userinfo + self.assertRaises(ValueError, getattr, URI('http://www.example.com:1234/path0/path1?query#fragment'), 'userinfo') + # empty URI + self.assertRaises(ValueError, getattr, URI(''), 'userinfo') + + def test_host(self): + # full URI + self.assertEqual(URI('http://user@www.example.com:1234/path0/path1?query#fragment').host, 'www.example.com') + # IPv4 host + self.assertEqual(URI('http://user@10.0.0.1:1234/path0/path1?query#fragment').host, '10.0.0.1') + # IPv6 host + self.assertEqual(URI('http://user@[::64]:1234/path0/path1?query#fragment').host, '[::64]') + # empty authority + self.assertRaises(ValueError, getattr, URI('http/path0/path1?query#fragment'), 'host') + # empty URI + self.assertRaises(ValueError, getattr, URI(''), 'host') + + def test_port(self): + # full URI + self.assertEqual(URI('http://user@www.example.com:1234/path0/path1?query#fragment').port, 1234) + # empty authority + self.assertRaises(ValueError, getattr, URI('http/path0/path1?query#fragment'), 'port') + # empty port + self.assertRaises(ValueError, getattr, URI('http://user@www.example.com/path0/path1?query#fragment'), 'port') + # empty URI + self.assertRaises(ValueError, getattr, URI(''), 'port') + + def test_path(self): + # full URI + self.assertEqual(URI('http://user@www.example.com:1234/path0/path1?query#fragment').path, '/path0/path1') + # empty path + self.assertEqual(URI('http://user@www.example.com:1234?query#fragment').path, '') + # empty URI + self.assertEqual(URI('').path, '') + + def test_query(self): + # full URI + self.assertEqual(URI('http://user@www.example.com:1234/path0/path1?query#fragment').query, 'query') + # empty query + self.assertRaises(ValueError, getattr, 
URI('http://user@www.example.com:1234/path0/path1#fragment'), 'query') + # empty URI + self.assertRaises(ValueError, getattr, URI(''), 'query') + + def test_fragment(self): + # full URI + self.assertEqual(URI('http://user@www.example.com:1234/path0/path1?query#fragment').fragment, 'fragment') + # empty fragment + self.assertRaises(ValueError, getattr, URI('http://user@www.example.com:1234/path0/path1?query'), 'fragment') + # empty URI + self.assertRaises(ValueError, getattr, URI(''), 'fragment') + + def test_overloaded(self): + # composition + self.assertIsInstance(URI('http://user@www.example.com:1234/{}/path1?{}#fragment') + 'hello', URI) + self.assertIsInstance(URI('http://user@www.example.com:1234/{}/path1?{}#fragment') * 2, URI) + self.assertIsInstance(2 * URI('http://user@www.example.com:1234/{}/path1?{}#fragment'), URI) # rmul + self.assertIsInstance(URI('http://user@www.example.com:1234/{}/path1?{}#fragment').join(['hello', 'world']) , URI) + # stripping + self.assertIsInstance(URI('http://user@www.example.com:1234/path0/path1?query#fragment').strip(), URI) + self.assertIsInstance(URI('http://user@www.example.com:1234/path0/path1?query#fragment').lstrip(), URI) + self.assertIsInstance(URI('http://user@www.example.com:1234/path0/path1?query#fragment').rstrip(), URI) + # case fold + self.assertIsInstance(URI('http://user@www.example.com:1234/path0/path1?query#fragment').lower(), URI) + self.assertIsInstance(URI('http://user@www.example.com:1234/path0/path1?query#fragment').upper(), URI) + # formatting + self.assertIsInstance(URI('http://user@www.example.com:1234/{}/path1?{}#fragment').format('hello', 'world'), URI) + self.assertIsInstance(URI('http://user@www.example.com:1234/%s/path1?%s#fragment') % ('hello', 'world'), URI) + self.assertIsInstance(URI('http://user@www.example.com:1234/path0/path1?query#fragment').replace('path0', 'pathX'), URI) + + +## main ## + +if __name__ == '__main__': + unittest.main() + +## EOF ## diff --git a/test/utils/test_uuid.py 
b/test/utils/test_uuid.py new file mode 100644 index 0000000..49176d4 --- /dev/null +++ b/test/utils/test_uuid.py @@ -0,0 +1,92 @@
+"""
+
+Part of the bsfs test suite.
+A copy of the license is provided with the project.
+Author: Matthias Baumgartner, 2022
+"""
+# imports
+import os
+import re
+import unittest
+
+# objects to test
+from bsfs.utils.uuid import UUID, UCID
+
+
+## code ##
+
+class TestUUID(unittest.TestCase):
+    """Test the UUID generator.
+
+    The UUID is expected to generate random strings of 64 characters (0-9, A-F, case insensitive).
+    Due to the random nature of UUIDs, we cannot actually check if an uid is 'valid' besides
+    matching the expected format.
+
+    At best, we can check if the number of collisions (values generated repeatedly) is below some
+    threshold. One would expect the number of collisions to increase with the number of generated uids.
+    Hence, we only perform an empirical test, whereas the exact test parameters (NUM_SAMPLES,
+    COLLISIONS_THRESHOLD) are subject to the application requirements. Note that this simple test
+    cannot replace a thorough statistical analysis.
+
+    """
+
+    # expected uuid string format
+    _RX_FORMAT = re.compile('[0-9A-Fa-f]{64}')
+
+    # number of uuids to generate for collisions test
+    _NUM_SAMPLES = 100_000
+
+    # number of permitted collisions (less-than test; exclusive)
+    _COLLISIONS_THRESHOLD = 2 # zero or one collisions to pass the test
+
+    def _test_format(self, uid):
+        self.assertIsInstance(uid, str)
+        self.assertTrue(self._RX_FORMAT.fullmatch(uid) is not None)
+
+    def test_call(self):
+        gen = UUID()
+        # w/o content
+        self._test_format(gen())
+        # with content
+        self._test_format(gen('hello world'))
+
+    def test_iter(self):
+        for _, uid in zip(range(1_000), iter(UUID())):
+            self._test_format(uid)
+
+    def test_next(self):
+        gen = UUID()
+        for _ in range(1_000):
+            uid = next(gen)
+            self._test_format(uid)
+
+    def test_collisions(self):
+        # generated uuids are reasonably unique.
+        # Note that we cannot guarantee no collisions.
+        uids = {uid for _, uid in zip(range(self._NUM_SAMPLES), UUID())}
+        self.assertGreater(len(uids), self._NUM_SAMPLES - self._COLLISIONS_THRESHOLD)
+        # uuids are reasonably unique across instances
+        uidA = {uid for _, uid in zip(range(self._NUM_SAMPLES), UUID())}
+        uidB = {uid for _, uid in zip(range(self._NUM_SAMPLES), UUID())}
+        self.assertLess(len(uidA & uidB), self._COLLISIONS_THRESHOLD)
+        # uuids are reasonably unique despite identical seeds.
+        uidA = {uid for _, uid in zip(range(self._NUM_SAMPLES), UUID(seed=123))}
+        uidB = {uid for _, uid in zip(range(self._NUM_SAMPLES), UUID(seed=123))}
+        self.assertLess(len(uidA & uidB), self._COLLISIONS_THRESHOLD)
+
+
+class TestUCID(unittest.TestCase):
+    def setUp(self):
+        self._checksum = 'a948904f2f0f479b8f8197694b30184b0d2ed1c1cd2a1ec0fb85d299a192a447' # sha256
+        self._path = os.path.join(os.path.dirname(__file__), 'testfile.t')
+
+    def test_from_path(self):
+        self.assertEqual(UCID.from_path(self._path), self._checksum)
+
+
+## main ##
+
+if __name__ == '__main__':
+    unittest.main()
+
+## EOF ##
diff --git a/test/utils/testfile.t b/test/utils/testfile.t new file mode 100644 index 0000000..3b18e51 --- /dev/null +++ b/test/utils/testfile.t @@ -0,0 +1 @@ +hello world |