From d2b4a528465dc01e8db92b61293c458c7911a333 Mon Sep 17 00:00:00 2001 From: Matthias Baumgartner Date: Mon, 31 Oct 2022 12:21:22 +0100 Subject: essential interfaces (reader, extractor, errors) --- bsie/__init__.py | 13 +++++++++++++ bsie/base/__init__.py | 24 ++++++++++++++++++++++++ bsie/base/errors.py | 22 ++++++++++++++++++++++ bsie/base/extractor.py | 50 ++++++++++++++++++++++++++++++++++++++++++++++++++ bsie/base/reader.py | 48 ++++++++++++++++++++++++++++++++++++++++++++++++ bsie/utils/__init__.py | 20 ++++++++++++++++++++ bsie/utils/bsfs.py | 20 ++++++++++++++++++++ bsie/utils/node.py | 39 +++++++++++++++++++++++++++++++++++++++ 8 files changed, 236 insertions(+) create mode 100644 bsie/__init__.py create mode 100644 bsie/base/__init__.py create mode 100644 bsie/base/errors.py create mode 100644 bsie/base/extractor.py create mode 100644 bsie/base/reader.py create mode 100644 bsie/utils/__init__.py create mode 100644 bsie/utils/bsfs.py create mode 100644 bsie/utils/node.py diff --git a/bsie/__init__.py b/bsie/__init__.py new file mode 100644 index 0000000..2f2477a --- /dev/null +++ b/bsie/__init__.py @@ -0,0 +1,13 @@ +"""The BSIE module extracts triples from files for insertion into a BSFS storage. + +Part of the bsie module. +A copy of the license is provided with the project. +Author: Matthias Baumgartner, 2022 +""" +# imports +import typing + +# exports +__all__: typing.Sequence[str] = [] + +## EOF ## diff --git a/bsie/base/__init__.py b/bsie/base/__init__.py new file mode 100644 index 0000000..0154862 --- /dev/null +++ b/bsie/base/__init__.py @@ -0,0 +1,24 @@ +"""The base module defines the BSIE interfaces. + +You'll mostly find abstract classes here. + +Part of the bsie module. +A copy of the license is provided with the project. +Author: Matthias Baumgartner, 2022 +""" +# imports +import typing + +# inner-module imports +from . import errors +from . import extractor +from . 
import reader + +# exports +__all__: typing.Sequence[str] = ( + 'errors', + 'extractor', + 'reader', + ) + +## EOF ## diff --git a/bsie/base/errors.py b/bsie/base/errors.py new file mode 100644 index 0000000..f86ffb2 --- /dev/null +++ b/bsie/base/errors.py @@ -0,0 +1,22 @@ +"""Common BSIE exceptions. + +Part of the bsie module. +A copy of the license is provided with the project. +Author: Matthias Baumgartner, 2022 +""" +# imports +import typing + +# exports +__all__: typing.Sequence[str] = [] + + +## code ## + +class _BSIE_Error(Exception): + """Generic BSIE error.""" + +class ReaderError(_BSIE_Error): + """The Reader failed to read the given file.""" + +## EOF ## diff --git a/bsie/base/extractor.py b/bsie/base/extractor.py new file mode 100644 index 0000000..d5b0922 --- /dev/null +++ b/bsie/base/extractor.py @@ -0,0 +1,50 @@ +"""The Extractor classes transform content into triples. + +Part of the bsie module. +A copy of the license is provided with the project. +Author: Matthias Baumgartner, 2022 +""" +# imports +import abc +import collections +import typing + +# inner-module imports +from . import reader +from bsie.utils import node +from bsie.utils.bsfs import URI, typename + +# exports +__all__: typing.Sequence[str] = ( + 'Extractor', + ) + + +## code ## + +class Extractor(abc.ABC, collections.abc.Iterable, collections.abc.Callable): + """Produce (node, predicate, value)-triples from some content.""" + + # what type of content is expected (i.e. reader subclass). 
+ CONTENT_READER: typing.Optional[typing.Type[reader.Reader]] = None + + def __str__(self) -> str: + return typename(self) + + def __repr__(self) -> str: + return f'{typename(self)}()' + + @abc.abstractmethod + def schema(self) -> str: + """Return the schema (predicates and nodes) produced by this Extractor.""" + + @abc.abstractmethod + def extract( + self, + subject: node.Node, + content: typing.Any, + predicates: typing.Iterable[URI], + ) -> typing.Iterator[typing.Tuple[node.Node, URI, typing.Any]]: + """Return (node, predicate, value) triples.""" + +## EOF ## diff --git a/bsie/base/reader.py b/bsie/base/reader.py new file mode 100644 index 0000000..f29e451 --- /dev/null +++ b/bsie/base/reader.py @@ -0,0 +1,48 @@ +"""The Reader classes return high-level content structures from files. + +The Reader fulfills two purposes: + First, it brokers between multiple libraries and file formats. + Second, it separates multiple aspects of a file into distinct content types. + +Part of the bsie module. +A copy of the license is provided with the project. +Author: Matthias Baumgartner, 2022 +""" +# imports +import abc +import typing + +# inner-module imports +from bsie.utils.bsfs import URI, typename + +# exports +__all__: typing.Sequence[str] = ( + 'Aggregator', + 'Reader', + ) + + +## code ## + +class Reader(abc.ABC): + """Read and return some content from a file.""" + + # In what data structure content is returned + CONTENT_TYPE = typing.Union[typing.Any] + # NOTE: Child classes must also assign a typing.Union even if there's + # only one option + + def __str__(self) -> str: + return typename(self) + + def __repr__(self) -> str: + return f'{typename(self)}()' + + # FIXME: How about using contexts instead of calls? + @abc.abstractmethod + def __call__(self, path: URI) -> CONTENT_TYPE: + """Return some content of the file at *path*. + Raises a `ReaderError` if the reader cannot make sense of the file format. 
+ """ + +## EOF ## diff --git a/bsie/utils/__init__.py b/bsie/utils/__init__.py new file mode 100644 index 0000000..1137187 --- /dev/null +++ b/bsie/utils/__init__.py @@ -0,0 +1,20 @@ +"""Common tools and definitions. + +Part of the bsie module. +A copy of the license is provided with the project. +Author: Matthias Baumgartner, 2022 +""" +# imports +import typing + +# inner-module imports +from . import bsfs +from . import node + +# exports +__all__: typing.Sequence[str] = ( + 'bsfs', + 'node', + ) + +## EOF ## diff --git a/bsie/utils/bsfs.py b/bsie/utils/bsfs.py new file mode 100644 index 0000000..33eb178 --- /dev/null +++ b/bsie/utils/bsfs.py @@ -0,0 +1,20 @@ +"""BSFS bridge, provides BSFS bindings for BSIE. + +Part of the bsie module. +A copy of the license is provided with the project. +Author: Matthias Baumgartner, 2022 +""" +# imports +import typing + +# bsfs imports +from bsfs.utils import URI +from bsfs.utils import typename + +# exports +__all__: typing.Sequence[str] = ( + 'URI', + 'typename', + ) + +## EOF ## diff --git a/bsie/utils/node.py b/bsie/utils/node.py new file mode 100644 index 0000000..60863a4 --- /dev/null +++ b/bsie/utils/node.py @@ -0,0 +1,39 @@ +"""Lightweight Node to bridge to BSFS. + +Part of the bsie module. +A copy of the license is provided with the project. +Author: Matthias Baumgartner, 2022 +""" +# imports +import typing + +# inner-module imports +from bsie.utils.bsfs import URI + +# exports +__all__: typing.Sequence[str] = ( + 'Node' + ) + + +## code ## + +class Node(): + """Lightweight Node, disconnected from any bsfs structures.""" + + # node type. + node_type: URI + + # node URI. 
+ uri: URI + + def __init__( + self, + node_type: URI, + uri: URI, + ): + # assign members + self.node_type = URI(node_type) + self.uri = URI(uri) + +## EOF ## -- cgit v1.2.3 From 068b3651c16916877eb8d5fdfec52485a507e204 Mon Sep 17 00:00:00 2001 From: Matthias Baumgartner Date: Mon, 31 Oct 2022 13:05:31 +0100 Subject: path and stat readers --- bsie/reader/__init__.py | 19 +++++++++++++++++++ bsie/reader/path.py | 31 +++++++++++++++++++++++++++++++ bsie/reader/stat.py | 34 ++++++++++++++++++++++++++++++++++ test/reader/test_path.py | 28 ++++++++++++++++++++++++++++ test/reader/test_stat.py | 34 ++++++++++++++++++++++++++++++++++ 5 files changed, 146 insertions(+) create mode 100644 bsie/reader/__init__.py create mode 100644 bsie/reader/path.py create mode 100644 bsie/reader/stat.py create mode 100644 test/reader/test_path.py create mode 100644 test/reader/test_stat.py diff --git a/bsie/reader/__init__.py b/bsie/reader/__init__.py new file mode 100644 index 0000000..a45f22b --- /dev/null +++ b/bsie/reader/__init__.py @@ -0,0 +1,19 @@ +"""The Reader classes return high-level content structures from files. + +The Reader fulfills two purposes: + First, it brokers between multiple libraries and file formats. + Second, it separates multiple aspects of a file into distinct content types. + +Often, different libraries focus on reading different types of content from a +file. E.g. one would use different modules to read file system infos than to +read exif or pixel data of an image. Hence, this module is organized by content +type. Each distinct type can be implemented in a file or submodule that +provides a Reader implementation. Through utilization of submodules, different +file formats can be supported. + +Part of the bsie module. +A copy of the license is provided with the project. 
+Author: Matthias Baumgartner, 2022 +""" + +## EOF ## diff --git a/bsie/reader/path.py b/bsie/reader/path.py new file mode 100644 index 0000000..d27c664 --- /dev/null +++ b/bsie/reader/path.py @@ -0,0 +1,31 @@ +"""The Path reader produces a file path. + +Part of the bsie module. +A copy of the license is provided with the project. +Author: Matthias Baumgartner, 2022 +""" +# imports +import os +import typing + +# inner-module imports +from bsie.base import reader + +# exports +__all__: typing.Sequence[str] = ( + 'Path', + ) + + +## code ## + +class Path(reader.Reader): + """Return the path.""" + + CONTENT_TYPE = typing.Union[str] + + def __call__(self, path: str) -> CONTENT_TYPE: + return path + + +## EOF ## diff --git a/bsie/reader/stat.py b/bsie/reader/stat.py new file mode 100644 index 0000000..f0b83fb --- /dev/null +++ b/bsie/reader/stat.py @@ -0,0 +1,34 @@ +"""The Stat reader produces filesystem stat information. + +Part of the bsie module. +A copy of the license is provided with the project. +Author: Matthias Baumgartner, 2022 +""" +# imports +import os +import typing + +# inner-module imports +from bsie.base import reader, errors + +# exports +__all__: typing.Sequence[str] = ( + 'Stat', + ) + + +## code ## + +class Stat(reader.Reader): + """Read and return the filesystem's stat infos.""" + + CONTENT_TYPE = typing.Union[os.stat_result] + + def __call__(self, path: str) -> CONTENT_TYPE: + try: + return os.stat(path) + except Exception: + raise errors.ReaderError(path) + + +## EOF ## diff --git a/test/reader/test_path.py b/test/reader/test_path.py new file mode 100644 index 0000000..fd7bc5a --- /dev/null +++ b/test/reader/test_path.py @@ -0,0 +1,28 @@ +""" + +Part of the bsie test suite. +A copy of the license is provided with the project. 
+Author: Matthias Baumgartner, 2022 +""" +# imports +import unittest + +# objects to test +from bsie.reader.path import Path + + +## code ## + +class TestPath(unittest.TestCase): + def test_call(self): + self.assertEqual('', Path()('')) + self.assertEqual('/tmp/foo/bar', Path()('/tmp/foo/bar')) + self.assertEqual('/home/myself/some file', Path()('/home/myself/some file')) + + +## main ## + +if __name__ == '__main__': + unittest.main() + +## EOF ## diff --git a/test/reader/test_stat.py b/test/reader/test_stat.py new file mode 100644 index 0000000..d12ad9c --- /dev/null +++ b/test/reader/test_stat.py @@ -0,0 +1,34 @@ +""" + +Part of the bsie test suite. +A copy of the license is provided with the project. +Author: Matthias Baumgartner, 2022 +""" +# imports +import os +import unittest + +# bsie imports +from bsie.base import errors + +# objects to test +from bsie.reader.stat import Stat + + +## code ## + +class TestPath(unittest.TestCase): + def test_call(self): + # test self + self.assertEqual(os.stat(__file__), Stat()(__file__)) + # test invalid file + self.assertRaises(errors.ReaderError, Stat(), '') + self.assertRaises(errors.ReaderError, Stat(), None) + + +## main ## + +if __name__ == '__main__': + unittest.main() + +## EOF ## -- cgit v1.2.3 From 2da348c638ac5058d5acf09ab5df323ee04503d5 Mon Sep 17 00:00:00 2001 From: Matthias Baumgartner Date: Mon, 31 Oct 2022 14:14:42 +0100 Subject: constant, filesize, and filename extractors --- bsie/base/extractor.py | 3 +- bsie/extractor/__init__.py | 15 +++++++ bsie/extractor/generic/__init__.py | 16 ++++++++ bsie/extractor/generic/constant.py | 52 ++++++++++++++++++++++++ bsie/extractor/generic/path.py | 70 ++++++++++++++++++++++++++++++++ bsie/extractor/generic/stat.py | 71 +++++++++++++++++++++++++++++++++ bsie/utils/__init__.py | 2 + bsie/utils/bsfs.py | 5 ++- bsie/utils/namespaces.py | 25 ++++++++++++ test/__init__.py | 0 test/extractor/__init__.py | 0 test/extractor/generic/__init__.py | 0 
test/extractor/generic/test_constant.py | 63 +++++++++++++++++++++++++++++ test/extractor/generic/test_path.py | 45 +++++++++++++++++++++ test/extractor/generic/test_stat.py | 43 ++++++++++++++++++++ test/reader/__init__.py | 0 16 files changed, 406 insertions(+), 4 deletions(-) create mode 100644 bsie/extractor/__init__.py create mode 100644 bsie/extractor/generic/__init__.py create mode 100644 bsie/extractor/generic/constant.py create mode 100644 bsie/extractor/generic/path.py create mode 100644 bsie/extractor/generic/stat.py create mode 100644 bsie/utils/namespaces.py create mode 100644 test/__init__.py create mode 100644 test/extractor/__init__.py create mode 100644 test/extractor/generic/__init__.py create mode 100644 test/extractor/generic/test_constant.py create mode 100644 test/extractor/generic/test_path.py create mode 100644 test/extractor/generic/test_stat.py create mode 100644 test/reader/__init__.py diff --git a/bsie/base/extractor.py b/bsie/base/extractor.py index d5b0922..ea43925 100644 --- a/bsie/base/extractor.py +++ b/bsie/base/extractor.py @@ -6,7 +6,6 @@ Author: Matthias Baumgartner, 2022 """ # imports import abc -import collections import typing # inner-module imports @@ -22,7 +21,7 @@ __all__: typing.Sequence[str] = ( ## code ## -class Extractor(abc.ABC, collections.abc.Iterable, collections.abc.Callable): +class Extractor(abc.ABC): """Produce (node, predicate, value)-triples from some content.""" # what type of content is expected (i.e. reader subclass). diff --git a/bsie/extractor/__init__.py b/bsie/extractor/__init__.py new file mode 100644 index 0000000..ef31343 --- /dev/null +++ b/bsie/extractor/__init__.py @@ -0,0 +1,15 @@ +"""Extractors produce triples from some content. + +Each Extractor class is linked to the Reader class whose content it requires. + +Part of the bsie module. +A copy of the license is provided with the project. 
+Author: Matthias Baumgartner, 2022 +""" +# imports +import typing + +# exports +__all__: typing.Sequence[str] = [] + +## EOF ## diff --git a/bsie/extractor/generic/__init__.py b/bsie/extractor/generic/__init__.py new file mode 100644 index 0000000..0cb7e7f --- /dev/null +++ b/bsie/extractor/generic/__init__.py @@ -0,0 +1,16 @@ +"""Generic extractors focus on information that is typically available on all +files. Examples include file system information (file name and size, mime type, +etc.) and information that is independent of the actual file (constant triples, +host platform infos, current time, etc.). + +Part of the bsie module. +A copy of the license is provided with the project. +Author: Matthias Baumgartner, 2022 +""" +# imports +import typing + +# exports +__all__: typing.Sequence[str] = [] + +## EOF ## diff --git a/bsie/extractor/generic/constant.py b/bsie/extractor/generic/constant.py new file mode 100644 index 0000000..e243131 --- /dev/null +++ b/bsie/extractor/generic/constant.py @@ -0,0 +1,52 @@ +"""The Constant extractor produces pre-specified triples. + +Part of the bsie module. +A copy of the license is provided with the project. 
+Author: Matthias Baumgartner, 2022 +""" +# imports +import typing + +# inner-module imports +from bsie.base import extractor +from bsie.utils.bsfs import URI +from bsie.utils.node import Node + +# exports +__all__: typing.Sequence[str] = ( + 'Constant', + ) + + +## code ## + +class Constant(extractor.Extractor): + """Produce pre-specified (predicate, value) triples.""" + + CONTENT_READER = None + + def __init__( + self, + schema: str, + tuples: typing.Iterable[typing.Tuple[URI, typing.Any]], + ): + self._schema = schema + self._tuples = tuples + # FIXME: use schema instance for predicate checking + #self._tuples = [(pred, value) for pred, value in tuples if pred in schema] + # FIXME: use schema instance for value checking + + def schema(self) -> str: + return self._schema + + def extract( + self, + subject: Node, + content: None, + predicates: typing.Iterable[URI], + ) -> typing.Iterator[typing.Tuple[Node, URI, typing.Any]]: + for pred, value in self._tuples: + if pred in predicates: + yield subject, pred, value + +## EOF ## diff --git a/bsie/extractor/generic/path.py b/bsie/extractor/generic/path.py new file mode 100644 index 0000000..c39bbd2 --- /dev/null +++ b/bsie/extractor/generic/path.py @@ -0,0 +1,70 @@ +""" + +Part of the bsie module. +A copy of the license is provided with the project. 
+Author: Matthias Baumgartner, 2022 +""" +# imports +import os +import typing + +# inner-module imports +from bsie.base import extractor +from bsie.utils import node, ns +from bsie.utils.bsfs import URI +import bsie.reader.path + +# exports +__all__: typing.Sequence[str] = ( + 'Path', + ) + + +## code ## + +class Path(extractor.Extractor): + """Extract information from file's path.""" + + CONTENT_READER = bsie.reader.path.Path + + def __init__(self): + self.__callmap = { + ns.bse.filename: self.__filename, + } + + def schema(self) -> str: + return ''' + bse:filename a bsfs:Predicate ; + rdfs:domain bsfs:Entity ; + rdfs:range xsd:string ; + rdf:label "File name"^^xsd:string ; + schema:description "Filename of entity in some filesystem."^^xsd:string ; + owl:maxCardinality "INF"^^xsd:number . + ''' + + def extract( + self, + subject: node.Node, + content: CONTENT_READER.CONTENT_TYPE, + predicates: typing.Iterable[URI], + ) -> typing.Iterator[typing.Tuple[node.Node, URI, typing.Any]]: + for pred in predicates: + # find callback + clbk = self.__callmap.get(pred) + if clbk is None: + continue + # get value + value = clbk(content) + if value is None: + continue + # produce triple + yield subject, pred, value + + def __filename(self, path: str) -> str: + try: + return os.path.basename(path) + except Exception: + # FIXME: some kind of error reporting (e.g. logging) + return None + +## EOF ## diff --git a/bsie/extractor/generic/stat.py b/bsie/extractor/generic/stat.py new file mode 100644 index 0000000..d74369c --- /dev/null +++ b/bsie/extractor/generic/stat.py @@ -0,0 +1,71 @@ +"""Extract information from the file system, such as filesize. + +Part of the bsie module. +A copy of the license is provided with the project. 
+Author: Matthias Baumgartner, 2022 +""" +# imports +import typing + +# inner-module imports +from bsie.base import extractor +from bsie.utils import node, ns +from bsie.utils.bsfs import URI +import bsie.reader.stat + + +# exports +__all__: typing.Sequence[str] = ( + 'Stat', + ) + + +## code ## + +class Stat(extractor.Extractor): + """Extract information from the file system.""" + + CONTENT_READER = bsie.reader.stat.Stat + + def __init__(self): + self.__callmap = { + ns.bse.filesize: self.__filesize, + } + + def schema(self) -> str: + return ''' + bse:filesize a bsfs:Predicate ; + rdfs:domain bsfs:Entity ; + rdfs:range xsd:integer ; + rdf:label "File size"^^xsd:string ; + schema:description "File size of entity in some filesystem."^^xsd:string ; + owl:maxCardinality "INF"^^xsd:number . + ''' + + def extract( + self, + subject: node.Node, + content: CONTENT_READER.CONTENT_TYPE, + predicates: typing.Iterable[URI], + ) -> typing.Iterator[typing.Tuple[node.Node, URI, typing.Any]]: + for pred in predicates: + # find callback + clbk = self.__callmap.get(pred) + if clbk is None: + continue + # get value + value = clbk(content) + if value is None: + continue + # produce triple + yield subject, pred, value + + def __filesize(self, content: CONTENT_READER.CONTENT_TYPE) -> int: + """Return the file size.""" + try: + return content.st_size + except Exception: + # FIXME: some kind of error reporting (e.g. logging) + return None + +## EOF ## diff --git a/bsie/utils/__init__.py b/bsie/utils/__init__.py index 1137187..bd22236 100644 --- a/bsie/utils/__init__.py +++ b/bsie/utils/__init__.py @@ -9,12 +9,14 @@ import typing # inner-module imports from . import bsfs +from . import namespaces as ns from . 
import node # exports __all__: typing.Sequence[str] = ( 'bsfs', 'node', + 'ns', ) ## EOF ## diff --git a/bsie/utils/bsfs.py b/bsie/utils/bsfs.py index 33eb178..1ae657c 100644 --- a/bsie/utils/bsfs.py +++ b/bsie/utils/bsfs.py @@ -8,11 +8,12 @@ Author: Matthias Baumgartner, 2022 import typing # bsfs imports -from bsfs.utils import URI -from bsfs.utils import typename +from bsfs.namespace import Namespace +from bsfs.utils import URI, typename # exports __all__: typing.Sequence[str] = ( + 'Namespace', 'URI', 'typename', ) diff --git a/bsie/utils/namespaces.py b/bsie/utils/namespaces.py new file mode 100644 index 0000000..67ccc71 --- /dev/null +++ b/bsie/utils/namespaces.py @@ -0,0 +1,25 @@ +"""Default namespaces used throughout BSIE. + +Part of the bsie module. +A copy of the license is provided with the project. +Author: Matthias Baumgartner, 2022 +""" +# imports +import typing + +# bsie imports +from . import bsfs as _bsfs + +# constants +bse = _bsfs.Namespace('http://bsfs.ai/schema/Entity#') +bsfs = _bsfs.Namespace('http://bsfs.ai/schema/') +bsm = _bsfs.Namespace('http://bsfs.ai/schema/meta#') + +# export +__all__: typing.Sequence[str] = ( + 'bse', + 'bsfs', + 'bsm', + ) + +## EOF ## diff --git a/test/__init__.py b/test/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/test/extractor/__init__.py b/test/extractor/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/test/extractor/generic/__init__.py b/test/extractor/generic/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/test/extractor/generic/test_constant.py b/test/extractor/generic/test_constant.py new file mode 100644 index 0000000..f3ab0a3 --- /dev/null +++ b/test/extractor/generic/test_constant.py @@ -0,0 +1,63 @@ +""" + +Part of the bsie test suite. +A copy of the license is provided with the project. 
+Author: Matthias Baumgartner, 2022 +""" +# imports +import unittest + +# bsie imports +from bsie.utils import ns +from bsie.utils.node import Node + +# objects to test +from bsie.extractor.generic.constant import Constant + + +## code ## + +class TestConstant(unittest.TestCase): + def test_extract(self): + schema = ''' + bse:author a bsfs:Predicate ; + rdfs:domain bsfs:Entity ; + rdfs:range xsd:string ; + owl:maxCardinality "1"^^xsd:number . + + bse:comment a bsfs:Predicate ; + rdfs:domain bsfs:Entity ; + rdfs:range xsd:string ; + owl:maxCardinality "INF"^^xsd:number . + + ''' + tuples = [ + (ns.bse.author, 'Me, myself, and I'), + (ns.bse.comment, 'the quick brown fox jumps over the lazy dog.'), + ] + node = Node(ns.bsfs.Entity, '') # Blank node + predicates = (ns.bse.author, ns.bse.comment) + ext = Constant(schema, tuples) + # baseline + self.assertSetEqual(set(ext.extract(node, None, predicates)), + {(node, pred, value) for pred, value in tuples}) + # predicates is respected + self.assertSetEqual(set(ext.extract(node, None, (ns.bse.author, ns.bse.foobar))), + {(node, ns.bse.author, 'Me, myself, and I')}) + self.assertSetEqual(set(ext.extract(node, None, (ns.bse.comment, ns.bse.foobar))), + {(node, ns.bse.comment, 'the quick brown fox jumps over the lazy dog.')}) + self.assertSetEqual(set(ext.extract(node, None, (ns.bse.foobar, ns.bse.barfoo))), set()) + + # FIXME: should change! + # for now: no schema compliance + ext = Constant('', tuples) + self.assertSetEqual(set(ext.extract(node, None, predicates)), + {(node, pred, value) for pred, value in tuples}) + + +## main ## + +if __name__ == '__main__': + unittest.main() + +## EOF ## diff --git a/test/extractor/generic/test_path.py b/test/extractor/generic/test_path.py new file mode 100644 index 0000000..8623490 --- /dev/null +++ b/test/extractor/generic/test_path.py @@ -0,0 +1,45 @@ +""" + +Part of the bsie test suite. +A copy of the license is provided with the project. 
+Author: Matthias Baumgartner, 2022 +""" +# imports +import unittest + +# bsie imports +from bsie.utils import ns +from bsie.utils.node import Node + +# objects to test +from bsie.extractor.generic.path import Path + + +## code ## + +class TestPath(unittest.TestCase): + def test_extract(self): + node = Node(ns.bsfs.Entity, '') # Blank node + ext = Path() + + # baseline + self.assertSetEqual(set(ext.extract(node, '/tmp/foo/bar', (ns.bse.filename, ))), + {(node, ns.bse.filename, 'bar')}) + # predicates parameter is respected + self.assertSetEqual(set(ext.extract(node, '/tmp/foo/bar', (ns.bse.filename, ns.bse.foo))), + {(node, ns.bse.filename, 'bar')}) + self.assertSetEqual(set(ext.extract(node, '/tmp/foo/bar', (ns.bse.foo, ))), set()) + # path variations + self.assertSetEqual(set(ext.extract(node, 'bar', (ns.bse.filename, ))), + {(node, ns.bse.filename, 'bar')}) + self.assertSetEqual(set(ext.extract(node, '', (ns.bse.filename, ))), + {(node, ns.bse.filename, '')}) + self.assertSetEqual(set(ext.extract(node, None, (ns.bse.filename, ))), set()) + + +## main ## + +if __name__ == '__main__': + unittest.main() + +## EOF ## diff --git a/test/extractor/generic/test_stat.py b/test/extractor/generic/test_stat.py new file mode 100644 index 0000000..f89b053 --- /dev/null +++ b/test/extractor/generic/test_stat.py @@ -0,0 +1,43 @@ +""" + +Part of the bsie test suite. +A copy of the license is provided with the project. 
+Author: Matthias Baumgartner, 2022 +""" +# imports +import os +import unittest + +# bsie imports +from bsie.utils import ns +from bsie.utils.node import Node + +# objects to test +from bsie.extractor.generic.stat import Stat + + +## code ## + +class TestConstant(unittest.TestCase): + def test_extract(self): + node = Node(ns.bsfs.Entity, '') # Blank node + content = os.stat(__file__) + ext = Stat() + + # baseline + self.assertSetEqual(set(ext.extract(node, content, (ns.bse.filesize, ))), + {(node, ns.bse.filesize, content.st_size)}) + # predicates parameter is respected + self.assertSetEqual(set(ext.extract(node, content, (ns.bse.filesize, ns.bse.foo))), + {(node, ns.bse.filesize, content.st_size)}) + self.assertSetEqual(set(ext.extract(node, content, (ns.bse.foo, ))), set()) + # content variations + self.assertSetEqual(set(ext.extract(node, None, (ns.bse.filesize, ))), set()) + + +## main ## + +if __name__ == '__main__': + unittest.main() + +## EOF ## diff --git a/test/reader/__init__.py b/test/reader/__init__.py new file mode 100644 index 0000000..e69de29 -- cgit v1.2.3 From e174a25585e64eb1b0759440cad48d642dd31829 Mon Sep 17 00:00:00 2001 From: Matthias Baumgartner Date: Fri, 25 Nov 2022 14:31:29 +0100 Subject: use schema and predicate types in extractors --- bsie/base/errors.py | 13 +++++-- bsie/base/extractor.py | 51 ++++++++++++++++++++++---- bsie/extractor/generic/constant.py | 20 +++++------ bsie/extractor/generic/path.py | 40 +++++++++++---------- bsie/extractor/generic/stat.py | 34 +++++++++--------- bsie/utils/bsfs.py | 2 ++ bsie/utils/namespaces.py | 3 +- bsie/utils/node.py | 2 +- test/extractor/generic/test_constant.py | 63 +++++++++++++++++++++++---------- test/extractor/generic/test_path.py | 53 +++++++++++++++++++++------ test/extractor/generic/test_stat.py | 48 ++++++++++++++++++++----- 11 files changed, 235 insertions(+), 94 deletions(-) diff --git a/bsie/base/errors.py b/bsie/base/errors.py index f86ffb2..eedce3b 100644 --- a/bsie/base/errors.py 
+++ b/bsie/base/errors.py @@ -8,15 +8,22 @@ Author: Matthias Baumgartner, 2022 import typing # exports -__all__: typing.Sequence[str] = [] +__all__: typing.Sequence[str] = ( + 'ExtractorError', + ) + + ## code ## -class _BSIE_Error(Exception): +class _BSIEError(Exception): """Generic BSIE error.""" -class ReaderError(_BSIE_Error): +class ExtractorError(_BSIEError): + """The Extractor failed to process the given content.""" + +class ReaderError(_BSIEError): """The Reader failed to read the given file.""" ## EOF ## diff --git a/bsie/base/extractor.py b/bsie/base/extractor.py index ea43925..a6a69c6 100644 --- a/bsie/base/extractor.py +++ b/bsie/base/extractor.py @@ -11,13 +11,38 @@ import typing # inner-module imports from . import reader from bsie.utils import node -from bsie.utils.bsfs import URI, typename +from bsie.utils.bsfs import schema as _schema, typename # exports __all__: typing.Sequence[str] = ( 'Extractor', ) +# constants + +# essential definitions typically used in extractor schemas. +# NOTE: The definition here is only for convenience; Each Extractor must implement its use, if so desired. +SCHEMA_PREAMBLE = ''' + # common external prefixes + prefix owl: + prefix rdf: + prefix rdfs: + prefix xsd: + prefix schema: + + # common bsfs prefixes + prefix bsfs: + prefix bse: + + # essential nodes + bsfs:Entity rdfs:subClassOf bsfs:Node . + + # common definitions + xsd:string rdfs:subClassOf bsfs:Literal . + xsd:integer rdfs:subClassOf bsfs:Literal . + + ''' + ## code ## @@ -27,23 +52,37 @@ class Extractor(abc.ABC): # what type of content is expected (i.e. reader subclass). CONTENT_READER: typing.Optional[typing.Type[reader.Reader]] = None + # extractor schema. 
+ schema: _schema.Schema + + def __init__(self, schema: _schema.Schema): + self.schema = schema + def __str__(self) -> str: return typename(self) def __repr__(self) -> str: return f'{typename(self)}()' - @abc.abstractmethod - def schema(self) -> str: - """Return the schema (predicates and nodes) produced by this Extractor.""" + + def predicates(self) -> typing.Iterator[_schema.Predicate]: + """Return the predicates that may be part of extracted triples.""" + # NOTE: Some predicates in the schema might not occur in actual triples, + # but are defined due to predicate class hierarchy. E.g., bsfs:Predicate + # is part of every schema but should not be used in triples. + # Announcing all predicates might not be the most efficient way, however, + # it is the most safe one. Concrete extractors that produce additional + # predicates (e.g. auxiliary nodes with their own predicates) should + # overwrite this method to only include the principal predicates. + return self.schema.predicates() @abc.abstractmethod def extract( self, subject: node.Node, content: typing.Any, - predicates: typing.Iterable[URI], - ) -> typing.Iterator[typing.Tuple[node.Node, URI, typing.Any]]: + predicates: typing.Iterable[_schema.Predicate], + ) -> typing.Iterator[typing.Tuple[node.Node, _schema.Predicate, typing.Any]]: """Return (node, predicate, value) triples.""" ## EOF ## diff --git a/bsie/extractor/generic/constant.py b/bsie/extractor/generic/constant.py index e243131..795bac6 100644 --- a/bsie/extractor/generic/constant.py +++ b/bsie/extractor/generic/constant.py @@ -7,9 +7,9 @@ Author: Matthias Baumgartner, 2022 # imports import typing -# inner-module imports +# bsie imports from bsie.base import extractor -from bsie.utils.bsfs import URI +from bsie.utils.bsfs import URI, schema as _schema from bsie.utils.node import Node # exports @@ -25,26 +25,26 @@ class Constant(extractor.Extractor): CONTENT_READER = None + # predicate/value pairs to be produced. 
+ _tuples: typing.Tuple[typing.Tuple[_schema.Predicate, typing.Any], ...] + def __init__( self, schema: str, tuples: typing.Iterable[typing.Tuple[URI, typing.Any]], ): - self._schema = schema - self._tuples = tuples - # FIXME: use schema instance for predicate checking - #self._tuples = [(pred, value) for pred, value in tuples if pred in schema] + super().__init__(_schema.Schema.from_string(extractor.SCHEMA_PREAMBLE + schema)) + # NOTE: Raises a KeyError if the predicate is not part of the schema + self._tuples = tuple((self.schema.predicate(p_uri), value) for p_uri, value in tuples) # FIXME: use schema instance for value checking - def schema(self) -> str: - return self._schema def extract( self, subject: Node, content: None, - predicates: typing.Iterable[URI], - ) -> typing.Iterator[typing.Tuple[Node, URI, typing.Any]]: + predicates: typing.Iterable[_schema.Predicate], + ) -> typing.Iterator[typing.Tuple[Node, _schema.Predicate, typing.Any]]: for pred, value in self._tuples: if pred in predicates: yield subject, pred, value diff --git a/bsie/extractor/generic/path.py b/bsie/extractor/generic/path.py index c39bbd2..f358a79 100644 --- a/bsie/extractor/generic/path.py +++ b/bsie/extractor/generic/path.py @@ -8,11 +8,10 @@ Author: Matthias Baumgartner, 2022 import os import typing -# inner-module imports +# bsie imports from bsie.base import extractor from bsie.utils import node, ns -from bsie.utils.bsfs import URI -import bsie.reader.path +from bsie.utils.bsfs import schema # exports __all__: typing.Sequence[str] = ( @@ -27,30 +26,31 @@ class Path(extractor.Extractor): CONTENT_READER = bsie.reader.path.Path - def __init__(self): - self.__callmap = { - ns.bse.filename: self.__filename, - } + # mapping from predicate to handler function. 
+ _callmap: typing.Dict[schema.Predicate, typing.Callable[[str], typing.Any]] - def schema(self) -> str: - return ''' - bse:filename a bsfs:Predicate ; + def __init__(self): + super().__init__(schema.Schema.from_string(extractor.SCHEMA_PREAMBLE + ''' + bse:filename rdfs:subClassOf bsfs:Predicate ; rdfs:domain bsfs:Entity ; rdfs:range xsd:string ; - rdf:label "File name"^^xsd:string ; + rdfs:label "File name"^^xsd:string ; schema:description "Filename of entity in some filesystem."^^xsd:string ; owl:maxCardinality "INF"^^xsd:number . - ''' + ''')) + self._callmap = { + self.schema.predicate(ns.bse.filename): self.__filename, + } def extract( self, subject: node.Node, content: CONTENT_READER.CONTENT_TYPE, - predicates: typing.Iterable[URI], - ) -> typing.Iterator[typing.Tuple[node.Node, URI, typing.Any]]: + predicates: typing.Iterable[schema.Predicate], + ) -> typing.Iterator[typing.Tuple[node.Node, schema.Predicate, typing.Any]]: for pred in predicates: # find callback - clbk = self.__callmap.get(pred) + clbk = self._callmap.get(pred) if clbk is None: continue # get value @@ -60,11 +60,15 @@ class Path(extractor.Extractor): # produce triple yield subject, pred, value - def __filename(self, path: str) -> str: + def __filename(self, path: str) -> typing.Optional[str]: try: return os.path.basename(path) - except Exception: - # FIXME: some kind of error reporting (e.g. logging) + except Exception: # some error, skip. + # FIXME: some kind of error reporting (e.g. logging)? + # Options: (a) Fail silently (current); (b) Skip and report to log; + # (c) Raise ExtractorError (aborts extraction); (d) separate content type + # checks from basename errors (report content type errors, skip basename + # errors) return None ## EOF ## diff --git a/bsie/extractor/generic/stat.py b/bsie/extractor/generic/stat.py index d74369c..e5387af 100644 --- a/bsie/extractor/generic/stat.py +++ b/bsie/extractor/generic/stat.py @@ -5,14 +5,13 @@ A copy of the license is provided with the project. 
Author: Matthias Baumgartner, 2022 """ # imports +import os import typing -# inner-module imports +# bsie imports from bsie.base import extractor from bsie.utils import node, ns -from bsie.utils.bsfs import URI -import bsie.reader.stat - +from bsie.utils.bsfs import schema as _schema # exports __all__: typing.Sequence[str] = ( @@ -27,30 +26,31 @@ class Stat(extractor.Extractor): CONTENT_READER = bsie.reader.stat.Stat - def __init__(self): - self.__callmap = { - ns.bse.filesize: self.__filesize, - } + # mapping from predicate to handler function. + _callmap: typing.Dict[_schema.Predicate, typing.Callable[[os.stat_result], typing.Any]] - def schema(self) -> str: - return ''' - bse:filesize a bsfs:Predicate ; + def __init__(self): + super().__init__(_schema.Schema.from_string(extractor.SCHEMA_PREAMBLE + ''' + bse:filesize rdfs:subClassOf bsfs:Predicate ; rdfs:domain bsfs:Entity ; rdfs:range xsd:integer ; - rdf:label "File size"^^xsd:string ; + rdfs:label "File size"^^xsd:string ; schema:description "File size of entity in some filesystem."^^xsd:string ; owl:maxCardinality "INF"^^xsd:number . 
- ''' + ''')) + self._callmap = { + self.schema.predicate(ns.bse.filesize): self.__filesize, + } def extract( self, subject: node.Node, content: CONTENT_READER.CONTENT_TYPE, - predicates: typing.Iterable[URI], - ) -> typing.Iterator[typing.Tuple[node.Node, URI, typing.Any]]: + predicates: typing.Iterable[_schema.Predicate], + ) -> typing.Iterator[typing.Tuple[node.Node, _schema.Predicate, typing.Any]]: for pred in predicates: # find callback - clbk = self.__callmap.get(pred) + clbk = self._callmap.get(pred) if clbk is None: continue # get value @@ -60,7 +60,7 @@ class Stat(extractor.Extractor): # produce triple yield subject, pred, value - def __filesize(self, content: CONTENT_READER.CONTENT_TYPE) -> int: + def __filesize(self, content: os.stat_result) -> typing.Optional[int]: """Return the file size.""" try: return content.st_size diff --git a/bsie/utils/bsfs.py b/bsie/utils/bsfs.py index 1ae657c..01ec5d1 100644 --- a/bsie/utils/bsfs.py +++ b/bsie/utils/bsfs.py @@ -8,6 +8,7 @@ Author: Matthias Baumgartner, 2022 import typing # bsfs imports +from bsfs import schema from bsfs.namespace import Namespace from bsfs.utils import URI, typename @@ -15,6 +16,7 @@ from bsfs.utils import URI, typename __all__: typing.Sequence[str] = ( 'Namespace', 'URI', + 'schema', 'typename', ) diff --git a/bsie/utils/namespaces.py b/bsie/utils/namespaces.py index 67ccc71..13be96b 100644 --- a/bsie/utils/namespaces.py +++ b/bsie/utils/namespaces.py @@ -7,13 +7,14 @@ Author: Matthias Baumgartner, 2022 # imports import typing -# bsie imports +# inner-module imports from . 
import bsfs as _bsfs # constants bse = _bsfs.Namespace('http://bsfs.ai/schema/Entity#') bsfs = _bsfs.Namespace('http://bsfs.ai/schema/') bsm = _bsfs.Namespace('http://bsfs.ai/schema/meta#') +xsd = _bsfs.Namespace('http://www.w3.org/2001/XMLSchema#') # export __all__: typing.Sequence[str] = ( diff --git a/bsie/utils/node.py b/bsie/utils/node.py index 60863a4..3a0f06b 100644 --- a/bsie/utils/node.py +++ b/bsie/utils/node.py @@ -12,7 +12,7 @@ from bsie.utils.bsfs import URI # exports __all__: typing.Sequence[str] = ( - 'Node' + 'Node', ) diff --git a/test/extractor/generic/test_constant.py b/test/extractor/generic/test_constant.py index f3ab0a3..7fdb8ac 100644 --- a/test/extractor/generic/test_constant.py +++ b/test/extractor/generic/test_constant.py @@ -20,39 +20,64 @@ from bsie.extractor.generic.constant import Constant class TestConstant(unittest.TestCase): def test_extract(self): schema = ''' - bse:author a bsfs:Predicate ; + bse:author rdfs:subClassOf bsfs:Predicate ; rdfs:domain bsfs:Entity ; rdfs:range xsd:string ; owl:maxCardinality "1"^^xsd:number . - - bse:comment a bsfs:Predicate ; + bse:comment rdfs:subClassOf bsfs:Predicate ; rdfs:domain bsfs:Entity ; rdfs:range xsd:string ; owl:maxCardinality "INF"^^xsd:number . 
- ''' tuples = [ (ns.bse.author, 'Me, myself, and I'), (ns.bse.comment, 'the quick brown fox jumps over the lazy dog.'), ] - node = Node(ns.bsfs.Entity, '') # Blank node - predicates = (ns.bse.author, ns.bse.comment) ext = Constant(schema, tuples) + node = Node(ns.bsfs.Entity, '') # Blank node + p_author = ext.schema.predicate(ns.bse.author) + p_comment = ext.schema.predicate(ns.bse.comment) + entity = ext.schema.node(ns.bsfs.Node).get_child(ns.bsfs.Entity) + string = ext.schema.literal(ns.bsfs.Literal).get_child(ns.xsd.string) # baseline - self.assertSetEqual(set(ext.extract(node, None, predicates)), - {(node, pred, value) for pred, value in tuples}) + self.assertSetEqual(set(ext.extract(node, None, (p_author, p_comment))), + {(node, p_author, 'Me, myself, and I'), + (node, p_comment, 'the quick brown fox jumps over the lazy dog.')}) # predicates is respected - self.assertSetEqual(set(ext.extract(node, None, (ns.bse.author, ns.bse.foobar))), - {(node, ns.bse.author, 'Me, myself, and I')}) - self.assertSetEqual(set(ext.extract(node, None, (ns.bse.comment, ns.bse.foobar))), - {(node, ns.bse.comment, 'the quick brown fox jumps over the lazy dog.')}) - self.assertSetEqual(set(ext.extract(node, None, (ns.bse.foobar, ns.bse.barfoo))), set()) - - # FIXME: should change! 
- # for now: no schema compliance - ext = Constant('', tuples) - self.assertSetEqual(set(ext.extract(node, None, predicates)), - {(node, pred, value) for pred, value in tuples}) + p_foobar = ext.schema.predicate(ns.bsfs.Predicate).get_child(ns.bse.foobar, domain=entity, range=entity) + self.assertSetEqual(set(ext.extract(node, None, (p_author, p_foobar))), + {(node, p_author, 'Me, myself, and I')}) + self.assertSetEqual(set(ext.extract(node, None, (p_comment, p_foobar))), + {(node, p_comment, 'the quick brown fox jumps over the lazy dog.')}) + p_barfoo = ext.schema.predicate(ns.bse.author).get_child(ns.bse.comment, domain=entity, range=string) + self.assertSetEqual(set(ext.extract(node, None, (p_foobar, p_barfoo))), set()) + + def test_construct(self): + # schema compliance + schema = ''' + bse:author rdfs:subClassOf bsfs:Predicate ; + rdfs:domain bsfs:Entity ; + rdfs:range xsd:string ; + owl:maxCardinality "1"^^xsd:number . + bse:comment rdfs:subClassOf bsfs:Predicate ; + rdfs:domain bsfs:Entity ; + rdfs:range xsd:string ; + owl:maxCardinality "INF"^^xsd:number . 
+ ''' + # can create a schema + self.assertIsInstance(Constant(schema, [ + (ns.bse.author, 'Me, myself, and I'), + (ns.bse.comment, 'the quick brown fox jumps over the lazy dog.'), + ]), Constant) + # predicates are validated + self.assertRaises(KeyError, Constant, schema, [ + (ns.bse.author, 'Me, myself, and I'), + (ns.bse.foobar, 'foobar!')]) + # FIXME: values are validated + #class Foo(): pass # not string compatible + #self.assertRaises(ValueError, Constant, schema, [ + # (ns.bse.author, Foo())]) + ## main ## diff --git a/test/extractor/generic/test_path.py b/test/extractor/generic/test_path.py index 8623490..9376c7c 100644 --- a/test/extractor/generic/test_path.py +++ b/test/extractor/generic/test_path.py @@ -8,7 +8,9 @@ Author: Matthias Baumgartner, 2022 import unittest # bsie imports +from bsie import base from bsie.utils import ns +from bsie.utils.bsfs import schema from bsie.utils.node import Node # objects to test @@ -18,23 +20,52 @@ from bsie.extractor.generic.path import Path ## code ## class TestPath(unittest.TestCase): + def test_eq(self): + # distinct instances, same data + self.assertEqual(Path(), Path()) + # different classes + class Foo(): pass + self.assertNotEqual(Path(), Foo()) + self.assertNotEqual(Path(), 123) + self.assertNotEqual(Path(), None) + + def test_schema(self): + self.assertEqual(Path().schema, + schema.Schema.from_string(base.extractor.SCHEMA_PREAMBLE + ''' + bse:filename rdfs:subClassOf bsfs:Predicate ; + rdfs:domain bsfs:Entity ; + rdfs:range xsd:string ; + owl:maxCardinality "INF"^^xsd:number . 
+ ''')) + def test_extract(self): - node = Node(ns.bsfs.Entity, '') # Blank node ext = Path() + node = Node(ns.bsfs.Entity, '') # Blank node + content = '/tmp/foo/bar' + p_filename = ext.schema.predicate(ns.bse.filename) + entity = ext.schema.node(ns.bsfs.Node).get_child(ns.bsfs.Entity) + string = ext.schema.literal(ns.bsfs.Literal).get_child(ns.xsd.string) # baseline - self.assertSetEqual(set(ext.extract(node, '/tmp/foo/bar', (ns.bse.filename, ))), - {(node, ns.bse.filename, 'bar')}) + self.assertSetEqual(set(ext.extract(node, content, (p_filename, ))), + {(node, p_filename, 'bar')}) # predicates parameter is respected - self.assertSetEqual(set(ext.extract(node, '/tmp/foo/bar', (ns.bse.filename, ns.bse.foo))), - {(node, ns.bse.filename, 'bar')}) - self.assertSetEqual(set(ext.extract(node, '/tmp/foo/bar', (ns.bse.foo, ))), set()) + p_foo = ext.schema.predicate(ns.bsfs.Predicate).get_child(ns.bse.foo, domain=entity, range=string) # unsupported predicate + self.assertSetEqual(set(ext.extract(node, content, (p_filename, p_foo))), + {(node, p_filename, 'bar')}) + self.assertSetEqual(set(ext.extract(node, content, (p_foo, ))), set()) + # predicates are validated + p_bar = p_foo.get_child(ns.bse.filename) # same URI but different hierarchy + self.assertSetEqual(set(ext.extract(node, content, (p_filename, p_bar))), + {(node, p_filename, 'bar')}) + self.assertSetEqual(set(ext.extract(node, content, (p_bar, ))), set()) # path variations - self.assertSetEqual(set(ext.extract(node, 'bar', (ns.bse.filename, ))), - {(node, ns.bse.filename, 'bar')}) - self.assertSetEqual(set(ext.extract(node, '', (ns.bse.filename, ))), - {(node, ns.bse.filename, '')}) - self.assertSetEqual(set(ext.extract(node, None, (ns.bse.filename, ))), set()) + self.assertSetEqual(set(ext.extract(node, 'bar', (p_filename, ))), + {(node, p_filename, 'bar')}) + self.assertSetEqual(set(ext.extract(node, '', (p_filename, ))), + {(node, p_filename, '')}) + # errors are suppressed + 
self.assertSetEqual(set(ext.extract(node, None, (p_filename, ))), set()) ## main ## diff --git a/test/extractor/generic/test_stat.py b/test/extractor/generic/test_stat.py index f89b053..26dad6a 100644 --- a/test/extractor/generic/test_stat.py +++ b/test/extractor/generic/test_stat.py @@ -9,7 +9,9 @@ import os import unittest # bsie imports +from bsie import base from bsie.utils import ns +from bsie.utils.bsfs import schema from bsie.utils.node import Node # objects to test @@ -18,21 +20,51 @@ from bsie.extractor.generic.stat import Stat ## code ## -class TestConstant(unittest.TestCase): +class TestStat(unittest.TestCase): + def test_eq(self): + # distinct instances, same data + self.assertEqual(Stat(), Stat()) + # different classes + class Foo(): pass + self.assertNotEqual(Stat(), Foo()) + self.assertNotEqual(Stat(), 123) + self.assertNotEqual(Stat(), None) + + def test_schema(self): + self.assertEqual(Stat().schema, + schema.Schema.from_string(base.extractor.SCHEMA_PREAMBLE + ''' + bse:filesize rdfs:subClassOf bsfs:Predicate ; + rdfs:domain bsfs:Entity ; + rdfs:range xsd:integer ; + owl:maxCardinality "INF"^^xsd:number . 
+ ''')) + def test_extract(self): + ext = Stat() node = Node(ns.bsfs.Entity, '') # Blank node content = os.stat(__file__) - ext = Stat() + p_filesize = ext.schema.predicate(ns.bse.filesize) + entity = ext.schema.node(ns.bsfs.Node).get_child(ns.bsfs.Entity) + string = ext.schema.literal(ns.bsfs.Literal).get_child(ns.xsd.string) # baseline - self.assertSetEqual(set(ext.extract(node, content, (ns.bse.filesize, ))), - {(node, ns.bse.filesize, content.st_size)}) + self.assertSetEqual(set(ext.extract(node, content, (p_filesize, ))), + {(node, p_filesize, content.st_size)}) # predicates parameter is respected - self.assertSetEqual(set(ext.extract(node, content, (ns.bse.filesize, ns.bse.foo))), - {(node, ns.bse.filesize, content.st_size)}) - self.assertSetEqual(set(ext.extract(node, content, (ns.bse.foo, ))), set()) + p_foo = ext.schema.predicate(ns.bsfs.Predicate).get_child(ns.bse.foo, domain=entity, range=string) # unsupported predicate + self.assertSetEqual(set(ext.extract(node, content, (p_filesize, p_foo))), + {(node, p_filesize, content.st_size)}) + self.assertSetEqual(set(ext.extract(node, content, (p_foo, ))), set()) + # predicates are validated + p_bar = p_foo.get_child(ns.bse.filesizse) # same URI but different hierarchy + self.assertSetEqual(set(ext.extract(node, content, (p_filesize, p_bar))), + {(node, p_filesize, content.st_size)}) + self.assertSetEqual(set(ext.extract(node, content, (p_bar, ))), set()) # content variations - self.assertSetEqual(set(ext.extract(node, None, (ns.bse.filesize, ))), set()) + self.assertSetEqual(set(ext.extract(node, os.stat_result([12345] * len(content)), (p_filesize, p_bar))), + {(node, p_filesize, 12345)}) + # errors are suppressed + self.assertSetEqual(set(ext.extract(node, None, (p_filesize, ))), set()) ## main ## -- cgit v1.2.3 From b96c6e2096c387b70e2a4c1f0bc53b6044a0dc6f Mon Sep 17 00:00:00 2001 From: Matthias Baumgartner Date: Fri, 25 Nov 2022 14:36:27 +0100 Subject: decouple readers and extractors; use strings for 
reference and repeated type annotations --- bsie/base/extractor.py | 5 ++--- bsie/base/reader.py | 11 ++--------- bsie/extractor/generic/path.py | 4 ++-- bsie/extractor/generic/stat.py | 4 ++-- bsie/reader/path.py | 7 ++----- bsie/reader/stat.py | 6 ++---- 6 files changed, 12 insertions(+), 25 deletions(-) diff --git a/bsie/base/extractor.py b/bsie/base/extractor.py index a6a69c6..7acf2bd 100644 --- a/bsie/base/extractor.py +++ b/bsie/base/extractor.py @@ -8,8 +8,7 @@ Author: Matthias Baumgartner, 2022 import abc import typing -# inner-module imports -from . import reader +# bsie imports from bsie.utils import node from bsie.utils.bsfs import schema as _schema, typename @@ -50,7 +49,7 @@ class Extractor(abc.ABC): """Produce (node, predicate, value)-triples from some content.""" # what type of content is expected (i.e. reader subclass). - CONTENT_READER: typing.Optional[typing.Type[reader.Reader]] = None + CONTENT_READER: typing.Optional[str] = None # extractor schema. schema: _schema.Schema diff --git a/bsie/base/reader.py b/bsie/base/reader.py index f29e451..e59abef 100644 --- a/bsie/base/reader.py +++ b/bsie/base/reader.py @@ -12,12 +12,11 @@ Author: Matthias Baumgartner, 2022 import abc import typing -# inner-module imports +# bsie imports from bsie.utils.bsfs import URI, typename # exports __all__: typing.Sequence[str] = ( - 'Aggregator', 'Reader', ) @@ -27,20 +26,14 @@ __all__: typing.Sequence[str] = ( class Reader(abc.ABC): """Read and return some content from a file.""" - # In what data structure content is returned - CONTENT_TYPE = typing.Union[typing.Any] - # NOTE: Child classes must also assign a typing.Union even if there's - # only one options - def __str__(self) -> str: return typename(self) def __repr__(self) -> str: return f'{typename(self)}()' - # FIXME: How about using contexts instead of calls? 
@abc.abstractmethod - def __call__(self, path: URI) -> CONTENT_TYPE: + def __call__(self, path: URI) -> typing.Any: """Return some content of the file at *path*. Raises a `ReaderError` if the reader cannot make sense of the file format. """ diff --git a/bsie/extractor/generic/path.py b/bsie/extractor/generic/path.py index f358a79..f346f97 100644 --- a/bsie/extractor/generic/path.py +++ b/bsie/extractor/generic/path.py @@ -24,7 +24,7 @@ __all__: typing.Sequence[str] = ( class Path(extractor.Extractor): """Extract information from file's path.""" - CONTENT_READER = bsie.reader.path.Path + CONTENT_READER = 'bsie.reader.path.Path' # mapping from predicate to handler function. _callmap: typing.Dict[schema.Predicate, typing.Callable[[str], typing.Any]] @@ -45,7 +45,7 @@ class Path(extractor.Extractor): def extract( self, subject: node.Node, - content: CONTENT_READER.CONTENT_TYPE, + content: str, predicates: typing.Iterable[schema.Predicate], ) -> typing.Iterator[typing.Tuple[node.Node, schema.Predicate, typing.Any]]: for pred in predicates: diff --git a/bsie/extractor/generic/stat.py b/bsie/extractor/generic/stat.py index e5387af..7088c0a 100644 --- a/bsie/extractor/generic/stat.py +++ b/bsie/extractor/generic/stat.py @@ -24,7 +24,7 @@ __all__: typing.Sequence[str] = ( class Stat(extractor.Extractor): """Extract information from the file system.""" - CONTENT_READER = bsie.reader.stat.Stat + CONTENT_READER = 'bsie.reader.stat.Stat' # mapping from predicate to handler function. 
_callmap: typing.Dict[_schema.Predicate, typing.Callable[[os.stat_result], typing.Any]] @@ -45,7 +45,7 @@ class Stat(extractor.Extractor): def extract( self, subject: node.Node, - content: CONTENT_READER.CONTENT_TYPE, + content: os.stat_result, predicates: typing.Iterable[_schema.Predicate], ) -> typing.Iterator[typing.Tuple[node.Node, _schema.Predicate, typing.Any]]: for pred in predicates: diff --git a/bsie/reader/path.py b/bsie/reader/path.py index d27c664..d60f187 100644 --- a/bsie/reader/path.py +++ b/bsie/reader/path.py @@ -5,10 +5,9 @@ A copy of the license is provided with the project. Author: Matthias Baumgartner, 2022 """ # imports -import os import typing -# inner-module imports +# bsie imports from bsie.base import reader # exports @@ -22,9 +21,7 @@ __all__: typing.Sequence[str] = ( class Path(reader.Reader): """Return the path.""" - CONTENT_TYPE = typing.Union[str] - - def __call__(self, path: str) -> CONTENT_TYPE: + def __call__(self, path: str) -> str: return path diff --git a/bsie/reader/stat.py b/bsie/reader/stat.py index f0b83fb..6d40ab8 100644 --- a/bsie/reader/stat.py +++ b/bsie/reader/stat.py @@ -8,7 +8,7 @@ Author: Matthias Baumgartner, 2022 import os import typing -# inner-module imports +# bsie imports from bsie.base import reader, errors # exports @@ -22,9 +22,7 @@ __all__: typing.Sequence[str] = ( class Stat(reader.Reader): """Read and return the filesystem's stat infos.""" - CONTENT_TYPE = typing.Union[os.stat_result] - - def __call__(self, path: str) -> CONTENT_TYPE: + def __call__(self, path: str) -> os.stat_result: try: return os.stat(path) except Exception: -- cgit v1.2.3 From 9ce32829b2bb85907a34a543bfcaa9183d1e362c Mon Sep 17 00:00:00 2001 From: Matthias Baumgartner Date: Fri, 25 Nov 2022 14:39:18 +0100 Subject: string conversion and equality checks --- bsie/base/extractor.py | 7 ++++ bsie/base/reader.py | 6 +++ bsie/extractor/generic/constant.py | 6 +++ bsie/utils/node.py | 18 ++++++++- test/base/__init__.py | 0 
test/base/test_extractor.py | 70 +++++++++++++++++++++++++++++++++ test/base/test_reader.py | 45 +++++++++++++++++++++ test/extractor/generic/test_constant.py | 37 +++++++++++++++++ test/utils/__init__.py | 0 test/utils/test_node.py | 66 +++++++++++++++++++++++++++++++ 10 files changed, 253 insertions(+), 2 deletions(-) create mode 100644 test/base/__init__.py create mode 100644 test/base/test_extractor.py create mode 100644 test/base/test_reader.py create mode 100644 test/utils/__init__.py create mode 100644 test/utils/test_node.py diff --git a/bsie/base/extractor.py b/bsie/base/extractor.py index 7acf2bd..2fc4f18 100644 --- a/bsie/base/extractor.py +++ b/bsie/base/extractor.py @@ -63,6 +63,13 @@ class Extractor(abc.ABC): def __repr__(self) -> str: return f'{typename(self)}()' + def __eq__(self, other: typing.Any) -> bool: + return isinstance(other, type(self)) \ + and self.CONTENT_READER == other.CONTENT_READER \ + and self.schema == other.schema + + def __hash__(self) -> int: + return hash((type(self), self.CONTENT_READER, self.schema)) def predicates(self) -> typing.Iterator[_schema.Predicate]: """Return the predicates that may be part of extracted triples.""" diff --git a/bsie/base/reader.py b/bsie/base/reader.py index e59abef..b7eabf7 100644 --- a/bsie/base/reader.py +++ b/bsie/base/reader.py @@ -32,6 +32,12 @@ class Reader(abc.ABC): def __repr__(self) -> str: return f'{typename(self)}()' + def __eq__(self, other: typing.Any) -> bool: + return isinstance(other, type(self)) + + def __hash__(self) -> int: + return hash(type(self)) + @abc.abstractmethod def __call__(self, path: URI) -> typing.Any: """Return some content of the file at *path*. 
diff --git a/bsie/extractor/generic/constant.py b/bsie/extractor/generic/constant.py index 795bac6..7da792a 100644 --- a/bsie/extractor/generic/constant.py +++ b/bsie/extractor/generic/constant.py @@ -38,6 +38,12 @@ class Constant(extractor.Extractor): self._tuples = tuple((self.schema.predicate(p_uri), value) for p_uri, value in tuples) # FIXME: use schema instance for value checking + def __eq__(self, other: typing.Any) -> bool: + return super().__eq__(other) \ + and self._tuples == other._tuples + + def __hash__(self) -> int: + return hash((super().__hash__(), self._tuples)) def extract( self, diff --git a/bsie/utils/node.py b/bsie/utils/node.py index 3a0f06b..c9c494f 100644 --- a/bsie/utils/node.py +++ b/bsie/utils/node.py @@ -7,8 +7,8 @@ Author: Matthias Baumgartner, 2022 # imports import typing -# inner-module imports -from bsie.utils.bsfs import URI +# bsie imports +from bsie.utils.bsfs import URI, typename # exports __all__: typing.Sequence[str] = ( @@ -36,4 +36,18 @@ class Node(): self.node_type = URI(node_type) self.uri = URI(uri) + def __eq__(self, other: typing.Any) -> bool: + return isinstance(other, Node) \ + and other.node_type == self.node_type \ + and other.uri == self.uri + + def __hash__(self) -> int: + return hash((type(self), self.node_type, self.uri)) + + def __str__(self) -> str: + return f'{typename(self)}({self.node_type}, {self.uri})' + + def __repr__(self) -> str: + return f'{typename(self)}({self.node_type}, {self.uri})' + ## EOF ## diff --git a/test/base/__init__.py b/test/base/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/test/base/test_extractor.py b/test/base/test_extractor.py new file mode 100644 index 0000000..7a00079 --- /dev/null +++ b/test/base/test_extractor.py @@ -0,0 +1,70 @@ +""" + +Part of the bsie test suite. +A copy of the license is provided with the project. 
+Author: Matthias Baumgartner, 2022 +""" +# imports +import unittest + +# bsie imports +from bsie.utils import ns +from bsie.utils.bsfs import schema as _schema, URI + +# objects to test +from bsie.base import extractor + + +## code ## + +class StubExtractor(extractor.Extractor): + def __init__(self): + super().__init__(_schema.Schema.from_string(extractor.SCHEMA_PREAMBLE + ''' + bse:author rdfs:subClassOf bsfs:Predicate ; + rdfs:domain bsfs:Entity ; + rdfs:range xsd:string ; + owl:maxCardinality "INF"^^xsd:number . + bse:comment rdfs:subClassOf bsfs:Predicate ; + rdfs:domain bsfs:Entity ; + rdfs:range xsd:string ; + owl:maxCardinality "INF"^^xsd:number . + ''')) + + def extract(self, subject, content, predicates): + raise NotImplementedError() + +class StubSub(StubExtractor): + pass + +class TestExtractor(unittest.TestCase): + def test_essentials(self): + ext = StubExtractor() + self.assertEqual(str(ext), 'StubExtractor') + self.assertEqual(repr(ext), 'StubExtractor()') + self.assertEqual(ext, StubExtractor()) + self.assertEqual(hash(ext), hash(StubExtractor())) + + sub = StubSub() + self.assertEqual(str(sub), 'StubSub') + self.assertEqual(repr(sub), 'StubSub()') + self.assertEqual(sub, StubSub()) + self.assertEqual(hash(sub), hash(StubSub())) + self.assertNotEqual(ext, sub) + self.assertNotEqual(hash(ext), hash(sub)) + + def test_predicates(self): + schema = _schema.Schema.Empty() + entity = schema.node(ns.bsfs.Node).get_child(ns.bsfs.Entity) + string = schema.literal(ns.bsfs.Literal).get_child(URI('http://www.w3.org/2001/XMLSchema#string')) + p_author = schema.predicate(ns.bsfs.Predicate).get_child(ns.bse.author, domain=entity, range=string) + p_comment = schema.predicate(ns.bsfs.Predicate).get_child(ns.bse.comment, domain=entity, range=string) + ext = StubExtractor() + self.assertSetEqual(set(ext.predicates()), {p_author, p_comment} | set(schema.predicates())) + + +## main ## + +if __name__ == '__main__': + unittest.main() + +## EOF ## diff --git 
a/test/base/test_reader.py b/test/base/test_reader.py new file mode 100644 index 0000000..802b314 --- /dev/null +++ b/test/base/test_reader.py @@ -0,0 +1,45 @@ +""" + +Part of the bsie test suite. +A copy of the license is provided with the project. +Author: Matthias Baumgartner, 2022 +""" +# imports +import unittest + +# objects to test +from bsie.base import reader + + +## code ## + +class StubReader(reader.Reader): + def __call__(self, path): + raise NotImplementedError() + +class StubSub(StubReader): + pass + +class TestReader(unittest.TestCase): + def test_essentials(self): + ext = StubReader() + self.assertEqual(str(ext), 'StubReader') + self.assertEqual(repr(ext), 'StubReader()') + self.assertEqual(ext, StubReader()) + self.assertEqual(hash(ext), hash(StubReader())) + + sub = StubSub() + self.assertEqual(str(sub), 'StubSub') + self.assertEqual(repr(sub), 'StubSub()') + self.assertEqual(sub, StubSub()) + self.assertEqual(hash(sub), hash(StubSub())) + self.assertNotEqual(ext, sub) + self.assertNotEqual(hash(ext), hash(sub)) + + +## main ## + +if __name__ == '__main__': + unittest.main() + +## EOF ## diff --git a/test/extractor/generic/test_constant.py b/test/extractor/generic/test_constant.py index 7fdb8ac..aa33fb4 100644 --- a/test/extractor/generic/test_constant.py +++ b/test/extractor/generic/test_constant.py @@ -78,6 +78,43 @@ class TestConstant(unittest.TestCase): #self.assertRaises(ValueError, Constant, schema, [ # (ns.bse.author, Foo())]) + def test_eq(self): + schema_a = ''' + bse:author rdfs:subClassOf bsfs:Predicate ; + rdfs:domain bsfs:Entity ; + rdfs:range xsd:string ; + owl:maxCardinality "1"^^xsd:number . + ''' + schema_b = ''' + bse:comment rdfs:subClassOf bsfs:Predicate ; + rdfs:domain bsfs:Entity ; + rdfs:range xsd:string ; + owl:maxCardinality "INF"^^xsd:number . 
+ ''' + tuples_a = [(ns.bse.author, 'Me, myself, and I')] + tuples_b = [(ns.bse.comment, 'the quick brown fox jumps over the lazy dog.') ] + # distinct instances, same data + self.assertEqual( + Constant(schema_a, tuples_a), + Constant(schema_a, tuples_a)) + self.assertEqual( + hash(Constant(schema_a, tuples_a)), + hash(Constant(schema_a, tuples_a))) + # different data + self.assertNotEqual( + Constant(schema_a, tuples_a), + Constant(schema_b, tuples_b)) + self.assertNotEqual( + hash(Constant(schema_a, tuples_a)), + hash(Constant(schema_b, tuples_b))) + # different objects + class Foo(): pass + self.assertNotEqual(Constant(schema_a, tuples_a), Foo()) + self.assertNotEqual(hash(Constant(schema_a, tuples_a)), hash(Foo())) + self.assertNotEqual(Constant(schema_a, tuples_a), 123) + self.assertNotEqual(hash(Constant(schema_a, tuples_a)), hash(123)) + self.assertNotEqual(Constant(schema_a, tuples_a), None) + self.assertNotEqual(hash(Constant(schema_a, tuples_a)), hash(None)) ## main ## diff --git a/test/utils/__init__.py b/test/utils/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/test/utils/test_node.py b/test/utils/test_node.py new file mode 100644 index 0000000..826f199 --- /dev/null +++ b/test/utils/test_node.py @@ -0,0 +1,66 @@ +""" + +Part of the bsie test suite. +A copy of the license is provided with the project. 
+Author: Matthias Baumgartner, 2022 +""" +# imports +import unittest + +# bsie imports +from bsie.utils.bsfs import URI +from bsie.utils import ns + +# objects to test +from bsie.utils.node import Node + + +## code ## + +class TestNode(unittest.TestCase): + def test_equality(self): + uri = URI('http://example.com/me/entity#1234') + node = Node(ns.bsfs.Entity, uri) + # basic equivalence + self.assertEqual(node, Node(ns.bsfs.Entity, URI('http://example.com/me/entity#1234'))) + self.assertEqual(hash(node), hash(Node(ns.bsfs.Entity, URI('http://example.com/me/entity#1234')))) + # equality respects uri + self.assertNotEqual(node, Node(ns.bsfs.Entity, URI('http://example.com/me/entity#4321'))) + self.assertNotEqual(hash(node), hash(Node(ns.bsfs.Entity, URI('http://example.com/me/entity#4321')))) + # equality respects node_type + self.assertNotEqual(node, Node(ns.bsfs.Foo, uri)) + self.assertNotEqual(hash(node), hash(Node(ns.bsfs.Foo, uri))) + # not equal to other types + self.assertNotEqual(node, 1234) + self.assertNotEqual(hash(node), hash(1234)) + self.assertNotEqual(node, uri) + self.assertNotEqual(hash(node), hash(uri)) + self.assertNotEqual(node, ns.bsfs.Entity) + self.assertNotEqual(hash(node), hash(ns.bsfs.Entity)) + class Foo(): pass + self.assertNotEqual(node, Foo()) + self.assertNotEqual(hash(node), hash(Foo())) + + def test_str(self): + uri = URI('http://example.com/me/entity#1234') + # basic string conversion + node = Node(ns.bsfs.Entity, uri) + self.assertEqual(str(node), 'Node(http://bsfs.ai/schema/Entity, http://example.com/me/entity#1234)') + self.assertEqual(repr(node), 'Node(http://bsfs.ai/schema/Entity, http://example.com/me/entity#1234)') + # string conversion respects node_type + node = Node(ns.bsfs.Foo, uri) + self.assertEqual(str(node), 'Node(http://bsfs.ai/schema/Foo, http://example.com/me/entity#1234)') + self.assertEqual(repr(node), 'Node(http://bsfs.ai/schema/Foo, http://example.com/me/entity#1234)') + # string conversion respects uri + node = 
Node(ns.bsfs.Entity, URI('http://example.com/me/entity#4321')) + self.assertEqual(str(node), 'Node(http://bsfs.ai/schema/Entity, http://example.com/me/entity#4321)') + self.assertEqual(repr(node), 'Node(http://bsfs.ai/schema/Entity, http://example.com/me/entity#4321)') + + + +## main ## + +if __name__ == '__main__': + unittest.main() + +## EOF ## -- cgit v1.2.3 From c9a1dea230054f5d6f40b7fd5e3930609c5f6416 Mon Sep 17 00:00:00 2001 From: Matthias Baumgartner Date: Fri, 25 Nov 2022 14:41:38 +0100 Subject: code analysis tool configs and minor fixes --- .coveragerc | 15 ++++ .mypy.ini | 3 + .pylintrc | 193 ++++++++++++++++++++++++++++++++++++++++++++++++++++ README | 51 ++++++++++++++ bsie/base/errors.py | 3 +- bsie/reader/stat.py | 4 +- 6 files changed, 265 insertions(+), 4 deletions(-) create mode 100644 .coveragerc create mode 100644 .mypy.ini create mode 100644 .pylintrc diff --git a/.coveragerc b/.coveragerc new file mode 100644 index 0000000..40f07cc --- /dev/null +++ b/.coveragerc @@ -0,0 +1,15 @@ +[run] +dynamic_context = test_function +branch = True +source = bsie +data_file = .coverage +command_line = -m unittest + +[report] +show_missing = True +skip_empty = True + +[html] +directory = .htmlcov +show_contexts = True + diff --git a/.mypy.ini b/.mypy.ini new file mode 100644 index 0000000..4d0a25d --- /dev/null +++ b/.mypy.ini @@ -0,0 +1,3 @@ +[mypy] +ignore_missing_imports = True +packages=bsie diff --git a/.pylintrc b/.pylintrc new file mode 100644 index 0000000..3cfae38 --- /dev/null +++ b/.pylintrc @@ -0,0 +1,193 @@ +[MAIN] + +# Pickle collected data for later comparisons. +persistent=no + +# Minimum Python version to use for version dependent checks. Will default to +# the version used to run pylint. +py-version=3.8 + +# Discover python modules and packages in the file system subtree. +recursive=yes + +# When enabled, pylint would attempt to guess common misconfiguration and emit +# user-friendly hints instead of false-positive error messages. 
+suggestion-mode=yes + + +[BASIC] + +# Minimum line length for functions/classes that require docstrings, shorter +# ones are exempt. +docstring-min-length=-1 + +# Bad variable names which should always be refused, separated by a comma. +bad-names=foo,bar,abc,cba,xyz,zyx,foobar,hello,world + +# Good variable names which should always be accepted, separated by a comma. +good-names=i,j,k,n,_ + +# Naming style matching correct argument names. +argument-naming-style=snake_case + +# Naming style matching correct attribute names. +attr-naming-style=snake_case + +# Naming style matching correct class attribute names. +class-attribute-naming-style=any + +# Naming style matching correct class constant names. +class-const-naming-style=UPPER_CASE + +# Naming style matching correct class names. +class-naming-style=PascalCase + +# Naming style matching correct constant names. +const-naming-style=UPPER_CASE + +# Naming style matching correct function names. +function-naming-style=snake_case + +# Include a hint for the correct naming format with invalid-name. +include-naming-hint=yes + +# Naming style matching correct inline iteration names. +inlinevar-naming-style=any + +# Naming style matching correct method names. +method-naming-style=snake_case + +# Naming style matching correct module names. +module-naming-style=snake_case + +# Naming style matching correct variable names. +variable-naming-style=snake_case + + +[DESIGN] + +# Maximum number of arguments for function / method. +max-args=5 + +# Maximum number of attributes for a class (see R0902). +max-attributes=7 + +# Maximum number of boolean expressions in an if statement (see R0916). +max-bool-expr=5 + +# Maximum number of branch for function / method body. +max-branches=12 + +# Maximum number of locals for function / method body. +max-locals=15 + +# Maximum number of parents for a class (see R0901). +max-parents=7 + +# Maximum number of public methods for a class (see R0904). 
+max-public-methods=20 + +# Maximum number of return / yield for function / method body. +max-returns=6 + +# Maximum number of statements in function / method body. +max-statements=50 + +# Minimum number of public methods for a class (see R0903). +min-public-methods=1 + + +[FORMAT] + +# Expected format of line ending, e.g. empty (any line ending), LF or CRLF. +expected-line-ending-format= + +# Regexp for a line that is allowed to be longer than the limit. +ignore-long-lines=^\s*(# )?<?https?://\S+>?$ + +# Number of spaces of indent required inside a hanging or continued line. +indent-after-paren=4 + +# String used as indentation unit. This is usually " " (4 spaces) or "\t" (1 +# tab). +indent-string=' ' + +# Maximum number of characters on a single line. +max-line-length=120 + +# Maximum number of lines in a module. +max-module-lines=1000 + +# Allow the body of a class to be on the same line as the declaration if body +# contains single statement. +single-line-class-stmt=no + +# Allow the body of an if to be on the same line as the test if there is no +# else. +single-line-if-stmt=no + + +[IMPORTS] + +# List of modules that can be imported at any level, not just the top level +# one. +allow-any-import-level= + +# Allow wildcard imports from modules that define __all__. +allow-wildcard-with-all=no + + +[LOGGING] + +# The type of string formatting that logging methods do. `old` means using % +# formatting, `new` is for `{}` formatting. +logging-format-style=old + + + +[MISCELLANEOUS] + +# List of note tags to take in consideration, separated by a comma. +notes=FIXME,TODO,NOTE + + + +[REPORTS] + +# Tells whether to display a full report or only the messages. +reports=yes + +# Activate the evaluation score. +score=yes + + +[SIMILARITIES] + +# Minimum lines number of a similarity. +min-similarity-lines=4 + + +[STRING] + +# This flag controls whether inconsistent-quotes generates a warning when the +# character used as a quote delimiter is used inconsistently within a module.
+check-quote-consistency=yes + + +[TYPECHECK] + +# Tells whether to warn about missing members when the owner of the attribute +# is inferred to be None. +ignore-none=no + + +[VARIABLES] + +# List of strings which can identify a callback function by name. A callback +# name must start or end with one of those strings. +callbacks=clbk,callback + + + + +# Disable: R1735 (use-dict-literal) diff --git a/README b/README index b790244..3326196 100644 --- a/README +++ b/README @@ -3,3 +3,54 @@ Black Star Information Extraction ================================= +### Developer tools setup + +#### Test coverage (coverage) + +Resources: +* https://coverage.readthedocs.io/en/6.5.0/index.html +* https://nedbatchelder.com/blog/200710/flaws_in_coverage_measurement.html + +Commands: +$ pip install coverage +$ coverage run ; coverage html ; xdg-open .htmlcov/index.html + + + +#### Static code analysis (pylint) + +Resources: +* https://github.com/PyCQA/pylint +* https://pylint.org/ +* https://pylint.pycqa.org/en/latest/user_guide/messages/messages_overview.html#messages-overview + +Commands: +$ pip install pylint +$ pylint bsie + + + +#### Type analysis (mypy) + +Resources: +* https://github.com/python/mypy +* https://mypy.readthedocs.io/en/stable/ + +Commands: +$ pip install mypy +$ mypy + + + +#### Documentation (sphinx) + +Resources: +* +* + +Commands: +$ pip install ... 
+$ + + + diff --git a/bsie/base/errors.py b/bsie/base/errors.py index eedce3b..a86b7e8 100644 --- a/bsie/base/errors.py +++ b/bsie/base/errors.py @@ -10,11 +10,10 @@ import typing # exports __all__: typing.Sequence[str] = ( 'ExtractorError', + 'ReaderError', ) - - ## code ## class _BSIEError(Exception): diff --git a/bsie/reader/stat.py b/bsie/reader/stat.py index 6d40ab8..592d912 100644 --- a/bsie/reader/stat.py +++ b/bsie/reader/stat.py @@ -25,8 +25,8 @@ class Stat(reader.Reader): def __call__(self, path: str) -> os.stat_result: try: return os.stat(path) - except Exception: - raise errors.ReaderError(path) + except Exception as err: + raise errors.ReaderError(path) from err ## EOF ## -- cgit v1.2.3 From 3e6a69ce7f109f0fd4352507ad60d58d4cbd24a7 Mon Sep 17 00:00:00 2001 From: Matthias Baumgartner Date: Fri, 25 Nov 2022 14:43:12 +0100 Subject: builders and pipeline --- bsie/base/errors.py | 8 ++ bsie/tools/__init__.py | 20 ++++ bsie/tools/builder.py | 217 ++++++++++++++++++++++++++++++++++++++ bsie/tools/pipeline.py | 121 ++++++++++++++++++++++ bsie/utils/bsfs.py | 3 +- test/tools/__init__.py | 0 test/tools/test_builder.py | 247 ++++++++++++++++++++++++++++++++++++++++++++ test/tools/test_pipeline.py | 167 ++++++++++++++++++++++++++++++ test/tools/testfile.t | 1 + 9 files changed, 783 insertions(+), 1 deletion(-) create mode 100644 bsie/tools/__init__.py create mode 100644 bsie/tools/builder.py create mode 100644 bsie/tools/pipeline.py create mode 100644 test/tools/__init__.py create mode 100644 test/tools/test_builder.py create mode 100644 test/tools/test_pipeline.py create mode 100644 test/tools/testfile.t diff --git a/bsie/base/errors.py b/bsie/base/errors.py index a86b7e8..760351f 100644 --- a/bsie/base/errors.py +++ b/bsie/base/errors.py @@ -9,7 +9,9 @@ import typing # exports __all__: typing.Sequence[str] = ( + 'BuilderError', 'ExtractorError', + 'LoaderError', 'ReaderError', ) @@ -19,6 +21,12 @@ __all__: typing.Sequence[str] = ( class _BSIEError(Exception): 
"""Generic BSIE error.""" +class BuilderError(_BSIEError): + """The Builder failed to create an instance.""" + +class LoaderError(BuilderError): + """Failed to load a module or class.""" + class ExtractorError(_BSIEError): """The Extractor failed to process the given content.""" diff --git a/bsie/tools/__init__.py b/bsie/tools/__init__.py new file mode 100644 index 0000000..8ca9620 --- /dev/null +++ b/bsie/tools/__init__.py @@ -0,0 +1,20 @@ +""" + +Part of the bsie module. +A copy of the license is provided with the project. +Author: Matthias Baumgartner, 2022 +""" +# imports +import typing + +# inner-module imports +from . import builder +from . import pipeline + +# exports +__all__: typing.Sequence[str] = ( + 'builder', + 'pipeline', + ) + +## EOF ## diff --git a/bsie/tools/builder.py b/bsie/tools/builder.py new file mode 100644 index 0000000..8f7a410 --- /dev/null +++ b/bsie/tools/builder.py @@ -0,0 +1,217 @@ +""" + +Part of the bsie module. +A copy of the license is provided with the project. +Author: Matthias Baumgartner, 2022 +""" +# imports +import importlib +import logging +import typing + +# bsie imports +from bsie import base +from bsie.base import errors +from bsie.utils.bsfs import URI, typename + +# inner-module imports +from . import pipeline + +# exports +__all__: typing.Sequence[str] = ( + 'ExtractorBuilder', + 'PipelineBuilder', + 'ReaderBuilder', + ) + + +## code ## + +logger = logging.getLogger(__name__) + +def _safe_load(module_name: str, class_name: str): + """Get a class from a module. 
Raise BuilderError if anything goes wrong.""" + try: + # load the module + module = importlib.import_module(module_name) + except Exception as err: + # cannot import module + raise errors.LoaderError(f'cannot load module {module_name}') from err + + try: + # get the class from the module + cls = getattr(module, class_name) + except Exception as err: + # cannot find the class + raise errors.LoaderError(f'cannot load class {class_name} from module {module_name}') from err + + return cls + + +def _unpack_name(name): + """Split a name into its module and class component (dot-separated).""" + if not isinstance(name, str): + raise TypeError(name) + if '.' not in name: + raise ValueError('name must be a qualified class name.') + module_name, class_name = name[:name.rfind('.')], name[name.rfind('.')+1:] + if module_name == '': + raise ValueError('name must be a qualified class name.') + return module_name, class_name + + +class ReaderBuilder(): + """Build `bsie.base.reader.Reader` instances. + + Readers are defined via their qualified class name + (e.g., bsie.reader.path.Path) and optional keyword + arguments that are passed to the constructor via + the *kwargs* argument (name as key, kwargs as value). + The ReaderBuilder keeps a cache of previously built + reader instances, as they are anyway built with + identical keyword arguments. 
+ + """ + + # keyword arguments + kwargs: typing.Dict[str, typing.Dict[str, typing.Any]] + + # cached readers + cache: typing.Dict[str, base.reader.Reader] + + def __init__(self, kwargs: typing.Dict[str, typing.Dict[str, typing.Any]]): + self.kwargs = kwargs + self.cache = {} + + def build(self, name: str) -> base.reader.Reader: + """Return an instance for the qualified class name.""" + # return cached instance + if name in self.cache: + return self.cache[name] + + # check name and get module/class components + module_name, class_name = _unpack_name(name) + + # import reader class + cls = _safe_load(module_name, class_name) + + # get kwargs + kwargs = self.kwargs.get(name, {}) + if not isinstance(kwargs, dict): + raise TypeError(f'expected a kwargs dict, found {typename(kwargs)}') + + try: # build, cache, and return instance + obj = cls(**kwargs) + # cache instance + self.cache[name] = obj + # return instance + return obj + + except Exception as err: + raise errors.BuilderError(f'failed to build reader {name} due to {typename(err)}: {err}') from err + + +class ExtractorBuilder(): + """Build `bsie.base.extractor.Extractor` instances. + + It is permissible to build multiple instances of the same extractor + (typically with different arguments), hence the ExtractorBuilder + receives a list of build specifications. Each specification is + a dict with a single key (extractor's qualified name) and a dict + to be used as keyword arguments.
+ Example: [{'bsie.extractor.generic.path.Path': {}}, ] + + """ + + # build specifications + specs: typing.List[typing.Dict[str, typing.Dict[str, typing.Any]]] + + def __init__(self, specs: typing.List[typing.Dict[str, typing.Dict[str, typing.Any]]]): + self.specs = specs + + def __iter__(self) -> typing.Iterator[int]: + """Iterate over extractor specifications.""" + return iter(range(len(self.specs))) + + def build(self, index: int) -> base.extractor.Extractor: + """Return an instance of the n'th extractor (n=*index*).""" + # get build instructions + specs = self.specs[index] + + # check specs structure. expecting[{name: {kwargs}}] + if not isinstance(specs, dict): + raise TypeError(f'expected a dict, found {typename(specs)}') + if len(specs) != 1: + raise TypeError(f'expected a dict of length one, found {len(specs)}') + + # get name and args from specs + name = next(iter(specs.keys())) + kwargs = specs[name] + + # check kwargs structure + if not isinstance(kwargs, dict): + raise TypeError(f'expected a dict, found {typename(kwargs)}') + + # check name and get module/class components + module_name, class_name = _unpack_name(name) + + # import extractor class + cls = _safe_load(module_name, class_name) + + try: # build and return instance + return cls(**kwargs) + + except Exception as err: + raise errors.BuilderError(f'failed to build extractor {name} due to {typename(err)}: {err}') from err + + +class PipelineBuilder(): + """Build `bsie.tools.pipeline.Pipeline` instances.""" + + def __init__( + self, + prefix: URI, + reader_builder: ReaderBuilder, + extractor_builder: ExtractorBuilder, + ): + self.prefix = prefix + self.rbuild = reader_builder + self.ebuild = extractor_builder + + def build(self) -> pipeline.Pipeline: + """Return a Pipeline instance.""" + ext2rdr = {} + + for eidx in self.ebuild: + # build extractor + try: + ext = self.ebuild.build(eidx) + + except errors.LoaderError as err: # failed to load extractor; skip + logger.error('failed to load extractor: 
%s', err) + continue + + except errors.BuilderError as err: # failed to build instance; skip + logger.error(str(err)) + continue + + try: + # get reader required by extractor + if ext.CONTENT_READER is not None: + rdr = self.rbuild.build(ext.CONTENT_READER) + else: + rdr = None + # store extractor + ext2rdr[ext] = rdr + + except errors.LoaderError as err: # failed to load reader + logger.error('failed to load reader: %s', err) + + except errors.BuilderError as err: # failed to build reader + logger.error(str(err)) + + return pipeline.Pipeline(self.prefix, ext2rdr) + + + +## EOF ## diff --git a/bsie/tools/pipeline.py b/bsie/tools/pipeline.py new file mode 100644 index 0000000..8e1c992 --- /dev/null +++ b/bsie/tools/pipeline.py @@ -0,0 +1,121 @@ +""" + +Part of the bsie module. +A copy of the license is provided with the project. +Author: Matthias Baumgartner, 2022 +""" +# imports +from collections import defaultdict +import logging +import typing + +# bsie imports +from bsie import base +from bsie.utils import ns +from bsie.utils.node import Node +from bsie.utils.bsfs import schema as _schema, URI, uuid as _uuid, typename + +# exports +__all__: typing.Sequence[str] = ( + 'Pipeline', + ) + +## code ## + +logger = logging.getLogger(__name__) + +class Pipeline(): + """Extraction pipeline to generate triples from files. + + The Pipeline binds readers and extractors, and performs + the necessary operations to produce triples from a file. + It takes a best-effort approach to extract as many triples + as possible. Errors during the extraction are passed over + and reported to the log. + + """ + + # combined extractor schemas. + schema: _schema.Schema + + # node prefix. 
+ _prefix: URI + + # extractor -> reader mapping + _ext2rdr: typing.Dict[base.extractor.Extractor, typing.Optional[base.reader.Reader]] + + def __init__( + self, + prefix: URI, + ext2rdr: typing.Dict[base.extractor.Extractor, typing.Optional[base.reader.Reader]] + ): + # store core members + self._prefix = prefix + self._ext2rdr = ext2rdr + # compile schema from all extractors + self.schema = _schema.Schema.Union(ext.schema for ext in ext2rdr) + + def __str__(self) -> str: + return typename(self) + + def __repr__(self) -> str: + return f'{typename(self)}(...)' + + def __hash__(self) -> int: + return hash((type(self), self._prefix, self.schema, tuple(self._ext2rdr), tuple(self._ext2rdr.values()))) + + def __eq__(self, other: typing.Any) -> bool: + return isinstance(other, type(self)) \ + and self.schema == other.schema \ + and self._prefix == other._prefix \ + and self._ext2rdr == other._ext2rdr + + def __call__( + self, + path: URI, + predicates: typing.Optional[typing.Iterable[_schema.Predicate]] = None, + ) -> typing.Iterator[typing.Tuple[Node, _schema.Predicate, typing.Any]]: + """Extract triples from the file at *path*. 
Optionally, limit triples to *predicates*.""" + # get predicates + predicates = set(predicates) if predicates is not None else set(self.schema.predicates()) + + # get extractors + extractors = {ext for ext in self._ext2rdr if not set(ext.predicates()).isdisjoint(predicates)} + + # corner-case short-cut + if len(extractors) == 0: + return + + # get readers -> extractors mapping + rdr2ext = defaultdict(set) + for ext in extractors: + rdr = self._ext2rdr[ext] + rdr2ext[rdr].add(ext) + + # create subject for file + uuid = _uuid.UCID.from_path(path) + subject = Node(ns.bsfs.Entity, self._prefix + uuid) + + # extract information + for rdr, extrs in rdr2ext.items(): + try: + # get content + content = rdr(path) if rdr is not None else None + + # apply extractors on this content + for ext in extrs: + try: + # get predicate/value tuples + for node, pred, value in ext.extract(subject, content, predicates): + yield node, pred, value + + except base.errors.ExtractorError as err: + # critical extractor failure. + logger.error('%s failed to extract triples from content: %s', ext, err) + + except base.errors.ReaderError as err: + # failed to read any content. skip. 
+ logger.error('%s failed to read content: %s', rdr, err) + + +## EOF ## diff --git a/bsie/utils/bsfs.py b/bsie/utils/bsfs.py index 01ec5d1..a4b7626 100644 --- a/bsie/utils/bsfs.py +++ b/bsie/utils/bsfs.py @@ -10,7 +10,7 @@ import typing # bsfs imports from bsfs import schema from bsfs.namespace import Namespace -from bsfs.utils import URI, typename +from bsfs.utils import URI, typename, uuid # exports __all__: typing.Sequence[str] = ( @@ -18,6 +18,7 @@ __all__: typing.Sequence[str] = ( 'URI', 'schema', 'typename', + 'uuid', ) ## EOF ## diff --git a/test/tools/__init__.py b/test/tools/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/test/tools/test_builder.py b/test/tools/test_builder.py new file mode 100644 index 0000000..bef0e9d --- /dev/null +++ b/test/tools/test_builder.py @@ -0,0 +1,247 @@ +""" + +Part of the bsie test suite. +A copy of the license is provided with the project. +Author: Matthias Baumgartner, 2022 +""" +# imports +import logging +import unittest + +# bsie imports +from bsie import base +from bsie.base import errors +from bsie.utils.bsfs import URI + +# objects to test +from bsie.tools.builder import ExtractorBuilder +from bsie.tools.builder import PipelineBuilder +from bsie.tools.builder import ReaderBuilder +from bsie.tools.builder import _safe_load +from bsie.tools.builder import _unpack_name + + +## code ## + +class TestUtils(unittest.TestCase): + def test_safe_load(self): + # invalid module + self.assertRaises(errors.LoaderError, _safe_load, 'dBGHMSAYOoKeKMpywDoKZQycENFPvN', 'foobar') + self.assertRaises(errors.LoaderError, _safe_load, 'dBGHMSAYOoKeKMpywDoKZQycENFPvN.bar', 'foobar') + # partially valid module + self.assertRaises(errors.LoaderError, _safe_load, 'os.foo', 'foobar') + # invalid class + self.assertRaises(errors.LoaderError, _safe_load, 'os.path', 'foo') + # valid module and class + cls = _safe_load('collections.abc', 'Container') + import collections.abc + self.assertEqual(cls, collections.abc.Container) + + 
def test_unpack_name(self): + self.assertRaises(TypeError, _unpack_name, 123) + self.assertRaises(TypeError, _unpack_name, None) + self.assertRaises(ValueError, _unpack_name, '') + self.assertRaises(ValueError, _unpack_name, 'path') + self.assertRaises(ValueError, _unpack_name, '.Path') + self.assertEqual(_unpack_name('path.Path'), ('path', 'Path')) + self.assertEqual(_unpack_name('path.foo.bar.Path'), ('path.foo.bar', 'Path')) + + +class TestReaderBuilder(unittest.TestCase): + def test_build(self): + builder = ReaderBuilder({'bsie.reader.path.Path': {}}) + # build configured reader + cls = builder.build('bsie.reader.path.Path') + import bsie.reader.path + self.assertIsInstance(cls, bsie.reader.path.Path) + # build unconfigured reader + cls = builder.build('bsie.reader.stat.Stat') + import bsie.reader.stat + self.assertIsInstance(cls, bsie.reader.stat.Stat) + # re-build previous reader (test cache) + self.assertEqual(cls, builder.build('bsie.reader.stat.Stat')) + # test invalid + self.assertRaises(TypeError, builder.build, 123) + self.assertRaises(TypeError, builder.build, None) + self.assertRaises(ValueError, builder.build, '') + self.assertRaises(ValueError, builder.build, 'Path') + self.assertRaises(errors.BuilderError, builder.build, 'path.Path') + # invalid config + builder = ReaderBuilder({'bsie.reader.stat.Stat': dict(foo=123)}) + self.assertRaises(errors.BuilderError, builder.build, 'bsie.reader.stat.Stat') + builder = ReaderBuilder({'bsie.reader.stat.Stat': 123}) + self.assertRaises(TypeError, builder.build, 'bsie.reader.stat.Stat') + # no instructions + builder = ReaderBuilder({}) + cls = builder.build('bsie.reader.stat.Stat') + self.assertIsInstance(cls, bsie.reader.stat.Stat) + + + +class TestExtractorBuilder(unittest.TestCase): + def test_iter(self): + # no specifications + self.assertListEqual(list(ExtractorBuilder([])), []) + # some specifications + builder = ExtractorBuilder([ + {'bsie.extractor.generic.path.Path': {}}, + 
{'bsie.extractor.generic.stat.Stat': {}}, + {'bsie.extractor.generic.path.Path': {}}, + ]) + self.assertListEqual(list(builder), [0, 1, 2]) + + def test_build(self): + # simple and repeated extractors + builder = ExtractorBuilder([ + {'bsie.extractor.generic.path.Path': {}}, + {'bsie.extractor.generic.stat.Stat': {}}, + {'bsie.extractor.generic.path.Path': {}}, + ]) + ext = [builder.build(0), builder.build(1), builder.build(2)] + import bsie.extractor.generic.path + import bsie.extractor.generic.stat + self.assertListEqual(ext, [ + bsie.extractor.generic.path.Path(), + bsie.extractor.generic.stat.Stat(), + bsie.extractor.generic.path.Path(), + ]) + # out-of-bounds raises IndexError + self.assertRaises(IndexError, builder.build, 3) + + # building with args + builder = ExtractorBuilder([ + {'bsie.extractor.generic.constant.Constant': { + 'schema': ''' + bse:author rdfs:subClassOf bsfs:Predicate ; + rdfs:domain bsfs:Entity ; + rdfs:range xsd:string ; + owl:maxCardinality "1"^^xsd:number . + bse:rating rdfs:subClassOf bsfs:Predicate ; + rdfs:domain bsfs:Entity ; + rdfs:range xsd:integer ; + owl:maxCardinality "1"^^xsd:number . + ''', + 'tuples': [ + ('http://bsfs.ai/schema/Entity#author', 'Me, myself, and I'), + ('http://bsfs.ai/schema/Entity#rating', 123), + ], + }}]) + obj = builder.build(0) + import bsie.extractor.generic.constant + self.assertEqual(obj, bsie.extractor.generic.constant.Constant(''' + bse:author rdfs:subClassOf bsfs:Predicate ; + rdfs:domain bsfs:Entity ; + rdfs:range xsd:string ; + owl:maxCardinality "1"^^xsd:number . + bse:rating rdfs:subClassOf bsfs:Predicate ; + rdfs:domain bsfs:Entity ; + rdfs:range xsd:integer ; + owl:maxCardinality "1"^^xsd:number .
+ ''', [ + ('http://bsfs.ai/schema/Entity#author', 'Me, myself, and I'), + ('http://bsfs.ai/schema/Entity#rating', 123), + ])) + + # building with invalid args + self.assertRaises(errors.BuilderError, ExtractorBuilder( + [{'bsie.extractor.generic.path.Path': {'foo': 123}}]).build, 0) + # non-dict build specification + self.assertRaises(TypeError, ExtractorBuilder( + [('bsie.extractor.generic.path.Path', {})]).build, 0) + # multiple keys per build specification + self.assertRaises(TypeError, ExtractorBuilder( + [{'bsie.extractor.generic.path.Path': {}, + 'bsie.extractor.generic.stat.Stat': {}}]).build, 0) + # non-dict value for kwargs + self.assertRaises(TypeError, ExtractorBuilder( + [{'bsie.extractor.generic.path.Path': 123}]).build, 0) + + + + +class TestPipelineBuilder(unittest.TestCase): + def test_build(self): + prefix = URI('http://example.com/local/file#') + c_schema = ''' + bse:author rdfs:subClassOf bsfs:Predicate ; + rdfs:domain bsfs:Entity ; + rdfs:range xsd:string ; + owl:maxCardinality "1"^^xsd:number . 
+ ''' + c_tuples = [('http://bsfs.ai/schema/Entity#author', 'Me, myself, and I')] + # prepare builders + rbuild = ReaderBuilder({}) + ebuild = ExtractorBuilder([ + {'bsie.extractor.generic.path.Path': {}}, + {'bsie.extractor.generic.stat.Stat': {}}, + {'bsie.extractor.generic.constant.Constant': dict( + schema=c_schema, + tuples=c_tuples, + )}, + ]) + # build pipeline + builder = PipelineBuilder(prefix, rbuild, ebuild) + pipeline = builder.build() + # delayed import + import bsie.reader.path + import bsie.reader.stat + import bsie.extractor.generic.path + import bsie.extractor.generic.stat + import bsie.extractor.generic.constant + # check pipeline + self.assertDictEqual(pipeline._ext2rdr, { + bsie.extractor.generic.path.Path(): bsie.reader.path.Path(), + bsie.extractor.generic.stat.Stat(): bsie.reader.stat.Stat(), + bsie.extractor.generic.constant.Constant(c_schema, c_tuples): None, + }) + + # fail to load extractor + ebuild_err = ExtractorBuilder([ + {'bsie.extractor.generic.foo.Foo': {}}, + {'bsie.extractor.generic.path.Path': {}}, + ]) + with self.assertLogs(logging.getLogger('bsie.tools.builder'), logging.ERROR): + pipeline = PipelineBuilder(prefix, rbuild, ebuild_err).build() + self.assertDictEqual(pipeline._ext2rdr, { + bsie.extractor.generic.path.Path(): bsie.reader.path.Path()}) + + # fail to build extractor + ebuild_err = ExtractorBuilder([ + {'bsie.extractor.generic.path.Path': {'foo': 123}}, + {'bsie.extractor.generic.path.Path': {}}, + ]) + with self.assertLogs(logging.getLogger('bsie.tools.builder'), logging.ERROR): + pipeline = PipelineBuilder(prefix, rbuild, ebuild_err).build() + self.assertDictEqual(pipeline._ext2rdr, { + bsie.extractor.generic.path.Path(): bsie.reader.path.Path()}) + + # fail to load reader + with self.assertLogs(logging.getLogger('bsie.tools.builder'), logging.ERROR): + # switch reader of an extractor + old_reader = bsie.extractor.generic.path.Path.CONTENT_READER + bsie.extractor.generic.path.Path.CONTENT_READER = 
'bsie.reader.foo.Foo' + # build pipeline with invalid reader reference + pipeline = PipelineBuilder(prefix, rbuild, ebuild).build() + self.assertDictEqual(pipeline._ext2rdr, { + bsie.extractor.generic.stat.Stat(): bsie.reader.stat.Stat(), + bsie.extractor.generic.constant.Constant(c_schema, c_tuples): None, + }) + # switch back + bsie.extractor.generic.path.Path.CONTENT_READER = old_reader + + # fail to build reader + rbuild_err = ReaderBuilder({'bsie.reader.stat.Stat': dict(foo=123)}) + with self.assertLogs(logging.getLogger('bsie.tools.builder'), logging.ERROR): + pipeline = PipelineBuilder(prefix, rbuild_err, ebuild).build() + self.assertDictEqual(pipeline._ext2rdr, { + bsie.extractor.generic.path.Path(): bsie.reader.path.Path(), + bsie.extractor.generic.constant.Constant(c_schema, c_tuples): None, + }) + + +## main ## + +if __name__ == '__main__': + unittest.main() + +## EOF ## diff --git a/test/tools/test_pipeline.py b/test/tools/test_pipeline.py new file mode 100644 index 0000000..9888d2e --- /dev/null +++ b/test/tools/test_pipeline.py @@ -0,0 +1,167 @@ +""" + +Part of the bsie test suite. +A copy of the license is provided with the project. +Author: Matthias Baumgartner, 2022 +""" +# imports +import logging +import os +import unittest + +# bsie imports +from bsie.base import errors +from bsie.utils import ns +from bsie.utils.bsfs import URI +from bsie.utils.node import Node +import bsie.extractor.generic.constant +import bsie.extractor.generic.path +import bsie.extractor.generic.stat +import bsie.reader.path +import bsie.reader.stat + +# objects to test +from bsie.tools.pipeline import Pipeline + + +## code ## + +class TestPipeline(unittest.TestCase): + def setUp(self): + # constant A + csA = ''' + bse:author rdfs:subClassOf bsfs:Predicate ; + rdfs:domain bsfs:Entity ; + rdfs:range xsd:string ; + owl:maxCardinality "1"^^xsd:number . 
+ ''' + tupA = [('http://bsfs.ai/schema/Entity#author', 'Me, myself, and I')] + # constant B + csB = ''' + bse:rating rdfs:subClassOf bsfs:Predicate ; + rdfs:domain bsfs:Entity ; + rdfs:range xsd:integer ; + owl:maxCardinality "1"^^xsd:number . + ''' + tupB = [('http://bsfs.ai/schema/Entity#rating', 123)] + # extractors/readers + self.ext2rdr = { + bsie.extractor.generic.path.Path(): bsie.reader.path.Path(), + bsie.extractor.generic.stat.Stat(): bsie.reader.stat.Stat(), + bsie.extractor.generic.constant.Constant(csA, tupA): None, + bsie.extractor.generic.constant.Constant(csB, tupB): None, + } + self.prefix = URI('http://example.com/local/file#') + + def test_essentials(self): + pipeline = Pipeline(self.prefix, self.ext2rdr) + self.assertEqual(str(pipeline), 'Pipeline') + self.assertEqual(repr(pipeline), 'Pipeline(...)') + + def test_equality(self): + pipeline = Pipeline(self.prefix, self.ext2rdr) + # a pipeline is equivalent to itself + self.assertEqual(pipeline, pipeline) + self.assertEqual(hash(pipeline), hash(pipeline)) + # identical builds are equivalent + self.assertEqual(pipeline, Pipeline(self.prefix, self.ext2rdr)) + self.assertEqual(hash(pipeline), hash(Pipeline(self.prefix, self.ext2rdr))) + + # equivalence respects prefix + self.assertNotEqual(pipeline, Pipeline(URI('http://example.com/global/ent#'), self.ext2rdr)) + self.assertNotEqual(hash(pipeline), hash(Pipeline(URI('http://example.com/global/ent#'), self.ext2rdr))) + # equivalence respects extractors/readers + ext2rdr = {ext: rdr for idx, (ext, rdr) in enumerate(self.ext2rdr.items()) if idx % 2 == 0} + self.assertNotEqual(pipeline, Pipeline(self.prefix, ext2rdr)) + self.assertNotEqual(hash(pipeline), hash(Pipeline(self.prefix, ext2rdr))) + + # equivalence respects schema + p2 = Pipeline(self.prefix, self.ext2rdr) + p2.schema = pipeline.schema.Empty() + self.assertNotEqual(pipeline, p2) + self.assertNotEqual(hash(pipeline), hash(p2)) + + # not equal to other types + class Foo(): pass + 
self.assertNotEqual(pipeline, Foo()) + self.assertNotEqual(hash(pipeline), hash(Foo())) + self.assertNotEqual(pipeline, 123) + self.assertNotEqual(hash(pipeline), hash(123)) + self.assertNotEqual(pipeline, None) + self.assertNotEqual(hash(pipeline), hash(None)) + + + def test_call(self): + # build pipeline + pipeline = Pipeline(self.prefix, self.ext2rdr) + # build objects for tests + content_hash = 'e3bb4ab54e4a50d75626a1f76814f152f4edc60a82ad724aa2aa922ca5534427' + subject = Node(ns.bsfs.Entity, self.prefix + content_hash) + testfile = os.path.join(os.path.dirname(__file__), 'testfile.t') + p_filename = pipeline.schema.predicate(ns.bse.filename) + p_filesize = pipeline.schema.predicate(ns.bse.filesize) + p_author = pipeline.schema.predicate(ns.bse.author) + p_rating = pipeline.schema.predicate(ns.bse.rating) + entity = pipeline.schema.node(ns.bsfs.Entity) + p_invalid = pipeline.schema.predicate(ns.bsfs.Predicate).get_child(ns.bse.foo, range=entity) + + # extract given predicates + self.assertSetEqual(set(pipeline(testfile, {p_filename, p_filesize})), { + (subject, p_filename, 'testfile.t'), + (subject, p_filesize, 11), + }) + self.assertSetEqual(set(pipeline(testfile, {p_author})), { + (subject, p_author, 'Me, myself, and I'), + }) + self.assertSetEqual(set(pipeline(testfile, {p_filename})), { + (subject, p_filename, 'testfile.t'), + }) + self.assertSetEqual(set(pipeline(testfile, {p_filesize})), { + (subject, p_filesize, 11), + }) + # extract all predicates + self.assertSetEqual(set(pipeline(testfile)), { + (subject, p_filename, 'testfile.t'), + (subject, p_filesize, 11), + (subject, p_author, 'Me, myself, and I'), + (subject, p_rating, 123), + }) + # invalid predicate + self.assertSetEqual(set(pipeline(testfile, {p_invalid})), set()) + # valid/invalid predicates mixed + self.assertSetEqual(set(pipeline(testfile, {p_filename, p_invalid})), { + (subject, p_filename, 'testfile.t'), + }) + # invalid path + self.assertRaises(FileNotFoundError, list, 
pipeline('inexistent_file')) + # FIXME: unreadable file (e.g. permissions error) + + def test_call_reader_err(self): + class FaultyReader(bsie.reader.path.Path): + def __call__(self, path): + raise errors.ReaderError('reader error') + + pipeline = Pipeline(self.prefix, {bsie.extractor.generic.path.Path(): FaultyReader()}) + with self.assertLogs(logging.getLogger('bsie.tools.pipeline'), logging.ERROR): + testfile = os.path.join(os.path.dirname(__file__), 'testfile.t') + p_filename = pipeline.schema.predicate(ns.bse.filename) + self.assertSetEqual(set(pipeline(testfile, {p_filename})), set()) + + def test_call_extractor_err(self): + class FaultyExtractor(bsie.extractor.generic.path.Path): + def extract(self, subject, content, predicates): + raise errors.ExtractorError('extractor error') + + pipeline = Pipeline(self.prefix, {FaultyExtractor(): bsie.reader.path.Path()}) + with self.assertLogs(logging.getLogger('bsie.tools.pipeline'), logging.ERROR): + testfile = os.path.join(os.path.dirname(__file__), 'testfile.t') + p_filename = pipeline.schema.predicate(ns.bse.filename) + self.assertSetEqual(set(pipeline(testfile, {p_filename})), set()) + + +## main ## + +if __name__ == '__main__': + unittest.main() + +## EOF ## diff --git a/test/tools/testfile.t b/test/tools/testfile.t new file mode 100644 index 0000000..58bf1b8 --- /dev/null +++ b/test/tools/testfile.t @@ -0,0 +1 @@ +hello worl -- cgit v1.2.3 From edc747252a04675c46059215751719b6666a77f9 Mon Sep 17 00:00:00 2001 From: Matthias Baumgartner Date: Sat, 3 Dec 2022 18:57:58 +0100 Subject: adapt to schema interface update: owl:maxCardinality changed to bsfs:unique --- bsie/base/extractor.py | 1 - bsie/extractor/generic/path.py | 2 +- bsie/extractor/generic/stat.py | 2 +- test/base/test_extractor.py | 4 ++-- test/extractor/generic/test_constant.py | 12 ++++++------ test/extractor/generic/test_path.py | 2 +- test/extractor/generic/test_stat.py | 2 +- test/tools/test_builder.py | 10 +++++----- test/tools/test_pipeline.py | 
4 ++-- 9 files changed, 19 insertions(+), 20 deletions(-) diff --git a/bsie/base/extractor.py b/bsie/base/extractor.py index 2fc4f18..75b7173 100644 --- a/bsie/base/extractor.py +++ b/bsie/base/extractor.py @@ -23,7 +23,6 @@ __all__: typing.Sequence[str] = ( # NOTE: The definition here is only for convenience; Each Extractor must implement its use, if so desired. SCHEMA_PREAMBLE = ''' # common external prefixes - prefix owl: prefix rdf: prefix rdfs: prefix xsd: diff --git a/bsie/extractor/generic/path.py b/bsie/extractor/generic/path.py index f346f97..e6b901e 100644 --- a/bsie/extractor/generic/path.py +++ b/bsie/extractor/generic/path.py @@ -36,7 +36,7 @@ class Path(extractor.Extractor): rdfs:range xsd:string ; rdfs:label "File name"^^xsd:string ; schema:description "Filename of entity in some filesystem."^^xsd:string ; - owl:maxCardinality "INF"^^xsd:number . + bsfs:unique "false"^^xsd:boolean . ''')) self._callmap = { self.schema.predicate(ns.bse.filename): self.__filename, diff --git a/bsie/extractor/generic/stat.py b/bsie/extractor/generic/stat.py index 7088c0a..6493d37 100644 --- a/bsie/extractor/generic/stat.py +++ b/bsie/extractor/generic/stat.py @@ -36,7 +36,7 @@ class Stat(extractor.Extractor): rdfs:range xsd:integer ; rdfs:label "File size"^^xsd:string ; schema:description "File size of entity in some filesystem."^^xsd:string ; - owl:maxCardinality "INF"^^xsd:number . + bsfs:unique "false"^^xsd:boolean . ''')) self._callmap = { self.schema.predicate(ns.bse.filesize): self.__filesize, diff --git a/test/base/test_extractor.py b/test/base/test_extractor.py index 7a00079..be876ad 100644 --- a/test/base/test_extractor.py +++ b/test/base/test_extractor.py @@ -23,11 +23,11 @@ class StubExtractor(extractor.Extractor): bse:author rdfs:subClassOf bsfs:Predicate ; rdfs:domain bsfs:Entity ; rdfs:range xsd:string ; - owl:maxCardinality "INF"^^xsd:number . + bsfs:unique "false"^^xsd:boolean . 
bse:comment rdfs:subClassOf bsfs:Predicate ; rdfs:domain bsfs:Entity ; rdfs:range xsd:string ; - owl:maxCardinality "INF"^^xsd:number . + bsfs:unique "false"^^xsd:boolean . ''')) def extract(self, subject, content, predicates): diff --git a/test/extractor/generic/test_constant.py b/test/extractor/generic/test_constant.py index aa33fb4..7f72ccf 100644 --- a/test/extractor/generic/test_constant.py +++ b/test/extractor/generic/test_constant.py @@ -23,11 +23,11 @@ class TestConstant(unittest.TestCase): bse:author rdfs:subClassOf bsfs:Predicate ; rdfs:domain bsfs:Entity ; rdfs:range xsd:string ; - owl:maxCardinality "1"^^xsd:number . + bsfs:unique "true"^^xsd:boolean . bse:comment rdfs:subClassOf bsfs:Predicate ; rdfs:domain bsfs:Entity ; rdfs:range xsd:string ; - owl:maxCardinality "INF"^^xsd:number . + bsfs:unique "false"^^xsd:boolean . ''' tuples = [ (ns.bse.author, 'Me, myself, and I'), @@ -58,11 +58,11 @@ class TestConstant(unittest.TestCase): bse:author rdfs:subClassOf bsfs:Predicate ; rdfs:domain bsfs:Entity ; rdfs:range xsd:string ; - owl:maxCardinality "1"^^xsd:number . + bsfs:unique "true"^^xsd:boolean . bse:comment rdfs:subClassOf bsfs:Predicate ; rdfs:domain bsfs:Entity ; rdfs:range xsd:string ; - owl:maxCardinality "INF"^^xsd:number . + bsfs:unique "false"^^xsd:boolean . ''' # can create a schema self.assertIsInstance(Constant(schema, [ @@ -83,13 +83,13 @@ class TestConstant(unittest.TestCase): bse:author rdfs:subClassOf bsfs:Predicate ; rdfs:domain bsfs:Entity ; rdfs:range xsd:string ; - owl:maxCardinality "1"^^xsd:number . + bsfs:unique "true"^^xsd:boolean . ''' schema_b = ''' bse:comment rdfs:subClassOf bsfs:Predicate ; rdfs:domain bsfs:Entity ; rdfs:range xsd:string ; - owl:maxCardinality "INF"^^xsd:number . + bsfs:unique "false"^^xsd:boolean . 
''' tuples_a = [(ns.bse.author, 'Me, myself, and I')] tuples_b = [(ns.bse.comment, 'the quick brown fox jumps over the lazy dog.') ] diff --git a/test/extractor/generic/test_path.py b/test/extractor/generic/test_path.py index 9376c7c..aa21b04 100644 --- a/test/extractor/generic/test_path.py +++ b/test/extractor/generic/test_path.py @@ -35,7 +35,7 @@ class TestPath(unittest.TestCase): bse:filename rdfs:subClassOf bsfs:Predicate ; rdfs:domain bsfs:Entity ; rdfs:range xsd:string ; - owl:maxCardinality "INF"^^xsd:number . + bsfs:unique "false"^^xsd:boolean . ''')) def test_extract(self): diff --git a/test/extractor/generic/test_stat.py b/test/extractor/generic/test_stat.py index 26dad6a..bed5fab 100644 --- a/test/extractor/generic/test_stat.py +++ b/test/extractor/generic/test_stat.py @@ -36,7 +36,7 @@ class TestStat(unittest.TestCase): bse:filesize rdfs:subClassOf bsfs:Predicate ; rdfs:domain bsfs:Entity ; rdfs:range xsd:integer ; - owl:maxCardinality "INF"^^xsd:number . + bsfs:unique "false"^^xsd:boolean . ''')) def test_extract(self): diff --git a/test/tools/test_builder.py b/test/tools/test_builder.py index bef0e9d..bc6f903 100644 --- a/test/tools/test_builder.py +++ b/test/tools/test_builder.py @@ -115,11 +115,11 @@ class TestExtractorBuilder(unittest.TestCase): bse:author rdfs:subClassOf bsfs:Predicate ; rdfs:domain bsfs:Entity ; rdfs:range xsd:string ; - owl:maxCardinality "1"^^xsd:number . + bsfs:unique "true"^^xsd:boolean . bse:rating rdfs:subClassOf bsfs:Predicate ; rdfs:domain bsfs:Entity ; rdfs:range xsd:integer ; - owl:maxCardinality "1"^^xsd:number . + bsfs:unique "true"^^xsd:boolean . ''', 'tuples': [ ('http://bsfs.ai/schema/Entity#author', 'Me, myself, and I'), @@ -132,11 +132,11 @@ class TestExtractorBuilder(unittest.TestCase): bse:author rdfs:subClassOf bsfs:Predicate ; rdfs:domain bsfs:Entity ; rdfs:range xsd:string ; - owl:maxCardinality "1"^^xsd:number . + bsfs:unique "true"^^xsd:boolean . 
bse:rating rdfs:subClassOf bsfs:Predicate ; rdfs:domain bsfs:Entity ; rdfs:range xsd:integer ; - owl:maxCardinality "1"^^xsd:number . + bsfs:unique "true"^^xsd:boolean . ''', [ ('http://bsfs.ai/schema/Entity#author', 'Me, myself, and I'), ('http://bsfs.ai/schema/Entity#rating', 123), @@ -166,7 +166,7 @@ class TestPipelineBuilder(unittest.TestCase): bse:author rdfs:subClassOf bsfs:Predicate ; rdfs:domain bsfs:Entity ; rdfs:range xsd:string ; - owl:maxCardinality "1"^^xsd:number . + bsfs:unique "true"^^xsd:boolean . ''' c_tuples = [('http://bsfs.ai/schema/Entity#author', 'Me, myself, and I')] # prepare builders diff --git a/test/tools/test_pipeline.py b/test/tools/test_pipeline.py index 9888d2e..f98b329 100644 --- a/test/tools/test_pipeline.py +++ b/test/tools/test_pipeline.py @@ -33,7 +33,7 @@ class TestPipeline(unittest.TestCase): bse:author rdfs:subClassOf bsfs:Predicate ; rdfs:domain bsfs:Entity ; rdfs:range xsd:string ; - owl:maxCardinality "1"^^xsd:number . + bsfs:unique "true"^^xsd:boolean . ''' tupA = [('http://bsfs.ai/schema/Entity#author', 'Me, myself, and I')] # constant B @@ -41,7 +41,7 @@ class TestPipeline(unittest.TestCase): bse:rating rdfs:subClassOf bsfs:Predicate ; rdfs:domain bsfs:Entity ; rdfs:range xsd:integer ; - owl:maxCardinality "1"^^xsd:number . + bsfs:unique "true"^^xsd:boolean . 
''' tupB = [('http://bsfs.ai/schema/Entity#rating', 123)] # extractors/readers -- cgit v1.2.3 From 559e643bb1fa39feefd2eb73847ad9420daf1deb Mon Sep 17 00:00:00 2001 From: Matthias Baumgartner Date: Wed, 14 Dec 2022 06:10:25 +0100 Subject: bsie extraction and info apps --- bsie.app | 49 ++++++ bsie/__init__.py | 6 + bsie/apps/__init__.py | 20 +++ bsie/apps/index.py | 131 ++++++++++++++++ bsie/apps/info.py | 74 +++++++++ bsie/base/errors.py | 6 + bsie/lib/__init__.py | 13 ++ bsie/lib/bsie.py | 80 ++++++++++ bsie/tools/pipeline.py | 4 + bsie/utils/namespaces.py | 2 +- test/apps/__init__.py | 0 test/apps/test_index.py | 159 ++++++++++++++++++++ test/apps/test_info.py | 42 ++++++ test/apps/testdir/alpha/alpha_first | 16 ++ test/apps/testdir/alpha/alpha_second | 12 ++ test/apps/testdir/alpha/omega/omega_first | 14 ++ test/apps/testdir/alpha/omega/omega_second | 10 ++ test/apps/testdir/foo/bar/bar_first | 20 +++ test/apps/testdir/foo/bar/bar_second | 14 ++ test/apps/testdir/foo/foo_first | 11 ++ test/apps/testdir/foo/foo_second | 12 ++ test/apps/testdir/td_first | 18 +++ test/apps/testdir/td_second | 14 ++ test/apps/testfile | 16 ++ test/lib/__init__.py | 0 test/lib/test_bsie.py | 231 +++++++++++++++++++++++++++++ test/lib/testfile.t | 1 + test/tools/test_pipeline.py | 20 ++- test/tools/testfile.t | 2 +- 29 files changed, 991 insertions(+), 6 deletions(-) create mode 100755 bsie.app create mode 100644 bsie/apps/__init__.py create mode 100644 bsie/apps/index.py create mode 100644 bsie/apps/info.py create mode 100644 bsie/lib/__init__.py create mode 100644 bsie/lib/bsie.py create mode 100644 test/apps/__init__.py create mode 100644 test/apps/test_index.py create mode 100644 test/apps/test_info.py create mode 100644 test/apps/testdir/alpha/alpha_first create mode 100644 test/apps/testdir/alpha/alpha_second create mode 100644 test/apps/testdir/alpha/omega/omega_first create mode 100644 test/apps/testdir/alpha/omega/omega_second create mode 100644 
test/apps/testdir/foo/bar/bar_first create mode 100644 test/apps/testdir/foo/bar/bar_second create mode 100644 test/apps/testdir/foo/foo_first create mode 100644 test/apps/testdir/foo/foo_second create mode 100644 test/apps/testdir/td_first create mode 100644 test/apps/testdir/td_second create mode 100644 test/apps/testfile create mode 100644 test/lib/__init__.py create mode 100644 test/lib/test_bsie.py create mode 100644 test/lib/testfile.t diff --git a/bsie.app b/bsie.app new file mode 100755 index 0000000..ba9cee7 --- /dev/null +++ b/bsie.app @@ -0,0 +1,49 @@ +"""BSIE tools. + +Part of the bsie module. +A copy of the license is provided with the project. +Author: Matthias Baumgartner, 2022 +""" +# imports +import argparse +import typing + +# module imports +import bsie +import bsie.apps + +# exports +__all__: typing.Sequence[str] = ( + 'main', + ) + +# config +apps = { + 'index' : bsie.apps.index, + 'info' : bsie.apps.info, + } + + +## code ## + +def main(argv): + """Black Star File System maintenance tools.""" + parser = argparse.ArgumentParser(description=main.__doc__, prog='bsie') + parser.add_argument('--version', action='version', + version='%(prog)s version {}.{}.{}'.format(*bsie.version_info)) + parser.add_argument('app', choices=apps.keys(), + help='Select the application to run.') + parser.add_argument('rest', nargs=argparse.REMAINDER) + # parse + args = parser.parse_args() + # run application + apps[args.app](args.rest) + + +## main ## + +if __name__ == '__main__': + import sys + main(sys.argv[1:]) + +## EOF ## diff --git a/bsie/__init__.py b/bsie/__init__.py index 2f2477a..2b874bd 100644 --- a/bsie/__init__.py +++ b/bsie/__init__.py @@ -5,8 +5,14 @@ A copy of the license is provided with the project. 
Author: Matthias Baumgartner, 2022 """ # imports +import collections import typing +# constants +version_info = collections.namedtuple('version_info', + ('major', 'minor', 'micro')) \ + (0, 0, 1) + # exports __all__: typing.Sequence[str] = [] diff --git a/bsie/apps/__init__.py b/bsie/apps/__init__.py new file mode 100644 index 0000000..a548c3c --- /dev/null +++ b/bsie/apps/__init__.py @@ -0,0 +1,20 @@ +""" + +Part of the bsie module. +A copy of the license is provided with the project. +Author: Matthias Baumgartner, 2022 +""" +# imports +import typing + +# inner-module imports +from .index import main as index +from .info import main as info + +# exports +__all__: typing.Sequence[str] = ( + 'index', + 'info', + ) + +## EOF ## diff --git a/bsie/apps/index.py b/bsie/apps/index.py new file mode 100644 index 0000000..821aa4c --- /dev/null +++ b/bsie/apps/index.py @@ -0,0 +1,131 @@ +""" + +Part of the bsie module. +A copy of the license is provided with the project. +Author: Matthias Baumgartner, 2022 +""" +# imports +import argparse +import os +import typing + +# bsfs imports +import bsfs + +# bsie imports +from bsie.base import errors +from bsie.lib.bsie import BSIE +from bsie.tools import builder +from bsie.utils.bsfs import URI + +# exports +__all__: typing.Sequence[str] = ( + 'main', + ) + + +## code ## + +def main(argv): + """Index files or directories into BSFS.""" + parser = argparse.ArgumentParser(description=main.__doc__, prog='index') + parser.add_argument('--user', type=URI, default=URI('http://example.com/me'), + help='') + parser.add_argument('--collect', action='append', default=[], + help='') + parser.add_argument('--discard', action='append', default=[], + help='') + parser.add_argument('-r', '--recursive', action='store_true', default=False, + help='') + parser.add_argument('--follow', action='store_true', default=False, + help='') + parser.add_argument('--print', action='store_true', default=False, + help='') + parser.add_argument('input_file', 
nargs=argparse.REMAINDER, + help='') + args = parser.parse_args(argv) + + # FIXME: Read reader/extractor configs from a config file + # reader builder + rbuild = builder.ReaderBuilder({}) + # extractor builder + ebuild = builder.ExtractorBuilder([ + {'bsie.extractor.generic.path.Path': {}}, + {'bsie.extractor.generic.stat.Stat': {}}, + {'bsie.extractor.generic.constant.Constant': dict( + tuples=[('http://bsfs.ai/schema/Entity#author', 'Me, myself, and I')], + schema=''' + bse:author rdfs:subClassOf bsfs:Predicate ; + rdfs:domain bsfs:Entity ; + rdfs:range xsd:string ; + bsfs:unique "true"^^xsd:boolean . + ''', + )}, + ]) + # pipeline builder + prefix = URI(args.user + ('file#' if args.user.endswith('/') else '/file#')) + pbuild = builder.PipelineBuilder( + prefix, + rbuild, + ebuild, + ) + + # build pipeline + pipeline = pbuild.build() + # build BSIE frontend + bsie = BSIE(pipeline, args.collect, args.discard) + + + def walk(handle): + """Walk through given input files.""" + # FIXME: collect all triples by node, set all predicates at once + # FIXME: simplify code (below but maybe also above) + # FIXME: How to handle dependencies between data? + # E.g. do I still want to link to a tag despite not being permitted to set its label? + # FIXME: node renaming? + + # index input paths + for path in args.input_file: + if os.path.isdir(path) and args.recursive: + for dirpath, _, filenames in os.walk(path, topdown=True, followlinks=args.follow): + for filename in filenames: + for node, pred, value in bsie.from_file(os.path.join(dirpath, filename)): + handle(node, pred, value) + elif os.path.isfile(path): + for node, pred, value in bsie.from_file(path): + handle(node, pred, value) + else: + raise errors.UnreachableError() + + + if args.print: + walk(print) + return None + + else: + # initialize bsfs + # NOTE: With persistent storages, the schema migration will be a separate operation. + # Here, we'd simply examine the schema and potentially discard more predicates. 
+ store = bsfs.Open({ + 'Graph': { + 'user': args.user, + 'backend': { + 'SparqlStore': {}}, + }}) + store.migrate(bsie.schema) + # process files + def handle(node, pred, value): + store.node(node.node_type, node.uri).set(pred.uri, value) + walk(handle) + # return store + return store + + + +## main ## + +if __name__ == '__main__': + import sys + main(sys.argv[1:]) + +## EOF ## diff --git a/bsie/apps/info.py b/bsie/apps/info.py new file mode 100644 index 0000000..8cc6dca --- /dev/null +++ b/bsie/apps/info.py @@ -0,0 +1,74 @@ +""" + +Part of the bsie module. +A copy of the license is provided with the project. +Author: Matthias Baumgartner, 2022 +""" +# imports +import argparse +import sys +import typing + +# bsie imports +from bsie.base import errors +from bsie.tools import builder +from bsie.utils.bsfs import URI + +# exports +__all__: typing.Sequence[str] = ( + 'main', + ) + + +## code ## + +def main(argv): + """Show information from BSIE.""" + parser = argparse.ArgumentParser(description=main.__doc__, prog='info') + parser.add_argument('what', choices=('predicates', ), + help='Select what information to show.') + args = parser.parse_args(argv) + + # FIXME: Read reader/extractor configs from a config file + # reader builder + rbuild = builder.ReaderBuilder({}) + # extractor builder + ebuild = builder.ExtractorBuilder([ + {'bsie.extractor.generic.path.Path': {}}, + {'bsie.extractor.generic.stat.Stat': {}}, + {'bsie.extractor.generic.constant.Constant': dict( + tuples=[('http://bsfs.ai/schema/Entity#author', 'Me, myself, and I')], + schema=''' + bse:author rdfs:subClassOf bsfs:Predicate ; + rdfs:domain bsfs:Entity ; + rdfs:range xsd:string ; + bsfs:unique "true"^^xsd:boolean . 
+ ''', + )}, + ]) + # pipeline builder + pbuild = builder.PipelineBuilder( + URI('http://example.com/me/file#'), # not actually used + rbuild, + ebuild, + ) + + # build pipeline + pipeline = pbuild.build() + + # show info + if args.what == 'predicates': + # show predicates + for pred in pipeline.schema.predicates(): + print(pred.uri) + else: + # args.what is already checked by argparse + raise errors.UnreachableError() + + +## main ## + +if __name__ == '__main__': + main(sys.argv[1:]) + +## EOF ## diff --git a/bsie/base/errors.py b/bsie/base/errors.py index 760351f..dc3c30e 100644 --- a/bsie/base/errors.py +++ b/bsie/base/errors.py @@ -33,4 +33,10 @@ class ExtractorError(_BSIEError): class ReaderError(_BSIEError): """The Reader failed to read the given file.""" +class ProgrammingError(_BSIEError): + """An assertion-like error that indicates a code-base issue.""" + +class UnreachableError(ProgrammingError): + """Bravo, you've reached a point in code that should logically not be reachable.""" + ## EOF ## diff --git a/bsie/lib/__init__.py b/bsie/lib/__init__.py new file mode 100644 index 0000000..f6c9018 --- /dev/null +++ b/bsie/lib/__init__.py @@ -0,0 +1,13 @@ +""" + +Part of the bsie module. +A copy of the license is provided with the project. +Author: Matthias Baumgartner, 2022 +""" +# imports +import typing + +# exports +__all__: typing.Sequence[str] = [] + +## EOF ## diff --git a/bsie/lib/bsie.py b/bsie/lib/bsie.py new file mode 100644 index 0000000..aeccc8c --- /dev/null +++ b/bsie/lib/bsie.py @@ -0,0 +1,80 @@ +""" + +Part of the bsie module. +A copy of the license is provided with the project. +Author: Matthias Baumgartner, 2022 +""" +# imports +import typing + +# bsie imports +from bsie.tools.pipeline import Pipeline +from bsie.utils import node, ns +from bsie.utils.bsfs import URI, schema as schema_ + +# exports +__all__: typing.Sequence[str] = ( + 'BSIE', + ) + + +## code ## + +class BSIE(): + """Extract triples from files. 
+ + Controls which predicates to extract (*collect*) and + which to not extract (*discard*). Note that this only affects + principal predicates not auxiliary predicates like, e.g., tag labels. + + """ + + # predicates to extract. + predicates: typing.Set[URI] + + # local schema. + schema: schema_.Schema + + def __init__( + self, + # pipeline builder. + pipeline: Pipeline, + # predicates to extract at most. None implies all available w.r.t. extractors. + collect: typing.Optional[typing.Iterable[URI]] = None, + # predicates to discard. + discard: typing.Optional[typing.Iterable[URI]] = None, + ): + # store pipeline + self.pipeline = pipeline + # start off with available predicates + self.predicates = {pred.uri for pred in self.pipeline.predicates()} + # limit predicates to specified ones by argument. + if collect is not None: + collect = set(collect) + if len(collect) > 0: + self.predicates &= collect + # discard predicates. + if discard is not None: + self.predicates -= set(discard) + # discard ns.bsfs.Predicate + self.predicates.discard(ns.bsfs.Predicate) + # compile a schema that only contains the requested predicates (and implied types) + self.schema = schema_.Schema({ + self.pipeline.schema.predicate(pred) for pred in self.predicates}) + + def from_file( + self, + path: URI, + predicates: typing.Optional[typing.Iterable[URI]] = None, + ) -> typing.Iterator[typing.Tuple[node.Node, URI, typing.Any]]: + """Produce triples for a given *path*. Limit to *predicates* if given.""" + # get requested predicates. + predicates = set(predicates) if predicates is not None else self.predicates + # filter through requested predicates. 
+ predicates &= self.predicates + # predicate lookup + predicates = {self.schema.predicate(pred) for pred in predicates} + # invoke pipeline + yield from self.pipeline(path, predicates) + +## EOF ## diff --git a/bsie/tools/pipeline.py b/bsie/tools/pipeline.py index 8e1c992..da422c0 100644 --- a/bsie/tools/pipeline.py +++ b/bsie/tools/pipeline.py @@ -70,6 +70,10 @@ class Pipeline(): and self._prefix == other._prefix \ and self._ext2rdr == other._ext2rdr + def predicates(self) -> typing.Iterator[_schema.Predicate]: + """Return the predicates that are extracted from a file.""" + return iter({pred for ext in self._ext2rdr for pred in ext.predicates()}) + def __call__( self, path: URI, diff --git a/bsie/utils/namespaces.py b/bsie/utils/namespaces.py index 13be96b..2fcb2dc 100644 --- a/bsie/utils/namespaces.py +++ b/bsie/utils/namespaces.py @@ -13,7 +13,7 @@ from . import bsfs as _bsfs # constants bse = _bsfs.Namespace('http://bsfs.ai/schema/Entity#') bsfs = _bsfs.Namespace('http://bsfs.ai/schema/') -bsm = _bsfs.Namespace('http://bsfs.ai/schema/meta#') +bsm = _bsfs.Namespace('http://bsfs.ai/schema/Meta#') xsd = _bsfs.Namespace('http://www.w3.org/2001/XMLSchema#') # export diff --git a/test/apps/__init__.py b/test/apps/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/test/apps/test_index.py b/test/apps/test_index.py new file mode 100644 index 0000000..6d47df8 --- /dev/null +++ b/test/apps/test_index.py @@ -0,0 +1,159 @@ +""" + +Part of the bsie test suite. +A copy of the license is provided with the project. 
+Author: Matthias Baumgartner, 2022 +""" +# imports +import io +import os +import rdflib +import sys +import unittest + +# bsie imports +from bsie.utils import ns + +# objects to test +from bsie.apps.index import main + + +## code ## + +class TestIndex(unittest.TestCase): + def test_main(self): + bsfs = main([ + '-r', + '--user', 'http://example.com/me', + os.path.join(os.path.dirname(__file__), 'testdir'), + os.path.join(os.path.dirname(__file__), 'testfile'), + ]) + + prefix = 'http://example.com/me/file#' + self.assertTrue(set(bsfs._Graph__backend.graph).issuperset({ + (rdflib.URIRef(prefix + '2f4109b40107cc50e0884755a1a961ed126887e49b8dbaf0e146b2e226aa6647'), rdflib.RDF.type, rdflib.URIRef(ns.bsfs.Entity)), + (rdflib.URIRef(prefix + '2f4109b40107cc50e0884755a1a961ed126887e49b8dbaf0e146b2e226aa6647'), rdflib.URIRef(ns.bse.author), rdflib.Literal('Me, myself, and I', datatype=rdflib.XSD.string)), + (rdflib.URIRef(prefix + '2f4109b40107cc50e0884755a1a961ed126887e49b8dbaf0e146b2e226aa6647'), rdflib.URIRef(ns.bse.filename), rdflib.Literal('alpha_second', datatype=rdflib.XSD.string)), + (rdflib.URIRef(prefix + '2f4109b40107cc50e0884755a1a961ed126887e49b8dbaf0e146b2e226aa6647'), rdflib.URIRef(ns.bse.filesize), rdflib.Literal('696', datatype=rdflib.XSD.integer)), + (rdflib.URIRef(prefix + '441f3d10c8ff489fe8e33e639606512f6c463151cc429de7e554b9af670c2ece'), rdflib.RDF.type, rdflib.URIRef(ns.bsfs.Entity)), + (rdflib.URIRef(prefix + '441f3d10c8ff489fe8e33e639606512f6c463151cc429de7e554b9af670c2ece'), rdflib.URIRef(ns.bse.author), rdflib.Literal('Me, myself, and I', datatype=rdflib.XSD.string)), + (rdflib.URIRef(prefix + '441f3d10c8ff489fe8e33e639606512f6c463151cc429de7e554b9af670c2ece'), rdflib.URIRef(ns.bse.filename), rdflib.Literal('omega_second', datatype=rdflib.XSD.string)), + (rdflib.URIRef(prefix + '441f3d10c8ff489fe8e33e639606512f6c463151cc429de7e554b9af670c2ece'), rdflib.URIRef(ns.bse.filesize), rdflib.Literal('503', datatype=rdflib.XSD.integer)), + 
(rdflib.URIRef(prefix + '69b98ecf7aff3e95b09688ba93331678eb8397817111f674c9558e6dd8f5e871'), rdflib.RDF.type, rdflib.URIRef(ns.bsfs.Entity)), + (rdflib.URIRef(prefix + '69b98ecf7aff3e95b09688ba93331678eb8397817111f674c9558e6dd8f5e871'), rdflib.URIRef(ns.bse.author), rdflib.Literal('Me, myself, and I', datatype=rdflib.XSD.string)), + (rdflib.URIRef(prefix + '69b98ecf7aff3e95b09688ba93331678eb8397817111f674c9558e6dd8f5e871'), rdflib.URIRef(ns.bse.filename), rdflib.Literal('td_first', datatype=rdflib.XSD.string)), + (rdflib.URIRef(prefix + '69b98ecf7aff3e95b09688ba93331678eb8397817111f674c9558e6dd8f5e871'), rdflib.URIRef(ns.bse.filesize), rdflib.Literal('911', datatype=rdflib.XSD.integer)), + (rdflib.URIRef(prefix + '78f7eb7f0d8221cdb2cb26c978fa42a11f75eb87becc768f4474134cb1e06926'), rdflib.RDF.type, rdflib.URIRef(ns.bsfs.Entity)), + (rdflib.URIRef(prefix + '78f7eb7f0d8221cdb2cb26c978fa42a11f75eb87becc768f4474134cb1e06926'), rdflib.URIRef(ns.bse.author), rdflib.Literal('Me, myself, and I', datatype=rdflib.XSD.string)), + (rdflib.URIRef(prefix + '78f7eb7f0d8221cdb2cb26c978fa42a11f75eb87becc768f4474134cb1e06926'), rdflib.URIRef(ns.bse.filename), rdflib.Literal('testfile', datatype=rdflib.XSD.string)), + (rdflib.URIRef(prefix + '78f7eb7f0d8221cdb2cb26c978fa42a11f75eb87becc768f4474134cb1e06926'), rdflib.URIRef(ns.bse.filesize), rdflib.Literal('885', datatype=rdflib.XSD.integer)), + (rdflib.URIRef(prefix + '80818b8ec2ee1919116dba9c8a7e0a4608313cf3b463cd88e9ed77a700dd92d3'), rdflib.RDF.type, rdflib.URIRef(ns.bsfs.Entity)), + (rdflib.URIRef(prefix + '80818b8ec2ee1919116dba9c8a7e0a4608313cf3b463cd88e9ed77a700dd92d3'), rdflib.URIRef(ns.bse.author), rdflib.Literal('Me, myself, and I', datatype=rdflib.XSD.string)), + (rdflib.URIRef(prefix + '80818b8ec2ee1919116dba9c8a7e0a4608313cf3b463cd88e9ed77a700dd92d3'), rdflib.URIRef(ns.bse.filename), rdflib.Literal('bar_first', datatype=rdflib.XSD.string)), + (rdflib.URIRef(prefix + 
'80818b8ec2ee1919116dba9c8a7e0a4608313cf3b463cd88e9ed77a700dd92d3'), rdflib.URIRef(ns.bse.filesize), rdflib.Literal('956', datatype=rdflib.XSD.integer)), + (rdflib.URIRef(prefix + '976d2ea0e58488678cc7e435fbfadabfb6eb6cf50ad51862f38f73729ed11795'), rdflib.RDF.type, rdflib.URIRef(ns.bsfs.Entity)), + (rdflib.URIRef(prefix + '976d2ea0e58488678cc7e435fbfadabfb6eb6cf50ad51862f38f73729ed11795'), rdflib.URIRef(ns.bse.author), rdflib.Literal('Me, myself, and I', datatype=rdflib.XSD.string)), + (rdflib.URIRef(prefix + '976d2ea0e58488678cc7e435fbfadabfb6eb6cf50ad51862f38f73729ed11795'), rdflib.URIRef(ns.bse.filename), rdflib.Literal('omega_first', datatype=rdflib.XSD.string)), + (rdflib.URIRef(prefix + '976d2ea0e58488678cc7e435fbfadabfb6eb6cf50ad51862f38f73729ed11795'), rdflib.URIRef(ns.bse.filesize), rdflib.Literal('648', datatype=rdflib.XSD.integer)), + (rdflib.URIRef(prefix + '997e2fbb7494a3818ec782d2bc87bf1cffafba6b9c0f658e4a6c18a723e944d3'), rdflib.RDF.type, rdflib.URIRef(ns.bsfs.Entity)), + (rdflib.URIRef(prefix + '997e2fbb7494a3818ec782d2bc87bf1cffafba6b9c0f658e4a6c18a723e944d3'), rdflib.URIRef(ns.bse.author), rdflib.Literal('Me, myself, and I', datatype=rdflib.XSD.string)), + (rdflib.URIRef(prefix + '997e2fbb7494a3818ec782d2bc87bf1cffafba6b9c0f658e4a6c18a723e944d3'), rdflib.URIRef(ns.bse.filename), rdflib.Literal('alpha_first', datatype=rdflib.XSD.string)), + (rdflib.URIRef(prefix + '997e2fbb7494a3818ec782d2bc87bf1cffafba6b9c0f658e4a6c18a723e944d3'), rdflib.URIRef(ns.bse.filesize), rdflib.Literal('754', datatype=rdflib.XSD.integer)), + (rdflib.URIRef(prefix + 'a8af899ecdab60dfaea8ec7f934053624c80a1054539e163f2c7eaa986c2777d'), rdflib.RDF.type, rdflib.URIRef(ns.bsfs.Entity)), + (rdflib.URIRef(prefix + 'a8af899ecdab60dfaea8ec7f934053624c80a1054539e163f2c7eaa986c2777d'), rdflib.URIRef(ns.bse.author), rdflib.Literal('Me, myself, and I', datatype=rdflib.XSD.string)), + (rdflib.URIRef(prefix + 'a8af899ecdab60dfaea8ec7f934053624c80a1054539e163f2c7eaa986c2777d'), 
rdflib.URIRef(ns.bse.filename), rdflib.Literal('foo_second', datatype=rdflib.XSD.string)), + (rdflib.URIRef(prefix + 'a8af899ecdab60dfaea8ec7f934053624c80a1054539e163f2c7eaa986c2777d'), rdflib.URIRef(ns.bse.filesize), rdflib.Literal('585', datatype=rdflib.XSD.integer)), + (rdflib.URIRef(prefix + 'b8fd7fba818254166a6043195004138ebda6923e012442f819a2c49671136c70'), rdflib.RDF.type, rdflib.URIRef(ns.bsfs.Entity)), + (rdflib.URIRef(prefix + 'b8fd7fba818254166a6043195004138ebda6923e012442f819a2c49671136c70'), rdflib.URIRef(ns.bse.author), rdflib.Literal('Me, myself, and I', datatype=rdflib.XSD.string)), + (rdflib.URIRef(prefix + 'b8fd7fba818254166a6043195004138ebda6923e012442f819a2c49671136c70'), rdflib.URIRef(ns.bse.filename), rdflib.Literal('bar_second', datatype=rdflib.XSD.string)), + (rdflib.URIRef(prefix + 'b8fd7fba818254166a6043195004138ebda6923e012442f819a2c49671136c70'), rdflib.URIRef(ns.bse.filesize), rdflib.Literal('636', datatype=rdflib.XSD.integer)), + (rdflib.URIRef(prefix + 'd43758ace82154a1cc10ca0dfef63cb20dd831f9c87edd6dc06539eefe67371d'), rdflib.RDF.type, rdflib.URIRef(ns.bsfs.Entity)), + (rdflib.URIRef(prefix + 'd43758ace82154a1cc10ca0dfef63cb20dd831f9c87edd6dc06539eefe67371d'), rdflib.URIRef(ns.bse.author), rdflib.Literal('Me, myself, and I', datatype=rdflib.XSD.string)), + (rdflib.URIRef(prefix + 'd43758ace82154a1cc10ca0dfef63cb20dd831f9c87edd6dc06539eefe67371d'), rdflib.URIRef(ns.bse.filename), rdflib.Literal('foo_first', datatype=rdflib.XSD.string)), + (rdflib.URIRef(prefix + 'd43758ace82154a1cc10ca0dfef63cb20dd831f9c87edd6dc06539eefe67371d'), rdflib.URIRef(ns.bse.filesize), rdflib.Literal('546', datatype=rdflib.XSD.integer)), + (rdflib.URIRef(prefix + 'd803187cbf3676ae9d38126270a6152c60431589aa3bb3824baf8954e9c097f1'), rdflib.RDF.type, rdflib.URIRef(ns.bsfs.Entity)), + (rdflib.URIRef(prefix + 'd803187cbf3676ae9d38126270a6152c60431589aa3bb3824baf8954e9c097f1'), rdflib.URIRef(ns.bse.author), rdflib.Literal('Me, myself, and I', 
datatype=rdflib.XSD.string)), + (rdflib.URIRef(prefix + 'd803187cbf3676ae9d38126270a6152c60431589aa3bb3824baf8954e9c097f1'), rdflib.URIRef(ns.bse.filename), rdflib.Literal('td_second', datatype=rdflib.XSD.string)), + (rdflib.URIRef(prefix + 'd803187cbf3676ae9d38126270a6152c60431589aa3bb3824baf8954e9c097f1'), rdflib.URIRef(ns.bse.filesize), rdflib.Literal('703', datatype=rdflib.XSD.integer)), + })) + + # NOTE: we don't check ns.bsm.t_created since it depends on the execution time. Triples would look like this: + # (rdflib.URIRef(prefix + '2f4109b40107cc50e0884755a1a961ed126887e49b8dbaf0e146b2e226aa6647'), rdflib.URIRef(ns.bsm.t_created), rdflib.Literal('1670..........', datatype=rdflib.XSD.integer)), + # (rdflib.URIRef(prefix + '441f3d10c8ff489fe8e33e639606512f6c463151cc429de7e554b9af670c2ece'), rdflib.URIRef(ns.bsm.t_created), rdflib.Literal('1670..........', datatype=rdflib.XSD.integer)), + # (rdflib.URIRef(prefix + '69b98ecf7aff3e95b09688ba93331678eb8397817111f674c9558e6dd8f5e871'), rdflib.URIRef(ns.bsm.t_created), rdflib.Literal('1670..........', datatype=rdflib.XSD.integer)), + # (rdflib.URIRef(prefix + '78f7eb7f0d8221cdb2cb26c978fa42a11f75eb87becc768f4474134cb1e06926'), rdflib.URIRef(ns.bsm.t_created), rdflib.Literal('1670..........', datatype=rdflib.XSD.integer)), + # (rdflib.URIRef(prefix + '80818b8ec2ee1919116dba9c8a7e0a4608313cf3b463cd88e9ed77a700dd92d3'), rdflib.URIRef(ns.bsm.t_created), rdflib.Literal('1670..........', datatype=rdflib.XSD.integer)), + # (rdflib.URIRef(prefix + '976d2ea0e58488678cc7e435fbfadabfb6eb6cf50ad51862f38f73729ed11795'), rdflib.URIRef(ns.bsm.t_created), rdflib.Literal('1670..........', datatype=rdflib.XSD.integer)), + # (rdflib.URIRef(prefix + '997e2fbb7494a3818ec782d2bc87bf1cffafba6b9c0f658e4a6c18a723e944d3'), rdflib.URIRef(ns.bsm.t_created), rdflib.Literal('1670..........', datatype=rdflib.XSD.integer)), + # (rdflib.URIRef(prefix + 'a8af899ecdab60dfaea8ec7f934053624c80a1054539e163f2c7eaa986c2777d'), 
rdflib.URIRef(ns.bsm.t_created), rdflib.Literal('1670..........', datatype=rdflib.XSD.integer)), + # (rdflib.URIRef(prefix + 'b8fd7fba818254166a6043195004138ebda6923e012442f819a2c49671136c70'), rdflib.URIRef(ns.bsm.t_created), rdflib.Literal('1670..........', datatype=rdflib.XSD.integer)), + # (rdflib.URIRef(prefix + 'd43758ace82154a1cc10ca0dfef63cb20dd831f9c87edd6dc06539eefe67371d'), rdflib.URIRef(ns.bsm.t_created), rdflib.Literal('1670..........', datatype=rdflib.XSD.integer)), + # (rdflib.URIRef(prefix + 'd803187cbf3676ae9d38126270a6152c60431589aa3bb3824baf8954e9c097f1'), rdflib.URIRef(ns.bsm.t_created), rdflib.Literal('1670..........', datatype=rdflib.XSD.integer)), + # instead, we simply check if there's such a predicate for each file + self.assertSetEqual({sub for sub, _ in bsfs._Graph__backend.graph.subject_objects(rdflib.URIRef(ns.bsm.t_created))}, { + rdflib.URIRef(prefix + '2f4109b40107cc50e0884755a1a961ed126887e49b8dbaf0e146b2e226aa6647'), + rdflib.URIRef(prefix + '441f3d10c8ff489fe8e33e639606512f6c463151cc429de7e554b9af670c2ece'), + rdflib.URIRef(prefix + '69b98ecf7aff3e95b09688ba93331678eb8397817111f674c9558e6dd8f5e871'), + rdflib.URIRef(prefix + '78f7eb7f0d8221cdb2cb26c978fa42a11f75eb87becc768f4474134cb1e06926'), + rdflib.URIRef(prefix + '80818b8ec2ee1919116dba9c8a7e0a4608313cf3b463cd88e9ed77a700dd92d3'), + rdflib.URIRef(prefix + '976d2ea0e58488678cc7e435fbfadabfb6eb6cf50ad51862f38f73729ed11795'), + rdflib.URIRef(prefix + '997e2fbb7494a3818ec782d2bc87bf1cffafba6b9c0f658e4a6c18a723e944d3'), + rdflib.URIRef(prefix + 'a8af899ecdab60dfaea8ec7f934053624c80a1054539e163f2c7eaa986c2777d'), + rdflib.URIRef(prefix + 'b8fd7fba818254166a6043195004138ebda6923e012442f819a2c49671136c70'), + rdflib.URIRef(prefix + 'd43758ace82154a1cc10ca0dfef63cb20dd831f9c87edd6dc06539eefe67371d'), + rdflib.URIRef(prefix + 'd803187cbf3676ae9d38126270a6152c60431589aa3bb3824baf8954e9c097f1'), + }) + + def test_print(self): + stdout, sys.stdout = sys.stdout, io.StringIO() + bsfs = 
main([ + '--print', + '-r', + '--user', 'http://example.com/me', + os.path.join(os.path.dirname(__file__), 'testdir'), + os.path.join(os.path.dirname(__file__), 'testfile'), + ]) + outbuf, sys.stdout = sys.stdout, stdout + self.assertSetEqual(set(outbuf.getvalue().split('\n')) - {''}, { + f'Node(http://bsfs.ai/schema/Entity, http://example.com/me/file#2f4109b40107cc50e0884755a1a961ed126887e49b8dbaf0e146b2e226aa6647) Predicate({ns.bse.author}) Me, myself, and I', + f'Node(http://bsfs.ai/schema/Entity, http://example.com/me/file#2f4109b40107cc50e0884755a1a961ed126887e49b8dbaf0e146b2e226aa6647) Predicate({ns.bse.filename}) alpha_second', + f'Node(http://bsfs.ai/schema/Entity, http://example.com/me/file#2f4109b40107cc50e0884755a1a961ed126887e49b8dbaf0e146b2e226aa6647) Predicate({ns.bse.filesize}) 696', + f'Node(http://bsfs.ai/schema/Entity, http://example.com/me/file#441f3d10c8ff489fe8e33e639606512f6c463151cc429de7e554b9af670c2ece) Predicate({ns.bse.author}) Me, myself, and I', + f'Node(http://bsfs.ai/schema/Entity, http://example.com/me/file#441f3d10c8ff489fe8e33e639606512f6c463151cc429de7e554b9af670c2ece) Predicate({ns.bse.filename}) omega_second', + f'Node(http://bsfs.ai/schema/Entity, http://example.com/me/file#441f3d10c8ff489fe8e33e639606512f6c463151cc429de7e554b9af670c2ece) Predicate({ns.bse.filesize}) 503', + f'Node(http://bsfs.ai/schema/Entity, http://example.com/me/file#69b98ecf7aff3e95b09688ba93331678eb8397817111f674c9558e6dd8f5e871) Predicate({ns.bse.author}) Me, myself, and I', + f'Node(http://bsfs.ai/schema/Entity, http://example.com/me/file#69b98ecf7aff3e95b09688ba93331678eb8397817111f674c9558e6dd8f5e871) Predicate({ns.bse.filename}) td_first', + f'Node(http://bsfs.ai/schema/Entity, http://example.com/me/file#69b98ecf7aff3e95b09688ba93331678eb8397817111f674c9558e6dd8f5e871) Predicate({ns.bse.filesize}) 911', + f'Node(http://bsfs.ai/schema/Entity, http://example.com/me/file#78f7eb7f0d8221cdb2cb26c978fa42a11f75eb87becc768f4474134cb1e06926) 
Predicate({ns.bse.author}) Me, myself, and I', + f'Node(http://bsfs.ai/schema/Entity, http://example.com/me/file#78f7eb7f0d8221cdb2cb26c978fa42a11f75eb87becc768f4474134cb1e06926) Predicate({ns.bse.filename}) testfile', + f'Node(http://bsfs.ai/schema/Entity, http://example.com/me/file#78f7eb7f0d8221cdb2cb26c978fa42a11f75eb87becc768f4474134cb1e06926) Predicate({ns.bse.filesize}) 885', + f'Node(http://bsfs.ai/schema/Entity, http://example.com/me/file#80818b8ec2ee1919116dba9c8a7e0a4608313cf3b463cd88e9ed77a700dd92d3) Predicate({ns.bse.author}) Me, myself, and I', + f'Node(http://bsfs.ai/schema/Entity, http://example.com/me/file#80818b8ec2ee1919116dba9c8a7e0a4608313cf3b463cd88e9ed77a700dd92d3) Predicate({ns.bse.filename}) bar_first', + f'Node(http://bsfs.ai/schema/Entity, http://example.com/me/file#80818b8ec2ee1919116dba9c8a7e0a4608313cf3b463cd88e9ed77a700dd92d3) Predicate({ns.bse.filesize}) 956', + f'Node(http://bsfs.ai/schema/Entity, http://example.com/me/file#976d2ea0e58488678cc7e435fbfadabfb6eb6cf50ad51862f38f73729ed11795) Predicate({ns.bse.author}) Me, myself, and I', + f'Node(http://bsfs.ai/schema/Entity, http://example.com/me/file#976d2ea0e58488678cc7e435fbfadabfb6eb6cf50ad51862f38f73729ed11795) Predicate({ns.bse.filename}) omega_first', + f'Node(http://bsfs.ai/schema/Entity, http://example.com/me/file#976d2ea0e58488678cc7e435fbfadabfb6eb6cf50ad51862f38f73729ed11795) Predicate({ns.bse.filesize}) 648', + f'Node(http://bsfs.ai/schema/Entity, http://example.com/me/file#997e2fbb7494a3818ec782d2bc87bf1cffafba6b9c0f658e4a6c18a723e944d3) Predicate({ns.bse.author}) Me, myself, and I', + f'Node(http://bsfs.ai/schema/Entity, http://example.com/me/file#997e2fbb7494a3818ec782d2bc87bf1cffafba6b9c0f658e4a6c18a723e944d3) Predicate({ns.bse.filename}) alpha_first', + f'Node(http://bsfs.ai/schema/Entity, http://example.com/me/file#997e2fbb7494a3818ec782d2bc87bf1cffafba6b9c0f658e4a6c18a723e944d3) Predicate({ns.bse.filesize}) 754', + f'Node(http://bsfs.ai/schema/Entity, 
http://example.com/me/file#a8af899ecdab60dfaea8ec7f934053624c80a1054539e163f2c7eaa986c2777d) Predicate({ns.bse.author}) Me, myself, and I', + f'Node(http://bsfs.ai/schema/Entity, http://example.com/me/file#a8af899ecdab60dfaea8ec7f934053624c80a1054539e163f2c7eaa986c2777d) Predicate({ns.bse.filename}) foo_second', + f'Node(http://bsfs.ai/schema/Entity, http://example.com/me/file#a8af899ecdab60dfaea8ec7f934053624c80a1054539e163f2c7eaa986c2777d) Predicate({ns.bse.filesize}) 585', + f'Node(http://bsfs.ai/schema/Entity, http://example.com/me/file#b8fd7fba818254166a6043195004138ebda6923e012442f819a2c49671136c70) Predicate({ns.bse.author}) Me, myself, and I', + f'Node(http://bsfs.ai/schema/Entity, http://example.com/me/file#b8fd7fba818254166a6043195004138ebda6923e012442f819a2c49671136c70) Predicate({ns.bse.filename}) bar_second', + f'Node(http://bsfs.ai/schema/Entity, http://example.com/me/file#b8fd7fba818254166a6043195004138ebda6923e012442f819a2c49671136c70) Predicate({ns.bse.filesize}) 636', + f'Node(http://bsfs.ai/schema/Entity, http://example.com/me/file#d43758ace82154a1cc10ca0dfef63cb20dd831f9c87edd6dc06539eefe67371d) Predicate({ns.bse.author}) Me, myself, and I', + f'Node(http://bsfs.ai/schema/Entity, http://example.com/me/file#d43758ace82154a1cc10ca0dfef63cb20dd831f9c87edd6dc06539eefe67371d) Predicate({ns.bse.filename}) foo_first', + f'Node(http://bsfs.ai/schema/Entity, http://example.com/me/file#d43758ace82154a1cc10ca0dfef63cb20dd831f9c87edd6dc06539eefe67371d) Predicate({ns.bse.filesize}) 546', + f'Node(http://bsfs.ai/schema/Entity, http://example.com/me/file#d803187cbf3676ae9d38126270a6152c60431589aa3bb3824baf8954e9c097f1) Predicate({ns.bse.author}) Me, myself, and I', + f'Node(http://bsfs.ai/schema/Entity, http://example.com/me/file#d803187cbf3676ae9d38126270a6152c60431589aa3bb3824baf8954e9c097f1) Predicate({ns.bse.filename}) td_second', + f'Node(http://bsfs.ai/schema/Entity, 
http://example.com/me/file#d803187cbf3676ae9d38126270a6152c60431589aa3bb3824baf8954e9c097f1) Predicate({ns.bse.filesize}) 703', + }) + + +## main ## + +if __name__ == '__main__': + unittest.main() + +## EOF ## diff --git a/test/apps/test_info.py b/test/apps/test_info.py new file mode 100644 index 0000000..60a540e --- /dev/null +++ b/test/apps/test_info.py @@ -0,0 +1,42 @@ +""" + +Part of the bsie test suite. +A copy of the license is provided with the project. +Author: Matthias Baumgartner, 2022 +""" +# imports +import argparse +import io +import os +import sys +import unittest + +# objects to test +from bsie.apps.info import main + + +## code ## + +class TestIndex(unittest.TestCase): + def test_predicates(self): + stdout, sys.stdout = sys.stdout, io.StringIO() + # show predicates infos + main(['predicates']) + outbuf, sys.stdout = sys.stdout, stdout + # verify output + self.assertSetEqual({pred for pred in outbuf.getvalue().split('\n') if pred != ''}, { + 'http://bsfs.ai/schema/Entity#author', + 'http://bsfs.ai/schema/Predicate', + 'http://bsfs.ai/schema/Entity#filename', + 'http://bsfs.ai/schema/Entity#filesize', + }) + + def test_invalid(self): + self.assertRaises(SystemExit, main, ['foobar']) + +## main ## + +if __name__ == '__main__': + unittest.main() + +## EOF ## diff --git a/test/apps/testdir/alpha/alpha_first b/test/apps/testdir/alpha/alpha_first new file mode 100644 index 0000000..f96fdee --- /dev/null +++ b/test/apps/testdir/alpha/alpha_first @@ -0,0 +1,16 @@ +Turpis tincidunt id aliquet risus feugiat in ante metus. +Vel turpis nunc eget lorem dolor. +Lorem mollis aliquam ut porttitor leo a diam sollicitudin. +Sit amet mattis vulputate enim nulla aliquet porttitor lacus luctus. +Vitae et leo duis ut diam. +Integer eget aliquet nibh praesent tristique magna sit. +Volutpat sed cras ornare arcu dui. +Consectetur adipiscing elit duis tristique sollicitudin nibh. +Interdum varius sit amet mattis vulputate. +A arcu cursus vitae congue. 
+Risus nec feugiat in fermentum posuere urna nec tincidunt praesent. +Sit amet dictum sit amet justo donec enim diam. +Maecenas accumsan lacus vel facilisis. +Erat velit scelerisque in dictum non consectetur a. +Tempor orci dapibus ultrices in iaculis nunc. +Nisi lacus sed viverra tellus. diff --git a/test/apps/testdir/alpha/alpha_second b/test/apps/testdir/alpha/alpha_second new file mode 100644 index 0000000..ae83ce8 --- /dev/null +++ b/test/apps/testdir/alpha/alpha_second @@ -0,0 +1,12 @@ +Et sollicitudin ac orci phasellus egestas tellus rutrum tellus. +Orci dapibus ultrices in iaculis nunc sed augue. +Tincidunt vitae semper quis lectus nulla at. +Maecenas ultricies mi eget mauris pharetra et. +Porttitor massa id neque aliquam vestibulum morbi blandit. +Et magnis dis parturient montes nascetur ridiculus mus mauris. +Ac orci phasellus egestas tellus rutrum tellus pellentesque. +Donec ac odio tempor orci dapibus. +Quis imperdiet massa tincidunt nunc pulvinar sapien et ligula. +Potenti nullam ac tortor vitae purus faucibus ornare suspendisse sed. +Orci porta non pulvinar neque laoreet suspendisse interdum consectetur. +Mauris pellentesque pulvinar pellentesque habitant morbi tristique. diff --git a/test/apps/testdir/alpha/omega/omega_first b/test/apps/testdir/alpha/omega/omega_first new file mode 100644 index 0000000..e594737 --- /dev/null +++ b/test/apps/testdir/alpha/omega/omega_first @@ -0,0 +1,14 @@ +Neque gravida in fermentum et sollicitudin. +Sodales ut eu sem integer vitae justo eget magna fermentum. +Amet nulla facilisi morbi tempus iaculis. +Proin sagittis nisl rhoncus mattis rhoncus urna neque. +Aliquam sem fringilla ut morbi tincidunt augue interdum velit euismod. +Sagittis eu volutpat odio facilisis. +Aliquet porttitor lacus luctus accumsan tortor posuere ac ut. +Sed arcu non odio euismod lacinia. +Faucibus et molestie ac feugiat. +Urna neque viverra justo nec ultrices dui sapien eget. +Amet commodo nulla facilisi nullam. 
+Pretium lectus quam id leo in vitae. +A cras semper auctor neque. +Sed arcu non odio euismod lacinia at quis risus sed. diff --git a/test/apps/testdir/alpha/omega/omega_second b/test/apps/testdir/alpha/omega/omega_second new file mode 100644 index 0000000..0c9857d --- /dev/null +++ b/test/apps/testdir/alpha/omega/omega_second @@ -0,0 +1,10 @@ +Commodo sed egestas egestas fringilla phasellus. +Ac tortor dignissim convallis aenean et tortor at risus. +Lorem dolor sed viverra ipsum nunc aliquet bibendum enim. +Quis lectus nulla at volutpat diam ut. +Tincidunt id aliquet risus feugiat in ante metus. +Tincidunt arcu non sodales neque. +Amet est placerat in egestas erat imperdiet sed euismod. +Duis tristique sollicitudin nibh sit amet. +Sed arcu non odio euismod lacinia at. +Ullamcorper morbi tincidunt ornare massa eget egestas purus viverra accumsan. diff --git a/test/apps/testdir/foo/bar/bar_first b/test/apps/testdir/foo/bar/bar_first new file mode 100644 index 0000000..e9edb3f --- /dev/null +++ b/test/apps/testdir/foo/bar/bar_first @@ -0,0 +1,20 @@ +Elementum eu facilisis sed odio morbi quis commodo. +Enim nunc faucibus a pellentesque sit amet porttitor. +Etiam non quam lacus suspendisse faucibus interdum. +Viverra aliquet eget sit amet tellus. +Arcu vitae elementum curabitur vitae. +Feugiat vivamus at augue eget arcu dictum. +Commodo quis imperdiet massa tincidunt nunc. +Urna duis convallis convallis tellus id interdum. +Commodo sed egestas egestas fringilla phasellus. +Sodales neque sodales ut etiam sit amet nisl. +Sem integer vitae justo eget magna fermentum iaculis. +Id diam maecenas ultricies mi. +Aliquet nibh praesent tristique magna sit amet purus gravida. +Ut enim blandit volutpat maecenas volutpat. +Ipsum a arcu cursus vitae congue mauris. +Donec ultrices tincidunt arcu non. +Nulla posuere sollicitudin aliquam ultrices sagittis orci a scelerisque purus. +Egestas maecenas pharetra convallis posuere. +Feugiat in fermentum posuere urna nec. 
+Nulla malesuada pellentesque elit eget gravida cum sociis. diff --git a/test/apps/testdir/foo/bar/bar_second b/test/apps/testdir/foo/bar/bar_second new file mode 100644 index 0000000..fb95896 --- /dev/null +++ b/test/apps/testdir/foo/bar/bar_second @@ -0,0 +1,14 @@ +Augue ut lectus arcu bibendum at varius vel pharetra vel. +Mattis aliquam faucibus purus in. +In tellus integer feugiat scelerisque. +Eget velit aliquet sagittis id consectetur purus ut faucibus pulvinar. +Augue mauris augue neque gravida. +Pulvinar neque laoreet suspendisse interdum consectetur libero id faucibus. +Tellus elementum sagittis vitae et leo duis. +Eget est lorem ipsum dolor sit amet consectetur. +Volutpat sed cras ornare arcu. +Faucibus a pellentesque sit amet. +Turpis egestas maecenas pharetra convallis. +Faucibus interdum posuere lorem ipsum dolor sit amet. +Id semper risus in hendrerit. +Amet volutpat consequat mauris nunc. diff --git a/test/apps/testdir/foo/foo_first b/test/apps/testdir/foo/foo_first new file mode 100644 index 0000000..ed1e052 --- /dev/null +++ b/test/apps/testdir/foo/foo_first @@ -0,0 +1,11 @@ +Venenatis tellus in metus vulputate eu scelerisque felis imperdiet proin. +Orci phasellus egestas tellus rutrum. +Feugiat vivamus at augue eget arcu dictum varius. +Justo eget magna fermentum iaculis eu non. +A erat nam at lectus urna duis. +Quam quisque id diam vel quam elementum pulvinar etiam. +Amet commodo nulla facilisi nullam vehicula ipsum a. +Sapien faucibus et molestie ac feugiat. +Aliquam vestibulum morbi blandit cursus risus at ultrices. +Purus faucibus ornare suspendisse sed nisi. +In massa tempor nec feugiat nisl pretium fusce id velit. diff --git a/test/apps/testdir/foo/foo_second b/test/apps/testdir/foo/foo_second new file mode 100644 index 0000000..95e46ae --- /dev/null +++ b/test/apps/testdir/foo/foo_second @@ -0,0 +1,12 @@ +Sit amet consectetur adipiscing elit ut aliquam purus. +Vulputate dignissim suspendisse in est ante in nibh. 
+Eu feugiat pretium nibh ipsum consequat nisl vel pretium. +Egestas purus viverra accumsan in nisl. +Ac odio tempor orci dapibus ultrices. +At imperdiet dui accumsan sit amet. +Elementum integer enim neque volutpat ac tincidunt vitae semper. +Mi in nulla posuere sollicitudin aliquam ultrices sagittis. +Aliquam sem et tortor consequat. +Tristique senectus et netus et malesuada fames ac turpis. +Quis hendrerit dolor magna eget est lorem ipsum. +Ut consequat semper viverra nam libero. diff --git a/test/apps/testdir/td_first b/test/apps/testdir/td_first new file mode 100644 index 0000000..21eab9c --- /dev/null +++ b/test/apps/testdir/td_first @@ -0,0 +1,18 @@ +Urna duis convallis convallis tellus id interdum velit. +Risus in hendrerit gravida rutrum. +Odio pellentesque diam volutpat commodo sed. +Duis convallis convallis tellus id interdum velit laoreet id donec. +Duis at tellus at urna. +Egestas maecenas pharetra convallis posuere morbi leo urna molestie at. +Et leo duis ut diam quam nulla porttitor massa id. +Nunc eget lorem dolor sed viverra ipsum nunc aliquet bibendum. +Sodales ut etiam sit amet nisl purus in. +Ac felis donec et odio pellentesque diam volutpat commodo. +Nunc mi ipsum faucibus vitae aliquet. +Volutpat ac tincidunt vitae semper quis lectus nulla at volutpat. +Mollis aliquam ut porttitor leo. +Vestibulum rhoncus est pellentesque elit ullamcorper dignissim cras. +Pulvinar proin gravida hendrerit lectus a. +Ultrices dui sapien eget mi proin. +Dui vivamus arcu felis bibendum ut. +Aliquam eleifend mi in nulla posuere sollicitudin aliquam ultrices sagittis. diff --git a/test/apps/testdir/td_second b/test/apps/testdir/td_second new file mode 100644 index 0000000..496ff0e --- /dev/null +++ b/test/apps/testdir/td_second @@ -0,0 +1,14 @@ +Egestas purus viverra accumsan in. +Auctor urna nunc id cursus metus aliquam eleifend. +Morbi tincidunt augue interdum velit. +In egestas erat imperdiet sed euismod nisi porta lorem mollis. 
+Sed augue lacus viverra vitae congue eu consequat. +Ut pharetra sit amet aliquam id. +Aenean euismod elementum nisi quis eleifend. +Hac habitasse platea dictumst vestibulum rhoncus est pellentesque elit ullamcorper. +Eget nunc lobortis mattis aliquam faucibus purus. +Sit amet luctus venenatis lectus magna fringilla. +Placerat orci nulla pellentesque dignissim enim sit amet venenatis. +Montes nascetur ridiculus mus mauris. +Morbi enim nunc faucibus a pellentesque sit amet. +Et netus et malesuada fames ac turpis egestas. diff --git a/test/apps/testfile b/test/apps/testfile new file mode 100644 index 0000000..b56928e --- /dev/null +++ b/test/apps/testfile @@ -0,0 +1,16 @@ +Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. +Fames ac turpis egestas maecenas pharetra convallis posuere morbi. +Etiam erat velit scelerisque in dictum non consectetur a erat. +Dolor purus non enim praesent elementum facilisis. +Nulla porttitor massa id neque aliquam vestibulum morbi blandit cursus. +Adipiscing vitae proin sagittis nisl rhoncus mattis rhoncus urna neque. +Aenean pharetra magna ac placerat. +Pulvinar proin gravida hendrerit lectus a. +Iaculis nunc sed augue lacus viverra vitae. +Ac tortor vitae purus faucibus ornare suspendisse sed. +Purus in mollis nunc sed id semper. +Non consectetur a erat nam at lectus urna. +In ante metus dictum at tempor commodo ullamcorper. +Auctor augue mauris augue neque gravida in fermentum. +Nunc scelerisque viverra mauris in. +Morbi leo urna molestie at elementum. diff --git a/test/lib/__init__.py b/test/lib/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/test/lib/test_bsie.py b/test/lib/test_bsie.py new file mode 100644 index 0000000..277ac67 --- /dev/null +++ b/test/lib/test_bsie.py @@ -0,0 +1,231 @@ +""" + +Part of the bsie test suite. +A copy of the license is provided with the project. 
+Author: Matthias Baumgartner, 2022 +""" +# imports +import os +import unittest + +# bsie imports +from bsie.tools import builder +from bsie.utils import ns +from bsie.utils.bsfs import URI, schema +from bsie.utils.node import Node + +# objects to test +from bsie.lib.bsie import BSIE + + +## code ## + +class TestBSIE(unittest.TestCase): + def setUp(self): + # reader builder + rbuild = builder.ReaderBuilder({}) + # extractor builder + ebuild = builder.ExtractorBuilder([ + {'bsie.extractor.generic.path.Path': {}}, + {'bsie.extractor.generic.stat.Stat': {}}, + {'bsie.extractor.generic.constant.Constant': dict( + tuples=[('http://bsfs.ai/schema/Entity#author', 'Me, myself, and I')], + schema=''' + bse:author rdfs:subClassOf bsfs:Predicate ; + rdfs:domain bsfs:Entity ; + rdfs:range xsd:string ; + bsfs:unique "true"^^xsd:boolean . + ''', + )}, + ]) + # build pipeline + self.prefix = URI('http://example.com/local/file#') + pbuild = builder.PipelineBuilder(self.prefix, rbuild, ebuild) + self.pipeline = pbuild.build() + + def test_construction(self): + # pipeline only + lib = BSIE(self.pipeline) + self.assertSetEqual(lib.predicates, { + ns.bse.filename, + ns.bse.filesize, + ns.bse.author, + }) + self.assertEqual(lib.schema, schema.Schema.from_string(''' + prefix rdfs: + prefix xsd: + prefix bsfs: + prefix bse: + # essential nodes + bsfs:Entity rdfs:subClassOf bsfs:Node . + # common definitions + xsd:string rdfs:subClassOf bsfs:Literal . + xsd:integer rdfs:subClassOf bsfs:Literal . + + bse:filename rdfs:subClassOf bsfs:Predicate ; + rdfs:domain bsfs:Entity ; + rdfs:range xsd:string ; + bsfs:unique "false"^^xsd:boolean . + + bse:filesize rdfs:subClassOf bsfs:Predicate ; + rdfs:domain bsfs:Entity ; + rdfs:range xsd:integer; + bsfs:unique "false"^^xsd:boolean . + + bse:author rdfs:subClassOf bsfs:Predicate ; + rdfs:domain bsfs:Entity ; + rdfs:range xsd:string ; + bsfs:unique "true"^^xsd:boolean . 
+ + ''')) + + # specify collect + lib = BSIE(self.pipeline, collect={ + ns.bse.filesize, + ns.bse.author, + ns.bse.inexistent, + }) + self.assertSetEqual(lib.predicates, { + ns.bse.filesize, + ns.bse.author, + }) + self.assertEqual(lib.schema, schema.Schema.from_string(''' + prefix rdfs: + prefix xsd: + prefix bsfs: + prefix bse: + # essential nodes + bsfs:Entity rdfs:subClassOf bsfs:Node . + # common definitions + xsd:string rdfs:subClassOf bsfs:Literal . + xsd:integer rdfs:subClassOf bsfs:Literal . + + bse:filesize rdfs:subClassOf bsfs:Predicate ; + rdfs:domain bsfs:Entity ; + rdfs:range xsd:integer; + bsfs:unique "false"^^xsd:boolean . + + bse:author rdfs:subClassOf bsfs:Predicate ; + rdfs:domain bsfs:Entity ; + rdfs:range xsd:string ; + bsfs:unique "true"^^xsd:boolean . + + ''')) + # empty collect is disregarded + lib = BSIE(self.pipeline, collect={}) + self.assertSetEqual(lib.predicates, { + ns.bse.filename, + ns.bse.filesize, + ns.bse.author, + }) + self.assertEqual(lib.schema, schema.Schema.from_string(''' + prefix rdfs: + prefix xsd: + prefix bsfs: + prefix bse: + # essential nodes + bsfs:Entity rdfs:subClassOf bsfs:Node . + # common definitions + xsd:string rdfs:subClassOf bsfs:Literal . + xsd:integer rdfs:subClassOf bsfs:Literal . + + bse:filename rdfs:subClassOf bsfs:Predicate ; + rdfs:domain bsfs:Entity ; + rdfs:range xsd:string ; + bsfs:unique "false"^^xsd:boolean . + + bse:filesize rdfs:subClassOf bsfs:Predicate ; + rdfs:domain bsfs:Entity ; + rdfs:range xsd:integer; + bsfs:unique "false"^^xsd:boolean . + + bse:author rdfs:subClassOf bsfs:Predicate ; + rdfs:domain bsfs:Entity ; + rdfs:range xsd:string ; + bsfs:unique "true"^^xsd:boolean . 
+ + ''')) + + # specify discard + lib = BSIE(self.pipeline, discard={ + ns.bse.filesize, + ns.bse.filename, + ns.bse.inexistent, + }) + self.assertSetEqual(lib.predicates, { + ns.bse.author, + }) + self.assertEqual(lib.schema, schema.Schema.from_string(''' + prefix rdfs: + prefix xsd: + prefix bsfs: + prefix bse: + # essential nodes + bsfs:Entity rdfs:subClassOf bsfs:Node . + # common definitions + xsd:string rdfs:subClassOf bsfs:Literal . + + bse:author rdfs:subClassOf bsfs:Predicate ; + rdfs:domain bsfs:Entity ; + rdfs:range xsd:string ; + bsfs:unique "true"^^xsd:boolean . + + ''')) + + # specify collect and discard + lib = BSIE(self.pipeline, + collect={ns.bse.filesize, ns.bse.author, ns.bse.foo, ns.bse.bar}, + discard={ns.bse.author, ns.bse.foo, ns.bse.foobar}, + ) + self.assertSetEqual(lib.predicates, { + ns.bse.filesize, + }) + self.assertEqual(lib.schema, schema.Schema.from_string(''' + prefix rdfs: + prefix xsd: + prefix bsfs: + prefix bse: + # essential nodes + bsfs:Entity rdfs:subClassOf bsfs:Node . + # common definitions + xsd:integer rdfs:subClassOf bsfs:Literal . + + bse:filesize rdfs:subClassOf bsfs:Predicate ; + rdfs:domain bsfs:Entity ; + rdfs:range xsd:integer; + bsfs:unique "false"^^xsd:boolean . 
+ + ''')) + + + def test_from_file(self): + # setup + lib = BSIE(self.pipeline) + self.assertSetEqual(set(lib.predicates), { + ns.bse.filesize, + ns.bse.filename, + ns.bse.author, + }) + content_hash = 'a948904f2f0f479b8f8197694b30184b0d2ed1c1cd2a1ec0fb85d299a192a447' + subject = Node(ns.bsfs.Entity, self.prefix + content_hash) + testfile = os.path.join(os.path.dirname(__file__), 'testfile.t') + + # from_file extracts all available triples + self.assertSetEqual(set(lib.from_file(testfile)), { + (subject, lib.schema.predicate(ns.bse.filename), 'testfile.t'), + (subject, lib.schema.predicate(ns.bse.filesize), 12), + (subject, lib.schema.predicate(ns.bse.author), 'Me, myself, and I'), + }) + + # from_file respects predicate argument + self.assertSetEqual(set(lib.from_file(testfile, {ns.bse.filename, ns.bse.invalid})), { + (subject, lib.schema.predicate(ns.bse.filename), 'testfile.t'), + }) + + +## main ## + +if __name__ == '__main__': + unittest.main() + +## EOF ## diff --git a/test/lib/testfile.t b/test/lib/testfile.t new file mode 100644 index 0000000..3b18e51 --- /dev/null +++ b/test/lib/testfile.t @@ -0,0 +1 @@ +hello world diff --git a/test/tools/test_pipeline.py b/test/tools/test_pipeline.py index f98b329..0dd8c75 100644 --- a/test/tools/test_pipeline.py +++ b/test/tools/test_pipeline.py @@ -95,7 +95,7 @@ class TestPipeline(unittest.TestCase): # build pipeline pipeline = Pipeline(self.prefix, self.ext2rdr) # build objects for tests - content_hash = 'e3bb4ab54e4a50d75626a1f76814f152f4edc60a82ad724aa2aa922ca5534427' + content_hash = 'a948904f2f0f479b8f8197694b30184b0d2ed1c1cd2a1ec0fb85d299a192a447' subject = Node(ns.bsfs.Entity, self.prefix + content_hash) testfile = os.path.join(os.path.dirname(__file__), 'testfile.t') p_filename = pipeline.schema.predicate(ns.bse.filename) @@ -108,7 +108,7 @@ class TestPipeline(unittest.TestCase): # extract given predicates self.assertSetEqual(set(pipeline(testfile, {p_filename, p_filesize})), { (subject, p_filename, 
'testfile.t'), - (subject, p_filesize, 11), + (subject, p_filesize, 12), }) self.assertSetEqual(set(pipeline(testfile, {p_author})), { (subject, p_author, 'Me, myself, and I'), @@ -117,12 +117,12 @@ class TestPipeline(unittest.TestCase): (subject, p_filename, 'testfile.t'), }) self.assertSetEqual(set(pipeline(testfile, {p_filesize})), { - (subject, p_filesize, 11), + (subject, p_filesize, 12), }) # extract all predicates self.assertSetEqual(set(pipeline(testfile)), { (subject, p_filename, 'testfile.t'), - (subject, p_filesize, 11), + (subject, p_filesize, 12), (subject, p_author, 'Me, myself, and I'), (subject, p_rating, 123), }) @@ -158,6 +158,18 @@ class TestPipeline(unittest.TestCase): p_filename = pipeline.schema.predicate(ns.bse.filename) self.assertSetEqual(set(pipeline(testfile, {p_filename})), set()) + def test_predicates(self): + # build pipeline + pipeline = Pipeline(self.prefix, self.ext2rdr) + # + self.assertSetEqual(set(pipeline.predicates()), { + pipeline.schema.predicate(ns.bsfs.Predicate), + pipeline.schema.predicate(ns.bse.filename), + pipeline.schema.predicate(ns.bse.filesize), + pipeline.schema.predicate(ns.bse.author), + pipeline.schema.predicate(ns.bse.rating), + }) + ## main ## diff --git a/test/tools/testfile.t b/test/tools/testfile.t index 58bf1b8..3b18e51 100644 --- a/test/tools/testfile.t +++ b/test/tools/testfile.t @@ -1 +1 @@ -hello worl +hello world -- cgit v1.2.3 From 3dc3e9a9b0fc8c9727f91359814866d3deae6e79 Mon Sep 17 00:00:00 2001 From: Matthias Baumgartner Date: Thu, 15 Dec 2022 16:42:07 +0100 Subject: minor fixes and comments --- .pylintrc | 2 +- bsie/__init__.py | 5 ++--- bsie/base/extractor.py | 9 +++++++-- bsie/utils/namespaces.py | 1 + 4 files changed, 11 insertions(+), 6 deletions(-) diff --git a/.pylintrc b/.pylintrc index 3cfae38..1b34854 100644 --- a/.pylintrc +++ b/.pylintrc @@ -148,7 +148,7 @@ logging-format-style=old [MISCELLANEOUS] # List of note tags to take in consideration, separated by a comma. 
-notes=FIXME,TODO,NOTE +notes=FIXME,TODO diff --git a/bsie/__init__.py b/bsie/__init__.py index 2b874bd..96e6953 100644 --- a/bsie/__init__.py +++ b/bsie/__init__.py @@ -9,9 +9,8 @@ import collections import typing # constants -version_info = collections.namedtuple('version_info', - ('major', 'minor', 'micro')) \ - (0, 0, 1) +T_VERSION_INFO = collections.namedtuple('T_VERSION_INFO', ('major', 'minor', 'micro')) +version_info = T_VERSION_INFO(0, 0, 1) # exports __all__: typing.Sequence[str] = [] diff --git a/bsie/base/extractor.py b/bsie/base/extractor.py index 75b7173..bfa403c 100644 --- a/bsie/base/extractor.py +++ b/bsie/base/extractor.py @@ -20,7 +20,7 @@ __all__: typing.Sequence[str] = ( # constants # essential definitions typically used in extractor schemas. -# NOTE: The definition here is only for convenience; Each Extractor must implement its use, if so desired. +# NOTE: This preamble is only for convenience; Each Extractor must implement its use, if so desired. SCHEMA_PREAMBLE = ''' # common external prefixes prefix rdf: @@ -45,7 +45,12 @@ SCHEMA_PREAMBLE = ''' ## code ## class Extractor(abc.ABC): - """Produce (node, predicate, value)-triples from some content.""" + """Produce (subject, predicate, value)-triples from some content. + The Extractor produces principal predicates that provide information + about the content itself (i.e., triples that include the subject), + and may also generate triples with auxiliary predicates if the + extracted value is a node itself. + """ # what type of content is expected (i.e. reader subclass). 
CONTENT_READER: typing.Optional[str] = None diff --git a/bsie/utils/namespaces.py b/bsie/utils/namespaces.py index 2fcb2dc..d6e1c72 100644 --- a/bsie/utils/namespaces.py +++ b/bsie/utils/namespaces.py @@ -21,6 +21,7 @@ __all__: typing.Sequence[str] = ( 'bse', 'bsfs', 'bsm', + 'xsd', ) ## EOF ## -- cgit v1.2.3 From 49cf03fc212c813862453de5352436dc90d1e458 Mon Sep 17 00:00:00 2001 From: Matthias Baumgartner Date: Thu, 15 Dec 2022 16:50:53 +0100 Subject: imports and init files --- bsie/apps/index.py | 7 ++----- bsie/base/__init__.py | 8 ++++---- bsie/base/reader.py | 8 ++++---- bsie/lib/__init__.py | 7 ++++++- bsie/reader/stat.py | 2 +- bsie/tools/__init__.py | 4 ++-- bsie/utils/bsfs.py | 3 ++- bsie/utils/node.py | 18 +++++++++--------- test/base/test_extractor.py | 9 ++++----- test/base/test_reader.py | 4 ++-- test/extractor/generic/test_constant.py | 5 ++--- test/extractor/generic/test_path.py | 10 ++++------ test/extractor/generic/test_stat.py | 10 ++++------ 13 files changed, 46 insertions(+), 49 deletions(-) diff --git a/bsie/apps/index.py b/bsie/apps/index.py index 821aa4c..aa26d0f 100644 --- a/bsie/apps/index.py +++ b/bsie/apps/index.py @@ -9,14 +9,11 @@ import argparse import os import typing -# bsfs imports -import bsfs - # bsie imports from bsie.base import errors -from bsie.lib.bsie import BSIE +from bsie.lib import BSIE from bsie.tools import builder -from bsie.utils.bsfs import URI +from bsie.utils import bsfs # exports __all__: typing.Sequence[str] = ( diff --git a/bsie/base/__init__.py b/bsie/base/__init__.py index 0154862..0d362cd 100644 --- a/bsie/base/__init__.py +++ b/bsie/base/__init__.py @@ -11,14 +11,14 @@ import typing # inner-module imports from . import errors -from . import extractor -from . 
import reader +from .extractor import Extractor +from .reader import Reader # exports __all__: typing.Sequence[str] = ( + 'Extractor', + 'Reader', 'errors', - 'extractor', - 'reader', ) ## EOF ## diff --git a/bsie/base/reader.py b/bsie/base/reader.py index b7eabf7..cbabd36 100644 --- a/bsie/base/reader.py +++ b/bsie/base/reader.py @@ -13,7 +13,7 @@ import abc import typing # bsie imports -from bsie.utils.bsfs import URI, typename +from bsie.utils import bsfs # exports __all__: typing.Sequence[str] = ( @@ -27,10 +27,10 @@ class Reader(abc.ABC): """Read and return some content from a file.""" def __str__(self) -> str: - return typename(self) + return bsfs.typename(self) def __repr__(self) -> str: - return f'{typename(self)}()' + return f'{bsfs.typename(self)}()' def __eq__(self, other: typing.Any) -> bool: return isinstance(other, type(self)) @@ -39,7 +39,7 @@ class Reader(abc.ABC): return hash(type(self)) @abc.abstractmethod - def __call__(self, path: URI) -> typing.Any: + def __call__(self, path: bsfs.URI) -> typing.Any: """Return some content of the file at *path*. Raises a `ReaderError` if the reader cannot make sense of the file format. 
""" diff --git a/bsie/lib/__init__.py b/bsie/lib/__init__.py index f6c9018..578c2c4 100644 --- a/bsie/lib/__init__.py +++ b/bsie/lib/__init__.py @@ -7,7 +7,12 @@ Author: Matthias Baumgartner, 2022 # imports import typing +# inner-module imports +from .bsie import BSIE + # exports -__all__: typing.Sequence[str] = [] +__all__: typing.Sequence[str] = ( + 'BSIE', + ) ## EOF ## diff --git a/bsie/reader/stat.py b/bsie/reader/stat.py index 592d912..fc5fb24 100644 --- a/bsie/reader/stat.py +++ b/bsie/reader/stat.py @@ -9,7 +9,7 @@ import os import typing # bsie imports -from bsie.base import reader, errors +from bsie.base import errors, reader # exports __all__: typing.Sequence[str] = ( diff --git a/bsie/tools/__init__.py b/bsie/tools/__init__.py index 8ca9620..803c321 100644 --- a/bsie/tools/__init__.py +++ b/bsie/tools/__init__.py @@ -9,12 +9,12 @@ import typing # inner-module imports from . import builder -from . import pipeline +from .pipeline import Pipeline # exports __all__: typing.Sequence[str] = ( 'builder', - 'pipeline', + 'Pipeline', ) ## EOF ## diff --git a/bsie/utils/bsfs.py b/bsie/utils/bsfs.py index a4b7626..c48049d 100644 --- a/bsie/utils/bsfs.py +++ b/bsie/utils/bsfs.py @@ -8,13 +8,14 @@ Author: Matthias Baumgartner, 2022 import typing # bsfs imports -from bsfs import schema +from bsfs import Open, schema from bsfs.namespace import Namespace from bsfs.utils import URI, typename, uuid # exports __all__: typing.Sequence[str] = ( 'Namespace', + 'Open', 'URI', 'schema', 'typename', diff --git a/bsie/utils/node.py b/bsie/utils/node.py index c9c494f..ecf39cd 100644 --- a/bsie/utils/node.py +++ b/bsie/utils/node.py @@ -8,7 +8,7 @@ Author: Matthias Baumgartner, 2022 import typing # bsie imports -from bsie.utils.bsfs import URI, typename +from bsie.utils import bsfs # exports __all__: typing.Sequence[str] = ( @@ -22,19 +22,19 @@ class Node(): """Lightweight Node, disconnected from any bsfs structures.""" # node type. 
- node_type: URI + node_type: bsfs.URI # node URI. - uri: URI + uri: bsfs.URI def __init__( self, - node_type: URI, - uri: URI, + node_type: bsfs.URI, + uri: bsfs.URI, ): # assign members - self.node_type = URI(node_type) - self.uri = URI(uri) + self.node_type = bsfs.URI(node_type) + self.uri = bsfs.URI(uri) def __eq__(self, other: typing.Any) -> bool: return isinstance(other, Node) \ @@ -45,9 +45,9 @@ class Node(): return hash((type(self), self.node_type, self.uri)) def __str__(self) -> str: - return f'{typename(self)}({self.node_type}, {self.uri})' + return f'{bsfs.typename(self)}({self.node_type}, {self.uri})' def __repr__(self) -> str: - return f'{typename(self)}({self.node_type}, {self.uri})' + return f'{bsfs.typename(self)}({self.node_type}, {self.uri})' ## EOF ## diff --git a/test/base/test_extractor.py b/test/base/test_extractor.py index be876ad..5410ae0 100644 --- a/test/base/test_extractor.py +++ b/test/base/test_extractor.py @@ -8,8 +8,7 @@ Author: Matthias Baumgartner, 2022 import unittest # bsie imports -from bsie.utils import ns -from bsie.utils.bsfs import schema as _schema, URI +from bsie.utils import bsfs, ns # objects to test from bsie.base import extractor @@ -19,7 +18,7 @@ from bsie.base import extractor class StubExtractor(extractor.Extractor): def __init__(self): - super().__init__(_schema.Schema.from_string(extractor.SCHEMA_PREAMBLE + ''' + super().__init__(bsfs.schema.Schema.from_string(extractor.SCHEMA_PREAMBLE + ''' bse:author rdfs:subClassOf bsfs:Predicate ; rdfs:domain bsfs:Entity ; rdfs:range xsd:string ; @@ -53,9 +52,9 @@ class TestExtractor(unittest.TestCase): self.assertNotEqual(hash(ext), hash(sub)) def test_predicates(self): - schema = _schema.Schema.Empty() + schema = bsfs.schema.Schema.Empty() entity = schema.node(ns.bsfs.Node).get_child(ns.bsfs.Entity) - string = schema.literal(ns.bsfs.Literal).get_child(URI('http://www.w3.org/2001/XMLSchema#string')) + string = 
schema.literal(ns.bsfs.Literal).get_child(bsfs.URI('http://www.w3.org/2001/XMLSchema#string')) p_author = schema.predicate(ns.bsfs.Predicate).get_child(ns.bse.author, domain=entity, range=string) p_comment = schema.predicate(ns.bsfs.Predicate).get_child(ns.bse.comment, domain=entity, range=string) ext = StubExtractor() diff --git a/test/base/test_reader.py b/test/base/test_reader.py index 802b314..a907eb9 100644 --- a/test/base/test_reader.py +++ b/test/base/test_reader.py @@ -8,12 +8,12 @@ Author: Matthias Baumgartner, 2022 import unittest # objects to test -from bsie.base import reader +from bsie import base ## code ## -class StubReader(reader.Reader): +class StubReader(base.Reader): def __call__(self, path): raise NotImplementedError() diff --git a/test/extractor/generic/test_constant.py b/test/extractor/generic/test_constant.py index 7f72ccf..9dbaced 100644 --- a/test/extractor/generic/test_constant.py +++ b/test/extractor/generic/test_constant.py @@ -8,8 +8,7 @@ Author: Matthias Baumgartner, 2022 import unittest # bsie imports -from bsie.utils import ns -from bsie.utils.node import Node +from bsie.utils import node as _node, ns # objects to test from bsie.extractor.generic.constant import Constant @@ -34,7 +33,7 @@ class TestConstant(unittest.TestCase): (ns.bse.comment, 'the quick brown fox jumps over the lazy dog.'), ] ext = Constant(schema, tuples) - node = Node(ns.bsfs.Entity, '') # Blank node + node = _node.Node(ns.bsfs.Entity, '') # Blank node p_author = ext.schema.predicate(ns.bse.author) p_comment = ext.schema.predicate(ns.bse.comment) entity = ext.schema.node(ns.bsfs.Node).get_child(ns.bsfs.Entity) diff --git a/test/extractor/generic/test_path.py b/test/extractor/generic/test_path.py index aa21b04..d2b6c61 100644 --- a/test/extractor/generic/test_path.py +++ b/test/extractor/generic/test_path.py @@ -8,10 +8,8 @@ Author: Matthias Baumgartner, 2022 import unittest # bsie imports -from bsie import base -from bsie.utils import ns -from bsie.utils.bsfs 
import schema -from bsie.utils.node import Node +from bsie.base import extractor +from bsie.utils import bsfs, node as _node, ns # objects to test from bsie.extractor.generic.path import Path @@ -31,7 +29,7 @@ class TestPath(unittest.TestCase): def test_schema(self): self.assertEqual(Path().schema, - schema.Schema.from_string(base.extractor.SCHEMA_PREAMBLE + ''' + bsfs.schema.Schema.from_string(extractor.SCHEMA_PREAMBLE + ''' bse:filename rdfs:subClassOf bsfs:Predicate ; rdfs:domain bsfs:Entity ; rdfs:range xsd:string ; @@ -40,7 +38,7 @@ class TestPath(unittest.TestCase): def test_extract(self): ext = Path() - node = Node(ns.bsfs.Entity, '') # Blank node + node = _node.Node(ns.bsfs.File, '') # Blank node content = '/tmp/foo/bar' p_filename = ext.schema.predicate(ns.bse.filename) entity = ext.schema.node(ns.bsfs.Node).get_child(ns.bsfs.Entity) diff --git a/test/extractor/generic/test_stat.py b/test/extractor/generic/test_stat.py index bed5fab..6cfc57f 100644 --- a/test/extractor/generic/test_stat.py +++ b/test/extractor/generic/test_stat.py @@ -9,10 +9,8 @@ import os import unittest # bsie imports -from bsie import base -from bsie.utils import ns -from bsie.utils.bsfs import schema -from bsie.utils.node import Node +from bsie.base import extractor +from bsie.utils import bsfs, node as _node, ns # objects to test from bsie.extractor.generic.stat import Stat @@ -32,7 +30,7 @@ class TestStat(unittest.TestCase): def test_schema(self): self.assertEqual(Stat().schema, - schema.Schema.from_string(base.extractor.SCHEMA_PREAMBLE + ''' + bsfs.schema.Schema.from_string(extractor.SCHEMA_PREAMBLE + ''' bse:filesize rdfs:subClassOf bsfs:Predicate ; rdfs:domain bsfs:Entity ; rdfs:range xsd:integer ; @@ -41,7 +39,7 @@ class TestStat(unittest.TestCase): def test_extract(self): ext = Stat() - node = Node(ns.bsfs.Entity, '') # Blank node + node = _node.Node(ns.bsfs.File, '') # Blank node content = os.stat(__file__) p_filesize = ext.schema.predicate(ns.bse.filesize) entity = 
ext.schema.node(ns.bsfs.Node).get_child(ns.bsfs.Entity) -- cgit v1.2.3 From 3b7fee369924eb7704709edeb8c17fff9c020dfb Mon Sep 17 00:00:00 2001 From: Matthias Baumgartner Date: Thu, 15 Dec 2022 17:06:09 +0100 Subject: import fixes --- bsie/base/extractor.py | 5 +++-- bsie/extractor/generic/constant.py | 9 +++++---- bsie/extractor/generic/path.py | 6 +++--- bsie/extractor/generic/stat.py | 6 +++--- bsie/lib/bsie.py | 11 ++++++----- bsie/tools/builder.py | 17 +++++++++-------- bsie/tools/pipeline.py | 6 +++--- test/lib/test_bsie.py | 6 +++--- test/tools/test_builder.py | 19 +++++++++---------- test/tools/test_pipeline.py | 9 ++++----- test/utils/test_node.py | 17 ++++++++--------- 11 files changed, 56 insertions(+), 55 deletions(-) diff --git a/bsie/base/extractor.py b/bsie/base/extractor.py index bfa403c..a5c7846 100644 --- a/bsie/base/extractor.py +++ b/bsie/base/extractor.py @@ -11,6 +11,7 @@ import typing # bsie imports from bsie.utils import node from bsie.utils.bsfs import schema as _schema, typename +from bsie.utils import bsfs, node, ns # exports __all__: typing.Sequence[str] = ( @@ -62,10 +63,10 @@ class Extractor(abc.ABC): self.schema = schema def __str__(self) -> str: - return typename(self) + return bsfs.typename(self) def __repr__(self) -> str: - return f'{typename(self)}()' + return f'{bsfs.typename(self)}()' def __eq__(self, other: typing.Any) -> bool: return isinstance(other, type(self)) \ diff --git a/bsie/extractor/generic/constant.py b/bsie/extractor/generic/constant.py index 7da792a..f9e3415 100644 --- a/bsie/extractor/generic/constant.py +++ b/bsie/extractor/generic/constant.py @@ -11,6 +11,7 @@ import typing from bsie.base import extractor from bsie.utils.bsfs import URI, schema as _schema from bsie.utils.node import Node +from bsie.utils import bsfs, node # exports __all__: typing.Sequence[str] = ( @@ -26,14 +27,14 @@ class Constant(extractor.Extractor): CONTENT_READER = None # predicate/value pairs to be produced. 
- _tuples: typing.Tuple[typing.Tuple[_schema.Predicate, typing.Any], ...] + _tuples: typing.Tuple[typing.Tuple[bsfs.schema.Predicate, typing.Any], ...] def __init__( self, schema: str, - tuples: typing.Iterable[typing.Tuple[URI, typing.Any]], + tuples: typing.Iterable[typing.Tuple[bsfs.URI, typing.Any]], ): - super().__init__(_schema.Schema.from_string(extractor.SCHEMA_PREAMBLE + schema)) + super().__init__(bsfs.schema.Schema.from_string(extractor.SCHEMA_PREAMBLE + schema)) # NOTE: Raises a KeyError if the predicate is not part of the schema self._tuples = tuple((self.schema.predicate(p_uri), value) for p_uri, value in tuples) # FIXME: use schema instance for value checking @@ -47,7 +48,7 @@ class Constant(extractor.Extractor): def extract( self, - subject: Node, + subject: node.Node, content: None, predicates: typing.Iterable[_schema.Predicate], ) -> typing.Iterator[typing.Tuple[Node, _schema.Predicate, typing.Any]]: diff --git a/bsie/extractor/generic/path.py b/bsie/extractor/generic/path.py index e6b901e..2cc592a 100644 --- a/bsie/extractor/generic/path.py +++ b/bsie/extractor/generic/path.py @@ -10,8 +10,8 @@ import typing # bsie imports from bsie.base import extractor -from bsie.utils import node, ns from bsie.utils.bsfs import schema +from bsie.utils import bsfs, node, ns # exports __all__: typing.Sequence[str] = ( @@ -27,10 +27,10 @@ class Path(extractor.Extractor): CONTENT_READER = 'bsie.reader.path.Path' # mapping from predicate to handler function. 
- _callmap: typing.Dict[schema.Predicate, typing.Callable[[str], typing.Any]] + _callmap: typing.Dict[bsfs.schema.Predicate, typing.Callable[[str], typing.Any]] def __init__(self): - super().__init__(schema.Schema.from_string(extractor.SCHEMA_PREAMBLE + ''' + super().__init__(bsfs.schema.Schema.from_string(extractor.SCHEMA_PREAMBLE + ''' bse:filename rdfs:subClassOf bsfs:Predicate ; rdfs:domain bsfs:Entity ; rdfs:range xsd:string ; diff --git a/bsie/extractor/generic/stat.py b/bsie/extractor/generic/stat.py index 6493d37..dfde7d2 100644 --- a/bsie/extractor/generic/stat.py +++ b/bsie/extractor/generic/stat.py @@ -10,8 +10,8 @@ import typing # bsie imports from bsie.base import extractor -from bsie.utils import node, ns from bsie.utils.bsfs import schema as _schema +from bsie.utils import bsfs, node, ns # exports __all__: typing.Sequence[str] = ( @@ -27,10 +27,10 @@ class Stat(extractor.Extractor): CONTENT_READER = 'bsie.reader.stat.Stat' # mapping from predicate to handler function. - _callmap: typing.Dict[_schema.Predicate, typing.Callable[[os.stat_result], typing.Any]] + _callmap: typing.Dict[bsfs.schema.Predicate, typing.Callable[[os.stat_result], typing.Any]] def __init__(self): - super().__init__(_schema.Schema.from_string(extractor.SCHEMA_PREAMBLE + ''' + super().__init__(bsfs.schema.Schema.from_string(extractor.SCHEMA_PREAMBLE + ''' bse:filesize rdfs:subClassOf bsfs:Predicate ; rdfs:domain bsfs:Entity ; rdfs:range xsd:integer ; diff --git a/bsie/lib/bsie.py b/bsie/lib/bsie.py index aeccc8c..3aeee2b 100644 --- a/bsie/lib/bsie.py +++ b/bsie/lib/bsie.py @@ -9,8 +9,9 @@ import typing # bsie imports from bsie.tools.pipeline import Pipeline -from bsie.utils import node, ns from bsie.utils.bsfs import URI, schema as schema_ +from bsie.tools import Pipeline +from bsie.utils import bsfs, node, ns # exports __all__: typing.Sequence[str] = ( @@ -39,10 +40,10 @@ class BSIE(): self, # pipeline builder. pipeline: Pipeline, - # predicates to extract at most. 
None implies all available w.r.t. extractors. - collect: typing.Optional[typing.Iterable[URI]] = None, - # predicates to discard. - discard: typing.Optional[typing.Iterable[URI]] = None, + # principals to extract at most. None implies all available w.r.t. extractors. + collect: typing.Optional[typing.Iterable[bsfs.URI]] = None, + # principals to discard. + discard: typing.Optional[typing.Iterable[bsfs.URI]] = None, ): # store pipeline self.pipeline = pipeline diff --git a/bsie/tools/builder.py b/bsie/tools/builder.py index 8f7a410..8c6b931 100644 --- a/bsie/tools/builder.py +++ b/bsie/tools/builder.py @@ -13,6 +13,7 @@ import typing from bsie import base from bsie.base import errors from bsie.utils.bsfs import URI, typename +from bsie.utils import bsfs # inner-module imports from . import pipeline @@ -61,7 +62,7 @@ def _unpack_name(name): class ReaderBuilder(): - """Build `bsie.base.reader.Reader` instances. + """Build `bsie.base.Reader` instances. Readers are defined via their qualified class name (e.g., bsie.reader.path.Path) and optional keyword @@ -83,7 +84,7 @@ class ReaderBuilder(): self.kwargs = kwargs self.cache = {} - def build(self, name: str) -> base.reader.Reader: + def build(self, name: str) -> base.Reader: """Return an instance for the qualified class name.""" # return cached instance if name in self.cache: @@ -98,7 +99,7 @@ class ReaderBuilder(): # get kwargs kwargs = self.kwargs.get(name, {}) if not isinstance(kwargs, dict): - raise TypeError(f'expected a kwargs dict, found {typename(kwargs)}') + raise TypeError(f'expected a kwargs dict, found {bsfs.typename(kwargs)}') try: # build, cache, and return instance obj = cls(**kwargs) @@ -108,11 +109,11 @@ class ReaderBuilder(): return obj except Exception as err: - raise errors.BuilderError(f'failed to build reader {name} due to {typename(err)}: {err}') from err + raise errors.BuilderError(f'failed to build reader {name} due to {bsfs.typename(err)}: {err}') from err class ExtractorBuilder(): - """Build 
`bsie.base.extractor.Extractor instances. + """Build `bsie.base.Extractor` instances. It is permissible to build multiple instances of the same extractor (typically with different arguments), hence the ExtractorBuilder @@ -133,14 +134,14 @@ class ExtractorBuilder(): """Iterate over extractor specifications.""" return iter(range(len(self.specs))) - def build(self, index: int) -> base.extractor.Extractor: + def build(self, index: int) -> base.Extractor: """Return an instance of the n'th extractor (n=*index*).""" # get build instructions specs = self.specs[index] # check specs structure. expecting[{name: {kwargs}}] if not isinstance(specs, dict): - raise TypeError(f'expected a dict, found {typename(specs)}') + raise TypeError(f'expected a dict, found {bsfs.typename(specs)}') if len(specs) != 1: raise TypeError(f'expected a dict of length one, found {len(specs)}') @@ -150,7 +151,7 @@ class ExtractorBuilder(): # check kwargs structure if not isinstance(kwargs, dict): - raise TypeError(f'expected a dict, found {typename(kwargs)}') + raise TypeError(f'expected a dict, found {bsfs.typename(kwargs)}') # check name and get module/class components module_name, class_name = _unpack_name(name) diff --git a/bsie/tools/pipeline.py b/bsie/tools/pipeline.py index da422c0..7fdd935 100644 --- a/bsie/tools/pipeline.py +++ b/bsie/tools/pipeline.py @@ -11,9 +11,9 @@ import typing # bsie imports from bsie import base -from bsie.utils import ns from bsie.utils.node import Node from bsie.utils.bsfs import schema as _schema, URI, uuid as _uuid, typename +from bsie.utils import bsfs, node, ns # exports __all__: typing.Sequence[str] = ( @@ -56,10 +56,10 @@ class Pipeline(): self.schema = _schema.Schema.Union(ext.schema for ext in ext2rdr) def __str__(self) -> str: - return typename(self) + return bsfs.typename(self) def __repr__(self) -> str: - return f'{typename(self)}(...)' + return f'{bsfs.typename(self)}(...)' def __hash__(self) -> int: return hash((type(self), self._prefix, self.schema, 
tuple(self._ext2rdr), tuple(self._ext2rdr.values()))) diff --git a/test/lib/test_bsie.py b/test/lib/test_bsie.py index 277ac67..5b71752 100644 --- a/test/lib/test_bsie.py +++ b/test/lib/test_bsie.py @@ -9,10 +9,11 @@ import os import unittest # bsie imports +from bsie.base import extractor from bsie.tools import builder -from bsie.utils import ns from bsie.utils.bsfs import URI, schema from bsie.utils.node import Node +from bsie.utils import bsfs, node, ns # objects to test from bsie.lib.bsie import BSIE @@ -76,7 +77,6 @@ class TestBSIE(unittest.TestCase): rdfs:domain bsfs:Entity ; rdfs:range xsd:string ; bsfs:unique "true"^^xsd:boolean . - ''')) # specify collect @@ -207,7 +207,7 @@ class TestBSIE(unittest.TestCase): ns.bse.author, }) content_hash = 'a948904f2f0f479b8f8197694b30184b0d2ed1c1cd2a1ec0fb85d299a192a447' - subject = Node(ns.bsfs.Entity, self.prefix + content_hash) + subject = node.Node(ns.bsfs.File, self.prefix + 'file#' + content_hash) testfile = os.path.join(os.path.dirname(__file__), 'testfile.t') # from_file extracts all available triples diff --git a/test/tools/test_builder.py b/test/tools/test_builder.py index bc6f903..62c637c 100644 --- a/test/tools/test_builder.py +++ b/test/tools/test_builder.py @@ -10,8 +10,7 @@ import unittest # bsie imports from bsie import base -from bsie.base import errors -from bsie.utils.bsfs import URI +from bsie.utils import bsfs # objects to test from bsie.tools.builder import ExtractorBuilder @@ -26,12 +25,12 @@ from bsie.tools.builder import _unpack_name class TestUtils(unittest.TestCase): def test_safe_load(self): # invalid module - self.assertRaises(errors.LoaderError, _safe_load, 'dBGHMSAYOoKeKMpywDoKZQycENFPvN', 'foobar') - self.assertRaises(errors.LoaderError, _safe_load, 'dBGHMSAYOoKeKMpywDoKZQycENFPvN.bar', 'foobar') + self.assertRaises(base.errors.LoaderError, _safe_load, 'dBGHMSAYOoKeKMpywDoKZQycENFPvN', 'foobar') + self.assertRaises(base.errors.LoaderError, _safe_load, 'dBGHMSAYOoKeKMpywDoKZQycENFPvN.bar', 
'foobar') # partially valid module - self.assertRaises(errors.LoaderError, _safe_load, 'os.foo', 'foobar') + self.assertRaises(base.errors.LoaderError, _safe_load, 'os.foo', 'foobar') # invalid class - self.assertRaises(errors.LoaderError, _safe_load, 'os.path', 'foo') + self.assertRaises(base.errors.LoaderError, _safe_load, 'os.path', 'foo') # valid module and class cls = _safe_load('collections.abc', 'Container') import collections.abc @@ -65,10 +64,10 @@ class TestReaderBuilder(unittest.TestCase): self.assertRaises(TypeError, builder.build, None) self.assertRaises(ValueError, builder.build, '') self.assertRaises(ValueError, builder.build, 'Path') - self.assertRaises(errors.BuilderError, builder.build, 'path.Path') + self.assertRaises(base.errors.BuilderError, builder.build, 'path.Path') # invalid config builder = ReaderBuilder({'bsie.reader.stat.Stat': dict(foo=123)}) - self.assertRaises(errors.BuilderError, builder.build, 'bsie.reader.stat.Stat') + self.assertRaises(base.errors.BuilderError, builder.build, 'bsie.reader.stat.Stat') builder = ReaderBuilder({'bsie.reader.stat.Stat': 123}) self.assertRaises(TypeError, builder.build, 'bsie.reader.stat.Stat') # no instructions @@ -143,7 +142,7 @@ class TestExtractorBuilder(unittest.TestCase): ])) # building with invalid args - self.assertRaises(errors.BuilderError, ExtractorBuilder( + self.assertRaises(base.errors.BuilderError, ExtractorBuilder( [{'bsie.extractor.generic.path.Path': {'foo': 123}}]).build, 0) # non-dict build specification self.assertRaises(TypeError, ExtractorBuilder( @@ -161,7 +160,7 @@ class TestExtractorBuilder(unittest.TestCase): class TestPipelineBuilder(unittest.TestCase): def test_build(self): - prefix = URI('http://example.com/local/file#') + prefix = bsfs.URI('http://example.com/local/file#') c_schema = ''' bse:author rdfs:subClassOf bsfs:Predicate ; rdfs:domain bsfs:Entity ; diff --git a/test/tools/test_pipeline.py b/test/tools/test_pipeline.py index 0dd8c75..92801ed 100644 --- 
a/test/tools/test_pipeline.py +++ b/test/tools/test_pipeline.py @@ -11,9 +11,8 @@ import unittest # bsie imports from bsie.base import errors -from bsie.utils import ns from bsie.utils.bsfs import URI -from bsie.utils.node import Node +from bsie.utils import bsfs, node, ns import bsie.extractor.generic.constant import bsie.extractor.generic.path import bsie.extractor.generic.stat @@ -68,8 +67,8 @@ class TestPipeline(unittest.TestCase): self.assertEqual(hash(pipeline), hash(Pipeline(self.prefix, self.ext2rdr))) # equivalence respects prefix - self.assertNotEqual(pipeline, Pipeline(URI('http://example.com/global/ent#'), self.ext2rdr)) - self.assertNotEqual(hash(pipeline), hash(Pipeline(URI('http://example.com/global/ent#'), self.ext2rdr))) + self.assertNotEqual(pipeline, Pipeline(bsfs.URI('http://example.com/global/ent#'), self.ext2rdr)) + self.assertNotEqual(hash(pipeline), hash(Pipeline(bsfs.URI('http://example.com/global/ent#'), self.ext2rdr))) # equivalence respects extractors/readers ext2rdr = {ext: rdr for idx, (ext, rdr) in enumerate(self.ext2rdr.items()) if idx % 2 == 0} self.assertNotEqual(pipeline, Pipeline(self.prefix, ext2rdr)) @@ -96,7 +95,7 @@ class TestPipeline(unittest.TestCase): pipeline = Pipeline(self.prefix, self.ext2rdr) # build objects for tests content_hash = 'a948904f2f0f479b8f8197694b30184b0d2ed1c1cd2a1ec0fb85d299a192a447' - subject = Node(ns.bsfs.Entity, self.prefix + content_hash) + subject = node.Node(ns.bsfs.File, self.prefix + 'file#' + content_hash) testfile = os.path.join(os.path.dirname(__file__), 'testfile.t') p_filename = pipeline.schema.predicate(ns.bse.filename) p_filesize = pipeline.schema.predicate(ns.bse.filesize) diff --git a/test/utils/test_node.py b/test/utils/test_node.py index 826f199..c70f0b8 100644 --- a/test/utils/test_node.py +++ b/test/utils/test_node.py @@ -8,8 +8,7 @@ Author: Matthias Baumgartner, 2022 import unittest # bsie imports -from bsie.utils.bsfs import URI -from bsie.utils import ns +from bsie.utils import 
bsfs, ns # objects to test from bsie.utils.node import Node @@ -19,14 +18,14 @@ from bsie.utils.node import Node class TestNode(unittest.TestCase): def test_equality(self): - uri = URI('http://example.com/me/entity#1234') + uri = bsfs.URI('http://example.com/me/entity#1234') node = Node(ns.bsfs.Entity, uri) # basic equivalence - self.assertEqual(node, Node(ns.bsfs.Entity, URI('http://example.com/me/entity#1234'))) - self.assertEqual(hash(node), hash(Node(ns.bsfs.Entity, URI('http://example.com/me/entity#1234')))) + self.assertEqual(node, Node(ns.bsfs.Entity, bsfs.URI('http://example.com/me/entity#1234'))) + self.assertEqual(hash(node), hash(Node(ns.bsfs.Entity, bsfs.URI('http://example.com/me/entity#1234')))) # equality respects uri - self.assertNotEqual(node, Node(ns.bsfs.Entity, URI('http://example.com/me/entity#4321'))) - self.assertNotEqual(hash(node), hash(Node(ns.bsfs.Entity, URI('http://example.com/me/entity#4321')))) + self.assertNotEqual(node, Node(ns.bsfs.Entity, bsfs.URI('http://example.com/me/entity#4321'))) + self.assertNotEqual(hash(node), hash(Node(ns.bsfs.Entity, bsfs.URI('http://example.com/me/entity#4321')))) # equality respects node_type self.assertNotEqual(node, Node(ns.bsfs.Foo, uri)) self.assertNotEqual(hash(node), hash(Node(ns.bsfs.Foo, uri))) @@ -42,7 +41,7 @@ class TestNode(unittest.TestCase): self.assertNotEqual(hash(node), hash(Foo())) def test_str(self): - uri = URI('http://example.com/me/entity#1234') + uri = bsfs.URI('http://example.com/me/entity#1234') # basic string conversion node = Node(ns.bsfs.Entity, uri) self.assertEqual(str(node), 'Node(http://bsfs.ai/schema/Entity, http://example.com/me/entity#1234)') @@ -52,7 +51,7 @@ class TestNode(unittest.TestCase): self.assertEqual(str(node), 'Node(http://bsfs.ai/schema/Foo, http://example.com/me/entity#1234)') self.assertEqual(repr(node), 'Node(http://bsfs.ai/schema/Foo, http://example.com/me/entity#1234)') # string conversion respects uri - node = Node(ns.bsfs.Entity, 
URI('http://example.com/me/entity#4321')) + node = Node(ns.bsfs.Entity, bsfs.URI('http://example.com/me/entity#4321')) self.assertEqual(str(node), 'Node(http://bsfs.ai/schema/Entity, http://example.com/me/entity#4321)') self.assertEqual(repr(node), 'Node(http://bsfs.ai/schema/Entity, http://example.com/me/entity#4321)') -- cgit v1.2.3 From 8e6d27ea75d2c8d68f6dd8b3d529aaa278f291cc Mon Sep 17 00:00:00 2001 From: Matthias Baumgartner Date: Thu, 15 Dec 2022 17:12:56 +0100 Subject: file node class in default schema --- bsie/base/extractor.py | 17 +++--- bsie/extractor/generic/path.py | 2 +- bsie/extractor/generic/stat.py | 2 +- bsie/tools/pipeline.py | 4 +- test/apps/test_index.py | 106 ++++++++++++++++++------------------ test/extractor/generic/test_path.py | 2 +- test/extractor/generic/test_stat.py | 2 +- test/lib/test_bsie.py | 12 ++-- test/tools/test_pipeline.py | 4 +- 9 files changed, 75 insertions(+), 76 deletions(-) diff --git a/bsie/base/extractor.py b/bsie/base/extractor.py index a5c7846..678dcec 100644 --- a/bsie/base/extractor.py +++ b/bsie/base/extractor.py @@ -35,6 +35,7 @@ SCHEMA_PREAMBLE = ''' # essential nodes bsfs:Entity rdfs:subClassOf bsfs:Node . + bsfs:File rdfs:subClassOf bsfs:Entity . # common definitions xsd:string rdfs:subClassOf bsfs:Literal . @@ -77,15 +78,13 @@ class Extractor(abc.ABC): return hash((type(self), self.CONTENT_READER, self.schema)) def predicates(self) -> typing.Iterator[_schema.Predicate]: - """Return the predicates that may be part of extracted triples.""" - # NOTE: Some predicates in the schema might not occur in actual triples, - # but are defined due to predicate class hierarchy. E.g., bsfs:Predicate - # is part of every schema but should not be used in triples. - # Announcing all predicates might not be the most efficient way, however, - # it is the most safe one. Concrete extractors that produce additional - # predicates (e.g. 
auxiliary nodes with their own predicates) should - # overwrite this method to only include the principal predicates. - return self.schema.predicates() + ent = self.schema.node(ns.bsfs.Entity) + return ( + pred + for pred + in self.schema.predicates() + if pred.domain <= ent or (pred.range is not None and pred.range <= ent) + ) @abc.abstractmethod def extract( diff --git a/bsie/extractor/generic/path.py b/bsie/extractor/generic/path.py index 2cc592a..00165e3 100644 --- a/bsie/extractor/generic/path.py +++ b/bsie/extractor/generic/path.py @@ -32,7 +32,7 @@ class Path(extractor.Extractor): def __init__(self): super().__init__(bsfs.schema.Schema.from_string(extractor.SCHEMA_PREAMBLE + ''' bse:filename rdfs:subClassOf bsfs:Predicate ; - rdfs:domain bsfs:Entity ; + rdfs:domain bsfs:File ; rdfs:range xsd:string ; rdfs:label "File name"^^xsd:string ; schema:description "Filename of entity in some filesystem."^^xsd:string ; diff --git a/bsie/extractor/generic/stat.py b/bsie/extractor/generic/stat.py index dfde7d2..0f4267f 100644 --- a/bsie/extractor/generic/stat.py +++ b/bsie/extractor/generic/stat.py @@ -32,7 +32,7 @@ class Stat(extractor.Extractor): def __init__(self): super().__init__(bsfs.schema.Schema.from_string(extractor.SCHEMA_PREAMBLE + ''' bse:filesize rdfs:subClassOf bsfs:Predicate ; - rdfs:domain bsfs:Entity ; + rdfs:domain bsfs:File ; rdfs:range xsd:integer ; rdfs:label "File size"^^xsd:string ; schema:description "File size of entity in some filesystem."^^xsd:string ; diff --git a/bsie/tools/pipeline.py b/bsie/tools/pipeline.py index 7fdd935..3d08993 100644 --- a/bsie/tools/pipeline.py +++ b/bsie/tools/pipeline.py @@ -97,8 +97,8 @@ class Pipeline(): rdr2ext[rdr].add(ext) # create subject for file - uuid = _uuid.UCID.from_path(path) - subject = Node(ns.bsfs.Entity, self._prefix + uuid) + uuid = bsfs.uuid.UCID.from_path(path) + subject = node.Node(ns.bsfs.File, self._prefix + 'file#' + uuid) # extract information for rdr, extrs in rdr2ext.items(): diff --git 
a/test/apps/test_index.py b/test/apps/test_index.py index 6d47df8..c567dea 100644 --- a/test/apps/test_index.py +++ b/test/apps/test_index.py @@ -31,47 +31,47 @@ class TestIndex(unittest.TestCase): prefix = 'http://example.com/me/file#' self.assertTrue(set(bsfs._Graph__backend.graph).issuperset({ - (rdflib.URIRef(prefix + '2f4109b40107cc50e0884755a1a961ed126887e49b8dbaf0e146b2e226aa6647'), rdflib.RDF.type, rdflib.URIRef(ns.bsfs.Entity)), + (rdflib.URIRef(prefix + '2f4109b40107cc50e0884755a1a961ed126887e49b8dbaf0e146b2e226aa6647'), rdflib.RDF.type, rdflib.URIRef(ns.bsfs.File)), (rdflib.URIRef(prefix + '2f4109b40107cc50e0884755a1a961ed126887e49b8dbaf0e146b2e226aa6647'), rdflib.URIRef(ns.bse.author), rdflib.Literal('Me, myself, and I', datatype=rdflib.XSD.string)), (rdflib.URIRef(prefix + '2f4109b40107cc50e0884755a1a961ed126887e49b8dbaf0e146b2e226aa6647'), rdflib.URIRef(ns.bse.filename), rdflib.Literal('alpha_second', datatype=rdflib.XSD.string)), (rdflib.URIRef(prefix + '2f4109b40107cc50e0884755a1a961ed126887e49b8dbaf0e146b2e226aa6647'), rdflib.URIRef(ns.bse.filesize), rdflib.Literal('696', datatype=rdflib.XSD.integer)), - (rdflib.URIRef(prefix + '441f3d10c8ff489fe8e33e639606512f6c463151cc429de7e554b9af670c2ece'), rdflib.RDF.type, rdflib.URIRef(ns.bsfs.Entity)), + (rdflib.URIRef(prefix + '441f3d10c8ff489fe8e33e639606512f6c463151cc429de7e554b9af670c2ece'), rdflib.RDF.type, rdflib.URIRef(ns.bsfs.File)), (rdflib.URIRef(prefix + '441f3d10c8ff489fe8e33e639606512f6c463151cc429de7e554b9af670c2ece'), rdflib.URIRef(ns.bse.author), rdflib.Literal('Me, myself, and I', datatype=rdflib.XSD.string)), (rdflib.URIRef(prefix + '441f3d10c8ff489fe8e33e639606512f6c463151cc429de7e554b9af670c2ece'), rdflib.URIRef(ns.bse.filename), rdflib.Literal('omega_second', datatype=rdflib.XSD.string)), (rdflib.URIRef(prefix + '441f3d10c8ff489fe8e33e639606512f6c463151cc429de7e554b9af670c2ece'), rdflib.URIRef(ns.bse.filesize), rdflib.Literal('503', datatype=rdflib.XSD.integer)), - (rdflib.URIRef(prefix 
+ '69b98ecf7aff3e95b09688ba93331678eb8397817111f674c9558e6dd8f5e871'), rdflib.RDF.type, rdflib.URIRef(ns.bsfs.Entity)), + (rdflib.URIRef(prefix + '69b98ecf7aff3e95b09688ba93331678eb8397817111f674c9558e6dd8f5e871'), rdflib.RDF.type, rdflib.URIRef(ns.bsfs.File)), (rdflib.URIRef(prefix + '69b98ecf7aff3e95b09688ba93331678eb8397817111f674c9558e6dd8f5e871'), rdflib.URIRef(ns.bse.author), rdflib.Literal('Me, myself, and I', datatype=rdflib.XSD.string)), (rdflib.URIRef(prefix + '69b98ecf7aff3e95b09688ba93331678eb8397817111f674c9558e6dd8f5e871'), rdflib.URIRef(ns.bse.filename), rdflib.Literal('td_first', datatype=rdflib.XSD.string)), (rdflib.URIRef(prefix + '69b98ecf7aff3e95b09688ba93331678eb8397817111f674c9558e6dd8f5e871'), rdflib.URIRef(ns.bse.filesize), rdflib.Literal('911', datatype=rdflib.XSD.integer)), - (rdflib.URIRef(prefix + '78f7eb7f0d8221cdb2cb26c978fa42a11f75eb87becc768f4474134cb1e06926'), rdflib.RDF.type, rdflib.URIRef(ns.bsfs.Entity)), + (rdflib.URIRef(prefix + '78f7eb7f0d8221cdb2cb26c978fa42a11f75eb87becc768f4474134cb1e06926'), rdflib.RDF.type, rdflib.URIRef(ns.bsfs.File)), (rdflib.URIRef(prefix + '78f7eb7f0d8221cdb2cb26c978fa42a11f75eb87becc768f4474134cb1e06926'), rdflib.URIRef(ns.bse.author), rdflib.Literal('Me, myself, and I', datatype=rdflib.XSD.string)), (rdflib.URIRef(prefix + '78f7eb7f0d8221cdb2cb26c978fa42a11f75eb87becc768f4474134cb1e06926'), rdflib.URIRef(ns.bse.filename), rdflib.Literal('testfile', datatype=rdflib.XSD.string)), (rdflib.URIRef(prefix + '78f7eb7f0d8221cdb2cb26c978fa42a11f75eb87becc768f4474134cb1e06926'), rdflib.URIRef(ns.bse.filesize), rdflib.Literal('885', datatype=rdflib.XSD.integer)), - (rdflib.URIRef(prefix + '80818b8ec2ee1919116dba9c8a7e0a4608313cf3b463cd88e9ed77a700dd92d3'), rdflib.RDF.type, rdflib.URIRef(ns.bsfs.Entity)), + (rdflib.URIRef(prefix + '80818b8ec2ee1919116dba9c8a7e0a4608313cf3b463cd88e9ed77a700dd92d3'), rdflib.RDF.type, rdflib.URIRef(ns.bsfs.File)), (rdflib.URIRef(prefix + 
'80818b8ec2ee1919116dba9c8a7e0a4608313cf3b463cd88e9ed77a700dd92d3'), rdflib.URIRef(ns.bse.author), rdflib.Literal('Me, myself, and I', datatype=rdflib.XSD.string)), (rdflib.URIRef(prefix + '80818b8ec2ee1919116dba9c8a7e0a4608313cf3b463cd88e9ed77a700dd92d3'), rdflib.URIRef(ns.bse.filename), rdflib.Literal('bar_first', datatype=rdflib.XSD.string)), (rdflib.URIRef(prefix + '80818b8ec2ee1919116dba9c8a7e0a4608313cf3b463cd88e9ed77a700dd92d3'), rdflib.URIRef(ns.bse.filesize), rdflib.Literal('956', datatype=rdflib.XSD.integer)), - (rdflib.URIRef(prefix + '976d2ea0e58488678cc7e435fbfadabfb6eb6cf50ad51862f38f73729ed11795'), rdflib.RDF.type, rdflib.URIRef(ns.bsfs.Entity)), + (rdflib.URIRef(prefix + '976d2ea0e58488678cc7e435fbfadabfb6eb6cf50ad51862f38f73729ed11795'), rdflib.RDF.type, rdflib.URIRef(ns.bsfs.File)), (rdflib.URIRef(prefix + '976d2ea0e58488678cc7e435fbfadabfb6eb6cf50ad51862f38f73729ed11795'), rdflib.URIRef(ns.bse.author), rdflib.Literal('Me, myself, and I', datatype=rdflib.XSD.string)), (rdflib.URIRef(prefix + '976d2ea0e58488678cc7e435fbfadabfb6eb6cf50ad51862f38f73729ed11795'), rdflib.URIRef(ns.bse.filename), rdflib.Literal('omega_first', datatype=rdflib.XSD.string)), (rdflib.URIRef(prefix + '976d2ea0e58488678cc7e435fbfadabfb6eb6cf50ad51862f38f73729ed11795'), rdflib.URIRef(ns.bse.filesize), rdflib.Literal('648', datatype=rdflib.XSD.integer)), - (rdflib.URIRef(prefix + '997e2fbb7494a3818ec782d2bc87bf1cffafba6b9c0f658e4a6c18a723e944d3'), rdflib.RDF.type, rdflib.URIRef(ns.bsfs.Entity)), + (rdflib.URIRef(prefix + '997e2fbb7494a3818ec782d2bc87bf1cffafba6b9c0f658e4a6c18a723e944d3'), rdflib.RDF.type, rdflib.URIRef(ns.bsfs.File)), (rdflib.URIRef(prefix + '997e2fbb7494a3818ec782d2bc87bf1cffafba6b9c0f658e4a6c18a723e944d3'), rdflib.URIRef(ns.bse.author), rdflib.Literal('Me, myself, and I', datatype=rdflib.XSD.string)), (rdflib.URIRef(prefix + '997e2fbb7494a3818ec782d2bc87bf1cffafba6b9c0f658e4a6c18a723e944d3'), rdflib.URIRef(ns.bse.filename), rdflib.Literal('alpha_first', 
datatype=rdflib.XSD.string)), (rdflib.URIRef(prefix + '997e2fbb7494a3818ec782d2bc87bf1cffafba6b9c0f658e4a6c18a723e944d3'), rdflib.URIRef(ns.bse.filesize), rdflib.Literal('754', datatype=rdflib.XSD.integer)), - (rdflib.URIRef(prefix + 'a8af899ecdab60dfaea8ec7f934053624c80a1054539e163f2c7eaa986c2777d'), rdflib.RDF.type, rdflib.URIRef(ns.bsfs.Entity)), + (rdflib.URIRef(prefix + 'a8af899ecdab60dfaea8ec7f934053624c80a1054539e163f2c7eaa986c2777d'), rdflib.RDF.type, rdflib.URIRef(ns.bsfs.File)), (rdflib.URIRef(prefix + 'a8af899ecdab60dfaea8ec7f934053624c80a1054539e163f2c7eaa986c2777d'), rdflib.URIRef(ns.bse.author), rdflib.Literal('Me, myself, and I', datatype=rdflib.XSD.string)), (rdflib.URIRef(prefix + 'a8af899ecdab60dfaea8ec7f934053624c80a1054539e163f2c7eaa986c2777d'), rdflib.URIRef(ns.bse.filename), rdflib.Literal('foo_second', datatype=rdflib.XSD.string)), (rdflib.URIRef(prefix + 'a8af899ecdab60dfaea8ec7f934053624c80a1054539e163f2c7eaa986c2777d'), rdflib.URIRef(ns.bse.filesize), rdflib.Literal('585', datatype=rdflib.XSD.integer)), - (rdflib.URIRef(prefix + 'b8fd7fba818254166a6043195004138ebda6923e012442f819a2c49671136c70'), rdflib.RDF.type, rdflib.URIRef(ns.bsfs.Entity)), + (rdflib.URIRef(prefix + 'b8fd7fba818254166a6043195004138ebda6923e012442f819a2c49671136c70'), rdflib.RDF.type, rdflib.URIRef(ns.bsfs.File)), (rdflib.URIRef(prefix + 'b8fd7fba818254166a6043195004138ebda6923e012442f819a2c49671136c70'), rdflib.URIRef(ns.bse.author), rdflib.Literal('Me, myself, and I', datatype=rdflib.XSD.string)), (rdflib.URIRef(prefix + 'b8fd7fba818254166a6043195004138ebda6923e012442f819a2c49671136c70'), rdflib.URIRef(ns.bse.filename), rdflib.Literal('bar_second', datatype=rdflib.XSD.string)), (rdflib.URIRef(prefix + 'b8fd7fba818254166a6043195004138ebda6923e012442f819a2c49671136c70'), rdflib.URIRef(ns.bse.filesize), rdflib.Literal('636', datatype=rdflib.XSD.integer)), - (rdflib.URIRef(prefix + 'd43758ace82154a1cc10ca0dfef63cb20dd831f9c87edd6dc06539eefe67371d'), rdflib.RDF.type, 
rdflib.URIRef(ns.bsfs.Entity)), + (rdflib.URIRef(prefix + 'd43758ace82154a1cc10ca0dfef63cb20dd831f9c87edd6dc06539eefe67371d'), rdflib.RDF.type, rdflib.URIRef(ns.bsfs.File)), (rdflib.URIRef(prefix + 'd43758ace82154a1cc10ca0dfef63cb20dd831f9c87edd6dc06539eefe67371d'), rdflib.URIRef(ns.bse.author), rdflib.Literal('Me, myself, and I', datatype=rdflib.XSD.string)), (rdflib.URIRef(prefix + 'd43758ace82154a1cc10ca0dfef63cb20dd831f9c87edd6dc06539eefe67371d'), rdflib.URIRef(ns.bse.filename), rdflib.Literal('foo_first', datatype=rdflib.XSD.string)), (rdflib.URIRef(prefix + 'd43758ace82154a1cc10ca0dfef63cb20dd831f9c87edd6dc06539eefe67371d'), rdflib.URIRef(ns.bse.filesize), rdflib.Literal('546', datatype=rdflib.XSD.integer)), - (rdflib.URIRef(prefix + 'd803187cbf3676ae9d38126270a6152c60431589aa3bb3824baf8954e9c097f1'), rdflib.RDF.type, rdflib.URIRef(ns.bsfs.Entity)), + (rdflib.URIRef(prefix + 'd803187cbf3676ae9d38126270a6152c60431589aa3bb3824baf8954e9c097f1'), rdflib.RDF.type, rdflib.URIRef(ns.bsfs.File)), (rdflib.URIRef(prefix + 'd803187cbf3676ae9d38126270a6152c60431589aa3bb3824baf8954e9c097f1'), rdflib.URIRef(ns.bse.author), rdflib.Literal('Me, myself, and I', datatype=rdflib.XSD.string)), (rdflib.URIRef(prefix + 'd803187cbf3676ae9d38126270a6152c60431589aa3bb3824baf8954e9c097f1'), rdflib.URIRef(ns.bse.filename), rdflib.Literal('td_second', datatype=rdflib.XSD.string)), (rdflib.URIRef(prefix + 'd803187cbf3676ae9d38126270a6152c60431589aa3bb3824baf8954e9c097f1'), rdflib.URIRef(ns.bse.filesize), rdflib.Literal('703', datatype=rdflib.XSD.integer)), @@ -105,49 +105,49 @@ class TestIndex(unittest.TestCase): }) def test_print(self): - stdout, sys.stdout = sys.stdout, io.StringIO() - bsfs = main([ - '--print', - '-r', - '--user', 'http://example.com/me', - os.path.join(os.path.dirname(__file__), 'testdir'), - os.path.join(os.path.dirname(__file__), 'testfile'), - ]) - outbuf, sys.stdout = sys.stdout, stdout + outbuf = io.StringIO() + with contextlib.redirect_stdout(outbuf): + bsfs = 
main([ + '--print', + '-r', + '--user', 'http://example.com/me', + os.path.join(os.path.dirname(__file__), 'testdir'), + os.path.join(os.path.dirname(__file__), 'testfile'), + ]) self.assertSetEqual(set(outbuf.getvalue().split('\n')) - {''}, { - f'Node(http://bsfs.ai/schema/Entity, http://example.com/me/file#2f4109b40107cc50e0884755a1a961ed126887e49b8dbaf0e146b2e226aa6647) Predicate({ns.bse.author}) Me, myself, and I', - f'Node(http://bsfs.ai/schema/Entity, http://example.com/me/file#2f4109b40107cc50e0884755a1a961ed126887e49b8dbaf0e146b2e226aa6647) Predicate({ns.bse.filename}) alpha_second', - f'Node(http://bsfs.ai/schema/Entity, http://example.com/me/file#2f4109b40107cc50e0884755a1a961ed126887e49b8dbaf0e146b2e226aa6647) Predicate({ns.bse.filesize}) 696', - f'Node(http://bsfs.ai/schema/Entity, http://example.com/me/file#441f3d10c8ff489fe8e33e639606512f6c463151cc429de7e554b9af670c2ece) Predicate({ns.bse.author}) Me, myself, and I', - f'Node(http://bsfs.ai/schema/Entity, http://example.com/me/file#441f3d10c8ff489fe8e33e639606512f6c463151cc429de7e554b9af670c2ece) Predicate({ns.bse.filename}) omega_second', - f'Node(http://bsfs.ai/schema/Entity, http://example.com/me/file#441f3d10c8ff489fe8e33e639606512f6c463151cc429de7e554b9af670c2ece) Predicate({ns.bse.filesize}) 503', - f'Node(http://bsfs.ai/schema/Entity, http://example.com/me/file#69b98ecf7aff3e95b09688ba93331678eb8397817111f674c9558e6dd8f5e871) Predicate({ns.bse.author}) Me, myself, and I', - f'Node(http://bsfs.ai/schema/Entity, http://example.com/me/file#69b98ecf7aff3e95b09688ba93331678eb8397817111f674c9558e6dd8f5e871) Predicate({ns.bse.filename}) td_first', - f'Node(http://bsfs.ai/schema/Entity, http://example.com/me/file#69b98ecf7aff3e95b09688ba93331678eb8397817111f674c9558e6dd8f5e871) Predicate({ns.bse.filesize}) 911', - f'Node(http://bsfs.ai/schema/Entity, http://example.com/me/file#78f7eb7f0d8221cdb2cb26c978fa42a11f75eb87becc768f4474134cb1e06926) Predicate({ns.bse.author}) Me, myself, and I', - 
f'Node(http://bsfs.ai/schema/Entity, http://example.com/me/file#78f7eb7f0d8221cdb2cb26c978fa42a11f75eb87becc768f4474134cb1e06926) Predicate({ns.bse.filename}) testfile', - f'Node(http://bsfs.ai/schema/Entity, http://example.com/me/file#78f7eb7f0d8221cdb2cb26c978fa42a11f75eb87becc768f4474134cb1e06926) Predicate({ns.bse.filesize}) 885', - f'Node(http://bsfs.ai/schema/Entity, http://example.com/me/file#80818b8ec2ee1919116dba9c8a7e0a4608313cf3b463cd88e9ed77a700dd92d3) Predicate({ns.bse.author}) Me, myself, and I', - f'Node(http://bsfs.ai/schema/Entity, http://example.com/me/file#80818b8ec2ee1919116dba9c8a7e0a4608313cf3b463cd88e9ed77a700dd92d3) Predicate({ns.bse.filename}) bar_first', - f'Node(http://bsfs.ai/schema/Entity, http://example.com/me/file#80818b8ec2ee1919116dba9c8a7e0a4608313cf3b463cd88e9ed77a700dd92d3) Predicate({ns.bse.filesize}) 956', - f'Node(http://bsfs.ai/schema/Entity, http://example.com/me/file#976d2ea0e58488678cc7e435fbfadabfb6eb6cf50ad51862f38f73729ed11795) Predicate({ns.bse.author}) Me, myself, and I', - f'Node(http://bsfs.ai/schema/Entity, http://example.com/me/file#976d2ea0e58488678cc7e435fbfadabfb6eb6cf50ad51862f38f73729ed11795) Predicate({ns.bse.filename}) omega_first', - f'Node(http://bsfs.ai/schema/Entity, http://example.com/me/file#976d2ea0e58488678cc7e435fbfadabfb6eb6cf50ad51862f38f73729ed11795) Predicate({ns.bse.filesize}) 648', - f'Node(http://bsfs.ai/schema/Entity, http://example.com/me/file#997e2fbb7494a3818ec782d2bc87bf1cffafba6b9c0f658e4a6c18a723e944d3) Predicate({ns.bse.author}) Me, myself, and I', - f'Node(http://bsfs.ai/schema/Entity, http://example.com/me/file#997e2fbb7494a3818ec782d2bc87bf1cffafba6b9c0f658e4a6c18a723e944d3) Predicate({ns.bse.filename}) alpha_first', - f'Node(http://bsfs.ai/schema/Entity, http://example.com/me/file#997e2fbb7494a3818ec782d2bc87bf1cffafba6b9c0f658e4a6c18a723e944d3) Predicate({ns.bse.filesize}) 754', - f'Node(http://bsfs.ai/schema/Entity, 
http://example.com/me/file#a8af899ecdab60dfaea8ec7f934053624c80a1054539e163f2c7eaa986c2777d) Predicate({ns.bse.author}) Me, myself, and I', - f'Node(http://bsfs.ai/schema/Entity, http://example.com/me/file#a8af899ecdab60dfaea8ec7f934053624c80a1054539e163f2c7eaa986c2777d) Predicate({ns.bse.filename}) foo_second', - f'Node(http://bsfs.ai/schema/Entity, http://example.com/me/file#a8af899ecdab60dfaea8ec7f934053624c80a1054539e163f2c7eaa986c2777d) Predicate({ns.bse.filesize}) 585', - f'Node(http://bsfs.ai/schema/Entity, http://example.com/me/file#b8fd7fba818254166a6043195004138ebda6923e012442f819a2c49671136c70) Predicate({ns.bse.author}) Me, myself, and I', - f'Node(http://bsfs.ai/schema/Entity, http://example.com/me/file#b8fd7fba818254166a6043195004138ebda6923e012442f819a2c49671136c70) Predicate({ns.bse.filename}) bar_second', - f'Node(http://bsfs.ai/schema/Entity, http://example.com/me/file#b8fd7fba818254166a6043195004138ebda6923e012442f819a2c49671136c70) Predicate({ns.bse.filesize}) 636', - f'Node(http://bsfs.ai/schema/Entity, http://example.com/me/file#d43758ace82154a1cc10ca0dfef63cb20dd831f9c87edd6dc06539eefe67371d) Predicate({ns.bse.author}) Me, myself, and I', - f'Node(http://bsfs.ai/schema/Entity, http://example.com/me/file#d43758ace82154a1cc10ca0dfef63cb20dd831f9c87edd6dc06539eefe67371d) Predicate({ns.bse.filename}) foo_first', - f'Node(http://bsfs.ai/schema/Entity, http://example.com/me/file#d43758ace82154a1cc10ca0dfef63cb20dd831f9c87edd6dc06539eefe67371d) Predicate({ns.bse.filesize}) 546', - f'Node(http://bsfs.ai/schema/Entity, http://example.com/me/file#d803187cbf3676ae9d38126270a6152c60431589aa3bb3824baf8954e9c097f1) Predicate({ns.bse.author}) Me, myself, and I', - f'Node(http://bsfs.ai/schema/Entity, http://example.com/me/file#d803187cbf3676ae9d38126270a6152c60431589aa3bb3824baf8954e9c097f1) Predicate({ns.bse.filename}) td_second', - f'Node(http://bsfs.ai/schema/Entity, 
http://example.com/me/file#d803187cbf3676ae9d38126270a6152c60431589aa3bb3824baf8954e9c097f1) Predicate({ns.bse.filesize}) 703', + f'Node(http://bsfs.ai/schema/File, http://example.com/me/file#2f4109b40107cc50e0884755a1a961ed126887e49b8dbaf0e146b2e226aa6647) Predicate({ns.bse.author}) Me, myself, and I', + f'Node(http://bsfs.ai/schema/File, http://example.com/me/file#2f4109b40107cc50e0884755a1a961ed126887e49b8dbaf0e146b2e226aa6647) Predicate({ns.bse.filename}) alpha_second', + f'Node(http://bsfs.ai/schema/File, http://example.com/me/file#2f4109b40107cc50e0884755a1a961ed126887e49b8dbaf0e146b2e226aa6647) Predicate({ns.bse.filesize}) 696', + f'Node(http://bsfs.ai/schema/File, http://example.com/me/file#441f3d10c8ff489fe8e33e639606512f6c463151cc429de7e554b9af670c2ece) Predicate({ns.bse.author}) Me, myself, and I', + f'Node(http://bsfs.ai/schema/File, http://example.com/me/file#441f3d10c8ff489fe8e33e639606512f6c463151cc429de7e554b9af670c2ece) Predicate({ns.bse.filename}) omega_second', + f'Node(http://bsfs.ai/schema/File, http://example.com/me/file#441f3d10c8ff489fe8e33e639606512f6c463151cc429de7e554b9af670c2ece) Predicate({ns.bse.filesize}) 503', + f'Node(http://bsfs.ai/schema/File, http://example.com/me/file#69b98ecf7aff3e95b09688ba93331678eb8397817111f674c9558e6dd8f5e871) Predicate({ns.bse.author}) Me, myself, and I', + f'Node(http://bsfs.ai/schema/File, http://example.com/me/file#69b98ecf7aff3e95b09688ba93331678eb8397817111f674c9558e6dd8f5e871) Predicate({ns.bse.filename}) td_first', + f'Node(http://bsfs.ai/schema/File, http://example.com/me/file#69b98ecf7aff3e95b09688ba93331678eb8397817111f674c9558e6dd8f5e871) Predicate({ns.bse.filesize}) 911', + f'Node(http://bsfs.ai/schema/File, http://example.com/me/file#78f7eb7f0d8221cdb2cb26c978fa42a11f75eb87becc768f4474134cb1e06926) Predicate({ns.bse.author}) Me, myself, and I', + f'Node(http://bsfs.ai/schema/File, http://example.com/me/file#78f7eb7f0d8221cdb2cb26c978fa42a11f75eb87becc768f4474134cb1e06926) 
Predicate({ns.bse.filename}) testfile', + f'Node(http://bsfs.ai/schema/File, http://example.com/me/file#78f7eb7f0d8221cdb2cb26c978fa42a11f75eb87becc768f4474134cb1e06926) Predicate({ns.bse.filesize}) 885', + f'Node(http://bsfs.ai/schema/File, http://example.com/me/file#80818b8ec2ee1919116dba9c8a7e0a4608313cf3b463cd88e9ed77a700dd92d3) Predicate({ns.bse.author}) Me, myself, and I', + f'Node(http://bsfs.ai/schema/File, http://example.com/me/file#80818b8ec2ee1919116dba9c8a7e0a4608313cf3b463cd88e9ed77a700dd92d3) Predicate({ns.bse.filename}) bar_first', + f'Node(http://bsfs.ai/schema/File, http://example.com/me/file#80818b8ec2ee1919116dba9c8a7e0a4608313cf3b463cd88e9ed77a700dd92d3) Predicate({ns.bse.filesize}) 956', + f'Node(http://bsfs.ai/schema/File, http://example.com/me/file#976d2ea0e58488678cc7e435fbfadabfb6eb6cf50ad51862f38f73729ed11795) Predicate({ns.bse.author}) Me, myself, and I', + f'Node(http://bsfs.ai/schema/File, http://example.com/me/file#976d2ea0e58488678cc7e435fbfadabfb6eb6cf50ad51862f38f73729ed11795) Predicate({ns.bse.filename}) omega_first', + f'Node(http://bsfs.ai/schema/File, http://example.com/me/file#976d2ea0e58488678cc7e435fbfadabfb6eb6cf50ad51862f38f73729ed11795) Predicate({ns.bse.filesize}) 648', + f'Node(http://bsfs.ai/schema/File, http://example.com/me/file#997e2fbb7494a3818ec782d2bc87bf1cffafba6b9c0f658e4a6c18a723e944d3) Predicate({ns.bse.author}) Me, myself, and I', + f'Node(http://bsfs.ai/schema/File, http://example.com/me/file#997e2fbb7494a3818ec782d2bc87bf1cffafba6b9c0f658e4a6c18a723e944d3) Predicate({ns.bse.filename}) alpha_first', + f'Node(http://bsfs.ai/schema/File, http://example.com/me/file#997e2fbb7494a3818ec782d2bc87bf1cffafba6b9c0f658e4a6c18a723e944d3) Predicate({ns.bse.filesize}) 754', + f'Node(http://bsfs.ai/schema/File, http://example.com/me/file#a8af899ecdab60dfaea8ec7f934053624c80a1054539e163f2c7eaa986c2777d) Predicate({ns.bse.author}) Me, myself, and I', + f'Node(http://bsfs.ai/schema/File, 
http://example.com/me/file#a8af899ecdab60dfaea8ec7f934053624c80a1054539e163f2c7eaa986c2777d) Predicate({ns.bse.filename}) foo_second', + f'Node(http://bsfs.ai/schema/File, http://example.com/me/file#a8af899ecdab60dfaea8ec7f934053624c80a1054539e163f2c7eaa986c2777d) Predicate({ns.bse.filesize}) 585', + f'Node(http://bsfs.ai/schema/File, http://example.com/me/file#b8fd7fba818254166a6043195004138ebda6923e012442f819a2c49671136c70) Predicate({ns.bse.author}) Me, myself, and I', + f'Node(http://bsfs.ai/schema/File, http://example.com/me/file#b8fd7fba818254166a6043195004138ebda6923e012442f819a2c49671136c70) Predicate({ns.bse.filename}) bar_second', + f'Node(http://bsfs.ai/schema/File, http://example.com/me/file#b8fd7fba818254166a6043195004138ebda6923e012442f819a2c49671136c70) Predicate({ns.bse.filesize}) 636', + f'Node(http://bsfs.ai/schema/File, http://example.com/me/file#d43758ace82154a1cc10ca0dfef63cb20dd831f9c87edd6dc06539eefe67371d) Predicate({ns.bse.author}) Me, myself, and I', + f'Node(http://bsfs.ai/schema/File, http://example.com/me/file#d43758ace82154a1cc10ca0dfef63cb20dd831f9c87edd6dc06539eefe67371d) Predicate({ns.bse.filename}) foo_first', + f'Node(http://bsfs.ai/schema/File, http://example.com/me/file#d43758ace82154a1cc10ca0dfef63cb20dd831f9c87edd6dc06539eefe67371d) Predicate({ns.bse.filesize}) 546', + f'Node(http://bsfs.ai/schema/File, http://example.com/me/file#d803187cbf3676ae9d38126270a6152c60431589aa3bb3824baf8954e9c097f1) Predicate({ns.bse.author}) Me, myself, and I', + f'Node(http://bsfs.ai/schema/File, http://example.com/me/file#d803187cbf3676ae9d38126270a6152c60431589aa3bb3824baf8954e9c097f1) Predicate({ns.bse.filename}) td_second', + f'Node(http://bsfs.ai/schema/File, http://example.com/me/file#d803187cbf3676ae9d38126270a6152c60431589aa3bb3824baf8954e9c097f1) Predicate({ns.bse.filesize}) 703', }) diff --git a/test/extractor/generic/test_path.py b/test/extractor/generic/test_path.py index d2b6c61..820f402 100644 --- 
a/test/extractor/generic/test_path.py +++ b/test/extractor/generic/test_path.py @@ -31,7 +31,7 @@ class TestPath(unittest.TestCase): self.assertEqual(Path().schema, bsfs.schema.Schema.from_string(extractor.SCHEMA_PREAMBLE + ''' bse:filename rdfs:subClassOf bsfs:Predicate ; - rdfs:domain bsfs:Entity ; + rdfs:domain bsfs:File ; rdfs:range xsd:string ; bsfs:unique "false"^^xsd:boolean . ''')) diff --git a/test/extractor/generic/test_stat.py b/test/extractor/generic/test_stat.py index 6cfc57f..3441438 100644 --- a/test/extractor/generic/test_stat.py +++ b/test/extractor/generic/test_stat.py @@ -32,7 +32,7 @@ class TestStat(unittest.TestCase): self.assertEqual(Stat().schema, bsfs.schema.Schema.from_string(extractor.SCHEMA_PREAMBLE + ''' bse:filesize rdfs:subClassOf bsfs:Predicate ; - rdfs:domain bsfs:Entity ; + rdfs:domain bsfs:File ; rdfs:range xsd:integer ; bsfs:unique "false"^^xsd:boolean . ''')) diff --git a/test/lib/test_bsie.py b/test/lib/test_bsie.py index 5b71752..6720746 100644 --- a/test/lib/test_bsie.py +++ b/test/lib/test_bsie.py @@ -64,12 +64,12 @@ class TestBSIE(unittest.TestCase): xsd:integer rdfs:subClassOf bsfs:Literal . bse:filename rdfs:subClassOf bsfs:Predicate ; - rdfs:domain bsfs:Entity ; + rdfs:domain bsfs:File ; rdfs:range xsd:string ; bsfs:unique "false"^^xsd:boolean . bse:filesize rdfs:subClassOf bsfs:Predicate ; - rdfs:domain bsfs:Entity ; + rdfs:domain bsfs:File ; rdfs:range xsd:integer; bsfs:unique "false"^^xsd:boolean . @@ -101,7 +101,7 @@ class TestBSIE(unittest.TestCase): xsd:integer rdfs:subClassOf bsfs:Literal . bse:filesize rdfs:subClassOf bsfs:Predicate ; - rdfs:domain bsfs:Entity ; + rdfs:domain bsfs:File ; rdfs:range xsd:integer; bsfs:unique "false"^^xsd:boolean . @@ -130,12 +130,12 @@ class TestBSIE(unittest.TestCase): xsd:integer rdfs:subClassOf bsfs:Literal . bse:filename rdfs:subClassOf bsfs:Predicate ; - rdfs:domain bsfs:Entity ; + rdfs:domain bsfs:File ; rdfs:range xsd:string ; bsfs:unique "false"^^xsd:boolean . 
bse:filesize rdfs:subClassOf bsfs:Predicate ; - rdfs:domain bsfs:Entity ; + rdfs:domain bsfs:File ; rdfs:range xsd:integer; bsfs:unique "false"^^xsd:boolean . @@ -191,7 +191,7 @@ class TestBSIE(unittest.TestCase): xsd:integer rdfs:subClassOf bsfs:Literal . bse:filesize rdfs:subClassOf bsfs:Predicate ; - rdfs:domain bsfs:Entity ; + rdfs:domain bsfs:File ; rdfs:range xsd:integer; bsfs:unique "false"^^xsd:boolean . diff --git a/test/tools/test_pipeline.py b/test/tools/test_pipeline.py index 92801ed..611f8b0 100644 --- a/test/tools/test_pipeline.py +++ b/test/tools/test_pipeline.py @@ -30,7 +30,7 @@ class TestPipeline(unittest.TestCase): # constant A csA = ''' bse:author rdfs:subClassOf bsfs:Predicate ; - rdfs:domain bsfs:Entity ; + rdfs:domain bsfs:File ; rdfs:range xsd:string ; bsfs:unique "true"^^xsd:boolean . ''' @@ -38,7 +38,7 @@ class TestPipeline(unittest.TestCase): # constant B csB = ''' bse:rating rdfs:subClassOf bsfs:Predicate ; - rdfs:domain bsfs:Entity ; + rdfs:domain bsfs:File ; rdfs:range xsd:integer ; bsfs:unique "true"^^xsd:boolean . 
''' -- cgit v1.2.3 From 5d9526783ad8432c7d6dfe18c0e9f2b37950b470 Mon Sep 17 00:00:00 2001 From: Matthias Baumgartner Date: Thu, 15 Dec 2022 17:16:25 +0100 Subject: Pipeline.prefix as Namespace instead of URI --- bsie/apps/index.py | 5 ++--- bsie/apps/info.py | 4 ++-- bsie/tools/builder.py | 13 +++++++++++-- bsie/tools/pipeline.py | 4 ++-- test/lib/test_bsie.py | 2 +- test/tools/test_pipeline.py | 5 ++--- 6 files changed, 20 insertions(+), 13 deletions(-) diff --git a/bsie/apps/index.py b/bsie/apps/index.py index aa26d0f..e37684b 100644 --- a/bsie/apps/index.py +++ b/bsie/apps/index.py @@ -26,7 +26,7 @@ __all__: typing.Sequence[str] = ( def main(argv): """Index files or directories into BSFS.""" parser = argparse.ArgumentParser(description=main.__doc__, prog='index') - parser.add_argument('--user', type=URI, default=URI('http://example.com/me'), + parser.add_argument('--user', type=bsfs.URI, default=bsfs.URI('http://example.com/me'), help='') parser.add_argument('--collect', action='append', default=[], help='') @@ -60,9 +60,8 @@ def main(argv): )}, ]) # pipeline builder - prefix = URI(args.user + ('file#' if args.user.endswith('/') else '/file#')) pbuild = builder.PipelineBuilder( - prefix, + bsfs.Namespace(args.user + ('/' if not args.user.endswith('/') else '')), rbuild, ebuild, ) diff --git a/bsie/apps/info.py b/bsie/apps/info.py index 8cc6dca..eaf1f71 100644 --- a/bsie/apps/info.py +++ b/bsie/apps/info.py @@ -12,7 +12,7 @@ import typing # bsie imports from bsie.base import errors from bsie.tools import builder -from bsie.utils.bsfs import URI +from bsie.utils import bsfs # exports __all__: typing.Sequence[str] = ( @@ -48,7 +48,7 @@ def main(argv): ]) # pipeline builder pbuild = builder.PipelineBuilder( - URI('http://example.com/me/file#'), # not actually used + bsfs.Namespace('http://example.com/me/'), # not actually used rbuild, ebuild, ) diff --git a/bsie/tools/builder.py b/bsie/tools/builder.py index 8c6b931..24aea84 100644 --- a/bsie/tools/builder.py +++ 
b/bsie/tools/builder.py @@ -163,15 +163,24 @@ class ExtractorBuilder(): return cls(**kwargs) except Exception as err: - raise errors.BuilderError(f'failed to build extractor {name} due to {typename(err)}: {err}') from err + raise errors.BuilderError(f'failed to build extractor {name} due to {bsfs.typename(err)}: {err}') from err class PipelineBuilder(): """Build `bsie.tools.pipeline.Pipeline` instances.""" + # Prefix to be used in the Pipeline. + prefix: bsfs.Namespace + + # builder for Readers. + rbuild: ReaderBuilder + + # builder for Extractors. + ebuild: ExtractorBuilder + def __init__( self, - prefix: URI, + prefix: bsfs.Namespace, reader_builder: ReaderBuilder, extractor_builder: ExtractorBuilder, ): diff --git a/bsie/tools/pipeline.py b/bsie/tools/pipeline.py index 3d08993..834bd99 100644 --- a/bsie/tools/pipeline.py +++ b/bsie/tools/pipeline.py @@ -39,14 +39,14 @@ class Pipeline(): schema: _schema.Schema # node prefix. - _prefix: URI + _prefix: bsfs.Namespace # extractor -> reader mapping _ext2rdr: typing.Dict[base.extractor.Extractor, typing.Optional[base.reader.Reader]] def __init__( self, - prefix: URI, + prefix: bsfs.Namespace, ext2rdr: typing.Dict[base.extractor.Extractor, typing.Optional[base.reader.Reader]] ): # store core members diff --git a/test/lib/test_bsie.py b/test/lib/test_bsie.py index 6720746..43e7b1d 100644 --- a/test/lib/test_bsie.py +++ b/test/lib/test_bsie.py @@ -40,7 +40,7 @@ class TestBSIE(unittest.TestCase): )}, ]) # build pipeline - self.prefix = URI('http://example.com/local/file#') + self.prefix = bsfs.Namespace('http://example.com/local/file#') pbuild = builder.PipelineBuilder(self.prefix, rbuild, ebuild) self.pipeline = pbuild.build() diff --git a/test/tools/test_pipeline.py b/test/tools/test_pipeline.py index 611f8b0..e440ab5 100644 --- a/test/tools/test_pipeline.py +++ b/test/tools/test_pipeline.py @@ -11,7 +11,6 @@ import unittest # bsie imports from bsie.base import errors -from bsie.utils.bsfs import URI from bsie.utils 
import bsfs, node, ns import bsie.extractor.generic.constant import bsie.extractor.generic.path @@ -50,7 +49,7 @@ class TestPipeline(unittest.TestCase): bsie.extractor.generic.constant.Constant(csA, tupA): None, bsie.extractor.generic.constant.Constant(csB, tupB): None, } - self.prefix = URI('http://example.com/local/file#') + self.prefix = bsfs.Namespace('http://example.com/local/') def test_essentials(self): pipeline = Pipeline(self.prefix, self.ext2rdr) @@ -101,7 +100,7 @@ class TestPipeline(unittest.TestCase): p_filesize = pipeline.schema.predicate(ns.bse.filesize) p_author = pipeline.schema.predicate(ns.bse.author) p_rating = pipeline.schema.predicate(ns.bse.rating) - entity = pipeline.schema.node(ns.bsfs.Entity) + entity = pipeline.schema.node(ns.bsfs.File) p_invalid = pipeline.schema.predicate(ns.bsfs.Predicate).get_child(ns.bse.foo, range=entity) # extract given predicates -- cgit v1.2.3 From 3426b4e201cf03b78d2a3f144876955fcda2f66b Mon Sep 17 00:00:00 2001 From: Matthias Baumgartner Date: Thu, 15 Dec 2022 17:17:53 +0100 Subject: extractor interface revision * schema as property * predicates -> principals --- bsie/base/extractor.py | 21 +++++++++++++-------- bsie/extractor/generic/constant.py | 8 +++----- bsie/extractor/generic/path.py | 7 +++---- bsie/extractor/generic/stat.py | 7 +++---- test/base/test_extractor.py | 5 +++-- 5 files changed, 25 insertions(+), 23 deletions(-) diff --git a/bsie/base/extractor.py b/bsie/base/extractor.py index 678dcec..c44021b 100644 --- a/bsie/base/extractor.py +++ b/bsie/base/extractor.py @@ -9,8 +9,6 @@ import abc import typing # bsie imports -from bsie.utils import node -from bsie.utils.bsfs import schema as _schema, typename from bsie.utils import bsfs, node, ns # exports @@ -58,10 +56,10 @@ class Extractor(abc.ABC): CONTENT_READER: typing.Optional[str] = None # extractor schema. 
- schema: _schema.Schema + _schema: bsfs.schema.Schema - def __init__(self, schema: _schema.Schema): - self.schema = schema + def __init__(self, schema: bsfs.schema.Schema): + self._schema = schema def __str__(self) -> str: return bsfs.typename(self) @@ -77,7 +75,14 @@ class Extractor(abc.ABC): def __hash__(self) -> int: return hash((type(self), self.CONTENT_READER, self.schema)) - def predicates(self) -> typing.Iterator[_schema.Predicate]: + @property + def schema(self) -> bsfs.schema.Schema: + """Return the extractor's schema.""" + return self._schema + + @property + def principals(self) -> typing.Iterator[bsfs.schema.Predicate]: + """Return the principal predicates, i.e., relations from/to the extraction subject.""" ent = self.schema.node(ns.bsfs.Entity) return ( pred @@ -91,8 +96,8 @@ class Extractor(abc.ABC): self, subject: node.Node, content: typing.Any, - predicates: typing.Iterable[_schema.Predicate], - ) -> typing.Iterator[typing.Tuple[node.Node, _schema.Predicate, typing.Any]]: + principals: typing.Iterable[bsfs.schema.Predicate], + ) -> typing.Iterator[typing.Tuple[node.Node, bsfs.schema.Predicate, typing.Any]]: """Return (node, predicate, value) triples.""" ## EOF ## diff --git a/bsie/extractor/generic/constant.py b/bsie/extractor/generic/constant.py index f9e3415..cdb2ef6 100644 --- a/bsie/extractor/generic/constant.py +++ b/bsie/extractor/generic/constant.py @@ -9,8 +9,6 @@ import typing # bsie imports from bsie.base import extractor -from bsie.utils.bsfs import URI, schema as _schema -from bsie.utils.node import Node from bsie.utils import bsfs, node # exports @@ -50,10 +48,10 @@ class Constant(extractor.Extractor): self, subject: node.Node, content: None, - predicates: typing.Iterable[_schema.Predicate], - ) -> typing.Iterator[typing.Tuple[Node, _schema.Predicate, typing.Any]]: + principals: typing.Iterable[bsfs.schema.Predicate], + ) -> typing.Iterator[typing.Tuple[node.Node, bsfs.schema.Predicate, typing.Any]]: for pred, value in self._tuples: - 
if pred in predicates: + if pred in principals: yield subject, pred, value ## EOF ## diff --git a/bsie/extractor/generic/path.py b/bsie/extractor/generic/path.py index 00165e3..23ae80b 100644 --- a/bsie/extractor/generic/path.py +++ b/bsie/extractor/generic/path.py @@ -10,7 +10,6 @@ import typing # bsie imports from bsie.base import extractor -from bsie.utils.bsfs import schema from bsie.utils import bsfs, node, ns # exports @@ -46,9 +45,9 @@ class Path(extractor.Extractor): self, subject: node.Node, content: str, - predicates: typing.Iterable[schema.Predicate], - ) -> typing.Iterator[typing.Tuple[node.Node, schema.Predicate, typing.Any]]: - for pred in predicates: + principals: typing.Iterable[bsfs.schema.Predicate], + ) -> typing.Iterator[typing.Tuple[node.Node, bsfs.schema.Predicate, typing.Any]]: + for pred in principals: # find callback clbk = self._callmap.get(pred) if clbk is None: diff --git a/bsie/extractor/generic/stat.py b/bsie/extractor/generic/stat.py index 0f4267f..1dcfedf 100644 --- a/bsie/extractor/generic/stat.py +++ b/bsie/extractor/generic/stat.py @@ -10,7 +10,6 @@ import typing # bsie imports from bsie.base import extractor -from bsie.utils.bsfs import schema as _schema from bsie.utils import bsfs, node, ns # exports @@ -46,9 +45,9 @@ class Stat(extractor.Extractor): self, subject: node.Node, content: os.stat_result, - predicates: typing.Iterable[_schema.Predicate], - ) -> typing.Iterator[typing.Tuple[node.Node, _schema.Predicate, typing.Any]]: - for pred in predicates: + principals: typing.Iterable[bsfs.schema.Predicate], + ) -> typing.Iterator[typing.Tuple[node.Node, bsfs.schema.Predicate, typing.Any]]: + for pred in principals: # find callback clbk = self._callmap.get(pred) if clbk is None: diff --git a/test/base/test_extractor.py b/test/base/test_extractor.py index 5410ae0..30974ef 100644 --- a/test/base/test_extractor.py +++ b/test/base/test_extractor.py @@ -51,14 +51,15 @@ class TestExtractor(unittest.TestCase): self.assertNotEqual(ext, 
sub) self.assertNotEqual(hash(ext), hash(sub)) - def test_predicates(self): + def test_principals(self): schema = bsfs.schema.Schema.Empty() entity = schema.node(ns.bsfs.Node).get_child(ns.bsfs.Entity) string = schema.literal(ns.bsfs.Literal).get_child(bsfs.URI('http://www.w3.org/2001/XMLSchema#string')) p_author = schema.predicate(ns.bsfs.Predicate).get_child(ns.bse.author, domain=entity, range=string) p_comment = schema.predicate(ns.bsfs.Predicate).get_child(ns.bse.comment, domain=entity, range=string) ext = StubExtractor() - self.assertSetEqual(set(ext.predicates()), {p_author, p_comment} | set(schema.predicates())) + self.assertSetEqual(set(ext.principals), + {p_author, p_comment} | set(schema.predicates()) - {schema.predicate(ns.bsfs.Predicate)}) ## main ## -- cgit v1.2.3 From 37510d134458bf954ca2da6d40be0d6c76661e8c Mon Sep 17 00:00:00 2001 From: Matthias Baumgartner Date: Thu, 15 Dec 2022 17:19:21 +0100 Subject: bsie/pipeline interface revision: * predicates -> principals * schema as property * principals as property * information hiding * full subschema instead of only predicates --- bsie/lib/bsie.py | 61 ++++++++++++++++++++++--------------- bsie/tools/pipeline.py | 52 ++++++++++++++++++++----------- test/lib/test_bsie.py | 74 +++++++-------------------------------------- test/tools/test_pipeline.py | 5 ++- 4 files changed, 83 insertions(+), 109 deletions(-) diff --git a/bsie/lib/bsie.py b/bsie/lib/bsie.py index 3aeee2b..e087fa9 100644 --- a/bsie/lib/bsie.py +++ b/bsie/lib/bsie.py @@ -8,8 +8,6 @@ Author: Matthias Baumgartner, 2022 import typing # bsie imports -from bsie.tools.pipeline import Pipeline -from bsie.utils.bsfs import URI, schema as schema_ from bsie.tools import Pipeline from bsie.utils import bsfs, node, ns @@ -30,11 +28,14 @@ class BSIE(): """ + # pipeline + _pipeline: Pipeline + # predicates to extract. - predicates: typing.Set[URI] + _principals: typing.Set[bsfs.URI] # local schema. 
- schema: schema_.Schema + _schema: bsfs.schema.Schema def __init__( self, @@ -46,36 +47,46 @@ class BSIE(): discard: typing.Optional[typing.Iterable[bsfs.URI]] = None, ): # store pipeline - self.pipeline = pipeline - # start off with available predicates - self.predicates = {pred.uri for pred in self.pipeline.predicates()} - # limit predicates to specified ones by argument. + self._pipeline = pipeline + # start off with available principals + self._principals = {pred.uri for pred in self._pipeline.principals} + # limit principals to specified ones by argument. if collect is not None: collect = set(collect) if len(collect) > 0: - self.predicates &= collect - # discard predicates. + self._principals &= collect + # discard principals. if discard is not None: - self.predicates -= set(discard) + self._principals -= set(discard) # discard ns.bsfs.Predicate - self.predicates.discard(ns.bsfs.Predicate) - # compile a schema that only contains the requested predicates (and implied types) - self.schema = schema_.Schema({ - self.pipeline.schema.predicate(pred) for pred in self.predicates}) + self._principals.discard(ns.bsfs.Predicate) + # compile a schema that only contains the requested principals (and auxiliary predicates) + self._schema = self._pipeline.subschema( + self._pipeline.schema.predicate(pred) for pred in self._principals) + + @property + def schema(self) -> bsfs.schema.Schema: + """Return the BSIE schema.""" + return self._schema + + @property + def principals(self) -> typing.Iterator[bsfs.URI]: + """Return an iterator to the principal predicates.""" + return iter(self._principals) def from_file( self, - path: URI, - predicates: typing.Optional[typing.Iterable[URI]] = None, - ) -> typing.Iterator[typing.Tuple[node.Node, URI, typing.Any]]: - """Produce triples for a given *path*. Limit to *predicates* if given.""" - # get requested predicates. - predicates = set(predicates) if predicates is not None else self.predicates - # filter through requested predicates. 
- predicates &= self.predicates + path: bsfs.URI, + principals: typing.Optional[typing.Iterable[bsfs.URI]] = None, + ) -> typing.Iterator[typing.Tuple[node.Node, bsfs.URI, typing.Any]]: + """Produce triples for a given *path*. Limit to *principals* if given.""" + # get requested principals. + principals = set(principals) if principals is not None else self._principals + # filter through requested principals. + principals &= self._principals # predicate lookup - predicates = {self.schema.predicate(pred) for pred in predicates} + principals = {self.schema.predicate(pred) for pred in principals} # invoke pipeline - yield from self.pipeline(path, predicates) + yield from self._pipeline(path, principals) ## EOF ## diff --git a/bsie/tools/pipeline.py b/bsie/tools/pipeline.py index 834bd99..52ce526 100644 --- a/bsie/tools/pipeline.py +++ b/bsie/tools/pipeline.py @@ -11,8 +11,6 @@ import typing # bsie imports from bsie import base -from bsie.utils.node import Node -from bsie.utils.bsfs import schema as _schema, URI, uuid as _uuid, typename from bsie.utils import bsfs, node, ns # exports @@ -36,7 +34,7 @@ class Pipeline(): """ # combined extractor schemas. - schema: _schema.Schema + _schema: bsfs.schema.Schema # node prefix. 
_prefix: bsfs.Namespace @@ -53,7 +51,7 @@ class Pipeline(): self._prefix = prefix self._ext2rdr = ext2rdr # compile schema from all extractors - self.schema = _schema.Schema.Union(ext.schema for ext in ext2rdr) + self._schema = bsfs.schema.Schema.Union(ext.schema for ext in ext2rdr) def __str__(self) -> str: return bsfs.typename(self) @@ -62,29 +60,47 @@ class Pipeline(): return f'{bsfs.typename(self)}(...)' def __hash__(self) -> int: - return hash((type(self), self._prefix, self.schema, tuple(self._ext2rdr), tuple(self._ext2rdr.values()))) + return hash((type(self), self._prefix, self._schema, tuple(self._ext2rdr), tuple(self._ext2rdr.values()))) def __eq__(self, other: typing.Any) -> bool: return isinstance(other, type(self)) \ - and self.schema == other.schema \ + and self._schema == other._schema \ and self._prefix == other._prefix \ and self._ext2rdr == other._ext2rdr - def predicates(self) -> typing.Iterator[_schema.Predicate]: - """Return the predicates that are extracted from a file.""" - return iter({pred for ext in self._ext2rdr for pred in ext.predicates()}) + @property + def schema(self) -> bsfs.schema.Schema: + """Return the pipeline's schema (combined from all extractors).""" + return self._schema + + @property + def principals(self) -> typing.Iterator[bsfs.schema.Predicate]: + """Return the principal predicates that can be extracted.""" + return iter({pred for ext in self._ext2rdr for pred in ext.principals}) + + def subschema(self, principals: typing.Iterable[bsfs.schema.Predicate]) -> bsfs.schema.Schema: + """Return the subset of the schema that supports the given *principals*.""" + # materialize principals + principals = set(principals) + # collect and combine schemas from extractors + return bsfs.schema.Schema.Union({ + ext.schema + for ext + in self._ext2rdr + if not set(ext.principals).isdisjoint(principals) + }) def __call__( self, - path: URI, - predicates: typing.Optional[typing.Iterable[_schema.Predicate]] = None, - ) -> 
typing.Iterator[typing.Tuple[Node, _schema.Predicate, typing.Any]]: - """Extract triples from the file at *path*. Optionally, limit triples to *predicates*.""" - # get predicates - predicates = set(predicates) if predicates is not None else set(self.schema.predicates()) + path: bsfs.URI, + principals: typing.Optional[typing.Iterable[bsfs.schema.Predicate]] = None, + ) -> typing.Iterator[typing.Tuple[node.Node, bsfs.schema.Predicate, typing.Any]]: + """Extract triples from the file at *path*. Optionally, limit triples to *principals*.""" + # get principals + principals = set(principals) if principals is not None else set(self.schema.predicates()) # get extractors - extractors = {ext for ext in self._ext2rdr if not set(ext.predicates()).isdisjoint(predicates)} + extractors = {ext for ext in self._ext2rdr if not set(ext.principals).isdisjoint(principals)} # corner-case short-cut if len(extractors) == 0: @@ -110,8 +126,8 @@ class Pipeline(): for ext in extrs: try: # get predicate/value tuples - for node, pred, value in ext.extract(subject, content, predicates): - yield node, pred, value + for subject, pred, value in ext.extract(subject, content, principals): + yield subject, pred, value except base.errors.ExtractorError as err: # critical extractor failure. 
diff --git a/test/lib/test_bsie.py b/test/lib/test_bsie.py index 43e7b1d..f3f476e 100644 --- a/test/lib/test_bsie.py +++ b/test/lib/test_bsie.py @@ -11,8 +11,6 @@ import unittest # bsie imports from bsie.base import extractor from bsie.tools import builder -from bsie.utils.bsfs import URI, schema -from bsie.utils.node import Node from bsie.utils import bsfs, node, ns # objects to test @@ -47,22 +45,12 @@ class TestBSIE(unittest.TestCase): def test_construction(self): # pipeline only lib = BSIE(self.pipeline) - self.assertSetEqual(lib.predicates, { + self.assertSetEqual(set(lib.principals), { ns.bse.filename, ns.bse.filesize, ns.bse.author, }) - self.assertEqual(lib.schema, schema.Schema.from_string(''' - prefix rdfs: - prefix xsd: - prefix bsfs: - prefix bse: - # essential nodes - bsfs:Entity rdfs:subClassOf bsfs:Node . - # common definitions - xsd:string rdfs:subClassOf bsfs:Literal . - xsd:integer rdfs:subClassOf bsfs:Literal . - + self.assertEqual(lib.schema, bsfs.schema.Schema.from_string(extractor.SCHEMA_PREAMBLE + ''' bse:filename rdfs:subClassOf bsfs:Predicate ; rdfs:domain bsfs:File ; rdfs:range xsd:string ; @@ -85,21 +73,11 @@ class TestBSIE(unittest.TestCase): ns.bse.author, ns.bse.inexistent, }) - self.assertSetEqual(lib.predicates, { + self.assertSetEqual(set(lib.principals), { ns.bse.filesize, ns.bse.author, }) - self.assertEqual(lib.schema, schema.Schema.from_string(''' - prefix rdfs: - prefix xsd: - prefix bsfs: - prefix bse: - # essential nodes - bsfs:Entity rdfs:subClassOf bsfs:Node . - # common definitions - xsd:string rdfs:subClassOf bsfs:Literal . - xsd:integer rdfs:subClassOf bsfs:Literal . - + self.assertEqual(lib.schema, bsfs.schema.Schema.from_string(extractor.SCHEMA_PREAMBLE + ''' bse:filesize rdfs:subClassOf bsfs:Predicate ; rdfs:domain bsfs:File ; rdfs:range xsd:integer; @@ -109,26 +87,15 @@ class TestBSIE(unittest.TestCase): rdfs:domain bsfs:Entity ; rdfs:range xsd:string ; bsfs:unique "true"^^xsd:boolean . 
- ''')) # empty collect is disregarded lib = BSIE(self.pipeline, collect={}) - self.assertSetEqual(lib.predicates, { + self.assertSetEqual(set(lib.principals), { ns.bse.filename, ns.bse.filesize, ns.bse.author, }) - self.assertEqual(lib.schema, schema.Schema.from_string(''' - prefix rdfs: - prefix xsd: - prefix bsfs: - prefix bse: - # essential nodes - bsfs:Entity rdfs:subClassOf bsfs:Node . - # common definitions - xsd:string rdfs:subClassOf bsfs:Literal . - xsd:integer rdfs:subClassOf bsfs:Literal . - + self.assertEqual(lib.schema, bsfs.schema.Schema.from_string(extractor.SCHEMA_PREAMBLE + ''' bse:filename rdfs:subClassOf bsfs:Predicate ; rdfs:domain bsfs:File ; rdfs:range xsd:string ; @@ -152,24 +119,14 @@ class TestBSIE(unittest.TestCase): ns.bse.filename, ns.bse.inexistent, }) - self.assertSetEqual(lib.predicates, { + self.assertSetEqual(set(lib.principals), { ns.bse.author, }) - self.assertEqual(lib.schema, schema.Schema.from_string(''' - prefix rdfs: - prefix xsd: - prefix bsfs: - prefix bse: - # essential nodes - bsfs:Entity rdfs:subClassOf bsfs:Node . - # common definitions - xsd:string rdfs:subClassOf bsfs:Literal . - + self.assertEqual(lib.schema, bsfs.schema.Schema.from_string(extractor.SCHEMA_PREAMBLE + ''' bse:author rdfs:subClassOf bsfs:Predicate ; rdfs:domain bsfs:Entity ; rdfs:range xsd:string ; bsfs:unique "true"^^xsd:boolean . - ''')) # specify collect and discard @@ -177,19 +134,10 @@ class TestBSIE(unittest.TestCase): collect={ns.bse.filesize, ns.bse.author, ns.bse.foo, ns.bse.bar}, discard={ns.bse.author, ns.bse.foo, ns.bse.foobar}, ) - self.assertSetEqual(lib.predicates, { + self.assertSetEqual(set(lib.principals), { ns.bse.filesize, }) - self.assertEqual(lib.schema, schema.Schema.from_string(''' - prefix rdfs: - prefix xsd: - prefix bsfs: - prefix bse: - # essential nodes - bsfs:Entity rdfs:subClassOf bsfs:Node . - # common definitions - xsd:integer rdfs:subClassOf bsfs:Literal . 
- + self.assertEqual(lib.schema, bsfs.schema.Schema.from_string(extractor.SCHEMA_PREAMBLE + ''' bse:filesize rdfs:subClassOf bsfs:Predicate ; rdfs:domain bsfs:File ; rdfs:range xsd:integer; @@ -201,7 +149,7 @@ class TestBSIE(unittest.TestCase): def test_from_file(self): # setup lib = BSIE(self.pipeline) - self.assertSetEqual(set(lib.predicates), { + self.assertSetEqual(set(lib.principals), { ns.bse.filesize, ns.bse.filename, ns.bse.author, diff --git a/test/tools/test_pipeline.py b/test/tools/test_pipeline.py index e440ab5..91bf736 100644 --- a/test/tools/test_pipeline.py +++ b/test/tools/test_pipeline.py @@ -75,7 +75,7 @@ class TestPipeline(unittest.TestCase): # equivalence respects schema p2 = Pipeline(self.prefix, self.ext2rdr) - p2.schema = pipeline.schema.Empty() + p2._schema = pipeline.schema.Empty() self.assertNotEqual(pipeline, p2) self.assertNotEqual(hash(pipeline), hash(p2)) @@ -160,8 +160,7 @@ class TestPipeline(unittest.TestCase): # build pipeline pipeline = Pipeline(self.prefix, self.ext2rdr) # - self.assertSetEqual(set(pipeline.predicates()), { - pipeline.schema.predicate(ns.bsfs.Predicate), + self.assertSetEqual(set(pipeline.principals), { pipeline.schema.predicate(ns.bse.filename), pipeline.schema.predicate(ns.bse.filesize), pipeline.schema.predicate(ns.bse.author), -- cgit v1.2.3 From 3b41b2a4b7532c911b63b41066a75b3e1546d214 Mon Sep 17 00:00:00 2001 From: Matthias Baumgartner Date: Thu, 15 Dec 2022 17:21:20 +0100 Subject: minor test improvements and information hiding in builder --- bsie/tools/builder.py | 25 ++++++++++++------------- test/apps/test_index.py | 2 +- test/apps/test_info.py | 13 +++++++------ 3 files changed, 20 insertions(+), 20 deletions(-) diff --git a/bsie/tools/builder.py b/bsie/tools/builder.py index 24aea84..190d9bf 100644 --- a/bsie/tools/builder.py +++ b/bsie/tools/builder.py @@ -12,7 +12,6 @@ import typing # bsie imports from bsie import base from bsie.base import errors -from bsie.utils.bsfs import URI, typename from 
bsie.utils import bsfs # inner-module imports @@ -75,20 +74,20 @@ class ReaderBuilder(): """ # keyword arguments - kwargs: typing.Dict[str, typing.Dict[str, typing.Any]] + _kwargs: typing.Dict[str, typing.Dict[str, typing.Any]] # cached readers - cache: typing.Dict[str, base.reader.Reader] + _cache: typing.Dict[str, base.Reader] def __init__(self, kwargs: typing.Dict[str, typing.Dict[str, typing.Any]]): - self.kwargs = kwargs - self.cache = {} + self._kwargs = kwargs + self._cache = {} def build(self, name: str) -> base.Reader: """Return an instance for the qualified class name.""" # return cached instance - if name in self.cache: - return self.cache[name] + if name in self._cache: + return self._cache[name] # check name and get module/class components module_name, class_name = _unpack_name(name) @@ -97,14 +96,14 @@ class ReaderBuilder(): cls = _safe_load(module_name, class_name) # get kwargs - kwargs = self.kwargs.get(name, {}) + kwargs = self._kwargs.get(name, {}) if not isinstance(kwargs, dict): raise TypeError(f'expected a kwargs dict, found {bsfs.typename(kwargs)}') try: # build, cache, and return instance obj = cls(**kwargs) # cache instance - self.cache[name] = obj + self._cache[name] = obj # return instance return obj @@ -125,19 +124,19 @@ class ExtractorBuilder(): """ # build specifications - specs: typing.List[typing.Dict[str, typing.Dict[str, typing.Any]]] + _specs: typing.List[typing.Dict[str, typing.Dict[str, typing.Any]]] def __init__(self, specs: typing.List[typing.Dict[str, typing.Dict[str, typing.Any]]]): - self.specs = specs + self._specs = specs def __iter__(self) -> typing.Iterator[int]: """Iterate over extractor specifications.""" - return iter(range(len(self.specs))) + return iter(range(len(self._specs))) def build(self, index: int) -> base.Extractor: """Return an instance of the n'th extractor (n=*index*).""" # get build instructions - specs = self.specs[index] + specs = self._specs[index] # check specs structure. 
expecting[{name: {kwargs}}] if not isinstance(specs, dict): diff --git a/test/apps/test_index.py b/test/apps/test_index.py index c567dea..2be8470 100644 --- a/test/apps/test_index.py +++ b/test/apps/test_index.py @@ -5,10 +5,10 @@ A copy of the license is provided with the project. Author: Matthias Baumgartner, 2022 """ # imports +import contextlib import io import os import rdflib -import sys import unittest # bsie imports diff --git a/test/apps/test_info.py b/test/apps/test_info.py index 60a540e..ad39c64 100644 --- a/test/apps/test_info.py +++ b/test/apps/test_info.py @@ -6,9 +6,9 @@ Author: Matthias Baumgartner, 2022 """ # imports import argparse +import contextlib import io import os -import sys import unittest # objects to test @@ -19,10 +19,10 @@ from bsie.apps.info import main class TestIndex(unittest.TestCase): def test_predicates(self): - stdout, sys.stdout = sys.stdout, io.StringIO() - # show predicates infos - main(['predicates']) - outbuf, sys.stdout = sys.stdout, stdout + outbuf = io.StringIO() + with contextlib.redirect_stdout(outbuf): + # show predicates infos + main(['predicates']) # verify output self.assertSetEqual({pred for pred in outbuf.getvalue().split('\n') if pred != ''}, { 'http://bsfs.ai/schema/Entity#author', @@ -32,7 +32,8 @@ class TestIndex(unittest.TestCase): }) def test_invalid(self): - self.assertRaises(SystemExit, main, ['foobar']) + with contextlib.redirect_stderr(io.StringIO()): + self.assertRaises(SystemExit, main, ['foobar']) ## main ## -- cgit v1.2.3 From 22896f662ed49dd9fa283af2b3dad9e4ec6dd340 Mon Sep 17 00:00:00 2001 From: Matthias Baumgartner Date: Fri, 16 Dec 2022 10:12:25 +0100 Subject: setup files --- bsie.toml | 11 +++++++++++ setup.py | 20 ++++++++++++++++++++ 2 files changed, 31 insertions(+) create mode 100644 bsie.toml create mode 100644 setup.py diff --git a/bsie.toml b/bsie.toml new file mode 100644 index 0000000..10b0f37 --- /dev/null +++ b/bsie.toml @@ -0,0 +1,11 @@ +[project] +name = "bsie" +description = 
"Extract information from files and store them in a BSFS." +version = "0.0.1" +license = {text = "BSD 3-Clause License"} +authors = [{name='Matthias Baumgartner', email="dev@igsor.net"}] +dependencies = [ + "rdflib", + "bsfs", +] +requires-python = ">=3.7" diff --git a/setup.py b/setup.py new file mode 100644 index 0000000..ee9e0fd --- /dev/null +++ b/setup.py @@ -0,0 +1,20 @@ + +from setuptools import setup +import os + +setup( + name='bsie', + version='0.0.1', + author='Matthias Baumgartner', + author_email='dev@igsor.net', + description='Extract information from files and store them in a BSFS.', + long_description=open(os.path.join(os.path.dirname(__file__), 'README')).read(), + license='BSD', + license_files=('LICENSE', ), + url='https://www.igsor.net/projects/blackstar/bsie/', + download_url='https://pip.igsor.net', + packages=('bsie', ), + install_requires=('rdflib', 'bsfs'), + python_requires=">=3.7", +) + -- cgit v1.2.3 From 5850ff2bcb1052883cf301590126609b0657fbc9 Mon Sep 17 00:00:00 2001 From: Matthias Baumgartner Date: Sun, 18 Dec 2022 13:37:02 +0100 Subject: cosmetic changes --- bsie/__init__.py | 2 +- bsie/apps/index.py | 28 +++++++++++----------------- bsie/extractor/generic/constant.py | 2 +- bsie/extractor/generic/path.py | 3 ++- bsie/extractor/generic/stat.py | 2 +- test/apps/test_info.py | 1 - 6 files changed, 16 insertions(+), 22 deletions(-) diff --git a/bsie/__init__.py b/bsie/__init__.py index 96e6953..8d2308c 100644 --- a/bsie/__init__.py +++ b/bsie/__init__.py @@ -9,7 +9,7 @@ import collections import typing # constants -T_VERSION_INFO = collections.namedtuple('T_VERSION_INFO', ('major', 'minor', 'micro')) +T_VERSION_INFO = collections.namedtuple('T_VERSION_INFO', ('major', 'minor', 'micro')) # pylint: disable=invalid-name version_info = T_VERSION_INFO(0, 0, 1) # exports diff --git a/bsie/apps/index.py b/bsie/apps/index.py index e37684b..1dbfdd8 100644 --- a/bsie/apps/index.py +++ b/bsie/apps/index.py @@ -98,23 +98,17 @@ def main(argv): 
walk(print) return None - else: - # initialize bsfs - # NOTE: With presistent storages, the schema migration will be a seaparte operation. - # Here, we'd simply examine the schema and potentially discard more predicates. - store = bsfs.Open({ - 'Graph': { - 'user': args.user, - 'backend': { - 'SparqlStore': {}}, - }}) - store.migrate(bsie.schema) - # process files - def handle(node, pred, value): - store.node(node.node_type, node.uri).set(pred.uri, value) - walk(handle) - # return store - return store + # initialize bsfs + # NOTE: With presistent storages, the schema migration will be a seaparte operation. + # Here, we'd simply examine the schema and potentially discard more predicates. + store = bsfs.Open(bsfs.init_sparql_store(args.user)) + store.migrate(bsie.schema) + # process files + def handle(node, pred, value): + store.node(node.node_type, node.uri).set(pred.uri, value) + walk(handle) + # return store + return store diff --git a/bsie/extractor/generic/constant.py b/bsie/extractor/generic/constant.py index cdb2ef6..11384e6 100644 --- a/bsie/extractor/generic/constant.py +++ b/bsie/extractor/generic/constant.py @@ -35,7 +35,7 @@ class Constant(extractor.Extractor): super().__init__(bsfs.schema.Schema.from_string(extractor.SCHEMA_PREAMBLE + schema)) # NOTE: Raises a KeyError if the predicate is not part of the schema self._tuples = tuple((self.schema.predicate(p_uri), value) for p_uri, value in tuples) - # FIXME: use schema instance for value checking + # TODO: use schema instance for value checking def __eq__(self, other: typing.Any) -> bool: return super().__eq__(other) \ diff --git a/bsie/extractor/generic/path.py b/bsie/extractor/generic/path.py index 23ae80b..7018e12 100644 --- a/bsie/extractor/generic/path.py +++ b/bsie/extractor/generic/path.py @@ -62,7 +62,8 @@ class Path(extractor.Extractor): def __filename(self, path: str) -> typing.Optional[str]: try: return os.path.basename(path) - except Exception: # some error, skip. 
+ except Exception: # pylint: disable=broad-except # we explicitly want to catch everything + # some error, skip # FIXME: some kind of error reporting (e.g. logging)? # Options: (a) Fail silently (current); (b) Skip and report to log; # (c) Raise ExtractorError (aborts extraction); (d) separate content type diff --git a/bsie/extractor/generic/stat.py b/bsie/extractor/generic/stat.py index 1dcfedf..0b9ce29 100644 --- a/bsie/extractor/generic/stat.py +++ b/bsie/extractor/generic/stat.py @@ -63,7 +63,7 @@ class Stat(extractor.Extractor): """Return the file size.""" try: return content.st_size - except Exception: + except Exception: # pylint: disable=broad-except # we explicitly want to catch everything # FIXME: some kind of error reporting (e.g. logging) return None diff --git a/test/apps/test_info.py b/test/apps/test_info.py index ad39c64..6f4d98f 100644 --- a/test/apps/test_info.py +++ b/test/apps/test_info.py @@ -8,7 +8,6 @@ Author: Matthias Baumgartner, 2022 import argparse import contextlib import io -import os import unittest # objects to test -- cgit v1.2.3 From 057e09d6537bf5c39815661a75819081e3e5fda7 Mon Sep 17 00:00:00 2001 From: Matthias Baumgartner Date: Sun, 18 Dec 2022 13:37:59 +0100 Subject: adaptions to updates in bsfs --- bsie/tools/pipeline.py | 7 +++++-- bsie/utils/bsfs.py | 2 ++ bsie/utils/namespaces.py | 8 ++++---- test/apps/test_index.py | 4 ++-- test/lib/test_bsie.py | 4 ++-- test/tools/test_pipeline.py | 2 +- 6 files changed, 16 insertions(+), 11 deletions(-) diff --git a/bsie/tools/pipeline.py b/bsie/tools/pipeline.py index 52ce526..20e8ddf 100644 --- a/bsie/tools/pipeline.py +++ b/bsie/tools/pipeline.py @@ -18,6 +18,9 @@ __all__: typing.Sequence[str] = ( 'Pipeline', ) +# constants +FILE_PREFIX = 'file#' + ## code ## logger = logging.getLogger(__name__) @@ -48,7 +51,7 @@ class Pipeline(): ext2rdr: typing.Dict[base.extractor.Extractor, typing.Optional[base.reader.Reader]] ): # store core members - self._prefix = prefix + self._prefix = prefix + 
FILE_PREFIX self._ext2rdr = ext2rdr # compile schema from all extractors self._schema = bsfs.schema.Schema.Union(ext.schema for ext in ext2rdr) @@ -114,7 +117,7 @@ class Pipeline(): # create subject for file uuid = bsfs.uuid.UCID.from_path(path) - subject = node.Node(ns.bsfs.File, self._prefix + 'file#' + uuid) + subject = node.Node(ns.bsfs.File, self._prefix[uuid]) # extract information for rdr, extrs in rdr2ext.items(): diff --git a/bsie/utils/bsfs.py b/bsie/utils/bsfs.py index c48049d..0b88479 100644 --- a/bsie/utils/bsfs.py +++ b/bsie/utils/bsfs.py @@ -9,6 +9,7 @@ import typing # bsfs imports from bsfs import Open, schema +from bsfs.apps.init import init_sparql_store from bsfs.namespace import Namespace from bsfs.utils import URI, typename, uuid @@ -17,6 +18,7 @@ __all__: typing.Sequence[str] = ( 'Namespace', 'Open', 'URI', + 'init_sparql_store', 'schema', 'typename', 'uuid', diff --git a/bsie/utils/namespaces.py b/bsie/utils/namespaces.py index d6e1c72..a29fc1b 100644 --- a/bsie/utils/namespaces.py +++ b/bsie/utils/namespaces.py @@ -11,10 +11,10 @@ import typing from . 
import bsfs as _bsfs # constants -bse = _bsfs.Namespace('http://bsfs.ai/schema/Entity#') -bsfs = _bsfs.Namespace('http://bsfs.ai/schema/') -bsm = _bsfs.Namespace('http://bsfs.ai/schema/Meta#') -xsd = _bsfs.Namespace('http://www.w3.org/2001/XMLSchema#') +bse = _bsfs.Namespace('http://bsfs.ai/schema/Entity') +bsfs = _bsfs.Namespace('http://bsfs.ai/schema', fsep='/') +bsm = _bsfs.Namespace('http://bsfs.ai/schema/Meta') +xsd = _bsfs.Namespace('http://www.w3.org/2001/XMLSchema') # export __all__: typing.Sequence[str] = ( diff --git a/test/apps/test_index.py b/test/apps/test_index.py index 2be8470..9cdc656 100644 --- a/test/apps/test_index.py +++ b/test/apps/test_index.py @@ -30,7 +30,7 @@ class TestIndex(unittest.TestCase): ]) prefix = 'http://example.com/me/file#' - self.assertTrue(set(bsfs._Graph__backend.graph).issuperset({ + self.assertTrue(set(bsfs._backend._graph).issuperset({ (rdflib.URIRef(prefix + '2f4109b40107cc50e0884755a1a961ed126887e49b8dbaf0e146b2e226aa6647'), rdflib.RDF.type, rdflib.URIRef(ns.bsfs.File)), (rdflib.URIRef(prefix + '2f4109b40107cc50e0884755a1a961ed126887e49b8dbaf0e146b2e226aa6647'), rdflib.URIRef(ns.bse.author), rdflib.Literal('Me, myself, and I', datatype=rdflib.XSD.string)), (rdflib.URIRef(prefix + '2f4109b40107cc50e0884755a1a961ed126887e49b8dbaf0e146b2e226aa6647'), rdflib.URIRef(ns.bse.filename), rdflib.Literal('alpha_second', datatype=rdflib.XSD.string)), @@ -90,7 +90,7 @@ class TestIndex(unittest.TestCase): # (rdflib.URIRef(prefix + 'd43758ace82154a1cc10ca0dfef63cb20dd831f9c87edd6dc06539eefe67371d'), rdflib.URIRef(ns.bsm.t_created), rdflib.Literal('1670..........', datatype=rdflib.XSD.integer)), # (rdflib.URIRef(prefix + 'd803187cbf3676ae9d38126270a6152c60431589aa3bb3824baf8954e9c097f1'), rdflib.URIRef(ns.bsm.t_created), rdflib.Literal('1670..........', datatype=rdflib.XSD.integer)), # instead, we simply check if there's such a predicate for each file - self.assertSetEqual({sub for sub, _ in 
bsfs._Graph__backend.graph.subject_objects(rdflib.URIRef(ns.bsm.t_created))}, { + self.assertSetEqual({sub for sub, _ in bsfs._backend._graph.subject_objects(rdflib.URIRef(ns.bsm.t_created))}, { rdflib.URIRef(prefix + '2f4109b40107cc50e0884755a1a961ed126887e49b8dbaf0e146b2e226aa6647'), rdflib.URIRef(prefix + '441f3d10c8ff489fe8e33e639606512f6c463151cc429de7e554b9af670c2ece'), rdflib.URIRef(prefix + '69b98ecf7aff3e95b09688ba93331678eb8397817111f674c9558e6dd8f5e871'), diff --git a/test/lib/test_bsie.py b/test/lib/test_bsie.py index f3f476e..771a0c2 100644 --- a/test/lib/test_bsie.py +++ b/test/lib/test_bsie.py @@ -38,7 +38,7 @@ class TestBSIE(unittest.TestCase): )}, ]) # build pipeline - self.prefix = bsfs.Namespace('http://example.com/local/file#') + self.prefix = bsfs.Namespace('http://example.com/local/') pbuild = builder.PipelineBuilder(self.prefix, rbuild, ebuild) self.pipeline = pbuild.build() @@ -155,7 +155,7 @@ class TestBSIE(unittest.TestCase): ns.bse.author, }) content_hash = 'a948904f2f0f479b8f8197694b30184b0d2ed1c1cd2a1ec0fb85d299a192a447' - subject = node.Node(ns.bsfs.File, self.prefix + 'file#' + content_hash) + subject = node.Node(ns.bsfs.File, (self.prefix + 'file#')[content_hash]) testfile = os.path.join(os.path.dirname(__file__), 'testfile.t') # from_file extracts all available triples diff --git a/test/tools/test_pipeline.py b/test/tools/test_pipeline.py index 91bf736..a116a30 100644 --- a/test/tools/test_pipeline.py +++ b/test/tools/test_pipeline.py @@ -94,7 +94,7 @@ class TestPipeline(unittest.TestCase): pipeline = Pipeline(self.prefix, self.ext2rdr) # build objects for tests content_hash = 'a948904f2f0f479b8f8197694b30184b0d2ed1c1cd2a1ec0fb85d299a192a447' - subject = node.Node(ns.bsfs.File, self.prefix + 'file#' + content_hash) + subject = node.Node(ns.bsfs.File, (self.prefix + 'file#')[content_hash]) testfile = os.path.join(os.path.dirname(__file__), 'testfile.t') p_filename = pipeline.schema.predicate(ns.bse.filename) p_filesize = 
pipeline.schema.predicate(ns.bse.filesize) -- cgit v1.2.3