From d2b4a528465dc01e8db92b61293c458c7911a333 Mon Sep 17 00:00:00 2001
From: Matthias Baumgartner
Date: Mon, 31 Oct 2022 12:21:22 +0100
Subject: essential interfaces (reader, extractor, errors)

---
Review note (text in this section is not part of the commit message):
the file contents below were corrected during review, so the blob ids on
the "index" lines still refer to the unrevised contents.

 bsie/__init__.py       | 13 +++++++++++++
 bsie/base/__init__.py  | 24 ++++++++++++++++++++++++
 bsie/base/errors.py    | 24 ++++++++++++++++++++++++
 bsie/base/extractor.py | 56 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 bsie/base/reader.py    | 48 ++++++++++++++++++++++++++++++++++++++++++++++++
 bsie/utils/__init__.py | 20 ++++++++++++++++++++
 bsie/utils/bsfs.py     | 20 ++++++++++++++++++++
 bsie/utils/node.py     | 42 ++++++++++++++++++++++++++++++++++++++++++
 8 files changed, 247 insertions(+)
 create mode 100644 bsie/__init__.py
 create mode 100644 bsie/base/__init__.py
 create mode 100644 bsie/base/errors.py
 create mode 100644 bsie/base/extractor.py
 create mode 100644 bsie/base/reader.py
 create mode 100644 bsie/utils/__init__.py
 create mode 100644 bsie/utils/bsfs.py
 create mode 100644 bsie/utils/node.py

diff --git a/bsie/__init__.py b/bsie/__init__.py
new file mode 100644
index 0000000..2f2477a
--- /dev/null
+++ b/bsie/__init__.py
@@ -0,0 +1,13 @@
+"""The BSIE module extracts triples from files for insertion into a BSFS storage.
+
+Part of the bsie module.
+A copy of the license is provided with the project.
+Author: Matthias Baumgartner, 2022
+"""
+# imports
+import typing
+
+# exports
+__all__: typing.Sequence[str] = []
+
+## EOF ##
diff --git a/bsie/base/__init__.py b/bsie/base/__init__.py
new file mode 100644
index 0000000..0154862
--- /dev/null
+++ b/bsie/base/__init__.py
@@ -0,0 +1,24 @@
+"""The base module defines the BSIE interfaces.
+
+You'll mostly find abstract classes here.
+
+Part of the bsie module.
+A copy of the license is provided with the project.
+Author: Matthias Baumgartner, 2022
+"""
+# imports
+import typing
+
+# inner-module imports
+from . import errors
+from . import extractor
+from . import reader
+
+# exports
+__all__: typing.Sequence[str] = (
+    'errors',
+    'extractor',
+    'reader',
+    )
+
+## EOF ##
diff --git a/bsie/base/errors.py b/bsie/base/errors.py
new file mode 100644
index 0000000..f86ffb2
--- /dev/null
+++ b/bsie/base/errors.py
@@ -0,0 +1,24 @@
+"""Common BSIE exceptions.
+
+Part of the bsie module.
+A copy of the license is provided with the project.
+Author: Matthias Baumgartner, 2022
+"""
+# imports
+import typing
+
+# exports
+__all__: typing.Sequence[str] = (
+    'ReaderError',
+    )
+
+
+## code ##
+
+class _BSIE_Error(Exception):
+    """Generic BSIE error; base class of the exceptions defined in this module."""
+
+class ReaderError(_BSIE_Error):
+    """The Reader failed to read the given file."""
+
+## EOF ##
diff --git a/bsie/base/extractor.py b/bsie/base/extractor.py
new file mode 100644
index 0000000..d5b0922
--- /dev/null
+++ b/bsie/base/extractor.py
@@ -0,0 +1,56 @@
+"""The Extractor classes transform content into triples.
+
+Part of the bsie module.
+A copy of the license is provided with the project.
+Author: Matthias Baumgartner, 2022
+"""
+# imports
+import abc
+import collections.abc
+import typing
+
+# inner-module imports
+from . import reader
+from bsie.utils import node
+from bsie.utils.bsfs import URI, typename
+
+# exports
+__all__: typing.Sequence[str] = (
+    'Extractor',
+    )
+
+
+## code ##
+
+class Extractor(abc.ABC, collections.abc.Iterable, collections.abc.Callable):
+    """Produce (node, predicate, value)-triples from some content.
+
+    Besides `schema` and `extract`, concrete subclasses must also implement
+    `__iter__` and `__call__`: both are abstract methods inherited from
+    `collections.abc.Iterable` and `collections.abc.Callable`.
+    """
+
+    # what type of content is expected (i.e. reader subclass).
+    # None unless a subclass overrides it.
+    CONTENT_READER: typing.Optional[typing.Type[reader.Reader]] = None
+
+    def __str__(self) -> str:
+        return typename(self)
+
+    def __repr__(self) -> str:
+        return f'{typename(self)}()'
+
+    @abc.abstractmethod
+    def schema(self) -> str:
+        """Return the schema (predicates and nodes) produced by this Extractor."""
+
+    @abc.abstractmethod
+    def extract(
+            self,
+            subject: node.Node,
+            content: typing.Any,
+            predicates: typing.Iterable[URI],
+            ) -> typing.Iterator[typing.Tuple[node.Node, URI, typing.Any]]:
+        """Return (node, predicate, value) triples."""
+
+## EOF ##
diff --git a/bsie/base/reader.py b/bsie/base/reader.py
new file mode 100644
index 0000000..f29e451
--- /dev/null
+++ b/bsie/base/reader.py
@@ -0,0 +1,48 @@
+"""The Reader classes return high-level content structures from files.
+
+The Reader fulfills two purposes:
+    First, it brokers between multiple libraries and file formats.
+    Second, it separates multiple aspects of a file into distinct content types.
+
+Part of the bsie module.
+A copy of the license is provided with the project.
+Author: Matthias Baumgartner, 2022
+"""
+# imports
+import abc
+import typing
+
+# inner-module imports
+from bsie.utils.bsfs import URI, typename
+
+# exports
+__all__: typing.Sequence[str] = (
+    'Reader',
+    )
+
+
+## code ##
+
+class Reader(abc.ABC):
+    """Read and return some content from a file."""
+
+    # In what data structure content is returned
+    CONTENT_TYPE = typing.Union[typing.Any]
+    # NOTE: Child classes must also assign a typing.Union even if there's
+    # only one option
+
+    def __str__(self) -> str:
+        return typename(self)
+
+    def __repr__(self) -> str:
+        return f'{typename(self)}()'
+
+    # FIXME: How about using contexts instead of calls?
+    @abc.abstractmethod
+    def __call__(self, path: URI) -> CONTENT_TYPE:
+        """Return some content of the file at *path*.
+
+        Raises a `ReaderError` if the reader cannot make sense of the file format.
+        """
+
+## EOF ##
diff --git a/bsie/utils/__init__.py b/bsie/utils/__init__.py
new file mode 100644
index 0000000..1137187
--- /dev/null
+++ b/bsie/utils/__init__.py
@@ -0,0 +1,20 @@
+"""Common tools and definitions.
+
+Part of the bsie module.
+A copy of the license is provided with the project.
+Author: Matthias Baumgartner, 2022
+"""
+# imports
+import typing
+
+# inner-module imports
+from . import bsfs
+from . import node
+
+# exports
+__all__: typing.Sequence[str] = (
+    'bsfs',
+    'node',
+    )
+
+## EOF ##
diff --git a/bsie/utils/bsfs.py b/bsie/utils/bsfs.py
new file mode 100644
index 0000000..33eb178
--- /dev/null
+++ b/bsie/utils/bsfs.py
@@ -0,0 +1,20 @@
+"""BSFS bridge, provides BSFS bindings for BSIE.
+
+Part of the bsie module.
+A copy of the license is provided with the project.
+Author: Matthias Baumgartner, 2022
+"""
+# imports
+import typing
+
+# bsfs imports
+from bsfs.utils import URI
+from bsfs.utils import typename
+
+# exports
+__all__: typing.Sequence[str] = (
+    'URI',
+    'typename',
+    )
+
+## EOF ##
diff --git a/bsie/utils/node.py b/bsie/utils/node.py
new file mode 100644
index 0000000..60863a4
--- /dev/null
+++ b/bsie/utils/node.py
@@ -0,0 +1,42 @@
+"""Lightweight Node to bridge to BSFS.
+
+Part of the bsie module.
+A copy of the license is provided with the project.
+Author: Matthias Baumgartner, 2022
+"""
+# imports
+import typing
+
+# inner-module imports
+from bsie.utils.bsfs import URI
+
+# exports
+__all__: typing.Sequence[str] = (
+    'Node',
+    )
+
+
+## code ##
+
+class Node():
+    """Lightweight Node, disconnected from any bsfs structures.
+
+    Both constructor arguments are passed through `URI` before being stored.
+    """
+
+    # node type.
+    node_type: URI
+
+    # node URI.
+    uri: URI
+
+    def __init__(
+            self,
+            node_type: URI,
+            uri: URI,
+            ):
+        # assign members
+        self.node_type = URI(node_type)
+        self.uri = URI(uri)
+
+## EOF ##
-- 
cgit v1.2.3