aboutsummaryrefslogtreecommitdiffstats
path: root/bsie/base
diff options
context:
space:
mode:
author: Matthias Baumgartner <dev@igsor.net> 2022-12-18 14:22:31 +0100
committer: Matthias Baumgartner <dev@igsor.net> 2022-12-18 14:22:31 +0100
commit: 7582c280ad5324a2f0427999911c7e7abc14a6ab (patch)
tree: 0a59bbfe1c44d3497daad9f25ff9e7eb2bf9eb82 /bsie/base
parent: cb49e4567a18de6851286ff672e54f9a91865fe9 (diff)
parent: 057e09d6537bf5c39815661a75819081e3e5fda7 (diff)
download: bsie-7582c280ad5324a2f0427999911c7e7abc14a6ab.tar.gz
bsie-7582c280ad5324a2f0427999911c7e7abc14a6ab.tar.bz2
bsie-7582c280ad5324a2f0427999911c7e7abc14a6ab.zip
Merge branch 'develop' into main
Diffstat (limited to 'bsie/base')
-rw-r--r-- bsie/base/__init__.py 24
-rw-r--r-- bsie/base/errors.py 42
-rw-r--r-- bsie/base/extractor.py 103
-rw-r--r-- bsie/base/reader.py 47
4 files changed, 216 insertions, 0 deletions
diff --git a/bsie/base/__init__.py b/bsie/base/__init__.py
new file mode 100644
index 0000000..0d362cd
--- /dev/null
+++ b/bsie/base/__init__.py
@@ -0,0 +1,24 @@
+"""The base module defines the BSIE interfaces.
+
+You'll mostly find abstract classes here.
+
+Part of the bsie module.
+A copy of the license is provided with the project.
+Author: Matthias Baumgartner, 2022
+"""
+# imports
+import typing
+
+# inner-module imports
+from . import errors
+from .extractor import Extractor
+from .reader import Reader
+
+# exports
+__all__: typing.Sequence[str] = (
+ 'Extractor',
+ 'Reader',
+ 'errors',
+ )
+
+## EOF ##
diff --git a/bsie/base/errors.py b/bsie/base/errors.py
new file mode 100644
index 0000000..dc3c30e
--- /dev/null
+++ b/bsie/base/errors.py
@@ -0,0 +1,42 @@
+"""Common BSIE exceptions.
+
+Part of the bsie module.
+A copy of the license is provided with the project.
+Author: Matthias Baumgartner, 2022
+"""
+# imports
+import typing
+
+# exports (NOTE(review): ProgrammingError and UnreachableError are defined in this file but not listed here -- confirm that is intended)
+__all__: typing.Sequence[str] = (
+ 'BuilderError',
+ 'ExtractorError',
+ 'LoaderError',
+ 'ReaderError',
+ )
+
+
+## code ##
+
+class _BSIEError(Exception):
+ """Generic BSIE error; root of the exception hierarchy defined in this module."""
+
+class BuilderError(_BSIEError):
+ """The Builder failed to create an instance."""
+
+class LoaderError(BuilderError):
+ """Failed to load a module or class; a special case of BuilderError."""
+
+class ExtractorError(_BSIEError):
+ """The Extractor failed to process the given content."""
+
+class ReaderError(_BSIEError):
+ """The Reader failed to read the given file."""
+
+class ProgrammingError(_BSIEError):
+ """An assertion-like error that indicates a code-base issue."""
+
+class UnreachableError(ProgrammingError):
+ """Bravo, you've reached a point in code that should logically not be reachable."""
+
+## EOF ##
diff --git a/bsie/base/extractor.py b/bsie/base/extractor.py
new file mode 100644
index 0000000..c44021b
--- /dev/null
+++ b/bsie/base/extractor.py
@@ -0,0 +1,103 @@
+"""The Extractor classes transform content into triples.
+
+Part of the bsie module.
+A copy of the license is provided with the project.
+Author: Matthias Baumgartner, 2022
+"""
+# imports
+import abc
+import typing
+
+# bsie imports
+from bsie.utils import bsfs, node, ns
+
+# exports
+__all__: typing.Sequence[str] = (
+ 'Extractor',
+ )
+
+# constants
+
+# essential definitions typically used in extractor schemas.
+# NOTE: This preamble is only for convenience; each Extractor must implement its use, if so desired.
+SCHEMA_PREAMBLE = '''
+ # common external prefixes
+ prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
+ prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#>
+ prefix xsd: <http://www.w3.org/2001/XMLSchema#>
+ prefix schema: <http://schema.org/>
+
+ # common bsfs prefixes
+ prefix bsfs: <http://bsfs.ai/schema/>
+ prefix bse: <http://bsfs.ai/schema/Entity#>
+
+ # essential nodes
+ bsfs:Entity rdfs:subClassOf bsfs:Node .
+ bsfs:File rdfs:subClassOf bsfs:Entity .
+
+ # common definitions
+ xsd:string rdfs:subClassOf bsfs:Literal .
+ xsd:integer rdfs:subClassOf bsfs:Literal .
+
+ '''
+
+
+## code ##
+
+class Extractor(abc.ABC):
+ """Produce (subject, predicate, value)-triples from some content.
+ The Extractor produces principal predicates that provide information
+ about the content itself (i.e., triples that include the subject),
+ and may also generate triples with auxiliary predicates if the
+ extracted value is a node itself.
+ """
+
+ # what type of content is expected (i.e. reader subclass), given as a string; None unless a subclass overrides it.
+ CONTENT_READER: typing.Optional[str] = None
+
+ # extractor schema, as passed to the constructor.
+ _schema: bsfs.schema.Schema
+
+ def __init__(self, schema: bsfs.schema.Schema):
+ self._schema = schema
+
+ def __str__(self) -> str:
+ return bsfs.typename(self)
+
+ def __repr__(self) -> str:
+ return f'{bsfs.typename(self)}()'
+
+ def __eq__(self, other: typing.Any) -> bool:
+ return isinstance(other, type(self)) \
+ and self.CONTENT_READER == other.CONTENT_READER \
+ and self.schema == other.schema
+
+ def __hash__(self) -> int:
+ return hash((type(self), self.CONTENT_READER, self.schema))
+
+ @property
+ def schema(self) -> bsfs.schema.Schema:
+ """Return the extractor's schema (the object given at construction)."""
+ return self._schema
+
+ @property
+ def principals(self) -> typing.Iterator[bsfs.schema.Predicate]:
+ """Return the principal predicates, i.e., schema predicates whose domain or (non-None) range is `<=` the bsfs:Entity node."""
+ ent = self.schema.node(ns.bsfs.Entity)
+ return (
+ pred
+ for pred
+ in self.schema.predicates()
+ if pred.domain <= ent or (pred.range is not None and pred.range <= ent)
+ )
+
+ @abc.abstractmethod
+ def extract(
+ self,
+ subject: node.Node,
+ content: typing.Any,
+ principals: typing.Iterable[bsfs.schema.Predicate],
+ ) -> typing.Iterator[typing.Tuple[node.Node, bsfs.schema.Predicate, typing.Any]]:
+ """Return (node, predicate, value) triples; abstract, so every concrete subclass must implement it."""
+
+## EOF ##
diff --git a/bsie/base/reader.py b/bsie/base/reader.py
new file mode 100644
index 0000000..cbabd36
--- /dev/null
+++ b/bsie/base/reader.py
@@ -0,0 +1,47 @@
+"""The Reader classes return high-level content structures from files.
+
+The Reader fulfills two purposes:
+ First, it brokers between multiple libraries and file formats.
+ Second, it separates multiple aspects of a file into distinct content types.
+
+Part of the bsie module.
+A copy of the license is provided with the project.
+Author: Matthias Baumgartner, 2022
+"""
+# imports
+import abc
+import typing
+
+# bsie imports
+from bsie.utils import bsfs
+
+# exports
+__all__: typing.Sequence[str] = (
+ 'Reader',
+ )
+
+
+## code ##
+
+class Reader(abc.ABC):
+ """Read and return some content from a file (abstract base; subclasses implement `__call__`)."""
+
+ def __str__(self) -> str:
+ return bsfs.typename(self)
+
+ def __repr__(self) -> str:
+ return f'{bsfs.typename(self)}()'
+
+ def __eq__(self, other: typing.Any) -> bool:
+ return isinstance(other, type(self))
+
+ def __hash__(self) -> int:
+ return hash(type(self))
+
+ @abc.abstractmethod
+ def __call__(self, path: bsfs.URI) -> typing.Any:
+ """Return some content of the file at *path*; abstract, so every concrete subclass must implement it.
+ Raises a `ReaderError` if the reader cannot make sense of the file format.
+ """
+
+## EOF ##