about summary refs log tree commit diff stats
path: root/bsie/base/extractor.py
diff options
context:
space:
mode:
author Matthias Baumgartner <dev@igsor.net> 2022-10-31 12:21:37 +0100
committer Matthias Baumgartner <dev@igsor.net> 2022-10-31 12:21:37 +0100
commit ce848b215086bd8d0f3e67a25a08239386b11ddb (patch)
tree 68b1eaba6c2b702dc51e15a6e273b845ef251c89 /bsie/base/extractor.py
parent cb49e4567a18de6851286ff672e54f9a91865fe9 (diff)
parent d2b4a528465dc01e8db92b61293c458c7911a333 (diff)
download bsie-ce848b215086bd8d0f3e67a25a08239386b11ddb.tar.gz
bsie-ce848b215086bd8d0f3e67a25a08239386b11ddb.tar.bz2
bsie-ce848b215086bd8d0f3e67a25a08239386b11ddb.zip
Merge branch 'mb/arch' into develop
Diffstat (limited to 'bsie/base/extractor.py')
-rw-r--r-- bsie/base/extractor.py 50
1 files changed, 50 insertions, 0 deletions
diff --git a/bsie/base/extractor.py b/bsie/base/extractor.py
new file mode 100644
index 0000000..d5b0922
--- /dev/null
+++ b/bsie/base/extractor.py
@@ -0,0 +1,50 @@
+"""The Extractor classes transform content into triples.
+
+Part of the bsie module.
+A copy of the license is provided with the project.
+Author: Matthias Baumgartner, 2022
+"""
+# imports
+import abc
+import collections
+import typing
+
+# inner-module imports
+from . import reader
+from bsie.utils import node
+from bsie.utils.bsfs import URI, typename
+
+# exports: the public names of this module (used by `from ... import *`)
+__all__: typing.Sequence[str] = (
+ 'Extractor',
+ )
+
+
+## code ##
+
+class Extractor(abc.ABC, collections.abc.Iterable, collections.abc.Callable):
+    """Abstract base class that turns some content into (node, predicate, value)-triples."""
+
+    # Reader subclass describing the type of content this Extractor expects; None by default.
+    CONTENT_READER: typing.Optional[typing.Type[reader.Reader]] = None
+
+    def __repr__(self) -> str:
+        return '{}()'.format(typename(self))
+
+    def __str__(self) -> str:
+        return typename(self)
+
+    @abc.abstractmethod
+    def extract(self, subject: node.Node, content: typing.Any, predicates: typing.Iterable[URI],
+                ) -> typing.Iterator[typing.Tuple[node.Node, URI, typing.Any]]:
+        """Return (node, predicate, value) triples.
+
+        Takes the `subject` node, the `content` to process, and an iterable of
+        `predicates` URIs (presumably the predicates to produce -- TODO confirm).
+        """
+
+    @abc.abstractmethod
+    def schema(self) -> str:
+        """Return the schema (predicates and nodes) produced by this Extractor."""
+
+## EOF ##