aboutsummaryrefslogtreecommitdiffstats
path: root/bsie/base/extractor.py
diff options
context:
space:
mode:
Diffstat (limited to 'bsie/base/extractor.py')
-rw-r--r--bsie/base/extractor.py50
1 files changed, 50 insertions, 0 deletions
diff --git a/bsie/base/extractor.py b/bsie/base/extractor.py
new file mode 100644
index 0000000..d5b0922
--- /dev/null
+++ b/bsie/base/extractor.py
@@ -0,0 +1,50 @@
+"""The Extractor classes transform content into triples.
+
+Part of the bsie module.
+A copy of the license is provided with the project.
+Author: Matthias Baumgartner, 2022
+"""
+# imports
+import abc
+import collections
+import typing
+
+# inner-module imports
+from . import reader
+from bsie.utils import node
+from bsie.utils.bsfs import URI, typename
+
+# exports
+# Public names of this module, i.e. what a star-import of it provides.
+__all__: typing.Sequence[str] = (
+ 'Extractor',
+ )
+
+
+## code ##
+
+class Extractor(abc.ABC, collections.abc.Iterable, collections.abc.Callable):
+ """Produce (node, predicate, value)-triples from some content."""
+
+ # what type of content is expected (i.e. reader subclass).
+ CONTENT_READER: typing.Optional[typing.Type[reader.Reader]] = None
+
+ def __str__(self) -> str:
+ return typename(self)
+
+ def __repr__(self) -> str:
+ return f'{typename(self)}()'
+
+ @abc.abstractmethod
+ def schema(self) -> str:
+ """Return the schema (predicates and nodes) produced by this Extractor."""
+
+ @abc.abstractmethod
+ def extract(
+ self,
+ subject: node.Node,
+ content: typing.Any,
+ predicates: typing.Iterable[URI],
+ ) -> typing.Iterator[typing.Tuple[node.Node, URI, typing.Any]]:
+ """Return (node, predicate, value) triples."""
+
+## EOF ##