aboutsummaryrefslogtreecommitdiffstats
path: root/bsie/lib/bsie.py
diff options
context:
space:
mode:
Diffstat (limited to 'bsie/lib/bsie.py')
-rw-r--r--bsie/lib/bsie.py92
1 files changed, 92 insertions, 0 deletions
diff --git a/bsie/lib/bsie.py b/bsie/lib/bsie.py
new file mode 100644
index 0000000..e087fa9
--- /dev/null
+++ b/bsie/lib/bsie.py
@@ -0,0 +1,92 @@
+"""
+
+Part of the bsie module.
+A copy of the license is provided with the project.
+Author: Matthias Baumgartner, 2022
+"""
+# imports
+import typing
+
+# bsie imports
+from bsie.tools import Pipeline
+from bsie.utils import bsfs, node, ns
+
+# exports
+__all__: typing.Sequence[str] = (
+ 'BSIE',
+ )
+
+
+## code ##
+
+class BSIE():
+ """Extract triples from files.
+
+ Controls which predicates to extract (*collect*) and
+ which to not extract (*discard*). Note that this only affects
+ principal predicates not auxililary predicates like, e.g., tag labels.
+
+ """
+
+ # pipeline
+ _pipeline: Pipeline
+
+ # predicates to extract.
+ _principals: typing.Set[bsfs.URI]
+
+ # local schema.
+ _schema: bsfs.schema.Schema
+
+ def __init__(
+ self,
+ # pipeline builder.
+ pipeline: Pipeline,
+ # principals to extract at most. None implies all available w.r.t. extractors.
+ collect: typing.Optional[typing.Iterable[bsfs.URI]] = None,
+ # principals to discard.
+ discard: typing.Optional[typing.Iterable[bsfs.URI]] = None,
+ ):
+ # store pipeline
+ self._pipeline = pipeline
+ # start off with available principals
+ self._principals = {pred.uri for pred in self._pipeline.principals}
+ # limit principals to specified ones by argument.
+ if collect is not None:
+ collect = set(collect)
+ if len(collect) > 0:
+ self._principals &= collect
+ # discard principals.
+ if discard is not None:
+ self._principals -= set(discard)
+ # discard ns.bsfs.Predicate
+ self._principals.discard(ns.bsfs.Predicate)
+ # compile a schema that only contains the requested principals (and auxiliary predicates)
+ self._schema = self._pipeline.subschema(
+ self._pipeline.schema.predicate(pred) for pred in self._principals)
+
+ @property
+ def schema(self) -> bsfs.schema.Schema:
+ """Return the BSIE schema."""
+ return self._schema
+
+ @property
+ def principals(self) -> typing.Iterator[bsfs.URI]:
+ """Return an iterator to the principal predicates."""
+ return iter(self._principals)
+
+ def from_file(
+ self,
+ path: bsfs.URI,
+ principals: typing.Optional[typing.Iterable[bsfs.URI]] = None,
+ ) -> typing.Iterator[typing.Tuple[node.Node, bsfs.URI, typing.Any]]:
+ """Produce triples for a given *path*. Limit to *principals* if given."""
+ # get requested principals.
+ principals = set(principals) if principals is not None else self._principals
+ # filter through requested principals.
+ principals &= self._principals
+ # predicate lookup
+ principals = {self.schema.predicate(pred) for pred in principals}
+ # invoke pipeline
+ yield from self._pipeline(path, principals)
+
+## EOF ##