aboutsummaryrefslogtreecommitdiffstats
path: root/bsie/lib
diff options
context:
space:
mode:
authorMatthias Baumgartner <dev@igsor.net>2022-12-18 14:22:31 +0100
committerMatthias Baumgartner <dev@igsor.net>2022-12-18 14:22:31 +0100
commit7582c280ad5324a2f0427999911c7e7abc14a6ab (patch)
tree0a59bbfe1c44d3497daad9f25ff9e7eb2bf9eb82 /bsie/lib
parentcb49e4567a18de6851286ff672e54f9a91865fe9 (diff)
parent057e09d6537bf5c39815661a75819081e3e5fda7 (diff)
downloadbsie-7582c280ad5324a2f0427999911c7e7abc14a6ab.tar.gz
bsie-7582c280ad5324a2f0427999911c7e7abc14a6ab.tar.bz2
bsie-7582c280ad5324a2f0427999911c7e7abc14a6ab.zip
Merge branch 'develop' into main
Diffstat (limited to 'bsie/lib')
-rw-r--r--bsie/lib/__init__.py18
-rw-r--r--bsie/lib/bsie.py92
2 files changed, 110 insertions, 0 deletions
diff --git a/bsie/lib/__init__.py b/bsie/lib/__init__.py
new file mode 100644
index 0000000..578c2c4
--- /dev/null
+++ b/bsie/lib/__init__.py
@@ -0,0 +1,18 @@
+"""
+
+Part of the bsie module.
+A copy of the license is provided with the project.
+Author: Matthias Baumgartner, 2022
+"""
+# imports
+import typing
+
+# inner-module imports
+from .bsie import BSIE
+
+# exports
+# BSIE is the only name this package exposes publicly.
+__all__: typing.Sequence[str] = ('BSIE',)
+
+## EOF ##
diff --git a/bsie/lib/bsie.py b/bsie/lib/bsie.py
new file mode 100644
index 0000000..e087fa9
--- /dev/null
+++ b/bsie/lib/bsie.py
@@ -0,0 +1,92 @@
+"""
+
+Part of the bsie module.
+A copy of the license is provided with the project.
+Author: Matthias Baumgartner, 2022
+"""
+# imports
+import typing
+
+# bsie imports
+from bsie.tools import Pipeline
+from bsie.utils import bsfs, node, ns
+
+# exports
+# BSIE is the only public name defined in this module.
+__all__: typing.Sequence[str] = ('BSIE',)
+
+
+## code ##
+
+class BSIE:
+    """Extract triples from files.
+
+    Controls which predicates to extract (*collect*) and
+    which to not extract (*discard*). Note that this only affects
+    principal predicates, not auxiliary predicates like, e.g., tag labels.
+
+    The selection is fixed at construction time: it starts from the
+    principals offered by the pipeline, is narrowed to *collect* (unless
+    *collect* is None or empty), and then has *discard* and
+    ``ns.bsfs.Predicate`` removed.
+
+    """
+
+    # pipeline that is invoked to produce the triples.
+    _pipeline: Pipeline
+
+    # URIs of the predicates to extract.
+    _principals: typing.Set[bsfs.URI]
+
+    # local schema, compiled from the selected principals.
+    _schema: bsfs.schema.Schema
+
+    def __init__(
+            self,
+            # pipeline builder.
+            pipeline: Pipeline,
+            # principals to extract at most. None implies all available w.r.t. extractors.
+            collect: typing.Optional[typing.Iterable[bsfs.URI]] = None,
+            # principals to discard.
+            discard: typing.Optional[typing.Iterable[bsfs.URI]] = None,
+            ):
+        # store pipeline
+        self._pipeline = pipeline
+        # start off with available principals
+        self._principals = {pred.uri for pred in self._pipeline.principals}
+        # limit principals to specified ones by argument.
+        # NOTE: an empty *collect* is treated like None, i.e., it does not restrict anything.
+        if collect is not None:
+            collect = set(collect)
+            if collect:
+                self._principals &= collect
+        # discard principals.
+        if discard is not None:
+            self._principals -= set(discard)
+        # discard ns.bsfs.Predicate
+        self._principals.discard(ns.bsfs.Predicate)
+        # compile a schema that only contains the requested principals (and auxiliary predicates)
+        self._schema = self._pipeline.subschema(
+            self._pipeline.schema.predicate(pred) for pred in self._principals)
+
+    @property
+    def schema(self) -> bsfs.schema.Schema:
+        """Return the BSIE schema."""
+        return self._schema
+
+    @property
+    def principals(self) -> typing.Iterator[bsfs.URI]:
+        """Return an iterator to the principal predicates."""
+        return iter(self._principals)
+
+    def from_file(
+            self,
+            path: bsfs.URI,
+            principals: typing.Optional[typing.Iterable[bsfs.URI]] = None,
+            ) -> typing.Iterator[typing.Tuple[node.Node, bsfs.URI, typing.Any]]:
+        """Produce triples for a given *path*. Limit to *principals* if given.
+
+        Requested *principals* that are not among this instance's principals
+        are ignored. If *principals* is None, all of the instance's principals
+        are used.
+
+        This is a generator: the predicate lookup and the pipeline invocation
+        only happen once the result is iterated.
+        """
+        # get requested principals and filter them through the instance's principals.
+        # The intersection builds a new set, so self._principals is never
+        # the target of an in-place operation.
+        if principals is None:
+            requested = self._principals
+        else:
+            requested = set(principals) & self._principals
+        # predicate lookup
+        predicates = {self.schema.predicate(pred) for pred in requested}
+        # invoke pipeline
+        yield from self._pipeline(path, predicates)
+
+## EOF ##