aboutsummaryrefslogtreecommitdiffstats
path: root/bsie/lib/pipeline.py
diff options
context:
space:
mode:
author Matthias Baumgartner <dev@igsor.net> 2023-02-08 19:25:19 +0100
committer Matthias Baumgartner <dev@igsor.net> 2023-02-08 19:25:19 +0100
commit 7bf6b33fa6d6b901e4933bfe0b2a9939d7b3f3f3 (patch)
tree d280d9d1e19e4f7a9d0d4b5405603c729e1fdcce /bsie/lib/pipeline.py
parent 05a841215c82ef40d4679dfc4d2c26572bd4d349 (diff)
parent 0d0144466919cfb168e75c2af26d5cb74e10bfa0 (diff)
download bsie-7bf6b33fa6d6b901e4933bfe0b2a9939d7b3f3f3.tar.gz
bsie-7bf6b33fa6d6b901e4933bfe0b2a9939d7b3f3f3.tar.bz2
bsie-7bf6b33fa6d6b901e4933bfe0b2a9939d7b3f3f3.zip
Merge branch 'previews' into develop
Diffstat (limited to 'bsie/lib/pipeline.py')
-rw-r--r-- bsie/lib/pipeline.py 18
1 files changed, 5 insertions, 13 deletions
diff --git a/bsie/lib/pipeline.py b/bsie/lib/pipeline.py
index 44685ba..0bc5109 100644
--- a/bsie/lib/pipeline.py
+++ b/bsie/lib/pipeline.py
@@ -19,8 +19,6 @@ __all__: typing.Sequence[str] = (
'Pipeline',
)
-# constants
-FILE_PREFIX = 'file#'
## code ##
@@ -40,19 +38,14 @@ class Pipeline():
# combined extractor schemas.
_schema: bsfs.schema.Schema
- # node prefix.
- _prefix: bsfs.Namespace
-
# extractor -> reader mapping
_ext2rdr: typing.Dict[Extractor, typing.Optional[Reader]]
def __init__(
self,
- prefix: bsfs.Namespace,
ext2rdr: typing.Dict[Extractor, typing.Optional[Reader]]
):
# store core members
- self._prefix = prefix + FILE_PREFIX
self._ext2rdr = ext2rdr
# compile schema from all extractors
self._schema = bsfs.schema.Schema.Union(ext.schema for ext in ext2rdr)
@@ -64,12 +57,11 @@ class Pipeline():
return f'{bsfs.typename(self)}(...)'
def __hash__(self) -> int:
- return hash((type(self), self._prefix, self._schema, tuple(self._ext2rdr), tuple(self._ext2rdr.values())))
+ return hash((type(self), self._schema, tuple(self._ext2rdr), tuple(self._ext2rdr.values())))
def __eq__(self, other: typing.Any) -> bool:
return isinstance(other, type(self)) \
and self._schema == other._schema \
- and self._prefix == other._prefix \
and self._ext2rdr == other._ext2rdr
@property
@@ -117,8 +109,9 @@ class Pipeline():
rdr2ext[rdr].add(ext)
# create subject for file
- uuid = bsfs.uuid.UCID.from_path(path)
- subject = node.Node(ns.bsfs.File, self._prefix[uuid])
+ subject = node.Node(ns.bsfs.File,
+ ucid=bsfs.uuid.UCID.from_path(path),
+ )
# extract information
for rdr, extrs in rdr2ext.items():
@@ -131,8 +124,7 @@ class Pipeline():
for ext in extrs:
try:
# get predicate/value tuples
- for subject, pred, value in ext.extract(subject, content, principals):
- yield subject, pred, value
+ yield from ext.extract(subject, content, principals)
except errors.ExtractorError as err:
# critical extractor failure.