1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
|
"""
Part of the bsie module.
A copy of the license is provided with the project.
Author: Matthias Baumgartner, 2022
"""
# standard imports
import typing
# bsie imports
from bsie.utils import bsfs, node, ns
# inner-module imports
from .naming_policy import NamingPolicy
from .pipeline import Pipeline
# exports: the names this module makes public via `from <module> import *`.
__all__: typing.Sequence[str] = (
'BSIE',
)
## code ##
class BSIE():
    """Extract triples from files.

    Governs which principal predicates are extracted (*collect*) and
    which ones are left out (*discard*). The selection only concerns
    principal predicates; auxiliary predicates (e.g., tag labels) are
    not affected by it.

    """

    # extraction pipeline that produces the triples.
    _pipeline: Pipeline

    # naming policy; applied to the pipeline's output in from_file.
    _naming_policy: NamingPolicy

    # URIs of the principal predicates to extract.
    _principals: typing.Set[bsfs.URI]

    # schema compiled from the selected principals.
    _schema: bsfs.schema.Schema

    def __init__(
            self,
            # pipeline.
            pipeline: Pipeline,
            # naming policy
            naming_policy: NamingPolicy,
            # principals to extract at most. None implies all available w.r.t. extractors.
            collect: typing.Optional[typing.Iterable[bsfs.URI]] = None,
            # principals to discard.
            discard: typing.Optional[typing.Iterable[bsfs.URI]] = None,
            ):
        """Select the principals to extract and compile the matching schema.

        The selection starts from all principals of *pipeline*. A non-empty
        *collect* restricts it to the listed URIs (None or an empty *collect*
        leaves it unrestricted). URIs in *discard* are removed afterwards,
        and ns.bsfs.Predicate is always removed.
        """
        self._pipeline = pipeline
        self._naming_policy = naming_policy

        # everything the pipeline offers is the starting point.
        selected = {predicate.uri for predicate in pipeline.principals}

        # narrow down to the explicitly requested principals, if there are any.
        wanted = set(collect) if collect is not None else set()
        if wanted:
            selected.intersection_update(wanted)

        # remove the unwanted principals.
        if discard is not None:
            selected.difference_update(set(discard))

        # ns.bsfs.Predicate itself is never extracted.
        selected.discard(ns.bsfs.Predicate)
        self._principals = selected

        # build the schema from the selected principals only
        # (the pipeline's subschema takes care of the rest).
        self._schema = self._pipeline.subschema(
            self._pipeline.schema.predicate(uri) for uri in self._principals)

    @property
    def schema(self) -> bsfs.schema.Schema:
        """Return the schema compiled for the selected principals."""
        return self._schema

    @property
    def principals(self) -> typing.Iterator[bsfs.URI]:
        """Return an iterator over the URIs of the selected principals."""
        return iter(self._principals)

    def from_file(
            self,
            path: bsfs.URI,
            principals: typing.Optional[typing.Iterable[bsfs.URI]] = None,
            ) -> typing.Iterator[typing.Tuple[node.Node, bsfs.URI, typing.Any]]:
        """Yield the triples extracted from *path*.

        If *principals* is given, only those of them that are also part of
        this instance's selection are extracted. Otherwise, the full
        selection is used.
        """
        if principals is None:
            # no restriction requested: use the whole selection.
            requested = self._principals
        else:
            # never go beyond what this instance was configured to extract.
            requested = set(principals) & self._principals

        # translate the URIs into predicates of the local schema.
        predicates = {self.schema.predicate(uri) for uri in requested}

        # run the pipeline and pass its output through the naming policy.
        yield from self._naming_policy(self._pipeline(path, predicates))
## EOF ##
|