diff options
author | Matthias Baumgartner <dev@igsor.net> | 2022-10-31 14:14:57 +0100 |
---|---|---|
committer | Matthias Baumgartner <dev@igsor.net> | 2022-10-31 14:14:57 +0100 |
commit | 9389c741bdbbca9adbff6099d440706cd63deac4 (patch) | |
tree | 48ee0e912e2f19f51bd684d790f0bcc2d906e887 /bsie/extractor/generic/path.py | |
parent | d4023fa972af379a4235f51783954671de974372 (diff) | |
parent | 2da348c638ac5058d5acf09ab5df323ee04503d5 (diff) | |
download | bsie-9389c741bdbbca9adbff6099d440706cd63deac4.tar.gz bsie-9389c741bdbbca9adbff6099d440706cd63deac4.tar.bz2 bsie-9389c741bdbbca9adbff6099d440706cd63deac4.zip |
Merge branch 'mb/extractors' into develop
Diffstat (limited to 'bsie/extractor/generic/path.py')
-rw-r--r-- | bsie/extractor/generic/path.py | 70 |
1 files changed, 70 insertions, 0 deletions
# Source: bsie/extractor/generic/path.py (new file in this commit, blob c39bbd2).
"""Extractor that derives information from a file's path.

Part of the bsie module.
A copy of the license is provided with the project.
Author: Matthias Baumgartner, 2022
"""
# imports
import logging
import os
import typing

# inner-module imports
from bsie.base import extractor
from bsie.utils import node, ns
from bsie.utils.bsfs import URI
import bsie.reader.path

# exports
__all__: typing.Sequence[str] = (
    'Path',
    )


## code ##

logger = logging.getLogger(__name__)


class Path(extractor.Extractor):
    """Extract information from file's path.

    Each supported predicate is mapped to a private callback that computes
    the predicate's value from the content handed to :meth:`extract`.
    """

    # Reader class whose CONTENT_TYPE annotates the `content` argument of extract().
    CONTENT_READER = bsie.reader.path.Path

    def __init__(self):
        # Dispatch table: predicate URI -> callback producing that predicate's value.
        self.__callmap = {
            ns.bse.filename: self.__filename,
            }

    def schema(self) -> str:
        """Return the schema definition of the predicates this extractor produces."""
        return '''
        bse:filename a bsfs:Predicate ;
            rdfs:domain bsfs:Entity ;
            rdfs:range xsd:string ;
            rdf:label "File name"^^xsd:string ;
            schema:description "Filename of entity in some filesystem."^^xsd:string ;
            owl:maxCardinality "INF"^^xsd:number .
        '''

    def extract(
            self,
            subject: node.Node,
            content: CONTENT_READER.CONTENT_TYPE,
            predicates: typing.Iterable[URI],
            ) -> typing.Iterator[typing.Tuple[node.Node, URI, typing.Any]]:
        """Yield ``(subject, predicate, value)`` triples for the requested predicates.

        Predicates without a registered callback are skipped, as are
        predicates whose callback returns ``None``.
        """
        for pred in predicates:
            # find callback
            clbk = self.__callmap.get(pred)
            if clbk is None:
                continue
            # get value
            value = clbk(content)
            if value is None:
                continue
            # produce triple
            yield subject, pred, value

    def __filename(self, path: str) -> typing.Optional[str]:
        """Return the final component of *path*, or ``None`` if it cannot be determined.

        Failures are logged rather than raised so that a single bad value
        does not abort the extraction of other predicates.
        """
        try:
            return os.path.basename(path)
        except Exception:  # best-effort by design: report the failure and skip the value
            logger.warning('failed to extract filename from %r', path, exc_info=True)
            return None

## EOF ##