about summary refs log tree commit diff stats
path: root/bsie/extractor/generic/path.py
diff options
context:
space:
mode:
Diffstat (limited to 'bsie/extractor/generic/path.py')
-rw-r--r-- bsie/extractor/generic/path.py 44
1 files changed, 24 insertions, 20 deletions
diff --git a/bsie/extractor/generic/path.py b/bsie/extractor/generic/path.py
index c39bbd2..f346f97 100644
--- a/bsie/extractor/generic/path.py
+++ b/bsie/extractor/generic/path.py
@@ -8,11 +8,10 @@ Author: Matthias Baumgartner, 2022
import os
import typing
-# inner-module imports
+# bsie imports
from bsie.base import extractor
from bsie.utils import node, ns
-from bsie.utils.bsfs import URI
-import bsie.reader.path
+from bsie.utils.bsfs import schema
# exports
__all__: typing.Sequence[str] = (
@@ -25,32 +24,33 @@ __all__: typing.Sequence[str] = (
class Path(extractor.Extractor):
"""Extract information from file's path."""
- CONTENT_READER = bsie.reader.path.Path
+ CONTENT_READER = 'bsie.reader.path.Path'
- def __init__(self):
- self.__callmap = {
- ns.bse.filename: self.__filename,
- }
+ # mapping from predicate to handler function.
+ _callmap: typing.Dict[schema.Predicate, typing.Callable[[str], typing.Any]]
- def schema(self) -> str:
- return '''
- bse:filename a bsfs:Predicate ;
+ def __init__(self):
+ super().__init__(schema.Schema.from_string(extractor.SCHEMA_PREAMBLE + '''
+ bse:filename rdfs:subClassOf bsfs:Predicate ;
rdfs:domain bsfs:Entity ;
rdfs:range xsd:string ;
- rdf:label "File name"^^xsd:string ;
+ rdfs:label "File name"^^xsd:string ;
schema:description "Filename of entity in some filesystem."^^xsd:string ;
owl:maxCardinality "INF"^^xsd:number .
- '''
+ '''))
+ self._callmap = {
+ self.schema.predicate(ns.bse.filename): self.__filename,
+ }
def extract(
self,
subject: node.Node,
- content: CONTENT_READER.CONTENT_TYPE,
- predicates: typing.Iterable[URI],
- ) -> typing.Iterator[typing.Tuple[node.Node, URI, typing.Any]]:
+ content: str,
+ predicates: typing.Iterable[schema.Predicate],
+ ) -> typing.Iterator[typing.Tuple[node.Node, schema.Predicate, typing.Any]]:
for pred in predicates:
# find callback
- clbk = self.__callmap.get(pred)
+ clbk = self._callmap.get(pred)
if clbk is None:
continue
# get value
@@ -60,11 +60,15 @@ class Path(extractor.Extractor):
# produce triple
yield subject, pred, value
- def __filename(self, path: str) -> str:
+ def __filename(self, path: str) -> typing.Optional[str]:
try:
return os.path.basename(path)
- except Exception:
- # FIXME: some kind of error reporting (e.g. logging)
+ except Exception: # some error, skip.
+ # FIXME: some kind of error reporting (e.g. logging)?
+ # Options: (a) Fail silently (current); (b) Skip and report to log;
+ # (c) Raise ExtractorError (aborts extraction); (d) separate content type
+ # checks from basename errors (report content type errors, skip basename
+ # errors)
return None
## EOF ##