aboutsummaryrefslogtreecommitdiffstats
path: root/bsie
diff options
context:
space:
mode:
Diffstat (limited to 'bsie')
-rw-r--r-- bsie/lib/pipeline.py 7
-rw-r--r-- bsie/reader/chain.py 7
-rw-r--r-- bsie/reader/image/_pillow.py 2
-rw-r--r-- bsie/reader/image/_raw.py 2
-rw-r--r-- bsie/utils/errors.py 3
5 files changed, 18 insertions, 3 deletions
diff --git a/bsie/lib/pipeline.py b/bsie/lib/pipeline.py
index e5ce1b7..02119bc 100644
--- a/bsie/lib/pipeline.py
+++ b/bsie/lib/pipeline.py
@@ -126,6 +126,8 @@ class Pipeline():
# get content
content = rdr(path) if rdr is not None else None
+ #logger.info('extracted %s from %s', rdr, path)
+
# apply extractors on this content
for ext in extrs:
try:
@@ -137,6 +139,11 @@ class Pipeline():
# critical extractor failure.
logger.error('%s failed to extract triples from content: %s', ext, err)
+ except errors.UnsupportedFileFormatError as err:
+ # failed to read the file format. skip.
+ #logger.warning('%s could not process the file format of %s', rdr, err)
+ pass
+
except errors.ReaderError as err:
# failed to read any content. skip.
logger.error('%s failed to read content: %s', rdr, err)
diff --git a/bsie/reader/chain.py b/bsie/reader/chain.py
index 8e900e1..db7c2d5 100644
--- a/bsie/reader/chain.py
+++ b/bsie/reader/chain.py
@@ -73,13 +73,16 @@ class ReaderChain(base.Reader, typing.Generic[T_CONTENT]):
return hash((super().__hash__(), self._children))
def __call__(self, path: str) -> T_CONTENT:
+ raise_error = errors.UnsupportedFileFormatError
for child in self._children:
try:
return child(path)
+ except errors.UnsupportedFileFormatError:
+ pass
except errors.ReaderError:
# child cannot read the file, skip.
- pass
+ raise_error = errors.ReaderError # type: ignore [assignment] # mypy is confused
- raise errors.ReaderError(path)
+ raise raise_error(path)
## EOF ##
diff --git a/bsie/reader/image/_pillow.py b/bsie/reader/image/_pillow.py
index ee0662d..3144509 100644
--- a/bsie/reader/image/_pillow.py
+++ b/bsie/reader/image/_pillow.py
@@ -31,6 +31,8 @@ class PillowImage(base.Reader):
try:
# open file with PIL
return PIL.Image.open(path)
+ except PIL.UnidentifiedImageError as err:
+ raise errors.UnsupportedFileFormatError(path) from err
except IOError as err:
raise errors.ReaderError(path) from err
diff --git a/bsie/reader/image/_raw.py b/bsie/reader/image/_raw.py
index 77be357..cd60453 100644
--- a/bsie/reader/image/_raw.py
+++ b/bsie/reader/image/_raw.py
@@ -45,7 +45,7 @@ class RawImage(base.Reader):
def __call__(self, path: str) -> PIL.Image:
# perform quick checks first
if not self._match(path):
- raise errors.ReaderError(path)
+ raise errors.UnsupportedFileFormatError(path)
try:
# open file with rawpy
diff --git a/bsie/utils/errors.py b/bsie/utils/errors.py
index fbc16f7..8133cd4 100644
--- a/bsie/utils/errors.py
+++ b/bsie/utils/errors.py
@@ -42,4 +42,7 @@ class UnreachableError(ProgrammingError):
class ParserError(_BSIEError):
"""Failed to parse due to invalid syntax or structures."""
+class UnsupportedFileFormatError(ReaderError):
+ """Failed to read a file format."""
+
## EOF ##