diff options
Diffstat (limited to 'bsie/reader/document')
-rw-r--r-- | bsie/reader/document/__init__.py | 41 | ||||
-rw-r--r-- | bsie/reader/document/_plain.py | 50 |
2 files changed, 91 insertions, 0 deletions
diff --git a/bsie/reader/document/__init__.py b/bsie/reader/document/__init__.py
new file mode 100644
index 0000000..4ae3613
--- /dev/null
+++ b/bsie/reader/document/__init__.py
@@ -0,0 +1,41 @@
+
+# standard imports
+import typing
+
+# inner-module imports
+from .. import chain
+
+# constants
+# Dotted names of the reader classes handed to the reader chain.
+# Only the plain-text reader is enabled; the other formats are commented out.
+_FILE_FORMAT_READERS: typing.Sequence[str] = (
+    #__package__ + '._docx.Docx',
+    #__package__ + '._odt.ODT',
+    #__package__ + '._pdf.PDF',
+    #__package__ + '._rtf.RTF',
+    #__package__ + '._ps.PS',
+    __package__ + '._plain.Plain',
+    )
+
+# exports
+# NOTE: the trailing comma is required; without it the parentheses only
+# group a plain string and __all__ would not be a sequence of names.
+__all__: typing.Sequence[str] = (
+    'Document',
+    )
+
+
+## code ##
+
+class Document(chain.ReaderChain[typing.Sequence[str]]):
+    """Read paragraphs from a text file.
+
+    This class only supplies the reader list (_FILE_FORMAT_READERS);
+    all other behaviour is inherited from chain.ReaderChain.
+    """
+
+    def __init__(self, cfg: typing.Optional[typing.Any] = None):
+        """Set up the chain; *cfg* is forwarded to chain.ReaderChain unchanged."""
+        super().__init__(_FILE_FORMAT_READERS, cfg)
+
+## EOF ##
diff --git a/bsie/reader/document/_plain.py b/bsie/reader/document/_plain.py
new file mode 100644
index 0000000..a589265
--- /dev/null
+++ b/bsie/reader/document/_plain.py
@@ -0,0 +1,50 @@
+
+# standard imports
+import typing
+
+# bsie imports
+from bsie.utils import errors, filematcher
+
+# inner-module imports
+from .. import base
+
+# constants
+# Rule string parsed by filematcher to decide which files this reader accepts.
+MATCH_RULE = 'mime=text/plain'
+
+# exports
+__all__: typing.Sequence[str] = (
+    'Plain',
+    )
+
+
+## code ##
+
+class Plain(base.Reader):
+    """Read the non-empty lines of a file that satisfies MATCH_RULE."""
+
+    # matcher built from MATCH_RULE; called with a path in __call__
+    _match: filematcher.Matcher
+
+    def __init__(self):
+        self._match = filematcher.parse(MATCH_RULE)
+
+    def __call__(self, path: str) -> typing.Sequence[str]:
+        """Return the stripped, non-empty lines of the file at *path*.
+
+        Raises errors.UnsupportedFileFormatError if the matcher built from
+        MATCH_RULE rejects *path*.
+        """
+        # perform quick checks first
+        if not self._match(path):
+            raise errors.UnsupportedFileFormatError(path)
+
+        # open file in text mode
+        # NOTE(review): no encoding is given, so the platform default is used
+        # and decoding errors propagate to the caller; confirm this is intended.
+        with open(path, 'rt') as ifile:
+            # strip each line once and drop the ones that end up empty
+            stripped = (line.strip() for line in ifile)
+            return [line for line in stripped if line]
+
+## EOF ##