about summary refs log tree commit diff stats
path: root/bsie/reader/document
diff options
context:
space:
mode:
Diffstat (limited to 'bsie/reader/document')
-rw-r--r-- bsie/reader/document/__init__.py 32
-rw-r--r-- bsie/reader/document/_plain.py 38
2 files changed, 70 insertions, 0 deletions
diff --git a/bsie/reader/document/__init__.py b/bsie/reader/document/__init__.py
new file mode 100644
index 0000000..4ae3613
--- /dev/null
+++ b/bsie/reader/document/__init__.py
@@ -0,0 +1,32 @@
+
+# standard imports
+import typing
+
+# inner-module imports
+from .. import chain
+
+# constants
+# Dotted paths (relative to this package) of the format-specific reader
+# classes that Document hands to the reader chain.
+# Only the plain-text reader is enabled; the commented-out entries are
+# disabled (presumably not implemented yet -- TODO confirm).
+_FILE_FORMAT_READERS: typing.Sequence[str] = (
+ #__package__ + '._docx.Docx',
+ #__package__ + '._odt.ODT',
+ #__package__ + '._pdf.PDF',
+ #__package__ + '._rtf.RTF',
+ #__package__ + '._ps.PS',
+ __package__ + '._plain.Plain',
+ )
+
+# exports
+# NOTE: the trailing comma is required. Without it the parentheses only group
+# a plain string, and a star-import would treat every character of
+# 'Document' as a separate exported name.
+__all__: typing.Sequence[str] = (
+    'Document',
+    )
+
+
+## code ##
+
+class Document(chain.ReaderChain[typing.Sequence[str]]):
+    """Reader chain that extracts a sequence of paragraphs from a document.
+
+    The chain is set up with the readers listed in ``_FILE_FORMAT_READERS``;
+    the actual reading is delegated to ``chain.ReaderChain``.
+    """
+
+    def __init__(self, cfg: typing.Optional[typing.Any] = None):
+        """Set up the chain.
+
+        *cfg* is an optional configuration object that is forwarded
+        unchanged to ``chain.ReaderChain``.
+        """
+        super().__init__(_FILE_FORMAT_READERS, cfg)
+
+## EOF ##
diff --git a/bsie/reader/document/_plain.py b/bsie/reader/document/_plain.py
new file mode 100644
index 0000000..a589265
--- /dev/null
+++ b/bsie/reader/document/_plain.py
@@ -0,0 +1,38 @@
+
+# standard imports
+import typing
+
+# bsie imports
+from bsie.utils import errors, filematcher
+
+# inner-module imports
+from .. import base
+
+# constants
+# Match rule parsed by filematcher.parse in Plain.__init__; judging by the
+# rule text it selects files whose MIME type is text/plain.
+MATCH_RULE = 'mime=text/plain'
+
+# exports
+__all__: typing.Sequence[str] = (
+ 'Plain',
+ )
+
+
+## code ##
+
+class Plain(base.Reader):
+    """Reader that returns the non-empty lines of a plain-text file."""
+
+    # matcher built from MATCH_RULE; decides whether a path is accepted.
+    _match: filematcher.Matcher
+
+    def __init__(self):
+        """Compile the file match rule once per reader instance."""
+        self._match = filematcher.parse(MATCH_RULE)
+
+    def __call__(self, path: str) -> typing.Sequence[str]:
+        """Return the stripped, non-empty lines of the file at *path*.
+
+        Raises errors.UnsupportedFileFormatError if the matcher rejects
+        *path*. Errors from opening or decoding the file are not caught.
+        """
+        # reject unsupported files before touching their content
+        if not self._match(path):
+            raise errors.UnsupportedFileFormatError(path)
+
+        # read the whole file in text mode
+        with open(path, 'rt') as ifile:
+            content = ifile.read()
+
+        # keep every line that still has content after stripping whitespace
+        paragraphs = []
+        for raw in content.split('\n'):
+            text = raw.strip()
+            if text:
+                paragraphs.append(text)
+        return paragraphs
+
+## EOF ##