# Origin:  commit 11b26a913d39edb7f36cd0a3b3d8e74c96738579
# Author:  Matthias Baumgartner
# Date:    Fri, 28 Jul 2023 11:31:24 +0200
# Subject: document digestion: plaintext reader, text metrics extractor,
#          text summary extractor
# File:    bsie/reader/document/__init__.py

# standard imports
import typing

# inner-module imports
from .. import chain

# constants
# Dotted paths (package-qualified "module.Class" strings) of the reader
# classes handed to the ReaderChain base class. Only the plain-text reader is
# enabled; the remaining formats are kept as disabled placeholders
# (presumably not implemented yet -- confirm before enabling).
_FILE_FORMAT_READERS: typing.Sequence[str] = (
    #__package__ + '._docx.Docx',
    #__package__ + '._odt.ODT',
    #__package__ + '._pdf.PDF',
    #__package__ + '._rtf.RTF',
    #__package__ + '._ps.PS',
    __package__ + '._plain.Plain',
    )

# exports
# NOTE: the trailing comma is required. Without it the parentheses are plain
# grouping, __all__ becomes the string 'Document', and a wildcard import
# would try to look up the names 'D', 'o', 'c', ... instead of 'Document'.
__all__: typing.Sequence[str] = (
    'Document',
    )


## code ##

class Document(chain.ReaderChain[typing.Sequence[str]]):
    """Read paragraphs from a text file.

    A ReaderChain specialised to ``typing.Sequence[str]`` content and
    initialised with the reader class paths in ``_FILE_FORMAT_READERS``.
    How the chain selects among its readers is defined by
    ``chain.ReaderChain`` and is not visible from this module.

    Args:
        cfg: Optional configuration object, passed through unchanged to
            ``chain.ReaderChain.__init__``. Defaults to ``None``.
    """

    def __init__(self, cfg: typing.Optional[typing.Any] = None):
        super().__init__(_FILE_FORMAT_READERS, cfg)

## EOF ##