From ed2074ae88f2db6cb6b38716b43b35e29eb2e16c Mon Sep 17 00:00:00 2001 From: Matthias Baumgartner Date: Fri, 23 Dec 2022 16:25:51 +0100 Subject: filematcher: check file properties, formulate them as a string --- bsie/base/errors.py | 3 +++ 1 file changed, 3 insertions(+) (limited to 'bsie/base') diff --git a/bsie/base/errors.py b/bsie/base/errors.py index dc3c30e..5fafd5b 100644 --- a/bsie/base/errors.py +++ b/bsie/base/errors.py @@ -39,4 +39,7 @@ class ProgrammingError(_BSIEError): class UnreachableError(ProgrammingError): """Bravo, you've reached a point in code that should logically not be reachable.""" +class ParserError(_BSIEError): + """Failed to parse due to invalid syntax or structures.""" + ## EOF ## -- cgit v1.2.3 From 266c2c9a072bf3289fd7f2d75278b7d59528378c Mon Sep 17 00:00:00 2001 From: Matthias Baumgartner Date: Sat, 24 Dec 2022 10:27:09 +0100 Subject: package restructuring: base * Reader and Extractor to respective reader/extractor modules * ReaderBuilder to reader module * ExtractorBuilder to extractor module * Loading module in utils (safe_load, unpack_name) * Pipeline and PipelineBuilder to lib module * errors to utils * documentation: "standard import" and "external import" --- bsie/base/__init__.py | 24 ------------ bsie/base/errors.py | 45 --------------------- bsie/base/extractor.py | 103 ------------------------------------------------- bsie/base/reader.py | 47 ---------------------- 4 files changed, 219 deletions(-) delete mode 100644 bsie/base/__init__.py delete mode 100644 bsie/base/errors.py delete mode 100644 bsie/base/extractor.py delete mode 100644 bsie/base/reader.py (limited to 'bsie/base') diff --git a/bsie/base/__init__.py b/bsie/base/__init__.py deleted file mode 100644 index 0d362cd..0000000 --- a/bsie/base/__init__.py +++ /dev/null @@ -1,24 +0,0 @@ -"""The base module defines the BSIE interfaces. - -You'll mostly find abstract classes here. - -Part of the bsie module. -A copy of the license is provided with the project. -Author: Matthias Baumgartner, 2022 -""" -# imports -import typing - -# inner-module imports -from . import errors -from .extractor import Extractor -from .reader import Reader - -# exports -__all__: typing.Sequence[str] = ( - 'Extractor', - 'Reader', - 'errors', - ) - -## EOF ## diff --git a/bsie/base/errors.py b/bsie/base/errors.py deleted file mode 100644 index 5fafd5b..0000000 --- a/bsie/base/errors.py +++ /dev/null @@ -1,45 +0,0 @@ -"""Common BSIE exceptions. - -Part of the bsie module. -A copy of the license is provided with the project. -Author: Matthias Baumgartner, 2022 -""" -# imports -import typing - -# exports -__all__: typing.Sequence[str] = ( - 'BuilderError', - 'ExtractorError', - 'LoaderError', - 'ReaderError', - ) - - -## code ## - -class _BSIEError(Exception): - """Generic BSIE error.""" - -class BuilderError(_BSIEError): - """The Builder failed to create an instance.""" - -class LoaderError(BuilderError): - """Failed to load a module or class.""" - -class ExtractorError(_BSIEError): - """The Extractor failed to process the given content.""" - -class ReaderError(_BSIEError): - """The Reader failed to read the given file.""" - -class ProgrammingError(_BSIEError): - """An assertion-like error that indicates a code-base issue.""" - -class UnreachableError(ProgrammingError): - """Bravo, you've reached a point in code that should logically not be reachable.""" - -class ParserError(_BSIEError): - """Failed to parse due to invalid syntax or structures.""" - -## EOF ## diff --git a/bsie/base/extractor.py b/bsie/base/extractor.py deleted file mode 100644 index c44021b..0000000 --- a/bsie/base/extractor.py +++ /dev/null @@ -1,103 +0,0 @@ -"""The Extractor classes transform content into triples. - -Part of the bsie module. -A copy of the license is provided with the project. -Author: Matthias Baumgartner, 2022 -""" -# imports -import abc -import typing - -# bsie imports -from bsie.utils import bsfs, node, ns - -# exports -__all__: typing.Sequence[str] = ( - 'Extractor', - ) - -# constants - -# essential definitions typically used in extractor schemas. -# NOTE: This preamble is only for convenience; Each Extractor must implement its use, if so desired. -SCHEMA_PREAMBLE = ''' - # common external prefixes - prefix rdf: - prefix rdfs: - prefix xsd: - prefix schema: - - # common bsfs prefixes - prefix bsfs: - prefix bse: - - # essential nodes - bsfs:Entity rdfs:subClassOf bsfs:Node . - bsfs:File rdfs:subClassOf bsfs:Entity . - - # common definitions - xsd:string rdfs:subClassOf bsfs:Literal . - xsd:integer rdfs:subClassOf bsfs:Literal . - - ''' - - -## code ## - -class Extractor(abc.ABC): - """Produce (subject, predicate, value)-triples from some content. - The Extractor produces princpal predicates that provide information - about the content itself (i.e., triples that include the subject), - and may also generate triples with auxiliary predicates if the - extracted value is a node itself. - """ - - # what type of content is expected (i.e. reader subclass). - CONTENT_READER: typing.Optional[str] = None - - # extractor schema. - _schema: bsfs.schema.Schema - - def __init__(self, schema: bsfs.schema.Schema): - self._schema = schema - - def __str__(self) -> str: - return bsfs.typename(self) - - def __repr__(self) -> str: - return f'{bsfs.typename(self)}()' - - def __eq__(self, other: typing.Any) -> bool: - return isinstance(other, type(self)) \ - and self.CONTENT_READER == other.CONTENT_READER \ - and self.schema == other.schema - - def __hash__(self) -> int: - return hash((type(self), self.CONTENT_READER, self.schema)) - - @property - def schema(self) -> bsfs.schema.Schema: - """Return the extractor's schema.""" - return self._schema - - @property - def principals(self) -> typing.Iterator[bsfs.schema.Predicate]: - """Return the principal predicates, i.e., relations from/to the extraction subject.""" - ent = self.schema.node(ns.bsfs.Entity) - return ( - pred - for pred - in self.schema.predicates() - if pred.domain <= ent or (pred.range is not None and pred.range <= ent) - ) - - @abc.abstractmethod - def extract( - self, - subject: node.Node, - content: typing.Any, - principals: typing.Iterable[bsfs.schema.Predicate], - ) -> typing.Iterator[typing.Tuple[node.Node, bsfs.schema.Predicate, typing.Any]]: - """Return (node, predicate, value) triples.""" - -## EOF ## diff --git a/bsie/base/reader.py b/bsie/base/reader.py deleted file mode 100644 index cbabd36..0000000 --- a/bsie/base/reader.py +++ /dev/null @@ -1,47 +0,0 @@ -"""The Reader classes return high-level content structures from files. - -The Reader fulfills two purposes: - First, it brokers between multiple libraries and file formats. - Second, it separates multiple aspects of a file into distinct content types. - -Part of the bsie module. -A copy of the license is provided with the project. -Author: Matthias Baumgartner, 2022 -""" -# imports -import abc -import typing - -# bsie imports -from bsie.utils import bsfs - -# exports -__all__: typing.Sequence[str] = ( - 'Reader', - ) - - -## code ## - -class Reader(abc.ABC): - """Read and return some content from a file.""" - - def __str__(self) -> str: - return bsfs.typename(self) - - def __repr__(self) -> str: - return f'{bsfs.typename(self)}()' - - def __eq__(self, other: typing.Any) -> bool: - return isinstance(other, type(self)) - - def __hash__(self) -> int: - return hash(type(self)) - - @abc.abstractmethod - def __call__(self, path: bsfs.URI) -> typing.Any: - """Return some content of the file at *path*. - Raises a `ReaderError` if the reader cannot make sense of the file format. - """ - -## EOF ## -- cgit v1.2.3