From 266c2c9a072bf3289fd7f2d75278b7d59528378c Mon Sep 17 00:00:00 2001
From: Matthias Baumgartner <dev@igsor.net>
Date: Sat, 24 Dec 2022 10:27:09 +0100
Subject: package restructuring: base

* Reader and Extractor to respective reader/extractor modules
* ReaderBuilder to reader module
* ExtractorBuilder to extractor module
* Loading module in utils (safe_load, unpack_name)
* Pipeline and PipelineBuilder to lib module
* errors to utils
* documentation: "standard import" and "external import"
---
 bsie/apps/index.py                 |  16 +--
 bsie/apps/info.py                  |  16 +--
 bsie/base/__init__.py              |  24 ----
 bsie/base/errors.py                |  45 --------
 bsie/base/extractor.py             | 103 -----------------
 bsie/base/reader.py                |  47 --------
 bsie/extractor/__init__.py         |  11 +-
 bsie/extractor/base.py             | 103 +++++++++++++++++
 bsie/extractor/builder.py          |  77 +++++++++++++
 bsie/extractor/generic/constant.py |  10 +-
 bsie/extractor/generic/path.py     |   8 +-
 bsie/extractor/generic/stat.py     |  10 +-
 bsie/lib/__init__.py               |   4 +-
 bsie/lib/bsie.py                   |   6 +-
 bsie/lib/builder.py                |  85 ++++++++++++++
 bsie/lib/pipeline.py               | 145 ++++++++++++++++++++++++
 bsie/reader/__init__.py            |  13 +++
 bsie/reader/base.py                |  47 ++++++++
 bsie/reader/builder.py             |  74 ++++++++++++
 bsie/reader/path.py                |   8 +-
 bsie/reader/stat.py                |   9 +-
 bsie/tools/__init__.py             |  20 ----
 bsie/tools/builder.py              | 226 -------------------------------------
 bsie/tools/pipeline.py             | 144 -----------------------
 bsie/utils/__init__.py             |   9 +-
 bsie/utils/errors.py               |  45 ++++++++
 bsie/utils/filematcher/parser.py   |   6 +-
 bsie/utils/loading.py              |  54 +++++++++
 28 files changed, 710 insertions(+), 655 deletions(-)
 delete mode 100644 bsie/base/__init__.py
 delete mode 100644 bsie/base/errors.py
 delete mode 100644 bsie/base/extractor.py
 delete mode 100644 bsie/base/reader.py
 create mode 100644 bsie/extractor/base.py
 create mode 100644 bsie/extractor/builder.py
 create mode 100644 bsie/lib/builder.py
 create mode 100644 bsie/lib/pipeline.py
 create mode 100644 bsie/reader/base.py
 create mode 100644 bsie/reader/builder.py
 delete mode 100644 bsie/tools/__init__.py
 delete mode 100644 bsie/tools/builder.py
 delete mode 100644 bsie/tools/pipeline.py
 create mode 100644 bsie/utils/errors.py
 create mode 100644 bsie/utils/loading.py

(limited to 'bsie')

diff --git a/bsie/apps/index.py b/bsie/apps/index.py
index 1dbfdd8..0c6296f 100644
--- a/bsie/apps/index.py
+++ b/bsie/apps/index.py
@@ -4,16 +4,16 @@ Part of the bsie module.
 A copy of the license is provided with the project.
 Author: Matthias Baumgartner, 2022
 """
-# imports
+# standard imports
 import argparse
 import os
 import typing
 
 # bsie imports
-from bsie.base import errors
-from bsie.lib import BSIE
-from bsie.tools import builder
-from bsie.utils import bsfs
+from bsie.extractor import ExtractorBuilder
+from bsie.lib import BSIE, PipelineBuilder
+from bsie.reader import ReaderBuilder
+from bsie.utils import bsfs, errors
 
 # exports
 __all__: typing.Sequence[str] = (
@@ -44,9 +44,9 @@ def main(argv):
 
     # FIXME: Read reader/extractor configs from a config file
     # reader builder
-    rbuild = builder.ReaderBuilder({})
+    rbuild = ReaderBuilder({})
     # extractor builder
-    ebuild = builder.ExtractorBuilder([
+    ebuild = ExtractorBuilder([
         {'bsie.extractor.generic.path.Path': {}},
         {'bsie.extractor.generic.stat.Stat': {}},
         {'bsie.extractor.generic.constant.Constant': dict(
@@ -60,7 +60,7 @@ def main(argv):
             )},
         ])
     # pipeline builder
-    pbuild = builder.PipelineBuilder(
+    pbuild = PipelineBuilder(
         bsfs.Namespace(args.user + ('/' if not args.user.endswith('/') else '')),
         rbuild,
         ebuild,
diff --git a/bsie/apps/info.py b/bsie/apps/info.py
index eaf1f71..a4e611c 100644
--- a/bsie/apps/info.py
+++ b/bsie/apps/info.py
@@ -4,15 +4,16 @@ Part of the bsie module.
 A copy of the license is provided with the project.
 Author: Matthias Baumgartner, 2022
 """
-# imports
+# standard imports
 import argparse
 import sys
 import typing
 
 # bsie imports
-from bsie.base import errors
-from bsie.tools import builder
-from bsie.utils import bsfs
+from bsie.extractor import ExtractorBuilder
+from bsie.lib import PipelineBuilder
+from bsie.reader import ReaderBuilder
+from bsie.utils import bsfs, errors
 
 # exports
 __all__: typing.Sequence[str] = (
@@ -31,9 +32,10 @@ def main(argv):
 
     # FIXME: Read reader/extractor configs from a config file
     # reader builder
-    rbuild = builder.ReaderBuilder({})
+    rbuild = ReaderBuilder({
+        })
     # extractor builder
-    ebuild = builder.ExtractorBuilder([
+    ebuild = ExtractorBuilder([
         {'bsie.extractor.generic.path.Path': {}},
         {'bsie.extractor.generic.stat.Stat': {}},
         {'bsie.extractor.generic.constant.Constant': dict(
@@ -47,7 +49,7 @@ def main(argv):
             )},
         ])
     # pipeline builder
-    pbuild = builder.PipelineBuilder(
+    pbuild = PipelineBuilder(
         bsfs.Namespace('http://example.com/me/'), # not actually used
         rbuild,
         ebuild,
diff --git a/bsie/base/__init__.py b/bsie/base/__init__.py
deleted file mode 100644
index 0d362cd..0000000
--- a/bsie/base/__init__.py
+++ /dev/null
@@ -1,24 +0,0 @@
-"""The base module defines the BSIE interfaces.
-
-You'll mostly find abstract classes here.
-
-Part of the bsie module.
-A copy of the license is provided with the project.
-Author: Matthias Baumgartner, 2022
-"""
-# imports
-import typing
-
-# inner-module imports
-from . import errors
-from .extractor import Extractor
-from .reader import Reader
-
-# exports
-__all__: typing.Sequence[str] = (
-    'Extractor',
-    'Reader',
-    'errors',
-    )
-
-## EOF ##
diff --git a/bsie/base/errors.py b/bsie/base/errors.py
deleted file mode 100644
index 5fafd5b..0000000
--- a/bsie/base/errors.py
+++ /dev/null
@@ -1,45 +0,0 @@
-"""Common BSIE exceptions.
-
-Part of the bsie module.
-A copy of the license is provided with the project.
-Author: Matthias Baumgartner, 2022
-"""
-# imports
-import typing
-
-# exports
-__all__: typing.Sequence[str] = (
-    'BuilderError',
-    'ExtractorError',
-    'LoaderError',
-    'ReaderError',
-    )
-
-
-## code ##
-
-class _BSIEError(Exception):
-    """Generic BSIE error."""
-
-class BuilderError(_BSIEError):
-    """The Builder failed to create an instance."""
-
-class LoaderError(BuilderError):
-    """Failed to load a module or class."""
-
-class ExtractorError(_BSIEError):
-    """The Extractor failed to process the given content."""
-
-class ReaderError(_BSIEError):
-    """The Reader failed to read the given file."""
-
-class ProgrammingError(_BSIEError):
-    """An assertion-like error that indicates a code-base issue."""
-
-class UnreachableError(ProgrammingError):
-    """Bravo, you've reached a point in code that should logically not be reachable."""
-
-class ParserError(_BSIEError):
-    """Failed to parse due to invalid syntax or structures."""
-
-## EOF ##
diff --git a/bsie/base/extractor.py b/bsie/base/extractor.py
deleted file mode 100644
index c44021b..0000000
--- a/bsie/base/extractor.py
+++ /dev/null
@@ -1,103 +0,0 @@
-"""The Extractor classes transform content into triples.
-
-Part of the bsie module.
-A copy of the license is provided with the project.
-Author: Matthias Baumgartner, 2022
-"""
-# imports
-import abc
-import typing
-
-# bsie imports
-from bsie.utils import bsfs, node, ns
-
-# exports
-__all__: typing.Sequence[str] = (
-    'Extractor',
-    )
-
-# constants
-
-# essential definitions typically used in extractor schemas.
-# NOTE: This preamble is only for convenience; Each Extractor must implement its use, if so desired.
-SCHEMA_PREAMBLE = '''
-    # common external prefixes
-    prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
-    prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#>
-    prefix xsd: <http://www.w3.org/2001/XMLSchema#>
-    prefix schema: <http://schema.org/>
-
-    # common bsfs prefixes
-    prefix bsfs: <http://bsfs.ai/schema/>
-    prefix bse: <http://bsfs.ai/schema/Entity#>
-
-    # essential nodes
-    bsfs:Entity rdfs:subClassOf bsfs:Node .
-    bsfs:File rdfs:subClassOf bsfs:Entity .
-
-    # common definitions
-    xsd:string rdfs:subClassOf bsfs:Literal .
-    xsd:integer rdfs:subClassOf bsfs:Literal .
-
-    '''
-
-
-## code ##
-
-class Extractor(abc.ABC):
-    """Produce (subject, predicate, value)-triples from some content.
-    The Extractor produces princpal predicates that provide information
-    about the content itself (i.e., triples that include the subject),
-    and may also generate triples with auxiliary predicates if the
-    extracted value is a node itself.
-    """
-
-    # what type of content is expected (i.e. reader subclass).
-    CONTENT_READER: typing.Optional[str] = None
-
-    # extractor schema.
-    _schema: bsfs.schema.Schema
-
-    def __init__(self, schema: bsfs.schema.Schema):
-        self._schema = schema
-
-    def __str__(self) -> str:
-        return bsfs.typename(self)
-
-    def __repr__(self) -> str:
-        return f'{bsfs.typename(self)}()'
-
-    def __eq__(self, other: typing.Any) -> bool:
-        return isinstance(other, type(self)) \
-          and self.CONTENT_READER == other.CONTENT_READER \
-          and self.schema == other.schema
-
-    def __hash__(self) -> int:
-        return hash((type(self), self.CONTENT_READER, self.schema))
-
-    @property
-    def schema(self) -> bsfs.schema.Schema:
-        """Return the extractor's schema."""
-        return self._schema
-
-    @property
-    def principals(self) -> typing.Iterator[bsfs.schema.Predicate]:
-        """Return the principal predicates, i.e., relations from/to the extraction subject."""
-        ent = self.schema.node(ns.bsfs.Entity)
-        return (
-            pred
-            for pred
-            in self.schema.predicates()
-            if pred.domain <= ent or (pred.range is not None and pred.range <= ent)
-            )
-
-    @abc.abstractmethod
-    def extract(
-            self,
-            subject: node.Node,
-            content: typing.Any,
-            principals: typing.Iterable[bsfs.schema.Predicate],
-            ) -> typing.Iterator[typing.Tuple[node.Node, bsfs.schema.Predicate, typing.Any]]:
-        """Return (node, predicate, value) triples."""
-
-## EOF ##
diff --git a/bsie/base/reader.py b/bsie/base/reader.py
deleted file mode 100644
index cbabd36..0000000
--- a/bsie/base/reader.py
+++ /dev/null
@@ -1,47 +0,0 @@
-"""The Reader classes return high-level content structures from files.
-
-The Reader fulfills two purposes:
-    First, it brokers between multiple libraries and file formats.
-    Second, it separates multiple aspects of a file into distinct content types.
-
-Part of the bsie module.
-A copy of the license is provided with the project.
-Author: Matthias Baumgartner, 2022
-"""
-# imports
-import abc
-import typing
-
-# bsie imports
-from bsie.utils import bsfs
-
-# exports
-__all__: typing.Sequence[str] = (
-    'Reader',
-    )
-
-
-## code ##
-
-class Reader(abc.ABC):
-    """Read and return some content from a file."""
-
-    def __str__(self) -> str:
-        return bsfs.typename(self)
-
-    def __repr__(self) -> str:
-        return f'{bsfs.typename(self)}()'
-
-    def __eq__(self, other: typing.Any) -> bool:
-        return isinstance(other, type(self))
-
-    def __hash__(self) -> int:
-        return hash(type(self))
-
-    @abc.abstractmethod
-    def __call__(self, path: bsfs.URI) -> typing.Any:
-        """Return some content of the file at *path*.
-        Raises a `ReaderError` if the reader cannot make sense of the file format.
-        """
-
-## EOF ##
diff --git a/bsie/extractor/__init__.py b/bsie/extractor/__init__.py
index ef31343..5f385ee 100644
--- a/bsie/extractor/__init__.py
+++ b/bsie/extractor/__init__.py
@@ -6,10 +6,17 @@ Part of the bsie module.
 A copy of the license is provided with the project.
 Author: Matthias Baumgartner, 2022
 """
-# imports
+# standard imports
 import typing
 
+# inner-module imports
+from .base import Extractor
+from .builder import ExtractorBuilder
+
 # exports
-__all__: typing.Sequence[str] = []
+__all__: typing.Sequence[str] = (
+    'Extractor',
+    'ExtractorBuilder',
+    )
 
 ## EOF ##
diff --git a/bsie/extractor/base.py b/bsie/extractor/base.py
new file mode 100644
index 0000000..c44021b
--- /dev/null
+++ b/bsie/extractor/base.py
@@ -0,0 +1,103 @@
+"""The Extractor classes transform content into triples.
+
+Part of the bsie module.
+A copy of the license is provided with the project.
+Author: Matthias Baumgartner, 2022
+"""
+# standard imports
+import abc
+import typing
+
+# bsie imports
+from bsie.utils import bsfs, node, ns
+
+# exports
+__all__: typing.Sequence[str] = (
+    'Extractor',
+    )
+
+# constants
+
+# essential definitions typically used in extractor schemas.
+# NOTE: This preamble is only for convenience; Each Extractor must implement its use, if so desired.
+SCHEMA_PREAMBLE = '''
+    # common external prefixes
+    prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
+    prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#>
+    prefix xsd: <http://www.w3.org/2001/XMLSchema#>
+    prefix schema: <http://schema.org/>
+
+    # common bsfs prefixes
+    prefix bsfs: <http://bsfs.ai/schema/>
+    prefix bse: <http://bsfs.ai/schema/Entity#>
+
+    # essential nodes
+    bsfs:Entity rdfs:subClassOf bsfs:Node .
+    bsfs:File rdfs:subClassOf bsfs:Entity .
+
+    # common definitions
+    xsd:string rdfs:subClassOf bsfs:Literal .
+    xsd:integer rdfs:subClassOf bsfs:Literal .
+
+    '''
+
+
+## code ##
+
+class Extractor(abc.ABC):
+    """Produce (subject, predicate, value)-triples from some content.
+    The Extractor produces principal predicates that provide information
+    about the content itself (i.e., triples that include the subject),
+    and may also generate triples with auxiliary predicates if the
+    extracted value is a node itself.
+    """
+
+    # what type of content is expected (i.e. reader subclass).
+    CONTENT_READER: typing.Optional[str] = None
+
+    # extractor schema.
+    _schema: bsfs.schema.Schema
+
+    def __init__(self, schema: bsfs.schema.Schema):
+        self._schema = schema
+
+    def __str__(self) -> str:
+        return bsfs.typename(self)
+
+    def __repr__(self) -> str:
+        return f'{bsfs.typename(self)}()'
+
+    def __eq__(self, other: typing.Any) -> bool:
+        return isinstance(other, type(self)) \
+          and self.CONTENT_READER == other.CONTENT_READER \
+          and self.schema == other.schema
+
+    def __hash__(self) -> int:
+        return hash((type(self), self.CONTENT_READER, self.schema))
+
+    @property
+    def schema(self) -> bsfs.schema.Schema:
+        """Return the extractor's schema."""
+        return self._schema
+
+    @property
+    def principals(self) -> typing.Iterator[bsfs.schema.Predicate]:
+        """Return the principal predicates, i.e., relations from/to the extraction subject."""
+        ent = self.schema.node(ns.bsfs.Entity)
+        return (
+            pred
+            for pred
+            in self.schema.predicates()
+            if pred.domain <= ent or (pred.range is not None and pred.range <= ent)
+            )
+
+    @abc.abstractmethod
+    def extract(
+            self,
+            subject: node.Node,
+            content: typing.Any,
+            principals: typing.Iterable[bsfs.schema.Predicate],
+            ) -> typing.Iterator[typing.Tuple[node.Node, bsfs.schema.Predicate, typing.Any]]:
+        """Return (node, predicate, value) triples."""
+
+## EOF ##
diff --git a/bsie/extractor/builder.py b/bsie/extractor/builder.py
new file mode 100644
index 0000000..0fd3685
--- /dev/null
+++ b/bsie/extractor/builder.py
@@ -0,0 +1,77 @@
+"""Builder for Extractor instances.
+
+Part of the bsie module.
+A copy of the license is provided with the project.
+Author: Matthias Baumgartner, 2022
+"""
+# standard imports
+import typing
+
+# bsie imports
+from bsie.utils import bsfs, errors, safe_load, unpack_qualified_name
+
+# inner-module imports
+from . import base
+
+# exports
+__all__: typing.Sequence[str] = (
+    'ExtractorBuilder',
+    )
+
+
+## code ##
+
+class ExtractorBuilder():
+    """Build `bsie.extractor.Extractor` instances.
+
+    It is permissible to build multiple instances of the same extractor
+    (typically with different arguments), hence the ExtractorBuilder
+    receives a list of build specifications. Each specification is
+    a dict with a single key (extractor's qualified name) and a dict
+    to be used as keyword arguments.
+    Example: [{'bsie.extractor.generic.path.Path': {}}, ]
+
+    """
+
+    # build specifications
+    _specs: typing.List[typing.Dict[str, typing.Dict[str, typing.Any]]]
+
+    def __init__(self, specs: typing.List[typing.Dict[str, typing.Dict[str, typing.Any]]]):
+        self._specs = specs
+
+    def __iter__(self) -> typing.Iterator[int]:
+        """Iterate over extractor specifications."""
+        return iter(range(len(self._specs)))
+
+    def build(self, index: int) -> base.Extractor:
+        """Return an instance of the n'th extractor (n=*index*)."""
+        # get build instructions
+        specs = self._specs[index]
+
+        # check the structure of this single specification; expecting {name: {kwargs}}
+        if not isinstance(specs, dict):
+            raise TypeError(f'expected a dict, found {bsfs.typename(specs)}')
+        if len(specs) != 1:
+            raise TypeError(f'expected a dict of length one, found {len(specs)}')
+
+        # get name and args from specs
+        name = next(iter(specs.keys()))
+        kwargs = specs[name]
+
+        # check kwargs structure
+        if not isinstance(kwargs, dict):
+            raise TypeError(f'expected a dict, found {bsfs.typename(kwargs)}')
+
+        # check name and get module/class components
+        module_name, class_name = unpack_qualified_name(name)
+
+        # import extractor class
+        cls = safe_load(module_name, class_name)
+
+        try: # build and return instance
+            return cls(**kwargs)
+
+        except Exception as err:
+            raise errors.BuilderError(f'failed to build extractor {name} due to {bsfs.typename(err)}: {err}') from err
+
+## EOF ##
diff --git a/bsie/extractor/generic/constant.py b/bsie/extractor/generic/constant.py
index 11384e6..7b1d942 100644
--- a/bsie/extractor/generic/constant.py
+++ b/bsie/extractor/generic/constant.py
@@ -4,13 +4,15 @@ Part of the bsie module.
 A copy of the license is provided with the project.
 Author: Matthias Baumgartner, 2022
 """
-# imports
+# standard imports
 import typing
 
 # bsie imports
-from bsie.base import extractor
 from bsie.utils import bsfs, node
 
+# inner-module imports
+from .. import base
+
 # exports
 __all__: typing.Sequence[str] = (
     'Constant',
@@ -19,7 +21,7 @@ __all__: typing.Sequence[str] = (
 
 ## code ##
 
-class Constant(extractor.Extractor):
+class Constant(base.Extractor):
     """Extract information from file's path."""
 
     CONTENT_READER = None
@@ -32,7 +34,7 @@ class Constant(extractor.Extractor):
             schema: str,
             tuples: typing.Iterable[typing.Tuple[bsfs.URI, typing.Any]],
             ):
-        super().__init__(bsfs.schema.Schema.from_string(extractor.SCHEMA_PREAMBLE + schema))
+        super().__init__(bsfs.schema.Schema.from_string(base.SCHEMA_PREAMBLE + schema))
         # NOTE: Raises a KeyError if the predicate is not part of the schema
         self._tuples = tuple((self.schema.predicate(p_uri), value) for p_uri, value in tuples)
         # TODO: use schema instance for value checking
diff --git a/bsie/extractor/generic/path.py b/bsie/extractor/generic/path.py
index 7018e12..295715f 100644
--- a/bsie/extractor/generic/path.py
+++ b/bsie/extractor/generic/path.py
@@ -4,12 +4,12 @@ Part of the bsie module.
 A copy of the license is provided with the project.
 Author: Matthias Baumgartner, 2022
 """
-# imports
+# standard imports
 import os
 import typing
 
 # bsie imports
-from bsie.base import extractor
+from bsie.extractor import base
 from bsie.utils import bsfs, node, ns
 
 # exports
@@ -20,7 +20,7 @@ __all__: typing.Sequence[str] = (
 
 ## code ##
 
-class Path(extractor.Extractor):
+class Path(base.Extractor):
     """Extract information from file's path."""
 
     CONTENT_READER = 'bsie.reader.path.Path'
@@ -29,7 +29,7 @@ class Path(extractor.Extractor):
     _callmap: typing.Dict[bsfs.schema.Predicate, typing.Callable[[str], typing.Any]]
 
     def __init__(self):
-        super().__init__(bsfs.schema.Schema.from_string(extractor.SCHEMA_PREAMBLE + '''
+        super().__init__(bsfs.schema.Schema.from_string(base.SCHEMA_PREAMBLE + '''
             bse:filename rdfs:subClassOf bsfs:Predicate ;
                 rdfs:domain bsfs:File ;
                 rdfs:range xsd:string ;
diff --git a/bsie/extractor/generic/stat.py b/bsie/extractor/generic/stat.py
index 0b9ce29..1381fe2 100644
--- a/bsie/extractor/generic/stat.py
+++ b/bsie/extractor/generic/stat.py
@@ -4,14 +4,16 @@ Part of the bsie module.
 A copy of the license is provided with the project.
 Author: Matthias Baumgartner, 2022
 """
-# imports
+# standard imports
 import os
 import typing
 
 # bsie imports
-from bsie.base import extractor
 from bsie.utils import bsfs, node, ns
 
+# inner-module imports
+from .. import base
+
 # exports
 __all__: typing.Sequence[str] = (
     'Stat',
@@ -20,7 +22,7 @@ __all__: typing.Sequence[str] = (
 
 ## code ##
 
-class Stat(extractor.Extractor):
+class Stat(base.Extractor):
     """Extract information from the file system."""
 
     CONTENT_READER = 'bsie.reader.stat.Stat'
@@ -29,7 +31,7 @@ class Stat(extractor.Extractor):
     _callmap: typing.Dict[bsfs.schema.Predicate, typing.Callable[[os.stat_result], typing.Any]]
 
     def __init__(self):
-        super().__init__(bsfs.schema.Schema.from_string(extractor.SCHEMA_PREAMBLE + '''
+        super().__init__(bsfs.schema.Schema.from_string(base.SCHEMA_PREAMBLE + '''
             bse:filesize rdfs:subClassOf bsfs:Predicate ;
                 rdfs:domain bsfs:File ;
                 rdfs:range xsd:integer ;
diff --git a/bsie/lib/__init__.py b/bsie/lib/__init__.py
index 578c2c4..4239d3b 100644
--- a/bsie/lib/__init__.py
+++ b/bsie/lib/__init__.py
@@ -4,15 +4,17 @@ Part of the bsie module.
 A copy of the license is provided with the project.
 Author: Matthias Baumgartner, 2022
 """
-# imports
+# standard imports
 import typing
 
 # inner-module imports
 from .bsie import BSIE
+from .builder import PipelineBuilder
 
 # exports
 __all__: typing.Sequence[str] = (
     'BSIE',
+    'PipelineBuilder',
     )
 
 ## EOF ##
diff --git a/bsie/lib/bsie.py b/bsie/lib/bsie.py
index e087fa9..668783d 100644
--- a/bsie/lib/bsie.py
+++ b/bsie/lib/bsie.py
@@ -4,13 +4,15 @@ Part of the bsie module.
 A copy of the license is provided with the project.
 Author: Matthias Baumgartner, 2022
 """
-# imports
+# standard imports
 import typing
 
 # bsie imports
-from bsie.tools import Pipeline
 from bsie.utils import bsfs, node, ns
 
+# inner-module imports
+from .pipeline import Pipeline
+
 # exports
 __all__: typing.Sequence[str] = (
     'BSIE',
diff --git a/bsie/lib/builder.py b/bsie/lib/builder.py
new file mode 100644
index 0000000..c2abffe
--- /dev/null
+++ b/bsie/lib/builder.py
@@ -0,0 +1,85 @@
+"""Builder for Pipeline instances.
+
+Part of the bsie module.
+A copy of the license is provided with the project.
+Author: Matthias Baumgartner, 2022
+"""
+# standard imports
+import logging
+import typing
+
+# bsie imports
+from bsie.extractor import ExtractorBuilder
+from bsie.reader import ReaderBuilder
+from bsie.utils import bsfs, errors
+
+# inner-module imports
+from . import pipeline
+
+# exports
+__all__: typing.Sequence[str] = (
+    'PipelineBuilder',
+    )
+
+
+## code ##
+
+logger = logging.getLogger(__name__)
+
+class PipelineBuilder():
+    """Build `bsie.lib.pipeline.Pipeline` instances."""
+
+    # Prefix to be used in the Pipeline.
+    prefix: bsfs.Namespace
+
+    # builder for Readers.
+    rbuild: ReaderBuilder
+
+    # builder for Extractors.
+    ebuild: ExtractorBuilder
+
+    def __init__(
+            self,
+            prefix: bsfs.Namespace,
+            reader_builder: ReaderBuilder,
+            extractor_builder: ExtractorBuilder,
+            ):
+        self.prefix = prefix
+        self.rbuild = reader_builder
+        self.ebuild = extractor_builder
+
+    def build(self) -> pipeline.Pipeline:
+        """Return a Pipeline instance."""
+        ext2rdr = {}
+
+        for eidx in self.ebuild:
+            # build extractor
+            try:
+                ext = self.ebuild.build(eidx)
+
+            except errors.LoaderError as err: # failed to load extractor; skip
+                logger.error('failed to load extractor: %s', err)
+                continue
+
+            except errors.BuilderError as err: # failed to build instance; skip
+                logger.error(str(err))
+                continue
+
+            try:
+                # get reader required by extractor
+                if ext.CONTENT_READER is not None:
+                    rdr = self.rbuild.build(ext.CONTENT_READER)
+                else:
+                    rdr = None
+                # store extractor
+                ext2rdr[ext] = rdr
+
+            except errors.LoaderError as err: # failed to load reader
+                logger.error('failed to load reader: %s', err)
+
+            except errors.BuilderError as err: # failed to build reader
+                logger.error(str(err))
+
+        return pipeline.Pipeline(self.prefix, ext2rdr)
+
+## EOF ##
diff --git a/bsie/lib/pipeline.py b/bsie/lib/pipeline.py
new file mode 100644
index 0000000..e5ce1b7
--- /dev/null
+++ b/bsie/lib/pipeline.py
@@ -0,0 +1,145 @@
+"""Extraction pipeline that generates triples from files.
+
+Part of the bsie module.
+A copy of the license is provided with the project.
+Author: Matthias Baumgartner, 2022
+"""
+# standard imports
+from collections import defaultdict
+import logging
+import typing
+
+# bsie imports
+from bsie.extractor import Extractor
+from bsie.reader import Reader
+from bsie.utils import bsfs, errors, node, ns
+
+# exports
+__all__: typing.Sequence[str] = (
+    'Pipeline',
+    )
+
+# constants
+FILE_PREFIX = 'file#'
+
+## code ##
+
+logger = logging.getLogger(__name__)
+
+class Pipeline():
+    """Extraction pipeline to generate triples from files.
+
+    The Pipeline binds readers and extractors, and performs
+    the necessary operations to produce triples from a file.
+    It takes a best-effort approach to extract as many triples
+    as possible. Errors during the extraction are passed over
+    and reported to the log.
+
+    """
+
+    # combined extractor schemas.
+    _schema: bsfs.schema.Schema
+
+    # node prefix.
+    _prefix: bsfs.Namespace
+
+    # extractor -> reader mapping
+    _ext2rdr: typing.Dict[Extractor, typing.Optional[Reader]]
+
+    def __init__(
+            self,
+            prefix: bsfs.Namespace,
+            ext2rdr: typing.Dict[Extractor, typing.Optional[Reader]]
+            ):
+        # store core members
+        self._prefix = prefix + FILE_PREFIX
+        self._ext2rdr = ext2rdr
+        # compile schema from all extractors
+        self._schema = bsfs.schema.Schema.Union(ext.schema for ext in ext2rdr)
+
+    def __str__(self) -> str:
+        return bsfs.typename(self)
+
+    def __repr__(self) -> str:
+        return f'{bsfs.typename(self)}(...)'
+
+    def __hash__(self) -> int:
+        return hash((type(self), self._prefix, self._schema, tuple(self._ext2rdr), tuple(self._ext2rdr.values())))
+
+    def __eq__(self, other: typing.Any) -> bool:
+        return isinstance(other, type(self)) \
+           and self._schema == other._schema \
+           and self._prefix == other._prefix \
+           and self._ext2rdr == other._ext2rdr
+
+    @property
+    def schema(self) -> bsfs.schema.Schema:
+        """Return the pipeline's schema (combined from all extractors)."""
+        return self._schema
+
+    @property
+    def principals(self) -> typing.Iterator[bsfs.schema.Predicate]:
+        """Return the principal predicates that can be extracted."""
+        return iter({pred for ext in self._ext2rdr for pred in ext.principals})
+
+    def subschema(self, principals: typing.Iterable[bsfs.schema.Predicate]) -> bsfs.schema.Schema:
+        """Return the subset of the schema that supports the given *principals*."""
+        # materialize principals
+        principals = set(principals)
+        # collect and combine schemas from extractors
+        return bsfs.schema.Schema.Union({
+            ext.schema
+            for ext
+            in self._ext2rdr
+            if not set(ext.principals).isdisjoint(principals)
+            })
+
+    def __call__(
+            self,
+            path: bsfs.URI,
+            principals: typing.Optional[typing.Iterable[bsfs.schema.Predicate]] = None,
+            ) -> typing.Iterator[typing.Tuple[node.Node, bsfs.schema.Predicate, typing.Any]]:
+        """Extract triples from the file at *path*. Optionally, limit triples to *principals*."""
+        # materialize principals; default to all predicates of the pipeline's schema
+        principals = set(principals) if principals is not None else set(self.schema.predicates())
+
+        # select the extractors that provide at least one of the requested principals
+        extractors = {ext for ext in self._ext2rdr if not set(ext.principals).isdisjoint(principals)}
+
+        # corner-case short-cut
+        if len(extractors) == 0:
+            return
+
+        # invert the mapping (reader -> extractors) so that each reader runs only once
+        rdr2ext = defaultdict(set)
+        for ext in extractors:
+            rdr = self._ext2rdr[ext]
+            rdr2ext[rdr].add(ext)
+
+        # create subject for file
+        uuid = bsfs.uuid.UCID.from_path(path)
+        subject = node.Node(ns.bsfs.File, self._prefix[uuid])
+
+        # extract information
+        for rdr, extrs in rdr2ext.items():
+            try:
+                # get content (extractors without a reader receive None)
+                content = rdr(path) if rdr is not None else None
+
+                # apply extractors on this content
+                for ext in extrs:
+                    try:
+                        # NOTE: do not rebind *subject* here; later extractors need the file node
+                        for subj, pred, value in ext.extract(subject, content, principals):
+                            yield subj, pred, value
+
+                    except errors.ExtractorError as err:
+                        # critical extractor failure.
+                        logger.error('%s failed to extract triples from content: %s', ext, err)
+
+            except errors.ReaderError as err:
+                # failed to read any content. skip.
+                logger.error('%s failed to read content: %s', rdr, err)
+
+
+## EOF ##
diff --git a/bsie/reader/__init__.py b/bsie/reader/__init__.py
index a45f22b..4163d1c 100644
--- a/bsie/reader/__init__.py
+++ b/bsie/reader/__init__.py
@@ -15,5 +15,18 @@ Part of the bsie module.
 A copy of the license is provided with the project.
 Author: Matthias Baumgartner, 2022
 """
+# standard imports
+import typing
 
+# inner-module imports
+from .base import Reader
+from .builder import ReaderBuilder
+
+# exports
+__all__: typing.Sequence[str] = (
+    'Reader',
+    'ReaderBuilder',
+    )
+
+## EOF ##
 ## EOF ##
diff --git a/bsie/reader/base.py b/bsie/reader/base.py
new file mode 100644
index 0000000..cbabd36
--- /dev/null
+++ b/bsie/reader/base.py
@@ -0,0 +1,47 @@
+"""The Reader classes return high-level content structures from files.
+
+The Reader fulfills two purposes:
+    First, it brokers between multiple libraries and file formats.
+    Second, it separates multiple aspects of a file into distinct content types.
+
+Part of the bsie module.
+A copy of the license is provided with the project.
+Author: Matthias Baumgartner, 2022
+"""
+# standard imports
+import abc
+import typing
+
+# bsie imports
+from bsie.utils import bsfs
+
+# exports
+__all__: typing.Sequence[str] = (
+    'Reader',
+    )
+
+
+## code ##
+
+class Reader(abc.ABC):
+    """Abstract base class of readers: called with a file path, a reader returns some content."""
+
+    def __str__(self) -> str:
+        return bsfs.typename(self)
+
+    def __repr__(self) -> str:
+        return f'{bsfs.typename(self)}()'
+
+    def __eq__(self, other: typing.Any) -> bool:
+        return isinstance(other, type(self)) # decided by class alone; no instance state is compared
+
+    def __hash__(self) -> int:
+        return hash(type(self)) # hash of the class, hence the same for all instances of one class
+
+    @abc.abstractmethod
+    def __call__(self, path: bsfs.URI) -> typing.Any:
+        """Return some content of the file at *path*. Must be implemented by subclasses.
+        Raises a `ReaderError` if the reader cannot make sense of the file format.
+        """
+
+## EOF ##
diff --git a/bsie/reader/builder.py b/bsie/reader/builder.py
new file mode 100644
index 0000000..bce5397
--- /dev/null
+++ b/bsie/reader/builder.py
@@ -0,0 +1,74 @@
+"""Builder for Reader instances, constructed from their qualified class names.
+
+Part of the bsie module.
+A copy of the license is provided with the project.
+Author: Matthias Baumgartner, 2022
+"""
+# standard imports
+import typing
+
+# bsie imports
+from bsie.utils import bsfs, errors, safe_load, unpack_qualified_name
+
+# inner-module imports
+from . import base
+
+# exports
+__all__: typing.Sequence[str] = (
+    'ReaderBuilder',
+    )
+
+
+## code ##
+
+class ReaderBuilder():
+    """Build `bsie.reader.Reader` instances.
+
+    Readers are defined via their qualified class name
+    (e.g., bsie.reader.path.Path) and optional keyword
+    arguments that are passed to the constructor via
+    the *kwargs* argument (name as key, kwargs as value).
+    The ReaderBuilder keeps a cache of previously built
+    reader instances, as they are anyway built with
+    identical keyword arguments.
+
+    """
+
+    # keyword arguments per reader (qualified class name -> constructor kwargs)
+    _kwargs: typing.Dict[str, typing.Dict[str, typing.Any]]
+
+    # cached readers (qualified class name -> instance)
+    _cache: typing.Dict[str, base.Reader]
+
+    def __init__(self, kwargs: typing.Dict[str, typing.Dict[str, typing.Any]]):
+        self._kwargs = kwargs
+        self._cache = {}
+
+    def build(self, name: str) -> base.Reader:
+        """Return the reader for the qualified class *name*; instances are built once, then cached.
+        Raises a `BuilderError` (`LoaderError` included) if the class cannot be loaded or built.
+        """
+        # return cached instance
+        if name in self._cache:
+            return self._cache[name]
+
+        # check name (TypeError/ValueError if malformed) and get module/class components
+        module_name, class_name = unpack_qualified_name(name)
+
+        # import reader class
+        cls = safe_load(module_name, class_name)
+
+        # get kwargs; readers without configured kwargs are built without arguments
+        kwargs = self._kwargs.get(name, {})
+        if not isinstance(kwargs, dict):
+            raise TypeError(f'expected a kwargs dict, found {bsfs.typename(kwargs)}')
+
+        try: # build, cache, and return instance
+            obj = cls(**kwargs)
+            self._cache[name] = obj
+            return obj
+
+        except Exception as err:
+            raise errors.BuilderError(f'failed to build reader {name} due to {bsfs.typename(err)}: {err}') from err
+
+## EOF ##
diff --git a/bsie/reader/path.py b/bsie/reader/path.py
index d60f187..1ca05a0 100644
--- a/bsie/reader/path.py
+++ b/bsie/reader/path.py
@@ -4,11 +4,11 @@ Part of the bsie module.
 A copy of the license is provided with the project.
 Author: Matthias Baumgartner, 2022
 """
-# imports
+# standard imports
 import typing
 
-# bsie imports
-from bsie.base import reader
+# inner-module imports
+from . import base
 
 # exports
 __all__: typing.Sequence[str] = (
@@ -18,7 +18,7 @@ __all__: typing.Sequence[str] = (
 
 ## code ##
 
-class Path(reader.Reader):
+class Path(base.Reader):
     """Return the path."""
 
     def __call__(self, path: str) -> str:
diff --git a/bsie/reader/stat.py b/bsie/reader/stat.py
index fc5fb24..706dc47 100644
--- a/bsie/reader/stat.py
+++ b/bsie/reader/stat.py
@@ -4,12 +4,15 @@ Part of the bsie module.
 A copy of the license is provided with the project.
 Author: Matthias Baumgartner, 2022
 """
-# imports
+# standard imports
 import os
 import typing
 
 # bsie imports
-from bsie.base import errors, reader
+from bsie.utils import errors
+
+# inner-module imports
+from . import base
 
 # exports
 __all__: typing.Sequence[str] = (
@@ -19,7 +22,7 @@ __all__: typing.Sequence[str] = (
 
 ## code ##
 
-class Stat(reader.Reader):
+class Stat(base.Reader):
     """Read and return the filesystem's stat infos."""
 
     def __call__(self, path: str) -> os.stat_result:
diff --git a/bsie/tools/__init__.py b/bsie/tools/__init__.py
deleted file mode 100644
index 803c321..0000000
--- a/bsie/tools/__init__.py
+++ /dev/null
@@ -1,20 +0,0 @@
-"""
-
-Part of the bsie module.
-A copy of the license is provided with the project.
-Author: Matthias Baumgartner, 2022
-"""
-# imports
-import typing
-
-# inner-module imports
-from . import builder
-from .pipeline import Pipeline
-
-# exports
-__all__: typing.Sequence[str] = (
-    'builder',
-    'Pipeline',
-    )
-
-## EOF ##
diff --git a/bsie/tools/builder.py b/bsie/tools/builder.py
deleted file mode 100644
index 190d9bf..0000000
--- a/bsie/tools/builder.py
+++ /dev/null
@@ -1,226 +0,0 @@
-"""
-
-Part of the bsie module.
-A copy of the license is provided with the project.
-Author: Matthias Baumgartner, 2022
-"""
-# imports
-import importlib
-import logging
-import typing
-
-# bsie imports
-from bsie import base
-from bsie.base import errors
-from bsie.utils import bsfs
-
-# inner-module imports
-from . import pipeline
-
-# exports
-__all__: typing.Sequence[str] = (
-    'ExtractorBuilder',
-    'PipelineBuilder',
-    'ReaderBuilder',
-    )
-
-
-## code ##
-
-logger = logging.getLogger(__name__)
-
-def _safe_load(module_name: str, class_name: str):
-    """Get a class from a module. Raise BuilderError if anything goes wrong."""
-    try:
-        # load the module
-        module = importlib.import_module(module_name)
-    except Exception as err:
-        # cannot import module
-        raise errors.LoaderError(f'cannot load module {module_name}') from err
-
-    try:
-        # get the class from the module
-        cls = getattr(module, class_name)
-    except Exception as err:
-        # cannot find the class
-        raise errors.LoaderError(f'cannot load class {class_name} from module {module_name}') from err
-
-    return cls
-
-
-def _unpack_name(name):
-    """Split a name into its module and class component (dot-separated)."""
-    if not isinstance(name, str):
-        raise TypeError(name)
-    if '.' not in name:
-        raise ValueError('name must be a qualified class name.')
-    module_name, class_name = name[:name.rfind('.')], name[name.rfind('.')+1:]
-    if module_name == '':
-        raise ValueError('name must be a qualified class name.')
-    return module_name, class_name
-
-
-class ReaderBuilder():
-    """Build `bsie.base.Reader` instances.
-
-    Readers are defined via their qualified class name
-    (e.g., bsie.reader.path.Path) and optional keyword
-    arguments that are passed to the constructor via
-    the *kwargs* argument (name as key, kwargs as value).
-    The ReaderBuilder keeps a cache of previously built
-    reader instances, as they are anyway built with
-    identical keyword arguments.
-
-    """
-
-    # keyword arguments
-    _kwargs: typing.Dict[str, typing.Dict[str, typing.Any]]
-
-    # cached readers
-    _cache: typing.Dict[str, base.Reader]
-
-    def __init__(self, kwargs: typing.Dict[str, typing.Dict[str, typing.Any]]):
-        self._kwargs = kwargs
-        self._cache = {}
-
-    def build(self, name: str) -> base.Reader:
-        """Return an instance for the qualified class name."""
-        # return cached instance
-        if name in self._cache:
-            return self._cache[name]
-
-        # check name and get module/class components
-        module_name, class_name = _unpack_name(name)
-
-        # import reader class
-        cls = _safe_load(module_name, class_name)
-
-        # get kwargs
-        kwargs = self._kwargs.get(name, {})
-        if not isinstance(kwargs, dict):
-            raise TypeError(f'expected a kwargs dict, found {bsfs.typename(kwargs)}')
-
-        try: # build, cache, and return instance
-            obj = cls(**kwargs)
-            # cache instance
-            self._cache[name] = obj
-            # return instance
-            return obj
-
-        except Exception as err:
-            raise errors.BuilderError(f'failed to build reader {name} due to {bsfs.typename(err)}: {err}') from err
-
-
-class ExtractorBuilder():
-    """Build `bsie.base.Extractor instances.
-
-    It is permissible to build multiple instances of the same extractor
-    (typically with different arguments), hence the ExtractorBuilder
-    receives a list of build specifications. Each specification is
-    a dict with a single key (extractor's qualified name) and a dict
-    to be used as keyword arguments.
-    Example: [{'bsie.extractor.generic.path.Path': {}}, ]
-
-    """
-
-    # build specifications
-    _specs: typing.List[typing.Dict[str, typing.Dict[str, typing.Any]]]
-
-    def __init__(self, specs: typing.List[typing.Dict[str, typing.Dict[str, typing.Any]]]):
-        self._specs = specs
-
-    def __iter__(self) -> typing.Iterator[int]:
-        """Iterate over extractor specifications."""
-        return iter(range(len(self._specs)))
-
-    def build(self, index: int) -> base.Extractor:
-        """Return an instance of the n'th extractor (n=*index*)."""
-        # get build instructions
-        specs = self._specs[index]
-
-        # check specs structure. expecting[{name: {kwargs}}]
-        if not isinstance(specs, dict):
-            raise TypeError(f'expected a dict, found {bsfs.typename(specs)}')
-        if len(specs) != 1:
-            raise TypeError(f'expected a dict of length one, found {len(specs)}')
-
-        # get name and args from specs
-        name = next(iter(specs.keys()))
-        kwargs = specs[name]
-
-        # check kwargs structure
-        if not isinstance(kwargs, dict):
-            raise TypeError(f'expected a dict, found {bsfs.typename(kwargs)}')
-
-        # check name and get module/class components
-        module_name, class_name = _unpack_name(name)
-
-        # import extractor class
-        cls = _safe_load(module_name, class_name)
-
-        try: # build and return instance
-            return cls(**kwargs)
-
-        except Exception as err:
-            raise errors.BuilderError(f'failed to build extractor {name} due to {bsfs.typename(err)}: {err}') from err
-
-
-class PipelineBuilder():
-    """Build `bsie.tools.pipeline.Pipeline` instances."""
-
-    # Prefix to be used in the Pipeline.
-    prefix: bsfs.Namespace
-
-    # builder for Readers.
-    rbuild: ReaderBuilder
-
-    # builder for Extractors.
-    ebuild: ExtractorBuilder
-
-    def __init__(
-            self,
-            prefix: bsfs.Namespace,
-            reader_builder: ReaderBuilder,
-            extractor_builder: ExtractorBuilder,
-            ):
-        self.prefix = prefix
-        self.rbuild = reader_builder
-        self.ebuild = extractor_builder
-
-    def build(self) -> pipeline.Pipeline:
-        """Return a Pipeline instance."""
-        ext2rdr = {}
-
-        for eidx in self.ebuild:
-            # build extractor
-            try:
-                ext = self.ebuild.build(eidx)
-
-            except errors.LoaderError as err: # failed to load extractor; skip
-                logger.error('failed to load extractor: %s', err)
-                continue
-
-            except errors.BuilderError as err: # failed to build instance; skip
-                logger.error(str(err))
-                continue
-
-            try:
-                # get reader required by extractor
-                if ext.CONTENT_READER is not None:
-                    rdr = self.rbuild.build(ext.CONTENT_READER)
-                else:
-                    rdr = None
-                # store extractor
-                ext2rdr[ext] = rdr
-
-            except errors.LoaderError as err: # failed to load reader
-                logger.error('failed to load reader: %s', err)
-
-            except errors.BuilderError as err: # failed to build reader
-                logger.error(str(err))
-
-        return pipeline.Pipeline(self.prefix, ext2rdr)
-
-
-
-## EOF ##
diff --git a/bsie/tools/pipeline.py b/bsie/tools/pipeline.py
deleted file mode 100644
index 20e8ddf..0000000
--- a/bsie/tools/pipeline.py
+++ /dev/null
@@ -1,144 +0,0 @@
-"""
-
-Part of the bsie module.
-A copy of the license is provided with the project.
-Author: Matthias Baumgartner, 2022
-"""
-# imports
-from collections import defaultdict
-import logging
-import typing
-
-# bsie imports
-from bsie import base
-from bsie.utils import bsfs, node, ns
-
-# exports
-__all__: typing.Sequence[str] = (
-    'Pipeline',
-    )
-
-# constants
-FILE_PREFIX = 'file#'
-
-## code ##
-
-logger = logging.getLogger(__name__)
-
-class Pipeline():
-    """Extraction pipeline to generate triples from files.
-
-    The Pipeline binds readers and extractors, and performs
-    the necessary operations to produce triples from a file.
-    It takes a best-effort approach to extract as many triples
-    as possible. Errors during the extraction are passed over
-    and reported to the log.
-
-    """
-
-    # combined extractor schemas.
-    _schema: bsfs.schema.Schema
-
-    # node prefix.
-    _prefix: bsfs.Namespace
-
-    # extractor -> reader mapping
-    _ext2rdr: typing.Dict[base.extractor.Extractor, typing.Optional[base.reader.Reader]]
-
-    def __init__(
-            self,
-            prefix: bsfs.Namespace,
-            ext2rdr: typing.Dict[base.extractor.Extractor, typing.Optional[base.reader.Reader]]
-            ):
-        # store core members
-        self._prefix = prefix + FILE_PREFIX
-        self._ext2rdr = ext2rdr
-        # compile schema from all extractors
-        self._schema = bsfs.schema.Schema.Union(ext.schema for ext in ext2rdr)
-
-    def __str__(self) -> str:
-        return bsfs.typename(self)
-
-    def __repr__(self) -> str:
-        return f'{bsfs.typename(self)}(...)'
-
-    def __hash__(self) -> int:
-        return hash((type(self), self._prefix, self._schema, tuple(self._ext2rdr), tuple(self._ext2rdr.values())))
-
-    def __eq__(self, other: typing.Any) -> bool:
-        return isinstance(other, type(self)) \
-           and self._schema == other._schema \
-           and self._prefix == other._prefix \
-           and self._ext2rdr == other._ext2rdr
-
-    @property
-    def schema(self) -> bsfs.schema.Schema:
-        """Return the pipeline's schema (combined from all extractors)."""
-        return self._schema
-
-    @property
-    def principals(self) -> typing.Iterator[bsfs.schema.Predicate]:
-        """Return the principal predicates that can be extracted."""
-        return iter({pred for ext in self._ext2rdr for pred in ext.principals})
-
-    def subschema(self, principals: typing.Iterable[bsfs.schema.Predicate]) -> bsfs.schema.Schema:
-        """Return the subset of the schema that supports the given *principals*."""
-        # materialize principals
-        principals = set(principals)
-        # collect and combine schemas from extractors
-        return bsfs.schema.Schema.Union({
-            ext.schema
-            for ext
-            in self._ext2rdr
-            if not set(ext.principals).isdisjoint(principals)
-            })
-
-    def __call__(
-            self,
-            path: bsfs.URI,
-            principals: typing.Optional[typing.Iterable[bsfs.schema.Predicate]] = None,
-            ) -> typing.Iterator[typing.Tuple[node.Node, bsfs.schema.Predicate, typing.Any]]:
-        """Extract triples from the file at *path*. Optionally, limit triples to *principals*."""
-        # get principals
-        principals = set(principals) if principals is not None else set(self.schema.predicates())
-
-        # get extractors
-        extractors = {ext for ext in self._ext2rdr if not set(ext.principals).isdisjoint(principals)}
-
-        # corner-case short-cut
-        if len(extractors) == 0:
-            return
-
-        # get readers -> extractors mapping
-        rdr2ext = defaultdict(set)
-        for ext in extractors:
-            rdr = self._ext2rdr[ext]
-            rdr2ext[rdr].add(ext)
-
-        # create subject for file
-        uuid = bsfs.uuid.UCID.from_path(path)
-        subject = node.Node(ns.bsfs.File, self._prefix[uuid])
-
-        # extract information
-        for rdr, extrs in rdr2ext.items():
-            try:
-                # get content
-                content = rdr(path) if rdr is not None else None
-
-                # apply extractors on this content
-                for ext in extrs:
-                    try:
-                        # get predicate/value tuples
-                        for subject, pred, value in ext.extract(subject, content, principals):
-                            yield subject, pred, value
-
-                    except base.errors.ExtractorError as err:
-                        # critical extractor failure.
-                        logger.error('%s failed to extract triples from content: %s', ext, err)
-
-            except base.errors.ReaderError as err:
-                # failed to read any content. skip.
-                logger.error('%s failed to read content: %s', rdr, err)
-
-
-## EOF ##
diff --git a/bsie/utils/__init__.py b/bsie/utils/__init__.py
index 3981dc7..9cb60ed 100644
--- a/bsie/utils/__init__.py
+++ b/bsie/utils/__init__.py
@@ -4,21 +4,24 @@ Part of the bsie module.
 A copy of the license is provided with the project.
 Author: Matthias Baumgartner, 2022
 """
-# imports
+# standard imports
 import typing
 
 # inner-module imports
 from . import bsfs
+from . import filematcher
 from . import namespaces as ns
 from . import node
-from . import filematcher
+from .loading import safe_load, unpack_qualified_name
 
 # exports
 __all__: typing.Sequence[str] = (
-    'filematcher',
     'bsfs',
+    'filematcher',
     'node',
     'ns',
+    'safe_load',
+    'unpack_qualified_name',
     )
 
 ## EOF ##
diff --git a/bsie/utils/errors.py b/bsie/utils/errors.py
new file mode 100644
index 0000000..5fafd5b
--- /dev/null
+++ b/bsie/utils/errors.py
@@ -0,0 +1,45 @@
+"""Common BSIE exceptions.
+
+Part of the bsie module.
+A copy of the license is provided with the project.
+Author: Matthias Baumgartner, 2022
+"""
+# standard imports
+import typing
+
+# exports
+__all__: typing.Sequence[str] = (
+    'BuilderError',
+    'ExtractorError',
+    'LoaderError',
+    'ReaderError',
+    )
+
+
+## code ##
+
+class _BSIEError(Exception):
+    """Generic BSIE error. Common base class of all exceptions defined in this module."""
+
+class BuilderError(_BSIEError):
+    """The Builder failed to create an instance."""
+
+class LoaderError(BuilderError):
+    """Failed to load a module or class. Being a BuilderError, it is also caught as such."""
+
+class ExtractorError(_BSIEError):
+    """The Extractor failed to process the given content."""
+
+class ReaderError(_BSIEError):
+    """The Reader failed to read the given file."""
+
+class ProgrammingError(_BSIEError):
+    """An assertion-like error that indicates a code-base issue."""
+
+class UnreachableError(ProgrammingError):
+    """A ProgrammingError raised at a point in code that should logically not be reachable."""
+
+class ParserError(_BSIEError):
+    """Failed to parse due to invalid syntax or structures."""
+
+## EOF ##
diff --git a/bsie/utils/filematcher/parser.py b/bsie/utils/filematcher/parser.py
index 0654742..2f82875 100644
--- a/bsie/utils/filematcher/parser.py
+++ b/bsie/utils/filematcher/parser.py
@@ -7,16 +7,14 @@ Author: Matthias Baumgartner, 2021
 # standard imports
 import typing
 
-# non-standard imports
+# external imports
 import pyparsing
 from pyparsing import printables, alphas8bit, punc8bit, QuotedString, Word, \
                       delimitedList, Or, CaselessKeyword, Group, oneOf, Optional
 
-# bsie imports
-from bsie.base import errors
-
 # inner-module imports
 from . import matcher
+from .. import errors
 
 # exports
 __all__: typing.Sequence[str] = (
diff --git a/bsie/utils/loading.py b/bsie/utils/loading.py
new file mode 100644
index 0000000..eb05c35
--- /dev/null
+++ b/bsie/utils/loading.py
@@ -0,0 +1,54 @@
+"""Utilities to load classes by their qualified name.
+
+Part of the bsie module.
+A copy of the license is provided with the project.
+Author: Matthias Baumgartner, 2022
+"""
+# standard imports
+import importlib
+import typing
+
+# inner-module imports
+from . import errors
+
+# exports
+__all__: typing.Sequence[str] = (
+    'safe_load',
+    'unpack_qualified_name',
+    )
+
+
+## code ##
+
+def safe_load(module_name: str, class_name: str):
+    """Get a class from a module. Raise LoaderError (a BuilderError) if the import or lookup fails."""
+    try:
+        # load the module
+        module = importlib.import_module(module_name)
+    except Exception as err:
+        # cannot import module (any exception raised by the import is wrapped)
+        raise errors.LoaderError(f'cannot load module {module_name}') from err
+
+    try:
+        # get the class from the module
+        cls = getattr(module, class_name)
+    except Exception as err:
+        # cannot find the class (the module has no attribute of that name)
+        raise errors.LoaderError(f'cannot load class {class_name} from module {module_name}') from err
+
+    return cls
+
+
+def unpack_qualified_name(name):
+    """Split a qualified class name at its last dot and return (module name, class name)."""
+    if not isinstance(name, str):
+        raise TypeError(name) # only strings can be unpacked
+    if '.' not in name:
+        raise ValueError('name must be a qualified class name.')
+    module_name, class_name = name[:name.rfind('.')], name[name.rfind('.')+1:]
+    if module_name == '': # the only dot is the leading character (e.g. '.Foo')
+        raise ValueError('name must be a qualified class name.')
+    return module_name, class_name # note: class_name is empty if name ends with a dot
+
+
+## EOF ##
-- 
cgit v1.2.3