From d2b4a528465dc01e8db92b61293c458c7911a333 Mon Sep 17 00:00:00 2001
From: Matthias Baumgartner <dev@igsor.net>
Date: Mon, 31 Oct 2022 12:21:22 +0100
Subject: essential interfaces (reader, extractor, errors)

---
 bsie/__init__.py       | 13 +++++++++++++
 bsie/base/__init__.py  | 24 ++++++++++++++++++++++++
 bsie/base/errors.py    | 22 ++++++++++++++++++++++
 bsie/base/extractor.py | 50 ++++++++++++++++++++++++++++++++++++++++++++++++++
 bsie/base/reader.py    | 48 ++++++++++++++++++++++++++++++++++++++++++++++++
 bsie/utils/__init__.py | 20 ++++++++++++++++++++
 bsie/utils/bsfs.py     | 20 ++++++++++++++++++++
 bsie/utils/node.py     | 39 +++++++++++++++++++++++++++++++++++++++
 8 files changed, 236 insertions(+)
 create mode 100644 bsie/__init__.py
 create mode 100644 bsie/base/__init__.py
 create mode 100644 bsie/base/errors.py
 create mode 100644 bsie/base/extractor.py
 create mode 100644 bsie/base/reader.py
 create mode 100644 bsie/utils/__init__.py
 create mode 100644 bsie/utils/bsfs.py
 create mode 100644 bsie/utils/node.py

diff --git a/bsie/__init__.py b/bsie/__init__.py
new file mode 100644
index 0000000..2f2477a
--- /dev/null
+++ b/bsie/__init__.py
@@ -0,0 +1,13 @@
+"""The BSIE module extracts triples from files for insertion into a BSFS storage.
+
+Part of the bsie module.
+A copy of the license is provided with the project.
+Author: Matthias Baumgartner, 2022
+"""
+# imports
+import typing
+
+# exports
+__all__: typing.Sequence[str] = []
+
+## EOF ##
diff --git a/bsie/base/__init__.py b/bsie/base/__init__.py
new file mode 100644
index 0000000..0154862
--- /dev/null
+++ b/bsie/base/__init__.py
@@ -0,0 +1,24 @@
+"""The base module defines the BSIE interfaces.
+
+You'll mostly find abstract classes here.
+
+Part of the bsie module.
+A copy of the license is provided with the project.
+Author: Matthias Baumgartner, 2022
+"""
+# imports
+import typing
+
+# inner-module imports
+from . import errors
+from . import extractor
+from . import reader
+
+# exports
+__all__: typing.Sequence[str] = (
+    'errors',
+    'extractor',
+    'reader',
+    )
+
+## EOF ##
diff --git a/bsie/base/errors.py b/bsie/base/errors.py
new file mode 100644
index 0000000..f86ffb2
--- /dev/null
+++ b/bsie/base/errors.py
@@ -0,0 +1,22 @@
+"""Common BSIE exceptions.
+
+Part of the bsie module.
+A copy of the license is provided with the project.
+Author: Matthias Baumgartner, 2022
+"""
+# imports
+import typing
+
+# exports
+__all__: typing.Sequence[str] = []
+
+
+## code ##
+
+class _BSIE_Error(Exception):
+    """Generic BSIE error."""
+
+class ReaderError(_BSIE_Error):
+    """The Reader failed to read the given file."""
+
+## EOF ##
diff --git a/bsie/base/extractor.py b/bsie/base/extractor.py
new file mode 100644
index 0000000..d5b0922
--- /dev/null
+++ b/bsie/base/extractor.py
@@ -0,0 +1,50 @@
+"""The Extractor classes transform content into triples.
+
+Part of the bsie module.
+A copy of the license is provided with the project.
+Author: Matthias Baumgartner, 2022
+"""
+# imports
+import abc
+import collections
+import typing
+
+# inner-module imports
+from . import reader
+from bsie.utils import node
+from bsie.utils.bsfs import URI, typename
+
+# exports
+__all__: typing.Sequence[str] = (
+    'Extractor',
+    )
+
+
+## code ##
+
+class Extractor(abc.ABC, collections.abc.Iterable, collections.abc.Callable):
+    """Produce (node, predicate, value)-triples from some content."""
+
+    # what type of content is expected (i.e. reader subclass).
+    CONTENT_READER: typing.Optional[typing.Type[reader.Reader]] = None
+
+    def __str__(self) -> str:
+        return typename(self)
+
+    def __repr__(self) -> str:
+        return f'{typename(self)}()'
+
+    @abc.abstractmethod
+    def schema(self) -> str:
+        """Return the schema (predicates and nodes) produced by this Extractor."""
+
+    @abc.abstractmethod
+    def extract(
+            self,
+            subject: node.Node,
+            content: typing.Any,
+            predicates: typing.Iterable[URI],
+            ) -> typing.Iterator[typing.Tuple[node.Node, URI, typing.Any]]:
+        """Return (node, predicate, value) triples."""
+
+## EOF ##
diff --git a/bsie/base/reader.py b/bsie/base/reader.py
new file mode 100644
index 0000000..f29e451
--- /dev/null
+++ b/bsie/base/reader.py
@@ -0,0 +1,48 @@
+"""The Reader classes return high-level content structures from files.
+
+The Reader fulfills two purposes:
+    First, it brokers between multiple libraries and file formats.
+    Second, it separates multiple aspects of a file into distinct content types.
+
+Part of the bsie module.
+A copy of the license is provided with the project.
+Author: Matthias Baumgartner, 2022
+"""
+# imports
+import abc
+import typing
+
+# inner-module imports
+from bsie.utils.bsfs import URI, typename
+
+# exports
+__all__: typing.Sequence[str] = (
+    'Aggregator',
+    'Reader',
+    )
+
+
+## code ##
+
+class Reader(abc.ABC):
+    """Read and return some content from a file."""
+
+    # In what data structure content is returned
+    CONTENT_TYPE = typing.Union[typing.Any]
+    # NOTE: Child classes must also assign a typing.Union even if there's
+    # only one option
+
+    def __str__(self) -> str:
+        return typename(self)
+
+    def __repr__(self) -> str:
+        return f'{typename(self)}()'
+
+    # FIXME: How about using contexts instead of calls?
+    @abc.abstractmethod
+    def __call__(self, path: URI) -> CONTENT_TYPE:
+        """Return some content of the file at *path*.
+        Raises a `ReaderError` if the reader cannot make sense of the file format.
+        """
+
+## EOF ##
diff --git a/bsie/utils/__init__.py b/bsie/utils/__init__.py
new file mode 100644
index 0000000..1137187
--- /dev/null
+++ b/bsie/utils/__init__.py
@@ -0,0 +1,20 @@
+"""Common tools and definitions.
+
+Part of the bsie module.
+A copy of the license is provided with the project.
+Author: Matthias Baumgartner, 2022
+"""
+# imports
+import typing
+
+# inner-module imports
+from . import bsfs
+from . import node
+
+# exports
+__all__: typing.Sequence[str] = (
+    'bsfs',
+    'node',
+    )
+
+## EOF ##
diff --git a/bsie/utils/bsfs.py b/bsie/utils/bsfs.py
new file mode 100644
index 0000000..33eb178
--- /dev/null
+++ b/bsie/utils/bsfs.py
@@ -0,0 +1,20 @@
+"""BSFS bridge, provides BSFS bindings for BSIE.
+
+Part of the bsie module.
+A copy of the license is provided with the project.
+Author: Matthias Baumgartner, 2022
+"""
+# imports
+import typing
+
+# bsfs imports
+from bsfs.utils import URI
+from bsfs.utils import typename
+
+# exports
+__all__: typing.Sequence[str] = (
+    'URI',
+    'typename',
+    )
+
+## EOF ##
diff --git a/bsie/utils/node.py b/bsie/utils/node.py
new file mode 100644
index 0000000..60863a4
--- /dev/null
+++ b/bsie/utils/node.py
@@ -0,0 +1,39 @@
+"""Lightweight Node to bridge to BSFS.
+
+Part of the bsie module.
+A copy of the license is provided with the project.
+Author: Matthias Baumgartner, 2022
+"""
+# imports
+import typing
+
+# inner-module imports
+from bsie.utils.bsfs import URI
+
+# exports
+__all__: typing.Sequence[str] = (
+    'Node'
+    )
+
+
+## code ##
+
+class Node():
+    """Lightweight Node, disconnected from any bsfs structures."""
+
+    # node type.
+    node_type: URI
+
+    # node URI.
+    uri: URI
+
+    def __init__(
+            self,
+            node_type: URI,
+            uri: URI,
+            ):
+        # assign members
+        self.node_type = URI(node_type)
+        self.uri = URI(uri)
+
+## EOF ##
-- 
cgit v1.2.3


From 068b3651c16916877eb8d5fdfec52485a507e204 Mon Sep 17 00:00:00 2001
From: Matthias Baumgartner <dev@igsor.net>
Date: Mon, 31 Oct 2022 13:05:31 +0100
Subject: path and stat readers

---
 bsie/reader/__init__.py  | 19 +++++++++++++++++++
 bsie/reader/path.py      | 31 +++++++++++++++++++++++++++++++
 bsie/reader/stat.py      | 34 ++++++++++++++++++++++++++++++++++
 test/reader/test_path.py | 28 ++++++++++++++++++++++++++++
 test/reader/test_stat.py | 34 ++++++++++++++++++++++++++++++++++
 5 files changed, 146 insertions(+)
 create mode 100644 bsie/reader/__init__.py
 create mode 100644 bsie/reader/path.py
 create mode 100644 bsie/reader/stat.py
 create mode 100644 test/reader/test_path.py
 create mode 100644 test/reader/test_stat.py

diff --git a/bsie/reader/__init__.py b/bsie/reader/__init__.py
new file mode 100644
index 0000000..a45f22b
--- /dev/null
+++ b/bsie/reader/__init__.py
@@ -0,0 +1,19 @@
+"""The Reader classes return high-level content structures from files.
+
+The Reader fulfills two purposes:
+    First, it brokers between multiple libraries and file formats.
+    Second, it separates multiple aspects of a file into distinct content types.
+
+Often, different libraries focus on reading different types of content from a
+file. E.g. one would use different modules to read file system infos than to
+read exif or pixel data of an image. Hence, this module is organized by content
+type. Each distinct type can be implemented in a file or submodule that
+provides a Reader implementation. Through utilization of submodules, different
+file formats can be supported.
+
+Part of the bsie module.
+A copy of the license is provided with the project.
+Author: Matthias Baumgartner, 2022
+"""
+
+## EOF ##
diff --git a/bsie/reader/path.py b/bsie/reader/path.py
new file mode 100644
index 0000000..d27c664
--- /dev/null
+++ b/bsie/reader/path.py
@@ -0,0 +1,31 @@
+"""The Path reader produces a file path.
+
+Part of the bsie module.
+A copy of the license is provided with the project.
+Author: Matthias Baumgartner, 2022
+"""
+# imports
+import os
+import typing
+
+# inner-module imports
+from bsie.base import reader
+
+# exports
+__all__: typing.Sequence[str] = (
+    'Path',
+    )
+
+
+## code ##
+
+class Path(reader.Reader):
+    """Return the path."""
+
+    CONTENT_TYPE = typing.Union[str]
+
+    def __call__(self, path: str) -> CONTENT_TYPE:
+        return path
+
+
+## EOF ##
diff --git a/bsie/reader/stat.py b/bsie/reader/stat.py
new file mode 100644
index 0000000..f0b83fb
--- /dev/null
+++ b/bsie/reader/stat.py
@@ -0,0 +1,34 @@
+"""The Stat reader produces filesystem stat information.
+
+Part of the bsie module.
+A copy of the license is provided with the project.
+Author: Matthias Baumgartner, 2022
+"""
+# imports
+import os
+import typing
+
+# inner-module imports
+from bsie.base import reader, errors
+
+# exports
+__all__: typing.Sequence[str] = (
+    'Stat',
+    )
+
+
+## code ##
+
+class Stat(reader.Reader):
+    """Read and return the filesystem's stat infos."""
+
+    CONTENT_TYPE = typing.Union[os.stat_result]
+
+    def __call__(self, path: str) -> CONTENT_TYPE:
+        try:
+            return os.stat(path)
+        except Exception:
+            raise errors.ReaderError(path)
+
+
+## EOF ##
diff --git a/test/reader/test_path.py b/test/reader/test_path.py
new file mode 100644
index 0000000..fd7bc5a
--- /dev/null
+++ b/test/reader/test_path.py
@@ -0,0 +1,28 @@
+"""
+
+Part of the bsie test suite.
+A copy of the license is provided with the project.
+Author: Matthias Baumgartner, 2022
+"""
+# imports
+import unittest
+
+# objects to test
+from bsie.reader.path import Path
+
+
+## code ##
+
+class TestPath(unittest.TestCase):
+    def test_call(self):
+        self.assertEqual('', Path()(''))
+        self.assertEqual('/tmp/foo/bar', Path()('/tmp/foo/bar'))
+        self.assertEqual('/home/myself/some file', Path()('/home/myself/some file'))
+
+
+## main ##
+
+if __name__ == '__main__':
+    unittest.main()
+
+## EOF ##
diff --git a/test/reader/test_stat.py b/test/reader/test_stat.py
new file mode 100644
index 0000000..d12ad9c
--- /dev/null
+++ b/test/reader/test_stat.py
@@ -0,0 +1,34 @@
+"""
+
+Part of the bsie test suite.
+A copy of the license is provided with the project.
+Author: Matthias Baumgartner, 2022
+"""
+# imports
+import os
+import unittest
+
+# bsie imports
+from bsie.base import errors
+
+# objects to test
+from bsie.reader.stat import Stat
+
+
+## code ##
+
+class TestPath(unittest.TestCase):
+    def test_call(self):
+        # test self
+        self.assertEqual(os.stat(__file__), Stat()(__file__))
+        # test invalid file
+        self.assertRaises(errors.ReaderError, Stat(), '')
+        self.assertRaises(errors.ReaderError, Stat(), None)
+
+
+## main ##
+
+if __name__ == '__main__':
+    unittest.main()
+
+## EOF ##
-- 
cgit v1.2.3


From 2da348c638ac5058d5acf09ab5df323ee04503d5 Mon Sep 17 00:00:00 2001
From: Matthias Baumgartner <dev@igsor.net>
Date: Mon, 31 Oct 2022 14:14:42 +0100
Subject: constant, filesize, and filename extractors

---
 bsie/base/extractor.py                  |  3 +-
 bsie/extractor/__init__.py              | 15 +++++++
 bsie/extractor/generic/__init__.py      | 16 ++++++++
 bsie/extractor/generic/constant.py      | 52 ++++++++++++++++++++++++
 bsie/extractor/generic/path.py          | 70 ++++++++++++++++++++++++++++++++
 bsie/extractor/generic/stat.py          | 71 +++++++++++++++++++++++++++++++++
 bsie/utils/__init__.py                  |  2 +
 bsie/utils/bsfs.py                      |  5 ++-
 bsie/utils/namespaces.py                | 25 ++++++++++++
 test/__init__.py                        |  0
 test/extractor/__init__.py              |  0
 test/extractor/generic/__init__.py      |  0
 test/extractor/generic/test_constant.py | 63 +++++++++++++++++++++++++++++
 test/extractor/generic/test_path.py     | 45 +++++++++++++++++++++
 test/extractor/generic/test_stat.py     | 43 ++++++++++++++++++++
 test/reader/__init__.py                 |  0
 16 files changed, 406 insertions(+), 4 deletions(-)
 create mode 100644 bsie/extractor/__init__.py
 create mode 100644 bsie/extractor/generic/__init__.py
 create mode 100644 bsie/extractor/generic/constant.py
 create mode 100644 bsie/extractor/generic/path.py
 create mode 100644 bsie/extractor/generic/stat.py
 create mode 100644 bsie/utils/namespaces.py
 create mode 100644 test/__init__.py
 create mode 100644 test/extractor/__init__.py
 create mode 100644 test/extractor/generic/__init__.py
 create mode 100644 test/extractor/generic/test_constant.py
 create mode 100644 test/extractor/generic/test_path.py
 create mode 100644 test/extractor/generic/test_stat.py
 create mode 100644 test/reader/__init__.py

diff --git a/bsie/base/extractor.py b/bsie/base/extractor.py
index d5b0922..ea43925 100644
--- a/bsie/base/extractor.py
+++ b/bsie/base/extractor.py
@@ -6,7 +6,6 @@ Author: Matthias Baumgartner, 2022
 """
 # imports
 import abc
-import collections
 import typing
 
 # inner-module imports
@@ -22,7 +21,7 @@ __all__: typing.Sequence[str] = (
 
 ## code ##
 
-class Extractor(abc.ABC, collections.abc.Iterable, collections.abc.Callable):
+class Extractor(abc.ABC):
     """Produce (node, predicate, value)-triples from some content."""
 
     # what type of content is expected (i.e. reader subclass).
diff --git a/bsie/extractor/__init__.py b/bsie/extractor/__init__.py
new file mode 100644
index 0000000..ef31343
--- /dev/null
+++ b/bsie/extractor/__init__.py
@@ -0,0 +1,15 @@
+"""Extractors produce triples from some content.
+
+Each Extractor class is linked to the Reader class whose content it requires.
+
+Part of the bsie module.
+A copy of the license is provided with the project.
+Author: Matthias Baumgartner, 2022
+"""
+# imports
+import typing
+
+# exports
+__all__: typing.Sequence[str] = []
+
+## EOF ##
diff --git a/bsie/extractor/generic/__init__.py b/bsie/extractor/generic/__init__.py
new file mode 100644
index 0000000..0cb7e7f
--- /dev/null
+++ b/bsie/extractor/generic/__init__.py
@@ -0,0 +1,16 @@
+"""Generic extractors focus on information that is typically available on all
+files. Examples include file system information (file name and size, mime type,
+etc.) and information that is independent of the actual file (constant triples,
+host platform infos, current time, etc.).
+
+Part of the bsie module.
+A copy of the license is provided with the project.
+Author: Matthias Baumgartner, 2022
+"""
+# imports
+import typing
+
+# exports
+__all__: typing.Sequence[str] = []
+
+## EOF ##
diff --git a/bsie/extractor/generic/constant.py b/bsie/extractor/generic/constant.py
new file mode 100644
index 0000000..e243131
--- /dev/null
+++ b/bsie/extractor/generic/constant.py
@@ -0,0 +1,52 @@
+"""The Constant extractor produces pre-specified triples.
+
+Part of the bsie module.
+A copy of the license is provided with the project.
+Author: Matthias Baumgartner, 2022
+"""
+# imports
+import typing
+
+# inner-module imports
+from bsie.base import extractor
+from bsie.utils.bsfs import URI
+from bsie.utils.node import Node
+
+# exports
+__all__: typing.Sequence[str] = (
+    'Constant',
+    )
+
+
+## code ##
+
+class Constant(extractor.Extractor):
+    """Extract information from file's path."""
+
+    CONTENT_READER = None
+
+    def __init__(
+            self,
+            schema: str,
+            tuples: typing.Iterable[typing.Tuple[URI, typing.Any]],
+            ):
+        self._schema = schema
+        self._tuples = tuples
+        # FIXME: use schema instance for predicate checking
+        #self._tuples = [(pred, value) for pred, value in tuples if pred in schema]
+        # FIXME: use schema instance for value checking
+
+    def schema(self) -> str:
+        return self._schema
+
+    def extract(
+            self,
+            subject: Node,
+            content: None,
+            predicates: typing.Iterable[URI],
+            ) -> typing.Iterator[typing.Tuple[Node, URI, typing.Any]]:
+        for pred, value in self._tuples:
+            if pred in predicates:
+                yield subject, pred, value
+
+## EOF ##
diff --git a/bsie/extractor/generic/path.py b/bsie/extractor/generic/path.py
new file mode 100644
index 0000000..c39bbd2
--- /dev/null
+++ b/bsie/extractor/generic/path.py
@@ -0,0 +1,70 @@
+"""
+
+Part of the bsie module.
+A copy of the license is provided with the project.
+Author: Matthias Baumgartner, 2022
+"""
+# imports
+import os
+import typing
+
+# inner-module imports
+from bsie.base import extractor
+from bsie.utils import node, ns
+from bsie.utils.bsfs import URI
+import bsie.reader.path
+
+# exports
+__all__: typing.Sequence[str] = (
+    'Path',
+    )
+
+
+## code ##
+
+class Path(extractor.Extractor):
+    """Extract information from file's path."""
+
+    CONTENT_READER = bsie.reader.path.Path
+
+    def __init__(self):
+        self.__callmap = {
+            ns.bse.filename:    self.__filename,
+            }
+
+    def schema(self) -> str:
+        return '''
+            bse:filename a bsfs:Predicate ;
+                rdfs:domain bsfs:Entity ;
+                rdfs:range xsd:string ;
+                rdf:label "File name"^^xsd:string ;
+                schema:description "Filename of entity in some filesystem."^^xsd:string ;
+                owl:maxCardinality "INF"^^xsd:number .
+            '''
+
+    def extract(
+            self,
+            subject: node.Node,
+            content: CONTENT_READER.CONTENT_TYPE,
+            predicates: typing.Iterable[URI],
+            ) -> typing.Iterator[typing.Tuple[node.Node, URI, typing.Any]]:
+        for pred in predicates:
+            # find callback
+            clbk = self.__callmap.get(pred)
+            if clbk is None:
+                continue
+            # get value
+            value = clbk(content)
+            if value is None:
+                continue
+            # produce triple
+            yield subject, pred, value
+
+    def __filename(self, path: str) -> str:
+        try:
+            return os.path.basename(path)
+        except Exception:
+            # FIXME: some kind of error reporting (e.g. logging)
+            return None
+
+## EOF ##
diff --git a/bsie/extractor/generic/stat.py b/bsie/extractor/generic/stat.py
new file mode 100644
index 0000000..d74369c
--- /dev/null
+++ b/bsie/extractor/generic/stat.py
@@ -0,0 +1,71 @@
+"""Extract information from the file system, such as filesize.
+
+Part of the bsie module.
+A copy of the license is provided with the project.
+Author: Matthias Baumgartner, 2022
+"""
+# imports
+import typing
+
+# inner-module imports
+from bsie.base import extractor
+from bsie.utils import node, ns
+from bsie.utils.bsfs import URI
+import bsie.reader.stat
+
+
+# exports
+__all__: typing.Sequence[str] = (
+    'Stat',
+    )
+
+
+## code ##
+
+class Stat(extractor.Extractor):
+    """Extract information from the file system."""
+
+    CONTENT_READER = bsie.reader.stat.Stat
+
+    def __init__(self):
+        self.__callmap = {
+            ns.bse.filesize:    self.__filesize,
+            }
+
+    def schema(self) -> str:
+        return '''
+            bse:filesize a bsfs:Predicate ;
+                rdfs:domain bsfs:Entity ;
+                rdfs:range xsd:integer ;
+                rdf:label "File size"^^xsd:string ;
+                schema:description "File size of entity in some filesystem."^^xsd:string ;
+                owl:maxCardinality "INF"^^xsd:number .
+            '''
+
+    def extract(
+            self,
+            subject: node.Node,
+            content: CONTENT_READER.CONTENT_TYPE,
+            predicates: typing.Iterable[URI],
+            ) -> typing.Iterator[typing.Tuple[node.Node, URI, typing.Any]]:
+        for pred in predicates:
+            # find callback
+            clbk = self.__callmap.get(pred)
+            if clbk is None:
+                continue
+            # get value
+            value = clbk(content)
+            if value is None:
+                continue
+            # produce triple
+            yield subject, pred, value
+
+    def __filesize(self, content: CONTENT_READER.CONTENT_TYPE) -> int:
+        """Return the file size."""
+        try:
+            return content.st_size
+        except Exception:
+            # FIXME: some kind of error reporting (e.g. logging)
+            return None
+
+## EOF ##
diff --git a/bsie/utils/__init__.py b/bsie/utils/__init__.py
index 1137187..bd22236 100644
--- a/bsie/utils/__init__.py
+++ b/bsie/utils/__init__.py
@@ -9,12 +9,14 @@ import typing
 
 # inner-module imports
 from . import bsfs
+from . import namespaces as ns
 from . import node
 
 # exports
 __all__: typing.Sequence[str] = (
     'bsfs',
     'node',
+    'ns',
     )
 
 ## EOF ##
diff --git a/bsie/utils/bsfs.py b/bsie/utils/bsfs.py
index 33eb178..1ae657c 100644
--- a/bsie/utils/bsfs.py
+++ b/bsie/utils/bsfs.py
@@ -8,11 +8,12 @@ Author: Matthias Baumgartner, 2022
 import typing
 
 # bsfs imports
-from bsfs.utils import URI
-from bsfs.utils import typename
+from bsfs.namespace import Namespace
+from bsfs.utils import URI, typename
 
 # exports
 __all__: typing.Sequence[str] = (
+    'Namespace',
     'URI',
     'typename',
     )
diff --git a/bsie/utils/namespaces.py b/bsie/utils/namespaces.py
new file mode 100644
index 0000000..67ccc71
--- /dev/null
+++ b/bsie/utils/namespaces.py
@@ -0,0 +1,25 @@
+"""Default namespaces used throughout BSIE.
+
+Part of the bsie module.
+A copy of the license is provided with the project.
+Author: Matthias Baumgartner, 2022
+"""
+# imports
+import typing
+
+# bsie imports
+from . import bsfs as _bsfs
+
+# constants
+bse = _bsfs.Namespace('http://bsfs.ai/schema/Entity#')
+bsfs = _bsfs.Namespace('http://bsfs.ai/schema/')
+bsm = _bsfs.Namespace('http://bsfs.ai/schema/meta#')
+
+# export
+__all__: typing.Sequence[str] = (
+    'bse',
+    'bsfs',
+    'bsm',
+    )
+
+## EOF ##
diff --git a/test/__init__.py b/test/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/test/extractor/__init__.py b/test/extractor/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/test/extractor/generic/__init__.py b/test/extractor/generic/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/test/extractor/generic/test_constant.py b/test/extractor/generic/test_constant.py
new file mode 100644
index 0000000..f3ab0a3
--- /dev/null
+++ b/test/extractor/generic/test_constant.py
@@ -0,0 +1,63 @@
+"""
+
+Part of the bsie test suite.
+A copy of the license is provided with the project.
+Author: Matthias Baumgartner, 2022
+"""
+# imports
+import unittest
+
+# bsie imports
+from bsie.utils import ns
+from bsie.utils.node import Node
+
+# objects to test
+from bsie.extractor.generic.constant import Constant
+
+
+## code ##
+
+class TestConstant(unittest.TestCase):
+    def test_extract(self):
+        schema = '''
+            bse:author a bsfs:Predicate ;
+                rdfs:domain bsfs:Entity ;
+                rdfs:range xsd:string ;
+                owl:maxCardinality "1"^^xsd:number .
+
+            bse:comment a bsfs:Predicate ;
+                rdfs:domain bsfs:Entity ;
+                rdfs:range xsd:string ;
+                owl:maxCardinality "INF"^^xsd:number .
+
+            '''
+        tuples = [
+            (ns.bse.author, 'Me, myself, and I'),
+            (ns.bse.comment, 'the quick brown fox jumps over the lazy dog.'),
+            ]
+        node = Node(ns.bsfs.Entity, '') # Blank node
+        predicates = (ns.bse.author, ns.bse.comment)
+        ext = Constant(schema, tuples)
+        # baseline
+        self.assertSetEqual(set(ext.extract(node, None, predicates)),
+            {(node, pred, value) for pred, value in tuples})
+        # predicates is respected
+        self.assertSetEqual(set(ext.extract(node, None, (ns.bse.author, ns.bse.foobar))),
+            {(node, ns.bse.author, 'Me, myself, and I')})
+        self.assertSetEqual(set(ext.extract(node, None, (ns.bse.comment, ns.bse.foobar))),
+            {(node, ns.bse.comment, 'the quick brown fox jumps over the lazy dog.')})
+        self.assertSetEqual(set(ext.extract(node, None, (ns.bse.foobar, ns.bse.barfoo))), set())
+
+        # FIXME: should change!
+        # for now: no schema compliance
+        ext = Constant('', tuples)
+        self.assertSetEqual(set(ext.extract(node, None, predicates)),
+            {(node, pred, value) for pred, value in tuples})
+
+
+## main ##
+
+if __name__ == '__main__':
+    unittest.main()
+
+## EOF ##
diff --git a/test/extractor/generic/test_path.py b/test/extractor/generic/test_path.py
new file mode 100644
index 0000000..8623490
--- /dev/null
+++ b/test/extractor/generic/test_path.py
@@ -0,0 +1,45 @@
+"""
+
+Part of the bsie test suite.
+A copy of the license is provided with the project.
+Author: Matthias Baumgartner, 2022
+"""
+# imports
+import unittest
+
+# bsie imports
+from bsie.utils import ns
+from bsie.utils.node import Node
+
+# objects to test
+from bsie.extractor.generic.path import Path
+
+
+## code ##
+
+class TestPath(unittest.TestCase):
+    def test_extract(self):
+        node = Node(ns.bsfs.Entity, '') # Blank node
+        ext = Path()
+
+        # baseline
+        self.assertSetEqual(set(ext.extract(node, '/tmp/foo/bar', (ns.bse.filename, ))),
+            {(node, ns.bse.filename, 'bar')})
+        # predicates parameter is respected
+        self.assertSetEqual(set(ext.extract(node, '/tmp/foo/bar', (ns.bse.filename, ns.bse.foo))),
+            {(node, ns.bse.filename, 'bar')})
+        self.assertSetEqual(set(ext.extract(node, '/tmp/foo/bar', (ns.bse.foo, ))), set())
+        # path variations
+        self.assertSetEqual(set(ext.extract(node, 'bar', (ns.bse.filename, ))),
+            {(node, ns.bse.filename, 'bar')})
+        self.assertSetEqual(set(ext.extract(node, '', (ns.bse.filename, ))),
+            {(node, ns.bse.filename, '')})
+        self.assertSetEqual(set(ext.extract(node, None, (ns.bse.filename, ))), set())
+
+
+## main ##
+
+if __name__ == '__main__':
+    unittest.main()
+
+## EOF ##
diff --git a/test/extractor/generic/test_stat.py b/test/extractor/generic/test_stat.py
new file mode 100644
index 0000000..f89b053
--- /dev/null
+++ b/test/extractor/generic/test_stat.py
@@ -0,0 +1,43 @@
+"""
+
+Part of the bsie test suite.
+A copy of the license is provided with the project.
+Author: Matthias Baumgartner, 2022
+"""
+# imports
+import os
+import unittest
+
+# bsie imports
+from bsie.utils import ns
+from bsie.utils.node import Node
+
+# objects to test
+from bsie.extractor.generic.stat import Stat
+
+
+## code ##
+
+class TestConstant(unittest.TestCase):
+    def test_extract(self):
+        node = Node(ns.bsfs.Entity, '') # Blank node
+        content = os.stat(__file__)
+        ext = Stat()
+
+        # baseline
+        self.assertSetEqual(set(ext.extract(node, content, (ns.bse.filesize, ))),
+            {(node, ns.bse.filesize, content.st_size)})
+        # predicates parameter is respected
+        self.assertSetEqual(set(ext.extract(node, content, (ns.bse.filesize, ns.bse.foo))),
+            {(node, ns.bse.filesize, content.st_size)})
+        self.assertSetEqual(set(ext.extract(node, content, (ns.bse.foo, ))), set())
+        # content variations
+        self.assertSetEqual(set(ext.extract(node, None, (ns.bse.filesize, ))), set())
+
+
+## main ##
+
+if __name__ == '__main__':
+    unittest.main()
+
+## EOF ##
diff --git a/test/reader/__init__.py b/test/reader/__init__.py
new file mode 100644
index 0000000..e69de29
-- 
cgit v1.2.3


From e174a25585e64eb1b0759440cad48d642dd31829 Mon Sep 17 00:00:00 2001
From: Matthias Baumgartner <dev@igsor.net>
Date: Fri, 25 Nov 2022 14:31:29 +0100
Subject: use schema and predicate types in extractors

---
 bsie/base/errors.py                     | 13 +++++--
 bsie/base/extractor.py                  | 51 ++++++++++++++++++++++----
 bsie/extractor/generic/constant.py      | 20 +++++------
 bsie/extractor/generic/path.py          | 40 +++++++++++----------
 bsie/extractor/generic/stat.py          | 34 +++++++++---------
 bsie/utils/bsfs.py                      |  2 ++
 bsie/utils/namespaces.py                |  3 +-
 bsie/utils/node.py                      |  2 +-
 test/extractor/generic/test_constant.py | 63 +++++++++++++++++++++++----------
 test/extractor/generic/test_path.py     | 53 +++++++++++++++++++++------
 test/extractor/generic/test_stat.py     | 48 ++++++++++++++++++++-----
 11 files changed, 235 insertions(+), 94 deletions(-)

diff --git a/bsie/base/errors.py b/bsie/base/errors.py
index f86ffb2..eedce3b 100644
--- a/bsie/base/errors.py
+++ b/bsie/base/errors.py
@@ -8,15 +8,22 @@ Author: Matthias Baumgartner, 2022
 import typing
 
 # exports
-__all__: typing.Sequence[str] = []
+__all__: typing.Sequence[str] = (
+    'ExtractorError',
+    )
+
+
 
 
 ## code ##
 
-class _BSIE_Error(Exception):
+class _BSIEError(Exception):
     """Generic BSIE error."""
 
-class ReaderError(_BSIE_Error):
+class ExtractorError(_BSIEError):
+    """The Extractor failed to process the given content."""
+
+class ReaderError(_BSIEError):
     """The Reader failed to read the given file."""
 
 ## EOF ##
diff --git a/bsie/base/extractor.py b/bsie/base/extractor.py
index ea43925..a6a69c6 100644
--- a/bsie/base/extractor.py
+++ b/bsie/base/extractor.py
@@ -11,13 +11,38 @@ import typing
 # inner-module imports
 from . import reader
 from bsie.utils import node
-from bsie.utils.bsfs import URI, typename
+from bsie.utils.bsfs import schema as _schema, typename
 
 # exports
 __all__: typing.Sequence[str] = (
     'Extractor',
     )
 
+# constants
+
+# essential definitions typically used in extractor schemas.
+# NOTE: The definition here is only for convenience; Each Extractor must implement its use, if so desired.
+SCHEMA_PREAMBLE = '''
+    # common external prefixes
+    prefix owl: <http://www.w3.org/2002/07/owl#>
+    prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
+    prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#>
+    prefix xsd: <http://www.w3.org/2001/XMLSchema#>
+    prefix schema: <http://schema.org/>
+
+    # common bsfs prefixes
+    prefix bsfs: <http://bsfs.ai/schema/>
+    prefix bse: <http://bsfs.ai/schema/Entity#>
+
+    # essential nodes
+    bsfs:Entity rdfs:subClassOf bsfs:Node .
+
+    # common definitions
+    xsd:string rdfs:subClassOf bsfs:Literal .
+    xsd:integer rdfs:subClassOf bsfs:Literal .
+
+    '''
+
 
 ## code ##
 
@@ -27,23 +52,37 @@ class Extractor(abc.ABC):
     # what type of content is expected (i.e. reader subclass).
     CONTENT_READER: typing.Optional[typing.Type[reader.Reader]] = None
 
+    # extractor schema.
+    schema: _schema.Schema
+
+    def __init__(self, schema: _schema.Schema):
+        self.schema = schema
+
     def __str__(self) -> str:
         return typename(self)
 
     def __repr__(self) -> str:
         return f'{typename(self)}()'
 
-    @abc.abstractmethod
-    def schema(self) -> str:
-        """Return the schema (predicates and nodes) produced by this Extractor."""
+
+    def predicates(self) -> typing.Iterator[_schema.Predicate]:
+        """Return the predicates that may be part of extracted triples."""
+        # NOTE: Some predicates in the schema might not occur in actual triples,
+        # but are defined due to predicate class hierarchy. E.g., bsfs:Predicate
+        # is part of every schema but should not be used in triples.
+        # Announcing all predicates might not be the most efficient way, however,
+        # it is the safest one. Concrete extractors that produce additional
+        # predicates (e.g. auxiliary nodes with their own predicates) should
+        # override this method to only include the principal predicates.
+        return self.schema.predicates()
 
     @abc.abstractmethod
     def extract(
             self,
             subject: node.Node,
             content: typing.Any,
-            predicates: typing.Iterable[URI],
-            ) -> typing.Iterator[typing.Tuple[node.Node, URI, typing.Any]]:
+            predicates: typing.Iterable[_schema.Predicate],
+            ) -> typing.Iterator[typing.Tuple[node.Node, _schema.Predicate, typing.Any]]:
         """Return (node, predicate, value) triples."""
 
 ## EOF ##
diff --git a/bsie/extractor/generic/constant.py b/bsie/extractor/generic/constant.py
index e243131..795bac6 100644
--- a/bsie/extractor/generic/constant.py
+++ b/bsie/extractor/generic/constant.py
@@ -7,9 +7,9 @@ Author: Matthias Baumgartner, 2022
 # imports
 import typing
 
-# inner-module imports
+# bsie imports
 from bsie.base import extractor
-from bsie.utils.bsfs import URI
+from bsie.utils.bsfs import URI, schema as _schema
 from bsie.utils.node import Node
 
 # exports
@@ -25,26 +25,26 @@ class Constant(extractor.Extractor):
 
     CONTENT_READER = None
 
+    # predicate/value pairs to be produced.
+    _tuples: typing.Tuple[typing.Tuple[_schema.Predicate, typing.Any], ...]
+
     def __init__(
             self,
             schema: str,
             tuples: typing.Iterable[typing.Tuple[URI, typing.Any]],
             ):
-        self._schema = schema
-        self._tuples = tuples
-        # FIXME: use schema instance for predicate checking
-        #self._tuples = [(pred, value) for pred, value in tuples if pred in schema]
+        super().__init__(_schema.Schema.from_string(extractor.SCHEMA_PREAMBLE + schema))
+        # NOTE: Raises a KeyError if the predicate is not part of the schema
+        self._tuples = tuple((self.schema.predicate(p_uri), value) for p_uri, value in tuples)
         # FIXME: use schema instance for value checking
 
-    def schema(self) -> str:
-        return self._schema
 
     def extract(
             self,
             subject: Node,
             content: None,
-            predicates: typing.Iterable[URI],
-            ) -> typing.Iterator[typing.Tuple[Node, URI, typing.Any]]:
+            predicates: typing.Iterable[_schema.Predicate],
+            ) -> typing.Iterator[typing.Tuple[Node, _schema.Predicate, typing.Any]]:
         for pred, value in self._tuples:
             if pred in predicates:
                 yield subject, pred, value
diff --git a/bsie/extractor/generic/path.py b/bsie/extractor/generic/path.py
index c39bbd2..f358a79 100644
--- a/bsie/extractor/generic/path.py
+++ b/bsie/extractor/generic/path.py
@@ -8,11 +8,10 @@ Author: Matthias Baumgartner, 2022
 import os
 import typing
 
-# inner-module imports
+# bsie imports
 from bsie.base import extractor
 from bsie.utils import node, ns
-from bsie.utils.bsfs import URI
-import bsie.reader.path
+from bsie.utils.bsfs import schema
 
 # exports
 __all__: typing.Sequence[str] = (
@@ -27,30 +26,31 @@ class Path(extractor.Extractor):
 
     CONTENT_READER = bsie.reader.path.Path
 
-    def __init__(self):
-        self.__callmap = {
-            ns.bse.filename:    self.__filename,
-            }
+    # mapping from predicate to handler function.
+    _callmap: typing.Dict[schema.Predicate, typing.Callable[[str], typing.Any]]
 
-    def schema(self) -> str:
-        return '''
-            bse:filename a bsfs:Predicate ;
+    def __init__(self):
+        super().__init__(schema.Schema.from_string(extractor.SCHEMA_PREAMBLE + '''
+            bse:filename rdfs:subClassOf bsfs:Predicate ;
                 rdfs:domain bsfs:Entity ;
                 rdfs:range xsd:string ;
-                rdf:label "File name"^^xsd:string ;
+                rdfs:label "File name"^^xsd:string ;
                 schema:description "Filename of entity in some filesystem."^^xsd:string ;
                 owl:maxCardinality "INF"^^xsd:number .
-            '''
+            '''))
+        self._callmap = {
+            self.schema.predicate(ns.bse.filename): self.__filename,
+            }
 
     def extract(
             self,
             subject: node.Node,
             content: CONTENT_READER.CONTENT_TYPE,
-            predicates: typing.Iterable[URI],
-            ) -> typing.Iterator[typing.Tuple[node.Node, URI, typing.Any]]:
+            predicates: typing.Iterable[schema.Predicate],
+            ) -> typing.Iterator[typing.Tuple[node.Node, schema.Predicate, typing.Any]]:
         for pred in predicates:
             # find callback
-            clbk = self.__callmap.get(pred)
+            clbk = self._callmap.get(pred)
             if clbk is None:
                 continue
             # get value
@@ -60,11 +60,15 @@ class Path(extractor.Extractor):
             # produce triple
             yield subject, pred, value
 
-    def __filename(self, path: str) -> str:
+    def __filename(self, path: str) -> typing.Optional[str]:
         try:
             return os.path.basename(path)
-        except Exception:
-            # FIXME: some kind of error reporting (e.g. logging)
+        except Exception: # some error, skip.
+            # FIXME: some kind of error reporting (e.g. logging)?
+            # Options: (a) Fail silently (current); (b) Skip and report to log;
+            # (c) Raise ExtractorError (aborts extraction); (d) separate content type
+            # checks from basename errors (report content type errors, skip basename
+            # errors)
             return None
 
 ## EOF ##
diff --git a/bsie/extractor/generic/stat.py b/bsie/extractor/generic/stat.py
index d74369c..e5387af 100644
--- a/bsie/extractor/generic/stat.py
+++ b/bsie/extractor/generic/stat.py
@@ -5,14 +5,13 @@ A copy of the license is provided with the project.
 Author: Matthias Baumgartner, 2022
 """
 # imports
+import os
 import typing
 
-# inner-module imports
+# bsie imports
 from bsie.base import extractor
 from bsie.utils import node, ns
-from bsie.utils.bsfs import URI
-import bsie.reader.stat
-
+from bsie.utils.bsfs import schema as _schema
 
 # exports
 __all__: typing.Sequence[str] = (
@@ -27,30 +26,31 @@ class Stat(extractor.Extractor):
 
     CONTENT_READER = bsie.reader.stat.Stat
 
-    def __init__(self):
-        self.__callmap = {
-            ns.bse.filesize:    self.__filesize,
-            }
+    # mapping from predicate to handler function.
+    _callmap: typing.Dict[_schema.Predicate, typing.Callable[[os.stat_result], typing.Any]]
 
-    def schema(self) -> str:
-        return '''
-            bse:filesize a bsfs:Predicate ;
+    def __init__(self):
+        super().__init__(_schema.Schema.from_string(extractor.SCHEMA_PREAMBLE + '''
+            bse:filesize rdfs:subClassOf bsfs:Predicate ;
                 rdfs:domain bsfs:Entity ;
                 rdfs:range xsd:integer ;
-                rdf:label "File size"^^xsd:string ;
+                rdfs:label "File size"^^xsd:string ;
                 schema:description "File size of entity in some filesystem."^^xsd:string ;
                 owl:maxCardinality "INF"^^xsd:number .
-            '''
+            '''))
+        self._callmap = {
+            self.schema.predicate(ns.bse.filesize): self.__filesize,
+            }
 
     def extract(
             self,
             subject: node.Node,
             content: CONTENT_READER.CONTENT_TYPE,
-            predicates: typing.Iterable[URI],
-            ) -> typing.Iterator[typing.Tuple[node.Node, URI, typing.Any]]:
+            predicates: typing.Iterable[_schema.Predicate],
+            ) -> typing.Iterator[typing.Tuple[node.Node, _schema.Predicate, typing.Any]]:
         for pred in predicates:
             # find callback
-            clbk = self.__callmap.get(pred)
+            clbk = self._callmap.get(pred)
             if clbk is None:
                 continue
             # get value
@@ -60,7 +60,7 @@ class Stat(extractor.Extractor):
             # produce triple
             yield subject, pred, value
 
-    def __filesize(self, content: CONTENT_READER.CONTENT_TYPE) -> int:
+    def __filesize(self, content: os.stat_result) -> typing.Optional[int]:
         """Return the file size."""
         try:
             return content.st_size
diff --git a/bsie/utils/bsfs.py b/bsie/utils/bsfs.py
index 1ae657c..01ec5d1 100644
--- a/bsie/utils/bsfs.py
+++ b/bsie/utils/bsfs.py
@@ -8,6 +8,7 @@ Author: Matthias Baumgartner, 2022
 import typing
 
 # bsfs imports
+from bsfs import schema
 from bsfs.namespace import Namespace
 from bsfs.utils import URI, typename
 
@@ -15,6 +16,7 @@ from bsfs.utils import URI, typename
 __all__: typing.Sequence[str] = (
     'Namespace',
     'URI',
+    'schema',
     'typename',
     )
 
diff --git a/bsie/utils/namespaces.py b/bsie/utils/namespaces.py
index 67ccc71..13be96b 100644
--- a/bsie/utils/namespaces.py
+++ b/bsie/utils/namespaces.py
@@ -7,13 +7,14 @@ Author: Matthias Baumgartner, 2022
 # imports
 import typing
 
-# bsie imports
+# inner-module imports
 from . import bsfs as _bsfs
 
 # constants
 bse = _bsfs.Namespace('http://bsfs.ai/schema/Entity#')
 bsfs = _bsfs.Namespace('http://bsfs.ai/schema/')
 bsm = _bsfs.Namespace('http://bsfs.ai/schema/meta#')
+xsd = _bsfs.Namespace('http://www.w3.org/2001/XMLSchema#')
 
 # export
 __all__: typing.Sequence[str] = (
diff --git a/bsie/utils/node.py b/bsie/utils/node.py
index 60863a4..3a0f06b 100644
--- a/bsie/utils/node.py
+++ b/bsie/utils/node.py
@@ -12,7 +12,7 @@ from bsie.utils.bsfs import URI
 
 # exports
 __all__: typing.Sequence[str] = (
-    'Node'
+    'Node',
     )
 
 
diff --git a/test/extractor/generic/test_constant.py b/test/extractor/generic/test_constant.py
index f3ab0a3..7fdb8ac 100644
--- a/test/extractor/generic/test_constant.py
+++ b/test/extractor/generic/test_constant.py
@@ -20,39 +20,64 @@ from bsie.extractor.generic.constant import Constant
 class TestConstant(unittest.TestCase):
     def test_extract(self):
         schema = '''
-            bse:author a bsfs:Predicate ;
+            bse:author rdfs:subClassOf bsfs:Predicate ;
                 rdfs:domain bsfs:Entity ;
                 rdfs:range xsd:string ;
                 owl:maxCardinality "1"^^xsd:number .
-
-            bse:comment a bsfs:Predicate ;
+            bse:comment rdfs:subClassOf bsfs:Predicate ;
                 rdfs:domain bsfs:Entity ;
                 rdfs:range xsd:string ;
                 owl:maxCardinality "INF"^^xsd:number .
-
             '''
         tuples = [
             (ns.bse.author, 'Me, myself, and I'),
             (ns.bse.comment, 'the quick brown fox jumps over the lazy dog.'),
             ]
-        node = Node(ns.bsfs.Entity, '') # Blank node
-        predicates = (ns.bse.author, ns.bse.comment)
         ext = Constant(schema, tuples)
+        node = Node(ns.bsfs.Entity, '') # Blank node
+        p_author = ext.schema.predicate(ns.bse.author)
+        p_comment = ext.schema.predicate(ns.bse.comment)
+        entity = ext.schema.node(ns.bsfs.Node).get_child(ns.bsfs.Entity)
+        string = ext.schema.literal(ns.bsfs.Literal).get_child(ns.xsd.string)
         # baseline
-        self.assertSetEqual(set(ext.extract(node, None, predicates)),
-            {(node, pred, value) for pred, value in tuples})
+        self.assertSetEqual(set(ext.extract(node, None, (p_author, p_comment))),
+            {(node, p_author, 'Me, myself, and I'),
+             (node, p_comment, 'the quick brown fox jumps over the lazy dog.')})
         # predicates is respected
-        self.assertSetEqual(set(ext.extract(node, None, (ns.bse.author, ns.bse.foobar))),
-            {(node, ns.bse.author, 'Me, myself, and I')})
-        self.assertSetEqual(set(ext.extract(node, None, (ns.bse.comment, ns.bse.foobar))),
-            {(node, ns.bse.comment, 'the quick brown fox jumps over the lazy dog.')})
-        self.assertSetEqual(set(ext.extract(node, None, (ns.bse.foobar, ns.bse.barfoo))), set())
-
-        # FIXME: should change!
-        # for now: no schema compliance
-        ext = Constant('', tuples)
-        self.assertSetEqual(set(ext.extract(node, None, predicates)),
-            {(node, pred, value) for pred, value in tuples})
+        p_foobar = ext.schema.predicate(ns.bsfs.Predicate).get_child(ns.bse.foobar, domain=entity, range=entity)
+        self.assertSetEqual(set(ext.extract(node, None, (p_author, p_foobar))),
+            {(node, p_author, 'Me, myself, and I')})
+        self.assertSetEqual(set(ext.extract(node, None, (p_comment, p_foobar))),
+            {(node, p_comment, 'the quick brown fox jumps over the lazy dog.')})
+        p_barfoo = ext.schema.predicate(ns.bse.author).get_child(ns.bse.comment, domain=entity, range=string)
+        self.assertSetEqual(set(ext.extract(node, None, (p_foobar, p_barfoo))), set())
+
+    def test_construct(self):
+        # schema compliance
+        schema = '''
+            bse:author rdfs:subClassOf bsfs:Predicate ;
+                rdfs:domain bsfs:Entity ;
+                rdfs:range xsd:string ;
+                owl:maxCardinality "1"^^xsd:number .
+            bse:comment rdfs:subClassOf bsfs:Predicate ;
+                rdfs:domain bsfs:Entity ;
+                rdfs:range xsd:string ;
+                owl:maxCardinality "INF"^^xsd:number .
+            '''
+        # can create a schema
+        self.assertIsInstance(Constant(schema, [
+            (ns.bse.author, 'Me, myself, and I'),
+            (ns.bse.comment, 'the quick brown fox jumps over the lazy dog.'),
+            ]), Constant)
+        # predicates are validated
+        self.assertRaises(KeyError, Constant, schema, [
+            (ns.bse.author, 'Me, myself, and I'),
+            (ns.bse.foobar, 'foobar!')])
+        # FIXME: values are validated
+        #class Foo(): pass # not string compatible
+        #self.assertRaises(ValueError, Constant, schema, [
+        #    (ns.bse.author, Foo())])
+
 
 
 ## main ##
diff --git a/test/extractor/generic/test_path.py b/test/extractor/generic/test_path.py
index 8623490..9376c7c 100644
--- a/test/extractor/generic/test_path.py
+++ b/test/extractor/generic/test_path.py
@@ -8,7 +8,9 @@ Author: Matthias Baumgartner, 2022
 import unittest
 
 # bsie imports
+from bsie import base
 from bsie.utils import ns
+from bsie.utils.bsfs import schema
 from bsie.utils.node import Node
 
 # objects to test
@@ -18,23 +20,52 @@ from bsie.extractor.generic.path import Path
 ## code ##
 
 class TestPath(unittest.TestCase):
+    def test_eq(self):
+        # distinct instances, same data
+        self.assertEqual(Path(), Path())
+        # different classes
+        class Foo(): pass
+        self.assertNotEqual(Path(), Foo())
+        self.assertNotEqual(Path(), 123)
+        self.assertNotEqual(Path(), None)
+
+    def test_schema(self):
+        self.assertEqual(Path().schema,
+            schema.Schema.from_string(base.extractor.SCHEMA_PREAMBLE + '''
+                bse:filename rdfs:subClassOf bsfs:Predicate ;
+                    rdfs:domain bsfs:Entity ;
+                    rdfs:range xsd:string ;
+                    owl:maxCardinality "INF"^^xsd:number .
+                '''))
+
     def test_extract(self):
-        node = Node(ns.bsfs.Entity, '') # Blank node
         ext = Path()
+        node = Node(ns.bsfs.Entity, '') # Blank node
+        content = '/tmp/foo/bar'
+        p_filename = ext.schema.predicate(ns.bse.filename)
+        entity = ext.schema.node(ns.bsfs.Node).get_child(ns.bsfs.Entity)
+        string = ext.schema.literal(ns.bsfs.Literal).get_child(ns.xsd.string)
 
         # baseline
-        self.assertSetEqual(set(ext.extract(node, '/tmp/foo/bar', (ns.bse.filename, ))),
-            {(node, ns.bse.filename, 'bar')})
+        self.assertSetEqual(set(ext.extract(node, content, (p_filename, ))),
+            {(node, p_filename, 'bar')})
         # predicates parameter is respected
-        self.assertSetEqual(set(ext.extract(node, '/tmp/foo/bar', (ns.bse.filename, ns.bse.foo))),
-            {(node, ns.bse.filename, 'bar')})
-        self.assertSetEqual(set(ext.extract(node, '/tmp/foo/bar', (ns.bse.foo, ))), set())
+        p_foo = ext.schema.predicate(ns.bsfs.Predicate).get_child(ns.bse.foo, domain=entity, range=string) # unsupported predicate
+        self.assertSetEqual(set(ext.extract(node, content, (p_filename, p_foo))),
+            {(node, p_filename, 'bar')})
+        self.assertSetEqual(set(ext.extract(node, content, (p_foo, ))), set())
+        # predicates are validated
+        p_bar = p_foo.get_child(ns.bse.filename) # same URI but different hierarchy
+        self.assertSetEqual(set(ext.extract(node, content, (p_filename, p_bar))),
+            {(node, p_filename, 'bar')})
+        self.assertSetEqual(set(ext.extract(node, content, (p_bar, ))), set())
         # path variations
-        self.assertSetEqual(set(ext.extract(node, 'bar', (ns.bse.filename, ))),
-            {(node, ns.bse.filename, 'bar')})
-        self.assertSetEqual(set(ext.extract(node, '', (ns.bse.filename, ))),
-            {(node, ns.bse.filename, '')})
-        self.assertSetEqual(set(ext.extract(node, None, (ns.bse.filename, ))), set())
+        self.assertSetEqual(set(ext.extract(node, 'bar', (p_filename, ))),
+            {(node, p_filename, 'bar')})
+        self.assertSetEqual(set(ext.extract(node, '', (p_filename, ))),
+            {(node, p_filename, '')})
+        # errors are suppressed
+        self.assertSetEqual(set(ext.extract(node, None, (p_filename, ))), set())
 
 
 ## main ##
diff --git a/test/extractor/generic/test_stat.py b/test/extractor/generic/test_stat.py
index f89b053..26dad6a 100644
--- a/test/extractor/generic/test_stat.py
+++ b/test/extractor/generic/test_stat.py
@@ -9,7 +9,9 @@ import os
 import unittest
 
 # bsie imports
+from bsie import base
 from bsie.utils import ns
+from bsie.utils.bsfs import schema
 from bsie.utils.node import Node
 
 # objects to test
@@ -18,21 +20,51 @@ from bsie.extractor.generic.stat import Stat
 
 ## code ##
 
-class TestConstant(unittest.TestCase):
+class TestStat(unittest.TestCase):
+    def test_eq(self):
+        # distinct instances, same data
+        self.assertEqual(Stat(), Stat())
+        # different classes
+        class Foo(): pass
+        self.assertNotEqual(Stat(), Foo())
+        self.assertNotEqual(Stat(), 123)
+        self.assertNotEqual(Stat(), None)
+
+    def test_schema(self):
+        self.assertEqual(Stat().schema,
+            schema.Schema.from_string(base.extractor.SCHEMA_PREAMBLE + '''
+                bse:filesize rdfs:subClassOf bsfs:Predicate ;
+                    rdfs:domain bsfs:Entity ;
+                    rdfs:range xsd:integer ;
+                    owl:maxCardinality "INF"^^xsd:number .
+                '''))
+
     def test_extract(self):
+        ext = Stat()
         node = Node(ns.bsfs.Entity, '') # Blank node
         content = os.stat(__file__)
-        ext = Stat()
+        p_filesize = ext.schema.predicate(ns.bse.filesize)
+        entity = ext.schema.node(ns.bsfs.Node).get_child(ns.bsfs.Entity)
+        string = ext.schema.literal(ns.bsfs.Literal).get_child(ns.xsd.string)
 
         # baseline
-        self.assertSetEqual(set(ext.extract(node, content, (ns.bse.filesize, ))),
-            {(node, ns.bse.filesize, content.st_size)})
+        self.assertSetEqual(set(ext.extract(node, content, (p_filesize, ))),
+            {(node, p_filesize, content.st_size)})
         # predicates parameter is respected
-        self.assertSetEqual(set(ext.extract(node, content, (ns.bse.filesize, ns.bse.foo))),
-            {(node, ns.bse.filesize, content.st_size)})
-        self.assertSetEqual(set(ext.extract(node, content, (ns.bse.foo, ))), set())
+        p_foo = ext.schema.predicate(ns.bsfs.Predicate).get_child(ns.bse.foo, domain=entity, range=string) # unsupported predicate
+        self.assertSetEqual(set(ext.extract(node, content, (p_filesize, p_foo))),
+            {(node, p_filesize, content.st_size)})
+        self.assertSetEqual(set(ext.extract(node, content, (p_foo, ))), set())
+        # predicates are validated
+        p_bar = p_foo.get_child(ns.bse.filesizse) # same URI but different hierarchy
+        self.assertSetEqual(set(ext.extract(node, content, (p_filesize, p_bar))),
+            {(node, p_filesize, content.st_size)})
+        self.assertSetEqual(set(ext.extract(node, content, (p_bar, ))), set())
         # content variations
-        self.assertSetEqual(set(ext.extract(node, None, (ns.bse.filesize, ))), set())
+        self.assertSetEqual(set(ext.extract(node, os.stat_result([12345] * len(content)), (p_filesize, p_bar))),
+            {(node, p_filesize, 12345)})
+        # errors are suppressed
+        self.assertSetEqual(set(ext.extract(node, None, (p_filesize, ))), set())
 
 
 ## main ##
-- 
cgit v1.2.3


From b96c6e2096c387b70e2a4c1f0bc53b6044a0dc6f Mon Sep 17 00:00:00 2001
From: Matthias Baumgartner <dev@igsor.net>
Date: Fri, 25 Nov 2022 14:36:27 +0100
Subject: decouple readers and extractors; use strings for reference and
 repeated type annotations

---
 bsie/base/extractor.py         |  5 ++---
 bsie/base/reader.py            | 11 ++---------
 bsie/extractor/generic/path.py |  4 ++--
 bsie/extractor/generic/stat.py |  4 ++--
 bsie/reader/path.py            |  7 ++-----
 bsie/reader/stat.py            |  6 ++----
 6 files changed, 12 insertions(+), 25 deletions(-)

diff --git a/bsie/base/extractor.py b/bsie/base/extractor.py
index a6a69c6..7acf2bd 100644
--- a/bsie/base/extractor.py
+++ b/bsie/base/extractor.py
@@ -8,8 +8,7 @@ Author: Matthias Baumgartner, 2022
 import abc
 import typing
 
-# inner-module imports
-from . import reader
+# bsie imports
 from bsie.utils import node
 from bsie.utils.bsfs import schema as _schema, typename
 
@@ -50,7 +49,7 @@ class Extractor(abc.ABC):
     """Produce (node, predicate, value)-triples from some content."""
 
     # what type of content is expected (i.e. reader subclass).
-    CONTENT_READER: typing.Optional[typing.Type[reader.Reader]] = None
+    CONTENT_READER: typing.Optional[str] = None
 
     # extractor schema.
     schema: _schema.Schema
diff --git a/bsie/base/reader.py b/bsie/base/reader.py
index f29e451..e59abef 100644
--- a/bsie/base/reader.py
+++ b/bsie/base/reader.py
@@ -12,12 +12,11 @@ Author: Matthias Baumgartner, 2022
 import abc
 import typing
 
-# inner-module imports
+# bsie imports
 from bsie.utils.bsfs import URI, typename
 
 # exports
 __all__: typing.Sequence[str] = (
-    'Aggregator',
     'Reader',
     )
 
@@ -27,20 +26,14 @@ __all__: typing.Sequence[str] = (
 class Reader(abc.ABC):
     """Read and return some content from a file."""
 
-    # In what data structure content is returned
-    CONTENT_TYPE = typing.Union[typing.Any]
-    # NOTE: Child classes must also assign a typing.Union even if there's
-    # only one options
-
     def __str__(self) -> str:
         return typename(self)
 
     def __repr__(self) -> str:
         return f'{typename(self)}()'
 
-    # FIXME: How about using contexts instead of calls?
     @abc.abstractmethod
-    def __call__(self, path: URI) -> CONTENT_TYPE:
+    def __call__(self, path: URI) -> typing.Any:
         """Return some content of the file at *path*.
         Raises a `ReaderError` if the reader cannot make sense of the file format.
         """
diff --git a/bsie/extractor/generic/path.py b/bsie/extractor/generic/path.py
index f358a79..f346f97 100644
--- a/bsie/extractor/generic/path.py
+++ b/bsie/extractor/generic/path.py
@@ -24,7 +24,7 @@ __all__: typing.Sequence[str] = (
 class Path(extractor.Extractor):
     """Extract information from file's path."""
 
-    CONTENT_READER = bsie.reader.path.Path
+    CONTENT_READER = 'bsie.reader.path.Path'
 
     # mapping from predicate to handler function.
     _callmap: typing.Dict[schema.Predicate, typing.Callable[[str], typing.Any]]
@@ -45,7 +45,7 @@ class Path(extractor.Extractor):
     def extract(
             self,
             subject: node.Node,
-            content: CONTENT_READER.CONTENT_TYPE,
+            content: str,
             predicates: typing.Iterable[schema.Predicate],
             ) -> typing.Iterator[typing.Tuple[node.Node, schema.Predicate, typing.Any]]:
         for pred in predicates:
diff --git a/bsie/extractor/generic/stat.py b/bsie/extractor/generic/stat.py
index e5387af..7088c0a 100644
--- a/bsie/extractor/generic/stat.py
+++ b/bsie/extractor/generic/stat.py
@@ -24,7 +24,7 @@ __all__: typing.Sequence[str] = (
 class Stat(extractor.Extractor):
     """Extract information from the file system."""
 
-    CONTENT_READER = bsie.reader.stat.Stat
+    CONTENT_READER = 'bsie.reader.stat.Stat'
 
     # mapping from predicate to handler function.
     _callmap: typing.Dict[_schema.Predicate, typing.Callable[[os.stat_result], typing.Any]]
@@ -45,7 +45,7 @@ class Stat(extractor.Extractor):
     def extract(
             self,
             subject: node.Node,
-            content: CONTENT_READER.CONTENT_TYPE,
+            content: os.stat_result,
             predicates: typing.Iterable[_schema.Predicate],
             ) -> typing.Iterator[typing.Tuple[node.Node, _schema.Predicate, typing.Any]]:
         for pred in predicates:
diff --git a/bsie/reader/path.py b/bsie/reader/path.py
index d27c664..d60f187 100644
--- a/bsie/reader/path.py
+++ b/bsie/reader/path.py
@@ -5,10 +5,9 @@ A copy of the license is provided with the project.
 Author: Matthias Baumgartner, 2022
 """
 # imports
-import os
 import typing
 
-# inner-module imports
+# bsie imports
 from bsie.base import reader
 
 # exports
@@ -22,9 +21,7 @@ __all__: typing.Sequence[str] = (
 class Path(reader.Reader):
     """Return the path."""
 
-    CONTENT_TYPE = typing.Union[str]
-
-    def __call__(self, path: str) -> CONTENT_TYPE:
+    def __call__(self, path: str) -> str:
         return path
 
 
diff --git a/bsie/reader/stat.py b/bsie/reader/stat.py
index f0b83fb..6d40ab8 100644
--- a/bsie/reader/stat.py
+++ b/bsie/reader/stat.py
@@ -8,7 +8,7 @@ Author: Matthias Baumgartner, 2022
 import os
 import typing
 
-# inner-module imports
+# bsie imports
 from bsie.base import reader, errors
 
 # exports
@@ -22,9 +22,7 @@ __all__: typing.Sequence[str] = (
 class Stat(reader.Reader):
     """Read and return the filesystem's stat infos."""
 
-    CONTENT_TYPE = typing.Union[os.stat_result]
-
-    def __call__(self, path: str) -> CONTENT_TYPE:
+    def __call__(self, path: str) -> os.stat_result:
         try:
             return os.stat(path)
         except Exception:
-- 
cgit v1.2.3


From 9ce32829b2bb85907a34a543bfcaa9183d1e362c Mon Sep 17 00:00:00 2001
From: Matthias Baumgartner <dev@igsor.net>
Date: Fri, 25 Nov 2022 14:39:18 +0100
Subject: string conversion and equality checks

---
 bsie/base/extractor.py                  |  7 ++++
 bsie/base/reader.py                     |  6 +++
 bsie/extractor/generic/constant.py      |  6 +++
 bsie/utils/node.py                      | 18 ++++++++-
 test/base/__init__.py                   |  0
 test/base/test_extractor.py             | 70 +++++++++++++++++++++++++++++++++
 test/base/test_reader.py                | 45 +++++++++++++++++++++
 test/extractor/generic/test_constant.py | 37 +++++++++++++++++
 test/utils/__init__.py                  |  0
 test/utils/test_node.py                 | 66 +++++++++++++++++++++++++++++++
 10 files changed, 253 insertions(+), 2 deletions(-)
 create mode 100644 test/base/__init__.py
 create mode 100644 test/base/test_extractor.py
 create mode 100644 test/base/test_reader.py
 create mode 100644 test/utils/__init__.py
 create mode 100644 test/utils/test_node.py

diff --git a/bsie/base/extractor.py b/bsie/base/extractor.py
index 7acf2bd..2fc4f18 100644
--- a/bsie/base/extractor.py
+++ b/bsie/base/extractor.py
@@ -63,6 +63,13 @@ class Extractor(abc.ABC):
     def __repr__(self) -> str:
         return f'{typename(self)}()'
 
+    def __eq__(self, other: typing.Any) -> bool:
+        return isinstance(other, type(self)) \
+          and self.CONTENT_READER == other.CONTENT_READER \
+          and self.schema == other.schema
+
+    def __hash__(self) -> int:
+        return hash((type(self), self.CONTENT_READER, self.schema))
 
     def predicates(self) -> typing.Iterator[_schema.Predicate]:
         """Return the predicates that may be part of extracted triples."""
diff --git a/bsie/base/reader.py b/bsie/base/reader.py
index e59abef..b7eabf7 100644
--- a/bsie/base/reader.py
+++ b/bsie/base/reader.py
@@ -32,6 +32,12 @@ class Reader(abc.ABC):
     def __repr__(self) -> str:
         return f'{typename(self)}()'
 
+    def __eq__(self, other: typing.Any) -> bool:
+        return isinstance(other, type(self))
+
+    def __hash__(self) -> int:
+        return hash(type(self))
+
     @abc.abstractmethod
     def __call__(self, path: URI) -> typing.Any:
         """Return some content of the file at *path*.
diff --git a/bsie/extractor/generic/constant.py b/bsie/extractor/generic/constant.py
index 795bac6..7da792a 100644
--- a/bsie/extractor/generic/constant.py
+++ b/bsie/extractor/generic/constant.py
@@ -38,6 +38,12 @@ class Constant(extractor.Extractor):
         self._tuples = tuple((self.schema.predicate(p_uri), value) for p_uri, value in tuples)
         # FIXME: use schema instance for value checking
 
+    def __eq__(self, other: typing.Any) -> bool:
+        return super().__eq__(other) \
+           and self._tuples == other._tuples
+
+    def __hash__(self) -> int:
+        return hash((super().__hash__(), self._tuples))
 
     def extract(
             self,
diff --git a/bsie/utils/node.py b/bsie/utils/node.py
index 3a0f06b..c9c494f 100644
--- a/bsie/utils/node.py
+++ b/bsie/utils/node.py
@@ -7,8 +7,8 @@ Author: Matthias Baumgartner, 2022
 # imports
 import typing
 
-# inner-module imports
-from bsie.utils.bsfs import URI
+# bsie imports
+from bsie.utils.bsfs import URI, typename
 
 # exports
 __all__: typing.Sequence[str] = (
@@ -36,4 +36,18 @@ class Node():
         self.node_type = URI(node_type)
         self.uri = URI(uri)
 
+    def __eq__(self, other: typing.Any) -> bool:
+        return isinstance(other, Node) \
+            and other.node_type == self.node_type \
+            and other.uri == self.uri
+
+    def __hash__(self) -> int:
+        return hash((type(self), self.node_type, self.uri))
+
+    def __str__(self) -> str:
+        return f'{typename(self)}({self.node_type}, {self.uri})'
+
+    def __repr__(self) -> str:
+        return f'{typename(self)}({self.node_type}, {self.uri})'
+
 ## EOF ##
diff --git a/test/base/__init__.py b/test/base/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/test/base/test_extractor.py b/test/base/test_extractor.py
new file mode 100644
index 0000000..7a00079
--- /dev/null
+++ b/test/base/test_extractor.py
@@ -0,0 +1,70 @@
+"""
+
+Part of the bsie test suite.
+A copy of the license is provided with the project.
+Author: Matthias Baumgartner, 2022
+"""
+# imports
+import unittest
+
+# bsie imports
+from bsie.utils import ns
+from bsie.utils.bsfs import schema as _schema, URI
+
+# objects to test
+from bsie.base import extractor
+
+
+## code ##
+
+class StubExtractor(extractor.Extractor):
+    def __init__(self):
+        super().__init__(_schema.Schema.from_string(extractor.SCHEMA_PREAMBLE + '''
+            bse:author rdfs:subClassOf bsfs:Predicate ;
+                rdfs:domain bsfs:Entity ;
+                rdfs:range xsd:string ;
+                owl:maxCardinality "INF"^^xsd:number .
+            bse:comment rdfs:subClassOf bsfs:Predicate ;
+                rdfs:domain bsfs:Entity ;
+                rdfs:range xsd:string ;
+                owl:maxCardinality "INF"^^xsd:number .
+        '''))
+
+    def extract(self, subject, content, predicates):
+        raise NotImplementedError()
+
+class StubSub(StubExtractor):
+    pass
+
+class TestExtractor(unittest.TestCase):
+    def test_essentials(self):
+        ext = StubExtractor()
+        self.assertEqual(str(ext), 'StubExtractor')
+        self.assertEqual(repr(ext), 'StubExtractor()')
+        self.assertEqual(ext, StubExtractor())
+        self.assertEqual(hash(ext), hash(StubExtractor()))
+
+        sub = StubSub()
+        self.assertEqual(str(sub), 'StubSub')
+        self.assertEqual(repr(sub), 'StubSub()')
+        self.assertEqual(sub, StubSub())
+        self.assertEqual(hash(sub), hash(StubSub()))
+        self.assertNotEqual(ext, sub)
+        self.assertNotEqual(hash(ext), hash(sub))
+
+    def test_predicates(self):
+        schema = _schema.Schema.Empty()
+        entity = schema.node(ns.bsfs.Node).get_child(ns.bsfs.Entity)
+        string = schema.literal(ns.bsfs.Literal).get_child(URI('http://www.w3.org/2001/XMLSchema#string'))
+        p_author = schema.predicate(ns.bsfs.Predicate).get_child(ns.bse.author, domain=entity, range=string)
+        p_comment = schema.predicate(ns.bsfs.Predicate).get_child(ns.bse.comment, domain=entity, range=string)
+        ext = StubExtractor()
+        self.assertSetEqual(set(ext.predicates()), {p_author, p_comment} | set(schema.predicates()))
+
+
+## main ##
+
+if __name__ == '__main__':
+    unittest.main()
+
+## EOF ##
diff --git a/test/base/test_reader.py b/test/base/test_reader.py
new file mode 100644
index 0000000..802b314
--- /dev/null
+++ b/test/base/test_reader.py
@@ -0,0 +1,45 @@
+"""
+
+Part of the bsie test suite.
+A copy of the license is provided with the project.
+Author: Matthias Baumgartner, 2022
+"""
+# imports
+import unittest
+
+# objects to test
+from bsie.base import reader
+
+
+## code ##
+
+class StubReader(reader.Reader):
+    def __call__(self, path):
+        raise NotImplementedError()
+
+class StubSub(StubReader):
+    pass
+
+class TestReader(unittest.TestCase):
+    def test_essentials(self):
+        ext = StubReader()
+        self.assertEqual(str(ext), 'StubReader')
+        self.assertEqual(repr(ext), 'StubReader()')
+        self.assertEqual(ext, StubReader())
+        self.assertEqual(hash(ext), hash(StubReader()))
+
+        sub = StubSub()
+        self.assertEqual(str(sub), 'StubSub')
+        self.assertEqual(repr(sub), 'StubSub()')
+        self.assertEqual(sub, StubSub())
+        self.assertEqual(hash(sub), hash(StubSub()))
+        self.assertNotEqual(ext, sub)
+        self.assertNotEqual(hash(ext), hash(sub))
+
+
+## main ##
+
+if __name__ == '__main__':
+    unittest.main()
+
+## EOF ##
diff --git a/test/extractor/generic/test_constant.py b/test/extractor/generic/test_constant.py
index 7fdb8ac..aa33fb4 100644
--- a/test/extractor/generic/test_constant.py
+++ b/test/extractor/generic/test_constant.py
@@ -78,6 +78,43 @@ class TestConstant(unittest.TestCase):
         #self.assertRaises(ValueError, Constant, schema, [
         #    (ns.bse.author, Foo())])
 
+    def test_eq(self):
+        schema_a = '''
+            bse:author rdfs:subClassOf bsfs:Predicate ;
+                rdfs:domain bsfs:Entity ;
+                rdfs:range xsd:string ;
+                owl:maxCardinality "1"^^xsd:number .
+            '''
+        schema_b = '''
+            bse:comment rdfs:subClassOf bsfs:Predicate ;
+                rdfs:domain bsfs:Entity ;
+                rdfs:range xsd:string ;
+                owl:maxCardinality "INF"^^xsd:number .
+            '''
+        tuples_a = [(ns.bse.author, 'Me, myself, and I')]
+        tuples_b = [(ns.bse.comment, 'the quick brown fox jumps over the lazy dog.') ]
+        # distinct instances, same data
+        self.assertEqual(
+            Constant(schema_a, tuples_a),
+            Constant(schema_a, tuples_a))
+        self.assertEqual(
+            hash(Constant(schema_a, tuples_a)),
+            hash(Constant(schema_a, tuples_a)))
+        # different data
+        self.assertNotEqual(
+            Constant(schema_a, tuples_a),
+            Constant(schema_b, tuples_b))
+        self.assertNotEqual(
+            hash(Constant(schema_a, tuples_a)),
+            hash(Constant(schema_b, tuples_b)))
+        # different objects
+        class Foo(): pass
+        self.assertNotEqual(Constant(schema_a, tuples_a), Foo())
+        self.assertNotEqual(hash(Constant(schema_a, tuples_a)), hash(Foo()))
+        self.assertNotEqual(Constant(schema_a, tuples_a), 123)
+        self.assertNotEqual(hash(Constant(schema_a, tuples_a)), hash(123))
+        self.assertNotEqual(Constant(schema_a, tuples_a), None)
+        self.assertNotEqual(hash(Constant(schema_a, tuples_a)), hash(None))
 
 
 ## main ##
diff --git a/test/utils/__init__.py b/test/utils/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/test/utils/test_node.py b/test/utils/test_node.py
new file mode 100644
index 0000000..826f199
--- /dev/null
+++ b/test/utils/test_node.py
@@ -0,0 +1,66 @@
+"""
+
+Part of the bsie test suite.
+A copy of the license is provided with the project.
+Author: Matthias Baumgartner, 2022
+"""
+# imports
+import unittest
+
+# bsie imports
+from bsie.utils.bsfs import URI
+from bsie.utils import ns
+
+# objects to test
+from bsie.utils.node import Node
+
+
+## code ##
+
+class TestNode(unittest.TestCase):
+    def test_equality(self):
+        uri = URI('http://example.com/me/entity#1234')
+        node = Node(ns.bsfs.Entity, uri)
+        # basic equivalence
+        self.assertEqual(node, Node(ns.bsfs.Entity, URI('http://example.com/me/entity#1234')))
+        self.assertEqual(hash(node), hash(Node(ns.bsfs.Entity, URI('http://example.com/me/entity#1234'))))
+        # equality respects uri
+        self.assertNotEqual(node, Node(ns.bsfs.Entity, URI('http://example.com/me/entity#4321')))
+        self.assertNotEqual(hash(node), hash(Node(ns.bsfs.Entity, URI('http://example.com/me/entity#4321'))))
+        # equality respects node_type
+        self.assertNotEqual(node, Node(ns.bsfs.Foo, uri))
+        self.assertNotEqual(hash(node), hash(Node(ns.bsfs.Foo, uri)))
+        # not equal to other types
+        self.assertNotEqual(node, 1234)
+        self.assertNotEqual(hash(node), hash(1234))
+        self.assertNotEqual(node, uri)
+        self.assertNotEqual(hash(node), hash(uri))
+        self.assertNotEqual(node, ns.bsfs.Entity)
+        self.assertNotEqual(hash(node), hash(ns.bsfs.Entity))
+        class Foo(): pass
+        self.assertNotEqual(node, Foo())
+        self.assertNotEqual(hash(node), hash(Foo()))
+
+    def test_str(self):
+        uri = URI('http://example.com/me/entity#1234')
+        # basic string conversion
+        node = Node(ns.bsfs.Entity, uri)
+        self.assertEqual(str(node), 'Node(http://bsfs.ai/schema/Entity, http://example.com/me/entity#1234)')
+        self.assertEqual(repr(node), 'Node(http://bsfs.ai/schema/Entity, http://example.com/me/entity#1234)')
+        # string conversion respects node_type
+        node = Node(ns.bsfs.Foo, uri)
+        self.assertEqual(str(node), 'Node(http://bsfs.ai/schema/Foo, http://example.com/me/entity#1234)')
+        self.assertEqual(repr(node), 'Node(http://bsfs.ai/schema/Foo, http://example.com/me/entity#1234)')
+        # string conversion respects uri
+        node = Node(ns.bsfs.Entity, URI('http://example.com/me/entity#4321'))
+        self.assertEqual(str(node), 'Node(http://bsfs.ai/schema/Entity, http://example.com/me/entity#4321)')
+        self.assertEqual(repr(node), 'Node(http://bsfs.ai/schema/Entity, http://example.com/me/entity#4321)')
+
+
+
+## main ##
+
+if __name__ == '__main__':
+    unittest.main()
+
+## EOF ##
-- 
cgit v1.2.3


From c9a1dea230054f5d6f40b7fd5e3930609c5f6416 Mon Sep 17 00:00:00 2001
From: Matthias Baumgartner <dev@igsor.net>
Date: Fri, 25 Nov 2022 14:41:38 +0100
Subject: code analysis tool configs and minor fixes

---
 .coveragerc         |  15 ++++
 .mypy.ini           |   3 +
 .pylintrc           | 193 ++++++++++++++++++++++++++++++++++++++++++++++++++++
 README              |  51 ++++++++++++++
 bsie/base/errors.py |   3 +-
 bsie/reader/stat.py |   4 +-
 6 files changed, 265 insertions(+), 4 deletions(-)
 create mode 100644 .coveragerc
 create mode 100644 .mypy.ini
 create mode 100644 .pylintrc

diff --git a/.coveragerc b/.coveragerc
new file mode 100644
index 0000000..40f07cc
--- /dev/null
+++ b/.coveragerc
@@ -0,0 +1,15 @@
+[run]
+dynamic_context = test_function
+branch = True
+source = bsie
+data_file = .coverage
+command_line = -m unittest
+
+[report]
+show_missing = True
+skip_empty = True
+
+[html]
+directory = .htmlcov
+show_contexts = True
+
diff --git a/.mypy.ini b/.mypy.ini
new file mode 100644
index 0000000..4d0a25d
--- /dev/null
+++ b/.mypy.ini
@@ -0,0 +1,3 @@
+[mypy]
+ignore_missing_imports = True
+packages=bsie
diff --git a/.pylintrc b/.pylintrc
new file mode 100644
index 0000000..3cfae38
--- /dev/null
+++ b/.pylintrc
@@ -0,0 +1,193 @@
+[MAIN]
+
+# Pickle collected data for later comparisons.
+persistent=no
+
+# Minimum Python version to use for version dependent checks. Will default to
+# the version used to run pylint.
+py-version=3.8
+
+# Discover python modules and packages in the file system subtree.
+recursive=yes
+
+# When enabled, pylint would attempt to guess common misconfiguration and emit
+# user-friendly hints instead of false-positive error messages.
+suggestion-mode=yes
+
+
+[BASIC]
+
+# Minimum line length for functions/classes that require docstrings, shorter
+# ones are exempt.
+docstring-min-length=-1
+
+# Bad variable names which should always be refused, separated by a comma.
+bad-names=foo,bar,abc,cba,xyz,zyx,foobar,hello,world
+
+# Good variable names which should always be accepted, separated by a comma.
+good-names=i,j,k,n,_
+
+# Naming style matching correct argument names.
+argument-naming-style=snake_case
+
+# Naming style matching correct attribute names.
+attr-naming-style=snake_case
+
+# Naming style matching correct class attribute names.
+class-attribute-naming-style=any
+
+# Naming style matching correct class constant names.
+class-const-naming-style=UPPER_CASE
+
+# Naming style matching correct class names.
+class-naming-style=PascalCase
+
+# Naming style matching correct constant names.
+const-naming-style=UPPER_CASE
+
+# Naming style matching correct function names.
+function-naming-style=snake_case
+
+# Include a hint for the correct naming format with invalid-name.
+include-naming-hint=yes
+
+# Naming style matching correct inline iteration names.
+inlinevar-naming-style=any
+
+# Naming style matching correct method names.
+method-naming-style=snake_case
+
+# Naming style matching correct module names.
+module-naming-style=snake_case
+
+# Naming style matching correct variable names.
+variable-naming-style=snake_case
+
+
+[DESIGN]
+
+# Maximum number of arguments for function / method.
+max-args=5
+
+# Maximum number of attributes for a class (see R0902).
+max-attributes=7
+
+# Maximum number of boolean expressions in an if statement (see R0916).
+max-bool-expr=5
+
+# Maximum number of branch for function / method body.
+max-branches=12
+
+# Maximum number of locals for function / method body.
+max-locals=15
+
+# Maximum number of parents for a class (see R0901).
+max-parents=7
+
+# Maximum number of public methods for a class (see R0904).
+max-public-methods=20
+
+# Maximum number of return / yield for function / method body.
+max-returns=6
+
+# Maximum number of statements in function / method body.
+max-statements=50
+
+# Minimum number of public methods for a class (see R0903).
+min-public-methods=1
+
+
+[FORMAT]
+
+# Expected format of line ending, e.g. empty (any line ending), LF or CRLF.
+expected-line-ending-format=
+
+# Regexp for a line that is allowed to be longer than the limit.
+ignore-long-lines=^\s*(# )?<?https?://\S+>?$
+
+# Number of spaces of indent required inside a hanging or continued line.
+indent-after-paren=4
+
+# String used as indentation unit. This is usually "    " (4 spaces) or "\t" (1
+# tab).
+indent-string='    '
+
+# Maximum number of characters on a single line.
+max-line-length=120
+
+# Maximum number of lines in a module.
+max-module-lines=1000
+
+# Allow the body of a class to be on the same line as the declaration if body
+# contains single statement.
+single-line-class-stmt=no
+
+# Allow the body of an if to be on the same line as the test if there is no
+# else.
+single-line-if-stmt=no
+
+
+[IMPORTS]
+
+# List of modules that can be imported at any level, not just the top level
+# one.
+allow-any-import-level=
+
+# Allow wildcard imports from modules that define __all__.
+allow-wildcard-with-all=no
+
+
+[LOGGING]
+
+# The type of string formatting that logging methods do. `old` means using %
+# formatting, `new` is for `{}` formatting.
+logging-format-style=old
+
+
+
+[MISCELLANEOUS]
+
+# List of note tags to take in consideration, separated by a comma.
+notes=FIXME,TODO,NOTE
+
+
+
+[REPORTS]
+
+# Tells whether to display a full report or only the messages.
+reports=yes
+
+# Activate the evaluation score.
+score=yes
+
+
+[SIMILARITIES]
+
+# Minimum lines number of a similarity.
+min-similarity-lines=4
+
+
+[STRING]
+
+# This flag controls whether inconsistent-quotes generates a warning when the
+# character used as a quote delimiter is used inconsistently within a module.
+check-quote-consistency=yes
+
+
+[TYPECHECK]
+
+# Tells whether to warn about missing members when the owner of the attribute
+# is inferred to be None.
+ignore-none=no
+
+
+[VARIABLES]
+
+# List of strings which can identify a callback function by name. A callback
+# name must start or end with one of those strings.
+callbacks=clbk,callback
+
+
+
+
+# Disable: R1735 (use-dict-literal)
diff --git a/README b/README
index b790244..3326196 100644
--- a/README
+++ b/README
@@ -3,3 +3,54 @@ Black Star Information Extraction
 =================================
 
 
+### Developer tools setup
+
+#### Test coverage (coverage)
+
+Resources:
+* https://coverage.readthedocs.io/en/6.5.0/index.html
+* https://nedbatchelder.com/blog/200710/flaws_in_coverage_measurement.html
+
+Commands:
+$ pip install coverage
+$ coverage run ; coverage html ; xdg-open .htmlcov/index.html
+
+
+
+#### Static code analysis (pylint)
+
+Resources:
+* https://github.com/PyCQA/pylint
+* https://pylint.org/
+* https://pylint.pycqa.org/en/latest/user_guide/messages/messages_overview.html#messages-overview
+
+Commands:
+$ pip install pylint
+$ pylint bsie
+
+
+
+#### Type analysis (mypy)
+
+Resources:
+* https://github.com/python/mypy
+* https://mypy.readthedocs.io/en/stable/
+
+Commands:
+$ pip install mypy
+$ mypy
+
+
+
+#### Documentation (sphinx)
+
+Resources:
+* 
+* 
+
+Commands:
+$ pip install ...
+$ 
+
+
+
diff --git a/bsie/base/errors.py b/bsie/base/errors.py
index eedce3b..a86b7e8 100644
--- a/bsie/base/errors.py
+++ b/bsie/base/errors.py
@@ -10,11 +10,10 @@ import typing
 # exports
 __all__: typing.Sequence[str] = (
     'ExtractorError',
+    'ReaderError',
     )
 
 
-
-
 ## code ##
 
 class _BSIEError(Exception):
diff --git a/bsie/reader/stat.py b/bsie/reader/stat.py
index 6d40ab8..592d912 100644
--- a/bsie/reader/stat.py
+++ b/bsie/reader/stat.py
@@ -25,8 +25,8 @@ class Stat(reader.Reader):
     def __call__(self, path: str) -> os.stat_result:
         try:
             return os.stat(path)
-        except Exception:
-            raise errors.ReaderError(path)
+        except Exception as err:
+            raise errors.ReaderError(path) from err
 
 
 ## EOF ##
-- 
cgit v1.2.3


From 3e6a69ce7f109f0fd4352507ad60d58d4cbd24a7 Mon Sep 17 00:00:00 2001
From: Matthias Baumgartner <dev@igsor.net>
Date: Fri, 25 Nov 2022 14:43:12 +0100
Subject: builders and pipeline

---
 bsie/base/errors.py         |   8 ++
 bsie/tools/__init__.py      |  20 ++++
 bsie/tools/builder.py       | 217 ++++++++++++++++++++++++++++++++++++++
 bsie/tools/pipeline.py      | 121 ++++++++++++++++++++++
 bsie/utils/bsfs.py          |   3 +-
 test/tools/__init__.py      |   0
 test/tools/test_builder.py  | 247 ++++++++++++++++++++++++++++++++++++++++++++
 test/tools/test_pipeline.py | 167 ++++++++++++++++++++++++++++++
 test/tools/testfile.t       |   1 +
 9 files changed, 783 insertions(+), 1 deletion(-)
 create mode 100644 bsie/tools/__init__.py
 create mode 100644 bsie/tools/builder.py
 create mode 100644 bsie/tools/pipeline.py
 create mode 100644 test/tools/__init__.py
 create mode 100644 test/tools/test_builder.py
 create mode 100644 test/tools/test_pipeline.py
 create mode 100644 test/tools/testfile.t

diff --git a/bsie/base/errors.py b/bsie/base/errors.py
index a86b7e8..760351f 100644
--- a/bsie/base/errors.py
+++ b/bsie/base/errors.py
@@ -9,7 +9,9 @@ import typing
 
 # exports
 __all__: typing.Sequence[str] = (
+    'BuilderError',
     'ExtractorError',
+    'LoaderError',
     'ReaderError',
     )
 
@@ -19,6 +21,12 @@ __all__: typing.Sequence[str] = (
 class _BSIEError(Exception):
     """Generic BSIE error."""
 
+class BuilderError(_BSIEError):
+    """The Builder failed to create an instance."""
+
+class LoaderError(BuilderError):
+    """Failed to load a module or class."""
+
 class ExtractorError(_BSIEError):
     """The Extractor failed to process the given content."""
 
diff --git a/bsie/tools/__init__.py b/bsie/tools/__init__.py
new file mode 100644
index 0000000..8ca9620
--- /dev/null
+++ b/bsie/tools/__init__.py
@@ -0,0 +1,20 @@
+"""
+
+Part of the bsie module.
+A copy of the license is provided with the project.
+Author: Matthias Baumgartner, 2022
+"""
+# imports
+import typing
+
+# inner-module imports
+from . import builder
+from . import pipeline
+
+# exports
+__all__: typing.Sequence[str] = (
+    'builder',
+    'pipeline',
+    )
+
+## EOF ##
diff --git a/bsie/tools/builder.py b/bsie/tools/builder.py
new file mode 100644
index 0000000..8f7a410
--- /dev/null
+++ b/bsie/tools/builder.py
@@ -0,0 +1,217 @@
+"""
+
+Part of the bsie module.
+A copy of the license is provided with the project.
+Author: Matthias Baumgartner, 2022
+"""
+# imports
+import importlib
+import logging
+import typing
+
+# bsie imports
+from bsie import base
+from bsie.base import errors
+from bsie.utils.bsfs import URI, typename
+
+# inner-module imports
+from . import pipeline
+
+# exports
+__all__: typing.Sequence[str] = (
+    'ExtractorBuilder',
+    'PipelineBuilder',
+    'ReaderBuilder',
+    )
+
+
+## code ##
+
+logger = logging.getLogger(__name__)
+
+def _safe_load(module_name: str, class_name: str):
+    """Get a class from a module. Raise LoaderError if anything goes wrong."""
+    try:
+        # load the module
+        module = importlib.import_module(module_name)
+    except Exception as err:
+        # cannot import module
+        raise errors.LoaderError(f'cannot load module {module_name}') from err
+
+    try:
+        # get the class from the module
+        cls = getattr(module, class_name)
+    except Exception as err:
+        # cannot find the class
+        raise errors.LoaderError(f'cannot load class {class_name} from module {module_name}') from err
+
+    return cls
+
+
+def _unpack_name(name):
+    """Split a name into its module and class component (dot-separated)."""
+    if not isinstance(name, str):
+        raise TypeError(name)
+    if '.' not in name:
+        raise ValueError('name must be a qualified class name.')
+    module_name, class_name = name[:name.rfind('.')], name[name.rfind('.')+1:]
+    if module_name == '':
+        raise ValueError('name must be a qualified class name.')
+    return module_name, class_name
+
+
+class ReaderBuilder():
+    """Build `bsie.base.reader.Reader` instances.
+
+    Readers are defined via their qualified class name
+    (e.g., bsie.reader.path.Path) and optional keyword
+    arguments that are passed to the constructor via
+    the *kwargs* argument (name as key, kwargs as value).
+    The ReaderBuilder keeps a cache of previously built
+    reader instances, since a reader of a given name is
+    always built with identical keyword arguments.
+
+    """
+
+    # keyword arguments
+    kwargs: typing.Dict[str, typing.Dict[str, typing.Any]]
+
+    # cached readers
+    cache: typing.Dict[str, base.reader.Reader]
+
+    def __init__(self, kwargs: typing.Dict[str, typing.Dict[str, typing.Any]]):
+        self.kwargs = kwargs
+        self.cache = {}
+
+    def build(self, name: str) -> base.reader.Reader:
+        """Return an instance for the qualified class name."""
+        # return cached instance
+        if name in self.cache:
+            return self.cache[name]
+
+        # check name and get module/class components
+        module_name, class_name = _unpack_name(name)
+
+        # import reader class
+        cls = _safe_load(module_name, class_name)
+
+        # get kwargs
+        kwargs = self.kwargs.get(name, {})
+        if not isinstance(kwargs, dict):
+            raise TypeError(f'expected a kwargs dict, found {typename(kwargs)}')
+
+        try: # build, cache, and return instance
+            obj = cls(**kwargs)
+            # cache instance
+            self.cache[name] = obj
+            # return instance
+            return obj
+
+        except Exception as err:
+            raise errors.BuilderError(f'failed to build reader {name} due to {typename(err)}: {err}') from err
+
+
+class ExtractorBuilder():
+    """Build `bsie.base.extractor.Extractor` instances.
+
+    It is permissible to build multiple instances of the same extractor
+    (typically with different arguments), hence the ExtractorBuilder
+    receives a list of build specifications. Each specification is
+    a dict with a single key (extractor's qualified name) and a dict
+    to be used as keyword arguments.
+    Example: [{'bsie.extractor.generic.path.Path': {}}, ]
+
+    """
+
+    # build specifications
+    specs: typing.List[typing.Dict[str, typing.Dict[str, typing.Any]]]
+
+    def __init__(self, specs: typing.List[typing.Dict[str, typing.Dict[str, typing.Any]]]):
+        self.specs = specs
+
+    def __iter__(self) -> typing.Iterator[int]:
+        """Iterate over extractor specifications."""
+        return iter(range(len(self.specs)))
+
+    def build(self, index: int) -> base.extractor.Extractor:
+        """Return an instance of the n'th extractor (n=*index*)."""
+        # get build instructions
+        specs = self.specs[index]
+
+        # check specs structure. expecting {name: {kwargs}}
+        if not isinstance(specs, dict):
+            raise TypeError(f'expected a dict, found {typename(specs)}')
+        if len(specs) != 1:
+            raise TypeError(f'expected a dict of length one, found {len(specs)}')
+
+        # get name and args from specs
+        name = next(iter(specs.keys()))
+        kwargs = specs[name]
+
+        # check kwargs structure
+        if not isinstance(kwargs, dict):
+            raise TypeError(f'expected a dict, found {typename(kwargs)}')
+
+        # check name and get module/class components
+        module_name, class_name = _unpack_name(name)
+
+        # import extractor class
+        cls = _safe_load(module_name, class_name)
+
+        try: # build and return instance
+            return cls(**kwargs)
+
+        except Exception as err:
+            raise errors.BuilderError(f'failed to build extractor {name} due to {typename(err)}: {err}') from err
+
+
+class PipelineBuilder():
+    """Build `bsie.tools.pipeline.Pipeline` instances."""
+
+    def __init__(
+            self,
+            prefix: URI,
+            reader_builder: ReaderBuilder,
+            extractor_builder: ExtractorBuilder,
+            ):
+        self.prefix = prefix
+        self.rbuild = reader_builder
+        self.ebuild = extractor_builder
+
+    def build(self) -> pipeline.Pipeline:
+        """Return a Pipeline instance."""
+        ext2rdr = {}
+
+        for eidx in self.ebuild:
+            # build extractor
+            try:
+                ext = self.ebuild.build(eidx)
+
+            except errors.LoaderError as err: # failed to load extractor; skip
+                logger.error('failed to load extractor: %s', err)
+                continue
+
+            except errors.BuilderError as err: # failed to build instance; skip
+                logger.error(str(err))
+                continue
+
+            try:
+                # get reader required by extractor
+                if ext.CONTENT_READER is not None:
+                    rdr = self.rbuild.build(ext.CONTENT_READER)
+                else:
+                    rdr = None
+                # store extractor
+                ext2rdr[ext] = rdr
+
+            except errors.LoaderError as err: # failed to load reader
+                logger.error('failed to load reader: %s', err)
+
+            except errors.BuilderError as err: # failed to build reader
+                logger.error(str(err))
+
+        return pipeline.Pipeline(self.prefix, ext2rdr)
+
+
+
+## EOF ##
diff --git a/bsie/tools/pipeline.py b/bsie/tools/pipeline.py
new file mode 100644
index 0000000..8e1c992
--- /dev/null
+++ b/bsie/tools/pipeline.py
@@ -0,0 +1,121 @@
+"""
+
+Part of the bsie module.
+A copy of the license is provided with the project.
+Author: Matthias Baumgartner, 2022
+"""
+# imports
+from collections import defaultdict
+import logging
+import typing
+
+# bsie imports
+from bsie import base
+from bsie.utils import ns
+from bsie.utils.node import Node
+from bsie.utils.bsfs import schema as _schema, URI, uuid as _uuid, typename
+
+# exports
+__all__: typing.Sequence[str] = (
+    'Pipeline',
+    )
+
+## code ##
+
+logger = logging.getLogger(__name__)
+
+class Pipeline():
+    """Extraction pipeline to generate triples from files.
+
+    The Pipeline binds readers and extractors, and performs
+    the necessary operations to produce triples from a file.
+    It takes a best-effort approach to extract as many triples
+    as possible. Errors during the extraction are passed over
+    and reported to the log.
+
+    """
+
+    # combined extractor schemas.
+    schema: _schema.Schema
+
+    # node prefix.
+    _prefix: URI
+
+    # extractor -> reader mapping
+    _ext2rdr: typing.Dict[base.extractor.Extractor, typing.Optional[base.reader.Reader]]
+
+    def __init__(
+            self,
+            prefix: URI,
+            ext2rdr: typing.Dict[base.extractor.Extractor, typing.Optional[base.reader.Reader]]
+            ):
+        # store core members
+        self._prefix = prefix
+        self._ext2rdr = ext2rdr
+        # compile schema from all extractors
+        self.schema = _schema.Schema.Union(ext.schema for ext in ext2rdr)
+
+    def __str__(self) -> str:
+        return typename(self)
+
+    def __repr__(self) -> str:
+        return f'{typename(self)}(...)'
+
+    def __hash__(self) -> int: # must ignore dict order, because __eq__ (dict comparison) does
+        return hash((type(self), self._prefix, self.schema, frozenset(self._ext2rdr.items())))
+
+    def __eq__(self, other: typing.Any) -> bool:
+        return isinstance(other, type(self)) \
+           and self.schema == other.schema \
+           and self._prefix == other._prefix \
+           and self._ext2rdr == other._ext2rdr
+
+    def __call__(
+            self,
+            path: URI,
+            predicates: typing.Optional[typing.Iterable[_schema.Predicate]] = None,
+            ) -> typing.Iterator[typing.Tuple[Node, _schema.Predicate, typing.Any]]:
+        """Extract triples from the file at *path*. Optionally, limit triples to *predicates*."""
+        # get predicates
+        predicates = set(predicates) if predicates is not None else set(self.schema.predicates())
+
+        # get extractors
+        extractors = {ext for ext in self._ext2rdr if not set(ext.predicates()).isdisjoint(predicates)}
+
+        # corner-case short-cut
+        if len(extractors) == 0:
+            return
+
+        # get readers -> extractors mapping
+        rdr2ext = defaultdict(set)
+        for ext in extractors:
+            rdr = self._ext2rdr[ext]
+            rdr2ext[rdr].add(ext)
+
+        # create subject for file
+        uuid = _uuid.UCID.from_path(path)
+        subject = Node(ns.bsfs.Entity, self._prefix + uuid)
+
+        # extract information
+        for rdr, extrs in rdr2ext.items():
+            try:
+                # get content
+                content = rdr(path) if rdr is not None else None
+
+                # apply extractors on this content
+                for ext in extrs:
+                    try:
+                        # get predicate/value tuples
+                        for node, pred, value in ext.extract(subject, content, predicates):
+                            yield node, pred, value
+
+                    except base.errors.ExtractorError as err:
+                        # critical extractor failure.
+                        logger.error('%s failed to extract triples from content: %s', ext, err)
+
+            except base.errors.ReaderError as err:
+                # failed to read any content. skip.
+                logger.error('%s failed to read content: %s', rdr, err)
+
+
+## EOF ##
diff --git a/bsie/utils/bsfs.py b/bsie/utils/bsfs.py
index 01ec5d1..a4b7626 100644
--- a/bsie/utils/bsfs.py
+++ b/bsie/utils/bsfs.py
@@ -10,7 +10,7 @@ import typing
 # bsfs imports
 from bsfs import schema
 from bsfs.namespace import Namespace
-from bsfs.utils import URI, typename
+from bsfs.utils import URI, typename, uuid
 
 # exports
 __all__: typing.Sequence[str] = (
@@ -18,6 +18,7 @@ __all__: typing.Sequence[str] = (
     'URI',
     'schema',
     'typename',
+    'uuid',
     )
 
 ## EOF ##
diff --git a/test/tools/__init__.py b/test/tools/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/test/tools/test_builder.py b/test/tools/test_builder.py
new file mode 100644
index 0000000..bef0e9d
--- /dev/null
+++ b/test/tools/test_builder.py
@@ -0,0 +1,247 @@
+"""
+
+Part of the bsie test suite.
+A copy of the license is provided with the project.
+Author: Matthias Baumgartner, 2022
+"""
+# imports
+import logging
+import unittest
+
+# bsie imports
+from bsie import base
+from bsie.base import errors
+from bsie.utils.bsfs import URI
+
+# objects to test
+from bsie.tools.builder import ExtractorBuilder
+from bsie.tools.builder import PipelineBuilder
+from bsie.tools.builder import ReaderBuilder
+from bsie.tools.builder import _safe_load
+from bsie.tools.builder import _unpack_name
+
+
+## code ##
+
+class TestUtils(unittest.TestCase):
+    def test_safe_load(self):
+        # invalid module
+        self.assertRaises(errors.LoaderError, _safe_load, 'dBGHMSAYOoKeKMpywDoKZQycENFPvN', 'foobar')
+        self.assertRaises(errors.LoaderError, _safe_load, 'dBGHMSAYOoKeKMpywDoKZQycENFPvN.bar', 'foobar')
+        # partially valid module
+        self.assertRaises(errors.LoaderError, _safe_load, 'os.foo', 'foobar')
+        # invalid class
+        self.assertRaises(errors.LoaderError, _safe_load, 'os.path', 'foo')
+        # valid module and class
+        cls = _safe_load('collections.abc', 'Container')
+        import collections.abc
+        self.assertEqual(cls, collections.abc.Container)
+
+    def test_unpack_name(self):
+        self.assertRaises(TypeError, _unpack_name, 123)
+        self.assertRaises(TypeError, _unpack_name, None)
+        self.assertRaises(ValueError, _unpack_name, '')
+        self.assertRaises(ValueError, _unpack_name, 'path')
+        self.assertRaises(ValueError, _unpack_name, '.Path')
+        self.assertEqual(_unpack_name('path.Path'), ('path', 'Path'))
+        self.assertEqual(_unpack_name('path.foo.bar.Path'), ('path.foo.bar', 'Path'))
+
+
+class TestReaderBuilder(unittest.TestCase):
+    def test_build(self):
+        builder = ReaderBuilder({'bsie.reader.path.Path': {}})
+        # build configured reader
+        cls = builder.build('bsie.reader.path.Path')
+        import bsie.reader.path
+        self.assertIsInstance(cls, bsie.reader.path.Path)
+        # build unconfigured reader
+        cls = builder.build('bsie.reader.stat.Stat')
+        import bsie.reader.stat
+        self.assertIsInstance(cls, bsie.reader.stat.Stat)
+        # re-build previous reader (test cache)
+        self.assertEqual(cls, builder.build('bsie.reader.stat.Stat'))
+        # test invalid
+        self.assertRaises(TypeError, builder.build, 123)
+        self.assertRaises(TypeError, builder.build, None)
+        self.assertRaises(ValueError, builder.build, '')
+        self.assertRaises(ValueError, builder.build, 'Path')
+        self.assertRaises(errors.BuilderError, builder.build, 'path.Path')
+        # invalid config
+        builder = ReaderBuilder({'bsie.reader.stat.Stat': dict(foo=123)})
+        self.assertRaises(errors.BuilderError, builder.build, 'bsie.reader.stat.Stat')
+        builder = ReaderBuilder({'bsie.reader.stat.Stat': 123})
+        self.assertRaises(TypeError, builder.build, 'bsie.reader.stat.Stat')
+        # no instructions
+        builder = ReaderBuilder({})
+        cls = builder.build('bsie.reader.stat.Stat')
+        self.assertIsInstance(cls, bsie.reader.stat.Stat)
+
+
+
+class TestExtractorBuilder(unittest.TestCase):
+    def test_iter(self):
+        # no specifications
+        self.assertListEqual(list(ExtractorBuilder([])), [])
+        # some specifications
+        builder = ExtractorBuilder([
+            {'bsie.extractor.generic.path.Path': {}},
+            {'bsie.extractor.generic.stat.Stat': {}},
+            {'bsie.extractor.generic.path.Path': {}},
+            ])
+        self.assertListEqual(list(builder), [0, 1, 2])
+
+    def test_build(self):
+        # simple and repeated extractors
+        builder = ExtractorBuilder([
+            {'bsie.extractor.generic.path.Path': {}},
+            {'bsie.extractor.generic.stat.Stat': {}},
+            {'bsie.extractor.generic.path.Path': {}},
+            ])
+        ext = [builder.build(0), builder.build(1), builder.build(2)]
+        import bsie.extractor.generic.path
+        import bsie.extractor.generic.stat
+        self.assertListEqual(ext, [
+            bsie.extractor.generic.path.Path(),
+            bsie.extractor.generic.stat.Stat(),
+            bsie.extractor.generic.path.Path(),
+            ])
+        # out-of-bounds raises IndexError
+        self.assertRaises(IndexError, builder.build, 3)
+
+        # building with args
+        builder = ExtractorBuilder([
+            {'bsie.extractor.generic.constant.Constant': {
+                'schema': '''
+                    bse:author rdfs:subClassOf bsfs:Predicate ;
+                        rdfs:domain bsfs:Entity ;
+                        rdfs:range xsd:string ;
+                        owl:maxCardinality "1"^^xsd:number .
+                    bse:rating rdfs:subClassOf bsfs:Predicate ;
+                        rdfs:domain bsfs:Entity ;
+                        rdfs:range xsd:integer ;
+                        owl:maxCardinality "1"^^xsd:number .
+                    ''',
+                'tuples': [
+                    ('http://bsfs.ai/schema/Entity#author', 'Me, myself, and I'),
+                    ('http://bsfs.ai/schema/Entity#rating', 123),
+                    ],
+            }}])
+        obj = builder.build(0)
+        import bsie.extractor.generic.constant
+        self.assertEqual(obj, bsie.extractor.generic.constant.Constant('''
+                    bse:author rdfs:subClassOf bsfs:Predicate ;
+                        rdfs:domain bsfs:Entity ;
+                        rdfs:range xsd:string ;
+                        owl:maxCardinality "1"^^xsd:number .
+                    bse:rating rdfs:subClassOf bsfs:Predicate ;
+                        rdfs:domain bsfs:Entity ;
+                        rdfs:range xsd:integer ;
+                        owl:maxCardinality "1"^^xsd:number .
+                    ''', [
+            ('http://bsfs.ai/schema/Entity#author', 'Me, myself, and I'),
+            ('http://bsfs.ai/schema/Entity#rating', 123),
+            ]))
+
+        # building with invalid args
+        self.assertRaises(errors.BuilderError, ExtractorBuilder(
+            [{'bsie.extractor.generic.path.Path': {'foo': 123}}]).build, 0)
+        # non-dict build specification
+        self.assertRaises(TypeError, ExtractorBuilder(
+            [('bsie.extractor.generic.path.Path', {})]).build, 0)
+        # multiple keys per build specification
+        self.assertRaises(TypeError, ExtractorBuilder(
+            [{'bsie.extractor.generic.path.Path': {},
+              'bsie.extractor.generic.stat.Stat': {}}]).build, 0)
+        # non-dict value for kwargs
+        self.assertRaises(TypeError, ExtractorBuilder(
+            [{'bsie.extractor.generic.path.Path': 123}]).build, 0)
+
+
+
+
+class TestPipelineBuilder(unittest.TestCase):
+    def test_build(self):
+        prefix = URI('http://example.com/local/file#')
+        c_schema = '''
+            bse:author rdfs:subClassOf bsfs:Predicate ;
+                rdfs:domain bsfs:Entity ;
+                rdfs:range xsd:string ;
+                owl:maxCardinality "1"^^xsd:number .
+            '''
+        c_tuples = [('http://bsfs.ai/schema/Entity#author', 'Me, myself, and I')]
+        # prepare builders
+        rbuild = ReaderBuilder({})
+        ebuild = ExtractorBuilder([
+            {'bsie.extractor.generic.path.Path': {}},
+            {'bsie.extractor.generic.stat.Stat': {}},
+            {'bsie.extractor.generic.constant.Constant': dict(
+                schema=c_schema,
+                tuples=c_tuples,
+                )},
+            ])
+        # build pipeline
+        builder = PipelineBuilder(prefix, rbuild, ebuild)
+        pipeline = builder.build()
+        # delayed import
+        import bsie.reader.path
+        import bsie.reader.stat
+        import bsie.extractor.generic.path
+        import bsie.extractor.generic.stat
+        import bsie.extractor.generic.constant
+        # check pipeline
+        self.assertDictEqual(pipeline._ext2rdr, {
+            bsie.extractor.generic.path.Path(): bsie.reader.path.Path(),
+            bsie.extractor.generic.stat.Stat(): bsie.reader.stat.Stat(),
+            bsie.extractor.generic.constant.Constant(c_schema, c_tuples): None,
+            })
+
+        # fail to load extractor
+        ebuild_err = ExtractorBuilder([
+            {'bsie.extractor.generic.foo.Foo': {}},
+            {'bsie.extractor.generic.path.Path': {}},
+            ])
+        with self.assertLogs(logging.getLogger('bsie.tools.builder'), logging.ERROR):
+            pipeline = PipelineBuilder(prefix, rbuild, ebuild_err).build()
+            self.assertDictEqual(pipeline._ext2rdr, {
+                bsie.extractor.generic.path.Path(): bsie.reader.path.Path()})
+
+        # fail to build extractor
+        ebuild_err = ExtractorBuilder([
+            {'bsie.extractor.generic.path.Path': {'foo': 123}},
+            {'bsie.extractor.generic.path.Path': {}},
+            ])
+        with self.assertLogs(logging.getLogger('bsie.tools.builder'), logging.ERROR):
+            pipeline = PipelineBuilder(prefix, rbuild, ebuild_err).build()
+            self.assertDictEqual(pipeline._ext2rdr, {
+                bsie.extractor.generic.path.Path(): bsie.reader.path.Path()})
+
+        # fail to load reader
+        with self.assertLogs(logging.getLogger('bsie.tools.builder'), logging.ERROR):
+            # switch reader of an extractor
+            old_reader = bsie.extractor.generic.path.Path.CONTENT_READER
+            bsie.extractor.generic.path.Path.CONTENT_READER = 'bsie.reader.foo.Foo'
+            # build pipeline with invalid reader reference
+            pipeline = PipelineBuilder(prefix, rbuild, ebuild).build()
+            self.assertDictEqual(pipeline._ext2rdr, {
+                bsie.extractor.generic.stat.Stat(): bsie.reader.stat.Stat(),
+                bsie.extractor.generic.constant.Constant(c_schema, c_tuples): None,
+                })
+            # switch back
+            bsie.extractor.generic.path.Path.CONTENT_READER = old_reader
+
+        # fail to build reader
+        rbuild_err = ReaderBuilder({'bsie.reader.stat.Stat': dict(foo=123)})
+        with self.assertLogs(logging.getLogger('bsie.tools.builder'), logging.ERROR):
+            pipeline = PipelineBuilder(prefix, rbuild_err, ebuild).build()
+            self.assertDictEqual(pipeline._ext2rdr, {
+                bsie.extractor.generic.path.Path(): bsie.reader.path.Path(),
+                bsie.extractor.generic.constant.Constant(c_schema, c_tuples): None,
+                })
+
+
+## main ##
+
+if __name__ == '__main__':
+    unittest.main()
+
+## EOF ##
diff --git a/test/tools/test_pipeline.py b/test/tools/test_pipeline.py
new file mode 100644
index 0000000..9888d2e
--- /dev/null
+++ b/test/tools/test_pipeline.py
@@ -0,0 +1,167 @@
+"""
+
+Part of the bsie test suite.
+A copy of the license is provided with the project.
+Author: Matthias Baumgartner, 2022
+"""
+# imports
+import logging
+import os
+import unittest
+
+# bsie imports
+from bsie.base import errors
+from bsie.utils import ns
+from bsie.utils.bsfs import URI
+from bsie.utils.node import Node
+import bsie.extractor.generic.constant
+import bsie.extractor.generic.path
+import bsie.extractor.generic.stat
+import bsie.reader.path
+import bsie.reader.stat
+
+# objects to test
+from bsie.tools.pipeline import Pipeline
+
+
+## code ##
+
+class TestPipeline(unittest.TestCase):
+    def setUp(self):
+        # constant A
+        csA = '''
+            bse:author rdfs:subClassOf bsfs:Predicate ;
+                rdfs:domain bsfs:Entity ;
+                rdfs:range xsd:string ;
+                owl:maxCardinality "1"^^xsd:number .
+            '''
+        tupA = [('http://bsfs.ai/schema/Entity#author', 'Me, myself, and I')]
+        # constant B
+        csB = '''
+            bse:rating rdfs:subClassOf bsfs:Predicate ;
+                rdfs:domain bsfs:Entity ;
+                rdfs:range xsd:integer ;
+                owl:maxCardinality "1"^^xsd:number .
+            '''
+        tupB = [('http://bsfs.ai/schema/Entity#rating', 123)]
+        # extractors/readers
+        self.ext2rdr = {
+            bsie.extractor.generic.path.Path(): bsie.reader.path.Path(),
+            bsie.extractor.generic.stat.Stat(): bsie.reader.stat.Stat(),
+            bsie.extractor.generic.constant.Constant(csA, tupA): None,
+            bsie.extractor.generic.constant.Constant(csB, tupB): None,
+        }
+        self.prefix = URI('http://example.com/local/file#')
+
+    def test_essentials(self):
+        pipeline = Pipeline(self.prefix, self.ext2rdr)
+        self.assertEqual(str(pipeline), 'Pipeline')
+        self.assertEqual(repr(pipeline), 'Pipeline(...)')
+
+    def test_equality(self):
+        pipeline = Pipeline(self.prefix, self.ext2rdr)
+        # a pipeline is equivalent to itself
+        self.assertEqual(pipeline, pipeline)
+        self.assertEqual(hash(pipeline), hash(pipeline))
+        # identical builds are equivalent
+        self.assertEqual(pipeline, Pipeline(self.prefix, self.ext2rdr))
+        self.assertEqual(hash(pipeline), hash(Pipeline(self.prefix, self.ext2rdr)))
+
+        # equivalence respects prefix
+        self.assertNotEqual(pipeline, Pipeline(URI('http://example.com/global/ent#'), self.ext2rdr))
+        self.assertNotEqual(hash(pipeline), hash(Pipeline(URI('http://example.com/global/ent#'), self.ext2rdr)))
+        # equivalence respects extractors/readers
+        ext2rdr = {ext: rdr for idx, (ext, rdr) in enumerate(self.ext2rdr.items()) if idx % 2 == 0}
+        self.assertNotEqual(pipeline, Pipeline(self.prefix, ext2rdr))
+        self.assertNotEqual(hash(pipeline), hash(Pipeline(self.prefix, ext2rdr)))
+
+        # equivalence respects schema
+        p2 = Pipeline(self.prefix, self.ext2rdr)
+        p2.schema = pipeline.schema.Empty()
+        self.assertNotEqual(pipeline, p2)
+        self.assertNotEqual(hash(pipeline), hash(p2))
+
+        # not equal to other types
+        class Foo(): pass
+        self.assertNotEqual(pipeline, Foo())
+        self.assertNotEqual(hash(pipeline), hash(Foo()))
+        self.assertNotEqual(pipeline, 123)
+        self.assertNotEqual(hash(pipeline), hash(123))
+        self.assertNotEqual(pipeline, None)
+        self.assertNotEqual(hash(pipeline), hash(None))
+
+
+    def test_call(self):
+        # build pipeline
+        pipeline = Pipeline(self.prefix, self.ext2rdr)
+        # build objects for tests
+        content_hash = 'e3bb4ab54e4a50d75626a1f76814f152f4edc60a82ad724aa2aa922ca5534427'
+        subject = Node(ns.bsfs.Entity, self.prefix + content_hash)
+        testfile = os.path.join(os.path.dirname(__file__), 'testfile.t')
+        p_filename = pipeline.schema.predicate(ns.bse.filename)
+        p_filesize = pipeline.schema.predicate(ns.bse.filesize)
+        p_author = pipeline.schema.predicate(ns.bse.author)
+        p_rating = pipeline.schema.predicate(ns.bse.rating)
+        entity = pipeline.schema.node(ns.bsfs.Entity)
+        p_invalid = pipeline.schema.predicate(ns.bsfs.Predicate).get_child(ns.bse.foo, range=entity)
+
+        # extract given predicates
+        self.assertSetEqual(set(pipeline(testfile, {p_filename, p_filesize})), {
+            (subject, p_filename, 'testfile.t'),
+            (subject, p_filesize, 11),
+            })
+        self.assertSetEqual(set(pipeline(testfile, {p_author})), {
+            (subject, p_author, 'Me, myself, and I'),
+            })
+        self.assertSetEqual(set(pipeline(testfile, {p_filename})), {
+            (subject, p_filename, 'testfile.t'),
+            })
+        self.assertSetEqual(set(pipeline(testfile, {p_filesize})), {
+            (subject, p_filesize, 11),
+            })
+        # extract all predicates
+        self.assertSetEqual(set(pipeline(testfile)), {
+            (subject, p_filename, 'testfile.t'),
+            (subject, p_filesize, 11),
+            (subject, p_author, 'Me, myself, and I'),
+            (subject, p_rating, 123),
+            })
+        # invalid predicate
+        self.assertSetEqual(set(pipeline(testfile, {p_invalid})), set())
+        # valid/invalid predicates mixed
+        self.assertSetEqual(set(pipeline(testfile, {p_filename, p_invalid})), {
+            (subject, p_filename, 'testfile.t'),
+            })
+        # invalid path
+        self.assertRaises(FileNotFoundError, list, pipeline('inexistent_file'))
+        # FIXME: unreadable file (e.g. permissions error)
+
+    def test_call_reader_err(self):
+        class FaultyReader(bsie.reader.path.Path):
+            def __call__(self, path):
+                raise errors.ReaderError('reader error')
+
+        pipeline = Pipeline(self.prefix, {bsie.extractor.generic.path.Path(): FaultyReader()})
+        with self.assertLogs(logging.getLogger('bsie.tools.pipeline'), logging.ERROR):
+            testfile = os.path.join(os.path.dirname(__file__), 'testfile.t')
+            p_filename = pipeline.schema.predicate(ns.bse.filename)
+            self.assertSetEqual(set(pipeline(testfile, {p_filename})), set())
+
+    def test_call_extractor_err(self):
+        class FaultyExtractor(bsie.extractor.generic.path.Path):
+            def extract(self, subject, content, predicates):
+                raise errors.ExtractorError('extractor error')
+
+        pipeline = Pipeline(self.prefix, {FaultyExtractor(): bsie.reader.path.Path()})
+        with self.assertLogs(logging.getLogger('bsie.tools.pipeline'), logging.ERROR):
+            testfile = os.path.join(os.path.dirname(__file__), 'testfile.t')
+            p_filename = pipeline.schema.predicate(ns.bse.filename)
+            self.assertSetEqual(set(pipeline(testfile, {p_filename})), set())
+
+
+## main ##
+
+if __name__ == '__main__':
+    unittest.main()
+
+## EOF ##
diff --git a/test/tools/testfile.t b/test/tools/testfile.t
new file mode 100644
index 0000000..58bf1b8
--- /dev/null
+++ b/test/tools/testfile.t
@@ -0,0 +1 @@
+hello worl
-- 
cgit v1.2.3


From edc747252a04675c46059215751719b6666a77f9 Mon Sep 17 00:00:00 2001
From: Matthias Baumgartner <dev@igsor.net>
Date: Sat, 3 Dec 2022 18:57:58 +0100
Subject: adapt to schema interface update: owl:maxCardinality changed to
 bsfs:unique

---
 bsie/base/extractor.py                  |  1 -
 bsie/extractor/generic/path.py          |  2 +-
 bsie/extractor/generic/stat.py          |  2 +-
 test/base/test_extractor.py             |  4 ++--
 test/extractor/generic/test_constant.py | 12 ++++++------
 test/extractor/generic/test_path.py     |  2 +-
 test/extractor/generic/test_stat.py     |  2 +-
 test/tools/test_builder.py              | 10 +++++-----
 test/tools/test_pipeline.py             |  4 ++--
 9 files changed, 19 insertions(+), 20 deletions(-)

diff --git a/bsie/base/extractor.py b/bsie/base/extractor.py
index 2fc4f18..75b7173 100644
--- a/bsie/base/extractor.py
+++ b/bsie/base/extractor.py
@@ -23,7 +23,6 @@ __all__: typing.Sequence[str] = (
 # NOTE: The definition here is only for convenience; Each Extractor must implement its use, if so desired.
 SCHEMA_PREAMBLE = '''
     # common external prefixes
-    prefix owl: <http://www.w3.org/2002/07/owl#>
     prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
     prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#>
     prefix xsd: <http://www.w3.org/2001/XMLSchema#>
diff --git a/bsie/extractor/generic/path.py b/bsie/extractor/generic/path.py
index f346f97..e6b901e 100644
--- a/bsie/extractor/generic/path.py
+++ b/bsie/extractor/generic/path.py
@@ -36,7 +36,7 @@ class Path(extractor.Extractor):
                 rdfs:range xsd:string ;
                 rdfs:label "File name"^^xsd:string ;
                 schema:description "Filename of entity in some filesystem."^^xsd:string ;
-                owl:maxCardinality "INF"^^xsd:number .
+                bsfs:unique "false"^^xsd:boolean .
             '''))
         self._callmap = {
             self.schema.predicate(ns.bse.filename): self.__filename,
diff --git a/bsie/extractor/generic/stat.py b/bsie/extractor/generic/stat.py
index 7088c0a..6493d37 100644
--- a/bsie/extractor/generic/stat.py
+++ b/bsie/extractor/generic/stat.py
@@ -36,7 +36,7 @@ class Stat(extractor.Extractor):
                 rdfs:range xsd:integer ;
                 rdfs:label "File size"^^xsd:string ;
                 schema:description "File size of entity in some filesystem."^^xsd:string ;
-                owl:maxCardinality "INF"^^xsd:number .
+                bsfs:unique "false"^^xsd:boolean .
             '''))
         self._callmap = {
             self.schema.predicate(ns.bse.filesize): self.__filesize,
diff --git a/test/base/test_extractor.py b/test/base/test_extractor.py
index 7a00079..be876ad 100644
--- a/test/base/test_extractor.py
+++ b/test/base/test_extractor.py
@@ -23,11 +23,11 @@ class StubExtractor(extractor.Extractor):
             bse:author rdfs:subClassOf bsfs:Predicate ;
                 rdfs:domain bsfs:Entity ;
                 rdfs:range xsd:string ;
-                owl:maxCardinality "INF"^^xsd:number .
+                bsfs:unique "false"^^xsd:boolean .
             bse:comment rdfs:subClassOf bsfs:Predicate ;
                 rdfs:domain bsfs:Entity ;
                 rdfs:range xsd:string ;
-                owl:maxCardinality "INF"^^xsd:number .
+                bsfs:unique "false"^^xsd:boolean .
         '''))
 
     def extract(self, subject, content, predicates):
diff --git a/test/extractor/generic/test_constant.py b/test/extractor/generic/test_constant.py
index aa33fb4..7f72ccf 100644
--- a/test/extractor/generic/test_constant.py
+++ b/test/extractor/generic/test_constant.py
@@ -23,11 +23,11 @@ class TestConstant(unittest.TestCase):
             bse:author rdfs:subClassOf bsfs:Predicate ;
                 rdfs:domain bsfs:Entity ;
                 rdfs:range xsd:string ;
-                owl:maxCardinality "1"^^xsd:number .
+                bsfs:unique "true"^^xsd:boolean .
             bse:comment rdfs:subClassOf bsfs:Predicate ;
                 rdfs:domain bsfs:Entity ;
                 rdfs:range xsd:string ;
-                owl:maxCardinality "INF"^^xsd:number .
+                bsfs:unique "false"^^xsd:boolean .
             '''
         tuples = [
             (ns.bse.author, 'Me, myself, and I'),
@@ -58,11 +58,11 @@ class TestConstant(unittest.TestCase):
             bse:author rdfs:subClassOf bsfs:Predicate ;
                 rdfs:domain bsfs:Entity ;
                 rdfs:range xsd:string ;
-                owl:maxCardinality "1"^^xsd:number .
+                bsfs:unique "true"^^xsd:boolean .
             bse:comment rdfs:subClassOf bsfs:Predicate ;
                 rdfs:domain bsfs:Entity ;
                 rdfs:range xsd:string ;
-                owl:maxCardinality "INF"^^xsd:number .
+                bsfs:unique "false"^^xsd:boolean .
             '''
         # can create a schema
         self.assertIsInstance(Constant(schema, [
@@ -83,13 +83,13 @@ class TestConstant(unittest.TestCase):
             bse:author rdfs:subClassOf bsfs:Predicate ;
                 rdfs:domain bsfs:Entity ;
                 rdfs:range xsd:string ;
-                owl:maxCardinality "1"^^xsd:number .
+                bsfs:unique "true"^^xsd:boolean .
             '''
         schema_b = '''
             bse:comment rdfs:subClassOf bsfs:Predicate ;
                 rdfs:domain bsfs:Entity ;
                 rdfs:range xsd:string ;
-                owl:maxCardinality "INF"^^xsd:number .
+                bsfs:unique "false"^^xsd:boolean .
             '''
         tuples_a = [(ns.bse.author, 'Me, myself, and I')]
         tuples_b = [(ns.bse.comment, 'the quick brown fox jumps over the lazy dog.') ]
diff --git a/test/extractor/generic/test_path.py b/test/extractor/generic/test_path.py
index 9376c7c..aa21b04 100644
--- a/test/extractor/generic/test_path.py
+++ b/test/extractor/generic/test_path.py
@@ -35,7 +35,7 @@ class TestPath(unittest.TestCase):
                 bse:filename rdfs:subClassOf bsfs:Predicate ;
                     rdfs:domain bsfs:Entity ;
                     rdfs:range xsd:string ;
-                    owl:maxCardinality "INF"^^xsd:number .
+                    bsfs:unique "false"^^xsd:boolean .
                 '''))
 
     def test_extract(self):
diff --git a/test/extractor/generic/test_stat.py b/test/extractor/generic/test_stat.py
index 26dad6a..bed5fab 100644
--- a/test/extractor/generic/test_stat.py
+++ b/test/extractor/generic/test_stat.py
@@ -36,7 +36,7 @@ class TestStat(unittest.TestCase):
                 bse:filesize rdfs:subClassOf bsfs:Predicate ;
                     rdfs:domain bsfs:Entity ;
                     rdfs:range xsd:integer ;
-                    owl:maxCardinality "INF"^^xsd:number .
+                    bsfs:unique "false"^^xsd:boolean .
                 '''))
 
     def test_extract(self):
diff --git a/test/tools/test_builder.py b/test/tools/test_builder.py
index bef0e9d..bc6f903 100644
--- a/test/tools/test_builder.py
+++ b/test/tools/test_builder.py
@@ -115,11 +115,11 @@ class TestExtractorBuilder(unittest.TestCase):
                     bse:author rdfs:subClassOf bsfs:Predicate ;
                         rdfs:domain bsfs:Entity ;
                         rdfs:range xsd:string ;
-                        owl:maxCardinality "1"^^xsd:number .
+                        bsfs:unique "true"^^xsd:boolean .
                     bse:rating rdfs:subClassOf bsfs:Predicate ;
                         rdfs:domain bsfs:Entity ;
                         rdfs:range xsd:integer ;
-                        owl:maxCardinality "1"^^xsd:number .
+                        bsfs:unique "true"^^xsd:boolean .
                     ''',
                 'tuples': [
                     ('http://bsfs.ai/schema/Entity#author', 'Me, myself, and I'),
@@ -132,11 +132,11 @@ class TestExtractorBuilder(unittest.TestCase):
                     bse:author rdfs:subClassOf bsfs:Predicate ;
                         rdfs:domain bsfs:Entity ;
                         rdfs:range xsd:string ;
-                        owl:maxCardinality "1"^^xsd:number .
+                        bsfs:unique "true"^^xsd:boolean .
                     bse:rating rdfs:subClassOf bsfs:Predicate ;
                         rdfs:domain bsfs:Entity ;
                         rdfs:range xsd:integer ;
-                        owl:maxCardinality "1"^^xsd:number .
+                        bsfs:unique "true"^^xsd:boolean .
                     ''', [
             ('http://bsfs.ai/schema/Entity#author', 'Me, myself, and I'),
             ('http://bsfs.ai/schema/Entity#rating', 123),
@@ -166,7 +166,7 @@ class TestPipelineBuilder(unittest.TestCase):
             bse:author rdfs:subClassOf bsfs:Predicate ;
                 rdfs:domain bsfs:Entity ;
                 rdfs:range xsd:string ;
-                owl:maxCardinality "1"^^xsd:number .
+                bsfs:unique "true"^^xsd:boolean .
             '''
         c_tuples = [('http://bsfs.ai/schema/Entity#author', 'Me, myself, and I')]
         # prepare builders
diff --git a/test/tools/test_pipeline.py b/test/tools/test_pipeline.py
index 9888d2e..f98b329 100644
--- a/test/tools/test_pipeline.py
+++ b/test/tools/test_pipeline.py
@@ -33,7 +33,7 @@ class TestPipeline(unittest.TestCase):
             bse:author rdfs:subClassOf bsfs:Predicate ;
                 rdfs:domain bsfs:Entity ;
                 rdfs:range xsd:string ;
-                owl:maxCardinality "1"^^xsd:number .
+                bsfs:unique "true"^^xsd:boolean .
             '''
         tupA = [('http://bsfs.ai/schema/Entity#author', 'Me, myself, and I')]
         # constant B
@@ -41,7 +41,7 @@ class TestPipeline(unittest.TestCase):
             bse:rating rdfs:subClassOf bsfs:Predicate ;
                 rdfs:domain bsfs:Entity ;
                 rdfs:range xsd:integer ;
-                owl:maxCardinality "1"^^xsd:number .
+                bsfs:unique "true"^^xsd:boolean .
             '''
         tupB = [('http://bsfs.ai/schema/Entity#rating', 123)]
         # extractors/readers
-- 
cgit v1.2.3


From 559e643bb1fa39feefd2eb73847ad9420daf1deb Mon Sep 17 00:00:00 2001
From: Matthias Baumgartner <dev@igsor.net>
Date: Wed, 14 Dec 2022 06:10:25 +0100
Subject: bsie extraction and info apps

---
 bsie.app                                   |  49 ++++++
 bsie/__init__.py                           |   6 +
 bsie/apps/__init__.py                      |  20 +++
 bsie/apps/index.py                         | 131 ++++++++++++++++
 bsie/apps/info.py                          |  74 +++++++++
 bsie/base/errors.py                        |   6 +
 bsie/lib/__init__.py                       |  13 ++
 bsie/lib/bsie.py                           |  80 ++++++++++
 bsie/tools/pipeline.py                     |   4 +
 bsie/utils/namespaces.py                   |   2 +-
 test/apps/__init__.py                      |   0
 test/apps/test_index.py                    | 159 ++++++++++++++++++++
 test/apps/test_info.py                     |  42 ++++++
 test/apps/testdir/alpha/alpha_first        |  16 ++
 test/apps/testdir/alpha/alpha_second       |  12 ++
 test/apps/testdir/alpha/omega/omega_first  |  14 ++
 test/apps/testdir/alpha/omega/omega_second |  10 ++
 test/apps/testdir/foo/bar/bar_first        |  20 +++
 test/apps/testdir/foo/bar/bar_second       |  14 ++
 test/apps/testdir/foo/foo_first            |  11 ++
 test/apps/testdir/foo/foo_second           |  12 ++
 test/apps/testdir/td_first                 |  18 +++
 test/apps/testdir/td_second                |  14 ++
 test/apps/testfile                         |  16 ++
 test/lib/__init__.py                       |   0
 test/lib/test_bsie.py                      | 231 +++++++++++++++++++++++++++++
 test/lib/testfile.t                        |   1 +
 test/tools/test_pipeline.py                |  20 ++-
 test/tools/testfile.t                      |   2 +-
 29 files changed, 991 insertions(+), 6 deletions(-)
 create mode 100755 bsie.app
 create mode 100644 bsie/apps/__init__.py
 create mode 100644 bsie/apps/index.py
 create mode 100644 bsie/apps/info.py
 create mode 100644 bsie/lib/__init__.py
 create mode 100644 bsie/lib/bsie.py
 create mode 100644 test/apps/__init__.py
 create mode 100644 test/apps/test_index.py
 create mode 100644 test/apps/test_info.py
 create mode 100644 test/apps/testdir/alpha/alpha_first
 create mode 100644 test/apps/testdir/alpha/alpha_second
 create mode 100644 test/apps/testdir/alpha/omega/omega_first
 create mode 100644 test/apps/testdir/alpha/omega/omega_second
 create mode 100644 test/apps/testdir/foo/bar/bar_first
 create mode 100644 test/apps/testdir/foo/bar/bar_second
 create mode 100644 test/apps/testdir/foo/foo_first
 create mode 100644 test/apps/testdir/foo/foo_second
 create mode 100644 test/apps/testdir/td_first
 create mode 100644 test/apps/testdir/td_second
 create mode 100644 test/apps/testfile
 create mode 100644 test/lib/__init__.py
 create mode 100644 test/lib/test_bsie.py
 create mode 100644 test/lib/testfile.t

diff --git a/bsie.app b/bsie.app
new file mode 100755
index 0000000..ba9cee7
--- /dev/null
+++ b/bsie.app
@@ -0,0 +1,49 @@
+"""BSIE tools.
+
+Part of the bsie module.
+A copy of the license is provided with the project.
+Author: Matthias Baumgartner, 2022
+"""
+# imports
+import argparse
+import typing
+
+# module imports
+import bsie
+import bsie.apps
+
+# exports
+__all__: typing.Sequence[str] = (
+    'main',
+    )
+
+# config
+apps = {
+    'index'     : bsie.apps.index,
+    'info'      : bsie.apps.info,
+    }
+
+
+## code ##
+
+def main(argv):
+    """Black Star File System maintenance tools."""
+    parser = argparse.ArgumentParser(description=main.__doc__, prog='bsie')
+    parser.add_argument('--version', action='version',
+        version='%(prog)s version {}.{}.{}'.format(*bsie.version_info))
+    parser.add_argument('app', choices=apps.keys(),
+        help='Select the application to run.')
+    parser.add_argument('rest', nargs=argparse.REMAINDER)
+    # parse the arguments handed to main (argv) instead of implicitly reading sys.argv
+    args = parser.parse_args(argv)
+    # run the selected application on the remaining, unparsed arguments
+    apps[args.app](args.rest)
+
+   
+## main ##
+
+if __name__ == '__main__':
+    import sys
+    main(sys.argv[1:])
+
+## EOF ##
diff --git a/bsie/__init__.py b/bsie/__init__.py
index 2f2477a..2b874bd 100644
--- a/bsie/__init__.py
+++ b/bsie/__init__.py
@@ -5,8 +5,14 @@ A copy of the license is provided with the project.
 Author: Matthias Baumgartner, 2022
 """
 # imports
+import collections
 import typing
 
+# constants
+version_info = collections.namedtuple('version_info',
+    ('major', 'minor', 'micro')) \
+    (0, 0, 1) # package version; the command line tool formats it for --version
+
 # exports
 __all__: typing.Sequence[str] = []
 
diff --git a/bsie/apps/__init__.py b/bsie/apps/__init__.py
new file mode 100644
index 0000000..a548c3c
--- /dev/null
+++ b/bsie/apps/__init__.py
@@ -0,0 +1,20 @@
+"""Command line applications of BSIE.
+
+Part of the bsie module.
+A copy of the license is provided with the project.
+Author: Matthias Baumgartner, 2022
+"""
+# imports
+import typing
+
+# inner-module imports
+from .index import main as index
+from .info import main as info
+
+# exports
+__all__: typing.Sequence[str] = (
+    'index',
+    'info',
+    )
+
+## EOF ##
diff --git a/bsie/apps/index.py b/bsie/apps/index.py
new file mode 100644
index 0000000..821aa4c
--- /dev/null
+++ b/bsie/apps/index.py
@@ -0,0 +1,131 @@
+"""The index application: extract triples from files and store them in BSFS.
+
+Part of the bsie module.
+A copy of the license is provided with the project.
+Author: Matthias Baumgartner, 2022
+"""
+# imports
+import argparse
+import os
+import typing
+
+# bsfs imports
+import bsfs
+
+# bsie imports
+from bsie.base import errors
+from bsie.lib.bsie import BSIE
+from bsie.tools import builder
+from bsie.utils.bsfs import URI
+
+# exports
+__all__: typing.Sequence[str] = (
+    'main',
+    )
+
+
+## code ##
+
+def main(argv):
+    """Index files or directories into BSFS."""
+    parser = argparse.ArgumentParser(description=main.__doc__, prog='index')
+    parser.add_argument('--user', type=URI, default=URI('http://example.com/me'),
+        help='URI of the user. Also serves as prefix of the generated file URIs.')
+    parser.add_argument('--collect', action='append', default=[],
+        help='Predicate to extract (repeatable). Extract all available ones if omitted.')
+    parser.add_argument('--discard', action='append', default=[],
+        help='Predicate to not extract (repeatable).')
+    parser.add_argument('-r', '--recursive', action='store_true', default=False,
+        help='Descend into directories.')
+    parser.add_argument('--follow', action='store_true', default=False,
+        help='Follow symbolic links when descending into directories.')
+    parser.add_argument('--print', action='store_true', default=False,
+        help='Print the extracted triples instead of storing them.')
+    parser.add_argument('input_file', nargs=argparse.REMAINDER,
+        help='Files or directories to index.')
+    args = parser.parse_args(argv)
+
+    # FIXME: Read reader/extractor configs from a config file
+    # reader builder
+    rbuild = builder.ReaderBuilder({})
+    # extractor builder
+    ebuild = builder.ExtractorBuilder([
+        {'bsie.extractor.generic.path.Path': {}},
+        {'bsie.extractor.generic.stat.Stat': {}},
+        {'bsie.extractor.generic.constant.Constant': dict(
+            tuples=[('http://bsfs.ai/schema/Entity#author', 'Me, myself, and I')],
+            schema='''
+                bse:author rdfs:subClassOf bsfs:Predicate ;
+                    rdfs:domain bsfs:Entity ;
+                    rdfs:range xsd:string ;
+                    bsfs:unique "true"^^xsd:boolean .
+                ''',
+            )},
+        ])
+    # pipeline builder; file URIs are created below '<user>/file#'
+    prefix = URI(args.user + ('file#' if args.user.endswith('/') else '/file#'))
+    pbuild = builder.PipelineBuilder(
+        prefix,
+        rbuild,
+        ebuild,
+        )
+
+    # build pipeline
+    pipeline = pbuild.build()
+    # build BSIE frontend
+    bsie = BSIE(pipeline, args.collect, args.discard)
+
+
+    def walk(handle):
+        """Pass every triple extracted from the input files to *handle*."""
+        # FIXME: collect all triples by node, set all predicates at once
+        # FIXME: simplify code (below but maybe also above)
+        # FIXME: How to handle dependencies between data?
+        #        E.g. do I still want to link to a tag despite not being permitted to set its label?
+        # FIXME: node renaming?
+
+        # index input paths
+        for path in args.input_file:
+            if os.path.isdir(path) and args.recursive:
+                for dirpath, _, filenames in os.walk(path, topdown=True, followlinks=args.follow):
+                    for filename in filenames:
+                        for node, pred, value in bsie.from_file(os.path.join(dirpath, filename)):
+                            handle(node, pred, value)
+            elif os.path.isfile(path):
+                for node, pred, value in bsie.from_file(path):
+                    handle(node, pred, value)
+            else: # FIXME: also reached for missing paths and for directories without --recursive
+                raise errors.UnreachableError()
+
+
+    if args.print:
+        # only show the triples; nothing is stored, hence there is no store to return
+        walk(print)
+        return None
+    else:
+        # initialize bsfs
+        # NOTE: With persistent storages, the schema migration will be a separate operation.
+        # Here, we'd simply examine the schema and potentially discard more predicates.
+        store = bsfs.Open({
+            'Graph': {
+                'user': args.user,
+                'backend': {
+                    'SparqlStore': {}},
+                }})
+        store.migrate(bsie.schema)
+        # process files: set every extracted value on its node in the store
+        def handle(node, pred, value):
+            store.node(node.node_type, node.uri).set(pred.uri, value)
+        walk(handle)
+        # return the populated store so that callers can inspect it
+        return store
+
+
+
+## main ##
+
+if __name__ == '__main__':
+    import sys
+    main(sys.argv[1:])
+
+## EOF ##
diff --git a/bsie/apps/info.py b/bsie/apps/info.py
new file mode 100644
index 0000000..8cc6dca
--- /dev/null
+++ b/bsie/apps/info.py
@@ -0,0 +1,74 @@
+"""The info application: show information from BSIE.
+
+Part of the bsie module.
+A copy of the license is provided with the project.
+Author: Matthias Baumgartner, 2022
+"""
+# imports
+import argparse
+import sys
+import typing
+
+# bsie imports
+from bsie.base import errors
+from bsie.tools import builder
+from bsie.utils.bsfs import URI
+
+# exports
+__all__: typing.Sequence[str] = (
+    'main',
+    )
+
+
+## code ##
+
+def main(argv):
+    """Show information from BSIE."""
+    # command line interface; the docstring above doubles as its description
+    cli = argparse.ArgumentParser(description=main.__doc__, prog='info')
+    cli.add_argument('what', choices=('predicates', ),
+        help='Select what information to show.')
+    options = cli.parse_args(argv)
+
+    # FIXME: Read reader/extractor configs from a config file
+    # reader builder, created from an empty configuration
+    readers = builder.ReaderBuilder({})
+    # arguments of the constant extractor: the author tuple and its predicate definition
+    author = {
+        'tuples': [('http://bsfs.ai/schema/Entity#author', 'Me, myself, and I')],
+        'schema': '''
+                bse:author rdfs:subClassOf bsfs:Predicate ;
+                    rdfs:domain bsfs:Entity ;
+                    rdfs:range xsd:string ;
+                    bsfs:unique "true"^^xsd:boolean .
+                ''',
+        }
+    # extractor builder: path, stat, and the constant author
+    extractors = builder.ExtractorBuilder([
+        {'bsie.extractor.generic.path.Path': {}},
+        {'bsie.extractor.generic.stat.Stat': {}},
+        {'bsie.extractor.generic.constant.Constant': author},
+        ])
+
+    # assemble the pipeline builder and build the pipeline right away
+    pipeline = builder.PipelineBuilder(
+        URI('http://example.com/me/file#'), # not actually used
+        readers,
+        extractors,
+        ).build()
+
+    # args.what is already checked by argparse, hence no other value can occur
+    if options.what != 'predicates':
+        raise errors.UnreachableError()
+
+    # show predicates, one URI per line
+    for pred in pipeline.schema.predicates():
+        print(pred.uri)
+
+
+## main ##
+
+if __name__ == '__main__':
+    main(sys.argv[1:])
+
+## EOF ##
diff --git a/bsie/base/errors.py b/bsie/base/errors.py
index 760351f..dc3c30e 100644
--- a/bsie/base/errors.py
+++ b/bsie/base/errors.py
@@ -33,4 +33,10 @@ class ExtractorError(_BSIEError):
 class ReaderError(_BSIEError):
     """The Reader failed to read the given file."""
 
+class ProgrammingError(_BSIEError):
+    """An assertion-like error; raising it indicates an issue in the code base itself."""
+
+class UnreachableError(ProgrammingError):
+    """Raised at a point in code that should logically not be reachable."""
+
 ## EOF ##
diff --git a/bsie/lib/__init__.py b/bsie/lib/__init__.py
new file mode 100644
index 0000000..f6c9018
--- /dev/null
+++ b/bsie/lib/__init__.py
@@ -0,0 +1,13 @@
+"""The lib module contains the BSIE front-end.
+
+Part of the bsie module.
+A copy of the license is provided with the project.
+Author: Matthias Baumgartner, 2022
+"""
+# imports
+import typing
+
+# exports
+__all__: typing.Sequence[str] = []
+
+## EOF ##
diff --git a/bsie/lib/bsie.py b/bsie/lib/bsie.py
new file mode 100644
index 0000000..aeccc8c
--- /dev/null
+++ b/bsie/lib/bsie.py
@@ -0,0 +1,80 @@
+"""BSIE front-end that extracts triples from files through a pipeline.
+
+Part of the bsie module.
+A copy of the license is provided with the project.
+Author: Matthias Baumgartner, 2022
+"""
+# imports
+import typing
+
+# bsie imports
+from bsie.tools.pipeline import Pipeline
+from bsie.utils import node, ns
+from bsie.utils.bsfs import URI, schema as schema_
+
+# exports
+__all__: typing.Sequence[str] = (
+    'BSIE',
+    )
+
+
+## code ##
+
+class BSIE():
+    """Extract triples from files.
+
+    Controls which predicates to extract (*collect*) and which
+    to leave out (*discard*). Note that this only affects principal
+    predicates, not auxiliary predicates like, e.g., tag labels.
+
+    """
+
+    # URIs of the predicates that are extracted.
+    predicates: typing.Set[URI]
+
+    # schema restricted to the extracted predicates.
+    schema: schema_.Schema
+
+    def __init__(
+            self,
+            # pipeline that produces the triples.
+            pipeline: Pipeline,
+            # predicates to extract at most. None or empty implies all available ones.
+            collect: typing.Optional[typing.Iterable[URI]] = None,
+            # predicates to never extract.
+            discard: typing.Optional[typing.Iterable[URI]] = None,
+            ):
+        # keep the pipeline; it supplies the predicates, the schema, and the triples
+        self.pipeline = pipeline
+        # everything the pipeline offers is the starting point
+        selected = {pred.uri for pred in self.pipeline.predicates()}
+        # an empty *collect* imposes no restriction, just like None
+        wanted = set(collect) if collect is not None else set()
+        if wanted:
+            selected &= wanted
+        # drop the unwanted predicates
+        unwanted = set(discard) if discard is not None else set()
+        selected -= unwanted
+        # ns.bsfs.Predicate itself is never part of the selection
+        selected.discard(ns.bsfs.Predicate)
+        self.predicates = selected
+        # compile a schema that only contains the requested predicates (and implied types)
+        self.schema = schema_.Schema({
+            self.pipeline.schema.predicate(uri) for uri in selected})
+
+    def from_file(
+            self,
+            path: URI,
+            predicates: typing.Optional[typing.Iterable[URI]] = None,
+            ) -> typing.Iterator[typing.Tuple[node.Node, URI, typing.Any]]:
+        """Produce triples for a given *path*. Limit to *predicates* if given."""
+        if predicates is None:
+            # no restriction: use everything this instance extracts
+            requested = self.predicates
+        else:
+            # only extract what is both requested and selected by this instance
+            requested = set(predicates) & self.predicates
+        # translate the URIs into predicates of the local schema, then invoke the pipeline
+        yield from self.pipeline(path, {self.schema.predicate(uri) for uri in requested})
+
+## EOF ##
diff --git a/bsie/tools/pipeline.py b/bsie/tools/pipeline.py
index 8e1c992..da422c0 100644
--- a/bsie/tools/pipeline.py
+++ b/bsie/tools/pipeline.py
@@ -70,6 +70,10 @@ class Pipeline():
            and self._prefix == other._prefix \
            and self._ext2rdr == other._ext2rdr
 
+    def predicates(self) -> typing.Iterator[_schema.Predicate]:
+        """Iterate over the distinct predicates provided by the members of the pipeline."""
+        return iter({predicate for member in self._ext2rdr for predicate in member.predicates()})
+
     def __call__(
             self,
             path: URI,
diff --git a/bsie/utils/namespaces.py b/bsie/utils/namespaces.py
index 13be96b..2fcb2dc 100644
--- a/bsie/utils/namespaces.py
+++ b/bsie/utils/namespaces.py
@@ -13,7 +13,7 @@ from . import bsfs as _bsfs
 # constants
 bse = _bsfs.Namespace('http://bsfs.ai/schema/Entity#')
 bsfs = _bsfs.Namespace('http://bsfs.ai/schema/')
-bsm = _bsfs.Namespace('http://bsfs.ai/schema/meta#')
+bsm = _bsfs.Namespace('http://bsfs.ai/schema/Meta#')
 xsd = _bsfs.Namespace('http://www.w3.org/2001/XMLSchema#')
 
 # export
diff --git a/test/apps/__init__.py b/test/apps/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/test/apps/test_index.py b/test/apps/test_index.py
new file mode 100644
index 0000000..6d47df8
--- /dev/null
+++ b/test/apps/test_index.py
@@ -0,0 +1,159 @@
+"""Tests of the index application.
+
+Part of the bsie test suite.
+A copy of the license is provided with the project.
+Author: Matthias Baumgartner, 2022
+"""
+# imports
+import io
+import os
+import rdflib
+import sys
+import unittest
+
+# bsie imports
+from bsie.utils import ns
+
+# objects to test
+from bsie.apps.index import main
+
+
+## code ##
+
+class TestIndex(unittest.TestCase):
+    """Test the index application."""
+    def test_main(self):
+        """Index the test files into a store and check the stored triples."""
+        bsfs = main(['-r', '--user', 'http://example.com/me',
+            os.path.join(os.path.dirname(__file__), 'testdir'),
+            os.path.join(os.path.dirname(__file__), 'testfile'),
+            ])
+
+        prefix = 'http://example.com/me/file#'
+        self.assertTrue(set(bsfs._Graph__backend.graph).issuperset({
+            (rdflib.URIRef(prefix + '2f4109b40107cc50e0884755a1a961ed126887e49b8dbaf0e146b2e226aa6647'), rdflib.RDF.type, rdflib.URIRef(ns.bsfs.Entity)),
+            (rdflib.URIRef(prefix + '2f4109b40107cc50e0884755a1a961ed126887e49b8dbaf0e146b2e226aa6647'), rdflib.URIRef(ns.bse.author), rdflib.Literal('Me, myself, and I', datatype=rdflib.XSD.string)),
+            (rdflib.URIRef(prefix + '2f4109b40107cc50e0884755a1a961ed126887e49b8dbaf0e146b2e226aa6647'), rdflib.URIRef(ns.bse.filename), rdflib.Literal('alpha_second', datatype=rdflib.XSD.string)),
+            (rdflib.URIRef(prefix + '2f4109b40107cc50e0884755a1a961ed126887e49b8dbaf0e146b2e226aa6647'), rdflib.URIRef(ns.bse.filesize), rdflib.Literal('696', datatype=rdflib.XSD.integer)),
+            (rdflib.URIRef(prefix + '441f3d10c8ff489fe8e33e639606512f6c463151cc429de7e554b9af670c2ece'), rdflib.RDF.type, rdflib.URIRef(ns.bsfs.Entity)),
+            (rdflib.URIRef(prefix + '441f3d10c8ff489fe8e33e639606512f6c463151cc429de7e554b9af670c2ece'), rdflib.URIRef(ns.bse.author), rdflib.Literal('Me, myself, and I', datatype=rdflib.XSD.string)),
+            (rdflib.URIRef(prefix + '441f3d10c8ff489fe8e33e639606512f6c463151cc429de7e554b9af670c2ece'), rdflib.URIRef(ns.bse.filename), rdflib.Literal('omega_second', datatype=rdflib.XSD.string)),
+            (rdflib.URIRef(prefix + '441f3d10c8ff489fe8e33e639606512f6c463151cc429de7e554b9af670c2ece'), rdflib.URIRef(ns.bse.filesize), rdflib.Literal('503', datatype=rdflib.XSD.integer)),
+            (rdflib.URIRef(prefix + '69b98ecf7aff3e95b09688ba93331678eb8397817111f674c9558e6dd8f5e871'), rdflib.RDF.type, rdflib.URIRef(ns.bsfs.Entity)),
+            (rdflib.URIRef(prefix + '69b98ecf7aff3e95b09688ba93331678eb8397817111f674c9558e6dd8f5e871'), rdflib.URIRef(ns.bse.author), rdflib.Literal('Me, myself, and I', datatype=rdflib.XSD.string)),
+            (rdflib.URIRef(prefix + '69b98ecf7aff3e95b09688ba93331678eb8397817111f674c9558e6dd8f5e871'), rdflib.URIRef(ns.bse.filename), rdflib.Literal('td_first', datatype=rdflib.XSD.string)),
+            (rdflib.URIRef(prefix + '69b98ecf7aff3e95b09688ba93331678eb8397817111f674c9558e6dd8f5e871'), rdflib.URIRef(ns.bse.filesize), rdflib.Literal('911', datatype=rdflib.XSD.integer)),
+            (rdflib.URIRef(prefix + '78f7eb7f0d8221cdb2cb26c978fa42a11f75eb87becc768f4474134cb1e06926'), rdflib.RDF.type, rdflib.URIRef(ns.bsfs.Entity)),
+            (rdflib.URIRef(prefix + '78f7eb7f0d8221cdb2cb26c978fa42a11f75eb87becc768f4474134cb1e06926'), rdflib.URIRef(ns.bse.author), rdflib.Literal('Me, myself, and I', datatype=rdflib.XSD.string)),
+            (rdflib.URIRef(prefix + '78f7eb7f0d8221cdb2cb26c978fa42a11f75eb87becc768f4474134cb1e06926'), rdflib.URIRef(ns.bse.filename), rdflib.Literal('testfile', datatype=rdflib.XSD.string)),
+            (rdflib.URIRef(prefix + '78f7eb7f0d8221cdb2cb26c978fa42a11f75eb87becc768f4474134cb1e06926'), rdflib.URIRef(ns.bse.filesize), rdflib.Literal('885', datatype=rdflib.XSD.integer)),
+            (rdflib.URIRef(prefix + '80818b8ec2ee1919116dba9c8a7e0a4608313cf3b463cd88e9ed77a700dd92d3'), rdflib.RDF.type, rdflib.URIRef(ns.bsfs.Entity)),
+            (rdflib.URIRef(prefix + '80818b8ec2ee1919116dba9c8a7e0a4608313cf3b463cd88e9ed77a700dd92d3'), rdflib.URIRef(ns.bse.author), rdflib.Literal('Me, myself, and I', datatype=rdflib.XSD.string)),
+            (rdflib.URIRef(prefix + '80818b8ec2ee1919116dba9c8a7e0a4608313cf3b463cd88e9ed77a700dd92d3'), rdflib.URIRef(ns.bse.filename), rdflib.Literal('bar_first', datatype=rdflib.XSD.string)),
+            (rdflib.URIRef(prefix + '80818b8ec2ee1919116dba9c8a7e0a4608313cf3b463cd88e9ed77a700dd92d3'), rdflib.URIRef(ns.bse.filesize), rdflib.Literal('956', datatype=rdflib.XSD.integer)),
+            (rdflib.URIRef(prefix + '976d2ea0e58488678cc7e435fbfadabfb6eb6cf50ad51862f38f73729ed11795'), rdflib.RDF.type, rdflib.URIRef(ns.bsfs.Entity)),
+            (rdflib.URIRef(prefix + '976d2ea0e58488678cc7e435fbfadabfb6eb6cf50ad51862f38f73729ed11795'), rdflib.URIRef(ns.bse.author), rdflib.Literal('Me, myself, and I', datatype=rdflib.XSD.string)),
+            (rdflib.URIRef(prefix + '976d2ea0e58488678cc7e435fbfadabfb6eb6cf50ad51862f38f73729ed11795'), rdflib.URIRef(ns.bse.filename), rdflib.Literal('omega_first', datatype=rdflib.XSD.string)),
+            (rdflib.URIRef(prefix + '976d2ea0e58488678cc7e435fbfadabfb6eb6cf50ad51862f38f73729ed11795'), rdflib.URIRef(ns.bse.filesize), rdflib.Literal('648', datatype=rdflib.XSD.integer)),
+            (rdflib.URIRef(prefix + '997e2fbb7494a3818ec782d2bc87bf1cffafba6b9c0f658e4a6c18a723e944d3'), rdflib.RDF.type, rdflib.URIRef(ns.bsfs.Entity)),
+            (rdflib.URIRef(prefix + '997e2fbb7494a3818ec782d2bc87bf1cffafba6b9c0f658e4a6c18a723e944d3'), rdflib.URIRef(ns.bse.author), rdflib.Literal('Me, myself, and I', datatype=rdflib.XSD.string)),
+            (rdflib.URIRef(prefix + '997e2fbb7494a3818ec782d2bc87bf1cffafba6b9c0f658e4a6c18a723e944d3'), rdflib.URIRef(ns.bse.filename), rdflib.Literal('alpha_first', datatype=rdflib.XSD.string)),
+            (rdflib.URIRef(prefix + '997e2fbb7494a3818ec782d2bc87bf1cffafba6b9c0f658e4a6c18a723e944d3'), rdflib.URIRef(ns.bse.filesize), rdflib.Literal('754', datatype=rdflib.XSD.integer)),
+            (rdflib.URIRef(prefix + 'a8af899ecdab60dfaea8ec7f934053624c80a1054539e163f2c7eaa986c2777d'), rdflib.RDF.type, rdflib.URIRef(ns.bsfs.Entity)),
+            (rdflib.URIRef(prefix + 'a8af899ecdab60dfaea8ec7f934053624c80a1054539e163f2c7eaa986c2777d'), rdflib.URIRef(ns.bse.author), rdflib.Literal('Me, myself, and I', datatype=rdflib.XSD.string)),
+            (rdflib.URIRef(prefix + 'a8af899ecdab60dfaea8ec7f934053624c80a1054539e163f2c7eaa986c2777d'), rdflib.URIRef(ns.bse.filename), rdflib.Literal('foo_second', datatype=rdflib.XSD.string)),
+            (rdflib.URIRef(prefix + 'a8af899ecdab60dfaea8ec7f934053624c80a1054539e163f2c7eaa986c2777d'), rdflib.URIRef(ns.bse.filesize), rdflib.Literal('585', datatype=rdflib.XSD.integer)),
+            (rdflib.URIRef(prefix + 'b8fd7fba818254166a6043195004138ebda6923e012442f819a2c49671136c70'), rdflib.RDF.type, rdflib.URIRef(ns.bsfs.Entity)),
+            (rdflib.URIRef(prefix + 'b8fd7fba818254166a6043195004138ebda6923e012442f819a2c49671136c70'), rdflib.URIRef(ns.bse.author), rdflib.Literal('Me, myself, and I', datatype=rdflib.XSD.string)),
+            (rdflib.URIRef(prefix + 'b8fd7fba818254166a6043195004138ebda6923e012442f819a2c49671136c70'), rdflib.URIRef(ns.bse.filename), rdflib.Literal('bar_second', datatype=rdflib.XSD.string)),
+            (rdflib.URIRef(prefix + 'b8fd7fba818254166a6043195004138ebda6923e012442f819a2c49671136c70'), rdflib.URIRef(ns.bse.filesize), rdflib.Literal('636', datatype=rdflib.XSD.integer)),
+            (rdflib.URIRef(prefix + 'd43758ace82154a1cc10ca0dfef63cb20dd831f9c87edd6dc06539eefe67371d'), rdflib.RDF.type, rdflib.URIRef(ns.bsfs.Entity)),
+            (rdflib.URIRef(prefix + 'd43758ace82154a1cc10ca0dfef63cb20dd831f9c87edd6dc06539eefe67371d'), rdflib.URIRef(ns.bse.author), rdflib.Literal('Me, myself, and I', datatype=rdflib.XSD.string)),
+            (rdflib.URIRef(prefix + 'd43758ace82154a1cc10ca0dfef63cb20dd831f9c87edd6dc06539eefe67371d'), rdflib.URIRef(ns.bse.filename), rdflib.Literal('foo_first', datatype=rdflib.XSD.string)),
+            (rdflib.URIRef(prefix + 'd43758ace82154a1cc10ca0dfef63cb20dd831f9c87edd6dc06539eefe67371d'), rdflib.URIRef(ns.bse.filesize), rdflib.Literal('546', datatype=rdflib.XSD.integer)),
+            (rdflib.URIRef(prefix + 'd803187cbf3676ae9d38126270a6152c60431589aa3bb3824baf8954e9c097f1'), rdflib.RDF.type, rdflib.URIRef(ns.bsfs.Entity)),
+            (rdflib.URIRef(prefix + 'd803187cbf3676ae9d38126270a6152c60431589aa3bb3824baf8954e9c097f1'), rdflib.URIRef(ns.bse.author), rdflib.Literal('Me, myself, and I', datatype=rdflib.XSD.string)),
+            (rdflib.URIRef(prefix + 'd803187cbf3676ae9d38126270a6152c60431589aa3bb3824baf8954e9c097f1'), rdflib.URIRef(ns.bse.filename), rdflib.Literal('td_second', datatype=rdflib.XSD.string)),
+            (rdflib.URIRef(prefix + 'd803187cbf3676ae9d38126270a6152c60431589aa3bb3824baf8954e9c097f1'), rdflib.URIRef(ns.bse.filesize), rdflib.Literal('703', datatype=rdflib.XSD.integer)),
+            }))
+
+        # NOTE: we don't check ns.bsm.t_created since it depends on the execution time. Triples would look like this:
+        #   (rdflib.URIRef(prefix + '2f4109b40107cc50e0884755a1a961ed126887e49b8dbaf0e146b2e226aa6647'), rdflib.URIRef(ns.bsm.t_created), rdflib.Literal('1670..........', datatype=rdflib.XSD.integer)),
+        #   (rdflib.URIRef(prefix + '441f3d10c8ff489fe8e33e639606512f6c463151cc429de7e554b9af670c2ece'), rdflib.URIRef(ns.bsm.t_created), rdflib.Literal('1670..........', datatype=rdflib.XSD.integer)),
+        #   (rdflib.URIRef(prefix + '69b98ecf7aff3e95b09688ba93331678eb8397817111f674c9558e6dd8f5e871'), rdflib.URIRef(ns.bsm.t_created), rdflib.Literal('1670..........', datatype=rdflib.XSD.integer)),
+        #   (rdflib.URIRef(prefix + '78f7eb7f0d8221cdb2cb26c978fa42a11f75eb87becc768f4474134cb1e06926'), rdflib.URIRef(ns.bsm.t_created), rdflib.Literal('1670..........', datatype=rdflib.XSD.integer)),
+        #   (rdflib.URIRef(prefix + '80818b8ec2ee1919116dba9c8a7e0a4608313cf3b463cd88e9ed77a700dd92d3'), rdflib.URIRef(ns.bsm.t_created), rdflib.Literal('1670..........', datatype=rdflib.XSD.integer)),
+        #   (rdflib.URIRef(prefix + '976d2ea0e58488678cc7e435fbfadabfb6eb6cf50ad51862f38f73729ed11795'), rdflib.URIRef(ns.bsm.t_created), rdflib.Literal('1670..........', datatype=rdflib.XSD.integer)),
+        #   (rdflib.URIRef(prefix + '997e2fbb7494a3818ec782d2bc87bf1cffafba6b9c0f658e4a6c18a723e944d3'), rdflib.URIRef(ns.bsm.t_created), rdflib.Literal('1670..........', datatype=rdflib.XSD.integer)),
+        #   (rdflib.URIRef(prefix + 'a8af899ecdab60dfaea8ec7f934053624c80a1054539e163f2c7eaa986c2777d'), rdflib.URIRef(ns.bsm.t_created), rdflib.Literal('1670..........', datatype=rdflib.XSD.integer)),
+        #   (rdflib.URIRef(prefix + 'b8fd7fba818254166a6043195004138ebda6923e012442f819a2c49671136c70'), rdflib.URIRef(ns.bsm.t_created), rdflib.Literal('1670..........', datatype=rdflib.XSD.integer)),
+        #   (rdflib.URIRef(prefix + 'd43758ace82154a1cc10ca0dfef63cb20dd831f9c87edd6dc06539eefe67371d'), rdflib.URIRef(ns.bsm.t_created), rdflib.Literal('1670..........', datatype=rdflib.XSD.integer)),
+        #   (rdflib.URIRef(prefix + 'd803187cbf3676ae9d38126270a6152c60431589aa3bb3824baf8954e9c097f1'), rdflib.URIRef(ns.bsm.t_created), rdflib.Literal('1670..........', datatype=rdflib.XSD.integer)),
+        # instead, we simply check if there's such a predicate for each file
+        self.assertSetEqual({sub for sub, _ in bsfs._Graph__backend.graph.subject_objects(rdflib.URIRef(ns.bsm.t_created))}, {
+            rdflib.URIRef(prefix + '2f4109b40107cc50e0884755a1a961ed126887e49b8dbaf0e146b2e226aa6647'),
+            rdflib.URIRef(prefix + '441f3d10c8ff489fe8e33e639606512f6c463151cc429de7e554b9af670c2ece'),
+            rdflib.URIRef(prefix + '69b98ecf7aff3e95b09688ba93331678eb8397817111f674c9558e6dd8f5e871'),
+            rdflib.URIRef(prefix + '78f7eb7f0d8221cdb2cb26c978fa42a11f75eb87becc768f4474134cb1e06926'),
+            rdflib.URIRef(prefix + '80818b8ec2ee1919116dba9c8a7e0a4608313cf3b463cd88e9ed77a700dd92d3'),
+            rdflib.URIRef(prefix + '976d2ea0e58488678cc7e435fbfadabfb6eb6cf50ad51862f38f73729ed11795'),
+            rdflib.URIRef(prefix + '997e2fbb7494a3818ec782d2bc87bf1cffafba6b9c0f658e4a6c18a723e944d3'),
+            rdflib.URIRef(prefix + 'a8af899ecdab60dfaea8ec7f934053624c80a1054539e163f2c7eaa986c2777d'),
+            rdflib.URIRef(prefix + 'b8fd7fba818254166a6043195004138ebda6923e012442f819a2c49671136c70'),
+            rdflib.URIRef(prefix + 'd43758ace82154a1cc10ca0dfef63cb20dd831f9c87edd6dc06539eefe67371d'),
+            rdflib.URIRef(prefix + 'd803187cbf3676ae9d38126270a6152c60431589aa3bb3824baf8954e9c097f1'),
+            })
+
+    def test_print(self):
+        """With --print, the triples are written to stdout and no store is returned."""
+        stdout, sys.stdout = sys.stdout, io.StringIO()
+        try: # always restore sys.stdout, even if main fails
+            result = main(['--print', '-r', '--user', 'http://example.com/me',
+                os.path.join(os.path.dirname(__file__), 'testdir'),
+                os.path.join(os.path.dirname(__file__), 'testfile')])
+        finally:
+            outbuf, sys.stdout = sys.stdout, stdout
+        self.assertIsNone(result)
+        self.assertSetEqual(set(outbuf.getvalue().split('\n')) - {''}, {
+            f'Node(http://bsfs.ai/schema/Entity, http://example.com/me/file#2f4109b40107cc50e0884755a1a961ed126887e49b8dbaf0e146b2e226aa6647) Predicate({ns.bse.author}) Me, myself, and I',
+            f'Node(http://bsfs.ai/schema/Entity, http://example.com/me/file#2f4109b40107cc50e0884755a1a961ed126887e49b8dbaf0e146b2e226aa6647) Predicate({ns.bse.filename}) alpha_second',
+            f'Node(http://bsfs.ai/schema/Entity, http://example.com/me/file#2f4109b40107cc50e0884755a1a961ed126887e49b8dbaf0e146b2e226aa6647) Predicate({ns.bse.filesize}) 696',
+            f'Node(http://bsfs.ai/schema/Entity, http://example.com/me/file#441f3d10c8ff489fe8e33e639606512f6c463151cc429de7e554b9af670c2ece) Predicate({ns.bse.author}) Me, myself, and I',
+            f'Node(http://bsfs.ai/schema/Entity, http://example.com/me/file#441f3d10c8ff489fe8e33e639606512f6c463151cc429de7e554b9af670c2ece) Predicate({ns.bse.filename}) omega_second',
+            f'Node(http://bsfs.ai/schema/Entity, http://example.com/me/file#441f3d10c8ff489fe8e33e639606512f6c463151cc429de7e554b9af670c2ece) Predicate({ns.bse.filesize}) 503',
+            f'Node(http://bsfs.ai/schema/Entity, http://example.com/me/file#69b98ecf7aff3e95b09688ba93331678eb8397817111f674c9558e6dd8f5e871) Predicate({ns.bse.author}) Me, myself, and I',
+            f'Node(http://bsfs.ai/schema/Entity, http://example.com/me/file#69b98ecf7aff3e95b09688ba93331678eb8397817111f674c9558e6dd8f5e871) Predicate({ns.bse.filename}) td_first',
+            f'Node(http://bsfs.ai/schema/Entity, http://example.com/me/file#69b98ecf7aff3e95b09688ba93331678eb8397817111f674c9558e6dd8f5e871) Predicate({ns.bse.filesize}) 911',
+            f'Node(http://bsfs.ai/schema/Entity, http://example.com/me/file#78f7eb7f0d8221cdb2cb26c978fa42a11f75eb87becc768f4474134cb1e06926) Predicate({ns.bse.author}) Me, myself, and I',
+            f'Node(http://bsfs.ai/schema/Entity, http://example.com/me/file#78f7eb7f0d8221cdb2cb26c978fa42a11f75eb87becc768f4474134cb1e06926) Predicate({ns.bse.filename}) testfile',
+            f'Node(http://bsfs.ai/schema/Entity, http://example.com/me/file#78f7eb7f0d8221cdb2cb26c978fa42a11f75eb87becc768f4474134cb1e06926) Predicate({ns.bse.filesize}) 885',
+            f'Node(http://bsfs.ai/schema/Entity, http://example.com/me/file#80818b8ec2ee1919116dba9c8a7e0a4608313cf3b463cd88e9ed77a700dd92d3) Predicate({ns.bse.author}) Me, myself, and I',
+            f'Node(http://bsfs.ai/schema/Entity, http://example.com/me/file#80818b8ec2ee1919116dba9c8a7e0a4608313cf3b463cd88e9ed77a700dd92d3) Predicate({ns.bse.filename}) bar_first',
+            f'Node(http://bsfs.ai/schema/Entity, http://example.com/me/file#80818b8ec2ee1919116dba9c8a7e0a4608313cf3b463cd88e9ed77a700dd92d3) Predicate({ns.bse.filesize}) 956',
+            f'Node(http://bsfs.ai/schema/Entity, http://example.com/me/file#976d2ea0e58488678cc7e435fbfadabfb6eb6cf50ad51862f38f73729ed11795) Predicate({ns.bse.author}) Me, myself, and I',
+            f'Node(http://bsfs.ai/schema/Entity, http://example.com/me/file#976d2ea0e58488678cc7e435fbfadabfb6eb6cf50ad51862f38f73729ed11795) Predicate({ns.bse.filename}) omega_first',
+            f'Node(http://bsfs.ai/schema/Entity, http://example.com/me/file#976d2ea0e58488678cc7e435fbfadabfb6eb6cf50ad51862f38f73729ed11795) Predicate({ns.bse.filesize}) 648',
+            f'Node(http://bsfs.ai/schema/Entity, http://example.com/me/file#997e2fbb7494a3818ec782d2bc87bf1cffafba6b9c0f658e4a6c18a723e944d3) Predicate({ns.bse.author}) Me, myself, and I',
+            f'Node(http://bsfs.ai/schema/Entity, http://example.com/me/file#997e2fbb7494a3818ec782d2bc87bf1cffafba6b9c0f658e4a6c18a723e944d3) Predicate({ns.bse.filename}) alpha_first',
+            f'Node(http://bsfs.ai/schema/Entity, http://example.com/me/file#997e2fbb7494a3818ec782d2bc87bf1cffafba6b9c0f658e4a6c18a723e944d3) Predicate({ns.bse.filesize}) 754',
+            f'Node(http://bsfs.ai/schema/Entity, http://example.com/me/file#a8af899ecdab60dfaea8ec7f934053624c80a1054539e163f2c7eaa986c2777d) Predicate({ns.bse.author}) Me, myself, and I',
+            f'Node(http://bsfs.ai/schema/Entity, http://example.com/me/file#a8af899ecdab60dfaea8ec7f934053624c80a1054539e163f2c7eaa986c2777d) Predicate({ns.bse.filename}) foo_second',
+            f'Node(http://bsfs.ai/schema/Entity, http://example.com/me/file#a8af899ecdab60dfaea8ec7f934053624c80a1054539e163f2c7eaa986c2777d) Predicate({ns.bse.filesize}) 585',
+            f'Node(http://bsfs.ai/schema/Entity, http://example.com/me/file#b8fd7fba818254166a6043195004138ebda6923e012442f819a2c49671136c70) Predicate({ns.bse.author}) Me, myself, and I',
+            f'Node(http://bsfs.ai/schema/Entity, http://example.com/me/file#b8fd7fba818254166a6043195004138ebda6923e012442f819a2c49671136c70) Predicate({ns.bse.filename}) bar_second',
+            f'Node(http://bsfs.ai/schema/Entity, http://example.com/me/file#b8fd7fba818254166a6043195004138ebda6923e012442f819a2c49671136c70) Predicate({ns.bse.filesize}) 636',
+            f'Node(http://bsfs.ai/schema/Entity, http://example.com/me/file#d43758ace82154a1cc10ca0dfef63cb20dd831f9c87edd6dc06539eefe67371d) Predicate({ns.bse.author}) Me, myself, and I',
+            f'Node(http://bsfs.ai/schema/Entity, http://example.com/me/file#d43758ace82154a1cc10ca0dfef63cb20dd831f9c87edd6dc06539eefe67371d) Predicate({ns.bse.filename}) foo_first',
+            f'Node(http://bsfs.ai/schema/Entity, http://example.com/me/file#d43758ace82154a1cc10ca0dfef63cb20dd831f9c87edd6dc06539eefe67371d) Predicate({ns.bse.filesize}) 546',
+            f'Node(http://bsfs.ai/schema/Entity, http://example.com/me/file#d803187cbf3676ae9d38126270a6152c60431589aa3bb3824baf8954e9c097f1) Predicate({ns.bse.author}) Me, myself, and I',
+            f'Node(http://bsfs.ai/schema/Entity, http://example.com/me/file#d803187cbf3676ae9d38126270a6152c60431589aa3bb3824baf8954e9c097f1) Predicate({ns.bse.filename}) td_second',
+            f'Node(http://bsfs.ai/schema/Entity, http://example.com/me/file#d803187cbf3676ae9d38126270a6152c60431589aa3bb3824baf8954e9c097f1) Predicate({ns.bse.filesize}) 703',
+            })
+
+
+## main ##
+
+if __name__ == '__main__':
+    unittest.main()
+
+## EOF ##
diff --git a/test/apps/test_info.py b/test/apps/test_info.py
new file mode 100644
index 0000000..60a540e
--- /dev/null
+++ b/test/apps/test_info.py
@@ -0,0 +1,42 @@
+"""
+
+Part of the bsie test suite.
+A copy of the license is provided with the project.
+Author: Matthias Baumgartner, 2022
+"""
+# imports
+import argparse
+import io
+import os
+import sys
+import unittest
+
+# objects to test
+from bsie.apps.info import main
+
+
+## code ##
+
+class TestIndex(unittest.TestCase):
+    def test_predicates(self):
+        stdout, sys.stdout = sys.stdout, io.StringIO()
+        # show predicate info
+        main(['predicates'])
+        outbuf, sys.stdout = sys.stdout, stdout
+        # verify output
+        self.assertSetEqual({pred for pred in outbuf.getvalue().split('\n') if pred != ''}, {
+            'http://bsfs.ai/schema/Entity#author',
+            'http://bsfs.ai/schema/Predicate',
+            'http://bsfs.ai/schema/Entity#filename',
+            'http://bsfs.ai/schema/Entity#filesize',
+            })
+
+    def test_invalid(self):
+        self.assertRaises(SystemExit, main, ['foobar'])
+
+## main ##
+
+if __name__ == '__main__':
+    unittest.main()
+
+## EOF ##
diff --git a/test/apps/testdir/alpha/alpha_first b/test/apps/testdir/alpha/alpha_first
new file mode 100644
index 0000000..f96fdee
--- /dev/null
+++ b/test/apps/testdir/alpha/alpha_first
@@ -0,0 +1,16 @@
+Turpis tincidunt id aliquet risus feugiat in ante metus.
+Vel turpis nunc eget lorem dolor.
+Lorem mollis aliquam ut porttitor leo a diam sollicitudin.
+Sit amet mattis vulputate enim nulla aliquet porttitor lacus luctus.
+Vitae et leo duis ut diam.
+Integer eget aliquet nibh praesent tristique magna sit.
+Volutpat sed cras ornare arcu dui.
+Consectetur adipiscing elit duis tristique sollicitudin nibh.
+Interdum varius sit amet mattis vulputate.
+A arcu cursus vitae congue.
+Risus nec feugiat in fermentum posuere urna nec tincidunt praesent.
+Sit amet dictum sit amet justo donec enim diam.
+Maecenas accumsan lacus vel facilisis.
+Erat velit scelerisque in dictum non consectetur a.
+Tempor orci dapibus ultrices in iaculis nunc.
+Nisi lacus sed viverra tellus.
diff --git a/test/apps/testdir/alpha/alpha_second b/test/apps/testdir/alpha/alpha_second
new file mode 100644
index 0000000..ae83ce8
--- /dev/null
+++ b/test/apps/testdir/alpha/alpha_second
@@ -0,0 +1,12 @@
+Et sollicitudin ac orci phasellus egestas tellus rutrum tellus.
+Orci dapibus ultrices in iaculis nunc sed augue.
+Tincidunt vitae semper quis lectus nulla at.
+Maecenas ultricies mi eget mauris pharetra et.
+Porttitor massa id neque aliquam vestibulum morbi blandit.
+Et magnis dis parturient montes nascetur ridiculus mus mauris.
+Ac orci phasellus egestas tellus rutrum tellus pellentesque.
+Donec ac odio tempor orci dapibus.
+Quis imperdiet massa tincidunt nunc pulvinar sapien et ligula.
+Potenti nullam ac tortor vitae purus faucibus ornare suspendisse sed.
+Orci porta non pulvinar neque laoreet suspendisse interdum consectetur.
+Mauris pellentesque pulvinar pellentesque habitant morbi tristique.
diff --git a/test/apps/testdir/alpha/omega/omega_first b/test/apps/testdir/alpha/omega/omega_first
new file mode 100644
index 0000000..e594737
--- /dev/null
+++ b/test/apps/testdir/alpha/omega/omega_first
@@ -0,0 +1,14 @@
+Neque gravida in fermentum et sollicitudin.
+Sodales ut eu sem integer vitae justo eget magna fermentum.
+Amet nulla facilisi morbi tempus iaculis.
+Proin sagittis nisl rhoncus mattis rhoncus urna neque.
+Aliquam sem fringilla ut morbi tincidunt augue interdum velit euismod.
+Sagittis eu volutpat odio facilisis.
+Aliquet porttitor lacus luctus accumsan tortor posuere ac ut.
+Sed arcu non odio euismod lacinia.
+Faucibus et molestie ac feugiat.
+Urna neque viverra justo nec ultrices dui sapien eget.
+Amet commodo nulla facilisi nullam.
+Pretium lectus quam id leo in vitae.
+A cras semper auctor neque.
+Sed arcu non odio euismod lacinia at quis risus sed.
diff --git a/test/apps/testdir/alpha/omega/omega_second b/test/apps/testdir/alpha/omega/omega_second
new file mode 100644
index 0000000..0c9857d
--- /dev/null
+++ b/test/apps/testdir/alpha/omega/omega_second
@@ -0,0 +1,10 @@
+Commodo sed egestas egestas fringilla phasellus.
+Ac tortor dignissim convallis aenean et tortor at risus.
+Lorem dolor sed viverra ipsum nunc aliquet bibendum enim.
+Quis lectus nulla at volutpat diam ut.
+Tincidunt id aliquet risus feugiat in ante metus.
+Tincidunt arcu non sodales neque.
+Amet est placerat in egestas erat imperdiet sed euismod.
+Duis tristique sollicitudin nibh sit amet.
+Sed arcu non odio euismod lacinia at.
+Ullamcorper morbi tincidunt ornare massa eget egestas purus viverra accumsan.
diff --git a/test/apps/testdir/foo/bar/bar_first b/test/apps/testdir/foo/bar/bar_first
new file mode 100644
index 0000000..e9edb3f
--- /dev/null
+++ b/test/apps/testdir/foo/bar/bar_first
@@ -0,0 +1,20 @@
+Elementum eu facilisis sed odio morbi quis commodo.
+Enim nunc faucibus a pellentesque sit amet porttitor.
+Etiam non quam lacus suspendisse faucibus interdum.
+Viverra aliquet eget sit amet tellus.
+Arcu vitae elementum curabitur vitae.
+Feugiat vivamus at augue eget arcu dictum.
+Commodo quis imperdiet massa tincidunt nunc.
+Urna duis convallis convallis tellus id interdum.
+Commodo sed egestas egestas fringilla phasellus.
+Sodales neque sodales ut etiam sit amet nisl.
+Sem integer vitae justo eget magna fermentum iaculis.
+Id diam maecenas ultricies mi.
+Aliquet nibh praesent tristique magna sit amet purus gravida.
+Ut enim blandit volutpat maecenas volutpat.
+Ipsum a arcu cursus vitae congue mauris.
+Donec ultrices tincidunt arcu non.
+Nulla posuere sollicitudin aliquam ultrices sagittis orci a scelerisque purus.
+Egestas maecenas pharetra convallis posuere.
+Feugiat in fermentum posuere urna nec.
+Nulla malesuada pellentesque elit eget gravida cum sociis.
diff --git a/test/apps/testdir/foo/bar/bar_second b/test/apps/testdir/foo/bar/bar_second
new file mode 100644
index 0000000..fb95896
--- /dev/null
+++ b/test/apps/testdir/foo/bar/bar_second
@@ -0,0 +1,14 @@
+Augue ut lectus arcu bibendum at varius vel pharetra vel.
+Mattis aliquam faucibus purus in.
+In tellus integer feugiat scelerisque.
+Eget velit aliquet sagittis id consectetur purus ut faucibus pulvinar.
+Augue mauris augue neque gravida.
+Pulvinar neque laoreet suspendisse interdum consectetur libero id faucibus.
+Tellus elementum sagittis vitae et leo duis.
+Eget est lorem ipsum dolor sit amet consectetur.
+Volutpat sed cras ornare arcu.
+Faucibus a pellentesque sit amet.
+Turpis egestas maecenas pharetra convallis.
+Faucibus interdum posuere lorem ipsum dolor sit amet.
+Id semper risus in hendrerit.
+Amet volutpat consequat mauris nunc.
diff --git a/test/apps/testdir/foo/foo_first b/test/apps/testdir/foo/foo_first
new file mode 100644
index 0000000..ed1e052
--- /dev/null
+++ b/test/apps/testdir/foo/foo_first
@@ -0,0 +1,11 @@
+Venenatis tellus in metus vulputate eu scelerisque felis imperdiet proin.
+Orci phasellus egestas tellus rutrum.
+Feugiat vivamus at augue eget arcu dictum varius.
+Justo eget magna fermentum iaculis eu non.
+A erat nam at lectus urna duis.
+Quam quisque id diam vel quam elementum pulvinar etiam.
+Amet commodo nulla facilisi nullam vehicula ipsum a.
+Sapien faucibus et molestie ac feugiat.
+Aliquam vestibulum morbi blandit cursus risus at ultrices.
+Purus faucibus ornare suspendisse sed nisi.
+In massa tempor nec feugiat nisl pretium fusce id velit.
diff --git a/test/apps/testdir/foo/foo_second b/test/apps/testdir/foo/foo_second
new file mode 100644
index 0000000..95e46ae
--- /dev/null
+++ b/test/apps/testdir/foo/foo_second
@@ -0,0 +1,12 @@
+Sit amet consectetur adipiscing elit ut aliquam purus.
+Vulputate dignissim suspendisse in est ante in nibh.
+Eu feugiat pretium nibh ipsum consequat nisl vel pretium.
+Egestas purus viverra accumsan in nisl.
+Ac odio tempor orci dapibus ultrices.
+At imperdiet dui accumsan sit amet.
+Elementum integer enim neque volutpat ac tincidunt vitae semper.
+Mi in nulla posuere sollicitudin aliquam ultrices sagittis.
+Aliquam sem et tortor consequat.
+Tristique senectus et netus et malesuada fames ac turpis.
+Quis hendrerit dolor magna eget est lorem ipsum.
+Ut consequat semper viverra nam libero.
diff --git a/test/apps/testdir/td_first b/test/apps/testdir/td_first
new file mode 100644
index 0000000..21eab9c
--- /dev/null
+++ b/test/apps/testdir/td_first
@@ -0,0 +1,18 @@
+Urna duis convallis convallis tellus id interdum velit.
+Risus in hendrerit gravida rutrum.
+Odio pellentesque diam volutpat commodo sed.
+Duis convallis convallis tellus id interdum velit laoreet id donec.
+Duis at tellus at urna.
+Egestas maecenas pharetra convallis posuere morbi leo urna molestie at.
+Et leo duis ut diam quam nulla porttitor massa id.
+Nunc eget lorem dolor sed viverra ipsum nunc aliquet bibendum.
+Sodales ut etiam sit amet nisl purus in.
+Ac felis donec et odio pellentesque diam volutpat commodo.
+Nunc mi ipsum faucibus vitae aliquet.
+Volutpat ac tincidunt vitae semper quis lectus nulla at volutpat.
+Mollis aliquam ut porttitor leo.
+Vestibulum rhoncus est pellentesque elit ullamcorper dignissim cras.
+Pulvinar proin gravida hendrerit lectus a.
+Ultrices dui sapien eget mi proin.
+Dui vivamus arcu felis bibendum ut.
+Aliquam eleifend mi in nulla posuere sollicitudin aliquam ultrices sagittis.
diff --git a/test/apps/testdir/td_second b/test/apps/testdir/td_second
new file mode 100644
index 0000000..496ff0e
--- /dev/null
+++ b/test/apps/testdir/td_second
@@ -0,0 +1,14 @@
+Egestas purus viverra accumsan in.
+Auctor urna nunc id cursus metus aliquam eleifend.
+Morbi tincidunt augue interdum velit.
+In egestas erat imperdiet sed euismod nisi porta lorem mollis.
+Sed augue lacus viverra vitae congue eu consequat.
+Ut pharetra sit amet aliquam id.
+Aenean euismod elementum nisi quis eleifend.
+Hac habitasse platea dictumst vestibulum rhoncus est pellentesque elit ullamcorper.
+Eget nunc lobortis mattis aliquam faucibus purus.
+Sit amet luctus venenatis lectus magna fringilla.
+Placerat orci nulla pellentesque dignissim enim sit amet venenatis.
+Montes nascetur ridiculus mus mauris.
+Morbi enim nunc faucibus a pellentesque sit amet.
+Et netus et malesuada fames ac turpis egestas.
diff --git a/test/apps/testfile b/test/apps/testfile
new file mode 100644
index 0000000..b56928e
--- /dev/null
+++ b/test/apps/testfile
@@ -0,0 +1,16 @@
+Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua.
+Fames ac turpis egestas maecenas pharetra convallis posuere morbi.
+Etiam erat velit scelerisque in dictum non consectetur a erat.
+Dolor purus non enim praesent elementum facilisis.
+Nulla porttitor massa id neque aliquam vestibulum morbi blandit cursus.
+Adipiscing vitae proin sagittis nisl rhoncus mattis rhoncus urna neque.
+Aenean pharetra magna ac placerat.
+Pulvinar proin gravida hendrerit lectus a.
+Iaculis nunc sed augue lacus viverra vitae.
+Ac tortor vitae purus faucibus ornare suspendisse sed.
+Purus in mollis nunc sed id semper.
+Non consectetur a erat nam at lectus urna.
+In ante metus dictum at tempor commodo ullamcorper.
+Auctor augue mauris augue neque gravida in fermentum.
+Nunc scelerisque viverra mauris in.
+Morbi leo urna molestie at elementum.
diff --git a/test/lib/__init__.py b/test/lib/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/test/lib/test_bsie.py b/test/lib/test_bsie.py
new file mode 100644
index 0000000..277ac67
--- /dev/null
+++ b/test/lib/test_bsie.py
@@ -0,0 +1,231 @@
+"""
+
+Part of the bsie test suite.
+A copy of the license is provided with the project.
+Author: Matthias Baumgartner, 2022
+"""
+# imports
+import os
+import unittest
+
+# bsie imports
+from bsie.tools import builder
+from bsie.utils import ns
+from bsie.utils.bsfs import URI, schema
+from bsie.utils.node import Node
+
+# objects to test
+from bsie.lib.bsie import BSIE
+
+
+## code ##
+
+class TestBSIE(unittest.TestCase):
+    def setUp(self):
+        # reader builder
+        rbuild = builder.ReaderBuilder({})
+        # extractor builder
+        ebuild = builder.ExtractorBuilder([
+            {'bsie.extractor.generic.path.Path': {}},
+            {'bsie.extractor.generic.stat.Stat': {}},
+            {'bsie.extractor.generic.constant.Constant': dict(
+                tuples=[('http://bsfs.ai/schema/Entity#author', 'Me, myself, and I')],
+                schema='''
+                    bse:author rdfs:subClassOf bsfs:Predicate ;
+                        rdfs:domain bsfs:Entity ;
+                        rdfs:range xsd:string ;
+                        bsfs:unique "true"^^xsd:boolean .
+                    ''',
+                )},
+            ])
+        # build pipeline
+        self.prefix = URI('http://example.com/local/file#')
+        pbuild = builder.PipelineBuilder(self.prefix, rbuild, ebuild)
+        self.pipeline = pbuild.build()
+
+    def test_construction(self):
+        # pipeline only
+        lib = BSIE(self.pipeline)
+        self.assertSetEqual(lib.predicates, {
+            ns.bse.filename,
+            ns.bse.filesize,
+            ns.bse.author,
+            })
+        self.assertEqual(lib.schema, schema.Schema.from_string('''
+            prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#>
+            prefix xsd: <http://www.w3.org/2001/XMLSchema#>
+            prefix bsfs: <http://bsfs.ai/schema/>
+            prefix bse: <http://bsfs.ai/schema/Entity#>
+            # essential nodes
+            bsfs:Entity rdfs:subClassOf bsfs:Node .
+            # common definitions
+            xsd:string rdfs:subClassOf bsfs:Literal .
+            xsd:integer rdfs:subClassOf bsfs:Literal .
+
+            bse:filename rdfs:subClassOf bsfs:Predicate ;
+                rdfs:domain bsfs:Entity ;
+                rdfs:range xsd:string ;
+                bsfs:unique "false"^^xsd:boolean .
+
+            bse:filesize rdfs:subClassOf bsfs:Predicate ;
+                rdfs:domain bsfs:Entity ;
+                rdfs:range xsd:integer;
+                bsfs:unique "false"^^xsd:boolean .
+
+            bse:author rdfs:subClassOf bsfs:Predicate ;
+                rdfs:domain bsfs:Entity ;
+                rdfs:range xsd:string ;
+                bsfs:unique "true"^^xsd:boolean .
+
+            '''))
+
+        # specify collect
+        lib = BSIE(self.pipeline, collect={
+            ns.bse.filesize,
+            ns.bse.author,
+            ns.bse.inexistent,
+            })
+        self.assertSetEqual(lib.predicates, {
+            ns.bse.filesize,
+            ns.bse.author,
+            })
+        self.assertEqual(lib.schema, schema.Schema.from_string('''
+            prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#>
+            prefix xsd: <http://www.w3.org/2001/XMLSchema#>
+            prefix bsfs: <http://bsfs.ai/schema/>
+            prefix bse: <http://bsfs.ai/schema/Entity#>
+            # essential nodes
+            bsfs:Entity rdfs:subClassOf bsfs:Node .
+            # common definitions
+            xsd:string rdfs:subClassOf bsfs:Literal .
+            xsd:integer rdfs:subClassOf bsfs:Literal .
+
+            bse:filesize rdfs:subClassOf bsfs:Predicate ;
+                rdfs:domain bsfs:Entity ;
+                rdfs:range xsd:integer;
+                bsfs:unique "false"^^xsd:boolean .
+
+            bse:author rdfs:subClassOf bsfs:Predicate ;
+                rdfs:domain bsfs:Entity ;
+                rdfs:range xsd:string ;
+                bsfs:unique "true"^^xsd:boolean .
+
+            '''))
+        # empty collect is disregarded
+        lib = BSIE(self.pipeline, collect={})
+        self.assertSetEqual(lib.predicates, {
+            ns.bse.filename,
+            ns.bse.filesize,
+            ns.bse.author,
+            })
+        self.assertEqual(lib.schema, schema.Schema.from_string('''
+            prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#>
+            prefix xsd: <http://www.w3.org/2001/XMLSchema#>
+            prefix bsfs: <http://bsfs.ai/schema/>
+            prefix bse: <http://bsfs.ai/schema/Entity#>
+            # essential nodes
+            bsfs:Entity rdfs:subClassOf bsfs:Node .
+            # common definitions
+            xsd:string rdfs:subClassOf bsfs:Literal .
+            xsd:integer rdfs:subClassOf bsfs:Literal .
+
+            bse:filename rdfs:subClassOf bsfs:Predicate ;
+                rdfs:domain bsfs:Entity ;
+                rdfs:range xsd:string ;
+                bsfs:unique "false"^^xsd:boolean .
+
+            bse:filesize rdfs:subClassOf bsfs:Predicate ;
+                rdfs:domain bsfs:Entity ;
+                rdfs:range xsd:integer;
+                bsfs:unique "false"^^xsd:boolean .
+
+            bse:author rdfs:subClassOf bsfs:Predicate ;
+                rdfs:domain bsfs:Entity ;
+                rdfs:range xsd:string ;
+                bsfs:unique "true"^^xsd:boolean .
+
+            '''))
+
+        # specify discard
+        lib = BSIE(self.pipeline, discard={
+            ns.bse.filesize,
+            ns.bse.filename,
+            ns.bse.inexistent,
+            })
+        self.assertSetEqual(lib.predicates, {
+            ns.bse.author,
+            })
+        self.assertEqual(lib.schema, schema.Schema.from_string('''
+            prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#>
+            prefix xsd: <http://www.w3.org/2001/XMLSchema#>
+            prefix bsfs: <http://bsfs.ai/schema/>
+            prefix bse: <http://bsfs.ai/schema/Entity#>
+            # essential nodes
+            bsfs:Entity rdfs:subClassOf bsfs:Node .
+            # common definitions
+            xsd:string rdfs:subClassOf bsfs:Literal .
+
+            bse:author rdfs:subClassOf bsfs:Predicate ;
+                rdfs:domain bsfs:Entity ;
+                rdfs:range xsd:string ;
+                bsfs:unique "true"^^xsd:boolean .
+
+            '''))
+
+        # specify collect and discard
+        lib = BSIE(self.pipeline,
+            collect={ns.bse.filesize, ns.bse.author, ns.bse.foo, ns.bse.bar},
+            discard={ns.bse.author, ns.bse.foo, ns.bse.foobar},
+            )
+        self.assertSetEqual(lib.predicates, {
+            ns.bse.filesize,
+            })
+        self.assertEqual(lib.schema, schema.Schema.from_string('''
+            prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#>
+            prefix xsd: <http://www.w3.org/2001/XMLSchema#>
+            prefix bsfs: <http://bsfs.ai/schema/>
+            prefix bse: <http://bsfs.ai/schema/Entity#>
+            # essential nodes
+            bsfs:Entity rdfs:subClassOf bsfs:Node .
+            # common definitions
+            xsd:integer rdfs:subClassOf bsfs:Literal .
+
+            bse:filesize rdfs:subClassOf bsfs:Predicate ;
+                rdfs:domain bsfs:Entity ;
+                rdfs:range xsd:integer;
+                bsfs:unique "false"^^xsd:boolean .
+
+            '''))
+
+
+    def test_from_file(self):
+        # setup
+        lib = BSIE(self.pipeline)
+        self.assertSetEqual(set(lib.predicates), {
+            ns.bse.filesize,
+            ns.bse.filename,
+            ns.bse.author,
+            })
+        content_hash = 'a948904f2f0f479b8f8197694b30184b0d2ed1c1cd2a1ec0fb85d299a192a447'
+        subject = Node(ns.bsfs.Entity, self.prefix + content_hash)
+        testfile = os.path.join(os.path.dirname(__file__), 'testfile.t')
+
+        # from_file extracts all available triples
+        self.assertSetEqual(set(lib.from_file(testfile)), {
+            (subject, lib.schema.predicate(ns.bse.filename), 'testfile.t'),
+            (subject, lib.schema.predicate(ns.bse.filesize), 12),
+            (subject, lib.schema.predicate(ns.bse.author), 'Me, myself, and I'),
+            })
+
+        # from_file respects predicate argument
+        self.assertSetEqual(set(lib.from_file(testfile, {ns.bse.filename, ns.bse.invalid})), {
+            (subject, lib.schema.predicate(ns.bse.filename), 'testfile.t'),
+            })
+
+
+## main ##
+
+if __name__ == '__main__':
+    unittest.main()
+
+## EOF ##
diff --git a/test/lib/testfile.t b/test/lib/testfile.t
new file mode 100644
index 0000000..3b18e51
--- /dev/null
+++ b/test/lib/testfile.t
@@ -0,0 +1 @@
+hello world
diff --git a/test/tools/test_pipeline.py b/test/tools/test_pipeline.py
index f98b329..0dd8c75 100644
--- a/test/tools/test_pipeline.py
+++ b/test/tools/test_pipeline.py
@@ -95,7 +95,7 @@ class TestPipeline(unittest.TestCase):
         # build pipeline
         pipeline = Pipeline(self.prefix, self.ext2rdr)
         # build objects for tests
-        content_hash = 'e3bb4ab54e4a50d75626a1f76814f152f4edc60a82ad724aa2aa922ca5534427'
+        content_hash = 'a948904f2f0f479b8f8197694b30184b0d2ed1c1cd2a1ec0fb85d299a192a447'
         subject = Node(ns.bsfs.Entity, self.prefix + content_hash)
         testfile = os.path.join(os.path.dirname(__file__), 'testfile.t')
         p_filename = pipeline.schema.predicate(ns.bse.filename)
@@ -108,7 +108,7 @@ class TestPipeline(unittest.TestCase):
         # extract given predicates
         self.assertSetEqual(set(pipeline(testfile, {p_filename, p_filesize})), {
             (subject, p_filename, 'testfile.t'),
-            (subject, p_filesize, 11),
+            (subject, p_filesize, 12),
             })
         self.assertSetEqual(set(pipeline(testfile, {p_author})), {
             (subject, p_author, 'Me, myself, and I'),
@@ -117,12 +117,12 @@ class TestPipeline(unittest.TestCase):
             (subject, p_filename, 'testfile.t'),
             })
         self.assertSetEqual(set(pipeline(testfile, {p_filesize})), {
-            (subject, p_filesize, 11),
+            (subject, p_filesize, 12),
             })
         # extract all predicates
         self.assertSetEqual(set(pipeline(testfile)), {
             (subject, p_filename, 'testfile.t'),
-            (subject, p_filesize, 11),
+            (subject, p_filesize, 12),
             (subject, p_author, 'Me, myself, and I'),
             (subject, p_rating, 123),
             })
@@ -158,6 +158,18 @@ class TestPipeline(unittest.TestCase):
             p_filename = pipeline.schema.predicate(ns.bse.filename)
             self.assertSetEqual(set(pipeline(testfile, {p_filename})), set())
 
+    def test_predicates(self):
+        # build pipeline
+        pipeline = Pipeline(self.prefix, self.ext2rdr)
+        #
+        self.assertSetEqual(set(pipeline.predicates()), {
+            pipeline.schema.predicate(ns.bsfs.Predicate),
+            pipeline.schema.predicate(ns.bse.filename),
+            pipeline.schema.predicate(ns.bse.filesize),
+            pipeline.schema.predicate(ns.bse.author),
+            pipeline.schema.predicate(ns.bse.rating),
+            })
+
 
 ## main ##
 
diff --git a/test/tools/testfile.t b/test/tools/testfile.t
index 58bf1b8..3b18e51 100644
--- a/test/tools/testfile.t
+++ b/test/tools/testfile.t
@@ -1 +1 @@
-hello worl
+hello world
-- 
cgit v1.2.3


From 3dc3e9a9b0fc8c9727f91359814866d3deae6e79 Mon Sep 17 00:00:00 2001
From: Matthias Baumgartner <dev@igsor.net>
Date: Thu, 15 Dec 2022 16:42:07 +0100
Subject: minor fixes and comments

---
 .pylintrc                | 2 +-
 bsie/__init__.py         | 5 ++---
 bsie/base/extractor.py   | 9 +++++++--
 bsie/utils/namespaces.py | 1 +
 4 files changed, 11 insertions(+), 6 deletions(-)

diff --git a/.pylintrc b/.pylintrc
index 3cfae38..1b34854 100644
--- a/.pylintrc
+++ b/.pylintrc
@@ -148,7 +148,7 @@ logging-format-style=old
 [MISCELLANEOUS]
 
 # List of note tags to take in consideration, separated by a comma.
-notes=FIXME,TODO,NOTE
+notes=FIXME,TODO
 
 
diff --git a/bsie/__init__.py b/bsie/__init__.py
index 2b874bd..96e6953 100644
--- a/bsie/__init__.py
+++ b/bsie/__init__.py
@@ -9,9 +9,8 @@ import collections
 import typing
 
 # constants
-version_info = collections.namedtuple('version_info',
-    ('major', 'minor', 'micro')) \
-    (0, 0, 1)
+T_VERSION_INFO = collections.namedtuple('T_VERSION_INFO', ('major', 'minor', 'micro'))
+version_info = T_VERSION_INFO(0, 0, 1)
 
 # exports
 __all__: typing.Sequence[str] = []
diff --git a/bsie/base/extractor.py b/bsie/base/extractor.py
index 75b7173..bfa403c 100644
--- a/bsie/base/extractor.py
+++ b/bsie/base/extractor.py
@@ -20,7 +20,7 @@ __all__: typing.Sequence[str] = (
 # constants
 
 # essential definitions typically used in extractor schemas.
-# NOTE: The definition here is only for convenience; Each Extractor must implement its use, if so desired.
+# NOTE: This preamble is only for convenience; Each Extractor must implement its use, if so desired.
 SCHEMA_PREAMBLE = '''
     # common external prefixes
     prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
@@ -45,7 +45,12 @@ SCHEMA_PREAMBLE = '''
 ## code ##
 
 class Extractor(abc.ABC):
-    """Produce (node, predicate, value)-triples from some content."""
+    """Produce (subject, predicate, value)-triples from some content.
+    The Extractor produces principal predicates that provide information
+    about the content itself (i.e., triples that include the subject),
+    and may also generate triples with auxiliary predicates if the
+    extracted value is a node itself.
+    """
 
     # what type of content is expected (i.e. reader subclass).
     CONTENT_READER: typing.Optional[str] = None
diff --git a/bsie/utils/namespaces.py b/bsie/utils/namespaces.py
index 2fcb2dc..d6e1c72 100644
--- a/bsie/utils/namespaces.py
+++ b/bsie/utils/namespaces.py
@@ -21,6 +21,7 @@ __all__: typing.Sequence[str] = (
     'bse',
     'bsfs',
     'bsm',
+    'xsd',
     )
 
 ## EOF ##
-- 
cgit v1.2.3


From 49cf03fc212c813862453de5352436dc90d1e458 Mon Sep 17 00:00:00 2001
From: Matthias Baumgartner <dev@igsor.net>
Date: Thu, 15 Dec 2022 16:50:53 +0100
Subject: imports and init files

---
 bsie/apps/index.py                      |  7 ++-----
 bsie/base/__init__.py                   |  8 ++++----
 bsie/base/reader.py                     |  8 ++++----
 bsie/lib/__init__.py                    |  7 ++++++-
 bsie/reader/stat.py                     |  2 +-
 bsie/tools/__init__.py                  |  4 ++--
 bsie/utils/bsfs.py                      |  3 ++-
 bsie/utils/node.py                      | 18 +++++++++---------
 test/base/test_extractor.py             |  9 ++++-----
 test/base/test_reader.py                |  4 ++--
 test/extractor/generic/test_constant.py |  5 ++---
 test/extractor/generic/test_path.py     | 10 ++++------
 test/extractor/generic/test_stat.py     | 10 ++++------
 13 files changed, 46 insertions(+), 49 deletions(-)

diff --git a/bsie/apps/index.py b/bsie/apps/index.py
index 821aa4c..aa26d0f 100644
--- a/bsie/apps/index.py
+++ b/bsie/apps/index.py
@@ -9,14 +9,11 @@ import argparse
 import os
 import typing
 
-# bsfs imports
-import bsfs
-
 # bsie imports
 from bsie.base import errors
-from bsie.lib.bsie import BSIE
+from bsie.lib import BSIE
 from bsie.tools import builder
-from bsie.utils.bsfs import URI
+from bsie.utils import bsfs
 
 # exports
 __all__: typing.Sequence[str] = (
diff --git a/bsie/base/__init__.py b/bsie/base/__init__.py
index 0154862..0d362cd 100644
--- a/bsie/base/__init__.py
+++ b/bsie/base/__init__.py
@@ -11,14 +11,14 @@ import typing
 
 # inner-module imports
 from . import errors
-from . import extractor
-from . import reader
+from .extractor import Extractor
+from .reader import Reader
 
 # exports
 __all__: typing.Sequence[str] = (
+    'Extractor',
+    'Reader',
     'errors',
-    'extractor',
-    'reader',
     )
 
 ## EOF ##
diff --git a/bsie/base/reader.py b/bsie/base/reader.py
index b7eabf7..cbabd36 100644
--- a/bsie/base/reader.py
+++ b/bsie/base/reader.py
@@ -13,7 +13,7 @@ import abc
 import typing
 
 # bsie imports
-from bsie.utils.bsfs import URI, typename
+from bsie.utils import bsfs
 
 # exports
 __all__: typing.Sequence[str] = (
@@ -27,10 +27,10 @@ class Reader(abc.ABC):
     """Read and return some content from a file."""
 
     def __str__(self) -> str:
-        return typename(self)
+        return bsfs.typename(self)
 
     def __repr__(self) -> str:
-        return f'{typename(self)}()'
+        return f'{bsfs.typename(self)}()'
 
     def __eq__(self, other: typing.Any) -> bool:
         return isinstance(other, type(self))
@@ -39,7 +39,7 @@ class Reader(abc.ABC):
         return hash(type(self))
 
     @abc.abstractmethod
-    def __call__(self, path: URI) -> typing.Any:
+    def __call__(self, path: bsfs.URI) -> typing.Any:
         """Return some content of the file at *path*.
         Raises a `ReaderError` if the reader cannot make sense of the file format.
         """
diff --git a/bsie/lib/__init__.py b/bsie/lib/__init__.py
index f6c9018..578c2c4 100644
--- a/bsie/lib/__init__.py
+++ b/bsie/lib/__init__.py
@@ -7,7 +7,12 @@ Author: Matthias Baumgartner, 2022
 # imports
 import typing
 
+# inner-module imports
+from .bsie import BSIE
+
 # exports
-__all__: typing.Sequence[str] = []
+__all__: typing.Sequence[str] = (
+    'BSIE',
+    )
 
 ## EOF ##
diff --git a/bsie/reader/stat.py b/bsie/reader/stat.py
index 592d912..fc5fb24 100644
--- a/bsie/reader/stat.py
+++ b/bsie/reader/stat.py
@@ -9,7 +9,7 @@ import os
 import typing
 
 # bsie imports
-from bsie.base import reader, errors
+from bsie.base import errors, reader
 
 # exports
 __all__: typing.Sequence[str] = (
diff --git a/bsie/tools/__init__.py b/bsie/tools/__init__.py
index 8ca9620..803c321 100644
--- a/bsie/tools/__init__.py
+++ b/bsie/tools/__init__.py
@@ -9,12 +9,12 @@ import typing
 
 # inner-module imports
 from . import builder
-from . import pipeline
+from .pipeline import Pipeline
 
 # exports
 __all__: typing.Sequence[str] = (
     'builder',
-    'pipeline',
+    'Pipeline',
     )
 
 ## EOF ##
diff --git a/bsie/utils/bsfs.py b/bsie/utils/bsfs.py
index a4b7626..c48049d 100644
--- a/bsie/utils/bsfs.py
+++ b/bsie/utils/bsfs.py
@@ -8,13 +8,14 @@ Author: Matthias Baumgartner, 2022
 import typing
 
 # bsfs imports
-from bsfs import schema
+from bsfs import Open, schema
 from bsfs.namespace import Namespace
 from bsfs.utils import URI, typename, uuid
 
 # exports
 __all__: typing.Sequence[str] = (
     'Namespace',
+    'Open',
     'URI',
     'schema',
     'typename',
diff --git a/bsie/utils/node.py b/bsie/utils/node.py
index c9c494f..ecf39cd 100644
--- a/bsie/utils/node.py
+++ b/bsie/utils/node.py
@@ -8,7 +8,7 @@ Author: Matthias Baumgartner, 2022
 import typing
 
 # bsie imports
-from bsie.utils.bsfs import URI, typename
+from bsie.utils import bsfs
 
 # exports
 __all__: typing.Sequence[str] = (
@@ -22,19 +22,19 @@ class Node():
     """Lightweight Node, disconnected from any bsfs structures."""
 
     # node type.
-    node_type: URI
+    node_type: bsfs.URI
 
     # node URI.
-    uri: URI
+    uri: bsfs.URI
 
     def __init__(
             self,
-            node_type: URI,
-            uri: URI,
+            node_type: bsfs.URI,
+            uri: bsfs.URI,
             ):
         # assign members
-        self.node_type = URI(node_type)
-        self.uri = URI(uri)
+        self.node_type = bsfs.URI(node_type)
+        self.uri = bsfs.URI(uri)
 
     def __eq__(self, other: typing.Any) -> bool:
         return isinstance(other, Node) \
@@ -45,9 +45,9 @@ class Node():
         return hash((type(self), self.node_type, self.uri))
 
     def __str__(self) -> str:
-        return f'{typename(self)}({self.node_type}, {self.uri})'
+        return f'{bsfs.typename(self)}({self.node_type}, {self.uri})'
 
     def __repr__(self) -> str:
-        return f'{typename(self)}({self.node_type}, {self.uri})'
+        return f'{bsfs.typename(self)}({self.node_type}, {self.uri})'
 
 ## EOF ##
diff --git a/test/base/test_extractor.py b/test/base/test_extractor.py
index be876ad..5410ae0 100644
--- a/test/base/test_extractor.py
+++ b/test/base/test_extractor.py
@@ -8,8 +8,7 @@ Author: Matthias Baumgartner, 2022
 import unittest
 
 # bsie imports
-from bsie.utils import ns
-from bsie.utils.bsfs import schema as _schema, URI
+from bsie.utils import bsfs, ns
 
 # objects to test
 from bsie.base import extractor
@@ -19,7 +18,7 @@ from bsie.base import extractor
 
 class StubExtractor(extractor.Extractor):
     def __init__(self):
-        super().__init__(_schema.Schema.from_string(extractor.SCHEMA_PREAMBLE + '''
+        super().__init__(bsfs.schema.Schema.from_string(extractor.SCHEMA_PREAMBLE + '''
             bse:author rdfs:subClassOf bsfs:Predicate ;
                 rdfs:domain bsfs:Entity ;
                 rdfs:range xsd:string ;
@@ -53,9 +52,9 @@ class TestExtractor(unittest.TestCase):
         self.assertNotEqual(hash(ext), hash(sub))
 
     def test_predicates(self):
-        schema = _schema.Schema.Empty()
+        schema = bsfs.schema.Schema.Empty()
         entity = schema.node(ns.bsfs.Node).get_child(ns.bsfs.Entity)
-        string = schema.literal(ns.bsfs.Literal).get_child(URI('http://www.w3.org/2001/XMLSchema#string'))
+        string = schema.literal(ns.bsfs.Literal).get_child(bsfs.URI('http://www.w3.org/2001/XMLSchema#string'))
         p_author = schema.predicate(ns.bsfs.Predicate).get_child(ns.bse.author, domain=entity, range=string)
         p_comment = schema.predicate(ns.bsfs.Predicate).get_child(ns.bse.comment, domain=entity, range=string)
         ext = StubExtractor()
diff --git a/test/base/test_reader.py b/test/base/test_reader.py
index 802b314..a907eb9 100644
--- a/test/base/test_reader.py
+++ b/test/base/test_reader.py
@@ -8,12 +8,12 @@ Author: Matthias Baumgartner, 2022
 import unittest
 
 # objects to test
-from bsie.base import reader
+from bsie import base
 
 
 ## code ##
 
-class StubReader(reader.Reader):
+class StubReader(base.Reader):
     def __call__(self, path):
         raise NotImplementedError()
 
diff --git a/test/extractor/generic/test_constant.py b/test/extractor/generic/test_constant.py
index 7f72ccf..9dbaced 100644
--- a/test/extractor/generic/test_constant.py
+++ b/test/extractor/generic/test_constant.py
@@ -8,8 +8,7 @@ Author: Matthias Baumgartner, 2022
 import unittest
 
 # bsie imports
-from bsie.utils import ns
-from bsie.utils.node import Node
+from bsie.utils import node as _node, ns
 
 # objects to test
 from bsie.extractor.generic.constant import Constant
@@ -34,7 +33,7 @@ class TestConstant(unittest.TestCase):
             (ns.bse.comment, 'the quick brown fox jumps over the lazy dog.'),
             ]
         ext = Constant(schema, tuples)
-        node = Node(ns.bsfs.Entity, '') # Blank node
+        node = _node.Node(ns.bsfs.Entity, '') # Blank node
         p_author = ext.schema.predicate(ns.bse.author)
         p_comment = ext.schema.predicate(ns.bse.comment)
         entity = ext.schema.node(ns.bsfs.Node).get_child(ns.bsfs.Entity)
diff --git a/test/extractor/generic/test_path.py b/test/extractor/generic/test_path.py
index aa21b04..d2b6c61 100644
--- a/test/extractor/generic/test_path.py
+++ b/test/extractor/generic/test_path.py
@@ -8,10 +8,8 @@ Author: Matthias Baumgartner, 2022
 import unittest
 
 # bsie imports
-from bsie import base
-from bsie.utils import ns
-from bsie.utils.bsfs import schema
-from bsie.utils.node import Node
+from bsie.base import extractor
+from bsie.utils import bsfs, node as _node, ns
 
 # objects to test
 from bsie.extractor.generic.path import Path
@@ -31,7 +29,7 @@ class TestPath(unittest.TestCase):
 
     def test_schema(self):
         self.assertEqual(Path().schema,
-            schema.Schema.from_string(base.extractor.SCHEMA_PREAMBLE + '''
+            bsfs.schema.Schema.from_string(extractor.SCHEMA_PREAMBLE + '''
                 bse:filename rdfs:subClassOf bsfs:Predicate ;
                     rdfs:domain bsfs:Entity ;
                     rdfs:range xsd:string ;
@@ -40,7 +38,7 @@ class TestPath(unittest.TestCase):
 
     def test_extract(self):
         ext = Path()
-        node = Node(ns.bsfs.Entity, '') # Blank node
+        node = _node.Node(ns.bsfs.File, '') # Blank node
         content = '/tmp/foo/bar'
         p_filename = ext.schema.predicate(ns.bse.filename)
         entity = ext.schema.node(ns.bsfs.Node).get_child(ns.bsfs.Entity)
diff --git a/test/extractor/generic/test_stat.py b/test/extractor/generic/test_stat.py
index bed5fab..6cfc57f 100644
--- a/test/extractor/generic/test_stat.py
+++ b/test/extractor/generic/test_stat.py
@@ -9,10 +9,8 @@ import os
 import unittest
 
 # bsie imports
-from bsie import base
-from bsie.utils import ns
-from bsie.utils.bsfs import schema
-from bsie.utils.node import Node
+from bsie.base import extractor
+from bsie.utils import bsfs, node as _node, ns
 
 # objects to test
 from bsie.extractor.generic.stat import Stat
@@ -32,7 +30,7 @@ class TestStat(unittest.TestCase):
 
     def test_schema(self):
         self.assertEqual(Stat().schema,
-            schema.Schema.from_string(base.extractor.SCHEMA_PREAMBLE + '''
+            bsfs.schema.Schema.from_string(extractor.SCHEMA_PREAMBLE + '''
                 bse:filesize rdfs:subClassOf bsfs:Predicate ;
                     rdfs:domain bsfs:Entity ;
                     rdfs:range xsd:integer ;
@@ -41,7 +39,7 @@ class TestStat(unittest.TestCase):
 
     def test_extract(self):
         ext = Stat()
-        node = Node(ns.bsfs.Entity, '') # Blank node
+        node = _node.Node(ns.bsfs.File, '') # Blank node
         content = os.stat(__file__)
         p_filesize = ext.schema.predicate(ns.bse.filesize)
         entity = ext.schema.node(ns.bsfs.Node).get_child(ns.bsfs.Entity)
-- 
cgit v1.2.3


From 3b7fee369924eb7704709edeb8c17fff9c020dfb Mon Sep 17 00:00:00 2001
From: Matthias Baumgartner <dev@igsor.net>
Date: Thu, 15 Dec 2022 17:06:09 +0100
Subject: import fixes

---
 bsie/base/extractor.py             |  5 +++--
 bsie/extractor/generic/constant.py |  9 +++++----
 bsie/extractor/generic/path.py     |  6 +++---
 bsie/extractor/generic/stat.py     |  6 +++---
 bsie/lib/bsie.py                   | 11 ++++++-----
 bsie/tools/builder.py              | 17 +++++++++--------
 bsie/tools/pipeline.py             |  6 +++---
 test/lib/test_bsie.py              |  6 +++---
 test/tools/test_builder.py         | 19 +++++++++----------
 test/tools/test_pipeline.py        |  9 ++++-----
 test/utils/test_node.py            | 17 ++++++++---------
 11 files changed, 56 insertions(+), 55 deletions(-)

diff --git a/bsie/base/extractor.py b/bsie/base/extractor.py
index bfa403c..a5c7846 100644
--- a/bsie/base/extractor.py
+++ b/bsie/base/extractor.py
@@ -11,6 +11,7 @@ import typing
 # bsie imports
 from bsie.utils import node
 from bsie.utils.bsfs import schema as _schema, typename
+from bsie.utils import bsfs, node, ns
 
 # exports
 __all__: typing.Sequence[str] = (
@@ -62,10 +63,10 @@ class Extractor(abc.ABC):
         self.schema = schema
 
     def __str__(self) -> str:
-        return typename(self)
+        return bsfs.typename(self)
 
     def __repr__(self) -> str:
-        return f'{typename(self)}()'
+        return f'{bsfs.typename(self)}()'
 
     def __eq__(self, other: typing.Any) -> bool:
         return isinstance(other, type(self)) \
diff --git a/bsie/extractor/generic/constant.py b/bsie/extractor/generic/constant.py
index 7da792a..f9e3415 100644
--- a/bsie/extractor/generic/constant.py
+++ b/bsie/extractor/generic/constant.py
@@ -11,6 +11,7 @@ import typing
 from bsie.base import extractor
 from bsie.utils.bsfs import URI, schema as _schema
 from bsie.utils.node import Node
+from bsie.utils import bsfs, node
 
 # exports
 __all__: typing.Sequence[str] = (
@@ -26,14 +27,14 @@ class Constant(extractor.Extractor):
     CONTENT_READER = None
 
     # predicate/value pairs to be produced.
-    _tuples: typing.Tuple[typing.Tuple[_schema.Predicate, typing.Any], ...]
+    _tuples: typing.Tuple[typing.Tuple[bsfs.schema.Predicate, typing.Any], ...]
 
     def __init__(
             self,
             schema: str,
-            tuples: typing.Iterable[typing.Tuple[URI, typing.Any]],
+            tuples: typing.Iterable[typing.Tuple[bsfs.URI, typing.Any]],
             ):
-        super().__init__(_schema.Schema.from_string(extractor.SCHEMA_PREAMBLE + schema))
+        super().__init__(bsfs.schema.Schema.from_string(extractor.SCHEMA_PREAMBLE + schema))
         # NOTE: Raises a KeyError if the predicate is not part of the schema
         self._tuples = tuple((self.schema.predicate(p_uri), value) for p_uri, value in tuples)
         # FIXME: use schema instance for value checking
@@ -47,7 +48,7 @@ class Constant(extractor.Extractor):
 
     def extract(
             self,
-            subject: Node,
+            subject: node.Node,
             content: None,
             predicates: typing.Iterable[_schema.Predicate],
             ) -> typing.Iterator[typing.Tuple[Node, _schema.Predicate, typing.Any]]:
diff --git a/bsie/extractor/generic/path.py b/bsie/extractor/generic/path.py
index e6b901e..2cc592a 100644
--- a/bsie/extractor/generic/path.py
+++ b/bsie/extractor/generic/path.py
@@ -10,8 +10,8 @@ import typing
 
 # bsie imports
 from bsie.base import extractor
-from bsie.utils import node, ns
 from bsie.utils.bsfs import schema
+from bsie.utils import bsfs, node, ns
 
 # exports
 __all__: typing.Sequence[str] = (
@@ -27,10 +27,10 @@ class Path(extractor.Extractor):
     CONTENT_READER = 'bsie.reader.path.Path'
 
     # mapping from predicate to handler function.
-    _callmap: typing.Dict[schema.Predicate, typing.Callable[[str], typing.Any]]
+    _callmap: typing.Dict[bsfs.schema.Predicate, typing.Callable[[str], typing.Any]]
 
     def __init__(self):
-        super().__init__(schema.Schema.from_string(extractor.SCHEMA_PREAMBLE + '''
+        super().__init__(bsfs.schema.Schema.from_string(extractor.SCHEMA_PREAMBLE + '''
             bse:filename rdfs:subClassOf bsfs:Predicate ;
                 rdfs:domain bsfs:Entity ;
                 rdfs:range xsd:string ;
diff --git a/bsie/extractor/generic/stat.py b/bsie/extractor/generic/stat.py
index 6493d37..dfde7d2 100644
--- a/bsie/extractor/generic/stat.py
+++ b/bsie/extractor/generic/stat.py
@@ -10,8 +10,8 @@ import typing
 
 # bsie imports
 from bsie.base import extractor
-from bsie.utils import node, ns
 from bsie.utils.bsfs import schema as _schema
+from bsie.utils import bsfs, node, ns
 
 # exports
 __all__: typing.Sequence[str] = (
@@ -27,10 +27,10 @@ class Stat(extractor.Extractor):
     CONTENT_READER = 'bsie.reader.stat.Stat'
 
     # mapping from predicate to handler function.
-    _callmap: typing.Dict[_schema.Predicate, typing.Callable[[os.stat_result], typing.Any]]
+    _callmap: typing.Dict[bsfs.schema.Predicate, typing.Callable[[os.stat_result], typing.Any]]
 
     def __init__(self):
-        super().__init__(_schema.Schema.from_string(extractor.SCHEMA_PREAMBLE + '''
+        super().__init__(bsfs.schema.Schema.from_string(extractor.SCHEMA_PREAMBLE + '''
             bse:filesize rdfs:subClassOf bsfs:Predicate ;
                 rdfs:domain bsfs:Entity ;
                 rdfs:range xsd:integer ;
diff --git a/bsie/lib/bsie.py b/bsie/lib/bsie.py
index aeccc8c..3aeee2b 100644
--- a/bsie/lib/bsie.py
+++ b/bsie/lib/bsie.py
@@ -9,8 +9,9 @@ import typing
 
 # bsie imports
 from bsie.tools.pipeline import Pipeline
-from bsie.utils import node, ns
 from bsie.utils.bsfs import URI, schema as schema_
+from bsie.tools import Pipeline
+from bsie.utils import bsfs, node, ns
 
 # exports
 __all__: typing.Sequence[str] = (
@@ -39,10 +40,10 @@ class BSIE():
             self,
             # pipeline builder.
             pipeline: Pipeline,
-            # predicates to extract at most. None implies all available w.r.t. extractors.
-            collect: typing.Optional[typing.Iterable[URI]] = None,
-            # predicates to discard.
-            discard: typing.Optional[typing.Iterable[URI]] = None,
+            # principals to extract at most. None implies all available w.r.t. extractors.
+            collect: typing.Optional[typing.Iterable[bsfs.URI]] = None,
+            # principals to discard.
+            discard: typing.Optional[typing.Iterable[bsfs.URI]] = None,
             ):
         # store pipeline
         self.pipeline = pipeline
diff --git a/bsie/tools/builder.py b/bsie/tools/builder.py
index 8f7a410..8c6b931 100644
--- a/bsie/tools/builder.py
+++ b/bsie/tools/builder.py
@@ -13,6 +13,7 @@ import typing
 from bsie import base
 from bsie.base import errors
 from bsie.utils.bsfs import URI, typename
+from bsie.utils import bsfs
 
 # inner-module imports
 from . import pipeline
@@ -61,7 +62,7 @@ def _unpack_name(name):
 
 
 class ReaderBuilder():
-    """Build `bsie.base.reader.Reader` instances.
+    """Build `bsie.base.Reader` instances.
 
     Readers are defined via their qualified class name
     (e.g., bsie.reader.path.Path) and optional keyword
@@ -83,7 +84,7 @@ class ReaderBuilder():
         self.kwargs = kwargs
         self.cache = {}
 
-    def build(self, name: str) -> base.reader.Reader:
+    def build(self, name: str) -> base.Reader:
         """Return an instance for the qualified class name."""
         # return cached instance
         if name in self.cache:
@@ -98,7 +99,7 @@ class ReaderBuilder():
         # get kwargs
         kwargs = self.kwargs.get(name, {})
         if not isinstance(kwargs, dict):
-            raise TypeError(f'expected a kwargs dict, found {typename(kwargs)}')
+            raise TypeError(f'expected a kwargs dict, found {bsfs.typename(kwargs)}')
 
         try: # build, cache, and return instance
             obj = cls(**kwargs)
@@ -108,11 +109,11 @@ class ReaderBuilder():
             return obj
 
         except Exception as err:
-            raise errors.BuilderError(f'failed to build reader {name} due to {typename(err)}: {err}') from err
+            raise errors.BuilderError(f'failed to build reader {name} due to {bsfs.typename(err)}: {err}') from err
 
 
 class ExtractorBuilder():
-    """Build `bsie.base.extractor.Extractor instances.
+    """Build `bsie.base.Extractor` instances.
 
     It is permissible to build multiple instances of the same extractor
     (typically with different arguments), hence the ExtractorBuilder
@@ -133,14 +134,14 @@ class ExtractorBuilder():
         """Iterate over extractor specifications."""
         return iter(range(len(self.specs)))
 
-    def build(self, index: int) -> base.extractor.Extractor:
+    def build(self, index: int) -> base.Extractor:
         """Return an instance of the n'th extractor (n=*index*)."""
         # get build instructions
         specs = self.specs[index]
 
         # check specs structure. expecting[{name: {kwargs}}]
         if not isinstance(specs, dict):
-            raise TypeError(f'expected a dict, found {typename(specs)}')
+            raise TypeError(f'expected a dict, found {bsfs.typename(specs)}')
         if len(specs) != 1:
             raise TypeError(f'expected a dict of length one, found {len(specs)}')
 
@@ -150,7 +151,7 @@ class ExtractorBuilder():
 
         # check kwargs structure
         if not isinstance(kwargs, dict):
-            raise TypeError(f'expected a dict, found {typename(kwargs)}')
+            raise TypeError(f'expected a dict, found {bsfs.typename(kwargs)}')
 
         # check name and get module/class components
         module_name, class_name = _unpack_name(name)
diff --git a/bsie/tools/pipeline.py b/bsie/tools/pipeline.py
index da422c0..7fdd935 100644
--- a/bsie/tools/pipeline.py
+++ b/bsie/tools/pipeline.py
@@ -11,9 +11,9 @@ import typing
 
 # bsie imports
 from bsie import base
-from bsie.utils import ns
 from bsie.utils.node import Node
 from bsie.utils.bsfs import schema as _schema, URI, uuid as _uuid, typename
+from bsie.utils import bsfs, node, ns
 
 # exports
 __all__: typing.Sequence[str] = (
@@ -56,10 +56,10 @@ class Pipeline():
         self.schema = _schema.Schema.Union(ext.schema for ext in ext2rdr)
 
     def __str__(self) -> str:
-        return typename(self)
+        return bsfs.typename(self)
 
     def __repr__(self) -> str:
-        return f'{typename(self)}(...)'
+        return f'{bsfs.typename(self)}(...)'
 
     def __hash__(self) -> int:
         return hash((type(self), self._prefix, self.schema, tuple(self._ext2rdr), tuple(self._ext2rdr.values())))
diff --git a/test/lib/test_bsie.py b/test/lib/test_bsie.py
index 277ac67..5b71752 100644
--- a/test/lib/test_bsie.py
+++ b/test/lib/test_bsie.py
@@ -9,10 +9,11 @@ import os
 import unittest
 
 # bsie imports
+from bsie.base import extractor
 from bsie.tools import builder
-from bsie.utils import ns
 from bsie.utils.bsfs import URI, schema
 from bsie.utils.node import Node
+from bsie.utils import bsfs, node, ns
 
 # objects to test
 from bsie.lib.bsie import BSIE
@@ -76,7 +77,6 @@ class TestBSIE(unittest.TestCase):
                 rdfs:domain bsfs:Entity ;
                 rdfs:range xsd:string ;
                 bsfs:unique "true"^^xsd:boolean .
-
             '''))
 
         # specify collect
@@ -207,7 +207,7 @@ class TestBSIE(unittest.TestCase):
             ns.bse.author,
             })
         content_hash = 'a948904f2f0f479b8f8197694b30184b0d2ed1c1cd2a1ec0fb85d299a192a447'
-        subject = Node(ns.bsfs.Entity, self.prefix + content_hash)
+        subject = node.Node(ns.bsfs.File, self.prefix + 'file#' + content_hash)
         testfile = os.path.join(os.path.dirname(__file__), 'testfile.t')
 
         # from_file extracts all available triples
diff --git a/test/tools/test_builder.py b/test/tools/test_builder.py
index bc6f903..62c637c 100644
--- a/test/tools/test_builder.py
+++ b/test/tools/test_builder.py
@@ -10,8 +10,7 @@ import unittest
 
 # bsie imports
 from bsie import base
-from bsie.base import errors
-from bsie.utils.bsfs import URI
+from bsie.utils import bsfs
 
 # objects to test
 from bsie.tools.builder import ExtractorBuilder
@@ -26,12 +25,12 @@ from bsie.tools.builder import _unpack_name
 class TestUtils(unittest.TestCase):
     def test_safe_load(self):
         # invalid module
-        self.assertRaises(errors.LoaderError, _safe_load, 'dBGHMSAYOoKeKMpywDoKZQycENFPvN', 'foobar')
-        self.assertRaises(errors.LoaderError, _safe_load, 'dBGHMSAYOoKeKMpywDoKZQycENFPvN.bar', 'foobar')
+        self.assertRaises(base.errors.LoaderError, _safe_load, 'dBGHMSAYOoKeKMpywDoKZQycENFPvN', 'foobar')
+        self.assertRaises(base.errors.LoaderError, _safe_load, 'dBGHMSAYOoKeKMpywDoKZQycENFPvN.bar', 'foobar')
         # partially valid module
-        self.assertRaises(errors.LoaderError, _safe_load, 'os.foo', 'foobar')
+        self.assertRaises(base.errors.LoaderError, _safe_load, 'os.foo', 'foobar')
         # invalid class
-        self.assertRaises(errors.LoaderError, _safe_load, 'os.path', 'foo')
+        self.assertRaises(base.errors.LoaderError, _safe_load, 'os.path', 'foo')
         # valid module and class
         cls = _safe_load('collections.abc', 'Container')
         import collections.abc
@@ -65,10 +64,10 @@ class TestReaderBuilder(unittest.TestCase):
         self.assertRaises(TypeError, builder.build, None)
         self.assertRaises(ValueError, builder.build, '')
         self.assertRaises(ValueError, builder.build, 'Path')
-        self.assertRaises(errors.BuilderError, builder.build, 'path.Path')
+        self.assertRaises(base.errors.BuilderError, builder.build, 'path.Path')
         # invalid config
         builder = ReaderBuilder({'bsie.reader.stat.Stat': dict(foo=123)})
-        self.assertRaises(errors.BuilderError, builder.build, 'bsie.reader.stat.Stat')
+        self.assertRaises(base.errors.BuilderError, builder.build, 'bsie.reader.stat.Stat')
         builder = ReaderBuilder({'bsie.reader.stat.Stat': 123})
         self.assertRaises(TypeError, builder.build, 'bsie.reader.stat.Stat')
         # no instructions
@@ -143,7 +142,7 @@ class TestExtractorBuilder(unittest.TestCase):
             ]))
 
         # building with invalid args
-        self.assertRaises(errors.BuilderError, ExtractorBuilder(
+        self.assertRaises(base.errors.BuilderError, ExtractorBuilder(
             [{'bsie.extractor.generic.path.Path': {'foo': 123}}]).build, 0)
         # non-dict build specification
         self.assertRaises(TypeError, ExtractorBuilder(
@@ -161,7 +160,7 @@ class TestExtractorBuilder(unittest.TestCase):
 
 class TestPipelineBuilder(unittest.TestCase):
     def test_build(self):
-        prefix = URI('http://example.com/local/file#')
+        prefix = bsfs.URI('http://example.com/local/file#')
         c_schema = '''
             bse:author rdfs:subClassOf bsfs:Predicate ;
                 rdfs:domain bsfs:Entity ;
diff --git a/test/tools/test_pipeline.py b/test/tools/test_pipeline.py
index 0dd8c75..92801ed 100644
--- a/test/tools/test_pipeline.py
+++ b/test/tools/test_pipeline.py
@@ -11,9 +11,8 @@ import unittest
 
 # bsie imports
 from bsie.base import errors
-from bsie.utils import ns
 from bsie.utils.bsfs import URI
-from bsie.utils.node import Node
+from bsie.utils import bsfs, node, ns
 import bsie.extractor.generic.constant
 import bsie.extractor.generic.path
 import bsie.extractor.generic.stat
@@ -68,8 +67,8 @@ class TestPipeline(unittest.TestCase):
         self.assertEqual(hash(pipeline), hash(Pipeline(self.prefix, self.ext2rdr)))
 
         # equivalence respects prefix
-        self.assertNotEqual(pipeline, Pipeline(URI('http://example.com/global/ent#'), self.ext2rdr))
-        self.assertNotEqual(hash(pipeline), hash(Pipeline(URI('http://example.com/global/ent#'), self.ext2rdr)))
+        self.assertNotEqual(pipeline, Pipeline(bsfs.URI('http://example.com/global/ent#'), self.ext2rdr))
+        self.assertNotEqual(hash(pipeline), hash(Pipeline(bsfs.URI('http://example.com/global/ent#'), self.ext2rdr)))
         # equivalence respects extractors/readers
         ext2rdr = {ext: rdr for idx, (ext, rdr) in enumerate(self.ext2rdr.items()) if idx % 2 == 0}
         self.assertNotEqual(pipeline, Pipeline(self.prefix, ext2rdr))
@@ -96,7 +95,7 @@ class TestPipeline(unittest.TestCase):
         pipeline = Pipeline(self.prefix, self.ext2rdr)
         # build objects for tests
         content_hash = 'a948904f2f0f479b8f8197694b30184b0d2ed1c1cd2a1ec0fb85d299a192a447'
-        subject = Node(ns.bsfs.Entity, self.prefix + content_hash)
+        subject = node.Node(ns.bsfs.File, self.prefix + 'file#' + content_hash)
         testfile = os.path.join(os.path.dirname(__file__), 'testfile.t')
         p_filename = pipeline.schema.predicate(ns.bse.filename)
         p_filesize = pipeline.schema.predicate(ns.bse.filesize)
diff --git a/test/utils/test_node.py b/test/utils/test_node.py
index 826f199..c70f0b8 100644
--- a/test/utils/test_node.py
+++ b/test/utils/test_node.py
@@ -8,8 +8,7 @@ Author: Matthias Baumgartner, 2022
 import unittest
 
 # bsie imports
-from bsie.utils.bsfs import URI
-from bsie.utils import ns
+from bsie.utils import bsfs, ns
 
 # objects to test
 from bsie.utils.node import Node
@@ -19,14 +18,14 @@ from bsie.utils.node import Node
 
 class TestNode(unittest.TestCase):
     def test_equality(self):
-        uri = URI('http://example.com/me/entity#1234')
+        uri = bsfs.URI('http://example.com/me/entity#1234')
         node = Node(ns.bsfs.Entity, uri)
         # basic equivalence
-        self.assertEqual(node, Node(ns.bsfs.Entity, URI('http://example.com/me/entity#1234')))
-        self.assertEqual(hash(node), hash(Node(ns.bsfs.Entity, URI('http://example.com/me/entity#1234'))))
+        self.assertEqual(node, Node(ns.bsfs.Entity, bsfs.URI('http://example.com/me/entity#1234')))
+        self.assertEqual(hash(node), hash(Node(ns.bsfs.Entity, bsfs.URI('http://example.com/me/entity#1234'))))
         # equality respects uri
-        self.assertNotEqual(node, Node(ns.bsfs.Entity, URI('http://example.com/me/entity#4321')))
-        self.assertNotEqual(hash(node), hash(Node(ns.bsfs.Entity, URI('http://example.com/me/entity#4321'))))
+        self.assertNotEqual(node, Node(ns.bsfs.Entity, bsfs.URI('http://example.com/me/entity#4321')))
+        self.assertNotEqual(hash(node), hash(Node(ns.bsfs.Entity, bsfs.URI('http://example.com/me/entity#4321'))))
         # equality respects node_type
         self.assertNotEqual(node, Node(ns.bsfs.Foo, uri))
         self.assertNotEqual(hash(node), hash(Node(ns.bsfs.Foo, uri)))
@@ -42,7 +41,7 @@ class TestNode(unittest.TestCase):
         self.assertNotEqual(hash(node), hash(Foo()))
 
     def test_str(self):
-        uri = URI('http://example.com/me/entity#1234')
+        uri = bsfs.URI('http://example.com/me/entity#1234')
         # basic string conversion
         node = Node(ns.bsfs.Entity, uri)
         self.assertEqual(str(node), 'Node(http://bsfs.ai/schema/Entity, http://example.com/me/entity#1234)')
@@ -52,7 +51,7 @@ class TestNode(unittest.TestCase):
         self.assertEqual(str(node), 'Node(http://bsfs.ai/schema/Foo, http://example.com/me/entity#1234)')
         self.assertEqual(repr(node), 'Node(http://bsfs.ai/schema/Foo, http://example.com/me/entity#1234)')
         # string conversion respects uri
-        node = Node(ns.bsfs.Entity, URI('http://example.com/me/entity#4321'))
+        node = Node(ns.bsfs.Entity, bsfs.URI('http://example.com/me/entity#4321'))
         self.assertEqual(str(node), 'Node(http://bsfs.ai/schema/Entity, http://example.com/me/entity#4321)')
         self.assertEqual(repr(node), 'Node(http://bsfs.ai/schema/Entity, http://example.com/me/entity#4321)')
 
-- 
cgit v1.2.3


From 8e6d27ea75d2c8d68f6dd8b3d529aaa278f291cc Mon Sep 17 00:00:00 2001
From: Matthias Baumgartner <dev@igsor.net>
Date: Thu, 15 Dec 2022 17:12:56 +0100
Subject: file node class in default schema

---
 bsie/base/extractor.py              |  17 +++---
 bsie/extractor/generic/path.py      |   2 +-
 bsie/extractor/generic/stat.py      |   2 +-
 bsie/tools/pipeline.py              |   4 +-
 test/apps/test_index.py             | 106 ++++++++++++++++++------------------
 test/extractor/generic/test_path.py |   2 +-
 test/extractor/generic/test_stat.py |   2 +-
 test/lib/test_bsie.py               |  12 ++--
 test/tools/test_pipeline.py         |   4 +-
 9 files changed, 75 insertions(+), 76 deletions(-)

diff --git a/bsie/base/extractor.py b/bsie/base/extractor.py
index a5c7846..678dcec 100644
--- a/bsie/base/extractor.py
+++ b/bsie/base/extractor.py
@@ -35,6 +35,7 @@ SCHEMA_PREAMBLE = '''
 
     # essential nodes
     bsfs:Entity rdfs:subClassOf bsfs:Node .
+    bsfs:File rdfs:subClassOf bsfs:Entity .
 
     # common definitions
     xsd:string rdfs:subClassOf bsfs:Literal .
@@ -77,15 +78,13 @@ class Extractor(abc.ABC):
         return hash((type(self), self.CONTENT_READER, self.schema))
 
     def predicates(self) -> typing.Iterator[_schema.Predicate]:
-        """Return the predicates that may be part of extracted triples."""
-        # NOTE: Some predicates in the schema might not occur in actual triples,
-        # but are defined due to predicate class hierarchy. E.g., bsfs:Predicate
-        # is part of every schema but should not be used in triples.
-        # Announcing all predicates might not be the most efficient way, however,
-        # it is the most safe one. Concrete extractors that produce additional
-        # predicates (e.g. auxiliary nodes with their own predicates) should
-        # overwrite this method to only include the principal predicates.
-        return self.schema.predicates()
+        ent = self.schema.node(ns.bsfs.Entity)
+        return (
+            pred
+            for pred
+            in self.schema.predicates()
+            if pred.domain <= ent or (pred.range is not None and pred.range <= ent)
+            )
 
     @abc.abstractmethod
     def extract(
diff --git a/bsie/extractor/generic/path.py b/bsie/extractor/generic/path.py
index 2cc592a..00165e3 100644
--- a/bsie/extractor/generic/path.py
+++ b/bsie/extractor/generic/path.py
@@ -32,7 +32,7 @@ class Path(extractor.Extractor):
     def __init__(self):
         super().__init__(bsfs.schema.Schema.from_string(extractor.SCHEMA_PREAMBLE + '''
             bse:filename rdfs:subClassOf bsfs:Predicate ;
-                rdfs:domain bsfs:Entity ;
+                rdfs:domain bsfs:File ;
                 rdfs:range xsd:string ;
                 rdfs:label "File name"^^xsd:string ;
                 schema:description "Filename of entity in some filesystem."^^xsd:string ;
diff --git a/bsie/extractor/generic/stat.py b/bsie/extractor/generic/stat.py
index dfde7d2..0f4267f 100644
--- a/bsie/extractor/generic/stat.py
+++ b/bsie/extractor/generic/stat.py
@@ -32,7 +32,7 @@ class Stat(extractor.Extractor):
     def __init__(self):
         super().__init__(bsfs.schema.Schema.from_string(extractor.SCHEMA_PREAMBLE + '''
             bse:filesize rdfs:subClassOf bsfs:Predicate ;
-                rdfs:domain bsfs:Entity ;
+                rdfs:domain bsfs:File ;
                 rdfs:range xsd:integer ;
                 rdfs:label "File size"^^xsd:string ;
                 schema:description "File size of entity in some filesystem."^^xsd:string ;
diff --git a/bsie/tools/pipeline.py b/bsie/tools/pipeline.py
index 7fdd935..3d08993 100644
--- a/bsie/tools/pipeline.py
+++ b/bsie/tools/pipeline.py
@@ -97,8 +97,8 @@ class Pipeline():
             rdr2ext[rdr].add(ext)
 
         # create subject for file
-        uuid = _uuid.UCID.from_path(path)
-        subject = Node(ns.bsfs.Entity, self._prefix + uuid)
+        uuid = bsfs.uuid.UCID.from_path(path)
+        subject = node.Node(ns.bsfs.File, self._prefix + 'file#' + uuid)
 
         # extract information
         for rdr, extrs in rdr2ext.items():
diff --git a/test/apps/test_index.py b/test/apps/test_index.py
index 6d47df8..c567dea 100644
--- a/test/apps/test_index.py
+++ b/test/apps/test_index.py
@@ -31,47 +31,47 @@ class TestIndex(unittest.TestCase):
 
         prefix = 'http://example.com/me/file#'
         self.assertTrue(set(bsfs._Graph__backend.graph).issuperset({
-            (rdflib.URIRef(prefix + '2f4109b40107cc50e0884755a1a961ed126887e49b8dbaf0e146b2e226aa6647'), rdflib.RDF.type, rdflib.URIRef(ns.bsfs.Entity)),
+            (rdflib.URIRef(prefix + '2f4109b40107cc50e0884755a1a961ed126887e49b8dbaf0e146b2e226aa6647'), rdflib.RDF.type, rdflib.URIRef(ns.bsfs.File)),
             (rdflib.URIRef(prefix + '2f4109b40107cc50e0884755a1a961ed126887e49b8dbaf0e146b2e226aa6647'), rdflib.URIRef(ns.bse.author), rdflib.Literal('Me, myself, and I', datatype=rdflib.XSD.string)),
             (rdflib.URIRef(prefix + '2f4109b40107cc50e0884755a1a961ed126887e49b8dbaf0e146b2e226aa6647'), rdflib.URIRef(ns.bse.filename), rdflib.Literal('alpha_second', datatype=rdflib.XSD.string)),
             (rdflib.URIRef(prefix + '2f4109b40107cc50e0884755a1a961ed126887e49b8dbaf0e146b2e226aa6647'), rdflib.URIRef(ns.bse.filesize), rdflib.Literal('696', datatype=rdflib.XSD.integer)),
-            (rdflib.URIRef(prefix + '441f3d10c8ff489fe8e33e639606512f6c463151cc429de7e554b9af670c2ece'), rdflib.RDF.type, rdflib.URIRef(ns.bsfs.Entity)),
+            (rdflib.URIRef(prefix + '441f3d10c8ff489fe8e33e639606512f6c463151cc429de7e554b9af670c2ece'), rdflib.RDF.type, rdflib.URIRef(ns.bsfs.File)),
             (rdflib.URIRef(prefix + '441f3d10c8ff489fe8e33e639606512f6c463151cc429de7e554b9af670c2ece'), rdflib.URIRef(ns.bse.author), rdflib.Literal('Me, myself, and I', datatype=rdflib.XSD.string)),
             (rdflib.URIRef(prefix + '441f3d10c8ff489fe8e33e639606512f6c463151cc429de7e554b9af670c2ece'), rdflib.URIRef(ns.bse.filename), rdflib.Literal('omega_second', datatype=rdflib.XSD.string)),
             (rdflib.URIRef(prefix + '441f3d10c8ff489fe8e33e639606512f6c463151cc429de7e554b9af670c2ece'), rdflib.URIRef(ns.bse.filesize), rdflib.Literal('503', datatype=rdflib.XSD.integer)),
-            (rdflib.URIRef(prefix + '69b98ecf7aff3e95b09688ba93331678eb8397817111f674c9558e6dd8f5e871'), rdflib.RDF.type, rdflib.URIRef(ns.bsfs.Entity)),
+            (rdflib.URIRef(prefix + '69b98ecf7aff3e95b09688ba93331678eb8397817111f674c9558e6dd8f5e871'), rdflib.RDF.type, rdflib.URIRef(ns.bsfs.File)),
             (rdflib.URIRef(prefix + '69b98ecf7aff3e95b09688ba93331678eb8397817111f674c9558e6dd8f5e871'), rdflib.URIRef(ns.bse.author), rdflib.Literal('Me, myself, and I', datatype=rdflib.XSD.string)),
             (rdflib.URIRef(prefix + '69b98ecf7aff3e95b09688ba93331678eb8397817111f674c9558e6dd8f5e871'), rdflib.URIRef(ns.bse.filename), rdflib.Literal('td_first', datatype=rdflib.XSD.string)),
             (rdflib.URIRef(prefix + '69b98ecf7aff3e95b09688ba93331678eb8397817111f674c9558e6dd8f5e871'), rdflib.URIRef(ns.bse.filesize), rdflib.Literal('911', datatype=rdflib.XSD.integer)),
-            (rdflib.URIRef(prefix + '78f7eb7f0d8221cdb2cb26c978fa42a11f75eb87becc768f4474134cb1e06926'), rdflib.RDF.type, rdflib.URIRef(ns.bsfs.Entity)),
+            (rdflib.URIRef(prefix + '78f7eb7f0d8221cdb2cb26c978fa42a11f75eb87becc768f4474134cb1e06926'), rdflib.RDF.type, rdflib.URIRef(ns.bsfs.File)),
             (rdflib.URIRef(prefix + '78f7eb7f0d8221cdb2cb26c978fa42a11f75eb87becc768f4474134cb1e06926'), rdflib.URIRef(ns.bse.author), rdflib.Literal('Me, myself, and I', datatype=rdflib.XSD.string)),
             (rdflib.URIRef(prefix + '78f7eb7f0d8221cdb2cb26c978fa42a11f75eb87becc768f4474134cb1e06926'), rdflib.URIRef(ns.bse.filename), rdflib.Literal('testfile', datatype=rdflib.XSD.string)),
             (rdflib.URIRef(prefix + '78f7eb7f0d8221cdb2cb26c978fa42a11f75eb87becc768f4474134cb1e06926'), rdflib.URIRef(ns.bse.filesize), rdflib.Literal('885', datatype=rdflib.XSD.integer)),
-            (rdflib.URIRef(prefix + '80818b8ec2ee1919116dba9c8a7e0a4608313cf3b463cd88e9ed77a700dd92d3'), rdflib.RDF.type, rdflib.URIRef(ns.bsfs.Entity)),
+            (rdflib.URIRef(prefix + '80818b8ec2ee1919116dba9c8a7e0a4608313cf3b463cd88e9ed77a700dd92d3'), rdflib.RDF.type, rdflib.URIRef(ns.bsfs.File)),
             (rdflib.URIRef(prefix + '80818b8ec2ee1919116dba9c8a7e0a4608313cf3b463cd88e9ed77a700dd92d3'), rdflib.URIRef(ns.bse.author), rdflib.Literal('Me, myself, and I', datatype=rdflib.XSD.string)),
             (rdflib.URIRef(prefix + '80818b8ec2ee1919116dba9c8a7e0a4608313cf3b463cd88e9ed77a700dd92d3'), rdflib.URIRef(ns.bse.filename), rdflib.Literal('bar_first', datatype=rdflib.XSD.string)),
             (rdflib.URIRef(prefix + '80818b8ec2ee1919116dba9c8a7e0a4608313cf3b463cd88e9ed77a700dd92d3'), rdflib.URIRef(ns.bse.filesize), rdflib.Literal('956', datatype=rdflib.XSD.integer)),
-            (rdflib.URIRef(prefix + '976d2ea0e58488678cc7e435fbfadabfb6eb6cf50ad51862f38f73729ed11795'), rdflib.RDF.type, rdflib.URIRef(ns.bsfs.Entity)),
+            (rdflib.URIRef(prefix + '976d2ea0e58488678cc7e435fbfadabfb6eb6cf50ad51862f38f73729ed11795'), rdflib.RDF.type, rdflib.URIRef(ns.bsfs.File)),
             (rdflib.URIRef(prefix + '976d2ea0e58488678cc7e435fbfadabfb6eb6cf50ad51862f38f73729ed11795'), rdflib.URIRef(ns.bse.author), rdflib.Literal('Me, myself, and I', datatype=rdflib.XSD.string)),
             (rdflib.URIRef(prefix + '976d2ea0e58488678cc7e435fbfadabfb6eb6cf50ad51862f38f73729ed11795'), rdflib.URIRef(ns.bse.filename), rdflib.Literal('omega_first', datatype=rdflib.XSD.string)),
             (rdflib.URIRef(prefix + '976d2ea0e58488678cc7e435fbfadabfb6eb6cf50ad51862f38f73729ed11795'), rdflib.URIRef(ns.bse.filesize), rdflib.Literal('648', datatype=rdflib.XSD.integer)),
-            (rdflib.URIRef(prefix + '997e2fbb7494a3818ec782d2bc87bf1cffafba6b9c0f658e4a6c18a723e944d3'), rdflib.RDF.type, rdflib.URIRef(ns.bsfs.Entity)),
+            (rdflib.URIRef(prefix + '997e2fbb7494a3818ec782d2bc87bf1cffafba6b9c0f658e4a6c18a723e944d3'), rdflib.RDF.type, rdflib.URIRef(ns.bsfs.File)),
             (rdflib.URIRef(prefix + '997e2fbb7494a3818ec782d2bc87bf1cffafba6b9c0f658e4a6c18a723e944d3'), rdflib.URIRef(ns.bse.author), rdflib.Literal('Me, myself, and I', datatype=rdflib.XSD.string)),
             (rdflib.URIRef(prefix + '997e2fbb7494a3818ec782d2bc87bf1cffafba6b9c0f658e4a6c18a723e944d3'), rdflib.URIRef(ns.bse.filename), rdflib.Literal('alpha_first', datatype=rdflib.XSD.string)),
             (rdflib.URIRef(prefix + '997e2fbb7494a3818ec782d2bc87bf1cffafba6b9c0f658e4a6c18a723e944d3'), rdflib.URIRef(ns.bse.filesize), rdflib.Literal('754', datatype=rdflib.XSD.integer)),
-            (rdflib.URIRef(prefix + 'a8af899ecdab60dfaea8ec7f934053624c80a1054539e163f2c7eaa986c2777d'), rdflib.RDF.type, rdflib.URIRef(ns.bsfs.Entity)),
+            (rdflib.URIRef(prefix + 'a8af899ecdab60dfaea8ec7f934053624c80a1054539e163f2c7eaa986c2777d'), rdflib.RDF.type, rdflib.URIRef(ns.bsfs.File)),
             (rdflib.URIRef(prefix + 'a8af899ecdab60dfaea8ec7f934053624c80a1054539e163f2c7eaa986c2777d'), rdflib.URIRef(ns.bse.author), rdflib.Literal('Me, myself, and I', datatype=rdflib.XSD.string)),
             (rdflib.URIRef(prefix + 'a8af899ecdab60dfaea8ec7f934053624c80a1054539e163f2c7eaa986c2777d'), rdflib.URIRef(ns.bse.filename), rdflib.Literal('foo_second', datatype=rdflib.XSD.string)),
             (rdflib.URIRef(prefix + 'a8af899ecdab60dfaea8ec7f934053624c80a1054539e163f2c7eaa986c2777d'), rdflib.URIRef(ns.bse.filesize), rdflib.Literal('585', datatype=rdflib.XSD.integer)),
-            (rdflib.URIRef(prefix + 'b8fd7fba818254166a6043195004138ebda6923e012442f819a2c49671136c70'), rdflib.RDF.type, rdflib.URIRef(ns.bsfs.Entity)),
+            (rdflib.URIRef(prefix + 'b8fd7fba818254166a6043195004138ebda6923e012442f819a2c49671136c70'), rdflib.RDF.type, rdflib.URIRef(ns.bsfs.File)),
             (rdflib.URIRef(prefix + 'b8fd7fba818254166a6043195004138ebda6923e012442f819a2c49671136c70'), rdflib.URIRef(ns.bse.author), rdflib.Literal('Me, myself, and I', datatype=rdflib.XSD.string)),
             (rdflib.URIRef(prefix + 'b8fd7fba818254166a6043195004138ebda6923e012442f819a2c49671136c70'), rdflib.URIRef(ns.bse.filename), rdflib.Literal('bar_second', datatype=rdflib.XSD.string)),
             (rdflib.URIRef(prefix + 'b8fd7fba818254166a6043195004138ebda6923e012442f819a2c49671136c70'), rdflib.URIRef(ns.bse.filesize), rdflib.Literal('636', datatype=rdflib.XSD.integer)),
-            (rdflib.URIRef(prefix + 'd43758ace82154a1cc10ca0dfef63cb20dd831f9c87edd6dc06539eefe67371d'), rdflib.RDF.type, rdflib.URIRef(ns.bsfs.Entity)),
+            (rdflib.URIRef(prefix + 'd43758ace82154a1cc10ca0dfef63cb20dd831f9c87edd6dc06539eefe67371d'), rdflib.RDF.type, rdflib.URIRef(ns.bsfs.File)),
             (rdflib.URIRef(prefix + 'd43758ace82154a1cc10ca0dfef63cb20dd831f9c87edd6dc06539eefe67371d'), rdflib.URIRef(ns.bse.author), rdflib.Literal('Me, myself, and I', datatype=rdflib.XSD.string)),
             (rdflib.URIRef(prefix + 'd43758ace82154a1cc10ca0dfef63cb20dd831f9c87edd6dc06539eefe67371d'), rdflib.URIRef(ns.bse.filename), rdflib.Literal('foo_first', datatype=rdflib.XSD.string)),
             (rdflib.URIRef(prefix + 'd43758ace82154a1cc10ca0dfef63cb20dd831f9c87edd6dc06539eefe67371d'), rdflib.URIRef(ns.bse.filesize), rdflib.Literal('546', datatype=rdflib.XSD.integer)),
-            (rdflib.URIRef(prefix + 'd803187cbf3676ae9d38126270a6152c60431589aa3bb3824baf8954e9c097f1'), rdflib.RDF.type, rdflib.URIRef(ns.bsfs.Entity)),
+            (rdflib.URIRef(prefix + 'd803187cbf3676ae9d38126270a6152c60431589aa3bb3824baf8954e9c097f1'), rdflib.RDF.type, rdflib.URIRef(ns.bsfs.File)),
             (rdflib.URIRef(prefix + 'd803187cbf3676ae9d38126270a6152c60431589aa3bb3824baf8954e9c097f1'), rdflib.URIRef(ns.bse.author), rdflib.Literal('Me, myself, and I', datatype=rdflib.XSD.string)),
             (rdflib.URIRef(prefix + 'd803187cbf3676ae9d38126270a6152c60431589aa3bb3824baf8954e9c097f1'), rdflib.URIRef(ns.bse.filename), rdflib.Literal('td_second', datatype=rdflib.XSD.string)),
             (rdflib.URIRef(prefix + 'd803187cbf3676ae9d38126270a6152c60431589aa3bb3824baf8954e9c097f1'), rdflib.URIRef(ns.bse.filesize), rdflib.Literal('703', datatype=rdflib.XSD.integer)),
@@ -105,49 +105,49 @@ class TestIndex(unittest.TestCase):
             })
 
     def test_print(self):
-        stdout, sys.stdout = sys.stdout, io.StringIO()
-        bsfs = main([
-            '--print',
-            '-r',
-            '--user', 'http://example.com/me',
-            os.path.join(os.path.dirname(__file__), 'testdir'),
-            os.path.join(os.path.dirname(__file__), 'testfile'),
-            ])
-        outbuf, sys.stdout = sys.stdout, stdout
+        outbuf = io.StringIO()
+        with contextlib.redirect_stdout(outbuf):
+            bsfs = main([
+                '--print',
+                '-r',
+                '--user', 'http://example.com/me',
+                os.path.join(os.path.dirname(__file__), 'testdir'),
+                os.path.join(os.path.dirname(__file__), 'testfile'),
+                ])
         self.assertSetEqual(set(outbuf.getvalue().split('\n')) - {''}, {
-            f'Node(http://bsfs.ai/schema/Entity, http://example.com/me/file#2f4109b40107cc50e0884755a1a961ed126887e49b8dbaf0e146b2e226aa6647) Predicate({ns.bse.author}) Me, myself, and I',
-            f'Node(http://bsfs.ai/schema/Entity, http://example.com/me/file#2f4109b40107cc50e0884755a1a961ed126887e49b8dbaf0e146b2e226aa6647) Predicate({ns.bse.filename}) alpha_second',
-            f'Node(http://bsfs.ai/schema/Entity, http://example.com/me/file#2f4109b40107cc50e0884755a1a961ed126887e49b8dbaf0e146b2e226aa6647) Predicate({ns.bse.filesize}) 696',
-            f'Node(http://bsfs.ai/schema/Entity, http://example.com/me/file#441f3d10c8ff489fe8e33e639606512f6c463151cc429de7e554b9af670c2ece) Predicate({ns.bse.author}) Me, myself, and I',
-            f'Node(http://bsfs.ai/schema/Entity, http://example.com/me/file#441f3d10c8ff489fe8e33e639606512f6c463151cc429de7e554b9af670c2ece) Predicate({ns.bse.filename}) omega_second',
-            f'Node(http://bsfs.ai/schema/Entity, http://example.com/me/file#441f3d10c8ff489fe8e33e639606512f6c463151cc429de7e554b9af670c2ece) Predicate({ns.bse.filesize}) 503',
-            f'Node(http://bsfs.ai/schema/Entity, http://example.com/me/file#69b98ecf7aff3e95b09688ba93331678eb8397817111f674c9558e6dd8f5e871) Predicate({ns.bse.author}) Me, myself, and I',
-            f'Node(http://bsfs.ai/schema/Entity, http://example.com/me/file#69b98ecf7aff3e95b09688ba93331678eb8397817111f674c9558e6dd8f5e871) Predicate({ns.bse.filename}) td_first',
-            f'Node(http://bsfs.ai/schema/Entity, http://example.com/me/file#69b98ecf7aff3e95b09688ba93331678eb8397817111f674c9558e6dd8f5e871) Predicate({ns.bse.filesize}) 911',
-            f'Node(http://bsfs.ai/schema/Entity, http://example.com/me/file#78f7eb7f0d8221cdb2cb26c978fa42a11f75eb87becc768f4474134cb1e06926) Predicate({ns.bse.author}) Me, myself, and I',
-            f'Node(http://bsfs.ai/schema/Entity, http://example.com/me/file#78f7eb7f0d8221cdb2cb26c978fa42a11f75eb87becc768f4474134cb1e06926) Predicate({ns.bse.filename}) testfile',
-            f'Node(http://bsfs.ai/schema/Entity, http://example.com/me/file#78f7eb7f0d8221cdb2cb26c978fa42a11f75eb87becc768f4474134cb1e06926) Predicate({ns.bse.filesize}) 885',
-            f'Node(http://bsfs.ai/schema/Entity, http://example.com/me/file#80818b8ec2ee1919116dba9c8a7e0a4608313cf3b463cd88e9ed77a700dd92d3) Predicate({ns.bse.author}) Me, myself, and I',
-            f'Node(http://bsfs.ai/schema/Entity, http://example.com/me/file#80818b8ec2ee1919116dba9c8a7e0a4608313cf3b463cd88e9ed77a700dd92d3) Predicate({ns.bse.filename}) bar_first',
-            f'Node(http://bsfs.ai/schema/Entity, http://example.com/me/file#80818b8ec2ee1919116dba9c8a7e0a4608313cf3b463cd88e9ed77a700dd92d3) Predicate({ns.bse.filesize}) 956',
-            f'Node(http://bsfs.ai/schema/Entity, http://example.com/me/file#976d2ea0e58488678cc7e435fbfadabfb6eb6cf50ad51862f38f73729ed11795) Predicate({ns.bse.author}) Me, myself, and I',
-            f'Node(http://bsfs.ai/schema/Entity, http://example.com/me/file#976d2ea0e58488678cc7e435fbfadabfb6eb6cf50ad51862f38f73729ed11795) Predicate({ns.bse.filename}) omega_first',
-            f'Node(http://bsfs.ai/schema/Entity, http://example.com/me/file#976d2ea0e58488678cc7e435fbfadabfb6eb6cf50ad51862f38f73729ed11795) Predicate({ns.bse.filesize}) 648',
-            f'Node(http://bsfs.ai/schema/Entity, http://example.com/me/file#997e2fbb7494a3818ec782d2bc87bf1cffafba6b9c0f658e4a6c18a723e944d3) Predicate({ns.bse.author}) Me, myself, and I',
-            f'Node(http://bsfs.ai/schema/Entity, http://example.com/me/file#997e2fbb7494a3818ec782d2bc87bf1cffafba6b9c0f658e4a6c18a723e944d3) Predicate({ns.bse.filename}) alpha_first',
-            f'Node(http://bsfs.ai/schema/Entity, http://example.com/me/file#997e2fbb7494a3818ec782d2bc87bf1cffafba6b9c0f658e4a6c18a723e944d3) Predicate({ns.bse.filesize}) 754',
-            f'Node(http://bsfs.ai/schema/Entity, http://example.com/me/file#a8af899ecdab60dfaea8ec7f934053624c80a1054539e163f2c7eaa986c2777d) Predicate({ns.bse.author}) Me, myself, and I',
-            f'Node(http://bsfs.ai/schema/Entity, http://example.com/me/file#a8af899ecdab60dfaea8ec7f934053624c80a1054539e163f2c7eaa986c2777d) Predicate({ns.bse.filename}) foo_second',
-            f'Node(http://bsfs.ai/schema/Entity, http://example.com/me/file#a8af899ecdab60dfaea8ec7f934053624c80a1054539e163f2c7eaa986c2777d) Predicate({ns.bse.filesize}) 585',
-            f'Node(http://bsfs.ai/schema/Entity, http://example.com/me/file#b8fd7fba818254166a6043195004138ebda6923e012442f819a2c49671136c70) Predicate({ns.bse.author}) Me, myself, and I',
-            f'Node(http://bsfs.ai/schema/Entity, http://example.com/me/file#b8fd7fba818254166a6043195004138ebda6923e012442f819a2c49671136c70) Predicate({ns.bse.filename}) bar_second',
-            f'Node(http://bsfs.ai/schema/Entity, http://example.com/me/file#b8fd7fba818254166a6043195004138ebda6923e012442f819a2c49671136c70) Predicate({ns.bse.filesize}) 636',
-            f'Node(http://bsfs.ai/schema/Entity, http://example.com/me/file#d43758ace82154a1cc10ca0dfef63cb20dd831f9c87edd6dc06539eefe67371d) Predicate({ns.bse.author}) Me, myself, and I',
-            f'Node(http://bsfs.ai/schema/Entity, http://example.com/me/file#d43758ace82154a1cc10ca0dfef63cb20dd831f9c87edd6dc06539eefe67371d) Predicate({ns.bse.filename}) foo_first',
-            f'Node(http://bsfs.ai/schema/Entity, http://example.com/me/file#d43758ace82154a1cc10ca0dfef63cb20dd831f9c87edd6dc06539eefe67371d) Predicate({ns.bse.filesize}) 546',
-            f'Node(http://bsfs.ai/schema/Entity, http://example.com/me/file#d803187cbf3676ae9d38126270a6152c60431589aa3bb3824baf8954e9c097f1) Predicate({ns.bse.author}) Me, myself, and I',
-            f'Node(http://bsfs.ai/schema/Entity, http://example.com/me/file#d803187cbf3676ae9d38126270a6152c60431589aa3bb3824baf8954e9c097f1) Predicate({ns.bse.filename}) td_second',
-            f'Node(http://bsfs.ai/schema/Entity, http://example.com/me/file#d803187cbf3676ae9d38126270a6152c60431589aa3bb3824baf8954e9c097f1) Predicate({ns.bse.filesize}) 703',
+            f'Node(http://bsfs.ai/schema/File, http://example.com/me/file#2f4109b40107cc50e0884755a1a961ed126887e49b8dbaf0e146b2e226aa6647) Predicate({ns.bse.author}) Me, myself, and I',
+            f'Node(http://bsfs.ai/schema/File, http://example.com/me/file#2f4109b40107cc50e0884755a1a961ed126887e49b8dbaf0e146b2e226aa6647) Predicate({ns.bse.filename}) alpha_second',
+            f'Node(http://bsfs.ai/schema/File, http://example.com/me/file#2f4109b40107cc50e0884755a1a961ed126887e49b8dbaf0e146b2e226aa6647) Predicate({ns.bse.filesize}) 696',
+            f'Node(http://bsfs.ai/schema/File, http://example.com/me/file#441f3d10c8ff489fe8e33e639606512f6c463151cc429de7e554b9af670c2ece) Predicate({ns.bse.author}) Me, myself, and I',
+            f'Node(http://bsfs.ai/schema/File, http://example.com/me/file#441f3d10c8ff489fe8e33e639606512f6c463151cc429de7e554b9af670c2ece) Predicate({ns.bse.filename}) omega_second',
+            f'Node(http://bsfs.ai/schema/File, http://example.com/me/file#441f3d10c8ff489fe8e33e639606512f6c463151cc429de7e554b9af670c2ece) Predicate({ns.bse.filesize}) 503',
+            f'Node(http://bsfs.ai/schema/File, http://example.com/me/file#69b98ecf7aff3e95b09688ba93331678eb8397817111f674c9558e6dd8f5e871) Predicate({ns.bse.author}) Me, myself, and I',
+            f'Node(http://bsfs.ai/schema/File, http://example.com/me/file#69b98ecf7aff3e95b09688ba93331678eb8397817111f674c9558e6dd8f5e871) Predicate({ns.bse.filename}) td_first',
+            f'Node(http://bsfs.ai/schema/File, http://example.com/me/file#69b98ecf7aff3e95b09688ba93331678eb8397817111f674c9558e6dd8f5e871) Predicate({ns.bse.filesize}) 911',
+            f'Node(http://bsfs.ai/schema/File, http://example.com/me/file#78f7eb7f0d8221cdb2cb26c978fa42a11f75eb87becc768f4474134cb1e06926) Predicate({ns.bse.author}) Me, myself, and I',
+            f'Node(http://bsfs.ai/schema/File, http://example.com/me/file#78f7eb7f0d8221cdb2cb26c978fa42a11f75eb87becc768f4474134cb1e06926) Predicate({ns.bse.filename}) testfile',
+            f'Node(http://bsfs.ai/schema/File, http://example.com/me/file#78f7eb7f0d8221cdb2cb26c978fa42a11f75eb87becc768f4474134cb1e06926) Predicate({ns.bse.filesize}) 885',
+            f'Node(http://bsfs.ai/schema/File, http://example.com/me/file#80818b8ec2ee1919116dba9c8a7e0a4608313cf3b463cd88e9ed77a700dd92d3) Predicate({ns.bse.author}) Me, myself, and I',
+            f'Node(http://bsfs.ai/schema/File, http://example.com/me/file#80818b8ec2ee1919116dba9c8a7e0a4608313cf3b463cd88e9ed77a700dd92d3) Predicate({ns.bse.filename}) bar_first',
+            f'Node(http://bsfs.ai/schema/File, http://example.com/me/file#80818b8ec2ee1919116dba9c8a7e0a4608313cf3b463cd88e9ed77a700dd92d3) Predicate({ns.bse.filesize}) 956',
+            f'Node(http://bsfs.ai/schema/File, http://example.com/me/file#976d2ea0e58488678cc7e435fbfadabfb6eb6cf50ad51862f38f73729ed11795) Predicate({ns.bse.author}) Me, myself, and I',
+            f'Node(http://bsfs.ai/schema/File, http://example.com/me/file#976d2ea0e58488678cc7e435fbfadabfb6eb6cf50ad51862f38f73729ed11795) Predicate({ns.bse.filename}) omega_first',
+            f'Node(http://bsfs.ai/schema/File, http://example.com/me/file#976d2ea0e58488678cc7e435fbfadabfb6eb6cf50ad51862f38f73729ed11795) Predicate({ns.bse.filesize}) 648',
+            f'Node(http://bsfs.ai/schema/File, http://example.com/me/file#997e2fbb7494a3818ec782d2bc87bf1cffafba6b9c0f658e4a6c18a723e944d3) Predicate({ns.bse.author}) Me, myself, and I',
+            f'Node(http://bsfs.ai/schema/File, http://example.com/me/file#997e2fbb7494a3818ec782d2bc87bf1cffafba6b9c0f658e4a6c18a723e944d3) Predicate({ns.bse.filename}) alpha_first',
+            f'Node(http://bsfs.ai/schema/File, http://example.com/me/file#997e2fbb7494a3818ec782d2bc87bf1cffafba6b9c0f658e4a6c18a723e944d3) Predicate({ns.bse.filesize}) 754',
+            f'Node(http://bsfs.ai/schema/File, http://example.com/me/file#a8af899ecdab60dfaea8ec7f934053624c80a1054539e163f2c7eaa986c2777d) Predicate({ns.bse.author}) Me, myself, and I',
+            f'Node(http://bsfs.ai/schema/File, http://example.com/me/file#a8af899ecdab60dfaea8ec7f934053624c80a1054539e163f2c7eaa986c2777d) Predicate({ns.bse.filename}) foo_second',
+            f'Node(http://bsfs.ai/schema/File, http://example.com/me/file#a8af899ecdab60dfaea8ec7f934053624c80a1054539e163f2c7eaa986c2777d) Predicate({ns.bse.filesize}) 585',
+            f'Node(http://bsfs.ai/schema/File, http://example.com/me/file#b8fd7fba818254166a6043195004138ebda6923e012442f819a2c49671136c70) Predicate({ns.bse.author}) Me, myself, and I',
+            f'Node(http://bsfs.ai/schema/File, http://example.com/me/file#b8fd7fba818254166a6043195004138ebda6923e012442f819a2c49671136c70) Predicate({ns.bse.filename}) bar_second',
+            f'Node(http://bsfs.ai/schema/File, http://example.com/me/file#b8fd7fba818254166a6043195004138ebda6923e012442f819a2c49671136c70) Predicate({ns.bse.filesize}) 636',
+            f'Node(http://bsfs.ai/schema/File, http://example.com/me/file#d43758ace82154a1cc10ca0dfef63cb20dd831f9c87edd6dc06539eefe67371d) Predicate({ns.bse.author}) Me, myself, and I',
+            f'Node(http://bsfs.ai/schema/File, http://example.com/me/file#d43758ace82154a1cc10ca0dfef63cb20dd831f9c87edd6dc06539eefe67371d) Predicate({ns.bse.filename}) foo_first',
+            f'Node(http://bsfs.ai/schema/File, http://example.com/me/file#d43758ace82154a1cc10ca0dfef63cb20dd831f9c87edd6dc06539eefe67371d) Predicate({ns.bse.filesize}) 546',
+            f'Node(http://bsfs.ai/schema/File, http://example.com/me/file#d803187cbf3676ae9d38126270a6152c60431589aa3bb3824baf8954e9c097f1) Predicate({ns.bse.author}) Me, myself, and I',
+            f'Node(http://bsfs.ai/schema/File, http://example.com/me/file#d803187cbf3676ae9d38126270a6152c60431589aa3bb3824baf8954e9c097f1) Predicate({ns.bse.filename}) td_second',
+            f'Node(http://bsfs.ai/schema/File, http://example.com/me/file#d803187cbf3676ae9d38126270a6152c60431589aa3bb3824baf8954e9c097f1) Predicate({ns.bse.filesize}) 703',
             })
 
 
diff --git a/test/extractor/generic/test_path.py b/test/extractor/generic/test_path.py
index d2b6c61..820f402 100644
--- a/test/extractor/generic/test_path.py
+++ b/test/extractor/generic/test_path.py
@@ -31,7 +31,7 @@ class TestPath(unittest.TestCase):
         self.assertEqual(Path().schema,
             bsfs.schema.Schema.from_string(extractor.SCHEMA_PREAMBLE + '''
                 bse:filename rdfs:subClassOf bsfs:Predicate ;
-                    rdfs:domain bsfs:Entity ;
+                    rdfs:domain bsfs:File ;
                     rdfs:range xsd:string ;
                     bsfs:unique "false"^^xsd:boolean .
                 '''))
diff --git a/test/extractor/generic/test_stat.py b/test/extractor/generic/test_stat.py
index 6cfc57f..3441438 100644
--- a/test/extractor/generic/test_stat.py
+++ b/test/extractor/generic/test_stat.py
@@ -32,7 +32,7 @@ class TestStat(unittest.TestCase):
         self.assertEqual(Stat().schema,
             bsfs.schema.Schema.from_string(extractor.SCHEMA_PREAMBLE + '''
                 bse:filesize rdfs:subClassOf bsfs:Predicate ;
-                    rdfs:domain bsfs:Entity ;
+                    rdfs:domain bsfs:File ;
                     rdfs:range xsd:integer ;
                     bsfs:unique "false"^^xsd:boolean .
                 '''))
diff --git a/test/lib/test_bsie.py b/test/lib/test_bsie.py
index 5b71752..6720746 100644
--- a/test/lib/test_bsie.py
+++ b/test/lib/test_bsie.py
@@ -64,12 +64,12 @@ class TestBSIE(unittest.TestCase):
             xsd:integer rdfs:subClassOf bsfs:Literal .
 
             bse:filename rdfs:subClassOf bsfs:Predicate ;
-                rdfs:domain bsfs:Entity ;
+                rdfs:domain bsfs:File ;
                 rdfs:range xsd:string ;
                 bsfs:unique "false"^^xsd:boolean .
 
             bse:filesize rdfs:subClassOf bsfs:Predicate ;
-                rdfs:domain bsfs:Entity ;
+                rdfs:domain bsfs:File ;
                 rdfs:range xsd:integer;
                 bsfs:unique "false"^^xsd:boolean .
 
@@ -101,7 +101,7 @@ class TestBSIE(unittest.TestCase):
             xsd:integer rdfs:subClassOf bsfs:Literal .
 
             bse:filesize rdfs:subClassOf bsfs:Predicate ;
-                rdfs:domain bsfs:Entity ;
+                rdfs:domain bsfs:File ;
                 rdfs:range xsd:integer;
                 bsfs:unique "false"^^xsd:boolean .
 
@@ -130,12 +130,12 @@ class TestBSIE(unittest.TestCase):
             xsd:integer rdfs:subClassOf bsfs:Literal .
 
             bse:filename rdfs:subClassOf bsfs:Predicate ;
-                rdfs:domain bsfs:Entity ;
+                rdfs:domain bsfs:File ;
                 rdfs:range xsd:string ;
                 bsfs:unique "false"^^xsd:boolean .
 
             bse:filesize rdfs:subClassOf bsfs:Predicate ;
-                rdfs:domain bsfs:Entity ;
+                rdfs:domain bsfs:File ;
                 rdfs:range xsd:integer;
                 bsfs:unique "false"^^xsd:boolean .
 
@@ -191,7 +191,7 @@ class TestBSIE(unittest.TestCase):
             xsd:integer rdfs:subClassOf bsfs:Literal .
 
             bse:filesize rdfs:subClassOf bsfs:Predicate ;
-                rdfs:domain bsfs:Entity ;
+                rdfs:domain bsfs:File ;
                 rdfs:range xsd:integer;
                 bsfs:unique "false"^^xsd:boolean .
 
diff --git a/test/tools/test_pipeline.py b/test/tools/test_pipeline.py
index 92801ed..611f8b0 100644
--- a/test/tools/test_pipeline.py
+++ b/test/tools/test_pipeline.py
@@ -30,7 +30,7 @@ class TestPipeline(unittest.TestCase):
         # constant A
         csA = '''
             bse:author rdfs:subClassOf bsfs:Predicate ;
-                rdfs:domain bsfs:Entity ;
+                rdfs:domain bsfs:File ;
                 rdfs:range xsd:string ;
                 bsfs:unique "true"^^xsd:boolean .
             '''
@@ -38,7 +38,7 @@ class TestPipeline(unittest.TestCase):
         # constant B
         csB = '''
             bse:rating rdfs:subClassOf bsfs:Predicate ;
-                rdfs:domain bsfs:Entity ;
+                rdfs:domain bsfs:File ;
                 rdfs:range xsd:integer ;
                 bsfs:unique "true"^^xsd:boolean .
             '''
-- 
cgit v1.2.3


From 5d9526783ad8432c7d6dfe18c0e9f2b37950b470 Mon Sep 17 00:00:00 2001
From: Matthias Baumgartner <dev@igsor.net>
Date: Thu, 15 Dec 2022 17:16:25 +0100
Subject: Pipeline.prefix as Namespace instead of URI

---
 bsie/apps/index.py          |  5 ++---
 bsie/apps/info.py           |  4 ++--
 bsie/tools/builder.py       | 13 +++++++++++--
 bsie/tools/pipeline.py      |  4 ++--
 test/lib/test_bsie.py       |  2 +-
 test/tools/test_pipeline.py |  5 ++---
 6 files changed, 20 insertions(+), 13 deletions(-)

diff --git a/bsie/apps/index.py b/bsie/apps/index.py
index aa26d0f..e37684b 100644
--- a/bsie/apps/index.py
+++ b/bsie/apps/index.py
@@ -26,7 +26,7 @@ __all__: typing.Sequence[str] = (
 def main(argv):
     """Index files or directories into BSFS."""
     parser = argparse.ArgumentParser(description=main.__doc__, prog='index')
-    parser.add_argument('--user', type=URI, default=URI('http://example.com/me'),
+    parser.add_argument('--user', type=bsfs.URI, default=bsfs.URI('http://example.com/me'),
         help='')
     parser.add_argument('--collect', action='append', default=[],
         help='')
@@ -60,9 +60,8 @@ def main(argv):
             )},
         ])
     # pipeline builder
-    prefix = URI(args.user + ('file#' if args.user.endswith('/') else '/file#'))
     pbuild = builder.PipelineBuilder(
-        prefix,
+        bsfs.Namespace(args.user + ('/' if not args.user.endswith('/') else '')),
         rbuild,
         ebuild,
         )
diff --git a/bsie/apps/info.py b/bsie/apps/info.py
index 8cc6dca..eaf1f71 100644
--- a/bsie/apps/info.py
+++ b/bsie/apps/info.py
@@ -12,7 +12,7 @@ import typing
 # bsie imports
 from bsie.base import errors
 from bsie.tools import builder
-from bsie.utils.bsfs import URI
+from bsie.utils import bsfs
 
 # exports
 __all__: typing.Sequence[str] = (
@@ -48,7 +48,7 @@ def main(argv):
         ])
     # pipeline builder
     pbuild = builder.PipelineBuilder(
-        URI('http://example.com/me/file#'), # not actually used
+        bsfs.Namespace('http://example.com/me/'), # not actually used
         rbuild,
         ebuild,
         )
diff --git a/bsie/tools/builder.py b/bsie/tools/builder.py
index 8c6b931..24aea84 100644
--- a/bsie/tools/builder.py
+++ b/bsie/tools/builder.py
@@ -163,15 +163,24 @@ class ExtractorBuilder():
             return cls(**kwargs)
 
         except Exception as err:
-            raise errors.BuilderError(f'failed to build extractor {name} due to {typename(err)}: {err}') from err
+            raise errors.BuilderError(f'failed to build extractor {name} due to {bsfs.typename(err)}: {err}') from err
 
 
 class PipelineBuilder():
     """Build `bsie.tools.pipeline.Pipeline` instances."""
 
+    # Prefix to be used in the Pipeline.
+    prefix: bsfs.Namespace
+
+    # builder for Readers.
+    rbuild: ReaderBuilder
+
+    # builder for Extractors.
+    ebuild: ExtractorBuilder
+
     def __init__(
             self,
-            prefix: URI,
+            prefix: bsfs.Namespace,
             reader_builder: ReaderBuilder,
             extractor_builder: ExtractorBuilder,
             ):
diff --git a/bsie/tools/pipeline.py b/bsie/tools/pipeline.py
index 3d08993..834bd99 100644
--- a/bsie/tools/pipeline.py
+++ b/bsie/tools/pipeline.py
@@ -39,14 +39,14 @@ class Pipeline():
     schema: _schema.Schema
 
     # node prefix.
-    _prefix: URI
+    _prefix: bsfs.Namespace
 
     # extractor -> reader mapping
     _ext2rdr: typing.Dict[base.extractor.Extractor, typing.Optional[base.reader.Reader]]
 
     def __init__(
             self,
-            prefix: URI,
+            prefix: bsfs.Namespace,
             ext2rdr: typing.Dict[base.extractor.Extractor, typing.Optional[base.reader.Reader]]
             ):
         # store core members
diff --git a/test/lib/test_bsie.py b/test/lib/test_bsie.py
index 6720746..43e7b1d 100644
--- a/test/lib/test_bsie.py
+++ b/test/lib/test_bsie.py
@@ -40,7 +40,7 @@ class TestBSIE(unittest.TestCase):
                 )},
             ])
         # build pipeline
-        self.prefix = URI('http://example.com/local/file#')
+        self.prefix = bsfs.Namespace('http://example.com/local/file#')
         pbuild = builder.PipelineBuilder(self.prefix, rbuild, ebuild)
         self.pipeline = pbuild.build()
 
diff --git a/test/tools/test_pipeline.py b/test/tools/test_pipeline.py
index 611f8b0..e440ab5 100644
--- a/test/tools/test_pipeline.py
+++ b/test/tools/test_pipeline.py
@@ -11,7 +11,6 @@ import unittest
 
 # bsie imports
 from bsie.base import errors
-from bsie.utils.bsfs import URI
 from bsie.utils import bsfs, node, ns
 import bsie.extractor.generic.constant
 import bsie.extractor.generic.path
@@ -50,7 +49,7 @@ class TestPipeline(unittest.TestCase):
             bsie.extractor.generic.constant.Constant(csA, tupA): None,
             bsie.extractor.generic.constant.Constant(csB, tupB): None,
         }
-        self.prefix = URI('http://example.com/local/file#')
+        self.prefix = bsfs.Namespace('http://example.com/local/')
 
     def test_essentials(self):
         pipeline = Pipeline(self.prefix, self.ext2rdr)
@@ -101,7 +100,7 @@ class TestPipeline(unittest.TestCase):
         p_filesize = pipeline.schema.predicate(ns.bse.filesize)
         p_author = pipeline.schema.predicate(ns.bse.author)
         p_rating = pipeline.schema.predicate(ns.bse.rating)
-        entity = pipeline.schema.node(ns.bsfs.Entity)
+        entity = pipeline.schema.node(ns.bsfs.File)
         p_invalid = pipeline.schema.predicate(ns.bsfs.Predicate).get_child(ns.bse.foo, range=entity)
 
         # extract given predicates
-- 
cgit v1.2.3


From 3426b4e201cf03b78d2a3f144876955fcda2f66b Mon Sep 17 00:00:00 2001
From: Matthias Baumgartner <dev@igsor.net>
Date: Thu, 15 Dec 2022 17:17:53 +0100
Subject: extractor interface revision * schema as property * predicates ->
 principals

---
 bsie/base/extractor.py             | 21 +++++++++++++--------
 bsie/extractor/generic/constant.py |  8 +++-----
 bsie/extractor/generic/path.py     |  7 +++----
 bsie/extractor/generic/stat.py     |  7 +++----
 test/base/test_extractor.py        |  5 +++--
 5 files changed, 25 insertions(+), 23 deletions(-)

diff --git a/bsie/base/extractor.py b/bsie/base/extractor.py
index 678dcec..c44021b 100644
--- a/bsie/base/extractor.py
+++ b/bsie/base/extractor.py
@@ -9,8 +9,6 @@ import abc
 import typing
 
 # bsie imports
-from bsie.utils import node
-from bsie.utils.bsfs import schema as _schema, typename
 from bsie.utils import bsfs, node, ns
 
 # exports
@@ -58,10 +56,10 @@ class Extractor(abc.ABC):
     CONTENT_READER: typing.Optional[str] = None
 
     # extractor schema.
-    schema: _schema.Schema
+    _schema: bsfs.schema.Schema
 
-    def __init__(self, schema: _schema.Schema):
-        self.schema = schema
+    def __init__(self, schema: bsfs.schema.Schema):
+        self._schema = schema
 
     def __str__(self) -> str:
         return bsfs.typename(self)
@@ -77,7 +75,14 @@ class Extractor(abc.ABC):
     def __hash__(self) -> int:
         return hash((type(self), self.CONTENT_READER, self.schema))
 
-    def predicates(self) -> typing.Iterator[_schema.Predicate]:
+    @property
+    def schema(self) -> bsfs.schema.Schema:
+        """Return the extractor's schema."""
+        return self._schema
+
+    @property
+    def principals(self) -> typing.Iterator[bsfs.schema.Predicate]:
+        """Return the principal predicates, i.e., relations from/to the extraction subject."""
         ent = self.schema.node(ns.bsfs.Entity)
         return (
             pred
@@ -91,8 +96,8 @@ class Extractor(abc.ABC):
             self,
             subject: node.Node,
             content: typing.Any,
-            predicates: typing.Iterable[_schema.Predicate],
-            ) -> typing.Iterator[typing.Tuple[node.Node, _schema.Predicate, typing.Any]]:
+            principals: typing.Iterable[bsfs.schema.Predicate],
+            ) -> typing.Iterator[typing.Tuple[node.Node, bsfs.schema.Predicate, typing.Any]]:
         """Return (node, predicate, value) triples."""
 
 ## EOF ##
diff --git a/bsie/extractor/generic/constant.py b/bsie/extractor/generic/constant.py
index f9e3415..cdb2ef6 100644
--- a/bsie/extractor/generic/constant.py
+++ b/bsie/extractor/generic/constant.py
@@ -9,8 +9,6 @@ import typing
 
 # bsie imports
 from bsie.base import extractor
-from bsie.utils.bsfs import URI, schema as _schema
-from bsie.utils.node import Node
 from bsie.utils import bsfs, node
 
 # exports
@@ -50,10 +48,10 @@ class Constant(extractor.Extractor):
             self,
             subject: node.Node,
             content: None,
-            predicates: typing.Iterable[_schema.Predicate],
-            ) -> typing.Iterator[typing.Tuple[Node, _schema.Predicate, typing.Any]]:
+            principals: typing.Iterable[bsfs.schema.Predicate],
+            ) -> typing.Iterator[typing.Tuple[node.Node, bsfs.schema.Predicate, typing.Any]]:
         for pred, value in self._tuples:
-            if pred in predicates:
+            if pred in principals:
                 yield subject, pred, value
 
 ## EOF ##
diff --git a/bsie/extractor/generic/path.py b/bsie/extractor/generic/path.py
index 00165e3..23ae80b 100644
--- a/bsie/extractor/generic/path.py
+++ b/bsie/extractor/generic/path.py
@@ -10,7 +10,6 @@ import typing
 
 # bsie imports
 from bsie.base import extractor
-from bsie.utils.bsfs import schema
 from bsie.utils import bsfs, node, ns
 
 # exports
@@ -46,9 +45,9 @@ class Path(extractor.Extractor):
             self,
             subject: node.Node,
             content: str,
-            predicates: typing.Iterable[schema.Predicate],
-            ) -> typing.Iterator[typing.Tuple[node.Node, schema.Predicate, typing.Any]]:
-        for pred in predicates:
+            principals: typing.Iterable[bsfs.schema.Predicate],
+            ) -> typing.Iterator[typing.Tuple[node.Node, bsfs.schema.Predicate, typing.Any]]:
+        for pred in principals:
             # find callback
             clbk = self._callmap.get(pred)
             if clbk is None:
diff --git a/bsie/extractor/generic/stat.py b/bsie/extractor/generic/stat.py
index 0f4267f..1dcfedf 100644
--- a/bsie/extractor/generic/stat.py
+++ b/bsie/extractor/generic/stat.py
@@ -10,7 +10,6 @@ import typing
 
 # bsie imports
 from bsie.base import extractor
-from bsie.utils.bsfs import schema as _schema
 from bsie.utils import bsfs, node, ns
 
 # exports
@@ -46,9 +45,9 @@ class Stat(extractor.Extractor):
             self,
             subject: node.Node,
             content: os.stat_result,
-            predicates: typing.Iterable[_schema.Predicate],
-            ) -> typing.Iterator[typing.Tuple[node.Node, _schema.Predicate, typing.Any]]:
-        for pred in predicates:
+            principals: typing.Iterable[bsfs.schema.Predicate],
+            ) -> typing.Iterator[typing.Tuple[node.Node, bsfs.schema.Predicate, typing.Any]]:
+        for pred in principals:
             # find callback
             clbk = self._callmap.get(pred)
             if clbk is None:
diff --git a/test/base/test_extractor.py b/test/base/test_extractor.py
index 5410ae0..30974ef 100644
--- a/test/base/test_extractor.py
+++ b/test/base/test_extractor.py
@@ -51,14 +51,15 @@ class TestExtractor(unittest.TestCase):
         self.assertNotEqual(ext, sub)
         self.assertNotEqual(hash(ext), hash(sub))
 
-    def test_predicates(self):
+    def test_principals(self):
         schema = bsfs.schema.Schema.Empty()
         entity = schema.node(ns.bsfs.Node).get_child(ns.bsfs.Entity)
         string = schema.literal(ns.bsfs.Literal).get_child(bsfs.URI('http://www.w3.org/2001/XMLSchema#string'))
         p_author = schema.predicate(ns.bsfs.Predicate).get_child(ns.bse.author, domain=entity, range=string)
         p_comment = schema.predicate(ns.bsfs.Predicate).get_child(ns.bse.comment, domain=entity, range=string)
         ext = StubExtractor()
-        self.assertSetEqual(set(ext.predicates()), {p_author, p_comment} | set(schema.predicates()))
+        self.assertSetEqual(set(ext.principals),
+            {p_author, p_comment} | set(schema.predicates()) - {schema.predicate(ns.bsfs.Predicate)})
 
 
 ## main ##
-- 
cgit v1.2.3


From 37510d134458bf954ca2da6d40be0d6c76661e8c Mon Sep 17 00:00:00 2001
From: Matthias Baumgartner <dev@igsor.net>
Date: Thu, 15 Dec 2022 17:19:21 +0100
Subject: bsie/pipeline interface revision: * predicates -> principals * schema
 as property * principals as property * information hiding * full subschema
 instead of only predicates

---
 bsie/lib/bsie.py            | 61 ++++++++++++++++++++++---------------
 bsie/tools/pipeline.py      | 52 ++++++++++++++++++++-----------
 test/lib/test_bsie.py       | 74 +++++++--------------------------------------
 test/tools/test_pipeline.py |  5 ++-
 4 files changed, 83 insertions(+), 109 deletions(-)

diff --git a/bsie/lib/bsie.py b/bsie/lib/bsie.py
index 3aeee2b..e087fa9 100644
--- a/bsie/lib/bsie.py
+++ b/bsie/lib/bsie.py
@@ -8,8 +8,6 @@ Author: Matthias Baumgartner, 2022
 import typing
 
 # bsie imports
-from bsie.tools.pipeline import Pipeline
-from bsie.utils.bsfs import URI, schema as schema_
 from bsie.tools import Pipeline
 from bsie.utils import bsfs, node, ns
 
@@ -30,11 +28,14 @@ class BSIE():
 
     """
 
+    # pipeline
+    _pipeline: Pipeline
+
     # predicates to extract.
-    predicates: typing.Set[URI]
+    _principals: typing.Set[bsfs.URI]
 
     # local schema.
-    schema: schema_.Schema
+    _schema: bsfs.schema.Schema
 
     def __init__(
             self,
@@ -46,36 +47,46 @@ class BSIE():
             discard: typing.Optional[typing.Iterable[bsfs.URI]] = None,
             ):
         # store pipeline
-        self.pipeline = pipeline
-        # start off with available predicates
-        self.predicates = {pred.uri for pred in self.pipeline.predicates()}
-        # limit predicates to specified ones by argument.
+        self._pipeline = pipeline
+        # start off with available principals
+        self._principals = {pred.uri for pred in self._pipeline.principals}
+        # limit principals to specified ones by argument.
         if collect is not None:
             collect = set(collect)
             if len(collect) > 0:
-                self.predicates &= collect
-        # discard predicates.
+                self._principals &= collect
+        # discard principals.
         if discard is not None:
-            self.predicates -= set(discard)
+            self._principals -= set(discard)
         # discard ns.bsfs.Predicate
-        self.predicates.discard(ns.bsfs.Predicate)
-        # compile a schema that only contains the requested predicates (and implied types)
-        self.schema = schema_.Schema({
-            self.pipeline.schema.predicate(pred) for pred in self.predicates})
+        self._principals.discard(ns.bsfs.Predicate)
+        # compile a schema that only contains the requested principals (and auxiliary predicates)
+        self._schema = self._pipeline.subschema(
+            self._pipeline.schema.predicate(pred) for pred in self._principals)
+
+    @property
+    def schema(self) -> bsfs.schema.Schema:
+        """Return the BSIE schema."""
+        return self._schema
+
+    @property
+    def principals(self) -> typing.Iterator[bsfs.URI]:
+        """Return an iterator to the principal predicates."""
+        return iter(self._principals)
 
     def from_file(
             self,
-            path: URI,
-            predicates: typing.Optional[typing.Iterable[URI]] = None,
-            ) -> typing.Iterator[typing.Tuple[node.Node, URI, typing.Any]]:
-        """Produce triples for a given *path*. Limit to *predicates* if given."""
-        # get requested predicates.
-        predicates = set(predicates) if predicates is not None else self.predicates
-        # filter through requested predicates.
-        predicates &= self.predicates
+            path: bsfs.URI,
+            principals: typing.Optional[typing.Iterable[bsfs.URI]] = None,
+            ) -> typing.Iterator[typing.Tuple[node.Node, bsfs.URI, typing.Any]]:
+        """Produce triples for a given *path*. Limit to *principals* if given."""
+        # get requested principals.
+        principals = set(principals) if principals is not None else self._principals
+        # filter through requested principals.
+        principals &= self._principals
         # predicate lookup
-        predicates = {self.schema.predicate(pred) for pred in predicates}
+        principals = {self.schema.predicate(pred) for pred in principals}
         # invoke pipeline
-        yield from self.pipeline(path, predicates)
+        yield from self._pipeline(path, principals)
 
 ## EOF ##
diff --git a/bsie/tools/pipeline.py b/bsie/tools/pipeline.py
index 834bd99..52ce526 100644
--- a/bsie/tools/pipeline.py
+++ b/bsie/tools/pipeline.py
@@ -11,8 +11,6 @@ import typing
 
 # bsie imports
 from bsie import base
-from bsie.utils.node import Node
-from bsie.utils.bsfs import schema as _schema, URI, uuid as _uuid, typename
 from bsie.utils import bsfs, node, ns
 
 # exports
@@ -36,7 +34,7 @@ class Pipeline():
     """
 
     # combined extractor schemas.
-    schema: _schema.Schema
+    _schema: bsfs.schema.Schema
 
     # node prefix.
     _prefix: bsfs.Namespace
@@ -53,7 +51,7 @@ class Pipeline():
         self._prefix = prefix
         self._ext2rdr = ext2rdr
         # compile schema from all extractors
-        self.schema = _schema.Schema.Union(ext.schema for ext in ext2rdr)
+        self._schema = bsfs.schema.Schema.Union(ext.schema for ext in ext2rdr)
 
     def __str__(self) -> str:
         return bsfs.typename(self)
@@ -62,29 +60,47 @@ class Pipeline():
         return f'{bsfs.typename(self)}(...)'
 
     def __hash__(self) -> int:
-        return hash((type(self), self._prefix, self.schema, tuple(self._ext2rdr), tuple(self._ext2rdr.values())))
+        return hash((type(self), self._prefix, self._schema, tuple(self._ext2rdr), tuple(self._ext2rdr.values())))
 
     def __eq__(self, other: typing.Any) -> bool:
         return isinstance(other, type(self)) \
-           and self.schema == other.schema \
+           and self._schema == other._schema \
            and self._prefix == other._prefix \
            and self._ext2rdr == other._ext2rdr
 
-    def predicates(self) -> typing.Iterator[_schema.Predicate]:
-        """Return the predicates that are extracted from a file."""
-        return iter({pred for ext in self._ext2rdr for pred in ext.predicates()})
+    @property
+    def schema(self) -> bsfs.schema.Schema:
+        """Return the pipeline's schema (combined from all extractors)."""
+        return self._schema
+
+    @property
+    def principals(self) -> typing.Iterator[bsfs.schema.Predicate]:
+        """Return the principal predicates that can be extracted."""
+        return iter({pred for ext in self._ext2rdr for pred in ext.principals})
+
+    def subschema(self, principals: typing.Iterable[bsfs.schema.Predicate]) -> bsfs.schema.Schema:
+        """Return the subset of the schema that supports the given *principals*."""
+        # materialize principals
+        principals = set(principals)
+        # collect and combine schemas from extractors
+        return bsfs.schema.Schema.Union({
+            ext.schema
+            for ext
+            in self._ext2rdr
+            if not set(ext.principals).isdisjoint(principals)
+            })
 
     def __call__(
             self,
-            path: URI,
-            predicates: typing.Optional[typing.Iterable[_schema.Predicate]] = None,
-            ) -> typing.Iterator[typing.Tuple[Node, _schema.Predicate, typing.Any]]:
-        """Extract triples from the file at *path*. Optionally, limit triples to *predicates*."""
-        # get predicates
-        predicates = set(predicates) if predicates is not None else set(self.schema.predicates())
+            path: bsfs.URI,
+            principals: typing.Optional[typing.Iterable[bsfs.schema.Predicate]] = None,
+            ) -> typing.Iterator[typing.Tuple[node.Node, bsfs.schema.Predicate, typing.Any]]:
+        """Extract triples from the file at *path*. Optionally, limit triples to *principals*."""
+        # get principals
+        principals = set(principals) if principals is not None else set(self.schema.predicates())
 
         # get extractors
-        extractors = {ext for ext in self._ext2rdr if not set(ext.predicates()).isdisjoint(predicates)}
+        extractors = {ext for ext in self._ext2rdr if not set(ext.principals).isdisjoint(principals)}
 
         # corner-case short-cut
         if len(extractors) == 0:
@@ -110,8 +126,8 @@ class Pipeline():
                 for ext in extrs:
                     try:
                         # get predicate/value tuples
-                        for node, pred, value in ext.extract(subject, content, predicates):
-                            yield node, pred, value
+                        for subject, pred, value in ext.extract(subject, content, principals):
+                            yield subject, pred, value
 
                     except base.errors.ExtractorError as err:
                         # critical extractor failure.
diff --git a/test/lib/test_bsie.py b/test/lib/test_bsie.py
index 43e7b1d..f3f476e 100644
--- a/test/lib/test_bsie.py
+++ b/test/lib/test_bsie.py
@@ -11,8 +11,6 @@ import unittest
 # bsie imports
 from bsie.base import extractor
 from bsie.tools import builder
-from bsie.utils.bsfs import URI, schema
-from bsie.utils.node import Node
 from bsie.utils import bsfs, node, ns
 
 # objects to test
@@ -47,22 +45,12 @@ class TestBSIE(unittest.TestCase):
     def test_construction(self):
         # pipeline only
         lib = BSIE(self.pipeline)
-        self.assertSetEqual(lib.predicates, {
+        self.assertSetEqual(set(lib.principals), {
             ns.bse.filename,
             ns.bse.filesize,
             ns.bse.author,
             })
-        self.assertEqual(lib.schema, schema.Schema.from_string('''
-            prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#>
-            prefix xsd: <http://www.w3.org/2001/XMLSchema#>
-            prefix bsfs: <http://bsfs.ai/schema/>
-            prefix bse: <http://bsfs.ai/schema/Entity#>
-            # essential nodes
-            bsfs:Entity rdfs:subClassOf bsfs:Node .
-            # common definitions
-            xsd:string rdfs:subClassOf bsfs:Literal .
-            xsd:integer rdfs:subClassOf bsfs:Literal .
-
+        self.assertEqual(lib.schema, bsfs.schema.Schema.from_string(extractor.SCHEMA_PREAMBLE + '''
             bse:filename rdfs:subClassOf bsfs:Predicate ;
                 rdfs:domain bsfs:File ;
                 rdfs:range xsd:string ;
@@ -85,21 +73,11 @@ class TestBSIE(unittest.TestCase):
             ns.bse.author,
             ns.bse.inexistent,
             })
-        self.assertSetEqual(lib.predicates, {
+        self.assertSetEqual(set(lib.principals), {
             ns.bse.filesize,
             ns.bse.author,
             })
-        self.assertEqual(lib.schema, schema.Schema.from_string('''
-            prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#>
-            prefix xsd: <http://www.w3.org/2001/XMLSchema#>
-            prefix bsfs: <http://bsfs.ai/schema/>
-            prefix bse: <http://bsfs.ai/schema/Entity#>
-            # essential nodes
-            bsfs:Entity rdfs:subClassOf bsfs:Node .
-            # common definitions
-            xsd:string rdfs:subClassOf bsfs:Literal .
-            xsd:integer rdfs:subClassOf bsfs:Literal .
-
+        self.assertEqual(lib.schema, bsfs.schema.Schema.from_string(extractor.SCHEMA_PREAMBLE + '''
             bse:filesize rdfs:subClassOf bsfs:Predicate ;
                 rdfs:domain bsfs:File ;
                 rdfs:range xsd:integer;
@@ -109,26 +87,15 @@ class TestBSIE(unittest.TestCase):
                 rdfs:domain bsfs:Entity ;
                 rdfs:range xsd:string ;
                 bsfs:unique "true"^^xsd:boolean .
-
             '''))
         # empty collect is disregarded
         lib = BSIE(self.pipeline, collect={})
-        self.assertSetEqual(lib.predicates, {
+        self.assertSetEqual(set(lib.principals), {
             ns.bse.filename,
             ns.bse.filesize,
             ns.bse.author,
             })
-        self.assertEqual(lib.schema, schema.Schema.from_string('''
-            prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#>
-            prefix xsd: <http://www.w3.org/2001/XMLSchema#>
-            prefix bsfs: <http://bsfs.ai/schema/>
-            prefix bse: <http://bsfs.ai/schema/Entity#>
-            # essential nodes
-            bsfs:Entity rdfs:subClassOf bsfs:Node .
-            # common definitions
-            xsd:string rdfs:subClassOf bsfs:Literal .
-            xsd:integer rdfs:subClassOf bsfs:Literal .
-
+        self.assertEqual(lib.schema, bsfs.schema.Schema.from_string(extractor.SCHEMA_PREAMBLE + '''
             bse:filename rdfs:subClassOf bsfs:Predicate ;
                 rdfs:domain bsfs:File ;
                 rdfs:range xsd:string ;
@@ -152,24 +119,14 @@ class TestBSIE(unittest.TestCase):
             ns.bse.filename,
             ns.bse.inexistent,
             })
-        self.assertSetEqual(lib.predicates, {
+        self.assertSetEqual(set(lib.principals), {
             ns.bse.author,
             })
-        self.assertEqual(lib.schema, schema.Schema.from_string('''
-            prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#>
-            prefix xsd: <http://www.w3.org/2001/XMLSchema#>
-            prefix bsfs: <http://bsfs.ai/schema/>
-            prefix bse: <http://bsfs.ai/schema/Entity#>
-            # essential nodes
-            bsfs:Entity rdfs:subClassOf bsfs:Node .
-            # common definitions
-            xsd:string rdfs:subClassOf bsfs:Literal .
-
+        self.assertEqual(lib.schema, bsfs.schema.Schema.from_string(extractor.SCHEMA_PREAMBLE + '''
             bse:author rdfs:subClassOf bsfs:Predicate ;
                 rdfs:domain bsfs:Entity ;
                 rdfs:range xsd:string ;
                 bsfs:unique "true"^^xsd:boolean .
-
             '''))
 
         # specify collect and discard
@@ -177,19 +134,10 @@ class TestBSIE(unittest.TestCase):
             collect={ns.bse.filesize, ns.bse.author, ns.bse.foo, ns.bse.bar},
             discard={ns.bse.author, ns.bse.foo, ns.bse.foobar},
             )
-        self.assertSetEqual(lib.predicates, {
+        self.assertSetEqual(set(lib.principals), {
             ns.bse.filesize,
             })
-        self.assertEqual(lib.schema, schema.Schema.from_string('''
-            prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#>
-            prefix xsd: <http://www.w3.org/2001/XMLSchema#>
-            prefix bsfs: <http://bsfs.ai/schema/>
-            prefix bse: <http://bsfs.ai/schema/Entity#>
-            # essential nodes
-            bsfs:Entity rdfs:subClassOf bsfs:Node .
-            # common definitions
-            xsd:integer rdfs:subClassOf bsfs:Literal .
-
+        self.assertEqual(lib.schema, bsfs.schema.Schema.from_string(extractor.SCHEMA_PREAMBLE + '''
             bse:filesize rdfs:subClassOf bsfs:Predicate ;
                 rdfs:domain bsfs:File ;
                 rdfs:range xsd:integer;
@@ -201,7 +149,7 @@ class TestBSIE(unittest.TestCase):
     def test_from_file(self):
         # setup
         lib = BSIE(self.pipeline)
-        self.assertSetEqual(set(lib.predicates), {
+        self.assertSetEqual(set(lib.principals), {
             ns.bse.filesize,
             ns.bse.filename,
             ns.bse.author,
diff --git a/test/tools/test_pipeline.py b/test/tools/test_pipeline.py
index e440ab5..91bf736 100644
--- a/test/tools/test_pipeline.py
+++ b/test/tools/test_pipeline.py
@@ -75,7 +75,7 @@ class TestPipeline(unittest.TestCase):
 
         # equivalence respects schema
         p2 = Pipeline(self.prefix, self.ext2rdr)
-        p2.schema = pipeline.schema.Empty()
+        p2._schema = pipeline.schema.Empty()
         self.assertNotEqual(pipeline, p2)
         self.assertNotEqual(hash(pipeline), hash(p2))
 
@@ -160,8 +160,7 @@ class TestPipeline(unittest.TestCase):
         # build pipeline
         pipeline = Pipeline(self.prefix, self.ext2rdr)
         #
-        self.assertSetEqual(set(pipeline.predicates()), {
-            pipeline.schema.predicate(ns.bsfs.Predicate),
+        self.assertSetEqual(set(pipeline.principals), {
             pipeline.schema.predicate(ns.bse.filename),
             pipeline.schema.predicate(ns.bse.filesize),
             pipeline.schema.predicate(ns.bse.author),
-- 
cgit v1.2.3


From 3b41b2a4b7532c911b63b41066a75b3e1546d214 Mon Sep 17 00:00:00 2001
From: Matthias Baumgartner <dev@igsor.net>
Date: Thu, 15 Dec 2022 17:21:20 +0100
Subject: minor test improvements and information hiding in builder

---
 bsie/tools/builder.py   | 25 ++++++++++++-------------
 test/apps/test_index.py |  2 +-
 test/apps/test_info.py  | 13 +++++++------
 3 files changed, 20 insertions(+), 20 deletions(-)

diff --git a/bsie/tools/builder.py b/bsie/tools/builder.py
index 24aea84..190d9bf 100644
--- a/bsie/tools/builder.py
+++ b/bsie/tools/builder.py
@@ -12,7 +12,6 @@ import typing
 # bsie imports
 from bsie import base
 from bsie.base import errors
-from bsie.utils.bsfs import URI, typename
 from bsie.utils import bsfs
 
 # inner-module imports
@@ -75,20 +74,20 @@ class ReaderBuilder():
     """
 
     # keyword arguments
-    kwargs: typing.Dict[str, typing.Dict[str, typing.Any]]
+    _kwargs: typing.Dict[str, typing.Dict[str, typing.Any]]
 
     # cached readers
-    cache: typing.Dict[str, base.reader.Reader]
+    _cache: typing.Dict[str, base.Reader]
 
     def __init__(self, kwargs: typing.Dict[str, typing.Dict[str, typing.Any]]):
-        self.kwargs = kwargs
-        self.cache = {}
+        self._kwargs = kwargs
+        self._cache = {}
 
     def build(self, name: str) -> base.Reader:
         """Return an instance for the qualified class name."""
         # return cached instance
-        if name in self.cache:
-            return self.cache[name]
+        if name in self._cache:
+            return self._cache[name]
 
         # check name and get module/class components
         module_name, class_name = _unpack_name(name)
@@ -97,14 +96,14 @@ class ReaderBuilder():
         cls = _safe_load(module_name, class_name)
 
         # get kwargs
-        kwargs = self.kwargs.get(name, {})
+        kwargs = self._kwargs.get(name, {})
         if not isinstance(kwargs, dict):
             raise TypeError(f'expected a kwargs dict, found {bsfs.typename(kwargs)}')
 
         try: # build, cache, and return instance
             obj = cls(**kwargs)
             # cache instance
-            self.cache[name] = obj
+            self._cache[name] = obj
             # return instance
             return obj
 
@@ -125,19 +124,19 @@ class ExtractorBuilder():
     """
 
     # build specifications
-    specs: typing.List[typing.Dict[str, typing.Dict[str, typing.Any]]]
+    _specs: typing.List[typing.Dict[str, typing.Dict[str, typing.Any]]]
 
     def __init__(self, specs: typing.List[typing.Dict[str, typing.Dict[str, typing.Any]]]):
-        self.specs = specs
+        self._specs = specs
 
     def __iter__(self) -> typing.Iterator[int]:
         """Iterate over extractor specifications."""
-        return iter(range(len(self.specs)))
+        return iter(range(len(self._specs)))
 
     def build(self, index: int) -> base.Extractor:
         """Return an instance of the n'th extractor (n=*index*)."""
         # get build instructions
-        specs = self.specs[index]
+        specs = self._specs[index]
 
         # check specs structure. expecting[{name: {kwargs}}]
         if not isinstance(specs, dict):
diff --git a/test/apps/test_index.py b/test/apps/test_index.py
index c567dea..2be8470 100644
--- a/test/apps/test_index.py
+++ b/test/apps/test_index.py
@@ -5,10 +5,10 @@ A copy of the license is provided with the project.
 Author: Matthias Baumgartner, 2022
 """
 # imports
+import contextlib
 import io
 import os
 import rdflib
-import sys
 import unittest
 
 # bsie imports
diff --git a/test/apps/test_info.py b/test/apps/test_info.py
index 60a540e..ad39c64 100644
--- a/test/apps/test_info.py
+++ b/test/apps/test_info.py
@@ -6,9 +6,9 @@ Author: Matthias Baumgartner, 2022
 """
 # imports
 import argparse
+import contextlib
 import io
 import os
-import sys
 import unittest
 
 # objects to test
@@ -19,10 +19,10 @@ from bsie.apps.info import main
 
 class TestIndex(unittest.TestCase):
     def test_predicates(self):
-        stdout, sys.stdout = sys.stdout, io.StringIO()
-        # show predicates infos
-        main(['predicates'])
-        outbuf, sys.stdout = sys.stdout, stdout
+        outbuf = io.StringIO()
+        with contextlib.redirect_stdout(outbuf):
+            # show predicates infos
+            main(['predicates'])
         # verify output
         self.assertSetEqual({pred for pred in outbuf.getvalue().split('\n') if pred != ''}, {
             'http://bsfs.ai/schema/Entity#author',
@@ -32,7 +32,8 @@ class TestIndex(unittest.TestCase):
             })
 
     def test_invalid(self):
-        self.assertRaises(SystemExit, main, ['foobar'])
+        with contextlib.redirect_stderr(io.StringIO()):
+            self.assertRaises(SystemExit, main, ['foobar'])
 
 ## main ##
 
-- 
cgit v1.2.3


From 22896f662ed49dd9fa283af2b3dad9e4ec6dd340 Mon Sep 17 00:00:00 2001
From: Matthias Baumgartner <dev@igsor.net>
Date: Fri, 16 Dec 2022 10:12:25 +0100
Subject: setup files

---
 bsie.toml | 11 +++++++++++
 setup.py  | 20 ++++++++++++++++++++
 2 files changed, 31 insertions(+)
 create mode 100644 bsie.toml
 create mode 100644 setup.py

diff --git a/bsie.toml b/bsie.toml
new file mode 100644
index 0000000..10b0f37
--- /dev/null
+++ b/bsie.toml
@@ -0,0 +1,11 @@
+[project]
+name = "bsie"
+description = "Extract information from files and store them in a BSFS."
+version = "0.0.1"
+license = {text = "BSD 3-Clause License"}
+authors = [{name='Matthias Baumgartner', email="dev@igsor.net"}]
+dependencies = [
+  "rdflib",
+  "bsfs",
+]
+requires-python = ">=3.7"
diff --git a/setup.py b/setup.py
new file mode 100644
index 0000000..ee9e0fd
--- /dev/null
+++ b/setup.py
@@ -0,0 +1,20 @@
+
+from setuptools import setup
+import os
+
+setup(
+    name='bsie',
+    version='0.0.1',
+    author='Matthias Baumgartner',
+    author_email='dev@igsor.net',
+    description='Extract information from files and store them in a BSFS.',
+    long_description=open(os.path.join(os.path.dirname(__file__), 'README')).read(),
+    license='BSD',
+    license_files=('LICENSE', ),
+    url='https://www.igsor.net/projects/blackstar/bsie/',
+    download_url='https://pip.igsor.net',
+    packages=('bsie', ),
+    install_requires=('rdflib', 'bsfs'),
+    python_requires=">=3.7",
+)
+
-- 
cgit v1.2.3


From 5850ff2bcb1052883cf301590126609b0657fbc9 Mon Sep 17 00:00:00 2001
From: Matthias Baumgartner <dev@igsor.net>
Date: Sun, 18 Dec 2022 13:37:02 +0100
Subject: cosmetic changes

---
 bsie/__init__.py                   |  2 +-
 bsie/apps/index.py                 | 28 +++++++++++-----------------
 bsie/extractor/generic/constant.py |  2 +-
 bsie/extractor/generic/path.py     |  3 ++-
 bsie/extractor/generic/stat.py     |  2 +-
 test/apps/test_info.py             |  1 -
 6 files changed, 16 insertions(+), 22 deletions(-)

diff --git a/bsie/__init__.py b/bsie/__init__.py
index 96e6953..8d2308c 100644
--- a/bsie/__init__.py
+++ b/bsie/__init__.py
@@ -9,7 +9,7 @@ import collections
 import typing
 
 # constants
-T_VERSION_INFO = collections.namedtuple('T_VERSION_INFO', ('major', 'minor', 'micro'))
+T_VERSION_INFO = collections.namedtuple('T_VERSION_INFO', ('major', 'minor', 'micro')) # pylint: disable=invalid-name
 version_info = T_VERSION_INFO(0, 0, 1)
 
 # exports
diff --git a/bsie/apps/index.py b/bsie/apps/index.py
index e37684b..1dbfdd8 100644
--- a/bsie/apps/index.py
+++ b/bsie/apps/index.py
@@ -98,23 +98,17 @@ def main(argv):
         walk(print)
         return None
 
-    else:
-        # initialize bsfs
-        # NOTE: With presistent storages, the schema migration will be a seaparte operation.
-        # Here, we'd simply examine the schema and potentially discard more predicates.
-        store = bsfs.Open({
-            'Graph': {
-                'user': args.user,
-                'backend': {
-                    'SparqlStore': {}},
-                }})
-        store.migrate(bsie.schema)
-        # process files
-        def handle(node, pred, value):
-            store.node(node.node_type, node.uri).set(pred.uri, value)
-        walk(handle)
-        # return store
-        return store
+    # initialize bsfs
+    # NOTE: With persistent storages, the schema migration will be a separate operation.
+    # Here, we'd simply examine the schema and potentially discard more predicates.
+    store = bsfs.Open(bsfs.init_sparql_store(args.user))
+    store.migrate(bsie.schema)
+    # process files
+    def handle(node, pred, value):
+        store.node(node.node_type, node.uri).set(pred.uri, value)
+    walk(handle)
+    # return store
+    return store
 
 
diff --git a/bsie/extractor/generic/constant.py b/bsie/extractor/generic/constant.py
index cdb2ef6..11384e6 100644
--- a/bsie/extractor/generic/constant.py
+++ b/bsie/extractor/generic/constant.py
@@ -35,7 +35,7 @@ class Constant(extractor.Extractor):
         super().__init__(bsfs.schema.Schema.from_string(extractor.SCHEMA_PREAMBLE + schema))
         # NOTE: Raises a KeyError if the predicate is not part of the schema
         self._tuples = tuple((self.schema.predicate(p_uri), value) for p_uri, value in tuples)
-        # FIXME: use schema instance for value checking
+        # TODO: use schema instance for value checking
 
     def __eq__(self, other: typing.Any) -> bool:
         return super().__eq__(other) \
diff --git a/bsie/extractor/generic/path.py b/bsie/extractor/generic/path.py
index 23ae80b..7018e12 100644
--- a/bsie/extractor/generic/path.py
+++ b/bsie/extractor/generic/path.py
@@ -62,7 +62,8 @@ class Path(extractor.Extractor):
     def __filename(self, path: str) -> typing.Optional[str]:
         try:
             return os.path.basename(path)
-        except Exception: # some error, skip.
+        except Exception: # pylint: disable=broad-except # we explicitly want to catch everything
+            # some error, skip
             # FIXME: some kind of error reporting (e.g. logging)?
             # Options: (a) Fail silently (current); (b) Skip and report to log;
             # (c) Raise ExtractorError (aborts extraction); (d) separate content type
diff --git a/bsie/extractor/generic/stat.py b/bsie/extractor/generic/stat.py
index 1dcfedf..0b9ce29 100644
--- a/bsie/extractor/generic/stat.py
+++ b/bsie/extractor/generic/stat.py
@@ -63,7 +63,7 @@ class Stat(extractor.Extractor):
         """Return the file size."""
         try:
             return content.st_size
-        except Exception:
+        except Exception: # pylint: disable=broad-except # we explicitly want to catch everything
             # FIXME: some kind of error reporting (e.g. logging)
             return None
 
diff --git a/test/apps/test_info.py b/test/apps/test_info.py
index ad39c64..6f4d98f 100644
--- a/test/apps/test_info.py
+++ b/test/apps/test_info.py
@@ -8,7 +8,6 @@ Author: Matthias Baumgartner, 2022
 import argparse
 import contextlib
 import io
-import os
 import unittest
 
 # objects to test
-- 
cgit v1.2.3


From 057e09d6537bf5c39815661a75819081e3e5fda7 Mon Sep 17 00:00:00 2001
From: Matthias Baumgartner <dev@igsor.net>
Date: Sun, 18 Dec 2022 13:37:59 +0100
Subject: adaptions to updates in bsfs

---
 bsie/tools/pipeline.py      | 7 +++++--
 bsie/utils/bsfs.py          | 2 ++
 bsie/utils/namespaces.py    | 8 ++++----
 test/apps/test_index.py     | 4 ++--
 test/lib/test_bsie.py       | 4 ++--
 test/tools/test_pipeline.py | 2 +-
 6 files changed, 16 insertions(+), 11 deletions(-)

diff --git a/bsie/tools/pipeline.py b/bsie/tools/pipeline.py
index 52ce526..20e8ddf 100644
--- a/bsie/tools/pipeline.py
+++ b/bsie/tools/pipeline.py
@@ -18,6 +18,9 @@ __all__: typing.Sequence[str] = (
     'Pipeline',
     )
 
+# constants
+FILE_PREFIX = 'file#'
+
 ## code ##
 
 logger = logging.getLogger(__name__)
@@ -48,7 +51,7 @@ class Pipeline():
             ext2rdr: typing.Dict[base.extractor.Extractor, typing.Optional[base.reader.Reader]]
             ):
         # store core members
-        self._prefix = prefix
+        self._prefix = prefix + FILE_PREFIX
         self._ext2rdr = ext2rdr
         # compile schema from all extractors
         self._schema = bsfs.schema.Schema.Union(ext.schema for ext in ext2rdr)
@@ -114,7 +117,7 @@ class Pipeline():
 
         # create subject for file
         uuid = bsfs.uuid.UCID.from_path(path)
-        subject = node.Node(ns.bsfs.File, self._prefix + 'file#' + uuid)
+        subject = node.Node(ns.bsfs.File, self._prefix[uuid])
 
         # extract information
         for rdr, extrs in rdr2ext.items():
diff --git a/bsie/utils/bsfs.py b/bsie/utils/bsfs.py
index c48049d..0b88479 100644
--- a/bsie/utils/bsfs.py
+++ b/bsie/utils/bsfs.py
@@ -9,6 +9,7 @@ import typing
 
 # bsfs imports
 from bsfs import Open, schema
+from bsfs.apps.init import init_sparql_store
 from bsfs.namespace import Namespace
 from bsfs.utils import URI, typename, uuid
 
@@ -17,6 +18,7 @@ __all__: typing.Sequence[str] = (
     'Namespace',
     'Open',
     'URI',
+    'init_sparql_store',
     'schema',
     'typename',
     'uuid',
diff --git a/bsie/utils/namespaces.py b/bsie/utils/namespaces.py
index d6e1c72..a29fc1b 100644
--- a/bsie/utils/namespaces.py
+++ b/bsie/utils/namespaces.py
@@ -11,10 +11,10 @@ import typing
 from . import bsfs as _bsfs
 
 # constants
-bse = _bsfs.Namespace('http://bsfs.ai/schema/Entity#')
-bsfs = _bsfs.Namespace('http://bsfs.ai/schema/')
-bsm = _bsfs.Namespace('http://bsfs.ai/schema/Meta#')
-xsd = _bsfs.Namespace('http://www.w3.org/2001/XMLSchema#')
+bse = _bsfs.Namespace('http://bsfs.ai/schema/Entity')
+bsfs = _bsfs.Namespace('http://bsfs.ai/schema', fsep='/')
+bsm = _bsfs.Namespace('http://bsfs.ai/schema/Meta')
+xsd = _bsfs.Namespace('http://www.w3.org/2001/XMLSchema')
 
 # export
 __all__: typing.Sequence[str] = (
diff --git a/test/apps/test_index.py b/test/apps/test_index.py
index 2be8470..9cdc656 100644
--- a/test/apps/test_index.py
+++ b/test/apps/test_index.py
@@ -30,7 +30,7 @@ class TestIndex(unittest.TestCase):
             ])
 
         prefix = 'http://example.com/me/file#'
-        self.assertTrue(set(bsfs._Graph__backend.graph).issuperset({
+        self.assertTrue(set(bsfs._backend._graph).issuperset({
             (rdflib.URIRef(prefix + '2f4109b40107cc50e0884755a1a961ed126887e49b8dbaf0e146b2e226aa6647'), rdflib.RDF.type, rdflib.URIRef(ns.bsfs.File)),
             (rdflib.URIRef(prefix + '2f4109b40107cc50e0884755a1a961ed126887e49b8dbaf0e146b2e226aa6647'), rdflib.URIRef(ns.bse.author), rdflib.Literal('Me, myself, and I', datatype=rdflib.XSD.string)),
             (rdflib.URIRef(prefix + '2f4109b40107cc50e0884755a1a961ed126887e49b8dbaf0e146b2e226aa6647'), rdflib.URIRef(ns.bse.filename), rdflib.Literal('alpha_second', datatype=rdflib.XSD.string)),
@@ -90,7 +90,7 @@ class TestIndex(unittest.TestCase):
         #   (rdflib.URIRef(prefix + 'd43758ace82154a1cc10ca0dfef63cb20dd831f9c87edd6dc06539eefe67371d'), rdflib.URIRef(ns.bsm.t_created), rdflib.Literal('1670..........', datatype=rdflib.XSD.integer)),
         #   (rdflib.URIRef(prefix + 'd803187cbf3676ae9d38126270a6152c60431589aa3bb3824baf8954e9c097f1'), rdflib.URIRef(ns.bsm.t_created), rdflib.Literal('1670..........', datatype=rdflib.XSD.integer)),
         # instead, we simply check if there's such a predicate for each file
-        self.assertSetEqual({sub for sub, _ in bsfs._Graph__backend.graph.subject_objects(rdflib.URIRef(ns.bsm.t_created))}, {
+        self.assertSetEqual({sub for sub, _ in bsfs._backend._graph.subject_objects(rdflib.URIRef(ns.bsm.t_created))}, {
             rdflib.URIRef(prefix + '2f4109b40107cc50e0884755a1a961ed126887e49b8dbaf0e146b2e226aa6647'),
             rdflib.URIRef(prefix + '441f3d10c8ff489fe8e33e639606512f6c463151cc429de7e554b9af670c2ece'),
             rdflib.URIRef(prefix + '69b98ecf7aff3e95b09688ba93331678eb8397817111f674c9558e6dd8f5e871'),
diff --git a/test/lib/test_bsie.py b/test/lib/test_bsie.py
index f3f476e..771a0c2 100644
--- a/test/lib/test_bsie.py
+++ b/test/lib/test_bsie.py
@@ -38,7 +38,7 @@ class TestBSIE(unittest.TestCase):
                 )},
             ])
         # build pipeline
-        self.prefix = bsfs.Namespace('http://example.com/local/file#')
+        self.prefix = bsfs.Namespace('http://example.com/local/')
         pbuild = builder.PipelineBuilder(self.prefix, rbuild, ebuild)
         self.pipeline = pbuild.build()
 
@@ -155,7 +155,7 @@ class TestBSIE(unittest.TestCase):
             ns.bse.author,
             })
         content_hash = 'a948904f2f0f479b8f8197694b30184b0d2ed1c1cd2a1ec0fb85d299a192a447'
-        subject = node.Node(ns.bsfs.File, self.prefix + 'file#' + content_hash)
+        subject = node.Node(ns.bsfs.File, (self.prefix + 'file#')[content_hash])
         testfile = os.path.join(os.path.dirname(__file__), 'testfile.t')
 
         # from_file extracts all available triples
diff --git a/test/tools/test_pipeline.py b/test/tools/test_pipeline.py
index 91bf736..a116a30 100644
--- a/test/tools/test_pipeline.py
+++ b/test/tools/test_pipeline.py
@@ -94,7 +94,7 @@ class TestPipeline(unittest.TestCase):
         pipeline = Pipeline(self.prefix, self.ext2rdr)
         # build objects for tests
         content_hash = 'a948904f2f0f479b8f8197694b30184b0d2ed1c1cd2a1ec0fb85d299a192a447'
-        subject = node.Node(ns.bsfs.File, self.prefix + 'file#' + content_hash)
+        subject = node.Node(ns.bsfs.File, (self.prefix + 'file#')[content_hash])
         testfile = os.path.join(os.path.dirname(__file__), 'testfile.t')
         p_filename = pipeline.schema.predicate(ns.bse.filename)
         p_filesize = pipeline.schema.predicate(ns.bse.filesize)
-- 
cgit v1.2.3