about | summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author Matthias Baumgartner <dev@igsor.net> 2022-12-24 10:27:09 +0100
committer Matthias Baumgartner <dev@igsor.net> 2022-12-24 10:27:09 +0100
commit 266c2c9a072bf3289fd7f2d75278b7d59528378c (patch)
tree 60760e0fec84d5cd7b3f3efef11e3892df5cc85a
parent ed2074ae88f2db6cb6b38716b43b35e29eb2e16c (diff)
download bsie-266c2c9a072bf3289fd7f2d75278b7d59528378c.tar.gz
bsie-266c2c9a072bf3289fd7f2d75278b7d59528378c.tar.bz2
bsie-266c2c9a072bf3289fd7f2d75278b7d59528378c.zip
package restructuring: base
* Reader and Extractor to respective reader/extractor modules
* ReaderBuilder to reader module
* ExtractorBuilder to extractor module
* Loading module in utils (safe_load, unpack_qualified_name)
* Pipeline and PipelineBuilder to lib module
* errors to utils
* documentation: "standard import" and "external import"
-rw-r--r-- bsie/apps/index.py 16
-rw-r--r-- bsie/apps/info.py 16
-rw-r--r-- bsie/base/__init__.py 24
-rw-r--r-- bsie/extractor/__init__.py 11
-rw-r--r-- bsie/extractor/base.py (renamed from bsie/base/extractor.py) 0
-rw-r--r-- bsie/extractor/builder.py 77
-rw-r--r-- bsie/extractor/generic/constant.py 10
-rw-r--r-- bsie/extractor/generic/path.py 8
-rw-r--r-- bsie/extractor/generic/stat.py 10
-rw-r--r-- bsie/lib/__init__.py 4
-rw-r--r-- bsie/lib/bsie.py 6
-rw-r--r-- bsie/lib/builder.py 85
-rw-r--r-- bsie/lib/pipeline.py (renamed from bsie/tools/pipeline.py) 15
-rw-r--r-- bsie/reader/__init__.py 13
-rw-r--r-- bsie/reader/base.py (renamed from bsie/base/reader.py) 0
-rw-r--r-- bsie/reader/builder.py 74
-rw-r--r-- bsie/reader/path.py 8
-rw-r--r-- bsie/reader/stat.py 9
-rw-r--r-- bsie/tools/__init__.py 20
-rw-r--r-- bsie/tools/builder.py 226
-rw-r--r-- bsie/utils/__init__.py 9
-rw-r--r-- bsie/utils/errors.py (renamed from bsie/base/errors.py) 0
-rw-r--r-- bsie/utils/filematcher/parser.py 6
-rw-r--r-- bsie/utils/loading.py 54
-rw-r--r-- setup.py 2
-rw-r--r-- test/base/__init__.py 0
-rw-r--r-- test/extractor/generic/test_path.py 6
-rw-r--r-- test/extractor/generic/test_stat.py 6
-rw-r--r-- test/extractor/test_base.py (renamed from test/base/test_extractor.py) 8
-rw-r--r-- test/extractor/test_builder.py 103
-rw-r--r-- test/lib/test_bsie.py 24
-rw-r--r-- test/lib/test_builder.py 107
-rw-r--r-- test/lib/test_pipeline.py (renamed from test/tools/test_pipeline.py) 11
-rw-r--r-- test/reader/test_base.py (renamed from test/base/test_reader.py) 6
-rw-r--r-- test/reader/test_builder.py 54
-rw-r--r-- test/reader/test_stat.py 4
-rw-r--r-- test/tools/__init__.py 0
-rw-r--r-- test/tools/test_builder.py 246
-rw-r--r-- test/tools/testfile.t 1
-rw-r--r-- test/utils/filematcher/test_parser.py 6
-rw-r--r-- test/utils/test_loading.py 48
41 files changed, 727 insertions, 606 deletions
diff --git a/bsie/apps/index.py b/bsie/apps/index.py
index 1dbfdd8..0c6296f 100644
--- a/bsie/apps/index.py
+++ b/bsie/apps/index.py
@@ -4,16 +4,16 @@ Part of the bsie module.
A copy of the license is provided with the project.
Author: Matthias Baumgartner, 2022
"""
-# imports
+# standard imports
import argparse
import os
import typing
# bsie imports
-from bsie.base import errors
-from bsie.lib import BSIE
-from bsie.tools import builder
-from bsie.utils import bsfs
+from bsie.extractor import ExtractorBuilder
+from bsie.lib import BSIE, PipelineBuilder
+from bsie.reader import ReaderBuilder
+from bsie.utils import bsfs, errors
# exports
__all__: typing.Sequence[str] = (
@@ -44,9 +44,9 @@ def main(argv):
# FIXME: Read reader/extractor configs from a config file
# reader builder
- rbuild = builder.ReaderBuilder({})
+ rbuild = ReaderBuilder({})
# extractor builder
- ebuild = builder.ExtractorBuilder([
+ ebuild = ExtractorBuilder([
{'bsie.extractor.generic.path.Path': {}},
{'bsie.extractor.generic.stat.Stat': {}},
{'bsie.extractor.generic.constant.Constant': dict(
@@ -60,7 +60,7 @@ def main(argv):
)},
])
# pipeline builder
- pbuild = builder.PipelineBuilder(
+ pbuild = PipelineBuilder(
bsfs.Namespace(args.user + ('/' if not args.user.endswith('/') else '')),
rbuild,
ebuild,
diff --git a/bsie/apps/info.py b/bsie/apps/info.py
index eaf1f71..a4e611c 100644
--- a/bsie/apps/info.py
+++ b/bsie/apps/info.py
@@ -4,15 +4,16 @@ Part of the bsie module.
A copy of the license is provided with the project.
Author: Matthias Baumgartner, 2022
"""
-# imports
+# standard imports
import argparse
import sys
import typing
# bsie imports
-from bsie.base import errors
-from bsie.tools import builder
-from bsie.utils import bsfs
+from bsie.extractor import ExtractorBuilder
+from bsie.lib import PipelineBuilder
+from bsie.reader import ReaderBuilder
+from bsie.utils import bsfs, errors
# exports
__all__: typing.Sequence[str] = (
@@ -31,9 +32,10 @@ def main(argv):
# FIXME: Read reader/extractor configs from a config file
# reader builder
- rbuild = builder.ReaderBuilder({})
+ rbuild = ReaderBuilder({
+ })
# extractor builder
- ebuild = builder.ExtractorBuilder([
+ ebuild = ExtractorBuilder([
{'bsie.extractor.generic.path.Path': {}},
{'bsie.extractor.generic.stat.Stat': {}},
{'bsie.extractor.generic.constant.Constant': dict(
@@ -47,7 +49,7 @@ def main(argv):
)},
])
# pipeline builder
- pbuild = builder.PipelineBuilder(
+ pbuild = PipelineBuilder(
bsfs.Namespace('http://example.com/me/'), # not actually used
rbuild,
ebuild,
diff --git a/bsie/base/__init__.py b/bsie/base/__init__.py
deleted file mode 100644
index 0d362cd..0000000
--- a/bsie/base/__init__.py
+++ /dev/null
@@ -1,24 +0,0 @@
-"""The base module defines the BSIE interfaces.
-
-You'll mostly find abstract classes here.
-
-Part of the bsie module.
-A copy of the license is provided with the project.
-Author: Matthias Baumgartner, 2022
-"""
-# imports
-import typing
-
-# inner-module imports
-from . import errors
-from .extractor import Extractor
-from .reader import Reader
-
-# exports
-__all__: typing.Sequence[str] = (
- 'Extractor',
- 'Reader',
- 'errors',
- )
-
-## EOF ##
diff --git a/bsie/extractor/__init__.py b/bsie/extractor/__init__.py
index ef31343..5f385ee 100644
--- a/bsie/extractor/__init__.py
+++ b/bsie/extractor/__init__.py
@@ -6,10 +6,17 @@ Part of the bsie module.
A copy of the license is provided with the project.
Author: Matthias Baumgartner, 2022
"""
-# imports
+# standard imports
import typing
+# inner-module imports
+from .base import Extractor
+from .builder import ExtractorBuilder
+
# exports
-__all__: typing.Sequence[str] = []
+__all__: typing.Sequence[str] = (
+ 'Extractor',
+ 'ExtractorBuilder',
+ )
## EOF ##
diff --git a/bsie/base/extractor.py b/bsie/extractor/base.py
index c44021b..c44021b 100644
--- a/bsie/base/extractor.py
+++ b/bsie/extractor/base.py
diff --git a/bsie/extractor/builder.py b/bsie/extractor/builder.py
new file mode 100644
index 0000000..0fd3685
--- /dev/null
+++ b/bsie/extractor/builder.py
@@ -0,0 +1,77 @@
+"""
+
+Part of the bsie module.
+A copy of the license is provided with the project.
+Author: Matthias Baumgartner, 2022
+"""
+# standard imports
+import typing
+
+# bsie imports
+from bsie.utils import bsfs, errors, safe_load, unpack_qualified_name
+
+# inner-module imports
+from . import base
+
+# exports
+__all__: typing.Sequence[str] = (
+ 'ExtractorBuilder',
+ )
+
+
+## code ##
+
+class ExtractorBuilder():
+ """Build `bsie.extractor.Extractor` instances.
+
+ It is permissible to build multiple instances of the same extractor
+ (typically with different arguments), hence the ExtractorBuilder
+ receives a list of build specifications. Each specification is
+ a dict with a single key (extractor's qualified name) and a dict
+ to be used as keyword arguments.
+ Example: [{'bsie.extractor.generic.path.Path': {}}, ]
+
+ """
+
+ # build specifications
+ _specs: typing.List[typing.Dict[str, typing.Dict[str, typing.Any]]]
+
+ def __init__(self, specs: typing.List[typing.Dict[str, typing.Dict[str, typing.Any]]]):
+ self._specs = specs
+
+ def __iter__(self) -> typing.Iterator[int]:
+ """Iterate over extractor specifications."""
+ return iter(range(len(self._specs)))
+
+ def build(self, index: int) -> base.Extractor:
+ """Return an instance of the n'th extractor (n=*index*)."""
+ # get build instructions
+ specs = self._specs[index]
+
+ # check specs structure; expecting a single-entry dict: {name: {kwargs}}
+ if not isinstance(specs, dict):
+ raise TypeError(f'expected a dict, found {bsfs.typename(specs)}')
+ if len(specs) != 1:
+ raise TypeError(f'expected a dict of length one, found {len(specs)}')
+
+ # get name and args from specs
+ name = next(iter(specs.keys()))
+ kwargs = specs[name]
+
+ # check kwargs structure
+ if not isinstance(kwargs, dict):
+ raise TypeError(f'expected a dict, found {bsfs.typename(kwargs)}')
+
+ # check name and get module/class components
+ module_name, class_name = unpack_qualified_name(name)
+
+ # import extractor class
+ cls = safe_load(module_name, class_name)
+
+ try: # build and return instance
+ return cls(**kwargs)
+
+ except Exception as err:
+ raise errors.BuilderError(f'failed to build extractor {name} due to {bsfs.typename(err)}: {err}') from err
+
+## EOF ##
diff --git a/bsie/extractor/generic/constant.py b/bsie/extractor/generic/constant.py
index 11384e6..7b1d942 100644
--- a/bsie/extractor/generic/constant.py
+++ b/bsie/extractor/generic/constant.py
@@ -4,13 +4,15 @@ Part of the bsie module.
A copy of the license is provided with the project.
Author: Matthias Baumgartner, 2022
"""
-# imports
+# standard imports
import typing
# bsie imports
-from bsie.base import extractor
from bsie.utils import bsfs, node
+# inner-module imports
+from .. import base
+
# exports
__all__: typing.Sequence[str] = (
'Constant',
@@ -19,7 +21,7 @@ __all__: typing.Sequence[str] = (
## code ##
-class Constant(extractor.Extractor):
+class Constant(base.Extractor):
"""Extract information from file's path."""
CONTENT_READER = None
@@ -32,7 +34,7 @@ class Constant(extractor.Extractor):
schema: str,
tuples: typing.Iterable[typing.Tuple[bsfs.URI, typing.Any]],
):
- super().__init__(bsfs.schema.Schema.from_string(extractor.SCHEMA_PREAMBLE + schema))
+ super().__init__(bsfs.schema.Schema.from_string(base.SCHEMA_PREAMBLE + schema))
# NOTE: Raises a KeyError if the predicate is not part of the schema
self._tuples = tuple((self.schema.predicate(p_uri), value) for p_uri, value in tuples)
# TODO: use schema instance for value checking
diff --git a/bsie/extractor/generic/path.py b/bsie/extractor/generic/path.py
index 7018e12..295715f 100644
--- a/bsie/extractor/generic/path.py
+++ b/bsie/extractor/generic/path.py
@@ -4,12 +4,12 @@ Part of the bsie module.
A copy of the license is provided with the project.
Author: Matthias Baumgartner, 2022
"""
-# imports
+# standard imports
import os
import typing
# bsie imports
-from bsie.base import extractor
+from bsie.extractor import base
from bsie.utils import bsfs, node, ns
# exports
@@ -20,7 +20,7 @@ __all__: typing.Sequence[str] = (
## code ##
-class Path(extractor.Extractor):
+class Path(base.Extractor):
"""Extract information from file's path."""
CONTENT_READER = 'bsie.reader.path.Path'
@@ -29,7 +29,7 @@ class Path(extractor.Extractor):
_callmap: typing.Dict[bsfs.schema.Predicate, typing.Callable[[str], typing.Any]]
def __init__(self):
- super().__init__(bsfs.schema.Schema.from_string(extractor.SCHEMA_PREAMBLE + '''
+ super().__init__(bsfs.schema.Schema.from_string(base.SCHEMA_PREAMBLE + '''
bse:filename rdfs:subClassOf bsfs:Predicate ;
rdfs:domain bsfs:File ;
rdfs:range xsd:string ;
diff --git a/bsie/extractor/generic/stat.py b/bsie/extractor/generic/stat.py
index 0b9ce29..1381fe2 100644
--- a/bsie/extractor/generic/stat.py
+++ b/bsie/extractor/generic/stat.py
@@ -4,14 +4,16 @@ Part of the bsie module.
A copy of the license is provided with the project.
Author: Matthias Baumgartner, 2022
"""
-# imports
+# standard imports
import os
import typing
# bsie imports
-from bsie.base import extractor
from bsie.utils import bsfs, node, ns
+# inner-module imports
+from .. import base
+
# exports
__all__: typing.Sequence[str] = (
'Stat',
@@ -20,7 +22,7 @@ __all__: typing.Sequence[str] = (
## code ##
-class Stat(extractor.Extractor):
+class Stat(base.Extractor):
"""Extract information from the file system."""
CONTENT_READER = 'bsie.reader.stat.Stat'
@@ -29,7 +31,7 @@ class Stat(extractor.Extractor):
_callmap: typing.Dict[bsfs.schema.Predicate, typing.Callable[[os.stat_result], typing.Any]]
def __init__(self):
- super().__init__(bsfs.schema.Schema.from_string(extractor.SCHEMA_PREAMBLE + '''
+ super().__init__(bsfs.schema.Schema.from_string(base.SCHEMA_PREAMBLE + '''
bse:filesize rdfs:subClassOf bsfs:Predicate ;
rdfs:domain bsfs:File ;
rdfs:range xsd:integer ;
diff --git a/bsie/lib/__init__.py b/bsie/lib/__init__.py
index 578c2c4..4239d3b 100644
--- a/bsie/lib/__init__.py
+++ b/bsie/lib/__init__.py
@@ -4,15 +4,17 @@ Part of the bsie module.
A copy of the license is provided with the project.
Author: Matthias Baumgartner, 2022
"""
-# imports
+# standard imports
import typing
# inner-module imports
from .bsie import BSIE
+from .builder import PipelineBuilder
# exports
__all__: typing.Sequence[str] = (
'BSIE',
+ 'PipelineBuilder',
)
## EOF ##
diff --git a/bsie/lib/bsie.py b/bsie/lib/bsie.py
index e087fa9..668783d 100644
--- a/bsie/lib/bsie.py
+++ b/bsie/lib/bsie.py
@@ -4,13 +4,15 @@ Part of the bsie module.
A copy of the license is provided with the project.
Author: Matthias Baumgartner, 2022
"""
-# imports
+# standard imports
import typing
# bsie imports
-from bsie.tools import Pipeline
from bsie.utils import bsfs, node, ns
+# inner-module imports
+from .pipeline import Pipeline
+
# exports
__all__: typing.Sequence[str] = (
'BSIE',
diff --git a/bsie/lib/builder.py b/bsie/lib/builder.py
new file mode 100644
index 0000000..c2abffe
--- /dev/null
+++ b/bsie/lib/builder.py
@@ -0,0 +1,85 @@
+"""
+
+Part of the bsie module.
+A copy of the license is provided with the project.
+Author: Matthias Baumgartner, 2022
+"""
+# standard imports
+import logging
+import typing
+
+# bsie imports
+from bsie.extractor import ExtractorBuilder
+from bsie.reader import ReaderBuilder
+from bsie.utils import bsfs, errors
+
+# inner-module imports
+from . import pipeline
+
+# exports
+__all__: typing.Sequence[str] = (
+ 'PipelineBuilder',
+ )
+
+
+## code ##
+
+logger = logging.getLogger(__name__)
+
+class PipelineBuilder():
+ """Build `bsie.lib.pipeline.Pipeline` instances."""
+
+ # Prefix to be used in the Pipeline.
+ prefix: bsfs.Namespace
+
+ # builder for Readers.
+ rbuild: ReaderBuilder
+
+ # builder for Extractors.
+ ebuild: ExtractorBuilder
+
+ def __init__(
+ self,
+ prefix: bsfs.Namespace,
+ reader_builder: ReaderBuilder,
+ extractor_builder: ExtractorBuilder,
+ ):
+ self.prefix = prefix
+ self.rbuild = reader_builder
+ self.ebuild = extractor_builder
+
+ def build(self) -> pipeline.Pipeline:
+ """Return a Pipeline instance."""
+ ext2rdr = {}
+
+ for eidx in self.ebuild:
+ # build extractor
+ try:
+ ext = self.ebuild.build(eidx)
+
+ except errors.LoaderError as err: # failed to load extractor; skip
+ logger.error('failed to load extractor: %s', err)
+ continue
+
+ except errors.BuilderError as err: # failed to build instance; skip
+ logger.error(str(err))
+ continue
+
+ try:
+ # get reader required by extractor
+ if ext.CONTENT_READER is not None:
+ rdr = self.rbuild.build(ext.CONTENT_READER)
+ else:
+ rdr = None
+ # store extractor
+ ext2rdr[ext] = rdr
+
+ except errors.LoaderError as err: # failed to load reader
+ logger.error('failed to load reader: %s', err)
+
+ except errors.BuilderError as err: # failed to build reader
+ logger.error(str(err))
+
+ return pipeline.Pipeline(self.prefix, ext2rdr)
+
+## EOF ##
diff --git a/bsie/tools/pipeline.py b/bsie/lib/pipeline.py
index 20e8ddf..e5ce1b7 100644
--- a/bsie/tools/pipeline.py
+++ b/bsie/lib/pipeline.py
@@ -4,14 +4,15 @@ Part of the bsie module.
A copy of the license is provided with the project.
Author: Matthias Baumgartner, 2022
"""
-# imports
+# standard imports
from collections import defaultdict
import logging
import typing
# bsie imports
-from bsie import base
-from bsie.utils import bsfs, node, ns
+from bsie.extractor import Extractor
+from bsie.reader import Reader
+from bsie.utils import bsfs, errors, node, ns
# exports
__all__: typing.Sequence[str] = (
@@ -43,12 +44,12 @@ class Pipeline():
_prefix: bsfs.Namespace
# extractor -> reader mapping
- _ext2rdr: typing.Dict[base.extractor.Extractor, typing.Optional[base.reader.Reader]]
+ _ext2rdr: typing.Dict[Extractor, typing.Optional[Reader]]
def __init__(
self,
prefix: bsfs.Namespace,
- ext2rdr: typing.Dict[base.extractor.Extractor, typing.Optional[base.reader.Reader]]
+ ext2rdr: typing.Dict[Extractor, typing.Optional[Reader]]
):
# store core members
self._prefix = prefix + FILE_PREFIX
@@ -132,11 +133,11 @@ class Pipeline():
for subject, pred, value in ext.extract(subject, content, principals):
yield subject, pred, value
- except base.errors.ExtractorError as err:
+ except errors.ExtractorError as err:
# critical extractor failure.
logger.error('%s failed to extract triples from content: %s', ext, err)
- except base.errors.ReaderError as err:
+ except errors.ReaderError as err:
# failed to read any content. skip.
logger.error('%s failed to read content: %s', rdr, err)
diff --git a/bsie/reader/__init__.py b/bsie/reader/__init__.py
index a45f22b..4163d1c 100644
--- a/bsie/reader/__init__.py
+++ b/bsie/reader/__init__.py
@@ -15,5 +15,18 @@ Part of the bsie module.
A copy of the license is provided with the project.
Author: Matthias Baumgartner, 2022
"""
+# standard imports
+import typing
+# inner-module imports
+from .base import Reader
+from .builder import ReaderBuilder
+
+# exports
+__all__: typing.Sequence[str] = (
+ 'Reader',
+ 'ReaderBuilder',
+ )
+
+## EOF ##
## EOF ##
diff --git a/bsie/base/reader.py b/bsie/reader/base.py
index cbabd36..cbabd36 100644
--- a/bsie/base/reader.py
+++ b/bsie/reader/base.py
diff --git a/bsie/reader/builder.py b/bsie/reader/builder.py
new file mode 100644
index 0000000..bce5397
--- /dev/null
+++ b/bsie/reader/builder.py
@@ -0,0 +1,74 @@
+"""
+
+Part of the bsie module.
+A copy of the license is provided with the project.
+Author: Matthias Baumgartner, 2022
+"""
+# standard imports
+import typing
+
+# bsie imports
+from bsie.utils import bsfs, errors, safe_load, unpack_qualified_name
+
+# inner-module imports
+from . import base
+
+# exports
+__all__: typing.Sequence[str] = (
+ 'ReaderBuilder',
+ )
+
+
+## code ##
+
+class ReaderBuilder():
+ """Build `bsie.reader.Reader` instances.
+
+ Readers are defined via their qualified class name
+ (e.g., bsie.reader.path.Path) and optional keyword
+ arguments that are passed to the constructor via
+ the *kwargs* argument (name as key, kwargs as value).
+ The ReaderBuilder keeps a cache of previously built
+ reader instances, since rebuilding one would use the
+ same keyword arguments anyway.
+
+ """
+
+ # keyword arguments
+ _kwargs: typing.Dict[str, typing.Dict[str, typing.Any]]
+
+ # cached readers
+ _cache: typing.Dict[str, base.Reader]
+
+ def __init__(self, kwargs: typing.Dict[str, typing.Dict[str, typing.Any]]):
+ self._kwargs = kwargs
+ self._cache = {}
+
+ def build(self, name: str) -> base.Reader:
+ """Return an instance for the qualified class name."""
+ # return cached instance
+ if name in self._cache:
+ return self._cache[name]
+
+ # check name and get module/class components
+ module_name, class_name = unpack_qualified_name(name)
+
+ # import reader class
+ cls = safe_load(module_name, class_name)
+
+ # get kwargs
+ kwargs = self._kwargs.get(name, {})
+ if not isinstance(kwargs, dict):
+ raise TypeError(f'expected a kwargs dict, found {bsfs.typename(kwargs)}')
+
+ try: # build, cache, and return instance
+ obj = cls(**kwargs)
+ # cache instance
+ self._cache[name] = obj
+ # return instance
+ return obj
+
+ except Exception as err:
+ raise errors.BuilderError(f'failed to build reader {name} due to {bsfs.typename(err)}: {err}') from err
+
+## EOF ##
diff --git a/bsie/reader/path.py b/bsie/reader/path.py
index d60f187..1ca05a0 100644
--- a/bsie/reader/path.py
+++ b/bsie/reader/path.py
@@ -4,11 +4,11 @@ Part of the bsie module.
A copy of the license is provided with the project.
Author: Matthias Baumgartner, 2022
"""
-# imports
+# standard imports
import typing
-# bsie imports
-from bsie.base import reader
+# inner-module imports
+from . import base
# exports
__all__: typing.Sequence[str] = (
@@ -18,7 +18,7 @@ __all__: typing.Sequence[str] = (
## code ##
-class Path(reader.Reader):
+class Path(base.Reader):
"""Return the path."""
def __call__(self, path: str) -> str:
diff --git a/bsie/reader/stat.py b/bsie/reader/stat.py
index fc5fb24..706dc47 100644
--- a/bsie/reader/stat.py
+++ b/bsie/reader/stat.py
@@ -4,12 +4,15 @@ Part of the bsie module.
A copy of the license is provided with the project.
Author: Matthias Baumgartner, 2022
"""
-# imports
+# standard imports
import os
import typing
# bsie imports
-from bsie.base import errors, reader
+from bsie.utils import errors
+
+# inner-module imports
+from . import base
# exports
__all__: typing.Sequence[str] = (
@@ -19,7 +22,7 @@ __all__: typing.Sequence[str] = (
## code ##
-class Stat(reader.Reader):
+class Stat(base.Reader):
"""Read and return the filesystem's stat infos."""
def __call__(self, path: str) -> os.stat_result:
diff --git a/bsie/tools/__init__.py b/bsie/tools/__init__.py
deleted file mode 100644
index 803c321..0000000
--- a/bsie/tools/__init__.py
+++ /dev/null
@@ -1,20 +0,0 @@
-"""
-
-Part of the bsie module.
-A copy of the license is provided with the project.
-Author: Matthias Baumgartner, 2022
-"""
-# imports
-import typing
-
-# inner-module imports
-from . import builder
-from .pipeline import Pipeline
-
-# exports
-__all__: typing.Sequence[str] = (
- 'builder',
- 'Pipeline',
- )
-
-## EOF ##
diff --git a/bsie/tools/builder.py b/bsie/tools/builder.py
deleted file mode 100644
index 190d9bf..0000000
--- a/bsie/tools/builder.py
+++ /dev/null
@@ -1,226 +0,0 @@
-"""
-
-Part of the bsie module.
-A copy of the license is provided with the project.
-Author: Matthias Baumgartner, 2022
-"""
-# imports
-import importlib
-import logging
-import typing
-
-# bsie imports
-from bsie import base
-from bsie.base import errors
-from bsie.utils import bsfs
-
-# inner-module imports
-from . import pipeline
-
-# exports
-__all__: typing.Sequence[str] = (
- 'ExtractorBuilder',
- 'PipelineBuilder',
- 'ReaderBuilder',
- )
-
-
-## code ##
-
-logger = logging.getLogger(__name__)
-
-def _safe_load(module_name: str, class_name: str):
- """Get a class from a module. Raise BuilderError if anything goes wrong."""
- try:
- # load the module
- module = importlib.import_module(module_name)
- except Exception as err:
- # cannot import module
- raise errors.LoaderError(f'cannot load module {module_name}') from err
-
- try:
- # get the class from the module
- cls = getattr(module, class_name)
- except Exception as err:
- # cannot find the class
- raise errors.LoaderError(f'cannot load class {class_name} from module {module_name}') from err
-
- return cls
-
-
-def _unpack_name(name):
- """Split a name into its module and class component (dot-separated)."""
- if not isinstance(name, str):
- raise TypeError(name)
- if '.' not in name:
- raise ValueError('name must be a qualified class name.')
- module_name, class_name = name[:name.rfind('.')], name[name.rfind('.')+1:]
- if module_name == '':
- raise ValueError('name must be a qualified class name.')
- return module_name, class_name
-
-
-class ReaderBuilder():
- """Build `bsie.base.Reader` instances.
-
- Readers are defined via their qualified class name
- (e.g., bsie.reader.path.Path) and optional keyword
- arguments that are passed to the constructor via
- the *kwargs* argument (name as key, kwargs as value).
- The ReaderBuilder keeps a cache of previously built
- reader instances, as they are anyway built with
- identical keyword arguments.
-
- """
-
- # keyword arguments
- _kwargs: typing.Dict[str, typing.Dict[str, typing.Any]]
-
- # cached readers
- _cache: typing.Dict[str, base.Reader]
-
- def __init__(self, kwargs: typing.Dict[str, typing.Dict[str, typing.Any]]):
- self._kwargs = kwargs
- self._cache = {}
-
- def build(self, name: str) -> base.Reader:
- """Return an instance for the qualified class name."""
- # return cached instance
- if name in self._cache:
- return self._cache[name]
-
- # check name and get module/class components
- module_name, class_name = _unpack_name(name)
-
- # import reader class
- cls = _safe_load(module_name, class_name)
-
- # get kwargs
- kwargs = self._kwargs.get(name, {})
- if not isinstance(kwargs, dict):
- raise TypeError(f'expected a kwargs dict, found {bsfs.typename(kwargs)}')
-
- try: # build, cache, and return instance
- obj = cls(**kwargs)
- # cache instance
- self._cache[name] = obj
- # return instance
- return obj
-
- except Exception as err:
- raise errors.BuilderError(f'failed to build reader {name} due to {bsfs.typename(err)}: {err}') from err
-
-
-class ExtractorBuilder():
- """Build `bsie.base.Extractor instances.
-
- It is permissible to build multiple instances of the same extractor
- (typically with different arguments), hence the ExtractorBuilder
- receives a list of build specifications. Each specification is
- a dict with a single key (extractor's qualified name) and a dict
- to be used as keyword arguments.
- Example: [{'bsie.extractor.generic.path.Path': {}}, ]
-
- """
-
- # build specifications
- _specs: typing.List[typing.Dict[str, typing.Dict[str, typing.Any]]]
-
- def __init__(self, specs: typing.List[typing.Dict[str, typing.Dict[str, typing.Any]]]):
- self._specs = specs
-
- def __iter__(self) -> typing.Iterator[int]:
- """Iterate over extractor specifications."""
- return iter(range(len(self._specs)))
-
- def build(self, index: int) -> base.Extractor:
- """Return an instance of the n'th extractor (n=*index*)."""
- # get build instructions
- specs = self._specs[index]
-
- # check specs structure. expecting[{name: {kwargs}}]
- if not isinstance(specs, dict):
- raise TypeError(f'expected a dict, found {bsfs.typename(specs)}')
- if len(specs) != 1:
- raise TypeError(f'expected a dict of length one, found {len(specs)}')
-
- # get name and args from specs
- name = next(iter(specs.keys()))
- kwargs = specs[name]
-
- # check kwargs structure
- if not isinstance(kwargs, dict):
- raise TypeError(f'expected a dict, found {bsfs.typename(kwargs)}')
-
- # check name and get module/class components
- module_name, class_name = _unpack_name(name)
-
- # import extractor class
- cls = _safe_load(module_name, class_name)
-
- try: # build and return instance
- return cls(**kwargs)
-
- except Exception as err:
- raise errors.BuilderError(f'failed to build extractor {name} due to {bsfs.typename(err)}: {err}') from err
-
-
-class PipelineBuilder():
- """Build `bsie.tools.pipeline.Pipeline` instances."""
-
- # Prefix to be used in the Pipeline.
- prefix: bsfs.Namespace
-
- # builder for Readers.
- rbuild: ReaderBuilder
-
- # builder for Extractors.
- ebuild: ExtractorBuilder
-
- def __init__(
- self,
- prefix: bsfs.Namespace,
- reader_builder: ReaderBuilder,
- extractor_builder: ExtractorBuilder,
- ):
- self.prefix = prefix
- self.rbuild = reader_builder
- self.ebuild = extractor_builder
-
- def build(self) -> pipeline.Pipeline:
- """Return a Pipeline instance."""
- ext2rdr = {}
-
- for eidx in self.ebuild:
- # build extractor
- try:
- ext = self.ebuild.build(eidx)
-
- except errors.LoaderError as err: # failed to load extractor; skip
- logger.error('failed to load extractor: %s', err)
- continue
-
- except errors.BuilderError as err: # failed to build instance; skip
- logger.error(str(err))
- continue
-
- try:
- # get reader required by extractor
- if ext.CONTENT_READER is not None:
- rdr = self.rbuild.build(ext.CONTENT_READER)
- else:
- rdr = None
- # store extractor
- ext2rdr[ext] = rdr
-
- except errors.LoaderError as err: # failed to load reader
- logger.error('failed to load reader: %s', err)
-
- except errors.BuilderError as err: # failed to build reader
- logger.error(str(err))
-
- return pipeline.Pipeline(self.prefix, ext2rdr)
-
-
-
-## EOF ##
diff --git a/bsie/utils/__init__.py b/bsie/utils/__init__.py
index 3981dc7..9cb60ed 100644
--- a/bsie/utils/__init__.py
+++ b/bsie/utils/__init__.py
@@ -4,21 +4,24 @@ Part of the bsie module.
A copy of the license is provided with the project.
Author: Matthias Baumgartner, 2022
"""
-# imports
+# standard imports
import typing
# inner-module imports
from . import bsfs
+from . import filematcher
from . import namespaces as ns
from . import node
-from . import filematcher
+from .loading import safe_load, unpack_qualified_name
# exports
__all__: typing.Sequence[str] = (
- 'filematcher',
'bsfs',
+ 'filematcher',
'node',
'ns',
+ 'safe_load',
+ 'unpack_qualified_name',
)
## EOF ##
diff --git a/bsie/base/errors.py b/bsie/utils/errors.py
index 5fafd5b..5fafd5b 100644
--- a/bsie/base/errors.py
+++ b/bsie/utils/errors.py
diff --git a/bsie/utils/filematcher/parser.py b/bsie/utils/filematcher/parser.py
index 0654742..2f82875 100644
--- a/bsie/utils/filematcher/parser.py
+++ b/bsie/utils/filematcher/parser.py
@@ -7,16 +7,14 @@ Author: Matthias Baumgartner, 2021
# standard imports
import typing
-# non-standard imports
+# external imports
import pyparsing
from pyparsing import printables, alphas8bit, punc8bit, QuotedString, Word, \
delimitedList, Or, CaselessKeyword, Group, oneOf, Optional
-# bsie imports
-from bsie.base import errors
-
# inner-module imports
from . import matcher
+from .. import errors
# exports
__all__: typing.Sequence[str] = (
diff --git a/bsie/utils/loading.py b/bsie/utils/loading.py
new file mode 100644
index 0000000..eb05c35
--- /dev/null
+++ b/bsie/utils/loading.py
@@ -0,0 +1,54 @@
+"""
+
+Part of the bsie module.
+A copy of the license is provided with the project.
+Author: Matthias Baumgartner, 2022
+"""
+# standard imports
+import importlib
+import typing
+
+# inner-module imports
+from . import errors
+
+# exports
+__all__: typing.Sequence[str] = (
+ 'safe_load',
+ 'unpack_qualified_name',
+ )
+
+
+## code ##
+
+def safe_load(module_name: str, class_name: str):
+ """Get a class from a module. Raise LoaderError if anything goes wrong."""
+ try:
+ # load the module
+ module = importlib.import_module(module_name)
+ except Exception as err:
+ # cannot import module
+ raise errors.LoaderError(f'cannot load module {module_name}') from err
+
+ try:
+ # get the class from the module
+ cls = getattr(module, class_name)
+ except Exception as err:
+ # cannot find the class
+ raise errors.LoaderError(f'cannot load class {class_name} from module {module_name}') from err
+
+ return cls
+
+
+def unpack_qualified_name(name):
+ """Split a name into its module and class component (dot-separated)."""
+ if not isinstance(name, str):
+ raise TypeError(name)
+ if '.' not in name:
+ raise ValueError('name must be a qualified class name.')
+ module_name, class_name = name[:name.rfind('.')], name[name.rfind('.')+1:]
+ if module_name == '':
+ raise ValueError('name must be a qualified class name.')
+ return module_name, class_name
+
+
+## EOF ##
diff --git a/setup.py b/setup.py
index 8e0efd4..6521593 100644
--- a/setup.py
+++ b/setup.py
@@ -14,7 +14,7 @@ setup(
url='https://www.igsor.net/projects/blackstar/bsie/',
download_url='https://pip.igsor.net',
packages=('bsie', ),
- install_requires=('rdflib', 'bsfs', 'python-magic'),
+ install_requires=('rdflib', 'bsfs', 'python-magic', 'pyparsing'),
python_requires=">=3.7",
)
diff --git a/test/base/__init__.py b/test/base/__init__.py
deleted file mode 100644
index e69de29..0000000
--- a/test/base/__init__.py
+++ /dev/null
diff --git a/test/extractor/generic/test_path.py b/test/extractor/generic/test_path.py
index 820f402..778ac5a 100644
--- a/test/extractor/generic/test_path.py
+++ b/test/extractor/generic/test_path.py
@@ -4,11 +4,11 @@ Part of the bsie test suite.
A copy of the license is provided with the project.
Author: Matthias Baumgartner, 2022
"""
-# imports
+# standard imports
import unittest
# bsie imports
-from bsie.base import extractor
+from bsie.extractor import base
from bsie.utils import bsfs, node as _node, ns
# objects to test
@@ -29,7 +29,7 @@ class TestPath(unittest.TestCase):
def test_schema(self):
self.assertEqual(Path().schema,
- bsfs.schema.Schema.from_string(extractor.SCHEMA_PREAMBLE + '''
+ bsfs.schema.Schema.from_string(base.SCHEMA_PREAMBLE + '''
bse:filename rdfs:subClassOf bsfs:Predicate ;
rdfs:domain bsfs:File ;
rdfs:range xsd:string ;
diff --git a/test/extractor/generic/test_stat.py b/test/extractor/generic/test_stat.py
index 3441438..ff74085 100644
--- a/test/extractor/generic/test_stat.py
+++ b/test/extractor/generic/test_stat.py
@@ -4,12 +4,12 @@ Part of the bsie test suite.
A copy of the license is provided with the project.
Author: Matthias Baumgartner, 2022
"""
-# imports
+# standard imports
import os
import unittest
# bsie imports
-from bsie.base import extractor
+from bsie.extractor import base
from bsie.utils import bsfs, node as _node, ns
# objects to test
@@ -30,7 +30,7 @@ class TestStat(unittest.TestCase):
def test_schema(self):
self.assertEqual(Stat().schema,
- bsfs.schema.Schema.from_string(extractor.SCHEMA_PREAMBLE + '''
+ bsfs.schema.Schema.from_string(base.SCHEMA_PREAMBLE + '''
bse:filesize rdfs:subClassOf bsfs:Predicate ;
rdfs:domain bsfs:File ;
rdfs:range xsd:integer ;
diff --git a/test/base/test_extractor.py b/test/extractor/test_base.py
index 30974ef..6a63c59 100644
--- a/test/base/test_extractor.py
+++ b/test/extractor/test_base.py
@@ -4,21 +4,21 @@ Part of the bsie test suite.
A copy of the license is provided with the project.
Author: Matthias Baumgartner, 2022
"""
-# imports
+# standard imports
import unittest
# bsie imports
from bsie.utils import bsfs, ns
# objects to test
-from bsie.base import extractor
+from bsie.extractor import base
## code ##
-class StubExtractor(extractor.Extractor):
+class StubExtractor(base.Extractor):
def __init__(self):
- super().__init__(bsfs.schema.Schema.from_string(extractor.SCHEMA_PREAMBLE + '''
+ super().__init__(bsfs.schema.Schema.from_string(base.SCHEMA_PREAMBLE + '''
bse:author rdfs:subClassOf bsfs:Predicate ;
rdfs:domain bsfs:Entity ;
rdfs:range xsd:string ;
diff --git a/test/extractor/test_builder.py b/test/extractor/test_builder.py
new file mode 100644
index 0000000..039ea53
--- /dev/null
+++ b/test/extractor/test_builder.py
@@ -0,0 +1,103 @@
+"""
+
+Part of the bsie test suite.
+A copy of the license is provided with the project.
+Author: Matthias Baumgartner, 2022
+"""
+# standard imports
+import unittest
+
+# bsie imports
+from bsie.utils import errors
+
+# objects to test
+from bsie.extractor import ExtractorBuilder
+
+
+## code ##
+
+class TestExtractorBuilder(unittest.TestCase):
+ def test_iter(self):
+ # no specifications
+ self.assertListEqual(list(ExtractorBuilder([])), [])
+ # some specifications
+ builder = ExtractorBuilder([
+ {'bsie.extractor.generic.path.Path': {}},
+ {'bsie.extractor.generic.stat.Stat': {}},
+ {'bsie.extractor.generic.path.Path': {}},
+ ])
+ self.assertListEqual(list(builder), [0, 1, 2])
+
+ def test_build(self):
+ # simple and repeated extractors
+ builder = ExtractorBuilder([
+ {'bsie.extractor.generic.path.Path': {}},
+ {'bsie.extractor.generic.stat.Stat': {}},
+ {'bsie.extractor.generic.path.Path': {}},
+ ])
+ ext = [builder.build(0), builder.build(1), builder.build(2)]
+ import bsie.extractor.generic.path
+ import bsie.extractor.generic.stat
+ self.assertListEqual(ext, [
+ bsie.extractor.generic.path.Path(),
+ bsie.extractor.generic.stat.Stat(),
+ bsie.extractor.generic.path.Path(),
+ ])
+ # out-of-bounds raises IndexError
+ self.assertRaises(IndexError, builder.build, 3)
+
+ # building with args
+ builder = ExtractorBuilder([
+ {'bsie.extractor.generic.constant.Constant': {
+ 'schema': '''
+ bse:author rdfs:subClassOf bsfs:Predicate ;
+ rdfs:domain bsfs:Entity ;
+ rdfs:range xsd:string ;
+ bsfs:unique "true"^^xsd:boolean .
+ bse:rating rdfs:subClassOf bsfs:Predicate ;
+ rdfs:domain bsfs:Entity ;
+ rdfs:range xsd:integer ;
+ bsfs:unique "true"^^xsd:boolean .
+ ''',
+ 'tuples': [
+ ('http://bsfs.ai/schema/Entity#author', 'Me, myself, and I'),
+ ('http://bsfs.ai/schema/Entity#rating', 123),
+ ],
+ }}])
+ obj = builder.build(0)
+ import bsie.extractor.generic.constant
+ self.assertEqual(obj, bsie.extractor.generic.constant.Constant('''
+ bse:author rdfs:subClassOf bsfs:Predicate ;
+ rdfs:domain bsfs:Entity ;
+ rdfs:range xsd:string ;
+ bsfs:unique "true"^^xsd:boolean .
+ bse:rating rdfs:subClassOf bsfs:Predicate ;
+ rdfs:domain bsfs:Entity ;
+ rdfs:range xsd:integer ;
+ bsfs:unique "true"^^xsd:boolean .
+ ''', [
+ ('http://bsfs.ai/schema/Entity#author', 'Me, myself, and I'),
+ ('http://bsfs.ai/schema/Entity#rating', 123),
+ ]))
+
+ # building with invalid args
+ self.assertRaises(errors.BuilderError, ExtractorBuilder(
+ [{'bsie.extractor.generic.path.Path': {'foo': 123}}]).build, 0)
+ # non-dict build specification
+ self.assertRaises(TypeError, ExtractorBuilder(
+ [('bsie.extractor.generic.path.Path', {})]).build, 0)
+ # multiple keys per build specification
+ self.assertRaises(TypeError, ExtractorBuilder(
+ [{'bsie.extractor.generic.path.Path': {},
+ 'bsie.extractor.generic.stat.Stat': {}}]).build, 0)
+ # non-dict value for kwargs
+ self.assertRaises(TypeError, ExtractorBuilder(
+ [{'bsie.extractor.generic.path.Path': 123}]).build, 0)
+
+
+## main ##
+
+if __name__ == '__main__':
+ unittest.main()
+
+## EOF ##
diff --git a/test/lib/test_bsie.py b/test/lib/test_bsie.py
index 771a0c2..52f1d44 100644
--- a/test/lib/test_bsie.py
+++ b/test/lib/test_bsie.py
@@ -4,13 +4,15 @@ Part of the bsie test suite.
A copy of the license is provided with the project.
Author: Matthias Baumgartner, 2022
"""
-# imports
+# standard imports
import os
import unittest
# bsie imports
-from bsie.base import extractor
-from bsie.tools import builder
+from bsie.extractor import ExtractorBuilder
+from bsie.extractor.base import SCHEMA_PREAMBLE
+from bsie.lib import PipelineBuilder
+from bsie.reader import ReaderBuilder
from bsie.utils import bsfs, node, ns
# objects to test
@@ -22,9 +24,9 @@ from bsie.lib.bsie import BSIE
class TestBSIE(unittest.TestCase):
def setUp(self):
# reader builder
- rbuild = builder.ReaderBuilder({})
+ rbuild = ReaderBuilder({})
# extractor builder
- ebuild = builder.ExtractorBuilder([
+ ebuild = ExtractorBuilder([
{'bsie.extractor.generic.path.Path': {}},
{'bsie.extractor.generic.stat.Stat': {}},
{'bsie.extractor.generic.constant.Constant': dict(
@@ -39,7 +41,7 @@ class TestBSIE(unittest.TestCase):
])
# build pipeline
self.prefix = bsfs.Namespace('http://example.com/local/')
- pbuild = builder.PipelineBuilder(self.prefix, rbuild, ebuild)
+ pbuild = PipelineBuilder(self.prefix, rbuild, ebuild)
self.pipeline = pbuild.build()
def test_construction(self):
@@ -50,7 +52,7 @@ class TestBSIE(unittest.TestCase):
ns.bse.filesize,
ns.bse.author,
})
- self.assertEqual(lib.schema, bsfs.schema.Schema.from_string(extractor.SCHEMA_PREAMBLE + '''
+ self.assertEqual(lib.schema, bsfs.schema.Schema.from_string(SCHEMA_PREAMBLE + '''
bse:filename rdfs:subClassOf bsfs:Predicate ;
rdfs:domain bsfs:File ;
rdfs:range xsd:string ;
@@ -77,7 +79,7 @@ class TestBSIE(unittest.TestCase):
ns.bse.filesize,
ns.bse.author,
})
- self.assertEqual(lib.schema, bsfs.schema.Schema.from_string(extractor.SCHEMA_PREAMBLE + '''
+ self.assertEqual(lib.schema, bsfs.schema.Schema.from_string(SCHEMA_PREAMBLE + '''
bse:filesize rdfs:subClassOf bsfs:Predicate ;
rdfs:domain bsfs:File ;
rdfs:range xsd:integer;
@@ -95,7 +97,7 @@ class TestBSIE(unittest.TestCase):
ns.bse.filesize,
ns.bse.author,
})
- self.assertEqual(lib.schema, bsfs.schema.Schema.from_string(extractor.SCHEMA_PREAMBLE + '''
+ self.assertEqual(lib.schema, bsfs.schema.Schema.from_string(SCHEMA_PREAMBLE + '''
bse:filename rdfs:subClassOf bsfs:Predicate ;
rdfs:domain bsfs:File ;
rdfs:range xsd:string ;
@@ -122,7 +124,7 @@ class TestBSIE(unittest.TestCase):
self.assertSetEqual(set(lib.principals), {
ns.bse.author,
})
- self.assertEqual(lib.schema, bsfs.schema.Schema.from_string(extractor.SCHEMA_PREAMBLE + '''
+ self.assertEqual(lib.schema, bsfs.schema.Schema.from_string(SCHEMA_PREAMBLE + '''
bse:author rdfs:subClassOf bsfs:Predicate ;
rdfs:domain bsfs:Entity ;
rdfs:range xsd:string ;
@@ -137,7 +139,7 @@ class TestBSIE(unittest.TestCase):
self.assertSetEqual(set(lib.principals), {
ns.bse.filesize,
})
- self.assertEqual(lib.schema, bsfs.schema.Schema.from_string(extractor.SCHEMA_PREAMBLE + '''
+ self.assertEqual(lib.schema, bsfs.schema.Schema.from_string(SCHEMA_PREAMBLE + '''
bse:filesize rdfs:subClassOf bsfs:Predicate ;
rdfs:domain bsfs:File ;
rdfs:range xsd:integer;
diff --git a/test/lib/test_builder.py b/test/lib/test_builder.py
new file mode 100644
index 0000000..273d620
--- /dev/null
+++ b/test/lib/test_builder.py
@@ -0,0 +1,107 @@
+"""
+
+Part of the bsie test suite.
+A copy of the license is provided with the project.
+Author: Matthias Baumgartner, 2022
+"""
+# standard imports
+import logging
+import unittest
+
+# bsie imports
+from bsie.extractor import ExtractorBuilder
+from bsie.reader import ReaderBuilder
+from bsie.utils import bsfs
+
+# objects to test
+from bsie.lib import PipelineBuilder
+
+
+## code ##
+
+class TestPipelineBuilder(unittest.TestCase):
+ def test_build(self):
+ prefix = bsfs.URI('http://example.com/local/file#')
+ c_schema = '''
+ bse:author rdfs:subClassOf bsfs:Predicate ;
+ rdfs:domain bsfs:Entity ;
+ rdfs:range xsd:string ;
+ bsfs:unique "true"^^xsd:boolean .
+ '''
+ c_tuples = [('http://bsfs.ai/schema/Entity#author', 'Me, myself, and I')]
+ # prepare builders
+ rbuild = ReaderBuilder({})
+ ebuild = ExtractorBuilder([
+ {'bsie.extractor.generic.path.Path': {}},
+ {'bsie.extractor.generic.stat.Stat': {}},
+ {'bsie.extractor.generic.constant.Constant': dict(
+ schema=c_schema,
+ tuples=c_tuples,
+ )},
+ ])
+ # build pipeline
+ builder = PipelineBuilder(prefix, rbuild, ebuild)
+ pipeline = builder.build()
+ # delayed import
+ import bsie.reader.path
+ import bsie.reader.stat
+ import bsie.extractor.generic.path
+ import bsie.extractor.generic.stat
+ import bsie.extractor.generic.constant
+ # check pipeline
+ self.assertDictEqual(pipeline._ext2rdr, {
+ bsie.extractor.generic.path.Path(): bsie.reader.path.Path(),
+ bsie.extractor.generic.stat.Stat(): bsie.reader.stat.Stat(),
+ bsie.extractor.generic.constant.Constant(c_schema, c_tuples): None,
+ })
+
+ # fail to load extractor
+ ebuild_err = ExtractorBuilder([
+ {'bsie.extractor.generic.foo.Foo': {}},
+ {'bsie.extractor.generic.path.Path': {}},
+ ])
+ with self.assertLogs(logging.getLogger('bsie.lib.builder'), logging.ERROR):
+ pipeline = PipelineBuilder(prefix, rbuild, ebuild_err).build()
+ self.assertDictEqual(pipeline._ext2rdr, {
+ bsie.extractor.generic.path.Path(): bsie.reader.path.Path()})
+
+ # fail to build extractor
+ ebuild_err = ExtractorBuilder([
+ {'bsie.extractor.generic.path.Path': {'foo': 123}},
+ {'bsie.extractor.generic.path.Path': {}},
+ ])
+ with self.assertLogs(logging.getLogger('bsie.lib.builder'), logging.ERROR):
+ pipeline = PipelineBuilder(prefix, rbuild, ebuild_err).build()
+ self.assertDictEqual(pipeline._ext2rdr, {
+ bsie.extractor.generic.path.Path(): bsie.reader.path.Path()})
+
+ # fail to load reader
+ with self.assertLogs(logging.getLogger('bsie.lib.builder'), logging.ERROR):
+ # switch reader of an extractor
+ old_reader = bsie.extractor.generic.path.Path.CONTENT_READER
+ bsie.extractor.generic.path.Path.CONTENT_READER = 'bsie.reader.foo.Foo'
+ # build pipeline with invalid reader reference
+ pipeline = PipelineBuilder(prefix, rbuild, ebuild).build()
+ self.assertDictEqual(pipeline._ext2rdr, {
+ bsie.extractor.generic.stat.Stat(): bsie.reader.stat.Stat(),
+ bsie.extractor.generic.constant.Constant(c_schema, c_tuples): None,
+ })
+ # switch back
+ bsie.extractor.generic.path.Path.CONTENT_READER = old_reader
+
+ # fail to build reader
+ rbuild_err = ReaderBuilder({'bsie.reader.stat.Stat': dict(foo=123)})
+ with self.assertLogs(logging.getLogger('bsie.lib.builder'), logging.ERROR):
+ pipeline = PipelineBuilder(prefix, rbuild_err, ebuild).build()
+ self.assertDictEqual(pipeline._ext2rdr, {
+ bsie.extractor.generic.path.Path(): bsie.reader.path.Path(),
+ bsie.extractor.generic.constant.Constant(c_schema, c_tuples): None,
+ })
+
+
+## main ##
+
+if __name__ == '__main__':
+ unittest.main()
+
+## EOF ##
diff --git a/test/tools/test_pipeline.py b/test/lib/test_pipeline.py
index a116a30..c6f7aba 100644
--- a/test/tools/test_pipeline.py
+++ b/test/lib/test_pipeline.py
@@ -4,14 +4,13 @@ Part of the bsie test suite.
A copy of the license is provided with the project.
Author: Matthias Baumgartner, 2022
"""
-# imports
+# standard imports
import logging
import os
import unittest
# bsie imports
-from bsie.base import errors
-from bsie.utils import bsfs, node, ns
+from bsie.utils import bsfs, errors, node, ns
import bsie.extractor.generic.constant
import bsie.extractor.generic.path
import bsie.extractor.generic.stat
@@ -19,7 +18,7 @@ import bsie.reader.path
import bsie.reader.stat
# objects to test
-from bsie.tools.pipeline import Pipeline
+from bsie.lib.pipeline import Pipeline
## code ##
@@ -140,7 +139,7 @@ class TestPipeline(unittest.TestCase):
raise errors.ReaderError('reader error')
pipeline = Pipeline(self.prefix, {bsie.extractor.generic.path.Path(): FaultyReader()})
- with self.assertLogs(logging.getLogger('bsie.tools.pipeline'), logging.ERROR):
+ with self.assertLogs(logging.getLogger('bsie.lib.pipeline'), logging.ERROR):
testfile = os.path.join(os.path.dirname(__file__), 'testfile.t')
p_filename = pipeline.schema.predicate(ns.bse.filename)
self.assertSetEqual(set(pipeline(testfile, {p_filename})), set())
@@ -151,7 +150,7 @@ class TestPipeline(unittest.TestCase):
raise errors.ExtractorError('extractor error')
pipeline = Pipeline(self.prefix, {FaultyExtractor(): bsie.reader.path.Path()})
- with self.assertLogs(logging.getLogger('bsie.tools.pipeline'), logging.ERROR):
+ with self.assertLogs(logging.getLogger('bsie.lib.pipeline'), logging.ERROR):
testfile = os.path.join(os.path.dirname(__file__), 'testfile.t')
p_filename = pipeline.schema.predicate(ns.bse.filename)
self.assertSetEqual(set(pipeline(testfile, {p_filename})), set())
diff --git a/test/base/test_reader.py b/test/reader/test_base.py
index a907eb9..41f4c29 100644
--- a/test/base/test_reader.py
+++ b/test/reader/test_base.py
@@ -4,16 +4,16 @@ Part of the bsie test suite.
A copy of the license is provided with the project.
Author: Matthias Baumgartner, 2022
"""
-# imports
+# standard imports
import unittest
# objects to test
-from bsie import base
+from bsie.reader import Reader
## code ##
-class StubReader(base.Reader):
+class StubReader(Reader):
def __call__(self, path):
raise NotImplementedError()
diff --git a/test/reader/test_builder.py b/test/reader/test_builder.py
new file mode 100644
index 0000000..92e9edc
--- /dev/null
+++ b/test/reader/test_builder.py
@@ -0,0 +1,54 @@
+"""
+
+Part of the bsie test suite.
+A copy of the license is provided with the project.
+Author: Matthias Baumgartner, 2022
+"""
+# standard imports
+import unittest
+
+# bsie imports
+from bsie.utils import errors
+
+# objects to test
+from bsie.reader import ReaderBuilder
+
+
+## code ##
+
+class TestReaderBuilder(unittest.TestCase):
+ def test_build(self):
+ builder = ReaderBuilder({'bsie.reader.path.Path': {}})
+ # build configured reader
+ cls = builder.build('bsie.reader.path.Path')
+ import bsie.reader.path
+ self.assertIsInstance(cls, bsie.reader.path.Path)
+ # build unconfigured reader
+ cls = builder.build('bsie.reader.stat.Stat')
+ import bsie.reader.stat
+ self.assertIsInstance(cls, bsie.reader.stat.Stat)
+ # re-build previous reader (test cache)
+ self.assertEqual(cls, builder.build('bsie.reader.stat.Stat'))
+ # test invalid
+ self.assertRaises(TypeError, builder.build, 123)
+ self.assertRaises(TypeError, builder.build, None)
+ self.assertRaises(ValueError, builder.build, '')
+ self.assertRaises(ValueError, builder.build, 'Path')
+ self.assertRaises(errors.BuilderError, builder.build, 'path.Path')
+ # invalid config
+ builder = ReaderBuilder({'bsie.reader.stat.Stat': dict(foo=123)})
+ self.assertRaises(errors.BuilderError, builder.build, 'bsie.reader.stat.Stat')
+ builder = ReaderBuilder({'bsie.reader.stat.Stat': 123})
+ self.assertRaises(TypeError, builder.build, 'bsie.reader.stat.Stat')
+ # no instructions
+ builder = ReaderBuilder({})
+ cls = builder.build('bsie.reader.stat.Stat')
+ self.assertIsInstance(cls, bsie.reader.stat.Stat)
+
+
+## main ##
+
+if __name__ == '__main__':
+ unittest.main()
+
+## EOF ##
diff --git a/test/reader/test_stat.py b/test/reader/test_stat.py
index d12ad9c..fd9fdcd 100644
--- a/test/reader/test_stat.py
+++ b/test/reader/test_stat.py
@@ -4,12 +4,12 @@ Part of the bsie test suite.
A copy of the license is provided with the project.
Author: Matthias Baumgartner, 2022
"""
-# imports
+# standard imports
import os
import unittest
# bsie imports
-from bsie.base import errors
+from bsie.utils import errors
# objects to test
from bsie.reader.stat import Stat
diff --git a/test/tools/__init__.py b/test/tools/__init__.py
deleted file mode 100644
index e69de29..0000000
--- a/test/tools/__init__.py
+++ /dev/null
diff --git a/test/tools/test_builder.py b/test/tools/test_builder.py
deleted file mode 100644
index 62c637c..0000000
--- a/test/tools/test_builder.py
+++ /dev/null
@@ -1,246 +0,0 @@
-"""
-
-Part of the bsie test suite.
-A copy of the license is provided with the project.
-Author: Matthias Baumgartner, 2022
-"""
-# imports
-import logging
-import unittest
-
-# bsie imports
-from bsie import base
-from bsie.utils import bsfs
-
-# objects to test
-from bsie.tools.builder import ExtractorBuilder
-from bsie.tools.builder import PipelineBuilder
-from bsie.tools.builder import ReaderBuilder
-from bsie.tools.builder import _safe_load
-from bsie.tools.builder import _unpack_name
-
-
-## code ##
-
-class TestUtils(unittest.TestCase):
- def test_safe_load(self):
- # invalid module
- self.assertRaises(base.errors.LoaderError, _safe_load, 'dBGHMSAYOoKeKMpywDoKZQycENFPvN', 'foobar')
- self.assertRaises(base.errors.LoaderError, _safe_load, 'dBGHMSAYOoKeKMpywDoKZQycENFPvN.bar', 'foobar')
- # partially valid module
- self.assertRaises(base.errors.LoaderError, _safe_load, 'os.foo', 'foobar')
- # invalid class
- self.assertRaises(base.errors.LoaderError, _safe_load, 'os.path', 'foo')
- # valid module and class
- cls = _safe_load('collections.abc', 'Container')
- import collections.abc
- self.assertEqual(cls, collections.abc.Container)
-
- def test_unpack_name(self):
- self.assertRaises(TypeError, _unpack_name, 123)
- self.assertRaises(TypeError, _unpack_name, None)
- self.assertRaises(ValueError, _unpack_name, '')
- self.assertRaises(ValueError, _unpack_name, 'path')
- self.assertRaises(ValueError, _unpack_name, '.Path')
- self.assertEqual(_unpack_name('path.Path'), ('path', 'Path'))
- self.assertEqual(_unpack_name('path.foo.bar.Path'), ('path.foo.bar', 'Path'))
-
-
-class TestReaderBuilder(unittest.TestCase):
- def test_build(self):
- builder = ReaderBuilder({'bsie.reader.path.Path': {}})
- # build configured reader
- cls = builder.build('bsie.reader.path.Path')
- import bsie.reader.path
- self.assertIsInstance(cls, bsie.reader.path.Path)
- # build unconfigured reader
- cls = builder.build('bsie.reader.stat.Stat')
- import bsie.reader.stat
- self.assertIsInstance(cls, bsie.reader.stat.Stat)
- # re-build previous reader (test cache)
- self.assertEqual(cls, builder.build('bsie.reader.stat.Stat'))
- # test invalid
- self.assertRaises(TypeError, builder.build, 123)
- self.assertRaises(TypeError, builder.build, None)
- self.assertRaises(ValueError, builder.build, '')
- self.assertRaises(ValueError, builder.build, 'Path')
- self.assertRaises(base.errors.BuilderError, builder.build, 'path.Path')
- # invalid config
- builder = ReaderBuilder({'bsie.reader.stat.Stat': dict(foo=123)})
- self.assertRaises(base.errors.BuilderError, builder.build, 'bsie.reader.stat.Stat')
- builder = ReaderBuilder({'bsie.reader.stat.Stat': 123})
- self.assertRaises(TypeError, builder.build, 'bsie.reader.stat.Stat')
- # no instructions
- builder = ReaderBuilder({})
- cls = builder.build('bsie.reader.stat.Stat')
- self.assertIsInstance(cls, bsie.reader.stat.Stat)
-
-
-
-class TestExtractorBuilder(unittest.TestCase):
- def test_iter(self):
- # no specifications
- self.assertListEqual(list(ExtractorBuilder([])), [])
- # some specifications
- builder = ExtractorBuilder([
- {'bsie.extractor.generic.path.Path': {}},
- {'bsie.extractor.generic.stat.Stat': {}},
- {'bsie.extractor.generic.path.Path': {}},
- ])
- self.assertListEqual(list(builder), [0, 1, 2])
-
- def test_build(self):
- # simple and repeated extractors
- builder = ExtractorBuilder([
- {'bsie.extractor.generic.path.Path': {}},
- {'bsie.extractor.generic.stat.Stat': {}},
- {'bsie.extractor.generic.path.Path': {}},
- ])
- ext = [builder.build(0), builder.build(1), builder.build(2)]
- import bsie.extractor.generic.path
- import bsie.extractor.generic.stat
- self.assertListEqual(ext, [
- bsie.extractor.generic.path.Path(),
- bsie.extractor.generic.stat.Stat(),
- bsie.extractor.generic.path.Path(),
- ])
- # out-of-bounds raises KeyError
- self.assertRaises(IndexError, builder.build, 3)
-
- # building with args
- builder = ExtractorBuilder([
- {'bsie.extractor.generic.constant.Constant': {
- 'schema': '''
- bse:author rdfs:subClassOf bsfs:Predicate ;
- rdfs:domain bsfs:Entity ;
- rdfs:range xsd:string ;
- bsfs:unique "true"^^xsd:boolean .
- bse:rating rdfs:subClassOf bsfs:Predicate ;
- rdfs:domain bsfs:Entity ;
- rdfs:range xsd:integer ;
- bsfs:unique "true"^^xsd:boolean .
- ''',
- 'tuples': [
- ('http://bsfs.ai/schema/Entity#author', 'Me, myself, and I'),
- ('http://bsfs.ai/schema/Entity#rating', 123),
- ],
- }}])
- obj = builder.build(0)
- import bsie.extractor.generic.constant
- self.assertEqual(obj, bsie.extractor.generic.constant.Constant('''
- bse:author rdfs:subClassOf bsfs:Predicate ;
- rdfs:domain bsfs:Entity ;
- rdfs:range xsd:string ;
- bsfs:unique "true"^^xsd:boolean .
- bse:rating rdfs:subClassOf bsfs:Predicate ;
- rdfs:domain bsfs:Entity ;
- rdfs:range xsd:integer ;
- bsfs:unique "true"^^xsd:boolean .
- ''', [
- ('http://bsfs.ai/schema/Entity#author', 'Me, myself, and I'),
- ('http://bsfs.ai/schema/Entity#rating', 123),
- ]))
-
- # building with invalid args
- self.assertRaises(base.errors.BuilderError, ExtractorBuilder(
- [{'bsie.extractor.generic.path.Path': {'foo': 123}}]).build, 0)
- # non-dict build specification
- self.assertRaises(TypeError, ExtractorBuilder(
- [('bsie.extractor.generic.path.Path', {})]).build, 0)
- # multiple keys per build specification
- self.assertRaises(TypeError, ExtractorBuilder(
- [{'bsie.extractor.generic.path.Path': {},
- 'bsie.extractor.generic.stat.Stat': {}}]).build, 0)
- # non-dict value for kwargs
- self.assertRaises(TypeError, ExtractorBuilder(
- [{'bsie.extractor.generic.path.Path': 123}]).build, 0)
-
-
-
-
-class TestPipelineBuilder(unittest.TestCase):
- def test_build(self):
- prefix = bsfs.URI('http://example.com/local/file#')
- c_schema = '''
- bse:author rdfs:subClassOf bsfs:Predicate ;
- rdfs:domain bsfs:Entity ;
- rdfs:range xsd:string ;
- bsfs:unique "true"^^xsd:boolean .
- '''
- c_tuples = [('http://bsfs.ai/schema/Entity#author', 'Me, myself, and I')]
- # prepare builders
- rbuild = ReaderBuilder({})
- ebuild = ExtractorBuilder([
- {'bsie.extractor.generic.path.Path': {}},
- {'bsie.extractor.generic.stat.Stat': {}},
- {'bsie.extractor.generic.constant.Constant': dict(
- schema=c_schema,
- tuples=c_tuples,
- )},
- ])
- # build pipeline
- builder = PipelineBuilder(prefix, rbuild, ebuild)
- pipeline = builder.build()
- # delayed import
- import bsie.reader.path
- import bsie.reader.stat
- import bsie.extractor.generic.path
- import bsie.extractor.generic.stat
- import bsie.extractor.generic.constant
- # check pipeline
- self.assertDictEqual(pipeline._ext2rdr, {
- bsie.extractor.generic.path.Path(): bsie.reader.path.Path(),
- bsie.extractor.generic.stat.Stat(): bsie.reader.stat.Stat(),
- bsie.extractor.generic.constant.Constant(c_schema, c_tuples): None,
- })
-
- # fail to load extractor
- ebuild_err = ExtractorBuilder([
- {'bsie.extractor.generic.foo.Foo': {}},
- {'bsie.extractor.generic.path.Path': {}},
- ])
- with self.assertLogs(logging.getLogger('bsie.tools.builder'), logging.ERROR):
- pipeline = PipelineBuilder(prefix, rbuild, ebuild_err).build()
- self.assertDictEqual(pipeline._ext2rdr, {
- bsie.extractor.generic.path.Path(): bsie.reader.path.Path()})
-
- # fail to build extractor
- ebuild_err = ExtractorBuilder([
- {'bsie.extractor.generic.path.Path': {'foo': 123}},
- {'bsie.extractor.generic.path.Path': {}},
- ])
- with self.assertLogs(logging.getLogger('bsie.tools.builder'), logging.ERROR):
- pipeline = PipelineBuilder(prefix, rbuild, ebuild_err).build()
- self.assertDictEqual(pipeline._ext2rdr, {
- bsie.extractor.generic.path.Path(): bsie.reader.path.Path()})
-
- # fail to load reader
- with self.assertLogs(logging.getLogger('bsie.tools.builder'), logging.ERROR):
- # switch reader of an extractor
- old_reader = bsie.extractor.generic.path.Path.CONTENT_READER
- bsie.extractor.generic.path.Path.CONTENT_READER = 'bsie.reader.foo.Foo'
- # build pipeline with invalid reader reference
- pipeline = PipelineBuilder(prefix, rbuild, ebuild).build()
- self.assertDictEqual(pipeline._ext2rdr, {
- bsie.extractor.generic.stat.Stat(): bsie.reader.stat.Stat(),
- bsie.extractor.generic.constant.Constant(c_schema, c_tuples): None,
- })
- # switch back
- bsie.extractor.generic.path.Path.CONTENT_READER = old_reader
-
- # fail to build reader
- rbuild_err = ReaderBuilder({'bsie.reader.stat.Stat': dict(foo=123)})
- with self.assertLogs(logging.getLogger('bsie.tools.builder'), logging.ERROR):
- pipeline = PipelineBuilder(prefix, rbuild_err, ebuild).build()
- self.assertDictEqual(pipeline._ext2rdr, {
- bsie.extractor.generic.path.Path(): bsie.reader.path.Path(),
- bsie.extractor.generic.constant.Constant(c_schema, c_tuples): None,
- })
-
-
-## main ##
-
-if __name__ == '__main__':
- unittest.main()
-
-## EOF ##
diff --git a/test/tools/testfile.t b/test/tools/testfile.t
deleted file mode 100644
index 3b18e51..0000000
--- a/test/tools/testfile.t
+++ /dev/null
@@ -1 +0,0 @@
-hello world
diff --git a/test/utils/filematcher/test_parser.py b/test/utils/filematcher/test_parser.py
index a81d2ed..c594747 100644
--- a/test/utils/filematcher/test_parser.py
+++ b/test/utils/filematcher/test_parser.py
@@ -4,11 +4,11 @@ Part of the bsie test suite.
A copy of the license is provided with the project.
Author: Matthias Baumgartner, 2022
"""
-# imports
+# standard imports
import unittest
-# inner-module imports
-from bsie.base import errors
+# bsie imports
+from bsie.utils import errors
from bsie.utils.filematcher import matcher
# objects to test
diff --git a/test/utils/test_loading.py b/test/utils/test_loading.py
new file mode 100644
index 0000000..58ff166
--- /dev/null
+++ b/test/utils/test_loading.py
@@ -0,0 +1,48 @@
+"""
+
+Part of the bsie test suite.
+A copy of the license is provided with the project.
+Author: Matthias Baumgartner, 2022
+"""
+# standard imports
+import unittest
+
+# bsie imports
+from bsie.utils import errors
+
+# objects to test
+from bsie.utils.loading import safe_load, unpack_qualified_name
+
+
+## code ##
+
+class TestUtils(unittest.TestCase):
+ def test_safe_load(self):
+ # invalid module
+ self.assertRaises(errors.LoaderError, safe_load, 'dBGHMSAYOoKeKMpywDoKZQycENFPvN', 'foobar')
+ self.assertRaises(errors.LoaderError, safe_load, 'dBGHMSAYOoKeKMpywDoKZQycENFPvN.bar', 'foobar')
+ # partially valid module
+ self.assertRaises(errors.LoaderError, safe_load, 'os.foo', 'foobar')
+ # invalid class
+ self.assertRaises(errors.LoaderError, safe_load, 'os.path', 'foo')
+ # valid module and class
+ cls = safe_load('collections.abc', 'Container')
+ import collections.abc
+ self.assertEqual(cls, collections.abc.Container)
+
+ def test_unpack_qualified_name(self):
+ self.assertRaises(TypeError, unpack_qualified_name, 123)
+ self.assertRaises(TypeError, unpack_qualified_name, None)
+ self.assertRaises(ValueError, unpack_qualified_name, '')
+ self.assertRaises(ValueError, unpack_qualified_name, 'path')
+ self.assertRaises(ValueError, unpack_qualified_name, '.Path')
+ self.assertEqual(unpack_qualified_name('path.Path'), ('path', 'Path'))
+ self.assertEqual(unpack_qualified_name('path.foo.bar.Path'), ('path.foo.bar', 'Path'))
+
+
+## main ##
+
+if __name__ == '__main__':
+ unittest.main()
+
+## EOF ##