aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Matthias Baumgartner <dev@igsor.net> 2023-01-16 21:37:09 +0100
committer Matthias Baumgartner <dev@igsor.net> 2023-01-16 21:37:09 +0100
commit 05a841215c82ef40d4679dfc4d2c26572bd4d349 (patch)
tree 9888ae0bd2345816d1ab479dd34b4c6b902c158a
parent 057e09d6537bf5c39815661a75819081e3e5fda7 (diff)
parent 58aaa864f9747d27c065739256d4c6635ca9b751 (diff)
download bsie-05a841215c82ef40d4679dfc4d2c26572bd4d349.tar.gz
bsie-05a841215c82ef40d4679dfc4d2c26572bd4d349.tar.bz2
bsie-05a841215c82ef40d4679dfc4d2c26572bd4d349.zip
Merge branch 'mb/feature' into develop
-rw-r--r--.gitignore3
-rwxr-xr-xbsie.app4
-rw-r--r--bsie/__init__.py2
-rw-r--r--bsie/apps/__init__.py2
-rw-r--r--bsie/apps/index.py25
-rw-r--r--bsie/apps/info.py25
-rw-r--r--bsie/base/__init__.py24
-rw-r--r--bsie/extractor/__init__.py11
-rw-r--r--bsie/extractor/base.py (renamed from bsie/base/extractor.py)14
-rw-r--r--bsie/extractor/builder.py77
-rw-r--r--bsie/extractor/generic/__init__.py2
-rw-r--r--bsie/extractor/generic/constant.py10
-rw-r--r--bsie/extractor/generic/path.py8
-rw-r--r--bsie/extractor/generic/stat.py10
-rw-r--r--bsie/extractor/image/__init__.py13
-rw-r--r--bsie/extractor/image/colors_spatial.py154
-rw-r--r--bsie/lib/__init__.py4
-rw-r--r--bsie/lib/bsie.py6
-rw-r--r--bsie/lib/builder.py85
-rw-r--r--bsie/lib/pipeline.py (renamed from bsie/tools/pipeline.py)21
-rw-r--r--bsie/reader/__init__.py13
-rw-r--r--bsie/reader/base.py (renamed from bsie/base/reader.py)4
-rw-r--r--bsie/reader/builder.py78
-rw-r--r--bsie/reader/chain.py88
-rw-r--r--bsie/reader/image/__init__.py37
-rw-r--r--bsie/reader/image/_pillow.py39
-rw-r--r--bsie/reader/image/_raw.py61
-rw-r--r--bsie/reader/path.py8
-rw-r--r--bsie/reader/stat.py9
-rw-r--r--bsie/tools/builder.py226
-rw-r--r--bsie/utils/__init__.py7
-rw-r--r--bsie/utils/bsfs.py2
-rw-r--r--bsie/utils/errors.py (renamed from bsie/base/errors.py)8
-rw-r--r--bsie/utils/filematcher/__init__.py (renamed from bsie/tools/__init__.py)10
-rw-r--r--bsie/utils/filematcher/matcher.py179
-rw-r--r--bsie/utils/filematcher/parser.py146
-rw-r--r--bsie/utils/loading.py54
-rw-r--r--bsie/utils/namespaces.py3
-rw-r--r--bsie/utils/node.py2
-rw-r--r--setup.py14
-rw-r--r--test/apps/test_index.py26
-rw-r--r--test/apps/test_info.py21
-rw-r--r--test/apps/testdir/testimage.jpgbin0 -> 349264 bytes
-rw-r--r--test/extractor/generic/test_constant.py10
-rw-r--r--test/extractor/generic/test_path.py14
-rw-r--r--test/extractor/generic/test_stat.py14
-rw-r--r--test/extractor/image/__init__.py (renamed from test/base/__init__.py)0
-rw-r--r--test/extractor/image/test_colors_spatial.py100
-rw-r--r--test/extractor/image/testimage.jpgbin0 -> 349264 bytes
-rw-r--r--test/extractor/test_base.py (renamed from test/base/test_extractor.py)18
-rw-r--r--test/extractor/test_builder.py103
-rw-r--r--test/lib/test_bsie.py24
-rw-r--r--test/lib/test_builder.py107
-rw-r--r--test/lib/test_pipeline.py (renamed from test/tools/test_pipeline.py)15
-rw-r--r--test/reader/image/__init__.py (renamed from test/tools/__init__.py)0
-rw-r--r--test/reader/image/load_nef.py28
-rw-r--r--test/reader/image/test_image.py54
-rw-r--r--test/reader/image/test_pillow.py44
-rw-r--r--test/reader/image/test_raw_image.py53
-rw-r--r--test/reader/image/testimage.jpgbin0 -> 518 bytes
-rw-r--r--test/reader/test_base.py (renamed from test/base/test_reader.py)6
-rw-r--r--test/reader/test_builder.py54
-rw-r--r--test/reader/test_chain.py85
-rw-r--r--test/reader/test_path.py2
-rw-r--r--test/reader/test_stat.py4
-rw-r--r--test/tools/test_builder.py246
-rw-r--r--test/tools/testfile.t1
-rw-r--r--test/utils/filematcher/__init__.py0
-rw-r--r--test/utils/filematcher/empty0
-rw-r--r--test/utils/filematcher/test_matcher.py232
-rw-r--r--test/utils/filematcher/test_parser.py146
-rw-r--r--test/utils/filematcher/testimage.jpgbin0 -> 518 bytes
-rw-r--r--test/utils/filematcher/textfile.t4
-rw-r--r--test/utils/test_loading.py48
-rw-r--r--test/utils/test_node.py2
75 files changed, 2327 insertions, 622 deletions
diff --git a/.gitignore b/.gitignore
index e45b114..304ae08 100644
--- a/.gitignore
+++ b/.gitignore
@@ -22,4 +22,7 @@ build/
# doc builds
doc/build/
+# testing data
+test/reader/image/testimage.nef*
+
## EOF ##
diff --git a/bsie.app b/bsie.app
index ba9cee7..d5808e7 100755
--- a/bsie.app
+++ b/bsie.app
@@ -4,11 +4,11 @@ Part of the bsie module.
A copy of the license is provided with the project.
Author: Matthias Baumgartner, 2022
"""
-# imports
+# standard imports
import argparse
import typing
-# module imports
+# bsie imports
import bsie
import bsie.apps
diff --git a/bsie/__init__.py b/bsie/__init__.py
index 8d2308c..c253f39 100644
--- a/bsie/__init__.py
+++ b/bsie/__init__.py
@@ -4,7 +4,7 @@ Part of the bsie module.
A copy of the license is provided with the project.
Author: Matthias Baumgartner, 2022
"""
-# imports
+# standard imports
import collections
import typing
diff --git a/bsie/apps/__init__.py b/bsie/apps/__init__.py
index a548c3c..1c3d0f9 100644
--- a/bsie/apps/__init__.py
+++ b/bsie/apps/__init__.py
@@ -4,7 +4,7 @@ Part of the bsie module.
A copy of the license is provided with the project.
Author: Matthias Baumgartner, 2022
"""
-# imports
+# standard imports
import typing
# inner-module imports
diff --git a/bsie/apps/index.py b/bsie/apps/index.py
index 1dbfdd8..21c2318 100644
--- a/bsie/apps/index.py
+++ b/bsie/apps/index.py
@@ -4,16 +4,16 @@ Part of the bsie module.
A copy of the license is provided with the project.
Author: Matthias Baumgartner, 2022
"""
-# imports
+# standard imports
import argparse
import os
import typing
# bsie imports
-from bsie.base import errors
-from bsie.lib import BSIE
-from bsie.tools import builder
-from bsie.utils import bsfs
+from bsie.extractor import ExtractorBuilder
+from bsie.lib import BSIE, PipelineBuilder
+from bsie.reader import ReaderBuilder
+from bsie.utils import bsfs, errors
# exports
__all__: typing.Sequence[str] = (
@@ -44,9 +44,9 @@ def main(argv):
# FIXME: Read reader/extractor configs from a config file
# reader builder
- rbuild = builder.ReaderBuilder({})
+ rbuild = ReaderBuilder()
# extractor builder
- ebuild = builder.ExtractorBuilder([
+ ebuild = ExtractorBuilder([
{'bsie.extractor.generic.path.Path': {}},
{'bsie.extractor.generic.stat.Stat': {}},
{'bsie.extractor.generic.constant.Constant': dict(
@@ -58,9 +58,14 @@ def main(argv):
bsfs:unique "true"^^xsd:boolean .
''',
)},
+ {'bsie.extractor.image.colors_spatial.ColorsSpatial': {
+ 'width': 2,
+ 'height': 2,
+ 'exp': 2,
+ }},
])
# pipeline builder
- pbuild = builder.PipelineBuilder(
+ pbuild = PipelineBuilder(
bsfs.Namespace(args.user + ('/' if not args.user.endswith('/') else '')),
rbuild,
ebuild,
@@ -82,7 +87,9 @@ def main(argv):
# index input paths
for path in args.input_file:
- if os.path.isdir(path) and args.recursive:
+ if not os.path.exists(path):
+ pass # FIXME: notify the user
+ elif os.path.isdir(path) and args.recursive:
for dirpath, _, filenames in os.walk(path, topdown=True, followlinks=args.follow):
for filename in filenames:
for node, pred, value in bsie.from_file(os.path.join(dirpath, filename)):
diff --git a/bsie/apps/info.py b/bsie/apps/info.py
index eaf1f71..64a4eba 100644
--- a/bsie/apps/info.py
+++ b/bsie/apps/info.py
@@ -4,15 +4,16 @@ Part of the bsie module.
A copy of the license is provided with the project.
Author: Matthias Baumgartner, 2022
"""
-# imports
+# standard imports
import argparse
import sys
import typing
# bsie imports
-from bsie.base import errors
-from bsie.tools import builder
-from bsie.utils import bsfs
+from bsie.extractor import ExtractorBuilder
+from bsie.lib import PipelineBuilder
+from bsie.reader import ReaderBuilder
+from bsie.utils import bsfs, errors
# exports
__all__: typing.Sequence[str] = (
@@ -25,15 +26,15 @@ __all__: typing.Sequence[str] = (
def main(argv):
"""Show information from BSIE."""
parser = argparse.ArgumentParser(description=main.__doc__, prog='info')
- parser.add_argument('what', choices=('predicates', ),
+ parser.add_argument('what', choices=('predicates', 'schema'),
help='Select what information to show.')
args = parser.parse_args(argv)
# FIXME: Read reader/extractor configs from a config file
# reader builder
- rbuild = builder.ReaderBuilder({})
+ rbuild = ReaderBuilder()
# extractor builder
- ebuild = builder.ExtractorBuilder([
+ ebuild = ExtractorBuilder([
{'bsie.extractor.generic.path.Path': {}},
{'bsie.extractor.generic.stat.Stat': {}},
{'bsie.extractor.generic.constant.Constant': dict(
@@ -45,9 +46,14 @@ def main(argv):
bsfs:unique "true"^^xsd:boolean .
''',
)},
+ {'bsie.extractor.image.colors_spatial.ColorsSpatial': {
+ 'width': 2,
+ 'height': 2,
+ 'exp': 2,
+ }},
])
# pipeline builder
- pbuild = builder.PipelineBuilder(
+ pbuild = PipelineBuilder(
bsfs.Namespace('http://example.com/me/'), # not actually used
rbuild,
ebuild,
@@ -61,6 +67,9 @@ def main(argv):
# show predicates
for pred in pipeline.schema.predicates():
print(pred.uri)
+ elif args.what == 'schema':
+ # show schema
+ print(bsfs.schema.to_string(pipeline.schema))
else:
# args.what is already checked by argparse
raise errors.UnreachableError()
diff --git a/bsie/base/__init__.py b/bsie/base/__init__.py
deleted file mode 100644
index 0d362cd..0000000
--- a/bsie/base/__init__.py
+++ /dev/null
@@ -1,24 +0,0 @@
-"""The base module defines the BSIE interfaces.
-
-You'll mostly find abstract classes here.
-
-Part of the bsie module.
-A copy of the license is provided with the project.
-Author: Matthias Baumgartner, 2022
-"""
-# imports
-import typing
-
-# inner-module imports
-from . import errors
-from .extractor import Extractor
-from .reader import Reader
-
-# exports
-__all__: typing.Sequence[str] = (
- 'Extractor',
- 'Reader',
- 'errors',
- )
-
-## EOF ##
diff --git a/bsie/extractor/__init__.py b/bsie/extractor/__init__.py
index ef31343..5f385ee 100644
--- a/bsie/extractor/__init__.py
+++ b/bsie/extractor/__init__.py
@@ -6,10 +6,17 @@ Part of the bsie module.
A copy of the license is provided with the project.
Author: Matthias Baumgartner, 2022
"""
-# imports
+# standard imports
import typing
+# inner-module imports
+from .base import Extractor
+from .builder import ExtractorBuilder
+
# exports
-__all__: typing.Sequence[str] = []
+__all__: typing.Sequence[str] = (
+ 'Extractor',
+ 'ExtractorBuilder',
+ )
## EOF ##
diff --git a/bsie/base/extractor.py b/bsie/extractor/base.py
index c44021b..7401244 100644
--- a/bsie/base/extractor.py
+++ b/bsie/extractor/base.py
@@ -4,7 +4,7 @@ Part of the bsie module.
A copy of the license is provided with the project.
Author: Matthias Baumgartner, 2022
"""
-# imports
+# standard imports
import abc
import typing
@@ -31,13 +31,22 @@ SCHEMA_PREAMBLE = '''
prefix bsfs: <http://bsfs.ai/schema/>
prefix bse: <http://bsfs.ai/schema/Entity#>
+ # default definitions
+ bsfs:Array rdfs:subClassOf bsfs:Literal .
+ bsfs:Number rdfs:subClassOf bsfs:Literal .
+ bsfs:Time rdfs:subClassOf bsfs:Literal .
+ bsfs:Feature rdfs:subClassOf bsfs:Array ;
+ bsfs:dimension "1"^^xsd:integer ;
+ bsfs:dtype bsfs:f16 ;
+ bsfs:distance bsfs:euclidean .
+
# essential nodes
bsfs:Entity rdfs:subClassOf bsfs:Node .
bsfs:File rdfs:subClassOf bsfs:Entity .
# common definitions
xsd:string rdfs:subClassOf bsfs:Literal .
- xsd:integer rdfs:subClassOf bsfs:Literal .
+ xsd:integer rdfs:subClassOf bsfs:Number .
'''
@@ -99,5 +108,6 @@ class Extractor(abc.ABC):
principals: typing.Iterable[bsfs.schema.Predicate],
) -> typing.Iterator[typing.Tuple[node.Node, bsfs.schema.Predicate, typing.Any]]:
"""Return (node, predicate, value) triples."""
+ # FIXME: type annotation could be more strict: value is Hashable
## EOF ##
diff --git a/bsie/extractor/builder.py b/bsie/extractor/builder.py
new file mode 100644
index 0000000..0fd3685
--- /dev/null
+++ b/bsie/extractor/builder.py
@@ -0,0 +1,77 @@
+"""
+
+Part of the bsie module.
+A copy of the license is provided with the project.
+Author: Matthias Baumgartner, 2022
+"""
+# standard imports
+import typing
+
+# bsie imports
+from bsie.utils import bsfs, errors, safe_load, unpack_qualified_name
+
+# inner-module imports
+from . import base
+
+# exports
+__all__: typing.Sequence[str] = (
+ 'ExtractorBuilder',
+ )
+
+
+## code ##
+
+class ExtractorBuilder():
+ """Build `bsie.extractor.Extractor` instances.
+
+ It is permissible to build multiple instances of the same extractor
+ (typically with different arguments), hence the ExtractorBuilder
+ receives a list of build specifications. Each specification is
+ a dict with a single key (extractor's qualified name) and a dict
+ to be used as keyword arguments.
+ Example: [{'bsie.extractor.generic.path.Path': {}}, ]
+
+ """
+
+ # build specifications
+ _specs: typing.List[typing.Dict[str, typing.Dict[str, typing.Any]]]
+
+ def __init__(self, specs: typing.List[typing.Dict[str, typing.Dict[str, typing.Any]]]):
+ self._specs = specs
+
+ def __iter__(self) -> typing.Iterator[int]:
+ """Iterate over extractor specifications."""
+ return iter(range(len(self._specs)))
+
+ def build(self, index: int) -> base.Extractor:
+ """Return an instance of the n'th extractor (n=*index*)."""
+ # get build instructions
+ specs = self._specs[index]
+
+ # check specs structure. expecting {name: {kwargs}}
+ if not isinstance(specs, dict):
+ raise TypeError(f'expected a dict, found {bsfs.typename(specs)}')
+ if len(specs) != 1:
+ raise TypeError(f'expected a dict of length one, found {len(specs)}')
+
+ # get name and args from specs
+ name = next(iter(specs.keys()))
+ kwargs = specs[name]
+
+ # check kwargs structure
+ if not isinstance(kwargs, dict):
+ raise TypeError(f'expected a dict, found {bsfs.typename(kwargs)}')
+
+ # check name and get module/class components
+ module_name, class_name = unpack_qualified_name(name)
+
+ # import extractor class
+ cls = safe_load(module_name, class_name)
+
+ try: # build and return instance
+ return cls(**kwargs)
+
+ except Exception as err:
+ raise errors.BuilderError(f'failed to build extractor {name} due to {bsfs.typename(err)}: {err}') from err
+
+## EOF ##
diff --git a/bsie/extractor/generic/__init__.py b/bsie/extractor/generic/__init__.py
index 0cb7e7f..4783949 100644
--- a/bsie/extractor/generic/__init__.py
+++ b/bsie/extractor/generic/__init__.py
@@ -7,7 +7,7 @@ Part of the bsie module.
A copy of the license is provided with the project.
Author: Matthias Baumgartner, 2022
"""
-# imports
+# standard imports
import typing
# exports
diff --git a/bsie/extractor/generic/constant.py b/bsie/extractor/generic/constant.py
index 11384e6..938e20c 100644
--- a/bsie/extractor/generic/constant.py
+++ b/bsie/extractor/generic/constant.py
@@ -4,13 +4,15 @@ Part of the bsie module.
A copy of the license is provided with the project.
Author: Matthias Baumgartner, 2022
"""
-# imports
+# standard imports
import typing
# bsie imports
-from bsie.base import extractor
from bsie.utils import bsfs, node
+# inner-module imports
+from .. import base
+
# exports
__all__: typing.Sequence[str] = (
'Constant',
@@ -19,7 +21,7 @@ __all__: typing.Sequence[str] = (
## code ##
-class Constant(extractor.Extractor):
+class Constant(base.Extractor):
"""Extract information from file's path."""
CONTENT_READER = None
@@ -32,7 +34,7 @@ class Constant(extractor.Extractor):
schema: str,
tuples: typing.Iterable[typing.Tuple[bsfs.URI, typing.Any]],
):
- super().__init__(bsfs.schema.Schema.from_string(extractor.SCHEMA_PREAMBLE + schema))
+ super().__init__(bsfs.schema.from_string(base.SCHEMA_PREAMBLE + schema))
# NOTE: Raises a KeyError if the predicate is not part of the schema
self._tuples = tuple((self.schema.predicate(p_uri), value) for p_uri, value in tuples)
# TODO: use schema instance for value checking
diff --git a/bsie/extractor/generic/path.py b/bsie/extractor/generic/path.py
index 7018e12..c984515 100644
--- a/bsie/extractor/generic/path.py
+++ b/bsie/extractor/generic/path.py
@@ -4,12 +4,12 @@ Part of the bsie module.
A copy of the license is provided with the project.
Author: Matthias Baumgartner, 2022
"""
-# imports
+# standard imports
import os
import typing
# bsie imports
-from bsie.base import extractor
+from bsie.extractor import base
from bsie.utils import bsfs, node, ns
# exports
@@ -20,7 +20,7 @@ __all__: typing.Sequence[str] = (
## code ##
-class Path(extractor.Extractor):
+class Path(base.Extractor):
"""Extract information from file's path."""
CONTENT_READER = 'bsie.reader.path.Path'
@@ -29,7 +29,7 @@ class Path(extractor.Extractor):
_callmap: typing.Dict[bsfs.schema.Predicate, typing.Callable[[str], typing.Any]]
def __init__(self):
- super().__init__(bsfs.schema.Schema.from_string(extractor.SCHEMA_PREAMBLE + '''
+ super().__init__(bsfs.schema.from_string(base.SCHEMA_PREAMBLE + '''
bse:filename rdfs:subClassOf bsfs:Predicate ;
rdfs:domain bsfs:File ;
rdfs:range xsd:string ;
diff --git a/bsie/extractor/generic/stat.py b/bsie/extractor/generic/stat.py
index 0b9ce29..9394456 100644
--- a/bsie/extractor/generic/stat.py
+++ b/bsie/extractor/generic/stat.py
@@ -4,14 +4,16 @@ Part of the bsie module.
A copy of the license is provided with the project.
Author: Matthias Baumgartner, 2022
"""
-# imports
+# standard imports
import os
import typing
# bsie imports
-from bsie.base import extractor
from bsie.utils import bsfs, node, ns
+# inner-module imports
+from .. import base
+
# exports
__all__: typing.Sequence[str] = (
'Stat',
@@ -20,7 +22,7 @@ __all__: typing.Sequence[str] = (
## code ##
-class Stat(extractor.Extractor):
+class Stat(base.Extractor):
"""Extract information from the file system."""
CONTENT_READER = 'bsie.reader.stat.Stat'
@@ -29,7 +31,7 @@ class Stat(extractor.Extractor):
_callmap: typing.Dict[bsfs.schema.Predicate, typing.Callable[[os.stat_result], typing.Any]]
def __init__(self):
- super().__init__(bsfs.schema.Schema.from_string(extractor.SCHEMA_PREAMBLE + '''
+ super().__init__(bsfs.schema.from_string(base.SCHEMA_PREAMBLE + '''
bse:filesize rdfs:subClassOf bsfs:Predicate ;
rdfs:domain bsfs:File ;
rdfs:range xsd:integer ;
diff --git a/bsie/extractor/image/__init__.py b/bsie/extractor/image/__init__.py
new file mode 100644
index 0000000..75b118d
--- /dev/null
+++ b/bsie/extractor/image/__init__.py
@@ -0,0 +1,13 @@
+"""
+
+Part of the bsie module.
+A copy of the license is provided with the project.
+Author: Matthias Baumgartner, 2022
+"""
+# standard imports
+import typing
+
+# exports
+__all__: typing.Sequence[str] = []
+
+## EOF ##
diff --git a/bsie/extractor/image/colors_spatial.py b/bsie/extractor/image/colors_spatial.py
new file mode 100644
index 0000000..ce5b9f2
--- /dev/null
+++ b/bsie/extractor/image/colors_spatial.py
@@ -0,0 +1,154 @@
+"""Spatial color features.
+
+Part of the bsie module.
+A copy of the license is provided with the project.
+Author: Matthias Baumgartner, 2022
+"""
+# standard imports
+import typing
+
+# external imports
+import PIL.Image
+import numpy as np
+
+# bsie imports
+from bsie.utils import bsfs, node, ns
+
+# inner-module imports
+from .. import base
+
+# constants
+FEATURE_NAME = ns.bsf + 'ColorsSpatial'
+PREDICATE_NAME = ns.bse + 'colors_spatial'
+
+# exports
+__all__: typing.Sequence[str] = (
+ 'ColorsSpatial',
+ )
+
+
+## code ##
+
+class ColorsSpatial(base.Extractor):
+ """Determine dominant colors of subregions in the image.
+
+ Computes the dominant color of increasingly smaller subregions of the image.
+ """
+
+ CONTENT_READER = 'bsie.reader.image.Image'
+
+ # Initial subregion width.
+ width: int
+
+ # Initial subregion height.
+ height: int
+
+ # Decrement exponent.
+ exp: float
+
+ # Principal predicate's URI.
+ _predicate_name: bsfs.URI
+
+ def __init__(
+ self,
+ width: int = 32,
+ height: int = 32,
+ exp: float = 4.,
+ ):
+ # instance identifier
+ uuid = bsfs.uuid.UCID.from_dict({
+ 'width': width,
+ 'height': height,
+ 'exp': exp,
+ })
+ # determine symbol names
+ instance_name = FEATURE_NAME[uuid]
+ predicate_name = PREDICATE_NAME[uuid]
+ # get vector dimension
+ dimension = self.dimension(width, height, exp)
+ # initialize parent with the schema
+ super().__init__(bsfs.schema.from_string(base.SCHEMA_PREAMBLE + f'''
+ <{FEATURE_NAME}> rdfs:subClassOf bsfs:Feature ;
+ # annotations
+ rdfs:label "Spatially dominant colors"^^xsd:string ;
+ schema:description "Domiant colors of subregions in an image."^^xsd:string ;
+ bsfs:dtype xsd:integer .
+
+ <{instance_name}> rdfs:subClassOf <{FEATURE_NAME}> ;
+ bsfs:dimension "{dimension}"^^xsd:integer ;
+ # annotations
+ <{FEATURE_NAME}/args#width> "{width}"^^xsd:integer ;
+ <{FEATURE_NAME}/args#height> "{height}"^^xsd:integer ;
+ <{FEATURE_NAME}/args#exp> "{exp}"^^xsd:float .
+
+ <{predicate_name}> rdfs:subClassOf bsfs:Predicate ;
+ rdfs:domain bsfs:File ;
+ rdfs:range <{instance_name}> ;
+ bsfs:unique "true"^^xsd:boolean .
+
+ '''))
+ # assign extra members
+ self.width = width
+ self.height = height
+ self.exp = exp
+ self._predicate_name = predicate_name
+
+ def __repr__(self) -> str:
+ return f'{bsfs.typename(self)}({self.width}, {self.height}, {self.exp})'
+
+ def __eq__(self, other: typing.Any) -> bool:
+ return super().__eq__(other) \
+ and self.width == other.width \
+ and self.height == other.height \
+ and self.exp == other.exp
+
+ def __hash__(self) -> int:
+ return hash((super().__hash__(), self.width, self.height, self.exp))
+
+ @staticmethod
+ def dimension(width: int, height: int, exp: float) -> int:
+ """Return the feature vector dimension."""
+ # FIXME: replace with a proper formula
+ dim = 0
+ while width >= 1 and height >= 1:
+ dim += width * height
+ width = np.floor(width / exp)
+ height = np.floor(height / exp)
+ dim *= 3 # per band
+ return int(dim)
+
+ def extract(
+ self,
+ subject: node.Node,
+ content: PIL.Image,
+ principals: typing.Iterable[bsfs.schema.Predicate],
+ ) -> typing.Iterator[typing.Tuple[node.Node, bsfs.schema.Predicate, typing.Any]]:
+ # check principals
+ if self.schema.predicate(self._predicate_name) not in principals:
+ # nothing to do; abort
+ return
+
+ # convert to HSV
+ content = content.convert('HSV')
+
+ # get dimensions
+ width, height = self.width, self.height
+ num_bands = len(content.getbands()) # it's three since we converted to HSV before
+
+ features = []
+ while width >= 1 and height >= 1:
+ # downsample
+ img = content.resize((width, height), resample=PIL.Image.Resampling.BOX)
+ # feature vector
+ features.append(
+ np.array(img.getdata()).reshape((width * height, num_bands)))
+ # iterate
+ width = int(np.floor(width / self.exp))
+ height = int(np.floor(height / self.exp))
+
+ # combine bands and convert features to tuple
+ value = tuple(np.vstack(features).reshape(-1))
+ # return triple with feature vector as value
+ yield subject, self.schema.predicate(self._predicate_name), value
+
+## EOF ##
diff --git a/bsie/lib/__init__.py b/bsie/lib/__init__.py
index 578c2c4..4239d3b 100644
--- a/bsie/lib/__init__.py
+++ b/bsie/lib/__init__.py
@@ -4,15 +4,17 @@ Part of the bsie module.
A copy of the license is provided with the project.
Author: Matthias Baumgartner, 2022
"""
-# imports
+# standard imports
import typing
# inner-module imports
from .bsie import BSIE
+from .builder import PipelineBuilder
# exports
__all__: typing.Sequence[str] = (
'BSIE',
+ 'PipelineBuilder',
)
## EOF ##
diff --git a/bsie/lib/bsie.py b/bsie/lib/bsie.py
index e087fa9..668783d 100644
--- a/bsie/lib/bsie.py
+++ b/bsie/lib/bsie.py
@@ -4,13 +4,15 @@ Part of the bsie module.
A copy of the license is provided with the project.
Author: Matthias Baumgartner, 2022
"""
-# imports
+# standard imports
import typing
# bsie imports
-from bsie.tools import Pipeline
from bsie.utils import bsfs, node, ns
+# inner-module imports
+from .pipeline import Pipeline
+
# exports
__all__: typing.Sequence[str] = (
'BSIE',
diff --git a/bsie/lib/builder.py b/bsie/lib/builder.py
new file mode 100644
index 0000000..c2abffe
--- /dev/null
+++ b/bsie/lib/builder.py
@@ -0,0 +1,85 @@
+"""
+
+Part of the bsie module.
+A copy of the license is provided with the project.
+Author: Matthias Baumgartner, 2022
+"""
+# standard imports
+import logging
+import typing
+
+# bsie imports
+from bsie.extractor import ExtractorBuilder
+from bsie.reader import ReaderBuilder
+from bsie.utils import bsfs, errors
+
+# inner-module imports
+from . import pipeline
+
+# exports
+__all__: typing.Sequence[str] = (
+ 'PipelineBuilder',
+ )
+
+
+## code ##
+
+logger = logging.getLogger(__name__)
+
+class PipelineBuilder():
+ """Build `bsie.lib.pipeline.Pipeline` instances."""
+
+ # Prefix to be used in the Pipeline.
+ prefix: bsfs.Namespace
+
+ # builder for Readers.
+ rbuild: ReaderBuilder
+
+ # builder for Extractors.
+ ebuild: ExtractorBuilder
+
+ def __init__(
+ self,
+ prefix: bsfs.Namespace,
+ reader_builder: ReaderBuilder,
+ extractor_builder: ExtractorBuilder,
+ ):
+ self.prefix = prefix
+ self.rbuild = reader_builder
+ self.ebuild = extractor_builder
+
+ def build(self) -> pipeline.Pipeline:
+ """Return a Pipeline instance."""
+ ext2rdr = {}
+
+ for eidx in self.ebuild:
+ # build extractor
+ try:
+ ext = self.ebuild.build(eidx)
+
+ except errors.LoaderError as err: # failed to load extractor; skip
+ logger.error('failed to load extractor: %s', err)
+ continue
+
+ except errors.BuilderError as err: # failed to build instance; skip
+ logger.error(str(err))
+ continue
+
+ try:
+ # get reader required by extractor
+ if ext.CONTENT_READER is not None:
+ rdr = self.rbuild.build(ext.CONTENT_READER)
+ else:
+ rdr = None
+ # store extractor
+ ext2rdr[ext] = rdr
+
+ except errors.LoaderError as err: # failed to load reader
+ logger.error('failed to load reader: %s', err)
+
+ except errors.BuilderError as err: # failed to build reader
+ logger.error(str(err))
+
+ return pipeline.Pipeline(self.prefix, ext2rdr)
+
+## EOF ##
diff --git a/bsie/tools/pipeline.py b/bsie/lib/pipeline.py
index 20e8ddf..44685ba 100644
--- a/bsie/tools/pipeline.py
+++ b/bsie/lib/pipeline.py
@@ -4,14 +4,15 @@ Part of the bsie module.
A copy of the license is provided with the project.
Author: Matthias Baumgartner, 2022
"""
-# imports
+# standard imports
from collections import defaultdict
import logging
import typing
# bsie imports
-from bsie import base
-from bsie.utils import bsfs, node, ns
+from bsie.extractor import Extractor
+from bsie.reader import Reader
+from bsie.utils import bsfs, errors, node, ns
# exports
__all__: typing.Sequence[str] = (
@@ -43,12 +44,12 @@ class Pipeline():
_prefix: bsfs.Namespace
# extractor -> reader mapping
- _ext2rdr: typing.Dict[base.extractor.Extractor, typing.Optional[base.reader.Reader]]
+ _ext2rdr: typing.Dict[Extractor, typing.Optional[Reader]]
def __init__(
self,
prefix: bsfs.Namespace,
- ext2rdr: typing.Dict[base.extractor.Extractor, typing.Optional[base.reader.Reader]]
+ ext2rdr: typing.Dict[Extractor, typing.Optional[Reader]]
):
# store core members
self._prefix = prefix + FILE_PREFIX
@@ -124,6 +125,7 @@ class Pipeline():
try:
# get content
content = rdr(path) if rdr is not None else None
+ #logger.info('extracted %s from %s', rdr, path)
# apply extractors on this content
for ext in extrs:
@@ -132,11 +134,16 @@ class Pipeline():
for subject, pred, value in ext.extract(subject, content, principals):
yield subject, pred, value
- except base.errors.ExtractorError as err:
+ except errors.ExtractorError as err:
# critical extractor failure.
logger.error('%s failed to extract triples from content: %s', ext, err)
- except base.errors.ReaderError as err:
+ except errors.UnsupportedFileFormatError:
+ # failed to read the file format. skip.
+ #logger.warning('%s could not process the file format of %s', rdr, err)
+ pass
+
+ except errors.ReaderError as err:
# failed to read any content. skip.
logger.error('%s failed to read content: %s', rdr, err)
diff --git a/bsie/reader/__init__.py b/bsie/reader/__init__.py
index a45f22b..4163d1c 100644
--- a/bsie/reader/__init__.py
+++ b/bsie/reader/__init__.py
@@ -15,5 +15,18 @@ Part of the bsie module.
A copy of the license is provided with the project.
Author: Matthias Baumgartner, 2022
"""
+# standard imports
+import typing
+# inner-module imports
+from .base import Reader
+from .builder import ReaderBuilder
+
+# exports
+__all__: typing.Sequence[str] = (
+ 'Reader',
+ 'ReaderBuilder',
+ )
+
+## EOF ##
## EOF ##
diff --git a/bsie/base/reader.py b/bsie/reader/base.py
index cbabd36..099a327 100644
--- a/bsie/base/reader.py
+++ b/bsie/reader/base.py
@@ -8,7 +8,7 @@ Part of the bsie module.
A copy of the license is provided with the project.
Author: Matthias Baumgartner, 2022
"""
-# imports
+# standard imports
import abc
import typing
@@ -39,7 +39,7 @@ class Reader(abc.ABC):
return hash(type(self))
@abc.abstractmethod
- def __call__(self, path: bsfs.URI) -> typing.Any:
+ def __call__(self, path: str) -> typing.Any:
"""Return some content of the file at *path*.
Raises a `ReaderError` if the reader cannot make sense of the file format.
"""
diff --git a/bsie/reader/builder.py b/bsie/reader/builder.py
new file mode 100644
index 0000000..8699e75
--- /dev/null
+++ b/bsie/reader/builder.py
@@ -0,0 +1,78 @@
+"""
+
+Part of the bsie module.
+A copy of the license is provided with the project.
+Author: Matthias Baumgartner, 2022
+"""
+# standard imports
+import typing
+
+# bsie imports
+from bsie.utils import bsfs, errors, safe_load, unpack_qualified_name
+
+# inner-module imports
+from . import base
+
+# exports
+__all__: typing.Sequence[str] = (
+ 'ReaderBuilder',
+ )
+
+
+## code ##
+
+class ReaderBuilder():
+ """Build `bsie.reader.Reader` instances.
+
+ Readers are defined via their qualified class name
+ (e.g., bsie.reader.path.Path) and optional keyword
+ arguments that are passed to the constructor via
+ the *kwargs* argument (name as key, kwargs as value).
+ The ReaderBuilder keeps a cache of previously built
+ reader instances, as they are anyway built with
+ identical keyword arguments.
+
+ """
+
+ # keyword arguments
+ _kwargs: typing.Dict[str, typing.Dict[str, typing.Any]]
+
+ # cached readers
+ _cache: typing.Dict[str, base.Reader]
+
+ def __init__(
+ self,
+ kwargs: typing.Optional[typing.Dict[str, typing.Dict[str, typing.Any]]] = None):
+ if kwargs is None:
+ kwargs = {}
+ self._kwargs = kwargs
+ self._cache = {}
+
+ def build(self, name: str) -> base.Reader:
+ """Return an instance for the qualified class name."""
+ # return cached instance
+ if name in self._cache:
+ return self._cache[name]
+
+ # check name and get module/class components
+ module_name, class_name = unpack_qualified_name(name)
+
+ # import reader class
+ cls = safe_load(module_name, class_name)
+
+ # get kwargs
+ kwargs = self._kwargs.get(name, {})
+ if not isinstance(kwargs, dict):
+ raise TypeError(f'expected a kwargs dict, found {bsfs.typename(kwargs)}')
+
+ try: # build, cache, and return instance
+ obj = cls(**kwargs)
+ # cache instance
+ self._cache[name] = obj
+ # return instance
+ return obj
+
+ except Exception as err:
+ raise errors.BuilderError(f'failed to build reader {name} due to {bsfs.typename(err)}: {err}') from err
+
+## EOF ##
diff --git a/bsie/reader/chain.py b/bsie/reader/chain.py
new file mode 100644
index 0000000..5e9e0d5
--- /dev/null
+++ b/bsie/reader/chain.py
@@ -0,0 +1,88 @@
+"""
+
+Part of the bsie module.
+A copy of the license is provided with the project.
+Author: Matthias Baumgartner, 2022
+"""
+# standard imports
+import logging
+import typing
+
+# bsie imports
+from bsie.utils import bsfs, errors
+
+# inner-module imports
+from . import base
+from . import builder
+
+# exports
+__all__: typing.Sequence[str] = (
+ 'ReaderChain',
+ )
+
+
+## code ##
+
+logger = logging.getLogger(__name__)
+
+# Content type.
+T_CONTENT = typing.TypeVar('T_CONTENT') # pylint: disable=invalid-name
+
+class ReaderChain(base.Reader, typing.Generic[T_CONTENT]):
+ """Try each sub-reader in turn and return the content from the first one that succeeds."""
+
+ # sub-readers for specific file formats.
+ _children: typing.Tuple[base.Reader, ...]
+
+ def __init__(
+ self,
+ subreader_names: typing.Iterable[str],
+ cfg: typing.Optional[typing.Any] = None,
+ ):
+ rbuild = builder.ReaderBuilder(cfg)
+ children = []
+ for name in subreader_names:
+ try:
+ # build sub-reader
+ children.append(rbuild.build(name))
+ except (ValueError,
+ TypeError,
+ errors.LoaderError,
+ errors.BuilderError) as err:
+ # failed to build a child; skip and notify
+ logger.warning('failed to load reader: %s', err)
+
+ if len(children) == 0:
+ logger.warning('%s failed to load any sub-readers.', bsfs.typename(self))
+
+ # copy children to member
+ self._children = tuple(children)
+
+ def __str__(self) -> str:
+ substr = ', '.join(str(child) for child in self._children)
+ return f'{bsfs.typename(self)}({substr})'
+
+ def __repr__(self) -> str:
+ return f'{bsfs.typename(self)}({self._children})'
+
+ def __eq__(self, other: typing.Any) -> bool:
+ return super().__eq__(other) \
+ and self._children == other._children
+
+ def __hash__(self) -> int:
+ return hash((super().__hash__(), self._children))
+
+ def __call__(self, path: str) -> T_CONTENT:
+ raise_error = errors.UnsupportedFileFormatError
+ for child in self._children:
+ try:
+ return child(path)
+ except errors.UnsupportedFileFormatError:
+ pass
+ except errors.ReaderError:
+ # child cannot read the file, skip.
+ raise_error = errors.ReaderError # type: ignore [assignment] # mypy is confused
+
+ raise raise_error(path)
+
+## EOF ##
diff --git a/bsie/reader/image/__init__.py b/bsie/reader/image/__init__.py
new file mode 100644
index 0000000..1f290b5
--- /dev/null
+++ b/bsie/reader/image/__init__.py
@@ -0,0 +1,37 @@
+"""
+
+Part of the bsie module.
+A copy of the license is provided with the project.
+Author: Matthias Baumgartner, 2022
+"""
+# standard imports
+import typing
+
+# external imports
+import PIL.Image
+
+# inner-module imports
+from .. import chain
+
+# constants
+# Qualified class names of the sub-readers that Image hands to chain.ReaderChain,
+# listed with the raw image reader first and the Pillow reader second.
+_FILE_FORMAT_READERS: typing.Sequence[str] = (
+ __package__ + '._raw.RawImage',
+ __package__ + '._pillow.PillowImage',
+ )
+
+# exports
+__all__: typing.Sequence[str] = (
+ 'Image',
+ )
+
+
+## code ##
+
+# FIXME: Check if PIL.Image or PIL.Image.Image, or if version-dependent
+# NOTE(review): PIL.Image is the module and PIL.Image.Image the image class in it -- confirm against Pillow docs.
+class Image(chain.ReaderChain[PIL.Image.Image]): # pylint: disable=too-few-public-methods
+ """Read an image file through a chain of format-specific sub-readers."""
+
+ def __init__(self, cfg: typing.Optional[typing.Any] = None):
+ # cfg is passed on unchanged to the ReaderChain constructor
+ super().__init__(_FILE_FORMAT_READERS, cfg)
+
+## EOF ##
diff --git a/bsie/reader/image/_pillow.py b/bsie/reader/image/_pillow.py
new file mode 100644
index 0000000..3144509
--- /dev/null
+++ b/bsie/reader/image/_pillow.py
@@ -0,0 +1,39 @@
+"""
+
+Part of the bsie module.
+A copy of the license is provided with the project.
+Author: Matthias Baumgartner, 2022
+"""
+# standard imports
+import typing
+
+# external imports
+import PIL.Image
+
+# bsie imports
+from bsie.utils import errors
+
+# inner-module imports
+from .. import base
+
+# exports
+__all__: typing.Sequence[str] = (
+ 'PillowImage',
+ )
+
+
+## code ##
+
+class PillowImage(base.Reader):
+    """Use PIL to read content of a variety of image file types."""
+
+    def __call__(self, path: str) -> PIL.Image.Image:
+        """Open *path* with Pillow and return the image.
+
+        Raises `errors.UnsupportedFileFormatError` if Pillow cannot identify
+        the file as an image, and `errors.ReaderError` on other I/O errors.
+        """
+        try:
+            # open file with PIL
+            return PIL.Image.open(path)
+        except PIL.UnidentifiedImageError as err:
+            # must be handled before IOError so that it is reported as an unsupported format
+            raise errors.UnsupportedFileFormatError(path) from err
+        except IOError as err:
+            raise errors.ReaderError(path) from err
+
+## EOF ##
diff --git a/bsie/reader/image/_raw.py b/bsie/reader/image/_raw.py
new file mode 100644
index 0000000..cd60453
--- /dev/null
+++ b/bsie/reader/image/_raw.py
@@ -0,0 +1,61 @@
+"""
+
+Part of the bsie module.
+A copy of the license is provided with the project.
+Author: Matthias Baumgartner, 2022
+"""
+# standard imports
+import typing
+
+# external imports
+import PIL.Image
+import rawpy
+
+# bsie imports
+from bsie.utils import errors, filematcher
+
+# inner-module imports
+from .. import base
+
+# constants
+MATCH_RULE = 'mime={image/x-nikon-nef} | extension={nef}'
+
+# exports
+__all__: typing.Sequence[str] = (
+ 'RawImage',
+ )
+
+
+## code ##
+
+class RawImage(base.Reader):
+    """Use rawpy to read content of raw image file types.
+
+    The keyword argument *file_match_rule* (default: `MATCH_RULE`) is parsed
+    into a file matcher that decides which files are accepted. All other
+    keyword arguments are forwarded to rawpy's postprocess.
+
+    """
+
+    # file matcher
+    _match: filematcher.Matcher
+
+    # additional kwargs to rawpy's postprocess
+    _rawpy_kwargs: typing.Dict[str, typing.Any]
+
+    def __init__(self, **rawpy_kwargs):
+        # the match rule is consumed here; the remaining kwargs are for postprocess
+        match_rule = rawpy_kwargs.pop('file_match_rule', MATCH_RULE)
+        self._match = filematcher.parse(match_rule)
+        self._rawpy_kwargs = rawpy_kwargs
+
+    def __call__(self, path: str) -> PIL.Image.Image:
+        """Return the postprocessed raw image at *path* as PIL image.
+
+        Raises `errors.UnsupportedFileFormatError` if *path* is rejected by the
+        file matcher, and `errors.ReaderError` if rawpy fails to read the file.
+        """
+        # perform quick checks first
+        if not self._match(path):
+            raise errors.UnsupportedFileFormatError(path)
+
+        try:
+            # open file with rawpy; the context manager closes the handle again
+            with rawpy.imread(path) as raw:
+                ary = raw.postprocess(**self._rawpy_kwargs)
+            # convert to PIL.Image
+            return PIL.Image.fromarray(ary)
+        except (rawpy.LibRawFatalError, # pylint: disable=no-member # pylint doesn't find the errors
+                rawpy.NotSupportedError, # pylint: disable=no-member
+                rawpy.LibRawNonFatalError, # pylint: disable=no-member
+                ) as err:
+            raise errors.ReaderError(path) from err
+
+## EOF ##
diff --git a/bsie/reader/path.py b/bsie/reader/path.py
index d60f187..1ca05a0 100644
--- a/bsie/reader/path.py
+++ b/bsie/reader/path.py
@@ -4,11 +4,11 @@ Part of the bsie module.
A copy of the license is provided with the project.
Author: Matthias Baumgartner, 2022
"""
-# imports
+# standard imports
import typing
-# bsie imports
-from bsie.base import reader
+# inner-module imports
+from . import base
# exports
__all__: typing.Sequence[str] = (
@@ -18,7 +18,7 @@ __all__: typing.Sequence[str] = (
## code ##
-class Path(reader.Reader):
+class Path(base.Reader):
"""Return the path."""
def __call__(self, path: str) -> str:
diff --git a/bsie/reader/stat.py b/bsie/reader/stat.py
index fc5fb24..706dc47 100644
--- a/bsie/reader/stat.py
+++ b/bsie/reader/stat.py
@@ -4,12 +4,15 @@ Part of the bsie module.
A copy of the license is provided with the project.
Author: Matthias Baumgartner, 2022
"""
-# imports
+# standard imports
import os
import typing
# bsie imports
-from bsie.base import errors, reader
+from bsie.utils import errors
+
+# inner-module imports
+from . import base
# exports
__all__: typing.Sequence[str] = (
@@ -19,7 +22,7 @@ __all__: typing.Sequence[str] = (
## code ##
-class Stat(reader.Reader):
+class Stat(base.Reader):
"""Read and return the filesystem's stat infos."""
def __call__(self, path: str) -> os.stat_result:
diff --git a/bsie/tools/builder.py b/bsie/tools/builder.py
deleted file mode 100644
index 190d9bf..0000000
--- a/bsie/tools/builder.py
+++ /dev/null
@@ -1,226 +0,0 @@
-"""
-
-Part of the bsie module.
-A copy of the license is provided with the project.
-Author: Matthias Baumgartner, 2022
-"""
-# imports
-import importlib
-import logging
-import typing
-
-# bsie imports
-from bsie import base
-from bsie.base import errors
-from bsie.utils import bsfs
-
-# inner-module imports
-from . import pipeline
-
-# exports
-__all__: typing.Sequence[str] = (
- 'ExtractorBuilder',
- 'PipelineBuilder',
- 'ReaderBuilder',
- )
-
-
-## code ##
-
-logger = logging.getLogger(__name__)
-
-def _safe_load(module_name: str, class_name: str):
- """Get a class from a module. Raise BuilderError if anything goes wrong."""
- try:
- # load the module
- module = importlib.import_module(module_name)
- except Exception as err:
- # cannot import module
- raise errors.LoaderError(f'cannot load module {module_name}') from err
-
- try:
- # get the class from the module
- cls = getattr(module, class_name)
- except Exception as err:
- # cannot find the class
- raise errors.LoaderError(f'cannot load class {class_name} from module {module_name}') from err
-
- return cls
-
-
-def _unpack_name(name):
- """Split a name into its module and class component (dot-separated)."""
- if not isinstance(name, str):
- raise TypeError(name)
- if '.' not in name:
- raise ValueError('name must be a qualified class name.')
- module_name, class_name = name[:name.rfind('.')], name[name.rfind('.')+1:]
- if module_name == '':
- raise ValueError('name must be a qualified class name.')
- return module_name, class_name
-
-
-class ReaderBuilder():
- """Build `bsie.base.Reader` instances.
-
- Readers are defined via their qualified class name
- (e.g., bsie.reader.path.Path) and optional keyword
- arguments that are passed to the constructor via
- the *kwargs* argument (name as key, kwargs as value).
- The ReaderBuilder keeps a cache of previously built
- reader instances, as they are anyway built with
- identical keyword arguments.
-
- """
-
- # keyword arguments
- _kwargs: typing.Dict[str, typing.Dict[str, typing.Any]]
-
- # cached readers
- _cache: typing.Dict[str, base.Reader]
-
- def __init__(self, kwargs: typing.Dict[str, typing.Dict[str, typing.Any]]):
- self._kwargs = kwargs
- self._cache = {}
-
- def build(self, name: str) -> base.Reader:
- """Return an instance for the qualified class name."""
- # return cached instance
- if name in self._cache:
- return self._cache[name]
-
- # check name and get module/class components
- module_name, class_name = _unpack_name(name)
-
- # import reader class
- cls = _safe_load(module_name, class_name)
-
- # get kwargs
- kwargs = self._kwargs.get(name, {})
- if not isinstance(kwargs, dict):
- raise TypeError(f'expected a kwargs dict, found {bsfs.typename(kwargs)}')
-
- try: # build, cache, and return instance
- obj = cls(**kwargs)
- # cache instance
- self._cache[name] = obj
- # return instance
- return obj
-
- except Exception as err:
- raise errors.BuilderError(f'failed to build reader {name} due to {bsfs.typename(err)}: {err}') from err
-
-
-class ExtractorBuilder():
- """Build `bsie.base.Extractor instances.
-
- It is permissible to build multiple instances of the same extractor
- (typically with different arguments), hence the ExtractorBuilder
- receives a list of build specifications. Each specification is
- a dict with a single key (extractor's qualified name) and a dict
- to be used as keyword arguments.
- Example: [{'bsie.extractor.generic.path.Path': {}}, ]
-
- """
-
- # build specifications
- _specs: typing.List[typing.Dict[str, typing.Dict[str, typing.Any]]]
-
- def __init__(self, specs: typing.List[typing.Dict[str, typing.Dict[str, typing.Any]]]):
- self._specs = specs
-
- def __iter__(self) -> typing.Iterator[int]:
- """Iterate over extractor specifications."""
- return iter(range(len(self._specs)))
-
- def build(self, index: int) -> base.Extractor:
- """Return an instance of the n'th extractor (n=*index*)."""
- # get build instructions
- specs = self._specs[index]
-
- # check specs structure. expecting[{name: {kwargs}}]
- if not isinstance(specs, dict):
- raise TypeError(f'expected a dict, found {bsfs.typename(specs)}')
- if len(specs) != 1:
- raise TypeError(f'expected a dict of length one, found {len(specs)}')
-
- # get name and args from specs
- name = next(iter(specs.keys()))
- kwargs = specs[name]
-
- # check kwargs structure
- if not isinstance(kwargs, dict):
- raise TypeError(f'expected a dict, found {bsfs.typename(kwargs)}')
-
- # check name and get module/class components
- module_name, class_name = _unpack_name(name)
-
- # import extractor class
- cls = _safe_load(module_name, class_name)
-
- try: # build and return instance
- return cls(**kwargs)
-
- except Exception as err:
- raise errors.BuilderError(f'failed to build extractor {name} due to {bsfs.typename(err)}: {err}') from err
-
-
-class PipelineBuilder():
- """Build `bsie.tools.pipeline.Pipeline` instances."""
-
- # Prefix to be used in the Pipeline.
- prefix: bsfs.Namespace
-
- # builder for Readers.
- rbuild: ReaderBuilder
-
- # builder for Extractors.
- ebuild: ExtractorBuilder
-
- def __init__(
- self,
- prefix: bsfs.Namespace,
- reader_builder: ReaderBuilder,
- extractor_builder: ExtractorBuilder,
- ):
- self.prefix = prefix
- self.rbuild = reader_builder
- self.ebuild = extractor_builder
-
- def build(self) -> pipeline.Pipeline:
- """Return a Pipeline instance."""
- ext2rdr = {}
-
- for eidx in self.ebuild:
- # build extractor
- try:
- ext = self.ebuild.build(eidx)
-
- except errors.LoaderError as err: # failed to load extractor; skip
- logger.error('failed to load extractor: %s', err)
- continue
-
- except errors.BuilderError as err: # failed to build instance; skip
- logger.error(str(err))
- continue
-
- try:
- # get reader required by extractor
- if ext.CONTENT_READER is not None:
- rdr = self.rbuild.build(ext.CONTENT_READER)
- else:
- rdr = None
- # store extractor
- ext2rdr[ext] = rdr
-
- except errors.LoaderError as err: # failed to load reader
- logger.error('failed to load reader: %s', err)
-
- except errors.BuilderError as err: # failed to build reader
- logger.error(str(err))
-
- return pipeline.Pipeline(self.prefix, ext2rdr)
-
-
-
-## EOF ##
diff --git a/bsie/utils/__init__.py b/bsie/utils/__init__.py
index bd22236..9cb60ed 100644
--- a/bsie/utils/__init__.py
+++ b/bsie/utils/__init__.py
@@ -4,19 +4,24 @@ Part of the bsie module.
A copy of the license is provided with the project.
Author: Matthias Baumgartner, 2022
"""
-# imports
+# standard imports
import typing
# inner-module imports
from . import bsfs
+from . import filematcher
from . import namespaces as ns
from . import node
+from .loading import safe_load, unpack_qualified_name
# exports
__all__: typing.Sequence[str] = (
'bsfs',
+ 'filematcher',
'node',
'ns',
+ 'safe_load',
+ 'unpack_qualified_name',
)
## EOF ##
diff --git a/bsie/utils/bsfs.py b/bsie/utils/bsfs.py
index 0b88479..ef5db31 100644
--- a/bsie/utils/bsfs.py
+++ b/bsie/utils/bsfs.py
@@ -4,7 +4,7 @@ Part of the bsie module.
A copy of the license is provided with the project.
Author: Matthias Baumgartner, 2022
"""
-# imports
+# standard imports
import typing
# bsfs imports
diff --git a/bsie/base/errors.py b/bsie/utils/errors.py
index dc3c30e..8133cd4 100644
--- a/bsie/base/errors.py
+++ b/bsie/utils/errors.py
@@ -4,7 +4,7 @@ Part of the bsie module.
A copy of the license is provided with the project.
Author: Matthias Baumgartner, 2022
"""
-# imports
+# standard imports
import typing
# exports
@@ -39,4 +39,10 @@ class ProgrammingError(_BSIEError):
class UnreachableError(ProgrammingError):
"""Bravo, you've reached a point in code that should logically not be reachable."""
+class ParserError(_BSIEError):
+ """Failed to parse a string due to invalid syntax or structures."""
+
+class UnsupportedFileFormatError(ReaderError):
+ """Failed to read a file because the reader does not support its format."""
+
## EOF ##
diff --git a/bsie/tools/__init__.py b/bsie/utils/filematcher/__init__.py
index 803c321..1e23e4e 100644
--- a/bsie/tools/__init__.py
+++ b/bsie/utils/filematcher/__init__.py
@@ -4,17 +4,17 @@ Part of the bsie module.
A copy of the license is provided with the project.
Author: Matthias Baumgartner, 2022
"""
-# imports
+# standard imports
import typing
# inner-module imports
-from . import builder
-from .pipeline import Pipeline
+from .matcher import Matcher
+from .parser import parse
# exports
__all__: typing.Sequence[str] = (
- 'builder',
- 'Pipeline',
+ 'Matcher',
+ 'parse',
)
## EOF ##
diff --git a/bsie/utils/filematcher/matcher.py b/bsie/utils/filematcher/matcher.py
new file mode 100644
index 0000000..a279a4b
--- /dev/null
+++ b/bsie/utils/filematcher/matcher.py
@@ -0,0 +1,179 @@
+"""
+
+Part of the bsie module.
+A copy of the license is provided with the project.
+Author: Matthias Baumgartner, 2021
+"""
+# standard imports
+from collections.abc import Callable, Collection, Hashable
+import abc
+import os
+import typing
+
+# external imports
+import magic
+
+# exports
+__all__: typing.Sequence[str] = []
+
+
+## code ##
+
+# abstract nodes
+
+class Matcher(abc.ABC, Hashable, Callable, Collection): # type: ignore [misc] # Invalid base class Callable
+    """Matcher node base class.
+
+    A matcher holds a set of children (sub-expressions or terminal values)
+    and is called with a path to decide whether the path is accepted.
+    Two matchers are equal if *other* is an instance of this matcher's type
+    and both hold equal child sets.
+
+    """
+
+    # child expressions or terminals
+    _childs: typing.Set[typing.Any]
+
+    def __init__(self, *childs: typing.Any):
+        # a single list, tuple, or set argument is unpacked into the children
+        if len(childs) == 1 and isinstance(childs[0], (list, tuple, set)):
+            self._childs = set(childs[0])
+        else:
+            self._childs = set(childs)
+
+    def __contains__(self, needle: typing.Any) -> bool:
+        return needle in self._childs
+
+    def __iter__(self) -> typing.Iterator[typing.Any]:
+        return iter(self._childs)
+
+    def __len__(self) -> int:
+        return len(self._childs)
+
+    def __repr__(self) -> str:
+        return f'{type(self).__name__}({self._childs})'
+
+    def __hash__(self) -> int:
+        # A frozenset hashes independently of the iteration order. Hashing a
+        # tuple of the set's items would depend on that order, which can differ
+        # between equal sets, and equal matchers must have equal hashes.
+        return hash((type(self), frozenset(self._childs)))
+
+    def __eq__(self, other: typing.Any) -> bool:
+        return isinstance(other, type(self)) \
+           and self._childs == other._childs
+
+    @abc.abstractmethod
+    def __call__(self, path: str) -> bool: # pylint: disable=arguments-differ
+        """Check if *path* satisfies the conditions set by the Matcher instance."""
+
+class NOT(Matcher):
+    """Negate the verdict of the wrapped matcher."""
+
+    def __init__(self, expr: Matcher):
+        super().__init__(expr)
+
+    def __call__(self, path: str) -> bool:
+        # the wrapped expression is fetched from the child set
+        inner = next(iter(self._childs))
+        return not inner(path)
+
+# aggregate nodes
+
+class Aggregate(Matcher): # pylint: disable=too-few-public-methods # Yeah, it's an interface...
+ """Aggregation function base class (And, Or). Adds no behaviour of its own."""
+
+class And(Aggregate):
+    """Accept a path only if every child condition accepts it."""
+
+    def __call__(self, path: str) -> bool:
+        # stops at the first child that rejects the path
+        return all(child(path) for child in self)
+
+class Or(Aggregate):
+    """Accept a path if at least one child condition accepts it."""
+
+    def __call__(self, path: str) -> bool:
+        # stops at the first child that accepts the path
+        return any(child(path) for child in self)
+
+
+# criteria nodes
+
+class Criterion(Matcher):
+ """Criterion base class. Limits acceptance to certain values."""
+ def accepted(self) -> typing.Set[typing.Any]:
+ """Return the set of accepted values (the node's own child set, not a copy)."""
+ return self._childs
+
+# criteria w/o value (valueless)
+
+class Any(Criterion):
+ """Accepts anything: every path matches."""
+ def __call__(self, path: str) -> bool:
+ return True
+
+class Nothing(Criterion):
+ """Accepts nothing: no path matches."""
+ def __call__(self, path: str) -> bool:
+ return False
+
+class Exists(Criterion):
+ """Filters by existence (as reported by os.path.exists)."""
+ def __call__(self, path: str) -> bool:
+ return os.path.exists(path)
+
+class IsFile(Criterion):
+ """Checks if the path is a regular file (os.path.isfile)."""
+ def __call__(self, path: str) -> bool:
+ return os.path.isfile(path)
+
+class IsDir(Criterion):
+ """Checks if the path is a directory (os.path.isdir)."""
+ def __call__(self, path: str) -> bool:
+ return os.path.isdir(path)
+
+class IsLink(Criterion):
+ """Checks if the path is a link (os.path.islink)."""
+ def __call__(self, path: str) -> bool:
+ return os.path.islink(path)
+
+class IsAbs(Criterion):
+ """Checks if the path is an absolute path (os.path.isabs)."""
+ def __call__(self, path: str) -> bool:
+ return os.path.isabs(path)
+
+class IsRel(Criterion):
+ """Checks if the path is a relative path, i.e., not absolute according to os.path.isabs."""
+ def __call__(self, path: str) -> bool:
+ return not os.path.isabs(path)
+
+class IsMount(Criterion):
+ """Checks if the path is a mount point (os.path.ismount)."""
+ def __call__(self, path: str) -> bool:
+ return os.path.ismount(path)
+
+class IsEmpty(Criterion):
+ """Checks if the path exists and its stat size is zero bytes."""
+ def __call__(self, path: str) -> bool:
+ return os.path.exists(path) and os.stat(path).st_size == 0
+
+class IsReadable(Criterion):
+ """Checks if the path exists and is readable (os.access with os.R_OK)."""
+ def __call__(self, path: str) -> bool:
+ return os.path.exists(path) and os.access(path, os.R_OK)
+
+class IsWritable(Criterion):
+ """Checks if the path exists and is writable (os.access with os.W_OK)."""
+ def __call__(self, path: str) -> bool:
+ return os.path.exists(path) and os.access(path, os.W_OK)
+
+class IsExecutable(Criterion):
+ """Checks if the path exists and is executable (os.access with os.X_OK)."""
+ def __call__(self, path: str) -> bool:
+ return os.path.exists(path) and os.access(path, os.X_OK)
+
+# criteria w/ value
+
+class Extension(Criterion):
+ """Filters by file extension (without the dot).
+
+ Only the extension returned by os.path.splitext is considered, and it
+ has to be equal to one of the accepted values.
+ """
+ def __call__(self, path: str) -> bool:
+ _, ext = os.path.splitext(path)
+ # drop the leading dot before looking up the extension
+ return ext[1:] in self.accepted()
+
+class Mime(Criterion):
+ """Filters by mime type.
+
+ The mime type reported by magic is lower-cased before it is looked up
+ in the accepted values. A path that raises FileNotFoundError never matches.
+ """
+ def __call__(self, path: str) -> bool:
+ try:
+ return magic.from_file(path, mime=True).lower() in self.accepted()
+ except FileNotFoundError:
+ return False
+
+## EOF ##
diff --git a/bsie/utils/filematcher/parser.py b/bsie/utils/filematcher/parser.py
new file mode 100644
index 0000000..2f82875
--- /dev/null
+++ b/bsie/utils/filematcher/parser.py
@@ -0,0 +1,146 @@
+"""
+
+Part of the bsie module.
+A copy of the license is provided with the project.
+Author: Matthias Baumgartner, 2021
+"""
+# standard imports
+import typing
+
+# external imports
+import pyparsing
+from pyparsing import printables, alphas8bit, punc8bit, QuotedString, Word, \
+ delimitedList, Or, CaselessKeyword, Group, oneOf, Optional
+
+# inner-module imports
+from . import matcher
+from .. import errors
+
+# exports
+__all__: typing.Sequence[str] = (
+ 'parse',
+ )
+
+
+## code ##
+
+class FileMatcherParser():
+    """Parse file matcher rules into a tree of matcher nodes.
+
+    Grammar:
+
+        EXPR := RULESET | RULESET "|" RULESET
+        RULESET := RULE | RULE, RULE
+        RULE := CRITERION OP VALUE | CRITERION OP {VALUES} | VALUELESS | !VALUELESS
+        OP := != | =
+        VALUES := VALUE | VALUE, VALUE
+        VALUE := [word]
+        CRITERION := mime | extension | ...
+
+    The rules of one ruleset are And-aggregated, the rulesets of an
+    expression are Or-aggregated. A negated rule is wrapped in a NOT node.
+
+    """
+
+    # criteria matcher nodes w/ arguments
+    _CRITERIA: typing.Dict[str, typing.Type[matcher.Matcher]] = {
+        'extension': matcher.Extension,
+        'mime': matcher.Mime,
+        }
+
+    # criteria matcher nodes w/o arguments
+    _VALUELESS: typing.Dict[str, typing.Type[matcher.Matcher]] = {
+        'any': matcher.Any,
+        'nothing': matcher.Nothing,
+        'exists': matcher.Exists,
+        'isfile': matcher.IsFile,
+        'isdir': matcher.IsDir,
+        'islink': matcher.IsLink,
+        'isabs': matcher.IsAbs,
+        'isrel': matcher.IsRel,
+        'ismount': matcher.IsMount,
+        'empty': matcher.IsEmpty,
+        'emtpy': matcher.IsEmpty, # misspelled alias of 'empty', kept for backward compatibility
+        'readable': matcher.IsReadable,
+        'writable': matcher.IsWritable,
+        'executable': matcher.IsExecutable,
+        }
+
+    # pyparsing parser instance.
+    _parser: pyparsing.ParseExpression
+
+    def __init__(self):
+        # build the parser
+        # VALUE := [word]
+        alphabet = (printables + alphas8bit + punc8bit).translate(str.maketrans('', '', ',{}|='))
+        value = QuotedString(quoteChar='"', escChar='\\') ^ Word(alphabet)
+        # CRITERION := mime | extension | ...
+        criterion = Or([CaselessKeyword(p) for p in self._CRITERIA]).setResultsName('criterion')
+        valueless = Or([CaselessKeyword(p) for p in self._VALUELESS]).setResultsName('criterion')
+        # VALUES := VALUE | VALUE, VALUE
+        values = delimitedList(value, delim=',').setResultsName('value')
+        # OP := '=' | '!='
+        eqop = oneOf('= !=').setResultsName('op')
+        # RULE := CRITERION OP VALUE | CRITERION OP {VALUES} | VALUELESS
+        rule_none = Group(Optional('!').setResultsName('op') + valueless).setResultsName('rule_none')
+        rule_one = Group(criterion + eqop + value.setResultsName('value')).setResultsName('rule_one')
+        rule_few = Group(criterion + eqop + '{' + values + '}').setResultsName('rule_few')
+        # RULESET := RULE | RULE, RULE
+        ruleset = Group(delimitedList(rule_none ^ rule_one ^ rule_few, delim=','))
+        # EXPR := RULESET | RULESET \| RULESET
+        self._parser = delimitedList(ruleset, delim='|')
+
+    def parse(self, query: str) -> matcher.Matcher: # pylint: disable=too-many-branches
+        """Build a file matcher from a rule definition.
+
+        An empty (or whitespace-only) *query* yields a matcher that accepts
+        anything. Raises `errors.ParserError` if *query* cannot be parsed.
+        """
+        # preprocess the query
+        query = query.strip()
+
+        # empty query
+        if len(query) == 0:
+            return matcher.Any()
+
+        try:
+            parsed = self._parser.parseString(query, parseAll=True)
+        except pyparsing.ParseException as err:
+            # keep the pyparsing error as the cause of the ParserError
+            raise errors.ParserError(f'Cannot parse query {err}') from err
+
+        # convert to Matcher
+        rules = []
+        for exp in parsed:
+            tokens = []
+            for rule in exp:
+                # fetch accepted values
+                if rule.getName() == 'rule_none':
+                    accepted = []
+                elif rule.getName() == 'rule_one':
+                    accepted = [rule.value]
+                elif rule.getName() == 'rule_few':
+                    accepted = list(rule.value)
+                else: # prevented by grammar
+                    raise errors.UnreachableError('Invalid rule definition')
+
+                # build criterion
+                if rule.criterion in self._VALUELESS:
+                    cls = self._VALUELESS[rule.criterion]
+                    if rule.op == '!':
+                        tokens.append(matcher.NOT(cls()))
+                    else:
+                        tokens.append(cls())
+                elif rule.criterion in self._CRITERIA:
+                    cls = self._CRITERIA[rule.criterion]
+                    if rule.op == '!=':
+                        tokens.append(matcher.NOT(cls(accepted)))
+                    else:
+                        tokens.append(cls(accepted))
+                else: # prevented by grammar
+                    raise errors.UnreachableError(f'Invalid condition "{rule.criterion}"')
+
+            # And-aggregate rules in one ruleset (if needed)
+            tokens = matcher.And(tokens) if len(tokens) > 1 else tokens[0]
+            rules.append(tokens)
+
+        # Or-aggregate rulesets
+        expr = matcher.Or(rules) if len(rules) > 1 else rules[0]
+
+        return expr
+
+# build default instance
+file_match_parser = FileMatcherParser()
+
+def parse(query: str) -> matcher.Matcher:
+ """Shortcut for file_match_parser.parse(query), using the module's default parser instance."""
+ return file_match_parser.parse(query)
+
+## EOF ##
diff --git a/bsie/utils/loading.py b/bsie/utils/loading.py
new file mode 100644
index 0000000..eb05c35
--- /dev/null
+++ b/bsie/utils/loading.py
@@ -0,0 +1,54 @@
+"""
+
+Part of the bsie module.
+A copy of the license is provided with the project.
+Author: Matthias Baumgartner, 2022
+"""
+# standard imports
+import importlib
+import typing
+
+# inner-module imports
+from . import errors
+
+# exports
+__all__: typing.Sequence[str] = (
+ 'safe_load',
+ 'unpack_qualified_name',
+ )
+
+
+## code ##
+
+def safe_load(module_name: str, class_name: str):
+    """Get a class from a module. Raise LoaderError if anything goes wrong.
+
+    A LoaderError is raised both if the module *module_name* cannot be
+    imported and if it has no attribute *class_name*. The original
+    exception is attached as the cause.
+    """
+    try:
+        # load the module
+        module = importlib.import_module(module_name)
+    except Exception as err:
+        # cannot import module
+        raise errors.LoaderError(f'cannot load module {module_name}') from err
+
+    try:
+        # get the class from the module
+        cls = getattr(module, class_name)
+    except Exception as err:
+        # cannot find the class
+        raise errors.LoaderError(f'cannot load class {class_name} from module {module_name}') from err
+
+    return cls
+
+
+def unpack_qualified_name(name):
+    """Split a dot-separated qualified name into (module name, class name).
+
+    The split happens at the last dot. Raises TypeError if *name* is not a
+    string and ValueError if it has no dot or nothing in front of the last dot.
+    """
+    if not isinstance(name, str):
+        raise TypeError(name)
+    # *sep* is empty if the name has no dot at all
+    module_name, sep, class_name = name.rpartition('.')
+    if sep == '' or module_name == '':
+        raise ValueError('name must be a qualified class name.')
+    return module_name, class_name
+
+
+## EOF ##
diff --git a/bsie/utils/namespaces.py b/bsie/utils/namespaces.py
index a29fc1b..393b436 100644
--- a/bsie/utils/namespaces.py
+++ b/bsie/utils/namespaces.py
@@ -4,7 +4,7 @@ Part of the bsie module.
A copy of the license is provided with the project.
Author: Matthias Baumgartner, 2022
"""
-# imports
+# standard imports
import typing
# inner-module imports
@@ -15,6 +15,7 @@ bse = _bsfs.Namespace('http://bsfs.ai/schema/Entity')
bsfs = _bsfs.Namespace('http://bsfs.ai/schema', fsep='/')
bsm = _bsfs.Namespace('http://bsfs.ai/schema/Meta')
xsd = _bsfs.Namespace('http://www.w3.org/2001/XMLSchema')
+bsf = _bsfs.Namespace('http://ie.bsfs.ai/schema/Feature')
# export
__all__: typing.Sequence[str] = (
diff --git a/bsie/utils/node.py b/bsie/utils/node.py
index ecf39cd..91e4f37 100644
--- a/bsie/utils/node.py
+++ b/bsie/utils/node.py
@@ -4,7 +4,7 @@ Part of the bsie module.
A copy of the license is provided with the project.
Author: Matthias Baumgartner, 2022
"""
-# imports
+# standard imports
import typing
# bsie imports
diff --git a/setup.py b/setup.py
index ee9e0fd..6dad7ac 100644
--- a/setup.py
+++ b/setup.py
@@ -14,7 +14,19 @@ setup(
url='https://www.igsor.net/projects/blackstar/bsie/',
download_url='https://pip.igsor.net',
packages=('bsie', ),
- install_requires=('rdflib', 'bsfs'),
+ install_requires=(
+ 'bsfs',
+ 'pyparsing',
+ 'python-magic',
+ 'rdflib', # only for tests
+ 'requests', # only for tests
+ ),
python_requires=">=3.7",
+ extra_require=(
+ # image reader
+ 'pillow', 'rawpy',
+ # image extractors
+ 'numpy',
+ )
)
diff --git a/test/apps/test_index.py b/test/apps/test_index.py
index 9cdc656..7f5be8e 100644
--- a/test/apps/test_index.py
+++ b/test/apps/test_index.py
@@ -4,13 +4,15 @@ Part of the bsie test suite.
A copy of the license is provided with the project.
Author: Matthias Baumgartner, 2022
"""
-# imports
+# standard imports
import contextlib
import io
import os
-import rdflib
import unittest
+# external imports
+import rdflib
+
# bsie imports
from bsie.utils import ns
@@ -21,6 +23,12 @@ from bsie.apps.index import main
## code ##
class TestIndex(unittest.TestCase):
+    def test_main_invalid(self):
+        # indexing a path that does not exist must not print anything to stdout
+        outbuf = io.StringIO()
+        with contextlib.redirect_stdout(outbuf):
+            # the return value is irrelevant here, only the captured output is checked
+            main([os.path.join(os.path.dirname(__file__), 'inexistent-file.t')])
+        self.assertEqual(outbuf.getvalue().strip(), '')
+
def test_main(self):
bsfs = main([
'-r',
@@ -75,6 +83,14 @@ class TestIndex(unittest.TestCase):
(rdflib.URIRef(prefix + 'd803187cbf3676ae9d38126270a6152c60431589aa3bb3824baf8954e9c097f1'), rdflib.URIRef(ns.bse.author), rdflib.Literal('Me, myself, and I', datatype=rdflib.XSD.string)),
(rdflib.URIRef(prefix + 'd803187cbf3676ae9d38126270a6152c60431589aa3bb3824baf8954e9c097f1'), rdflib.URIRef(ns.bse.filename), rdflib.Literal('td_second', datatype=rdflib.XSD.string)),
(rdflib.URIRef(prefix + 'd803187cbf3676ae9d38126270a6152c60431589aa3bb3824baf8954e9c097f1'), rdflib.URIRef(ns.bse.filesize), rdflib.Literal('703', datatype=rdflib.XSD.integer)),
+ (rdflib.URIRef(prefix + 'accb115d266ad60c53cd01a7f7130f245886ce8eaf69bc85319febc11d9fe089'), rdflib.RDF.type, rdflib.URIRef(ns.bsfs.File)),
+ (rdflib.URIRef(prefix + 'accb115d266ad60c53cd01a7f7130f245886ce8eaf69bc85319febc11d9fe089'), rdflib.URIRef(ns.bse.author), rdflib.Literal('Me, myself, and I', datatype=rdflib.XSD.string)),
+ (rdflib.URIRef(prefix + 'accb115d266ad60c53cd01a7f7130f245886ce8eaf69bc85319febc11d9fe089'), rdflib.URIRef(ns.bse.filename), rdflib.Literal('testimage.jpg', datatype=rdflib.XSD.string)),
+ (rdflib.URIRef(prefix + 'accb115d266ad60c53cd01a7f7130f245886ce8eaf69bc85319febc11d9fe089'), rdflib.URIRef(ns.bse.filesize), rdflib.Literal('349264', datatype=rdflib.XSD.integer)),
+ (rdflib.URIRef(prefix + 'accb115d266ad60c53cd01a7f7130f245886ce8eaf69bc85319febc11d9fe089'), rdflib.URIRef('http://bsfs.ai/schema/Entity/colors_spatial#0658f2234a054e1dd59a14462c89f7733e019160419c796356aa831498bd0a04'),
+ rdflib.Literal(
+ '(91, 127, 121, 94, 138, 167, 163, 134, 190, 138, 170, 156, 121, 142, 159)',
+ datatype=rdflib.URIRef('http://ie.bsfs.ai/schema/Feature/ColorsSpatial#0658f2234a054e1dd59a14462c89f7733e019160419c796356aa831498bd0a04'))),
}))
# NOTE: we don't check ns.bsm.t_created since it depends on the execution time. Triples would look like this:
@@ -89,6 +105,7 @@ class TestIndex(unittest.TestCase):
# (rdflib.URIRef(prefix + 'b8fd7fba818254166a6043195004138ebda6923e012442f819a2c49671136c70'), rdflib.URIRef(ns.bsm.t_created), rdflib.Literal('1670..........', datatype=rdflib.XSD.integer)),
# (rdflib.URIRef(prefix + 'd43758ace82154a1cc10ca0dfef63cb20dd831f9c87edd6dc06539eefe67371d'), rdflib.URIRef(ns.bsm.t_created), rdflib.Literal('1670..........', datatype=rdflib.XSD.integer)),
# (rdflib.URIRef(prefix + 'd803187cbf3676ae9d38126270a6152c60431589aa3bb3824baf8954e9c097f1'), rdflib.URIRef(ns.bsm.t_created), rdflib.Literal('1670..........', datatype=rdflib.XSD.integer)),
+ # (rdflib.URIRef(prefix + 'accb115d266ad60c53cd01a7f7130f245886ce8eaf69bc85319febc11d9fe089'), rdflib.URIRef(ns.bsm.t_created), rdflib.Literal('1670..........', datatype=rdflib.XSD.integer)),
# instead, we simply check if there's such a predicate for each file
self.assertSetEqual({sub for sub, _ in bsfs._backend._graph.subject_objects(rdflib.URIRef(ns.bsm.t_created))}, {
rdflib.URIRef(prefix + '2f4109b40107cc50e0884755a1a961ed126887e49b8dbaf0e146b2e226aa6647'),
@@ -102,6 +119,7 @@ class TestIndex(unittest.TestCase):
rdflib.URIRef(prefix + 'b8fd7fba818254166a6043195004138ebda6923e012442f819a2c49671136c70'),
rdflib.URIRef(prefix + 'd43758ace82154a1cc10ca0dfef63cb20dd831f9c87edd6dc06539eefe67371d'),
rdflib.URIRef(prefix + 'd803187cbf3676ae9d38126270a6152c60431589aa3bb3824baf8954e9c097f1'),
+ rdflib.URIRef(prefix + 'accb115d266ad60c53cd01a7f7130f245886ce8eaf69bc85319febc11d9fe089'),
})
def test_print(self):
@@ -148,6 +166,10 @@ class TestIndex(unittest.TestCase):
f'Node(http://bsfs.ai/schema/File, http://example.com/me/file#d803187cbf3676ae9d38126270a6152c60431589aa3bb3824baf8954e9c097f1) Predicate({ns.bse.author}) Me, myself, and I',
f'Node(http://bsfs.ai/schema/File, http://example.com/me/file#d803187cbf3676ae9d38126270a6152c60431589aa3bb3824baf8954e9c097f1) Predicate({ns.bse.filename}) td_second',
f'Node(http://bsfs.ai/schema/File, http://example.com/me/file#d803187cbf3676ae9d38126270a6152c60431589aa3bb3824baf8954e9c097f1) Predicate({ns.bse.filesize}) 703',
+ f'Node(http://bsfs.ai/schema/File, http://example.com/me/file#accb115d266ad60c53cd01a7f7130f245886ce8eaf69bc85319febc11d9fe089) Predicate({ns.bse.filesize}) 349264',
+ f'Node(http://bsfs.ai/schema/File, http://example.com/me/file#accb115d266ad60c53cd01a7f7130f245886ce8eaf69bc85319febc11d9fe089) Predicate({ns.bse.author}) Me, myself, and I',
+ f'Node(http://bsfs.ai/schema/File, http://example.com/me/file#accb115d266ad60c53cd01a7f7130f245886ce8eaf69bc85319febc11d9fe089) Predicate({ns.bse.filename}) testimage.jpg',
+ f'Node(http://bsfs.ai/schema/File, http://example.com/me/file#accb115d266ad60c53cd01a7f7130f245886ce8eaf69bc85319febc11d9fe089) Predicate(http://bsfs.ai/schema/Entity/colors_spatial#0658f2234a054e1dd59a14462c89f7733e019160419c796356aa831498bd0a04) (91, 127, 121, 94, 138, 167, 163, 134, 190, 138, 170, 156, 121, 142, 159)',
})
diff --git a/test/apps/test_info.py b/test/apps/test_info.py
index 6f4d98f..60e9ba1 100644
--- a/test/apps/test_info.py
+++ b/test/apps/test_info.py
@@ -4,12 +4,15 @@ Part of the bsie test suite.
A copy of the license is provided with the project.
Author: Matthias Baumgartner, 2022
"""
-# imports
+# standard imports
import argparse
import contextlib
import io
import unittest
+# bsie imports
+from bsie.utils import bsfs
+
# objects to test
from bsie.apps.info import main
@@ -28,6 +31,22 @@ class TestIndex(unittest.TestCase):
'http://bsfs.ai/schema/Predicate',
'http://bsfs.ai/schema/Entity#filename',
'http://bsfs.ai/schema/Entity#filesize',
+ 'http://bsfs.ai/schema/Entity/colors_spatial#0658f2234a054e1dd59a14462c89f7733e019160419c796356aa831498bd0a04'
+ })
+
+ def test_schema(self):
+ outbuf = io.StringIO()
+ with contextlib.redirect_stdout(outbuf):
+ # show schema info
+ main(['schema'])
+ # verify output
+ schema = bsfs.schema.from_string(outbuf.getvalue())
+ self.assertSetEqual({pred.uri for pred in schema.predicates()}, {
+ 'http://bsfs.ai/schema/Entity#author',
+ 'http://bsfs.ai/schema/Predicate',
+ 'http://bsfs.ai/schema/Entity#filename',
+ 'http://bsfs.ai/schema/Entity#filesize',
+ 'http://bsfs.ai/schema/Entity/colors_spatial#0658f2234a054e1dd59a14462c89f7733e019160419c796356aa831498bd0a04'
})
def test_invalid(self):
diff --git a/test/apps/testdir/testimage.jpg b/test/apps/testdir/testimage.jpg
new file mode 100644
index 0000000..c80bb48
--- /dev/null
+++ b/test/apps/testdir/testimage.jpg
Binary files differ
diff --git a/test/extractor/generic/test_constant.py b/test/extractor/generic/test_constant.py
index 9dbaced..bde3805 100644
--- a/test/extractor/generic/test_constant.py
+++ b/test/extractor/generic/test_constant.py
@@ -4,7 +4,7 @@ Part of the bsie test suite.
A copy of the license is provided with the project.
Author: Matthias Baumgartner, 2022
"""
-# imports
+# standard imports
import unittest
# bsie imports
@@ -36,19 +36,19 @@ class TestConstant(unittest.TestCase):
node = _node.Node(ns.bsfs.Entity, '') # Blank node
p_author = ext.schema.predicate(ns.bse.author)
p_comment = ext.schema.predicate(ns.bse.comment)
- entity = ext.schema.node(ns.bsfs.Node).get_child(ns.bsfs.Entity)
- string = ext.schema.literal(ns.bsfs.Literal).get_child(ns.xsd.string)
+ entity = ext.schema.node(ns.bsfs.Node).child(ns.bsfs.Entity)
+ string = ext.schema.literal(ns.bsfs.Literal).child(ns.xsd.string)
# baseline
self.assertSetEqual(set(ext.extract(node, None, (p_author, p_comment))),
{(node, p_author, 'Me, myself, and I'),
(node, p_comment, 'the quick brown fox jumps over the lazy dog.')})
# predicates is respected
- p_foobar = ext.schema.predicate(ns.bsfs.Predicate).get_child(ns.bse.foobar, domain=entity, range=entity)
+ p_foobar = ext.schema.predicate(ns.bsfs.Predicate).child(ns.bse.foobar, domain=entity, range=entity)
self.assertSetEqual(set(ext.extract(node, None, (p_author, p_foobar))),
{(node, p_author, 'Me, myself, and I')})
self.assertSetEqual(set(ext.extract(node, None, (p_comment, p_foobar))),
{(node, p_comment, 'the quick brown fox jumps over the lazy dog.')})
- p_barfoo = ext.schema.predicate(ns.bse.author).get_child(ns.bse.comment, domain=entity, range=string)
+ p_barfoo = ext.schema.predicate(ns.bse.author).child(ns.bse.comment, domain=entity, range=string)
self.assertSetEqual(set(ext.extract(node, None, (p_foobar, p_barfoo))), set())
def test_construct(self):
diff --git a/test/extractor/generic/test_path.py b/test/extractor/generic/test_path.py
index 820f402..ae68686 100644
--- a/test/extractor/generic/test_path.py
+++ b/test/extractor/generic/test_path.py
@@ -4,11 +4,11 @@ Part of the bsie test suite.
A copy of the license is provided with the project.
Author: Matthias Baumgartner, 2022
"""
-# imports
+# standard imports
import unittest
# bsie imports
-from bsie.base import extractor
+from bsie.extractor import base
from bsie.utils import bsfs, node as _node, ns
# objects to test
@@ -29,7 +29,7 @@ class TestPath(unittest.TestCase):
def test_schema(self):
self.assertEqual(Path().schema,
- bsfs.schema.Schema.from_string(extractor.SCHEMA_PREAMBLE + '''
+ bsfs.schema.from_string(base.SCHEMA_PREAMBLE + '''
bse:filename rdfs:subClassOf bsfs:Predicate ;
rdfs:domain bsfs:File ;
rdfs:range xsd:string ;
@@ -41,19 +41,19 @@ class TestPath(unittest.TestCase):
node = _node.Node(ns.bsfs.File, '') # Blank node
content = '/tmp/foo/bar'
p_filename = ext.schema.predicate(ns.bse.filename)
- entity = ext.schema.node(ns.bsfs.Node).get_child(ns.bsfs.Entity)
- string = ext.schema.literal(ns.bsfs.Literal).get_child(ns.xsd.string)
+ entity = ext.schema.node(ns.bsfs.Node).child(ns.bsfs.Entity)
+ string = ext.schema.literal(ns.bsfs.Literal).child(ns.xsd.string)
# baseline
self.assertSetEqual(set(ext.extract(node, content, (p_filename, ))),
{(node, p_filename, 'bar')})
# predicates parameter is respected
- p_foo = ext.schema.predicate(ns.bsfs.Predicate).get_child(ns.bse.foo, domain=entity, range=string) # unsupported predicate
+ p_foo = ext.schema.predicate(ns.bsfs.Predicate).child(ns.bse.foo, domain=entity, range=string) # unsupported predicate
self.assertSetEqual(set(ext.extract(node, content, (p_filename, p_foo))),
{(node, p_filename, 'bar')})
self.assertSetEqual(set(ext.extract(node, content, (p_foo, ))), set())
# predicates are validated
- p_bar = p_foo.get_child(ns.bse.filename) # same URI but different hierarchy
+ p_bar = p_foo.child(ns.bse.filename) # same URI but different hierarchy
self.assertSetEqual(set(ext.extract(node, content, (p_filename, p_bar))),
{(node, p_filename, 'bar')})
self.assertSetEqual(set(ext.extract(node, content, (p_bar, ))), set())
diff --git a/test/extractor/generic/test_stat.py b/test/extractor/generic/test_stat.py
index 3441438..e5562d1 100644
--- a/test/extractor/generic/test_stat.py
+++ b/test/extractor/generic/test_stat.py
@@ -4,12 +4,12 @@ Part of the bsie test suite.
A copy of the license is provided with the project.
Author: Matthias Baumgartner, 2022
"""
-# imports
+# standard imports
import os
import unittest
# bsie imports
-from bsie.base import extractor
+from bsie.extractor import base
from bsie.utils import bsfs, node as _node, ns
# objects to test
@@ -30,7 +30,7 @@ class TestStat(unittest.TestCase):
def test_schema(self):
self.assertEqual(Stat().schema,
- bsfs.schema.Schema.from_string(extractor.SCHEMA_PREAMBLE + '''
+ bsfs.schema.from_string(base.SCHEMA_PREAMBLE + '''
bse:filesize rdfs:subClassOf bsfs:Predicate ;
rdfs:domain bsfs:File ;
rdfs:range xsd:integer ;
@@ -42,19 +42,19 @@ class TestStat(unittest.TestCase):
node = _node.Node(ns.bsfs.File, '') # Blank node
content = os.stat(__file__)
p_filesize = ext.schema.predicate(ns.bse.filesize)
- entity = ext.schema.node(ns.bsfs.Node).get_child(ns.bsfs.Entity)
- string = ext.schema.literal(ns.bsfs.Literal).get_child(ns.xsd.string)
+ entity = ext.schema.node(ns.bsfs.Node).child(ns.bsfs.Entity)
+ string = ext.schema.literal(ns.bsfs.Literal).child(ns.xsd.string)
# baseline
self.assertSetEqual(set(ext.extract(node, content, (p_filesize, ))),
{(node, p_filesize, content.st_size)})
# predicates parameter is respected
- p_foo = ext.schema.predicate(ns.bsfs.Predicate).get_child(ns.bse.foo, domain=entity, range=string) # unsupported predicate
+ p_foo = ext.schema.predicate(ns.bsfs.Predicate).child(ns.bse.foo, domain=entity, range=string) # unsupported predicate
self.assertSetEqual(set(ext.extract(node, content, (p_filesize, p_foo))),
{(node, p_filesize, content.st_size)})
self.assertSetEqual(set(ext.extract(node, content, (p_foo, ))), set())
# predicates are validated
- p_bar = p_foo.get_child(ns.bse.filesizse) # same URI but different hierarchy
+ p_bar = p_foo.child(ns.bse.filesizse) # same URI but different hierarchy
self.assertSetEqual(set(ext.extract(node, content, (p_filesize, p_bar))),
{(node, p_filesize, content.st_size)})
self.assertSetEqual(set(ext.extract(node, content, (p_bar, ))), set())
diff --git a/test/base/__init__.py b/test/extractor/image/__init__.py
index e69de29..e69de29 100644
--- a/test/base/__init__.py
+++ b/test/extractor/image/__init__.py
diff --git a/test/extractor/image/test_colors_spatial.py b/test/extractor/image/test_colors_spatial.py
new file mode 100644
index 0000000..ba551f3
--- /dev/null
+++ b/test/extractor/image/test_colors_spatial.py
@@ -0,0 +1,100 @@
+"""
+
+Part of the bsie test suite.
+A copy of the license is provided with the project.
+Author: Matthias Baumgartner, 2022
+"""
+# standard imports
+import os
+import unittest
+
+# external imports
+import PIL.Image
+
+# bsie imports
+from bsie.extractor import base
+from bsie.utils import bsfs, ns, node as _node
+
+# objects to test
+from bsie.extractor.image.colors_spatial import ColorsSpatial
+
+
+## code ##
+
+class TestColorsSpatial(unittest.TestCase):
+ def setUp(self):
+ # content id with default constructors (width=32, height=32, exp=4)
+ self.instance_prefix = 'http://ie.bsfs.ai/schema/Feature/ColorsSpatial'
+ self.predicate_prefix = 'http://bsfs.ai/schema/Entity/colors_spatial'
+ self.uuid = 'adee8d6c43687021e1c5bffe56bcfe727f1638d792744137181304ef889dac2a'
+
+ def test_essentials(self):
+ # clones are equal
+ self.assertEqual(ColorsSpatial(32, 32, 4), ColorsSpatial(32, 32, 4))
+ self.assertEqual(hash(ColorsSpatial(32, 32, 4)), hash(ColorsSpatial(32, 32, 4)))
+ # equals respects type
+ self.assertNotEqual(ColorsSpatial(32, 32, 4), 'hello world')
+ self.assertNotEqual(hash(ColorsSpatial(32, 32, 4)), hash('hello world'))
+ # equals respects width
+ self.assertNotEqual(ColorsSpatial(32, 32, 4), ColorsSpatial(16, 32, 4))
+ self.assertNotEqual(hash(ColorsSpatial(32, 32, 4)), hash(ColorsSpatial(16, 32, 4)))
+ # equals respects height
+ self.assertNotEqual(ColorsSpatial(32, 32, 4), ColorsSpatial(32, 16, 4))
+ self.assertNotEqual(hash(ColorsSpatial(32, 32, 4)), hash(ColorsSpatial(32, 16, 4)))
+ # equals respects exp
+ self.assertNotEqual(ColorsSpatial(32, 32, 4), ColorsSpatial(32, 32, 8))
+ self.assertNotEqual(hash(ColorsSpatial(32, 32, 4)), hash(ColorsSpatial(32, 32, 8)))
+ # string representation
+ self.assertEqual(str(ColorsSpatial()), 'ColorsSpatial')
+ self.assertEqual(repr(ColorsSpatial(64, 16, 2)), 'ColorsSpatial(64, 16, 2)')
+
+ def test_dimension(self):
+ self.assertEqual(ColorsSpatial.dimension(32, 32, 4), 3 * (32*32 + 8*8 + 2*2))
+ self.assertEqual(ColorsSpatial.dimension(16, 16, 8), 3 * (16*16 + 2*2))
+ self.assertEqual(ColorsSpatial.dimension(64, 64, 16), 3 * (64*64 + 4*4))
+
+ def test_schema(self):
+ schema = bsfs.schema.from_string(base.SCHEMA_PREAMBLE + f'''
+ <{self.instance_prefix}> rdfs:subClassOf bsfs:Feature ;
+ # annotations
+ rdfs:label "Spatially dominant colors"^^xsd:string ;
+ schema:description "Domiant colors of subregions in an image."^^xsd:string ;
+ bsfs:dtype xsd:integer .
+
+ <{self.instance_prefix}#{self.uuid}> rdfs:subClassOf <{self.instance_prefix}> ;
+ bsfs:dimension "3276"^^xsd:integer ;
+ # annotations
+ <{self.instance_prefix}/args#width> "32"^^xsd:integer ;
+ <{self.instance_prefix}/args#height> "32"^^xsd:integer ;
+ <{self.instance_prefix}/args#exp> "4"^^xsd:float .
+
+ <{self.predicate_prefix}#{self.uuid}> rdfs:subClassOf bsfs:Predicate ;
+ rdfs:domain bsfs:File ;
+ rdfs:range <{self.instance_prefix}#{self.uuid}> ;
+ bsfs:unique "true"^^xsd:boolean .
+ ''')
+ self.assertEqual(schema, ColorsSpatial().schema)
+
+ def test_extract(self):
+ ext = ColorsSpatial(2,2,2)
+ img = PIL.Image.open(os.path.join(os.path.dirname(__file__), 'testimage.jpg'))
+ node = _node.Node(ns.bsfs.Entity, bsfs.URI('http://example.com/entity#1234'))
+ principals = set(ext.principals)
+ self.assertEqual(len(principals), 1)
+ # valid invocation yields feature
+ ret = list(ext.extract(node, img, principals))
+ self.assertEqual(ret[0], (
+ node,
+ list(principals)[0],
+ (91, 127, 121, 94, 138, 167, 163, 134, 190, 138, 170, 156, 121, 142, 159)))
+ # principals is respected
+ self.assertListEqual(list(ext.extract(node, img, {})), [])
+
+
+
+## main ##
+
+if __name__ == '__main__':
+ unittest.main()
+
+## EOF ##
diff --git a/test/extractor/image/testimage.jpg b/test/extractor/image/testimage.jpg
new file mode 100644
index 0000000..c80bb48
--- /dev/null
+++ b/test/extractor/image/testimage.jpg
Binary files differ
diff --git a/test/base/test_extractor.py b/test/extractor/test_base.py
index 30974ef..acfaf58 100644
--- a/test/base/test_extractor.py
+++ b/test/extractor/test_base.py
@@ -4,21 +4,21 @@ Part of the bsie test suite.
A copy of the license is provided with the project.
Author: Matthias Baumgartner, 2022
"""
-# imports
+# standard imports
import unittest
# bsie imports
from bsie.utils import bsfs, ns
# objects to test
-from bsie.base import extractor
+from bsie.extractor import base
## code ##
-class StubExtractor(extractor.Extractor):
+class StubExtractor(base.Extractor):
def __init__(self):
- super().__init__(bsfs.schema.Schema.from_string(extractor.SCHEMA_PREAMBLE + '''
+ super().__init__(bsfs.schema.from_string(base.SCHEMA_PREAMBLE + '''
bse:author rdfs:subClassOf bsfs:Predicate ;
rdfs:domain bsfs:Entity ;
rdfs:range xsd:string ;
@@ -52,11 +52,11 @@ class TestExtractor(unittest.TestCase):
self.assertNotEqual(hash(ext), hash(sub))
def test_principals(self):
- schema = bsfs.schema.Schema.Empty()
- entity = schema.node(ns.bsfs.Node).get_child(ns.bsfs.Entity)
- string = schema.literal(ns.bsfs.Literal).get_child(bsfs.URI('http://www.w3.org/2001/XMLSchema#string'))
- p_author = schema.predicate(ns.bsfs.Predicate).get_child(ns.bse.author, domain=entity, range=string)
- p_comment = schema.predicate(ns.bsfs.Predicate).get_child(ns.bse.comment, domain=entity, range=string)
+ schema = bsfs.schema.Schema()
+ entity = schema.node(ns.bsfs.Node).child(ns.bsfs.Entity)
+ string = schema.literal(ns.bsfs.Literal).child(bsfs.URI('http://www.w3.org/2001/XMLSchema#string'))
+ p_author = schema.predicate(ns.bsfs.Predicate).child(ns.bse.author, domain=entity, range=string)
+ p_comment = schema.predicate(ns.bsfs.Predicate).child(ns.bse.comment, domain=entity, range=string)
ext = StubExtractor()
self.assertSetEqual(set(ext.principals),
{p_author, p_comment} | set(schema.predicates()) - {schema.predicate(ns.bsfs.Predicate)})
diff --git a/test/extractor/test_builder.py b/test/extractor/test_builder.py
new file mode 100644
index 0000000..039ea53
--- /dev/null
+++ b/test/extractor/test_builder.py
@@ -0,0 +1,103 @@
+"""
+
+Part of the bsie test suite.
+A copy of the license is provided with the project.
+Author: Matthias Baumgartner, 2022
+"""
+# standard imports
+import unittest
+
+# bsie imports
+from bsie.utils import errors
+
+# objects to test
+from bsie.extractor import ExtractorBuilder
+
+
+## code ##
+
+class TestExtractorBuilder(unittest.TestCase):
+ def test_iter(self):
+ # no specifications
+ self.assertListEqual(list(ExtractorBuilder([])), [])
+ # some specifications
+ builder = ExtractorBuilder([
+ {'bsie.extractor.generic.path.Path': {}},
+ {'bsie.extractor.generic.stat.Stat': {}},
+ {'bsie.extractor.generic.path.Path': {}},
+ ])
+ self.assertListEqual(list(builder), [0, 1, 2])
+
+ def test_build(self):
+ # simple and repeated extractors
+ builder = ExtractorBuilder([
+ {'bsie.extractor.generic.path.Path': {}},
+ {'bsie.extractor.generic.stat.Stat': {}},
+ {'bsie.extractor.generic.path.Path': {}},
+ ])
+ ext = [builder.build(0), builder.build(1), builder.build(2)]
+ import bsie.extractor.generic.path
+ import bsie.extractor.generic.stat
+ self.assertListEqual(ext, [
+ bsie.extractor.generic.path.Path(),
+ bsie.extractor.generic.stat.Stat(),
+ bsie.extractor.generic.path.Path(),
+ ])
+ # out-of-bounds raises IndexError
+ self.assertRaises(IndexError, builder.build, 3)
+
+ # building with args
+ builder = ExtractorBuilder([
+ {'bsie.extractor.generic.constant.Constant': {
+ 'schema': '''
+ bse:author rdfs:subClassOf bsfs:Predicate ;
+ rdfs:domain bsfs:Entity ;
+ rdfs:range xsd:string ;
+ bsfs:unique "true"^^xsd:boolean .
+ bse:rating rdfs:subClassOf bsfs:Predicate ;
+ rdfs:domain bsfs:Entity ;
+ rdfs:range xsd:integer ;
+ bsfs:unique "true"^^xsd:boolean .
+ ''',
+ 'tuples': [
+ ('http://bsfs.ai/schema/Entity#author', 'Me, myself, and I'),
+ ('http://bsfs.ai/schema/Entity#rating', 123),
+ ],
+ }}])
+ obj = builder.build(0)
+ import bsie.extractor.generic.constant
+ self.assertEqual(obj, bsie.extractor.generic.constant.Constant('''
+ bse:author rdfs:subClassOf bsfs:Predicate ;
+ rdfs:domain bsfs:Entity ;
+ rdfs:range xsd:string ;
+ bsfs:unique "true"^^xsd:boolean .
+ bse:rating rdfs:subClassOf bsfs:Predicate ;
+ rdfs:domain bsfs:Entity ;
+ rdfs:range xsd:integer ;
+ bsfs:unique "true"^^xsd:boolean .
+ ''', [
+ ('http://bsfs.ai/schema/Entity#author', 'Me, myself, and I'),
+ ('http://bsfs.ai/schema/Entity#rating', 123),
+ ]))
+
+ # building with invalid args
+ self.assertRaises(errors.BuilderError, ExtractorBuilder(
+ [{'bsie.extractor.generic.path.Path': {'foo': 123}}]).build, 0)
+ # non-dict build specification
+ self.assertRaises(TypeError, ExtractorBuilder(
+ [('bsie.extractor.generic.path.Path', {})]).build, 0)
+ # multiple keys per build specification
+ self.assertRaises(TypeError, ExtractorBuilder(
+ [{'bsie.extractor.generic.path.Path': {},
+ 'bsie.extractor.generic.stat.Stat': {}}]).build, 0)
+ # non-dict value for kwargs
+ self.assertRaises(TypeError, ExtractorBuilder(
+ [{'bsie.extractor.generic.path.Path': 123}]).build, 0)
+
+
+## main ##
+
+if __name__ == '__main__':
+ unittest.main()
+
+## EOF ##
diff --git a/test/lib/test_bsie.py b/test/lib/test_bsie.py
index 771a0c2..38e6f59 100644
--- a/test/lib/test_bsie.py
+++ b/test/lib/test_bsie.py
@@ -4,13 +4,15 @@ Part of the bsie test suite.
A copy of the license is provided with the project.
Author: Matthias Baumgartner, 2022
"""
-# imports
+# standard imports
import os
import unittest
# bsie imports
-from bsie.base import extractor
-from bsie.tools import builder
+from bsie.extractor import ExtractorBuilder
+from bsie.extractor.base import SCHEMA_PREAMBLE
+from bsie.lib import PipelineBuilder
+from bsie.reader import ReaderBuilder
from bsie.utils import bsfs, node, ns
# objects to test
@@ -22,9 +24,9 @@ from bsie.lib.bsie import BSIE
class TestBSIE(unittest.TestCase):
def setUp(self):
# reader builder
- rbuild = builder.ReaderBuilder({})
+ rbuild = ReaderBuilder({})
# extractor builder
- ebuild = builder.ExtractorBuilder([
+ ebuild = ExtractorBuilder([
{'bsie.extractor.generic.path.Path': {}},
{'bsie.extractor.generic.stat.Stat': {}},
{'bsie.extractor.generic.constant.Constant': dict(
@@ -39,7 +41,7 @@ class TestBSIE(unittest.TestCase):
])
# build pipeline
self.prefix = bsfs.Namespace('http://example.com/local/')
- pbuild = builder.PipelineBuilder(self.prefix, rbuild, ebuild)
+ pbuild = PipelineBuilder(self.prefix, rbuild, ebuild)
self.pipeline = pbuild.build()
def test_construction(self):
@@ -50,7 +52,7 @@ class TestBSIE(unittest.TestCase):
ns.bse.filesize,
ns.bse.author,
})
- self.assertEqual(lib.schema, bsfs.schema.Schema.from_string(extractor.SCHEMA_PREAMBLE + '''
+ self.assertEqual(lib.schema, bsfs.schema.from_string(SCHEMA_PREAMBLE + '''
bse:filename rdfs:subClassOf bsfs:Predicate ;
rdfs:domain bsfs:File ;
rdfs:range xsd:string ;
@@ -77,7 +79,7 @@ class TestBSIE(unittest.TestCase):
ns.bse.filesize,
ns.bse.author,
})
- self.assertEqual(lib.schema, bsfs.schema.Schema.from_string(extractor.SCHEMA_PREAMBLE + '''
+ self.assertEqual(lib.schema, bsfs.schema.from_string(SCHEMA_PREAMBLE + '''
bse:filesize rdfs:subClassOf bsfs:Predicate ;
rdfs:domain bsfs:File ;
rdfs:range xsd:integer;
@@ -95,7 +97,7 @@ class TestBSIE(unittest.TestCase):
ns.bse.filesize,
ns.bse.author,
})
- self.assertEqual(lib.schema, bsfs.schema.Schema.from_string(extractor.SCHEMA_PREAMBLE + '''
+ self.assertEqual(lib.schema, bsfs.schema.from_string(SCHEMA_PREAMBLE + '''
bse:filename rdfs:subClassOf bsfs:Predicate ;
rdfs:domain bsfs:File ;
rdfs:range xsd:string ;
@@ -122,7 +124,7 @@ class TestBSIE(unittest.TestCase):
self.assertSetEqual(set(lib.principals), {
ns.bse.author,
})
- self.assertEqual(lib.schema, bsfs.schema.Schema.from_string(extractor.SCHEMA_PREAMBLE + '''
+ self.assertEqual(lib.schema, bsfs.schema.from_string(SCHEMA_PREAMBLE + '''
bse:author rdfs:subClassOf bsfs:Predicate ;
rdfs:domain bsfs:Entity ;
rdfs:range xsd:string ;
@@ -137,7 +139,7 @@ class TestBSIE(unittest.TestCase):
self.assertSetEqual(set(lib.principals), {
ns.bse.filesize,
})
- self.assertEqual(lib.schema, bsfs.schema.Schema.from_string(extractor.SCHEMA_PREAMBLE + '''
+ self.assertEqual(lib.schema, bsfs.schema.from_string(SCHEMA_PREAMBLE + '''
bse:filesize rdfs:subClassOf bsfs:Predicate ;
rdfs:domain bsfs:File ;
rdfs:range xsd:integer;
diff --git a/test/lib/test_builder.py b/test/lib/test_builder.py
new file mode 100644
index 0000000..273d620
--- /dev/null
+++ b/test/lib/test_builder.py
@@ -0,0 +1,107 @@
+"""
+
+Part of the bsie test suite.
+A copy of the license is provided with the project.
+Author: Matthias Baumgartner, 2022
+"""
+# standard imports
+import logging
+import unittest
+
+# bsie imports
+from bsie.extractor import ExtractorBuilder
+from bsie.reader import ReaderBuilder
+from bsie.utils import bsfs
+
+# objects to test
+from bsie.lib import PipelineBuilder
+
+
+## code ##
+
+class TestPipelineBuilder(unittest.TestCase):
+ def test_build(self):
+ prefix = bsfs.URI('http://example.com/local/file#')
+ c_schema = '''
+ bse:author rdfs:subClassOf bsfs:Predicate ;
+ rdfs:domain bsfs:Entity ;
+ rdfs:range xsd:string ;
+ bsfs:unique "true"^^xsd:boolean .
+ '''
+ c_tuples = [('http://bsfs.ai/schema/Entity#author', 'Me, myself, and I')]
+ # prepare builders
+ rbuild = ReaderBuilder({})
+ ebuild = ExtractorBuilder([
+ {'bsie.extractor.generic.path.Path': {}},
+ {'bsie.extractor.generic.stat.Stat': {}},
+ {'bsie.extractor.generic.constant.Constant': dict(
+ schema=c_schema,
+ tuples=c_tuples,
+ )},
+ ])
+ # build pipeline
+ builder = PipelineBuilder(prefix, rbuild, ebuild)
+ pipeline = builder.build()
+ # delayed import
+ import bsie.reader.path
+ import bsie.reader.stat
+ import bsie.extractor.generic.path
+ import bsie.extractor.generic.stat
+ import bsie.extractor.generic.constant
+ # check pipeline
+ self.assertDictEqual(pipeline._ext2rdr, {
+ bsie.extractor.generic.path.Path(): bsie.reader.path.Path(),
+ bsie.extractor.generic.stat.Stat(): bsie.reader.stat.Stat(),
+ bsie.extractor.generic.constant.Constant(c_schema, c_tuples): None,
+ })
+
+ # fail to load extractor
+ ebuild_err = ExtractorBuilder([
+ {'bsie.extractor.generic.foo.Foo': {}},
+ {'bsie.extractor.generic.path.Path': {}},
+ ])
+ with self.assertLogs(logging.getLogger('bsie.lib.builder'), logging.ERROR):
+ pipeline = PipelineBuilder(prefix, rbuild, ebuild_err).build()
+ self.assertDictEqual(pipeline._ext2rdr, {
+ bsie.extractor.generic.path.Path(): bsie.reader.path.Path()})
+
+ # fail to build extractor
+ ebuild_err = ExtractorBuilder([
+ {'bsie.extractor.generic.path.Path': {'foo': 123}},
+ {'bsie.extractor.generic.path.Path': {}},
+ ])
+ with self.assertLogs(logging.getLogger('bsie.lib.builder'), logging.ERROR):
+ pipeline = PipelineBuilder(prefix, rbuild, ebuild_err).build()
+ self.assertDictEqual(pipeline._ext2rdr, {
+ bsie.extractor.generic.path.Path(): bsie.reader.path.Path()})
+
+ # fail to load reader
+ with self.assertLogs(logging.getLogger('bsie.lib.builder'), logging.ERROR):
+ # switch reader of an extractor
+ old_reader = bsie.extractor.generic.path.Path.CONTENT_READER
+ bsie.extractor.generic.path.Path.CONTENT_READER = 'bsie.reader.foo.Foo'
+ # build pipeline with invalid reader reference
+ pipeline = PipelineBuilder(prefix, rbuild, ebuild).build()
+ self.assertDictEqual(pipeline._ext2rdr, {
+ bsie.extractor.generic.stat.Stat(): bsie.reader.stat.Stat(),
+ bsie.extractor.generic.constant.Constant(c_schema, c_tuples): None,
+ })
+ # switch back
+ bsie.extractor.generic.path.Path.CONTENT_READER = old_reader
+
+ # fail to build reader
+ rbuild_err = ReaderBuilder({'bsie.reader.stat.Stat': dict(foo=123)})
+ with self.assertLogs(logging.getLogger('bsie.lib.builder'), logging.ERROR):
+ pipeline = PipelineBuilder(prefix, rbuild_err, ebuild).build()
+ self.assertDictEqual(pipeline._ext2rdr, {
+ bsie.extractor.generic.path.Path(): bsie.reader.path.Path(),
+ bsie.extractor.generic.constant.Constant(c_schema, c_tuples): None,
+ })
+
+
+## main ##
+
+if __name__ == '__main__':
+ unittest.main()
+
+## EOF ##
diff --git a/test/tools/test_pipeline.py b/test/lib/test_pipeline.py
index a116a30..8fecc74 100644
--- a/test/tools/test_pipeline.py
+++ b/test/lib/test_pipeline.py
@@ -4,14 +4,13 @@ Part of the bsie test suite.
A copy of the license is provided with the project.
Author: Matthias Baumgartner, 2022
"""
-# imports
+# standard imports
import logging
import os
import unittest
# bsie imports
-from bsie.base import errors
-from bsie.utils import bsfs, node, ns
+from bsie.utils import bsfs, errors, node, ns
import bsie.extractor.generic.constant
import bsie.extractor.generic.path
import bsie.extractor.generic.stat
@@ -19,7 +18,7 @@ import bsie.reader.path
import bsie.reader.stat
# objects to test
-from bsie.tools.pipeline import Pipeline
+from bsie.lib.pipeline import Pipeline
## code ##
@@ -75,7 +74,7 @@ class TestPipeline(unittest.TestCase):
# equivalence respects schema
p2 = Pipeline(self.prefix, self.ext2rdr)
- p2._schema = pipeline.schema.Empty()
+ p2._schema = bsfs.schema.Schema()
self.assertNotEqual(pipeline, p2)
self.assertNotEqual(hash(pipeline), hash(p2))
@@ -101,7 +100,7 @@ class TestPipeline(unittest.TestCase):
p_author = pipeline.schema.predicate(ns.bse.author)
p_rating = pipeline.schema.predicate(ns.bse.rating)
entity = pipeline.schema.node(ns.bsfs.File)
- p_invalid = pipeline.schema.predicate(ns.bsfs.Predicate).get_child(ns.bse.foo, range=entity)
+ p_invalid = pipeline.schema.predicate(ns.bsfs.Predicate).child(ns.bse.foo, range=entity)
# extract given predicates
self.assertSetEqual(set(pipeline(testfile, {p_filename, p_filesize})), {
@@ -140,7 +139,7 @@ class TestPipeline(unittest.TestCase):
raise errors.ReaderError('reader error')
pipeline = Pipeline(self.prefix, {bsie.extractor.generic.path.Path(): FaultyReader()})
- with self.assertLogs(logging.getLogger('bsie.tools.pipeline'), logging.ERROR):
+ with self.assertLogs(logging.getLogger('bsie.lib.pipeline'), logging.ERROR):
testfile = os.path.join(os.path.dirname(__file__), 'testfile.t')
p_filename = pipeline.schema.predicate(ns.bse.filename)
self.assertSetEqual(set(pipeline(testfile, {p_filename})), set())
@@ -151,7 +150,7 @@ class TestPipeline(unittest.TestCase):
raise errors.ExtractorError('extractor error')
pipeline = Pipeline(self.prefix, {FaultyExtractor(): bsie.reader.path.Path()})
- with self.assertLogs(logging.getLogger('bsie.tools.pipeline'), logging.ERROR):
+ with self.assertLogs(logging.getLogger('bsie.lib.pipeline'), logging.ERROR):
testfile = os.path.join(os.path.dirname(__file__), 'testfile.t')
p_filename = pipeline.schema.predicate(ns.bse.filename)
self.assertSetEqual(set(pipeline(testfile, {p_filename})), set())
diff --git a/test/tools/__init__.py b/test/reader/image/__init__.py
index e69de29..e69de29 100644
--- a/test/tools/__init__.py
+++ b/test/reader/image/__init__.py
diff --git a/test/reader/image/load_nef.py b/test/reader/image/load_nef.py
new file mode 100644
index 0000000..5ba0adc
--- /dev/null
+++ b/test/reader/image/load_nef.py
@@ -0,0 +1,28 @@
+"""
+
+Part of the bsie test suite.
+A copy of the license is provided with the project.
+Author: Matthias Baumgartner, 2022
+"""
+# standard imports
+import os
+
+# external imports
+import requests
+
+# constants
+IMAGE_URL = 'http://igsor.net/eik7AhvohghaeN5.nef'
+
+## code ##
+
+def get():
+ """Download a raw test image."""
+ target = os.path.join(os.path.dirname(__file__), 'testimage.nef')
+ if not os.path.exists(target):
+ with open(target, 'wb') as ofile:
+ ans = requests.get(IMAGE_URL)
+ ofile.write(ans.content)
+
+
+
+## EOF ##
diff --git a/test/reader/image/test_image.py b/test/reader/image/test_image.py
new file mode 100644
index 0000000..26f6a93
--- /dev/null
+++ b/test/reader/image/test_image.py
@@ -0,0 +1,54 @@
+"""
+
+Part of the bsie test suite.
+A copy of the license is provided with the project.
+Author: Matthias Baumgartner, 2022
+"""
+# standard imports
+import importlib
+import os
+import unittest
+
+# bsie imports
+from bsie.utils import errors
+
+# objects to test
+from bsie.reader.image import Image
+
+
+## code ##
+
+class TestImage(unittest.TestCase):
+ def setUp(self):
+ if __package__ is None or __package__ == '': # direct call or local discovery
+ importlib.import_module('load_nef', __package__).get()
+ else: # parent discovery
+ importlib.import_module('.load_nef', __package__).get()
+
+ def test_construct(self):
+ image = Image({})
+ self.assertIsInstance(image, Image)
+ self.assertEqual(len(image._children), 2)
+
+ def test_call(self):
+ image = Image({})
+ # call returns raw image
+ img = image(os.path.join(os.path.dirname(__file__), 'testimage.nef'))
+ self.assertEqual(img.size, (6016, 4016)) # FIXME: change when image was replaced
+ img.close()
+ # call returns jpeg image
+ img = image(os.path.join(os.path.dirname(__file__), 'testimage.jpg'))
+ self.assertEqual(img.size, (1, 1))
+ img.close()
+ # call raises error if file cannot be read
+ self.assertRaises(errors.ReaderError, image,
+ os.path.join(os.path.dirname(__file__), 'invalid.nef'))
+ self.assertRaises(errors.ReaderError, image,
+ os.path.join(os.path.dirname(__file__), 'invalid.jpg'))
+
+## main ##
+
+if __name__ == '__main__':
+ unittest.main()
+
+## EOF ##
diff --git a/test/reader/image/test_pillow.py b/test/reader/image/test_pillow.py
new file mode 100644
index 0000000..8abf5c1
--- /dev/null
+++ b/test/reader/image/test_pillow.py
@@ -0,0 +1,44 @@
+"""
+
+Part of the bsie test suite.
+A copy of the license is provided with the project.
+Author: Matthias Baumgartner, 2022
+"""
+# standard imports
+import os
+import unittest
+
+# external imports
+import PIL.Image
+
+# bsie imports
+from bsie.utils import errors
+
+# objects to test
+from bsie.reader.image._pillow import PillowImage
+
+
+## code ##
+
+class TestPillowImage(unittest.TestCase):
+ def test_call(self):
+ rdr = PillowImage()
+ # returns PIL image
+ img = rdr(os.path.join(os.path.dirname(__file__), 'testimage.jpg'))
+ self.assertEqual(img.size, (1, 1))
+ self.assertEqual(img.getdata().getpixel((0, 0)), (0, 0, 0))
+ img.close()
+ # raises exception when image cannot be read
+ self.assertRaises(errors.ReaderError, rdr,
+ os.path.join(os.path.dirname(__file__), 'invalid.jpg'))
+ # NOTE: PIL can actually read raw image files (returns the thumbnail)
+ #self.assertRaises(errors.ReaderError, rdr,
+ # os.path.join(os.path.dirname(__file__), 'testimage.nef'))
+
+
+## main ##
+
+if __name__ == '__main__':
+ unittest.main()
+
+## EOF ##
diff --git a/test/reader/image/test_raw_image.py b/test/reader/image/test_raw_image.py
new file mode 100644
index 0000000..ba21b5a
--- /dev/null
+++ b/test/reader/image/test_raw_image.py
@@ -0,0 +1,53 @@
+"""
+
+Part of the bsie test suite.
+A copy of the license is provided with the project.
+Author: Matthias Baumgartner, 2022
+"""
+# standard imports
+import importlib
+import os
+import unittest
+
+# external imports
+import PIL.Image
+
+# bsie imports
+from bsie.utils import errors
+
+# objects to test
+from bsie.reader.image._raw import RawImage
+
+
+## code ##
+
+class TestRawImage(unittest.TestCase):
+ def setUp(self):
+ if __package__ is None or __package__ == '': # direct call or local discovery
+ importlib.import_module('load_nef', __package__).get()
+ else: # parent discovery
+ importlib.import_module('.load_nef', __package__).get()
+
+ def test_call(self):
+ rdr = RawImage()
+ # returns PIL image
+ img = rdr(os.path.join(os.path.dirname(__file__), 'testimage.nef'))
+ self.assertEqual(img.size, (6016, 4016)) # FIXME: change when image was replaced
+ #self.assertEqual(img.size, (1, 1))
+ #self.assertEqual(img.getdata().getpixel((0, 0)), (0, 0, 0))
+ img.close()
+ # raises exception when image cannot be read
+ self.assertRaises(errors.ReaderError, rdr,
+ os.path.join(os.path.dirname(__file__), 'invalid.nef'))
+ self.assertRaises(errors.ReaderError, rdr,
+ os.path.join(os.path.dirname(__file__), 'testimage.jpg'))
+
+
+
+
+## main ##
+
+if __name__ == '__main__':
+ unittest.main()
+
+## EOF ##
diff --git a/test/reader/image/testimage.jpg b/test/reader/image/testimage.jpg
new file mode 100644
index 0000000..ea7af63
--- /dev/null
+++ b/test/reader/image/testimage.jpg
Binary files differ
diff --git a/test/base/test_reader.py b/test/reader/test_base.py
index a907eb9..41f4c29 100644
--- a/test/base/test_reader.py
+++ b/test/reader/test_base.py
@@ -4,16 +4,16 @@ Part of the bsie test suite.
A copy of the license is provided with the project.
Author: Matthias Baumgartner, 2022
"""
-# imports
+# standard imports
import unittest
# objects to test
-from bsie import base
+from bsie.reader import Reader
## code ##
-class StubReader(base.Reader):
+class StubReader(Reader):
def __call__(self, path):
raise NotImplementedError()
diff --git a/test/reader/test_builder.py b/test/reader/test_builder.py
new file mode 100644
index 0000000..92e9edc
--- /dev/null
+++ b/test/reader/test_builder.py
@@ -0,0 +1,54 @@
+"""
+
+Part of the bsie test suite.
+A copy of the license is provided with the project.
+Author: Matthias Baumgartner, 2022
+"""
+# standard imports
+import unittest
+
+# bsie imports
+from bsie.utils import errors
+
+# objects to test
+from bsie.reader import ReaderBuilder
+
+
+## code ##
+
+class TestReaderBuilder(unittest.TestCase):
+ def test_build(self):
+ builder = ReaderBuilder({'bsie.reader.path.Path': {}})
+ # build configured reader
+ cls = builder.build('bsie.reader.path.Path')
+ import bsie.reader.path
+ self.assertIsInstance(cls, bsie.reader.path.Path)
+ # build unconfigured reader
+ cls = builder.build('bsie.reader.stat.Stat')
+ import bsie.reader.stat
+ self.assertIsInstance(cls, bsie.reader.stat.Stat)
+ # re-build previous reader (test cache)
+ self.assertEqual(cls, builder.build('bsie.reader.stat.Stat'))
+ # test invalid
+ self.assertRaises(TypeError, builder.build, 123)
+ self.assertRaises(TypeError, builder.build, None)
+ self.assertRaises(ValueError, builder.build, '')
+ self.assertRaises(ValueError, builder.build, 'Path')
+ self.assertRaises(errors.BuilderError, builder.build, 'path.Path')
+ # invalid config
+ builder = ReaderBuilder({'bsie.reader.stat.Stat': dict(foo=123)})
+ self.assertRaises(errors.BuilderError, builder.build, 'bsie.reader.stat.Stat')
+ builder = ReaderBuilder({'bsie.reader.stat.Stat': 123})
+ self.assertRaises(TypeError, builder.build, 'bsie.reader.stat.Stat')
+ # no instructions
+ builder = ReaderBuilder({})
+ cls = builder.build('bsie.reader.stat.Stat')
+ self.assertIsInstance(cls, bsie.reader.stat.Stat)
+
+
+## main ##
+
+if __name__ == '__main__':
+ unittest.main()
+
+## EOF ##
diff --git a/test/reader/test_chain.py b/test/reader/test_chain.py
new file mode 100644
index 0000000..901faa1
--- /dev/null
+++ b/test/reader/test_chain.py
@@ -0,0 +1,85 @@
+"""
+
+Part of the bsie test suite.
+A copy of the license is provided with the project.
+Author: Matthias Baumgartner, 2022
+"""
+# standard imports
+import logging
+import os
+import unittest
+
+# bsie imports
+from bsie.utils import errors
+import bsie.reader.path
+import bsie.reader.stat
+
+# objects to test
+from bsie.reader.chain import ReaderChain
+
+
+## code ##
+
+class TestReaderChain(unittest.TestCase):
+ def test_construct(self):
+ # subreaders are built
+ chain = ReaderChain(['bsie.reader.stat.Stat', 'bsie.reader.path.Path'], {})
+ self.assertIsInstance(chain, ReaderChain)
+ self.assertEqual(chain._children,
+ (bsie.reader.stat.Stat(), bsie.reader.path.Path()))
+ # subreaders that failed to build are omitted
+ with self.assertLogs(logging.getLogger('bsie.reader.chain'), logging.WARNING):
+ chain = ReaderChain(['bsie.reader.stat.Stat', 'bsie.reader.invalid.Invalid'], {})
+ self.assertEqual(chain._children, (bsie.reader.stat.Stat(), ))
+ with self.assertLogs(logging.getLogger('bsie.reader.chain'), logging.WARNING):
+ chain = ReaderChain(['bsie.reader.stat.Stat', 'bsie.reader.path.Invalid'], {})
+ self.assertEqual(chain._children, (bsie.reader.stat.Stat(), ))
+ # warning is issued if there are no subreaders
+ with self.assertLogs(logging.getLogger('bsie.reader.chain'), logging.WARNING):
+ chain = ReaderChain([], {})
+ self.assertEqual(chain._children, tuple())
+
+ def test_essentials(self):
+ chain = ReaderChain(['bsie.reader.stat.Stat', 'bsie.reader.path.Path'], {})
+ # identity
+ self.assertEqual(chain, chain)
+ self.assertEqual(hash(chain), hash(chain))
+ # comparison works across instances
+ self.assertEqual(chain,
+ ReaderChain(['bsie.reader.stat.Stat', 'bsie.reader.path.Path'], {}))
+ self.assertEqual(hash(chain),
+ hash(ReaderChain(['bsie.reader.stat.Stat', 'bsie.reader.path.Path'], {})))
+ # comparison respects subreaders
+ self.assertNotEqual(hash(chain),
+ hash(ReaderChain(['bsie.reader.path.Path'], {})))
+ self.assertNotEqual(hash(chain),
+ hash(ReaderChain(['bsie.reader.path.Path'], {})))
+ # comparison respects subreader order
+ self.assertNotEqual(chain,
+ ReaderChain(['bsie.reader.path.Path', 'bsie.reader.stat.Stat'], {}))
+ self.assertNotEqual(hash(chain),
+ hash(ReaderChain(['bsie.reader.path.Path', 'bsie.reader.stat.Stat'], {})))
+ # string representation
+ self.assertEqual(str(chain), 'ReaderChain(Stat, Path)')
+ self.assertEqual(repr(chain), 'ReaderChain((Stat(), Path()))')
+
+ def test_call(self):
+ chain = ReaderChain(['bsie.reader.stat.Stat', 'bsie.reader.path.Path'], {})
+ # chain first probes first child
+ self.assertEqual(chain(__file__), os.stat(__file__))
+ # chain probes second child if first one fails
+ self.assertEqual(chain(''), '')
+ self.assertEqual(chain('missing-file'), 'missing-file')
+
+ # chain raises a ReaderError if children were exhausted
+ chain = ReaderChain(['bsie.reader.stat.Stat'], {})
+ # the only child fails and there is no further child to probe
+ self.assertRaises(errors.ReaderError, chain, '')
+
+
+## main ##
+
+if __name__ == '__main__':
+ unittest.main()
+
+## EOF ##
diff --git a/test/reader/test_path.py b/test/reader/test_path.py
index fd7bc5a..95e447f 100644
--- a/test/reader/test_path.py
+++ b/test/reader/test_path.py
@@ -4,7 +4,7 @@ Part of the bsie test suite.
A copy of the license is provided with the project.
Author: Matthias Baumgartner, 2022
"""
-# imports
+# standard imports
import unittest
# objects to test
diff --git a/test/reader/test_stat.py b/test/reader/test_stat.py
index d12ad9c..fd9fdcd 100644
--- a/test/reader/test_stat.py
+++ b/test/reader/test_stat.py
@@ -4,12 +4,12 @@ Part of the bsie test suite.
A copy of the license is provided with the project.
Author: Matthias Baumgartner, 2022
"""
-# imports
+# standard imports
import os
import unittest
# bsie imports
-from bsie.base import errors
+from bsie.utils import errors
# objects to test
from bsie.reader.stat import Stat
diff --git a/test/tools/test_builder.py b/test/tools/test_builder.py
deleted file mode 100644
index 62c637c..0000000
--- a/test/tools/test_builder.py
+++ /dev/null
@@ -1,246 +0,0 @@
-"""
-
-Part of the bsie test suite.
-A copy of the license is provided with the project.
-Author: Matthias Baumgartner, 2022
-"""
-# imports
-import logging
-import unittest
-
-# bsie imports
-from bsie import base
-from bsie.utils import bsfs
-
-# objects to test
-from bsie.tools.builder import ExtractorBuilder
-from bsie.tools.builder import PipelineBuilder
-from bsie.tools.builder import ReaderBuilder
-from bsie.tools.builder import _safe_load
-from bsie.tools.builder import _unpack_name
-
-
-## code ##
-
-class TestUtils(unittest.TestCase):
- def test_safe_load(self):
- # invalid module
- self.assertRaises(base.errors.LoaderError, _safe_load, 'dBGHMSAYOoKeKMpywDoKZQycENFPvN', 'foobar')
- self.assertRaises(base.errors.LoaderError, _safe_load, 'dBGHMSAYOoKeKMpywDoKZQycENFPvN.bar', 'foobar')
- # partially valid module
- self.assertRaises(base.errors.LoaderError, _safe_load, 'os.foo', 'foobar')
- # invalid class
- self.assertRaises(base.errors.LoaderError, _safe_load, 'os.path', 'foo')
- # valid module and class
- cls = _safe_load('collections.abc', 'Container')
- import collections.abc
- self.assertEqual(cls, collections.abc.Container)
-
- def test_unpack_name(self):
- self.assertRaises(TypeError, _unpack_name, 123)
- self.assertRaises(TypeError, _unpack_name, None)
- self.assertRaises(ValueError, _unpack_name, '')
- self.assertRaises(ValueError, _unpack_name, 'path')
- self.assertRaises(ValueError, _unpack_name, '.Path')
- self.assertEqual(_unpack_name('path.Path'), ('path', 'Path'))
- self.assertEqual(_unpack_name('path.foo.bar.Path'), ('path.foo.bar', 'Path'))
-
-
-class TestReaderBuilder(unittest.TestCase):
- def test_build(self):
- builder = ReaderBuilder({'bsie.reader.path.Path': {}})
- # build configured reader
- cls = builder.build('bsie.reader.path.Path')
- import bsie.reader.path
- self.assertIsInstance(cls, bsie.reader.path.Path)
- # build unconfigured reader
- cls = builder.build('bsie.reader.stat.Stat')
- import bsie.reader.stat
- self.assertIsInstance(cls, bsie.reader.stat.Stat)
- # re-build previous reader (test cache)
- self.assertEqual(cls, builder.build('bsie.reader.stat.Stat'))
- # test invalid
- self.assertRaises(TypeError, builder.build, 123)
- self.assertRaises(TypeError, builder.build, None)
- self.assertRaises(ValueError, builder.build, '')
- self.assertRaises(ValueError, builder.build, 'Path')
- self.assertRaises(base.errors.BuilderError, builder.build, 'path.Path')
- # invalid config
- builder = ReaderBuilder({'bsie.reader.stat.Stat': dict(foo=123)})
- self.assertRaises(base.errors.BuilderError, builder.build, 'bsie.reader.stat.Stat')
- builder = ReaderBuilder({'bsie.reader.stat.Stat': 123})
- self.assertRaises(TypeError, builder.build, 'bsie.reader.stat.Stat')
- # no instructions
- builder = ReaderBuilder({})
- cls = builder.build('bsie.reader.stat.Stat')
- self.assertIsInstance(cls, bsie.reader.stat.Stat)
-
-
-
-class TestExtractorBuilder(unittest.TestCase):
- def test_iter(self):
- # no specifications
- self.assertListEqual(list(ExtractorBuilder([])), [])
- # some specifications
- builder = ExtractorBuilder([
- {'bsie.extractor.generic.path.Path': {}},
- {'bsie.extractor.generic.stat.Stat': {}},
- {'bsie.extractor.generic.path.Path': {}},
- ])
- self.assertListEqual(list(builder), [0, 1, 2])
-
- def test_build(self):
- # simple and repeated extractors
- builder = ExtractorBuilder([
- {'bsie.extractor.generic.path.Path': {}},
- {'bsie.extractor.generic.stat.Stat': {}},
- {'bsie.extractor.generic.path.Path': {}},
- ])
- ext = [builder.build(0), builder.build(1), builder.build(2)]
- import bsie.extractor.generic.path
- import bsie.extractor.generic.stat
- self.assertListEqual(ext, [
- bsie.extractor.generic.path.Path(),
- bsie.extractor.generic.stat.Stat(),
- bsie.extractor.generic.path.Path(),
- ])
- # out-of-bounds raises KeyError
- self.assertRaises(IndexError, builder.build, 3)
-
- # building with args
- builder = ExtractorBuilder([
- {'bsie.extractor.generic.constant.Constant': {
- 'schema': '''
- bse:author rdfs:subClassOf bsfs:Predicate ;
- rdfs:domain bsfs:Entity ;
- rdfs:range xsd:string ;
- bsfs:unique "true"^^xsd:boolean .
- bse:rating rdfs:subClassOf bsfs:Predicate ;
- rdfs:domain bsfs:Entity ;
- rdfs:range xsd:integer ;
- bsfs:unique "true"^^xsd:boolean .
- ''',
- 'tuples': [
- ('http://bsfs.ai/schema/Entity#author', 'Me, myself, and I'),
- ('http://bsfs.ai/schema/Entity#rating', 123),
- ],
- }}])
- obj = builder.build(0)
- import bsie.extractor.generic.constant
- self.assertEqual(obj, bsie.extractor.generic.constant.Constant('''
- bse:author rdfs:subClassOf bsfs:Predicate ;
- rdfs:domain bsfs:Entity ;
- rdfs:range xsd:string ;
- bsfs:unique "true"^^xsd:boolean .
- bse:rating rdfs:subClassOf bsfs:Predicate ;
- rdfs:domain bsfs:Entity ;
- rdfs:range xsd:integer ;
- bsfs:unique "true"^^xsd:boolean .
- ''', [
- ('http://bsfs.ai/schema/Entity#author', 'Me, myself, and I'),
- ('http://bsfs.ai/schema/Entity#rating', 123),
- ]))
-
- # building with invalid args
- self.assertRaises(base.errors.BuilderError, ExtractorBuilder(
- [{'bsie.extractor.generic.path.Path': {'foo': 123}}]).build, 0)
- # non-dict build specification
- self.assertRaises(TypeError, ExtractorBuilder(
- [('bsie.extractor.generic.path.Path', {})]).build, 0)
- # multiple keys per build specification
- self.assertRaises(TypeError, ExtractorBuilder(
- [{'bsie.extractor.generic.path.Path': {},
- 'bsie.extractor.generic.stat.Stat': {}}]).build, 0)
- # non-dict value for kwargs
- self.assertRaises(TypeError, ExtractorBuilder(
- [{'bsie.extractor.generic.path.Path': 123}]).build, 0)
-
-
-
-
-class TestPipelineBuilder(unittest.TestCase):
- def test_build(self):
- prefix = bsfs.URI('http://example.com/local/file#')
- c_schema = '''
- bse:author rdfs:subClassOf bsfs:Predicate ;
- rdfs:domain bsfs:Entity ;
- rdfs:range xsd:string ;
- bsfs:unique "true"^^xsd:boolean .
- '''
- c_tuples = [('http://bsfs.ai/schema/Entity#author', 'Me, myself, and I')]
- # prepare builders
- rbuild = ReaderBuilder({})
- ebuild = ExtractorBuilder([
- {'bsie.extractor.generic.path.Path': {}},
- {'bsie.extractor.generic.stat.Stat': {}},
- {'bsie.extractor.generic.constant.Constant': dict(
- schema=c_schema,
- tuples=c_tuples,
- )},
- ])
- # build pipeline
- builder = PipelineBuilder(prefix, rbuild, ebuild)
- pipeline = builder.build()
- # delayed import
- import bsie.reader.path
- import bsie.reader.stat
- import bsie.extractor.generic.path
- import bsie.extractor.generic.stat
- import bsie.extractor.generic.constant
- # check pipeline
- self.assertDictEqual(pipeline._ext2rdr, {
- bsie.extractor.generic.path.Path(): bsie.reader.path.Path(),
- bsie.extractor.generic.stat.Stat(): bsie.reader.stat.Stat(),
- bsie.extractor.generic.constant.Constant(c_schema, c_tuples): None,
- })
-
- # fail to load extractor
- ebuild_err = ExtractorBuilder([
- {'bsie.extractor.generic.foo.Foo': {}},
- {'bsie.extractor.generic.path.Path': {}},
- ])
- with self.assertLogs(logging.getLogger('bsie.tools.builder'), logging.ERROR):
- pipeline = PipelineBuilder(prefix, rbuild, ebuild_err).build()
- self.assertDictEqual(pipeline._ext2rdr, {
- bsie.extractor.generic.path.Path(): bsie.reader.path.Path()})
-
- # fail to build extractor
- ebuild_err = ExtractorBuilder([
- {'bsie.extractor.generic.path.Path': {'foo': 123}},
- {'bsie.extractor.generic.path.Path': {}},
- ])
- with self.assertLogs(logging.getLogger('bsie.tools.builder'), logging.ERROR):
- pipeline = PipelineBuilder(prefix, rbuild, ebuild_err).build()
- self.assertDictEqual(pipeline._ext2rdr, {
- bsie.extractor.generic.path.Path(): bsie.reader.path.Path()})
-
- # fail to load reader
- with self.assertLogs(logging.getLogger('bsie.tools.builder'), logging.ERROR):
- # switch reader of an extractor
- old_reader = bsie.extractor.generic.path.Path.CONTENT_READER
- bsie.extractor.generic.path.Path.CONTENT_READER = 'bsie.reader.foo.Foo'
- # build pipeline with invalid reader reference
- pipeline = PipelineBuilder(prefix, rbuild, ebuild).build()
- self.assertDictEqual(pipeline._ext2rdr, {
- bsie.extractor.generic.stat.Stat(): bsie.reader.stat.Stat(),
- bsie.extractor.generic.constant.Constant(c_schema, c_tuples): None,
- })
- # switch back
- bsie.extractor.generic.path.Path.CONTENT_READER = old_reader
-
- # fail to build reader
- rbuild_err = ReaderBuilder({'bsie.reader.stat.Stat': dict(foo=123)})
- with self.assertLogs(logging.getLogger('bsie.tools.builder'), logging.ERROR):
- pipeline = PipelineBuilder(prefix, rbuild_err, ebuild).build()
- self.assertDictEqual(pipeline._ext2rdr, {
- bsie.extractor.generic.path.Path(): bsie.reader.path.Path(),
- bsie.extractor.generic.constant.Constant(c_schema, c_tuples): None,
- })
-
-
-## main ##
-
-if __name__ == '__main__':
- unittest.main()
-
-## EOF ##
diff --git a/test/tools/testfile.t b/test/tools/testfile.t
deleted file mode 100644
index 3b18e51..0000000
--- a/test/tools/testfile.t
+++ /dev/null
@@ -1 +0,0 @@
-hello world
diff --git a/test/utils/filematcher/__init__.py b/test/utils/filematcher/__init__.py
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/test/utils/filematcher/__init__.py
diff --git a/test/utils/filematcher/empty b/test/utils/filematcher/empty
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/test/utils/filematcher/empty
diff --git a/test/utils/filematcher/test_matcher.py b/test/utils/filematcher/test_matcher.py
new file mode 100644
index 0000000..c3cccee
--- /dev/null
+++ b/test/utils/filematcher/test_matcher.py
@@ -0,0 +1,232 @@
+"""
+
+Part of the bsie test suite.
+A copy of the license is provided with the project.
+Author: Matthias Baumgartner, 2022
+"""
+# standard imports
+import os
+import stat
+import tempfile
+import unittest
+
+# objects to test
+from bsie.utils.filematcher import matcher
+
+
+## code ##
+
+class FakeMatcher(matcher.Matcher):
+ def __call__(self, *args, **kwargs):
+ pass
+
+class FakeCriterion(matcher.Criterion):
+ def __call__(self, *args, **kwargs):
+ pass
+
+class FakeAggregate(matcher.Aggregate):
+ def __call__(self, *args, **kwargs):
+ pass
+
+class TestMatcher(unittest.TestCase):
+ def setUp(self):
+ # paths
+ self.image = os.path.join(os.path.dirname(__file__), 'testimage.jpg')
+ self.text= os.path.join(os.path.dirname(__file__), 'textfile.t')
+ self.empty = os.path.join(os.path.dirname(__file__), 'empty')
+ self.missing = os.path.join(os.path.dirname(__file__), 'missing.jpg')
+
+ def test_matcher_skeleton(self):
+ # node: iteration and length
+ self.assertSetEqual(set(iter(FakeMatcher(1,2,3))), {1,2,3})
+ self.assertSetEqual(set(iter(FakeMatcher([1,2,3]))), {1,2,3})
+ self.assertEqual(len(FakeMatcher([1,2,3])), 3)
+ self.assertEqual(len(FakeMatcher(1,2,3)), 3)
+ self.assertEqual(len(FakeMatcher()), 0)
+ self.assertIn(1, FakeMatcher(1,2,3))
+ self.assertIn(3, FakeMatcher([1,2,3]))
+ self.assertNotIn(0, FakeMatcher(1,2,3))
+ self.assertNotIn(4, FakeMatcher([1,2,3]))
+ # node: comparison
+ self.assertEqual(FakeMatcher([1,2,3]), FakeMatcher([1,2,3]))
+ self.assertEqual(FakeMatcher(1,2,3), FakeMatcher(1,2,3))
+ self.assertEqual(FakeMatcher(1,2,3), FakeMatcher([1,2,3]))
+ self.assertEqual(FakeMatcher(1,2,3), FakeMatcher((1,2,3)))
+ self.assertNotEqual(FakeMatcher(1,2,3), FakeMatcher(1,2,4))
+ self.assertNotEqual(FakeMatcher(1,2,3), FakeMatcher(1,2,3,4))
+ self.assertNotEqual(FakeMatcher(1,2,3), FakeMatcher(1,2))
+ self.assertEqual(hash(FakeMatcher([1,2,3])), hash(FakeMatcher([1,2,3])))
+ self.assertEqual(hash(FakeMatcher(1,2,3)), hash(FakeMatcher(1,2,3)))
+ self.assertEqual(hash(FakeMatcher(1,2,3)), hash(FakeMatcher([1,2,3])))
+ self.assertEqual(hash(FakeMatcher(1,2,3)), hash(FakeMatcher((1,2,3))))
+ # node: representation
+ self.assertEqual(repr(FakeMatcher(1,2,3)), 'FakeMatcher({1, 2, 3})')
+
+ # criterion
+ self.assertEqual(repr(FakeCriterion(1,2,3)), 'FakeCriterion({1, 2, 3})')
+ self.assertEqual(hash(FakeCriterion(1,2,3)), hash(FakeCriterion(1,2,3)))
+ self.assertEqual(FakeCriterion(1,2,3), FakeCriterion([1,2,3]))
+ self.assertNotEqual(FakeCriterion(1,2,3), FakeCriterion(1,2))
+ self.assertNotEqual(FakeCriterion(1,2,3), FakeMatcher(1,2,3))
+ self.assertSetEqual(FakeCriterion(1,2,3).accepted(), {1,2,3})
+
+ # aggregate
+ self.assertEqual(repr(FakeAggregate(1,2,3)), 'FakeAggregate({1, 2, 3})')
+ self.assertNotEqual(FakeAggregate(1,2,3), FakeMatcher(1,2,3))
+
+ def test_any(self):
+ self.assertTrue(matcher.Any()(self.image))
+ self.assertTrue(matcher.Any()(self.text))
+ self.assertTrue(matcher.Any()(self.missing))
+ self.assertTrue(matcher.Any()(self.empty))
+
+ def test_nothing(self):
+ self.assertFalse(matcher.Nothing()(self.image))
+ self.assertFalse(matcher.Nothing()(self.text))
+ self.assertFalse(matcher.Nothing()(self.missing))
+ self.assertFalse(matcher.Nothing()(self.empty))
+
+ def test_exists(self):
+ self.assertTrue(matcher.Exists()(self.image))
+ self.assertTrue(matcher.Exists()(self.text))
+ self.assertTrue(matcher.Exists()(self.empty))
+ self.assertFalse(matcher.Exists()(self.missing))
+
+ def test_isfile(self):
+ self.assertTrue(matcher.IsFile()(self.image))
+ self.assertTrue(matcher.IsFile()(self.text))
+ self.assertFalse(matcher.IsFile()(self.missing))
+ self.assertFalse(matcher.IsFile()(os.path.dirname(self.image)))
+
+ def test_isdir(self):
+ self.assertTrue(matcher.IsDir()(os.path.dirname(self.image)))
+ self.assertFalse(matcher.IsDir()(self.image))
+ self.assertFalse(matcher.IsDir()(self.text))
+ self.assertFalse(matcher.IsDir()(self.missing))
+
+ def test_islink(self):
+ self.assertFalse(matcher.IsLink()(os.path.dirname(self.image)))
+ self.assertFalse(matcher.IsLink()(self.image))
+ self.assertFalse(matcher.IsLink()(self.text))
+ _, temp = tempfile.mkstemp(prefix='bsie-test-')
+ templink = temp + '-link'
+ os.symlink(temp, templink)
+ self.assertTrue(matcher.IsLink()(templink))
+ os.unlink(templink)
+ os.unlink(temp)
+
+ def test_isabs(self):
+ self.assertTrue(matcher.IsAbs()(os.path.abspath(self.image)))
+ self.assertTrue(matcher.IsAbs()(os.path.abspath(self.text)))
+ self.assertFalse(matcher.IsAbs()(os.path.relpath(self.text, os.path.dirname(self.text))))
+
+ def test_isrel(self):
+ self.assertFalse(matcher.IsRel()(os.path.abspath(self.image)))
+ self.assertFalse(matcher.IsRel()(os.path.abspath(self.text)))
+ self.assertTrue(matcher.IsRel()(os.path.relpath(self.text, os.path.dirname(self.text))))
+ self.assertTrue(matcher.IsRel()(os.path.basename(self.text)))
+
+ def test_ismount(self):
+ self.assertFalse(matcher.IsMount()(self.image))
+ self.assertFalse(matcher.IsMount()(self.text))
+ self.assertFalse(matcher.IsMount()(self.missing))
+ # there's no reasonable way to test a positive case
+
+ def test_isempty(self):
+ self.assertTrue(matcher.IsEmpty()(self.empty))
+ self.assertFalse(matcher.IsEmpty()(self.image))
+ self.assertFalse(matcher.IsEmpty()(self.text))
+ self.assertFalse(matcher.IsEmpty()(self.missing))
+
+ def test_isreadable(self):
+ self.assertTrue(matcher.IsReadable()(self.empty))
+ self.assertTrue(matcher.IsReadable()(self.image))
+ self.assertFalse(matcher.IsReadable()(self.missing))
+ _, temp = tempfile.mkstemp(prefix='bsie-test-')
+ os.chmod(temp, 0)
+ self.assertFalse(matcher.IsReadable()(temp))
+ os.unlink(temp)
+
+ def test_iswritable(self):
+ self.assertTrue(matcher.IsWritable()(self.empty))
+ self.assertTrue(matcher.IsWritable()(self.image))
+ self.assertFalse(matcher.IsWritable()(self.missing))
+ _, temp = tempfile.mkstemp(prefix='bsie-test-')
+ os.chmod(temp, 0)
+ self.assertFalse(matcher.IsWritable()(temp))
+ os.unlink(temp)
+
+ def test_isexecutable(self):
+ self.assertFalse(matcher.IsExecutable()(self.empty))
+ self.assertFalse(matcher.IsExecutable()(self.image))
+ self.assertFalse(matcher.IsExecutable()(self.missing))
+ _, temp = tempfile.mkstemp(prefix='bsie-test-')
+ os.chmod(temp, stat.S_IEXEC)
+ self.assertTrue(matcher.IsExecutable()(temp))
+ os.unlink(temp)
+
+ def test_extension(self):
+ self.assertTrue(matcher.Extension('jpg')(self.image))
+ self.assertTrue(matcher.Extension('jpg', 'png')(self.image))
+ self.assertTrue(matcher.Extension('jpg', 't')(self.text))
+ self.assertTrue(matcher.Extension('jpg', 'png', 't')(self.missing))
+ self.assertTrue(matcher.Extension('')(self.empty))
+
+ self.assertFalse(matcher.Extension()(self.image))
+ self.assertFalse(matcher.Extension('jpeg')(self.image))
+ self.assertFalse(matcher.Extension('.t')(self.text))
+ self.assertFalse(matcher.Extension('png', 't')(self.missing))
+ self.assertFalse(matcher.Extension('tiff')(self.empty))
+
+ def test_mime(self):
+ self.assertTrue(matcher.Mime('image/jpeg')(self.image))
+ self.assertTrue(matcher.Mime('image/tiff', 'image/jpeg')(self.image))
+ self.assertTrue(matcher.Mime('text/plain', 'image/jpeg')(self.text))
+ self.assertTrue(matcher.Mime('inode/x-empty')(self.empty))
+
+ self.assertFalse(matcher.Mime()(self.image))
+ self.assertFalse(matcher.Mime('image')(self.image))
+ self.assertFalse(matcher.Mime('image/tiff', 'image/png')(self.image))
+ self.assertFalse(matcher.Mime('')(self.text))
+ self.assertFalse(matcher.Mime('text')(self.text))
+ self.assertFalse(matcher.Mime('tiff')(self.empty))
+ self.assertFalse(matcher.Mime()(self.empty))
+ self.assertFalse(matcher.Mime('')(self.empty))
+ self.assertFalse(matcher.Mime()(self.missing))
+ self.assertFalse(matcher.Mime('')(self.missing))
+ self.assertFalse(matcher.Mime('inode/x-empty')(self.missing))
+
+ def test_not(self):
+ self.assertFalse(matcher.NOT(matcher.Mime('image/jpeg'))(self.image))
+ self.assertTrue(matcher.NOT(matcher.Mime('text/plain'))(self.image))
+
+ def test_and(self):
+ self.assertTrue(matcher.And(matcher.Mime('image/jpeg'), matcher.Extension('jpg'))(self.image))
+ self.assertTrue(matcher.And(matcher.Mime('image/jpeg'), matcher.Extension('jpg', 'tiff'))(self.image))
+ self.assertTrue(matcher.And(matcher.Mime('text/plain'), matcher.Extension('t', 'tiff'))(self.text))
+
+ self.assertFalse(matcher.And(matcher.Mime('image/jpeg'), matcher.Extension('tiff'))(self.image))
+ self.assertFalse(matcher.And(matcher.Mime('text/plain'), matcher.Extension('jpg'))(self.image))
+ self.assertFalse(matcher.And(matcher.Mime('inode/x-empty'), matcher.Extension('jpg'))(self.missing))
+ self.assertFalse(matcher.And(matcher.Mime('image/jpeg'), matcher.Extension('jpg', 't'))(self.text))
+
+ def test_or(self):
+ self.assertTrue(matcher.Or(matcher.Mime('image/jpeg'))(self.image))
+ self.assertFalse(matcher.Or(matcher.Mime('text/plain'))(self.image))
+
+ self.assertTrue(matcher.Or(matcher.Mime('image/jpeg'), matcher.Extension('jpg'))(self.image))
+ self.assertTrue(matcher.Or(matcher.Mime('image/jpeg'), matcher.Extension('t'))(self.image))
+ self.assertTrue(matcher.Or(matcher.Mime('text/plain'), matcher.Extension('jpg', 'tiff'))(self.image))
+ self.assertTrue(matcher.Or(matcher.Mime('text/plain'), matcher.Extension('tiff'))(self.text))
+ self.assertTrue(matcher.Or(matcher.Mime('image/jpeg'), matcher.Extension('jpg'))(self.missing))
+
+ self.assertFalse(matcher.Or(matcher.Mime('text/plain'), matcher.Extension('tiff'))(self.image))
+ self.assertFalse(matcher.Or(matcher.Mime('inode/x-empty'), matcher.Extension('jpg', 'tiff'))(self.text))
+
+
+## main ##
+
+if __name__ == '__main__':
+ unittest.main()
+
+## EOF ##
diff --git a/test/utils/filematcher/test_parser.py b/test/utils/filematcher/test_parser.py
new file mode 100644
index 0000000..c594747
--- /dev/null
+++ b/test/utils/filematcher/test_parser.py
@@ -0,0 +1,146 @@
+"""
+
+Part of the bsie test suite.
+A copy of the license is provided with the project.
+Author: Matthias Baumgartner, 2022
+"""
+# standard imports
+import unittest
+
+# bsie imports
+from bsie.utils import errors
+from bsie.utils.filematcher import matcher
+
+# objects to test
+from bsie.utils.filematcher import parse
+
+
+## code ##
+
+class TestFileMatcherParser(unittest.TestCase):
+ def test_empty(self):
+ # no criterion
+ self.assertEqual(parse(''), matcher.Any())
+
+ def test_ruleone(self):
+ # single criterion, single value
+ self.assertEqual(parse('mime=text'), matcher.Mime('text'))
+ self.assertEqual(parse('MIME=text'), matcher.Mime('text'))
+ self.assertEqual(parse('MiMe=text'), matcher.Mime('text'))
+ self.assertEqual(parse('MIME=TEXT'), matcher.Mime('TEXT'))
+ self.assertEqual(parse('mime={text}'), matcher.Mime('text'))
+ self.assertEqual(parse('mime=image/jpeg'), matcher.Mime('image/jpeg'))
+ self.assertEqual(parse('mime="image/jpeg"'), matcher.Mime('image/jpeg'))
+ self.assertEqual(parse('extension=pdf'), matcher.Extension('pdf'))
+ self.assertEqual(parse('extension={pdf}'), matcher.Extension('pdf'))
+ self.assertEqual(parse('extension="pdf"'), matcher.Extension('pdf'))
+ self.assertEqual(parse('extension="foo,bar"'), matcher.Extension('foo,bar'))
+ self.assertEqual(parse('extension="f{oo|ba}r"'), matcher.Extension('f{oo|ba}r'))
+ self.assertEqual(parse('extension=""'), matcher.Extension(''))
+ self.assertEqual(parse('extension="foo'), matcher.Extension('"foo'))
+ self.assertRaises(errors.ParserError, parse, 'extension=foo=bar')
+ self.assertRaises(errors.ParserError, parse, 'extension=')
+ self.assertRaises(errors.ParserError, parse, 'extension={}')
+ self.assertRaises(errors.ParserError, parse, 'extension={foo')
+
+ # valueless
+ self.assertEqual(parse('any'), matcher.Any())
+ self.assertEqual(parse('nothing'), matcher.Nothing())
+ self.assertEqual(parse('exists'), matcher.Exists())
+ self.assertEqual(parse('any, nothing'), matcher.And(matcher.Any(), matcher.Nothing()))
+ self.assertEqual(parse('any, nothing, exists'),
+ matcher.And(matcher.Any(), matcher.Nothing(), matcher.Exists()))
+ self.assertEqual(parse('any, extension=jpg'), matcher.And(matcher.Any(), matcher.Extension('jpg')))
+ self.assertRaises(errors.ParserError, parse, 'mime')
+ self.assertRaises(errors.ParserError, parse, 'extension')
+ self.assertRaises(errors.ParserError, parse, 'exists=True')
+ self.assertRaises(errors.ParserError, parse, 'exists=foo')
+ self.assertEqual(parse('!any'), matcher.NOT(matcher.Any()))
+ self.assertEqual(parse('!any, nothing'), matcher.And(matcher.NOT(matcher.Any()), matcher.Nothing()))
+ self.assertEqual(parse('!any, extension=jpg'),
+ matcher.And(matcher.NOT(matcher.Any()), matcher.Extension('jpg')))
+ self.assertRaises(errors.ParserError, parse, '!mime')
+ self.assertRaises(errors.ParserError, parse, '!extension')
+
+ def test_rulefew(self):
+ # single criterion, multiple values
+ self.assertEqual(parse('extension={jpg, jpeg}'), matcher.Extension('jpg', 'jpeg'))
+ self.assertEqual(parse('mime={image/jpeg, image/png}'),
+ matcher.Mime('image/jpeg', 'image/png'))
+ self.assertRaises(errors.ParserError, parse, 'mime=image/png, image/jpeg')
+ self.assertRaises(errors.ParserError, parse, 'extension=jpg, jpeg')
+
+ def test_rulesets_ruleone(self):
+ # multiple criteria, single value
+ self.assertEqual(parse('mime=text, extension=t'),
+ matcher.And(matcher.Mime('text'), matcher.Extension('t')))
+ self.assertEqual(parse('mime=text/plain, extension=t'),
+ matcher.And(matcher.Mime('text/plain'), matcher.Extension('t')))
+ self.assertRaises(errors.ParserError, parse, 'mime=text/plain extension=t')
+ self.assertRaises(errors.ParserError, parse, 'mime={image/jpeg, extension=jpg'),
+
+ def test_rulesets_rulefew(self):
+ # multiple criteria, multiple values
+ self.assertEqual(parse('mime=image/jpeg, extension={jpg, jpeg}'),
+ matcher.And(matcher.Mime('image/jpeg'), matcher.Extension('jpg', 'jpeg')))
+ self.assertEqual(parse('mime={image/jpeg, image/tiff}, extension={jpg, jpeg}'),
+ matcher.And(matcher.Mime('image/jpeg', 'image/tiff'), matcher.Extension('jpg', 'jpeg')))
+ self.assertEqual(parse('mime={image/jpeg, image/tiff}, extension=jpg'),
+ matcher.And(matcher.Mime('image/jpeg', 'image/tiff'), matcher.Extension('jpg')))
+ self.assertRaises(errors.ParserError, parse, 'mime={image/jpeg, image/tiff, extension=jpg')
+ self.assertRaises(errors.ParserError, parse, 'mime=image/jpeg, image/tiff, extension=jpg')
+ self.assertRaises(errors.ParserError, parse, 'mime=image/jpeg, extension=jpg, ')
+
+ def test_not(self):
+ self.assertEqual(parse('extension!=jpg'), matcher.NOT(matcher.Extension('jpg')))
+ self.assertEqual(parse('extension!={jpg, jpeg}'),
+ matcher.NOT(matcher.Extension('jpg', 'jpeg')))
+ self.assertEqual(parse('extension!=jpg, mime=image/jpeg'),
+ matcher.And(matcher.NOT(matcher.Extension('jpg')), matcher.Mime('image/jpeg')))
+ self.assertEqual(parse('extension!=jpg, mime!=image/jpeg'),
+ matcher.And(matcher.NOT(matcher.Extension('jpg')), matcher.NOT(matcher.Mime('image/jpeg'))))
+ self.assertEqual(parse('extension!=jpg | mime=image/jpeg'),
+ matcher.Or(matcher.NOT(matcher.Extension('jpg')), matcher.Mime('image/jpeg')))
+ self.assertEqual(parse('extension!=jpg | mime!=image/jpeg'),
+ matcher.Or(matcher.NOT(matcher.Extension('jpg')), matcher.NOT(matcher.Mime('image/jpeg'))))
+
+ def test_expr(self):
+ # multiple rulesets
+ self.assertEqual(parse('mime=image/jpeg | extension=jpg'),
+ matcher.Or(matcher.Mime('image/jpeg'), matcher.Extension('jpg')))
+ self.assertEqual(parse('mime=image/jpeg | extension={jpg, jpeg}'),
+ matcher.Or(matcher.Mime('image/jpeg'), matcher.Extension('jpg', 'jpeg')))
+ self.assertEqual(parse('mime={image/jpeg, image/png} | extension={jpg, jpeg}'),
+ matcher.Or(matcher.Mime('image/jpeg', 'image/png'), matcher.Extension('jpg', 'jpeg')))
+ self.assertEqual(parse('mime=image/jpeg , extension=jpg | extension=jpg'),
+ matcher.Or(matcher.And(matcher.Mime('image/jpeg'), matcher.Extension('jpg')), matcher.Extension('jpg')))
+ self.assertEqual(parse(
+ 'mime={jpeg, text}, extension={jpg,t} | extension={png,txt}, mime={png, tiff}'),
+ matcher.Or(
+ matcher.And(matcher.Mime('jpeg', 'text'), matcher.Extension('jpg', 't')),
+ matcher.And(matcher.Extension('png', 'txt'), matcher.Mime('png', 'tiff'))))
+ self.assertEqual(parse('mime=text | extension=jpg | extension=png | mime=png'),
+ matcher.Or(matcher.Mime('text'), matcher.Extension('jpg'), matcher.Extension('png'), matcher.Mime('png')))
+ self.assertRaises(errors.ParserError, parse, 'mime=text |')
+ self.assertRaises(errors.ParserError, parse, '| mime=text')
+ self.assertRaises(errors.ParserError, parse, 'extension=png | mime=text, ')
+
+ def test_invalid(self):
+ # Invalid parses
+ self.assertRaises(errors.ParserError, parse, "extension=") # Empty value
+ self.assertRaises(errors.ParserError, parse, "mime=foo,bar") # Escaping
+ self.assertRaises(errors.ParserError, parse, "mime='foo,bar") # Quoting
+ self.assertRaises(errors.ParserError, parse, "mime=\"foo,bar") # Quoting
+
+ # Invalid input
+ self.assertRaises(AttributeError, parse, None)
+ self.assertRaises(AttributeError, parse, 123)
+ self.assertRaises(AttributeError, parse, [123,321])
+
+
+## main ##
+
+if __name__ == '__main__':
+ unittest.main()
+
+## EOF ##
diff --git a/test/utils/filematcher/testimage.jpg b/test/utils/filematcher/testimage.jpg
new file mode 100644
index 0000000..ea7af63
--- /dev/null
+++ b/test/utils/filematcher/testimage.jpg
Binary files differ
diff --git a/test/utils/filematcher/textfile.t b/test/utils/filematcher/textfile.t
new file mode 100644
index 0000000..c389011
--- /dev/null
+++ b/test/utils/filematcher/textfile.t
@@ -0,0 +1,4 @@
+Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua.
+Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat.
+Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur.
+Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum.
diff --git a/test/utils/test_loading.py b/test/utils/test_loading.py
new file mode 100644
index 0000000..58ff166
--- /dev/null
+++ b/test/utils/test_loading.py
@@ -0,0 +1,48 @@
+"""
+
+Part of the bsie test suite.
+A copy of the license is provided with the project.
+Author: Matthias Baumgartner, 2022
+"""
+# standard imports
+import unittest
+
+# bsie imports
+from bsie.utils import errors
+
+# objects to test
+from bsie.utils.loading import safe_load, unpack_qualified_name
+
+
+## code ##
+
+class TestUtils(unittest.TestCase):
+ def test_safe_load(self):
+ # invalid module: the name does not exist, with and without a dotted suffix
+ self.assertRaises(errors.LoaderError, safe_load, 'dBGHMSAYOoKeKMpywDoKZQycENFPvN', 'foobar')
+ self.assertRaises(errors.LoaderError, safe_load, 'dBGHMSAYOoKeKMpywDoKZQycENFPvN.bar', 'foobar')
+ # partially valid module: 'os' exists but 'os.foo' does not
+ self.assertRaises(errors.LoaderError, safe_load, 'os.foo', 'foobar')
+ # invalid class: 'os.path' exists but has no attribute 'foo'
+ self.assertRaises(errors.LoaderError, safe_load, 'os.path', 'foo')
+ # valid module and class: the result must equal the class obtained via a regular import
+ cls = safe_load('collections.abc', 'Container')
+ import collections.abc
+ self.assertEqual(cls, collections.abc.Container)
+
+ def test_unpack_qualified_name(self):
+ self.assertRaises(TypeError, unpack_qualified_name, 123) # non-string input
+ self.assertRaises(TypeError, unpack_qualified_name, None) # non-string input
+ self.assertRaises(ValueError, unpack_qualified_name, '') # empty string
+ self.assertRaises(ValueError, unpack_qualified_name, 'path') # no dot separator in the name
+ self.assertRaises(ValueError, unpack_qualified_name, '.Path') # nothing before the dot
+ self.assertEqual(unpack_qualified_name('path.Path'), ('path', 'Path')) # single dot: split into the two parts
+ self.assertEqual(unpack_qualified_name('path.foo.bar.Path'), ('path.foo.bar', 'Path')) # several dots: only the last one splits
+
+
+## main ##
+
+if __name__ == '__main__':
+ unittest.main()
+
+## EOF ##
diff --git a/test/utils/test_node.py b/test/utils/test_node.py
index c70f0b8..9feb051 100644
--- a/test/utils/test_node.py
+++ b/test/utils/test_node.py
@@ -4,7 +4,7 @@ Part of the bsie test suite.
A copy of the license is provided with the project.
Author: Matthias Baumgartner, 2022
"""
-# imports
+# standard imports
import unittest
# bsie imports