diff options
author | Matthias Baumgartner <dev@igsor.net> | 2023-01-16 21:37:09 +0100 |
---|---|---|
committer | Matthias Baumgartner <dev@igsor.net> | 2023-01-16 21:37:09 +0100 |
commit | 05a841215c82ef40d4679dfc4d2c26572bd4d349 (patch) | |
tree | 9888ae0bd2345816d1ab479dd34b4c6b902c158a | |
parent | 057e09d6537bf5c39815661a75819081e3e5fda7 (diff) | |
parent | 58aaa864f9747d27c065739256d4c6635ca9b751 (diff) | |
download | bsie-05a841215c82ef40d4679dfc4d2c26572bd4d349.tar.gz bsie-05a841215c82ef40d4679dfc4d2c26572bd4d349.tar.bz2 bsie-05a841215c82ef40d4679dfc4d2c26572bd4d349.zip |
Merge branch 'mb/feature' into develop
75 files changed, 2327 insertions, 622 deletions
@@ -22,4 +22,7 @@ build/ # doc builds doc/build/ +# testing data +test/reader/image/testimage.nef* + ## EOF ## @@ -4,11 +4,11 @@ Part of the bsie module. A copy of the license is provided with the project. Author: Matthias Baumgartner, 2022 """ -# imports +# standard imports import argparse import typing -# module imports +# bsie imports import bsie import bsie.apps diff --git a/bsie/__init__.py b/bsie/__init__.py index 8d2308c..c253f39 100644 --- a/bsie/__init__.py +++ b/bsie/__init__.py @@ -4,7 +4,7 @@ Part of the bsie module. A copy of the license is provided with the project. Author: Matthias Baumgartner, 2022 """ -# imports +# standard imports import collections import typing diff --git a/bsie/apps/__init__.py b/bsie/apps/__init__.py index a548c3c..1c3d0f9 100644 --- a/bsie/apps/__init__.py +++ b/bsie/apps/__init__.py @@ -4,7 +4,7 @@ Part of the bsie module. A copy of the license is provided with the project. Author: Matthias Baumgartner, 2022 """ -# imports +# standard imports import typing # inner-module imports diff --git a/bsie/apps/index.py b/bsie/apps/index.py index 1dbfdd8..21c2318 100644 --- a/bsie/apps/index.py +++ b/bsie/apps/index.py @@ -4,16 +4,16 @@ Part of the bsie module. A copy of the license is provided with the project. 
Author: Matthias Baumgartner, 2022 """ -# imports +# standard imports import argparse import os import typing # bsie imports -from bsie.base import errors -from bsie.lib import BSIE -from bsie.tools import builder -from bsie.utils import bsfs +from bsie.extractor import ExtractorBuilder +from bsie.lib import BSIE, PipelineBuilder +from bsie.reader import ReaderBuilder +from bsie.utils import bsfs, errors # exports __all__: typing.Sequence[str] = ( @@ -44,9 +44,9 @@ def main(argv): # FIXME: Read reader/extractor configs from a config file # reader builder - rbuild = builder.ReaderBuilder({}) + rbuild = ReaderBuilder() # extractor builder - ebuild = builder.ExtractorBuilder([ + ebuild = ExtractorBuilder([ {'bsie.extractor.generic.path.Path': {}}, {'bsie.extractor.generic.stat.Stat': {}}, {'bsie.extractor.generic.constant.Constant': dict( @@ -58,9 +58,14 @@ def main(argv): bsfs:unique "true"^^xsd:boolean . ''', )}, + {'bsie.extractor.image.colors_spatial.ColorsSpatial': { + 'width': 2, + 'height': 2, + 'exp': 2, + }}, ]) # pipeline builder - pbuild = builder.PipelineBuilder( + pbuild = PipelineBuilder( bsfs.Namespace(args.user + ('/' if not args.user.endswith('/') else '')), rbuild, ebuild, @@ -82,7 +87,9 @@ def main(argv): # index input paths for path in args.input_file: - if os.path.isdir(path) and args.recursive: + if not os.path.exists(path): + pass # FIXME: notify the user + elif os.path.isdir(path) and args.recursive: for dirpath, _, filenames in os.walk(path, topdown=True, followlinks=args.follow): for filename in filenames: for node, pred, value in bsie.from_file(os.path.join(dirpath, filename)): diff --git a/bsie/apps/info.py b/bsie/apps/info.py index eaf1f71..64a4eba 100644 --- a/bsie/apps/info.py +++ b/bsie/apps/info.py @@ -4,15 +4,16 @@ Part of the bsie module. A copy of the license is provided with the project. 
Author: Matthias Baumgartner, 2022 """ -# imports +# standard imports import argparse import sys import typing # bsie imports -from bsie.base import errors -from bsie.tools import builder -from bsie.utils import bsfs +from bsie.extractor import ExtractorBuilder +from bsie.lib import PipelineBuilder +from bsie.reader import ReaderBuilder +from bsie.utils import bsfs, errors # exports __all__: typing.Sequence[str] = ( @@ -25,15 +26,15 @@ __all__: typing.Sequence[str] = ( def main(argv): """Show information from BSIE.""" parser = argparse.ArgumentParser(description=main.__doc__, prog='info') - parser.add_argument('what', choices=('predicates', ), + parser.add_argument('what', choices=('predicates', 'schema'), help='Select what information to show.') args = parser.parse_args(argv) # FIXME: Read reader/extractor configs from a config file # reader builder - rbuild = builder.ReaderBuilder({}) + rbuild = ReaderBuilder() # extractor builder - ebuild = builder.ExtractorBuilder([ + ebuild = ExtractorBuilder([ {'bsie.extractor.generic.path.Path': {}}, {'bsie.extractor.generic.stat.Stat': {}}, {'bsie.extractor.generic.constant.Constant': dict( @@ -45,9 +46,14 @@ def main(argv): bsfs:unique "true"^^xsd:boolean . ''', )}, + {'bsie.extractor.image.colors_spatial.ColorsSpatial': { + 'width': 2, + 'height': 2, + 'exp': 2, + }}, ]) # pipeline builder - pbuild = builder.PipelineBuilder( + pbuild = PipelineBuilder( bsfs.Namespace('http://example.com/me/'), # not actually used rbuild, ebuild, @@ -61,6 +67,9 @@ def main(argv): # show predicates for pred in pipeline.schema.predicates(): print(pred.uri) + elif args.what == 'schema': + # show schema + print(bsfs.schema.to_string(pipeline.schema)) else: # args.what is already checked by argparse raise errors.UnreachableError() diff --git a/bsie/base/__init__.py b/bsie/base/__init__.py deleted file mode 100644 index 0d362cd..0000000 --- a/bsie/base/__init__.py +++ /dev/null @@ -1,24 +0,0 @@ -"""The base module defines the BSIE interfaces. 
- -You'll mostly find abstract classes here. - -Part of the bsie module. -A copy of the license is provided with the project. -Author: Matthias Baumgartner, 2022 -""" -# imports -import typing - -# inner-module imports -from . import errors -from .extractor import Extractor -from .reader import Reader - -# exports -__all__: typing.Sequence[str] = ( - 'Extractor', - 'Reader', - 'errors', - ) - -## EOF ## diff --git a/bsie/extractor/__init__.py b/bsie/extractor/__init__.py index ef31343..5f385ee 100644 --- a/bsie/extractor/__init__.py +++ b/bsie/extractor/__init__.py @@ -6,10 +6,17 @@ Part of the bsie module. A copy of the license is provided with the project. Author: Matthias Baumgartner, 2022 """ -# imports +# standard imports import typing +# inner-module imports +from .base import Extractor +from .builder import ExtractorBuilder + # exports -__all__: typing.Sequence[str] = [] +__all__: typing.Sequence[str] = ( + 'Extractor', + 'ExtractorBuilder', + ) ## EOF ## diff --git a/bsie/base/extractor.py b/bsie/extractor/base.py index c44021b..7401244 100644 --- a/bsie/base/extractor.py +++ b/bsie/extractor/base.py @@ -4,7 +4,7 @@ Part of the bsie module. A copy of the license is provided with the project. Author: Matthias Baumgartner, 2022 """ -# imports +# standard imports import abc import typing @@ -31,13 +31,22 @@ SCHEMA_PREAMBLE = ''' prefix bsfs: <http://bsfs.ai/schema/> prefix bse: <http://bsfs.ai/schema/Entity#> + # default definitions + bsfs:Array rdfs:subClassOf bsfs:Literal . + bsfs:Number rdfs:subClassOf bsfs:Literal . + bsfs:Time rdfs:subClassOf bsfs:Literal . + bsfs:Feature rdfs:subClassOf bsfs:Array ; + bsfs:dimension "1"^^xsd:integer ; + bsfs:dtype bsfs:f16 ; + bsfs:distance bsfs:euclidean . + # essential nodes bsfs:Entity rdfs:subClassOf bsfs:Node . bsfs:File rdfs:subClassOf bsfs:Entity . # common definitions xsd:string rdfs:subClassOf bsfs:Literal . - xsd:integer rdfs:subClassOf bsfs:Literal . + xsd:integer rdfs:subClassOf bsfs:Number . 
''' @@ -99,5 +108,6 @@ class Extractor(abc.ABC): principals: typing.Iterable[bsfs.schema.Predicate], ) -> typing.Iterator[typing.Tuple[node.Node, bsfs.schema.Predicate, typing.Any]]: """Return (node, predicate, value) triples.""" + # FIXME: type annotation could be more strict: value is Hashable ## EOF ## diff --git a/bsie/extractor/builder.py b/bsie/extractor/builder.py new file mode 100644 index 0000000..0fd3685 --- /dev/null +++ b/bsie/extractor/builder.py @@ -0,0 +1,77 @@ +""" + +Part of the bsie module. +A copy of the license is provided with the project. +Author: Matthias Baumgartner, 2022 +""" +# standard imports +import typing + +# bsie imports +from bsie.utils import bsfs, errors, safe_load, unpack_qualified_name + +# inner-module imports +from . import base + +# exports +__all__: typing.Sequence[str] = ( + 'ExtractorBuilder', + ) + + +## code ## + +class ExtractorBuilder(): + """Build `bsie.base.Extractor instances. + + It is permissible to build multiple instances of the same extractor + (typically with different arguments), hence the ExtractorBuilder + receives a list of build specifications. Each specification is + a dict with a single key (extractor's qualified name) and a dict + to be used as keyword arguments. + Example: [{'bsie.extractor.generic.path.Path': {}}, ] + + """ + + # build specifications + _specs: typing.List[typing.Dict[str, typing.Dict[str, typing.Any]]] + + def __init__(self, specs: typing.List[typing.Dict[str, typing.Dict[str, typing.Any]]]): + self._specs = specs + + def __iter__(self) -> typing.Iterator[int]: + """Iterate over extractor specifications.""" + return iter(range(len(self._specs))) + + def build(self, index: int) -> base.Extractor: + """Return an instance of the n'th extractor (n=*index*).""" + # get build instructions + specs = self._specs[index] + + # check specs structure. 
expecting[{name: {kwargs}}] + if not isinstance(specs, dict): + raise TypeError(f'expected a dict, found {bsfs.typename(specs)}') + if len(specs) != 1: + raise TypeError(f'expected a dict of length one, found {len(specs)}') + + # get name and args from specs + name = next(iter(specs.keys())) + kwargs = specs[name] + + # check kwargs structure + if not isinstance(kwargs, dict): + raise TypeError(f'expected a dict, found {bsfs.typename(kwargs)}') + + # check name and get module/class components + module_name, class_name = unpack_qualified_name(name) + + # import extractor class + cls = safe_load(module_name, class_name) + + try: # build and return instance + return cls(**kwargs) + + except Exception as err: + raise errors.BuilderError(f'failed to build extractor {name} due to {bsfs.typename(err)}: {err}') from err + +## EOF ## diff --git a/bsie/extractor/generic/__init__.py b/bsie/extractor/generic/__init__.py index 0cb7e7f..4783949 100644 --- a/bsie/extractor/generic/__init__.py +++ b/bsie/extractor/generic/__init__.py @@ -7,7 +7,7 @@ Part of the bsie module. A copy of the license is provided with the project. Author: Matthias Baumgartner, 2022 """ -# imports +# standard imports import typing # exports diff --git a/bsie/extractor/generic/constant.py b/bsie/extractor/generic/constant.py index 11384e6..938e20c 100644 --- a/bsie/extractor/generic/constant.py +++ b/bsie/extractor/generic/constant.py @@ -4,13 +4,15 @@ Part of the bsie module. A copy of the license is provided with the project. Author: Matthias Baumgartner, 2022 """ -# imports +# standard imports import typing # bsie imports -from bsie.base import extractor from bsie.utils import bsfs, node +# inner-module imports +from .. 
import base + # exports __all__: typing.Sequence[str] = ( 'Constant', @@ -19,7 +21,7 @@ __all__: typing.Sequence[str] = ( ## code ## -class Constant(extractor.Extractor): +class Constant(base.Extractor): """Extract information from file's path.""" CONTENT_READER = None @@ -32,7 +34,7 @@ class Constant(extractor.Extractor): schema: str, tuples: typing.Iterable[typing.Tuple[bsfs.URI, typing.Any]], ): - super().__init__(bsfs.schema.Schema.from_string(extractor.SCHEMA_PREAMBLE + schema)) + super().__init__(bsfs.schema.from_string(base.SCHEMA_PREAMBLE + schema)) # NOTE: Raises a KeyError if the predicate is not part of the schema self._tuples = tuple((self.schema.predicate(p_uri), value) for p_uri, value in tuples) # TODO: use schema instance for value checking diff --git a/bsie/extractor/generic/path.py b/bsie/extractor/generic/path.py index 7018e12..c984515 100644 --- a/bsie/extractor/generic/path.py +++ b/bsie/extractor/generic/path.py @@ -4,12 +4,12 @@ Part of the bsie module. A copy of the license is provided with the project. 
Author: Matthias Baumgartner, 2022 """ -# imports +# standard imports import os import typing # bsie imports -from bsie.base import extractor +from bsie.extractor import base from bsie.utils import bsfs, node, ns # exports @@ -20,7 +20,7 @@ __all__: typing.Sequence[str] = ( ## code ## -class Path(extractor.Extractor): +class Path(base.Extractor): """Extract information from file's path.""" CONTENT_READER = 'bsie.reader.path.Path' @@ -29,7 +29,7 @@ class Path(extractor.Extractor): _callmap: typing.Dict[bsfs.schema.Predicate, typing.Callable[[str], typing.Any]] def __init__(self): - super().__init__(bsfs.schema.Schema.from_string(extractor.SCHEMA_PREAMBLE + ''' + super().__init__(bsfs.schema.from_string(base.SCHEMA_PREAMBLE + ''' bse:filename rdfs:subClassOf bsfs:Predicate ; rdfs:domain bsfs:File ; rdfs:range xsd:string ; diff --git a/bsie/extractor/generic/stat.py b/bsie/extractor/generic/stat.py index 0b9ce29..9394456 100644 --- a/bsie/extractor/generic/stat.py +++ b/bsie/extractor/generic/stat.py @@ -4,14 +4,16 @@ Part of the bsie module. A copy of the license is provided with the project. Author: Matthias Baumgartner, 2022 """ -# imports +# standard imports import os import typing # bsie imports -from bsie.base import extractor from bsie.utils import bsfs, node, ns +# inner-module imports +from .. 
import base + # exports __all__: typing.Sequence[str] = ( 'Stat', @@ -20,7 +22,7 @@ __all__: typing.Sequence[str] = ( ## code ## -class Stat(extractor.Extractor): +class Stat(base.Extractor): """Extract information from the file system.""" CONTENT_READER = 'bsie.reader.stat.Stat' @@ -29,7 +31,7 @@ class Stat(extractor.Extractor): _callmap: typing.Dict[bsfs.schema.Predicate, typing.Callable[[os.stat_result], typing.Any]] def __init__(self): - super().__init__(bsfs.schema.Schema.from_string(extractor.SCHEMA_PREAMBLE + ''' + super().__init__(bsfs.schema.from_string(base.SCHEMA_PREAMBLE + ''' bse:filesize rdfs:subClassOf bsfs:Predicate ; rdfs:domain bsfs:File ; rdfs:range xsd:integer ; diff --git a/bsie/extractor/image/__init__.py b/bsie/extractor/image/__init__.py new file mode 100644 index 0000000..75b118d --- /dev/null +++ b/bsie/extractor/image/__init__.py @@ -0,0 +1,13 @@ +""" + +Part of the bsie module. +A copy of the license is provided with the project. +Author: Matthias Baumgartner, 2022 +""" +# standard imports +import typing + +# exports +__all__: typing.Sequence[str] = [] + +## EOF ## diff --git a/bsie/extractor/image/colors_spatial.py b/bsie/extractor/image/colors_spatial.py new file mode 100644 index 0000000..ce5b9f2 --- /dev/null +++ b/bsie/extractor/image/colors_spatial.py @@ -0,0 +1,154 @@ +"""Spatial color features. + +Part of the bsie module. +A copy of the license is provided with the project. +Author: Matthias Baumgartner, 2022 +""" +# standard imports +import typing + +# external imports +import PIL.Image +import numpy as np + +# bsie imports +from bsie.utils import bsfs, node, ns + +# inner-module imports +from .. import base + +# constants +FEATURE_NAME = ns.bsf + 'ColorsSpatial' +PREDICATE_NAME = ns.bse + 'colors_spatial' + +# exports +__all__: typing.Sequence[str] = ( + 'ColorsSpatial', + ) + + +## code ## + +class ColorsSpatial(base.Extractor): + """Determine dominant colors of subregions in the image. 
+ + Computes the domiant color of increasingly smaller subregions of the image. + """ + + CONTENT_READER = 'bsie.reader.image.Image' + + # Initial subregion width. + width: int + + # Initial subregion height. + height: int + + # Decrement exponent. + exp: float + + # Principal predicate's URI. + _predicate_name: bsfs.URI + + def __init__( + self, + width: int = 32, + height: int = 32, + exp: float = 4., + ): + # instance identifier + uuid = bsfs.uuid.UCID.from_dict({ + 'width': width, + 'height': height, + 'exp': exp, + }) + # determine symbol names + instance_name = FEATURE_NAME[uuid] + predicate_name = PREDICATE_NAME[uuid] + # get vector dimension + dimension = self.dimension(width, height, exp) + # initialize parent with the schema + super().__init__(bsfs.schema.from_string(base.SCHEMA_PREAMBLE + f''' + <{FEATURE_NAME}> rdfs:subClassOf bsfs:Feature ; + # annotations + rdfs:label "Spatially dominant colors"^^xsd:string ; + schema:description "Domiant colors of subregions in an image."^^xsd:string ; + bsfs:dtype xsd:integer . + + <{instance_name}> rdfs:subClassOf <{FEATURE_NAME}> ; + bsfs:dimension "{dimension}"^^xsd:integer ; + # annotations + <{FEATURE_NAME}/args#width> "{width}"^^xsd:integer ; + <{FEATURE_NAME}/args#height> "{height}"^^xsd:integer ; + <{FEATURE_NAME}/args#exp> "{exp}"^^xsd:float . + + <{predicate_name}> rdfs:subClassOf bsfs:Predicate ; + rdfs:domain bsfs:File ; + rdfs:range <{instance_name}> ; + bsfs:unique "true"^^xsd:boolean . 
+ + ''')) + # assign extra members + self.width = width + self.height = height + self.exp = exp + self._predicate_name = predicate_name + + def __repr__(self) -> str: + return f'{bsfs.typename(self)}({self.width}, {self.height}, {self.exp})' + + def __eq__(self, other: typing.Any) -> bool: + return super().__eq__(other) \ + and self.width == other.width \ + and self.height == other.height \ + and self.exp == other.exp + + def __hash__(self) -> int: + return hash((super().__hash__(), self.width, self.height, self.exp)) + + @staticmethod + def dimension(width: int, height: int, exp: float) -> int: + """Return the feature vector dimension.""" + # FIXME: replace with a proper formula + dim = 0 + while width >= 1 and height >= 1: + dim += width * height + width = np.floor(width / exp) + height = np.floor(height / exp) + dim *= 3 # per band + return int(dim) + + def extract( + self, + subject: node.Node, + content: PIL.Image, + principals: typing.Iterable[bsfs.schema.Predicate], + ) -> typing.Iterator[typing.Tuple[node.Node, bsfs.schema.Predicate, typing.Any]]: + # check principals + if self.schema.predicate(self._predicate_name) not in principals: + # nothing to do; abort + return + + # convert to HSV + content = content.convert('HSV') + + # get dimensions + width, height = self.width, self.height + num_bands = len(content.getbands()) # it's three since we converted to HSV before + + features = [] + while width >= 1 and height >= 1: + # downsample + img = content.resize((width, height), resample=PIL.Image.Resampling.BOX) + # feature vector + features.append( + np.array(img.getdata()).reshape((width * height, num_bands))) + # iterate + width = int(np.floor(width / self.exp)) + height = int(np.floor(height / self.exp)) + + # combine bands and convert features to tuple + value = tuple(np.vstack(features).reshape(-1)) + # return triple with feature vector as value + yield subject, self.schema.predicate(self._predicate_name), value + +## EOF ## diff --git 
a/bsie/lib/__init__.py b/bsie/lib/__init__.py index 578c2c4..4239d3b 100644 --- a/bsie/lib/__init__.py +++ b/bsie/lib/__init__.py @@ -4,15 +4,17 @@ Part of the bsie module. A copy of the license is provided with the project. Author: Matthias Baumgartner, 2022 """ -# imports +# standard imports import typing # inner-module imports from .bsie import BSIE +from .builder import PipelineBuilder # exports __all__: typing.Sequence[str] = ( 'BSIE', + 'PipelineBuilder', ) ## EOF ## diff --git a/bsie/lib/bsie.py b/bsie/lib/bsie.py index e087fa9..668783d 100644 --- a/bsie/lib/bsie.py +++ b/bsie/lib/bsie.py @@ -4,13 +4,15 @@ Part of the bsie module. A copy of the license is provided with the project. Author: Matthias Baumgartner, 2022 """ -# imports +# standard imports import typing # bsie imports -from bsie.tools import Pipeline from bsie.utils import bsfs, node, ns +# inner-module imports +from .pipeline import Pipeline + # exports __all__: typing.Sequence[str] = ( 'BSIE', diff --git a/bsie/lib/builder.py b/bsie/lib/builder.py new file mode 100644 index 0000000..c2abffe --- /dev/null +++ b/bsie/lib/builder.py @@ -0,0 +1,85 @@ +""" + +Part of the bsie module. +A copy of the license is provided with the project. +Author: Matthias Baumgartner, 2022 +""" +# standard imports +import logging +import typing + +# bsie imports +from bsie.extractor import ExtractorBuilder +from bsie.reader import ReaderBuilder +from bsie.utils import bsfs, errors + +# inner-module imports +from . import pipeline + +# exports +__all__: typing.Sequence[str] = ( + 'PipelineBuilder', + ) + + +## code ## + +logger = logging.getLogger(__name__) + +class PipelineBuilder(): + """Build `bsie.tools.pipeline.Pipeline` instances.""" + + # Prefix to be used in the Pipeline. + prefix: bsfs.Namespace + + # builder for Readers. + rbuild: ReaderBuilder + + # builder for Extractors. 
+ ebuild: ExtractorBuilder + + def __init__( + self, + prefix: bsfs.Namespace, + reader_builder: ReaderBuilder, + extractor_builder: ExtractorBuilder, + ): + self.prefix = prefix + self.rbuild = reader_builder + self.ebuild = extractor_builder + + def build(self) -> pipeline.Pipeline: + """Return a Pipeline instance.""" + ext2rdr = {} + + for eidx in self.ebuild: + # build extractor + try: + ext = self.ebuild.build(eidx) + + except errors.LoaderError as err: # failed to load extractor; skip + logger.error('failed to load extractor: %s', err) + continue + + except errors.BuilderError as err: # failed to build instance; skip + logger.error(str(err)) + continue + + try: + # get reader required by extractor + if ext.CONTENT_READER is not None: + rdr = self.rbuild.build(ext.CONTENT_READER) + else: + rdr = None + # store extractor + ext2rdr[ext] = rdr + + except errors.LoaderError as err: # failed to load reader + logger.error('failed to load reader: %s', err) + + except errors.BuilderError as err: # failed to build reader + logger.error(str(err)) + + return pipeline.Pipeline(self.prefix, ext2rdr) + +## EOF ## diff --git a/bsie/tools/pipeline.py b/bsie/lib/pipeline.py index 20e8ddf..44685ba 100644 --- a/bsie/tools/pipeline.py +++ b/bsie/lib/pipeline.py @@ -4,14 +4,15 @@ Part of the bsie module. A copy of the license is provided with the project. 
Author: Matthias Baumgartner, 2022 """ -# imports +# standard imports from collections import defaultdict import logging import typing # bsie imports -from bsie import base -from bsie.utils import bsfs, node, ns +from bsie.extractor import Extractor +from bsie.reader import Reader +from bsie.utils import bsfs, errors, node, ns # exports __all__: typing.Sequence[str] = ( @@ -43,12 +44,12 @@ class Pipeline(): _prefix: bsfs.Namespace # extractor -> reader mapping - _ext2rdr: typing.Dict[base.extractor.Extractor, typing.Optional[base.reader.Reader]] + _ext2rdr: typing.Dict[Extractor, typing.Optional[Reader]] def __init__( self, prefix: bsfs.Namespace, - ext2rdr: typing.Dict[base.extractor.Extractor, typing.Optional[base.reader.Reader]] + ext2rdr: typing.Dict[Extractor, typing.Optional[Reader]] ): # store core members self._prefix = prefix + FILE_PREFIX @@ -124,6 +125,7 @@ class Pipeline(): try: # get content content = rdr(path) if rdr is not None else None + #logger.info('extracted %s from %s', rdr, path) # apply extractors on this content for ext in extrs: @@ -132,11 +134,16 @@ class Pipeline(): for subject, pred, value in ext.extract(subject, content, principals): yield subject, pred, value - except base.errors.ExtractorError as err: + except errors.ExtractorError as err: # critical extractor failure. logger.error('%s failed to extract triples from content: %s', ext, err) - except base.errors.ReaderError as err: + except errors.UnsupportedFileFormatError: + # failed to read the file format. skip. + #logger.warning('%s could not process the file format of %s', rdr, err) + pass + + except errors.ReaderError as err: # failed to read any content. skip. logger.error('%s failed to read content: %s', rdr, err) diff --git a/bsie/reader/__init__.py b/bsie/reader/__init__.py index a45f22b..4163d1c 100644 --- a/bsie/reader/__init__.py +++ b/bsie/reader/__init__.py @@ -15,5 +15,18 @@ Part of the bsie module. A copy of the license is provided with the project. 
Author: Matthias Baumgartner, 2022 """ +# standard imports +import typing +# inner-module imports +from .base import Reader +from .builder import ReaderBuilder + +# exports +__all__: typing.Sequence[str] = ( + 'Reader', + 'ReaderBuilder', + ) + +## EOF ## ## EOF ## diff --git a/bsie/base/reader.py b/bsie/reader/base.py index cbabd36..099a327 100644 --- a/bsie/base/reader.py +++ b/bsie/reader/base.py @@ -8,7 +8,7 @@ Part of the bsie module. A copy of the license is provided with the project. Author: Matthias Baumgartner, 2022 """ -# imports +# standard imports import abc import typing @@ -39,7 +39,7 @@ class Reader(abc.ABC): return hash(type(self)) @abc.abstractmethod - def __call__(self, path: bsfs.URI) -> typing.Any: + def __call__(self, path: str) -> typing.Any: """Return some content of the file at *path*. Raises a `ReaderError` if the reader cannot make sense of the file format. """ diff --git a/bsie/reader/builder.py b/bsie/reader/builder.py new file mode 100644 index 0000000..8699e75 --- /dev/null +++ b/bsie/reader/builder.py @@ -0,0 +1,78 @@ +""" + +Part of the bsie module. +A copy of the license is provided with the project. +Author: Matthias Baumgartner, 2022 +""" +# standard imports +import typing + +# bsie imports +from bsie.utils import bsfs, errors, safe_load, unpack_qualified_name + +# inner-module imports +from . import base + +# exports +__all__: typing.Sequence[str] = ( + 'ReaderBuilder', + ) + + +## code ## + +class ReaderBuilder(): + """Build `bsie.base.Reader` instances. + + Readers are defined via their qualified class name + (e.g., bsie.reader.path.Path) and optional keyword + arguments that are passed to the constructor via + the *kwargs* argument (name as key, kwargs as value). + The ReaderBuilder keeps a cache of previously built + reader instances, as they are anyway built with + identical keyword arguments. 
+ + """ + + # keyword arguments + _kwargs: typing.Dict[str, typing.Dict[str, typing.Any]] + + # cached readers + _cache: typing.Dict[str, base.Reader] + + def __init__( + self, + kwargs: typing.Optional[typing.Dict[str, typing.Dict[str, typing.Any]]] = None): + if kwargs is None: + kwargs = {} + self._kwargs = kwargs + self._cache = {} + + def build(self, name: str) -> base.Reader: + """Return an instance for the qualified class name.""" + # return cached instance + if name in self._cache: + return self._cache[name] + + # check name and get module/class components + module_name, class_name = unpack_qualified_name(name) + + # import reader class + cls = safe_load(module_name, class_name) + + # get kwargs + kwargs = self._kwargs.get(name, {}) + if not isinstance(kwargs, dict): + raise TypeError(f'expected a kwargs dict, found {bsfs.typename(kwargs)}') + + try: # build, cache, and return instance + obj = cls(**kwargs) + # cache instance + self._cache[name] = obj + # return instance + return obj + + except Exception as err: + raise errors.BuilderError(f'failed to build reader {name} due to {bsfs.typename(err)}: {err}') from err + +## EOF ## diff --git a/bsie/reader/chain.py b/bsie/reader/chain.py new file mode 100644 index 0000000..5e9e0d5 --- /dev/null +++ b/bsie/reader/chain.py @@ -0,0 +1,88 @@ +""" + +Part of the bsie module. +A copy of the license is provided with the project. +Author: Matthias Baumgartner, 2022 +""" +# standard imports +import logging +import typing + +# bsie imports +from bsie.utils import bsfs, errors + +# inner-module imports +from . import base +from . import builder + +# exports +__all__: typing.Sequence[str] = ( + 'ReaderChain', + ) + + +## code ## + +logger = logging.getLogger(__name__) + +# Content type. +T_CONTENT = typing.TypeVar('T_CONTENT') # pylint: disable=invalid-name + +class ReaderChain(base.Reader, typing.Generic[T_CONTENT]): + """Read an image.""" + + # sub-readers for specific file formats. 
+ _children: typing.Tuple[base.Reader, ...] + + def __init__( + self, + subreader_names: typing.Iterable[str], + cfg: typing.Optional[typing.Any] = None, + ): + rbuild = builder.ReaderBuilder(cfg) + children = [] + for name in subreader_names: + try: + # build sub-reader + children.append(rbuild.build(name)) + except (ValueError, + TypeError, + errors.LoaderError, + errors.BuilderError) as err: + # failed to build a child; skip and notify + logger.warning('failed to load reader: %s', err) + + if len(children) == 0: + logger.warning('%s failed to load any sub-readers.', bsfs.typename(self)) + + # copy children to member + self._children = tuple(children) + + def __str__(self) -> str: + substr = ', '.join(str(child) for child in self._children) + return f'{bsfs.typename(self)}({substr})' + + def __repr__(self) -> str: + return f'{bsfs.typename(self)}({self._children})' + + def __eq__(self, other: typing.Any) -> bool: + return super().__eq__(other) \ + and self._children == other._children + + def __hash__(self) -> int: + return hash((super().__hash__(), self._children)) + + def __call__(self, path: str) -> T_CONTENT: + raise_error = errors.UnsupportedFileFormatError + for child in self._children: + try: + return child(path) + except errors.UnsupportedFileFormatError: + pass + except errors.ReaderError: + # child cannot read the file, skip. + raise_error = errors.ReaderError # type: ignore [assignment] # mypy is confused + + raise raise_error(path) + +## EOF ## diff --git a/bsie/reader/image/__init__.py b/bsie/reader/image/__init__.py new file mode 100644 index 0000000..1f290b5 --- /dev/null +++ b/bsie/reader/image/__init__.py @@ -0,0 +1,37 @@ +""" + +Part of the bsie module. +A copy of the license is provided with the project. +Author: Matthias Baumgartner, 2022 +""" +# standard imports +import typing + +# external imports +import PIL.Image + +# inner-module imports +from .. 
import chain + +# constants +_FILE_FORMAT_READERS: typing.Sequence[str] = ( + __package__ + '._raw.RawImage', + __package__ + '._pillow.PillowImage', + ) + +# exports +__all__: typing.Sequence[str] = ( + 'Image', + ) + + +## code ## + +# FIXME: Check if PIL.Image or PIL.Image.Image, or if version-dependent +class Image(chain.ReaderChain[PIL.Image.Image]): # pylint: disable=too-few-public-methods + """Read an image file.""" + + def __init__(self, cfg: typing.Optional[typing.Any] = None): + super().__init__(_FILE_FORMAT_READERS, cfg) + +## EOF ## diff --git a/bsie/reader/image/_pillow.py b/bsie/reader/image/_pillow.py new file mode 100644 index 0000000..3144509 --- /dev/null +++ b/bsie/reader/image/_pillow.py @@ -0,0 +1,39 @@ +""" + +Part of the bsie module. +A copy of the license is provided with the project. +Author: Matthias Baumgartner, 2022 +""" +# standard imports +import typing + +# external imports +import PIL.Image + +# bsie imports +from bsie.utils import errors + +# inner-module imports +from .. import base + +# exports +__all__: typing.Sequence[str] = ( + 'PillowImage', + ) + + +## code ## + +class PillowImage(base.Reader): + """Use PIL to read content of a variety of image file types.""" + + def __call__(self, path: str) -> PIL.Image: + try: + # open file with PIL + return PIL.Image.open(path) + except PIL.UnidentifiedImageError as err: + raise errors.UnsupportedFileFormatError(path) from err + except IOError as err: + raise errors.ReaderError(path) from err + +# EOF ## diff --git a/bsie/reader/image/_raw.py b/bsie/reader/image/_raw.py new file mode 100644 index 0000000..cd60453 --- /dev/null +++ b/bsie/reader/image/_raw.py @@ -0,0 +1,61 @@ +""" + +Part of the bsie module. +A copy of the license is provided with the project. +Author: Matthias Baumgartner, 2022 +""" +# standard imports +import typing + +# external imports +import PIL.Image +import rawpy + +# bsie imports +from bsie.utils import errors, filematcher + +# inner-module imports +from .. 
import base + +# constants +MATCH_RULE = 'mime={image/x-nikon-nef} | extension={nef}' + +# exports +__all__: typing.Sequence[str] = ( + 'RawImage', + ) + + +## code ## + +class RawImage(base.Reader): + """Use rawpy to read content of raw image file types.""" + + # file matcher + match: filematcher.Matcher + + # additional kwargs to rawpy's postprocess + rawpy_kwargs: typing.Dict[str, typing.Any] + + def __init__(self, **rawpy_kwargs): + match_rule = rawpy_kwargs.pop('file_match_rule', MATCH_RULE) + self._match = filematcher.parse(match_rule) + self._rawpy_kwargs = rawpy_kwargs + + def __call__(self, path: str) -> PIL.Image: + # perform quick checks first + if not self._match(path): + raise errors.UnsupportedFileFormatError(path) + + try: + # open file with rawpy + ary = rawpy.imread(path).postprocess(**self._rawpy_kwargs) + # convert to PIL.Image + return PIL.Image.fromarray(ary) + except (rawpy.LibRawFatalError, # pylint: disable=no-member # pylint doesn't find the errors + rawpy.NotSupportedError, # pylint: disable=no-member + rawpy.LibRawNonFatalError, # pylint: disable=no-member + ) as err: + raise errors.ReaderError(path) from err + +## EOF ## diff --git a/bsie/reader/path.py b/bsie/reader/path.py index d60f187..1ca05a0 100644 --- a/bsie/reader/path.py +++ b/bsie/reader/path.py @@ -4,11 +4,11 @@ Part of the bsie module. A copy of the license is provided with the project. Author: Matthias Baumgartner, 2022 """ -# imports +# standard imports import typing -# bsie imports -from bsie.base import reader +# inner-module imports +from . import base # exports __all__: typing.Sequence[str] = ( @@ -18,7 +18,7 @@ __all__: typing.Sequence[str] = ( ## code ## -class Path(reader.Reader): +class Path(base.Reader): """Return the path.""" def __call__(self, path: str) -> str: diff --git a/bsie/reader/stat.py b/bsie/reader/stat.py index fc5fb24..706dc47 100644 --- a/bsie/reader/stat.py +++ b/bsie/reader/stat.py @@ -4,12 +4,15 @@ Part of the bsie module. 
A copy of the license is provided with the project. Author: Matthias Baumgartner, 2022 """ -# imports +# standard imports import os import typing # bsie imports -from bsie.base import errors, reader +from bsie.utils import errors + +# inner-module imports +from . import base # exports __all__: typing.Sequence[str] = ( @@ -19,7 +22,7 @@ __all__: typing.Sequence[str] = ( ## code ## -class Stat(reader.Reader): +class Stat(base.Reader): """Read and return the filesystem's stat infos.""" def __call__(self, path: str) -> os.stat_result: diff --git a/bsie/tools/builder.py b/bsie/tools/builder.py deleted file mode 100644 index 190d9bf..0000000 --- a/bsie/tools/builder.py +++ /dev/null @@ -1,226 +0,0 @@ -""" - -Part of the bsie module. -A copy of the license is provided with the project. -Author: Matthias Baumgartner, 2022 -""" -# imports -import importlib -import logging -import typing - -# bsie imports -from bsie import base -from bsie.base import errors -from bsie.utils import bsfs - -# inner-module imports -from . import pipeline - -# exports -__all__: typing.Sequence[str] = ( - 'ExtractorBuilder', - 'PipelineBuilder', - 'ReaderBuilder', - ) - - -## code ## - -logger = logging.getLogger(__name__) - -def _safe_load(module_name: str, class_name: str): - """Get a class from a module. Raise BuilderError if anything goes wrong.""" - try: - # load the module - module = importlib.import_module(module_name) - except Exception as err: - # cannot import module - raise errors.LoaderError(f'cannot load module {module_name}') from err - - try: - # get the class from the module - cls = getattr(module, class_name) - except Exception as err: - # cannot find the class - raise errors.LoaderError(f'cannot load class {class_name} from module {module_name}') from err - - return cls - - -def _unpack_name(name): - """Split a name into its module and class component (dot-separated).""" - if not isinstance(name, str): - raise TypeError(name) - if '.' 
not in name: - raise ValueError('name must be a qualified class name.') - module_name, class_name = name[:name.rfind('.')], name[name.rfind('.')+1:] - if module_name == '': - raise ValueError('name must be a qualified class name.') - return module_name, class_name - - -class ReaderBuilder(): - """Build `bsie.base.Reader` instances. - - Readers are defined via their qualified class name - (e.g., bsie.reader.path.Path) and optional keyword - arguments that are passed to the constructor via - the *kwargs* argument (name as key, kwargs as value). - The ReaderBuilder keeps a cache of previously built - reader instances, as they are anyway built with - identical keyword arguments. - - """ - - # keyword arguments - _kwargs: typing.Dict[str, typing.Dict[str, typing.Any]] - - # cached readers - _cache: typing.Dict[str, base.Reader] - - def __init__(self, kwargs: typing.Dict[str, typing.Dict[str, typing.Any]]): - self._kwargs = kwargs - self._cache = {} - - def build(self, name: str) -> base.Reader: - """Return an instance for the qualified class name.""" - # return cached instance - if name in self._cache: - return self._cache[name] - - # check name and get module/class components - module_name, class_name = _unpack_name(name) - - # import reader class - cls = _safe_load(module_name, class_name) - - # get kwargs - kwargs = self._kwargs.get(name, {}) - if not isinstance(kwargs, dict): - raise TypeError(f'expected a kwargs dict, found {bsfs.typename(kwargs)}') - - try: # build, cache, and return instance - obj = cls(**kwargs) - # cache instance - self._cache[name] = obj - # return instance - return obj - - except Exception as err: - raise errors.BuilderError(f'failed to build reader {name} due to {bsfs.typename(err)}: {err}') from err - - -class ExtractorBuilder(): - """Build `bsie.base.Extractor instances. 
- - It is permissible to build multiple instances of the same extractor - (typically with different arguments), hence the ExtractorBuilder - receives a list of build specifications. Each specification is - a dict with a single key (extractor's qualified name) and a dict - to be used as keyword arguments. - Example: [{'bsie.extractor.generic.path.Path': {}}, ] - - """ - - # build specifications - _specs: typing.List[typing.Dict[str, typing.Dict[str, typing.Any]]] - - def __init__(self, specs: typing.List[typing.Dict[str, typing.Dict[str, typing.Any]]]): - self._specs = specs - - def __iter__(self) -> typing.Iterator[int]: - """Iterate over extractor specifications.""" - return iter(range(len(self._specs))) - - def build(self, index: int) -> base.Extractor: - """Return an instance of the n'th extractor (n=*index*).""" - # get build instructions - specs = self._specs[index] - - # check specs structure. expecting[{name: {kwargs}}] - if not isinstance(specs, dict): - raise TypeError(f'expected a dict, found {bsfs.typename(specs)}') - if len(specs) != 1: - raise TypeError(f'expected a dict of length one, found {len(specs)}') - - # get name and args from specs - name = next(iter(specs.keys())) - kwargs = specs[name] - - # check kwargs structure - if not isinstance(kwargs, dict): - raise TypeError(f'expected a dict, found {bsfs.typename(kwargs)}') - - # check name and get module/class components - module_name, class_name = _unpack_name(name) - - # import extractor class - cls = _safe_load(module_name, class_name) - - try: # build and return instance - return cls(**kwargs) - - except Exception as err: - raise errors.BuilderError(f'failed to build extractor {name} due to {bsfs.typename(err)}: {err}') from err - - -class PipelineBuilder(): - """Build `bsie.tools.pipeline.Pipeline` instances.""" - - # Prefix to be used in the Pipeline. - prefix: bsfs.Namespace - - # builder for Readers. - rbuild: ReaderBuilder - - # builder for Extractors. 
- ebuild: ExtractorBuilder - - def __init__( - self, - prefix: bsfs.Namespace, - reader_builder: ReaderBuilder, - extractor_builder: ExtractorBuilder, - ): - self.prefix = prefix - self.rbuild = reader_builder - self.ebuild = extractor_builder - - def build(self) -> pipeline.Pipeline: - """Return a Pipeline instance.""" - ext2rdr = {} - - for eidx in self.ebuild: - # build extractor - try: - ext = self.ebuild.build(eidx) - - except errors.LoaderError as err: # failed to load extractor; skip - logger.error('failed to load extractor: %s', err) - continue - - except errors.BuilderError as err: # failed to build instance; skip - logger.error(str(err)) - continue - - try: - # get reader required by extractor - if ext.CONTENT_READER is not None: - rdr = self.rbuild.build(ext.CONTENT_READER) - else: - rdr = None - # store extractor - ext2rdr[ext] = rdr - - except errors.LoaderError as err: # failed to load reader - logger.error('failed to load reader: %s', err) - - except errors.BuilderError as err: # failed to build reader - logger.error(str(err)) - - return pipeline.Pipeline(self.prefix, ext2rdr) - - - -## EOF ## diff --git a/bsie/utils/__init__.py b/bsie/utils/__init__.py index bd22236..9cb60ed 100644 --- a/bsie/utils/__init__.py +++ b/bsie/utils/__init__.py @@ -4,19 +4,24 @@ Part of the bsie module. A copy of the license is provided with the project. Author: Matthias Baumgartner, 2022 """ -# imports +# standard imports import typing # inner-module imports from . import bsfs +from . import filematcher from . import namespaces as ns from . import node +from .loading import safe_load, unpack_qualified_name # exports __all__: typing.Sequence[str] = ( 'bsfs', + 'filematcher', 'node', 'ns', + 'safe_load', + 'unpack_qualified_name', ) ## EOF ## diff --git a/bsie/utils/bsfs.py b/bsie/utils/bsfs.py index 0b88479..ef5db31 100644 --- a/bsie/utils/bsfs.py +++ b/bsie/utils/bsfs.py @@ -4,7 +4,7 @@ Part of the bsie module. A copy of the license is provided with the project. 
Author: Matthias Baumgartner, 2022 """ -# imports +# standard imports import typing # bsfs imports diff --git a/bsie/base/errors.py b/bsie/utils/errors.py index dc3c30e..8133cd4 100644 --- a/bsie/base/errors.py +++ b/bsie/utils/errors.py @@ -4,7 +4,7 @@ Part of the bsie module. A copy of the license is provided with the project. Author: Matthias Baumgartner, 2022 """ -# imports +# standard imports import typing # exports @@ -39,4 +39,10 @@ class ProgrammingError(_BSIEError): class UnreachableError(ProgrammingError): """Bravo, you've reached a point in code that should logically not be reachable.""" +class ParserError(_BSIEError): + """Failed to parse due to invalid syntax or structures.""" + +class UnsupportedFileFormatError(ReaderError): + """Failed to read a file format.""" + ## EOF ## diff --git a/bsie/tools/__init__.py b/bsie/utils/filematcher/__init__.py index 803c321..1e23e4e 100644 --- a/bsie/tools/__init__.py +++ b/bsie/utils/filematcher/__init__.py @@ -4,17 +4,17 @@ Part of the bsie module. A copy of the license is provided with the project. Author: Matthias Baumgartner, 2022 """ -# imports +# standard imports import typing # inner-module imports -from . import builder -from .pipeline import Pipeline +from .matcher import Matcher +from .parser import parse # exports __all__: typing.Sequence[str] = ( - 'builder', - 'Pipeline', + 'Matcher', + 'parse', ) ## EOF ## diff --git a/bsie/utils/filematcher/matcher.py b/bsie/utils/filematcher/matcher.py new file mode 100644 index 0000000..a279a4b --- /dev/null +++ b/bsie/utils/filematcher/matcher.py @@ -0,0 +1,179 @@ +""" + +Part of the bsie module. +A copy of the license is provided with the project. 
+Author: Matthias Baumgartner, 2021 +""" +# standard imports +from collections.abc import Callable, Collection, Hashable +import abc +import os +import typing + +# external imports +import magic + +# exports +__all__: typing.Sequence[str] = [] + + +## code ## + +# abstract nodes + +class Matcher(abc.ABC, Hashable, Callable, Collection): # type: ignore [misc] # Invalid base class Callable + """Matcher node base class.""" + + # child expressions or terminals + _childs: typing.Set[typing.Any] + + def __init__(self, *childs: typing.Any): + if len(childs) == 1 and isinstance(childs[0], (list, tuple, set)): + self._childs = set(childs[0]) + else: + self._childs = set(childs) + + def __contains__(self, needle: typing.Any) -> bool: + return needle in self._childs + + def __iter__(self) -> typing.Iterator[typing.Any]: + return iter(self._childs) + + def __len__(self) -> int: + return len(self._childs) + + def __repr__(self) -> str: + return f'{type(self).__name__}({self._childs})' + + def __hash__(self) -> int: + return hash((type(self), tuple(set(self._childs)))) + + def __eq__(self, other: typing.Any) -> bool: + return isinstance(other, type(self)) \ + and self._childs == other._childs + + @abc.abstractmethod + def __call__(self, path: str) -> bool: # pylint: disable=arguments-differ + """Check if *path* satisfies the conditions set by the Matcher instance.""" + +class NOT(Matcher): + """Invert a matcher result.""" + def __init__(self, expr: Matcher): + super().__init__(expr) + def __call__(self, path: str) -> bool: + return not next(iter(self._childs))(path) + +# aggregate nodes + +class Aggregate(Matcher): # pylint: disable=too-few-public-methods # Yeah, it's an interface... 
+ """Aggregation function base class (And, Or).""" + +class And(Aggregate): + """Accept only if all conditions are satisfied.""" + def __call__(self, path: str) -> bool: + for itm in self: + if not itm(path): + return False + return True + +class Or(Aggregate): + """Accept only if at least one condition is satisfied.""" + def __call__(self, path: str) -> bool: + for itm in self: + if itm(path): + return True + return False + + +# criteria nodes + +class Criterion(Matcher): + """Criterion base class. Limits acceptance to certain values.""" + def accepted(self) -> typing.Set[typing.Any]: + """Return a set of accepted values.""" + return self._childs + +# criteria w/o value (valueless) + +class Any(Criterion): + """Accepts anything.""" + def __call__(self, path: str) -> bool: + return True + +class Nothing(Criterion): + """Accepts nothing.""" + def __call__(self, path: str) -> bool: + return False + +class Exists(Criterion): + """Filters by existence.""" + def __call__(self, path: str) -> bool: + return os.path.exists(path) + +class IsFile(Criterion): + """Checks if the path is a regular file.""" + def __call__(self, path: str) -> bool: + return os.path.isfile(path) + +class IsDir(Criterion): + """Checks if the path is a directory.""" + def __call__(self, path: str) -> bool: + return os.path.isdir(path) + +class IsLink(Criterion): + """Checks if the path is a link.""" + def __call__(self, path: str) -> bool: + return os.path.islink(path) + +class IsAbs(Criterion): + """Checks if the path is an absolute path.""" + def __call__(self, path: str) -> bool: + return os.path.isabs(path) + +class IsRel(Criterion): + """Checks if the path is a relative path.""" + def __call__(self, path: str) -> bool: + return not os.path.isabs(path) + +class IsMount(Criterion): + """Checks if the path is a mount point.""" + def __call__(self, path: str) -> bool: + return os.path.ismount(path) + +class IsEmpty(Criterion): + """Checks if the path is an empty file.""" + def __call__(self, path: 
str) -> bool: + return os.path.exists(path) and os.stat(path).st_size == 0 + +class IsReadable(Criterion): + """Checks if the path is readable.""" + def __call__(self, path: str) -> bool: + return os.path.exists(path) and os.access(path, os.R_OK) + +class IsWritable(Criterion): + """Checks if the path is writable.""" + def __call__(self, path: str) -> bool: + return os.path.exists(path) and os.access(path, os.W_OK) + +class IsExecutable(Criterion): + """Checks if the path is executable.""" + def __call__(self, path: str) -> bool: + return os.path.exists(path) and os.access(path, os.X_OK) + +# criteria w/ value + +class Extension(Criterion): + """Filters by file extension (without the dot).""" + def __call__(self, path: str) -> bool: + _, ext = os.path.splitext(path) + return ext[1:] in self.accepted() + +class Mime(Criterion): + """Filters by mime type.""" + def __call__(self, path: str) -> bool: + try: + return magic.from_file(path, mime=True).lower() in self.accepted() + except FileNotFoundError: + return False + +## EOF ## diff --git a/bsie/utils/filematcher/parser.py b/bsie/utils/filematcher/parser.py new file mode 100644 index 0000000..2f82875 --- /dev/null +++ b/bsie/utils/filematcher/parser.py @@ -0,0 +1,146 @@ +""" + +Part of the bsie module. +A copy of the license is provided with the project. +Author: Matthias Baumgartner, 2021 +""" +# standard imports +import typing + +# external imports +import pyparsing +from pyparsing import printables, alphas8bit, punc8bit, QuotedString, Word, \ + delimitedList, Or, CaselessKeyword, Group, oneOf, Optional + +# inner-module imports +from . import matcher +from .. 
import errors + +# exports +__all__: typing.Sequence[str] = ( + 'parse', + ) + + +## code ## + +class FileMatcherParser(): + """ + EXPR := RULES | RULES "|" RULES + RULESET := RULE | RULE, RULE + RULE := CRITERION OP VALUE | CRITERION OP {VALUES} | VALUELESS + OP := != | = + VALUES := VALUE | VALUE, VALUE + VALUE := [word] + CRITERION := mime | extension | ... + """ + + # criteria matcher nodes w/ arguments + _CRITERIA: typing.Dict[str, typing.Type[matcher.Matcher]] = { + 'extension': matcher.Extension, + 'mime': matcher.Mime, + } + + # criteria matcher nodes w/o arguments + _VALUELESS: typing.Dict[str, typing.Type[matcher.Matcher]] = { + 'any': matcher.Any, + 'nothing': matcher.Nothing, + 'exists': matcher.Exists, + 'isfile': matcher.IsFile, + 'isdir': matcher.IsDir, + 'islink': matcher.IsLink, + 'isabs': matcher.IsAbs, + 'isrel': matcher.IsRel, + 'ismount': matcher.IsMount, + 'emtpy': matcher.IsEmpty, + 'readable': matcher.IsReadable, + 'writable': matcher.IsWritable, + 'executable': matcher.IsExecutable, + } + + # pyparsing parser instance. + _parser: pyparsing.ParseExpression + + def __init__(self): + # build the parser + # VALUE := [word] + alphabet = (printables + alphas8bit + punc8bit).translate(str.maketrans('', '', ',{}|=')) + value = QuotedString(quoteChar='"', escChar='\\') ^ Word(alphabet) + # CRITERION := mime | extension | ... 
+ criterion = Or([CaselessKeyword(p) for p in self._CRITERIA]).setResultsName('criterion') + valueless = Or([CaselessKeyword(p) for p in self._VALUELESS]).setResultsName('criterion') + # VALUES := VALUE | VALUE, VALUE + values = delimitedList(value, delim=',').setResultsName('value') + # OP := '=' | '!=' + eqop = oneOf('= !=').setResultsName('op') + # RULE := CRITERION OP VALUE | CRITERION OP {VALUES} | VALUELESS + rule_none = Group(Optional('!').setResultsName('op') + valueless).setResultsName('rule_none') + rule_one = Group(criterion + eqop + value.setResultsName('value')).setResultsName('rule_one') + rule_few = Group(criterion + eqop + '{' + values + '}').setResultsName('rule_few') + # RULESET := RULE | RULE, RULE + ruleset = Group(delimitedList(rule_none ^ rule_one ^ rule_few, delim=',')) + # EXPR := RULESET | RULESET \| RULESET + self._parser = delimitedList(ruleset, delim='|') + + def parse(self, query: str) -> matcher.Matcher: # pylint: disable=too-many-branches + """Build a file matcher from a rule definition.""" + # preprocess the query + query = query.strip() + + # empty query + if len(query) == 0: + return matcher.Any() + + try: + parsed = self._parser.parseString(query, parseAll=True) + except pyparsing.ParseException as err: + raise errors.ParserError(f'Cannot parse query {err}') + + # convert to Matcher + rules = [] + for exp in parsed: + tokens = [] + for rule in exp: + # fetch accepted values + if rule.getName() == 'rule_none': + accepted = [] + elif rule.getName() == 'rule_one': + accepted = [rule.value] + elif rule.getName() == 'rule_few': + accepted = list(rule.value) + else: # prevented by grammar + raise errors.UnreachableError('Invalid rule definition') + + # build criterion + if rule.criterion in self._VALUELESS: + cls = self._VALUELESS[rule.criterion] + if rule.op == '!': + tokens.append(matcher.NOT(cls())) + else: + tokens.append(cls()) + elif rule.criterion in self._CRITERIA: + cls = self._CRITERIA[rule.criterion] + if rule.op == '!=': + 
tokens.append(matcher.NOT(cls(accepted))) + else: + tokens.append(cls(accepted)) + else: # prevented by grammar + raise errors.UnreachableError(f'Invalid condition "{rule.criterion}"') + + # And-aggregate rules in one ruleset (if needed) + tokens = matcher.And(tokens) if len(tokens) > 1 else tokens[0] + rules.append(tokens) + + # Or-aggregate rulesets + expr = matcher.Or(rules) if len(rules) > 1 else rules[0] + + return expr + +# build default instance +file_match_parser = FileMatcherParser() + +def parse(query: str) -> matcher.Matcher: + """Shortcut for FileMatcherParser()(query).""" + return file_match_parser.parse(query) + +## EOF ## diff --git a/bsie/utils/loading.py b/bsie/utils/loading.py new file mode 100644 index 0000000..eb05c35 --- /dev/null +++ b/bsie/utils/loading.py @@ -0,0 +1,54 @@ +""" + +Part of the bsie module. +A copy of the license is provided with the project. +Author: Matthias Baumgartner, 2022 +""" +# standard imports +import importlib +import typing + +# inner-module imports +from . import errors + +# exports +__all__: typing.Sequence[str] = ( + 'safe_load', + 'unpack_qualified_name', + ) + + +## code ## + +def safe_load(module_name: str, class_name: str): + """Get a class from a module. Raise BuilderError if anything goes wrong.""" + try: + # load the module + module = importlib.import_module(module_name) + except Exception as err: + # cannot import module + raise errors.LoaderError(f'cannot load module {module_name}') from err + + try: + # get the class from the module + cls = getattr(module, class_name) + except Exception as err: + # cannot find the class + raise errors.LoaderError(f'cannot load class {class_name} from module {module_name}') from err + + return cls + + +def unpack_qualified_name(name): + """Split a name into its module and class component (dot-separated).""" + if not isinstance(name, str): + raise TypeError(name) + if '.' 
not in name: + raise ValueError('name must be a qualified class name.') + module_name, class_name = name[:name.rfind('.')], name[name.rfind('.')+1:] + if module_name == '': + raise ValueError('name must be a qualified class name.') + return module_name, class_name + + +## EOF ## diff --git a/bsie/utils/namespaces.py b/bsie/utils/namespaces.py index a29fc1b..393b436 100644 --- a/bsie/utils/namespaces.py +++ b/bsie/utils/namespaces.py @@ -4,7 +4,7 @@ Part of the bsie module. A copy of the license is provided with the project. Author: Matthias Baumgartner, 2022 """ -# imports +# standard imports import typing # inner-module imports @@ -15,6 +15,7 @@ bse = _bsfs.Namespace('http://bsfs.ai/schema/Entity') bsfs = _bsfs.Namespace('http://bsfs.ai/schema', fsep='/') bsm = _bsfs.Namespace('http://bsfs.ai/schema/Meta') xsd = _bsfs.Namespace('http://www.w3.org/2001/XMLSchema') +bsf = _bsfs.Namespace('http://ie.bsfs.ai/schema/Feature') # export __all__: typing.Sequence[str] = ( diff --git a/bsie/utils/node.py b/bsie/utils/node.py index ecf39cd..91e4f37 100644 --- a/bsie/utils/node.py +++ b/bsie/utils/node.py @@ -4,7 +4,7 @@ Part of the bsie module. A copy of the license is provided with the project. Author: Matthias Baumgartner, 2022 """ -# imports +# standard imports import typing # bsie imports @@ -14,7 +14,19 @@ setup( url='https://www.igsor.net/projects/blackstar/bsie/', download_url='https://pip.igsor.net', packages=('bsie', ), - install_requires=('rdflib', 'bsfs'), + install_requires=( + 'bsfs', + 'pyparsing', + 'python-magic', + 'rdflib', # only for tests + 'requests', # only for tests + ), python_requires=">=3.7", + extra_require=( + # image reader + 'pillow', 'rawpy', + # image extractors + 'numpy', + ) ) diff --git a/test/apps/test_index.py b/test/apps/test_index.py index 9cdc656..7f5be8e 100644 --- a/test/apps/test_index.py +++ b/test/apps/test_index.py @@ -4,13 +4,15 @@ Part of the bsie test suite. A copy of the license is provided with the project. 
Author: Matthias Baumgartner, 2022 """ -# imports +# standard imports import contextlib import io import os -import rdflib import unittest +# external imports +import rdflib + # bsie imports from bsie.utils import ns @@ -21,6 +23,12 @@ from bsie.apps.index import main ## code ## class TestIndex(unittest.TestCase): + def test_main_invalid(self): + outbuf = io.StringIO() + with contextlib.redirect_stdout(outbuf): + bsfs = main([os.path.join(os.path.dirname(__file__), 'inexistent-file.t')]) + self.assertEqual(outbuf.getvalue().strip(), '') + def test_main(self): bsfs = main([ '-r', @@ -75,6 +83,14 @@ class TestIndex(unittest.TestCase): (rdflib.URIRef(prefix + 'd803187cbf3676ae9d38126270a6152c60431589aa3bb3824baf8954e9c097f1'), rdflib.URIRef(ns.bse.author), rdflib.Literal('Me, myself, and I', datatype=rdflib.XSD.string)), (rdflib.URIRef(prefix + 'd803187cbf3676ae9d38126270a6152c60431589aa3bb3824baf8954e9c097f1'), rdflib.URIRef(ns.bse.filename), rdflib.Literal('td_second', datatype=rdflib.XSD.string)), (rdflib.URIRef(prefix + 'd803187cbf3676ae9d38126270a6152c60431589aa3bb3824baf8954e9c097f1'), rdflib.URIRef(ns.bse.filesize), rdflib.Literal('703', datatype=rdflib.XSD.integer)), + (rdflib.URIRef(prefix + 'accb115d266ad60c53cd01a7f7130f245886ce8eaf69bc85319febc11d9fe089'), rdflib.RDF.type, rdflib.URIRef(ns.bsfs.File)), + (rdflib.URIRef(prefix + 'accb115d266ad60c53cd01a7f7130f245886ce8eaf69bc85319febc11d9fe089'), rdflib.URIRef(ns.bse.author), rdflib.Literal('Me, myself, and I', datatype=rdflib.XSD.string)), + (rdflib.URIRef(prefix + 'accb115d266ad60c53cd01a7f7130f245886ce8eaf69bc85319febc11d9fe089'), rdflib.URIRef(ns.bse.filename), rdflib.Literal('testimage.jpg', datatype=rdflib.XSD.string)), + (rdflib.URIRef(prefix + 'accb115d266ad60c53cd01a7f7130f245886ce8eaf69bc85319febc11d9fe089'), rdflib.URIRef(ns.bse.filesize), rdflib.Literal('349264', datatype=rdflib.XSD.integer)), + (rdflib.URIRef(prefix + 'accb115d266ad60c53cd01a7f7130f245886ce8eaf69bc85319febc11d9fe089'), 
rdflib.URIRef('http://bsfs.ai/schema/Entity/colors_spatial#0658f2234a054e1dd59a14462c89f7733e019160419c796356aa831498bd0a04'), + rdflib.Literal( + '(91, 127, 121, 94, 138, 167, 163, 134, 190, 138, 170, 156, 121, 142, 159)', + datatype=rdflib.URIRef('http://ie.bsfs.ai/schema/Feature/ColorsSpatial#0658f2234a054e1dd59a14462c89f7733e019160419c796356aa831498bd0a04'))), })) # NOTE: we don't check ns.bsm.t_created since it depends on the execution time. Triples would look like this: @@ -89,6 +105,7 @@ class TestIndex(unittest.TestCase): # (rdflib.URIRef(prefix + 'b8fd7fba818254166a6043195004138ebda6923e012442f819a2c49671136c70'), rdflib.URIRef(ns.bsm.t_created), rdflib.Literal('1670..........', datatype=rdflib.XSD.integer)), # (rdflib.URIRef(prefix + 'd43758ace82154a1cc10ca0dfef63cb20dd831f9c87edd6dc06539eefe67371d'), rdflib.URIRef(ns.bsm.t_created), rdflib.Literal('1670..........', datatype=rdflib.XSD.integer)), # (rdflib.URIRef(prefix + 'd803187cbf3676ae9d38126270a6152c60431589aa3bb3824baf8954e9c097f1'), rdflib.URIRef(ns.bsm.t_created), rdflib.Literal('1670..........', datatype=rdflib.XSD.integer)), + # (rdflib.URIRef(prefix + 'accb115d266ad60c53cd01a7f7130f245886ce8eaf69bc85319febc11d9fe089'), rdflib.URIRef(ns.bsm.t_created), rdflib.Literal('1670..........', datatype=rdflib.XSD.integer)), # instead, we simply check if there's such a predicate for each file self.assertSetEqual({sub for sub, _ in bsfs._backend._graph.subject_objects(rdflib.URIRef(ns.bsm.t_created))}, { rdflib.URIRef(prefix + '2f4109b40107cc50e0884755a1a961ed126887e49b8dbaf0e146b2e226aa6647'), @@ -102,6 +119,7 @@ class TestIndex(unittest.TestCase): rdflib.URIRef(prefix + 'b8fd7fba818254166a6043195004138ebda6923e012442f819a2c49671136c70'), rdflib.URIRef(prefix + 'd43758ace82154a1cc10ca0dfef63cb20dd831f9c87edd6dc06539eefe67371d'), rdflib.URIRef(prefix + 'd803187cbf3676ae9d38126270a6152c60431589aa3bb3824baf8954e9c097f1'), + rdflib.URIRef(prefix + 
'accb115d266ad60c53cd01a7f7130f245886ce8eaf69bc85319febc11d9fe089'), }) def test_print(self): @@ -148,6 +166,10 @@ class TestIndex(unittest.TestCase): f'Node(http://bsfs.ai/schema/File, http://example.com/me/file#d803187cbf3676ae9d38126270a6152c60431589aa3bb3824baf8954e9c097f1) Predicate({ns.bse.author}) Me, myself, and I', f'Node(http://bsfs.ai/schema/File, http://example.com/me/file#d803187cbf3676ae9d38126270a6152c60431589aa3bb3824baf8954e9c097f1) Predicate({ns.bse.filename}) td_second', f'Node(http://bsfs.ai/schema/File, http://example.com/me/file#d803187cbf3676ae9d38126270a6152c60431589aa3bb3824baf8954e9c097f1) Predicate({ns.bse.filesize}) 703', + f'Node(http://bsfs.ai/schema/File, http://example.com/me/file#accb115d266ad60c53cd01a7f7130f245886ce8eaf69bc85319febc11d9fe089) Predicate({ns.bse.filesize}) 349264', + f'Node(http://bsfs.ai/schema/File, http://example.com/me/file#accb115d266ad60c53cd01a7f7130f245886ce8eaf69bc85319febc11d9fe089) Predicate({ns.bse.author}) Me, myself, and I', + f'Node(http://bsfs.ai/schema/File, http://example.com/me/file#accb115d266ad60c53cd01a7f7130f245886ce8eaf69bc85319febc11d9fe089) Predicate({ns.bse.filename}) testimage.jpg', + f'Node(http://bsfs.ai/schema/File, http://example.com/me/file#accb115d266ad60c53cd01a7f7130f245886ce8eaf69bc85319febc11d9fe089) Predicate(http://bsfs.ai/schema/Entity/colors_spatial#0658f2234a054e1dd59a14462c89f7733e019160419c796356aa831498bd0a04) (91, 127, 121, 94, 138, 167, 163, 134, 190, 138, 170, 156, 121, 142, 159)', }) diff --git a/test/apps/test_info.py b/test/apps/test_info.py index 6f4d98f..60e9ba1 100644 --- a/test/apps/test_info.py +++ b/test/apps/test_info.py @@ -4,12 +4,15 @@ Part of the bsie test suite. A copy of the license is provided with the project. 
Author: Matthias Baumgartner, 2022 """ -# imports +# standard imports import argparse import contextlib import io import unittest +# bsie imports +from bsie.utils import bsfs + # objects to test from bsie.apps.info import main @@ -28,6 +31,22 @@ class TestIndex(unittest.TestCase): 'http://bsfs.ai/schema/Predicate', 'http://bsfs.ai/schema/Entity#filename', 'http://bsfs.ai/schema/Entity#filesize', + 'http://bsfs.ai/schema/Entity/colors_spatial#0658f2234a054e1dd59a14462c89f7733e019160419c796356aa831498bd0a04' + }) + + def test_schema(self): + outbuf = io.StringIO() + with contextlib.redirect_stdout(outbuf): + # show schema infos + main(['schema']) + # verify output + schema = bsfs.schema.from_string(outbuf.getvalue()) + self.assertSetEqual({pred.uri for pred in schema.predicates()}, { + 'http://bsfs.ai/schema/Entity#author', + 'http://bsfs.ai/schema/Predicate', + 'http://bsfs.ai/schema/Entity#filename', + 'http://bsfs.ai/schema/Entity#filesize', + 'http://bsfs.ai/schema/Entity/colors_spatial#0658f2234a054e1dd59a14462c89f7733e019160419c796356aa831498bd0a04' }) def test_invalid(self): diff --git a/test/apps/testdir/testimage.jpg b/test/apps/testdir/testimage.jpg Binary files differnew file mode 100644 index 0000000..c80bb48 --- /dev/null +++ b/test/apps/testdir/testimage.jpg diff --git a/test/extractor/generic/test_constant.py b/test/extractor/generic/test_constant.py index 9dbaced..bde3805 100644 --- a/test/extractor/generic/test_constant.py +++ b/test/extractor/generic/test_constant.py @@ -4,7 +4,7 @@ Part of the bsie test suite. A copy of the license is provided with the project. 
Author: Matthias Baumgartner, 2022 """ -# imports +# standard imports import unittest # bsie imports @@ -36,19 +36,19 @@ class TestConstant(unittest.TestCase): node = _node.Node(ns.bsfs.Entity, '') # Blank node p_author = ext.schema.predicate(ns.bse.author) p_comment = ext.schema.predicate(ns.bse.comment) - entity = ext.schema.node(ns.bsfs.Node).get_child(ns.bsfs.Entity) - string = ext.schema.literal(ns.bsfs.Literal).get_child(ns.xsd.string) + entity = ext.schema.node(ns.bsfs.Node).child(ns.bsfs.Entity) + string = ext.schema.literal(ns.bsfs.Literal).child(ns.xsd.string) # baseline self.assertSetEqual(set(ext.extract(node, None, (p_author, p_comment))), {(node, p_author, 'Me, myself, and I'), (node, p_comment, 'the quick brown fox jumps over the lazy dog.')}) # predicates is respected - p_foobar = ext.schema.predicate(ns.bsfs.Predicate).get_child(ns.bse.foobar, domain=entity, range=entity) + p_foobar = ext.schema.predicate(ns.bsfs.Predicate).child(ns.bse.foobar, domain=entity, range=entity) self.assertSetEqual(set(ext.extract(node, None, (p_author, p_foobar))), {(node, p_author, 'Me, myself, and I')}) self.assertSetEqual(set(ext.extract(node, None, (p_comment, p_foobar))), {(node, p_comment, 'the quick brown fox jumps over the lazy dog.')}) - p_barfoo = ext.schema.predicate(ns.bse.author).get_child(ns.bse.comment, domain=entity, range=string) + p_barfoo = ext.schema.predicate(ns.bse.author).child(ns.bse.comment, domain=entity, range=string) self.assertSetEqual(set(ext.extract(node, None, (p_foobar, p_barfoo))), set()) def test_construct(self): diff --git a/test/extractor/generic/test_path.py b/test/extractor/generic/test_path.py index 820f402..ae68686 100644 --- a/test/extractor/generic/test_path.py +++ b/test/extractor/generic/test_path.py @@ -4,11 +4,11 @@ Part of the bsie test suite. A copy of the license is provided with the project. 
Author: Matthias Baumgartner, 2022 """ -# imports +# standard imports import unittest # bsie imports -from bsie.base import extractor +from bsie.extractor import base from bsie.utils import bsfs, node as _node, ns # objects to test @@ -29,7 +29,7 @@ class TestPath(unittest.TestCase): def test_schema(self): self.assertEqual(Path().schema, - bsfs.schema.Schema.from_string(extractor.SCHEMA_PREAMBLE + ''' + bsfs.schema.from_string(base.SCHEMA_PREAMBLE + ''' bse:filename rdfs:subClassOf bsfs:Predicate ; rdfs:domain bsfs:File ; rdfs:range xsd:string ; @@ -41,19 +41,19 @@ class TestPath(unittest.TestCase): node = _node.Node(ns.bsfs.File, '') # Blank node content = '/tmp/foo/bar' p_filename = ext.schema.predicate(ns.bse.filename) - entity = ext.schema.node(ns.bsfs.Node).get_child(ns.bsfs.Entity) - string = ext.schema.literal(ns.bsfs.Literal).get_child(ns.xsd.string) + entity = ext.schema.node(ns.bsfs.Node).child(ns.bsfs.Entity) + string = ext.schema.literal(ns.bsfs.Literal).child(ns.xsd.string) # baseline self.assertSetEqual(set(ext.extract(node, content, (p_filename, ))), {(node, p_filename, 'bar')}) # predicates parameter is respected - p_foo = ext.schema.predicate(ns.bsfs.Predicate).get_child(ns.bse.foo, domain=entity, range=string) # unsupported predicate + p_foo = ext.schema.predicate(ns.bsfs.Predicate).child(ns.bse.foo, domain=entity, range=string) # unsupported predicate self.assertSetEqual(set(ext.extract(node, content, (p_filename, p_foo))), {(node, p_filename, 'bar')}) self.assertSetEqual(set(ext.extract(node, content, (p_foo, ))), set()) # predicates are validated - p_bar = p_foo.get_child(ns.bse.filename) # same URI but different hierarchy + p_bar = p_foo.child(ns.bse.filename) # same URI but different hierarchy self.assertSetEqual(set(ext.extract(node, content, (p_filename, p_bar))), {(node, p_filename, 'bar')}) self.assertSetEqual(set(ext.extract(node, content, (p_bar, ))), set()) diff --git a/test/extractor/generic/test_stat.py 
b/test/extractor/generic/test_stat.py index 3441438..e5562d1 100644 --- a/test/extractor/generic/test_stat.py +++ b/test/extractor/generic/test_stat.py @@ -4,12 +4,12 @@ Part of the bsie test suite. A copy of the license is provided with the project. Author: Matthias Baumgartner, 2022 """ -# imports +# standard imports import os import unittest # bsie imports -from bsie.base import extractor +from bsie.extractor import base from bsie.utils import bsfs, node as _node, ns # objects to test @@ -30,7 +30,7 @@ class TestStat(unittest.TestCase): def test_schema(self): self.assertEqual(Stat().schema, - bsfs.schema.Schema.from_string(extractor.SCHEMA_PREAMBLE + ''' + bsfs.schema.from_string(base.SCHEMA_PREAMBLE + ''' bse:filesize rdfs:subClassOf bsfs:Predicate ; rdfs:domain bsfs:File ; rdfs:range xsd:integer ; @@ -42,19 +42,19 @@ class TestStat(unittest.TestCase): node = _node.Node(ns.bsfs.File, '') # Blank node content = os.stat(__file__) p_filesize = ext.schema.predicate(ns.bse.filesize) - entity = ext.schema.node(ns.bsfs.Node).get_child(ns.bsfs.Entity) - string = ext.schema.literal(ns.bsfs.Literal).get_child(ns.xsd.string) + entity = ext.schema.node(ns.bsfs.Node).child(ns.bsfs.Entity) + string = ext.schema.literal(ns.bsfs.Literal).child(ns.xsd.string) # baseline self.assertSetEqual(set(ext.extract(node, content, (p_filesize, ))), {(node, p_filesize, content.st_size)}) # predicates parameter is respected - p_foo = ext.schema.predicate(ns.bsfs.Predicate).get_child(ns.bse.foo, domain=entity, range=string) # unsupported predicate + p_foo = ext.schema.predicate(ns.bsfs.Predicate).child(ns.bse.foo, domain=entity, range=string) # unsupported predicate self.assertSetEqual(set(ext.extract(node, content, (p_filesize, p_foo))), {(node, p_filesize, content.st_size)}) self.assertSetEqual(set(ext.extract(node, content, (p_foo, ))), set()) # predicates are validated - p_bar = p_foo.get_child(ns.bse.filesizse) # same URI but different hierarchy + p_bar = p_foo.child(ns.bse.filesizse) # 
same URI but different hierarchy self.assertSetEqual(set(ext.extract(node, content, (p_filesize, p_bar))), {(node, p_filesize, content.st_size)}) self.assertSetEqual(set(ext.extract(node, content, (p_bar, ))), set()) diff --git a/test/base/__init__.py b/test/extractor/image/__init__.py index e69de29..e69de29 100644 --- a/test/base/__init__.py +++ b/test/extractor/image/__init__.py diff --git a/test/extractor/image/test_colors_spatial.py b/test/extractor/image/test_colors_spatial.py new file mode 100644 index 0000000..ba551f3 --- /dev/null +++ b/test/extractor/image/test_colors_spatial.py @@ -0,0 +1,100 @@ +""" + +Part of the bsie test suite. +A copy of the license is provided with the project. +Author: Matthias Baumgartner, 2022 +""" +# standard imports +import os +import unittest + +# external imports +import PIL.Image + +# bsie imports +from bsie.extractor import base +from bsie.utils import bsfs, ns, node as _node + +# objects to test +from bsie.extractor.image.colors_spatial import ColorsSpatial + + +## code ## + +class TestColorsSpatial(unittest.TestCase): + def setUp(self): + # content id with default constructors (width=32, height=32, exp=4) + self.instance_prefix = 'http://ie.bsfs.ai/schema/Feature/ColorsSpatial' + self.predicate_prefix = 'http://bsfs.ai/schema/Entity/colors_spatial' + self.uuid = 'adee8d6c43687021e1c5bffe56bcfe727f1638d792744137181304ef889dac2a' + + def test_essentials(self): + # clones are equal + self.assertEqual(ColorsSpatial(32, 32, 4), ColorsSpatial(32, 32, 4)) + self.assertEqual(hash(ColorsSpatial(32, 32, 4)), hash(ColorsSpatial(32, 32, 4))) + # equal respects type + self.assertNotEqual(ColorsSpatial(32, 32, 4), 'hello world') + self.assertNotEqual(hash(ColorsSpatial(32, 32, 4)), hash('hello world')) + # equals respects width + self.assertNotEqual(ColorsSpatial(32, 32, 4), ColorsSpatial(16, 32, 4)) + self.assertNotEqual(hash(ColorsSpatial(32, 32, 4)), hash(ColorsSpatial(16, 32, 4))) + # equals respects height + 
self.assertNotEqual(ColorsSpatial(32, 32, 4), ColorsSpatial(32, 16, 4)) + self.assertNotEqual(hash(ColorsSpatial(32, 32, 4)), hash(ColorsSpatial(32, 16, 4))) + # equals respects exp + self.assertNotEqual(ColorsSpatial(32, 32, 4), ColorsSpatial(32, 32, 8)) + self.assertNotEqual(hash(ColorsSpatial(32, 32, 4)), hash(ColorsSpatial(32, 32, 8))) + # string representation + self.assertEqual(str(ColorsSpatial()), 'ColorsSpatial') + self.assertEqual(repr(ColorsSpatial(64, 16, 2)), 'ColorsSpatial(64, 16, 2)') + + def test_dimension(self): + self.assertEqual(ColorsSpatial.dimension(32, 32, 4), 3 * (32*32 + 8*8 + 2*2)) + self.assertEqual(ColorsSpatial.dimension(16, 16, 8), 3 * (16*16 + 2*2)) + self.assertEqual(ColorsSpatial.dimension(64, 64, 16), 3 * (64*64 + 4*4)) + + def test_schema(self): + schema = bsfs.schema.from_string(base.SCHEMA_PREAMBLE + f''' + <{self.instance_prefix}> rdfs:subClassOf bsfs:Feature ; + # annotations + rdfs:label "Spatially dominant colors"^^xsd:string ; + schema:description "Domiant colors of subregions in an image."^^xsd:string ; + bsfs:dtype xsd:integer . + + <{self.instance_prefix}#{self.uuid}> rdfs:subClassOf <{self.instance_prefix}> ; + bsfs:dimension "3276"^^xsd:integer ; + # annotations + <{self.instance_prefix}/args#width> "32"^^xsd:integer ; + <{self.instance_prefix}/args#height> "32"^^xsd:integer ; + <{self.instance_prefix}/args#exp> "4"^^xsd:float . + + <{self.predicate_prefix}#{self.uuid}> rdfs:subClassOf bsfs:Predicate ; + rdfs:domain bsfs:File ; + rdfs:range <{self.instance_prefix}#{self.uuid}> ; + bsfs:unique "true"^^xsd:boolean . 
+ ''') + self.assertEqual(schema, ColorsSpatial().schema) + + def test_extract(self): + ext = ColorsSpatial(2,2,2) + img = PIL.Image.open(os.path.join(os.path.dirname(__file__), 'testimage.jpg')) + node = _node.Node(ns.bsfs.Entity, bsfs.URI('http://example.com/entity#1234')) + principals = set(ext.principals) + self.assertEqual(len(principals), 1) + # valid invocation yields feature + ret = list(ext.extract(node, img, principals)) + self.assertEqual(ret[0], ( + node, + list(principals)[0], + (91, 127, 121, 94, 138, 167, 163, 134, 190, 138, 170, 156, 121, 142, 159))) + # principals is respected + self.assertListEqual(list(ext.extract(node, img, {})), []) + + + +## main ## + +if __name__ == '__main__': + unittest.main() + +## EOF ## diff --git a/test/extractor/image/testimage.jpg b/test/extractor/image/testimage.jpg Binary files differnew file mode 100644 index 0000000..c80bb48 --- /dev/null +++ b/test/extractor/image/testimage.jpg diff --git a/test/base/test_extractor.py b/test/extractor/test_base.py index 30974ef..acfaf58 100644 --- a/test/base/test_extractor.py +++ b/test/extractor/test_base.py @@ -4,21 +4,21 @@ Part of the bsie test suite. A copy of the license is provided with the project. 
Author: Matthias Baumgartner, 2022 """ -# imports +# standard imports import unittest # bsie imports from bsie.utils import bsfs, ns # objects to test -from bsie.base import extractor +from bsie.extractor import base ## code ## -class StubExtractor(extractor.Extractor): +class StubExtractor(base.Extractor): def __init__(self): - super().__init__(bsfs.schema.Schema.from_string(extractor.SCHEMA_PREAMBLE + ''' + super().__init__(bsfs.schema.from_string(base.SCHEMA_PREAMBLE + ''' bse:author rdfs:subClassOf bsfs:Predicate ; rdfs:domain bsfs:Entity ; rdfs:range xsd:string ; @@ -52,11 +52,11 @@ class TestExtractor(unittest.TestCase): self.assertNotEqual(hash(ext), hash(sub)) def test_principals(self): - schema = bsfs.schema.Schema.Empty() - entity = schema.node(ns.bsfs.Node).get_child(ns.bsfs.Entity) - string = schema.literal(ns.bsfs.Literal).get_child(bsfs.URI('http://www.w3.org/2001/XMLSchema#string')) - p_author = schema.predicate(ns.bsfs.Predicate).get_child(ns.bse.author, domain=entity, range=string) - p_comment = schema.predicate(ns.bsfs.Predicate).get_child(ns.bse.comment, domain=entity, range=string) + schema = bsfs.schema.Schema() + entity = schema.node(ns.bsfs.Node).child(ns.bsfs.Entity) + string = schema.literal(ns.bsfs.Literal).child(bsfs.URI('http://www.w3.org/2001/XMLSchema#string')) + p_author = schema.predicate(ns.bsfs.Predicate).child(ns.bse.author, domain=entity, range=string) + p_comment = schema.predicate(ns.bsfs.Predicate).child(ns.bse.comment, domain=entity, range=string) ext = StubExtractor() self.assertSetEqual(set(ext.principals), {p_author, p_comment} | set(schema.predicates()) - {schema.predicate(ns.bsfs.Predicate)}) diff --git a/test/extractor/test_builder.py b/test/extractor/test_builder.py new file mode 100644 index 0000000..039ea53 --- /dev/null +++ b/test/extractor/test_builder.py @@ -0,0 +1,103 @@ +""" + +Part of the bsie test suite. +A copy of the license is provided with the project. 
+Author: Matthias Baumgartner, 2022 +""" +# standard imports +import unittest + +# bsie imports +from bsie.utils import errors + +# objects to test +from bsie.extractor import ExtractorBuilder + + +## code ## + +class TestExtractorBuilder(unittest.TestCase): + def test_iter(self): + # no specifications + self.assertListEqual(list(ExtractorBuilder([])), []) + # some specifications + builder = ExtractorBuilder([ + {'bsie.extractor.generic.path.Path': {}}, + {'bsie.extractor.generic.stat.Stat': {}}, + {'bsie.extractor.generic.path.Path': {}}, + ]) + self.assertListEqual(list(builder), [0, 1, 2]) + + def test_build(self): + # simple and repeated extractors + builder = ExtractorBuilder([ + {'bsie.extractor.generic.path.Path': {}}, + {'bsie.extractor.generic.stat.Stat': {}}, + {'bsie.extractor.generic.path.Path': {}}, + ]) + ext = [builder.build(0), builder.build(1), builder.build(2)] + import bsie.extractor.generic.path + import bsie.extractor.generic.stat + self.assertListEqual(ext, [ + bsie.extractor.generic.path.Path(), + bsie.extractor.generic.stat.Stat(), + bsie.extractor.generic.path.Path(), + ]) + # out-of-bounds raises KeyError + self.assertRaises(IndexError, builder.build, 3) + + # building with args + builder = ExtractorBuilder([ + {'bsie.extractor.generic.constant.Constant': { + 'schema': ''' + bse:author rdfs:subClassOf bsfs:Predicate ; + rdfs:domain bsfs:Entity ; + rdfs:range xsd:string ; + bsfs:unique "true"^^xsd:boolean . + bse:rating rdfs:subClassOf bsfs:Predicate ; + rdfs:domain bsfs:Entity ; + rdfs:range xsd:integer ; + bsfs:unique "true"^^xsd:boolean . 
+ ''', + 'tuples': [ + ('http://bsfs.ai/schema/Entity#author', 'Me, myself, and I'), + ('http://bsfs.ai/schema/Entity#rating', 123), + ], + }}]) + obj = builder.build(0) + import bsie.extractor.generic.constant + self.assertEqual(obj, bsie.extractor.generic.constant.Constant(''' + bse:author rdfs:subClassOf bsfs:Predicate ; + rdfs:domain bsfs:Entity ; + rdfs:range xsd:string ; + bsfs:unique "true"^^xsd:boolean . + bse:rating rdfs:subClassOf bsfs:Predicate ; + rdfs:domain bsfs:Entity ; + rdfs:range xsd:integer ; + bsfs:unique "true"^^xsd:boolean . + ''', [ + ('http://bsfs.ai/schema/Entity#author', 'Me, myself, and I'), + ('http://bsfs.ai/schema/Entity#rating', 123), + ])) + + # building with invalid args + self.assertRaises(errors.BuilderError, ExtractorBuilder( + [{'bsie.extractor.generic.path.Path': {'foo': 123}}]).build, 0) + # non-dict build specification + self.assertRaises(TypeError, ExtractorBuilder( + [('bsie.extractor.generic.path.Path', {})]).build, 0) + # multiple keys per build specification + self.assertRaises(TypeError, ExtractorBuilder( + [{'bsie.extractor.generic.path.Path': {}, + 'bsie.extractor.generic.stat.Stat': {}}]).build, 0) + # non-dict value for kwargs + self.assertRaises(TypeError, ExtractorBuilder( + [{'bsie.extractor.generic.path.Path': 123}]).build, 0) + + +## main ## + +if __name__ == '__main__': + unittest.main() + +## EOF ## diff --git a/test/lib/test_bsie.py b/test/lib/test_bsie.py index 771a0c2..38e6f59 100644 --- a/test/lib/test_bsie.py +++ b/test/lib/test_bsie.py @@ -4,13 +4,15 @@ Part of the bsie test suite. A copy of the license is provided with the project. 
Author: Matthias Baumgartner, 2022 """ -# imports +# standard imports import os import unittest # bsie imports -from bsie.base import extractor -from bsie.tools import builder +from bsie.extractor import ExtractorBuilder +from bsie.extractor.base import SCHEMA_PREAMBLE +from bsie.lib import PipelineBuilder +from bsie.reader import ReaderBuilder from bsie.utils import bsfs, node, ns # objects to test @@ -22,9 +24,9 @@ from bsie.lib.bsie import BSIE class TestBSIE(unittest.TestCase): def setUp(self): # reader builder - rbuild = builder.ReaderBuilder({}) + rbuild = ReaderBuilder({}) # extractor builder - ebuild = builder.ExtractorBuilder([ + ebuild = ExtractorBuilder([ {'bsie.extractor.generic.path.Path': {}}, {'bsie.extractor.generic.stat.Stat': {}}, {'bsie.extractor.generic.constant.Constant': dict( @@ -39,7 +41,7 @@ class TestBSIE(unittest.TestCase): ]) # build pipeline self.prefix = bsfs.Namespace('http://example.com/local/') - pbuild = builder.PipelineBuilder(self.prefix, rbuild, ebuild) + pbuild = PipelineBuilder(self.prefix, rbuild, ebuild) self.pipeline = pbuild.build() def test_construction(self): @@ -50,7 +52,7 @@ class TestBSIE(unittest.TestCase): ns.bse.filesize, ns.bse.author, }) - self.assertEqual(lib.schema, bsfs.schema.Schema.from_string(extractor.SCHEMA_PREAMBLE + ''' + self.assertEqual(lib.schema, bsfs.schema.from_string(SCHEMA_PREAMBLE + ''' bse:filename rdfs:subClassOf bsfs:Predicate ; rdfs:domain bsfs:File ; rdfs:range xsd:string ; @@ -77,7 +79,7 @@ class TestBSIE(unittest.TestCase): ns.bse.filesize, ns.bse.author, }) - self.assertEqual(lib.schema, bsfs.schema.Schema.from_string(extractor.SCHEMA_PREAMBLE + ''' + self.assertEqual(lib.schema, bsfs.schema.from_string(SCHEMA_PREAMBLE + ''' bse:filesize rdfs:subClassOf bsfs:Predicate ; rdfs:domain bsfs:File ; rdfs:range xsd:integer; @@ -95,7 +97,7 @@ class TestBSIE(unittest.TestCase): ns.bse.filesize, ns.bse.author, }) - self.assertEqual(lib.schema, 
bsfs.schema.Schema.from_string(extractor.SCHEMA_PREAMBLE + ''' + self.assertEqual(lib.schema, bsfs.schema.from_string(SCHEMA_PREAMBLE + ''' bse:filename rdfs:subClassOf bsfs:Predicate ; rdfs:domain bsfs:File ; rdfs:range xsd:string ; @@ -122,7 +124,7 @@ class TestBSIE(unittest.TestCase): self.assertSetEqual(set(lib.principals), { ns.bse.author, }) - self.assertEqual(lib.schema, bsfs.schema.Schema.from_string(extractor.SCHEMA_PREAMBLE + ''' + self.assertEqual(lib.schema, bsfs.schema.from_string(SCHEMA_PREAMBLE + ''' bse:author rdfs:subClassOf bsfs:Predicate ; rdfs:domain bsfs:Entity ; rdfs:range xsd:string ; @@ -137,7 +139,7 @@ class TestBSIE(unittest.TestCase): self.assertSetEqual(set(lib.principals), { ns.bse.filesize, }) - self.assertEqual(lib.schema, bsfs.schema.Schema.from_string(extractor.SCHEMA_PREAMBLE + ''' + self.assertEqual(lib.schema, bsfs.schema.from_string(SCHEMA_PREAMBLE + ''' bse:filesize rdfs:subClassOf bsfs:Predicate ; rdfs:domain bsfs:File ; rdfs:range xsd:integer; diff --git a/test/lib/test_builder.py b/test/lib/test_builder.py new file mode 100644 index 0000000..273d620 --- /dev/null +++ b/test/lib/test_builder.py @@ -0,0 +1,107 @@ +""" + +Part of the bsie test suite. +A copy of the license is provided with the project. +Author: Matthias Baumgartner, 2022 +""" +# standard imports +import logging +import unittest + +# bsie imports +from bsie.extractor import ExtractorBuilder +from bsie.reader import ReaderBuilder +from bsie.utils import bsfs + +# objects to test +from bsie.lib import PipelineBuilder + + +## code ## + +class TestPipelineBuilder(unittest.TestCase): + def test_build(self): + prefix = bsfs.URI('http://example.com/local/file#') + c_schema = ''' + bse:author rdfs:subClassOf bsfs:Predicate ; + rdfs:domain bsfs:Entity ; + rdfs:range xsd:string ; + bsfs:unique "true"^^xsd:boolean . 
+ ''' + c_tuples = [('http://bsfs.ai/schema/Entity#author', 'Me, myself, and I')] + # prepare builders + rbuild = ReaderBuilder({}) + ebuild = ExtractorBuilder([ + {'bsie.extractor.generic.path.Path': {}}, + {'bsie.extractor.generic.stat.Stat': {}}, + {'bsie.extractor.generic.constant.Constant': dict( + schema=c_schema, + tuples=c_tuples, + )}, + ]) + # build pipeline + builder = PipelineBuilder(prefix, rbuild, ebuild) + pipeline = builder.build() + # delayed import + import bsie.reader.path + import bsie.reader.stat + import bsie.extractor.generic.path + import bsie.extractor.generic.stat + import bsie.extractor.generic.constant + # check pipeline + self.assertDictEqual(pipeline._ext2rdr, { + bsie.extractor.generic.path.Path(): bsie.reader.path.Path(), + bsie.extractor.generic.stat.Stat(): bsie.reader.stat.Stat(), + bsie.extractor.generic.constant.Constant(c_schema, c_tuples): None, + }) + + # fail to load extractor + ebuild_err = ExtractorBuilder([ + {'bsie.extractor.generic.foo.Foo': {}}, + {'bsie.extractor.generic.path.Path': {}}, + ]) + with self.assertLogs(logging.getLogger('bsie.lib.builder'), logging.ERROR): + pipeline = PipelineBuilder(prefix, rbuild, ebuild_err).build() + self.assertDictEqual(pipeline._ext2rdr, { + bsie.extractor.generic.path.Path(): bsie.reader.path.Path()}) + + # fail to build extractor + ebuild_err = ExtractorBuilder([ + {'bsie.extractor.generic.path.Path': {'foo': 123}}, + {'bsie.extractor.generic.path.Path': {}}, + ]) + with self.assertLogs(logging.getLogger('bsie.lib.builder'), logging.ERROR): + pipeline = PipelineBuilder(prefix, rbuild, ebuild_err).build() + self.assertDictEqual(pipeline._ext2rdr, { + bsie.extractor.generic.path.Path(): bsie.reader.path.Path()}) + + # fail to load reader + with self.assertLogs(logging.getLogger('bsie.lib.builder'), logging.ERROR): + # switch reader of an extractor + old_reader = bsie.extractor.generic.path.Path.CONTENT_READER + bsie.extractor.generic.path.Path.CONTENT_READER = 'bsie.reader.foo.Foo' 
+ # build pipeline with invalid reader reference + pipeline = PipelineBuilder(prefix, rbuild, ebuild).build() + self.assertDictEqual(pipeline._ext2rdr, { + bsie.extractor.generic.stat.Stat(): bsie.reader.stat.Stat(), + bsie.extractor.generic.constant.Constant(c_schema, c_tuples): None, + }) + # switch back + bsie.extractor.generic.path.Path.CONTENT_READER = old_reader + + # fail to build reader + rbuild_err = ReaderBuilder({'bsie.reader.stat.Stat': dict(foo=123)}) + with self.assertLogs(logging.getLogger('bsie.lib.builder'), logging.ERROR): + pipeline = PipelineBuilder(prefix, rbuild_err, ebuild).build() + self.assertDictEqual(pipeline._ext2rdr, { + bsie.extractor.generic.path.Path(): bsie.reader.path.Path(), + bsie.extractor.generic.constant.Constant(c_schema, c_tuples): None, + }) + + +## main ## + +if __name__ == '__main__': + unittest.main() + +## EOF ## diff --git a/test/tools/test_pipeline.py b/test/lib/test_pipeline.py index a116a30..8fecc74 100644 --- a/test/tools/test_pipeline.py +++ b/test/lib/test_pipeline.py @@ -4,14 +4,13 @@ Part of the bsie test suite. A copy of the license is provided with the project. 
Author: Matthias Baumgartner, 2022 """ -# imports +# standard imports import logging import os import unittest # bsie imports -from bsie.base import errors -from bsie.utils import bsfs, node, ns +from bsie.utils import bsfs, errors, node, ns import bsie.extractor.generic.constant import bsie.extractor.generic.path import bsie.extractor.generic.stat @@ -19,7 +18,7 @@ import bsie.reader.path import bsie.reader.stat # objects to test -from bsie.tools.pipeline import Pipeline +from bsie.lib.pipeline import Pipeline ## code ## @@ -75,7 +74,7 @@ class TestPipeline(unittest.TestCase): # equivalence respects schema p2 = Pipeline(self.prefix, self.ext2rdr) - p2._schema = pipeline.schema.Empty() + p2._schema = bsfs.schema.Schema() self.assertNotEqual(pipeline, p2) self.assertNotEqual(hash(pipeline), hash(p2)) @@ -101,7 +100,7 @@ class TestPipeline(unittest.TestCase): p_author = pipeline.schema.predicate(ns.bse.author) p_rating = pipeline.schema.predicate(ns.bse.rating) entity = pipeline.schema.node(ns.bsfs.File) - p_invalid = pipeline.schema.predicate(ns.bsfs.Predicate).get_child(ns.bse.foo, range=entity) + p_invalid = pipeline.schema.predicate(ns.bsfs.Predicate).child(ns.bse.foo, range=entity) # extract given predicates self.assertSetEqual(set(pipeline(testfile, {p_filename, p_filesize})), { @@ -140,7 +139,7 @@ class TestPipeline(unittest.TestCase): raise errors.ReaderError('reader error') pipeline = Pipeline(self.prefix, {bsie.extractor.generic.path.Path(): FaultyReader()}) - with self.assertLogs(logging.getLogger('bsie.tools.pipeline'), logging.ERROR): + with self.assertLogs(logging.getLogger('bsie.lib.pipeline'), logging.ERROR): testfile = os.path.join(os.path.dirname(__file__), 'testfile.t') p_filename = pipeline.schema.predicate(ns.bse.filename) self.assertSetEqual(set(pipeline(testfile, {p_filename})), set()) @@ -151,7 +150,7 @@ class TestPipeline(unittest.TestCase): raise errors.ExtractorError('extractor error') pipeline = Pipeline(self.prefix, {FaultyExtractor(): 
bsie.reader.path.Path()}) - with self.assertLogs(logging.getLogger('bsie.tools.pipeline'), logging.ERROR): + with self.assertLogs(logging.getLogger('bsie.lib.pipeline'), logging.ERROR): testfile = os.path.join(os.path.dirname(__file__), 'testfile.t') p_filename = pipeline.schema.predicate(ns.bse.filename) self.assertSetEqual(set(pipeline(testfile, {p_filename})), set()) diff --git a/test/tools/__init__.py b/test/reader/image/__init__.py index e69de29..e69de29 100644 --- a/test/tools/__init__.py +++ b/test/reader/image/__init__.py diff --git a/test/reader/image/load_nef.py b/test/reader/image/load_nef.py new file mode 100644 index 0000000..5ba0adc --- /dev/null +++ b/test/reader/image/load_nef.py @@ -0,0 +1,28 @@ +""" + +Part of the bsie test suite. +A copy of the license is provided with the project. +Author: Matthias Baumgartner, 2022 +""" +# standard imports +import os + +# external imports +import requests + +# constants +IMAGE_URL = 'http://igsor.net/eik7AhvohghaeN5.nef' + +## code ## + +def get(): + """Download a raw test image.""" + target = os.path.join(os.path.dirname(__file__), 'testimage.nef') + if not os.path.exists(target): + with open(target, 'wb') as ofile: + ans = requests.get(IMAGE_URL) + ofile.write(ans.content) + + + +## EOF ## diff --git a/test/reader/image/test_image.py b/test/reader/image/test_image.py new file mode 100644 index 0000000..26f6a93 --- /dev/null +++ b/test/reader/image/test_image.py @@ -0,0 +1,54 @@ +""" + +Part of the bsie test suite. +A copy of the license is provided with the project. 
+Author: Matthias Baumgartner, 2022 +""" +# standard imports +import importlib +import os +import unittest + +# bsie imports +from bsie.utils import errors + +# objects to test +from bsie.reader.image import Image + + +## code ## + +class TestImage(unittest.TestCase): + def setUp(self): + if __package__ is None or __package__ == '': # direct call or local discovery + importlib.import_module('load_nef', __package__).get() + else: # parent discovery + importlib.import_module('.load_nef', __package__).get() + + def test_construct(self): + image = Image({}) + self.assertIsInstance(image, Image) + self.assertEqual(len(image._children), 2) + + def test_call(self): + image = Image({}) + # call returns raw image + img = image(os.path.join(os.path.dirname(__file__), 'testimage.nef')) + self.assertEqual(img.size, (6016, 4016)) # FIXME: change when image was replaced + img.close() + # call returns jpeg image + img = image(os.path.join(os.path.dirname(__file__), 'testimage.jpg')) + self.assertEqual(img.size, (1, 1)) + img.close() + # call raises error if file cannot be read + self.assertRaises(errors.ReaderError, image, + os.path.join(os.path.dirname(__file__), 'invalid.nef')) + self.assertRaises(errors.ReaderError, image, + os.path.join(os.path.dirname(__file__), 'invalid.jpg')) + +## main ## + +if __name__ == '__main__': + unittest.main() + +## EOF ## diff --git a/test/reader/image/test_pillow.py b/test/reader/image/test_pillow.py new file mode 100644 index 0000000..8abf5c1 --- /dev/null +++ b/test/reader/image/test_pillow.py @@ -0,0 +1,44 @@ +""" + +Part of the bsie test suite. +A copy of the license is provided with the project. 
+Author: Matthias Baumgartner, 2022 +""" +# standard imports +import os +import unittest + +# external imports +import PIL.Image + +# bsie imports +from bsie.utils import errors + +# objects to test +from bsie.reader.image._pillow import PillowImage + + +## code ## + +class TestPillowImage(unittest.TestCase): + def test_call(self): + rdr = PillowImage() + # returns PIL image + img = rdr(os.path.join(os.path.dirname(__file__), 'testimage.jpg')) + self.assertEqual(img.size, (1, 1)) + self.assertEqual(img.getdata().getpixel((0, 0)), (0, 0, 0)) + img.close() + # raises exception when image cannot be read + self.assertRaises(errors.ReaderError, rdr, + os.path.join(os.path.dirname(__file__), 'invalid.jpg')) + # NOTE: PIL can actually read raw image files (returns the thumbnail) + #self.assertRaises(errors.ReaderError, rdr, + # os.path.join(os.path.dirname(__file__), 'testimage.nef')) + + +## main ## + +if __name__ == '__main__': + unittest.main() + +## EOF ## diff --git a/test/reader/image/test_raw_image.py b/test/reader/image/test_raw_image.py new file mode 100644 index 0000000..ba21b5a --- /dev/null +++ b/test/reader/image/test_raw_image.py @@ -0,0 +1,53 @@ +""" + +Part of the bsie test suite. +A copy of the license is provided with the project. 
+Author: Matthias Baumgartner, 2022 +""" +# standard imports +import importlib +import os +import unittest + +# external imports +import PIL.Image + +# bsie imports +from bsie.utils import errors + +# objects to test +from bsie.reader.image._raw import RawImage + + +## code ## + +class TestRawImage(unittest.TestCase): + def setUp(self): + if __package__ is None or __package__ == '': # direct call or local discovery + importlib.import_module('load_nef', __package__).get() + else: # parent discovery + importlib.import_module('.load_nef', __package__).get() + + def test_call(self): + rdr = RawImage() + # returns PIL image + img = rdr(os.path.join(os.path.dirname(__file__), 'testimage.nef')) + self.assertEqual(img.size, (6016, 4016)) # FIXME: change when image was replaced + #self.assertEqual(img.size, (1, 1)) + #self.assertEqual(img.getdata().getpixel((0, 0)), (0, 0, 0)) + img.close() + # raises exception when image cannot be read + self.assertRaises(errors.ReaderError, rdr, + os.path.join(os.path.dirname(__file__), 'invalid.nef')) + self.assertRaises(errors.ReaderError, rdr, + os.path.join(os.path.dirname(__file__), 'testimage.jpg')) + + + + +## main ## + +if __name__ == '__main__': + unittest.main() + +## EOF ## diff --git a/test/reader/image/testimage.jpg b/test/reader/image/testimage.jpg Binary files differnew file mode 100644 index 0000000..ea7af63 --- /dev/null +++ b/test/reader/image/testimage.jpg diff --git a/test/base/test_reader.py b/test/reader/test_base.py index a907eb9..41f4c29 100644 --- a/test/base/test_reader.py +++ b/test/reader/test_base.py @@ -4,16 +4,16 @@ Part of the bsie test suite. A copy of the license is provided with the project. 
Author: Matthias Baumgartner, 2022 """ -# imports +# standard imports import unittest # objects to test -from bsie import base +from bsie.reader import Reader ## code ## -class StubReader(base.Reader): +class StubReader(Reader): def __call__(self, path): raise NotImplementedError() diff --git a/test/reader/test_builder.py b/test/reader/test_builder.py new file mode 100644 index 0000000..92e9edc --- /dev/null +++ b/test/reader/test_builder.py @@ -0,0 +1,54 @@ +""" + +Part of the bsie test suite. +A copy of the license is provided with the project. +Author: Matthias Baumgartner, 2022 +""" +# standard imports +import unittest + +# bsie imports +from bsie.utils import errors + +# objects to test +from bsie.reader import ReaderBuilder + + +## code ## + +class TestReaderBuilder(unittest.TestCase): + def test_build(self): + builder = ReaderBuilder({'bsie.reader.path.Path': {}}) + # build configured reader + cls = builder.build('bsie.reader.path.Path') + import bsie.reader.path + self.assertIsInstance(cls, bsie.reader.path.Path) + # build unconfigured reader + cls = builder.build('bsie.reader.stat.Stat') + import bsie.reader.stat + self.assertIsInstance(cls, bsie.reader.stat.Stat) + # re-build previous reader (test cache) + self.assertEqual(cls, builder.build('bsie.reader.stat.Stat')) + # test invalid + self.assertRaises(TypeError, builder.build, 123) + self.assertRaises(TypeError, builder.build, None) + self.assertRaises(ValueError, builder.build, '') + self.assertRaises(ValueError, builder.build, 'Path') + self.assertRaises(errors.BuilderError, builder.build, 'path.Path') + # invalid config + builder = ReaderBuilder({'bsie.reader.stat.Stat': dict(foo=123)}) + self.assertRaises(errors.BuilderError, builder.build, 'bsie.reader.stat.Stat') + builder = ReaderBuilder({'bsie.reader.stat.Stat': 123}) + self.assertRaises(TypeError, builder.build, 'bsie.reader.stat.Stat') + # no instructions + builder = ReaderBuilder({}) + cls = builder.build('bsie.reader.stat.Stat') + 
self.assertIsInstance(cls, bsie.reader.stat.Stat) + + +## main ## + +if __name__ == '__main__': + unittest.main() + +## EOF ## diff --git a/test/reader/test_chain.py b/test/reader/test_chain.py new file mode 100644 index 0000000..901faa1 --- /dev/null +++ b/test/reader/test_chain.py @@ -0,0 +1,85 @@ +""" + +Part of the bsie test suite. +A copy of the license is provided with the project. +Author: Matthias Baumgartner, 2022 +""" +# standard imports +import logging +import os +import unittest + +# bsie imports +from bsie.utils import errors +import bsie.reader.path +import bsie.reader.stat + +# objects to test +from bsie.reader.chain import ReaderChain + + +## code ## + +class TestReaderChain(unittest.TestCase): + def test_construct(self): + # subreaders are built + chain = ReaderChain(['bsie.reader.stat.Stat', 'bsie.reader.path.Path'], {}) + self.assertIsInstance(chain, ReaderChain) + self.assertEqual(chain._children, + (bsie.reader.stat.Stat(), bsie.reader.path.Path())) + # subreaders that failed to build are omitted + with self.assertLogs(logging.getLogger('bsie.reader.chain'), logging.WARNING): + chain = ReaderChain(['bsie.reader.stat.Stat', 'bsie.reader.invalid.Invalid'], {}) + self.assertEqual(chain._children, (bsie.reader.stat.Stat(), )) + with self.assertLogs(logging.getLogger('bsie.reader.chain'), logging.WARNING): + chain = ReaderChain(['bsie.reader.stat.Stat', 'bsie.reader.path.Invalid'], {}) + self.assertEqual(chain._children, (bsie.reader.stat.Stat(), )) + # warning is issued if there are no subreaders + with self.assertLogs(logging.getLogger('bsie.reader.chain'), logging.WARNING): + chain = ReaderChain([], {}) + self.assertEqual(chain._children, tuple()) + + def test_essentials(self): + chain = ReaderChain(['bsie.reader.stat.Stat', 'bsie.reader.path.Path'], {}) + # identity + self.assertEqual(chain, chain) + self.assertEqual(hash(chain), hash(chain)) + # comparison works across instances + self.assertEqual(chain, + ReaderChain(['bsie.reader.stat.Stat', 
'bsie.reader.path.Path'], {})) + self.assertEqual(hash(chain), + hash(ReaderChain(['bsie.reader.stat.Stat', 'bsie.reader.path.Path'], {}))) + # comparison respects subreaders + self.assertNotEqual(hash(chain), + hash(ReaderChain(['bsie.reader.path.Path'], {}))) + self.assertNotEqual(hash(chain), + hash(ReaderChain(['bsie.reader.path.Path'], {}))) + # comparison respects subreader order + self.assertNotEqual(chain, + ReaderChain(['bsie.reader.path.Path', 'bsie.reader.stat.Stat'], {})) + self.assertNotEqual(hash(chain), + hash(ReaderChain(['bsie.reader.path.Path', 'bsie.reader.stat.Stat'], {}))) + # string representation + self.assertEqual(str(chain), 'ReaderChain(Stat, Path)') + self.assertEqual(repr(chain), 'ReaderChain((Stat(), Path()))') + + def test_call(self): + chain = ReaderChain(['bsie.reader.stat.Stat', 'bsie.reader.path.Path'], {}) + # chain first probes first child + self.assertEqual(chain(__file__), os.stat(__file__)) + # chain probes second child if first one fails + self.assertEqual(chain(''), '') + self.assertEqual(chain('missing-file'), 'missing-file') + + # chain raises a ReaderError if children were exhausted + chain = ReaderChain(['bsie.reader.stat.Stat'], {}) + # chain probes second child if first one fails + self.assertRaises(errors.ReaderError, chain, '') + + +## main ## + +if __name__ == '__main__': + unittest.main() + +## EOF ## diff --git a/test/reader/test_path.py b/test/reader/test_path.py index fd7bc5a..95e447f 100644 --- a/test/reader/test_path.py +++ b/test/reader/test_path.py @@ -4,7 +4,7 @@ Part of the bsie test suite. A copy of the license is provided with the project. Author: Matthias Baumgartner, 2022 """ -# imports +# standard imports import unittest # objects to test diff --git a/test/reader/test_stat.py b/test/reader/test_stat.py index d12ad9c..fd9fdcd 100644 --- a/test/reader/test_stat.py +++ b/test/reader/test_stat.py @@ -4,12 +4,12 @@ Part of the bsie test suite. A copy of the license is provided with the project. 
Author: Matthias Baumgartner, 2022 """ -# imports +# standard imports import os import unittest # bsie imports -from bsie.base import errors +from bsie.utils import errors # objects to test from bsie.reader.stat import Stat diff --git a/test/tools/test_builder.py b/test/tools/test_builder.py deleted file mode 100644 index 62c637c..0000000 --- a/test/tools/test_builder.py +++ /dev/null @@ -1,246 +0,0 @@ -""" - -Part of the bsie test suite. -A copy of the license is provided with the project. -Author: Matthias Baumgartner, 2022 -""" -# imports -import logging -import unittest - -# bsie imports -from bsie import base -from bsie.utils import bsfs - -# objects to test -from bsie.tools.builder import ExtractorBuilder -from bsie.tools.builder import PipelineBuilder -from bsie.tools.builder import ReaderBuilder -from bsie.tools.builder import _safe_load -from bsie.tools.builder import _unpack_name - - -## code ## - -class TestUtils(unittest.TestCase): - def test_safe_load(self): - # invalid module - self.assertRaises(base.errors.LoaderError, _safe_load, 'dBGHMSAYOoKeKMpywDoKZQycENFPvN', 'foobar') - self.assertRaises(base.errors.LoaderError, _safe_load, 'dBGHMSAYOoKeKMpywDoKZQycENFPvN.bar', 'foobar') - # partially valid module - self.assertRaises(base.errors.LoaderError, _safe_load, 'os.foo', 'foobar') - # invalid class - self.assertRaises(base.errors.LoaderError, _safe_load, 'os.path', 'foo') - # valid module and class - cls = _safe_load('collections.abc', 'Container') - import collections.abc - self.assertEqual(cls, collections.abc.Container) - - def test_unpack_name(self): - self.assertRaises(TypeError, _unpack_name, 123) - self.assertRaises(TypeError, _unpack_name, None) - self.assertRaises(ValueError, _unpack_name, '') - self.assertRaises(ValueError, _unpack_name, 'path') - self.assertRaises(ValueError, _unpack_name, '.Path') - self.assertEqual(_unpack_name('path.Path'), ('path', 'Path')) - self.assertEqual(_unpack_name('path.foo.bar.Path'), ('path.foo.bar', 'Path')) - 
- -class TestReaderBuilder(unittest.TestCase): - def test_build(self): - builder = ReaderBuilder({'bsie.reader.path.Path': {}}) - # build configured reader - cls = builder.build('bsie.reader.path.Path') - import bsie.reader.path - self.assertIsInstance(cls, bsie.reader.path.Path) - # build unconfigured reader - cls = builder.build('bsie.reader.stat.Stat') - import bsie.reader.stat - self.assertIsInstance(cls, bsie.reader.stat.Stat) - # re-build previous reader (test cache) - self.assertEqual(cls, builder.build('bsie.reader.stat.Stat')) - # test invalid - self.assertRaises(TypeError, builder.build, 123) - self.assertRaises(TypeError, builder.build, None) - self.assertRaises(ValueError, builder.build, '') - self.assertRaises(ValueError, builder.build, 'Path') - self.assertRaises(base.errors.BuilderError, builder.build, 'path.Path') - # invalid config - builder = ReaderBuilder({'bsie.reader.stat.Stat': dict(foo=123)}) - self.assertRaises(base.errors.BuilderError, builder.build, 'bsie.reader.stat.Stat') - builder = ReaderBuilder({'bsie.reader.stat.Stat': 123}) - self.assertRaises(TypeError, builder.build, 'bsie.reader.stat.Stat') - # no instructions - builder = ReaderBuilder({}) - cls = builder.build('bsie.reader.stat.Stat') - self.assertIsInstance(cls, bsie.reader.stat.Stat) - - - -class TestExtractorBuilder(unittest.TestCase): - def test_iter(self): - # no specifications - self.assertListEqual(list(ExtractorBuilder([])), []) - # some specifications - builder = ExtractorBuilder([ - {'bsie.extractor.generic.path.Path': {}}, - {'bsie.extractor.generic.stat.Stat': {}}, - {'bsie.extractor.generic.path.Path': {}}, - ]) - self.assertListEqual(list(builder), [0, 1, 2]) - - def test_build(self): - # simple and repeated extractors - builder = ExtractorBuilder([ - {'bsie.extractor.generic.path.Path': {}}, - {'bsie.extractor.generic.stat.Stat': {}}, - {'bsie.extractor.generic.path.Path': {}}, - ]) - ext = [builder.build(0), builder.build(1), builder.build(2)] - import 
bsie.extractor.generic.path - import bsie.extractor.generic.stat - self.assertListEqual(ext, [ - bsie.extractor.generic.path.Path(), - bsie.extractor.generic.stat.Stat(), - bsie.extractor.generic.path.Path(), - ]) - # out-of-bounds raises KeyError - self.assertRaises(IndexError, builder.build, 3) - - # building with args - builder = ExtractorBuilder([ - {'bsie.extractor.generic.constant.Constant': { - 'schema': ''' - bse:author rdfs:subClassOf bsfs:Predicate ; - rdfs:domain bsfs:Entity ; - rdfs:range xsd:string ; - bsfs:unique "true"^^xsd:boolean . - bse:rating rdfs:subClassOf bsfs:Predicate ; - rdfs:domain bsfs:Entity ; - rdfs:range xsd:integer ; - bsfs:unique "true"^^xsd:boolean . - ''', - 'tuples': [ - ('http://bsfs.ai/schema/Entity#author', 'Me, myself, and I'), - ('http://bsfs.ai/schema/Entity#rating', 123), - ], - }}]) - obj = builder.build(0) - import bsie.extractor.generic.constant - self.assertEqual(obj, bsie.extractor.generic.constant.Constant(''' - bse:author rdfs:subClassOf bsfs:Predicate ; - rdfs:domain bsfs:Entity ; - rdfs:range xsd:string ; - bsfs:unique "true"^^xsd:boolean . - bse:rating rdfs:subClassOf bsfs:Predicate ; - rdfs:domain bsfs:Entity ; - rdfs:range xsd:integer ; - bsfs:unique "true"^^xsd:boolean . 
- ''', [ - ('http://bsfs.ai/schema/Entity#author', 'Me, myself, and I'), - ('http://bsfs.ai/schema/Entity#rating', 123), - ])) - - # building with invalid args - self.assertRaises(base.errors.BuilderError, ExtractorBuilder( - [{'bsie.extractor.generic.path.Path': {'foo': 123}}]).build, 0) - # non-dict build specification - self.assertRaises(TypeError, ExtractorBuilder( - [('bsie.extractor.generic.path.Path', {})]).build, 0) - # multiple keys per build specification - self.assertRaises(TypeError, ExtractorBuilder( - [{'bsie.extractor.generic.path.Path': {}, - 'bsie.extractor.generic.stat.Stat': {}}]).build, 0) - # non-dict value for kwargs - self.assertRaises(TypeError, ExtractorBuilder( - [{'bsie.extractor.generic.path.Path': 123}]).build, 0) - - - - -class TestPipelineBuilder(unittest.TestCase): - def test_build(self): - prefix = bsfs.URI('http://example.com/local/file#') - c_schema = ''' - bse:author rdfs:subClassOf bsfs:Predicate ; - rdfs:domain bsfs:Entity ; - rdfs:range xsd:string ; - bsfs:unique "true"^^xsd:boolean . 
- ''' - c_tuples = [('http://bsfs.ai/schema/Entity#author', 'Me, myself, and I')] - # prepare builders - rbuild = ReaderBuilder({}) - ebuild = ExtractorBuilder([ - {'bsie.extractor.generic.path.Path': {}}, - {'bsie.extractor.generic.stat.Stat': {}}, - {'bsie.extractor.generic.constant.Constant': dict( - schema=c_schema, - tuples=c_tuples, - )}, - ]) - # build pipeline - builder = PipelineBuilder(prefix, rbuild, ebuild) - pipeline = builder.build() - # delayed import - import bsie.reader.path - import bsie.reader.stat - import bsie.extractor.generic.path - import bsie.extractor.generic.stat - import bsie.extractor.generic.constant - # check pipeline - self.assertDictEqual(pipeline._ext2rdr, { - bsie.extractor.generic.path.Path(): bsie.reader.path.Path(), - bsie.extractor.generic.stat.Stat(): bsie.reader.stat.Stat(), - bsie.extractor.generic.constant.Constant(c_schema, c_tuples): None, - }) - - # fail to load extractor - ebuild_err = ExtractorBuilder([ - {'bsie.extractor.generic.foo.Foo': {}}, - {'bsie.extractor.generic.path.Path': {}}, - ]) - with self.assertLogs(logging.getLogger('bsie.tools.builder'), logging.ERROR): - pipeline = PipelineBuilder(prefix, rbuild, ebuild_err).build() - self.assertDictEqual(pipeline._ext2rdr, { - bsie.extractor.generic.path.Path(): bsie.reader.path.Path()}) - - # fail to build extractor - ebuild_err = ExtractorBuilder([ - {'bsie.extractor.generic.path.Path': {'foo': 123}}, - {'bsie.extractor.generic.path.Path': {}}, - ]) - with self.assertLogs(logging.getLogger('bsie.tools.builder'), logging.ERROR): - pipeline = PipelineBuilder(prefix, rbuild, ebuild_err).build() - self.assertDictEqual(pipeline._ext2rdr, { - bsie.extractor.generic.path.Path(): bsie.reader.path.Path()}) - - # fail to load reader - with self.assertLogs(logging.getLogger('bsie.tools.builder'), logging.ERROR): - # switch reader of an extractor - old_reader = bsie.extractor.generic.path.Path.CONTENT_READER - bsie.extractor.generic.path.Path.CONTENT_READER = 
'bsie.reader.foo.Foo' - # build pipeline with invalid reader reference - pipeline = PipelineBuilder(prefix, rbuild, ebuild).build() - self.assertDictEqual(pipeline._ext2rdr, { - bsie.extractor.generic.stat.Stat(): bsie.reader.stat.Stat(), - bsie.extractor.generic.constant.Constant(c_schema, c_tuples): None, - }) - # switch back - bsie.extractor.generic.path.Path.CONTENT_READER = old_reader - - # fail to build reader - rbuild_err = ReaderBuilder({'bsie.reader.stat.Stat': dict(foo=123)}) - with self.assertLogs(logging.getLogger('bsie.tools.builder'), logging.ERROR): - pipeline = PipelineBuilder(prefix, rbuild_err, ebuild).build() - self.assertDictEqual(pipeline._ext2rdr, { - bsie.extractor.generic.path.Path(): bsie.reader.path.Path(), - bsie.extractor.generic.constant.Constant(c_schema, c_tuples): None, - }) - - -## main ## - -if __name__ == '__main__': - unittest.main() - -## EOF ## diff --git a/test/tools/testfile.t b/test/tools/testfile.t deleted file mode 100644 index 3b18e51..0000000 --- a/test/tools/testfile.t +++ /dev/null @@ -1 +0,0 @@ -hello world diff --git a/test/utils/filematcher/__init__.py b/test/utils/filematcher/__init__.py new file mode 100644 index 0000000..e69de29 --- /dev/null +++ b/test/utils/filematcher/__init__.py diff --git a/test/utils/filematcher/empty b/test/utils/filematcher/empty new file mode 100644 index 0000000..e69de29 --- /dev/null +++ b/test/utils/filematcher/empty diff --git a/test/utils/filematcher/test_matcher.py b/test/utils/filematcher/test_matcher.py new file mode 100644 index 0000000..c3cccee --- /dev/null +++ b/test/utils/filematcher/test_matcher.py @@ -0,0 +1,232 @@ +""" + +Part of the bsie test suite. +A copy of the license is provided with the project. 
+Author: Matthias Baumgartner, 2022 +""" +# standard imports +import os +import stat +import tempfile +import unittest + +# objects to test +from bsie.utils.filematcher import matcher + + +## code ## + +class FakeMatcher(matcher.Matcher): + def __call__(self, *args, **kwargs): + pass + +class FakeCriterion(matcher.Criterion): + def __call__(self, *args, **kwargs): + pass + +class FakeAggregate(matcher.Aggregate): + def __call__(self, *args, **kwargs): + pass + +class TestMatcher(unittest.TestCase): + def setUp(self): + # paths + self.image = os.path.join(os.path.dirname(__file__), 'testimage.jpg') + self.text= os.path.join(os.path.dirname(__file__), 'textfile.t') + self.empty = os.path.join(os.path.dirname(__file__), 'empty') + self.missing = os.path.join(os.path.dirname(__file__), 'missing.jpg') + + def test_matcher_skeleton(self): + # node: iteration and length + self.assertSetEqual(set(iter(FakeMatcher(1,2,3))), {1,2,3}) + self.assertSetEqual(set(iter(FakeMatcher([1,2,3]))), {1,2,3}) + self.assertEqual(len(FakeMatcher([1,2,3])), 3) + self.assertEqual(len(FakeMatcher(1,2,3)), 3) + self.assertEqual(len(FakeMatcher()), 0) + self.assertIn(1, FakeMatcher(1,2,3)) + self.assertIn(3, FakeMatcher([1,2,3])) + self.assertNotIn(0, FakeMatcher(1,2,3)) + self.assertNotIn(4, FakeMatcher([1,2,3])) + # node: comparison + self.assertEqual(FakeMatcher([1,2,3]), FakeMatcher([1,2,3])) + self.assertEqual(FakeMatcher(1,2,3), FakeMatcher(1,2,3)) + self.assertEqual(FakeMatcher(1,2,3), FakeMatcher([1,2,3])) + self.assertEqual(FakeMatcher(1,2,3), FakeMatcher((1,2,3))) + self.assertNotEqual(FakeMatcher(1,2,3), FakeMatcher(1,2,4)) + self.assertNotEqual(FakeMatcher(1,2,3), FakeMatcher(1,2,3,4)) + self.assertNotEqual(FakeMatcher(1,2,3), FakeMatcher(1,2)) + self.assertEqual(hash(FakeMatcher([1,2,3])), hash(FakeMatcher([1,2,3]))) + self.assertEqual(hash(FakeMatcher(1,2,3)), hash(FakeMatcher(1,2,3))) + self.assertEqual(hash(FakeMatcher(1,2,3)), hash(FakeMatcher([1,2,3]))) + 
self.assertEqual(hash(FakeMatcher(1,2,3)), hash(FakeMatcher((1,2,3)))) + # node: representation + self.assertEqual(repr(FakeMatcher(1,2,3)), 'FakeMatcher({1, 2, 3})') + + # criterion + self.assertEqual(repr(FakeCriterion(1,2,3)), 'FakeCriterion({1, 2, 3})') + self.assertEqual(hash(FakeCriterion(1,2,3)), hash(FakeCriterion(1,2,3))) + self.assertEqual(FakeCriterion(1,2,3), FakeCriterion([1,2,3])) + self.assertNotEqual(FakeCriterion(1,2,3), FakeCriterion(1,2)) + self.assertNotEqual(FakeCriterion(1,2,3), FakeMatcher(1,2,3)) + self.assertSetEqual(FakeCriterion(1,2,3).accepted(), {1,2,3}) + + # aggregate + self.assertEqual(repr(FakeAggregate(1,2,3)), 'FakeAggregate({1, 2, 3})') + self.assertNotEqual(FakeAggregate(1,2,3), FakeMatcher(1,2,3)) + + def test_any(self): + self.assertTrue(matcher.Any()(self.image)) + self.assertTrue(matcher.Any()(self.text)) + self.assertTrue(matcher.Any()(self.missing)) + self.assertTrue(matcher.Any()(self.empty)) + + def test_nothing(self): + self.assertFalse(matcher.Nothing()(self.image)) + self.assertFalse(matcher.Nothing()(self.text)) + self.assertFalse(matcher.Nothing()(self.missing)) + self.assertFalse(matcher.Nothing()(self.empty)) + + def test_exists(self): + self.assertTrue(matcher.Exists()(self.image)) + self.assertTrue(matcher.Exists()(self.text)) + self.assertTrue(matcher.Exists()(self.empty)) + self.assertFalse(matcher.Exists()(self.missing)) + + def test_isfile(self): + self.assertTrue(matcher.IsFile()(self.image)) + self.assertTrue(matcher.IsFile()(self.text)) + self.assertFalse(matcher.IsFile()(self.missing)) + self.assertFalse(matcher.IsFile()(os.path.dirname(self.image))) + + def test_isdir(self): + self.assertTrue(matcher.IsDir()(os.path.dirname(self.image))) + self.assertFalse(matcher.IsDir()(self.image)) + self.assertFalse(matcher.IsDir()(self.text)) + self.assertFalse(matcher.IsDir()(self.missing)) + + def test_islink(self): + self.assertFalse(matcher.IsLink()(os.path.dirname(self.image))) + 
self.assertFalse(matcher.IsLink()(self.image)) + self.assertFalse(matcher.IsLink()(self.text)) + _, temp = tempfile.mkstemp(prefix='bsie-test-') + templink = temp + '-link' + os.symlink(temp, templink) + self.assertTrue(matcher.IsLink()(templink)) + os.unlink(templink) + os.unlink(temp) + + def test_isabs(self): + self.assertTrue(matcher.IsAbs()(os.path.abspath(self.image))) + self.assertTrue(matcher.IsAbs()(os.path.abspath(self.text))) + self.assertFalse(matcher.IsAbs()(os.path.relpath(self.text, os.path.dirname(self.text)))) + + def test_isrel(self): + self.assertFalse(matcher.IsRel()(os.path.abspath(self.image))) + self.assertFalse(matcher.IsRel()(os.path.abspath(self.text))) + self.assertTrue(matcher.IsRel()(os.path.relpath(self.text, os.path.dirname(self.text)))) + self.assertTrue(matcher.IsRel()(os.path.basename(self.text))) + + def test_ismount(self): + self.assertFalse(matcher.IsMount()(self.image)) + self.assertFalse(matcher.IsMount()(self.text)) + self.assertFalse(matcher.IsMount()(self.missing)) + # there's no reasonable way to test a positive case + + def test_isempty(self): + self.assertTrue(matcher.IsEmpty()(self.empty)) + self.assertFalse(matcher.IsEmpty()(self.image)) + self.assertFalse(matcher.IsEmpty()(self.text)) + self.assertFalse(matcher.IsEmpty()(self.missing)) + + def test_isreadable(self): + self.assertTrue(matcher.IsReadable()(self.empty)) + self.assertTrue(matcher.IsReadable()(self.image)) + self.assertFalse(matcher.IsReadable()(self.missing)) + _, temp = tempfile.mkstemp(prefix='bsie-test-') + os.chmod(temp, 0) + self.assertFalse(matcher.IsReadable()(temp)) + os.unlink(temp) + + def test_iswritable(self): + self.assertTrue(matcher.IsWritable()(self.empty)) + self.assertTrue(matcher.IsWritable()(self.image)) + self.assertFalse(matcher.IsWritable()(self.missing)) + _, temp = tempfile.mkstemp(prefix='bsie-test-') + os.chmod(temp, 0) + self.assertFalse(matcher.IsWritable()(temp)) + os.unlink(temp) + + def test_isexecutable(self): + 
self.assertFalse(matcher.IsExecutable()(self.empty)) + self.assertFalse(matcher.IsExecutable()(self.image)) + self.assertFalse(matcher.IsExecutable()(self.missing)) + _, temp = tempfile.mkstemp(prefix='bsie-test-') + os.chmod(temp, stat.S_IEXEC) + self.assertTrue(matcher.IsExecutable()(temp)) + os.unlink(temp) + + def test_extension(self): + self.assertTrue(matcher.Extension('jpg')(self.image)) + self.assertTrue(matcher.Extension('jpg', 'png')(self.image)) + self.assertTrue(matcher.Extension('jpg', 't')(self.text)) + self.assertTrue(matcher.Extension('jpg', 'png', 't')(self.missing)) + self.assertTrue(matcher.Extension('')(self.empty)) + + self.assertFalse(matcher.Extension()(self.image)) + self.assertFalse(matcher.Extension('jpeg')(self.image)) + self.assertFalse(matcher.Extension('.t')(self.text)) + self.assertFalse(matcher.Extension('png', 't')(self.missing)) + self.assertFalse(matcher.Extension('tiff')(self.empty)) + + def test_mime(self): + self.assertTrue(matcher.Mime('image/jpeg')(self.image)) + self.assertTrue(matcher.Mime('image/tiff', 'image/jpeg')(self.image)) + self.assertTrue(matcher.Mime('text/plain', 'image/jpeg')(self.text)) + self.assertTrue(matcher.Mime('inode/x-empty')(self.empty)) + + self.assertFalse(matcher.Mime()(self.image)) + self.assertFalse(matcher.Mime('image')(self.image)) + self.assertFalse(matcher.Mime('image/tiff', 'image/png')(self.image)) + self.assertFalse(matcher.Mime('')(self.text)) + self.assertFalse(matcher.Mime('text')(self.text)) + self.assertFalse(matcher.Mime('tiff')(self.empty)) + self.assertFalse(matcher.Mime()(self.empty)) + self.assertFalse(matcher.Mime('')(self.empty)) + self.assertFalse(matcher.Mime()(self.missing)) + self.assertFalse(matcher.Mime('')(self.missing)) + self.assertFalse(matcher.Mime('inode/x-empty')(self.missing)) + + def test_not(self): + self.assertFalse(matcher.NOT(matcher.Mime('image/jpeg'))(self.image)) + self.assertTrue(matcher.NOT(matcher.Mime('text/plain'))(self.image)) + + def test_and(self): 
+ self.assertTrue(matcher.And(matcher.Mime('image/jpeg'), matcher.Extension('jpg'))(self.image)) + self.assertTrue(matcher.And(matcher.Mime('image/jpeg'), matcher.Extension('jpg', 'tiff'))(self.image)) + self.assertTrue(matcher.And(matcher.Mime('text/plain'), matcher.Extension('t', 'tiff'))(self.text)) + + self.assertFalse(matcher.And(matcher.Mime('image/jpeg'), matcher.Extension('tiff'))(self.image)) + self.assertFalse(matcher.And(matcher.Mime('text/plain'), matcher.Extension('jpg'))(self.image)) + self.assertFalse(matcher.And(matcher.Mime('inode/x-empty'), matcher.Extension('jpg'))(self.missing)) + self.assertFalse(matcher.And(matcher.Mime('image/jpeg'), matcher.Extension('jpg', 't'))(self.text)) + + def test_or(self): + self.assertTrue(matcher.Or(matcher.Mime('image/jpeg'))(self.image)) + self.assertFalse(matcher.Or(matcher.Mime('text/plain'))(self.image)) + + self.assertTrue(matcher.Or(matcher.Mime('image/jpeg'), matcher.Extension('jpg'))(self.image)) + self.assertTrue(matcher.Or(matcher.Mime('image/jpeg'), matcher.Extension('t'))(self.image)) + self.assertTrue(matcher.Or(matcher.Mime('text/plain'), matcher.Extension('jpg', 'tiff'))(self.image)) + self.assertTrue(matcher.Or(matcher.Mime('text/plain'), matcher.Extension('tiff'))(self.text)) + self.assertTrue(matcher.Or(matcher.Mime('image/jpeg'), matcher.Extension('jpg'))(self.missing)) + + self.assertFalse(matcher.Or(matcher.Mime('text/plain'), matcher.Extension('tiff'))(self.image)) + self.assertFalse(matcher.Or(matcher.Mime('inode/x-empty'), matcher.Extension('jpg', 'tiff'))(self.text)) + + +## main ## + +if __name__ == '__main__': + unittest.main() + +## EOF ## diff --git a/test/utils/filematcher/test_parser.py b/test/utils/filematcher/test_parser.py new file mode 100644 index 0000000..c594747 --- /dev/null +++ b/test/utils/filematcher/test_parser.py @@ -0,0 +1,146 @@ +""" + +Part of the bsie test suite. +A copy of the license is provided with the project. 
+Author: Matthias Baumgartner, 2022 +""" +# standard imports +import unittest + +# bsie imports +from bsie.utils import errors +from bsie.utils.filematcher import matcher + +# objects to test +from bsie.utils.filematcher import parse + + +## code ## + +class TestFileMatcherParser(unittest.TestCase): + def test_empty(self): + # no criterion + self.assertEqual(parse(''), matcher.Any()) + + def test_ruleone(self): + # single criterion, single value + self.assertEqual(parse('mime=text'), matcher.Mime('text')) + self.assertEqual(parse('MIME=text'), matcher.Mime('text')) + self.assertEqual(parse('MiMe=text'), matcher.Mime('text')) + self.assertEqual(parse('MIME=TEXT'), matcher.Mime('TEXT')) + self.assertEqual(parse('mime={text}'), matcher.Mime('text')) + self.assertEqual(parse('mime=image/jpeg'), matcher.Mime('image/jpeg')) + self.assertEqual(parse('mime="image/jpeg"'), matcher.Mime('image/jpeg')) + self.assertEqual(parse('extension=pdf'), matcher.Extension('pdf')) + self.assertEqual(parse('extension={pdf}'), matcher.Extension('pdf')) + self.assertEqual(parse('extension="pdf"'), matcher.Extension('pdf')) + self.assertEqual(parse('extension="foo,bar"'), matcher.Extension('foo,bar')) + self.assertEqual(parse('extension="f{oo|ba}r"'), matcher.Extension('f{oo|ba}r')) + self.assertEqual(parse('extension=""'), matcher.Extension('')) + self.assertEqual(parse('extension="foo'), matcher.Extension('"foo')) + self.assertRaises(errors.ParserError, parse, 'extension=foo=bar') + self.assertRaises(errors.ParserError, parse, 'extension=') + self.assertRaises(errors.ParserError, parse, 'extension={}') + self.assertRaises(errors.ParserError, parse, 'extension={foo') + + # valueless + self.assertEqual(parse('any'), matcher.Any()) + self.assertEqual(parse('nothing'), matcher.Nothing()) + self.assertEqual(parse('exists'), matcher.Exists()) + self.assertEqual(parse('any, nothing'), matcher.And(matcher.Any(), matcher.Nothing())) + self.assertEqual(parse('any, nothing, exists'), + 
matcher.And(matcher.Any(), matcher.Nothing(), matcher.Exists())) + self.assertEqual(parse('any, extension=jpg'), matcher.And(matcher.Any(), matcher.Extension('jpg'))) + self.assertRaises(errors.ParserError, parse, 'mime') + self.assertRaises(errors.ParserError, parse, 'extension') + self.assertRaises(errors.ParserError, parse, 'exists=True') + self.assertRaises(errors.ParserError, parse, 'exists=foo') + self.assertEqual(parse('!any'), matcher.NOT(matcher.Any())) + self.assertEqual(parse('!any, nothing'), matcher.And(matcher.NOT(matcher.Any()), matcher.Nothing())) + self.assertEqual(parse('!any, extension=jpg'), + matcher.And(matcher.NOT(matcher.Any()), matcher.Extension('jpg'))) + self.assertRaises(errors.ParserError, parse, '!mime') + self.assertRaises(errors.ParserError, parse, '!extension') + + def test_rulefew(self): + # single criterion, multiple values + self.assertEqual(parse('extension={jpg, jpeg}'), matcher.Extension('jpg', 'jpeg')) + self.assertEqual(parse('mime={image/jpeg, image/png}'), + matcher.Mime('image/jpeg', 'image/png')) + self.assertRaises(errors.ParserError, parse, 'mime=image/png, image/jpeg') + self.assertRaises(errors.ParserError, parse, 'extension=jpg, jpeg') + + def test_rulesets_ruleone(self): + # multiple criteria, single value + self.assertEqual(parse('mime=text, extension=t'), + matcher.And(matcher.Mime('text'), matcher.Extension('t'))) + self.assertEqual(parse('mime=text/plain, extension=t'), + matcher.And(matcher.Mime('text/plain'), matcher.Extension('t'))) + self.assertRaises(errors.ParserError, parse, 'mime=text/plain extension=t') + self.assertRaises(errors.ParserError, parse, 'mime={image/jpeg, extension=jpg'), + + def test_rulesets_rulefew(self): + # multiple criteria, multiple values + self.assertEqual(parse('mime=image/jpeg, extension={jpg, jpeg}'), + matcher.And(matcher.Mime('image/jpeg'), matcher.Extension('jpg', 'jpeg'))) + self.assertEqual(parse('mime={image/jpeg, image/tiff}, extension={jpg, jpeg}'), + 
matcher.And(matcher.Mime('image/jpeg', 'image/tiff'), matcher.Extension('jpg', 'jpeg'))) + self.assertEqual(parse('mime={image/jpeg, image/tiff}, extension=jpg'), + matcher.And(matcher.Mime('image/jpeg', 'image/tiff'), matcher.Extension('jpg'))) + self.assertRaises(errors.ParserError, parse, 'mime={image/jpeg, image/tiff, extension=jpg') + self.assertRaises(errors.ParserError, parse, 'mime=image/jpeg, image/tiff, extension=jpg') + self.assertRaises(errors.ParserError, parse, 'mime=image/jpeg, extension=jpg, ') + + def test_not(self): + self.assertEqual(parse('extension!=jpg'), matcher.NOT(matcher.Extension('jpg'))) + self.assertEqual(parse('extension!={jpg, jpeg}'), + matcher.NOT(matcher.Extension('jpg', 'jpeg'))) + self.assertEqual(parse('extension!=jpg, mime=image/jpeg'), + matcher.And(matcher.NOT(matcher.Extension('jpg')), matcher.Mime('image/jpeg'))) + self.assertEqual(parse('extension!=jpg, mime!=image/jpeg'), + matcher.And(matcher.NOT(matcher.Extension('jpg')), matcher.NOT(matcher.Mime('image/jpeg')))) + self.assertEqual(parse('extension!=jpg | mime=image/jpeg'), + matcher.Or(matcher.NOT(matcher.Extension('jpg')), matcher.Mime('image/jpeg'))) + self.assertEqual(parse('extension!=jpg | mime!=image/jpeg'), + matcher.Or(matcher.NOT(matcher.Extension('jpg')), matcher.NOT(matcher.Mime('image/jpeg')))) + + def test_expr(self): + # multiple rulesets + self.assertEqual(parse('mime=image/jpeg | extension=jpg'), + matcher.Or(matcher.Mime('image/jpeg'), matcher.Extension('jpg'))) + self.assertEqual(parse('mime=image/jpeg | extension={jpg, jpeg}'), + matcher.Or(matcher.Mime('image/jpeg'), matcher.Extension('jpg', 'jpeg'))) + self.assertEqual(parse('mime={image/jpeg, image/png} | extension={jpg, jpeg}'), + matcher.Or(matcher.Mime('image/jpeg', 'image/png'), matcher.Extension('jpg', 'jpeg'))) + self.assertEqual(parse('mime=image/jpeg , extension=jpg | extension=jpg'), + matcher.Or(matcher.And(matcher.Mime('image/jpeg'), matcher.Extension('jpg')), matcher.Extension('jpg'))) 
+ self.assertEqual(parse( + 'mime={jpeg, text}, extension={jpg,t} | extension={png,txt}, mime={png, tiff}'), + matcher.Or( + matcher.And(matcher.Mime('jpeg', 'text'), matcher.Extension('jpg', 't')), + matcher.And(matcher.Extension('png', 'txt'), matcher.Mime('png', 'tiff')))) + self.assertEqual(parse('mime=text | extension=jpg | extension=png | mime=png'), + matcher.Or(matcher.Mime('text'), matcher.Extension('jpg'), matcher.Extension('png'), matcher.Mime('png'))) + self.assertRaises(errors.ParserError, parse, 'mime=text |') + self.assertRaises(errors.ParserError, parse, '| mime=text') + self.assertRaises(errors.ParserError, parse, 'extension=png | mime=text, ') + + def test_invalid(self): + # Invalid parses + self.assertRaises(errors.ParserError, parse, "extension=") # Empty value + self.assertRaises(errors.ParserError, parse, "mime=foo,bar") # Escaping + self.assertRaises(errors.ParserError, parse, "mime='foo,bar") # Quoting + self.assertRaises(errors.ParserError, parse, "mime=\"foo,bar") # Quoting + + # Invalid input + self.assertRaises(AttributeError, parse, None) + self.assertRaises(AttributeError, parse, 123) + self.assertRaises(AttributeError, parse, [123,321]) + + +## main ## + +if __name__ == '__main__': + unittest.main() + +## EOF ## diff --git a/test/utils/filematcher/testimage.jpg b/test/utils/filematcher/testimage.jpg Binary files differnew file mode 100644 index 0000000..ea7af63 --- /dev/null +++ b/test/utils/filematcher/testimage.jpg diff --git a/test/utils/filematcher/textfile.t b/test/utils/filematcher/textfile.t new file mode 100644 index 0000000..c389011 --- /dev/null +++ b/test/utils/filematcher/textfile.t @@ -0,0 +1,4 @@ +Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. +Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat. 
+Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur. +Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum. diff --git a/test/utils/test_loading.py b/test/utils/test_loading.py new file mode 100644 index 0000000..58ff166 --- /dev/null +++ b/test/utils/test_loading.py @@ -0,0 +1,48 @@ +""" + +Part of the bsie test suite. +A copy of the license is provided with the project. +Author: Matthias Baumgartner, 2022 +""" +# standard imports +import unittest + +# bsie imports +from bsie.utils import errors + +# objects to test +from bsie.utils.loading import safe_load, unpack_qualified_name + + +## code ## + +class TestUtils(unittest.TestCase): + def test_safe_load(self): + # invalid module + self.assertRaises(errors.LoaderError, safe_load, 'dBGHMSAYOoKeKMpywDoKZQycENFPvN', 'foobar') + self.assertRaises(errors.LoaderError, safe_load, 'dBGHMSAYOoKeKMpywDoKZQycENFPvN.bar', 'foobar') + # partially valid module + self.assertRaises(errors.LoaderError, safe_load, 'os.foo', 'foobar') + # invalid class + self.assertRaises(errors.LoaderError, safe_load, 'os.path', 'foo') + # valid module and class + cls = safe_load('collections.abc', 'Container') + import collections.abc + self.assertEqual(cls, collections.abc.Container) + + def test_unpack_qualified_name(self): + self.assertRaises(TypeError, unpack_qualified_name, 123) + self.assertRaises(TypeError, unpack_qualified_name, None) + self.assertRaises(ValueError, unpack_qualified_name, '') + self.assertRaises(ValueError, unpack_qualified_name, 'path') + self.assertRaises(ValueError, unpack_qualified_name, '.Path') + self.assertEqual(unpack_qualified_name('path.Path'), ('path', 'Path')) + self.assertEqual(unpack_qualified_name('path.foo.bar.Path'), ('path.foo.bar', 'Path')) + + +## main ## + +if __name__ == '__main__': + unittest.main() + +## EOF ## diff --git a/test/utils/test_node.py b/test/utils/test_node.py index 
c70f0b8..9feb051 100644 --- a/test/utils/test_node.py +++ b/test/utils/test_node.py @@ -4,7 +4,7 @@ Part of the bsie test suite. A copy of the license is provided with the project. Author: Matthias Baumgartner, 2022 """ -# imports +# standard imports import unittest # bsie imports |