aboutsummaryrefslogtreecommitdiffstats
path: root/bsie/utils
diff options
context:
space:
mode:
Diffstat (limited to 'bsie/utils')
-rw-r--r--bsie/utils/__init__.py7
-rw-r--r--bsie/utils/bsfs.py2
-rw-r--r--bsie/utils/errors.py48
-rw-r--r--bsie/utils/filematcher/__init__.py20
-rw-r--r--bsie/utils/filematcher/matcher.py179
-rw-r--r--bsie/utils/filematcher/parser.py146
-rw-r--r--bsie/utils/loading.py54
-rw-r--r--bsie/utils/namespaces.py3
-rw-r--r--bsie/utils/node.py2
9 files changed, 457 insertions, 4 deletions
diff --git a/bsie/utils/__init__.py b/bsie/utils/__init__.py
index bd22236..9cb60ed 100644
--- a/bsie/utils/__init__.py
+++ b/bsie/utils/__init__.py
@@ -4,19 +4,24 @@ Part of the bsie module.
A copy of the license is provided with the project.
Author: Matthias Baumgartner, 2022
"""
-# imports
+# standard imports
import typing
# inner-module imports
from . import bsfs
+from . import filematcher
from . import namespaces as ns
from . import node
+from .loading import safe_load, unpack_qualified_name
# exports
__all__: typing.Sequence[str] = (
'bsfs',
+ 'filematcher',
'node',
'ns',
+ 'safe_load',
+ 'unpack_qualified_name',
)
## EOF ##
diff --git a/bsie/utils/bsfs.py b/bsie/utils/bsfs.py
index 0b88479..ef5db31 100644
--- a/bsie/utils/bsfs.py
+++ b/bsie/utils/bsfs.py
@@ -4,7 +4,7 @@ Part of the bsie module.
A copy of the license is provided with the project.
Author: Matthias Baumgartner, 2022
"""
-# imports
+# standard imports
import typing
# bsfs imports
diff --git a/bsie/utils/errors.py b/bsie/utils/errors.py
new file mode 100644
index 0000000..8133cd4
--- /dev/null
+++ b/bsie/utils/errors.py
@@ -0,0 +1,48 @@
+"""Common BSIE exceptions.
+
+Part of the bsie module.
+A copy of the license is provided with the project.
+Author: Matthias Baumgartner, 2022
+"""
+# standard imports
+import typing
+
+# exports
+__all__: typing.Sequence[str] = (
+ 'BuilderError',
+ 'ExtractorError',
+ 'LoaderError',
+ 'ReaderError',
+ )
+
+
+## code ##
+
+class _BSIEError(Exception):
+ """Generic BSIE error."""
+
+class BuilderError(_BSIEError):
+ """The Builder failed to create an instance."""
+
+class LoaderError(BuilderError):
+ """Failed to load a module or class."""
+
+class ExtractorError(_BSIEError):
+ """The Extractor failed to process the given content."""
+
+class ReaderError(_BSIEError):
+ """The Reader failed to read the given file."""
+
+class ProgrammingError(_BSIEError):
+ """An assertion-like error that indicates a code-base issue."""
+
+class UnreachableError(ProgrammingError):
+ """Bravo, you've reached a point in code that should logically not be reachable."""
+
+class ParserError(_BSIEError):
+ """Failed to parse due to invalid syntax or structures."""
+
+class UnsupportedFileFormatError(ReaderError):
+ """Failed to read a file format."""
+
+## EOF ##
diff --git a/bsie/utils/filematcher/__init__.py b/bsie/utils/filematcher/__init__.py
new file mode 100644
index 0000000..1e23e4e
--- /dev/null
+++ b/bsie/utils/filematcher/__init__.py
@@ -0,0 +1,20 @@
+"""
+
+Part of the bsie module.
+A copy of the license is provided with the project.
+Author: Matthias Baumgartner, 2022
+"""
+# standard imports
+import typing
+
+# inner-module imports
+from .matcher import Matcher
+from .parser import parse
+
+# exports
+__all__: typing.Sequence[str] = (
+ 'Matcher',
+ 'parse',
+ )
+
+## EOF ##
diff --git a/bsie/utils/filematcher/matcher.py b/bsie/utils/filematcher/matcher.py
new file mode 100644
index 0000000..a279a4b
--- /dev/null
+++ b/bsie/utils/filematcher/matcher.py
@@ -0,0 +1,179 @@
+"""
+
+Part of the bsie module.
+A copy of the license is provided with the project.
+Author: Matthias Baumgartner, 2021
+"""
+# standard imports
+from collections.abc import Callable, Collection, Hashable
+import abc
+import os
+import typing
+
+# external imports
+import magic
+
+# exports
+__all__: typing.Sequence[str] = []
+
+
+## code ##
+
+# abstract nodes
+
+class Matcher(abc.ABC, Hashable, Callable, Collection): # type: ignore [misc] # Invalid base class Callable
+ """Matcher node base class."""
+
+ # child expressions or terminals
+ _childs: typing.Set[typing.Any]
+
+ def __init__(self, *childs: typing.Any):
+ if len(childs) == 1 and isinstance(childs[0], (list, tuple, set)):
+ self._childs = set(childs[0])
+ else:
+ self._childs = set(childs)
+
+ def __contains__(self, needle: typing.Any) -> bool:
+ return needle in self._childs
+
+ def __iter__(self) -> typing.Iterator[typing.Any]:
+ return iter(self._childs)
+
+ def __len__(self) -> int:
+ return len(self._childs)
+
+ def __repr__(self) -> str:
+ return f'{type(self).__name__}({self._childs})'
+
+ def __hash__(self) -> int:
+ return hash((type(self), tuple(set(self._childs))))
+
+ def __eq__(self, other: typing.Any) -> bool:
+ return isinstance(other, type(self)) \
+ and self._childs == other._childs
+
+ @abc.abstractmethod
+ def __call__(self, path: str) -> bool: # pylint: disable=arguments-differ
+ """Check if *path* satisfies the conditions set by the Matcher instance."""
+
+class NOT(Matcher):
+ """Invert a matcher result."""
+ def __init__(self, expr: Matcher):
+ super().__init__(expr)
+ def __call__(self, path: str) -> bool:
+ return not next(iter(self._childs))(path)
+
+# aggregate nodes
+
+class Aggregate(Matcher): # pylint: disable=too-few-public-methods # Yeah, it's an interface...
+ """Aggregation function base class (And, Or)."""
+
+class And(Aggregate):
+ """Accept only if all conditions are satisfied."""
+ def __call__(self, path: str) -> bool:
+ for itm in self:
+ if not itm(path):
+ return False
+ return True
+
+class Or(Aggregate):
+ """Accept only if at least one condition is satisfied."""
+ def __call__(self, path: str) -> bool:
+ for itm in self:
+ if itm(path):
+ return True
+ return False
+
+
+# criteria nodes
+
+class Criterion(Matcher):
+ """Criterion base class. Limits acceptance to certain values."""
+ def accepted(self) -> typing.Set[typing.Any]:
+ """Return a set of accepted values."""
+ return self._childs
+
+# criteria w/o value (valueless)
+
+class Any(Criterion):
+ """Accepts anything."""
+ def __call__(self, path: str) -> bool:
+ return True
+
+class Nothing(Criterion):
+ """Accepts nothing."""
+ def __call__(self, path: str) -> bool:
+ return False
+
+class Exists(Criterion):
+ """Filters by existence."""
+ def __call__(self, path: str) -> bool:
+ return os.path.exists(path)
+
+class IsFile(Criterion):
+ """Checks if the path is a regular file."""
+ def __call__(self, path: str) -> bool:
+ return os.path.isfile(path)
+
+class IsDir(Criterion):
+ """Checks if the path is a directory."""
+ def __call__(self, path: str) -> bool:
+ return os.path.isdir(path)
+
+class IsLink(Criterion):
+ """Checks if the path is a link."""
+ def __call__(self, path: str) -> bool:
+ return os.path.islink(path)
+
+class IsAbs(Criterion):
+ """Checks if the path is an absolute path."""
+ def __call__(self, path: str) -> bool:
+ return os.path.isabs(path)
+
+class IsRel(Criterion):
+ """Checks if the path is a relative path."""
+ def __call__(self, path: str) -> bool:
+ return not os.path.isabs(path)
+
+class IsMount(Criterion):
+ """Checks if the path is a mount point."""
+ def __call__(self, path: str) -> bool:
+ return os.path.ismount(path)
+
+class IsEmpty(Criterion):
+ """Checks if the path is an empty file."""
+ def __call__(self, path: str) -> bool:
+ return os.path.exists(path) and os.stat(path).st_size == 0
+
+class IsReadable(Criterion):
+ """Checks if the path is readable."""
+ def __call__(self, path: str) -> bool:
+ return os.path.exists(path) and os.access(path, os.R_OK)
+
+class IsWritable(Criterion):
+ """Checks if the path is writable."""
+ def __call__(self, path: str) -> bool:
+ return os.path.exists(path) and os.access(path, os.W_OK)
+
+class IsExecutable(Criterion):
+ """Checks if the path is executable."""
+ def __call__(self, path: str) -> bool:
+ return os.path.exists(path) and os.access(path, os.X_OK)
+
+# criteria w/ value
+
+class Extension(Criterion):
+ """Filters by file extension (without the dot)."""
+ def __call__(self, path: str) -> bool:
+ _, ext = os.path.splitext(path)
+ return ext[1:] in self.accepted()
+
+class Mime(Criterion):
+ """Filters by mime type."""
+ def __call__(self, path: str) -> bool:
+ try:
+ return magic.from_file(path, mime=True).lower() in self.accepted()
+ except FileNotFoundError:
+ return False
+
+## EOF ##
diff --git a/bsie/utils/filematcher/parser.py b/bsie/utils/filematcher/parser.py
new file mode 100644
index 0000000..2f82875
--- /dev/null
+++ b/bsie/utils/filematcher/parser.py
@@ -0,0 +1,146 @@
+"""
+
+Part of the bsie module.
+A copy of the license is provided with the project.
+Author: Matthias Baumgartner, 2021
+"""
+# standard imports
+import typing
+
+# external imports
+import pyparsing
+from pyparsing import printables, alphas8bit, punc8bit, QuotedString, Word, \
+ delimitedList, Or, CaselessKeyword, Group, oneOf, Optional
+
+# inner-module imports
+from . import matcher
+from .. import errors
+
+# exports
+__all__: typing.Sequence[str] = (
+ 'parse',
+ )
+
+
+## code ##
+
+class FileMatcherParser():
+ """
+ EXPR := RULES | RULES "|" RULES
+ RULESET := RULE | RULE, RULE
+ RULE := CRITERION OP VALUE | CRITERION OP {VALUES} | VALUELESS
+ OP := != | =
+ VALUES := VALUE | VALUE, VALUE
+ VALUE := [word]
+ CRITERION := mime | extension | ...
+ """
+
+ # criteria matcher nodes w/ arguments
+ _CRITERIA: typing.Dict[str, typing.Type[matcher.Matcher]] = {
+ 'extension': matcher.Extension,
+ 'mime': matcher.Mime,
+ }
+
+ # criteria matcher nodes w/o arguments
+ _VALUELESS: typing.Dict[str, typing.Type[matcher.Matcher]] = {
+ 'any': matcher.Any,
+ 'nothing': matcher.Nothing,
+ 'exists': matcher.Exists,
+ 'isfile': matcher.IsFile,
+ 'isdir': matcher.IsDir,
+ 'islink': matcher.IsLink,
+ 'isabs': matcher.IsAbs,
+ 'isrel': matcher.IsRel,
+ 'ismount': matcher.IsMount,
+ 'emtpy': matcher.IsEmpty,
+ 'readable': matcher.IsReadable,
+ 'writable': matcher.IsWritable,
+ 'executable': matcher.IsExecutable,
+ }
+
+ # pyparsing parser instance.
+ _parser: pyparsing.ParseExpression
+
+ def __init__(self):
+ # build the parser
+ # VALUE := [word]
+ alphabet = (printables + alphas8bit + punc8bit).translate(str.maketrans('', '', ',{}|='))
+ value = QuotedString(quoteChar='"', escChar='\\') ^ Word(alphabet)
+ # CRITERION := mime | extension | ...
+ criterion = Or([CaselessKeyword(p) for p in self._CRITERIA]).setResultsName('criterion')
+ valueless = Or([CaselessKeyword(p) for p in self._VALUELESS]).setResultsName('criterion')
+ # VALUES := VALUE | VALUE, VALUE
+ values = delimitedList(value, delim=',').setResultsName('value')
+ # OP := '=' | '!='
+ eqop = oneOf('= !=').setResultsName('op')
+ # RULE := CRITERION OP VALUE | CRITERION OP {VALUES} | VALUELESS
+ rule_none = Group(Optional('!').setResultsName('op') + valueless).setResultsName('rule_none')
+ rule_one = Group(criterion + eqop + value.setResultsName('value')).setResultsName('rule_one')
+ rule_few = Group(criterion + eqop + '{' + values + '}').setResultsName('rule_few')
+ # RULESET := RULE | RULE, RULE
+ ruleset = Group(delimitedList(rule_none ^ rule_one ^ rule_few, delim=','))
+ # EXPR := RULESET | RULESET \| RULESET
+ self._parser = delimitedList(ruleset, delim='|')
+
+ def parse(self, query: str) -> matcher.Matcher: # pylint: disable=too-many-branches
+ """Build a file matcher from a rule definition."""
+ # preprocess the query
+ query = query.strip()
+
+ # empty query
+ if len(query) == 0:
+ return matcher.Any()
+
+ try:
+ parsed = self._parser.parseString(query, parseAll=True)
+ except pyparsing.ParseException as err:
+ raise errors.ParserError(f'Cannot parse query {err}')
+
+ # convert to Matcher
+ rules = []
+ for exp in parsed:
+ tokens = []
+ for rule in exp:
+ # fetch accepted values
+ if rule.getName() == 'rule_none':
+ accepted = []
+ elif rule.getName() == 'rule_one':
+ accepted = [rule.value]
+ elif rule.getName() == 'rule_few':
+ accepted = list(rule.value)
+ else: # prevented by grammar
+ raise errors.UnreachableError('Invalid rule definition')
+
+ # build criterion
+ if rule.criterion in self._VALUELESS:
+ cls = self._VALUELESS[rule.criterion]
+ if rule.op == '!':
+ tokens.append(matcher.NOT(cls()))
+ else:
+ tokens.append(cls())
+ elif rule.criterion in self._CRITERIA:
+ cls = self._CRITERIA[rule.criterion]
+ if rule.op == '!=':
+ tokens.append(matcher.NOT(cls(accepted)))
+ else:
+ tokens.append(cls(accepted))
+ else: # prevented by grammar
+ raise errors.UnreachableError(f'Invalid condition "{rule.criterion}"')
+
+ # And-aggregate rules in one ruleset (if needed)
+ tokens = matcher.And(tokens) if len(tokens) > 1 else tokens[0]
+ rules.append(tokens)
+
+ # Or-aggregate rulesets
+ expr = matcher.Or(rules) if len(rules) > 1 else rules[0]
+
+ return expr
+
+# build default instance
+file_match_parser = FileMatcherParser()
+
+def parse(query: str) -> matcher.Matcher:
+ """Shortcut for FileMatcherParser()(query)."""
+ return file_match_parser.parse(query)
+
+## EOF ##
diff --git a/bsie/utils/loading.py b/bsie/utils/loading.py
new file mode 100644
index 0000000..eb05c35
--- /dev/null
+++ b/bsie/utils/loading.py
@@ -0,0 +1,54 @@
+"""
+
+Part of the bsie module.
+A copy of the license is provided with the project.
+Author: Matthias Baumgartner, 2022
+"""
+# standard imports
+import importlib
+import typing
+
+# inner-module imports
+from . import errors
+
+# exports
+__all__: typing.Sequence[str] = (
+ 'safe_load',
+ 'unpack_qualified_name',
+ )
+
+
+## code ##
+
+def safe_load(module_name: str, class_name: str):
+ """Get a class from a module. Raise BuilderError if anything goes wrong."""
+ try:
+ # load the module
+ module = importlib.import_module(module_name)
+ except Exception as err:
+ # cannot import module
+ raise errors.LoaderError(f'cannot load module {module_name}') from err
+
+ try:
+ # get the class from the module
+ cls = getattr(module, class_name)
+ except Exception as err:
+ # cannot find the class
+ raise errors.LoaderError(f'cannot load class {class_name} from module {module_name}') from err
+
+ return cls
+
+
+def unpack_qualified_name(name):
+ """Split a name into its module and class component (dot-separated)."""
+ if not isinstance(name, str):
+ raise TypeError(name)
+ if '.' not in name:
+ raise ValueError('name must be a qualified class name.')
+ module_name, class_name = name[:name.rfind('.')], name[name.rfind('.')+1:]
+ if module_name == '':
+ raise ValueError('name must be a qualified class name.')
+ return module_name, class_name
+
+
+## EOF ##
diff --git a/bsie/utils/namespaces.py b/bsie/utils/namespaces.py
index a29fc1b..393b436 100644
--- a/bsie/utils/namespaces.py
+++ b/bsie/utils/namespaces.py
@@ -4,7 +4,7 @@ Part of the bsie module.
A copy of the license is provided with the project.
Author: Matthias Baumgartner, 2022
"""
-# imports
+# standard imports
import typing
# inner-module imports
@@ -15,6 +15,7 @@ bse = _bsfs.Namespace('http://bsfs.ai/schema/Entity')
bsfs = _bsfs.Namespace('http://bsfs.ai/schema', fsep='/')
bsm = _bsfs.Namespace('http://bsfs.ai/schema/Meta')
xsd = _bsfs.Namespace('http://www.w3.org/2001/XMLSchema')
+bsf = _bsfs.Namespace('http://ie.bsfs.ai/schema/Feature')
# export
__all__: typing.Sequence[str] = (
diff --git a/bsie/utils/node.py b/bsie/utils/node.py
index ecf39cd..91e4f37 100644
--- a/bsie/utils/node.py
+++ b/bsie/utils/node.py
@@ -4,7 +4,7 @@ Part of the bsie module.
A copy of the license is provided with the project.
Author: Matthias Baumgartner, 2022
"""
-# imports
+# standard imports
import typing
# bsie imports