diff options
author | Matthias Baumgartner <dev@igsor.net> | 2022-12-23 16:25:51 +0100 |
---|---|---|
committer | Matthias Baumgartner <dev@igsor.net> | 2022-12-23 16:25:51 +0100 |
commit | ed2074ae88f2db6cb6b38716b43b35e29eb2e16c (patch) | |
tree | f84a28414c4e22e3f3c25ca430e19ff42a1ec2d9 /bsie/utils | |
parent | 057e09d6537bf5c39815661a75819081e3e5fda7 (diff) | |
download | bsie-ed2074ae88f2db6cb6b38716b43b35e29eb2e16c.tar.gz bsie-ed2074ae88f2db6cb6b38716b43b35e29eb2e16c.tar.bz2 bsie-ed2074ae88f2db6cb6b38716b43b35e29eb2e16c.zip |
filematcher: check file properties, formulate them as a string
Diffstat (limited to 'bsie/utils')
-rw-r--r-- | bsie/utils/__init__.py | 2 | ||||
-rw-r--r-- | bsie/utils/filematcher/__init__.py | 20 | ||||
-rw-r--r-- | bsie/utils/filematcher/matcher.py | 177 | ||||
-rw-r--r-- | bsie/utils/filematcher/parser.py | 148 |
4 files changed, 347 insertions, 0 deletions
diff --git a/bsie/utils/__init__.py b/bsie/utils/__init__.py index bd22236..3981dc7 100644 --- a/bsie/utils/__init__.py +++ b/bsie/utils/__init__.py @@ -11,9 +11,11 @@ import typing from . import bsfs from . import namespaces as ns from . import node +from . import filematcher # exports __all__: typing.Sequence[str] = ( + 'filematcher', 'bsfs', 'node', 'ns', diff --git a/bsie/utils/filematcher/__init__.py b/bsie/utils/filematcher/__init__.py new file mode 100644 index 0000000..b1c1b45 --- /dev/null +++ b/bsie/utils/filematcher/__init__.py @@ -0,0 +1,20 @@ +""" + +Part of the bsie module. +A copy of the license is provided with the project. +Author: Matthias Baumgartner, 2022 +""" +# imports +import typing + +# inner-module imports +from .matcher import Matcher +from .parser import parse + +# exports +__all__: typing.Sequence[str] = ( + 'Matcher', + 'parse', + ) + +## EOF ## diff --git a/bsie/utils/filematcher/matcher.py b/bsie/utils/filematcher/matcher.py new file mode 100644 index 0000000..164beeb --- /dev/null +++ b/bsie/utils/filematcher/matcher.py @@ -0,0 +1,177 @@ +""" + +Part of the bsie module. +A copy of the license is provided with the project. +Author: Matthias Baumgartner, 2021 +""" +# imports +from collections.abc import Callable, Collection, Hashable +import abc +import os +import typing +import magic + +# exports +__all__: typing.Sequence[str] = [] + + +## code ## + +# abstract nodes + +class Matcher(abc.ABC, Hashable, Callable, Collection): # type: ignore [misc] # Invalid base class Callable + """Matcher node base class.""" + + # child expressions or terminals + _childs: typing.Set[typing.Any] + + def __init__(self, *childs: typing.Any): + if len(childs) == 1 and isinstance(childs[0], (list, tuple, set)): + self._childs = set(childs[0]) + else: + self._childs = set(childs) + + def __contains__(self, needle: typing.Any) -> bool: + return needle in self._childs + + def __iter__(self) -> typing.Iterator[typing.Any]: + return iter(self._childs) + + def __len__(self) -> int: + return len(self._childs) + + def __repr__(self) -> str: + return f'{type(self).__name__}({self._childs})' + + def __hash__(self) -> int: + return hash((type(self), tuple(set(self._childs)))) + + def __eq__(self, other: typing.Any) -> bool: + return isinstance(other, type(self)) \ + and self._childs == other._childs + + @abc.abstractmethod + def __call__(self, path: str) -> bool: # pylint: disable=arguments-differ + """Check if *path* satisfies the conditions set by the Matcher instance.""" + +class NOT(Matcher): + """Invert a matcher result.""" + def __init__(self, expr: Matcher): + super().__init__(expr) + def __call__(self, path: str) -> bool: + return not next(iter(self._childs))(path) + +# aggregate nodes + +class Aggregate(Matcher): # pylint: disable=too-few-public-methods # Yeah, it's an interface... + """Aggregation function base class (And, Or).""" + +class And(Aggregate): + """Accept only if all conditions are satisfied.""" + def __call__(self, path: str) -> bool: + for itm in self: + if not itm(path): + return False + return True + +class Or(Aggregate): + """Accept only if at least one condition is satisfied.""" + def __call__(self, path: str) -> bool: + for itm in self: + if itm(path): + return True + return False + + +# criteria nodes + +class Criterion(Matcher): + """Criterion base class. Limits acceptance to certain values.""" + def accepted(self) -> typing.Set[typing.Any]: + """Return a set of accepted values.""" + return self._childs + +# criteria w/o value (valueless) + +class Any(Criterion): + """Accepts anything.""" + def __call__(self, path: str) -> bool: + return True + +class Nothing(Criterion): + """Accepts nothing.""" + def __call__(self, path: str) -> bool: + return False + +class Exists(Criterion): + """Filters by existence.""" + def __call__(self, path: str) -> bool: + return os.path.exists(path) + +class IsFile(Criterion): + """Checks if the path is a regular file.""" + def __call__(self, path: str) -> bool: + return os.path.isfile(path) + +class IsDir(Criterion): + """Checks if the path is a directory.""" + def __call__(self, path: str) -> bool: + return os.path.isdir(path) + +class IsLink(Criterion): + """Checks if the path is a link.""" + def __call__(self, path: str) -> bool: + return os.path.islink(path) + +class IsAbs(Criterion): + """Checks if the path is an absolute path.""" + def __call__(self, path: str) -> bool: + return os.path.isabs(path) + +class IsRel(Criterion): + """Checks if the path is a relative path.""" + def __call__(self, path: str) -> bool: + return not os.path.isabs(path) + +class IsMount(Criterion): + """Checks if the path is a mount point.""" + def __call__(self, path: str) -> bool: + return os.path.ismount(path) + +class IsEmpty(Criterion): + """Checks if the path is an empty file.""" + def __call__(self, path: str) -> bool: + return os.path.exists(path) and os.stat(path).st_size == 0 + +class IsReadable(Criterion): + """Checks if the path is readable.""" + def __call__(self, path: str) -> bool: + return os.path.exists(path) and os.access(path, os.R_OK) + +class IsWritable(Criterion): + """Checks if the path is writable.""" + def __call__(self, path: str) -> bool: + return os.path.exists(path) and os.access(path, os.W_OK) + +class IsExecutable(Criterion): + """Checks if the path is executable.""" + def __call__(self, path: str) -> bool: + return os.path.exists(path) and os.access(path, os.X_OK) + +# criteria w/ value + +class Extension(Criterion): + """Filters by file extension (without the dot).""" + def __call__(self, path: str) -> bool: + _, ext = os.path.splitext(path) + return ext[1:] in self.accepted() + +class Mime(Criterion): + """Filters by mime type.""" + def __call__(self, path: str) -> bool: + try: + return magic.from_file(path, mime=True).lower() in self.accepted() + except FileNotFoundError: + return False + +## EOF ## diff --git a/bsie/utils/filematcher/parser.py b/bsie/utils/filematcher/parser.py new file mode 100644 index 0000000..0654742 --- /dev/null +++ b/bsie/utils/filematcher/parser.py @@ -0,0 +1,148 @@ +""" + +Part of the bsie module. +A copy of the license is provided with the project. +Author: Matthias Baumgartner, 2021 +""" +# standard imports +import typing + +# non-standard imports +import pyparsing +from pyparsing import printables, alphas8bit, punc8bit, QuotedString, Word, \ + delimitedList, Or, CaselessKeyword, Group, oneOf, Optional + +# bsie imports +from bsie.base import errors + +# inner-module imports +from . import matcher + +# exports +__all__: typing.Sequence[str] = ( + 'parse', + ) + + +## code ## + +class FileMatcherParser(): + """ + EXPR := RULES | RULES "|" RULES + RULESET := RULE | RULE, RULE + RULE := CRITERION OP VALUE | CRITERION OP {VALUES} | VALUELESS + OP := != | = + VALUES := VALUE | VALUE, VALUE + VALUE := [word] + CRITERION := mime | extension | ... + """ + + # criteria matcher nodes w/ arguments + _CRITERIA: typing.Dict[str, typing.Type[matcher.Matcher]] = { + 'extension': matcher.Extension, + 'mime': matcher.Mime, + } + + # criteria matcher nodes w/o arguments + _VALUELESS: typing.Dict[str, typing.Type[matcher.Matcher]] = { + 'any': matcher.Any, + 'nothing': matcher.Nothing, + 'exists': matcher.Exists, + 'isfile': matcher.IsFile, + 'isdir': matcher.IsDir, + 'islink': matcher.IsLink, + 'isabs': matcher.IsAbs, + 'isrel': matcher.IsRel, + 'ismount': matcher.IsMount, + 'emtpy': matcher.IsEmpty, + 'readable': matcher.IsReadable, + 'writable': matcher.IsWritable, + 'executable': matcher.IsExecutable, + } + + # pyparsing parser instance. + _parser: pyparsing.ParseExpression + + def __init__(self): + # build the parser + # VALUE := [word] + alphabet = (printables + alphas8bit + punc8bit).translate(str.maketrans('', '', ',{}|=')) + value = QuotedString(quoteChar='"', escChar='\\') ^ Word(alphabet) + # CRITERION := mime | extension | ... + criterion = Or([CaselessKeyword(p) for p in self._CRITERIA]).setResultsName('criterion') + valueless = Or([CaselessKeyword(p) for p in self._VALUELESS]).setResultsName('criterion') + # VALUES := VALUE | VALUE, VALUE + values = delimitedList(value, delim=',').setResultsName('value') + # OP := '=' | '!=' + eqop = oneOf('= !=').setResultsName('op') + # RULE := CRITERION OP VALUE | CRITERION OP {VALUES} | VALUELESS + rule_none = Group(Optional('!').setResultsName('op') + valueless).setResultsName('rule_none') + rule_one = Group(criterion + eqop + value.setResultsName('value')).setResultsName('rule_one') + rule_few = Group(criterion + eqop + '{' + values + '}').setResultsName('rule_few') + # RULESET := RULE | RULE, RULE + ruleset = Group(delimitedList(rule_none ^ rule_one ^ rule_few, delim=',')) + # EXPR := RULESET | RULESET \| RULESET + self._parser = delimitedList(ruleset, delim='|') + + def parse(self, query: str) -> matcher.Matcher: # pylint: disable=too-many-branches + """Build a file matcher from a rule definition.""" + # preprocess the query + query = query.strip() + + # empty query + if len(query) == 0: + return matcher.Any() + + try: + parsed = self._parser.parseString(query, parseAll=True) + except pyparsing.ParseException as err: + raise errors.ParserError(f'Cannot parse query {err}') + + # convert to Matcher + rules = [] + for exp in parsed: + tokens = [] + for rule in exp: + # fetch accepted values + if rule.getName() == 'rule_none': + accepted = [] + elif rule.getName() == 'rule_one': + accepted = [rule.value] + elif rule.getName() == 'rule_few': + accepted = list(rule.value) + else: # prevented by grammar + raise errors.UnreachableError('Invalid rule definition') + + # build criterion + if rule.criterion in self._VALUELESS: + cls = self._VALUELESS[rule.criterion] + if rule.op == '!': + tokens.append(matcher.NOT(cls())) + else: + tokens.append(cls()) + elif rule.criterion in self._CRITERIA: + cls = self._CRITERIA[rule.criterion] + if rule.op == '!=': + tokens.append(matcher.NOT(cls(accepted))) + else: + tokens.append(cls(accepted)) + else: # prevented by grammar + raise errors.UnreachableError(f'Invalid condition "{rule.criterion}"') + + # And-aggregate rules in one ruleset (if needed) + tokens = matcher.And(tokens) if len(tokens) > 1 else tokens[0] + rules.append(tokens) + + # Or-aggregate rulesets + expr = matcher.Or(rules) if len(rules) > 1 else rules[0] + + return expr + +# build default instance +file_match_parser = FileMatcherParser() + +def parse(query: str) -> matcher.Matcher: + """Shortcut for FileMatcherParser()(query).""" + return file_match_parser.parse(query) + +## EOF ## |