aboutsummaryrefslogtreecommitdiffstats
path: root/bsie/utils
diff options
context:
space:
mode:
Diffstat (limited to 'bsie/utils')
-rw-r--r--bsie/utils/__init__.py11
-rw-r--r--bsie/utils/bsfs.py6
-rw-r--r--bsie/utils/errors.py44
-rw-r--r--bsie/utils/filematcher/__init__.py15
-rw-r--r--bsie/utils/filematcher/matcher.py174
-rw-r--r--bsie/utils/filematcher/parser.py141
-rw-r--r--bsie/utils/loading.py49
-rw-r--r--bsie/utils/namespaces.py33
-rw-r--r--bsie/utils/node.py35
9 files changed, 476 insertions, 32 deletions
diff --git a/bsie/utils/__init__.py b/bsie/utils/__init__.py
index bd22236..18c8db7 100644
--- a/bsie/utils/__init__.py
+++ b/bsie/utils/__init__.py
@@ -1,22 +1,23 @@
"""Common tools and definitions.
-
-Part of the bsie module.
-A copy of the license is provided with the project.
-Author: Matthias Baumgartner, 2022
"""
-# imports
+# standard imports
import typing
# inner-module imports
from . import bsfs
+from . import filematcher
from . import namespaces as ns
from . import node
+from .loading import safe_load, unpack_qualified_name
# exports
__all__: typing.Sequence[str] = (
'bsfs',
+ 'filematcher',
'node',
'ns',
+ 'safe_load',
+ 'unpack_qualified_name',
)
## EOF ##
diff --git a/bsie/utils/bsfs.py b/bsie/utils/bsfs.py
index 0b88479..fc045cc 100644
--- a/bsie/utils/bsfs.py
+++ b/bsie/utils/bsfs.py
@@ -1,10 +1,6 @@
"""BSFS bridge, provides BSFS bindings for BSIE.
-
-Part of the bsie module.
-A copy of the license is provided with the project.
-Author: Matthias Baumgartner, 2022
"""
-# imports
+# standard imports
import typing
# bsfs imports
diff --git a/bsie/utils/errors.py b/bsie/utils/errors.py
new file mode 100644
index 0000000..7c7e6ed
--- /dev/null
+++ b/bsie/utils/errors.py
@@ -0,0 +1,44 @@
+"""Common BSIE exceptions.
+"""
+# standard imports
+import typing
+
+# exports
+__all__: typing.Sequence[str] = (
+ 'BuilderError',
+ 'ExtractorError',
+ 'LoaderError',
+ 'ReaderError',
+ )
+
+
+## code ##
+
+class _BSIEError(Exception):
+ """Generic BSIE error."""
+
+class BuilderError(_BSIEError):
+ """The Builder failed to create an instance."""
+
+class LoaderError(BuilderError):
+ """Failed to load a module or class."""
+
+class ExtractorError(_BSIEError):
+ """The Extractor failed to process the given content."""
+
+class ReaderError(_BSIEError):
+ """The Reader failed to read the given file."""
+
+class ProgrammingError(_BSIEError):
+ """An assertion-like error that indicates a code-base issue."""
+
+class UnreachableError(ProgrammingError):
+ """Bravo, you've reached a point in code that should logically not be reachable."""
+
+class ParserError(_BSIEError):
+ """Failed to parse due to invalid syntax or structures."""
+
+class UnsupportedFileFormatError(_BSIEError):
+ """Failed to read a file format."""
+
+## EOF ##
diff --git a/bsie/utils/filematcher/__init__.py b/bsie/utils/filematcher/__init__.py
new file mode 100644
index 0000000..908de78
--- /dev/null
+++ b/bsie/utils/filematcher/__init__.py
@@ -0,0 +1,15 @@
+
+# standard imports
+import typing
+
+# inner-module imports
+from .matcher import Matcher
+from .parser import parse
+
+# exports
+__all__: typing.Sequence[str] = (
+ 'Matcher',
+ 'parse',
+ )
+
+## EOF ##
diff --git a/bsie/utils/filematcher/matcher.py b/bsie/utils/filematcher/matcher.py
new file mode 100644
index 0000000..1fa308e
--- /dev/null
+++ b/bsie/utils/filematcher/matcher.py
@@ -0,0 +1,174 @@
+
+# standard imports
+from collections.abc import Callable, Collection, Hashable
+import abc
+import os
+import typing
+
+# external imports
+import magic
+
+# exports
+__all__: typing.Sequence[str] = []
+
+
+## code ##
+
+# abstract nodes
+
+class Matcher(abc.ABC, Hashable, Callable, Collection): # type: ignore [misc] # Invalid base class Callable
+ """Matcher node base class."""
+
+ # child expressions or terminals
+ _childs: typing.Set[typing.Any]
+
+ def __init__(self, *childs: typing.Any):
+ if len(childs) == 1 and isinstance(childs[0], (list, tuple, set)):
+ self._childs = set(childs[0])
+ else:
+ self._childs = set(childs)
+
+ def __contains__(self, needle: typing.Any) -> bool:
+ return needle in self._childs
+
+ def __iter__(self) -> typing.Iterator[typing.Any]:
+ return iter(self._childs)
+
+ def __len__(self) -> int:
+ return len(self._childs)
+
+ def __repr__(self) -> str:
+ return f'{type(self).__name__}({self._childs})'
+
+ def __hash__(self) -> int:
+ return hash((type(self), tuple(set(self._childs))))
+
+ def __eq__(self, other: typing.Any) -> bool:
+ return isinstance(other, type(self)) \
+ and self._childs == other._childs
+
+ @abc.abstractmethod
+ def __call__(self, path: str) -> bool: # pylint: disable=arguments-differ
+ """Check if *path* satisfies the conditions set by the Matcher instance."""
+
+class NOT(Matcher):
+ """Invert a matcher result."""
+ def __init__(self, expr: Matcher):
+ super().__init__(expr)
+ def __call__(self, path: str) -> bool:
+ return not next(iter(self._childs))(path)
+
+# aggregate nodes
+
+class Aggregate(Matcher): # pylint: disable=too-few-public-methods # Yeah, it's an interface...
+ """Aggregation function base class (And, Or)."""
+
+class And(Aggregate):
+ """Accept only if all conditions are satisfied."""
+ def __call__(self, path: str) -> bool:
+ for itm in self:
+ if not itm(path):
+ return False
+ return True
+
+class Or(Aggregate):
+ """Accept only if at least one condition is satisfied."""
+ def __call__(self, path: str) -> bool:
+ for itm in self:
+ if itm(path):
+ return True
+ return False
+
+
+# criteria nodes
+
+class Criterion(Matcher):
+ """Criterion base class. Limits acceptance to certain values."""
+ def accepted(self) -> typing.Set[typing.Any]:
+ """Return a set of accepted values."""
+ return self._childs
+
+# criteria w/o value (valueless)
+
+class Any(Criterion):
+ """Accepts anything."""
+ def __call__(self, path: str) -> bool:
+ return True
+
+class Nothing(Criterion):
+ """Accepts nothing."""
+ def __call__(self, path: str) -> bool:
+ return False
+
+class Exists(Criterion):
+ """Filters by existence."""
+ def __call__(self, path: str) -> bool:
+ return os.path.exists(path)
+
+class IsFile(Criterion):
+ """Checks if the path is a regular file."""
+ def __call__(self, path: str) -> bool:
+ return os.path.isfile(path)
+
+class IsDir(Criterion):
+ """Checks if the path is a directory."""
+ def __call__(self, path: str) -> bool:
+ return os.path.isdir(path)
+
+class IsLink(Criterion):
+ """Checks if the path is a link."""
+ def __call__(self, path: str) -> bool:
+ return os.path.islink(path)
+
+class IsAbs(Criterion):
+ """Checks if the path is an absolute path."""
+ def __call__(self, path: str) -> bool:
+ return os.path.isabs(path)
+
+class IsRel(Criterion):
+ """Checks if the path is a relative path."""
+ def __call__(self, path: str) -> bool:
+ return not os.path.isabs(path)
+
+class IsMount(Criterion):
+ """Checks if the path is a mount point."""
+ def __call__(self, path: str) -> bool:
+ return os.path.ismount(path)
+
+class IsEmpty(Criterion):
+ """Checks if the path is an empty file."""
+ def __call__(self, path: str) -> bool:
+ return os.path.exists(path) and os.stat(path).st_size == 0
+
+class IsReadable(Criterion):
+ """Checks if the path is readable."""
+ def __call__(self, path: str) -> bool:
+ return os.path.exists(path) and os.access(path, os.R_OK)
+
+class IsWritable(Criterion):
+ """Checks if the path is writable."""
+ def __call__(self, path: str) -> bool:
+ return os.path.exists(path) and os.access(path, os.W_OK)
+
+class IsExecutable(Criterion):
+ """Checks if the path is executable."""
+ def __call__(self, path: str) -> bool:
+ return os.path.exists(path) and os.access(path, os.X_OK)
+
+# criteria w/ value
+
+class Extension(Criterion):
+ """Filters by file extension (without the dot)."""
+ def __call__(self, path: str) -> bool:
+ _, ext = os.path.splitext(path)
+ return ext[1:] in self.accepted()
+
+class Mime(Criterion):
+ """Filters by mime type."""
+ def __call__(self, path: str) -> bool:
+ try:
+ return magic.from_file(path, mime=True).lower() in self.accepted()
+ except FileNotFoundError:
+ return False
+
+## EOF ##
diff --git a/bsie/utils/filematcher/parser.py b/bsie/utils/filematcher/parser.py
new file mode 100644
index 0000000..dc28a0d
--- /dev/null
+++ b/bsie/utils/filematcher/parser.py
@@ -0,0 +1,141 @@
+
+# standard imports
+import typing
+
+# external imports
+import pyparsing
+from pyparsing import printables, alphas8bit, punc8bit, QuotedString, Word, \
+ delimitedList, Or, CaselessKeyword, Group, oneOf, Optional
+
+# inner-module imports
+from . import matcher
+from .. import errors
+
+# exports
+__all__: typing.Sequence[str] = (
+ 'parse',
+ )
+
+
+## code ##
+
+class FileMatcherParser():
+ """
+ EXPR := RULES | RULES "|" RULES
+ RULESET := RULE | RULE, RULE
+ RULE := CRITERION OP VALUE | CRITERION OP {VALUES} | VALUELESS
+ OP := != | =
+ VALUES := VALUE | VALUE, VALUE
+ VALUE := [word]
+ CRITERION := mime | extension | ...
+ """
+
+ # criteria matcher nodes w/ arguments
+ _CRITERIA: typing.Dict[str, typing.Type[matcher.Matcher]] = {
+ 'extension': matcher.Extension,
+ 'mime': matcher.Mime,
+ }
+
+ # criteria matcher nodes w/o arguments
+ _VALUELESS: typing.Dict[str, typing.Type[matcher.Matcher]] = {
+ 'any': matcher.Any,
+ 'nothing': matcher.Nothing,
+ 'exists': matcher.Exists,
+ 'isfile': matcher.IsFile,
+ 'isdir': matcher.IsDir,
+ 'islink': matcher.IsLink,
+ 'isabs': matcher.IsAbs,
+ 'isrel': matcher.IsRel,
+ 'ismount': matcher.IsMount,
+ 'emtpy': matcher.IsEmpty,
+ 'readable': matcher.IsReadable,
+ 'writable': matcher.IsWritable,
+ 'executable': matcher.IsExecutable,
+ }
+
+ # pyparsing parser instance.
+ _parser: pyparsing.ParseExpression
+
+ def __init__(self):
+ # build the parser
+ # VALUE := [word]
+ alphabet = (printables + alphas8bit + punc8bit).translate(str.maketrans('', '', ',{}|='))
+ value = QuotedString(quoteChar='"', escChar='\\') ^ Word(alphabet)
+ # CRITERION := mime | extension | ...
+ criterion = Or([CaselessKeyword(p) for p in self._CRITERIA]).setResultsName('criterion')
+ valueless = Or([CaselessKeyword(p) for p in self._VALUELESS]).setResultsName('criterion')
+ # VALUES := VALUE | VALUE, VALUE
+ values = delimitedList(value, delim=',').setResultsName('value')
+ # OP := '=' | '!='
+ eqop = oneOf('= !=').setResultsName('op')
+ # RULE := CRITERION OP VALUE | CRITERION OP {VALUES} | VALUELESS
+ rule_none = Group(Optional('!').setResultsName('op') + valueless).setResultsName('rule_none')
+ rule_one = Group(criterion + eqop + value.setResultsName('value')).setResultsName('rule_one')
+ rule_few = Group(criterion + eqop + '{' + values + '}').setResultsName('rule_few')
+ # RULESET := RULE | RULE, RULE
+ ruleset = Group(delimitedList(rule_none ^ rule_one ^ rule_few, delim=','))
+ # EXPR := RULESET | RULESET \| RULESET
+ self._parser = delimitedList(ruleset, delim='|')
+
+ def parse(self, query: str) -> matcher.Matcher: # pylint: disable=too-many-branches
+ """Build a file matcher from a rule definition."""
+ # preprocess the query
+ query = query.strip()
+
+ # empty query
+ if len(query) == 0:
+ return matcher.Any()
+
+ try:
+ parsed = self._parser.parseString(query, parseAll=True)
+ except pyparsing.ParseException as err:
+ raise errors.ParserError(f'Cannot parse query {err}')
+
+ # convert to Matcher
+ rules = []
+ for exp in parsed:
+ tokens = []
+ for rule in exp:
+ # fetch accepted values
+ if rule.getName() == 'rule_none':
+ accepted = []
+ elif rule.getName() == 'rule_one':
+ accepted = [rule.value]
+ elif rule.getName() == 'rule_few':
+ accepted = list(rule.value)
+ else: # prevented by grammar
+ raise errors.UnreachableError('Invalid rule definition')
+
+ # build criterion
+ if rule.criterion in self._VALUELESS:
+ cls = self._VALUELESS[rule.criterion]
+ if rule.op == '!':
+ tokens.append(matcher.NOT(cls()))
+ else:
+ tokens.append(cls())
+ elif rule.criterion in self._CRITERIA:
+ cls = self._CRITERIA[rule.criterion]
+ if rule.op == '!=':
+ tokens.append(matcher.NOT(cls(accepted)))
+ else:
+ tokens.append(cls(accepted))
+ else: # prevented by grammar
+ raise errors.UnreachableError(f'Invalid condition "{rule.criterion}"')
+
+ # And-aggregate rules in one ruleset (if needed)
+ tokens = matcher.And(tokens) if len(tokens) > 1 else tokens[0]
+ rules.append(tokens)
+
+ # Or-aggregate rulesets
+ expr = matcher.Or(rules) if len(rules) > 1 else rules[0]
+
+ return expr
+
+# build default instance
+file_match_parser = FileMatcherParser()
+
+def parse(query: str) -> matcher.Matcher:
+ """Shortcut for FileMatcherParser()(query)."""
+ return file_match_parser.parse(query)
+
+## EOF ##
diff --git a/bsie/utils/loading.py b/bsie/utils/loading.py
new file mode 100644
index 0000000..58202d1
--- /dev/null
+++ b/bsie/utils/loading.py
@@ -0,0 +1,49 @@
+
+# standard imports
+import importlib
+import typing
+
+# inner-module imports
+from . import errors
+
+# exports
+__all__: typing.Sequence[str] = (
+ 'safe_load',
+ 'unpack_qualified_name',
+ )
+
+
+## code ##
+
+def safe_load(module_name: str, class_name: str):
+ """Get a class from a module. Raise BuilderError if anything goes wrong."""
+ try:
+ # load the module
+ module = importlib.import_module(module_name)
+ except Exception as err:
+ # cannot import module
+ raise errors.LoaderError(f'cannot load module {module_name} ({err})') from err
+
+ try:
+ # get the class from the module
+ cls = getattr(module, class_name)
+ except Exception as err:
+ # cannot find the class
+ raise errors.LoaderError(f'cannot load class {class_name} from module {module_name} ({err})') from err
+
+ return cls
+
+
+def unpack_qualified_name(name):
+ """Split a name into its module and class component (dot-separated)."""
+ if not isinstance(name, str):
+ raise TypeError(name)
+ if '.' not in name:
+ raise ValueError('name must be a qualified class name.')
+ module_name, class_name = name[:name.rfind('.')], name[name.rfind('.')+1:]
+ if module_name == '':
+ raise ValueError('name must be a qualified class name.')
+ return module_name, class_name
+
+
+## EOF ##
diff --git a/bsie/utils/namespaces.py b/bsie/utils/namespaces.py
index a29fc1b..4a66048 100644
--- a/bsie/utils/namespaces.py
+++ b/bsie/utils/namespaces.py
@@ -1,26 +1,37 @@
"""Default namespaces used throughout BSIE.
-
-Part of the bsie module.
-A copy of the license is provided with the project.
-Author: Matthias Baumgartner, 2022
"""
-# imports
+# standard imports
import typing
# inner-module imports
from . import bsfs as _bsfs
-# constants
-bse = _bsfs.Namespace('http://bsfs.ai/schema/Entity')
-bsfs = _bsfs.Namespace('http://bsfs.ai/schema', fsep='/')
-bsm = _bsfs.Namespace('http://bsfs.ai/schema/Meta')
-xsd = _bsfs.Namespace('http://www.w3.org/2001/XMLSchema')
+# generic namespaces
+xsd = _bsfs.Namespace('http://www.w3.org/2001/XMLSchema')()
+
+# core bsfs/bsie namespaces
+bsfs = _bsfs.Namespace('https://schema.bsfs.io/core')
+bsie = _bsfs.Namespace('https://schema.bsfs.io/ie')
+
+# auxiliary namespaces
+bsd = bsie.distance()
+bse = bsie.Node.Entity()
+bsf = bsie.Literal.Array.Feature
+bsl = bsfs.Literal
+bsn = bsie.Node
+bsp = bsie.Node.Preview()
# export
__all__: typing.Sequence[str] = (
+ 'bsd',
'bse',
+ 'bsf',
'bsfs',
- 'bsm',
+ 'bsie',
+ 'bsl',
+ 'bsl',
+ 'bsn',
+ 'bsp',
'xsd',
)
diff --git a/bsie/utils/node.py b/bsie/utils/node.py
index ecf39cd..fa34b2e 100644
--- a/bsie/utils/node.py
+++ b/bsie/utils/node.py
@@ -1,10 +1,6 @@
"""Lighweight Node to bridge to BSFS.
-
-Part of the bsie module.
-A copy of the license is provided with the project.
-Author: Matthias Baumgartner, 2022
"""
-# imports
+# standard imports
import typing
# bsie imports
@@ -19,30 +15,47 @@ __all__: typing.Sequence[str] = (
## code ##
class Node():
- """Lightweight Node, disconnected from any bsfs structures."""
+ """Lightweight Node, disconnected from any bsfs structures.
+
+ In most cases, provide *hints* and leave setting the uri to a node
+ naming policy. Only provide an *uri* if it is absolutely determined.
+
+ """
# node type.
node_type: bsfs.URI
# node URI.
- uri: bsfs.URI
+ uri: typing.Optional[bsfs.URI]
+
+ # node naming hints.
+ hits: dict
def __init__(
self,
node_type: bsfs.URI,
- uri: bsfs.URI,
+ uri: typing.Optional[bsfs.URI] = None,
+ **uri_hints,
):
# assign members
self.node_type = bsfs.URI(node_type)
- self.uri = bsfs.URI(uri)
+ self.hints = uri_hints
+ self.uri = uri
def __eq__(self, other: typing.Any) -> bool:
+ """Compare two Node instances based on type and uri.
+ Compares hits only if the uri is not yet specified.
+ """
return isinstance(other, Node) \
and other.node_type == self.node_type \
- and other.uri == self.uri
+ and other.uri == self.uri \
+ and (self.uri is not None or self.hints == other.hints)
def __hash__(self) -> int:
- return hash((type(self), self.node_type, self.uri))
+ identifier = self.uri
+ if identifier is None:
+ identifier = tuple((key, self.hints[key]) for key in sorted(self.hints))
+ return hash((type(self), self.node_type, identifier))
def __str__(self) -> str:
return f'{bsfs.typename(self)}({self.node_type}, {self.uri})'