diff options
author | Matthias Baumgartner <dev@igsor.net> | 2023-02-02 10:04:03 +0100 |
---|---|---|
committer | Matthias Baumgartner <dev@igsor.net> | 2023-02-02 10:04:03 +0100 |
commit | c6856aa6fe2ad478dd5bc6285fb2544c150b2033 (patch) | |
tree | b084d75afbca13c34f2b71b609fd2c63a160522d /tagit/parsing | |
parent | 57327d3df562736cad9e278e13beeb55bf3b52ed (diff) | |
download | tagit-c6856aa6fe2ad478dd5bc6285fb2544c150b2033.tar.gz tagit-c6856aa6fe2ad478dd5bc6285fb2544c150b2033.tar.bz2 tagit-c6856aa6fe2ad478dd5bc6285fb2544c150b2033.zip |
filter port
Diffstat (limited to 'tagit/parsing')
-rw-r--r-- | tagit/parsing/filter/__init__.py | 4 | ||||
-rw-r--r-- | tagit/parsing/filter/from_string.py | 20 | ||||
-rw-r--r-- | tagit/parsing/filter/to_string.py | 255 |
3 files changed, 272 insertions, 7 deletions
diff --git a/tagit/parsing/filter/__init__.py b/tagit/parsing/filter/__init__.py index 88b6256..defb332 100644 --- a/tagit/parsing/filter/__init__.py +++ b/tagit/parsing/filter/__init__.py @@ -6,12 +6,12 @@ Author: Matthias Baumgartner, 2022 """ # inner-module imports from .from_string import FromString -#from .to_string import ToString +from .to_string import ToString # exports __all__ = ( 'FromString', - #'ToString', + 'ToString', ) ## EOF ## diff --git a/tagit/parsing/filter/from_string.py b/tagit/parsing/filter/from_string.py index 5a38723..ed24f63 100644 --- a/tagit/parsing/filter/from_string.py +++ b/tagit/parsing/filter/from_string.py @@ -16,7 +16,7 @@ from pyparsing import CaselessKeyword, Combine, Group, Optional, Or, Word, delim # tagit imports from tagit.parsing.datefmt import parse_datetime from tagit.utils import bsfs, errors, ns, ttime -from tagit.utils.bsfs import ast +from tagit.utils.bsfs import ast, URI # constants SEARCH_DELIM = '/' @@ -36,6 +36,9 @@ class FromString(): _DATETIME_PREDICATES = None _QUERY = None + # current schema. + schema: bsfs.schema.Schema + def __init__(self, schema: bsfs.schema.Schema): self.schema = schema @@ -51,7 +54,6 @@ class FromString(): def build_parser(self): """ """ - # valid predicates per type, as supplied by tagit.library # FIXME: # * range / type constraints # * how to filter predicates @@ -74,11 +76,11 @@ class FromString(): self._abb2uri = {pred.uri.fragment: pred.uri for pred in predicates} # FIXME: tie-breaking for duplicates self._uri2abb = {uri: fragment for fragment, uri in self._abb2uri.items()} # all predicates - _PREDICATES = {self._uri2abb[pred.uri] for pred in predicates} + _PREDICATES = {self._uri2abb[pred.uri] for pred in predicates} | {'id', 'group'} # FIXME: properly document additions # numeric predicates - _PREDICATES_NUMERIC = {self._uri2abb[pred.uri] for pred in predicates if isinstance(pred.range, bsfs.schema.Literal) and pred.range <= self.schema.literal(ns.bsfs.Number)} # FIXME: type check might become unnecessary + _PREDICATES_NUMERIC = {self._uri2abb[pred.uri] for pred in predicates if pred.range <= self.schema.literal(ns.bsfs.Number)} # datetime predicates - self._DATETIME_PREDICATES = {pred.uri for pred in predicates if isinstance(pred.range, bsfs.schema.Literal) and pred.range <= self.schema.literal(ns.bsfs.Time)} # FIXME: type check might become unnecessary + self._DATETIME_PREDICATES = {pred.uri for pred in predicates if pred.range <= self.schema.literal(ns.bsfs.Time)} _PREDICATES_DATETIME = {self._uri2abb[pred] for pred in self._DATETIME_PREDICATES} @@ -203,6 +205,14 @@ class FromString(): raise errors.ParserError('Invalid operator ({})'.format(exp.op), exp) tokens.append(tok) + elif exp.getName() == 'categorical' and exp.predicate.lower() == 'id': + values = [URI(s.strip()) for s in exp.value] + tokens.append(ast.filter.IsIn(*values)) + + elif exp.getName() == 'categorical' and exp.predicate.lower() == 'group': + values = [URI(s.strip()) for s in exp.value] + tokens.append(ast.filter.Any(ns.bse.group, ast.filter.IsIn(*values))) + elif exp.getName() == 'categorical': pred = self._abb2uri[exp.predicate.lower()] approx = False diff --git a/tagit/parsing/filter/to_string.py b/tagit/parsing/filter/to_string.py new file mode 100644 index 0000000..0b1a3e1 --- /dev/null +++ b/tagit/parsing/filter/to_string.py @@ -0,0 +1,255 @@ +""" + +Part of the tagit module. +A copy of the license is provided with the project. +Author: Matthias Baumgartner, 2022 +""" +# tagit imports +from tagit.utils.bsfs import ast, matcher, URI +from tagit.utils import errors, ns + +# exports +__all__ = ('ToString', ) + + +## code ## + +class ToString(): + + def __init__(self, schema): + self.matches = matcher.Filter() + + self.schema = schema + predicates = {pred for pred in self.schema.predicates() if pred.domain <= self.schema.node(ns.bsfs.Entity)} + # shortcuts + self._abb2uri = {pred.uri.fragment: pred.uri for pred in predicates} # FIXME: tie-breaking for duplicates + self._uri2abb = {uri: fragment for fragment, uri in self._abb2uri.items()} + + def __call__(self, query): + """ + """ + # FIXME: test query class type + if self.matches(query, ast.filter.And(matcher.Rest())): + return ' / '.join(self._parse(sub) for sub in query) + return self._parse(query) + + def _parse(self, query): + cases = ( + self._has, + self._entity, + self._group, + self._tag, + self._range, + self._categorical, + ) + for clbk in cases: + result = clbk(query) + if result is not None: + return result + + raise errors.BackendError() + + def _has(self, query): + # Has(<pred>) <-> has <pred> + # Not(Has(<pred>)) <-> has no <pred> + has = ast.filter.Has( + matcher.Partial(ast.filter.Predicate), + ast.filter.GreaterThan(1, strict=False)) + if self.matches(query, has): + # FIXME: guard against predicate mismatch + return f'has {self._uri2abb[query.predicate.predicate]}' + if self.matches(query, ast.filter.Not(has)): + # FIXME: guard against predicate mismatch + return f'has no {self._uri2abb[query.predicate.predicate]}' + return None + + def _categorical(self, query): + if not isinstance(query, ast.filter._Branch): + return None + + # shortcuts + expr = query.expr + pred = self._uri2abb.get(query.predicate.predicate, None) + if pred is None: + return None + + # positive constraints + if isinstance(query, ast.filter.Any): + # approximate positive constraint + # Any(<pred>, Includes(<values>, approx=True)) -> pred ~ ("...", ...) + if self.matches(expr, matcher.Partial(ast.filter.Substring)): + return f'{pred} ~ {expr.value}' + if self.matches(expr, ast.filter.Or(matcher.Rest(matcher.Partial(ast.filter.Substring)))): + values = '", "'.join(sub.value for sub in expr) + return f'{pred} ~ ("{values}")' + + # exact positive constraint + # ast.filter.Any(<pred>, ast.filter.Includes(<values>, approx=False)) -> pred = ("...", ...) + if self.matches(expr, matcher.Partial(ast.filter.Equals)): + return f'{pred} = {expr.value}' + if self.matches(query, ast.filter.Or(matcher.Rest(matcher.Partial(ast.filter.Equals)))): + values = '", "'.join(sub.value for sub in expr) + return f'{pred} = ("{values}")' + + # negative constraints + if isinstance(query, ast.filter.All): + # approximate negative constraint + # ast.filter.All(<pred>, ast.filter.Excludes(<values>, approx=True)) -> pred !~ ("...", ...) + if self.matches(query, ast.filter.Not(matcher.Partial(ast.filter.Substring))): + return f'{pred} !~ "{expr.value}"' + if self.matches(query, ast.filter.Not(ast.filter.Or(matcher.Rest(matcher.Partial(ast.filter.Substring))))): + values = '", "'.join(sub.value for sub in expr) + return f'{pred} !~ ("{values}")' + + # exact negative constraint + # ast.filter.All(<pred>, ast.filter.Excludes(<values>, approx=False)) -> pred != ("...", ...) + if self.matches(query, ast.filter.Not(matcher.Partial(ast.filter.Equals))): + return f'{pred} != "{expr.value}"' + if self.matches(query, ast.filter.Not(ast.filter.Or(matcher.Rest(matcher.Partial(ast.filter.Equals))))): + values = '", "'.join(sub.value for sub in expr) + return f'{pred} != ("{values}")' + + return None + + def _tag(self, query): + # positive constraint + # ast.filter.Any(ns.bse.tag, ast.filter.Any(ns.bst.label, ast.filter.Includes(..., approx=?))) <-> "...", ...; ~ "...", ... + if self.matches(query, ast.filter.Any(ns.bse.tag, ast.filter.Any(ns.bst.label, matcher.Any()))): + expr = query.expr.expr + # approximate positive constraint + if self.matches(expr, matcher.Partial(ast.filter.Substring)): + return f'~ {expr.value}' + if self.matches(expr, ast.filter.Or(matcher.Rest(matcher.Partial(ast.filter.Substring)))): + values = '", "'.join(sub.value for sub in expr) + return f'~ "{values}"' + # exact positive constraint + if self.matches(expr, matcher.Partial(ast.filter.Equals)): + return f'{expr.value}' + if self.matches(expr, ast.filter.Or(matcher.Rest(matcher.Partial(ast.filter.Equals)))): + values = '", "'.join(sub.value for sub in expr) + return f'"{values}"' + + # negative constraint + # ast.filter.All(ns.bse.tag, ast.filter.Any(ns.bst.label, ast.filter.Excludes(..., approx=?))) <-> ! "...", ... ; !~ "...", ... + if self.matches(query, ast.filter.All(ns.bse.tag, ast.filter.Any(ns.bst.label, ast.filter.Not(matcher.Any())))): + expr = query.expr.expr.expr + # approximate negative constraint + if self.matches(expr, matcher.Partial(ast.filter.Substring)): + return f'!~ {expr.value}' + if self.matches(expr, ast.filter.Or(matcher.Rest(matcher.Partial(ast.filter.Substring)))): + values = '", "'.join(sub.value for sub in expr) + return f'!~ "{values}"' + # exact negative constraint + if self.matches(expr, matcher.Partial(ast.filter.Equals)): + return f'! {expr.value}' + if self.matches(expr, ast.filter.Or(matcher.Rest(matcher.Partial(ast.filter.Equals)))): + values = '", "'.join(sub.value for sub in expr) + return f'! "{values}"' + + return None + + def _range(self, query): + # FIXME: handle dates and times! + # FIXME: use default/configurable separators from from_string + if not isinstance(query, ast.filter.Any): + return None + + expr = query.expr + pred = self._uri2abb.get(query.predicate.predicate, None) + if pred is None: + return None + + if self.matches(expr, matcher.Partial(ast.filter.Equals)): + return f'{pred} = {expr.value}' + if self.matches(expr, matcher.Partial(ast.filter.GreaterThan, strict=True)): + return f'{pred} > {expr.threshold}' + if self.matches(expr, matcher.Partial(ast.filter.GreaterThan, strict=False)): + return f'{pred} >= {expr.threshold}' + if self.matches(expr, matcher.Partial(ast.filter.LessThan, strict=True)): + return f'{pred} < {expr.threshold}' + if self.matches(expr, matcher.Partial(ast.filter.LessThan, strict=False)): + return f'{pred} <= {expr.threshold}' + if self.matches(expr, ast.filter.And( + matcher.Partial(ast.filter.GreaterThan), + matcher.Partial(ast.filter.LessThan))): + lo, hi = list(expr) + if self.matches(lo, matcher.Partial(ast.filter.LessThan)): + lo, hi = hi, lo + b_open = '(' if lo.strict else '[' + b_close = ')' if hi.strict else ']' + return f'{pred} = {b_open}{lo.threshold} - {hi.threshold}{b_close}' + """ + ast.filter.Any(<pred>, ast.filter.Between(lo, hi, lo_strict, hi_strict)) + pred <? hi + pred >? hi + pred = [lo, hi] + pred = (lo, hi) + pred = [lo, hi) + pred = (lo, hi] + """ + return None + + def _entity(self, query): + # defaults + negated = False + guids = set() + + def get_guids(value): + if isinstance(value, URI): + return {value} + else: # elif isinstance(query.value, Nodes): + return set(value.guids) + + if self.matches(query, matcher.Partial(ast.filter.Is)): + guids = get_guids(query.value) + elif self.matches(query, ast.filter.Or(matcher.Rest(matcher.Partial(ast.filter.Is)))): + guids = {guid for sub in query for guid in get_guids(sub.value) } + elif self.matches(query, ast.filter.Not(matcher.Partial(ast.filter.Is))): + negated = True + guids = get_guids(query.value) + elif self.matches(query, ast.filter.Not(ast.filter.Or(matcher.Rest(matcher.Partial(ast.filter.Is))))): + negated = True + guids = {guid for sub in query for guid in get_guids(sub.value) } + + if len(guids) == 0: + # no matches + return None + # some matches + cmp = 'not in' if negated else 'in' + values = '", "'.join(guids) + return f'id {cmp} "{values}"' + + def _group(self, query): + # ast.filter.Any(ns.bse.group, ast.filter.Is(...)) <-> group = ("...", ...) + if not self.matches(query, ast.filter.Any(ns.bse.group, matcher.Any())): + return None + + def get_guids(value): + if isinstance(value, URI): + return {value} + else: # elif isinstance(query.value, Nodes): + return set(value.guids) + + expr = query.expr + guids = set() + negated = False + + if self.matches(expr, matcher.Partial(ast.filter.Is)): + guids = get_guids(expr.value) + elif self.matches(expr, ast.filter.Or(matcher.Rest(matcher.Partial(ast.filter.Is)))): + guids = {guid for sub in expr for guid in get_guids(sub.value) } + elif self.matches(expr, ast.filter.Not(matcher.Partial(ast.filter.Is))): + negated = True + guids = get_guids(expr.value) + elif self.matches(expr, ast.filter.Not(ast.filter.Or(matcher.Rest(matcher.Partial(ast.filter.Is))))): + negated = True + guids = {guid for sub in expr for guid in get_guids(sub.value) } + + if len(guids) == 0: # no matches + return None + # some matches + cmp = 'not in' if negated else 'in' + values = '", "'.join(guids) + return f'group {cmp} "{values}"' + +## EOF ## |