| author | Matthias Baumgartner <dev@igsor.net> | 2023-01-13 09:49:10 +0100 |
| committer | Matthias Baumgartner <dev@igsor.net> | 2023-01-13 09:49:10 +0100 |
| commit | 9c366758665d9cfee7796ee45a8167a5412ae9ae (patch) |
| tree | b42e0a1fd4b1bd59fc31fad6267b83c2dc9a3a3b /tagit/parsing |
| parent | 8f2f697f7ed52b7e1c7a17411b2de526b6490691 (diff) |
| download | tagit-9c366758665d9cfee7796ee45a8167a5412ae9ae.tar.gz, tagit-9c366758665d9cfee7796ee45a8167a5412ae9ae.tar.bz2, tagit-9c366758665d9cfee7796ee45a8167a5412ae9ae.zip |
filter early port, parsing adaptions
Diffstat (limited to 'tagit/parsing')
| -rw-r--r-- | tagit/parsing/__init__.py | 8 |
| -rw-r--r-- | tagit/parsing/filter.py (renamed from tagit/parsing/search.py) | 169 |
| -rw-r--r-- | tagit/parsing/sort.py | 17 |
3 files changed, 75 insertions, 119 deletions
diff --git a/tagit/parsing/__init__.py b/tagit/parsing/__init__.py
index 1c431a4..0070bf9 100644
--- a/tagit/parsing/__init__.py
+++ b/tagit/parsing/__init__.py
@@ -6,14 +6,14 @@ Author: Matthias Baumgartner, 2022
 """
 # inner-module imports
 from .datefmt import parse_datetime
-from .search import ast_from_string
-from .sort import sort_from_string
+from .filter import Filter
+from .sort import Sort
 
 # exports
 __all__ = (
-    'ast_from_string',
+    'Filter',
+    'Sort',
     'parse_datetime',
-    'sort_from_string',
     )
 
 ## EOF ##
diff --git a/tagit/parsing/search.py b/tagit/parsing/filter.py
index 10d0e7c..ea8df51 100644
--- a/tagit/parsing/search.py
+++ b/tagit/parsing/filter.py
@@ -1,7 +1,7 @@
 """User-specified search query parsing.
 
 >>> q = "has mime / tag in (november, october) / ! Apfel / time < 10.10.2004 / iso in (100, 200)"
->>> ast = ast_from_string(q)
+>>> ast = filter_from_string(q)
 
 Part of the tagit module.
 A copy of the license is provided with the project.
@@ -14,37 +14,29 @@ from datetime import datetime
 from pyparsing import CaselessKeyword, Combine, Group, Optional, Or, Word, delimitedList, nums, oneOf, ParseException, Literal, QuotedString, alphanums, alphas8bit, punc8bit
 
 # tagit imports
-from tagit.utils import errors, ttime
+from tagit.utils import bsfs, errors, ns, ttime
+from tagit.utils.bsfs import ast
 
 # inner-module imports
-from . import datefmt
-
-# exports
-__all__ = (
-    'ast_from_string',
-    )
+from .datefmt import parse_datetime
 
 # constants
 SEARCH_DELIM = '/'
 VALUE_DELIM = ','
-DEFAULT_PREDICATE = 'tag'
+# exports
+__all__ = (
+    'Filter',
+    )
 
 
-## code ##
-class SearchParser():
+## code ##
 
-    # valid predicates per type
-    _PREDICATES_CATEGORICAL = None
-    _PREDICATES_CONTINUOUS = None
-    _PREDICATES_DATETIME = None
+class Filter():
 
     # parsers
-    _CATEGORICAL = None
-    _CONTINUOUS = None
-    _EXISTENCE = None
+    _DATETIME_PREDICATES = None
     _QUERY = None
-    _TAG = None
 
     def __init__(self, schema: bsfs.schema.Schema):
         self.schema = schema
@@ -61,9 +53,6 @@ class SearchParser():
     def build_parser(self):
         """
         """
-        # The *predicate* argument is for compatibility with predicate listener.
-        # It's not actually used here.
-
         # valid predicates per type, as supplied by tagit.library
         # FIXME:
         # * range / type constraints
@@ -79,9 +68,21 @@
         > Target: Entity (allow others?) -> rfds:domain
         > Require: searchable as specified in backend AND user-searchable as specified in frontend
         """
-        self._PREDICATES_CATEGORICAL = self.schema.predicates(searchable=True, range=self.schema.tm.categorical) # FIXME!
-        self._PREDICATES_CONTINUOUS = self.schema.predicates(searchable=True, range=self.schema.tm.numerical) # FIXME!
-        self._PREDICATES_DATETIME = self.schema.predicates(searchable=True, range=self.schema.tm.datetime) # FIXME!
+        # all relevant predicates
+        predicates = {pred for pred in self.schema.predicates() if pred.domain <= self.schema.node(ns.bsfs.Entity)}
+        # filter through accept/reject lists
+        ... # FIXME
+        # shortcuts
+        self._abb2uri = {pred.uri.fragment: pred.uri for pred in predicates} # FIXME: tie-breaking for duplicates
+        self._uri2abb = {uri: fragment for fragment, uri in self._abb2uri.items()}
+        # all predicates
+        _PREDICATES = {self._uri2abb[pred.uri] for pred in predicates}
+        # numeric predicates
+        _PREDICATES_NUMERIC = {self._uri2abb[pred.uri] for pred in predicates if isinstance(pred.range, bsfs.schema.Literal) and pred.range <= self.schema.literal(ns.bsfs.Number)} # FIXME: type check might become unnecessary
+        # datetime predicates
+        self._DATETIME_PREDICATES = {pred.uri for pred in predicates if isinstance(pred.range, bsfs.schema.Literal) and pred.range <= self.schema.literal(ns.bsfs.Time)} # FIXME: type check might become unnecessary
+        _PREDICATES_DATETIME = {self._uri2abb[pred] for pred in self._DATETIME_PREDICATES}
+
         # terminal symbols
         number = Group(Optional(oneOf('- +')) \
@@ -93,11 +94,11 @@
         # FIXME: Non-ascii characters
 
         # predicates
-        predicate = Or([CaselessKeyword(p) for p in self._PREDICATES_CATEGORICAL]).setResultsName(
+        predicate = Or([CaselessKeyword(p) for p in _PREDICATES]).setResultsName(
             'predicate')
-        date_predicate = Or([CaselessKeyword(p) for p in self._PREDICATES_DATETIME]).setResultsName(
+        date_predicate = Or([CaselessKeyword(p) for p in _PREDICATES_DATETIME]).setResultsName(
             'predicate')
-        num_predicate = Or([CaselessKeyword(p) for p in self._PREDICATES_CONTINUOUS]).setResultsName(
+        num_predicate = Or([CaselessKeyword(p) for p in _PREDICATES_NUMERIC]).setResultsName(
             'predicate')
 
         # existence
@@ -106,7 +107,7 @@
             PREDICATE := [predicate]
         """
         op = (CaselessKeyword('has') ^ CaselessKeyword('has no') ^ CaselessKeyword('has not')).setResultsName('op')
-        self._EXISTENCE = Group(op + predicate).setResultsName('existence')
+        _EXISTENCE = Group(op + predicate).setResultsName('existence')
 
 
         # continuous
@@ -127,7 +128,7 @@
         bclose = oneOf(') ] [').setResultsName('bclose')
         bopen = oneOf('( [ ]').setResultsName('bopen')
         op = Or([':', '=', 'in']).setResultsName('op')
-        datefmt = datefmt.parse_datetime.DATETIME
+        datefmt = parse_datetime.DATETIME
         rngn = num_predicate + op + bopen + number('lo') + rsepn + number('hi') + bclose ^ \
                num_predicate + op + bopen + rsepn + number('hi') + bclose ^ \
                num_predicate + op + bopen + number('lo') + rsepn + bclose
@@ -143,7 +144,7 @@
               datefmt('vleft') + cmp('cleft') + date_predicate ^ \
               datefmt('vleft') + cmp('cleft') + date_predicate + cmp('cright') + datefmt('vright')
         # combined
-        self._CONTINUOUS = Group(
+        _CONTINUOUS = Group(
             Group(eqn).setResultsName('eq') ^
             Group(eqd).setResultsName('eq') ^
             Group(rngn).setResultsName('range') ^ \
@@ -161,7 +162,7 @@
         """
         op = (CaselessKeyword('in') ^ CaselessKeyword('not in') ^ ':' ^ '=' ^ '!=' ^ '~' ^ '!~').setResultsName('op')
         value = delimitedList(words, delim=VALUE_DELIM).setResultsName('value')
-        self._CATEGORICAL = Group(predicate + op + ('(' + value + ')' | value) ).setResultsName('categorical')
+        _CATEGORICAL = Group(predicate + op + ('(' + value + ')' | value) ).setResultsName('categorical')
 
 
         # tag shortcuts
@@ -173,35 +174,17 @@
         """
         op = oneOf('! ~ !~').setResultsName('op')
         value = delimitedList(words, delim=VALUE_DELIM).setResultsName('value')
-        self._TAG = Group(Optional(op) + '(' + value + ')' ^ Optional(op) + value).setResultsName('tag')
+        _TAG = Group(Optional(op) + '(' + value + ')' ^ Optional(op) + value).setResultsName('tag')
 
         # overall query
         """
         QUERY := QUERY / QUERY | EXPR
         """
-        self._QUERY = delimitedList(self._EXISTENCE | self._CONTINUOUS | self._CATEGORICAL | self._TAG, delim=SEARCH_DELIM)
+        self._QUERY = delimitedList(_EXISTENCE | _CONTINUOUS | _CATEGORICAL | _TAG, delim=SEARCH_DELIM)
 
         return self
 
-    def __del__(self):
-        if self._QUERY is not None: # remove listener
-            try:
-                self.predicates.ignore(self.build_parser)
-            except ImportError:
-                # The import fails if python is shutting down.
-                # In that case, the ignore becomes unnecessary anyway.
-                pass
-
     def __call__(self, search):
-        # FIXME: mb/port/parsing
-        #if self._QUERY is None:
-        #    # parsers were not initialized yet
-        #    self.build_parser()
-        #    # attach listener to receive future updates
-        #    self.predicates.listen(self.build_parser)
-        #    # FIXME: Additional filters would be handy
-        #    #self.predicates.listen(self.build_parser, self.predicates.scope.library)
-
         try:
             parsed = self._QUERY.parseString(search, parseAll=True)
         except ParseException as e:
@@ -211,61 +194,58 @@
         tokens = []
         for exp in parsed:
             if exp.getName() == 'existence':
+                pred = self._abb2uri[exp.predicate.lower()]
                 if 'op' not in exp: # prevented by grammar
                     raise errors.ParserError('Missing operator', exp)
                 elif exp.op == 'has':
-                    cond = ast.Existence()
+                    tok = ast.filter.Has(pred)
                 elif exp.op in ('has no', 'has not'):
-                    cond = ast.Inexistence()
+                    tok = ast.filter.Not(ast.filter.Has(pred))
                 else: # prevented by grammar
                     raise errors.ParserError('Invalid operator ({})'.format(exp.op), exp)
-
-                tokens.append(
-                    ast.Token(exp.predicate.lower(), cond))
+                tokens.append(tok)
 
             elif exp.getName() == 'categorical':
+                pred = self._abb2uri[exp.predicate.lower()]
+                approx = False
                 values = [s.strip() for s in exp.value]
                 if 'op' not in exp: # prevented by grammar
                     raise errors.ParserError('Missing operator', exp)
-                elif exp.op in (':', '=', 'in'):
-                    cond = ast.SetInclude(values)
-                elif exp.op in ('!=', 'not in'):
-                    cond = ast.SetExclude(values)
-                elif exp.op == '~':
-                    cond = ast.SetInclude(values, approximate=True)
-                elif exp.op == '!~':
-                    cond = ast.SetExclude(values, approximate=True)
+                if exp.op in ('~' '!~'):
+                    approx = True
+                if exp.op in (':', '=', '~', 'in'):
+                    tok = ast.filter.Any(pred, ast.filter.Includes(*values, approx=approx))
+                elif exp.op in ('!=', '!~', 'not in'):
+                    tok = ast.filter.All(pred, ast.filter.Excludes(*values, approx=approx))
                 else: # prevented by grammar
                     raise errors.ParserError('Invalid operator ({})'.format(exp.op), exp)
-
-                tokens.append(
-                    ast.Token(exp.predicate.lower(), cond))
+                tokens.append(tok)
 
             elif exp.getName() == 'tag':
                 values = [s.strip() for s in exp.value]
                 if 'op' not in exp:
-                    cond = ast.SetInclude(values)
+                    outer = ast.filter.Any
+                    cond = ast.filter.Includes(*values)
                 elif exp.op == '~':
-                    cond = ast.SetInclude(values, approximate=True)
+                    outer = ast.filter.Any
+                    cond = ast.filter.Includes(*values, approx=True)
                 elif exp.op == '!':
-                    cond = ast.SetExclude(values)
+                    outer = ast.filter.All
+                    cond = ast.filter.Excludes(*values)
                 elif exp.op == '!~':
-                    cond = ast.SetExclude(values, approximate=True)
+                    outer = ast.filter.All
+                    cond = ast.filter.Excludes(*values, approx=True)
                 else: # prevented by grammar
                     raise errors.ParserError('Invalid operator ({})'.format(exp.op), exp)
+                tokens.append(outer(ns.bse.tag, ast.filter.Any(ns.bst.label, cond)))
 
-                tokens.append(
-                    ast.Token(DEFAULT_PREDICATE, cond))
-
-            elif exp.getName() == 'continuous':
-
+            elif exp.getName() == 'continuous': # FIXME: simplify and adapt bsfs.query.ast.filter.Between accordingly!
                 lo, hi = None, None
                 lo_inc, hi_inc = False, False
                 predicate = None
-
                 if 'eq' in exp: # equation style
-                    predicate = exp.eq.predicate.lower()
+                    predicate = self._abb2uri[exp.eq.predicate.lower()]
 
                     if ('>' in exp.eq.cleft and '<' in exp.eq.cright) or \
                        ('<' in exp.eq.cleft and '>' in exp.eq.cright) or \
@@ -294,7 +274,7 @@
                         lo_inc = hi_inc = True
 
                 elif 'range' in exp: # value in [lo:hi]
-                    predicate = exp.range.predicate.lower()
+                    predicate = self._abb2uri[exp.range.predicate.lower()]
 
                     if 'lo' in exp.range:
                         lo = exp.range.lo
@@ -307,7 +287,7 @@
                     raise errors.ParserError('Expression is neither a range nor an equation', exp)
 
                 # interpret values
-                if predicate in set([p.lower() for p in self._PREDICATES_DATETIME]):
+                if predicate in self._DATETIME_PREDICATES:
                     # turn into datetime
                     lo, lfmt = datefmt.guess_datetime(lo) if lo is not None else (None, None)
@@ -357,7 +337,8 @@
                            raise errors.ParserError('Lower bound must not exceed upper bound', (lo, hi))
 
                         tokens.append(
-                            ast.Token(predicate, ast.TimeRange(lo, hi, lo_inc, hi_inc)))
+                            ast.filter.Any(predicate,
+                                ast.filter.Between(lo, hi, not lo_inc, not hi_inc)))
 
                     else: # date specification
                         # Check consistency
@@ -368,7 +349,8 @@
                            raise errors.ParserError('Lower bound must not exceed upper bound', (lo, hi))
 
                         tokens.append(
-                            ast.Token(predicate, ast.Datetime(lo, hi, lo_inc, hi_inc)))
+                            ast.filter.Any(predicate,
+                                ast.filter.Between(lo, hi, not lo_inc, not hi_inc)))
 
                 else:
                     # number predicate
@@ -379,27 +361,14 @@
                     if not (lo < hi or (lo == hi and lo_inc and hi_inc)):
                         raise errors.ParserError('Lower bound must not exceed upper bound', (lo, hi))
 
+                    # FIXME: mb/port: Three times the same code... optimize
                     tokens.append(
-                        ast.Token(predicate, ast.Continuous(lo, hi, lo_inc, hi_inc)))
+                        ast.filter.Any(predicate,
+                            ast.filter.Between(lo, hi, not lo_inc, not hi_inc)))
 
             else: # prevented by grammar
                 raise errors.ParserError('Invalid expression', exp)
 
-        return ast.AND(tokens)
-
-
-
-"""Default SearchParser instance.
-
-To produce an ast, call
-
->>> ast_from_string(search)
-
-Convenience shortcut for
-
->>> SearchParser().parse(search)
-
-"""
-ast_from_string = SearchParser(predicates)
+        return ast.filter.And(tokens)
 
 ## EOF ##
diff --git a/tagit/parsing/sort.py b/tagit/parsing/sort.py
index 8950613..75fa36c 100644
--- a/tagit/parsing/sort.py
+++ b/tagit/parsing/sort.py
@@ -12,13 +12,13 @@ from tagit.utils import errors, Struct
 
 # exports
 __all__ = (
-    'sort_from_string',
+    'Sort',
     )
 
 
 ## code ##
 
-class SortParser():
+class Sort():
     """Sort parser.
 
     A sort string can be as simple as a predicate, but also allows
@@ -176,17 +176,4 @@
         else:
             return ast.Order(*tokens)
 
-"""Default SortParser instance.
-
-To produce an ast, call
-
->>> sort_from_string(sort)
-
-Convenience shortcut for
-
->>> SortParser().parse(sort)
-
-"""
-sort_from_string = SortParser(sortkeys)
-
 ## EOF ##
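
For orientation, a minimal usage sketch of the renamed parser, not part of the commit: it assumes a bsfs schema object is available (how it is obtained lies outside this patch), and the explicit build_parser() call is an assumption, since the hunks do not show whether __init__ already invokes it. Sort replaces the former SortParser the same way, but its entry point is not visible in this diff.

# Usage sketch (assumption based on this diff, not code from the commit).
# Filter takes a bsfs schema, builds its pyparsing grammar from the schema's
# predicates (build_parser, shown above), and maps a query string to a bsfs
# filter AST via __call__, which returns ast.filter.And of the parsed tokens.
from tagit.parsing import Filter   # new export, replaces ast_from_string

def build_filter_ast(schema, query):
    """Parse a user query string into a bsfs filter AST."""
    flt = Filter(schema)
    flt.build_parser()   # explicit here; whether __init__ already does this is not shown in the hunks
    return flt(query)

# Example query, taken from the module docstring in filter.py:
#   build_filter_ast(schema, "has mime / tag in (november, october) / ! Apfel / time < 10.10.2004")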