From 52fa64513dae60c3ed410622502f8c2369c1a348 Mon Sep 17 00:00:00 2001 From: Matthias Baumgartner Date: Fri, 13 Jan 2023 10:14:18 +0100 Subject: moved filter parsing code --- tagit/parsing/__init__.py | 4 +- tagit/parsing/filter.py | 374 ------------------------------------ tagit/parsing/filter/__init__.py | 17 ++ tagit/parsing/filter/from_string.py | 372 +++++++++++++++++++++++++++++++++++ tagit/utils/errors.py | 4 + tagit/widgets/session.py | 2 +- 6 files changed, 396 insertions(+), 377 deletions(-) delete mode 100644 tagit/parsing/filter.py create mode 100644 tagit/parsing/filter/__init__.py create mode 100644 tagit/parsing/filter/from_string.py (limited to 'tagit') diff --git a/tagit/parsing/__init__.py b/tagit/parsing/__init__.py index 0070bf9..86ad54f 100644 --- a/tagit/parsing/__init__.py +++ b/tagit/parsing/__init__.py @@ -6,12 +6,12 @@ Author: Matthias Baumgartner, 2022 """ # inner-module imports from .datefmt import parse_datetime -from .filter import Filter +from . import filter from .sort import Sort # exports __all__ = ( - 'Filter', + 'filter', 'Sort', 'parse_datetime', ) diff --git a/tagit/parsing/filter.py b/tagit/parsing/filter.py deleted file mode 100644 index ea8df51..0000000 --- a/tagit/parsing/filter.py +++ /dev/null @@ -1,374 +0,0 @@ -"""User-specified search query parsing. - ->>> q = "has mime / tag in (november, october) / ! Apfel / time < 10.10.2004 / iso in (100, 200)" ->>> ast = filter_from_string(q) - -Part of the tagit module. -A copy of the license is provided with the project. 
-Author: Matthias Baumgartner, 2022 -""" -# standard imports -from datetime import datetime - -# external imports -from pyparsing import CaselessKeyword, Combine, Group, Optional, Or, Word, delimitedList, nums, oneOf, ParseException, Literal, QuotedString, alphanums, alphas8bit, punc8bit - -# tagit imports -from tagit.utils import bsfs, errors, ns, ttime -from tagit.utils.bsfs import ast - -# inner-module imports -from .datefmt import parse_datetime - -# constants -SEARCH_DELIM = '/' -VALUE_DELIM = ',' - -# exports -__all__ = ( - 'Filter', - ) - - -## code ## - -class Filter(): - - # parsers - _DATETIME_PREDICATES = None - _QUERY = None - - def __init__(self, schema: bsfs.schema.Schema): - self.schema = schema - - @property - def schema(self) -> bsfs.schema.Schema: - return self._schema - - @schema.setter - def schema(self, schema: bsfs.schema.Schema): - self._schema = schema - self.build_parser() - - def build_parser(self): - """ - """ - # valid predicates per type, as supplied by tagit.library - # FIXME: - # * range / type constraints - # * how to filter predicates - # * distinguish between prefix and label - """ - Categorical: string, float, integer; labelled node (tag, group); maybe boolean - Continuous: float, integer - Datetime: datetime - Existencial: all of the above, particularly bool; unllabelled node (preview) - -> rfds:range - - > Target: Entity (allow others?) -> rfds:domain - > Require: searchable as specified in backend AND user-searchable as specified in frontend - """ - # all relevant predicates - predicates = {pred for pred in self.schema.predicates() if pred.domain <= self.schema.node(ns.bsfs.Entity)} - # filter through accept/reject lists - ... 
# FIXME - # shortcuts - self._abb2uri = {pred.uri.fragment: pred.uri for pred in predicates} # FIXME: tie-breaking for duplicates - self._uri2abb = {uri: fragment for fragment, uri in self._abb2uri.items()} - # all predicates - _PREDICATES = {self._uri2abb[pred.uri] for pred in predicates} - # numeric predicates - _PREDICATES_NUMERIC = {self._uri2abb[pred.uri] for pred in predicates if isinstance(pred.range, bsfs.schema.Literal) and pred.range <= self.schema.literal(ns.bsfs.Number)} # FIXME: type check might become unnecessary - # datetime predicates - self._DATETIME_PREDICATES = {pred.uri for pred in predicates if isinstance(pred.range, bsfs.schema.Literal) and pred.range <= self.schema.literal(ns.bsfs.Time)} # FIXME: type check might become unnecessary - _PREDICATES_DATETIME = {self._uri2abb[pred] for pred in self._DATETIME_PREDICATES} - - - # terminal symbols - number = Group(Optional(oneOf('- +')) \ - + Combine(Word(nums) + Optional('.' + Optional(Word(nums))))) - words = QuotedString(quoteChar='"', escChar='\\') \ - ^ QuotedString(quoteChar="'", escChar='\\') \ - ^ Word(alphanums + alphas8bit + punc8bit + ' *#%&-.:;<=>?@^_`{}~') - # FIXME: allow escaped chars "( ) / , [ ]" - # FIXME: Non-ascii characters - - # predicates - predicate = Or([CaselessKeyword(p) for p in _PREDICATES]).setResultsName( - 'predicate') - date_predicate = Or([CaselessKeyword(p) for p in _PREDICATES_DATETIME]).setResultsName( - 'predicate') - num_predicate = Or([CaselessKeyword(p) for p in _PREDICATES_NUMERIC]).setResultsName( - 'predicate') - - # existence - """ - EXPR := has PREDICATE | has no PREDICATE - PREDICATE := [predicate] - """ - op = (CaselessKeyword('has') ^ CaselessKeyword('has no') ^ CaselessKeyword('has not')).setResultsName('op') - _EXISTENCE = Group(op + predicate).setResultsName('existence') - - - # continuous - """ - EXPR := PREDICATE CMP VALUE | VALUE CMP PREDICATE CMP VALUE | PREDICATE OP RANGE - PREDICATE := [predicate] - CMP := < | <= | = | >= | > - OP := : | = | 
in | not in - RANGE := BOPEN VALUE RSEP VALUE BCLOSE | BOPEN RSEP VALUE BLOSE | BOPEN VALUE RSEP BCLOSE - BOPEN := ( | [ | ] - BCLOSE := ) | ] | [ - RSEP := : | - - VALUE := [digit] | [date] - """ - # range style - rsepn = oneOf(': -') - rsepd = Literal('-') - bclose = oneOf(') ] [').setResultsName('bclose') - bopen = oneOf('( [ ]').setResultsName('bopen') - op = Or([':', '=', 'in']).setResultsName('op') - datefmt = parse_datetime.DATETIME - rngn = num_predicate + op + bopen + number('lo') + rsepn + number('hi') + bclose ^ \ - num_predicate + op + bopen + rsepn + number('hi') + bclose ^ \ - num_predicate + op + bopen + number('lo') + rsepn + bclose - rngd = date_predicate + op + bopen + datefmt('lo') + rsepd + datefmt('hi') + bclose ^ \ - date_predicate + op + bopen + rsepd + datefmt('hi') + bclose ^ \ - date_predicate + op + bopen + datefmt('lo') + rsepd + bclose - # equation style - cmp = oneOf('< <= = >= >').setResultsName('cmp') - eqn = num_predicate + cmp('cright') + number('vright') ^ \ - number('vleft') + cmp('cleft') + num_predicate ^ \ - number('vleft') + cmp('cleft') + num_predicate + cmp('cright') + number('vright') - eqd = date_predicate + cmp('cright') + datefmt('vright') ^ \ - datefmt('vleft') + cmp('cleft') + date_predicate ^ \ - datefmt('vleft') + cmp('cleft') + date_predicate + cmp('cright') + datefmt('vright') - # combined - _CONTINUOUS = Group( - Group(eqn).setResultsName('eq') ^ - Group(eqd).setResultsName('eq') ^ - Group(rngn).setResultsName('range') ^ \ - Group(rngd).setResultsName('range') \ - ).setResultsName('continuous') - - - # categorical - """ - EXPR := PREDICATE OP VALUE | PREDICATE OP (VALUE) - PREDICATE := [predicate] - OP := : | = | in | not in | != | ~ | !~ - VALUE := TERM | VALUE, - TERM := [word] - """ - op = (CaselessKeyword('in') ^ CaselessKeyword('not in') ^ ':' ^ '=' ^ '!=' ^ '~' ^ '!~').setResultsName('op') - value = delimitedList(words, delim=VALUE_DELIM).setResultsName('value') - _CATEGORICAL = Group(predicate + op + ('(' 
+ value + ')' | value) ).setResultsName('categorical') - - - # tag shortcuts - """ - EXPR := OP VALUE | OP (VALUE) | VALUE | (VALUE) - OP := ! | ~ | !~ - VALUE := TERM | VALUE, - TERM := [word] - """ - op = oneOf('! ~ !~').setResultsName('op') - value = delimitedList(words, delim=VALUE_DELIM).setResultsName('value') - _TAG = Group(Optional(op) + '(' + value + ')' ^ Optional(op) + value).setResultsName('tag') - - - # overall query - """ - QUERY := QUERY / QUERY | EXPR - """ - self._QUERY = delimitedList(_EXISTENCE | _CONTINUOUS | _CATEGORICAL | _TAG, delim=SEARCH_DELIM) - return self - - def __call__(self, search): - try: - parsed = self._QUERY.parseString(search, parseAll=True) - except ParseException as e: - raise errors.ParserError('Cannot parse query', e) - - # convert to AST - tokens = [] - for exp in parsed: - if exp.getName() == 'existence': - pred = self._abb2uri[exp.predicate.lower()] - if 'op' not in exp: # prevented by grammar - raise errors.ParserError('Missing operator', exp) - elif exp.op == 'has': - tok = ast.filter.Has(pred) - elif exp.op in ('has no', 'has not'): - tok = ast.filter.Not(ast.filter.Has(pred)) - else: # prevented by grammar - raise errors.ParserError('Invalid operator ({})'.format(exp.op), exp) - tokens.append(tok) - - elif exp.getName() == 'categorical': - pred = self._abb2uri[exp.predicate.lower()] - approx = False - values = [s.strip() for s in exp.value] - if 'op' not in exp: # prevented by grammar - raise errors.ParserError('Missing operator', exp) - if exp.op in ('~' '!~'): - approx = True - if exp.op in (':', '=', '~', 'in'): - tok = ast.filter.Any(pred, ast.filter.Includes(*values, approx=approx)) - elif exp.op in ('!=', '!~', 'not in'): - tok = ast.filter.All(pred, ast.filter.Excludes(*values, approx=approx)) - else: # prevented by grammar - raise errors.ParserError('Invalid operator ({})'.format(exp.op), exp) - tokens.append(tok) - - elif exp.getName() == 'tag': - values = [s.strip() for s in exp.value] - if 'op' not in exp: 
- outer = ast.filter.Any - cond = ast.filter.Includes(*values) - elif exp.op == '~': - outer = ast.filter.Any - cond = ast.filter.Includes(*values, approx=True) - elif exp.op == '!': - outer = ast.filter.All - cond = ast.filter.Excludes(*values) - elif exp.op == '!~': - outer = ast.filter.All - cond = ast.filter.Excludes(*values, approx=True) - else: # prevented by grammar - raise errors.ParserError('Invalid operator ({})'.format(exp.op), exp) - tokens.append(outer(ns.bse.tag, ast.filter.Any(ns.bst.label, cond))) - - elif exp.getName() == 'continuous': # FIXME: simplify and adapt bsfs.query.ast.filter.Between accordingly! - lo, hi = None, None - lo_inc, hi_inc = False, False - predicate = None - if 'eq' in exp: - # equation style - predicate = self._abb2uri[exp.eq.predicate.lower()] - - if ('>' in exp.eq.cleft and '<' in exp.eq.cright) or \ - ('<' in exp.eq.cleft and '>' in exp.eq.cright) or \ - (exp.eq.cleft == '=' and exp.eq.cright == '='): - # x > pred < y or x < pred > y or x = pred = y - raise errors.ParserError('Cannot have two lower or two upper bounds', exp) - - if '>' in exp.eq.cleft: - hi = exp.eq.vleft - hi_inc = '=' in exp.eq.cleft - elif '<' in exp.eq.cleft: - lo = exp.eq.vleft - lo_inc = '=' in exp.eq.cleft - elif exp.eq.cleft == '=': - hi = lo = exp.eq.vleft - lo_inc = hi_inc = True - - if '>' in exp.eq.cright: - lo = exp.eq.vright - lo_inc = '=' in exp.eq.cright - elif '<' in exp.eq.cright: - hi = exp.eq.vright - hi_inc = '=' in exp.eq.cright - elif exp.eq.cright == '=': - hi = lo = exp.eq.vright - lo_inc = hi_inc = True - - elif 'range' in exp: # value in [lo:hi] - predicate = self._abb2uri[exp.range.predicate.lower()] - - if 'lo' in exp.range: - lo = exp.range.lo - lo_inc = exp.range.bopen == '[' - if 'hi' in exp.range: - hi = exp.range.hi - hi_inc = exp.range.bclose == ']' - - else: # prevented by grammar - raise errors.ParserError('Expression is neither a range nor an equation', exp) - - # interpret values - if predicate in 
self._DATETIME_PREDICATES: - - # turn into datetime - lo, lfmt = datefmt.guess_datetime(lo) if lo is not None else (None, None) - hi, hfmt = datefmt.guess_datetime(hi) if hi is not None else (None, None) - - if lo is None and hi is None: # prevented by grammar - raise errors.ParserError('At least one bound must be present', exp) - - # turn the query into the format lo <= pred < hi by adjusting the boundaries - if hi == lo and lo_inc and hi_inc: - # example: pred = 2012 -> 1.1.2012 <= pred < 1.1.2013 - hi = datefmt.increment(lo, lfmt) - lo_inc = True - hi_inc = False - else: - if lo is not None: - # example: pred >= 2012 -> pred >= 1.1.2012, 00:00 - lo = datefmt.increment(lo, lfmt) if not lo_inc else lo - lo_inc = True - - if hi is not None: - # example: pred <= 2012 -> pred < 1.1.2013, 00:00 - hi = datefmt.increment(hi, hfmt) if hi_inc else hi - hi_inc = False - - # build the ast node - if (lo is not None and lfmt.is_time()) or (hi is not None and hfmt.is_time()): - # time specification - - if (lo is not None and not lfmt.is_time()) or \ - (hi is not None and not hfmt.is_time()): - # lo/hi must both be time specifications - raise errors.ParserError('Both bounds must be a time specification', (lo, hi)) - - if lo is None: - # example: pred < 5 am -> 0 <= pred < 05:00 - lo = ttime.from_timestamp_loc(0) - lo_inc = True - - if hi is None: - # example: pred > 5 am -> 06:00 <= pred <= 24:00 - hi = ttime.from_timestamp_loc(3600 * 24) - hi_inc = True - - # Check consistency - if not (lo < hi or (lo == hi and lo_inc and hi_inc)): - raise errors.ParserError('Lower bound must not exceed upper bound', (lo, hi)) - - tokens.append( - ast.filter.Any(predicate, - ast.filter.Between(lo, hi, not lo_inc, not hi_inc))) - - else: # date specification - # Check consistency - lo = lo if lo is not None else datetime.min - hi = hi if hi is not None else datetime.max - - if not (lo < hi or (lo == hi and lo_inc and hi_inc)): - raise errors.ParserError('Lower bound must not exceed upper 
bound', (lo, hi)) - - tokens.append( - ast.filter.Any(predicate, - ast.filter.Between(lo, hi, not lo_inc, not hi_inc))) - - else: - # number predicate - lo = float(''.join(lo)) if lo is not None else float('-inf') - hi = float(''.join(hi)) if hi is not None else float('inf') - - # Check consistency - if not (lo < hi or (lo == hi and lo_inc and hi_inc)): - raise errors.ParserError('Lower bound must not exceed upper bound', (lo, hi)) - - # FIXME: mb/port: Three times the same code... optimize - tokens.append( - ast.filter.Any(predicate, - ast.filter.Between(lo, hi, not lo_inc, not hi_inc))) - - else: # prevented by grammar - raise errors.ParserError('Invalid expression', exp) - - return ast.filter.And(tokens) - -## EOF ## diff --git a/tagit/parsing/filter/__init__.py b/tagit/parsing/filter/__init__.py new file mode 100644 index 0000000..88b6256 --- /dev/null +++ b/tagit/parsing/filter/__init__.py @@ -0,0 +1,17 @@ +""" + +Part of the tagit module. +A copy of the license is provided with the project. +Author: Matthias Baumgartner, 2022 +""" +# inner-module imports +from .from_string import FromString +#from .to_string import ToString + +# exports +__all__ = ( + 'FromString', + #'ToString', + ) + +## EOF ## diff --git a/tagit/parsing/filter/from_string.py b/tagit/parsing/filter/from_string.py new file mode 100644 index 0000000..5a38723 --- /dev/null +++ b/tagit/parsing/filter/from_string.py @@ -0,0 +1,372 @@ +"""User-specified search query parsing. + +>>> q = "has mime / tag in (november, october) / ! Apfel / time < 10.10.2004 / iso in (100, 200)" +>>> ast = filter_from_string(q) + +Part of the tagit module. +A copy of the license is provided with the project. 
class FromString():
    """Parse a user-specified search string into a filter AST.

    The grammar is generated from the predicates of a schema, so the parser
    is rebuilt whenever the `schema` property is assigned. Calling the
    instance with a query string returns an `ast.filter.And` over one filter
    token per '/'-separated expression.

    Four expression kinds are recognized:

    * existence:   ``has PREDICATE``, ``has no PREDICATE``, ``has not PREDICATE``
    * continuous:  comparisons (``iso < 200``, ``100 <= iso < 200``) and
                   ranges (``iso in [100:200]``) over number or time predicates
    * categorical: ``PREDICATE OP VALUE[, VALUE ...]``
    * tag:         ``[OP] VALUE[, VALUE ...]``, a shortcut for tag labels
    """

    # parsers
    _DATETIME_PREDICATES = None
    _QUERY = None

    def __init__(self, schema: bsfs.schema.Schema):
        """Create a parser for *schema*. Assigning the schema builds the grammar."""
        self.schema = schema

    @property
    def schema(self) -> bsfs.schema.Schema:
        """Return the schema the current grammar was built from."""
        return self._schema

    @schema.setter
    def schema(self, schema: bsfs.schema.Schema):
        """Store *schema* and rebuild the grammar from its predicates."""
        self._schema = schema
        self.build_parser()

    def build_parser(self):
        """Build the query grammar from the current schema.

        Sets `_abb2uri`, `_uri2abb`, `_DATETIME_PREDICATES` and `_QUERY`.
        Returns the instance itself.
        """
        # valid predicates per type, as supplied by tagit.library
        # FIXME:
        #   * range / type constraints
        #   * how to filter predicates
        #   * distinguish between prefix and label
        #
        # Categorical: string, float, integer; labelled node (tag, group); maybe boolean
        # Continuous:  float, integer
        # Datetime:    datetime
        # Existential: all of the above, particularly bool; unlabelled node (preview)
        #   -> rdfs:range
        #
        # > Target: Entity (allow others?) -> rdfs:domain
        # > Require: searchable as specified in backend AND user-searchable as specified in frontend

        # all relevant predicates
        predicates = {
            pred
            for pred in self.schema.predicates()
            if pred.domain <= self.schema.node(ns.bsfs.Entity)
        }
        # FIXME: filter through accept/reject lists
        # shortcuts
        # FIXME: tie-breaking for duplicates
        self._abb2uri = {pred.uri.fragment: pred.uri for pred in predicates}
        self._uri2abb = {uri: fragment for fragment, uri in self._abb2uri.items()}
        # all predicates
        _PREDICATES = {self._uri2abb[pred.uri] for pred in predicates}
        # numeric predicates
        # FIXME: type check might become unnecessary
        _PREDICATES_NUMERIC = {
            self._uri2abb[pred.uri]
            for pred in predicates
            if isinstance(pred.range, bsfs.schema.Literal)
            and pred.range <= self.schema.literal(ns.bsfs.Number)
        }
        # datetime predicates
        # FIXME: type check might become unnecessary
        self._DATETIME_PREDICATES = {
            pred.uri
            for pred in predicates
            if isinstance(pred.range, bsfs.schema.Literal)
            and pred.range <= self.schema.literal(ns.bsfs.Time)
        }
        _PREDICATES_DATETIME = {self._uri2abb[uri] for uri in self._DATETIME_PREDICATES}

        # terminal symbols
        number = Group(
            Optional(oneOf('- +'))
            + Combine(Word(nums) + Optional('.' + Optional(Word(nums)))))
        words = (
            QuotedString(quoteChar='"', escChar='\\')
            ^ QuotedString(quoteChar="'", escChar='\\')
            ^ Word(alphanums + alphas8bit + punc8bit + ' *#%&-.:;<=>?@^_`{}~')
        )
        # FIXME: allow escaped chars "( ) / , [ ]"
        # FIXME: Non-ascii characters

        # predicates
        predicate = Or(
            [CaselessKeyword(p) for p in _PREDICATES]).setResultsName('predicate')
        date_predicate = Or(
            [CaselessKeyword(p) for p in _PREDICATES_DATETIME]).setResultsName('predicate')
        num_predicate = Or(
            [CaselessKeyword(p) for p in _PREDICATES_NUMERIC]).setResultsName('predicate')

        # existence
        #   EXPR      := has PREDICATE | has no PREDICATE | has not PREDICATE
        #   PREDICATE := [predicate]
        has_op = (
            CaselessKeyword('has')
            ^ CaselessKeyword('has no')
            ^ CaselessKeyword('has not')
        ).setResultsName('op')
        _EXISTENCE = Group(has_op + predicate).setResultsName('existence')

        # continuous
        #   EXPR      := PREDICATE CMP VALUE | VALUE CMP PREDICATE
        #              | VALUE CMP PREDICATE CMP VALUE | PREDICATE OP RANGE
        #   PREDICATE := [predicate]
        #   CMP       := < | <= | = | >= | >
        #   OP        := : | = | in
        #   RANGE     := BOPEN VALUE RSEP VALUE BCLOSE | BOPEN RSEP VALUE BCLOSE
        #              | BOPEN VALUE RSEP BCLOSE
        #   BOPEN     := ( | [ | ]
        #   BCLOSE    := ) | ] | [
        #   RSEP      := : | -      (only '-' for dates)
        #   VALUE     := [digit] | [date]
        # range style
        rsepn = oneOf(': -')
        rsepd = Literal('-')
        bclose = oneOf(') ] [').setResultsName('bclose')
        bopen = oneOf('( [ ]').setResultsName('bopen')
        range_op = Or([':', '=', 'in']).setResultsName('op')
        date_value = parse_datetime.DATETIME
        rngn = (
            num_predicate + range_op + bopen + number('lo') + rsepn + number('hi') + bclose
            ^ num_predicate + range_op + bopen + rsepn + number('hi') + bclose
            ^ num_predicate + range_op + bopen + number('lo') + rsepn + bclose
        )
        rngd = (
            date_predicate + range_op + bopen + date_value('lo') + rsepd + date_value('hi') + bclose
            ^ date_predicate + range_op + bopen + rsepd + date_value('hi') + bclose
            ^ date_predicate + range_op + bopen + date_value('lo') + rsepd + bclose
        )
        # equation style
        cmp = oneOf('< <= = >= >').setResultsName('cmp')
        eqn = (
            num_predicate + cmp('cright') + number('vright')
            ^ number('vleft') + cmp('cleft') + num_predicate
            ^ number('vleft') + cmp('cleft') + num_predicate + cmp('cright') + number('vright')
        )
        eqd = (
            date_predicate + cmp('cright') + date_value('vright')
            ^ date_value('vleft') + cmp('cleft') + date_predicate
            ^ date_value('vleft') + cmp('cleft') + date_predicate + cmp('cright') + date_value('vright')
        )
        # combined
        _CONTINUOUS = Group(
            Group(eqn).setResultsName('eq')
            ^ Group(eqd).setResultsName('eq')
            ^ Group(rngn).setResultsName('range')
            ^ Group(rngd).setResultsName('range')
        ).setResultsName('continuous')

        # categorical
        #   EXPR      := PREDICATE OP VALUE | PREDICATE OP (VALUE)
        #   PREDICATE := [predicate]
        #   OP        := : | = | in | not in | != | ~ | !~
        #   VALUE     := TERM | VALUE,
        #   TERM      := [word]
        cat_op = (
            CaselessKeyword('in') ^ CaselessKeyword('not in')
            ^ ':' ^ '=' ^ '!=' ^ '~' ^ '!~'
        ).setResultsName('op')
        cat_value = delimitedList(words, delim=VALUE_DELIM).setResultsName('value')
        _CATEGORICAL = Group(
            predicate + cat_op + ('(' + cat_value + ')' | cat_value)
        ).setResultsName('categorical')

        # tag shortcuts
        #   EXPR  := OP VALUE | OP (VALUE) | VALUE | (VALUE)
        #   OP    := ! | ~ | !~
        #   VALUE := TERM | VALUE,
        #   TERM  := [word]
        tag_op = oneOf('! ~ !~').setResultsName('op')
        tag_value = delimitedList(words, delim=VALUE_DELIM).setResultsName('value')
        _TAG = Group(
            Optional(tag_op) + '(' + tag_value + ')'
            ^ Optional(tag_op) + tag_value
        ).setResultsName('tag')

        # overall query
        #   QUERY := QUERY / QUERY | EXPR
        self._QUERY = delimitedList(
            _EXISTENCE | _CONTINUOUS | _CATEGORICAL | _TAG, delim=SEARCH_DELIM)
        return self

    def __call__(self, search):
        """Parse the query string *search* and return its filter AST.

        Returns an `ast.filter.And` over one token per expression.
        Raises `errors.ParserError` if the string does not match the grammar
        or if an expression is inconsistent (e.g. lower bound above upper bound).
        """
        try:
            parsed = self._QUERY.parseString(search, parseAll=True)
        except ParseException as err:
            # chain the cause so that the pyparsing location info stays in the traceback
            raise errors.ParserError('Cannot parse query', err) from err

        # convert to AST
        handlers = {
            'existence': self._existence_token,
            'categorical': self._categorical_token,
            'tag': self._tag_token,
            'continuous': self._continuous_token,
        }
        tokens = []
        for exp in parsed:
            handler = handlers.get(exp.getName())
            if handler is None: # prevented by grammar
                raise errors.ParserError('Invalid expression', exp)
            tokens.append(handler(exp))

        return ast.filter.And(tokens)

    def _existence_token(self, exp):
        """Build the token for a 'has' / 'has no' / 'has not' expression."""
        pred = self._abb2uri[exp.predicate.lower()]
        if 'op' not in exp: # prevented by grammar
            raise errors.ParserError('Missing operator', exp)
        if exp.op == 'has':
            return ast.filter.Has(pred)
        if exp.op in ('has no', 'has not'):
            return ast.filter.Not(ast.filter.Has(pred))
        # prevented by grammar
        raise errors.ParserError('Invalid operator ({})'.format(exp.op), exp)

    def _categorical_token(self, exp):
        """Build the token for a 'PREDICATE OP VALUE(S)' expression."""
        pred = self._abb2uri[exp.predicate.lower()]
        values = [s.strip() for s in exp.value]
        if 'op' not in exp: # prevented by grammar
            raise errors.ParserError('Missing operator', exp)
        # NOTE: this must be a tuple. The previous ('~' '!~') was a single
        # concatenated string, i.e. a substring test.
        approx = exp.op in ('~', '!~')
        if exp.op in (':', '=', '~', 'in'):
            return ast.filter.Any(pred, ast.filter.Includes(*values, approx=approx))
        if exp.op in ('!=', '!~', 'not in'):
            return ast.filter.All(pred, ast.filter.Excludes(*values, approx=approx))
        # prevented by grammar
        raise errors.ParserError('Invalid operator ({})'.format(exp.op), exp)

    def _tag_token(self, exp):
        """Build the token for a tag shortcut ('[OP] VALUE(S)')."""
        values = [s.strip() for s in exp.value]
        if 'op' not in exp:
            outer = ast.filter.Any
            cond = ast.filter.Includes(*values)
        elif exp.op == '~':
            outer = ast.filter.Any
            cond = ast.filter.Includes(*values, approx=True)
        elif exp.op == '!':
            outer = ast.filter.All
            cond = ast.filter.Excludes(*values)
        elif exp.op == '!~':
            outer = ast.filter.All
            cond = ast.filter.Excludes(*values, approx=True)
        else: # prevented by grammar
            raise errors.ParserError('Invalid operator ({})'.format(exp.op), exp)
        return outer(ns.bse.tag, ast.filter.Any(ns.bst.label, cond))

    def _continuous_token(self, exp):
        """Build the token for an equation- or range-style expression.

        FIXME: simplify and adapt bsfs.query.ast.filter.Between accordingly!
        """
        predicate, lo, hi, lo_inc, hi_inc = self._continuous_bounds(exp)

        # interpret values
        if predicate in self._DATETIME_PREDICATES:
            lo, hi, lo_inc, hi_inc = self._datetime_bounds(exp, lo, hi, lo_inc, hi_inc)
        else:
            # number predicate: the parsed value is a token group (sign, digits)
            lo = float(''.join(lo)) if lo is not None else float('-inf')
            hi = float(''.join(hi)) if hi is not None else float('inf')

        # Check consistency
        if not (lo < hi or (lo == hi and lo_inc and hi_inc)):
            raise errors.ParserError('Lower bound must not exceed upper bound', (lo, hi))

        # Between takes flags for *exclusive* bounds, hence the negation
        return ast.filter.Any(
            predicate, ast.filter.Between(lo, hi, not lo_inc, not hi_inc))

    def _continuous_bounds(self, exp):
        """Return (predicate, lo, hi, lo_inc, hi_inc) with still-unconverted values."""
        lo, hi = None, None
        lo_inc, hi_inc = False, False

        if 'eq' in exp:
            # equation style
            eq = exp.eq
            predicate = self._abb2uri[eq.predicate.lower()]

            if ('>' in eq.cleft and '<' in eq.cright) or \
               ('<' in eq.cleft and '>' in eq.cright) or \
               (eq.cleft == '=' and eq.cright == '='):
                # x > pred < y or x < pred > y or x = pred = y
                raise errors.ParserError('Cannot have two lower or two upper bounds', exp)

            # left-hand side: VALUE CMP pred
            if '>' in eq.cleft:
                hi = eq.vleft
                hi_inc = '=' in eq.cleft
            elif '<' in eq.cleft:
                lo = eq.vleft
                lo_inc = '=' in eq.cleft
            elif eq.cleft == '=':
                hi = lo = eq.vleft
                lo_inc = hi_inc = True

            # right-hand side: pred CMP VALUE
            if '>' in eq.cright:
                lo = eq.vright
                lo_inc = '=' in eq.cright
            elif '<' in eq.cright:
                hi = eq.vright
                hi_inc = '=' in eq.cright
            elif eq.cright == '=':
                hi = lo = eq.vright
                lo_inc = hi_inc = True

        elif 'range' in exp: # value in [lo:hi]
            rng = exp.range
            predicate = self._abb2uri[rng.predicate.lower()]

            if 'lo' in rng:
                lo = rng.lo
                lo_inc = rng.bopen == '['
            if 'hi' in rng:
                hi = rng.hi
                hi_inc = rng.bclose == ']'

        else: # prevented by grammar
            raise errors.ParserError('Expression is neither a range nor an equation', exp)

        return predicate, lo, hi, lo_inc, hi_inc

    def _datetime_bounds(self, exp, lo, hi, lo_inc, hi_inc):
        """Convert parsed date/time bounds to the form lo <= pred < hi."""
        # The module has no global named `datefmt` (the name was only a local in
        # build_parser), so resolve the helper module here.
        # NOTE(review): assumes tagit.parsing.datefmt exposes guess_datetime()
        # and increment() -- confirm against that module.
        from tagit.parsing import datefmt

        # turn into datetime
        lo, lfmt = datefmt.guess_datetime(lo) if lo is not None else (None, None)
        hi, hfmt = datefmt.guess_datetime(hi) if hi is not None else (None, None)

        if lo is None and hi is None: # prevented by grammar
            raise errors.ParserError('At least one bound must be present', exp)

        # turn the query into the format lo <= pred < hi by adjusting the boundaries
        if hi == lo and lo_inc and hi_inc:
            # example: pred = 2012 -> 1.1.2012 <= pred < 1.1.2013
            hi = datefmt.increment(lo, lfmt)
            lo_inc = True
            hi_inc = False
        else:
            if lo is not None:
                # example: pred >= 2012 -> pred >= 1.1.2012, 00:00
                lo = datefmt.increment(lo, lfmt) if not lo_inc else lo
                lo_inc = True

            if hi is not None:
                # example: pred <= 2012 -> pred < 1.1.2013, 00:00
                hi = datefmt.increment(hi, hfmt) if hi_inc else hi
                hi_inc = False

        if (lo is not None and lfmt.is_time()) or (hi is not None and hfmt.is_time()):
            # time specification
            if (lo is not None and not lfmt.is_time()) or \
               (hi is not None and not hfmt.is_time()):
                # lo/hi must both be time specifications
                raise errors.ParserError('Both bounds must be a time specification', (lo, hi))

            if lo is None:
                # example: pred < 5 am -> 0 <= pred < 05:00
                lo = ttime.from_timestamp_loc(0)
                lo_inc = True

            if hi is None:
                # example: pred > 5 am -> 06:00 <= pred <= 24:00
                hi = ttime.from_timestamp_loc(3600 * 24)
                hi_inc = True

        else:
            # date specification: open ends span the whole datetime range
            lo = lo if lo is not None else datetime.min
            hi = hi if hi is not None else datetime.max

        return lo, hi, lo_inc, hi_inc