diff options
-rw-r--r-- | tagit/parsing/__init__.py | 19 | ||||
-rw-r--r-- | tagit/parsing/datefmt.py | 568 | ||||
-rw-r--r-- | tagit/parsing/search.py | 405 | ||||
-rw-r--r-- | tagit/parsing/sort.py | 192 | ||||
-rw-r--r-- | tagit/utils/errors.py | 20 | ||||
-rw-r--r-- | test/parsing/__init__.py | 0 | ||||
-rw-r--r-- | test/parsing/test_datefmt.py | 378 | ||||
-rw-r--r-- | test/parsing/test_search.py | 707 | ||||
-rw-r--r-- | test/parsing/test_sort.py | 96 |
9 files changed, 2377 insertions, 8 deletions
diff --git a/tagit/parsing/__init__.py b/tagit/parsing/__init__.py new file mode 100644 index 0000000..1c431a4 --- /dev/null +++ b/tagit/parsing/__init__.py @@ -0,0 +1,19 @@ +""" + +Part of the tagit module. +A copy of the license is provided with the project. +Author: Matthias Baumgartner, 2022 +""" +# inner-module imports +from .datefmt import parse_datetime +from .search import ast_from_string +from .sort import sort_from_string + +# exports +__all__ = ( + 'ast_from_string', + 'parse_datetime', + 'sort_from_string', + ) + +## EOF ## diff --git a/tagit/parsing/datefmt.py b/tagit/parsing/datefmt.py new file mode 100644 index 0000000..49de1c0 --- /dev/null +++ b/tagit/parsing/datefmt.py @@ -0,0 +1,568 @@ +"""Parse and interpret date strings. + +Consider the following date notations (DMY=04.11.2012): + +DMY 04.11.12 europe +YMD 12.11.04 iso +MDY 11.04.12 US +YDM 12.04.11 reverse US +DYM 04.12.11 too uncommon, ignored +MYD 11.12.04 too uncommon, ignored + +There's the general problem of ambiguity between the DMY and MDY formats. +Here, we give precedence to the DMY format. + +Note that the MDY format can still be used in unambiguous settings or +with the month spelled out, e.g. "2012, 23th of November" + +Similarly, consider the following shortened date notations: + +DM 04.11 europe, current year +MY 11.12 quarters +YM 12.11 quarters +MD 11.04 us, current year +DY 23.12 too uncommon, ignored +YD 12.23 too uncommon, ignored + +In addition to the different spellings, month names can be spelled out +and the string can be cluttered with additional common words. + +Part of the tagit module. +A copy of the license is provided with the project. 
# standard imports
from collections import Counter
from datetime import date as ddate, time as dtime, datetime, timedelta
from math import floor

# external imports
from dateutil.relativedelta import relativedelta
from pyparsing import Combine, Group, Literal, Optional, Or, Word, nums, oneOf, ParseException

# tagit imports
from tagit.utils import errors, Struct, flatten

# exports
# NOTE: every entry needs its own trailing comma. Adjacent string literals are
# concatenated by python, which silently merges two names into a bogus one.
__all__ = (
    # default format strings
    'DATE_FMT',
    'TIME_FMT',
    'DATETIME_FMT',
    # exceptions
    'DateParserError',
    'TimeParserError',
    'DateFormatError',
    # parsing
    'parse_datetime',
    'guess_datetime',
    # postprocessing
    'increment',
    )

## constants ##

# Default strftime format strings.
DATE_FMT = '%d.%m.%Y'
TIME_FMT = '%H:%M'
DATETIME_FMT = '%d.%m.%Y, %H:%M'

# Literal months, mapping the spelled-out or abbreviated name to its number.
MONTH_LIT = {
    'Jan'       : 1,
    'January'   : 1,
    'Feb'       : 2,
    'February'  : 2,
    'Mar'       : 3,
    'March'     : 3,
    'Apr'       : 4,
    'April'     : 4,
    'May'       : 5,
    'Jun'       : 6,
    'June'      : 6,
    'Jul'       : 7,
    'July'      : 7,
    'Aug'       : 8,
    'August'    : 8,
    'Sep'       : 9,
    'September' : 9,
    'Oct'       : 10,
    'October'   : 10,
    'Nov'       : 11,
    'November'  : 11,
    'Dec'       : 12,
    'December'  : 12,
    }


## code ##

class DatefmtError(errors.ParserError):
    """Base class of all errors raised while interpreting date/time strings."""

class DateParserError(DatefmtError):
    """The date part could not be interpreted."""

class TimeParserError(DatefmtError):
    """The time part could not be interpreted."""

class DateFormatError(DatefmtError):
    """A date/time format string (see DF) is empty or otherwise unusable."""
class DF(str):
    """Date/time format of a user-supplied string.

    A DF is a string of unit indicator characters (see *_valid_chars*),
    e.g. DF('DMY') for a day-month-year date or DF('hm') for an
    hour-minute time.
    """
    # indicator characters, highest to lowest.
    _valid_chars = "YMDhmsn"
    # explicit mapping from unit to character
    year = 'Y'
    month = 'M'
    day = 'D'
    hour = 'h'
    minute = 'm'
    second = 's'
    microsecond = 'n'

    def valid(self):
        """Return a truthy value if at least one unit indicator is present."""
        return len(self) and len(set(self._valid_chars) & set(self))

    def _units(self):
        """Return the specified units, ordered from highest to lowest."""
        return [char for char in self._valid_chars if char in self]

    def lsb(self):
        """Smallest unit specified."""
        if not self.valid():
            raise DateFormatError(
                'An empty date format string has no least significant position.', self)

        return self._units()[-1]

    def msb(self):
        """Highest unit specified."""
        if not self.valid():
            raise DateFormatError(
                'An empty date format string has no most significant position.', self)

        return self._units()[0]

    def is_time(self):
        """Return true if only a time (hour/minute/second/ms) was specified."""
        return bool(self.valid() and self.msb() not in 'YMD')

    def is_date(self):
        """Return true if only a date (year/month/day) was specified."""
        return bool(self.valid() and self.lsb() not in 'hmsn')

# priorities: the order in which ambiguous formats are tried.
# 'p2' lists two-part dates, 'p3' lists three-part dates.
PRIORITIES_INT = {
    'p2': [
        DF(DF.day + DF.month),   # DM
        DF(DF.month + DF.year),  # MY
        DF(DF.year + DF.month),  # YM
        DF(DF.month + DF.day),   # MD
        DF(DF.day + DF.year),    # DY
        DF(DF.year + DF.day),    # YD
        ],
    'p3': [
        DF(DF.day + DF.month + DF.year),  # DMY
        DF(DF.year + DF.month + DF.day),  # YMD
        DF(DF.month + DF.day + DF.year),  # MDY
        DF(DF.year + DF.day + DF.month),  # YDM
        DF(DF.day + DF.year + DF.month),  # DYM
        DF(DF.month + DF.year + DF.day),  # MYD
        ]
    }

PRIORITIES_US = {
    'p2': [
        DF(DF.month + DF.day),
        DF(DF.year + DF.month),
        DF(DF.day + DF.month),
        DF(DF.month + DF.year),
        DF(DF.day + DF.year),
        DF(DF.year + DF.day),
        ],
    'p3': [
        DF(DF.month + DF.day + DF.year),
        DF(DF.year + DF.day + DF.month),
        DF(DF.day + DF.month + DF.year),
        DF(DF.year + DF.month + DF.day),
        DF(DF.day + DF.year + DF.month),
        DF(DF.month + DF.year + DF.day),
        ]
    }

def guess_date(tokens, priorities=None):
    """Guess the date from a string in an unknown format.

    *tokens* is the sequence of string tokens of the date (numbers, month
    names, delimiters, 'of'). *priorities* is a dict with the keys 'p2' and
    'p3' that list the two-part and three-part formats in the order in which
    they are tried; it defaults to PRIORITIES_INT.

    Returns a tuple (date, format) with a datetime.date and a DF instance.
    Raises a DateParserError if no valid date can be derived.

    The method uses the following clues to guess the meaning of each part:
    * 4-digits implies it's a year
    * 1-digit discards year (since it's more common to write 04 instead of 4
      as a shorthand to 2004)
    * Literal month
    * 'of' is preceded by day and succeeded by the month
    * Any of (st, nd, rd, th) on a number makes it a day
    * Number > 12 can't be a month
    * Number > 31 can't be a day
    * Date inexistence (e.g. 29.02.2018)
    * precedence DMY > YMD > MDY > YDM
    * precedence DM > MY > YM > MD
    """
    priorities = PRIORITIES_INT if priorities is None else priorities

    # We need to figure out which token corresponds to which component
    # (D, M, Y). Since this is ambiguous, guesswork is needed. We do so
    # by eliminating impossible options.

    # initially, all three components are viable
    guesses = [Struct(tok=tok.strip(), fmt=DF.year + DF.month + DF.day) for tok in tokens]

    # check indicators for specific formats
    for idx, guess in enumerate(guesses):
        tok = guess.tok

        if len(tok) == 1 and tok in '.,;':
            # delimiter tokens can be ignored
            guess.fmt = ''
        elif tok == 'of':
            # an 'of' token indicates a 'day of month' structure
            if idx == 0 or idx + 1 >= len(guesses):
                # without this check, idx-1 wraps around and idx+1 is out of range
                raise DateParserError("'of' must stand between a day and a month")
            guesses[idx-1].fmt = DF.day
            guesses[idx+1].fmt = DF.month
            guess.fmt = ''
        elif tok in MONTH_LIT:
            # spelled out month is - tadaaa - a month
            # This has to be checked before the day suffix because 'August'
            # ends with 'st' and would otherwise be mistaken for a day.
            guess.tok = str(MONTH_LIT[tok])
            guess.fmt = DF.month
        elif tok[-2:] in ('st', 'nd', 'rd', 'th') and tok[:-2].isdigit():
            # suffix on a number indicates a day
            guess.fmt = DF.day
            guess.tok = tok[:-2]
        elif len(tok) == 4 and tok.isdigit():
            # four digits must be a year
            guess.fmt = DF.year

    # remove jitter (of, delimiters)
    guesses = [itm for itm in guesses if len(itm.fmt) > 0]

    # eliminate impossible options
    for guess in guesses:
        tok = guess.tok

        if len(tok) == 1:
            # can't be a year
            guess.fmt = guess.fmt.replace(DF.year, '')
        if tok.isdigit() and int(tok) > 12:
            # can't be a month
            guess.fmt = guess.fmt.replace(DF.month, '')
        if tok.isdigit() and int(tok) > 31:
            # can't be a day
            guess.fmt = guess.fmt.replace(DF.day, '')

    # define helper function
    def create_date(year, month, day):
        """Return a date for the given components or None if that is not possible."""
        # check format
        if DF.year not in year.fmt or DF.month not in month.fmt or DF.day not in day.fmt:
            return None

        if len(str(year.tok)) == 2:
            # ten years into the future is still the current century, otherwise the previous one
            threshold = ddate.today().year + 10 - 2000
            year = Struct(
                tok='20'+str(year.tok) if int(year.tok) < threshold else '19'+str(year.tok),
                fmt=year.fmt
                )

        try:
            # create date
            return ddate(year=int(year.tok), month=int(month.tok), day=int(day.tok))
        except ValueError:
            # non-numeric token or a date that doesn't exist (e.g. 30.02.)
            return None

    # placeholders for unspecified tokens
    pyear = Struct(tok=ddate.today().year, fmt=DF.year)
    pday = Struct(tok=1, fmt=DF.day)
    pmon = Struct(tok=1, fmt=DF.month)

    if len(guesses) == 1: # one-part date (Y)
        date = create_date(guesses[0], pmon, pday)
        if date is None:
            raise DateParserError('Two-digit date format must contain the year')
        return date, DF(DF.year)

    elif len(guesses) == 2: # two-part date (DM, MY, YM, MD)
        fst, snd = guesses
        # check components
        if len(set(fst.fmt + snd.fmt)) < 2:
            raise DateParserError('Invalid two-digit date format')

        if len(fst.fmt) == 1 and len(snd.fmt) == 1: # fully determined
            parts = {
                DF.year: pyear,
                DF.month: pmon,
                DF.day: pday,
                }
            parts.update({
                fst.fmt: fst,
                snd.fmt: snd,
                })
            date = create_date(parts[DF.year], parts[DF.month], parts[DF.day])
            if date is None:
                # e.g. '30th of Feb': the roles are clear but the date doesn't exist.
                # Returning None here would only fail later in the caller.
                raise DateParserError('Invalid date', [fst.tok, snd.tok])
            return date, DF(fst.fmt + snd.fmt)

        # walk through prioritized formats
        formats = {
            DF(DF.day + DF.month):  create_date(pyear, snd, fst), # DM
            DF(DF.month + DF.year): create_date(snd, fst, pday),  # MY
            DF(DF.year + DF.month): create_date(fst, snd, pday),  # YM
            DF(DF.month + DF.day):  create_date(pyear, fst, snd), # MD
            DF(DF.day + DF.year):   create_date(snd, pmon, fst),  # DY
            DF(DF.year + DF.day):   create_date(fst, pmon, snd),  # YD
            }

        for fmt in priorities['p2']:
            if formats.get(fmt, None) is not None:
                return formats[fmt], fmt

        raise DateParserError('Cannot guess roles of a two-digit date format')

    elif len(guesses) == 3: # three-part date (DMY, YMD, MDY, YDM)

        # eliminate options based on uniqueness of component assignment
        changed = True
        while changed:
            # resolved guesses: item has only one possible component option
            resolved = set([itm.fmt for itm in guesses if len(itm.fmt) == 1])
            # single choice: component has only one possible position
            unique = {comp for comp, freq in
                      Counter(flatten([set(itm.fmt) for itm in guesses])).items()
                      if freq == 1}
            # assume no changes
            changed = False
            for itm in guesses:
                if unique & set(itm.fmt) and not set(itm.fmt).issubset(unique):
                    # itm is the only option for one component
                    itm.fmt = DF(''.join(unique & set(itm.fmt)))
                    changed = True
                elif resolved & set(itm.fmt) and not set(itm.fmt).issubset(resolved):
                    # itm contains options that are already taken by a different item
                    itm.fmt = itm.fmt.translate(str.maketrans('', '', ''.join(resolved)))
                    changed = True

        fst, snd, trd = guesses

        # check components
        if len(set(fst.fmt + snd.fmt + trd.fmt)) < 3:
            raise DateParserError('Invalid three-digit date format')

        if len(fst.fmt) == 1 and len(snd.fmt) == 1 and len(trd.fmt) == 1: # fully determined
            parts = {
                fst.fmt: fst,
                snd.fmt: snd,
                trd.fmt: trd,
                }
            date = create_date(parts[DF.year], parts[DF.month], parts[DF.day])
            if date is None:
                # the roles are clear but the date doesn't exist (e.g. 31.Feb.2012)
                raise DateParserError('Invalid date', [fst.tok, snd.tok, trd.tok])
            return date, DF(fst.fmt + snd.fmt + trd.fmt)

        # walk through prioritized formats
        formats = {
            DF(DF.day + DF.month + DF.year): create_date(year=trd, month=snd, day=fst), # DMY
            DF(DF.year + DF.month + DF.day): create_date(year=fst, month=snd, day=trd), # YMD
            DF(DF.month + DF.day + DF.year): create_date(year=trd, month=fst, day=snd), # MDY
            DF(DF.year + DF.day + DF.month): create_date(year=fst, month=trd, day=snd), # YDM
            DF(DF.day + DF.year + DF.month): create_date(year=snd, month=trd, day=fst), # DYM
            DF(DF.month + DF.year + DF.day): create_date(year=snd, month=fst, day=trd), # MYD
            }

        for fmt in priorities['p3']:
            if formats.get(fmt, None) is not None:
                return formats[fmt], fmt

        raise DateParserError('Cannot guess the roles of a three-digit date format')

    raise DateParserError('Cannot parse the date format')

def guess_time(tokens):
    """Guess the time from a string in an unknown format.

    *tokens* is the sequence of string tokens of the time (numbers,
    separators, am/pm). Returns a tuple (time, format) with a datetime.time
    and a DF instance. Raises a TimeParserError if no valid time can be
    derived.

    * Always sorted from hi (hour) to low (sec)
    * 4 Terms -> hour, min, sec, fraction of a second
    * 3 Terms -> hour, min, sec
    * 2 Terms -> hour, min | min, sec
        * first term > 24 -> min, sec
        * am or pm present -> hour, min
    * 1 Term  -> hour
    * With am/pm, 12 am is midnight (00:xx) and 12 pm is noon (12:xx)
    """
    # remove separators
    tokens = [tok.lower() for tok in tokens if tok not in ('.', ',', ':')]
    # check if the am/pm format was used
    is_am = 'am' in tokens
    is_pm = 'pm' in tokens

    # remove non-numbers
    tokens = [tok for tok in tokens if tok.isdigit()]
    if not tokens:
        raise TimeParserError('No numeric time component found', tokens)

    # The last term is a decimal fraction of a second if four terms are given.
    # Right-pad it to six digits to get microseconds (e.g. '25' -> 250000).
    ms = int(tokens[-1].ljust(6, '0'))
    # convert to int
    tokens = [int(tok) for tok in tokens]

    def hour24(hour):
        """Convert an am/pm hour to the 24-hour clock."""
        if is_am: # 12 am is midnight
            return 0 if hour == 12 else hour
        if is_pm: # 12 pm is noon, adding 12 would produce the invalid hour 24
            return hour if hour == 12 else hour + 12
        return hour

    # guess format
    try:
        if len(tokens) == 4: # H:M:S.NS
            tokens[-1] = ms
            return dtime(*tokens), DF(DF.hour + DF.minute + DF.second + DF.microsecond)
        elif len(tokens) == 3: # H:M:S
            return dtime(*tokens), DF(DF.hour + DF.minute + DF.second)
        elif len(tokens) == 2: # H:M or M:S
            if is_am or is_pm: # am/pm notation was used
                return dtime(hour24(tokens[0]), tokens[1]), DF(DF.hour + DF.minute)
            elif tokens[0] > 24: # min, sec
                return dtime(0, *tokens), DF(DF.minute + DF.second)
            else: # hour, min
                return dtime(*tokens), DF(DF.hour + DF.minute)
        elif len(tokens) == 1: # H
            return dtime(hour24(tokens[0])), DF(DF.hour)

    except ValueError as err:
        # invalid value was supplied, e.g. hour=85
        raise TimeParserError('Invalid value', tokens) from err

    raise TimeParserError('Unknown time format', tokens)

def guess_datetime(exp):
    """Return a datetime instance by guessing the components of a DATETIME parsed
    user-supplied date and/or time string. Guessing might be necessary since dates
    like 10.11.12 are ambiguous. *exp* is supposed to be a pyparsing.ParseResults
    instance as returned by DATETIME.parseString(...).

    Returns a tuple (datetime, format) where format is a DF instance.
    Raises a DateFormatError if *exp* has neither a 'date' nor a 'time' part.
    """
    # For now I assumed unique separators (dot for date, colon for time, comma to separate the two)
    if 'date' in exp and 'time' in exp: # both parts present
        date, dfmt = guess_date(exp.date)
        time, tfmt = guess_time(exp.time)
        return datetime.combine(date, time), DF(dfmt+tfmt)
    elif 'date' in exp: # date present
        date, dfmt = guess_date(exp.date)
        return datetime(date.year, date.month, date.day), dfmt
    elif 'time' in exp: # time present
        time, tfmt = guess_time(exp.time)
        # a time without a date is placed on the (local) day of timestamp zero
        return datetime.combine(ddate.fromtimestamp(0), time), tfmt
    else:
        raise DateFormatError('Neither a date nor a time was found.')

def increment(date, fmt):
    """Increment the LSB of a datetime instance by one.

    *fmt* is the DF format of *date*. All units below the least significant
    one are reset to their lowest value. Raises a DateFormatError if *fmt*
    is empty or invalid.
    """
    # Note on relativedelta: plural arguments (e.g. minutes) are added to the
    # date while singular arguments (e.g. second) overwrite the component.
    if fmt == '' or not fmt.valid():
        raise DateFormatError('Invalid date format string', fmt)
    elif fmt.lsb() == fmt.microsecond: # 5.11.2012, 06:24:18.25 -> 5.11.2012, 06:24:18.250001
        return date + relativedelta(microseconds=1)
    elif fmt.lsb() == fmt.second: # 5.11.2012, 06:24:18 -> 5.11.2012, 06:24:19
        return date + relativedelta(seconds=1, microsecond=0)
    elif fmt.lsb() == fmt.minute: # 5.11.2012, 06:24 -> 5.11.2012, 06:25
        return date + relativedelta(minutes=1, second=0, microsecond=0)
    elif fmt.lsb() == fmt.hour: # 5.11.2012, 06am -> 5.11.2012, 07:00
        return date + relativedelta(hours=1, minute=0, second=0, microsecond=0)
    elif fmt.lsb() == fmt.day: # 5.11.2012 -> 6.11.2012, 00:00
        return date + relativedelta(days=1, hour=0, minute=0, second=0, microsecond=0)
    elif fmt.lsb() == fmt.month: # 11.2012 -> 1.12.2012
        return date + relativedelta(months=1, day=1, hour=0, minute=0, second=0, microsecond=0)
    else: # fmt.lsb() == fmt.year: # 2012 -> 1.1.2013, 00:00
        return date + relativedelta(
            years=1, month=1, day=1, hour=0, minute=0, second=0, microsecond=0)
class DateTimeParser():
    """Parser for user-supplied date and/or time strings.

    The grammar is built by *build_parser*. Calling the instance with a
    string returns a datetime; the ambiguous parts of the string are
    resolved by *guess_datetime*.
    """

    # pyparsing expressions, set by build_parser
    DATE = None
    TIME = None
    DATETIME = None

    def build_parser(self):
        """Build the DATE, TIME, and DATETIME grammars and return self.

        DATE        := YMD | DMY | MDY | YDM
        YMD         := YEAR SEP MON SEP DAY
        DMY         := DAY SEP [of] MON SEP YEAR
        MDY         := MON SEP DAY SEP YEAR
        YDM         := YEAR SEP DAY [of] MON
        DM          := DAY SEP [of] MON
        YM          := YEAR SEP MON
        MY          := MON SEP YEAR
        MD          := MON SEP DAY
        DAY         := [D]D | [D]D st | [D]D nd | [D]D rd | [D]D th
        MON         := [M]M | [month]
        YEAR        := [YY]YY
        SEP         := . | , | [whitespace]
        {D,M,Y}     := [digit]
        """
        # FIXME: Allow more patterns (e.g. 2012, 10; April, 5th; April, 2020)
        sep = Literal('.') # FIXME: Allow '. - :'
        year = Word(nums, exact=2) ^ Word(nums, exact=4)
        month = Word(nums, min=1, max=2) ^ oneOf(list(MONTH_LIT.keys()))
        day = Combine(Word(nums, min=1, max=2) + Optional(oneOf('st nd rd th')))
        # three-part-date
        YMD = year + sep + month + sep + day
        DMY = day + (sep ^ 'of') + month + sep + year
        MDY = month + sep + day + sep + year
        YDM = year + sep + day + (sep ^ 'of') + month
        # two-part-date
        DM = day + (sep ^ 'of')+ month
        YM = year + sep + month
        MY = month + sep + year
        MD = month + sep + day
        Y = Word(nums, exact=4)
        # date parser
        self.DATE = Group(YMD | DMY | YDM | MDY | DM | YM | MY | MD | Y).setResultsName('date')

        # TIME        := HOUR SEP MIN [SEP SEC [. MS]] | HOUR SEP MIN | HOUR [SEP MIN] {am|pm}
        # HOUR        := [H]H
        # MIN         := [M]M
        # SEC         := [S]S
        # {H,M,S}     := [digit]
        # SEP         := : | . | ,
        sep = Literal(':') # FIXME: Allow '. : -'
        HMS = Word(nums, min=1, max=2)
        MS = Word(nums, min=1)
        # time parser
        self.TIME = Group(HMS + sep + HMS + sep + HMS + oneOf('. :') + MS \
                        | HMS + sep + HMS + sep + HMS \
                        | HMS + Optional(sep + HMS) + oneOf('am pm') \
                        | HMS + sep + HMS ).setResultsName('time')

        # DATETIME    := DATE | TIME | DATE SEP TIME | TIME SEP DATE
        # SEP         := , [whitespace]
        self.DATETIME = Group(
              self.DATE \
            ^ self.TIME \
            ^ self.DATE + Optional(',') + self.TIME \
            ^ self.TIME + Optional(',') + self.DATE \
            ).setResultsName('datetime')
        return self

    def __call__(self, datestr):
        """Return the datetime that corresponds to the string *datestr*.

        Raises an errors.ParserError if *datestr* doesn't match the grammar.
        Errors raised by guess_datetime are passed on unchanged.
        """
        if self.DATETIME is None:
            # build the grammar on first use
            self.build_parser()

        try:
            parsed = self.DATETIME.parseString(datestr, parseAll=True)
        except ParseException as e:
            # keep the pyparsing error as cause for easier debugging
            raise errors.ParserError('Cannot parse query', e) from e

        # the format is only relevant for postprocessing (see increment)
        date, _ = guess_datetime(parsed[0])
        return date


# Default DateTimeParser instance.
#
# To produce a datetime, call
#
# >>> parse_datetime(datestring)
#
# Convenience shortcut for
#
# >>> DateTimeParser()(datestring)
#
parse_datetime = DateTimeParser().build_parser()

## EOF ##
# exports
__all__ = (
    'ast_from_string',
    )

# constants
SEARCH_DELIM = '/'
VALUE_DELIM = ','
DEFAULT_PREDICATE = 'tag'


## code ##

class SearchParser():
    """Parser that turns a user-specified search query into an AST.

    The grammar depends on the searchable predicates of the schema and is
    rebuilt whenever the *schema* property is assigned. Calling the instance
    with a query string returns the AST.
    """

    # valid predicates per type
    _PREDICATES_CATEGORICAL = None
    _PREDICATES_CONTINUOUS = None
    _PREDICATES_DATETIME = None

    # parsers
    _CATEGORICAL = None
    _CONTINUOUS = None
    _EXISTENCE = None
    _QUERY = None
    _TAG = None

    # NOTE(review): bsfs is not imported in this module. The annotations are
    # therefore written as strings so that they aren't evaluated when the
    # class is defined. The names *ast* and *predicates* used below are not
    # imported either -- confirm the intended imports.
    def __init__(self, schema: 'bsfs.schema.Schema'):
        self.schema = schema

    @property
    def schema(self) -> 'bsfs.schema.Schema':
        """Schema from which the searchable predicates are taken."""
        return self._schema

    @schema.setter
    def schema(self, schema: 'bsfs.schema.Schema'):
        self._schema = schema
        # the grammar depends on the schema's predicates
        self.build_parser()

    def build_parser(self):
        """Build the query grammar from the current schema and return self."""
        # valid predicates per type, as supplied by tagit.library
        # FIXME:
        #   * range / type constraints
        #   * how to filter predicates
        #   * distinguish between prefix and label
        #
        # Categorical: string, float, integer; labelled node (tag, group); maybe boolean
        # Continuous: float, integer
        # Datetime: datetime
        # Existential: all of the above, particularly bool; unlabelled node (preview)
        # -> rdfs:range
        #
        # > Target: Entity (allow others?) -> rdfs:domain
        # > Require: searchable as specified in backend AND user-searchable as specified in frontend
        self._PREDICATES_CATEGORICAL = self.schema.predicates(searchable=True, range=self.schema.tm.categorical) # FIXME!
        self._PREDICATES_CONTINUOUS = self.schema.predicates(searchable=True, range=self.schema.tm.numerical) # FIXME!
        self._PREDICATES_DATETIME = self.schema.predicates(searchable=True, range=self.schema.tm.datetime) # FIXME!

        # terminal symbols
        number = Group(Optional(oneOf('- +')) \
               + Combine(Word(nums) + Optional('.' + Optional(Word(nums)))))
        words = QuotedString(quoteChar='"', escChar='\\') \
              ^ QuotedString(quoteChar="'", escChar='\\') \
              ^ Word(alphanums + alphas8bit + punc8bit + ' *#%&-.:;<=>?@^_`{}~')
        # FIXME: allow escaped chars "( ) / , [ ]"
        # FIXME: Non-ascii characters

        # predicates
        predicate = Or([CaselessKeyword(p) for p in self._PREDICATES_CATEGORICAL]).setResultsName(
            'predicate')
        date_predicate = Or([CaselessKeyword(p) for p in self._PREDICATES_DATETIME]).setResultsName(
            'predicate')
        num_predicate = Or([CaselessKeyword(p) for p in self._PREDICATES_CONTINUOUS]).setResultsName(
            'predicate')

        # existence
        #
        # EXPR        := has PREDICATE | has no PREDICATE
        # PREDICATE   := [predicate]
        op = (CaselessKeyword('has') ^ CaselessKeyword('has no') ^ CaselessKeyword('has not')).setResultsName('op')
        self._EXISTENCE = Group(op + predicate).setResultsName('existence')


        # continuous
        #
        # EXPR        := PREDICATE CMP VALUE | VALUE CMP PREDICATE CMP VALUE | PREDICATE OP RANGE
        # PREDICATE   := [predicate]
        # CMP         := < | <= | = | >= | >
        # OP          := : | = | in | not in
        # RANGE       := BOPEN VALUE RSEP VALUE BCLOSE | BOPEN RSEP VALUE BCLOSE | BOPEN VALUE RSEP BCLOSE
        # BOPEN       := ( | [ | ]
        # BCLOSE      := ) | ] | [
        # RSEP        := : | -
        # VALUE       := [digit] | [date]

        # range style
        rsepn = oneOf(': -')
        rsepd = Literal('-')
        bclose = oneOf(') ] [').setResultsName('bclose')
        bopen = oneOf('( [ ]').setResultsName('bopen')
        op = Or([':', '=', 'in']).setResultsName('op')
        # The local name must differ from the datefmt module. Assigning to
        # *datefmt* here would make it a local variable of this method and
        # the module lookup on the right-hand side would raise an
        # UnboundLocalError.
        datetime_expr = datefmt.parse_datetime.DATETIME
        rngn = num_predicate  + op + bopen + number('lo') + rsepn + number('hi') + bclose ^ \
               num_predicate  + op + bopen + rsepn + number('hi') + bclose ^ \
               num_predicate  + op + bopen + number('lo') + rsepn + bclose
        rngd = date_predicate + op + bopen + datetime_expr('lo') + rsepd + datetime_expr('hi') + bclose ^ \
               date_predicate + op + bopen + rsepd + datetime_expr('hi') + bclose ^ \
               date_predicate + op + bopen + datetime_expr('lo') + rsepd + bclose
        # equation style
        cmp = oneOf('< <= = >= >').setResultsName('cmp')
        eqn = num_predicate + cmp('cright') + number('vright') ^ \
              number('vleft') + cmp('cleft') + num_predicate ^ \
              number('vleft') + cmp('cleft') + num_predicate + cmp('cright') + number('vright')
        eqd = date_predicate + cmp('cright') + datetime_expr('vright') ^ \
              datetime_expr('vleft') + cmp('cleft') + date_predicate ^ \
              datetime_expr('vleft') + cmp('cleft') + date_predicate + cmp('cright') + datetime_expr('vright')
        # combined
        self._CONTINUOUS = Group(
            Group(eqn).setResultsName('eq') ^
            Group(eqd).setResultsName('eq') ^
            Group(rngn).setResultsName('range') ^ \
            Group(rngd).setResultsName('range') \
            ).setResultsName('continuous')


        # categorical
        #
        # EXPR        := PREDICATE OP VALUE | PREDICATE OP (VALUE)
        # PREDICATE   := [predicate]
        # OP          := : | = | in | not in | != | ~ | !~
        # VALUE       := TERM | VALUE,
        # TERM        := [word]
        op = (CaselessKeyword('in') ^ CaselessKeyword('not in') ^ ':' ^ '=' ^ '!=' ^ '~' ^ '!~').setResultsName('op')
        value = delimitedList(words, delim=VALUE_DELIM).setResultsName('value')
        self._CATEGORICAL = Group(predicate + op + ('(' + value + ')' | value) ).setResultsName('categorical')


        # tag shortcuts
        #
        # EXPR        := OP VALUE | OP (VALUE) | VALUE | (VALUE)
        # OP          := ! | ~ | !~
        # VALUE       := TERM | VALUE,
        # TERM        := [word]
        op = oneOf('! ~ !~').setResultsName('op')
        value = delimitedList(words, delim=VALUE_DELIM).setResultsName('value')
        self._TAG = Group(Optional(op) + '(' + value + ')' ^ Optional(op) + value).setResultsName('tag')


        # overall query
        #
        # QUERY       := QUERY / QUERY | EXPR
        self._QUERY = delimitedList(self._EXISTENCE | self._CONTINUOUS | self._CATEGORICAL | self._TAG, delim=SEARCH_DELIM)
        return self

    def __del__(self):
        # The listener registration in __call__ is currently disabled and
        # *predicates* is not set by this class. Only detach the listener if
        # such an attribute actually exists.
        predicates = getattr(self, 'predicates', None)
        if self._QUERY is not None and predicates is not None: # remove listener
            try:
                predicates.ignore(self.build_parser)
            except ImportError:
                # The import fails if python is shutting down.
                # In that case, the ignore becomes unnecessary anyway.
                pass

    def __call__(self, search):
        """Return the AST of the query string *search*.

        Raises an errors.ParserError if the query doesn't match the grammar
        or if its bounds are inconsistent.
        """
        # FIXME: mb/port/parsing
        #if self._QUERY is None:
        #    # parsers were not initialized yet
        #    self.build_parser()
        #    # attach listener to receive future updates
        #    self.predicates.listen(self.build_parser)
        #    # FIXME: Additional filters would be handy
        #    #self.predicates.listen(self.build_parser, self.predicates.scope.library)

        try:
            parsed = self._QUERY.parseString(search, parseAll=True)
        except ParseException as e:
            # keep the pyparsing error as cause for easier debugging
            raise errors.ParserError('Cannot parse query', e) from e

        # convert to AST
        tokens = []
        for exp in parsed:
            if exp.getName() == 'existence':
                if 'op' not in exp: # prevented by grammar
                    raise errors.ParserError('Missing operator', exp)
                elif exp.op == 'has':
                    cond = ast.Existence()
                elif exp.op in ('has no', 'has not'):
                    cond = ast.Inexistence()
                else: # prevented by grammar
                    raise errors.ParserError('Invalid operator ({})'.format(exp.op), exp)

                tokens.append(
                    ast.Token(exp.predicate.lower(), cond))

            elif exp.getName() == 'categorical':
                values = [s.strip() for s in exp.value]
                if 'op' not in exp: # prevented by grammar
                    raise errors.ParserError('Missing operator', exp)
                elif exp.op in (':', '=', 'in'):
                    cond = ast.SetInclude(values)
                elif exp.op in ('!=', 'not in'):
                    cond = ast.SetExclude(values)
                elif exp.op == '~':
                    cond = ast.SetInclude(values, approximate=True)
                elif exp.op == '!~':
                    cond = ast.SetExclude(values, approximate=True)
                else: # prevented by grammar
                    raise errors.ParserError('Invalid operator ({})'.format(exp.op), exp)

                tokens.append(
                    ast.Token(exp.predicate.lower(), cond))

            elif exp.getName() == 'tag':
                values = [s.strip() for s in exp.value]
                if 'op' not in exp:
                    cond = ast.SetInclude(values)
                elif exp.op == '~':
                    cond = ast.SetInclude(values, approximate=True)
                elif exp.op == '!':
                    cond = ast.SetExclude(values)
                elif exp.op == '!~':
                    cond = ast.SetExclude(values, approximate=True)
                else: # prevented by grammar
                    raise errors.ParserError('Invalid operator ({})'.format(exp.op), exp)

                tokens.append(
                    ast.Token(DEFAULT_PREDICATE, cond))

            elif exp.getName() == 'continuous':

                lo, hi = None, None
                lo_inc, hi_inc = False, False
                predicate = None

                if 'eq' in exp:
                    # equation style
                    predicate = exp.eq.predicate.lower()

                    if ('>' in exp.eq.cleft and '<' in exp.eq.cright) or \
                       ('<' in exp.eq.cleft and '>' in exp.eq.cright) or \
                       (exp.eq.cleft == '=' and exp.eq.cright == '='):
                        # x > pred < y or x < pred > y or x = pred = y
                        raise errors.ParserError('Cannot have two lower or two upper bounds', exp)

                    # left-hand side: value CMP predicate
                    if '>' in exp.eq.cleft:
                        hi = exp.eq.vleft
                        hi_inc = '=' in exp.eq.cleft
                    elif '<' in exp.eq.cleft:
                        lo = exp.eq.vleft
                        lo_inc = '=' in exp.eq.cleft
                    elif exp.eq.cleft == '=':
                        hi = lo = exp.eq.vleft
                        lo_inc = hi_inc = True

                    # right-hand side: predicate CMP value
                    if '>' in exp.eq.cright:
                        lo = exp.eq.vright
                        lo_inc = '=' in exp.eq.cright
                    elif '<' in exp.eq.cright:
                        hi = exp.eq.vright
                        hi_inc = '=' in exp.eq.cright
                    elif exp.eq.cright == '=':
                        hi = lo = exp.eq.vright
                        lo_inc = hi_inc = True

                elif 'range' in exp: # value in [lo:hi]
                    predicate = exp.range.predicate.lower()

                    if 'lo' in exp.range:
                        lo = exp.range.lo
                        lo_inc = exp.range.bopen == '['
                    if 'hi' in exp.range:
                        hi = exp.range.hi
                        hi_inc = exp.range.bclose == ']'

                else: # prevented by grammar
                    raise errors.ParserError('Expression is neither a range nor an equation', exp)

                # interpret values
                if predicate in set([p.lower() for p in self._PREDICATES_DATETIME]):

                    # turn into datetime
                    lo, lfmt = datefmt.guess_datetime(lo) if lo is not None else (None, None)
                    hi, hfmt = datefmt.guess_datetime(hi) if hi is not None else (None, None)

                    if lo is None and hi is None: # prevented by grammar
                        raise errors.ParserError('At least one bound must be present', exp)

                    # turn the query into the format lo <= pred < hi by adjusting the boundaries
                    if hi == lo and lo_inc and hi_inc:
                        # example: pred = 2012 -> 1.1.2012 <= pred < 1.1.2013
                        hi = datefmt.increment(lo, lfmt)
                        lo_inc = True
                        hi_inc = False
                    else:
                        if lo is not None:
                            # example: pred >= 2012 -> pred >= 1.1.2012, 00:00
                            lo = datefmt.increment(lo, lfmt) if not lo_inc else lo
                            lo_inc = True

                        if hi is not None:
                            # example: pred <= 2012 -> pred < 1.1.2013, 00:00
                            hi = datefmt.increment(hi, hfmt) if hi_inc else hi
                            hi_inc = False

                    # build the ast node
                    if (lo is not None and lfmt.is_time()) or (hi is not None and hfmt.is_time()):
                        # time specification

                        if (lo is not None and not lfmt.is_time()) or \
                           (hi is not None and not hfmt.is_time()):
                            # lo/hi must both be time specifications
                            raise errors.ParserError('Both bounds must be a time specification', (lo, hi))

                        if lo is None:
                            # example: pred < 5 am -> 0 <= pred < 05:00
                            lo = ttime.from_timestamp_loc(0)
                            lo_inc = True

                        if hi is None:
                            # example: pred > 5 am -> 06:00 <= pred <= 24:00
                            hi = ttime.from_timestamp_loc(3600 * 24)
                            hi_inc = True

                        # Check consistency
                        if not (lo < hi or (lo == hi and lo_inc and hi_inc)):
                            raise errors.ParserError('Lower bound must not exceed upper bound', (lo, hi))

                        tokens.append(
                            ast.Token(predicate, ast.TimeRange(lo, hi, lo_inc, hi_inc)))

                    else: # date specification
                        # Check consistency
                        lo = lo if lo is not None else datetime.min
                        hi = hi if hi is not None else datetime.max

                        if not (lo < hi or (lo == hi and lo_inc and hi_inc)):
                            raise errors.ParserError('Lower bound must not exceed upper bound', (lo, hi))

                        tokens.append(
                            ast.Token(predicate, ast.Datetime(lo, hi, lo_inc, hi_inc)))

                else:
                    # number predicate
                    # a number is parsed as group of (optional sign, digits)
                    lo = float(''.join(lo)) if lo is not None else float('-inf')
                    hi = float(''.join(hi)) if hi is not None else float('inf')

                    # Check consistency
                    if not (lo < hi or (lo == hi and lo_inc and hi_inc)):
                        raise errors.ParserError('Lower bound must not exceed upper bound', (lo, hi))

                    tokens.append(
                        ast.Token(predicate, ast.Continuous(lo, hi, lo_inc, hi_inc)))

            else: # prevented by grammar
                raise errors.ParserError('Invalid expression', exp)

        return ast.AND(tokens)



# Default SearchParser instance.
#
# To produce an ast, call
#
# >>> ast_from_string(search)
#
# Convenience shortcut for
#
# >>> SearchParser(schema)(search)
#
# NOTE(review): *predicates* is not defined in this module -- confirm which
# schema the default instance is supposed to receive.
ast_from_string = SearchParser(predicates)

## EOF ##
+ """ + The grammar is composed as follows: + + QUERY := EXPR | EXPR, EXPR + EXPR := PREFIX PREDICATE SUFFIX | PREDICATE SUFFIX | PREFIX PREDICATE | PREDICATE + PREFIX := sort TYPE by | sort by + SUFFIX := SIMILAR DIRECTION | SIMILAR | DIRECTION + SIMILAR := similarity to ANCHOR | ANCHOR + TYPE := numerically | alphabetically + PREDICATE := [predicate] + ANCHOR := [guid] + DIRECTION := up | down | asc | desc | ascending | descending | reversed | upwards | downwards + """ + # predicates from sortkeys + self.PREDICATES = self.sortkeys.scope.library | self.sortkeys.typedef.anchored + + ## terminals + # direction is just a list of keywords + direction = oneOf('up down asc desc ascending descending reversed upwards downwards', + caseless=True).setResultsName('direction') + # type is just a list of keywords + type_ = oneOf('numerically alphabetically').setResultsName('type') + # predicates are from an enum + predicate = Or([CaselessKeyword(p) for p in self.PREDICATES]).setResultsName('predicate') + # anchor is a hex digest + anchor = Word('abcdef0123456789ABCDEF').setResultsName('anchor') + + ## rules + similar = Or([CaselessKeyword('similarity to') + anchor, + anchor]) + suffix = Or([similar + direction, similar, + direction]) + prefix = Or([CaselessKeyword('sort') + type_ + CaselessKeyword('by'), + CaselessKeyword('sort by')]) + expr = Group(Or([prefix + predicate + suffix, + predicate + suffix, + prefix + predicate, + predicate])) + + self.QUERY = delimitedList(expr, delim=',') + return self + + def __del__(self): + if self.QUERY is not None: # remove listener + try: + self.sortkeys.ignore(self.build_parser) + except ImportError: + # The import fails if python is shutting down. + # In that case, the ignore becomes unnecessary anyway. 
+ pass + + def parse(self, sort): + if self.QUERY is None: + # initialize parser + self.build_parser() + # attach listener to receive future updates + self.sortkeys.listen(self.build_parser) + + try: + parsed = self.QUERY.parseString(sort, parseAll=True) + except ParseException as e: + raise errors.ParserError('Cannot parse query', e) + + # convert to AST + tokens = [] + for exp in parsed: + args = Struct( + predicate=None, + type=None, + anchor=None, + direction='asc', + ) + args.update(**exp.asDict()) + + # check predicate + if args.predicate is None: # prevented by grammar + raise errors.ParserError('Missing sort key', exp) + if args.predicate not in self.sortkeys: # prevented by grammar + raise errors.ParserError('Invalid sort key', exp) + + # check direction + if args.direction in ('up', 'ascending', 'asc', 'upwards'): + reverse = False + elif args.direction in ('down', 'desc', 'descending', 'reversed', 'downwards'): + reverse = True + else: # prevented by grammar + raise errors.ParserError('Invalid direction', exp) + + # infer type from predicate if needed + if args.anchor is not None: + args.type = 'anchored' + elif args.type is None: + typedef = self.sortkeys.predicate(args.predicate).typedef + if not len(typedef): + raise errors.ParserError('Undefined type', exp) + elif len(typedef) == 1: + args.type = list(typedef)[0].lower() + else: + raise errors.ParserError('Ambiguous type', exp) + + # translate types + args.type = { + 'numerically': 'numerical', + 'alphabetically': 'alphabetical' + }.get(args.type, args.type) + + # check type compatibility + admissible_types = {t.lower() for t in self.sortkeys.predicate(args.predicate).typedef} + if args.type not in admissible_types: + raise errors.ParserError('Invalid type for predicate', exp) + elif args.type == 'anchored' and args.anchor is None: # type set if anchor isn't None + raise errors.ParserError('No anchor given', exp) + + # build AST + if args.type in ('anchored', ): + 
tokens.append(ast.AnchoredSort(args.predicate, args.anchor, reverse)) + elif args.type in ('alphabetical', 'alphabetically'): + tokens.append(ast.AlphabeticalSort(args.predicate, reverse)) + elif args.type in ('numerical', 'numerically'): + tokens.append(ast.NumericalSort(args.predicate, reverse)) + else: # prevented by grammar + raise errors.ParserError('Invalid type for predicate', exp) + + # aggregate if need be + if len(tokens) == 1: + return tokens[0] + else: + return ast.Order(*tokens) + +"""Default SortParser instance. + +To produce an ast, call + +>>> sort_from_string(sort) + +Convenience shortcut for + +>>> SortParser().parse(sort) + +""" +sort_from_string = SortParser(sortkeys) + +## EOF ## diff --git a/tagit/utils/errors.py b/tagit/utils/errors.py index 1bed670..7a2556e 100644 --- a/tagit/utils/errors.py +++ b/tagit/utils/errors.py @@ -2,17 +2,17 @@ Part of the tagit module. A copy of the license is provided with the project. -Author: Matthias Baumgartner, 2018 +Author: Matthias Baumgartner, 2022 """ # exports __all__ = ( - 'EmptyFileError', - 'LoaderError', - 'NotAFileError', - 'ProgrammingError', - 'UserError', - 'abstract', - ) + 'EmptyFileError', + 'LoaderError', + 'NotAFileError', + 'ProgrammingError', + 'UserError', + 'abstract', + ) ## code ## @@ -49,4 +49,8 @@ class ParserBackendError(Exception): """Generic parser backend error.""" pass +class ParserError(Exception): + """String parsing failure.""" + pass + ## EOF ## diff --git a/test/parsing/__init__.py b/test/parsing/__init__.py new file mode 100644 index 0000000..e69de29 --- /dev/null +++ b/test/parsing/__init__.py diff --git a/test/parsing/test_datefmt.py b/test/parsing/test_datefmt.py new file mode 100644 index 0000000..3f80c15 --- /dev/null +++ b/test/parsing/test_datefmt.py @@ -0,0 +1,378 @@ +"""Test datetime parser. + +Part of the tagit test suite. +A copy of the license is provided with the project. 
+Author: Matthias Baumgartner, 2022 +""" +# standard imports +import unittest +from datetime import date as ddate +from datetime import time as dtime +from datetime import datetime + +# external imports +from pyparsing import ParseException + +# tagit imports +from tagit.utils import errors, Struct + +# objects to test +#from tagit.parsing.datefmt import DatefmtError, DateParserError, TimeParserError, DateFormatError, guess_date, guess_time, guess_datetime, increment, PRIORITIES_US, DF +from tagit.parsing.datefmt import guess_date, guess_time, PRIORITIES_US, DateParserError, DateFormatError, TimeParserError, guess_datetime, DF, parse_datetime, increment, DateTimeParser + + +## code ##
+
+class TestGuessDatetime(unittest.TestCase):
+    """Tests for the date and time helpers of tagit.parsing.datefmt.
+
+    Covers DateTimeParser, guess_date, guess_time, guess_datetime,
+    the DF format descriptor and increment.
+    """
+    def test_parse_datetime(self):
+        # Parse complete strings with a fresh DateTimeParser instance.
+        # NOTE: the local name shadows the parse_datetime imported at module level.
+        parse_datetime = DateTimeParser()
+        # Inputs without a year are expected to resolve to the current year.
+        cyear = ddate.today().year
+        # NOTE(review): cmon and cday are not used by any active assertion below.
+        cmon = ddate.today().month
+        cday = ddate.today().day
+
+        # date only: vary number formats
+        self.assertEqual(parse_datetime('3.4.12'), datetime(2012, 4, 3))
+        self.assertEqual(parse_datetime('15.8.19'), datetime(2019, 8, 15))
+        self.assertEqual(parse_datetime('8.11.98'), datetime(1998, 11, 8))
+        self.assertEqual(parse_datetime('10.11.12'), datetime(2012, 11, 10))
+        self.assertEqual(parse_datetime('3.4.1912'), datetime(1912, 4, 3))
+        self.assertEqual(parse_datetime('15.8.1919'), datetime(1919, 8, 15))
+        self.assertEqual(parse_datetime('8.11.1998'), datetime(1998, 11, 8))
+        self.assertEqual(parse_datetime('10.11.1912'), datetime(1912, 11, 10))
+        self.assertEqual(parse_datetime('8.1998'), datetime(1998, 8, 1))
+        self.assertEqual(parse_datetime('10.1912'), datetime(1912, 10, 1))
+        self.assertRaises(errors.ParserError, parse_datetime, 'ab.cd.ef')
+        self.assertRaises(errors.ParserError, parse_datetime, '123.123.2000')
+        self.assertRaises(errors.ParserError, parse_datetime, '.123.2000')
+        self.assertRaises(errors.ParserError, parse_datetime, '123..2000')
+        self.assertRaises(errors.ParserError, parse_datetime, '12.12.20001')
+        # date only: vary order (three-part)
+        self.assertEqual(parse_datetime('15.98.8'), datetime(1998, 8, 15))
+        self.assertEqual(parse_datetime('8.98.15'), datetime(1998, 8, 15))
+        self.assertEqual(parse_datetime('8.15.98'), datetime(1998, 8, 15))
+        self.assertEqual(parse_datetime('15.8.98'), datetime(1998, 8, 15))
+        self.assertEqual(parse_datetime('98.8.15'), datetime(1998, 8, 15))
+        self.assertEqual(parse_datetime('98.15.8'), datetime(1998, 8, 15))
+        # date only: vary order (two-part)
+        self.assertEqual(parse_datetime('15.98'), datetime(1998, 1, 15))
+        self.assertEqual(parse_datetime('08.98'), datetime(1998, 8, 1))
+        self.assertEqual(parse_datetime('08.15'), datetime(2015, 8, 1))
+        self.assertEqual(parse_datetime('15.08'), datetime(cyear, 8, 15))
+        self.assertEqual(parse_datetime('98.08'), datetime(1998, 8, 1))
+        self.assertEqual(parse_datetime('98.15'), datetime(1998, 1, 15))
+        # date only: one part
+        self.assertEqual(parse_datetime('1998'), datetime(1998, 1, 1))
+        # date only: literal month
+        self.assertEqual(parse_datetime('98.April'), datetime(1998, 4, 1))
+        # FIXME: Allow more patterns
+        #self.assertEqual(parse_datetime('98, April'), datetime(1998, 4, 1))
+        #self.assertEqual(parse_datetime('April, 15'), datetime(2015, 4, 1))
+        # date only: day with suffix
+        #self.assertEqual(parse_datetime('10th 2000'), datetime(2000, 1, 10))
+        #self.assertEqual(parse_datetime('2000, 10th'), datetime(2000, 1, 10))
+        #self.assertEqual(parse_datetime('April, 10th'), datetime(cyear, 4, 10))
+        #self.assertEqual(parse_datetime('April 10th'), datetime(cyear, 4, 10))
+        #self.assertEqual(parse_datetime('10th April'), datetime(cyear, 4, 10))
+        # date only: of notation
+        #self.assertEqual(parse_datetime('10th of April, 2000'), datetime(2000, 4, 10))
+        #self.assertEqual(parse_datetime('10th of April'), datetime(cyear, 4, 10))
+        #self.assertEqual(parse_datetime('10 of 04'), datetime(cyear, 4, 10))
+        # invalid ranges
+        self.assertRaises(DateParserError, parse_datetime, '10.93.2013')
+        self.assertRaises(DateParserError, parse_datetime, '48.10.2013')
+        self.assertRaises(DateParserError, parse_datetime, '48.93.2013')
+        self.assertRaises(DateParserError, parse_datetime, "52.74")
+
+        # time only: am/pm
+        # (inputs without a date are expected on 1970-01-01)
+        self.assertEqual(parse_datetime("10 am"), datetime(1970, 1, 1, 10))
+        self.assertEqual(parse_datetime("10 pm"), datetime(1970, 1, 1, 22))
+        self.assertEqual(parse_datetime("10:02 pm"), datetime(1970, 1, 1, 22, 2))
+        self.assertEqual(parse_datetime("14 am"), datetime(1970, 1, 1, 14))
+        self.assertRaises(TimeParserError, parse_datetime, "14 pm")
+        # time only: 24hrs format
+        self.assertEqual(parse_datetime("1:2"), datetime(1970, 1, 1, 1, 2))
+        self.assertEqual(parse_datetime("12:34"), datetime(1970, 1, 1, 12, 34))
+        self.assertEqual(parse_datetime("12:34:54"), datetime(1970, 1, 1, 12, 34, 54))
+        self.assertEqual(parse_datetime("12:34:54.123"), datetime(1970, 1, 1, 12, 34, 54, 123000))
+        self.assertEqual(parse_datetime("1:2:3.4"), datetime(1970, 1, 1, 1, 2, 3, 400000))
+        self.assertRaises(errors.ParserError, parse_datetime, '84:12')
+        self.assertRaises(errors.ParserError, parse_datetime, '12:75')
+        self.assertRaises(errors.ParserError, parse_datetime, '12:13:84')
+        # time only: MM:SS (the expected result has minute=54 and second=34)
+        self.assertEqual(parse_datetime("54:34"), datetime(1970, 1, 1, 0, 54, 34))
+        # time only: invalid format
+        self.assertRaises(errors.ParserError, parse_datetime, '12:')
+
+        # date and time
+        self.assertEqual(parse_datetime("12:34 18.05.2012"), datetime(2012, 5, 18, 12, 34))
+        self.assertEqual(parse_datetime("12:34, 18.05.2012"), datetime(2012, 5, 18, 12, 34))
+        self.assertEqual(parse_datetime("18.05.2012 12:34"), datetime(2012, 5, 18, 12, 34))
+        self.assertEqual(parse_datetime("18.05.2012, 12:34"), datetime(2012, 5, 18, 12, 34))
+        self.assertEqual(parse_datetime("2012, 12:34"), datetime(2012, 1, 1, 12, 34))
+        self.assertEqual(parse_datetime("2012, 12am"), datetime(2012, 1, 1, 12))
+        self.assertRaises(errors.ParserError, parse_datetime, '12.34 18:05:2012')
+
+        # invalid args
+        self.assertRaises(errors.ParserError, parse_datetime, '')
+
+    def test_guess_date(self):
+        # guess_date is called with a list of tokens and is expected to return
+        # a (date, format) pair, where the format names the detected order of
+        # Day, Month and Year.
+        # Inputs without a year are expected to resolve to the current year.
+        this_year = ddate.today().year
+        # some unambiguous formats
+        self.assertEqual(guess_date('18 . 05 . 2012'.split()), (ddate(2012, 5, 18), 'DMY'))
+        self.assertEqual(guess_date('18 5 2012'.split()), (ddate(2012, 5, 18), 'DMY'))
+        self.assertEqual(guess_date('2012 , 05 , 18'.split()), (ddate(2012, 5, 18), 'YMD'))
+        self.assertEqual(guess_date('2012 5 18'.split()), (ddate(2012, 5, 18), 'YMD'))
+        self.assertEqual(guess_date('18 5 2004'.split()), (ddate(2004, 5, 18), 'DMY'))
+        self.assertEqual(guess_date('2004 5 18'.split()), (ddate(2004, 5, 18), 'YMD'))
+        self.assertEqual(guess_date('10 11 12'.split()), (ddate(2012, 11, 10), 'DMY'))
+        self.assertEqual(guess_date('10 11 12'.split(), priorities=PRIORITIES_US),
+                (ddate(2012, 10, 11), 'MDY'))
+        self.assertEqual(guess_date('2012 04 05'.split()), (ddate(2012, 4, 5), 'YMD'))
+        self.assertEqual(guess_date('2012 4 5'.split()), (ddate(2012, 4, 5), 'YMD'))
+        self.assertEqual(guess_date('2012 May , 4th'.split()), (ddate(2012, 5, 4), 'YMD'))
+        self.assertEqual(guess_date('4 5 2012'.split()), (ddate(2012, 5, 4), 'DMY'))
+        self.assertEqual(guess_date('4th of May 2012'.split()), (ddate(2012, 5, 4), 'DMY'))
+        self.assertEqual(guess_date('2012 4th of May'.split()), (ddate(2012, 5, 4), 'YDM'))
+
+        # three-part format
+        # unambiguous MD ranges, full year
+        self.assertEqual(guess_date('28 11 2018'.split()), (ddate(2018, 11, 28), 'DMY'))
+        self.assertEqual(guess_date('28 2018 11'.split()), (ddate(2018, 11, 28), 'DYM'))
+        self.assertEqual(guess_date('11 28 2018'.split()), (ddate(2018, 11, 28), 'MDY'))
+        self.assertEqual(guess_date('11 2018 28'.split()), (ddate(2018, 11, 28), 'MYD'))
+        self.assertEqual(guess_date('2018 11 28'.split()), (ddate(2018, 11, 28), 'YMD'))
+        self.assertEqual(guess_date('2018 28 11'.split()), (ddate(2018, 11, 28), 'YDM'))
+        # unambiguous MDY ranges
+        self.assertEqual(guess_date('28 11 98'.split()), (ddate(1998, 11, 28), 'DMY'))
+        self.assertEqual(guess_date('28 98 11'.split()), (ddate(1998, 11, 28), 'DYM'))
+        self.assertEqual(guess_date('11 28 98'.split()), (ddate(1998, 11, 28), 'MDY'))
+        self.assertEqual(guess_date('11 98 28'.split()), (ddate(1998, 11, 28), 'MYD'))
+        self.assertEqual(guess_date('98 11 28'.split()), (ddate(1998, 11, 28), 'YMD'))
+        self.assertEqual(guess_date('98 28 11'.split()), (ddate(1998, 11, 28), 'YDM'))
+        # explicit YMD
+        self.assertEqual(guess_date('10th of April 2018'.split()), (ddate(2018, 4, 10), 'DMY'))
+        self.assertEqual(guess_date('April 10th 98'.split()), (ddate(1998, 4, 10), 'MDY'))
+        self.assertEqual(guess_date('98 April 10th'.split()), (ddate(1998, 4, 10), 'YMD'))
+        self.assertEqual(guess_date('2018 10th of April'.split()), (ddate(2018, 4, 10), 'YDM'))
+        self.assertEqual(guess_date('2018 10 of 04'.split()), (ddate(2018, 4, 10), 'YDM'))
+        # explicit MY
+        self.assertEqual(guess_date('2018 10 April'.split()), (ddate(2018, 4, 10), 'YDM'))
+        self.assertEqual(guess_date('2018 April 10'.split()), (ddate(2018, 4, 10), 'YMD'))
+        self.assertEqual(guess_date('April 10 2018'.split()), (ddate(2018, 4, 10), 'MDY'))
+        # explicit DY
+        self.assertEqual(guess_date('10th 04 98'.split()), (ddate(1998, 4, 10), 'DMY'))
+        self.assertEqual(guess_date('2018 10th 04'.split()), (ddate(2018, 4, 10), 'YDM'))
+        self.assertEqual(guess_date('2018 04 10th'.split()), (ddate(2018, 4, 10), 'YMD'))
+        # explicit DM
+        self.assertEqual(guess_date('10th April 10'.split()), (ddate(2010, 4, 10), 'DMY'))
+        self.assertEqual(guess_date('10 10th April'.split()), (ddate(2010, 4, 10), 'YDM'))
+        self.assertEqual(guess_date('10 April 10th'.split()), (ddate(2010, 4, 10), 'YMD'))
+        # ambiguous formats: explicit Y
+        self.assertEqual(guess_date('2018 04 08'.split()), (ddate(2018, 4, 8), 'YMD'))
+        self.assertEqual(guess_date('04 2018 08'.split()), (ddate(2018, 8, 4), 'DYM'))
+        self.assertEqual(guess_date('08 04 2018'.split()), (ddate(2018, 4, 8), 'DMY'))
+        self.assertEqual(guess_date('4 8 11'.split()), (ddate(2011, 8, 4), 'DMY'))
+        # ambiguous formats: explicit D
+        self.assertEqual(guess_date('10 4th 11'.split()), (ddate(2011, 10, 4), 'MDY'))
+        self.assertEqual(guess_date('11 10 4th'.split()), (ddate(2011, 10, 4), 'YMD'))
+        self.assertEqual(guess_date('4th 11 10'.split()), (ddate(2010, 11, 4), 'DMY'))
+        self.assertEqual(guess_date('4th 10 11'.split()), (ddate(2011, 10, 4), 'DMY'))
+        # ambiguous formats: explicit M
+        self.assertEqual(guess_date('April 21 08'.split()), (ddate(2008, 4, 21), 'MDY'))
+        self.assertEqual(guess_date('08 April 21'.split()), (ddate(2021, 4, 8), 'DMY'))
+        self.assertEqual(guess_date('21 08 April'.split()), (ddate(2021, 4, 8), 'YDM'))
+        # fully ambiguous
+        self.assertEqual(guess_date('04 08 10'.split()), (ddate(2010, 8, 4), 'DMY'))
+        # errors
+        self.assertRaises(DateParserError, guess_date, '2012 98 10'.split())
+        self.assertRaises(DateParserError, guess_date, 'April 98 April'.split())
+        self.assertRaises(DateParserError, guess_date, '10th 98 29'.split())
+
+        # two-part format
+        # unambiguous DY ranges
+        self.assertEqual(guess_date('28 98'.split()), (ddate(1998, 1, 28), 'DY'))
+        self.assertEqual(guess_date('98 28'.split()), (ddate(1998, 1, 28), 'YD'))
+        self.assertEqual(guess_date('2010 28'.split()), (ddate(2010, 1, 28), 'YD'))
+        self.assertEqual(guess_date('28 2010'.split()), (ddate(2010, 1, 28), 'DY'))
+        # explicit DY
+        self.assertEqual(guess_date('28th 2010'.split()), (ddate(2010, 1, 28), 'DY'))
+        self.assertEqual(guess_date('2010 28th'.split()), (ddate(2010, 1, 28), 'YD'))
+        # explicit MY
+        self.assertEqual(guess_date('2010 April'.split()), (ddate(2010, 4, 1), 'YM'))
+        self.assertEqual(guess_date('April 2010'.split()), (ddate(2010, 4, 1), 'MY'))
+        # explicit DM
+        self.assertEqual(guess_date('April 10th'.split()), (ddate(this_year, 4, 10), 'MD'))
+        self.assertEqual(guess_date('10th April'.split()), (ddate(this_year, 4, 10), 'DM'))
+        self.assertEqual(guess_date('10th of April'.split()), (ddate(this_year, 4, 10), 'DM'))
+        self.assertEqual(guess_date('10 of 04'.split()), (ddate(this_year, 4, 10), 'DM'))
+        self.assertEqual(guess_date('10th 4'.split()), (ddate(this_year, 4, 10), 'DM'))
+        # explicit Y
+        self.assertEqual(guess_date('2010 04'.split()), (ddate(2010, 4, 1), 'YM'))
+        self.assertEqual(guess_date('04 2010'.split()), (ddate(2010, 4, 1), 'MY'))
+        self.assertEqual(guess_date('04 98'.split()), (ddate(1998, 4, 1), 'MY'))
+        self.assertEqual(guess_date('98 04'.split()), (ddate(1998, 4, 1), 'YM'))
+        # explicit M
+        self.assertEqual(guess_date('April 10'.split()), (ddate(2010, 4, 1), 'MY'))
+        self.assertEqual(guess_date('10 April'.split()), (ddate(this_year, 4, 10), 'DM'))
+        # explicit D
+        self.assertEqual(guess_date('10th 08'.split()), (ddate(this_year, 8, 10), 'DM'))
+        self.assertEqual(guess_date('08 10th'.split()), (ddate(this_year, 8, 10), 'MD'))
+        # some hints
+        self.assertEqual(guess_date('18 5'.split()), (ddate(this_year, 5, 18), 'DM'))
+        self.assertEqual(guess_date('4 8'.split()), (ddate(this_year, 8, 4), 'DM'))
+        # fully ambiguous
+        self.assertEqual(guess_date('08 10'.split()), (ddate(this_year, 10, 8), 'DM'))
+
+        # one-part format
+        # full year
+        self.assertEqual(guess_date('2018'.split()), (ddate(2018, 1, 1), 'Y'))
+        # short year
+        self.assertEqual(guess_date('18'.split()), (ddate(2018, 1, 1), 'Y'))
+        self.assertEqual(guess_date('98'.split()), (ddate(1998, 1, 1), 'Y'))
+        self.assertEqual(guess_date('08'.split()), (ddate(2008, 1, 1), 'Y'))
+        # non-year token
+        self.assertRaises(DateParserError, guess_date, ('1', ))
+        self.assertRaises(DateParserError, guess_date, ('April', ))
+        self.assertRaises(DateParserError, guess_date, ('10th', ))
+
+        # other errors
+        self.assertRaises(DateParserError, guess_date, '')
+        self.assertRaises(DateParserError, guess_date, 'fuuu'.split())
+        self.assertRaises(DateParserError, guess_date, '1 fuuu'.split())
+        self.assertRaises(DateParserError, guess_date, '1 fuuu bar 2'.split())
+        self.assertRaises(DateParserError, guess_date, '1 2 3 4'.split())
+
+    def test_guess_time(self):
+        # guess_time is called with a list of tokens and is expected to return
+        # a (time, format) pair. In the expected formats, h/m/s stand for
+        # hour/minute/second and n for the fractional-second token, which is
+        # compared through the microsecond field.
+        # single token
+        self.assertEqual(guess_time(['9']), (dtime(hour=9), 'h'))
+        # am/pm notation
+        self.assertEqual(guess_time("9 am".split()), (dtime(hour=9), 'h'))
+        self.assertEqual(guess_time("10 am".split()), (dtime(hour=10), 'h'))
+        self.assertEqual(guess_time("09 am".split()), (dtime(hour=9), 'h'))
+        self.assertEqual(guess_time("9 pm".split()), (dtime(hour=21), 'h'))
+        self.assertEqual(guess_time("10 pm".split()), (dtime(hour=22), 'h'))
+        self.assertEqual(guess_time("09 pm".split()), (dtime(hour=21), 'h'))
+        self.assertEqual(guess_time("10 02 am".split()), (dtime(hour=10, minute=2), 'hm'))
+        self.assertEqual(guess_time("10 02 pm".split()), (dtime(hour=22, minute=2), 'hm'))
+        self.assertEqual(guess_time("14 am".split()), (dtime(hour=14), 'h'))
+        self.assertRaises(TimeParserError, guess_time, "14 pm".split())
+
+        # 24-hrs notation
+        self.assertEqual(guess_time("12 34".split()), (dtime(hour=12, minute=34), 'hm'))
+        self.assertEqual(guess_time("15 32".split()), (dtime(hour=15, minute=32), 'hm'))
+        self.assertEqual(guess_time("12 04".split()), (dtime(hour=12, minute=4), 'hm'))
+        self.assertEqual(guess_time("12 4".split()), (dtime(hour=12, minute=4), 'hm'))
+        # range
+        # (a first token that cannot be an hour is expected to be read as minutes)
+        self.assertEqual(guess_time("12 58".split()), (dtime(hour=12, minute=58), 'hm'))
+        self.assertEqual(guess_time("31 04".split()), (dtime(minute=31, second=4), 'ms'))
+        self.assertEqual(guess_time("31 58".split()), (dtime(minute=31, second=58), 'ms'))
+        # three terms
+        self.assertEqual(guess_time("12 34 54".split()),
+                (dtime(hour=12, minute=34, second=54), 'hms'))
+        # four terms
+        self.assertEqual(guess_time("12 34 54 984".split()),
+                (dtime(hour=12, minute=34, second=54, microsecond=984000), 'hmsn'))
+        # trailing zeros
+        self.assertEqual(guess_time("12 34 54 98400".split()),
+                (dtime(hour=12, minute=34, second=54, microsecond=984000), 'hmsn'))
+        # leading zeros
+        self.assertEqual(guess_time("12 34 54 098400".split()),
+                (dtime(hour=12, minute=34, second=54, microsecond=98400), 'hmsn'))
+
+        # invalid formats
+        self.assertRaises(TimeParserError, guess_time, [])
+        self.assertRaises(TimeParserError, guess_time, ['0', '1', '2', '3', '4'])
+        self.assertRaises(TimeParserError, guess_time, "83 02".split())
+        self.assertRaises(TimeParserError, guess_time, "52 74".split())
+
+    def test_guess_datetime(self):
+        # guess_datetime is called with a Struct that holds the 'date' and/or
+        # 'time' token lists. The expected format is the date format followed
+        # by the time format; a missing date is expected as 1970-01-01.
+        self.assertEqual(guess_datetime(
+            Struct({'date': '18 05 2012'.split(), 'time': '12 34'.split()})),
+            (datetime(2012, 5, 18, 12, 34), 'DMYhm'))
+        self.assertEqual(guess_datetime(Struct({'date': '18 05 2012'.split()})),
+            (datetime(2012, 5, 18), 'DMY'))
+        self.assertEqual(guess_datetime(Struct({'time': '12 34'.split()})),
+            (datetime(1970, 1, 1, 12, 34), 'hm'))
+        self.assertRaises(DateFormatError, guess_datetime, Struct({}))
+
+    def test_DF(self):
+        # The expected values below imply that format symbols are
+        # case-sensitive ('ydHSN' counts as invalid) and are ranked
+        # Y > M > D > m > n by msb/lsb.
+        # msb
+        self.assertRaises(DateFormatError, DF('').msb)
+        self.assertRaises(DateFormatError, DF('abc').msb)
+        self.assertRaises(DateFormatError, DF('ydHSN').msb)
+        self.assertEqual('Y', DF('Yab').msb())
+        self.assertEqual('Y', DF('YDM').msb())
+        self.assertEqual('Y', DF('MdY').msb())
+        self.assertEqual('D', DF('mDn').msb())
+        self.assertEqual('m', DF('mdn').msb())
+        self.assertEqual('n', DF('nab').msb())
+        # lsb
+        self.assertRaises(DateFormatError, DF('').lsb)
+        self.assertRaises(DateFormatError, DF('abc').lsb)
+        self.assertEqual('Y', DF('Yab').lsb())
+        self.assertEqual('D', DF('YDM').lsb())
+        self.assertEqual('M', DF('MdY').lsb())
+        self.assertEqual('n', DF('mDn').lsb())
+        self.assertEqual('n', DF('nab').lsb())
+        # is_time
+        self.assertTrue(DF('mshn').is_time())
+        self.assertTrue(DF('mh').is_time())
+        self.assertTrue(DF('h').is_time())
+        self.assertTrue(DF('m').is_time())
+        self.assertTrue(DF('s').is_time())
+        self.assertTrue(DF('n').is_time())
+        self.assertFalse(DF('').is_time())
+        self.assertFalse(DF('abc').is_time())
+        self.assertFalse(DF('Msnh').is_time())
+        self.assertFalse(DF('Ymsnh').is_time())
+        self.assertFalse(DF('Dmsnh').is_time())
+        self.assertFalse(DF('YDM').is_time())
+        # is_date
+        self.assertTrue(DF('YDM').is_date())
+        self.assertTrue(DF('YM').is_date())
+        self.assertTrue(DF('DM').is_date())
+        self.assertTrue(DF('DY').is_date())
+        self.assertTrue(DF('Y').is_date())
+        self.assertTrue(DF('D').is_date())
+        self.assertTrue(DF('M').is_date())
+        self.assertFalse(DF('').is_date())
+        self.assertFalse(DF('abc').is_date())
+        self.assertFalse(DF('YDMn').is_date())
+        self.assertFalse(DF('YDm').is_date())
+        self.assertFalse(DF('YDh').is_date())
+        self.assertFalse(DF('hmsn').is_date())
+        # valid
+        self.assertTrue(DF('Y').valid())
+        self.assertTrue(DF('YDMhsmn').valid())
+        self.assertTrue(DF('Yabc').valid())
+        self.assertFalse(DF('').valid())
+        self.assertFalse(DF('abc').valid())
+        self.assertFalse(DF('ydHSN').valid())
+
+
+    def test_increment(self):
+        # increment is expected to advance the value by one unit of the
+        # format's field. For D, M and Y the expected results also have all
+        # less significant fields reset.
+        self.assertRaises(DateFormatError, increment, None, '')
+        self.assertEqual(increment(datetime(1970, 1, 1, 0, 0, 0, 10), DF('n')),
+                datetime(1970, 1, 1, 0, 0, 0, 11))
+        self.assertEqual(increment(datetime(1970, 1, 1, 0, 0, 1), DF('s')),
+                datetime(1970, 1, 1, 0, 0, 2))
+        self.assertEqual(increment(datetime(1970, 1, 1, 0, 1), DF('m')),
+                datetime(1970, 1, 1, 0, 2))
+        self.assertEqual(increment(datetime(1970, 1, 1, 1), DF('h')),
+                datetime(1970, 1, 1, 2))
+        # NOTE(review): exact duplicate of the previous assertion.
+        self.assertEqual(increment(datetime(1970, 1, 1, 1), DF('h')),
+                datetime(1970, 1, 1, 2))
+        self.assertEqual(increment(datetime(1970, 2, 3, 4, 5, 6), DF('D')),
+                datetime(1970, 2, 4))
+        self.assertEqual(increment(datetime(1970, 2, 3, 4, 5, 6), DF('M')),
+                datetime(1970, 3, 1))
+        self.assertEqual(increment(datetime(1970, 2, 3, 4, 5, 6), DF('Y')),
+                datetime(1971, 1, 1))
+        self.assertRaises(DateFormatError, increment, datetime(1970, 2, 3, 4, 5, 6), DF('abc'))
+
+## main ## + +if __name__ == '__main__': + unittest.main() + +## EOF ## diff --git a/test/parsing/test_search.py b/test/parsing/test_search.py new file mode 100644 index 0000000..23801d0 --- /dev/null +++ b/test/parsing/test_search.py @@ -0,0 +1,707
@@ +""" + +Part of the tagit test suite. +A copy of the license is provided with the project. +Author: Matthias Baumgartner, 2022 +""" +# standard imports +import unittest +from datetime import datetime + +# external imports +from pyparsing import ParseException + +# tagit imports +from tagit.utils import errors +#from tagit.parsing.search import ast, predicates, PredicateScope # FIXME: mb/port/parsing + +# objects to test +from tagit.parsing.search import ast_from_string + + +## code ## + +class TestScope(PredicateScope): + _scope_order = ['major', 'minor', 'micro'] + _init_values = ['library'] + + +class TestParseContinuous(unittest.TestCase): + longMessage = True + + def setUp(self): + predicates.expose('mime', + TestScope('attribute', 'mime'), 'Categorical') + predicates.expose('iso', + TestScope('attribute', 'iso'), 'Continuous', 'Categorical') + predicates.expose('time', + TestScope('generic', 't_image_create_loc'), 'TimeRange', 'Datetime') + predicates.expose('tag', + TestScope('generic', 'tag'), 'Categorical') + + def _test(self, query, target): + predicate, condition = target + result = ast_from_string(query) + target = ast.AND([ast.Token(predicate, condition)]) + self.assertEqual(result, target, msg="in query '{}'".format(query)) + + def test_larger_than(self): + # larger than A (inclusive) + for editable in [ + # range + "{predicate} in [{num}:]", "{predicate} in [{num}:[", "{predicate} in [{num}:)", + "{predicate} : [{num}:]", "{predicate} : [{num}:[", "{predicate} : [{num}:)", + "{predicate} = [{num}:]", "{predicate} = [{num}:[", "{predicate} = [{num}:)", + ]: + # positive + self._test(editable.format(num=1.23, predicate='iso'), + ('iso', ast.Continuous(1.23, float('inf'), True, False))) + # negative + self._test(editable.format(num=-1.23, predicate='iso'), + ('iso', ast.Continuous(-1.23, float('inf'), True, False))) + + for editable in [ + # range + "{predicate} in [{num}-]", "{predicate} in [{num}-[", "{predicate} in [{num}-)", + "{predicate} : 
[{num}-]", "{predicate} : [{num}-[", "{predicate} : [{num}-)", + "{predicate} = [{num}-]", "{predicate} = [{num}-[", "{predicate} = [{num}-)", + # equation + "{predicate} >= {num}", "{num} <= {predicate}", + ]: + # positive + self._test(editable.format(num=1.23, predicate='iso'), + ('iso', ast.Continuous(1.23, float('inf'), True, False))) + # negative + self._test(editable.format(num=-1.23, predicate='iso'), + ('iso', ast.Continuous(-1.23, float('inf'), True, False))) + # date + self._test(editable.format(predicate='time', num="30.04.2012, 13:18"), + ('time', ast.Datetime(datetime(2012, 4, 30, 13, 18), datetime.max, True, False))) + + # larger than A (exclusive) + for editable in [ + # range / bracket + "{predicate} in ]{num}:]", "{predicate} in ]{num}:[", "{predicate} in ]{num}:)", + "{predicate} : ]{num}:]", "{predicate} : ]{num}:[", "{predicate} : ]{num}:)", + "{predicate} = ]{num}:]", "{predicate} = ]{num}:[", "{predicate} = ]{num}:)", + # range / parenthesis + "{predicate} in ({num}:]", "{predicate} in ({num}:[", "{predicate} in ({num}:)", + "{predicate} : ({num}:]", "{predicate} : ({num}:[", "{predicate} : ({num}:)", + "{predicate} = ({num}:]", "{predicate} = ({num}:[", "{predicate} = ({num}:)", + ]: + # positive + self._test(editable.format(num=1.23, predicate='iso'), + ('iso', ast.Continuous(1.23, float('inf'), False, False))) + # negative + self._test(editable.format(num=-1.23, predicate='iso'), + ('iso', ast.Continuous(-1.23, float('inf'), False, False))) + + for editable in [ + # range / bracket + "{predicate} in ]{num}-]", "{predicate} in ]{num}-[", "{predicate} in ]{num}-)", + "{predicate} : ]{num}-]", "{predicate} : ]{num}-[", "{predicate} : ]{num}-)", + "{predicate} = ]{num}-]", "{predicate} = ]{num}-[", "{predicate} = ]{num}-)", + # range / parenthesis + "{predicate} in ({num}-]", "{predicate} in ({num}-[", "{predicate} in ({num}-)", + "{predicate} : ({num}-]", "{predicate} : ({num}-[", "{predicate} : ({num}-)", + "{predicate} = ({num}-]", 
"{predicate} = ({num}-[", "{predicate} = ({num}-)", + # equation + "{predicate} > {num}", "{num} < {predicate}", + ]: + # positive + self._test(editable.format(num=1.23, predicate='iso'), + ('iso', ast.Continuous(1.23, float('inf'), False, False))) + # negative + self._test(editable.format(num=-1.23, predicate='iso'), + ('iso', ast.Continuous(-1.23, float('inf'), False, False))) + # date + self._test(editable.format(predicate='time', num="30.04.2012, 13:18"), + ('time', ast.Datetime(datetime(2012, 4, 30, 13, 19), datetime.max, True, False))) + + def test_smaller_than(self): + # smaller than B (inclusive) + for editable in [ + # range + "{predicate} in [:{num}]", "{predicate} in (:{num}]", "{predicate} in ]:{num}]", + "{predicate} : [:{num}]", "{predicate} : (:{num}]", "{predicate} : ]:{num}]", + "{predicate} = [:{num}]", "{predicate} = (:{num}]", "{predicate} = ]:{num}]", + ]: + # positives + self._test(editable.format(num=1.23, predicate='iso'), + ('iso', ast.Continuous(float('-inf'), 1.23, False, True))) + # negatives + self._test(editable.format(num=-1.23, predicate='iso'), + ('iso', ast.Continuous(float('-inf'), -1.23, False, True))) + + for editable in [ + # range + "{predicate} in [-{num}]", "{predicate} in (-{num}]", "{predicate} in ]-{num}]", + "{predicate} : [-{num}]", "{predicate} : (-{num}]", "{predicate} : ]-{num}]", + "{predicate} = [-{num}]", "{predicate} = (-{num}]", "{predicate} = ]-{num}]", + # equation + "{predicate} <={num}", "{num} >= {predicate}", + ]: + # positives + self._test(editable.format(num=1.23, predicate='iso'), + ('iso', ast.Continuous(float('-inf'), 1.23, False, True))) + # negatives + self._test(editable.format(num=-1.23, predicate='iso'), + ('iso', ast.Continuous(float('-inf'), -1.23, False, True))) + # date + self._test(editable.format(predicate='time', num="30.04.2012, 13:18"), + ('time', ast.Datetime(datetime.min, datetime(2012, 4, 30, 13, 19), False, False))) + + # smaller than B (exclusive) + for editable in [ + # range / 
bracket + "{predicate} in [:{num}[", "{predicate} in (:{num}[", "{predicate} in ]:{num}[", + "{predicate} : [:{num}[", "{predicate} : (:{num}[", "{predicate} : ]:{num}[", + "{predicate} = [:{num}[", "{predicate} = (:{num}[", "{predicate} = ]:{num}[", + # range / parenthesis + "{predicate} in [:{num})", "{predicate} in (:{num})", "{predicate} in ]:{num})", + "{predicate} : [:{num})", "{predicate} : (:{num})", "{predicate} : ]:{num})", + "{predicate} = [:{num})", "{predicate} = (:{num})", "{predicate} = ]:{num})", + ]: + # positives + self._test(editable.format(num=1.23, predicate='iso'), + ('iso', ast.Continuous(float('-inf'), 1.23, False, False))) + # negatives + self._test(editable.format(num=-1.23, predicate='iso'), + ('iso', ast.Continuous(float('-inf'), -1.23, False, False))) + + for editable in [ + # range / bracket + "{predicate} in [-{num}[", "{predicate} in (-{num}[", "{predicate} in ]-{num}[", + "{predicate} : [-{num}[", "{predicate} : (-{num}[", "{predicate} : ]-{num}[", + "{predicate} = [-{num}[", "{predicate} = (-{num}[", "{predicate} = ]-{num}[", + # range / parenthesis + "{predicate} in [-{num})", "{predicate} in (-{num})", "{predicate} in ]-{num})", + "{predicate} : [-{num})", "{predicate} : (-{num})", "{predicate} : ]-{num})", + "{predicate} = [-{num})", "{predicate} = (-{num})", "{predicate} = ]-{num})", + # equation + "{predicate} <{num}", "{num} > {predicate}", + ]: + # positives + self._test(editable.format(num=1.23, predicate='iso'), + ('iso', ast.Continuous(float('-inf'), 1.23, False, False))) + # negatives + self._test(editable.format(num=-1.23, predicate='iso'), + ('iso', ast.Continuous(float('-inf'), -1.23, False, False))) + # date + self._test(editable.format(predicate='time', num="30.04.2012, 13:18"), + ('time', ast.Datetime(datetime.min, datetime(2012, 4, 30, 13, 18), False, False))) + + def test_between(self): + # between A and B (including A, including B) + for editable in [ + # range + "{predicate} in [{numA}:{numB}]", "{predicate} : 
[{numA}:{numB}]", "{predicate} = [{numA}:{numB}]", + ]: + # positives + self._test(editable.format(predicate='iso', numA=1.23, numB=4.56), + ('iso', ast.Continuous(1.23, 4.56, True, True))) + # negatives + self._test(editable.format(predicate='iso', numA=-4.56, numB=-1.23), + ('iso', ast.Continuous(-4.56, -1.23, True, True))) + # mixed + self._test(editable.format(predicate='iso', numA=-1.23, numB=4.56), + ('iso', ast.Continuous(-1.23, 4.56, True, True))) + + for editable in [ + # range + "{predicate} in [{numA}-{numB}]", "{predicate} : [{numA}-{numB}]", "{predicate} = [{numA}-{numB}]", + # equation + "{numA} <= {predicate} <= {numB}" + ]: + # positives + self._test(editable.format(predicate='iso', numA=1.23, numB=4.56), + ('iso', ast.Continuous(1.23, 4.56, True, True))) + # negatives + self._test(editable.format(predicate='iso', numA=-4.56, numB=-1.23), + ('iso', ast.Continuous(-4.56, -1.23, True, True))) + # mixed + self._test(editable.format(predicate='iso', numA=-1.23, numB=4.56), + ('iso', ast.Continuous(-1.23, 4.56, True, True))) + # date + self._test(editable.format(predicate='time', numA="30.04.2012, 13:18", numB="13.6.2014, 18:27"), + ('time', ast.Datetime(datetime(2012, 4, 30, 13, 18), datetime(2014, 6, 13, 18, 28), True, False))) + + # between A and B (including A, excluding B) + for editable in [ + # range + "{predicate} in [{numA}:{numB})", "{predicate} in [{numA}:{numB}[", + "{predicate} : [{numA}:{numB})", "{predicate} : [{numA}:{numB}[", + "{predicate} = [{numA}:{numB})", "{predicate} = [{numA}:{numB}[", + ]: + # positives + self._test(editable.format(predicate='iso', numA=1.23, numB=4.56), + ('iso', ast.Continuous(1.23, 4.56, True, False))) + # negatives + self._test(editable.format(predicate='iso', numA=-4.56, numB=-1.23), + ('iso', ast.Continuous(-4.56, -1.23, True, False))) + # mixed + self._test(editable.format(predicate='iso', numA=-1.23, numB=4.56), + ('iso', ast.Continuous(-1.23, 4.56, True, False))) + + for editable in [ + # range + 
"{predicate} in [{numA}-{numB})", "{predicate} in [{numA}-{numB}[", + "{predicate} : [{numA}-{numB})", "{predicate} : [{numA}-{numB}[", + "{predicate} = [{numA}-{numB})", "{predicate} = [{numA}-{numB}[", + # equation + "{numA} <= {predicate} < {numB}", + ]: + # positives + self._test(editable.format(predicate='iso', numA=1.23, numB=4.56), + ('iso', ast.Continuous(1.23, 4.56, True, False))) + # negatives + self._test(editable.format(predicate='iso', numA=-4.56, numB=-1.23), + ('iso', ast.Continuous(-4.56, -1.23, True, False))) + # mixed + self._test(editable.format(predicate='iso', numA=-1.23, numB=4.56), + ('iso', ast.Continuous(-1.23, 4.56, True, False))) + # date + self._test(editable.format(predicate='time', numA="30.04.2012, 13:18", numB="13.6.2014, 18:27"), + ('time', ast.Datetime(datetime(2012, 4, 30, 13, 18), datetime(2014, 6, 13, 18, 27), True, False))) + + # between A and B (excluding A, including B) + for editable in [ + # range + "{predicate} in ({numA}:{numB}]", "{predicate} in ]{numA}:{numB}]", + "{predicate} : ({numA}:{numB}]", "{predicate} : ]{numA}:{numB}]", + "{predicate} = ({numA}:{numB}]", "{predicate} = ]{numA}:{numB}]", + ]: + # positives + self._test(editable.format(predicate='iso', numA=1.23, numB=4.56), + ('iso', ast.Continuous(1.23, 4.56, False, True))) + # negatives + self._test(editable.format(predicate='iso', numA=-4.56, numB=-1.23), + ('iso', ast.Continuous(-4.56, -1.23, False, True))) + # mixed + self._test(editable.format(predicate='iso', numA=-1.23, numB=4.56), + ('iso', ast.Continuous(-1.23, 4.56, False, True))) + + for editable in [ + # range + "{predicate} in ({numA}-{numB}]", "{predicate} in ]{numA}-{numB}]", + "{predicate} : ({numA}-{numB}]", "{predicate} : ]{numA}-{numB}]", + "{predicate} = ({numA}-{numB}]", "{predicate} = ]{numA}-{numB}]", + # equation + "{numA} < {predicate} <= {numB}", + ]: + # positives + self._test(editable.format(predicate='iso', numA=1.23, numB=4.56), + ('iso', ast.Continuous(1.23, 4.56, False, True))) + 
# negatives + self._test(editable.format(predicate='iso', numA=-4.56, numB=-1.23), + ('iso', ast.Continuous(-4.56, -1.23, False, True))) + # mixed + self._test(editable.format(predicate='iso', numA=-1.23, numB=4.56), + ('iso', ast.Continuous(-1.23, 4.56, False, True))) + # date + self._test(editable.format(predicate='time', numA="30.04.2012, 13:18", numB="13.6.2014, 18:27"), + ('time', ast.Datetime(datetime(2012, 4, 30, 13, 19), datetime(2014, 6, 13, 18, 28), True, False))) + + # between A and B (excluding A, excluding B) + for editable in [ + "{predicate} in ({numA}:{numB})", "{predicate} in ]{numA}:{numB}[", + "{predicate} : ({numA}:{numB})", "{predicate} : ]{numA}:{numB}[", + "{predicate} = ({numA}:{numB})", "{predicate} = ]{numA}:{numB}[", + ]: + # positives + self._test(editable.format(predicate='iso', numA=1.23, numB=4.56), + ('iso', ast.Continuous(1.23, 4.56, False, False))) + # negatives + self._test(editable.format(predicate='iso', numA=-4.56, numB=-1.23), + ('iso', ast.Continuous(-4.56, -1.23, False, False))) + # mixed + self._test(editable.format(predicate='iso', numA=-1.23, numB=4.56), + ('iso', ast.Continuous(-1.23, 4.56, False, False))) + + for editable in [ + "{predicate} in ({numA}-{numB})", "{predicate} in ]{numA}-{numB}[", + "{predicate} : ({numA}-{numB})", "{predicate} : ]{numA}-{numB}[", + "{predicate} = ({numA}-{numB})", "{predicate} = ]{numA}-{numB}[", + # equation + "{numA} < {predicate} < {numB}", + ]: + # positives + self._test(editable.format(predicate='iso', numA=1.23, numB=4.56), + ('iso', ast.Continuous(1.23, 4.56, False, False))) + # negatives + self._test(editable.format(predicate='iso', numA=-4.56, numB=-1.23), + ('iso', ast.Continuous(-4.56, -1.23, False, False))) + # mixed + self._test(editable.format(predicate='iso', numA=-1.23, numB=4.56), + ('iso', ast.Continuous(-1.23, 4.56, False, False))) + # date + self._test(editable.format(predicate='time', numA="30.04.2012, 13:18", numB="13.6.2014, 18:27"), + ('time', 
ast.Datetime(datetime(2012, 4, 30, 13, 19), datetime(2014, 6, 13, 18, 27), True, False))) + + def test_equal(self): + # equal to A + for editable in [ + # range + "{predicate} in [{num}:{num}]", "{predicate} : [{num}:{num}]", "{predicate} = [{num}:{num}]", + ]: + # positives + self._test(editable.format(predicate='iso', num=1.23), + ('iso', ast.Continuous(1.23, 1.23, True, True))) + # negatives + self._test(editable.format(predicate='iso', num=-1.23), + ('iso', ast.Continuous(-1.23, -1.23, True, True))) + + for editable in [ + # range + "{predicate} in [{num}-{num}]", "{predicate} : [{num}-{num}]", "{predicate} = [{num}-{num}]", + # equation + "{predicate} = {num}", "{num} = {predicate}", + ]: + # positives + self._test(editable.format(predicate='iso', num=1.23), + ('iso', ast.Continuous(1.23, 1.23, True, True))) + # negatives + self._test(editable.format(predicate='iso', num=-1.23), + ('iso', ast.Continuous(-1.23, -1.23, True, True))) + # date + self._test(editable.format(predicate='time', num="30.04.2012, 13:18"), + ('time', ast.Datetime(datetime(2012, 4, 30, 13, 18), datetime(2012, 4, 30, 13, 19), True, False))) + + def test_dates(self): + self._test("{predicate} < {num}".format(predicate='time', num="2012"), + ('time', ast.Datetime(datetime.min, datetime(2012, 1, 1), False, False))) + self._test("{predicate} < {num}".format(predicate='time', num="2012.04"), + ('time', ast.Datetime(datetime.min, datetime(2012, 4, 1), False, False))) + self._test("{predicate} < {num}".format(predicate='time', num="2012.04.30"), + ('time', ast.Datetime(datetime.min, datetime(2012, 4, 30), False, False))) + self._test("{predicate} < {num}".format(predicate='time', num="2012.04.30, 3 pm"), + ('time', ast.Datetime(datetime.min, datetime(2012, 4, 30, 15), False, False))) + self._test("{predicate} < {num}".format(predicate='time', num="2012.04.30, 15:34"), + ('time', ast.Datetime(datetime.min, datetime(2012, 4, 30, 15, 34), False, False))) + self._test("{predicate} < 
{num}".format(predicate='time', num="2012.04.30, 15:34:12"), + ('time', ast.Datetime(datetime.min, datetime(2012, 4, 30, 15, 34, 12), False, False))) + self._test("{predicate} < {num}".format(predicate='time', num="2012.04.30, 15:34:12.98"), + ('time', ast.Datetime(datetime.min, datetime(2012, 4, 30, 15, 34, 12, 980000), False, False))) + + self._test("{predicate} <= {num}".format(predicate='time', num="2012"), + ('time', ast.Datetime(datetime.min, datetime(2013, 1, 1), False, False))) + self._test("{predicate} <= {num}".format(predicate='time', num="2012.04"), + ('time', ast.Datetime(datetime.min, datetime(2012, 5, 1), False, False))) + self._test("{predicate} <= {num}".format(predicate='time', num="2012.04.30"), + ('time', ast.Datetime(datetime.min, datetime(2012, 5, 1), False, False))) + self._test("{predicate} <= {num}".format(predicate='time', num="2012.04.30, 3 pm"), + ('time', ast.Datetime(datetime.min, datetime(2012, 4, 30, 16), False, False))) + self._test("{predicate} <= {num}".format(predicate='time', num="2012.04.30, 15:34"), + ('time', ast.Datetime(datetime.min, datetime(2012, 4, 30, 15, 35), False, False))) + self._test("{predicate} <= {num}".format(predicate='time', num="2012.04.30, 15:34:12"), + ('time', ast.Datetime(datetime.min, datetime(2012, 4, 30, 15, 34, 13), False, False))) + self._test("{predicate} <= {num}".format(predicate='time', num="2012.04.30, 15:34:12.98"), + ('time', ast.Datetime(datetime.min, datetime(2012, 4, 30, 15, 34, 12, 980001), False, False))) + + def test_timerange(self): + self._test("{predicate} < {num}".format(predicate='time', num="15:34"), + ('time', ast.TimeRange(datetime.utcfromtimestamp(0.0), datetime(1970, 1, 1, 15, 34), True, False))) + self._test("{predicate} <= {num}".format(predicate='time', num="15:34"), + ('time', ast.TimeRange(datetime.utcfromtimestamp(0.0), datetime(1970, 1, 1, 15, 35), True, False))) + self._test("{predicate} = {num}".format(predicate='time', num="15:34"), + ('time', 
ast.TimeRange(datetime(1970, 1, 1, 15, 34), datetime(1970, 1, 1, 15, 35), True, False))) + self._test("{predicate} > {num}".format(predicate='time', num="15:34"), + ('time', ast.TimeRange(datetime(1970, 1, 1, 15, 35), datetime(1970, 1, 2), True, True))) + self._test("{predicate} >= {num}".format(predicate='time', num="15:34"), + ('time', ast.TimeRange(datetime(1970, 1, 1, 15, 34), datetime(1970, 1, 2), True, True))) + + self._test("{numA} <= {predicate} <= {numB}".format(predicate='time', numA="12:34", numB="15:28"), + ('time', ast.TimeRange(datetime(1970, 1, 1, 12, 34), datetime(1970, 1, 1, 15, 29), True, False))) + self._test("{numA} <= {predicate} < {numB}".format(predicate='time', numA="12:34", numB="15:28"), + ('time', ast.TimeRange(datetime(1970, 1, 1, 12, 34), datetime(1970, 1, 1, 15, 28), True, False))) + self._test("{numA} < {predicate} <= {numB}".format(predicate='time', numA="12:34", numB="15:28"), + ('time', ast.TimeRange(datetime(1970, 1, 1, 12, 35), datetime(1970, 1, 1, 15, 29), True, False))) + self._test("{numA} < {predicate} < {numB}".format(predicate='time', numA="12:34", numB="15:28"), + ('time', ast.TimeRange(datetime(1970, 1, 1, 12, 35), datetime(1970, 1, 1, 15, 28), True, False))) + + def test_special(self): + # special cases: explicit plus sign + self._test("{predicate} in [+1.23-+4.56]".format(predicate='iso'), + ('iso', ast.Continuous(1.23, 4.56, True, True))) + self._test("{predicate} in [-+4.56]".format(predicate='iso'), + ('iso', ast.Continuous(float('-inf'), 4.56, False, True))) + + def test_errors(self): + # parse errors + for editable in [ + # equal with exclusive + "{predicate} in ({num}:{num})", "{predicate} in ({num}-{num})", + "{predicate} in ({num}:{num}[", "{predicate} in ({num}-{num}[", + "{predicate} in ]{num}:{num})", "{predicate} in ]{num}-{num})", + "{predicate} in ]{num}:{num}[", "{predicate} in ]{num}-{num}[", + # invalid parenthesis + "{predicate} in ){num}:{num}(", + # misc errors + # FIXME: Currently all special 
characters are allowed as categorical value. + # If this changes, don't forget to enable the tests below. + #"{predicate} in [{num}{num}]", + #"{predicate} [{num}:{num}:{num}]", + #"{predicate} = ({num})", + #"{predicate} = {num})", + ]: + self.assertRaises(errors.ParserError, ast_from_string, + editable.format(predicate='iso', num=1.23)) + + for editable in [ + "{predicate} in [{numA}:{numB}]", "{predicate} : [{numA}:{numB}]", "{predicate} = [{numA}:{numB}]", + "{predicate} in ]{numA}:{numB}]", "{predicate} : ]{numA}:{numB}]", "{predicate} = ]{numA}:{numB}]", + "{predicate} in [{numA}:{numB}[", "{predicate} : [{numA}:{numB}[", "{predicate} = [{numA}:{numB}[", + "{predicate} in ({numA}:{numB}]", "{predicate} : ({numA}:{numB}]", "{predicate} = ({numA}:{numB}]", + "{predicate} in [{numA}:{numB})", "{predicate} : [{numA}:{numB})", "{predicate} = [{numA}:{numB})", + "{predicate} in ]{numA}:{numB}[", "{predicate} : ]{numA}:{numB}[", "{predicate} = ]{numA}:{numB}[", + "{predicate} in ]{numA}:{numB})", "{predicate} : ]{numA}:{numB})", "{predicate} = ]{numA}:{numB})", + "{predicate} in ({numA}:{numB}[", "{predicate} : ({numA}:{numB}[", "{predicate} = ({numA}:{numB}[", + "{predicate} in ({numA}:{numB})", "{predicate} : ({numA}:{numB})", "{predicate} = ({numA}:{numB})", + "{numA} < {predicate} < {numB}", + "{numA} <= {predicate} < {numB}", + "{numA} < {predicate} <= {numB}", + ]: + self.assertRaises(errors.ParserError, ast_from_string, + editable.format(predicate='iso', numA=4.56, numB=1.23)) + self.assertRaises(errors.ParserError, ast_from_string, + editable.format(predicate='time', numA="17:35", numB="10:55")) + self.assertRaises(errors.ParserError, ast_from_string, + editable.format(predicate='time', numA="18.12.2035", numB="5.7.1999")) + + # special cases: empty range with boundary + self.assertRaises(ParseException, ast_from_string.CONTINUOUS.parseString, + "{predicate} in [:]".format(predicate='iso')) + self.assertRaises(ParseException, 
ast_from_string.CONTINUOUS.parseString, + "{predicate} in (:[".format(predicate='iso')) + self.assertRaises(ParseException, ast_from_string.CONTINUOUS.parseString, + "{predicate} in ]:)".format(predicate='iso')) + self.assertRaises(ParseException, ast_from_string.CONTINUOUS.parseString, + "{predicate} in ".format(predicate='iso')) + # misc + self.assertRaises(ParseException, ast_from_string.CONTINUOUS.parseString, + "{predicate} in [{num}{num}]".format(predicate='iso', num=1.23)) + self.assertRaises(ParseException, ast_from_string.CONTINUOUS.parseString, + "{predicate} [{num}:{num}:{num}]".format(predicate='iso', num=1.23)) + self.assertRaises(ParseException, ast_from_string.CONTINUOUS.parseString, + "{predicate} = ({num})".format(predicate='iso', num=1.23)) + self.assertRaises(ParseException, ast_from_string.CONTINUOUS.parseString, + "{predicate} = ({num}".format(predicate='iso', num=1.23), dict(parseAll=True)) + self.assertRaises(ParseException, ast_from_string.CONTINUOUS.parseString, + "{predicate} = {num})".format(predicate='iso', num=1.23), dict(parseAll=True)) + # range errors + self.assertRaises(errors.ParserError, ast_from_string, "100 >= iso < 200") + self.assertRaises(errors.ParserError, ast_from_string, "100 > iso < 200") + self.assertRaises(errors.ParserError, ast_from_string, "100 > iso <= 200") + self.assertRaises(errors.ParserError, ast_from_string, "100 >= iso <= 200") + self.assertRaises(errors.ParserError, ast_from_string, "100 = iso = 200") + # time/date mixture errors + self.assertRaises(errors.ParserError, ast_from_string, "12:45 < time < 17.5.2004") + self.assertRaises(errors.ParserError, ast_from_string, "17.5.2004 < time < 12:45") + # date/int mixture errors + self.assertRaises(errors.ParserError, ast_from_string, "17.5.2004 < time < 1245") + # 1245 is interpreted as the year + #self.assertRaises(errors.ParserError, ast_from_string, "1245 < time < 17.5.2004") + # time/int mixture errors + self.assertRaises(errors.ParserError, 
ast_from_string, "17:12 < time < 1245") + self.assertRaises(errors.ParserError, ast_from_string, "1712 < time < 12:45") + + # empty query + self.assertRaises(ParseException, ast_from_string.CONTINUOUS.parseString, "") + + +class TestParseSearch(unittest.TestCase): + def setUp(self): + predicates.expose('mime', + TestScope('attribute', 'mime'), 'Categorical') + predicates.expose('rank', + TestScope('attribute', 'rank'), 'Continuous') + predicates.expose('iso', + TestScope('attribute', 'iso'), 'Continuous', 'Categorical') + predicates.expose('time', + TestScope('generic', 't_image_create_loc'), 'TimeRange', 'Datetime') + predicates.expose('tag', + TestScope('generic', 'tag'), 'Categorical') + + def test_parse_existence(self): + self.assertEqual(ast_from_string("has mime"), + ast.AND([ast.Token('mime', ast.Existence())])) + self.assertEqual(ast_from_string("has no mime"), + ast.AND([ast.Token('mime', ast.Inexistence())])) + self.assertEqual(ast_from_string("has not mime"), + ast.AND([ast.Token('mime', ast.Inexistence())])) + + def test_parse_categorical(self): + # positive + self.assertEqual(ast_from_string("iso in 100, 200, 500"), + ast.AND([ast.Token('iso', ast.SetInclude(['100', '200', '500']))])) + self.assertEqual(ast_from_string("iso in (100, 200)"), + ast.AND([ast.Token('iso', ast.SetInclude(['100', '200']))])) + self.assertEqual(ast_from_string("iso = (100, 200)"), + ast.AND([ast.Token('iso', ast.SetInclude(['100', '200']))])) + # FIXME! 
+ #self.assertEqual(ast_from_string("iso = 100, 200"), + # ast.AND([ast.Token('iso', ast.SetInclude(['100', '200']))])) + self.assertEqual(ast_from_string("iso : (100, 200)"), + ast.AND([ast.Token('iso', ast.SetInclude(['100', '200']))])) + self.assertEqual(ast_from_string("iso : 100, 200"), + ast.AND([ast.Token('iso', ast.SetInclude(['100', '200']))])) + self.assertEqual(ast_from_string("iso:(100,200)"), + ast.AND([ast.Token('iso', ast.SetInclude(['100', '200']))])) + self.assertEqual(ast_from_string("iso in (100,200)"), + ast.AND([ast.Token('iso', ast.SetInclude(['100', '200']))])) + self.assertEqual(ast_from_string("iso in 100,200"), + ast.AND([ast.Token('iso', ast.SetInclude(['100', '200']))])) + self.assertEqual(ast_from_string("iso ~ (100,200)"), + ast.AND([ast.Token('iso', ast.SetInclude(['100', '200'], approximate=True))])) + self.assertEqual(ast_from_string("iso ~ 100,200"), + ast.AND([ast.Token('iso', ast.SetInclude(['100', '200'], approximate=True))])) + + # negative + self.assertEqual(ast_from_string("iso not in 100,200"), + ast.AND([ast.Token('iso', ast.SetExclude(['100', '200']))])) + self.assertEqual(ast_from_string("iso not in (100, 200)"), + ast.AND([ast.Token('iso', ast.SetExclude(['100', '200']))])) + self.assertEqual(ast_from_string("iso != 100,200"), + ast.AND([ast.Token('iso', ast.SetExclude(['100', '200']))])) + self.assertEqual(ast_from_string("iso != (100, 200)"), + ast.AND([ast.Token('iso', ast.SetExclude(['100', '200']))])) + self.assertEqual(ast_from_string("iso !~ 100,200"), + ast.AND([ast.Token('iso', ast.SetExclude(['100', '200'], approximate=True))])) + self.assertEqual(ast_from_string("iso !~ (100, 200)"), + ast.AND([ast.Token('iso', ast.SetExclude(['100', '200'], approximate=True))])) + + # one value + self.assertEqual(ast_from_string("mime : text"), + ast.AND([ast.Token('mime', ast.SetInclude(['text']))])) + self.assertEqual(ast_from_string("mime in text"), + ast.AND([ast.Token('mime', ast.SetInclude(['text']))])) + 
self.assertEqual(ast_from_string("mime = text"), + ast.AND([ast.Token('mime', ast.SetInclude(['text']))])) + self.assertEqual(ast_from_string("mime ~ text"), + ast.AND([ast.Token('mime', ast.SetInclude(['text'], approximate=True))])) + self.assertEqual(ast_from_string("mime != text"), + ast.AND([ast.Token('mime', ast.SetExclude(['text']))])) + self.assertEqual(ast_from_string("mime not in text"), + ast.AND([ast.Token('mime', ast.SetExclude(['text']))])) + self.assertEqual(ast_from_string("mime !~ text"), + ast.AND([ast.Token('mime', ast.SetExclude(['text'], approximate=True))])) + + # expressions with slash and comma + self.assertEqual(ast_from_string('mime : "text"'), + ast.AND([ast.Token('mime', ast.SetInclude(['text']))])) + self.assertEqual(ast_from_string('mime : "text", "plain"'), + ast.AND([ast.Token('mime', ast.SetInclude(['text', 'plain']))])) + self.assertEqual(ast_from_string('mime : "text, plain"'), + ast.AND([ast.Token('mime', ast.SetInclude(['text, plain']))])) + self.assertEqual(ast_from_string('mime ~ "text/plain"'), + ast.AND([ast.Token('mime', ast.SetInclude(['text/plain'], approximate=True))])) + self.assertEqual(ast_from_string('mime = ("text/plain", "image/jpeg")'), + ast.AND([ast.Token('mime', ast.SetInclude(['text/plain', 'image/jpeg']))])) + + def test_parse_tag(self): + + # only tag: tag, tags, (tag), (tags) + self.assertEqual(ast_from_string("foo"), + ast.AND([ast.Token('tag', ast.SetInclude(['foo']))])) + self.assertEqual(ast_from_string("(foo)"), + ast.AND([ast.Token('tag', ast.SetInclude(['foo']))])) + self.assertEqual(ast_from_string("foo, bar"), + ast.AND([ast.Token('tag', ast.SetInclude(['foo', 'bar']))])) + self.assertEqual(ast_from_string("foo,bar"), + ast.AND([ast.Token('tag', ast.SetInclude(['foo', 'bar']))])) + self.assertEqual(ast_from_string("(foo, bar,foobar)"), + ast.AND([ast.Token('tag', ast.SetInclude(['foo', 'bar', 'foobar']))])) + + # op and tag: !tag, ~tag, !~tag + self.assertEqual(ast_from_string("~foo"), + 
ast.AND([ast.Token('tag', ast.SetInclude(['foo'], approximate=True))])) + self.assertEqual(ast_from_string("~ foo"), + ast.AND([ast.Token('tag', ast.SetInclude(['foo'], approximate=True))])) + self.assertEqual(ast_from_string("!foo"), + ast.AND([ast.Token('tag', ast.SetExclude(['foo']))])) + self.assertEqual(ast_from_string("! foo"), + ast.AND([ast.Token('tag', ast.SetExclude(['foo']))])) + self.assertEqual(ast_from_string("!~foo"), + ast.AND([ast.Token('tag', ast.SetExclude(['foo'], approximate=True))])) + self.assertEqual(ast_from_string("!~ foo"), + ast.AND([ast.Token('tag', ast.SetExclude(['foo'], approximate=True))])) + + # op and list: ! (tags), ~tags, ... + self.assertEqual(ast_from_string("~ foo, bar"), + ast.AND([ast.Token('tag', ast.SetInclude(['foo', 'bar'], approximate=True))])) + self.assertEqual(ast_from_string("~foo, bar"), + ast.AND([ast.Token('tag', ast.SetInclude(['foo', 'bar'], approximate=True))])) + self.assertEqual(ast_from_string("~ (foo, bar)"), + ast.AND([ast.Token('tag', ast.SetInclude(['foo', 'bar'], approximate=True))])) + self.assertEqual(ast_from_string("! foo, bar"), + ast.AND([ast.Token('tag', ast.SetExclude(['foo', 'bar']))])) + self.assertEqual(ast_from_string("! (foo, bar)"), + ast.AND([ast.Token('tag', ast.SetExclude(['foo', 'bar']))])) + self.assertEqual(ast_from_string("! 
def test_parse_query(self):
    """Check that '/'-separated queries parse into an AND with one token per part."""
    # NOTE(review): SetInclude receives bare positional strings in this test,
    # whereas other tests in this file pass a list -- confirm that both call
    # forms build equal nodes.

    # simple query
    self.assertEqual(ast_from_string('foo / bar'), ast.AND([
        ast.Token('tag', ast.SetInclude('foo')),
        ast.Token('tag', ast.SetInclude('bar'))]))
    self.assertEqual(ast_from_string('iso in ("foo", "bar") / mime = plain'), ast.AND([
        ast.Token('iso', ast.SetInclude('foo', 'bar')),
        ast.Token('mime', ast.SetInclude('plain'))]))
    self.assertEqual(ast_from_string('iso = 1.23 / rank < 5'), ast.AND([
        ast.Token('iso', ast.Continuous(1.23, 1.23, True, True)),
        ast.Token('rank', ast.Continuous(hi=5))]))
    # a bare time of day is expected to be anchored at 1970-01-01
    self.assertEqual(ast_from_string('time >= 12:50 / time < 13:50'), ast.AND([
        ast.Token('time', ast.TimeRange(lo=datetime(1970, 1, 1, 12, 50), lo_inc=True, hi_inc=True)),
        ast.Token('time', ast.TimeRange(hi=datetime(1970, 1, 1, 13, 50), lo_inc=True, hi_inc=False))]))
    self.assertEqual(ast_from_string('time >= 17.5.2001 / time < 18.4.2002'), ast.AND([
        ast.Token('time', ast.Datetime(lo=datetime(2001, 5, 17, 0, 0), lo_inc=True)),
        ast.Token('time', ast.Datetime(hi=datetime(2002, 4, 18, 0, 0)))]))

    # mixing expressions
    self.assertEqual(
        ast_from_string('foo / iso in "bar" / mime ~ "text/plain" / iso < 100 / time >= 17.5.2001 / time < 13:50'),
        ast.AND([
            ast.Token('tag', ast.SetInclude('foo')),
            ast.Token('iso', ast.SetInclude('bar')),
            ast.Token('mime', ast.SetInclude('text/plain', approximate=True)),
            ast.Token('iso', ast.Continuous(hi=100)),
            ast.Token('time', ast.Datetime(lo=datetime(2001, 5, 17, 0, 0), lo_inc=True)),
            ast.Token('time', ast.TimeRange(hi=datetime(1970, 1, 1, 13, 50), lo_inc=True))]))

    # leading, trailing and doubled slashes leave an empty part and must be
    # rejected; subTest reports every offending query, not just the first
    for query in ('/ foobar', 'foobar /', 'foobar / ', 'foo // bar', 'foo / / bar'):
        with self.subTest(query=query):
            self.assertRaises(errors.ParserError, ast_from_string, query)

def test_quoting(self):
    """Check that quoted set members keep their parentheses, commas and escaped quotes."""
    # single and double quotes are interchangeable, and quoting the plain
    # second member does not change the result
    for query in (
            "tag in ('(foo, bar)', foobar)",
            'tag in ("(foo, bar)", foobar)',
            'tag in ("(foo, bar)", "foobar")',
            'tag in ("(foo, bar)", \'foobar\')',
            ):
        with self.subTest(query=query):
            self.assertEqual(ast_from_string(query),
                ast.AND([ast.Token('tag', ast.SetInclude(['(foo, bar)', 'foobar']))]))

    # backslash-escaped quotes inside a quoted member are kept literally
    self.assertEqual(ast_from_string('tag in ("(foo, \\"bar\\")", foobar)'),
        ast.AND([ast.Token('tag', ast.SetInclude(['(foo, "bar")', 'foobar']))]))

    # error cases: the opening quote is never closed
    for query in ('tag in ("(foo, bar, foobar)', "tag in ('(foo, bar, foobar)"):
        with self.subTest(query=query):
            self.assertRaises(errors.ParserError, ast_from_string, query)
class TestParseSort(unittest.TestCase):
    """Exercise sort_from_string against a fixed set of exposed sort keys."""
    # NOTE(review): `sortkeys`, `TestScope` and `ast` are used below, but the
    # import that would provide them is commented out with a FIXME in this
    # file -- confirm where they come from after the port.

    def setUp(self):
        """Expose the sort keys that the parser tests refer to."""
        # (key, scope kind, scope name, admissible sort types)
        exposed = (
            ('iso', 'attribute', 'iso', ('Numerical', )),
            ('rank', 'attribute', 'rank', ('Alphabetical', 'Numerical')),
            ('time', 'attribute', 'time', ('Numerical', )),
            ('entity', 'property', 'guid', ('Alphabetical', )),
            ('tag', 'property', 't_image_create_loc', ('Anchored', )),
            # deliberately exposed without any sort type
            ('mistake', 'property', 't_image_create_loc', ()),
            )
        for key, scope_kind, scope_name, sort_types in exposed:
            sortkeys.expose(key, TestScope(scope_kind, scope_name), *sort_types)

    def _parses_to(self, text, expected):
        """Assert that *text* parses into the *expected* sort node."""
        self.assertEqual(sort_from_string(text), expected)

    def _rejected(self, text):
        """Assert that parsing *text* raises a ParserError."""
        self.assertRaises(errors.ParserError, sort_from_string, text)

    def test_parse_sort(self):
        """Walk through valid and invalid sort expressions."""
        anchor = 'AF39D281CE3'

        # simple patterns
        self._parses_to("time", ast.NumericalSort('time', False))
        self._parses_to("entity", ast.AlphabeticalSort('entity', False))
        self._parses_to("time asc", ast.NumericalSort('time', False))
        self._parses_to("time desc", ast.NumericalSort('time', True))
        self._parses_to("entity desc", ast.AlphabeticalSort('entity', True))
        self._parses_to("sort by time", ast.NumericalSort('time', False))
        self._parses_to("sort by time desc", ast.NumericalSort('time', True))
        self._parses_to("sort by entity desc", ast.AlphabeticalSort('entity', True))

        # full pattern
        self._parses_to("sort alphabetically by entity upwards",
                        ast.AlphabeticalSort('entity', False))
        self._parses_to("sort numerically by time desc",
                        ast.NumericalSort('time', True))

        # invalid type
        self._rejected("sort alphabetically by time desc")
        self._rejected("sort numerically by entity desc")
        self._rejected("sort by time similarity to AF39D281CE3")
        self._rejected("sort alphabetically by tag down,")

        # ambiguous type
        self._parses_to("sort alphabetically by rank", ast.AlphabeticalSort('rank', False))
        self._parses_to("sort numerically by rank", ast.NumericalSort('rank', False))
        self._rejected("sort by rank")

        # anchor pattern
        self._parses_to("sort by tag similarity to AF39D281CE3",
                        ast.AnchoredSort('tag', anchor, False))
        self._parses_to("tag AF39D281CE3 up", ast.AnchoredSort('tag', anchor, False))
        self._parses_to("tag AF39D281CE3 down", ast.AnchoredSort('tag', anchor, True))
        self._rejected("time AF39D281CE3")
        self._rejected("sort by tag down,")
        self._rejected("tag XXXXXXXXXXX down,")

        # compound statements
        self._parses_to("time, iso",
                        ast.Order(ast.NumericalSort('time'), ast.NumericalSort('iso')))
        self._parses_to("tag AF39D281CE3 down, time up",
                        ast.Order(ast.AnchoredSort('tag', anchor, True),
                                  ast.NumericalSort('time', False)))
        self._rejected("tag AF39D281CE3 down,")

        # empty string
        self._rejected("")
        # invalid predicate
        self._rejected("foobar")
        # invalid direction
        self._rejected("sort by entity sideways")
        # invalid typedef
        self._rejected("sort by mistake")
        # missing anchor
        self._rejected("sort by time similarity to")
# Run this module's test cases when the file is executed directly.
if __name__ == '__main__':
    unittest.main()

## EOF ##