aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorMatthias Baumgartner <dev@igsor.net>2023-01-08 17:21:19 +0100
committerMatthias Baumgartner <dev@igsor.net>2023-01-08 17:21:19 +0100
commit06904269d867ed7c8355c0615473baf524fcf30b (patch)
treee94ecf79bd1c7f2738d0f031675303ad1ee20e9e
parent7cf03c4de5244e7db3f44362a275f92368fd86ac (diff)
downloadtagit-06904269d867ed7c8355c0615473baf524fcf30b.tar.gz
tagit-06904269d867ed7c8355c0615473baf524fcf30b.tar.bz2
tagit-06904269d867ed7c8355c0615473baf524fcf30b.zip
parsing early port
-rw-r--r--tagit/parsing/__init__.py19
-rw-r--r--tagit/parsing/datefmt.py568
-rw-r--r--tagit/parsing/search.py405
-rw-r--r--tagit/parsing/sort.py192
-rw-r--r--tagit/utils/errors.py20
-rw-r--r--test/parsing/__init__.py0
-rw-r--r--test/parsing/test_datefmt.py378
-rw-r--r--test/parsing/test_search.py707
-rw-r--r--test/parsing/test_sort.py96
9 files changed, 2377 insertions, 8 deletions
diff --git a/tagit/parsing/__init__.py b/tagit/parsing/__init__.py
new file mode 100644
index 0000000..1c431a4
--- /dev/null
+++ b/tagit/parsing/__init__.py
@@ -0,0 +1,19 @@
+"""
+
+Part of the tagit module.
+A copy of the license is provided with the project.
+Author: Matthias Baumgartner, 2022
+"""
+# inner-module imports
+from .datefmt import parse_datetime
+from .search import ast_from_string
+from .sort import sort_from_string
+
+# exports
+__all__ = (
+ 'ast_from_string',
+ 'parse_datetime',
+ 'sort_from_string',
+ )
+
+## EOF ##
diff --git a/tagit/parsing/datefmt.py b/tagit/parsing/datefmt.py
new file mode 100644
index 0000000..49de1c0
--- /dev/null
+++ b/tagit/parsing/datefmt.py
@@ -0,0 +1,568 @@
+"""Parse and interpret date strings.
+
+Consider the following date notations (DMY=04.11.2012):
+
+DMY 04.11.12 europe
+YMD 12.11.04 iso
+MDY 11.04.12 US
+YDM 12.04.11 reverse US
+DYM 04.12.11 too uncommon, ignored
+MYD 11.12.04 too uncommon, ignored
+
+There's the general problem of ambiguity between the DMY and MDY formats.
+Here, we give precedence to the DMY format.
+
+Note that the MDY format can still be used in unambiguous settings or
+with the month spelled out, e.g. "2012, 23th of November"
+
+Similarly, consider the following shortened date notations:
+
+DM 04.11 europe, current year
+MY 11.12 quarters
+YM 12.11 quarters
+MD 11.04 us, current year
+DY 23.12 too uncommon, ignored
+YD 12.23 too uncommon, ignored
+
+In addition to the different spellings, month names can be spelled out
+and the string can be cluttered with additional common words.
+
+Part of the tagit module.
+A copy of the license is provided with the project.
+Author: Matthias Baumgartner, 2022
+"""
+# standard imports
+from collections import Counter
+from datetime import date as ddate, time as dtime, datetime, timedelta
+from math import floor
+
+# external imports
+from dateutil.relativedelta import relativedelta
+from pyparsing import Combine, Group, Literal, Optional, Or, Word, nums, oneOf, ParseException
+
+# tagit imports
+from tagit.utils import errors, Struct, flatten
+
+# exports
+__all__ = (
+ # default format strings
+ 'DATE_FMT',
+ 'TIME_FMT',
+ 'DATETIME_FMT',
+ # exceptions
+ 'DateParserError',
+ 'TimeParserError',
+ 'DateFormatError'
+ # parsing
+ 'parse_datetime',
+ 'guess_datetime',
+ # postprocessing
+ 'increment',
+ )
+
+## constants ##
+"""Default strftime format strings."""
+DATE_FMT = '%d.%m.%Y'
+TIME_FMT = '%H:%M'
+DATETIME_FMT = '%d.%m.%Y, %H:%M'
+
+# Literal months
+MONTH_LIT = {
+ 'Jan' : 1,
+ 'January' : 1,
+ 'Feb' : 2,
+ 'February' : 2,
+ 'Mar' : 3,
+ 'March' : 3,
+ 'Apr' : 4,
+ 'April' : 4,
+ 'May' : 5,
+ 'Jun' : 6,
+ 'June' : 6,
+ 'Jul' : 7,
+ 'July' : 7,
+ 'Aug' : 8,
+ 'August' : 8,
+ 'Sep' : 9,
+ 'September' : 9,
+ 'Oct' : 10,
+ 'October' : 10,
+ 'Nov' : 11,
+ 'November' : 11,
+ 'Dec' : 12,
+ 'December' : 12,
+ }
+
+
+## code ##
+
+class DatefmtError(errors.ParserError): pass
+
+class DateParserError(DatefmtError): pass
+
+class TimeParserError(DatefmtError): pass
+
+class DateFormatError(DatefmtError): pass
+
+class DF(str):
+ """date/time user-supplied format."""
+ # indicator characters, highest to lowest.
+ _valid_chars = "YMDhmsn"
+ # explicit mapping from unit to character
+ year = 'Y'
+ month = 'M'
+ day = 'D'
+ hour = 'h'
+ minute = 'm'
+ second = 's'
+ microsecond = 'n'
+
+ def valid(self):
+ return len(self) and len(set(self._valid_chars) & set(self))
+
+ def lsb(self):
+ """Smallest unit specified."""
+ if not self.valid():
+ raise DateFormatError(
+ 'An empty date format string has no least significant position.', self)
+
+ return [i for i in self._valid_chars if i in self][-1]
+
+ def msb(self):
+ """Highest unit specified."""
+ if not self.valid():
+ raise DateFormatError(
+ 'An empty date format string has no most significant position.', self)
+
+ return [i for i in self._valid_chars if i in self][0]
+
+ def is_time(self):
+ """Return true if only a time (hour/minute/second/ms) was specified."""
+ return True if self.valid() and self.msb() not in 'YMD' else False
+
+ def is_date(self):
+ """Return true if only a date (year/month/day) was specified."""
+ return True if self.valid() and self.lsb() not in 'hmsn' else False
+
+# priorities
+PRIORITIES_INT = {
+ 'p2': [
+ DF(DF.day + DF.month), # DM
+ DF(DF.month + DF.year), # MY
+ DF(DF.year + DF.month), # YM
+ DF(DF.month + DF.day), # MD
+ DF(DF.day + DF.year), # DY
+ DF(DF.year + DF.day), # YD
+ ],
+ 'p3': [
+ DF(DF.day + DF.month + DF.year), # DMY
+ DF(DF.year + DF.month + DF.day), # YMD
+ DF(DF.month + DF.day + DF.year), # MDY
+ DF(DF.year + DF.day + DF.month), # YDM
+ DF(DF.day + DF.year + DF.month), # DYM
+ DF(DF.month + DF.year + DF.day), # MYD
+ ]
+ }
+
+PRIORITIES_US = {
+ 'p2': [
+ DF(DF.month + DF.day),
+ DF(DF.year + DF.month),
+ DF(DF.day + DF.month),
+ DF(DF.month + DF.year),
+ DF(DF.day + DF.year),
+ DF(DF.year + DF.day),
+ ],
+ 'p3': [
+ DF(DF.month + DF.day + DF.year),
+ DF(DF.year + DF.day + DF.month),
+ DF(DF.day + DF.month + DF.year),
+ DF(DF.year + DF.month + DF.day),
+ DF(DF.day + DF.year + DF.month),
+ DF(DF.month + DF.year + DF.day),
+ ]
+ }
+
+def guess_date(tokens, priorities=None):
+ """Guess the date from a string in an unknown format.
+
+ The method uses the following clues to guess the meaning of each part:
+ * 4-digits implies it's a year
+ * 1-digit discards year (since it's more common to write 04 instead of 4 as a shorthand to 2004
+ * Literal month
+ * 'of' is preceeded by day and succeeded by the month
+ * Any of (st, nd, rd, th) on a number makes it a day
+ * Number > 12 can't be a month
+ * Number > 31 can't be a day
+ * Date inexistence (e.g. 29.02.2018)
+ * precedence DMY > YMD > MDY > YDM
+ * precedence DM > MY > YM > MD
+ """
+ priorities = PRIORITIES_INT if priorities is None else priorities
+
+ # We need to figure out which token corresponds to which component
+ # (D, M, Y). Since this is ambiguous, guesswork is needed. We do so
+ # by eliminating impossible options.
+
+ # initially, all three components are viable
+ guesses = [Struct(tok=tok.strip(), fmt=DF.year + DF.month + DF.day) for tok in tokens]
+
+ # check indicators for specific formats
+ for idx in range(len(guesses)):
+ tok, options = guesses[idx].tok, guesses[idx].fmt
+
+ if len(tok) == 1 and tok in '.,;':
+ # delimiter tokens can be ignored
+ guesses[idx].fmt = ''
+ elif tok == 'of':
+ # an 'of' token indicates a 'day of month' structure
+ guesses[idx-1].fmt = DF.day
+ guesses[idx+1].fmt = DF.month
+ guesses[idx].fmt = ''
+ elif tok[-2:] in ('st', 'nd', 'rd', 'th'):
+ # suffix indicates a day
+ guesses[idx].fmt = DF.day
+ guesses[idx].tok = tok[:-2]
+ elif len(tok) == 4 and tok.isdigit():
+ # four digits must be a year
+ guesses[idx].fmt = DF.year
+ elif tok in MONTH_LIT:
+ # spelled out month is - tadaaa - a month
+ guesses[idx].tok = str(MONTH_LIT[tok])
+ guesses[idx].fmt = DF.month
+
+ # remove jitter (of, delimiters)
+ guesses = [itm for itm in guesses if len(itm.fmt) > 0]
+
+ # eliminate impossible options
+ for idx in range(len(guesses)):
+ tok, options = guesses[idx].tok, guesses[idx].fmt
+
+ if len(tok) == 1:
+ # can't be a year
+ guesses[idx].fmt = guesses[idx].fmt.replace(DF.year, '')
+ if tok.isdigit() and int(tok) > 12:
+ # can't be a month
+ guesses[idx].fmt = guesses[idx].fmt.replace(DF.month, '')
+ if tok.isdigit() and int(tok) > 31:
+ # can't be a day
+ guesses[idx].fmt = guesses[idx].fmt.replace(DF.day, '')
+
+ # define helper function
+ def create_date(year, month, day):
+ """Return a datetime for the given components or None if that is not possible."""
+ # check format
+ if DF.year not in year.fmt or DF.month not in month.fmt or DF.day not in day.fmt:
+ return None
+
+ if len(str(year.tok)) == 2:
+ # ten years into the future is still the current century, otherwise the previous one
+ threshold = ddate.today().year + 10 - 2000
+ year = Struct(
+ tok='20'+str(year.tok) if int(year.tok) < threshold else '19'+str(year.tok),
+ fmt=year.fmt
+ )
+
+ try:
+ # create date
+ return ddate(year=int(year.tok), month=int(month.tok), day=int(day.tok))
+ except ValueError:
+ return None
+
+ # placeholders for unspecified tokens
+ pyear = Struct(tok=ddate.today().year, fmt=DF.year)
+ pday = Struct(tok=1, fmt=DF.day)
+ pmon = Struct(tok=1, fmt=DF.month)
+
+ if len(guesses) == 1: # one-part date (Y)
+ itm = guesses[0]
+ date = create_date(itm, pmon, pday)
+ if date is not None:
+ return date, DF(DF.year)
+ else:
+ raise DateParserError('Two-digit date format must contain the year')
+
+ elif len(guesses) == 2: # two-part date (DM, MY, YM, MD)
+ fst, snd = guesses
+ # check components
+ if len(set(fst.fmt + snd.fmt)) < 2:
+ raise DateParserError('Invalid two-digit date format')
+
+ if len(fst.fmt) == 1 and len(snd.fmt) == 1: # fully determined
+ date = {
+ DF.year: pyear,
+ DF.month: pmon,
+ DF.day: pday,
+ }
+ date.update({
+ fst.fmt: fst,
+ snd.fmt: snd,
+ })
+ return create_date(date[DF.year], date[DF.month], date[DF.day]), DF(fst.fmt + snd.fmt)
+
+ # walk through prioritized formats
+ formats = {
+ DF(DF.day + DF.month): create_date(pyear, snd, fst), # DM
+ DF(DF.month + DF.year): create_date(snd, fst, pday), # MY
+ DF(DF.year + DF.month): create_date(fst, snd, pday), # YM
+ DF(DF.month + DF.day): create_date(pyear, fst, snd), # MD
+ DF(DF.day + DF.year): create_date(snd, pmon, fst), # DY
+ DF(DF.year + DF.day): create_date(fst, pmon, snd), # YD
+ }
+
+ for fmt in priorities['p2']:
+ if formats.get(fmt, None) is not None:
+ return formats[fmt], fmt
+
+ raise DateParserError('Cannot guess roles of a two-digit date format')
+
+ elif len(guesses) == 3: # three-part date (DMY, YMD, MDY, YMD)
+
+ # eliminate options based on uniqueness of component assignment
+ changed = True
+ while changed:
+ # resolved guesses: item has only one possible component option
+ resolved = set([itm.fmt for itm in guesses if len(itm.fmt) == 1])
+ # single choice: component has only one possible position
+ unique = {comp for comp, freq in
+ Counter(flatten([set(itm.fmt) for itm in guesses])).items()
+ if freq == 1}
+ # assume no changes
+ changed = False
+ for itm in guesses:
+ if unique & set(itm.fmt) and not set(itm.fmt).issubset(unique):
+ # itm is the only option for one component
+ itm.fmt = DF(''.join(unique & set(itm.fmt)))
+ changed = True
+ elif resolved & set(itm.fmt) and not set(itm.fmt).issubset(resolved):
+ # itm contains options that already taken by a different item
+ itm.fmt = itm.fmt.translate(str.maketrans('', '', ''.join(resolved)))
+ changed = True
+
+ fst, snd, trd = guesses
+
+ # check components
+ if len(set(fst.fmt + snd.fmt + trd.fmt)) < 3:
+ raise DateParserError('Invalid three-digit date format')
+
+ if len(fst.fmt) == 1 and len(snd.fmt) == 1 and len(trd.fmt) == 1: # fully determined
+ date = {
+ fst.fmt: fst,
+ snd.fmt: snd,
+ trd.fmt: trd,
+ }
+ return (create_date(date[DF.year], date[DF.month], date[DF.day]),
+ DF(fst.fmt + snd.fmt + trd.fmt))
+
+ # walk through prioritized formats
+ formats = {
+ DF(DF.day + DF.month + DF.year): create_date(year=trd, month=snd, day=fst), # DMY
+ DF(DF.year + DF.month + DF.day): create_date(year=fst, month=snd, day=trd), # YMD
+ DF(DF.month + DF.day + DF.year): create_date(year=trd, month=fst, day=snd), # MDY
+ DF(DF.year + DF.day + DF.month): create_date(year=fst, month=trd, day=snd), # YDM
+ DF(DF.day + DF.year + DF.month): create_date(year=snd, month=trd, day=fst), # DYM
+ DF(DF.month + DF.year + DF.day): create_date(year=snd, month=fst, day=trd), # MYD
+ }
+
+ for fmt in priorities['p3']:
+ if formats.get(fmt, None) is not None:
+ return formats[fmt], fmt
+
+ raise DateParserError('Cannot guess the roles of a three-digit date format')
+
+ raise DateParserError('Cannot parse the date format')
+
+def guess_time(tokens):
+ """Guess the time from a string in an unknown format.
+
+ * Always sorted from hi (hour) to low (sec)
+ * 4 Terms -> hour, min, sec, ns
+ * 3 Terms -> hour, min, sec
+ * 2 Terms -> hour, min | min, sec
+ * both terms > 24 -> min, sec
+ * am or pm present -> hour, min
+ * Dot separation -> min, sec
+ * Colon separation -> hour, min
+ """
+ # remove spearators
+ tokens = [tok.lower() for tok in tokens if tok not in '.,:']
+ # check if the am/pm format was used
+ is_am = 'am' in tokens
+ is_pm = 'pm' in tokens
+
+ # remove non-numbers
+ tokens = [tok for tok in tokens if tok.isdigit()]
+ if not len(tokens):
+ raise TimeParserError()
+
+ # convert to int
+ ms = int(tokens[-1].ljust(6, '0'))
+ tokens = [int(tok) for tok in tokens]
+
+ # guess format
+ try:
+ if len(tokens) == 4: # H:M:S.NS
+ tokens[-1] = ms
+ return dtime(*tokens), DF(DF.hour + DF.minute + DF.second + DF.microsecond)
+ elif len(tokens) == 3: # H:M:S
+ return dtime(*tokens), DF(DF.hour + DF.minute + DF.second)
+ elif len(tokens) == 2: # H:M or M:S
+ if is_am: # am/pm notation was used
+ return dtime(*tokens), DF(DF.hour + DF.minute)
+ elif is_pm: # am/pm notation was used
+ return dtime(tokens[0] + 12, tokens[1]), DF(DF.hour + DF.minute)
+ elif tokens[0] > 24: # min, sec
+ return dtime(0, *tokens), DF(DF.minute + DF.second)
+ else: # hour, sec
+ return dtime(*tokens), DF(DF.hour + DF.minute)
+ elif len(tokens) == 1: # H
+ if is_am: # am/pm notation was used
+ return dtime(tokens[0]), DF(DF.hour)
+ elif is_pm: # am/pm notation was used
+ return dtime(tokens[0] + 12), DF(DF.hour)
+ else:
+ return dtime(tokens[0], 0), DF(DF.hour)
+
+ except ValueError:
+ # invalid value was supplied, e.g. hour=85
+ raise TimeParserError('Invalid value', tokens)
+
+ raise TimeParserError('Unknown time format', tokens)
+
+def guess_datetime(exp):
+ """Return a datetime instance by guessing the components of a DATETIME parsed
+ user-supplied date and/or time string. Guessing might be necessary since dates
+ like 10.11.12 are ambiguous. *exp* is supposed to be a pyparsing.ParseResults
+ instance as returned by DATETIME.parseString(...).
+ """
+ # For now I assumed unique separators (dot for date, colon for time, comma to separate the two)
+ if 'date' in exp and 'time' in exp: # both parts present
+ date, dfmt = guess_date(exp.date)
+ time, tfmt = guess_time(exp.time)
+ return datetime.combine(date, time), DF(dfmt+tfmt)
+ elif 'date' in exp: # date present
+ date, dfmt = guess_date(exp.date)
+ return datetime(date.year, date.month, date.day), dfmt
+ elif 'time' in exp: # time present
+ time, tfmt = guess_time(exp.time)
+ return datetime.combine(ddate.fromtimestamp(0), time), tfmt
+ else:
+ raise DateFormatError('Neither a date nor a time was found.')
+
+def increment(date, fmt):
+ """Increment the LSB of a datetime instance by one."""
+ if fmt == '' or not fmt.valid():
+ raise DateFormatError('Invalid date format string', fmt)
+ elif fmt.lsb() == fmt.microsecond: # 5.11.2012, 06:24:18.25 -> 5.11.2012, 06:25:18.26
+ return date + relativedelta(microseconds=1)
+ elif fmt.lsb() == fmt.second: # 5.11.2012, 06:24:18 -> 5.11.2012, 06:25:19
+ return date + relativedelta(seconds=1, microsecond=0)
+ elif fmt.lsb() == fmt.minute: # 5.11.2012, 06:24 -> 5.11.2012, 06:25
+ return date + relativedelta(minutes=1, second=0, microsecond=0)
+ elif fmt.lsb() == fmt.hour: # 5.11.2012, 06am -> 5.11.2012, 07:00
+ return date + relativedelta(hours=1, minute=0, second=0, microsecond=0)
+ elif fmt.lsb() == fmt.day: # 5.11.2012 -> 6.11.2012, 00:00
+ return date + relativedelta(days=1, hour=0, minute=0, second=0, microsecond=0)
+ elif fmt.lsb() == fmt.month: # 11.2012 -> 1.12.2012
+ return date + relativedelta(months=1, day=1, hour=0, minute=0, second=0, microsecond=0)
+ else: # fmt.lsb() == fmt.year: # 2012 -> 1.1.2013, 00:00
+ return date + relativedelta(
+ years=1, month=1, day=1, hour=0, minute=0, second=0, microsecond=0)
+
+class DateTimeParser():
+
+ DATE = None
+ TIME = None
+ DATETIME = None
+
+ def build_parser(self):
+ """
+ DATE := YMD | DMY | MDY | YDM
+ YMD := YEAR SEP MON SEP DAY
+ DMY := DAY SEP [of] MON SEP YEAR
+ MDY := MON SEP DAY SEP YEAR
+ YDM := YEAR SEP DAY [of] MON
+ DM := DAY SEP [of] MON
+ YM := YEAR SEP MON
+ MY := MON SEP YEAR
+ MD := MON SEP DAY
+ DAY := [D]D | [D]D st | [D]D nd | [D]D rd | [D]D th
+ MON := [M]M | [month]
+ YEAR := [YY]YY
+ SEP := . | , | [whitespace]
+ {D,M,Y} := [digit]
+ """
+ # FIXME: Allow more patterns (e.g. 2012, 10; April, 5th; April, 2020)
+ sep = Literal('.') # FIXME: Allow '. - :'
+ year = Word(nums, exact=2) ^ Word(nums, exact=4)
+ month = Word(nums, min=1, max=2) ^ oneOf(list(MONTH_LIT.keys()))
+ day = Combine(Word(nums, min=1, max=2) + Optional(oneOf('st nd rd th')))
+ # three-part-date
+ YMD = year + sep + month + sep + day
+ DMY = day + (sep ^ 'of') + month + sep + year
+ MDY = month + sep + day + sep + year
+ YDM = year + sep + day + (sep ^ 'of') + month
+ # two-part-date
+ DM = day + (sep ^ 'of')+ month
+ YM = year + sep + month
+ MY = month + sep + year
+ MD = month + sep + day
+ Y = Word(nums, exact=4)
+ # date parser
+ self.DATE = Group(YMD | DMY | YDM | MDY | DM | YM | MY | MD | Y).setResultsName('date')
+
+ """
+ TIME := HOUR SEP MIN [SEP SEC [. MS]] | HOUR SEP MIN | HOUR [SEP MIN] {am|pm}
+ HOUR := [H]H
+ MIN := [M]M
+ SEC := [S]S
+ {H,M,S} := [digit]
+ SEP := : | . | ,
+ """
+ sep = Literal(':') # FIXME: Allow '. : -'
+ HMS = Word(nums, min=1, max=2)
+ MS = Word(nums, min=1)
+ # time parser
+ self.TIME = Group(HMS + sep + HMS + sep + HMS + oneOf('. :') + MS \
+ | HMS + sep + HMS + sep + HMS \
+ | HMS + Optional(sep + HMS) + oneOf('am pm') \
+ | HMS + sep + HMS ).setResultsName('time')
+
+ """
+ DATETIME := DATE | TIME | DATE SEP TIME | TIME SEP DATE
+ SEP := , [whitespace]
+ """
+ self.DATETIME = Group(
+ self.DATE \
+ ^ self.TIME \
+ ^ self.DATE + Optional(',') + self.TIME \
+ ^ self.TIME + Optional(',') + self.DATE \
+ ).setResultsName('datetime')
+ return self
+
+ def __call__(self, datestr):
+ if self.DATETIME is None:
+ self.build_parser()
+
+ try:
+ date, fmt = guess_datetime(self.DATETIME.parseString(datestr, parseAll=True)[0])
+ return date
+
+ except ParseException as e:
+ raise errors.ParserError('Cannot parse query', e)
+
+
+"""Default DateTimeParser instance.
+
+To produce an datetime, call
+
+>>> parse_datetime(datestring)
+
+Convenience shortcut for
+
+>>> DateTimeParser().parse(datestring)
+
+"""
+parse_datetime = DateTimeParser().build_parser()
+
+## EOF ##
diff --git a/tagit/parsing/search.py b/tagit/parsing/search.py
new file mode 100644
index 0000000..10d0e7c
--- /dev/null
+++ b/tagit/parsing/search.py
@@ -0,0 +1,405 @@
+"""User-specified search query parsing.
+
+>>> q = "has mime / tag in (november, october) / ! Apfel / time < 10.10.2004 / iso in (100, 200)"
+>>> ast = ast_from_string(q)
+
+Part of the tagit module.
+A copy of the license is provided with the project.
+Author: Matthias Baumgartner, 2022
+"""
+# standard imports
+from datetime import datetime
+
+# external imports
+from pyparsing import CaselessKeyword, Combine, Group, Optional, Or, Word, delimitedList, nums, oneOf, ParseException, Literal, QuotedString, alphanums, alphas8bit, punc8bit
+
+# tagit imports
+from tagit.utils import errors, ttime
+
+# inner-module imports
+from . import datefmt
+
+# exports
+__all__ = (
+ 'ast_from_string',
+ )
+
+# constants
+SEARCH_DELIM = '/'
+VALUE_DELIM = ','
+DEFAULT_PREDICATE = 'tag'
+
+
+## code ##
+
+class SearchParser():
+
+ # valid predicates per type
+ _PREDICATES_CATEGORICAL = None
+ _PREDICATES_CONTINUOUS = None
+ _PREDICATES_DATETIME = None
+
+ # parsers
+ _CATEGORICAL = None
+ _CONTINUOUS = None
+ _EXISTENCE = None
+ _QUERY = None
+ _TAG = None
+
+ def __init__(self, schema: bsfs.schema.Schema):
+ self.schema = schema
+
+ @property
+ def schema(self) -> bsfs.schema.Schema:
+ return self._schema
+
+ @schema.setter
+ def schema(self, schema: bsfs.schema.Schema):
+ self._schema = schema
+ self.build_parser()
+
+ def build_parser(self):
+ """
+ """
+ # The *predicate* argument is for compatibility with predicate listener.
+ # It's not actually used here.
+
+ # valid predicates per type, as supplied by tagit.library
+ # FIXME:
+ # * range / type constraints
+ # * how to filter predicates
+ # * distinguish between prefix and label
+ """
+ Categorical: string, float, integer; labelled node (tag, group); maybe boolean
+ Continuous: float, integer
+ Datetime: datetime
+ Existencial: all of the above, particularly bool; unllabelled node (preview)
+ -> rfds:range
+
+ > Target: Entity (allow others?) -> rfds:domain
+ > Require: searchable as specified in backend AND user-searchable as specified in frontend
+ """
+ self._PREDICATES_CATEGORICAL = self.schema.predicates(searchable=True, range=self.schema.tm.categorical) # FIXME!
+ self._PREDICATES_CONTINUOUS = self.schema.predicates(searchable=True, range=self.schema.tm.numerical) # FIXME!
+ self._PREDICATES_DATETIME = self.schema.predicates(searchable=True, range=self.schema.tm.datetime) # FIXME!
+
+ # terminal symbols
+ number = Group(Optional(oneOf('- +')) \
+ + Combine(Word(nums) + Optional('.' + Optional(Word(nums)))))
+ words = QuotedString(quoteChar='"', escChar='\\') \
+ ^ QuotedString(quoteChar="'", escChar='\\') \
+ ^ Word(alphanums + alphas8bit + punc8bit + ' *#%&-.:;<=>?@^_`{}~')
+ # FIXME: allow escaped chars "( ) / , [ ]"
+ # FIXME: Non-ascii characters
+
+ # predicates
+ predicate = Or([CaselessKeyword(p) for p in self._PREDICATES_CATEGORICAL]).setResultsName(
+ 'predicate')
+ date_predicate = Or([CaselessKeyword(p) for p in self._PREDICATES_DATETIME]).setResultsName(
+ 'predicate')
+ num_predicate = Or([CaselessKeyword(p) for p in self._PREDICATES_CONTINUOUS]).setResultsName(
+ 'predicate')
+
+ # existence
+ """
+ EXPR := has PREDICATE | has no PREDICATE
+ PREDICATE := [predicate]
+ """
+ op = (CaselessKeyword('has') ^ CaselessKeyword('has no') ^ CaselessKeyword('has not')).setResultsName('op')
+ self._EXISTENCE = Group(op + predicate).setResultsName('existence')
+
+
+ # continuous
+ """
+ EXPR := PREDICATE CMP VALUE | VALUE CMP PREDICATE CMP VALUE | PREDICATE OP RANGE
+ PREDICATE := [predicate]
+ CMP := < | <= | = | >= | >
+ OP := : | = | in | not in
+ RANGE := BOPEN VALUE RSEP VALUE BCLOSE | BOPEN RSEP VALUE BLOSE | BOPEN VALUE RSEP BCLOSE
+ BOPEN := ( | [ | ]
+ BCLOSE := ) | ] | [
+ RSEP := : | -
+ VALUE := [digit] | [date]
+ """
+ # range style
+ rsepn = oneOf(': -')
+ rsepd = Literal('-')
+ bclose = oneOf(') ] [').setResultsName('bclose')
+ bopen = oneOf('( [ ]').setResultsName('bopen')
+ op = Or([':', '=', 'in']).setResultsName('op')
+ datefmt = datefmt.parse_datetime.DATETIME
+ rngn = num_predicate + op + bopen + number('lo') + rsepn + number('hi') + bclose ^ \
+ num_predicate + op + bopen + rsepn + number('hi') + bclose ^ \
+ num_predicate + op + bopen + number('lo') + rsepn + bclose
+ rngd = date_predicate + op + bopen + datefmt('lo') + rsepd + datefmt('hi') + bclose ^ \
+ date_predicate + op + bopen + rsepd + datefmt('hi') + bclose ^ \
+ date_predicate + op + bopen + datefmt('lo') + rsepd + bclose
+ # equation style
+ cmp = oneOf('< <= = >= >').setResultsName('cmp')
+ eqn = num_predicate + cmp('cright') + number('vright') ^ \
+ number('vleft') + cmp('cleft') + num_predicate ^ \
+ number('vleft') + cmp('cleft') + num_predicate + cmp('cright') + number('vright')
+ eqd = date_predicate + cmp('cright') + datefmt('vright') ^ \
+ datefmt('vleft') + cmp('cleft') + date_predicate ^ \
+ datefmt('vleft') + cmp('cleft') + date_predicate + cmp('cright') + datefmt('vright')
+ # combined
+ self._CONTINUOUS = Group(
+ Group(eqn).setResultsName('eq') ^
+ Group(eqd).setResultsName('eq') ^
+ Group(rngn).setResultsName('range') ^ \
+ Group(rngd).setResultsName('range') \
+ ).setResultsName('continuous')
+
+
+ # categorical
+ """
+ EXPR := PREDICATE OP VALUE | PREDICATE OP (VALUE)
+ PREDICATE := [predicate]
+ OP := : | = | in | not in | != | ~ | !~
+ VALUE := TERM | VALUE,
+ TERM := [word]
+ """
+ op = (CaselessKeyword('in') ^ CaselessKeyword('not in') ^ ':' ^ '=' ^ '!=' ^ '~' ^ '!~').setResultsName('op')
+ value = delimitedList(words, delim=VALUE_DELIM).setResultsName('value')
+ self._CATEGORICAL = Group(predicate + op + ('(' + value + ')' | value) ).setResultsName('categorical')
+
+
+ # tag shortcuts
+ """
+ EXPR := OP VALUE | OP (VALUE) | VALUE | (VALUE)
+ OP := ! | ~ | !~
+ VALUE := TERM | VALUE,
+ TERM := [word]
+ """
+ op = oneOf('! ~ !~').setResultsName('op')
+ value = delimitedList(words, delim=VALUE_DELIM).setResultsName('value')
+ self._TAG = Group(Optional(op) + '(' + value + ')' ^ Optional(op) + value).setResultsName('tag')
+
+
+ # overall query
+ """
+ QUERY := QUERY / QUERY | EXPR
+ """
+ self._QUERY = delimitedList(self._EXISTENCE | self._CONTINUOUS | self._CATEGORICAL | self._TAG, delim=SEARCH_DELIM)
+ return self
+
+ def __del__(self):
+ if self._QUERY is not None: # remove listener
+ try:
+ self.predicates.ignore(self.build_parser)
+ except ImportError:
+ # The import fails if python is shutting down.
+ # In that case, the ignore becomes unnecessary anyway.
+ pass
+
+ def __call__(self, search):
+ # FIXME: mb/port/parsing
+ #if self._QUERY is None:
+ # # parsers were not initialized yet
+ # self.build_parser()
+ # # attach listener to receive future updates
+ # self.predicates.listen(self.build_parser)
+ # # FIXME: Additional filters would be handy
+ # #self.predicates.listen(self.build_parser, self.predicates.scope.library)
+
+ try:
+ parsed = self._QUERY.parseString(search, parseAll=True)
+ except ParseException as e:
+ raise errors.ParserError('Cannot parse query', e)
+
+ # convert to AST
+ tokens = []
+ for exp in parsed:
+ if exp.getName() == 'existence':
+ if 'op' not in exp: # prevented by grammar
+ raise errors.ParserError('Missing operator', exp)
+ elif exp.op == 'has':
+ cond = ast.Existence()
+ elif exp.op in ('has no', 'has not'):
+ cond = ast.Inexistence()
+ else: # prevented by grammar
+ raise errors.ParserError('Invalid operator ({})'.format(exp.op), exp)
+
+ tokens.append(
+ ast.Token(exp.predicate.lower(), cond))
+
+ elif exp.getName() == 'categorical':
+ values = [s.strip() for s in exp.value]
+ if 'op' not in exp: # prevented by grammar
+ raise errors.ParserError('Missing operator', exp)
+ elif exp.op in (':', '=', 'in'):
+ cond = ast.SetInclude(values)
+ elif exp.op in ('!=', 'not in'):
+ cond = ast.SetExclude(values)
+ elif exp.op == '~':
+ cond = ast.SetInclude(values, approximate=True)
+ elif exp.op == '!~':
+ cond = ast.SetExclude(values, approximate=True)
+ else: # prevented by grammar
+ raise errors.ParserError('Invalid operator ({})'.format(exp.op), exp)
+
+ tokens.append(
+ ast.Token(exp.predicate.lower(), cond))
+
+ elif exp.getName() == 'tag':
+ values = [s.strip() for s in exp.value]
+ if 'op' not in exp:
+ cond = ast.SetInclude(values)
+ elif exp.op == '~':
+ cond = ast.SetInclude(values, approximate=True)
+ elif exp.op == '!':
+ cond = ast.SetExclude(values)
+ elif exp.op == '!~':
+ cond = ast.SetExclude(values, approximate=True)
+ else: # prevented by grammar
+ raise errors.ParserError('Invalid operator ({})'.format(exp.op), exp)
+
+ tokens.append(
+ ast.Token(DEFAULT_PREDICATE, cond))
+
+ elif exp.getName() == 'continuous':
+
+ lo, hi = None, None
+ lo_inc, hi_inc = False, False
+ predicate = None
+
+ if 'eq' in exp:
+ # equation style
+ predicate = exp.eq.predicate.lower()
+
+ if ('>' in exp.eq.cleft and '<' in exp.eq.cright) or \
+ ('<' in exp.eq.cleft and '>' in exp.eq.cright) or \
+ (exp.eq.cleft == '=' and exp.eq.cright == '='):
+ # x > pred < y or x < pred > y or x = pred = y
+ raise errors.ParserError('Cannot have two lower or two upper bounds', exp)
+
+ if '>' in exp.eq.cleft:
+ hi = exp.eq.vleft
+ hi_inc = '=' in exp.eq.cleft
+ elif '<' in exp.eq.cleft:
+ lo = exp.eq.vleft
+ lo_inc = '=' in exp.eq.cleft
+ elif exp.eq.cleft == '=':
+ hi = lo = exp.eq.vleft
+ lo_inc = hi_inc = True
+
+ if '>' in exp.eq.cright:
+ lo = exp.eq.vright
+ lo_inc = '=' in exp.eq.cright
+ elif '<' in exp.eq.cright:
+ hi = exp.eq.vright
+ hi_inc = '=' in exp.eq.cright
+ elif exp.eq.cright == '=':
+ hi = lo = exp.eq.vright
+ lo_inc = hi_inc = True
+
+ elif 'range' in exp: # value in [lo:hi]
+ predicate = exp.range.predicate.lower()
+
+ if 'lo' in exp.range:
+ lo = exp.range.lo
+ lo_inc = exp.range.bopen == '['
+ if 'hi' in exp.range:
+ hi = exp.range.hi
+ hi_inc = exp.range.bclose == ']'
+
+ else: # prevented by grammar
+ raise errors.ParserError('Expression is neither a range nor an equation', exp)
+
+ # interpret values
+ if predicate in set([p.lower() for p in self._PREDICATES_DATETIME]):
+
+ # turn into datetime
+ lo, lfmt = datefmt.guess_datetime(lo) if lo is not None else (None, None)
+ hi, hfmt = datefmt.guess_datetime(hi) if hi is not None else (None, None)
+
+ if lo is None and hi is None: # prevented by grammar
+ raise errors.ParserError('At least one bound must be present', exp)
+
+ # turn the query into the format lo <= pred < hi by adjusting the boundaries
+ if hi == lo and lo_inc and hi_inc:
+ # example: pred = 2012 -> 1.1.2012 <= pred < 1.1.2013
+ hi = datefmt.increment(lo, lfmt)
+ lo_inc = True
+ hi_inc = False
+ else:
+ if lo is not None:
+ # example: pred >= 2012 -> pred >= 1.1.2012, 00:00
+ lo = datefmt.increment(lo, lfmt) if not lo_inc else lo
+ lo_inc = True
+
+ if hi is not None:
+ # example: pred <= 2012 -> pred < 1.1.2013, 00:00
+ hi = datefmt.increment(hi, hfmt) if hi_inc else hi
+ hi_inc = False
+
+ # build the ast node
+ if (lo is not None and lfmt.is_time()) or (hi is not None and hfmt.is_time()):
+ # time specification
+
+ if (lo is not None and not lfmt.is_time()) or \
+ (hi is not None and not hfmt.is_time()):
+ # lo/hi must both be time specifications
+ raise errors.ParserError('Both bounds must be a time specification', (lo, hi))
+
+ if lo is None:
+ # example: pred < 5 am -> 0 <= pred < 05:00
+ lo = ttime.from_timestamp_loc(0)
+ lo_inc = True
+
+ if hi is None:
+ # example: pred > 5 am -> 06:00 <= pred <= 24:00
+ hi = ttime.from_timestamp_loc(3600 * 24)
+ hi_inc = True
+
+ # Check consistency
+ if not (lo < hi or (lo == hi and lo_inc and hi_inc)):
+ raise errors.ParserError('Lower bound must not exceed upper bound', (lo, hi))
+
+ tokens.append(
+ ast.Token(predicate, ast.TimeRange(lo, hi, lo_inc, hi_inc)))
+
+ else: # date specification
+ # Check consistency
+ lo = lo if lo is not None else datetime.min
+ hi = hi if hi is not None else datetime.max
+
+ if not (lo < hi or (lo == hi and lo_inc and hi_inc)):
+ raise errors.ParserError('Lower bound must not exceed upper bound', (lo, hi))
+
+ tokens.append(
+ ast.Token(predicate, ast.Datetime(lo, hi, lo_inc, hi_inc)))
+
+ else:
+ # number predicate
+ lo = float(''.join(lo)) if lo is not None else float('-inf')
+ hi = float(''.join(hi)) if hi is not None else float('inf')
+
+ # Check consistency
+ if not (lo < hi or (lo == hi and lo_inc and hi_inc)):
+ raise errors.ParserError('Lower bound must not exceed upper bound', (lo, hi))
+
+ tokens.append(
+ ast.Token(predicate, ast.Continuous(lo, hi, lo_inc, hi_inc)))
+
+ else: # prevented by grammar
+ raise errors.ParserError('Invalid expression', exp)
+
+ return ast.AND(tokens)
+
+
+
+"""Default SearchParser instance.
+
+To produce an ast, call
+
+>>> ast_from_string(search)
+
+Convenience shortcut for
+
+>>> SearchParser().parse(search)
+
+"""
+ast_from_string = SearchParser(predicates)
+
+## EOF ##
diff --git a/tagit/parsing/sort.py b/tagit/parsing/sort.py
new file mode 100644
index 0000000..8950613
--- /dev/null
+++ b/tagit/parsing/sort.py
@@ -0,0 +1,192 @@
+"""
+
+Part of the tagit module.
+A copy of the license is provided with the project.
+Author: Matthias Baumgartner, 2022
+"""
+# external imports
+from pyparsing import CaselessKeyword, Group, Or, Word, delimitedList, oneOf, ParseException
+
+# tagit imports
+from tagit.utils import errors, Struct
+
+# exports
+__all__ = (
+ 'sort_from_string',
+ )
+
+
+## code ##
+
+class SortParser():
+ """Sort parser.
+
+ A sort string can be as simple as a predicate, but also allows
+ a more verbose specification for more natural readability.
+ In brief and somewhat relaxed notation, the syntax is:
+ [sort [<type>] by] <predicate> [similarity to] [<anchor>] [<direction>]
+ Multiple sort terms are concatenated with a comma.
+
+ Examples:
+ time
+ time asc
+ sort by time desc
+ sort numerically by time downwards
+ sort by tag similarity to AF39D281CE3 up
+ time, iso
+ """
+ QUERY = None
+ PREDICATES = None
+
+ def __init__(self, sortkeys):
+ self.sortkeys = sortkeys
+
+ def __call__(self, query):
+ return self.parse(query)
+
+ def build_parser(self, predicate=None):
+ # The *predicate* argument is for compatibility with predicate listener.
+ # It's not actually used here.
+ """
+ The grammar is composed as follows:
+
+ QUERY := EXPR | EXPR, EXPR
+ EXPR := PREFIX PREDICATE SUFFIX | PREDICATE SUFFIX | PREFIX PREDICATE | PREDICATE
+ PREFIX := sort TYPE by | sort by
+ SUFFIX := SIMILAR DIRECTION | SIMILAR | DIRECTION
+ SIMILAR := similarity to ANCHOR | ANCHOR
+ TYPE := numerically | alphabetically
+ PREDICATE := [predicate]
+ ANCHOR := [guid]
+ DIRECTION := up | down | asc | desc | ascending | descending | reversed | upwards | downwards
+ """
+ # predicates from sortkeys
+ self.PREDICATES = self.sortkeys.scope.library | self.sortkeys.typedef.anchored
+
+ ## terminals
+ # direction is just a list of keywords
+ direction = oneOf('up down asc desc ascending descending reversed upwards downwards',
+ caseless=True).setResultsName('direction')
+ # type is just a list of keywords
+ type_ = oneOf('numerically alphabetically').setResultsName('type')
+ # predicates are from an enum
+ predicate = Or([CaselessKeyword(p) for p in self.PREDICATES]).setResultsName('predicate')
+ # anchor is a hex digest
+ anchor = Word('abcdef0123456789ABCDEF').setResultsName('anchor')
+
+ ## rules
+ similar = Or([CaselessKeyword('similarity to') + anchor,
+ anchor])
+ suffix = Or([similar + direction, similar,
+ direction])
+ prefix = Or([CaselessKeyword('sort') + type_ + CaselessKeyword('by'),
+ CaselessKeyword('sort by')])
+ expr = Group(Or([prefix + predicate + suffix,
+ predicate + suffix,
+ prefix + predicate,
+ predicate]))
+
+ self.QUERY = delimitedList(expr, delim=',')
+ return self
+
+ def __del__(self):
+ if self.QUERY is not None: # remove listener
+ try:
+ self.sortkeys.ignore(self.build_parser)
+ except ImportError:
+ # The import fails if python is shutting down.
+ # In that case, the ignore becomes unnecessary anyway.
+ pass
+
+ def parse(self, sort):
+ if self.QUERY is None:
+ # initialize parser
+ self.build_parser()
+ # attach listener to receive future updates
+ self.sortkeys.listen(self.build_parser)
+
+ try:
+ parsed = self.QUERY.parseString(sort, parseAll=True)
+ except ParseException as e:
+ raise errors.ParserError('Cannot parse query', e)
+
+ # convert to AST
+ tokens = []
+ for exp in parsed:
+ args = Struct(
+ predicate=None,
+ type=None,
+ anchor=None,
+ direction='asc',
+ )
+ args.update(**exp.asDict())
+
+ # check predicate
+ if args.predicate is None: # prevented by grammar
+ raise errors.ParserError('Missing sort key', exp)
+ if args.predicate not in self.sortkeys: # prevented by grammar
+ raise errors.ParserError('Invalid sort key', exp)
+
+ # check direction
+ if args.direction in ('up', 'ascending', 'asc', 'upwards'):
+ reverse = False
+ elif args.direction in ('down', 'desc', 'descending', 'reversed', 'downwards'):
+ reverse = True
+ else: # prevented by grammar
+ raise errors.ParserError('Invalid direction', exp)
+
+ # infer type from predicate if needed
+ if args.anchor is not None:
+ args.type = 'anchored'
+ elif args.type is None:
+ typedef = self.sortkeys.predicate(args.predicate).typedef
+ if not len(typedef):
+ raise errors.ParserError('Undefined type', exp)
+ elif len(typedef) == 1:
+ args.type = list(typedef)[0].lower()
+ else:
+ raise errors.ParserError('Ambiguous type', exp)
+
+ # translate types
+ args.type = {
+ 'numerically': 'numerical',
+ 'alphabetically': 'alphabetical'
+ }.get(args.type, args.type)
+
+ # check type compatibility
+ admissible_types = {t.lower() for t in self.sortkeys.predicate(args.predicate).typedef}
+ if args.type not in admissible_types:
+ raise errors.ParserError('Invalid type for predicate', exp)
+ elif args.type == 'anchored' and args.anchor is None: # type set if anchor isn't None
+ raise errors.ParserError('No anchor given', exp)
+
+ # build AST
+ if args.type in ('anchored', ):
+ tokens.append(ast.AnchoredSort(args.predicate, args.anchor, reverse))
+ elif args.type in ('alphabetical', 'alphabetically'):
+ tokens.append(ast.AlphabeticalSort(args.predicate, reverse))
+ elif args.type in ('numerical', 'numerically'):
+ tokens.append(ast.NumericalSort(args.predicate, reverse))
+ else: # prevented by grammar
+ raise errors.ParserError('Invalid type for predicate', exp)
+
+ # aggregate if need be
+ if len(tokens) == 1:
+ return tokens[0]
+ else:
+ return ast.Order(*tokens)
+
+"""Default SortParser instance.
+
+To produce an ast, call
+
+>>> sort_from_string(sort)
+
+Convenience shortcut for
+
+>>> SortParser().parse(sort)
+
+"""
+sort_from_string = SortParser(sortkeys)
+
+## EOF ##
diff --git a/tagit/utils/errors.py b/tagit/utils/errors.py
index 1bed670..7a2556e 100644
--- a/tagit/utils/errors.py
+++ b/tagit/utils/errors.py
@@ -2,17 +2,17 @@
Part of the tagit module.
A copy of the license is provided with the project.
-Author: Matthias Baumgartner, 2018
+Author: Matthias Baumgartner, 2022
"""
# exports
__all__ = (
- 'EmptyFileError',
- 'LoaderError',
- 'NotAFileError',
- 'ProgrammingError',
- 'UserError',
- 'abstract',
- )
+ 'EmptyFileError',
+ 'LoaderError',
+ 'NotAFileError',
+ 'ProgrammingError',
+ 'UserError',
+ 'abstract',
+ )
## code ##
@@ -49,4 +49,8 @@ class ParserBackendError(Exception):
"""Generic parser backend error."""
pass
+class ParserError(Exception):
+ """String parsing failure."""
+ pass
+
## EOF ##
diff --git a/test/parsing/__init__.py b/test/parsing/__init__.py
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/test/parsing/__init__.py
diff --git a/test/parsing/test_datefmt.py b/test/parsing/test_datefmt.py
new file mode 100644
index 0000000..3f80c15
--- /dev/null
+++ b/test/parsing/test_datefmt.py
@@ -0,0 +1,378 @@
+"""Test datetime parser.
+
+Part of the tagit test suite.
+A copy of the license is provided with the project.
+Author: Matthias Baumgartner, 2022
+"""
+# standard imports
+import unittest
+from datetime import date as ddate
+from datetime import time as dtime
+from datetime import datetime
+
+# external imports
+from pyparsing import ParseException
+
+# tagit imports
+from tagit.utils import errors, Struct
+
+# objects to test
+#from tagit.parsing.datefmt import DatefmtError, DateParserError, TimeParserError, DateFormatError, guess_date, guess_time, guess_datetime, increment, PRIORITIES_US, DF
+from tagit.parsing.datefmt import guess_date, guess_time, PRIORITIES_US, DateParserError, DateFormatError, TimeParserError, guess_datetime, DF, parse_datetime, increment, DateTimeParser
+
+
+## code ##
+
+class TestGuessDatetime(unittest.TestCase):
+ def test_parse_datetime(self):
+ parse_datetime = DateTimeParser()
+ cyear = ddate.today().year
+ cmon = ddate.today().month
+ cday = ddate.today().day
+
+ # date only: vary number formats
+ self.assertEqual(parse_datetime('3.4.12'), datetime(2012, 4, 3))
+ self.assertEqual(parse_datetime('15.8.19'), datetime(2019, 8, 15))
+ self.assertEqual(parse_datetime('8.11.98'), datetime(1998, 11, 8))
+ self.assertEqual(parse_datetime('10.11.12'), datetime(2012, 11, 10))
+ self.assertEqual(parse_datetime('3.4.1912'), datetime(1912, 4, 3))
+ self.assertEqual(parse_datetime('15.8.1919'), datetime(1919, 8, 15))
+ self.assertEqual(parse_datetime('8.11.1998'), datetime(1998, 11, 8))
+ self.assertEqual(parse_datetime('10.11.1912'), datetime(1912, 11, 10))
+ self.assertEqual(parse_datetime('8.1998'), datetime(1998, 8, 1))
+ self.assertEqual(parse_datetime('10.1912'), datetime(1912, 10, 1))
+ self.assertRaises(errors.ParserError, parse_datetime, 'ab.cd.ef')
+ self.assertRaises(errors.ParserError, parse_datetime, '123.123.2000')
+ self.assertRaises(errors.ParserError, parse_datetime, '.123.2000')
+ self.assertRaises(errors.ParserError, parse_datetime, '123..2000')
+ self.assertRaises(errors.ParserError, parse_datetime, '12.12.20001')
+ # date only: vary order (three-part)
+ self.assertEqual(parse_datetime('15.98.8'), datetime(1998, 8, 15))
+ self.assertEqual(parse_datetime('8.98.15'), datetime(1998, 8, 15))
+ self.assertEqual(parse_datetime('8.15.98'), datetime(1998, 8, 15))
+ self.assertEqual(parse_datetime('15.8.98'), datetime(1998, 8, 15))
+ self.assertEqual(parse_datetime('98.8.15'), datetime(1998, 8, 15))
+ self.assertEqual(parse_datetime('98.15.8'), datetime(1998, 8, 15))
+ # date only: vary order (two-part)
+ self.assertEqual(parse_datetime('15.98'), datetime(1998, 1, 15))
+ self.assertEqual(parse_datetime('08.98'), datetime(1998, 8, 1))
+ self.assertEqual(parse_datetime('08.15'), datetime(2015, 8, 1))
+ self.assertEqual(parse_datetime('15.08'), datetime(cyear, 8, 15))
+ self.assertEqual(parse_datetime('98.08'), datetime(1998, 8, 1))
+ self.assertEqual(parse_datetime('98.15'), datetime(1998, 1, 15))
+ # date only: one part
+ self.assertEqual(parse_datetime('1998'), datetime(1998, 1, 1))
+ # date only: literal month
+ self.assertEqual(parse_datetime('98.April'), datetime(1998, 4, 1))
+ # FIXME: Allow more patterns
+ #self.assertEqual(parse_datetime('98, April'), datetime(1998, 4, 1))
+ #self.assertEqual(parse_datetime('April, 15'), datetime(2015, 4, 1))
+ # date only: day with suffix
+ #self.assertEqual(parse_datetime('10th 2000'), datetime(2000, 1, 10))
+ #self.assertEqual(parse_datetime('2000, 10th'), datetime(2000, 1, 10))
+ #self.assertEqual(parse_datetime('April, 10th'), datetime(cyear, 4, 10))
+ #self.assertEqual(parse_datetime('April 10th'), datetime(cyear, 4, 10))
+ #self.assertEqual(parse_datetime('10th April'), datetime(cyear, 4, 10))
+ # date only: of notation
+ #self.assertEqual(parse_datetime('10th of April, 2000'), datetime(2000, 4, 10))
+ #self.assertEqual(parse_datetime('10th of April'), datetime(cyear, 4, 10))
+ #self.assertEqual(parse_datetime('10 of 04'), datetime(cyear, 4, 10))
+ # invalid ranges
+ self.assertRaises(DateParserError, parse_datetime, '10.93.2013')
+ self.assertRaises(DateParserError, parse_datetime, '48.10.2013')
+ self.assertRaises(DateParserError, parse_datetime, '48.93.2013')
+ self.assertRaises(DateParserError, parse_datetime, "52.74")
+
+ # time only: am/pm
+ self.assertEqual(parse_datetime("10 am"), datetime(1970, 1, 1, 10))
+ self.assertEqual(parse_datetime("10 pm"), datetime(1970, 1, 1, 22))
+ self.assertEqual(parse_datetime("10:02 pm"), datetime(1970, 1, 1, 22, 2))
+ self.assertEqual(parse_datetime("14 am"), datetime(1970, 1, 1, 14))
+ self.assertRaises(TimeParserError, parse_datetime, "14 pm")
+ # time only: 24hrs format
+ self.assertEqual(parse_datetime("1:2"), datetime(1970, 1, 1, 1, 2))
+ self.assertEqual(parse_datetime("12:34"), datetime(1970, 1, 1, 12, 34))
+ self.assertEqual(parse_datetime("12:34:54"), datetime(1970, 1, 1, 12, 34, 54))
+ self.assertEqual(parse_datetime("12:34:54.123"), datetime(1970, 1, 1, 12, 34, 54, 123000))
+ self.assertEqual(parse_datetime("1:2:3.4"), datetime(1970, 1, 1, 1, 2, 3, 400000))
+ self.assertRaises(errors.ParserError, parse_datetime, '84:12')
+ self.assertRaises(errors.ParserError, parse_datetime, '12:75')
+ self.assertRaises(errors.ParserError, parse_datetime, '12:13:84')
+ # time only: HH:MM
+ self.assertEqual(parse_datetime("54:34"), datetime(1970, 1, 1, 0, 54, 34))
+ # time only: invalid format
+ self.assertRaises(errors.ParserError, parse_datetime, '12:')
+
+ # date and time
+ self.assertEqual(parse_datetime("12:34 18.05.2012"), datetime(2012, 5, 18, 12, 34))
+ self.assertEqual(parse_datetime("12:34, 18.05.2012"), datetime(2012, 5, 18, 12, 34))
+ self.assertEqual(parse_datetime("18.05.2012 12:34"), datetime(2012, 5, 18, 12, 34))
+ self.assertEqual(parse_datetime("18.05.2012, 12:34"), datetime(2012, 5, 18, 12, 34))
+ self.assertEqual(parse_datetime("2012, 12:34"), datetime(2012, 1, 1, 12, 34))
+ self.assertEqual(parse_datetime("2012, 12am"), datetime(2012, 1, 1, 12))
+ self.assertRaises(errors.ParserError, parse_datetime, '12.34 18:05:2012')
+
+ # invalid args
+ self.assertRaises(errors.ParserError, parse_datetime, '')
+
+ def test_guess_date(self):
+ this_year = ddate.today().year
+ # some unambiguous formats
+ self.assertEqual(guess_date('18 . 05 . 2012'.split()), (ddate(2012, 5, 18), 'DMY'))
+ self.assertEqual(guess_date('18 5 2012'.split()), (ddate(2012, 5, 18), 'DMY'))
+ self.assertEqual(guess_date('2012 , 05 , 18'.split()), (ddate(2012, 5, 18), 'YMD'))
+ self.assertEqual(guess_date('2012 5 18'.split()), (ddate(2012, 5, 18), 'YMD'))
+ self.assertEqual(guess_date('18 5 2004'.split()), (ddate(2004, 5, 18), 'DMY'))
+ self.assertEqual(guess_date('2004 5 18'.split()), (ddate(2004, 5, 18), 'YMD'))
+ self.assertEqual(guess_date('10 11 12'.split()), (ddate(2012, 11, 10), 'DMY'))
+ self.assertEqual(guess_date('10 11 12'.split(), priorities=PRIORITIES_US),
+ (ddate(2012, 10, 11), 'MDY'))
+ self.assertEqual(guess_date('2012 04 05'.split()), (ddate(2012, 4, 5), 'YMD'))
+ self.assertEqual(guess_date('2012 4 5'.split()), (ddate(2012, 4, 5), 'YMD'))
+ self.assertEqual(guess_date('2012 May , 4th'.split()), (ddate(2012, 5, 4), 'YMD'))
+ self.assertEqual(guess_date('4 5 2012'.split()), (ddate(2012, 5, 4), 'DMY'))
+ self.assertEqual(guess_date('4th of May 2012'.split()), (ddate(2012, 5, 4), 'DMY'))
+ self.assertEqual(guess_date('2012 4th of May'.split()), (ddate(2012, 5, 4), 'YDM'))
+
+ # three-part format
+ # unambiguous MD ranges, full year
+ self.assertEqual(guess_date('28 11 2018'.split()), (ddate(2018, 11, 28), 'DMY'))
+ self.assertEqual(guess_date('28 2018 11'.split()), (ddate(2018, 11, 28), 'DYM'))
+ self.assertEqual(guess_date('11 28 2018'.split()), (ddate(2018, 11, 28), 'MDY'))
+ self.assertEqual(guess_date('11 2018 28'.split()), (ddate(2018, 11, 28), 'MYD'))
+ self.assertEqual(guess_date('2018 11 28'.split()), (ddate(2018, 11, 28), 'YMD'))
+ self.assertEqual(guess_date('2018 28 11'.split()), (ddate(2018, 11, 28), 'YDM'))
+ # unambiguous MDY ranges
+ self.assertEqual(guess_date('28 11 98'.split()), (ddate(1998, 11, 28), 'DMY'))
+ self.assertEqual(guess_date('28 98 11'.split()), (ddate(1998, 11, 28), 'DYM'))
+ self.assertEqual(guess_date('11 28 98'.split()), (ddate(1998, 11, 28), 'MDY'))
+ self.assertEqual(guess_date('11 98 28'.split()), (ddate(1998, 11, 28), 'MYD'))
+ self.assertEqual(guess_date('98 11 28'.split()), (ddate(1998, 11, 28), 'YMD'))
+ self.assertEqual(guess_date('98 28 11'.split()), (ddate(1998, 11, 28), 'YDM'))
+ # explicit YMD
+ self.assertEqual(guess_date('10th of April 2018'.split()), (ddate(2018, 4, 10), 'DMY'))
+ self.assertEqual(guess_date('April 10th 98'.split()), (ddate(1998, 4, 10), 'MDY'))
+ self.assertEqual(guess_date('98 April 10th'.split()), (ddate(1998, 4, 10), 'YMD'))
+ self.assertEqual(guess_date('2018 10th of April'.split()), (ddate(2018, 4, 10), 'YDM'))
+ self.assertEqual(guess_date('2018 10 of 04'.split()), (ddate(2018, 4, 10), 'YDM'))
+ # explicit MY
+ self.assertEqual(guess_date('2018 10 April'.split()), (ddate(2018, 4, 10), 'YDM'))
+ self.assertEqual(guess_date('2018 April 10'.split()), (ddate(2018, 4, 10), 'YMD'))
+ self.assertEqual(guess_date('April 10 2018'.split()), (ddate(2018, 4, 10), 'MDY'))
+ # explicit DY
+ self.assertEqual(guess_date('10th 04 98'.split()), (ddate(1998, 4, 10), 'DMY'))
+ self.assertEqual(guess_date('2018 10th 04'.split()), (ddate(2018, 4, 10), 'YDM'))
+ self.assertEqual(guess_date('2018 04 10th'.split()), (ddate(2018, 4, 10), 'YMD'))
+ # explicit DM
+ self.assertEqual(guess_date('10th April 10'.split()), (ddate(2010, 4, 10), 'DMY'))
+ self.assertEqual(guess_date('10 10th April'.split()), (ddate(2010, 4, 10), 'YDM'))
+ self.assertEqual(guess_date('10 April 10th'.split()), (ddate(2010, 4, 10), 'YMD'))
+ # ambiguous formats: explicit Y
+ self.assertEqual(guess_date('2018 04 08'.split()), (ddate(2018, 4, 8), 'YMD'))
+ self.assertEqual(guess_date('04 2018 08'.split()), (ddate(2018, 8, 4), 'DYM'))
+ self.assertEqual(guess_date('08 04 2018'.split()), (ddate(2018, 4, 8), 'DMY'))
+ self.assertEqual(guess_date('4 8 11'.split()), (ddate(2011, 8, 4), 'DMY'))
+ # ambiguous formats: explicit D
+ self.assertEqual(guess_date('10 4th 11'.split()), (ddate(2011, 10, 4), 'MDY'))
+ self.assertEqual(guess_date('11 10 4th'.split()), (ddate(2011, 10, 4), 'YMD'))
+ self.assertEqual(guess_date('4th 11 10'.split()), (ddate(2010, 11, 4), 'DMY'))
+ self.assertEqual(guess_date('4th 10 11'.split()), (ddate(2011, 10, 4), 'DMY'))
+ # ambiguous formats: explicit M
+ self.assertEqual(guess_date('April 21 08'.split()), (ddate(2008, 4, 21), 'MDY'))
+ self.assertEqual(guess_date('08 April 21'.split()), (ddate(2021, 4, 8), 'DMY'))
+ self.assertEqual(guess_date('21 08 April'.split()), (ddate(2021, 4, 8), 'YDM'))
+ # fully ambiguous
+ self.assertEqual(guess_date('04 08 10'.split()), (ddate(2010, 8, 4), 'DMY'))
+ # errors
+ self.assertRaises(DateParserError, guess_date, '2012 98 10'.split())
+ self.assertRaises(DateParserError, guess_date, 'April 98 April'.split())
+ self.assertRaises(DateParserError, guess_date, '10th 98 29'.split())
+
+ # two-part format
+ # unambiguous DY ranges
+ self.assertEqual(guess_date('28 98'.split()), (ddate(1998, 1, 28), 'DY'))
+ self.assertEqual(guess_date('98 28'.split()), (ddate(1998, 1, 28), 'YD'))
+ self.assertEqual(guess_date('2010 28'.split()), (ddate(2010, 1, 28), 'YD'))
+ self.assertEqual(guess_date('28 2010'.split()), (ddate(2010, 1, 28), 'DY'))
+ # explicit DY
+ self.assertEqual(guess_date('28th 2010'.split()), (ddate(2010, 1, 28), 'DY'))
+ self.assertEqual(guess_date('2010 28th'.split()), (ddate(2010, 1, 28), 'YD'))
+ # explicit MY
+ self.assertEqual(guess_date('2010 April'.split()), (ddate(2010, 4, 1), 'YM'))
+ self.assertEqual(guess_date('April 2010'.split()), (ddate(2010, 4, 1), 'MY'))
+ # explicit DM
+ self.assertEqual(guess_date('April 10th'.split()), (ddate(this_year, 4, 10), 'MD'))
+ self.assertEqual(guess_date('10th April'.split()), (ddate(this_year, 4, 10), 'DM'))
+ self.assertEqual(guess_date('10th of April'.split()), (ddate(this_year, 4, 10), 'DM'))
+ self.assertEqual(guess_date('10 of 04'.split()), (ddate(this_year, 4, 10), 'DM'))
+ self.assertEqual(guess_date('10th 4'.split()), (ddate(this_year, 4, 10), 'DM'))
+ # explicit Y
+ self.assertEqual(guess_date('2010 04'.split()), (ddate(2010, 4, 1), 'YM'))
+ self.assertEqual(guess_date('04 2010'.split()), (ddate(2010, 4, 1), 'MY'))
+ self.assertEqual(guess_date('04 98'.split()), (ddate(1998, 4, 1), 'MY'))
+ self.assertEqual(guess_date('98 04'.split()), (ddate(1998, 4, 1), 'YM'))
+ # explicit M
+ self.assertEqual(guess_date('April 10'.split()), (ddate(2010, 4, 1), 'MY'))
+ self.assertEqual(guess_date('10 April'.split()), (ddate(this_year, 4, 10), 'DM'))
+ # explicit D
+ self.assertEqual(guess_date('10th 08'.split()), (ddate(this_year, 8, 10), 'DM'))
+ self.assertEqual(guess_date('08 10th'.split()), (ddate(this_year, 8, 10), 'MD'))
+ # some hints
+ self.assertEqual(guess_date('18 5'.split()), (ddate(this_year, 5, 18), 'DM'))
+ self.assertEqual(guess_date('4 8'.split()), (ddate(this_year, 8, 4), 'DM'))
+ # fully ambiguous
+ self.assertEqual(guess_date('08 10'.split()), (ddate(this_year, 10, 8), 'DM'))
+
+ # one-part format
+ # full year
+ self.assertEqual(guess_date('2018'.split()), (ddate(2018, 1, 1), 'Y'))
+ # short year
+ self.assertEqual(guess_date('18'.split()), (ddate(2018, 1, 1), 'Y'))
+ self.assertEqual(guess_date('98'.split()), (ddate(1998, 1, 1), 'Y'))
+ self.assertEqual(guess_date('08'.split()), (ddate(2008, 1, 1), 'Y'))
+ # non-year token
+ self.assertRaises(DateParserError, guess_date, ('1', ))
+ self.assertRaises(DateParserError, guess_date, ('April', ))
+ self.assertRaises(DateParserError, guess_date, ('10th', ))
+
+ # other errors
+ self.assertRaises(DateParserError, guess_date, '')
+ self.assertRaises(DateParserError, guess_date, 'fuuu'.split())
+ self.assertRaises(DateParserError, guess_date, '1 fuuu'.split())
+ self.assertRaises(DateParserError, guess_date, '1 fuuu bar 2'.split())
+ self.assertRaises(DateParserError, guess_date, '1 2 3 4'.split())
+
+ def test_guess_time(self):
+ # single token
+ self.assertEqual(guess_time(['9']), (dtime(hour=9), 'h'))
+ # am/pm notation
+ self.assertEqual(guess_time("9 am".split()), (dtime(hour=9), 'h'))
+ self.assertEqual(guess_time("10 am".split()), (dtime(hour=10), 'h'))
+ self.assertEqual(guess_time("09 am".split()), (dtime(hour=9), 'h'))
+ self.assertEqual(guess_time("9 pm".split()), (dtime(hour=21), 'h'))
+ self.assertEqual(guess_time("10 pm".split()), (dtime(hour=22), 'h'))
+ self.assertEqual(guess_time("09 pm".split()), (dtime(hour=21), 'h'))
+ self.assertEqual(guess_time("10 02 am".split()), (dtime(hour=10, minute=2), 'hm'))
+ self.assertEqual(guess_time("10 02 pm".split()), (dtime(hour=22, minute=2), 'hm'))
+ self.assertEqual(guess_time("14 am".split()), (dtime(hour=14), 'h'))
+ self.assertRaises(TimeParserError, guess_time, "14 pm".split())
+
+ # 24-hrs notation
+ self.assertEqual(guess_time("12 34".split()), (dtime(hour=12, minute=34), 'hm'))
+ self.assertEqual(guess_time("15 32".split()), (dtime(hour=15, minute=32), 'hm'))
+ self.assertEqual(guess_time("12 04".split()), (dtime(hour=12, minute=4), 'hm'))
+ self.assertEqual(guess_time("12 4".split()), (dtime(hour=12, minute=4), 'hm'))
+ # range
+ self.assertEqual(guess_time("12 58".split()), (dtime(hour=12, minute=58), 'hm'))
+ self.assertEqual(guess_time("31 04".split()), (dtime(minute=31, second=4), 'ms'))
+ self.assertEqual(guess_time("31 58".split()), (dtime(minute=31, second=58), 'ms'))
+ # three terms
+ self.assertEqual(guess_time("12 34 54".split()),
+ (dtime(hour=12, minute=34, second=54), 'hms'))
+ # four terms
+ self.assertEqual(guess_time("12 34 54 984".split()),
+ (dtime(hour=12, minute=34, second=54, microsecond=984000), 'hmsn'))
+ # trailing zeros
+ self.assertEqual(guess_time("12 34 54 98400".split()),
+ (dtime(hour=12, minute=34, second=54, microsecond=984000), 'hmsn'))
+ # leading zeros
+ self.assertEqual(guess_time("12 34 54 098400".split()),
+ (dtime(hour=12, minute=34, second=54, microsecond=98400), 'hmsn'))
+
+ # invalid formats
+ self.assertRaises(TimeParserError, guess_time, [])
+ self.assertRaises(TimeParserError, guess_time, ['0', '1', '2', '3', '4'])
+ self.assertRaises(TimeParserError, guess_time, "83 02".split())
+ self.assertRaises(TimeParserError, guess_time, "52 74".split())
+
+ def test_guess_datetime(self):
+ self.assertEqual(guess_datetime(
+ Struct({'date': '18 05 2012'.split(), 'time': '12 34'.split()})),
+ (datetime(2012, 5, 18, 12, 34), 'DMYhm'))
+ self.assertEqual(guess_datetime(Struct({'date': '18 05 2012'.split()})),
+ (datetime(2012, 5, 18), 'DMY'))
+ self.assertEqual(guess_datetime(Struct({'time': '12 34'.split()})),
+ (datetime(1970, 1, 1, 12, 34), 'hm'))
+ self.assertRaises(DateFormatError, guess_datetime, Struct({}))
+
+ def test_DF(self):
+ # msb
+ self.assertRaises(DateFormatError, DF('').msb)
+ self.assertRaises(DateFormatError, DF('abc').msb)
+ self.assertRaises(DateFormatError, DF('ydHSN').msb)
+ self.assertEqual('Y', DF('Yab').msb())
+ self.assertEqual('Y', DF('YDM').msb())
+ self.assertEqual('Y', DF('MdY').msb())
+ self.assertEqual('D', DF('mDn').msb())
+ self.assertEqual('m', DF('mdn').msb())
+ self.assertEqual('n', DF('nab').msb())
+ # lsb
+ self.assertRaises(DateFormatError, DF('').lsb)
+ self.assertRaises(DateFormatError, DF('abc').lsb)
+ self.assertEqual('Y', DF('Yab').lsb())
+ self.assertEqual('D', DF('YDM').lsb())
+ self.assertEqual('M', DF('MdY').lsb())
+ self.assertEqual('n', DF('mDn').lsb())
+ self.assertEqual('n', DF('nab').lsb())
+ # is_time
+ self.assertTrue(DF('mshn').is_time())
+ self.assertTrue(DF('mh').is_time())
+ self.assertTrue(DF('h').is_time())
+ self.assertTrue(DF('m').is_time())
+ self.assertTrue(DF('s').is_time())
+ self.assertTrue(DF('n').is_time())
+ self.assertFalse(DF('').is_time())
+ self.assertFalse(DF('abc').is_time())
+ self.assertFalse(DF('Msnh').is_time())
+ self.assertFalse(DF('Ymsnh').is_time())
+ self.assertFalse(DF('Dmsnh').is_time())
+ self.assertFalse(DF('YDM').is_time())
+ # is_date
+ self.assertTrue(DF('YDM').is_date())
+ self.assertTrue(DF('YM').is_date())
+ self.assertTrue(DF('DM').is_date())
+ self.assertTrue(DF('DY').is_date())
+ self.assertTrue(DF('Y').is_date())
+ self.assertTrue(DF('D').is_date())
+ self.assertTrue(DF('M').is_date())
+ self.assertFalse(DF('').is_date())
+ self.assertFalse(DF('abc').is_date())
+ self.assertFalse(DF('YDMn').is_date())
+ self.assertFalse(DF('YDm').is_date())
+ self.assertFalse(DF('YDh').is_date())
+ self.assertFalse(DF('hmsn').is_date())
+ # valid
+ self.assertTrue(DF('Y').valid())
+ self.assertTrue(DF('YDMhsmn').valid())
+ self.assertTrue(DF('Yabc').valid())
+ self.assertFalse(DF('').valid())
+ self.assertFalse(DF('abc').valid())
+ self.assertFalse(DF('ydHSN').valid())
+
+
+ def test_increment(self):
+ self.assertRaises(DateFormatError, increment, None, '')
+ self.assertEqual(increment(datetime(1970, 1, 1, 0, 0, 0, 10), DF('n')),
+ datetime(1970, 1, 1, 0, 0, 0, 11))
+ self.assertEqual(increment(datetime(1970, 1, 1, 0, 0, 1), DF('s')),
+ datetime(1970, 1, 1, 0, 0, 2))
+ self.assertEqual(increment(datetime(1970, 1, 1, 0, 1), DF('m')),
+ datetime(1970, 1, 1, 0, 2))
+ self.assertEqual(increment(datetime(1970, 1, 1, 1), DF('h')),
+ datetime(1970, 1, 1, 2))
+ self.assertEqual(increment(datetime(1970, 1, 1, 1), DF('h')),
+ datetime(1970, 1, 1, 2))
+ self.assertEqual(increment(datetime(1970, 2, 3, 4, 5, 6), DF('D')),
+ datetime(1970, 2, 4))
+ self.assertEqual(increment(datetime(1970, 2, 3, 4, 5, 6), DF('M')),
+ datetime(1970, 3, 1))
+ self.assertEqual(increment(datetime(1970, 2, 3, 4, 5, 6), DF('Y')),
+ datetime(1971, 1, 1))
+ self.assertRaises(DateFormatError, increment, datetime(1970, 2, 3, 4, 5, 6), DF('abc'))
+
+## main ##
+
+if __name__ == '__main__':
+ unittest.main()
+
+## EOF ##
diff --git a/test/parsing/test_search.py b/test/parsing/test_search.py
new file mode 100644
index 0000000..23801d0
--- /dev/null
+++ b/test/parsing/test_search.py
@@ -0,0 +1,707 @@
+"""
+
+Part of the tagit test suite.
+A copy of the license is provided with the project.
+Author: Matthias Baumgartner, 2022
+"""
+# standard imports
+import unittest
+from datetime import datetime
+
+# external imports
+from pyparsing import ParseException
+
+# tagit imports
+from tagit.utils import errors
+#from tagit.parsing.search import ast, predicates, PredicateScope # FIXME: mb/port/parsing
+
+# objects to test
+from tagit.parsing.search import ast_from_string
+
+
+## code ##
+
+class TestScope(PredicateScope):
+ _scope_order = ['major', 'minor', 'micro']
+ _init_values = ['library']
+
+
+class TestParseContinuous(unittest.TestCase):
+ longMessage = True
+
+ def setUp(self):
+ predicates.expose('mime',
+ TestScope('attribute', 'mime'), 'Categorical')
+ predicates.expose('iso',
+ TestScope('attribute', 'iso'), 'Continuous', 'Categorical')
+ predicates.expose('time',
+ TestScope('generic', 't_image_create_loc'), 'TimeRange', 'Datetime')
+ predicates.expose('tag',
+ TestScope('generic', 'tag'), 'Categorical')
+
+ def _test(self, query, target):
+ predicate, condition = target
+ result = ast_from_string(query)
+ target = ast.AND([ast.Token(predicate, condition)])
+ self.assertEqual(result, target, msg="in query '{}'".format(query))
+
+ def test_larger_than(self):
+ # larger than A (inclusive)
+ for editable in [
+ # range
+ "{predicate} in [{num}:]", "{predicate} in [{num}:[", "{predicate} in [{num}:)",
+ "{predicate} : [{num}:]", "{predicate} : [{num}:[", "{predicate} : [{num}:)",
+ "{predicate} = [{num}:]", "{predicate} = [{num}:[", "{predicate} = [{num}:)",
+ ]:
+ # positive
+ self._test(editable.format(num=1.23, predicate='iso'),
+ ('iso', ast.Continuous(1.23, float('inf'), True, False)))
+ # negative
+ self._test(editable.format(num=-1.23, predicate='iso'),
+ ('iso', ast.Continuous(-1.23, float('inf'), True, False)))
+
+ for editable in [
+ # range
+ "{predicate} in [{num}-]", "{predicate} in [{num}-[", "{predicate} in [{num}-)",
+ "{predicate} : [{num}-]", "{predicate} : [{num}-[", "{predicate} : [{num}-)",
+ "{predicate} = [{num}-]", "{predicate} = [{num}-[", "{predicate} = [{num}-)",
+ # equation
+ "{predicate} >= {num}", "{num} <= {predicate}",
+ ]:
+ # positive
+ self._test(editable.format(num=1.23, predicate='iso'),
+ ('iso', ast.Continuous(1.23, float('inf'), True, False)))
+ # negative
+ self._test(editable.format(num=-1.23, predicate='iso'),
+ ('iso', ast.Continuous(-1.23, float('inf'), True, False)))
+ # date
+ self._test(editable.format(predicate='time', num="30.04.2012, 13:18"),
+ ('time', ast.Datetime(datetime(2012, 4, 30, 13, 18), datetime.max, True, False)))
+
+ # larger than A (exclusive)
+ for editable in [
+ # range / bracket
+ "{predicate} in ]{num}:]", "{predicate} in ]{num}:[", "{predicate} in ]{num}:)",
+ "{predicate} : ]{num}:]", "{predicate} : ]{num}:[", "{predicate} : ]{num}:)",
+ "{predicate} = ]{num}:]", "{predicate} = ]{num}:[", "{predicate} = ]{num}:)",
+ # range / parenthesis
+ "{predicate} in ({num}:]", "{predicate} in ({num}:[", "{predicate} in ({num}:)",
+ "{predicate} : ({num}:]", "{predicate} : ({num}:[", "{predicate} : ({num}:)",
+ "{predicate} = ({num}:]", "{predicate} = ({num}:[", "{predicate} = ({num}:)",
+ ]:
+ # positive
+ self._test(editable.format(num=1.23, predicate='iso'),
+ ('iso', ast.Continuous(1.23, float('inf'), False, False)))
+ # negative
+ self._test(editable.format(num=-1.23, predicate='iso'),
+ ('iso', ast.Continuous(-1.23, float('inf'), False, False)))
+
+ for editable in [
+ # range / bracket
+ "{predicate} in ]{num}-]", "{predicate} in ]{num}-[", "{predicate} in ]{num}-)",
+ "{predicate} : ]{num}-]", "{predicate} : ]{num}-[", "{predicate} : ]{num}-)",
+ "{predicate} = ]{num}-]", "{predicate} = ]{num}-[", "{predicate} = ]{num}-)",
+ # range / parenthesis
+ "{predicate} in ({num}-]", "{predicate} in ({num}-[", "{predicate} in ({num}-)",
+ "{predicate} : ({num}-]", "{predicate} : ({num}-[", "{predicate} : ({num}-)",
+ "{predicate} = ({num}-]", "{predicate} = ({num}-[", "{predicate} = ({num}-)",
+ # equation
+ "{predicate} > {num}", "{num} < {predicate}",
+ ]:
+ # positive
+ self._test(editable.format(num=1.23, predicate='iso'),
+ ('iso', ast.Continuous(1.23, float('inf'), False, False)))
+ # negative
+ self._test(editable.format(num=-1.23, predicate='iso'),
+ ('iso', ast.Continuous(-1.23, float('inf'), False, False)))
+ # date
+ self._test(editable.format(predicate='time', num="30.04.2012, 13:18"),
+ ('time', ast.Datetime(datetime(2012, 4, 30, 13, 19), datetime.max, True, False)))
+
+ def test_smaller_than(self):
+ # smaller than B (inclusive)
+ for editable in [
+ # range
+ "{predicate} in [:{num}]", "{predicate} in (:{num}]", "{predicate} in ]:{num}]",
+ "{predicate} : [:{num}]", "{predicate} : (:{num}]", "{predicate} : ]:{num}]",
+ "{predicate} = [:{num}]", "{predicate} = (:{num}]", "{predicate} = ]:{num}]",
+ ]:
+ # positives
+ self._test(editable.format(num=1.23, predicate='iso'),
+ ('iso', ast.Continuous(float('-inf'), 1.23, False, True)))
+ # negatives
+ self._test(editable.format(num=-1.23, predicate='iso'),
+ ('iso', ast.Continuous(float('-inf'), -1.23, False, True)))
+
+ for editable in [
+ # range
+ "{predicate} in [-{num}]", "{predicate} in (-{num}]", "{predicate} in ]-{num}]",
+ "{predicate} : [-{num}]", "{predicate} : (-{num}]", "{predicate} : ]-{num}]",
+ "{predicate} = [-{num}]", "{predicate} = (-{num}]", "{predicate} = ]-{num}]",
+ # equation
+ "{predicate} <={num}", "{num} >= {predicate}",
+ ]:
+ # positives
+ self._test(editable.format(num=1.23, predicate='iso'),
+ ('iso', ast.Continuous(float('-inf'), 1.23, False, True)))
+ # negatives
+ self._test(editable.format(num=-1.23, predicate='iso'),
+ ('iso', ast.Continuous(float('-inf'), -1.23, False, True)))
+ # date
+ self._test(editable.format(predicate='time', num="30.04.2012, 13:18"),
+ ('time', ast.Datetime(datetime.min, datetime(2012, 4, 30, 13, 19), False, False)))
+
+ # smaller than B (exclusive)
+ for editable in [
+ # range / bracket
+ "{predicate} in [:{num}[", "{predicate} in (:{num}[", "{predicate} in ]:{num}[",
+ "{predicate} : [:{num}[", "{predicate} : (:{num}[", "{predicate} : ]:{num}[",
+ "{predicate} = [:{num}[", "{predicate} = (:{num}[", "{predicate} = ]:{num}[",
+ # range / parenthesis
+ "{predicate} in [:{num})", "{predicate} in (:{num})", "{predicate} in ]:{num})",
+ "{predicate} : [:{num})", "{predicate} : (:{num})", "{predicate} : ]:{num})",
+ "{predicate} = [:{num})", "{predicate} = (:{num})", "{predicate} = ]:{num})",
+ ]:
+ # positives
+ self._test(editable.format(num=1.23, predicate='iso'),
+ ('iso', ast.Continuous(float('-inf'), 1.23, False, False)))
+ # negatives
+ self._test(editable.format(num=-1.23, predicate='iso'),
+ ('iso', ast.Continuous(float('-inf'), -1.23, False, False)))
+
+ for editable in [
+ # range / bracket
+ "{predicate} in [-{num}[", "{predicate} in (-{num}[", "{predicate} in ]-{num}[",
+ "{predicate} : [-{num}[", "{predicate} : (-{num}[", "{predicate} : ]-{num}[",
+ "{predicate} = [-{num}[", "{predicate} = (-{num}[", "{predicate} = ]-{num}[",
+ # range / parenthesis
+ "{predicate} in [-{num})", "{predicate} in (-{num})", "{predicate} in ]-{num})",
+ "{predicate} : [-{num})", "{predicate} : (-{num})", "{predicate} : ]-{num})",
+ "{predicate} = [-{num})", "{predicate} = (-{num})", "{predicate} = ]-{num})",
+ # equation
+ "{predicate} <{num}", "{num} > {predicate}",
+ ]:
+ # positives
+ self._test(editable.format(num=1.23, predicate='iso'),
+ ('iso', ast.Continuous(float('-inf'), 1.23, False, False)))
+ # negatives
+ self._test(editable.format(num=-1.23, predicate='iso'),
+ ('iso', ast.Continuous(float('-inf'), -1.23, False, False)))
+ # date
+ self._test(editable.format(predicate='time', num="30.04.2012, 13:18"),
+ ('time', ast.Datetime(datetime.min, datetime(2012, 4, 30, 13, 18), False, False)))
+
+ def test_between(self):
+ # between A and B (including A, including B)
+ for editable in [
+ # range
+ "{predicate} in [{numA}:{numB}]", "{predicate} : [{numA}:{numB}]", "{predicate} = [{numA}:{numB}]",
+ ]:
+ # positives
+ self._test(editable.format(predicate='iso', numA=1.23, numB=4.56),
+ ('iso', ast.Continuous(1.23, 4.56, True, True)))
+ # negatives
+ self._test(editable.format(predicate='iso', numA=-4.56, numB=-1.23),
+ ('iso', ast.Continuous(-4.56, -1.23, True, True)))
+ # mixed
+ self._test(editable.format(predicate='iso', numA=-1.23, numB=4.56),
+ ('iso', ast.Continuous(-1.23, 4.56, True, True)))
+
+ for editable in [
+ # range
+ "{predicate} in [{numA}-{numB}]", "{predicate} : [{numA}-{numB}]", "{predicate} = [{numA}-{numB}]",
+ # equation
+ "{numA} <= {predicate} <= {numB}"
+ ]:
+ # positives
+ self._test(editable.format(predicate='iso', numA=1.23, numB=4.56),
+ ('iso', ast.Continuous(1.23, 4.56, True, True)))
+ # negatives
+ self._test(editable.format(predicate='iso', numA=-4.56, numB=-1.23),
+ ('iso', ast.Continuous(-4.56, -1.23, True, True)))
+ # mixed
+ self._test(editable.format(predicate='iso', numA=-1.23, numB=4.56),
+ ('iso', ast.Continuous(-1.23, 4.56, True, True)))
+ # date
+ self._test(editable.format(predicate='time', numA="30.04.2012, 13:18", numB="13.6.2014, 18:27"),
+ ('time', ast.Datetime(datetime(2012, 4, 30, 13, 18), datetime(2014, 6, 13, 18, 28), True, False)))
+
+ # between A and B (including A, excluding B)
+ for editable in [
+ # range
+ "{predicate} in [{numA}:{numB})", "{predicate} in [{numA}:{numB}[",
+ "{predicate} : [{numA}:{numB})", "{predicate} : [{numA}:{numB}[",
+ "{predicate} = [{numA}:{numB})", "{predicate} = [{numA}:{numB}[",
+ ]:
+ # positives
+ self._test(editable.format(predicate='iso', numA=1.23, numB=4.56),
+ ('iso', ast.Continuous(1.23, 4.56, True, False)))
+ # negatives
+ self._test(editable.format(predicate='iso', numA=-4.56, numB=-1.23),
+ ('iso', ast.Continuous(-4.56, -1.23, True, False)))
+ # mixed
+ self._test(editable.format(predicate='iso', numA=-1.23, numB=4.56),
+ ('iso', ast.Continuous(-1.23, 4.56, True, False)))
+
+ for editable in [
+ # range
+ "{predicate} in [{numA}-{numB})", "{predicate} in [{numA}-{numB}[",
+ "{predicate} : [{numA}-{numB})", "{predicate} : [{numA}-{numB}[",
+ "{predicate} = [{numA}-{numB})", "{predicate} = [{numA}-{numB}[",
+ # equation
+ "{numA} <= {predicate} < {numB}",
+ ]:
+ # positives
+ self._test(editable.format(predicate='iso', numA=1.23, numB=4.56),
+ ('iso', ast.Continuous(1.23, 4.56, True, False)))
+ # negatives
+ self._test(editable.format(predicate='iso', numA=-4.56, numB=-1.23),
+ ('iso', ast.Continuous(-4.56, -1.23, True, False)))
+ # mixed
+ self._test(editable.format(predicate='iso', numA=-1.23, numB=4.56),
+ ('iso', ast.Continuous(-1.23, 4.56, True, False)))
+ # date
+ self._test(editable.format(predicate='time', numA="30.04.2012, 13:18", numB="13.6.2014, 18:27"),
+ ('time', ast.Datetime(datetime(2012, 4, 30, 13, 18), datetime(2014, 6, 13, 18, 27), True, False)))
+
+ # between A and B (excluding A, including B)
+ for editable in [
+ # range
+ "{predicate} in ({numA}:{numB}]", "{predicate} in ]{numA}:{numB}]",
+ "{predicate} : ({numA}:{numB}]", "{predicate} : ]{numA}:{numB}]",
+ "{predicate} = ({numA}:{numB}]", "{predicate} = ]{numA}:{numB}]",
+ ]:
+ # positives
+ self._test(editable.format(predicate='iso', numA=1.23, numB=4.56),
+ ('iso', ast.Continuous(1.23, 4.56, False, True)))
+ # negatives
+ self._test(editable.format(predicate='iso', numA=-4.56, numB=-1.23),
+ ('iso', ast.Continuous(-4.56, -1.23, False, True)))
+ # mixed
+ self._test(editable.format(predicate='iso', numA=-1.23, numB=4.56),
+ ('iso', ast.Continuous(-1.23, 4.56, False, True)))
+
+ for editable in [
+ # range
+ "{predicate} in ({numA}-{numB}]", "{predicate} in ]{numA}-{numB}]",
+ "{predicate} : ({numA}-{numB}]", "{predicate} : ]{numA}-{numB}]",
+ "{predicate} = ({numA}-{numB}]", "{predicate} = ]{numA}-{numB}]",
+ # equation
+ "{numA} < {predicate} <= {numB}",
+ ]:
+ # positives
+ self._test(editable.format(predicate='iso', numA=1.23, numB=4.56),
+ ('iso', ast.Continuous(1.23, 4.56, False, True)))
+ # negatives
+ self._test(editable.format(predicate='iso', numA=-4.56, numB=-1.23),
+ ('iso', ast.Continuous(-4.56, -1.23, False, True)))
+ # mixed
+ self._test(editable.format(predicate='iso', numA=-1.23, numB=4.56),
+ ('iso', ast.Continuous(-1.23, 4.56, False, True)))
+ # date
+ self._test(editable.format(predicate='time', numA="30.04.2012, 13:18", numB="13.6.2014, 18:27"),
+ ('time', ast.Datetime(datetime(2012, 4, 30, 13, 19), datetime(2014, 6, 13, 18, 28), True, False)))
+
+ # between A and B (excluding A, excluding B)
+ for editable in [
+ "{predicate} in ({numA}:{numB})", "{predicate} in ]{numA}:{numB}[",
+ "{predicate} : ({numA}:{numB})", "{predicate} : ]{numA}:{numB}[",
+ "{predicate} = ({numA}:{numB})", "{predicate} = ]{numA}:{numB}[",
+ ]:
+ # positives
+ self._test(editable.format(predicate='iso', numA=1.23, numB=4.56),
+ ('iso', ast.Continuous(1.23, 4.56, False, False)))
+ # negatives
+ self._test(editable.format(predicate='iso', numA=-4.56, numB=-1.23),
+ ('iso', ast.Continuous(-4.56, -1.23, False, False)))
+ # mixed
+ self._test(editable.format(predicate='iso', numA=-1.23, numB=4.56),
+ ('iso', ast.Continuous(-1.23, 4.56, False, False)))
+
+ for editable in [
+ "{predicate} in ({numA}-{numB})", "{predicate} in ]{numA}-{numB}[",
+ "{predicate} : ({numA}-{numB})", "{predicate} : ]{numA}-{numB}[",
+ "{predicate} = ({numA}-{numB})", "{predicate} = ]{numA}-{numB}[",
+ # equation
+ "{numA} < {predicate} < {numB}",
+ ]:
+ # positives
+ self._test(editable.format(predicate='iso', numA=1.23, numB=4.56),
+ ('iso', ast.Continuous(1.23, 4.56, False, False)))
+ # negatives
+ self._test(editable.format(predicate='iso', numA=-4.56, numB=-1.23),
+ ('iso', ast.Continuous(-4.56, -1.23, False, False)))
+ # mixed
+ self._test(editable.format(predicate='iso', numA=-1.23, numB=4.56),
+ ('iso', ast.Continuous(-1.23, 4.56, False, False)))
+ # date
+ self._test(editable.format(predicate='time', numA="30.04.2012, 13:18", numB="13.6.2014, 18:27"),
+ ('time', ast.Datetime(datetime(2012, 4, 30, 13, 19), datetime(2014, 6, 13, 18, 27), True, False)))
+
+ def test_equal(self):
+ # equal to A
+ for editable in [
+ # range
+ "{predicate} in [{num}:{num}]", "{predicate} : [{num}:{num}]", "{predicate} = [{num}:{num}]",
+ ]:
+ # positives
+ self._test(editable.format(predicate='iso', num=1.23),
+ ('iso', ast.Continuous(1.23, 1.23, True, True)))
+ # negatives
+ self._test(editable.format(predicate='iso', num=-1.23),
+ ('iso', ast.Continuous(-1.23, -1.23, True, True)))
+
+ for editable in [
+ # range
+ "{predicate} in [{num}-{num}]", "{predicate} : [{num}-{num}]", "{predicate} = [{num}-{num}]",
+ # equation
+ "{predicate} = {num}", "{num} = {predicate}",
+ ]:
+ # positives
+ self._test(editable.format(predicate='iso', num=1.23),
+ ('iso', ast.Continuous(1.23, 1.23, True, True)))
+ # negatives
+ self._test(editable.format(predicate='iso', num=-1.23),
+ ('iso', ast.Continuous(-1.23, -1.23, True, True)))
+ # date
+ self._test(editable.format(predicate='time', num="30.04.2012, 13:18"),
+ ('time', ast.Datetime(datetime(2012, 4, 30, 13, 18), datetime(2012, 4, 30, 13, 19), True, False)))
+
+ def test_dates(self):
+ self._test("{predicate} < {num}".format(predicate='time', num="2012"),
+ ('time', ast.Datetime(datetime.min, datetime(2012, 1, 1), False, False)))
+ self._test("{predicate} < {num}".format(predicate='time', num="2012.04"),
+ ('time', ast.Datetime(datetime.min, datetime(2012, 4, 1), False, False)))
+ self._test("{predicate} < {num}".format(predicate='time', num="2012.04.30"),
+ ('time', ast.Datetime(datetime.min, datetime(2012, 4, 30), False, False)))
+ self._test("{predicate} < {num}".format(predicate='time', num="2012.04.30, 3 pm"),
+ ('time', ast.Datetime(datetime.min, datetime(2012, 4, 30, 15), False, False)))
+ self._test("{predicate} < {num}".format(predicate='time', num="2012.04.30, 15:34"),
+ ('time', ast.Datetime(datetime.min, datetime(2012, 4, 30, 15, 34), False, False)))
+ self._test("{predicate} < {num}".format(predicate='time', num="2012.04.30, 15:34:12"),
+ ('time', ast.Datetime(datetime.min, datetime(2012, 4, 30, 15, 34, 12), False, False)))
+ self._test("{predicate} < {num}".format(predicate='time', num="2012.04.30, 15:34:12.98"),
+ ('time', ast.Datetime(datetime.min, datetime(2012, 4, 30, 15, 34, 12, 980000), False, False)))
+
+ self._test("{predicate} <= {num}".format(predicate='time', num="2012"),
+ ('time', ast.Datetime(datetime.min, datetime(2013, 1, 1), False, False)))
+ self._test("{predicate} <= {num}".format(predicate='time', num="2012.04"),
+ ('time', ast.Datetime(datetime.min, datetime(2012, 5, 1), False, False)))
+ self._test("{predicate} <= {num}".format(predicate='time', num="2012.04.30"),
+ ('time', ast.Datetime(datetime.min, datetime(2012, 5, 1), False, False)))
+ self._test("{predicate} <= {num}".format(predicate='time', num="2012.04.30, 3 pm"),
+ ('time', ast.Datetime(datetime.min, datetime(2012, 4, 30, 16), False, False)))
+ self._test("{predicate} <= {num}".format(predicate='time', num="2012.04.30, 15:34"),
+ ('time', ast.Datetime(datetime.min, datetime(2012, 4, 30, 15, 35), False, False)))
+ self._test("{predicate} <= {num}".format(predicate='time', num="2012.04.30, 15:34:12"),
+ ('time', ast.Datetime(datetime.min, datetime(2012, 4, 30, 15, 34, 13), False, False)))
+ self._test("{predicate} <= {num}".format(predicate='time', num="2012.04.30, 15:34:12.98"),
+ ('time', ast.Datetime(datetime.min, datetime(2012, 4, 30, 15, 34, 12, 980001), False, False)))
+
+ def test_timerange(self):
+ self._test("{predicate} < {num}".format(predicate='time', num="15:34"),
+ ('time', ast.TimeRange(datetime.utcfromtimestamp(0.0), datetime(1970, 1, 1, 15, 34), True, False)))
+ self._test("{predicate} <= {num}".format(predicate='time', num="15:34"),
+ ('time', ast.TimeRange(datetime.utcfromtimestamp(0.0), datetime(1970, 1, 1, 15, 35), True, False)))
+ self._test("{predicate} = {num}".format(predicate='time', num="15:34"),
+ ('time', ast.TimeRange(datetime(1970, 1, 1, 15, 34), datetime(1970, 1, 1, 15, 35), True, False)))
+ self._test("{predicate} > {num}".format(predicate='time', num="15:34"),
+ ('time', ast.TimeRange(datetime(1970, 1, 1, 15, 35), datetime(1970, 1, 2), True, True)))
+ self._test("{predicate} >= {num}".format(predicate='time', num="15:34"),
+ ('time', ast.TimeRange(datetime(1970, 1, 1, 15, 34), datetime(1970, 1, 2), True, True)))
+
+ self._test("{numA} <= {predicate} <= {numB}".format(predicate='time', numA="12:34", numB="15:28"),
+ ('time', ast.TimeRange(datetime(1970, 1, 1, 12, 34), datetime(1970, 1, 1, 15, 29), True, False)))
+ self._test("{numA} <= {predicate} < {numB}".format(predicate='time', numA="12:34", numB="15:28"),
+ ('time', ast.TimeRange(datetime(1970, 1, 1, 12, 34), datetime(1970, 1, 1, 15, 28), True, False)))
+ self._test("{numA} < {predicate} <= {numB}".format(predicate='time', numA="12:34", numB="15:28"),
+ ('time', ast.TimeRange(datetime(1970, 1, 1, 12, 35), datetime(1970, 1, 1, 15, 29), True, False)))
+ self._test("{numA} < {predicate} < {numB}".format(predicate='time', numA="12:34", numB="15:28"),
+ ('time', ast.TimeRange(datetime(1970, 1, 1, 12, 35), datetime(1970, 1, 1, 15, 28), True, False)))
+
+ def test_special(self):
+ # special cases: explicit plus sign
+ self._test("{predicate} in [+1.23-+4.56]".format(predicate='iso'),
+ ('iso', ast.Continuous(1.23, 4.56, True, True)))
+ self._test("{predicate} in [-+4.56]".format(predicate='iso'),
+ ('iso', ast.Continuous(float('-inf'), 4.56, False, True)))
+
+ def test_errors(self):
+ # parse errors
+ for editable in [
+ # equal with exclusive
+ "{predicate} in ({num}:{num})", "{predicate} in ({num}-{num})",
+ "{predicate} in ({num}:{num}[", "{predicate} in ({num}-{num}[",
+ "{predicate} in ]{num}:{num})", "{predicate} in ]{num}-{num})",
+ "{predicate} in ]{num}:{num}[", "{predicate} in ]{num}-{num}[",
+ # invalid parentesis
+ "{predicate} in ){num}:{num}(",
+ # misc errors
+ # FIXME: Currently all special characters are allowed as categorical value.
+ # If this changes, don't forget to enable the tests below.
+ #"{predicate} in [{num}{num}]",
+ #"{predicate} [{num}:{num}:{num}]",
+ #"{predicate} = ({num})",
+ #"{predicate} = {num})",
+ ]:
+ self.assertRaises(errors.ParserError, ast_from_string,
+ editable.format(predicate='iso', num=1.23))
+
+ for editable in [
+ "{predicate} in [{numA}:{numB}]", "{predicate} : [{numA}:{numB}]", "{predicate} = [{numA}:{numB}]",
+ "{predicate} in ]{numA}:{numB}]", "{predicate} : ]{numA}:{numB}]", "{predicate} = ]{numA}:{numB}]",
+ "{predicate} in [{numA}:{numB}[", "{predicate} : [{numA}:{numB}[", "{predicate} = [{numA}:{numB}[",
+ "{predicate} in ({numA}:{numB}]", "{predicate} : ({numA}:{numB}]", "{predicate} = ({numA}:{numB}]",
+ "{predicate} in [{numA}:{numB})", "{predicate} : [{numA}:{numB})", "{predicate} = [{numA}:{numB})",
+ "{predicate} in ]{numA}:{numB}[", "{predicate} : ]{numA}:{numB}[", "{predicate} = ]{numA}:{numB}[",
+ "{predicate} in ]{numA}:{numB})", "{predicate} : ]{numA}:{numB})", "{predicate} = ]{numA}:{numB})",
+ "{predicate} in ({numA}:{numB}[", "{predicate} : ({numA}:{numB}[", "{predicate} = ({numA}:{numB}[",
+ "{predicate} in ({numA}:{numB})", "{predicate} : ({numA}:{numB})", "{predicate} = ({numA}:{numB})",
+ "{numA} < {predicate} < {numB}",
+ "{numA} <= {predicate} < {numB}",
+ "{numA} < {predicate} <= {numB}",
+ ]:
+ self.assertRaises(errors.ParserError, ast_from_string,
+ editable.format(predicate='iso', numA=4.56, numB=1.23))
+ self.assertRaises(errors.ParserError, ast_from_string,
+ editable.format(predicate='time', numA="17:35", numB="10:55"))
+ self.assertRaises(errors.ParserError, ast_from_string,
+ editable.format(predicate='time', numA="18.12.2035", numB="5.7.1999"))
+
+ # special cases: empty range with boundary
+ self.assertRaises(ParseException, ast_from_string.CONTINUOUS.parseString,
+ "{predicate} in [:]".format(predicate='iso'))
+ self.assertRaises(ParseException, ast_from_string.CONTINUOUS.parseString,
+ "{predicate} in (:[".format(predicate='iso'))
+ self.assertRaises(ParseException, ast_from_string.CONTINUOUS.parseString,
+ "{predicate} in ]:)".format(predicate='iso'))
+ self.assertRaises(ParseException, ast_from_string.CONTINUOUS.parseString,
+ "{predicate} in ".format(predicate='iso'))
+ # misc
+ self.assertRaises(ParseException, ast_from_string.CONTINUOUS.parseString,
+ "{predicate} in [{num}{num}]".format(predicate='iso', num=1.23))
+ self.assertRaises(ParseException, ast_from_string.CONTINUOUS.parseString,
+ "{predicate} [{num}:{num}:{num}]".format(predicate='iso', num=1.23))
+ self.assertRaises(ParseException, ast_from_string.CONTINUOUS.parseString,
+ "{predicate} = ({num})".format(predicate='iso', num=1.23))
+ self.assertRaises(ParseException, ast_from_string.CONTINUOUS.parseString,
+ "{predicate} = ({num}".format(predicate='iso', num=1.23), dict(parseAll=True))
+ self.assertRaises(ParseException, ast_from_string.CONTINUOUS.parseString,
+ "{predicate} = {num})".format(predicate='iso', num=1.23), dict(parseAll=True))
+ # range errors
+ self.assertRaises(errors.ParserError, ast_from_string, "100 >= iso < 200")
+ self.assertRaises(errors.ParserError, ast_from_string, "100 > iso < 200")
+ self.assertRaises(errors.ParserError, ast_from_string, "100 > iso <= 200")
+ self.assertRaises(errors.ParserError, ast_from_string, "100 >= iso <= 200")
+ self.assertRaises(errors.ParserError, ast_from_string, "100 = iso = 200")
+ # time/date mixture errors
+ self.assertRaises(errors.ParserError, ast_from_string, "12:45 < time < 17.5.2004")
+ self.assertRaises(errors.ParserError, ast_from_string, "17.5.2004 < time < 12:45")
+ # date/int mixture errors
+ self.assertRaises(errors.ParserError, ast_from_string, "17.5.2004 < time < 1245")
+ # 1245 is interpreted as the year
+ #self.assertRaises(errors.ParserError, ast_from_string, "1245 < time < 17.5.2004")
+ # time/int mixture errors
+ self.assertRaises(errors.ParserError, ast_from_string, "17:12 < time < 1245")
+ self.assertRaises(errors.ParserError, ast_from_string, "1712 < time < 12:45")
+
+ # empty query
+ self.assertRaises(ParseException, ast_from_string.CONTINUOUS.parseString, "")
+
+
+class TestParseSearch(unittest.TestCase):
+ def setUp(self):
+ predicates.expose('mime',
+ TestScope('attribute', 'mime'), 'Categorical')
+ predicates.expose('rank',
+ TestScope('attribute', 'rank'), 'Continuous')
+ predicates.expose('iso',
+ TestScope('attribute', 'iso'), 'Continuous', 'Categorical')
+ predicates.expose('time',
+ TestScope('generic', 't_image_create_loc'), 'TimeRange', 'Datetime')
+ predicates.expose('tag',
+ TestScope('generic', 'tag'), 'Categorical')
+
+ def test_parse_existence(self):
+ self.assertEqual(ast_from_string("has mime"),
+ ast.AND([ast.Token('mime', ast.Existence())]))
+ self.assertEqual(ast_from_string("has no mime"),
+ ast.AND([ast.Token('mime', ast.Inexistence())]))
+ self.assertEqual(ast_from_string("has not mime"),
+ ast.AND([ast.Token('mime', ast.Inexistence())]))
+
+ def test_parse_categorical(self):
+ # positive
+ self.assertEqual(ast_from_string("iso in 100, 200, 500"),
+ ast.AND([ast.Token('iso', ast.SetInclude(['100', '200', '500']))]))
+ self.assertEqual(ast_from_string("iso in (100, 200)"),
+ ast.AND([ast.Token('iso', ast.SetInclude(['100', '200']))]))
+ self.assertEqual(ast_from_string("iso = (100, 200)"),
+ ast.AND([ast.Token('iso', ast.SetInclude(['100', '200']))]))
+ # FIXME!
+ #self.assertEqual(ast_from_string("iso = 100, 200"),
+ # ast.AND([ast.Token('iso', ast.SetInclude(['100', '200']))]))
+ self.assertEqual(ast_from_string("iso : (100, 200)"),
+ ast.AND([ast.Token('iso', ast.SetInclude(['100', '200']))]))
+ self.assertEqual(ast_from_string("iso : 100, 200"),
+ ast.AND([ast.Token('iso', ast.SetInclude(['100', '200']))]))
+ self.assertEqual(ast_from_string("iso:(100,200)"),
+ ast.AND([ast.Token('iso', ast.SetInclude(['100', '200']))]))
+ self.assertEqual(ast_from_string("iso in (100,200)"),
+ ast.AND([ast.Token('iso', ast.SetInclude(['100', '200']))]))
+ self.assertEqual(ast_from_string("iso in 100,200"),
+ ast.AND([ast.Token('iso', ast.SetInclude(['100', '200']))]))
+ self.assertEqual(ast_from_string("iso ~ (100,200)"),
+ ast.AND([ast.Token('iso', ast.SetInclude(['100', '200'], approximate=True))]))
+ self.assertEqual(ast_from_string("iso ~ 100,200"),
+ ast.AND([ast.Token('iso', ast.SetInclude(['100', '200'], approximate=True))]))
+
+ # negative
+ self.assertEqual(ast_from_string("iso not in 100,200"),
+ ast.AND([ast.Token('iso', ast.SetExclude(['100', '200']))]))
+ self.assertEqual(ast_from_string("iso not in (100, 200)"),
+ ast.AND([ast.Token('iso', ast.SetExclude(['100', '200']))]))
+ self.assertEqual(ast_from_string("iso != 100,200"),
+ ast.AND([ast.Token('iso', ast.SetExclude(['100', '200']))]))
+ self.assertEqual(ast_from_string("iso != (100, 200)"),
+ ast.AND([ast.Token('iso', ast.SetExclude(['100', '200']))]))
+ self.assertEqual(ast_from_string("iso !~ 100,200"),
+ ast.AND([ast.Token('iso', ast.SetExclude(['100', '200'], approximate=True))]))
+ self.assertEqual(ast_from_string("iso !~ (100, 200)"),
+ ast.AND([ast.Token('iso', ast.SetExclude(['100', '200'], approximate=True))]))
+
+ # one value
+ self.assertEqual(ast_from_string("mime : text"),
+ ast.AND([ast.Token('mime', ast.SetInclude(['text']))]))
+ self.assertEqual(ast_from_string("mime in text"),
+ ast.AND([ast.Token('mime', ast.SetInclude(['text']))]))
+ self.assertEqual(ast_from_string("mime = text"),
+ ast.AND([ast.Token('mime', ast.SetInclude(['text']))]))
+ self.assertEqual(ast_from_string("mime ~ text"),
+ ast.AND([ast.Token('mime', ast.SetInclude(['text'], approximate=True))]))
+ self.assertEqual(ast_from_string("mime != text"),
+ ast.AND([ast.Token('mime', ast.SetExclude(['text']))]))
+ self.assertEqual(ast_from_string("mime not in text"),
+ ast.AND([ast.Token('mime', ast.SetExclude(['text']))]))
+ self.assertEqual(ast_from_string("mime !~ text"),
+ ast.AND([ast.Token('mime', ast.SetExclude(['text'], approximate=True))]))
+
+ # expressions with slash and comma
+ self.assertEqual(ast_from_string('mime : "text"'),
+ ast.AND([ast.Token('mime', ast.SetInclude(['text']))]))
+ self.assertEqual(ast_from_string('mime : "text", "plain"'),
+ ast.AND([ast.Token('mime', ast.SetInclude(['text', 'plain']))]))
+ self.assertEqual(ast_from_string('mime : "text, plain"'),
+ ast.AND([ast.Token('mime', ast.SetInclude(['text, plain']))]))
+ self.assertEqual(ast_from_string('mime ~ "text/plain"'),
+ ast.AND([ast.Token('mime', ast.SetInclude(['text/plain'], approximate=True))]))
+ self.assertEqual(ast_from_string('mime = ("text/plain", "image/jpeg")'),
+ ast.AND([ast.Token('mime', ast.SetInclude(['text/plain', 'image/jpeg']))]))
+
+ def test_parse_tag(self):
+
+ # only tag: tag, tags, (tag), (tags)
+ self.assertEqual(ast_from_string("foo"),
+ ast.AND([ast.Token('tag', ast.SetInclude(['foo']))]))
+ self.assertEqual(ast_from_string("(foo)"),
+ ast.AND([ast.Token('tag', ast.SetInclude(['foo']))]))
+ self.assertEqual(ast_from_string("foo, bar"),
+ ast.AND([ast.Token('tag', ast.SetInclude(['foo', 'bar']))]))
+ self.assertEqual(ast_from_string("foo,bar"),
+ ast.AND([ast.Token('tag', ast.SetInclude(['foo', 'bar']))]))
+ self.assertEqual(ast_from_string("(foo, bar,foobar)"),
+ ast.AND([ast.Token('tag', ast.SetInclude(['foo', 'bar', 'foobar']))]))
+
+ # op and tag: !tag, ~tag, !~tag
+ self.assertEqual(ast_from_string("~foo"),
+ ast.AND([ast.Token('tag', ast.SetInclude(['foo'], approximate=True))]))
+ self.assertEqual(ast_from_string("~ foo"),
+ ast.AND([ast.Token('tag', ast.SetInclude(['foo'], approximate=True))]))
+ self.assertEqual(ast_from_string("!foo"),
+ ast.AND([ast.Token('tag', ast.SetExclude(['foo']))]))
+ self.assertEqual(ast_from_string("! foo"),
+ ast.AND([ast.Token('tag', ast.SetExclude(['foo']))]))
+ self.assertEqual(ast_from_string("!~foo"),
+ ast.AND([ast.Token('tag', ast.SetExclude(['foo'], approximate=True))]))
+ self.assertEqual(ast_from_string("!~ foo"),
+ ast.AND([ast.Token('tag', ast.SetExclude(['foo'], approximate=True))]))
+
+ # op and list: ! (tags), ~tags, ...
+ self.assertEqual(ast_from_string("~ foo, bar"),
+ ast.AND([ast.Token('tag', ast.SetInclude(['foo', 'bar'], approximate=True))]))
+ self.assertEqual(ast_from_string("~foo, bar"),
+ ast.AND([ast.Token('tag', ast.SetInclude(['foo', 'bar'], approximate=True))]))
+ self.assertEqual(ast_from_string("~ (foo, bar)"),
+ ast.AND([ast.Token('tag', ast.SetInclude(['foo', 'bar'], approximate=True))]))
+ self.assertEqual(ast_from_string("! foo, bar"),
+ ast.AND([ast.Token('tag', ast.SetExclude(['foo', 'bar']))]))
+ self.assertEqual(ast_from_string("! (foo, bar)"),
+ ast.AND([ast.Token('tag', ast.SetExclude(['foo', 'bar']))]))
+ self.assertEqual(ast_from_string("! (foo,bar)"),
+ ast.AND([ast.Token('tag', ast.SetExclude(['foo', 'bar']))]))
+ self.assertEqual(ast_from_string("!~ foo, bar"),
+ ast.AND([ast.Token('tag', ast.SetExclude(['foo', 'bar'], approximate=True))]))
+ self.assertEqual(ast_from_string("!~ (foo, bar)"),
+ ast.AND([ast.Token('tag', ast.SetExclude(['foo', 'bar'], approximate=True))]))
+ self.assertEqual(ast_from_string("!~(foo,bar)"),
+ ast.AND([ast.Token('tag', ast.SetExclude(['foo', 'bar'], approximate=True))]))
+
+ def test_parse_query(self):
+ # simple query
+ self.assertEqual(ast_from_string('foo / bar'), ast.AND([
+ ast.Token('tag', ast.SetInclude('foo')),
+ ast.Token('tag', ast.SetInclude('bar'))]))
+ self.assertEqual(ast_from_string('iso in ("foo", "bar") / mime = plain'), ast.AND([
+ ast.Token('iso', ast.SetInclude('foo', 'bar')),
+ ast.Token('mime', ast.SetInclude('plain'))]))
+ self.assertEqual(ast_from_string('iso in ("foo", "bar") / mime = plain'), ast.AND([
+ ast.Token('iso', ast.SetInclude('foo', 'bar')),
+ ast.Token('mime', ast.SetInclude('plain'))]))
+ self.assertEqual(ast_from_string('iso = 1.23 / rank < 5'), ast.AND([
+ ast.Token('iso', ast.Continuous(1.23, 1.23, True, True)),
+ ast.Token('rank', ast.Continuous(hi=5))]))
+ self.assertEqual(ast_from_string('time >= 12:50 / time < 13:50'), ast.AND([
+ ast.Token('time', ast.TimeRange(lo=datetime(1970, 1, 1, 12, 50), lo_inc=True, hi_inc=True)),
+ ast.Token('time', ast.TimeRange(hi=datetime(1970, 1, 1, 13, 50), lo_inc=True, hi_inc=False))]))
+ self.assertEqual(ast_from_string('time >= 17.5.2001 / time < 18.4.2002'), ast.AND([
+ ast.Token('time', ast.Datetime(lo=datetime(2001, 5, 17, 0, 0), lo_inc=True)),
+ ast.Token('time', ast.Datetime(hi=datetime(2002, 4, 18, 0, 0)))]))
+ # mixing expressions
+ self.assertEqual(ast_from_string('foo / iso in "bar" / mime ~ "text/plain" / iso < 100 / time >= 17.5.2001 / time < 13:50'), ast.AND([
+ ast.Token('tag', ast.SetInclude('foo')),
+ ast.Token('iso', ast.SetInclude('bar')),
+ ast.Token('mime', ast.SetInclude('text/plain', approximate=True)),
+ ast.Token('iso', ast.Continuous(hi=100)),
+ ast.Token('time', ast.Datetime(lo=datetime(2001, 5, 17, 0, 0), lo_inc=True)),
+ ast.Token('time', ast.TimeRange(hi=datetime(1970, 1, 1, 13, 50), lo_inc=True))]))
+
+ # leading/trailing slashes
+ self.assertRaises(errors.ParserError, ast_from_string, '/ foobar')
+ self.assertRaises(errors.ParserError, ast_from_string, 'foobar /')
+ self.assertRaises(errors.ParserError, ast_from_string, 'foobar / ')
+ self.assertRaises(errors.ParserError, ast_from_string, 'foo // bar')
+ self.assertRaises(errors.ParserError, ast_from_string, 'foo / / bar')
+
+ def test_quoting(self):
+ self.assertEqual(ast_from_string("tag in ('(foo, bar)', foobar)"),
+ ast.AND([ast.Token('tag', ast.SetInclude(['(foo, bar)', 'foobar']))]))
+ self.assertEqual(ast_from_string('tag in ("(foo, bar)", foobar)'),
+ ast.AND([ast.Token('tag', ast.SetInclude(['(foo, bar)', 'foobar']))]))
+ self.assertEqual(ast_from_string('tag in ("(foo, \\"bar\\")", foobar)'),
+ ast.AND([ast.Token('tag', ast.SetInclude(['(foo, "bar")', 'foobar']))]))
+ self.assertEqual(ast_from_string('tag in ("(foo, bar)", "foobar")'),
+ ast.AND([ast.Token('tag', ast.SetInclude(['(foo, bar)', 'foobar']))]))
+ self.assertEqual(ast_from_string('tag in ("(foo, bar)", \'foobar\')'),
+ ast.AND([ast.Token('tag', ast.SetInclude(['(foo, bar)', 'foobar']))]))
+
+ # error cases
+ self.assertRaises(errors.ParserError, ast_from_string, ('tag in ("(foo, bar, foobar)'))
+ self.assertRaises(errors.ParserError, ast_from_string, ("tag in ('(foo, bar, foobar)"))
+
+
+## main ##
+
+if __name__ == '__main__':
+ unittest.main()
+
+## EOF ##
diff --git a/test/parsing/test_sort.py b/test/parsing/test_sort.py
new file mode 100644
index 0000000..40c9ee1
--- /dev/null
+++ b/test/parsing/test_sort.py
@@ -0,0 +1,96 @@
+"""
+
+Part of the tagit test suite.
+A copy of the license is provided with the project.
+Author: Matthias Baumgartner, 2022
+"""
+# standard imports
+import unittest
+
+# tagit imports
+from tagit.utils import errors
+#from tagit.parsing.search import ast, sortkeys # FIXME: mb/port/parsing
+
+# objects to test
+from tagit.parsing.sort import sort_from_string
+
+
+## code ##
+
+class TestParseSort(unittest.TestCase):
+ def setUp(self):
+ sortkeys.expose('iso',
+ TestScope('attribute', 'iso'), 'Numerical')
+ sortkeys.expose('rank',
+ TestScope('attribute', 'rank'), 'Alphabetical', 'Numerical')
+ sortkeys.expose('time',
+ TestScope('attribute', 'time'), 'Numerical')
+ sortkeys.expose('entity',
+ TestScope('property', 'guid'), 'Alphabetical')
+ sortkeys.expose('tag',
+ TestScope('property', 't_image_create_loc'), 'Anchored')
+ sortkeys.expose('mistake',
+ TestScope('property', 't_image_create_loc'))
+
+ def test_parse_sort(self):
+ # simple patterns
+ self.assertEqual(sort_from_string("time"), ast.NumericalSort('time', False))
+ self.assertEqual(sort_from_string("entity"), ast.AlphabeticalSort('entity', False))
+ self.assertEqual(sort_from_string("time asc"), ast.NumericalSort('time', False))
+ self.assertEqual(sort_from_string("time desc"), ast.NumericalSort('time', True))
+ self.assertEqual(sort_from_string("entity desc"), ast.AlphabeticalSort('entity', True))
+ self.assertEqual(sort_from_string("sort by time"), ast.NumericalSort('time', False))
+ self.assertEqual(sort_from_string("sort by time desc"), ast.NumericalSort('time', True))
+ self.assertEqual(sort_from_string("sort by entity desc"),
+ ast.AlphabeticalSort('entity', True))
+ # full pattern
+ self.assertEqual(sort_from_string("sort alphabetically by entity upwards"),
+ ast.AlphabeticalSort('entity', False))
+ self.assertEqual(sort_from_string("sort numerically by time desc"),
+ ast.NumericalSort('time', True))
+ # invalid type
+ self.assertRaises(errors.ParserError, sort_from_string, "sort alphabetically by time desc")
+ self.assertRaises(errors.ParserError, sort_from_string, "sort numerically by entity desc")
+ self.assertRaises(errors.ParserError, sort_from_string, "sort by time similarity to AF39D281CE3")
+ self.assertRaises(errors.ParserError, sort_from_string, "sort alphabetically by tag down,")
+ # ambiguous type
+ self.assertEqual(sort_from_string("sort alphabetically by rank"),
+ ast.AlphabeticalSort('rank', False))
+ self.assertEqual(sort_from_string("sort numerically by rank"),
+ ast.NumericalSort('rank', False))
+ self.assertRaises(errors.ParserError, sort_from_string, "sort by rank")
+ # anchor pattern
+ self.assertEqual(sort_from_string("sort by tag similarity to AF39D281CE3"),
+ ast.AnchoredSort('tag', 'AF39D281CE3', False))
+ self.assertEqual(sort_from_string("tag AF39D281CE3 up"),
+ ast.AnchoredSort('tag', 'AF39D281CE3', False))
+ self.assertEqual(sort_from_string("tag AF39D281CE3 down"),
+ ast.AnchoredSort('tag', 'AF39D281CE3', True))
+ self.assertRaises(errors.ParserError, sort_from_string, "time AF39D281CE3")
+ self.assertRaises(errors.ParserError, sort_from_string, "sort by tag down,")
+ self.assertRaises(errors.ParserError, sort_from_string, "tag XXXXXXXXXXX down,")
+ # compound statements
+ self.assertEqual(sort_from_string("time, iso"),
+ ast.Order(ast.NumericalSort('time'), ast.NumericalSort('iso')))
+ self.assertEqual(sort_from_string("tag AF39D281CE3 down, time up"),
+ ast.Order(ast.AnchoredSort('tag', 'AF39D281CE3', True),
+ ast.NumericalSort('time', False)))
+ self.assertRaises(errors.ParserError, sort_from_string, "tag AF39D281CE3 down,")
+ # empty string
+ self.assertRaises(errors.ParserError, sort_from_string, "")
+ # invalid predicate
+ self.assertRaises(errors.ParserError, sort_from_string, "foobar")
+ # invalid direction
+ self.assertRaises(errors.ParserError, sort_from_string, "sort by entity sideways")
+ # invalid typedef
+ self.assertRaises(errors.ParserError, sort_from_string, "sort by mistake")
+ # missing anchor
+ self.assertRaises(errors.ParserError, sort_from_string, "sort by time similarity to")
+
+
+## main ##
+
+if __name__ == '__main__':
+ unittest.main()
+
+## EOF ##