aboutsummaryrefslogtreecommitdiffstats
path: root/tagit/parsing
diff options
context:
space:
mode:
authorMatthias Baumgartner <dev@igsor.net>2023-01-13 09:49:10 +0100
committerMatthias Baumgartner <dev@igsor.net>2023-01-13 09:49:10 +0100
commit9c366758665d9cfee7796ee45a8167a5412ae9ae (patch)
treeb42e0a1fd4b1bd59fc31fad6267b83c2dc9a3a3b /tagit/parsing
parent8f2f697f7ed52b7e1c7a17411b2de526b6490691 (diff)
downloadtagit-9c366758665d9cfee7796ee45a8167a5412ae9ae.tar.gz
tagit-9c366758665d9cfee7796ee45a8167a5412ae9ae.tar.bz2
tagit-9c366758665d9cfee7796ee45a8167a5412ae9ae.zip
filter early port, parsing adaptions
Diffstat (limited to 'tagit/parsing')
-rw-r--r--tagit/parsing/__init__.py8
-rw-r--r--tagit/parsing/filter.py (renamed from tagit/parsing/search.py)169
-rw-r--r--tagit/parsing/sort.py17
3 files changed, 75 insertions, 119 deletions
diff --git a/tagit/parsing/__init__.py b/tagit/parsing/__init__.py
index 1c431a4..0070bf9 100644
--- a/tagit/parsing/__init__.py
+++ b/tagit/parsing/__init__.py
@@ -6,14 +6,14 @@ Author: Matthias Baumgartner, 2022
"""
# inner-module imports
from .datefmt import parse_datetime
-from .search import ast_from_string
-from .sort import sort_from_string
+from .filter import Filter
+from .sort import Sort
# exports
__all__ = (
- 'ast_from_string',
+ 'Filter',
+ 'Sort',
'parse_datetime',
- 'sort_from_string',
)
## EOF ##
diff --git a/tagit/parsing/search.py b/tagit/parsing/filter.py
index 10d0e7c..ea8df51 100644
--- a/tagit/parsing/search.py
+++ b/tagit/parsing/filter.py
@@ -1,7 +1,7 @@
"""User-specified search query parsing.
>>> q = "has mime / tag in (november, october) / ! Apfel / time < 10.10.2004 / iso in (100, 200)"
->>> ast = ast_from_string(q)
+>>> ast = filter_from_string(q)
Part of the tagit module.
A copy of the license is provided with the project.
@@ -14,37 +14,29 @@ from datetime import datetime
from pyparsing import CaselessKeyword, Combine, Group, Optional, Or, Word, delimitedList, nums, oneOf, ParseException, Literal, QuotedString, alphanums, alphas8bit, punc8bit
# tagit imports
-from tagit.utils import errors, ttime
+from tagit.utils import bsfs, errors, ns, ttime
+from tagit.utils.bsfs import ast
# inner-module imports
-from . import datefmt
-
-# exports
-__all__ = (
- 'ast_from_string',
- )
+from .datefmt import parse_datetime
# constants
SEARCH_DELIM = '/'
VALUE_DELIM = ','
-DEFAULT_PREDICATE = 'tag'
+# exports
+__all__ = (
+ 'Filter',
+ )
-## code ##
-class SearchParser():
+## code ##
- # valid predicates per type
- _PREDICATES_CATEGORICAL = None
- _PREDICATES_CONTINUOUS = None
- _PREDICATES_DATETIME = None
+class Filter():
# parsers
- _CATEGORICAL = None
- _CONTINUOUS = None
- _EXISTENCE = None
+ _DATETIME_PREDICATES = None
_QUERY = None
- _TAG = None
def __init__(self, schema: bsfs.schema.Schema):
self.schema = schema
@@ -61,9 +53,6 @@ class SearchParser():
def build_parser(self):
"""
"""
- # The *predicate* argument is for compatibility with predicate listener.
- # It's not actually used here.
-
# valid predicates per type, as supplied by tagit.library
# FIXME:
# * range / type constraints
@@ -79,9 +68,21 @@ class SearchParser():
> Target: Entity (allow others?) -> rfds:domain
> Require: searchable as specified in backend AND user-searchable as specified in frontend
"""
- self._PREDICATES_CATEGORICAL = self.schema.predicates(searchable=True, range=self.schema.tm.categorical) # FIXME!
- self._PREDICATES_CONTINUOUS = self.schema.predicates(searchable=True, range=self.schema.tm.numerical) # FIXME!
- self._PREDICATES_DATETIME = self.schema.predicates(searchable=True, range=self.schema.tm.datetime) # FIXME!
+ # all relevant predicates
+ predicates = {pred for pred in self.schema.predicates() if pred.domain <= self.schema.node(ns.bsfs.Entity)}
+ # filter through accept/reject lists
+ ... # FIXME
+ # shortcuts
+ self._abb2uri = {pred.uri.fragment: pred.uri for pred in predicates} # FIXME: tie-breaking for duplicates
+ self._uri2abb = {uri: fragment for fragment, uri in self._abb2uri.items()}
+ # all predicates
+ _PREDICATES = {self._uri2abb[pred.uri] for pred in predicates}
+ # numeric predicates
+ _PREDICATES_NUMERIC = {self._uri2abb[pred.uri] for pred in predicates if isinstance(pred.range, bsfs.schema.Literal) and pred.range <= self.schema.literal(ns.bsfs.Number)} # FIXME: type check might become unnecessary
+ # datetime predicates
+ self._DATETIME_PREDICATES = {pred.uri for pred in predicates if isinstance(pred.range, bsfs.schema.Literal) and pred.range <= self.schema.literal(ns.bsfs.Time)} # FIXME: type check might become unnecessary
+ _PREDICATES_DATETIME = {self._uri2abb[pred] for pred in self._DATETIME_PREDICATES}
+
# terminal symbols
number = Group(Optional(oneOf('- +')) \
@@ -93,11 +94,11 @@ class SearchParser():
# FIXME: Non-ascii characters
# predicates
- predicate = Or([CaselessKeyword(p) for p in self._PREDICATES_CATEGORICAL]).setResultsName(
+ predicate = Or([CaselessKeyword(p) for p in _PREDICATES]).setResultsName(
'predicate')
- date_predicate = Or([CaselessKeyword(p) for p in self._PREDICATES_DATETIME]).setResultsName(
+ date_predicate = Or([CaselessKeyword(p) for p in _PREDICATES_DATETIME]).setResultsName(
'predicate')
- num_predicate = Or([CaselessKeyword(p) for p in self._PREDICATES_CONTINUOUS]).setResultsName(
+ num_predicate = Or([CaselessKeyword(p) for p in _PREDICATES_NUMERIC]).setResultsName(
'predicate')
# existence
@@ -106,7 +107,7 @@ class SearchParser():
PREDICATE := [predicate]
"""
op = (CaselessKeyword('has') ^ CaselessKeyword('has no') ^ CaselessKeyword('has not')).setResultsName('op')
- self._EXISTENCE = Group(op + predicate).setResultsName('existence')
+ _EXISTENCE = Group(op + predicate).setResultsName('existence')
# continuous
@@ -127,7 +128,7 @@ class SearchParser():
bclose = oneOf(') ] [').setResultsName('bclose')
bopen = oneOf('( [ ]').setResultsName('bopen')
op = Or([':', '=', 'in']).setResultsName('op')
- datefmt = datefmt.parse_datetime.DATETIME
+ datefmt = parse_datetime.DATETIME
rngn = num_predicate + op + bopen + number('lo') + rsepn + number('hi') + bclose ^ \
num_predicate + op + bopen + rsepn + number('hi') + bclose ^ \
num_predicate + op + bopen + number('lo') + rsepn + bclose
@@ -143,7 +144,7 @@ class SearchParser():
datefmt('vleft') + cmp('cleft') + date_predicate ^ \
datefmt('vleft') + cmp('cleft') + date_predicate + cmp('cright') + datefmt('vright')
# combined
- self._CONTINUOUS = Group(
+ _CONTINUOUS = Group(
Group(eqn).setResultsName('eq') ^
Group(eqd).setResultsName('eq') ^
Group(rngn).setResultsName('range') ^ \
@@ -161,7 +162,7 @@ class SearchParser():
"""
op = (CaselessKeyword('in') ^ CaselessKeyword('not in') ^ ':' ^ '=' ^ '!=' ^ '~' ^ '!~').setResultsName('op')
value = delimitedList(words, delim=VALUE_DELIM).setResultsName('value')
- self._CATEGORICAL = Group(predicate + op + ('(' + value + ')' | value) ).setResultsName('categorical')
+ _CATEGORICAL = Group(predicate + op + ('(' + value + ')' | value) ).setResultsName('categorical')
# tag shortcuts
@@ -173,35 +174,17 @@ class SearchParser():
"""
op = oneOf('! ~ !~').setResultsName('op')
value = delimitedList(words, delim=VALUE_DELIM).setResultsName('value')
- self._TAG = Group(Optional(op) + '(' + value + ')' ^ Optional(op) + value).setResultsName('tag')
+ _TAG = Group(Optional(op) + '(' + value + ')' ^ Optional(op) + value).setResultsName('tag')
# overall query
"""
QUERY := QUERY / QUERY | EXPR
"""
- self._QUERY = delimitedList(self._EXISTENCE | self._CONTINUOUS | self._CATEGORICAL | self._TAG, delim=SEARCH_DELIM)
+ self._QUERY = delimitedList(_EXISTENCE | _CONTINUOUS | _CATEGORICAL | _TAG, delim=SEARCH_DELIM)
return self
- def __del__(self):
- if self._QUERY is not None: # remove listener
- try:
- self.predicates.ignore(self.build_parser)
- except ImportError:
- # The import fails if python is shutting down.
- # In that case, the ignore becomes unnecessary anyway.
- pass
-
def __call__(self, search):
- # FIXME: mb/port/parsing
- #if self._QUERY is None:
- # # parsers were not initialized yet
- # self.build_parser()
- # # attach listener to receive future updates
- # self.predicates.listen(self.build_parser)
- # # FIXME: Additional filters would be handy
- # #self.predicates.listen(self.build_parser, self.predicates.scope.library)
-
try:
parsed = self._QUERY.parseString(search, parseAll=True)
except ParseException as e:
@@ -211,61 +194,58 @@ class SearchParser():
tokens = []
for exp in parsed:
if exp.getName() == 'existence':
+ pred = self._abb2uri[exp.predicate.lower()]
if 'op' not in exp: # prevented by grammar
raise errors.ParserError('Missing operator', exp)
elif exp.op == 'has':
- cond = ast.Existence()
+ tok = ast.filter.Has(pred)
elif exp.op in ('has no', 'has not'):
- cond = ast.Inexistence()
+ tok = ast.filter.Not(ast.filter.Has(pred))
else: # prevented by grammar
raise errors.ParserError('Invalid operator ({})'.format(exp.op), exp)
-
- tokens.append(
- ast.Token(exp.predicate.lower(), cond))
+ tokens.append(tok)
elif exp.getName() == 'categorical':
+ pred = self._abb2uri[exp.predicate.lower()]
+ approx = False
values = [s.strip() for s in exp.value]
if 'op' not in exp: # prevented by grammar
raise errors.ParserError('Missing operator', exp)
- elif exp.op in (':', '=', 'in'):
- cond = ast.SetInclude(values)
- elif exp.op in ('!=', 'not in'):
- cond = ast.SetExclude(values)
- elif exp.op == '~':
- cond = ast.SetInclude(values, approximate=True)
- elif exp.op == '!~':
- cond = ast.SetExclude(values, approximate=True)
+ if exp.op in ('~', '!~'):
+ approx = True
+ if exp.op in (':', '=', '~', 'in'):
+ tok = ast.filter.Any(pred, ast.filter.Includes(*values, approx=approx))
+ elif exp.op in ('!=', '!~', 'not in'):
+ tok = ast.filter.All(pred, ast.filter.Excludes(*values, approx=approx))
else: # prevented by grammar
raise errors.ParserError('Invalid operator ({})'.format(exp.op), exp)
-
- tokens.append(
- ast.Token(exp.predicate.lower(), cond))
+ tokens.append(tok)
elif exp.getName() == 'tag':
values = [s.strip() for s in exp.value]
if 'op' not in exp:
- cond = ast.SetInclude(values)
+ outer = ast.filter.Any
+ cond = ast.filter.Includes(*values)
elif exp.op == '~':
- cond = ast.SetInclude(values, approximate=True)
+ outer = ast.filter.Any
+ cond = ast.filter.Includes(*values, approx=True)
elif exp.op == '!':
- cond = ast.SetExclude(values)
+ outer = ast.filter.All
+ cond = ast.filter.Excludes(*values)
elif exp.op == '!~':
- cond = ast.SetExclude(values, approximate=True)
+ outer = ast.filter.All
+ cond = ast.filter.Excludes(*values, approx=True)
else: # prevented by grammar
raise errors.ParserError('Invalid operator ({})'.format(exp.op), exp)
+ tokens.append(outer(ns.bse.tag, ast.filter.Any(ns.bst.label, cond)))
- tokens.append(
- ast.Token(DEFAULT_PREDICATE, cond))
-
- elif exp.getName() == 'continuous':
-
+ elif exp.getName() == 'continuous': # FIXME: simplify and adapt bsfs.query.ast.filter.Between accordingly!
lo, hi = None, None
lo_inc, hi_inc = False, False
predicate = None
-
if 'eq' in exp:
# equation style
- predicate = exp.eq.predicate.lower()
+ predicate = self._abb2uri[exp.eq.predicate.lower()]
if ('>' in exp.eq.cleft and '<' in exp.eq.cright) or \
('<' in exp.eq.cleft and '>' in exp.eq.cright) or \
@@ -294,7 +274,7 @@ class SearchParser():
lo_inc = hi_inc = True
elif 'range' in exp: # value in [lo:hi]
- predicate = exp.range.predicate.lower()
+ predicate = self._abb2uri[exp.range.predicate.lower()]
if 'lo' in exp.range:
lo = exp.range.lo
@@ -307,7 +287,7 @@ class SearchParser():
raise errors.ParserError('Expression is neither a range nor an equation', exp)
# interpret values
- if predicate in set([p.lower() for p in self._PREDICATES_DATETIME]):
+ if predicate in self._DATETIME_PREDICATES:
# turn into datetime
lo, lfmt = datefmt.guess_datetime(lo) if lo is not None else (None, None)
@@ -357,7 +337,8 @@ class SearchParser():
raise errors.ParserError('Lower bound must not exceed upper bound', (lo, hi))
tokens.append(
- ast.Token(predicate, ast.TimeRange(lo, hi, lo_inc, hi_inc)))
+ ast.filter.Any(predicate,
+ ast.filter.Between(lo, hi, not lo_inc, not hi_inc)))
else: # date specification
# Check consistency
@@ -368,7 +349,8 @@ class SearchParser():
raise errors.ParserError('Lower bound must not exceed upper bound', (lo, hi))
tokens.append(
- ast.Token(predicate, ast.Datetime(lo, hi, lo_inc, hi_inc)))
+ ast.filter.Any(predicate,
+ ast.filter.Between(lo, hi, not lo_inc, not hi_inc)))
else:
# number predicate
@@ -379,27 +361,14 @@ class SearchParser():
if not (lo < hi or (lo == hi and lo_inc and hi_inc)):
raise errors.ParserError('Lower bound must not exceed upper bound', (lo, hi))
+ # FIXME: mb/port: Three times the same code... optimize
tokens.append(
- ast.Token(predicate, ast.Continuous(lo, hi, lo_inc, hi_inc)))
+ ast.filter.Any(predicate,
+ ast.filter.Between(lo, hi, not lo_inc, not hi_inc)))
else: # prevented by grammar
raise errors.ParserError('Invalid expression', exp)
- return ast.AND(tokens)
-
-
-
-"""Default SearchParser instance.
-
-To produce an ast, call
-
->>> ast_from_string(search)
-
-Convenience shortcut for
-
->>> SearchParser().parse(search)
-
-"""
-ast_from_string = SearchParser(predicates)
+ return ast.filter.And(tokens)
## EOF ##
diff --git a/tagit/parsing/sort.py b/tagit/parsing/sort.py
index 8950613..75fa36c 100644
--- a/tagit/parsing/sort.py
+++ b/tagit/parsing/sort.py
@@ -12,13 +12,13 @@ from tagit.utils import errors, Struct
# exports
__all__ = (
- 'sort_from_string',
+ 'Sort',
)
## code ##
-class SortParser():
+class Sort():
"""Sort parser.
A sort string can be as simple as a predicate, but also allows
@@ -176,17 +176,4 @@ class SortParser():
else:
return ast.Order(*tokens)
-"""Default SortParser instance.
-
-To produce an ast, call
-
->>> sort_from_string(sort)
-
-Convenience shortcut for
-
->>> SortParser().parse(sort)
-
-"""
-sort_from_string = SortParser(sortkeys)
-
## EOF ##