From a4789394e40aaa3152ad6009955709a6c7d277c2 Mon Sep 17 00:00:00 2001 From: Matthias Baumgartner Date: Fri, 20 Jan 2023 14:36:11 +0100 Subject: fetch AST --- bsfs/query/ast/__init__.py | 4 +- bsfs/query/ast/fetch.py | 175 +++++++++++++++++++++++++++++++++++++++++++++ bsfs/query/ast/filter_.py | 1 + 3 files changed, 179 insertions(+), 1 deletion(-) create mode 100644 bsfs/query/ast/fetch.py (limited to 'bsfs/query/ast') diff --git a/bsfs/query/ast/__init__.py b/bsfs/query/ast/__init__.py index 704d051..66b097d 100644 --- a/bsfs/query/ast/__init__.py +++ b/bsfs/query/ast/__init__.py @@ -1,6 +1,6 @@ """Query AST components. -The query AST consists of a Filter syntax tree. +The query AST consists of a Filter and a Fetch syntax trees. Classes beginning with an underscore (_) represent internal type hierarchies and should not be used for parsing. Note that the AST structures do not @@ -14,10 +14,12 @@ Author: Matthias Baumgartner, 2022 import typing # inner-module imports +from . import fetch from . import filter_ as filter # pylint: disable=redefined-builtin # exports __all__: typing.Sequence[str] = ( + 'fetch', 'filter', ) diff --git a/bsfs/query/ast/fetch.py b/bsfs/query/ast/fetch.py new file mode 100644 index 0000000..5e603a1 --- /dev/null +++ b/bsfs/query/ast/fetch.py @@ -0,0 +1,175 @@ +""" + +Part of the BlackStar filesystem (bsfs) module. +A copy of the license is provided with the project. +Author: Matthias Baumgartner, 2022 +""" +# imports +from collections import abc +import typing + +# bsfs imports +from bsfs.utils import URI, typename, normalize_args + +# exports +__all__ : typing.Sequence[str] = ( + 'All', + 'Fetch', + 'FetchExpression', + 'Node', + 'This', + 'Value', + ) + + +## code ## + +class FetchExpression(abc.Hashable): + """Generic Fetch expression.""" + + def __repr__(self) -> str: + """Return the expressions's string representation.""" + return f'{typename(self)}()' + + def __hash__(self) -> int: + """Return the expression's integer representation.""" + return hash(type(self)) + + def __eq__(self, other: typing.Any) -> bool: + """Return True if *self* and *other* are equivalent.""" + return isinstance(other, type(self)) + + +class All(FetchExpression): + """Fetch all child expressions.""" + + # child expressions. + expr: typing.Set[FetchExpression] + + def __init__(self, *expr): + # unpack child expressions + unfolded = set(normalize_args(*expr)) + # check child expressions + if len(unfolded) == 0: + raise AttributeError('expected at least one expression, found none') + if not all(isinstance(itm, FetchExpression) for itm in unfolded): + raise TypeError(expr) + # initialize + super().__init__() + # assign members + self.expr = unfolded + + def __iter__(self) -> typing.Iterator[FetchExpression]: + return iter(self.expr) + + def __len__(self) -> int: + return len(self.expr) + + def __repr__(self) -> str: + return f'{typename(self)}({self.expr})' + + def __hash__(self) -> int: + # FIXME: Produces different hashes for different orders of self.expr + return hash((super().__hash__(), tuple(self.expr))) + + def __eq__(self, other: typing.Any) -> bool: + return super().__eq__(other) and self.expr == other.expr + + +class _Branch(FetchExpression): + """Branch along a predicate.""" + + # FIXME: Use a Predicate (like in ast.filter) so that we can also reverse them! + + # predicate to follow. + predicate: URI + + def __init__(self, predicate: URI): + if not isinstance(predicate, URI): + raise TypeError(predicate) + self.predicate = predicate + + def __repr__(self) -> str: + return f'{typename(self)}({self.predicate})' + + def __hash__(self) -> int: + return hash((super().__hash__(), self.predicate)) + + def __eq__(self, other: typing.Any) -> bool: + return super().__eq__(other) and self.predicate == other.predicate + + +class Fetch(_Branch): + """Follow a predicate before evaluating a child epxression.""" + + # child expression. + expr: FetchExpression + + def __init__(self, predicate: URI, expr: FetchExpression): + # check child expressions + if not isinstance(expr, FetchExpression): + raise TypeError(expr) + # initialize + super().__init__(predicate) + # assign members + self.expr = expr + + def __repr__(self) -> str: + return f'{typename(self)}({self.predicate}, {self.expr})' + + def __hash__(self) -> int: + return hash((super().__hash__(), self.expr)) + + def __eq__(self, other: typing.Any) -> bool: + return super().__eq__(other) and self.expr == other.expr + + +class _Named(_Branch): + """Fetch a (named) symbol at a predicate.""" + + # symbol name. + name: str + + def __init__(self, predicate: URI, name: str): + super().__init__(predicate) + self.name = str(name) + + def __repr__(self) -> str: + return f'{typename(self)}({self.predicate}, {self.name})' + + def __hash__(self) -> int: + return hash((super().__hash__(), self.name)) + + def __eq__(self, other: typing.Any) -> bool: + return super().__eq__(other) and self.name == other.name + + +class Node(_Named): # pylint: disable=too-few-public-methods + """Fetch a Node at a predicate.""" + # FIXME: Is this actually needed? + + +class Value(_Named): # pylint: disable=too-few-public-methods + """Fetch a Literal at a predicate.""" + + +class This(FetchExpression): + """Fetch the current Node.""" + + # symbol name. + name: str + + def __init__(self, name: str): + super().__init__() + self.name = str(name) + + def __repr__(self) -> str: + return f'{typename(self)}({self.name})' + + def __hash__(self) -> int: + return hash((super().__hash__(), self.name)) + + def __eq__(self, other: typing.Any) -> bool: + return super().__eq__(other) and self.name == other.name + +## EOF ## diff --git a/bsfs/query/ast/filter_.py b/bsfs/query/ast/filter_.py index 2f0270c..81b0de2 100644 --- a/bsfs/query/ast/filter_.py +++ b/bsfs/query/ast/filter_.py @@ -153,6 +153,7 @@ class _Agg(FilterExpression, abc.Collection): # check type if not all(isinstance(e, FilterExpression) for e in unfolded): raise TypeError(expr) + # FIXME: Require at least one child expression? # assign member self.expr = unfolded -- cgit v1.2.3 From c8fdaaa676afbdcf33344d72bd92b3ccb981cbf8 Mon Sep 17 00:00:00 2001 From: Matthias Baumgartner Date: Wed, 8 Feb 2023 19:54:24 +0100 Subject: ast fixes --- bsfs/query/ast/fetch.py | 3 +-- bsfs/query/ast/filter_.py | 4 ++-- 2 files changed, 3 insertions(+), 4 deletions(-) (limited to 'bsfs/query/ast') diff --git a/bsfs/query/ast/fetch.py b/bsfs/query/ast/fetch.py index 5e603a1..d653a8a 100644 --- a/bsfs/query/ast/fetch.py +++ b/bsfs/query/ast/fetch.py @@ -69,8 +69,7 @@ class All(FetchExpression): return f'{typename(self)}({self.expr})' def __hash__(self) -> int: - # FIXME: Produces different hashes for different orders of self.expr - return hash((super().__hash__(), tuple(self.expr))) + return hash((super().__hash__(), tuple(sorted(self.expr, key=repr)))) def __eq__(self, other: typing.Any) -> bool: return super().__eq__(other) and self.expr == other.expr diff --git a/bsfs/query/ast/filter_.py b/bsfs/query/ast/filter_.py index 81b0de2..798d37f 100644 --- a/bsfs/query/ast/filter_.py +++ b/bsfs/query/ast/filter_.py @@ -173,7 +173,7 @@ class _Agg(FilterExpression, abc.Collection): return f'{typename(self)}({self.expr})' def __hash__(self) -> int: - return hash((super().__hash__(), tuple(self.expr))) # FIXME: Unique hash of different orders over self.expr + return hash((super().__hash__(), tuple(sorted(self.expr, key=repr)))) def __eq__(self, other) -> bool: return super().__eq__(other) and self.expr == other.expr @@ -450,7 +450,7 @@ class OneOf(PredicateExpression, abc.Collection): return f'{typename(self)}({self.expr})' def __hash__(self) -> int: - return hash((super().__hash__(), tuple(self.expr))) # FIXME: Unique hash of different orders over self.expr + return hash((super().__hash__(), tuple(sorted(self.expr, key=repr)))) def __eq__(self, other) -> bool: return super().__eq__(other) and self.expr == other.expr -- cgit v1.2.3 From 64f3ac76a2f8d6b51380c06233accfcc19dca228 Mon Sep 17 00:00:00 2001 From: Matthias Baumgartner Date: Wed, 8 Feb 2023 20:47:18 +0100 Subject: filter query convenience functions --- bsfs/query/ast/filter_.py | 58 +++++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 53 insertions(+), 5 deletions(-) (limited to 'bsfs/query/ast') diff --git a/bsfs/query/ast/filter_.py b/bsfs/query/ast/filter_.py index 798d37f..44490fc 100644 --- a/bsfs/query/ast/filter_.py +++ b/bsfs/query/ast/filter_.py @@ -31,10 +31,7 @@ from collections import abc import typing # bsfs imports -from bsfs.utils import URI, typename, normalize_args - -# inner-module imports -#from . import utils +from bsfs.utils import URI, errors, typename, normalize_args # exports __all__ : typing.Sequence[str] = ( @@ -460,10 +457,61 @@ class OneOf(PredicateExpression, abc.Collection): def IsIn(*values): # pylint: disable=invalid-name # explicitly mimics an expression """Match any of the given URIs.""" - return Or(Is(value) for value in normalize_args(*values)) + args = normalize_args(*values) + if len(args) == 0: + raise AttributeError('expected at least one value, found none') + if len(args) == 1: + return Is(args[0]) + return Or(Is(value) for value in args) def IsNotIn(*values): # pylint: disable=invalid-name # explicitly mimics an expression """Match none of the given URIs.""" return Not(IsIn(*values)) + +def Between( + lo: float = float('-inf'), + hi: float = float('inf'), + lo_strict: bool = True, + hi_strict: bool = True, + ): + """Match numerical values between *lo* and *hi*. Include bounds if strict is False.""" + if abs(lo) == hi == float('inf'): + raise ValueError('range cannot be INF on both sides') + if lo > hi: + raise ValueError(f'lower bound ({lo}) cannot be less than upper bound ({hi})') + if lo == hi and not lo_strict and not hi_strict: + return Equals(lo) + if lo == hi: # either bound is strict + raise ValueError(f'bounds cannot be equal when either is strict') + if lo != float('-inf') and hi != float('inf'): + return And(GreaterThan(lo, lo_strict), LessThan(hi, hi_strict)) + if lo != float('-inf'): + return GreaterThan(lo, lo_strict) + # hi != float('inf'): + return LessThan(hi, hi_strict) + + +def Includes(*values, approx: bool = False): + """Match any of the given *values*. Uses `Substring` if *approx* is set.""" + args = normalize_args(*values) + cls = Substring if approx else Equals + if len(args) == 0: + raise AttributeError('expected at least one value, found none') + if len(args) == 1: + return cls(args[0]) + return Or(cls(v) for v in args) + + +def Excludes(*values, approx: bool = False): + """Match none of the given *values*. Uses `Substring` if *approx* is set.""" + args = normalize_args(*values) + cls = Substring if approx else Equals + if len(args) == 0: + raise AttributeError('expected at least one value, found none') + if len(args) == 1: + return Not(cls(args[0])) + return Not(Or(cls(v) for v in args)) + + ## EOF ## -- cgit v1.2.3 From f31a0d005785d474a37ec769c1f7f5e27aa08a57 Mon Sep 17 00:00:00 2001 From: Matthias Baumgartner Date: Wed, 8 Feb 2023 21:08:24 +0100 Subject: minor comments --- bsfs/query/ast/filter_.py | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) (limited to 'bsfs/query/ast') diff --git a/bsfs/query/ast/filter_.py b/bsfs/query/ast/filter_.py index 44490fc..b29d89e 100644 --- a/bsfs/query/ast/filter_.py +++ b/bsfs/query/ast/filter_.py @@ -31,7 +31,7 @@ from collections import abc import typing # bsfs imports -from bsfs.utils import URI, errors, typename, normalize_args +from bsfs.utils import URI, typename, normalize_args # exports __all__ : typing.Sequence[str] = ( @@ -454,8 +454,9 @@ class OneOf(PredicateExpression, abc.Collection): # Helpers +# invalid-name is disabled since they explicitly mimic an expression -def IsIn(*values): # pylint: disable=invalid-name # explicitly mimics an expression +def IsIn(*values) -> FilterExpression: # pylint: disable=invalid-name """Match any of the given URIs.""" args = normalize_args(*values) if len(args) == 0: @@ -464,17 +465,17 @@ def IsIn(*values): # pylint: disable=invalid-name # explicitly mimics an express return Is(args[0]) return Or(Is(value) for value in args) -def IsNotIn(*values): # pylint: disable=invalid-name # explicitly mimics an expression +def IsNotIn(*values) -> FilterExpression: # pylint: disable=invalid-name """Match none of the given URIs.""" return Not(IsIn(*values)) -def Between( +def Between( # pylint: disable=invalid-name lo: float = float('-inf'), hi: float = float('inf'), lo_strict: bool = True, hi_strict: bool = True, - ): + ) -> FilterExpression : """Match numerical values between *lo* and *hi*. Include bounds if strict is False.""" if abs(lo) == hi == float('inf'): raise ValueError('range cannot be INF on both sides') @@ -483,7 +484,7 @@ def Between( if lo == hi and not lo_strict and not hi_strict: return Equals(lo) if lo == hi: # either bound is strict - raise ValueError(f'bounds cannot be equal when either is strict') + raise ValueError('bounds cannot be equal when either is strict') if lo != float('-inf') and hi != float('inf'): return And(GreaterThan(lo, lo_strict), LessThan(hi, hi_strict)) if lo != float('-inf'): @@ -492,7 +493,7 @@ def Between( return LessThan(hi, hi_strict) -def Includes(*values, approx: bool = False): +def Includes(*values, approx: bool = False) -> FilterExpression: # pylint: disable=invalid-name """Match any of the given *values*. Uses `Substring` if *approx* is set.""" args = normalize_args(*values) cls = Substring if approx else Equals @@ -503,7 +504,7 @@ def Includes(*values, approx: bool = False): return Or(cls(v) for v in args) -def Excludes(*values, approx: bool = False): +def Excludes(*values, approx: bool = False) -> FilterExpression: # pylint: disable=invalid-name """Match none of the given *values*. Uses `Substring` if *approx* is set.""" args = normalize_args(*values) cls = Substring if approx else Equals -- cgit v1.2.3