From c664d19e7d4a0aa0762c30a72ae238cf818891ab Mon Sep 17 00:00:00 2001 From: Matthias Baumgartner Date: Wed, 11 Jan 2023 21:20:47 +0100 Subject: Feature support in the schema * Type annotations * Feature type * Moved from_string from Schema to its own file/function * Root predicate has a valid (not-None) range * ROOT_... export in schema.types * Empty as the default Schema constructor * Schema loads some additional default symbols * _Type instances compare along class hierarchy --- bsfs/schema/__init__.py | 5 +- bsfs/schema/schema.py | 111 +++++----------------------- bsfs/schema/serialize.py | 143 ++++++++++++++++++++++++++++++++++++ bsfs/schema/types.py | 183 ++++++++++++++++++++++++++++++++++++++++------- 4 files changed, 325 insertions(+), 117 deletions(-) create mode 100644 bsfs/schema/serialize.py (limited to 'bsfs/schema') diff --git a/bsfs/schema/__init__.py b/bsfs/schema/__init__.py index ad4d456..dc24313 100644 --- a/bsfs/schema/__init__.py +++ b/bsfs/schema/__init__.py @@ -9,7 +9,8 @@ import typing # inner-module imports from .schema import Schema -from .types import Literal, Node, Predicate +from .serialize import from_string, to_string +from .types import Literal, Node, Predicate, _Vertex # FIXME: _Vertex # exports __all__: typing.Sequence[str] = ( @@ -17,6 +18,8 @@ __all__: typing.Sequence[str] = ( 'Node', 'Predicate', 'Schema', + 'from_string', + 'to_string', ) ## EOF ## diff --git a/bsfs/schema/schema.py b/bsfs/schema/schema.py index c5d4571..1c4c807 100644 --- a/bsfs/schema/schema.py +++ b/bsfs/schema/schema.py @@ -51,11 +51,13 @@ class Schema(): def __init__( self, - predicates: typing.Iterable[types.Predicate], + predicates: typing.Optional[typing.Iterable[types.Predicate]] = None, nodes: typing.Optional[typing.Iterable[types.Node]] = None, literals: typing.Optional[typing.Iterable[types.Literal]] = None, ): # materialize arguments + if predicates is None: + predicates = set() if nodes is None: nodes = set() if literals is None: @@ -63,24 +65,36 @@ class Schema(): nodes = set(nodes) literals = set(literals) predicates = set(predicates) + + # add root types to the schema + nodes.add(types.ROOT_NODE) + literals.add(types.ROOT_LITERAL) + predicates.add(types.ROOT_PREDICATE) + # add minimally necessary types to the schema + literals.add(types.ROOT_NUMBER) + predicates.add(types.ROOT_FEATURE) + # include parents in predicates set # TODO: review type annotations and ignores for python >= 3.11 (parents is _Type but should be typing.Self) predicates |= {par for pred in predicates for par in pred.parents()} # type: ignore [misc] # include predicate domain in nodes set nodes |= {pred.domain for pred in predicates} # include predicate range in nodes and literals sets - prange = {pred.range for pred in predicates if pred.range is not None} + prange = {pred.range for pred in predicates} nodes |= {vert for vert in prange if isinstance(vert, types.Node)} literals |= {vert for vert in prange if isinstance(vert, types.Literal)} + # NOTE: ROOT_PREDICATE has a _Vertex as range which is neither in nodes nor literals + # FIXME: with the ROOT_VERTEX missing, the schema is not complete anymore! + # include parents in nodes and literals sets - # NOTE: Must be done after predicate domain/range was handled - # so that their parents are included as well. + # NOTE: Must come after predicate domain/range was handled to have their parents as well. nodes |= {par for node in nodes for par in node.parents()} # type: ignore [misc] literals |= {par for lit in literals for par in lit.parents()} # type: ignore [misc] # assign members self._nodes = {node.uri: node for node in nodes} self._literals = {lit.uri: lit for lit in literals} self._predicates = {pred.uri: pred for pred in predicates} + # verify unique uris if len(nodes) != len(self._nodes): raise errors.ConsistencyError('inconsistent nodes') @@ -214,6 +228,7 @@ class Schema(): >>> Schema.Union([a, b, c]) """ + # FIXME: copy type annotations? if len(args) == 0: raise TypeError('Schema.Union requires at least one argument (Schema or Iterable)') if isinstance(args[0], cls): # args is sequence of Schema instances @@ -295,92 +310,4 @@ class Schema(): """Return the Literal matching the *uri*.""" return self._literals[uri] - - ## constructors ## - - - @classmethod - def Empty(cls) -> 'Schema': # pylint: disable=invalid-name # capitalized classmethod - """Return a minimal Schema.""" - node = types.Node(ns.bsfs.Node, None) - literal = types.Literal(ns.bsfs.Literal, None) - predicate = types.Predicate( - uri=ns.bsfs.Predicate, - parent=None, - domain=node, - range=None, - unique=False, - ) - return cls((predicate, ), (node, ), (literal, )) - - - @classmethod - def from_string(cls, schema: str) -> 'Schema': # pylint: disable=invalid-name # capitalized classmethod - """Load and return a Schema from a string.""" - # parse string into rdf graph - graph = rdflib.Graph() - graph.parse(data=schema, format='turtle') - - def _fetch_hierarchically(factory, curr): - # emit current node - yield curr - # walk through childs - for child in graph.subjects(rdflib.URIRef(ns.rdfs.subClassOf), rdflib.URIRef(curr.uri)): - # convert to URI - child = URI(child) - # check circular dependency - if child == curr.uri or child in {node.uri for node in curr.parents()}: - raise errors.ConsistencyError('circular dependency') - # recurse and emit (sub*)childs - yield from _fetch_hierarchically(factory, factory(child, curr)) - - # fetch nodes - nodes = set(_fetch_hierarchically(types.Node, types.Node(ns.bsfs.Node, None))) - nodes_lut = {node.uri: node for node in nodes} - if len(nodes_lut) != len(nodes): - raise errors.ConsistencyError('inconsistent nodes') - - # fetch literals - literals = set(_fetch_hierarchically(types.Literal, types.Literal(ns.bsfs.Literal, None))) - literals_lut = {lit.uri: lit for lit in literals} - if len(literals_lut) != len(literals): - raise errors.ConsistencyError('inconsistent literals') - - # fetch predicates - def build_predicate(uri, parent): - uri = rdflib.URIRef(uri) - # get domain - domains = set(graph.objects(uri, rdflib.RDFS.domain)) - if len(domains) != 1: - raise errors.ConsistencyError(f'inconsistent domain: {domains}') - dom = nodes_lut.get(next(iter(domains))) - if dom is None: - raise errors.ConsistencyError('missing domain') - # get range - ranges = set(graph.objects(uri, rdflib.RDFS.range)) - if len(ranges) != 1: - raise errors.ConsistencyError(f'inconsistent range: {ranges}') - rng = next(iter(ranges)) - rng = nodes_lut.get(rng, literals_lut.get(rng)) - if rng is None: - raise errors.ConsistencyError('missing range') - # get unique flag - uniques = set(graph.objects(uri, rdflib.URIRef(ns.bsfs.unique))) - if len(uniques) != 1: - raise errors.ConsistencyError(f'inconsistent unique flags: {uniques}') - unique = bool(next(iter(uniques))) - # build Predicate - return types.Predicate(URI(uri), parent, dom, rng, unique) - - root_predicate = types.Predicate( - uri=ns.bsfs.Predicate, - parent=None, - domain=nodes_lut[ns.bsfs.Node], - range=None, # FIXME: Unclear how to handle this! Can be either a Literal or a Node - unique=False, - ) - predicates = _fetch_hierarchically(build_predicate, root_predicate) - # return Schema - return cls(predicates, nodes, literals) - ## EOF ## diff --git a/bsfs/schema/serialize.py b/bsfs/schema/serialize.py new file mode 100644 index 0000000..1222aa6 --- /dev/null +++ b/bsfs/schema/serialize.py @@ -0,0 +1,143 @@ +""" + +Part of the BlackStar filesystem (bsfs) module. +A copy of the license is provided with the project. +Author: Matthias Baumgartner, 2022 +""" +# standard imports +from collections import abc +import itertools +import typing + +# external imports +import rdflib + +# bsfs imports +from bsfs.namespace import ns +from bsfs.utils import errors, URI, typename + +# inner-module imports +from . import types +from . import schema + +# exports +__all__: typing.Sequence[str] = ( + 'to_string', + 'from_string', + ) + + +## code ## + +def from_string(schema_str: str) -> schema.Schema: + """Load and return a Schema from a string.""" + # parse string into rdf graph + graph = rdflib.Graph() + graph.parse(data=schema_str, format='turtle') + + # helper functions + def _convert(value): + """Convert the subject type from rdflib to a bsfs native type.""" + if isinstance(value, rdflib.Literal): + return value.value + if isinstance(value, rdflib.URIRef): + return URI(value) + raise errors.BackendError(f'expected Literal or URIRef, found {typename(value)}') + + def _fetch_hierarchically(factory, curr): + """Walk through a rdfs:subClassOf hierarchy, creating symbols along the way.""" + # emit current node + yield curr + # walk through childs + for child in graph.subjects(rdflib.URIRef(ns.rdfs.subClassOf), rdflib.URIRef(curr.uri)): + # fetch annotations + annotations = { + URI(pred): _convert(value) + for pred, value # FIXME: preserve datatype of value?! + in graph.predicate_objects(child) + if URI(pred) != ns.rdfs.subClassOf + } + # convert child to URI + child = URI(child) + # check circular dependency + if child == curr.uri or child in {node.uri for node in curr.parents()}: + raise errors.ConsistencyError('circular dependency') + # recurse and emit (sub*)childs + yield from _fetch_hierarchically(factory, factory(child, curr, **annotations)) + + # fetch nodes + nodes = set(_fetch_hierarchically(types.Node, types.ROOT_NODE)) + nodes_lut = {node.uri: node for node in nodes} + if len(nodes_lut) != len(nodes): + raise errors.ConsistencyError('inconsistent nodes') + + # fetch literals + literals = set(_fetch_hierarchically(types.Literal, types.ROOT_LITERAL)) + literals_lut = {lit.uri: lit for lit in literals} + if len(literals_lut) != len(literals): + raise errors.ConsistencyError('inconsistent literals') + + # fetch predicates + # FIXME: type annotation + def _fetch_value(subject: URI, predicate: rdflib.URIRef, value_factory) -> typing.Optional[typing.Any]: + """Fetch the object of a given subject and predicate. Raises a `errors.ConsistencyError` if multiple objects match.""" + values = list(graph.objects(rdflib.URIRef(subject), predicate)) + if len(values) == 0: + return None + elif len(values) == 1: + return value_factory(values[0]) + else: + raise errors.ConsistencyError(f'{subject} has multiple values for predicate {str(predicate)}, expected zero or one') + + def _build_predicate(uri, parent, **annotations): + """Predicate factory.""" + # break out on root feature type + if uri == types.ROOT_FEATURE.uri: + return types.ROOT_FEATURE + # clean annotations + annotations.pop(ns.rdfs.domain, None) + annotations.pop(ns.rdfs.range, None) + annotations.pop(ns.bsfs.unique, None) + # get domain + dom = _fetch_value(uri, rdflib.RDFS.domain, URI) + if dom is not None and dom not in nodes_lut: + raise errors.ConsistencyError(f'predicate {uri} has undefined domain {dom}') + elif dom is not None: + dom = nodes_lut[dom] + # get range + rng = _fetch_value(uri, rdflib.RDFS.range, URI) + if rng is not None and rng not in nodes_lut and rng not in literals_lut: + raise errors.ConsistencyError(f'predicate {uri} has undefined range {rng}') + elif rng is not None: + rng = nodes_lut.get(rng, literals_lut.get(rng)) + # get unique + unique = _fetch_value(uri, rdflib.URIRef(ns.bsfs.unique), bool) + # handle feature types + if isinstance(parent, types.Feature): + # clean annotations + annotations.pop(ns.bsfs.dimension, None) + annotations.pop(ns.bsfs.dtype, None) + annotations.pop(ns.bsfs.distance, None) + # get dimension + dimension = _fetch_value(uri, rdflib.URIRef(ns.bsfs.dimension), int) + # get dtype + dtype = _fetch_value(uri, rdflib.URIRef(ns.bsfs.dtype), URI) + # get distance + distance = _fetch_value(uri, rdflib.URIRef(ns.bsfs.distance), URI) + # return feature + return parent.get_child(URI(uri), domain=dom, range=rng, unique=unique, + dtype=dtype, dimension=dimension, distance=distance, **annotations) + # handle non-feature predicate + return parent.get_child(URI(uri), domain=dom, range=rng, unique=unique, **annotations) + predicates = _fetch_hierarchically(_build_predicate, types.ROOT_PREDICATE) + + return schema.Schema(predicates, nodes, literals) + + + +def to_string(schema_inst: schema.Schema) -> str: + """ + """ + raise NotImplementedError() + +## EOF ## diff --git a/bsfs/schema/types.py b/bsfs/schema/types.py index 54a7e99..e737263 100644 --- a/bsfs/schema/types.py +++ b/bsfs/schema/types.py @@ -8,6 +8,7 @@ Author: Matthias Baumgartner, 2022 import typing # bsfs imports +from bsfs.namespace import ns from bsfs.utils import errors, URI, typename # exports @@ -15,6 +16,7 @@ __all__: typing.Sequence[str] = ( 'Literal', 'Node', 'Predicate', + 'Feature', ) @@ -99,9 +101,11 @@ class _Type(): self, uri: URI, parent: typing.Optional['_Type'] = None, + **annotations: typing.Any, ): self.uri = uri self.parent = parent + self.annotations = annotations def parents(self) -> typing.Generator['_Type', None, None]: """Generate a list of parent nodes.""" @@ -110,9 +114,17 @@ class _Type(): yield curr curr = curr.parent - def get_child(self, uri: URI, **kwargs): + def get_child( + self, + uri: URI, + **kwargs, + ): """Return a child of the current class.""" - return type(self)(uri, self, **kwargs) + return type(self)( + uri=uri, + parent=self, + **kwargs + ) def __str__(self) -> str: return f'{typename(self)}({self.uri})' @@ -138,7 +150,7 @@ class _Type(): def __lt__(self, other: typing.Any) -> bool: """Return True iff *self* is a true subclass of *other*.""" - if not type(self) is type(other): # type mismatch # pylint: disable=unidiomatic-typecheck + if not isinstance(other, type(self)): return NotImplemented if self.uri == other.uri: # equivalence return False @@ -151,7 +163,7 @@ class _Type(): def __le__(self, other: typing.Any) -> bool: """Return True iff *self* is equivalent or a subclass of *other*.""" - if not type(self) is type(other): # type mismatch # pylint: disable=unidiomatic-typecheck + if not isinstance(other, type(self)): return NotImplemented if self.uri == other.uri: # equivalence return True @@ -164,7 +176,7 @@ class _Type(): def __gt__(self, other: typing.Any) -> bool: """Return True iff *self* is a true superclass of *other*.""" - if not type(self) is type(other): # type mismatch # pylint: disable=unidiomatic-typecheck + if not isinstance(other, type(self)): return NotImplemented if self.uri == other.uri: # equivalence return False @@ -177,7 +189,7 @@ class _Type(): def __ge__(self, other: typing.Any) -> bool: """Return True iff *self* is eqiuvalent or a superclass of *other*.""" - if not type(self) is type(other): # type mismatch # pylint: disable=unidiomatic-typecheck + if not isinstance(other, type(self)): return NotImplemented if self.uri == other.uri: # equivalence return True @@ -191,30 +203,33 @@ class _Type(): class _Vertex(_Type): """Graph vertex types. Can be a Node or a Literal.""" - def __init__(self, uri: URI, parent: typing.Optional['_Vertex']): - super().__init__(uri, parent) + parent: typing.Optional['_Vertex'] + def __init__(self, uri: URI, parent: typing.Optional['_Vertex'], **kwargs): + super().__init__(uri, parent, **kwargs) class Node(_Vertex): """Node type.""" - def __init__(self, uri: URI, parent: typing.Optional['Node']): - super().__init__(uri, parent) + parent: typing.Optional['Node'] + def __init__(self, uri: URI, parent: typing.Optional['Node'], **kwargs): + super().__init__(uri, parent, **kwargs) class Literal(_Vertex): """Literal type.""" - def __init__(self, uri: URI, parent: typing.Optional['Literal']): - super().__init__(uri, parent) + parent: typing.Optional['Literal'] + def __init__(self, uri: URI, parent: typing.Optional['Literal'] ,**kwargs): + super().__init__(uri, parent, **kwargs) class Predicate(_Type): - """Predicate type.""" + """Predicate base type.""" # source type. domain: Node # destination type. - range: typing.Optional[typing.Union[Node, Literal]] + range: _Vertex # maximum cardinality of type. unique: bool @@ -223,25 +238,26 @@ class Predicate(_Type): self, # Type members uri: URI, - parent: typing.Optional['Predicate'], + parent: '_PredicateBase', # Predicate members domain: Node, - range: typing.Optional[typing.Union[Node, Literal]], # pylint: disable=redefined-builtin + range: _Vertex, # pylint: disable=redefined-builtin unique: bool, + **kwargs, ): # check arguments if not isinstance(domain, Node): raise TypeError(domain) - if range is not None and not isinstance(range, Node) and not isinstance(range, Literal): + if range != ROOT_VERTEX and not isinstance(range, (Node, Literal)): raise TypeError(range) # initialize - super().__init__(uri, parent) + super().__init__(uri, parent, **kwargs) self.domain = domain self.range = range - self.unique = unique + self.unique = bool(unique) def __hash__(self) -> int: - return hash((super().__hash__(), self.domain, self.range, self.unique)) + return hash((super().__hash__(), self.domain, self.unique, self.range)) def __eq__(self, other: typing.Any) -> bool: return super().__eq__(other) \ @@ -264,13 +280,132 @@ class Predicate(_Type): raise errors.ConsistencyError(f'{domain} must be a subclass of {self.domain}') if range is None: range = self.range - if range is None: # inherited range from ns.bsfs.Predicate - raise ValueError('range must be defined by the parent or argument') - if self.range is not None and not range <= self.range: + # NOTE: The root predicate has a Vertex as range, which is neither a parent of the root + # Node nor Literal. Hence, that test is skipped since a child should be allowed to + # specialize from Vertex to anything. + if self.range != ROOT_VERTEX and not range <= self.range: raise errors.ConsistencyError(f'{range} must be a subclass of {self.range}') if unique is None: unique = self.unique - return super().get_child(uri, domain=domain, range=range, unique=unique, **kwargs) + return super().get_child( + uri=uri, + domain=domain, + range=range, + unique=unique, + **kwargs + ) + + +class Feature(Predicate): + """Feature base type.""" + + # Number of feature vector dimensions. + dimension: int + + # Feature vector datatype. + dtype: URI + + # Distance measure to compare feature vectors. + distance: URI + + def __init__( + self, + # Type members + uri: URI, + parent: Predicate, + # Predicate members + domain: Node, + range: Literal, + unique: bool, + # Feature members + dimension: int, + dtype: URI, + distance: URI, + **kwargs, + ): + super().__init__(uri, parent, domain, range, unique, **kwargs) + self.dimension = int(dimension) + self.dtype = URI(dtype) + self.distance = URI(distance) + + def __hash__(self) -> int: + return hash((super().__hash__(), self.dimension, self.dtype, self.distance)) + + def __eq__(self, other: typing.Any) -> bool: + return super().__eq__(other) \ + and self.dimension == other.dimension \ + and self.dtype == other.dtype \ + and self.distance == other.distance + + def get_child( + self, + uri: URI, + domain: typing.Optional[Node] = None, + range: typing.Optional[Literal] = None, # pylint: disable=redefined-builtin + unique: typing.Optional[bool] = None, + dimension: typing.Optional[int] = None, + dtype: typing.Optional[URI] = None, + distance: typing.Optional[URI] = None, + **kwargs, + ): + """Return a child of the current class.""" + if dimension is None: + dimension = self.dimension + if dtype is None: + dtype = self.dtype + if distance is None: + distance = self.distance + return super().get_child( + uri=uri, + domain=domain, + range=range, + unique=unique, + dimension=dimension, + dtype=dtype, + distance=distance, + **kwargs, + ) +# essential vertices +ROOT_VERTEX = _Vertex( + uri=ns.bsfs.Vertex, + parent=None, + ) + +ROOT_NODE = Node( + uri=ns.bsfs.Node, + parent=None, + ) + +ROOT_LITERAL = Literal( + uri=ns.bsfs.Literal, + parent=None, + ) + +ROOT_NUMBER = Literal( + uri=ns.bsfs.Number, + parent=ROOT_LITERAL, + ) + +# essential predicates +ROOT_PREDICATE = Predicate( + uri=ns.bsfs.Predicate, + parent=None, + domain=ROOT_NODE, + range=ROOT_VERTEX, + unique=False, + ) + +ROOT_FEATURE = Feature( + uri=ns.bsfs.Feature, + parent=ROOT_PREDICATE, + domain=ROOT_NODE, + range=ROOT_LITERAL, + unique=False, + dimension=1, + dtype=ns.bsfs.f16, + distance=ns.bsfs.euclidean, + ) + ## EOF ## -- cgit v1.2.3 From 6fd984e694b0a7b749ab947211d792f5b011ee6f Mon Sep 17 00:00:00 2001 From: Matthias Baumgartner Date: Thu, 12 Jan 2023 08:44:25 +0100 Subject: renamed get_child to child in schema.types._Type and _Vertex to Vertex in schema.types --- bsfs/schema/__init__.py | 2 +- bsfs/schema/schema.py | 2 +- bsfs/schema/serialize.py | 4 ++-- bsfs/schema/types.py | 28 ++++++++++++++-------------- 4 files changed, 18 insertions(+), 18 deletions(-) (limited to 'bsfs/schema') diff --git a/bsfs/schema/__init__.py b/bsfs/schema/__init__.py index dc24313..5162a01 100644 --- a/bsfs/schema/__init__.py +++ b/bsfs/schema/__init__.py @@ -10,7 +10,7 @@ import typing # inner-module imports from .schema import Schema from .serialize import from_string, to_string -from .types import Literal, Node, Predicate, _Vertex # FIXME: _Vertex +from .types import Literal, Node, Predicate, Vertex, ROOT_FEATURE, ROOT_LITERAL, ROOT_NODE, ROOT_NUMBER, ROOT_PREDICATE, ROOT_VERTEX # exports __all__: typing.Sequence[str] = ( diff --git a/bsfs/schema/schema.py b/bsfs/schema/schema.py index 1c4c807..80cb58a 100644 --- a/bsfs/schema/schema.py +++ b/bsfs/schema/schema.py @@ -83,7 +83,7 @@ class Schema(): prange = {pred.range for pred in predicates} nodes |= {vert for vert in prange if isinstance(vert, types.Node)} literals |= {vert for vert in prange if isinstance(vert, types.Literal)} - # NOTE: ROOT_PREDICATE has a _Vertex as range which is neither in nodes nor literals + # NOTE: ROOT_PREDICATE has a Vertex as range which is neither in nodes nor literals # FIXME: with the ROOT_VERTEX missing, the schema is not complete anymore! # include parents in nodes and literals sets diff --git a/bsfs/schema/serialize.py b/bsfs/schema/serialize.py index 1222aa6..c1ac9a9 100644 --- a/bsfs/schema/serialize.py +++ b/bsfs/schema/serialize.py @@ -125,10 +125,10 @@ def from_string(schema_str: str) -> schema.Schema: # get distance distance = _fetch_value(uri, rdflib.URIRef(ns.bsfs.distance), URI) # return feature - return parent.get_child(URI(uri), domain=dom, range=rng, unique=unique, + return parent.child(URI(uri), domain=dom, range=rng, unique=unique, dtype=dtype, dimension=dimension, distance=distance, **annotations) # handle non-feature predicate - return parent.get_child(URI(uri), domain=dom, range=rng, unique=unique, **annotations) + return parent.child(URI(uri), domain=dom, range=rng, unique=unique, **annotations) predicates = _fetch_hierarchically(_build_predicate, types.ROOT_PREDICATE) return schema.Schema(predicates, nodes, literals) diff --git a/bsfs/schema/types.py b/bsfs/schema/types.py index e737263..4f49efe 100644 --- a/bsfs/schema/types.py +++ b/bsfs/schema/types.py @@ -114,7 +114,7 @@ class _Type(): yield curr curr = curr.parent - def get_child( + def child( self, uri: URI, **kwargs, @@ -201,21 +201,21 @@ class _Type(): return False -class _Vertex(_Type): +class Vertex(_Type): """Graph vertex types. Can be a Node or a Literal.""" - parent: typing.Optional['_Vertex'] - def __init__(self, uri: URI, parent: typing.Optional['_Vertex'], **kwargs): + parent: typing.Optional['Vertex'] + def __init__(self, uri: URI, parent: typing.Optional['Vertex'], **kwargs): super().__init__(uri, parent, **kwargs) -class Node(_Vertex): +class Node(Vertex): """Node type.""" parent: typing.Optional['Node'] def __init__(self, uri: URI, parent: typing.Optional['Node'], **kwargs): super().__init__(uri, parent, **kwargs) -class Literal(_Vertex): +class Literal(Vertex): """Literal type.""" parent: typing.Optional['Literal'] def __init__(self, uri: URI, parent: typing.Optional['Literal'] ,**kwargs): @@ -229,7 +229,7 @@ class Predicate(_Type): domain: Node # destination type. - range: _Vertex + range: Vertex # maximum cardinality of type. unique: bool @@ -241,7 +241,7 @@ class Predicate(_Type): parent: '_PredicateBase', # Predicate members domain: Node, - range: _Vertex, # pylint: disable=redefined-builtin + range: Vertex, # pylint: disable=redefined-builtin unique: bool, **kwargs, ): @@ -265,11 +265,11 @@ class Predicate(_Type): and self.range == other.range \ and self.unique == other.unique - def get_child( + def child( self, uri: URI, domain: typing.Optional[Node] = None, - range: typing.Optional[_Vertex] = None, # pylint: disable=redefined-builtin + range: typing.Optional[Vertex] = None, # pylint: disable=redefined-builtin unique: typing.Optional[bool] = None, **kwargs, ): @@ -287,7 +287,7 @@ class Predicate(_Type): raise errors.ConsistencyError(f'{range} must be a subclass of {self.range}') if unique is None: unique = self.unique - return super().get_child( + return super().child( uri=uri, domain=domain, range=range, @@ -337,7 +337,7 @@ class Feature(Predicate): and self.dtype == other.dtype \ and self.distance == other.distance - def get_child( + def child( self, uri: URI, domain: typing.Optional[Node] = None, @@ -355,7 +355,7 @@ class Feature(Predicate): dtype = self.dtype if distance is None: distance = self.distance - return super().get_child( + return super().child( uri=uri, domain=domain, range=range, @@ -368,7 +368,7 @@ class Feature(Predicate): # essential vertices -ROOT_VERTEX = _Vertex( +ROOT_VERTEX = Vertex( uri=ns.bsfs.Vertex, parent=None, ) -- cgit v1.2.3 From e708016ae366e96051281f3a744af35a8c06d98b Mon Sep 17 00:00:00 2001 From: Matthias Baumgartner Date: Thu, 12 Jan 2023 10:28:16 +0100 Subject: cleanup and cosmetic changes --- bsfs/schema/__init__.py | 3 ++- bsfs/schema/schema.py | 2 -- bsfs/schema/serialize.py | 17 +++++++++-------- bsfs/schema/types.py | 8 ++++---- 4 files changed, 15 insertions(+), 15 deletions(-) (limited to 'bsfs/schema') diff --git a/bsfs/schema/__init__.py b/bsfs/schema/__init__.py index 5162a01..31d7d61 100644 --- a/bsfs/schema/__init__.py +++ b/bsfs/schema/__init__.py @@ -10,7 +10,8 @@ import typing # inner-module imports from .schema import Schema from .serialize import from_string, to_string -from .types import Literal, Node, Predicate, Vertex, ROOT_FEATURE, ROOT_LITERAL, ROOT_NODE, ROOT_NUMBER, ROOT_PREDICATE, ROOT_VERTEX +from .types import Literal, Node, Predicate, Vertex, \ + ROOT_FEATURE, ROOT_LITERAL, ROOT_NODE, ROOT_NUMBER, ROOT_PREDICATE, ROOT_VERTEX # exports __all__: typing.Sequence[str] = ( diff --git a/bsfs/schema/schema.py b/bsfs/schema/schema.py index 80cb58a..52ad191 100644 --- a/bsfs/schema/schema.py +++ b/bsfs/schema/schema.py @@ -7,10 +7,8 @@ Author: Matthias Baumgartner, 2022 # imports from collections import abc, namedtuple import typing -import rdflib # bsfs imports -from bsfs.namespace import ns from bsfs.utils import errors, URI, typename # inner-module imports diff --git a/bsfs/schema/serialize.py b/bsfs/schema/serialize.py index c1ac9a9..0eb6628 100644 --- a/bsfs/schema/serialize.py +++ b/bsfs/schema/serialize.py @@ -5,7 +5,6 @@ A copy of the license is provided with the project. Author: Matthias Baumgartner, 2022 """ # standard imports -from collections import abc import itertools import typing @@ -42,7 +41,7 @@ def from_string(schema_str: str) -> schema.Schema: return value.value if isinstance(value, rdflib.URIRef): return URI(value) - raise errors.BackendError(f'expected Literal or URIRef, found {typename(value)}') + raise errors.UnreachableError(f'expected Literal or URIRef, found {typename(value)}') def _fetch_hierarchically(factory, curr): """Walk through a rdfs:subClassOf hierarchy, creating symbols along the way.""" @@ -80,14 +79,16 @@ def from_string(schema_str: str) -> schema.Schema: # fetch predicates # FIXME: type annotation def _fetch_value(subject: URI, predicate: rdflib.URIRef, value_factory) -> typing.Optional[typing.Any]: - """Fetch the object of a given subject and predicate. Raises a `errors.ConsistencyError` if multiple objects match.""" + """Fetch the object of a given subject and predicate. + Raises a `errors.ConsistencyError` if multiple objects match. + """ values = list(graph.objects(rdflib.URIRef(subject), predicate)) if len(values) == 0: return None - elif len(values) == 1: + if len(values) == 1: return value_factory(values[0]) - else: - raise errors.ConsistencyError(f'{subject} has multiple values for predicate {str(predicate)}, expected zero or one') + raise errors.ConsistencyError( + f'{subject} has multiple values for predicate {str(predicate)}, expected zero or one') def _build_predicate(uri, parent, **annotations): """Predicate factory.""" @@ -102,13 +103,13 @@ def from_string(schema_str: str) -> schema.Schema: dom = _fetch_value(uri, rdflib.RDFS.domain, URI) if dom is not None and dom not in nodes_lut: raise errors.ConsistencyError(f'predicate {uri} has undefined domain {dom}') - elif dom is not None: + if dom is not None: dom = nodes_lut[dom] # get range rng = _fetch_value(uri, rdflib.RDFS.range, URI) if rng is not None and rng not in nodes_lut and rng not in literals_lut: raise errors.ConsistencyError(f'predicate {uri} has undefined range {rng}') - elif rng is not None: + if rng is not None: rng = nodes_lut.get(rng, literals_lut.get(rng)) # get unique unique = _fetch_value(uri, rdflib.URIRef(ns.bsfs.unique), bool) diff --git a/bsfs/schema/types.py b/bsfs/schema/types.py index 4f49efe..6257dee 100644 --- a/bsfs/schema/types.py +++ b/bsfs/schema/types.py @@ -238,7 +238,7 @@ class Predicate(_Type): self, # Type members uri: URI, - parent: '_PredicateBase', + parent: typing.Optional['Predicate'], # Predicate members domain: Node, range: Vertex, # pylint: disable=redefined-builtin @@ -312,10 +312,10 @@ class Feature(Predicate): self, # Type members uri: URI, - parent: Predicate, + parent: typing.Optional[Predicate], # Predicate members domain: Node, - range: Literal, + range: Literal, # pylint: disable=redefined-builtin unique: bool, # Feature members dimension: int, @@ -341,7 +341,7 @@ class Feature(Predicate): self, uri: URI, domain: typing.Optional[Node] = None, - range: typing.Optional[Literal] = None, # pylint: disable=redefined-builtin + range: typing.Optional[Vertex] = None, # pylint: disable=redefined-builtin unique: typing.Optional[bool] = None, dimension: typing.Optional[int] = None, dtype: typing.Optional[URI] = None, -- cgit v1.2.3 From 1b7ef16c3795bb7112683662b8c22a774e219269 Mon Sep 17 00:00:00 2001 From: Matthias Baumgartner Date: Thu, 12 Jan 2023 16:57:58 +0100 Subject: schema to string --- bsfs/schema/schema.py | 2 + bsfs/schema/serialize.py | 104 +++++++++++++++++++++++++++++++++++++++++++++-- 2 files changed, 103 insertions(+), 3 deletions(-) (limited to 'bsfs/schema') diff --git a/bsfs/schema/schema.py b/bsfs/schema/schema.py index 52ad191..bc50d4e 100644 --- a/bsfs/schema/schema.py +++ b/bsfs/schema/schema.py @@ -72,6 +72,8 @@ class Schema(): literals.add(types.ROOT_NUMBER) predicates.add(types.ROOT_FEATURE) + # FIXME: ensure that types derive from the right root? + # include parents in predicates set # TODO: review type annotations and ignores for python >= 3.11 (parents is _Type but should be typing.Self) predicates |= {par for pred in predicates for par in pred.parents()} # type: ignore [misc] diff --git a/bsfs/schema/serialize.py b/bsfs/schema/serialize.py index 0eb6628..a566d65 100644 --- a/bsfs/schema/serialize.py +++ b/bsfs/schema/serialize.py @@ -136,9 +136,107 @@ def from_string(schema_str: str) -> schema.Schema: -def to_string(schema_inst: schema.Schema) -> str: +def to_string(schema_inst: schema.Schema, fmt: str = 'turtle') -> str: + """Serialize a `bsfs.schema.Schema` to a string. + See `rdflib.Graph.serialize` for viable formats (default: turtle). """ - """ - raise NotImplementedError() + + # type of emitted triples. + T_TRIPLE = typing.Iterator[typing.Tuple[rdflib.URIRef, rdflib.URIRef, rdflib.term.Identifier]] + + def _type(tpe: types._Type) -> T_TRIPLE : + """Emit _Type properties (parent, annotations).""" + # emit parent + if tpe.parent is not None: + yield ( + rdflib.URIRef(tpe.uri), + rdflib.URIRef(ns.rdfs.subClassOf), + rdflib.URIRef(tpe.parent.uri), + ) + # emit annotations + for prop, value in tpe.annotations.items(): + yield ( + rdflib.URIRef(tpe.uri), + rdflib.URIRef(prop), + rdflib.Literal(value), # FIXME: datatype?! + ) + + def _predicate(pred: types.Predicate) -> T_TRIPLE: + """Emit Predicate properties (domain, range, unique).""" + # no need to emit anything for the root predicate + if pred == types.ROOT_PREDICATE: + return + # emit domain + if pred.domain != getattr(pred.parent, 'domain', None): + yield ( + rdflib.URIRef(pred.uri), + rdflib.URIRef(ns.rdfs.domain), + rdflib.URIRef(pred.domain.uri), + ) + # emit range + if pred.range != getattr(pred.parent, 'range', None): + yield ( + rdflib.URIRef(pred.uri), + rdflib.URIRef(ns.rdfs.range), + rdflib.URIRef(pred.range.uri), + ) + # emit cardinality + if pred.unique != getattr(pred.parent, 'unique', None): + yield ( + rdflib.URIRef(pred.uri), + rdflib.URIRef(ns.bsfs.unique), + rdflib.Literal(pred.unique, datatype=rdflib.XSD.boolean), + ) + + def _feature(feat: types.Feature) -> T_TRIPLE: + """Emit Feature properties (dimension, dtype, distance).""" + # emit size + if feat.dimension != getattr(feat.parent, 'dimension', None): + yield ( + rdflib.URIRef(feat.uri), + rdflib.URIRef(ns.bsfs.dimension), + rdflib.Literal(feat.dimension, datatype=rdflib.XSD.integer), + ) + # emit dtype + if feat.dtype != getattr(feat.parent, 'dtype', None): + yield ( + rdflib.URIRef(feat.uri), + rdflib.URIRef(ns.bsfs.dtype), + rdflib.URIRef(feat.dtype), + ) + # emit distance + if feat.distance != getattr(feat.parent, 'distance', None): + yield ( + rdflib.URIRef(feat.uri), + rdflib.URIRef(ns.bsfs.distance), + rdflib.URIRef(feat.distance), + ) + + def _parse(node: types._Type) -> T_TRIPLE: + """Emit all properties of a type.""" + if isinstance(node, types._Type): # pylint: disable=protected-access + # NOTE: all nodes are _Type + yield from _type(node) + if isinstance(node, types.Predicate): + yield from _predicate(node) + if isinstance(node, types.Feature): + yield from _feature(node) + + # create graph + graph = rdflib.Graph() + # add triples to graph + nodes = itertools.chain( + schema_inst.nodes(), + schema_inst.literals(), + schema_inst.predicates()) + for node in nodes: + for triple in _parse(node): + graph.add(triple) + # add known namespaces for readability + # FIXME: more systematically (e.g. for all in ns?) + graph.bind('bsfs', rdflib.URIRef('http://bsfs.ai/schema/')) + graph.bind('bse', rdflib.URIRef('http://bsfs.ai/schema/Entity#')) + # serialize to turtle + return graph.serialize(format=fmt) ## EOF ## -- cgit v1.2.3 From 60257ed3c2aa6ea2891f362a691bde9d7ef17831 Mon Sep 17 00:00:00 2001 From: Matthias Baumgartner Date: Fri, 13 Jan 2023 12:22:34 +0100 Subject: schema type comparison across classes --- bsfs/schema/types.py | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) (limited to 'bsfs/schema') diff --git a/bsfs/schema/types.py b/bsfs/schema/types.py index 6257dee..95dc66a 100644 --- a/bsfs/schema/types.py +++ b/bsfs/schema/types.py @@ -150,8 +150,10 @@ class _Type(): def __lt__(self, other: typing.Any) -> bool: """Return True iff *self* is a true subclass of *other*.""" - if not isinstance(other, type(self)): + if not isinstance(other, _Type): return NotImplemented + if not isinstance(other, type(self)): # FIXME: necessary? + return False if self.uri == other.uri: # equivalence return False if self in other.parents(): # superclass @@ -163,8 +165,10 @@ class _Type(): def __le__(self, other: typing.Any) -> bool: """Return True iff *self* is equivalent or a subclass of *other*.""" - if not isinstance(other, type(self)): + if not isinstance(other, _Type): return NotImplemented + if not isinstance(other, type(self)): # FIXME: necessary? + return False if self.uri == other.uri: # equivalence return True if self in other.parents(): # superclass @@ -176,8 +180,10 @@ class _Type(): def __gt__(self, other: typing.Any) -> bool: """Return True iff *self* is a true superclass of *other*.""" - if not isinstance(other, type(self)): + if not isinstance(other, _Type): return NotImplemented + if not isinstance(other, type(self)): # FIXME: necessary? + return False if self.uri == other.uri: # equivalence return False if self in other.parents(): # superclass @@ -189,8 +195,10 @@ class _Type(): def __ge__(self, other: typing.Any) -> bool: """Return True iff *self* is eqiuvalent or a superclass of *other*.""" - if not isinstance(other, type(self)): + if not isinstance(other, _Type): return NotImplemented + if not isinstance(other, type(self)): # FIXME: necessary? + return False if self.uri == other.uri: # equivalence return True if self in other.parents(): # superclass -- cgit v1.2.3 From ccaee71e2b6135d3b324fe551c8652940b67aab3 Mon Sep 17 00:00:00 2001 From: Matthias Baumgartner Date: Sun, 15 Jan 2023 20:57:42 +0100 Subject: Feature as Literal instead of Predicate subtype --- bsfs/schema/__init__.py | 7 +- bsfs/schema/schema.py | 4 +- bsfs/schema/serialize.py | 83 +++++++++++++----------- bsfs/schema/types.py | 162 +++++++++++++++++++++++------------------------ 4 files changed, 133 insertions(+), 123 deletions(-) (limited to 'bsfs/schema') diff --git a/bsfs/schema/__init__.py b/bsfs/schema/__init__.py index 31d7d61..f53512e 100644 --- a/bsfs/schema/__init__.py +++ b/bsfs/schema/__init__.py @@ -10,8 +10,11 @@ import typing # inner-module imports from .schema import Schema from .serialize import from_string, to_string -from .types import Literal, Node, Predicate, Vertex, \ - ROOT_FEATURE, ROOT_LITERAL, ROOT_NODE, ROOT_NUMBER, ROOT_PREDICATE, ROOT_VERTEX +from .types import Literal, Node, Predicate, Vertex, Feature, \ + ROOT_VERTEX, ROOT_NODE, ROOT_LITERAL, \ + ROOT_NUMBER, ROOT_TIME, \ + ROOT_ARRAY, ROOT_FEATURE, \ + ROOT_PREDICATE # exports __all__: typing.Sequence[str] = ( diff --git a/bsfs/schema/schema.py b/bsfs/schema/schema.py index bc50d4e..8d9a821 100644 --- a/bsfs/schema/schema.py +++ b/bsfs/schema/schema.py @@ -70,7 +70,9 @@ class Schema(): predicates.add(types.ROOT_PREDICATE) # add minimally necessary types to the schema literals.add(types.ROOT_NUMBER) - predicates.add(types.ROOT_FEATURE) + literals.add(types.ROOT_TIME) + literals.add(types.ROOT_ARRAY) + literals.add(types.ROOT_FEATURE) # FIXME: ensure that types derive from the right root? diff --git a/bsfs/schema/serialize.py b/bsfs/schema/serialize.py index a566d65..8b31737 100644 --- a/bsfs/schema/serialize.py +++ b/bsfs/schema/serialize.py @@ -35,13 +35,27 @@ def from_string(schema_str: str) -> schema.Schema: graph.parse(data=schema_str, format='turtle') # helper functions + # FIXME: type annotation + def _fetch_value(subject: URI, predicate: rdflib.URIRef, value_factory) -> typing.Optional[typing.Any]: + """Fetch the object of a given subject and predicate. + Raises a `errors.ConsistencyError` if multiple objects match. + """ + values = list(graph.objects(rdflib.URIRef(subject), predicate)) + if len(values) == 0: + return None + if len(values) == 1: + return value_factory(values[0]) + raise errors.ConsistencyError( + f'{subject} has multiple values for predicate {str(predicate)}, expected zero or one') + def _convert(value): """Convert the subject type from rdflib to a bsfs native type.""" if isinstance(value, rdflib.Literal): return value.value if isinstance(value, rdflib.URIRef): return URI(value) - raise errors.UnreachableError(f'expected Literal or URIRef, found {typename(value)}') + # value is neither a node nor a literal, but e.g. a blank node + raise errors.BackendError(f'expected Literal or URIRef, found {typename(value)}') def _fetch_hierarchically(factory, curr): """Walk through a rdfs:subClassOf hierarchy, creating symbols along the way.""" @@ -71,30 +85,36 @@ def from_string(schema_str: str) -> schema.Schema: raise errors.ConsistencyError('inconsistent nodes') # fetch literals - literals = set(_fetch_hierarchically(types.Literal, types.ROOT_LITERAL)) + def _build_literal(uri, parent, **annotations): + """Literal factory.""" + # break out on root feature type + if uri == types.ROOT_FEATURE.uri: + return types.ROOT_FEATURE + # handle feature types + if isinstance(parent, types.Feature): + # clean annotations + annotations.pop(ns.bsfs.dimension, None) + annotations.pop(ns.bsfs.dtype, None) + annotations.pop(ns.bsfs.distance, None) + # get dimension + dimension = _fetch_value(uri, rdflib.URIRef(ns.bsfs.dimension), int) + # get dtype + dtype = _fetch_value(uri, rdflib.URIRef(ns.bsfs.dtype), URI) + # get distance + distance = _fetch_value(uri, rdflib.URIRef(ns.bsfs.distance), URI) + # return feature + return parent.child(URI(uri), dtype=dtype, dimension=dimension, distance=distance, **annotations) + # handle non-feature types + return parent.child(URI(uri), **annotations) + + literals = set(_fetch_hierarchically(_build_literal, types.ROOT_LITERAL)) literals_lut = {lit.uri: lit for lit in literals} if len(literals_lut) != len(literals): raise errors.ConsistencyError('inconsistent literals') # fetch predicates - # FIXME: type annotation - def _fetch_value(subject: URI, predicate: rdflib.URIRef, value_factory) -> typing.Optional[typing.Any]: - """Fetch the object of a given subject and predicate. - Raises a `errors.ConsistencyError` if multiple objects match. - """ - values = list(graph.objects(rdflib.URIRef(subject), predicate)) - if len(values) == 0: - return None - if len(values) == 1: - return value_factory(values[0]) - raise errors.ConsistencyError( - f'{subject} has multiple values for predicate {str(predicate)}, expected zero or one') - def _build_predicate(uri, parent, **annotations): """Predicate factory.""" - # break out on root feature type - if uri == types.ROOT_FEATURE.uri: - return types.ROOT_FEATURE # clean annotations annotations.pop(ns.rdfs.domain, None) annotations.pop(ns.rdfs.range, None) @@ -113,23 +133,9 @@ def from_string(schema_str: str) -> schema.Schema: rng = nodes_lut.get(rng, literals_lut.get(rng)) # get unique unique = _fetch_value(uri, rdflib.URIRef(ns.bsfs.unique), bool) - # handle feature types - if isinstance(parent, types.Feature): - # clean annotations - annotations.pop(ns.bsfs.dimension, None) - annotations.pop(ns.bsfs.dtype, None) - annotations.pop(ns.bsfs.distance, None) - # get dimension - dimension = _fetch_value(uri, rdflib.URIRef(ns.bsfs.dimension), int) - # get dtype - dtype = _fetch_value(uri, rdflib.URIRef(ns.bsfs.dtype), URI) - # get distance - distance = _fetch_value(uri, rdflib.URIRef(ns.bsfs.distance), URI) - # return feature - return parent.child(URI(uri), domain=dom, range=rng, unique=unique, - dtype=dtype, dimension=dimension, distance=distance, **annotations) - # handle non-feature predicate + # build predicate return parent.child(URI(uri), domain=dom, range=rng, unique=unique, **annotations) + predicates = _fetch_hierarchically(_build_predicate, types.ROOT_PREDICATE) return schema.Schema(predicates, nodes, literals) @@ -214,9 +220,12 @@ def to_string(schema_inst: schema.Schema, fmt: str = 'turtle') -> str: def _parse(node: types._Type) -> T_TRIPLE: """Emit all properties of a type.""" - if isinstance(node, types._Type): # pylint: disable=protected-access - # NOTE: all nodes are _Type - yield from _type(node) + # check arg + if not isinstance(node, types._Type): # pylint: disable=protected-access + raise TypeError(node) + # emit _Type essentials + yield from _type(node) + # emit properties of derived types if isinstance(node, types.Predicate): yield from _predicate(node) if isinstance(node, types.Feature): diff --git a/bsfs/schema/types.py b/bsfs/schema/types.py index 95dc66a..3a2e10c 100644 --- a/bsfs/schema/types.py +++ b/bsfs/schema/types.py @@ -226,10 +226,70 @@ class Node(Vertex): class Literal(Vertex): """Literal type.""" parent: typing.Optional['Literal'] - def __init__(self, uri: URI, parent: typing.Optional['Literal'] ,**kwargs): + def __init__(self, uri: URI, parent: typing.Optional['Literal'], **kwargs): super().__init__(uri, parent, **kwargs) +class Feature(Literal): + """Feature type.""" + + # Number of feature vector dimensions. + dimension: int + + # Feature vector datatype. + dtype: URI + + # Distance measure to compare feature vectors. + distance: URI + + def __init__( + self, + # Type members + uri: URI, + parent: typing.Optional[Literal], + # Feature members + dimension: int, + dtype: URI, + distance: URI, + **kwargs, + ): + super().__init__(uri, parent, **kwargs) + self.dimension = int(dimension) + self.dtype = URI(dtype) + self.distance = URI(distance) + + def __hash__(self) -> int: + return hash((super().__hash__(), self.dimension, self.dtype, self.distance)) + + def __eq__(self, other: typing.Any) -> bool: + return super().__eq__(other) \ + and self.dimension == other.dimension \ + and self.dtype == other.dtype \ + and self.distance == other.distance + + def child( + self, + uri: URI, + dimension: typing.Optional[int] = None, + dtype: typing.Optional[URI] = None, + distance: typing.Optional[URI] = None, + **kwargs, + ): + """Return a child of the current class.""" + if dimension is None: + dimension = self.dimension + if dtype is None: + dtype = self.dtype + if distance is None: + distance = self.distance + return super().child( + uri=uri, + dimension=dimension, + dtype=dtype, + distance=distance, + **kwargs, + ) + class Predicate(_Type): """Predicate base type.""" @@ -304,77 +364,6 @@ class Predicate(_Type): ) -class Feature(Predicate): - """Feature base type.""" - - # Number of feature vector dimensions. - dimension: int - - # Feature vector datatype. - dtype: URI - - # Distance measure to compare feature vectors. - distance: URI - - def __init__( - self, - # Type members - uri: URI, - parent: typing.Optional[Predicate], - # Predicate members - domain: Node, - range: Literal, # pylint: disable=redefined-builtin - unique: bool, - # Feature members - dimension: int, - dtype: URI, - distance: URI, - **kwargs, - ): - super().__init__(uri, parent, domain, range, unique, **kwargs) - self.dimension = int(dimension) - self.dtype = URI(dtype) - self.distance = URI(distance) - - def __hash__(self) -> int: - return hash((super().__hash__(), self.dimension, self.dtype, self.distance)) - - def __eq__(self, other: typing.Any) -> bool: - return super().__eq__(other) \ - and self.dimension == other.dimension \ - and self.dtype == other.dtype \ - and self.distance == other.distance - - def child( - self, - uri: URI, - domain: typing.Optional[Node] = None, - range: typing.Optional[Vertex] = None, # pylint: disable=redefined-builtin - unique: typing.Optional[bool] = None, - dimension: typing.Optional[int] = None, - dtype: typing.Optional[URI] = None, - distance: typing.Optional[URI] = None, - **kwargs, - ): - """Return a child of the current class.""" - if dimension is None: - dimension = self.dimension - if dtype is None: - dtype = self.dtype - if distance is None: - distance = self.distance - return super().child( - uri=uri, - domain=domain, - range=range, - unique=unique, - dimension=dimension, - dtype=dtype, - distance=distance, - **kwargs, - ) - - # essential vertices ROOT_VERTEX = Vertex( uri=ns.bsfs.Vertex, @@ -396,24 +385,31 @@ ROOT_NUMBER = Literal( parent=ROOT_LITERAL, ) -# essential predicates -ROOT_PREDICATE = Predicate( - uri=ns.bsfs.Predicate, - parent=None, - domain=ROOT_NODE, - range=ROOT_VERTEX, - unique=False, +ROOT_TIME = Literal( + uri=ns.bsfs.Time, + parent=ROOT_LITERAL, + ) + +ROOT_ARRAY = Literal( + uri=ns.bsfs.Array, + parent=ROOT_LITERAL, ) ROOT_FEATURE = Feature( uri=ns.bsfs.Feature, - parent=ROOT_PREDICATE, - domain=ROOT_NODE, - range=ROOT_LITERAL, - unique=False, + parent=ROOT_ARRAY, dimension=1, dtype=ns.bsfs.f16, distance=ns.bsfs.euclidean, ) +# essential predicates +ROOT_PREDICATE = Predicate( + uri=ns.bsfs.Predicate, + parent=None, + domain=ROOT_NODE, + range=ROOT_VERTEX, + unique=False, + ) + ## EOF ## -- cgit v1.2.3 From 3504609e1ba1f7f653fa79910474bebd3ec24d8a Mon Sep 17 00:00:00 2001 From: Matthias Baumgartner Date: Mon, 16 Jan 2023 21:41:20 +0100 Subject: various minor fixes --- bsfs/schema/serialize.py | 18 +++++++++++++----- 1 file changed, 13 insertions(+), 5 deletions(-) (limited to 'bsfs/schema') diff --git a/bsfs/schema/serialize.py b/bsfs/schema/serialize.py index 8b31737..acc009a 100644 --- a/bsfs/schema/serialize.py +++ b/bsfs/schema/serialize.py @@ -35,8 +35,11 @@ def from_string(schema_str: str) -> schema.Schema: graph.parse(data=schema_str, format='turtle') # helper functions - # FIXME: type annotation - def _fetch_value(subject: URI, predicate: rdflib.URIRef, value_factory) -> typing.Optional[typing.Any]: + def _fetch_value( + subject: URI, + predicate: rdflib.URIRef, + value_factory: typing.Callable[[typing.Any], typing.Any], + ) -> typing.Optional[typing.Any]: """Fetch the object of a given subject and predicate. Raises a `errors.ConsistencyError` if multiple objects match. """ @@ -242,9 +245,14 @@ def to_string(schema_inst: schema.Schema, fmt: str = 'turtle') -> str: for triple in _parse(node): graph.add(triple) # add known namespaces for readability - # FIXME: more systematically (e.g. for all in ns?) - graph.bind('bsfs', rdflib.URIRef('http://bsfs.ai/schema/')) - graph.bind('bse', rdflib.URIRef('http://bsfs.ai/schema/Entity#')) + # FIXME: more generically? + graph.bind('bse', rdflib.URIRef(ns.bse[''])) + graph.bind('bsfs', rdflib.URIRef(ns.bsfs[''])) + graph.bind('bsm', rdflib.URIRef(ns.bsm[''])) + graph.bind('rdf', rdflib.URIRef(ns.rdf[''])) + graph.bind('rdfs', rdflib.URIRef(ns.rdfs[''])) + graph.bind('schema', rdflib.URIRef(ns.schema[''])) + graph.bind('xsd', rdflib.URIRef(ns.xsd[''])) # serialize to turtle return graph.serialize(format=fmt) -- cgit v1.2.3 From 9310610a7edf4dcbb934aedcecff1d11348197bb Mon Sep 17 00:00:00 2001 From: Matthias Baumgartner Date: Sat, 21 Jan 2023 22:32:33 +0100 Subject: nodes predicate walk sugar --- bsfs/schema/schema.py | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'bsfs/schema') diff --git a/bsfs/schema/schema.py b/bsfs/schema/schema.py index 8d9a821..1644926 100644 --- a/bsfs/schema/schema.py +++ b/bsfs/schema/schema.py @@ -312,4 +312,8 @@ class Schema(): """Return the Literal matching the *uri*.""" return self._literals[uri] + def predicates_at(self, node: types.Node) -> typing.Iterator[types.Predicate]: + """Return predicates that have domain *node* (or superclass thereof).""" + return iter(pred for pred in self._predicates.values() if node <= pred.domain) + ## EOF ## -- cgit v1.2.3 From cb819b8c268908b5f6cc680173db86e172847c46 Mon Sep 17 00:00:00 2001 From: Matthias Baumgartner Date: Wed, 8 Feb 2023 20:15:41 +0100 Subject: binary blob in schema and sparql triple store --- bsfs/schema/schema.py | 1 + bsfs/schema/types.py | 5 +++++ 2 files changed, 6 insertions(+) (limited to 'bsfs/schema') diff --git a/bsfs/schema/schema.py b/bsfs/schema/schema.py index 1644926..0de4203 100644 --- a/bsfs/schema/schema.py +++ b/bsfs/schema/schema.py @@ -69,6 +69,7 @@ class Schema(): literals.add(types.ROOT_LITERAL) predicates.add(types.ROOT_PREDICATE) # add minimally necessary types to the schema + literals.add(types.ROOT_BLOB) literals.add(types.ROOT_NUMBER) literals.add(types.ROOT_TIME) literals.add(types.ROOT_ARRAY) diff --git a/bsfs/schema/types.py b/bsfs/schema/types.py index 3a2e10c..12e7e94 100644 --- a/bsfs/schema/types.py +++ b/bsfs/schema/types.py @@ -380,6 +380,11 @@ ROOT_LITERAL = Literal( parent=None, ) +ROOT_BLOB = Literal( + uri=ns.bsfs.BinaryBlob, + parent=ROOT_LITERAL, + ) + ROOT_NUMBER = Literal( uri=ns.bsfs.Number, parent=ROOT_LITERAL, -- cgit v1.2.3 From 2e07f33314c238e42bfadc5f39805f93ffbc622e Mon Sep 17 00:00:00 2001 From: Matthias Baumgartner Date: Thu, 2 Mar 2023 15:10:05 +0100 Subject: removed author and license notices from individual files --- bsfs/schema/__init__.py | 5 ----- bsfs/schema/schema.py | 5 ----- bsfs/schema/serialize.py | 5 ----- bsfs/schema/types.py | 5 ----- 4 files changed, 20 deletions(-) (limited to 'bsfs/schema') diff --git a/bsfs/schema/__init__.py b/bsfs/schema/__init__.py index f53512e..ca2e0cd 100644 --- a/bsfs/schema/__init__.py +++ b/bsfs/schema/__init__.py @@ -1,9 +1,4 @@ -""" -Part of the BlackStar filesystem (bsfs) module. -A copy of the license is provided with the project. -Author: Matthias Baumgartner, 2022 -""" # imports import typing diff --git a/bsfs/schema/schema.py b/bsfs/schema/schema.py index 0de4203..c104436 100644 --- a/bsfs/schema/schema.py +++ b/bsfs/schema/schema.py @@ -1,9 +1,4 @@ -""" -Part of the BlackStar filesystem (bsfs) module. -A copy of the license is provided with the project. -Author: Matthias Baumgartner, 2022 -""" # imports from collections import abc, namedtuple import typing diff --git a/bsfs/schema/serialize.py b/bsfs/schema/serialize.py index acc009a..b05b289 100644 --- a/bsfs/schema/serialize.py +++ b/bsfs/schema/serialize.py @@ -1,9 +1,4 @@ -""" -Part of the BlackStar filesystem (bsfs) module. -A copy of the license is provided with the project. -Author: Matthias Baumgartner, 2022 -""" # standard imports import itertools import typing diff --git a/bsfs/schema/types.py b/bsfs/schema/types.py index 12e7e94..54adffb 100644 --- a/bsfs/schema/types.py +++ b/bsfs/schema/types.py @@ -1,9 +1,4 @@ -""" -Part of the BlackStar filesystem (bsfs) module. -A copy of the license is provided with the project. -Author: Matthias Baumgartner, 2022 -""" # imports import typing -- cgit v1.2.3 From 28a021483c13e974e00b6159f0653b0727df9d10 Mon Sep 17 00:00:00 2001 From: Matthias Baumgartner Date: Thu, 2 Mar 2023 16:40:00 +0100 Subject: prohibit certain characters in URI and ensure URIs in bsfs.graph --- bsfs/schema/types.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'bsfs/schema') diff --git a/bsfs/schema/types.py b/bsfs/schema/types.py index 54adffb..104580d 100644 --- a/bsfs/schema/types.py +++ b/bsfs/schema/types.py @@ -98,7 +98,7 @@ class _Type(): parent: typing.Optional['_Type'] = None, **annotations: typing.Any, ): - self.uri = uri + self.uri = URI(uri) self.parent = parent self.annotations = annotations -- cgit v1.2.3 From 4fead04055be4967d9ea3b24ff61fe37a93108dd Mon Sep 17 00:00:00 2001 From: Matthias Baumgartner Date: Sat, 4 Mar 2023 13:31:11 +0100 Subject: namespace refactoring and cleanup --- bsfs/schema/serialize.py | 15 ++++++++------- bsfs/schema/types.py | 14 +++++++------- 2 files changed, 15 insertions(+), 14 deletions(-) (limited to 'bsfs/schema') diff --git a/bsfs/schema/serialize.py b/bsfs/schema/serialize.py index b05b289..ea8b2f4 100644 --- a/bsfs/schema/serialize.py +++ b/bsfs/schema/serialize.py @@ -241,13 +241,14 @@ def to_string(schema_inst: schema.Schema, fmt: str = 'turtle') -> str: graph.add(triple) # add known namespaces for readability # FIXME: more generically? - graph.bind('bse', rdflib.URIRef(ns.bse[''])) - graph.bind('bsfs', rdflib.URIRef(ns.bsfs[''])) - graph.bind('bsm', rdflib.URIRef(ns.bsm[''])) - graph.bind('rdf', rdflib.URIRef(ns.rdf[''])) - graph.bind('rdfs', rdflib.URIRef(ns.rdfs[''])) - graph.bind('schema', rdflib.URIRef(ns.schema[''])) - graph.bind('xsd', rdflib.URIRef(ns.xsd[''])) + graph.bind('bsfs', rdflib.URIRef(ns.bsfs + '/')) + graph.bind('bsl', rdflib.URIRef(ns.bsl + '/')) + graph.bind('bsn', rdflib.URIRef(ns.bsn + '#')) + graph.bind('bse', rdflib.URIRef(ns.bsfs.Entity() + '#')) + graph.bind('rdf', rdflib.URIRef(ns.rdf)) + graph.bind('rdfs', rdflib.URIRef(ns.rdfs)) + graph.bind('schema', rdflib.URIRef(ns.schema)) + graph.bind('xsd', rdflib.URIRef(ns.xsd)) # serialize to turtle return graph.serialize(format=fmt) diff --git a/bsfs/schema/types.py b/bsfs/schema/types.py index 104580d..5834df8 100644 --- a/bsfs/schema/types.py +++ b/bsfs/schema/types.py @@ -376,31 +376,31 @@ ROOT_LITERAL = Literal( ) ROOT_BLOB = Literal( - uri=ns.bsfs.BinaryBlob, + uri=ns.bsl.BinaryBlob, parent=ROOT_LITERAL, ) ROOT_NUMBER = Literal( - uri=ns.bsfs.Number, + uri=ns.bsl.Number, parent=ROOT_LITERAL, ) ROOT_TIME = Literal( - uri=ns.bsfs.Time, + uri=ns.bsl.Time, parent=ROOT_LITERAL, ) ROOT_ARRAY = Literal( - uri=ns.bsfs.Array, + uri=ns.bsl.Array, parent=ROOT_LITERAL, ) ROOT_FEATURE = Feature( - uri=ns.bsfs.Feature, + uri=ns.bsl.Array.Feature, parent=ROOT_ARRAY, dimension=1, - dtype=ns.bsfs.f16, - distance=ns.bsfs.euclidean, + dtype=ns.bsfs.dtype().f16, + distance=ns.bsd.euclidean, ) # essential predicates -- cgit v1.2.3