aboutsummaryrefslogtreecommitdiffstats
path: root/bsfs/triple_store/base.py
blob: 28ebb86c9f8a53a9d2de9d7c61b28fd44fa91242 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
"""

Part of the BlackStar filesystem (bsfs) module.
A copy of the license is provided with the project.
Author: Matthias Baumgartner, 2022
"""
# imports
import abc
import typing

# inner-module imports
from bsfs.utils import URI, typename
import bsfs.schema  as _schema

# exports
__all__: typing.Sequence[str] = (
    'TripleStoreBase',
    )


## code ##

class TripleStoreBase(abc.ABC):
    """TripleStore base class.

    Use the `Open` method to create a new instance and to initialize
    the required structures.

    Triple stores express a graph via its (subject, predicate, object) triples.
    They provide methods to add and remove triples, and to query the storage
    for given graph structures. The subject is always a node in the graph,
    whereas nodes are identifiable by a unique URI. Note that blank nodes
    (without an explicit URI) are not supported. The object can be another
    Node or a Literal value. The relation between a subject and an object
    is expressed via a Predicate. The graph structures are governed by a
    schema that defines which Node, Literal, and Predicate classes exist
    and how they can interact (see `bsfs.schema.Schema`).

    """

    # storage's URI. None implies a temporary location.
    uri: typing.Optional[URI] = None

    def __init__(self, uri: typing.Optional[URI] = None):
        """Remember the storage's *uri*. Pass None for a temporary location."""
        self.uri = uri

    def __hash__(self) -> int:
        """Hash by (type, uri), falling back to the object's id if no uri is set."""
        # Stores without a uri have no shared identity, hence each instance
        # gets its own hash via id().
        uri = self.uri if self.uri is not None else id(self)
        return hash((type(self), uri))

    def __eq__(self, other) -> bool:
        """Return True if *other* is the same store.

        Two stores are equal if *other* is an instance of this store's type
        and both have the same (not None) uri, or if they are the very
        same object.

        """
        # NOTE(review): isinstance() accepts subclasses of type(self) whereas
        # __hash__ uses the exact type. Comparing a base instance to a subclass
        # instance with identical uri may hence be asymmetric -- confirm intended.
        return isinstance(other, type(self)) \
           and (( self.uri is not None \
           and    other.uri is not None \
           and    self.uri == other.uri ) \
            or  id(self) == id(other))

    def __repr__(self) -> str:
        return f'{typename(self)}(uri={self.uri})'

    def __str__(self) -> str:
        return f'{typename(self)}(uri={self.uri})'

    def is_persistent(self) -> bool:
        """Return True if data is stored persistently."""
        # A store is considered persistent iff it has a uri.
        return self.uri is not None


    @classmethod
    @abc.abstractmethod
    def Open(cls, **kwargs: typing.Any) -> 'TripleStoreBase': # pylint: disable=invalid-name # capitalized classmethod
        """Return a TripleStoreBase instance configured via *kwargs*.

        The accepted keyword arguments are defined by the implementing subclass.

        """

    @abc.abstractmethod
    def commit(self):
        """Commit the current transaction."""

    @abc.abstractmethod
    def rollback(self):
        """Undo changes since the last commit."""

    @property
    @abc.abstractmethod
    def schema(self) -> _schema.Schema:
        """Return the store's local schema."""

    @schema.setter
    @abc.abstractmethod
    def schema(self, schema: _schema.Schema):
        """Migrate to new schema by adding or removing class definitions.

        Commits before and after the migration.

        Instances of removed classes will be deleted irreversibly.
        Note that modifying an existing class is not directly supported.
        Also, it is generally discouraged, since changing definitions may
        lead to inconsistencies across multiple clients in a distributed
        setting. Instead, consider introducing a new class under its own
        uri. Such a migration would look as follows:

        1. Add new class definitions.
        2. Create instances of the new classes and copy relevant data.
        3. Remove the old definitions.

        To modify a class, i.e., re-use a previous uri with a new
        class definition, you would have to migrate via temporary
        class definitions, and thus repeat the above procedure two times.

        """

    @abc.abstractmethod
    def get(
            self,
            node_type: _schema.Node,
            # The annotation is quoted because no `ast` name is imported in this
            # module; an unquoted annotation would be evaluated at definition
            # time and raise a NameError on import.
            query: 'ast.filter.FilterExpression',
            ) -> typing.Iterator[URI]:
        """Return guids of nodes of type *node_type* that match the *query*."""

    @abc.abstractmethod
    def exists(
            self,
            node_type: _schema.Node,
            guids: typing.Iterable[URI],
            ) -> typing.Iterable[URI]:
        """Return those *guids* that exist and have type *node_type* or a subclass thereof."""

    @abc.abstractmethod
    def create(
            self,
            node_type: _schema.Node,
            guids: typing.Iterable[URI],
            ):
        """Create a node of type *node_type* for each of the *guids*."""

    @abc.abstractmethod
    def set(
            self,
            node_type: _schema.Node, # FIXME: is the node_type even needed? Couldn't I infer from the predicate?
            guids: typing.Iterable[URI],
            predicate: _schema.Predicate,
            values: typing.Iterable[typing.Any],
            ):
        """Add triples to the graph.

        It is assumed that all of *guids* exist and have *node_type*.
        This method adds a triple (guid, predicate, value) for every guid in
        *guids* and each value in *values* (cartesian product). Note that
        *values* must have length one for unique predicates, and that
        currently existing values will be overwritten in this case.
        It also verifies that all symbols are part of the schema and that
        the *predicate* matches the *node_type*.
        Raises `bsfs.errors.ConsistencyError` if these assumptions are violated.

        """

## EOF ##