aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorMatthias Baumgartner <dev@igsor.net>2022-12-08 16:35:20 +0100
committerMatthias Baumgartner <dev@igsor.net>2022-12-08 16:35:20 +0100
commit547aa08b1f05ec0cdf725c34a7b1d1512b694063 (patch)
tree0bc59fb50af55b681da77540728cb557ee544496
parent7eb61d117a995b076d36c55d2c7c268665360813 (diff)
downloadbsfs-547aa08b1f05ec0cdf725c34a7b1d1512b694063.tar.gz
bsfs-547aa08b1f05ec0cdf725c34a7b1d1512b694063.tar.bz2
bsfs-547aa08b1f05ec0cdf725c34a7b1d1512b694063.zip
remaining essentials: uuid, errors
-rw-r--r--bsfs/utils/__init__.py5
-rw-r--r--bsfs/utils/errors.py38
-rw-r--r--bsfs/utils/uuid.py108
-rw-r--r--test/utils/test_uuid.py92
-rw-r--r--test/utils/testfile.t1
5 files changed, 244 insertions, 0 deletions
diff --git a/bsfs/utils/__init__.py b/bsfs/utils/__init__.py
index 56a9323..94680ee 100644
--- a/bsfs/utils/__init__.py
+++ b/bsfs/utils/__init__.py
@@ -8,12 +8,17 @@ Author: Matthias Baumgartner, 2022
import typing
# inner-module imports
+from . import errors
from .commons import typename
from .uri import URI
+from .uuid import UUID, UCID
# exports
__all__ : typing.Sequence[str] = (
+ 'UCID',
'URI',
+ 'UUID',
+ 'errors',
'typename',
)
diff --git a/bsfs/utils/errors.py b/bsfs/utils/errors.py
new file mode 100644
index 0000000..04561a2
--- /dev/null
+++ b/bsfs/utils/errors.py
@@ -0,0 +1,38 @@
+"""
+
+Part of the BlackStar filesystem (bsfs) module.
+A copy of the license is provided with the project.
+Author: Matthias Baumgartner, 2022
+"""
+# imports
+import typing
+
+# exports
+__all__: typing.Sequence[str] = (
+ )
+
+
+## code ##
+
+class _BSFSError(Exception):
+ """Generic bsfs error."""
+
+class SchemaError(_BSFSError):
+ """Generic schema errios."""
+
+class ConsistencyError(SchemaError):
+ """A requested operation is inconsistent with the schema."""
+
+class InstanceError(SchemaError):
+ """An instance affected by some operation is inconsistent with the schema."""
+
+class PermissionDeniedError(_BSFSError):
+ """An operation was aborted due to access control restrictions."""
+
+class ProgrammingError(_BSFSError):
+ """An assertion-like error that indicates a code-base issue."""
+
+class UnreachableError(ProgrammingError):
+ """Bravo, you've reached a point in code that should logically not be reachable."""
+
+## EOF ##
diff --git a/bsfs/utils/uuid.py b/bsfs/utils/uuid.py
new file mode 100644
index 0000000..7c39128
--- /dev/null
+++ b/bsfs/utils/uuid.py
@@ -0,0 +1,108 @@
+"""
+
+Part of the BlackStar filesystem (bsfs) module.
+A copy of the license is provided with the project.
+Author: Matthias Baumgartner, 2022
+"""
+# imports
+from collections import abc
+import hashlib
+import os
+import platform
+import random
+import threading
+import time
+import typing
+import uuid
+
+# constants
+HASH = hashlib.sha256
+
+# exports
+__all__: typing.Sequence[str] = [
+ 'UCID',
+ 'UUID',
+ ]
+
+
+## code ##
+
+class UUID(abc.Iterator, abc.Callable):
+ """Generate 256-bit universally unique IDs.
+
+ This is a 'best-effort' kind of implementation that tries to ensure global
+ uniqueness, even tough actual uniqueness cannot be guaranteed.
+ The approach is different from python's uuid module (which implements
+ RFC 4122) in that it generates longer UUIDs and in that it cannot be
+ reconstructed whether two UUIDs were generated on the same system.
+
+ The ID is a cryptographic hash over several components:
+ * host
+ * system
+ * process
+ * thread
+ * random
+ * time
+ * cpu cycles
+ * content (if available)
+
+ """
+
+ # host identifier
+ host: str
+
+ # system identifier
+ system: str
+
+ # process identifier
+ process: str
+
+ # thread identifier
+ thread: str
+
+ def __init__(self, seed: typing.Optional[int] = None):
+ # initialize static components
+ self.host = str(uuid.getnode())
+ self.system = '-'.join(platform.uname())
+ self.process = str(os.getpid())
+ self.thread = str(threading.get_ident())
+ # initialize random component
+ random.seed(seed)
+
+ def __call__(self, content: typing.Optional[str] = None) -> str:
+ """Return a globally unique ID."""
+ # content component
+ content = str(content) if content is not None else ''
+ # time component
+ now = str(time.time())
+ # clock component
+ clk = str(time.perf_counter())
+ # random component
+ rnd = str(random.random())
+ # build the token from all available components
+ token = self.host + self.system + self.process + self.thread + rnd + now + clk + content
+ # return the token's hash
+ return HASH(token.encode('ascii', 'ignore')).hexdigest()
+
+ def __iter__(self) -> typing.Iterator[str]:
+ """Iterate indefinitely over universally unique IDs."""
+ return self
+
+ def __next__(self) -> str:
+ """Generate universally unique IDs."""
+ return self()
+
+
+class UCID(abc.Callable):
+ """Generate 256-bit content IDs.
+
+ Effectively computes a cryptographic hash over the content.
+
+ """
+ @staticmethod
+ def from_path(path: str) -> str:
+ """Read the content from a file."""
+ with open(path, 'rb') as ifile:
+ return HASH(ifile.read()).hexdigest()
+
+## EOF ##
diff --git a/test/utils/test_uuid.py b/test/utils/test_uuid.py
new file mode 100644
index 0000000..49176d4
--- /dev/null
+++ b/test/utils/test_uuid.py
@@ -0,0 +1,92 @@
+"""
+
+Part of the tagit test suite.
+A copy of the license is provided with the project.
+Author: Matthias Baumgartner, 2022
+"""
+# imports
+import os
+import re
+import unittest
+
+# objects to test
+from bsfs.utils.uuid import UUID, UCID
+
+
+## code ##
+
+class TestUUID(unittest.TestCase):
+ """Test the UUID generator.
+
+ The UUID is expected to generate random strings of 64 characters(0-9, A-F, case insensitive).
+ Due to the random nature of UUIDs, we cannot actually check if an uid is 'valid' besides
+ matching the expected format.
+
+ At best, we can check if the number of collisions (values generated repeatedly) is below some
+ threshold. One would expect the number of collisions to increase with the number of generated uids.
+ Hence, we only perform an empirical test, whereas the exact test parameters (NUM_SAMPLES,
+ COLLISIONS_THRESHOLD) are subject to the application requirements. Note that this simple test
+ cannot replace a thorough statistical analysis.
+
+ """
+
+ # expected uuid string format
+ _RX_FORMAT = re.compile('[0-9A-Fa-f]{64}')
+
+ # number of uuids to generate for collisions test
+ _NUM_SAMPLES = 100_000
+
+ # number of permitted collisions (less-than test; exclusive)
+ _COLLISIONS_THRESHOLD = 2 # zero or one collisions to pass the test
+
+ def _test_format(self, uid):
+ self.assertIsInstance(uid, str)
+ self.assertTrue(self._RX_FORMAT.fullmatch(uid) is not None)
+
+ def test_call(self):
+ gen = UUID()
+ # w/o content
+ self._test_format(gen())
+ # with content
+ self._test_format(gen('hello world'))
+
+ def test_iter(self):
+ for _, uid in zip(range(1_000), iter(UUID())):
+ self._test_format(uid)
+
+ def test_next(self):
+ gen = UUID()
+ for _ in range(1_000):
+ uid = next(gen)
+ self._test_format(uid)
+
+ def test_collisions(self):
+ # generated uuids are reasonably unique.
+ # Note that we cannot guarantee no collisions.
+ uids = {uid for _, uid in zip(range(self._NUM_SAMPLES), UUID())}
+ self.assertGreater(len(uids), self._NUM_SAMPLES - self._COLLISIONS_THRESHOLD)
+ # uuids are reasonably unique across instances
+ uidA = {uid for _, uid in zip(range(self._NUM_SAMPLES), UUID())}
+ uidB = {uid for _, uid in zip(range(self._NUM_SAMPLES), UUID())}
+ self.assertLess(len(uidA & uidB), self._COLLISIONS_THRESHOLD)
+ # uuids are reasonably unique despite identical seeds.
+ uidA = {uid for _, uid in zip(range(self._NUM_SAMPLES), UUID(seed=123))}
+ uidB = {uid for _, uid in zip(range(self._NUM_SAMPLES), UUID(seed=123))}
+ self.assertLess(len(uidA & uidB), self._COLLISIONS_THRESHOLD)
+
+
+class TestUCID(unittest.TestCase):
+ def setUp(self):
+ self._checksum = 'a948904f2f0f479b8f8197694b30184b0d2ed1c1cd2a1ec0fb85d299a192a447' # sha256
+ self._path = os.path.join(os.path.dirname(__file__), 'testfile.t')
+
+ def test_from_path(self):
+ self.assertEqual(UCID.from_path(self._path), self._checksum)
+
+
+## main ##
+
+if __name__ == '__main__':
+ unittest.main()
+
+## EOF ##
diff --git a/test/utils/testfile.t b/test/utils/testfile.t
new file mode 100644
index 0000000..3b18e51
--- /dev/null
+++ b/test/utils/testfile.t
@@ -0,0 +1 @@
+hello world