2 files changed, 93 insertions, 0 deletions
diff --git a/test/utils/test_uuid.py b/test/utils/test_uuid.py
new file mode 100644
index 0000000..49176d4
--- /dev/null
+++ b/test/utils/test_uuid.py
@@ -0,0 +1,92 @@
+"""Tests for UUID and UCID from bsfs.utils.uuid.
+
+Part of the tagit test suite.
+A copy of the license is provided with the project.
+Author: Matthias Baumgartner, 2022
+"""
+# imports
+import os
+import re
+import unittest
+
+# objects to test
+from bsfs.utils.uuid import UUID, UCID
+
+
+## code ##
+
+class TestUUID(unittest.TestCase):
+    """Test the UUID generator.
+
+    The UUID is expected to generate random strings of 64 characters (0-9, A-F, case insensitive).
+    Due to the random nature of UUIDs, we cannot actually check if a uid is 'valid' besides
+    matching the expected format.
+
+    At best, we can check if the number of collisions (values generated repeatedly) is below some
+    threshold. One would expect the number of collisions to increase with the number of generated uids.
+    Hence, we only perform an empirical test, whereas the exact test parameters (_NUM_SAMPLES,
+    _COLLISIONS_THRESHOLD) are subject to the application requirements. Note that this simple test
+    cannot replace a thorough statistical analysis.
+    """
+
+    # expected uuid string format
+    _RX_FORMAT = re.compile('[0-9A-Fa-f]{64}')
+
+    # number of uuids to generate for collisions test
+    _NUM_SAMPLES = 100_000
+
+    # number of permitted collisions (less-than test; exclusive)
+    _COLLISIONS_THRESHOLD = 2 # zero or one collisions to pass the test
+
+    def _test_format(self, uid):
+        """Assert that *uid* is a str consisting of exactly 64 hexadecimal digits."""
+        self.assertIsInstance(uid, str)
+        # fullmatch (not search): the entire string must match, so over-long values are rejected.
+        self.assertIsNotNone(self._RX_FORMAT.fullmatch(uid), f'malformed uid: {uid!r}')
+
+    def test_call(self):
+        # calling the generator returns a well-formed uid in both call forms.
+        gen = UUID()
+        self._test_format(gen()) # w/o content
+        self._test_format(gen('hello world')) # with content
+
+    def test_iter(self):
+        # iterating over the generator yields well-formed uids.
+        for _, uid in zip(range(1_000), iter(UUID())):
+            self._test_format(uid)
+
+    def test_next(self):
+        # the generator can be advanced with next(); every value is a well-formed uid.
+        gen = UUID()
+        for _ in range(1_000):
+            self._test_format(next(gen))
+
+    def test_collisions(self):
+        # uuids from one instance are reasonably unique (collisions cannot be ruled out entirely).
+        uids = {uid for _, uid in zip(range(self._NUM_SAMPLES), UUID())}
+        self.assertGreater(len(uids), self._NUM_SAMPLES - self._COLLISIONS_THRESHOLD)
+        # uuids are reasonably unique across instances
+        uidA = {uid for _, uid in zip(range(self._NUM_SAMPLES), UUID())}
+        uidB = {uid for _, uid in zip(range(self._NUM_SAMPLES), UUID())}
+        self.assertLess(len(uidA & uidB), self._COLLISIONS_THRESHOLD)
+        # uuids are reasonably unique despite identical seeds.
+        uidA = {uid for _, uid in zip(range(self._NUM_SAMPLES), UUID(seed=123))}
+        uidB = {uid for _, uid in zip(range(self._NUM_SAMPLES), UUID(seed=123))}
+        self.assertLess(len(uidA & uidB), self._COLLISIONS_THRESHOLD)
+
+
+class TestUCID(unittest.TestCase):
+    def setUp(self):
+        self._path = os.path.join(os.path.dirname(__file__), 'testfile.t') # fixture beside this module
+        self._checksum = 'a948904f2f0f479b8f8197694b30184b0d2ed1c1cd2a1ec0fb85d299a192a447' # sha256
+
+    def test_from_path(self):
+        self.assertEqual(UCID.from_path(self._path), self._checksum)
+
+
+## main ##
+
+if __name__ == '__main__': # only when executed as a script, not on import
+    unittest.main()
+
+## EOF ##
diff --git a/test/utils/testfile.t b/test/utils/testfile.t
new file mode 100644
index 0000000..3b18e51
--- /dev/null
+++ b/test/utils/testfile.t
@@ -0,0 +1 @@
+hello world