# test/utils/test_uuid.py

"""

Part of the tagit test suite.
A copy of the license is provided with the project.
Author: Matthias Baumgartner, 2022
"""
# imports
import os
import re
import unittest

# objects to test
from bsfs.utils.uuid import UUID, UCID


## code ##

class TestUUID(unittest.TestCase):
    """Test the UUID generator.

    The tests expect every generated uid to be a string of exactly 64 hexadecimal
    characters (0-9, A-F, case insensitive). Due to the random nature of UUIDs, we cannot
    actually check whether a uid is 'valid' beyond matching the expected format.

    At best, we can check if the number of collisions (values generated repeatedly) is below some
    threshold. One would expect the number of collisions to increase with the number of generated uids.
    Hence, we only perform an empirical test, whereas the exact test parameters (_NUM_SAMPLES,
    _COLLISIONS_THRESHOLD) are subject to the application requirements. Note that this simple test
    cannot replace a thorough statistical analysis.

    """

    # expected uuid string format (always applied with fullmatch, i.e. to the whole string)
    _RX_FORMAT = re.compile(r'[0-9A-Fa-f]{64}')

    # number of uuids to generate for collisions test
    _NUM_SAMPLES = 100_000

    # number of permitted collisions (less-than test; exclusive)
    _COLLISIONS_THRESHOLD = 2 # zero or one collisions to pass the test

    @staticmethod
    def _draw(gen, count):
        """Return the set of the first *count* values obtained by iterating *gen*."""
        # range() comes first so that zip stops without pulling a surplus value from gen
        return {uid for _, uid in zip(range(count), gen)}

    def _test_format(self, uid):
        """Assert that *uid* is a string that matches _RX_FORMAT in full."""
        self.assertIsInstance(uid, str)
        # assertIsNotNone + msg reports the offending value instead of "False is not true"
        self.assertIsNotNone(
            self._RX_FORMAT.fullmatch(uid),
            msg=f'uid does not match the expected format: {uid!r}',
        )

    def test_call(self):
        # calling the generator yields a well-formed uid
        gen = UUID()
        # w/o content
        self._test_format(gen())
        # with content
        self._test_format(gen('hello world'))

    def test_iter(self):
        # iterating over the generator yields well-formed uids
        for _, uid in zip(range(1_000), iter(UUID())):
            self._test_format(uid)

    def test_next(self):
        # next() on the generator yields well-formed uids
        gen = UUID()
        for _ in range(1_000):
            uid = next(gen)
            self._test_format(uid)

    def test_collisions(self):
        num_samples = self._NUM_SAMPLES
        threshold = self._COLLISIONS_THRESHOLD

        # generated uuids are reasonably unique.
        # Note that we cannot guarantee no collisions.
        uids = self._draw(UUID(), num_samples)
        # every repeated value shrinks the set by one, so the shortfall is the collision count
        self.assertLess(
            num_samples - len(uids), threshold,
            msg='too many collisions within a single generator',
        )

        # uuids are reasonably unique across instances
        uidA = self._draw(UUID(), num_samples)
        uidB = self._draw(UUID(), num_samples)
        self.assertLess(
            len(uidA & uidB), threshold,
            msg='too many collisions across generator instances',
        )

        # uuids are reasonably unique despite identical seeds.
        uidA = self._draw(UUID(seed=123), num_samples)
        uidB = self._draw(UUID(seed=123), num_samples)
        self.assertLess(
            len(uidA & uidB), threshold,
            msg='too many collisions across identically seeded generators',
        )


class TestUCID(unittest.TestCase):
    """Test UCID.from_path against a fixture file located next to this module."""

    def setUp(self):
        # the fixture file is looked up in the directory that contains this test module
        here = os.path.dirname(__file__)
        self._path = os.path.join(here, 'testfile.t')
        # reference value that UCID.from_path must return for the fixture (sha256)
        self._checksum = 'a948904f2f0f479b8f8197694b30184b0d2ed1c1cd2a1ec0fb85d299a192a447'

    def test_from_path(self):
        # the value computed from the fixture path equals the reference value
        actual = UCID.from_path(self._path)
        self.assertEqual(actual, self._checksum)


## main ##

# Run the tests of this module through unittest when the file is executed as a script.
if __name__ == '__main__':
    unittest.main()

## EOF ##