1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
|
"""
Part of the tagit test suite.
A copy of the license is provided with the project.
Author: Matthias Baumgartner, 2022
"""
# imports
import os
import re
import unittest
# objects to test
from bsfs.utils.uuid import UUID, UCID
## code ##
class TestUUID(unittest.TestCase):
    """Test the UUID generator.

    The UUID is expected to generate random strings of 64 characters
    (0-9, A-F, case insensitive). Due to the random nature of UUIDs, we cannot
    actually check if a uid is 'valid' besides matching the expected format.

    At best, we can check if the number of collisions (values generated
    repeatedly) is below some threshold. One would expect the number of
    collisions to increase with the number of generated uids. Hence, we only
    perform an empirical test, whereas the exact test parameters (NUM_SAMPLES,
    COLLISIONS_THRESHOLD) are subject to the application requirements. Note
    that this simple test cannot replace a thorough statistical analysis.
    """

    # expected uuid string format
    _RX_FORMAT = re.compile('[0-9A-Fa-f]{64}')

    # number of uuids to generate for collisions test
    _NUM_SAMPLES = 100_000

    # number of permitted collisions (less-than test; exclusive)
    _COLLISIONS_THRESHOLD = 2 # zero or one collisions to pass the test

    def _test_format(self, uid):
        """Assert that *uid* is a str that matches the expected format in full."""
        self.assertIsInstance(uid, str)
        # report the offending value instead of a bare "False is not true"
        self.assertIsNotNone(
            self._RX_FORMAT.fullmatch(uid),
            msg=f'malformed uid: {uid!r}',
        )

    def _sample(self, **kwargs):
        """Return the set of the first _NUM_SAMPLES uids of a new UUID(**kwargs)."""
        return {uid for _, uid in zip(range(self._NUM_SAMPLES), UUID(**kwargs))}

    def test_call(self):
        """Calling the generator returns a well-formed uid."""
        gen = UUID()
        # w/o content
        self._test_format(gen())
        # with content
        self._test_format(gen('hello world'))

    def test_iter(self):
        """Iterating over the generator yields well-formed uids."""
        for _, uid in zip(range(1_000), iter(UUID())):
            self._test_format(uid)

    def test_next(self):
        """Calling next() on the generator yields well-formed uids."""
        gen = UUID()
        for _ in range(1_000):
            uid = next(gen)
            self._test_format(uid)

    def test_collisions(self):
        """The number of repeated uids stays below _COLLISIONS_THRESHOLD."""
        # generated uuids are reasonably unique.
        # Note that we cannot guarantee no collisions.
        # Duplicates collapse in the set, so its size reveals the collisions.
        uids = self._sample()
        self.assertGreater(len(uids), self._NUM_SAMPLES - self._COLLISIONS_THRESHOLD)

        # uuids are reasonably unique across instances
        uidA = self._sample()
        uidB = self._sample()
        self.assertLess(len(uidA & uidB), self._COLLISIONS_THRESHOLD)

        # uuids are reasonably unique despite identical seeds.
        uidA = self._sample(seed=123)
        uidB = self._sample(seed=123)
        self.assertLess(len(uidA & uidB), self._COLLISIONS_THRESHOLD)
class TestUCID(unittest.TestCase):
    """Test the UCID helper against a file located next to this module."""

    def setUp(self):
        # the test file is looked up in the directory of this module
        here = os.path.dirname(__file__)
        self._path = os.path.join(here, 'testfile.t')
        # value that UCID.from_path is expected to return for the test file
        self._checksum = 'a948904f2f0f479b8f8197694b30184b0d2ed1c1cd2a1ec0fb85d299a192a447' # sha256

    def test_from_path(self):
        """UCID.from_path returns the expected checksum for the test file."""
        actual = UCID.from_path(self._path)
        self.assertEqual(actual, self._checksum)
## main ##
if __name__ == '__main__':
    # executed as a script: hand control to the unittest runner
    unittest.main()
## EOF ##
|