From 63fe1d017e2fad8181e3ff47185b974304957d56 Mon Sep 17 00:00:00 2001
From: Matthias Baumgartner <dev@igsor.net>
Date: Wed, 5 Apr 2023 17:16:14 +0200
Subject: IPTC tag extraction

---
 test/extractor/image/test_iptc.py |  69 ++++++++++++++++++++++++++++++++++++++
 test/lib/test_naming_policy.py    |  27 +++++++++++++++
 test/reader/test_exif.py          |  22 +++++++++++-
 test/reader/testimage_exif.jpg    | Bin 719 -> 777 bytes
 4 files changed, 117 insertions(+), 1 deletion(-)
 create mode 100644 test/extractor/image/test_iptc.py

(limited to 'test')

diff --git a/test/extractor/image/test_iptc.py b/test/extractor/image/test_iptc.py
new file mode 100644
index 0000000..5fa763d
--- /dev/null
+++ b/test/extractor/image/test_iptc.py
@@ -0,0 +1,69 @@
+
+# standard imports
+import unittest
+
+# bsie imports
+from bsie.extractor import base
+from bsie.utils import bsfs, node as _node, ns
+
+# objects to test
+from bsie.extractor.image.iptc import Iptc
+
+
+## code ##
+
+class TestIptc(unittest.TestCase):  # unit tests for the Iptc extractor
+
+    def test_eq(self):
+        # two default-constructed extractors compare equal and hash identically
+        self.assertEqual(Iptc(), Iptc())
+        self.assertEqual(hash(Iptc()), hash(Iptc()))
+        # objects of other types (custom class, int, None) are unequal and hash differently
+        class Foo(): pass
+        self.assertNotEqual(Iptc(), Foo())
+        self.assertNotEqual(hash(Iptc()), hash(Foo()))
+        self.assertNotEqual(Iptc(), 1234)
+        self.assertNotEqual(hash(Iptc()), hash(1234))
+        self.assertNotEqual(Iptc(), None)
+        self.assertNotEqual(hash(Iptc()), hash(None))
+
+    def test_schema(self):  # the schema exposes exactly these predicate uris
+        self.assertSetEqual({pred.uri for pred in Iptc().schema.predicates()}, {
+            ns.bsfs.Predicate,
+            ns.bse.tag,
+            ns.bst.label,
+            })
+
+    def test_extract(self):
+        ext = Iptc()
+        node = _node.Node(ns.bsfs.File, '') # Blank node
+        content = {
+            'Iptc.Application2.Keywords': ['hello', 'world'],
+            'Iptc.Application2.RecordVersion': '4',  # no triple is expected for this entry
+            }
+        # tag nodes expected for the two keywords
+        t_hello = _node.Node(ns.bsn.Tag, label='hello')
+        t_world = _node.Node(ns.bsn.Tag, label='world')
+
+        # an unrelated principal (bse:filename) yields no triples
+        self.assertSetEqual(set(ext.extract(node, content, {ns.bse.filename})), set())
+        # with the tag principal, each keyword yields a tag triple plus a label triple
+        self.assertSetEqual(set(ext.extract(node, content, {ext.schema.predicate(ns.bse.tag)})), {
+            (node, ext.schema.predicate(ns.bse.tag), t_hello),
+            (node, ext.schema.predicate(ns.bse.tag), t_world),
+            (t_hello, ext.schema.predicate(ns.bst.label), 'hello'),
+            (t_world, ext.schema.predicate(ns.bst.label), 'world'),
+            })
+
+        # empty content is acceptable
+        self.assertSetEqual(set(ext.extract(node, {}, set(ext.principals))), set())
+        # no principals is acceptable
+        self.assertSetEqual(set(ext.extract(node, content, set())), set())
+
+
+## main ##
+
+if __name__ == '__main__':
+    unittest.main()
+
+## EOF ##
diff --git a/test/lib/test_naming_policy.py b/test/lib/test_naming_policy.py
index c9b0cd2..b284fc0 100644
--- a/test/lib/test_naming_policy.py
+++ b/test/lib/test_naming_policy.py
@@ -31,6 +31,10 @@ class TestDefaultNamingPolicy(unittest.TestCase):
         self.assertEqual(policy.handle_node(
             Node(ns.bsn.Preview, ucid='abc123cba', size=123)).uri,
             URI('http://example.com/me/preview#abc123cba_s123'))
+        # processes bsn:Tag
+        self.assertEqual(policy.handle_node(
+            Node(ns.bsn.Tag, label='hello')).uri,
+            URI('http://example.com/me/tag#hello'))
         # raises an exception on unknown types
         self.assertRaises(errors.ProgrammingError, policy.handle_node,
             Node(ns.bsn.Invalid, ucid='abc123cba', size=123))
@@ -71,6 +75,29 @@ class TestDefaultNamingPolicy(unittest.TestCase):
         self.assertTrue(policy.name_preview(
             Node(ns.bsn.Preview, size=200)).uri.endswith('_s200'))
 
+    def test_name_tag(self):  # name_tag derives a tag node's uri from its label
+        # setup
+        policy = DefaultNamingPolicy('http://example.com', 'me')
+        # name_tag uses the label as uri fragment
+        self.assertEqual(policy.name_tag(
+            Node(ns.bsn.Tag, label='hello')).uri,
+            URI('http://example.com/me/tag#hello'))
+        # naming is repeatable: equal labels give equal nodes, different labels differ
+        self.assertEqual(
+            policy.name_tag(Node(ns.bsn.Tag, label='world')),
+            policy.name_tag(Node(ns.bsn.Tag, label='world')),
+            )
+        self.assertNotEqual(
+            policy.name_tag(Node(ns.bsn.Tag, label='hello')),
+            policy.name_tag(Node(ns.bsn.Tag, label='world')),
+            )
+        # label can include characters that are not valid for an uri (expected percent-encoded)
+        self.assertEqual(policy.name_tag(
+            Node(ns.bsn.Tag, label='hello world { foo bar ] ')).uri,
+            URI('http://example.com/me/tag#hello%20world%20%7B%20foo%20bar%20%5D%20'))
+        # without a label, name_tag falls back to a random guid; only the uri prefix is checked
+        self.assertTrue(policy.name_tag(
+            Node(ns.bsn.Tag)).uri.startswith('http://example.com/me/tag#'))
 
 class TestNamingPolicyIterator(unittest.TestCase):
 
diff --git a/test/reader/test_exif.py b/test/reader/test_exif.py
index de6e801..1767f12 100644
--- a/test/reader/test_exif.py
+++ b/test/reader/test_exif.py
@@ -10,7 +10,7 @@ import pyexiv2
 from bsie.utils import errors
 
 # objects to test
-from bsie.reader.exif import Exif
+from bsie.reader.exif import Exif, Iptc
 
 
 ## code ##
@@ -44,6 +44,26 @@ class TestExif(unittest.TestCase):
             })
 
 
+class TestIptc(unittest.TestCase):  # tests for calling the Iptc reader on files
+    def test_call(self):
+        rdr = Iptc()
+        # discards non-image files
+        self.assertRaises(errors.UnsupportedFileFormatError, rdr,
+            os.path.join(os.path.dirname(__file__), 'invalid.doc'))
+        # raises on invalid image files
+        self.assertRaises(errors.UnsupportedFileFormatError, rdr,
+            os.path.join(os.path.dirname(__file__), 'invalid.jpg'))
+        # raises a ReaderError on corrupted image files
+        pyexiv2.set_log_level(3) # suppress log message
+        self.assertRaises(errors.ReaderError, rdr,
+            os.path.join(os.path.dirname(__file__), 'testimage_exif_corrupted.jpg'))
+        # returns dict with iptc info
+        self.assertDictEqual(rdr(os.path.join(os.path.dirname(__file__), 'testimage_exif.jpg')), {
+            'Iptc.Application2.Keywords': ['hello', 'world'],
+            'Iptc.Application2.RecordVersion': '4',
+            })
+
+
 ## main ##
 
 if __name__ == '__main__':
diff --git a/test/reader/testimage_exif.jpg b/test/reader/testimage_exif.jpg
index a774bc2..bc331ac 100644
Binary files a/test/reader/testimage_exif.jpg and b/test/reader/testimage_exif.jpg differ
-- 
cgit v1.2.3