From 63fe1d017e2fad8181e3ff47185b974304957d56 Mon Sep 17 00:00:00 2001
From: Matthias Baumgartner
Date: Wed, 5 Apr 2023 17:16:14 +0200
Subject: IPTC tag extraction

---
 test/extractor/image/test_iptc.py | 69 ++++++++++++++++++++++++++++++++++++++
 test/lib/test_naming_policy.py    | 27 +++++++++++++++
 test/reader/test_exif.py          | 22 +++++++++++-
 test/reader/testimage_exif.jpg    | Bin 719 -> 777 bytes
 4 files changed, 117 insertions(+), 1 deletion(-)
 create mode 100644 test/extractor/image/test_iptc.py

(limited to 'test')

diff --git a/test/extractor/image/test_iptc.py b/test/extractor/image/test_iptc.py
new file mode 100644
index 0000000..5fa763d
--- /dev/null
+++ b/test/extractor/image/test_iptc.py
@@ -0,0 +1,69 @@
+
+# standard imports
+import unittest
+
+# bsie imports
+from bsie.extractor import base
+from bsie.utils import bsfs, node as _node, ns
+
+# objects to test
+from bsie.extractor.image.iptc import Iptc
+
+
+## code ##
+
+class TestIptc(unittest.TestCase):
+
+    def test_eq(self):
+        # identical instances are equal
+        self.assertEqual(Iptc(), Iptc())
+        self.assertEqual(hash(Iptc()), hash(Iptc()))
+        # comparison respects type
+        class Foo(): pass
+        self.assertNotEqual(Iptc(), Foo())
+        self.assertNotEqual(hash(Iptc()), hash(Foo()))
+        self.assertNotEqual(Iptc(), 1234)
+        self.assertNotEqual(hash(Iptc()), hash(1234))
+        self.assertNotEqual(Iptc(), None)
+        self.assertNotEqual(hash(Iptc()), hash(None))
+
+    def test_schema(self):
+        self.assertSetEqual({pred.uri for pred in Iptc().schema.predicates()}, {
+            ns.bsfs.Predicate,
+            ns.bse.tag,
+            ns.bst.label,
+            })
+
+    def test_extract(self):
+        ext = Iptc()
+        node = _node.Node(ns.bsfs.File, '') # Blank node
+        content = {
+            'Iptc.Application2.Keywords': ['hello', 'world'],
+            'Iptc.Application2.RecordVersion': '4',
+            }
+        # target tags
+        t_hello = _node.Node(ns.bsn.Tag, label='hello')
+        t_world = _node.Node(ns.bsn.Tag, label='world')
+
+        # invalid principals are ignored
+        self.assertSetEqual(set(ext.extract(node, content, {ns.bse.filename})), set())
+        # extract finds all relevant information
+        self.assertSetEqual(set(ext.extract(node, content, {ext.schema.predicate(ns.bse.tag)})), {
+            (node, ext.schema.predicate(ns.bse.tag), t_hello),
+            (node, ext.schema.predicate(ns.bse.tag), t_world),
+            (t_hello, ext.schema.predicate(ns.bst.label), 'hello'),
+            (t_world, ext.schema.predicate(ns.bst.label), 'world'),
+            })
+
+        # empty content is acceptable
+        self.assertSetEqual(set(ext.extract(node, {}, set(ext.principals))), set())
+        # no principals is acceptable
+        self.assertSetEqual(set(ext.extract(node, content, set())), set())
+
+
+## main ##
+
+if __name__ == '__main__':
+    unittest.main()
+
+## EOF ##
diff --git a/test/lib/test_naming_policy.py b/test/lib/test_naming_policy.py
index c9b0cd2..b284fc0 100644
--- a/test/lib/test_naming_policy.py
+++ b/test/lib/test_naming_policy.py
@@ -31,6 +31,10 @@ class TestDefaultNamingPolicy(unittest.TestCase):
         self.assertEqual(policy.handle_node(
             Node(ns.bsn.Preview, ucid='abc123cba', size=123)).uri,
             URI('http://example.com/me/preview#abc123cba_s123'))
+        # processes bsn:Tag
+        self.assertEqual(policy.handle_node(
+            Node(ns.bsn.Tag, label='hello')).uri,
+            URI('http://example.com/me/tag#hello'))
         # raises an exception on unknown types
         self.assertRaises(errors.ProgrammingError, policy.handle_node,
             Node(ns.bsn.Invalid, ucid='abc123cba', size=123))
@@ -71,6 +75,29 @@ class TestDefaultNamingPolicy(unittest.TestCase):
         self.assertTrue(policy.name_preview(
             Node(ns.bsn.Preview, size=200)).uri.endswith('_s200'))
 
+    def test_name_tag(self):
+        # setup
+        policy = DefaultNamingPolicy('http://example.com', 'me')
+        # name_tag uses label
+        self.assertEqual(policy.name_tag(
+            Node(ns.bsn.Tag, label='hello')).uri,
+            URI('http://example.com/me/tag#hello'))
+        # name_tag matches the label
+        self.assertEqual(
+            policy.name_tag(Node(ns.bsn.Tag, label='world')),
+            policy.name_tag(Node(ns.bsn.Tag, label='world')),
+            )
+        self.assertNotEqual(
+            policy.name_tag(Node(ns.bsn.Tag, label='hello')),
+            policy.name_tag(Node(ns.bsn.Tag, label='world')),
+            )
+        # label can include characters that are not valid for a URI (they get percent-encoded)
+        self.assertEqual(policy.name_tag(
+            Node(ns.bsn.Tag, label='hello world { foo bar ] ')).uri,
+            URI('http://example.com/me/tag#hello%20world%20%7B%20foo%20bar%20%5D%20'))
+        # name_tag falls back to a random guid
+        self.assertTrue(policy.name_tag(
+            Node(ns.bsn.Tag,)).uri.startswith('http://example.com/me/tag#'))
 
 class TestNamingPolicyIterator(unittest.TestCase):
 
diff --git a/test/reader/test_exif.py b/test/reader/test_exif.py
index de6e801..1767f12 100644
--- a/test/reader/test_exif.py
+++ b/test/reader/test_exif.py
@@ -10,7 +10,7 @@ import pyexiv2
 from bsie.utils import errors
 
 # objects to test
-from bsie.reader.exif import Exif
+from bsie.reader.exif import Exif, Iptc
 
 
 ## code ##
@@ -44,6 +44,26 @@ class TestExif(unittest.TestCase):
             })
 
 
+class TestIptc(unittest.TestCase):
+    def test_call(self):
+        rdr = Iptc()
+        # discards non-image files
+        self.assertRaises(errors.UnsupportedFileFormatError, rdr,
+            os.path.join(os.path.dirname(__file__), 'invalid.doc'))
+        # raises on invalid image files
+        self.assertRaises(errors.UnsupportedFileFormatError, rdr,
+            os.path.join(os.path.dirname(__file__), 'invalid.jpg'))
+        # raises on corrupted image files
+        pyexiv2.set_log_level(3) # suppress log message
+        self.assertRaises(errors.ReaderError, rdr,
+            os.path.join(os.path.dirname(__file__), 'testimage_exif_corrupted.jpg'))
+        # returns dict with iptc info
+        self.assertDictEqual(rdr(os.path.join(os.path.dirname(__file__), 'testimage_exif.jpg')), {
+            'Iptc.Application2.Keywords': ['hello', 'world'],
+            'Iptc.Application2.RecordVersion': '4',
+            })
+
+
 ## main ##
 
 if __name__ == '__main__':
diff --git a/test/reader/testimage_exif.jpg b/test/reader/testimage_exif.jpg
index a774bc2..bc331ac 100644
Binary files a/test/reader/testimage_exif.jpg and b/test/reader/testimage_exif.jpg differ
-- 
cgit v1.2.3