diff options
author | Matthias Baumgartner <dev@igsor.net> | 2023-03-01 21:38:09 +0100 |
---|---|---|
committer | Matthias Baumgartner <dev@igsor.net> | 2023-03-01 21:38:09 +0100 |
commit | ec9105b690974b0246e36769506e735c4edf069a (patch) | |
tree | c79a3a4489baae55fb74d84714ed728b79e50784 | |
parent | 464cc6cb54f55f6255bf0a485533c181d6018303 (diff) | |
download | bsie-ec9105b690974b0246e36769506e735c4edf069a.tar.gz bsie-ec9105b690974b0246e36769506e735c4edf069a.tar.bz2 bsie-ec9105b690974b0246e36769506e735c4edf069a.zip |
Exif data reader and extractor
-rw-r--r-- | bsie/apps/default_config.yaml | 8 | ||||
-rw-r--r-- | bsie/extractor/image/photometrics.py | 219 | ||||
-rw-r--r-- | bsie/reader/exif.py | 49 | ||||
-rw-r--r-- | test/extractor/image/test_photometrics.py | 138 | ||||
-rw-r--r-- | test/reader/test_exif.py | 48 | ||||
-rw-r--r-- | test/reader/testimage_exif.jpg | bin | 0 -> 719 bytes |
6 files changed, 459 insertions, 3 deletions
diff --git a/bsie/apps/default_config.yaml b/bsie/apps/default_config.yaml index 4d99e22..a59b0f3 100644 --- a/bsie/apps/default_config.yaml +++ b/bsie/apps/default_config.yaml @@ -11,7 +11,9 @@ ExtractorBuilder: - bsie.extractor.generic.stat.Stat: {} - bsie.extractor.image.colors_spatial.ColorsSpatial: - width: 2 - height: 2 - exp: 2 + width: 32 + height: 32 + exp: 4 + + - bsie.extractor.image.photometrics.Exif: {} diff --git a/bsie/extractor/image/photometrics.py b/bsie/extractor/image/photometrics.py new file mode 100644 index 0000000..ae0a541 --- /dev/null +++ b/bsie/extractor/image/photometrics.py @@ -0,0 +1,219 @@ +""" + +Part of the bsie module. +A copy of the license is provided with the project. +Author: Matthias Baumgartner, 2022 +""" +# standard imports +from fractions import Fraction +import typing + +# bsie imports +from bsie.utils import bsfs, node, ns + +# inner-module imports +from .. import base + +# exports +__all__: typing.Sequence[str] = ( + 'Exif', + ) + + +## code ## + +def _gps_to_dec(coords: typing.Tuple[float, float, float]) -> float: + """Convert GPS coordinates from exif to float.""" + # unpack args + deg, min, sec = coords + # convert to float + deg = float(Fraction(deg)) + min = float(Fraction(min)) + sec = float(Fraction(sec)) + + if float(sec) > 0: + # format is deg+min+sec + return (float(deg) * 3600 + float(min) * 60 + float(sec)) / 3600 + else: + # format is deg+min + return float(deg) + float(min) / 60 + + +class Exif(base.Extractor): + """Extract information from EXIF/IPTC tags of an image file.""" + + CONTENT_READER = 'bsie.reader.exif.Exif' + + def __init__(self): + super().__init__(bsfs.schema.from_string(base.SCHEMA_PREAMBLE + ''' + #bse:t_capture rdfs:subClassOf bsfs:Predicate ; + # rdfs:domain bsfs:File ; + # rdfs:range xsd:float ; + # bsfs:unique "true"^^xsd:boolean . + bse:exposure rdfs:subClassOf bsfs:Predicate ; + rdfs:domain bsfs:File ; + rdfs:range xsd:float ; + bsfs:unique "true"^^xsd:boolean . + bse:aperture rdfs:subClassOf bsfs:Predicate ; + rdfs:domain bsfs:File ; + rdfs:range xsd:float ; + bsfs:unique "true"^^xsd:boolean . + bse:iso rdfs:subClassOf bsfs:Predicate ; + rdfs:domain bsfs:File ; + rdfs:range xsd:integer ; + bsfs:unique "true"^^xsd:boolean . + bse:focal_length rdfs:subClassOf bsfs:Predicate ; + rdfs:domain bsfs:File ; + rdfs:range xsd:float ; + bsfs:unique "true"^^xsd:boolean . + bse:width rdfs:subClassOf bsfs:Predicate ; + rdfs:domain bsfs:File ; + rdfs:range xsd:integer ; + bsfs:unique "true"^^xsd:boolean . + bse:height rdfs:subClassOf bsfs:Predicate ; + rdfs:domain bsfs:File ; + rdfs:range xsd:integer ; + bsfs:unique "true"^^xsd:boolean . + bse:orientation rdfs:subClassOf bsfs:Predicate ; + rdfs:domain bsfs:File ; + rdfs:range xsd:integer ; + bsfs:unique "true"^^xsd:boolean . + bse:orientation_label rdfs:subClassOf bsfs:Predicate ; + rdfs:domain bsfs:File ; + rdfs:range xsd:string ; + bsfs:unique "true"^^xsd:boolean . + bse:altitude rdfs:subClassOf bsfs:Predicate ; + rdfs:domain bsfs:File ; + rdfs:range xsd:float ; + bsfs:unique "true"^^xsd:boolean . + bse:latitude rdfs:subClassOf bsfs:Predicate ; + rdfs:domain bsfs:File ; + rdfs:range xsd:float ; + bsfs:unique "true"^^xsd:boolean . + bse:longitude rdfs:subClassOf bsfs:Predicate ; + rdfs:domain bsfs:File ; + rdfs:range xsd:float ; + bsfs:unique "true"^^xsd:boolean . + ''')) + # initialize mapping from predicate to callback + self._callmap = { + #self.schema.predicate(ns.bse.t_capture): self._date, + self.schema.predicate(ns.bse.exposure): self._exposure, + self.schema.predicate(ns.bse.aperture): self._aperture, + self.schema.predicate(ns.bse.iso): self._iso, + self.schema.predicate(ns.bse.focal_length): self._focal_length, + self.schema.predicate(ns.bse.width): self._width, + self.schema.predicate(ns.bse.height): self._height, + self.schema.predicate(ns.bse.orientation): self._orientation, + self.schema.predicate(ns.bse.orientation_label): self._orientation_label, + self.schema.predicate(ns.bse.altitude): self._altitude, + self.schema.predicate(ns.bse.latitude): self._latitude, + self.schema.predicate(ns.bse.longitude): self._longitude, + } + + def extract( + self, + subject: node.Node, + content: dict, + principals: typing.Iterable[bsfs.schema.Predicate], + ) -> typing.Iterator[typing.Tuple[node.Node, bsfs.schema.Predicate, typing.Any]]: + for pred in principals: + # find callback + clbk = self._callmap.get(pred) + if clbk is None: + continue + # get value + value = clbk(content) + if value is None: + continue + # produce triple + yield subject, pred, value + + def _date(self, content: dict): # FIXME: Return type annotation + raise NotImplementedError() + #date_keys = ( + # 'Exif.Photo.DateTimeOriginal', + # 'Exif.Photo.DateTimeDigitized', + # 'Exif.Image.DateTime', + # ) + #for key in date_keys: + # if key in content: + # dt = content[key].value + # if dt.tzinfo is None: + # dt = dt.replace(tzinfo=ttime.NoTimeZone) + # return dt + #return None + + + ## photometrics + + def _exposure(self, content: dict) -> typing.Optional[float]: + if 'Exif.Photo.ExposureTime' in content: + return 1.0 / float(Fraction(content['Exif.Photo.ExposureTime'])) + return None + + def _aperture(self, content: dict) -> typing.Optional[float]: + if 'Exif.Photo.FNumber' in content: + return float(Fraction(content['Exif.Photo.FNumber'])) + return None + + def _iso(self, content: dict) -> typing.Optional[int]: + if 'Exif.Photo.ISOSpeedRatings' in content: + return int(content['Exif.Photo.ISOSpeedRatings']) + return None + + def _focal_length(self, content: dict) -> typing.Optional[float]: + if 'Exif.Photo.FocalLength' in content: + return float(Fraction(content['Exif.Photo.FocalLength'])) + return None + + + ## image dimensions + + def _width(self, content: dict) -> typing.Optional[int]: + # FIXME: consider orientation! + if 'Exif.Photo.PixelXDimension' in content: + return int(content['Exif.Photo.PixelXDimension']) + return None + + def _height(self, content: dict) -> typing.Optional[int]: + # FIXME: consider orientation! + if 'Exif.Photo.PixelYDimension' in content: + return int(content['Exif.Photo.PixelYDimension']) + return None + + def _orientation(self, content: dict) -> typing.Optional[int]: + if 'Exif.Image.Orientation' in content: + return int(content['Exif.Image.Orientation']) + return None + + def _orientation_label(self, content: dict) -> typing.Optional[str]: + width = self._width(content) + height = self._height(content) + ori = self._orientation(content) + if width is not None and height is not None and ori is not None: + if ori <= 4: + return 'landscape' if width >= height else 'portrait' + else: + return 'portrait' if width >= height else 'landscape' + return None + + + ## location + + def _altitude(self, content: dict) -> typing.Optional[float]: + if 'Exif.GPSInfo.GPSAltitude' in content: + return float(Fraction(content['Exif.GPSInfo.GPSAltitude'])) + return None + + def _latitude(self, content: dict) -> typing.Optional[float]: + if 'Exif.GPSInfo.GPSLatitude' in content: + return _gps_to_dec(content['Exif.GPSInfo.GPSLatitude'].split()) + return None + + def _longitude(self, content: dict) -> typing.Optional[float]: + if 'Exif.GPSInfo.GPSLongitude' in content: + return _gps_to_dec(content['Exif.GPSInfo.GPSLongitude'].split()) + return None + +## EOF ## diff --git a/bsie/reader/exif.py b/bsie/reader/exif.py new file mode 100644 index 0000000..e087bec --- /dev/null +++ b/bsie/reader/exif.py @@ -0,0 +1,49 @@ +""" + +Part of the bsie module. +A copy of the license is provided with the project. +Author: Matthias Baumgartner, 2022 +""" +# standard imports +import typing + +# external imports +import pyexiv2 + +# bsie imports +from bsie.utils import errors, filematcher + +# inner-module imports +from . import base + +# constants +MATCH_RULE = 'mime=image/jpeg' + +# exports +__all__: typing.Sequence[str] = ( + 'Exif', + ) + + +## code ## + +class Exif(base.Reader): + """Use pyexiv2 to read exif metadata from image files.""" + + def __init__(self): + self._match = filematcher.parse(MATCH_RULE) + + def __call__(self, path: str) -> dict: + # perform quick checks first + if not self._match(path): + raise errors.UnsupportedFileFormatError(path) + + try: + # open the file + img = pyexiv2.Image(path) + # read metadata + return img.read_exif() + except TypeError as err: + raise errors.ReaderError(path) from err + +## EOF ## diff --git a/test/extractor/image/test_photometrics.py b/test/extractor/image/test_photometrics.py new file mode 100644 index 0000000..6e3b661 --- /dev/null +++ b/test/extractor/image/test_photometrics.py @@ -0,0 +1,138 @@ +""" + +Part of the bsie test suite. +A copy of the license is provided with the project. +Author: Matthias Baumgartner, 2022 +""" +# standard imports +import unittest + +# bsie imports +from bsie.extractor import base +from bsie.utils import bsfs, node as _node, ns + +# objects to test +from bsie.extractor.image.photometrics import Exif, _gps_to_dec + + +## code ## + +class TestExif(unittest.TestCase): + + def test_gps_to_dec(self): + # deg+min+sec format + self.assertAlmostEqual(_gps_to_dec('29/1 58/1 45/1'.split()), 29.979167, 6) + self.assertAlmostEqual(_gps_to_dec('31 08 03'.split()), 31.134167, 6) + self.assertAlmostEqual(_gps_to_dec('20 40 586/10'.split()), 20.682944, 6) + self.assertAlmostEqual(_gps_to_dec('88/1 34 68/10'.split()), 88.568556, 6) + # deg+min format + self.assertAlmostEqual(_gps_to_dec('13 472167/10000 0/1 '.split()), 13.786945, 6) + self.assertAlmostEqual(_gps_to_dec('104/1 3215/100 0/1'.split()), 104.535833, 6) + + def test_eq(self): + # identical instances are equal + self.assertEqual(Exif(), Exif()) + self.assertEqual(hash(Exif()), hash(Exif())) + # comparison respects type + class Foo(): pass + self.assertNotEqual(Exif(), Foo()) + self.assertNotEqual(hash(Exif()), hash(Foo())) + self.assertNotEqual(Exif(), 1234) + self.assertNotEqual(hash(Exif()), hash(1234)) + self.assertNotEqual(Exif(), None) + self.assertNotEqual(hash(Exif()), hash(None)) + + def test_schema(self): + self.assertSetEqual({pred.uri for pred in Exif().schema.predicates()}, { + ns.bsfs.Predicate, + ns.bse.exposure, + ns.bse.aperture, + ns.bse.iso, + ns.bse.focal_length, + ns.bse.width, + ns.bse.height, + ns.bse.orientation, + ns.bse.orientation_label, + ns.bse.altitude, + ns.bse.latitude, + ns.bse.longitude, + }) + + def test_extract(self): + ext = Exif() + node = _node.Node(ns.bsfs.File, '') # Blank node + content = { + 'Exif.Photo.ExposureTime': '10/600', + 'Exif.Photo.FNumber': '48/10', + 'Exif.Photo.ISOSpeedRatings': '400', + 'Exif.Photo.FocalLength': '460/10', + 'Exif.Photo.PixelXDimension': '4288', + 'Exif.Photo.PixelYDimension': '2848', + 'Exif.Image.Orientation': '1', + 'Exif.GPSInfo.GPSAltitude': '431/1', + 'Exif.GPSInfo.GPSLatitude': '46/1 11397/625 0/1', + 'Exif.GPSInfo.GPSLongitude': '7/1 131250/2500 0/1', + } + + # invalid principals are ignored + self.assertSetEqual(set(ext.extract(node, content, {ns.bse.filename})), set()) + # extract finds all relevant information + self.assertSetEqual(set(ext.extract(node, content, {ext.schema.predicate(ns.bse.exposure)})), + {(node, ext.schema.predicate(ns.bse.exposure), 60.0)}) + self.assertSetEqual(set(ext.extract(node, content, {ext.schema.predicate(ns.bse.aperture)})), + {(node, ext.schema.predicate(ns.bse.aperture), 4.8)}) + self.assertSetEqual(set(ext.extract(node, content, {ext.schema.predicate(ns.bse.iso)})), + {(node, ext.schema.predicate(ns.bse.iso), 400)}) + self.assertSetEqual(set(ext.extract(node, content, {ext.schema.predicate(ns.bse.focal_length)})), + {(node, ext.schema.predicate(ns.bse.focal_length), 46.0)}) + self.assertSetEqual(set(ext.extract(node, content, {ext.schema.predicate(ns.bse.width)})), + {(node, ext.schema.predicate(ns.bse.width), 4288)}) + self.assertSetEqual(set(ext.extract(node, content, {ext.schema.predicate(ns.bse.height)})), + {(node, ext.schema.predicate(ns.bse.height), 2848)}) + self.assertSetEqual(set(ext.extract(node, content, {ext.schema.predicate(ns.bse.orientation)})), + {(node, ext.schema.predicate(ns.bse.orientation), 1)}) + self.assertSetEqual(set(ext.extract(node, content, {ext.schema.predicate(ns.bse.orientation_label)})), + {(node, ext.schema.predicate(ns.bse.orientation_label), 'landscape')}) + self.assertSetEqual(set(ext.extract(node, content, {ext.schema.predicate(ns.bse.altitude)})), + {(node, ext.schema.predicate(ns.bse.altitude), 431.0)}) + self.assertSetEqual(set(ext.extract(node, content, {ext.schema.predicate(ns.bse.latitude)})), + {(node, ext.schema.predicate(ns.bse.latitude), 46.30392)}) + self.assertSetEqual(set(ext.extract(node, content, {ext.schema.predicate(ns.bse.longitude)})), + {(node, ext.schema.predicate(ns.bse.longitude), 7.875)}) + + # can pass multiple principals + self.assertSetEqual(set(ext.extract(node, content, { + ext.schema.predicate(ns.bse.exposure), + ext.schema.predicate(ns.bse.iso), + ext.schema.predicate(ns.bse.focal_length), + })), { + (node, ext.schema.predicate(ns.bse.exposure), 60.0), + (node, ext.schema.predicate(ns.bse.iso), 400), + (node, ext.schema.predicate(ns.bse.focal_length), 46.0), + }) + + # principals w/o content are ignored + self.assertSetEqual(set(ext.extract( + node, + content={'Exif.Photo.ExposureTime': '10/600'}, + principals={ + ext.schema.predicate(ns.bse.exposure), + ext.schema.predicate(ns.bse.iso), + ext.schema.predicate(ns.bse.focal_length), + }) + ), { + (node, ext.schema.predicate(ns.bse.exposure), 60.0), + }) + + # empty content is acceptable + self.assertSetEqual(set(ext.extract(node, {}, set(ext.principals))), set()) + # no principals is acceptable + self.assertSetEqual(set(ext.extract(node, content, set())), set()) + + +## main ## + +if __name__ == '__main__': + unittest.main() + +## EOF ## diff --git a/test/reader/test_exif.py b/test/reader/test_exif.py new file mode 100644 index 0000000..f1330da --- /dev/null +++ b/test/reader/test_exif.py @@ -0,0 +1,48 @@ +""" + +Part of the bsie test suite. +A copy of the license is provided with the project. +Author: Matthias Baumgartner, 2022 +""" +# standard imports +import os +import unittest + +# bsie imports +from bsie.utils import errors + +# objects to test +from bsie.reader.exif import Exif + + +## code ## + +class TestExif(unittest.TestCase): + def test_call(self): + rdr = Exif() + # discards non-image files + self.assertRaises(errors.UnsupportedFileFormatError, rdr, 'invalid.doc') + # raises on invalid image files + self.assertRaises(errors.ReaderError, rdr, 'invalid.jpg') + # returns dict with exif info + self.assertDictEqual(rdr(os.path.join(os.path.dirname(__file__), 'testimage_exif.jpg')), { + 'Exif.Image.Artist': 'nobody', + 'Exif.Image.ExifTag': '110', + 'Exif.Image.ResolutionUnit': '2', + 'Exif.Image.XResolution': '300/1', + 'Exif.Image.YCbCrPositioning': '1', + 'Exif.Image.YResolution': '300/1', + 'Exif.Photo.ColorSpace': '65535', + 'Exif.Photo.ComponentsConfiguration': '1 2 3 0', + 'Exif.Photo.ExifVersion': '48 50 51 50', + 'Exif.Photo.FlashpixVersion': '48 49 48 48', + 'Exif.Photo.ISOSpeedRatings': '200', + }) + + +## main ## + +if __name__ == '__main__': + unittest.main() + +## EOF ## diff --git a/test/reader/testimage_exif.jpg b/test/reader/testimage_exif.jpg Binary files differnew file mode 100644 index 0000000..a774bc2 --- /dev/null +++ b/test/reader/testimage_exif.jpg |