aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Matthias Baumgartner <dev@igsor.net> 2023-03-01 21:38:09 +0100
committer Matthias Baumgartner <dev@igsor.net> 2023-03-01 21:38:09 +0100
commit ec9105b690974b0246e36769506e735c4edf069a (patch)
tree c79a3a4489baae55fb74d84714ed728b79e50784
parent 464cc6cb54f55f6255bf0a485533c181d6018303 (diff)
download bsie-ec9105b690974b0246e36769506e735c4edf069a.tar.gz
bsie-ec9105b690974b0246e36769506e735c4edf069a.tar.bz2
bsie-ec9105b690974b0246e36769506e735c4edf069a.zip
Exif data reader and extractor
-rw-r--r-- bsie/apps/default_config.yaml 8
-rw-r--r-- bsie/extractor/image/photometrics.py 219
-rw-r--r-- bsie/reader/exif.py 49
-rw-r--r-- test/extractor/image/test_photometrics.py 138
-rw-r--r-- test/reader/test_exif.py 48
-rw-r--r-- test/reader/testimage_exif.jpg bin 0 -> 719 bytes
6 files changed, 459 insertions, 3 deletions
diff --git a/bsie/apps/default_config.yaml b/bsie/apps/default_config.yaml
index 4d99e22..a59b0f3 100644
--- a/bsie/apps/default_config.yaml
+++ b/bsie/apps/default_config.yaml
@@ -11,7 +11,9 @@ ExtractorBuilder:
- bsie.extractor.generic.stat.Stat: {}
- bsie.extractor.image.colors_spatial.ColorsSpatial:
- width: 2
- height: 2
- exp: 2
+ width: 32
+ height: 32
+ exp: 4
+
+ - bsie.extractor.image.photometrics.Exif: {}
diff --git a/bsie/extractor/image/photometrics.py b/bsie/extractor/image/photometrics.py
new file mode 100644
index 0000000..ae0a541
--- /dev/null
+++ b/bsie/extractor/image/photometrics.py
@@ -0,0 +1,219 @@
+"""
+
+Part of the bsie module.
+A copy of the license is provided with the project.
+Author: Matthias Baumgartner, 2022
+"""
+# standard imports
+from fractions import Fraction
+import typing
+
+# bsie imports
+from bsie.utils import bsfs, node, ns
+
+# inner-module imports
+from .. import base
+
+# exports
+__all__: typing.Sequence[str] = (
+ 'Exif',
+ )
+
+
+## code ##
+
+def _gps_to_dec(coords: typing.Sequence[typing.Union[str, float]]) -> float:
+    """Convert an EXIF GPS coordinate to decimal degrees.
+
+    *coords* holds exactly three components (degrees, minutes, seconds). Each
+    component may be anything :class:`fractions.Fraction` accepts, typically
+    the rational strings found in EXIF data such as ``'46/1'`` or ``'11397/625'``.
+
+    Two encodings are handled: deg+min+sec (seconds greater than zero) and
+    deg+min with fractional minutes (seconds equal to zero).
+
+    Raises a ValueError if *coords* does not have exactly three components or
+    if a component cannot be parsed as a rational number.
+    """
+    # unpack args (named so that the builtin min() is not shadowed)
+    deg_raw, min_raw, sec_raw = coords
+    # convert to float
+    degrees = float(Fraction(deg_raw))
+    minutes = float(Fraction(min_raw))
+    seconds = float(Fraction(sec_raw))
+
+    if seconds > 0:
+        # format is deg+min+sec
+        return (degrees * 3600 + minutes * 60 + seconds) / 3600
+    # format is deg+min
+    return degrees + minutes / 60
+
+
+class Exif(base.Extractor):
+    """Extract information from EXIF/IPTC tags of an image file.
+
+    The content is a dict that maps exiv2-style tag names (e.g.
+    ``'Exif.Photo.FNumber'``) to the tag's string value; presumably this is
+    what the reader named in ``CONTENT_READER`` returns. Tags that are absent
+    from the content produce no triple.
+    """
+
+    CONTENT_READER = 'bsie.reader.exif.Exif'
+
+    def __init__(self):
+        super().__init__(bsfs.schema.from_string(base.SCHEMA_PREAMBLE + '''
+            #bse:t_capture rdfs:subClassOf bsfs:Predicate ;
+            #    rdfs:domain bsfs:File ;
+            #    rdfs:range xsd:float ;
+            #    bsfs:unique "true"^^xsd:boolean .
+            bse:exposure rdfs:subClassOf bsfs:Predicate ;
+                rdfs:domain bsfs:File ;
+                rdfs:range xsd:float ;
+                bsfs:unique "true"^^xsd:boolean .
+            bse:aperture rdfs:subClassOf bsfs:Predicate ;
+                rdfs:domain bsfs:File ;
+                rdfs:range xsd:float ;
+                bsfs:unique "true"^^xsd:boolean .
+            bse:iso rdfs:subClassOf bsfs:Predicate ;
+                rdfs:domain bsfs:File ;
+                rdfs:range xsd:integer ;
+                bsfs:unique "true"^^xsd:boolean .
+            bse:focal_length rdfs:subClassOf bsfs:Predicate ;
+                rdfs:domain bsfs:File ;
+                rdfs:range xsd:float ;
+                bsfs:unique "true"^^xsd:boolean .
+            bse:width rdfs:subClassOf bsfs:Predicate ;
+                rdfs:domain bsfs:File ;
+                rdfs:range xsd:integer ;
+                bsfs:unique "true"^^xsd:boolean .
+            bse:height rdfs:subClassOf bsfs:Predicate ;
+                rdfs:domain bsfs:File ;
+                rdfs:range xsd:integer ;
+                bsfs:unique "true"^^xsd:boolean .
+            bse:orientation rdfs:subClassOf bsfs:Predicate ;
+                rdfs:domain bsfs:File ;
+                rdfs:range xsd:integer ;
+                bsfs:unique "true"^^xsd:boolean .
+            bse:orientation_label rdfs:subClassOf bsfs:Predicate ;
+                rdfs:domain bsfs:File ;
+                rdfs:range xsd:string ;
+                bsfs:unique "true"^^xsd:boolean .
+            bse:altitude rdfs:subClassOf bsfs:Predicate ;
+                rdfs:domain bsfs:File ;
+                rdfs:range xsd:float ;
+                bsfs:unique "true"^^xsd:boolean .
+            bse:latitude rdfs:subClassOf bsfs:Predicate ;
+                rdfs:domain bsfs:File ;
+                rdfs:range xsd:float ;
+                bsfs:unique "true"^^xsd:boolean .
+            bse:longitude rdfs:subClassOf bsfs:Predicate ;
+                rdfs:domain bsfs:File ;
+                rdfs:range xsd:float ;
+                bsfs:unique "true"^^xsd:boolean .
+            '''))
+        # initialize mapping from predicate to callback
+        self._callmap = {
+            #self.schema.predicate(ns.bse.t_capture): self._date,
+            self.schema.predicate(ns.bse.exposure): self._exposure,
+            self.schema.predicate(ns.bse.aperture): self._aperture,
+            self.schema.predicate(ns.bse.iso): self._iso,
+            self.schema.predicate(ns.bse.focal_length): self._focal_length,
+            self.schema.predicate(ns.bse.width): self._width,
+            self.schema.predicate(ns.bse.height): self._height,
+            self.schema.predicate(ns.bse.orientation): self._orientation,
+            self.schema.predicate(ns.bse.orientation_label): self._orientation_label,
+            self.schema.predicate(ns.bse.altitude): self._altitude,
+            self.schema.predicate(ns.bse.latitude): self._latitude,
+            self.schema.predicate(ns.bse.longitude): self._longitude,
+            }
+
+    def extract(
+            self,
+            subject: node.Node,
+            content: dict,
+            principals: typing.Iterable[bsfs.schema.Predicate],
+            ) -> typing.Iterator[typing.Tuple[node.Node, bsfs.schema.Predicate, typing.Any]]:
+        """Yield a (subject, predicate, value) triple for each requested principal.
+
+        Principals without a registered callback are skipped, and so are
+        principals whose callback returns None for *content*.
+        """
+        for pred in principals:
+            # find callback
+            clbk = self._callmap.get(pred)
+            if clbk is None:
+                continue
+            # get value
+            value = clbk(content)
+            if value is None:
+                continue
+            # produce triple
+            yield subject, pred, value
+
+    def _date(self, content: dict): # FIXME: Return type annotation
+        """Not implemented yet; the intended logic is sketched in the comments below."""
+        raise NotImplementedError()
+        #date_keys = (
+        #    'Exif.Photo.DateTimeOriginal',
+        #    'Exif.Photo.DateTimeDigitized',
+        #    'Exif.Image.DateTime',
+        #    )
+        #for key in date_keys:
+        #    if key in content:
+        #        dt = content[key].value
+        #        if dt.tzinfo is None:
+        #            dt = dt.replace(tzinfo=ttime.NoTimeZone)
+        #        return dt
+        #return None
+
+
+    ## helpers
+
+    @staticmethod
+    def _rational(content: dict, key: str) -> typing.Optional[float]:
+        """Return the rational tag *key* (e.g. ``'48/10'``) as float, or None if absent."""
+        if key not in content:
+            return None
+        return float(Fraction(content[key]))
+
+    @staticmethod
+    def _integer(content: dict, key: str) -> typing.Optional[int]:
+        """Return the integer tag *key* as int, or None if absent."""
+        if key not in content:
+            return None
+        return int(content[key])
+
+
+    ## photometrics
+
+    def _exposure(self, content: dict) -> typing.Optional[float]:
+        """Return the reciprocal of the exposure time (``'10/600'`` gives 60.0), if known."""
+        exposure_time = self._rational(content, 'Exif.Photo.ExposureTime')
+        if exposure_time is None or exposure_time == 0:
+            # a zero exposure time cannot be inverted; treat it like a missing tag
+            return None
+        return 1.0 / exposure_time
+
+    def _aperture(self, content: dict) -> typing.Optional[float]:
+        """Return the F-number, if known."""
+        return self._rational(content, 'Exif.Photo.FNumber')
+
+    def _iso(self, content: dict) -> typing.Optional[int]:
+        """Return the ISO speed rating, if known."""
+        return self._integer(content, 'Exif.Photo.ISOSpeedRatings')
+
+    def _focal_length(self, content: dict) -> typing.Optional[float]:
+        """Return the focal length, if known."""
+        return self._rational(content, 'Exif.Photo.FocalLength')
+
+
+    ## image dimensions
+
+    def _width(self, content: dict) -> typing.Optional[int]:
+        """Return the stored pixel width (not corrected for orientation), if known."""
+        # FIXME: consider orientation!
+        return self._integer(content, 'Exif.Photo.PixelXDimension')
+
+    def _height(self, content: dict) -> typing.Optional[int]:
+        """Return the stored pixel height (not corrected for orientation), if known."""
+        # FIXME: consider orientation!
+        return self._integer(content, 'Exif.Photo.PixelYDimension')
+
+    def _orientation(self, content: dict) -> typing.Optional[int]:
+        """Return the numeric EXIF orientation code, if known."""
+        return self._integer(content, 'Exif.Image.Orientation')
+
+    def _orientation_label(self, content: dict) -> typing.Optional[str]:
+        """Return 'landscape' or 'portrait', or None unless width, height and orientation are all known.
+
+        For orientation codes up to 4 the label follows the stored dimensions;
+        for larger codes the label is flipped.
+        """
+        width = self._width(content)
+        height = self._height(content)
+        ori = self._orientation(content)
+        if width is None or height is None or ori is None:
+            return None
+        if ori <= 4:
+            return 'landscape' if width >= height else 'portrait'
+        return 'portrait' if width >= height else 'landscape'
+
+
+    ## location
+
+    def _altitude(self, content: dict) -> typing.Optional[float]:
+        """Return the GPS altitude, if known."""
+        return self._rational(content, 'Exif.GPSInfo.GPSAltitude')
+
+    def _latitude(self, content: dict) -> typing.Optional[float]:
+        """Return the GPS latitude in decimal degrees, if known."""
+        if 'Exif.GPSInfo.GPSLatitude' in content:
+            # the tag holds whitespace-separated deg/min/sec rationals
+            return _gps_to_dec(content['Exif.GPSInfo.GPSLatitude'].split())
+        return None
+
+    def _longitude(self, content: dict) -> typing.Optional[float]:
+        """Return the GPS longitude in decimal degrees, if known."""
+        if 'Exif.GPSInfo.GPSLongitude' in content:
+            # the tag holds whitespace-separated deg/min/sec rationals
+            return _gps_to_dec(content['Exif.GPSInfo.GPSLongitude'].split())
+        return None
+
+## EOF ##
diff --git a/bsie/reader/exif.py b/bsie/reader/exif.py
new file mode 100644
index 0000000..e087bec
--- /dev/null
+++ b/bsie/reader/exif.py
@@ -0,0 +1,49 @@
+"""
+
+Part of the bsie module.
+A copy of the license is provided with the project.
+Author: Matthias Baumgartner, 2022
+"""
+# standard imports
+import typing
+
+# external imports
+import pyexiv2
+
+# bsie imports
+from bsie.utils import errors, filematcher
+
+# inner-module imports
+from . import base
+
+# constants
+MATCH_RULE = 'mime=image/jpeg'
+
+# exports
+__all__: typing.Sequence[str] = (
+ 'Exif',
+ )
+
+
+## code ##
+
+class Exif(base.Reader):
+    """Use pyexiv2 to read exif metadata from image files."""
+
+    def __init__(self):
+        # file matcher that decides whether a path is supported (see MATCH_RULE)
+        self._match = filematcher.parse(MATCH_RULE)
+
+    def __call__(self, path: str) -> dict:
+        """Return the exif metadata of the file at *path* as a dict.
+
+        Raises an UnsupportedFileFormatError if *path* is rejected by the
+        file matcher, and a ReaderError if pyexiv2 raises a TypeError while
+        opening or reading the file.
+        """
+        # perform quick checks first
+        if not self._match(path):
+            raise errors.UnsupportedFileFormatError(path)
+
+        try:
+            # open the file
+            img = pyexiv2.Image(path)
+            try:
+                # read metadata
+                return img.read_exif()
+            finally:
+                # release the image handle, even if reading failed
+                img.close()
+        except TypeError as err:
+            raise errors.ReaderError(path) from err
+
+## EOF ##
diff --git a/test/extractor/image/test_photometrics.py b/test/extractor/image/test_photometrics.py
new file mode 100644
index 0000000..6e3b661
--- /dev/null
+++ b/test/extractor/image/test_photometrics.py
@@ -0,0 +1,138 @@
+"""
+
+Part of the bsie test suite.
+A copy of the license is provided with the project.
+Author: Matthias Baumgartner, 2022
+"""
+# standard imports
+import unittest
+
+# bsie imports
+from bsie.extractor import base
+from bsie.utils import bsfs, node as _node, ns
+
+# objects to test
+from bsie.extractor.image.photometrics import Exif, _gps_to_dec
+
+
+## code ##
+
+class TestExif(unittest.TestCase):
+    """Tests for the Exif extractor and its GPS conversion helper."""
+
+    def test_gps_to_dec(self):
+        # coordinates are passed as three whitespace-split components; each
+        # component is either a plain integer or a 'numerator/denominator' string
+        # deg+min+sec format
+        self.assertAlmostEqual(_gps_to_dec('29/1 58/1 45/1'.split()), 29.979167, 6)
+        self.assertAlmostEqual(_gps_to_dec('31 08 03'.split()), 31.134167, 6)
+        self.assertAlmostEqual(_gps_to_dec('20 40 586/10'.split()), 20.682944, 6)
+        self.assertAlmostEqual(_gps_to_dec('88/1 34 68/10'.split()), 88.568556, 6)
+        # deg+min format
+        self.assertAlmostEqual(_gps_to_dec('13 472167/10000 0/1 '.split()), 13.786945, 6)
+        self.assertAlmostEqual(_gps_to_dec('104/1 3215/100 0/1'.split()), 104.535833, 6)
+
+    def test_eq(self):
+        # identical instances are equal
+        self.assertEqual(Exif(), Exif())
+        self.assertEqual(hash(Exif()), hash(Exif()))
+        # comparison respects type
+        class Foo(): pass
+        self.assertNotEqual(Exif(), Foo())
+        self.assertNotEqual(hash(Exif()), hash(Foo()))
+        self.assertNotEqual(Exif(), 1234)
+        self.assertNotEqual(hash(Exif()), hash(1234))
+        self.assertNotEqual(Exif(), None)
+        self.assertNotEqual(hash(Exif()), hash(None))
+
+    def test_schema(self):
+        # the schema exposes the predicate root plus one predicate per extracted property
+        self.assertSetEqual({pred.uri for pred in Exif().schema.predicates()}, {
+            ns.bsfs.Predicate,
+            ns.bse.exposure,
+            ns.bse.aperture,
+            ns.bse.iso,
+            ns.bse.focal_length,
+            ns.bse.width,
+            ns.bse.height,
+            ns.bse.orientation,
+            ns.bse.orientation_label,
+            ns.bse.altitude,
+            ns.bse.latitude,
+            ns.bse.longitude,
+            })
+
+    def test_extract(self):
+        ext = Exif()
+        node = _node.Node(ns.bsfs.File, '') # Blank node
+        # tag values are given as strings, rationals as 'numerator/denominator'
+        content = {
+            'Exif.Photo.ExposureTime': '10/600',
+            'Exif.Photo.FNumber': '48/10',
+            'Exif.Photo.ISOSpeedRatings': '400',
+            'Exif.Photo.FocalLength': '460/10',
+            'Exif.Photo.PixelXDimension': '4288',
+            'Exif.Photo.PixelYDimension': '2848',
+            'Exif.Image.Orientation': '1',
+            'Exif.GPSInfo.GPSAltitude': '431/1',
+            'Exif.GPSInfo.GPSLatitude': '46/1 11397/625 0/1',
+            'Exif.GPSInfo.GPSLongitude': '7/1 131250/2500 0/1',
+            }
+
+        # invalid principals are ignored
+        self.assertSetEqual(set(ext.extract(node, content, {ns.bse.filename})), set())
+        # extract finds all relevant information
+        # (the exposure is the reciprocal of the exposure time: 10/600 -> 60.0)
+        self.assertSetEqual(set(ext.extract(node, content, {ext.schema.predicate(ns.bse.exposure)})),
+            {(node, ext.schema.predicate(ns.bse.exposure), 60.0)})
+        self.assertSetEqual(set(ext.extract(node, content, {ext.schema.predicate(ns.bse.aperture)})),
+            {(node, ext.schema.predicate(ns.bse.aperture), 4.8)})
+        self.assertSetEqual(set(ext.extract(node, content, {ext.schema.predicate(ns.bse.iso)})),
+            {(node, ext.schema.predicate(ns.bse.iso), 400)})
+        self.assertSetEqual(set(ext.extract(node, content, {ext.schema.predicate(ns.bse.focal_length)})),
+            {(node, ext.schema.predicate(ns.bse.focal_length), 46.0)})
+        self.assertSetEqual(set(ext.extract(node, content, {ext.schema.predicate(ns.bse.width)})),
+            {(node, ext.schema.predicate(ns.bse.width), 4288)})
+        self.assertSetEqual(set(ext.extract(node, content, {ext.schema.predicate(ns.bse.height)})),
+            {(node, ext.schema.predicate(ns.bse.height), 2848)})
+        self.assertSetEqual(set(ext.extract(node, content, {ext.schema.predicate(ns.bse.orientation)})),
+            {(node, ext.schema.predicate(ns.bse.orientation), 1)})
+        # width >= height with orientation 1 yields 'landscape'
+        self.assertSetEqual(set(ext.extract(node, content, {ext.schema.predicate(ns.bse.orientation_label)})),
+            {(node, ext.schema.predicate(ns.bse.orientation_label), 'landscape')})
+        self.assertSetEqual(set(ext.extract(node, content, {ext.schema.predicate(ns.bse.altitude)})),
+            {(node, ext.schema.predicate(ns.bse.altitude), 431.0)})
+        self.assertSetEqual(set(ext.extract(node, content, {ext.schema.predicate(ns.bse.latitude)})),
+            {(node, ext.schema.predicate(ns.bse.latitude), 46.30392)})
+        self.assertSetEqual(set(ext.extract(node, content, {ext.schema.predicate(ns.bse.longitude)})),
+            {(node, ext.schema.predicate(ns.bse.longitude), 7.875)})
+
+        # can pass multiple principals
+        self.assertSetEqual(set(ext.extract(node, content, {
+            ext.schema.predicate(ns.bse.exposure),
+            ext.schema.predicate(ns.bse.iso),
+            ext.schema.predicate(ns.bse.focal_length),
+            })), {
+                (node, ext.schema.predicate(ns.bse.exposure), 60.0),
+                (node, ext.schema.predicate(ns.bse.iso), 400),
+                (node, ext.schema.predicate(ns.bse.focal_length), 46.0),
+            })
+
+        # principals w/o content are ignored
+        self.assertSetEqual(set(ext.extract(
+            node,
+            content={'Exif.Photo.ExposureTime': '10/600'},
+            principals={
+                ext.schema.predicate(ns.bse.exposure),
+                ext.schema.predicate(ns.bse.iso),
+                ext.schema.predicate(ns.bse.focal_length),
+            })
+            ), {
+                (node, ext.schema.predicate(ns.bse.exposure), 60.0),
+            })
+
+        # empty content is acceptable
+        self.assertSetEqual(set(ext.extract(node, {}, set(ext.principals))), set())
+        # no principals is acceptable
+        self.assertSetEqual(set(ext.extract(node, content, set())), set())
+
+
+## main ##
+
+# run the test suite when this file is executed as a script
+if __name__ == '__main__':
+    unittest.main()
+
+## EOF ##
diff --git a/test/reader/test_exif.py b/test/reader/test_exif.py
new file mode 100644
index 0000000..f1330da
--- /dev/null
+++ b/test/reader/test_exif.py
@@ -0,0 +1,48 @@
+"""
+
+Part of the bsie test suite.
+A copy of the license is provided with the project.
+Author: Matthias Baumgartner, 2022
+"""
+# standard imports
+import os
+import unittest
+
+# bsie imports
+from bsie.utils import errors
+
+# objects to test
+from bsie.reader.exif import Exif
+
+
+## code ##
+
+class TestExif(unittest.TestCase):
+    """Tests for the bsie.reader.exif.Exif reader."""
+
+    def test_call(self):
+        rdr = Exif()
+        # discards non-image files
+        self.assertRaises(errors.UnsupportedFileFormatError, rdr, 'invalid.doc')
+        # raises on invalid image files
+        self.assertRaises(errors.ReaderError, rdr, 'invalid.jpg')
+        # returns dict with exif info
+        # (the fixture image lives next to this test file; all tag values are strings)
+        self.assertDictEqual(rdr(os.path.join(os.path.dirname(__file__), 'testimage_exif.jpg')), {
+            'Exif.Image.Artist': 'nobody',
+            'Exif.Image.ExifTag': '110',
+            'Exif.Image.ResolutionUnit': '2',
+            'Exif.Image.XResolution': '300/1',
+            'Exif.Image.YCbCrPositioning': '1',
+            'Exif.Image.YResolution': '300/1',
+            'Exif.Photo.ColorSpace': '65535',
+            'Exif.Photo.ComponentsConfiguration': '1 2 3 0',
+            'Exif.Photo.ExifVersion': '48 50 51 50',
+            'Exif.Photo.FlashpixVersion': '48 49 48 48',
+            'Exif.Photo.ISOSpeedRatings': '200',
+            })
+
+
+## main ##
+
+# run the test suite when this file is executed as a script
+if __name__ == '__main__':
+    unittest.main()
+
+## EOF ##
diff --git a/test/reader/testimage_exif.jpg b/test/reader/testimage_exif.jpg
new file mode 100644
index 0000000..a774bc2
--- /dev/null
+++ b/test/reader/testimage_exif.jpg
Binary files differ