diff options
author | Matthias Baumgartner <dev@igsor.net> | 2023-03-05 19:22:58 +0100 |
---|---|---|
committer | Matthias Baumgartner <dev@igsor.net> | 2023-03-05 19:22:58 +0100 |
commit | a35b33f4f1ddcf6f1bb8ab0f41b87bf2b847f11d (patch) | |
tree | fb220da28bb7248ebf37ce09af5de88f2c1aaad4 /bsie/extractor | |
parent | 7582c280ad5324a2f0427999911c7e7abc14a6ab (diff) | |
parent | af81318ae9311fd0b0e16949cef3cfaf7996970b (diff) | |
download | bsie-0.23.03.tar.gz bsie-0.23.03.tar.bz2 bsie-0.23.03.zip |
Diffstat (limited to 'bsie/extractor')
-rw-r--r-- | bsie/extractor/__init__.py | 14 | ||||
-rw-r--r-- | bsie/extractor/base.py | 116 | ||||
-rw-r--r-- | bsie/extractor/builder.py | 72 | ||||
-rw-r--r-- | bsie/extractor/generic/__init__.py | 5 | ||||
-rw-r--r-- | bsie/extractor/generic/constant.py | 14 | ||||
-rw-r--r-- | bsie/extractor/generic/path.py | 17 | ||||
-rw-r--r-- | bsie/extractor/generic/stat.py | 18 | ||||
-rw-r--r-- | bsie/extractor/image/__init__.py | 8 | ||||
-rw-r--r-- | bsie/extractor/image/colors_spatial.py | 150 | ||||
-rw-r--r-- | bsie/extractor/image/photometrics.py | 211 | ||||
-rw-r--r-- | bsie/extractor/preview.py | 96 |
11 files changed, 683 insertions, 38 deletions
diff --git a/bsie/extractor/__init__.py b/bsie/extractor/__init__.py index ef31343..36fa9ba 100644 --- a/bsie/extractor/__init__.py +++ b/bsie/extractor/__init__.py @@ -2,14 +2,18 @@ Each Extractor class is linked to the Reader class whose content it requires. -Part of the bsie module. -A copy of the license is provided with the project. -Author: Matthias Baumgartner, 2022 """ -# imports +# standard imports import typing +# inner-module imports +from .base import Extractor +from .builder import ExtractorBuilder + # exports -__all__: typing.Sequence[str] = [] +__all__: typing.Sequence[str] = ( + 'Extractor', + 'ExtractorBuilder', + ) ## EOF ## diff --git a/bsie/extractor/base.py b/bsie/extractor/base.py new file mode 100644 index 0000000..f92d7cc --- /dev/null +++ b/bsie/extractor/base.py @@ -0,0 +1,116 @@ +"""The Extractor classes transform content into triples. +""" +# standard imports +import abc +import typing + +# bsie imports +from bsie.utils import bsfs, node, ns + +# exports +__all__: typing.Sequence[str] = ( + 'Extractor', + ) + +# constants + +# essential definitions typically used in extractor schemas. +# NOTE: This preamble is only for convenience; Each Extractor must implement its use, if so desired. +SCHEMA_PREAMBLE = ''' + # common external prefixes + prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> + prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> + prefix xsd: <http://www.w3.org/2001/XMLSchema#> + prefix schema: <http://schema.org/> + + # common bsfs prefixes + prefix bsfs: <https://schema.bsfs.io/core/> + prefix bsl: <https://schema.bsfs.io/core/Literal/> + prefix bsa: <https://schema.bsfs.io/core/Literal/Array/> + prefix bsd: <https://schema.bsfs.io/core/distance#> + + prefix bsie: <https://schema.bsfs.io/ie/> + prefix bsn: <https://schema.bsfs.io/ie/Node/> + prefix bse: <https://schema.bsfs.io/ie/Node/Entity#> + prefix bsp: <https://schema.bsfs.io/ie/Node/Preview#> + + # default definitions + bsl:Array rdfs:subClassOf bsfs:Literal . + bsl:Number rdfs:subClassOf bsfs:Literal . + bsl:Time rdfs:subClassOf bsfs:Literal . + bsa:Feature rdfs:subClassOf bsl:Array ; + bsfs:dimension "1"^^xsd:integer ; + bsfs:dtype <https://schema.bsfs.io/core/dtype#f16> ; + bsfs:distance bsd:euclidean . + + # essential nodes + bsn:Entity rdfs:subClassOf bsfs:Node . + + # common definitions + xsd:string rdfs:subClassOf bsfs:Literal . + xsd:integer rdfs:subClassOf bsl:Number . + xsd:float rdfs:subClassOf bsl:Number . + + ''' + + +## code ## + +class Extractor(abc.ABC): + """Produce (subject, predicate, value)-triples from some content. + The Extractor produces princpal predicates that provide information + about the content itself (i.e., triples that include the subject), + and may also generate triples with auxiliary predicates if the + extracted value is a node itself. + """ + + # what type of content is expected (i.e. reader subclass). + CONTENT_READER: typing.Optional[str] = None + + # extractor schema. + _schema: bsfs.schema.Schema + + def __init__(self, schema: bsfs.schema.Schema): + self._schema = schema + + def __str__(self) -> str: + return bsfs.typename(self) + + def __repr__(self) -> str: + return f'{bsfs.typename(self)}()' + + def __eq__(self, other: typing.Any) -> bool: + return isinstance(other, type(self)) \ + and self.CONTENT_READER == other.CONTENT_READER \ + and self.schema == other.schema + + def __hash__(self) -> int: + return hash((type(self), self.CONTENT_READER, self.schema)) + + @property + def schema(self) -> bsfs.schema.Schema: + """Return the extractor's schema.""" + return self._schema + + @property + def principals(self) -> typing.Iterator[bsfs.schema.Predicate]: + """Return the principal predicates, i.e., relations from/to the extraction subject.""" + ent = self.schema.node(ns.bsn.Entity) + return ( + pred + for pred + in self.schema.predicates() + if pred.domain <= ent or (pred.range is not None and pred.range <= ent) + ) + + @abc.abstractmethod + def extract( + self, + subject: node.Node, + content: typing.Any, + principals: typing.Iterable[bsfs.schema.Predicate], + ) -> typing.Iterator[typing.Tuple[node.Node, bsfs.schema.Predicate, typing.Any]]: + """Return (node, predicate, value) triples.""" + # FIXME: type annotation could be more strict: value is Hashable + +## EOF ## diff --git a/bsie/extractor/builder.py b/bsie/extractor/builder.py new file mode 100644 index 0000000..d691b0e --- /dev/null +++ b/bsie/extractor/builder.py @@ -0,0 +1,72 @@ + +# standard imports +import typing + +# bsie imports +from bsie.utils import bsfs, errors, safe_load, unpack_qualified_name + +# inner-module imports +from . import base + +# exports +__all__: typing.Sequence[str] = ( + 'ExtractorBuilder', + ) + + +## code ## + +class ExtractorBuilder(): + """Build `bsie.base.Extractor instances. + + It is permissible to build multiple instances of the same extractor + (typically with different arguments), hence the ExtractorBuilder + receives a list of build specifications. Each specification is + a dict with a single key (extractor's qualified name) and a dict + to be used as keyword arguments. + Example: [{'bsie.extractor.generic.path.Path': {}}, ] + + """ + + # build specifications + _specs: typing.List[typing.Dict[str, typing.Dict[str, typing.Any]]] + + def __init__(self, specs: typing.List[typing.Dict[str, typing.Dict[str, typing.Any]]]): + self._specs = specs + + def __iter__(self) -> typing.Iterator[int]: + """Iterate over extractor specifications.""" + return iter(range(len(self._specs))) + + def build(self, index: int) -> base.Extractor: + """Return an instance of the n'th extractor (n=*index*).""" + # get build instructions + specs = self._specs[index] + + # check specs structure. expecting[{name: {kwargs}}] + if not isinstance(specs, dict): + raise TypeError(f'expected a dict, found {bsfs.typename(specs)}') + if len(specs) != 1: + raise TypeError(f'expected a dict of length one, found {len(specs)}') + + # get name and args from specs + name = next(iter(specs.keys())) + kwargs = specs[name] + + # check kwargs structure + if not isinstance(kwargs, dict): + raise TypeError(f'expected a dict, found {bsfs.typename(kwargs)}') + + # check name and get module/class components + module_name, class_name = unpack_qualified_name(name) + + # import extractor class + cls = safe_load(module_name, class_name) + + try: # build and return instance + return cls(**kwargs) + + except Exception as err: + raise errors.BuilderError(f'failed to build extractor {name} due to {bsfs.typename(err)}: {err}') from err + +## EOF ## diff --git a/bsie/extractor/generic/__init__.py b/bsie/extractor/generic/__init__.py index 0cb7e7f..46a4bd6 100644 --- a/bsie/extractor/generic/__init__.py +++ b/bsie/extractor/generic/__init__.py @@ -3,11 +3,8 @@ files. Examples include file system information (file name and size, mime type, etc.) and information that is independent of the actual file (constant triples, host platform infos, current time, etc.). -Part of the bsie module. -A copy of the license is provided with the project. -Author: Matthias Baumgartner, 2022 """ -# imports +# standard imports import typing # exports diff --git a/bsie/extractor/generic/constant.py b/bsie/extractor/generic/constant.py index 11384e6..7acbe95 100644 --- a/bsie/extractor/generic/constant.py +++ b/bsie/extractor/generic/constant.py @@ -1,16 +1,14 @@ """The Constant extractor produces pre-specified triples. - -Part of the bsie module. -A copy of the license is provided with the project. -Author: Matthias Baumgartner, 2022 """ -# imports +# standard imports import typing # bsie imports -from bsie.base import extractor from bsie.utils import bsfs, node +# inner-module imports +from .. import base + # exports __all__: typing.Sequence[str] = ( 'Constant', @@ -19,7 +17,7 @@ __all__: typing.Sequence[str] = ( ## code ## -class Constant(extractor.Extractor): +class Constant(base.Extractor): """Extract information from file's path.""" CONTENT_READER = None @@ -32,7 +30,7 @@ class Constant(extractor.Extractor): schema: str, tuples: typing.Iterable[typing.Tuple[bsfs.URI, typing.Any]], ): - super().__init__(bsfs.schema.Schema.from_string(extractor.SCHEMA_PREAMBLE + schema)) + super().__init__(bsfs.schema.from_string(base.SCHEMA_PREAMBLE + schema)) # NOTE: Raises a KeyError if the predicate is not part of the schema self._tuples = tuple((self.schema.predicate(p_uri), value) for p_uri, value in tuples) # TODO: use schema instance for value checking diff --git a/bsie/extractor/generic/path.py b/bsie/extractor/generic/path.py index 7018e12..00c1121 100644 --- a/bsie/extractor/generic/path.py +++ b/bsie/extractor/generic/path.py @@ -1,15 +1,10 @@ -""" -Part of the bsie module. -A copy of the license is provided with the project. -Author: Matthias Baumgartner, 2022 -""" -# imports +# standard imports import os import typing # bsie imports -from bsie.base import extractor +from bsie.extractor import base from bsie.utils import bsfs, node, ns # exports @@ -20,7 +15,7 @@ __all__: typing.Sequence[str] = ( ## code ## -class Path(extractor.Extractor): +class Path(base.Extractor): """Extract information from file's path.""" CONTENT_READER = 'bsie.reader.path.Path' @@ -29,13 +24,13 @@ class Path(extractor.Extractor): _callmap: typing.Dict[bsfs.schema.Predicate, typing.Callable[[str], typing.Any]] def __init__(self): - super().__init__(bsfs.schema.Schema.from_string(extractor.SCHEMA_PREAMBLE + ''' + super().__init__(bsfs.schema.from_string(base.SCHEMA_PREAMBLE + ''' bse:filename rdfs:subClassOf bsfs:Predicate ; - rdfs:domain bsfs:File ; + rdfs:domain bsn:Entity ; rdfs:range xsd:string ; rdfs:label "File name"^^xsd:string ; schema:description "Filename of entity in some filesystem."^^xsd:string ; - bsfs:unique "false"^^xsd:boolean . + bsfs:unique "true"^^xsd:boolean . ''')) self._callmap = { self.schema.predicate(ns.bse.filename): self.__filename, diff --git a/bsie/extractor/generic/stat.py b/bsie/extractor/generic/stat.py index 0b9ce29..92b51f3 100644 --- a/bsie/extractor/generic/stat.py +++ b/bsie/extractor/generic/stat.py @@ -1,17 +1,15 @@ """Extract information from the file system, such as filesize. - -Part of the bsie module. -A copy of the license is provided with the project. -Author: Matthias Baumgartner, 2022 """ -# imports +# standard imports import os import typing # bsie imports -from bsie.base import extractor from bsie.utils import bsfs, node, ns +# inner-module imports +from .. import base + # exports __all__: typing.Sequence[str] = ( 'Stat', @@ -20,7 +18,7 @@ __all__: typing.Sequence[str] = ( ## code ## -class Stat(extractor.Extractor): +class Stat(base.Extractor): """Extract information from the file system.""" CONTENT_READER = 'bsie.reader.stat.Stat' @@ -29,13 +27,13 @@ class Stat(extractor.Extractor): _callmap: typing.Dict[bsfs.schema.Predicate, typing.Callable[[os.stat_result], typing.Any]] def __init__(self): - super().__init__(bsfs.schema.Schema.from_string(extractor.SCHEMA_PREAMBLE + ''' + super().__init__(bsfs.schema.from_string(base.SCHEMA_PREAMBLE + ''' bse:filesize rdfs:subClassOf bsfs:Predicate ; - rdfs:domain bsfs:File ; + rdfs:domain bsn:Entity ; rdfs:range xsd:integer ; rdfs:label "File size"^^xsd:string ; schema:description "File size of entity in some filesystem."^^xsd:string ; - bsfs:unique "false"^^xsd:boolean . + bsfs:unique "true"^^xsd:boolean . ''')) self._callmap = { self.schema.predicate(ns.bse.filesize): self.__filesize, diff --git a/bsie/extractor/image/__init__.py b/bsie/extractor/image/__init__.py new file mode 100644 index 0000000..f82424a --- /dev/null +++ b/bsie/extractor/image/__init__.py @@ -0,0 +1,8 @@ + +# standard imports +import typing + +# exports +__all__: typing.Sequence[str] = [] + +## EOF ## diff --git a/bsie/extractor/image/colors_spatial.py b/bsie/extractor/image/colors_spatial.py new file mode 100644 index 0000000..e6661a9 --- /dev/null +++ b/bsie/extractor/image/colors_spatial.py @@ -0,0 +1,150 @@ +"""Spatial color features. +""" +# standard imports +import typing + +# external imports +import PIL.Image +import numpy as np + +# bsie imports +from bsie.utils import bsfs, node, ns + +# inner-module imports +from .. import base + +# constants +FEATURE_NAME = ns.bsf.ColorsSpatial() + +# exports +__all__: typing.Sequence[str] = ( + 'ColorsSpatial', + ) + + +## code ## + +class ColorsSpatial(base.Extractor): + """Determine dominant colors of subregions in the image. + + Computes the domiant color of increasingly smaller subregions of the image. + """ + + CONTENT_READER = 'bsie.reader.image.Image' + + # Initial subregion width. + width: int + + # Initial subregion height. + height: int + + # Decrement exponent. + exp: float + + # Principal predicate's URI. + _predicate_name: bsfs.URI + + def __init__( + self, + width: int = 32, + height: int = 32, + exp: float = 4., + ): + # instance identifier + uuid = bsfs.uuid.UCID.from_dict({ + 'width': width, + 'height': height, + 'exp': exp, + }) + # determine symbol names + instance_name = getattr(FEATURE_NAME, uuid) + predicate_name = getattr(ns.bse, 'colors_spatial_' + uuid) + # get vector dimension + dimension = self.dimension(width, height, exp) + # initialize parent with the schema + super().__init__(bsfs.schema.from_string(base.SCHEMA_PREAMBLE + f''' + <{FEATURE_NAME}> rdfs:subClassOf bsa:Feature ; + # annotations + rdfs:label "Spatially dominant colors"^^xsd:string ; + schema:description "Domiant colors of subregions in an image."^^xsd:string ; + bsfs:distance <https://schema.bsfs.io/core/distance#euclidean> ; + bsfs:dtype xsd:integer . + + <{instance_name}> rdfs:subClassOf <{FEATURE_NAME}> ; + bsfs:dimension "{dimension}"^^xsd:integer ; + # annotations + <{FEATURE_NAME}/args#width> "{width}"^^xsd:integer ; + <{FEATURE_NAME}/args#height> "{height}"^^xsd:integer ; + <{FEATURE_NAME}/args#exp> "{exp}"^^xsd:float . + + <{predicate_name}> rdfs:subClassOf bsfs:Predicate ; + rdfs:domain bsn:Entity ; + rdfs:range <{instance_name}> ; + bsfs:unique "true"^^xsd:boolean . + + ''')) + # assign extra members + self.width = width + self.height = height + self.exp = exp + self._predicate_name = predicate_name + + def __repr__(self) -> str: + return f'{bsfs.typename(self)}({self.width}, {self.height}, {self.exp})' + + def __eq__(self, other: typing.Any) -> bool: + return super().__eq__(other) \ + and self.width == other.width \ + and self.height == other.height \ + and self.exp == other.exp + + def __hash__(self) -> int: + return hash((super().__hash__(), self.width, self.height, self.exp)) + + @staticmethod + def dimension(width: int, height: int, exp: float) -> int: + """Return the feature vector dimension.""" + # FIXME: replace with a proper formula + dim = 0 + while width >= 1 and height >= 1: + dim += width * height + width = np.floor(width / exp) + height = np.floor(height / exp) + dim *= 3 # per band + return int(dim) + + def extract( + self, + subject: node.Node, + content: PIL.Image.Image, + principals: typing.Iterable[bsfs.schema.Predicate], + ) -> typing.Iterator[typing.Tuple[node.Node, bsfs.schema.Predicate, typing.Any]]: + # check principals + if self.schema.predicate(self._predicate_name) not in principals: + # nothing to do; abort + return + + # convert to HSV + content = content.convert('HSV') + + # get dimensions + width, height = self.width, self.height + num_bands = len(content.getbands()) # it's three since we converted to HSV before + + features = [] + while width >= 1 and height >= 1: + # downsample + img = content.resize((width, height), resample=PIL.Image.Resampling.BOX) + # feature vector + features.append( + np.array(img.getdata()).reshape((width * height, num_bands))) + # iterate + width = int(np.floor(width / self.exp)) + height = int(np.floor(height / self.exp)) + + # combine bands and convert features to tuple + value = tuple(np.vstack(features).reshape(-1)) + # return triple with feature vector as value + yield subject, self.schema.predicate(self._predicate_name), value + +## EOF ## diff --git a/bsie/extractor/image/photometrics.py b/bsie/extractor/image/photometrics.py new file mode 100644 index 0000000..42eb3c8 --- /dev/null +++ b/bsie/extractor/image/photometrics.py @@ -0,0 +1,211 @@ + +# standard imports +from fractions import Fraction +import typing + +# bsie imports +from bsie.utils import bsfs, node, ns + +# inner-module imports +from .. import base + +# exports +__all__: typing.Sequence[str] = ( + 'Exif', + ) + + +## code ## + +def _gps_to_dec(coords: typing.Tuple[float, float, float]) -> float: + """Convert GPS coordinates from exif to float.""" + # unpack args + deg, min, sec = coords # pylint: disable=redefined-builtin # min + # convert to float + deg = float(Fraction(deg)) + min = float(Fraction(min)) + sec = float(Fraction(sec)) + + if float(sec) > 0: + # format is deg+min+sec + return (float(deg) * 3600 + float(min) * 60 + float(sec)) / 3600 + # format is deg+min + return float(deg) + float(min) / 60 + + +class Exif(base.Extractor): + """Extract information from EXIF/IPTC tags of an image file.""" + + CONTENT_READER = 'bsie.reader.exif.Exif' + + def __init__(self): + super().__init__(bsfs.schema.from_string(base.SCHEMA_PREAMBLE + ''' + #bse:t_capture rdfs:subClassOf bsfs:Predicate ; + # rdfs:domain bsn:Entity ; + # rdfs:range xsd:float ; + # bsfs:unique "true"^^xsd:boolean . + bse:exposure rdfs:subClassOf bsfs:Predicate ; + rdfs:domain bsn:Entity ; + rdfs:range xsd:float ; + bsfs:unique "true"^^xsd:boolean . + bse:aperture rdfs:subClassOf bsfs:Predicate ; + rdfs:domain bsn:Entity ; + rdfs:range xsd:float ; + bsfs:unique "true"^^xsd:boolean . + bse:iso rdfs:subClassOf bsfs:Predicate ; + rdfs:domain bsn:Entity ; + rdfs:range xsd:integer ; + bsfs:unique "true"^^xsd:boolean . + bse:focal_length rdfs:subClassOf bsfs:Predicate ; + rdfs:domain bsn:Entity ; + rdfs:range xsd:float ; + bsfs:unique "true"^^xsd:boolean . + bse:width rdfs:subClassOf bsfs:Predicate ; + rdfs:domain bsn:Entity ; + rdfs:range xsd:integer ; + bsfs:unique "true"^^xsd:boolean . + bse:height rdfs:subClassOf bsfs:Predicate ; + rdfs:domain bsn:Entity ; + rdfs:range xsd:integer ; + bsfs:unique "true"^^xsd:boolean . + bse:orientation rdfs:subClassOf bsfs:Predicate ; + rdfs:domain bsn:Entity ; + rdfs:range xsd:integer ; + bsfs:unique "true"^^xsd:boolean . + bse:orientation_label rdfs:subClassOf bsfs:Predicate ; + rdfs:domain bsn:Entity ; + rdfs:range xsd:string ; + bsfs:unique "true"^^xsd:boolean . + bse:altitude rdfs:subClassOf bsfs:Predicate ; + rdfs:domain bsn:Entity ; + rdfs:range xsd:float ; + bsfs:unique "true"^^xsd:boolean . + bse:latitude rdfs:subClassOf bsfs:Predicate ; + rdfs:domain bsn:Entity ; + rdfs:range xsd:float ; + bsfs:unique "true"^^xsd:boolean . + bse:longitude rdfs:subClassOf bsfs:Predicate ; + rdfs:domain bsn:Entity ; + rdfs:range xsd:float ; + bsfs:unique "true"^^xsd:boolean . + ''')) + # initialize mapping from predicate to callback + self._callmap = { + #self.schema.predicate(ns.bse.t_capture): self._date, + self.schema.predicate(ns.bse.exposure): self._exposure, + self.schema.predicate(ns.bse.aperture): self._aperture, + self.schema.predicate(ns.bse.iso): self._iso, + self.schema.predicate(ns.bse.focal_length): self._focal_length, + self.schema.predicate(ns.bse.width): self._width, + self.schema.predicate(ns.bse.height): self._height, + self.schema.predicate(ns.bse.orientation): self._orientation, + self.schema.predicate(ns.bse.orientation_label): self._orientation_label, + self.schema.predicate(ns.bse.altitude): self._altitude, + self.schema.predicate(ns.bse.latitude): self._latitude, + self.schema.predicate(ns.bse.longitude): self._longitude, + } + + def extract( + self, + subject: node.Node, + content: dict, + principals: typing.Iterable[bsfs.schema.Predicate], + ) -> typing.Iterator[typing.Tuple[node.Node, bsfs.schema.Predicate, typing.Any]]: + for pred in principals: + # find callback + clbk = self._callmap.get(pred) + if clbk is None: + continue + # get value + value = clbk(content) + if value is None: + continue + # produce triple + yield subject, pred, value + + #def _date(self, content: dict): # FIXME: Return type annotation + # date_keys = ( + # 'Exif.Photo.DateTimeOriginal', + # 'Exif.Photo.DateTimeDigitized', + # 'Exif.Image.DateTime', + # ) + # for key in date_keys: + # if key in content: + # dt = content[key].value + # if dt.tzinfo is None: + # dt = dt.replace(tzinfo=ttime.NoTimeZone) + # return dt + # return None + + + ## photometrics + + def _exposure(self, content: dict) -> typing.Optional[float]: + if 'Exif.Photo.ExposureTime' in content: + return 1.0 / float(Fraction(content['Exif.Photo.ExposureTime'])) + return None + + def _aperture(self, content: dict) -> typing.Optional[float]: + if 'Exif.Photo.FNumber' in content: + return float(Fraction(content['Exif.Photo.FNumber'])) + return None + + def _iso(self, content: dict) -> typing.Optional[int]: + if 'Exif.Photo.ISOSpeedRatings' in content: + return int(content['Exif.Photo.ISOSpeedRatings']) + return None + + def _focal_length(self, content: dict) -> typing.Optional[float]: + if 'Exif.Photo.FocalLength' in content: + return float(Fraction(content['Exif.Photo.FocalLength'])) + return None + + + ## image dimensions + + def _width(self, content: dict) -> typing.Optional[int]: + # FIXME: consider orientation! + if 'Exif.Photo.PixelXDimension' in content: + return int(content['Exif.Photo.PixelXDimension']) + return None + + def _height(self, content: dict) -> typing.Optional[int]: + # FIXME: consider orientation! + if 'Exif.Photo.PixelYDimension' in content: + return int(content['Exif.Photo.PixelYDimension']) + return None + + def _orientation(self, content: dict) -> typing.Optional[int]: + if 'Exif.Image.Orientation' in content: + return int(content['Exif.Image.Orientation']) + return None + + def _orientation_label(self, content: dict) -> typing.Optional[str]: + width = self._width(content) + height = self._height(content) + ori = self._orientation(content) + if width is not None and height is not None and ori is not None: + if ori <= 4: + return 'landscape' if width >= height else 'portrait' + return 'portrait' if width >= height else 'landscape' + return None + + + ## location + + def _altitude(self, content: dict) -> typing.Optional[float]: + if 'Exif.GPSInfo.GPSAltitude' in content: + return float(Fraction(content['Exif.GPSInfo.GPSAltitude'])) + return None + + def _latitude(self, content: dict) -> typing.Optional[float]: + if 'Exif.GPSInfo.GPSLatitude' in content: + return _gps_to_dec(content['Exif.GPSInfo.GPSLatitude'].split()) + return None + + def _longitude(self, content: dict) -> typing.Optional[float]: + if 'Exif.GPSInfo.GPSLongitude' in content: + return _gps_to_dec(content['Exif.GPSInfo.GPSLongitude'].split()) + return None + +## EOF ## diff --git a/bsie/extractor/preview.py b/bsie/extractor/preview.py new file mode 100644 index 0000000..145a01a --- /dev/null +++ b/bsie/extractor/preview.py @@ -0,0 +1,96 @@ + +# imports +import io +import typing + +# external imports +import PIL.Image + +# bsie imports +from bsie.utils import bsfs, node, ns + +# inner-module imports +from . import base + +# exports +__all__: typing.Sequence[str] = ( + 'Preview', + ) + + +## code ## + +class Preview(base.Extractor): + """Extract previews.""" + + CONTENT_READER = 'bsie.reader.preview.Preview' + + def __init__(self, max_sides: typing.Iterable[int]): + super().__init__(bsfs.schema.from_string(base.SCHEMA_PREAMBLE + ''' + + + + bsn:Preview rdfs:subClassOf bsfs:Node . + bsl:BinaryBlob rdfs:subClassOf bsfs:Literal . + <https://schema.bsfs.io/ie/Literal/BinaryBlob/JPEG> rdfs:subClassOf bsl:BinaryBlob . + + bse:preview rdfs:subClassOf bsfs:Predicate ; + rdfs:domain bsn:Entity ; + rdfs:range bsn:Preview ; + bsfs:unique "false"^^xsd:boolean . + + bsp:width rdfs:subClassOf bsfs:Predicate ; + rdfs:domain bsn:Preview ; + rdfs:range xsd:integer ; + bsfs:unique "true"^^xsd:boolean . + + bsp:height rdfs:subClassOf bsfs:Predicate ; + rdfs:domain bsn:Preview ; + rdfs:range xsd:integer ; + bsfs:unique "true"^^xsd:boolean . + + bsp:asset rdfs:subClassOf bsfs:Predicate ; + rdfs:domain bsn:Preview ; + rdfs:range <https://schema.bsfs.io/ie/Literal/BinaryBlob/JPEG> ; + bsfs:unique "true"^^xsd:boolean . + + ''')) + # initialize extra args + self.max_sides = set(max_sides) + + def __eq__(self, other: typing.Any) -> bool: + return super().__eq__(other) \ + and self.max_sides == other.max_sides + + def __hash__(self) -> int: + return hash((super().__hash__(), tuple(sorted(self.max_sides)))) + + def extract( + self, + subject: node.Node, + content: typing.Callable[[int], PIL.Image.Image], + principals: typing.Iterable[bsfs.schema.Predicate], + ) -> typing.Iterator[typing.Tuple[node.Node, bsfs.schema.Predicate, typing.Any]]: + # check principals + if self.schema.predicate(ns.bse.preview) not in principals: + return + + for max_side in self.max_sides: + # get the preview in the right resolution + img = content(max_side) + # convert the preview to jpeg + buffer = io.BytesIO() + img.save(buffer, format='jpeg') + # create a preview node + preview = node.Node(ns.bsn.Preview, + ucid=bsfs.uuid.UCID.from_bytes(buffer.getvalue()), + size=max_side, + source=subject, + ) + # yield triples + yield subject, self.schema.predicate(ns.bse.preview), preview + yield preview, self.schema.predicate(ns.bsp.width), img.width + yield preview, self.schema.predicate(ns.bsp.height), img.height + yield preview, self.schema.predicate(ns.bsp.asset), buffer.getvalue() + +## EOF ## |