diff options
author | Matthias Baumgartner <dev@igsor.net> | 2023-03-05 19:22:46 +0100 |
---|---|---|
committer | Matthias Baumgartner <dev@igsor.net> | 2023-03-05 19:22:46 +0100 |
commit | af81318ae9311fd0b0e16949cef3cfaf7996970b (patch) | |
tree | fb220da28bb7248ebf37ce09af5de88f2c1aaad4 /bsie/extractor | |
parent | 7bf6b33fa6d6b901e4933bfe0b2a9939d7b3f3f3 (diff) | |
parent | 8b460aa0232cd841af7b7734c91982bc83486e03 (diff) | |
download | bsie-af81318ae9311fd0b0e16949cef3cfaf7996970b.tar.gz bsie-af81318ae9311fd0b0e16949cef3cfaf7996970b.tar.bz2 bsie-af81318ae9311fd0b0e16949cef3cfaf7996970b.zip |
Merge branch 'mb/diogenes' into develop
Diffstat (limited to 'bsie/extractor')
-rw-r--r-- | bsie/extractor/__init__.py | 3 | ||||
-rw-r--r-- | bsie/extractor/base.py | 36 | ||||
-rw-r--r-- | bsie/extractor/builder.py | 5 | ||||
-rw-r--r-- | bsie/extractor/generic/__init__.py | 3 | ||||
-rw-r--r-- | bsie/extractor/generic/constant.py | 4 | ||||
-rw-r--r-- | bsie/extractor/generic/path.py | 9 | ||||
-rw-r--r-- | bsie/extractor/generic/stat.py | 8 | ||||
-rw-r--r-- | bsie/extractor/image/__init__.py | 5 | ||||
-rw-r--r-- | bsie/extractor/image/colors_spatial.py | 16 | ||||
-rw-r--r-- | bsie/extractor/image/photometrics.py | 211 | ||||
-rw-r--r-- | bsie/extractor/preview.py | 27 |
11 files changed, 252 insertions, 75 deletions
diff --git a/bsie/extractor/__init__.py b/bsie/extractor/__init__.py index 5f385ee..36fa9ba 100644 --- a/bsie/extractor/__init__.py +++ b/bsie/extractor/__init__.py @@ -2,9 +2,6 @@ Each Extractor class is linked to the Reader class whose content it requires. -Part of the bsie module. -A copy of the license is provided with the project. -Author: Matthias Baumgartner, 2022 """ # standard imports import typing diff --git a/bsie/extractor/base.py b/bsie/extractor/base.py index 89183f9..f92d7cc 100644 --- a/bsie/extractor/base.py +++ b/bsie/extractor/base.py @@ -1,8 +1,4 @@ """The Extractor classes transform content into triples. - -Part of the bsie module. -A copy of the license is provided with the project. -Author: Matthias Baumgartner, 2022 """ # standard imports import abc @@ -28,26 +24,32 @@ SCHEMA_PREAMBLE = ''' prefix schema: <http://schema.org/> # common bsfs prefixes - prefix bsfs: <http://bsfs.ai/schema/> - prefix bse: <http://bsfs.ai/schema/Entity#> - prefix bsp: <http://bsfs.ai/schema/Preview#> + prefix bsfs: <https://schema.bsfs.io/core/> + prefix bsl: <https://schema.bsfs.io/core/Literal/> + prefix bsa: <https://schema.bsfs.io/core/Literal/Array/> + prefix bsd: <https://schema.bsfs.io/core/distance#> + + prefix bsie: <https://schema.bsfs.io/ie/> + prefix bsn: <https://schema.bsfs.io/ie/Node/> + prefix bse: <https://schema.bsfs.io/ie/Node/Entity#> + prefix bsp: <https://schema.bsfs.io/ie/Node/Preview#> # default definitions - bsfs:Array rdfs:subClassOf bsfs:Literal . - bsfs:Number rdfs:subClassOf bsfs:Literal . - bsfs:Time rdfs:subClassOf bsfs:Literal . - bsfs:Feature rdfs:subClassOf bsfs:Array ; + bsl:Array rdfs:subClassOf bsfs:Literal . + bsl:Number rdfs:subClassOf bsfs:Literal . + bsl:Time rdfs:subClassOf bsfs:Literal . + bsa:Feature rdfs:subClassOf bsl:Array ; bsfs:dimension "1"^^xsd:integer ; - bsfs:dtype bsfs:f16 ; - bsfs:distance bsfs:euclidean . + bsfs:dtype <https://schema.bsfs.io/core/dtype#f16> ; + bsfs:distance bsd:euclidean . # essential nodes - bsfs:Entity rdfs:subClassOf bsfs:Node . - bsfs:File rdfs:subClassOf bsfs:Entity . + bsn:Entity rdfs:subClassOf bsfs:Node . # common definitions xsd:string rdfs:subClassOf bsfs:Literal . - xsd:integer rdfs:subClassOf bsfs:Number . + xsd:integer rdfs:subClassOf bsl:Number . + xsd:float rdfs:subClassOf bsl:Number . ''' @@ -93,7 +95,7 @@ class Extractor(abc.ABC): @property def principals(self) -> typing.Iterator[bsfs.schema.Predicate]: """Return the principal predicates, i.e., relations from/to the extraction subject.""" - ent = self.schema.node(ns.bsfs.Entity) + ent = self.schema.node(ns.bsn.Entity) return ( pred for pred diff --git a/bsie/extractor/builder.py b/bsie/extractor/builder.py index 0fd3685..d691b0e 100644 --- a/bsie/extractor/builder.py +++ b/bsie/extractor/builder.py @@ -1,9 +1,4 @@ -""" -Part of the bsie module. -A copy of the license is provided with the project. -Author: Matthias Baumgartner, 2022 -""" # standard imports import typing diff --git a/bsie/extractor/generic/__init__.py b/bsie/extractor/generic/__init__.py index 4783949..46a4bd6 100644 --- a/bsie/extractor/generic/__init__.py +++ b/bsie/extractor/generic/__init__.py @@ -3,9 +3,6 @@ files. Examples include file system information (file name and size, mime type, etc.) and information that is independent of the actual file (constant triples, host platform infos, current time, etc.). -Part of the bsie module. -A copy of the license is provided with the project. -Author: Matthias Baumgartner, 2022 """ # standard imports import typing diff --git a/bsie/extractor/generic/constant.py b/bsie/extractor/generic/constant.py index 938e20c..7acbe95 100644 --- a/bsie/extractor/generic/constant.py +++ b/bsie/extractor/generic/constant.py @@ -1,8 +1,4 @@ """The Constant extractor produces pre-specified triples. - -Part of the bsie module. -A copy of the license is provided with the project. -Author: Matthias Baumgartner, 2022 """ # standard imports import typing diff --git a/bsie/extractor/generic/path.py b/bsie/extractor/generic/path.py index c984515..00c1121 100644 --- a/bsie/extractor/generic/path.py +++ b/bsie/extractor/generic/path.py @@ -1,9 +1,4 @@ -""" -Part of the bsie module. -A copy of the license is provided with the project. -Author: Matthias Baumgartner, 2022 -""" # standard imports import os import typing @@ -31,11 +26,11 @@ class Path(base.Extractor): def __init__(self): super().__init__(bsfs.schema.from_string(base.SCHEMA_PREAMBLE + ''' bse:filename rdfs:subClassOf bsfs:Predicate ; - rdfs:domain bsfs:File ; + rdfs:domain bsn:Entity ; rdfs:range xsd:string ; rdfs:label "File name"^^xsd:string ; schema:description "Filename of entity in some filesystem."^^xsd:string ; - bsfs:unique "false"^^xsd:boolean . + bsfs:unique "true"^^xsd:boolean . ''')) self._callmap = { self.schema.predicate(ns.bse.filename): self.__filename, diff --git a/bsie/extractor/generic/stat.py b/bsie/extractor/generic/stat.py index 9394456..92b51f3 100644 --- a/bsie/extractor/generic/stat.py +++ b/bsie/extractor/generic/stat.py @@ -1,8 +1,4 @@ """Extract information from the file system, such as filesize. - -Part of the bsie module. -A copy of the license is provided with the project. -Author: Matthias Baumgartner, 2022 """ # standard imports import os @@ -33,11 +29,11 @@ class Stat(base.Extractor): def __init__(self): super().__init__(bsfs.schema.from_string(base.SCHEMA_PREAMBLE + ''' bse:filesize rdfs:subClassOf bsfs:Predicate ; - rdfs:domain bsfs:File ; + rdfs:domain bsn:Entity ; rdfs:range xsd:integer ; rdfs:label "File size"^^xsd:string ; schema:description "File size of entity in some filesystem."^^xsd:string ; - bsfs:unique "false"^^xsd:boolean . + bsfs:unique "true"^^xsd:boolean . ''')) self._callmap = { self.schema.predicate(ns.bse.filesize): self.__filesize, diff --git a/bsie/extractor/image/__init__.py b/bsie/extractor/image/__init__.py index 75b118d..f82424a 100644 --- a/bsie/extractor/image/__init__.py +++ b/bsie/extractor/image/__init__.py @@ -1,9 +1,4 @@ -""" -Part of the bsie module. -A copy of the license is provided with the project. -Author: Matthias Baumgartner, 2022 -""" # standard imports import typing diff --git a/bsie/extractor/image/colors_spatial.py b/bsie/extractor/image/colors_spatial.py index 15fd281..e6661a9 100644 --- a/bsie/extractor/image/colors_spatial.py +++ b/bsie/extractor/image/colors_spatial.py @@ -1,8 +1,4 @@ """Spatial color features. - -Part of the bsie module. -A copy of the license is provided with the project. -Author: Matthias Baumgartner, 2022 """ # standard imports import typing @@ -18,8 +14,7 @@ from bsie.utils import bsfs, node, ns from .. import base # constants -FEATURE_NAME = ns.bsf + 'ColorsSpatial' -PREDICATE_NAME = ns.bse + 'colors_spatial' +FEATURE_NAME = ns.bsf.ColorsSpatial() # exports __all__: typing.Sequence[str] = ( @@ -62,16 +57,17 @@ class ColorsSpatial(base.Extractor): 'exp': exp, }) # determine symbol names - instance_name = FEATURE_NAME[uuid] - predicate_name = PREDICATE_NAME[uuid] + instance_name = getattr(FEATURE_NAME, uuid) + predicate_name = getattr(ns.bse, 'colors_spatial_' + uuid) # get vector dimension dimension = self.dimension(width, height, exp) # initialize parent with the schema super().__init__(bsfs.schema.from_string(base.SCHEMA_PREAMBLE + f''' - <{FEATURE_NAME}> rdfs:subClassOf bsfs:Feature ; + <{FEATURE_NAME}> rdfs:subClassOf bsa:Feature ; # annotations rdfs:label "Spatially dominant colors"^^xsd:string ; schema:description "Domiant colors of subregions in an image."^^xsd:string ; + bsfs:distance <https://schema.bsfs.io/core/distance#euclidean> ; bsfs:dtype xsd:integer . <{instance_name}> rdfs:subClassOf <{FEATURE_NAME}> ; @@ -82,7 +78,7 @@ class ColorsSpatial(base.Extractor): <{FEATURE_NAME}/args#exp> "{exp}"^^xsd:float . <{predicate_name}> rdfs:subClassOf bsfs:Predicate ; - rdfs:domain bsfs:File ; + rdfs:domain bsn:Entity ; rdfs:range <{instance_name}> ; bsfs:unique "true"^^xsd:boolean . diff --git a/bsie/extractor/image/photometrics.py b/bsie/extractor/image/photometrics.py new file mode 100644 index 0000000..42eb3c8 --- /dev/null +++ b/bsie/extractor/image/photometrics.py @@ -0,0 +1,211 @@ + +# standard imports +from fractions import Fraction +import typing + +# bsie imports +from bsie.utils import bsfs, node, ns + +# inner-module imports +from .. import base + +# exports +__all__: typing.Sequence[str] = ( + 'Exif', + ) + + +## code ## + +def _gps_to_dec(coords: typing.Tuple[float, float, float]) -> float: + """Convert GPS coordinates from exif to float.""" + # unpack args + deg, min, sec = coords # pylint: disable=redefined-builtin # min + # convert to float + deg = float(Fraction(deg)) + min = float(Fraction(min)) + sec = float(Fraction(sec)) + + if float(sec) > 0: + # format is deg+min+sec + return (float(deg) * 3600 + float(min) * 60 + float(sec)) / 3600 + # format is deg+min + return float(deg) + float(min) / 60 + + +class Exif(base.Extractor): + """Extract information from EXIF/IPTC tags of an image file.""" + + CONTENT_READER = 'bsie.reader.exif.Exif' + + def __init__(self): + super().__init__(bsfs.schema.from_string(base.SCHEMA_PREAMBLE + ''' + #bse:t_capture rdfs:subClassOf bsfs:Predicate ; + # rdfs:domain bsn:Entity ; + # rdfs:range xsd:float ; + # bsfs:unique "true"^^xsd:boolean . + bse:exposure rdfs:subClassOf bsfs:Predicate ; + rdfs:domain bsn:Entity ; + rdfs:range xsd:float ; + bsfs:unique "true"^^xsd:boolean . + bse:aperture rdfs:subClassOf bsfs:Predicate ; + rdfs:domain bsn:Entity ; + rdfs:range xsd:float ; + bsfs:unique "true"^^xsd:boolean . + bse:iso rdfs:subClassOf bsfs:Predicate ; + rdfs:domain bsn:Entity ; + rdfs:range xsd:integer ; + bsfs:unique "true"^^xsd:boolean . + bse:focal_length rdfs:subClassOf bsfs:Predicate ; + rdfs:domain bsn:Entity ; + rdfs:range xsd:float ; + bsfs:unique "true"^^xsd:boolean . + bse:width rdfs:subClassOf bsfs:Predicate ; + rdfs:domain bsn:Entity ; + rdfs:range xsd:integer ; + bsfs:unique "true"^^xsd:boolean . + bse:height rdfs:subClassOf bsfs:Predicate ; + rdfs:domain bsn:Entity ; + rdfs:range xsd:integer ; + bsfs:unique "true"^^xsd:boolean . + bse:orientation rdfs:subClassOf bsfs:Predicate ; + rdfs:domain bsn:Entity ; + rdfs:range xsd:integer ; + bsfs:unique "true"^^xsd:boolean . + bse:orientation_label rdfs:subClassOf bsfs:Predicate ; + rdfs:domain bsn:Entity ; + rdfs:range xsd:string ; + bsfs:unique "true"^^xsd:boolean . + bse:altitude rdfs:subClassOf bsfs:Predicate ; + rdfs:domain bsn:Entity ; + rdfs:range xsd:float ; + bsfs:unique "true"^^xsd:boolean . + bse:latitude rdfs:subClassOf bsfs:Predicate ; + rdfs:domain bsn:Entity ; + rdfs:range xsd:float ; + bsfs:unique "true"^^xsd:boolean . + bse:longitude rdfs:subClassOf bsfs:Predicate ; + rdfs:domain bsn:Entity ; + rdfs:range xsd:float ; + bsfs:unique "true"^^xsd:boolean . + ''')) + # initialize mapping from predicate to callback + self._callmap = { + #self.schema.predicate(ns.bse.t_capture): self._date, + self.schema.predicate(ns.bse.exposure): self._exposure, + self.schema.predicate(ns.bse.aperture): self._aperture, + self.schema.predicate(ns.bse.iso): self._iso, + self.schema.predicate(ns.bse.focal_length): self._focal_length, + self.schema.predicate(ns.bse.width): self._width, + self.schema.predicate(ns.bse.height): self._height, + self.schema.predicate(ns.bse.orientation): self._orientation, + self.schema.predicate(ns.bse.orientation_label): self._orientation_label, + self.schema.predicate(ns.bse.altitude): self._altitude, + self.schema.predicate(ns.bse.latitude): self._latitude, + self.schema.predicate(ns.bse.longitude): self._longitude, + } + + def extract( + self, + subject: node.Node, + content: dict, + principals: typing.Iterable[bsfs.schema.Predicate], + ) -> typing.Iterator[typing.Tuple[node.Node, bsfs.schema.Predicate, typing.Any]]: + for pred in principals: + # find callback + clbk = self._callmap.get(pred) + if clbk is None: + continue + # get value + value = clbk(content) + if value is None: + continue + # produce triple + yield subject, pred, value + + #def _date(self, content: dict): # FIXME: Return type annotation + # date_keys = ( + # 'Exif.Photo.DateTimeOriginal', + # 'Exif.Photo.DateTimeDigitized', + # 'Exif.Image.DateTime', + # ) + # for key in date_keys: + # if key in content: + # dt = content[key].value + # if dt.tzinfo is None: + # dt = dt.replace(tzinfo=ttime.NoTimeZone) + # return dt + # return None + + + ## photometrics + + def _exposure(self, content: dict) -> typing.Optional[float]: + if 'Exif.Photo.ExposureTime' in content: + return 1.0 / float(Fraction(content['Exif.Photo.ExposureTime'])) + return None + + def _aperture(self, content: dict) -> typing.Optional[float]: + if 'Exif.Photo.FNumber' in content: + return float(Fraction(content['Exif.Photo.FNumber'])) + return None + + def _iso(self, content: dict) -> typing.Optional[int]: + if 'Exif.Photo.ISOSpeedRatings' in content: + return int(content['Exif.Photo.ISOSpeedRatings']) + return None + + def _focal_length(self, content: dict) -> typing.Optional[float]: + if 'Exif.Photo.FocalLength' in content: + return float(Fraction(content['Exif.Photo.FocalLength'])) + return None + + + ## image dimensions + + def _width(self, content: dict) -> typing.Optional[int]: + # FIXME: consider orientation! + if 'Exif.Photo.PixelXDimension' in content: + return int(content['Exif.Photo.PixelXDimension']) + return None + + def _height(self, content: dict) -> typing.Optional[int]: + # FIXME: consider orientation! + if 'Exif.Photo.PixelYDimension' in content: + return int(content['Exif.Photo.PixelYDimension']) + return None + + def _orientation(self, content: dict) -> typing.Optional[int]: + if 'Exif.Image.Orientation' in content: + return int(content['Exif.Image.Orientation']) + return None + + def _orientation_label(self, content: dict) -> typing.Optional[str]: + width = self._width(content) + height = self._height(content) + ori = self._orientation(content) + if width is not None and height is not None and ori is not None: + if ori <= 4: + return 'landscape' if width >= height else 'portrait' + return 'portrait' if width >= height else 'landscape' + return None + + + ## location + + def _altitude(self, content: dict) -> typing.Optional[float]: + if 'Exif.GPSInfo.GPSAltitude' in content: + return float(Fraction(content['Exif.GPSInfo.GPSAltitude'])) + return None + + def _latitude(self, content: dict) -> typing.Optional[float]: + if 'Exif.GPSInfo.GPSLatitude' in content: + return _gps_to_dec(content['Exif.GPSInfo.GPSLatitude'].split()) + return None + + def _longitude(self, content: dict) -> typing.Optional[float]: + if 'Exif.GPSInfo.GPSLongitude' in content: + return _gps_to_dec(content['Exif.GPSInfo.GPSLongitude'].split()) + return None + +## EOF ## diff --git a/bsie/extractor/preview.py b/bsie/extractor/preview.py index 1531d62..145a01a 100644 --- a/bsie/extractor/preview.py +++ b/bsie/extractor/preview.py @@ -1,9 +1,4 @@ -""" -Part of the bsie module. -A copy of the license is provided with the project. -Author: Matthias Baumgartner, 2022 -""" # imports import io import typing @@ -33,28 +28,30 @@ class Preview(base.Extractor): def __init__(self, max_sides: typing.Iterable[int]): super().__init__(bsfs.schema.from_string(base.SCHEMA_PREAMBLE + ''' - bsfs:Preview rdfs:subClassOf bsfs:Node . - bsfs:BinaryBlob rdfs:subClassOf bsfs:Literal . - bsfs:JPEG rdfs:subClassOf bsfs:BinaryBlob . + + + bsn:Preview rdfs:subClassOf bsfs:Node . + bsl:BinaryBlob rdfs:subClassOf bsfs:Literal . + <https://schema.bsfs.io/ie/Literal/BinaryBlob/JPEG> rdfs:subClassOf bsl:BinaryBlob . bse:preview rdfs:subClassOf bsfs:Predicate ; - rdfs:domain bsfs:File ; - rdfs:range bsfs:Preview ; + rdfs:domain bsn:Entity ; + rdfs:range bsn:Preview ; bsfs:unique "false"^^xsd:boolean . bsp:width rdfs:subClassOf bsfs:Predicate ; - rdfs:domain bsfs:Preview ; + rdfs:domain bsn:Preview ; rdfs:range xsd:integer ; bsfs:unique "true"^^xsd:boolean . bsp:height rdfs:subClassOf bsfs:Predicate ; - rdfs:domain bsfs:Preview ; + rdfs:domain bsn:Preview ; rdfs:range xsd:integer ; bsfs:unique "true"^^xsd:boolean . bsp:asset rdfs:subClassOf bsfs:Predicate ; - rdfs:domain bsfs:Preview ; - rdfs:range bsfs:JPEG ; + rdfs:domain bsn:Preview ; + rdfs:range <https://schema.bsfs.io/ie/Literal/BinaryBlob/JPEG> ; bsfs:unique "true"^^xsd:boolean . ''')) @@ -85,7 +82,7 @@ class Preview(base.Extractor): buffer = io.BytesIO() img.save(buffer, format='jpeg') # create a preview node - preview = node.Node(ns.bsfs.Preview, + preview = node.Node(ns.bsn.Preview, ucid=bsfs.uuid.UCID.from_bytes(buffer.getvalue()), size=max_side, source=subject, |