From 2da348c638ac5058d5acf09ab5df323ee04503d5 Mon Sep 17 00:00:00 2001 From: Matthias Baumgartner Date: Mon, 31 Oct 2022 14:14:42 +0100 Subject: constant, filesize, and filename extractors --- bsie/extractor/generic/__init__.py | 16 +++++++++ bsie/extractor/generic/constant.py | 52 ++++++++++++++++++++++++++++ bsie/extractor/generic/path.py | 70 +++++++++++++++++++++++++++++++++++++ bsie/extractor/generic/stat.py | 71 ++++++++++++++++++++++++++++++++++++++ 4 files changed, 209 insertions(+) create mode 100644 bsie/extractor/generic/__init__.py create mode 100644 bsie/extractor/generic/constant.py create mode 100644 bsie/extractor/generic/path.py create mode 100644 bsie/extractor/generic/stat.py (limited to 'bsie/extractor/generic') diff --git a/bsie/extractor/generic/__init__.py b/bsie/extractor/generic/__init__.py new file mode 100644 index 0000000..0cb7e7f --- /dev/null +++ b/bsie/extractor/generic/__init__.py @@ -0,0 +1,16 @@ +"""Generic extractors focus on information that is typically available on all +files. Examples include file system information (file name and size, mime type, +etc.) and information that is independent of the actual file (constant triples, +host platform infos, current time, etc.). + +Part of the bsie module. +A copy of the license is provided with the project. +Author: Matthias Baumgartner, 2022 +""" +# imports +import typing + +# exports +__all__: typing.Sequence[str] = [] + +## EOF ## diff --git a/bsie/extractor/generic/constant.py b/bsie/extractor/generic/constant.py new file mode 100644 index 0000000..e243131 --- /dev/null +++ b/bsie/extractor/generic/constant.py @@ -0,0 +1,52 @@ +"""The Constant extractor produces pre-specified triples. + +Part of the bsie module. +A copy of the license is provided with the project. +Author: Matthias Baumgartner, 2022 +""" +# imports +import typing + +# inner-module imports +from bsie.base import extractor +from bsie.utils.bsfs import URI +from bsie.utils.node import Node + +# exports +__all__: typing.Sequence[str] = ( + 'Constant', + ) + + +## code ## + +class Constant(extractor.Extractor): + """Extract information from file's path.""" + + CONTENT_READER = None + + def __init__( + self, + schema: str, + tuples: typing.Iterable[typing.Tuple[URI, typing.Any]], + ): + self._schema = schema + self._tuples = tuples + # FIXME: use schema instance for predicate checking + #self._tuples = [(pred, value) for pred, value in tuples if pred in schema] + # FIXME: use schema instance for value checking + + def schema(self) -> str: + return self._schema + + def extract( + self, + subject: Node, + content: None, + predicates: typing.Iterable[URI], + ) -> typing.Iterator[typing.Tuple[Node, URI, typing.Any]]: + for pred, value in self._tuples: + if pred in predicates: + yield subject, pred, value + +## EOF ## diff --git a/bsie/extractor/generic/path.py b/bsie/extractor/generic/path.py new file mode 100644 index 0000000..c39bbd2 --- /dev/null +++ b/bsie/extractor/generic/path.py @@ -0,0 +1,70 @@ +""" + +Part of the bsie module. +A copy of the license is provided with the project. +Author: Matthias Baumgartner, 2022 +""" +# imports +import os +import typing + +# inner-module imports +from bsie.base import extractor +from bsie.utils import node, ns +from bsie.utils.bsfs import URI +import bsie.reader.path + +# exports +__all__: typing.Sequence[str] = ( + 'Path', + ) + + +## code ## + +class Path(extractor.Extractor): + """Extract information from file's path.""" + + CONTENT_READER = bsie.reader.path.Path + + def __init__(self): + self.__callmap = { + ns.bse.filename: self.__filename, + } + + def schema(self) -> str: + return ''' + bse:filename a bsfs:Predicate ; + rdfs:domain bsfs:Entity ; + rdfs:range xsd:string ; + rdf:label "File name"^^xsd:string ; + schema:description "Filename of entity in some filesystem."^^xsd:string ; + owl:maxCardinality "INF"^^xsd:number . + ''' + + def extract( + self, + subject: node.Node, + content: CONTENT_READER.CONTENT_TYPE, + predicates: typing.Iterable[URI], + ) -> typing.Iterator[typing.Tuple[node.Node, URI, typing.Any]]: + for pred in predicates: + # find callback + clbk = self.__callmap.get(pred) + if clbk is None: + continue + # get value + value = clbk(content) + if value is None: + continue + # produce triple + yield subject, pred, value + + def __filename(self, path: str) -> str: + try: + return os.path.basename(path) + except Exception: + # FIXME: some kind of error reporting (e.g. logging) + return None + +## EOF ## diff --git a/bsie/extractor/generic/stat.py b/bsie/extractor/generic/stat.py new file mode 100644 index 0000000..d74369c --- /dev/null +++ b/bsie/extractor/generic/stat.py @@ -0,0 +1,71 @@ +"""Extract information from the file system, such as filesize. + +Part of the bsie module. +A copy of the license is provided with the project. +Author: Matthias Baumgartner, 2022 +""" +# imports +import typing + +# inner-module imports +from bsie.base import extractor +from bsie.utils import node, ns +from bsie.utils.bsfs import URI +import bsie.reader.stat + + +# exports +__all__: typing.Sequence[str] = ( + 'Stat', + ) + + +## code ## + +class Stat(extractor.Extractor): + """Extract information from the file system.""" + + CONTENT_READER = bsie.reader.stat.Stat + + def __init__(self): + self.__callmap = { + ns.bse.filesize: self.__filesize, + } + + def schema(self) -> str: + return ''' + bse:filesize a bsfs:Predicate ; + rdfs:domain bsfs:Entity ; + rdfs:range xsd:integer ; + rdf:label "File size"^^xsd:string ; + schema:description "File size of entity in some filesystem."^^xsd:string ; + owl:maxCardinality "INF"^^xsd:number . + ''' + + def extract( + self, + subject: node.Node, + content: CONTENT_READER.CONTENT_TYPE, + predicates: typing.Iterable[URI], + ) -> typing.Iterator[typing.Tuple[node.Node, URI, typing.Any]]: + for pred in predicates: + # find callback + clbk = self.__callmap.get(pred) + if clbk is None: + continue + # get value + value = clbk(content) + if value is None: + continue + # produce triple + yield subject, pred, value + + def __filesize(self, content: CONTENT_READER.CONTENT_TYPE) -> int: + """Return the file size.""" + try: + return content.st_size + except Exception: + # FIXME: some kind of error reporting (e.g. logging) + return None + +## EOF ## -- cgit v1.2.3 From e174a25585e64eb1b0759440cad48d642dd31829 Mon Sep 17 00:00:00 2001 From: Matthias Baumgartner Date: Fri, 25 Nov 2022 14:31:29 +0100 Subject: use schema and predicate types in extractors --- bsie/extractor/generic/constant.py | 20 +++++++++---------- bsie/extractor/generic/path.py | 40 +++++++++++++++++++++----------------- bsie/extractor/generic/stat.py | 34 ++++++++++++++++---------------- 3 files changed, 49 insertions(+), 45 deletions(-) (limited to 'bsie/extractor/generic') diff --git a/bsie/extractor/generic/constant.py b/bsie/extractor/generic/constant.py index e243131..795bac6 100644 --- a/bsie/extractor/generic/constant.py +++ b/bsie/extractor/generic/constant.py @@ -7,9 +7,9 @@ Author: Matthias Baumgartner, 2022 # imports import typing -# inner-module imports +# bsie imports from bsie.base import extractor -from bsie.utils.bsfs import URI +from bsie.utils.bsfs import URI, schema as _schema from bsie.utils.node import Node # exports @@ -25,26 +25,26 @@ class Constant(extractor.Extractor): CONTENT_READER = None + # predicate/value pairs to be produced. + _tuples: typing.Tuple[typing.Tuple[_schema.Predicate, typing.Any], ...] + def __init__( self, schema: str, tuples: typing.Iterable[typing.Tuple[URI, typing.Any]], ): - self._schema = schema - self._tuples = tuples - # FIXME: use schema instance for predicate checking - #self._tuples = [(pred, value) for pred, value in tuples if pred in schema] + super().__init__(_schema.Schema.from_string(extractor.SCHEMA_PREAMBLE + schema)) + # NOTE: Raises a KeyError if the predicate is not part of the schema + self._tuples = tuple((self.schema.predicate(p_uri), value) for p_uri, value in tuples) # FIXME: use schema instance for value checking - def schema(self) -> str: - return self._schema def extract( self, subject: Node, content: None, - predicates: typing.Iterable[URI], - ) -> typing.Iterator[typing.Tuple[Node, URI, typing.Any]]: + predicates: typing.Iterable[_schema.Predicate], + ) -> typing.Iterator[typing.Tuple[Node, _schema.Predicate, typing.Any]]: for pred, value in self._tuples: if pred in predicates: yield subject, pred, value diff --git a/bsie/extractor/generic/path.py b/bsie/extractor/generic/path.py index c39bbd2..f358a79 100644 --- a/bsie/extractor/generic/path.py +++ b/bsie/extractor/generic/path.py @@ -8,11 +8,10 @@ Author: Matthias Baumgartner, 2022 import os import typing -# inner-module imports +# bsie imports from bsie.base import extractor from bsie.utils import node, ns -from bsie.utils.bsfs import URI -import bsie.reader.path +from bsie.utils.bsfs import schema # exports __all__: typing.Sequence[str] = ( @@ -27,30 +26,31 @@ class Path(extractor.Extractor): CONTENT_READER = bsie.reader.path.Path - def __init__(self): - self.__callmap = { - ns.bse.filename: self.__filename, - } + # mapping from predicate to handler function. + _callmap: typing.Dict[schema.Predicate, typing.Callable[[str], typing.Any]] - def schema(self) -> str: - return ''' - bse:filename a bsfs:Predicate ; + def __init__(self): + super().__init__(schema.Schema.from_string(extractor.SCHEMA_PREAMBLE + ''' + bse:filename rdfs:subClassOf bsfs:Predicate ; rdfs:domain bsfs:Entity ; rdfs:range xsd:string ; - rdf:label "File name"^^xsd:string ; + rdfs:label "File name"^^xsd:string ; schema:description "Filename of entity in some filesystem."^^xsd:string ; owl:maxCardinality "INF"^^xsd:number . - ''' + ''')) + self._callmap = { + self.schema.predicate(ns.bse.filename): self.__filename, + } def extract( self, subject: node.Node, content: CONTENT_READER.CONTENT_TYPE, - predicates: typing.Iterable[URI], - ) -> typing.Iterator[typing.Tuple[node.Node, URI, typing.Any]]: + predicates: typing.Iterable[schema.Predicate], + ) -> typing.Iterator[typing.Tuple[node.Node, schema.Predicate, typing.Any]]: for pred in predicates: # find callback - clbk = self.__callmap.get(pred) + clbk = self._callmap.get(pred) if clbk is None: continue # get value @@ -60,11 +60,15 @@ class Path(extractor.Extractor): # produce triple yield subject, pred, value - def __filename(self, path: str) -> str: + def __filename(self, path: str) -> typing.Optional[str]: try: return os.path.basename(path) - except Exception: - # FIXME: some kind of error reporting (e.g. logging) + except Exception: # some error, skip. + # FIXME: some kind of error reporting (e.g. logging)? + # Options: (a) Fail silently (current); (b) Skip and report to log; + # (c) Raise ExtractorError (aborts extraction); (d) separate content type + # checks from basename errors (report content type errors, skip basename + # errors) return None ## EOF ## diff --git a/bsie/extractor/generic/stat.py b/bsie/extractor/generic/stat.py index d74369c..e5387af 100644 --- a/bsie/extractor/generic/stat.py +++ b/bsie/extractor/generic/stat.py @@ -5,14 +5,13 @@ A copy of the license is provided with the project. Author: Matthias Baumgartner, 2022 """ # imports +import os import typing -# inner-module imports +# bsie imports from bsie.base import extractor from bsie.utils import node, ns -from bsie.utils.bsfs import URI -import bsie.reader.stat - +from bsie.utils.bsfs import schema as _schema # exports __all__: typing.Sequence[str] = ( @@ -27,30 +26,31 @@ class Stat(extractor.Extractor): CONTENT_READER = bsie.reader.stat.Stat - def __init__(self): - self.__callmap = { - ns.bse.filesize: self.__filesize, - } + # mapping from predicate to handler function. + _callmap: typing.Dict[_schema.Predicate, typing.Callable[[os.stat_result], typing.Any]] - def schema(self) -> str: - return ''' - bse:filesize a bsfs:Predicate ; + def __init__(self): + super().__init__(_schema.Schema.from_string(extractor.SCHEMA_PREAMBLE + ''' + bse:filesize rdfs:subClassOf bsfs:Predicate ; rdfs:domain bsfs:Entity ; rdfs:range xsd:integer ; - rdf:label "File size"^^xsd:string ; + rdfs:label "File size"^^xsd:string ; schema:description "File size of entity in some filesystem."^^xsd:string ; owl:maxCardinality "INF"^^xsd:number . - ''' + ''')) + self._callmap = { + self.schema.predicate(ns.bse.filesize): self.__filesize, + } def extract( self, subject: node.Node, content: CONTENT_READER.CONTENT_TYPE, - predicates: typing.Iterable[URI], - ) -> typing.Iterator[typing.Tuple[node.Node, URI, typing.Any]]: + predicates: typing.Iterable[_schema.Predicate], + ) -> typing.Iterator[typing.Tuple[node.Node, _schema.Predicate, typing.Any]]: for pred in predicates: # find callback - clbk = self.__callmap.get(pred) + clbk = self._callmap.get(pred) if clbk is None: continue # get value @@ -60,7 +60,7 @@ class Stat(extractor.Extractor): # produce triple yield subject, pred, value - def __filesize(self, content: CONTENT_READER.CONTENT_TYPE) -> int: + def __filesize(self, content: os.stat_result) -> typing.Optional[int]: """Return the file size.""" try: return content.st_size -- cgit v1.2.3 From b96c6e2096c387b70e2a4c1f0bc53b6044a0dc6f Mon Sep 17 00:00:00 2001 From: Matthias Baumgartner Date: Fri, 25 Nov 2022 14:36:27 +0100 Subject: decouple readers and extractors; use strings for reference and repeated type annotations --- bsie/extractor/generic/path.py | 4 ++-- bsie/extractor/generic/stat.py | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) (limited to 'bsie/extractor/generic') diff --git a/bsie/extractor/generic/path.py b/bsie/extractor/generic/path.py index f358a79..f346f97 100644 --- a/bsie/extractor/generic/path.py +++ b/bsie/extractor/generic/path.py @@ -24,7 +24,7 @@ __all__: typing.Sequence[str] = ( class Path(extractor.Extractor): """Extract information from file's path.""" - CONTENT_READER = bsie.reader.path.Path + CONTENT_READER = 'bsie.reader.path.Path' # mapping from predicate to handler function. _callmap: typing.Dict[schema.Predicate, typing.Callable[[str], typing.Any]] @@ -45,7 +45,7 @@ class Path(extractor.Extractor): def extract( self, subject: node.Node, - content: CONTENT_READER.CONTENT_TYPE, + content: str, predicates: typing.Iterable[schema.Predicate], ) -> typing.Iterator[typing.Tuple[node.Node, schema.Predicate, typing.Any]]: for pred in predicates: diff --git a/bsie/extractor/generic/stat.py b/bsie/extractor/generic/stat.py index e5387af..7088c0a 100644 --- a/bsie/extractor/generic/stat.py +++ b/bsie/extractor/generic/stat.py @@ -24,7 +24,7 @@ __all__: typing.Sequence[str] = ( class Stat(extractor.Extractor): """Extract information from the file system.""" - CONTENT_READER = bsie.reader.stat.Stat + CONTENT_READER = 'bsie.reader.stat.Stat' # mapping from predicate to handler function. _callmap: typing.Dict[_schema.Predicate, typing.Callable[[os.stat_result], typing.Any]] @@ -45,7 +45,7 @@ class Stat(extractor.Extractor): def extract( self, subject: node.Node, - content: CONTENT_READER.CONTENT_TYPE, + content: os.stat_result, predicates: typing.Iterable[_schema.Predicate], ) -> typing.Iterator[typing.Tuple[node.Node, _schema.Predicate, typing.Any]]: for pred in predicates: -- cgit v1.2.3 From 9ce32829b2bb85907a34a543bfcaa9183d1e362c Mon Sep 17 00:00:00 2001 From: Matthias Baumgartner Date: Fri, 25 Nov 2022 14:39:18 +0100 Subject: string conversion and equality checks --- bsie/extractor/generic/constant.py | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'bsie/extractor/generic') diff --git a/bsie/extractor/generic/constant.py b/bsie/extractor/generic/constant.py index 795bac6..7da792a 100644 --- a/bsie/extractor/generic/constant.py +++ b/bsie/extractor/generic/constant.py @@ -38,6 +38,12 @@ class Constant(extractor.Extractor): self._tuples = tuple((self.schema.predicate(p_uri), value) for p_uri, value in tuples) # FIXME: use schema instance for value checking + def __eq__(self, other: typing.Any) -> bool: + return super().__eq__(other) \ + and self._tuples == other._tuples + + def __hash__(self) -> int: + return hash((super().__hash__(), self._tuples)) def extract( self, -- cgit v1.2.3 From edc747252a04675c46059215751719b6666a77f9 Mon Sep 17 00:00:00 2001 From: Matthias Baumgartner Date: Sat, 3 Dec 2022 18:57:58 +0100 Subject: adapt to schema interface update: owl:maxCardinality changed to bsfs:unique --- bsie/extractor/generic/path.py | 2 +- bsie/extractor/generic/stat.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'bsie/extractor/generic') diff --git a/bsie/extractor/generic/path.py b/bsie/extractor/generic/path.py index f346f97..e6b901e 100644 --- a/bsie/extractor/generic/path.py +++ b/bsie/extractor/generic/path.py @@ -36,7 +36,7 @@ class Path(extractor.Extractor): rdfs:range xsd:string ; rdfs:label "File name"^^xsd:string ; schema:description "Filename of entity in some filesystem."^^xsd:string ; - owl:maxCardinality "INF"^^xsd:number . + bsfs:unique "false"^^xsd:boolean . ''')) self._callmap = { self.schema.predicate(ns.bse.filename): self.__filename, diff --git a/bsie/extractor/generic/stat.py b/bsie/extractor/generic/stat.py index 7088c0a..6493d37 100644 --- a/bsie/extractor/generic/stat.py +++ b/bsie/extractor/generic/stat.py @@ -36,7 +36,7 @@ class Stat(extractor.Extractor): rdfs:range xsd:integer ; rdfs:label "File size"^^xsd:string ; schema:description "File size of entity in some filesystem."^^xsd:string ; - owl:maxCardinality "INF"^^xsd:number . + bsfs:unique "false"^^xsd:boolean . ''')) self._callmap = { self.schema.predicate(ns.bse.filesize): self.__filesize, -- cgit v1.2.3 From 3b7fee369924eb7704709edeb8c17fff9c020dfb Mon Sep 17 00:00:00 2001 From: Matthias Baumgartner Date: Thu, 15 Dec 2022 17:06:09 +0100 Subject: import fixes --- bsie/extractor/generic/constant.py | 9 +++++---- bsie/extractor/generic/path.py | 6 +++--- bsie/extractor/generic/stat.py | 6 +++--- 3 files changed, 11 insertions(+), 10 deletions(-) (limited to 'bsie/extractor/generic') diff --git a/bsie/extractor/generic/constant.py b/bsie/extractor/generic/constant.py index 7da792a..f9e3415 100644 --- a/bsie/extractor/generic/constant.py +++ b/bsie/extractor/generic/constant.py @@ -11,6 +11,7 @@ import typing from bsie.base import extractor from bsie.utils.bsfs import URI, schema as _schema from bsie.utils.node import Node +from bsie.utils import bsfs, node # exports __all__: typing.Sequence[str] = ( @@ -26,14 +27,14 @@ class Constant(extractor.Extractor): CONTENT_READER = None # predicate/value pairs to be produced. - _tuples: typing.Tuple[typing.Tuple[_schema.Predicate, typing.Any], ...] + _tuples: typing.Tuple[typing.Tuple[bsfs.schema.Predicate, typing.Any], ...] def __init__( self, schema: str, - tuples: typing.Iterable[typing.Tuple[URI, typing.Any]], + tuples: typing.Iterable[typing.Tuple[bsfs.URI, typing.Any]], ): - super().__init__(_schema.Schema.from_string(extractor.SCHEMA_PREAMBLE + schema)) + super().__init__(bsfs.schema.Schema.from_string(extractor.SCHEMA_PREAMBLE + schema)) # NOTE: Raises a KeyError if the predicate is not part of the schema self._tuples = tuple((self.schema.predicate(p_uri), value) for p_uri, value in tuples) # FIXME: use schema instance for value checking @@ -47,7 +48,7 @@ class Constant(extractor.Extractor): def extract( self, - subject: Node, + subject: node.Node, content: None, predicates: typing.Iterable[_schema.Predicate], ) -> typing.Iterator[typing.Tuple[Node, _schema.Predicate, typing.Any]]: diff --git a/bsie/extractor/generic/path.py b/bsie/extractor/generic/path.py index e6b901e..2cc592a 100644 --- a/bsie/extractor/generic/path.py +++ b/bsie/extractor/generic/path.py @@ -10,8 +10,8 @@ import typing # bsie imports from bsie.base import extractor -from bsie.utils import node, ns from bsie.utils.bsfs import schema +from bsie.utils import bsfs, node, ns # exports __all__: typing.Sequence[str] = ( @@ -27,10 +27,10 @@ class Path(extractor.Extractor): CONTENT_READER = 'bsie.reader.path.Path' # mapping from predicate to handler function. - _callmap: typing.Dict[schema.Predicate, typing.Callable[[str], typing.Any]] + _callmap: typing.Dict[bsfs.schema.Predicate, typing.Callable[[str], typing.Any]] def __init__(self): - super().__init__(schema.Schema.from_string(extractor.SCHEMA_PREAMBLE + ''' + super().__init__(bsfs.schema.Schema.from_string(extractor.SCHEMA_PREAMBLE + ''' bse:filename rdfs:subClassOf bsfs:Predicate ; rdfs:domain bsfs:Entity ; rdfs:range xsd:string ; diff --git a/bsie/extractor/generic/stat.py b/bsie/extractor/generic/stat.py index 6493d37..dfde7d2 100644 --- a/bsie/extractor/generic/stat.py +++ b/bsie/extractor/generic/stat.py @@ -10,8 +10,8 @@ import typing # bsie imports from bsie.base import extractor -from bsie.utils import node, ns from bsie.utils.bsfs import schema as _schema +from bsie.utils import bsfs, node, ns # exports __all__: typing.Sequence[str] = ( @@ -27,10 +27,10 @@ class Stat(extractor.Extractor): CONTENT_READER = 'bsie.reader.stat.Stat' # mapping from predicate to handler function. - _callmap: typing.Dict[_schema.Predicate, typing.Callable[[os.stat_result], typing.Any]] + _callmap: typing.Dict[bsfs.schema.Predicate, typing.Callable[[os.stat_result], typing.Any]] def __init__(self): - super().__init__(_schema.Schema.from_string(extractor.SCHEMA_PREAMBLE + ''' + super().__init__(bsfs.schema.Schema.from_string(extractor.SCHEMA_PREAMBLE + ''' bse:filesize rdfs:subClassOf bsfs:Predicate ; rdfs:domain bsfs:Entity ; rdfs:range xsd:integer ; -- cgit v1.2.3 From 8e6d27ea75d2c8d68f6dd8b3d529aaa278f291cc Mon Sep 17 00:00:00 2001 From: Matthias Baumgartner Date: Thu, 15 Dec 2022 17:12:56 +0100 Subject: file node class in default schema --- bsie/extractor/generic/path.py | 2 +- bsie/extractor/generic/stat.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'bsie/extractor/generic') diff --git a/bsie/extractor/generic/path.py b/bsie/extractor/generic/path.py index 2cc592a..00165e3 100644 --- a/bsie/extractor/generic/path.py +++ b/bsie/extractor/generic/path.py @@ -32,7 +32,7 @@ class Path(extractor.Extractor): def __init__(self): super().__init__(bsfs.schema.Schema.from_string(extractor.SCHEMA_PREAMBLE + ''' bse:filename rdfs:subClassOf bsfs:Predicate ; - rdfs:domain bsfs:Entity ; + rdfs:domain bsfs:File ; rdfs:range xsd:string ; rdfs:label "File name"^^xsd:string ; schema:description "Filename of entity in some filesystem."^^xsd:string ; diff --git a/bsie/extractor/generic/stat.py b/bsie/extractor/generic/stat.py index dfde7d2..0f4267f 100644 --- a/bsie/extractor/generic/stat.py +++ b/bsie/extractor/generic/stat.py @@ -32,7 +32,7 @@ class Stat(extractor.Extractor): def __init__(self): super().__init__(bsfs.schema.Schema.from_string(extractor.SCHEMA_PREAMBLE + ''' bse:filesize rdfs:subClassOf bsfs:Predicate ; - rdfs:domain bsfs:Entity ; + rdfs:domain bsfs:File ; rdfs:range xsd:integer ; rdfs:label "File size"^^xsd:string ; schema:description "File size of entity in some filesystem."^^xsd:string ; -- cgit v1.2.3 From 3426b4e201cf03b78d2a3f144876955fcda2f66b Mon Sep 17 00:00:00 2001 From: Matthias Baumgartner Date: Thu, 15 Dec 2022 17:17:53 +0100 Subject: extractor interface revision * schema as property * predicates -> principals --- bsie/extractor/generic/constant.py | 8 +++----- bsie/extractor/generic/path.py | 7 +++---- bsie/extractor/generic/stat.py | 7 +++---- 3 files changed, 9 insertions(+), 13 deletions(-) (limited to 'bsie/extractor/generic') diff --git a/bsie/extractor/generic/constant.py b/bsie/extractor/generic/constant.py index f9e3415..cdb2ef6 100644 --- a/bsie/extractor/generic/constant.py +++ b/bsie/extractor/generic/constant.py @@ -9,8 +9,6 @@ import typing # bsie imports from bsie.base import extractor -from bsie.utils.bsfs import URI, schema as _schema -from bsie.utils.node import Node from bsie.utils import bsfs, node # exports @@ -50,10 +48,10 @@ class Constant(extractor.Extractor): self, subject: node.Node, content: None, - predicates: typing.Iterable[_schema.Predicate], - ) -> typing.Iterator[typing.Tuple[Node, _schema.Predicate, typing.Any]]: + principals: typing.Iterable[bsfs.schema.Predicate], + ) -> typing.Iterator[typing.Tuple[node.Node, bsfs.schema.Predicate, typing.Any]]: for pred, value in self._tuples: - if pred in predicates: + if pred in principals: yield subject, pred, value ## EOF ## diff --git a/bsie/extractor/generic/path.py b/bsie/extractor/generic/path.py index 00165e3..23ae80b 100644 --- a/bsie/extractor/generic/path.py +++ b/bsie/extractor/generic/path.py @@ -10,7 +10,6 @@ import typing # bsie imports from bsie.base import extractor -from bsie.utils.bsfs import schema from bsie.utils import bsfs, node, ns # exports @@ -46,9 +45,9 @@ class Path(extractor.Extractor): self, subject: node.Node, content: str, - predicates: typing.Iterable[schema.Predicate], - ) -> typing.Iterator[typing.Tuple[node.Node, schema.Predicate, typing.Any]]: - for pred in predicates: + principals: typing.Iterable[bsfs.schema.Predicate], + ) -> typing.Iterator[typing.Tuple[node.Node, bsfs.schema.Predicate, typing.Any]]: + for pred in principals: # find callback clbk = self._callmap.get(pred) if clbk is None: diff --git a/bsie/extractor/generic/stat.py b/bsie/extractor/generic/stat.py index 0f4267f..1dcfedf 100644 --- a/bsie/extractor/generic/stat.py +++ b/bsie/extractor/generic/stat.py @@ -10,7 +10,6 @@ import typing # bsie imports from bsie.base import extractor -from bsie.utils.bsfs import schema as _schema from bsie.utils import bsfs, node, ns # exports @@ -46,9 +45,9 @@ class Stat(extractor.Extractor): self, subject: node.Node, content: os.stat_result, - predicates: typing.Iterable[_schema.Predicate], - ) -> typing.Iterator[typing.Tuple[node.Node, _schema.Predicate, typing.Any]]: - for pred in predicates: + principals: typing.Iterable[bsfs.schema.Predicate], + ) -> typing.Iterator[typing.Tuple[node.Node, bsfs.schema.Predicate, typing.Any]]: + for pred in principals: # find callback clbk = self._callmap.get(pred) if clbk is None: -- cgit v1.2.3 From 5850ff2bcb1052883cf301590126609b0657fbc9 Mon Sep 17 00:00:00 2001 From: Matthias Baumgartner Date: Sun, 18 Dec 2022 13:37:02 +0100 Subject: cosmetic changes --- bsie/extractor/generic/constant.py | 2 +- bsie/extractor/generic/path.py | 3 ++- bsie/extractor/generic/stat.py | 2 +- 3 files changed, 4 insertions(+), 3 deletions(-) (limited to 'bsie/extractor/generic') diff --git a/bsie/extractor/generic/constant.py b/bsie/extractor/generic/constant.py index cdb2ef6..11384e6 100644 --- a/bsie/extractor/generic/constant.py +++ b/bsie/extractor/generic/constant.py @@ -35,7 +35,7 @@ class Constant(extractor.Extractor): super().__init__(bsfs.schema.Schema.from_string(extractor.SCHEMA_PREAMBLE + schema)) # NOTE: Raises a KeyError if the predicate is not part of the schema self._tuples = tuple((self.schema.predicate(p_uri), value) for p_uri, value in tuples) - # FIXME: use schema instance for value checking + # TODO: use schema instance for value checking def __eq__(self, other: typing.Any) -> bool: return super().__eq__(other) \ diff --git a/bsie/extractor/generic/path.py b/bsie/extractor/generic/path.py index 23ae80b..7018e12 100644 --- a/bsie/extractor/generic/path.py +++ b/bsie/extractor/generic/path.py @@ -62,7 +62,8 @@ class Path(extractor.Extractor): def __filename(self, path: str) -> typing.Optional[str]: try: return os.path.basename(path) - except Exception: # some error, skip. + except Exception: # pylint: disable=broad-except # we explicitly want to catch everything + # some error, skip # FIXME: some kind of error reporting (e.g. logging)? # Options: (a) Fail silently (current); (b) Skip and report to log; # (c) Raise ExtractorError (aborts extraction); (d) separate content type diff --git a/bsie/extractor/generic/stat.py b/bsie/extractor/generic/stat.py index 1dcfedf..0b9ce29 100644 --- a/bsie/extractor/generic/stat.py +++ b/bsie/extractor/generic/stat.py @@ -63,7 +63,7 @@ class Stat(extractor.Extractor): """Return the file size.""" try: return content.st_size - except Exception: + except Exception: # pylint: disable=broad-except # we explicitly want to catch everything # FIXME: some kind of error reporting (e.g. logging) return None -- cgit v1.2.3