From ed2074ae88f2db6cb6b38716b43b35e29eb2e16c Mon Sep 17 00:00:00 2001 From: Matthias Baumgartner Date: Fri, 23 Dec 2022 16:25:51 +0100 Subject: filematcher: check file properties, formulate them as a string --- bsie/base/errors.py | 3 + bsie/utils/__init__.py | 2 + bsie/utils/filematcher/__init__.py | 20 +++ bsie/utils/filematcher/matcher.py | 177 ++++++++++++++++++++++++++ bsie/utils/filematcher/parser.py | 148 ++++++++++++++++++++++ setup.py | 2 +- test/utils/filematcher/__init__.py | 0 test/utils/filematcher/empty | 0 test/utils/filematcher/test_ast.py | 232 ++++++++++++++++++++++++++++++++++ test/utils/filematcher/test_parser.py | 146 +++++++++++++++++++++ test/utils/filematcher/testimage.jpg | Bin 0 -> 518 bytes test/utils/filematcher/textfile.t | 4 + 12 files changed, 733 insertions(+), 1 deletion(-) create mode 100644 bsie/utils/filematcher/__init__.py create mode 100644 bsie/utils/filematcher/matcher.py create mode 100644 bsie/utils/filematcher/parser.py create mode 100644 test/utils/filematcher/__init__.py create mode 100644 test/utils/filematcher/empty create mode 100644 test/utils/filematcher/test_ast.py create mode 100644 test/utils/filematcher/test_parser.py create mode 100644 test/utils/filematcher/testimage.jpg create mode 100644 test/utils/filematcher/textfile.t diff --git a/bsie/base/errors.py b/bsie/base/errors.py index dc3c30e..5fafd5b 100644 --- a/bsie/base/errors.py +++ b/bsie/base/errors.py @@ -39,4 +39,7 @@ class ProgrammingError(_BSIEError): class UnreachableError(ProgrammingError): """Bravo, you've reached a point in code that should logically not be reachable.""" +class ParserError(_BSIEError): + """Failed to parse due to invalid syntax or structures.""" + ## EOF ## diff --git a/bsie/utils/__init__.py b/bsie/utils/__init__.py index bd22236..3981dc7 100644 --- a/bsie/utils/__init__.py +++ b/bsie/utils/__init__.py @@ -11,9 +11,11 @@ import typing from . import bsfs from . import namespaces as ns from . import node +from . 
import filematcher # exports __all__: typing.Sequence[str] = ( + 'filematcher', 'bsfs', 'node', 'ns', diff --git a/bsie/utils/filematcher/__init__.py b/bsie/utils/filematcher/__init__.py new file mode 100644 index 0000000..b1c1b45 --- /dev/null +++ b/bsie/utils/filematcher/__init__.py @@ -0,0 +1,20 @@ +""" + +Part of the bsie module. +A copy of the license is provided with the project. +Author: Matthias Baumgartner, 2022 +""" +# imports +import typing + +# inner-module imports +from .matcher import Matcher +from .parser import parse + +# exports +__all__: typing.Sequence[str] = ( + 'Matcher', + 'parse', + ) + +## EOF ## diff --git a/bsie/utils/filematcher/matcher.py b/bsie/utils/filematcher/matcher.py new file mode 100644 index 0000000..164beeb --- /dev/null +++ b/bsie/utils/filematcher/matcher.py @@ -0,0 +1,177 @@ +""" + +Part of the bsie module. +A copy of the license is provided with the project. +Author: Matthias Baumgartner, 2021 +""" +# imports +from collections.abc import Callable, Collection, Hashable +import abc +import os +import typing +import magic + +# exports +__all__: typing.Sequence[str] = [] + + +## code ## + +# abstract nodes + +class Matcher(abc.ABC, Hashable, Callable, Collection): # type: ignore [misc] # Invalid base class Callable + """Matcher node base class.""" + + # child expressions or terminals + _childs: typing.Set[typing.Any] + + def __init__(self, *childs: typing.Any): + if len(childs) == 1 and isinstance(childs[0], (list, tuple, set)): + self._childs = set(childs[0]) + else: + self._childs = set(childs) + + def __contains__(self, needle: typing.Any) -> bool: + return needle in self._childs + + def __iter__(self) -> typing.Iterator[typing.Any]: + return iter(self._childs) + + def __len__(self) -> int: + return len(self._childs) + + def __repr__(self) -> str: + return f'{type(self).__name__}({self._childs})' + + def __hash__(self) -> int: + return hash((type(self), frozenset(self._childs))) # order-independent: matchers that compare equal must hash equal + + def __eq__(self, other: typing.Any) -> 
bool: + return isinstance(other, type(self)) \ + and self._childs == other._childs + + @abc.abstractmethod + def __call__(self, path: str) -> bool: # pylint: disable=arguments-differ + """Check if *path* satisfies the conditions set by the Matcher instance.""" + +class NOT(Matcher): + """Invert a matcher result.""" + def __init__(self, expr: Matcher): + super().__init__(expr) + def __call__(self, path: str) -> bool: + return not next(iter(self._childs))(path) + +# aggregate nodes + +class Aggregate(Matcher): # pylint: disable=too-few-public-methods # Yeah, it's an interface... + """Aggregation function base class (And, Or).""" + +class And(Aggregate): + """Accept only if all conditions are satisfied.""" + def __call__(self, path: str) -> bool: + for itm in self: + if not itm(path): + return False + return True + +class Or(Aggregate): + """Accept only if at least one condition is satisfied.""" + def __call__(self, path: str) -> bool: + for itm in self: + if itm(path): + return True + return False + + +# criteria nodes + +class Criterion(Matcher): + """Criterion base class. 
Limits acceptance to certain values.""" + def accepted(self) -> typing.Set[typing.Any]: + """Return a set of accepted values.""" + return self._childs + +# criteria w/o value (valueless) + +class Any(Criterion): + """Accepts anything.""" + def __call__(self, path: str) -> bool: + return True + +class Nothing(Criterion): + """Accepts nothing.""" + def __call__(self, path: str) -> bool: + return False + +class Exists(Criterion): + """Filters by existence.""" + def __call__(self, path: str) -> bool: + return os.path.exists(path) + +class IsFile(Criterion): + """Checks if the path is a regular file.""" + def __call__(self, path: str) -> bool: + return os.path.isfile(path) + +class IsDir(Criterion): + """Checks if the path is a directory.""" + def __call__(self, path: str) -> bool: + return os.path.isdir(path) + +class IsLink(Criterion): + """Checks if the path is a link.""" + def __call__(self, path: str) -> bool: + return os.path.islink(path) + +class IsAbs(Criterion): + """Checks if the path is an absolute path.""" + def __call__(self, path: str) -> bool: + return os.path.isabs(path) + +class IsRel(Criterion): + """Checks if the path is a relative path.""" + def __call__(self, path: str) -> bool: + return not os.path.isabs(path) + +class IsMount(Criterion): + """Checks if the path is a mount point.""" + def __call__(self, path: str) -> bool: + return os.path.ismount(path) + +class IsEmpty(Criterion): + """Checks if the path is an empty file.""" + def __call__(self, path: str) -> bool: + return os.path.exists(path) and os.stat(path).st_size == 0 + +class IsReadable(Criterion): + """Checks if the path is readable.""" + def __call__(self, path: str) -> bool: + return os.path.exists(path) and os.access(path, os.R_OK) + +class IsWritable(Criterion): + """Checks if the path is writable.""" + def __call__(self, path: str) -> bool: + return os.path.exists(path) and os.access(path, os.W_OK) + +class IsExecutable(Criterion): + """Checks if the path is executable.""" + def 
__call__(self, path: str) -> bool: + return os.path.exists(path) and os.access(path, os.X_OK) + +# criteria w/ value + +class Extension(Criterion): + """Filters by file extension (without the dot).""" + def __call__(self, path: str) -> bool: + _, ext = os.path.splitext(path) + return ext[1:] in self.accepted() + +class Mime(Criterion): + """Filters by mime type.""" + def __call__(self, path: str) -> bool: + try: + return magic.from_file(path, mime=True).lower() in self.accepted() + except FileNotFoundError: + return False + +## EOF ## diff --git a/bsie/utils/filematcher/parser.py b/bsie/utils/filematcher/parser.py new file mode 100644 index 0000000..0654742 --- /dev/null +++ b/bsie/utils/filematcher/parser.py @@ -0,0 +1,148 @@ +""" + +Part of the bsie module. +A copy of the license is provided with the project. +Author: Matthias Baumgartner, 2021 +""" +# standard imports +import typing + +# non-standard imports +import pyparsing +from pyparsing import printables, alphas8bit, punc8bit, QuotedString, Word, \ + delimitedList, Or, CaselessKeyword, Group, oneOf, Optional + +# bsie imports +from bsie.base import errors + +# inner-module imports +from . import matcher + +# exports +__all__: typing.Sequence[str] = ( + 'parse', + ) + + +## code ## + +class FileMatcherParser(): + """ + EXPR := RULESET | RULESET "|" RULESET + RULESET := RULE | RULE, RULE + RULE := CRITERION OP VALUE | CRITERION OP {VALUES} | VALUELESS + OP := != | = + VALUES := VALUE | VALUE, VALUE + VALUE := [word] + CRITERION := mime | extension | ... 
+ """ + + # criteria matcher nodes w/ arguments + _CRITERIA: typing.Dict[str, typing.Type[matcher.Matcher]] = { + 'extension': matcher.Extension, + 'mime': matcher.Mime, + } + + # criteria matcher nodes w/o arguments + _VALUELESS: typing.Dict[str, typing.Type[matcher.Matcher]] = { + 'any': matcher.Any, + 'nothing': matcher.Nothing, + 'exists': matcher.Exists, + 'isfile': matcher.IsFile, + 'isdir': matcher.IsDir, + 'islink': matcher.IsLink, + 'isabs': matcher.IsAbs, + 'isrel': matcher.IsRel, + 'ismount': matcher.IsMount, + 'empty': matcher.IsEmpty, + 'readable': matcher.IsReadable, + 'writable': matcher.IsWritable, + 'executable': matcher.IsExecutable, + } + + # pyparsing parser instance. + _parser: pyparsing.ParseExpression + + def __init__(self): + # build the parser + # VALUE := [word] + alphabet = (printables + alphas8bit + punc8bit).translate(str.maketrans('', '', ',{}|=')) + value = QuotedString(quoteChar='"', escChar='\\') ^ Word(alphabet) + # CRITERION := mime | extension | ... + criterion = Or([CaselessKeyword(p) for p in self._CRITERIA]).setResultsName('criterion') + valueless = Or([CaselessKeyword(p) for p in self._VALUELESS]).setResultsName('criterion') + # VALUES := VALUE | VALUE, VALUE + values = delimitedList(value, delim=',').setResultsName('value') + # OP := '=' | '!=' + eqop = oneOf('= !=').setResultsName('op') + # RULE := CRITERION OP VALUE | CRITERION OP {VALUES} | VALUELESS + rule_none = Group(Optional('!').setResultsName('op') + valueless).setResultsName('rule_none') + rule_one = Group(criterion + eqop + value.setResultsName('value')).setResultsName('rule_one') + rule_few = Group(criterion + eqop + '{' + values + '}').setResultsName('rule_few') + # RULESET := RULE | RULE, RULE + ruleset = Group(delimitedList(rule_none ^ rule_one ^ rule_few, delim=',')) + # EXPR := RULESET | RULESET \| RULESET + self._parser = delimitedList(ruleset, delim='|') + + def parse(self, query: str) -> matcher.Matcher: # pylint: disable=too-many-branches + """Build a 
file matcher from a rule definition.""" + # preprocess the query + query = query.strip() + + # empty query + if len(query) == 0: + return matcher.Any() + + try: + parsed = self._parser.parseString(query, parseAll=True) + except pyparsing.ParseException as err: + raise errors.ParserError(f'Cannot parse query {err}') from err + + # convert to Matcher + rules = [] + for exp in parsed: + tokens = [] + for rule in exp: + # fetch accepted values + if rule.getName() == 'rule_none': + accepted = [] + elif rule.getName() == 'rule_one': + accepted = [rule.value] + elif rule.getName() == 'rule_few': + accepted = list(rule.value) + else: # prevented by grammar + raise errors.UnreachableError('Invalid rule definition') + + # build criterion + if rule.criterion in self._VALUELESS: + cls = self._VALUELESS[rule.criterion] + if rule.op == '!': + tokens.append(matcher.NOT(cls())) + else: + tokens.append(cls()) + elif rule.criterion in self._CRITERIA: + cls = self._CRITERIA[rule.criterion] + if rule.op == '!=': + tokens.append(matcher.NOT(cls(accepted))) + else: + tokens.append(cls(accepted)) + else: # prevented by grammar + raise errors.UnreachableError(f'Invalid condition "{rule.criterion}"') + + # And-aggregate rules in one ruleset (if needed) + tokens = matcher.And(tokens) if len(tokens) > 1 else tokens[0] + rules.append(tokens) + + # Or-aggregate rulesets + expr = matcher.Or(rules) if len(rules) > 1 else rules[0] + + return expr + +# build default instance +file_match_parser = FileMatcherParser() + +def parse(query: str) -> matcher.Matcher: + """Shortcut for FileMatcherParser().parse(query), using a shared default parser instance.""" + return file_match_parser.parse(query) + +## EOF ## diff --git a/setup.py b/setup.py index ee9e0fd..8e0efd4 100644 --- a/setup.py +++ b/setup.py @@ -14,7 +14,7 @@ setup( url='https://www.igsor.net/projects/blackstar/bsie/', download_url='https://pip.igsor.net', packages=('bsie', ), - install_requires=('rdflib', 'bsfs'), + install_requires=('rdflib', 'bsfs', 'python-magic', 'pyparsing'), python_requires=">=3.7", ) 
diff --git a/test/utils/filematcher/__init__.py b/test/utils/filematcher/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/test/utils/filematcher/empty b/test/utils/filematcher/empty new file mode 100644 index 0000000..e69de29 diff --git a/test/utils/filematcher/test_ast.py b/test/utils/filematcher/test_ast.py new file mode 100644 index 0000000..ff4b86d --- /dev/null +++ b/test/utils/filematcher/test_ast.py @@ -0,0 +1,232 @@ +""" + +Part of the bsie test suite. +A copy of the license is provided with the project. +Author: Matthias Baumgartner, 2022 +""" +# imports +import os +import stat +import tempfile +import unittest + +# objects to test +from bsie.utils.filematcher import matcher + + +## code ## + +class FakeMatcher(matcher.Matcher): + def __call__(self, *args, **kwargs): + pass + +class FakeCriterion(matcher.Criterion): + def __call__(self, *args, **kwargs): + pass + +class FakeAggregate(matcher.Aggregate): + def __call__(self, *args, **kwargs): + pass + +class TestMatcher(unittest.TestCase): + def setUp(self): + # paths + self.image = os.path.join(os.path.dirname(__file__), 'testimage.jpg') + self.text= os.path.join(os.path.dirname(__file__), 'textfile.t') + self.empty = os.path.join(os.path.dirname(__file__), 'empty') + self.missing = os.path.join(os.path.dirname(__file__), 'missing.jpg') + + def test_matcher_skeleton(self): + # node: iteration and length + self.assertSetEqual(set(iter(FakeMatcher(1,2,3))), {1,2,3}) + self.assertSetEqual(set(iter(FakeMatcher([1,2,3]))), {1,2,3}) + self.assertEqual(len(FakeMatcher([1,2,3])), 3) + self.assertEqual(len(FakeMatcher(1,2,3)), 3) + self.assertEqual(len(FakeMatcher()), 0) + self.assertIn(1, FakeMatcher(1,2,3)) + self.assertIn(3, FakeMatcher([1,2,3])) + self.assertNotIn(0, FakeMatcher(1,2,3)) + self.assertNotIn(4, FakeMatcher([1,2,3])) + # node: comparison + self.assertEqual(FakeMatcher([1,2,3]), FakeMatcher([1,2,3])) + self.assertEqual(FakeMatcher(1,2,3), FakeMatcher(1,2,3)) + 
self.assertEqual(FakeMatcher(1,2,3), FakeMatcher([1,2,3])) + self.assertEqual(FakeMatcher(1,2,3), FakeMatcher((1,2,3))) + self.assertNotEqual(FakeMatcher(1,2,3), FakeMatcher(1,2,4)) + self.assertNotEqual(FakeMatcher(1,2,3), FakeMatcher(1,2,3,4)) + self.assertNotEqual(FakeMatcher(1,2,3), FakeMatcher(1,2)) + self.assertEqual(hash(FakeMatcher([1,2,3])), hash(FakeMatcher([1,2,3]))) + self.assertEqual(hash(FakeMatcher(1,2,3)), hash(FakeMatcher(1,2,3))) + self.assertEqual(hash(FakeMatcher(1,2,3)), hash(FakeMatcher([1,2,3]))) + self.assertEqual(hash(FakeMatcher(1,2,3)), hash(FakeMatcher((1,2,3)))) + # node: representation + self.assertEqual(repr(FakeMatcher(1,2,3)), 'FakeMatcher({1, 2, 3})') + + # criterion + self.assertEqual(repr(FakeCriterion(1,2,3)), 'FakeCriterion({1, 2, 3})') + self.assertEqual(hash(FakeCriterion(1,2,3)), hash(FakeCriterion(1,2,3))) + self.assertEqual(FakeCriterion(1,2,3), FakeCriterion([1,2,3])) + self.assertNotEqual(FakeCriterion(1,2,3), FakeCriterion(1,2)) + self.assertNotEqual(FakeCriterion(1,2,3), FakeMatcher(1,2,3)) + self.assertSetEqual(FakeCriterion(1,2,3).accepted(), {1,2,3}) + + # aggregate + self.assertEqual(repr(FakeAggregate(1,2,3)), 'FakeAggregate({1, 2, 3})') + self.assertNotEqual(FakeAggregate(1,2,3), FakeMatcher(1,2,3)) + + def test_any(self): + self.assertTrue(matcher.Any()(self.image)) + self.assertTrue(matcher.Any()(self.text)) + self.assertTrue(matcher.Any()(self.missing)) + self.assertTrue(matcher.Any()(self.empty)) + + def test_nothing(self): + self.assertFalse(matcher.Nothing()(self.image)) + self.assertFalse(matcher.Nothing()(self.text)) + self.assertFalse(matcher.Nothing()(self.missing)) + self.assertFalse(matcher.Nothing()(self.empty)) + + def test_exists(self): + self.assertTrue(matcher.Exists()(self.image)) + self.assertTrue(matcher.Exists()(self.text)) + self.assertTrue(matcher.Exists()(self.empty)) + self.assertFalse(matcher.Exists()(self.missing)) + + def test_isfile(self): + 
self.assertTrue(matcher.IsFile()(self.image)) + self.assertTrue(matcher.IsFile()(self.text)) + self.assertFalse(matcher.IsFile()(self.missing)) + self.assertFalse(matcher.IsFile()(os.path.dirname(self.image))) + + def test_isdir(self): + self.assertTrue(matcher.IsDir()(os.path.dirname(self.image))) + self.assertFalse(matcher.IsDir()(self.image)) + self.assertFalse(matcher.IsDir()(self.text)) + self.assertFalse(matcher.IsDir()(self.missing)) + + def test_islink(self): + self.assertFalse(matcher.IsLink()(os.path.dirname(self.image))) + self.assertFalse(matcher.IsLink()(self.image)) + self.assertFalse(matcher.IsLink()(self.text)) + _, temp = tempfile.mkstemp(prefix='bsie-test-') + templink = temp + '-link' + os.symlink(temp, templink) + self.assertTrue(matcher.IsLink()(templink)) + os.unlink(templink) + os.unlink(temp) + + def test_isabs(self): + self.assertTrue(matcher.IsAbs()(os.path.abspath(self.image))) + self.assertTrue(matcher.IsAbs()(os.path.abspath(self.text))) + self.assertFalse(matcher.IsAbs()(os.path.relpath(self.text, os.path.dirname(self.text)))) + + def test_isrel(self): + self.assertFalse(matcher.IsRel()(os.path.abspath(self.image))) + self.assertFalse(matcher.IsRel()(os.path.abspath(self.text))) + self.assertTrue(matcher.IsRel()(os.path.relpath(self.text, os.path.dirname(self.text)))) + self.assertTrue(matcher.IsRel()(os.path.basename(self.text))) + + def test_ismount(self): + self.assertFalse(matcher.IsMount()(self.image)) + self.assertFalse(matcher.IsMount()(self.text)) + self.assertFalse(matcher.IsMount()(self.missing)) + # there's no reasonable way to test a positive case + + def test_isempty(self): + self.assertTrue(matcher.IsEmpty()(self.empty)) + self.assertFalse(matcher.IsEmpty()(self.image)) + self.assertFalse(matcher.IsEmpty()(self.text)) + self.assertFalse(matcher.IsEmpty()(self.missing)) + + def test_isreadable(self): + self.assertTrue(matcher.IsReadable()(self.empty)) + self.assertTrue(matcher.IsReadable()(self.image)) + 
self.assertFalse(matcher.IsReadable()(self.missing)) + _, temp = tempfile.mkstemp(prefix='bsie-test-') + os.chmod(temp, 0) + self.assertFalse(matcher.IsReadable()(temp)) + os.unlink(temp) + + def test_iswritable(self): + self.assertTrue(matcher.IsWritable()(self.empty)) + self.assertTrue(matcher.IsWritable()(self.image)) + self.assertFalse(matcher.IsWritable()(self.missing)) + _, temp = tempfile.mkstemp(prefix='bsie-test-') + os.chmod(temp, 0) + self.assertFalse(matcher.IsWritable()(temp)) + os.unlink(temp) + + def test_isexecutable(self): + self.assertFalse(matcher.IsExecutable()(self.empty)) + self.assertFalse(matcher.IsExecutable()(self.image)) + self.assertFalse(matcher.IsExecutable()(self.missing)) + _, temp = tempfile.mkstemp(prefix='bsie-test-') + os.chmod(temp, stat.S_IEXEC) + self.assertTrue(matcher.IsExecutable()(temp)) + os.unlink(temp) + + def test_extension(self): + self.assertTrue(matcher.Extension('jpg')(self.image)) + self.assertTrue(matcher.Extension('jpg', 'png')(self.image)) + self.assertTrue(matcher.Extension('jpg', 't')(self.text)) + self.assertTrue(matcher.Extension('jpg', 'png', 't')(self.missing)) + self.assertTrue(matcher.Extension('')(self.empty)) + + self.assertFalse(matcher.Extension()(self.image)) + self.assertFalse(matcher.Extension('jpeg')(self.image)) + self.assertFalse(matcher.Extension('.t')(self.text)) + self.assertFalse(matcher.Extension('png', 't')(self.missing)) + self.assertFalse(matcher.Extension('tiff')(self.empty)) + + def test_mime(self): + self.assertTrue(matcher.Mime('image/jpeg')(self.image)) + self.assertTrue(matcher.Mime('image/tiff', 'image/jpeg')(self.image)) + self.assertTrue(matcher.Mime('text/plain', 'image/jpeg')(self.text)) + self.assertTrue(matcher.Mime('inode/x-empty')(self.empty)) + + self.assertFalse(matcher.Mime()(self.image)) + self.assertFalse(matcher.Mime('image')(self.image)) + self.assertFalse(matcher.Mime('image/tiff', 'image/png')(self.image)) + self.assertFalse(matcher.Mime('')(self.text)) + 
self.assertFalse(matcher.Mime('text')(self.text)) + self.assertFalse(matcher.Mime('tiff')(self.empty)) + self.assertFalse(matcher.Mime()(self.empty)) + self.assertFalse(matcher.Mime('')(self.empty)) + self.assertFalse(matcher.Mime()(self.missing)) + self.assertFalse(matcher.Mime('')(self.missing)) + self.assertFalse(matcher.Mime('inode/x-empty')(self.missing)) + + def test_not(self): + self.assertFalse(matcher.NOT(matcher.Mime('image/jpeg'))(self.image)) + self.assertTrue(matcher.NOT(matcher.Mime('text/plain'))(self.image)) + + def test_and(self): + self.assertTrue(matcher.And(matcher.Mime('image/jpeg'), matcher.Extension('jpg'))(self.image)) + self.assertTrue(matcher.And(matcher.Mime('image/jpeg'), matcher.Extension('jpg', 'tiff'))(self.image)) + self.assertTrue(matcher.And(matcher.Mime('text/plain'), matcher.Extension('t', 'tiff'))(self.text)) + + self.assertFalse(matcher.And(matcher.Mime('image/jpeg'), matcher.Extension('tiff'))(self.image)) + self.assertFalse(matcher.And(matcher.Mime('text/plain'), matcher.Extension('jpg'))(self.image)) + self.assertFalse(matcher.And(matcher.Mime('inode/x-empty'), matcher.Extension('jpg'))(self.missing)) + self.assertFalse(matcher.And(matcher.Mime('image/jpeg'), matcher.Extension('jpg', 't'))(self.text)) + + def test_or(self): + self.assertTrue(matcher.Or(matcher.Mime('image/jpeg'))(self.image)) + self.assertFalse(matcher.Or(matcher.Mime('text/plain'))(self.image)) + + self.assertTrue(matcher.Or(matcher.Mime('image/jpeg'), matcher.Extension('jpg'))(self.image)) + self.assertTrue(matcher.Or(matcher.Mime('image/jpeg'), matcher.Extension('t'))(self.image)) + self.assertTrue(matcher.Or(matcher.Mime('text/plain'), matcher.Extension('jpg', 'tiff'))(self.image)) + self.assertTrue(matcher.Or(matcher.Mime('text/plain'), matcher.Extension('tiff'))(self.text)) + self.assertTrue(matcher.Or(matcher.Mime('image/jpeg'), matcher.Extension('jpg'))(self.missing)) + + self.assertFalse(matcher.Or(matcher.Mime('text/plain'), 
matcher.Extension('tiff'))(self.image)) + self.assertFalse(matcher.Or(matcher.Mime('inode/x-empty'), matcher.Extension('jpg', 'tiff'))(self.text)) + + +## main ## + +if __name__ == '__main__': + unittest.main() + +## EOF ## diff --git a/test/utils/filematcher/test_parser.py b/test/utils/filematcher/test_parser.py new file mode 100644 index 0000000..a81d2ed --- /dev/null +++ b/test/utils/filematcher/test_parser.py @@ -0,0 +1,146 @@ +""" + +Part of the bsie test suite. +A copy of the license is provided with the project. +Author: Matthias Baumgartner, 2022 +""" +# imports +import unittest + +# inner-module imports +from bsie.base import errors +from bsie.utils.filematcher import matcher + +# objects to test +from bsie.utils.filematcher import parse + + +## code ## + +class TestFileMatcherParser(unittest.TestCase): + def test_empty(self): + # no criterion + self.assertEqual(parse(''), matcher.Any()) + + def test_ruleone(self): + # single criterion, single value + self.assertEqual(parse('mime=text'), matcher.Mime('text')) + self.assertEqual(parse('MIME=text'), matcher.Mime('text')) + self.assertEqual(parse('MiMe=text'), matcher.Mime('text')) + self.assertEqual(parse('MIME=TEXT'), matcher.Mime('TEXT')) + self.assertEqual(parse('mime={text}'), matcher.Mime('text')) + self.assertEqual(parse('mime=image/jpeg'), matcher.Mime('image/jpeg')) + self.assertEqual(parse('mime="image/jpeg"'), matcher.Mime('image/jpeg')) + self.assertEqual(parse('extension=pdf'), matcher.Extension('pdf')) + self.assertEqual(parse('extension={pdf}'), matcher.Extension('pdf')) + self.assertEqual(parse('extension="pdf"'), matcher.Extension('pdf')) + self.assertEqual(parse('extension="foo,bar"'), matcher.Extension('foo,bar')) + self.assertEqual(parse('extension="f{oo|ba}r"'), matcher.Extension('f{oo|ba}r')) + self.assertEqual(parse('extension=""'), matcher.Extension('')) + self.assertEqual(parse('extension="foo'), matcher.Extension('"foo')) + self.assertRaises(errors.ParserError, parse, 
'extension=foo=bar') + self.assertRaises(errors.ParserError, parse, 'extension=') + self.assertRaises(errors.ParserError, parse, 'extension={}') + self.assertRaises(errors.ParserError, parse, 'extension={foo') + + # valueless + self.assertEqual(parse('any'), matcher.Any()) + self.assertEqual(parse('nothing'), matcher.Nothing()) + self.assertEqual(parse('exists'), matcher.Exists()) + self.assertEqual(parse('any, nothing'), matcher.And(matcher.Any(), matcher.Nothing())) + self.assertEqual(parse('any, nothing, exists'), + matcher.And(matcher.Any(), matcher.Nothing(), matcher.Exists())) + self.assertEqual(parse('any, extension=jpg'), matcher.And(matcher.Any(), matcher.Extension('jpg'))) + self.assertRaises(errors.ParserError, parse, 'mime') + self.assertRaises(errors.ParserError, parse, 'extension') + self.assertRaises(errors.ParserError, parse, 'exists=True') + self.assertRaises(errors.ParserError, parse, 'exists=foo') + self.assertEqual(parse('!any'), matcher.NOT(matcher.Any())) + self.assertEqual(parse('!any, nothing'), matcher.And(matcher.NOT(matcher.Any()), matcher.Nothing())) + self.assertEqual(parse('!any, extension=jpg'), + matcher.And(matcher.NOT(matcher.Any()), matcher.Extension('jpg'))) + self.assertRaises(errors.ParserError, parse, '!mime') + self.assertRaises(errors.ParserError, parse, '!extension') + + def test_rulefew(self): + # single criterion, multiple values + self.assertEqual(parse('extension={jpg, jpeg}'), matcher.Extension('jpg', 'jpeg')) + self.assertEqual(parse('mime={image/jpeg, image/png}'), + matcher.Mime('image/jpeg', 'image/png')) + self.assertRaises(errors.ParserError, parse, 'mime=image/png, image/jpeg') + self.assertRaises(errors.ParserError, parse, 'extension=jpg, jpeg') + + def test_rulesets_ruleone(self): + # mutliple criteria, single value + self.assertEqual(parse('mime=text, extension=t'), + matcher.And(matcher.Mime('text'), matcher.Extension('t'))) + self.assertEqual(parse('mime=text/plain, extension=t'), + 
matcher.And(matcher.Mime('text/plain'), matcher.Extension('t'))) + self.assertRaises(errors.ParserError, parse, 'mime=text/plain extension=t') + self.assertRaises(errors.ParserError, parse, 'mime={image/jpeg, extension=jpg'), + + def test_rulesets_rulefew(self): + # multiple criteria, multiple values + self.assertEqual(parse('mime=image/jpeg, extension={jpg, jpeg}'), + matcher.And(matcher.Mime('image/jpeg'), matcher.Extension('jpg', 'jpeg'))) + self.assertEqual(parse('mime={image/jpeg, image/tiff}, extension={jpg, jpeg}'), + matcher.And(matcher.Mime('image/jpeg', 'image/tiff'), matcher.Extension('jpg', 'jpeg'))) + self.assertEqual(parse('mime={image/jpeg, image/tiff}, extension=jpg'), + matcher.And(matcher.Mime('image/jpeg', 'image/tiff'), matcher.Extension('jpg'))) + self.assertRaises(errors.ParserError, parse, 'mime={image/jpeg, image/tiff, extension=jpg') + self.assertRaises(errors.ParserError, parse, 'mime=image/jpeg, image/tiff, extension=jpg') + self.assertRaises(errors.ParserError, parse, 'mime=image/jpeg, extension=jpg, ') + + def test_not(self): + self.assertEqual(parse('extension!=jpg'), matcher.NOT(matcher.Extension('jpg'))) + self.assertEqual(parse('extension!={jpg, jpeg}'), + matcher.NOT(matcher.Extension('jpg', 'jpeg'))) + self.assertEqual(parse('extension!=jpg, mime=image/jpeg'), + matcher.And(matcher.NOT(matcher.Extension('jpg')), matcher.Mime('image/jpeg'))) + self.assertEqual(parse('extension!=jpg, mime!=image/jpeg'), + matcher.And(matcher.NOT(matcher.Extension('jpg')), matcher.NOT(matcher.Mime('image/jpeg')))) + self.assertEqual(parse('extension!=jpg | mime=image/jpeg'), + matcher.Or(matcher.NOT(matcher.Extension('jpg')), matcher.Mime('image/jpeg'))) + self.assertEqual(parse('extension!=jpg | mime!=image/jpeg'), + matcher.Or(matcher.NOT(matcher.Extension('jpg')), matcher.NOT(matcher.Mime('image/jpeg')))) + + def test_expr(self): + # multiple rulesets + self.assertEqual(parse('mime=image/jpeg | extension=jpg'), + 
matcher.Or(matcher.Mime('image/jpeg'), matcher.Extension('jpg'))) + self.assertEqual(parse('mime=image/jpeg | extension={jpg, jpeg}'), + matcher.Or(matcher.Mime('image/jpeg'), matcher.Extension('jpg', 'jpeg'))) + self.assertEqual(parse('mime={image/jpeg, image/png} | extension={jpg, jpeg}'), + matcher.Or(matcher.Mime('image/jpeg', 'image/png'), matcher.Extension('jpg', 'jpeg'))) + self.assertEqual(parse('mime=image/jpeg , extension=jpg | extension=jpg'), + matcher.Or(matcher.And(matcher.Mime('image/jpeg'), matcher.Extension('jpg')), matcher.Extension('jpg'))) + self.assertEqual(parse( + 'mime={jpeg, text}, extension={jpg,t} | extension={png,txt}, mime={png, tiff}'), + matcher.Or( + matcher.And(matcher.Mime('jpeg', 'text'), matcher.Extension('jpg', 't')), + matcher.And(matcher.Extension('png', 'txt'), matcher.Mime('png', 'tiff')))) + self.assertEqual(parse('mime=text | extension=jpg | extension=png | mime=png'), + matcher.Or(matcher.Mime('text'), matcher.Extension('jpg'), matcher.Extension('png'), matcher.Mime('png'))) + self.assertRaises(errors.ParserError, parse, 'mime=text |') + self.assertRaises(errors.ParserError, parse, '| mime=text') + self.assertRaises(errors.ParserError, parse, 'extension=png | mime=text, ') + + def test_invalid(self): + # Invalid parses + self.assertRaises(errors.ParserError, parse, "extension=") # Empty value + self.assertRaises(errors.ParserError, parse, "mime=foo,bar") # Escaping + self.assertRaises(errors.ParserError, parse, "mime='foo,bar") # Quoting + self.assertRaises(errors.ParserError, parse, "mime=\"foo,bar") # Quoting + + # Invalid input + self.assertRaises(AttributeError, parse, None) + self.assertRaises(AttributeError, parse, 123) + self.assertRaises(AttributeError, parse, [123,321]) + + +## main ## + +if __name__ == '__main__': + unittest.main() + +## EOF ## diff --git a/test/utils/filematcher/testimage.jpg b/test/utils/filematcher/testimage.jpg new file mode 100644 index 0000000..ea7af63 Binary files /dev/null and 
b/test/utils/filematcher/testimage.jpg differ diff --git a/test/utils/filematcher/textfile.t b/test/utils/filematcher/textfile.t new file mode 100644 index 0000000..c389011 --- /dev/null +++ b/test/utils/filematcher/textfile.t @@ -0,0 +1,4 @@ +Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. +Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat. +Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur. +Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum. -- cgit v1.2.3 From 266c2c9a072bf3289fd7f2d75278b7d59528378c Mon Sep 17 00:00:00 2001 From: Matthias Baumgartner Date: Sat, 24 Dec 2022 10:27:09 +0100 Subject: package restructuring: base * Reader and Extractor to respective reader/extractor modules * ReaderBuilder to reader module * ExtractorBuilder to extractor module * Loading module in utils (safe_load, unpack_name) * Pipeline and PipelineBuilder to lib module * errors to utils * documentation: "standard import" and "external import" --- bsie/apps/index.py | 16 +-- bsie/apps/info.py | 16 ++- bsie/base/__init__.py | 24 ---- bsie/base/errors.py | 45 ------- bsie/base/extractor.py | 103 -------------- bsie/base/reader.py | 47 ------- bsie/extractor/__init__.py | 11 +- bsie/extractor/base.py | 103 ++++++++++++++ bsie/extractor/builder.py | 77 +++++++++++ bsie/extractor/generic/constant.py | 10 +- bsie/extractor/generic/path.py | 8 +- bsie/extractor/generic/stat.py | 10 +- bsie/lib/__init__.py | 4 +- bsie/lib/bsie.py | 6 +- bsie/lib/builder.py | 85 ++++++++++++ bsie/lib/pipeline.py | 145 ++++++++++++++++++++ bsie/reader/__init__.py | 13 ++ bsie/reader/base.py | 47 +++++++ bsie/reader/builder.py | 74 ++++++++++ bsie/reader/path.py | 8 +- bsie/reader/stat.py | 9 +- bsie/tools/__init__.py | 20 --- bsie/tools/builder.py | 226 
------------------------------- bsie/tools/pipeline.py | 144 -------------------- bsie/utils/__init__.py | 9 +- bsie/utils/errors.py | 45 +++++++ bsie/utils/filematcher/parser.py | 6 +- bsie/utils/loading.py | 54 ++++++++ setup.py | 2 +- test/base/__init__.py | 0 test/base/test_extractor.py | 70 ---------- test/base/test_reader.py | 45 ------- test/extractor/generic/test_path.py | 6 +- test/extractor/generic/test_stat.py | 6 +- test/extractor/test_base.py | 70 ++++++++++ test/extractor/test_builder.py | 103 ++++++++++++++ test/lib/test_bsie.py | 24 ++-- test/lib/test_builder.py | 107 +++++++++++++++ test/lib/test_pipeline.py | 175 ++++++++++++++++++++++++ test/reader/test_base.py | 45 +++++++ test/reader/test_builder.py | 54 ++++++++ test/reader/test_stat.py | 4 +- test/tools/__init__.py | 0 test/tools/test_builder.py | 246 ---------------------------------- test/tools/test_pipeline.py | 176 ------------------------ test/tools/testfile.t | 1 - test/utils/filematcher/test_parser.py | 6 +- test/utils/test_loading.py | 48 +++++++ 48 files changed, 1337 insertions(+), 1216 deletions(-) delete mode 100644 bsie/base/__init__.py delete mode 100644 bsie/base/errors.py delete mode 100644 bsie/base/extractor.py delete mode 100644 bsie/base/reader.py create mode 100644 bsie/extractor/base.py create mode 100644 bsie/extractor/builder.py create mode 100644 bsie/lib/builder.py create mode 100644 bsie/lib/pipeline.py create mode 100644 bsie/reader/base.py create mode 100644 bsie/reader/builder.py delete mode 100644 bsie/tools/__init__.py delete mode 100644 bsie/tools/builder.py delete mode 100644 bsie/tools/pipeline.py create mode 100644 bsie/utils/errors.py create mode 100644 bsie/utils/loading.py delete mode 100644 test/base/__init__.py delete mode 100644 test/base/test_extractor.py delete mode 100644 test/base/test_reader.py create mode 100644 test/extractor/test_base.py create mode 100644 test/extractor/test_builder.py create mode 100644 test/lib/test_builder.py create mode 
100644 test/lib/test_pipeline.py create mode 100644 test/reader/test_base.py create mode 100644 test/reader/test_builder.py delete mode 100644 test/tools/__init__.py delete mode 100644 test/tools/test_builder.py delete mode 100644 test/tools/test_pipeline.py delete mode 100644 test/tools/testfile.t create mode 100644 test/utils/test_loading.py diff --git a/bsie/apps/index.py b/bsie/apps/index.py index 1dbfdd8..0c6296f 100644 --- a/bsie/apps/index.py +++ b/bsie/apps/index.py @@ -4,16 +4,16 @@ Part of the bsie module. A copy of the license is provided with the project. Author: Matthias Baumgartner, 2022 """ -# imports +# standard imports import argparse import os import typing # bsie imports -from bsie.base import errors -from bsie.lib import BSIE -from bsie.tools import builder -from bsie.utils import bsfs +from bsie.extractor import ExtractorBuilder +from bsie.lib import BSIE, PipelineBuilder +from bsie.reader import ReaderBuilder +from bsie.utils import bsfs, errors # exports __all__: typing.Sequence[str] = ( @@ -44,9 +44,9 @@ def main(argv): # FIXME: Read reader/extractor configs from a config file # reader builder - rbuild = builder.ReaderBuilder({}) + rbuild = ReaderBuilder({}) # extractor builder - ebuild = builder.ExtractorBuilder([ + ebuild = ExtractorBuilder([ {'bsie.extractor.generic.path.Path': {}}, {'bsie.extractor.generic.stat.Stat': {}}, {'bsie.extractor.generic.constant.Constant': dict( @@ -60,7 +60,7 @@ def main(argv): )}, ]) # pipeline builder - pbuild = builder.PipelineBuilder( + pbuild = PipelineBuilder( bsfs.Namespace(args.user + ('/' if not args.user.endswith('/') else '')), rbuild, ebuild, diff --git a/bsie/apps/info.py b/bsie/apps/info.py index eaf1f71..a4e611c 100644 --- a/bsie/apps/info.py +++ b/bsie/apps/info.py @@ -4,15 +4,16 @@ Part of the bsie module. A copy of the license is provided with the project. 
Author: Matthias Baumgartner, 2022 """ -# imports +# standard imports import argparse import sys import typing # bsie imports -from bsie.base import errors -from bsie.tools import builder -from bsie.utils import bsfs +from bsie.extractor import ExtractorBuilder +from bsie.lib import PipelineBuilder +from bsie.reader import ReaderBuilder +from bsie.utils import bsfs, errors # exports __all__: typing.Sequence[str] = ( @@ -31,9 +32,10 @@ def main(argv): # FIXME: Read reader/extractor configs from a config file # reader builder - rbuild = builder.ReaderBuilder({}) + rbuild = ReaderBuilder({ + }) # extractor builder - ebuild = builder.ExtractorBuilder([ + ebuild = ExtractorBuilder([ {'bsie.extractor.generic.path.Path': {}}, {'bsie.extractor.generic.stat.Stat': {}}, {'bsie.extractor.generic.constant.Constant': dict( @@ -47,7 +49,7 @@ def main(argv): )}, ]) # pipeline builder - pbuild = builder.PipelineBuilder( + pbuild = PipelineBuilder( bsfs.Namespace('http://example.com/me/'), # not actually used rbuild, ebuild, diff --git a/bsie/base/__init__.py b/bsie/base/__init__.py deleted file mode 100644 index 0d362cd..0000000 --- a/bsie/base/__init__.py +++ /dev/null @@ -1,24 +0,0 @@ -"""The base module defines the BSIE interfaces. - -You'll mostly find abstract classes here. - -Part of the bsie module. -A copy of the license is provided with the project. -Author: Matthias Baumgartner, 2022 -""" -# imports -import typing - -# inner-module imports -from . import errors -from .extractor import Extractor -from .reader import Reader - -# exports -__all__: typing.Sequence[str] = ( - 'Extractor', - 'Reader', - 'errors', - ) - -## EOF ## diff --git a/bsie/base/errors.py b/bsie/base/errors.py deleted file mode 100644 index 5fafd5b..0000000 --- a/bsie/base/errors.py +++ /dev/null @@ -1,45 +0,0 @@ -"""Common BSIE exceptions. - -Part of the bsie module. -A copy of the license is provided with the project. 
-Author: Matthias Baumgartner, 2022 -""" -# imports -import typing - -# exports -__all__: typing.Sequence[str] = ( - 'BuilderError', - 'ExtractorError', - 'LoaderError', - 'ReaderError', - ) - - -## code ## - -class _BSIEError(Exception): - """Generic BSIE error.""" - -class BuilderError(_BSIEError): - """The Builder failed to create an instance.""" - -class LoaderError(BuilderError): - """Failed to load a module or class.""" - -class ExtractorError(_BSIEError): - """The Extractor failed to process the given content.""" - -class ReaderError(_BSIEError): - """The Reader failed to read the given file.""" - -class ProgrammingError(_BSIEError): - """An assertion-like error that indicates a code-base issue.""" - -class UnreachableError(ProgrammingError): - """Bravo, you've reached a point in code that should logically not be reachable.""" - -class ParserError(_BSIEError): - """Failed to parse due to invalid syntax or structures.""" - -## EOF ## diff --git a/bsie/base/extractor.py b/bsie/base/extractor.py deleted file mode 100644 index c44021b..0000000 --- a/bsie/base/extractor.py +++ /dev/null @@ -1,103 +0,0 @@ -"""The Extractor classes transform content into triples. - -Part of the bsie module. -A copy of the license is provided with the project. -Author: Matthias Baumgartner, 2022 -""" -# imports -import abc -import typing - -# bsie imports -from bsie.utils import bsfs, node, ns - -# exports -__all__: typing.Sequence[str] = ( - 'Extractor', - ) - -# constants - -# essential definitions typically used in extractor schemas. -# NOTE: This preamble is only for convenience; Each Extractor must implement its use, if so desired. -SCHEMA_PREAMBLE = ''' - # common external prefixes - prefix rdf: - prefix rdfs: - prefix xsd: - prefix schema: - - # common bsfs prefixes - prefix bsfs: - prefix bse: - - # essential nodes - bsfs:Entity rdfs:subClassOf bsfs:Node . - bsfs:File rdfs:subClassOf bsfs:Entity . - - # common definitions - xsd:string rdfs:subClassOf bsfs:Literal . 
- xsd:integer rdfs:subClassOf bsfs:Literal . - - ''' - - -## code ## - -class Extractor(abc.ABC): - """Produce (subject, predicate, value)-triples from some content. - The Extractor produces princpal predicates that provide information - about the content itself (i.e., triples that include the subject), - and may also generate triples with auxiliary predicates if the - extracted value is a node itself. - """ - - # what type of content is expected (i.e. reader subclass). - CONTENT_READER: typing.Optional[str] = None - - # extractor schema. - _schema: bsfs.schema.Schema - - def __init__(self, schema: bsfs.schema.Schema): - self._schema = schema - - def __str__(self) -> str: - return bsfs.typename(self) - - def __repr__(self) -> str: - return f'{bsfs.typename(self)}()' - - def __eq__(self, other: typing.Any) -> bool: - return isinstance(other, type(self)) \ - and self.CONTENT_READER == other.CONTENT_READER \ - and self.schema == other.schema - - def __hash__(self) -> int: - return hash((type(self), self.CONTENT_READER, self.schema)) - - @property - def schema(self) -> bsfs.schema.Schema: - """Return the extractor's schema.""" - return self._schema - - @property - def principals(self) -> typing.Iterator[bsfs.schema.Predicate]: - """Return the principal predicates, i.e., relations from/to the extraction subject.""" - ent = self.schema.node(ns.bsfs.Entity) - return ( - pred - for pred - in self.schema.predicates() - if pred.domain <= ent or (pred.range is not None and pred.range <= ent) - ) - - @abc.abstractmethod - def extract( - self, - subject: node.Node, - content: typing.Any, - principals: typing.Iterable[bsfs.schema.Predicate], - ) -> typing.Iterator[typing.Tuple[node.Node, bsfs.schema.Predicate, typing.Any]]: - """Return (node, predicate, value) triples.""" - -## EOF ## diff --git a/bsie/base/reader.py b/bsie/base/reader.py deleted file mode 100644 index cbabd36..0000000 --- a/bsie/base/reader.py +++ /dev/null @@ -1,47 +0,0 @@ -"""The Reader classes return 
high-level content structures from files. - -The Reader fulfills two purposes: - First, it brokers between multiple libraries and file formats. - Second, it separates multiple aspects of a file into distinct content types. - -Part of the bsie module. -A copy of the license is provided with the project. -Author: Matthias Baumgartner, 2022 -""" -# imports -import abc -import typing - -# bsie imports -from bsie.utils import bsfs - -# exports -__all__: typing.Sequence[str] = ( - 'Reader', - ) - - -## code ## - -class Reader(abc.ABC): - """Read and return some content from a file.""" - - def __str__(self) -> str: - return bsfs.typename(self) - - def __repr__(self) -> str: - return f'{bsfs.typename(self)}()' - - def __eq__(self, other: typing.Any) -> bool: - return isinstance(other, type(self)) - - def __hash__(self) -> int: - return hash(type(self)) - - @abc.abstractmethod - def __call__(self, path: bsfs.URI) -> typing.Any: - """Return some content of the file at *path*. - Raises a `ReaderError` if the reader cannot make sense of the file format. - """ - -## EOF ## diff --git a/bsie/extractor/__init__.py b/bsie/extractor/__init__.py index ef31343..5f385ee 100644 --- a/bsie/extractor/__init__.py +++ b/bsie/extractor/__init__.py @@ -6,10 +6,17 @@ Part of the bsie module. A copy of the license is provided with the project. Author: Matthias Baumgartner, 2022 """ -# imports +# standard imports import typing +# inner-module imports +from .base import Extractor +from .builder import ExtractorBuilder + # exports -__all__: typing.Sequence[str] = [] +__all__: typing.Sequence[str] = ( + 'Extractor', + 'ExtractorBuilder', + ) ## EOF ## diff --git a/bsie/extractor/base.py b/bsie/extractor/base.py new file mode 100644 index 0000000..c44021b --- /dev/null +++ b/bsie/extractor/base.py @@ -0,0 +1,103 @@ +"""The Extractor classes transform content into triples. + +Part of the bsie module. +A copy of the license is provided with the project. 
+Author: Matthias Baumgartner, 2022 +""" +# imports +import abc +import typing + +# bsie imports +from bsie.utils import bsfs, node, ns + +# exports +__all__: typing.Sequence[str] = ( + 'Extractor', + ) + +# constants + +# essential definitions typically used in extractor schemas. +# NOTE: This preamble is only for convenience; Each Extractor must implement its use, if so desired. +SCHEMA_PREAMBLE = ''' + # common external prefixes + prefix rdf: + prefix rdfs: + prefix xsd: + prefix schema: + + # common bsfs prefixes + prefix bsfs: + prefix bse: + + # essential nodes + bsfs:Entity rdfs:subClassOf bsfs:Node . + bsfs:File rdfs:subClassOf bsfs:Entity . + + # common definitions + xsd:string rdfs:subClassOf bsfs:Literal . + xsd:integer rdfs:subClassOf bsfs:Literal . + + ''' + + +## code ## + +class Extractor(abc.ABC): + """Produce (subject, predicate, value)-triples from some content. + The Extractor produces princpal predicates that provide information + about the content itself (i.e., triples that include the subject), + and may also generate triples with auxiliary predicates if the + extracted value is a node itself. + """ + + # what type of content is expected (i.e. reader subclass). + CONTENT_READER: typing.Optional[str] = None + + # extractor schema. 
+ _schema: bsfs.schema.Schema + + def __init__(self, schema: bsfs.schema.Schema): + self._schema = schema + + def __str__(self) -> str: + return bsfs.typename(self) + + def __repr__(self) -> str: + return f'{bsfs.typename(self)}()' + + def __eq__(self, other: typing.Any) -> bool: + return isinstance(other, type(self)) \ + and self.CONTENT_READER == other.CONTENT_READER \ + and self.schema == other.schema + + def __hash__(self) -> int: + return hash((type(self), self.CONTENT_READER, self.schema)) + + @property + def schema(self) -> bsfs.schema.Schema: + """Return the extractor's schema.""" + return self._schema + + @property + def principals(self) -> typing.Iterator[bsfs.schema.Predicate]: + """Return the principal predicates, i.e., relations from/to the extraction subject.""" + ent = self.schema.node(ns.bsfs.Entity) + return ( + pred + for pred + in self.schema.predicates() + if pred.domain <= ent or (pred.range is not None and pred.range <= ent) + ) + + @abc.abstractmethod + def extract( + self, + subject: node.Node, + content: typing.Any, + principals: typing.Iterable[bsfs.schema.Predicate], + ) -> typing.Iterator[typing.Tuple[node.Node, bsfs.schema.Predicate, typing.Any]]: + """Return (node, predicate, value) triples.""" + +## EOF ## diff --git a/bsie/extractor/builder.py b/bsie/extractor/builder.py new file mode 100644 index 0000000..0fd3685 --- /dev/null +++ b/bsie/extractor/builder.py @@ -0,0 +1,77 @@ +""" + +Part of the bsie module. +A copy of the license is provided with the project. +Author: Matthias Baumgartner, 2022 +""" +# standard imports +import typing + +# bsie imports +from bsie.utils import bsfs, errors, safe_load, unpack_qualified_name + +# inner-module imports +from . import base + +# exports +__all__: typing.Sequence[str] = ( + 'ExtractorBuilder', + ) + + +## code ## + +class ExtractorBuilder(): + """Build `bsie.base.Extractor instances. 
+ + It is permissible to build multiple instances of the same extractor + (typically with different arguments), hence the ExtractorBuilder + receives a list of build specifications. Each specification is + a dict with a single key (extractor's qualified name) and a dict + to be used as keyword arguments. + Example: [{'bsie.extractor.generic.path.Path': {}}, ] + + """ + + # build specifications + _specs: typing.List[typing.Dict[str, typing.Dict[str, typing.Any]]] + + def __init__(self, specs: typing.List[typing.Dict[str, typing.Dict[str, typing.Any]]]): + self._specs = specs + + def __iter__(self) -> typing.Iterator[int]: + """Iterate over extractor specifications.""" + return iter(range(len(self._specs))) + + def build(self, index: int) -> base.Extractor: + """Return an instance of the n'th extractor (n=*index*).""" + # get build instructions + specs = self._specs[index] + + # check specs structure. expecting[{name: {kwargs}}] + if not isinstance(specs, dict): + raise TypeError(f'expected a dict, found {bsfs.typename(specs)}') + if len(specs) != 1: + raise TypeError(f'expected a dict of length one, found {len(specs)}') + + # get name and args from specs + name = next(iter(specs.keys())) + kwargs = specs[name] + + # check kwargs structure + if not isinstance(kwargs, dict): + raise TypeError(f'expected a dict, found {bsfs.typename(kwargs)}') + + # check name and get module/class components + module_name, class_name = unpack_qualified_name(name) + + # import extractor class + cls = safe_load(module_name, class_name) + + try: # build and return instance + return cls(**kwargs) + + except Exception as err: + raise errors.BuilderError(f'failed to build extractor {name} due to {bsfs.typename(err)}: {err}') from err + +## EOF ## diff --git a/bsie/extractor/generic/constant.py b/bsie/extractor/generic/constant.py index 11384e6..7b1d942 100644 --- a/bsie/extractor/generic/constant.py +++ b/bsie/extractor/generic/constant.py @@ -4,13 +4,15 @@ Part of the bsie module. 
A copy of the license is provided with the project. Author: Matthias Baumgartner, 2022 """ -# imports +# standard imports import typing # bsie imports -from bsie.base import extractor from bsie.utils import bsfs, node +# inner-module imports +from .. import base + # exports __all__: typing.Sequence[str] = ( 'Constant', @@ -19,7 +21,7 @@ __all__: typing.Sequence[str] = ( ## code ## -class Constant(extractor.Extractor): +class Constant(base.Extractor): """Extract information from file's path.""" CONTENT_READER = None @@ -32,7 +34,7 @@ class Constant(extractor.Extractor): schema: str, tuples: typing.Iterable[typing.Tuple[bsfs.URI, typing.Any]], ): - super().__init__(bsfs.schema.Schema.from_string(extractor.SCHEMA_PREAMBLE + schema)) + super().__init__(bsfs.schema.Schema.from_string(base.SCHEMA_PREAMBLE + schema)) # NOTE: Raises a KeyError if the predicate is not part of the schema self._tuples = tuple((self.schema.predicate(p_uri), value) for p_uri, value in tuples) # TODO: use schema instance for value checking diff --git a/bsie/extractor/generic/path.py b/bsie/extractor/generic/path.py index 7018e12..295715f 100644 --- a/bsie/extractor/generic/path.py +++ b/bsie/extractor/generic/path.py @@ -4,12 +4,12 @@ Part of the bsie module. A copy of the license is provided with the project. 
Author: Matthias Baumgartner, 2022 """ -# imports +# standard imports import os import typing # bsie imports -from bsie.base import extractor +from bsie.extractor import base from bsie.utils import bsfs, node, ns # exports @@ -20,7 +20,7 @@ __all__: typing.Sequence[str] = ( ## code ## -class Path(extractor.Extractor): +class Path(base.Extractor): """Extract information from file's path.""" CONTENT_READER = 'bsie.reader.path.Path' @@ -29,7 +29,7 @@ class Path(extractor.Extractor): _callmap: typing.Dict[bsfs.schema.Predicate, typing.Callable[[str], typing.Any]] def __init__(self): - super().__init__(bsfs.schema.Schema.from_string(extractor.SCHEMA_PREAMBLE + ''' + super().__init__(bsfs.schema.Schema.from_string(base.SCHEMA_PREAMBLE + ''' bse:filename rdfs:subClassOf bsfs:Predicate ; rdfs:domain bsfs:File ; rdfs:range xsd:string ; diff --git a/bsie/extractor/generic/stat.py b/bsie/extractor/generic/stat.py index 0b9ce29..1381fe2 100644 --- a/bsie/extractor/generic/stat.py +++ b/bsie/extractor/generic/stat.py @@ -4,14 +4,16 @@ Part of the bsie module. A copy of the license is provided with the project. Author: Matthias Baumgartner, 2022 """ -# imports +# standard imports import os import typing # bsie imports -from bsie.base import extractor from bsie.utils import bsfs, node, ns +# inner-module imports +from .. 
import base + # exports __all__: typing.Sequence[str] = ( 'Stat', @@ -20,7 +22,7 @@ __all__: typing.Sequence[str] = ( ## code ## -class Stat(extractor.Extractor): +class Stat(base.Extractor): """Extract information from the file system.""" CONTENT_READER = 'bsie.reader.stat.Stat' @@ -29,7 +31,7 @@ class Stat(extractor.Extractor): _callmap: typing.Dict[bsfs.schema.Predicate, typing.Callable[[os.stat_result], typing.Any]] def __init__(self): - super().__init__(bsfs.schema.Schema.from_string(extractor.SCHEMA_PREAMBLE + ''' + super().__init__(bsfs.schema.Schema.from_string(base.SCHEMA_PREAMBLE + ''' bse:filesize rdfs:subClassOf bsfs:Predicate ; rdfs:domain bsfs:File ; rdfs:range xsd:integer ; diff --git a/bsie/lib/__init__.py b/bsie/lib/__init__.py index 578c2c4..4239d3b 100644 --- a/bsie/lib/__init__.py +++ b/bsie/lib/__init__.py @@ -4,15 +4,17 @@ Part of the bsie module. A copy of the license is provided with the project. Author: Matthias Baumgartner, 2022 """ -# imports +# standard imports import typing # inner-module imports from .bsie import BSIE +from .builder import PipelineBuilder # exports __all__: typing.Sequence[str] = ( 'BSIE', + 'PipelineBuilder', ) ## EOF ## diff --git a/bsie/lib/bsie.py b/bsie/lib/bsie.py index e087fa9..668783d 100644 --- a/bsie/lib/bsie.py +++ b/bsie/lib/bsie.py @@ -4,13 +4,15 @@ Part of the bsie module. A copy of the license is provided with the project. Author: Matthias Baumgartner, 2022 """ -# imports +# standard imports import typing # bsie imports -from bsie.tools import Pipeline from bsie.utils import bsfs, node, ns +# inner-module imports +from .pipeline import Pipeline + # exports __all__: typing.Sequence[str] = ( 'BSIE', diff --git a/bsie/lib/builder.py b/bsie/lib/builder.py new file mode 100644 index 0000000..c2abffe --- /dev/null +++ b/bsie/lib/builder.py @@ -0,0 +1,85 @@ +""" + +Part of the bsie module. +A copy of the license is provided with the project. 
+Author: Matthias Baumgartner, 2022 +""" +# standard imports +import logging +import typing + +# bsie imports +from bsie.extractor import ExtractorBuilder +from bsie.reader import ReaderBuilder +from bsie.utils import bsfs, errors + +# inner-module imports +from . import pipeline + +# exports +__all__: typing.Sequence[str] = ( + 'PipelineBuilder', + ) + + +## code ## + +logger = logging.getLogger(__name__) + +class PipelineBuilder(): + """Build `bsie.tools.pipeline.Pipeline` instances.""" + + # Prefix to be used in the Pipeline. + prefix: bsfs.Namespace + + # builder for Readers. + rbuild: ReaderBuilder + + # builder for Extractors. + ebuild: ExtractorBuilder + + def __init__( + self, + prefix: bsfs.Namespace, + reader_builder: ReaderBuilder, + extractor_builder: ExtractorBuilder, + ): + self.prefix = prefix + self.rbuild = reader_builder + self.ebuild = extractor_builder + + def build(self) -> pipeline.Pipeline: + """Return a Pipeline instance.""" + ext2rdr = {} + + for eidx in self.ebuild: + # build extractor + try: + ext = self.ebuild.build(eidx) + + except errors.LoaderError as err: # failed to load extractor; skip + logger.error('failed to load extractor: %s', err) + continue + + except errors.BuilderError as err: # failed to build instance; skip + logger.error(str(err)) + continue + + try: + # get reader required by extractor + if ext.CONTENT_READER is not None: + rdr = self.rbuild.build(ext.CONTENT_READER) + else: + rdr = None + # store extractor + ext2rdr[ext] = rdr + + except errors.LoaderError as err: # failed to load reader + logger.error('failed to load reader: %s', err) + + except errors.BuilderError as err: # failed to build reader + logger.error(str(err)) + + return pipeline.Pipeline(self.prefix, ext2rdr) + +## EOF ## diff --git a/bsie/lib/pipeline.py b/bsie/lib/pipeline.py new file mode 100644 index 0000000..e5ce1b7 --- /dev/null +++ b/bsie/lib/pipeline.py @@ -0,0 +1,145 @@ +""" + +Part of the bsie module. 
+A copy of the license is provided with the project. +Author: Matthias Baumgartner, 2022 +""" +# standard imports +from collections import defaultdict +import logging +import typing + +# bsie imports +from bsie.extractor import Extractor +from bsie.reader import Reader +from bsie.utils import bsfs, errors, node, ns + +# exports +__all__: typing.Sequence[str] = ( + 'Pipeline', + ) + +# constants +FILE_PREFIX = 'file#' + +## code ## + +logger = logging.getLogger(__name__) + +class Pipeline(): + """Extraction pipeline to generate triples from files. + + The Pipeline binds readers and extractors, and performs + the necessary operations to produce triples from a file. + It takes a best-effort approach to extract as many triples + as possible. Errors during the extraction are passed over + and reported to the log. + + """ + + # combined extractor schemas. + _schema: bsfs.schema.Schema + + # node prefix. + _prefix: bsfs.Namespace + + # extractor -> reader mapping + _ext2rdr: typing.Dict[Extractor, typing.Optional[Reader]] + + def __init__( + self, + prefix: bsfs.Namespace, + ext2rdr: typing.Dict[Extractor, typing.Optional[Reader]] + ): + # store core members + self._prefix = prefix + FILE_PREFIX + self._ext2rdr = ext2rdr + # compile schema from all extractors + self._schema = bsfs.schema.Schema.Union(ext.schema for ext in ext2rdr) + + def __str__(self) -> str: + return bsfs.typename(self) + + def __repr__(self) -> str: + return f'{bsfs.typename(self)}(...)' + + def __hash__(self) -> int: + return hash((type(self), self._prefix, self._schema, tuple(self._ext2rdr), tuple(self._ext2rdr.values()))) + + def __eq__(self, other: typing.Any) -> bool: + return isinstance(other, type(self)) \ + and self._schema == other._schema \ + and self._prefix == other._prefix \ + and self._ext2rdr == other._ext2rdr + + @property + def schema(self) -> bsfs.schema.Schema: + """Return the pipeline's schema (combined from all extractors).""" + return self._schema + + @property + def 
principals(self) -> typing.Iterator[bsfs.schema.Predicate]: + """Return the principal predicates that can be extracted.""" + return iter({pred for ext in self._ext2rdr for pred in ext.principals}) + + def subschema(self, principals: typing.Iterable[bsfs.schema.Predicate]) -> bsfs.schema.Schema: + """Return the subset of the schema that supports the given *principals*.""" + # materialize principals + principals = set(principals) + # collect and combine schemas from extractors + return bsfs.schema.Schema.Union({ + ext.schema + for ext + in self._ext2rdr + if not set(ext.principals).isdisjoint(principals) + }) + + def __call__( + self, + path: bsfs.URI, + principals: typing.Optional[typing.Iterable[bsfs.schema.Predicate]] = None, + ) -> typing.Iterator[typing.Tuple[node.Node, bsfs.schema.Predicate, typing.Any]]: + """Extract triples from the file at *path*. Optionally, limit triples to *principals*.""" + # get principals + principals = set(principals) if principals is not None else set(self.schema.predicates()) + + # get extractors + extractors = {ext for ext in self._ext2rdr if not set(ext.principals).isdisjoint(principals)} + + # corner-case short-cut + if len(extractors) == 0: + return + + # get readers -> extractors mapping + rdr2ext = defaultdict(set) + for ext in extractors: + rdr = self._ext2rdr[ext] + rdr2ext[rdr].add(ext) + + # create subject for file + uuid = bsfs.uuid.UCID.from_path(path) + subject = node.Node(ns.bsfs.File, self._prefix[uuid]) + + # extract information + for rdr, extrs in rdr2ext.items(): + try: + # get content + content = rdr(path) if rdr is not None else None + + # apply extractors on this content + for ext in extrs: + try: + # get predicate/value tuples + for subject, pred, value in ext.extract(subject, content, principals): + yield subject, pred, value + + except errors.ExtractorError as err: + # critical extractor failure. 
+ logger.error('%s failed to extract triples from content: %s', ext, err) + + except errors.ReaderError as err: + # failed to read any content. skip. + logger.error('%s failed to read content: %s', rdr, err) + + +## EOF ## diff --git a/bsie/reader/__init__.py b/bsie/reader/__init__.py index a45f22b..4163d1c 100644 --- a/bsie/reader/__init__.py +++ b/bsie/reader/__init__.py @@ -15,5 +15,18 @@ Part of the bsie module. A copy of the license is provided with the project. Author: Matthias Baumgartner, 2022 """ +# standard imports +import typing +# inner-module imports +from .base import Reader +from .builder import ReaderBuilder + +# exports +__all__: typing.Sequence[str] = ( + 'Reader', + 'ReaderBuilder', + ) + +## EOF ## ## EOF ## diff --git a/bsie/reader/base.py b/bsie/reader/base.py new file mode 100644 index 0000000..cbabd36 --- /dev/null +++ b/bsie/reader/base.py @@ -0,0 +1,47 @@ +"""The Reader classes return high-level content structures from files. + +The Reader fulfills two purposes: + First, it brokers between multiple libraries and file formats. + Second, it separates multiple aspects of a file into distinct content types. + +Part of the bsie module. +A copy of the license is provided with the project. +Author: Matthias Baumgartner, 2022 +""" +# imports +import abc +import typing + +# bsie imports +from bsie.utils import bsfs + +# exports +__all__: typing.Sequence[str] = ( + 'Reader', + ) + + +## code ## + +class Reader(abc.ABC): + """Read and return some content from a file.""" + + def __str__(self) -> str: + return bsfs.typename(self) + + def __repr__(self) -> str: + return f'{bsfs.typename(self)}()' + + def __eq__(self, other: typing.Any) -> bool: + return isinstance(other, type(self)) + + def __hash__(self) -> int: + return hash(type(self)) + + @abc.abstractmethod + def __call__(self, path: bsfs.URI) -> typing.Any: + """Return some content of the file at *path*. + Raises a `ReaderError` if the reader cannot make sense of the file format. 
+ """ + +## EOF ## diff --git a/bsie/reader/builder.py b/bsie/reader/builder.py new file mode 100644 index 0000000..bce5397 --- /dev/null +++ b/bsie/reader/builder.py @@ -0,0 +1,74 @@ +""" + +Part of the bsie module. +A copy of the license is provided with the project. +Author: Matthias Baumgartner, 2022 +""" +# standard imports +import typing + +# bsie imports +from bsie.utils import bsfs, errors, safe_load, unpack_qualified_name + +# inner-module imports +from . import base + +# exports +__all__: typing.Sequence[str] = ( + 'ReaderBuilder', + ) + + +## code ## + +class ReaderBuilder(): + """Build `bsie.base.Reader` instances. + + Readers are defined via their qualified class name + (e.g., bsie.reader.path.Path) and optional keyword + arguments that are passed to the constructor via + the *kwargs* argument (name as key, kwargs as value). + The ReaderBuilder keeps a cache of previously built + reader instances, as they are anyway built with + identical keyword arguments. + + """ + + # keyword arguments + _kwargs: typing.Dict[str, typing.Dict[str, typing.Any]] + + # cached readers + _cache: typing.Dict[str, base.Reader] + + def __init__(self, kwargs: typing.Dict[str, typing.Dict[str, typing.Any]]): + self._kwargs = kwargs + self._cache = {} + + def build(self, name: str) -> base.Reader: + """Return an instance for the qualified class name.""" + # return cached instance + if name in self._cache: + return self._cache[name] + + # check name and get module/class components + module_name, class_name = unpack_qualified_name(name) + + # import reader class + cls = safe_load(module_name, class_name) + + # get kwargs + kwargs = self._kwargs.get(name, {}) + if not isinstance(kwargs, dict): + raise TypeError(f'expected a kwargs dict, found {bsfs.typename(kwargs)}') + + try: # build, cache, and return instance + obj = cls(**kwargs) + # cache instance + self._cache[name] = obj + # return instance + return obj + + except Exception as err: + raise errors.BuilderError(f'failed to 
build reader {name} due to {bsfs.typename(err)}: {err}') from err + +## EOF ## diff --git a/bsie/reader/path.py b/bsie/reader/path.py index d60f187..1ca05a0 100644 --- a/bsie/reader/path.py +++ b/bsie/reader/path.py @@ -4,11 +4,11 @@ Part of the bsie module. A copy of the license is provided with the project. Author: Matthias Baumgartner, 2022 """ -# imports +# standard imports import typing -# bsie imports -from bsie.base import reader +# inner-module imports +from . import base # exports __all__: typing.Sequence[str] = ( @@ -18,7 +18,7 @@ __all__: typing.Sequence[str] = ( ## code ## -class Path(reader.Reader): +class Path(base.Reader): """Return the path.""" def __call__(self, path: str) -> str: diff --git a/bsie/reader/stat.py b/bsie/reader/stat.py index fc5fb24..706dc47 100644 --- a/bsie/reader/stat.py +++ b/bsie/reader/stat.py @@ -4,12 +4,15 @@ Part of the bsie module. A copy of the license is provided with the project. Author: Matthias Baumgartner, 2022 """ -# imports +# standard imports import os import typing # bsie imports -from bsie.base import errors, reader +from bsie.utils import errors + +# inner-module imports +from . import base # exports __all__: typing.Sequence[str] = ( @@ -19,7 +22,7 @@ __all__: typing.Sequence[str] = ( ## code ## -class Stat(reader.Reader): +class Stat(base.Reader): """Read and return the filesystem's stat infos.""" def __call__(self, path: str) -> os.stat_result: diff --git a/bsie/tools/__init__.py b/bsie/tools/__init__.py deleted file mode 100644 index 803c321..0000000 --- a/bsie/tools/__init__.py +++ /dev/null @@ -1,20 +0,0 @@ -""" - -Part of the bsie module. -A copy of the license is provided with the project. -Author: Matthias Baumgartner, 2022 -""" -# imports -import typing - -# inner-module imports -from . 
import builder -from .pipeline import Pipeline - -# exports -__all__: typing.Sequence[str] = ( - 'builder', - 'Pipeline', - ) - -## EOF ## diff --git a/bsie/tools/builder.py b/bsie/tools/builder.py deleted file mode 100644 index 190d9bf..0000000 --- a/bsie/tools/builder.py +++ /dev/null @@ -1,226 +0,0 @@ -""" - -Part of the bsie module. -A copy of the license is provided with the project. -Author: Matthias Baumgartner, 2022 -""" -# imports -import importlib -import logging -import typing - -# bsie imports -from bsie import base -from bsie.base import errors -from bsie.utils import bsfs - -# inner-module imports -from . import pipeline - -# exports -__all__: typing.Sequence[str] = ( - 'ExtractorBuilder', - 'PipelineBuilder', - 'ReaderBuilder', - ) - - -## code ## - -logger = logging.getLogger(__name__) - -def _safe_load(module_name: str, class_name: str): - """Get a class from a module. Raise BuilderError if anything goes wrong.""" - try: - # load the module - module = importlib.import_module(module_name) - except Exception as err: - # cannot import module - raise errors.LoaderError(f'cannot load module {module_name}') from err - - try: - # get the class from the module - cls = getattr(module, class_name) - except Exception as err: - # cannot find the class - raise errors.LoaderError(f'cannot load class {class_name} from module {module_name}') from err - - return cls - - -def _unpack_name(name): - """Split a name into its module and class component (dot-separated).""" - if not isinstance(name, str): - raise TypeError(name) - if '.' not in name: - raise ValueError('name must be a qualified class name.') - module_name, class_name = name[:name.rfind('.')], name[name.rfind('.')+1:] - if module_name == '': - raise ValueError('name must be a qualified class name.') - return module_name, class_name - - -class ReaderBuilder(): - """Build `bsie.base.Reader` instances. 
- - Readers are defined via their qualified class name - (e.g., bsie.reader.path.Path) and optional keyword - arguments that are passed to the constructor via - the *kwargs* argument (name as key, kwargs as value). - The ReaderBuilder keeps a cache of previously built - reader instances, as they are anyway built with - identical keyword arguments. - - """ - - # keyword arguments - _kwargs: typing.Dict[str, typing.Dict[str, typing.Any]] - - # cached readers - _cache: typing.Dict[str, base.Reader] - - def __init__(self, kwargs: typing.Dict[str, typing.Dict[str, typing.Any]]): - self._kwargs = kwargs - self._cache = {} - - def build(self, name: str) -> base.Reader: - """Return an instance for the qualified class name.""" - # return cached instance - if name in self._cache: - return self._cache[name] - - # check name and get module/class components - module_name, class_name = _unpack_name(name) - - # import reader class - cls = _safe_load(module_name, class_name) - - # get kwargs - kwargs = self._kwargs.get(name, {}) - if not isinstance(kwargs, dict): - raise TypeError(f'expected a kwargs dict, found {bsfs.typename(kwargs)}') - - try: # build, cache, and return instance - obj = cls(**kwargs) - # cache instance - self._cache[name] = obj - # return instance - return obj - - except Exception as err: - raise errors.BuilderError(f'failed to build reader {name} due to {bsfs.typename(err)}: {err}') from err - - -class ExtractorBuilder(): - """Build `bsie.base.Extractor instances. - - It is permissible to build multiple instances of the same extractor - (typically with different arguments), hence the ExtractorBuilder - receives a list of build specifications. Each specification is - a dict with a single key (extractor's qualified name) and a dict - to be used as keyword arguments. 
- Example: [{'bsie.extractor.generic.path.Path': {}}, ] - - """ - - # build specifications - _specs: typing.List[typing.Dict[str, typing.Dict[str, typing.Any]]] - - def __init__(self, specs: typing.List[typing.Dict[str, typing.Dict[str, typing.Any]]]): - self._specs = specs - - def __iter__(self) -> typing.Iterator[int]: - """Iterate over extractor specifications.""" - return iter(range(len(self._specs))) - - def build(self, index: int) -> base.Extractor: - """Return an instance of the n'th extractor (n=*index*).""" - # get build instructions - specs = self._specs[index] - - # check specs structure. expecting[{name: {kwargs}}] - if not isinstance(specs, dict): - raise TypeError(f'expected a dict, found {bsfs.typename(specs)}') - if len(specs) != 1: - raise TypeError(f'expected a dict of length one, found {len(specs)}') - - # get name and args from specs - name = next(iter(specs.keys())) - kwargs = specs[name] - - # check kwargs structure - if not isinstance(kwargs, dict): - raise TypeError(f'expected a dict, found {bsfs.typename(kwargs)}') - - # check name and get module/class components - module_name, class_name = _unpack_name(name) - - # import extractor class - cls = _safe_load(module_name, class_name) - - try: # build and return instance - return cls(**kwargs) - - except Exception as err: - raise errors.BuilderError(f'failed to build extractor {name} due to {bsfs.typename(err)}: {err}') from err - - -class PipelineBuilder(): - """Build `bsie.tools.pipeline.Pipeline` instances.""" - - # Prefix to be used in the Pipeline. - prefix: bsfs.Namespace - - # builder for Readers. - rbuild: ReaderBuilder - - # builder for Extractors. 
- ebuild: ExtractorBuilder - - def __init__( - self, - prefix: bsfs.Namespace, - reader_builder: ReaderBuilder, - extractor_builder: ExtractorBuilder, - ): - self.prefix = prefix - self.rbuild = reader_builder - self.ebuild = extractor_builder - - def build(self) -> pipeline.Pipeline: - """Return a Pipeline instance.""" - ext2rdr = {} - - for eidx in self.ebuild: - # build extractor - try: - ext = self.ebuild.build(eidx) - - except errors.LoaderError as err: # failed to load extractor; skip - logger.error('failed to load extractor: %s', err) - continue - - except errors.BuilderError as err: # failed to build instance; skip - logger.error(str(err)) - continue - - try: - # get reader required by extractor - if ext.CONTENT_READER is not None: - rdr = self.rbuild.build(ext.CONTENT_READER) - else: - rdr = None - # store extractor - ext2rdr[ext] = rdr - - except errors.LoaderError as err: # failed to load reader - logger.error('failed to load reader: %s', err) - - except errors.BuilderError as err: # failed to build reader - logger.error(str(err)) - - return pipeline.Pipeline(self.prefix, ext2rdr) - - - -## EOF ## diff --git a/bsie/tools/pipeline.py b/bsie/tools/pipeline.py deleted file mode 100644 index 20e8ddf..0000000 --- a/bsie/tools/pipeline.py +++ /dev/null @@ -1,144 +0,0 @@ -""" - -Part of the bsie module. -A copy of the license is provided with the project. -Author: Matthias Baumgartner, 2022 -""" -# imports -from collections import defaultdict -import logging -import typing - -# bsie imports -from bsie import base -from bsie.utils import bsfs, node, ns - -# exports -__all__: typing.Sequence[str] = ( - 'Pipeline', - ) - -# constants -FILE_PREFIX = 'file#' - -## code ## - -logger = logging.getLogger(__name__) - -class Pipeline(): - """Extraction pipeline to generate triples from files. - - The Pipeline binds readers and extractors, and performs - the necessary operations to produce triples from a file. 
- It takes a best-effort approach to extract as many triples - as possible. Errors during the extraction are passed over - and reported to the log. - - """ - - # combined extractor schemas. - _schema: bsfs.schema.Schema - - # node prefix. - _prefix: bsfs.Namespace - - # extractor -> reader mapping - _ext2rdr: typing.Dict[base.extractor.Extractor, typing.Optional[base.reader.Reader]] - - def __init__( - self, - prefix: bsfs.Namespace, - ext2rdr: typing.Dict[base.extractor.Extractor, typing.Optional[base.reader.Reader]] - ): - # store core members - self._prefix = prefix + FILE_PREFIX - self._ext2rdr = ext2rdr - # compile schema from all extractors - self._schema = bsfs.schema.Schema.Union(ext.schema for ext in ext2rdr) - - def __str__(self) -> str: - return bsfs.typename(self) - - def __repr__(self) -> str: - return f'{bsfs.typename(self)}(...)' - - def __hash__(self) -> int: - return hash((type(self), self._prefix, self._schema, tuple(self._ext2rdr), tuple(self._ext2rdr.values()))) - - def __eq__(self, other: typing.Any) -> bool: - return isinstance(other, type(self)) \ - and self._schema == other._schema \ - and self._prefix == other._prefix \ - and self._ext2rdr == other._ext2rdr - - @property - def schema(self) -> bsfs.schema.Schema: - """Return the pipeline's schema (combined from all extractors).""" - return self._schema - - @property - def principals(self) -> typing.Iterator[bsfs.schema.Predicate]: - """Return the principal predicates that can be extracted.""" - return iter({pred for ext in self._ext2rdr for pred in ext.principals}) - - def subschema(self, principals: typing.Iterable[bsfs.schema.Predicate]) -> bsfs.schema.Schema: - """Return the subset of the schema that supports the given *principals*.""" - # materialize principals - principals = set(principals) - # collect and combine schemas from extractors - return bsfs.schema.Schema.Union({ - ext.schema - for ext - in self._ext2rdr - if not set(ext.principals).isdisjoint(principals) - }) - - def 
__call__( - self, - path: bsfs.URI, - principals: typing.Optional[typing.Iterable[bsfs.schema.Predicate]] = None, - ) -> typing.Iterator[typing.Tuple[node.Node, bsfs.schema.Predicate, typing.Any]]: - """Extract triples from the file at *path*. Optionally, limit triples to *principals*.""" - # get principals - principals = set(principals) if principals is not None else set(self.schema.predicates()) - - # get extractors - extractors = {ext for ext in self._ext2rdr if not set(ext.principals).isdisjoint(principals)} - - # corner-case short-cut - if len(extractors) == 0: - return - - # get readers -> extractors mapping - rdr2ext = defaultdict(set) - for ext in extractors: - rdr = self._ext2rdr[ext] - rdr2ext[rdr].add(ext) - - # create subject for file - uuid = bsfs.uuid.UCID.from_path(path) - subject = node.Node(ns.bsfs.File, self._prefix[uuid]) - - # extract information - for rdr, extrs in rdr2ext.items(): - try: - # get content - content = rdr(path) if rdr is not None else None - - # apply extractors on this content - for ext in extrs: - try: - # get predicate/value tuples - for subject, pred, value in ext.extract(subject, content, principals): - yield subject, pred, value - - except base.errors.ExtractorError as err: - # critical extractor failure. - logger.error('%s failed to extract triples from content: %s', ext, err) - - except base.errors.ReaderError as err: - # failed to read any content. skip. - logger.error('%s failed to read content: %s', rdr, err) - - -## EOF ## diff --git a/bsie/utils/__init__.py b/bsie/utils/__init__.py index 3981dc7..9cb60ed 100644 --- a/bsie/utils/__init__.py +++ b/bsie/utils/__init__.py @@ -4,21 +4,24 @@ Part of the bsie module. A copy of the license is provided with the project. Author: Matthias Baumgartner, 2022 """ -# imports +# standard imports import typing # inner-module imports from . import bsfs +from . import filematcher from . import namespaces as ns from . import node -from . 
import filematcher +from .loading import safe_load, unpack_qualified_name # exports __all__: typing.Sequence[str] = ( - 'filematcher', 'bsfs', + 'filematcher', 'node', 'ns', + 'safe_load', + 'unpack_qualified_name', ) ## EOF ## diff --git a/bsie/utils/errors.py b/bsie/utils/errors.py new file mode 100644 index 0000000..5fafd5b --- /dev/null +++ b/bsie/utils/errors.py @@ -0,0 +1,45 @@ +"""Common BSIE exceptions. + +Part of the bsie module. +A copy of the license is provided with the project. +Author: Matthias Baumgartner, 2022 +""" +# imports +import typing + +# exports +__all__: typing.Sequence[str] = ( + 'BuilderError', + 'ExtractorError', + 'LoaderError', + 'ReaderError', + ) + + +## code ## + +class _BSIEError(Exception): + """Generic BSIE error.""" + +class BuilderError(_BSIEError): + """The Builder failed to create an instance.""" + +class LoaderError(BuilderError): + """Failed to load a module or class.""" + +class ExtractorError(_BSIEError): + """The Extractor failed to process the given content.""" + +class ReaderError(_BSIEError): + """The Reader failed to read the given file.""" + +class ProgrammingError(_BSIEError): + """An assertion-like error that indicates a code-base issue.""" + +class UnreachableError(ProgrammingError): + """Bravo, you've reached a point in code that should logically not be reachable.""" + +class ParserError(_BSIEError): + """Failed to parse due to invalid syntax or structures.""" + +## EOF ## diff --git a/bsie/utils/filematcher/parser.py b/bsie/utils/filematcher/parser.py index 0654742..2f82875 100644 --- a/bsie/utils/filematcher/parser.py +++ b/bsie/utils/filematcher/parser.py @@ -7,16 +7,14 @@ Author: Matthias Baumgartner, 2021 # standard imports import typing -# non-standard imports +# external imports import pyparsing from pyparsing import printables, alphas8bit, punc8bit, QuotedString, Word, \ delimitedList, Or, CaselessKeyword, Group, oneOf, Optional -# bsie imports -from bsie.base import errors - # inner-module imports from 
. import matcher +from .. import errors # exports __all__: typing.Sequence[str] = ( diff --git a/bsie/utils/loading.py b/bsie/utils/loading.py new file mode 100644 index 0000000..eb05c35 --- /dev/null +++ b/bsie/utils/loading.py @@ -0,0 +1,54 @@ +""" + +Part of the bsie module. +A copy of the license is provided with the project. +Author: Matthias Baumgartner, 2022 +""" +# standard imports +import importlib +import typing + +# inner-module imports +from . import errors + +# exports +__all__: typing.Sequence[str] = ( + 'safe_load', + 'unpack_qualified_name', + ) + + +## code ## + +def safe_load(module_name: str, class_name: str): + """Get a class from a module. Raise BuilderError if anything goes wrong.""" + try: + # load the module + module = importlib.import_module(module_name) + except Exception as err: + # cannot import module + raise errors.LoaderError(f'cannot load module {module_name}') from err + + try: + # get the class from the module + cls = getattr(module, class_name) + except Exception as err: + # cannot find the class + raise errors.LoaderError(f'cannot load class {class_name} from module {module_name}') from err + + return cls + + +def unpack_qualified_name(name): + """Split a name into its module and class component (dot-separated).""" + if not isinstance(name, str): + raise TypeError(name) + if '.' 
not in name: + raise ValueError('name must be a qualified class name.') + module_name, class_name = name[:name.rfind('.')], name[name.rfind('.')+1:] + if module_name == '': + raise ValueError('name must be a qualified class name.') + return module_name, class_name + + +## EOF ## diff --git a/setup.py b/setup.py index 8e0efd4..6521593 100644 --- a/setup.py +++ b/setup.py @@ -14,7 +14,7 @@ setup( url='https://www.igsor.net/projects/blackstar/bsie/', download_url='https://pip.igsor.net', packages=('bsie', ), - install_requires=('rdflib', 'bsfs', 'python-magic'), + install_requires=('rdflib', 'bsfs', 'python-magic', 'pyparsing'), python_requires=">=3.7", ) diff --git a/test/base/__init__.py b/test/base/__init__.py deleted file mode 100644 index e69de29..0000000 diff --git a/test/base/test_extractor.py b/test/base/test_extractor.py deleted file mode 100644 index 30974ef..0000000 --- a/test/base/test_extractor.py +++ /dev/null @@ -1,70 +0,0 @@ -""" - -Part of the bsie test suite. -A copy of the license is provided with the project. -Author: Matthias Baumgartner, 2022 -""" -# imports -import unittest - -# bsie imports -from bsie.utils import bsfs, ns - -# objects to test -from bsie.base import extractor - - -## code ## - -class StubExtractor(extractor.Extractor): - def __init__(self): - super().__init__(bsfs.schema.Schema.from_string(extractor.SCHEMA_PREAMBLE + ''' - bse:author rdfs:subClassOf bsfs:Predicate ; - rdfs:domain bsfs:Entity ; - rdfs:range xsd:string ; - bsfs:unique "false"^^xsd:boolean . - bse:comment rdfs:subClassOf bsfs:Predicate ; - rdfs:domain bsfs:Entity ; - rdfs:range xsd:string ; - bsfs:unique "false"^^xsd:boolean . 
- ''')) - - def extract(self, subject, content, predicates): - raise NotImplementedError() - -class StubSub(StubExtractor): - pass - -class TestExtractor(unittest.TestCase): - def test_essentials(self): - ext = StubExtractor() - self.assertEqual(str(ext), 'StubExtractor') - self.assertEqual(repr(ext), 'StubExtractor()') - self.assertEqual(ext, StubExtractor()) - self.assertEqual(hash(ext), hash(StubExtractor())) - - sub = StubSub() - self.assertEqual(str(sub), 'StubSub') - self.assertEqual(repr(sub), 'StubSub()') - self.assertEqual(sub, StubSub()) - self.assertEqual(hash(sub), hash(StubSub())) - self.assertNotEqual(ext, sub) - self.assertNotEqual(hash(ext), hash(sub)) - - def test_principals(self): - schema = bsfs.schema.Schema.Empty() - entity = schema.node(ns.bsfs.Node).get_child(ns.bsfs.Entity) - string = schema.literal(ns.bsfs.Literal).get_child(bsfs.URI('http://www.w3.org/2001/XMLSchema#string')) - p_author = schema.predicate(ns.bsfs.Predicate).get_child(ns.bse.author, domain=entity, range=string) - p_comment = schema.predicate(ns.bsfs.Predicate).get_child(ns.bse.comment, domain=entity, range=string) - ext = StubExtractor() - self.assertSetEqual(set(ext.principals), - {p_author, p_comment} | set(schema.predicates()) - {schema.predicate(ns.bsfs.Predicate)}) - - -## main ## - -if __name__ == '__main__': - unittest.main() - -## EOF ## diff --git a/test/base/test_reader.py b/test/base/test_reader.py deleted file mode 100644 index a907eb9..0000000 --- a/test/base/test_reader.py +++ /dev/null @@ -1,45 +0,0 @@ -""" - -Part of the bsie test suite. -A copy of the license is provided with the project. 
-Author: Matthias Baumgartner, 2022 -""" -# imports -import unittest - -# objects to test -from bsie import base - - -## code ## - -class StubReader(base.Reader): - def __call__(self, path): - raise NotImplementedError() - -class StubSub(StubReader): - pass - -class TestReader(unittest.TestCase): - def test_essentials(self): - ext = StubReader() - self.assertEqual(str(ext), 'StubReader') - self.assertEqual(repr(ext), 'StubReader()') - self.assertEqual(ext, StubReader()) - self.assertEqual(hash(ext), hash(StubReader())) - - sub = StubSub() - self.assertEqual(str(sub), 'StubSub') - self.assertEqual(repr(sub), 'StubSub()') - self.assertEqual(sub, StubSub()) - self.assertEqual(hash(sub), hash(StubSub())) - self.assertNotEqual(ext, sub) - self.assertNotEqual(hash(ext), hash(sub)) - - -## main ## - -if __name__ == '__main__': - unittest.main() - -## EOF ## diff --git a/test/extractor/generic/test_path.py b/test/extractor/generic/test_path.py index 820f402..778ac5a 100644 --- a/test/extractor/generic/test_path.py +++ b/test/extractor/generic/test_path.py @@ -4,11 +4,11 @@ Part of the bsie test suite. A copy of the license is provided with the project. Author: Matthias Baumgartner, 2022 """ -# imports +# standard imports import unittest # bsie imports -from bsie.base import extractor +from bsie.extractor import base from bsie.utils import bsfs, node as _node, ns # objects to test @@ -29,7 +29,7 @@ class TestPath(unittest.TestCase): def test_schema(self): self.assertEqual(Path().schema, - bsfs.schema.Schema.from_string(extractor.SCHEMA_PREAMBLE + ''' + bsfs.schema.Schema.from_string(base.SCHEMA_PREAMBLE + ''' bse:filename rdfs:subClassOf bsfs:Predicate ; rdfs:domain bsfs:File ; rdfs:range xsd:string ; diff --git a/test/extractor/generic/test_stat.py b/test/extractor/generic/test_stat.py index 3441438..ff74085 100644 --- a/test/extractor/generic/test_stat.py +++ b/test/extractor/generic/test_stat.py @@ -4,12 +4,12 @@ Part of the bsie test suite. 
A copy of the license is provided with the project. Author: Matthias Baumgartner, 2022 """ -# imports +# standard imports import os import unittest # bsie imports -from bsie.base import extractor +from bsie.extractor import base from bsie.utils import bsfs, node as _node, ns # objects to test @@ -30,7 +30,7 @@ class TestStat(unittest.TestCase): def test_schema(self): self.assertEqual(Stat().schema, - bsfs.schema.Schema.from_string(extractor.SCHEMA_PREAMBLE + ''' + bsfs.schema.Schema.from_string(base.SCHEMA_PREAMBLE + ''' bse:filesize rdfs:subClassOf bsfs:Predicate ; rdfs:domain bsfs:File ; rdfs:range xsd:integer ; diff --git a/test/extractor/test_base.py b/test/extractor/test_base.py new file mode 100644 index 0000000..6a63c59 --- /dev/null +++ b/test/extractor/test_base.py @@ -0,0 +1,70 @@ +""" + +Part of the bsie test suite. +A copy of the license is provided with the project. +Author: Matthias Baumgartner, 2022 +""" +# standard imports +import unittest + +# bsie imports +from bsie.utils import bsfs, ns + +# objects to test +from bsie.extractor import base + + +## code ## + +class StubExtractor(base.Extractor): + def __init__(self): + super().__init__(bsfs.schema.Schema.from_string(base.SCHEMA_PREAMBLE + ''' + bse:author rdfs:subClassOf bsfs:Predicate ; + rdfs:domain bsfs:Entity ; + rdfs:range xsd:string ; + bsfs:unique "false"^^xsd:boolean . + bse:comment rdfs:subClassOf bsfs:Predicate ; + rdfs:domain bsfs:Entity ; + rdfs:range xsd:string ; + bsfs:unique "false"^^xsd:boolean . 
+ ''')) + + def extract(self, subject, content, predicates): + raise NotImplementedError() + +class StubSub(StubExtractor): + pass + +class TestExtractor(unittest.TestCase): + def test_essentials(self): + ext = StubExtractor() + self.assertEqual(str(ext), 'StubExtractor') + self.assertEqual(repr(ext), 'StubExtractor()') + self.assertEqual(ext, StubExtractor()) + self.assertEqual(hash(ext), hash(StubExtractor())) + + sub = StubSub() + self.assertEqual(str(sub), 'StubSub') + self.assertEqual(repr(sub), 'StubSub()') + self.assertEqual(sub, StubSub()) + self.assertEqual(hash(sub), hash(StubSub())) + self.assertNotEqual(ext, sub) + self.assertNotEqual(hash(ext), hash(sub)) + + def test_principals(self): + schema = bsfs.schema.Schema.Empty() + entity = schema.node(ns.bsfs.Node).get_child(ns.bsfs.Entity) + string = schema.literal(ns.bsfs.Literal).get_child(bsfs.URI('http://www.w3.org/2001/XMLSchema#string')) + p_author = schema.predicate(ns.bsfs.Predicate).get_child(ns.bse.author, domain=entity, range=string) + p_comment = schema.predicate(ns.bsfs.Predicate).get_child(ns.bse.comment, domain=entity, range=string) + ext = StubExtractor() + self.assertSetEqual(set(ext.principals), + {p_author, p_comment} | set(schema.predicates()) - {schema.predicate(ns.bsfs.Predicate)}) + + +## main ## + +if __name__ == '__main__': + unittest.main() + +## EOF ## diff --git a/test/extractor/test_builder.py b/test/extractor/test_builder.py new file mode 100644 index 0000000..039ea53 --- /dev/null +++ b/test/extractor/test_builder.py @@ -0,0 +1,103 @@ +""" + +Part of the bsie test suite. +A copy of the license is provided with the project. 
+Author: Matthias Baumgartner, 2022 +""" +# standard imports +import unittest + +# bsie imports +from bsie.utils import errors + +# objects to test +from bsie.extractor import ExtractorBuilder + + +## code ## + +class TestExtractorBuilder(unittest.TestCase): + def test_iter(self): + # no specifications + self.assertListEqual(list(ExtractorBuilder([])), []) + # some specifications + builder = ExtractorBuilder([ + {'bsie.extractor.generic.path.Path': {}}, + {'bsie.extractor.generic.stat.Stat': {}}, + {'bsie.extractor.generic.path.Path': {}}, + ]) + self.assertListEqual(list(builder), [0, 1, 2]) + + def test_build(self): + # simple and repeated extractors + builder = ExtractorBuilder([ + {'bsie.extractor.generic.path.Path': {}}, + {'bsie.extractor.generic.stat.Stat': {}}, + {'bsie.extractor.generic.path.Path': {}}, + ]) + ext = [builder.build(0), builder.build(1), builder.build(2)] + import bsie.extractor.generic.path + import bsie.extractor.generic.stat + self.assertListEqual(ext, [ + bsie.extractor.generic.path.Path(), + bsie.extractor.generic.stat.Stat(), + bsie.extractor.generic.path.Path(), + ]) + # out-of-bounds raises IndexError + self.assertRaises(IndexError, builder.build, 3) + + # building with args + builder = ExtractorBuilder([ + {'bsie.extractor.generic.constant.Constant': { + 'schema': ''' + bse:author rdfs:subClassOf bsfs:Predicate ; + rdfs:domain bsfs:Entity ; + rdfs:range xsd:string ; + bsfs:unique "true"^^xsd:boolean . + bse:rating rdfs:subClassOf bsfs:Predicate ; + rdfs:domain bsfs:Entity ; + rdfs:range xsd:integer ; + bsfs:unique "true"^^xsd:boolean .
+ ''', + 'tuples': [ + ('http://bsfs.ai/schema/Entity#author', 'Me, myself, and I'), + ('http://bsfs.ai/schema/Entity#rating', 123), + ], + }}]) + obj = builder.build(0) + import bsie.extractor.generic.constant + self.assertEqual(obj, bsie.extractor.generic.constant.Constant(''' + bse:author rdfs:subClassOf bsfs:Predicate ; + rdfs:domain bsfs:Entity ; + rdfs:range xsd:string ; + bsfs:unique "true"^^xsd:boolean . + bse:rating rdfs:subClassOf bsfs:Predicate ; + rdfs:domain bsfs:Entity ; + rdfs:range xsd:integer ; + bsfs:unique "true"^^xsd:boolean . + ''', [ + ('http://bsfs.ai/schema/Entity#author', 'Me, myself, and I'), + ('http://bsfs.ai/schema/Entity#rating', 123), + ])) + + # building with invalid args + self.assertRaises(errors.BuilderError, ExtractorBuilder( + [{'bsie.extractor.generic.path.Path': {'foo': 123}}]).build, 0) + # non-dict build specification + self.assertRaises(TypeError, ExtractorBuilder( + [('bsie.extractor.generic.path.Path', {})]).build, 0) + # multiple keys per build specification + self.assertRaises(TypeError, ExtractorBuilder( + [{'bsie.extractor.generic.path.Path': {}, + 'bsie.extractor.generic.stat.Stat': {}}]).build, 0) + # non-dict value for kwargs + self.assertRaises(TypeError, ExtractorBuilder( + [{'bsie.extractor.generic.path.Path': 123}]).build, 0) + + +## main ## + +if __name__ == '__main__': + unittest.main() + +## EOF ## diff --git a/test/lib/test_bsie.py b/test/lib/test_bsie.py index 771a0c2..52f1d44 100644 --- a/test/lib/test_bsie.py +++ b/test/lib/test_bsie.py @@ -4,13 +4,15 @@ Part of the bsie test suite. A copy of the license is provided with the project. 
Author: Matthias Baumgartner, 2022 """ -# imports +# standard imports import os import unittest # bsie imports -from bsie.base import extractor -from bsie.tools import builder +from bsie.extractor import ExtractorBuilder +from bsie.extractor.base import SCHEMA_PREAMBLE +from bsie.lib import PipelineBuilder +from bsie.reader import ReaderBuilder from bsie.utils import bsfs, node, ns # objects to test @@ -22,9 +24,9 @@ from bsie.lib.bsie import BSIE class TestBSIE(unittest.TestCase): def setUp(self): # reader builder - rbuild = builder.ReaderBuilder({}) + rbuild = ReaderBuilder({}) # extractor builder - ebuild = builder.ExtractorBuilder([ + ebuild = ExtractorBuilder([ {'bsie.extractor.generic.path.Path': {}}, {'bsie.extractor.generic.stat.Stat': {}}, {'bsie.extractor.generic.constant.Constant': dict( @@ -39,7 +41,7 @@ class TestBSIE(unittest.TestCase): ]) # build pipeline self.prefix = bsfs.Namespace('http://example.com/local/') - pbuild = builder.PipelineBuilder(self.prefix, rbuild, ebuild) + pbuild = PipelineBuilder(self.prefix, rbuild, ebuild) self.pipeline = pbuild.build() def test_construction(self): @@ -50,7 +52,7 @@ class TestBSIE(unittest.TestCase): ns.bse.filesize, ns.bse.author, }) - self.assertEqual(lib.schema, bsfs.schema.Schema.from_string(extractor.SCHEMA_PREAMBLE + ''' + self.assertEqual(lib.schema, bsfs.schema.Schema.from_string(SCHEMA_PREAMBLE + ''' bse:filename rdfs:subClassOf bsfs:Predicate ; rdfs:domain bsfs:File ; rdfs:range xsd:string ; @@ -77,7 +79,7 @@ class TestBSIE(unittest.TestCase): ns.bse.filesize, ns.bse.author, }) - self.assertEqual(lib.schema, bsfs.schema.Schema.from_string(extractor.SCHEMA_PREAMBLE + ''' + self.assertEqual(lib.schema, bsfs.schema.Schema.from_string(SCHEMA_PREAMBLE + ''' bse:filesize rdfs:subClassOf bsfs:Predicate ; rdfs:domain bsfs:File ; rdfs:range xsd:integer; @@ -95,7 +97,7 @@ class TestBSIE(unittest.TestCase): ns.bse.filesize, ns.bse.author, }) - self.assertEqual(lib.schema, 
bsfs.schema.Schema.from_string(extractor.SCHEMA_PREAMBLE + ''' + self.assertEqual(lib.schema, bsfs.schema.Schema.from_string(SCHEMA_PREAMBLE + ''' bse:filename rdfs:subClassOf bsfs:Predicate ; rdfs:domain bsfs:File ; rdfs:range xsd:string ; @@ -122,7 +124,7 @@ class TestBSIE(unittest.TestCase): self.assertSetEqual(set(lib.principals), { ns.bse.author, }) - self.assertEqual(lib.schema, bsfs.schema.Schema.from_string(extractor.SCHEMA_PREAMBLE + ''' + self.assertEqual(lib.schema, bsfs.schema.Schema.from_string(SCHEMA_PREAMBLE + ''' bse:author rdfs:subClassOf bsfs:Predicate ; rdfs:domain bsfs:Entity ; rdfs:range xsd:string ; @@ -137,7 +139,7 @@ class TestBSIE(unittest.TestCase): self.assertSetEqual(set(lib.principals), { ns.bse.filesize, }) - self.assertEqual(lib.schema, bsfs.schema.Schema.from_string(extractor.SCHEMA_PREAMBLE + ''' + self.assertEqual(lib.schema, bsfs.schema.Schema.from_string(SCHEMA_PREAMBLE + ''' bse:filesize rdfs:subClassOf bsfs:Predicate ; rdfs:domain bsfs:File ; rdfs:range xsd:integer; diff --git a/test/lib/test_builder.py b/test/lib/test_builder.py new file mode 100644 index 0000000..273d620 --- /dev/null +++ b/test/lib/test_builder.py @@ -0,0 +1,107 @@ +""" + +Part of the bsie test suite. +A copy of the license is provided with the project. +Author: Matthias Baumgartner, 2022 +""" +# standard imports +import logging +import unittest + +# bsie imports +from bsie.extractor import ExtractorBuilder +from bsie.reader import ReaderBuilder +from bsie.utils import bsfs + +# objects to test +from bsie.lib import PipelineBuilder + + +## code ## + +class TestPipelineBuilder(unittest.TestCase): + def test_build(self): + prefix = bsfs.URI('http://example.com/local/file#') + c_schema = ''' + bse:author rdfs:subClassOf bsfs:Predicate ; + rdfs:domain bsfs:Entity ; + rdfs:range xsd:string ; + bsfs:unique "true"^^xsd:boolean . 
+ ''' + c_tuples = [('http://bsfs.ai/schema/Entity#author', 'Me, myself, and I')] + # prepare builders + rbuild = ReaderBuilder({}) + ebuild = ExtractorBuilder([ + {'bsie.extractor.generic.path.Path': {}}, + {'bsie.extractor.generic.stat.Stat': {}}, + {'bsie.extractor.generic.constant.Constant': dict( + schema=c_schema, + tuples=c_tuples, + )}, + ]) + # build pipeline + builder = PipelineBuilder(prefix, rbuild, ebuild) + pipeline = builder.build() + # delayed import + import bsie.reader.path + import bsie.reader.stat + import bsie.extractor.generic.path + import bsie.extractor.generic.stat + import bsie.extractor.generic.constant + # check pipeline + self.assertDictEqual(pipeline._ext2rdr, { + bsie.extractor.generic.path.Path(): bsie.reader.path.Path(), + bsie.extractor.generic.stat.Stat(): bsie.reader.stat.Stat(), + bsie.extractor.generic.constant.Constant(c_schema, c_tuples): None, + }) + + # fail to load extractor + ebuild_err = ExtractorBuilder([ + {'bsie.extractor.generic.foo.Foo': {}}, + {'bsie.extractor.generic.path.Path': {}}, + ]) + with self.assertLogs(logging.getLogger('bsie.lib.builder'), logging.ERROR): + pipeline = PipelineBuilder(prefix, rbuild, ebuild_err).build() + self.assertDictEqual(pipeline._ext2rdr, { + bsie.extractor.generic.path.Path(): bsie.reader.path.Path()}) + + # fail to build extractor + ebuild_err = ExtractorBuilder([ + {'bsie.extractor.generic.path.Path': {'foo': 123}}, + {'bsie.extractor.generic.path.Path': {}}, + ]) + with self.assertLogs(logging.getLogger('bsie.lib.builder'), logging.ERROR): + pipeline = PipelineBuilder(prefix, rbuild, ebuild_err).build() + self.assertDictEqual(pipeline._ext2rdr, { + bsie.extractor.generic.path.Path(): bsie.reader.path.Path()}) + + # fail to load reader + with self.assertLogs(logging.getLogger('bsie.lib.builder'), logging.ERROR): + # switch reader of an extractor + old_reader = bsie.extractor.generic.path.Path.CONTENT_READER + bsie.extractor.generic.path.Path.CONTENT_READER = 'bsie.reader.foo.Foo' 
+ # build pipeline with invalid reader reference + pipeline = PipelineBuilder(prefix, rbuild, ebuild).build() + self.assertDictEqual(pipeline._ext2rdr, { + bsie.extractor.generic.stat.Stat(): bsie.reader.stat.Stat(), + bsie.extractor.generic.constant.Constant(c_schema, c_tuples): None, + }) + # switch back + bsie.extractor.generic.path.Path.CONTENT_READER = old_reader + + # fail to build reader + rbuild_err = ReaderBuilder({'bsie.reader.stat.Stat': dict(foo=123)}) + with self.assertLogs(logging.getLogger('bsie.lib.builder'), logging.ERROR): + pipeline = PipelineBuilder(prefix, rbuild_err, ebuild).build() + self.assertDictEqual(pipeline._ext2rdr, { + bsie.extractor.generic.path.Path(): bsie.reader.path.Path(), + bsie.extractor.generic.constant.Constant(c_schema, c_tuples): None, + }) + + +## main ## + +if __name__ == '__main__': + unittest.main() + +## EOF ## diff --git a/test/lib/test_pipeline.py b/test/lib/test_pipeline.py new file mode 100644 index 0000000..c6f7aba --- /dev/null +++ b/test/lib/test_pipeline.py @@ -0,0 +1,175 @@ +""" + +Part of the bsie test suite. +A copy of the license is provided with the project. +Author: Matthias Baumgartner, 2022 +""" +# standard imports +import logging +import os +import unittest + +# bsie imports +from bsie.utils import bsfs, errors, node, ns +import bsie.extractor.generic.constant +import bsie.extractor.generic.path +import bsie.extractor.generic.stat +import bsie.reader.path +import bsie.reader.stat + +# objects to test +from bsie.lib.pipeline import Pipeline + + +## code ## + +class TestPipeline(unittest.TestCase): + def setUp(self): + # constant A + csA = ''' + bse:author rdfs:subClassOf bsfs:Predicate ; + rdfs:domain bsfs:File ; + rdfs:range xsd:string ; + bsfs:unique "true"^^xsd:boolean . 
+ ''' + tupA = [('http://bsfs.ai/schema/Entity#author', 'Me, myself, and I')] + # constant B + csB = ''' + bse:rating rdfs:subClassOf bsfs:Predicate ; + rdfs:domain bsfs:File ; + rdfs:range xsd:integer ; + bsfs:unique "true"^^xsd:boolean . + ''' + tupB = [('http://bsfs.ai/schema/Entity#rating', 123)] + # extractors/readers + self.ext2rdr = { + bsie.extractor.generic.path.Path(): bsie.reader.path.Path(), + bsie.extractor.generic.stat.Stat(): bsie.reader.stat.Stat(), + bsie.extractor.generic.constant.Constant(csA, tupA): None, + bsie.extractor.generic.constant.Constant(csB, tupB): None, + } + self.prefix = bsfs.Namespace('http://example.com/local/') + + def test_essentials(self): + pipeline = Pipeline(self.prefix, self.ext2rdr) + self.assertEqual(str(pipeline), 'Pipeline') + self.assertEqual(repr(pipeline), 'Pipeline(...)') + + def test_equality(self): + pipeline = Pipeline(self.prefix, self.ext2rdr) + # a pipeline is equivalent to itself + self.assertEqual(pipeline, pipeline) + self.assertEqual(hash(pipeline), hash(pipeline)) + # identical builds are equivalent + self.assertEqual(pipeline, Pipeline(self.prefix, self.ext2rdr)) + self.assertEqual(hash(pipeline), hash(Pipeline(self.prefix, self.ext2rdr))) + + # equivalence respects prefix + self.assertNotEqual(pipeline, Pipeline(bsfs.URI('http://example.com/global/ent#'), self.ext2rdr)) + self.assertNotEqual(hash(pipeline), hash(Pipeline(bsfs.URI('http://example.com/global/ent#'), self.ext2rdr))) + # equivalence respects extractors/readers + ext2rdr = {ext: rdr for idx, (ext, rdr) in enumerate(self.ext2rdr.items()) if idx % 2 == 0} + self.assertNotEqual(pipeline, Pipeline(self.prefix, ext2rdr)) + self.assertNotEqual(hash(pipeline), hash(Pipeline(self.prefix, ext2rdr))) + + # equivalence respects schema + p2 = Pipeline(self.prefix, self.ext2rdr) + p2._schema = pipeline.schema.Empty() + self.assertNotEqual(pipeline, p2) + self.assertNotEqual(hash(pipeline), hash(p2)) + + # not equal to other types + class Foo(): pass + 
self.assertNotEqual(pipeline, Foo()) + self.assertNotEqual(hash(pipeline), hash(Foo())) + self.assertNotEqual(pipeline, 123) + self.assertNotEqual(hash(pipeline), hash(123)) + self.assertNotEqual(pipeline, None) + self.assertNotEqual(hash(pipeline), hash(None)) + + + def test_call(self): + # build pipeline + pipeline = Pipeline(self.prefix, self.ext2rdr) + # build objects for tests + content_hash = 'a948904f2f0f479b8f8197694b30184b0d2ed1c1cd2a1ec0fb85d299a192a447' + subject = node.Node(ns.bsfs.File, (self.prefix + 'file#')[content_hash]) + testfile = os.path.join(os.path.dirname(__file__), 'testfile.t') + p_filename = pipeline.schema.predicate(ns.bse.filename) + p_filesize = pipeline.schema.predicate(ns.bse.filesize) + p_author = pipeline.schema.predicate(ns.bse.author) + p_rating = pipeline.schema.predicate(ns.bse.rating) + entity = pipeline.schema.node(ns.bsfs.File) + p_invalid = pipeline.schema.predicate(ns.bsfs.Predicate).get_child(ns.bse.foo, range=entity) + + # extract given predicates + self.assertSetEqual(set(pipeline(testfile, {p_filename, p_filesize})), { + (subject, p_filename, 'testfile.t'), + (subject, p_filesize, 12), + }) + self.assertSetEqual(set(pipeline(testfile, {p_author})), { + (subject, p_author, 'Me, myself, and I'), + }) + self.assertSetEqual(set(pipeline(testfile, {p_filename})), { + (subject, p_filename, 'testfile.t'), + }) + self.assertSetEqual(set(pipeline(testfile, {p_filesize})), { + (subject, p_filesize, 12), + }) + # extract all predicates + self.assertSetEqual(set(pipeline(testfile)), { + (subject, p_filename, 'testfile.t'), + (subject, p_filesize, 12), + (subject, p_author, 'Me, myself, and I'), + (subject, p_rating, 123), + }) + # invalid predicate + self.assertSetEqual(set(pipeline(testfile, {p_invalid})), set()) + # valid/invalid predicates mixed + self.assertSetEqual(set(pipeline(testfile, {p_filename, p_invalid})), { + (subject, p_filename, 'testfile.t'), + }) + # invalid path + self.assertRaises(FileNotFoundError, list, 
pipeline('inexistent_file')) + # FIXME: unreadable file (e.g. permissions error) + + def test_call_reader_err(self): + class FaultyReader(bsie.reader.path.Path): + def __call__(self, path): + raise errors.ReaderError('reader error') + + pipeline = Pipeline(self.prefix, {bsie.extractor.generic.path.Path(): FaultyReader()}) + with self.assertLogs(logging.getLogger('bsie.lib.pipeline'), logging.ERROR): + testfile = os.path.join(os.path.dirname(__file__), 'testfile.t') + p_filename = pipeline.schema.predicate(ns.bse.filename) + self.assertSetEqual(set(pipeline(testfile, {p_filename})), set()) + + def test_call_extractor_err(self): + class FaultyExtractor(bsie.extractor.generic.path.Path): + def extract(self, subject, content, predicates): + raise errors.ExtractorError('extractor error') + + pipeline = Pipeline(self.prefix, {FaultyExtractor(): bsie.reader.path.Path()}) + with self.assertLogs(logging.getLogger('bsie.lib.pipeline'), logging.ERROR): + testfile = os.path.join(os.path.dirname(__file__), 'testfile.t') + p_filename = pipeline.schema.predicate(ns.bse.filename) + self.assertSetEqual(set(pipeline(testfile, {p_filename})), set()) + + def test_predicates(self): + # build pipeline + pipeline = Pipeline(self.prefix, self.ext2rdr) + # + self.assertSetEqual(set(pipeline.principals), { + pipeline.schema.predicate(ns.bse.filename), + pipeline.schema.predicate(ns.bse.filesize), + pipeline.schema.predicate(ns.bse.author), + pipeline.schema.predicate(ns.bse.rating), + }) + + +## main ## + +if __name__ == '__main__': + unittest.main() + +## EOF ## diff --git a/test/reader/test_base.py b/test/reader/test_base.py new file mode 100644 index 0000000..41f4c29 --- /dev/null +++ b/test/reader/test_base.py @@ -0,0 +1,45 @@ +""" + +Part of the bsie test suite. +A copy of the license is provided with the project. 
+Author: Matthias Baumgartner, 2022 +""" +# standard imports +import unittest + +# objects to test +from bsie.reader import Reader + + +## code ## + +class StubReader(Reader): + def __call__(self, path): + raise NotImplementedError() + +class StubSub(StubReader): + pass + +class TestReader(unittest.TestCase): + def test_essentials(self): + ext = StubReader() + self.assertEqual(str(ext), 'StubReader') + self.assertEqual(repr(ext), 'StubReader()') + self.assertEqual(ext, StubReader()) + self.assertEqual(hash(ext), hash(StubReader())) + + sub = StubSub() + self.assertEqual(str(sub), 'StubSub') + self.assertEqual(repr(sub), 'StubSub()') + self.assertEqual(sub, StubSub()) + self.assertEqual(hash(sub), hash(StubSub())) + self.assertNotEqual(ext, sub) + self.assertNotEqual(hash(ext), hash(sub)) + + +## main ## + +if __name__ == '__main__': + unittest.main() + +## EOF ## diff --git a/test/reader/test_builder.py b/test/reader/test_builder.py new file mode 100644 index 0000000..92e9edc --- /dev/null +++ b/test/reader/test_builder.py @@ -0,0 +1,54 @@ +""" + +Part of the bsie test suite. +A copy of the license is provided with the project. 
+Author: Matthias Baumgartner, 2022 +""" +# standard imports +import unittest + +# bsie imports +from bsie.utils import errors + +# objects to test +from bsie.reader import ReaderBuilder + + +## code ## + +class TestReaderBuilder(unittest.TestCase): + def test_build(self): + builder = ReaderBuilder({'bsie.reader.path.Path': {}}) + # build configured reader + cls = builder.build('bsie.reader.path.Path') + import bsie.reader.path + self.assertIsInstance(cls, bsie.reader.path.Path) + # build unconfigured reader + cls = builder.build('bsie.reader.stat.Stat') + import bsie.reader.stat + self.assertIsInstance(cls, bsie.reader.stat.Stat) + # re-build previous reader (test cache) + self.assertEqual(cls, builder.build('bsie.reader.stat.Stat')) + # test invalid + self.assertRaises(TypeError, builder.build, 123) + self.assertRaises(TypeError, builder.build, None) + self.assertRaises(ValueError, builder.build, '') + self.assertRaises(ValueError, builder.build, 'Path') + self.assertRaises(errors.BuilderError, builder.build, 'path.Path') + # invalid config + builder = ReaderBuilder({'bsie.reader.stat.Stat': dict(foo=123)}) + self.assertRaises(errors.BuilderError, builder.build, 'bsie.reader.stat.Stat') + builder = ReaderBuilder({'bsie.reader.stat.Stat': 123}) + self.assertRaises(TypeError, builder.build, 'bsie.reader.stat.Stat') + # no instructions + builder = ReaderBuilder({}) + cls = builder.build('bsie.reader.stat.Stat') + self.assertIsInstance(cls, bsie.reader.stat.Stat) + + +## main ## + +if __name__ == '__main__': + unittest.main() + +## EOF ## diff --git a/test/reader/test_stat.py b/test/reader/test_stat.py index d12ad9c..fd9fdcd 100644 --- a/test/reader/test_stat.py +++ b/test/reader/test_stat.py @@ -4,12 +4,12 @@ Part of the bsie test suite. A copy of the license is provided with the project. 
Author: Matthias Baumgartner, 2022 """ -# imports +# standard imports import os import unittest # bsie imports -from bsie.base import errors +from bsie.utils import errors # objects to test from bsie.reader.stat import Stat diff --git a/test/tools/__init__.py b/test/tools/__init__.py deleted file mode 100644 index e69de29..0000000 diff --git a/test/tools/test_builder.py b/test/tools/test_builder.py deleted file mode 100644 index 62c637c..0000000 --- a/test/tools/test_builder.py +++ /dev/null @@ -1,246 +0,0 @@ -""" - -Part of the bsie test suite. -A copy of the license is provided with the project. -Author: Matthias Baumgartner, 2022 -""" -# imports -import logging -import unittest - -# bsie imports -from bsie import base -from bsie.utils import bsfs - -# objects to test -from bsie.tools.builder import ExtractorBuilder -from bsie.tools.builder import PipelineBuilder -from bsie.tools.builder import ReaderBuilder -from bsie.tools.builder import _safe_load -from bsie.tools.builder import _unpack_name - - -## code ## - -class TestUtils(unittest.TestCase): - def test_safe_load(self): - # invalid module - self.assertRaises(base.errors.LoaderError, _safe_load, 'dBGHMSAYOoKeKMpywDoKZQycENFPvN', 'foobar') - self.assertRaises(base.errors.LoaderError, _safe_load, 'dBGHMSAYOoKeKMpywDoKZQycENFPvN.bar', 'foobar') - # partially valid module - self.assertRaises(base.errors.LoaderError, _safe_load, 'os.foo', 'foobar') - # invalid class - self.assertRaises(base.errors.LoaderError, _safe_load, 'os.path', 'foo') - # valid module and class - cls = _safe_load('collections.abc', 'Container') - import collections.abc - self.assertEqual(cls, collections.abc.Container) - - def test_unpack_name(self): - self.assertRaises(TypeError, _unpack_name, 123) - self.assertRaises(TypeError, _unpack_name, None) - self.assertRaises(ValueError, _unpack_name, '') - self.assertRaises(ValueError, _unpack_name, 'path') - self.assertRaises(ValueError, _unpack_name, '.Path') - 
self.assertEqual(_unpack_name('path.Path'), ('path', 'Path')) - self.assertEqual(_unpack_name('path.foo.bar.Path'), ('path.foo.bar', 'Path')) - - -class TestReaderBuilder(unittest.TestCase): - def test_build(self): - builder = ReaderBuilder({'bsie.reader.path.Path': {}}) - # build configured reader - cls = builder.build('bsie.reader.path.Path') - import bsie.reader.path - self.assertIsInstance(cls, bsie.reader.path.Path) - # build unconfigured reader - cls = builder.build('bsie.reader.stat.Stat') - import bsie.reader.stat - self.assertIsInstance(cls, bsie.reader.stat.Stat) - # re-build previous reader (test cache) - self.assertEqual(cls, builder.build('bsie.reader.stat.Stat')) - # test invalid - self.assertRaises(TypeError, builder.build, 123) - self.assertRaises(TypeError, builder.build, None) - self.assertRaises(ValueError, builder.build, '') - self.assertRaises(ValueError, builder.build, 'Path') - self.assertRaises(base.errors.BuilderError, builder.build, 'path.Path') - # invalid config - builder = ReaderBuilder({'bsie.reader.stat.Stat': dict(foo=123)}) - self.assertRaises(base.errors.BuilderError, builder.build, 'bsie.reader.stat.Stat') - builder = ReaderBuilder({'bsie.reader.stat.Stat': 123}) - self.assertRaises(TypeError, builder.build, 'bsie.reader.stat.Stat') - # no instructions - builder = ReaderBuilder({}) - cls = builder.build('bsie.reader.stat.Stat') - self.assertIsInstance(cls, bsie.reader.stat.Stat) - - - -class TestExtractorBuilder(unittest.TestCase): - def test_iter(self): - # no specifications - self.assertListEqual(list(ExtractorBuilder([])), []) - # some specifications - builder = ExtractorBuilder([ - {'bsie.extractor.generic.path.Path': {}}, - {'bsie.extractor.generic.stat.Stat': {}}, - {'bsie.extractor.generic.path.Path': {}}, - ]) - self.assertListEqual(list(builder), [0, 1, 2]) - - def test_build(self): - # simple and repeated extractors - builder = ExtractorBuilder([ - {'bsie.extractor.generic.path.Path': {}}, - 
{'bsie.extractor.generic.stat.Stat': {}}, - {'bsie.extractor.generic.path.Path': {}}, - ]) - ext = [builder.build(0), builder.build(1), builder.build(2)] - import bsie.extractor.generic.path - import bsie.extractor.generic.stat - self.assertListEqual(ext, [ - bsie.extractor.generic.path.Path(), - bsie.extractor.generic.stat.Stat(), - bsie.extractor.generic.path.Path(), - ]) - # out-of-bounds raises KeyError - self.assertRaises(IndexError, builder.build, 3) - - # building with args - builder = ExtractorBuilder([ - {'bsie.extractor.generic.constant.Constant': { - 'schema': ''' - bse:author rdfs:subClassOf bsfs:Predicate ; - rdfs:domain bsfs:Entity ; - rdfs:range xsd:string ; - bsfs:unique "true"^^xsd:boolean . - bse:rating rdfs:subClassOf bsfs:Predicate ; - rdfs:domain bsfs:Entity ; - rdfs:range xsd:integer ; - bsfs:unique "true"^^xsd:boolean . - ''', - 'tuples': [ - ('http://bsfs.ai/schema/Entity#author', 'Me, myself, and I'), - ('http://bsfs.ai/schema/Entity#rating', 123), - ], - }}]) - obj = builder.build(0) - import bsie.extractor.generic.constant - self.assertEqual(obj, bsie.extractor.generic.constant.Constant(''' - bse:author rdfs:subClassOf bsfs:Predicate ; - rdfs:domain bsfs:Entity ; - rdfs:range xsd:string ; - bsfs:unique "true"^^xsd:boolean . - bse:rating rdfs:subClassOf bsfs:Predicate ; - rdfs:domain bsfs:Entity ; - rdfs:range xsd:integer ; - bsfs:unique "true"^^xsd:boolean . 
- ''', [ - ('http://bsfs.ai/schema/Entity#author', 'Me, myself, and I'), - ('http://bsfs.ai/schema/Entity#rating', 123), - ])) - - # building with invalid args - self.assertRaises(base.errors.BuilderError, ExtractorBuilder( - [{'bsie.extractor.generic.path.Path': {'foo': 123}}]).build, 0) - # non-dict build specification - self.assertRaises(TypeError, ExtractorBuilder( - [('bsie.extractor.generic.path.Path', {})]).build, 0) - # multiple keys per build specification - self.assertRaises(TypeError, ExtractorBuilder( - [{'bsie.extractor.generic.path.Path': {}, - 'bsie.extractor.generic.stat.Stat': {}}]).build, 0) - # non-dict value for kwargs - self.assertRaises(TypeError, ExtractorBuilder( - [{'bsie.extractor.generic.path.Path': 123}]).build, 0) - - - - -class TestPipelineBuilder(unittest.TestCase): - def test_build(self): - prefix = bsfs.URI('http://example.com/local/file#') - c_schema = ''' - bse:author rdfs:subClassOf bsfs:Predicate ; - rdfs:domain bsfs:Entity ; - rdfs:range xsd:string ; - bsfs:unique "true"^^xsd:boolean . 
- ''' - c_tuples = [('http://bsfs.ai/schema/Entity#author', 'Me, myself, and I')] - # prepare builders - rbuild = ReaderBuilder({}) - ebuild = ExtractorBuilder([ - {'bsie.extractor.generic.path.Path': {}}, - {'bsie.extractor.generic.stat.Stat': {}}, - {'bsie.extractor.generic.constant.Constant': dict( - schema=c_schema, - tuples=c_tuples, - )}, - ]) - # build pipeline - builder = PipelineBuilder(prefix, rbuild, ebuild) - pipeline = builder.build() - # delayed import - import bsie.reader.path - import bsie.reader.stat - import bsie.extractor.generic.path - import bsie.extractor.generic.stat - import bsie.extractor.generic.constant - # check pipeline - self.assertDictEqual(pipeline._ext2rdr, { - bsie.extractor.generic.path.Path(): bsie.reader.path.Path(), - bsie.extractor.generic.stat.Stat(): bsie.reader.stat.Stat(), - bsie.extractor.generic.constant.Constant(c_schema, c_tuples): None, - }) - - # fail to load extractor - ebuild_err = ExtractorBuilder([ - {'bsie.extractor.generic.foo.Foo': {}}, - {'bsie.extractor.generic.path.Path': {}}, - ]) - with self.assertLogs(logging.getLogger('bsie.tools.builder'), logging.ERROR): - pipeline = PipelineBuilder(prefix, rbuild, ebuild_err).build() - self.assertDictEqual(pipeline._ext2rdr, { - bsie.extractor.generic.path.Path(): bsie.reader.path.Path()}) - - # fail to build extractor - ebuild_err = ExtractorBuilder([ - {'bsie.extractor.generic.path.Path': {'foo': 123}}, - {'bsie.extractor.generic.path.Path': {}}, - ]) - with self.assertLogs(logging.getLogger('bsie.tools.builder'), logging.ERROR): - pipeline = PipelineBuilder(prefix, rbuild, ebuild_err).build() - self.assertDictEqual(pipeline._ext2rdr, { - bsie.extractor.generic.path.Path(): bsie.reader.path.Path()}) - - # fail to load reader - with self.assertLogs(logging.getLogger('bsie.tools.builder'), logging.ERROR): - # switch reader of an extractor - old_reader = bsie.extractor.generic.path.Path.CONTENT_READER - bsie.extractor.generic.path.Path.CONTENT_READER = 
'bsie.reader.foo.Foo' - # build pipeline with invalid reader reference - pipeline = PipelineBuilder(prefix, rbuild, ebuild).build() - self.assertDictEqual(pipeline._ext2rdr, { - bsie.extractor.generic.stat.Stat(): bsie.reader.stat.Stat(), - bsie.extractor.generic.constant.Constant(c_schema, c_tuples): None, - }) - # switch back - bsie.extractor.generic.path.Path.CONTENT_READER = old_reader - - # fail to build reader - rbuild_err = ReaderBuilder({'bsie.reader.stat.Stat': dict(foo=123)}) - with self.assertLogs(logging.getLogger('bsie.tools.builder'), logging.ERROR): - pipeline = PipelineBuilder(prefix, rbuild_err, ebuild).build() - self.assertDictEqual(pipeline._ext2rdr, { - bsie.extractor.generic.path.Path(): bsie.reader.path.Path(), - bsie.extractor.generic.constant.Constant(c_schema, c_tuples): None, - }) - - -## main ## - -if __name__ == '__main__': - unittest.main() - -## EOF ## diff --git a/test/tools/test_pipeline.py b/test/tools/test_pipeline.py deleted file mode 100644 index a116a30..0000000 --- a/test/tools/test_pipeline.py +++ /dev/null @@ -1,176 +0,0 @@ -""" - -Part of the bsie test suite. -A copy of the license is provided with the project. -Author: Matthias Baumgartner, 2022 -""" -# imports -import logging -import os -import unittest - -# bsie imports -from bsie.base import errors -from bsie.utils import bsfs, node, ns -import bsie.extractor.generic.constant -import bsie.extractor.generic.path -import bsie.extractor.generic.stat -import bsie.reader.path -import bsie.reader.stat - -# objects to test -from bsie.tools.pipeline import Pipeline - - -## code ## - -class TestPipeline(unittest.TestCase): - def setUp(self): - # constant A - csA = ''' - bse:author rdfs:subClassOf bsfs:Predicate ; - rdfs:domain bsfs:File ; - rdfs:range xsd:string ; - bsfs:unique "true"^^xsd:boolean . 
- ''' - tupA = [('http://bsfs.ai/schema/Entity#author', 'Me, myself, and I')] - # constant B - csB = ''' - bse:rating rdfs:subClassOf bsfs:Predicate ; - rdfs:domain bsfs:File ; - rdfs:range xsd:integer ; - bsfs:unique "true"^^xsd:boolean . - ''' - tupB = [('http://bsfs.ai/schema/Entity#rating', 123)] - # extractors/readers - self.ext2rdr = { - bsie.extractor.generic.path.Path(): bsie.reader.path.Path(), - bsie.extractor.generic.stat.Stat(): bsie.reader.stat.Stat(), - bsie.extractor.generic.constant.Constant(csA, tupA): None, - bsie.extractor.generic.constant.Constant(csB, tupB): None, - } - self.prefix = bsfs.Namespace('http://example.com/local/') - - def test_essentials(self): - pipeline = Pipeline(self.prefix, self.ext2rdr) - self.assertEqual(str(pipeline), 'Pipeline') - self.assertEqual(repr(pipeline), 'Pipeline(...)') - - def test_equality(self): - pipeline = Pipeline(self.prefix, self.ext2rdr) - # a pipeline is equivalent to itself - self.assertEqual(pipeline, pipeline) - self.assertEqual(hash(pipeline), hash(pipeline)) - # identical builds are equivalent - self.assertEqual(pipeline, Pipeline(self.prefix, self.ext2rdr)) - self.assertEqual(hash(pipeline), hash(Pipeline(self.prefix, self.ext2rdr))) - - # equivalence respects prefix - self.assertNotEqual(pipeline, Pipeline(bsfs.URI('http://example.com/global/ent#'), self.ext2rdr)) - self.assertNotEqual(hash(pipeline), hash(Pipeline(bsfs.URI('http://example.com/global/ent#'), self.ext2rdr))) - # equivalence respects extractors/readers - ext2rdr = {ext: rdr for idx, (ext, rdr) in enumerate(self.ext2rdr.items()) if idx % 2 == 0} - self.assertNotEqual(pipeline, Pipeline(self.prefix, ext2rdr)) - self.assertNotEqual(hash(pipeline), hash(Pipeline(self.prefix, ext2rdr))) - - # equivalence respects schema - p2 = Pipeline(self.prefix, self.ext2rdr) - p2._schema = pipeline.schema.Empty() - self.assertNotEqual(pipeline, p2) - self.assertNotEqual(hash(pipeline), hash(p2)) - - # not equal to other types - class Foo(): pass - 
self.assertNotEqual(pipeline, Foo()) - self.assertNotEqual(hash(pipeline), hash(Foo())) - self.assertNotEqual(pipeline, 123) - self.assertNotEqual(hash(pipeline), hash(123)) - self.assertNotEqual(pipeline, None) - self.assertNotEqual(hash(pipeline), hash(None)) - - - def test_call(self): - # build pipeline - pipeline = Pipeline(self.prefix, self.ext2rdr) - # build objects for tests - content_hash = 'a948904f2f0f479b8f8197694b30184b0d2ed1c1cd2a1ec0fb85d299a192a447' - subject = node.Node(ns.bsfs.File, (self.prefix + 'file#')[content_hash]) - testfile = os.path.join(os.path.dirname(__file__), 'testfile.t') - p_filename = pipeline.schema.predicate(ns.bse.filename) - p_filesize = pipeline.schema.predicate(ns.bse.filesize) - p_author = pipeline.schema.predicate(ns.bse.author) - p_rating = pipeline.schema.predicate(ns.bse.rating) - entity = pipeline.schema.node(ns.bsfs.File) - p_invalid = pipeline.schema.predicate(ns.bsfs.Predicate).get_child(ns.bse.foo, range=entity) - - # extract given predicates - self.assertSetEqual(set(pipeline(testfile, {p_filename, p_filesize})), { - (subject, p_filename, 'testfile.t'), - (subject, p_filesize, 12), - }) - self.assertSetEqual(set(pipeline(testfile, {p_author})), { - (subject, p_author, 'Me, myself, and I'), - }) - self.assertSetEqual(set(pipeline(testfile, {p_filename})), { - (subject, p_filename, 'testfile.t'), - }) - self.assertSetEqual(set(pipeline(testfile, {p_filesize})), { - (subject, p_filesize, 12), - }) - # extract all predicates - self.assertSetEqual(set(pipeline(testfile)), { - (subject, p_filename, 'testfile.t'), - (subject, p_filesize, 12), - (subject, p_author, 'Me, myself, and I'), - (subject, p_rating, 123), - }) - # invalid predicate - self.assertSetEqual(set(pipeline(testfile, {p_invalid})), set()) - # valid/invalid predicates mixed - self.assertSetEqual(set(pipeline(testfile, {p_filename, p_invalid})), { - (subject, p_filename, 'testfile.t'), - }) - # invalid path - self.assertRaises(FileNotFoundError, list, 
pipeline('inexistent_file')) - # FIXME: unreadable file (e.g. permissions error) - - def test_call_reader_err(self): - class FaultyReader(bsie.reader.path.Path): - def __call__(self, path): - raise errors.ReaderError('reader error') - - pipeline = Pipeline(self.prefix, {bsie.extractor.generic.path.Path(): FaultyReader()}) - with self.assertLogs(logging.getLogger('bsie.tools.pipeline'), logging.ERROR): - testfile = os.path.join(os.path.dirname(__file__), 'testfile.t') - p_filename = pipeline.schema.predicate(ns.bse.filename) - self.assertSetEqual(set(pipeline(testfile, {p_filename})), set()) - - def test_call_extractor_err(self): - class FaultyExtractor(bsie.extractor.generic.path.Path): - def extract(self, subject, content, predicates): - raise errors.ExtractorError('extractor error') - - pipeline = Pipeline(self.prefix, {FaultyExtractor(): bsie.reader.path.Path()}) - with self.assertLogs(logging.getLogger('bsie.tools.pipeline'), logging.ERROR): - testfile = os.path.join(os.path.dirname(__file__), 'testfile.t') - p_filename = pipeline.schema.predicate(ns.bse.filename) - self.assertSetEqual(set(pipeline(testfile, {p_filename})), set()) - - def test_predicates(self): - # build pipeline - pipeline = Pipeline(self.prefix, self.ext2rdr) - # - self.assertSetEqual(set(pipeline.principals), { - pipeline.schema.predicate(ns.bse.filename), - pipeline.schema.predicate(ns.bse.filesize), - pipeline.schema.predicate(ns.bse.author), - pipeline.schema.predicate(ns.bse.rating), - }) - - -## main ## - -if __name__ == '__main__': - unittest.main() - -## EOF ## diff --git a/test/tools/testfile.t b/test/tools/testfile.t deleted file mode 100644 index 3b18e51..0000000 --- a/test/tools/testfile.t +++ /dev/null @@ -1 +0,0 @@ -hello world diff --git a/test/utils/filematcher/test_parser.py b/test/utils/filematcher/test_parser.py index a81d2ed..c594747 100644 --- a/test/utils/filematcher/test_parser.py +++ b/test/utils/filematcher/test_parser.py @@ -4,11 +4,11 @@ Part of the bsie test suite. 
A copy of the license is provided with the project. Author: Matthias Baumgartner, 2022 """ -# imports +# standard imports import unittest -# inner-module imports -from bsie.base import errors +# bsie imports +from bsie.utils import errors from bsie.utils.filematcher import matcher # objects to test diff --git a/test/utils/test_loading.py b/test/utils/test_loading.py new file mode 100644 index 0000000..58ff166 --- /dev/null +++ b/test/utils/test_loading.py @@ -0,0 +1,48 @@ +""" + +Part of the bsie test suite. +A copy of the license is provided with the project. +Author: Matthias Baumgartner, 2022 +""" +# standard imports +import unittest + +# bsie imports +from bsie.utils import errors + +# objects to test +from bsie.utils.loading import safe_load, unpack_qualified_name + + +## code ## + +class TestUtils(unittest.TestCase): + def test_safe_load(self): + # invalid module + self.assertRaises(errors.LoaderError, safe_load, 'dBGHMSAYOoKeKMpywDoKZQycENFPvN', 'foobar') + self.assertRaises(errors.LoaderError, safe_load, 'dBGHMSAYOoKeKMpywDoKZQycENFPvN.bar', 'foobar') + # partially valid module + self.assertRaises(errors.LoaderError, safe_load, 'os.foo', 'foobar') + # invalid class + self.assertRaises(errors.LoaderError, safe_load, 'os.path', 'foo') + # valid module and class + cls = safe_load('collections.abc', 'Container') + import collections.abc + self.assertEqual(cls, collections.abc.Container) + + def test_unpack_qualified_name(self): + self.assertRaises(TypeError, unpack_qualified_name, 123) + self.assertRaises(TypeError, unpack_qualified_name, None) + self.assertRaises(ValueError, unpack_qualified_name, '') + self.assertRaises(ValueError, unpack_qualified_name, 'path') + self.assertRaises(ValueError, unpack_qualified_name, '.Path') + self.assertEqual(unpack_qualified_name('path.Path'), ('path', 'Path')) + self.assertEqual(unpack_qualified_name('path.foo.bar.Path'), ('path.foo.bar', 'Path')) + + +## main ## + +if __name__ == '__main__': + unittest.main() + +## EOF ## 
-- cgit v1.2.3 From 07219685d01f803dc46c8d5465fa542c1d822cb4 Mon Sep 17 00:00:00 2001 From: Matthias Baumgartner Date: Sat, 24 Dec 2022 10:39:51 +0100 Subject: documentation: standard vs external import --- bsie.app | 4 +- bsie/__init__.py | 2 +- bsie/apps/__init__.py | 2 +- bsie/extractor/base.py | 2 +- bsie/extractor/generic/__init__.py | 2 +- bsie/reader/base.py | 2 +- bsie/utils/bsfs.py | 2 +- bsie/utils/errors.py | 2 +- bsie/utils/filematcher/__init__.py | 2 +- bsie/utils/filematcher/matcher.py | 4 +- bsie/utils/namespaces.py | 2 +- bsie/utils/node.py | 2 +- test/apps/test_index.py | 6 +- test/apps/test_info.py | 2 +- test/extractor/generic/test_constant.py | 2 +- test/reader/test_path.py | 2 +- test/utils/filematcher/test_ast.py | 232 -------------------------------- test/utils/filematcher/test_matcher.py | 232 ++++++++++++++++++++++++++++++++ test/utils/test_node.py | 2 +- 19 files changed, 255 insertions(+), 251 deletions(-) delete mode 100644 test/utils/filematcher/test_ast.py create mode 100644 test/utils/filematcher/test_matcher.py diff --git a/bsie.app b/bsie.app index ba9cee7..d5808e7 100755 --- a/bsie.app +++ b/bsie.app @@ -4,11 +4,11 @@ Part of the bsie module. A copy of the license is provided with the project. Author: Matthias Baumgartner, 2022 """ -# imports +# standard imports import argparse import typing -# module imports +# bsie imports import bsie import bsie.apps diff --git a/bsie/__init__.py b/bsie/__init__.py index 8d2308c..c253f39 100644 --- a/bsie/__init__.py +++ b/bsie/__init__.py @@ -4,7 +4,7 @@ Part of the bsie module. A copy of the license is provided with the project. Author: Matthias Baumgartner, 2022 """ -# imports +# standard imports import collections import typing diff --git a/bsie/apps/__init__.py b/bsie/apps/__init__.py index a548c3c..1c3d0f9 100644 --- a/bsie/apps/__init__.py +++ b/bsie/apps/__init__.py @@ -4,7 +4,7 @@ Part of the bsie module. A copy of the license is provided with the project. 
Author: Matthias Baumgartner, 2022 """ -# imports +# standard imports import typing # inner-module imports diff --git a/bsie/extractor/base.py b/bsie/extractor/base.py index c44021b..95689a5 100644 --- a/bsie/extractor/base.py +++ b/bsie/extractor/base.py @@ -4,7 +4,7 @@ Part of the bsie module. A copy of the license is provided with the project. Author: Matthias Baumgartner, 2022 """ -# imports +# standard imports import abc import typing diff --git a/bsie/extractor/generic/__init__.py b/bsie/extractor/generic/__init__.py index 0cb7e7f..4783949 100644 --- a/bsie/extractor/generic/__init__.py +++ b/bsie/extractor/generic/__init__.py @@ -7,7 +7,7 @@ Part of the bsie module. A copy of the license is provided with the project. Author: Matthias Baumgartner, 2022 """ -# imports +# standard imports import typing # exports diff --git a/bsie/reader/base.py b/bsie/reader/base.py index cbabd36..08d6cc6 100644 --- a/bsie/reader/base.py +++ b/bsie/reader/base.py @@ -8,7 +8,7 @@ Part of the bsie module. A copy of the license is provided with the project. Author: Matthias Baumgartner, 2022 """ -# imports +# standard imports import abc import typing diff --git a/bsie/utils/bsfs.py b/bsie/utils/bsfs.py index 0b88479..ef5db31 100644 --- a/bsie/utils/bsfs.py +++ b/bsie/utils/bsfs.py @@ -4,7 +4,7 @@ Part of the bsie module. A copy of the license is provided with the project. Author: Matthias Baumgartner, 2022 """ -# imports +# standard imports import typing # bsfs imports diff --git a/bsie/utils/errors.py b/bsie/utils/errors.py index 5fafd5b..fbc16f7 100644 --- a/bsie/utils/errors.py +++ b/bsie/utils/errors.py @@ -4,7 +4,7 @@ Part of the bsie module. A copy of the license is provided with the project. 
Author: Matthias Baumgartner, 2022 """ -# imports +# standard imports import typing # exports diff --git a/bsie/utils/filematcher/__init__.py b/bsie/utils/filematcher/__init__.py index b1c1b45..1e23e4e 100644 --- a/bsie/utils/filematcher/__init__.py +++ b/bsie/utils/filematcher/__init__.py @@ -4,7 +4,7 @@ Part of the bsie module. A copy of the license is provided with the project. Author: Matthias Baumgartner, 2022 """ -# imports +# standard imports import typing # inner-module imports diff --git a/bsie/utils/filematcher/matcher.py b/bsie/utils/filematcher/matcher.py index 164beeb..a279a4b 100644 --- a/bsie/utils/filematcher/matcher.py +++ b/bsie/utils/filematcher/matcher.py @@ -4,11 +4,13 @@ Part of the bsie module. A copy of the license is provided with the project. Author: Matthias Baumgartner, 2021 """ -# imports +# standard imports from collections.abc import Callable, Collection, Hashable import abc import os import typing + +# external imports import magic # exports diff --git a/bsie/utils/namespaces.py b/bsie/utils/namespaces.py index a29fc1b..2d0b535 100644 --- a/bsie/utils/namespaces.py +++ b/bsie/utils/namespaces.py @@ -4,7 +4,7 @@ Part of the bsie module. A copy of the license is provided with the project. Author: Matthias Baumgartner, 2022 """ -# imports +# standard imports import typing # inner-module imports diff --git a/bsie/utils/node.py b/bsie/utils/node.py index ecf39cd..91e4f37 100644 --- a/bsie/utils/node.py +++ b/bsie/utils/node.py @@ -4,7 +4,7 @@ Part of the bsie module. A copy of the license is provided with the project. Author: Matthias Baumgartner, 2022 """ -# imports +# standard imports import typing # bsie imports diff --git a/test/apps/test_index.py b/test/apps/test_index.py index 9cdc656..6fc3335 100644 --- a/test/apps/test_index.py +++ b/test/apps/test_index.py @@ -4,13 +4,15 @@ Part of the bsie test suite. A copy of the license is provided with the project. 
Author: Matthias Baumgartner, 2022 """ -# imports +# standard imports import contextlib import io import os -import rdflib import unittest +# external imports +import rdflib + # bsie imports from bsie.utils import ns diff --git a/test/apps/test_info.py b/test/apps/test_info.py index 6f4d98f..f52c581 100644 --- a/test/apps/test_info.py +++ b/test/apps/test_info.py @@ -4,7 +4,7 @@ Part of the bsie test suite. A copy of the license is provided with the project. Author: Matthias Baumgartner, 2022 """ -# imports +# standard imports import argparse import contextlib import io diff --git a/test/extractor/generic/test_constant.py b/test/extractor/generic/test_constant.py index 9dbaced..a49345b 100644 --- a/test/extractor/generic/test_constant.py +++ b/test/extractor/generic/test_constant.py @@ -4,7 +4,7 @@ Part of the bsie test suite. A copy of the license is provided with the project. Author: Matthias Baumgartner, 2022 """ -# imports +# standard imports import unittest # bsie imports diff --git a/test/reader/test_path.py b/test/reader/test_path.py index fd7bc5a..95e447f 100644 --- a/test/reader/test_path.py +++ b/test/reader/test_path.py @@ -4,7 +4,7 @@ Part of the bsie test suite. A copy of the license is provided with the project. Author: Matthias Baumgartner, 2022 """ -# imports +# standard imports import unittest # objects to test diff --git a/test/utils/filematcher/test_ast.py b/test/utils/filematcher/test_ast.py deleted file mode 100644 index ff4b86d..0000000 --- a/test/utils/filematcher/test_ast.py +++ /dev/null @@ -1,232 +0,0 @@ -""" - -Part of the bsie test suite. -A copy of the license is provided with the project. 
-Author: Matthias Baumgartner, 2022 -""" -# imports -import os -import stat -import tempfile -import unittest - -# objects to test -from bsie.utils.filematcher import matcher - - -## code ## - -class FakeMatcher(matcher.Matcher): - def __call__(self, *args, **kwargs): - pass - -class FakeCriterion(matcher.Criterion): - def __call__(self, *args, **kwargs): - pass - -class FakeAggregate(matcher.Aggregate): - def __call__(self, *args, **kwargs): - pass - -class TestMatcher(unittest.TestCase): - def setUp(self): - # paths - self.image = os.path.join(os.path.dirname(__file__), 'testimage.jpg') - self.text= os.path.join(os.path.dirname(__file__), 'textfile.t') - self.empty = os.path.join(os.path.dirname(__file__), 'empty') - self.missing = os.path.join(os.path.dirname(__file__), 'missing.jpg') - - def test_matcher_skeleton(self): - # node: iteration and length - self.assertSetEqual(set(iter(FakeMatcher(1,2,3))), {1,2,3}) - self.assertSetEqual(set(iter(FakeMatcher([1,2,3]))), {1,2,3}) - self.assertEqual(len(FakeMatcher([1,2,3])), 3) - self.assertEqual(len(FakeMatcher(1,2,3)), 3) - self.assertEqual(len(FakeMatcher()), 0) - self.assertIn(1, FakeMatcher(1,2,3)) - self.assertIn(3, FakeMatcher([1,2,3])) - self.assertNotIn(0, FakeMatcher(1,2,3)) - self.assertNotIn(4, FakeMatcher([1,2,3])) - # node: comparison - self.assertEqual(FakeMatcher([1,2,3]), FakeMatcher([1,2,3])) - self.assertEqual(FakeMatcher(1,2,3), FakeMatcher(1,2,3)) - self.assertEqual(FakeMatcher(1,2,3), FakeMatcher([1,2,3])) - self.assertEqual(FakeMatcher(1,2,3), FakeMatcher((1,2,3))) - self.assertNotEqual(FakeMatcher(1,2,3), FakeMatcher(1,2,4)) - self.assertNotEqual(FakeMatcher(1,2,3), FakeMatcher(1,2,3,4)) - self.assertNotEqual(FakeMatcher(1,2,3), FakeMatcher(1,2)) - self.assertEqual(hash(FakeMatcher([1,2,3])), hash(FakeMatcher([1,2,3]))) - self.assertEqual(hash(FakeMatcher(1,2,3)), hash(FakeMatcher(1,2,3))) - self.assertEqual(hash(FakeMatcher(1,2,3)), hash(FakeMatcher([1,2,3]))) - 
self.assertEqual(hash(FakeMatcher(1,2,3)), hash(FakeMatcher((1,2,3)))) - # node: representation - self.assertEqual(repr(FakeMatcher(1,2,3)), 'FakeMatcher({1, 2, 3})') - - # criterion - self.assertEqual(repr(FakeCriterion(1,2,3)), 'FakeCriterion({1, 2, 3})') - self.assertEqual(hash(FakeCriterion(1,2,3)), hash(FakeCriterion(1,2,3))) - self.assertEqual(FakeCriterion(1,2,3), FakeCriterion([1,2,3])) - self.assertNotEqual(FakeCriterion(1,2,3), FakeCriterion(1,2)) - self.assertNotEqual(FakeCriterion(1,2,3), FakeMatcher(1,2,3)) - self.assertSetEqual(FakeCriterion(1,2,3).accepted(), {1,2,3}) - - # aggregate - self.assertEqual(repr(FakeAggregate(1,2,3)), 'FakeAggregate({1, 2, 3})') - self.assertNotEqual(FakeAggregate(1,2,3), FakeMatcher(1,2,3)) - - def test_any(self): - self.assertTrue(matcher.Any()(self.image)) - self.assertTrue(matcher.Any()(self.text)) - self.assertTrue(matcher.Any()(self.missing)) - self.assertTrue(matcher.Any()(self.empty)) - - def test_nothing(self): - self.assertFalse(matcher.Nothing()(self.image)) - self.assertFalse(matcher.Nothing()(self.text)) - self.assertFalse(matcher.Nothing()(self.missing)) - self.assertFalse(matcher.Nothing()(self.empty)) - - def test_exists(self): - self.assertTrue(matcher.Exists()(self.image)) - self.assertTrue(matcher.Exists()(self.text)) - self.assertTrue(matcher.Exists()(self.empty)) - self.assertFalse(matcher.Exists()(self.missing)) - - def test_isfile(self): - self.assertTrue(matcher.IsFile()(self.image)) - self.assertTrue(matcher.IsFile()(self.text)) - self.assertFalse(matcher.IsFile()(self.missing)) - self.assertFalse(matcher.IsFile()(os.path.dirname(self.image))) - - def test_isdir(self): - self.assertTrue(matcher.IsDir()(os.path.dirname(self.image))) - self.assertFalse(matcher.IsDir()(self.image)) - self.assertFalse(matcher.IsDir()(self.text)) - self.assertFalse(matcher.IsDir()(self.missing)) - - def test_islink(self): - self.assertFalse(matcher.IsLink()(os.path.dirname(self.image))) - 
self.assertFalse(matcher.IsLink()(self.image)) - self.assertFalse(matcher.IsLink()(self.text)) - _, temp = tempfile.mkstemp(prefix='bsie-test-') - templink = temp + '-link' - os.symlink(temp, templink) - self.assertTrue(matcher.IsLink()(templink)) - os.unlink(templink) - os.unlink(temp) - - def test_isabs(self): - self.assertTrue(matcher.IsAbs()(os.path.abspath(self.image))) - self.assertTrue(matcher.IsAbs()(os.path.abspath(self.text))) - self.assertFalse(matcher.IsAbs()(os.path.relpath(self.text, os.path.dirname(self.text)))) - - def test_isrel(self): - self.assertFalse(matcher.IsRel()(os.path.abspath(self.image))) - self.assertFalse(matcher.IsRel()(os.path.abspath(self.text))) - self.assertTrue(matcher.IsRel()(os.path.relpath(self.text, os.path.dirname(self.text)))) - self.assertTrue(matcher.IsRel()(os.path.basename(self.text))) - - def test_ismount(self): - self.assertFalse(matcher.IsMount()(self.image)) - self.assertFalse(matcher.IsMount()(self.text)) - self.assertFalse(matcher.IsMount()(self.missing)) - # there's no reasonable way to test a positive case - - def test_isempty(self): - self.assertTrue(matcher.IsEmpty()(self.empty)) - self.assertFalse(matcher.IsEmpty()(self.image)) - self.assertFalse(matcher.IsEmpty()(self.text)) - self.assertFalse(matcher.IsEmpty()(self.missing)) - - def test_isreadable(self): - self.assertTrue(matcher.IsReadable()(self.empty)) - self.assertTrue(matcher.IsReadable()(self.image)) - self.assertFalse(matcher.IsReadable()(self.missing)) - _, temp = tempfile.mkstemp(prefix='bsie-test-') - os.chmod(temp, 0) - self.assertFalse(matcher.IsReadable()(temp)) - os.unlink(temp) - - def test_iswritable(self): - self.assertTrue(matcher.IsWritable()(self.empty)) - self.assertTrue(matcher.IsWritable()(self.image)) - self.assertFalse(matcher.IsWritable()(self.missing)) - _, temp = tempfile.mkstemp(prefix='bsie-test-') - os.chmod(temp, 0) - self.assertFalse(matcher.IsWritable()(temp)) - os.unlink(temp) - - def test_isexecutable(self): - 
self.assertFalse(matcher.IsExecutable()(self.empty)) - self.assertFalse(matcher.IsExecutable()(self.image)) - self.assertFalse(matcher.IsExecutable()(self.missing)) - _, temp = tempfile.mkstemp(prefix='bsie-test-') - os.chmod(temp, stat.S_IEXEC) - self.assertTrue(matcher.IsExecutable()(temp)) - os.unlink(temp) - - def test_extension(self): - self.assertTrue(matcher.Extension('jpg')(self.image)) - self.assertTrue(matcher.Extension('jpg', 'png')(self.image)) - self.assertTrue(matcher.Extension('jpg', 't')(self.text)) - self.assertTrue(matcher.Extension('jpg', 'png', 't')(self.missing)) - self.assertTrue(matcher.Extension('')(self.empty)) - - self.assertFalse(matcher.Extension()(self.image)) - self.assertFalse(matcher.Extension('jpeg')(self.image)) - self.assertFalse(matcher.Extension('.t')(self.text)) - self.assertFalse(matcher.Extension('png', 't')(self.missing)) - self.assertFalse(matcher.Extension('tiff')(self.empty)) - - def test_mime(self): - self.assertTrue(matcher.Mime('image/jpeg')(self.image)) - self.assertTrue(matcher.Mime('image/tiff', 'image/jpeg')(self.image)) - self.assertTrue(matcher.Mime('text/plain', 'image/jpeg')(self.text)) - self.assertTrue(matcher.Mime('inode/x-empty')(self.empty)) - - self.assertFalse(matcher.Mime()(self.image)) - self.assertFalse(matcher.Mime('image')(self.image)) - self.assertFalse(matcher.Mime('image/tiff', 'image/png')(self.image)) - self.assertFalse(matcher.Mime('')(self.text)) - self.assertFalse(matcher.Mime('text')(self.text)) - self.assertFalse(matcher.Mime('tiff')(self.empty)) - self.assertFalse(matcher.Mime()(self.empty)) - self.assertFalse(matcher.Mime('')(self.empty)) - self.assertFalse(matcher.Mime()(self.missing)) - self.assertFalse(matcher.Mime('')(self.missing)) - self.assertFalse(matcher.Mime('inode/x-empty')(self.missing)) - - def test_not(self): - self.assertFalse(matcher.NOT(matcher.Mime('image/jpeg'))(self.image)) - self.assertTrue(matcher.NOT(matcher.Mime('text/plain'))(self.image)) - - def test_and(self): 
- self.assertTrue(matcher.And(matcher.Mime('image/jpeg'), matcher.Extension('jpg'))(self.image)) - self.assertTrue(matcher.And(matcher.Mime('image/jpeg'), matcher.Extension('jpg', 'tiff'))(self.image)) - self.assertTrue(matcher.And(matcher.Mime('text/plain'), matcher.Extension('t', 'tiff'))(self.text)) - - self.assertFalse(matcher.And(matcher.Mime('image/jpeg'), matcher.Extension('tiff'))(self.image)) - self.assertFalse(matcher.And(matcher.Mime('text/plain'), matcher.Extension('jpg'))(self.image)) - self.assertFalse(matcher.And(matcher.Mime('inode/x-empty'), matcher.Extension('jpg'))(self.missing)) - self.assertFalse(matcher.And(matcher.Mime('image/jpeg'), matcher.Extension('jpg', 't'))(self.text)) - - def test_or(self): - self.assertTrue(matcher.Or(matcher.Mime('image/jpeg'))(self.image)) - self.assertFalse(matcher.Or(matcher.Mime('text/plain'))(self.image)) - - self.assertTrue(matcher.Or(matcher.Mime('image/jpeg'), matcher.Extension('jpg'))(self.image)) - self.assertTrue(matcher.Or(matcher.Mime('image/jpeg'), matcher.Extension('t'))(self.image)) - self.assertTrue(matcher.Or(matcher.Mime('text/plain'), matcher.Extension('jpg', 'tiff'))(self.image)) - self.assertTrue(matcher.Or(matcher.Mime('text/plain'), matcher.Extension('tiff'))(self.text)) - self.assertTrue(matcher.Or(matcher.Mime('image/jpeg'), matcher.Extension('jpg'))(self.missing)) - - self.assertFalse(matcher.Or(matcher.Mime('text/plain'), matcher.Extension('tiff'))(self.image)) - self.assertFalse(matcher.Or(matcher.Mime('inode/x-empty'), matcher.Extension('jpg', 'tiff'))(self.text)) - - -## main ## - -if __name__ == '__main__': - unittest.main() - -## EOF ## diff --git a/test/utils/filematcher/test_matcher.py b/test/utils/filematcher/test_matcher.py new file mode 100644 index 0000000..c3cccee --- /dev/null +++ b/test/utils/filematcher/test_matcher.py @@ -0,0 +1,232 @@ +""" + +Part of the bsie test suite. +A copy of the license is provided with the project. 
+Author: Matthias Baumgartner, 2022 +""" +# standard imports +import os +import stat +import tempfile +import unittest + +# objects to test +from bsie.utils.filematcher import matcher + + +## code ## + +class FakeMatcher(matcher.Matcher): + def __call__(self, *args, **kwargs): + pass + +class FakeCriterion(matcher.Criterion): + def __call__(self, *args, **kwargs): + pass + +class FakeAggregate(matcher.Aggregate): + def __call__(self, *args, **kwargs): + pass + +class TestMatcher(unittest.TestCase): + def setUp(self): + # paths + self.image = os.path.join(os.path.dirname(__file__), 'testimage.jpg') + self.text= os.path.join(os.path.dirname(__file__), 'textfile.t') + self.empty = os.path.join(os.path.dirname(__file__), 'empty') + self.missing = os.path.join(os.path.dirname(__file__), 'missing.jpg') + + def test_matcher_skeleton(self): + # node: iteration and length + self.assertSetEqual(set(iter(FakeMatcher(1,2,3))), {1,2,3}) + self.assertSetEqual(set(iter(FakeMatcher([1,2,3]))), {1,2,3}) + self.assertEqual(len(FakeMatcher([1,2,3])), 3) + self.assertEqual(len(FakeMatcher(1,2,3)), 3) + self.assertEqual(len(FakeMatcher()), 0) + self.assertIn(1, FakeMatcher(1,2,3)) + self.assertIn(3, FakeMatcher([1,2,3])) + self.assertNotIn(0, FakeMatcher(1,2,3)) + self.assertNotIn(4, FakeMatcher([1,2,3])) + # node: comparison + self.assertEqual(FakeMatcher([1,2,3]), FakeMatcher([1,2,3])) + self.assertEqual(FakeMatcher(1,2,3), FakeMatcher(1,2,3)) + self.assertEqual(FakeMatcher(1,2,3), FakeMatcher([1,2,3])) + self.assertEqual(FakeMatcher(1,2,3), FakeMatcher((1,2,3))) + self.assertNotEqual(FakeMatcher(1,2,3), FakeMatcher(1,2,4)) + self.assertNotEqual(FakeMatcher(1,2,3), FakeMatcher(1,2,3,4)) + self.assertNotEqual(FakeMatcher(1,2,3), FakeMatcher(1,2)) + self.assertEqual(hash(FakeMatcher([1,2,3])), hash(FakeMatcher([1,2,3]))) + self.assertEqual(hash(FakeMatcher(1,2,3)), hash(FakeMatcher(1,2,3))) + self.assertEqual(hash(FakeMatcher(1,2,3)), hash(FakeMatcher([1,2,3]))) + 
self.assertEqual(hash(FakeMatcher(1,2,3)), hash(FakeMatcher((1,2,3)))) + # node: representation + self.assertEqual(repr(FakeMatcher(1,2,3)), 'FakeMatcher({1, 2, 3})') + + # criterion + self.assertEqual(repr(FakeCriterion(1,2,3)), 'FakeCriterion({1, 2, 3})') + self.assertEqual(hash(FakeCriterion(1,2,3)), hash(FakeCriterion(1,2,3))) + self.assertEqual(FakeCriterion(1,2,3), FakeCriterion([1,2,3])) + self.assertNotEqual(FakeCriterion(1,2,3), FakeCriterion(1,2)) + self.assertNotEqual(FakeCriterion(1,2,3), FakeMatcher(1,2,3)) + self.assertSetEqual(FakeCriterion(1,2,3).accepted(), {1,2,3}) + + # aggregate + self.assertEqual(repr(FakeAggregate(1,2,3)), 'FakeAggregate({1, 2, 3})') + self.assertNotEqual(FakeAggregate(1,2,3), FakeMatcher(1,2,3)) + + def test_any(self): + self.assertTrue(matcher.Any()(self.image)) + self.assertTrue(matcher.Any()(self.text)) + self.assertTrue(matcher.Any()(self.missing)) + self.assertTrue(matcher.Any()(self.empty)) + + def test_nothing(self): + self.assertFalse(matcher.Nothing()(self.image)) + self.assertFalse(matcher.Nothing()(self.text)) + self.assertFalse(matcher.Nothing()(self.missing)) + self.assertFalse(matcher.Nothing()(self.empty)) + + def test_exists(self): + self.assertTrue(matcher.Exists()(self.image)) + self.assertTrue(matcher.Exists()(self.text)) + self.assertTrue(matcher.Exists()(self.empty)) + self.assertFalse(matcher.Exists()(self.missing)) + + def test_isfile(self): + self.assertTrue(matcher.IsFile()(self.image)) + self.assertTrue(matcher.IsFile()(self.text)) + self.assertFalse(matcher.IsFile()(self.missing)) + self.assertFalse(matcher.IsFile()(os.path.dirname(self.image))) + + def test_isdir(self): + self.assertTrue(matcher.IsDir()(os.path.dirname(self.image))) + self.assertFalse(matcher.IsDir()(self.image)) + self.assertFalse(matcher.IsDir()(self.text)) + self.assertFalse(matcher.IsDir()(self.missing)) + + def test_islink(self): + self.assertFalse(matcher.IsLink()(os.path.dirname(self.image))) + 
self.assertFalse(matcher.IsLink()(self.image)) + self.assertFalse(matcher.IsLink()(self.text)) + _, temp = tempfile.mkstemp(prefix='bsie-test-') + templink = temp + '-link' + os.symlink(temp, templink) + self.assertTrue(matcher.IsLink()(templink)) + os.unlink(templink) + os.unlink(temp) + + def test_isabs(self): + self.assertTrue(matcher.IsAbs()(os.path.abspath(self.image))) + self.assertTrue(matcher.IsAbs()(os.path.abspath(self.text))) + self.assertFalse(matcher.IsAbs()(os.path.relpath(self.text, os.path.dirname(self.text)))) + + def test_isrel(self): + self.assertFalse(matcher.IsRel()(os.path.abspath(self.image))) + self.assertFalse(matcher.IsRel()(os.path.abspath(self.text))) + self.assertTrue(matcher.IsRel()(os.path.relpath(self.text, os.path.dirname(self.text)))) + self.assertTrue(matcher.IsRel()(os.path.basename(self.text))) + + def test_ismount(self): + self.assertFalse(matcher.IsMount()(self.image)) + self.assertFalse(matcher.IsMount()(self.text)) + self.assertFalse(matcher.IsMount()(self.missing)) + # there's no reasonable way to test a positive case + + def test_isempty(self): + self.assertTrue(matcher.IsEmpty()(self.empty)) + self.assertFalse(matcher.IsEmpty()(self.image)) + self.assertFalse(matcher.IsEmpty()(self.text)) + self.assertFalse(matcher.IsEmpty()(self.missing)) + + def test_isreadable(self): + self.assertTrue(matcher.IsReadable()(self.empty)) + self.assertTrue(matcher.IsReadable()(self.image)) + self.assertFalse(matcher.IsReadable()(self.missing)) + _, temp = tempfile.mkstemp(prefix='bsie-test-') + os.chmod(temp, 0) + self.assertFalse(matcher.IsReadable()(temp)) + os.unlink(temp) + + def test_iswritable(self): + self.assertTrue(matcher.IsWritable()(self.empty)) + self.assertTrue(matcher.IsWritable()(self.image)) + self.assertFalse(matcher.IsWritable()(self.missing)) + _, temp = tempfile.mkstemp(prefix='bsie-test-') + os.chmod(temp, 0) + self.assertFalse(matcher.IsWritable()(temp)) + os.unlink(temp) + + def test_isexecutable(self): + 
self.assertFalse(matcher.IsExecutable()(self.empty)) + self.assertFalse(matcher.IsExecutable()(self.image)) + self.assertFalse(matcher.IsExecutable()(self.missing)) + _, temp = tempfile.mkstemp(prefix='bsie-test-') + os.chmod(temp, stat.S_IEXEC) + self.assertTrue(matcher.IsExecutable()(temp)) + os.unlink(temp) + + def test_extension(self): + self.assertTrue(matcher.Extension('jpg')(self.image)) + self.assertTrue(matcher.Extension('jpg', 'png')(self.image)) + self.assertTrue(matcher.Extension('jpg', 't')(self.text)) + self.assertTrue(matcher.Extension('jpg', 'png', 't')(self.missing)) + self.assertTrue(matcher.Extension('')(self.empty)) + + self.assertFalse(matcher.Extension()(self.image)) + self.assertFalse(matcher.Extension('jpeg')(self.image)) + self.assertFalse(matcher.Extension('.t')(self.text)) + self.assertFalse(matcher.Extension('png', 't')(self.missing)) + self.assertFalse(matcher.Extension('tiff')(self.empty)) + + def test_mime(self): + self.assertTrue(matcher.Mime('image/jpeg')(self.image)) + self.assertTrue(matcher.Mime('image/tiff', 'image/jpeg')(self.image)) + self.assertTrue(matcher.Mime('text/plain', 'image/jpeg')(self.text)) + self.assertTrue(matcher.Mime('inode/x-empty')(self.empty)) + + self.assertFalse(matcher.Mime()(self.image)) + self.assertFalse(matcher.Mime('image')(self.image)) + self.assertFalse(matcher.Mime('image/tiff', 'image/png')(self.image)) + self.assertFalse(matcher.Mime('')(self.text)) + self.assertFalse(matcher.Mime('text')(self.text)) + self.assertFalse(matcher.Mime('tiff')(self.empty)) + self.assertFalse(matcher.Mime()(self.empty)) + self.assertFalse(matcher.Mime('')(self.empty)) + self.assertFalse(matcher.Mime()(self.missing)) + self.assertFalse(matcher.Mime('')(self.missing)) + self.assertFalse(matcher.Mime('inode/x-empty')(self.missing)) + + def test_not(self): + self.assertFalse(matcher.NOT(matcher.Mime('image/jpeg'))(self.image)) + self.assertTrue(matcher.NOT(matcher.Mime('text/plain'))(self.image)) + + def test_and(self): 
+ self.assertTrue(matcher.And(matcher.Mime('image/jpeg'), matcher.Extension('jpg'))(self.image)) + self.assertTrue(matcher.And(matcher.Mime('image/jpeg'), matcher.Extension('jpg', 'tiff'))(self.image)) + self.assertTrue(matcher.And(matcher.Mime('text/plain'), matcher.Extension('t', 'tiff'))(self.text)) + + self.assertFalse(matcher.And(matcher.Mime('image/jpeg'), matcher.Extension('tiff'))(self.image)) + self.assertFalse(matcher.And(matcher.Mime('text/plain'), matcher.Extension('jpg'))(self.image)) + self.assertFalse(matcher.And(matcher.Mime('inode/x-empty'), matcher.Extension('jpg'))(self.missing)) + self.assertFalse(matcher.And(matcher.Mime('image/jpeg'), matcher.Extension('jpg', 't'))(self.text)) + + def test_or(self): + self.assertTrue(matcher.Or(matcher.Mime('image/jpeg'))(self.image)) + self.assertFalse(matcher.Or(matcher.Mime('text/plain'))(self.image)) + + self.assertTrue(matcher.Or(matcher.Mime('image/jpeg'), matcher.Extension('jpg'))(self.image)) + self.assertTrue(matcher.Or(matcher.Mime('image/jpeg'), matcher.Extension('t'))(self.image)) + self.assertTrue(matcher.Or(matcher.Mime('text/plain'), matcher.Extension('jpg', 'tiff'))(self.image)) + self.assertTrue(matcher.Or(matcher.Mime('text/plain'), matcher.Extension('tiff'))(self.text)) + self.assertTrue(matcher.Or(matcher.Mime('image/jpeg'), matcher.Extension('jpg'))(self.missing)) + + self.assertFalse(matcher.Or(matcher.Mime('text/plain'), matcher.Extension('tiff'))(self.image)) + self.assertFalse(matcher.Or(matcher.Mime('inode/x-empty'), matcher.Extension('jpg', 'tiff'))(self.text)) + + +## main ## + +if __name__ == '__main__': + unittest.main() + +## EOF ## diff --git a/test/utils/test_node.py b/test/utils/test_node.py index c70f0b8..9feb051 100644 --- a/test/utils/test_node.py +++ b/test/utils/test_node.py @@ -4,7 +4,7 @@ Part of the bsie test suite. A copy of the license is provided with the project. 
Author: Matthias Baumgartner, 2022 """ -# imports +# standard imports import unittest # bsie imports -- cgit v1.2.3 From 17f03ae3d3dc53fe973f37fe4dea4a831b4f97d7 Mon Sep 17 00:00:00 2001 From: Matthias Baumgartner Date: Sat, 24 Dec 2022 16:06:16 +0100 Subject: ReaderChain and image reader --- .gitignore | 3 ++ bsie/reader/base.py | 2 +- bsie/reader/chain.py | 85 ++++++++++++++++++++++++++++++++++++ bsie/reader/image/__init__.py | 36 +++++++++++++++ bsie/reader/image/_pillow.py | 37 ++++++++++++++++ bsie/reader/image/_raw.py | 61 ++++++++++++++++++++++++++ setup.py | 12 ++++- test/reader/image/__init__.py | 0 test/reader/image/load_nef.py | 28 ++++++++++++ test/reader/image/test_image.py | 51 ++++++++++++++++++++++ test/reader/image/test_pillow.py | 44 +++++++++++++++++++ test/reader/image/test_raw_image.py | 50 +++++++++++++++++++++ test/reader/image/testimage.jpg | Bin 0 -> 518 bytes test/reader/test_chain.py | 85 ++++++++++++++++++++++++++++++++++++ 14 files changed, 492 insertions(+), 2 deletions(-) create mode 100644 bsie/reader/chain.py create mode 100644 bsie/reader/image/__init__.py create mode 100644 bsie/reader/image/_pillow.py create mode 100644 bsie/reader/image/_raw.py create mode 100644 test/reader/image/__init__.py create mode 100644 test/reader/image/load_nef.py create mode 100644 test/reader/image/test_image.py create mode 100644 test/reader/image/test_pillow.py create mode 100644 test/reader/image/test_raw_image.py create mode 100644 test/reader/image/testimage.jpg create mode 100644 test/reader/test_chain.py diff --git a/.gitignore b/.gitignore index e45b114..8e1df10 100644 --- a/.gitignore +++ b/.gitignore @@ -22,4 +22,7 @@ build/ # doc builds doc/build/ +# testing data +test/reader/image/testimage.nef + ## EOF ## diff --git a/bsie/reader/base.py b/bsie/reader/base.py index 08d6cc6..099a327 100644 --- a/bsie/reader/base.py +++ b/bsie/reader/base.py @@ -39,7 +39,7 @@ class Reader(abc.ABC): return hash(type(self)) @abc.abstractmethod - def 
__call__(self, path: bsfs.URI) -> typing.Any: + def __call__(self, path: str) -> typing.Any: """Return some content of the file at *path*. Raises a `ReaderError` if the reader cannot make sense of the file format. """ diff --git a/bsie/reader/chain.py b/bsie/reader/chain.py new file mode 100644 index 0000000..8e900e1 --- /dev/null +++ b/bsie/reader/chain.py @@ -0,0 +1,85 @@ +""" + +Part of the bsie module. +A copy of the license is provided with the project. +Author: Matthias Baumgartner, 2022 +""" +# standard imports +import logging +import typing + +# bsie imports +from bsie.utils import bsfs, errors + +# inner-module imports +from . import base +from . import builder + +# exports +__all__: typing.Sequence[str] = ( + 'ReaderChain', + ) + + +## code ## + +logger = logging.getLogger(__name__) + +# Content type. +T_CONTENT = typing.TypeVar('T_CONTENT') # pylint: disable=invalid-name + +class ReaderChain(base.Reader, typing.Generic[T_CONTENT]): + """Read an image.""" + + # sub-readers for specific file formats. + _children: typing.Tuple[base.Reader, ...] 
+ + def __init__( + self, + subreader_names: typing.Iterable[str], + cfg: typing.Any, + ): + rbuild = builder.ReaderBuilder(cfg) + children = [] + for name in subreader_names: + try: + # build sub-reader + children.append(rbuild.build(name)) + except (ValueError, + TypeError, + errors.LoaderError, + errors.BuilderError) as err: + # failed to build a child; skip and notify + logger.warning('failed to load reader: %s', err) + + if len(children) == 0: + logger.warning('%s failed to load any sub-readers.', bsfs.typename(self)) + + # copy children to member + self._children = tuple(children) + + def __str__(self) -> str: + substr = ', '.join(str(child) for child in self._children) + return f'{bsfs.typename(self)}({substr})' + + def __repr__(self) -> str: + return f'{bsfs.typename(self)}({self._children})' + + def __eq__(self, other: typing.Any) -> bool: + return super().__eq__(other) \ + and self._children == other._children + + def __hash__(self) -> int: + return hash((super().__hash__(), self._children)) + + def __call__(self, path: str) -> T_CONTENT: + for child in self._children: + try: + return child(path) + except errors.ReaderError: + # child cannot read the file, skip. + pass + + raise errors.ReaderError(path) + +## EOF ## diff --git a/bsie/reader/image/__init__.py b/bsie/reader/image/__init__.py new file mode 100644 index 0000000..85dad85 --- /dev/null +++ b/bsie/reader/image/__init__.py @@ -0,0 +1,36 @@ +""" + +Part of the bsie module. +A copy of the license is provided with the project. +Author: Matthias Baumgartner, 2022 +""" +# standard imports +import typing + +# external imports +import PIL.Image + +# inner-module imports +from .. 
import chain + +# constants +_FILE_FORMAT_READERS: typing.Sequence[str] = ( + __package__ + '._raw.RawImage', + __package__ + '._pillow.PillowImage', + ) + +# exports +__all__: typing.Sequence[str] = ( + 'Image', + ) + + +## code ## + +class Image(chain.ReaderChain[PIL.Image]): # pylint: disable=too-few-public-methods + """Read an image file.""" + + def __init__(self, cfg): + super().__init__(_FILE_FORMAT_READERS, cfg) + +## EOF ## diff --git a/bsie/reader/image/_pillow.py b/bsie/reader/image/_pillow.py new file mode 100644 index 0000000..ee0662d --- /dev/null +++ b/bsie/reader/image/_pillow.py @@ -0,0 +1,37 @@ +""" + +Part of the bsie module. +A copy of the license is provided with the project. +Author: Matthias Baumgartner, 2022 +""" +# standard imports +import typing + +# external imports +import PIL.Image + +# bsie imports +from bsie.utils import errors + +# inner-module imports +from .. import base + +# exports +__all__: typing.Sequence[str] = ( + 'PillowImage', + ) + + +## code ## + +class PillowImage(base.Reader): + """Use PIL to read content of a variety of image file types.""" + + def __call__(self, path: str) -> PIL.Image: + try: + # open file with PIL + return PIL.Image.open(path) + except IOError as err: + raise errors.ReaderError(path) from err + +# EOF ## diff --git a/bsie/reader/image/_raw.py b/bsie/reader/image/_raw.py new file mode 100644 index 0000000..77be357 --- /dev/null +++ b/bsie/reader/image/_raw.py @@ -0,0 +1,61 @@ +""" + +Part of the bsie module. +A copy of the license is provided with the project. +Author: Matthias Baumgartner, 2022 +""" +# standard imports +import typing + +# external imports +import PIL.Image +import rawpy + +# bsie imports +from bsie.utils import errors, filematcher + +# inner-module imports +from .. 
import base + +# constants +MATCH_RULE = 'mime={image/x-nikon-nef} | extension={nef}' + +# exports +__all__: typing.Sequence[str] = ( + 'RawImage', + ) + + +## code ## + +class RawImage(base.Reader): + """Use rawpy to read content of raw image file types.""" + + # file matcher + match: filematcher.Matcher + + # additional kwargs to rawpy's postprocess + rawpy_kwargs: typing.Dict[str, typing.Any] + + def __init__(self, **rawpy_kwargs): + match_rule = rawpy_kwargs.pop('file_match_rule', MATCH_RULE) + self._match = filematcher.parse(match_rule) + self._rawpy_kwargs = rawpy_kwargs + + def __call__(self, path: str) -> PIL.Image: + # perform quick checks first + if not self._match(path): + raise errors.ReaderError(path) + + try: + # open file with rawpy + ary = rawpy.imread(path).postprocess(**self._rawpy_kwargs) + # convert to PIL.Image + return PIL.Image.fromarray(ary) + except (rawpy.LibRawFatalError, # pylint: disable=no-member # pylint doesn't find the errors + rawpy.NotSupportedError, # pylint: disable=no-member + rawpy.LibRawNonFatalError, # pylint: disable=no-member + ) as err: + raise errors.ReaderError(path) from err + +## EOF ## diff --git a/setup.py b/setup.py index 6521593..2f7a485 100644 --- a/setup.py +++ b/setup.py @@ -14,7 +14,17 @@ setup( url='https://www.igsor.net/projects/blackstar/bsie/', download_url='https://pip.igsor.net', packages=('bsie', ), - install_requires=('rdflib', 'bsfs', 'python-magic', 'pyparsing'), + install_requires=( + 'bsfs', + 'pyparsing', + 'python-magic', + 'rdflib', # only for tests + 'requests', # only for tests + ), python_requires=">=3.7", + extra_require=( + # image reader + 'pillow', 'rawpy', + ) ) diff --git a/test/reader/image/__init__.py b/test/reader/image/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/test/reader/image/load_nef.py b/test/reader/image/load_nef.py new file mode 100644 index 0000000..5ba0adc --- /dev/null +++ b/test/reader/image/load_nef.py @@ -0,0 +1,28 @@ +""" + +Part of the bsie 
test suite. +A copy of the license is provided with the project. +Author: Matthias Baumgartner, 2022 +""" +# standard imports +import os + +# external imports +import requests + +# constants +IMAGE_URL = 'http://igsor.net/eik7AhvohghaeN5.nef' + +## code ## + +def get(): + """Download a raw test image.""" + target = os.path.join(os.path.dirname(__file__), 'testimage.nef') + if not os.path.exists(target): + with open(target, 'wb') as ofile: + ans = requests.get(IMAGE_URL) + ofile.write(ans.content) + + + +## EOF ## diff --git a/test/reader/image/test_image.py b/test/reader/image/test_image.py new file mode 100644 index 0000000..c60ca75 --- /dev/null +++ b/test/reader/image/test_image.py @@ -0,0 +1,51 @@ +""" + +Part of the bsie test suite. +A copy of the license is provided with the project. +Author: Matthias Baumgartner, 2022 +""" +# standard imports +import importlib +import os +import unittest + +# bsie imports +from bsie.utils import errors + +# objects to test +from bsie.reader.image import Image + + +## code ## + +class TestImage(unittest.TestCase): + def setUp(self): + importlib.import_module(__package__ + '.load_nef').get() + + def test_construct(self): + image = Image({}) + self.assertIsInstance(image, Image) + self.assertEqual(len(image._children), 2) + + def test_call(self): + image = Image({}) + # call returns raw image + img = image(os.path.join(os.path.dirname(__file__), 'testimage.nef')) + self.assertEqual(img.size, (6016, 4016)) # FIXME: change when image was replaced + img.close() + # call returns jpeg image + img = image(os.path.join(os.path.dirname(__file__), 'testimage.jpg')) + self.assertEqual(img.size, (1, 1)) + img.close() + # call raises error if file cannot be read + self.assertRaises(errors.ReaderError, image, + os.path.join(os.path.dirname(__file__), 'invalid.nef')) + self.assertRaises(errors.ReaderError, image, + os.path.join(os.path.dirname(__file__), 'invalid.jpg')) + +## main ## + +if __name__ == '__main__': + unittest.main() + +## EOF 
## diff --git a/test/reader/image/test_pillow.py b/test/reader/image/test_pillow.py new file mode 100644 index 0000000..8abf5c1 --- /dev/null +++ b/test/reader/image/test_pillow.py @@ -0,0 +1,44 @@ +""" + +Part of the bsie test suite. +A copy of the license is provided with the project. +Author: Matthias Baumgartner, 2022 +""" +# standard imports +import os +import unittest + +# external imports +import PIL.Image + +# bsie imports +from bsie.utils import errors + +# objects to test +from bsie.reader.image._pillow import PillowImage + + +## code ## + +class TestPillowImage(unittest.TestCase): + def test_call(self): + rdr = PillowImage() + # returns PIL image + img = rdr(os.path.join(os.path.dirname(__file__), 'testimage.jpg')) + self.assertEqual(img.size, (1, 1)) + self.assertEqual(img.getdata().getpixel((0, 0)), (0, 0, 0)) + img.close() + # raises exception when image cannot be read + self.assertRaises(errors.ReaderError, rdr, + os.path.join(os.path.dirname(__file__), 'invalid.jpg')) + # NOTE: PIL can actually read raw image files (returns the thumbnail) + #self.assertRaises(errors.ReaderError, rdr, + # os.path.join(os.path.dirname(__file__), 'testimage.nef')) + + +## main ## + +if __name__ == '__main__': + unittest.main() + +## EOF ## diff --git a/test/reader/image/test_raw_image.py b/test/reader/image/test_raw_image.py new file mode 100644 index 0000000..3d5f887 --- /dev/null +++ b/test/reader/image/test_raw_image.py @@ -0,0 +1,50 @@ +""" + +Part of the bsie test suite. +A copy of the license is provided with the project. 
+Author: Matthias Baumgartner, 2022 +""" +# standard imports +import importlib +import os +import unittest + +# external imports +import PIL.Image + +# bsie imports +from bsie.utils import errors + +# objects to test +from bsie.reader.image._raw import RawImage + + +## code ## + +class TestRawImage(unittest.TestCase): + def setUp(self): + importlib.import_module(__package__ + '.load_nef').get() + + def test_call(self): + rdr = RawImage() + # returns PIL image + img = rdr(os.path.join(os.path.dirname(__file__), 'testimage.nef')) + self.assertEqual(img.size, (6016, 4016)) # FIXME: change when image was replaced + #self.assertEqual(img.size, (1, 1)) + #self.assertEqual(img.getdata().getpixel((0, 0)), (0, 0, 0)) + img.close() + # raises exception when image cannot be read + self.assertRaises(errors.ReaderError, rdr, + os.path.join(os.path.dirname(__file__), 'invalid.nef')) + self.assertRaises(errors.ReaderError, rdr, + os.path.join(os.path.dirname(__file__), 'testimage.jpg')) + + + + +## main ## + +if __name__ == '__main__': + unittest.main() + +## EOF ## diff --git a/test/reader/image/testimage.jpg b/test/reader/image/testimage.jpg new file mode 100644 index 0000000..ea7af63 Binary files /dev/null and b/test/reader/image/testimage.jpg differ diff --git a/test/reader/test_chain.py b/test/reader/test_chain.py new file mode 100644 index 0000000..901faa1 --- /dev/null +++ b/test/reader/test_chain.py @@ -0,0 +1,85 @@ +""" + +Part of the bsie test suite. +A copy of the license is provided with the project. 
+Author: Matthias Baumgartner, 2022 +""" +# standard imports +import logging +import os +import unittest + +# bsie imports +from bsie.utils import errors +import bsie.reader.path +import bsie.reader.stat + +# objects to test +from bsie.reader.chain import ReaderChain + + +## code ## + +class TestReaderChain(unittest.TestCase): + def test_construct(self): + # subreaders are built + chain = ReaderChain(['bsie.reader.stat.Stat', 'bsie.reader.path.Path'], {}) + self.assertIsInstance(chain, ReaderChain) + self.assertEqual(chain._children, + (bsie.reader.stat.Stat(), bsie.reader.path.Path())) + # subreaders that failed to build are omitted + with self.assertLogs(logging.getLogger('bsie.reader.chain'), logging.WARNING): + chain = ReaderChain(['bsie.reader.stat.Stat', 'bsie.reader.invalid.Invalid'], {}) + self.assertEqual(chain._children, (bsie.reader.stat.Stat(), )) + with self.assertLogs(logging.getLogger('bsie.reader.chain'), logging.WARNING): + chain = ReaderChain(['bsie.reader.stat.Stat', 'bsie.reader.path.Invalid'], {}) + self.assertEqual(chain._children, (bsie.reader.stat.Stat(), )) + # warning is issued if there are no subreaders + with self.assertLogs(logging.getLogger('bsie.reader.chain'), logging.WARNING): + chain = ReaderChain([], {}) + self.assertEqual(chain._children, tuple()) + + def test_essentials(self): + chain = ReaderChain(['bsie.reader.stat.Stat', 'bsie.reader.path.Path'], {}) + # identity + self.assertEqual(chain, chain) + self.assertEqual(hash(chain), hash(chain)) + # comparison works across instances + self.assertEqual(chain, + ReaderChain(['bsie.reader.stat.Stat', 'bsie.reader.path.Path'], {})) + self.assertEqual(hash(chain), + hash(ReaderChain(['bsie.reader.stat.Stat', 'bsie.reader.path.Path'], {}))) + # comparison respects subreaders + self.assertNotEqual(hash(chain), + hash(ReaderChain(['bsie.reader.path.Path'], {}))) + self.assertNotEqual(hash(chain), + hash(ReaderChain(['bsie.reader.path.Path'], {}))) + # comparison respects subreader order + 
self.assertNotEqual(chain, + ReaderChain(['bsie.reader.path.Path', 'bsie.reader.stat.Stat'], {})) + self.assertNotEqual(hash(chain), + hash(ReaderChain(['bsie.reader.path.Path', 'bsie.reader.stat.Stat'], {}))) + # string representation + self.assertEqual(str(chain), 'ReaderChain(Stat, Path)') + self.assertEqual(repr(chain), 'ReaderChain((Stat(), Path()))') + + def test_call(self): + chain = ReaderChain(['bsie.reader.stat.Stat', 'bsie.reader.path.Path'], {}) + # chain first probes first child + self.assertEqual(chain(__file__), os.stat(__file__)) + # chain probes second child if first one failes + self.assertEqual(chain(''), '') + self.assertEqual(chain('missing-file'), 'missing-file') + + # chain raises a ReaderError if childs were exhausted + chain = ReaderChain(['bsie.reader.stat.Stat'], {}) + # chain probes second child if first one failes + self.assertRaises(errors.ReaderError, chain, '') + + +## main ## + +if __name__ == '__main__': + unittest.main() + +## EOF ## -- cgit v1.2.3 From 76f2cc9206276ca21a395dd9417ff7dfed0467fd Mon Sep 17 00:00:00 2001 From: Matthias Baumgartner Date: Mon, 16 Jan 2023 08:53:41 +0100 Subject: schema serialization adjustments and app --- bsie/apps/info.py | 5 ++++- bsie/extractor/base.py | 11 ++++++++++- bsie/extractor/generic/constant.py | 2 +- bsie/extractor/generic/path.py | 2 +- bsie/extractor/generic/stat.py | 2 +- 5 files changed, 17 insertions(+), 5 deletions(-) diff --git a/bsie/apps/info.py b/bsie/apps/info.py index a4e611c..cd28685 100644 --- a/bsie/apps/info.py +++ b/bsie/apps/info.py @@ -26,7 +26,7 @@ __all__: typing.Sequence[str] = ( def main(argv): """Show information from BSIE.""" parser = argparse.ArgumentParser(description=main.__doc__, prog='info') - parser.add_argument('what', choices=('predicates', ), + parser.add_argument('what', choices=('predicates', 'schema'), help='Select what information to show.') args = parser.parse_args(argv) @@ -63,6 +63,9 @@ def main(argv): # show predicates for pred in 
pipeline.schema.predicates(): print(pred.uri) + elif args.what == 'schema': + # show schema + print(bsfs.schema.to_string(pipeline.schema)) else: # args.what is already checked by argparse raise errors.UnreachableError() diff --git a/bsie/extractor/base.py b/bsie/extractor/base.py index 95689a5..8ab1124 100644 --- a/bsie/extractor/base.py +++ b/bsie/extractor/base.py @@ -31,13 +31,22 @@ SCHEMA_PREAMBLE = ''' prefix bsfs: prefix bse: + # default definitions + bsfs:Array rdfs:subClassOf bsfs:Literal . + bsfs:Number rdfs:subClassOf bsfs:Literal . + bsfs:Time rdfs:subClassOf bsfs:Literal . + bsfs:Feature rdfs:subClassOf bsfs:Array ; + bsfs:dimension "1"^^xsd:integer ; + bsfs:dtype bsfs:f16 ; + bsfs:distance bsfs:euclidean . + # essential nodes bsfs:Entity rdfs:subClassOf bsfs:Node . bsfs:File rdfs:subClassOf bsfs:Entity . # common definitions xsd:string rdfs:subClassOf bsfs:Literal . - xsd:integer rdfs:subClassOf bsfs:Literal . + xsd:integer rdfs:subClassOf bsfs:Number . ''' diff --git a/bsie/extractor/generic/constant.py b/bsie/extractor/generic/constant.py index 7b1d942..938e20c 100644 --- a/bsie/extractor/generic/constant.py +++ b/bsie/extractor/generic/constant.py @@ -34,7 +34,7 @@ class Constant(base.Extractor): schema: str, tuples: typing.Iterable[typing.Tuple[bsfs.URI, typing.Any]], ): - super().__init__(bsfs.schema.Schema.from_string(base.SCHEMA_PREAMBLE + schema)) + super().__init__(bsfs.schema.from_string(base.SCHEMA_PREAMBLE + schema)) # NOTE: Raises a KeyError if the predicate is not part of the schema self._tuples = tuple((self.schema.predicate(p_uri), value) for p_uri, value in tuples) # TODO: use schema instance for value checking diff --git a/bsie/extractor/generic/path.py b/bsie/extractor/generic/path.py index 295715f..c984515 100644 --- a/bsie/extractor/generic/path.py +++ b/bsie/extractor/generic/path.py @@ -29,7 +29,7 @@ class Path(base.Extractor): _callmap: typing.Dict[bsfs.schema.Predicate, typing.Callable[[str], typing.Any]] def __init__(self): - 
super().__init__(bsfs.schema.Schema.from_string(base.SCHEMA_PREAMBLE + ''' + super().__init__(bsfs.schema.from_string(base.SCHEMA_PREAMBLE + ''' bse:filename rdfs:subClassOf bsfs:Predicate ; rdfs:domain bsfs:File ; rdfs:range xsd:string ; diff --git a/bsie/extractor/generic/stat.py b/bsie/extractor/generic/stat.py index 1381fe2..9394456 100644 --- a/bsie/extractor/generic/stat.py +++ b/bsie/extractor/generic/stat.py @@ -31,7 +31,7 @@ class Stat(base.Extractor): _callmap: typing.Dict[bsfs.schema.Predicate, typing.Callable[[os.stat_result], typing.Any]] def __init__(self): - super().__init__(bsfs.schema.Schema.from_string(base.SCHEMA_PREAMBLE + ''' + super().__init__(bsfs.schema.from_string(base.SCHEMA_PREAMBLE + ''' bse:filesize rdfs:subClassOf bsfs:Predicate ; rdfs:domain bsfs:File ; rdfs:range xsd:integer ; -- cgit v1.2.3 From 5d7fa2716009bc32c08f27e686cd92ca4c02b670 Mon Sep 17 00:00:00 2001 From: Matthias Baumgartner Date: Mon, 16 Jan 2023 14:38:01 +0100 Subject: colors spatial feature --- bsie/extractor/image/__init__.py | 13 +++ bsie/extractor/image/colors_spatial.py | 155 ++++++++++++++++++++++++++++ bsie/utils/namespaces.py | 1 + test/extractor/image/__init__.py | 0 test/extractor/image/test_colors_spatial.py | 95 +++++++++++++++++ test/extractor/image/testimage.jpg | Bin 0 -> 349264 bytes 6 files changed, 264 insertions(+) create mode 100644 bsie/extractor/image/__init__.py create mode 100644 bsie/extractor/image/colors_spatial.py create mode 100644 test/extractor/image/__init__.py create mode 100644 test/extractor/image/test_colors_spatial.py create mode 100644 test/extractor/image/testimage.jpg diff --git a/bsie/extractor/image/__init__.py b/bsie/extractor/image/__init__.py new file mode 100644 index 0000000..75b118d --- /dev/null +++ b/bsie/extractor/image/__init__.py @@ -0,0 +1,13 @@ +""" + +Part of the bsie module. +A copy of the license is provided with the project. 
+Author: Matthias Baumgartner, 2022 +""" +# standard imports +import typing + +# exports +__all__: typing.Sequence[str] = [] + +## EOF ## diff --git a/bsie/extractor/image/colors_spatial.py b/bsie/extractor/image/colors_spatial.py new file mode 100644 index 0000000..fa31ea7 --- /dev/null +++ b/bsie/extractor/image/colors_spatial.py @@ -0,0 +1,155 @@ +"""Spatial color features. + +Part of the bsie module. +A copy of the license is provided with the project. +Author: Matthias Baumgartner, 2022 +""" +# standard imports +import typing + +# external imports +import PIL.Image +import numpy as np + +# bsie imports +from bsie.utils import bsfs, node, ns + +# inner-module imports +from .. import base + +# constants +FEATURE_NAME = ns.bsf + 'ColorsSpatial' +PREDICATE_NAME = ns.bse + 'colors_spatial' + +# exports +__all__: typing.Sequence[str] = ( + 'ColorsSpatial', + ) + + +## code ## + +class ColorsSpatial(base.Extractor): + """Determine dominant colors of subregions in the image. + + Computes the domiant color of increasingly smaller subregions of the image. + """ + + CONTENT_READER = 'bsie.reader.image.Image' + + # Initial subregion width. + width: int + + # Initial subregion height. + height: int + + # Decrement exponent. + exp: float + + # Principal predicate's URI. 
+ _predicate_name: bsfs.URI + + def __init__( + self, + width: int = 32, + height: int = 32, + exp: float = 4., + ): + # instance identifier + uuid = bsfs.uuid.UCID.from_dict({ + 'width': width, + 'height': height, + 'exp': exp, + }) + # determine symbol names + instance_name = FEATURE_NAME[uuid] + predicate_name = PREDICATE_NAME[uuid] + # get vector dimension + dimension = self.dimension(width, height, exp) + # initialize parent with the schema + super().__init__(bsfs.schema.from_string(base.SCHEMA_PREAMBLE + f''' + <{FEATURE_NAME}> rdfs:subClassOf bsfs:Feature ; + # annotations + rdfs:label "Spatially dominant colors"^^xsd:string ; + schema:description "Domiant colors of subregions in an image."^^xsd:string . + + <{instance_name}> rdfs:subClassOf <{FEATURE_NAME}> ; + bsfs:dimension "{dimension}"^^xsd:integer ; + # annotations + <{FEATURE_NAME}/args#width> "{width}"^^xsd:integer ; + <{FEATURE_NAME}/args#height> "{height}"^^xsd:integer ; + <{FEATURE_NAME}/args#exp> "{exp}"^^xsd:float . + + <{predicate_name}> rdfs:subClassOf bsfs:Predicate ; + rdfs:domain bsfs:File ; + rdfs:range <{instance_name}> ; + bsfs:unique "true"^^xsd:boolean . 
+ + ''')) + # assign extra members + self.width = width + self.height = height + self.exp = exp + self._predicate_name = predicate_name + + def __repr__(self) -> str: + return f'{bsfs.typename(self)}({self.width}, {self.height}, {self.exp})' + + def __eq__(self, other: typing.Any) -> bool: + return super().__eq__(other) \ + and self.width == other.width \ + and self.height == other.height \ + and self.exp == other.exp + + def __hash__(self) -> int: + return hash((super().__hash__(), self.width, self.height, self.exp)) + + @staticmethod + def dimension(width: int, height: int, exp: float) -> int: + """Return the feature vector dimension.""" + # FIXME: replace with a proper formula + dim = 0 + while width >= 1 and height >= 1: + dim += width * height + width = np.floor(width / exp) + height = np.floor(height / exp) + dim *= 3 # per band + return int(dim) + + def extract( + self, + subject: node.Node, + content: PIL.Image, + principals: typing.Iterable[bsfs.schema.Predicate], + ) -> typing.Iterator[typing.Tuple[node.Node, bsfs.schema.Predicate, typing.List[float]]]: + # check principals + if self.schema.predicate(self._predicate_name) not in principals: + # nothing to do; abort + return + + # convert to HSV + content = content.convert('HSV') + + # get dimensions + width, height = self.width, self.height + num_bands = len(content.getbands()) # it's three since we converted to HSV before + + features = [] + while width >= 1 and height >= 1: + # downsample + img = content.resize((width, height), resample=PIL.Image.Resampling.BOX) + # feature vector + features.append( + np.array(img.getdata()).reshape((width * height, num_bands))) + # iterate + width = int(np.floor(width / self.exp)) + height = int(np.floor(height / self.exp)) + + # combine features + value = np.vstack(features) + # convert features + value = value.reshape(-1).tolist() # several bands + # return triple with feature vector as value + yield subject, self.schema.predicate(self._predicate_name), value + +## 
EOF ## diff --git a/bsie/utils/namespaces.py b/bsie/utils/namespaces.py index 2d0b535..393b436 100644 --- a/bsie/utils/namespaces.py +++ b/bsie/utils/namespaces.py @@ -15,6 +15,7 @@ bse = _bsfs.Namespace('http://bsfs.ai/schema/Entity') bsfs = _bsfs.Namespace('http://bsfs.ai/schema', fsep='/') bsm = _bsfs.Namespace('http://bsfs.ai/schema/Meta') xsd = _bsfs.Namespace('http://www.w3.org/2001/XMLSchema') +bsf = _bsfs.Namespace('http://ie.bsfs.ai/schema/Feature') # export __all__: typing.Sequence[str] = ( diff --git a/test/extractor/image/__init__.py b/test/extractor/image/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/test/extractor/image/test_colors_spatial.py b/test/extractor/image/test_colors_spatial.py new file mode 100644 index 0000000..b704e3f --- /dev/null +++ b/test/extractor/image/test_colors_spatial.py @@ -0,0 +1,95 @@ +""" + +Part of the bsie test suite. +A copy of the license is provided with the project. +Author: Matthias Baumgartner, 2022 +""" +# standard imports +import os +import unittest + +# external imports +import PIL.Image + +# bsie imports +from bsie.extractor import base +from bsie.utils import bsfs, ns, node as _node + +# objects to test +from bsie.extractor.image.colors_spatial import ColorsSpatial + + +## code ## + +class TestColorsSpatial(unittest.TestCase): + def setUp(self): + # content id with default constructors (width=32, height=32, exp=4) + self.instance_prefix = 'http://ie.bsfs.ai/schema/Feature/ColorsSpatial' + self.predicate_prefix = 'http://bsfs.ai/schema/Entity/colors_spatial' + self.uuid = 'adee8d6c43687021e1c5bffe56bcfe727f1638d792744137181304ef889dac2a' + + def test_essentials(self): + # clones are equal + self.assertEqual(ColorsSpatial(32, 32, 4), ColorsSpatial(32, 32, 4)) + self.assertEqual(hash(ColorsSpatial(32, 32, 4)), hash(ColorsSpatial(32, 32, 4))) + # equal respects type + self.assertNotEqual(ColorsSpatial(32, 32, 4), 'hello world') + self.assertNotEqual(hash(ColorsSpatial(32, 32, 4)), hash('hello 
world')) + # equals respects width + self.assertNotEqual(ColorsSpatial(32, 32, 4), ColorsSpatial(16, 32, 4)) + self.assertNotEqual(hash(ColorsSpatial(32, 32, 4)), hash(ColorsSpatial(16, 32, 4))) + # equals respects height + self.assertNotEqual(ColorsSpatial(32, 32, 4), ColorsSpatial(32, 16, 4)) + self.assertNotEqual(hash(ColorsSpatial(32, 32, 4)), hash(ColorsSpatial(32, 16, 4))) + # equals respects exp + self.assertNotEqual(ColorsSpatial(32, 32, 4), ColorsSpatial(32, 32, 8)) + self.assertNotEqual(hash(ColorsSpatial(32, 32, 4)), hash(ColorsSpatial(32, 32, 8))) + # string representation + self.assertEqual(str(ColorsSpatial()), 'ColorsSpatial') + self.assertEqual(repr(ColorsSpatial(64, 16, 2)), 'ColorsSpatial(64, 16, 2)') + + def test_dimension(self): + self.assertEqual(ColorsSpatial.dimension(32, 32, 4), 3 * (32*32 + 8*8 + 2*2)) + self.assertEqual(ColorsSpatial.dimension(16, 16, 8), 3 * (16*16 + 2*2)) + self.assertEqual(ColorsSpatial.dimension(64, 64, 16), 3 * (64*64 + 4*4)) + + def test_schema(self): + schema = bsfs.schema.from_string(base.SCHEMA_PREAMBLE + f''' + <{self.instance_prefix}> rdfs:subClassOf bsfs:Feature ; + # annotations + rdfs:label ""^^xsd:string ; + schema:description ""^^xsd:string . + + <{self.instance_prefix}#{self.uuid}> rdfs:subClassOf <{self.instance_prefix}> ; + bsfs:dimension "3276"^^xsd:integer ; + # annotations + <{self.instance_prefix}/args#width> "32"^^xsd:integer ; + <{self.instance_prefix}/args#height> "32"^^xsd:integer ; + <{self.instance_prefix}/args#exp> "4"^^xsd:float . + + <{self.predicate_prefix}#{self.uuid}> rdfs:subClassOf bsfs:Predicate ; + rdfs:domain bsfs:File ; + rdfs:range <{self.instance_prefix}#{self.uuid}> ; + bsfs:unique "true"^^xsd:boolean . 
+ ''') + self.assertEqual(schema, ColorsSpatial().schema) + + def test_extract(self): + ext = ColorsSpatial(2,2,2) + img = PIL.Image.open(os.path.join(os.path.dirname(__file__), 'testimage.jpg')) + node = _node.Node(ns.bsfs.Entity, bsfs.URI('http://example.com/entity#1234')) + principals = set(ext.principals) + self.assertEqual(len(principals), 1) + ret = list(ext.extract(node, img, principals)) + self.assertEqual(ret[0], ( + node, + list(principals)[0], + [91, 127, 121, 94, 138, 167, 163, 134, 190, 138, 170, 156, 121, 142, 159])) + + +## main ## + +if __name__ == '__main__': + unittest.main() + +## EOF ## diff --git a/test/extractor/image/testimage.jpg b/test/extractor/image/testimage.jpg new file mode 100644 index 0000000..c80bb48 Binary files /dev/null and b/test/extractor/image/testimage.jpg differ -- cgit v1.2.3 From a0d1af36bdc09fe8eebe0c87a3f587395908ae28 Mon Sep 17 00:00:00 2001 From: Matthias Baumgartner Date: Mon, 16 Jan 2023 14:43:08 +0100 Subject: bsfs changes propagated to tests --- .gitignore | 2 +- test/extractor/generic/test_constant.py | 8 ++++---- test/extractor/generic/test_path.py | 10 +++++----- test/extractor/generic/test_stat.py | 10 +++++----- test/extractor/test_base.py | 12 ++++++------ test/lib/test_bsie.py | 10 +++++----- test/lib/test_pipeline.py | 4 ++-- 7 files changed, 28 insertions(+), 28 deletions(-) diff --git a/.gitignore b/.gitignore index 8e1df10..304ae08 100644 --- a/.gitignore +++ b/.gitignore @@ -23,6 +23,6 @@ build/ doc/build/ # testing data -test/reader/image/testimage.nef +test/reader/image/testimage.nef* ## EOF ## diff --git a/test/extractor/generic/test_constant.py b/test/extractor/generic/test_constant.py index a49345b..bde3805 100644 --- a/test/extractor/generic/test_constant.py +++ b/test/extractor/generic/test_constant.py @@ -36,19 +36,19 @@ class TestConstant(unittest.TestCase): node = _node.Node(ns.bsfs.Entity, '') # Blank node p_author = ext.schema.predicate(ns.bse.author) p_comment = 
ext.schema.predicate(ns.bse.comment) - entity = ext.schema.node(ns.bsfs.Node).get_child(ns.bsfs.Entity) - string = ext.schema.literal(ns.bsfs.Literal).get_child(ns.xsd.string) + entity = ext.schema.node(ns.bsfs.Node).child(ns.bsfs.Entity) + string = ext.schema.literal(ns.bsfs.Literal).child(ns.xsd.string) # baseline self.assertSetEqual(set(ext.extract(node, None, (p_author, p_comment))), {(node, p_author, 'Me, myself, and I'), (node, p_comment, 'the quick brown fox jumps over the lazy dog.')}) # predicates is respected - p_foobar = ext.schema.predicate(ns.bsfs.Predicate).get_child(ns.bse.foobar, domain=entity, range=entity) + p_foobar = ext.schema.predicate(ns.bsfs.Predicate).child(ns.bse.foobar, domain=entity, range=entity) self.assertSetEqual(set(ext.extract(node, None, (p_author, p_foobar))), {(node, p_author, 'Me, myself, and I')}) self.assertSetEqual(set(ext.extract(node, None, (p_comment, p_foobar))), {(node, p_comment, 'the quick brown fox jumps over the lazy dog.')}) - p_barfoo = ext.schema.predicate(ns.bse.author).get_child(ns.bse.comment, domain=entity, range=string) + p_barfoo = ext.schema.predicate(ns.bse.author).child(ns.bse.comment, domain=entity, range=string) self.assertSetEqual(set(ext.extract(node, None, (p_foobar, p_barfoo))), set()) def test_construct(self): diff --git a/test/extractor/generic/test_path.py b/test/extractor/generic/test_path.py index 778ac5a..ae68686 100644 --- a/test/extractor/generic/test_path.py +++ b/test/extractor/generic/test_path.py @@ -29,7 +29,7 @@ class TestPath(unittest.TestCase): def test_schema(self): self.assertEqual(Path().schema, - bsfs.schema.Schema.from_string(base.SCHEMA_PREAMBLE + ''' + bsfs.schema.from_string(base.SCHEMA_PREAMBLE + ''' bse:filename rdfs:subClassOf bsfs:Predicate ; rdfs:domain bsfs:File ; rdfs:range xsd:string ; @@ -41,19 +41,19 @@ class TestPath(unittest.TestCase): node = _node.Node(ns.bsfs.File, '') # Blank node content = '/tmp/foo/bar' p_filename = ext.schema.predicate(ns.bse.filename) - 
entity = ext.schema.node(ns.bsfs.Node).get_child(ns.bsfs.Entity) - string = ext.schema.literal(ns.bsfs.Literal).get_child(ns.xsd.string) + entity = ext.schema.node(ns.bsfs.Node).child(ns.bsfs.Entity) + string = ext.schema.literal(ns.bsfs.Literal).child(ns.xsd.string) # baseline self.assertSetEqual(set(ext.extract(node, content, (p_filename, ))), {(node, p_filename, 'bar')}) # predicates parameter is respected - p_foo = ext.schema.predicate(ns.bsfs.Predicate).get_child(ns.bse.foo, domain=entity, range=string) # unsupported predicate + p_foo = ext.schema.predicate(ns.bsfs.Predicate).child(ns.bse.foo, domain=entity, range=string) # unsupported predicate self.assertSetEqual(set(ext.extract(node, content, (p_filename, p_foo))), {(node, p_filename, 'bar')}) self.assertSetEqual(set(ext.extract(node, content, (p_foo, ))), set()) # predicates are validated - p_bar = p_foo.get_child(ns.bse.filename) # same URI but different hierarchy + p_bar = p_foo.child(ns.bse.filename) # same URI but different hierarchy self.assertSetEqual(set(ext.extract(node, content, (p_filename, p_bar))), {(node, p_filename, 'bar')}) self.assertSetEqual(set(ext.extract(node, content, (p_bar, ))), set()) diff --git a/test/extractor/generic/test_stat.py b/test/extractor/generic/test_stat.py index ff74085..e5562d1 100644 --- a/test/extractor/generic/test_stat.py +++ b/test/extractor/generic/test_stat.py @@ -30,7 +30,7 @@ class TestStat(unittest.TestCase): def test_schema(self): self.assertEqual(Stat().schema, - bsfs.schema.Schema.from_string(base.SCHEMA_PREAMBLE + ''' + bsfs.schema.from_string(base.SCHEMA_PREAMBLE + ''' bse:filesize rdfs:subClassOf bsfs:Predicate ; rdfs:domain bsfs:File ; rdfs:range xsd:integer ; @@ -42,19 +42,19 @@ class TestStat(unittest.TestCase): node = _node.Node(ns.bsfs.File, '') # Blank node content = os.stat(__file__) p_filesize = ext.schema.predicate(ns.bse.filesize) - entity = ext.schema.node(ns.bsfs.Node).get_child(ns.bsfs.Entity) - string = 
ext.schema.literal(ns.bsfs.Literal).get_child(ns.xsd.string) + entity = ext.schema.node(ns.bsfs.Node).child(ns.bsfs.Entity) + string = ext.schema.literal(ns.bsfs.Literal).child(ns.xsd.string) # baseline self.assertSetEqual(set(ext.extract(node, content, (p_filesize, ))), {(node, p_filesize, content.st_size)}) # predicates parameter is respected - p_foo = ext.schema.predicate(ns.bsfs.Predicate).get_child(ns.bse.foo, domain=entity, range=string) # unsupported predicate + p_foo = ext.schema.predicate(ns.bsfs.Predicate).child(ns.bse.foo, domain=entity, range=string) # unsupported predicate self.assertSetEqual(set(ext.extract(node, content, (p_filesize, p_foo))), {(node, p_filesize, content.st_size)}) self.assertSetEqual(set(ext.extract(node, content, (p_foo, ))), set()) # predicates are validated - p_bar = p_foo.get_child(ns.bse.filesizse) # same URI but different hierarchy + p_bar = p_foo.child(ns.bse.filesizse) # same URI but different hierarchy self.assertSetEqual(set(ext.extract(node, content, (p_filesize, p_bar))), {(node, p_filesize, content.st_size)}) self.assertSetEqual(set(ext.extract(node, content, (p_bar, ))), set()) diff --git a/test/extractor/test_base.py b/test/extractor/test_base.py index 6a63c59..acfaf58 100644 --- a/test/extractor/test_base.py +++ b/test/extractor/test_base.py @@ -18,7 +18,7 @@ from bsie.extractor import base class StubExtractor(base.Extractor): def __init__(self): - super().__init__(bsfs.schema.Schema.from_string(base.SCHEMA_PREAMBLE + ''' + super().__init__(bsfs.schema.from_string(base.SCHEMA_PREAMBLE + ''' bse:author rdfs:subClassOf bsfs:Predicate ; rdfs:domain bsfs:Entity ; rdfs:range xsd:string ; @@ -52,11 +52,11 @@ class TestExtractor(unittest.TestCase): self.assertNotEqual(hash(ext), hash(sub)) def test_principals(self): - schema = bsfs.schema.Schema.Empty() - entity = schema.node(ns.bsfs.Node).get_child(ns.bsfs.Entity) - string = schema.literal(ns.bsfs.Literal).get_child(bsfs.URI('http://www.w3.org/2001/XMLSchema#string')) - 
p_author = schema.predicate(ns.bsfs.Predicate).get_child(ns.bse.author, domain=entity, range=string) - p_comment = schema.predicate(ns.bsfs.Predicate).get_child(ns.bse.comment, domain=entity, range=string) + schema = bsfs.schema.Schema() + entity = schema.node(ns.bsfs.Node).child(ns.bsfs.Entity) + string = schema.literal(ns.bsfs.Literal).child(bsfs.URI('http://www.w3.org/2001/XMLSchema#string')) + p_author = schema.predicate(ns.bsfs.Predicate).child(ns.bse.author, domain=entity, range=string) + p_comment = schema.predicate(ns.bsfs.Predicate).child(ns.bse.comment, domain=entity, range=string) ext = StubExtractor() self.assertSetEqual(set(ext.principals), {p_author, p_comment} | set(schema.predicates()) - {schema.predicate(ns.bsfs.Predicate)}) diff --git a/test/lib/test_bsie.py b/test/lib/test_bsie.py index 52f1d44..38e6f59 100644 --- a/test/lib/test_bsie.py +++ b/test/lib/test_bsie.py @@ -52,7 +52,7 @@ class TestBSIE(unittest.TestCase): ns.bse.filesize, ns.bse.author, }) - self.assertEqual(lib.schema, bsfs.schema.Schema.from_string(SCHEMA_PREAMBLE + ''' + self.assertEqual(lib.schema, bsfs.schema.from_string(SCHEMA_PREAMBLE + ''' bse:filename rdfs:subClassOf bsfs:Predicate ; rdfs:domain bsfs:File ; rdfs:range xsd:string ; @@ -79,7 +79,7 @@ class TestBSIE(unittest.TestCase): ns.bse.filesize, ns.bse.author, }) - self.assertEqual(lib.schema, bsfs.schema.Schema.from_string(SCHEMA_PREAMBLE + ''' + self.assertEqual(lib.schema, bsfs.schema.from_string(SCHEMA_PREAMBLE + ''' bse:filesize rdfs:subClassOf bsfs:Predicate ; rdfs:domain bsfs:File ; rdfs:range xsd:integer; @@ -97,7 +97,7 @@ class TestBSIE(unittest.TestCase): ns.bse.filesize, ns.bse.author, }) - self.assertEqual(lib.schema, bsfs.schema.Schema.from_string(SCHEMA_PREAMBLE + ''' + self.assertEqual(lib.schema, bsfs.schema.from_string(SCHEMA_PREAMBLE + ''' bse:filename rdfs:subClassOf bsfs:Predicate ; rdfs:domain bsfs:File ; rdfs:range xsd:string ; @@ -124,7 +124,7 @@ class TestBSIE(unittest.TestCase): 
self.assertSetEqual(set(lib.principals), { ns.bse.author, }) - self.assertEqual(lib.schema, bsfs.schema.Schema.from_string(SCHEMA_PREAMBLE + ''' + self.assertEqual(lib.schema, bsfs.schema.from_string(SCHEMA_PREAMBLE + ''' bse:author rdfs:subClassOf bsfs:Predicate ; rdfs:domain bsfs:Entity ; rdfs:range xsd:string ; @@ -139,7 +139,7 @@ class TestBSIE(unittest.TestCase): self.assertSetEqual(set(lib.principals), { ns.bse.filesize, }) - self.assertEqual(lib.schema, bsfs.schema.Schema.from_string(SCHEMA_PREAMBLE + ''' + self.assertEqual(lib.schema, bsfs.schema.from_string(SCHEMA_PREAMBLE + ''' bse:filesize rdfs:subClassOf bsfs:Predicate ; rdfs:domain bsfs:File ; rdfs:range xsd:integer; diff --git a/test/lib/test_pipeline.py b/test/lib/test_pipeline.py index c6f7aba..8fecc74 100644 --- a/test/lib/test_pipeline.py +++ b/test/lib/test_pipeline.py @@ -74,7 +74,7 @@ class TestPipeline(unittest.TestCase): # equivalence respects schema p2 = Pipeline(self.prefix, self.ext2rdr) - p2._schema = pipeline.schema.Empty() + p2._schema = bsfs.schema.Schema() self.assertNotEqual(pipeline, p2) self.assertNotEqual(hash(pipeline), hash(p2)) @@ -100,7 +100,7 @@ class TestPipeline(unittest.TestCase): p_author = pipeline.schema.predicate(ns.bse.author) p_rating = pipeline.schema.predicate(ns.bse.rating) entity = pipeline.schema.node(ns.bsfs.File) - p_invalid = pipeline.schema.predicate(ns.bsfs.Predicate).get_child(ns.bse.foo, range=entity) + p_invalid = pipeline.schema.predicate(ns.bsfs.Predicate).child(ns.bse.foo, range=entity) # extract given predicates self.assertSetEqual(set(pipeline(testfile, {p_filename, p_filesize})), { -- cgit v1.2.3 From 3f93be488638fdf6668e0e03e2b1634bb969ca80 Mon Sep 17 00:00:00 2001 From: Matthias Baumgartner Date: Mon, 16 Jan 2023 15:39:16 +0100 Subject: random fixes --- bsie/apps/info.py | 4 ++++ bsie/reader/image/__init__.py | 3 ++- setup.py | 2 ++ test/reader/image/test_image.py | 5 ++++- test/reader/image/test_raw_image.py | 5 ++++- 5 files changed, 16 
insertions(+), 3 deletions(-) diff --git a/bsie/apps/info.py b/bsie/apps/info.py index cd28685..5b6fb0e 100644 --- a/bsie/apps/info.py +++ b/bsie/apps/info.py @@ -33,6 +33,10 @@ def main(argv): # FIXME: Read reader/extractor configs from a config file # reader builder rbuild = ReaderBuilder({ + 'bsie.reader.image.Image': { + 'bsie.reader.image._raw.RawImage': { + } + } }) # extractor builder ebuild = ExtractorBuilder([ diff --git a/bsie/reader/image/__init__.py b/bsie/reader/image/__init__.py index 85dad85..b7587e7 100644 --- a/bsie/reader/image/__init__.py +++ b/bsie/reader/image/__init__.py @@ -27,7 +27,8 @@ __all__: typing.Sequence[str] = ( ## code ## -class Image(chain.ReaderChain[PIL.Image]): # pylint: disable=too-few-public-methods +# FIXME: Check if PIL.Image or PIL.Image.Image, or if version-dependent +class Image(chain.ReaderChain[PIL.Image.Image]): # pylint: disable=too-few-public-methods """Read an image file.""" def __init__(self, cfg): diff --git a/setup.py b/setup.py index 2f7a485..6dad7ac 100644 --- a/setup.py +++ b/setup.py @@ -25,6 +25,8 @@ setup( extra_require=( # image reader 'pillow', 'rawpy', + # image extractors + 'numpy', ) ) diff --git a/test/reader/image/test_image.py b/test/reader/image/test_image.py index c60ca75..26f6a93 100644 --- a/test/reader/image/test_image.py +++ b/test/reader/image/test_image.py @@ -20,7 +20,10 @@ from bsie.reader.image import Image class TestImage(unittest.TestCase): def setUp(self): - importlib.import_module(__package__ + '.load_nef').get() + if __package__ is None or __package__ == '': # direct call or local discovery + importlib.import_module('load_nef', __package__).get() + else: # parent discovery + importlib.import_module('.load_nef', __package__).get() def test_construct(self): image = Image({}) diff --git a/test/reader/image/test_raw_image.py b/test/reader/image/test_raw_image.py index 3d5f887..ba21b5a 100644 --- a/test/reader/image/test_raw_image.py +++ b/test/reader/image/test_raw_image.py @@ -23,7 
+23,10 @@ from bsie.reader.image._raw import RawImage class TestRawImage(unittest.TestCase): def setUp(self): - importlib.import_module(__package__ + '.load_nef').get() + if __package__ is None or __package__ == '': # direct call or local discovery + importlib.import_module('load_nef', __package__).get() + else: # parent discovery + importlib.import_module('.load_nef', __package__).get() def test_call(self): rdr = RawImage() -- cgit v1.2.3 From afd165000c1661a9cca117a4844ad3f89d926fdb Mon Sep 17 00:00:00 2001 From: Matthias Baumgartner Date: Mon, 16 Jan 2023 20:53:39 +0100 Subject: unsupported file format exception --- bsie/lib/pipeline.py | 7 +++++++ bsie/reader/chain.py | 7 +++++-- bsie/reader/image/_pillow.py | 2 ++ bsie/reader/image/_raw.py | 2 +- bsie/utils/errors.py | 3 +++ 5 files changed, 18 insertions(+), 3 deletions(-) diff --git a/bsie/lib/pipeline.py b/bsie/lib/pipeline.py index e5ce1b7..02119bc 100644 --- a/bsie/lib/pipeline.py +++ b/bsie/lib/pipeline.py @@ -126,6 +126,8 @@ class Pipeline(): # get content content = rdr(path) if rdr is not None else None + #logger.info('extracted %s from %s', rdr, path) + # apply extractors on this content for ext in extrs: try: @@ -137,6 +139,11 @@ class Pipeline(): # critical extractor failure. logger.error('%s failed to extract triples from content: %s', ext, err) + except errors.UnsupportedFileFormatError as err: + # failed to read the file format. skip. + #logger.warning('%s could not process the file format of %s', rdr, err) + pass + except errors.ReaderError as err: # failed to read any content. skip. 
logger.error('%s failed to read content: %s', rdr, err) diff --git a/bsie/reader/chain.py b/bsie/reader/chain.py index 8e900e1..db7c2d5 100644 --- a/bsie/reader/chain.py +++ b/bsie/reader/chain.py @@ -73,13 +73,16 @@ class ReaderChain(base.Reader, typing.Generic[T_CONTENT]): return hash((super().__hash__(), self._children)) def __call__(self, path: str) -> T_CONTENT: + raise_error = errors.UnsupportedFileFormatError for child in self._children: try: return child(path) + except errors.UnsupportedFileFormatError: + pass except errors.ReaderError: # child cannot read the file, skip. - pass + raise_error = errors.ReaderError # type: ignore [assignment] # mypy is confused - raise errors.ReaderError(path) + raise raise_error(path) ## EOF ## diff --git a/bsie/reader/image/_pillow.py b/bsie/reader/image/_pillow.py index ee0662d..3144509 100644 --- a/bsie/reader/image/_pillow.py +++ b/bsie/reader/image/_pillow.py @@ -31,6 +31,8 @@ class PillowImage(base.Reader): try: # open file with PIL return PIL.Image.open(path) + except PIL.UnidentifiedImageError as err: + raise errors.UnsupportedFileFormatError(path) from err except IOError as err: raise errors.ReaderError(path) from err diff --git a/bsie/reader/image/_raw.py b/bsie/reader/image/_raw.py index 77be357..cd60453 100644 --- a/bsie/reader/image/_raw.py +++ b/bsie/reader/image/_raw.py @@ -45,7 +45,7 @@ class RawImage(base.Reader): def __call__(self, path: str) -> PIL.Image: # perform quick checks first if not self._match(path): - raise errors.ReaderError(path) + raise errors.UnsupportedFileFormatError(path) try: # open file with rawpy diff --git a/bsie/utils/errors.py b/bsie/utils/errors.py index fbc16f7..8133cd4 100644 --- a/bsie/utils/errors.py +++ b/bsie/utils/errors.py @@ -42,4 +42,7 @@ class UnreachableError(ProgrammingError): class ParserError(_BSIEError): """Failed to parse due to invalid syntax or structures.""" +class UnsupportedFileFormatError(ReaderError): + """Failed to read a file format.""" + ## EOF ## -- cgit 
v1.2.3 From bffe6bb52d00e60665b4e8e2144ab91e4465173e Mon Sep 17 00:00:00 2001 From: Matthias Baumgartner Date: Mon, 16 Jan 2023 20:54:25 +0100 Subject: minor bugfixes --- bsie/apps/index.py | 4 +++- bsie/extractor/base.py | 1 + bsie/extractor/image/colors_spatial.py | 8 +++----- 3 files changed, 7 insertions(+), 6 deletions(-) diff --git a/bsie/apps/index.py b/bsie/apps/index.py index 0c6296f..7cf94d3 100644 --- a/bsie/apps/index.py +++ b/bsie/apps/index.py @@ -82,7 +82,9 @@ def main(argv): # index input paths for path in args.input_file: - if os.path.isdir(path) and args.recursive: + if not os.path.exists(path): + pass # FIXME: notify the user + elif os.path.isdir(path) and args.recursive: for dirpath, _, filenames in os.walk(path, topdown=True, followlinks=args.follow): for filename in filenames: for node, pred, value in bsie.from_file(os.path.join(dirpath, filename)): diff --git a/bsie/extractor/base.py b/bsie/extractor/base.py index 8ab1124..7401244 100644 --- a/bsie/extractor/base.py +++ b/bsie/extractor/base.py @@ -108,5 +108,6 @@ class Extractor(abc.ABC): principals: typing.Iterable[bsfs.schema.Predicate], ) -> typing.Iterator[typing.Tuple[node.Node, bsfs.schema.Predicate, typing.Any]]: """Return (node, predicate, value) triples.""" + # FIXME: type annotation could be more strict: value is Hashable ## EOF ## diff --git a/bsie/extractor/image/colors_spatial.py b/bsie/extractor/image/colors_spatial.py index fa31ea7..38f1db4 100644 --- a/bsie/extractor/image/colors_spatial.py +++ b/bsie/extractor/image/colors_spatial.py @@ -121,7 +121,7 @@ class ColorsSpatial(base.Extractor): subject: node.Node, content: PIL.Image, principals: typing.Iterable[bsfs.schema.Predicate], - ) -> typing.Iterator[typing.Tuple[node.Node, bsfs.schema.Predicate, typing.List[float]]]: + ) -> typing.Iterator[typing.Tuple[node.Node, bsfs.schema.Predicate, typing.Any]]: # check principals if self.schema.predicate(self._predicate_name) not in principals: # nothing to do; abort @@ -145,10 
+145,8 @@ class ColorsSpatial(base.Extractor): width = int(np.floor(width / self.exp)) height = int(np.floor(height / self.exp)) - # combine features - value = np.vstack(features) - # convert features - value = value.reshape(-1).tolist() # several bands + # combine bands and convert features to tuple + value = tuple(np.vstack(features).reshape(-1)) # return triple with feature vector as value yield subject, self.schema.predicate(self._predicate_name), value -- cgit v1.2.3 From 02bbad817077e9a23f7b24b82845fcde24de63a9 Mon Sep 17 00:00:00 2001 From: Matthias Baumgartner Date: Mon, 16 Jan 2023 20:55:13 +0100 Subject: image feature integration test --- bsie/apps/index.py | 10 +++++++++- test/apps/test_index.py | 14 ++++++++++++++ test/apps/testdir/testimage.jpg | Bin 0 -> 349264 bytes 3 files changed, 23 insertions(+), 1 deletion(-) create mode 100644 test/apps/testdir/testimage.jpg diff --git a/bsie/apps/index.py b/bsie/apps/index.py index 7cf94d3..25e006f 100644 --- a/bsie/apps/index.py +++ b/bsie/apps/index.py @@ -44,7 +44,10 @@ def main(argv): # FIXME: Read reader/extractor configs from a config file # reader builder - rbuild = ReaderBuilder({}) + rbuild = ReaderBuilder({ + 'bsie.reader.image.Image': { + 'cfg': {}}, # FIXME: cfg should be optional! + }) # extractor builder ebuild = ExtractorBuilder([ {'bsie.extractor.generic.path.Path': {}}, @@ -58,6 +61,11 @@ def main(argv): bsfs:unique "true"^^xsd:boolean . 
''', )}, + {'bsie.extractor.image.colors_spatial.ColorsSpatial': { + 'width': 2, + 'height': 2, + 'exp': 2, + }}, ]) # pipeline builder pbuild = PipelineBuilder( diff --git a/test/apps/test_index.py b/test/apps/test_index.py index 6fc3335..c3960b8 100644 --- a/test/apps/test_index.py +++ b/test/apps/test_index.py @@ -77,6 +77,14 @@ class TestIndex(unittest.TestCase): (rdflib.URIRef(prefix + 'd803187cbf3676ae9d38126270a6152c60431589aa3bb3824baf8954e9c097f1'), rdflib.URIRef(ns.bse.author), rdflib.Literal('Me, myself, and I', datatype=rdflib.XSD.string)), (rdflib.URIRef(prefix + 'd803187cbf3676ae9d38126270a6152c60431589aa3bb3824baf8954e9c097f1'), rdflib.URIRef(ns.bse.filename), rdflib.Literal('td_second', datatype=rdflib.XSD.string)), (rdflib.URIRef(prefix + 'd803187cbf3676ae9d38126270a6152c60431589aa3bb3824baf8954e9c097f1'), rdflib.URIRef(ns.bse.filesize), rdflib.Literal('703', datatype=rdflib.XSD.integer)), + (rdflib.URIRef(prefix + 'accb115d266ad60c53cd01a7f7130f245886ce8eaf69bc85319febc11d9fe089'), rdflib.RDF.type, rdflib.URIRef(ns.bsfs.File)), + (rdflib.URIRef(prefix + 'accb115d266ad60c53cd01a7f7130f245886ce8eaf69bc85319febc11d9fe089'), rdflib.URIRef(ns.bse.author), rdflib.Literal('Me, myself, and I', datatype=rdflib.XSD.string)), + (rdflib.URIRef(prefix + 'accb115d266ad60c53cd01a7f7130f245886ce8eaf69bc85319febc11d9fe089'), rdflib.URIRef(ns.bse.filename), rdflib.Literal('testimage.jpg', datatype=rdflib.XSD.string)), + (rdflib.URIRef(prefix + 'accb115d266ad60c53cd01a7f7130f245886ce8eaf69bc85319febc11d9fe089'), rdflib.URIRef(ns.bse.filesize), rdflib.Literal('349264', datatype=rdflib.XSD.integer)), + (rdflib.URIRef(prefix + 'accb115d266ad60c53cd01a7f7130f245886ce8eaf69bc85319febc11d9fe089'), rdflib.URIRef('http://bsfs.ai/schema/Entity/colors_spatial#0658f2234a054e1dd59a14462c89f7733e019160419c796356aa831498bd0a04'), + rdflib.Literal( + '(91, 127, 121, 94, 138, 167, 163, 134, 190, 138, 170, 156, 121, 142, 159)', + 
datatype=rdflib.URIRef('http://ie.bsfs.ai/schema/Feature/ColorsSpatial#0658f2234a054e1dd59a14462c89f7733e019160419c796356aa831498bd0a04'))), })) # NOTE: we don't check ns.bsm.t_created since it depends on the execution time. Triples would look like this: @@ -91,6 +99,7 @@ class TestIndex(unittest.TestCase): # (rdflib.URIRef(prefix + 'b8fd7fba818254166a6043195004138ebda6923e012442f819a2c49671136c70'), rdflib.URIRef(ns.bsm.t_created), rdflib.Literal('1670..........', datatype=rdflib.XSD.integer)), # (rdflib.URIRef(prefix + 'd43758ace82154a1cc10ca0dfef63cb20dd831f9c87edd6dc06539eefe67371d'), rdflib.URIRef(ns.bsm.t_created), rdflib.Literal('1670..........', datatype=rdflib.XSD.integer)), # (rdflib.URIRef(prefix + 'd803187cbf3676ae9d38126270a6152c60431589aa3bb3824baf8954e9c097f1'), rdflib.URIRef(ns.bsm.t_created), rdflib.Literal('1670..........', datatype=rdflib.XSD.integer)), + # (rdflib.URIRef(prefix + 'accb115d266ad60c53cd01a7f7130f245886ce8eaf69bc85319febc11d9fe089'), rdflib.URIRef(ns.bsm.t_created), rdflib.Literal('1670..........', datatype=rdflib.XSD.integer)), # instead, we simply check if there's such a predicate for each file self.assertSetEqual({sub for sub, _ in bsfs._backend._graph.subject_objects(rdflib.URIRef(ns.bsm.t_created))}, { rdflib.URIRef(prefix + '2f4109b40107cc50e0884755a1a961ed126887e49b8dbaf0e146b2e226aa6647'), @@ -104,6 +113,7 @@ class TestIndex(unittest.TestCase): rdflib.URIRef(prefix + 'b8fd7fba818254166a6043195004138ebda6923e012442f819a2c49671136c70'), rdflib.URIRef(prefix + 'd43758ace82154a1cc10ca0dfef63cb20dd831f9c87edd6dc06539eefe67371d'), rdflib.URIRef(prefix + 'd803187cbf3676ae9d38126270a6152c60431589aa3bb3824baf8954e9c097f1'), + rdflib.URIRef(prefix + 'accb115d266ad60c53cd01a7f7130f245886ce8eaf69bc85319febc11d9fe089'), }) def test_print(self): @@ -150,6 +160,10 @@ class TestIndex(unittest.TestCase): f'Node(http://bsfs.ai/schema/File, http://example.com/me/file#d803187cbf3676ae9d38126270a6152c60431589aa3bb3824baf8954e9c097f1) 
Predicate({ns.bse.author}) Me, myself, and I', f'Node(http://bsfs.ai/schema/File, http://example.com/me/file#d803187cbf3676ae9d38126270a6152c60431589aa3bb3824baf8954e9c097f1) Predicate({ns.bse.filename}) td_second', f'Node(http://bsfs.ai/schema/File, http://example.com/me/file#d803187cbf3676ae9d38126270a6152c60431589aa3bb3824baf8954e9c097f1) Predicate({ns.bse.filesize}) 703', + f'Node(http://bsfs.ai/schema/File, http://example.com/me/file#accb115d266ad60c53cd01a7f7130f245886ce8eaf69bc85319febc11d9fe089) Predicate({ns.bse.filesize}) 349264', + f'Node(http://bsfs.ai/schema/File, http://example.com/me/file#accb115d266ad60c53cd01a7f7130f245886ce8eaf69bc85319febc11d9fe089) Predicate({ns.bse.author}) Me, myself, and I', + f'Node(http://bsfs.ai/schema/File, http://example.com/me/file#accb115d266ad60c53cd01a7f7130f245886ce8eaf69bc85319febc11d9fe089) Predicate({ns.bse.filename}) testimage.jpg', + f'Node(http://bsfs.ai/schema/File, http://example.com/me/file#accb115d266ad60c53cd01a7f7130f245886ce8eaf69bc85319febc11d9fe089) Predicate(http://bsfs.ai/schema/Entity/colors_spatial#0658f2234a054e1dd59a14462c89f7733e019160419c796356aa831498bd0a04) (91, 127, 121, 94, 138, 167, 163, 134, 190, 138, 170, 156, 121, 142, 159)', }) diff --git a/test/apps/testdir/testimage.jpg b/test/apps/testdir/testimage.jpg new file mode 100644 index 0000000..c80bb48 Binary files /dev/null and b/test/apps/testdir/testimage.jpg differ -- cgit v1.2.3 From e7dff4f04b2b02d135bd166b9d340291517e47e8 Mon Sep 17 00:00:00 2001 From: Matthias Baumgartner Date: Mon, 16 Jan 2023 20:56:30 +0100 Subject: minor bugfix --- test/extractor/image/test_colors_spatial.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/extractor/image/test_colors_spatial.py b/test/extractor/image/test_colors_spatial.py index b704e3f..d8a4209 100644 --- a/test/extractor/image/test_colors_spatial.py +++ b/test/extractor/image/test_colors_spatial.py @@ -84,7 +84,7 @@ class TestColorsSpatial(unittest.TestCase): 
self.assertEqual(ret[0], ( node, list(principals)[0], - [91, 127, 121, 94, 138, 167, 163, 134, 190, 138, 170, 156, 121, 142, 159])) + (91, 127, 121, 94, 138, 167, 163, 134, 190, 138, 170, 156, 121, 142, 159))) ## main ## -- cgit v1.2.3 From 8439089807bbad92e95ad9062dc74c3d71f5d7eb Mon Sep 17 00:00:00 2001 From: Matthias Baumgartner Date: Mon, 16 Jan 2023 21:35:19 +0100 Subject: ReaderBuilder optional config --- bsie/apps/index.py | 5 +---- bsie/apps/info.py | 7 +------ bsie/reader/builder.py | 6 +++++- bsie/reader/chain.py | 2 +- bsie/reader/image/__init__.py | 2 +- 5 files changed, 9 insertions(+), 13 deletions(-) diff --git a/bsie/apps/index.py b/bsie/apps/index.py index 25e006f..21c2318 100644 --- a/bsie/apps/index.py +++ b/bsie/apps/index.py @@ -44,10 +44,7 @@ def main(argv): # FIXME: Read reader/extractor configs from a config file # reader builder - rbuild = ReaderBuilder({ - 'bsie.reader.image.Image': { - 'cfg': {}}, # FIXME: cfg should be optional! - }) + rbuild = ReaderBuilder() # extractor builder ebuild = ExtractorBuilder([ {'bsie.extractor.generic.path.Path': {}}, diff --git a/bsie/apps/info.py b/bsie/apps/info.py index 5b6fb0e..d8a70a6 100644 --- a/bsie/apps/info.py +++ b/bsie/apps/info.py @@ -32,12 +32,7 @@ def main(argv): # FIXME: Read reader/extractor configs from a config file # reader builder - rbuild = ReaderBuilder({ - 'bsie.reader.image.Image': { - 'bsie.reader.image._raw.RawImage': { - } - } - }) + rbuild = ReaderBuilder() # extractor builder ebuild = ExtractorBuilder([ {'bsie.extractor.generic.path.Path': {}}, diff --git a/bsie/reader/builder.py b/bsie/reader/builder.py index bce5397..8699e75 100644 --- a/bsie/reader/builder.py +++ b/bsie/reader/builder.py @@ -40,7 +40,11 @@ class ReaderBuilder(): # cached readers _cache: typing.Dict[str, base.Reader] - def __init__(self, kwargs: typing.Dict[str, typing.Dict[str, typing.Any]]): + def __init__( + self, + kwargs: typing.Optional[typing.Dict[str, typing.Dict[str, typing.Any]]] = None): + if 
kwargs is None: + kwargs = {} self._kwargs = kwargs self._cache = {} diff --git a/bsie/reader/chain.py b/bsie/reader/chain.py index db7c2d5..5e9e0d5 100644 --- a/bsie/reader/chain.py +++ b/bsie/reader/chain.py @@ -37,7 +37,7 @@ class ReaderChain(base.Reader, typing.Generic[T_CONTENT]): def __init__( self, subreader_names: typing.Iterable[str], - cfg: typing.Any, + cfg: typing.Optional[typing.Any] = None, ): rbuild = builder.ReaderBuilder(cfg) children = [] diff --git a/bsie/reader/image/__init__.py b/bsie/reader/image/__init__.py index b7587e7..1f290b5 100644 --- a/bsie/reader/image/__init__.py +++ b/bsie/reader/image/__init__.py @@ -31,7 +31,7 @@ __all__: typing.Sequence[str] = ( class Image(chain.ReaderChain[PIL.Image.Image]): # pylint: disable=too-few-public-methods """Read an image file.""" - def __init__(self, cfg): + def __init__(self, cfg: typing.Optional[typing.Any] = None): super().__init__(_FILE_FORMAT_READERS, cfg) ## EOF ## -- cgit v1.2.3 From 4f868bcb3be2658960eace3222563cc9a819366a Mon Sep 17 00:00:00 2001 From: Matthias Baumgartner Date: Mon, 16 Jan 2023 21:36:11 +0100 Subject: info schema and feature tests --- bsie/apps/info.py | 5 +++++ test/apps/test_info.py | 19 +++++++++++++++++++ 2 files changed, 24 insertions(+) diff --git a/bsie/apps/info.py b/bsie/apps/info.py index d8a70a6..64a4eba 100644 --- a/bsie/apps/info.py +++ b/bsie/apps/info.py @@ -46,6 +46,11 @@ def main(argv): bsfs:unique "true"^^xsd:boolean . 
''', )}, + {'bsie.extractor.image.colors_spatial.ColorsSpatial': { + 'width': 2, + 'height': 2, + 'exp': 2, + }}, ]) # pipeline builder pbuild = PipelineBuilder( diff --git a/test/apps/test_info.py b/test/apps/test_info.py index f52c581..60e9ba1 100644 --- a/test/apps/test_info.py +++ b/test/apps/test_info.py @@ -10,6 +10,9 @@ import contextlib import io import unittest +# bsie imports +from bsie.utils import bsfs + # objects to test from bsie.apps.info import main @@ -28,6 +31,22 @@ class TestIndex(unittest.TestCase): 'http://bsfs.ai/schema/Predicate', 'http://bsfs.ai/schema/Entity#filename', 'http://bsfs.ai/schema/Entity#filesize', + 'http://bsfs.ai/schema/Entity/colors_spatial#0658f2234a054e1dd59a14462c89f7733e019160419c796356aa831498bd0a04' + }) + + def test_schema(self): + outbuf = io.StringIO() + with contextlib.redirect_stdout(outbuf): + # show schema infos + main(['schema']) + # verify output + schema = bsfs.schema.from_string(outbuf.getvalue()) + self.assertSetEqual({pred.uri for pred in schema.predicates()}, { + 'http://bsfs.ai/schema/Entity#author', + 'http://bsfs.ai/schema/Predicate', + 'http://bsfs.ai/schema/Entity#filename', + 'http://bsfs.ai/schema/Entity#filesize', + 'http://bsfs.ai/schema/Entity/colors_spatial#0658f2234a054e1dd59a14462c89f7733e019160419c796356aa831498bd0a04' }) def test_invalid(self): -- cgit v1.2.3 From 58aaa864f9747d27c065739256d4c6635ca9b751 Mon Sep 17 00:00:00 2001 From: Matthias Baumgartner Date: Mon, 16 Jan 2023 21:36:50 +0100 Subject: minor fixes --- bsie/extractor/image/colors_spatial.py | 3 ++- bsie/lib/pipeline.py | 3 +-- test/apps/test_index.py | 6 ++++++ test/extractor/image/test_colors_spatial.py | 9 +++++++-- 4 files changed, 16 insertions(+), 5 deletions(-) diff --git a/bsie/extractor/image/colors_spatial.py b/bsie/extractor/image/colors_spatial.py index 38f1db4..ce5b9f2 100644 --- a/bsie/extractor/image/colors_spatial.py +++ b/bsie/extractor/image/colors_spatial.py @@ -71,7 +71,8 @@ class 
ColorsSpatial(base.Extractor): <{FEATURE_NAME}> rdfs:subClassOf bsfs:Feature ; # annotations rdfs:label "Spatially dominant colors"^^xsd:string ; - schema:description "Domiant colors of subregions in an image."^^xsd:string . + schema:description "Domiant colors of subregions in an image."^^xsd:string ; + bsfs:dtype xsd:integer . <{instance_name}> rdfs:subClassOf <{FEATURE_NAME}> ; bsfs:dimension "{dimension}"^^xsd:integer ; diff --git a/bsie/lib/pipeline.py b/bsie/lib/pipeline.py index 02119bc..44685ba 100644 --- a/bsie/lib/pipeline.py +++ b/bsie/lib/pipeline.py @@ -125,7 +125,6 @@ class Pipeline(): try: # get content content = rdr(path) if rdr is not None else None - #logger.info('extracted %s from %s', rdr, path) # apply extractors on this content @@ -139,7 +138,7 @@ class Pipeline(): # critical extractor failure. logger.error('%s failed to extract triples from content: %s', ext, err) - except errors.UnsupportedFileFormatError as err: + except errors.UnsupportedFileFormatError: # failed to read the file format. skip. 
#logger.warning('%s could not process the file format of %s', rdr, err) pass diff --git a/test/apps/test_index.py b/test/apps/test_index.py index c3960b8..7f5be8e 100644 --- a/test/apps/test_index.py +++ b/test/apps/test_index.py @@ -23,6 +23,12 @@ from bsie.apps.index import main ## code ## class TestIndex(unittest.TestCase): + def test_main_invalid(self): + outbuf = io.StringIO() + with contextlib.redirect_stdout(outbuf): + bsfs = main([os.path.join(os.path.dirname(__file__), 'inexistent-file.t')]) + self.assertEqual(outbuf.getvalue().strip(), '') + def test_main(self): bsfs = main([ '-r', diff --git a/test/extractor/image/test_colors_spatial.py b/test/extractor/image/test_colors_spatial.py index d8a4209..ba551f3 100644 --- a/test/extractor/image/test_colors_spatial.py +++ b/test/extractor/image/test_colors_spatial.py @@ -57,8 +57,9 @@ class TestColorsSpatial(unittest.TestCase): schema = bsfs.schema.from_string(base.SCHEMA_PREAMBLE + f''' <{self.instance_prefix}> rdfs:subClassOf bsfs:Feature ; # annotations - rdfs:label ""^^xsd:string ; - schema:description ""^^xsd:string . + rdfs:label "Spatially dominant colors"^^xsd:string ; + schema:description "Domiant colors of subregions in an image."^^xsd:string ; + bsfs:dtype xsd:integer . 
<{self.instance_prefix}#{self.uuid}> rdfs:subClassOf <{self.instance_prefix}> ; bsfs:dimension "3276"^^xsd:integer ; @@ -80,11 +81,15 @@ class TestColorsSpatial(unittest.TestCase): node = _node.Node(ns.bsfs.Entity, bsfs.URI('http://example.com/entity#1234')) principals = set(ext.principals) self.assertEqual(len(principals), 1) + # valid invocation yields feature ret = list(ext.extract(node, img, principals)) self.assertEqual(ret[0], ( node, list(principals)[0], (91, 127, 121, 94, 138, 167, 163, 134, 190, 138, 170, 156, 121, 142, 159))) + # principals is respected + self.assertListEqual(list(ext.extract(node, img, {})), []) + ## main ## -- cgit v1.2.3 From 9c26a5ef759b010d8cf4384b0515cc188b885d81 Mon Sep 17 00:00:00 2001 From: Matthias Baumgartner Date: Wed, 8 Feb 2023 17:44:00 +0100 Subject: node naming policy --- bsie/apps/index.py | 16 ++++--- bsie/apps/info.py | 1 - bsie/lib/__init__.py | 1 + bsie/lib/bsie.py | 10 ++-- bsie/lib/builder.py | 9 +--- bsie/lib/naming_policy.py | 101 +++++++++++++++++++++++++++++++++++++++++ bsie/lib/pipeline.py | 18 ++------ bsie/utils/node.py | 29 +++++++++--- test/lib/test_bsie.py | 22 ++++----- test/lib/test_builder.py | 11 ++--- test/lib/test_naming_policy.py | 86 +++++++++++++++++++++++++++++++++++ test/lib/test_pipeline.py | 28 +++++------- test/utils/test_node.py | 54 +++++++++++++++++----- 13 files changed, 306 insertions(+), 80 deletions(-) create mode 100644 bsie/lib/naming_policy.py create mode 100644 test/lib/test_naming_policy.py diff --git a/bsie/apps/index.py b/bsie/apps/index.py index 21c2318..a870364 100644 --- a/bsie/apps/index.py +++ b/bsie/apps/index.py @@ -11,7 +11,7 @@ import typing # bsie imports from bsie.extractor import ExtractorBuilder -from bsie.lib import BSIE, PipelineBuilder +from bsie.lib import BSIE, PipelineBuilder, DefaultNamingPolicy from bsie.reader import ReaderBuilder from bsie.utils import bsfs, errors @@ -26,7 +26,9 @@ __all__: typing.Sequence[str] = ( def main(argv): """Index files or 
directories into BSFS.""" parser = argparse.ArgumentParser(description=main.__doc__, prog='index') - parser.add_argument('--user', type=bsfs.URI, default=bsfs.URI('http://example.com/me'), + parser.add_argument('--host', type=bsfs.URI, default=bsfs.URI('http://example.com'), + help='') + parser.add_argument('--user', type=str, default='me', help='') parser.add_argument('--collect', action='append', default=[], help='') @@ -66,16 +68,19 @@ def main(argv): ]) # pipeline builder pbuild = PipelineBuilder( - bsfs.Namespace(args.user + ('/' if not args.user.endswith('/') else '')), rbuild, ebuild, ) # build pipeline pipeline = pbuild.build() + # build the naming policy + naming_policy = DefaultNamingPolicy( + host=args.host, + user=args.user, + ) # build BSIE frontend - bsie = BSIE(pipeline, args.collect, args.discard) - + bsie = BSIE(pipeline, naming_policy, args.collect, args.discard) def walk(handle): """Walk through given input files.""" @@ -83,7 +88,6 @@ def main(argv): # FIXME: simplify code (below but maybe also above) # FIXME: How to handle dependencies between data? # E.g. do I still want to link to a tag despite not being permitted to set its label? - # FIXME: node renaming? 
# index input paths for path in args.input_file: diff --git a/bsie/apps/info.py b/bsie/apps/info.py index 64a4eba..4e948fc 100644 --- a/bsie/apps/info.py +++ b/bsie/apps/info.py @@ -54,7 +54,6 @@ def main(argv): ]) # pipeline builder pbuild = PipelineBuilder( - bsfs.Namespace('http://example.com/me/'), # not actually used rbuild, ebuild, ) diff --git a/bsie/lib/__init__.py b/bsie/lib/__init__.py index 4239d3b..48379de 100644 --- a/bsie/lib/__init__.py +++ b/bsie/lib/__init__.py @@ -10,6 +10,7 @@ import typing # inner-module imports from .bsie import BSIE from .builder import PipelineBuilder +from .naming_policy import DefaultNamingPolicy # exports __all__: typing.Sequence[str] = ( diff --git a/bsie/lib/bsie.py b/bsie/lib/bsie.py index 668783d..a572525 100644 --- a/bsie/lib/bsie.py +++ b/bsie/lib/bsie.py @@ -11,6 +11,7 @@ import typing from bsie.utils import bsfs, node, ns # inner-module imports +from .naming_policy import NamingPolicy from .pipeline import Pipeline # exports @@ -41,15 +42,18 @@ class BSIE(): def __init__( self, - # pipeline builder. + # pipeline. pipeline: Pipeline, + # naming policy + naming_policy: NamingPolicy, # principals to extract at most. None implies all available w.r.t. extractors. collect: typing.Optional[typing.Iterable[bsfs.URI]] = None, # principals to discard. discard: typing.Optional[typing.Iterable[bsfs.URI]] = None, ): - # store pipeline + # store pipeline and naming policy self._pipeline = pipeline + self._naming_policy = naming_policy # start off with available principals self._principals = {pred.uri for pred in self._pipeline.principals} # limit principals to specified ones by argument. 
@@ -89,6 +93,6 @@ class BSIE(): # predicate lookup principals = {self.schema.predicate(pred) for pred in principals} # invoke pipeline - yield from self._pipeline(path, principals) + yield from self._naming_policy(self._pipeline(path, principals)) ## EOF ## diff --git a/bsie/lib/builder.py b/bsie/lib/builder.py index c2abffe..39da441 100644 --- a/bsie/lib/builder.py +++ b/bsie/lib/builder.py @@ -11,7 +11,7 @@ import typing # bsie imports from bsie.extractor import ExtractorBuilder from bsie.reader import ReaderBuilder -from bsie.utils import bsfs, errors +from bsie.utils import errors # inner-module imports from . import pipeline @@ -29,9 +29,6 @@ logger = logging.getLogger(__name__) class PipelineBuilder(): """Build `bsie.tools.pipeline.Pipeline` instances.""" - # Prefix to be used in the Pipeline. - prefix: bsfs.Namespace - # builder for Readers. rbuild: ReaderBuilder @@ -40,11 +37,9 @@ class PipelineBuilder(): def __init__( self, - prefix: bsfs.Namespace, reader_builder: ReaderBuilder, extractor_builder: ExtractorBuilder, ): - self.prefix = prefix self.rbuild = reader_builder self.ebuild = extractor_builder @@ -80,6 +75,6 @@ class PipelineBuilder(): except errors.BuilderError as err: # failed to build reader logger.error(str(err)) - return pipeline.Pipeline(self.prefix, ext2rdr) + return pipeline.Pipeline(ext2rdr) ## EOF ## diff --git a/bsie/lib/naming_policy.py b/bsie/lib/naming_policy.py new file mode 100644 index 0000000..360abde --- /dev/null +++ b/bsie/lib/naming_policy.py @@ -0,0 +1,101 @@ +""" + +Part of the bsie module. +A copy of the license is provided with the project. 
+Author: Matthias Baumgartner, 2022 +""" +# standard imports +import abc +import os +import typing + +# bsie imports +from bsie.utils import bsfs, errors, ns +from bsie.utils.node import Node + +# exports +__all__: typing.Sequence[str] = ( + 'DefaultNamingPolicy', + ) + + +## code ## + +class NamingPolicy(): + """Determine node uri's from node hints.""" + def __call__( + self, + iterable: typing.Iterable[typing.Tuple[Node, bsfs.URI, typing.Any]], + ): + """Apply the policy on a triple iterator.""" + return NamingPolicyIterator(self, iterable) + + @abc.abstractmethod + def handle_node(self, node: Node) -> Node: + """Apply the policy on a node.""" + + +class NamingPolicyIterator(): + """Iterates over triples, determines uris according to a *policy* as it goes.""" + + # source triple iterator. + _iterable: typing.Iterable[typing.Tuple[Node, bsfs.URI, typing.Any]] + + # naming policy + _policy: NamingPolicy + + def __init__( + self, + policy: NamingPolicy, + iterable: typing.Iterable[typing.Tuple[Node, bsfs.URI, typing.Any]], + ): + self._iterable = iterable + self._policy = policy + + def __iter__(self): + for node, pred, value in self._iterable: + # handle subject + self._policy.handle_node(node) + # handle value + if isinstance(value, Node): + self._policy.handle_node(value) + # yield triple + yield node, pred, value + + +class DefaultNamingPolicy(NamingPolicy): + """Compose URIs as + + What information is used as fragment depends on the node type. + Typically, the default is to use the "ucid" hint. + The fallback in all cases is to generate a random uuid. + + Never changes previously assigned uris. Sets uris in-place. 
+ + """ + + def __init__( + self, + host: bsfs.URI, + user: str, + ): + self._prefix = bsfs.Namespace(os.path.join(host, user)) + self._uuid = bsfs.uuid.UUID() + + def handle_node(self, node: Node) -> Node: + if node.uri is not None: + return node + if node.node_type == ns.bsfs.File: + return self.name_file(node) + raise errors.ProgrammingError('no naming policy available for {node.node_type}') + + def name_file(self, node: Node) -> Node: + """Set a bsfs:File node's uri fragment to its ucid.""" + if 'ucid' in node.hints: # content id + fragment = node.hints['ucid'] + else: # random name + fragment = self._uuid() + node.uri = (self._prefix + 'file')[fragment] + return node + +## EOF ## diff --git a/bsie/lib/pipeline.py b/bsie/lib/pipeline.py index 44685ba..0bc5109 100644 --- a/bsie/lib/pipeline.py +++ b/bsie/lib/pipeline.py @@ -19,8 +19,6 @@ __all__: typing.Sequence[str] = ( 'Pipeline', ) -# constants -FILE_PREFIX = 'file#' ## code ## @@ -40,19 +38,14 @@ class Pipeline(): # combined extractor schemas. _schema: bsfs.schema.Schema - # node prefix. 
- _prefix: bsfs.Namespace - # extractor -> reader mapping _ext2rdr: typing.Dict[Extractor, typing.Optional[Reader]] def __init__( self, - prefix: bsfs.Namespace, ext2rdr: typing.Dict[Extractor, typing.Optional[Reader]] ): # store core members - self._prefix = prefix + FILE_PREFIX self._ext2rdr = ext2rdr # compile schema from all extractors self._schema = bsfs.schema.Schema.Union(ext.schema for ext in ext2rdr) @@ -64,12 +57,11 @@ class Pipeline(): return f'{bsfs.typename(self)}(...)' def __hash__(self) -> int: - return hash((type(self), self._prefix, self._schema, tuple(self._ext2rdr), tuple(self._ext2rdr.values()))) + return hash((type(self), self._schema, tuple(self._ext2rdr), tuple(self._ext2rdr.values()))) def __eq__(self, other: typing.Any) -> bool: return isinstance(other, type(self)) \ and self._schema == other._schema \ - and self._prefix == other._prefix \ and self._ext2rdr == other._ext2rdr @property @@ -117,8 +109,9 @@ class Pipeline(): rdr2ext[rdr].add(ext) # create subject for file - uuid = bsfs.uuid.UCID.from_path(path) - subject = node.Node(ns.bsfs.File, self._prefix[uuid]) + subject = node.Node(ns.bsfs.File, + ucid=bsfs.uuid.UCID.from_path(path), + ) # extract information for rdr, extrs in rdr2ext.items(): @@ -131,8 +124,7 @@ class Pipeline(): for ext in extrs: try: # get predicate/value tuples - for subject, pred, value in ext.extract(subject, content, principals): - yield subject, pred, value + yield from ext.extract(subject, content, principals) except errors.ExtractorError as err: # critical extractor failure. diff --git a/bsie/utils/node.py b/bsie/utils/node.py index 91e4f37..aa62c06 100644 --- a/bsie/utils/node.py +++ b/bsie/utils/node.py @@ -19,30 +19,47 @@ __all__: typing.Sequence[str] = ( ## code ## class Node(): - """Lightweight Node, disconnected from any bsfs structures.""" + """Lightweight Node, disconnected from any bsfs structures. + + In most cases, provide *hints* and leave setting the uri to a node + naming policy. 
Only provide an *uri* if it is absolutely determined. + + """ # node type. node_type: bsfs.URI # node URI. - uri: bsfs.URI + uri: typing.Optional[bsfs.URI] + + # node naming hints. + hits: dict def __init__( self, node_type: bsfs.URI, - uri: bsfs.URI, + uri: typing.Optional[bsfs.URI] = None, + **uri_hints, ): # assign members self.node_type = bsfs.URI(node_type) - self.uri = bsfs.URI(uri) + self.hints = uri_hints + self.uri = uri def __eq__(self, other: typing.Any) -> bool: + """Compare two Node instances based on type and uri. + Compares hits only if the uri is not yet specified. + """ return isinstance(other, Node) \ and other.node_type == self.node_type \ - and other.uri == self.uri + and other.uri == self.uri \ + and (self.uri is not None or self.hints == other.hints) def __hash__(self) -> int: - return hash((type(self), self.node_type, self.uri)) + identifier = self.uri + if identifier is None: + identifier = tuple((key, self.hints[key]) for key in sorted(self.hints)) + return hash((type(self), self.node_type, identifier)) def __str__(self) -> str: return f'{bsfs.typename(self)}({self.node_type}, {self.uri})' diff --git a/test/lib/test_bsie.py b/test/lib/test_bsie.py index 38e6f59..ae23c4b 100644 --- a/test/lib/test_bsie.py +++ b/test/lib/test_bsie.py @@ -11,7 +11,7 @@ import unittest # bsie imports from bsie.extractor import ExtractorBuilder from bsie.extractor.base import SCHEMA_PREAMBLE -from bsie.lib import PipelineBuilder +from bsie.lib import PipelineBuilder, DefaultNamingPolicy from bsie.reader import ReaderBuilder from bsie.utils import bsfs, node, ns @@ -40,13 +40,13 @@ class TestBSIE(unittest.TestCase): )}, ]) # build pipeline - self.prefix = bsfs.Namespace('http://example.com/local/') - pbuild = PipelineBuilder(self.prefix, rbuild, ebuild) + self.naming_policy = DefaultNamingPolicy(host='http://example.com/local', user='') + pbuild = PipelineBuilder(rbuild, ebuild) self.pipeline = pbuild.build() def test_construction(self): - # pipeline only - lib = 
BSIE(self.pipeline) + # only pipeline and naming policy + lib = BSIE(self.pipeline, self.naming_policy) self.assertSetEqual(set(lib.principals), { ns.bse.filename, ns.bse.filesize, @@ -70,7 +70,7 @@ class TestBSIE(unittest.TestCase): ''')) # specify collect - lib = BSIE(self.pipeline, collect={ + lib = BSIE(self.pipeline, self.naming_policy, collect={ ns.bse.filesize, ns.bse.author, ns.bse.inexistent, @@ -91,7 +91,7 @@ class TestBSIE(unittest.TestCase): bsfs:unique "true"^^xsd:boolean . ''')) # empty collect is disregarded - lib = BSIE(self.pipeline, collect={}) + lib = BSIE(self.pipeline, self.naming_policy, collect={}) self.assertSetEqual(set(lib.principals), { ns.bse.filename, ns.bse.filesize, @@ -116,7 +116,7 @@ class TestBSIE(unittest.TestCase): ''')) # specify discard - lib = BSIE(self.pipeline, discard={ + lib = BSIE(self.pipeline, self.naming_policy, discard={ ns.bse.filesize, ns.bse.filename, ns.bse.inexistent, @@ -132,7 +132,7 @@ class TestBSIE(unittest.TestCase): ''')) # specify collect and discard - lib = BSIE(self.pipeline, + lib = BSIE(self.pipeline, self.naming_policy, collect={ns.bse.filesize, ns.bse.author, ns.bse.foo, ns.bse.bar}, discard={ns.bse.author, ns.bse.foo, ns.bse.foobar}, ) @@ -150,14 +150,14 @@ class TestBSIE(unittest.TestCase): def test_from_file(self): # setup - lib = BSIE(self.pipeline) + lib = BSIE(self.pipeline, self.naming_policy) self.assertSetEqual(set(lib.principals), { ns.bse.filesize, ns.bse.filename, ns.bse.author, }) content_hash = 'a948904f2f0f479b8f8197694b30184b0d2ed1c1cd2a1ec0fb85d299a192a447' - subject = node.Node(ns.bsfs.File, (self.prefix + 'file#')[content_hash]) + subject = node.Node(ns.bsfs.File, uri=f'http://example.com/local/file#{content_hash}') testfile = os.path.join(os.path.dirname(__file__), 'testfile.t') # from_file extracts all available triples diff --git a/test/lib/test_builder.py b/test/lib/test_builder.py index 273d620..48e932b 100644 --- a/test/lib/test_builder.py +++ b/test/lib/test_builder.py @@ 
-21,7 +21,6 @@ from bsie.lib import PipelineBuilder class TestPipelineBuilder(unittest.TestCase): def test_build(self): - prefix = bsfs.URI('http://example.com/local/file#') c_schema = ''' bse:author rdfs:subClassOf bsfs:Predicate ; rdfs:domain bsfs:Entity ; @@ -40,7 +39,7 @@ class TestPipelineBuilder(unittest.TestCase): )}, ]) # build pipeline - builder = PipelineBuilder(prefix, rbuild, ebuild) + builder = PipelineBuilder(rbuild, ebuild) pipeline = builder.build() # delayed import import bsie.reader.path @@ -61,7 +60,7 @@ class TestPipelineBuilder(unittest.TestCase): {'bsie.extractor.generic.path.Path': {}}, ]) with self.assertLogs(logging.getLogger('bsie.lib.builder'), logging.ERROR): - pipeline = PipelineBuilder(prefix, rbuild, ebuild_err).build() + pipeline = PipelineBuilder(rbuild, ebuild_err).build() self.assertDictEqual(pipeline._ext2rdr, { bsie.extractor.generic.path.Path(): bsie.reader.path.Path()}) @@ -71,7 +70,7 @@ class TestPipelineBuilder(unittest.TestCase): {'bsie.extractor.generic.path.Path': {}}, ]) with self.assertLogs(logging.getLogger('bsie.lib.builder'), logging.ERROR): - pipeline = PipelineBuilder(prefix, rbuild, ebuild_err).build() + pipeline = PipelineBuilder(rbuild, ebuild_err).build() self.assertDictEqual(pipeline._ext2rdr, { bsie.extractor.generic.path.Path(): bsie.reader.path.Path()}) @@ -81,7 +80,7 @@ class TestPipelineBuilder(unittest.TestCase): old_reader = bsie.extractor.generic.path.Path.CONTENT_READER bsie.extractor.generic.path.Path.CONTENT_READER = 'bsie.reader.foo.Foo' # build pipeline with invalid reader reference - pipeline = PipelineBuilder(prefix, rbuild, ebuild).build() + pipeline = PipelineBuilder(rbuild, ebuild).build() self.assertDictEqual(pipeline._ext2rdr, { bsie.extractor.generic.stat.Stat(): bsie.reader.stat.Stat(), bsie.extractor.generic.constant.Constant(c_schema, c_tuples): None, @@ -92,7 +91,7 @@ class TestPipelineBuilder(unittest.TestCase): # fail to build reader rbuild_err = 
ReaderBuilder({'bsie.reader.stat.Stat': dict(foo=123)}) with self.assertLogs(logging.getLogger('bsie.lib.builder'), logging.ERROR): - pipeline = PipelineBuilder(prefix, rbuild_err, ebuild).build() + pipeline = PipelineBuilder(rbuild_err, ebuild).build() self.assertDictEqual(pipeline._ext2rdr, { bsie.extractor.generic.path.Path(): bsie.reader.path.Path(), bsie.extractor.generic.constant.Constant(c_schema, c_tuples): None, diff --git a/test/lib/test_naming_policy.py b/test/lib/test_naming_policy.py new file mode 100644 index 0000000..763537b --- /dev/null +++ b/test/lib/test_naming_policy.py @@ -0,0 +1,86 @@ +""" + +Part of the bsie test suite. +A copy of the license is provided with the project. +Author: Matthias Baumgartner, 2022 +""" +# standard imports +import unittest + +# bsie imports +from bsie.utils import ns, errors +from bsie.utils.bsfs import URI +from bsie.utils.node import Node + +# objects to test +from bsie.lib.naming_policy import NamingPolicy, NamingPolicyIterator, DefaultNamingPolicy + + + +## code ## + +class TestDefaultNamingPolicy(unittest.TestCase): + + def test_handle_node(self): + # setup + policy = DefaultNamingPolicy('http://example.com', 'me') + # handle_node doesn't modify existing uris + self.assertEqual(policy.handle_node( + Node(ns.bsfs.Entity, uri='http://example.com/you/foo#bar')).uri, + URI('http://example.com/you/foo#bar')) + # processes bsfs:File + self.assertEqual(policy.handle_node( + Node(ns.bsfs.File, ucid='abc123cba')).uri, + URI('http://example.com/me/file#abc123cba')) + # raises an exception on unknown types + self.assertRaises(errors.ProgrammingError, policy.handle_node, + Node(ns.bsfs.Entity, ucid='abc123cba', size=123)) + + def test_name_file(self): + # setup + policy = DefaultNamingPolicy('http://example.com', 'me') + # name_file uses ucid + self.assertEqual(policy.name_file( + Node(ns.bsfs.File, ucid='123abc321')).uri, + URI('http://example.com/me/file#123abc321')) + # name_file falls back to a random guid + 
self.assertTrue(policy.name_file( + Node(ns.bsfs.File)).uri.startswith('http://example.com/me/file#')) + + +class TestNamingPolicyIterator(unittest.TestCase): + + def test_call(self): # NOTE: We test NamingPolicy.__call__ here + # setup + policy = DefaultNamingPolicy('http://example.com', 'me') + # call accepts list + triples = [('node', 'pred', 'value'), ('node', 'pred', 'value')] + it = policy(triples) + self.assertIsInstance(it, NamingPolicyIterator) + self.assertEqual(it._iterable, triples) + self.assertEqual(it._policy, policy) + # call accepts iterator + triples = iter([('node', 'pred', 'value'), ('node', 'pred', 'value')]) + it = policy(triples) + self.assertIsInstance(it, NamingPolicyIterator) + self.assertEqual(it._iterable, triples) + self.assertEqual(it._policy, policy) + + def test_iter(self): + # setup + policy = DefaultNamingPolicy('http://example.com', 'me') + triples = [ + (Node(ns.bsfs.File, ucid='foo'), 'predA', 'hello'), + ] + # handles nodes, handles values, ignores predicate + self.assertListEqual(list(policy(triples)), [ + (Node(ns.bsfs.File, uri='http://example.com/me/file#foo'), 'predA', 'hello'), + ]) + + +## main ## + +if __name__ == '__main__': + unittest.main() + +## EOF ## diff --git a/test/lib/test_pipeline.py b/test/lib/test_pipeline.py index 8fecc74..61fddd7 100644 --- a/test/lib/test_pipeline.py +++ b/test/lib/test_pipeline.py @@ -48,32 +48,28 @@ class TestPipeline(unittest.TestCase): bsie.extractor.generic.constant.Constant(csA, tupA): None, bsie.extractor.generic.constant.Constant(csB, tupB): None, } - self.prefix = bsfs.Namespace('http://example.com/local/') def test_essentials(self): - pipeline = Pipeline(self.prefix, self.ext2rdr) + pipeline = Pipeline(self.ext2rdr) self.assertEqual(str(pipeline), 'Pipeline') self.assertEqual(repr(pipeline), 'Pipeline(...)') def test_equality(self): - pipeline = Pipeline(self.prefix, self.ext2rdr) + pipeline = Pipeline(self.ext2rdr) # a pipeline is equivalent to itself 
self.assertEqual(pipeline, pipeline) self.assertEqual(hash(pipeline), hash(pipeline)) # identical builds are equivalent - self.assertEqual(pipeline, Pipeline(self.prefix, self.ext2rdr)) - self.assertEqual(hash(pipeline), hash(Pipeline(self.prefix, self.ext2rdr))) + self.assertEqual(pipeline, Pipeline(self.ext2rdr)) + self.assertEqual(hash(pipeline), hash(Pipeline(self.ext2rdr))) - # equivalence respects prefix - self.assertNotEqual(pipeline, Pipeline(bsfs.URI('http://example.com/global/ent#'), self.ext2rdr)) - self.assertNotEqual(hash(pipeline), hash(Pipeline(bsfs.URI('http://example.com/global/ent#'), self.ext2rdr))) # equivalence respects extractors/readers ext2rdr = {ext: rdr for idx, (ext, rdr) in enumerate(self.ext2rdr.items()) if idx % 2 == 0} - self.assertNotEqual(pipeline, Pipeline(self.prefix, ext2rdr)) - self.assertNotEqual(hash(pipeline), hash(Pipeline(self.prefix, ext2rdr))) + self.assertNotEqual(pipeline, Pipeline(ext2rdr)) + self.assertNotEqual(hash(pipeline), hash(Pipeline(ext2rdr))) # equivalence respects schema - p2 = Pipeline(self.prefix, self.ext2rdr) + p2 = Pipeline(self.ext2rdr) p2._schema = bsfs.schema.Schema() self.assertNotEqual(pipeline, p2) self.assertNotEqual(hash(pipeline), hash(p2)) @@ -90,10 +86,10 @@ class TestPipeline(unittest.TestCase): def test_call(self): # build pipeline - pipeline = Pipeline(self.prefix, self.ext2rdr) + pipeline = Pipeline(self.ext2rdr) # build objects for tests content_hash = 'a948904f2f0f479b8f8197694b30184b0d2ed1c1cd2a1ec0fb85d299a192a447' - subject = node.Node(ns.bsfs.File, (self.prefix + 'file#')[content_hash]) + subject = node.Node(ns.bsfs.File, ucid=content_hash) testfile = os.path.join(os.path.dirname(__file__), 'testfile.t') p_filename = pipeline.schema.predicate(ns.bse.filename) p_filesize = pipeline.schema.predicate(ns.bse.filesize) @@ -138,7 +134,7 @@ class TestPipeline(unittest.TestCase): def __call__(self, path): raise errors.ReaderError('reader error') - pipeline = Pipeline(self.prefix, 
{bsie.extractor.generic.path.Path(): FaultyReader()}) + pipeline = Pipeline({bsie.extractor.generic.path.Path(): FaultyReader()}) with self.assertLogs(logging.getLogger('bsie.lib.pipeline'), logging.ERROR): testfile = os.path.join(os.path.dirname(__file__), 'testfile.t') p_filename = pipeline.schema.predicate(ns.bse.filename) @@ -149,7 +145,7 @@ class TestPipeline(unittest.TestCase): def extract(self, subject, content, predicates): raise errors.ExtractorError('extractor error') - pipeline = Pipeline(self.prefix, {FaultyExtractor(): bsie.reader.path.Path()}) + pipeline = Pipeline({FaultyExtractor(): bsie.reader.path.Path()}) with self.assertLogs(logging.getLogger('bsie.lib.pipeline'), logging.ERROR): testfile = os.path.join(os.path.dirname(__file__), 'testfile.t') p_filename = pipeline.schema.predicate(ns.bse.filename) @@ -157,7 +153,7 @@ class TestPipeline(unittest.TestCase): def test_predicates(self): # build pipeline - pipeline = Pipeline(self.prefix, self.ext2rdr) + pipeline = Pipeline(self.ext2rdr) # self.assertSetEqual(set(pipeline.principals), { pipeline.schema.predicate(ns.bse.filename), diff --git a/test/utils/test_node.py b/test/utils/test_node.py index 9feb051..1dcd0ed 100644 --- a/test/utils/test_node.py +++ b/test/utils/test_node.py @@ -18,22 +18,54 @@ from bsie.utils.node import Node class TestNode(unittest.TestCase): def test_equality(self): - uri = bsfs.URI('http://example.com/me/entity#1234') - node = Node(ns.bsfs.Entity, uri) - # basic equivalence - self.assertEqual(node, Node(ns.bsfs.Entity, bsfs.URI('http://example.com/me/entity#1234'))) - self.assertEqual(hash(node), hash(Node(ns.bsfs.Entity, bsfs.URI('http://example.com/me/entity#1234')))) + uri1 = bsfs.URI('http://example.com/me/entity#1234') + uri2 = bsfs.URI('http://example.com/me/entity#4321') + node = Node(ns.bsfs.Entity, uri1) # equality respects uri - self.assertNotEqual(node, Node(ns.bsfs.Entity, bsfs.URI('http://example.com/me/entity#4321'))) - self.assertNotEqual(hash(node), 
hash(Node(ns.bsfs.Entity, bsfs.URI('http://example.com/me/entity#4321')))) + self.assertEqual(node, Node(ns.bsfs.Entity, uri1)) + self.assertEqual(hash(node), hash(Node(ns.bsfs.Entity, uri1))) + self.assertNotEqual(node, Node(ns.bsfs.Entity, uri2)) + self.assertNotEqual(hash(node), hash(Node(ns.bsfs.Entity, uri2))) + # equality respects hints + self.assertEqual( + Node(ns.bsfs.Entity, foo='foo'), + Node(ns.bsfs.Entity, foo='foo')) + self.assertEqual( + hash(Node(ns.bsfs.Entity, foo='foo')), + hash(Node(ns.bsfs.Entity, foo='foo'))) + self.assertNotEqual( + Node(ns.bsfs.Entity, foo='foo'), + Node(ns.bsfs.Entity, foo='bar')) + self.assertNotEqual( + hash(Node(ns.bsfs.Entity, foo='foo')), + hash(Node(ns.bsfs.Entity, foo='bar'))) + self.assertNotEqual( + Node(ns.bsfs.Entity, foo='bar'), + Node(ns.bsfs.Entity, bar='foo')) + self.assertNotEqual( + hash(Node(ns.bsfs.Entity, foo='bar')), + hash(Node(ns.bsfs.Entity, bar='foo'))) + # hints are irrelevant if uri is set + self.assertEqual( + Node(ns.bsfs.Entity, uri=uri1, foo='bar'), + Node(ns.bsfs.Entity, uri=uri1, bar='foo')) + self.assertEqual( + hash(Node(ns.bsfs.Entity, uri=uri1, foo='bar')), + hash(Node(ns.bsfs.Entity, uri=uri1, bar='foo'))) + self.assertNotEqual( + Node(ns.bsfs.Entity, uri=uri1, foo='bar'), + Node(ns.bsfs.Entity, uri=uri2, bar='foo')) + self.assertNotEqual( + hash(Node(ns.bsfs.Entity, uri=uri1, foo='bar')), + hash(Node(ns.bsfs.Entity, uri=uri2, bar='foo'))) # equality respects node_type - self.assertNotEqual(node, Node(ns.bsfs.Foo, uri)) - self.assertNotEqual(hash(node), hash(Node(ns.bsfs.Foo, uri))) + self.assertNotEqual(node, Node(ns.bsfs.Foo, uri1)) + self.assertNotEqual(hash(node), hash(Node(ns.bsfs.Foo, uri1))) # not equal to other types self.assertNotEqual(node, 1234) self.assertNotEqual(hash(node), hash(1234)) - self.assertNotEqual(node, uri) - self.assertNotEqual(hash(node), hash(uri)) + self.assertNotEqual(node, uri1) + self.assertNotEqual(hash(node), hash(uri1)) self.assertNotEqual(node, 
ns.bsfs.Entity) self.assertNotEqual(hash(node), hash(ns.bsfs.Entity)) class Foo(): pass -- cgit v1.2.3 From 482235a8229261fa905f73ce167982bca57ab3e6 Mon Sep 17 00:00:00 2001 From: Matthias Baumgartner Date: Wed, 8 Feb 2023 19:21:28 +0100 Subject: preview reader --- .gitignore | 1 + bsie/reader/preview/__init__.py | 39 ++++++++++++++++ bsie/reader/preview/_pg.py | 86 ++++++++++++++++++++++++++++++++++++ bsie/reader/preview/_pillow.py | 44 ++++++++++++++++++ bsie/reader/preview/_rawpy.py | 66 +++++++++++++++++++++++++++ bsie/reader/preview/utils.py | 39 ++++++++++++++++ setup.py | 1 + test/reader/preview/__init__.py | 0 test/reader/preview/invalid.foo | 0 test/reader/preview/invalid.jpg | 0 test/reader/preview/load_nef.py | 28 ++++++++++++ test/reader/preview/test_pg.py | 82 ++++++++++++++++++++++++++++++++++ test/reader/preview/test_pillow.py | 50 +++++++++++++++++++++ test/reader/preview/test_preview.py | 77 ++++++++++++++++++++++++++++++++ test/reader/preview/test_rawpy.py | 59 +++++++++++++++++++++++++ test/reader/preview/test_utils.py | 44 ++++++++++++++++++ test/reader/preview/testfile.pdf | Bin 0 -> 7295 bytes test/reader/preview/testimage.jpg | Bin 0 -> 6476 bytes 18 files changed, 616 insertions(+) create mode 100644 bsie/reader/preview/__init__.py create mode 100644 bsie/reader/preview/_pg.py create mode 100644 bsie/reader/preview/_pillow.py create mode 100644 bsie/reader/preview/_rawpy.py create mode 100644 bsie/reader/preview/utils.py create mode 100644 test/reader/preview/__init__.py create mode 100644 test/reader/preview/invalid.foo create mode 100644 test/reader/preview/invalid.jpg create mode 100644 test/reader/preview/load_nef.py create mode 100644 test/reader/preview/test_pg.py create mode 100644 test/reader/preview/test_pillow.py create mode 100644 test/reader/preview/test_preview.py create mode 100644 test/reader/preview/test_rawpy.py create mode 100644 test/reader/preview/test_utils.py create mode 100644 test/reader/preview/testfile.pdf create 
mode 100644 test/reader/preview/testimage.jpg diff --git a/.gitignore b/.gitignore index 304ae08..c046d71 100644 --- a/.gitignore +++ b/.gitignore @@ -24,5 +24,6 @@ doc/build/ # testing data test/reader/image/testimage.nef* +test/reader/preview/testimage.nef* ## EOF ## diff --git a/bsie/reader/preview/__init__.py b/bsie/reader/preview/__init__.py new file mode 100644 index 0000000..3e69a4a --- /dev/null +++ b/bsie/reader/preview/__init__.py @@ -0,0 +1,39 @@ +""" + +Part of the bsie module. +A copy of the license is provided with the project. +Author: Matthias Baumgartner, 2022 +""" +# imports +import typing + +# external imports +import PIL.Image + +# inner-module imports +from .. import chain + +# constants +_FILE_FORMAT_READERS: typing.Sequence[str] = ( + # native image formats + __package__ + '._pillow.PillowPreviewReader', + __package__ + '._rawpy.RawpyPreviewReader', + # multiformat readers + __package__ + '._pg.PreviewGeneratorReader', + ) + +# exports +__all__: typing.Sequence[str] = ( + 'Preview', + ) + + +## code ## + +class Preview(chain.ReaderChain[typing.Callable[[int], PIL.Image.Image]]): # pylint: disable=too-few-public-methods + """Create a preview from a file.""" + + def __init__(self, cfg: typing.Optional[typing.Any] = None): + super().__init__(_FILE_FORMAT_READERS, cfg) + +## EOF ## diff --git a/bsie/reader/preview/_pg.py b/bsie/reader/preview/_pg.py new file mode 100644 index 0000000..097c513 --- /dev/null +++ b/bsie/reader/preview/_pg.py @@ -0,0 +1,86 @@ +""" + +Part of the bsie module. +A copy of the license is provided with the project. +Author: Matthias Baumgartner, 2022 +""" +# standard imports +from functools import partial +import contextlib +import io +import os +import shutil +import tempfile +import typing + +# external imports +from preview_generator.manager import PreviewManager +import PIL.Image + +# bsie imports +from bsie.utils import errors + +# inner-module imports +from .. 
import base + +# exports +__all__: typing.Sequence[str] = ( + 'PreviewGeneratorReader', + ) + + +## code ## + +class PreviewGeneratorReader(base.Reader): + """Uses preview_generator to create previews for various data formats. + See `https://github.com/algoo/preview-generator`_ for details. + """ + + # PreviewManager instance. + _mngr: PreviewManager + + # Set of mime types supported by PreviewManager. + _supported_mimetypes: typing.Set[str] + + # PreviewManager cache. + _cache: str + + # Determines whether the cache directory should be deleted after use. + _cleanup: bool + + def __init__(self, cache: typing.Optional[str] = None): + # initialize cache directory + # TODO: initialize in memory, e.g., via PyFilesystem + if cache is None: + self._cache = tempfile.mkdtemp(prefix='bsie-preview-cache-') + self._cleanup = True + else: + self._cache = cache + self._cleanup = False + # create preview generator + with contextlib.redirect_stderr(io.StringIO()): + self._mngr = PreviewManager(self._cache, create_folder=True) + self._supported_mimetypes = set(self._mngr.get_supported_mimetypes()) + + def __del__(self): + if self._cleanup: + shutil.rmtree(self._cache, ignore_errors=True) + + def __call__(self, path: str) -> typing.Callable[[int], PIL.Image.Image]: + if not os.path.exists(path): + raise errors.ReaderError(path) + if self._mngr.get_mimetype(path) not in self._supported_mimetypes: + raise errors.UnsupportedFileFormatError(path) + return partial(self._preview_callback, path) + + def _preview_callback(self, path: str, max_side: int) -> PIL.Image.Image: + """Produce a jpeg preview of *path* with at most *max_side* side length.""" + try: + # generate the preview + preview_path = self._mngr.get_jpeg_preview(path, width=max_side, height=max_side) + # open the preview and return + return PIL.Image.open(preview_path) + except Exception as err: # FIXME: less generic exception! 
+ raise errors.ReaderError(path) from err + +## EOF ## diff --git a/bsie/reader/preview/_pillow.py b/bsie/reader/preview/_pillow.py new file mode 100644 index 0000000..174d509 --- /dev/null +++ b/bsie/reader/preview/_pillow.py @@ -0,0 +1,44 @@ +""" + +Part of the bsie module. +A copy of the license is provided with the project. +Author: Matthias Baumgartner, 2022 +""" +# standard imports +from functools import partial +import typing + +# external imports +import PIL.Image + +# bsie imports +from bsie.utils import errors + +# inner-module imports +from . import utils +from .. import base + +# exports +__all__: typing.Sequence[str] = ( + 'PillowPreviewReader', + ) + + +## code ## + +class PillowPreviewReader(base.Reader): + """Produce previews for image files using the Pillow library.""" + + def __call__(self, path: str) -> typing.Callable[[int], PIL.Image.Image]: + try: + # open file with PIL + img = PIL.Image.open(path) + # return callback + return partial(utils.resize, img) + except PIL.UnidentifiedImageError as err: + # failed to open, skip file + raise errors.UnsupportedFileFormatError(path) from err + except IOError as err: + raise errors.ReaderError(path) from err + +# EOF ## diff --git a/bsie/reader/preview/_rawpy.py b/bsie/reader/preview/_rawpy.py new file mode 100644 index 0000000..2c20a48 --- /dev/null +++ b/bsie/reader/preview/_rawpy.py @@ -0,0 +1,66 @@ +""" + +Part of the bsie module. +A copy of the license is provided with the project. +Author: Matthias Baumgartner, 2022 +""" +# standard imports +from functools import partial +import typing + +# external imports +import PIL.Image +import rawpy + +# bsie imports +from bsie.utils import errors, filematcher + +# inner-module imports +from . import utils +from .. 
class RawpyPreviewReader(base.Reader):
    """Produce previews for raw image files using the rawpy library.

    Calling the reader with a file path returns a callback. The callback
    takes a maximum side length and returns the resized PIL image.
    """

    # file matcher
    _match: filematcher.Matcher

    # additional kwargs to rawpy's postprocess
    _rawpy_kwargs: typing.Dict[str, typing.Any]

    def __init__(self, **rawpy_kwargs):
        """Create the reader.

        The optional keyword *file_match_rule* overrides the default
        MATCH_RULE. All other keyword arguments are passed on to rawpy's
        postprocess.
        """
        match_rule = rawpy_kwargs.pop('file_match_rule', MATCH_RULE)
        self._match = filematcher.parse(match_rule)
        self._rawpy_kwargs = rawpy_kwargs

    def __call__(self, path: str) -> typing.Callable[[int], PIL.Image.Image]:
        """Return a preview callback for *path*.

        Raises an UnsupportedFileFormatError if *path* does not satisfy the
        match rule, and a ReaderError if rawpy fails to process the file.
        """
        # perform quick checks first
        if not self._match(path):
            raise errors.UnsupportedFileFormatError(path)

        try:
            # open file with rawpy; the context manager releases the
            # underlying file handle once the image has been developed
            with rawpy.imread(path) as raw:
                ary = raw.postprocess(**self._rawpy_kwargs)
            # convert to PIL.Image
            img = PIL.Image.fromarray(ary)
            # return callback
            return partial(utils.resize, img)

        except (rawpy.LibRawFatalError, # pylint: disable=no-member # pylint doesn't find the errors
                rawpy.NotSupportedError, # pylint: disable=no-member
                rawpy.LibRawNonFatalError, # pylint: disable=no-member
                ) as err:
            raise errors.ReaderError(path) from err
def resize(
        img: PIL.Image.Image,
        max_size: int,
        ) -> PIL.Image.Image:
    """Resize an image to a given maximum side length.

    The longer side of the result is *max_size* pixels long and the shorter
    side is scaled by the same factor, so the aspect ratio is preserved up to
    rounding. The shorter side is at least one pixel. The image is scaled up
    if *max_size* exceeds its current dimensions.
    """
    # determine target dimensions
    ratio = img.width / img.height
    if img.width > img.height:
        width, height = max_size, round(max_size / ratio)
    else:
        width, height = round(ratio * max_size), max_size
    # very elongated images would otherwise round the short side down to
    # zero, which is not a valid target size
    width, height = max(1, width), max(1, height)
    # rescale and return
    return img.resize(
        (width, height),
        resample=PIL.Image.Resampling.LANCZOS, # create high-quality image
        reducing_gap=3.0, # optimize computation via fast size reduction
        )
def get():
    """Download a raw test image.

    The image is stored as 'testimage.nef' next to this module. Nothing is
    downloaded if that file already exists. A failed request raises an
    exception from the requests library and leaves no file behind.
    """
    target = os.path.join(os.path.dirname(__file__), 'testimage.nef')
    if os.path.exists(target):
        return
    # fetch first, so that a failed request does not leave an empty target
    # file that would suppress all future download attempts
    ans = requests.get(IMAGE_URL, timeout=60)
    ans.raise_for_status()
    # write to a temporary name and rename, so that an interrupted write
    # never leaves a truncated image under the final name
    partial_target = target + '.part'
    try:
        with open(partial_target, 'wb') as ofile:
            ofile.write(ans.content)
        os.replace(partial_target, target)
    finally:
        # only still present if writing or renaming failed
        if os.path.exists(partial_target):
            os.remove(partial_target)
img.close() + + # preview generator can also extract data from non-image files + gen = rdr(os.path.join(os.path.dirname(__file__), 'testfile.pdf')) + self.assertIsInstance(gen, partial) + # generator produces an image + img = gen(10) + self.assertIsInstance(img, PIL.Image.Image) + self.assertEqual(img.size, (8, 10)) + self.assertEqual(sum(img.getdata()), 20258) + # cleanup + img.close() + + # can define a cache dir + pg_dir = tempfile.mkdtemp(prefix='bsie-test') + self.assertTrue(os.path.exists(pg_dir)) + rdr = PreviewGeneratorReader(cache=pg_dir) + gen = rdr(os.path.join(os.path.dirname(__file__), 'testimage.jpg')) + img = gen(10) + self.assertIsInstance(img, PIL.Image.Image) + self.assertEqual(img.size, (10, 10)) + self.assertEqual(sum(img.getdata()), 0) + img.close() + del rdr + # cache dir still exists after instance deletion + self.assertTrue(os.path.exists(pg_dir)) + shutil.rmtree(pg_dir, ignore_errors=True) + + +## main ## + +if __name__ == '__main__': + unittest.main() + +## EOF ## diff --git a/test/reader/preview/test_pillow.py b/test/reader/preview/test_pillow.py new file mode 100644 index 0000000..ca38d89 --- /dev/null +++ b/test/reader/preview/test_pillow.py @@ -0,0 +1,50 @@ +""" + +Part of the bsie test suite. +A copy of the license is provided with the project. 
class TestPillowPreviewReader(unittest.TestCase):
    """Check the PillowPreviewReader against the fixture files next to this module."""

    def test_call(self):
        here = os.path.dirname(__file__)
        reader = PillowPreviewReader()
        # raises exception when image cannot be read
        with self.assertRaises(errors.ReaderError):
            reader(os.path.join(here, 'invalid.jpg'))
        # raises exception when image has invalid type
        with self.assertRaises(errors.UnsupportedFileFormatError):
            reader(os.path.join(here, 'invalid.foo'))
        # proper file produces a generator
        callback = reader(os.path.join(here, 'testimage.jpg'))
        self.assertIsInstance(callback, partial)
        # generator produces an image
        image = callback(10)
        self.assertIsInstance(image, PIL.Image.Image)
        self.assertEqual(image.size, (10, 10))
        # all bands of all pixels are zero
        total = 0
        for pixel in image.getdata():
            for band in pixel:
                total += band
        self.assertEqual(total, 0)
        # cleanup
        image.close()
class TestPreview(unittest.TestCase):
    """Check the Preview reader against the fixture files next to this module."""

    def setUp(self):
        # fetch the raw test image; the module name is relative under parent
        # discovery and absolute for a direct call or local discovery
        if __package__:
            module_name = '.load_nef'
        else:
            module_name = 'load_nef'
        importlib.import_module(module_name, __package__).get()

    def test_construct(self):
        reader = Preview()
        self.assertIsInstance(reader, Preview)
        self.assertEqual(len(reader._children), 3)

    def test_call(self):
        here = os.path.dirname(__file__)
        reader = Preview()
        # call raises error if file cannot be read
        with self.assertRaises(errors.ReaderError):
            reader(os.path.join(here, 'missing.jpg'))
        # an invalid file is accepted by the call but fails in the callback
        callback = reader(os.path.join(here, 'invalid.jpg'))
        with self.assertRaises(errors.ReaderError):
            callback(10)
        with self.assertRaises(errors.UnsupportedFileFormatError):
            reader(os.path.join(here, 'invalid.foo'))

        # call returns raw preview
        callback = reader(os.path.join(here, 'testimage.nef'))
        image = callback(10)
        self.assertIsInstance(image, PIL.Image.Image)
        self.assertEqual(image.size, (10, 8))
        self.assertEqual(sum(band for pix in image.getdata() for band in pix), 25287)
        image.close()

        # call returns jpeg image
        callback = reader(os.path.join(here, 'testimage.jpg'))
        image = callback(10)
        self.assertIsInstance(image, PIL.Image.Image)
        self.assertEqual(image.size, (10, 10))
        self.assertEqual(sum(band for pix in image.getdata() for band in pix), 0)
        image.close()

        # preview generator can also extract data from non-image files
        callback = reader(os.path.join(here, 'testfile.pdf'))
        image = callback(10)
        self.assertIsInstance(image, PIL.Image.Image)
        self.assertEqual(image.size, (8, 10))
        self.assertEqual(sum(image.getdata()), 20258)
        image.close()
class TestUtils(unittest.TestCase):
    """Check the preview helper functions."""

    def test_resize(self):
        # open the fixture in a with-block so that the file is closed again,
        # also if one of the assertions fails
        with PIL.Image.open(os.path.join(os.path.dirname(__file__), 'testimage.jpg')) as img:
            landscape = img.resize((100, 80))
            portrait = img.resize((80, 100))
            self.assertEqual(img.size, (100, 100))
            self.assertEqual(landscape.size, (100, 80))
            self.assertEqual(portrait.size, (80, 100))
            # resize can downscale
            self.assertEqual(resize(img, 10).size, (10, 10))
            self.assertEqual(resize(img, 20).size, (20, 20))
            # resize can upscale
            self.assertEqual(resize(img, 200).size, (200, 200))
            # aspect ratio is preserved
            self.assertEqual(resize(landscape, 10).size, (10, 8))
            self.assertEqual(resize(portrait, 10).size, (8, 10))
bsie/utils/namespaces.py | 4 +- test/apps/test_index.py | 273 +++++++++++++++++++++++++++++------------ test/apps/test_info.py | 12 +- test/extractor/test_preview.py | 128 +++++++++++++++++++ test/extractor/testimage.jpg | Bin 0 -> 6476 bytes test/lib/test_naming_policy.py | 34 +++++ 11 files changed, 498 insertions(+), 82 deletions(-) create mode 100644 bsie/extractor/preview.py create mode 100644 test/extractor/test_preview.py create mode 100644 test/extractor/testimage.jpg diff --git a/bsie/apps/index.py b/bsie/apps/index.py index a870364..8798c49 100644 --- a/bsie/apps/index.py +++ b/bsie/apps/index.py @@ -13,7 +13,7 @@ import typing from bsie.extractor import ExtractorBuilder from bsie.lib import BSIE, PipelineBuilder, DefaultNamingPolicy from bsie.reader import ReaderBuilder -from bsie.utils import bsfs, errors +from bsie.utils import bsfs, errors, node as node_ # exports __all__: typing.Sequence[str] = ( @@ -49,6 +49,9 @@ def main(argv): rbuild = ReaderBuilder() # extractor builder ebuild = ExtractorBuilder([ + {'bsie.extractor.preview.Preview': { + 'max_sides': [50], + }}, {'bsie.extractor.generic.path.Path': {}}, {'bsie.extractor.generic.stat.Stat': {}}, {'bsie.extractor.generic.constant.Constant': dict( @@ -116,6 +119,8 @@ def main(argv): store.migrate(bsie.schema) # process files def handle(node, pred, value): + if isinstance(value, node_.Node): + value = store.node(value.node_type, value.uri) store.node(node.node_type, node.uri).set(pred.uri, value) walk(handle) # return store diff --git a/bsie/apps/info.py b/bsie/apps/info.py index 4e948fc..750aedc 100644 --- a/bsie/apps/info.py +++ b/bsie/apps/info.py @@ -35,6 +35,9 @@ def main(argv): rbuild = ReaderBuilder() # extractor builder ebuild = ExtractorBuilder([ + {'bsie.extractor.preview.Preview': { + 'max_sides': [50, 200], + }}, {'bsie.extractor.generic.path.Path': {}}, {'bsie.extractor.generic.stat.Stat': {}}, {'bsie.extractor.generic.constant.Constant': dict( diff --git a/bsie/extractor/base.py 
class Preview(base.Extractor):
    """Extract previews.

    For each configured maximum side length, the content callback is asked
    for an image of that size. The image is encoded as JPEG and attached to
    the subject via a bsfs:Preview node that carries the image's width,
    height, and the encoded bytes.
    """

    CONTENT_READER = 'bsie.reader.preview.Preview'

    # Image modes that are passed to the JPEG encoder as they are.
    # Images in any other mode are converted to RGB before encoding.
    _JPEG_MODES = frozenset(('1', 'L', 'RGB', 'RGBX', 'CMYK', 'YCbCr'))

    def __init__(self, max_sides: typing.Iterable[int]):
        """Create the extractor. *max_sides* lists the preview sizes to produce."""
        super().__init__(bsfs.schema.from_string(base.SCHEMA_PREAMBLE + '''

            bsfs:Preview rdfs:subClassOf bsfs:Node .
            bsfs:BinaryBlob rdfs:subClassOf bsfs:Literal .
            bsfs:JPEG rdfs:subClassOf bsfs:BinaryBlob .

            bse:preview rdfs:subClassOf bsfs:Predicate ;
                rdfs:domain bsfs:File ;
                rdfs:range bsfs:Preview ;
                bsfs:unique "false"^^xsd:boolean .

            bsp:width rdfs:subClassOf bsfs:Predicate ;
                rdfs:domain bsfs:Preview ;
                rdfs:range xsd:integer ;
                bsfs:unique "true"^^xsd:boolean .

            bsp:height rdfs:subClassOf bsfs:Predicate ;
                rdfs:domain bsfs:Preview ;
                rdfs:range xsd:integer ;
                bsfs:unique "true"^^xsd:boolean .

            bsp:asset rdfs:subClassOf bsfs:Predicate ;
                rdfs:domain bsfs:Preview ;
                rdfs:range bsfs:JPEG ;
                bsfs:unique "true"^^xsd:boolean .

            '''))
        # initialize extra args
        # duplicate sizes collapse into one
        self.max_sides = set(max_sides)

    def __eq__(self, other: typing.Any) -> bool:
        return super().__eq__(other) \
           and self.max_sides == other.max_sides

    def __hash__(self) -> int:
        # sort the sizes so that the hash does not depend on set iteration order
        return hash((super().__hash__(), tuple(sorted(self.max_sides))))

    def extract(
            self,
            subject: node.Node,
            content: typing.Callable[[int], PIL.Image.Image],
            principals: typing.Iterable[bsfs.schema.Predicate],
            ) -> typing.Iterator[typing.Tuple[node.Node, bsfs.schema.Predicate, typing.Any]]:
        """Yield the preview triples of *subject*.

        Nothing is produced unless bse:preview is among the *principals*.
        Otherwise, four triples are yielded per configured size: the link
        from *subject* to the preview node, and the preview's width, height,
        and JPEG data.
        """
        # check principals
        if self.schema.predicate(ns.bse.preview) not in principals:
            return

        for max_side in self.max_sides:
            # get the preview in the right resolution
            img = content(max_side)
            # the JPEG encoder rejects some modes (e.g., with alpha channel or palette)
            if img.mode not in self._JPEG_MODES:
                img = img.convert('RGB')
            # convert the preview to jpeg
            buffer = io.BytesIO()
            img.save(buffer, format='jpeg')
            # copy the encoded bytes out of the buffer only once
            data = buffer.getvalue()
            # create a preview node
            preview = node.Node(ns.bsfs.Preview,
                ucid=bsfs.uuid.UCID.from_bytes(data),
                size=max_side,
                source=subject,
                )
            # yield triples
            yield subject, self.schema.predicate(ns.bse.preview), preview
            yield preview, self.schema.predicate(ns.bsp.width), img.width
            yield preview, self.schema.predicate(ns.bsp.height), img.height
            yield preview, self.schema.predicate(ns.bsp.asset), data
+ Uses its source fragment as fallback. Appends the size if provided. + """ + fragment = None + if 'ucid' in node.hints: # content id + fragment = node.hints['ucid'] + if fragment is None and 'source' in node.hints: # source id + self.handle_node(node.hints['source']) + fragment = node.hints['source'].uri.get('fragment', None) + if fragment is None: # random name + fragment = self._uuid() + if 'size' in node.hints: # append size + fragment += '_s' + str(node.hints['size']) + node.uri = (self._prefix + 'preview')[fragment] + return node + ## EOF ## diff --git a/bsie/utils/namespaces.py b/bsie/utils/namespaces.py index 393b436..0af8ece 100644 --- a/bsie/utils/namespaces.py +++ b/bsie/utils/namespaces.py @@ -12,16 +12,18 @@ from . import bsfs as _bsfs # constants bse = _bsfs.Namespace('http://bsfs.ai/schema/Entity') +bsf = _bsfs.Namespace('http://ie.bsfs.ai/schema/Feature') bsfs = _bsfs.Namespace('http://bsfs.ai/schema', fsep='/') bsm = _bsfs.Namespace('http://bsfs.ai/schema/Meta') +bsp = _bsfs.Namespace('http://bsfs.ai/schema/Preview') xsd = _bsfs.Namespace('http://www.w3.org/2001/XMLSchema') -bsf = _bsfs.Namespace('http://ie.bsfs.ai/schema/Feature') # export __all__: typing.Sequence[str] = ( 'bse', 'bsfs', 'bsm', + 'bsp', 'xsd', ) diff --git a/test/apps/test_index.py b/test/apps/test_index.py index 7f5be8e..d1e7140 100644 --- a/test/apps/test_index.py +++ b/test/apps/test_index.py @@ -23,6 +23,9 @@ from bsie.apps.index import main ## code ## class TestIndex(unittest.TestCase): + def test_disclaimer(self): + print('Please wait, this test will take about 25 seconds') + def test_main_invalid(self): outbuf = io.StringIO() with contextlib.redirect_stdout(outbuf): @@ -32,94 +35,166 @@ class TestIndex(unittest.TestCase): def test_main(self): bsfs = main([ '-r', - '--user', 'http://example.com/me', + '--host', 'http://example.com', + '--user', 'me', os.path.join(os.path.dirname(__file__), 'testdir'), os.path.join(os.path.dirname(__file__), 'testfile'), ]) - prefix = 
'http://example.com/me/file#' + pre_file = 'http://example.com/me/file#' + pre_preview = 'http://example.com/me/preview#' self.assertTrue(set(bsfs._backend._graph).issuperset({ - (rdflib.URIRef(prefix + '2f4109b40107cc50e0884755a1a961ed126887e49b8dbaf0e146b2e226aa6647'), rdflib.RDF.type, rdflib.URIRef(ns.bsfs.File)), - (rdflib.URIRef(prefix + '2f4109b40107cc50e0884755a1a961ed126887e49b8dbaf0e146b2e226aa6647'), rdflib.URIRef(ns.bse.author), rdflib.Literal('Me, myself, and I', datatype=rdflib.XSD.string)), - (rdflib.URIRef(prefix + '2f4109b40107cc50e0884755a1a961ed126887e49b8dbaf0e146b2e226aa6647'), rdflib.URIRef(ns.bse.filename), rdflib.Literal('alpha_second', datatype=rdflib.XSD.string)), - (rdflib.URIRef(prefix + '2f4109b40107cc50e0884755a1a961ed126887e49b8dbaf0e146b2e226aa6647'), rdflib.URIRef(ns.bse.filesize), rdflib.Literal('696', datatype=rdflib.XSD.integer)), - (rdflib.URIRef(prefix + '441f3d10c8ff489fe8e33e639606512f6c463151cc429de7e554b9af670c2ece'), rdflib.RDF.type, rdflib.URIRef(ns.bsfs.File)), - (rdflib.URIRef(prefix + '441f3d10c8ff489fe8e33e639606512f6c463151cc429de7e554b9af670c2ece'), rdflib.URIRef(ns.bse.author), rdflib.Literal('Me, myself, and I', datatype=rdflib.XSD.string)), - (rdflib.URIRef(prefix + '441f3d10c8ff489fe8e33e639606512f6c463151cc429de7e554b9af670c2ece'), rdflib.URIRef(ns.bse.filename), rdflib.Literal('omega_second', datatype=rdflib.XSD.string)), - (rdflib.URIRef(prefix + '441f3d10c8ff489fe8e33e639606512f6c463151cc429de7e554b9af670c2ece'), rdflib.URIRef(ns.bse.filesize), rdflib.Literal('503', datatype=rdflib.XSD.integer)), - (rdflib.URIRef(prefix + '69b98ecf7aff3e95b09688ba93331678eb8397817111f674c9558e6dd8f5e871'), rdflib.RDF.type, rdflib.URIRef(ns.bsfs.File)), - (rdflib.URIRef(prefix + '69b98ecf7aff3e95b09688ba93331678eb8397817111f674c9558e6dd8f5e871'), rdflib.URIRef(ns.bse.author), rdflib.Literal('Me, myself, and I', datatype=rdflib.XSD.string)), - (rdflib.URIRef(prefix + 
'69b98ecf7aff3e95b09688ba93331678eb8397817111f674c9558e6dd8f5e871'), rdflib.URIRef(ns.bse.filename), rdflib.Literal('td_first', datatype=rdflib.XSD.string)), - (rdflib.URIRef(prefix + '69b98ecf7aff3e95b09688ba93331678eb8397817111f674c9558e6dd8f5e871'), rdflib.URIRef(ns.bse.filesize), rdflib.Literal('911', datatype=rdflib.XSD.integer)), - (rdflib.URIRef(prefix + '78f7eb7f0d8221cdb2cb26c978fa42a11f75eb87becc768f4474134cb1e06926'), rdflib.RDF.type, rdflib.URIRef(ns.bsfs.File)), - (rdflib.URIRef(prefix + '78f7eb7f0d8221cdb2cb26c978fa42a11f75eb87becc768f4474134cb1e06926'), rdflib.URIRef(ns.bse.author), rdflib.Literal('Me, myself, and I', datatype=rdflib.XSD.string)), - (rdflib.URIRef(prefix + '78f7eb7f0d8221cdb2cb26c978fa42a11f75eb87becc768f4474134cb1e06926'), rdflib.URIRef(ns.bse.filename), rdflib.Literal('testfile', datatype=rdflib.XSD.string)), - (rdflib.URIRef(prefix + '78f7eb7f0d8221cdb2cb26c978fa42a11f75eb87becc768f4474134cb1e06926'), rdflib.URIRef(ns.bse.filesize), rdflib.Literal('885', datatype=rdflib.XSD.integer)), - (rdflib.URIRef(prefix + '80818b8ec2ee1919116dba9c8a7e0a4608313cf3b463cd88e9ed77a700dd92d3'), rdflib.RDF.type, rdflib.URIRef(ns.bsfs.File)), - (rdflib.URIRef(prefix + '80818b8ec2ee1919116dba9c8a7e0a4608313cf3b463cd88e9ed77a700dd92d3'), rdflib.URIRef(ns.bse.author), rdflib.Literal('Me, myself, and I', datatype=rdflib.XSD.string)), - (rdflib.URIRef(prefix + '80818b8ec2ee1919116dba9c8a7e0a4608313cf3b463cd88e9ed77a700dd92d3'), rdflib.URIRef(ns.bse.filename), rdflib.Literal('bar_first', datatype=rdflib.XSD.string)), - (rdflib.URIRef(prefix + '80818b8ec2ee1919116dba9c8a7e0a4608313cf3b463cd88e9ed77a700dd92d3'), rdflib.URIRef(ns.bse.filesize), rdflib.Literal('956', datatype=rdflib.XSD.integer)), - (rdflib.URIRef(prefix + '976d2ea0e58488678cc7e435fbfadabfb6eb6cf50ad51862f38f73729ed11795'), rdflib.RDF.type, rdflib.URIRef(ns.bsfs.File)), - (rdflib.URIRef(prefix + '976d2ea0e58488678cc7e435fbfadabfb6eb6cf50ad51862f38f73729ed11795'), rdflib.URIRef(ns.bse.author), 
rdflib.Literal('Me, myself, and I', datatype=rdflib.XSD.string)), - (rdflib.URIRef(prefix + '976d2ea0e58488678cc7e435fbfadabfb6eb6cf50ad51862f38f73729ed11795'), rdflib.URIRef(ns.bse.filename), rdflib.Literal('omega_first', datatype=rdflib.XSD.string)), - (rdflib.URIRef(prefix + '976d2ea0e58488678cc7e435fbfadabfb6eb6cf50ad51862f38f73729ed11795'), rdflib.URIRef(ns.bse.filesize), rdflib.Literal('648', datatype=rdflib.XSD.integer)), - (rdflib.URIRef(prefix + '997e2fbb7494a3818ec782d2bc87bf1cffafba6b9c0f658e4a6c18a723e944d3'), rdflib.RDF.type, rdflib.URIRef(ns.bsfs.File)), - (rdflib.URIRef(prefix + '997e2fbb7494a3818ec782d2bc87bf1cffafba6b9c0f658e4a6c18a723e944d3'), rdflib.URIRef(ns.bse.author), rdflib.Literal('Me, myself, and I', datatype=rdflib.XSD.string)), - (rdflib.URIRef(prefix + '997e2fbb7494a3818ec782d2bc87bf1cffafba6b9c0f658e4a6c18a723e944d3'), rdflib.URIRef(ns.bse.filename), rdflib.Literal('alpha_first', datatype=rdflib.XSD.string)), - (rdflib.URIRef(prefix + '997e2fbb7494a3818ec782d2bc87bf1cffafba6b9c0f658e4a6c18a723e944d3'), rdflib.URIRef(ns.bse.filesize), rdflib.Literal('754', datatype=rdflib.XSD.integer)), - (rdflib.URIRef(prefix + 'a8af899ecdab60dfaea8ec7f934053624c80a1054539e163f2c7eaa986c2777d'), rdflib.RDF.type, rdflib.URIRef(ns.bsfs.File)), - (rdflib.URIRef(prefix + 'a8af899ecdab60dfaea8ec7f934053624c80a1054539e163f2c7eaa986c2777d'), rdflib.URIRef(ns.bse.author), rdflib.Literal('Me, myself, and I', datatype=rdflib.XSD.string)), - (rdflib.URIRef(prefix + 'a8af899ecdab60dfaea8ec7f934053624c80a1054539e163f2c7eaa986c2777d'), rdflib.URIRef(ns.bse.filename), rdflib.Literal('foo_second', datatype=rdflib.XSD.string)), - (rdflib.URIRef(prefix + 'a8af899ecdab60dfaea8ec7f934053624c80a1054539e163f2c7eaa986c2777d'), rdflib.URIRef(ns.bse.filesize), rdflib.Literal('585', datatype=rdflib.XSD.integer)), - (rdflib.URIRef(prefix + 'b8fd7fba818254166a6043195004138ebda6923e012442f819a2c49671136c70'), rdflib.RDF.type, rdflib.URIRef(ns.bsfs.File)), - (rdflib.URIRef(prefix + 
'b8fd7fba818254166a6043195004138ebda6923e012442f819a2c49671136c70'), rdflib.URIRef(ns.bse.author), rdflib.Literal('Me, myself, and I', datatype=rdflib.XSD.string)), - (rdflib.URIRef(prefix + 'b8fd7fba818254166a6043195004138ebda6923e012442f819a2c49671136c70'), rdflib.URIRef(ns.bse.filename), rdflib.Literal('bar_second', datatype=rdflib.XSD.string)), - (rdflib.URIRef(prefix + 'b8fd7fba818254166a6043195004138ebda6923e012442f819a2c49671136c70'), rdflib.URIRef(ns.bse.filesize), rdflib.Literal('636', datatype=rdflib.XSD.integer)), - (rdflib.URIRef(prefix + 'd43758ace82154a1cc10ca0dfef63cb20dd831f9c87edd6dc06539eefe67371d'), rdflib.RDF.type, rdflib.URIRef(ns.bsfs.File)), - (rdflib.URIRef(prefix + 'd43758ace82154a1cc10ca0dfef63cb20dd831f9c87edd6dc06539eefe67371d'), rdflib.URIRef(ns.bse.author), rdflib.Literal('Me, myself, and I', datatype=rdflib.XSD.string)), - (rdflib.URIRef(prefix + 'd43758ace82154a1cc10ca0dfef63cb20dd831f9c87edd6dc06539eefe67371d'), rdflib.URIRef(ns.bse.filename), rdflib.Literal('foo_first', datatype=rdflib.XSD.string)), - (rdflib.URIRef(prefix + 'd43758ace82154a1cc10ca0dfef63cb20dd831f9c87edd6dc06539eefe67371d'), rdflib.URIRef(ns.bse.filesize), rdflib.Literal('546', datatype=rdflib.XSD.integer)), - (rdflib.URIRef(prefix + 'd803187cbf3676ae9d38126270a6152c60431589aa3bb3824baf8954e9c097f1'), rdflib.RDF.type, rdflib.URIRef(ns.bsfs.File)), - (rdflib.URIRef(prefix + 'd803187cbf3676ae9d38126270a6152c60431589aa3bb3824baf8954e9c097f1'), rdflib.URIRef(ns.bse.author), rdflib.Literal('Me, myself, and I', datatype=rdflib.XSD.string)), - (rdflib.URIRef(prefix + 'd803187cbf3676ae9d38126270a6152c60431589aa3bb3824baf8954e9c097f1'), rdflib.URIRef(ns.bse.filename), rdflib.Literal('td_second', datatype=rdflib.XSD.string)), - (rdflib.URIRef(prefix + 'd803187cbf3676ae9d38126270a6152c60431589aa3bb3824baf8954e9c097f1'), rdflib.URIRef(ns.bse.filesize), rdflib.Literal('703', datatype=rdflib.XSD.integer)), - (rdflib.URIRef(prefix + 
'accb115d266ad60c53cd01a7f7130f245886ce8eaf69bc85319febc11d9fe089'), rdflib.RDF.type, rdflib.URIRef(ns.bsfs.File)), - (rdflib.URIRef(prefix + 'accb115d266ad60c53cd01a7f7130f245886ce8eaf69bc85319febc11d9fe089'), rdflib.URIRef(ns.bse.author), rdflib.Literal('Me, myself, and I', datatype=rdflib.XSD.string)), - (rdflib.URIRef(prefix + 'accb115d266ad60c53cd01a7f7130f245886ce8eaf69bc85319febc11d9fe089'), rdflib.URIRef(ns.bse.filename), rdflib.Literal('testimage.jpg', datatype=rdflib.XSD.string)), - (rdflib.URIRef(prefix + 'accb115d266ad60c53cd01a7f7130f245886ce8eaf69bc85319febc11d9fe089'), rdflib.URIRef(ns.bse.filesize), rdflib.Literal('349264', datatype=rdflib.XSD.integer)), - (rdflib.URIRef(prefix + 'accb115d266ad60c53cd01a7f7130f245886ce8eaf69bc85319febc11d9fe089'), rdflib.URIRef('http://bsfs.ai/schema/Entity/colors_spatial#0658f2234a054e1dd59a14462c89f7733e019160419c796356aa831498bd0a04'), + # files and properties + (rdflib.URIRef(pre_file + '2f4109b40107cc50e0884755a1a961ed126887e49b8dbaf0e146b2e226aa6647'), rdflib.RDF.type, rdflib.URIRef(ns.bsfs.File)), + (rdflib.URIRef(pre_file + '2f4109b40107cc50e0884755a1a961ed126887e49b8dbaf0e146b2e226aa6647'), rdflib.URIRef(ns.bse.author), rdflib.Literal('Me, myself, and I', datatype=rdflib.XSD.string)), + (rdflib.URIRef(pre_file + '2f4109b40107cc50e0884755a1a961ed126887e49b8dbaf0e146b2e226aa6647'), rdflib.URIRef(ns.bse.filename), rdflib.Literal('alpha_second', datatype=rdflib.XSD.string)), + (rdflib.URIRef(pre_file + '2f4109b40107cc50e0884755a1a961ed126887e49b8dbaf0e146b2e226aa6647'), rdflib.URIRef(ns.bse.filesize), rdflib.Literal('696', datatype=rdflib.XSD.integer)), + (rdflib.URIRef(pre_file + '441f3d10c8ff489fe8e33e639606512f6c463151cc429de7e554b9af670c2ece'), rdflib.RDF.type, rdflib.URIRef(ns.bsfs.File)), + (rdflib.URIRef(pre_file + '441f3d10c8ff489fe8e33e639606512f6c463151cc429de7e554b9af670c2ece'), rdflib.URIRef(ns.bse.author), rdflib.Literal('Me, myself, and I', datatype=rdflib.XSD.string)), + (rdflib.URIRef(pre_file + 
'441f3d10c8ff489fe8e33e639606512f6c463151cc429de7e554b9af670c2ece'), rdflib.URIRef(ns.bse.filename), rdflib.Literal('omega_second', datatype=rdflib.XSD.string)), + (rdflib.URIRef(pre_file + '441f3d10c8ff489fe8e33e639606512f6c463151cc429de7e554b9af670c2ece'), rdflib.URIRef(ns.bse.filesize), rdflib.Literal('503', datatype=rdflib.XSD.integer)), + (rdflib.URIRef(pre_file + '69b98ecf7aff3e95b09688ba93331678eb8397817111f674c9558e6dd8f5e871'), rdflib.RDF.type, rdflib.URIRef(ns.bsfs.File)), + (rdflib.URIRef(pre_file + '69b98ecf7aff3e95b09688ba93331678eb8397817111f674c9558e6dd8f5e871'), rdflib.URIRef(ns.bse.author), rdflib.Literal('Me, myself, and I', datatype=rdflib.XSD.string)), + (rdflib.URIRef(pre_file + '69b98ecf7aff3e95b09688ba93331678eb8397817111f674c9558e6dd8f5e871'), rdflib.URIRef(ns.bse.filename), rdflib.Literal('td_first', datatype=rdflib.XSD.string)), + (rdflib.URIRef(pre_file + '69b98ecf7aff3e95b09688ba93331678eb8397817111f674c9558e6dd8f5e871'), rdflib.URIRef(ns.bse.filesize), rdflib.Literal('911', datatype=rdflib.XSD.integer)), + (rdflib.URIRef(pre_file + '78f7eb7f0d8221cdb2cb26c978fa42a11f75eb87becc768f4474134cb1e06926'), rdflib.RDF.type, rdflib.URIRef(ns.bsfs.File)), + (rdflib.URIRef(pre_file + '78f7eb7f0d8221cdb2cb26c978fa42a11f75eb87becc768f4474134cb1e06926'), rdflib.URIRef(ns.bse.author), rdflib.Literal('Me, myself, and I', datatype=rdflib.XSD.string)), + (rdflib.URIRef(pre_file + '78f7eb7f0d8221cdb2cb26c978fa42a11f75eb87becc768f4474134cb1e06926'), rdflib.URIRef(ns.bse.filename), rdflib.Literal('testfile', datatype=rdflib.XSD.string)), + (rdflib.URIRef(pre_file + '78f7eb7f0d8221cdb2cb26c978fa42a11f75eb87becc768f4474134cb1e06926'), rdflib.URIRef(ns.bse.filesize), rdflib.Literal('885', datatype=rdflib.XSD.integer)), + (rdflib.URIRef(pre_file + '80818b8ec2ee1919116dba9c8a7e0a4608313cf3b463cd88e9ed77a700dd92d3'), rdflib.RDF.type, rdflib.URIRef(ns.bsfs.File)), + (rdflib.URIRef(pre_file + '80818b8ec2ee1919116dba9c8a7e0a4608313cf3b463cd88e9ed77a700dd92d3'), 
rdflib.URIRef(ns.bse.author), rdflib.Literal('Me, myself, and I', datatype=rdflib.XSD.string)), + (rdflib.URIRef(pre_file + '80818b8ec2ee1919116dba9c8a7e0a4608313cf3b463cd88e9ed77a700dd92d3'), rdflib.URIRef(ns.bse.filename), rdflib.Literal('bar_first', datatype=rdflib.XSD.string)), + (rdflib.URIRef(pre_file + '80818b8ec2ee1919116dba9c8a7e0a4608313cf3b463cd88e9ed77a700dd92d3'), rdflib.URIRef(ns.bse.filesize), rdflib.Literal('956', datatype=rdflib.XSD.integer)), + (rdflib.URIRef(pre_file + '976d2ea0e58488678cc7e435fbfadabfb6eb6cf50ad51862f38f73729ed11795'), rdflib.RDF.type, rdflib.URIRef(ns.bsfs.File)), + (rdflib.URIRef(pre_file + '976d2ea0e58488678cc7e435fbfadabfb6eb6cf50ad51862f38f73729ed11795'), rdflib.URIRef(ns.bse.author), rdflib.Literal('Me, myself, and I', datatype=rdflib.XSD.string)), + (rdflib.URIRef(pre_file + '976d2ea0e58488678cc7e435fbfadabfb6eb6cf50ad51862f38f73729ed11795'), rdflib.URIRef(ns.bse.filename), rdflib.Literal('omega_first', datatype=rdflib.XSD.string)), + (rdflib.URIRef(pre_file + '976d2ea0e58488678cc7e435fbfadabfb6eb6cf50ad51862f38f73729ed11795'), rdflib.URIRef(ns.bse.filesize), rdflib.Literal('648', datatype=rdflib.XSD.integer)), + (rdflib.URIRef(pre_file + '997e2fbb7494a3818ec782d2bc87bf1cffafba6b9c0f658e4a6c18a723e944d3'), rdflib.RDF.type, rdflib.URIRef(ns.bsfs.File)), + (rdflib.URIRef(pre_file + '997e2fbb7494a3818ec782d2bc87bf1cffafba6b9c0f658e4a6c18a723e944d3'), rdflib.URIRef(ns.bse.author), rdflib.Literal('Me, myself, and I', datatype=rdflib.XSD.string)), + (rdflib.URIRef(pre_file + '997e2fbb7494a3818ec782d2bc87bf1cffafba6b9c0f658e4a6c18a723e944d3'), rdflib.URIRef(ns.bse.filename), rdflib.Literal('alpha_first', datatype=rdflib.XSD.string)), + (rdflib.URIRef(pre_file + '997e2fbb7494a3818ec782d2bc87bf1cffafba6b9c0f658e4a6c18a723e944d3'), rdflib.URIRef(ns.bse.filesize), rdflib.Literal('754', datatype=rdflib.XSD.integer)), + (rdflib.URIRef(pre_file + 'a8af899ecdab60dfaea8ec7f934053624c80a1054539e163f2c7eaa986c2777d'), rdflib.RDF.type, 
rdflib.URIRef(ns.bsfs.File)), + (rdflib.URIRef(pre_file + 'a8af899ecdab60dfaea8ec7f934053624c80a1054539e163f2c7eaa986c2777d'), rdflib.URIRef(ns.bse.author), rdflib.Literal('Me, myself, and I', datatype=rdflib.XSD.string)), + (rdflib.URIRef(pre_file + 'a8af899ecdab60dfaea8ec7f934053624c80a1054539e163f2c7eaa986c2777d'), rdflib.URIRef(ns.bse.filename), rdflib.Literal('foo_second', datatype=rdflib.XSD.string)), + (rdflib.URIRef(pre_file + 'a8af899ecdab60dfaea8ec7f934053624c80a1054539e163f2c7eaa986c2777d'), rdflib.URIRef(ns.bse.filesize), rdflib.Literal('585', datatype=rdflib.XSD.integer)), + (rdflib.URIRef(pre_file + 'b8fd7fba818254166a6043195004138ebda6923e012442f819a2c49671136c70'), rdflib.RDF.type, rdflib.URIRef(ns.bsfs.File)), + (rdflib.URIRef(pre_file + 'b8fd7fba818254166a6043195004138ebda6923e012442f819a2c49671136c70'), rdflib.URIRef(ns.bse.author), rdflib.Literal('Me, myself, and I', datatype=rdflib.XSD.string)), + (rdflib.URIRef(pre_file + 'b8fd7fba818254166a6043195004138ebda6923e012442f819a2c49671136c70'), rdflib.URIRef(ns.bse.filename), rdflib.Literal('bar_second', datatype=rdflib.XSD.string)), + (rdflib.URIRef(pre_file + 'b8fd7fba818254166a6043195004138ebda6923e012442f819a2c49671136c70'), rdflib.URIRef(ns.bse.filesize), rdflib.Literal('636', datatype=rdflib.XSD.integer)), + (rdflib.URIRef(pre_file + 'd43758ace82154a1cc10ca0dfef63cb20dd831f9c87edd6dc06539eefe67371d'), rdflib.RDF.type, rdflib.URIRef(ns.bsfs.File)), + (rdflib.URIRef(pre_file + 'd43758ace82154a1cc10ca0dfef63cb20dd831f9c87edd6dc06539eefe67371d'), rdflib.URIRef(ns.bse.author), rdflib.Literal('Me, myself, and I', datatype=rdflib.XSD.string)), + (rdflib.URIRef(pre_file + 'd43758ace82154a1cc10ca0dfef63cb20dd831f9c87edd6dc06539eefe67371d'), rdflib.URIRef(ns.bse.filename), rdflib.Literal('foo_first', datatype=rdflib.XSD.string)), + (rdflib.URIRef(pre_file + 'd43758ace82154a1cc10ca0dfef63cb20dd831f9c87edd6dc06539eefe67371d'), rdflib.URIRef(ns.bse.filesize), rdflib.Literal('546', 
datatype=rdflib.XSD.integer)), + (rdflib.URIRef(pre_file + 'd803187cbf3676ae9d38126270a6152c60431589aa3bb3824baf8954e9c097f1'), rdflib.RDF.type, rdflib.URIRef(ns.bsfs.File)), + (rdflib.URIRef(pre_file + 'd803187cbf3676ae9d38126270a6152c60431589aa3bb3824baf8954e9c097f1'), rdflib.URIRef(ns.bse.author), rdflib.Literal('Me, myself, and I', datatype=rdflib.XSD.string)), + (rdflib.URIRef(pre_file + 'd803187cbf3676ae9d38126270a6152c60431589aa3bb3824baf8954e9c097f1'), rdflib.URIRef(ns.bse.filename), rdflib.Literal('td_second', datatype=rdflib.XSD.string)), + (rdflib.URIRef(pre_file + 'd803187cbf3676ae9d38126270a6152c60431589aa3bb3824baf8954e9c097f1'), rdflib.URIRef(ns.bse.filesize), rdflib.Literal('703', datatype=rdflib.XSD.integer)), + (rdflib.URIRef(pre_file + 'accb115d266ad60c53cd01a7f7130f245886ce8eaf69bc85319febc11d9fe089'), rdflib.RDF.type, rdflib.URIRef(ns.bsfs.File)), + (rdflib.URIRef(pre_file + 'accb115d266ad60c53cd01a7f7130f245886ce8eaf69bc85319febc11d9fe089'), rdflib.URIRef(ns.bse.author), rdflib.Literal('Me, myself, and I', datatype=rdflib.XSD.string)), + (rdflib.URIRef(pre_file + 'accb115d266ad60c53cd01a7f7130f245886ce8eaf69bc85319febc11d9fe089'), rdflib.URIRef(ns.bse.filename), rdflib.Literal('testimage.jpg', datatype=rdflib.XSD.string)), + (rdflib.URIRef(pre_file + 'accb115d266ad60c53cd01a7f7130f245886ce8eaf69bc85319febc11d9fe089'), rdflib.URIRef(ns.bse.filesize), rdflib.Literal('349264', datatype=rdflib.XSD.integer)), + # features + (rdflib.URIRef(pre_file + 'accb115d266ad60c53cd01a7f7130f245886ce8eaf69bc85319febc11d9fe089'), rdflib.URIRef('http://bsfs.ai/schema/Entity/colors_spatial#0658f2234a054e1dd59a14462c89f7733e019160419c796356aa831498bd0a04'), rdflib.Literal( '(91, 127, 121, 94, 138, 167, 163, 134, 190, 138, 170, 156, 121, 142, 159)', datatype=rdflib.URIRef('http://ie.bsfs.ai/schema/Feature/ColorsSpatial#0658f2234a054e1dd59a14462c89f7733e019160419c796356aa831498bd0a04'))), + (rdflib.URIRef(pre_file + 
'accb115d266ad60c53cd01a7f7130f245886ce8eaf69bc85319febc11d9fe089'), rdflib.URIRef(ns.bse.filesize), rdflib.Literal('349264', datatype=rdflib.XSD.integer)), + # links to previews + (rdflib.URIRef(pre_file + '2f4109b40107cc50e0884755a1a961ed126887e49b8dbaf0e146b2e226aa6647'), rdflib.URIRef(ns.bse.preview), rdflib.URIRef(pre_preview + '79cb8a7e6369361a4f4cb7ff729c1ed3fcf87204769623d6fbd6ebfae601e5c7_s50')), + (rdflib.URIRef(pre_file + '441f3d10c8ff489fe8e33e639606512f6c463151cc429de7e554b9af670c2ece'), rdflib.URIRef(ns.bse.preview), rdflib.URIRef(pre_preview + 'a8b3245636074d5370283b690281abda8ffdff12ce8b1af77c8bc0a4c85be860_s50')), + (rdflib.URIRef(pre_file + '69b98ecf7aff3e95b09688ba93331678eb8397817111f674c9558e6dd8f5e871'), rdflib.URIRef(ns.bse.preview), rdflib.URIRef(pre_preview + '968b9aa178585bc8d1fca0e4e32b8cf30b3941eff72f34e320584aaae8fd23ac_s50')), + (rdflib.URIRef(pre_file + '78f7eb7f0d8221cdb2cb26c978fa42a11f75eb87becc768f4474134cb1e06926'), rdflib.URIRef(ns.bse.preview), rdflib.URIRef(pre_preview + 'dbfd8ebc0557c4925e9ff8411629a74a15eca934a4c2a6bd3134dd81d2f95a36_s50')), + (rdflib.URIRef(pre_file + '80818b8ec2ee1919116dba9c8a7e0a4608313cf3b463cd88e9ed77a700dd92d3'), rdflib.URIRef(ns.bse.preview), rdflib.URIRef(pre_preview + '567049149769e1d02e6af6cfee3991f7cf0cbc935cbf6a566047f40155fb13a8_s50')), + (rdflib.URIRef(pre_file + '976d2ea0e58488678cc7e435fbfadabfb6eb6cf50ad51862f38f73729ed11795'), rdflib.URIRef(ns.bse.preview), rdflib.URIRef(pre_preview + 'df2185d8927ccef65c92fc90b94e800b02791354d8dede9dd9aa0e2c2cb1e91e_s50')), + (rdflib.URIRef(pre_file + '997e2fbb7494a3818ec782d2bc87bf1cffafba6b9c0f658e4a6c18a723e944d3'), rdflib.URIRef(ns.bse.preview), rdflib.URIRef(pre_preview + '9827509a74a60dfceed11936f7f624e9c932f66c8c0d20d355d56f8c3c9b56b1_s50')), + (rdflib.URIRef(pre_file + 'a8af899ecdab60dfaea8ec7f934053624c80a1054539e163f2c7eaa986c2777d'), rdflib.URIRef(ns.bse.preview), rdflib.URIRef(pre_preview + 
'7a975385a110c21fcd12e238fab9501550fa02f6328749068a3bffd65e291027_s50')), + (rdflib.URIRef(pre_file + 'accb115d266ad60c53cd01a7f7130f245886ce8eaf69bc85319febc11d9fe089'), rdflib.URIRef(ns.bse.preview), rdflib.URIRef(pre_preview + '2656e303d7218300326df73b64f312d8b37eb980358be27a38b5f63dae259be3_s50')), + (rdflib.URIRef(pre_file + 'b8fd7fba818254166a6043195004138ebda6923e012442f819a2c49671136c70'), rdflib.URIRef(ns.bse.preview), rdflib.URIRef(pre_preview + 'a63c84e647138a2b68113474212f6aee542b3707171ff178551db3c296e59817_s50')), + (rdflib.URIRef(pre_file + 'd43758ace82154a1cc10ca0dfef63cb20dd831f9c87edd6dc06539eefe67371d'), rdflib.URIRef(ns.bse.preview), rdflib.URIRef(pre_preview + '26f16643b2570ac5b2d1f8c373d492cb724aae2dd8d71a0b63647838ed651254_s50')), + (rdflib.URIRef(pre_file + 'd803187cbf3676ae9d38126270a6152c60431589aa3bb3824baf8954e9c097f1'), rdflib.URIRef(ns.bse.preview), rdflib.URIRef(pre_preview + '5d1235838c3d501204bb09c2de563d7e4a7fd17b7ec4ff302221c0e88c4741aa_s50')), + # preview dimensions + (rdflib.URIRef(pre_preview + '2656e303d7218300326df73b64f312d8b37eb980358be27a38b5f63dae259be3_s50'), rdflib.URIRef(ns.bsp.height), rdflib.Literal('33', datatype=rdflib.URIRef('http://www.w3.org/2001/XMLSchema#integer'))), + (rdflib.URIRef(pre_preview + '2656e303d7218300326df73b64f312d8b37eb980358be27a38b5f63dae259be3_s50'), rdflib.URIRef(ns.bsp.width), rdflib.Literal('50', datatype=rdflib.URIRef('http://www.w3.org/2001/XMLSchema#integer'))), + (rdflib.URIRef(pre_preview + '2656e303d7218300326df73b64f312d8b37eb980358be27a38b5f63dae259be3_s50'), rdflib.RDF.type, rdflib.URIRef(ns.bsfs.Preview)), + (rdflib.URIRef(pre_preview + '26f16643b2570ac5b2d1f8c373d492cb724aae2dd8d71a0b63647838ed651254_s50'), rdflib.URIRef(ns.bsp.height), rdflib.Literal('50', datatype=rdflib.URIRef('http://www.w3.org/2001/XMLSchema#integer'))), + (rdflib.URIRef(pre_preview + '26f16643b2570ac5b2d1f8c373d492cb724aae2dd8d71a0b63647838ed651254_s50'), rdflib.URIRef(ns.bsp.width), rdflib.Literal('36', 
datatype=rdflib.URIRef('http://www.w3.org/2001/XMLSchema#integer'))), + (rdflib.URIRef(pre_preview + '26f16643b2570ac5b2d1f8c373d492cb724aae2dd8d71a0b63647838ed651254_s50'), rdflib.RDF.type, rdflib.URIRef(ns.bsfs.Preview)), + (rdflib.URIRef(pre_preview + '567049149769e1d02e6af6cfee3991f7cf0cbc935cbf6a566047f40155fb13a8_s50'), rdflib.URIRef(ns.bsp.height), rdflib.Literal('50', datatype=rdflib.URIRef('http://www.w3.org/2001/XMLSchema#integer'))), + (rdflib.URIRef(pre_preview + '567049149769e1d02e6af6cfee3991f7cf0cbc935cbf6a566047f40155fb13a8_s50'), rdflib.URIRef(ns.bsp.width), rdflib.Literal('36', datatype=rdflib.URIRef('http://www.w3.org/2001/XMLSchema#integer'))), + (rdflib.URIRef(pre_preview + '567049149769e1d02e6af6cfee3991f7cf0cbc935cbf6a566047f40155fb13a8_s50'), rdflib.RDF.type, rdflib.URIRef(ns.bsfs.Preview)), + (rdflib.URIRef(pre_preview + '5d1235838c3d501204bb09c2de563d7e4a7fd17b7ec4ff302221c0e88c4741aa_s50'), rdflib.URIRef(ns.bsp.height), rdflib.Literal('50', datatype=rdflib.URIRef('http://www.w3.org/2001/XMLSchema#integer'))), + (rdflib.URIRef(pre_preview + '5d1235838c3d501204bb09c2de563d7e4a7fd17b7ec4ff302221c0e88c4741aa_s50'), rdflib.URIRef(ns.bsp.width), rdflib.Literal('36', datatype=rdflib.URIRef('http://www.w3.org/2001/XMLSchema#integer'))), + (rdflib.URIRef(pre_preview + '5d1235838c3d501204bb09c2de563d7e4a7fd17b7ec4ff302221c0e88c4741aa_s50'), rdflib.RDF.type, rdflib.URIRef(ns.bsfs.Preview)), + (rdflib.URIRef(pre_preview + '79cb8a7e6369361a4f4cb7ff729c1ed3fcf87204769623d6fbd6ebfae601e5c7_s50'), rdflib.URIRef(ns.bsp.height), rdflib.Literal('50', datatype=rdflib.URIRef('http://www.w3.org/2001/XMLSchema#integer'))), + (rdflib.URIRef(pre_preview + '79cb8a7e6369361a4f4cb7ff729c1ed3fcf87204769623d6fbd6ebfae601e5c7_s50'), rdflib.URIRef(ns.bsp.width), rdflib.Literal('36', datatype=rdflib.URIRef('http://www.w3.org/2001/XMLSchema#integer'))), + (rdflib.URIRef(pre_preview + '79cb8a7e6369361a4f4cb7ff729c1ed3fcf87204769623d6fbd6ebfae601e5c7_s50'), rdflib.RDF.type, 
rdflib.URIRef(ns.bsfs.Preview)), + (rdflib.URIRef(pre_preview + '7a975385a110c21fcd12e238fab9501550fa02f6328749068a3bffd65e291027_s50'), rdflib.URIRef(ns.bsp.height), rdflib.Literal('50', datatype=rdflib.URIRef('http://www.w3.org/2001/XMLSchema#integer'))), + (rdflib.URIRef(pre_preview + '7a975385a110c21fcd12e238fab9501550fa02f6328749068a3bffd65e291027_s50'), rdflib.URIRef(ns.bsp.width), rdflib.Literal('36', datatype=rdflib.URIRef('http://www.w3.org/2001/XMLSchema#integer'))), + (rdflib.URIRef(pre_preview + '7a975385a110c21fcd12e238fab9501550fa02f6328749068a3bffd65e291027_s50'), rdflib.RDF.type, rdflib.URIRef(ns.bsfs.Preview)), + (rdflib.URIRef(pre_preview + '968b9aa178585bc8d1fca0e4e32b8cf30b3941eff72f34e320584aaae8fd23ac_s50'), rdflib.URIRef(ns.bsp.height), rdflib.Literal('50', datatype=rdflib.URIRef('http://www.w3.org/2001/XMLSchema#integer'))), + (rdflib.URIRef(pre_preview + '968b9aa178585bc8d1fca0e4e32b8cf30b3941eff72f34e320584aaae8fd23ac_s50'), rdflib.URIRef(ns.bsp.width), rdflib.Literal('36', datatype=rdflib.URIRef('http://www.w3.org/2001/XMLSchema#integer'))), + (rdflib.URIRef(pre_preview + '968b9aa178585bc8d1fca0e4e32b8cf30b3941eff72f34e320584aaae8fd23ac_s50'), rdflib.RDF.type, rdflib.URIRef(ns.bsfs.Preview)), + (rdflib.URIRef(pre_preview + '9827509a74a60dfceed11936f7f624e9c932f66c8c0d20d355d56f8c3c9b56b1_s50'), rdflib.URIRef(ns.bsp.height), rdflib.Literal('50', datatype=rdflib.URIRef('http://www.w3.org/2001/XMLSchema#integer'))), + (rdflib.URIRef(pre_preview + '9827509a74a60dfceed11936f7f624e9c932f66c8c0d20d355d56f8c3c9b56b1_s50'), rdflib.URIRef(ns.bsp.width), rdflib.Literal('36', datatype=rdflib.URIRef('http://www.w3.org/2001/XMLSchema#integer'))), + (rdflib.URIRef(pre_preview + '9827509a74a60dfceed11936f7f624e9c932f66c8c0d20d355d56f8c3c9b56b1_s50'), rdflib.RDF.type, rdflib.URIRef(ns.bsfs.Preview)), + (rdflib.URIRef(pre_preview + 'a63c84e647138a2b68113474212f6aee542b3707171ff178551db3c296e59817_s50'), rdflib.URIRef(ns.bsp.height), rdflib.Literal('50', 
datatype=rdflib.URIRef('http://www.w3.org/2001/XMLSchema#integer'))), + (rdflib.URIRef(pre_preview + 'a63c84e647138a2b68113474212f6aee542b3707171ff178551db3c296e59817_s50'), rdflib.URIRef(ns.bsp.width), rdflib.Literal('36', datatype=rdflib.URIRef('http://www.w3.org/2001/XMLSchema#integer'))), + (rdflib.URIRef(pre_preview + 'a63c84e647138a2b68113474212f6aee542b3707171ff178551db3c296e59817_s50'), rdflib.RDF.type, rdflib.URIRef(ns.bsfs.Preview)), + (rdflib.URIRef(pre_preview + 'a8b3245636074d5370283b690281abda8ffdff12ce8b1af77c8bc0a4c85be860_s50'), rdflib.URIRef(ns.bsp.height), rdflib.Literal('50', datatype=rdflib.URIRef('http://www.w3.org/2001/XMLSchema#integer'))), + (rdflib.URIRef(pre_preview + 'a8b3245636074d5370283b690281abda8ffdff12ce8b1af77c8bc0a4c85be860_s50'), rdflib.URIRef(ns.bsp.width), rdflib.Literal('36', datatype=rdflib.URIRef('http://www.w3.org/2001/XMLSchema#integer'))), + (rdflib.URIRef(pre_preview + 'a8b3245636074d5370283b690281abda8ffdff12ce8b1af77c8bc0a4c85be860_s50'), rdflib.RDF.type, rdflib.URIRef(ns.bsfs.Preview)), + (rdflib.URIRef(pre_preview + 'dbfd8ebc0557c4925e9ff8411629a74a15eca934a4c2a6bd3134dd81d2f95a36_s50'), rdflib.URIRef(ns.bsp.height), rdflib.Literal('50', datatype=rdflib.URIRef('http://www.w3.org/2001/XMLSchema#integer'))), + (rdflib.URIRef(pre_preview + 'dbfd8ebc0557c4925e9ff8411629a74a15eca934a4c2a6bd3134dd81d2f95a36_s50'), rdflib.URIRef(ns.bsp.width), rdflib.Literal('36', datatype=rdflib.URIRef('http://www.w3.org/2001/XMLSchema#integer'))), + (rdflib.URIRef(pre_preview + 'dbfd8ebc0557c4925e9ff8411629a74a15eca934a4c2a6bd3134dd81d2f95a36_s50'), rdflib.RDF.type, rdflib.URIRef(ns.bsfs.Preview)), + (rdflib.URIRef(pre_preview + 'df2185d8927ccef65c92fc90b94e800b02791354d8dede9dd9aa0e2c2cb1e91e_s50'), rdflib.URIRef(ns.bsp.height), rdflib.Literal('50', datatype=rdflib.URIRef('http://www.w3.org/2001/XMLSchema#integer'))), + (rdflib.URIRef(pre_preview + 'df2185d8927ccef65c92fc90b94e800b02791354d8dede9dd9aa0e2c2cb1e91e_s50'), 
rdflib.URIRef(ns.bsp.width), rdflib.Literal('36', datatype=rdflib.URIRef('http://www.w3.org/2001/XMLSchema#integer'))), + (rdflib.URIRef(pre_preview + 'df2185d8927ccef65c92fc90b94e800b02791354d8dede9dd9aa0e2c2cb1e91e_s50'), rdflib.RDF.type, rdflib.URIRef(ns.bsfs.Preview)), + # assets + (rdflib.URIRef(pre_preview + '2656e303d7218300326df73b64f312d8b37eb980358be27a38b5f63dae259be3_s50'), rdflib.URIRef(ns.bsp.asset), rdflib.Literal('/9j/4AAQSkZJRgABAQAAAQABAAD/2wBDAAgGBgcGBQgHBwcJCQgKDBQNDAsLDBkSEw8UHRofHh0aHBwgJC4nICIsIxwcKDcpLDAxNDQ0Hyc5PTgyPC4zNDL/2wBDAQkJCQwLDBgNDRgyIRwhMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjL/wAARCAAhADIDASIAAhEBAxEB/8QAHwAAAQUBAQEBAQEAAAAAAAAAAAECAwQFBgcICQoL/8QAtRAAAgEDAwIEAwUFBAQAAAF9AQIDAAQRBRIhMUEGE1FhByJxFDKBkaEII0KxwRVS0fAkM2JyggkKFhcYGRolJicoKSo0NTY3ODk6Q0RFRkdISUpTVFVWV1hZWmNkZWZnaGlqc3R1dnd4eXqDhIWGh4iJipKTlJWWl5iZmqKjpKWmp6ipqrKztLW2t7i5usLDxMXGx8jJytLT1NXW19jZ2uHi4+Tl5ufo6erx8vP09fb3+Pn6/8QAHwEAAwEBAQEBAQEBAQAAAAAAAAECAwQFBgcICQoL/8QAtREAAgECBAQDBAcFBAQAAQJ3AAECAxEEBSExBhJBUQdhcRMiMoEIFEKRobHBCSMzUvAVYnLRChYkNOEl8RcYGRomJygpKjU2Nzg5OkNERUZHSElKU1RVVldYWVpjZGVmZ2hpanN0dXZ3eHl6goOEhYaHiImKkpOUlZaXmJmaoqOkpaanqKmqsrO0tba3uLm6wsPExcbHyMnK0tPU1dbX2Nna4uPk5ebn6Onq8vP09fb3+Pn6/9oADAMBAAIRAxEAPwDi9Ktb+O3khZTg/wAB7VSGnKkkhkAR85weteo3Vl9mvLtWjVWEJaN/9rsD7HkVwNza3kmsn7RYsDuzsdSVb/GvLo15W9rKNkVDmvzKN0dx4L1Sb+x2S7jZ7aM7BKF+XPoffkVdOpWDSSI9sGizk5HWuE8S69q0NnHptnB9ms7Nh8lr+6SSXALhsHJUcY7kitPTLi51nR0nMKpO6ZkCHABxngdq1xGKnSs1Kyvrc7qEMW2/Zrz/AKudnbXXhuaEiO3jjY9TtxVG8ht3mQwgOnaubuVmtbFV2gSjjn1q1prajJF+9dEQdMVjPHKtFxaXqc9fE1JXpzjr+J0Is7fA+VaKwmludx/0xevrRXLaH8xyfVZdjpNFsgsUlpryPkjyVuVJ6AnH8z/I1flS30m2ezvdt3bbd1teRL8yntu/xGfeua1zXtbs7dh5I8mRdhkD7mYEY5GOf51Jp+vW8Vnu1KT7FJKMmO5b5JcdwDxn1HFfR1KUZRd1v93zPoaFfD1J+5Kz+79DjfEV9Dc3E0hk5Zi5ZR1btx+NYNlrn9nllhkKgnPpnjr9Of1H0rrdc0bQtTvgsWbSRiwJjk2K+ADwrZ9RyOOa4/U/AWs21y0mmhL60dyI5IpVLduGGeCM/jXmPL201N3NK9SpfngrryOr0y+i1fT4lvZ9gR9pYfM5I9v8/wBK2/7FneFmCXEMLcIbhwpb3A6gVwGiaR4o03UYhbaZOZ88RqA27H
XoeB9K9PgiYRRyal4Y1KKVhlyHbr3966MPgIRpuMtNROjTr+/JWn+P4mB/wix/5/o/+/lFdoLXT8DPhfUfx8yiuj6lT7v8P8hex85ffEZef8gu0+oriPiZ/rNI+j/zFFFbYn+Ez5uh8ZP4l/5Cq/8AYN/9nFU/CH/Hvd/9dv8A2Wiih/Ee7k/wv1/Q63Qv9fb/APXT+ldFrP8Ax/xfRP8A0IUUVX2T0K38RD5v9dJ/vH+dFFFUC2P/2Q==', datatype=rdflib.URIRef(ns.bsfs.BinaryBlob))), + (rdflib.URIRef(pre_preview + '26f16643b2570ac5b2d1f8c373d492cb724aae2dd8d71a0b63647838ed651254_s50'), rdflib.URIRef(ns.bsp.asset), rdflib.Literal('/9j/4AAQSkZJRgABAQAAAQABAAD/2wBDAAgGBgcGBQgHBwcJCQgKDBQNDAsLDBkSEw8UHRofHh0aHBwgJC4nICIsIxwcKDcpLDAxNDQ0Hyc5PTgyPC4zNDL/wAALCAAyACQBAREA/8QAHwAAAQUBAQEBAQEAAAAAAAAAAAECAwQFBgcICQoL/8QAtRAAAgEDAwIEAwUFBAQAAAF9AQIDAAQRBRIhMUEGE1FhByJxFDKBkaEII0KxwRVS0fAkM2JyggkKFhcYGRolJicoKSo0NTY3ODk6Q0RFRkdISUpTVFVWV1hZWmNkZWZnaGlqc3R1dnd4eXqDhIWGh4iJipKTlJWWl5iZmqKjpKWmp6ipqrKztLW2t7i5usLDxMXGx8jJytLT1NXW19jZ2uHi4+Tl5ufo6erx8vP09fb3+Pn6/9oACAEBAAA/APdLq6eAgKoI25Oc+oHYe9Qfb5sr+6UZHUhuv5VYjnZyQSo9gpNTgP3YH6Lj+tPqCa2jmILrkgEVH/Z8HHy8DjGB/hUq26IMAkj0IH+FTUUUUUUUUUUUUUUUUUUUUUUUUUUV/9k=', datatype=rdflib.URIRef(ns.bsfs.BinaryBlob))), + (rdflib.URIRef(pre_preview + '5d1235838c3d501204bb09c2de563d7e4a7fd17b7ec4ff302221c0e88c4741aa_s50'), rdflib.URIRef(ns.bsp.asset), rdflib.Literal('/9j/4AAQSkZJRgABAQAAAQABAAD/2wBDAAgGBgcGBQgHBwcJCQgKDBQNDAsLDBkSEw8UHRofHh0aHBwgJC4nICIsIxwcKDcpLDAxNDQ0Hyc5PTgyPC4zNDL/wAALCAAyACQBAREA/8QAHwAAAQUBAQEBAQEAAAAAAAAAAAECAwQFBgcICQoL/8QAtRAAAgEDAwIEAwUFBAQAAAF9AQIDAAQRBRIhMUEGE1FhByJxFDKBkaEII0KxwRVS0fAkM2JyggkKFhcYGRolJicoKSo0NTY3ODk6Q0RFRkdISUpTVFVWV1hZWmNkZWZnaGlqc3R1dnd4eXqDhIWGh4iJipKTlJWWl5iZmqKjpKWmp6ipqrKztLW2t7i5usLDxMXGx8jJytLT1NXW19jZ2uHi4+Tl5ufo6erx8vP09fb3+Pn6/9oACAEBAAA/APdLm5eBwFVTkd89c+1Qf2hMSAI15/3v8KtR3DSFgIiceh7/AMv1qcHIzjFLVO7tnncFduAMc+v5GoDZXJAw65A/vdT/AN81PDasjMXPBGAMgj8toqxHGIxgfyA/lUlFFFFFFFFFFFFFFFFFFFFFFFFFf//Z', datatype=rdflib.URIRef(ns.bsfs.BinaryBlob))), + (rdflib.URIRef(pre_preview + '79cb8a7e6369361a4f4cb7ff729c1ed3fcf87204769623d6fbd6ebfae601e5c7_s50'), rdflib.URIRef(ns.bsp.asset), 
rdflib.Literal('/9j/4AAQSkZJRgABAQAAAQABAAD/2wBDAAgGBgcGBQgHBwcJCQgKDBQNDAsLDBkSEw8UHRofHh0aHBwgJC4nICIsIxwcKDcpLDAxNDQ0Hyc5PTgyPC4zNDL/wAALCAAyACQBAREA/8QAHwAAAQUBAQEBAQEAAAAAAAAAAAECAwQFBgcICQoL/8QAtRAAAgEDAwIEAwUFBAQAAAF9AQIDAAQRBRIhMUEGE1FhByJxFDKBkaEII0KxwRVS0fAkM2JyggkKFhcYGRolJicoKSo0NTY3ODk6Q0RFRkdISUpTVFVWV1hZWmNkZWZnaGlqc3R1dnd4eXqDhIWGh4iJipKTlJWWl5iZmqKjpKWmp6ipqrKztLW2t7i5usLDxMXGx8jJytLT1NXW19jZ2uHi4+Tl5ufo6erx8vP09fb3+Pn6/9oACAEBAAA/APdLq5eFgEQEYySQf6CoDqEo2/u1yevDf4VJHfZ3eZhNvojHvj0q4h3IDnOR1AxmnVXntEuDlyR8u3gA8fiDUR02I4G9uOei9eeenvTvsS5J82Tn6cfpUsMPkrgSOw9Gxx+lTUUUUUUUUUUUUUUUUUUUUUUUUUV//9k=', datatype=rdflib.URIRef(ns.bsfs.BinaryBlob))), + (rdflib.URIRef(pre_preview + '7a975385a110c21fcd12e238fab9501550fa02f6328749068a3bffd65e291027_s50'), rdflib.URIRef(ns.bsp.asset), rdflib.Literal('/9j/4AAQSkZJRgABAQAAAQABAAD/2wBDAAgGBgcGBQgHBwcJCQgKDBQNDAsLDBkSEw8UHRofHh0aHBwgJC4nICIsIxwcKDcpLDAxNDQ0Hyc5PTgyPC4zNDL/wAALCAAyACQBAREA/8QAHwAAAQUBAQEBAQEAAAAAAAAAAAECAwQFBgcICQoL/8QAtRAAAgEDAwIEAwUFBAQAAAF9AQIDAAQRBRIhMUEGE1FhByJxFDKBkaEII0KxwRVS0fAkM2JyggkKFhcYGRolJicoKSo0NTY3ODk6Q0RFRkdISUpTVFVWV1hZWmNkZWZnaGlqc3R1dnd4eXqDhIWGh4iJipKTlJWWl5iZmqKjpKWmp6ipqrKztLW2t7i5usLDxMXGx8jJytLT1NXW19jZ2uHi4+Tl5ufo6erx8vP09fb3+Pn6/9oACAEBAAA/APdLq6eBgFUHjPIJ7+wqudQmGMxpyMk4bA6+30/OrSyzFvm8kDrjecgflVhSGGVIIPcUtV57RLjBZmBAxwB/UGov7NiGMMw25wQF45z6VKtrGuc7mz6n3zVjpRRRRRRRRRRRRRRRRRRRRRRRRRRX/9k=', datatype=rdflib.URIRef(ns.bsfs.BinaryBlob))), + (rdflib.URIRef(pre_preview + '968b9aa178585bc8d1fca0e4e32b8cf30b3941eff72f34e320584aaae8fd23ac_s50'), rdflib.URIRef(ns.bsp.asset), 
rdflib.Literal('/9j/4AAQSkZJRgABAQAAAQABAAD/2wBDAAgGBgcGBQgHBwcJCQgKDBQNDAsLDBkSEw8UHRofHh0aHBwgJC4nICIsIxwcKDcpLDAxNDQ0Hyc5PTgyPC4zNDL/wAALCAAyACQBAREA/8QAHwAAAQUBAQEBAQEAAAAAAAAAAAECAwQFBgcICQoL/8QAtRAAAgEDAwIEAwUFBAQAAAF9AQIDAAQRBRIhMUEGE1FhByJxFDKBkaEII0KxwRVS0fAkM2JyggkKFhcYGRolJicoKSo0NTY3ODk6Q0RFRkdISUpTVFVWV1hZWmNkZWZnaGlqc3R1dnd4eXqDhIWGh4iJipKTlJWWl5iZmqKjpKWmp6ipqrKztLW2t7i5usLDxMXGx8jJytLT1NXW19jZ2uHi4+Tl5ufo6erx8vP09fb3+Pn6/9oACAEBAAA/APdLm5eFsIgPy55zyc47CoRfTkf6uPOOckjnn29qmS5dmbK52noq5OPzqwj7/wCFl/3hin1n36MzgqhbCkcKTz+ANUzA5Cjynx7IRjn/AHfT271d8gAEFGxzwCfX2WrMR2gIQfbg/wCAqaiiiiiiiiiiiiiiiiiiiiiiiiiv/9k=', datatype=rdflib.URIRef(ns.bsfs.BinaryBlob))), + (rdflib.URIRef(pre_preview + '9827509a74a60dfceed11936f7f624e9c932f66c8c0d20d355d56f8c3c9b56b1_s50'), rdflib.URIRef(ns.bsp.asset), rdflib.Literal('/9j/4AAQSkZJRgABAQAAAQABAAD/2wBDAAgGBgcGBQgHBwcJCQgKDBQNDAsLDBkSEw8UHRofHh0aHBwgJC4nICIsIxwcKDcpLDAxNDQ0Hyc5PTgyPC4zNDL/wAALCAAyACQBAREA/8QAHwAAAQUBAQEBAQEAAAAAAAAAAAECAwQFBgcICQoL/8QAtRAAAgEDAwIEAwUFBAQAAAF9AQIDAAQRBRIhMUEGE1FhByJxFDKBkaEII0KxwRVS0fAkM2JyggkKFhcYGRolJicoKSo0NTY3ODk6Q0RFRkdISUpTVFVWV1hZWmNkZWZnaGlqc3R1dnd4eXqDhIWGh4iJipKTlJWWl5iZmqKjpKWmp6ipqrKztLW2t7i5usLDxMXGx8jJytLT1NXW19jZ2uHi4+Tl5ufo6erx8vP09fb3+Pn6/9oACAEBAAA/APdLq5eAgKoPyknIPb6ColvZywHlp79R+WRU3nT7iojHXqQR6VYUk9QR9adVK8t3mdSqggLjJxkHPuD/AJFVlsZ/l+RFIXGTtI6k9Npq3FbFdwYKAemAp/8AZR/WrKLtGMn9KdRRRRRRRRRRRRRRRRRRRRRRRRRX/9k=', datatype=rdflib.URIRef(ns.bsfs.BinaryBlob))), + (rdflib.URIRef(pre_preview + '567049149769e1d02e6af6cfee3991f7cf0cbc935cbf6a566047f40155fb13a8_s50'), rdflib.URIRef(ns.bsp.asset), 
rdflib.Literal('/9j/4AAQSkZJRgABAQAAAQABAAD/2wBDAAgGBgcGBQgHBwcJCQgKDBQNDAsLDBkSEw8UHRofHh0aHBwgJC4nICIsIxwcKDcpLDAxNDQ0Hyc5PTgyPC4zNDL/wAALCAAyACQBAREA/8QAHwAAAQUBAQEBAQEAAAAAAAAAAAECAwQFBgcICQoL/8QAtRAAAgEDAwIEAwUFBAQAAAF9AQIDAAQRBRIhMUEGE1FhByJxFDKBkaEII0KxwRVS0fAkM2JyggkKFhcYGRolJicoKSo0NTY3ODk6Q0RFRkdISUpTVFVWV1hZWmNkZWZnaGlqc3R1dnd4eXqDhIWGh4iJipKTlJWWl5iZmqKjpKWmp6ipqrKztLW2t7i5usLDxMXGx8jJytLT1NXW19jZ2uHi4+Tl5ufo6erx8vP09fb3+Pn6/9oACAEBAAA/APdLm5eFwFVSCpPIPYj0HvVf+0ZiFxGnIychhj9KtQyTSNkiLZ7E5B/EVZorN1EZkXAJIQ4wM9x7GqYAAXCAgDHKnGMn0X/Oa0LeMJudcKx64iJz+OATVoOOh3E+oUipKayK33lB+opPKj/55r+VHlR8/Iv5U+iiiiiiiiiiiiiiiiiiiiiv/9k=', datatype=rdflib.URIRef(ns.bsfs.BinaryBlob))), + (rdflib.URIRef(pre_preview + 'df2185d8927ccef65c92fc90b94e800b02791354d8dede9dd9aa0e2c2cb1e91e_s50'), rdflib.URIRef(ns.bsp.asset), rdflib.Literal('/9j/4AAQSkZJRgABAQAAAQABAAD/2wBDAAgGBgcGBQgHBwcJCQgKDBQNDAsLDBkSEw8UHRofHh0aHBwgJC4nICIsIxwcKDcpLDAxNDQ0Hyc5PTgyPC4zNDL/wAALCAAyACQBAREA/8QAHwAAAQUBAQEBAQEAAAAAAAAAAAECAwQFBgcICQoL/8QAtRAAAgEDAwIEAwUFBAQAAAF9AQIDAAQRBRIhMUEGE1FhByJxFDKBkaEII0KxwRVS0fAkM2JyggkKFhcYGRolJicoKSo0NTY3ODk6Q0RFRkdISUpTVFVWV1hZWmNkZWZnaGlqc3R1dnd4eXqDhIWGh4iJipKTlJWWl5iZmqKjpKWmp6ipqrKztLW2t7i5usLDxMXGx8jJytLT1NXW19jZ2uHi4+Tl5ufo6erx8vP09fb3+Pn6/9oACAEBAAA/APdLm5eGQBVU5XPIJ7j0FQfb5sgiNcH2b39vapkvGPDIucjoTjBOPSrKuHHGencEU+ql1am4IIYA7cDIB757g1CunFWByAAOCMAg5yD07VcCEN0P/fZNS0UUUUUUUUUUUUUUUUUUUUUUUUUV/9k=', datatype=rdflib.URIRef(ns.bsfs.BinaryBlob))), + (rdflib.URIRef(pre_preview + 'a63c84e647138a2b68113474212f6aee542b3707171ff178551db3c296e59817_s50'), rdflib.URIRef(ns.bsp.asset), 
rdflib.Literal('/9j/4AAQSkZJRgABAQAAAQABAAD/2wBDAAgGBgcGBQgHBwcJCQgKDBQNDAsLDBkSEw8UHRofHh0aHBwgJC4nICIsIxwcKDcpLDAxNDQ0Hyc5PTgyPC4zNDL/wAALCAAyACQBAREA/8QAHwAAAQUBAQEBAQEAAAAAAAAAAAECAwQFBgcICQoL/8QAtRAAAgEDAwIEAwUFBAQAAAF9AQIDAAQRBRIhMUEGE1FhByJxFDKBkaEII0KxwRVS0fAkM2JyggkKFhcYGRolJicoKSo0NTY3ODk6Q0RFRkdISUpTVFVWV1hZWmNkZWZnaGlqc3R1dnd4eXqDhIWGh4iJipKTlJWWl5iZmqKjpKWmp6ipqrKztLW2t7i5usLDxMXGx8jJytLT1NXW19jZ2uHi4+Tl5ufo6erx8vP09fb3+Pn6/9oACAEBAAA/APdLq6eBgFUH5c5IPqPQVANQmJUeWvPXhvUj09qspcM2cAnHHCE/rmpUl3nGx1+oxUlVLq1M7KQQPlI5APXHqDUKacyMDvHTsBk9fb3q4kXl5IYkk5OQB/ICpaKKKKKKKKKKKKKKKKKKKKKKKKKK/9k=', datatype=rdflib.URIRef(ns.bsfs.BinaryBlob))), + (rdflib.URIRef(pre_preview + 'a8b3245636074d5370283b690281abda8ffdff12ce8b1af77c8bc0a4c85be860_s50'), rdflib.URIRef(ns.bsp.asset), rdflib.Literal('/9j/4AAQSkZJRgABAQAAAQABAAD/2wBDAAgGBgcGBQgHBwcJCQgKDBQNDAsLDBkSEw8UHRofHh0aHBwgJC4nICIsIxwcKDcpLDAxNDQ0Hyc5PTgyPC4zNDL/wAALCAAyACQBAREA/8QAHwAAAQUBAQEBAQEAAAAAAAAAAAECAwQFBgcICQoL/8QAtRAAAgEDAwIEAwUFBAQAAAF9AQIDAAQRBRIhMUEGE1FhByJxFDKBkaEII0KxwRVS0fAkM2JyggkKFhcYGRolJicoKSo0NTY3ODk6Q0RFRkdISUpTVFVWV1hZWmNkZWZnaGlqc3R1dnd4eXqDhIWGh4iJipKTlJWWl5iZmqKjpKWmp6ipqrKztLW2t7i5usLDxMXGx8jJytLT1NXW19jZ2uHi4+Tl5ufo6erx8vP09fb3+Pn6/9oACAEBAAA/APdbm4eF1CqDkZOQT3HoKr/b5vl/dpkj/awDnHp06fzqZLiRmIPk47fMQf1FXKKikgilOZEDHGOaZ9itsg+SuQMA08QoowNwHoGP+NO8serf99Gn0UUUUUUUUUUUUUUUUUUUUUUUUV//2Q==', datatype=rdflib.URIRef(ns.bsfs.BinaryBlob))), + (rdflib.URIRef(pre_preview + 'dbfd8ebc0557c4925e9ff8411629a74a15eca934a4c2a6bd3134dd81d2f95a36_s50'), rdflib.URIRef(ns.bsp.asset), 
rdflib.Literal('/9j/4AAQSkZJRgABAQAAAQABAAD/2wBDAAgGBgcGBQgHBwcJCQgKDBQNDAsLDBkSEw8UHRofHh0aHBwgJC4nICIsIxwcKDcpLDAxNDQ0Hyc5PTgyPC4zNDL/wAALCAAyACQBAREA/8QAHwAAAQUBAQEBAQEAAAAAAAAAAAECAwQFBgcICQoL/8QAtRAAAgEDAwIEAwUFBAQAAAF9AQIDAAQRBRIhMUEGE1FhByJxFDKBkaEII0KxwRVS0fAkM2JyggkKFhcYGRolJicoKSo0NTY3ODk6Q0RFRkdISUpTVFVWV1hZWmNkZWZnaGlqc3R1dnd4eXqDhIWGh4iJipKTlJWWl5iZmqKjpKWmp6ipqrKztLW2t7i5usLDxMXGx8jJytLT1NXW19jZ2uHi4+Tl5ufo6erx8vP09fb3+Pn6/9oACAEBAAA/APdbm5eFwqqDkZ5B9QOw96rHUZgoPlLkgno3b8Kel7IzspVePYgdcck1YilaTA3JnqQFPT86sVQvoXkYFUDEKRz7np0NVWtZcAiLkd8Dg/8AfPqO3rWhEk6thsbc9mz+mP61Zoooooooooooooooooooooooooor/9k=', datatype=rdflib.URIRef(ns.bsfs.BinaryBlob))), })) # NOTE: we don't check ns.bsm.t_created since it depends on the execution time. Triples would look like this: - # (rdflib.URIRef(prefix + '2f4109b40107cc50e0884755a1a961ed126887e49b8dbaf0e146b2e226aa6647'), rdflib.URIRef(ns.bsm.t_created), rdflib.Literal('1670..........', datatype=rdflib.XSD.integer)), - # (rdflib.URIRef(prefix + '441f3d10c8ff489fe8e33e639606512f6c463151cc429de7e554b9af670c2ece'), rdflib.URIRef(ns.bsm.t_created), rdflib.Literal('1670..........', datatype=rdflib.XSD.integer)), - # (rdflib.URIRef(prefix + '69b98ecf7aff3e95b09688ba93331678eb8397817111f674c9558e6dd8f5e871'), rdflib.URIRef(ns.bsm.t_created), rdflib.Literal('1670..........', datatype=rdflib.XSD.integer)), - # (rdflib.URIRef(prefix + '78f7eb7f0d8221cdb2cb26c978fa42a11f75eb87becc768f4474134cb1e06926'), rdflib.URIRef(ns.bsm.t_created), rdflib.Literal('1670..........', datatype=rdflib.XSD.integer)), - # (rdflib.URIRef(prefix + '80818b8ec2ee1919116dba9c8a7e0a4608313cf3b463cd88e9ed77a700dd92d3'), rdflib.URIRef(ns.bsm.t_created), rdflib.Literal('1670..........', datatype=rdflib.XSD.integer)), - # (rdflib.URIRef(prefix + '976d2ea0e58488678cc7e435fbfadabfb6eb6cf50ad51862f38f73729ed11795'), rdflib.URIRef(ns.bsm.t_created), rdflib.Literal('1670..........', datatype=rdflib.XSD.integer)), - # (rdflib.URIRef(prefix + 
'997e2fbb7494a3818ec782d2bc87bf1cffafba6b9c0f658e4a6c18a723e944d3'), rdflib.URIRef(ns.bsm.t_created), rdflib.Literal('1670..........', datatype=rdflib.XSD.integer)), - # (rdflib.URIRef(prefix + 'a8af899ecdab60dfaea8ec7f934053624c80a1054539e163f2c7eaa986c2777d'), rdflib.URIRef(ns.bsm.t_created), rdflib.Literal('1670..........', datatype=rdflib.XSD.integer)), - # (rdflib.URIRef(prefix + 'b8fd7fba818254166a6043195004138ebda6923e012442f819a2c49671136c70'), rdflib.URIRef(ns.bsm.t_created), rdflib.Literal('1670..........', datatype=rdflib.XSD.integer)), - # (rdflib.URIRef(prefix + 'd43758ace82154a1cc10ca0dfef63cb20dd831f9c87edd6dc06539eefe67371d'), rdflib.URIRef(ns.bsm.t_created), rdflib.Literal('1670..........', datatype=rdflib.XSD.integer)), - # (rdflib.URIRef(prefix + 'd803187cbf3676ae9d38126270a6152c60431589aa3bb3824baf8954e9c097f1'), rdflib.URIRef(ns.bsm.t_created), rdflib.Literal('1670..........', datatype=rdflib.XSD.integer)), - # (rdflib.URIRef(prefix + 'accb115d266ad60c53cd01a7f7130f245886ce8eaf69bc85319febc11d9fe089'), rdflib.URIRef(ns.bsm.t_created), rdflib.Literal('1670..........', datatype=rdflib.XSD.integer)), + # (rdflib.URIRef(pre_file + '2f4109b40107cc50e0884755a1a961ed126887e49b8dbaf0e146b2e226aa6647'), rdflib.URIRef(ns.bsm.t_created), rdflib.Literal('1670..........', datatype=rdflib.XSD.integer)), + # (rdflib.URIRef(pre_file + '441f3d10c8ff489fe8e33e639606512f6c463151cc429de7e554b9af670c2ece'), rdflib.URIRef(ns.bsm.t_created), rdflib.Literal('1670..........', datatype=rdflib.XSD.integer)), + # (rdflib.URIRef(pre_file + '80818b8ec2ee1919116dba9c8a7e0a4608313cf3b463cd88e9ed77a700dd92d3'), rdflib.URIRef(ns.bsm.t_created), rdflib.Literal('1670..........', datatype=rdflib.XSD.integer)), + # ... 
# instead, we simply check if there's such a predicate for each file self.assertSetEqual({sub for sub, _ in bsfs._backend._graph.subject_objects(rdflib.URIRef(ns.bsm.t_created))}, { - rdflib.URIRef(prefix + '2f4109b40107cc50e0884755a1a961ed126887e49b8dbaf0e146b2e226aa6647'), - rdflib.URIRef(prefix + '441f3d10c8ff489fe8e33e639606512f6c463151cc429de7e554b9af670c2ece'), - rdflib.URIRef(prefix + '69b98ecf7aff3e95b09688ba93331678eb8397817111f674c9558e6dd8f5e871'), - rdflib.URIRef(prefix + '78f7eb7f0d8221cdb2cb26c978fa42a11f75eb87becc768f4474134cb1e06926'), - rdflib.URIRef(prefix + '80818b8ec2ee1919116dba9c8a7e0a4608313cf3b463cd88e9ed77a700dd92d3'), - rdflib.URIRef(prefix + '976d2ea0e58488678cc7e435fbfadabfb6eb6cf50ad51862f38f73729ed11795'), - rdflib.URIRef(prefix + '997e2fbb7494a3818ec782d2bc87bf1cffafba6b9c0f658e4a6c18a723e944d3'), - rdflib.URIRef(prefix + 'a8af899ecdab60dfaea8ec7f934053624c80a1054539e163f2c7eaa986c2777d'), - rdflib.URIRef(prefix + 'b8fd7fba818254166a6043195004138ebda6923e012442f819a2c49671136c70'), - rdflib.URIRef(prefix + 'd43758ace82154a1cc10ca0dfef63cb20dd831f9c87edd6dc06539eefe67371d'), - rdflib.URIRef(prefix + 'd803187cbf3676ae9d38126270a6152c60431589aa3bb3824baf8954e9c097f1'), - rdflib.URIRef(prefix + 'accb115d266ad60c53cd01a7f7130f245886ce8eaf69bc85319febc11d9fe089'), + rdflib.URIRef(pre_file + '2f4109b40107cc50e0884755a1a961ed126887e49b8dbaf0e146b2e226aa6647'), + rdflib.URIRef(pre_file + '441f3d10c8ff489fe8e33e639606512f6c463151cc429de7e554b9af670c2ece'), + rdflib.URIRef(pre_file + '69b98ecf7aff3e95b09688ba93331678eb8397817111f674c9558e6dd8f5e871'), + rdflib.URIRef(pre_file + '78f7eb7f0d8221cdb2cb26c978fa42a11f75eb87becc768f4474134cb1e06926'), + rdflib.URIRef(pre_file + '80818b8ec2ee1919116dba9c8a7e0a4608313cf3b463cd88e9ed77a700dd92d3'), + rdflib.URIRef(pre_file + '976d2ea0e58488678cc7e435fbfadabfb6eb6cf50ad51862f38f73729ed11795'), + rdflib.URIRef(pre_file + '997e2fbb7494a3818ec782d2bc87bf1cffafba6b9c0f658e4a6c18a723e944d3'), + 
rdflib.URIRef(pre_file + 'a8af899ecdab60dfaea8ec7f934053624c80a1054539e163f2c7eaa986c2777d'), + rdflib.URIRef(pre_file + 'b8fd7fba818254166a6043195004138ebda6923e012442f819a2c49671136c70'), + rdflib.URIRef(pre_file + 'd43758ace82154a1cc10ca0dfef63cb20dd831f9c87edd6dc06539eefe67371d'), + rdflib.URIRef(pre_file + 'd803187cbf3676ae9d38126270a6152c60431589aa3bb3824baf8954e9c097f1'), + rdflib.URIRef(pre_file + 'accb115d266ad60c53cd01a7f7130f245886ce8eaf69bc85319febc11d9fe089'), + rdflib.URIRef(pre_preview + '26f16643b2570ac5b2d1f8c373d492cb724aae2dd8d71a0b63647838ed651254_s50'), + rdflib.URIRef(pre_preview + 'a63c84e647138a2b68113474212f6aee542b3707171ff178551db3c296e59817_s50'), + rdflib.URIRef(pre_preview + '9827509a74a60dfceed11936f7f624e9c932f66c8c0d20d355d56f8c3c9b56b1_s50'), + rdflib.URIRef(pre_preview + '2656e303d7218300326df73b64f312d8b37eb980358be27a38b5f63dae259be3_s50'), + rdflib.URIRef(pre_preview + '79cb8a7e6369361a4f4cb7ff729c1ed3fcf87204769623d6fbd6ebfae601e5c7_s50'), + rdflib.URIRef(pre_preview + 'dbfd8ebc0557c4925e9ff8411629a74a15eca934a4c2a6bd3134dd81d2f95a36_s50'), + rdflib.URIRef(pre_preview + '5d1235838c3d501204bb09c2de563d7e4a7fd17b7ec4ff302221c0e88c4741aa_s50'), + rdflib.URIRef(pre_preview + '567049149769e1d02e6af6cfee3991f7cf0cbc935cbf6a566047f40155fb13a8_s50'), + rdflib.URIRef(pre_preview + 'df2185d8927ccef65c92fc90b94e800b02791354d8dede9dd9aa0e2c2cb1e91e_s50'), + rdflib.URIRef(pre_preview + 'a8b3245636074d5370283b690281abda8ffdff12ce8b1af77c8bc0a4c85be860_s50'), + rdflib.URIRef(pre_preview + '7a975385a110c21fcd12e238fab9501550fa02f6328749068a3bffd65e291027_s50'), + rdflib.URIRef(pre_preview + '968b9aa178585bc8d1fca0e4e32b8cf30b3941eff72f34e320584aaae8fd23ac_s50'), }) def test_print(self): @@ -128,11 +203,12 @@ class TestIndex(unittest.TestCase): bsfs = main([ '--print', '-r', - '--user', 'http://example.com/me', + '--host', 'http://example.com', + '--user', 'me', os.path.join(os.path.dirname(__file__), 'testdir'), 
os.path.join(os.path.dirname(__file__), 'testfile'), ]) - self.assertSetEqual(set(outbuf.getvalue().split('\n')) - {''}, { + self.assertTrue((set(outbuf.getvalue().split('\n')) - {''}).issuperset({ f'Node(http://bsfs.ai/schema/File, http://example.com/me/file#2f4109b40107cc50e0884755a1a961ed126887e49b8dbaf0e146b2e226aa6647) Predicate({ns.bse.author}) Me, myself, and I', f'Node(http://bsfs.ai/schema/File, http://example.com/me/file#2f4109b40107cc50e0884755a1a961ed126887e49b8dbaf0e146b2e226aa6647) Predicate({ns.bse.filename}) alpha_second', f'Node(http://bsfs.ai/schema/File, http://example.com/me/file#2f4109b40107cc50e0884755a1a961ed126887e49b8dbaf0e146b2e226aa6647) Predicate({ns.bse.filesize}) 696', @@ -169,8 +245,49 @@ class TestIndex(unittest.TestCase): f'Node(http://bsfs.ai/schema/File, http://example.com/me/file#accb115d266ad60c53cd01a7f7130f245886ce8eaf69bc85319febc11d9fe089) Predicate({ns.bse.filesize}) 349264', f'Node(http://bsfs.ai/schema/File, http://example.com/me/file#accb115d266ad60c53cd01a7f7130f245886ce8eaf69bc85319febc11d9fe089) Predicate({ns.bse.author}) Me, myself, and I', f'Node(http://bsfs.ai/schema/File, http://example.com/me/file#accb115d266ad60c53cd01a7f7130f245886ce8eaf69bc85319febc11d9fe089) Predicate({ns.bse.filename}) testimage.jpg', + # features f'Node(http://bsfs.ai/schema/File, http://example.com/me/file#accb115d266ad60c53cd01a7f7130f245886ce8eaf69bc85319febc11d9fe089) Predicate(http://bsfs.ai/schema/Entity/colors_spatial#0658f2234a054e1dd59a14462c89f7733e019160419c796356aa831498bd0a04) (91, 127, 121, 94, 138, 167, 163, 134, 190, 138, 170, 156, 121, 142, 159)', - }) + # links to previews + f'Node(http://bsfs.ai/schema/File, http://example.com/me/file#2f4109b40107cc50e0884755a1a961ed126887e49b8dbaf0e146b2e226aa6647) Predicate({ns.bse.preview}) Node(http://bsfs.ai/schema/Preview, http://example.com/me/preview#79cb8a7e6369361a4f4cb7ff729c1ed3fcf87204769623d6fbd6ebfae601e5c7_s50)', + f'Node(http://bsfs.ai/schema/File, 
http://example.com/me/file#441f3d10c8ff489fe8e33e639606512f6c463151cc429de7e554b9af670c2ece) Predicate({ns.bse.preview}) Node(http://bsfs.ai/schema/Preview, http://example.com/me/preview#a8b3245636074d5370283b690281abda8ffdff12ce8b1af77c8bc0a4c85be860_s50)', + f'Node(http://bsfs.ai/schema/File, http://example.com/me/file#69b98ecf7aff3e95b09688ba93331678eb8397817111f674c9558e6dd8f5e871) Predicate({ns.bse.preview}) Node(http://bsfs.ai/schema/Preview, http://example.com/me/preview#968b9aa178585bc8d1fca0e4e32b8cf30b3941eff72f34e320584aaae8fd23ac_s50)', + f'Node(http://bsfs.ai/schema/File, http://example.com/me/file#78f7eb7f0d8221cdb2cb26c978fa42a11f75eb87becc768f4474134cb1e06926) Predicate({ns.bse.preview}) Node(http://bsfs.ai/schema/Preview, http://example.com/me/preview#dbfd8ebc0557c4925e9ff8411629a74a15eca934a4c2a6bd3134dd81d2f95a36_s50)', + f'Node(http://bsfs.ai/schema/File, http://example.com/me/file#80818b8ec2ee1919116dba9c8a7e0a4608313cf3b463cd88e9ed77a700dd92d3) Predicate({ns.bse.preview}) Node(http://bsfs.ai/schema/Preview, http://example.com/me/preview#567049149769e1d02e6af6cfee3991f7cf0cbc935cbf6a566047f40155fb13a8_s50)', + f'Node(http://bsfs.ai/schema/File, http://example.com/me/file#976d2ea0e58488678cc7e435fbfadabfb6eb6cf50ad51862f38f73729ed11795) Predicate({ns.bse.preview}) Node(http://bsfs.ai/schema/Preview, http://example.com/me/preview#df2185d8927ccef65c92fc90b94e800b02791354d8dede9dd9aa0e2c2cb1e91e_s50)', + f'Node(http://bsfs.ai/schema/File, http://example.com/me/file#997e2fbb7494a3818ec782d2bc87bf1cffafba6b9c0f658e4a6c18a723e944d3) Predicate({ns.bse.preview}) Node(http://bsfs.ai/schema/Preview, http://example.com/me/preview#9827509a74a60dfceed11936f7f624e9c932f66c8c0d20d355d56f8c3c9b56b1_s50)', + f'Node(http://bsfs.ai/schema/File, http://example.com/me/file#a8af899ecdab60dfaea8ec7f934053624c80a1054539e163f2c7eaa986c2777d) Predicate({ns.bse.preview}) Node(http://bsfs.ai/schema/Preview, 
http://example.com/me/preview#7a975385a110c21fcd12e238fab9501550fa02f6328749068a3bffd65e291027_s50)', + f'Node(http://bsfs.ai/schema/File, http://example.com/me/file#accb115d266ad60c53cd01a7f7130f245886ce8eaf69bc85319febc11d9fe089) Predicate({ns.bse.preview}) Node(http://bsfs.ai/schema/Preview, http://example.com/me/preview#2656e303d7218300326df73b64f312d8b37eb980358be27a38b5f63dae259be3_s50)', + f'Node(http://bsfs.ai/schema/File, http://example.com/me/file#b8fd7fba818254166a6043195004138ebda6923e012442f819a2c49671136c70) Predicate({ns.bse.preview}) Node(http://bsfs.ai/schema/Preview, http://example.com/me/preview#a63c84e647138a2b68113474212f6aee542b3707171ff178551db3c296e59817_s50)', + f'Node(http://bsfs.ai/schema/File, http://example.com/me/file#d43758ace82154a1cc10ca0dfef63cb20dd831f9c87edd6dc06539eefe67371d) Predicate({ns.bse.preview}) Node(http://bsfs.ai/schema/Preview, http://example.com/me/preview#26f16643b2570ac5b2d1f8c373d492cb724aae2dd8d71a0b63647838ed651254_s50)', + f'Node(http://bsfs.ai/schema/File, http://example.com/me/file#d803187cbf3676ae9d38126270a6152c60431589aa3bb3824baf8954e9c097f1) Predicate({ns.bse.preview}) Node(http://bsfs.ai/schema/Preview, http://example.com/me/preview#5d1235838c3d501204bb09c2de563d7e4a7fd17b7ec4ff302221c0e88c4741aa_s50)', + # preview dimensions + f'Node(http://bsfs.ai/schema/Preview, http://example.com/me/preview#2656e303d7218300326df73b64f312d8b37eb980358be27a38b5f63dae259be3_s50) Predicate({ns.bsp.height}) 33', + f'Node(http://bsfs.ai/schema/Preview, http://example.com/me/preview#2656e303d7218300326df73b64f312d8b37eb980358be27a38b5f63dae259be3_s50) Predicate({ns.bsp.width}) 50', + f'Node(http://bsfs.ai/schema/Preview, http://example.com/me/preview#26f16643b2570ac5b2d1f8c373d492cb724aae2dd8d71a0b63647838ed651254_s50) Predicate({ns.bsp.height}) 50', + f'Node(http://bsfs.ai/schema/Preview, http://example.com/me/preview#26f16643b2570ac5b2d1f8c373d492cb724aae2dd8d71a0b63647838ed651254_s50) Predicate({ns.bsp.width}) 36', + 
f'Node(http://bsfs.ai/schema/Preview, http://example.com/me/preview#567049149769e1d02e6af6cfee3991f7cf0cbc935cbf6a566047f40155fb13a8_s50) Predicate({ns.bsp.height}) 50', + f'Node(http://bsfs.ai/schema/Preview, http://example.com/me/preview#567049149769e1d02e6af6cfee3991f7cf0cbc935cbf6a566047f40155fb13a8_s50) Predicate({ns.bsp.width}) 36', + f'Node(http://bsfs.ai/schema/Preview, http://example.com/me/preview#5d1235838c3d501204bb09c2de563d7e4a7fd17b7ec4ff302221c0e88c4741aa_s50) Predicate({ns.bsp.height}) 50', + f'Node(http://bsfs.ai/schema/Preview, http://example.com/me/preview#5d1235838c3d501204bb09c2de563d7e4a7fd17b7ec4ff302221c0e88c4741aa_s50) Predicate({ns.bsp.width}) 36', + f'Node(http://bsfs.ai/schema/Preview, http://example.com/me/preview#79cb8a7e6369361a4f4cb7ff729c1ed3fcf87204769623d6fbd6ebfae601e5c7_s50) Predicate({ns.bsp.height}) 50', + f'Node(http://bsfs.ai/schema/Preview, http://example.com/me/preview#79cb8a7e6369361a4f4cb7ff729c1ed3fcf87204769623d6fbd6ebfae601e5c7_s50) Predicate({ns.bsp.width}) 36', + f'Node(http://bsfs.ai/schema/Preview, http://example.com/me/preview#7a975385a110c21fcd12e238fab9501550fa02f6328749068a3bffd65e291027_s50) Predicate({ns.bsp.height}) 50', + f'Node(http://bsfs.ai/schema/Preview, http://example.com/me/preview#7a975385a110c21fcd12e238fab9501550fa02f6328749068a3bffd65e291027_s50) Predicate({ns.bsp.width}) 36', + f'Node(http://bsfs.ai/schema/Preview, http://example.com/me/preview#968b9aa178585bc8d1fca0e4e32b8cf30b3941eff72f34e320584aaae8fd23ac_s50) Predicate({ns.bsp.height}) 50', + f'Node(http://bsfs.ai/schema/Preview, http://example.com/me/preview#968b9aa178585bc8d1fca0e4e32b8cf30b3941eff72f34e320584aaae8fd23ac_s50) Predicate({ns.bsp.width}) 36', + f'Node(http://bsfs.ai/schema/Preview, http://example.com/me/preview#9827509a74a60dfceed11936f7f624e9c932f66c8c0d20d355d56f8c3c9b56b1_s50) Predicate({ns.bsp.height}) 50', + f'Node(http://bsfs.ai/schema/Preview, 
http://example.com/me/preview#9827509a74a60dfceed11936f7f624e9c932f66c8c0d20d355d56f8c3c9b56b1_s50) Predicate({ns.bsp.width}) 36', + f'Node(http://bsfs.ai/schema/Preview, http://example.com/me/preview#a63c84e647138a2b68113474212f6aee542b3707171ff178551db3c296e59817_s50) Predicate({ns.bsp.height}) 50', + f'Node(http://bsfs.ai/schema/Preview, http://example.com/me/preview#a63c84e647138a2b68113474212f6aee542b3707171ff178551db3c296e59817_s50) Predicate({ns.bsp.width}) 36', + f'Node(http://bsfs.ai/schema/Preview, http://example.com/me/preview#a8b3245636074d5370283b690281abda8ffdff12ce8b1af77c8bc0a4c85be860_s50) Predicate({ns.bsp.height}) 50', + f'Node(http://bsfs.ai/schema/Preview, http://example.com/me/preview#a8b3245636074d5370283b690281abda8ffdff12ce8b1af77c8bc0a4c85be860_s50) Predicate({ns.bsp.width}) 36', + f'Node(http://bsfs.ai/schema/Preview, http://example.com/me/preview#dbfd8ebc0557c4925e9ff8411629a74a15eca934a4c2a6bd3134dd81d2f95a36_s50) Predicate({ns.bsp.height}) 50', + f'Node(http://bsfs.ai/schema/Preview, http://example.com/me/preview#dbfd8ebc0557c4925e9ff8411629a74a15eca934a4c2a6bd3134dd81d2f95a36_s50) Predicate({ns.bsp.width}) 36', + f'Node(http://bsfs.ai/schema/Preview, http://example.com/me/preview#df2185d8927ccef65c92fc90b94e800b02791354d8dede9dd9aa0e2c2cb1e91e_s50) Predicate({ns.bsp.height}) 50', + f'Node(http://bsfs.ai/schema/Preview, http://example.com/me/preview#df2185d8927ccef65c92fc90b94e800b02791354d8dede9dd9aa0e2c2cb1e91e_s50) Predicate({ns.bsp.width}) 36', + # assets + # ... 
(not checked) + })) ## main ## diff --git a/test/apps/test_info.py b/test/apps/test_info.py index 60e9ba1..725fb65 100644 --- a/test/apps/test_info.py +++ b/test/apps/test_info.py @@ -31,7 +31,11 @@ class TestIndex(unittest.TestCase): 'http://bsfs.ai/schema/Predicate', 'http://bsfs.ai/schema/Entity#filename', 'http://bsfs.ai/schema/Entity#filesize', - 'http://bsfs.ai/schema/Entity/colors_spatial#0658f2234a054e1dd59a14462c89f7733e019160419c796356aa831498bd0a04' + 'http://bsfs.ai/schema/Entity/colors_spatial#0658f2234a054e1dd59a14462c89f7733e019160419c796356aa831498bd0a04', + 'http://bsfs.ai/schema/Entity#preview', + 'http://bsfs.ai/schema/Preview#width', + 'http://bsfs.ai/schema/Preview#height', + 'http://bsfs.ai/schema/Preview#asset', }) def test_schema(self): @@ -46,7 +50,11 @@ class TestIndex(unittest.TestCase): 'http://bsfs.ai/schema/Predicate', 'http://bsfs.ai/schema/Entity#filename', 'http://bsfs.ai/schema/Entity#filesize', - 'http://bsfs.ai/schema/Entity/colors_spatial#0658f2234a054e1dd59a14462c89f7733e019160419c796356aa831498bd0a04' + 'http://bsfs.ai/schema/Entity/colors_spatial#0658f2234a054e1dd59a14462c89f7733e019160419c796356aa831498bd0a04', + 'http://bsfs.ai/schema/Entity#preview', + 'http://bsfs.ai/schema/Preview#width', + 'http://bsfs.ai/schema/Preview#height', + 'http://bsfs.ai/schema/Preview#asset', }) def test_invalid(self): diff --git a/test/extractor/test_preview.py b/test/extractor/test_preview.py new file mode 100644 index 0000000..10d2a7f --- /dev/null +++ b/test/extractor/test_preview.py @@ -0,0 +1,128 @@ +""" + +Part of the bsie test suite. +A copy of the license is provided with the project. 
+Author: Matthias Baumgartner, 2022 +""" +# standard imports +import io +import os +import unittest + +# external imports +import PIL.Image + +# bsie imports +from bsie.extractor import base +from bsie.utils import bsfs, node as _node, ns +from bsie.reader.preview import Preview as Reader + +# objects to test +from bsie.extractor.preview import Preview + + +## code ## + +class TestPreview(unittest.TestCase): + def test_eq(self): + # identical instances are equal + self.assertEqual(Preview([1,2,3]), Preview([1,2,3])) + self.assertEqual(hash(Preview([1,2,3])), hash(Preview([1,2,3]))) + # comparison respects max_sides + self.assertNotEqual(Preview([1,2,3]), Preview([1,2])) + self.assertNotEqual(hash(Preview([1,2,3])), hash(Preview([1,2]))) + self.assertNotEqual(Preview([1,2]), Preview([1,2,3])) + self.assertNotEqual(hash(Preview([1,2])), hash(Preview([1,2,3]))) + # comparison respects type + class Foo(): pass + self.assertNotEqual(Preview([1,2,3]), Foo()) + self.assertNotEqual(hash(Preview([1,2,3])), hash(Foo())) + self.assertNotEqual(Preview([1,2,3]), 123) + self.assertNotEqual(hash(Preview([1,2,3])), hash(123)) + self.assertNotEqual(Preview([1,2,3]), None) + self.assertNotEqual(hash(Preview([1,2,3])), hash(None)) + + def test_schema(self): + self.assertEqual(Preview([1,2,3]).schema, + bsfs.schema.from_string(base.SCHEMA_PREAMBLE + ''' + bsfs:Preview rdfs:subClassOf bsfs:Node . + bsfs:BinaryBlob rdfs:subClassOf bsfs:Literal . + bsfs:JPEG rdfs:subClassOf bsfs:BinaryBlob . + + bse:preview rdfs:subClassOf bsfs:Predicate ; + rdfs:domain bsfs:File ; + rdfs:range bsfs:Preview ; + bsfs:unique "false"^^xsd:boolean . + + bsp:width rdfs:subClassOf bsfs:Predicate ; + rdfs:domain bsfs:Preview ; + rdfs:range xsd:integer ; + bsfs:unique "true"^^xsd:boolean . + + bsp:height rdfs:subClassOf bsfs:Predicate ; + rdfs:domain bsfs:Preview ; + rdfs:range xsd:integer ; + bsfs:unique "true"^^xsd:boolean . 
+ + bsp:asset rdfs:subClassOf bsfs:Predicate ; + rdfs:domain bsfs:Preview ; + rdfs:range bsfs:JPEG ; + bsfs:unique "true"^^xsd:boolean . + + ''')) + + def test_extract(self): + # setup dependents + rdr = Reader() + subject = _node.Node(ns.bsfs.File) + path = os.path.join(os.path.dirname(__file__), 'testimage.jpg') + + # setup extractor + ext = Preview(max_sides=[10]) + principals = set(ext.principals) + self.assertEqual(principals, {ext.schema.predicate(ns.bse.preview)}) + # skip unknown predicates + gen = rdr(path) + self.assertSetEqual(set(), set(ext.extract(subject, gen, + {ext.schema.predicate(ns.bsfs.Predicate).child(ns.bse.unknown)}))) + gen(10) # NOTE: consume some image to avoid resource error warning + # extract a preview + triples = set(ext.extract(subject, rdr(path), principals)) + thumbs = {node for node, _, _ in triples if node.node_type == ns.bsfs.Preview} + self.assertEqual(len(thumbs), 1) + thumb = list(thumbs)[0] + # test properties + self.assertTrue(triples.issuperset({ + (subject, ext.schema.predicate(ns.bse.preview), thumb), + (thumb, ext.schema.predicate(ns.bsp.width), 10), + (thumb, ext.schema.predicate(ns.bsp.height), 10), + })) + # test image data + rawdata = {val for _, pred, val in triples if pred == ext.schema.predicate(ns.bsp.asset)} + self.assertEqual(len(rawdata), 1) + data = io.BytesIO(list(rawdata)[0]) + data.seek(0) + img = PIL.Image.open(data) + self.assertEqual(img.size, (10, 10)) + self.assertEqual(sum(band for pix in img.getdata() for band in pix), 0) + + # setup extractor + ext = Preview(max_sides=[10, 20]) + principals = set(ext.principals) + self.assertEqual(principals, {ext.schema.predicate(ns.bse.preview)}) + # extract a preview + triples = set(ext.extract(subject, rdr(path), principals)) + thumbs = {node for node, _, _ in triples if node.node_type == ns.bsfs.Preview} + self.assertEqual(len(thumbs), 2) + self.assertSetEqual({10, 20}, { + value for _, pred, value in triples if pred == ext.schema.predicate(ns.bsp.width)}) + 
self.assertSetEqual({10, 20}, { + value for _, pred, value in triples if pred == ext.schema.predicate(ns.bsp.height)}) + + +## main ## + +if __name__ == '__main__': + unittest.main() + +## EOF ## diff --git a/test/extractor/testimage.jpg b/test/extractor/testimage.jpg new file mode 100644 index 0000000..4c2aca5 Binary files /dev/null and b/test/extractor/testimage.jpg differ diff --git a/test/lib/test_naming_policy.py b/test/lib/test_naming_policy.py index 763537b..4861c84 100644 --- a/test/lib/test_naming_policy.py +++ b/test/lib/test_naming_policy.py @@ -32,6 +32,10 @@ class TestDefaultNamingPolicy(unittest.TestCase): self.assertEqual(policy.handle_node( Node(ns.bsfs.File, ucid='abc123cba')).uri, URI('http://example.com/me/file#abc123cba')) + # processes bsfs:Preview + self.assertEqual(policy.handle_node( + Node(ns.bsfs.Preview, ucid='abc123cba', size=123)).uri, + URI('http://example.com/me/preview#abc123cba_s123')) # raises an exception on unknown types self.assertRaises(errors.ProgrammingError, policy.handle_node, Node(ns.bsfs.Entity, ucid='abc123cba', size=123)) @@ -47,6 +51,31 @@ class TestDefaultNamingPolicy(unittest.TestCase): self.assertTrue(policy.name_file( Node(ns.bsfs.File)).uri.startswith('http://example.com/me/file#')) + def test_name_preview(self): + # setup + policy = DefaultNamingPolicy('http://example.com', 'me') + # name_preview uses ucid + self.assertEqual(policy.name_preview( + Node(ns.bsfs.Preview, ucid='123abc321')).uri, + URI('http://example.com/me/preview#123abc321')) + self.assertEqual(policy.name_preview( + Node(ns.bsfs.Preview, ucid='123abc321', size=400)).uri, + URI('http://example.com/me/preview#123abc321_s400')) + # name_preview uses source + self.assertEqual(policy.name_preview( + Node(ns.bsfs.Preview, source=Node(ns.bsfs.File, ucid='123file321'))).uri, + URI('http://example.com/me/preview#123file321')) + self.assertEqual(policy.name_preview( + Node(ns.bsfs.Preview, source=Node(ns.bsfs.File, ucid='123file321'), size=300)).uri, + 
URI('http://example.com/me/preview#123file321_s300')) + # name_preview falls back to a random guid + self.assertTrue(policy.name_preview( + Node(ns.bsfs.Preview)).uri.startswith('http://example.com/me/preview#')) + self.assertTrue(policy.name_preview( + Node(ns.bsfs.Preview, size=200)).uri.startswith('http://example.com/me/preview#')) + self.assertTrue(policy.name_preview( + Node(ns.bsfs.Preview, size=200)).uri.endswith('_s200')) + class TestNamingPolicyIterator(unittest.TestCase): @@ -71,10 +100,15 @@ class TestNamingPolicyIterator(unittest.TestCase): policy = DefaultNamingPolicy('http://example.com', 'me') triples = [ (Node(ns.bsfs.File, ucid='foo'), 'predA', 'hello'), + (Node(ns.bsfs.Preview, ucid='bar'), 'predB', 1234), + (Node(ns.bsfs.Preview, ucid='hello'), 'predC', Node(ns.bsfs.File, ucid='world')) ] # handles nodes, handles values, ignores predicate self.assertListEqual(list(policy(triples)), [ (Node(ns.bsfs.File, uri='http://example.com/me/file#foo'), 'predA', 'hello'), + (Node(ns.bsfs.Preview, uri='http://example.com/me/preview#bar'), 'predB', 1234), + (Node(ns.bsfs.Preview, uri='http://example.com/me/preview#hello'), 'predC', + Node(ns.bsfs.File, uri='http://example.com/me/file#world')), ]) -- cgit v1.2.3 From 0d0144466919cfb168e75c2af26d5cb74e10bfa0 Mon Sep 17 00:00:00 2001 From: Matthias Baumgartner Date: Wed, 8 Feb 2023 19:24:37 +0100 Subject: minor cleanup --- bsie/extractor/image/colors_spatial.py | 2 +- bsie/reader/chain.py | 11 +++++++---- bsie/reader/image/__init__.py | 1 - bsie/reader/image/_pillow.py | 2 +- bsie/reader/image/_raw.py | 6 +++--- 5 files changed, 12 insertions(+), 10 deletions(-) diff --git a/bsie/extractor/image/colors_spatial.py b/bsie/extractor/image/colors_spatial.py index ce5b9f2..15fd281 100644 --- a/bsie/extractor/image/colors_spatial.py +++ b/bsie/extractor/image/colors_spatial.py @@ -120,7 +120,7 @@ class ColorsSpatial(base.Extractor): def extract( self, subject: node.Node, - content: PIL.Image, + content: 
PIL.Image.Image, principals: typing.Iterable[bsfs.schema.Predicate], ) -> typing.Iterator[typing.Tuple[node.Node, bsfs.schema.Predicate, typing.Any]]: # check principals diff --git a/bsie/reader/chain.py b/bsie/reader/chain.py index 5e9e0d5..1dbc52b 100644 --- a/bsie/reader/chain.py +++ b/bsie/reader/chain.py @@ -73,16 +73,19 @@ class ReaderChain(base.Reader, typing.Generic[T_CONTENT]): return hash((super().__hash__(), self._children)) def __call__(self, path: str) -> T_CONTENT: - raise_error = errors.UnsupportedFileFormatError + raise_error = False for child in self._children: try: return child(path) except errors.UnsupportedFileFormatError: + # child cannot read the file, skip. pass except errors.ReaderError: - # child cannot read the file, skip. - raise_error = errors.ReaderError # type: ignore [assignment] # mypy is confused + # child failed to read the file, skip. + raise_error = True - raise raise_error(path) + if raise_error: + raise errors.ReaderError(path) + raise errors.UnsupportedFileFormatError(path) ## EOF ## diff --git a/bsie/reader/image/__init__.py b/bsie/reader/image/__init__.py index 1f290b5..c5d2a2a 100644 --- a/bsie/reader/image/__init__.py +++ b/bsie/reader/image/__init__.py @@ -27,7 +27,6 @@ __all__: typing.Sequence[str] = ( ## code ## -# FIXME: Check if PIL.Image or PIL.Image.Image, or if version-dependent class Image(chain.ReaderChain[PIL.Image.Image]): # pylint: disable=too-few-public-methods """Read an image file.""" diff --git a/bsie/reader/image/_pillow.py b/bsie/reader/image/_pillow.py index 3144509..5b2bdf2 100644 --- a/bsie/reader/image/_pillow.py +++ b/bsie/reader/image/_pillow.py @@ -27,7 +27,7 @@ __all__: typing.Sequence[str] = ( class PillowImage(base.Reader): """Use PIL to read content of a variety of image file types.""" - def __call__(self, path: str) -> PIL.Image: + def __call__(self, path: str) -> PIL.Image.Image: try: # open file with PIL return PIL.Image.open(path) diff --git a/bsie/reader/image/_raw.py 
b/bsie/reader/image/_raw.py index cd60453..257fdb3 100644 --- a/bsie/reader/image/_raw.py +++ b/bsie/reader/image/_raw.py @@ -32,17 +32,17 @@ class RawImage(base.Reader): """Use rawpy to read content of raw image file types.""" # file matcher - match: filematcher.Matcher + _match: filematcher.Matcher # additional kwargs to rawpy's postprocess - rawpy_kwargs: typing.Dict[str, typing.Any] + _rawpy_kwargs: typing.Dict[str, typing.Any] def __init__(self, **rawpy_kwargs): match_rule = rawpy_kwargs.pop('file_match_rule', MATCH_RULE) self._match = filematcher.parse(match_rule) self._rawpy_kwargs = rawpy_kwargs - def __call__(self, path: str) -> PIL.Image: + def __call__(self, path: str) -> PIL.Image.Image: # perform quick checks first if not self._match(path): raise errors.UnsupportedFileFormatError(path) -- cgit v1.2.3 From 02cd75f31120a766a35fc0ae00f8d0711c1c0ae9 Mon Sep 17 00:00:00 2001 From: Matthias Baumgartner Date: Wed, 1 Mar 2023 17:04:57 +0100 Subject: schema fixes --- bsie/extractor/base.py | 1 + bsie/extractor/generic/path.py | 2 +- bsie/extractor/generic/stat.py | 2 +- test/extractor/generic/test_path.py | 2 +- test/extractor/generic/test_stat.py | 2 +- test/lib/test_bsie.py | 12 ++++++------ 6 files changed, 11 insertions(+), 10 deletions(-) diff --git a/bsie/extractor/base.py b/bsie/extractor/base.py index 89183f9..d8b86a5 100644 --- a/bsie/extractor/base.py +++ b/bsie/extractor/base.py @@ -48,6 +48,7 @@ SCHEMA_PREAMBLE = ''' # common definitions xsd:string rdfs:subClassOf bsfs:Literal . xsd:integer rdfs:subClassOf bsfs:Number . + xsd:float rdfs:subClassOf bsfs:Number . 
''' diff --git a/bsie/extractor/generic/path.py b/bsie/extractor/generic/path.py index c984515..cd8cec9 100644 --- a/bsie/extractor/generic/path.py +++ b/bsie/extractor/generic/path.py @@ -35,7 +35,7 @@ class Path(base.Extractor): rdfs:range xsd:string ; rdfs:label "File name"^^xsd:string ; schema:description "Filename of entity in some filesystem."^^xsd:string ; - bsfs:unique "false"^^xsd:boolean . + bsfs:unique "true"^^xsd:boolean . ''')) self._callmap = { self.schema.predicate(ns.bse.filename): self.__filename, diff --git a/bsie/extractor/generic/stat.py b/bsie/extractor/generic/stat.py index 9394456..f35f8e1 100644 --- a/bsie/extractor/generic/stat.py +++ b/bsie/extractor/generic/stat.py @@ -37,7 +37,7 @@ class Stat(base.Extractor): rdfs:range xsd:integer ; rdfs:label "File size"^^xsd:string ; schema:description "File size of entity in some filesystem."^^xsd:string ; - bsfs:unique "false"^^xsd:boolean . + bsfs:unique "true"^^xsd:boolean . ''')) self._callmap = { self.schema.predicate(ns.bse.filesize): self.__filesize, diff --git a/test/extractor/generic/test_path.py b/test/extractor/generic/test_path.py index ae68686..5568de7 100644 --- a/test/extractor/generic/test_path.py +++ b/test/extractor/generic/test_path.py @@ -33,7 +33,7 @@ class TestPath(unittest.TestCase): bse:filename rdfs:subClassOf bsfs:Predicate ; rdfs:domain bsfs:File ; rdfs:range xsd:string ; - bsfs:unique "false"^^xsd:boolean . + bsfs:unique "true"^^xsd:boolean . ''')) def test_extract(self): diff --git a/test/extractor/generic/test_stat.py b/test/extractor/generic/test_stat.py index e5562d1..f543386 100644 --- a/test/extractor/generic/test_stat.py +++ b/test/extractor/generic/test_stat.py @@ -34,7 +34,7 @@ class TestStat(unittest.TestCase): bse:filesize rdfs:subClassOf bsfs:Predicate ; rdfs:domain bsfs:File ; rdfs:range xsd:integer ; - bsfs:unique "false"^^xsd:boolean . + bsfs:unique "true"^^xsd:boolean . 
''')) def test_extract(self): diff --git a/test/lib/test_bsie.py b/test/lib/test_bsie.py index ae23c4b..2ed9e10 100644 --- a/test/lib/test_bsie.py +++ b/test/lib/test_bsie.py @@ -56,12 +56,12 @@ class TestBSIE(unittest.TestCase): bse:filename rdfs:subClassOf bsfs:Predicate ; rdfs:domain bsfs:File ; rdfs:range xsd:string ; - bsfs:unique "false"^^xsd:boolean . + bsfs:unique "true"^^xsd:boolean . bse:filesize rdfs:subClassOf bsfs:Predicate ; rdfs:domain bsfs:File ; rdfs:range xsd:integer; - bsfs:unique "false"^^xsd:boolean . + bsfs:unique "true"^^xsd:boolean . bse:author rdfs:subClassOf bsfs:Predicate ; rdfs:domain bsfs:Entity ; @@ -83,7 +83,7 @@ class TestBSIE(unittest.TestCase): bse:filesize rdfs:subClassOf bsfs:Predicate ; rdfs:domain bsfs:File ; rdfs:range xsd:integer; - bsfs:unique "false"^^xsd:boolean . + bsfs:unique "true"^^xsd:boolean . bse:author rdfs:subClassOf bsfs:Predicate ; rdfs:domain bsfs:Entity ; @@ -101,12 +101,12 @@ class TestBSIE(unittest.TestCase): bse:filename rdfs:subClassOf bsfs:Predicate ; rdfs:domain bsfs:File ; rdfs:range xsd:string ; - bsfs:unique "false"^^xsd:boolean . + bsfs:unique "true"^^xsd:boolean . bse:filesize rdfs:subClassOf bsfs:Predicate ; rdfs:domain bsfs:File ; rdfs:range xsd:integer; - bsfs:unique "false"^^xsd:boolean . + bsfs:unique "true"^^xsd:boolean . bse:author rdfs:subClassOf bsfs:Predicate ; rdfs:domain bsfs:Entity ; @@ -143,7 +143,7 @@ class TestBSIE(unittest.TestCase): bse:filesize rdfs:subClassOf bsfs:Predicate ; rdfs:domain bsfs:File ; rdfs:range xsd:integer; - bsfs:unique "false"^^xsd:boolean . + bsfs:unique "true"^^xsd:boolean . 
''')) -- cgit v1.2.3 From 464cc6cb54f55f6255bf0a485533c181d6018303 Mon Sep 17 00:00:00 2001 From: Matthias Baumgartner Date: Wed, 1 Mar 2023 17:07:06 +0100 Subject: load config from file --- bsie/apps/_loader.py | 52 +++++++++++++++++++++++++ bsie/apps/default_config.yaml | 17 +++++++++ bsie/apps/index.py | 44 ++++------------------ bsie/apps/info.py | 43 ++++----------------- test/apps/test_index.py | 64 ++++++++++++++++++------------- test/apps/test_info.py | 44 ++++++++++++++++++++-- test/apps/test_loader.py | 88 +++++++++++++++++++++++++++++++++++++++++++ 7 files changed, 251 insertions(+), 101 deletions(-) create mode 100644 bsie/apps/_loader.py create mode 100644 bsie/apps/default_config.yaml create mode 100644 test/apps/test_loader.py diff --git a/bsie/apps/_loader.py b/bsie/apps/_loader.py new file mode 100644 index 0000000..e02bed5 --- /dev/null +++ b/bsie/apps/_loader.py @@ -0,0 +1,52 @@ +""" + +Part of the bsie module. +A copy of the license is provided with the project. +Author: Matthias Baumgartner, 2022 +""" +# standard imports +import typing + +# external imports +import yaml + +# bsie imports +from bsie.extractor import ExtractorBuilder +from bsie.lib import PipelineBuilder +from bsie.lib.pipeline import Pipeline +from bsie.reader import ReaderBuilder + +# constants +DEFAULT_CONFIG_FILE = 'default_config.yaml' + +# exports +__all__: typing.Sequence[str] = ( + 'load', + 'DEFAULT_CONFIG_FILE', + ) + + +## code ## + +def load_pipeline(path: str) -> Pipeline: + """Load a pipeline according to a config at *path*.""" + # load config file + with open(path, 'rt') as ifile: + cfg = yaml.safe_load(ifile) + + # reader builder + rbuild = ReaderBuilder(cfg['ReaderBuilder']) + # extractor builder + ebuild = ExtractorBuilder(cfg['ExtractorBuilder']) + # pipeline builder + pbuild = PipelineBuilder( + rbuild, + ebuild, + ) + # build pipeline + pipeline = pbuild.build() + + # return pipeline + return pipeline + +## EOF ## diff --git a/bsie/apps/default_config.yaml 
b/bsie/apps/default_config.yaml new file mode 100644 index 0000000..4d99e22 --- /dev/null +++ b/bsie/apps/default_config.yaml @@ -0,0 +1,17 @@ + +ReaderBuilder: {} + +ExtractorBuilder: + + - bsie.extractor.preview.Preview: + max_sides: [50, 100, 200,400] + + - bsie.extractor.generic.path.Path: {} + + - bsie.extractor.generic.stat.Stat: {} + + - bsie.extractor.image.colors_spatial.ColorsSpatial: + width: 2 + height: 2 + exp: 2 + diff --git a/bsie/apps/index.py b/bsie/apps/index.py index 8798c49..2d147c9 100644 --- a/bsie/apps/index.py +++ b/bsie/apps/index.py @@ -10,11 +10,12 @@ import os import typing # bsie imports -from bsie.extractor import ExtractorBuilder -from bsie.lib import BSIE, PipelineBuilder, DefaultNamingPolicy -from bsie.reader import ReaderBuilder +from bsie.lib import BSIE, DefaultNamingPolicy from bsie.utils import bsfs, errors, node as node_ +# inner-module imports +from . import _loader + # exports __all__: typing.Sequence[str] = ( 'main', @@ -26,6 +27,9 @@ __all__: typing.Sequence[str] = ( def main(argv): """Index files or directories into BSFS.""" parser = argparse.ArgumentParser(description=main.__doc__, prog='index') + parser.add_argument('--config', type=str, + default=os.path.join(os.path.dirname(__file__), _loader.DEFAULT_CONFIG_FILE), + help='Path to the config file.') parser.add_argument('--host', type=bsfs.URI, default=bsfs.URI('http://example.com'), help='') parser.add_argument('--user', type=str, default='me', @@ -44,39 +48,8 @@ def main(argv): help='') args = parser.parse_args(argv) - # FIXME: Read reader/extractor configs from a config file - # reader builder - rbuild = ReaderBuilder() - # extractor builder - ebuild = ExtractorBuilder([ - {'bsie.extractor.preview.Preview': { - 'max_sides': [50], - }}, - {'bsie.extractor.generic.path.Path': {}}, - {'bsie.extractor.generic.stat.Stat': {}}, - {'bsie.extractor.generic.constant.Constant': dict( - tuples=[('http://bsfs.ai/schema/Entity#author', 'Me, myself, and I')], - schema=''' - 
bse:author rdfs:subClassOf bsfs:Predicate ; - rdfs:domain bsfs:Entity ; - rdfs:range xsd:string ; - bsfs:unique "true"^^xsd:boolean . - ''', - )}, - {'bsie.extractor.image.colors_spatial.ColorsSpatial': { - 'width': 2, - 'height': 2, - 'exp': 2, - }}, - ]) - # pipeline builder - pbuild = PipelineBuilder( - rbuild, - ebuild, - ) - # build pipeline - pipeline = pbuild.build() + pipeline = _loader.load_pipeline(args.config) # build the naming policy naming_policy = DefaultNamingPolicy( host=args.host, @@ -127,7 +100,6 @@ def main(argv): return store - ## main ## if __name__ == '__main__': diff --git a/bsie/apps/info.py b/bsie/apps/info.py index 750aedc..363ab30 100644 --- a/bsie/apps/info.py +++ b/bsie/apps/info.py @@ -6,15 +6,16 @@ Author: Matthias Baumgartner, 2022 """ # standard imports import argparse +import os import sys import typing # bsie imports -from bsie.extractor import ExtractorBuilder -from bsie.lib import PipelineBuilder -from bsie.reader import ReaderBuilder from bsie.utils import bsfs, errors +# inner-module imports +from . 
import _loader + # exports __all__: typing.Sequence[str] = ( 'main', @@ -26,43 +27,15 @@ __all__: typing.Sequence[str] = ( def main(argv): """Show information from BSIE.""" parser = argparse.ArgumentParser(description=main.__doc__, prog='info') + parser.add_argument('--config', type=str, + default=os.path.join(os.path.dirname(__file__), _loader.DEFAULT_CONFIG_FILE), + help='Path to the config file.') parser.add_argument('what', choices=('predicates', 'schema'), help='Select what information to show.') args = parser.parse_args(argv) - # FIXME: Read reader/extractor configs from a config file - # reader builder - rbuild = ReaderBuilder() - # extractor builder - ebuild = ExtractorBuilder([ - {'bsie.extractor.preview.Preview': { - 'max_sides': [50, 200], - }}, - {'bsie.extractor.generic.path.Path': {}}, - {'bsie.extractor.generic.stat.Stat': {}}, - {'bsie.extractor.generic.constant.Constant': dict( - tuples=[('http://bsfs.ai/schema/Entity#author', 'Me, myself, and I')], - schema=''' - bse:author rdfs:subClassOf bsfs:Predicate ; - rdfs:domain bsfs:Entity ; - rdfs:range xsd:string ; - bsfs:unique "true"^^xsd:boolean . 
- ''', - )}, - {'bsie.extractor.image.colors_spatial.ColorsSpatial': { - 'width': 2, - 'height': 2, - 'exp': 2, - }}, - ]) - # pipeline builder - pbuild = PipelineBuilder( - rbuild, - ebuild, - ) - # build pipeline - pipeline = pbuild.build() + pipeline = _loader.load_pipeline(args.config) # show info if args.what == 'predicates': diff --git a/test/apps/test_index.py b/test/apps/test_index.py index d1e7140..a877684 100644 --- a/test/apps/test_index.py +++ b/test/apps/test_index.py @@ -8,10 +8,12 @@ Author: Matthias Baumgartner, 2022 import contextlib import io import os +import tempfile import unittest # external imports import rdflib +import yaml # bsie imports from bsie.utils import ns @@ -23,17 +25,52 @@ from bsie.apps.index import main ## code ## class TestIndex(unittest.TestCase): + def setUp(self): + config = { + 'ReaderBuilder': {}, + 'ExtractorBuilder': [ + {'bsie.extractor.preview.Preview': { + 'max_sides': [50], + }}, + {'bsie.extractor.generic.path.Path': {}}, + {'bsie.extractor.generic.constant.Constant': { + 'schema': ''' + bse:author rdfs:subClassOf bsfs:Predicate ; + rdfs:domain bsfs:Entity ; + rdfs:range xsd:string ; + bsfs:unique "true"^^xsd:boolean . 
+ ''', + 'tuples': [['http://bsfs.ai/schema/Entity#author', 'Me, myself, and I']], + }}, + {'bsie.extractor.image.colors_spatial.ColorsSpatial': { + 'width': 2, + 'height': 2, + 'exp': 2, + }}, + ] + } + # create config file + _, self.config_path = tempfile.mkstemp(prefix='bsie-test-', suffix='.yaml') + with open(self.config_path, 'wt') as cfile: + yaml.dump(config, cfile) + + def tearDown(self): + if os.path.exists(self.config_path): + os.unlink(self.config_path) + def test_disclaimer(self): print('Please wait, this test will take about 25 seconds') def test_main_invalid(self): outbuf = io.StringIO() with contextlib.redirect_stdout(outbuf): - bsfs = main([os.path.join(os.path.dirname(__file__), 'inexistent-file.t')]) + bsfs = main(['--config', self.config_path, os.path.join(os.path.dirname(__file__), 'inexistent-file.t')]) self.assertEqual(outbuf.getvalue().strip(), '') def test_main(self): bsfs = main([ + '--config', + self.config_path, '-r', '--host', 'http://example.com', '--user', 'me', @@ -48,57 +85,44 @@ class TestIndex(unittest.TestCase): (rdflib.URIRef(pre_file + '2f4109b40107cc50e0884755a1a961ed126887e49b8dbaf0e146b2e226aa6647'), rdflib.RDF.type, rdflib.URIRef(ns.bsfs.File)), (rdflib.URIRef(pre_file + '2f4109b40107cc50e0884755a1a961ed126887e49b8dbaf0e146b2e226aa6647'), rdflib.URIRef(ns.bse.author), rdflib.Literal('Me, myself, and I', datatype=rdflib.XSD.string)), (rdflib.URIRef(pre_file + '2f4109b40107cc50e0884755a1a961ed126887e49b8dbaf0e146b2e226aa6647'), rdflib.URIRef(ns.bse.filename), rdflib.Literal('alpha_second', datatype=rdflib.XSD.string)), - (rdflib.URIRef(pre_file + '2f4109b40107cc50e0884755a1a961ed126887e49b8dbaf0e146b2e226aa6647'), rdflib.URIRef(ns.bse.filesize), rdflib.Literal('696', datatype=rdflib.XSD.integer)), (rdflib.URIRef(pre_file + '441f3d10c8ff489fe8e33e639606512f6c463151cc429de7e554b9af670c2ece'), rdflib.RDF.type, rdflib.URIRef(ns.bsfs.File)), (rdflib.URIRef(pre_file + 
'441f3d10c8ff489fe8e33e639606512f6c463151cc429de7e554b9af670c2ece'), rdflib.URIRef(ns.bse.author), rdflib.Literal('Me, myself, and I', datatype=rdflib.XSD.string)), (rdflib.URIRef(pre_file + '441f3d10c8ff489fe8e33e639606512f6c463151cc429de7e554b9af670c2ece'), rdflib.URIRef(ns.bse.filename), rdflib.Literal('omega_second', datatype=rdflib.XSD.string)), - (rdflib.URIRef(pre_file + '441f3d10c8ff489fe8e33e639606512f6c463151cc429de7e554b9af670c2ece'), rdflib.URIRef(ns.bse.filesize), rdflib.Literal('503', datatype=rdflib.XSD.integer)), (rdflib.URIRef(pre_file + '69b98ecf7aff3e95b09688ba93331678eb8397817111f674c9558e6dd8f5e871'), rdflib.RDF.type, rdflib.URIRef(ns.bsfs.File)), (rdflib.URIRef(pre_file + '69b98ecf7aff3e95b09688ba93331678eb8397817111f674c9558e6dd8f5e871'), rdflib.URIRef(ns.bse.author), rdflib.Literal('Me, myself, and I', datatype=rdflib.XSD.string)), (rdflib.URIRef(pre_file + '69b98ecf7aff3e95b09688ba93331678eb8397817111f674c9558e6dd8f5e871'), rdflib.URIRef(ns.bse.filename), rdflib.Literal('td_first', datatype=rdflib.XSD.string)), - (rdflib.URIRef(pre_file + '69b98ecf7aff3e95b09688ba93331678eb8397817111f674c9558e6dd8f5e871'), rdflib.URIRef(ns.bse.filesize), rdflib.Literal('911', datatype=rdflib.XSD.integer)), (rdflib.URIRef(pre_file + '78f7eb7f0d8221cdb2cb26c978fa42a11f75eb87becc768f4474134cb1e06926'), rdflib.RDF.type, rdflib.URIRef(ns.bsfs.File)), (rdflib.URIRef(pre_file + '78f7eb7f0d8221cdb2cb26c978fa42a11f75eb87becc768f4474134cb1e06926'), rdflib.URIRef(ns.bse.author), rdflib.Literal('Me, myself, and I', datatype=rdflib.XSD.string)), (rdflib.URIRef(pre_file + '78f7eb7f0d8221cdb2cb26c978fa42a11f75eb87becc768f4474134cb1e06926'), rdflib.URIRef(ns.bse.filename), rdflib.Literal('testfile', datatype=rdflib.XSD.string)), - (rdflib.URIRef(pre_file + '78f7eb7f0d8221cdb2cb26c978fa42a11f75eb87becc768f4474134cb1e06926'), rdflib.URIRef(ns.bse.filesize), rdflib.Literal('885', datatype=rdflib.XSD.integer)), (rdflib.URIRef(pre_file + 
'80818b8ec2ee1919116dba9c8a7e0a4608313cf3b463cd88e9ed77a700dd92d3'), rdflib.RDF.type, rdflib.URIRef(ns.bsfs.File)), (rdflib.URIRef(pre_file + '80818b8ec2ee1919116dba9c8a7e0a4608313cf3b463cd88e9ed77a700dd92d3'), rdflib.URIRef(ns.bse.author), rdflib.Literal('Me, myself, and I', datatype=rdflib.XSD.string)), (rdflib.URIRef(pre_file + '80818b8ec2ee1919116dba9c8a7e0a4608313cf3b463cd88e9ed77a700dd92d3'), rdflib.URIRef(ns.bse.filename), rdflib.Literal('bar_first', datatype=rdflib.XSD.string)), - (rdflib.URIRef(pre_file + '80818b8ec2ee1919116dba9c8a7e0a4608313cf3b463cd88e9ed77a700dd92d3'), rdflib.URIRef(ns.bse.filesize), rdflib.Literal('956', datatype=rdflib.XSD.integer)), (rdflib.URIRef(pre_file + '976d2ea0e58488678cc7e435fbfadabfb6eb6cf50ad51862f38f73729ed11795'), rdflib.RDF.type, rdflib.URIRef(ns.bsfs.File)), (rdflib.URIRef(pre_file + '976d2ea0e58488678cc7e435fbfadabfb6eb6cf50ad51862f38f73729ed11795'), rdflib.URIRef(ns.bse.author), rdflib.Literal('Me, myself, and I', datatype=rdflib.XSD.string)), (rdflib.URIRef(pre_file + '976d2ea0e58488678cc7e435fbfadabfb6eb6cf50ad51862f38f73729ed11795'), rdflib.URIRef(ns.bse.filename), rdflib.Literal('omega_first', datatype=rdflib.XSD.string)), - (rdflib.URIRef(pre_file + '976d2ea0e58488678cc7e435fbfadabfb6eb6cf50ad51862f38f73729ed11795'), rdflib.URIRef(ns.bse.filesize), rdflib.Literal('648', datatype=rdflib.XSD.integer)), (rdflib.URIRef(pre_file + '997e2fbb7494a3818ec782d2bc87bf1cffafba6b9c0f658e4a6c18a723e944d3'), rdflib.RDF.type, rdflib.URIRef(ns.bsfs.File)), (rdflib.URIRef(pre_file + '997e2fbb7494a3818ec782d2bc87bf1cffafba6b9c0f658e4a6c18a723e944d3'), rdflib.URIRef(ns.bse.author), rdflib.Literal('Me, myself, and I', datatype=rdflib.XSD.string)), (rdflib.URIRef(pre_file + '997e2fbb7494a3818ec782d2bc87bf1cffafba6b9c0f658e4a6c18a723e944d3'), rdflib.URIRef(ns.bse.filename), rdflib.Literal('alpha_first', datatype=rdflib.XSD.string)), - (rdflib.URIRef(pre_file + '997e2fbb7494a3818ec782d2bc87bf1cffafba6b9c0f658e4a6c18a723e944d3'), 
rdflib.URIRef(ns.bse.filesize), rdflib.Literal('754', datatype=rdflib.XSD.integer)), (rdflib.URIRef(pre_file + 'a8af899ecdab60dfaea8ec7f934053624c80a1054539e163f2c7eaa986c2777d'), rdflib.RDF.type, rdflib.URIRef(ns.bsfs.File)), (rdflib.URIRef(pre_file + 'a8af899ecdab60dfaea8ec7f934053624c80a1054539e163f2c7eaa986c2777d'), rdflib.URIRef(ns.bse.author), rdflib.Literal('Me, myself, and I', datatype=rdflib.XSD.string)), (rdflib.URIRef(pre_file + 'a8af899ecdab60dfaea8ec7f934053624c80a1054539e163f2c7eaa986c2777d'), rdflib.URIRef(ns.bse.filename), rdflib.Literal('foo_second', datatype=rdflib.XSD.string)), - (rdflib.URIRef(pre_file + 'a8af899ecdab60dfaea8ec7f934053624c80a1054539e163f2c7eaa986c2777d'), rdflib.URIRef(ns.bse.filesize), rdflib.Literal('585', datatype=rdflib.XSD.integer)), (rdflib.URIRef(pre_file + 'b8fd7fba818254166a6043195004138ebda6923e012442f819a2c49671136c70'), rdflib.RDF.type, rdflib.URIRef(ns.bsfs.File)), (rdflib.URIRef(pre_file + 'b8fd7fba818254166a6043195004138ebda6923e012442f819a2c49671136c70'), rdflib.URIRef(ns.bse.author), rdflib.Literal('Me, myself, and I', datatype=rdflib.XSD.string)), (rdflib.URIRef(pre_file + 'b8fd7fba818254166a6043195004138ebda6923e012442f819a2c49671136c70'), rdflib.URIRef(ns.bse.filename), rdflib.Literal('bar_second', datatype=rdflib.XSD.string)), - (rdflib.URIRef(pre_file + 'b8fd7fba818254166a6043195004138ebda6923e012442f819a2c49671136c70'), rdflib.URIRef(ns.bse.filesize), rdflib.Literal('636', datatype=rdflib.XSD.integer)), (rdflib.URIRef(pre_file + 'd43758ace82154a1cc10ca0dfef63cb20dd831f9c87edd6dc06539eefe67371d'), rdflib.RDF.type, rdflib.URIRef(ns.bsfs.File)), (rdflib.URIRef(pre_file + 'd43758ace82154a1cc10ca0dfef63cb20dd831f9c87edd6dc06539eefe67371d'), rdflib.URIRef(ns.bse.author), rdflib.Literal('Me, myself, and I', datatype=rdflib.XSD.string)), (rdflib.URIRef(pre_file + 'd43758ace82154a1cc10ca0dfef63cb20dd831f9c87edd6dc06539eefe67371d'), rdflib.URIRef(ns.bse.filename), rdflib.Literal('foo_first', 
datatype=rdflib.XSD.string)), - (rdflib.URIRef(pre_file + 'd43758ace82154a1cc10ca0dfef63cb20dd831f9c87edd6dc06539eefe67371d'), rdflib.URIRef(ns.bse.filesize), rdflib.Literal('546', datatype=rdflib.XSD.integer)), (rdflib.URIRef(pre_file + 'd803187cbf3676ae9d38126270a6152c60431589aa3bb3824baf8954e9c097f1'), rdflib.RDF.type, rdflib.URIRef(ns.bsfs.File)), (rdflib.URIRef(pre_file + 'd803187cbf3676ae9d38126270a6152c60431589aa3bb3824baf8954e9c097f1'), rdflib.URIRef(ns.bse.author), rdflib.Literal('Me, myself, and I', datatype=rdflib.XSD.string)), (rdflib.URIRef(pre_file + 'd803187cbf3676ae9d38126270a6152c60431589aa3bb3824baf8954e9c097f1'), rdflib.URIRef(ns.bse.filename), rdflib.Literal('td_second', datatype=rdflib.XSD.string)), - (rdflib.URIRef(pre_file + 'd803187cbf3676ae9d38126270a6152c60431589aa3bb3824baf8954e9c097f1'), rdflib.URIRef(ns.bse.filesize), rdflib.Literal('703', datatype=rdflib.XSD.integer)), (rdflib.URIRef(pre_file + 'accb115d266ad60c53cd01a7f7130f245886ce8eaf69bc85319febc11d9fe089'), rdflib.RDF.type, rdflib.URIRef(ns.bsfs.File)), (rdflib.URIRef(pre_file + 'accb115d266ad60c53cd01a7f7130f245886ce8eaf69bc85319febc11d9fe089'), rdflib.URIRef(ns.bse.author), rdflib.Literal('Me, myself, and I', datatype=rdflib.XSD.string)), (rdflib.URIRef(pre_file + 'accb115d266ad60c53cd01a7f7130f245886ce8eaf69bc85319febc11d9fe089'), rdflib.URIRef(ns.bse.filename), rdflib.Literal('testimage.jpg', datatype=rdflib.XSD.string)), - (rdflib.URIRef(pre_file + 'accb115d266ad60c53cd01a7f7130f245886ce8eaf69bc85319febc11d9fe089'), rdflib.URIRef(ns.bse.filesize), rdflib.Literal('349264', datatype=rdflib.XSD.integer)), # features (rdflib.URIRef(pre_file + 'accb115d266ad60c53cd01a7f7130f245886ce8eaf69bc85319febc11d9fe089'), rdflib.URIRef('http://bsfs.ai/schema/Entity/colors_spatial#0658f2234a054e1dd59a14462c89f7733e019160419c796356aa831498bd0a04'), rdflib.Literal( '(91, 127, 121, 94, 138, 167, 163, 134, 190, 138, 170, 156, 121, 142, 159)', 
datatype=rdflib.URIRef('http://ie.bsfs.ai/schema/Feature/ColorsSpatial#0658f2234a054e1dd59a14462c89f7733e019160419c796356aa831498bd0a04'))), - (rdflib.URIRef(pre_file + 'accb115d266ad60c53cd01a7f7130f245886ce8eaf69bc85319febc11d9fe089'), rdflib.URIRef(ns.bse.filesize), rdflib.Literal('349264', datatype=rdflib.XSD.integer)), # links to previews (rdflib.URIRef(pre_file + '2f4109b40107cc50e0884755a1a961ed126887e49b8dbaf0e146b2e226aa6647'), rdflib.URIRef(ns.bse.preview), rdflib.URIRef(pre_preview + '79cb8a7e6369361a4f4cb7ff729c1ed3fcf87204769623d6fbd6ebfae601e5c7_s50')), (rdflib.URIRef(pre_file + '441f3d10c8ff489fe8e33e639606512f6c463151cc429de7e554b9af670c2ece'), rdflib.URIRef(ns.bse.preview), rdflib.URIRef(pre_preview + 'a8b3245636074d5370283b690281abda8ffdff12ce8b1af77c8bc0a4c85be860_s50')), @@ -211,38 +235,26 @@ class TestIndex(unittest.TestCase): self.assertTrue((set(outbuf.getvalue().split('\n')) - {''}).issuperset({ f'Node(http://bsfs.ai/schema/File, http://example.com/me/file#2f4109b40107cc50e0884755a1a961ed126887e49b8dbaf0e146b2e226aa6647) Predicate({ns.bse.author}) Me, myself, and I', f'Node(http://bsfs.ai/schema/File, http://example.com/me/file#2f4109b40107cc50e0884755a1a961ed126887e49b8dbaf0e146b2e226aa6647) Predicate({ns.bse.filename}) alpha_second', - f'Node(http://bsfs.ai/schema/File, http://example.com/me/file#2f4109b40107cc50e0884755a1a961ed126887e49b8dbaf0e146b2e226aa6647) Predicate({ns.bse.filesize}) 696', f'Node(http://bsfs.ai/schema/File, http://example.com/me/file#441f3d10c8ff489fe8e33e639606512f6c463151cc429de7e554b9af670c2ece) Predicate({ns.bse.author}) Me, myself, and I', f'Node(http://bsfs.ai/schema/File, http://example.com/me/file#441f3d10c8ff489fe8e33e639606512f6c463151cc429de7e554b9af670c2ece) Predicate({ns.bse.filename}) omega_second', - f'Node(http://bsfs.ai/schema/File, http://example.com/me/file#441f3d10c8ff489fe8e33e639606512f6c463151cc429de7e554b9af670c2ece) Predicate({ns.bse.filesize}) 503', f'Node(http://bsfs.ai/schema/File, 
http://example.com/me/file#69b98ecf7aff3e95b09688ba93331678eb8397817111f674c9558e6dd8f5e871) Predicate({ns.bse.author}) Me, myself, and I', f'Node(http://bsfs.ai/schema/File, http://example.com/me/file#69b98ecf7aff3e95b09688ba93331678eb8397817111f674c9558e6dd8f5e871) Predicate({ns.bse.filename}) td_first', - f'Node(http://bsfs.ai/schema/File, http://example.com/me/file#69b98ecf7aff3e95b09688ba93331678eb8397817111f674c9558e6dd8f5e871) Predicate({ns.bse.filesize}) 911', f'Node(http://bsfs.ai/schema/File, http://example.com/me/file#78f7eb7f0d8221cdb2cb26c978fa42a11f75eb87becc768f4474134cb1e06926) Predicate({ns.bse.author}) Me, myself, and I', f'Node(http://bsfs.ai/schema/File, http://example.com/me/file#78f7eb7f0d8221cdb2cb26c978fa42a11f75eb87becc768f4474134cb1e06926) Predicate({ns.bse.filename}) testfile', - f'Node(http://bsfs.ai/schema/File, http://example.com/me/file#78f7eb7f0d8221cdb2cb26c978fa42a11f75eb87becc768f4474134cb1e06926) Predicate({ns.bse.filesize}) 885', f'Node(http://bsfs.ai/schema/File, http://example.com/me/file#80818b8ec2ee1919116dba9c8a7e0a4608313cf3b463cd88e9ed77a700dd92d3) Predicate({ns.bse.author}) Me, myself, and I', f'Node(http://bsfs.ai/schema/File, http://example.com/me/file#80818b8ec2ee1919116dba9c8a7e0a4608313cf3b463cd88e9ed77a700dd92d3) Predicate({ns.bse.filename}) bar_first', - f'Node(http://bsfs.ai/schema/File, http://example.com/me/file#80818b8ec2ee1919116dba9c8a7e0a4608313cf3b463cd88e9ed77a700dd92d3) Predicate({ns.bse.filesize}) 956', f'Node(http://bsfs.ai/schema/File, http://example.com/me/file#976d2ea0e58488678cc7e435fbfadabfb6eb6cf50ad51862f38f73729ed11795) Predicate({ns.bse.author}) Me, myself, and I', f'Node(http://bsfs.ai/schema/File, http://example.com/me/file#976d2ea0e58488678cc7e435fbfadabfb6eb6cf50ad51862f38f73729ed11795) Predicate({ns.bse.filename}) omega_first', - f'Node(http://bsfs.ai/schema/File, http://example.com/me/file#976d2ea0e58488678cc7e435fbfadabfb6eb6cf50ad51862f38f73729ed11795) Predicate({ns.bse.filesize}) 
648', f'Node(http://bsfs.ai/schema/File, http://example.com/me/file#997e2fbb7494a3818ec782d2bc87bf1cffafba6b9c0f658e4a6c18a723e944d3) Predicate({ns.bse.author}) Me, myself, and I', f'Node(http://bsfs.ai/schema/File, http://example.com/me/file#997e2fbb7494a3818ec782d2bc87bf1cffafba6b9c0f658e4a6c18a723e944d3) Predicate({ns.bse.filename}) alpha_first', - f'Node(http://bsfs.ai/schema/File, http://example.com/me/file#997e2fbb7494a3818ec782d2bc87bf1cffafba6b9c0f658e4a6c18a723e944d3) Predicate({ns.bse.filesize}) 754', f'Node(http://bsfs.ai/schema/File, http://example.com/me/file#a8af899ecdab60dfaea8ec7f934053624c80a1054539e163f2c7eaa986c2777d) Predicate({ns.bse.author}) Me, myself, and I', f'Node(http://bsfs.ai/schema/File, http://example.com/me/file#a8af899ecdab60dfaea8ec7f934053624c80a1054539e163f2c7eaa986c2777d) Predicate({ns.bse.filename}) foo_second', - f'Node(http://bsfs.ai/schema/File, http://example.com/me/file#a8af899ecdab60dfaea8ec7f934053624c80a1054539e163f2c7eaa986c2777d) Predicate({ns.bse.filesize}) 585', f'Node(http://bsfs.ai/schema/File, http://example.com/me/file#b8fd7fba818254166a6043195004138ebda6923e012442f819a2c49671136c70) Predicate({ns.bse.author}) Me, myself, and I', f'Node(http://bsfs.ai/schema/File, http://example.com/me/file#b8fd7fba818254166a6043195004138ebda6923e012442f819a2c49671136c70) Predicate({ns.bse.filename}) bar_second', - f'Node(http://bsfs.ai/schema/File, http://example.com/me/file#b8fd7fba818254166a6043195004138ebda6923e012442f819a2c49671136c70) Predicate({ns.bse.filesize}) 636', f'Node(http://bsfs.ai/schema/File, http://example.com/me/file#d43758ace82154a1cc10ca0dfef63cb20dd831f9c87edd6dc06539eefe67371d) Predicate({ns.bse.author}) Me, myself, and I', f'Node(http://bsfs.ai/schema/File, http://example.com/me/file#d43758ace82154a1cc10ca0dfef63cb20dd831f9c87edd6dc06539eefe67371d) Predicate({ns.bse.filename}) foo_first', - f'Node(http://bsfs.ai/schema/File, 
http://example.com/me/file#d43758ace82154a1cc10ca0dfef63cb20dd831f9c87edd6dc06539eefe67371d) Predicate({ns.bse.filesize}) 546', f'Node(http://bsfs.ai/schema/File, http://example.com/me/file#d803187cbf3676ae9d38126270a6152c60431589aa3bb3824baf8954e9c097f1) Predicate({ns.bse.author}) Me, myself, and I', f'Node(http://bsfs.ai/schema/File, http://example.com/me/file#d803187cbf3676ae9d38126270a6152c60431589aa3bb3824baf8954e9c097f1) Predicate({ns.bse.filename}) td_second', - f'Node(http://bsfs.ai/schema/File, http://example.com/me/file#d803187cbf3676ae9d38126270a6152c60431589aa3bb3824baf8954e9c097f1) Predicate({ns.bse.filesize}) 703', - f'Node(http://bsfs.ai/schema/File, http://example.com/me/file#accb115d266ad60c53cd01a7f7130f245886ce8eaf69bc85319febc11d9fe089) Predicate({ns.bse.filesize}) 349264', f'Node(http://bsfs.ai/schema/File, http://example.com/me/file#accb115d266ad60c53cd01a7f7130f245886ce8eaf69bc85319febc11d9fe089) Predicate({ns.bse.author}) Me, myself, and I', f'Node(http://bsfs.ai/schema/File, http://example.com/me/file#accb115d266ad60c53cd01a7f7130f245886ce8eaf69bc85319febc11d9fe089) Predicate({ns.bse.filename}) testimage.jpg', # features diff --git a/test/apps/test_info.py b/test/apps/test_info.py index 725fb65..cf8d52f 100644 --- a/test/apps/test_info.py +++ b/test/apps/test_info.py @@ -8,8 +8,13 @@ Author: Matthias Baumgartner, 2022 import argparse import contextlib import io +import os +import tempfile import unittest +# external imports +import yaml + # bsie imports from bsie.utils import bsfs @@ -20,17 +25,49 @@ from bsie.apps.info import main ## code ## class TestIndex(unittest.TestCase): + def setUp(self): + config = { + 'ReaderBuilder': {}, + 'ExtractorBuilder': [ + {'bsie.extractor.preview.Preview': { + 'max_sides': [50], + }}, + {'bsie.extractor.generic.path.Path': {}}, + {'bsie.extractor.generic.constant.Constant': { + 'schema': ''' + bse:author rdfs:subClassOf bsfs:Predicate ; + rdfs:domain bsfs:Entity ; + rdfs:range xsd:string ; + bsfs:unique 
"true"^^xsd:boolean . + ''', + 'tuples': [['http://bsfs.ai/schema/Entity#author', 'Me, myself, and I']], + }}, + {'bsie.extractor.image.colors_spatial.ColorsSpatial': { + 'width': 2, + 'height': 2, + 'exp': 2, + }}, + ] + } + # create config file + _, self.config_path = tempfile.mkstemp(prefix='bsie-test-', suffix='.yaml') + with open(self.config_path, 'wt') as cfile: + yaml.dump(config, cfile) + + def tearDown(self): + if os.path.exists(self.config_path): + os.unlink(self.config_path) + def test_predicates(self): outbuf = io.StringIO() with contextlib.redirect_stdout(outbuf): # show predicates infos - main(['predicates']) + main(['--config', self.config_path, 'predicates']) # verify output self.assertSetEqual({pred for pred in outbuf.getvalue().split('\n') if pred != ''}, { 'http://bsfs.ai/schema/Entity#author', 'http://bsfs.ai/schema/Predicate', 'http://bsfs.ai/schema/Entity#filename', - 'http://bsfs.ai/schema/Entity#filesize', 'http://bsfs.ai/schema/Entity/colors_spatial#0658f2234a054e1dd59a14462c89f7733e019160419c796356aa831498bd0a04', 'http://bsfs.ai/schema/Entity#preview', 'http://bsfs.ai/schema/Preview#width', @@ -42,14 +79,13 @@ class TestIndex(unittest.TestCase): outbuf = io.StringIO() with contextlib.redirect_stdout(outbuf): # show schema infos - main(['schema']) + main(['--config', self.config_path, 'schema']) # verify output schema = bsfs.schema.from_string(outbuf.getvalue()) self.assertSetEqual({pred.uri for pred in schema.predicates()}, { 'http://bsfs.ai/schema/Entity#author', 'http://bsfs.ai/schema/Predicate', 'http://bsfs.ai/schema/Entity#filename', - 'http://bsfs.ai/schema/Entity#filesize', 'http://bsfs.ai/schema/Entity/colors_spatial#0658f2234a054e1dd59a14462c89f7733e019160419c796356aa831498bd0a04', 'http://bsfs.ai/schema/Entity#preview', 'http://bsfs.ai/schema/Preview#width', diff --git a/test/apps/test_loader.py b/test/apps/test_loader.py new file mode 100644 index 0000000..09a9162 --- /dev/null +++ b/test/apps/test_loader.py @@ -0,0 +1,88 @@ 
+""" + +Part of the bsie test suite. +A copy of the license is provided with the project. +Author: Matthias Baumgartner, 2022 +""" +# standard imports +import os +import tempfile +import unittest + +# external imports +import yaml + +# objects to test +from bsie.apps._loader import load_pipeline + + +## code ## + +class TestLoader(unittest.TestCase): + def test_load_pipeline(self): + # config file can be empty + config = { + 'ReaderBuilder': {}, + 'ExtractorBuilder': [] + } + # create config file + _, path = tempfile.mkstemp(prefix='bsie-test-', suffix='.yaml') + with open(path, 'wt') as cfile: + yaml.dump(config, cfile) + # pipeline contains only default predicates + pipeline = load_pipeline(path) + self.assertSetEqual({pred.uri for pred in pipeline.schema.predicates()}, { + 'http://bsfs.ai/schema/Predicate', + }) + + # pipeline is built according to configured extractors + config = { + 'ReaderBuilder': {}, + 'ExtractorBuilder': [ + {'bsie.extractor.preview.Preview': { + 'max_sides': [50], + }}, + {'bsie.extractor.generic.path.Path': {}}, + {'bsie.extractor.generic.constant.Constant': { + 'schema': ''' + bse:author rdfs:subClassOf bsfs:Predicate ; + rdfs:domain bsfs:Entity ; + rdfs:range xsd:string ; + bsfs:unique "true"^^xsd:boolean . 
+ ''', + 'tuples': [['http://bsfs.ai/schema/Entity#author', 'Me, myself, and I']], + }}, + {'bsie.extractor.image.colors_spatial.ColorsSpatial': { + 'width': 2, + 'height': 2, + 'exp': 2, + }}, + ] + } + # create config file + _, path = tempfile.mkstemp(prefix='bsie-test-', suffix='.yaml') + with open(path, 'wt') as cfile: + yaml.dump(config, cfile) + # pipeline contains all defined predicates + pipeline = load_pipeline(path) + self.assertSetEqual({pred.uri for pred in pipeline.schema.predicates()}, { + 'http://bsfs.ai/schema/Entity#author', + 'http://bsfs.ai/schema/Predicate', + 'http://bsfs.ai/schema/Entity#filename', + 'http://bsfs.ai/schema/Entity/colors_spatial#0658f2234a054e1dd59a14462c89f7733e019160419c796356aa831498bd0a04', + 'http://bsfs.ai/schema/Entity#preview', + 'http://bsfs.ai/schema/Preview#width', + 'http://bsfs.ai/schema/Preview#height', + 'http://bsfs.ai/schema/Preview#asset', + }) + + # config file must exist + self.assertRaises(OSError, load_pipeline, 'invalid.yaml') + + +## main ## + +if __name__ == '__main__': + unittest.main() + +## EOF ## -- cgit v1.2.3 From ec9105b690974b0246e36769506e735c4edf069a Mon Sep 17 00:00:00 2001 From: Matthias Baumgartner Date: Wed, 1 Mar 2023 21:38:09 +0100 Subject: Exif data reader and extractor --- bsie/apps/default_config.yaml | 8 +- bsie/extractor/image/photometrics.py | 219 ++++++++++++++++++++++++++++++ bsie/reader/exif.py | 49 +++++++ test/extractor/image/test_photometrics.py | 138 +++++++++++++++++++ test/reader/test_exif.py | 48 +++++++ test/reader/testimage_exif.jpg | Bin 0 -> 719 bytes 6 files changed, 459 insertions(+), 3 deletions(-) create mode 100644 bsie/extractor/image/photometrics.py create mode 100644 bsie/reader/exif.py create mode 100644 test/extractor/image/test_photometrics.py create mode 100644 test/reader/test_exif.py create mode 100644 test/reader/testimage_exif.jpg diff --git a/bsie/apps/default_config.yaml b/bsie/apps/default_config.yaml index 4d99e22..a59b0f3 100644 --- 
a/bsie/apps/default_config.yaml +++ b/bsie/apps/default_config.yaml @@ -11,7 +11,9 @@ ExtractorBuilder: - bsie.extractor.generic.stat.Stat: {} - bsie.extractor.image.colors_spatial.ColorsSpatial: - width: 2 - height: 2 - exp: 2 + width: 32 + height: 32 + exp: 4 + + - bsie.extractor.image.photometrics.Exif: {} diff --git a/bsie/extractor/image/photometrics.py b/bsie/extractor/image/photometrics.py new file mode 100644 index 0000000..ae0a541 --- /dev/null +++ b/bsie/extractor/image/photometrics.py @@ -0,0 +1,219 @@ +""" + +Part of the bsie module. +A copy of the license is provided with the project. +Author: Matthias Baumgartner, 2022 +""" +# standard imports +from fractions import Fraction +import typing + +# bsie imports +from bsie.utils import bsfs, node, ns + +# inner-module imports +from .. import base + +# exports +__all__: typing.Sequence[str] = ( + 'Exif', + ) + + +## code ## + +def _gps_to_dec(coords: typing.Tuple[float, float, float]) -> float: + """Convert GPS coordinates from exif to float.""" + # unpack args + deg, min, sec = coords + # convert to float + deg = float(Fraction(deg)) + min = float(Fraction(min)) + sec = float(Fraction(sec)) + + if float(sec) > 0: + # format is deg+min+sec + return (float(deg) * 3600 + float(min) * 60 + float(sec)) / 3600 + else: + # format is deg+min + return float(deg) + float(min) / 60 + + +class Exif(base.Extractor): + """Extract information from EXIF/IPTC tags of an image file.""" + + CONTENT_READER = 'bsie.reader.exif.Exif' + + def __init__(self): + super().__init__(bsfs.schema.from_string(base.SCHEMA_PREAMBLE + ''' + #bse:t_capture rdfs:subClassOf bsfs:Predicate ; + # rdfs:domain bsfs:File ; + # rdfs:range xsd:float ; + # bsfs:unique "true"^^xsd:boolean . + bse:exposure rdfs:subClassOf bsfs:Predicate ; + rdfs:domain bsfs:File ; + rdfs:range xsd:float ; + bsfs:unique "true"^^xsd:boolean . 
+ bse:aperture rdfs:subClassOf bsfs:Predicate ; + rdfs:domain bsfs:File ; + rdfs:range xsd:float ; + bsfs:unique "true"^^xsd:boolean . + bse:iso rdfs:subClassOf bsfs:Predicate ; + rdfs:domain bsfs:File ; + rdfs:range xsd:integer ; + bsfs:unique "true"^^xsd:boolean . + bse:focal_length rdfs:subClassOf bsfs:Predicate ; + rdfs:domain bsfs:File ; + rdfs:range xsd:float ; + bsfs:unique "true"^^xsd:boolean . + bse:width rdfs:subClassOf bsfs:Predicate ; + rdfs:domain bsfs:File ; + rdfs:range xsd:integer ; + bsfs:unique "true"^^xsd:boolean . + bse:height rdfs:subClassOf bsfs:Predicate ; + rdfs:domain bsfs:File ; + rdfs:range xsd:integer ; + bsfs:unique "true"^^xsd:boolean . + bse:orientation rdfs:subClassOf bsfs:Predicate ; + rdfs:domain bsfs:File ; + rdfs:range xsd:integer ; + bsfs:unique "true"^^xsd:boolean . + bse:orientation_label rdfs:subClassOf bsfs:Predicate ; + rdfs:domain bsfs:File ; + rdfs:range xsd:string ; + bsfs:unique "true"^^xsd:boolean . + bse:altitude rdfs:subClassOf bsfs:Predicate ; + rdfs:domain bsfs:File ; + rdfs:range xsd:float ; + bsfs:unique "true"^^xsd:boolean . + bse:latitude rdfs:subClassOf bsfs:Predicate ; + rdfs:domain bsfs:File ; + rdfs:range xsd:float ; + bsfs:unique "true"^^xsd:boolean . + bse:longitude rdfs:subClassOf bsfs:Predicate ; + rdfs:domain bsfs:File ; + rdfs:range xsd:float ; + bsfs:unique "true"^^xsd:boolean . 
+ ''')) + # initialize mapping from predicate to callback + self._callmap = { + #self.schema.predicate(ns.bse.t_capture): self._date, + self.schema.predicate(ns.bse.exposure): self._exposure, + self.schema.predicate(ns.bse.aperture): self._aperture, + self.schema.predicate(ns.bse.iso): self._iso, + self.schema.predicate(ns.bse.focal_length): self._focal_length, + self.schema.predicate(ns.bse.width): self._width, + self.schema.predicate(ns.bse.height): self._height, + self.schema.predicate(ns.bse.orientation): self._orientation, + self.schema.predicate(ns.bse.orientation_label): self._orientation_label, + self.schema.predicate(ns.bse.altitude): self._altitude, + self.schema.predicate(ns.bse.latitude): self._latitude, + self.schema.predicate(ns.bse.longitude): self._longitude, + } + + def extract( + self, + subject: node.Node, + content: dict, + principals: typing.Iterable[bsfs.schema.Predicate], + ) -> typing.Iterator[typing.Tuple[node.Node, bsfs.schema.Predicate, typing.Any]]: + for pred in principals: + # find callback + clbk = self._callmap.get(pred) + if clbk is None: + continue + # get value + value = clbk(content) + if value is None: + continue + # produce triple + yield subject, pred, value + + def _date(self, content: dict): # FIXME: Return type annotation + raise NotImplementedError() + #date_keys = ( + # 'Exif.Photo.DateTimeOriginal', + # 'Exif.Photo.DateTimeDigitized', + # 'Exif.Image.DateTime', + # ) + #for key in date_keys: + # if key in content: + # dt = content[key].value + # if dt.tzinfo is None: + # dt = dt.replace(tzinfo=ttime.NoTimeZone) + # return dt + #return None + + + ## photometrics + + def _exposure(self, content: dict) -> typing.Optional[float]: + if 'Exif.Photo.ExposureTime' in content: + return 1.0 / float(Fraction(content['Exif.Photo.ExposureTime'])) + return None + + def _aperture(self, content: dict) -> typing.Optional[float]: + if 'Exif.Photo.FNumber' in content: + return float(Fraction(content['Exif.Photo.FNumber'])) + return None + 
+ def _iso(self, content: dict) -> typing.Optional[int]: + if 'Exif.Photo.ISOSpeedRatings' in content: + return int(content['Exif.Photo.ISOSpeedRatings']) + return None + + def _focal_length(self, content: dict) -> typing.Optional[float]: + if 'Exif.Photo.FocalLength' in content: + return float(Fraction(content['Exif.Photo.FocalLength'])) + return None + + + ## image dimensions + + def _width(self, content: dict) -> typing.Optional[int]: + # FIXME: consider orientation! + if 'Exif.Photo.PixelXDimension' in content: + return int(content['Exif.Photo.PixelXDimension']) + return None + + def _height(self, content: dict) -> typing.Optional[int]: + # FIXME: consider orientation! + if 'Exif.Photo.PixelYDimension' in content: + return int(content['Exif.Photo.PixelYDimension']) + return None + + def _orientation(self, content: dict) -> typing.Optional[int]: + if 'Exif.Image.Orientation' in content: + return int(content['Exif.Image.Orientation']) + return None + + def _orientation_label(self, content: dict) -> typing.Optional[str]: + width = self._width(content) + height = self._height(content) + ori = self._orientation(content) + if width is not None and height is not None and ori is not None: + if ori <= 4: + return 'landscape' if width >= height else 'portrait' + else: + return 'portrait' if width >= height else 'landscape' + return None + + + ## location + + def _altitude(self, content: dict) -> typing.Optional[float]: + if 'Exif.GPSInfo.GPSAltitude' in content: + return float(Fraction(content['Exif.GPSInfo.GPSAltitude'])) + return None + + def _latitude(self, content: dict) -> typing.Optional[float]: + if 'Exif.GPSInfo.GPSLatitude' in content: + return _gps_to_dec(content['Exif.GPSInfo.GPSLatitude'].split()) + return None + + def _longitude(self, content: dict) -> typing.Optional[float]: + if 'Exif.GPSInfo.GPSLongitude' in content: + return _gps_to_dec(content['Exif.GPSInfo.GPSLongitude'].split()) + return None + +## EOF ## diff --git a/bsie/reader/exif.py 
b/bsie/reader/exif.py new file mode 100644 index 0000000..e087bec --- /dev/null +++ b/bsie/reader/exif.py @@ -0,0 +1,49 @@ +""" + +Part of the bsie module. +A copy of the license is provided with the project. +Author: Matthias Baumgartner, 2022 +""" +# standard imports +import typing + +# external imports +import pyexiv2 + +# bsie imports +from bsie.utils import errors, filematcher + +# inner-module imports +from . import base + +# constants +MATCH_RULE = 'mime=image/jpeg' + +# exports +__all__: typing.Sequence[str] = ( + 'Exif', + ) + + +## code ## + +class Exif(base.Reader): + """Use pyexiv2 to read exif metadata from image files.""" + + def __init__(self): + self._match = filematcher.parse(MATCH_RULE) + + def __call__(self, path: str) -> dict: + # perform quick checks first + if not self._match(path): + raise errors.UnsupportedFileFormatError(path) + + try: + # open the file + img = pyexiv2.Image(path) + # read metadata + return img.read_exif() + except TypeError as err: + raise errors.ReaderError(path) from err + +## EOF ## diff --git a/test/extractor/image/test_photometrics.py b/test/extractor/image/test_photometrics.py new file mode 100644 index 0000000..6e3b661 --- /dev/null +++ b/test/extractor/image/test_photometrics.py @@ -0,0 +1,138 @@ +""" + +Part of the bsie test suite. +A copy of the license is provided with the project. 
+Author: Matthias Baumgartner, 2022 +""" +# standard imports +import unittest + +# bsie imports +from bsie.extractor import base +from bsie.utils import bsfs, node as _node, ns + +# objects to test +from bsie.extractor.image.photometrics import Exif, _gps_to_dec + + +## code ## + +class TestExif(unittest.TestCase): + + def test_gps_to_dec(self): + # deg+min+sec format + self.assertAlmostEqual(_gps_to_dec('29/1 58/1 45/1'.split()), 29.979167, 6) + self.assertAlmostEqual(_gps_to_dec('31 08 03'.split()), 31.134167, 6) + self.assertAlmostEqual(_gps_to_dec('20 40 586/10'.split()), 20.682944, 6) + self.assertAlmostEqual(_gps_to_dec('88/1 34 68/10'.split()), 88.568556, 6) + # deg+min format + self.assertAlmostEqual(_gps_to_dec('13 472167/10000 0/1 '.split()), 13.786945, 6) + self.assertAlmostEqual(_gps_to_dec('104/1 3215/100 0/1'.split()), 104.535833, 6) + + def test_eq(self): + # identical instances are equal + self.assertEqual(Exif(), Exif()) + self.assertEqual(hash(Exif()), hash(Exif())) + # comparison respects type + class Foo(): pass + self.assertNotEqual(Exif(), Foo()) + self.assertNotEqual(hash(Exif()), hash(Foo())) + self.assertNotEqual(Exif(), 1234) + self.assertNotEqual(hash(Exif()), hash(1234)) + self.assertNotEqual(Exif(), None) + self.assertNotEqual(hash(Exif()), hash(None)) + + def test_schema(self): + self.assertSetEqual({pred.uri for pred in Exif().schema.predicates()}, { + ns.bsfs.Predicate, + ns.bse.exposure, + ns.bse.aperture, + ns.bse.iso, + ns.bse.focal_length, + ns.bse.width, + ns.bse.height, + ns.bse.orientation, + ns.bse.orientation_label, + ns.bse.altitude, + ns.bse.latitude, + ns.bse.longitude, + }) + + def test_extract(self): + ext = Exif() + node = _node.Node(ns.bsfs.File, '') # Blank node + content = { + 'Exif.Photo.ExposureTime': '10/600', + 'Exif.Photo.FNumber': '48/10', + 'Exif.Photo.ISOSpeedRatings': '400', + 'Exif.Photo.FocalLength': '460/10', + 'Exif.Photo.PixelXDimension': '4288', + 'Exif.Photo.PixelYDimension': '2848', + 
'Exif.Image.Orientation': '1', + 'Exif.GPSInfo.GPSAltitude': '431/1', + 'Exif.GPSInfo.GPSLatitude': '46/1 11397/625 0/1', + 'Exif.GPSInfo.GPSLongitude': '7/1 131250/2500 0/1', + } + + # invalid principals are ignored + self.assertSetEqual(set(ext.extract(node, content, {ns.bse.filename})), set()) + # extract finds all relevant information + self.assertSetEqual(set(ext.extract(node, content, {ext.schema.predicate(ns.bse.exposure)})), + {(node, ext.schema.predicate(ns.bse.exposure), 60.0)}) + self.assertSetEqual(set(ext.extract(node, content, {ext.schema.predicate(ns.bse.aperture)})), + {(node, ext.schema.predicate(ns.bse.aperture), 4.8)}) + self.assertSetEqual(set(ext.extract(node, content, {ext.schema.predicate(ns.bse.iso)})), + {(node, ext.schema.predicate(ns.bse.iso), 400)}) + self.assertSetEqual(set(ext.extract(node, content, {ext.schema.predicate(ns.bse.focal_length)})), + {(node, ext.schema.predicate(ns.bse.focal_length), 46.0)}) + self.assertSetEqual(set(ext.extract(node, content, {ext.schema.predicate(ns.bse.width)})), + {(node, ext.schema.predicate(ns.bse.width), 4288)}) + self.assertSetEqual(set(ext.extract(node, content, {ext.schema.predicate(ns.bse.height)})), + {(node, ext.schema.predicate(ns.bse.height), 2848)}) + self.assertSetEqual(set(ext.extract(node, content, {ext.schema.predicate(ns.bse.orientation)})), + {(node, ext.schema.predicate(ns.bse.orientation), 1)}) + self.assertSetEqual(set(ext.extract(node, content, {ext.schema.predicate(ns.bse.orientation_label)})), + {(node, ext.schema.predicate(ns.bse.orientation_label), 'landscape')}) + self.assertSetEqual(set(ext.extract(node, content, {ext.schema.predicate(ns.bse.altitude)})), + {(node, ext.schema.predicate(ns.bse.altitude), 431.0)}) + self.assertSetEqual(set(ext.extract(node, content, {ext.schema.predicate(ns.bse.latitude)})), + {(node, ext.schema.predicate(ns.bse.latitude), 46.30392)}) + self.assertSetEqual(set(ext.extract(node, content, {ext.schema.predicate(ns.bse.longitude)})), + {(node, 
ext.schema.predicate(ns.bse.longitude), 7.875)}) + + # can pass multiple principals + self.assertSetEqual(set(ext.extract(node, content, { + ext.schema.predicate(ns.bse.exposure), + ext.schema.predicate(ns.bse.iso), + ext.schema.predicate(ns.bse.focal_length), + })), { + (node, ext.schema.predicate(ns.bse.exposure), 60.0), + (node, ext.schema.predicate(ns.bse.iso), 400), + (node, ext.schema.predicate(ns.bse.focal_length), 46.0), + }) + + # principals w/o content are ignored + self.assertSetEqual(set(ext.extract( + node, + content={'Exif.Photo.ExposureTime': '10/600'}, + principals={ + ext.schema.predicate(ns.bse.exposure), + ext.schema.predicate(ns.bse.iso), + ext.schema.predicate(ns.bse.focal_length), + }) + ), { + (node, ext.schema.predicate(ns.bse.exposure), 60.0), + }) + + # empty content is acceptable + self.assertSetEqual(set(ext.extract(node, {}, set(ext.principals))), set()) + # no principals is acceptable + self.assertSetEqual(set(ext.extract(node, content, set())), set()) + + +## main ## + +if __name__ == '__main__': + unittest.main() + +## EOF ## diff --git a/test/reader/test_exif.py b/test/reader/test_exif.py new file mode 100644 index 0000000..f1330da --- /dev/null +++ b/test/reader/test_exif.py @@ -0,0 +1,48 @@ +""" + +Part of the bsie test suite. +A copy of the license is provided with the project. 
+Author: Matthias Baumgartner, 2022 +""" +# standard imports +import os +import unittest + +# bsie imports +from bsie.utils import errors + +# objects to test +from bsie.reader.exif import Exif + + +## code ## + +class TestExif(unittest.TestCase): + def test_call(self): + rdr = Exif() + # discards non-image files + self.assertRaises(errors.UnsupportedFileFormatError, rdr, 'invalid.doc') + # raises on invalid image files + self.assertRaises(errors.ReaderError, rdr, 'invalid.jpg') + # returns dict with exif info + self.assertDictEqual(rdr(os.path.join(os.path.dirname(__file__), 'testimage_exif.jpg')), { + 'Exif.Image.Artist': 'nobody', + 'Exif.Image.ExifTag': '110', + 'Exif.Image.ResolutionUnit': '2', + 'Exif.Image.XResolution': '300/1', + 'Exif.Image.YCbCrPositioning': '1', + 'Exif.Image.YResolution': '300/1', + 'Exif.Photo.ColorSpace': '65535', + 'Exif.Photo.ComponentsConfiguration': '1 2 3 0', + 'Exif.Photo.ExifVersion': '48 50 51 50', + 'Exif.Photo.FlashpixVersion': '48 49 48 48', + 'Exif.Photo.ISOSpeedRatings': '200', + }) + + +## main ## + +if __name__ == '__main__': + unittest.main() + +## EOF ## diff --git a/test/reader/testimage_exif.jpg b/test/reader/testimage_exif.jpg new file mode 100644 index 0000000..a774bc2 Binary files /dev/null and b/test/reader/testimage_exif.jpg differ -- cgit v1.2.3 From 6eca3af569997f28eee9d169a68cef4bbd6fd789 Mon Sep 17 00:00:00 2001 From: Matthias Baumgartner Date: Wed, 1 Mar 2023 21:50:04 +0100 Subject: Integrate main app into package --- bsie.app | 49 +++---------------------------------------------- bsie/apps/__init__.py | 40 +++++++++++++++++++++++++++++++++++++++- test/apps/test_index.py | 2 ++ 3 files changed, 44 insertions(+), 47 deletions(-) diff --git a/bsie.app b/bsie.app index d5808e7..0f6f7bc 100755 --- a/bsie.app +++ b/bsie.app @@ -1,49 +1,6 @@ -"""BSIE tools. - -Part of the bsie module. -A copy of the license is provided with the project. 
-Author: Matthias Baumgartner, 2022 -""" -# standard imports -import argparse -import typing - -# bsie imports -import bsie -import bsie.apps - -# exports -__all__: typing.Sequence[str] = ( - 'main', - ) - -# config -apps = { - 'index' : bsie.apps.index, - 'info' : bsie.apps.info, - } - - -## code ## - -def main(argv): - """Black Star File System maintenance tools.""" - parser = argparse.ArgumentParser(description=main.__doc__, prog='bsie') - parser.add_argument('--version', action='version', - version='%(prog)s version {}.{}.{}'.format(*bsie.version_info)) - parser.add_argument('app', choices=apps.keys(), - help='Select the application to run.') - parser.add_argument('rest', nargs=argparse.REMAINDER) - # parse - args = parser.parse_args() - # run application - apps[args.app](args.rest) - - -## main ## - +#!/usr/bin/env python3 if __name__ == '__main__': + import bsie.apps import sys - main(sys.argv[1:]) + bsie.apps.main(sys.argv[1:]) -## EOF ## diff --git a/bsie/apps/__init__.py b/bsie/apps/__init__.py index 1c3d0f9..4c852a9 100644 --- a/bsie/apps/__init__.py +++ b/bsie/apps/__init__.py @@ -1,12 +1,17 @@ -""" +#!/usr/bin/env python3 +"""BSIE tools. Part of the bsie module. A copy of the license is provided with the project. 
Author: Matthias Baumgartner, 2022 """ # standard imports +import argparse import typing +# bsie imports +import bsie + # inner-module imports from .index import main as index from .info import main as info @@ -15,6 +20,39 @@ from .info import main as info __all__: typing.Sequence[str] = ( 'index', 'info', + 'main', ) +# config +apps = { + 'index' : index, + 'info' : info, + } + + +## code ## + +def main(argv=None): + """Black Star File System maintenance tools.""" + parser = argparse.ArgumentParser(description=main.__doc__, prog='bsie') + # version + parser.add_argument('--version', action='version', + version='%(prog)s version {}.{}.{}'.format(*bsie.version_info)) + # application selection + parser.add_argument('app', choices=apps.keys(), + help='Select the application to run.') + # dangling args + parser.add_argument('rest', nargs=argparse.REMAINDER) + # parse + args = parser.parse_args(argv) + # run application + apps[args.app](args.rest) + + +## main ## + +if __name__ == '__main__': + import sys + main(sys.argv[1:]) + ## EOF ## diff --git a/test/apps/test_index.py b/test/apps/test_index.py index a877684..f632599 100644 --- a/test/apps/test_index.py +++ b/test/apps/test_index.py @@ -225,6 +225,8 @@ class TestIndex(unittest.TestCase): outbuf = io.StringIO() with contextlib.redirect_stdout(outbuf): bsfs = main([ + '--config', + self.config_path, '--print', '-r', '--host', 'http://example.com', -- cgit v1.2.3 From 365b36a30eb0afb706b706e0fa32b414f9d51a90 Mon Sep 17 00:00:00 2001 From: Matthias Baumgartner Date: Wed, 1 Mar 2023 21:58:23 +0100 Subject: build instructions --- MANIFEST.in | 1 + bsie.toml | 11 ----------- setup.py | 41 ++++++++++++++++++++++++++++------------- 3 files changed, 29 insertions(+), 24 deletions(-) create mode 100644 MANIFEST.in delete mode 100644 bsie.toml diff --git a/MANIFEST.in b/MANIFEST.in new file mode 100644 index 0000000..c4b7734 --- /dev/null +++ b/MANIFEST.in @@ -0,0 +1 @@ +include bsie/apps/default_config.yaml diff --git 
a/bsie.toml b/bsie.toml deleted file mode 100644 index 10b0f37..0000000 --- a/bsie.toml +++ /dev/null @@ -1,11 +0,0 @@ -[project] -name = "bsie" -description = "Extract information from files and store them in a BSFS." -version = "0.0.1" -license = {text = "BSD 3-Clause License"} -authors = [{name='Matthias Baumgartner', email="dev@igsor.net"}] -dependencies = [ - "rdflib", - "bsfs", -] -requires-python = ">=3.7" diff --git a/setup.py b/setup.py index d45f178..43bdd32 100644 --- a/setup.py +++ b/setup.py @@ -1,33 +1,48 @@ -from setuptools import setup +from setuptools import setup, find_packages import os setup( + # package metadata name='bsie', - version='0.0.1', + version='0.23.03', author='Matthias Baumgartner', - author_email='dev@igsor.net', + author_email='dev@bsfs.io', description='Extract information from files and store them in a BSFS.', long_description=open(os.path.join(os.path.dirname(__file__), 'README')).read(), license='BSD', license_files=('LICENSE', ), - url='https://www.igsor.net/projects/blackstar/bsie/', - download_url='https://pip.igsor.net', - packages=('bsie', ), + url='https://www.bsfs.io/bsie/', + download_url='https://pip.bsfs.io', + + # packages + packages=[p for p in find_packages() if p.startswith('bsie')], + # data files are included if mentioned in MANIFEST.in + include_package_data=True, + + # entrypoints + entry_points={ + 'console_scripts': [ + 'bsie = bsie.apps:main', + ], + }, + + # dependencies + python_requires=">=3.7", install_requires=( 'bsfs', + # filematcher 'pyparsing', 'python-magic', - 'rdflib', # only for tests - 'requests', # only for tests - 'preview_generator', # also depends on some system packages ), - python_requires=">=3.7", extra_require=( - # image reader - 'pillow', 'rawpy', - # image extractors + # image feature extractors 'numpy', + # preview reader + 'preview_generator', # also depends on some system packages + # image reader + 'pillow', + 'rawpy', ) ) -- cgit v1.2.3 From 
e0c4713c40367b4b41da926da0ba7ed05d47d54b Mon Sep 17 00:00:00 2001 From: Matthias Baumgartner Date: Wed, 1 Mar 2023 22:05:06 +0100 Subject: documentation --- .gitignore | 1 + doc/Makefile | 20 +++++++++++++ doc/make.bat | 35 ++++++++++++++++++++++ doc/source/architecture.rst | 71 +++++++++++++++++++++++++++++++++++++++++++++ doc/source/conf.py | 37 +++++++++++++++++++++++ doc/source/index.rst | 26 +++++++++++++++++ doc/source/installation.rst | 49 +++++++++++++++++++++++++++++++ 7 files changed, 239 insertions(+) create mode 100644 doc/Makefile create mode 100644 doc/make.bat create mode 100644 doc/source/architecture.rst create mode 100644 doc/source/conf.py create mode 100644 doc/source/index.rst create mode 100644 doc/source/installation.rst diff --git a/.gitignore b/.gitignore index c046d71..d2785ad 100644 --- a/.gitignore +++ b/.gitignore @@ -21,6 +21,7 @@ build/ # doc builds doc/build/ +doc/source/api # testing data test/reader/image/testimage.nef* diff --git a/doc/Makefile b/doc/Makefile new file mode 100644 index 0000000..d0c3cbf --- /dev/null +++ b/doc/Makefile @@ -0,0 +1,20 @@ +# Minimal makefile for Sphinx documentation +# + +# You can set these variables from the command line, and also +# from the environment for the first two. +SPHINXOPTS ?= +SPHINXBUILD ?= sphinx-build +SOURCEDIR = source +BUILDDIR = build + +# Put it first so that "make" without argument is like "make help". +help: + @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) + +.PHONY: help Makefile + +# Catch-all target: route all unknown targets to Sphinx using the new +# "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). 
+%: Makefile + @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) diff --git a/doc/make.bat b/doc/make.bat new file mode 100644 index 0000000..747ffb7 --- /dev/null +++ b/doc/make.bat @@ -0,0 +1,35 @@ +@ECHO OFF + +pushd %~dp0 + +REM Command file for Sphinx documentation + +if "%SPHINXBUILD%" == "" ( + set SPHINXBUILD=sphinx-build +) +set SOURCEDIR=source +set BUILDDIR=build + +%SPHINXBUILD% >NUL 2>NUL +if errorlevel 9009 ( + echo. + echo.The 'sphinx-build' command was not found. Make sure you have Sphinx + echo.installed, then set the SPHINXBUILD environment variable to point + echo.to the full path of the 'sphinx-build' executable. Alternatively you + echo.may add the Sphinx directory to PATH. + echo. + echo.If you don't have Sphinx installed, grab it from + echo.https://www.sphinx-doc.org/ + exit /b 1 +) + +if "%1" == "" goto help + +%SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% +goto end + +:help +%SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% + +:end +popd diff --git a/doc/source/architecture.rst b/doc/source/architecture.rst new file mode 100644 index 0000000..750319e --- /dev/null +++ b/doc/source/architecture.rst @@ -0,0 +1,71 @@ + +Architecture +============ + + +The information extraction pipeline traverses through three stages of abstraction: + +1. File format +2. Content +3. Predicate-value pairs + +For example, an image can be stored in various file formats (JPEG, TIFF, PNG). +In turn, a file format can store different kinds of information such as the image data (pixels) and additional metadata (image dimensions, EXIF tags). +Finally, we translate the information read from the file into predicate-value pairs that can be attached to a file node in BSFS, e.g., ``(bse:filesize, 8150000)``, ``(bse:width, 6000)``, ``(bse:height, 4000)``, ``(bse:iso, 100)``, etc. 
+ +The extraction pipeline is thus divided into +:mod:`Readers ` that abstract from file formats and content types, +and :mod:`Extractors ` which produce predicate-value pairs from content artifacts. + + +Readers +------- + +:mod:`Readers ` read the actual file (considering different file formats) +and isolate specific content artifacts therein. +The content artifact (in an internal representation) +is then passed to an Extractor for further processing. + +For example, the :class:`Image ` reader aims at reading the content (pixels) of an image file. +It automatically detects which python package (e.g., `rawpy`_, `pillow`_) +to use when faced with the various existing image file formats. +The image data is then converted into a PIL.Image instance +(irrespective of which package was used to read the data), +and passed on to the extractor. + + +Extractors +---------- + +:mod:`Extractors ` turn content artifacts into +predicate-value pairs that can be inserted into a BSFS storage. +The predicate is defined by each extractor, as prescribed by BSFS' schema handling. + +For example, the class :class:`ColorsSpatial ` class. + +Also, note that having to deal with various file formats and content artifacts +potentially pulls in a large number of dependencies. +To make matters worse, many of those might not be needed in a specific scenario, +e.g., if a user only works with a limited set of file formats. +BSIE therefore implements a best-effort approach, +that is, modules that cannot be imported due to missing dependencies are ignored. + +With these two concerns taken care of, +BSIE offers a few :mod:`end-user applications ` +that reduce the complexity of the task to a relatively simple command. + + + +.. _pillow: https://python-pillow.org/ +..
_rawpy: https://github.com/letmaik/rawpy diff --git a/doc/source/conf.py b/doc/source/conf.py new file mode 100644 index 0000000..017e036 --- /dev/null +++ b/doc/source/conf.py @@ -0,0 +1,37 @@ +# Configuration file for the Sphinx documentation builder. +# +# For the full list of built-in configuration values, see the documentation: +# https://www.sphinx-doc.org/en/master/usage/configuration.html + +# -- Project information ----------------------------------------------------- +# https://www.sphinx-doc.org/en/master/usage/configuration.html#project-information + +project = 'Black Star Information Extraction' +copyright = '2023, Matthias Baumgartner' +author = 'Matthias Baumgartner' +release = '0.5' + +# -- General configuration --------------------------------------------------- +# https://www.sphinx-doc.org/en/master/usage/configuration.html#general-configuration + +extensions = [ + 'sphinx_copybutton', + 'sphinx.ext.autodoc', + ] + +templates_path = ['_templates'] +exclude_patterns = [] + + + +# -- Options for HTML output ------------------------------------------------- +# https://www.sphinx-doc.org/en/master/usage/configuration.html#options-for-html-output + +html_theme = 'furo' +html_static_path = ['_static'] + +html_title = 'bsie' +html_theme_options = { + 'announcement': 'This project is under heavy development and subject to rapid changes. Use at your own discretion.', + } + diff --git a/doc/source/index.rst b/doc/source/index.rst new file mode 100644 index 0000000..9cf06fe --- /dev/null +++ b/doc/source/index.rst @@ -0,0 +1,26 @@ + +Black Star Information Extraction +================================= + +A major advantage of the `Black Star File System (BSFS) `_ +is its ability to store various kinds of (meta)data associated with a file. +However, the BSFS itself is only a storage solution, +it does not inspect files or collect information about them. 
+ +The Black Star Information Extraction (BSIE) package fills this gap by +extracting various kinds of information from a file and pushing that data to a BSFS instance. + +BSIE has the ability to process numerous file formats, +and it can turn various aspects of a file into usable information. +This includes metadata from a source file system, +metadata stored within the file, +and even excerpts or feature representations of the file's content itself. + +.. toctree:: + :maxdepth: 1 + + installation + architecture + api/modules + + diff --git a/doc/source/installation.rst b/doc/source/installation.rst new file mode 100644 index 0000000..42b1e4e --- /dev/null +++ b/doc/source/installation.rst @@ -0,0 +1,49 @@ + +Installation +============ + +Installation +------------ + +Install *bsie* via pip:: + + pip install --extra-index-url https://pip.bsfs.io bsie + +This installs the `bsie` python package as well as the `bsie.app` command. +It is recommended to install *bsie* in a virtual environment (via `virtualenv`). + + +License +------- + +This project is released under the terms of the 3-clause BSD License. +By downloading or using the application you agree to the license's terms and conditions. + +.. 
literalinclude:: ../../LICENSE + + +Source +------ + +Check out our git repository:: + + git clone https://git.bsfs.io/bsie.git + +You can further install *bsie* via the usual `setuptools `_ commands from your bsie source directory:: + + python setup.py develop + +For development, you also need to install some additional dependencies:: + + # extra packages for tests + pip install rdflib requests + + # code style discipline + pip install mypy coverage pylint + + # documentation + pip install sphinx sphinx-copybutton furo + + # packaging + pip install build + -- cgit v1.2.3 From 4b5c4d486bb4f0f4da2e25ad464e8336a781cdcb Mon Sep 17 00:00:00 2001 From: Matthias Baumgartner Date: Wed, 1 Mar 2023 22:31:03 +0100 Subject: removed module header stubs --- bsie/__init__.py | 4 ---- bsie/apps/__init__.py | 4 ---- bsie/apps/_loader.py | 5 ----- bsie/apps/index.py | 5 ----- bsie/apps/info.py | 5 ----- bsie/extractor/__init__.py | 3 --- bsie/extractor/base.py | 4 ---- bsie/extractor/builder.py | 5 ----- bsie/extractor/generic/__init__.py | 3 --- bsie/extractor/generic/constant.py | 4 ---- bsie/extractor/generic/path.py | 5 ----- bsie/extractor/generic/stat.py | 4 ---- bsie/extractor/image/__init__.py | 5 ----- bsie/extractor/image/colors_spatial.py | 4 ---- bsie/extractor/image/photometrics.py | 5 ----- bsie/extractor/preview.py | 5 ----- bsie/lib/__init__.py | 5 ----- bsie/lib/bsie.py | 5 ----- bsie/lib/builder.py | 5 ----- bsie/lib/naming_policy.py | 5 ----- bsie/lib/pipeline.py | 5 ----- bsie/reader/__init__.py | 8 ++------ bsie/reader/base.py | 9 --------- bsie/reader/builder.py | 5 ----- bsie/reader/chain.py | 5 ----- bsie/reader/exif.py | 5 ----- bsie/reader/image/__init__.py | 5 ----- bsie/reader/image/_pillow.py | 5 ----- bsie/reader/image/_raw.py | 5 ----- bsie/reader/path.py | 4 ---- bsie/reader/preview/__init__.py | 5 ----- bsie/reader/preview/_pg.py | 5 ----- bsie/reader/preview/_pillow.py | 5 ----- bsie/reader/preview/_rawpy.py | 5 ----- bsie/reader/preview/utils.py
| 5 ----- bsie/reader/stat.py | 4 ---- bsie/utils/__init__.py | 4 ---- bsie/utils/bsfs.py | 4 ---- bsie/utils/errors.py | 4 ---- bsie/utils/filematcher/__init__.py | 5 ----- bsie/utils/filematcher/matcher.py | 5 ----- bsie/utils/filematcher/parser.py | 5 ----- bsie/utils/loading.py | 5 ----- bsie/utils/namespaces.py | 4 ---- bsie/utils/node.py | 4 ---- test/apps/test_index.py | 5 ----- test/apps/test_info.py | 5 ----- test/apps/test_loader.py | 5 ----- test/extractor/generic/test_constant.py | 5 ----- test/extractor/generic/test_path.py | 5 ----- test/extractor/generic/test_stat.py | 5 ----- test/extractor/image/test_colors_spatial.py | 5 ----- test/extractor/image/test_photometrics.py | 5 ----- test/extractor/test_base.py | 5 ----- test/extractor/test_builder.py | 5 ----- test/extractor/test_preview.py | 5 ----- test/lib/test_bsie.py | 5 ----- test/lib/test_builder.py | 5 ----- test/lib/test_naming_policy.py | 5 ----- test/lib/test_pipeline.py | 5 ----- test/reader/image/load_nef.py | 5 ----- test/reader/image/test_image.py | 5 ----- test/reader/image/test_pillow.py | 5 ----- test/reader/image/test_raw_image.py | 5 ----- test/reader/preview/load_nef.py | 5 ----- test/reader/preview/test_pg.py | 5 ----- test/reader/preview/test_pillow.py | 5 ----- test/reader/preview/test_preview.py | 5 ----- test/reader/preview/test_rawpy.py | 5 ----- test/reader/preview/test_utils.py | 5 ----- test/reader/test_base.py | 5 ----- test/reader/test_builder.py | 5 ----- test/reader/test_chain.py | 5 ----- test/reader/test_exif.py | 5 ----- test/reader/test_path.py | 5 ----- test/reader/test_stat.py | 5 ----- test/utils/filematcher/test_matcher.py | 5 ----- test/utils/filematcher/test_parser.py | 5 ----- test/utils/test_loading.py | 5 ----- test/utils/test_node.py | 5 ----- 80 files changed, 2 insertions(+), 388 deletions(-) diff --git a/bsie/__init__.py b/bsie/__init__.py index c253f39..f6f2ff2 100644 --- a/bsie/__init__.py +++ b/bsie/__init__.py @@ -1,8 +1,4 @@ """The BSIE module 
extracts triples from files for insertion into a BSFS storage. - -Part of the bsie module. -A copy of the license is provided with the project. -Author: Matthias Baumgartner, 2022 """ # standard imports import collections diff --git a/bsie/apps/__init__.py b/bsie/apps/__init__.py index 4c852a9..cec8f84 100644 --- a/bsie/apps/__init__.py +++ b/bsie/apps/__init__.py @@ -1,9 +1,5 @@ #!/usr/bin/env python3 """BSIE tools. - -Part of the bsie module. -A copy of the license is provided with the project. -Author: Matthias Baumgartner, 2022 """ # standard imports import argparse diff --git a/bsie/apps/_loader.py b/bsie/apps/_loader.py index e02bed5..36dd8a6 100644 --- a/bsie/apps/_loader.py +++ b/bsie/apps/_loader.py @@ -1,9 +1,4 @@ -""" -Part of the bsie module. -A copy of the license is provided with the project. -Author: Matthias Baumgartner, 2022 -""" # standard imports import typing diff --git a/bsie/apps/index.py b/bsie/apps/index.py index 2d147c9..d64e8c2 100644 --- a/bsie/apps/index.py +++ b/bsie/apps/index.py @@ -1,9 +1,4 @@ -""" -Part of the bsie module. -A copy of the license is provided with the project. -Author: Matthias Baumgartner, 2022 -""" # standard imports import argparse import os diff --git a/bsie/apps/info.py b/bsie/apps/info.py index 363ab30..e27b70b 100644 --- a/bsie/apps/info.py +++ b/bsie/apps/info.py @@ -1,9 +1,4 @@ -""" -Part of the bsie module. -A copy of the license is provided with the project. -Author: Matthias Baumgartner, 2022 -""" # standard imports import argparse import os diff --git a/bsie/extractor/__init__.py b/bsie/extractor/__init__.py index 5f385ee..36fa9ba 100644 --- a/bsie/extractor/__init__.py +++ b/bsie/extractor/__init__.py @@ -2,9 +2,6 @@ Each Extractor class is linked to the Reader class whose content it requires. -Part of the bsie module. -A copy of the license is provided with the project. 
-Author: Matthias Baumgartner, 2022 """ # standard imports import typing diff --git a/bsie/extractor/base.py b/bsie/extractor/base.py index d8b86a5..3759c68 100644 --- a/bsie/extractor/base.py +++ b/bsie/extractor/base.py @@ -1,8 +1,4 @@ """The Extractor classes transform content into triples. - -Part of the bsie module. -A copy of the license is provided with the project. -Author: Matthias Baumgartner, 2022 """ # standard imports import abc diff --git a/bsie/extractor/builder.py b/bsie/extractor/builder.py index 0fd3685..d691b0e 100644 --- a/bsie/extractor/builder.py +++ b/bsie/extractor/builder.py @@ -1,9 +1,4 @@ -""" -Part of the bsie module. -A copy of the license is provided with the project. -Author: Matthias Baumgartner, 2022 -""" # standard imports import typing diff --git a/bsie/extractor/generic/__init__.py b/bsie/extractor/generic/__init__.py index 4783949..46a4bd6 100644 --- a/bsie/extractor/generic/__init__.py +++ b/bsie/extractor/generic/__init__.py @@ -3,9 +3,6 @@ files. Examples include file system information (file name and size, mime type, etc.) and information that is independent of the actual file (constant triples, host platform infos, current time, etc.). -Part of the bsie module. -A copy of the license is provided with the project. -Author: Matthias Baumgartner, 2022 """ # standard imports import typing diff --git a/bsie/extractor/generic/constant.py b/bsie/extractor/generic/constant.py index 938e20c..7acbe95 100644 --- a/bsie/extractor/generic/constant.py +++ b/bsie/extractor/generic/constant.py @@ -1,8 +1,4 @@ """The Constant extractor produces pre-specified triples. - -Part of the bsie module. -A copy of the license is provided with the project. 
-Author: Matthias Baumgartner, 2022 """ # standard imports import typing diff --git a/bsie/extractor/generic/path.py b/bsie/extractor/generic/path.py index cd8cec9..8b01933 100644 --- a/bsie/extractor/generic/path.py +++ b/bsie/extractor/generic/path.py @@ -1,9 +1,4 @@ -""" -Part of the bsie module. -A copy of the license is provided with the project. -Author: Matthias Baumgartner, 2022 -""" # standard imports import os import typing diff --git a/bsie/extractor/generic/stat.py b/bsie/extractor/generic/stat.py index f35f8e1..50821a7 100644 --- a/bsie/extractor/generic/stat.py +++ b/bsie/extractor/generic/stat.py @@ -1,8 +1,4 @@ """Extract information from the file system, such as filesize. - -Part of the bsie module. -A copy of the license is provided with the project. -Author: Matthias Baumgartner, 2022 """ # standard imports import os diff --git a/bsie/extractor/image/__init__.py b/bsie/extractor/image/__init__.py index 75b118d..f82424a 100644 --- a/bsie/extractor/image/__init__.py +++ b/bsie/extractor/image/__init__.py @@ -1,9 +1,4 @@ -""" -Part of the bsie module. -A copy of the license is provided with the project. -Author: Matthias Baumgartner, 2022 -""" # standard imports import typing diff --git a/bsie/extractor/image/colors_spatial.py b/bsie/extractor/image/colors_spatial.py index 15fd281..34cd615 100644 --- a/bsie/extractor/image/colors_spatial.py +++ b/bsie/extractor/image/colors_spatial.py @@ -1,8 +1,4 @@ """Spatial color features. - -Part of the bsie module. -A copy of the license is provided with the project. -Author: Matthias Baumgartner, 2022 """ # standard imports import typing diff --git a/bsie/extractor/image/photometrics.py b/bsie/extractor/image/photometrics.py index ae0a541..525f207 100644 --- a/bsie/extractor/image/photometrics.py +++ b/bsie/extractor/image/photometrics.py @@ -1,9 +1,4 @@ -""" -Part of the bsie module. -A copy of the license is provided with the project. 
-Author: Matthias Baumgartner, 2022 -""" # standard imports from fractions import Fraction import typing diff --git a/bsie/extractor/preview.py b/bsie/extractor/preview.py index 1531d62..7e4a171 100644 --- a/bsie/extractor/preview.py +++ b/bsie/extractor/preview.py @@ -1,9 +1,4 @@ -""" -Part of the bsie module. -A copy of the license is provided with the project. -Author: Matthias Baumgartner, 2022 -""" # imports import io import typing diff --git a/bsie/lib/__init__.py b/bsie/lib/__init__.py index 48379de..f44fb74 100644 --- a/bsie/lib/__init__.py +++ b/bsie/lib/__init__.py @@ -1,9 +1,4 @@ -""" -Part of the bsie module. -A copy of the license is provided with the project. -Author: Matthias Baumgartner, 2022 -""" # standard imports import typing diff --git a/bsie/lib/bsie.py b/bsie/lib/bsie.py index a572525..b02e707 100644 --- a/bsie/lib/bsie.py +++ b/bsie/lib/bsie.py @@ -1,9 +1,4 @@ -""" -Part of the bsie module. -A copy of the license is provided with the project. -Author: Matthias Baumgartner, 2022 -""" # standard imports import typing diff --git a/bsie/lib/builder.py b/bsie/lib/builder.py index 39da441..3a15311 100644 --- a/bsie/lib/builder.py +++ b/bsie/lib/builder.py @@ -1,9 +1,4 @@ -""" -Part of the bsie module. -A copy of the license is provided with the project. -Author: Matthias Baumgartner, 2022 -""" # standard imports import logging import typing diff --git a/bsie/lib/naming_policy.py b/bsie/lib/naming_policy.py index 131a70b..c99f8c8 100644 --- a/bsie/lib/naming_policy.py +++ b/bsie/lib/naming_policy.py @@ -1,9 +1,4 @@ -""" -Part of the bsie module. -A copy of the license is provided with the project. -Author: Matthias Baumgartner, 2022 -""" # standard imports import abc import os diff --git a/bsie/lib/pipeline.py b/bsie/lib/pipeline.py index 0bc5109..128eecc 100644 --- a/bsie/lib/pipeline.py +++ b/bsie/lib/pipeline.py @@ -1,9 +1,4 @@ -""" -Part of the bsie module. -A copy of the license is provided with the project. 
-Author: Matthias Baumgartner, 2022 -""" # standard imports from collections import defaultdict import logging diff --git a/bsie/reader/__init__.py b/bsie/reader/__init__.py index 4163d1c..a1c38a9 100644 --- a/bsie/reader/__init__.py +++ b/bsie/reader/__init__.py @@ -1,8 +1,8 @@ """The Reader classes return high-level content structures from files. The Reader fulfills two purposes: - First, it brokers between multiple libraries and file formats. - Second, it separates multiple aspects of a file into distinct content types. +First, it brokers between multiple libraries and file formats. +Second, it separates multiple aspects of a file into distinct content types. Often, different libraries focus on reading different types of content from a file. E.g. one would use different modules to read file system infos than to @@ -11,9 +11,6 @@ type. Each distinct type can be implemented in a file or submodule that provides a Reader implementation. Through utilization of submodules, different file formats can be supported. -Part of the bsie module. -A copy of the license is provided with the project. -Author: Matthias Baumgartner, 2022 """ # standard imports import typing @@ -29,4 +26,3 @@ __all__: typing.Sequence[str] = ( ) ## EOF ## -## EOF ## diff --git a/bsie/reader/base.py b/bsie/reader/base.py index 099a327..a775701 100644 --- a/bsie/reader/base.py +++ b/bsie/reader/base.py @@ -1,13 +1,4 @@ -"""The Reader classes return high-level content structures from files. -The Reader fulfills two purposes: - First, it brokers between multiple libraries and file formats. - Second, it separates multiple aspects of a file into distinct content types. - -Part of the bsie module. -A copy of the license is provided with the project. 
-Author: Matthias Baumgartner, 2022 -""" # standard imports import abc import typing diff --git a/bsie/reader/builder.py b/bsie/reader/builder.py index 8699e75..d32700b 100644 --- a/bsie/reader/builder.py +++ b/bsie/reader/builder.py @@ -1,9 +1,4 @@ -""" -Part of the bsie module. -A copy of the license is provided with the project. -Author: Matthias Baumgartner, 2022 -""" # standard imports import typing diff --git a/bsie/reader/chain.py b/bsie/reader/chain.py index 1dbc52b..79b44b4 100644 --- a/bsie/reader/chain.py +++ b/bsie/reader/chain.py @@ -1,9 +1,4 @@ -""" -Part of the bsie module. -A copy of the license is provided with the project. -Author: Matthias Baumgartner, 2022 -""" # standard imports import logging import typing diff --git a/bsie/reader/exif.py b/bsie/reader/exif.py index e087bec..8c74462 100644 --- a/bsie/reader/exif.py +++ b/bsie/reader/exif.py @@ -1,9 +1,4 @@ -""" -Part of the bsie module. -A copy of the license is provided with the project. -Author: Matthias Baumgartner, 2022 -""" # standard imports import typing diff --git a/bsie/reader/image/__init__.py b/bsie/reader/image/__init__.py index c5d2a2a..89642f2 100644 --- a/bsie/reader/image/__init__.py +++ b/bsie/reader/image/__init__.py @@ -1,9 +1,4 @@ -""" -Part of the bsie module. -A copy of the license is provided with the project. -Author: Matthias Baumgartner, 2022 -""" # standard imports import typing diff --git a/bsie/reader/image/_pillow.py b/bsie/reader/image/_pillow.py index 5b2bdf2..0611d3c 100644 --- a/bsie/reader/image/_pillow.py +++ b/bsie/reader/image/_pillow.py @@ -1,9 +1,4 @@ -""" -Part of the bsie module. -A copy of the license is provided with the project. -Author: Matthias Baumgartner, 2022 -""" # standard imports import typing diff --git a/bsie/reader/image/_raw.py b/bsie/reader/image/_raw.py index 257fdb3..e5745aa 100644 --- a/bsie/reader/image/_raw.py +++ b/bsie/reader/image/_raw.py @@ -1,9 +1,4 @@ -""" -Part of the bsie module. 
-A copy of the license is provided with the project. -Author: Matthias Baumgartner, 2022 -""" # standard imports import typing diff --git a/bsie/reader/path.py b/bsie/reader/path.py index 1ca05a0..45eb127 100644 --- a/bsie/reader/path.py +++ b/bsie/reader/path.py @@ -1,8 +1,4 @@ """The Path reader produces a file path. - -Part of the bsie module. -A copy of the license is provided with the project. -Author: Matthias Baumgartner, 2022 """ # standard imports import typing diff --git a/bsie/reader/preview/__init__.py b/bsie/reader/preview/__init__.py index 3e69a4a..791a133 100644 --- a/bsie/reader/preview/__init__.py +++ b/bsie/reader/preview/__init__.py @@ -1,9 +1,4 @@ -""" -Part of the bsie module. -A copy of the license is provided with the project. -Author: Matthias Baumgartner, 2022 -""" # imports import typing diff --git a/bsie/reader/preview/_pg.py b/bsie/reader/preview/_pg.py index 097c513..401b33d 100644 --- a/bsie/reader/preview/_pg.py +++ b/bsie/reader/preview/_pg.py @@ -1,9 +1,4 @@ -""" -Part of the bsie module. -A copy of the license is provided with the project. -Author: Matthias Baumgartner, 2022 -""" # standard imports from functools import partial import contextlib diff --git a/bsie/reader/preview/_pillow.py b/bsie/reader/preview/_pillow.py index 174d509..15c1c6d 100644 --- a/bsie/reader/preview/_pillow.py +++ b/bsie/reader/preview/_pillow.py @@ -1,9 +1,4 @@ -""" -Part of the bsie module. -A copy of the license is provided with the project. -Author: Matthias Baumgartner, 2022 -""" # standard imports from functools import partial import typing diff --git a/bsie/reader/preview/_rawpy.py b/bsie/reader/preview/_rawpy.py index 2c20a48..16e8675 100644 --- a/bsie/reader/preview/_rawpy.py +++ b/bsie/reader/preview/_rawpy.py @@ -1,9 +1,4 @@ -""" -Part of the bsie module. -A copy of the license is provided with the project. 
-Author: Matthias Baumgartner, 2022 -""" # standard imports from functools import partial import typing diff --git a/bsie/reader/preview/utils.py b/bsie/reader/preview/utils.py index 2ef1562..82ecc31 100644 --- a/bsie/reader/preview/utils.py +++ b/bsie/reader/preview/utils.py @@ -1,9 +1,4 @@ -""" -Part of the tagit module. -A copy of the license is provided with the project. -Author: Matthias Baumgartner, 2022 -""" # standard imports import typing diff --git a/bsie/reader/stat.py b/bsie/reader/stat.py index 706dc47..f42e7fb 100644 --- a/bsie/reader/stat.py +++ b/bsie/reader/stat.py @@ -1,8 +1,4 @@ """The Stat reader produces filesystem stat information. - -Part of the bsie module. -A copy of the license is provided with the project. -Author: Matthias Baumgartner, 2022 """ # standard imports import os diff --git a/bsie/utils/__init__.py b/bsie/utils/__init__.py index 9cb60ed..18c8db7 100644 --- a/bsie/utils/__init__.py +++ b/bsie/utils/__init__.py @@ -1,8 +1,4 @@ """Common tools and definitions. - -Part of the bsie module. -A copy of the license is provided with the project. -Author: Matthias Baumgartner, 2022 """ # standard imports import typing diff --git a/bsie/utils/bsfs.py b/bsie/utils/bsfs.py index ef5db31..fc045cc 100644 --- a/bsie/utils/bsfs.py +++ b/bsie/utils/bsfs.py @@ -1,8 +1,4 @@ """BSFS bridge, provides BSFS bindings for BSIE. - -Part of the bsie module. -A copy of the license is provided with the project. -Author: Matthias Baumgartner, 2022 """ # standard imports import typing diff --git a/bsie/utils/errors.py b/bsie/utils/errors.py index 8133cd4..e71fc60 100644 --- a/bsie/utils/errors.py +++ b/bsie/utils/errors.py @@ -1,8 +1,4 @@ """Common BSIE exceptions. - -Part of the bsie module. -A copy of the license is provided with the project. 
-Author: Matthias Baumgartner, 2022 """ # standard imports import typing diff --git a/bsie/utils/filematcher/__init__.py b/bsie/utils/filematcher/__init__.py index 1e23e4e..908de78 100644 --- a/bsie/utils/filematcher/__init__.py +++ b/bsie/utils/filematcher/__init__.py @@ -1,9 +1,4 @@ -""" -Part of the bsie module. -A copy of the license is provided with the project. -Author: Matthias Baumgartner, 2022 -""" # standard imports import typing diff --git a/bsie/utils/filematcher/matcher.py b/bsie/utils/filematcher/matcher.py index a279a4b..1fa308e 100644 --- a/bsie/utils/filematcher/matcher.py +++ b/bsie/utils/filematcher/matcher.py @@ -1,9 +1,4 @@ -""" -Part of the bsie module. -A copy of the license is provided with the project. -Author: Matthias Baumgartner, 2021 -""" # standard imports from collections.abc import Callable, Collection, Hashable import abc diff --git a/bsie/utils/filematcher/parser.py b/bsie/utils/filematcher/parser.py index 2f82875..dc28a0d 100644 --- a/bsie/utils/filematcher/parser.py +++ b/bsie/utils/filematcher/parser.py @@ -1,9 +1,4 @@ -""" -Part of the bsie module. -A copy of the license is provided with the project. -Author: Matthias Baumgartner, 2021 -""" # standard imports import typing diff --git a/bsie/utils/loading.py b/bsie/utils/loading.py index eb05c35..3c5c7c1 100644 --- a/bsie/utils/loading.py +++ b/bsie/utils/loading.py @@ -1,9 +1,4 @@ -""" -Part of the bsie module. -A copy of the license is provided with the project. -Author: Matthias Baumgartner, 2022 -""" # standard imports import importlib import typing diff --git a/bsie/utils/namespaces.py b/bsie/utils/namespaces.py index 0af8ece..310aa3f 100644 --- a/bsie/utils/namespaces.py +++ b/bsie/utils/namespaces.py @@ -1,8 +1,4 @@ """Default namespaces used throughout BSIE. - -Part of the bsie module. -A copy of the license is provided with the project. 
-Author: Matthias Baumgartner, 2022 """ # standard imports import typing diff --git a/bsie/utils/node.py b/bsie/utils/node.py index aa62c06..fa34b2e 100644 --- a/bsie/utils/node.py +++ b/bsie/utils/node.py @@ -1,8 +1,4 @@ """Lighweight Node to bridge to BSFS. - -Part of the bsie module. -A copy of the license is provided with the project. -Author: Matthias Baumgartner, 2022 """ # standard imports import typing diff --git a/test/apps/test_index.py b/test/apps/test_index.py index f632599..03209fe 100644 --- a/test/apps/test_index.py +++ b/test/apps/test_index.py @@ -1,9 +1,4 @@ -""" -Part of the bsie test suite. -A copy of the license is provided with the project. -Author: Matthias Baumgartner, 2022 -""" # standard imports import contextlib import io diff --git a/test/apps/test_info.py b/test/apps/test_info.py index cf8d52f..d705629 100644 --- a/test/apps/test_info.py +++ b/test/apps/test_info.py @@ -1,9 +1,4 @@ -""" -Part of the bsie test suite. -A copy of the license is provided with the project. -Author: Matthias Baumgartner, 2022 -""" # standard imports import argparse import contextlib diff --git a/test/apps/test_loader.py b/test/apps/test_loader.py index 09a9162..20254ec 100644 --- a/test/apps/test_loader.py +++ b/test/apps/test_loader.py @@ -1,9 +1,4 @@ -""" -Part of the bsie test suite. -A copy of the license is provided with the project. -Author: Matthias Baumgartner, 2022 -""" # standard imports import os import tempfile diff --git a/test/extractor/generic/test_constant.py b/test/extractor/generic/test_constant.py index bde3805..db55852 100644 --- a/test/extractor/generic/test_constant.py +++ b/test/extractor/generic/test_constant.py @@ -1,9 +1,4 @@ -""" -Part of the bsie test suite. -A copy of the license is provided with the project. 
-Author: Matthias Baumgartner, 2022 -""" # standard imports import unittest diff --git a/test/extractor/generic/test_path.py b/test/extractor/generic/test_path.py index 5568de7..d40a28c 100644 --- a/test/extractor/generic/test_path.py +++ b/test/extractor/generic/test_path.py @@ -1,9 +1,4 @@ -""" -Part of the bsie test suite. -A copy of the license is provided with the project. -Author: Matthias Baumgartner, 2022 -""" # standard imports import unittest diff --git a/test/extractor/generic/test_stat.py b/test/extractor/generic/test_stat.py index f543386..8868546 100644 --- a/test/extractor/generic/test_stat.py +++ b/test/extractor/generic/test_stat.py @@ -1,9 +1,4 @@ -""" -Part of the bsie test suite. -A copy of the license is provided with the project. -Author: Matthias Baumgartner, 2022 -""" # standard imports import os import unittest diff --git a/test/extractor/image/test_colors_spatial.py b/test/extractor/image/test_colors_spatial.py index ba551f3..967aaf2 100644 --- a/test/extractor/image/test_colors_spatial.py +++ b/test/extractor/image/test_colors_spatial.py @@ -1,9 +1,4 @@ -""" -Part of the bsie test suite. -A copy of the license is provided with the project. -Author: Matthias Baumgartner, 2022 -""" # standard imports import os import unittest diff --git a/test/extractor/image/test_photometrics.py b/test/extractor/image/test_photometrics.py index 6e3b661..0e0261b 100644 --- a/test/extractor/image/test_photometrics.py +++ b/test/extractor/image/test_photometrics.py @@ -1,9 +1,4 @@ -""" -Part of the bsie test suite. -A copy of the license is provided with the project. -Author: Matthias Baumgartner, 2022 -""" # standard imports import unittest diff --git a/test/extractor/test_base.py b/test/extractor/test_base.py index acfaf58..bb1f73b 100644 --- a/test/extractor/test_base.py +++ b/test/extractor/test_base.py @@ -1,9 +1,4 @@ -""" -Part of the bsie test suite. -A copy of the license is provided with the project. 
-Author: Matthias Baumgartner, 2022 -""" # standard imports import unittest diff --git a/test/extractor/test_builder.py b/test/extractor/test_builder.py index 039ea53..1b8951a 100644 --- a/test/extractor/test_builder.py +++ b/test/extractor/test_builder.py @@ -1,9 +1,4 @@ -""" -Part of the bsie test suite. -A copy of the license is provided with the project. -Author: Matthias Baumgartner, 2022 -""" # standard imports import unittest diff --git a/test/extractor/test_preview.py b/test/extractor/test_preview.py index 10d2a7f..0e0068d 100644 --- a/test/extractor/test_preview.py +++ b/test/extractor/test_preview.py @@ -1,9 +1,4 @@ -""" -Part of the bsie test suite. -A copy of the license is provided with the project. -Author: Matthias Baumgartner, 2022 -""" # standard imports import io import os diff --git a/test/lib/test_bsie.py b/test/lib/test_bsie.py index 2ed9e10..52da8b8 100644 --- a/test/lib/test_bsie.py +++ b/test/lib/test_bsie.py @@ -1,9 +1,4 @@ -""" -Part of the bsie test suite. -A copy of the license is provided with the project. -Author: Matthias Baumgartner, 2022 -""" # standard imports import os import unittest diff --git a/test/lib/test_builder.py b/test/lib/test_builder.py index 48e932b..2bd1994 100644 --- a/test/lib/test_builder.py +++ b/test/lib/test_builder.py @@ -1,9 +1,4 @@ -""" -Part of the bsie test suite. -A copy of the license is provided with the project. -Author: Matthias Baumgartner, 2022 -""" # standard imports import logging import unittest diff --git a/test/lib/test_naming_policy.py b/test/lib/test_naming_policy.py index 4861c84..b2a3649 100644 --- a/test/lib/test_naming_policy.py +++ b/test/lib/test_naming_policy.py @@ -1,9 +1,4 @@ -""" -Part of the bsie test suite. -A copy of the license is provided with the project. 
-Author: Matthias Baumgartner, 2022 -""" # standard imports import unittest diff --git a/test/lib/test_pipeline.py b/test/lib/test_pipeline.py index 61fddd7..5125a5c 100644 --- a/test/lib/test_pipeline.py +++ b/test/lib/test_pipeline.py @@ -1,9 +1,4 @@ -""" -Part of the bsie test suite. -A copy of the license is provided with the project. -Author: Matthias Baumgartner, 2022 -""" # standard imports import logging import os diff --git a/test/reader/image/load_nef.py b/test/reader/image/load_nef.py index 5ba0adc..02be470 100644 --- a/test/reader/image/load_nef.py +++ b/test/reader/image/load_nef.py @@ -1,9 +1,4 @@ -""" -Part of the bsie test suite. -A copy of the license is provided with the project. -Author: Matthias Baumgartner, 2022 -""" # standard imports import os diff --git a/test/reader/image/test_image.py b/test/reader/image/test_image.py index 26f6a93..ee9b8f9 100644 --- a/test/reader/image/test_image.py +++ b/test/reader/image/test_image.py @@ -1,9 +1,4 @@ -""" -Part of the bsie test suite. -A copy of the license is provided with the project. -Author: Matthias Baumgartner, 2022 -""" # standard imports import importlib import os diff --git a/test/reader/image/test_pillow.py b/test/reader/image/test_pillow.py index 8abf5c1..2cff768 100644 --- a/test/reader/image/test_pillow.py +++ b/test/reader/image/test_pillow.py @@ -1,9 +1,4 @@ -""" -Part of the bsie test suite. -A copy of the license is provided with the project. -Author: Matthias Baumgartner, 2022 -""" # standard imports import os import unittest diff --git a/test/reader/image/test_raw_image.py b/test/reader/image/test_raw_image.py index ba21b5a..0a5ed63 100644 --- a/test/reader/image/test_raw_image.py +++ b/test/reader/image/test_raw_image.py @@ -1,9 +1,4 @@ -""" -Part of the bsie test suite. -A copy of the license is provided with the project. 
-Author: Matthias Baumgartner, 2022 -""" # standard imports import importlib import os diff --git a/test/reader/preview/load_nef.py b/test/reader/preview/load_nef.py index 5ba0adc..02be470 100644 --- a/test/reader/preview/load_nef.py +++ b/test/reader/preview/load_nef.py @@ -1,9 +1,4 @@ -""" -Part of the bsie test suite. -A copy of the license is provided with the project. -Author: Matthias Baumgartner, 2022 -""" # standard imports import os diff --git a/test/reader/preview/test_pg.py b/test/reader/preview/test_pg.py index e492cfa..381344f 100644 --- a/test/reader/preview/test_pg.py +++ b/test/reader/preview/test_pg.py @@ -1,9 +1,4 @@ -""" -Part of the bsie test suite. -A copy of the license is provided with the project. -Author: Matthias Baumgartner, 2022 -""" # standard imports from functools import partial import os diff --git a/test/reader/preview/test_pillow.py b/test/reader/preview/test_pillow.py index ca38d89..d213bbb 100644 --- a/test/reader/preview/test_pillow.py +++ b/test/reader/preview/test_pillow.py @@ -1,9 +1,4 @@ -""" -Part of the bsie test suite. -A copy of the license is provided with the project. -Author: Matthias Baumgartner, 2022 -""" # standard imports from functools import partial import os diff --git a/test/reader/preview/test_preview.py b/test/reader/preview/test_preview.py index fde610f..e144877 100644 --- a/test/reader/preview/test_preview.py +++ b/test/reader/preview/test_preview.py @@ -1,9 +1,4 @@ -""" -Part of the bsie test suite. -A copy of the license is provided with the project. -Author: Matthias Baumgartner, 2022 -""" # standard imports from functools import partial import importlib diff --git a/test/reader/preview/test_rawpy.py b/test/reader/preview/test_rawpy.py index ed35f53..11a6f9b 100644 --- a/test/reader/preview/test_rawpy.py +++ b/test/reader/preview/test_rawpy.py @@ -1,9 +1,4 @@ -""" -Part of the bsie test suite. -A copy of the license is provided with the project. 
-Author: Matthias Baumgartner, 2022 -""" # standard imports from functools import partial import importlib diff --git a/test/reader/preview/test_utils.py b/test/reader/preview/test_utils.py index c10c38c..2b15bc6 100644 --- a/test/reader/preview/test_utils.py +++ b/test/reader/preview/test_utils.py @@ -1,9 +1,4 @@ -""" -Part of the bsie test suite. -A copy of the license is provided with the project. -Author: Matthias Baumgartner, 2022 -""" # standard imports import os import unittest diff --git a/test/reader/test_base.py b/test/reader/test_base.py index 41f4c29..5dd2855 100644 --- a/test/reader/test_base.py +++ b/test/reader/test_base.py @@ -1,9 +1,4 @@ -""" -Part of the bsie test suite. -A copy of the license is provided with the project. -Author: Matthias Baumgartner, 2022 -""" # standard imports import unittest diff --git a/test/reader/test_builder.py b/test/reader/test_builder.py index 92e9edc..84e8e7a 100644 --- a/test/reader/test_builder.py +++ b/test/reader/test_builder.py @@ -1,9 +1,4 @@ -""" -Part of the bsie test suite. -A copy of the license is provided with the project. -Author: Matthias Baumgartner, 2022 -""" # standard imports import unittest diff --git a/test/reader/test_chain.py b/test/reader/test_chain.py index 901faa1..665aabc 100644 --- a/test/reader/test_chain.py +++ b/test/reader/test_chain.py @@ -1,9 +1,4 @@ -""" -Part of the bsie test suite. -A copy of the license is provided with the project. -Author: Matthias Baumgartner, 2022 -""" # standard imports import logging import os diff --git a/test/reader/test_exif.py b/test/reader/test_exif.py index f1330da..b2bf843 100644 --- a/test/reader/test_exif.py +++ b/test/reader/test_exif.py @@ -1,9 +1,4 @@ -""" -Part of the bsie test suite. -A copy of the license is provided with the project. 
-Author: Matthias Baumgartner, 2022 -""" # standard imports import os import unittest diff --git a/test/reader/test_path.py b/test/reader/test_path.py index 95e447f..f2eee06 100644 --- a/test/reader/test_path.py +++ b/test/reader/test_path.py @@ -1,9 +1,4 @@ -""" -Part of the bsie test suite. -A copy of the license is provided with the project. -Author: Matthias Baumgartner, 2022 -""" # standard imports import unittest diff --git a/test/reader/test_stat.py b/test/reader/test_stat.py index fd9fdcd..f36b8b3 100644 --- a/test/reader/test_stat.py +++ b/test/reader/test_stat.py @@ -1,9 +1,4 @@ -""" -Part of the bsie test suite. -A copy of the license is provided with the project. -Author: Matthias Baumgartner, 2022 -""" # standard imports import os import unittest diff --git a/test/utils/filematcher/test_matcher.py b/test/utils/filematcher/test_matcher.py index c3cccee..88e96c2 100644 --- a/test/utils/filematcher/test_matcher.py +++ b/test/utils/filematcher/test_matcher.py @@ -1,9 +1,4 @@ -""" -Part of the bsie test suite. -A copy of the license is provided with the project. -Author: Matthias Baumgartner, 2022 -""" # standard imports import os import stat diff --git a/test/utils/filematcher/test_parser.py b/test/utils/filematcher/test_parser.py index c594747..536db00 100644 --- a/test/utils/filematcher/test_parser.py +++ b/test/utils/filematcher/test_parser.py @@ -1,9 +1,4 @@ -""" -Part of the bsie test suite. -A copy of the license is provided with the project. -Author: Matthias Baumgartner, 2022 -""" # standard imports import unittest diff --git a/test/utils/test_loading.py b/test/utils/test_loading.py index 58ff166..b8773ab 100644 --- a/test/utils/test_loading.py +++ b/test/utils/test_loading.py @@ -1,9 +1,4 @@ -""" -Part of the bsie test suite. -A copy of the license is provided with the project. 
-Author: Matthias Baumgartner, 2022 -""" # standard imports import unittest diff --git a/test/utils/test_node.py b/test/utils/test_node.py index 1dcd0ed..918ce42 100644 --- a/test/utils/test_node.py +++ b/test/utils/test_node.py @@ -1,9 +1,4 @@ -""" -Part of the bsie test suite. -A copy of the license is provided with the project. -Author: Matthias Baumgartner, 2022 -""" # standard imports import unittest -- cgit v1.2.3 From 70d77819a84c73292825b81f952e162bb30753d7 Mon Sep 17 00:00:00 2001 From: Matthias Baumgartner Date: Thu, 2 Mar 2023 08:56:57 +0100 Subject: reader error: untangle generic from unsupported format errors --- bsie/reader/exif.py | 2 +- bsie/reader/preview/_pillow.py | 2 +- bsie/utils/errors.py | 2 +- test/reader/image/test_raw_image.py | 4 ++-- test/reader/preview/test_pillow.py | 6 +++++- test/reader/test_exif.py | 13 +++++++++++-- test/reader/testimage_exif_corrupted.jpg | Bin 0 -> 551 bytes 7 files changed, 21 insertions(+), 8 deletions(-) create mode 100644 test/reader/testimage_exif_corrupted.jpg diff --git a/bsie/reader/exif.py b/bsie/reader/exif.py index 8c74462..2d0428b 100644 --- a/bsie/reader/exif.py +++ b/bsie/reader/exif.py @@ -38,7 +38,7 @@ class Exif(base.Reader): img = pyexiv2.Image(path) # read metadata return img.read_exif() - except TypeError as err: + except (TypeError, OSError, RuntimeError) as err: raise errors.ReaderError(path) from err ## EOF ## diff --git a/bsie/reader/preview/_pillow.py b/bsie/reader/preview/_pillow.py index 15c1c6d..2b797c6 100644 --- a/bsie/reader/preview/_pillow.py +++ b/bsie/reader/preview/_pillow.py @@ -33,7 +33,7 @@ class PillowPreviewReader(base.Reader): except PIL.UnidentifiedImageError as err: # failed to open, skip file raise errors.UnsupportedFileFormatError(path) from err - except IOError as err: + except OSError as err: raise errors.ReaderError(path) from err # EOF ## diff --git a/bsie/utils/errors.py b/bsie/utils/errors.py index e71fc60..7c7e6ed 100644 --- a/bsie/utils/errors.py +++ 
b/bsie/utils/errors.py @@ -38,7 +38,7 @@ class UnreachableError(ProgrammingError): class ParserError(_BSIEError): """Failed to parse due to invalid syntax or structures.""" -class UnsupportedFileFormatError(ReaderError): +class UnsupportedFileFormatError(_BSIEError): """Failed to read a file format.""" ## EOF ## diff --git a/test/reader/image/test_raw_image.py b/test/reader/image/test_raw_image.py index 0a5ed63..3b240d0 100644 --- a/test/reader/image/test_raw_image.py +++ b/test/reader/image/test_raw_image.py @@ -32,10 +32,10 @@ class TestRawImage(unittest.TestCase): #self.assertEqual(img.getdata().getpixel((0, 0)), (0, 0, 0)) img.close() # raises exception when image cannot be read + self.assertRaises(errors.UnsupportedFileFormatError, rdr, + os.path.join(os.path.dirname(__file__), 'testimage.jpg')) self.assertRaises(errors.ReaderError, rdr, os.path.join(os.path.dirname(__file__), 'invalid.nef')) - self.assertRaises(errors.ReaderError, rdr, - os.path.join(os.path.dirname(__file__), 'testimage.jpg')) diff --git a/test/reader/preview/test_pillow.py b/test/reader/preview/test_pillow.py index d213bbb..20f08ec 100644 --- a/test/reader/preview/test_pillow.py +++ b/test/reader/preview/test_pillow.py @@ -20,11 +20,15 @@ class TestPillowPreviewReader(unittest.TestCase): def test_call(self): rdr = PillowPreviewReader() # raises exception when image cannot be read - self.assertRaises(errors.ReaderError, rdr, + self.assertRaises(errors.UnsupportedFileFormatError, rdr, os.path.join(os.path.dirname(__file__), 'invalid.jpg')) + # raises exception when image cannot be read + self.assertRaises(errors.ReaderError, rdr, + os.path.join(os.path.dirname(__file__), 'inexistent.jpg')) # raises exception when image has invalid type self.assertRaises(errors.UnsupportedFileFormatError, rdr, os.path.join(os.path.dirname(__file__), 'invalid.foo')) + # proper file produces a generator gen = rdr(os.path.join(os.path.dirname(__file__), 'testimage.jpg')) self.assertIsInstance(gen, partial) diff 
--git a/test/reader/test_exif.py b/test/reader/test_exif.py index b2bf843..de6e801 100644 --- a/test/reader/test_exif.py +++ b/test/reader/test_exif.py @@ -3,6 +3,9 @@ import os import unittest +# external imports +import pyexiv2 + # bsie imports from bsie.utils import errors @@ -16,9 +19,15 @@ class TestExif(unittest.TestCase): def test_call(self): rdr = Exif() # discards non-image files - self.assertRaises(errors.UnsupportedFileFormatError, rdr, 'invalid.doc') + self.assertRaises(errors.UnsupportedFileFormatError, rdr, + os.path.join(os.path.dirname(__file__), 'invalid.doc')) + # raises on invalid image files + self.assertRaises(errors.UnsupportedFileFormatError, rdr, + os.path.join(os.path.dirname(__file__), 'invalid.jpg')) # raises on invalid image files - self.assertRaises(errors.ReaderError, rdr, 'invalid.jpg') + pyexiv2.set_log_level(3) # suppress log message + self.assertRaises(errors.ReaderError, rdr, + os.path.join(os.path.dirname(__file__), 'testimage_exif_corrupted.jpg')) # returns dict with exif info self.assertDictEqual(rdr(os.path.join(os.path.dirname(__file__), 'testimage_exif.jpg')), { 'Exif.Image.Artist': 'nobody', diff --git a/test/reader/testimage_exif_corrupted.jpg b/test/reader/testimage_exif_corrupted.jpg new file mode 100644 index 0000000..e51a9dc Binary files /dev/null and b/test/reader/testimage_exif_corrupted.jpg differ -- cgit v1.2.3 From ba6329bbe14c832d42773dee2fe30bd7669ca255 Mon Sep 17 00:00:00 2001 From: Matthias Baumgartner Date: Thu, 2 Mar 2023 08:58:29 +0100 Subject: various minor fixes --- .pylintrc | 17 +++++++-- bsie/apps/__init__.py | 2 +- bsie/apps/_loader.py | 4 +-- bsie/extractor/image/photometrics.py | 37 +++++++++----------- doc/source/installation.rst | 2 ++ test/apps/test_main.py | 57 +++++++++++++++++++++++++++++++ test/extractor/image/test_photometrics.py | 10 ++++++ test/reader/preview/test_pg.py | 1 + 8 files changed, 105 insertions(+), 25 deletions(-) create mode 100644 test/apps/test_main.py diff --git 
a/.pylintrc b/.pylintrc index 1b34854..576e81a 100644 --- a/.pylintrc +++ b/.pylintrc @@ -145,6 +145,21 @@ logging-format-style=old +[MESSAGES CONTROL] + +# disable similarities check +disable=raw-checker-failed, + bad-inline-option, + locally-disabled, + file-ignored, + suppressed-message, + useless-suppression, + deprecated-pragma, + use-symbolic-message-instead, + duplicate-code + + + [MISCELLANEOUS] # List of note tags to take in consideration, separated by a comma. @@ -188,6 +203,4 @@ ignore-none=no callbacks=clbk,callback - - # Disable: R1735 (use-dict-literal) diff --git a/bsie/apps/__init__.py b/bsie/apps/__init__.py index cec8f84..2fe4795 100644 --- a/bsie/apps/__init__.py +++ b/bsie/apps/__init__.py @@ -33,7 +33,7 @@ def main(argv=None): parser = argparse.ArgumentParser(description=main.__doc__, prog='bsie') # version parser.add_argument('--version', action='version', - version='%(prog)s version {}.{}.{}'.format(*bsie.version_info)) + version='%(prog)s version {}.{}.{}'.format(*bsie.version_info)) # pylint: disable=C0209 # application selection parser.add_argument('app', choices=apps.keys(), help='Select the application to run.') diff --git a/bsie/apps/_loader.py b/bsie/apps/_loader.py index 36dd8a6..6411f10 100644 --- a/bsie/apps/_loader.py +++ b/bsie/apps/_loader.py @@ -16,8 +16,8 @@ DEFAULT_CONFIG_FILE = 'default_config.yaml' # exports __all__: typing.Sequence[str] = ( - 'load', 'DEFAULT_CONFIG_FILE', + 'load_pipeline', ) @@ -26,7 +26,7 @@ __all__: typing.Sequence[str] = ( def load_pipeline(path: str) -> Pipeline: """Load a pipeline according to a config at *path*.""" # load config file - with open(path, 'rt') as ifile: + with open(path, 'rt', encoding='utf-8') as ifile: cfg = yaml.safe_load(ifile) # reader builder diff --git a/bsie/extractor/image/photometrics.py b/bsie/extractor/image/photometrics.py index 525f207..c5254ab 100644 --- a/bsie/extractor/image/photometrics.py +++ b/bsie/extractor/image/photometrics.py @@ -20,7 +20,7 @@ __all__: 
typing.Sequence[str] = ( def _gps_to_dec(coords: typing.Tuple[float, float, float]) -> float: """Convert GPS coordinates from exif to float.""" # unpack args - deg, min, sec = coords + deg, min, sec = coords # pylint: disable=redefined-builtin # min # convert to float deg = float(Fraction(deg)) min = float(Fraction(min)) @@ -29,9 +29,8 @@ def _gps_to_dec(coords: typing.Tuple[float, float, float]) -> float: if float(sec) > 0: # format is deg+min+sec return (float(deg) * 3600 + float(min) * 60 + float(sec)) / 3600 - else: - # format is deg+min - return float(deg) + float(min) / 60 + # format is deg+min + return float(deg) + float(min) / 60 class Exif(base.Extractor): @@ -124,20 +123,19 @@ class Exif(base.Extractor): # produce triple yield subject, pred, value - def _date(self, content: dict): # FIXME: Return type annotation - raise NotImplementedError() - #date_keys = ( - # 'Exif.Photo.DateTimeOriginal', - # 'Exif.Photo.DateTimeDigitized', - # 'Exif.Image.DateTime', - # ) - #for key in date_keys: - # if key in content: - # dt = content[key].value - # if dt.tzinfo is None: - # dt = dt.replace(tzinfo=ttime.NoTimeZone) - # return dt - #return None + #def _date(self, content: dict): # FIXME: Return type annotation + # date_keys = ( + # 'Exif.Photo.DateTimeOriginal', + # 'Exif.Photo.DateTimeDigitized', + # 'Exif.Image.DateTime', + # ) + # for key in date_keys: + # if key in content: + # dt = content[key].value + # if dt.tzinfo is None: + # dt = dt.replace(tzinfo=ttime.NoTimeZone) + # return dt + # return None ## photometrics @@ -189,8 +187,7 @@ class Exif(base.Extractor): if width is not None and height is not None and ori is not None: if ori <= 4: return 'landscape' if width >= height else 'portrait' - else: - return 'portrait' if width >= height else 'landscape' + return 'portrait' if width >= height else 'landscape' return None diff --git a/doc/source/installation.rst b/doc/source/installation.rst index 42b1e4e..b634457 100644 --- a/doc/source/installation.rst +++ 
b/doc/source/installation.rst @@ -40,6 +40,8 @@ For development, you also need to install some additional dependencies:: # code style discipline pip install mypy coverage pylint + # external type annotations for pyyaml + pip install types-PyYAML # documentation pip install sphinx sphinx-copybutton furo diff --git a/test/apps/test_main.py b/test/apps/test_main.py new file mode 100644 index 0000000..a1d8a49 --- /dev/null +++ b/test/apps/test_main.py @@ -0,0 +1,57 @@ + +# standard imports +import contextlib +import io +import json +import os +import tempfile +import unittest +import yaml + +# objects to test +from bsie.apps import main + + +## code ## + +class TestMain(unittest.TestCase): + def setUp(self): + config = { + 'ReaderBuilder': {}, + 'ExtractorBuilder': [ + {'bsie.extractor.generic.stat.Stat': {}}, + {'bsie.extractor.generic.path.Path': {}}, + ] + } + # create config file + _, self.config_path = tempfile.mkstemp(prefix='bsie-test-', suffix='.yaml') + with open(self.config_path, 'wt') as cfile: + yaml.dump(config, cfile) + + def tearDown(self): + if os.path.exists(self.config_path): + os.unlink(self.config_path) + + def test_main(self): + # must at least pass an app + with contextlib.redirect_stderr(io.StringIO()): + self.assertRaises(SystemExit, main, []) + # app takes over + with contextlib.redirect_stderr(io.StringIO()): + self.assertRaises(SystemExit, main, ['info']) + outbuf = io.StringIO() + with contextlib.redirect_stdout(outbuf): + main(['info', '--config', self.config_path, 'predicates']) + self.assertEqual(set(outbuf.getvalue().strip().split('\n')), { + 'http://bsfs.ai/schema/Entity#filename', + 'http://bsfs.ai/schema/Entity#filesize', + 'http://bsfs.ai/schema/Predicate', + }) + + +## main ## + +if __name__ == '__main__': + unittest.main() + +## EOF ## diff --git a/test/extractor/image/test_photometrics.py b/test/extractor/image/test_photometrics.py index 0e0261b..fb219e2 100644 --- a/test/extractor/image/test_photometrics.py +++ 
b/test/extractor/image/test_photometrics.py @@ -95,6 +95,16 @@ class TestExif(unittest.TestCase): self.assertSetEqual(set(ext.extract(node, content, {ext.schema.predicate(ns.bse.longitude)})), {(node, ext.schema.predicate(ns.bse.longitude), 7.875)}) + # check orientation label + self.assertSetEqual(set(ext.extract( + node, { + 'Exif.Photo.PixelXDimension': '4288', + 'Exif.Photo.PixelYDimension': '2848', + 'Exif.Image.Orientation': '5', + }, + {ext.schema.predicate(ns.bse.orientation_label)})), + {(node, ext.schema.predicate(ns.bse.orientation_label), 'portrait')}) + # can pass multiple principals self.assertSetEqual(set(ext.extract(node, content, { ext.schema.predicate(ns.bse.exposure), diff --git a/test/reader/preview/test_pg.py b/test/reader/preview/test_pg.py index 381344f..30095c5 100644 --- a/test/reader/preview/test_pg.py +++ b/test/reader/preview/test_pg.py @@ -52,6 +52,7 @@ class TestPreviewGeneratorReader(unittest.TestCase): self.assertEqual(sum(img.getdata()), 20258) # cleanup img.close() + del rdr # can define a cache dir pg_dir = tempfile.mkdtemp(prefix='bsie-test') -- cgit v1.2.3 From 2f0f28a0c5490f53e71e55a3bc4667bbeeae49b1 Mon Sep 17 00:00:00 2001 From: Matthias Baumgartner Date: Thu, 2 Mar 2023 09:23:18 +0100 Subject: readme and changelog --- CHANGELOG.md | 20 ++++++++++++++++++++ README | 56 -------------------------------------------------------- README.md | 55 +++++++++++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 75 insertions(+), 56 deletions(-) create mode 100644 CHANGELOG.md delete mode 100644 README create mode 100644 README.md diff --git a/CHANGELOG.md b/CHANGELOG.md new file mode 100644 index 0000000..1240091 --- /dev/null +++ b/CHANGELOG.md @@ -0,0 +1,20 @@ + +# Changelog + +## 0.23.03 (Initial release) + +### Added + +- Information extraction pipeline essentials +- Filematcher to check the file type and format +- Index app to run the IE pipeline +- Initial documentation +- Basic extractors + - Constant + - Filename + - 
Filesize + - Previews +- Image extractors + - Exif + - Regionally dominant colors + diff --git a/README b/README deleted file mode 100644 index 3326196..0000000 --- a/README +++ /dev/null @@ -1,56 +0,0 @@ - -Black Star Information Extraction -================================= - - -### Developer tools setup - -#### Test coverage (coverage) - -Resources: -* https://coverage.readthedocs.io/en/6.5.0/index.html -* https://nedbatchelder.com/blog/200710/flaws_in_coverage_measurement.html - -Commands: -$ pip install coverage -$ coverage run ; coverage html ; xdg-open .htmlcov/index.html - - - -#### Static code analysis (pylint) - -Resources: -* https://github.com/PyCQA/pylint -* https://pylint.org/ -* https://pylint.pycqa.org/en/latest/user_guide/messages/messages_overview.html#messages-overview - -Commands: -$ pip install pylint -$ pylint bsie - - - -#### Type analysis (mypy) - -Resources: -* https://github.com/python/mypy -* https://mypy.readthedocs.io/en/stable/ - -Commands: -$ pip install mypy -$ mypy - - - -#### Documentation (sphinx) - -Resources: -* -* - -Commands: -$ pip install ... -$ - - - diff --git a/README.md b/README.md new file mode 100644 index 0000000..85cb68f --- /dev/null +++ b/README.md @@ -0,0 +1,55 @@ + +# Black Star Information Extraction + +The Black Star Information Extraction (BSIE) package provides a pipeline +to extract metadata and content-derived features from files and stores +that information in a BSFS storage. + +## Installation + +You can install BSIE via pip: + + $ pip install --extra-index-url https://pip.bsfs.io bsie + + +## Development + +Set up a virtual environment: + + $ virtualenv env + $ source env/bin/activate + +Install bsie as editable from the git repository: + + $ git clone https://git.bsfs.io/bsie.git + $ cd bsie + $ pip install -e . 
+ +Install the following additional packages besides BSIE: + + $ pip install coverage mypy pylint + $ pip install rdflib requests types-PyYAML + $ pip install sphinx sphinx-copybutton furo + $ pip install build + +To ensure code style discipline, run the following commands: + + $ coverage run ; coverage html ; xdg-open .htmlcov/index.html + $ pylint bsie + $ mypy + +To build the package, do: + + $ python -m build + +To run only the tests (without coverage), run the following command from the **test folder**: + + $ python -m unittest + +To build the documentation, run the following commands from the **doc folder**: + + $ sphinx-apidoc -f -o source/api ../bsie/ --module-first -d 1 --separate + $ make html + $ xdg-open build/html/index.html + + -- cgit v1.2.3 From d2052e77210e0ace2c5f06e48afe2a8acb412965 Mon Sep 17 00:00:00 2001 From: Matthias Baumgartner Date: Sat, 4 Mar 2023 13:41:13 +0100 Subject: namespace refactoring and cleanup --- bsie/extractor/base.py | 33 +++-- bsie/extractor/generic/path.py | 2 +- bsie/extractor/generic/stat.py | 2 +- bsie/extractor/image/colors_spatial.py | 12 +- bsie/extractor/image/photometrics.py | 24 ++-- bsie/extractor/preview.py | 22 +-- bsie/lib/naming_policy.py | 8 +- bsie/lib/pipeline.py | 2 +- bsie/utils/namespaces.py | 28 ++-- test/apps/test_index.py | 204 ++++++++++++++-------------- test/apps/test_info.py | 36 ++--- test/apps/test_loader.py | 22 +-- test/apps/test_main.py | 6 +- test/extractor/generic/test_constant.py | 16 +-- test/extractor/generic/test_path.py | 6 +- test/extractor/generic/test_stat.py | 6 +- test/extractor/image/test_colors_spatial.py | 12 +- test/extractor/test_base.py | 8 +- test/extractor/test_builder.py | 16 +-- test/extractor/test_preview.py | 24 ++-- test/lib/test_bsie.py | 28 ++-- test/lib/test_builder.py | 4 +- test/lib/test_naming_policy.py | 40 +++--- test/lib/test_pipeline.py | 12 +- test/utils/test_node.py | 18 +-- 25 files changed, 305 insertions(+), 286 deletions(-) diff --git 
a/bsie/extractor/base.py b/bsie/extractor/base.py index 3759c68..f92d7cc 100644 --- a/bsie/extractor/base.py +++ b/bsie/extractor/base.py @@ -24,27 +24,32 @@ SCHEMA_PREAMBLE = ''' prefix schema: # common bsfs prefixes - prefix bsfs: - prefix bse: - prefix bsp: + prefix bsfs: + prefix bsl: + prefix bsa: + prefix bsd: + + prefix bsie: + prefix bsn: + prefix bse: + prefix bsp: # default definitions - bsfs:Array rdfs:subClassOf bsfs:Literal . - bsfs:Number rdfs:subClassOf bsfs:Literal . - bsfs:Time rdfs:subClassOf bsfs:Literal . - bsfs:Feature rdfs:subClassOf bsfs:Array ; + bsl:Array rdfs:subClassOf bsfs:Literal . + bsl:Number rdfs:subClassOf bsfs:Literal . + bsl:Time rdfs:subClassOf bsfs:Literal . + bsa:Feature rdfs:subClassOf bsl:Array ; bsfs:dimension "1"^^xsd:integer ; - bsfs:dtype bsfs:f16 ; - bsfs:distance bsfs:euclidean . + bsfs:dtype ; + bsfs:distance bsd:euclidean . # essential nodes - bsfs:Entity rdfs:subClassOf bsfs:Node . - bsfs:File rdfs:subClassOf bsfs:Entity . + bsn:Entity rdfs:subClassOf bsfs:Node . # common definitions xsd:string rdfs:subClassOf bsfs:Literal . - xsd:integer rdfs:subClassOf bsfs:Number . - xsd:float rdfs:subClassOf bsfs:Number . + xsd:integer rdfs:subClassOf bsl:Number . + xsd:float rdfs:subClassOf bsl:Number . 
''' @@ -90,7 +95,7 @@ class Extractor(abc.ABC): @property def principals(self) -> typing.Iterator[bsfs.schema.Predicate]: """Return the principal predicates, i.e., relations from/to the extraction subject.""" - ent = self.schema.node(ns.bsfs.Entity) + ent = self.schema.node(ns.bsn.Entity) return ( pred for pred diff --git a/bsie/extractor/generic/path.py b/bsie/extractor/generic/path.py index 8b01933..00c1121 100644 --- a/bsie/extractor/generic/path.py +++ b/bsie/extractor/generic/path.py @@ -26,7 +26,7 @@ class Path(base.Extractor): def __init__(self): super().__init__(bsfs.schema.from_string(base.SCHEMA_PREAMBLE + ''' bse:filename rdfs:subClassOf bsfs:Predicate ; - rdfs:domain bsfs:File ; + rdfs:domain bsn:Entity ; rdfs:range xsd:string ; rdfs:label "File name"^^xsd:string ; schema:description "Filename of entity in some filesystem."^^xsd:string ; diff --git a/bsie/extractor/generic/stat.py b/bsie/extractor/generic/stat.py index 50821a7..92b51f3 100644 --- a/bsie/extractor/generic/stat.py +++ b/bsie/extractor/generic/stat.py @@ -29,7 +29,7 @@ class Stat(base.Extractor): def __init__(self): super().__init__(bsfs.schema.from_string(base.SCHEMA_PREAMBLE + ''' bse:filesize rdfs:subClassOf bsfs:Predicate ; - rdfs:domain bsfs:File ; + rdfs:domain bsn:Entity ; rdfs:range xsd:integer ; rdfs:label "File size"^^xsd:string ; schema:description "File size of entity in some filesystem."^^xsd:string ; diff --git a/bsie/extractor/image/colors_spatial.py b/bsie/extractor/image/colors_spatial.py index 34cd615..e6661a9 100644 --- a/bsie/extractor/image/colors_spatial.py +++ b/bsie/extractor/image/colors_spatial.py @@ -14,8 +14,7 @@ from bsie.utils import bsfs, node, ns from .. 
import base # constants -FEATURE_NAME = ns.bsf + 'ColorsSpatial' -PREDICATE_NAME = ns.bse + 'colors_spatial' +FEATURE_NAME = ns.bsf.ColorsSpatial() # exports __all__: typing.Sequence[str] = ( @@ -58,16 +57,17 @@ class ColorsSpatial(base.Extractor): 'exp': exp, }) # determine symbol names - instance_name = FEATURE_NAME[uuid] - predicate_name = PREDICATE_NAME[uuid] + instance_name = getattr(FEATURE_NAME, uuid) + predicate_name = getattr(ns.bse, 'colors_spatial_' + uuid) # get vector dimension dimension = self.dimension(width, height, exp) # initialize parent with the schema super().__init__(bsfs.schema.from_string(base.SCHEMA_PREAMBLE + f''' - <{FEATURE_NAME}> rdfs:subClassOf bsfs:Feature ; + <{FEATURE_NAME}> rdfs:subClassOf bsa:Feature ; # annotations rdfs:label "Spatially dominant colors"^^xsd:string ; schema:description "Domiant colors of subregions in an image."^^xsd:string ; + bsfs:distance ; bsfs:dtype xsd:integer . <{instance_name}> rdfs:subClassOf <{FEATURE_NAME}> ; @@ -78,7 +78,7 @@ class ColorsSpatial(base.Extractor): <{FEATURE_NAME}/args#exp> "{exp}"^^xsd:float . <{predicate_name}> rdfs:subClassOf bsfs:Predicate ; - rdfs:domain bsfs:File ; + rdfs:domain bsn:Entity ; rdfs:range <{instance_name}> ; bsfs:unique "true"^^xsd:boolean . diff --git a/bsie/extractor/image/photometrics.py b/bsie/extractor/image/photometrics.py index c5254ab..42eb3c8 100644 --- a/bsie/extractor/image/photometrics.py +++ b/bsie/extractor/image/photometrics.py @@ -41,51 +41,51 @@ class Exif(base.Extractor): def __init__(self): super().__init__(bsfs.schema.from_string(base.SCHEMA_PREAMBLE + ''' #bse:t_capture rdfs:subClassOf bsfs:Predicate ; - # rdfs:domain bsfs:File ; + # rdfs:domain bsn:Entity ; # rdfs:range xsd:float ; # bsfs:unique "true"^^xsd:boolean . bse:exposure rdfs:subClassOf bsfs:Predicate ; - rdfs:domain bsfs:File ; + rdfs:domain bsn:Entity ; rdfs:range xsd:float ; bsfs:unique "true"^^xsd:boolean . 
bse:aperture rdfs:subClassOf bsfs:Predicate ; - rdfs:domain bsfs:File ; + rdfs:domain bsn:Entity ; rdfs:range xsd:float ; bsfs:unique "true"^^xsd:boolean . bse:iso rdfs:subClassOf bsfs:Predicate ; - rdfs:domain bsfs:File ; + rdfs:domain bsn:Entity ; rdfs:range xsd:integer ; bsfs:unique "true"^^xsd:boolean . bse:focal_length rdfs:subClassOf bsfs:Predicate ; - rdfs:domain bsfs:File ; + rdfs:domain bsn:Entity ; rdfs:range xsd:float ; bsfs:unique "true"^^xsd:boolean . bse:width rdfs:subClassOf bsfs:Predicate ; - rdfs:domain bsfs:File ; + rdfs:domain bsn:Entity ; rdfs:range xsd:integer ; bsfs:unique "true"^^xsd:boolean . bse:height rdfs:subClassOf bsfs:Predicate ; - rdfs:domain bsfs:File ; + rdfs:domain bsn:Entity ; rdfs:range xsd:integer ; bsfs:unique "true"^^xsd:boolean . bse:orientation rdfs:subClassOf bsfs:Predicate ; - rdfs:domain bsfs:File ; + rdfs:domain bsn:Entity ; rdfs:range xsd:integer ; bsfs:unique "true"^^xsd:boolean . bse:orientation_label rdfs:subClassOf bsfs:Predicate ; - rdfs:domain bsfs:File ; + rdfs:domain bsn:Entity ; rdfs:range xsd:string ; bsfs:unique "true"^^xsd:boolean . bse:altitude rdfs:subClassOf bsfs:Predicate ; - rdfs:domain bsfs:File ; + rdfs:domain bsn:Entity ; rdfs:range xsd:float ; bsfs:unique "true"^^xsd:boolean . bse:latitude rdfs:subClassOf bsfs:Predicate ; - rdfs:domain bsfs:File ; + rdfs:domain bsn:Entity ; rdfs:range xsd:float ; bsfs:unique "true"^^xsd:boolean . bse:longitude rdfs:subClassOf bsfs:Predicate ; - rdfs:domain bsfs:File ; + rdfs:domain bsn:Entity ; rdfs:range xsd:float ; bsfs:unique "true"^^xsd:boolean . ''')) diff --git a/bsie/extractor/preview.py b/bsie/extractor/preview.py index 7e4a171..145a01a 100644 --- a/bsie/extractor/preview.py +++ b/bsie/extractor/preview.py @@ -28,28 +28,30 @@ class Preview(base.Extractor): def __init__(self, max_sides: typing.Iterable[int]): super().__init__(bsfs.schema.from_string(base.SCHEMA_PREAMBLE + ''' - bsfs:Preview rdfs:subClassOf bsfs:Node . 
- bsfs:BinaryBlob rdfs:subClassOf bsfs:Literal . - bsfs:JPEG rdfs:subClassOf bsfs:BinaryBlob . + + + bsn:Preview rdfs:subClassOf bsfs:Node . + bsl:BinaryBlob rdfs:subClassOf bsfs:Literal . + rdfs:subClassOf bsl:BinaryBlob . bse:preview rdfs:subClassOf bsfs:Predicate ; - rdfs:domain bsfs:File ; - rdfs:range bsfs:Preview ; + rdfs:domain bsn:Entity ; + rdfs:range bsn:Preview ; bsfs:unique "false"^^xsd:boolean . bsp:width rdfs:subClassOf bsfs:Predicate ; - rdfs:domain bsfs:Preview ; + rdfs:domain bsn:Preview ; rdfs:range xsd:integer ; bsfs:unique "true"^^xsd:boolean . bsp:height rdfs:subClassOf bsfs:Predicate ; - rdfs:domain bsfs:Preview ; + rdfs:domain bsn:Preview ; rdfs:range xsd:integer ; bsfs:unique "true"^^xsd:boolean . bsp:asset rdfs:subClassOf bsfs:Predicate ; - rdfs:domain bsfs:Preview ; - rdfs:range bsfs:JPEG ; + rdfs:domain bsn:Preview ; + rdfs:range ; bsfs:unique "true"^^xsd:boolean . ''')) @@ -80,7 +82,7 @@ class Preview(base.Extractor): buffer = io.BytesIO() img.save(buffer, format='jpeg') # create a preview node - preview = node.Node(ns.bsfs.Preview, + preview = node.Node(ns.bsn.Preview, ucid=bsfs.uuid.UCID.from_bytes(buffer.getvalue()), size=max_side, source=subject, diff --git a/bsie/lib/naming_policy.py b/bsie/lib/naming_policy.py index c99f8c8..9b9a45d 100644 --- a/bsie/lib/naming_policy.py +++ b/bsie/lib/naming_policy.py @@ -80,9 +80,9 @@ class DefaultNamingPolicy(NamingPolicy): def handle_node(self, node: Node) -> Node: if node.uri is not None: return node - if node.node_type == ns.bsfs.File: + if node.node_type == ns.bsn.Entity : return self.name_file(node) - if node.node_type == ns.bsfs.Preview: + if node.node_type == ns.bsn.Preview: return self.name_preview(node) raise errors.ProgrammingError('no naming policy available for {node.node_type}') @@ -92,7 +92,7 @@ class DefaultNamingPolicy(NamingPolicy): fragment = node.hints['ucid'] else: # random name fragment = self._uuid() - node.uri = (self._prefix + 'file')[fragment] + node.uri = 
getattr(self._prefix.file(), fragment) return node def name_preview(self, node: Node) -> Node: @@ -109,7 +109,7 @@ class DefaultNamingPolicy(NamingPolicy): fragment = self._uuid() if 'size' in node.hints: # append size fragment += '_s' + str(node.hints['size']) - node.uri = (self._prefix + 'preview')[fragment] + node.uri = getattr(self._prefix.preview(), fragment) return node ## EOF ## diff --git a/bsie/lib/pipeline.py b/bsie/lib/pipeline.py index 128eecc..30fd6fd 100644 --- a/bsie/lib/pipeline.py +++ b/bsie/lib/pipeline.py @@ -104,7 +104,7 @@ class Pipeline(): rdr2ext[rdr].add(ext) # create subject for file - subject = node.Node(ns.bsfs.File, + subject = node.Node(ns.bsn.Entity, ucid=bsfs.uuid.UCID.from_path(path), ) diff --git a/bsie/utils/namespaces.py b/bsie/utils/namespaces.py index 310aa3f..4a66048 100644 --- a/bsie/utils/namespaces.py +++ b/bsie/utils/namespaces.py @@ -6,19 +6,31 @@ import typing # inner-module imports from . import bsfs as _bsfs -# constants -bse = _bsfs.Namespace('http://bsfs.ai/schema/Entity') -bsf = _bsfs.Namespace('http://ie.bsfs.ai/schema/Feature') -bsfs = _bsfs.Namespace('http://bsfs.ai/schema', fsep='/') -bsm = _bsfs.Namespace('http://bsfs.ai/schema/Meta') -bsp = _bsfs.Namespace('http://bsfs.ai/schema/Preview') -xsd = _bsfs.Namespace('http://www.w3.org/2001/XMLSchema') +# generic namespaces +xsd = _bsfs.Namespace('http://www.w3.org/2001/XMLSchema')() + +# core bsfs/bsie namespaces +bsfs = _bsfs.Namespace('https://schema.bsfs.io/core') +bsie = _bsfs.Namespace('https://schema.bsfs.io/ie') + +# auxiliary namespaces +bsd = bsie.distance() +bse = bsie.Node.Entity() +bsf = bsie.Literal.Array.Feature +bsl = bsfs.Literal +bsn = bsie.Node +bsp = bsie.Node.Preview() # export __all__: typing.Sequence[str] = ( + 'bsd', 'bse', + 'bsf', 'bsfs', - 'bsm', + 'bsie', + 'bsl', + 'bsl', + 'bsn', 'bsp', 'xsd', ) diff --git a/test/apps/test_index.py b/test/apps/test_index.py index 03209fe..6927044 100644 --- a/test/apps/test_index.py +++ 
b/test/apps/test_index.py @@ -31,11 +31,11 @@ class TestIndex(unittest.TestCase): {'bsie.extractor.generic.constant.Constant': { 'schema': ''' bse:author rdfs:subClassOf bsfs:Predicate ; - rdfs:domain bsfs:Entity ; + rdfs:domain bsn:Entity ; rdfs:range xsd:string ; bsfs:unique "true"^^xsd:boolean . ''', - 'tuples': [['http://bsfs.ai/schema/Entity#author', 'Me, myself, and I']], + 'tuples': [['https://schema.bsfs.io/ie/Node/Entity#author', 'Me, myself, and I']], }}, {'bsie.extractor.image.colors_spatial.ColorsSpatial': { 'width': 2, @@ -77,47 +77,47 @@ class TestIndex(unittest.TestCase): pre_preview = 'http://example.com/me/preview#' self.assertTrue(set(bsfs._backend._graph).issuperset({ # files and properties - (rdflib.URIRef(pre_file + '2f4109b40107cc50e0884755a1a961ed126887e49b8dbaf0e146b2e226aa6647'), rdflib.RDF.type, rdflib.URIRef(ns.bsfs.File)), + (rdflib.URIRef(pre_file + '2f4109b40107cc50e0884755a1a961ed126887e49b8dbaf0e146b2e226aa6647'), rdflib.RDF.type, rdflib.URIRef(ns.bsn.Entity)), (rdflib.URIRef(pre_file + '2f4109b40107cc50e0884755a1a961ed126887e49b8dbaf0e146b2e226aa6647'), rdflib.URIRef(ns.bse.author), rdflib.Literal('Me, myself, and I', datatype=rdflib.XSD.string)), (rdflib.URIRef(pre_file + '2f4109b40107cc50e0884755a1a961ed126887e49b8dbaf0e146b2e226aa6647'), rdflib.URIRef(ns.bse.filename), rdflib.Literal('alpha_second', datatype=rdflib.XSD.string)), - (rdflib.URIRef(pre_file + '441f3d10c8ff489fe8e33e639606512f6c463151cc429de7e554b9af670c2ece'), rdflib.RDF.type, rdflib.URIRef(ns.bsfs.File)), + (rdflib.URIRef(pre_file + '441f3d10c8ff489fe8e33e639606512f6c463151cc429de7e554b9af670c2ece'), rdflib.RDF.type, rdflib.URIRef(ns.bsn.Entity)), (rdflib.URIRef(pre_file + '441f3d10c8ff489fe8e33e639606512f6c463151cc429de7e554b9af670c2ece'), rdflib.URIRef(ns.bse.author), rdflib.Literal('Me, myself, and I', datatype=rdflib.XSD.string)), (rdflib.URIRef(pre_file + '441f3d10c8ff489fe8e33e639606512f6c463151cc429de7e554b9af670c2ece'), rdflib.URIRef(ns.bse.filename), 
rdflib.Literal('omega_second', datatype=rdflib.XSD.string)), - (rdflib.URIRef(pre_file + '69b98ecf7aff3e95b09688ba93331678eb8397817111f674c9558e6dd8f5e871'), rdflib.RDF.type, rdflib.URIRef(ns.bsfs.File)), + (rdflib.URIRef(pre_file + '69b98ecf7aff3e95b09688ba93331678eb8397817111f674c9558e6dd8f5e871'), rdflib.RDF.type, rdflib.URIRef(ns.bsn.Entity)), (rdflib.URIRef(pre_file + '69b98ecf7aff3e95b09688ba93331678eb8397817111f674c9558e6dd8f5e871'), rdflib.URIRef(ns.bse.author), rdflib.Literal('Me, myself, and I', datatype=rdflib.XSD.string)), (rdflib.URIRef(pre_file + '69b98ecf7aff3e95b09688ba93331678eb8397817111f674c9558e6dd8f5e871'), rdflib.URIRef(ns.bse.filename), rdflib.Literal('td_first', datatype=rdflib.XSD.string)), - (rdflib.URIRef(pre_file + '78f7eb7f0d8221cdb2cb26c978fa42a11f75eb87becc768f4474134cb1e06926'), rdflib.RDF.type, rdflib.URIRef(ns.bsfs.File)), + (rdflib.URIRef(pre_file + '78f7eb7f0d8221cdb2cb26c978fa42a11f75eb87becc768f4474134cb1e06926'), rdflib.RDF.type, rdflib.URIRef(ns.bsn.Entity)), (rdflib.URIRef(pre_file + '78f7eb7f0d8221cdb2cb26c978fa42a11f75eb87becc768f4474134cb1e06926'), rdflib.URIRef(ns.bse.author), rdflib.Literal('Me, myself, and I', datatype=rdflib.XSD.string)), (rdflib.URIRef(pre_file + '78f7eb7f0d8221cdb2cb26c978fa42a11f75eb87becc768f4474134cb1e06926'), rdflib.URIRef(ns.bse.filename), rdflib.Literal('testfile', datatype=rdflib.XSD.string)), - (rdflib.URIRef(pre_file + '80818b8ec2ee1919116dba9c8a7e0a4608313cf3b463cd88e9ed77a700dd92d3'), rdflib.RDF.type, rdflib.URIRef(ns.bsfs.File)), + (rdflib.URIRef(pre_file + '80818b8ec2ee1919116dba9c8a7e0a4608313cf3b463cd88e9ed77a700dd92d3'), rdflib.RDF.type, rdflib.URIRef(ns.bsn.Entity)), (rdflib.URIRef(pre_file + '80818b8ec2ee1919116dba9c8a7e0a4608313cf3b463cd88e9ed77a700dd92d3'), rdflib.URIRef(ns.bse.author), rdflib.Literal('Me, myself, and I', datatype=rdflib.XSD.string)), (rdflib.URIRef(pre_file + '80818b8ec2ee1919116dba9c8a7e0a4608313cf3b463cd88e9ed77a700dd92d3'), rdflib.URIRef(ns.bse.filename), 
rdflib.Literal('bar_first', datatype=rdflib.XSD.string)), - (rdflib.URIRef(pre_file + '976d2ea0e58488678cc7e435fbfadabfb6eb6cf50ad51862f38f73729ed11795'), rdflib.RDF.type, rdflib.URIRef(ns.bsfs.File)), + (rdflib.URIRef(pre_file + '976d2ea0e58488678cc7e435fbfadabfb6eb6cf50ad51862f38f73729ed11795'), rdflib.RDF.type, rdflib.URIRef(ns.bsn.Entity)), (rdflib.URIRef(pre_file + '976d2ea0e58488678cc7e435fbfadabfb6eb6cf50ad51862f38f73729ed11795'), rdflib.URIRef(ns.bse.author), rdflib.Literal('Me, myself, and I', datatype=rdflib.XSD.string)), (rdflib.URIRef(pre_file + '976d2ea0e58488678cc7e435fbfadabfb6eb6cf50ad51862f38f73729ed11795'), rdflib.URIRef(ns.bse.filename), rdflib.Literal('omega_first', datatype=rdflib.XSD.string)), - (rdflib.URIRef(pre_file + '997e2fbb7494a3818ec782d2bc87bf1cffafba6b9c0f658e4a6c18a723e944d3'), rdflib.RDF.type, rdflib.URIRef(ns.bsfs.File)), + (rdflib.URIRef(pre_file + '997e2fbb7494a3818ec782d2bc87bf1cffafba6b9c0f658e4a6c18a723e944d3'), rdflib.RDF.type, rdflib.URIRef(ns.bsn.Entity)), (rdflib.URIRef(pre_file + '997e2fbb7494a3818ec782d2bc87bf1cffafba6b9c0f658e4a6c18a723e944d3'), rdflib.URIRef(ns.bse.author), rdflib.Literal('Me, myself, and I', datatype=rdflib.XSD.string)), (rdflib.URIRef(pre_file + '997e2fbb7494a3818ec782d2bc87bf1cffafba6b9c0f658e4a6c18a723e944d3'), rdflib.URIRef(ns.bse.filename), rdflib.Literal('alpha_first', datatype=rdflib.XSD.string)), - (rdflib.URIRef(pre_file + 'a8af899ecdab60dfaea8ec7f934053624c80a1054539e163f2c7eaa986c2777d'), rdflib.RDF.type, rdflib.URIRef(ns.bsfs.File)), + (rdflib.URIRef(pre_file + 'a8af899ecdab60dfaea8ec7f934053624c80a1054539e163f2c7eaa986c2777d'), rdflib.RDF.type, rdflib.URIRef(ns.bsn.Entity)), (rdflib.URIRef(pre_file + 'a8af899ecdab60dfaea8ec7f934053624c80a1054539e163f2c7eaa986c2777d'), rdflib.URIRef(ns.bse.author), rdflib.Literal('Me, myself, and I', datatype=rdflib.XSD.string)), (rdflib.URIRef(pre_file + 'a8af899ecdab60dfaea8ec7f934053624c80a1054539e163f2c7eaa986c2777d'), rdflib.URIRef(ns.bse.filename), 
rdflib.Literal('foo_second', datatype=rdflib.XSD.string)), - (rdflib.URIRef(pre_file + 'b8fd7fba818254166a6043195004138ebda6923e012442f819a2c49671136c70'), rdflib.RDF.type, rdflib.URIRef(ns.bsfs.File)), + (rdflib.URIRef(pre_file + 'b8fd7fba818254166a6043195004138ebda6923e012442f819a2c49671136c70'), rdflib.RDF.type, rdflib.URIRef(ns.bsn.Entity)), (rdflib.URIRef(pre_file + 'b8fd7fba818254166a6043195004138ebda6923e012442f819a2c49671136c70'), rdflib.URIRef(ns.bse.author), rdflib.Literal('Me, myself, and I', datatype=rdflib.XSD.string)), (rdflib.URIRef(pre_file + 'b8fd7fba818254166a6043195004138ebda6923e012442f819a2c49671136c70'), rdflib.URIRef(ns.bse.filename), rdflib.Literal('bar_second', datatype=rdflib.XSD.string)), - (rdflib.URIRef(pre_file + 'd43758ace82154a1cc10ca0dfef63cb20dd831f9c87edd6dc06539eefe67371d'), rdflib.RDF.type, rdflib.URIRef(ns.bsfs.File)), + (rdflib.URIRef(pre_file + 'd43758ace82154a1cc10ca0dfef63cb20dd831f9c87edd6dc06539eefe67371d'), rdflib.RDF.type, rdflib.URIRef(ns.bsn.Entity)), (rdflib.URIRef(pre_file + 'd43758ace82154a1cc10ca0dfef63cb20dd831f9c87edd6dc06539eefe67371d'), rdflib.URIRef(ns.bse.author), rdflib.Literal('Me, myself, and I', datatype=rdflib.XSD.string)), (rdflib.URIRef(pre_file + 'd43758ace82154a1cc10ca0dfef63cb20dd831f9c87edd6dc06539eefe67371d'), rdflib.URIRef(ns.bse.filename), rdflib.Literal('foo_first', datatype=rdflib.XSD.string)), - (rdflib.URIRef(pre_file + 'd803187cbf3676ae9d38126270a6152c60431589aa3bb3824baf8954e9c097f1'), rdflib.RDF.type, rdflib.URIRef(ns.bsfs.File)), + (rdflib.URIRef(pre_file + 'd803187cbf3676ae9d38126270a6152c60431589aa3bb3824baf8954e9c097f1'), rdflib.RDF.type, rdflib.URIRef(ns.bsn.Entity)), (rdflib.URIRef(pre_file + 'd803187cbf3676ae9d38126270a6152c60431589aa3bb3824baf8954e9c097f1'), rdflib.URIRef(ns.bse.author), rdflib.Literal('Me, myself, and I', datatype=rdflib.XSD.string)), (rdflib.URIRef(pre_file + 'd803187cbf3676ae9d38126270a6152c60431589aa3bb3824baf8954e9c097f1'), rdflib.URIRef(ns.bse.filename), 
rdflib.Literal('td_second', datatype=rdflib.XSD.string)), - (rdflib.URIRef(pre_file + 'accb115d266ad60c53cd01a7f7130f245886ce8eaf69bc85319febc11d9fe089'), rdflib.RDF.type, rdflib.URIRef(ns.bsfs.File)), + (rdflib.URIRef(pre_file + 'accb115d266ad60c53cd01a7f7130f245886ce8eaf69bc85319febc11d9fe089'), rdflib.RDF.type, rdflib.URIRef(ns.bsn.Entity)), (rdflib.URIRef(pre_file + 'accb115d266ad60c53cd01a7f7130f245886ce8eaf69bc85319febc11d9fe089'), rdflib.URIRef(ns.bse.author), rdflib.Literal('Me, myself, and I', datatype=rdflib.XSD.string)), (rdflib.URIRef(pre_file + 'accb115d266ad60c53cd01a7f7130f245886ce8eaf69bc85319febc11d9fe089'), rdflib.URIRef(ns.bse.filename), rdflib.Literal('testimage.jpg', datatype=rdflib.XSD.string)), # features - (rdflib.URIRef(pre_file + 'accb115d266ad60c53cd01a7f7130f245886ce8eaf69bc85319febc11d9fe089'), rdflib.URIRef('http://bsfs.ai/schema/Entity/colors_spatial#0658f2234a054e1dd59a14462c89f7733e019160419c796356aa831498bd0a04'), + (rdflib.URIRef(pre_file + 'accb115d266ad60c53cd01a7f7130f245886ce8eaf69bc85319febc11d9fe089'), rdflib.URIRef('https://schema.bsfs.io/ie/Node/Entity#colors_spatial_0658f2234a054e1dd59a14462c89f7733e019160419c796356aa831498bd0a04'), rdflib.Literal( '(91, 127, 121, 94, 138, 167, 163, 134, 190, 138, 170, 156, 121, 142, 159)', - datatype=rdflib.URIRef('http://ie.bsfs.ai/schema/Feature/ColorsSpatial#0658f2234a054e1dd59a14462c89f7733e019160419c796356aa831498bd0a04'))), + datatype=rdflib.URIRef('https://schema.bsfs.io/ie/Literal/Array/Feature/ColorsSpatial#0658f2234a054e1dd59a14462c89f7733e019160419c796356aa831498bd0a04'))), # links to previews (rdflib.URIRef(pre_file + '2f4109b40107cc50e0884755a1a961ed126887e49b8dbaf0e146b2e226aa6647'), rdflib.URIRef(ns.bse.preview), rdflib.URIRef(pre_preview + '79cb8a7e6369361a4f4cb7ff729c1ed3fcf87204769623d6fbd6ebfae601e5c7_s50')), (rdflib.URIRef(pre_file + '441f3d10c8ff489fe8e33e639606512f6c463151cc429de7e554b9af670c2ece'), rdflib.URIRef(ns.bse.preview), rdflib.URIRef(pre_preview + 
'a8b3245636074d5370283b690281abda8ffdff12ce8b1af77c8bc0a4c85be860_s50')), @@ -134,53 +134,53 @@ class TestIndex(unittest.TestCase): # preview dimensions (rdflib.URIRef(pre_preview + '2656e303d7218300326df73b64f312d8b37eb980358be27a38b5f63dae259be3_s50'), rdflib.URIRef(ns.bsp.height), rdflib.Literal('33', datatype=rdflib.URIRef('http://www.w3.org/2001/XMLSchema#integer'))), (rdflib.URIRef(pre_preview + '2656e303d7218300326df73b64f312d8b37eb980358be27a38b5f63dae259be3_s50'), rdflib.URIRef(ns.bsp.width), rdflib.Literal('50', datatype=rdflib.URIRef('http://www.w3.org/2001/XMLSchema#integer'))), - (rdflib.URIRef(pre_preview + '2656e303d7218300326df73b64f312d8b37eb980358be27a38b5f63dae259be3_s50'), rdflib.RDF.type, rdflib.URIRef(ns.bsfs.Preview)), + (rdflib.URIRef(pre_preview + '2656e303d7218300326df73b64f312d8b37eb980358be27a38b5f63dae259be3_s50'), rdflib.RDF.type, rdflib.URIRef(ns.bsn.Preview)), (rdflib.URIRef(pre_preview + '26f16643b2570ac5b2d1f8c373d492cb724aae2dd8d71a0b63647838ed651254_s50'), rdflib.URIRef(ns.bsp.height), rdflib.Literal('50', datatype=rdflib.URIRef('http://www.w3.org/2001/XMLSchema#integer'))), (rdflib.URIRef(pre_preview + '26f16643b2570ac5b2d1f8c373d492cb724aae2dd8d71a0b63647838ed651254_s50'), rdflib.URIRef(ns.bsp.width), rdflib.Literal('36', datatype=rdflib.URIRef('http://www.w3.org/2001/XMLSchema#integer'))), - (rdflib.URIRef(pre_preview + '26f16643b2570ac5b2d1f8c373d492cb724aae2dd8d71a0b63647838ed651254_s50'), rdflib.RDF.type, rdflib.URIRef(ns.bsfs.Preview)), + (rdflib.URIRef(pre_preview + '26f16643b2570ac5b2d1f8c373d492cb724aae2dd8d71a0b63647838ed651254_s50'), rdflib.RDF.type, rdflib.URIRef(ns.bsn.Preview)), (rdflib.URIRef(pre_preview + '567049149769e1d02e6af6cfee3991f7cf0cbc935cbf6a566047f40155fb13a8_s50'), rdflib.URIRef(ns.bsp.height), rdflib.Literal('50', datatype=rdflib.URIRef('http://www.w3.org/2001/XMLSchema#integer'))), (rdflib.URIRef(pre_preview + '567049149769e1d02e6af6cfee3991f7cf0cbc935cbf6a566047f40155fb13a8_s50'), 
rdflib.URIRef(ns.bsp.width), rdflib.Literal('36', datatype=rdflib.URIRef('http://www.w3.org/2001/XMLSchema#integer'))), - (rdflib.URIRef(pre_preview + '567049149769e1d02e6af6cfee3991f7cf0cbc935cbf6a566047f40155fb13a8_s50'), rdflib.RDF.type, rdflib.URIRef(ns.bsfs.Preview)), + (rdflib.URIRef(pre_preview + '567049149769e1d02e6af6cfee3991f7cf0cbc935cbf6a566047f40155fb13a8_s50'), rdflib.RDF.type, rdflib.URIRef(ns.bsn.Preview)), (rdflib.URIRef(pre_preview + '5d1235838c3d501204bb09c2de563d7e4a7fd17b7ec4ff302221c0e88c4741aa_s50'), rdflib.URIRef(ns.bsp.height), rdflib.Literal('50', datatype=rdflib.URIRef('http://www.w3.org/2001/XMLSchema#integer'))), (rdflib.URIRef(pre_preview + '5d1235838c3d501204bb09c2de563d7e4a7fd17b7ec4ff302221c0e88c4741aa_s50'), rdflib.URIRef(ns.bsp.width), rdflib.Literal('36', datatype=rdflib.URIRef('http://www.w3.org/2001/XMLSchema#integer'))), - (rdflib.URIRef(pre_preview + '5d1235838c3d501204bb09c2de563d7e4a7fd17b7ec4ff302221c0e88c4741aa_s50'), rdflib.RDF.type, rdflib.URIRef(ns.bsfs.Preview)), + (rdflib.URIRef(pre_preview + '5d1235838c3d501204bb09c2de563d7e4a7fd17b7ec4ff302221c0e88c4741aa_s50'), rdflib.RDF.type, rdflib.URIRef(ns.bsn.Preview)), (rdflib.URIRef(pre_preview + '79cb8a7e6369361a4f4cb7ff729c1ed3fcf87204769623d6fbd6ebfae601e5c7_s50'), rdflib.URIRef(ns.bsp.height), rdflib.Literal('50', datatype=rdflib.URIRef('http://www.w3.org/2001/XMLSchema#integer'))), (rdflib.URIRef(pre_preview + '79cb8a7e6369361a4f4cb7ff729c1ed3fcf87204769623d6fbd6ebfae601e5c7_s50'), rdflib.URIRef(ns.bsp.width), rdflib.Literal('36', datatype=rdflib.URIRef('http://www.w3.org/2001/XMLSchema#integer'))), - (rdflib.URIRef(pre_preview + '79cb8a7e6369361a4f4cb7ff729c1ed3fcf87204769623d6fbd6ebfae601e5c7_s50'), rdflib.RDF.type, rdflib.URIRef(ns.bsfs.Preview)), + (rdflib.URIRef(pre_preview + '79cb8a7e6369361a4f4cb7ff729c1ed3fcf87204769623d6fbd6ebfae601e5c7_s50'), rdflib.RDF.type, rdflib.URIRef(ns.bsn.Preview)), (rdflib.URIRef(pre_preview + 
'7a975385a110c21fcd12e238fab9501550fa02f6328749068a3bffd65e291027_s50'), rdflib.URIRef(ns.bsp.height), rdflib.Literal('50', datatype=rdflib.URIRef('http://www.w3.org/2001/XMLSchema#integer'))), (rdflib.URIRef(pre_preview + '7a975385a110c21fcd12e238fab9501550fa02f6328749068a3bffd65e291027_s50'), rdflib.URIRef(ns.bsp.width), rdflib.Literal('36', datatype=rdflib.URIRef('http://www.w3.org/2001/XMLSchema#integer'))), - (rdflib.URIRef(pre_preview + '7a975385a110c21fcd12e238fab9501550fa02f6328749068a3bffd65e291027_s50'), rdflib.RDF.type, rdflib.URIRef(ns.bsfs.Preview)), + (rdflib.URIRef(pre_preview + '7a975385a110c21fcd12e238fab9501550fa02f6328749068a3bffd65e291027_s50'), rdflib.RDF.type, rdflib.URIRef(ns.bsn.Preview)), (rdflib.URIRef(pre_preview + '968b9aa178585bc8d1fca0e4e32b8cf30b3941eff72f34e320584aaae8fd23ac_s50'), rdflib.URIRef(ns.bsp.height), rdflib.Literal('50', datatype=rdflib.URIRef('http://www.w3.org/2001/XMLSchema#integer'))), (rdflib.URIRef(pre_preview + '968b9aa178585bc8d1fca0e4e32b8cf30b3941eff72f34e320584aaae8fd23ac_s50'), rdflib.URIRef(ns.bsp.width), rdflib.Literal('36', datatype=rdflib.URIRef('http://www.w3.org/2001/XMLSchema#integer'))), - (rdflib.URIRef(pre_preview + '968b9aa178585bc8d1fca0e4e32b8cf30b3941eff72f34e320584aaae8fd23ac_s50'), rdflib.RDF.type, rdflib.URIRef(ns.bsfs.Preview)), + (rdflib.URIRef(pre_preview + '968b9aa178585bc8d1fca0e4e32b8cf30b3941eff72f34e320584aaae8fd23ac_s50'), rdflib.RDF.type, rdflib.URIRef(ns.bsn.Preview)), (rdflib.URIRef(pre_preview + '9827509a74a60dfceed11936f7f624e9c932f66c8c0d20d355d56f8c3c9b56b1_s50'), rdflib.URIRef(ns.bsp.height), rdflib.Literal('50', datatype=rdflib.URIRef('http://www.w3.org/2001/XMLSchema#integer'))), (rdflib.URIRef(pre_preview + '9827509a74a60dfceed11936f7f624e9c932f66c8c0d20d355d56f8c3c9b56b1_s50'), rdflib.URIRef(ns.bsp.width), rdflib.Literal('36', datatype=rdflib.URIRef('http://www.w3.org/2001/XMLSchema#integer'))), - (rdflib.URIRef(pre_preview + 
'9827509a74a60dfceed11936f7f624e9c932f66c8c0d20d355d56f8c3c9b56b1_s50'), rdflib.RDF.type, rdflib.URIRef(ns.bsfs.Preview)), + (rdflib.URIRef(pre_preview + '9827509a74a60dfceed11936f7f624e9c932f66c8c0d20d355d56f8c3c9b56b1_s50'), rdflib.RDF.type, rdflib.URIRef(ns.bsn.Preview)), (rdflib.URIRef(pre_preview + 'a63c84e647138a2b68113474212f6aee542b3707171ff178551db3c296e59817_s50'), rdflib.URIRef(ns.bsp.height), rdflib.Literal('50', datatype=rdflib.URIRef('http://www.w3.org/2001/XMLSchema#integer'))), (rdflib.URIRef(pre_preview + 'a63c84e647138a2b68113474212f6aee542b3707171ff178551db3c296e59817_s50'), rdflib.URIRef(ns.bsp.width), rdflib.Literal('36', datatype=rdflib.URIRef('http://www.w3.org/2001/XMLSchema#integer'))), - (rdflib.URIRef(pre_preview + 'a63c84e647138a2b68113474212f6aee542b3707171ff178551db3c296e59817_s50'), rdflib.RDF.type, rdflib.URIRef(ns.bsfs.Preview)), + (rdflib.URIRef(pre_preview + 'a63c84e647138a2b68113474212f6aee542b3707171ff178551db3c296e59817_s50'), rdflib.RDF.type, rdflib.URIRef(ns.bsn.Preview)), (rdflib.URIRef(pre_preview + 'a8b3245636074d5370283b690281abda8ffdff12ce8b1af77c8bc0a4c85be860_s50'), rdflib.URIRef(ns.bsp.height), rdflib.Literal('50', datatype=rdflib.URIRef('http://www.w3.org/2001/XMLSchema#integer'))), (rdflib.URIRef(pre_preview + 'a8b3245636074d5370283b690281abda8ffdff12ce8b1af77c8bc0a4c85be860_s50'), rdflib.URIRef(ns.bsp.width), rdflib.Literal('36', datatype=rdflib.URIRef('http://www.w3.org/2001/XMLSchema#integer'))), - (rdflib.URIRef(pre_preview + 'a8b3245636074d5370283b690281abda8ffdff12ce8b1af77c8bc0a4c85be860_s50'), rdflib.RDF.type, rdflib.URIRef(ns.bsfs.Preview)), + (rdflib.URIRef(pre_preview + 'a8b3245636074d5370283b690281abda8ffdff12ce8b1af77c8bc0a4c85be860_s50'), rdflib.RDF.type, rdflib.URIRef(ns.bsn.Preview)), (rdflib.URIRef(pre_preview + 'dbfd8ebc0557c4925e9ff8411629a74a15eca934a4c2a6bd3134dd81d2f95a36_s50'), rdflib.URIRef(ns.bsp.height), rdflib.Literal('50', 
datatype=rdflib.URIRef('http://www.w3.org/2001/XMLSchema#integer'))), (rdflib.URIRef(pre_preview + 'dbfd8ebc0557c4925e9ff8411629a74a15eca934a4c2a6bd3134dd81d2f95a36_s50'), rdflib.URIRef(ns.bsp.width), rdflib.Literal('36', datatype=rdflib.URIRef('http://www.w3.org/2001/XMLSchema#integer'))), - (rdflib.URIRef(pre_preview + 'dbfd8ebc0557c4925e9ff8411629a74a15eca934a4c2a6bd3134dd81d2f95a36_s50'), rdflib.RDF.type, rdflib.URIRef(ns.bsfs.Preview)), + (rdflib.URIRef(pre_preview + 'dbfd8ebc0557c4925e9ff8411629a74a15eca934a4c2a6bd3134dd81d2f95a36_s50'), rdflib.RDF.type, rdflib.URIRef(ns.bsn.Preview)), (rdflib.URIRef(pre_preview + 'df2185d8927ccef65c92fc90b94e800b02791354d8dede9dd9aa0e2c2cb1e91e_s50'), rdflib.URIRef(ns.bsp.height), rdflib.Literal('50', datatype=rdflib.URIRef('http://www.w3.org/2001/XMLSchema#integer'))), (rdflib.URIRef(pre_preview + 'df2185d8927ccef65c92fc90b94e800b02791354d8dede9dd9aa0e2c2cb1e91e_s50'), rdflib.URIRef(ns.bsp.width), rdflib.Literal('36', datatype=rdflib.URIRef('http://www.w3.org/2001/XMLSchema#integer'))), - (rdflib.URIRef(pre_preview + 'df2185d8927ccef65c92fc90b94e800b02791354d8dede9dd9aa0e2c2cb1e91e_s50'), rdflib.RDF.type, rdflib.URIRef(ns.bsfs.Preview)), + (rdflib.URIRef(pre_preview + 'df2185d8927ccef65c92fc90b94e800b02791354d8dede9dd9aa0e2c2cb1e91e_s50'), rdflib.RDF.type, rdflib.URIRef(ns.bsn.Preview)), # assets - (rdflib.URIRef(pre_preview + '2656e303d7218300326df73b64f312d8b37eb980358be27a38b5f63dae259be3_s50'), rdflib.URIRef(ns.bsp.asset), 
rdflib.Literal('/9j/4AAQSkZJRgABAQAAAQABAAD/2wBDAAgGBgcGBQgHBwcJCQgKDBQNDAsLDBkSEw8UHRofHh0aHBwgJC4nICIsIxwcKDcpLDAxNDQ0Hyc5PTgyPC4zNDL/2wBDAQkJCQwLDBgNDRgyIRwhMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjL/wAARCAAhADIDASIAAhEBAxEB/8QAHwAAAQUBAQEBAQEAAAAAAAAAAAECAwQFBgcICQoL/8QAtRAAAgEDAwIEAwUFBAQAAAF9AQIDAAQRBRIhMUEGE1FhByJxFDKBkaEII0KxwRVS0fAkM2JyggkKFhcYGRolJicoKSo0NTY3ODk6Q0RFRkdISUpTVFVWV1hZWmNkZWZnaGlqc3R1dnd4eXqDhIWGh4iJipKTlJWWl5iZmqKjpKWmp6ipqrKztLW2t7i5usLDxMXGx8jJytLT1NXW19jZ2uHi4+Tl5ufo6erx8vP09fb3+Pn6/8QAHwEAAwEBAQEBAQEBAQAAAAAAAAECAwQFBgcICQoL/8QAtREAAgECBAQDBAcFBAQAAQJ3AAECAxEEBSExBhJBUQdhcRMiMoEIFEKRobHBCSMzUvAVYnLRChYkNOEl8RcYGRomJygpKjU2Nzg5OkNERUZHSElKU1RVVldYWVpjZGVmZ2hpanN0dXZ3eHl6goOEhYaHiImKkpOUlZaXmJmaoqOkpaanqKmqsrO0tba3uLm6wsPExcbHyMnK0tPU1dbX2Nna4uPk5ebn6Onq8vP09fb3+Pn6/9oADAMBAAIRAxEAPwDi9Ktb+O3khZTg/wAB7VSGnKkkhkAR85weteo3Vl9mvLtWjVWEJaN/9rsD7HkVwNza3kmsn7RYsDuzsdSVb/GvLo15W9rKNkVDmvzKN0dx4L1Sb+x2S7jZ7aM7BKF+XPoffkVdOpWDSSI9sGizk5HWuE8S69q0NnHptnB9ms7Nh8lr+6SSXALhsHJUcY7kitPTLi51nR0nMKpO6ZkCHABxngdq1xGKnSs1Kyvrc7qEMW2/Zrz/AKudnbXXhuaEiO3jjY9TtxVG8ht3mQwgOnaubuVmtbFV2gSjjn1q1prajJF+9dEQdMVjPHKtFxaXqc9fE1JXpzjr+J0Is7fA+VaKwmludx/0xevrRXLaH8xyfVZdjpNFsgsUlpryPkjyVuVJ6AnH8z/I1flS30m2ezvdt3bbd1teRL8yntu/xGfeua1zXtbs7dh5I8mRdhkD7mYEY5GOf51Jp+vW8Vnu1KT7FJKMmO5b5JcdwDxn1HFfR1KUZRd1v93zPoaFfD1J+5Kz+79DjfEV9Dc3E0hk5Zi5ZR1btx+NYNlrn9nllhkKgnPpnjr9Of1H0rrdc0bQtTvgsWbSRiwJjk2K+ADwrZ9RyOOa4/U/AWs21y0mmhL60dyI5IpVLduGGeCM/jXmPL201N3NK9SpfngrryOr0y+i1fT4lvZ9gR9pYfM5I9v8/wBK2/7FneFmCXEMLcIbhwpb3A6gVwGiaR4o03UYhbaZOZ88RqA27HXoeB9K9PgiYRRyal4Y1KKVhlyHbr3966MPgIRpuMtNROjTr+/JWn+P4mB/wix/5/o/+/lFdoLXT8DPhfUfx8yiuj6lT7v8P8hex85ffEZef8gu0+oriPiZ/rNI+j/zFFFbYn+Ez5uh8ZP4l/5Cq/8AYN/9nFU/CH/Hvd/9dv8A2Wiih/Ee7k/wv1/Q63Qv9fb/APXT+ldFrP8Ax/xfRP8A0IUUVX2T0K38RD5v9dJ/vH+dFFFUC2P/2Q==', datatype=rdflib.URIRef(ns.bsfs.BinaryBlob))), - (rdflib.URIRef(pre_preview + '26f16643b2570ac5b2d1f8c373d492cb724aae2dd8d71a0b63647838ed651254_s50'), 
rdflib.URIRef(ns.bsp.asset), rdflib.Literal('/9j/4AAQSkZJRgABAQAAAQABAAD/2wBDAAgGBgcGBQgHBwcJCQgKDBQNDAsLDBkSEw8UHRofHh0aHBwgJC4nICIsIxwcKDcpLDAxNDQ0Hyc5PTgyPC4zNDL/wAALCAAyACQBAREA/8QAHwAAAQUBAQEBAQEAAAAAAAAAAAECAwQFBgcICQoL/8QAtRAAAgEDAwIEAwUFBAQAAAF9AQIDAAQRBRIhMUEGE1FhByJxFDKBkaEII0KxwRVS0fAkM2JyggkKFhcYGRolJicoKSo0NTY3ODk6Q0RFRkdISUpTVFVWV1hZWmNkZWZnaGlqc3R1dnd4eXqDhIWGh4iJipKTlJWWl5iZmqKjpKWmp6ipqrKztLW2t7i5usLDxMXGx8jJytLT1NXW19jZ2uHi4+Tl5ufo6erx8vP09fb3+Pn6/9oACAEBAAA/APdLq6eAgKoI25Oc+oHYe9Qfb5sr+6UZHUhuv5VYjnZyQSo9gpNTgP3YH6Lj+tPqCa2jmILrkgEVH/Z8HHy8DjGB/hUq26IMAkj0IH+FTUUUUUUUUUUUUUUUUUUUUUUUUUUV/9k=', datatype=rdflib.URIRef(ns.bsfs.BinaryBlob))), - (rdflib.URIRef(pre_preview + '5d1235838c3d501204bb09c2de563d7e4a7fd17b7ec4ff302221c0e88c4741aa_s50'), rdflib.URIRef(ns.bsp.asset), rdflib.Literal('/9j/4AAQSkZJRgABAQAAAQABAAD/2wBDAAgGBgcGBQgHBwcJCQgKDBQNDAsLDBkSEw8UHRofHh0aHBwgJC4nICIsIxwcKDcpLDAxNDQ0Hyc5PTgyPC4zNDL/wAALCAAyACQBAREA/8QAHwAAAQUBAQEBAQEAAAAAAAAAAAECAwQFBgcICQoL/8QAtRAAAgEDAwIEAwUFBAQAAAF9AQIDAAQRBRIhMUEGE1FhByJxFDKBkaEII0KxwRVS0fAkM2JyggkKFhcYGRolJicoKSo0NTY3ODk6Q0RFRkdISUpTVFVWV1hZWmNkZWZnaGlqc3R1dnd4eXqDhIWGh4iJipKTlJWWl5iZmqKjpKWmp6ipqrKztLW2t7i5usLDxMXGx8jJytLT1NXW19jZ2uHi4+Tl5ufo6erx8vP09fb3+Pn6/9oACAEBAAA/APdLm5eBwFVTkd89c+1Qf2hMSAI15/3v8KtR3DSFgIiceh7/AMv1qcHIzjFLVO7tnncFduAMc+v5GoDZXJAw65A/vdT/AN81PDasjMXPBGAMgj8toqxHGIxgfyA/lUlFFFFFFFFFFFFFFFFFFFFFFFFFf//Z', datatype=rdflib.URIRef(ns.bsfs.BinaryBlob))), - (rdflib.URIRef(pre_preview + '79cb8a7e6369361a4f4cb7ff729c1ed3fcf87204769623d6fbd6ebfae601e5c7_s50'), rdflib.URIRef(ns.bsp.asset), 
rdflib.Literal('/9j/4AAQSkZJRgABAQAAAQABAAD/2wBDAAgGBgcGBQgHBwcJCQgKDBQNDAsLDBkSEw8UHRofHh0aHBwgJC4nICIsIxwcKDcpLDAxNDQ0Hyc5PTgyPC4zNDL/wAALCAAyACQBAREA/8QAHwAAAQUBAQEBAQEAAAAAAAAAAAECAwQFBgcICQoL/8QAtRAAAgEDAwIEAwUFBAQAAAF9AQIDAAQRBRIhMUEGE1FhByJxFDKBkaEII0KxwRVS0fAkM2JyggkKFhcYGRolJicoKSo0NTY3ODk6Q0RFRkdISUpTVFVWV1hZWmNkZWZnaGlqc3R1dnd4eXqDhIWGh4iJipKTlJWWl5iZmqKjpKWmp6ipqrKztLW2t7i5usLDxMXGx8jJytLT1NXW19jZ2uHi4+Tl5ufo6erx8vP09fb3+Pn6/9oACAEBAAA/APdLq5eFgEQEYySQf6CoDqEo2/u1yevDf4VJHfZ3eZhNvojHvj0q4h3IDnOR1AxmnVXntEuDlyR8u3gA8fiDUR02I4G9uOei9eeenvTvsS5J82Tn6cfpUsMPkrgSOw9Gxx+lTUUUUUUUUUUUUUUUUUUUUUUUUUV//9k=', datatype=rdflib.URIRef(ns.bsfs.BinaryBlob))), - (rdflib.URIRef(pre_preview + '7a975385a110c21fcd12e238fab9501550fa02f6328749068a3bffd65e291027_s50'), rdflib.URIRef(ns.bsp.asset), rdflib.Literal('/9j/4AAQSkZJRgABAQAAAQABAAD/2wBDAAgGBgcGBQgHBwcJCQgKDBQNDAsLDBkSEw8UHRofHh0aHBwgJC4nICIsIxwcKDcpLDAxNDQ0Hyc5PTgyPC4zNDL/wAALCAAyACQBAREA/8QAHwAAAQUBAQEBAQEAAAAAAAAAAAECAwQFBgcICQoL/8QAtRAAAgEDAwIEAwUFBAQAAAF9AQIDAAQRBRIhMUEGE1FhByJxFDKBkaEII0KxwRVS0fAkM2JyggkKFhcYGRolJicoKSo0NTY3ODk6Q0RFRkdISUpTVFVWV1hZWmNkZWZnaGlqc3R1dnd4eXqDhIWGh4iJipKTlJWWl5iZmqKjpKWmp6ipqrKztLW2t7i5usLDxMXGx8jJytLT1NXW19jZ2uHi4+Tl5ufo6erx8vP09fb3+Pn6/9oACAEBAAA/APdLq6eBgFUHjPIJ7+wqudQmGMxpyMk4bA6+30/OrSyzFvm8kDrjecgflVhSGGVIIPcUtV57RLjBZmBAxwB/UGov7NiGMMw25wQF45z6VKtrGuc7mz6n3zVjpRRRRRRRRRRRRRRRRRRRRRRRRRRX/9k=', datatype=rdflib.URIRef(ns.bsfs.BinaryBlob))), - (rdflib.URIRef(pre_preview + '968b9aa178585bc8d1fca0e4e32b8cf30b3941eff72f34e320584aaae8fd23ac_s50'), rdflib.URIRef(ns.bsp.asset), 
rdflib.Literal('/9j/4AAQSkZJRgABAQAAAQABAAD/2wBDAAgGBgcGBQgHBwcJCQgKDBQNDAsLDBkSEw8UHRofHh0aHBwgJC4nICIsIxwcKDcpLDAxNDQ0Hyc5PTgyPC4zNDL/wAALCAAyACQBAREA/8QAHwAAAQUBAQEBAQEAAAAAAAAAAAECAwQFBgcICQoL/8QAtRAAAgEDAwIEAwUFBAQAAAF9AQIDAAQRBRIhMUEGE1FhByJxFDKBkaEII0KxwRVS0fAkM2JyggkKFhcYGRolJicoKSo0NTY3ODk6Q0RFRkdISUpTVFVWV1hZWmNkZWZnaGlqc3R1dnd4eXqDhIWGh4iJipKTlJWWl5iZmqKjpKWmp6ipqrKztLW2t7i5usLDxMXGx8jJytLT1NXW19jZ2uHi4+Tl5ufo6erx8vP09fb3+Pn6/9oACAEBAAA/APdLm5eFsIgPy55zyc47CoRfTkf6uPOOckjnn29qmS5dmbK52noq5OPzqwj7/wCFl/3hin1n36MzgqhbCkcKTz+ANUzA5Cjynx7IRjn/AHfT271d8gAEFGxzwCfX2WrMR2gIQfbg/wCAqaiiiiiiiiiiiiiiiiiiiiiiiiiv/9k=', datatype=rdflib.URIRef(ns.bsfs.BinaryBlob))), - (rdflib.URIRef(pre_preview + '9827509a74a60dfceed11936f7f624e9c932f66c8c0d20d355d56f8c3c9b56b1_s50'), rdflib.URIRef(ns.bsp.asset), rdflib.Literal('/9j/4AAQSkZJRgABAQAAAQABAAD/2wBDAAgGBgcGBQgHBwcJCQgKDBQNDAsLDBkSEw8UHRofHh0aHBwgJC4nICIsIxwcKDcpLDAxNDQ0Hyc5PTgyPC4zNDL/wAALCAAyACQBAREA/8QAHwAAAQUBAQEBAQEAAAAAAAAAAAECAwQFBgcICQoL/8QAtRAAAgEDAwIEAwUFBAQAAAF9AQIDAAQRBRIhMUEGE1FhByJxFDKBkaEII0KxwRVS0fAkM2JyggkKFhcYGRolJicoKSo0NTY3ODk6Q0RFRkdISUpTVFVWV1hZWmNkZWZnaGlqc3R1dnd4eXqDhIWGh4iJipKTlJWWl5iZmqKjpKWmp6ipqrKztLW2t7i5usLDxMXGx8jJytLT1NXW19jZ2uHi4+Tl5ufo6erx8vP09fb3+Pn6/9oACAEBAAA/APdLq5eAgKoPyknIPb6ColvZywHlp79R+WRU3nT7iojHXqQR6VYUk9QR9adVK8t3mdSqggLjJxkHPuD/AJFVlsZ/l+RFIXGTtI6k9Npq3FbFdwYKAemAp/8AZR/WrKLtGMn9KdRRRRRRRRRRRRRRRRRRRRRRRRRX/9k=', datatype=rdflib.URIRef(ns.bsfs.BinaryBlob))), - (rdflib.URIRef(pre_preview + '567049149769e1d02e6af6cfee3991f7cf0cbc935cbf6a566047f40155fb13a8_s50'), rdflib.URIRef(ns.bsp.asset), 
rdflib.Literal('/9j/4AAQSkZJRgABAQAAAQABAAD/2wBDAAgGBgcGBQgHBwcJCQgKDBQNDAsLDBkSEw8UHRofHh0aHBwgJC4nICIsIxwcKDcpLDAxNDQ0Hyc5PTgyPC4zNDL/wAALCAAyACQBAREA/8QAHwAAAQUBAQEBAQEAAAAAAAAAAAECAwQFBgcICQoL/8QAtRAAAgEDAwIEAwUFBAQAAAF9AQIDAAQRBRIhMUEGE1FhByJxFDKBkaEII0KxwRVS0fAkM2JyggkKFhcYGRolJicoKSo0NTY3ODk6Q0RFRkdISUpTVFVWV1hZWmNkZWZnaGlqc3R1dnd4eXqDhIWGh4iJipKTlJWWl5iZmqKjpKWmp6ipqrKztLW2t7i5usLDxMXGx8jJytLT1NXW19jZ2uHi4+Tl5ufo6erx8vP09fb3+Pn6/9oACAEBAAA/APdLm5eFwFVSCpPIPYj0HvVf+0ZiFxGnIychhj9KtQyTSNkiLZ7E5B/EVZorN1EZkXAJIQ4wM9x7GqYAAXCAgDHKnGMn0X/Oa0LeMJudcKx64iJz+OATVoOOh3E+oUipKayK33lB+opPKj/55r+VHlR8/Iv5U+iiiiiiiiiiiiiiiiiiiiiv/9k=', datatype=rdflib.URIRef(ns.bsfs.BinaryBlob))), - (rdflib.URIRef(pre_preview + 'df2185d8927ccef65c92fc90b94e800b02791354d8dede9dd9aa0e2c2cb1e91e_s50'), rdflib.URIRef(ns.bsp.asset), rdflib.Literal('/9j/4AAQSkZJRgABAQAAAQABAAD/2wBDAAgGBgcGBQgHBwcJCQgKDBQNDAsLDBkSEw8UHRofHh0aHBwgJC4nICIsIxwcKDcpLDAxNDQ0Hyc5PTgyPC4zNDL/wAALCAAyACQBAREA/8QAHwAAAQUBAQEBAQEAAAAAAAAAAAECAwQFBgcICQoL/8QAtRAAAgEDAwIEAwUFBAQAAAF9AQIDAAQRBRIhMUEGE1FhByJxFDKBkaEII0KxwRVS0fAkM2JyggkKFhcYGRolJicoKSo0NTY3ODk6Q0RFRkdISUpTVFVWV1hZWmNkZWZnaGlqc3R1dnd4eXqDhIWGh4iJipKTlJWWl5iZmqKjpKWmp6ipqrKztLW2t7i5usLDxMXGx8jJytLT1NXW19jZ2uHi4+Tl5ufo6erx8vP09fb3+Pn6/9oACAEBAAA/APdLm5eGQBVU5XPIJ7j0FQfb5sgiNcH2b39vapkvGPDIucjoTjBOPSrKuHHGencEU+ql1am4IIYA7cDIB757g1CunFWByAAOCMAg5yD07VcCEN0P/fZNS0UUUUUUUUUUUUUUUUUUUUUUUUUV/9k=', datatype=rdflib.URIRef(ns.bsfs.BinaryBlob))), - (rdflib.URIRef(pre_preview + 'a63c84e647138a2b68113474212f6aee542b3707171ff178551db3c296e59817_s50'), rdflib.URIRef(ns.bsp.asset), 
rdflib.Literal('/9j/4AAQSkZJRgABAQAAAQABAAD/2wBDAAgGBgcGBQgHBwcJCQgKDBQNDAsLDBkSEw8UHRofHh0aHBwgJC4nICIsIxwcKDcpLDAxNDQ0Hyc5PTgyPC4zNDL/wAALCAAyACQBAREA/8QAHwAAAQUBAQEBAQEAAAAAAAAAAAECAwQFBgcICQoL/8QAtRAAAgEDAwIEAwUFBAQAAAF9AQIDAAQRBRIhMUEGE1FhByJxFDKBkaEII0KxwRVS0fAkM2JyggkKFhcYGRolJicoKSo0NTY3ODk6Q0RFRkdISUpTVFVWV1hZWmNkZWZnaGlqc3R1dnd4eXqDhIWGh4iJipKTlJWWl5iZmqKjpKWmp6ipqrKztLW2t7i5usLDxMXGx8jJytLT1NXW19jZ2uHi4+Tl5ufo6erx8vP09fb3+Pn6/9oACAEBAAA/APdLq6eBgFUH5c5IPqPQVANQmJUeWvPXhvUj09qspcM2cAnHHCE/rmpUl3nGx1+oxUlVLq1M7KQQPlI5APXHqDUKacyMDvHTsBk9fb3q4kXl5IYkk5OQB/ICpaKKKKKKKKKKKKKKKKKKKKKKKKKK/9k=', datatype=rdflib.URIRef(ns.bsfs.BinaryBlob))), - (rdflib.URIRef(pre_preview + 'a8b3245636074d5370283b690281abda8ffdff12ce8b1af77c8bc0a4c85be860_s50'), rdflib.URIRef(ns.bsp.asset), rdflib.Literal('/9j/4AAQSkZJRgABAQAAAQABAAD/2wBDAAgGBgcGBQgHBwcJCQgKDBQNDAsLDBkSEw8UHRofHh0aHBwgJC4nICIsIxwcKDcpLDAxNDQ0Hyc5PTgyPC4zNDL/wAALCAAyACQBAREA/8QAHwAAAQUBAQEBAQEAAAAAAAAAAAECAwQFBgcICQoL/8QAtRAAAgEDAwIEAwUFBAQAAAF9AQIDAAQRBRIhMUEGE1FhByJxFDKBkaEII0KxwRVS0fAkM2JyggkKFhcYGRolJicoKSo0NTY3ODk6Q0RFRkdISUpTVFVWV1hZWmNkZWZnaGlqc3R1dnd4eXqDhIWGh4iJipKTlJWWl5iZmqKjpKWmp6ipqrKztLW2t7i5usLDxMXGx8jJytLT1NXW19jZ2uHi4+Tl5ufo6erx8vP09fb3+Pn6/9oACAEBAAA/APdbm4eF1CqDkZOQT3HoKr/b5vl/dpkj/awDnHp06fzqZLiRmIPk47fMQf1FXKKikgilOZEDHGOaZ9itsg+SuQMA08QoowNwHoGP+NO8serf99Gn0UUUUUUUUUUUUUUUUUUUUUUUUV//2Q==', datatype=rdflib.URIRef(ns.bsfs.BinaryBlob))), - (rdflib.URIRef(pre_preview + 'dbfd8ebc0557c4925e9ff8411629a74a15eca934a4c2a6bd3134dd81d2f95a36_s50'), rdflib.URIRef(ns.bsp.asset), 
rdflib.Literal('/9j/4AAQSkZJRgABAQAAAQABAAD/2wBDAAgGBgcGBQgHBwcJCQgKDBQNDAsLDBkSEw8UHRofHh0aHBwgJC4nICIsIxwcKDcpLDAxNDQ0Hyc5PTgyPC4zNDL/wAALCAAyACQBAREA/8QAHwAAAQUBAQEBAQEAAAAAAAAAAAECAwQFBgcICQoL/8QAtRAAAgEDAwIEAwUFBAQAAAF9AQIDAAQRBRIhMUEGE1FhByJxFDKBkaEII0KxwRVS0fAkM2JyggkKFhcYGRolJicoKSo0NTY3ODk6Q0RFRkdISUpTVFVWV1hZWmNkZWZnaGlqc3R1dnd4eXqDhIWGh4iJipKTlJWWl5iZmqKjpKWmp6ipqrKztLW2t7i5usLDxMXGx8jJytLT1NXW19jZ2uHi4+Tl5ufo6erx8vP09fb3+Pn6/9oACAEBAAA/APdbm5eFwqqDkZ5B9QOw96rHUZgoPlLkgno3b8Kel7IzspVePYgdcck1YilaTA3JnqQFPT86sVQvoXkYFUDEKRz7np0NVWtZcAiLkd8Dg/8AfPqO3rWhEk6thsbc9mz+mP61Zoooooooooooooooooooooooooor/9k=', datatype=rdflib.URIRef(ns.bsfs.BinaryBlob))), + (rdflib.URIRef(pre_preview + '2656e303d7218300326df73b64f312d8b37eb980358be27a38b5f63dae259be3_s50'), rdflib.URIRef(ns.bsp.asset), rdflib.Literal('/9j/4AAQSkZJRgABAQAAAQABAAD/2wBDAAgGBgcGBQgHBwcJCQgKDBQNDAsLDBkSEw8UHRofHh0aHBwgJC4nICIsIxwcKDcpLDAxNDQ0Hyc5PTgyPC4zNDL/2wBDAQkJCQwLDBgNDRgyIRwhMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjL/wAARCAAhADIDASIAAhEBAxEB/8QAHwAAAQUBAQEBAQEAAAAAAAAAAAECAwQFBgcICQoL/8QAtRAAAgEDAwIEAwUFBAQAAAF9AQIDAAQRBRIhMUEGE1FhByJxFDKBkaEII0KxwRVS0fAkM2JyggkKFhcYGRolJicoKSo0NTY3ODk6Q0RFRkdISUpTVFVWV1hZWmNkZWZnaGlqc3R1dnd4eXqDhIWGh4iJipKTlJWWl5iZmqKjpKWmp6ipqrKztLW2t7i5usLDxMXGx8jJytLT1NXW19jZ2uHi4+Tl5ufo6erx8vP09fb3+Pn6/8QAHwEAAwEBAQEBAQEBAQAAAAAAAAECAwQFBgcICQoL/8QAtREAAgECBAQDBAcFBAQAAQJ3AAECAxEEBSExBhJBUQdhcRMiMoEIFEKRobHBCSMzUvAVYnLRChYkNOEl8RcYGRomJygpKjU2Nzg5OkNERUZHSElKU1RVVldYWVpjZGVmZ2hpanN0dXZ3eHl6goOEhYaHiImKkpOUlZaXmJmaoqOkpaanqKmqsrO0tba3uLm6wsPExcbHyMnK0tPU1dbX2Nna4uPk5ebn6Onq8vP09fb3+Pn6/9oADAMBAAIRAxEAPwDi9Ktb+O3khZTg/wAB7VSGnKkkhkAR85weteo3Vl9mvLtWjVWEJaN/9rsD7HkVwNza3kmsn7RYsDuzsdSVb/GvLo15W9rKNkVDmvzKN0dx4L1Sb+x2S7jZ7aM7BKF+XPoffkVdOpWDSSI9sGizk5HWuE8S69q0NnHptnB9ms7Nh8lr+6SSXALhsHJUcY7kitPTLi51nR0nMKpO6ZkCHABxngdq1xGKnSs1Kyvrc7qEMW2/Zrz/AKudnbXXhuaEiO3jjY9TtxVG8ht3mQwgOnaubuVmtbFV2gSjjn1q1prajJF+9dEQdMVjPHKtFxaXqc9fE1JXpzjr+J0Is7fA+VaKwmludx/0xevrRX
LaH8xyfVZdjpNFsgsUlpryPkjyVuVJ6AnH8z/I1flS30m2ezvdt3bbd1teRL8yntu/xGfeua1zXtbs7dh5I8mRdhkD7mYEY5GOf51Jp+vW8Vnu1KT7FJKMmO5b5JcdwDxn1HFfR1KUZRd1v93zPoaFfD1J+5Kz+79DjfEV9Dc3E0hk5Zi5ZR1btx+NYNlrn9nllhkKgnPpnjr9Of1H0rrdc0bQtTvgsWbSRiwJjk2K+ADwrZ9RyOOa4/U/AWs21y0mmhL60dyI5IpVLduGGeCM/jXmPL201N3NK9SpfngrryOr0y+i1fT4lvZ9gR9pYfM5I9v8/wBK2/7FneFmCXEMLcIbhwpb3A6gVwGiaR4o03UYhbaZOZ88RqA27HXoeB9K9PgiYRRyal4Y1KKVhlyHbr3966MPgIRpuMtNROjTr+/JWn+P4mB/wix/5/o/+/lFdoLXT8DPhfUfx8yiuj6lT7v8P8hex85ffEZef8gu0+oriPiZ/rNI+j/zFFFbYn+Ez5uh8ZP4l/5Cq/8AYN/9nFU/CH/Hvd/9dv8A2Wiih/Ee7k/wv1/Q63Qv9fb/APXT+ldFrP8Ax/xfRP8A0IUUVX2T0K38RD5v9dJ/vH+dFFFUC2P/2Q==', datatype=rdflib.URIRef(ns.bsl.BinaryBlob))), + (rdflib.URIRef(pre_preview + '26f16643b2570ac5b2d1f8c373d492cb724aae2dd8d71a0b63647838ed651254_s50'), rdflib.URIRef(ns.bsp.asset), rdflib.Literal('/9j/4AAQSkZJRgABAQAAAQABAAD/2wBDAAgGBgcGBQgHBwcJCQgKDBQNDAsLDBkSEw8UHRofHh0aHBwgJC4nICIsIxwcKDcpLDAxNDQ0Hyc5PTgyPC4zNDL/wAALCAAyACQBAREA/8QAHwAAAQUBAQEBAQEAAAAAAAAAAAECAwQFBgcICQoL/8QAtRAAAgEDAwIEAwUFBAQAAAF9AQIDAAQRBRIhMUEGE1FhByJxFDKBkaEII0KxwRVS0fAkM2JyggkKFhcYGRolJicoKSo0NTY3ODk6Q0RFRkdISUpTVFVWV1hZWmNkZWZnaGlqc3R1dnd4eXqDhIWGh4iJipKTlJWWl5iZmqKjpKWmp6ipqrKztLW2t7i5usLDxMXGx8jJytLT1NXW19jZ2uHi4+Tl5ufo6erx8vP09fb3+Pn6/9oACAEBAAA/APdLq6eAgKoI25Oc+oHYe9Qfb5sr+6UZHUhuv5VYjnZyQSo9gpNTgP3YH6Lj+tPqCa2jmILrkgEVH/Z8HHy8DjGB/hUq26IMAkj0IH+FTUUUUUUUUUUUUUUUUUUUUUUUUUUV/9k=', datatype=rdflib.URIRef(ns.bsl.BinaryBlob))), + (rdflib.URIRef(pre_preview + '5d1235838c3d501204bb09c2de563d7e4a7fd17b7ec4ff302221c0e88c4741aa_s50'), rdflib.URIRef(ns.bsp.asset), 
rdflib.Literal('/9j/4AAQSkZJRgABAQAAAQABAAD/2wBDAAgGBgcGBQgHBwcJCQgKDBQNDAsLDBkSEw8UHRofHh0aHBwgJC4nICIsIxwcKDcpLDAxNDQ0Hyc5PTgyPC4zNDL/wAALCAAyACQBAREA/8QAHwAAAQUBAQEBAQEAAAAAAAAAAAECAwQFBgcICQoL/8QAtRAAAgEDAwIEAwUFBAQAAAF9AQIDAAQRBRIhMUEGE1FhByJxFDKBkaEII0KxwRVS0fAkM2JyggkKFhcYGRolJicoKSo0NTY3ODk6Q0RFRkdISUpTVFVWV1hZWmNkZWZnaGlqc3R1dnd4eXqDhIWGh4iJipKTlJWWl5iZmqKjpKWmp6ipqrKztLW2t7i5usLDxMXGx8jJytLT1NXW19jZ2uHi4+Tl5ufo6erx8vP09fb3+Pn6/9oACAEBAAA/APdLm5eBwFVTkd89c+1Qf2hMSAI15/3v8KtR3DSFgIiceh7/AMv1qcHIzjFLVO7tnncFduAMc+v5GoDZXJAw65A/vdT/AN81PDasjMXPBGAMgj8toqxHGIxgfyA/lUlFFFFFFFFFFFFFFFFFFFFFFFFFf//Z', datatype=rdflib.URIRef(ns.bsl.BinaryBlob))), + (rdflib.URIRef(pre_preview + '79cb8a7e6369361a4f4cb7ff729c1ed3fcf87204769623d6fbd6ebfae601e5c7_s50'), rdflib.URIRef(ns.bsp.asset), rdflib.Literal('/9j/4AAQSkZJRgABAQAAAQABAAD/2wBDAAgGBgcGBQgHBwcJCQgKDBQNDAsLDBkSEw8UHRofHh0aHBwgJC4nICIsIxwcKDcpLDAxNDQ0Hyc5PTgyPC4zNDL/wAALCAAyACQBAREA/8QAHwAAAQUBAQEBAQEAAAAAAAAAAAECAwQFBgcICQoL/8QAtRAAAgEDAwIEAwUFBAQAAAF9AQIDAAQRBRIhMUEGE1FhByJxFDKBkaEII0KxwRVS0fAkM2JyggkKFhcYGRolJicoKSo0NTY3ODk6Q0RFRkdISUpTVFVWV1hZWmNkZWZnaGlqc3R1dnd4eXqDhIWGh4iJipKTlJWWl5iZmqKjpKWmp6ipqrKztLW2t7i5usLDxMXGx8jJytLT1NXW19jZ2uHi4+Tl5ufo6erx8vP09fb3+Pn6/9oACAEBAAA/APdLq5eFgEQEYySQf6CoDqEo2/u1yevDf4VJHfZ3eZhNvojHvj0q4h3IDnOR1AxmnVXntEuDlyR8u3gA8fiDUR02I4G9uOei9eeenvTvsS5J82Tn6cfpUsMPkrgSOw9Gxx+lTUUUUUUUUUUUUUUUUUUUUUUUUUV//9k=', datatype=rdflib.URIRef(ns.bsl.BinaryBlob))), + (rdflib.URIRef(pre_preview + '7a975385a110c21fcd12e238fab9501550fa02f6328749068a3bffd65e291027_s50'), rdflib.URIRef(ns.bsp.asset), 
rdflib.Literal('/9j/4AAQSkZJRgABAQAAAQABAAD/2wBDAAgGBgcGBQgHBwcJCQgKDBQNDAsLDBkSEw8UHRofHh0aHBwgJC4nICIsIxwcKDcpLDAxNDQ0Hyc5PTgyPC4zNDL/wAALCAAyACQBAREA/8QAHwAAAQUBAQEBAQEAAAAAAAAAAAECAwQFBgcICQoL/8QAtRAAAgEDAwIEAwUFBAQAAAF9AQIDAAQRBRIhMUEGE1FhByJxFDKBkaEII0KxwRVS0fAkM2JyggkKFhcYGRolJicoKSo0NTY3ODk6Q0RFRkdISUpTVFVWV1hZWmNkZWZnaGlqc3R1dnd4eXqDhIWGh4iJipKTlJWWl5iZmqKjpKWmp6ipqrKztLW2t7i5usLDxMXGx8jJytLT1NXW19jZ2uHi4+Tl5ufo6erx8vP09fb3+Pn6/9oACAEBAAA/APdLq6eBgFUHjPIJ7+wqudQmGMxpyMk4bA6+30/OrSyzFvm8kDrjecgflVhSGGVIIPcUtV57RLjBZmBAxwB/UGov7NiGMMw25wQF45z6VKtrGuc7mz6n3zVjpRRRRRRRRRRRRRRRRRRRRRRRRRRX/9k=', datatype=rdflib.URIRef(ns.bsl.BinaryBlob))), + (rdflib.URIRef(pre_preview + '968b9aa178585bc8d1fca0e4e32b8cf30b3941eff72f34e320584aaae8fd23ac_s50'), rdflib.URIRef(ns.bsp.asset), rdflib.Literal('/9j/4AAQSkZJRgABAQAAAQABAAD/2wBDAAgGBgcGBQgHBwcJCQgKDBQNDAsLDBkSEw8UHRofHh0aHBwgJC4nICIsIxwcKDcpLDAxNDQ0Hyc5PTgyPC4zNDL/wAALCAAyACQBAREA/8QAHwAAAQUBAQEBAQEAAAAAAAAAAAECAwQFBgcICQoL/8QAtRAAAgEDAwIEAwUFBAQAAAF9AQIDAAQRBRIhMUEGE1FhByJxFDKBkaEII0KxwRVS0fAkM2JyggkKFhcYGRolJicoKSo0NTY3ODk6Q0RFRkdISUpTVFVWV1hZWmNkZWZnaGlqc3R1dnd4eXqDhIWGh4iJipKTlJWWl5iZmqKjpKWmp6ipqrKztLW2t7i5usLDxMXGx8jJytLT1NXW19jZ2uHi4+Tl5ufo6erx8vP09fb3+Pn6/9oACAEBAAA/APdLm5eFsIgPy55zyc47CoRfTkf6uPOOckjnn29qmS5dmbK52noq5OPzqwj7/wCFl/3hin1n36MzgqhbCkcKTz+ANUzA5Cjynx7IRjn/AHfT271d8gAEFGxzwCfX2WrMR2gIQfbg/wCAqaiiiiiiiiiiiiiiiiiiiiiiiiiv/9k=', datatype=rdflib.URIRef(ns.bsl.BinaryBlob))), + (rdflib.URIRef(pre_preview + '9827509a74a60dfceed11936f7f624e9c932f66c8c0d20d355d56f8c3c9b56b1_s50'), rdflib.URIRef(ns.bsp.asset), 
rdflib.Literal('/9j/4AAQSkZJRgABAQAAAQABAAD/2wBDAAgGBgcGBQgHBwcJCQgKDBQNDAsLDBkSEw8UHRofHh0aHBwgJC4nICIsIxwcKDcpLDAxNDQ0Hyc5PTgyPC4zNDL/wAALCAAyACQBAREA/8QAHwAAAQUBAQEBAQEAAAAAAAAAAAECAwQFBgcICQoL/8QAtRAAAgEDAwIEAwUFBAQAAAF9AQIDAAQRBRIhMUEGE1FhByJxFDKBkaEII0KxwRVS0fAkM2JyggkKFhcYGRolJicoKSo0NTY3ODk6Q0RFRkdISUpTVFVWV1hZWmNkZWZnaGlqc3R1dnd4eXqDhIWGh4iJipKTlJWWl5iZmqKjpKWmp6ipqrKztLW2t7i5usLDxMXGx8jJytLT1NXW19jZ2uHi4+Tl5ufo6erx8vP09fb3+Pn6/9oACAEBAAA/APdLq5eAgKoPyknIPb6ColvZywHlp79R+WRU3nT7iojHXqQR6VYUk9QR9adVK8t3mdSqggLjJxkHPuD/AJFVlsZ/l+RFIXGTtI6k9Npq3FbFdwYKAemAp/8AZR/WrKLtGMn9KdRRRRRRRRRRRRRRRRRRRRRRRRRX/9k=', datatype=rdflib.URIRef(ns.bsl.BinaryBlob))), + (rdflib.URIRef(pre_preview + '567049149769e1d02e6af6cfee3991f7cf0cbc935cbf6a566047f40155fb13a8_s50'), rdflib.URIRef(ns.bsp.asset), rdflib.Literal('/9j/4AAQSkZJRgABAQAAAQABAAD/2wBDAAgGBgcGBQgHBwcJCQgKDBQNDAsLDBkSEw8UHRofHh0aHBwgJC4nICIsIxwcKDcpLDAxNDQ0Hyc5PTgyPC4zNDL/wAALCAAyACQBAREA/8QAHwAAAQUBAQEBAQEAAAAAAAAAAAECAwQFBgcICQoL/8QAtRAAAgEDAwIEAwUFBAQAAAF9AQIDAAQRBRIhMUEGE1FhByJxFDKBkaEII0KxwRVS0fAkM2JyggkKFhcYGRolJicoKSo0NTY3ODk6Q0RFRkdISUpTVFVWV1hZWmNkZWZnaGlqc3R1dnd4eXqDhIWGh4iJipKTlJWWl5iZmqKjpKWmp6ipqrKztLW2t7i5usLDxMXGx8jJytLT1NXW19jZ2uHi4+Tl5ufo6erx8vP09fb3+Pn6/9oACAEBAAA/APdLm5eFwFVSCpPIPYj0HvVf+0ZiFxGnIychhj9KtQyTSNkiLZ7E5B/EVZorN1EZkXAJIQ4wM9x7GqYAAXCAgDHKnGMn0X/Oa0LeMJudcKx64iJz+OATVoOOh3E+oUipKayK33lB+opPKj/55r+VHlR8/Iv5U+iiiiiiiiiiiiiiiiiiiiiv/9k=', datatype=rdflib.URIRef(ns.bsl.BinaryBlob))), + (rdflib.URIRef(pre_preview + 'df2185d8927ccef65c92fc90b94e800b02791354d8dede9dd9aa0e2c2cb1e91e_s50'), rdflib.URIRef(ns.bsp.asset), 
rdflib.Literal('/9j/4AAQSkZJRgABAQAAAQABAAD/2wBDAAgGBgcGBQgHBwcJCQgKDBQNDAsLDBkSEw8UHRofHh0aHBwgJC4nICIsIxwcKDcpLDAxNDQ0Hyc5PTgyPC4zNDL/wAALCAAyACQBAREA/8QAHwAAAQUBAQEBAQEAAAAAAAAAAAECAwQFBgcICQoL/8QAtRAAAgEDAwIEAwUFBAQAAAF9AQIDAAQRBRIhMUEGE1FhByJxFDKBkaEII0KxwRVS0fAkM2JyggkKFhcYGRolJicoKSo0NTY3ODk6Q0RFRkdISUpTVFVWV1hZWmNkZWZnaGlqc3R1dnd4eXqDhIWGh4iJipKTlJWWl5iZmqKjpKWmp6ipqrKztLW2t7i5usLDxMXGx8jJytLT1NXW19jZ2uHi4+Tl5ufo6erx8vP09fb3+Pn6/9oACAEBAAA/APdLm5eGQBVU5XPIJ7j0FQfb5sgiNcH2b39vapkvGPDIucjoTjBOPSrKuHHGencEU+ql1am4IIYA7cDIB757g1CunFWByAAOCMAg5yD07VcCEN0P/fZNS0UUUUUUUUUUUUUUUUUUUUUUUUUV/9k=', datatype=rdflib.URIRef(ns.bsl.BinaryBlob))), + (rdflib.URIRef(pre_preview + 'a63c84e647138a2b68113474212f6aee542b3707171ff178551db3c296e59817_s50'), rdflib.URIRef(ns.bsp.asset), rdflib.Literal('/9j/4AAQSkZJRgABAQAAAQABAAD/2wBDAAgGBgcGBQgHBwcJCQgKDBQNDAsLDBkSEw8UHRofHh0aHBwgJC4nICIsIxwcKDcpLDAxNDQ0Hyc5PTgyPC4zNDL/wAALCAAyACQBAREA/8QAHwAAAQUBAQEBAQEAAAAAAAAAAAECAwQFBgcICQoL/8QAtRAAAgEDAwIEAwUFBAQAAAF9AQIDAAQRBRIhMUEGE1FhByJxFDKBkaEII0KxwRVS0fAkM2JyggkKFhcYGRolJicoKSo0NTY3ODk6Q0RFRkdISUpTVFVWV1hZWmNkZWZnaGlqc3R1dnd4eXqDhIWGh4iJipKTlJWWl5iZmqKjpKWmp6ipqrKztLW2t7i5usLDxMXGx8jJytLT1NXW19jZ2uHi4+Tl5ufo6erx8vP09fb3+Pn6/9oACAEBAAA/APdLq6eBgFUH5c5IPqPQVANQmJUeWvPXhvUj09qspcM2cAnHHCE/rmpUl3nGx1+oxUlVLq1M7KQQPlI5APXHqDUKacyMDvHTsBk9fb3q4kXl5IYkk5OQB/ICpaKKKKKKKKKKKKKKKKKKKKKKKKKK/9k=', datatype=rdflib.URIRef(ns.bsl.BinaryBlob))), + (rdflib.URIRef(pre_preview + 'a8b3245636074d5370283b690281abda8ffdff12ce8b1af77c8bc0a4c85be860_s50'), rdflib.URIRef(ns.bsp.asset), 
rdflib.Literal('/9j/4AAQSkZJRgABAQAAAQABAAD/2wBDAAgGBgcGBQgHBwcJCQgKDBQNDAsLDBkSEw8UHRofHh0aHBwgJC4nICIsIxwcKDcpLDAxNDQ0Hyc5PTgyPC4zNDL/wAALCAAyACQBAREA/8QAHwAAAQUBAQEBAQEAAAAAAAAAAAECAwQFBgcICQoL/8QAtRAAAgEDAwIEAwUFBAQAAAF9AQIDAAQRBRIhMUEGE1FhByJxFDKBkaEII0KxwRVS0fAkM2JyggkKFhcYGRolJicoKSo0NTY3ODk6Q0RFRkdISUpTVFVWV1hZWmNkZWZnaGlqc3R1dnd4eXqDhIWGh4iJipKTlJWWl5iZmqKjpKWmp6ipqrKztLW2t7i5usLDxMXGx8jJytLT1NXW19jZ2uHi4+Tl5ufo6erx8vP09fb3+Pn6/9oACAEBAAA/APdbm4eF1CqDkZOQT3HoKr/b5vl/dpkj/awDnHp06fzqZLiRmIPk47fMQf1FXKKikgilOZEDHGOaZ9itsg+SuQMA08QoowNwHoGP+NO8serf99Gn0UUUUUUUUUUUUUUUUUUUUUUUUV//2Q==', datatype=rdflib.URIRef(ns.bsl.BinaryBlob))), + (rdflib.URIRef(pre_preview + 'dbfd8ebc0557c4925e9ff8411629a74a15eca934a4c2a6bd3134dd81d2f95a36_s50'), rdflib.URIRef(ns.bsp.asset), rdflib.Literal('/9j/4AAQSkZJRgABAQAAAQABAAD/2wBDAAgGBgcGBQgHBwcJCQgKDBQNDAsLDBkSEw8UHRofHh0aHBwgJC4nICIsIxwcKDcpLDAxNDQ0Hyc5PTgyPC4zNDL/wAALCAAyACQBAREA/8QAHwAAAQUBAQEBAQEAAAAAAAAAAAECAwQFBgcICQoL/8QAtRAAAgEDAwIEAwUFBAQAAAF9AQIDAAQRBRIhMUEGE1FhByJxFDKBkaEII0KxwRVS0fAkM2JyggkKFhcYGRolJicoKSo0NTY3ODk6Q0RFRkdISUpTVFVWV1hZWmNkZWZnaGlqc3R1dnd4eXqDhIWGh4iJipKTlJWWl5iZmqKjpKWmp6ipqrKztLW2t7i5usLDxMXGx8jJytLT1NXW19jZ2uHi4+Tl5ufo6erx8vP09fb3+Pn6/9oACAEBAAA/APdbm5eFwqqDkZ5B9QOw96rHUZgoPlLkgno3b8Kel7IzspVePYgdcck1YilaTA3JnqQFPT86sVQvoXkYFUDEKRz7np0NVWtZcAiLkd8Dg/8AfPqO3rWhEk6thsbc9mz+mP61Zoooooooooooooooooooooooooor/9k=', datatype=rdflib.URIRef(ns.bsl.BinaryBlob))), })) # NOTE: we don't check ns.bsm.t_created since it depends on the execution time. Triples would look like this: @@ -189,7 +189,7 @@ class TestIndex(unittest.TestCase): # (rdflib.URIRef(pre_file + '80818b8ec2ee1919116dba9c8a7e0a4608313cf3b463cd88e9ed77a700dd92d3'), rdflib.URIRef(ns.bsm.t_created), rdflib.Literal('1670..........', datatype=rdflib.XSD.integer)), # ... 
# instead, we simply check if there's such a predicate for each file - self.assertSetEqual({sub for sub, _ in bsfs._backend._graph.subject_objects(rdflib.URIRef(ns.bsm.t_created))}, { + self.assertSetEqual({sub for sub, _ in bsfs._backend._graph.subject_objects(rdflib.URIRef(ns.bsfs.Node().t_created))}, { rdflib.URIRef(pre_file + '2f4109b40107cc50e0884755a1a961ed126887e49b8dbaf0e146b2e226aa6647'), rdflib.URIRef(pre_file + '441f3d10c8ff489fe8e33e639606512f6c463151cc429de7e554b9af670c2ece'), rdflib.URIRef(pre_file + '69b98ecf7aff3e95b09688ba93331678eb8397817111f674c9558e6dd8f5e871'), @@ -230,70 +230,70 @@ class TestIndex(unittest.TestCase): os.path.join(os.path.dirname(__file__), 'testfile'), ]) self.assertTrue((set(outbuf.getvalue().split('\n')) - {''}).issuperset({ - f'Node(http://bsfs.ai/schema/File, http://example.com/me/file#2f4109b40107cc50e0884755a1a961ed126887e49b8dbaf0e146b2e226aa6647) Predicate({ns.bse.author}) Me, myself, and I', - f'Node(http://bsfs.ai/schema/File, http://example.com/me/file#2f4109b40107cc50e0884755a1a961ed126887e49b8dbaf0e146b2e226aa6647) Predicate({ns.bse.filename}) alpha_second', - f'Node(http://bsfs.ai/schema/File, http://example.com/me/file#441f3d10c8ff489fe8e33e639606512f6c463151cc429de7e554b9af670c2ece) Predicate({ns.bse.author}) Me, myself, and I', - f'Node(http://bsfs.ai/schema/File, http://example.com/me/file#441f3d10c8ff489fe8e33e639606512f6c463151cc429de7e554b9af670c2ece) Predicate({ns.bse.filename}) omega_second', - f'Node(http://bsfs.ai/schema/File, http://example.com/me/file#69b98ecf7aff3e95b09688ba93331678eb8397817111f674c9558e6dd8f5e871) Predicate({ns.bse.author}) Me, myself, and I', - f'Node(http://bsfs.ai/schema/File, http://example.com/me/file#69b98ecf7aff3e95b09688ba93331678eb8397817111f674c9558e6dd8f5e871) Predicate({ns.bse.filename}) td_first', - f'Node(http://bsfs.ai/schema/File, http://example.com/me/file#78f7eb7f0d8221cdb2cb26c978fa42a11f75eb87becc768f4474134cb1e06926) Predicate({ns.bse.author}) Me, myself, and 
I', - f'Node(http://bsfs.ai/schema/File, http://example.com/me/file#78f7eb7f0d8221cdb2cb26c978fa42a11f75eb87becc768f4474134cb1e06926) Predicate({ns.bse.filename}) testfile', - f'Node(http://bsfs.ai/schema/File, http://example.com/me/file#80818b8ec2ee1919116dba9c8a7e0a4608313cf3b463cd88e9ed77a700dd92d3) Predicate({ns.bse.author}) Me, myself, and I', - f'Node(http://bsfs.ai/schema/File, http://example.com/me/file#80818b8ec2ee1919116dba9c8a7e0a4608313cf3b463cd88e9ed77a700dd92d3) Predicate({ns.bse.filename}) bar_first', - f'Node(http://bsfs.ai/schema/File, http://example.com/me/file#976d2ea0e58488678cc7e435fbfadabfb6eb6cf50ad51862f38f73729ed11795) Predicate({ns.bse.author}) Me, myself, and I', - f'Node(http://bsfs.ai/schema/File, http://example.com/me/file#976d2ea0e58488678cc7e435fbfadabfb6eb6cf50ad51862f38f73729ed11795) Predicate({ns.bse.filename}) omega_first', - f'Node(http://bsfs.ai/schema/File, http://example.com/me/file#997e2fbb7494a3818ec782d2bc87bf1cffafba6b9c0f658e4a6c18a723e944d3) Predicate({ns.bse.author}) Me, myself, and I', - f'Node(http://bsfs.ai/schema/File, http://example.com/me/file#997e2fbb7494a3818ec782d2bc87bf1cffafba6b9c0f658e4a6c18a723e944d3) Predicate({ns.bse.filename}) alpha_first', - f'Node(http://bsfs.ai/schema/File, http://example.com/me/file#a8af899ecdab60dfaea8ec7f934053624c80a1054539e163f2c7eaa986c2777d) Predicate({ns.bse.author}) Me, myself, and I', - f'Node(http://bsfs.ai/schema/File, http://example.com/me/file#a8af899ecdab60dfaea8ec7f934053624c80a1054539e163f2c7eaa986c2777d) Predicate({ns.bse.filename}) foo_second', - f'Node(http://bsfs.ai/schema/File, http://example.com/me/file#b8fd7fba818254166a6043195004138ebda6923e012442f819a2c49671136c70) Predicate({ns.bse.author}) Me, myself, and I', - f'Node(http://bsfs.ai/schema/File, http://example.com/me/file#b8fd7fba818254166a6043195004138ebda6923e012442f819a2c49671136c70) Predicate({ns.bse.filename}) bar_second', - f'Node(http://bsfs.ai/schema/File, 
http://example.com/me/file#d43758ace82154a1cc10ca0dfef63cb20dd831f9c87edd6dc06539eefe67371d) Predicate({ns.bse.author}) Me, myself, and I', - f'Node(http://bsfs.ai/schema/File, http://example.com/me/file#d43758ace82154a1cc10ca0dfef63cb20dd831f9c87edd6dc06539eefe67371d) Predicate({ns.bse.filename}) foo_first', - f'Node(http://bsfs.ai/schema/File, http://example.com/me/file#d803187cbf3676ae9d38126270a6152c60431589aa3bb3824baf8954e9c097f1) Predicate({ns.bse.author}) Me, myself, and I', - f'Node(http://bsfs.ai/schema/File, http://example.com/me/file#d803187cbf3676ae9d38126270a6152c60431589aa3bb3824baf8954e9c097f1) Predicate({ns.bse.filename}) td_second', - f'Node(http://bsfs.ai/schema/File, http://example.com/me/file#accb115d266ad60c53cd01a7f7130f245886ce8eaf69bc85319febc11d9fe089) Predicate({ns.bse.author}) Me, myself, and I', - f'Node(http://bsfs.ai/schema/File, http://example.com/me/file#accb115d266ad60c53cd01a7f7130f245886ce8eaf69bc85319febc11d9fe089) Predicate({ns.bse.filename}) testimage.jpg', + f'Node(https://schema.bsfs.io/ie/Node/Entity, http://example.com/me/file#2f4109b40107cc50e0884755a1a961ed126887e49b8dbaf0e146b2e226aa6647) Predicate({ns.bse.author}) Me, myself, and I', + f'Node(https://schema.bsfs.io/ie/Node/Entity, http://example.com/me/file#2f4109b40107cc50e0884755a1a961ed126887e49b8dbaf0e146b2e226aa6647) Predicate({ns.bse.filename}) alpha_second', + f'Node(https://schema.bsfs.io/ie/Node/Entity, http://example.com/me/file#441f3d10c8ff489fe8e33e639606512f6c463151cc429de7e554b9af670c2ece) Predicate({ns.bse.author}) Me, myself, and I', + f'Node(https://schema.bsfs.io/ie/Node/Entity, http://example.com/me/file#441f3d10c8ff489fe8e33e639606512f6c463151cc429de7e554b9af670c2ece) Predicate({ns.bse.filename}) omega_second', + f'Node(https://schema.bsfs.io/ie/Node/Entity, http://example.com/me/file#69b98ecf7aff3e95b09688ba93331678eb8397817111f674c9558e6dd8f5e871) Predicate({ns.bse.author}) Me, myself, and I', + f'Node(https://schema.bsfs.io/ie/Node/Entity, 
http://example.com/me/file#69b98ecf7aff3e95b09688ba93331678eb8397817111f674c9558e6dd8f5e871) Predicate({ns.bse.filename}) td_first', + f'Node(https://schema.bsfs.io/ie/Node/Entity, http://example.com/me/file#78f7eb7f0d8221cdb2cb26c978fa42a11f75eb87becc768f4474134cb1e06926) Predicate({ns.bse.author}) Me, myself, and I', + f'Node(https://schema.bsfs.io/ie/Node/Entity, http://example.com/me/file#78f7eb7f0d8221cdb2cb26c978fa42a11f75eb87becc768f4474134cb1e06926) Predicate({ns.bse.filename}) testfile', + f'Node(https://schema.bsfs.io/ie/Node/Entity, http://example.com/me/file#80818b8ec2ee1919116dba9c8a7e0a4608313cf3b463cd88e9ed77a700dd92d3) Predicate({ns.bse.author}) Me, myself, and I', + f'Node(https://schema.bsfs.io/ie/Node/Entity, http://example.com/me/file#80818b8ec2ee1919116dba9c8a7e0a4608313cf3b463cd88e9ed77a700dd92d3) Predicate({ns.bse.filename}) bar_first', + f'Node(https://schema.bsfs.io/ie/Node/Entity, http://example.com/me/file#976d2ea0e58488678cc7e435fbfadabfb6eb6cf50ad51862f38f73729ed11795) Predicate({ns.bse.author}) Me, myself, and I', + f'Node(https://schema.bsfs.io/ie/Node/Entity, http://example.com/me/file#976d2ea0e58488678cc7e435fbfadabfb6eb6cf50ad51862f38f73729ed11795) Predicate({ns.bse.filename}) omega_first', + f'Node(https://schema.bsfs.io/ie/Node/Entity, http://example.com/me/file#997e2fbb7494a3818ec782d2bc87bf1cffafba6b9c0f658e4a6c18a723e944d3) Predicate({ns.bse.author}) Me, myself, and I', + f'Node(https://schema.bsfs.io/ie/Node/Entity, http://example.com/me/file#997e2fbb7494a3818ec782d2bc87bf1cffafba6b9c0f658e4a6c18a723e944d3) Predicate({ns.bse.filename}) alpha_first', + f'Node(https://schema.bsfs.io/ie/Node/Entity, http://example.com/me/file#a8af899ecdab60dfaea8ec7f934053624c80a1054539e163f2c7eaa986c2777d) Predicate({ns.bse.author}) Me, myself, and I', + f'Node(https://schema.bsfs.io/ie/Node/Entity, http://example.com/me/file#a8af899ecdab60dfaea8ec7f934053624c80a1054539e163f2c7eaa986c2777d) Predicate({ns.bse.filename}) foo_second', + 
f'Node(https://schema.bsfs.io/ie/Node/Entity, http://example.com/me/file#b8fd7fba818254166a6043195004138ebda6923e012442f819a2c49671136c70) Predicate({ns.bse.author}) Me, myself, and I', + f'Node(https://schema.bsfs.io/ie/Node/Entity, http://example.com/me/file#b8fd7fba818254166a6043195004138ebda6923e012442f819a2c49671136c70) Predicate({ns.bse.filename}) bar_second', + f'Node(https://schema.bsfs.io/ie/Node/Entity, http://example.com/me/file#d43758ace82154a1cc10ca0dfef63cb20dd831f9c87edd6dc06539eefe67371d) Predicate({ns.bse.author}) Me, myself, and I', + f'Node(https://schema.bsfs.io/ie/Node/Entity, http://example.com/me/file#d43758ace82154a1cc10ca0dfef63cb20dd831f9c87edd6dc06539eefe67371d) Predicate({ns.bse.filename}) foo_first', + f'Node(https://schema.bsfs.io/ie/Node/Entity, http://example.com/me/file#d803187cbf3676ae9d38126270a6152c60431589aa3bb3824baf8954e9c097f1) Predicate({ns.bse.author}) Me, myself, and I', + f'Node(https://schema.bsfs.io/ie/Node/Entity, http://example.com/me/file#d803187cbf3676ae9d38126270a6152c60431589aa3bb3824baf8954e9c097f1) Predicate({ns.bse.filename}) td_second', + f'Node(https://schema.bsfs.io/ie/Node/Entity, http://example.com/me/file#accb115d266ad60c53cd01a7f7130f245886ce8eaf69bc85319febc11d9fe089) Predicate({ns.bse.author}) Me, myself, and I', + f'Node(https://schema.bsfs.io/ie/Node/Entity, http://example.com/me/file#accb115d266ad60c53cd01a7f7130f245886ce8eaf69bc85319febc11d9fe089) Predicate({ns.bse.filename}) testimage.jpg', # features - f'Node(http://bsfs.ai/schema/File, http://example.com/me/file#accb115d266ad60c53cd01a7f7130f245886ce8eaf69bc85319febc11d9fe089) Predicate(http://bsfs.ai/schema/Entity/colors_spatial#0658f2234a054e1dd59a14462c89f7733e019160419c796356aa831498bd0a04) (91, 127, 121, 94, 138, 167, 163, 134, 190, 138, 170, 156, 121, 142, 159)', + f'Node(https://schema.bsfs.io/ie/Node/Entity, http://example.com/me/file#accb115d266ad60c53cd01a7f7130f245886ce8eaf69bc85319febc11d9fe089) 
Predicate(https://schema.bsfs.io/ie/Node/Entity#colors_spatial_0658f2234a054e1dd59a14462c89f7733e019160419c796356aa831498bd0a04) (91, 127, 121, 94, 138, 167, 163, 134, 190, 138, 170, 156, 121, 142, 159)', # links to previews - f'Node(http://bsfs.ai/schema/File, http://example.com/me/file#2f4109b40107cc50e0884755a1a961ed126887e49b8dbaf0e146b2e226aa6647) Predicate({ns.bse.preview}) Node(http://bsfs.ai/schema/Preview, http://example.com/me/preview#79cb8a7e6369361a4f4cb7ff729c1ed3fcf87204769623d6fbd6ebfae601e5c7_s50)', - f'Node(http://bsfs.ai/schema/File, http://example.com/me/file#441f3d10c8ff489fe8e33e639606512f6c463151cc429de7e554b9af670c2ece) Predicate({ns.bse.preview}) Node(http://bsfs.ai/schema/Preview, http://example.com/me/preview#a8b3245636074d5370283b690281abda8ffdff12ce8b1af77c8bc0a4c85be860_s50)', - f'Node(http://bsfs.ai/schema/File, http://example.com/me/file#69b98ecf7aff3e95b09688ba93331678eb8397817111f674c9558e6dd8f5e871) Predicate({ns.bse.preview}) Node(http://bsfs.ai/schema/Preview, http://example.com/me/preview#968b9aa178585bc8d1fca0e4e32b8cf30b3941eff72f34e320584aaae8fd23ac_s50)', - f'Node(http://bsfs.ai/schema/File, http://example.com/me/file#78f7eb7f0d8221cdb2cb26c978fa42a11f75eb87becc768f4474134cb1e06926) Predicate({ns.bse.preview}) Node(http://bsfs.ai/schema/Preview, http://example.com/me/preview#dbfd8ebc0557c4925e9ff8411629a74a15eca934a4c2a6bd3134dd81d2f95a36_s50)', - f'Node(http://bsfs.ai/schema/File, http://example.com/me/file#80818b8ec2ee1919116dba9c8a7e0a4608313cf3b463cd88e9ed77a700dd92d3) Predicate({ns.bse.preview}) Node(http://bsfs.ai/schema/Preview, http://example.com/me/preview#567049149769e1d02e6af6cfee3991f7cf0cbc935cbf6a566047f40155fb13a8_s50)', - f'Node(http://bsfs.ai/schema/File, http://example.com/me/file#976d2ea0e58488678cc7e435fbfadabfb6eb6cf50ad51862f38f73729ed11795) Predicate({ns.bse.preview}) Node(http://bsfs.ai/schema/Preview, 
http://example.com/me/preview#df2185d8927ccef65c92fc90b94e800b02791354d8dede9dd9aa0e2c2cb1e91e_s50)', - f'Node(http://bsfs.ai/schema/File, http://example.com/me/file#997e2fbb7494a3818ec782d2bc87bf1cffafba6b9c0f658e4a6c18a723e944d3) Predicate({ns.bse.preview}) Node(http://bsfs.ai/schema/Preview, http://example.com/me/preview#9827509a74a60dfceed11936f7f624e9c932f66c8c0d20d355d56f8c3c9b56b1_s50)', - f'Node(http://bsfs.ai/schema/File, http://example.com/me/file#a8af899ecdab60dfaea8ec7f934053624c80a1054539e163f2c7eaa986c2777d) Predicate({ns.bse.preview}) Node(http://bsfs.ai/schema/Preview, http://example.com/me/preview#7a975385a110c21fcd12e238fab9501550fa02f6328749068a3bffd65e291027_s50)', - f'Node(http://bsfs.ai/schema/File, http://example.com/me/file#accb115d266ad60c53cd01a7f7130f245886ce8eaf69bc85319febc11d9fe089) Predicate({ns.bse.preview}) Node(http://bsfs.ai/schema/Preview, http://example.com/me/preview#2656e303d7218300326df73b64f312d8b37eb980358be27a38b5f63dae259be3_s50)', - f'Node(http://bsfs.ai/schema/File, http://example.com/me/file#b8fd7fba818254166a6043195004138ebda6923e012442f819a2c49671136c70) Predicate({ns.bse.preview}) Node(http://bsfs.ai/schema/Preview, http://example.com/me/preview#a63c84e647138a2b68113474212f6aee542b3707171ff178551db3c296e59817_s50)', - f'Node(http://bsfs.ai/schema/File, http://example.com/me/file#d43758ace82154a1cc10ca0dfef63cb20dd831f9c87edd6dc06539eefe67371d) Predicate({ns.bse.preview}) Node(http://bsfs.ai/schema/Preview, http://example.com/me/preview#26f16643b2570ac5b2d1f8c373d492cb724aae2dd8d71a0b63647838ed651254_s50)', - f'Node(http://bsfs.ai/schema/File, http://example.com/me/file#d803187cbf3676ae9d38126270a6152c60431589aa3bb3824baf8954e9c097f1) Predicate({ns.bse.preview}) Node(http://bsfs.ai/schema/Preview, http://example.com/me/preview#5d1235838c3d501204bb09c2de563d7e4a7fd17b7ec4ff302221c0e88c4741aa_s50)', + f'Node(https://schema.bsfs.io/ie/Node/Entity, 
http://example.com/me/file#2f4109b40107cc50e0884755a1a961ed126887e49b8dbaf0e146b2e226aa6647) Predicate({ns.bse.preview}) Node(https://schema.bsfs.io/ie/Node/Preview, http://example.com/me/preview#79cb8a7e6369361a4f4cb7ff729c1ed3fcf87204769623d6fbd6ebfae601e5c7_s50)', + f'Node(https://schema.bsfs.io/ie/Node/Entity, http://example.com/me/file#441f3d10c8ff489fe8e33e639606512f6c463151cc429de7e554b9af670c2ece) Predicate({ns.bse.preview}) Node(https://schema.bsfs.io/ie/Node/Preview, http://example.com/me/preview#a8b3245636074d5370283b690281abda8ffdff12ce8b1af77c8bc0a4c85be860_s50)', + f'Node(https://schema.bsfs.io/ie/Node/Entity, http://example.com/me/file#69b98ecf7aff3e95b09688ba93331678eb8397817111f674c9558e6dd8f5e871) Predicate({ns.bse.preview}) Node(https://schema.bsfs.io/ie/Node/Preview, http://example.com/me/preview#968b9aa178585bc8d1fca0e4e32b8cf30b3941eff72f34e320584aaae8fd23ac_s50)', + f'Node(https://schema.bsfs.io/ie/Node/Entity, http://example.com/me/file#78f7eb7f0d8221cdb2cb26c978fa42a11f75eb87becc768f4474134cb1e06926) Predicate({ns.bse.preview}) Node(https://schema.bsfs.io/ie/Node/Preview, http://example.com/me/preview#dbfd8ebc0557c4925e9ff8411629a74a15eca934a4c2a6bd3134dd81d2f95a36_s50)', + f'Node(https://schema.bsfs.io/ie/Node/Entity, http://example.com/me/file#80818b8ec2ee1919116dba9c8a7e0a4608313cf3b463cd88e9ed77a700dd92d3) Predicate({ns.bse.preview}) Node(https://schema.bsfs.io/ie/Node/Preview, http://example.com/me/preview#567049149769e1d02e6af6cfee3991f7cf0cbc935cbf6a566047f40155fb13a8_s50)', + f'Node(https://schema.bsfs.io/ie/Node/Entity, http://example.com/me/file#976d2ea0e58488678cc7e435fbfadabfb6eb6cf50ad51862f38f73729ed11795) Predicate({ns.bse.preview}) Node(https://schema.bsfs.io/ie/Node/Preview, http://example.com/me/preview#df2185d8927ccef65c92fc90b94e800b02791354d8dede9dd9aa0e2c2cb1e91e_s50)', + f'Node(https://schema.bsfs.io/ie/Node/Entity, http://example.com/me/file#997e2fbb7494a3818ec782d2bc87bf1cffafba6b9c0f658e4a6c18a723e944d3) 
Predicate({ns.bse.preview}) Node(https://schema.bsfs.io/ie/Node/Preview, http://example.com/me/preview#9827509a74a60dfceed11936f7f624e9c932f66c8c0d20d355d56f8c3c9b56b1_s50)', + f'Node(https://schema.bsfs.io/ie/Node/Entity, http://example.com/me/file#a8af899ecdab60dfaea8ec7f934053624c80a1054539e163f2c7eaa986c2777d) Predicate({ns.bse.preview}) Node(https://schema.bsfs.io/ie/Node/Preview, http://example.com/me/preview#7a975385a110c21fcd12e238fab9501550fa02f6328749068a3bffd65e291027_s50)', + f'Node(https://schema.bsfs.io/ie/Node/Entity, http://example.com/me/file#accb115d266ad60c53cd01a7f7130f245886ce8eaf69bc85319febc11d9fe089) Predicate({ns.bse.preview}) Node(https://schema.bsfs.io/ie/Node/Preview, http://example.com/me/preview#2656e303d7218300326df73b64f312d8b37eb980358be27a38b5f63dae259be3_s50)', + f'Node(https://schema.bsfs.io/ie/Node/Entity, http://example.com/me/file#b8fd7fba818254166a6043195004138ebda6923e012442f819a2c49671136c70) Predicate({ns.bse.preview}) Node(https://schema.bsfs.io/ie/Node/Preview, http://example.com/me/preview#a63c84e647138a2b68113474212f6aee542b3707171ff178551db3c296e59817_s50)', + f'Node(https://schema.bsfs.io/ie/Node/Entity, http://example.com/me/file#d43758ace82154a1cc10ca0dfef63cb20dd831f9c87edd6dc06539eefe67371d) Predicate({ns.bse.preview}) Node(https://schema.bsfs.io/ie/Node/Preview, http://example.com/me/preview#26f16643b2570ac5b2d1f8c373d492cb724aae2dd8d71a0b63647838ed651254_s50)', + f'Node(https://schema.bsfs.io/ie/Node/Entity, http://example.com/me/file#d803187cbf3676ae9d38126270a6152c60431589aa3bb3824baf8954e9c097f1) Predicate({ns.bse.preview}) Node(https://schema.bsfs.io/ie/Node/Preview, http://example.com/me/preview#5d1235838c3d501204bb09c2de563d7e4a7fd17b7ec4ff302221c0e88c4741aa_s50)', # preview dimensions - f'Node(http://bsfs.ai/schema/Preview, http://example.com/me/preview#2656e303d7218300326df73b64f312d8b37eb980358be27a38b5f63dae259be3_s50) Predicate({ns.bsp.height}) 33', - f'Node(http://bsfs.ai/schema/Preview, 
http://example.com/me/preview#2656e303d7218300326df73b64f312d8b37eb980358be27a38b5f63dae259be3_s50) Predicate({ns.bsp.width}) 50', - f'Node(http://bsfs.ai/schema/Preview, http://example.com/me/preview#26f16643b2570ac5b2d1f8c373d492cb724aae2dd8d71a0b63647838ed651254_s50) Predicate({ns.bsp.height}) 50', - f'Node(http://bsfs.ai/schema/Preview, http://example.com/me/preview#26f16643b2570ac5b2d1f8c373d492cb724aae2dd8d71a0b63647838ed651254_s50) Predicate({ns.bsp.width}) 36', - f'Node(http://bsfs.ai/schema/Preview, http://example.com/me/preview#567049149769e1d02e6af6cfee3991f7cf0cbc935cbf6a566047f40155fb13a8_s50) Predicate({ns.bsp.height}) 50', - f'Node(http://bsfs.ai/schema/Preview, http://example.com/me/preview#567049149769e1d02e6af6cfee3991f7cf0cbc935cbf6a566047f40155fb13a8_s50) Predicate({ns.bsp.width}) 36', - f'Node(http://bsfs.ai/schema/Preview, http://example.com/me/preview#5d1235838c3d501204bb09c2de563d7e4a7fd17b7ec4ff302221c0e88c4741aa_s50) Predicate({ns.bsp.height}) 50', - f'Node(http://bsfs.ai/schema/Preview, http://example.com/me/preview#5d1235838c3d501204bb09c2de563d7e4a7fd17b7ec4ff302221c0e88c4741aa_s50) Predicate({ns.bsp.width}) 36', - f'Node(http://bsfs.ai/schema/Preview, http://example.com/me/preview#79cb8a7e6369361a4f4cb7ff729c1ed3fcf87204769623d6fbd6ebfae601e5c7_s50) Predicate({ns.bsp.height}) 50', - f'Node(http://bsfs.ai/schema/Preview, http://example.com/me/preview#79cb8a7e6369361a4f4cb7ff729c1ed3fcf87204769623d6fbd6ebfae601e5c7_s50) Predicate({ns.bsp.width}) 36', - f'Node(http://bsfs.ai/schema/Preview, http://example.com/me/preview#7a975385a110c21fcd12e238fab9501550fa02f6328749068a3bffd65e291027_s50) Predicate({ns.bsp.height}) 50', - f'Node(http://bsfs.ai/schema/Preview, http://example.com/me/preview#7a975385a110c21fcd12e238fab9501550fa02f6328749068a3bffd65e291027_s50) Predicate({ns.bsp.width}) 36', - f'Node(http://bsfs.ai/schema/Preview, http://example.com/me/preview#968b9aa178585bc8d1fca0e4e32b8cf30b3941eff72f34e320584aaae8fd23ac_s50) 
Predicate({ns.bsp.height}) 50', - f'Node(http://bsfs.ai/schema/Preview, http://example.com/me/preview#968b9aa178585bc8d1fca0e4e32b8cf30b3941eff72f34e320584aaae8fd23ac_s50) Predicate({ns.bsp.width}) 36', - f'Node(http://bsfs.ai/schema/Preview, http://example.com/me/preview#9827509a74a60dfceed11936f7f624e9c932f66c8c0d20d355d56f8c3c9b56b1_s50) Predicate({ns.bsp.height}) 50', - f'Node(http://bsfs.ai/schema/Preview, http://example.com/me/preview#9827509a74a60dfceed11936f7f624e9c932f66c8c0d20d355d56f8c3c9b56b1_s50) Predicate({ns.bsp.width}) 36', - f'Node(http://bsfs.ai/schema/Preview, http://example.com/me/preview#a63c84e647138a2b68113474212f6aee542b3707171ff178551db3c296e59817_s50) Predicate({ns.bsp.height}) 50', - f'Node(http://bsfs.ai/schema/Preview, http://example.com/me/preview#a63c84e647138a2b68113474212f6aee542b3707171ff178551db3c296e59817_s50) Predicate({ns.bsp.width}) 36', - f'Node(http://bsfs.ai/schema/Preview, http://example.com/me/preview#a8b3245636074d5370283b690281abda8ffdff12ce8b1af77c8bc0a4c85be860_s50) Predicate({ns.bsp.height}) 50', - f'Node(http://bsfs.ai/schema/Preview, http://example.com/me/preview#a8b3245636074d5370283b690281abda8ffdff12ce8b1af77c8bc0a4c85be860_s50) Predicate({ns.bsp.width}) 36', - f'Node(http://bsfs.ai/schema/Preview, http://example.com/me/preview#dbfd8ebc0557c4925e9ff8411629a74a15eca934a4c2a6bd3134dd81d2f95a36_s50) Predicate({ns.bsp.height}) 50', - f'Node(http://bsfs.ai/schema/Preview, http://example.com/me/preview#dbfd8ebc0557c4925e9ff8411629a74a15eca934a4c2a6bd3134dd81d2f95a36_s50) Predicate({ns.bsp.width}) 36', - f'Node(http://bsfs.ai/schema/Preview, http://example.com/me/preview#df2185d8927ccef65c92fc90b94e800b02791354d8dede9dd9aa0e2c2cb1e91e_s50) Predicate({ns.bsp.height}) 50', - f'Node(http://bsfs.ai/schema/Preview, http://example.com/me/preview#df2185d8927ccef65c92fc90b94e800b02791354d8dede9dd9aa0e2c2cb1e91e_s50) Predicate({ns.bsp.width}) 36', + f'Node(https://schema.bsfs.io/ie/Node/Preview, 
http://example.com/me/preview#2656e303d7218300326df73b64f312d8b37eb980358be27a38b5f63dae259be3_s50) Predicate({ns.bsp.height}) 33', + f'Node(https://schema.bsfs.io/ie/Node/Preview, http://example.com/me/preview#2656e303d7218300326df73b64f312d8b37eb980358be27a38b5f63dae259be3_s50) Predicate({ns.bsp.width}) 50', + f'Node(https://schema.bsfs.io/ie/Node/Preview, http://example.com/me/preview#26f16643b2570ac5b2d1f8c373d492cb724aae2dd8d71a0b63647838ed651254_s50) Predicate({ns.bsp.height}) 50', + f'Node(https://schema.bsfs.io/ie/Node/Preview, http://example.com/me/preview#26f16643b2570ac5b2d1f8c373d492cb724aae2dd8d71a0b63647838ed651254_s50) Predicate({ns.bsp.width}) 36', + f'Node(https://schema.bsfs.io/ie/Node/Preview, http://example.com/me/preview#567049149769e1d02e6af6cfee3991f7cf0cbc935cbf6a566047f40155fb13a8_s50) Predicate({ns.bsp.height}) 50', + f'Node(https://schema.bsfs.io/ie/Node/Preview, http://example.com/me/preview#567049149769e1d02e6af6cfee3991f7cf0cbc935cbf6a566047f40155fb13a8_s50) Predicate({ns.bsp.width}) 36', + f'Node(https://schema.bsfs.io/ie/Node/Preview, http://example.com/me/preview#5d1235838c3d501204bb09c2de563d7e4a7fd17b7ec4ff302221c0e88c4741aa_s50) Predicate({ns.bsp.height}) 50', + f'Node(https://schema.bsfs.io/ie/Node/Preview, http://example.com/me/preview#5d1235838c3d501204bb09c2de563d7e4a7fd17b7ec4ff302221c0e88c4741aa_s50) Predicate({ns.bsp.width}) 36', + f'Node(https://schema.bsfs.io/ie/Node/Preview, http://example.com/me/preview#79cb8a7e6369361a4f4cb7ff729c1ed3fcf87204769623d6fbd6ebfae601e5c7_s50) Predicate({ns.bsp.height}) 50', + f'Node(https://schema.bsfs.io/ie/Node/Preview, http://example.com/me/preview#79cb8a7e6369361a4f4cb7ff729c1ed3fcf87204769623d6fbd6ebfae601e5c7_s50) Predicate({ns.bsp.width}) 36', + f'Node(https://schema.bsfs.io/ie/Node/Preview, http://example.com/me/preview#7a975385a110c21fcd12e238fab9501550fa02f6328749068a3bffd65e291027_s50) Predicate({ns.bsp.height}) 50', + f'Node(https://schema.bsfs.io/ie/Node/Preview, 
http://example.com/me/preview#7a975385a110c21fcd12e238fab9501550fa02f6328749068a3bffd65e291027_s50) Predicate({ns.bsp.width}) 36', + f'Node(https://schema.bsfs.io/ie/Node/Preview, http://example.com/me/preview#968b9aa178585bc8d1fca0e4e32b8cf30b3941eff72f34e320584aaae8fd23ac_s50) Predicate({ns.bsp.height}) 50', + f'Node(https://schema.bsfs.io/ie/Node/Preview, http://example.com/me/preview#968b9aa178585bc8d1fca0e4e32b8cf30b3941eff72f34e320584aaae8fd23ac_s50) Predicate({ns.bsp.width}) 36', + f'Node(https://schema.bsfs.io/ie/Node/Preview, http://example.com/me/preview#9827509a74a60dfceed11936f7f624e9c932f66c8c0d20d355d56f8c3c9b56b1_s50) Predicate({ns.bsp.height}) 50', + f'Node(https://schema.bsfs.io/ie/Node/Preview, http://example.com/me/preview#9827509a74a60dfceed11936f7f624e9c932f66c8c0d20d355d56f8c3c9b56b1_s50) Predicate({ns.bsp.width}) 36', + f'Node(https://schema.bsfs.io/ie/Node/Preview, http://example.com/me/preview#a63c84e647138a2b68113474212f6aee542b3707171ff178551db3c296e59817_s50) Predicate({ns.bsp.height}) 50', + f'Node(https://schema.bsfs.io/ie/Node/Preview, http://example.com/me/preview#a63c84e647138a2b68113474212f6aee542b3707171ff178551db3c296e59817_s50) Predicate({ns.bsp.width}) 36', + f'Node(https://schema.bsfs.io/ie/Node/Preview, http://example.com/me/preview#a8b3245636074d5370283b690281abda8ffdff12ce8b1af77c8bc0a4c85be860_s50) Predicate({ns.bsp.height}) 50', + f'Node(https://schema.bsfs.io/ie/Node/Preview, http://example.com/me/preview#a8b3245636074d5370283b690281abda8ffdff12ce8b1af77c8bc0a4c85be860_s50) Predicate({ns.bsp.width}) 36', + f'Node(https://schema.bsfs.io/ie/Node/Preview, http://example.com/me/preview#dbfd8ebc0557c4925e9ff8411629a74a15eca934a4c2a6bd3134dd81d2f95a36_s50) Predicate({ns.bsp.height}) 50', + f'Node(https://schema.bsfs.io/ie/Node/Preview, http://example.com/me/preview#dbfd8ebc0557c4925e9ff8411629a74a15eca934a4c2a6bd3134dd81d2f95a36_s50) Predicate({ns.bsp.width}) 36', + f'Node(https://schema.bsfs.io/ie/Node/Preview, 
http://example.com/me/preview#df2185d8927ccef65c92fc90b94e800b02791354d8dede9dd9aa0e2c2cb1e91e_s50) Predicate({ns.bsp.height}) 50', + f'Node(https://schema.bsfs.io/ie/Node/Preview, http://example.com/me/preview#df2185d8927ccef65c92fc90b94e800b02791354d8dede9dd9aa0e2c2cb1e91e_s50) Predicate({ns.bsp.width}) 36', # assets # ... (not checked) })) diff --git a/test/apps/test_info.py b/test/apps/test_info.py index d705629..ffcaecf 100644 --- a/test/apps/test_info.py +++ b/test/apps/test_info.py @@ -31,11 +31,11 @@ class TestIndex(unittest.TestCase): {'bsie.extractor.generic.constant.Constant': { 'schema': ''' bse:author rdfs:subClassOf bsfs:Predicate ; - rdfs:domain bsfs:Entity ; + rdfs:domain bsn:Entity ; rdfs:range xsd:string ; bsfs:unique "true"^^xsd:boolean . ''', - 'tuples': [['http://bsfs.ai/schema/Entity#author', 'Me, myself, and I']], + 'tuples': [['https://schema.bsfs.io/ie/Node/Entity#author', 'Me, myself, and I']], }}, {'bsie.extractor.image.colors_spatial.ColorsSpatial': { 'width': 2, @@ -60,14 +60,14 @@ class TestIndex(unittest.TestCase): main(['--config', self.config_path, 'predicates']) # verify output self.assertSetEqual({pred for pred in outbuf.getvalue().split('\n') if pred != ''}, { - 'http://bsfs.ai/schema/Entity#author', - 'http://bsfs.ai/schema/Predicate', - 'http://bsfs.ai/schema/Entity#filename', - 'http://bsfs.ai/schema/Entity/colors_spatial#0658f2234a054e1dd59a14462c89f7733e019160419c796356aa831498bd0a04', - 'http://bsfs.ai/schema/Entity#preview', - 'http://bsfs.ai/schema/Preview#width', - 'http://bsfs.ai/schema/Preview#height', - 'http://bsfs.ai/schema/Preview#asset', + 'https://schema.bsfs.io/ie/Node/Entity#author', + 'https://schema.bsfs.io/core/Predicate', + 'https://schema.bsfs.io/ie/Node/Entity#filename', + 'https://schema.bsfs.io/ie/Node/Entity#colors_spatial_0658f2234a054e1dd59a14462c89f7733e019160419c796356aa831498bd0a04', + 'https://schema.bsfs.io/ie/Node/Entity#preview', + 'https://schema.bsfs.io/ie/Node/Preview#width', + 
'https://schema.bsfs.io/ie/Node/Preview#height', + 'https://schema.bsfs.io/ie/Node/Preview#asset', }) def test_schema(self): @@ -78,14 +78,14 @@ class TestIndex(unittest.TestCase): # verify output schema = bsfs.schema.from_string(outbuf.getvalue()) self.assertSetEqual({pred.uri for pred in schema.predicates()}, { - 'http://bsfs.ai/schema/Entity#author', - 'http://bsfs.ai/schema/Predicate', - 'http://bsfs.ai/schema/Entity#filename', - 'http://bsfs.ai/schema/Entity/colors_spatial#0658f2234a054e1dd59a14462c89f7733e019160419c796356aa831498bd0a04', - 'http://bsfs.ai/schema/Entity#preview', - 'http://bsfs.ai/schema/Preview#width', - 'http://bsfs.ai/schema/Preview#height', - 'http://bsfs.ai/schema/Preview#asset', + 'https://schema.bsfs.io/ie/Node/Entity#author', + 'https://schema.bsfs.io/core/Predicate', + 'https://schema.bsfs.io/ie/Node/Entity#filename', + 'https://schema.bsfs.io/ie/Node/Entity#colors_spatial_0658f2234a054e1dd59a14462c89f7733e019160419c796356aa831498bd0a04', + 'https://schema.bsfs.io/ie/Node/Entity#preview', + 'https://schema.bsfs.io/ie/Node/Preview#width', + 'https://schema.bsfs.io/ie/Node/Preview#height', + 'https://schema.bsfs.io/ie/Node/Preview#asset', }) def test_invalid(self): diff --git a/test/apps/test_loader.py b/test/apps/test_loader.py index 20254ec..4670266 100644 --- a/test/apps/test_loader.py +++ b/test/apps/test_loader.py @@ -27,7 +27,7 @@ class TestLoader(unittest.TestCase): # pipeline contains only default predicates pipeline = load_pipeline(path) self.assertSetEqual({pred.uri for pred in pipeline.schema.predicates()}, { - 'http://bsfs.ai/schema/Predicate', + 'https://schema.bsfs.io/core/Predicate', }) # pipeline is built according to configured extractors @@ -41,11 +41,11 @@ class TestLoader(unittest.TestCase): {'bsie.extractor.generic.constant.Constant': { 'schema': ''' bse:author rdfs:subClassOf bsfs:Predicate ; - rdfs:domain bsfs:Entity ; + rdfs:domain bsn:Entity ; rdfs:range xsd:string ; bsfs:unique "true"^^xsd:boolean . 
''', - 'tuples': [['http://bsfs.ai/schema/Entity#author', 'Me, myself, and I']], + 'tuples': [['https://schema.bsfs.io/ie/Node/Entity#author', 'Me, myself, and I']], }}, {'bsie.extractor.image.colors_spatial.ColorsSpatial': { 'width': 2, @@ -61,14 +61,14 @@ class TestLoader(unittest.TestCase): # pipeline contains all defined predicates pipeline = load_pipeline(path) self.assertSetEqual({pred.uri for pred in pipeline.schema.predicates()}, { - 'http://bsfs.ai/schema/Entity#author', - 'http://bsfs.ai/schema/Predicate', - 'http://bsfs.ai/schema/Entity#filename', - 'http://bsfs.ai/schema/Entity/colors_spatial#0658f2234a054e1dd59a14462c89f7733e019160419c796356aa831498bd0a04', - 'http://bsfs.ai/schema/Entity#preview', - 'http://bsfs.ai/schema/Preview#width', - 'http://bsfs.ai/schema/Preview#height', - 'http://bsfs.ai/schema/Preview#asset', + 'https://schema.bsfs.io/ie/Node/Entity#author', + 'https://schema.bsfs.io/core/Predicate', + 'https://schema.bsfs.io/ie/Node/Entity#filename', + 'https://schema.bsfs.io/ie/Node/Entity#colors_spatial_0658f2234a054e1dd59a14462c89f7733e019160419c796356aa831498bd0a04', + 'https://schema.bsfs.io/ie/Node/Entity#preview', + 'https://schema.bsfs.io/ie/Node/Preview#width', + 'https://schema.bsfs.io/ie/Node/Preview#height', + 'https://schema.bsfs.io/ie/Node/Preview#asset', }) # config file must exist diff --git a/test/apps/test_main.py b/test/apps/test_main.py index a1d8a49..4fa094b 100644 --- a/test/apps/test_main.py +++ b/test/apps/test_main.py @@ -43,9 +43,9 @@ class TestMain(unittest.TestCase): with contextlib.redirect_stdout(outbuf): main(['info', '--config', self.config_path, 'predicates']) self.assertEqual(set(outbuf.getvalue().strip().split('\n')), { - 'http://bsfs.ai/schema/Entity#filename', - 'http://bsfs.ai/schema/Entity#filesize', - 'http://bsfs.ai/schema/Predicate', + 'https://schema.bsfs.io/ie/Node/Entity#filename', + 'https://schema.bsfs.io/ie/Node/Entity#filesize', + 'https://schema.bsfs.io/core/Predicate', }) diff --git 
a/test/extractor/generic/test_constant.py b/test/extractor/generic/test_constant.py index db55852..77ee02b 100644 --- a/test/extractor/generic/test_constant.py +++ b/test/extractor/generic/test_constant.py @@ -15,11 +15,11 @@ class TestConstant(unittest.TestCase): def test_extract(self): schema = ''' bse:author rdfs:subClassOf bsfs:Predicate ; - rdfs:domain bsfs:Entity ; + rdfs:domain bsn:Entity ; rdfs:range xsd:string ; bsfs:unique "true"^^xsd:boolean . bse:comment rdfs:subClassOf bsfs:Predicate ; - rdfs:domain bsfs:Entity ; + rdfs:domain bsn:Entity ; rdfs:range xsd:string ; bsfs:unique "false"^^xsd:boolean . ''' @@ -28,10 +28,10 @@ class TestConstant(unittest.TestCase): (ns.bse.comment, 'the quick brown fox jumps over the lazy dog.'), ] ext = Constant(schema, tuples) - node = _node.Node(ns.bsfs.Entity, '') # Blank node + node = _node.Node(ns.bsn.Entity, '') # Blank node p_author = ext.schema.predicate(ns.bse.author) p_comment = ext.schema.predicate(ns.bse.comment) - entity = ext.schema.node(ns.bsfs.Node).child(ns.bsfs.Entity) + entity = ext.schema.node(ns.bsfs.Node).child(ns.bsn.Entity) string = ext.schema.literal(ns.bsfs.Literal).child(ns.xsd.string) # baseline self.assertSetEqual(set(ext.extract(node, None, (p_author, p_comment))), @@ -50,11 +50,11 @@ class TestConstant(unittest.TestCase): # schema compliance schema = ''' bse:author rdfs:subClassOf bsfs:Predicate ; - rdfs:domain bsfs:Entity ; + rdfs:domain bsn:Entity ; rdfs:range xsd:string ; bsfs:unique "true"^^xsd:boolean . bse:comment rdfs:subClassOf bsfs:Predicate ; - rdfs:domain bsfs:Entity ; + rdfs:domain bsn:Entity ; rdfs:range xsd:string ; bsfs:unique "false"^^xsd:boolean . ''' @@ -75,13 +75,13 @@ class TestConstant(unittest.TestCase): def test_eq(self): schema_a = ''' bse:author rdfs:subClassOf bsfs:Predicate ; - rdfs:domain bsfs:Entity ; + rdfs:domain bsn:Entity ; rdfs:range xsd:string ; bsfs:unique "true"^^xsd:boolean . 
''' schema_b = ''' bse:comment rdfs:subClassOf bsfs:Predicate ; - rdfs:domain bsfs:Entity ; + rdfs:domain bsn:Entity ; rdfs:range xsd:string ; bsfs:unique "false"^^xsd:boolean . ''' diff --git a/test/extractor/generic/test_path.py b/test/extractor/generic/test_path.py index d40a28c..0beb37e 100644 --- a/test/extractor/generic/test_path.py +++ b/test/extractor/generic/test_path.py @@ -26,17 +26,17 @@ class TestPath(unittest.TestCase): self.assertEqual(Path().schema, bsfs.schema.from_string(base.SCHEMA_PREAMBLE + ''' bse:filename rdfs:subClassOf bsfs:Predicate ; - rdfs:domain bsfs:File ; + rdfs:domain bsn:Entity ; rdfs:range xsd:string ; bsfs:unique "true"^^xsd:boolean . ''')) def test_extract(self): ext = Path() - node = _node.Node(ns.bsfs.File, '') # Blank node + node = _node.Node(ns.bsn.Entity, '') # Blank node content = '/tmp/foo/bar' p_filename = ext.schema.predicate(ns.bse.filename) - entity = ext.schema.node(ns.bsfs.Node).child(ns.bsfs.Entity) + entity = ext.schema.node(ns.bsfs.Node).child(ns.bsn.Entity) string = ext.schema.literal(ns.bsfs.Literal).child(ns.xsd.string) # baseline diff --git a/test/extractor/generic/test_stat.py b/test/extractor/generic/test_stat.py index 8868546..0e83e24 100644 --- a/test/extractor/generic/test_stat.py +++ b/test/extractor/generic/test_stat.py @@ -27,17 +27,17 @@ class TestStat(unittest.TestCase): self.assertEqual(Stat().schema, bsfs.schema.from_string(base.SCHEMA_PREAMBLE + ''' bse:filesize rdfs:subClassOf bsfs:Predicate ; - rdfs:domain bsfs:File ; + rdfs:domain bsn:Entity ; rdfs:range xsd:integer ; bsfs:unique "true"^^xsd:boolean . 
''')) def test_extract(self): ext = Stat() - node = _node.Node(ns.bsfs.File, '') # Blank node + node = _node.Node(ns.bsn.Entity, '') # Blank node content = os.stat(__file__) p_filesize = ext.schema.predicate(ns.bse.filesize) - entity = ext.schema.node(ns.bsfs.Node).child(ns.bsfs.Entity) + entity = ext.schema.node(ns.bsfs.Node).child(ns.bsn.Entity) string = ext.schema.literal(ns.bsfs.Literal).child(ns.xsd.string) # baseline diff --git a/test/extractor/image/test_colors_spatial.py b/test/extractor/image/test_colors_spatial.py index 967aaf2..902ab6d 100644 --- a/test/extractor/image/test_colors_spatial.py +++ b/test/extractor/image/test_colors_spatial.py @@ -19,8 +19,8 @@ from bsie.extractor.image.colors_spatial import ColorsSpatial class TestColorsSpatial(unittest.TestCase): def setUp(self): # content id with default constructors (width=32, height=32, exp=4) - self.instance_prefix = 'http://ie.bsfs.ai/schema/Feature/ColorsSpatial' - self.predicate_prefix = 'http://bsfs.ai/schema/Entity/colors_spatial' + self.instance_prefix = 'https://schema.bsfs.io/ie/Literal/Array/Feature/ColorsSpatial' + self.predicate_prefix = 'https://schema.bsfs.io/ie/Node/Entity#colors_spatial_' self.uuid = 'adee8d6c43687021e1c5bffe56bcfe727f1638d792744137181304ef889dac2a' def test_essentials(self): @@ -50,7 +50,7 @@ class TestColorsSpatial(unittest.TestCase): def test_schema(self): schema = bsfs.schema.from_string(base.SCHEMA_PREAMBLE + f''' - <{self.instance_prefix}> rdfs:subClassOf bsfs:Feature ; + <{self.instance_prefix}> rdfs:subClassOf bsa:Feature ; # annotations rdfs:label "Spatially dominant colors"^^xsd:string ; schema:description "Domiant colors of subregions in an image."^^xsd:string ; @@ -63,8 +63,8 @@ class TestColorsSpatial(unittest.TestCase): <{self.instance_prefix}/args#height> "32"^^xsd:integer ; <{self.instance_prefix}/args#exp> "4"^^xsd:float . 
- <{self.predicate_prefix}#{self.uuid}> rdfs:subClassOf bsfs:Predicate ; - rdfs:domain bsfs:File ; + <{self.predicate_prefix}{self.uuid}> rdfs:subClassOf bsfs:Predicate ; + rdfs:domain bsn:Entity ; rdfs:range <{self.instance_prefix}#{self.uuid}> ; bsfs:unique "true"^^xsd:boolean . ''') @@ -73,7 +73,7 @@ class TestColorsSpatial(unittest.TestCase): def test_extract(self): ext = ColorsSpatial(2,2,2) img = PIL.Image.open(os.path.join(os.path.dirname(__file__), 'testimage.jpg')) - node = _node.Node(ns.bsfs.Entity, bsfs.URI('http://example.com/entity#1234')) + node = _node.Node(ns.bsn.Entity, bsfs.URI('http://example.com/entity#1234')) principals = set(ext.principals) self.assertEqual(len(principals), 1) # valid invocation yields feature diff --git a/test/extractor/test_base.py b/test/extractor/test_base.py index bb1f73b..81865e1 100644 --- a/test/extractor/test_base.py +++ b/test/extractor/test_base.py @@ -15,11 +15,11 @@ class StubExtractor(base.Extractor): def __init__(self): super().__init__(bsfs.schema.from_string(base.SCHEMA_PREAMBLE + ''' bse:author rdfs:subClassOf bsfs:Predicate ; - rdfs:domain bsfs:Entity ; + rdfs:domain bsn:Entity ; rdfs:range xsd:string ; bsfs:unique "false"^^xsd:boolean . bse:comment rdfs:subClassOf bsfs:Predicate ; - rdfs:domain bsfs:Entity ; + rdfs:domain bsn:Entity ; rdfs:range xsd:string ; bsfs:unique "false"^^xsd:boolean . 
''')) @@ -48,8 +48,8 @@ class TestExtractor(unittest.TestCase): def test_principals(self): schema = bsfs.schema.Schema() - entity = schema.node(ns.bsfs.Node).child(ns.bsfs.Entity) - string = schema.literal(ns.bsfs.Literal).child(bsfs.URI('http://www.w3.org/2001/XMLSchema#string')) + entity = schema.node(ns.bsfs.Node).child(ns.bsn.Entity) + string = schema.literal(ns.bsfs.Literal).child(ns.xsd.string) p_author = schema.predicate(ns.bsfs.Predicate).child(ns.bse.author, domain=entity, range=string) p_comment = schema.predicate(ns.bsfs.Predicate).child(ns.bse.comment, domain=entity, range=string) ext = StubExtractor() diff --git a/test/extractor/test_builder.py b/test/extractor/test_builder.py index 1b8951a..fbb0895 100644 --- a/test/extractor/test_builder.py +++ b/test/extractor/test_builder.py @@ -46,33 +46,33 @@ class TestExtractorBuilder(unittest.TestCase): {'bsie.extractor.generic.constant.Constant': { 'schema': ''' bse:author rdfs:subClassOf bsfs:Predicate ; - rdfs:domain bsfs:Entity ; + rdfs:domain bsn:Entity ; rdfs:range xsd:string ; bsfs:unique "true"^^xsd:boolean . bse:rating rdfs:subClassOf bsfs:Predicate ; - rdfs:domain bsfs:Entity ; + rdfs:domain bsn:Entity ; rdfs:range xsd:integer ; bsfs:unique "true"^^xsd:boolean . ''', 'tuples': [ - ('http://bsfs.ai/schema/Entity#author', 'Me, myself, and I'), - ('http://bsfs.ai/schema/Entity#rating', 123), + ('https://schema.bsfs.io/ie/Node/Entity#author', 'Me, myself, and I'), + ('https://schema.bsfs.io/ie/Node/Entity#rating', 123), ], }}]) obj = builder.build(0) import bsie.extractor.generic.constant self.assertEqual(obj, bsie.extractor.generic.constant.Constant(''' bse:author rdfs:subClassOf bsfs:Predicate ; - rdfs:domain bsfs:Entity ; + rdfs:domain bsn:Entity ; rdfs:range xsd:string ; bsfs:unique "true"^^xsd:boolean . bse:rating rdfs:subClassOf bsfs:Predicate ; - rdfs:domain bsfs:Entity ; + rdfs:domain bsn:Entity ; rdfs:range xsd:integer ; bsfs:unique "true"^^xsd:boolean . 
''', [ - ('http://bsfs.ai/schema/Entity#author', 'Me, myself, and I'), - ('http://bsfs.ai/schema/Entity#rating', 123), + ('https://schema.bsfs.io/ie/Node/Entity#author', 'Me, myself, and I'), + ('https://schema.bsfs.io/ie/Node/Entity#rating', 123), ])) # building with invalid args diff --git a/test/extractor/test_preview.py b/test/extractor/test_preview.py index 0e0068d..6526783 100644 --- a/test/extractor/test_preview.py +++ b/test/extractor/test_preview.py @@ -40,28 +40,28 @@ class TestPreview(unittest.TestCase): def test_schema(self): self.assertEqual(Preview([1,2,3]).schema, bsfs.schema.from_string(base.SCHEMA_PREAMBLE + ''' - bsfs:Preview rdfs:subClassOf bsfs:Node . - bsfs:BinaryBlob rdfs:subClassOf bsfs:Literal . - bsfs:JPEG rdfs:subClassOf bsfs:BinaryBlob . + bsn:Preview rdfs:subClassOf bsfs:Node . + bsl:BinaryBlob rdfs:subClassOf bsfs:Literal . + rdfs:subClassOf bsl:BinaryBlob . bse:preview rdfs:subClassOf bsfs:Predicate ; - rdfs:domain bsfs:File ; - rdfs:range bsfs:Preview ; + rdfs:domain bsn:Entity ; + rdfs:range bsn:Preview ; bsfs:unique "false"^^xsd:boolean . bsp:width rdfs:subClassOf bsfs:Predicate ; - rdfs:domain bsfs:Preview ; + rdfs:domain bsn:Preview ; rdfs:range xsd:integer ; bsfs:unique "true"^^xsd:boolean . bsp:height rdfs:subClassOf bsfs:Predicate ; - rdfs:domain bsfs:Preview ; + rdfs:domain bsn:Preview ; rdfs:range xsd:integer ; bsfs:unique "true"^^xsd:boolean . bsp:asset rdfs:subClassOf bsfs:Predicate ; - rdfs:domain bsfs:Preview ; - rdfs:range bsfs:JPEG ; + rdfs:domain bsn:Preview ; + rdfs:range ; bsfs:unique "true"^^xsd:boolean . 
''')) @@ -69,7 +69,7 @@ class TestPreview(unittest.TestCase): def test_extract(self): # setup dependents rdr = Reader() - subject = _node.Node(ns.bsfs.File) + subject = _node.Node(ns.bsn.Entity) path = os.path.join(os.path.dirname(__file__), 'testimage.jpg') # setup extractor @@ -83,7 +83,7 @@ class TestPreview(unittest.TestCase): gen(10) # NOTE: consume some image to avoid resource error warning # extract a preview triples = set(ext.extract(subject, rdr(path), principals)) - thumbs = {node for node, _, _ in triples if node.node_type == ns.bsfs.Preview} + thumbs = {node for node, _, _ in triples if node.node_type == ns.bsn.Preview} self.assertEqual(len(thumbs), 1) thumb = list(thumbs)[0] # test properties @@ -107,7 +107,7 @@ class TestPreview(unittest.TestCase): self.assertEqual(principals, {ext.schema.predicate(ns.bse.preview)}) # extract a preview triples = set(ext.extract(subject, rdr(path), principals)) - thumbs = {node for node, _, _ in triples if node.node_type == ns.bsfs.Preview} + thumbs = {node for node, _, _ in triples if node.node_type == ns.bsn.Preview} self.assertEqual(len(thumbs), 2) self.assertSetEqual({10, 20}, { value for _, pred, value in triples if pred == ext.schema.predicate(ns.bsp.width)}) diff --git a/test/lib/test_bsie.py b/test/lib/test_bsie.py index 52da8b8..0c393cc 100644 --- a/test/lib/test_bsie.py +++ b/test/lib/test_bsie.py @@ -25,17 +25,17 @@ class TestBSIE(unittest.TestCase): {'bsie.extractor.generic.path.Path': {}}, {'bsie.extractor.generic.stat.Stat': {}}, {'bsie.extractor.generic.constant.Constant': dict( - tuples=[('http://bsfs.ai/schema/Entity#author', 'Me, myself, and I')], + tuples=[('https://schema.bsfs.io/ie/Node/Entity#author', 'Me, myself, and I')], schema=''' bse:author rdfs:subClassOf bsfs:Predicate ; - rdfs:domain bsfs:Entity ; + rdfs:domain bsn:Entity ; rdfs:range xsd:string ; bsfs:unique "true"^^xsd:boolean . 
''', )}, ]) # build pipeline - self.naming_policy = DefaultNamingPolicy(host='http://example.com/local', user='') + self.naming_policy = DefaultNamingPolicy(host='http://example.com/local', user='me') pbuild = PipelineBuilder(rbuild, ebuild) self.pipeline = pbuild.build() @@ -49,17 +49,17 @@ class TestBSIE(unittest.TestCase): }) self.assertEqual(lib.schema, bsfs.schema.from_string(SCHEMA_PREAMBLE + ''' bse:filename rdfs:subClassOf bsfs:Predicate ; - rdfs:domain bsfs:File ; + rdfs:domain bsn:Entity ; rdfs:range xsd:string ; bsfs:unique "true"^^xsd:boolean . bse:filesize rdfs:subClassOf bsfs:Predicate ; - rdfs:domain bsfs:File ; + rdfs:domain bsn:Entity ; rdfs:range xsd:integer; bsfs:unique "true"^^xsd:boolean . bse:author rdfs:subClassOf bsfs:Predicate ; - rdfs:domain bsfs:Entity ; + rdfs:domain bsn:Entity ; rdfs:range xsd:string ; bsfs:unique "true"^^xsd:boolean . ''')) @@ -76,12 +76,12 @@ class TestBSIE(unittest.TestCase): }) self.assertEqual(lib.schema, bsfs.schema.from_string(SCHEMA_PREAMBLE + ''' bse:filesize rdfs:subClassOf bsfs:Predicate ; - rdfs:domain bsfs:File ; + rdfs:domain bsn:Entity ; rdfs:range xsd:integer; bsfs:unique "true"^^xsd:boolean . bse:author rdfs:subClassOf bsfs:Predicate ; - rdfs:domain bsfs:Entity ; + rdfs:domain bsn:Entity ; rdfs:range xsd:string ; bsfs:unique "true"^^xsd:boolean . ''')) @@ -94,17 +94,17 @@ class TestBSIE(unittest.TestCase): }) self.assertEqual(lib.schema, bsfs.schema.from_string(SCHEMA_PREAMBLE + ''' bse:filename rdfs:subClassOf bsfs:Predicate ; - rdfs:domain bsfs:File ; + rdfs:domain bsn:Entity ; rdfs:range xsd:string ; bsfs:unique "true"^^xsd:boolean . bse:filesize rdfs:subClassOf bsfs:Predicate ; - rdfs:domain bsfs:File ; + rdfs:domain bsn:Entity ; rdfs:range xsd:integer; bsfs:unique "true"^^xsd:boolean . bse:author rdfs:subClassOf bsfs:Predicate ; - rdfs:domain bsfs:Entity ; + rdfs:domain bsn:Entity ; rdfs:range xsd:string ; bsfs:unique "true"^^xsd:boolean . 
@@ -121,7 +121,7 @@ class TestBSIE(unittest.TestCase): }) self.assertEqual(lib.schema, bsfs.schema.from_string(SCHEMA_PREAMBLE + ''' bse:author rdfs:subClassOf bsfs:Predicate ; - rdfs:domain bsfs:Entity ; + rdfs:domain bsn:Entity ; rdfs:range xsd:string ; bsfs:unique "true"^^xsd:boolean . ''')) @@ -136,7 +136,7 @@ class TestBSIE(unittest.TestCase): }) self.assertEqual(lib.schema, bsfs.schema.from_string(SCHEMA_PREAMBLE + ''' bse:filesize rdfs:subClassOf bsfs:Predicate ; - rdfs:domain bsfs:File ; + rdfs:domain bsn:Entity ; rdfs:range xsd:integer; bsfs:unique "true"^^xsd:boolean . @@ -152,7 +152,7 @@ class TestBSIE(unittest.TestCase): ns.bse.author, }) content_hash = 'a948904f2f0f479b8f8197694b30184b0d2ed1c1cd2a1ec0fb85d299a192a447' - subject = node.Node(ns.bsfs.File, uri=f'http://example.com/local/file#{content_hash}') + subject = node.Node(ns.bsn.Entity, uri=f'http://example.com/local/me/file#{content_hash}') testfile = os.path.join(os.path.dirname(__file__), 'testfile.t') # from_file extracts all available triples diff --git a/test/lib/test_builder.py b/test/lib/test_builder.py index 2bd1994..3ecb3d3 100644 --- a/test/lib/test_builder.py +++ b/test/lib/test_builder.py @@ -18,11 +18,11 @@ class TestPipelineBuilder(unittest.TestCase): def test_build(self): c_schema = ''' bse:author rdfs:subClassOf bsfs:Predicate ; - rdfs:domain bsfs:Entity ; + rdfs:domain bsn:Entity ; rdfs:range xsd:string ; bsfs:unique "true"^^xsd:boolean . 
''' - c_tuples = [('http://bsfs.ai/schema/Entity#author', 'Me, myself, and I')] + c_tuples = [('https://schema.bsfs.io/ie/Node/Entity#author', 'Me, myself, and I')] # prepare builders rbuild = ReaderBuilder({}) ebuild = ExtractorBuilder([ diff --git a/test/lib/test_naming_policy.py b/test/lib/test_naming_policy.py index b2a3649..c9b0cd2 100644 --- a/test/lib/test_naming_policy.py +++ b/test/lib/test_naming_policy.py @@ -21,55 +21,55 @@ class TestDefaultNamingPolicy(unittest.TestCase): policy = DefaultNamingPolicy('http://example.com', 'me') # handle_node doesn't modify existing uris self.assertEqual(policy.handle_node( - Node(ns.bsfs.Entity, uri='http://example.com/you/foo#bar')).uri, + Node(ns.bsn.Invalid, uri='http://example.com/you/foo#bar')).uri, URI('http://example.com/you/foo#bar')) # processes bsfs:File self.assertEqual(policy.handle_node( - Node(ns.bsfs.File, ucid='abc123cba')).uri, + Node(ns.bsn.Entity, ucid='abc123cba')).uri, URI('http://example.com/me/file#abc123cba')) # processes bsfs:Preview self.assertEqual(policy.handle_node( - Node(ns.bsfs.Preview, ucid='abc123cba', size=123)).uri, + Node(ns.bsn.Preview, ucid='abc123cba', size=123)).uri, URI('http://example.com/me/preview#abc123cba_s123')) # raises an exception on unknown types self.assertRaises(errors.ProgrammingError, policy.handle_node, - Node(ns.bsfs.Entity, ucid='abc123cba', size=123)) + Node(ns.bsn.Invalid, ucid='abc123cba', size=123)) def test_name_file(self): # setup policy = DefaultNamingPolicy('http://example.com', 'me') # name_file uses ucid self.assertEqual(policy.name_file( - Node(ns.bsfs.File, ucid='123abc321')).uri, + Node(ns.bsn.Entity, ucid='123abc321')).uri, URI('http://example.com/me/file#123abc321')) # name_file falls back to a random guid self.assertTrue(policy.name_file( - Node(ns.bsfs.File)).uri.startswith('http://example.com/me/file#')) + Node(ns.bsn.Entity)).uri.startswith('http://example.com/me/file#')) def test_name_preview(self): # setup policy = 
DefaultNamingPolicy('http://example.com', 'me') # name_preview uses ucid self.assertEqual(policy.name_preview( - Node(ns.bsfs.Preview, ucid='123abc321')).uri, + Node(ns.bsn.Preview, ucid='123abc321')).uri, URI('http://example.com/me/preview#123abc321')) self.assertEqual(policy.name_preview( - Node(ns.bsfs.Preview, ucid='123abc321', size=400)).uri, + Node(ns.bsn.Preview, ucid='123abc321', size=400)).uri, URI('http://example.com/me/preview#123abc321_s400')) # name_preview uses source self.assertEqual(policy.name_preview( - Node(ns.bsfs.Preview, source=Node(ns.bsfs.File, ucid='123file321'))).uri, + Node(ns.bsn.Preview, source=Node(ns.bsn.Entity, ucid='123file321'))).uri, URI('http://example.com/me/preview#123file321')) self.assertEqual(policy.name_preview( - Node(ns.bsfs.Preview, source=Node(ns.bsfs.File, ucid='123file321'), size=300)).uri, + Node(ns.bsn.Preview, source=Node(ns.bsn.Entity, ucid='123file321'), size=300)).uri, URI('http://example.com/me/preview#123file321_s300')) # name_preview falls back to a random guid self.assertTrue(policy.name_preview( - Node(ns.bsfs.Preview)).uri.startswith('http://example.com/me/preview#')) + Node(ns.bsn.Preview)).uri.startswith('http://example.com/me/preview#')) self.assertTrue(policy.name_preview( - Node(ns.bsfs.Preview, size=200)).uri.startswith('http://example.com/me/preview#')) + Node(ns.bsn.Preview, size=200)).uri.startswith('http://example.com/me/preview#')) self.assertTrue(policy.name_preview( - Node(ns.bsfs.Preview, size=200)).uri.endswith('_s200')) + Node(ns.bsn.Preview, size=200)).uri.endswith('_s200')) class TestNamingPolicyIterator(unittest.TestCase): @@ -94,16 +94,16 @@ class TestNamingPolicyIterator(unittest.TestCase): # setup policy = DefaultNamingPolicy('http://example.com', 'me') triples = [ - (Node(ns.bsfs.File, ucid='foo'), 'predA', 'hello'), - (Node(ns.bsfs.Preview, ucid='bar'), 'predB', 1234), - (Node(ns.bsfs.Preview, ucid='hello'), 'predC', Node(ns.bsfs.File, ucid='world')) + (Node(ns.bsn.Entity, 
ucid='foo'), 'predA', 'hello'), + (Node(ns.bsn.Preview, ucid='bar'), 'predB', 1234), + (Node(ns.bsn.Preview, ucid='hello'), 'predC', Node(ns.bsn.Entity, ucid='world')) ] # handles nodes, handles values, ignores predicate self.assertListEqual(list(policy(triples)), [ - (Node(ns.bsfs.File, uri='http://example.com/me/file#foo'), 'predA', 'hello'), - (Node(ns.bsfs.Preview, uri='http://example.com/me/preview#bar'), 'predB', 1234), - (Node(ns.bsfs.Preview, uri='http://example.com/me/preview#hello'), 'predC', - Node(ns.bsfs.File, uri='http://example.com/me/file#world')), + (Node(ns.bsn.Entity, uri='http://example.com/me/file#foo'), 'predA', 'hello'), + (Node(ns.bsn.Preview, uri='http://example.com/me/preview#bar'), 'predB', 1234), + (Node(ns.bsn.Preview, uri='http://example.com/me/preview#hello'), 'predC', + Node(ns.bsn.Entity, uri='http://example.com/me/file#world')), ]) diff --git a/test/lib/test_pipeline.py b/test/lib/test_pipeline.py index 5125a5c..eb088a9 100644 --- a/test/lib/test_pipeline.py +++ b/test/lib/test_pipeline.py @@ -23,19 +23,19 @@ class TestPipeline(unittest.TestCase): # constant A csA = ''' bse:author rdfs:subClassOf bsfs:Predicate ; - rdfs:domain bsfs:File ; + rdfs:domain bsn:Entity ; rdfs:range xsd:string ; bsfs:unique "true"^^xsd:boolean . ''' - tupA = [('http://bsfs.ai/schema/Entity#author', 'Me, myself, and I')] + tupA = [('https://schema.bsfs.io/ie/Node/Entity#author', 'Me, myself, and I')] # constant B csB = ''' bse:rating rdfs:subClassOf bsfs:Predicate ; - rdfs:domain bsfs:File ; + rdfs:domain bsn:Entity ; rdfs:range xsd:integer ; bsfs:unique "true"^^xsd:boolean . 
''' - tupB = [('http://bsfs.ai/schema/Entity#rating', 123)] + tupB = [('https://schema.bsfs.io/ie/Node/Entity#rating', 123)] # extractors/readers self.ext2rdr = { bsie.extractor.generic.path.Path(): bsie.reader.path.Path(), @@ -84,13 +84,13 @@ class TestPipeline(unittest.TestCase): pipeline = Pipeline(self.ext2rdr) # build objects for tests content_hash = 'a948904f2f0f479b8f8197694b30184b0d2ed1c1cd2a1ec0fb85d299a192a447' - subject = node.Node(ns.bsfs.File, ucid=content_hash) + subject = node.Node(ns.bsn.Entity, ucid=content_hash) testfile = os.path.join(os.path.dirname(__file__), 'testfile.t') p_filename = pipeline.schema.predicate(ns.bse.filename) p_filesize = pipeline.schema.predicate(ns.bse.filesize) p_author = pipeline.schema.predicate(ns.bse.author) p_rating = pipeline.schema.predicate(ns.bse.rating) - entity = pipeline.schema.node(ns.bsfs.File) + entity = pipeline.schema.node(ns.bsn.Entity) p_invalid = pipeline.schema.predicate(ns.bsfs.Predicate).child(ns.bse.foo, range=entity) # extract given predicates diff --git a/test/utils/test_node.py b/test/utils/test_node.py index 918ce42..c0662a1 100644 --- a/test/utils/test_node.py +++ b/test/utils/test_node.py @@ -70,17 +70,17 @@ class TestNode(unittest.TestCase): def test_str(self): uri = bsfs.URI('http://example.com/me/entity#1234') # basic string conversion - node = Node(ns.bsfs.Entity, uri) - self.assertEqual(str(node), 'Node(http://bsfs.ai/schema/Entity, http://example.com/me/entity#1234)') - self.assertEqual(repr(node), 'Node(http://bsfs.ai/schema/Entity, http://example.com/me/entity#1234)') + node = Node(ns.bsn.Entity, uri) + self.assertEqual(str(node), 'Node(https://schema.bsfs.io/ie/Node/Entity, http://example.com/me/entity#1234)') + self.assertEqual(repr(node), 'Node(https://schema.bsfs.io/ie/Node/Entity, http://example.com/me/entity#1234)') # string conversion respects node_type - node = Node(ns.bsfs.Foo, uri) - self.assertEqual(str(node), 'Node(http://bsfs.ai/schema/Foo, 
http://example.com/me/entity#1234)') - self.assertEqual(repr(node), 'Node(http://bsfs.ai/schema/Foo, http://example.com/me/entity#1234)') + node = Node(ns.bsn.Foo, uri) + self.assertEqual(str(node), 'Node(https://schema.bsfs.io/ie/Node/Foo, http://example.com/me/entity#1234)') + self.assertEqual(repr(node), 'Node(https://schema.bsfs.io/ie/Node/Foo, http://example.com/me/entity#1234)') # string conversion respects uri - node = Node(ns.bsfs.Entity, bsfs.URI('http://example.com/me/entity#4321')) - self.assertEqual(str(node), 'Node(http://bsfs.ai/schema/Entity, http://example.com/me/entity#4321)') - self.assertEqual(repr(node), 'Node(http://bsfs.ai/schema/Entity, http://example.com/me/entity#4321)') + node = Node(ns.bsn.Entity, bsfs.URI('http://example.com/me/entity#4321')) + self.assertEqual(str(node), 'Node(https://schema.bsfs.io/ie/Node/Entity, http://example.com/me/entity#4321)') + self.assertEqual(repr(node), 'Node(https://schema.bsfs.io/ie/Node/Entity, http://example.com/me/entity#4321)') -- cgit v1.2.3 From 8b460aa0232cd841af7b7734c91982bc83486e03 Mon Sep 17 00:00:00 2001 From: Matthias Baumgartner Date: Sun, 5 Mar 2023 19:14:11 +0100 Subject: build fixes --- README.md | 41 ++++++++++++++++++++++++++++++++---- bsie/utils/loading.py | 4 ++-- doc/source/installation.rst | 34 +++++++++++++++++++++++++----- setup.py | 51 +++++++++++++++++++++++++++++++++------------ 4 files changed, 106 insertions(+), 24 deletions(-) diff --git a/README.md b/README.md index 85cb68f..eee19f7 100644 --- a/README.md +++ b/README.md @@ -7,10 +7,38 @@ that information in a BSFS storage. ## Installation -You can install BSIE via pip: +You can install BSIE via pip. BSIE comes with support for various file formats. +For this, it needs to install many external packages. BSIE lets you control +which of these you want to install. Note that if you choose to not install +support for some file types, BSIE will show a warning and skip them. +All other formats will be processed normally. 
+ +To install only the minimally required software, use: $ pip install --extra-index-url https://pip.bsfs.io bsie +To install all dependencies, use the following shortcut: + + $ pip install --extra-index-url https://pip.bsfs.io bsie[all] + +To install a subset of all dependencies, modify the extras part (``[image, preview]``) +of the following command to your liking: + + $ pip install --extra-index-url https://pip.bsfs.io bsie[image,preview] + +Currently, BSIE provides the following extra flags: + +* image: Read data from image files. + Note that you may also have to install ``exiftool`` through your system's + package manager (e.g. ``sudo apt install exiftool``). +* preview: Create previews from a variety of files. + Note that support for various file formats also depends on what + system packages you've installed. You should at least install ``imagemagick`` + through your system's package manager (e.g. ``sudo apt install imagemagick``). + See [Preview Generator](https://github.com/algoo/preview-generator) for + more detailed instructions. +* features: Extract feature vectors from images. + ## Development @@ -23,9 +51,15 @@ Install bsie as editable from the git repository: $ git clone https://git.bsfs.io/bsie.git $ cd bsie - $ pip install -e . 
+ $ pip install -e .[all] + +If you want to develop (*dev*), run the tests (*test*), edit the +documentation (*doc*), or build a distributable (*build*), +install bsie with the respective extras (in addition to file format extras): -Install the following additional packages besides BSIE: + $ pip install -e .[dev,doc,build,test] + +Or, you can manually install the following packages besides BSIE: $ pip install coverage mypy pylint $ pip install rdflib requests types-PyYAML @@ -52,4 +86,3 @@ To build the documentation, run the following commands from the **doc folder**: $ make html $ xdg-open build/html/index.html - diff --git a/bsie/utils/loading.py b/bsie/utils/loading.py index 3c5c7c1..58202d1 100644 --- a/bsie/utils/loading.py +++ b/bsie/utils/loading.py @@ -22,14 +22,14 @@ def safe_load(module_name: str, class_name: str): module = importlib.import_module(module_name) except Exception as err: # cannot import module - raise errors.LoaderError(f'cannot load module {module_name}') from err + raise errors.LoaderError(f'cannot load module {module_name} ({err})') from err try: # get the class from the module cls = getattr(module, class_name) except Exception as err: # cannot find the class - raise errors.LoaderError(f'cannot load class {class_name} from module {module_name}') from err + raise errors.LoaderError(f'cannot load class {class_name} from module {module_name} ({err})') from err return cls diff --git a/doc/source/installation.rst b/doc/source/installation.rst index b634457..ee6fadb 100644 --- a/doc/source/installation.rst +++ b/doc/source/installation.rst @@ -2,15 +2,39 @@ Installation ============ -Installation ------------- +You can install *bsie* via pip. BSIE comes with support for various file formats. +For this, it needs to install many external packages. BSIE lets you control +which of these you want to install. Note that if you choose to not install +support for some file types, BSIE will show a warning and skip them. 
+All other formats will be processed normally. +It is recommended to install *bsie* in a virtual environment (via ``virtualenv``). -Install *bsie* via pip:: +To install only the minimally required software, use:: pip install --extra-index-url https://pip.bsfs.io bsie -This installs the `bsie` python package as well as the `bsie.app` command. -It is recommended to install *bsie* in a virtual environment (via `virtualenv`). +To install all dependencies, use the following shortcut:: + + pip install --extra-index-url https://pip.bsfs.io bsie[all] + +To install a subset of all dependencies, modify the extras part (``[image, preview]``) +of the following command to your liking:: + + pip install --extra-index-url https://pip.bsfs.io bsie[image,preview] + +Currently, BSIE provides the following extra flags: + +* image: Read data from image files. + Note that you may also have to install ``exiftool`` through your system's + package manager (e.g. ``sudo apt install exiftool``). +* preview: Create previews from a variety of files. + Note that support for various file formats also depends on what + system packages you've installed. You should at least install ``imagemagick`` + through your system's package manager (e.g. ``sudo apt install imagemagick``). + See `Preview Generator <https://github.com/algoo/preview-generator>`_ for + more detailed instructions. +* features: Extract feature vectors from images. 
+ License diff --git a/setup.py b/setup.py index 43bdd32..b1f5b2c 100644 --- a/setup.py +++ b/setup.py @@ -2,6 +2,28 @@ from setuptools import setup, find_packages import os +extras = { + # NOTE: an 'all' extra is added automatically + 'features': [ + # image feature extractors + 'numpy', + ], + 'preview': [ + # preview readers + 'preview_generator', # also depends on some system packages + 'pillow', + 'rawpy', + ], + 'image': [ + # image readers + 'pillow', + 'rawpy', + # exif reader + 'pyexiv2', + ], + } + + setup( # package metadata name='bsie', @@ -9,14 +31,15 @@ setup( author='Matthias Baumgartner', author_email='dev@bsfs.io', description='Extract information from files and store them in a BSFS.', - long_description=open(os.path.join(os.path.dirname(__file__), 'README')).read(), + long_description=open(os.path.join(os.path.dirname(__file__), 'README.md')).read(), license='BSD', license_files=('LICENSE', ), url='https://www.bsfs.io/bsie/', download_url='https://pip.bsfs.io', # packages - packages=[p for p in find_packages() if p.startswith('bsie')], + packages=find_packages(include=['bsie']), + package_dir={'bsie': 'bsie'}, # data files are included if mentioned in MANIFEST.in include_package_data=True, @@ -31,18 +54,20 @@ setup( python_requires=">=3.7", install_requires=( 'bsfs', - # filematcher 'pyparsing', 'python-magic', + 'pyyaml', ), - extra_require=( - # image feature extractors - 'numpy', - # preview reader - 'preview_generator', # also depends on some system packages - # image reader - 'pillow', - 'rawpy', - ) -) + extras_require=dict( + # development targets + build=['build'], + dev=['coverage', 'mypy', 'pylint'], + doc=['sphinx', 'sphinx-copybutton', 'furo'], + test=['rdflib', 'requests', 'types-PyYAML'], + # add 'all' + all=list({pkg for ext in extras.values() for pkg in ext}), + # add extras + **extras + ), + ) -- cgit v1.2.3