diff options
author | Matthias Baumgartner <dev@igsor.net> | 2023-04-05 17:45:25 +0200 |
---|---|---|
committer | Matthias Baumgartner <dev@igsor.net> | 2023-04-05 17:45:25 +0200 |
commit | aefd0cb4fa1a949beabc51e88a5c46843043a439 (patch) | |
tree | e978249655fcab58f9ee1479c268ca8b06af7e8d | |
parent | 0b6b1d27756d1c02a2a667ebfc1a119081ff079f (diff) | |
download | bsie-aefd0cb4fa1a949beabc51e88a5c46843043a439.tar.gz bsie-aefd0cb4fa1a949beabc51e88a5c46843043a439.tar.bz2 bsie-aefd0cb4fa1a949beabc51e88a5c46843043a439.zip |
move file walker into its own module
-rw-r--r-- | bsie/apps/index.py | 21 |
-rw-r--r-- | bsie/utils/__init__.py | 1 |
-rw-r--r-- | bsie/utils/filewalker.py | 31 |
-rw-r--r-- | test/utils/test_filewalker.py | 125 |
4 files changed, 161 insertions, 17 deletions
diff --git a/bsie/apps/index.py b/bsie/apps/index.py index ef467bd..7dda6f4 100644 --- a/bsie/apps/index.py +++ b/bsie/apps/index.py @@ -6,7 +6,7 @@ import typing # bsie imports from bsie.lib import BSIE, DefaultNamingPolicy -from bsie.utils import bsfs, errors, node as node_ +from bsie.utils import bsfs, errors, node as node_, list_files # inner-module imports from . import _loader @@ -59,22 +59,9 @@ def main(argv): # FIXME: simplify code (below but maybe also above) # FIXME: How to handle dependencies between data? # E.g. do I still want to link to a tag despite not being permitted to set its label? - - # index input paths - for path in args.input_file: - if not os.path.exists(path): - pass # FIXME: notify the user - elif os.path.isdir(path) and args.recursive: - for dirpath, _, filenames in os.walk(path, topdown=True, followlinks=args.follow): - for filename in filenames: - for node, pred, value in bsie.from_file(os.path.join(dirpath, filename)): - handle(node, pred, value) - elif os.path.isfile(path): - for node, pred, value in bsie.from_file(path): - handle(node, pred, value) - else: - raise errors.UnreachableError() - + for path in list_files(args.input_file, args.recursive, args.follow): + for node, pred, value in bsie.from_file(path): + handle(node, pred, value) if args.print: walk(print) diff --git a/bsie/utils/__init__.py b/bsie/utils/__init__.py index 18c8db7..4f08604 100644 --- a/bsie/utils/__init__.py +++ b/bsie/utils/__init__.py @@ -8,6 +8,7 @@ from . import bsfs from . import filematcher from . import namespaces as ns from . 
import node +from .filewalker import list_files from .loading import safe_load, unpack_qualified_name # exports diff --git a/bsie/utils/filewalker.py b/bsie/utils/filewalker.py new file mode 100644 index 0000000..3c36926 --- /dev/null +++ b/bsie/utils/filewalker.py @@ -0,0 +1,31 @@ + +# standard imports +import os +import typing + +# exports +__all__: typing.Sequence[str] = ( + 'list_files', + ) + + +## code ## + +def list_files( + roots: typing.Iterable[str], + recursive: bool = True, + follow_symlinks: bool = True, + ) -> typing.Iterator[str]: + """Iterate over all files in *roots*, recursively by default.""" + # index input paths + for path in roots: + if not os.path.exists(path): + continue + elif os.path.isdir(path) and recursive: + for dirpath, _, filenames in os.walk(path, topdown=True, followlinks=follow_symlinks): + for filename in filenames: + yield os.path.join(dirpath, filename) + elif os.path.isfile(path): + yield path + +## EOF ## diff --git a/test/utils/test_filewalker.py b/test/utils/test_filewalker.py new file mode 100644 index 0000000..4aaba65 --- /dev/null +++ b/test/utils/test_filewalker.py @@ -0,0 +1,125 @@ + +# standard imports +import os +import shutil +import tempfile +import unittest + +# objects to test +from bsie.utils.filewalker import list_files + + +## code ## + +def touch(path, text='<test content>'): + # create folders + os.makedirs(os.path.dirname(path), exist_ok=True) + # create file + with open(path, 'wt') as ofile: + ofile.write(text) + +class TestListFiles(unittest.TestCase): + def setUp(self): + # set up directory structure + # <root> + # - zero* + # - foo + # - hello* + # - remote -> foobar/xyz + # - bar + # - world* + # - xyz* + # - bar + # - fst + # - abc* + # - zyx* + # - snd + # - cba* + # - xyz* + # - foobar + # - xyz* + # - hello* + # - world -> bar/snd + self.testdir = tempfile.mkdtemp(prefix='bsie-test-') + touch(os.path.join(self.testdir, 'zero')) + touch(os.path.join(self.testdir, 'foo', 'hello')) + 
touch(os.path.join(self.testdir, 'foo', 'bar', 'world')) + touch(os.path.join(self.testdir, 'foo', 'bar', 'xyz')) + touch(os.path.join(self.testdir, 'bar', 'fst', 'abc')) + touch(os.path.join(self.testdir, 'bar', 'fst', 'zyx')) + touch(os.path.join(self.testdir, 'bar', 'snd', 'cba')) + touch(os.path.join(self.testdir, 'bar', 'snd', 'xyz')) + touch(os.path.join(self.testdir, 'foobar', 'xyz')) + touch(os.path.join(self.testdir, 'foobar', 'hello')) + os.symlink( + os.path.join(self.testdir, 'bar', 'snd'), + os.path.join(self.testdir, 'foobar', 'world')) + os.symlink( + os.path.join(self.testdir, 'foobar', 'xyz'), + os.path.join(self.testdir, 'foo', 'remote')) + + def tearDown(self): + # remove testing dirs + shutil.rmtree(self.testdir, ignore_errors=True) + + def test_list_files(self): + # list_files lists all files beneath root + roots = [ + os.path.join(self.testdir, 'foo'), + os.path.join(self.testdir, 'bar'), + os.path.join(self.testdir, 'foobar'), + os.path.join(self.testdir, 'zero'), + ] + self.assertSetEqual(set(list_files(roots, recursive=True, follow_symlinks=True)), { + os.path.join(self.testdir, 'bar', 'fst', 'abc'), + os.path.join(self.testdir, 'bar', 'fst', 'zyx'), + os.path.join(self.testdir, 'bar', 'snd', 'cba'), + os.path.join(self.testdir, 'bar', 'snd', 'xyz'), + os.path.join(self.testdir, 'foo', 'bar', 'world'), + os.path.join(self.testdir, 'foo', 'bar', 'xyz'), + os.path.join(self.testdir, 'foo', 'hello'), + os.path.join(self.testdir, 'foo', 'remote'), + os.path.join(self.testdir, 'foobar', 'hello'), + os.path.join(self.testdir, 'foobar', 'world', 'cba'), + os.path.join(self.testdir, 'foobar', 'world', 'xyz'), + os.path.join(self.testdir, 'foobar', 'xyz'), + os.path.join(self.testdir, 'zero'), + }) + + # list_files lists respects root + self.assertSetEqual(set(list_files( + roots=[os.path.join(self.testdir, 'foo')], recursive=True, follow_symlinks=True)), { + os.path.join(self.testdir, 'foo', 'bar', 'world'), + os.path.join(self.testdir, 'foo', 
'bar', 'xyz'), + os.path.join(self.testdir, 'foo', 'hello'), + os.path.join(self.testdir, 'foo', 'remote'), + }) + + # list_files lists respects recursive flag (lists only files in root!) + self.assertSetEqual(set(list_files(roots, recursive=False, follow_symlinks=True)), { + os.path.join(self.testdir, 'zero'), + }) + + # list_files lists respects symlink flag + # lists symlinked files but does not dive into symlinked folders + self.assertSetEqual(set(list_files(roots, recursive=True, follow_symlinks=False)), { + os.path.join(self.testdir, 'bar', 'fst', 'abc'), + os.path.join(self.testdir, 'bar', 'fst', 'zyx'), + os.path.join(self.testdir, 'bar', 'snd', 'cba'), + os.path.join(self.testdir, 'bar', 'snd', 'xyz'), + os.path.join(self.testdir, 'foo', 'bar', 'world'), + os.path.join(self.testdir, 'foo', 'bar', 'xyz'), + os.path.join(self.testdir, 'foo', 'hello'), + os.path.join(self.testdir, 'foo', 'remote'), + os.path.join(self.testdir, 'foobar', 'hello'), + os.path.join(self.testdir, 'foobar', 'xyz'), + os.path.join(self.testdir, 'zero'), + }) + + +## main ## + +if __name__ == '__main__': + unittest.main() + +## EOF ## |