about summary refs log tree commit diff stats
diff options
context:
space:
mode:
-rw-r--r-- bsie/apps/index.py 21
-rw-r--r-- bsie/utils/__init__.py 1
-rw-r--r-- bsie/utils/filewalker.py 31
-rw-r--r-- test/utils/test_filewalker.py 125
4 files changed, 161 insertions, 17 deletions
diff --git a/bsie/apps/index.py b/bsie/apps/index.py
index ef467bd..7dda6f4 100644
--- a/bsie/apps/index.py
+++ b/bsie/apps/index.py
@@ -6,7 +6,7 @@ import typing
# bsie imports
from bsie.lib import BSIE, DefaultNamingPolicy
-from bsie.utils import bsfs, errors, node as node_
+from bsie.utils import bsfs, errors, node as node_, list_files
# inner-module imports
from . import _loader
@@ -59,22 +59,9 @@ def main(argv):
# FIXME: simplify code (below but maybe also above)
# FIXME: How to handle dependencies between data?
# E.g. do I still want to link to a tag despite not being permitted to set its label?
-
- # index input paths
- for path in args.input_file:
- if not os.path.exists(path):
- pass # FIXME: notify the user
- elif os.path.isdir(path) and args.recursive:
- for dirpath, _, filenames in os.walk(path, topdown=True, followlinks=args.follow):
- for filename in filenames:
- for node, pred, value in bsie.from_file(os.path.join(dirpath, filename)):
- handle(node, pred, value)
- elif os.path.isfile(path):
- for node, pred, value in bsie.from_file(path):
- handle(node, pred, value)
- else:
- raise errors.UnreachableError()
-
+ for path in list_files(args.input_file, args.recursive, args.follow):
+ for node, pred, value in bsie.from_file(path):
+ handle(node, pred, value)
if args.print:
walk(print)
diff --git a/bsie/utils/__init__.py b/bsie/utils/__init__.py
index 18c8db7..4f08604 100644
--- a/bsie/utils/__init__.py
+++ b/bsie/utils/__init__.py
@@ -8,6 +8,7 @@ from . import bsfs
from . import filematcher
from . import namespaces as ns
from . import node
+from .filewalker import list_files
from .loading import safe_load, unpack_qualified_name
# exports
diff --git a/bsie/utils/filewalker.py b/bsie/utils/filewalker.py
new file mode 100644
index 0000000..3c36926
--- /dev/null
+++ b/bsie/utils/filewalker.py
@@ -0,0 +1,31 @@
+
+# standard imports
+import os
+import typing
+
+# exports
+__all__: typing.Sequence[str] = (
+ 'list_files',
+ )
+
+
+## code ##
+
+def list_files(
+ roots: typing.Iterable[str],
+ recursive: bool = True,
+ follow_symlinks: bool = True,
+ ) -> typing.Iterator[str]:
+ """Iterate over all files in *roots*. Directories are only searched if *recursive* is set (the default); missing paths are skipped."""
+ # list the files of each input path
+ for path in roots:
+ if not os.path.exists(path):
+ continue
+ elif os.path.isdir(path) and recursive:
+ for dirpath, _, filenames in os.walk(path, topdown=True, followlinks=follow_symlinks):
+ for filename in filenames:
+ yield os.path.join(dirpath, filename)
+ elif os.path.isfile(path):
+ yield path
+
+## EOF ##
diff --git a/test/utils/test_filewalker.py b/test/utils/test_filewalker.py
new file mode 100644
index 0000000..4aaba65
--- /dev/null
+++ b/test/utils/test_filewalker.py
@@ -0,0 +1,125 @@
+
+# standard imports
+import os
+import shutil
+import tempfile
+import unittest
+
+# objects to test
+from bsie.utils.filewalker import list_files
+
+
+## code ##
+
+def touch(path, text='<test content>'):
+ # create folders
+ os.makedirs(os.path.dirname(path), exist_ok=True)
+ # create file
+ with open(path, 'wt') as ofile:
+ ofile.write(text)
+
+class TestListFiles(unittest.TestCase):
+ def setUp(self):
+ # set up directory structure
+ # <root>
+ # - zero*
+ # - foo
+ # - hello*
+ # - remote -> foobar/xyz
+ # - bar
+ # - world*
+ # - xyz*
+ # - bar
+ # - fst
+ # - abc*
+ # - zyx*
+ # - snd
+ # - cba*
+ # - xyz*
+ # - foobar
+ # - xyz*
+ # - hello*
+ # - world -> bar/snd
+ self.testdir = tempfile.mkdtemp(prefix='bsie-test-')
+ touch(os.path.join(self.testdir, 'zero'))
+ touch(os.path.join(self.testdir, 'foo', 'hello'))
+ touch(os.path.join(self.testdir, 'foo', 'bar', 'world'))
+ touch(os.path.join(self.testdir, 'foo', 'bar', 'xyz'))
+ touch(os.path.join(self.testdir, 'bar', 'fst', 'abc'))
+ touch(os.path.join(self.testdir, 'bar', 'fst', 'zyx'))
+ touch(os.path.join(self.testdir, 'bar', 'snd', 'cba'))
+ touch(os.path.join(self.testdir, 'bar', 'snd', 'xyz'))
+ touch(os.path.join(self.testdir, 'foobar', 'xyz'))
+ touch(os.path.join(self.testdir, 'foobar', 'hello'))
+ os.symlink(
+ os.path.join(self.testdir, 'bar', 'snd'),
+ os.path.join(self.testdir, 'foobar', 'world'))
+ os.symlink(
+ os.path.join(self.testdir, 'foobar', 'xyz'),
+ os.path.join(self.testdir, 'foo', 'remote'))
+
+ def tearDown(self):
+ # remove testing dirs
+ shutil.rmtree(self.testdir, ignore_errors=True)
+
+ def test_list_files(self):
+ # list_files lists all files beneath root
+ roots = [
+ os.path.join(self.testdir, 'foo'),
+ os.path.join(self.testdir, 'bar'),
+ os.path.join(self.testdir, 'foobar'),
+ os.path.join(self.testdir, 'zero'),
+ ]
+ self.assertSetEqual(set(list_files(roots, recursive=True, follow_symlinks=True)), {
+ os.path.join(self.testdir, 'bar', 'fst', 'abc'),
+ os.path.join(self.testdir, 'bar', 'fst', 'zyx'),
+ os.path.join(self.testdir, 'bar', 'snd', 'cba'),
+ os.path.join(self.testdir, 'bar', 'snd', 'xyz'),
+ os.path.join(self.testdir, 'foo', 'bar', 'world'),
+ os.path.join(self.testdir, 'foo', 'bar', 'xyz'),
+ os.path.join(self.testdir, 'foo', 'hello'),
+ os.path.join(self.testdir, 'foo', 'remote'),
+ os.path.join(self.testdir, 'foobar', 'hello'),
+ os.path.join(self.testdir, 'foobar', 'world', 'cba'),
+ os.path.join(self.testdir, 'foobar', 'world', 'xyz'),
+ os.path.join(self.testdir, 'foobar', 'xyz'),
+ os.path.join(self.testdir, 'zero'),
+ })
+
+ # list_files respects root
+ self.assertSetEqual(set(list_files(
+ roots=[os.path.join(self.testdir, 'foo')], recursive=True, follow_symlinks=True)), {
+ os.path.join(self.testdir, 'foo', 'bar', 'world'),
+ os.path.join(self.testdir, 'foo', 'bar', 'xyz'),
+ os.path.join(self.testdir, 'foo', 'hello'),
+ os.path.join(self.testdir, 'foo', 'remote'),
+ })
+
+ # list_files respects the recursive flag (lists only roots that are files themselves!)
+ self.assertSetEqual(set(list_files(roots, recursive=False, follow_symlinks=True)), {
+ os.path.join(self.testdir, 'zero'),
+ })
+
+ # list_files respects the symlink flag
+ # lists symlinked files but does not dive into symlinked folders
+ self.assertSetEqual(set(list_files(roots, recursive=True, follow_symlinks=False)), {
+ os.path.join(self.testdir, 'bar', 'fst', 'abc'),
+ os.path.join(self.testdir, 'bar', 'fst', 'zyx'),
+ os.path.join(self.testdir, 'bar', 'snd', 'cba'),
+ os.path.join(self.testdir, 'bar', 'snd', 'xyz'),
+ os.path.join(self.testdir, 'foo', 'bar', 'world'),
+ os.path.join(self.testdir, 'foo', 'bar', 'xyz'),
+ os.path.join(self.testdir, 'foo', 'hello'),
+ os.path.join(self.testdir, 'foo', 'remote'),
+ os.path.join(self.testdir, 'foobar', 'hello'),
+ os.path.join(self.testdir, 'foobar', 'xyz'),
+ os.path.join(self.testdir, 'zero'),
+ })
+
+
+## main ##
+
+if __name__ == '__main__':
+ unittest.main()
+
+## EOF ##