Source code for xdev.util_path

import ubelt as ub
import os


class ChDir:
    """
    Temporarily switch the process working directory.

    On entry the current working directory is remembered and the process
    changes into ``dpath``. On exit the remembered directory is restored,
    regardless of any ``os.chdir`` calls made inside the ``with`` body.

    Args:
        dpath (PathLike | None):
            The new directory to work in. If None, then the context manager
            is disabled: nothing is changed on entry and nothing is restored
            on exit.

    Example:
        >>> dpath = ub.Path.appdir('xdev/tests/chdir').ensuredir()
        >>> dir1 = (dpath / 'dir1').ensuredir()
        >>> dir2 = (dpath / 'dir2').ensuredir()
        >>> with ChDir(dpath):
        >>>     assert ub.Path.cwd() == dpath
        >>>     # changes to the given directory, and then returns back
        >>>     with ChDir(dir1):
        >>>         assert ub.Path.cwd() == dir1
        >>>         with ChDir(dir2):
        >>>             assert ub.Path.cwd() == dir2
        >>>             # changes inside the context manager will be reset
        >>>             os.chdir(dpath)
        >>>         assert ub.Path.cwd() == dir1
        >>>     assert ub.Path.cwd() == dpath
        >>>     with ChDir(dir1):
        >>>         assert ub.Path.cwd() == dir1
        >>>         with ChDir(None):
        >>>             assert ub.Path.cwd() == dir1
        >>>             # When disabled, the cwd does *not* reset at context exit
        >>>             os.chdir(dir2)
        >>>         assert ub.Path.cwd() == dir2
        >>>         os.chdir(dir1)
        >>>         # Dont change dirs, but reset to your cwd at context end
        >>>         with ChDir('.'):
        >>>             os.chdir(dir2)
        >>>         assert ub.Path.cwd() == dir1
        >>>     assert ub.Path.cwd() == dpath
    """

    def __init__(self, dpath):
        # Directory to restore on exit; filled in by __enter__.
        self.orig_dpath = None
        # Directory to enter; None marks the manager as disabled.
        self.context_dpath = dpath

    def __enter__(self):
        target = self.context_dpath
        if target is None:
            # Disabled: leave the working directory untouched.
            return self
        self.orig_dpath = os.getcwd()
        os.chdir(target)
        return self

    def __exit__(self, a, b, c):
        if self.context_dpath is None:
            # Disabled: there is nothing to restore.
            return
        os.chdir(self.orig_dpath)
def sidecar_glob(main_pat, sidecar_ext, main_key='main', sidecar_key=None,
                 recursive=0):
    """
    Similar to a regular glob, but returns a dictionary with associated
    main-file / sidecar-file pairs.

    TODO:
        add as a general option to Pattern.paths?

    Args:
        main_pat (str | PathLike):
            glob pattern for the main non-sidecar file

        sidecar_ext (str):
            suffix that marks a file as a sidecar (e.g. ``'.dvc'``). A path
            whose name ends with this suffix is treated as the sidecar of the
            path obtained by stripping the suffix.

        main_key (str):
            key used for the main file in each yielded row.

        sidecar_key (str | None):
            key used for the sidecar file in each yielded row. If None, the
            value of ``sidecar_ext`` is used as the key.

        recursive (int | bool):
            forwarded unchanged to ``MultiPattern.paths(recursive=...)``.

    Yields:
        Dict[str, ub.Path | None]:
            one row per distinct main path, mapping ``main_key`` and
            ``sidecar_key`` to the corresponding path, or to None when that
            half of the pair was neither matched nor found on disk.

    Notes:
        A sidecar file is defined by the sidecar extension. We usually use
        this for .dvc sidecars.

        When the pattern includes a .dvc suffix, the result will include those
        .dvc files and any matching main files they correspond to. A warning
        is emitted in this case.

        Note: if you search for paths like `foo_*.dvc` this might skip
        unstaged files. Therefore it is recommended to include the .dvc suffix
        in the pattern ONLY if you do not want any unstaged files.

        If you want both staged and unstaged files, ensure the pattern does
        not exclude objects without a .dvc suffix (i.e. don't end the pattern
        with .dvc).

        When the pattern does not include a .dvc suffix, we include all those
        files, and also look for other files that exist by adding a .dvc
        suffix.

        When the pattern matches both a dvc and non-dvc file, they are grouped
        together.

    Example:
        >>> from xdev.util_path import *  # NOQA
        >>> dpath = ub.Path.appdir('xdev/tests/sidecar_glob')
        >>> dpath.delete().ensuredir()
        >>> (dpath / 'file1').touch()
        >>> (dpath / 'file1.ext').touch()
        >>> (dpath / 'file1.ext.car').touch()
        >>> (dpath / 'file2.ext').touch()
        >>> (dpath / 'file3.ext.car').touch()
        >>> (dpath / 'file4.car').touch()
        >>> (dpath / 'file5').touch()
        >>> (dpath / 'file6').touch()
        >>> (dpath / 'file6.car').touch()
        >>> (dpath / 'file7.bike').touch()
        >>> def _handle_results(results):
        ...     results = list(results)
        ...     for row in results:
        ...         for k, v in row.items():
        ...             if v is not None:
        ...                 row[k] = v.relative_to(dpath)
        ...     print(ub.repr2(results, sv=1))
        ...     return results
        >>> main_key = 'main'
        >>> sidecar_key = '.car'
        >>> sidecar_ext = '.car'
        >>> main_pat = dpath / '*'
        >>> _handle_results(sidecar_glob(main_pat, sidecar_ext))
        >>> _handle_results(sidecar_glob(dpath / '*.ext', '.car'))
        >>> _handle_results(sidecar_glob(dpath / '*.car', '.car'))
        >>> _handle_results(sidecar_glob(dpath / 'file*.ext', '.car'))
        >>> _handle_results(sidecar_glob(dpath / '*', '.ext'))
    """
    from xdev import patterns as util_pattern
    import warnings
    import os
    _len_ext = len(sidecar_ext)
    main_pat = os.fspath(main_pat)
    glob_patterns = [main_pat]
    if main_pat.endswith(sidecar_ext):
        # The message must be an f-string; otherwise the placeholders are
        # emitted verbatim and the offending values are never shown.
        warnings.warn(
            'The main path query should not end with the sidecar extension.'
            f' main_pat={main_pat!r} sidecar_ext={sidecar_ext!r}'
        )
        # We could have a variant that also globs the pattern with the
        # sidecar extension removed, but lets not do that and document the
        # limitation instead.
    elif not main_pat.endswith('/*'):
        # Also glob for sidecars of main files matched by the pattern.
        # (Optimization: a pattern ending in '/*' already matches every
        # sidecar in the directory, so no extra pattern is needed.)
        glob_patterns.append(main_pat + sidecar_ext)

    mpat = util_pattern.MultiPattern.coerce(glob_patterns)
    if sidecar_key is None:
        sidecar_key = sidecar_ext

    # Every row starts with both halves of the pair unset.
    default = {main_key: None, sidecar_key: None}
    id_to_row = ub.ddict(default.copy)

    paths = mpat.paths(recursive=recursive)

    def _gen():
        for path in paths:
            parent = path.parent
            name = path.name
            if name.endswith(sidecar_ext):
                # Matched a sidecar: its main file is the same name with the
                # sidecar extension stripped.
                this_key = sidecar_key
                other_key = main_key
                main_path = parent / name[:-_len_ext]
                other_path = main_path
            else:
                # Matched a main file: its sidecar is the same name with the
                # sidecar extension appended.
                this_key = main_key
                other_key = sidecar_key
                main_path = path
                other_path = parent / (name + sidecar_ext)

            # Rows are keyed by main path; only emit a row the first time
            # its main path is encountered.
            needs_yield = main_path not in id_to_row
            row = id_to_row[main_path]
            row[this_key] = path
            if row[other_key] is None:
                # The partner may exist on disk even when the glob did not
                # (or has not yet) matched it.
                if other_path.exists():
                    row[other_key] = other_path
            if needs_yield:
                yield row

    # Materialize all rows before yielding. Without this, yielded rows might
    # modify themselves later, which is confusing for a user. Don't remove
    # this unless we come up with a scheme where we detect if a row is
    # "complete" and only yield it then. We could more easily do this if we
    # used a walk-style find and pattern match mechanism.
    rows = list(_gen())
    yield from rows
def tree(path):
    """
    Like os.walk but yields a flat stream of directory and file paths.

    For every directory visited by ``os.walk``, the paths of its
    sub-directories are yielded first, followed by the paths of its files.
    Each path is the walked root joined with the entry name.

    Args:
        path (str | os.PathLike): path to traverse

    Yields:
        str: path

    Example:
        >>> import itertools as it
        >>> import ubelt as ub
        >>> path = ub.Path('.')
        >>> gen = tree(path)
        >>> results = list(it.islice(gen, 5))
        >>> print('results = {}'.format(ub.repr2(results, nl=1)))
    """
    import os
    for root, dirnames, filenames in os.walk(path):
        # os.walk yields (dirpath, dirnames, filenames); emit the
        # directories of this level first and then its files.
        for entry in dirnames + filenames:
            yield os.path.join(root, entry)