# Source code for xdev.util_path

import ubelt as ub
import os



# [docs]
class ChDir:
    """
    Temporarily switch the process working directory for the duration of a
    ``with`` block, restoring the previous one when the block exits.

    The directory that was current at ``__enter__`` time is remembered in
    ``orig_dpath`` and is restored unconditionally on exit, even if the body
    of the block called ``os.chdir`` itself.

    Args:
        dpath (PathLike | None):
            Directory to switch into on entry.
            Passing None turns the context manager into a no-op: nothing is
            changed on entry and nothing is restored on exit.

    Example:
        >>> dpath = ub.Path.appdir('xdev/tests/chdir').ensuredir()
        >>> dir1 = (dpath / 'dir1').ensuredir()
        >>> dir2 = (dpath / 'dir2').ensuredir()
        >>> with ChDir(dpath):
        >>>     assert ub.Path.cwd() == dpath
        >>>     # changes to the given directory, and then returns back
        >>>     with ChDir(dir1):
        >>>         assert ub.Path.cwd() == dir1
        >>>         with ChDir(dir2):
        >>>             assert ub.Path.cwd() == dir2
        >>>             # changes inside the context manager will be reset
        >>>             os.chdir(dpath)
        >>>         assert ub.Path.cwd() == dir1
        >>>     assert ub.Path.cwd() == dpath
        >>>     with ChDir(dir1):
        >>>         assert ub.Path.cwd() == dir1
        >>>         with ChDir(None):
        >>>             assert ub.Path.cwd() == dir1
        >>>             # When disabled, the cwd does *not* reset at context exit
        >>>             os.chdir(dir2)
        >>>         assert ub.Path.cwd() == dir2
        >>>         os.chdir(dir1)
        >>>         # Dont change dirs, but reset to your cwd at context end
        >>>         with ChDir('.'):
        >>>             os.chdir(dir2)
        >>>         assert ub.Path.cwd() == dir1
        >>>     assert ub.Path.cwd() == dpath
    """
    def __init__(self, dpath):
        # Directory to enter (None disables the manager entirely).
        self.context_dpath = dpath
        # Filled in by __enter__ with the directory to return to.
        self.orig_dpath = None

    def __enter__(self):
        # Disabled manager: leave the working directory untouched.
        if self.context_dpath is None:
            return self
        # Record where we are *before* moving so __exit__ can come back.
        self.orig_dpath = os.getcwd()
        os.chdir(self.context_dpath)
        return self

    def __exit__(self, a, b, c):
        # Disabled manager: nothing was recorded, so nothing to restore.
        if self.context_dpath is None:
            return
        os.chdir(self.orig_dpath)




# [docs]
def sidecar_glob(main_pat, sidecar_ext, main_key='main', sidecar_key=None,
                 recursive=0):
    """
    Similar to a regular glob, but returns a dictionary with associated
    main-file / sidecar-file pairs.

    TODO:
        add as a general option to Pattern.paths?

    Args:
        main_pat (str | PathLike):
            glob pattern for the main non-sidecar file

        sidecar_ext (str):
            suffix that marks a file as a sidecar (e.g. ``'.dvc'``). A path
            whose name ends with this suffix is treated as the sidecar of the
            path obtained by stripping the suffix. Should be non-empty: an
            empty suffix matches every name.

        main_key (str):
            key under which the main path is stored in each yielded row.

        sidecar_key (str | None):
            key under which the sidecar path is stored in each yielded row.
            If None, ``sidecar_ext`` is used as the key.

        recursive (int | bool):
            passed through unchanged to ``MultiPattern.paths(recursive=...)``.

    Yields:
        Dict[str, ub.Path | None]:
            one row per distinct main path with the two keys ``main_key`` and
            ``sidecar_key``. A value is None when the corresponding file was
            neither matched by the pattern nor found on disk.

    Notes:
        A sidecar file is defined by the sidecar extension. We usually use this
        for .dvc sidecars.

        When the pattern includes a .dvc suffix, the result will include those
        .dvc files and any matching main files they correspond to. Note: if
        you search for paths like `foo_*.dvc` this might skip unstaged files.
        Therefore it is recommended to include the .dvc suffix in the pattern
        ONLY if you do not want any unstaged files. A warning is emitted in
        this case.

        If you want both staged and unstaged files, ensure the pattern does not
        exclude objects without a .dvc suffix (i.e. don't end the pattern with
        .dvc).

        When the pattern does not include a .dvc suffix, we include all those
        files, and also look for other files that exist by adding a .dvc
        suffix.

        When the pattern matches both a dvc and non-dvc file, they are grouped
        together.

        All rows are collected before the first one is yielded, so a row is
        never mutated after the caller has received it.

    Example:
        >>> from xdev.util_path import *  # NOQA
        >>> dpath = ub.Path.appdir('xdev/tests/sidecar_glob')
        >>> dpath.delete().ensuredir()
        >>> (dpath / 'file1').touch()
        >>> (dpath / 'file1.ext').touch()
        >>> (dpath / 'file1.ext.car').touch()
        >>> (dpath / 'file2.ext').touch()
        >>> (dpath / 'file3.ext.car').touch()
        >>> (dpath / 'file4.car').touch()
        >>> (dpath / 'file5').touch()
        >>> (dpath / 'file6').touch()
        >>> (dpath / 'file6.car').touch()
        >>> (dpath / 'file7.bike').touch()
        >>> def _handle_results(results):
        ...     results = list(results)
        ...     for row in results:
        ...         for k, v in row.items():
        ...             if v is not None:
        ...                 row[k] = v.relative_to(dpath)
        ...     print(ub.repr2(results, sv=1))
        ...     return results
        >>> main_key = 'main'
        >>> sidecar_key = '.car'
        >>> sidecar_ext = '.car'
        >>> main_pat = dpath / '*'
        >>> _handle_results(sidecar_glob(main_pat, sidecar_ext))
        >>> _handle_results(sidecar_glob(dpath / '*.ext', '.car'))
        >>> _handle_results(sidecar_glob(dpath / '*.car', '.car'))
        >>> _handle_results(sidecar_glob(dpath / 'file*.ext', '.car'))
        >>> _handle_results(sidecar_glob(dpath / '*', '.ext'))
    """
    from xdev import patterns as util_pattern
    import warnings
    import os
    _len_ext = len(sidecar_ext)
    main_pat = os.fspath(main_pat)
    glob_patterns = [main_pat]
    if main_pat.endswith(sidecar_ext):
        # The second fragment must be an f-string so the offending values are
        # actually interpolated into the message.
        warnings.warn(
            'The main path query should not end with the sidecar extension.'
            f' {main_pat=} {sidecar_ext=}'
        )
        # We could have a variant that removes the extension, but lets not do
        # that and document it.
        # glob_patterns.append(pat[:-_len_ext])
    else:
        if main_pat.endswith('/*'):
            # Optimization: a trailing wildcard already matches the sidecar
            # names, so no extra pattern is needed in this case.
            pass
        else:
            # Also search for sidecars whose main file may not exist.
            glob_patterns.append(main_pat + sidecar_ext)

    mpat = util_pattern.MultiPattern.coerce(glob_patterns)
    if sidecar_key is None:
        sidecar_key = sidecar_ext
    default = {main_key: None, sidecar_key: None}
    # Rows are keyed by the main path; a missing key creates a fresh copy of
    # the all-None default row.
    id_to_row = ub.ddict(default.copy)
    paths = mpat.paths(recursive=recursive)

    def _gen():
        for path in paths:
            parent = path.parent
            name = path.name
            if name.endswith(sidecar_ext):
                # Matched a sidecar: its partner is the name minus the suffix.
                this_key = sidecar_key
                other_key = main_key
                main_path = parent / name[:-_len_ext]
                other_path = main_path
            else:
                # Matched a main file: its partner is the name plus the suffix.
                this_key = main_key
                other_key = sidecar_key
                main_path = path
                other_path = parent / (name + sidecar_ext)
            # Only emit a row the first time its main path is encountered;
            # later matches for the same main path update the existing row.
            needs_yield = main_path not in id_to_row
            row = id_to_row[main_path]
            row[this_key] = path
            if row[other_key] is None:
                # The partner may exist on disk even if the glob pattern did
                # not match it.
                if other_path.exists():
                    row[other_key] = other_path
            if needs_yield:
                yield row
    # Materialize everything first. Without this, yielded rows might modify
    # themselves later, which is confusing for a user. Don't do it, or come up
    # with a scheme where we detect if a row is "complete" and only yield it
    # then. We could more easily do this if we used a walk-style find and
    # pattern match mechanism.
    rows = list(_gen())
    yield from rows




# [docs]
def tree(path):
    """
    Walk a directory with ``os.walk`` and lazily yield every entry beneath it
    as a single flat stream of joined paths.

    For each directory visited, its sub-directory paths are yielded first,
    followed by its file paths. The starting ``path`` itself is not yielded.

    Args:
        path (str | os.PathLike): path to traverse

    Yields:
        str: path

    Example:
        >>> import itertools as it
        >>> import ubelt as ub
        >>> path = ub.Path('.')
        >>> gen = tree(path)
        >>> results = list(it.islice(gen, 5))
        >>> print('results = {}'.format(ub.repr2(results, nl=1)))
    """
    import os
    for root, dirnames, filenames in os.walk(path):
        # Sub-directories of this level come out before its files.
        for name in dirnames + filenames:
            yield os.path.join(root, name)