Source code for xdev.search_replace

"""
Python implementations of sed, grep, and find

Porting from ~/local/rob/rob/rob_nav.py / ubelt
"""
from __future__ import absolute_import, division, print_function, unicode_literals
import os
import ubelt as ub
from os.path import relpath, split, join, abspath
from xdev.patterns import Pattern, RE_Pattern  # NOQA
from xdev.patterns import MultiPattern

# try:
#     from packaging.version import parse as parse_version
# except Exception:
#     from distutils.version import LooseVersion as parse_version


class GrepResult(ub.NiceRepr):
    """
    Manage and format results from grep

    Collects the lines of one file (or text blob) that matched a pattern,
    together with the index of each line, and can render them as a report.

    Attributes:
        pattern (Pattern | None): pattern used for the search; only needed
            to highlight matches in :func:`format_text`.
        fpath (str | PathLike): where the searched text came from.
        found_lxs (List[int]): line indexes of the matching lines.
        found_lines (List[str]): the matching lines, parallel to
            ``found_lxs``.
        max_line (int): only its number of digits is used, to size the
            line-index column of the report.
    """

    def __init__(self, fpath, pattern=None):
        self.pattern = pattern
        self.fpath = fpath
        self.found_lxs = []
        self.found_lines = []
        self.max_line = 100

    def __nice__(self):
        return '{} in {}'.format(len(self), self.fpath)

    def __iter__(self):
        return iter(self.found_lines)

    def __len__(self):
        return len(self.found_lines)

    def append(self, lx, line):
        """
        Record one matching line.

        Args:
            lx (int): index of the line in the searched text
            line (str): content of the line
        """
        self.found_lines.append(line)
        self.found_lxs.append(lx)

    def format_text(self, color=True):
        """
        Render the recorded matches as a multi-line report.

        Args:
            color (bool): if True and a pattern is available, the first
                match on every line is highlighted in red. If False the
                lines are emitted without any highlighting.

        Returns:
            str: a separator line, a summary line and one formatted line
                per match, joined by newlines.
        """
        fname = ub.Path(self.fpath).name
        ndigits = str(len(str(self.max_line)))
        fmt_str = '{} : {:' + ndigits + 'd} |{}'
        summary = [
            '----------------------',
            'Found {} line(s) in {!r}: '.format(len(self), self.fpath),
        ]
        # Keep the on/off switch separate from the color name so that
        # ``color=False`` is honored.
        highlight = bool(color and self.pattern)
        highlight_color = 'red'
        for lx, line in zip(self.found_lxs, self.found_lines):
            line = line.replace('\n', '')
            if highlight:
                found = self.pattern.search(line)
                # The match may not survive stripping the newline (e.g. a
                # pattern that consumes '\n'); then leave the line as is.
                if found:
                    s, t = found.span()
                    line = (line[:s] +
                            ub.color_text(line[s:t], highlight_color) +
                            line[t:])
            summary.append(fmt_str.format(fname, lx, line))
        return '\n'.join(summary)
def sed(regexpr, repl, dpath=None, include=None, exclude=None,
        dirblocklist=None, recursive=True, dry=False, verbose=1):
    r"""
    Execute a sed on multiple files.

    Every file yielded by :func:`find` is passed to :func:`sedfile`. Files
    that cannot be decoded as text are counted as skipped rather than
    aborting the run.

    Args:
        regexpr (str | Pattern): pattern to find

        repl (str): the text to replace the found pattern with

        dpath (str | None): passed to :func:`find`.

        include (str | List[str] | MultiPattern | None):
            passed to :func:`find`.

        exclude (str | List[str] | MultiPattern | None):
            passed to :func:`find`.

        dirblocklist (str | List[str] | MultiPattern | None):
            passed to :func:`find`.

        recursive (bool): passed to :func:`find`.

        dry (bool): if True does not apply edits

        verbose (int): verbosity level. Also forwarded to :func:`sedfile`,
            so ``verbose=0`` silences the per-file diffs as well as the
            final summary.

    Example:
        >>> from xdev.search_replace import *  # NOQA
        >>> from xdev.search_replace import _create_test_filesystem
        >>> dpath = _create_test_filesystem()['root']
        >>> sed('a', 'x', dpath=dpath, dry=True)
    """
    num_changed = 0
    num_files_checked = 0
    num_skipped = 0
    fpaths_changed = []

    fpath_generator = find(dpath=dpath, type='f', include=include,
                           exclude=exclude, dirblocklist=dirblocklist,
                           recursive=recursive)
    for fpath in fpath_generator:
        try:
            changed_lines = sedfile(fpath, regexpr, repl, dry=dry,
                                    verbose=verbose)
        except UnicodeDecodeError:
            # Undecodable files are assumed to be binary and are skipped
            num_skipped += 1
            continue
        num_files_checked += 1
        if changed_lines:
            fpaths_changed.append(fpath)
            num_changed += len(changed_lines)

    if verbose:
        print('num_files_checked = {}'.format(num_files_checked))
        print('num probable binary files skipped = {}'.format(num_skipped))
        print('fpaths_changed = {}'.format(ub.repr2(sorted(fpaths_changed))))
        print('total lines changed = {!r}'.format(num_changed))
def grep(regexpr, dpath=None, include=None, exclude=None, recursive=True,
         dirblocklist=None, verbose=1):
    r"""
    Execute a grep on multiple files.

    Runs :func:`grepfile` on every file yielded by :func:`find` and keeps
    the results that contain at least one matching line.

    Args:
        regexpr (str | Pattern): pattern to find

        dpath (str | None): passed to :func:`find`.

        include (str | List[str] | MultiPattern | None):
            passed to :func:`find`.

        exclude (str | List[str] | MultiPattern | None):
            passed to :func:`find`.

        recursive (bool): passed to :func:`find`.

        dirblocklist (str | List[str] | MultiPattern | None):
            passed to :func:`find`.

        verbose (int): verbosity level

    Returns:
        List[GrepResult]: one entry per file with at least one match

    Example:
        >>> from xdev.search_replace import *  # NOQA
        >>> from xdev.search_replace import _create_test_filesystem
        >>> dpath = _create_test_filesystem()['root']
        >>> grep('a', dpath=dpath)
    """
    candidate_fpaths = find(dpath=dpath, type='f', include=include,
                            exclude=exclude, recursive=recursive,
                            dirblocklist=dirblocklist)
    # Lazy pipeline: each file is searched as soon as find yields it.
    per_file_results = (
        grepfile(fpath, regexpr, verbose=verbose)
        for fpath in candidate_fpaths
    )
    # Drops both None (unreadable file) and empty results (no match)
    grep_results = [result for result in per_file_results if result]

    if verbose:
        print('====================')
        print('====================')
        print('\n'.join(result.fpath for result in grep_results))

    return grep_results
def find(pattern=None, dpath=None, include=None, exclude=None,
         dirblocklist=None, type=None, recursive=True, followlinks=False):
    """
    Find all paths in a root subject to a search criterion

    This is a generator: paths are yielded while the directory tree is
    walked. Within each directory, matching files are yielded before
    matching sub-directories.

    Args:
        pattern (str | Pattern | None):
            The glob pattern the path name must match to be returned.
            Defaults to ``'*'``.

        dpath (str | PathLike | None):
            The root directory to search. Can also be a filepath, in which
            case, that is the only filepath considered. NOTE: in the
            future, this argument may change to `path` to indicate
            specifying a filepath is allowed. Defaults to cwd.

        include (str | List[str] | MultiPattern | None):
            Pattern or list of patterns. If specified, search only files
            whose base name matches this pattern. By default the pattern
            is GLOB. This only applies to the final name. Directories that
            do not match this name will still be traversed.

        exclude (str | List[str] | MultiPattern | None):
            Pattern or list of patterns. Skip any file with a name suffix
            that matches the pattern. By default the pattern is GLOB. This
            ONLY applies to the final name. Directories that match an
            exclude pattern will still be traversed. Use ``dirblocklist``
            to specify patterns to exclude intermediate directories from
            traversal.

        dirblocklist (str | List[str] | MultiPattern | None):
            Any directory name matching this pattern will be removed from
            traversal.

        type (str | List[str] | None):
            A list of 1 character codes indicating what types of file can
            be returned. Currently we only allow either "f" for file or
            "d" for directory. Symbolic links are not currently
            distinguished. In the future we may support posix codes, see
            [1]_ for details. If None, both files and directories are
            returned.

        recursive (bool): search all subdirectories recursively

        followlinks (bool, default=False):
            if True will follow directory symlinks

    Yields:
        str: each matching path, formed by joining the walked directory
            with the matching name

    References:
        .. [1] https://linuxconfig.org/identifying-file-types-in-linux

    TODO:
        mindepth

        maxdepth

        ignore_case

        regex_match

    Example:
        >>> from xdev.search_replace import *  # NOQA
        >>> from xdev.search_replace import _create_test_filesystem
        >>> dpath = _create_test_filesystem()['root']
        >>> paths = list(find(pattern='*', dpath=dpath))
        >>> assert len(paths) == 5
        >>> paths = list(find(pattern='*', dpath=dpath, type='f'))
        >>> assert len(paths) == 4
    """
    if pattern is None:
        pattern = '*'

    # With no type restriction, both files and directories are reported
    with_dirs = type is None or 'd' in type
    with_files = type is None or 'f' in type

    if dpath is None:
        dpath = '.'  # os.getcwd()

    def _coerce_optional(spec):
        # Name filters default to glob syntax; None means "no filter"
        if spec is None:
            return None
        return MultiPattern.coerce(spec, hint='glob')

    include = _coerce_optional(include)
    exclude = _coerce_optional(exclude)
    dirblocklist = _coerce_optional(dirblocklist)
    main_pattern = Pattern.coerce(pattern, hint='glob')

    def is_included(name):
        # Must match the main pattern, must not be excluded, and must match
        # the include filter whenever one was given.
        if not main_pattern.match(name):
            return False
        if exclude is not None and exclude.match(name):
            return False
        return bool(include is None or include.match(name))

    if os.path.isfile(dpath):
        # Spoof walk output when dpath is given as a file path
        walkgen = [(os.path.dirname(dpath), [], [os.path.basename(dpath)])]
    else:
        walkgen = os.walk(dpath, followlinks=followlinks)

    for root, dnames, fnames in walkgen:
        if dirblocklist is not None:
            # In-place edit so that os.walk does not descend into (and this
            # function does not report) blocked directories.
            dnames[:] = [
                dname for dname in dnames if not dirblocklist.match(dname)
            ]

        if with_files:
            for fname in filter(is_included, fnames):
                yield join(root, fname)

        if with_dirs:
            for dname in filter(is_included, dnames):
                yield join(root, dname)

        if not recursive:
            # Only the top level was requested
            break
def sedfile(fpath, regexpr, repl, dry=False, verbose=1):
    r"""
    Execute a search and replace on a particular file

    The file is read as text, the substitution is applied line by line and,
    unless ``dry`` is set, the file is rewritten when at least one line
    changed.

    Args:
        fpath (str | PathLike): file to search / replace on

        regexpr (str | Pattern): pattern to find

        repl (str): the text to replace the found pattern with

        dry (bool): if True does not apply edits

        verbose (int): verbosity level. When truthy, a header, a colored
            diff and (for real runs) a write notice are printed for files
            that change.

    Returns:
        List[Tuple[str, str]]: ``(new_line, old_line)`` pairs for every
            line that the substitution changed; empty if nothing changed.

    Raises:
        UnicodeDecodeError: if the file cannot be decoded as text. The
            file path is appended to the error reason.

    TODO:
        - [ ] Store "SedResult" class, with lazy execution

    Example:
        >>> from xdev.search_replace import *  # NOQA
        >>> from xdev.search_replace import _create_test_filesystem
        >>> fpath = _create_test_filesystem()['contents'][1]
        >>> changed_lines1 = sedfile(fpath, 'a', 'x', dry=True, verbose=1)
        >>> changed_lines2 = sedfile(fpath, 'a', 'x', dry=False, verbose=0)
        >>> assert changed_lines2 == changed_lines1
        >>> changed_lines3 = sedfile(fpath, 'a', 'x', dry=False, verbose=0)
        >>> assert changed_lines3 != changed_lines2
    """
    pattern = Pattern.coerce(regexpr, hint='regex')

    # Only the read can fail to decode, so keep the try block minimal.
    try:
        with open(fpath, 'r') as file:
            file_lines = file.readlines()
    except UnicodeDecodeError as ex:
        # Add the file name into the exception
        new_last_arg = ex.args[-1] + ' in fpath={!r}'.format(fpath)
        new_args = ex.args[:-1] + (new_last_arg,)
        raise UnicodeDecodeError(*new_args) from ex

    # Search each line for the desired regexpr
    new_file_lines = [pattern.sub(repl, line) for line in file_lines]
    changed_lines = [
        (newline, line)
        for newline, line in zip(new_file_lines, file_lines)
        if newline != line
    ]
    if not changed_lines:
        return changed_lines

    new_file = ''.join(new_file_lines)

    if verbose:
        # Everything in this branch is only needed to report the change
        import xdev
        mode_text = '(dry-run)' if dry else '(real-run)'
        try:
            rel_fpath = relpath(fpath, os.getcwd())
        except ValueError:
            # windows issues
            rel_fpath = abspath(fpath)
        print(' * {} changed {} lines in {!r} '.format(
            mode_text, len(changed_lines), rel_fpath))
        print(' * --------------------')
        old_file = ''.join(file_lines)
        print(xdev.difftext(old_file, new_file, colored=True))

    if not dry:
        if verbose:
            print(' ! WRITING CHANGES')
        with open(fpath, 'w') as file:
            file.write(new_file)

    return changed_lines
def grepfile(fpath, regexpr, verbose=1):
    r"""
    Execute grep on a single file

    Args:
        fpath (str | PathLike): file to search

        regexpr (str | Pattern): pattern to find

        verbose (int): verbosity level

    Returns:
        None | GrepResult: None if the file could not be decoded as text,
            otherwise the (possibly empty) collection of matching lines.

    Example:
        >>> from xdev.search_replace import *  # NOQA
        >>> from xdev.search_replace import _create_test_filesystem
        >>> fpath = _create_test_filesystem()['contents'][1]
        >>> grep_result = grepfile(fpath, r'\bb\b')
        >>> print('grep_result = {}'.format(grep_result))
    """
    pattern = Pattern.coerce(regexpr, hint='regex')

    with open(fpath, 'r') as file:
        try:
            lines = file.readlines()
        except UnicodeDecodeError:
            print("UNABLE TO READ fpath={}".format(fpath))
            return None

    result = GrepResult(fpath, pattern)
    result.max_line = len(lines)

    # Record every line on which the pattern is found
    for index, text in enumerate(lines):
        if pattern.search(text):
            result.append(index, text)

    # Print the results (if any)
    if verbose and len(result):
        print(result.format_text())

    return result
def greptext(text, regexpr, fpath=None, verbose=1):
    r"""
    Execute grep on text

    Args:
        text (str): text to search

        regexpr (str | Pattern): pattern to find

        fpath (str | PathLike | None): label recorded in the result as the
            origin of the text. Defaults to ``'<text>'``.

        verbose (int): verbosity level

    Returns:
        GrepResult: the (possibly empty) collection of matching lines
    """
    pattern = Pattern.coerce(regexpr, hint='regex')

    # Only fall back to the placeholder when the caller gave no label
    if fpath is None:
        fpath = '<text>'

    lines = text.splitlines()

    grep_result = GrepResult(fpath, pattern)
    grep_result.max_line = len(lines)

    # Search each line for the desired pattern
    for lx, line in enumerate(lines):
        if pattern.search(line):
            grep_result.append(lx, line)

    # Print the results (if any)
    if verbose and len(grep_result):
        print(grep_result.format_text())

    return grep_result
def _create_test_filesystem():
    """
    Write the small set of text files used by this module's doctests.

    Four files are (re)written on every call, inside the directory returned
    by ``ub.ensure_app_cache_dir('xdev/test_search_replace')``:
    ``lorium.txt``, ``fib.py``, ``foo.txt`` and an empty ``subdir/foo.txt``.

    Returns:
        dict: with keys ``'root'`` (the directory) and ``'contents'`` (the
            paths of the three top-level files; ``subdir/foo.txt`` is not
            listed).
    """
    dpath = ub.ensure_app_cache_dir('xdev/test_search_replace')
    text1 = ub.paragraph(
        '''
        Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do
        eiusmod tempor incididunt ut labore et dolore magna aliqua. Ut enim
        ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut
        aliquip ex ea commodo consequat. Duis aute irure dolor in
        reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla
        pariatur. Excepteur sint occaecat cupidatat non proident, sunt in
        culpa qui officia deserunt mollit anim id est laborum.
        ''')

    # NOTE(review): the line breaks inside text2 and text3 could not be
    # recovered from the flattened source; confirm against the original.
    text2 = ub.codeblock(
        '''
        def fib(n):
            a, b = 0, 1
            while a < n:
                print(a, end=' ')
                a, b = b, a+b
            print()

        fib(1000)
        ''')

    text3 = ub.codeblock(
        '''
        This file contains Lorem and fib

        Newlines

        fib

        lorem

        fib
        ''')

    # The file in the sub-directory is intentionally empty
    text4 = ''

    fpath1 = join(dpath, 'lorium.txt')
    fpath2 = join(dpath, 'fib.py')
    fpath3 = join(dpath, 'foo.txt')
    fpath4 = join(ub.ensuredir((dpath, 'subdir')), 'foo.txt')
    with open(fpath1, 'w') as file:
        file.write(text1)
    with open(fpath2, 'w') as file:
        file.write(text2)
    with open(fpath3, 'w') as file:
        file.write(text3)
    with open(fpath4, 'w') as file:
        file.write(text4)

    info = {
        'root': dpath,
        'contents': [fpath1, fpath2, fpath3],
    }
    return info
if __name__ == '__main__':
    """
    CommandLine:
        python ~/code/xdev/xdev/search_replace.py
        xdoctest ~/code/xdev/xdev/search_replace.py
    """
    # Running the module as a script executes its doctests
    from xdoctest import doctest_module
    doctest_module(__file__)