"""
Python implementations of sed, grep, and find
Porting from ~/local/rob/rob/rob_nav.py / ubelt
"""
from __future__ import absolute_import, division, print_function, unicode_literals
import os
import ubelt as ub
from os.path import relpath, split, join, abspath
from xdev.patterns import Pattern, RE_Pattern # NOQA
from xdev.patterns import MultiPattern
# try:
# from packaging.version import parse as parse_version
# except Exception:
# from distutils.version import LooseVersion as parse_version
[docs]
class GrepResult(ub.NiceRepr):
    """
    Manage and format results from grep

    Accumulates the lines of one searched source that matched a pattern,
    together with the index of each line, and renders them as a text report.
    Iterating yields the matched lines and ``len`` gives their count.
    """

    def __init__(self, fpath, pattern=None):
        """
        Args:
            fpath (str | PathLike): path (or label) of the searched source
            pattern (Pattern | None): pattern that produced the matches;
                only used by :func:`format_text` to highlight them.
        """
        self.pattern = pattern
        self.fpath = fpath
        # Parallel lists: line index and line text of every recorded match.
        self.found_lxs = []
        self.found_lines = []
        # Only used to size the line-number column in format_text.
        self.max_line = 100

    def __nice__(self):
        return '{} in {}'.format(len(self), self.fpath)

    def __iter__(self):
        return iter(self.found_lines)

    def __len__(self):
        return len(self.found_lines)

    def append(self, lx, line):
        """
        Record one matching line.

        Args:
            lx (int): index of the line in its source
            line (str): text of the line
        """
        self.found_lines.append(line)
        self.found_lxs.append(lx)

    def format_text(self, color=True):
        """
        Build a multi-line report of the recorded matches.

        Args:
            color (bool): if True and a pattern is available, the first
                match on each line is passed through ``ub.color_text``
                with the color ``'red'``. If False, lines are left as is.

        Returns:
            str: the report text
        """
        # Keep the color name separate from the ``color`` flag. Previously
        # the flag was overwritten here, so ``color=False`` had no effect.
        highlight_color = 'red'
        fname = ub.Path(self.fpath).name
        ndigits = str(len(str(self.max_line)))
        fmt_str = '{} : {:' + ndigits + 'd} |{}'
        summary = [
            '----------------------',
            'Found {} line(s) in {!r}: '.format(len(self), self.fpath),
        ]
        do_highlight = bool(color and self.pattern)
        for lx, line in zip(self.found_lxs, self.found_lines):
            line = line.replace('\n', '')
            if do_highlight:
                found = self.pattern.search(line)
                # The newline was stripped after the line was first matched,
                # so guard against the pattern no longer matching.
                if found is not None:
                    s, t = found.span()
                    line = (line[:s] +
                            ub.color_text(line[s:t], highlight_color) +
                            line[t:])
            summary.append(fmt_str.format(fname, lx, line))
        return '\n'.join(summary)
[docs]
def sed(regexpr, repl, dpath=None, include=None, exclude=None,
        dirblocklist=None, recursive=True, dry=False, verbose=1):
    r"""
    Execute a sed on multiple files.

    Every file yielded by :func:`find` is passed to :func:`sedfile`. Files
    that raise a ``UnicodeDecodeError`` are counted as skipped instead of
    aborting the run. Nothing is returned; a summary is printed when
    ``verbose`` is truthy.

    Args:
        regexpr (str | Pattern): pattern to find
        repl (str): the text to replace the found pattern with
        dpath (str | None): passed to :func:`find`.
        include (str | List[str] | MultiPattern | None): passed to :func:`find`.
        exclude (str | List[str] | MultiPattern | None): passed to :func:`find`.
        dirblocklist (str | List[str] | MultiPattern | None): passed to :func:`find`.
        recursive (bool): passed to :func:`find`.
        dry (bool): if True does not apply edits
        verbose (int): verbosity level. Also forwarded to :func:`sedfile`,
            so ``verbose=0`` silences the per-file diffs as well as the
            final summary.

    Example:
        >>> from xdev.search_replace import *  # NOQA
        >>> from xdev.search_replace import _create_test_filesystem
        >>> dpath = _create_test_filesystem()['root']
        >>> sed('a', 'x', dpath=dpath, dry=True)
    """
    num_changed = 0
    num_files_checked = 0
    num_skipped = 0
    fpaths_changed = []

    fpath_generator = find(dpath=dpath, type='f', include=include,
                           exclude=exclude, dirblocklist=dirblocklist,
                           recursive=recursive)
    for fpath in fpath_generator:
        try:
            # Forward verbose so a quiet sed is quiet at the file level too.
            changed_lines = sedfile(fpath, regexpr, repl, dry=dry,
                                    verbose=verbose)
        except UnicodeDecodeError:
            # Undecodable files are treated as probable binaries.
            num_skipped += 1
        else:
            num_files_checked += 1
            if len(changed_lines) > 0:
                fpaths_changed.append(fpath)
                num_changed += len(changed_lines)

    if verbose:
        print('num_files_checked = {}'.format(num_files_checked))
        print('num probable binary files skipped = {}'.format(num_skipped))
        print('fpaths_changed = {}'.format(ub.repr2(sorted(fpaths_changed))))
        print('total lines changed = {!r}'.format(num_changed))
[docs]
def grep(regexpr, dpath=None, include=None, exclude=None, recursive=True,
         dirblocklist=None, verbose=1):
    r"""
    Execute a grep on multiple files.

    Each file yielded by :func:`find` is searched with :func:`grepfile`.
    Only truthy results (those holding at least one matching line) are kept.

    Args:
        regexpr (str | Pattern): pattern to find
        dpath (str | None): passed to :func:`find`.
        include (str | List[str] | MultiPattern | None): passed to :func:`find`.
        exclude (str | List[str] | MultiPattern | None): passed to :func:`find`.
        recursive (bool): passed to :func:`find`.
        dirblocklist (str | List[str] | MultiPattern | None): passed to :func:`find`.
        verbose (int): verbosity level; forwarded to :func:`grepfile` and,
            when truthy, a list of the matching file paths is printed.

    Returns:
        List[GrepResult]: one entry per file with at least one match

    Example:
        >>> from xdev.search_replace import *  # NOQA
        >>> from xdev.search_replace import _create_test_filesystem
        >>> dpath = _create_test_filesystem()['root']
        >>> grep('a', dpath=dpath)
    """
    candidates = find(dpath=dpath, type='f', include=include,
                      exclude=exclude, recursive=recursive,
                      dirblocklist=dirblocklist)
    # Lazily grep each candidate; None and empty results are both falsy.
    per_file = (grepfile(fpath, regexpr, verbose=verbose)
                for fpath in candidates)
    grep_results = [result for result in per_file if result]

    if verbose:
        print('====================')
        print('====================')
        print('\n'.join([result.fpath for result in grep_results]))

    return grep_results
[docs]
def find(pattern=None, dpath=None, include=None, exclude=None,
         dirblocklist=None, type=None, recursive=True, followlinks=False):
    """
    Find all paths in a root subject to a search criterion

    This is a generator. For each directory visited it yields the matching
    file paths first and the matching directory paths second.

    Args:
        pattern (str | Pattern | None):
            The glob pattern the path name must match to be returned.
            Defaults to ``'*'``.

        dpath (str | PathLike | None):
            The root directory to search.
            Can also be a filepath, in which case, that is the only filepath
            considered.
            NOTE: in the future, this argument may change to `path` to indicate
            specifying a filepath is allowed.
            Defaults to ``'.'`` (the current working directory).

        include (str | List[str] | MultiPattern | None):
            Pattern or list of patterns. If specified, only return paths
            whose final name matches this pattern. By default the pattern
            is GLOB. Directories that do not match this name will still be
            traversed.

        exclude (str | List[str] | MultiPattern | None):
            Pattern or list of patterns. Skip any path whose final name
            matches the pattern. By default the pattern is GLOB. This ONLY
            applies to the final name. Directories that match an exclude
            pattern will still be traversed. Use ``dirblocklist`` to specify
            patterns to exclude intermediate directories from traversal.

        dirblocklist (str | List[str] | MultiPattern | None):
            Any directory name matching this pattern will be removed from
            traversal.

        type (str | List[str] | None):
            A list of 1 character codes indicating what types of file can be
            returned. Currently we only allow either "f" for file or "d" for
            directory. Symbolic links are not currently distinguished. In the
            future we may support posix codes, see [1]_ for details.
            If None, both files and directories are returned.

        recursive (bool):
            search all subdirectories recursively

        followlinks (bool, default=False):
            if True will follow directory symlinks

    Yields:
        str: each matching path, joined onto the directory it was found in

    References:
        .. [1] https://linuxconfig.org/identifying-file-types-in-linux

    TODO:
        mindepth
        maxdepth
        ignore_case
        regex_match

    Example:
        >>> from xdev.search_replace import *  # NOQA
        >>> from xdev.search_replace import _create_test_filesystem
        >>> dpath = _create_test_filesystem()['root']
        >>> paths = list(find(pattern='*', dpath=dpath))
        >>> assert len(paths) == 5
        >>> paths = list(find(pattern='*', dpath=dpath, type='f'))
        >>> assert len(paths) == 4
    """
    if pattern is None:
        pattern = '*'

    # Without an explicit type, both files and directories are reported.
    with_dirs = True if type is None else ('d' in type)
    with_files = True if type is None else ('f' in type)

    if dpath is None:
        dpath = '.'  # os.getcwd()

    def _coerce_optional(value):
        # Optional name filters stay None when they were not requested.
        if value is None:
            return None
        return MultiPattern.coerce(value, hint='glob')

    include = _coerce_optional(include)
    exclude = _coerce_optional(exclude)
    dirblocklist = _coerce_optional(dirblocklist)
    main_pattern = Pattern.coerce(pattern, hint='glob')

    def is_included(name):
        # A name must match the main pattern, must not be excluded, and
        # must match the include filter when one was given.
        if not main_pattern.match(name):
            return False
        if exclude is not None and exclude.match(name):
            return False
        return include is None or bool(include.match(name))

    if os.path.isfile(dpath):
        # Spoof walk output when dpath is given as a file path
        walkgen = [(os.path.dirname(dpath), [], [os.path.basename(dpath)])]
    else:
        walkgen = os.walk(dpath, followlinks=followlinks)

    for root, dnames, fnames in walkgen:
        if dirblocklist is not None:
            # Edit dnames in place so os.walk skips the blocked directories.
            dnames[:] = [d for d in dnames if not dirblocklist.match(d)]

        if with_files:
            yield from (join(root, fname)
                        for fname in fnames if is_included(fname))

        if with_dirs:
            yield from (join(root, dname)
                        for dname in dnames if is_included(dname))

        if not recursive:
            # Only the top level was requested.
            break
[docs]
def sedfile(fpath, regexpr, repl, dry=False, verbose=1):
    r"""
    Execute a search and replace on a particular file

    The file is read in text mode, the substitution is applied line by
    line, and (unless ``dry`` is truthy) the file is rewritten when at
    least one line changed.

    Args:
        fpath (str | PathLike): file to search / replace on
        regexpr (str | Pattern): pattern to find
        repl (str): the text to replace the found pattern with
        dry (bool): if True does not apply edits
        verbose (int): verbosity level; when truthy a header and a diff
            are printed for files that change.

    Returns:
        List[Tuple[str, str]]: changed lines as ``(new_line, old_line)``
            pairs. Empty when nothing changed.

    Raises:
        UnicodeDecodeError: if the file cannot be decoded; the file path
            is appended to the error reason.

    TODO:
        - [ ] Store "SedResult" class, with lazy execution

    Example:
        >>> from xdev.search_replace import *  # NOQA
        >>> from xdev.search_replace import _create_test_filesystem
        >>> fpath = _create_test_filesystem()['contents'][1]
        >>> changed_lines1 = sedfile(fpath, 'a', 'x', dry=True, verbose=1)
        >>> changed_lines2 = sedfile(fpath, 'a', 'x', dry=False, verbose=0)
        >>> assert changed_lines2 == changed_lines1
        >>> changed_lines3 = sedfile(fpath, 'a', 'x', dry=False, verbose=0)
        >>> assert changed_lines3 != changed_lines2
    """
    # Use truthiness rather than list indexing so any dry value works.
    mode_text = '(dry-run)' if dry else '(real-run)'
    pattern = Pattern.coerce(regexpr, hint='regex')

    try:
        with open(fpath, 'r') as file:
            file_lines = file.readlines()
    except UnicodeDecodeError as ex:
        # Add the file name into the exception, preserving its type
        new_last_arg = ex.args[-1] + ' in fpath={!r}'.format(fpath)
        new_args = ex.args[:-1] + (new_last_arg,)
        raise UnicodeDecodeError(*new_args) from ex

    # Apply the substitution to each line
    new_file_lines = [pattern.sub(repl, line) for line in file_lines]

    changed_lines = [(newline, line)
                     for newline, line in zip(new_file_lines, file_lines)
                     if newline != line]
    if not changed_lines:
        return []

    new_file = ''.join(new_file_lines)

    if verbose:
        import xdev
        try:
            rel_fpath = relpath(fpath, os.getcwd())
        except ValueError:
            # windows issues
            rel_fpath = abspath(fpath)
        print(' * {} changed {} lines in {!r} '.format(
            mode_text, len(changed_lines), rel_fpath))
        print(' * --------------------')
        # Lines read in text mode are already str, so join them directly.
        old_file = ''.join(file_lines)
        print(xdev.difftext(old_file, new_file, colored=True))

    if not dry:
        if verbose:
            print(' ! WRITING CHANGES')
        with open(fpath, 'w') as file:
            file.write(new_file)

    return changed_lines
[docs]
def grepfile(fpath, regexpr, verbose=1):
    r"""
    Execute grep on a single file

    Args:
        fpath (str | PathLike): file to search
        regexpr (str | Pattern): pattern to find
        verbose (int): verbosity level; when truthy and at least one line
            matched, the formatted result is printed.

    Returns:
        None | GrepResult: None if the file could not be decoded,
            otherwise the (possibly empty) result for this file.

    Example:
        >>> from xdev.search_replace import *  # NOQA
        >>> from xdev.search_replace import _create_test_filesystem
        >>> fpath = _create_test_filesystem()['contents'][1]
        >>> grep_result = grepfile(fpath, r'\bb\b')
        >>> print('grep_result = {}'.format(grep_result))
    """
    pattern = Pattern.coerce(regexpr, hint='regex')

    with open(fpath, 'r') as file:
        try:
            lines = file.readlines()
        except UnicodeDecodeError:
            print("UNABLE TO READ fpath={}".format(fpath))
            return None

        result = GrepResult(fpath, pattern)
        result.max_line = len(lines)

        # Record every line on which the pattern is found
        hits = ((index, text) for index, text in enumerate(lines)
                if pattern.search(text))
        for index, text in hits:
            result.append(index, text)

        # Print the results (if any)
        if verbose and len(result):
            print(result.format_text())

    return result
[docs]
def greptext(text, regexpr, fpath=None, verbose=1):
    r"""
    Execute grep on text

    Args:
        text (str): text to search
        regexpr (str | Pattern): pattern to find
        fpath (str | None): label stored on the result and shown in the
            printed report. Defaults to ``'<text>'``.
        verbose (int): verbosity level; when truthy and at least one line
            matched, the formatted result is printed.

    Returns:
        GrepResult: the (possibly empty) result for this text
    """
    from xdev.patterns import Pattern
    # from xdev.search_replace import GrepResult
    pattern = Pattern.coerce(regexpr, hint='regex')

    # Honor a caller-supplied label; previously it was always overwritten.
    if fpath is None:
        fpath = '<text>'

    # Splitting an in-memory string does no decoding, so no decode error
    # handling is needed here.
    lines = text.splitlines()

    grep_result = GrepResult(fpath, pattern)
    grep_result.max_line = len(lines)

    # Search each line for the desired pattern
    for lx, line in enumerate(lines):
        if pattern.search(line):
            grep_result.append(lx, line)

    # Print the results (if any)
    if verbose and len(grep_result):
        print(grep_result.format_text())

    return grep_result
[docs]
def _create_test_filesystem():
    """
    Write a small fixture directory tree used by the examples in this file.

    Three text files are written at the top level of the fixture directory
    and one empty file is written inside a ``subdir`` folder.

    Returns:
        dict: ``root`` is the fixture directory and ``contents`` lists the
            three top-level file paths (the nested file is not listed).
    """
    root = ub.ensure_app_cache_dir('xdev/test_search_replace')

    # NOTE(review): sample texts are reproduced as found in this file;
    # confirm the layout of the fib snippet against upstream if it matters.
    lorem_text = ub.paragraph(
        '''
        Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod
        tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim
        veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea
        commodo consequat. Duis aute irure dolor in reprehenderit in voluptate
        velit esse cillum dolore eu fugiat nulla pariatur. Excepteur sint
        occaecat cupidatat non proident, sunt in culpa qui officia deserunt
        mollit anim id est laborum.
        ''')

    fib_text = ub.codeblock(
        '''
        def fib(n):
        a, b = 0, 1
        while a < n:
        print(a, end=' ')
        a, b = b, a+b
        print()
        fib(1000)
        ''')

    mixed_text = ub.codeblock(
        '''
        This file contains Lorem and fib
        Newlines
        fib
        lorem
        fib
        ''')

    top_level_fpaths = [
        join(root, 'lorium.txt'),
        join(root, 'fib.py'),
        join(root, 'foo.txt'),
    ]
    # The nested file shares a name with a top-level one and is left empty.
    nested_fpath = join(ub.ensuredir((root, 'subdir')), 'foo.txt')

    payloads = [lorem_text, fib_text, mixed_text, '']
    for fpath, text in zip(top_level_fpaths + [nested_fpath], payloads):
        with open(fpath, 'w') as file:
            file.write(text)

    return {
        'root': root,
        'contents': top_level_fpaths,
    }
if __name__ == '__main__':
    """
    CommandLine:
        python ~/code/xdev/xdev/search_replace.py
        xdoctest ~/code/xdev/xdev/search_replace.py
    """
    # Run the doctests of this file when it is executed as a script.
    from xdoctest import doctest_module
    doctest_module(__file__)