from __future__ import absolute_import, division, print_function, unicode_literals
import collections
import inspect
import re
import textwrap
import types

import six
import ubelt as ub

from xinspect.static_kwargs import parse_kwarg_keys
# Non-greedy "zero or more" regex quantifier; appended to a pattern atom
# (see its use when building the decorator regex in get_func_sourcecode).
REGEX_NONGREEDY = '*?'
# THIS IS THE CANONICAL API FUNCTION. TODO: MAKE THE OTHERS PRIVATE
[docs]
def get_func_kwargs(func, max_depth=None):
    """
    Collect the keyword arguments (with default values) accepted by ``func``.

    Defaults declared in the signature are always reported. When the
    function also declares a ``**kwargs``-style catch-all, its source is
    inspected heuristically (see `recursive_parse_kwargs`) and any keys
    found there are merged in, overriding signature entries of the same name.

    Args:
        func (callable): function to introspect kwargs from
        max_depth (int, default=None): forwarded to
            `recursive_parse_kwargs`, which treats None as "no limit".

    Returns:
        dict: maps keyword names to default values
    """
    spec = get_func_argspec(func)
    found = {}
    if spec.defaults is not None:
        # Defaults line up with the *trailing* positional args, so pair
        # both sequences starting from the right-hand side.
        found = dict(zip(reversed(spec.args), reversed(spec.defaults)))
    if spec.keywords is None:
        return found
    implicit = recursive_parse_kwargs(func, max_depth=max_depth)
    found.update(dict(implicit))
    return found
def bref_field(key):
    """
    Build the regex replacement-string backreference for group ``key``.

    Returns:
        str: text of the form ``\\g<key>``
    """
    template = r'\g<%s>'
    return template % (key)
def named_field(key, regex, vim=False):
    """
    Wrap ``regex`` in a capturing group.

    With a ``key`` the group is named so it can be referenced via a
    backref; with ``key=None`` a plain (numbered) group is produced. When
    ``vim`` is True (and a key is given) the group uses backslash-escaped
    parentheses instead of the Python named-group syntax.

    References:
        https://docs.python.org/2/library/re.html#regular-expression-syntax
    """
    if key is not None:
        if not vim:
            return r'(?P<%s>%s)' % (key, regex)
        return r'\(%s\)' % (regex)
    # No name requested: fall back to an anonymous capturing group
    return r'(%s)' % (regex,)
def is_func_or_method(var):
    """
    Check whether ``var`` is a pure-Python function or a method object.

    Returns:
        bool: True for `types.FunctionType` / `types.MethodType` instances
    """
    accepted_types = (types.MethodType, types.FunctionType)
    return isinstance(var, accepted_types)
def get_funcglobals(func):
    """
    Return the global namespace dictionary that ``func`` was defined in.

    ``__globals__`` is available on functions (and proxied by method
    objects) in Python 2.6+ as well as Python 3, so no version-specific
    attribute name is required.

    Args:
        func (function or method): live python function

    Returns:
        dict: the function's module-level namespace

    Raises:
        AttributeError: if ``func`` has no ``__globals__`` attribute
    """
    return func.__globals__
def parse_func_kwarg_keys(func, with_vals=False):
    """
    Heuristically infer the keys a function reads from its kwargs dict.

    The function's source is fetched with docstrings and comments removed
    and then handed to `parse_kwarg_keys`.

    Args:
        func (function): live python function
        with_vals (bool, default=False): forwarded to `parse_kwarg_keys`

    SeeAlso:
        argparse_funckw
        recursive_parse_kwargs
        parse_kwarg_keys
        parse_func_kwarg_keys
        get_func_kwargs
    """
    stripped_source = get_func_sourcecode(
        func, strip_docstr=True, strip_comments=True)
    return parse_kwarg_keys(stripped_source, with_vals=with_vals)
def get_kwdefaults(func, parse_source=False):
    r"""
    Map each defaulted positional argument of ``func`` to its default value.

    Args:
        func (func): function to introspect
        parse_source (bool, default=False): if True and ``func`` declares a
            ``**kwargs`` catch-all, keys found by `parse_func_kwarg_keys`
            are added as well.

    Returns:
        dict: argument name -> default value, in signature order. A plain
            empty dict is returned when the signature has no defaults.

    Example:
        >>> def demo(a, b=1, c='x', **kwargs):
        ...     pass
        >>> list(get_kwdefaults(demo).items())
        [('b', 1), ('c', 'x')]
    """
    # inspect.getargspec was removed in Python 3.11 (and rejects annotated /
    # keyword-only signatures before that), so prefer getfullargspec and only
    # fall back to the legacy call where it is the sole option (Python 2).
    if hasattr(inspect, 'getfullargspec'):
        argspec = inspect.getfullargspec(func)
        varkw_name = argspec.varkw
    else:
        argspec = inspect.getargspec(func)
        varkw_name = argspec.keywords
    args = argspec.args
    defaults = argspec.defaults
    kwdefaults = {}
    if args is not None and defaults is not None:
        # Defaults belong to the last len(defaults) positional arguments
        kwpos = len(args) - len(defaults)
        kwdefaults = collections.OrderedDict(zip(args[kwpos:], defaults))
    if parse_source and varkw_name:
        # TODO parse for kwargs.get/pop
        keyword_defaults = parse_func_kwarg_keys(func, with_vals=True)
        for key, val in keyword_defaults:
            assert key not in kwdefaults, 'parsing error'
            kwdefaults[key] = val
    return kwdefaults
def lookup_attribute_chain(attrname, namespace):
    """
    Resolve a dotted name such as ``'KWReg.print_defaultkw'``.

    The first component is looked up in ``namespace``; every following
    component is looked up in the ``__dict__`` of the object found so far.

    Args:
        attrname (str): dotted attribute path
        namespace (dict): mapping to start from (e.g. ``mod.__dict__``)

    Returns:
        object: the value stored under the final path component

    Example:
        >>> import types
        >>> lookup_attribute_chain('types.FunctionType', {'types': types}) is types.FunctionType
        True
    """
    parts = attrname.split('.')
    leaf_name = parts.pop()
    scope = namespace
    for part in parts:
        # Descend into the raw attribute dict of each intermediate object
        scope = scope[part].__dict__
    return scope[leaf_name]
def recursive_parse_kwargs(root_func, path_=None, verbose=None, max_depth=None):
    """
    recursive kwargs parser

    Collects the explicit keyword defaults of ``root_func``, the keys it
    reads from its ``**kwargs`` dict (found by static source inspection),
    and - recursively - the same information for every function that
    ``root_func`` calls with ``**kwargs``.

    Args:
        root_func (function): live python function
        path_ (list, default=None): functions already visited. Shared and
            appended to across the recursion so that a function is only
            parsed once (this also breaks cycles).
        verbose (bool, default=None): print progress; None means False.
        max_depth (int, default=None): if specified only recurse to this depth.

    Returns:
        list: ``(key, default)`` items; may contain duplicate keys

    TODO:
        - [ ] rectify with others
        - [ ] if docstr indentation is off, this fails

    Example:
        >>> modname = ub.argval('--mod', default='ubelt')
        >>> funcname = ub.argval('--func', default='cmd')
        >>> mod = ub.import_module_from_name(modname)
        >>> root_func = lookup_attribute_chain(funcname, mod.__dict__)
        >>> path_ = None
        >>> parsed = recursive_parse_kwargs(root_func)
        >>> flags = ub.unique_flags([p[0] for p in parsed])
        >>> unique = list(ub.compress(parsed, flags))
        >>> print('parsed = %s' % (ub.repr2(parsed),))
        >>> print('unique = %s' % (ub.repr2(unique),))
    """
    if max_depth is None:
        max_depth = float('inf')
    if verbose is None:
        verbose = False
    if verbose:
        print('[inspect] recursive parse kwargs root_func = %r ' % (root_func,))

    if path_ is None:
        path_ = []
    if root_func in path_:
        if verbose:
            print('[inspect] Encountered cycle. returning')
        return []
    path_.append(root_func)

    spec = get_func_argspec(root_func)
    # ADD MORE
    found_explicit = list(get_kwdefaults(root_func, parse_source=False).items())
    if verbose:
        print('[inspect] * Found explicit %r' % (found_explicit,))

    # Body only (no def line): searched for calls that forward **kwargs
    sourcecode = get_func_sourcecode(root_func, strip_docstr=True,
                                     strip_def=True, strip_decor=True)
    # Including the def line: handed to the static kwargs-key parser
    sourcecode1 = get_func_sourcecode(root_func, strip_docstr=True,
                                      strip_def=False, strip_decor=True)
    found_implicit = parse_kwarg_keys(sourcecode1, spec.keywords,
                                      with_vals=True)
    if verbose:
        print('[inspect] * Found found_implicit %r' % (found_implicit,))
    kwargs_list = found_explicit + found_implicit

    def hack_lookup_mod_attrs(attr):
        # HACKS TODO: have find_funcs_called_with_kwargs infer an attribute is a
        # module / function / type. In the module case, we can import it and
        # look it up. Maybe args, or returns can help infer type. Maybe just
        # register some known varnames. Maybe jedi has some better way to do
        # this.
        # Currently a placeholder that never resolves anything.
        return None

    def method_owner_namespace():
        # Namespace of the class that the method ``root_func`` belongs to.
        # Python 2 methods expose ``im_class``; Python 3 bound methods only
        # expose ``__self__`` (the instance, or the class for classmethods).
        owner = getattr(root_func, 'im_class', None)
        if owner is None:
            bound_to = root_func.__self__
            owner = bound_to if inspect.isclass(bound_to) else type(bound_to)
        return owner.__dict__

    def resolve_attr_subfunc(subfunc_name):
        # look up attribute chain, starting from the globals of root_func
        subdict = get_funcglobals(root_func)
        subtup = subfunc_name.split('.')
        try:
            subdict = lookup_attribute_chain(subfunc_name, subdict)
            if is_func_or_method(subdict):
                return subdict
            # NOTE: when the chain resolves to something that is not a
            # function/method, control falls through to the final lookup
            # below with ``subdict`` bound to that object.
        except (KeyError, TypeError):
            for attr in subtup[:-1]:
                try:
                    subdict = subdict[attr].__dict__
                except (KeyError, TypeError):
                    # limited support for class lookup (``self.method(...)``)
                    if isinstance(root_func, types.MethodType) and spec.args[0] == attr:
                        subdict = method_owner_namespace()
                    else:
                        # FIXME TODO lookup_attribute_chain
                        subdict = hack_lookup_mod_attrs(attr)
                        if subdict is None:
                            print('Unable to find attribute of attr=%r' % (attr,))
        if subdict is None:
            return None
        return subdict[subtup[-1]]

    def check_subfunc_name(subfunc_name):
        if isinstance(subfunc_name, tuple) or '.' in subfunc_name:
            subfunc = resolve_attr_subfunc(subfunc_name)
        else:
            # try to directly take func from globals
            func_globals = get_funcglobals(root_func)
            try:
                subfunc = func_globals[subfunc_name]
            except KeyError:
                print('Unable to find function definition subfunc_name=%r' %
                      (subfunc_name,))
                subfunc = None
        if subfunc is not None and max_depth > 0:
            return recursive_parse_kwargs(subfunc, path_,
                                          verbose=verbose,
                                          max_depth=max_depth - 1)
        return []

    if spec.keywords is not None:
        if verbose:
            print('[inspect] Checking spec.keywords=%r' % (spec.keywords,))
        subfunc_name_list = find_funcs_called_with_kwargs(sourcecode, spec.keywords)
        if verbose:
            print('[inspect] Checking subfunc_name_list=%r' % (subfunc_name_list,))
        for subfunc_name in subfunc_name_list:
            try:
                new_subkw = check_subfunc_name(subfunc_name)
                if verbose:
                    print('[inspect] * Found %r' % (new_subkw,))
                kwargs_list.extend(new_subkw)
            except TypeError:
                # Best effort: objects that cannot be introspected are skipped
                print('warning: unable to recursivley parse type of : %r' % (subfunc_name,))
    return kwargs_list
def find_funcs_called_with_kwargs(sourcecode, target_kwargs_name='kwargs'):
    r"""
    Finds functions that are called with the keyword `kwargs` variable

    The source is parsed with `ast` and every call of the form
    ``name(**kwargs)`` or ``obj.attr(**kwargs)`` is recorded. Nested function
    definitions that declare their own ``**kwargs`` of the same name are
    skipped, because inside them the name refers to a different dict. Calls
    whose callee is neither a plain name nor a one-level attribute (e.g.
    ``a.b.c(...)``, ``funcs[0](...)``) are ignored.

    Args:
        sourcecode (str): python source text to analyze
        target_kwargs_name (str, default='kwargs'): name of the variable
            that must appear as ``**name`` in a call

    Returns:
        list: names of the called functions, in the order they are visited

    Example:
        >>> sourcecode = '\n'.join([
        ...     'x, y = list(zip(*ub.ichunks(data, 2)))',
        ...     'somecall(arg1, arg2, arg3=4, **kwargs)',
        ...     'import sys',
        ...     'sys.badcall(**kwargs)',
        ...     'def foo():',
        ...     '    bar(**kwargs)',
        ...     '    ub.holymoly(**kwargs)',
        ...     '    baz()',
        ...     'def biz(**kwargs):',
        ...     '    foo2(**kwargs)',
        ... ])
        >>> child_funcnamess = find_funcs_called_with_kwargs(sourcecode)
        >>> print('child_funcnamess = %r' % (child_funcnamess,))
        >>> assert 'foo2' not in child_funcnamess, 'foo2 should not be found'
        >>> assert 'bar' in child_funcnamess, 'bar should be found'
    """
    import ast
    # Lets Python 2 parse sources that use print as a function
    sourcecode = 'from __future__ import print_function\n' + sourcecode
    pt = ast.parse(sourcecode)
    child_funcnamess = []

    def _callee_name(func_node):
        # Textual name of the called object, or None if it is not a plain
        # ``name`` or a one-level ``obj.attr`` expression.
        if isinstance(func_node, ast.Name):
            return func_node.id
        if (isinstance(func_node, ast.Attribute) and
                isinstance(func_node.value, ast.Name)):
            return func_node.value.id + '.' + func_node.attr
        return None

    def _splatted_names(call_node):
        # Names of the variables passed as ``**name`` in this call.
        names = []
        # Older ASTs (Python 2) store ``**x`` in a dedicated ``kwargs`` field
        legacy = getattr(call_node, 'kwargs', None)
        if isinstance(legacy, ast.Name):
            names.append(legacy.id)
        # Newer ASTs store ``**x`` as a keyword whose ``arg`` is None
        for keyword in call_node.keywords:
            if keyword.arg is None and isinstance(keyword.value, ast.Name):
                names.append(keyword.value.id)
        return names

    class KwargParseVisitor(ast.NodeVisitor):
        """
        TODO: understand dict update ie, know when kwargs is passed to these
        functions and then look assume the object that was updated is a
        dictionary and check wherever that is passed to kwargs as well.
        """

        def visit_FunctionDef(self, node):
            kwarg = node.args.kwarg
            # ``kwarg`` is a plain string on Python 2 and an ``ast.arg`` (or
            # None) on Python 3.
            kwarg_name = getattr(kwarg, 'arg', kwarg)
            if kwarg_name != target_kwargs_name:
                # target kwargs is still in scope
                self.generic_visit(node)

        # ``async def`` shadows the target name in exactly the same way
        visit_AsyncFunctionDef = visit_FunctionDef

        def visit_Call(self, node):
            funcname = _callee_name(node.func)
            if funcname is not None:
                for kwargs_name in _splatted_names(node):
                    if kwargs_name == target_kwargs_name:
                        child_funcnamess.append(funcname)
            # Always descend: arguments may themselves contain calls
            self.generic_visit(node)

    KwargParseVisitor().visit(pt)
    return child_funcnamess
# Mirrors the field layout of the legacy ``inspect.ArgSpec`` tuple so that
# callers can keep reading ``.args``, ``.varargs``, ``.keywords`` and
# ``.defaults`` on interpreters where ``inspect.getargspec`` no longer exists.
_ArgSpec = collections.namedtuple(
    'ArgSpec', ['args', 'varargs', 'keywords', 'defaults'])


def get_func_argspec(func):
    """
    wrapper around inspect.getargspec but takes into account utool decorators

    If ``func`` carries a ``_utinfo`` attribute, the argspec stored there
    under ``'orig_argspec'`` is returned as-is. Properties are introspected
    through their getter.

    Args:
        func (callable or property): object to introspect

    Returns:
        tuple: named tuple with fields ``args``, ``varargs``, ``keywords``
            and ``defaults``
    """
    if hasattr(func, '_utinfo'):
        return func._utinfo['orig_argspec']
    if isinstance(func, property):
        func = func.fget
    if hasattr(inspect, 'getfullargspec'):
        # inspect.getargspec was removed in Python 3.11 and refuses
        # signatures with annotations / keyword-only arguments before that.
        # Repackage the full spec into the legacy four-field shape.
        full = inspect.getfullargspec(func)
        return _ArgSpec(full.args, full.varargs, full.varkw, full.defaults)
    return inspect.getargspec(func)
def get_func_sourcecode(func, strip_def=False, strip_ret=False,
                        strip_docstr=False, strip_comments=False,
                        remove_linenums=None, strip_decor=False):
    """
    wrapper around inspect.getsource but takes into account utool decorators
    strip flags are very hacky as of now

    Args:
        func (function): live python function
        strip_def (bool): remove the ``def ...:`` line (and a preceding
            decorator) using a regex, then dedent (default = False)
        strip_ret (bool): rewrite ``return ...`` lines as
            ``pass # return ...`` (default = False)
        strip_docstr (bool): drop docstring-like string tokens (default = False)
        strip_comments (bool): drop comment tokens (default = False)
        remove_linenums (None): if given, indices of source lines to delete
            (default = None)
        strip_decor (bool): remove decorators via redbaron when the source
            is a single function definition (default = False)

    Returns:
        str: the processed source code. The unprocessed value is None when
            no real source file is associated with ``func``; the strip
            options do not support that case.

    Example:
        >>> # build test data
        >>> func = get_func_sourcecode
        >>> strip_def = True
        >>> strip_ret = True
        >>> sourcecode = get_func_sourcecode(func, strip_def)
        >>> print('sourcecode = {}'.format(sourcecode))
    """
    inspect.linecache.clearcache()  # HACK: fix inspect bug
    sourcefile = inspect.getsourcefile(func)
    if hasattr(func, '_utinfo'):
        # DEPRICATE
        sourcecode = get_func_sourcecode(func._utinfo['orig_func'])
    elif sourcefile is not None and (sourcefile != '<string>'):
        try_limit = 2
        for num_tries in range(try_limit):
            try:
                sourcecode = inspect.getsource(func)
            except (IndexError, IOError, OSError, SyntaxError):
                # IOError is listed explicitly because on Python 2 it is not
                # an alias of OSError.
                print('WARNING: Error getting source')
                inspect.linecache.clearcache()
                tries_left = try_limit - num_tries - 1
                if tries_left == 0:
                    raise
                print('Attempting %d more time(s)' % (tries_left))
            else:
                if not isinstance(sourcecode, six.text_type):
                    sourcecode = sourcecode.decode('utf-8')
                # Success: do not fetch the source a second time
                break
    else:
        sourcecode = None

    if strip_def:
        # hacky
        # TODO: use redbaron or something like that for a more robust appraoch
        sourcecode = textwrap.dedent(sourcecode)
        regex_decor = '^@.' + REGEX_NONGREEDY
        regex_defline = '^def [^:]*\\):\n'
        pattern = '(' + regex_decor + ')?' + regex_defline
        nodef_source = re.sub(pattern, '', sourcecode,
                              flags=re.MULTILINE | re.DOTALL)
        sourcecode = textwrap.dedent(nodef_source)

    if strip_ret:
        # \s is a whitespace char
        return_ = named_field('return', 'return .*$')
        prereturn = named_field('prereturn', r'^\s*')
        regex = prereturn + return_
        # Keep the indentation, neutralize the statement, keep it as a comment
        repl = bref_field('prereturn') + 'pass # ' + bref_field('return')
        sourcecode = re.sub(regex, repl, sourcecode, flags=re.MULTILINE)

    if strip_docstr or strip_comments:
        # pip install pyminifier
        # References: http://code.activestate.com/recipes/576704/
        def remove_docstrings_or_comments(source):
            """
            Re-emit ``source`` token by token, skipping comments and/or
            docstring-like strings (depending on the enclosing flags).

            TODO: commit clean version to pyminifier
            """
            import tokenize
            from six.moves import StringIO
            io_obj = StringIO(source)
            pieces = []
            prev_toktype = tokenize.INDENT
            last_lineno = -1
            last_col = 0
            for tok in tokenize.generate_tokens(io_obj.readline):
                token_type = tok[0]
                token_string = tok[1]
                start_line, start_col = tok[2]
                end_line, end_col = tok[3]
                if start_line > last_lineno:
                    last_col = 0
                if start_col > last_col:
                    # Restore the horizontal gap between consecutive tokens
                    pieces.append(' ' * (start_col - last_col))
                if strip_comments and token_type == tokenize.COMMENT:
                    # Remove comments
                    pass
                elif strip_docstr and token_type == tokenize.STRING:
                    # A string directly following INDENT / NEWLINE, or one
                    # starting in column 0, is likely a docstring: drop it.
                    # Any other string (e.g. inside an expression) is kept.
                    is_regular_string = (
                        prev_toktype != tokenize.INDENT and
                        prev_toktype != tokenize.NEWLINE and
                        start_col > 0)
                    if is_regular_string:
                        pieces.append(token_string)
                else:
                    pieces.append(token_string)
                prev_toktype = token_type
                last_col = end_col
                last_lineno = end_line
            return ''.join(pieces)
        sourcecode = remove_docstrings_or_comments(sourcecode)

    if strip_decor:
        # Imported outside the try block: a missing dependency should surface
        # as ImportError rather than a NameError inside the fallback handler.
        import redbaron
        dedented = ub.codeblock(sourcecode)
        try:
            red = redbaron.RedBaron(dedented)
        except Exception:
            # Retry with non-ascii characters replaced
            hack_text = ub.ensure_unicode(dedented).encode('ascii', 'replace')
            red = redbaron.RedBaron(hack_text)
        if len(red) == 1:
            redfunc = red[0]
            if redfunc.type == 'def':
                # Remove decorators
                del redfunc.decorators[:]
                sourcecode = redfunc.dumps()

    if remove_linenums is not None:
        source_lines = sourcecode.strip('\n').split('\n')
        # Deletes in place from ``source_lines``
        delete_items_by_index(source_lines, remove_linenums)
        sourcecode = '\n'.join(source_lines)
    return sourcecode
def delete_items_by_index(list_, index_list, copy=False):
    """
    Remove items from ``list_`` at positions specified in ``index_list``
    The original ``list_`` is preserved if ``copy`` is True

    Negative indices count from the end of the list. Indices that refer to
    the same position (e.g. ``-1`` and ``len(list_) - 1``, or a repeated
    index) remove that item only once.

    Args:
        list_ (list): list to delete from (modified in place unless ``copy``)
        index_list (list): positions to remove
        copy (bool): preserves original list if True

    Returns:
        list: the list with the items removed

    Raises:
        IndexError: if an index is out of range

    Example:
        >>> list_ = [8, 1, 8, 1, 6, 6, 3, 4, 4, 5, 6]
        >>> index_list = [2, -1]
        >>> delete_items_by_index(list_, index_list)
        [8, 1, 1, 6, 6, 3, 4, 4, 5]
    """
    if copy:
        list_ = list_[:]
    size = len(list_)
    # Rectify negative indices; the set collapses duplicates so that no
    # position is deleted twice (which would remove a neighbouring item).
    positions = {(size + x if x < 0 else x) for x in index_list}
    # Remove largest indices first so the remaining positions stay valid
    for index in sorted(positions, reverse=True):
        del list_[index]
    return list_