# Source code for xinspect.dynamic_kwargs

from __future__ import absolute_import, division, print_function, unicode_literals
import collections
import inspect
import re
import textwrap
import types

import six
import ubelt as ub

from xinspect.static_kwargs import parse_kwarg_keys


# Regex quantifier suffix meaning "zero or more repetitions, as few as
# possible"; it is appended to a sub-pattern when building larger patterns.
REGEX_NONGREEDY = '*?'


# THIS IS THE CANONICAL API FUNCTION. TODO: MAKE THE OTHERS PRIVATE

# [docs]
def get_func_kwargs(func, max_depth=None):
    """
    Collect the keyword arguments accepted by ``func`` and their defaults.

    Arguments that have a default in the signature are always reported.  If
    the function also takes a ``**kwargs``-style catch-all, the keys found
    by ``recursive_parse_kwargs`` are merged on top of them.

    Args:
        func (callable): function to introspect kwargs from
        max_depth (int, default=None): forwarded to
            ``recursive_parse_kwargs``; None means no recursion limit.

    Returns:
        dict: maps each keyword name to its default value
    """
    spec = get_func_argspec(func)
    kwargs = {}
    if spec.defaults is not None:
        # Defaults belong to the trailing arguments, so pair from the right.
        kwargs = dict(zip(reversed(spec.args), reversed(spec.defaults)))
    if spec.keywords is not None:
        implicit = recursive_parse_kwargs(func, max_depth=max_depth)
        kwargs.update(dict(implicit))
    return kwargs



def bref_field(key):
    """ Build the replacement-string backreference for the group ``key`` """
    template = r'\g<%s>'
    return template % key


def named_field(key, regex, vim=False):
    """
    Wrap ``regex`` in a capture group that a backref can refer to.

    Args:
        key (str or None): group name.  If None the group is unnamed and
            can only be referenced by number.
        regex (str): pattern to wrap
        vim (bool, default=False): if True (and ``key`` is given) emit an
            escaped-parenthesis group ``\\(...\\)`` instead of a named one.

    Returns:
        str: the grouped pattern

    References:
        https://docs.python.org/2/library/re.html#regular-expression-syntax
    """
    if key is not None:
        if vim:
            return r'\(%s\)' % regex
        return r'(?P<%s>%s)' % (key, regex)
    return r'(%s)' % (regex,)


def is_func_or_method(var):
    """ True if ``var`` is a ``types.FunctionType`` or ``types.MethodType`` """
    accepted_types = (types.MethodType, types.FunctionType)
    return isinstance(var, accepted_types)


def get_funcglobals(func):
    """
    Return the global namespace dictionary that ``func`` was defined in.

    Args:
        func (function or method): object exposing ``__globals__``

    Returns:
        dict: the globals of the module where ``func`` was defined

    Raises:
        AttributeError: if ``func`` has no globals attribute
    """
    try:
        # Available on Python 3 and on Python 2.6+, so no version switch
        # is needed.
        return func.__globals__
    except AttributeError:
        # Legacy spelling, kept as a fallback for very old interpreters.
        return func.func_globals


def parse_func_kwarg_keys(func, with_vals=False):
    """
    Heuristically infer the kwargs keys that ``func`` uses.

    The source of ``func`` is fetched with docstrings and comments removed
    and then handed to ``parse_kwarg_keys``.

    Args:
        func (function): live python function
        with_vals (bool, default=False): forwarded to ``parse_kwarg_keys``

    Returns:
        whatever ``parse_kwarg_keys`` returns for the stripped source

    SeeAlso:
        recursive_parse_kwargs
        parse_kwarg_keys
        get_func_kwargs
    """
    stripped_source = get_func_sourcecode(func, strip_comments=True,
                                          strip_docstr=True)
    return parse_kwarg_keys(stripped_source, with_vals=with_vals)


def get_kwdefaults(func, parse_source=False):
    r"""
    Map every argument of ``func`` that has a default to that default.

    Args:
        func (callable): function to introspect
        parse_source (bool, default=False): if True and ``func`` accepts a
            ``**kwargs``-style catch-all, also include the ``(key, value)``
            pairs that ``parse_func_kwarg_keys`` infers from the source.

    Returns:
        dict: defaults in signature order (positional-or-keyword arguments
            first, then keyword-only ones).  An ``OrderedDict`` when the
            signature has positional defaults, otherwise a plain dict.

    Raises:
        AssertionError: if a key parsed from the source is already an
            explicit argument.

    Example:
        >>> func = lambda a, b=1, c=2: None
        >>> list(get_kwdefaults(func).items())
        [('b', 1), ('c', 2)]
    """
    if hasattr(inspect, 'getfullargspec'):
        # inspect.getargspec was removed in Python 3.11 and never supported
        # keyword-only arguments or annotations.
        spec = inspect.getfullargspec(func)
        args, defaults, varkw = spec.args, spec.defaults, spec.varkw
        kwonly_defaults = spec.kwonlydefaults
    else:
        spec = inspect.getargspec(func)
        args, defaults, varkw = spec.args, spec.defaults, spec.keywords
        kwonly_defaults = None

    kwdefaults = {}
    if defaults is not None:
        # Defaults always belong to the trailing positional arguments.
        kwpos = len(args) - len(defaults)
        kwdefaults = collections.OrderedDict(zip(args[kwpos:], defaults))
    if kwonly_defaults:
        kwdefaults.update(kwonly_defaults)

    if parse_source and varkw:
        # TODO parse for kwargs.get/pop
        keyword_defaults = parse_func_kwarg_keys(func, with_vals=True)
        for key, val in keyword_defaults:
            assert key not in kwdefaults, 'parsing error'
            kwdefaults[key] = val
    return kwdefaults


def lookup_attribute_chain(attrname, namespace):
    """
    Resolve a dotted name by walking ``__dict__`` mappings.

    The first component is looked up in ``namespace``; every following
    component is looked up in the ``__dict__`` of the previous result.

    Args:
        attrname (str): dotted name, e.g. ``'KWReg.print_defaultkw'``
        namespace (dict): mapping to start from, e.g. ``mod.__dict__``

    Returns:
        object: the value stored under the last component

    Example:
        >>> class KWReg(object):
        ...     default = 1
        >>> lookup_attribute_chain('KWReg.default', {'KWReg': KWReg})
        1
    """
    components = attrname.split('.')
    leaf_name = components.pop()
    scope = namespace
    for parent_name in components:
        scope = scope[parent_name].__dict__
    return scope[leaf_name]


def recursive_parse_kwargs(root_func, path_=None, verbose=None, max_depth=None):
    """
    Recursively collect the keyword arguments reachable from ``root_func``.

    The result combines (1) the arguments that have a default in the
    signature, (2) the keys ``parse_kwarg_keys`` infers from the source of
    ``root_func``, and (3) the same information for every function that
    ``root_func`` calls with its ``**kwargs`` variable.

    Args:
        root_func (function): live python function or method
        path_ (list, default=None): functions visited so far.  The same
            list is shared (and appended to) across the whole recursion, so
            every function is parsed at most once and cycles terminate.
            Callers normally leave this as None.
        verbose (bool, default=None): if truthy, print progress messages
        max_depth (int, default=None): if specified only recurse to this
            depth; None means unlimited.

    Returns:
        list: ``(key, value)`` pairs; the same key may appear several times

    TODO:
        - [ ] rectify with others
        - [ ] if docstr indentation is off, this fails

    Example:
        >>> modname = ub.argval('--mod', default='ubelt')
        >>> funcname = ub.argval('--func', default='cmd')
        >>> mod = ub.import_module_from_name(modname)
        >>> root_func = lookup_attribute_chain(funcname, mod.__dict__)
        >>> path_ = None
        >>> parsed = recursive_parse_kwargs(root_func)
        >>> flags = ub.unique_flags([p[0] for p in parsed])
        >>> unique = list(ub.compress(parsed, flags))
        >>> print('parsed = %s' % (ub.repr2(parsed),))
        >>> print('unique = %s' % (ub.repr2(unique),))
    """
    if max_depth is None:
        max_depth = float('inf')

    if verbose is None:
        verbose = False
    if verbose:
        print('[inspect] recursive parse kwargs root_func = %r ' % (root_func,))

    if path_ is None:
        path_ = []
    if root_func in path_:
        if verbose:
            print('[inspect] Encountered cycle. returning')
        return []
    path_.append(root_func)
    spec = get_func_argspec(root_func)

    found_explicit = list(get_kwdefaults(root_func, parse_source=False).items())
    if verbose:
        print('[inspect] * Found explicit %r' % (found_explicit,))

    # The body without its def line is scanned for forwarded ``**kwargs``;
    # the version that keeps the def line is scanned for implicit keys.
    sourcecode = get_func_sourcecode(root_func, strip_docstr=True,
                                     strip_def=True, strip_decor=True)
    sourcecode1 = get_func_sourcecode(root_func, strip_docstr=True,
                                      strip_def=False, strip_decor=True)
    found_implicit = parse_kwarg_keys(sourcecode1, spec.keywords,
                                      with_vals=True)
    if verbose:
        print('[inspect] * Found found_implicit %r' % (found_implicit,))
    kwargs_list = found_explicit + found_implicit

    def hack_lookup_mod_attrs(attr):
        # HACKS TODO: have find_funcs_called_with_kwargs infer an attribute is a
        # module / function / type. In the module case, we can import it and
        # look it up.  Maybe args, or returns can help infer type.  Maybe just
        # register some known varnames.  Maybe jedi has some better way to do
        # this.  Until then nothing is ever resolved here.
        return None

    def owner_class_of(method):
        # Python 2 methods expose ``im_class``; Python 3 bound methods only
        # carry ``__self__`` (the class itself for classmethods).
        cls = getattr(method, 'im_class', None)
        if cls is None:
            owner = method.__self__
            cls = owner if inspect.isclass(owner) else type(owner)
        return cls

    def resolve_attr_subfunc(subfunc_name):
        # Look up the attribute chain, starting from the globals of root_func
        subdict = get_funcglobals(root_func)
        subtup = subfunc_name.split('.')
        try:
            leaf = lookup_attribute_chain(subfunc_name, subdict)
        except (KeyError, TypeError):
            for attr in subtup[:-1]:
                try:
                    subdict = subdict[attr].__dict__
                except (KeyError, TypeError):
                    # limited support for class lookup
                    if isinstance(root_func, types.MethodType) and spec.args[0] == attr:
                        subdict = owner_class_of(root_func).__dict__
                    else:
                        # FIXME TODO lookup_attribute_chain
                        subdict = hack_lookup_mod_attrs(attr)
                        if subdict is None:
                            print('Unable to find attribute of attr=%r' % (attr,))
        else:
            if is_func_or_method(leaf):
                return leaf
            # The name resolved to something that cannot be introspected
            # (a class, builtin, descriptor, ...); the caller reports it.
            raise TypeError('%r resolved to a non-function object: %r' %
                            (subfunc_name, leaf))
        if subdict is None:
            return None
        return subdict[subtup[-1]]

    def check_subfunc_name(subfunc_name):
        if isinstance(subfunc_name, tuple) or '.' in subfunc_name:
            subfunc = resolve_attr_subfunc(subfunc_name)
        else:
            # try to directly take func from globals
            func_globals = get_funcglobals(root_func)
            try:
                subfunc = func_globals[subfunc_name]
            except KeyError:
                print('Unable to find function definition subfunc_name=%r' %
                      (subfunc_name,))
                subfunc = None
        if subfunc is not None and max_depth > 0:
            return recursive_parse_kwargs(subfunc, path_,
                                          verbose=verbose,
                                          max_depth=max_depth - 1)
        return []

    if spec.keywords is not None:
        if verbose:
            print('[inspect] Checking spec.keywords=%r' % (spec.keywords,))
        subfunc_name_list = find_funcs_called_with_kwargs(sourcecode, spec.keywords)
        if verbose:
            print('[inspect] Checking subfunc_name_list=%r' % (subfunc_name_list,))
        for subfunc_name in subfunc_name_list:
            try:
                new_subkw = check_subfunc_name(subfunc_name)
            except (TypeError, KeyError, AttributeError):
                # Best effort: a callee that cannot be resolved or
                # introspected is reported and skipped, not fatal.
                print('warning: unable to recursivley parse type of : %r' % (subfunc_name,))
            else:
                if verbose:
                    print('[inspect] * Found %r' % (new_subkw,))
                kwargs_list.extend(new_subkw)
    return kwargs_list


def find_funcs_called_with_kwargs(sourcecode, target_kwargs_name='kwargs'):
    r"""
    Find the functions that are called with ``**<target_kwargs_name>``.

    The source is parsed with ``ast``.  Calls inside a nested function whose
    own ``**`` parameter has the same name are ignored, because there the
    name refers to a different dictionary.

    Args:
        sourcecode (str): python source to scan
        target_kwargs_name (str, default='kwargs'): name of the variable
            that holds the keyword dictionary

    Returns:
        list: callee names (``'func'`` or ``'obj.attr'``) in the order they
            are visited, one entry per matching call.  Calls whose callee
            is neither a plain name nor an attribute of a plain name (e.g.
            ``a.b.c(**kwargs)`` or ``handlers[0](**kwargs)``) are skipped.

    Raises:
        SyntaxError: if ``sourcecode`` cannot be parsed

    Example:
        >>> # ENABLE_DOCTEST
        >>> sourcecode = ub.codeblock(
        ...     '''
        ...     x, y = list(zip(*ub.ichunks(data, 2)))
        ...     somecall(arg1, arg2, arg3=4, **kwargs)
        ...     import sys
        ...     sys.badcall(**kwargs)
        ...     def foo():
        ...         bar(**kwargs)
        ...         ub.holymoly(**kwargs)
        ...         baz()
        ...         def biz(**kwargs):
        ...             foo2(**kwargs)
        ...     ''')
        >>> child_funcnamess = find_funcs_called_with_kwargs(sourcecode)
        >>> print('child_funcnamess = %r' % (child_funcnamess,))
        >>> assert 'foo2' not in child_funcnamess, 'foo2 should not be found'
        >>> assert 'bar' in child_funcnamess, 'bar should be found'
    """
    import ast
    # Lets Python 2 parse ``print(...)`` with keyword arguments as a call.
    sourcecode = 'from __future__ import print_function\n' + sourcecode
    tree = ast.parse(sourcecode)
    child_funcnamess = []

    def callee_name(func_node):
        # Only ``name(...)`` and ``name.attr(...)`` can be resolved later on.
        if isinstance(func_node, ast.Name):
            return func_node.id
        if (isinstance(func_node, ast.Attribute) and
                isinstance(func_node.value, ast.Name)):
            return func_node.value.id + '.' + func_node.attr
        return None

    def double_star_values(call_node):
        # Old ASTs store ``**x`` in ``Call.kwargs``; newer ones store it as
        # a keyword whose ``arg`` is None.  Detect by feature, not version.
        values = []
        legacy = getattr(call_node, 'kwargs', None)
        if legacy is not None:
            values.append(legacy)
        values.extend(kw.value for kw in call_node.keywords if kw.arg is None)
        return values

    class KwargParseVisitor(ast.NodeVisitor):
        """
        TODO: understand dict update ie, know when kwargs is passed to these
        functions and then assume the object that was updated is a
        dictionary and check wherever that is passed to kwargs as well.
        """
        def visit_FunctionDef(self, node):
            kwarg = node.args.kwarg
            # ``kwarg`` is an ``ast.arg`` on Python 3, a plain string on
            # Python 2 and None when the function has no ``**`` parameter.
            kwarg_name = getattr(kwarg, 'arg', kwarg)
            if kwarg_name != target_kwargs_name:
                # target kwargs is still in scope
                self.generic_visit(node)

        # ``async def`` shadows the name in exactly the same way.
        visit_AsyncFunctionDef = visit_FunctionDef

        def visit_Call(self, node):
            funcname = callee_name(node.func)
            if funcname is not None:
                for value in double_star_values(node):
                    if (isinstance(value, ast.Name) and
                            value.id == target_kwargs_name):
                        child_funcnamess.append(funcname)
            # Keep descending: arguments may contain further calls.
            self.generic_visit(node)

    KwargParseVisitor().visit(tree)
    return child_funcnamess


# Stand-in for the tuple that ``inspect.getargspec`` used to return, so that
# callers can keep reading ``.args``, ``.varargs``, ``.keywords`` and
# ``.defaults``.  ``str`` keeps the type name a native string under
# ``unicode_literals`` on Python 2.
_ArgSpec = collections.namedtuple(
    str('ArgSpec'), ['args', 'varargs', 'keywords', 'defaults'])


def get_func_argspec(func):
    """
    Return the legacy-style argspec of ``func``.

    Wrapper around the ``inspect`` argspec functions that takes utool
    decorators and properties into account.

    Args:
        func (callable or property): object to introspect.  If it has a
            ``_utinfo`` attribute, ``_utinfo['orig_argspec']`` is returned
            as is.  For a property the getter is inspected.

    Returns:
        tuple-like with ``args``, ``varargs``, ``keywords`` and
            ``defaults`` attributes.

    Raises:
        TypeError: if ``func`` is not a supported callable
    """
    if hasattr(func, '_utinfo'):
        return func._utinfo['orig_argspec']
    if isinstance(func, property):
        func = func.fget
    if hasattr(inspect, 'getfullargspec'):
        # inspect.getargspec was removed in Python 3.11 and rejects
        # keyword-only arguments and annotations; translate the modern
        # result (``varkw``) to the legacy field name (``keywords``).
        full = inspect.getfullargspec(func)
        return _ArgSpec(full.args, full.varargs, full.varkw, full.defaults)
    return inspect.getargspec(func)


def get_func_sourcecode(func, strip_def=False, strip_ret=False,
                        strip_docstr=False, strip_comments=False,
                        remove_linenums=None, strip_decor=False):
    """
    Return the source of ``func``, optionally with parts of it stripped.

    Wrapper around inspect.getsource that takes utool decorators into
    account: if ``func`` has a ``_utinfo`` attribute, the source of
    ``func._utinfo['orig_func']`` is used instead.  The strip flags are
    applied in the order strip_def, strip_ret, strip_docstr /
    strip_comments, strip_decor, remove_linenums and are very hacky as of
    now.

    Args:
        func (function): function to get the source of
        strip_def (bool): dedent the source and remove the ``def`` line
            (plus any decorator text matched in front of it) with a regex
            (default = False)
        strip_ret (bool): rewrite every ``return ...`` line as
            ``pass  # return ...`` (default = False)
        strip_docstr (bool): drop string tokens that look like docstrings
            (default = False)
        strip_comments (bool): drop comment tokens (default = False)
        remove_linenums (None): if given, indices of source lines to
            delete; passed to ``delete_items_by_index`` (default = None)
        strip_decor (bool): remove decorators using redbaron when the source
            parses to a single ``def`` node (default = False)

    Returns:
        str: the (possibly stripped) source.  ``None`` is assigned when
            ``inspect.getsourcefile`` reports no file or ``'<string>'``.

    Example:
        >>> # build test data
        >>> func = get_func_sourcecode
        >>> strip_def = True
        >>> strip_ret = True
        >>> sourcecode = get_func_sourcecode(func, strip_def)
        >>> print('sourcecode = {}'.format(sourcecode))
    """
    inspect.linecache.clearcache()  # HACK: fix inspect bug
    sourcefile = inspect.getsourcefile(func)
    if hasattr(func, '_utinfo'):
        # DEPRECATE
        # The recursive call uses no strip flags; they are applied below.
        func2 = func._utinfo['orig_func']
        sourcecode = get_func_sourcecode(func2)
    elif sourcefile is not None and (sourcefile != '<string>'):
        try_limit = 2
        # NOTE(review): there is no ``break`` after a successful read, so
        # the source is fetched ``try_limit`` times even when the first
        # attempt works -- confirm whether that is intended.
        for num_tries in range(try_limit):
            try:
                #print(func)
                sourcecode = inspect.getsource(func)
                if not isinstance(sourcecode, six.text_type):
                    sourcecode = sourcecode.decode('utf-8')
                #print(sourcecode)
            except (IndexError, OSError, SyntaxError) as ex:
                print('WARNING: Error getting source')
                # Clear the line cache before the next attempt
                inspect.linecache.clearcache()
                if num_tries + 1 != try_limit:
                    tries_left = try_limit - num_tries - 1
                    print('Attempting %d more time(s)' % (tries_left))
                else:
                    # Out of attempts: propagate the original error
                    raise
    else:
        # NOTE(review): every strip branch below expects a string, so a
        # None here combined with any strip flag presumably fails -- confirm.
        sourcecode = None
    if strip_def:
        # hacky
        # TODO: use redbaron or something like that for a more robust approach
        sourcecode = textwrap.dedent(sourcecode)
        # Optional decorator text (non-greedy) followed by a ``def`` line
        # that ends in ``):``.
        # NOTE(review): ``[^:]*`` cannot cross a colon, so a signature that
        # contains ':' (annotations, dict or lambda defaults) presumably
        # does not match -- confirm.
        regex_decor = '^@.' + REGEX_NONGREEDY
        regex_defline = '^def [^:]*\\):\n'
        patern = '(' + regex_decor + ')?' + regex_defline
        RE_FLAGS = re.MULTILINE | re.DOTALL
        RE_KWARGS = {'flags': RE_FLAGS}
        nodef_source = re.sub(patern, '', sourcecode, **RE_KWARGS)
        # Dedent again so the remaining body starts at column 0
        sourcecode = textwrap.dedent(nodef_source)
        #print(sourcecode)
        pass
    if strip_ret:
        r""" \s is a whitespace char """
        # Capture the leading whitespace and the return statement of each
        # line separately so the indentation can be kept in the replacement.
        return_ = named_field('return', 'return .*$')
        prereturn = named_field('prereturn', r'^\s*')
        return_bref = bref_field('return')
        prereturn_bref = bref_field('prereturn')
        regex = prereturn + return_
        # ``<indent>return x`` becomes ``<indent>pass  # return x``
        repl = prereturn_bref + 'pass  # ' + return_bref
        sourcecode_ = re.sub(regex, repl, sourcecode, flags=re.MULTILINE)
        sourcecode = sourcecode_
        pass
    if strip_docstr or strip_comments:
        # pip install pyminifier
        # References: http://code.activestate.com/recipes/576704/
        #from pyminifier import minification, token_utils
        def remove_docstrings_or_comments(source):
            """
            Re-emit ``source`` token by token, leaving out comment tokens
            (when strip_comments is set) and docstring-like string tokens
            (when strip_docstr is set).

            TODO: commit clean version to pyminifier
            """
            import tokenize
            from six.moves import StringIO
            io_obj = StringIO(source)
            out = ''
            prev_toktype = tokenize.INDENT
            last_lineno = -1
            last_col = 0
            for tok in tokenize.generate_tokens(io_obj.readline):
                token_type = tok[0]
                token_string = tok[1]
                start_line, start_col = tok[2]
                end_line, end_col = tok[3]
                # A new line started: column bookkeeping restarts at 0
                if start_line > last_lineno:
                    last_col = 0
                # Re-create the horizontal gap between consecutive tokens
                if start_col > last_col:
                    out += (' ' * (start_col - last_col))
                # Remove comments:
                if strip_comments and token_type == tokenize.COMMENT:
                    pass
                elif strip_docstr and token_type == tokenize.STRING:
                    # A string is kept only if it does not directly follow
                    # an INDENT or NEWLINE token and does not start in
                    # column 0; everything else is treated as a docstring.
                    if prev_toktype != tokenize.INDENT:
                        # This is likely a docstring; double-check we're not inside an operator:
                        if prev_toktype != tokenize.NEWLINE:
                            if start_col > 0:
                                out += token_string
                else:
                    out += token_string
                prev_toktype = token_type
                last_col = end_col
                last_lineno = end_line
            return out
        sourcecode = remove_docstrings_or_comments(sourcecode)
        #sourcecode = minification.remove_comments_and_docstrings(sourcecode)
        #tokens = token_utils.listified_tokenizer(sourcecode)
        #minification.remove_comments(tokens)
        #minification.remove_docstrings(tokens)
        #token_utils.untokenize(tokens)

    if strip_decor:
        try:
            import redbaron
            red = redbaron.RedBaron(ub.codeblock(sourcecode))
        except Exception:
            # Second attempt with non-ascii characters replaced.
            # NOTE(review): if the ``import redbaron`` above is what
            # failed, the name ``redbaron`` is unbound here -- confirm.
            hack_text = ub.ensure_unicode(ub.codeblock(sourcecode)).encode('ascii', 'replace')
            red = redbaron.RedBaron(hack_text)
            pass
        # Only a source consisting of exactly one top-level ``def`` is edited
        if len(red) == 1:
            redfunc = red[0]
            if redfunc.type == 'def':
                # Remove decorators
                del redfunc.decorators[:]
                sourcecode = redfunc.dumps()

    if remove_linenums is not None:
        # Delete the requested lines in place, then join the rest
        source_lines = sourcecode.strip('\n').split('\n')
        delete_items_by_index(source_lines, remove_linenums)
        sourcecode = '\n'.join(source_lines)
    return sourcecode


def delete_items_by_index(list_, index_list, copy=False):
    """
    Remove items from ``list_`` at positions specified in ``index_list``
    The original ``list_`` is preserved if ``copy`` is True

    Each position is removed once, even if it is listed several times or
    is given both as a negative and as a non-negative index.  All indices
    are validated before anything is deleted.

    Args:
        list_ (list): list to delete from (modified in place unless
            ``copy`` is True)
        index_list (list): positions to remove; negative values count from
            the end
        copy (bool): preserves original list if True

    Returns:
        list: the list with the items removed

    Raises:
        IndexError: if any index is out of range; ``list_`` is untouched

    Example:
        >>> list_ = [8, 1, 8, 1, 6, 6, 3, 4, 4, 5, 6]
        >>> index_list = [2, -1]
        >>> delete_items_by_index(list_, index_list)
        [8, 1, 1, 6, 6, 3, 4, 4, 5]
    """
    if copy:
        list_ = list_[:]
    size = len(list_)
    positions = set()
    for index in index_list:
        # Rectify negative indices
        position = index + size if index < 0 else index
        if not 0 <= position < size:
            # A still-negative position would silently delete the wrong
            # item, so reject it up front.
            raise IndexError(
                'index %r is out of range for a list of length %d' % (index, size))
        positions.add(position)
    # Remove largest indices first so the remaining positions stay valid
    for position in sorted(positions, reverse=True):
        del list_[position]
    return list_