Source code for xdev.misc

# -*- coding: utf-8 -*-
from __future__ import absolute_import, division, print_function, unicode_literals
import ubelt as ub



[docs]
def quantum_random(pure=False):
    """
    Fetch a random 32 bit unsigned integer backed by a quantum source.

    Two 16 bit words are requested from the ANU Quantum Random Number
    Generator web service (via the ``quantumrandom`` package) and
    reinterpreted as a single 32 bit value. An internet connection is
    therefore required.

    Args:
        pure (bool): if True, return the raw quantum words exactly as they
            were received over the network (i.e. subject to MitM attacks).
            If False, the words are first xor-ed with bytes from
            :func:`os.urandom` for security.

    Requirements:
        quantumrandom >= 1.9.0

    Returns:
        numpy.uint32: the random number
    """
    import os
    import numpy as np
    import quantumrandom

    # These words travelled over the network
    quantum_words = quantumrandom.uint16(array_length=2)
    word_dtype = quantum_words.dtype
    nbytes = quantum_words.size * word_dtype.itemsize

    if not pure:
        # Locally generated, cryptographically strong bytes
        local_entropy = memoryview(os.urandom(nbytes))
        local_words = np.frombuffer(local_entropy, dtype=word_dtype)
        # xor mixes the local and remote data
        words = np.bitwise_xor(local_words, quantum_words)
    else:
        words = quantum_words

    # The reinterpreting view below needs contiguous memory
    assert words.flags['C_CONTIGUOUS']
    return words.view(np.dtype('uint32'))[0]




[docs]
def byte_str(num, unit='auto', precision=2):
    """
    Format a number of bytes as a string using binary (1024-based) units.

    Args:
        num (int): number of bytes
        unit (str): which unit to use, can be auto, B, KB, MB, GB, TB, PB, EB,
            ZB, or YB. Only the first letter is inspected (case insensitive).
            With ``'auto'`` the smallest unit from KB upwards whose next
            power of 1024 exceeds ``abs(num)`` is chosen; values below one
            KB are still reported in KB.
        precision (int): number of decimals of precision

    References:
        https://en.wikipedia.org/wiki/Orders_of_magnitude_(data)

    Returns:
        str: string representing the number of bytes with appropriate units

    Raises:
        ValueError: if ``unit`` does not start with a known unit letter

    Example:
        >>> num_list = [1, 100, 1024,  1048576, 1073741824, 1099511627776]
        >>> result = ub.repr2(list(map(byte_str, num_list)), nl=0)
        >>> print(result)
        ['0.00 KB', '0.10 KB', '1.00 KB', '1.00 MB', '1.00 GB', '1.00 TB']
    """
    magnitude = abs(num)

    if unit == 'auto':
        # (exclusive upper bound as a power of two, unit to use below it)
        auto_table = (
            (20, 'KB'),
            (30, 'MB'),
            (40, 'GB'),
            (50, 'TB'),
            (60, 'PB'),
            (70, 'EB'),
            (80, 'ZB'),
        )
        unit = 'YB'
        for bound_exponent, candidate in auto_table:
            if magnitude < 2.0 ** bound_exponent:
                unit = candidate
                break

    # Power of two that each unit letter divides by
    exponent_of = {
        'b': 0, 'k': 10, 'm': 20, 'g': 30, 't': 40,
        'p': 50, 'e': 60, 'z': 70, 'y': 80,
    }
    letter = unit.lower()[:1]
    if letter not in exponent_of:
        raise ValueError('unknown num={!r} unit={!r}'.format(num, unit))

    exponent = exponent_of[letter]
    # Plain bytes are passed through untouched (no float conversion)
    num_unit = num if exponent == 0 else num / (2.0 ** exponent)
    return ub.repr2(num_unit, precision=precision) + ' ' + unit




[docs]
def set_overlaps(set1, set2, s1='s1', s2='s2'):
    """
    Summarize how two collections overlap when viewed as sets.

    Args:
        set1 (Iterable): first collection of hashable items
        set2 (Iterable): second collection of hashable items
        s1 (str): name for set1, used as a key in the result
        s2 (str): name for set2, used as a key in the result

    Returns:
        Dict[str, int]: ordered mapping holding the size of each set, of
            their intersection (``'isect'``), of their union (``'union'``)
            and of both differences (``'<s1> - <s2>'`` and ``'<s2> - <s1>'``).

    Notes:
        This function needs a rename. Possible candidates brainstorm:
            * set_analysis
            * set_binary_analysis
            * set_binary_describe
            * set_relationships
            * describe_sets
            * describe_relations
            * describe_set_relations
            * sets_summary
            * summarize_sets
            * sumerset
    """
    items1 = set(set1)
    items2 = set(set2)

    summary = ub.odict()
    summary[s1] = len(items1)
    summary[s2] = len(items2)
    summary['isect'] = len(items1 & items2)
    summary['union'] = len(items1 | items2)
    summary['%s - %s' % (s1, s2)] = len(items1 - items2)
    summary['%s - %s' % (s2, s1)] = len(items2 - items1)
    return summary




[docs]
def nested_type(obj, unions=False):
    """
    Compute the :mod:`typing` compatible annotation type.

    Dicts, lists, sets and tuples are described recursively. Any other
    object is described by the name of its type, translated to the
    corresponding :mod:`typing` alias when one is known.

    Args:
        obj (Any): a typing template based on a specific object
        unions (bool): if True, heterogeneous item types are joined with
            ``|``, otherwise they collapse to ``Any``. An empty container
            always has item type ``Any``.

    Returns:
        str: type code (might change to return actual type)

    Example:
        >>> obj = {'a': [1, 2], 'b': [3, 4, 5]}
        >>> print(nested_type(obj))
        Dict[str, List[int]]

        >>> import numpy as np
        >>> obj = {'b': {'a': 1.0, 'b': 'foo', 'c': np.array([1, 2])}}
        >>> print(nested_type(obj, unions=True))
        Dict[str, Dict[str, float | ndarray | str]]

        >>> print(nested_type({1, 2, 3}))
        Set[int]
    """
    def _resolve(_types):
        # Collapse the item type codes of a container into one type code.
        # Deduplicate first so a homogeneous container resolves to its
        # single item type no matter how the codes were collected.
        distinct = set(_types)
        if len(distinct) == 1:
            return next(iter(distinct))
        if unions and distinct:
            return ' | '.join(sorted(distinct))
        # Mixed types without unions, or nothing to inspect at all
        return 'Any'

    def _nested(item):
        return nested_type(item, unions=unions)

    if isinstance(obj, dict):
        keytype = _resolve(_nested(k) for k in obj.keys())
        valtype = _resolve(_nested(v) for v in obj.values())
        objtype = 'Dict[{}, {}]'.format(keytype, valtype)
    elif isinstance(obj, list):
        itemtype = _resolve(_nested(item) for item in obj)
        objtype = 'List[{}]'.format(itemtype)
    elif isinstance(obj, set):
        itemtype = _resolve(_nested(item) for item in obj)
        objtype = 'Set[{}]'.format(itemtype)
    elif isinstance(obj, tuple):
        # Tuples are positional, so every item keeps its own type
        itemtypes = [_nested(item) for item in obj]
        objtype = 'Tuple[{}]'.format(', '.join(itemtypes))
    else:
        import typing
        objtype = type(obj).__name__
        # Prefer the alias table of the running typing module when it has
        # one (it is a private attribute, so it may be missing), otherwise
        # fall back to a hard-coded table.
        if hasattr(typing, '_normalize_alias'):
            objtype = typing._normalize_alias.get(objtype, objtype)
        else:
            objtype = {'list': 'List',
                       'tuple': 'Tuple',
                       'dict': 'Dict',
                       'set': 'Set',
                       'frozenset': 'FrozenSet',
                       'deque': 'Deque',
                       'defaultdict': 'DefaultDict',
                       'type': 'Type',
                       'Set': 'AbstractSet'}.get(objtype, objtype)
    return objtype




[docs]
def difftext(text1, text2, context_lines=0, ignore_whitespace=False,
             colored=False):
    r"""
    Build a line based difference string between two similar texts.

    The texts are compared with :func:`difflib.ndiff`. Only the lines that
    ndiff marks (those starting with ``+``, ``-`` or ``?``) are kept,
    together with the requested amount of surrounding context.

    Args:
        text1 (str): old text
        text2 (str): new text
        context_lines (int): number of lines of unchanged context to keep
            on each side of a marked line. If None, every line is kept.
        ignore_whitespace (bool): if True, trailing whitespace is stripped
            from every line and difflib's line / character junk filters
            are enabled.
        colored (bool): if true highlight the diff

    Returns:
        str: formatted difference text message

    References:
        http://www.java2s.com/Code/Python/Utility/IntelligentdiffbetweentextfilesTimPeters.htm

    Example:
        >>> # build test data
        >>> text1 = 'one\ntwo\nthree'
        >>> text2 = 'one\ntwo\nfive'
        >>> # execute function
        >>> result = difftext(text1, text2)
        >>> # verify results
        >>> print(result)
        - three
        + five

    Example:
        >>> # build test data
        >>> text1 = 'one\ntwo\nthree\n3.1\n3.14\n3.1415\npi\n3.4\n3.5\n4'
        >>> text2 = 'one\ntwo\nfive\n3.1\n3.14\n3.1415\npi\n3.4\n4'
        >>> # execute function
        >>> context_lines = 1
        >>> result = difftext(text1, text2, context_lines, colored=True)
        >>> # verify results
        >>> print(result)
    """
    import difflib
    import ubelt as ub

    old_lines = ub.ensure_unicode(text1).splitlines()
    new_lines = ub.ensure_unicode(text2).splitlines()

    ndiff_kw = {}
    if ignore_whitespace:
        old_lines = [line.rstrip() for line in old_lines]
        new_lines = [line.rstrip() for line in new_lines]
        ndiff_kw = {
            'linejunk': difflib.IS_LINE_JUNK,
            'charjunk': difflib.IS_CHARACTER_JUNK,
        }
    every_line = list(difflib.ndiff(old_lines, new_lines, **ndiff_kw))

    if context_lines is None:
        kept_lines = every_line
    else:
        # True for each line that ndiff marked as changed / annotated
        is_marked = [len(line) > 0 and line[0] in '+-?'
                     for line in every_line]
        # A line is kept when it is marked itself or sits within
        # ``context_lines`` rows of a marked line.
        keep = list(is_marked)
        for offset in range(1, context_lines + 1):
            # a marked line ``offset`` rows further down
            for idx, flag in enumerate(is_marked[offset:]):
                if flag:
                    keep[idx] = True
            # a marked line ``offset`` rows further up
            for idx, flag in enumerate(is_marked[:-offset]):
                if flag:
                    keep[idx + offset] = True
        kept_lines = [line for line, wanted in zip(every_line, keep)
                      if wanted]

    text = '\n'.join(kept_lines)
    if colored:
        text = ub.highlight_code(text, lexer_name='diff')
    return text




[docs]
def tree_repr(cwd=None, max_files=100, dirblocklist=None, show_nfiles='auto',
              return_text=False, return_tree=False, pathstyle='name',
              max_depth=None, with_type=False, abs_root_label=True,
              ignore_dotprefix=True, colors=not ub.NO_COLOR):
    """
    Filesystem tree representation

    Like the unix util tree, but allow writing numbers of files per directory
    when given -d option

    Args:
        cwd (None | str | PathLike) : directory to print. Defaults to the
            current working directory.
        max_files (int | None) : maximum files to print before suppressing a
            directory
        dirblocklist (None | Any): directory name patterns to block. Coerced
            to a MultiPattern with a glob hint.
        show_nfiles (bool | str): forwarded to the directory walker
        return_text (bool): if True return the text instead of printing it
        return_tree (bool): if True return the tree and the walker
        pathstyle (str): can be rel, name, or abs
        max_depth (int | None): maximum depth to descend.
            NOTE: this argument is not used anywhere in this function body.
        with_type (bool): forwarded to the directory walker as ``show_types``
        abs_root_label (bool): if True force the root to always be absolute
        ignore_dotprefix (bool): if True, the glob ``.*`` is added to the
            blocked directory name patterns.
        colors (bool): if True use rich

    Returns:
        dict: contains ``'text'`` when ``return_text`` is True, and ``'tree'``
            and ``'walker'`` when ``return_tree`` is True.

    SeeAlso:
        xdev.tree - generator

    Ignore:
        >>> import xdev
        >>> import ubelt as ub
        >>> dpath = ub.Path.appdir('xdev/tests/test_tree_repr').delete().ensuredir()
        >>> outside_fpath = ((dpath / 'outside_path').ensuredir() / 'file').touch()
        >>> outside_dpath2 = ((dpath / 'outside_path/path2')).ensuredir()
        >>> outside_dpath1 = ((dpath / 'outside_path/path')).ensuredir()
        >>> (outside_dpath1 / 'file1').write_text('foo')
        >>> (outside_dpath1 / 'subdir1').ensuredir()
        >>> (outside_dpath1 / 'subdir1/file2').write_text('bar')
        >>> cwd = (dpath / 'root').ensuredir()
        >>> (cwd / 'dir1').ensuredir()
        >>> (cwd / 'dir2').ensuredir()
        >>> (cwd / 'dir3').ensuredir()
        >>> ub.symlink(link_path=(cwd / 'dir1/file_link'), real_path=outside_fpath, verbose=3)
        >>> ub.symlink(link_path=(cwd / 'dir1/dir_link1'), real_path=outside_dpath1, verbose=3)
        >>> ub.symlink(link_path=(cwd / 'dir1/dir_link2'), real_path=outside_dpath2, verbose=3)
        >>> ub.symlink(link_path=(cwd / 'dir1/broken_link'), real_path=outside_dpath1 / 'does-not-exist', verbose=3)
        >>> (cwd / 'dir1/subdir1').ensuredir()
        >>> (cwd / 'dir1/subdir2').ensuredir()
        >>> (cwd / 'dir2/subdir1').ensuredir()
        >>> (cwd / 'dir2/subdir2').ensuredir()
        >>> (cwd / 'dir1/subdir1/file1').touch()
        >>> (cwd / 'dir1/subdir1/file2').touch()
        >>> (cwd / 'dir1/subdir1/file3').touch()
        >>> (cwd / 'dir1/subdir2/file4').touch()
        >>> print('---------')
        >>> xdev.tree_repr(cwd, show_nfiles=True, with_type=True)
        >>> print('---------')
        >>> xdev.tree_repr(cwd, max_files=1)
        >>> print('---------')

        if 1:
            _ = ub.cmd('tree ' + cwd, verbose=3)
    """
    import io
    import os
    from xdev.patterns import MultiPattern

    root = os.getcwd() if cwd is None else cwd

    from xdev.cli import dirstats

    # Normalize the user supplied block list (if any) into a pattern object
    blocked = None
    if dirblocklist is not None:
        blocked = MultiPattern.coerce(dirblocklist, hint='glob')

    if ignore_dotprefix:
        # Also block every directory whose name starts with a dot
        spec = '.*' if blocked is None else [blocked, '.*']
        blocked = MultiPattern.coerce(spec, hint='glob')

    walker_kwargs = dict(
        block_dnames=blocked,
        max_files=max_files,
        abs_root_label=abs_root_label,
        pathstyle=pathstyle,
        show_nfiles=show_nfiles,
        show_progress=False,
        show_types=with_type,
        colors=colors,
    )
    walker = dirstats.DirectoryWalker(root, **walker_kwargs)
    walker._walk()
    walker._update_labels()
    tree = walker.graph

    from xdev.util_networkx import write_network_text
    # Render the graph into an in-memory buffer
    buffer = io.StringIO()
    write_network_text(tree, buffer)
    text = buffer.getvalue()

    info = {}

    if return_text:
        info['text'] = text
    elif colors:
        from rich import print as rprint
        rprint(text)
    else:
        print(text)

    if return_tree:
        info['tree'] = tree
        info['walker'] = walker
    return info




[docs]
def textfind(text, pattern):
    """
    Print ``text`` with every match of ``pattern`` highlighted in red.

    Nothing is returned; the highlighted text is written to stdout.

    Args:
        text (str): the text to search
        pattern (str): regular expression whose matches are highlighted.
            The pattern may contain its own groups and leading inline flags.
    """
    import re

    def _highlight(match):
        # Color the entire match, regardless of any groups inside it
        return ub.color_text(match.group(0), 'red')

    # Substitute match-by-match instead of splitting on a wrapped capture
    # group: re.split also emits every sub-group of the pattern, which would
    # duplicate text and break an "every other part is a match" assumption.
    new_text = re.compile(pattern).sub(_highlight, text)
    print(new_text)