Source code for xdev.misc

# -*- coding: utf-8 -*-
from __future__ import absolute_import, division, print_function, unicode_literals
import ubelt as ub


def quantum_random(pure=False):
    """
    Fetch a 32 bit unsigned random integer from a quantum source.

    The bits are requested from the ANU Quantum Random Number Generator web
    service through the :mod:`quantumrandom` package, so an internet
    connection is required.

    Args:
        pure (bool): if True, return the quantum bits exactly as they were
            received over the web (i.e. subject to MitM attacks). If False,
            xor them with bytes from :func:`os.urandom` before returning.

    Requirements:
        quantumrandom >= 1.9.0

    Returns:
        numpy.uint32: the random number
    """
    import numpy as np
    import os
    import quantumrandom

    # Two 16 bit words that were sent over a network
    quantum_words = quantumrandom.uint16(array_length=2)
    num_bytes = quantum_words.dtype.itemsize * quantum_words.size

    if not pure:
        # Mix the remote words with locally generated bytes of equal length
        local_bytes = memoryview(os.urandom(num_bytes))
        local_words = np.frombuffer(local_bytes, dtype=quantum_words.dtype)
        words = local_words ^ quantum_words
    else:
        words = quantum_words

    # The two 16 bit words are reinterpreted in place as one 32 bit word
    assert words.flags['C_CONTIGUOUS']
    return words.view(np.dtype('uint32'))[0]
def byte_str(num, unit='auto', precision=2):
    """
    Format a number of bytes as a human readable string.

    Args:
        num (int): number of bytes
        unit (str): which unit to use, can be auto, B, KB, MB, GB, TB, PB,
            EB, ZB, or YB. Apart from the exact string ``'auto'``, only the
            first letter is inspected and it is case insensitive.
        precision (int): number of decimals of precision

    References:
        https://en.wikipedia.org/wiki/Orders_of_magnitude_(data)

    Returns:
        str: string representing the number of bytes with appropriate units

    Raises:
        ValueError: if the unit does not start with a known letter

    Example:
        >>> num_list = [1, 100, 1024, 1048576, 1073741824, 1099511627776]
        >>> result = ub.repr2(list(map(byte_str, num_list)), nl=0)
        >>> print(result)
        ['0.00 KB', '0.10 KB', '1.00 KB', '1.00 MB', '1.00 GB', '1.00 TB']
    """
    magnitude = abs(num)

    if unit == 'auto':
        # (power of two used as exclusive upper bound, unit chosen below it).
        # KB is the smallest unit that is picked automatically.
        ceilings = (
            (20, 'KB'), (30, 'MB'), (40, 'GB'), (50, 'TB'),
            (60, 'PB'), (70, 'EB'), (80, 'ZB'),
        )
        unit = 'YB'
        for power, candidate in ceilings:
            if magnitude < 2.0 ** power:
                unit = candidate
                break

    # (leading letter, power of two that one such unit represents)
    scales = (
        ('b', 0), ('k', 10), ('m', 20), ('g', 30), ('t', 40),
        ('p', 50), ('e', 60), ('z', 70), ('y', 80),
    )
    lowered = unit.lower()
    matched = [power for letter, power in scales if lowered.startswith(letter)]
    if not matched:
        raise ValueError('unknown num={!r} unit={!r}'.format(num, unit))

    power = matched[0]
    # Plain bytes are passed through untouched (no float conversion)
    num_unit = num if power == 0 else num / (2.0 ** power)
    return ub.repr2(num_unit, precision=precision) + ' ' + unit
def set_overlaps(set1, set2, s1='s1', s2='s2'):
    """
    Summarize how two collections overlap by reporting sizes.

    Both inputs are converted to sets before any size is computed.

    Args:
        set1 (Iterable): first collection
        set2 (Iterable): second collection
        s1 (str): name for set1, used in the keys of the result
        s2 (str): name for set2, used in the keys of the result

    Returns:
        Dict[str, int]: sizes of sets intersections unions and differences,
            keyed in the order: ``s1``, ``s2``, ``'isect'``, ``'union'``,
            ``'<s1> - <s2>'``, ``'<s2> - <s1>'``.

    Notes:
        This function needs a rename. Possible candidates brainstorm:
            * set_analysis
            * set_binary_analysis
            * set_binary_describe
            * set_relationships
            * describe_sets
            * describe_relations
            * describe_set_relations
            * sets_summary
            * sumarize_sets
            * sumerset
    """
    left = set(set1)
    right = set(set2)

    overlaps = ub.odict()
    overlaps[s1] = len(left)
    overlaps[s2] = len(right)
    overlaps['isect'] = len(left & right)
    overlaps['union'] = len(left | right)
    overlaps['%s - %s' % (s1, s2)] = len(left - right)
    overlaps['%s - %s' % (s2, s1)] = len(right - left)
    return overlaps
def nested_type(obj, unions=False):
    """
    Compute the :mod:`typing` compatible annotation type.

    Containers (dict, list, set, tuple) are described recursively. Any other
    object is described by the name of its class, with builtin container
    names mapped to their capitalized :mod:`typing` spelling.

    Args:
        obj (Any): a typing template based on a specific object
        unions (bool): if True, items of differing types are joined as a
            sorted ``A | B`` union, otherwise they collapse to ``Any``.

    Returns:
        str: type code (might change to return actual type). Empty dicts,
            lists and sets use ``Any`` for the unknown item type and the
            empty tuple is written as ``Tuple[()]``.

    Example:
        >>> obj = {'a': [1, 2], 'b': [3, 4, 5]}
        >>> print(nested_type(obj))
        Dict[str, List[int]]
        >>> print(nested_type({1, 2, 3}))
        Set[int]
        >>> import numpy as np
        >>> obj = {'b': {'a': 1.0, 'b': 'foo', 'c': np.array([1, 2])}}
        >>> print(nested_type(obj, unions=True))
        Dict[str, Dict[str, float | ndarray | str]]
    """
    def _resolve(type_names):
        # Collapse the type names of all items into a single type code.
        # Deduplicate first so that homogeneous containers resolve to their
        # single item type no matter how the names were collected.
        unique = set(type_names)
        if not unique:
            # Nothing is known about the items of an empty container
            return 'Any'
        if len(unique) == 1:
            return next(iter(unique))
        if unions:
            return ' | '.join(sorted(unique))
        return 'Any'

    def _nested(item):
        return nested_type(item, unions=unions)

    if isinstance(obj, dict):
        keytype = _resolve(_nested(key) for key in obj.keys())
        valtype = _resolve(_nested(val) for val in obj.values())
        return 'Dict[{}, {}]'.format(keytype, valtype)

    if isinstance(obj, list):
        return 'List[{}]'.format(_resolve(_nested(item) for item in obj))

    if isinstance(obj, set):
        return 'Set[{}]'.format(_resolve(_nested(item) for item in obj))

    if isinstance(obj, tuple):
        # Tuples are positional, so every item keeps its own type
        itemtypes = [_nested(item) for item in obj]
        if not itemtypes:
            # The typing spelling of the empty tuple
            return 'Tuple[()]'
        return 'Tuple[{}]'.format(', '.join(itemtypes))

    # Leaf object: use the class name, translating builtin container names
    # to the typing spelling. This literal table replaces the lookup in the
    # private ``typing._normalize_alias`` mapping, which is not available on
    # all Python versions.
    typing_aliases = {
        'list': 'List',
        'tuple': 'Tuple',
        'dict': 'Dict',
        'set': 'Set',
        'frozenset': 'FrozenSet',
        'deque': 'Deque',
        'defaultdict': 'DefaultDict',
        'type': 'Type',
        'Set': 'AbstractSet',
    }
    objtype = type(obj).__name__
    return typing_aliases.get(objtype, objtype)
def difftext(text1, text2, context_lines=0, ignore_whitespace=False,
             colored=False):
    r"""
    Uses difflib to return a difference string between two similar texts

    Args:
        text1 (str): old text
        text2 (str): new text
        context_lines (int | None): number of lines of unchanged context to
            keep around each changed line. If None, every line of the diff
            is returned.
        ignore_whitespace (bool): if True, trailing whitespace is stripped
            from every line before diffing and difflib's junk heuristics
            (``IS_LINE_JUNK`` / ``IS_CHARACTER_JUNK``) are enabled.
        colored (bool): if true highlight the diff (requires ubelt)

    Returns:
        str: formatted difference text message

    Raises:
        ValueError: if an input is neither ``str`` nor ``bytes``

    References:
        http://www.java2s.com/Code/Python/Utility/IntelligentdiffbetweentextfilesTimPeters.htm

    Example:
        >>> # build test data
        >>> text1 = 'one\ntwo\nthree'
        >>> text2 = 'one\ntwo\nfive'
        >>> # execute function
        >>> result = difftext(text1, text2)
        >>> # verify results
        >>> print(result)
        - three
        + five

    Example:
        >>> # build test data
        >>> text1 = 'one\ntwo\nthree\n3.1\n3.14\n3.1415\npi\n3.4\n3.5\n4'
        >>> text2 = 'one\ntwo\nfive\n3.1\n3.14\n3.1415\npi\n3.4\n4'
        >>> # execute function
        >>> context_lines = 1
        >>> result = difftext(text1, text2, context_lines, colored=True)
        >>> # verify results
        >>> print(result)
    """
    import difflib

    def _as_lines(text):
        # Normalize one input to a list of unicode lines
        if isinstance(text, bytes):
            text = text.decode('utf8')
        elif not isinstance(text, str):
            raise ValueError('unknown input type {!r}'.format(text))
        lines = text.splitlines()
        if ignore_whitespace:
            lines = [line.rstrip() for line in lines]
        return lines

    text1_lines = _as_lines(text1)
    text2_lines = _as_lines(text2)

    ndiff_kw = {}
    if ignore_whitespace:
        ndiff_kw = dict(linejunk=difflib.IS_LINE_JUNK,
                        charjunk=difflib.IS_CHARACTER_JUNK)
    all_diff_lines = list(difflib.ndiff(text1_lines, text2_lines, **ndiff_kw))

    if context_lines is None:
        diff_lines = all_diff_lines
    else:
        # True for every line that ndiff marked as added, removed or hinted
        ismarked_list = [line[:1] in ('+', '-', '?')
                         for line in all_diff_lines]
        # Also keep lines that are at most context_lines away from a marked
        # line, by or-ing in the marks shifted i positions in each direction.
        isvalid_list = ismarked_list[:]
        for i in range(1, context_lines + 1):
            isvalid_list[:-i] = [
                here or after
                for here, after in zip(isvalid_list[:-i], ismarked_list[i:])
            ]
            isvalid_list[i:] = [
                here or before
                for here, before in zip(isvalid_list[i:], ismarked_list[:-i])
            ]
        diff_lines = [line for line, valid in
                      zip(all_diff_lines, isvalid_list) if valid]

    text = '\n'.join(diff_lines)
    if colored:
        # Only the optional highlighting needs ubelt, so import it lazily
        import ubelt as ub
        text = ub.highlight_code(text, lexer_name='diff')
    return text
def tree_repr(cwd=None, max_files=100, dirblocklist=None, show_nfiles='auto',
              return_text=False, return_tree=False, pathstyle='name',
              max_depth=None, with_type=False, abs_root_label=True,
              ignore_dotprefix=True, colors=not ub.NO_COLOR):
    """
    Filesystem tree representation

    Like the unix util tree, but allow writing numbers of files per directory
    when given -d option

    Args:
        cwd (None | str | PathLike) : directory to print. Defaults to the
            current working directory.
        max_files (int | None) : maximum files to print before supressing a
            directory
        dirblocklist (None | Any): coerced to a glob ``MultiPattern`` and
            given to the directory walker as ``block_dnames``.
        show_nfiles (str | bool): forwarded to the directory walker.
        return_text (bool): if True return the text instead of printing it
        return_tree (bool): if True return the tree (and the walker)
        pathstyle (str): can be rel, name, or abs
        max_depth (int | None): maximum depth to descend.
            NOTE(review): this argument is not referenced anywhere in the
            body of this function - confirm whether it should be forwarded
            to the walker.
        with_type (bool): forwarded to the directory walker as
            ``show_types``.
        abs_root_label (bool): if True force the root to always be absolute
        ignore_dotprefix (bool): if True the glob ``.*`` is added to the
            directory blocklist.
        colors (bool): if True use rich

    Returns:
        dict: contains the key ``text`` if ``return_text`` is True and the
            keys ``tree`` and ``walker`` if ``return_tree`` is True.
            Otherwise it is empty.

    SeeAlso:
        xdev.tree - generator

    Ignore:
        >>> import xdev
        >>> import ubelt as ub
        >>> dpath = ub.Path.appdir('xdev/tests/test_tree_repr').delete().ensuredir()
        >>> outside_fpath = ((dpath / 'outside_path').ensuredir() / 'file').touch()
        >>> outside_dpath2 = ((dpath / 'outside_path/path2')).ensuredir()
        >>> outside_dpath1 = ((dpath / 'outside_path/path')).ensuredir()
        >>> (outside_dpath1 / 'file1').write_text('foo')
        >>> (outside_dpath1 / 'subdir1').ensuredir()
        >>> (outside_dpath1 / 'subdir1/file2').write_text('bar')
        >>> cwd = (dpath / 'root').ensuredir()
        >>> (cwd / 'dir1').ensuredir()
        >>> (cwd / 'dir2').ensuredir()
        >>> (cwd / 'dir3').ensuredir()
        >>> ub.symlink(link_path=(cwd / 'dir1/file_link'), real_path=outside_fpath, verbose=3)
        >>> ub.symlink(link_path=(cwd / 'dir1/dir_link1'), real_path=outside_dpath1, verbose=3)
        >>> ub.symlink(link_path=(cwd / 'dir1/dir_link2'), real_path=outside_dpath2, verbose=3)
        >>> ub.symlink(link_path=(cwd / 'dir1/broken_link'), real_path=outside_dpath1 / 'does-not-exist', verbose=3)
        >>> (cwd / 'dir1/subdir1').ensuredir()
        >>> (cwd / 'dir1/subdir2').ensuredir()
        >>> (cwd / 'dir2/subdir1').ensuredir()
        >>> (cwd / 'dir2/subdir2').ensuredir()
        >>> (cwd / 'dir1/subdir1/file1').touch()
        >>> (cwd / 'dir1/subdir1/file2').touch()
        >>> (cwd / 'dir1/subdir1/file3').touch()
        >>> (cwd / 'dir1/subdir2/file4').touch()
        >>> print('---------')
        >>> xdev.tree_repr(cwd, show_nfiles=True, with_type=True)
        >>> print('---------')
        >>> xdev.tree_repr(cwd, max_files=1)
        >>> print('---------')

        if 1:
            _ = ub.cmd('tree ' + cwd, verbose=3)
    """
    import os
    from xdev.patterns import MultiPattern
    from xdev.cli import dirstats

    root = os.getcwd() if cwd is None else cwd

    # Normalize the user blocklist and optionally extend it with dot-prefixed
    # names.
    blocklist = dirblocklist
    if blocklist is not None:
        blocklist = MultiPattern.coerce(blocklist, hint='glob')
    if ignore_dotprefix:
        dot_glob = '.*'
        extended = dot_glob if blocklist is None else [blocklist, dot_glob]
        blocklist = MultiPattern.coerce(extended, hint='glob')

    walker_kwargs = dict(
        block_dnames=blocklist,
        max_files=max_files,
        abs_root_label=abs_root_label,
        pathstyle=pathstyle,
        show_nfiles=show_nfiles,
        show_progress=False,
        show_types=with_type,
        colors=colors,
    )
    walker = dirstats.DirectoryWalker(root, **walker_kwargs)
    walker._walk()
    walker._update_labels()
    tree = walker.graph

    # Render the graph of the walker into an in-memory text buffer
    from xdev.util_networkx import write_network_text
    import io
    buffer = io.StringIO()
    write_network_text(tree, buffer)
    text = buffer.getvalue()

    info = {}
    if return_text:
        info['text'] = text
    elif colors:
        from rich import print as rprint
        rprint(text)
    else:
        print(text)

    if return_tree:
        info['tree'] = tree
        info['walker'] = walker
    return info
def textfind(text, pattern):
    """
    Print ``text`` with every match of ``pattern`` highlighted in red.

    Nothing is returned; the highlighted text is written to stdout.

    Args:
        text (str): the text to search
        pattern (str | re.Pattern): regular expression to highlight. It may
            contain its own capture groups or leading inline flags.
    """
    import re

    def _paint(match):
        # Color the entire match, regardless of any groups inside of it
        return ub.color_text(match.group(0), 'red')

    # Substituting whole matches keeps the highlighting correct for patterns
    # with capture groups. Splitting on a wrapped pattern would interleave
    # those groups with the unmatched text.
    new_text = re.sub(pattern, _paint, text)
    print(new_text)