from __future__ import absolute_import, division, print_function, unicode_literals
import numpy as np
import decimal
from collections import defaultdict
import ubelt as ub
[docs]
def edit_distance(string1, string2):
"""
Edit distance algorithm. String1 and string2 can be either
strings or lists of strings
Args:
string1 (str | List[str]):
string2 (str | List[str]):
Requirements:
pip install python-Levenshtein
Returns:
float | List[float] | List[List[float]]
Example:
>>> # xdoctest: +REQUIRES(module:Levenshtein)
>>> string1 = 'hello world'
>>> string2 = ['goodbye world', 'rofl', 'hello', 'world', 'lowo']
>>> edit_distance(['hello', 'one'], ['goodbye', 'two'])
>>> edit_distance('hello', ['goodbye', 'two'])
>>> edit_distance(['hello', 'one'], 'goodbye')
>>> edit_distance('hello', 'goodbye')
>>> distmat = edit_distance(string1, string2)
>>> result = ('distmat = %s' % (ub.repr2(distmat),))
>>> print(result)
>>> [7, 9, 6, 6, 7]
"""
import Levenshtein
isiter1 = ub.iterable(string1)
isiter2 = ub.iterable(string2)
strs1 = string1 if isiter1 else [string1]
strs2 = string2 if isiter2 else [string2]
distmat = [
[Levenshtein.distance(str1, str2) for str2 in strs2]
for str1 in strs1
]
# broadcast
if not isiter2:
distmat = [row[0] for row in distmat]
if not isiter1:
distmat = distmat[0]
return distmat
[docs]
def knapsack(items, maxweight, method='iterative'):
r"""
Solve the knapsack problem by finding the most valuable subsequence of
`items` subject that weighs no more than `maxweight`.
Args:
items (tuple): is a sequence of tuples `(value, weight, id_)`, where
`value` is a number and `weight` is a non-negative integer, and
`id_` is an item identifier.
maxweight (numbers.Real):
a non-negative number indicating the maximum weight of items we can
take.
Returns:
tuple: (total_value, items_subset) - a pair whose first element is the
sum of values in the most valuable subsequence, and whose second
element is the subsequence. Subset may be different depending on
implementation (ie top-odwn recusrive vs bottom-up iterative)
References:
http://codereview.stackexchange.com/questions/20569/dynamic-programming-solution-to-knapsack-problem
http://stackoverflow.com/questions/141779/solving-the-np-complete-problem-in-xkcd
http://www.es.ele.tue.nl/education/5MC10/Solutions/knapsack.pdf
Example:
>>> # ENABLE_DOCTEST
>>> import ubelt as ub
>>> items = [(4, 12, 0), (2, 1, 1), (6, 4, 2), (1, 1, 3), (2, 2, 4)]
>>> maxweight = 15
>>> total_value1, items_subset1 = knapsack(items, maxweight, method='iterative')
>>> print('items_subset1 = {}'.format(ub.repr2(items_subset1, nl=1)))
>>> print('total_value1 = {}'.format(ub.repr2(total_value1, nl=1)))
Example:
>>> # ENABLE_DOCTEST
>>> # Solve https://xkcd.com/287/
>>> weights = [2.15, 2.75, 3.35, 3.55, 4.2, 5.8] * 2
>>> items = [(w, w, i) for i, w in enumerate(weights)]
>>> maxweight = 15.05
>>> total_value1, items_subset1 = knapsack(items, maxweight, method='iterative')
>>> total_weight = sum([t[1] for t in items_subset1])
>>> print('total_weight = %r' % (total_weight,))
>>> print('items_subset1 = %r' % (items_subset1,))
>>> #assert items_subset1 == items_subset, 'NOT EQ\n%r !=\n%r' % (items_subset1, items_subset)
Timeit:
# >>> import ubelt as ub
# >>> setup = ub.codeblock(
# >>> '''
# import ubelt as ut
# weights = [215, 275, 335, 355, 42, 58] * 40
# items = [(w, w, i) for i, w in enumerate(weights)]
# maxweight = 2505
# #import numba
# #knapsack_numba = numba.autojit(knapsack_iterative)
# #knapsack_numba = numba.autojit(knapsack_iterative_numpy)
# ''')
# >>> # Test load time
# >>> stmt_list1 = ub.codeblock(
# >>> '''
# knapsack_iterative(items, maxweight)
# knapsack_ilp(items, maxweight)
# #knapsack_numba(items, maxweight)
# #knapsack_iterative_numpy(items, maxweight)
# ''').split('\n')
# ut.util_dev.timeit_compare(stmt_list1, setup, int(5))
from xdev.algo import * # NOQA
def knapsack_inputs(n):
rng = np.random
items = [
(rng.randint(0, 10), rng.randint(0, 10), idx) for idx in range(n)]
return {'items': items, 'maxweight': 100}
def input_wrapper(func):
import functools
@functools.wraps(func)
def _wrap(kwargs):
return func(**kwargs)
return _wrap
import perfplot
perfplot.show(
setup=knapsack_inputs,
kernels=list(map(input_wrapper, [knapsack_iterative, knapsack_ilp])),
n_range=[2 ** k for k in range(15)],
xlabel="maxweight",
equality_check=None,
)
"""
if method == 'iterative':
return knapsack_iterative(items, maxweight)
elif method == 'ilp':
return knapsack_ilp(items, maxweight)
else:
raise NotImplementedError('[util_alg] knapsack method=%r' % (method,))
#return knapsack_iterative_numpy(items, maxweight)
[docs]
def knapsack_ilp(items, maxweight, verbose=False):
"""
solves knapsack using an integer linear program
Example:
>>> # DISABLE_DOCTEST
>>> import ubelt as ub
>>> # Solve https://xkcd.com/287/
>>> weights = [2.15, 2.75, 3.35, 3.55, 4.2, 5.8, 6.55]
>>> values = [2.15, 2.75, 3.35, 3.55, 4.2, 5.8, 6.55]
>>> indices = ['mixed fruit', 'french fries', 'side salad',
>>> 'hot wings', 'mozzarella sticks', 'sampler plate',
>>> 'barbecue']
>>> items = [(v, w, i) for v, w, i in zip(values, weights, indices)]
>>> #items += [(3.95, 3.95, 'mystery plate')]
>>> maxweight = 15.05
>>> verbose = True
>>> total_value, items_subset = knapsack_ilp(items, maxweight, verbose)
>>> print('items_subset = %s' % (ub.repr2(items_subset, nl=1),))
"""
import pulp
# Given Input
values = [t[0] for t in items]
weights = [t[1] for t in items]
indices = [t[2] for t in items]
# Formulate integer program
prob = pulp.LpProblem("Knapsack", pulp.LpMaximize)
# Solution variables
x = pulp.LpVariable.dicts(name='x', indexs=indices,
lowBound=0, upBound=1, cat=pulp.LpInteger)
# maximize objective function
prob.objective = sum(v * x[i] for v, i in zip(values, indices))
# subject to
prob.add(sum(w * x[i] for w, i in zip(weights, indices)) <= maxweight)
# Solve using with solver like CPLEX, GLPK, or SCIP.
#pulp.CPLEX().solve(prob)
pulp.PULP_CBC_CMD().solve(prob)
# Read solution
flags = [x[i].varValue for i in indices]
total_value = sum([val for val, flag in zip(values, flags) if flag])
items_subset = [item for item, flag in zip(items, flags) if flag]
# Print summary
if verbose:
print(prob)
print('OPT:')
print('\n'.join([' %s = %s' % (x[i].name, x[i].varValue) for i in indices]))
print('total_value = %r' % (total_value,))
return total_value, items_subset
[docs]
def number_of_decimals(num):
r"""
Args:
num (float):
References:
stackoverflow.com/questions/6189956/finding-decimal-places
Example:
>>> # ENABLE_DOCTEST
>>> num = 15.05
>>> result = number_of_decimals(num)
>>> print(result)
2
"""
exp = decimal.Decimal(str(num)).as_tuple().exponent
return max(0, -exp)
[docs]
def knapsack_iterative(items, maxweight):
# Knapsack requires integral weights
weights = [t[1] for t in items]
max_exp = max([number_of_decimals(w_) for w_ in weights])
coeff = 10 ** max_exp
# Adjust weights to be integral
int_maxweight = int(maxweight * coeff)
int_items = [(v, int(w * coeff), idx) for v, w, idx in items]
"""
items = int_items
maxweight = int_maxweight
"""
return knapsack_iterative_int(int_items, int_maxweight)
[docs]
def knapsack_iterative_int(items, maxweight):
r"""
Iterative knapsack method
Math:
maximize \sum_{i \in T} v_i
subject to \sum_{i \in T} w_i \leq W
Notes:
dpmat is the dynamic programming memoization matrix.
dpmat[i, w] is the total value of the items with weight at most W
T is idx_subset, the set of indicies in the optimal solution
Example:
>>> # ENABLE_DOCTEST
>>> weights = [1, 3, 3, 5, 2, 1] * 2
>>> items = [(w, w, i) for i, w in enumerate(weights)]
>>> maxweight = 10
>>> items = [(.8, 700, 0)]
>>> maxweight = 2000
>>> print('maxweight = %r' % (maxweight,))
>>> print('items = %r' % (items,))
>>> total_value, items_subset = knapsack_iterative_int(items, maxweight)
>>> total_weight = sum([t[1] for t in items_subset])
>>> print('total_weight = %r' % (total_weight,))
>>> print('items_subset = %r' % (items_subset,))
>>> result = 'total_value = %.2f' % (total_value,)
>>> print(result)
total_value = 0.80
Ignore:
DPMAT = [[dpmat[r][c] for c in range(maxweight)] for r in range(len(items))]
KMAT = [[kmat[r][c] for c in range(maxweight)] for r in range(len(items))]
"""
values = [t[0] for t in items]
weights = [t[1] for t in items]
maxsize = maxweight + 1
# Sparse representation seems better
dpmat = defaultdict(lambda: defaultdict(lambda: np.inf))
kmat = defaultdict(lambda: defaultdict(lambda: False))
idx_subset = [] # NOQA
for w in range(maxsize):
dpmat[0][w] = 0
# For each item consider to include it or not
for idx in range(len(items)):
item_val = values[idx]
item_weight = weights[idx]
# consider at each possible bag size
for w in range(maxsize):
valid_item = item_weight <= w
if idx > 0:
prev_val = dpmat[idx - 1][w]
prev_noitem_val = dpmat[idx - 1][w - item_weight]
else:
prev_val = 0
prev_noitem_val = 0
withitem_val = item_val + prev_noitem_val
more_valuable = withitem_val > prev_val
if valid_item and more_valuable:
dpmat[idx][w] = withitem_val
kmat[idx][w] = True
else:
dpmat[idx][w] = prev_val
kmat[idx][w] = False
# Trace backwards to get the items used in the solution
K = maxweight
for idx in reversed(range(len(items))):
if kmat[idx][K]:
idx_subset.append(idx)
K = K - weights[idx]
idx_subset = sorted(idx_subset)
items_subset = [items[i] for i in idx_subset]
total_value = dpmat[len(items) - 1][maxweight]
return total_value, items_subset
[docs]
def knapsack_iterative_numpy(items, maxweight):
r"""
Iterative knapsack method
maximize \sum_{i \in T} v_i
subject to \sum_{i \in T} w_i \leq W
Notes:
dpmat is the dynamic programming memoization matrix.
dpmat[i, w] is the total value of the items with weight at most W
T is the set of indicies in the optimal solution
"""
#import numpy as np
items = np.array(items)
weights = items.T[1]
# Find maximum decimal place (this problem is in NP)
max_exp = max([number_of_decimals(w_) for w_ in weights])
coeff = 10 ** max_exp
# Adjust weights to be integral
weights = (weights * coeff).astype(np.int)
values = items.T[0]
MAXWEIGHT = int(maxweight * coeff)
W_SIZE = MAXWEIGHT + 1
dpmat = np.full((len(items), W_SIZE), np.inf)
kmat = np.full((len(items), W_SIZE), 0, dtype=np.bool)
idx_subset = []
for w in range(W_SIZE):
dpmat[0][w] = 0
for idx in range(1, len(items)):
item_val = values[idx]
item_weight = weights[idx]
for w in range(W_SIZE):
valid_item = item_weight <= w
prev_val = dpmat[idx - 1][w]
if valid_item:
prev_noitem_val = dpmat[idx - 1][w - item_weight]
withitem_val = item_val + prev_noitem_val
more_valuable = withitem_val > prev_val
else:
more_valuable = False
dpmat[idx][w] = withitem_val if more_valuable else prev_val
kmat[idx][w] = more_valuable
K = MAXWEIGHT
for idx in reversed(range(1, len(items))):
if kmat[idx, K]:
idx_subset.append(idx)
K = K - weights[idx]
idx_subset = sorted(idx_subset)
items_subset = [items[i] for i in idx_subset]
total_value = dpmat[len(items) - 1][MAXWEIGHT]
return total_value, items_subset
#def knapsack_all_solns(items, maxweight):
# """
# TODO: return all optimal solutions to the knapsack problem
# References:
# stackoverflow.com/questions/30554290/all-solutions-from-knapsack-dp-matrix
# >>> items = [(1, 2, 0), (1, 3, 1), (1, 4, 2), (1, 3, 3), (1, 3, 4), (1, 5, 5), (1, 4, 6), (1, 1, 7), (1, 1, 8), (1, 3, 9)]
# >>> weights = ut.get_list_column(items, 1)
# >>> maxweight = 6
# """
[docs]
def knapsack_greedy(items, maxweight):
r"""
non-optimal greedy version of knapsack algorithm
does not sort input. Sort the input by largest value
first if desired.
Args:
`items` (tuple): is a sequence of tuples `(value, weight, id_)`, where `value`
is a scalar and `weight` is a non-negative integer, and `id_` is an
item identifier.
`maxweight` (scalar): is a non-negative integer.
Example:
>>> # ENABLE_DOCTEST
>>> items = [(4, 12, 0), (2, 1, 1), (6, 4, 2), (1, 1, 3), (2, 2, 4)]
>>> maxweight = 15
>>> total_value, items_subset = knapsack_greedy(items, maxweight)
>>> result = 'total_value = %r\n' % (total_value,)
>>> result += 'items_subset = %r' % (items_subset,)
>>> print(result)
total_value = 7
items_subset = [(4, 12, 0), (2, 1, 1), (1, 1, 3)]
"""
items_subset = []
total_weight = 0
total_value = 0
for item in items:
value, weight = item[0:2]
if total_weight + weight > maxweight:
continue
else:
items_subset.append(item)
total_weight += weight
total_value += value
return total_value, items_subset
if __name__ == '__main__':
"""
CommandLine:
python ~/code/xdev/xdev/algo.py all
"""
import xdoctest
xdoctest.doctest_module(__file__)