Source code for utool.util_alg

# -*- coding: utf-8 -*-
# TODO Licence:
#
# TODO:  move library intensive functions to vtool
from __future__ import absolute_import, division, print_function, unicode_literals
import operator as op
import decimal
import six
import itertools
from six.moves import zip, range, reduce, map
from collections import defaultdict
import math
from utool import util_type
from utool import util_list
from utool import util_dict
from utool import util_inject
from utool import util_decor

try:
    import numpy as np

    HAVE_NUMPY = True
except ImportError:
    HAVE_NUMPY = False
    # TODO remove numpy
    pass
try:
    import scipy.spatial.distance as spdist

    HAVE_SCIPY = True
except ImportError:
    HAVE_SCIPY = False
print, rrr, profile = util_inject.inject2(__name__)


# Constants
PHI = 1.61803398875
PHI_A = 1 / PHI
PHI_B = 1 - PHI_A
TAU = 2 * math.pi
# Conversion factors
KM_PER_MILE = 1.609344
MM_PER_INCH = 25.4
FOOT_PER_MILE = 5280
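

# Added usage sketch (not part of the original utool source): the conversion
# constants above compose by plain multiplication. The `_demo_*` helper name
# is hypothetical.
def _demo_unit_constants():
    """
    Example:
        >>> # ENABLE_DOCTEST
        >>> from utool.util_alg import *  # NOQA
        >>> print('%.5f' % (5 * KM_PER_MILE,))  # 5 miles in kilometers
        8.04672
        >>> print('%.1f' % (2 * MM_PER_INCH,))  # 2 inches in millimeters
        50.8
    """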


def find_group_differences(groups1, groups2):
    r"""
    Returns a measure of how dissimilar two groupings are

    Args:
        groups1 (list): true grouping of items
        groups2 (list): predicted grouping of items

    CommandLine:
        python -m utool.util_alg find_group_differences

    SeeAlso:
        vtool.group_indicies
        vtool.apply_grouping

    Example:
        >>> # ENABLE_DOCTEST
        >>> from utool.util_alg import *  # NOQA
        >>> groups1 = [[1, 2, 3], [4], [5, 6], [7, 8], [9, 10, 11]]
        >>> groups2 = [[1, 2, 11], [3, 4], [5, 6], [7], [8, 9], [10]]
        >>> total_error = find_group_differences(groups1, groups2)
        >>> result = ('total_error = %r' % (total_error,))
        >>> print(result)
        total_error = 20

    Example:
        >>> # ENABLE_DOCTEST
        >>> from utool.util_alg import *  # NOQA
        >>> groups1 = [[1, 2, 3], [4], [5, 6]]
        >>> groups2 = [[1, 2, 3], [4], [5, 6]]
        >>> total_error = find_group_differences(groups1, groups2)
        >>> result = ('total_error = %r' % (total_error,))
        >>> print(result)
        total_error = 0

    Example:
        >>> # ENABLE_DOCTEST
        >>> from utool.util_alg import *  # NOQA
        >>> groups1 = [[1, 2, 3], [4], [5, 6]]
        >>> groups2 = [[1, 2], [4], [5, 6]]
        >>> total_error = find_group_differences(groups1, groups2)
        >>> result = ('total_error = %r' % (total_error,))
        >>> print(result)
        total_error = 4

    Ignore:
        # Can this be done via sklearn label analysis?
        # maybe no... the labels assigned to each component are arbitrary
        # maybe if we label edges? likely too many labels.
        groups1 = [[1, 2, 3], [4], [5, 6], [7, 8], [9, 10, 11]]
        groups2 = [[1, 2, 11], [3, 4], [5, 6], [7], [8, 9], [10]]
    """
    import utool as ut
    # For each group, build a mapping from each item to the other members of
    # its group
    item_to_others1 = {
        item: set(_group) - {item} for _group in groups1 for item in _group
    }
    item_to_others2 = {
        item: set(_group) - {item} for _group in groups2 for item in _group
    }
    flat_items1 = ut.flatten(groups1)
    flat_items2 = ut.flatten(groups2)
    flat_items = list(set(flat_items1 + flat_items2))
    errors = []
    item_to_error = {}
    for item in flat_items:
        # Determine the number of unshared members in each group
        others1 = item_to_others1.get(item, set([]))
        others2 = item_to_others2.get(item, set([]))
        missing1 = others1 - others2
        missing2 = others2 - others1
        error = len(missing1) + len(missing2)
        if error > 0:
            item_to_error[item] = error
        errors.append(error)
    total_error = sum(errors)
    return total_error

def find_group_consistencies(groups1, groups2):
    r"""
    Returns a measure of group consistency

    Example:
        >>> # ENABLE_DOCTEST
        >>> from utool.util_alg import *  # NOQA
        >>> groups1 = [[1, 2, 3], [4], [5, 6]]
        >>> groups2 = [[1, 2], [4], [5, 6]]
        >>> common_groups = find_group_consistencies(groups1, groups2)
        >>> result = ('common_groups = %r' % (common_groups,))
        >>> print(result)
        common_groups = [(5, 6), (4,)]
    """
    group1_list = {tuple(sorted(_group)) for _group in groups1}
    group2_list = {tuple(sorted(_group)) for _group in groups2}
    common_groups = list(group1_list.intersection(group2_list))
    return common_groups

def compare_groups(true_groups, pred_groups):
    r"""
    Finds how predictions need to be modified to match the true grouping.

    Notes:
        pred_merges - the merges that would need to be done for the
            pred_groups to match true_groups.
        pred_hybrid - the hybrid split/merges that would need to be done for
            the pred_groups to match true_groups.

    Ignore:
        >>> # ENABLE_DOCTEST
        >>> from utool.util_alg import *  # NOQA
        >>> import utool as ut
        >>> true_groups = [
        >>>     [20, 21], [22, 23], [1, 2], [12, 13, 14], [4], [5, 6, 3], [7, 8],
        >>>     [9, 10, 11], [31, 32, 33, 34, 35], [41, 42, 43, 44], [45], [50]
        >>> ]
        >>> pred_groups = [
        >>>     [20, 21, 22, 23], [1, 2], [12], [13, 14], [3, 4], [5, 6, 11],
        >>>     [7], [8, 9], [10], [31, 32], [33, 34, 35], [41, 42, 43, 44, 45]
        >>> ]
        >>> comparisons = ut.compare_groups(true_groups, pred_groups)
        >>> print(comparisons)
        >>> result = ut.repr4(comparisons)
        >>> print(result)
        {
            'common': {{1, 2}},
            'pred_hybrid': {{10}, {3, 4}, {5, 6, 11}, {7}, {8, 9}},
            'pred_merges': [{{12}, {13, 14}}, {{31, 32}, {33, 34, 35}}],
            'pred_splits': [{20, 21, 22, 23}, {41, 42, 43, 44, 45}],
            'true_hybrid': {{3, 5, 6}, {4}, {50}, {7, 8}, {9, 10, 11}},
            'true_merges': [{12, 13, 14}, {31, 32, 33, 34, 35}],
            'true_splits': [{{20, 21}, {22, 23}}, {{41, 42, 43, 44}, {45}}],
        }
    """
    import utool as ut
    true = {frozenset(_group) for _group in true_groups}
    pred = {frozenset(_group) for _group in pred_groups}
    # Find the groups that are exactly the same
    common = true.intersection(pred)
    true_sets = true.difference(common)
    pred_sets = pred.difference(common)
    # connected component lookups
    pred_conn = {p: frozenset(ps) for ps in pred for p in ps}
    true_conn = {t: frozenset(ts) for ts in true for t in ts}
    # How many predictions can be merged into perfect pieces?
    # For each true set, find if it can be made via merging pred sets
    pred_merges = []
    true_merges = []
    for ts in true_sets:
        ccs = set([pred_conn.get(t, frozenset()) for t in ts])
        if frozenset.union(*ccs) == ts:
            # This is a pure merge
            pred_merges.append(ccs)
            true_merges.append(ts)
    # How many predictions can be split into perfect pieces?
    true_splits = []
    pred_splits = []
    for ps in pred_sets:
        ccs = set([true_conn.get(p, frozenset()) for p in ps])
        if frozenset.union(*ccs) == ps:
            # This is a pure split
            true_splits.append(ccs)
            pred_splits.append(ps)
    pred_merges_flat = ut.flatten(pred_merges)
    true_splits_flat = ut.flatten(true_splits)
    pred_hybrid = frozenset(map(frozenset, pred_sets)).difference(
        set(pred_splits + pred_merges_flat)
    )
    true_hybrid = frozenset(map(frozenset, true_sets)).difference(
        set(true_merges + true_splits_flat)
    )
    comparisons = {
        'common': common,
        # 'true_splits_flat': true_splits_flat,
        'true_splits': true_splits,
        'true_merges': true_merges,
        'true_hybrid': true_hybrid,
        'pred_splits': pred_splits,
        'pred_merges': pred_merges,
        # 'pred_merges_flat': pred_merges_flat,
        'pred_hybrid': pred_hybrid,
    }
    return comparisons

def grouping_delta(old, new, pure=True):
    r"""
    Finds what happened to the old groups to form the new groups.

    Args:
        old (set of frozensets): old grouping
        new (set of frozensets): new grouping
        pure (bool): hybrids are separated from pure merges and splits if
            pure is True, otherwise hybrid cases are grouped in merges and
            splits.

    Returns:
        dict: delta: dictionary of changes containing the merges, splits,
            unchanged, and hybrid cases. Except for unchanged, each case is a
            subdict with `new` and `old` keys. For splits / merges, one of
            these contains nested sequences to indicate what the split /
            merge is.

    TODO:
        incorporate addition / deletion of elements?

    Notes:
        merges - which old groups were merged into a single new group.
        splits - which old groups were split into multiple new groups.
        hybrid - which old groups had split/merge actions applied.
        unchanged - which old groups are the same as new groups.

    Ignore:
        >>> # ENABLE_DOCTEST
        >>> from utool.util_alg import *  # NOQA
        >>> import utool as ut
        >>> old = [
        >>>     [20, 21, 22, 23], [1, 2], [12], [13, 14], [3, 4], [5, 6, 11],
        >>>     [7], [8, 9], [10], [31, 32], [33, 34, 35], [41, 42, 43, 44, 45]
        >>> ]
        >>> new = [
        >>>     [20, 21], [22, 23], [1, 2], [12, 13, 14], [4], [5, 6, 3], [7, 8],
        >>>     [9, 10, 11], [31, 32, 33, 34, 35], [41, 42, 43, 44], [45],
        >>> ]
        >>> delta = ut.grouping_delta(old, new)
        >>> assert set(old[0]) in delta['splits']['old']
        >>> assert set(new[3]) in delta['merges']['new']
        >>> assert set(old[1]) in delta['unchanged']
        >>> result = ut.repr4(delta, nl=2, nobr=True, sk=True)
        >>> print(result)
        unchanged: {
            {1, 2},
        },
        splits: {
            old: [{20, 21, 22, 23}, {41, 42, 43, 44, 45}],
            new: [{{20, 21}, {22, 23}}, {{41, 42, 43, 44}, {45}}],
        },
        merges: {
            old: [{{12}, {13, 14}}, {{31, 32}, {33, 34, 35}}],
            new: [{12, 13, 14}, {31, 32, 33, 34, 35}],
        },
        hybrid: {
            old: {{10}, {3, 4}, {5, 6, 11}, {7}, {8, 9}},
            new: {{3, 5, 6}, {4}, {7, 8}, {9, 10, 11}},
            splits: [{{7}}, {{11}, {5, 6}}, {{10}}, {{3}, {4}}, {{8}, {9}}],
            merges: [{{7}, {8}}, {{4}}, {{3}, {5, 6}}, {{10}, {11}, {9}}],
        },

    Ignore:
        >>> # ENABLE_DOCTEST
        >>> from utool.util_alg import *  # NOQA
        >>> import utool as ut
        >>> old = [
        >>>     [1, 2, 3], [4], [5, 6, 7, 8, 9], [10, 11, 12]
        >>> ]
        >>> new = [
        >>>     [1], [2], [3, 4], [5, 6, 7], [8, 9, 10, 11, 12]
        >>> ]
        >>> # every case here is hybrid
        >>> pure_delta = ut.grouping_delta(old, new, pure=True)
        >>> assert len(ut.flatten(pure_delta['merges'].values())) == 0
        >>> assert len(ut.flatten(pure_delta['splits'].values())) == 0
        >>> delta = ut.grouping_delta(old, new, pure=False)
        >>> delta = ut.order_dict_by(delta, ['unchanged', 'splits', 'merges'])
        >>> result = ut.repr4(delta, nl=2, sk=True)
        >>> print(result)
        {
            unchanged: {},
            splits: [
                [{2}, {3}, {1}],
                [{8, 9}, {5, 6, 7}],
            ],
            merges: [
                [{4}, {3}],
                [{8, 9}, {10, 11, 12}],
            ],
        }
    """
    import utool as ut
    _old = {frozenset(_group) for _group in old}
    _new = {frozenset(_group) for _group in new}
    _new_items = set(ut.flatten(_new))
    _old_items = set(ut.flatten(_old))
    assert _new_items == _old_items, 'new and old sets must be the same'
    # Find the groups that are exactly the same
    unchanged = _new.intersection(_old)
    new_sets = _new.difference(unchanged)
    old_sets = _old.difference(unchanged)
    # connected component lookups
    old_conn = {p: frozenset(ps) for ps in _old for p in ps}
    new_conn = {t: frozenset(ts) for ts in _new for t in ts}
    # How many old sets can be merged into perfect pieces?
    # For each new set, find if it can be made via merging old sets
    old_merges = []
    new_merges = []
    for ts in new_sets:
        ccs = set([old_conn.get(t, frozenset()) for t in ts])
        if frozenset.union(*ccs) == ts:
            # This is a pure merge
            old_merges.append(ccs)
            new_merges.append(ts)
    # How many old sets can be split into perfect pieces?
    new_splits = []
    old_splits = []
    for ps in old_sets:
        ccs = set([new_conn.get(p, frozenset()) for p in ps])
        if frozenset.union(*ccs) == ps:
            # This is a pure split
            new_splits.append(ccs)
            old_splits.append(ps)
    old_merges_flat = ut.flatten(old_merges)
    new_splits_flat = ut.flatten(new_splits)
    old_hybrid = frozenset(map(frozenset, old_sets)).difference(
        set(old_splits + old_merges_flat)
    )
    new_hybrid = frozenset(map(frozenset, new_sets)).difference(
        set(new_merges + new_splits_flat)
    )
    breakup_hybrids = True
    if breakup_hybrids:
        # First split each hybrid
        lookup = {a: n for n, items in enumerate(new_hybrid) for a in items}
        hybrid_splits = []
        for items in old_hybrid:
            nids = ut.take(lookup, items)
            split_part = list(ut.group_items(items, nids).values())
            hybrid_splits.append(set(map(frozenset, split_part)))
        # And then merge them into new groups
        hybrid_merge_parts = ut.flatten(hybrid_splits)
        part_nids = [lookup[next(iter(aids))] for aids in hybrid_merge_parts]
        hybrid_merges = list(
            map(set, ut.group_items(hybrid_merge_parts, part_nids).values())
        )
    if pure:
        delta = ut.odict()
        delta['unchanged'] = unchanged
        delta['splits'] = ut.odict([('old', old_splits), ('new', new_splits)])
        delta['merges'] = ut.odict([('old', old_merges), ('new', new_merges)])
        delta['hybrid'] = ut.odict(
            [
                ('old', old_hybrid),
                ('new', new_hybrid),
                ('splits', hybrid_splits),
                ('merges', hybrid_merges),
            ]
        )
    else:
        # Incorporate hybrid partial cases with pure splits and merges
        new_splits2 = [s for s in hybrid_splits if len(s) > 1]
        old_merges2 = [m for m in hybrid_merges if len(m) > 1]
        all_new_splits = new_splits + new_splits2
        all_old_merges = old_merges + old_merges2
        # Don't bother differentiating old and new
        # old_splits2 = [frozenset(ut.flatten(s)) for s in new_splits2]
        # new_merges2 = [frozenset(ut.flatten(m)) for m in old_merges2]
        # all_old_splits = old_splits + old_splits2
        # all_new_merges = new_merges + new_merges2
        splits = all_new_splits
        merges = all_old_merges
        # Sort by split and merge sizes
        splits = ut.sortedby(splits, [len(ut.flatten(_)) for _ in splits])
        merges = ut.sortedby(merges, [len(ut.flatten(_)) for _ in merges])
        splits = [ut.sortedby(_, ut.emap(len, _)) for _ in splits]
        merges = [ut.sortedby(_, ut.emap(len, _)) for _ in merges]
        delta = ut.odict()
        delta['unchanged'] = unchanged
        delta['splits'] = splits
        delta['merges'] = merges
    return delta

def grouping_delta_stats(old, new):
    """
    Returns statistics about grouping changes

    Args:
        old (set of frozenset): old grouping
        new (set of frozenset): new grouping

    Returns:
        pd.DataFrame: df: data frame of size statistics

    Example:
        >>> # ENABLE_DOCTEST
        >>> from utool.util_alg import *  # NOQA
        >>> import utool as ut
        >>> old = [
        >>>     [20, 21, 22, 23], [1, 2], [12], [13, 14], [3, 4], [5, 6, 11],
        >>>     [7], [8, 9], [10], [31, 32], [33, 34, 35], [41, 42, 43, 44, 45]
        >>> ]
        >>> new = [
        >>>     [20, 21], [22, 23], [1, 2], [12, 13, 14], [4], [5, 6, 3], [7, 8],
        >>>     [9, 10, 11], [31, 32, 33, 34, 35], [41, 42, 43, 44], [45],
        >>> ]
        >>> df = ut.grouping_delta_stats(old, new)
        >>> print(df)
    """
    import pandas as pd
    import utool as ut
    group_delta = ut.grouping_delta(old, new)
    stats = ut.odict()
    unchanged = group_delta['unchanged']
    splits = group_delta['splits']
    merges = group_delta['merges']
    hybrid = group_delta['hybrid']
    statsmap = ut.partial(lambda x: ut.stats_dict(map(len, x), size=True))
    stats['unchanged'] = statsmap(unchanged)
    stats['old_split'] = statsmap(splits['old'])
    stats['new_split'] = statsmap(ut.flatten(splits['new']))
    stats['old_merge'] = statsmap(ut.flatten(merges['old']))
    stats['new_merge'] = statsmap(merges['new'])
    stats['old_hybrid'] = statsmap(hybrid['old'])
    stats['new_hybrid'] = statsmap(hybrid['new'])
    df = pd.DataFrame.from_dict(stats, orient='index')
    df = df.loc[list(stats.keys())]
    return df

def upper_diag_self_prodx(list_):
    """
    upper diagonal of the cartesian product of self and self.
    Weird name. fixme

    Args:
        list_ (list):

    Returns:
        list:

    CommandLine:
        python -m utool.util_alg --exec-upper_diag_self_prodx

    Example:
        >>> # ENABLE_DOCTEST
        >>> from utool.util_alg import *  # NOQA
        >>> list_ = [1, 2, 3]
        >>> result = upper_diag_self_prodx(list_)
        >>> print(result)
        [(1, 2), (1, 3), (2, 3)]
    """
    return [
        (item1, item2)
        for n1, item1 in enumerate(list_)
        for n2, item2 in enumerate(list_)
        if n1 < n2
    ]

def diagonalized_iter(size):
    r"""
    TODO: generalize to more than 2 dimensions to be more like
    itertools.product.

    CommandLine:
        python -m utool.util_alg --exec-diagonalized_iter
        python -m utool.util_alg --exec-diagonalized_iter --size=5

    Example:
        >>> # ENABLE_DOCTEST
        >>> from utool.util_alg import *  # NOQA
        >>> import utool as ut
        >>> size = ut.get_argval('--size', default=4)
        >>> iter_ = diagonalized_iter(size)
        >>> mat = [[None] * size for _ in range(size)]
        >>> for count, (r, c) in enumerate(iter_):
        >>>     mat[r][c] = count
        >>> result = ut.repr2(mat, nl=1, packed=True)
        >>> print(result)
        [[0, 2, 5, 9],
         [1, 4, 8, 12],
         [3, 7, 11, 14],
         [6, 10, 13, 15],]
    """
    for i in range(0, size + 1):
        for r, c in zip(reversed(range(i)), (range(i))):
            yield (r, c)
    for i in range(1, size):
        for r, c in zip(reversed(range(i, size)), (range(i, size))):
            yield (r, c)

def colwise_diag_idxs(size, num=2):
    r"""
    dont trust this implementation or this function name

    Args:
        size (int):

    Returns:
        ?: upper_diag_idxs

    CommandLine:
        python -m utool.util_alg --exec-colwise_diag_idxs --size=5 --num=2
        python -m utool.util_alg --exec-colwise_diag_idxs --size=3 --num=3

    Example:
        >>> # DISABLE_DOCTEST
        >>> from utool.util_alg import *  # NOQA
        >>> size = ut.get_argval('--size', default=5)
        >>> num = ut.get_argval('--num', default=2)
        >>> mat = np.zeros([size] * num, dtype=int)
        >>> upper_diag_idxs = colwise_diag_idxs(size, num)
        >>> poses = np.array(upper_diag_idxs)
        >>> idxs = np.ravel_multi_index(poses.T, mat.shape)
        >>> print('poses.T =\n%s' % (ut.repr2(poses.T),))
        >>> mat[tuple(poses.T)] = np.arange(1, len(poses) + 1)
        >>> print(mat)
        poses.T =
        np.array([[0, 0, 1, 0, 1, 2, 0, 1, 2, 3],
                  [1, 2, 2, 3, 3, 3, 4, 4, 4, 4]])
    """
    # diag_idxs = list(diagonalized_iter(size))
    # upper_diag_idxs = [(r, c) for r, c in diag_idxs if r < c]
    # # diag_idxs = list(diagonalized_iter(size))
    import utool as ut
    diag_idxs = ut.iprod(*[range(size) for _ in range(num)])
    # diag_idxs = list(ut.iprod(range(size), range(size)))
    # this is pretty much a simple c ordering
    upper_diag_idxs = [
        tup[::-1]
        for tup in diag_idxs
        if all([a > b for a, b in ut.itertwo(tup)])
        # if all([a > b for a, b in ut.itertwo(tup[:2])])
    ]
    # upper_diag_idxs = [(c, r) for r, c in diag_idxs if r > c]
    # # upper_diag_idxs = [(r, c) for r, c in diag_idxs if r > c]
    return upper_diag_idxs

def self_prodx(list_):
    return [
        (item1, item2)
        for n1, item1 in enumerate(list_)
        for n2, item2 in enumerate(list_)
        if n1 != n2
    ]

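
# Added usage sketch (not part of the original utool source): self_prodx is
# the Cartesian product of a list with itself minus the diagonal. The
# `_demo_*` helper name is hypothetical.
def _demo_self_prodx():
    """
    Example:
        >>> # ENABLE_DOCTEST
        >>> from utool.util_alg import *  # NOQA
        >>> print(self_prodx([1, 2, 3]))
        [(1, 2), (1, 3), (2, 1), (2, 3), (3, 1), (3, 2)]
    """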
def product_nonsame(list1, list2):
    """product of list1 and list2 where items are non equal"""
    for item1, item2 in itertools.product(list1, list2):
        if item1 != item2:
            yield (item1, item2)

def product_nonsame_self(list_):
    return product_nonsame(list_, list_)

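
# Added usage sketch (not part of the original utool source): product_nonsame
# simply filters equal pairs out of itertools.product. The `_demo_*` helper
# name is hypothetical.
def _demo_product_nonsame():
    """
    Example:
        >>> # ENABLE_DOCTEST
        >>> from utool.util_alg import *  # NOQA
        >>> print(list(product_nonsame([1, 2], [2, 3])))
        [(1, 2), (1, 3), (2, 3)]
    """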
def greedy_max_inden_setcover(candidate_sets_dict, items, max_covers=None):
    """
    greedy algorithm for maximum independent set cover

    Covers items with sets from candidate sets. Could be made faster.

    CommandLine:
        python -m utool.util_alg --test-greedy_max_inden_setcover

    Example:
        >>> # ENABLE_DOCTEST
        >>> from utool.util_alg import *  # NOQA
        >>> import utool as ut
        >>> candidate_sets_dict = {'a': [5, 3], 'b': [2, 3, 5],
        ...                        'c': [4, 8], 'd': [7, 6, 2, 1]}
        >>> items = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
        >>> max_covers = None
        >>> tup = greedy_max_inden_setcover(candidate_sets_dict, items, max_covers)
        >>> (uncovered_items, covered_items_list, accepted_keys) = tup
        >>> result = ut.repr4((uncovered_items, sorted(list(accepted_keys))), nl=False)
        >>> print(result)
        ([0, 9], ['a', 'c', 'd'])

    Example:
        >>> # ENABLE_DOCTEST
        >>> from utool.util_alg import *  # NOQA
        >>> import utool as ut
        >>> candidate_sets_dict = {'a': [5, 3], 'b': [2, 3, 5],
        ...                        'c': [4, 8], 'd': [7, 6, 2, 1]}
        >>> items = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
        >>> max_covers = 1
        >>> tup = greedy_max_inden_setcover(candidate_sets_dict, items, max_covers)
        >>> (uncovered_items, covered_items_list, accepted_keys) = tup
        >>> result = ut.repr4((uncovered_items, sorted(list(accepted_keys))), nl=False)
        >>> print(result)
        ([0, 3, 4, 5, 8, 9], ['d'])
    """
    uncovered_set = set(items)
    rejected_keys = set()
    accepted_keys = set()
    covered_items_list = []
    while True:
        # Break if we have enough covers
        if max_covers is not None and len(covered_items_list) >= max_covers:
            break
        maxkey = None
        maxlen = -1
        # Loop over candidates to find the biggest un-added cover set
        for key, candidate_items in six.iteritems(candidate_sets_dict):
            if key in rejected_keys or key in accepted_keys:
                continue
            # print('Checking %r' % (key,))
            lenval = len(candidate_items)
            # len(uncovered_set.intersection(candidate_items)) == lenval:
            if uncovered_set.issuperset(candidate_items):
                if lenval > maxlen:
                    maxkey = key
                    maxlen = lenval
            else:
                rejected_keys.add(key)
        # Add the set to the cover
        if maxkey is None:
            break
        maxval = candidate_sets_dict[maxkey]
        accepted_keys.add(maxkey)
        covered_items_list.append(list(maxval))
        # Add values in this key to the cover
        uncovered_set.difference_update(maxval)
    uncovered_items = list(uncovered_set)
    covertup = uncovered_items, covered_items_list, accepted_keys
    return covertup

def setcover_greedy(
    candidate_sets_dict, items=None, set_weights=None, item_values=None, max_weight=None
):
    r"""
    Greedy algorithm for various covering problems. The approximation
    guarantee depends on specifications like set_weights and item_values.

    Set Cover: log(len(items) + 1) approximation algorithm
    Weighted Maximum Cover: 1 - 1/e == .632 approximation algorithm
    Generalized maximum coverage is not implemented

    References:
        https://en.wikipedia.org/wiki/Maximum_coverage_problem

    Example:
        >>> # DISABLE_DOCTEST
        >>> from utool.util_alg import *  # NOQA
        >>> import utool as ut
        >>> candidate_sets_dict = {
        >>>     'a': [1, 2, 3, 8, 9, 0],
        >>>     'b': [1, 2, 3, 4, 5],
        >>>     'c': [4, 5, 7],
        >>>     'd': [5, 6, 7],
        >>>     'e': [6, 7, 8, 9, 0],
        >>> }
        >>> max_weight = None
        >>> items = None
        >>> set_weights = None
        >>> item_values = None
        >>> greedy_soln = ut.sort_dict(ut.setcover_greedy(candidate_sets_dict))
        >>> exact_soln = ut.sort_dict(ut.setcover_ilp(candidate_sets_dict))
        >>> print('greedy_soln = %r' % (greedy_soln,))
        >>> print('exact_soln = %r' % (exact_soln,))
    """
    import utool as ut
    solution_cover = {}
    # If set_weights or item_values are not given, use lengths as defaults
    if items is None:
        items = ut.flatten(candidate_sets_dict.values())
    if set_weights is None:
        get_weight = len
    else:
        def get_weight(solution_cover):
            return sum([set_weights[key] for key in solution_cover.keys()])
    if item_values is None:
        get_value = len
    else:
        def get_value(vals):
            return sum([item_values[v] for v in vals])
    if max_weight is None:
        max_weight = get_weight(candidate_sets_dict)
    avail_covers = {key: set(val) for key, val in candidate_sets_dict.items()}
    # While we still need covers
    while get_weight(solution_cover) < max_weight and len(avail_covers) > 0:
        # Find the candidate set with the most uncovered items
        uncovered_values = list(map(get_value, avail_covers.values()))
        chosen_idx = ut.argmax(uncovered_values)
        if uncovered_values[chosen_idx] <= 0:
            # needlessly adding value-less items
            break
        chosen_key = list(avail_covers.keys())[chosen_idx]
        # Add values in this key to the cover
        chosen_set = avail_covers[chosen_key]
        solution_cover[chosen_key] = candidate_sets_dict[chosen_key]
        # Remove chosen set from available options and covered items
        # from remaining available sets
        del avail_covers[chosen_key]
        for vals in avail_covers.values():
            vals.difference_update(chosen_set)
    return solution_cover

def setcover_ilp(
    candidate_sets_dict,
    items=None,
    set_weights=None,
    item_values=None,
    max_weight=None,
    verbose=False,
):
    """
    Set cover / Weighted Maximum Cover exact algorithm

    https://en.wikipedia.org/wiki/Maximum_coverage_problem
    """
    import utool as ut
    import pulp
    if items is None:
        items = ut.flatten(candidate_sets_dict.values())
    if set_weights is None:
        set_weights = {i: 1 for i in candidate_sets_dict.keys()}
    if item_values is None:
        item_values = {e: 1 for e in items}
    if max_weight is None:
        max_weight = sum(ut.take(set_weights, candidate_sets_dict.keys()))
    if False:
        # This is true set cover
        # Formulate integer program
        prob = pulp.LpProblem('Set Cover', pulp.LpMinimize)
        # Solution variable indicates if a set is chosen or not
        set_indices = candidate_sets_dict.keys()
        x = pulp.LpVariable.dicts(
            name='x', indexs=set_indices, lowBound=0, upBound=1, cat=pulp.LpInteger
        )
        # minimize the number of sets
        prob.objective = sum(x[i] for i in set_indices)
        # subject to
        for e in items:
            # each element is covered
            containing_sets = [i for i in set_indices if e in candidate_sets_dict[i]]
            prob.add(sum(x[i] for i in containing_sets) >= 1)
        # Solve with a solver like CPLEX, GLPK, or SCIP.
        # pulp.CPLEX().solve(prob)
        pulp.PULP_CBC_CMD().solve(prob)
        # Read solution
        solution_keys = [i for i in set_indices if x[i].varValue]
        solution_cover = {i: candidate_sets_dict[i] for i in solution_keys}
        # Print summary
        if verbose:
            print(prob)
            print('OPT:')
            print('\n'.join(
                ['  %s = %s' % (x[i].name, x[i].varValue) for i in set_indices]
            ))
            print('solution_cover = %r' % (solution_cover,))
    else:
        prob = pulp.LpProblem('Maximum Cover', pulp.LpMaximize)
        # Solution variable indicates if a set is chosen or not
        item_indicies = items
        set_indices = candidate_sets_dict.keys()
        x = pulp.LpVariable.dicts(
            name='x', indexs=set_indices, lowBound=0, upBound=1, cat=pulp.LpInteger
        )
        y = pulp.LpVariable.dicts(
            name='y', indexs=item_indicies, lowBound=0, upBound=1, cat=pulp.LpInteger
        )
        r = pulp.LpVariable.dicts(name='r', indexs=item_indicies)
        # maximize the value of the covered items
        primary_objective = sum(item_values[e] * y[e] for e in item_indicies)
        # minimize the number of sets used (make sure it does not influence
        # the primary objective). This is only possible when values are
        # non-negative.
        # TODO: minimize redundancy
        min_influence = min(item_values.values())
        secondary_weight = min_influence / (1.1 * len(set_indices))
        secondary_objective = (sum(-x[i] for i in set_indices)) * secondary_weight
        prob.objective = primary_objective + secondary_objective
        # subject to
        # no more than the maximum weight
        prob.add(sum(x[i] * set_weights[i] for i in set_indices) <= max_weight)
        # If an item is chosen then at least one set containing it is chosen
        for e in item_indicies:
            containing_sets = [i for i in set_indices if e in candidate_sets_dict[i]]
            if len(containing_sets) > 0:
                prob.add(sum(x[i] for i in containing_sets) >= y[e])
                # record number of times each item is covered
                prob.add(sum(x[i] for i in containing_sets) == r[e])
        # Solve with a solver like CPLEX, GLPK, or SCIP.
        # pulp.CPLEX().solve(prob)
        pulp.PULP_CBC_CMD().solve(prob)
        # Read solution
        solution_keys = [i for i in set_indices if x[i].varValue]
        solution_cover = {i: candidate_sets_dict[i] for i in solution_keys}
        # Print summary
        if verbose:
            print(prob)
            print('OPT:')
            print('\n'.join(
                ['  %s = %s' % (x[i].name, x[i].varValue) for i in set_indices]
            ))
            print('\n'.join(
                ['  %s = %s' % (y[i].name, y[i].varValue) for i in item_indicies]
            ))
            print('solution_cover = %r' % (solution_cover,))
    return solution_cover

def bayes_rule(b_given_a, prob_a, prob_b):
    r"""
    bayes_rule

    P(A | B) = \frac{ P(B | A) P(A) }{ P(B) }

    Args:
        b_given_a (ndarray or float):
        prob_a (ndarray or float):
        prob_b (ndarray or float):

    Returns:
        ndarray or float: a_given_b

    Example:
        >>> # ENABLE_DOCTEST
        >>> from utool.util_alg import *  # NOQA
        >>> b_given_a = .1
        >>> prob_a = .3
        >>> prob_b = .4
        >>> a_given_b = bayes_rule(b_given_a, prob_a, prob_b)
        >>> result = a_given_b
        >>> print(result)
        0.075
    """
    a_given_b = (b_given_a * prob_a) / prob_b
    return a_given_b

def negative_minclamp_inplace(arr):
    arr[arr > 0] -= arr[arr > 0].min()
    arr[arr <= 0] = arr[arr > 0].min()
    return arr

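
# Added usage sketch (not part of the original utool source): the positive
# entries are shifted down by their minimum, then the non-positive entries are
# clamped up to the new positive minimum. The `_demo_*` helper name is
# hypothetical.
def _demo_negative_minclamp_inplace():
    """
    Example:
        >>> # ENABLE_DOCTEST
        >>> from utool.util_alg import *  # NOQA
        >>> arr = np.array([5., 3., -2., 7.])
        >>> print(negative_minclamp_inplace(arr).tolist())
        [2.0, 2.0, 2.0, 4.0]
    """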
def xywh_to_tlbr(bbox, img_wh):
    """converts xywh format to (tlx, tly, brx, bry)"""
    (img_w, img_h) = img_wh
    if img_w == 0 or img_h == 0:
        img_w = 1
        img_h = 1
        msg = '[cc2.1] Your csv tables have an invalid ANNOTATION.'
        print(msg)
        # warnings.warn(msg)
        # ht = 1
        # wt = 1
    # Ensure ANNOTATION is within bounds
    (x, y, w, h) = bbox
    x1 = max(x, 0)
    y1 = max(y, 0)
    x2 = min(x + w, img_w - 1)
    y2 = min(y + h, img_h - 1)
    return (x1, y1, x2, y2)

def item_hist(list_):
    """counts the number of times each item appears in the list"""
    dict_hist = {}
    # Insert each item into the correct group
    for item in list_:
        if item not in dict_hist:
            dict_hist[item] = 0
        dict_hist[item] += 1
    return dict_hist

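
# Added usage sketch (not part of the original utool source): item_hist
# behaves like collections.Counter for hashable items. The `_demo_*` helper
# name is hypothetical.
def _demo_item_hist():
    """
    Example:
        >>> # ENABLE_DOCTEST
        >>> from utool.util_alg import *  # NOQA
        >>> print(item_hist(['a', 'b', 'a', 'a']) == {'a': 3, 'b': 1})
        True
    """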
def flatten_membership_mapping(uid_list, members_list):
    num_members = sum(list(map(len, members_list)))
    flat_uids = [None for _ in range(num_members)]
    flat_members = [None for _ in range(num_members)]
    count = 0
    for uid, members in zip(uid_list, members_list):
        for member in members:
            flat_uids[count] = uid
            flat_members[count] = member
            count += 1
    return flat_uids, flat_members

def get_phi():
    """Golden Ratio: phi = (1 + sqrt(5)) / 2.0 = 1.61803398875"""
    # phi = (1.0 + sqrt(5)) / 2.0 = 1.61803398875
    # return phi
    return PHI

def get_phi_ratio1():
    return 1.0 / get_phi()

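
# Added usage sketch (not part of the original utool source): the golden
# ratio satisfies phi ** 2 == phi + 1, which implies 1 / phi == phi - 1.
# The `_demo_*` helper name is hypothetical.
def _demo_phi_identity():
    """
    Example:
        >>> # ENABLE_DOCTEST
        >>> from utool.util_alg import *  # NOQA
        >>> print(abs(get_phi() ** 2 - (get_phi() + 1)) < 1e-9)
        True
        >>> print(abs(get_phi_ratio1() - (get_phi() - 1)) < 1e-9)
        True
    """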
def is_prime(num):
    """
    naive function for finding primes. Good for stress testing

    References:
        http://thelivingpearl.com/2013/01/06/how-to-find-prime-numbers-in-python/

    CommandLine:
        python -m utool.util_alg --test-is_prime

    Example:
        >>> # ENABLE_DOCTEST
        >>> from utool.util_alg import *  # NOQA
        >>> import utool as ut
        >>> with ut.Timer('isprime'):
        >>>     series = [is_prime(n) for n in range(30)]
        >>> result = ('primes = %s' % (str(ut.list_where(series[0:10])),))
        >>> print(result)
        primes = [2, 3, 5, 7]
    """
    return num >= 2 and not any(num % j == 0 for j in range(2, num))

# if num < 2:
#     return False
# for j in range(2, num):
#     if (num % j) == 0:
#         return False
# return True

def fibonacci_recursive(n):
    """
    CommandLine:
        python -m utool.util_alg --test-fibonacci_recursive

    Example:
        >>> # ENABLE_DOCTEST
        >>> from utool.util_alg import *  # NOQA
        >>> import utool as ut
        >>> with ut.Timer('fib rec'):
        >>>     series = [fibonacci_recursive(n) for n in range(20)]
        >>> result = ('series = %s' % (str(series[0:10]),))
        >>> print(result)
        series = [0, 1, 1, 2, 3, 5, 8, 13, 21, 34]
    """
    if n < 2:
        return n
    return fibonacci_recursive(n - 2) + fibonacci_recursive(n - 1)

def fibonacci_iterative(n):
    """
    Args:
        n (int):

    Returns:
        int: the n-th fibonacci number

    References:
        http://stackoverflow.com/questions/15047116/iterative-alg-fib

    CommandLine:
        python -m utool.util_alg fibonacci_iterative

    Example:
        >>> # ENABLE_DOCTEST
        >>> from utool.util_alg import *  # NOQA
        >>> import utool as ut
        >>> with ut.Timer('fib iter'):
        >>>     series = [fibonacci_iterative(n) for n in range(20)]
        >>> result = ('series = %s' % (str(series[0:10]),))
        >>> print(result)
        series = [0, 1, 1, 2, 3, 5, 8, 13, 21, 34]
    """
    a, b = 0, 1
    for _ in range(0, n):
        a, b = b, a + b
    return a

def fibonacci_approx(n):
    r"""
    approximate value (due to numerical errors) of fib(n) using the closed
    form expression

    Args:
        n (int):

    Returns:
        int: the n-th fib number

    CommandLine:
        python -m utool.util_alg fibonacci_approx

    Example:
        >>> # DISABLE_DOCTEST
        >>> from utool.util_alg import *  # NOQA
        >>> import utool as ut
        >>> series = [int(fibonacci_approx(n)) for n in range(20)]
        >>> result = ('series = %s' % (str(series[0:10]),))
        >>> print(result)
    """
    sqrt_5 = math.sqrt(5)
    phi = (1 + sqrt_5) / 2
    return ((phi ** n) - (-phi) ** (-n)) / sqrt_5

fibonacci = fibonacci_iterative
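
# Added usage sketch (not part of the original utool source): the
# module-level `fibonacci` alias points at the iterative implementation, and
# the closed-form approximation agrees with it for small n. The `_demo_*`
# helper name is hypothetical.
def _demo_fibonacci_variants():
    """
    Example:
        >>> # ENABLE_DOCTEST
        >>> from utool.util_alg import *  # NOQA
        >>> print([fibonacci(n) for n in range(8)])
        [0, 1, 1, 2, 3, 5, 8, 13]
        >>> print(all(int(round(fibonacci_approx(n))) == fibonacci(n)
        ...           for n in range(20)))
        True
    """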
def enumerate_primes(max_prime=4100):
    primes = [num for num in range(2, max_prime) if is_prime(num)]
    return primes

def get_nth_prime(n, max_prime=4100, safe=True):
    """hacky but still brute force algorithm for finding nth prime for small tests"""
    if n <= 100:
        first_100_primes = (
            2, 3, 5, 7, 11, 13, 17, 19, 23, 29,
            31, 37, 41, 43, 47, 53, 59, 61, 67, 71,
            73, 79, 83, 89, 97, 101, 103, 107, 109, 113,
            127, 131, 137, 139, 149, 151, 157, 163, 167, 173,
            179, 181, 191, 193, 197, 199, 211, 223, 227, 229,
            233, 239, 241, 251, 257, 263, 269, 271, 277, 281,
            283, 293, 307, 311, 313, 317, 331, 337, 347, 349,
            353, 359, 367, 373, 379, 383, 389, 397, 401, 409,
            419, 421, 431, 433, 439, 443, 449, 457, 461, 463,
            467, 479, 487, 491, 499, 503, 509, 521, 523, 541,
        )
        # print(len(first_100_primes))
        nth_prime = first_100_primes[n - 1]
    else:
        if safe:
            primes = [num for num in range(2, max_prime) if is_prime(num)]
            nth_prime = primes[n]
        else:
            # This can run for a while... get it? while?
            nth_prime = get_nth_prime_bruteforce(n)
    return nth_prime

def get_nth_prime_bruteforce(n, start_guess=2, start_num_primes=0):
    """
    Args:
        n (int): the n-th prime (n=2000 takes about a second)

    CommandLine:
        python -m utool.util_alg get_nth_prime_bruteforce --show

    Example:
        >>> # DISABLE_DOCTEST
        >>> from utool.util_alg import *  # NOQA
        >>> import utool as ut
        >>> n_list = []
        >>> time_list = []
        >>> for n in range(1, 2000 + 2, 500):
        >>>     with ut.Timer(verbose=0) as t:
        >>>         get_nth_prime_bruteforce(n)
        >>>     time_list += [t.ellapsed]
        >>>     n_list += [n]
        >>> ut.quit_if_noshow()
        >>> import wbia.plottool as pt
        >>> pt.multi_plot(n_list, [time_list], xlabel='prime', ylabel='time')
        >>> ut.show_if_requested()
    """
    guess = start_guess
    num_primes_found = start_num_primes
    while True:
        if is_prime(guess):
            num_primes_found += 1
            if num_primes_found == n:
                nth_prime = guess
                break
        guess += 1
    return nth_prime

def get_prime_index(prime):
    guess = 2
    num_primes_found = 0
    while True:
        if is_prime(guess):
            num_primes_found += 1
            if guess == prime:
                return num_primes_found
        else:
            assert guess != prime, 'input=%r is not prime. has %r primes less than it' % (
                prime,
                num_primes_found,
            )
        guess += 1

def generate_primes(stop=None, start_guess=2):
    guess = start_guess
    num_generated = 0
    while True:
        if is_prime(guess):
            num_generated += 1
            yield guess
            # NOTE: guard against stop=None so the default yields an
            # unbounded stream instead of raising a TypeError
            if stop is not None and num_generated >= stop:
                break
        guess += 1

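
# Added usage sketch (not part of the original utool source): with the
# default stop=None generate_primes is an infinite generator, so slice it
# lazily. The `_demo_*` helper name is hypothetical.
def _demo_generate_primes():
    """
    Example:
        >>> # ENABLE_DOCTEST
        >>> import itertools
        >>> from utool.util_alg import *  # NOQA
        >>> print(list(itertools.islice(generate_primes(), 6)))
        [2, 3, 5, 7, 11, 13]
    """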
def knapsack(items, maxweight, method='recursive'):
    r"""
    Solve the knapsack problem by finding the most valuable subsequence of
    `items` that weighs no more than `maxweight`.

    Args:
        items (tuple): is a sequence of tuples `(value, weight, id_)`, where
            `value` is a number and `weight` is a non-negative integer, and
            `id_` is an item identifier.
        maxweight (scalar): is a non-negative integer.

    Returns:
        tuple: (total_value, items_subset) - a pair whose first element is
            the sum of values in the most valuable subsequence, and whose
            second element is the subsequence. The subset may be different
            depending on implementation (i.e. top-down recursive vs
            bottom-up iterative).

    References:
        http://codereview.stackexchange.com/questions/20569/dynamic-programming-solution-to-knapsack-problem
        http://stackoverflow.com/questions/141779/solving-the-np-complete-problem-in-xkcd
        http://www.es.ele.tue.nl/education/5MC10/Solutions/knapsack.pdf

    CommandLine:
        python -m utool.util_alg --test-knapsack
        python -m utool.util_alg --test-knapsack:0
        python -m utool.util_alg --exec-knapsack:1

    Ignore:
        >>> annots_per_view = 2
        >>> maxweight = 2
        >>> items = [
        >>>     (0.7005208343554686, 0.7005208343554686, 0),
        >>>     (0.669270834329427, 0.669270834329427, 1),
        >>>     (0.669270834329427, 0.669270834329427, 2),
        >>>     (0.7005208343554686, 0.7005208343554686, 3),
        >>>     (0.7005208343554686, 0.7005208343554686, 4),
        >>>     (0.669270834329427, 0.669270834329427, 5),
        >>>     (0.669270834329427, 0.669270834329427, 6),
        >>>     (0.669270834329427, 0.669270834329427, 7),
        >>>     (0.669270834329427, 0.669270834329427, 8),
        >>>     (0.669270834329427, 0.669270834329427, 9),
        >>>     (0.669270834329427, 0.669270834329427, 10),
        >>>     (0.669270834329427, 0.669270834329427, 11),
        >>>     (0.669270834329427, 0.669270834329427, 12),
        >>>     (0.669270834329427, 0.669270834329427, 13),
        >>>     (0.669270834329427, 0.669270834329427, 14),
        >>>     (0.669270834329427, 0.669270834329427, 15),
        >>>     (0.669270834329427, 0.669270834329427, 16),
        >>>     (0.669270834329427, 0.669270834329427, 17),
        >>>     (0.7005208343554686, 0.7005208343554686, 18),
        >>>     (0.7005208343554686, 0.7005208343554686, 19),
        >>>     (0.669270834329427, 0.669270834329427, 20),
        >>>     (0.7005208343554686, 0.7005208343554686, 21),
        >>>     (0.669270834329427, 0.669270834329427, 22),
        >>>     (0.669270834329427, 0.669270834329427, 23),
        >>>     (0.669270834329427, 0.669270834329427, 24),
        >>>     (0.669270834329427, 0.669270834329427, 25),
        >>>     (0.669270834329427, 0.669270834329427, 26),
        >>>     (0.669270834329427, 0.669270834329427, 27),
        >>>     (0.669270834329427, 0.669270834329427, 28),
        >>>     (0.7005208343554686, 0.7005208343554686, 29),
        >>>     (0.669270834329427, 0.669270834329427, 30),
        >>>     (0.669270834329427, 0.669270834329427, 31),
        >>>     (0.669270834329427, 0.669270834329427, 32),
        >>>     (0.669270834329427, 0.669270834329427, 33),
        >>>     (0.7005208343554686, 0.7005208343554686, 34),
        >>>     (0.669270834329427, 0.669270834329427, 35),
        >>>     (0.669270834329427, 0.669270834329427, 36),
        >>>     (0.669270834329427, 0.669270834329427, 37),
        >>>     (0.7005208343554686, 0.7005208343554686, 38),
        >>>     (0.669270834329427, 0.669270834329427, 39),
        >>>     (0.669270834329427, 0.669270834329427, 40),
        >>>     (0.7005208343554686, 0.7005208343554686, 41),
        >>>     (0.669270834329427, 0.669270834329427, 42),
        >>>     (0.669270834329427, 0.669270834329427, 43),
        >>>     (0.669270834329427, 0.669270834329427, 44),
        >>> ]
        >>> values = ut.take_column(items, 0)
        >>> weights = ut.take_column(items, 1)
        >>> indices = ut.take_column(items, 2)

    Example:
        >>> # ENABLE_DOCTEST
        >>> from utool.util_alg import *  # NOQA
        >>> import utool as ut
        >>> items = [(4, 12, 0), (2, 1, 1), (6, 4, 2), (1, 1, 3), (2, 2, 4)]
        >>> maxweight = 15
        >>> total_value, items_subset = knapsack(items, maxweight, method='recursive')
        >>> total_value1, items_subset1 = knapsack(items, maxweight, method='iterative')
        >>> result = 'total_value = %.2f\n' % (total_value,)
        >>> result += 'items_subset = %r' % (items_subset,)
        >>> ut.assert_eq(total_value1, total_value)
        >>> ut.assert_eq(items_subset1, items_subset)
        >>> print(result)
        total_value = 11.00
        items_subset = [(2, 1, 1), (6, 4, 2), (1, 1, 3), (2, 2, 4)]

    Example:
        >>> # ENABLE_DOCTEST
        >>> from utool.util_alg import *  # NOQA
        >>> import utool as ut
        >>> # Solve https://xkcd.com/287/
        >>> weights = [2.15, 2.75, 3.35, 3.55, 4.2, 5.8] * 2
        >>> items = [(w, w, i) for i, w in enumerate(weights)]
        >>> maxweight = 15.05
        >>> total_value, items_subset = knapsack(items, maxweight, method='recursive')
        >>> total_value1, items_subset1 = knapsack(items, maxweight, method='iterative')
        >>> total_weight = sum([t[1] for t in items_subset])
        >>> print('total_weight = %r' % (total_weight,))
        >>> result = 'total_value = %.2f' % (total_value,)
        >>> print('items_subset = %r' % (items_subset,))
        >>> print('items_subset1 = %r' % (items_subset1,))
        >>> #assert items_subset1 == items_subset, 'NOT EQ\n%r !=\n%r' % (items_subset1, items_subset)
        >>> print(result)
        total_value = 15.05

    Timeit:
        >>> import utool as ut
        >>> setup = ut.codeblock(
        >>>     '''
        >>>     import utool as ut
        >>>     weights = [215, 275, 335, 355, 42, 58] * 40
        >>>     items = [(w, w, i) for i, w in enumerate(weights)]
        >>>     maxweight = 2505
        >>>     #import numba
        >>>     #knapsack_numba = numba.autojit(ut.knapsack_iterative)
        >>>     #knapsack_numba = numba.autojit(ut.knapsack_iterative_numpy)
        >>>     ''')
        >>> # Test load time
        >>> stmt_list1 = ut.codeblock(
        >>>     '''
        >>>     #ut.knapsack_recursive(items, maxweight)
        >>>     ut.knapsack_iterative(items, maxweight)
        >>>     ut.knapsack_ilp(items, maxweight)
        >>>     #knapsack_numba(items, maxweight)
        >>>     #ut.knapsack_iterative_numpy(items, maxweight)
        >>>     ''').split('\n')
        >>> ut.util_dev.timeit_compare(stmt_list1, setup, int(5))
    """
    if method == 'recursive':
        return knapsack_recursive(items, maxweight)
    elif method == 'iterative':
        return knapsack_iterative(items, maxweight)
    elif method == 'ilp':
        return knapsack_ilp(items, maxweight)
    else:
        raise NotImplementedError('[util_alg] knapsack method=%r' % (method,))

# return knapsack_iterative_numpy(items, maxweight)
def knapsack_ilp(items, maxweight, verbose=False):
    """
    solves knapsack using an integer linear program

    CommandLine:
        python -m utool.util_alg knapsack_ilp

    Example:
        >>> # DISABLE_DOCTEST
        >>> from utool.util_alg import *  # NOQA
        >>> import utool as ut
        >>> # Solve https://xkcd.com/287/
        >>> weights = [2.15, 2.75, 3.35, 3.55, 4.2, 5.8, 6.55]
        >>> values = [2.15, 2.75, 3.35, 3.55, 4.2, 5.8, 6.55]
        >>> indices = ['mixed fruit', 'french fries', 'side salad',
        >>>            'hot wings', 'mozzarella sticks', 'sampler plate',
        >>>            'barbecue']
        >>> items = [(v, w, i) for v, w, i in zip(values, weights, indices)]
        >>> #items += [(3.95, 3.95, 'mystery plate')]
        >>> maxweight = 15.05
        >>> verbose = True
        >>> total_value, items_subset = knapsack_ilp(items, maxweight, verbose)
        >>> print('items_subset = %s' % (ut.repr3(items_subset, nl=1),))
    """
    import pulp
    # Given Input
    values = [t[0] for t in items]
    weights = [t[1] for t in items]
    indices = [t[2] for t in items]
    # Formulate integer program
    prob = pulp.LpProblem('Knapsack', pulp.LpMaximize)
    # Solution variables
    x = pulp.LpVariable.dicts(
        name='x', indexs=indices, lowBound=0, upBound=1, cat=pulp.LpInteger
    )
    # maximize objective function
    prob.objective = sum(v * x[i] for v, i in zip(values, indices))
    # subject to
    prob.add(sum(w * x[i] for w, i in zip(weights, indices)) <= maxweight)
    # Solve with a solver like CPLEX, GLPK, or SCIP.
    # pulp.CPLEX().solve(prob)
    pulp.PULP_CBC_CMD().solve(prob)
    # Read solution
    flags = [x[i].varValue for i in indices]
    total_value = sum([val for val, flag in zip(values, flags) if flag])
    items_subset = [item for item, flag in zip(items, flags) if flag]
    # Print summary
    if verbose:
        print(prob)
        print('OPT:')
        print('\n'.join(['  %s = %s' % (x[i].name, x[i].varValue) for i in indices]))
        print('total_value = %r' % (total_value,))
    return total_value, items_subset

def knapsack_recursive(items, maxweight):
    @util_decor.memoize_nonzero
    def bestvalue(i, j):
        """Return the value of the most valuable subsequence of the first
        i elements in items whose weights sum to no more than j."""
        if i == 0:
            return 0
        value, weight = items[i - 1][0:2]
        if weight > j:
            return bestvalue(i - 1, j)
        else:
            return max(bestvalue(i - 1, j), bestvalue(i - 1, j - weight) + value)

    j = maxweight
    items_subset = []
    for i in range(len(items), 0, -1):
        if bestvalue(i, j) != bestvalue(i - 1, j):
            items_subset.append(items[i - 1])
            j -= items[i - 1][1]
    items_subset.reverse()
    total_value = bestvalue(len(items), maxweight)
    return total_value, items_subset

def number_of_decimals(num):
    r"""
    Args:
        num (float):

    References:
        stackoverflow.com/questions/6189956/finding-decimal-places

    Example:
        >>> # ENABLE_DOCTEST
        >>> from utool.util_alg import *  # NOQA
        >>> num = 15.05
        >>> result = number_of_decimals(num)
        >>> print(result)
        2
    """
    exp = decimal.Decimal(str(num)).as_tuple().exponent
    return max(0, -exp)

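
# Added usage sketch (not part of the original utool source): this decimal
# count is what knapsack_iterative uses below to scale fractional weights to
# integers. The `_demo_*` helper name is hypothetical.
def _demo_number_of_decimals():
    """
    Example:
        >>> # ENABLE_DOCTEST
        >>> from utool.util_alg import *  # NOQA
        >>> print(number_of_decimals(3.5))
        1
        >>> print(number_of_decimals(15.05))
        2
        >>> print(number_of_decimals(100))
        0
    """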
def knapsack_iterative(items, maxweight):
    # Knapsack requires integral weights
    weights = [t[1] for t in items]
    max_exp = max([number_of_decimals(w_) for w_ in weights])
    coeff = 10 ** max_exp
    # Adjust weights to be integral
    int_maxweight = int(maxweight * coeff)
    int_items = [(v, int(w * coeff), idx) for v, w, idx in items]
    """
    items = int_items
    maxweight = int_maxweight
    """
    return knapsack_iterative_int(int_items, int_maxweight)

def knapsack_iterative_int(items, maxweight):
    r"""
    Iterative knapsack method

    Math:
        maximize \sum_{i \in T} v_i
        subject to \sum_{i \in T} w_i \leq W

    Notes:
        dpmat is the dynamic programming memoization matrix.
        dpmat[i, w] is the total value of the items with weight at most W
        T is idx_subset, the set of indices in the optimal solution

    CommandLine:
        python -m utool.util_alg --exec-knapsack_iterative_int --show

    Example:
        >>> # ENABLE_DOCTEST
        >>> from utool.util_alg import *  # NOQA
        >>> weights = [1, 3, 3, 5, 2, 1] * 2
        >>> items = [(w, w, i) for i, w in enumerate(weights)]
        >>> maxweight = 10
        >>> items = [(.8, 700, 0)]
        >>> maxweight = 2000
        >>> print('maxweight = %r' % (maxweight,))
        >>> print('items = %r' % (items,))
        >>> total_value, items_subset = knapsack_iterative_int(items, maxweight)
        >>> total_weight = sum([t[1] for t in items_subset])
        >>> print('total_weight = %r' % (total_weight,))
        >>> print('items_subset = %r' % (items_subset,))
        >>> result = 'total_value = %.2f' % (total_value,)
        >>> print(result)
        total_value = 0.80

    Ignore:
        DPMAT = [[dpmat[r][c] for c in range(maxweight)] for r in range(len(items))]
        KMAT = [[kmat[r][c] for c in range(maxweight)] for r in range(len(items))]
    """
    values = [t[0] for t in items]
    weights = [t[1] for t in items]
    maxsize = maxweight + 1
    # Sparse representation seems better
    dpmat = defaultdict(lambda: defaultdict(lambda: np.inf))
    kmat = defaultdict(lambda: defaultdict(lambda: False))
    idx_subset = []  # NOQA
    for w in range(maxsize):
        dpmat[0][w] = 0
    # For each item consider to include it or not
    for idx in range(len(items)):
        item_val = values[idx]
        item_weight = weights[idx]
        # consider at each possible bag size
        for w in range(maxsize):
            valid_item = item_weight <= w
            if idx > 0:
                prev_val = dpmat[idx - 1][w]
                prev_noitem_val = dpmat[idx - 1][w - item_weight]
            else:
                prev_val = 0
                prev_noitem_val = 0
            withitem_val = item_val + prev_noitem_val
            more_valuable = withitem_val > prev_val
            if valid_item and more_valuable:
                dpmat[idx][w] = withitem_val
                kmat[idx][w] = True
            else:
                dpmat[idx][w] = prev_val
                kmat[idx][w] = False
    # Trace backwards to get the items used in the solution
    K = maxweight
    for idx in reversed(range(len(items))):
        if kmat[idx][K]:
            idx_subset.append(idx)
            K = K - weights[idx]
    idx_subset = sorted(idx_subset)
    items_subset = [items[i] for i in idx_subset]
    total_value = dpmat[len(items) - 1][maxweight]
    return total_value, items_subset

def knapsack_iterative_numpy(items, maxweight):
    r"""
    Iterative knapsack method

    maximize \sum_{i \in T} v_i
    subject to \sum_{i \in T} w_i \leq W

    Notes:
        dpmat is the dynamic programming memoization matrix.
        dpmat[i, w] is the total value of the items with weight at most W
        T is the set of indices in the optimal solution
    """
    # import numpy as np
    items = np.array(items)
    weights = items.T[1]
    # Find maximum decimal place (this problem is in NP)
    max_exp = max([number_of_decimals(w_) for w_ in weights])
    coeff = 10 ** max_exp
    # Adjust weights to be integral
    # NOTE: np.int / np.bool were removed in numpy >= 1.24; use the builtins
    weights = (weights * coeff).astype(int)
    values = items.T[0]
    MAXWEIGHT = int(maxweight * coeff)
    W_SIZE = MAXWEIGHT + 1

    dpmat = np.full((len(items), W_SIZE), np.inf)
    kmat = np.full((len(items), W_SIZE), 0, dtype=bool)
    idx_subset = []

    for w in range(W_SIZE):
        dpmat[0][w] = 0

    for idx in range(1, len(items)):
        item_val = values[idx]
        item_weight = weights[idx]
        for w in range(W_SIZE):
            valid_item = item_weight <= w
            prev_val = dpmat[idx - 1][w]
            if valid_item:
                prev_noitem_val = dpmat[idx - 1][w - item_weight]
                withitem_val = item_val + prev_noitem_val
                more_valuable = withitem_val > prev_val
            else:
                more_valuable = False
            dpmat[idx][w] = withitem_val if more_valuable else prev_val
            kmat[idx][w] = more_valuable

    K = MAXWEIGHT
    for idx in reversed(range(1, len(items))):
        if kmat[idx, K]:
            idx_subset.append(idx)
            K = K - weights[idx]
    idx_subset = sorted(idx_subset)
    items_subset = [items[i] for i in idx_subset]
    total_value = dpmat[len(items) - 1][MAXWEIGHT]
    return total_value, items_subset

# def knapsack_all_solns(items, maxweight):
#     """
#     TODO: return all optimal solutions to the knapsack problem
#
#     References:
#         stackoverflow.com/questions/30554290/all-solutions-from-knapsack-dp-matrix
#
#     >>> items = [(1, 2, 0), (1, 3, 1), (1, 4, 2), (1, 3, 3), (1, 3, 4),
#     >>>          (1, 5, 5), (1, 4, 6), (1, 1, 7), (1, 1, 8), (1, 3, 9)]
#     >>> weights = ut.get_list_column(items, 1)
#     >>> maxweight = 6
#     """

def knapsack_greedy(items, maxweight):
    r"""
    non-optimal greedy version of knapsack algorithm
    does not sort input. Sort the input by largest value first if desired.

    Args:
        items (tuple): is a sequence of tuples `(value, weight, id_)`, where
            `value` is a scalar and `weight` is a non-negative integer, and
            `id_` is an item identifier.
        maxweight (scalar): is a non-negative integer.

    CommandLine:
        python -m utool.util_alg --exec-knapsack_greedy

    Example:
        >>> # ENABLE_DOCTEST
        >>> from utool.util_alg import *  # NOQA
        >>> items = [(4, 12, 0), (2, 1, 1), (6, 4, 2), (1, 1, 3), (2, 2, 4)]
        >>> maxweight = 15
        >>> total_value, items_subset = knapsack_greedy(items, maxweight)
        >>> result = 'total_value = %r\n' % (total_value,)
        >>> result += 'items_subset = %r' % (items_subset,)
        >>> print(result)
        total_value = 7
        items_subset = [(4, 12, 0), (2, 1, 1), (1, 1, 3)]
    """
    items_subset = []
    total_weight = 0
    total_value = 0
    for item in items:
        value, weight = item[0:2]
        if total_weight + weight > maxweight:
            continue
        else:
            items_subset.append(item)
            total_weight += weight
            total_value += value
    return total_value, items_subset

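
# Added usage sketch (not part of the original utool source): because
# knapsack_greedy does not sort its input, pre-sorting by value density
# (value / weight) typically improves the greedy answer; on the items from
# the doctest above it recovers the exact optimum of 11. The `_demo_*`
# helper name is hypothetical.
def _demo_knapsack_greedy_density():
    """
    Example:
        >>> # ENABLE_DOCTEST
        >>> from utool.util_alg import *  # NOQA
        >>> items = [(4, 12, 0), (2, 1, 1), (6, 4, 2), (1, 1, 3), (2, 2, 4)]
        >>> dense = sorted(items, key=lambda t: t[0] / t[1], reverse=True)
        >>> total_value, subset = knapsack_greedy(dense, maxweight=15)
        >>> print(total_value)
        11
    """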
def prod(item_list, initial=1.0):
    """
    product of all numbers in a list (like np.prod)

    Args:
        item_list (list): list of numbers or items supporting multiplication
        initial (value): initial identity (default=1)

    Returns:
        float: Multiplied value

    Example:
        >>> # ENABLE_DOCTEST
        >>> from utool.util_alg import *  # NOQA
        >>> initial = 1.0
        >>> item_list = [1, 2, 3, 4, 5]
        >>> assert prod(item_list, initial) == 120.0
        >>> assert prod([]) == 1.0
        >>> assert prod([5]) == 5.0
    """
    return reduce(op.mul, item_list, initial)

def cumsum(item_list, initial=0):
    """python cumsum

    Args:
        item_list (list): list of numbers or items supporting addition
        initial (value): initial zero value

    Returns:
        list: list of accumulated values

    References:
        stackoverflow.com/questions/9258602/elegant-pythonic-cumsum

    CommandLine:
        python -m utool.util_alg cumsum

    Example:
        >>> # ENABLE_DOCTEST
        >>> from utool.util_alg import *  # NOQA
        >>> import utool as ut
        >>> item_list = [1, 2, 3, 4, 5]
        >>> initial = 0
        >>> result = cumsum(item_list, initial)
        >>> assert result == [1, 3, 6, 10, 15]
        >>> print(result)
        >>> item_list = zip([1, 2, 3, 4, 5])
        >>> initial = tuple()
        >>> result2 = cumsum(item_list, initial)
        >>> assert result2 == [(1,), (1, 2), (1, 2, 3), (1, 2, 3, 4), (1, 2, 3, 4, 5)]
        >>> print(result2)
    """
    def accum(acc, itm):
        return op.iadd(acc, [acc[-1] + itm])

    return reduce(accum, item_list, [initial])[1:]

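
# Added usage sketch (not part of the original utool source): for plain
# numbers cumsum matches itertools.accumulate. The `_demo_*` helper name is
# hypothetical.
def _demo_cumsum():
    """
    Example:
        >>> # ENABLE_DOCTEST
        >>> import itertools
        >>> from utool.util_alg import *  # NOQA
        >>> print(cumsum([1, 2, 3, 4, 5]) == list(itertools.accumulate([1, 2, 3, 4, 5])))
        True
    """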
def safe_div(a, b):
    return None if a is None or b is None else a / b

def choose(n, k):
    """
    N choose k binomial combination (without replacement)

    SeeAlso:
        scipy.special.binom
    """
    import scipy.special
    return scipy.special.comb(n, k, exact=True, repetition=False)

def triangular_number(n):
    r"""
    Latex:
        T_n = \sum_{k=1}^{n} k = \frac{n (n + 1)}{2} = \binom{n + 1}{2}

    References:
        en.wikipedia.org/wiki/Triangular_number
    """
    return (n * (n + 1)) / 2

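
# Added usage sketch (not part of the original utool source): T_n equals the
# binomial coefficient (n + 1 choose 2), as the Latex note above states. The
# `_demo_*` helper name is hypothetical.
def _demo_triangular_number():
    """
    Example:
        >>> # ENABLE_DOCTEST
        >>> from utool.util_alg import *  # NOQA
        >>> print(int(triangular_number(10)), choose(10 + 1, 2))
        55 55
    """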
# Functions using NUMPY / SCIPY (need to make python only or move to vtool)
def maximin_distance_subset1d(items, K=None, min_thresh=None, verbose=False):
    r"""
    Greedy algorithm, may be exact for the 1d case. First, choose the first
    item, then choose the next item that is farthest away from all previously
    chosen items. Iterate.

    CommandLine:
        python -m utool.util_alg --exec-maximin_distance_subset1d

    Notes:
        Given a set of items V. Let $E = V \times V$ be the set of all item
        pairs. The goal is to return the largest subset of items such that
        the distance between any pair of items in the subset is greater than
        some threshold.

        Let t[u, v] be the distance between u and v.
        Let x[u, v] = 1 if the annotation pair (u, v) is included.
        Let y[u] = 1 if the annotation u is included.

        Objective:
            maximize sum(y[u] for u in V)
        subject to:
            # Annotation pairs are only included if their timedelta is less
            # than the threshold.
            x[u, v] = 0 if t[u, v] > thresh
            # If an edge is excluded, at least one of its endpoints must be
            # excluded
            y[u] + y[v] - x[u, v] < 2

    Example:
        >>> # DISABLE_DOCTEST
        >>> import utool as ut
        >>> from utool.util_alg import *  # NOQA
        >>> #items = [1, 2, 3, 4, 5, 6, 7]
        >>> items = [20, 1, 1, 9, 21, 6, 22]
        >>> min_thresh = 5
        >>> K = None
        >>> result = maximin_distance_subset1d(items, K, min_thresh, verbose=True)
        >>> print(result)
        (array([1, 3, 6]), [1, 9, 22])

    Example:
        >>> # DISABLE_DOCTEST
        >>> import utool as ut
        >>> from utool.util_alg import *  # NOQA
        >>> #items = [1, 2, 3, 4, 5, 6, 7]
        >>> items = [0, 1]
        >>> min_thresh = 5
        >>> K = None
        >>> result = maximin_distance_subset1d(items, K, min_thresh, verbose=True)
        >>> print(result)
    """
    import utool as ut
    if False:
        import pulp
        # Formulate integer program
        prob = pulp.LpProblem('MaxSizeLargeDistSubset', pulp.LpMaximize)
        # Solution variable indicates if an item is chosen or not
        item_indices = list(range(len(items)))
        pair_indices = list(ut.combinations(item_indices, 2))
        x = pulp.LpVariable.dicts(
            name='x', indexs=pair_indices, lowBound=0, upBound=1, cat=pulp.LpInteger
        )
        y = pulp.LpVariable.dicts(
            name='y', indexs=item_indices, lowBound=0, upBound=1, cat=pulp.LpInteger
        )
        # maximize the number of chosen items
        prob.objective = sum(y[i] for i in item_indices)
        # subject to
        count = 0
        for u, v in pair_indices:
            # Minimum thresh constraint
            if abs(items[u] - items[v]) < min_thresh:
                prob.add(x[(u, v)] == 0, name='thresh_%r' % (count,))
                count += 1
        count = 0
        for u, v in pair_indices:
            prob.add(y[u] + y[v] - x[(u, v)] <= 1, 'exclusion_%r' % (count,))
            count += 1
        pulp.PULP_CBC_CMD().solve(prob)
        # Read solution
        flags = [y[i].varValue >= 1.0 for i in item_indices]
        chosen_items_idxs = ut.where(flags)
        chosen_items = ut.take(items, chosen_items_idxs)
        # total_value = sum([val for val, flag in zip(values, flags) if flag])
        # items_subset = [item for item, flag in zip(items, flags) if flag]
        # each element is covered
        # containing_sets = [i for i in set_indices if e in candidate_sets_dict[i]]
        # prob.add(sum(x[i] for i in containing_sets) >= 1)
    import vtool as vt
    points = np.array(items)[:, None]
    # Initial sorting of 1d points
    initial_sortx = points.argsort(axis=0).flatten()
    points = points.take(initial_sortx, axis=0)
    if K is None:
        K = len(items)

    def distfunc(x, y):
        return np.abs(x - y)

    assert points.shape[1] == 1
    assert len(points) >= K, 'cannot return subset'
    if K == 1:
        current_idx = [0]
    else:
        current_idx = [0, -1]
        if min_thresh is not None and distfunc(points[0], points[-1])[0] < min_thresh:
            current_idx = [0]
    chosen_mask = vt.index_to_boolmask(current_idx, len(points))
    for k in range(2, K):
        unchosen_idx = np.nonzero(~chosen_mask)[0]
        unchosen_items = points.compress(~chosen_mask, axis=0)
        chosen_items = points.compress(chosen_mask, axis=0)
        distances = distfunc(unchosen_items, chosen_items.T)
        min_distances = distances.min(axis=1)
        argx = min_distances.argmax()
        if min_thresh is not None:
            if min_distances[argx] < min_thresh:
                break
        new_idx = unchosen_idx[argx]
        chosen_mask[new_idx] = True
    # Put chosen mask back in the input order of items
    chosen_items_mask = chosen_mask.take(initial_sortx.argsort())
    chosen_items_idxs = np.nonzero(chosen_items_mask)[0]
    chosen_items = ut.take(items, chosen_items_idxs)
    # current_idx = np.nonzero(chosen_mask)[0]
    if verbose:
        print('Chose subset')
        chosen_points = points.compress(chosen_mask, axis=0)
        distances = spdist.pdist(chosen_points, distfunc)
        print('chosen_items_idxs = %r' % (chosen_items_idxs,))
        print('chosen_items = %r' % (chosen_items,))
        print('distances = %r' % (distances,))
    return chosen_items_idxs, chosen_items

def maximum_distance_subset(items, K, verbose=False):
    """
    FIXME: I believe this does not work.

    Returns a subset of size K from items with the maximum pairwise distance

    References:
        stackoverflow.com/questions/12278528/subset-elements-furthest-apart-eachother
        stackoverflow.com/questions/13079563/condensed-distance-matrix-pdist

    Recurrence:
        Let X[n, k] be the solution for selecting k elements from the first
        n elements of items.
        X[n, k] = max(
            max(X[m, k - 1] + (sum_{p in prev_solution} dist(o, p))
                for o < n and o not in prev solution)
        ) for m < n.

    Example:
        >>> # DISABLE_DOCTEST
        >>> import scipy.spatial.distance as spdist
        >>> items = [1, 6, 20, 21, 22]

    CommandLine:
        python -m utool.util_alg --exec-maximum_distance_subset

    Example:
        >>> # DISABLE_DOCTEST
        >>> from utool.util_alg import *  # NOQA
        >>> #items = [1, 2, 3, 4, 5, 6, 7]
        >>> items = [1, 6, 20, 21, 22]
        >>> K = 3
        >>> result = maximum_distance_subset(items, K)
        >>> print(result)
        (42.0, array([4, 3, 0]), array([22, 21, 1]))
    """
    from utool import util_decor
    if verbose:
        print('maximum_distance_subset len(items)=%r, K=%r' % (len(items), K))
    points = np.array(items)[:, None]
    if False:
        # alternative definition (not sure if works)
        distmat = spdist.squareform(spdist.pdist(points, lambda x, y: np.abs(x - y)))
        D = np.triu(distmat)
        remaining_idxs = np.arange(len(D))
        for count in range(len(points) - K):
            values = D.sum(axis=1) + D.sum(axis=0)
            remove_idx = values.argmin()  # index with minimum pairwise distance
            remaining_idxs = np.delete(remaining_idxs, remove_idx)
            D = np.delete(np.delete(D, remove_idx, axis=0), remove_idx, axis=1)
        value = D.sum()
        subset_idx = remaining_idxs.tolist()
        value, subset_idx
        subset = points.take(subset_idx)
        # print((value, subset_idx, subset))
    sortx = points.T[0].argsort()[::-1]
    sorted_points = points.take(sortx, axis=0)
    pairwise_distance = spdist.pdist(sorted_points, lambda x, y: np.abs(x - y))
    distmat = spdist.squareform(pairwise_distance)

    def condensed_idx(i, j):
        if i >= len(sorted_points) or j >= len(sorted_points):
            raise IndexError('i=%r j=%r out of range' % (i, j))
        elif i == j:
            return None
        elif j < i:
            i, j = j, i
        return (i * len(sorted_points) + j) - (i * (i + 1) // 2) - (i) - (1)

    def dist(i, j):
        idx = condensed_idx(i, j)
        return 0 if idx is None else pairwise_distance[idx]

    @util_decor.memoize_nonzero
    def optimal_solution(n, k):
        """
        Given sorted items sorted_points, pick subset_idx of size k from
        sorted_points[:n] with maximum pairwise distance.
        Dynamic programming solution.
        """
        '# FIXME BROKEN '
        assert n <= len(sorted_points) and k <= len(sorted_points)
        if k < 2 or n < 2 or n < k:
            # BASE CASE
            value, subset_idx = 0, []
        elif k == 2:
            # BASE CASE: when k == 2 we choose the maximum pairwise pair
            subdist = np.triu(distmat[0:n, 0:n])
            maxpos = subdist.argmax()
            ix, jx = np.unravel_index(maxpos, subdist.shape)
            value = distmat[ix, jx]
            subset_idx = [ix, jx]
        else:
            # RECURSIVE CASE
            value = 0
            subset_idx = None
            # MAX OVER ALL OTHER NODES (might not need a full on loop here,
            # but this will definitely work)
            for m in range(k - 1, n):
                # Choose which point to add would maximize the distance with
                # the previous best answer.
                prev_value, prev_subset_idx = optimal_solution(m, k - 1)
                for o in range(n):
                    if o in prev_subset_idx:
                        continue
                    add_value = sum([distmat[o, px] for px in prev_subset_idx])
                    cur_value = prev_value + add_value
                    if cur_value > value:
                        value = cur_value
                        subset_idx = prev_subset_idx + [o]
        return value, subset_idx

    value, sorted_subset_idx = optimal_solution(len(points), K)
    subset_idx = sortx.take(sorted_subset_idx)
    subset = points.take(subset_idx)
    # print((value, subset_idx, subset))
    return value, subset_idx, subset

[docs]def deg_to_rad(degree):
    """Converts an angle in degrees (wrapped to [0, 360)) to radians."""
    degree %= 360.0
    return (degree / 360.0) * TAU

[docs]def rad_to_deg(radians):
    """Converts an angle in radians (wrapped to [0, TAU)) to degrees."""
    radians %= TAU
    return (radians / TAU) * 360.0
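# Quick usage sketch for the two angle conversions above: inputs outside one
# full turn wrap around via the modulo before converting.
# >>> deg_to_rad(90.0)                 # TAU / 4, i.e. about 1.5708
# >>> rad_to_deg(deg_to_rad(450.0))    # 450 wraps to 90.0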
[docs]def inbounds(num, low, high, eq=False): r""" Args: num (scalar or ndarray): low (scalar or ndarray): high (scalar or ndarray): eq (bool): Returns: scalar or ndarray: is_inbounds CommandLine: python -m utool.util_alg --test-inbounds Example: >>> # DISABLE_DOCTEST >>> from utool.util_alg import * # NOQA >>> import utool as ut >>> num = np.array([[ 0. , 0.431, 0.279], ... [ 0.204, 0.352, 0.08 ], ... [ 0.107, 0.325, 0.179]]) >>> low = .1 >>> high = .4 >>> eq = False >>> is_inbounds = inbounds(num, low, high, eq) >>> result = ut.repr2(is_inbounds, with_dtype=True) >>> print(result) np.array([[False, False, True], [ True, True, False], [ True, True, True]], dtype=bool) """ less = op.le if eq else op.lt greater = op.ge if eq else op.gt and_ = np.logical_and if isinstance(num, np.ndarray) else op.and_ is_inbounds = and_(greater(num, low), less(num, high)) return is_inbounds
[docs]def almost_eq(arr1, arr2, thresh=1e-11, ret_error=False):
    """Checks if floating point numbers are equal to within a threshold"""
    error = np.abs(arr1 - arr2)
    passed = error < thresh
    if ret_error:
        return passed, error
    return passed
[docs]def almost_allsame(vals): if len(vals) == 0: return True x = vals[0] return np.all([np.isclose(item, x) for item in vals])
[docs]def unixtime_hourdiff(x, y):
    r"""
    Args:
        x (float or ndarray): unixtime timestamp(s)
        y (float or ndarray): unixtime timestamp(s)

    Returns:
        float or ndarray: absolute difference in hours

    CommandLine:
        python -m utool.util_alg --exec-unixtime_hourdiff --show

    Example:
        >>> # DISABLE_DOCTEST
        >>> from utool.util_alg import *  # NOQA
        >>> x = np.nan
        >>> y = 0
        >>> result = unixtime_hourdiff(x, y)
        >>> print(result)
        nan
    """
    return np.abs((x - y)) / (60.0 ** 2)
[docs]def absdiff(x, y): return np.abs(np.subtract(x, y))
[docs]def safe_pdist(arr, *args, **kwargs):
    """
    Kwargs:
        metric = ut.absdiff

    SeeAlso:
        scipy.spatial.distance.pdist

    TODO: move to vtool
    """
    if arr is None or len(arr) < 2:
        return None
    else:
        import vtool as vt

        arr_ = vt.atleast_nd(arr, 2)
        return spdist.pdist(arr_, *args, **kwargs)
[docs]def square_pdist(arr, *args, **kwargs): dists = safe_pdist(arr, *args, **kwargs) if dists is None: return np.zeros((1, 1)) return spdist.squareform(dists)
[docs]def normalize(array, dim=0): return norm_zero_one(array, dim)
[docs]def norm_zero_one(array, dim=None):
    """
    normalizes a numpy array from 0 to 1 based on its extent

    Args:
        array (ndarray):
        dim (int):

    Returns:
        ndarray:

    CommandLine:
        python -m utool.util_alg --test-norm_zero_one

    Example:
        >>> # ENABLE_DOCTEST
        >>> from utool.util_alg import *  # NOQA
        >>> import utool as ut
        >>> array = np.array([ 22, 1, 3, 2, 10, 42, ])
        >>> dim = None
        >>> array_norm = norm_zero_one(array, dim)
        >>> result = ut.repr2(list(array_norm), precision=3)
        >>> print(result)
        [0.512, 0.000, 0.049, 0.024, 0.220, 1.000]
    """
    if not util_type.is_float(array):
        array = array.astype(np.float32)
    array_max = array.max(dim)
    array_min = array.min(dim)
    array_exnt = np.subtract(array_max, array_min)
    array_norm = np.divide(np.subtract(array, array_min), array_exnt)
    return array_norm
[docs]def euclidean_dist(vecs1, vec2, dtype=None): if dtype is None: dtype = np.float32 return np.sqrt(((vecs1.astype(dtype) - vec2.astype(dtype)) ** 2).sum(1))
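# Minimal usage sketch for euclidean_dist: row-wise distances from a stack of
# vectors to a single query vector (names follow the signature above).
# >>> vecs1 = np.array([[0, 0], [3, 4]])
# >>> vec2 = np.array([0, 0])
# >>> euclidean_dist(vecs1, vec2)
# array([0., 5.], dtype=float32)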
[docs]def max_size_max_distance_subset(items, min_thresh=0, Kstart=2, verbose=False):
    r"""
    Args:
        items (list): set of candidate items
        min_thresh (int): minimum allowed pairwise distance (default = 0)
        Kstart (int): initial subset size to try (default = 2)

    Returns:
        list: best_idxs - indices of the largest subset whose pairwise
            distances all exceed ``min_thresh``

    CommandLine:
        python -m utool.util_alg --exec-max_size_max_distance_subset

    Example:
        >>> # DISABLE_DOCTEST
        >>> from utool.util_alg import *  # NOQA
        >>> items = [1, 2, 3, 4, 5, 6, 7, 8, 9]
        >>> min_thresh = 3
        >>> Kstart = 2
        >>> verbose = True
        >>> best_idxs = max_size_max_distance_subset(items, min_thresh,
        >>>                                          Kstart, verbose=verbose)
        >>> result = ('best_idxs = %s' % (str(best_idxs),))
        >>> print(result)
    """
    import utool as ut

    assert Kstart >= 2, 'must start with group of size 2'
    best_idxs = []
    for K in range(Kstart, len(items)):
        if verbose:
            print('Running subset chooser')
        value, subset_idx, subset = ut.maximum_distance_subset(
            items, K=K, verbose=verbose
        )
        if verbose:
            print('subset = %r' % (subset,))
            print('subset_idx = %r' % (subset_idx,))
            print('value = %r' % (value,))
        distances = ut.safe_pdist(subset[:, None])
        if np.any(distances < min_thresh):
            break
        best_idxs = subset_idx
    return best_idxs
[docs]def group_indices(groupid_list):
    """
    groups indices of each item in ``groupid_list``

    Args:
        groupid_list (list): list of group ids

    SeeAlso:
        vt.group_indices - optimized numpy version
        ut.apply_grouping

    CommandLine:
        python -m utool.util_alg --test-group_indices
        python3 -m utool.util_alg --test-group_indices

    Example:
        >>> # ENABLE_DOCTEST
        >>> from utool.util_alg import *  # NOQA
        >>> import utool as ut
        >>> groupid_list = ['b', 1, 'b', 1, 'b', 1, 'b', 'c', 'c', 'c', 'c']
        >>> (keys, groupxs) = ut.group_indices(groupid_list)
        >>> result = ut.repr3((keys, groupxs), nobraces=1, nl=1)
        >>> print(result)
        [1, 'b', 'c'],
        [[1, 3, 5], [0, 2, 4, 6], [7, 8, 9, 10]],
    """
    item_list = range(len(groupid_list))
    grouped_dict = util_dict.group_items(item_list, groupid_list)
    # Sort by groupid for cache efficiency
    keys_ = list(grouped_dict.keys())
    try:
        keys = sorted(keys_)
    except TypeError:
        # Python 3 does not allow sorting mixed types
        keys = util_list.sortedby2(keys_, keys_)
    groupxs = util_dict.dict_take(grouped_dict, keys)
    return keys, groupxs
[docs]def apply_grouping(items, groupxs):
    r"""
    applies grouping from group_indices
    non-optimized version

    Args:
        items (list): items to group
        groupxs (list of list of ints): grouped lists of indices

    SeeAlso:
        vt.apply_grouping - optimized numpy version
        ut.group_indices

    CommandLine:
        python -m utool.util_alg --exec-apply_grouping --show

    Example:
        >>> # ENABLE_DOCTEST
        >>> from utool.util_alg import *  # NOQA
        >>> import utool as ut
        >>> idx2_groupid = [2, 1, 2, 1, 2, 1, 2, 3, 3, 3, 3]
        >>> items        = [1, 8, 5, 5, 8, 6, 7, 5, 3, 0, 9]
        >>> (keys, groupxs) = ut.group_indices(idx2_groupid)
        >>> grouped_items = ut.apply_grouping(items, groupxs)
        >>> result = ut.repr2(grouped_items)
        >>> print(result)
        [[8, 5, 6], [1, 5, 8, 7], [5, 3, 0, 9]]
    """
    return [util_list.list_take(items, xs) for xs in groupxs]
[docs]def iapply_grouping(items, groupxs):
    r"""
    Iterates over groups from group_indices

    Args:
        items (list): items to group
        groupxs (list of list of ints): grouped lists of indices

    SeeAlso:
        vt.apply_grouping - optimized numpy version
        ut.group_indices

    CommandLine:
        python -m utool.util_alg --exec-iapply_grouping --show

    Example:
        >>> # ENABLE_DOCTEST
        >>> from utool.util_alg import *  # NOQA
        >>> import utool as ut
        >>> idx2_groupid = [2, 1, 2, 1, 2, 1, 2, 3, 3, 3, 3]
        >>> items        = [1, 8, 5, 5, 8, 6, 7, 5, 3, 0, 9]
        >>> (keys, groupxs) = ut.group_indices(idx2_groupid)
        >>> grouped_items = list(ut.iapply_grouping(items, groupxs))
        >>> result = ut.repr2(grouped_items)
        >>> print(result)
        [[8, 5, 6], [1, 5, 8, 7], [5, 3, 0, 9]]
    """
    for xs in groupxs:
        yield [items[x] for x in xs]
[docs]def ungroup(grouped_items, groupxs, maxval=None, fill=None):
    """
    Ungroups items

    Args:
        grouped_items (list):
        groupxs (list):
        maxval (int): maximum index to allocate for; inferred when None
            (default = None)
        fill (object): placeholder for indices not covered by any group
            (default = None)

    Returns:
        list: ungrouped_items

    SeeAlso:
        vt.invert_apply_grouping

    CommandLine:
        python -m utool.util_alg ungroup

    Example:
        >>> # ENABLE_DOCTEST
        >>> from utool.util_alg import *  # NOQA
        >>> import utool as ut
        >>> grouped_items = [[1.1, 1.2], [2.1, 2.2], [3.1, 3.2]]
        >>> groupxs = [[0, 2], [1, 5], [4, 3]]
        >>> maxval = None
        >>> ungrouped_items = ungroup(grouped_items, groupxs, maxval)
        >>> result = ('ungrouped_items = %s' % (ut.repr2(ungrouped_items),))
        >>> print(result)
        ungrouped_items = [1.1, 2.1, 1.2, 3.2, 3.1, 2.2]
    """
    if maxval is None:
        # Determine the number of items if unknown
        maxpergroup = [max(xs) if len(xs) else 0 for xs in groupxs]
        maxval = max(maxpergroup) if len(maxpergroup) else 0
    # Allocate an array containing the newly flattened items
    ungrouped_items = [fill] * (maxval + 1)
    # Populate the array
    for itemgroup, xs in zip(grouped_items, groupxs):
        for item, x in zip(itemgroup, xs):
            ungrouped_items[x] = item
    return ungrouped_items
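# The ``fill`` parameter of ungroup is not exercised by the doctest above; a
# small sketch: indices not covered by any group keep the fill value.
# >>> ungroup([['a'], ['b']], [[0], [3]], fill='-')
# ['a', '-', '-', 'b']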
[docs]def ungroup_gen(grouped_items, groupxs, fill=None):
    """
    Ungroups items returning a generator. Note that this is much slower than
    the list version and is not guaranteed to have better memory usage.

    Args:
        grouped_items (list):
        groupxs (list):
        fill (object): value yielded for indices not covered by any group
            (default = None)

    Returns:
        generator: ungrouped_items

    SeeAlso:
        vt.invert_apply_grouping

    CommandLine:
        python -m utool.util_alg ungroup_gen

    Example:
        >>> # ENABLE_DOCTEST
        >>> from utool.util_alg import *  # NOQA
        >>> import utool as ut
        >>> grouped_items = [[1.1, 1.2], [2.1, 2.2], [3.1, 3.2]]
        >>> groupxs = [[1, 2], [5, 6], [9, 3]]
        >>> ungrouped_items1 = list(ungroup_gen(grouped_items, groupxs))
        >>> ungrouped_items2 = ungroup(grouped_items, groupxs)
        >>> assert ungrouped_items1 == ungrouped_items2
        >>> grouped_items = [[1.1, 1.2], [2.1, 2.2], [3.1, 3.2]]
        >>> groupxs = [[0, 2], [1, 5], [4, 3]]
        >>> ungrouped_items1 = list(ungroup_gen(grouped_items, groupxs))
        >>> ungrouped_items2 = ungroup(grouped_items, groupxs)
        >>> assert ungrouped_items1 == ungrouped_items2

    Ignore:
        labels = np.random.randint(0, 64, 10000)
        unique_labels, groupxs = ut.group_indices(labels)
        grouped_items = ut.apply_grouping(np.arange(len(labels)), groupxs)
        ungrouped_items1 = list(ungroup_gen(grouped_items, groupxs))
        ungrouped_items2 = ungroup(grouped_items, groupxs)
        assert ungrouped_items2 == ungrouped_items1

        %timeit list(ungroup_gen(grouped_items, groupxs))
        %timeit ungroup(grouped_items, groupxs)
    """
    import utool as ut

    # Determine the smallest index that will ever be produced
    minpergroup = [min(xs) if len(xs) else 0 for xs in groupxs]
    minval = min(minpergroup) if len(minpergroup) else 0

    flat_groupx = ut.flatten(groupxs)
    sortx = ut.argsort(flat_groupx)
    # Indicates the order in which indices will be yielded
    groupx_sorted = ut.take(flat_groupx, sortx)
    flat_items = ut.iflatten(grouped_items)

    # Storage for items waiting to be yielded
    toyield = {}
    items_yielded = 0
    # Indicates the index we are currently yielding
    current_index = 0

    # Determine where fills need to happen
    num_fills_before = [minval] + (np.diff(groupx_sorted) - 1).tolist() + [0]

    # Check if there are fills before the first item
    fills = num_fills_before[items_yielded]
    if fills > 0:
        for _ in range(fills):
            yield fill
            current_index += 1

    # Yield items as they become available
    for yield_at, item in zip(flat_groupx, flat_items):
        if yield_at > current_index:
            # Buffer out-of-order items until their index comes up
            toyield[yield_at] = item
        elif yield_at == current_index:
            # We found the next element to yield
            yield item
            current_index += 1
            items_yielded += 1

            # Check if there are fills before the next item
            fills = num_fills_before[items_yielded]
            if fills > 0:
                for _ in range(fills):
                    yield fill
                    current_index += 1

            # Now yield any buffered items whose index has come up
            while current_index in toyield:
                item = toyield.pop(current_index)
                yield item
                current_index += 1
                items_yielded += 1

                # Check if there are fills before the next item
                fills = num_fills_before[items_yielded]
                if fills > 0:
                    for _ in range(fills):
                        yield fill
                        current_index += 1
[docs]def ungroup_unique(unique_items, groupxs, maxval=None): """ Ungroups unique items to correspond to original non-unique list Args: unique_items (list): groupxs (list): maxval (int): (default = None) Returns: list: ungrouped_items CommandLine: python -m utool.util_alg ungroup_unique Example: >>> # ENABLE_DOCTEST >>> from utool.util_alg import * # NOQA >>> import utool as ut >>> unique_items = [1, 2, 3] >>> groupxs = [[0, 2], [1, 3], [4, 5]] >>> maxval = None >>> ungrouped_items = ungroup_unique(unique_items, groupxs, maxval) >>> result = ('ungrouped_items = %s' % (ut.repr2(ungrouped_items),)) >>> print(result) ungrouped_items = [1, 2, 1, 2, 3, 3] """ if maxval is None: maxpergroup = [max(xs) if len(xs) else 0 for xs in groupxs] maxval = max(maxpergroup) if len(maxpergroup) else 0 ungrouped_items = [None] * (maxval + 1) for item, xs in zip(unique_items, groupxs): for x in xs: ungrouped_items[x] = item return ungrouped_items
[docs]def edit_distance(string1, string2):
    """
    Edit distance algorithm. string1 and string2 can be either strings or
    lists of strings

    pip install python-Levenshtein

    Args:
        string1 (str or list):
        string2 (str or list):

    CommandLine:
        python -m utool.util_alg edit_distance --show

    Example:
        >>> # DISABLE_DOCTEST
        >>> from utool.util_alg import *  # NOQA
        >>> import utool as ut
        >>> string1 = 'hello world'
        >>> string2 = ['goodbye world', 'rofl', 'hello', 'world', 'lowo']
        >>> edit_distance(['hello', 'one'], ['goodbye', 'two'])
        >>> edit_distance('hello', ['goodbye', 'two'])
        >>> edit_distance(['hello', 'one'], 'goodbye')
        >>> edit_distance('hello', 'goodbye')
        >>> distmat = edit_distance(string1, string2)
        >>> result = ('distmat = %s' % (ut.repr2(distmat),))
        >>> print(result)
        distmat = [7, 9, 6, 6, 7]
    """
    import utool as ut

    try:
        import Levenshtein
    except ImportError as ex:
        ut.printex(ex, 'pip install python-Levenshtein')
        raise
    isiter1 = ut.isiterable(string1)
    isiter2 = ut.isiterable(string2)
    strs1 = string1 if isiter1 else [string1]
    strs2 = string2 if isiter2 else [string2]
    distmat = [[Levenshtein.distance(str1, str2) for str2 in strs2] for str1 in strs1]
    # broadcast back to the shape of the inputs
    if not isiter2:
        distmat = ut.take_column(distmat, 0)
    if not isiter1:
        distmat = distmat[0]
    return distmat
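# If the python-Levenshtein C extension is unavailable, the same metric can be
# computed with the classic two-row dynamic program. The helper name
# _edit_distance_py is illustrative, not part of the utool API.
def _edit_distance_py(s1, s2):
    """Levenshtein distance between two sequences via dynamic programming."""
    prev = list(range(len(s2) + 1))
    for i, c1 in enumerate(s1, start=1):
        curr = [i]
        for j, c2 in enumerate(s2, start=1):
            curr.append(min(
                prev[j] + 1,                # deletion
                curr[j - 1] + 1,            # insertion
                prev[j - 1] + (c1 != c2),   # substitution (free on a match)
            ))
        prev = curr
    return prev[-1]

# Example: _edit_distance_py('hello', 'goodbye') == 7, matching
# Levenshtein.distance('hello', 'goodbye').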
[docs]def get_nth_bell_number(n):
    """
    Returns the n-th Bell number using recursion.
    The Bell numbers count the number of partitions of a set.

    Args:
        n (int): number of items in a set

    Returns:
        int:

    References:
        http://adorio-research.org/wordpress/?p=11460

    CommandLine:
        python -m utool.util_alg --exec-bell --show

    Example:
        >>> # DISABLE_DOCTEST
        >>> from utool.util_alg import *  # NOQA
        >>> n = 3
        >>> result = get_nth_bell_number(n)
        >>> print(result)
        5
    """
    import utool as ut
    import scipy.special

    @ut.memoize
    def bell_(n):
        if n < 2:
            return 1
        sum_ = 0
        for k in range(1, n + 1):
            sum_ = sum_ + scipy.special.binom(n - 1, k - 1) * bell_(k - 1)
        return sum_

    nth_bell = bell_(n)
    return nth_bell
[docs]def num_partitions(num_items): return get_nth_bell_number(num_items - 1)
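# As a non-recursive cross-check for get_nth_bell_number, the Bell numbers can
# also be computed iteratively with the Bell triangle (each row starts with the
# last entry of the previous row). This sketch avoids scipy and memoized
# recursion; the helper name is illustrative, not part of the utool API.
def _bell_number_triangle(n):
    """Computes the n-th Bell number via the Bell triangle."""
    row = [1]
    for _ in range(n):
        # next row starts from the previous row's last element; each further
        # entry is the sum of its left neighbor and the entry above-left
        new_row = [row[-1]]
        for val in row:
            new_row.append(new_row[-1] + val)
        row = new_row
    return row[0]

# Example: [_bell_number_triangle(n) for n in range(6)] == [1, 1, 2, 5, 15, 52]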
[docs]def standardize_boolexpr(boolexpr_, parens=False): r""" Standardizes a boolean expression into an or-ing of and-ed variables Args: boolexpr_ (str): Returns: str: final_expr CommandLine: sudo pip install git+https://github.com/tpircher/quine-mccluskey.git python -m utool.util_alg standardize_boolexpr --show Example: >>> # DISABLE_DOCTEST >>> from utool.util_alg import * # NOQA >>> import utool as ut >>> boolexpr_ = 'not force_opencv and (orient_ or is_gif)' >>> result = standardize_boolexpr(boolexpr_, parens=True) >>> print(result) (orient_ and (not force_opencv)) or (is_gif and (not force_opencv)) """ import utool as ut import re onlyvars = boolexpr_ onlyvars = re.sub('\\bnot\\b', '', onlyvars) onlyvars = re.sub('\\band\\b', '', onlyvars) onlyvars = re.sub('\\bor\\b', '', onlyvars) onlyvars = re.sub('\\(', '', onlyvars) onlyvars = re.sub('\\)', '', onlyvars) varnames = ut.remove_doublspaces(onlyvars).strip().split(' ') varied_dict = {var: [True, False] for var in varnames} bool_states = ut.all_dict_combinations(varied_dict) outputs = [eval(boolexpr_, state.copy(), state.copy()) for state in bool_states] true_states = ut.compress(bool_states, outputs) true_tuples = ut.take_column(true_states, varnames) true_cases = [str(''.join([str(int(t)) for t in tup])) for tup in true_tuples] # Convert to binary ones_bin = [int(x, 2) for x in true_cases] # ones_str = [str(x) for x in true_cases] from quine_mccluskey.qm import QuineMcCluskey qm = QuineMcCluskey() result = qm.simplify(ones=ones_bin, num_bits=len(varnames)) # result = qm.simplify_los(ones=ones_str, num_bits=len(varnames)) grouped_terms = [dict(ut.group_items(varnames, rs)) for rs in result] def parenjoin(char, list_): if len(list_) == 0: return '' else: if parens: return '(' + char.join(list_) + ')' else: return char.join(list_) if parens: expanded_terms = [ ( term.get('1', []) + ['(not ' + b + ')' for b in term.get('0', [])] + [ parenjoin(' ^ ', term.get('^', [])), parenjoin(' ~ ', term.get('~', [])), ] ) for term in grouped_terms ] else: expanded_terms = [ ( term.get('1', []) + ['not ' + b for b in term.get('0', [])] + [ parenjoin(' ^ ', term.get('^', [])), parenjoin(' ~ ', term.get('~', [])), ] ) for term in grouped_terms ] final_terms = [[t for t in term if t] for term in expanded_terms] products = [parenjoin(' and ', [f for f in form if f]) for form in final_terms] final_expr = ' or '.join(products) return final_expr
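# Usage sketch for standardize_boolexpr: the expression is evaluated over every
# assignment of its (distinct) variables and the minimized sum-of-products form
# is returned. Requires the quine_mccluskey package noted in the CommandLine;
# term ordering can vary because qm.simplify returns a set.
# >>> standardize_boolexpr('not a and (b or c)')
# 'b and not a or c and not a'    # (term order may differ)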
[docs]def solve_boolexpr():
    """
    Demonstrates Quine-McCluskey minimization on a hardcoded truth table over
    the variables ['sa', 'said', 'aid'].

    sudo pip install git+https://github.com/tpircher/quine-mccluskey.git

    sudo pip uninstall quine_mccluskey
    pip uninstall quine_mccluskey
    pip install git+https://github.com/tpircher/quine-mccluskey.git

    CommandLine:
        python -m utool.util_alg solve_boolexpr --show

    Example:
        >>> # DISABLE_DOCTEST
        >>> from utool.util_alg import *  # NOQA
        >>> import utool as ut
        >>> result = solve_boolexpr()
        >>> print(result)
    """
    import utool as ut

    varnames = ['sa', 'said', 'aid']

    truth_table = [
        dict(sa=True, said=True, aid=True, output=False),
        dict(sa=True, said=True, aid=False, output=True),
        dict(sa=True, said=False, aid=True, output=True),
        dict(sa=True, said=False, aid=False, output=True),
        dict(sa=False, said=True, aid=True, output=False),
        dict(sa=False, said=True, aid=False, output=True),
        dict(sa=False, said=False, aid=True, output=False),
        dict(sa=False, said=False, aid=False, output=True),
    ]

    truth_tuples = [ut.dict_take(d, varnames) for d in truth_table]
    outputs = [d['output'] for d in truth_table]
    true_tuples = ut.compress(truth_tuples, outputs)
    true_cases = [''.join([str(int(t)) for t in tup]) for tup in true_tuples]

    from quine_mccluskey.qm import QuineMcCluskey

    qm = QuineMcCluskey(use_xor=False)
    result = qm.simplify_los(true_cases, num_bits=len(varnames))
    print(result)

    # Symbol legend for the minimized terms returned by quine_mccluskey:
    #   '-' don't care: this bit can be either zero or one.
    #   '1' the bit must be one.
    #   '0' the bit must be zero.
    #   '^' all bits with the caret are XOR-ed together.
    #   '~' all bits with the tilde are XNOR-ed together.
    grouped_terms = [dict(ut.group_items(varnames, rs)) for rs in result]

    def parenjoin(char, list_):
        if len(list_) == 0:
            return ''
        else:
            return '(' + char.join(list_) + ')'

    expanded_terms = [
        (
            term.get('1', [])
            + ['(not ' + b + ')' for b in term.get('0', [])]
            + [parenjoin(' ^ ', term.get('^', [])), parenjoin(' ~ ', term.get('~', []))]
        )
        for term in grouped_terms
    ]

    final_terms = [[t for t in term if t] for term in expanded_terms]
    products = [parenjoin(' and ', [f for f in form if f]) for form in final_terms]
    final_expr = ' or '.join(products)
    print(final_expr)
[docs]def longest_common_substring(s1, s2):
    """
    References:
        https://en.wikibooks.org/wiki/Algorithm_Implementation/Strings/Longest_common_substring#Python2
    """
    m = [[0] * (1 + len(s2)) for i in range(1 + len(s1))]
    longest, x_longest = 0, 0
    for x in range(1, 1 + len(s1)):
        for y in range(1, 1 + len(s2)):
            if s1[x - 1] == s2[y - 1]:
                # extend the common suffix ending at (x - 1, y - 1)
                m[x][y] = m[x - 1][y - 1] + 1
                if m[x][y] > longest:
                    longest = m[x][y]
                    x_longest = x
            else:
                m[x][y] = 0
    return s1[x_longest - longest : x_longest]
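# Usage sketch for longest_common_substring; m[x][y] above holds the length of
# the longest common suffix of s1[:x] and s2[:y].
# >>> longest_common_substring('utool_algorithm', 'algorithm_utils')
# 'algorithm'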
[docs]@profile
def expensive_task_gen(num=8700):
    r"""
    Runs a task that takes some time

    Args:
        num (int): (default = 8700)

    CommandLine:
        python -m utool.util_alg expensive_task_gen --show

    Example:
        >>> # DISABLE_DOCTEST
        >>> from utool.util_alg import *  # NOQA
        >>> import utool as ut
        >>> #num = 8700
        >>> num = 40000
        >>> with ut.Timer('expensive task'):
        >>>     time_list = list(ut.expensive_task_gen(num))
        >>> print(sum(time_list))
        >>> ut.quit_if_noshow()
        >>> import wbia.plottool as pt
        >>> #pt.plot(time_list)
        >>> from scipy.optimize import curve_fit
        >>> def func(x, a, b, c, d):
        >>>     return a * np.exp(-c * x) + d
        >>> #a*x**3 + b*x**2 +c*x + d
        >>> y = np.array(time_list)
        >>> y = np.array(ut.cumsum(y))
        >>> x = np.arange(len(y))
        >>> #popt, pcov = curve_fit(func, x, y, p0=(1, 1e-6, 1))
        >>> #print('pcov = %r' % (pcov,))
        >>> #print('popt = %r' % (popt,))
        >>> # http://stackoverflow.com/questions/3433486/-curve-fitting-in-python
        >>> pt.plt.plot(x[::num//50], y[::num//50], 'rx', label='measured data')
        >>> #x2 = np.arange(len(y) * 2)
        >>> #pt.plt.plot(x2, func(x2, *popt), 'b', label="Fitted Curve") #same as line above \/
        >>> #pt.plt.legend(loc='upper left')
        >>> ut.show_if_requested()
    """
    import utool as ut

    for x in range(0, num):
        with ut.Timer(verbose=False) as t:
            ut.is_prime(x)
        yield t.ellapsed
[docs]def factors(n): """ Computes all the integer factors of the number `n` Example: >>> # ENABLE_DOCTEST >>> from utool.util_alg import * # NOQA >>> import utool as ut >>> result = sorted(ut.factors(10)) >>> print(result) [1, 2, 5, 10] References: http://stackoverflow.com/questions/6800193/finding-all-the-factors """ return set( reduce( list.__add__, ([i, n // i] for i in range(1, int(n ** 0.5) + 1) if n % i == 0) ) )
if __name__ == '__main__': """ CommandLine: python -m utool.util_alg python -m utool.util_alg --allexamples """ import multiprocessing multiprocessing.freeze_support() # for win32 import utool as ut # NOQA ut.doctest_funcs()