# -*- coding: utf-8 -*-
from __future__ import absolute_import, division, print_function, unicode_literals
import six
import numpy as np
import utool as ut
import pandas as pd
import itertools as it
import networkx as nx
import vtool as vt
from os.path import join # NOQA
from wbia.algo.graph import nx_utils as nxu
from wbia.algo.graph.nx_utils import e_
from wbia.algo.graph.state import POSTV, NEGTV, INCMP, UNREV # NOQA
print, rrr, profile = ut.inject2(__name__)
@six.add_metaclass(ut.ReloadingMetaclass)
class AnnotInfrMatching(object):
"""
Methods for running matching algorithms
"""
@profile
def exec_matching(
infr,
qaids=None,
daids=None,
prog_hook=None,
cfgdict=None,
name_method='node',
use_cache=True,
invalidate_supercache=False,
):
"""
Loads chip matches into the inference structure
Uses graph name labeling and ignores wbia labeling
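Example:
>>> # DISABLE_DOCTEST
>>> # Illustrative sketch (assumes the PZ_MTEST demo database, mirroring
>>> # the _cm_training_pairs doctest below).
>>> from wbia.algo.graph.core import * # NOQA
>>> infr = testdata_infr('PZ_MTEST')
>>> infr.exec_matching(cfgdict={'K': 4, 'Knorm': 1, 'can_match_samename': True})
>>> assert infr.cm_list is not None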
"""
infr._make_rankings(
qaids,
daids,
prog_hook,
cfgdict,
name_method,
use_cache=use_cache,
invalidate_supercache=invalidate_supercache,
)
def _set_vsmany_info(infr, qreq_, cm_list):
infr.vsmany_qreq_ = qreq_
infr.vsmany_cm_list = cm_list
infr.cm_list = cm_list
infr.qreq_ = qreq_
def _make_rankings(
infr,
qaids=None,
daids=None,
prog_hook=None,
cfgdict=None,
name_method='node',
use_cache=None,
invalidate_supercache=None,
):
# from wbia.algo.graph import graph_iden
# TODO: expose other ranking algos like SMK
rank_algo = 'LNBNN'
infr.print('Exec {} ranking algorithm'.format(rank_algo), 1)
ibs = infr.ibs
if qaids is None:
qaids = infr.aids
qaids = ut.ensure_iterable(qaids)
if daids is None:
daids = infr.aids
if cfgdict is None:
cfgdict = {
# 'can_match_samename': False,
'can_match_samename': True,
'can_match_sameimg': True,
# 'augment_queryside_hack': True,
'K': 3,
'Knorm': 3,
'prescore_method': 'csum',
'score_method': 'csum',
}
cfgdict.update(infr.ranker_params)
infr.print('Using LNBNN config = %r' % (cfgdict,))
# hack for using current nids
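# Note (illustrative): 'node' builds the lookup from the graph's current
# 'name_label' node attributes, 'edge' builds it from the positive
# connected components, and 'wbia' falls back to the database labeling.
# The resulting lookup maps annotation ids to name labels, e.g. a
# hypothetical {1: 7, 2: 7, 3: 9}.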
if name_method == 'node':
aids = sorted(set(ut.aslist(qaids) + ut.aslist(daids)))
custom_nid_lookup = infr.get_node_attrs('name_label', aids)
elif name_method == 'edge':
custom_nid_lookup = {
aid: nid for nid, cc in infr.pos_graph._ccs.items() for aid in cc
}
elif name_method == 'wbia':
custom_nid_lookup = None
else:
raise KeyError('Unknown name_method={}'.format(name_method))
qreq_ = ibs.new_query_request(
qaids,
daids,
cfgdict=cfgdict,
custom_nid_lookup=custom_nid_lookup,
verbose=infr.verbose >= 2,
)
# cacher = qreq_.get_big_cacher()
# if not cacher.exists():
# pass
# # import sys
# # sys.exit(1)
cm_list = qreq_.execute(
prog_hook=prog_hook,
use_cache=use_cache,
invalidate_supercache=invalidate_supercache,
)
infr._set_vsmany_info(qreq_, cm_list)
edges = set(infr._cm_breaking(cm_list, review_cfg={'ranks_top': 5}))
return edges
# return cm_list
def _make_matches_from(infr, edges, config=None, prog_hook=None):
from wbia.algo.verif import pairfeat
if config is None:
config = infr.verifier_params
extr = pairfeat.PairwiseFeatureExtractor(infr.ibs, config=config)
match_list = extr._exec_pairwise_match(edges, prog_hook=prog_hook)
return match_list
def exec_vsone_subset(infr, edges, prog_hook=None):
r"""
Args:
edges (list): aid pairs to run one-vs-one matching on
prog_hook (None): (default = None)
CommandLine:
python -m wbia.algo.graph.core exec_vsone_subset
Example:
>>> # ENABLE_DOCTEST
>>> from wbia.algo.graph.core import * # NOQA
>>> infr = testdata_infr('testdb1')
>>> infr.ensure_full()
>>> edges = [(1, 2), (2, 3)]
>>> result = infr.exec_vsone_subset(edges)
>>> print(result)
"""
match_list = infr._make_matches_from(edges, prog_hook=prog_hook)
# TODO: is this code necessary anymore?
vsone_matches = {e_(u, v): match for (u, v), match in zip(edges, match_list)}
infr.vsone_matches.update(vsone_matches)
edge_to_score = {e: match.fs.sum() for e, match in vsone_matches.items()}
infr.graph.add_edges_from(edge_to_score.keys())
infr.set_edge_attrs('score', edge_to_score)
return match_list
def lookup_cm(infr, aid1, aid2):
"""
Get chipmatch object associated with an edge if one exists.
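Example:
>>> # DISABLE_DOCTEST
>>> # Illustrative sketch; assumes ranking results exist (mirrors the
>>> # apply_match_scores doctest).
>>> from wbia.algo.graph.core import * # NOQA
>>> infr = testdata_infr('PZ_MTEST')
>>> infr.exec_matching()
>>> infr.apply_match_edges()
>>> aid1, aid2 = list(infr.graph.edges())[0]
>>> cm, aid1, aid2 = infr.lookup_cm(aid1, aid2)
>>> assert cm.qaid == aid1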
"""
if infr.cm_list is None:
return None, aid1, aid2
# TODO: keep chip matches in dictionary by default?
aid2_idx = ut.make_index_lookup([cm.qaid for cm in infr.cm_list])
switch_order = False
if aid1 in aid2_idx:
idx = aid2_idx[aid1]
cm = infr.cm_list[idx]
if aid2 not in cm.daid2_idx:
switch_order = True
# raise KeyError('switch order')
else:
switch_order = True
if switch_order:
# switch order
aid1, aid2 = aid2, aid1
idx = aid2_idx[aid1]
cm = infr.cm_list[idx]
if aid2 not in cm.daid2_idx:
raise KeyError('No ChipMatch for edge (%r, %r)' % (aid1, aid2))
return cm, aid1, aid2
@profile
def apply_match_edges(infr, review_cfg={}):
"""
Adds results from one-vs-many rankings as edges in the graph
"""
if infr.cm_list is None:
infr.print('apply_match_edges - matching has not been run!')
return
infr.print('apply_match_edges', 1)
edges = infr._cm_breaking(review_cfg)
# Create match-based graph structure
infr.print('apply_match_edges adding %d edges' % len(edges), 1)
infr.graph.add_edges_from(edges)
infr.apply_match_scores()
def _cm_breaking(infr, cm_list=None, review_cfg={}):
"""
>>> from wbia.algo.graph.core import * # NOQA
>>> review_cfg = {}
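>>> # DISABLE_DOCTEST (illustrative continuation; assumes LNBNN results)
>>> infr = testdata_infr('PZ_MTEST')
>>> infr.exec_matching()
>>> edges = infr._cm_breaking(review_cfg={'ranks_top': 5})
>>> assert all(u < v for u, v in edges)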
"""
if cm_list is None:
cm_list = infr.cm_list
ranks_top = review_cfg.get('ranks_top', None)
ranks_bot = review_cfg.get('ranks_bot', None)
# Construct K-broken graph
edges = []
if ranks_bot is None:
ranks_bot = 0
for count, cm in enumerate(cm_list):
score_list = cm.annot_score_list
rank_list = ut.argsort(score_list)[::-1]
sortx = ut.argsort(rank_list)
top_sortx = sortx[:ranks_top]
bot_sortx = sortx[len(sortx) - ranks_bot :]
short_sortx = ut.unique(top_sortx + bot_sortx)
daid_list = ut.take(cm.daid_list, short_sortx)
for daid in daid_list:
u, v = (cm.qaid, daid)
if v < u:
u, v = v, u
edges.append((u, v))
return edges
def _cm_training_pairs(
infr,
qreq_=None,
cm_list=None,
top_gt=2,
mid_gt=2,
bot_gt=2,
top_gf=2,
mid_gf=2,
bot_gf=2,
rand_gt=2,
rand_gf=2,
rng=None,
):
"""
Constructs training data for a pairwise classifier
CommandLine:
python -m wbia.algo.graph.core _cm_training_pairs
Example:
>>> # ENABLE_DOCTEST
>>> from wbia.algo.graph.core import * # NOQA
>>> infr = testdata_infr('PZ_MTEST')
>>> infr.exec_matching(cfgdict={
>>> 'can_match_samename': True,
>>> 'K': 4,
>>> 'Knorm': 1,
>>> 'prescore_method': 'csum',
>>> 'score_method': 'csum'
>>> })
>>> from wbia.algo.graph.core import * # NOQA
>>> exec(ut.execstr_funckw(infr._cm_training_pairs))
>>> rng = np.random.RandomState(42)
>>> aid_pairs = np.array(infr._cm_training_pairs(rng=rng))
>>> print(len(aid_pairs))
>>> assert np.sum(aid_pairs.T[0] == aid_pairs.T[1]) == 0
"""
if qreq_ is None:
cm_list = infr.cm_list
qreq_ = infr.qreq_
ibs = infr.ibs
aid_pairs = []
dnids = qreq_.get_qreq_annot_nids(qreq_.daids)
rng = ut.ensure_rng(rng)
for cm in ut.ProgIter(cm_list, lbl='building pairs'):
all_gt_aids = cm.get_top_gt_aids(ibs)
all_gf_aids = cm.get_top_gf_aids(ibs)
gt_aids = ut.take_percentile_parts(all_gt_aids, top_gt, mid_gt, bot_gt)
gf_aids = ut.take_percentile_parts(all_gf_aids, top_gf, mid_gf, bot_gf)
# get unscored examples
unscored_gt_aids = [
aid for aid in qreq_.daids[cm.qnid == dnids] if aid not in cm.daid2_idx
]
rand_gt_aids = ut.random_sample(unscored_gt_aids, rand_gt, rng=rng)
# gf_aids = cm.get_groundfalse_daids()
_gf_aids = qreq_.daids.compress(cm.qnid != dnids)
# gf_aids = ibs.get_annot_groundfalse(cm.qaid, daid_list=qreq_.daids)
rand_gf_aids = ut.random_sample(_gf_aids, rand_gf, rng=rng).tolist()
chosen_daids = ut.unique(gt_aids + gf_aids + rand_gf_aids + rand_gt_aids)
aid_pairs.extend([(cm.qaid, aid) for aid in chosen_daids if cm.qaid != aid])
return aid_pairs
def _get_cm_agg_aid_ranking(infr, cc):
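"""
Aggregates the LNBNN rankings of every query annotation in the connected
component cc: each database aid is assigned its maximum score over the
component's ChipMatches and the aids are returned sorted by that score
(best first).
"""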
aid_to_cm = {cm.qaid: cm for cm in infr.cm_list}
all_scores = ut.ddict(list)
for qaid in cc:
cm = aid_to_cm[qaid]
# should we be doing nids?
for daid, score in zip(cm.get_top_aids(), cm.get_top_scores()):
all_scores[daid].append(score)
max_scores = sorted((max(scores), aid) for aid, scores in all_scores.items())[
::-1
]
ranked_aids = ut.take_column(max_scores, 1)
return ranked_aids
def _get_cm_edge_data(infr, edges, cm_list=None):
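"""
Looks up the precomputed LNBNN score and rank for each requested edge.
For an edge (u, v), both ChipMatches (u as query and v as query) are
consulted and the one giving the better score is used; edges without any
match data get a score and rank of None.
"""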
symmetric = True
if cm_list is None:
cm_list = infr.cm_list
# Find scores for the edges that exist in the graph
edge_to_data = ut.ddict(dict)
aid_to_cm = {cm.qaid: cm for cm in cm_list}
for u, v in edges:
if symmetric:
u, v = e_(u, v)
cm1 = aid_to_cm.get(u, None)
cm2 = aid_to_cm.get(v, None)
scores = []
ranks = []
for cm in ut.filter_Nones([cm1, cm2]):
for aid in [u, v]:
idx = cm.daid2_idx.get(aid, None)
if idx is None:
continue
score = cm.annot_score_list[idx]
rank = cm.get_annot_ranks([aid])[0]
scores.append(score)
ranks.append(rank)
if len(scores) == 0:
score = None
rank = None
else:
# Choose whichever one gave the best score
idx = vt.safe_argmax(scores, nans=False)
score = scores[idx]
rank = ranks[idx]
edge_to_data[(u, v)]['score'] = score
edge_to_data[(u, v)]['rank'] = rank
return edge_to_data
@profile
def apply_match_scores(infr):
"""
Applies precomputed matching scores to edges that already exist in the
graph. Typically you should run infr.apply_match_edges() before running
this.
CommandLine:
python -m wbia.algo.graph.core apply_match_scores --show
Example:
>>> # ENABLE_DOCTEST
>>> from wbia.algo.graph.core import * # NOQA
>>> infr = testdata_infr('PZ_MTEST')
>>> infr.exec_matching()
>>> infr.apply_match_edges()
>>> infr.apply_match_scores()
>>> infr.get_edge_attrs('score')
"""
if infr.cm_list is None:
infr.print('apply_match_scores - no scores to apply!')
return
infr.print('apply_match_scores', 1)
edges = list(infr.graph.edges())
edge_to_data = infr._get_cm_edge_data(edges)
# Remove existing attrs
ut.nx_delete_edge_attr(infr.graph, 'score')
ut.nx_delete_edge_attr(infr.graph, 'rank')
ut.nx_delete_edge_attr(infr.graph, 'normscore')
edges = list(edge_to_data.keys())
edge_scores = list(ut.take_column(edge_to_data.values(), 'score'))
edge_scores = ut.replace_nones(edge_scores, np.nan)
edge_scores = np.array(edge_scores)
edge_ranks = np.array(ut.take_column(edge_to_data.values(), 'rank'))
# take the inf-norm
normscores = edge_scores / vt.safe_max(edge_scores, nans=False)
# Add new attrs
infr.set_edge_attrs('score', ut.dzip(edges, edge_scores))
infr.set_edge_attrs('rank', ut.dzip(edges, edge_ranks))
# Hack away zero probabilities
# probs = np.vstack([p_nomatch, p_match, p_notcomp]).T + 1e-9
# probs = vt.normalize(probs, axis=1, ord=1, out=probs)
# entropy = -(np.log2(probs) * probs).sum(axis=1)
infr.set_edge_attrs('normscore', dict(zip(edges, normscores)))
class InfrLearning(object):
def learn_deploy_verifiers(infr, publish=False):
"""
Uses current knowledge to train verifiers for new unseen pairs.
Example:
>>> # DISABLE_DOCTEST
>>> import wbia
>>> ibs = wbia.opendb('PZ_MTEST')
>>> infr = wbia.AnnotInference(ibs, aids='all')
>>> infr.ensure_mst()
>>> publish = False
>>> infr.learn_deploy_verifiers()
Ignore:
publish = True
"""
infr.print('learn_deploy_verifiers')
from wbia.algo.verif import vsone
pblm = vsone.OneVsOneProblem(infr, verbose=True)
pblm.primary_task_key = 'match_state'
pblm.default_clf_key = 'RF'
pblm.default_data_key = 'learn(sum,glob)'
pblm.setup()
dpath = '.'
task_key = 'match_state'
pblm.deploy(dpath, task_key=task_key, publish=publish)
task_key = 'photobomb_state'
if task_key in pblm.eval_task_keys:
pblm.deploy(dpath, task_key=task_key)
def learn_evaluation_verifiers(infr):
"""
Creates a cross-validated ensemble of classifiers to evaluate
verifier error cases and groundtruth errors.
CommandLine:
python -m wbia.algo.graph.mixin_matching learn_evaluation_verifiers
Doctest:
>>> import wbia
>>> infr = wbia.AnnotInference(
>>> 'PZ_MTEST', aids='all', autoinit='annotmatch',
>>> verbose=4)
>>> verifiers = infr.learn_evaluation_verifiers()
>>> edges = list(infr.edges())
>>> verif = verifiers['match_state']
>>> probs = verif.predict_proba_df(edges)
>>> print(probs)
"""
infr.print('learn_evaluation_verifiers')
from wbia.algo.verif import vsone
pblm = vsone.OneVsOneProblem(infr, verbose=5)
pblm.primary_task_key = 'match_state'
pblm.eval_clf_keys = ['RF']
pblm.eval_data_keys = ['learn(sum,glob)']
pblm.setup_evaluation()
if True:
pblm.report_evaluation()
verifiers = pblm._make_evaluation_verifiers(pblm.eval_task_keys)
return verifiers
def load_published(infr):
"""
Downloads, caches, and loads pre-trained verifiers.
This is the default action.
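Example:
>>> # DISABLE_DOCTEST
>>> # Illustrative sketch; fetching the published models needs network
>>> # access and the PZ_MTEST demo database.
>>> import wbia
>>> ibs = wbia.opendb('PZ_MTEST')
>>> infr = wbia.AnnotInference(ibs, aids='all')
>>> infr.load_published()
>>> assert infr.verifiers is not None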
"""
from wbia.algo.verif import deploy
ibs = infr.ibs
species = ibs.get_primary_database_species(infr.aids)
infr.print('Loading task_thresh for species: %r' % (species,))
assert species in infr.task_thresh_dict
infr.task_thresh = infr.task_thresh_dict[species]
infr.print('infr.task_thresh: %r' % (infr.task_thresh,))
infr.print('Loading verifiers for species: %r' % (species,))
infr.verifiers = deploy.Deployer().load_published(ibs, species)
def load_latest_classifiers(infr, dpath):
from wbia.algo.verif import deploy
task_clf_fpaths = deploy.Deployer(dpath).find_latest_local()
classifiers = {}
for task_key, fpath in task_clf_fpaths.items():
clf_info = ut.load_data(fpath)
assert (
clf_info['metadata']['task_key'] == task_key
), 'bad saved clf at fpath={}'.format(fpath)
classifiers[task_key] = clf_info
infr.verifiers = classifiers
# return classifiers
def photobomb_samples(infr):
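"""
Returns the edges whose feedback tags include 'photobomb'.
"""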
edges = list(infr.edges())
tags_list = list(infr.gen_edge_values('tags', edges=edges, default=[]))
flags = ut.filterflags_general_tags(tags_list, has_any=['photobomb'])
pb_edges = ut.compress(edges, flags)
return pb_edges
class _RedundancyAugmentation(object):
# def rand_neg_check_edges(infr, c1_nodes, c2_nodes):
# """
# Find enough edges to between two pccs to make them k-negative complete
# """
# k = infr.params['redun.neg']
# existing_edges = nxu.edges_cross(infr.graph, c1_nodes, c2_nodes)
# reviewed_edges = {
# edge: state
# for edge, state in infr.get_edge_attrs(
# 'decision', existing_edges,
# default=UNREV).items()
# if state != UNREV
# }
# n_neg = sum([state == NEGTV for state in reviewed_edges.values()])
# if n_neg < k:
# # Find k random negative edges
# check_edges = existing_edges - set(reviewed_edges)
# if len(check_edges) < k:
# edges = it.starmap(nxu.e_, it.product(c1_nodes, c2_nodes))
# for edge in edges:
# if edge not in reviewed_edges:
# check_edges.add(edge)
# if len(check_edges) == k:
# break
# else:
# check_edges = {}
# return check_edges
def find_neg_augment_edges(infr, cc1, cc2, k=None):
"""
Find enough edges between two PCCs to make them k-negative complete.
The two CCs should be disjoint and not have any positive edges between
them.
Args:
cc1 (set): nodes in one PCC
cc2 (set): nodes in another positive-disjoint PCC
k (int): redundancy level (if None uses infr.params['redun.neg'])
Example:
>>> # DISABLE_DOCTEST
>>> from wbia.algo.graph import demo
>>> k = 2
>>> cc1, cc2 = {1}, {2, 3}
>>> # --- return an augmentation if feasible
>>> infr = demo.demodata_infr(ccs=[cc1, cc2], ignore_pair=True)
>>> edges = set(infr.find_neg_augment_edges(cc1, cc2, k=k))
>>> assert edges == {(1, 2), (1, 3)}
>>> # --- if infeasible return a partial augmentation
>>> infr.add_feedback((1, 2), INCMP)
>>> edges = set(infr.find_neg_augment_edges(cc1, cc2, k=k))
>>> assert edges == {(1, 3)}
"""
if k is None:
k = infr.params['redun.neg']
assert cc1 is not cc2, 'CCs should be disjoint (but they are the same)'
assert len(cc1.intersection(cc2)) == 0, 'CCs should be disjoint'
existing_edges = set(nxu.edges_cross(infr.graph, cc1, cc2))
reviewed_edges = {
edge: state
for edge, state in zip(
existing_edges, infr.edge_decision_from(existing_edges)
)
if state != UNREV
}
# Find how many negative edges we already have
num = sum([state == NEGTV for state in reviewed_edges.values()])
if num < k:
# Find k random negative edges
check_edges = existing_edges - set(reviewed_edges)
# Check the existing but unreviewed edges first
for edge in check_edges:
num += 1
yield edge
if num >= k:
return
# Check non-existing edges next
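# A deterministic seed derived from the CC contents (when node ids are
# summable) keeps the proposed candidate edges stable across calls.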
seed = 2827295125
try:
seed += sum(cc1) + sum(cc2)
except Exception:
pass
rng = np.random.RandomState(seed)
cc1 = ut.shuffle(list(cc1), rng=rng)
cc2 = ut.shuffle(list(cc2), rng=rng)
for edge in it.starmap(nxu.e_, nxu.diag_product(cc1, cc2)):
if edge not in existing_edges:
num += 1
yield edge
if num >= k:
return
def find_pos_augment_edges(infr, pcc, k=None):
"""
# [[1, 0], [0, 2], [1, 2], [3, 1]]
pos_sub = nx.Graph([[0, 1], [1, 2], [0, 2], [1, 3]])
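Example:
>>> # DISABLE_DOCTEST
>>> # Illustrative sketch using the demo data helpers from this module's
>>> # other doctests.
>>> from wbia.algo.graph import demo
>>> infr = demo.demodata_infr(ccs=[(1, 2, 3, 4)])
>>> pcc = next(infr.positive_components())
>>> edges = infr.find_pos_augment_edges(pcc, k=2)
>>> assert all(u in pcc and v in pcc for u, v in edges)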
"""
if k is None:
pos_k = infr.params['redun.pos']
else:
pos_k = k
pos_sub = infr.pos_graph.subgraph(pcc)
# TODO:
# weight by pairs most likely to be comparable
# First try to augment only with unreviewed existing edges
unrev_avail = list(nxu.edges_inside(infr.unreviewed_graph, pcc))
try:
check_edges = list(
nxu.k_edge_augmentation(
pos_sub, k=pos_k, avail=unrev_avail, partial=False
)
)
except nx.NetworkXUnfeasible:
check_edges = None
if not check_edges:
# Allow new edges to be introduced
full_sub = infr.graph.subgraph(pcc).copy()
new_avail = ut.estarmap(infr.e_, nx.complement(full_sub).edges())
full_avail = unrev_avail + new_avail
n_max = (len(pos_sub) * (len(pos_sub) - 1)) // 2
n_complement = n_max - pos_sub.number_of_edges()
if len(full_avail) == n_complement:
# can use the faster algorithm
check_edges = list(
nxu.k_edge_augmentation(pos_sub, k=pos_k, partial=True)
)
else:
# have to use the slow approximate algo
check_edges = list(
nxu.k_edge_augmentation(
pos_sub, k=pos_k, avail=full_avail, partial=True
)
)
check_edges = set(it.starmap(e_, check_edges))
return check_edges
@profile
def find_pos_redun_candidate_edges(infr, k=None, verbose=False):
r"""
Searches for augmenting edges that would make PCCs k-positive redundant
Doctest:
>>> from wbia.algo.graph.mixin_matching import * # NOQA
>>> from wbia.algo.graph import demo
>>> infr = demo.demodata_infr(ccs=[(1, 2, 3, 4, 5), (7, 8, 9, 10)])
>>> infr.add_feedback((2, 5), 'match')
>>> infr.add_feedback((1, 5), 'notcomp')
>>> infr.params['redun.pos'] = 2
>>> candidate_edges = list(infr.find_pos_redun_candidate_edges())
>>> result = ('candidate_edges = ' + ut.repr2(candidate_edges))
>>> print(result)
candidate_edges = []
"""
# Add random edges between existing non-redundant PCCs
if k is None:
k = infr.params['redun.pos']
# infr.find_non_pos_redundant_pccs(k=k, relax=True)
pcc_gen = list(infr.positive_components())
prog = ut.ProgIter(pcc_gen, enabled=verbose, freq=1, adjust=False)
for pcc in prog:
if not infr.is_pos_redundant(pcc, k=k, relax=True, assume_connected=True):
for edge in infr.find_pos_augment_edges(pcc, k=k):
yield nxu.e_(*edge)
@profile
def find_neg_redun_candidate_edges(infr, k=None):
"""
Get pairs of PCCs that are not complete.
Finds edges that might complete them.
Example:
>>> # DISABLE_DOCTEST
>>> from wbia.algo.graph.mixin_matching import * # NOQA
>>> from wbia.algo.graph import demo
>>> infr = demo.demodata_infr(ccs=[(1,), (2,), (3,)], ignore_pair=True)
>>> edges = list(infr.find_neg_redun_candidate_edges())
>>> assert len(edges) == 3, 'all should be needed here'
>>> infr.add_feedback_from(edges, evidence_decision=NEGTV)
>>> assert len(list(infr.find_neg_redun_candidate_edges())) == 0
Example:
>>> # DISABLE_DOCTEST
>>> from wbia.algo.graph import demo
>>> infr = demo.demodata_infr(pcc_sizes=[3] * 20, ignore_pair=True)
>>> ccs = list(infr.positive_components())
>>> gen = infr.find_neg_redun_candidate_edges(k=2)
>>> for edge in gen:
>>> # What happens when we make ccs positive
>>> print(infr.node_labels(edge))
>>> infr.add_feedback(edge, evidence_decision=POSTV)
>>> import ubelt as ub
>>> infr = demo.demodata_infr(pcc_sizes=[1] * 30, ignore_pair=True)
>>> ccs = list(infr.positive_components())
>>> gen = infr.find_neg_redun_candidate_edges(k=3)
>>> for chunk in ub.chunks(gen, 2):
>>> for edge in chunk:
>>> # What happens when we make ccs positive
>>> print(infr.node_labels(edge))
>>> infr.add_feedback(edge, evidence_decision=POSTV)
>>> list(gen)
"""
if k is None:
k = infr.params['redun.neg']
# Loop through all pairs
for cc1, cc2 in infr.find_non_neg_redun_pccs(k=k):
if len(cc1.intersection(cc2)) > 0:
# If there is modification of the underlying graph while we
# iterate, then two ccs may not be disjoint. Skip these cases.
continue
for u, v in infr.find_neg_augment_edges(cc1, cc2, k):
edge = e_(u, v)
infr.assert_edge(edge)
yield edge
class CandidateSearch(_RedundancyAugmentation):
""" Search for candidate edges """
@profile
def find_lnbnn_candidate_edges(infr):
"""
Example:
>>> # ENABLE_DOCTEST
>>> from wbia.algo.graph import demo
>>> infr = demo.demodata_mtest_infr()
>>> cand_edges = infr.find_lnbnn_candidate_edges()
>>> assert len(cand_edges) > 200
"""
# Refresh the name labels
# TODO: abstract into a Ranker class
# do LNBNN query for new edges
# Use one-vs-many to establish candidate edges to classify
infr.exec_matching(
name_method='edge',
cfgdict={
'resize_dim': 'width',
'dim_size': 700,
'requery': True,
'can_match_samename': False,
'can_match_sameimg': False,
# 'sv_on': False,
},
)
# infr.apply_match_edges(review_cfg={'ranks_top': 5})
ranks_top = infr.params['ranking.ntop']
lnbnn_results = set(infr._cm_breaking(review_cfg={'ranks_top': ranks_top}))
candidate_edges = {
edge
for edge, state in zip(lnbnn_results, infr.edge_decision_from(lnbnn_results))
if state == UNREV
}
infr.print(
'ranking alg found {}/{} unreviewed edges'.format(
len(candidate_edges), len(lnbnn_results)
),
1,
)
return candidate_edges
def ensure_task_probs(infr, edges):
"""
Ensures that probabilities are assigned to the edges.
This guarantees that infr.task_probs contains data for these edges.
(Currently only the primary task is actually ensured)
CommandLine:
python -m wbia.algo.graph.mixin_matching ensure_task_probs
Doctest:
>>> # DISABLE_DOCTEST
>>> from wbia.algo.graph.mixin_matching import *
>>> import wbia
>>> infr = wbia.AnnotInference('PZ_MTEST', aids='all',
>>> autoinit='staging')
>>> edges = list(infr.edges())[0:3]
>>> infr.load_published()
>>> assert len(infr.task_probs['match_state']) == 0
>>> infr.ensure_task_probs(edges)
>>> assert len(infr.task_probs['match_state']) == 3
>>> infr.ensure_task_probs(edges)
>>> assert len(infr.task_probs['match_state']) == 3
Doctest:
>>> # DISABLE_DOCTEST
>>> from wbia.algo.graph.mixin_matching import *
>>> from wbia.algo.graph import demo
>>> infr = demo.demodata_infr(num_pccs=6, p_incon=.5, size_std=2)
>>> edges = list(infr.edges())
>>> infr.ensure_task_probs(edges)
>>> assert all([np.isclose(sum(p.values()), 1)
>>> for p in infr.task_probs['match_state'].values()])
"""
if not infr.verifiers:
raise Exception('Verifiers are needed to predict probabilities')
# Construct pairwise features on edges in infr
primary_task = 'match_state'
match_task = infr.task_probs[primary_task]
need_flags = [e not in match_task for e in edges]
if any(need_flags):
need_edges = ut.compress(edges, need_flags)
infr.print(
'There are {} edges without probabilities'.format(len(need_edges)), 1
)
# Only recompute for the needed edges
task_probs = infr._make_task_probs(need_edges)
# Store task probs in internal data structure
# FIXME: this is slow
for task, probs in task_probs.items():
probs_dict = probs.to_dict(orient='index')
if task not in infr.task_probs:
infr.task_probs[task] = probs_dict
else:
infr.task_probs[task].update(probs_dict)
# Set edge task attribute as well
infr.set_edge_attrs(task, probs_dict)
@profile
def ensure_priority_scores(infr, priority_edges):
"""
Ensures that priority attributes are assigned to the edges.
This does not change the state of the queue.
Doctest:
>>> import wbia
>>> ibs = wbia.opendb('PZ_MTEST')
>>> infr = wbia.AnnotInference(ibs, aids='all')
>>> infr.ensure_mst()
>>> priority_edges = list(infr.edges())[0:1]
>>> infr.ensure_priority_scores(priority_edges)
Doctest:
>>> import wbia
>>> ibs = wbia.opendb('PZ_MTEST')
>>> infr = wbia.AnnotInference(ibs, aids='all')
>>> infr.ensure_mst()
>>> # infr.load_published()
>>> priority_edges = list(infr.edges())
>>> infr.ensure_priority_scores(priority_edges)
Doctest:
>>> from wbia.algo.graph import demo
>>> infr = demo.demodata_infr(num_pccs=6, p_incon=.5, size_std=2)
>>> edges = list(infr.edges())
>>> infr.ensure_priority_scores(edges)
"""
infr.print('Checking for verifiers: %r' % (infr.verifiers,))
if infr.verifiers and infr.ibs is not None:
infr.print(
'Prioritizing {} edges with one-vs-one probs'.format(len(priority_edges)),
1,
)
infr.print('Using thresholds: %r' % (infr.task_thresh,))
infr.print(
'Using infr.params[autoreview.enabled] : %r'
% (infr.params['autoreview.enabled'],)
)
infr.print(
'Using infr.params[autoreview.prioritize_nonpos]: %r'
% (infr.params['autoreview.prioritize_nonpos'],)
)
infr.ensure_task_probs(priority_edges)
infr.load_published()
primary_task = 'match_state'
match_probs = infr.task_probs[primary_task]
primary_thresh = infr.task_thresh[primary_task]
# Read match_probs into a DataFrame
primary_probs = pd.DataFrame(
ut.take(match_probs, priority_edges),
index=nxu.ensure_multi_index(priority_edges, ('aid1', 'aid2')),
)
# Convert match-state probabilities into priorities
prob_match = primary_probs[POSTV]
# Initialize priorities to probability of matching
default_priority = prob_match.copy()
# If the edges are currently between the same individual, then
# prioritize by non-positive probability (because those edges might
# expose an inconsistency)
already_pos = [
infr.pos_graph.node_label(u) == infr.pos_graph.node_label(v)
for u, v in priority_edges
]
default_priority[already_pos] = 1 - default_priority[already_pos]
if infr.params['autoreview.enabled']:
if infr.params['autoreview.prioritize_nonpos']:
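# Adding 1 lifts these edges above every default priority, since the
# default priorities are probabilities bounded by 1.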
# Give positives that pass automatic thresholds high priority
_probs = primary_probs[POSTV]
flags = _probs > primary_thresh[POSTV]
default_priority[flags] = (
np.maximum(default_priority[flags], _probs[flags]) + 1
)
# Give negatives that pass automatic thresholds high priority
_probs = primary_probs[NEGTV]
flags = _probs > primary_thresh[NEGTV]
default_priority[flags] = (
np.maximum(default_priority[flags], _probs[flags]) + 1
)
# Give not-comps that pass automatic thresholds high priority
_probs = primary_probs[INCMP]
flags = _probs > primary_thresh[INCMP]
default_priority[flags] = (
np.maximum(default_priority[flags], _probs[flags]) + 1
)
infr.set_edge_attrs('prob_match', prob_match.to_dict())
infr.set_edge_attrs('default_priority', default_priority.to_dict())
metric = 'default_priority'
priority = default_priority
elif infr.cm_list is not None:
infr.print(
'Prioritizing {} edges with one-vs-many scores'.format(
len(priority_edges)
),
1,
)
# Not given any deploy classifier, this is the best we can do
scores = infr._make_lnbnn_scores(priority_edges)
metric = 'normscore'
priority = scores
else:
infr.print(
'WARNING: No verifiers to prioritize {} edge(s)'.format(
len(priority_edges)
)
)
metric = 'random'
priority = np.zeros(len(priority_edges)) + 1e-6
infr.set_edge_attrs(metric, ut.dzip(priority_edges, priority))
return metric, priority
def ensure_prioritized(infr, priority_edges):
priority_edges = list(priority_edges)
metric, priority = infr.ensure_priority_scores(priority_edges)
infr.prioritize(metric=metric, edges=priority_edges, scores=priority)
@profile
def add_candidate_edges(infr, candidate_edges):
candidate_edges = list(candidate_edges)
new_edges = infr.ensure_edges_from(candidate_edges)
if infr.test_mode:
infr.apply_edge_truth(new_edges)
if infr.params['redun.enabled']:
priority_edges = list(infr.filter_edges_flagged_as_redun(candidate_edges))
infr.print(
'Got {} candidate edges, {} are new, '
'and {} are non-redundant'.format(
len(candidate_edges), len(new_edges), len(priority_edges)
)
)
else:
infr.print(
'Got {} candidate edges and {} are new'.format(
len(candidate_edges), len(new_edges)
)
)
priority_edges = candidate_edges
if len(priority_edges) > 0:
infr.ensure_prioritized(priority_edges)
if hasattr(infr, 'on_new_candidate_edges'):
# hack callback for demo
infr.on_new_candidate_edges(infr, new_edges)
return len(priority_edges)
@profile
def refresh_candidate_edges(infr):
"""
Search for candidate edges.
Assign each edge a priority and add to queue.
"""
infr.print('refresh_candidate_edges', 1)
infr.assert_consistency_invariant()
if infr.ibs is not None:
candidate_edges = infr.find_lnbnn_candidate_edges()
elif hasattr(infr, 'dummy_verif'):
infr.print('Searching for dummy candidates')
infr.print(
'dummy vsone params ='
+ ut.repr4(infr.dummy_verif.dummy_params, nl=1, si=True)
)
ranks_top = infr.params['ranking.ntop']
candidate_edges = infr.dummy_verif.find_candidate_edges(K=ranks_top)
else:
raise Exception('No method available to search for candidate edges')
infr.add_candidate_edges(candidate_edges)
infr.assert_consistency_invariant()
@profile
def _make_task_probs(infr, edges):
"""
Predict edge probs for each pairwise classifier task
"""
if infr.verifiers is None:
raise ValueError('no classifiers exist')
if not isinstance(infr.verifiers, dict):
raise NotImplementedError('need to deploy or implement eval prediction')
task_keys = list(infr.verifiers.keys())
task_probs = {}
# infr.print('[make_taks_probs] predict {} for {} edges'.format(
# ut.conj_phrase(task_keys, 'and'), len(edges)))
for task_key in task_keys:
infr.print('predict {} for {} edges'.format(task_key, len(edges)))
verif = infr.verifiers[task_key]
probs_df = verif.predict_proba_df(edges)
task_probs[task_key] = probs_df
return task_probs
@profile
def _make_lnbnn_scores(infr, edges):
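"""
Scales the precomputed LNBNN scores of the given edges by the maximum
finite score (the inf-norm); edges without a score become NaN.
"""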
edge_to_data = infr._get_cm_edge_data(edges)
edges = list(edge_to_data.keys())
edge_scores = list(ut.take_column(edge_to_data.values(), 'score'))
edge_scores = ut.replace_nones(edge_scores, np.nan)
edge_scores = np.array(edge_scores)
# take the inf-norm
normscores = edge_scores / vt.safe_max(edge_scores, nans=False)
return normscores
if __name__ == '__main__':
r"""
CommandLine:
python -m wbia.algo.graph.mixin_matching
python -m wbia.algo.graph.mixin_matching --allexamples
"""
import multiprocessing
multiprocessing.freeze_support() # for win32
import utool as ut # NOQA
ut.doctest_funcs()