Source code for scitex_ml.utils._under_sample

#!/usr/bin/env python3


from collections import Counter

import numpy as np


def under_sample(y, replace=False):
    """Return indices that balance the classes in ``y`` by under-sampling.

    Every class is randomly sampled down to the size of the smallest
    class, so the returned indices select the same number of samples
    for each class.

    Parameters
    ----------
    y : array-like
        One-dimensional sequence of class labels (list, tuple, or NumPy
        array).
    replace : bool, default False
        Whether the indices within each class are drawn with replacement.

    Returns
    -------
    numpy.ndarray
        Indices into ``y``, grouped by class in order of first appearance;
        the order within each class is random. Empty when ``y`` is empty.

    Example
    -------
        t = ['a', 'b', 'c', 'b', 'c', 'a', 'c']
        print(under_sample(t))  # e.g. [5 0 1 3 4 6]
        print(under_sample(t))  # e.g. [5 0 1 3 6 2]
    """
    # Coerce to an array so that ``labels == k`` below is an elementwise
    # comparison; on a plain list it would collapse to a single ``False``.
    labels = np.asarray(y)

    # Nothing to sample from: return an empty index array instead of
    # failing inside ``min`` on an empty counter.
    if labels.size == 0:
        return np.array([], dtype=np.intp)

    class_counts = Counter(labels)

    # Each class is reduced to the size of the smallest class.
    n_per_class = min(class_counts.values())

    # Draw randomly, with or without replacement, from each class's indices.
    indices = np.hstack(
        [
            np.random.choice(
                np.flatnonzero(labels == k),
                size=n_per_class,
                replace=replace,
            )
            for k in class_counts
        ]
    )
    return indices
if __name__ == "__main__":
    # Manual smoke check: print balanced indices for a small label array.
    demo_labels = np.array(["a", "b", "c", "b", "c", "a", "c"])
    balanced_indices = under_sample(demo_labels)
    print(balanced_indices)