# Source code for hum.gen.signal_generators

"""
Generating Signals
"""
import numpy as np
import random
from itertools import chain
import pandas as pd

# Default mean number of repetitions per word produced by gen_words.
DFLT_WORD_LENGTH = 30
# Default symbol set shared by the generators in this module.
DFLT_ALPHABET = ['a', 'b', 'c', 'd', 'e']


def normal_dist(mu, sigma):
    """
    Draw a single random sample from a normal distribution.

    A standard-normal draw from ``np.random.randn()`` is scaled by ``sigma``
    and shifted by ``mu``; ``sigma`` therefore acts as the scale (standard
    deviation) of the result.

    :param mu: mean of the distribution
    :param sigma: multiplier applied to the standard-normal draw
    :return: one sample, ``sigma * z + mu``

    >>> normal_dist(5, 0)
    5.0
    """
    standard_draw = np.random.randn()
    return mu + standard_draw * sigma
def gen_words(
    N=DFLT_WORD_LENGTH,
    alphabet=DFLT_ALPHABET,
    spread_pct=0.01,
    proba_dist='normal',
):
    """
    Return an endless generator of lists, each made of n repetitions of one
    symbol picked at random from ``alphabet``.

    For every list, n is drawn anew from ``normal_dist(N, N * spread_pct)``
    and truncated with ``int``; a non-positive n yields an empty list.

    :param N: mean number of repetitions
    :param alphabet: sequence of symbols to pick from
    :param spread_pct: factor multiplied by N to get the scale passed to
        ``normal_dist``
    :param proba_dist: name of the length distribution; only ``'normal'``
        is implemented
    :raises NotImplementedError: on the first ``next()`` call (this is a
        generator function) when ``proba_dist`` is not ``'normal'``

    >>> gen = gen_words(N=3, alphabet=['foo'], spread_pct=0)
    >>> assert next(gen) == ['foo', 'foo', 'foo']
    """
    if proba_dist == 'normal':
        mu = N
        sigma = N * spread_pct
    else:
        raise NotImplementedError(
            f'Probability distribution {proba_dist} not implemented'
        )
    while True:
        # Sample inside the loop: drawing the length once before the loop
        # would give every word the same length.
        length = int(normal_dist(mu, sigma))
        yield [random.choice(alphabet)] * length
def categorical_gen(gen_it):
    """
    Flatten the iterable of iterables produced by calling ``gen_it``.

    ``gen_it`` is called once with no arguments (on the first ``next()``);
    the items of each inner iterable are then yielded one by one, in order.

    >>> list(categorical_gen(lambda: [['a', 'a'], ['b']]))
    ['a', 'a', 'b']
    """
    for group in gen_it():
        for item in group:
            yield item
def alphabet_to_bins(alphabet=DFLT_ALPHABET):
    """
    Return a dictionary mapping each word in ``alphabet`` to a
    ``(low, high)`` bin of size 10.0, the bins ranging from 0 to
    ``10 * len(alphabet)`` in the order the words appear.

    The bin edges are built-in ``float`` values.

    :param alphabet: sized, iterable collection of hashable symbols
    :return: ``{symbol: (low, high)}``

    >>> alphabet_to_bins()
    {'a': (0.0, 10.0), 'b': (10.0, 20.0), 'c': (20.0, 30.0), 'd': (30.0, 40.0), 'e': (40.0, 50.0)}
    """
    n_symbols = len(alphabet)
    # .tolist() turns the NumPy scalars into built-in floats, so the repr of
    # the result matches the doctest above regardless of how the installed
    # NumPy version prints its scalar types.
    edges = np.linspace(0.0, 10.0 * n_symbols, n_symbols + 1).tolist()
    return {letter: (edges[i], edges[i + 1]) for i, letter in enumerate(alphabet)}
# def context_to_signal(cat_gen, dict_symbol_to_interval) # symbol = next(cat_gen) # low, hi = dict_symbol_to_interval[symbol] # res = np.random.uniform(low=0.0, high=1.0)
def call_repeatedly(func, *args, **kwargs):
    """
    Return an endless generator; each step calls ``func(*args, **kwargs)``
    again and yields that call's result.

    >>> gen = call_repeatedly(max, 3, 7)
    >>> next(gen)
    7
    >>> next(gen)
    7
    """
    running = True
    while running:
        outcome = func(*args, **kwargs)
        yield outcome
def bernoulli(p_out=0.1):
    """
    Return a random sample of a Bernoulli distribution: 1 with probability
    ``p_out``, 0 otherwise.

    :param p_out: probability of returning 1
    :return: 0 or 1, as a built-in ``int``

    >>> assert bernoulli(0) == 0
    >>> assert bernoulli(1) == 1
    """
    outcomes = [0, 1]
    weights = [1.0 - p_out, p_out]
    draw = np.random.choice(outcomes, size=1, replace=True, p=weights)
    # Index the single element explicitly: calling int() on a 1-element array
    # is deprecated in recent NumPy releases.
    return int(draw[0])
def bernoulli_gen(p_out=0.5):
    """
    Return an endless generator of Bernoulli samples.

    Each item is the result of a fresh ``bernoulli(p_out=p_out)`` call,
    produced through ``call_repeatedly``.

    :param p_out: probability forwarded to ``bernoulli``
    """
    sampler_kwargs = {'p_out': p_out}
    return call_repeatedly(bernoulli, **sampler_kwargs)
def inlier_outlier(segment, interval_size, outlier_status):
    """
    Draw a uniform random value inside or outside of ``segment``.

    When ``outlier_status == 0`` the value is drawn uniformly between the two
    ends of ``segment``. Otherwise it is drawn uniformly between
    ``segment[1]`` and ``segment[0] + interval_size`` and then reduced modulo
    ``interval_size``.

    :param segment: pair ``(low, high)``
    :param interval_size: modulus applied to outlier draws
    :param outlier_status: 0 for an inlier draw, anything else for an outlier

    >>> assert inlier_outlier([0,1], 10, 0) < 1
    >>> low = np.random.randint(5, 10)
    >>> high = np.random.randint(low, 20)
    >>> assert inlier_outlier([low, high], low*high, 1) < low + low*high
    """
    seg_start, seg_end = segment
    is_inlier = outlier_status == 0
    if is_inlier:
        return np.random.uniform(seg_start, seg_end)
    # Outlier: sample the complementary stretch, then wrap it back.
    wrapped_end = seg_start + interval_size
    return np.random.uniform(seg_end, wrapped_end) % interval_size
def _sample_for_symbol(symb, outlier, bins, span):
    """Draw one signal value for ``symb`` from its bin (or outside it if ``outlier`` is non-zero)."""
    low, high = bins[symb]
    return inlier_outlier((low, high), span, outlier)


def signal(symbol_gen, outlier_gen, alphabet):
    """
    Yield one signal value per (symbol, outlier flag) pair.

    Each step takes the next symbol and the next outlier flag, looks up the
    symbol's bin in ``alphabet_to_bins(alphabet)`` and draws a value with
    ``inlier_outlier`` using an interval size of ``len(alphabet) * 10``.

    The generator ends as soon as either input is exhausted.

    :param symbol_gen: iterable of symbols present in ``alphabet``
    :param outlier_gen: iterable of outlier flags (0 means inlier)
    :param alphabet: collection of symbols defining the bins
    """
    # The bins depend only on the alphabet, so build them once.
    bins = alphabet_to_bins(alphabet)
    span = len(alphabet) * 10
    # zip pulls the symbol first and the flag second, and stops cleanly on
    # exhaustion; a bare next() inside a generator would surface as
    # RuntimeError instead (PEP 479).
    for symb, outlier in zip(symbol_gen, outlier_gen):
        yield _sample_for_symbol(symb, outlier, bins, span)


def create_session(symbol_gen, outlier_gen, alphabet, session_length=50):
    """
    Collect ``session_length`` steps of symbols, outlier flags and signals.

    :param symbol_gen: iterator of symbols present in ``alphabet``
    :param outlier_gen: iterator of outlier flags (0 means inlier)
    :param alphabet: collection of symbols defining the bins
    :param session_length: number of steps to draw
    :return: tuple ``(symbs, outliers, sigs)`` of three equally long lists
    """
    # The bins depend only on the alphabet, so build them once.
    bins = alphabet_to_bins(alphabet)
    span = len(alphabet) * 10
    symbs = []
    outliers = []
    sigs = []
    for _ in range(session_length):
        symb = next(symbol_gen)
        symbs.append(symb)
        outlier = next(outlier_gen)
        outliers.append(outlier)
        sigs.append(_sample_for_symbol(symb, outlier, bins, span))
    return symbs, outliers, sigs
def string_to_num(word):
    """
    Convert a string to a list of numbers corresponding to the alphabetical
    order of the characters in the string.

    The distinct items of ``word`` are sorted and each item is replaced by
    its position in that sorted order. Any sequence of hashable, mutually
    comparable items works, not only strings.

    :param word: string (or sequence) to encode
    :return: list of ints, one per item of ``word``

    >>> assert string_to_num('abc') == [0, 1, 2]
    >>> assert string_to_num('generator') == [2, 1, 3, 1, 5, 0, 6, 4, 5]
    """
    # One dict lookup per item instead of a linear list.index scan.
    rank_of = {letter: rank for rank, letter in enumerate(sorted(set(word)))}
    return [rank_of[letter] for letter in word]
def session_to_df(session):
    """
    Turn a session into a DataFrame with one row per step.

    :param session: tuple ``(symbs, outliers, sigs)`` of equally long
        sequences, as returned by ``create_session``
    :return: DataFrame with columns ``'symbols'`` (each symbol replaced by
        its rank among the distinct symbols, via ``string_to_num``),
        ``'outliers'`` and ``'signal'``
    """
    symbs, outliers, sigs = session
    df = pd.DataFrame()
    # Rank the symbols directly instead of joining them into one string
    # first: joining splits multi-character symbols into characters, so the
    # column would no longer line up with the other two.
    df['symbols'] = string_to_num(symbs)
    df['outliers'] = outliers
    df['signal'] = sigs
    return df