amachine.am_generator
"""Generate a hierarchical machine by recursively star-joining random HMMs.

Running this module as a script builds one machine with fixed parameters,
minimizes it, draws its graph and prints its ``h_mu`` and ``C_mu`` values.
"""

import random
from copy import deepcopy
from pathlib import Path

import matplotlib.pyplot as plt
import numpy as np

from .am_create import star, star_join, isomorphic_to


class Generator:
    """Build a tree of ``star`` machines and fold it into one root machine.

    Attributes:
        vocabulary: Named pools of single-character symbols.
        exit_symbol: First character of the "geometric" pool (U+25A0),
            passed as ``exit_symbol`` to every ``star`` / ``star_join`` call.
    """

    def __init__(self, random_seed=None) -> None:
        """Optionally seed the global RNGs and build the symbol pools.

        Args:
            random_seed: Seed applied to both ``numpy.random`` and ``random``.
                ``None`` leaves both generators untouched.
        """
        # Compare against None explicitly: a plain truthiness test would
        # silently ignore the perfectly valid seed 0.
        if random_seed is not None:
            np.random.seed(random_seed)
            random.seed(random_seed)

        self.vocabulary = {
            # 1. Printable ASCII: space (32) through tilde (126) -> 95 chars
            "ASCII": [chr(i) for i in range(32, 127)],
            # 2. Greek capitals Α–Τ (19), skipping U+03A2 (undefined slot)
            "greek_upper": [
                chr(i) for i in range(0x0391, 0x03A5) if i != 0x03A2
            ][:19],
            # 3. Greek smalls α–τ (19), skipping U+03C2 (ς, final-sigma variant)
            "greek_lower": [
                chr(i) for i in range(0x03B1, 0x03C5) if i != 0x03C2
            ][:19],
            # 4. Geometric Shapes block: U+25A0–U+25FF -> 96 chars
            "geometric": [chr(i) for i in range(0x25A0, 0x2600)],
        }

        self.exit_symbol = self.vocabulary["geometric"][0]

    def generate(
            self, n_states: int,
            n_symbols,
            n_modes: int,
            mode_alphas: list[float],
            residency_factor: float,
            levels: int,
            output_dir: Path):
        """Create ``(2 * n_modes) ** levels`` leaves and join them into a root.

        Args:
            n_states: Forwarded to ``star`` as ``t_states_per_HMM``.
            n_symbols: Forwarded to ``star`` as ``n_normal_symbols``.
            n_modes: Forwarded to ``star`` as ``n_modes``; the tree's
                branching factor is ``2 * n_modes``.
            mode_alphas: Accepted for interface compatibility; not used here.
            residency_factor: Forwarded to ``star`` as ``residency_factor``.
            levels: Depth of the tree. ``levels - 1`` reduction passes are
                run before the final root join.
            output_dir: Accepted for interface compatibility; not used here.

        Returns:
            Whatever the final ``star_join`` call returns (the root machine).
        """
        # 0-9 ASCII + lowercase letters + uppercase letters
        normal_symbols = (
            [chr(i) for i in range(48, 58)]
            + [chr(i) for i in range(97, 123)]
            + [chr(i) for i in range(65, 91)]
        )
        # 19 lowercase + 19 uppercase Greek letters = 38 enter symbols.
        # NOTE(review): leaves plus reduction passes consume
        # 2 * n_modes * levels of them; slices past the end of the pool are
        # silently truncated by Python — confirm that is acceptable.
        enter_symbols = (
            self.vocabulary["greek_lower"] + self.vocabulary["greek_upper"]
        )

        n_gen = n_modes * 2

        # n_gen^levels leaves — correct for a full n-ary tree
        HMMs = [
            star(
                exit_symbol=self.exit_symbol,
                enter_symbols=enter_symbols[0:n_modes * 2],
                normal_symbols=normal_symbols,
                n_modes=n_modes,
                n_isomorphic=2,
                randomness=0.3,
                connectedness=0.5,
                residency_factor=residency_factor,
                n_normal_symbols=n_symbols,
                t_states_per_HMM=n_states,
            )
            for _ in range(n_gen ** levels)
        ]

        es_per_lev = n_gen
        es_offset = n_modes * 2

        # Reduce bottom-up, levels-1 times:
        #   n_gen^levels -> n_gen^(levels-1) -> ... -> n_gen
        for level in range(levels - 1):
            # Each pass gets its own window of enter symbols, placed after
            # the window handed to the leaves.
            first = es_offset + es_per_lev * level
            es = enter_symbols[first:first + es_per_lev]

            # Join consecutive groups of n_gen machines; the list shrinks by
            # a factor of n_gen on every pass.
            next_level = [
                star_join(
                    exit_symbol=self.exit_symbol,
                    enter_symbols=es,
                    HMMs=HMMs[group * n_gen:(group + 1) * n_gen],
                    mode_residency_factor=0.5,
                )
                for group in range(len(HMMs) // n_gen)
            ]
            HMMs = deepcopy(next_level)

        # HMMs now has exactly n_gen elements — join into root.
        # NOTE(review): the root join is handed normal_symbols as its enter
        # symbols, unlike the lower levels — confirm this is intended.
        return star_join(
            exit_symbol=self.exit_symbol,
            enter_symbols=normal_symbols,
            HMMs=HMMs,
            mode_residency_factor=0.5,
        )


if __name__ == "__main__":

    n_modes = 1
    alpha = 0.3
    residency_factor = 0.5
    n_symbols = 4
    n_states = 27
    random_seed = 42

    generator = Generator(random_seed=random_seed)

    path = Path("../data/aM_7-mode-iso")
    path.mkdir(parents=True, exist_ok=True)

    aM = generator.generate(
        n_states=n_states,
        n_symbols=n_symbols,
        n_modes=n_modes,
        mode_alphas=[alpha] * n_modes,
        residency_factor=residency_factor,
        levels=3,
        output_dir=path)

    aM.minimize(retain_names=True)
    print( aM.is_epsilon_HMM() )

    aM.draw_graph(output_dir="./")

    print( f"h_mu : {aM.h_mu()}" )
    print( f"C_mu : {aM.C_mu()}" )
    # print( f"E : {aM.E()}" )

    # aM.to_q_weighted()
    # print( aM.is_epsilon_HMM( verbose=True) )
    # aM.draw_graph(output_dir="./")

    # p = aM.get_fractional_stationary_distribution()

    # rm = aM.get_reverse()

    # mspq = aM.get_msp_qw( exact_state_cap=5000 )
    # print( "Done" )

    # print( aM.is_topological_epsilon_HMM() )
    # print( aM.is_epsilon_HMM() )
    # aM.draw_graph(output_dir="./")

    # aM.minimize(retain_names=True)
    # print( aM.is_topological_epsilon_HMM() )
    # print( aM.is_epsilon_HMM() )
    # aM.draw_graph(output_dir="./")

    # aM.save_config( path, with_complexity=True, with_block_convergence=True )

    # data = aM.generate_data(
    #     file_prefix=path / "train",
    #     n_gen=1_000_000_000,
    #     include_states=True,
    #     isomorphic_shifts=set({1}),
    #     random_seed=random_seed )

    # aM.generate_data(
    #     file_prefix=path / "test",
    #     n_gen=250_000_000,
    #     include_states=True,
    #     isomorphic_shifts=set({1}),
    #     random_seed=random_seed )
class
Generator:
class Generator:
    """Build a tree of ``star`` machines and fold it into one root machine.

    Attributes:
        vocabulary: Named pools of single-character symbols.
        exit_symbol: First character of the "geometric" pool (U+25A0),
            passed as ``exit_symbol`` to every ``star`` / ``star_join`` call.
    """

    def __init__(self, random_seed=None) -> None:
        """Optionally seed the global RNGs and build the symbol pools.

        Args:
            random_seed: Seed applied to both ``numpy.random`` and ``random``.
                ``None`` leaves both generators untouched.
        """
        # Compare against None explicitly: a plain truthiness test would
        # silently ignore the perfectly valid seed 0.
        if random_seed is not None:
            np.random.seed(random_seed)
            random.seed(random_seed)

        self.vocabulary = {
            # 1. Printable ASCII: space (32) through tilde (126) -> 95 chars
            "ASCII": [chr(i) for i in range(32, 127)],
            # 2. Greek capitals Α–Τ (19), skipping U+03A2 (undefined slot)
            "greek_upper": [
                chr(i) for i in range(0x0391, 0x03A5) if i != 0x03A2
            ][:19],
            # 3. Greek smalls α–τ (19), skipping U+03C2 (ς, final-sigma variant)
            "greek_lower": [
                chr(i) for i in range(0x03B1, 0x03C5) if i != 0x03C2
            ][:19],
            # 4. Geometric Shapes block: U+25A0–U+25FF -> 96 chars
            "geometric": [chr(i) for i in range(0x25A0, 0x2600)],
        }

        self.exit_symbol = self.vocabulary["geometric"][0]

    def generate(
            self, n_states: int,
            n_symbols,
            n_modes: int,
            mode_alphas: list[float],
            residency_factor: float,
            levels: int,
            output_dir: Path):
        """Create ``(2 * n_modes) ** levels`` leaves and join them into a root.

        Args:
            n_states: Forwarded to ``star`` as ``t_states_per_HMM``.
            n_symbols: Forwarded to ``star`` as ``n_normal_symbols``.
            n_modes: Forwarded to ``star`` as ``n_modes``; the tree's
                branching factor is ``2 * n_modes``.
            mode_alphas: Accepted for interface compatibility; not used here.
            residency_factor: Forwarded to ``star`` as ``residency_factor``.
            levels: Depth of the tree. ``levels - 1`` reduction passes are
                run before the final root join.
            output_dir: Accepted for interface compatibility; not used here.

        Returns:
            Whatever the final ``star_join`` call returns (the root machine).
        """
        # 0-9 ASCII + lowercase letters + uppercase letters
        normal_symbols = (
            [chr(i) for i in range(48, 58)]
            + [chr(i) for i in range(97, 123)]
            + [chr(i) for i in range(65, 91)]
        )
        # 19 lowercase + 19 uppercase Greek letters = 38 enter symbols.
        # NOTE(review): leaves plus reduction passes consume
        # 2 * n_modes * levels of them; slices past the end of the pool are
        # silently truncated by Python — confirm that is acceptable.
        enter_symbols = (
            self.vocabulary["greek_lower"] + self.vocabulary["greek_upper"]
        )

        n_gen = n_modes * 2

        # n_gen^levels leaves — correct for a full n-ary tree
        HMMs = [
            star(
                exit_symbol=self.exit_symbol,
                enter_symbols=enter_symbols[0:n_modes * 2],
                normal_symbols=normal_symbols,
                n_modes=n_modes,
                n_isomorphic=2,
                randomness=0.3,
                connectedness=0.5,
                residency_factor=residency_factor,
                n_normal_symbols=n_symbols,
                t_states_per_HMM=n_states,
            )
            for _ in range(n_gen ** levels)
        ]

        es_per_lev = n_gen
        es_offset = n_modes * 2

        # Reduce bottom-up, levels-1 times:
        #   n_gen^levels -> n_gen^(levels-1) -> ... -> n_gen
        for level in range(levels - 1):
            # Each pass gets its own window of enter symbols, placed after
            # the window handed to the leaves.
            first = es_offset + es_per_lev * level
            es = enter_symbols[first:first + es_per_lev]

            # Join consecutive groups of n_gen machines; the list shrinks by
            # a factor of n_gen on every pass.
            next_level = [
                star_join(
                    exit_symbol=self.exit_symbol,
                    enter_symbols=es,
                    HMMs=HMMs[group * n_gen:(group + 1) * n_gen],
                    mode_residency_factor=0.5,
                )
                for group in range(len(HMMs) // n_gen)
            ]
            HMMs = deepcopy(next_level)

        # HMMs now has exactly n_gen elements — join into root.
        # NOTE(review): the root join is handed normal_symbols as its enter
        # symbols, unlike the lower levels — confirm this is intended.
        return star_join(
            exit_symbol=self.exit_symbol,
            enter_symbols=normal_symbols,
            HMMs=HMMs,
            mode_residency_factor=0.5,
        )
Generator(random_seed=None)
13 def __init__(self, random_seed=None ) -> None : 14 15 if random_seed : 16 17 np.random.seed( random_seed ) 18 random.seed( random_seed ) 19 20 self.vocabulary = { 21 22 # 1. Printable ASCII: space (32) through tilde (126) → 95 chars 23 "ASCII" : [chr(i) for i in range(32, 127)], 24 25 # 2. Greek capitals Α–Τ (19), skipping U+03A2 (undefined slot) 26 "greek_upper" : [chr(i) for i in range(0x0391, 0x03A5) if i != 0x03A2][:19], 27 28 # 3. Greek smalls α–τ (19), skipping U+03C2 (ς, final-sigma variant) 29 "greek_lower" : [chr(i) for i in range(0x03B1, 0x03C5) if i != 0x03C2][:19], 30 31 # 4. Geometric Shapes block: U+25A0–U+25FF → 96 chars 32 "geometric" : [chr(i) for i in range(0x25A0, 0x2600)] 33 } 34 35 self.exit_symbol = self.vocabulary[ "geometric" ][ 0 ]
def
generate( self, n_states: int, n_symbols, n_modes: int, mode_alphas: list[float], residency_factor: float, levels: int, output_dir: pathlib.Path):
def generate(
        self, n_states: int,
        n_symbols,
        n_modes: int,
        mode_alphas: list[float],
        residency_factor: float,
        levels: int,
        output_dir: Path):
    """Create ``(2 * n_modes) ** levels`` leaves and join them into a root.

    Args:
        n_states: Forwarded to ``star`` as ``t_states_per_HMM``.
        n_symbols: Forwarded to ``star`` as ``n_normal_symbols``.
        n_modes: Forwarded to ``star`` as ``n_modes``; the tree's
            branching factor is ``2 * n_modes``.
        mode_alphas: Accepted for interface compatibility; not used here.
        residency_factor: Forwarded to ``star`` as ``residency_factor``.
        levels: Depth of the tree. ``levels - 1`` reduction passes are
            run before the final root join.
        output_dir: Accepted for interface compatibility; not used here.

    Returns:
        Whatever the final ``star_join`` call returns (the root machine).
    """
    # 0-9 ASCII + lowercase letters + uppercase letters
    normal_symbols = (
        [chr(i) for i in range(48, 58)]
        + [chr(i) for i in range(97, 123)]
        + [chr(i) for i in range(65, 91)]
    )
    # Enter symbols come from the lowercase pool followed by the uppercase
    # Greek pool.
    # NOTE(review): leaves plus reduction passes consume
    # 2 * n_modes * levels of them; slices past the end of the pool are
    # silently truncated by Python — confirm that is acceptable.
    enter_symbols = (
        self.vocabulary["greek_lower"] + self.vocabulary["greek_upper"]
    )

    n_gen = n_modes * 2

    # n_gen^levels leaves — correct for a full n-ary tree
    HMMs = [
        star(
            exit_symbol=self.exit_symbol,
            enter_symbols=enter_symbols[0:n_modes * 2],
            normal_symbols=normal_symbols,
            n_modes=n_modes,
            n_isomorphic=2,
            randomness=0.3,
            connectedness=0.5,
            residency_factor=residency_factor,
            n_normal_symbols=n_symbols,
            t_states_per_HMM=n_states,
        )
        for _ in range(n_gen ** levels)
    ]

    es_per_lev = n_gen
    es_offset = n_modes * 2

    # Reduce bottom-up, levels-1 times:
    #   n_gen^levels -> n_gen^(levels-1) -> ... -> n_gen
    for level in range(levels - 1):
        # Each pass gets its own window of enter symbols, placed after the
        # window handed to the leaves.
        first = es_offset + es_per_lev * level
        es = enter_symbols[first:first + es_per_lev]

        # Join consecutive groups of n_gen machines; the list shrinks by a
        # factor of n_gen on every pass.
        next_level = [
            star_join(
                exit_symbol=self.exit_symbol,
                enter_symbols=es,
                HMMs=HMMs[group * n_gen:(group + 1) * n_gen],
                mode_residency_factor=0.5,
            )
            for group in range(len(HMMs) // n_gen)
        ]
        HMMs = deepcopy(next_level)

    # HMMs now has exactly n_gen elements — join into root.
    # NOTE(review): the root join is handed normal_symbols as its enter
    # symbols, unlike the lower levels — confirm this is intended.
    return star_join(
        exit_symbol=self.exit_symbol,
        enter_symbols=normal_symbols,
        HMMs=HMMs,
        mode_residency_factor=0.5,
    )