amachine.am_generator

  1import random
  2import numpy as np
  3import matplotlib.pyplot as plt
  4from pathlib import Path
  5from copy import deepcopy
  6
  7from .am_create import star, star_join, isomorphic_to
  8import random
  9
 10class Generator :
 11
 12	def __init__(self, random_seed=None ) -> None :
 13		
 14		if random_seed :
 15		
 16			np.random.seed( random_seed ) 
 17			random.seed( random_seed )
 18
 19		self.vocabulary = { 
 20			
 21			# 1. Printable ASCII: space (32) through tilde (126) → 95 chars
 22			"ASCII" : [chr(i) for i in range(32, 127)],
 23
 24			# 2. Greek capitals Α–Τ (19), skipping U+03A2 (undefined slot)
 25			"greek_upper" : [chr(i) for i in range(0x0391, 0x03A5) if i != 0x03A2][:19],
 26
 27			# 3. Greek smalls α–τ (19), skipping U+03C2 (ς, final-sigma variant)
 28			"greek_lower" : [chr(i) for i in range(0x03B1, 0x03C5) if i != 0x03C2][:19],
 29
 30			# 4. Geometric Shapes block: U+25A0–U+25FF → 96 chars
 31			"geometric" : [chr(i) for i in range(0x25A0, 0x2600)]
 32		}
 33
 34		self.exit_symbol = self.vocabulary[ "geometric" ][ 0 ]
 35
 36	def generate( 
 37		self, n_states : int, 
 38		n_symbols, 
 39		n_modes : int,
 40		mode_alphas : list[float],
 41		residency_factor : float,
 42		levels : int,
 43		output_dir : Path ) :
 44		
 45		# 0-9 ASCII + lowercase letters + uppercase letters
 46		normal_symbols  = [ chr(i) for i in range( 48,  58 ) ]
 47		normal_symbols += [ chr(i) for i in range( 97, 123 ) ]
 48		normal_symbols += [ chr(i) for i in range( 65,  91 ) ]
 49		enter_symbols = self.vocabulary[ "greek_lower" ] + self.vocabulary[ "greek_upper" ]
 50
 51		n_gen = n_modes*2
 52
 53		# n_gen^levels leaves — correct for a full n-ary tree
 54		HMMs = [
 55		    star(
 56		        exit_symbol=self.exit_symbol,
 57		        enter_symbols=enter_symbols[ 0:n_modes*2 ],
 58		        normal_symbols=normal_symbols,
 59		        n_modes=n_modes,
 60		        n_isomorphic=2,
 61		        randomness=0.3,
 62		        connectedness=0.5,
 63		        residency_factor=residency_factor,
 64		        n_normal_symbols=n_symbols,
 65		        t_states_per_HMM=n_states
 66		    )
 67		    for i in range( n_gen**levels )
 68		]
 69
 70		es_per_lev = n_gen
 71		es_offset = n_modes*2
 72
 73		# Reduce bottom-up, levels-1 times:
 74		#   n_gen^levels → n_gen^(levels-1) → ... → n_gen
 75		for i in range( levels - 1 ) :
 76		    next_level = []
 77		    x = 0
 78
 79		    es = enter_symbols[ es_offset + es_per_lev*i : es_offset+es_per_lev*(i+1) ]
 80
 81		    for k in range( len(HMMs) // n_gen ) :  # shrinks each pass
 82		        m = star_join(
 83		            exit_symbol=self.exit_symbol,
 84		            enter_symbols=es,
 85		            HMMs=HMMs[ x:x+n_gen ],
 86		            mode_residency_factor=0.5
 87		        )
 88		        next_level.append( m )
 89		        x += n_gen
 90		    HMMs = deepcopy( next_level )
 91
 92		# HMMs now has exactly n_gen elements — join into root
 93		return star_join(
 94		    exit_symbol=self.exit_symbol,
 95		    enter_symbols=normal_symbols,
 96		    HMMs=HMMs,
 97		    mode_residency_factor=0.5
 98		)
 99
if __name__ == "__main__":

	# Settings for this demo run.
	random_seed = 42
	n_modes = 1
	n_states = 27
	n_symbols = 4
	alpha = 0.3
	residency_factor = 0.5

	generator = Generator(random_seed=random_seed)

	# Create the data folder (and any missing parents) if it is not there yet.
	path = Path("../data/aM_7-mode-iso")
	path.mkdir(parents=True, exist_ok=True)

	# One alpha per mode.
	aM = generator.generate(
		n_states=n_states,
		n_symbols=n_symbols,
		n_modes=n_modes,
		mode_alphas=[alpha for _ in range(n_modes)],
		residency_factor=residency_factor,
		levels=3,
		output_dir=path,
	)

	# Minimize first, then report the epsilon-HMM check on the result.
	aM.minimize(retain_names=True)
	print(aM.is_epsilon_HMM())

	# The graph is drawn into the current working directory.
	aM.draw_graph(output_dir="./")

	print(f"h_mu : {aM.h_mu()}")
	print(f"C_mu : {aM.C_mu()}")
	# print( f"E    : {aM.E()}" )

	# ---- Disabled experiments, kept for reference -------------------------

	# aM.to_q_weighted()
	# print( aM.is_epsilon_HMM( verbose=True) )
	# aM.draw_graph(output_dir="./")

	# p = aM.get_fractional_stationary_distribution()

	# rm = aM.get_reverse()

	# mspq = aM.get_msp_qw( exact_state_cap=5000 )
	# print( "Done" )

	# print( aM.is_topological_epsilon_HMM() )
	# print( aM.is_epsilon_HMM() )
	# aM.draw_graph(output_dir="./")

	# aM.minimize(retain_names=True)
	# print( aM.is_topological_epsilon_HMM() )
	# print( aM.is_epsilon_HMM() )
	# aM.draw_graph(output_dir="./")

	# aM.save_config( path, with_complexity=True, with_block_convergence=True )

	# data = aM.generate_data(
	# 	file_prefix=path / "train",
	# 	n_gen=1_000_000_000,
	# 	include_states=True,
	# 	isomorphic_shifts=set({1}),
	# 	random_seed=random_seed )

	# aM.generate_data(
	# 	file_prefix=path / "test",
	# 	n_gen=250_000_000,
	# 	include_states=True,
	# 	isomorphic_shifts=set({1}),
	# 	random_seed=random_seed )
class Generator:
class Generator:
	"""Assemble a hierarchical machine from ``star`` / ``star_join`` parts.

	The instance holds a small symbol vocabulary (``self.vocabulary``) and the
	symbol handed to every component as its exit symbol
	(``self.exit_symbol``).
	"""

	def __init__(self, random_seed=None) -> None:
		"""Optionally seed the global RNGs and build the symbol vocabulary.

		Args:
			random_seed: Seed passed to both ``numpy.random.seed`` and
				``random.seed``. ``None`` (the default) leaves both
				generators untouched.
		"""
		# Compare against None rather than relying on truthiness, so that a
		# perfectly valid seed of 0 is not silently ignored.
		if random_seed is not None:
			np.random.seed(random_seed)
			random.seed(random_seed)

		self.vocabulary = {
			# Printable ASCII: space (32) through tilde (126) -> 95 chars.
			"ASCII": [chr(i) for i in range(32, 127)],
			# Greek capitals U+0391..U+03A4 minus the unassigned U+03A2 -> 19.
			"greek_upper": [chr(i) for i in range(0x0391, 0x03A5) if i != 0x03A2],
			# Greek smalls U+03B1..U+03C4 minus final sigma U+03C2 -> 19.
			"greek_lower": [chr(i) for i in range(0x03B1, 0x03C5) if i != 0x03C2],
			# Geometric Shapes block U+25A0..U+25FF -> 96 chars.
			"geometric": [chr(i) for i in range(0x25A0, 0x2600)],
		}

		# The first geometric shape (U+25A0) is used as the exit symbol.
		self.exit_symbol = self.vocabulary["geometric"][0]

	def generate(
		self, n_states: int,
		n_symbols,
		n_modes: int,
		mode_alphas: list[float],
		residency_factor: float,
		levels: int,
		output_dir: Path):
		"""Build ``(2*n_modes)**levels`` leaves and join them into one machine.

		Leaves are created with ``star``; groups of ``2*n_modes`` consecutive
		machines are then merged with ``star_join`` ``levels - 1`` times, and
		whatever remains is merged once more to form the root.

		Args:
			n_states: Forwarded to ``star`` as ``t_states_per_HMM``.
			n_symbols: Forwarded to ``star`` as ``n_normal_symbols``.
			n_modes: Forwarded to ``star``; also fixes the fan-out
				(``2 * n_modes``) of every join.
			mode_alphas: Accepted for interface compatibility; not used here.
			residency_factor: Forwarded to ``star``.
			levels: Depth of the tree of joins.
			output_dir: Accepted for interface compatibility; not used here.

		Returns:
			The object returned by the final (root) ``star_join`` call.
		"""
		# Digits, then lowercase, then uppercase ASCII letters (62 symbols).
		normal_symbols = (
			[chr(c) for c in range(48, 58)]
			+ [chr(c) for c in range(97, 123)]
			+ [chr(c) for c in range(65, 91)]
		)
		# 38 Greek letters in total.
		# NOTE(review): the slices below come up short (or empty) once
		# 2 * n_modes * levels exceeds that - confirm star_join copes.
		enter_symbols = self.vocabulary["greek_lower"] + self.vocabulary["greek_upper"]

		fan_out = n_modes * 2

		# Every leaf shares the first `fan_out` enter symbols.
		machines = [
			star(
				exit_symbol=self.exit_symbol,
				enter_symbols=enter_symbols[0:fan_out],
				normal_symbols=normal_symbols,
				n_modes=n_modes,
				n_isomorphic=2,
				randomness=0.3,
				connectedness=0.5,
				residency_factor=residency_factor,
				n_normal_symbols=n_symbols,
				t_states_per_HMM=n_states
			)
			for _ in range(fan_out ** levels)
		]

		# Bottom-up reduction; each pass consumes the next `fan_out` symbols.
		for level in range(levels - 1):
			first = fan_out * (level + 1)
			level_symbols = enter_symbols[first:first + fan_out]

			merged = []
			for group in range(len(machines) // fan_out):
				start = group * fan_out
				merged.append(
					star_join(
						exit_symbol=self.exit_symbol,
						enter_symbols=level_symbols,
						HMMs=machines[start:start + fan_out],
						mode_residency_factor=0.5
					)
				)
			machines = deepcopy(merged)

		# The root join is handed the alphanumeric symbols as enter symbols.
		return star_join(
			exit_symbol=self.exit_symbol,
			enter_symbols=normal_symbols,
			HMMs=machines,
			mode_residency_factor=0.5
		)
Generator(random_seed=None)
13	def __init__(self, random_seed=None ) -> None :
14		
15		if random_seed :
16		
17			np.random.seed( random_seed ) 
18			random.seed( random_seed )
19
20		self.vocabulary = { 
21			
22			# 1. Printable ASCII: space (32) through tilde (126) → 95 chars
23			"ASCII" : [chr(i) for i in range(32, 127)],
24
25			# 2. Greek capitals Α–Τ (19), skipping U+03A2 (undefined slot)
26			"greek_upper" : [chr(i) for i in range(0x0391, 0x03A5) if i != 0x03A2][:19],
27
28			# 3. Greek smalls α–τ (19), skipping U+03C2 (ς, final-sigma variant)
29			"greek_lower" : [chr(i) for i in range(0x03B1, 0x03C5) if i != 0x03C2][:19],
30
31			# 4. Geometric Shapes block: U+25A0–U+25FF → 96 chars
32			"geometric" : [chr(i) for i in range(0x25A0, 0x2600)]
33		}
34
35		self.exit_symbol = self.vocabulary[ "geometric" ][ 0 ]
vocabulary
exit_symbol
def generate( self, n_states: int, n_symbols, n_modes: int, mode_alphas: list[float], residency_factor: float, levels: int, output_dir: pathlib.Path):
37	def generate( 
38		self, n_states : int, 
39		n_symbols, 
40		n_modes : int,
41		mode_alphas : list[float],
42		residency_factor : float,
43		levels : int,
44		output_dir : Path ) :
45		
46		# 0-9 ASCII + lowercase letters + uppercase letters
47		normal_symbols  = [ chr(i) for i in range( 48,  58 ) ]
48		normal_symbols += [ chr(i) for i in range( 97, 123 ) ]
49		normal_symbols += [ chr(i) for i in range( 65,  91 ) ]
50		enter_symbols = self.vocabulary[ "greek_lower" ] + self.vocabulary[ "greek_upper" ]
51
52		n_gen = n_modes*2
53
54		# n_gen^levels leaves — correct for a full n-ary tree
55		HMMs = [
56		    star(
57		        exit_symbol=self.exit_symbol,
58		        enter_symbols=enter_symbols[ 0:n_modes*2 ],
59		        normal_symbols=normal_symbols,
60		        n_modes=n_modes,
61		        n_isomorphic=2,
62		        randomness=0.3,
63		        connectedness=0.5,
64		        residency_factor=residency_factor,
65		        n_normal_symbols=n_symbols,
66		        t_states_per_HMM=n_states
67		    )
68		    for i in range( n_gen**levels )
69		]
70
71		es_per_lev = n_gen
72		es_offset = n_modes*2
73
74		# Reduce bottom-up, levels-1 times:
75		#   n_gen^levels → n_gen^(levels-1) → ... → n_gen
76		for i in range( levels - 1 ) :
77		    next_level = []
78		    x = 0
79
80		    es = enter_symbols[ es_offset + es_per_lev*i : es_offset+es_per_lev*(i+1) ]
81
82		    for k in range( len(HMMs) // n_gen ) :  # shrinks each pass
83		        m = star_join(
84		            exit_symbol=self.exit_symbol,
85		            enter_symbols=es,
86		            HMMs=HMMs[ x:x+n_gen ],
87		            mode_residency_factor=0.5
88		        )
89		        next_level.append( m )
90		        x += n_gen
91		    HMMs = deepcopy( next_level )
92
93		# HMMs now has exactly n_gen elements — join into root
94		return star_join(
95		    exit_symbol=self.exit_symbol,
96		    enter_symbols=normal_symbols,
97		    HMMs=HMMs,
98		    mode_residency_factor=0.5
99		)