# SMSD 1000+ Molecule Benchmark — Diverse Molecules
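# Format: SMILES<TAB>name (one molecule per line; lines starting with '#' are comments)
# NOTE: the "Lines N-M" ranges below are approximate entry indices that exclude comment lines; they are not physical file line numbers.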
# Lines 1-50: Tiny fragments (1-5 atoms)
# Lines 51-150: Small aromatics (6-12 atoms)
# Lines 151-350: Drug-like (13-30 atoms) — FDA approved drugs
# Lines 351-550: Complex drugs (31-50 atoms) — kinase inhibitors, antibiotics
# Lines 551-700: Large molecules (51-100 atoms) — macrolides, peptides
# Lines 701-800: Very large (100+ atoms) — vancomycin, cyclosporin, polymers
# Lines 801-900: Tautomer pairs — from Tautobase and literature
# Lines 901-950: Symmetric molecules — adamantane, cubane, fullerenes, coronene
# Lines 951-1050: Edge cases — charged, isotopes, radicals, organometallics
#
# ============================================================
# SECTION 1: Tiny fragments (1-5 atoms) — Lines 1-50
# ============================================================
C	methane
CC	ethane
CCC	propane
CCCC	n-butane
CC(C)C	isobutane
C=C	ethylene
C#C	acetylene
C=CC	propene
CC=CC	2-butene
C=C=C	allene
CO	methanol
CCO	ethanol
C=O	formaldehyde
CC=O	acetaldehyde
CC(=O)C	acetone
OC=O	formic-acid
CC(=O)O	acetic-acid
C(=O)O	formic-acid-v2
CN	methylamine
CCN	ethylamine
C(=O)N	formamide
CC(=O)N	acetamide
CS	methanethiol
CCS	ethanethiol
CF	fluoromethane
CCl	chloromethane
CBr	bromomethane
CI	iodomethane
O	water
OO	hydrogen-peroxide
N	ammonia
NN	hydrazine
S	hydrogen-sulfide
NO	hydroxylamine
O=CO	formic-acid-keto
C(=O)Cl	formyl-chloride
CC#N	acetonitrile
C#N	hydrogen-cyanide
[O-][N+](=O)C	nitromethane
C(F)(F)F	trifluoromethane
C(Cl)(Cl)Cl	chloroform
C(F)(F)(F)F	tetrafluoromethane
CC(=O)OC	methyl-acetate
OCC	ethanol-v2
NCC	ethylamine-v2
SCC	ethanethiol-v2
C1CC1	cyclopropane
C1CCC1	cyclobutane
C1CCCC1	cyclopentane
C=CC=C	1,3-butadiene
# ============================================================
# SECTION 2: Small aromatics (6-12 atoms) — Lines 51-150
# ============================================================
c1ccccc1	benzene
Cc1ccccc1	toluene
CCc1ccccc1	ethylbenzene
c1ccc(cc1)C	toluene-v2
c1ccc(cc1)O	phenol
c1ccc(cc1)N	aniline
c1ccc(cc1)F	fluorobenzene
c1ccc(cc1)Cl	chlorobenzene
c1ccc(cc1)Br	bromobenzene
c1ccc(cc1)I	iodobenzene
c1ccc(cc1)C=O	benzaldehyde
c1ccc(cc1)C(=O)O	benzoic-acid
c1ccc(cc1)C#N	benzonitrile
c1ccc(cc1)OC	anisole
c1ccc(cc1)[N+](=O)[O-]	nitrobenzene
c1ccccc1c1ccccc1	biphenyl
c1ccc2ccccc2c1	naphthalene
c1cc2ccccc2cc1	naphthalene-v2
c1ccc2c(c1)cc[nH]2	indole
c1ccc2[nH]ccc2c1	indole-v2
c1ccncc1	pyridine
c1ccoc1	furan
c1ccsc1	thiophene
c1cc[nH]c1	pyrrole
c1cnc[nH]1	imidazole
c1ccnnc1	pyridazine
c1ccncn1	pyrimidine
c1ccnc(n1)N	2-aminopyrimidine
c1ccc2c(c1)cccn2	quinoline
c1ccc2ncccc2c1	quinoline-v2
c1ccc2cnccc2c1	isoquinoline
c1cc(ccc1O)O	hydroquinone
c1cc(c(c(c1)O)O)O	pyrogallol
c1ccc(c(c1)O)O	catechol
c1cc(c(cc1Cl)Cl)Cl	1,2,4-trichlorobenzene
c1cc2c(cc1)ccc1ccccc12	phenanthrene
c1ccc2cc3ccccc3cc2c1	anthracene
Oc1ccccc1O	catechol-v2
CC(=O)c1ccccc1	acetophenone
c1ccc(cc1)CC	ethylbenzene-v2
c1ccc(-c2ccccc2)cc1	biphenyl-v2
C1=Cc2ccccc2C1	indene
c1cc(oc1C=O)O	5-hydroxyfurfural
c1coc(-c2ccccc2)c1	2-phenylfuran
c1csc(-c2ccccc2)c1	2-phenylthiophene
c1cc(-c2ccccc2)[nH]c1	2-phenylpyrrole
c1cnc(-c2ccccc2)[nH]1	2-phenylimidazole
c1ccnc(-c2ccccc2)c1	2-phenylpyridine
c1cc2c(s1)cccc2	benzothiophene
c1cc2c(o1)cccc2	benzofuran
c1cnc2ccccc2c1	quinazoline-partial
c1ccc2c(c1)ncs2	benzothiazole
c1ccc2c(c1)nco2	benzoxazole
c1ccc2c(c1)nc[nH]2	benzimidazole
c1ccc2c(c1)ccn2	indoline-partial
c1ccc(cc1)C(=O)c1ccccc1	benzophenone
c1ccc(cc1)N=Nc1ccccc1	azobenzene
C(c1ccccc1)(c1ccccc1)O	benzhydrol
c1cc2cc3ccc4cccc5ccc6cc1c2c3c4c56	triphenylene
c1ccc(-c2cccc(-c3ccccc3)c2)cc1	m-terphenyl
c1cccc(c1)O	m-cresol-partial
c1cc(ccc1N)C	p-toluidine
Oc1ccc(cc1)C(=O)O	4-hydroxybenzoic-acid
c1cc(ncc1F)N	2-amino-5-fluoropyridine
c1cnc(nc1N)N	2,4-diaminopyrimidine
c1ccc2oc(=O)ccc2c1	coumarin
c1cc(=O)[nH]c(=O)[nH]1	uracil
c1cc(=O)oc(c1)O	4-hydroxy-2H-pyranone
c1ccc(cc1)/C=C/c1ccccc1	trans-stilbene
c1ccc2c(c1)C=CC=C2	azulene-partial
c1ccc(cc1)S	thiophenol
c1ccc(cc1)P	phenylphosphine
c1ccc(cc1)B(O)O	phenylboronic-acid
c1cc(c(c(c1)F)F)F	1,2,3-trifluorobenzene
c1cc2c(cc1N)[nH]nn2	5-aminobenzotriazole
c1ccc(-c2nnn[nH]2)cc1	5-phenyltetrazole
c1nnc(-c2ccccc2)[nH]1	3-phenyl-1,2,4-triazole
c1cccc(c1)c1cccnc1	3-phenylpyridine
c1cncc(c1)c1ccccn1	2-(3-pyridyl)pyridine
c1ccc(-c2ccco2)cc1	2-phenylfuran-v2
c1ccc(-c2cccs2)cc1	2-phenylthiophene-v2
c1ccnc(c1)c1ccccn1	2,2-bipyridine
c1ccc2[nH]c(-c3ccccc3)nc2c1	2-phenylbenzimidazole
c1ccc(cc1)c1ccc(cc1)O	4-hydroxybiphenyl
CCc1cc(C)c(c(c1)C)CC	mesitylene-variant
c1ccc2c(c1)ccc1ccc3ccccc3c12	chrysene
c1cc2ccc3cccc4ccc(c1)c2c34	pyrene
Oc1cc(O)c2c(c1)oc(cc2=O)-c1ccc(O)c(O)c1	luteolin
Oc1cc(O)c2c(c1)oc(-c1ccccc1)cc2=O	chrysin
c1ccc(cc1)Oc1ccccc1	diphenyl-ether
c1ccc(cc1)Sc1ccccc1	diphenyl-sulfide
c1ccnc2ccc3cccnc3c12	1,10-phenanthroline
c1cn2ccnc2cn1	purine-partial
Nc1ncnc2[nH]cnc12	adenine
Nc1nc2[nH]cnc2c(=O)[nH]1	guanine
O=c1cc[nH]c(=O)[nH]1	uracil-v2
Cc1c[nH]c(=O)[nH]c1=O	thymine
Nc1cc[nH]c(=O)n1	cytosine
c1cccc2nsnc12	2,1,3-benzothiadiazole
c1cccc2nonc12	2,1,3-benzoxadiazole
c1ccc2nonc2c1	benzofurazan-v2
c1ccc2nsnc2c1	benzothiadiazole-v2
c1cccc2[nH]nnc12	benzotriazole
c1ccc2c(c1)cn[nH]2	indazole
c1ccc2c(c1)ccn2C	N-methylindole
c1ccc2c(c1)c1ccccc1n2	carbazole-partial
c1ccc(-c2ccccn2)c(F)c1	2-(2-fluorophenyl)pyridine
c1cnc(nc1)c1ncccn1	2,2-bipyrimidine
c1cnc(nc1)Cl	2-chloropyrimidine
c1ncc2ccccc2n1	quinazoline
C1=CC2=C3C(=C1)C=CC3=CC=C2	acenaphthylene
# ============================================================
# SECTION 3: Drug-like molecules (13-30 atoms) — FDA approved drugs — Lines 151-350
# ============================================================
CC(=O)Oc1ccccc1C(=O)O	aspirin
CC(=O)Nc1ccc(O)cc1	acetaminophen
CC(C)Cc1ccc(cc1)C(C)C(=O)O	ibuprofen
COc1ccc2cc(ccc2c1)C(C)C(=O)O	naproxen
OC(=O)c1ccccc1O	salicylic-acid
Cn1cnc2c1c(=O)n(c(=O)n2C)C	caffeine
Cn1cnc2c1c(=O)[nH]c(=O)n2C	theobromine
Cn1c(=O)c2[nH]cnc2n(c1=O)C	theophylline
CC(=O)OC1CC(=O)OC(C1)(C)C	mevalonic-lactone-acetate
OC(=O)Cc1ccc(cc1)Cl	4-chlorophenylacetic-acid
OC(=O)c1cc(O)c(O)c(O)c1	gallic-acid
CC12CCC3C(C1CCC2O)CCC4=CC(=O)CCC34C	testosterone
CC12CCC3C(C1CCC2(C#C)O)CCC4=CC(=O)CCC34C	ethisterone
CC12CCC3C(C1CCC2O)CCC4CC(=O)CCC34C	5a-androstanedione-partial
CC12CCC3c4ccc(cc4CCC3C1CCC2O)O	estradiol
OC1(C#C)CCC2C3CCC4=CC(=O)CCC4C3CCC12C	norethindrone
OC(=O)c1cccnc1	nicotinic-acid
OC(=O)c1cc(O)c(O)c(O)c1O	digallic-acid-partial
CC(=O)Oc1cc(OC(C)=O)c2C(=O)CC(Oc2c1)c1ccc(OC(C)=O)cc1	naringenin-triacetate-partial
c1cc(c(cc1Cl)Cl)OCc1coc(n1)N	guanfacine
OC(=O)c1cc(=O)[nH]c(=O)[nH]1	orotic-acid
c1cc(ccc1C(=O)O)NCC(=O)O	4-aminohippuric-acid-partial
NC(=O)c1ccncc1	nicotinamide
OC(=O)CCCCC1SCC2NC(=O)NC12	biotin
NCCc1c[nH]c2ccc(O)cc12	serotonin
NCCc1ccc(O)c(O)c1	dopamine
NC(Cc1ccc(O)c(O)c1)C(=O)O	L-DOPA
NC(Cc1c[nH]c2ccccc12)C(=O)O	tryptophan
NC(Cc1ccc(O)cc1)C(=O)O	tyrosine
NC(CCCNC(=N)N)C(=O)O	arginine
NC(CS)C(=O)O	cysteine
NC(CC(=O)O)C(=O)O	aspartic-acid
NC(CCC(=O)O)C(=O)O	glutamic-acid
NC(CC(=O)N)C(=O)O	asparagine
NC(CCC(=O)N)C(=O)O	glutamine
NC(Cc1ccccc1)C(=O)O	phenylalanine
NC(Cc1cnc[nH]1)C(=O)O	histidine
CC(N)C(=O)O	alanine
CC(CC)C(N)C(=O)O	isoleucine
CC(C)CC(N)C(=O)O	leucine
CC(C)C(N)C(=O)O	valine
CSCCC(N)C(=O)O	methionine
NCC(=O)O	glycine
C1CC(NC1)C(=O)O	proline
NC(CO)C(=O)O	serine
NC(C(C)O)C(=O)O	threonine
NC(CCCCN)C(=O)O	lysine
c1cc2c(cc1Cl)nc([nH]2)NC(=O)OC(C)C	chlorpropham-partial
ClC(Cl)=C(Cl)Cl	tetrachloroethylene
Cc1ncc(COP(O)(O)=O)c(C=O)c1O	pyridoxal-5-phosphate
OC(=O)C(O)C(O)C(O)C(O)CO	gluconic-acid
OCC1OC(O)C(O)C(O)C1O	glucose
OCC1OC(O)C(O)C(O)C1O	glucose-v2
OC1C(O)C(OC1(CO)O)CO	fructose
OCC1OC(OC2OC(CO)C(O)C(O)C2O)C(O)C(O)C1O	lactose
OCC1OC(OC2C(CO)OC(O)C(O)C2O)C(O)C(O)C1O	maltose
OCC1OC(OC2(CO)OC(CO)C(O)C2O)C(O)C(O)C1O	sucrose
CC(=O)Nc1nnc(s1)S(=O)(=O)N	acetazolamide
NS(=O)(=O)c1ccc(Cl)c(c1)C(F)(F)F	furosemide-fragment
NS(=O)(=O)c1cc2c(cc1Cl)NCNS2(=O)=O	hydrochlorothiazide
CC1=CC(=O)c2c(O1)cc(cc2O)O	dehydrogriseofulvin-partial
OC(=O)c1cc(O)ccc1O	gentisic-acid
CC(O)C(=O)O	lactic-acid
CC(=O)C(=O)O	pyruvic-acid
OC(=O)CC(O)(CC(=O)O)C(=O)O	citric-acid
OC(=O)CC(=O)CC(=O)O	3-oxoglutaric-acid
OC(=O)/C=C/C(=O)O	fumaric-acid
OC(=O)CC(O)C(=O)O	malic-acid
OC(=O)C(O)C(O)C(=O)O	tartaric-acid
OC(=O)CCCC(=O)O	glutaric-acid
OC(=O)CCCCC(=O)O	adipic-acid
OC(=O)CCCCCCCC(=O)O	azelaic-acid
O=C(O)c1ccc(N=Nc2ccc(C(=O)O)cc2)cc1	azobenzene-4,4-dicarboxylic-acid
c1ccc(cc1)C(=O)Nc1ccc(cc1)O	4-hydroxybenzanilide
COC(=O)c1ccccc1OC(=O)C	aspirin-methyl-ester
CC(=O)OCC	ethyl-acetate
CCCCCCCCCCCC(=O)O	lauric-acid
CCCCCCCCCCCCCCCC(=O)O	palmitic-acid
CCCCCCCCCCCCCCCCCC(=O)O	stearic-acid
CCCCCCCCC=CCCCCCCCC(=O)O	oleic-acid
CCCCC=CCC=CCCCCCCCC(=O)O	linoleic-acid-partial
CCc1c(c2cc(cc(c2oc1=O)O)O)O	6-ethyl-7-hydroxycoumarin-partial
CC(=O)C1CCC2C3CCC4=CC(=O)CCC4(C)C3CCC12C	progesterone
OCC(NC(=O)C(Cl)Cl)C(O)c1ccc(cc1)[N+](=O)[O-]	chloramphenicol
NC(=O)c1ccc(nc1)C(=O)N	nicotinamide-deriv
OC(=O)CN(CC(=O)O)CC(=O)O	nitrilotriacetic-acid
c1ccc(cc1)C(O)(c1ccccc1)c1ccccc1	triphenylmethanol
CC(=O)Nc1ccc(cc1O)S(=O)(=O)N	sulfacetamide-partial
Nc1ccc(cc1)S(=O)(=O)Nc1ccccn1	sulfapyridine
Nc1ccc(cc1)S(=O)(=O)Nc1ncccn1	sulfadiazine
Nc1ccc(cc1)S(=O)(=O)Nc1cc(C)nn1C	sulfamethazine-partial
Nc1ccc(cc1)S(=O)(=O)Nc1ccnn1C	sulfamethoxazole-partial
Nc1ccc(cc1)S(=O)(=O)N	sulfanilamide
CCOC(=O)c1cc(OCC)c(N)cc1OCC	trimethoprim-partial
c1ccc(cc1)C(=O)c1ccc(CC(=O)O)cc1	fenbufen
COc1ccc(cc1OC)Cc1cnc2cc(Cl)ccc2n1	papaverine-partial
Clc1cc2c(cc1Cl)nc(n2)NC(=O)OCC	diuron-partial
CC12CCC3C(C1CCC2(O)C(=O)CO)CCC4=CC(=O)CCC34C	cortisol-partial
OC(=O)CN(CCN(CC(=O)O)CC(=O)O)CC(=O)O	EDTA
OC1C(O)C(O)C(CO)OC1O	galactose
NC1=NC(=O)N(C=C1)C1C(O)C(O)C(CO)O1	cytidine
Nc1ncnc2n(C3C(O)C(O)C(CO)O3)cnc12	adenosine
Nc1nc2n(cnc2c(=O)[nH]1)C1C(O)C(O)C(CO)O1	guanosine
O=c1ccn(C2C(O)C(O)C(CO)O2)c(=O)[nH]1	uridine
Cc1cn(C2CC(O)C(CO)O2)c(=O)[nH]c1=O	thymidine
Oc1cccc2c1nc(n2)C(=O)O	kynurenic-acid-partial
c1ccc(cc1)NC(=O)c1ccncc1	isonicotinanilide
CCN(CC)c1ccc2c(c1)oc(=O)c(c2O)C=O	fluorescent-dye-partial
OC(=O)c1cccc2ccccc12	1-naphthoic-acid
OC(=O)c1ccc2ccccc2c1	2-naphthoic-acid
c1ccc2c(c1)oc(-c1ccccc1)c(c2=O)O	flavonol
Oc1cc(O)c2c(c1)oc(-c1ccc(O)c(O)c1)c(O)c2=O	quercetin
Oc1cc(O)c2c(c1)oc(-c1ccc(O)cc1)cc2=O	apigenin
Oc1cc(O)c2c(c1)oc(-c1ccc(O)c(O)c1)cc2=O	eriodictyol-partial
CCOc1ccc(cc1)NC(=O)C	phenacetin
CCC(CC)COC(=O)c1ccc(N)cc1	benzocaine-deriv
c1ccc(c(c1)C(=O)O)NC2CC(=O)NC(=O)C2	thalidomide-fragment
CC(C)(C)NCC(O)c1ccc(O)c(CO)c1	salbutamol
CCCC(CCC)C(=O)O	valproic-acid
CC(C)NCC(O)c1ccc(O)c(O)c1	isoprenaline
CC(C)(C)NCC(O)c1cc(O)cc(O)c1	terbutaline
O=C(O)c1ccc(-c2ccccc2)cc1	4-biphenylcarboxylic-acid
OC(=O)c1ccc(O)cc1	4-hydroxybenzoic-acid-v2
COc1ccc(C=O)cc1O	isovanillin
COc1cc(C=O)ccc1O	vanillin
COc1cc(C=CC(=O)O)ccc1O	ferulic-acid
COc1cc(CC=C)ccc1O	eugenol
CC(=O)Oc1ccc(cc1)OC(C)=O	hydroquinone-diacetate
OCc1ccc(CO)cc1	1,4-benzenedimethanol
CC(=O)c1ccc(O)cc1	4-hydroxyacetophenone
Nc1ccc(cc1)C(=O)Nc1ccccc1	4-aminobenzanilide
OC(=O)c1cccc(c1)C(F)(F)F	3-trifluoromethylbenzoic-acid
c1cc(c(cc1F)F)C(=O)O	2,4-difluorobenzoic-acid
NC(=N)c1ccc(cc1)OCC(O)CO	4-amidinophenyl-GPE
Clc1ccc(nc1)NC(=O)c1ccccc1	niclosamide-partial
OC(=O)c1cc(Cl)cc(Cl)c1O	3,5-dichlorosalicylic-acid
Nc1ccc2nc(N)sc2c1	2,6-diaminobenzothiazole
CC(C)NCC(O)COc1cccc2ccccc12	propranolol
CCNCC(O)COc1ccc(cc1)OCC	metoprolol-partial
CC(C)NCC(O)COc1ccc(cc1)CC(=O)N	atenolol
OC(COc1cccc2ccccc12)CNC(C)C	propranolol-v2
Nc1ccc(cc1)S(=O)(=O)c1ccc(N)cc1	dapsone
OC(=O)c1ccc([N+](=O)[O-])cc1	4-nitrobenzoic-acid
OC(=O)c1ccc(N)cc1	4-aminobenzoic-acid
OC(=O)c1ccc(NC=O)cc1	4-formaminobenzoic-acid
c1ccc(cc1)CC(=O)c1ccc(cc1)Cl	4-chlorodeoxybenzoin
OC(=O)c1ccc(-c2cccc(C(=O)O)c2)cc1	isophthalic-acid-deriv
c1ccc2c(c1)ccc1cc3ccccc3cc12	fluoranthene-partial
c1ccc2c(c1)cc1ccc3ccccc3c1c2	benz[a]anthracene
Oc1ccc2c(c1)ccc1cc3ccccc3cc12	3-hydroxychrysene
CC(C)c1cc(C)c(Oc2ccccc2)c(C)c1	BHT-ether
CCOc1ccc(cc1)NC(=O)c1ccc(O)cc1	4-hydroxy-4-ethoxyformanilide
CCCCCCCCCCCCCCCCCCCCCCCC	tetracosane
CCCCCCCCCCCCCCCCCCCCCCCCCCCCCC	triacontane
OC(=O)c1cc(F)ccc1O	5-fluorosalicylic-acid
COC(=O)c1ccc(N)cc1	methyl-4-aminobenzoate
OC(=O)c1ccc2c(c1)OCO2	piperonylic-acid
COc1ccc(C=O)cc1	4-methoxybenzaldehyde
OC(=O)c1cccc(O)c1O	2,3-dihydroxybenzoic-acid
CC(=O)Oc1ccc(cc1)C(C)C(=O)O	ibuprofen-acetate-frag
OC(=O)c1cc(Br)cc(Br)c1O	3,5-dibromosalicylic-acid
CC(O)c1ccccc1	1-phenylethan-1-ol
OC(c1ccccc1)c1ccccc1	benzhydrol-v2
c1ccc(cc1)C(=O)NCC(=O)O	hippuric-acid
c1ccc(cc1)NC(=O)c1ccc(O)cc1	4-hydroxybenzanilide-v2
OC(=O)c1cc(O)c(O)cc1O	4,5-dihydroxyphthalic-acid-partial
c1cc(=O)[nH]c(=O)c1C(=O)O	orotic-acid-v2
CC(=O)Nc1ccc(S(=O)(=O)O)cc1	N-acetylsulfanilic-acid
CC(=O)NS(=O)(=O)c1ccc(N)cc1	sulfacetamide
OC(=O)c1ccnc(O)c1	3-hydroxypyridine-4-carboxylic-acid
CCN(CC)c1ccc(N)cc1	N,N-diethyl-p-phenylenediamine
c1cnc(c(n1)N)C(=O)O	aminopyrimidine-carboxylate
OCC(O)CO	glycerol
OC(=O)C(O)(CC(=O)O)CC(=O)O	citric-acid-v2
OC(=O)C(N)CSSCC(N)C(=O)O	cystine
OC(=O)C(N)CCO	homoserine
OC(=O)C(N)CC(=O)O	aspartate-v2
OC(=O)C(N)CCSC	methionine-v2
OC(=O)C(N)CC1=CNC=N1	histidine-v2
OC(=O)C(N)CCCNC(=N)N	arginine-v2
OC(=O)C(N)CC(C)C	leucine-v2
OC(=O)C(N)C(CC)C	isoleucine-v2
OC(=O)C(N)C(C)O	threonine-v2
OCC(N)C(=O)O	serine-v2
c1ccc(cc1)C=CC(=O)O	cinnamic-acid
c1ccc(cc1)CC(=O)O	phenylacetic-acid
OC(=O)c1ccc(N(=O)=O)cc1	4-nitrobenzoic-acid-v2
c1cccc(c1)C(=O)Nc1ccccc1	benzanilide
CC(=O)OC1CCCC(=O)O1	mevalonic-lactone-acetate-v2
OC(=O)c1ccc(Cl)c(Cl)c1	3,4-dichlorobenzoic-acid
NC(=O)c1ccc(F)cc1	4-fluorobenzamide
OC(=O)c1ccc(OC)cc1	4-methoxybenzoic-acid
c1cnc2c(c1)ccc(c2)N	3-aminoquinoline
OC(=O)/C=C\C(=O)O	maleic-acid
OC(=O)C(F)(F)F	trifluoroacetic-acid
CC(F)(F)C(=O)O	difluoropropionic-acid
OC(=O)c1ccccc1Cl	2-chlorobenzoic-acid
CC(=O)NC(CC(=O)O)C(=O)O	N-acetyl-aspartate
NC(Cc1c[nH]cn1)C(=O)O	histidine-v3
# ============================================================
# SECTION 4: Complex drugs (31-50 atoms) — kinase inhibitors, antibiotics — Lines 351-550
# ============================================================
c1ccc(c(c1)NC(=O)c1cnc2cc(ccc2n1)NC(=O)c1ccccc1)C	imatinib-fragment
c1cc(ccc1NC(=O)Nc1ccc(cc1)Cl)NC(=O)c1cccc(c1)C(F)(F)F	sorafenib-partial
c1ccc(nc1)c1ccnc(n1)Nc1ccc(cc1)NC(=O)c1ccccc1	imatinib-core
Cc1ccc(cc1Nc2nccc(n2)c2cccnc2)NC(=O)c1ccc(cc1)CN1CCN(CC1)C	imatinib
COc1cc(ccc1OCCCN1CCC(CC1)c1noc2cc(F)ccc12)C(C)=O	iloperidone
c1cc(ccc1C(=O)O)NC(=O)Cc1ccc(c(c1)Cl)Nc1ncnc2cc(OC)c(OC)cc12	erlotinib-fragment
COc1cc2ncnc(Nc3ccc(F)c(Cl)c3)c2cc1OCCCN1CCOCC1	gefitinib
c1ccc(c(c1)C(=O)Nc1ccc(cc1)C(=O)N1CCN(CC1)C)NC1CCCCC1	BACE-inhibitor-partial
CC(=O)Nc1ccc(O)c(c1)C(=O)Nc1ccc(F)cc1	flutamide-deriv
CCOc1cc2ncc(C#N)c(Nc3ccc(OCc4ccccn4)c(Cl)c3)c2cc1NC(=O)C=C	neratinib-partial
Fc1ccc(cc1)C1CCNCC1COc1ccc2OCOc2c1	paroxetine
CNCCC=c1ccc2c(c1)Cc1ccccc1S2	prothiadene-partial
CN1CCC(=C2c3ccccc3C=Cc3ccccc32)CC1	cyproheptadine
CN1c2ccccc2C(=NCC(=O)O)c2ccccc21	clobazam-glycine
c1ccc(cc1)C(c1ccc(cc1)Cl)n1ccnc1	clotrimazole
Clc1ccc(cc1)C(c1ccccc1Cl)n1ccnc1	miconazole-partial
CC(Cn1ccnc1)OC(c1ccc(cc1)Cl)c1ccc(cc1)Cl	econazole-partial
OC(Cn1ccnc1)(c1ccc(F)cc1)c1ccc(F)cc1	fluconazole
CC(=O)OCC(=O)C1(OC(C)=O)CCC2C3CCC4=CC(=O)CCC4(C)C3C(O)CC12C	cortisone-acetate
CC12CCC3C(C1CCC2(O)C(=O)CO)CCC4=CC(=O)C=CC34C	prednisone
CC12CCC3C(C1CC(O)C2(O)C(=O)CO)CCC4=CC(=O)C=CC34C	prednisolone
CC12CCC3C(C1CC(O)C2(O)C(=O)COC(=O)C)CCC4=CC(=O)C=CC34C	prednisolone-acetate
CC12C=CC3C(CCC4=CC(=O)CCC34C)C1CCC2(O)C#C	norethynodrel
CC12CCC(=O)C=C1CCC1C2CCC2(C1CCC2O)C	nandrolone
OC(=O)CNC(=O)c1cc(c(cc1I)O)I	diiodohydroxyphenyl-glycine
CC(C)(C)c1ccc(cc1)C(O)CCCN1CCC(CC1)C(O)(c1ccccc1)c1ccccc1	terfenadine
CC(CS)C(=O)N1CCCC1C(=O)O	captopril
CCOC(=O)C(CCc1ccccc1)NC(C)C(=O)N1CCCC1C(=O)O	enalapril
OC(=O)C1CC(=O)N(C1)C(C(=O)O)CCc1ccccc1	lisinopril-fragment
OC(=O)C1CCCN1C(=O)C(NC(=O)C(CC(=O)O)CC(=O)O)CCc1ccccc1	ramipril-fragment
OC(=O)Cc1ccccc1Nc1c(Cl)cccc1Cl	diclofenac
c1cc(c(cc1F)Cl)NC(=O)c1cnc2cc(F)ccc2n1	quinazoline-drug
OC(=O)C1CCC(=O)N1	5-oxoproline
CC1CC2C3CCC4=CC(=O)C=CC4(C)C3(F)C(O)CC2(C)C1(O)C(=O)COC(=O)C	dexamethasone-acetate
CC1CC2C3CCC4=CC(=O)C=CC4(C)C3(F)C(O)CC2(C)C1(O)C(=O)CO	dexamethasone
CC1CC2C3CCC4=CC(=O)C=CC4(C)C3C(O)CC2(C)C1(O)C(=O)CO	methylprednisolone
CC(=O)OCC(=O)C1(O)CCC2C3CCC4=CC(=O)CCC4(C)C3C(O)CC21C	hydrocortisone-acetate
ClC1=C(Cl)C(Cl)(Cl)C(Cl)=C1Cl	hexachlorocyclopentadiene
CC(=O)Nc1nnc(s1)S(=O)(=O)N	acetazolamide-v2
c1ccc(cc1)N=C(N)Nc1ccc(Cl)cc1	chloroguanide-partial
Nc1nc(N)c2nc(CNc3ccc(C(=O)NC(CCC(=O)O)C(=O)O)cc3)cnc2n1	methotrexate-core
OC(=O)CCC(=O)NC(CCC(=O)O)C(=O)O	glutamic-acid-succinate
c1cc(nc2ccccc12)CC2=CNCCN2	naphazoline-partial
Cn1c2ccccc2c2cc(ccc12)CC(=O)O	indomethacin-fragment
Cn1c2ccc(OC)cc2c2cc(CC(=O)O)ccc12	indomethacin-core
CC(=O)c1ccc2n1CCC1CCCNC1C2	vincamine-partial
CN(C)CCc1c[nH]c2ccc(CS(=O)(=O)N3CCNCC3)cc12	sumatriptan
COC(=O)C(C1CCCCN1)c1ccccc1	methylphenidate
OC1(CCN(CCCC(=O)c2ccc(F)cc2)CC1)c1ccc(Cl)cc1	haloperidol
CC(=O)OC1CC(C)C=C2C=CC(C)C(CCC3CC(O)CC(=O)O3)C21	lovastatin-partial
CC(C)c1nc(CN(C)C(=O)NC(C(CC)C)C(=O)NC(Cc2ccccc2)C(=O)NC(C)C(=O)O)cs1	ritonavir-fragment
CC(C)c1nc(cs1)CN(C)C(=O)NC(C(CC)C)C(=O)NC(CC(O)C(Cc1ccccc1)NC(=O)OCc1cccc(c1)C)C(=O)NC(C)C(=O)NC(CC1CCCCC1)C#N	ritonavir-partial
c1ccc(cc1)CC(NC(=O)C(CC(=O)O)NC(=O)OCc1ccccc1)C(=O)NC(Cc1ccccc1)C(O)CN1CC2CCCCC2CC1	saquinavir-partial
CC(C)(C)NC(=O)C1CC2CCCCC2CN1CC(O)C(Cc1ccccc1)NC(=O)C(CC(N)=O)NC(=O)c1ccc2ccccc2n1	nelfinavir-partial
CC(=O)N1CCN(CC1)c1ccc(OCC2COC(Cn3ccnc3)(O2)c2ccc(Cl)cc2Cl)cc1	ketoconazole-partial
CC(=O)Nc1ccc(cc1O)S(=O)(=O)N	N-acetylsulfanilamide-hydroxy
ClCCN(CCCl)P1(=O)NCCCO1	cyclophosphamide
OC(=O)CCCC(=O)N1CC(=O)NC1=O	thalidomide-glutarate
CC1=CC(=O)N(C(=O)N1)C1CCC(CC1)N1CC(=O)NC1=O	thalidomide-analog
CN1CCC(C1)c1c(O)cc(=O)c2ccc(F)cc12	alvocidib-partial
OC(=O)c1cccc(c1)NC(=O)c1ccc(Cl)c(Cl)c1	tolfenamic-acid-deriv
Cc1cccc(c1)Nc1ccc(cc1C(F)(F)F)C(=O)O	flufenamic-acid-partial
c1ccc(cc1)CC(C(=O)NC(C)C(=O)O)NC(=O)C(CC(=O)O)NC(=O)OCc1ccccc1	enalaprilat-precursor
CC(=O)Nc1cc(ccc1O)C(O)CNC(C)(C)C	albuterol-v2
CC12CC3CC(C)(C1)CC(N)(C3)C2	memantine
c1nc(c2ccccc2n1)c1ccccn1	2-(pyridin-2-yl)quinoxaline
N=c1[nH]c(=O)n(c(=O)n1C1CCCC1)C1CCCC1	alloxazine-dicyclopentyl
OC(=O)c1ccc(NC(=O)CC2SCC3NC(=O)NC32)cc1	biotin-PABA
c1cn2c(nc1=O)cccc2	quinazolinone-partial
NC(=O)c1cccc(c1)NC(=O)Nc1cccc(c1)C(=O)N	urea-deriv
CC(C)Cc1ccc(cc1)C(C)C(=O)NC1CCCCC1	ibuprofen-cyclohexylamide
CC(C)OC(=O)C(C)Oc1ccc(Cl)cc1	fenofibrate-fragment
OC(=O)C(Oc1ccc(Cl)cc1)c1ccc(Cl)cc1	fenofibric-acid-deriv
OC(=O)c1ccc(SCc2ccc(Cl)cc2)cc1	thioether-drug
CC(=O)c1cc(C)nc(NC2CCCCC2)n1	abacavir-fragment
OC1COCOC1c1ccc(F)cc1	fluorinated-dioxolane
OC1CN2CCC1CC2c1ccnc2cc(F)ccc12	fluoroquinolone-core
O=c1[nH]c2ccccc2n1CC(=O)O	benzimidazolone-acetate
C(#Cc1ccccc1)c1ccccc1	diphenylacetylene
CC(=O)OC1CCC2C3CCC4CC(=O)CCC4(C)C3CCC12C	nandrolone-decanoate-core
CN(C)CCCN1c2ccccc2Sc2ccc(Cl)cc21	chlorpromazine
CN(C)CCCN1c2ccccc2Sc2ccc(cc21)C(F)(F)F	triflupromazine
c1ccc(c(c1)O)C(=O)Nc1ccc(cc1Cl)Cl	diclofenac-fragment
CC1Cc2ccccc2N1NC(=O)c1ccc(cc1)Cl	indapamide-fragment
NS(=O)(=O)c1cc2c(cc1Cl)NC(C(=O)N2)c1ccccc1	chlorthalidone-partial
c1ccc2c(c1)nc(=O)c1[nH]c3ccccc3n12	tryptanthrin-partial
CC(=O)Nc1ccc2c(c1)C(=O)c1ccccc1S2	phenothiazine-drug-partial
c1ccc2c(c1)nc(c(n2)Cl)c1ccncc1	quinoxaline-pyridine
OC(=O)C(O)C(O)=O	tartronic-acid
CC12CCC3C(CCC4CC(O)CCC34C)C1CCC2=O	epiandrosterone-partial
OC1C(O)C(CO)OC1n1cnc2c(N)ncnc12	adenosine-v2
Nc1ncnc2n(cnc12)C1OC(COP(O)(=O)OP(O)(=O)OP(O)(O)=O)C(O)C1O	ATP
Nc1ncnc2n(cnc12)C1OC(COP(O)(=O)OP(O)(O)=O)C(O)C1O	ADP
Nc1ncnc2n(cnc12)C1OC(COP(O)(O)=O)C(O)C1O	AMP
Nc1nc2n(cnc2c(=O)[nH]1)C1OC(COP(O)(=O)OP(O)(=O)OP(O)(O)=O)C(O)C1O	GTP
Nc1nc2n(cnc2c(=O)[nH]1)C1OC(COP(O)(=O)OP(O)(O)=O)C(O)C1O	GDP
OC1CC(OP(O)(O)=O)C(n2ccc(=O)[nH]c2=O)O1	UMP
OC1CC(OP(O)(O)=O)C(n2cc(C)c(=O)[nH]c2=O)O1	TMP
CC(=O)SCCNC(=O)CCNC(=O)C(O)C(C)(C)COP(=O)(O)OP(=O)(O)OCC1OC(n2cnc3c(N)ncnc32)C(O)C1OP(=O)(O)O	acetyl-CoA
OC(=O)CC(O)(CC(=O)SCCNC(=O)CCNC(=O)C(O)C(C)(C)COP(=O)(O)OP(=O)(O)OCC1OC(n2cnc3c(N)ncnc32)C(O)C1OP(=O)(O)O)C(=O)O	citrate-CoA-partial
CC(C)C(NC(=O)C(CC(=O)O)NC(=O)OCc1ccccc1)C(=O)NC(CCCCN)C(=O)OC	peptide-segment-1
NC(=O)c1ccc[n+](c1)C1OC(COP(=O)(O)OP(=O)(O)OCC2OC(n3cnc4c(N)ncnc43)C(O)C2O)C(O)C1O	NAD+
NC(=O)C1=CN(C=CC1)C1OC(COP(=O)(O)OP(=O)(O)OCC2OC(n3cnc4c(N)ncnc43)C(O)C2O)C(O)C1O	NADH
OC(=O)CC(CC(=O)O)C(=O)O	aconitic-acid-partial
OC(=O)C(O)CC(=O)O	malic-acid-v2
OC(=O)CCC(=O)O	succinic-acid
OC(=O)CC(=O)C(=O)O	oxaloacetic-acid
OC(=O)CC(=O)SCC(NC(=O)CCC(NC(=O)C(N)CCC(=O)O)C(=O)O)C(=O)NCC(=O)O	glutathione-S-conjugate
NC(CCC(=O)NC(CS)C(=O)NCC(=O)O)C(=O)O	glutathione
NC(CCC(=O)NC(CSSC(CC(=O)NCC(=O)O)NC(=O)CCC(N)C(=O)O)C(=O)NCC(=O)O)C(=O)O	oxidized-glutathione-partial
CC(C)CC(NC(=O)CNC(=O)C(NC(=O)C(N)CC(=O)O)CCCNC(=N)N)C(=O)NC(CC(=O)O)C(=O)O	peptide-DRGLD
CC(=O)NC1C(O)C(OC1(C)CO)OC1C(O)C(O)C(O)OC1CO	N-acetylglucosamine-galactose
Oc1ccc(cc1)C1CC(=O)c2c(O1)cc(OC1OC(CO)C(O)C(O)C1O)cc2O	naringin-aglycone
Nc1ccc(cc1)S(=O)(=O)Nc1nccc(C)n1	sulfamerazine
c1ccc2c(c1)nc(n2C)CC(=O)O	indomethacin-benzimidazole
OC(=O)Cn1c(=O)c2ccccc2n(C)c1=O	benzoyleneurea-acetate
CC(C)NCC(O)c1ccc(OCC(=O)N)cc1	atenolol-v2
OC(=O)c1ccc(NC(=O)c2cccc(Cl)c2)cc1	chlorobenzamido-benzoate
CC1CC2C3CC(F)C4=CC(=O)C=CC4(C)C3(F)C(O)CC2(C)C1(O)C(=O)CO	flumethasone
CC1CC2C3CC(F)C4=CC(=O)C=CC4(C)C3C(O)CC2(C)C1(O)C(=O)CO	betamethasone
CC1CC2C3CCC4=CC(=O)C=CC4(C)C3(F)C(O)CC2(C)C1(O)C(=O)CO	triamcinolone-partial
CC12CC(O)C3C(CCC4=CC(=O)CCC34C)C1CCC2(O)C(=O)CO	11-deoxycortisol
CC(C)C1=C(C(=C(N1CCC(CC(CC(=O)O)O)O)C2=CC=C(C=C2)F)C3=CC=CC=C3)C(=O)NC4=CC=CC=C4	atorvastatin
CC(C)C1=NC(=NC(=C1C=CC(CC(CC(=O)O)O)O)C2=CC=C(C=C2)F)N(C)S(=O)(=O)C	rosuvastatin
CC1(C)SC(NC1C(=O)O)C(=O)NC(C(=O)O)c1ccc(O)cc1	amoxicillin-core
CC1(C)SC(NC1C(=O)O)C(=O)NC(C(=O)O)c1ccccc1	ampicillin-core
OC(=O)C1N2C(=O)C(NC(=O)C(N)c3ccccc3)C2SC1(C)C	ampicillin
OC(=O)C1N2C(=O)C(NC(=O)C(N)c3ccc(O)cc3)C2SC1(C)C	amoxicillin
CC(=O)OCC1=C(N2C(SC1)C(NC(=O)Cc1ccsc1)C2=O)C(=O)O	cefalotin-partial
CC(=O)OCC1=C(N2C(SC1)C(NC(=O)Cc1cccs1)C2=O)C(=O)O	cephalothin
COC(=O)NC1(SSC2(NC(=O)OC)C(=O)N2CC=C)C(=O)N1CC=C	epidithiodioxopiperazine-partial
c1nc(c2cc(F)ccc2n1)c1ccncc1	fluoroquinoxaline-pyridine
CC1=NN(C(=O)C1)c1ccccc1	3-methyl-1-phenylpyrazolone
c1ccc(-c2nnc(SCC(=O)O)n2C)cc1	phenyltetrazole-thio-acetate
c1ccc(cc1)S(=O)(=O)NC(=O)Nc1ccc(Cl)cc1	chlorpropamide-partial
CC(=O)Nc1ccc(cc1)S(=O)(=O)NC(=O)NC1CCCCC1	tolbutamide-analog
CC(=O)NC1=CC(=O)N(C1=O)C1OC(CO)C(O)C1O	5-fluorouracil-riboside-like
OC1CC(OP(O)(O)=O)C(O1)(CO)O	fructose-6-phosphate
OC1C(OP(O)(O)=O)C(O)C(O)C(O1)COP(O)(O)=O	glucose-1,6-bisphosphate
c1ccc(cc1)C(=O)NC(CCCNC(=N)N)C(=O)NC(CC(C)C)C(=O)O	Bz-Arg-Leu-OH
CC(=O)NC(Cc1ccc(O)cc1)C(=O)NC(CC(C)C)C(=O)O	Ac-Tyr-Leu-OH
CC(=O)NC(CCCCN)C(=O)NC(CC(=O)O)C(=O)NC(Cc1ccccc1)C(=O)O	Ac-Lys-Asp-Phe-OH
NC(CC(=O)O)C(=O)NC(CS)C(=O)NC(Cc1cnc[nH]1)C(=O)NC(CC(C)C)C(=O)O	Asp-Cys-His-Leu
OC(=O)CNC(=O)C(CC(=O)O)NC(=O)C(N)Cc1ccc(O)cc1	Tyr-Asp-Gly
NC(Cc1ccccc1)C(=O)NC(CC(=O)N)C(=O)NC(CCCNC(=N)N)C(=O)NC(CO)C(=O)O	Phe-Asn-Arg-Ser
OC(=O)C(CC(C)C)NC(=O)C(CC(=O)O)NC(=O)C(N)Cc1c[nH]c2ccccc12	Trp-Asp-Leu
CC(=O)NC(CCC(=O)O)C(=O)NC(Cc1ccccc1)C(=O)NC(CCCCN)C(=O)O	Ac-Glu-Phe-Lys
OC(=O)C(CCCNC(=N)N)NC(=O)C(CC(C)C)NC(=O)C(N)Cc1ccc(O)cc1	Tyr-Leu-Arg
CC(C)C(NC(=O)C(N)CS)C(=O)NC(CCC(=O)N)C(=O)NC(CC(=O)O)C(=O)NC(CCCCN)C(=O)O	Cys-Val-Gln-Asp-Lys
OC(=O)C(CC(C)C)NC(=O)C(Cc1cnc[nH]1)NC(=O)C(N)CCC(=O)O	Glu-His-Leu
NC(CC(=O)O)C(=O)NC(Cc1ccc(O)cc1)C(=O)NC(CC(C)C)C(=O)NC(Cc1ccccc1)C(=O)NC(CO)C(=O)O	Asp-Tyr-Leu-Phe-Ser
c1ccnc(c1)C(=O)NCCCCCCNC(=O)c1ccncc1	bis-isonicotinoyl-hexanediamine
C1C2CC3CC1CC(C2)C3	adamantane-v2
CN(C)c1ccc(cc1)C(=O)Nc1ccc(N(C)C)cc1	DMAP-benzamide
Oc1ccc(cc1O)C(=O)Nc1ccc(O)c(O)c1	catechol-aminobenzamide
OC(=O)CCCCCCCCCC=CCCCCCCCC	gondoic-acid
c1ccc(cc1)Oc1ccc(cc1)C(=O)c1ccc(Oc2ccccc2)cc1	PEEK-monomer
OC(=O)c1cc(OC)c(OC)c(OC)c1	3,4,5-trimethoxybenzoic-acid
COc1ccc(C2CC(=O)c3c(O2)cc(O)cc3O)cc1OC	hesperetin
Oc1cc(O)c2c(c1)oc(-c1ccc(O)cc1)c(O)c2=O	kaempferol
Oc1cc(O)c2c(c1)oc(-c1ccc(O)c(O)c1)c(O)c2=O	quercetin-v2
COc1cc(-c2oc3cc(O)cc(O)c3c(=O)c2O)ccc1O	isorhamnetin
COc1cc(-c2oc3cc(O)cc(O)c3c(=O)c2OC)ccc1O	3,4-dimethylquercetin-partial
CC(=O)OC1CC(OC(C)=O)C(OC(C)=O)C(OC(C)=O)C1OC(C)=O	glucose-pentaacetate
OC1C(O)C(OC(OP(O)(O)=O)C1O)CO	mannose-1-phosphate
OC1C(O)C(O)C(O)C(COP(O)(O)=O)O1	galactose-6-phosphate
c1nc(c(=O)[nH]c1N)F	5-fluorocytosine
c1nc(c(c(n1)Cl)Cl)Cl	2,4,6-trichloropyrimidine
Nc1ncnc(Cl)n1	2-amino-4-chloro-s-triazine
c1ccc(cc1)COC(=O)c1ccc(N)cc1	benzyl-4-aminobenzoate
CCCCn1cc(C(=O)O)c(=O)c2cc(F)c(N3CCN(C)CC3)cc12	pefloxacin-partial
OC(=O)c1cn(C2CC2)c2cc(N3CCNCC3)c(F)cc2c1=O	ciprofloxacin
CC1COc2c(N3CCN(C)CC3)c(F)cc4c(=O)c(C(=O)O)cn1c24	levofloxacin-partial
OC(=O)c1cn(CC)c2cc(N3CCNCC3)c(F)cc2c1=O	norfloxacin
CCN1C=C(C(=O)C2=CC(=C(C=C21)N3CCNCC3)F)C(=O)O	enoxacin-partial
C1CN(CCN1)c1cc2c(cc1F)c(=O)c(cn2C1CC1)C(=O)O	ciprofloxacin-v2
CC(C)(C)NCC(O)COc1cccc2CC(O)C(=O)Nc12	carvedilol-partial
CC(C)(C)NCC(O)COc1cccc2[nH]c3ccccc3c12	carvedilol-core
OC(=O)c1cc(F)c(NC2=NCCN2)c(F)c1	diflunisal-urea-partial
Cc1onc(c1C(=O)Nc1ccc(C(F)(F)F)cc1)c1ccc(C)cc1	leflunomide-partial
CC(=O)Oc1ccccc1C(=O)Oc1ccccc1	aspirin-phenyl-ester
COc1ccc(CC2CNC(=O)C(=CC3=CC(=C(C=C3)OC)OC)N2)cc1OC	papaverine-reduced-partial
# ============================================================
# SECTION 5: Large molecules (51-100 atoms) — macrolides, peptides — Lines 551-700
# ============================================================
CCC1OC(=O)C(C)C(OC2CC(C)(OC)C(O)C(C)O2)C(C)C(OC2OC(C)CC(C2O)N(C)C)C(C)(O)CC(C)C(=O)C(C)C(O)C1(C)O	erythromycin-A
CCC1C(C(C(N(CC(CC(C(C(C(C(C(=O)O1)C)OC2CC(C(C(O2)C)O)(C)OC)C)OC3C(C(CC(O3)C)N(C)C)O)(C)O)C)C)C)O)(C)O	azithromycin
CC1CC=CC=CC(OC(=O)CC(OC(=O)C(C(CC(CC(C(=CC(C(C(CC=CC(C(C1O)C)O)O)OC1OC(C(C(C1O)N(C)C)O)C)CC=O)C)O)O)OC1OC(C)C(O)C(C1)(O)OC)C)C(=O)O)C	amphotericin-B-partial
CC(O)CC(O)CC=CC=CC=CC=CC(=O)CC(=O)CC1OC1CC2OC2CC(O)CC3OC3(C)C(O)C(C)O	nystatin-fragment
CC1=C2C(C(=O)C3(C(CC4C(C3C(C(C2(C)C)(CC1OC(=O)C(C(C5=CC=CC=C5)NC(=O)C6=CC=CC=C6)O)O)OC(=O)C7=CC=CC=C7)(CO4)OC(=O)C)O)C)OC(=O)C	paclitaxel
CC1=C2C(C(=O)C3(C(CC4C(C3C(C(C2(C)C)(CC1OC(=O)C(C(C5=CC=CC=C5)NC(=O)OC(C)(C)C)O)O)OC(=O)C6=CC=CC=C6)(CO4)OC(=O)C)O)C)O	docetaxel
CC(C)CC(NC(=O)C(CC(C)C)NC(=O)C(CC1=CC=CC=C1)NC(=O)C(CCC(=O)N)NC(=O)C(CC(=O)O)NC(=O)OC(C)(C)C)C(=O)NC(CCCCN)C(=O)O	hexapeptide-1
CC(C)CC(NC(=O)C(CC(C)C)NC(=O)C(Cc1ccccc1)NC(=O)CNC(=O)C(N)CCCCN)C(=O)NC(CC(=O)O)C(=O)NC(CCC(=O)O)C(=O)O	hexapeptide-2
CC(C)C(NC(=O)C(CC(C)C)NC(=O)C(CC(=O)O)NC(=O)CNC(=O)C(N)CS)C(=O)NC(Cc1ccccc1)C(=O)NC(CCCCN)C(=O)O	hexapeptide-3
CC(C)CC(NC(=O)C(N)CC(=O)O)C(=O)NC(Cc1c[nH]c2ccccc12)C(=O)NC(CC(C)C)C(=O)NC(Cc1cnc[nH]1)C(=O)NC(CC(C)C)C(=O)O	hexapeptide-4
CC(=O)NC(CS)C(=O)NC(CC(=O)O)C(=O)NC(Cc1ccccc1)C(=O)NC(CC(=O)N)C(=O)NC(CCCNC(=N)N)C(=O)NC(Cc1ccc(O)cc1)C(=O)O	heptapeptide-1
CC(C)CC(NC(=O)C(CC(=O)O)NC(=O)C(N)Cc1ccccc1)C(=O)NC(CCC(=O)N)C(=O)NC(CO)C(=O)NC(CCCCN)C(=O)NC(CC(C)C)C(=O)NC(C)C(=O)O	octapeptide-1
CC(C)CC1NC(=O)C(CC(C)C)NC(=O)C(Cc2ccc(O)cc2)NC(=O)C(CCC(=O)N)NC(=O)C(CC(=O)O)NC(=O)C(CCCNC(=N)N)NC(=O)C(Cc2ccccc2)NC(=O)C(CC(C)C)NC1=O	cyclic-octapeptide
CC(C)C(NC(=O)C(CC(C)C)NC(=O)C(Cc1c[nH]c2ccccc12)NC(=O)C(Cc1ccc(O)cc1)NC(=O)CNC(=O)C(N)CS)C(=O)NC(Cc1cnc[nH]1)C(=O)NC(CC(=O)O)C(=O)NC(CCCCN)C(=O)NC(CC(=O)N)C(=O)O	nonapeptide-1
OC(=O)CNC(=O)C(CC(=O)O)NC(=O)C(CS)NC(=O)C(CC(C)C)NC(=O)C(N)Cc1c[nH]c2ccccc12	pentapeptide-WLCDG
OC(=O)C(CCCNC(=N)N)NC(=O)C(Cc1ccc(O)cc1)NC(=O)C(CC(C)C)NC(=O)C(N)Cc1ccccc1	tetrapeptide-FLYR
OC(=O)C(Cc1ccc(O)cc1)NC(=O)C(CC(=O)O)NC(=O)C(N)Cc1cnc[nH]1	tripeptide-HDY
OC(=O)C(CC(C)C)NC(=O)C(Cc1ccccc1)NC(=O)C(N)CCC(=O)O	tripeptide-EFL
CC(C)CC(NC(=O)C(Cc1ccccc1)NC(=O)C(CC(C)C)NC(=O)C(CCCNC(=N)N)NC(=O)C(Cc1ccc(O)cc1)NC(=O)C(N)CCCCN)C(=O)O	hexapeptide-KYRLFL
OCC1OC(OC2C(OC(OC3C(O)C(O)C(O)OC3CO)C(O)C2O)CO)C(O)C(O)C1O	trisaccharide-1
OCC1OC(OC2C(OC(OC3C(O)C(O)OC(CO)C3O)C(O)C2O)CO)C(O)C(O)C1O	trisaccharide-2
CC(=O)NC1C(O)C(OC1CO)OC1C(O)C(OC(O)C1NC(=O)C)CO	chitobiose
CC1OC(OC2C(O)C(O)C(O)OC2CO)C(O)C(O)C1NC(=O)C	N-acetylglucosamine-rhamnoside
OCC(O)C(O)C(O)C(O)COP(=O)(O)O	glucose-6-phosphate-openchain
OC1C(OP(=O)(O)O)C(O)C(O)C(O1)CO	glucose-1-phosphate
OC1C(O)C(OP(=O)(O)OP(=O)(O)OCC2OC(n3ccc(=O)[nH]c3=O)C(O)C2O)C(O)C(O1)CO	UDP-glucose
OC1C(O)C(OP(=O)(O)OP(=O)(O)OCC2OC(n3ccc(N)nc3=O)C(O)C2O)C(O)C(O1)CO	CDP-glucose-partial
CC(C)C(NC(=O)C(NC(=O)c1ccncc1)CC(=O)O)C(=O)NC(Cc1ccc(O)cc1)C(=O)NC(C(=O)O)CC(=O)N	pentapeptide-nDVYN
Oc1ccc(C=NNC(=O)c2ccncc2)cc1	isonicotinoyl-hydrazone
Oc1ccc(cc1)C(=O)NC(CC(=O)O)C(=O)NC(Cc1ccc(O)cc1)C(=O)NC(CCCCN)C(=O)O	tetrapeptide-pHBz
CC1OC(=O)C(C)C(OC2CC(C)(OC)C(O)C(C)O2)C(C)C(OC2OC(C)CC(C2O)N(C)C)C(C)(O)CC(C)CN(C)C(C)C(O)C1(C)O	erythromycin-B-partial
OC(=O)c1cc(O)c(O)c(O)c1OC1OC(C(=O)O)C(O)C(O)C1O	galloyl-glucuronate
OCC1OC(OCC2OC(OCC3OC(O)C(O)C(O)C3O)C(O)C(O)C2O)C(O)C(O)C1O	maltotriose-partial
CC12CCC3C(C1CCC2O)CCC4=CC(=O)CCC34	androstenone-partial
COc1ccc(cc1OC)C1CC(=O)c2c(O)cc(OC3OC(CO)C(O)C(O)C3O)cc2O1	hesperidin-aglycone-glc
CC(=O)OC1CC(C)C=C2C=CC(C)C(CCC3CC(OC4CC(O)CC(O)C4)CC(=O)O3)C21	lovastatin
CC(C)C(NC(=O)C(Cc1ccccc1)NC(=O)C(NC(=O)OC(C)(C)C)CC(=O)O)C(=O)NC(CO)C(=O)NC(CC(C)C)C(=O)NC(CCCCN)C(=O)NC(CC(=O)N)C(=O)NC(C)C(=O)O	octapeptide-Boc
CC(C)(C)OC(=O)NC(Cc1ccccc1)C(O)CC(NC(=O)C(CC(=O)O)NC(=O)OC(C)(C)C)Cc1ccccc1	HIV-protease-inhib-core
CC(C)(C)NC(=O)C1CC2CCCCC2CN1CC(O)C(Cc1ccccc1)NC(=O)C(CC(N)=O)NC(=O)c1ccc2ccccc2n1	saquinavir
CC(C)(C)NC(=O)C1CC2CCCCC2CN1CC(O)C(Cc1ccccc1)NC(=O)OCC1=CN=C2C(N)=NC=NC12	indinavir-partial
CC(C)CC(NC(=O)C(Cc1ccccc1)NC(=O)OCC1=CC=CC=C1)C(=O)NC(CC1CCCCC1)CN	nelfinavir-core
OC(=O)CCCCCCC=CCCCCCCCC=CCCCCC	docosahexaenoic-acid-partial
CCCCCCCCCCCCCCCCCCCC(=O)OCC(COC(=O)CCCCCCCCCCCCCCCCC)OC(=O)CCCCCCCCCCCCCCCC	tripalmitin-partial
OP(=O)(O)OCC(COC(=O)CCCCCCCCCCCCCCC)OC(=O)CCCCCCCCCCCCCCC	phosphatidic-acid-fragment
CCCCCCCC=CCCCCCCCC(=O)OCC(COP(=O)(O)OCC(N)C(=O)O)OC(=O)CCCCCCCC=CCCCCCCC	phosphatidylserine-partial
Oc1ccc(cc1)C1Oc2cc(O)cc(O)c2CC1OC1OC(CO)C(O)C(O)C1O	catechin-glucoside
Oc1cc(O)c2c(c1)OC(c1ccc(O)c(O)c1)C(O)C2c1c(O)cc(O)c2c1OC(c1ccc(O)c(O)c1)C(O)C2	procyanidin-B-partial
CC1(C)CCC2(CCC3C(=CCC4C5(C)CCC(O)C(C)(C)C5CCC34C)C2C1)C(=O)O	oleanolic-acid
CC1(C)CCC2(CCC3(C)C(=CCC4C5(C)CC(O)C(O)C(C)(C)C5CCC43)C2C1)C(=O)O	asiatic-acid-partial
CC1(C)CCC2(CCC3(C)C(=CCC4C3CCC3C(C)(C)C(O)CCC34C)C2C1)C(=O)O	ursolic-acid
CC1CCC2(CCC3(CCC4(C)C5CCC6(C)CCC(OC7OC(CO)C(O)C(O)C7O)C(C)(C)C6CC5=CCC4C3C2C1C)C(=O)O)C	glycyrrhetinic-acid-3-glucoside-partial
OC1(C(=O)O)CC(OC(=O)C(O)c2ccc(O)c(O)c2)C(O)(CC1OC(=O)C(O)c1ccc(O)c(O)c1)C(=O)O	chlorogenic-acid-trimer-partial
OC(=O)c1cc(O)c(O)c(O)c1OC1OC(COC(=O)c2cc(O)c(O)c(O)c2)C(O)C(OC(=O)c2cc(O)c(O)c(O)c2)C1OC(=O)c1cc(O)c(O)c(O)c1	pentagalloylglucose-partial
CC1OC(CC(O)C1O)OC(C(=O)O)C(O)c1ccc(O)c(O)c1	protocatechuic-acid-rhamnoside
CC(C)C(NC(=O)C(Cc1ccc(O)cc1)NC(=O)C(CC(=O)O)NC(=O)C(Cc1c[nH]c2ccccc12)NC(=O)C(N)CCCCN)C(=O)NC(Cc1cnc[nH]1)C(=O)NC(CCCNC(=N)N)C(=O)NC(CC(C)C)C(=O)NC(CCC(=O)N)C(=O)NC(CO)C(=O)O	decapeptide-1
CC(C)C(NC(=O)C(Cc1ccccc1)NC(=O)C(CC(C)C)NC(=O)C(CC(=O)N)NC(=O)C(N)CS)C(=O)NC(Cc1ccc(O)cc1)C(=O)NC(CCCCN)C(=O)NC(CC(=O)O)C(=O)NC(CCC(=O)O)C(=O)NC(CCCNC(=N)N)C(=O)O	decapeptide-2
CC(=O)SCCNC(=O)CCNC(=O)C(O)C(C)(C)COP(=O)(O)OP(=O)(O)OCC1OC(n2cnc3c(N)ncnc32)C(O)C1OP(=O)(O)O	acetyl-CoA-v2
OC(=O)CCC(=O)SCCNC(=O)CCNC(=O)C(O)C(C)(C)COP(=O)(O)OP(=O)(O)OCC1OC(n2cnc3c(N)ncnc32)C(O)C1OP(=O)(O)O	succinyl-CoA
OC(=O)C(CC(=O)SCCNC(=O)CCNC(=O)C(O)C(C)(C)COP(=O)(O)OP(=O)(O)OCC1OC(n2cnc3c(N)ncnc32)C(O)C1OP(=O)(O)O)O	3-hydroxy-3-methylglutaryl-CoA-partial
Oc1cc(O)c2c(c1)OC(c1ccc(O)c(O)c1)C(O)C2	catechin
Oc1cc(O)c2c(c1)OC(c1ccc(O)c(O)c1)C(O)C2=O	taxifolin
Oc1ccc(C2Oc3cc(O)cc(O)c3CC2O)cc1O	epicatechin
CC12CCC(=O)C(C)(CCC(O)(CO)C3CCC4(C)C(CCC5(C)C4CC=C4C6CC(C)(C)CCC6(CCC45C)C(=O)O)C3(C)C)C1CCC2O	ginsenoside-partial
CC1OC(O)CC(O)C1OC1CC(O)(CC(=O)O1)C(=O)CC1OC(CC(O)C1O)OC1CC(O)(CC(=O)O1)C(=O)CC1OC(CC(O)C1O)O	erythronolide-chain-partial
CC(C)Cc1ccc(cc1)C(C)C(=O)OCC(=O)NC(CCCCNC(=O)C(Cc1ccccc1)NC(=O)OC(C)(C)C)C(=O)NCC(=O)O	ibuprofen-tripeptide-conjugate
CC(C)C(NC(=O)C(Cc1ccccc1)NC(=O)C(N)CCCNC(=N)N)C(=O)NC(CO)C(=O)NC(CC(C)C)C(=O)NC(Cc1c[nH]c2ccccc12)C(=O)NC(CC(=O)O)C(=O)NC(CCC(=O)N)C(=O)NC(CC(=O)N)C(=O)NC(CCCCN)C(=O)O	decapeptide-3
Cc1cc(C(=O)SCCNC(=O)CCNC(=O)C(O)C(C)(C)COP(=O)(O)OP(=O)(O)OCC2OC(n3cnc4c(N)ncnc43)C(O)C2OP(=O)(O)O)oc1C	methylmalonyl-CoA-partial
OC1C(O)C(OC(CO)C1O)OP(=O)(O)OP(=O)(O)OCC1OC(n2ccc(N)nc2=O)C(O)C1O	CDP-glucose
CC(C)(COP(=O)(O)OP(=O)(O)OCC1OC(n2cnc3c(N)ncnc32)C(O)C1OP(=O)(O)O)C(O)C(=O)NCCC(=O)NCCS	CoA
Cc1cc2nc3c(=O)[nH]c(=O)nc3n(CC(O)C(O)C(O)COP(=O)(O)OP(=O)(O)OCC3OC(n4cnc5c(N)ncnc54)C(O)C3OP(=O)(O)O)c2cc1C	FAD
OC(C(O)C(O)COP(=O)(O)OP(=O)(O)OCC1OC(n2cnc3c(N)ncnc32)C(O)C1OP(=O)(O)O)c1cc2c(cc1C)n(C)c(=O)[nH]c2=O	FAD-v2
Oc1cc(O)c2c(c1)OC(c1ccc(O)c(O)c1)C(OC1OC(CO)C(O)C(O)C1O)C2=O	quercetin-3-glucoside
COc1cc(ccc1O)C1Oc2cc(OC3OC(CO)C(O)C(O)C3O)cc(O)c2C(=O)C1O	hesperidin-partial
CC(=O)OCC(=O)C1(OC(C)=O)CCC2C3CC=C4CC(OC5CC(N(C)C)C(O)C(C)O5)CCC4(C)C3CCC12C	digitoxin-partial
OC1CC(O)C(OC2CC(O)C(OC3CC(O)C(O)C(C)O3)C(C)O2)C(C)O1	tridigitoxose
CC(C)CC(NC(=O)C(N)CC(=O)O)C(=O)NC(Cc1ccccc1)C(=O)NC(CCCNC(=N)N)C(=O)NC(CC(=O)N)C(=O)NC(CO)C(=O)NC(CC(C)C)C(=O)NC(Cc1ccc(O)cc1)C(=O)O	octapeptide-DLFRNSLY
CC(C)C(NC(=O)C(CS)NC(=O)C(N)Cc1ccccc1)C(=O)NC(Cc1cnc[nH]1)C(=O)NC(CC(=O)O)C(=O)NC(CCC(=O)N)C(=O)NC(CCCCN)C(=O)NC(CC(C)C)C(=O)O	octapeptide-FCVHDQKL
OC(=O)C(CC(C)C)NC(=O)C(Cc1c[nH]c2ccccc12)NC(=O)C(CC(=O)O)NC(=O)C(N)Cc1ccc(O)cc1	Tyr-Asp-Trp-Leu
NC(Cc1ccccc1)C(=O)NC(CCCNC(=N)N)C(=O)NC(CC(C)C)C(=O)NC(Cc1ccc(O)cc1)C(=O)NC(CC(=O)O)C(=O)NC(CCCCN)C(=O)O	Phe-Arg-Leu-Tyr-Asp-Lys
OC(=O)C(CCCCN)NC(=O)C(CO)NC(=O)C(CCC(=O)N)NC(=O)C(Cc1cnc[nH]1)NC(=O)C(N)CC(C)C	Leu-His-Gln-Ser-Lys
CC(C)CC(NC(=O)C(CC(C)C)NC(=O)C(Cc1ccccc1)NC(=O)C(N)CCC(=O)O)C(=O)NC(CC(=O)O)C(=O)NC(CO)C(=O)NC(Cc1ccc(O)cc1)C(=O)O	Glu-Phe-Leu-Leu-Asp-Ser-Tyr
OC(=O)C(Cc1ccc(O)cc1)NC(=O)C(CCCNC(=N)N)NC(=O)C(CC(=O)N)NC(=O)C(N)CCCCN	Lys-Asn-Arg-Tyr
CC(=O)NC(Cc1c[nH]c2ccccc12)C(=O)NC(CCC(=O)O)C(=O)NC(CC(C)C)C(=O)NC(CS)C(=O)NC(Cc1ccccc1)C(=O)NC(CC(=O)O)C(=O)O	Ac-Trp-Glu-Leu-Cys-Phe-Asp
OC(=O)C(CCCCN)NC(=O)C(CCC(=O)N)NC(=O)C(CC(=O)N)NC(=O)C(N)C(CC)C	Ile-Asn-Gln-Lys
CC(C)C(NC(=O)C(N)Cc1ccccc1)C(=O)NC(CCCCN)C(=O)NC(CC(=O)O)C(=O)NC(Cc1ccc(O)cc1)C(=O)NC(CCC(=O)O)C(=O)NC(CC(C)C)C(=O)NC(CCCNC(=N)N)C(=O)NC(CO)C(=O)O	Phe-Val-Lys-Asp-Tyr-Glu-Leu-Arg-Ser
CC12CCC3(C)C(CCC4C5(C)CCC(O)C(C)(C)C5CCC43)C1CCC2(O)C=O	betulinaldehyde-partial
CC1(C)CCC2(CCC3(C)C(CCC4C5CCC6(CCC(OC7OC(CO)C(O)C(O)C7O)C(C)(C)C6CC5=CCC43)C(=O)O)C2C1)C	soyasaponin-aglycone-glc
COC(=O)C1=COC(OC2OC(CO)C(O)C(O)C2O)C2C1CC(O)=C2C(=O)OC	geniposide-partial
CC(=O)OC1CCC2(C)C3CCC4(C)C(CC=C4C4=CC(=O)OC4)C3CC(OC3OC(CO)C(O)C(O)C3O)C2C1C	digitoxigenin-glucoside-partial
OC1C(O)C(OC(CO)C1O)OC1CC(O)C(OC1CO)OC1CC(O)C(O)C(O1)CO	cellotriose-partial
Oc1cc(O)c2c(c1)oc(-c1ccc(O)c(O)c1)c(OC1OC(COC2OC(C)C(O)C(O)C2O)C(O)C(O)C1O)c2=O	rutin-partial
COc1ccc(C2CC(=O)c3c(O2)cc(OC2OC(COC3OC(C)C(O)C(O)C3O)C(O)C(O)C2O)cc3O)cc1O	hesperidin-full-partial
CC(=O)OC1CC(OC(C)=O)C(OC(C)=O)C(OC(C)=O)C1OC(C)=O	galactose-pentaacetate
CC(=O)OC1C(OC(C)=O)C(OC(C)=O)C(COC(C)=O)OC1OC(C)=O	mannose-pentaacetate
CC(=O)OCC1OC(OC2C(OC(C)=O)C(OC(C)=O)C(OC(C)=O)OC2COC(C)=O)C(OC(C)=O)C(OC(C)=O)C1OC(C)=O	lactose-octaacetate
OCC1OC(OC2C(O)C(OC(O)C2O)CO)C(O)C(O)C1O	cellobiose
OCC1OC(OC2C(O)C(OC(OC3C(O)C(O)C(O)OC3CO)C2O)CO)C(O)C(O)C1O	cellotriose-v2
CC(C)(COP(=O)(O)OP(=O)(O)OCC1OC(n2cnc3c(N)ncnc32)C(O)C1OP(=O)(O)O)C(O)C(=O)NCCC(=O)NCCSC(=O)C	propionyl-CoA
CC(C)(COP(=O)(O)OP(=O)(O)OCC1OC(n2cnc3c(N)ncnc32)C(O)C1OP(=O)(O)O)C(O)C(=O)NCCC(=O)NCCSC(=O)CCC(=O)O	succinyl-CoA-v2
CC(C)(COP(=O)(O)OP(=O)(O)OCC1OC(n2cnc3c(N)ncnc32)C(O)C1OP(=O)(O)O)C(O)C(=O)NCCC(=O)NCCSC(=O)CC(O)C(=O)O	malyl-CoA-partial
CC(C)(COP(=O)(O)OP(=O)(O)OCC1OC(n2cnc3c(N)ncnc32)C(O)C1OP(=O)(O)O)C(O)C(=O)NCCC(=O)NCCSC(=O)CCCCCCC	octanoyl-CoA-partial
CC(C)(COP(=O)(O)OP(=O)(O)OCC1OC(n2cnc3c(N)ncnc32)C(O)C1OP(=O)(O)O)C(O)C(=O)NCCC(=O)NCCSC(=O)CCCCCCCCCCCCCCC	palmitoyl-CoA
# ============================================================
# SECTION 6: Very large molecules (100+ atoms) — 59 entries (nominal index range 701-800 is stale)
# ============================================================
CN(C)C(CC(C)C)C(=O)NC(C(O)C(=O)NC(CC(C)C)C(=O)N(C)CC(=O)N(C)C(CC(C)C)C(=O)NC(C)C(=O)NC(CC)C(=O)N(C)C(CC(C)C)C(=O)NC(CC(C)C)C(=O)NC(CC(C)C)C(=O)N(C)C(CC(C)C)C(=O)NC(C(C)CC)C(=O)N1CCCC1C(=O)O)CC1=CC=CC=C1	linear-cyclosporin-partial
CCC(C)C(NC(=O)C(C(C)CC)N(C)C(=O)C(CC(C)C)N(C)C(=O)CN(C)C(=O)C(CC(C)C)NC(=O)C(C)NC(=O)C(CC(C)C)NC(=O)C(CC(C)C)NC(=O)C(CC(C)C)N(C)C(=O)C(CC(C)C)NC(=O)C(C(C)CC)N(C)C(=O)C1CCCN1C(=O)C(CC(C)C)N(C)C)C(=O)O	cyclosporin-linear-v2
CC1CC(O)C(OC2CC(C)(OC)C(O)C(C)O2)C(C)(OC2OC(C)CC(N(C)C)C2O)OC(=O)C(C)C(OC2CC(C)(OC)C(O)C(C)O2)C(C)C(O)(CC(C)C(=O)C(C)C(O)C1(C)O)CC	tylosin-partial
CC1CC(OC2CCCCO2)OC(=O)CC(O)CC2CC(=O)C(CC(OC(=O)CC(OC(=O)CC(OC1=O)CC1OC(CC(O)C1O)OC1CC(OC(=O)CC(O)CC3CC(=O)C(CC(O)CC(O)CC4OC(CC(O)C4O)OC4CC(O)(CC(=O)O4)C(=O)C4CCC(CC4O)O)C3O)C(O)C1O)C2O)C(O)CC1OC(CC(O)C1O)O)O	amphotericin-B-extended-partial
CC(CC=CC(C)C1OC1CC(O)C(C)CC(=O)CC(O)CC(O)CC(O)CC(O)CC(O)CC(O)CC(O)CC(O)CC(=O)OC(C)CC(=O)CC(=O)CC(=O)CC(=O)CC(=O)CC(=O)CC(=O)CC(=O)CC(C)O)O	polyene-chain
CCCCCCCCCCCCOC(=O)CCCCCCCCCCCCCCCCCCC(=O)OCCCCCCCCCCCC	dilauryl-sebacate
CCCCCCCCCCCCCCCCCC(=O)OCC(COC(=O)CCCCCCCCCCCCCCCCC)OC(=O)CCCCCCCCC=CCCCCCCCC	mixed-triglyceride
CC(C)CC(NC(=O)C(CC(C)C)NC(=O)C(Cc1ccccc1)NC(=O)C(CC(=O)N)NC(=O)C(CCCNC(=N)N)NC(=O)C(Cc1ccc(O)cc1)NC(=O)C(N)CCCCN)C(=O)NC(Cc1cnc[nH]1)C(=O)NC(CC(=O)O)C(=O)NC(CCC(=O)O)C(=O)NC(CO)C(=O)NC(CC(C)C)C(=O)NC(Cc1c[nH]c2ccccc12)C(=O)NC(C)C(=O)NC(CCCNC(=N)N)C(=O)NC(CC(=O)N)C(=O)O	hexadecapeptide-1
CC(C)CC(NC(=O)C(N)CC(=O)O)C(=O)NC(Cc1c[nH]c2ccccc12)C(=O)NC(CC(C)C)C(=O)NC(Cc1cnc[nH]1)C(=O)NC(CC(C)C)C(=O)NC(Cc1ccccc1)C(=O)NC(CCCNC(=N)N)C(=O)NC(Cc1ccc(O)cc1)C(=O)NC(CC(=O)N)C(=O)NC(CO)C(=O)NC(CCC(=O)O)C(=O)NC(CC(=O)O)C(=O)NC(CCCCN)C(=O)NC(C)C(=O)O	hexadecapeptide-2
CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC	hexatetracontane
CCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCC	PEG-16
CCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCC	PEG-12
CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC	hexacontane
CC(=O)NC(CS)C(=O)NC(CC(=O)O)C(=O)NC(CCC(=O)N)C(=O)NC(Cc1ccccc1)C(=O)NC(CCCNC(=N)N)C(=O)NC(CC(C)C)C(=O)NC(Cc1ccc(O)cc1)C(=O)NC(CCCCN)C(=O)NC(CC(=O)N)C(=O)NC(CO)C(=O)NC(CCC(=O)O)C(=O)NC(CC(C)C)C(=O)NC(C)C(=O)NC(CC(=O)O)C(=O)O	pentadecapeptide-1
CC(C)C(NC(=O)C(N)CS)C(=O)NC(Cc1ccccc1)C(=O)NC(CCCCN)C(=O)NC(CC(=O)O)C(=O)NC(CCC(=O)O)C(=O)NC(CCCNC(=N)N)C(=O)NC(CC(=O)N)C(=O)NC(Cc1ccc(O)cc1)C(=O)NC(CC(C)C)C(=O)NC(CO)C(=O)NC(Cc1cnc[nH]1)C(=O)NC(C)C(=O)NC(Cc1c[nH]c2ccccc12)C(=O)NC(CCC(=O)N)C(=O)O	hexadecapeptide-3
CCCCCCCCCCCCCCCC(=O)OCC(COP(=O)([O-])OCC[N+](C)(C)C)OC(=O)CCCCCCCCCCCCCCC	DPPC
CCCCCCCCCCCCCCCC(=O)OCC(COP(=O)([O-])OCC[NH3+])OC(=O)CCCCCCCCCCCCCCC	DPPE
CCCCCCCCCCCCCCCCCCCCCCCCCCCC(=O)OCC(COP(=O)([O-])OCC[N+](C)(C)C)OC(=O)CCCCCCCCCCCCCCCCCCCCCCCCCCC	phosphatidylcholine-C28
OC1C(O)C(OC(CO)C1O)OC1C(O)C(OC(CO)C1O)OC1C(O)C(OC(CO)C1O)OC1C(O)C(OC(CO)C1O)OC1C(O)C(O)C(O)OC1CO	maltopentaose
OCC1OC(OC2C(O)C(OC(OC3C(O)C(OC(OC4C(O)C(OC(O)C(O)C4O)CO)C(O)C3O)CO)C(O)C2O)CO)C(O)C(O)C1O	maltotetraose
CC1OC(OC2CC(O)(CC(=O)O2)C(=O)C2CCC3(C)C(CCC4(C)C3CCC3C(CC(O)C5CC(C)(C)CC(OC6OC(C)C(O)C(O)C6O)C5O)C34C)C2(C)C)C(O)C(O)C1O	ginsenoside-Rb1-partial
CC1(C)CCC2(CCC3(C)C(=CCC4C5(C)CCC(OC6OC(CO)C(O)C(O)C6OC6OC(CO)C(O)C(O)C6O)C(C)(C)C5CCC43)C2C1)C(=O)O	soyasaponin-partial
CC(C)CC(NC(=O)C(Cc1ccccc1)NC(=O)C(N)CCCNC(=N)N)C(=O)NC(CO)C(=O)NC(CC(C)C)C(=O)NC(Cc1c[nH]c2ccccc12)C(=O)NC(CC(=O)O)C(=O)NC(CCC(=O)N)C(=O)NC(CCCCN)C(=O)NC(Cc1ccc(O)cc1)C(=O)NC(CC(=O)N)C(=O)NC(CCCNC(=N)N)C(=O)NC(CCC(=O)O)C(=O)NC(CC(C)C)C(=O)NC(C)C(=O)O	octadecapeptide-1
CCC(=O)OC1CC(OC(=O)CC)C(OC(=O)CC)C2(COC(=O)CC)COC3CC4OC5(C)CCC6CC(OC(=O)CC)C(OC(=O)CC)C(OC(=O)CC)C6(COC(=O)CC)OC5CC4(COC(=O)CC)OC3C12	sucrose-octapropanoate
CC(=O)OCC1OC(OC2C(OC(OC3C(OC(OC4C(OC(OC5C(OC(O)C(OC(C)=O)C5OC(C)=O)COC(C)=O)C(OC(C)=O)C4OC(C)=O)COC(C)=O)C(OC(C)=O)C3OC(C)=O)COC(C)=O)C(OC(C)=O)C2OC(C)=O)COC(C)=O)C(OC(C)=O)C(OC(C)=O)C1OC(C)=O	maltopentaose-peracetate-partial
CCCCCCCCCCCCCCCCCCCC(=O)OCC(COP(=O)([O-])OCC(CO)O)OC(=O)CCCCCCCCCCCCCCCCCCC	DPPG-C20
OCC1OC(OC2C(O)C(O)C(O)OC2CO)C(O)C(O)C1OC1OC(CO)C(O)C(O)C1O	panose
CC12CCC3C(C1CCC2OC1OC(CO)C(O)C(O)C1O)CCC4=CC(=O)CCC34C	testosterone-glucoside
OC1C(O)C(OC(COC(=O)c2cc(O)c(O)c(O)c2)C1OC(=O)c1cc(O)c(O)c(O)c1)OC(=O)c1cc(O)c(O)c(O)c1	trigalloylglucose
CC(=O)NC(CC(=O)O)C(=O)NC(Cc1ccccc1)C(=O)NC(CCCNC(=N)N)C(=O)NC(Cc1ccc(O)cc1)C(=O)NC(CC(C)C)C(=O)NC(CC(=O)N)C(=O)NC(CO)C(=O)NC(CCC(=O)O)C(=O)NC(CCCCN)C(=O)NC(CC(=O)O)C(=O)NC(C)C(=O)NC(CCC(=O)N)C(=O)NC(CC(C)C)C(=O)NC(CS)C(=O)NC(Cc1cnc[nH]1)C(=O)NC(Cc1c[nH]c2ccccc12)C(=O)O	octadecapeptide-2
CC(=O)SCCNC(=O)CCNC(=O)C(O)C(C)(C)COP(=O)(O)OP(=O)(O)OCC1OC(n2cnc3c(N)ncnc32)C(O)C1OP(=O)(O)O	acetyl-CoA-v3
C(COCCOCCOCCOCCOCCOCCOCCOCCOCCOCC)OCCOCCOCCOCCOCCOCCOCCOCCOCCOCC	PEG-20-diether
CCCCCCCCCCCCCCCCCCCC(=O)NCCOP(=O)([O-])OCC(COC(=O)CCCCCCCCCCCCCCCCC)OC(=O)CCCCCCCCCCCCCCCC	sphingomyelin-partial
OC1C(O)C(O)C(OC2C(O)C(O)C(OC3C(O)C(O)C(OC4C(O)C(O)C(OC5C(O)C(O)C(OC6C(O)C(O)C(O)OC6CO)OC5CO)OC4CO)OC3CO)OC2CO)OC1CO	maltohexaose
CC(C)CC(NC(=O)C(N)Cc1ccccc1)C(=O)NC(Cc1c[nH]c2ccccc12)C(=O)NC(CC(C)C)C(=O)NC(Cc1cnc[nH]1)C(=O)NC(CC(C)C)C(=O)NC(Cc1ccccc1)C(=O)NC(CCCNC(=N)N)C(=O)NC(Cc1ccc(O)cc1)C(=O)NC(CC(=O)N)C(=O)NC(CO)C(=O)NC(CCC(=O)O)C(=O)NC(CC(=O)O)C(=O)NC(CCCCN)C(=O)NC(C)C(=O)NC(CCC(=O)N)C(=O)NC(CC(C)C)C(=O)NC(CS)C(=O)NC(Cc1cnc[nH]1)C(=O)NC(Cc1c[nH]c2ccccc12)C(=O)O	icosapeptide-1
CC(C)C(NC(=O)C(Cc1ccc(O)cc1)NC(=O)C(CCCCN)NC(=O)C(CC(=O)O)NC(=O)C(N)CCC(=O)O)C(=O)NC(Cc1ccccc1)C(=O)NC(CCCNC(=N)N)C(=O)NC(Cc1c[nH]c2ccccc12)C(=O)NC(CC(C)C)C(=O)NC(CC(=O)N)C(=O)NC(CO)C(=O)NC(CCC(=O)N)C(=O)NC(CC(=O)O)C(=O)NC(CCCCN)C(=O)NC(C)C(=O)NC(Cc1cnc[nH]1)C(=O)NC(CC(C)C)C(=O)NC(CCC(=O)O)C(=O)NC(CS)C(=O)NC(CC(=O)N)C(=O)O	icosapeptide-2
CCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCC	PEG-24
CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC	dotriacontane
CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC	heptacontane
CC(C)CC(NC(=O)C(CC(=O)O)NC(=O)C(N)Cc1ccccc1)C(=O)NC(CCC(=O)N)C(=O)NC(CO)C(=O)NC(CCCCN)C(=O)NC(CC(C)C)C(=O)NC(C)C(=O)NC(Cc1ccc(O)cc1)C(=O)NC(CCCNC(=N)N)C(=O)NC(CC(=O)N)C(=O)NC(CC(C)C)C(=O)NC(Cc1c[nH]c2ccccc12)C(=O)NC(CC(=O)O)C(=O)NC(CCC(=O)O)C(=O)NC(Cc1cnc[nH]1)C(=O)NC(CS)C(=O)NC(Cc1ccccc1)C(=O)NC(CCCCN)C(=O)NC(CC(=O)N)C(=O)O	icosapeptide-3
CC1CC2C3CCC4=CC(=O)CCC4(C)C3(F)C(O)CC2(C)C1(O)C(=O)COC1OC(CO)C(O)C(O)C1O	dexamethasone-glucoside
CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC(=O)OCC(COP(=O)([O-])OCC[N+](C)(C)C)OC(=O)CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC	DPPC-C36-partial
CC(C)C(NC(=O)C(Cc1ccc(O)cc1)NC(=O)C(CC(=O)O)NC(=O)C(CC(C)C)NC(=O)C(CC(=O)N)NC(=O)C(CCCNC(=N)N)NC(=O)C(Cc1ccccc1)NC(=O)C(N)CS)C(=O)NC(Cc1c[nH]c2ccccc12)C(=O)NC(CCC(=O)O)C(=O)NC(CO)C(=O)NC(CC(=O)O)C(=O)NC(CCCCN)C(=O)NC(CCC(=O)N)C(=O)NC(C)C(=O)NC(Cc1cnc[nH]1)C(=O)NC(CC(C)C)C(=O)NC(CCCCN)C(=O)NC(CC(=O)N)C(=O)NC(CC(C)C)C(=O)NC(CCC(=O)O)C(=O)O	icosapeptide-4
OCC(O)C(OC1OC(CO)C(O)C(O)C1O)C(O)C(O)COP(=O)(O)OP(=O)(O)OCC1OC(n2ccc(=O)[nH]c2=O)C(O)C1O	UDP-galactose-openchain
OC1C(O)C(OP(=O)(O)OP(=O)(O)OCC2OC(n3cnc4c(N)ncnc43)C(O)C2O)C(OC1CO)O	ADP-glucose
CC(C)(COP(=O)(O)OP(=O)(O)OCC1OC(n2cnc3c(N)ncnc32)C(O)C1OP(=O)(O)O)C(O)C(=O)NCCC(=O)NCCSC(=O)CCCCCCCCCCCCCCCC	palmitoyl-CoA-partial
OC(C(O)C(O)COP(=O)(O)O)C(O)C(O)COP(=O)(O)O	fructose-1,6-bisphosphate-openchain
OC(=O)C(N)CCCCNC(=O)C(N)CCCCNC(=O)C(N)CCCCNC(=O)C(N)CCCCNC(=O)C(N)CCCCNC(=O)C(N)CCCCNC(=O)C(N)CCCCNC(=O)C(N)CCCCNC(=O)C(N)CCCCNC(=O)C(N)CCCCN	polylysine-10
OC(=O)CNC(=O)CNC(=O)CNC(=O)CNC(=O)CNC(=O)CNC(=O)CNC(=O)CNC(=O)CNC(=O)CNC(=O)CNC(=O)CNC(=O)CNC(=O)CNC(=O)CN	polyglycine-15
OC(=O)C(C)NC(=O)C(C)NC(=O)C(C)NC(=O)C(C)NC(=O)C(C)NC(=O)C(C)NC(=O)C(C)NC(=O)C(C)NC(=O)C(C)NC(=O)C(C)N	polyalanine-10
CC(C)C(NC(=O)C(CC(C)C)NC(=O)C(CC(C)C)NC(=O)C(CC(C)C)NC(=O)C(CC(C)C)NC(=O)C(CC(C)C)NC(=O)C(CC(C)C)NC(=O)C(CC(C)C)NC(=O)C(CC(C)C)NC(=O)C(N)CC(C)C)C(=O)O	polyleucine-10
CCCCCCCCCCCCCCCCCCCCOC(=O)CCC(=O)OCCCCCCCCCCCCCCCCCCCC	diarachidyl-succinate
CCCCCCCCCCCCCCCC(=O)OCC(COC(=O)CCCCCCCCCCCCCCC)OC(=O)CCCCCCCCCCCCCCC	tripalmitin
OCC1OC(OC2C(O)C(OC(OC3C(O)C(OC(O)C(O)C3O)CO)C(O)C2O)CO)C(O)C(O)C1O	maltotriose-v2
CC(=O)NC(Cc1ccccc1)C(=O)NC(CCCNC(=N)N)C(=O)NC(Cc1ccc(O)cc1)C(=O)NC(CC(C)C)C(=O)NC(CC(=O)N)C(=O)NC(CO)C(=O)NC(CCC(=O)O)C(=O)NC(CCCCN)C(=O)NC(CC(=O)O)C(=O)NC(C)C(=O)NC(CCC(=O)N)C(=O)NC(CC(C)C)C(=O)NC(CS)C(=O)NC(Cc1cnc[nH]1)C(=O)NC(Cc1c[nH]c2ccccc12)C(=O)NC(CC(=O)O)C(=O)NC(CCCCN)C(=O)NC(CCC(=O)O)C(=O)NC(CC(C)C)C(=O)O	icosapeptide-5
CCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCC	PEG-28
CC(C)(COP(=O)(O)OP(=O)(O)OCC1OC(n2cnc3c(N)ncnc32)C(O)C1OP(=O)(O)O)C(O)C(=O)NCCC(=O)NCCSC(=O)CC(=O)O	malonyl-CoA
CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC	octacontane
CC(C)CC(NC(=O)C(N)Cc1c[nH]c2ccccc12)C(=O)NC(Cc1ccccc1)C(=O)NC(CC(C)C)C(=O)NC(Cc1ccc(O)cc1)C(=O)NC(CCCNC(=N)N)C(=O)NC(CO)C(=O)NC(CC(=O)O)C(=O)NC(CCC(=O)N)C(=O)NC(CCCCN)C(=O)NC(CC(=O)N)C(=O)NC(C)C(=O)NC(CCC(=O)O)C(=O)NC(CC(C)C)C(=O)NC(CS)C(=O)NC(Cc1cnc[nH]1)C(=O)NC(CC(=O)O)C(=O)NC(CCCCN)C(=O)NC(CC(C)C)C(=O)NC(Cc1c[nH]c2ccccc12)C(=O)NC(CCC(=O)N)C(=O)O	icosapeptide-6
# ============================================================
# SECTION 7: Tautomer pairs — 96 entries (nominal index range 801-900 is stale)
# ============================================================
O=c1cc[nH]c(=O)[nH]1	uracil-keto
Oc1ccnc(O)n1	uracil-enol
Cc1c[nH]c(=O)[nH]c1=O	thymine-keto
Cc1cnc(O)nc1O	thymine-enol
Nc1nc2[nH]cnc2c(=O)[nH]1	guanine-keto
Nc1nc2[nH]cnc2c(O)n1	guanine-enol
O=c1[nH]cnc2[nH]cnc12	hypoxanthine-keto
Oc1ncnc2[nH]cnc12	hypoxanthine-enol
O=c1[nH]c(=O)c2[nH]cnc2[nH]1	xanthine-keto
Oc1nc(O)c2[nH]cnc2n1	xanthine-enol
CC(=O)CC(=O)C	2,4-pentanedione-keto
CC(=O)C=C(C)O	2,4-pentanedione-enol
OC=O	formic-acid-v3
O=CO	formic-acid-v4
OC(=O)c1ccccc1O	salicylic-acid-phenol
O=C(O)c1ccccc1O	salicylic-acid-carboxyl
NC(=O)c1ccccc1	benzamide
N=C(O)c1ccccc1	benzamide-iminol
CC(=O)Nc1ccccc1	acetanilide-amide
CC(=N)Oc1ccccc1	phenyl-acetimidate
OC(=O)CC(=O)OCC	ethyl-hydrogen-malonate-keto
OC(=O)C=C(O)OCC	ethyl-hydrogen-malonate-enol
CC(=O)c1ccc(O)cc1	4-hydroxyacetophenone-keto
CC(=O)c1ccc(O)cc1	4-hydroxyacetophenone-keto-v2
C=CC(=O)C	methyl-vinyl-ketone
C=CC(O)=C	methyl-vinyl-ketone-enol
Oc1cccc2ccccc12	1-naphthol
O=C1C=Cc2ccccc2C1	2(1H)-naphthalenone
OC1=CC=Cc2ccccc21	1-naphthol-v2
O=C1CC=Cc2ccccc21	1(2H)-naphthalenone
CC(=O)CC(=O)OC	methyl-acetoacetate-keto
CC(O)=CC(=O)OC	methyl-acetoacetate-enol
O=c1[nH]c(=O)[nH]c(=O)[nH]1	barbituric-acid-triketo
Oc1nc(O)nc(O)n1	barbituric-acid-trienol
CCC(=O)c1[nH]c(CC)c(=O)[nH]1	3,5-diethyl-barbiturate-partial
CCC(O)=c1[nH]c(CC)c(O)n1	3,5-diethyl-barbiturate-enol-partial
O=C1NC(=O)c2ccccc21	phthalimide-diketo
O=C1NC(=O)c2ccccc21	phthalimide-diketo-v2
O=c1[nH]c2ccccc2[nH]1	2-oxobenzimidazole-keto
Oc1nc2ccccc2[nH]1	2-hydroxybenzimidazole-enol
Nc1ccccn1	2-aminopyridine-amino
N=c1cc[nH]cc1	4-aminopyridine-imino
c1ccc(cc1)N=NC(C(=O)O)=C(O)c1ccccc1	azo-tautomer-1
c1ccc(cc1)NN=C(C(=O)O)C(=O)c1ccccc1	azo-tautomer-2
CC1=NN(c2ccccc2)C(=O)C1	3-methyl-1-phenylpyrazolin-5-one-CH
CC1=NNC(c2ccccc2)=C1O	3-methyl-1-phenylpyrazolin-5-one-OH
O=C1C=CC(=O)C=C1	quinone
OC1=CC=C(O)C=C1	hydroquinone-taut
OC1=CC(=O)CC(=O)C1	phloroglucinol-keto
Oc1cc(O)cc(O)c1	phloroglucinol-enol
O=c1ccoc2ccccc12	chromone-keto
Oc1ccoc2ccccc12	chromone-enol
CC(=O)C(C(=O)C)Cl	chloroacetylacetone-keto
CC(=O)C(=C(C)O)Cl	chloroacetylacetone-enol
NC(=O)NN	semicarbazide
N=C(O)NN	semicarbazide-iminol
NC(=O)N	urea
N=C(O)N	urea-iminol
O=c1[nH]c(=S)[nH]c(=O)[nH]1	thiobarbituric-acid-keto
Oc1nc(=S)[nH]c(O)n1	thiobarbituric-acid-enol
CC(=O)C1=C(O)C=CC=C1	2-acetylphenol-keto
CC(=O)c1ccccc1O	2-acetylphenol-phenol
NNC(=O)c1ccncc1	isoniazid-amide
NN=C(O)c1ccncc1	isoniazid-iminol
O=C1CC(=O)CC(=O)C1	cyclohexanetrione
OC1=CC(=O)CC(O)=C1	cyclohexanetrione-dienol
NC(=O)c1[nH]ncc1	3-pyrazolecarboxamide
N=C(O)c1[nH]ncc1	3-pyrazolecarboxamide-iminol
C1=CC=CC=C1N=NC1C=CC(=O)C=C1	4-phenylazo-phenol-quinone
C1=CC=CC=C1N=NC1=CC=C(O)C=C1	4-phenylazo-phenol-phenol
O=c1[nH]cnc2cc(Cl)ccc12	6-chloro-quinazolinone-keto
Oc1ncnc2cc(Cl)ccc12	6-chloro-quinazolinone-enol
CC1=CC(=O)NC(=O)N1	6-methyluracil-keto
CC1=CC(O)=NC(O)=N1	6-methyluracil-dienol
NC(=S)c1ccccc1	thiobenzamide
N=C(S)c1ccccc1	thiobenzamide-thiol
NNC(=S)N	thiosemicarbazide-thione
NN=C(S)N	thiosemicarbazide-thiol
O=c1cc(-c2ccccc2)[nH]c(=O)[nH]1	6-phenyluracil-diketo
Oc1cc(-c2ccccc2)nc(O)n1	6-phenyluracil-dienol
CC(O)=CC=O	3-hydroxy-2-butenal-enol
CC(=O)CC=O	3-hydroxy-2-butenal-keto
O=c1cc(-c2ccc(O)cc2)oc2cc(O)cc(O)c12	apigenin-keto-taut
Oc1cc(O)c2c(c1)oc(-c1ccc(O)cc1)cc2O	apigenin-phenol-taut
c1nc2c([nH]1)ncn2C1CCCO1	adenine-THF-mimic
c1nc2c(n1C1CCCO1)nc[nH]2	adenine-THF-mimic-taut
O=c1[nH]c(N)ccn1	cytosine-keto
Oc1nc(N)ccn1	cytosine-enol
O=C1NC=Cc2ccccc21	1-isoquinolinone-keto
OC1=NC=Cc2ccccc21	1-isoquinolinol-enol
O=C1NC(=O)c2ccccc2N1	2,4-quinazolinedione-diketo
OC1=NC(=O)c2ccccc2N1	2,4-quinazolinedione-monoenol
CC(=O)C1CCCCC1=O	2-acetylcyclohexanone-diketo
CC(=O)C1=CCCCC1O	2-acetylcyclohexanone-enol
CC(=O)C=C1CCCCC1=O	2-acetylcyclohex-2-enone-keto
CC(O)=CC1=CCCCC1O	2-acetylcyclohex-2-enone-dienol
# ============================================================
# SECTION 8: Symmetric molecules — 45 entries (nominal index range 901-950 is stale)
# ============================================================
C1C2CC3CC1CC(C2)C3	adamantane
C12C3C4C1C5C3C2C45	cubane
C1=CC2=CC3=CC4=CC5=CC6=CC7=CC8=CC9=CC%10=CC1=CC2=CC3=CC4=CC5=CC6=CC7=CC8=CC9=C%10	[20]annulene-partial
c1cc2ccc3ccc4ccc5ccc6ccc1c7c2c3c4c5c67	coronene
c1ccc(-c2ccccc2)cc1	biphenyl-sym
c1ccc2c(c1)cc1ccc3cccc4ccc2c1c34	pyrene-v2
C1CCCCC1	cyclohexane
C1CCCCCCC1	cyclooctane
C1CCCCCCCCCC1	cyclodecane
C1CCCCCCCCCCCC1	cyclododecane
C1CCCCCCCCCCCCCC1	cyclopentadecane
C1CCCCCCCCCCCCCCCC1	cycloheptadecane
C(c1ccccc1)(c1ccccc1)(c1ccccc1)c1ccccc1	tetraphenylmethane
C(c1ccccc1)(c1ccccc1)c1ccccc1	triphenylmethane
c1ccc(-c2ccc(-c3ccc(-c4ccccc4)cc3)cc2)cc1	p-quaterphenyl
C1CC2(CC3(CC4(CC5(CC6(CC(C1)(CC1(CC(CC7(CC(CC(CC8(CCCC8)CC)CC)CC7)C1)CC)CC)CC6)CC5)CC4)CC3)CC2)CC	diamondoid-fragment
C(=C\c1ccccc1)\c1ccccc1	stilbene-sym
c1ccc(cc1)C#Cc1ccccc1	tolane
c1ccc(cc1)NNc1ccccc1	hydrazobenzene
c1ccc(-c2cccc(-c3ccccc3)n2)cc1	2,6-diphenylpyridine
C1=CC=C(C=C1)C2=CC=C(C=C2)C3=CC=CC=C3	p-terphenyl-v2
c1ccnc(-c2ccccn2)c1	2,2-bipyridine-v2
c1ccc(-c2ccc(-c3ccccc3)o2)cc1	2,5-diphenylfuran
c1ccc(-c2ccc(-c3ccccc3)s2)cc1	2,5-diphenylthiophene
c1ccc(-c2ccc(-c3ccccc3)[nH]2)cc1	2,5-diphenylpyrrole
c1ccc(cc1)c1cc(-c2ccccc2)cc(-c2ccccc2)c1	1,3,5-triphenylbenzene
c1ccc(cc1)C(c1ccccc1)c1ccccc1	triphenylmethane-v2
N(c1ccccc1)(c1ccccc1)c1ccccc1	triphenylamine
P(c1ccccc1)(c1ccccc1)c1ccccc1	triphenylphosphine
O=S(c1ccccc1)(c1ccccc1)=O	diphenylsulfone
c1ccc(cc1)SSc1ccccc1	diphenyl-disulfide
c1cc(-c2cccc(-c3cccc(-c4cccc(-c5cccc(-c6ccccc6)c5)c4)c3)c2)cc1	cyclohexaphenylene-chain
C(C(C(C(F)(F)F)(F)F)(F)F)(C(C(C(F)(F)F)(F)F)(F)F)(F)F	perfluoroheptane
c1ccc2cc3ccccc3cc2c1	anthracene-sym
c1ccc2c(c1)-c1ccccc1-2	biphenylene-sym
C12C3C4C5C1C6C7C8C2C9C3C%10C4C%11C5C6C%12C7C8C9C%10C%11%12	dodecahedrane-partial
C1CCC(CC1)C1CCC(CC1)C1CCCCC1	tricyclohexylmethane-partial
OC(c1ccccc1)(c1ccccc1)c1ccccc1	triphenylmethanol-sym
c1cc2c3c(cccc3c1)CC2	acenaphthylene-partial
c1ccc(-c2ccc3ccc4ccc(-c5ccccc5)c5ccc6cccc6c5c4c3c2)cc1	perylene-diphenyl
CC(C)(C)c1ccc(cc1)C(c1ccc(C(C)(C)C)cc1)c1ccc(C(C)(C)C)cc1	tri-tert-butyl-triphenylmethane
C1CC2CCC3CCC4CCC5CCC6CCC1C1C2C3C4C5C61	twistane-extended-partial
CC(C)(C)c1cc(C(C)(C)C)cc(C(C)(C)C)c1	1,3,5-tri-tert-butylbenzene
c1ccc(-c2cc(-c3ccccc3)cc(-c3ccccc3)c2)cc1	1,3,5-triphenylbenzene-v2
c1ccc(cc1)c1ccc(cc1)c1ccc(cc1)c1ccc(cc1)c1ccccc1	p-quinquephenyl
# ============================================================
# SECTION 9: Edge cases — charged, isotopes, radicals, organometallics — Lines 951-1050
# ============================================================
[NH4+]	ammonium
[O-]C(=O)C	acetate-anion
[O-]c1ccccc1	phenoxide
[O-]S(=O)(=O)c1ccc(C)cc1	tosylate
[Na+].[Cl-]	sodium-chloride
[K+].[O-]C(=O)C	potassium-acetate
[Ca+2].[Cl-].[Cl-]	calcium-chloride
[NH3+]CC(=O)[O-]	glycine-zwitterion
[NH3+]C(CC(=O)[O-])C(=O)[O-]	aspartate-zwitterion
[NH3+]C(CCCC[NH3+])C(=O)[O-]	lysine-dication
[O-]C(=O)CC([O-])(CC(=O)[O-])C(=O)[O-]	citrate-tetraanion
[O-]P(=O)([O-])OP(=O)([O-])OP(=O)([O-])[O-]	triphosphate
[O-]S(=O)(=O)[O-]	sulfate
[O-][N+](=O)c1ccc([N+](=O)[O-])cc1	1,4-dinitrobenzene
[NH3+]CC([O-])=O	glycine-zwitterion-v2
c1cc[nH+]cc1	pyridinium
C[N+](C)(C)C	tetramethylammonium
CC[O-]	ethoxide
[O-]c1cc(Cl)c(Cl)cc1[O-]	dichlorophenolate-dianion
c1ccc([O-])c(c1)[S-]	2-sulfidophenolate-dianion
[2H]C([2H])([2H])O	deuterated-methanol
[2H]C([2H])([2H])[2H]	deuteromethane
[13C]c1ccccc1	13C-benzene
[13CH4]	13C-methane
C([2H])([2H])([2H])C([2H])([2H])[2H]	deuteroethane
[18O]	water-O18
[15N]c1ccccc1	15N-aniline
[14C](=O)O	14C-CO2
[3H]O	tritiated-water
[2H]c1ccccc1	deuterobenzene
[CH2]c1ccccc1	benzyl-radical
[O]	oxygen-radical
[CH3]	methyl-radical
[OH]	hydroxyl-radical
[N]=O	nitric-oxide
O=[N]c1ccccc1	nitrosobenzene-radical
[O][O]	dioxygen-diradical
[CH2]=[CH2]	ethylene-diradical
[Li]CCCC	n-butyllithium
[Mg](Cl)CCc1ccccc1	phenethylmagnesium-chloride
[Mg](Br)c1ccccc1	phenylmagnesium-bromide
CC(=O)O[Zn]OC(=O)C	zinc-diacetate
[Cu]c1ccccn1	copper-pyridine
OC(=O)[C@@H](O)[C@H](O)C(=O)O	D-tartaric-acid
OC(=O)[C@H](O)[C@@H](O)C(=O)O	L-tartaric-acid
OC(=O)[C@@H](N)CC(=O)O	L-aspartic-acid
OC(=O)[C@H](N)CC(=O)O	D-aspartic-acid
O[C@@H]1[C@H](O)[C@@H](O)[C@H](O)[C@@H](CO)O1	alpha-D-glucose
O[C@H]1[C@@H](O)[C@H](O)[C@@H](O)[C@H](CO)O1	alpha-L-glucose
C[C@@H](O)CC	(R)-2-butanol
C[C@H](O)CC	(S)-2-butanol
OC(=O)[C@@H](N)Cc1ccccc1	L-phenylalanine
OC(=O)[C@H](N)Cc1ccccc1	D-phenylalanine
OC(=O)[C@@H](N)[C@@H](CC)C	L-allo-isoleucine
OC(=O)[C@@H](N)[C@H](CC)C	L-isoleucine
C(/C=C/c1ccccc1)=C\c1ccccc1	E,Z-1,4-diphenylbutadiene
C(/C=C\c1ccccc1)=C\c1ccccc1	Z,Z-1,4-diphenylbutadiene
C/C=C/C=C/C	E,E-2,4-hexadiene
C/C=C\C=C/C	Z,Z-2,4-hexadiene
C/C=C/C=C\C	E,Z-2,4-hexadiene
CC(C)(C)[Si](C)(C)O	trimethylsilanol-tBu
C[Si](C)(C)c1ccccc1	trimethylsilylbenzene
C[Si](C)(C)C#C	trimethylsilylacetylene
CC(=O)O[Si](C)(C)C	trimethylsilyl-acetate
c1ccc(cc1)[Si](c1ccccc1)(c1ccccc1)c1ccccc1	tetraphenylsilane
C[Sn](C)(C)c1ccccc1	tetramethylstannane-phenyl
CC(=O)O[Tl]	thallium-acetate
[B-](c1ccccc1)(c1ccccc1)(c1ccccc1)c1ccccc1	tetraphenylborate
C1=CC(=CC(=C1)P(C2=CC=CC=C2)C2=CC=CC=C2)P(C3=CC=CC=C3)C3=CC=CC=C3	BISBI-ligand-partial
c1ccc(cc1)P(c1ccccc1)c1ccc(P(c2ccccc2)c2ccccc2)cc1	DPPF-partial
[O-][Cr](=O)(=O)[O-]	chromate
[O-][Mn](=O)(=O)[O-]	manganate
[Pt](Cl)(Cl)([NH3])[NH3]	cisplatin
[Pt]([NH3])([NH3])(Cl)Cl	transplatin
O=C1OC(=O)c2cc3C(=O)OC(=O)c3cc12	mellitic-trianhydride-partial
C1=C[C-]=CC=C[C-]=CC=C[C-]=CC=C1	cyclopentadienyl-trianion-partial
[Fe+2]	iron-II
[Cu+2]	copper-II
c1ccc(cc1)[P+](c1ccccc1)(c1ccccc1)C	methyltriphenylphosphonium
[O-][N+](=O)c1ccc(cc1)[N+](=O)[O-]	p-dinitrobenzene
[O-]c1ccc(cc1)[N+](=O)[O-]	4-nitrophenolate
[O-]S(=O)(=O)c1ccccc1	benzenesulfonate
CC(=O)[O-].[Na+]	sodium-acetate
[O-]C(=O)c1ccccc1.[Na+]	sodium-benzoate
CC(=O)OCC#C	propargyl-acetate
C#CC(=O)O	propiolic-acid
C#CC#C	diacetylene
C#CC#CC#C	triacetylene
[C-]#[O+]	carbon-monoxide
[N-]=[N+]=N[H]	hydrazoic-acid
[O-]P(=O)([O-])O	hydrogen-phosphate
[O-]P(=O)(O)OC	methyl-phosphate-anion
OC(=O)C(=O)[O-]	oxalate-monoanion
[O-]C(=O)C(=O)[O-]	oxalate-dianion
[O-]C(=O)CCC(=O)[O-]	succinate-dianion
[O-]C(=O)c1cc(ccc1O)[O-]	gentisate-dianion
[NH3+]C(Cc1ccc(O)cc1)C(=O)[O-]	tyrosine-zwitterion
[NH3+]C(Cc1c[nH]c2ccccc12)C(=O)[O-]	tryptophan-zwitterion
[NH3+]C(Cc1cnc[nH]1)C(=O)[O-]	histidine-zwitterion
[NH3+]C(CS)C(=O)[O-]	cysteine-zwitterion
[NH3+]C(CCCNC(=[NH2+])N)C(=O)[O-]	arginine-zwitterion-diprotonated
CC[C@@H](C)[C@@H](N)C(=O)O	D-isoleucine-stereo
CC[C@H](C)[C@@H](N)C(=O)O	D-allo-isoleucine-stereo
O[C@@H]1[C@@H](O)[C@H](O)[C@@H](O)[C@@H](CO)O1	alpha-D-idose
O[C@@H]1[C@@H](O)[C@@H](O)[C@H](O)[C@@H](CO)O1	alpha-D-mannose
O[C@H]1OC(CO)[C@@H](O)[C@@H](O)[C@@H]1O	beta-D-glucose-stereo
OC[C@H]1OC(O)[C@H](O)[C@@H](O)[C@@H]1O	alpha-D-glucose-v2
C(\C=C\C=C\C=C\C=C\C)=C\C=C\C=C\C	all-trans-retinol-chain-partial
C(\C=C/C=C\C=C/C=C\C=C/C)=C\C	DHA-chain-partial
C/C=C\C/C=C\C/C=C\CC	linolenic-chain-partial
[2H]C([2H])([2H])C([2H])([2H])C([2H])([2H])[2H]	deuteropropane
[13C]([13C]([13C]([13C](=O)O)O)O)O	13C-tartaric-acid
[2H]c1c([2H])c([2H])c([2H])c([2H])c1[2H]	perdeuterobenzene
[18F]c1ccccc1	18F-fluorobenzene
[11C](=O)O	11C-CO2
[125I]c1ccc(O)cc1	125I-iodophenol
CC(=O)O[Pd]OC(=O)C	palladium-diacetate
[Ru](Cl)(Cl)(=O)=O	ruthenium-tetroxide-partial
[Au]Cl	gold-chloride
[Ag]OC(=O)CF	silver-fluoroacetate
[Zn](OC(C)=O)OC(C)=O	zinc-acetate-v2
C[Hg]Cl	methylmercury-chloride
CC[Pb](CC)(CC)CC	tetraethyllead
c1ccnc(c1)N=Nc1ccccn1	2,2-azopyridine
[O-][N+](=O)c1cc([N+](=O)[O-])cc([N+](=O)[O-])c1	1,3,5-trinitrobenzene
c1cc([N+](=O)[O-])cc([N+](=O)[O-])c1[N+](=O)[O-]	1,2,4-trinitrobenzene
Cc1c([N+](=O)[O-])cc([N+](=O)[O-])cc1[N+](=O)[O-]	TNT
O=[N+]([O-])c1ccc(O)c([N+](=O)[O-])c1	2,4-dinitrophenol
[O-][N+](=O)/C=C/c1ccccc1	trans-beta-nitrostyrene
C=CC(=O)[O-]	acrylate-anion
CC(=O)CC(=O)[O-]	acetoacetate-anion
OC(=O)C(F)(Cl)Br	halothane-acid-analog
C(#N)c1ccc(C#N)cc1	terephthalonitrile
N#Cc1cccc(C#N)c1	isophthalonitrile
N#CC#N	cyanogen
OC(=O)C#CC(=O)O	acetylenedicarboxylic-acid
FC(F)(F)c1ccc(C(F)(F)F)cc1	1,4-bis-trifluoromethylbenzene
FC(F)(F)C(F)(F)C(F)(F)C(F)(F)F	perfluorobutane
FC(F)(F)C(F)(F)C(F)(F)C(F)(F)C(F)(F)C(F)(F)F	perfluorohexane
FC(F)(F)C(F)(F)C(F)(F)C(F)(F)C(F)(F)C(F)(F)C(F)(F)C(F)(F)F	perfluorooctane
B(O)(O)c1ccc(B(O)O)cc1	1,4-phenylenediboronic-acid
OB(O)c1cccc(B(O)O)c1	1,3-phenylenediboronic-acid
c1ccc(cc1)B1OC(C(O1)(C)C)(C)C	phenylboronic-acid-pinacol-ester
[Se]c1ccccc1	selenophenol
[Te]c1ccccc1	tellurobenzene
c1ccc(cc1)[As](c1ccccc1)c1ccccc1	triphenylarsine
c1ccc(cc1)[Sb](c1ccccc1)c1ccccc1	triphenylstibine
c1ccc(cc1)[Bi](c1ccccc1)c1ccccc1	triphenylbismuthine
OP(=O)(O)OP(=O)(O)O	pyrophosphoric-acid
OP(=O)(O)OP(=O)(O)OP(=O)(O)O	triphosphoric-acid
[O-]S(=O)(=O)OS(=O)(=O)[O-]	pyrosulfate
[O-]B([O-])[O-]	borate-trianion
OC(=O)c1cc([Se]c2ccccc2)ccc1	selenoether-benzoic-acid
CC(=O)[S-]	thioacetate-anion
S=C=NC1CCCCC1	cyclohexyl-isothiocyanate
O=C=Nc1ccccc1	phenyl-isocyanate
S=C=Nc1ccccc1	phenyl-isothiocyanate
O=C=NC1CCCCC1	cyclohexyl-isocyanate
[N-]=[N+]=Nc1ccccc1	phenyl-azide
