""" Routines for dealing with symbols """
from ase.atoms import Symbols
import re
[docs]
def filename_from_symbols(symbols, max_len=10):
    """
    Choose a best short representation of given Symbols.

    Several textual forms are tried in a fixed order and the first one that
    is at most ``max_len`` characters long is returned:

    1. ``get_chemical_formula('reduce')``,
    2. the output of :func:`pretty_symbols`; if that output is a single
       braced group followed by a count, only the content of the braces
       is kept,
    3. ``get_chemical_formula()`` with its default arguments.

    If none of them fits, ``get_chemical_formula(empirical=True)`` is
    returned regardless of its length.

    A value that is not a ``Symbols`` instance is first converted with
    ``Symbols.fromsymbols``.

    >>> filename_from_symbols("GeXTeXGeXTeXGeXTeXGeXTeX9", max_len=12)
    '{GeXTeX}4X8'
    >>> filename_from_symbols("GeXTeXGeXTeXGeXTeXGeXTeX9")
    'Ge4Te4X16'
    >>> filename_from_symbols("GeXTeXGeXTeXH")
    '{GeXTeX}2H'
    >>> filename_from_symbols("GeXTeXGeXTeX")
    'GeXTeX'
    >>> filename_from_symbols("C4H4OC4H4OC2C4H4OC4H4OC2")
    'C20H16O4'
    >>> filename_from_symbols("CO2")
    'CO2'
    """
    if not isinstance(symbols, Symbols):
        symbols = Symbols.fromsymbols(symbols)

    def _contracted():
        # A formula that is one repeated group as a whole ("{...}N") is
        # reduced to the bare content of the braces.
        return re.sub(r"^\{([^}]*)\}[0-9]+$", r"\1", pretty_symbols(symbols))

    # Candidates are callables so that each form is computed only when all
    # the preceding ones turned out to be too long.
    candidates = (
        lambda: symbols.get_chemical_formula('reduce'),
        _contracted,
        symbols.get_chemical_formula,
    )
    for make_name in candidates:
        name = make_name()
        if len(name) <= max_len:
            return name

    # Last resort: returned even if it exceeds max_len.
    return symbols.get_chemical_formula(empirical=True)
[docs]
def pretty_symbols(symbols):
    """
    Make a symbols string more pretty by contracting immediate repetitions.

    Especially suitable for long 2D semi-infinite bulks.

    The argument is converted with ``str()``, explicit counts are expanded
    (``"O2"`` becomes ``"OO"``) and the text is split into element symbols
    (a capital letter followed by lowercase letters; any other character is
    kept as a token of its own). The tokens are then scanned left to right:
    whenever the tokens emitted so far end with a unit that is immediately
    repeated by the upcoming tokens, the whole run is replaced by
    ``<element><count>`` for a single element or ``{<unit>}<count>``
    otherwise. The longest such unit is preferred.

    Repetitions are matched on whole element symbols, so a unit never
    starts or ends in the middle of a two-letter symbol.

    :param symbols: a formula string, or any object whose ``str()`` is one
    :return: the contracted formula as a string

    >>> pretty_symbols("GeXTeXGeXTeXGeXTeXGeXTeX9")
    '{GeXTeX}4X8'
    >>> pretty_symbols("CO2")
    'CO2'
    >>> pretty_symbols("GeTe3")
    'GeTe3'
    >>> pretty_symbols("CCo")
    'CCo'

    TODO: nested repetitions are not detected yet, because an already
    contracted group is never reused as a part of a later unit. Ideally
    ``pretty_symbols("C4H4OC4H4OC2C4H4OC4H4OC2")`` would give
    ``'{{C4H4O}2C2}2'``.
    """
    # Expand explicit counts so repetitions can be found by plain comparison.
    expanded = re.sub(
        r"([A-Z][a-z]*)([0-9]+)",
        lambda m: m.group(1) * int(m.group(2)),
        str(symbols),
    )
    # Compare whole element symbols, not single characters; otherwise e.g.
    # "GeTeTeTe" would be contracted around the unit "eT".
    tokens = re.findall(r"[A-Z][a-z]*|.", expanded, flags=re.DOTALL)

    out = []      # pieces emitted so far: plain tokens or contracted groups
    barrier = 0   # index in `out` just past the most recent contracted group
    i = 0
    while i < len(tokens):
        # Candidate units are the suffixes of the plain tokens emitted after
        # the last contracted group, tried from the longest to the shortest.
        for start in range(barrier, len(out)):
            unit = out[start:]
            width = len(unit)
            if tokens[i:i + width] != unit:
                continue
            # Swallow every further consecutive copy of the unit.
            j = i + width
            while tokens[j:j + width] == unit:
                j += width
            # +1 accounts for the copy that is already sitting in `out`.
            repeat = (j - i) // width + 1
            text = pretty_symbols("".join(unit))
            if width == 1 and re.fullmatch(r"[A-Z][a-z]*", text):
                group = f"{text}{repeat}"
            else:
                group = f"{{{text}}}{repeat}"
            out[start:] = [group]
            barrier = len(out)
            i = j
            break
        else:
            # No emitted suffix repeats here: emit the token unchanged.
            out.append(tokens[i])
            i += 1
    return "".join(out)