Source code for ase2sprkkr.ase.symbols

""" Routines for dealing with symbols """
from ase.atoms import Symbols
import re

def filename_from_symbols(symbols, max_len=10):
    """ Pick the most descriptive formula string that fits into ``max_len``.

    The following representations are tried in order and the first one
    whose length does not exceed ``max_len`` is returned:

    1. the 'reduce' chemical formula,
    2. the output of :func:`pretty_symbols` (a single group that spans
       the whole string is unwrapped, i.e. ``{A}n`` becomes ``A``),
    3. the default chemical formula.

    If none of them fits, the empirical formula is returned regardless
    of its length.

    >>> filename_from_symbols("GeXTeXGeXTeXGeXTeXGeXTeX9", max_len=12)
    '{GeXTeX}4X8'
    >>> filename_from_symbols("GeXTeXGeXTeXGeXTeXGeXTeX9")
    'Ge4Te4X16'
    >>> filename_from_symbols("GeXTeXGeXTeXH")
    '{GeXTeX}2H'
    >>> filename_from_symbols("GeXTeXGeXTeX")
    'GeXTeX'
    >>> filename_from_symbols("C4H4OC4H4OC2C4H4OC4H4OC2")
    'C20H16O4'
    >>> filename_from_symbols("CO2")
    'CO2'

    Parameters
    ----------
    symbols
        A ``Symbols`` instance, or anything accepted by
        ``Symbols.fromsymbols``.
    max_len
        The maximal desired length of the result.

    Returns
    -------
    str
        The chosen representation.
    """
    formula = symbols if isinstance(symbols, Symbols) \
        else Symbols.fromsymbols(symbols)

    def unwrapped_pretty():
        # Drop the braces and the counter if one group covers everything.
        return re.sub(r"^\{([^}]*)\}[0-9]+$", r"\1", pretty_symbols(formula))

    # The candidates are built lazily, from the most to the least detailed.
    candidates = (
        lambda: formula.get_chemical_formula('reduce'),
        unwrapped_pretty,
        lambda: formula.get_chemical_formula(),
    )
    for build in candidates:
        name = build()
        if len(name) <= max_len:
            return name

    # Nothing fits: fall back to the empirical formula.
    return formula.get_chemical_formula(empirical=True)

# An element symbol followed by its count, e.g. ``X9``.
_COUNTED_ELEMENT = re.compile(r"([A-Z][a-z]*)([0-9]+)")
# One token of an expanded formula: a whole element symbol, or (as a
# fallback for unexpected input) any single character.
_FORMULA_TOKEN = re.compile(r"[A-Z][a-z]*|[\s\S]")
# A string consisting of exactly one element symbol.
_SINGLE_ELEMENT = re.compile(r"[A-Z][a-z]*")


def pretty_symbols(symbols):
    """ Make a symbols string more pretty.

    Consecutive repetitions of an element or of a sequence of elements are
    replaced by a counted item: ``X8`` for a single element, ``{GeXTeX}4``
    for a sequence. Especially suitable for long 2D semi-infinite bulks.

    >>> pretty_symbols("GeXTeXGeXTeXGeXTeXGeXTeX9")
    '{GeXTeX}4X8'
    >>> pretty_symbols("CO2")
    'CO2'

    The repetitions are searched on whole element symbols, so a match
    never starts or ends inside a multi-letter symbol:

    >>> pretty_symbols("GeTe2")
    'GeTe2'
    >>> pretty_symbols("CCl4")
    'CCl4'

    Known limitation (TODO): nested repetitions are not detected yet,
    e.g. "C4H4OC4H4OC2C4H4OC4H4OC2" is not reduced to '{{C4H4O}2C2}2'.

    Parameters
    ----------
    symbols
        The symbols to be formatted; ``str(symbols)`` is parsed.

    Returns
    -------
    str
        The shortened representation.
    """
    # Expand the counts (X3 -> XXX), so repetitions can be found by a plain
    # comparison, and split the result into element tokens.
    expanded = _COUNTED_ELEMENT.sub(
        lambda m: m.group(1) * int(m.group(2)), str(symbols)
    )
    items = _FORMULA_TOKEN.findall(expanded)

    # Candidate units: every run of items that ends just before ``items[i]``,
    # ordered from the longest one to the shortest one.
    prev = []
    i = 0
    while i < len(items):
        for unit in prev:
            size = len(unit)
            j = i + size
            if items[i:j] != unit:
                continue
            # Consume all further consecutive copies of the unit.
            while items[j:j + size] == unit:
                j += size
            # +1 for the copy held in ``prev``, located just before ``i``.
            repeat = (j - i) // size + 1
            inner = pretty_symbols(''.join(unit))
            if _SINGLE_ELEMENT.fullmatch(inner):
                sub = f'{inner}{repeat}'
            else:
                sub = f'{{{inner}}}{repeat}'
            # Replace all the copies by a single counted item. Such an item
            # can never be equal to a plain token, since it contains a digit.
            items[i - size:j] = [sub]
            i = i - size + 1
            # Keep only the candidates that contain the whole replaced unit.
            prev = [p[:-size] + [sub] for p in prev if len(p) >= size]
            break
        else:
            # No repetition starts here: extend every candidate by the
            # current item and start a new candidate with it.
            prev = [p + [items[i]] for p in prev]
            prev.append([items[i]])
            i += 1
    return ''.join(items)