Source code for ase2sprkkr.ase.symbols

"""Routines for dealing with symbols"""

from ase.atoms import Symbols
import re


def filename_from_symbols(symbols, max_len=10):
    """
    Pick the first sufficiently short textual form of the given Symbols.

    The candidates are tried in this order and the first one that is at most
    ``max_len`` characters long is returned:

    1. the ``"reduce"`` chemical formula,
    2. the output of :func:`pretty_symbols`; if it consists of a single
       ``{group}N`` and nothing else, only ``group`` is used,
    3. the default chemical formula,

    If none of them fits, the empirical chemical formula is returned
    regardless of its length.

    Parameters
    ----------
    symbols
        A ``Symbols`` instance, or anything ``Symbols.fromsymbols`` accepts.
    max_len
        Maximal accepted length of the first three candidates.

    >>> filename_from_symbols("GeXTeXGeXTeXGeXTeXGeXTeX9", max_len=12)
    '{GeXTeX}4X8'
    >>> filename_from_symbols("GeXTeXGeXTeXGeXTeXGeXTeX9")
    'Ge4Te4X16'
    >>> filename_from_symbols("GeXTeXGeXTeXH")
    '{GeXTeX}2H'
    >>> filename_from_symbols("GeXTeXGeXTeX")
    'GeXTeX'
    >>> filename_from_symbols("C4H4OC4H4OC2C4H4OC4H4OC2")
    'C20H16O4'
    >>> filename_from_symbols("CO2")
    'CO2'
    """
    if not isinstance(symbols, Symbols):
        symbols = Symbols.fromsymbols(symbols)

    def _unwrapped_pretty():
        # A formula made of one repeated group only: keep just the group.
        return re.sub(r"^\{([^}]*)\}[0-9]+$", r"\1", pretty_symbols(symbols))

    # Candidates are callables so that each one is computed only when all
    # the previous ones turned out to be too long.
    candidates = (
        lambda: symbols.get_chemical_formula("reduce"),
        _unwrapped_pretty,
        lambda: symbols.get_chemical_formula(),
    )
    for make_name in candidates:
        name = make_name()
        if len(name) <= max_len:
            return name

    # Last resort, returned whatever its length is.
    return symbols.get_chemical_formula(empirical=True)
def pretty_symbols(symbols):
    """
    Make a symbols string more pretty by collapsing consecutive repetitions.

    Especially suitable for long 2D semi-infinite bulks. The formula is first
    expanded (``"O2"`` becomes ``"OO"``) and then scanned from left to right.
    Whenever the text ending at the current position is immediately repeated,
    the whole run is replaced by ``<element><count>`` (a single element) or by
    ``{<group>}<count>`` (a group of elements).

    Repetitions are recognised only on element boundaries, so a one-letter
    symbol is never merged with the first letter of a two-letter symbol
    (``"CCl4"`` stays ``"CCl4"``).

    Parameters
    ----------
    symbols
        Any object whose ``str()`` is a chemical formula built from element
        symbols (an uppercase letter followed by lowercase letters) with
        optional counts.

    Returns
    -------
    str
        The compacted formula.

    >>> pretty_symbols("GeXTeXGeXTeXGeXTeXGeXTeX9")
    '{GeXTeX}4X8'
    >>> pretty_symbols("CO2")
    'CO2'
    >>> pretty_symbols("CCl4")
    'CCl4'

    TODO: nested groups are not detected yet, i.e.
    ``pretty_symbols("C4H4OC4H4OC2C4H4OC4H4OC2")`` does not return
    ``'{{C4H4O}2C2}2'``.
    """
    # Expand the counts so that repetitions can be found by plain text search.
    symbols = re.sub(
        r"([A-Z][a-z]*)([0-9]+)",
        lambda m: m.group(1) * int(m.group(2)),
        str(symbols),
    )
    # All the candidate groups: substrings of the already processed text that
    # end right before the position i, the longest one first.
    prev = []
    i = 0
    while i < len(symbols):
        for t in prev:
            # A group has to start with a whole element symbol. Candidates
            # starting with a lowercase letter (the tail of a symbol) or with
            # '{' (an already collapsed group) are skipped.
            if not t[0].isupper():
                continue
            lt = len(t)
            # Find the end of the run of the copies of t that starts at i.
            j = i
            while symbols.startswith(t, j):
                j += lt
            # The last copy must not end in the middle of a two-letter
            # symbol (e.g. 'C' matched against the 'C' of 'Cl').
            if j > i and symbols[j:j + 1].islower():
                j -= lt
            if j == i:
                continue
            # The copy that ends at i counts as well, hence + 1.
            repeat = (j - i) // lt + 1
            t = pretty_symbols(t)
            if re.match(r"^[A-Z][a-z]*$", t):
                sub = f"{t}{repeat}"
            else:
                sub = f"{{{t}}}{repeat}"
            symbols = f"{symbols[:i - lt]}{sub}{symbols[j:]}"
            i = i - lt + len(sub)
            # Keep only the candidates that contain the whole first copy and
            # let them end with the collapsed text.
            prev = [p[:-lt] + sub for p in prev if len(p) >= lt]
            break
        else:
            # No repetition starts here: move one character forward.
            prev = [p + symbols[i] for p in prev]
            prev.append(symbols[i])
            i += 1
    return symbols