Source code for orsopy.utils.chemical_formula
"""
A simple class to resolve and store chemical formula strings.
"""
import re
from collections import OrderedDict
[docs]
class Formula(list):
    """
    Evaluate strings for element chemical formula.

    A ``Formula`` is a list of ``(element, amount)`` pairs. It can be built
    from a formula string such as ``"Fe2O3"`` or ``"(CH2)3O"`` (one level of
    brackets, optional isotope notation like ``"H[2]"``), or from an existing
    list of ``(element, amount)`` pairs.

    :param string: Formula string to parse, or a list of
        ``(element, amount)`` pairs (including another ``Formula``).
    :param sort: If true, entries are sorted after equal elements are merged.
    :param strict: If true, a string that cannot be parsed case sensitively
        raises ``ValueError`` instead of being re-parsed ignoring case.
    """

    # Regular expression alternatives matching a single element symbol.
    elements = (
        r"A[cglmrstu]|B[aehikr]?|C[adeflmorsu]?|D[bsy]{0,1}|E[rsu]|F[emr]?|"
        r"G[ade]|H[efgosx]?|I[nr]?|Kr?|L[airu]|M[dgnot]|N[abdeiop]?|"
        r"Os?|P[abdmortu]?|R[abefghnu]|S[bcegimnr]?|T[abcehilm]|"
        r"Uu[bhopqst]|U|V|W|Xe|Yb?|Z[nr]"
    )
    # An element symbol followed by a bracketed 1-3 digit number, e.g. "H[2]".
    isotopes = (
        r"(A[cglmrstu]|B[aehikr]?|C[adeflmorsu]?|D[bsy]{0,1}|E[rsu]|F[emr]?|"
        r"G[ade]|H[efgos]?|I[nr]?|Kr?|L[airu]|M[dgnot]|N[abdeiop]?|"
        r"Os?|P[abdmortu]?|R[abefghnu]|S[bcegimnr]?|T[abcehilm]|"
        r"Uu[bhopqst]|U|V|W|Xe|Yb?|Z[nr])"
        r"\[[1-9][0-9]{0,2}\]"
    )

    def __init__(self, string, sort=True, strict=False):
        self._strict = strict
        if isinstance(string, list):
            # Take the given (element, amount) pairs as they are.
            list.__init__(self, string)
            if isinstance(string, Formula):
                # Copying a Formula keeps its sort setting and original text.
                self._do_sort = string._do_sort
                self.HR_formula = string.HR_formula
            else:
                self._do_sort = sort
                self.HR_formula = str(self)
        else:
            self._do_sort = sort
            # Keep the text exactly as given by the caller.
            self.HR_formula = string
            list.__init__(self, [])
            self.parse_string(string)
            self.merge_same()

    def parse_string(self, string):
        """
        Parse a formula string and append its ``(element, amount)`` pairs.

        Spaces, tabs, newlines and the characters ``{``, ``}``, ``_`` and
        ``$`` are removed before parsing. Each bracket group is parsed case
        sensitively first; on failure it is parsed again ignoring case,
        unless the formula was created with ``strict=True``.

        :param string: The formula text.
        :raises ValueError: If a group cannot be parsed.
        """
        # remove gaps and ignored characters
        for ignored in (" ", "\t", "\n", "{", "}", "_", "$"):
            string = string.replace(ignored, "")

        for group, factor in self.split_groups(string):
            try:
                items = self.parse_group(group, case_sensitive=True)
            except ValueError as err:
                if self._strict:
                    raise ValueError("Could not parse formula in case sensitive mode") from err
                items = self.parse_group(group, case_sensitive=False)
            # Amounts inside a bracket group are scaled by the group factor.
            self.extend((element, amount * factor) for element, amount in items)

    @staticmethod
    def split_groups(string):
        """
        Split a formula string at round brackets.

        :param string: Formula text (already stripped of ignored characters).
        :return: List of ``(text, factor)`` tuples in order of appearance.
            Text outside of brackets has factor ``1.0``; a bracketed block
            gets the number following its closing bracket, or ``1.0`` if
            there is none.
        :raises ValueError: If a bracket is not closed, brackets are nested,
            or the text after a closing bracket is not a valid number.
        """
        if "(" not in string:
            return [(string, 1.0)]

        out = []
        length = len(string)
        start = string.index("(")
        if start > 0:
            # text in front of the first bracket
            out.append((string[:start], 1.0))

        while True:
            end = string.find(")", start)
            if end < 0:
                raise ValueError("Brackets need to be closed")
            block = string[start + 1 : end]
            if "(" in block:
                raise ValueError("Only one level of brackets is allowed")

            # The multiplier runs up to the next letter or opening bracket.
            _next = end + 1
            while _next < length and not (string[_next].isalpha() or string[_next] == "("):
                _next += 1
            number = string[end + 1 : _next]
            out.append((block, float(number) if number else 1.0))

            if _next == length:
                break
            start = string.find("(", _next)
            if start < 0:
                # no further brackets, the rest is one plain group
                out.append((string[_next:], 1.0))
                break
            if start > _next:
                # plain text between two bracket groups
                out.append((string[_next:start], 1.0))
        return out

    def _next_element(self, text, flags):
        """
        Return the first element or isotope match in ``text``, or ``None``.

        The isotope match is preferred when it starts at the same position as
        the plain element match, so that ``H[2]`` is not read as ``H``.
        """
        mele = re.search(self.elements, text, flags=flags)
        miso = re.search(self.isotopes, text, flags=flags)
        if miso is not None and mele is not None and miso.start() == mele.start():
            return miso
        return mele

    def parse_group(self, group, case_sensitive=True):
        """
        Parse a bracket-free piece of a formula.

        :param group: Text consisting of element (or isotope) symbols, each
            optionally followed by a number.
        :param case_sensitive: If false, symbols are matched ignoring case.
        :return: List of ``(symbol, amount)`` tuples; the symbol is
            capitalized and a missing number counts as ``1.0``.
        :raises ValueError: If the text does not start with a valid symbol
            or a number between symbols cannot be converted to ``float``.
        """
        flags = 0 if case_sensitive else re.IGNORECASE

        prev = self._next_element(group, flags)
        if prev is None or prev.start() != 0:
            raise ValueError("Did not find any valid element in string")

        out = []
        pos = prev.end()
        while pos < len(group):
            # Matches are relative to group[pos:], pos is absolute in group.
            _next = self._next_element(group[pos:], flags)
            if _next is None:
                break
            if _next.start() == 0:
                amount = 1.0
            else:
                # The text between two symbols is the amount of the first one.
                amount = float(group[pos : pos + _next.start()])
            out.append((prev.group(0).capitalize(), amount))
            prev = _next
            pos += _next.end()

        # Whatever follows the last symbol is its amount.
        trailing = group[pos:]
        out.append((prev.group(0).capitalize(), float(trailing) if trailing else 1.0))
        return out

    def merge_same(self):
        """
        Sum the amounts of equal elements in place.

        Entries whose summed amount is zero are removed. The result is sorted
        if the formula was created with sorting enabled.
        """
        totals = OrderedDict()
        for ele, amount in self:
            if ele in totals:
                totals[ele] += amount
            else:
                totals[ele] = amount
        self[:] = [item for item in totals.items() if item[1] != 0]
        if self._do_sort:
            self.sort()

    def __str__(self):
        parts = []
        for element, number in self:
            if number == 1.0:
                parts.append(element)
            elif float(number).is_integer():
                # float() first: int.is_integer only exists on Python >= 3.12
                parts.append(element + str(int(number)))
            else:
                parts.append(element + str(number))
        return "".join(parts)

    def __contains__(self, item):
        # check if an element is in the formula
        return any(el[0] == item for el in self)

    def __add__(self, other):
        out = Formula(list(self) + list(other), sort=self._do_sort)
        out.merge_same()
        return out

    def __sub__(self, other):
        # Negate every amount explicitly: "-1 * other" on a plain list is
        # sequence repetition and would yield an empty list.
        negated = [(element, -1 * amount) for element, amount in other]
        out = Formula(list(self) + negated, sort=self._do_sort)
        out.merge_same()
        return out

    def __mul__(self, other):
        return Formula([(el[0], other * el[1]) for el in self], sort=self._do_sort)

    def __rmul__(self, other):
        return self * other