WebSVN – wimsdev – Blame – /trunk/wims/src/Misc/chemeq/src/extract-nist-data.py

Rev	Author	Line No.	Line
17748	georgesk	1	#! /usr/bin/python3
		2
		3	"""
		4	This utility is part of chemeq's source package. It is used to
		5	extract data from NIST atomic weight tables.
		6
		7	Chemeq is a basic standalone filter written in C language,
		8	flex and bison. It inputs strings like:
		9
		10	`2H2 + O2 ---> 2 H2O`
		11
		12	and can output LaTeX code and messages about the equilibrium of a
		13	chemical reaction.
		14	----------------------------------------
		15
		16	Copyright (c) 2023 Georges Khaznadar
		17	License: GPL V3+
		18
		19	This program is free software: you can redistribute it and/or modify
		20	it under the terms of the GNU General Public License as published by
		21	the Free Software Foundation, either version 3 of the License, or
		22	(at your option) any later version.
		23
		24	This program is distributed in the hope that it will be useful,
		25	but WITHOUT ANY WARRANTY; without even the implied warranty of
		26	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
		27	GNU General Public License for more details.
		28
		29	You should have received a copy of the GNU General Public License
		30	along with this program. If not, see <http://www.gnu.org/licenses/>.
		31	"""
		32
		33	import sys, re
		34
		35	class data:
		36	"""
		37	Data read from NIST atomic weight tables
		38	"""
		39
		40	def __init__(self, A, Z, symbol, isotope_weight, weight):
		41	self.A = A
		42	self.Z = Z
		43	self.symbol = symbol
		44	self.isotope_weight = isotope_weight
		45	self.weight = weight
		46	return
		47
		48	def __str__(self):
		49	if self.weight is not None:
		50	result = "{symbol}: Z = {Z}, A = {A}, molar mass = {weight}".format(**self.__dict__)
		51	else:
		52	result = "{symbol}: Z = {Z}, A = {A}, isotope mass = {isotope_weight}".format(**self.__dict__)
		53	return result + " mean molar mass " + self.mean_molar_mass
		54
		55	@property
		56	def mean_molar_mass(self):
		57	"""
		58	Computes a "best mean" molar mass based on data found
		59	in NIST atomic weight tables
		60	@return a string, which can be converted to a float
		61	"""
		62	if self.weight:
		63	# ex: 1 H 1 1.00782503223(9) 0.999885(70) [1.00784,1.00811] m
		64	m = re.match(r"\[(\S+),(\S+)\]", self.weight)
		65	if m:
		66	# the molar mass is the mean value of two limits
		67	return str(0.5 * (float(m.group(1))+ float(m.group(2))))
		68	# ex: 2 He 3 3.0160293201(25) 0.00000134(3) 4.002602(2)
		69	m = re.match(r"(\S+)\(.*\)", self.weight)
		70	if m:
		71	# get rid of the additional numbers in parenthesis
		72	return m.group(1)
		73	# ex: 89 Ac 227 227.0277523(25) [227]
		74	m = re.match(r"\[(\S+)\]", self.weight)
		75	if m:
		76	# get rid of the square brackets
		77	return m.group(1)
		78	elif self.isotope_weight:
		79	# ex: 99 Es 252 252.082980(54)
		80	m = re.match(r"([.0-9]+)\(.*\)", self.isotope_weight)
		81	if m:
		82	# get rid of everything which is not a pure number at the end
		83	return m.group(1)
		84	return "ERROR: format not recognized"
		85
		86	pattern = re.compile(r"^(?P<Z>\d+)\s+(?P<symbol>[A-Za-z]+)\s+(?P<A>\d+)\s+(?P<isotope_weight>\S+)(\s+(\S+))?(\s+(?P<weight>\S+))?.*")
		87
		88	def parse(infile):
		89	"""
		90	Parse data from NIST atomic weight table, available at
		91	https://physics.nist.gov/cgi-bin/Compositions/stand_alone.pl?ele=&all=all&ascii=ascii&isotype=some
		92	@param infile an open text file
		93	@return a dictionary Z value => `data` instance
		94	"""
		95	result = {}
		96	# the molar weight is always written after column 46, for
		97	# lines which match `pattern`.
		98	WEIGHT_COLUMN = 46
		99	for l in infile.readlines():
		100	l = l.rstrip() # remove spaces on the right
		101	m = pattern.match(l)
		102	if m :
		103	if len(l) > WEIGHT_COLUMN:
		104	weight = re.match(r"\s*(\S+)", l[WEIGHT_COLUMN:]).group(1)
		105	else:
		106	weight = None
		107	d = data(
		108	m.group("A"), m.group("Z"), m.group("symbol"),
		109	m.group("isotope_weight"), weight)
		110	result[int(m.group("Z"))] = d
		111	return result
		112
		113	def rewrite_mendeleiev(nist_data):
		114	"""
		115	Create a C++ source from file mendeleiv.cc.in, by replacing previous
		116	molar mass information with data provided by nist_data
		117	@param nist_data a dictionary Z value => `data` instance
		118	@return a string featuring a valid C++ source
		119	"""
		120	result = ""
		121	struct_pattern = re.compile(r"\{\{(?P<name>N_[^,]+),\s\"(?P<symbol>[A-Za-z]+)\",\s\"(?P<Z>\d+)\",(?P<remainder>[^\}]*)\}\},")
		122	with open("mendeleiev.cc.in") as infile:
		123	for l in infile.readlines():
		124	m = struct_pattern.match(l.strip())
		125	if m:
		126	print(m.group("name"), m.group("symbol"), m.group("Z"), m.group("remainder"))
		127	return result
		128
		129	if __name__ == "__main__":
		130	if len(sys.argv) > 1:
		131	fname = sys.argv[1]
		132	else:
		133	fname = "NIST-data.txt"
		134	with open(fname) as infile:
		135	nist_data = parse(infile)
		136	for Z in sorted(nist_data.keys()):
		137	print(nist_data[Z])
		138	rewrite_mendeleiev(nist_data)

Subversion Repositories wimsdev

(root)/trunk/wims/src/Misc/chemeq/src/extract-nist-data.py – Rev 17748