Details | Last modification | View Log | RSS feed
Rev | Author | Line No. | Line |
---|---|---|---|
17748 | georgesk | 1 | #! /usr/bin/python3 |
2 | |||
3 | """ |
||
4 | This utility is part of chemeq's source package. It is used to |
||
5 | extract data from NIST atomic weight tables. |
||
6 | |||
7 | Chemeq is a basic standalone filter written in C language, |
||
8 | flex and bison. It inputs strings like: |
||
9 | |||
10 | `2H2 + O2 ---> 2 H2O` |
||
11 | |||
12 | and can output LaTeX code and messages about the equilibrium of a |
||
13 | chemical reaction. |
||
14 | ---------------------------------------- |
||
15 | |||
16 | Copyright (c) 2023 Georges Khaznadar |
||
17 | License: GPL V3+ |
||
18 | |||
19 | This program is free software: you can redistribute it and/or modify |
||
20 | it under the terms of the GNU General Public License as published by |
||
21 | the Free Software Foundation, either version 3 of the License, or |
||
22 | (at your option) any later version. |
||
23 | |||
24 | This program is distributed in the hope that it will be useful, |
||
25 | but WITHOUT ANY WARRANTY; without even the implied warranty of |
||
26 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
||
27 | GNU General Public License for more details. |
||
28 | |||
29 | You should have received a copy of the GNU General Public License |
||
30 | along with this program. If not, see <http://www.gnu.org/licenses/>. |
||
31 | """ |
||
32 | |||
33 | import sys, re |
||
34 | |||
class data:
    """
    Data read from NIST atomic weight tables

    An instance stores the five constructor arguments unchanged. The
    weight fields are handled as strings by `mean_molar_mass`; a
    missing value is represented by None.
    """

    # Formats recognized by mean_molar_mass, compiled once for the class.
    # ex: [1.00784,1.00811]
    _INTERVAL = re.compile(r"\[(\S+),(\S+)\]")
    # ex: 4.002602(2)
    _WITH_UNCERTAINTY = re.compile(r"(\S+)\(.*\)")
    # ex: [227]
    _BRACKETED = re.compile(r"\[(\S+)\]")
    # ex: 252.082980(54)
    _ISOTOPE = re.compile(r"([.0-9]+)\(.*\)")
    # value returned when no format above applies
    _UNRECOGNIZED = "ERROR: format not recognized"

    def __init__(self, A, Z, symbol, isotope_weight, weight):
        """
        @param A value of the A column
        @param Z value of the Z column
        @param symbol the element's symbol
        @param isotope_weight the isotope mass field, as a string
        @param weight the molar mass field, as a string, or None
        """
        self.A = A
        self.Z = Z
        self.symbol = symbol
        self.isotope_weight = isotope_weight
        self.weight = weight

    def __str__(self):
        """
        @return a one-line summary, ending with the mean molar mass
        """
        # Same truthiness test as in mean_molar_mass, so that both
        # agree on which field is reported when weight is empty.
        if self.weight:
            result = "{symbol}: Z = {Z}, A = {A}, molar mass = {weight}".format(
                symbol=self.symbol, Z=self.Z, A=self.A, weight=self.weight)
        else:
            result = "{symbol}: Z = {Z}, A = {A}, isotope mass = {isotope_weight}".format(
                symbol=self.symbol, Z=self.Z, A=self.A,
                isotope_weight=self.isotope_weight)
        return result + " mean molar mass " + self.mean_molar_mass

    @property
    def mean_molar_mass(self):
        """
        Computes a "best mean" molar mass based on data found
        in NIST atomic weight tables
        @return a string, which can be converted to a float when one
        of the known formats is recognized; otherwise the string
        "ERROR: format not recognized"
        """
        if self.weight:
            # ex: 1 H 1 1.00782503223(9) 0.999885(70) [1.00784,1.00811] m
            m = self._INTERVAL.match(self.weight)
            if m:
                try:
                    low = float(m.group(1))
                    high = float(m.group(2))
                except ValueError:
                    # bounds which are not numbers: report it like any
                    # other unknown format, rather than raising from a
                    # property (and from __str__)
                    return self._UNRECOGNIZED
                # the molar mass is the mean value of two limits
                return str(0.5 * (low + high))
            # ex: 2 He 3 3.0160293201(25) 0.00000134(3) 4.002602(2)
            m = self._WITH_UNCERTAINTY.match(self.weight)
            if m:
                # get rid of the additional numbers in parenthesis
                return m.group(1)
            # ex: 89 Ac 227 227.0277523(25) [227]
            m = self._BRACKETED.match(self.weight)
            if m:
                # get rid of the square brackets
                return m.group(1)
        elif self.isotope_weight:
            # ex: 99 Es 252 252.082980(54)
            m = self._ISOTOPE.match(self.isotope_weight)
            if m:
                # get rid of everything which is not a pure number at the end
                return m.group(1)
        return self._UNRECOGNIZED
||
85 | |||
# Matches a table line which begins with an integer, for example:
#   1 H 1 1.00782503223(9) 0.999885(70) [1.00784,1.00811] m
# Named groups: Z (digits), symbol (letters), A (digits) and
# isotope_weight (one token without spaces). Up to two more tokens may
# follow: an unnamed one (presumably the isotopic composition -- to be
# confirmed against the NIST format) and `weight`.
# Both trailing tokens are optional, so when a line carries a single
# trailing token it is captured by the unnamed group and `weight`
# stays empty; parse() does not rely on the `weight` group, it reads
# the weight by column position instead.
pattern = re.compile(r"^(?P<Z>\d+)\s+(?P<symbol>[A-Za-z]+)\s+(?P<A>\d+)\s+(?P<isotope_weight>\S+)(\s+(\S+))?(\s+(?P<weight>\S+))?.*")
||
87 | |||
def parse(infile):
    """
    Read a NIST atomic weight table, as published at
    https://physics.nist.gov/cgi-bin/Compositions/stand_alone.pl?ele=&all=all&ascii=ascii&isotype=some
    and collect one `data` instance for every line matching `pattern`.
    When several matching lines share the same Z, the last one wins.
    @param infile an open text file
    @return a dictionary Z value (as an integer) => `data` instance
    """
    # on lines which match `pattern`, the molar weight, when there is
    # one, is always written after this column
    weight_start = 46
    elements = {}
    for raw_line in infile.readlines():
        line = raw_line.rstrip()  # no trailing spaces nor newline
        found = pattern.match(line)
        if not found:
            continue
        # the line was right-stripped, so a non-empty tail always
        # contains at least one token
        tail = line[weight_start:]
        weight = re.match(r"\s*(\S+)", tail).group(1) if tail else None
        elements[int(found.group("Z"))] = data(
            found.group("A"),
            found.group("Z"),
            found.group("symbol"),
            found.group("isotope_weight"),
            weight,
        )
    return elements
||
112 | |||
def rewrite_mendeleiev(nist_data):
    """
    Scan the file mendeleiev.cc.in, in the current directory, for
    element records, as a first step towards creating a C++ source
    where previous molar mass information is replaced with data
    provided by nist_data.

    As written, the function prints the fields of each record found
    to the standard output; nist_data is not consulted yet, and the
    returned string is always empty.
    @param nist_data a dictionary Z value => `data` instance
    @return a string (currently empty)
    """
    record = re.compile(r"\{\{(?P<name>N_[^,]+),\s*\"(?P<symbol>[A-Za-z]+)\",\s*\"(?P<Z>\d+)\",(?P<remainder>[^\}]*)\}\},")
    source = ""
    with open("mendeleiev.cc.in") as template:
        for raw_line in template.readlines():
            found = record.match(raw_line.strip())
            if not found:
                continue
            # one line of output per record: name, symbol, Z, remainder
            print(*found.group("name", "symbol", "Z", "remainder"))
    return source
||
128 | |||
if __name__ == "__main__":
    # the table to read is the first command-line argument, when
    # there is one, else a file with a default name
    fname = sys.argv[1] if len(sys.argv) > 1 else "NIST-data.txt"
    with open(fname) as infile:
        nist_data = parse(infile)
    # one line of report per element, by increasing Z
    for atomic_number in sorted(nist_data):
        print(nist_data[atomic_number])
    rewrite_mendeleiev(nist_data)