Rev 7246 | Details | Compare with Previous | Last modification | View Log | RSS feed
Rev | Author | Line No. | Line |
---|---|---|---|
3662 | schaersvoo | 1 | /* |
2 | Sketch Elements: Chemistry molecular diagram drawing tool. |
||
3 | |||
4 | (c) 2005 Dr. Alex M. Clark |
||
5 | |||
6 | Released as GNUware, under the Gnu Public License (GPL) |
||
7 | |||
8 | See www.gnu.org for details. |
||
9 | */ |
||
10 | |||
11 | package WIMSchem; |
||
12 | |||
13 | import java.util.*; |
||
14 | import java.io.*; |
||
15 | import java.text.*; |
||
16 | import java.lang.*; |
||
17 | import java.nio.channels.*; |
||
18 | |||
19 | /* |
||
20 | Handles reading and writing of molecules to and from streams. Two formats are supported: native, which is a direct translation of |
||
21 | the underlying data content; and a subset of MDL MOL, using only the fields that are relevant to WIMSchem. |
||
22 | */ |
||
23 | |||
7246 | schaersvoo | 24 | public class MoleculeStream |
25 | { |
||
3662 | schaersvoo | 26 | // special implementation of the reader for when the format is not known a-priori, or might be a combination-of-two formats |
27 | // as used by the clipboard; do some extra work to try to pull out the WIMSchem file preferentially |
||
7246 | schaersvoo | 28 | public static Molecule readUnknown(InputStream istr) throws IOException |
3662 | schaersvoo | 29 | { |
7246 | schaersvoo | 30 | return readUnknown(new BufferedReader(new InputStreamReader(istr))); |
3662 | schaersvoo | 31 | } |
32 | |||
7246 | schaersvoo | 33 | public static Molecule readUnknown(BufferedReader in) throws IOException |
3662 | schaersvoo | 34 | { |
35 | Molecule mdlmol=null,elmol=null; |
||
36 | final int BUFFMAX=100000; |
||
37 | in.mark(BUFFMAX); |
||
38 | try |
||
39 | { |
||
7246 | schaersvoo | 40 | mdlmol=readMDLMOL(in); |
3662 | schaersvoo | 41 | if (mdlmol!=null) in.mark(BUFFMAX); // so the WIMSchem version could follow |
42 | } |
||
43 | catch (IOException e) |
||
44 | { |
||
45 | mdlmol=null; |
||
46 | in.reset(); |
||
47 | } |
||
48 | |||
49 | try |
||
50 | { |
||
7246 | schaersvoo | 51 | elmol=readNative(in); |
3662 | schaersvoo | 52 | } |
53 | catch (IOException e) {elmol=null;} |
||
54 | |||
55 | if (elmol!=null) return elmol; |
||
56 | if (mdlmol!=null) return mdlmol; |
||
57 | |||
58 | throw new IOException("Unknown or invalid format."); |
||
59 | } |
||
60 | |||
7246 | schaersvoo | 61 | public static Molecule readNative(InputStream istr) throws IOException |
3662 | schaersvoo | 62 | { |
7246 | schaersvoo | 63 | return readNative(new BufferedReader(new InputStreamReader(istr))); |
3662 | schaersvoo | 64 | } |
7246 | schaersvoo | 65 | public static Molecule readNative(BufferedReader in) throws IOException |
3662 | schaersvoo | 66 | { |
67 | Molecule mol=new Molecule(); |
||
68 | final String GENERIC_ERROR="Invalid WIMSchem file."; |
||
69 | |||
70 | try |
||
71 | { |
||
72 | String line=in.readLine(); |
||
7246 | schaersvoo | 73 | if (!line.startsWith("SketchEl!") && !line.startsWith("WIMSchem!")) throw new IOException("Not a WIMSchem file...could not find start tag \"SketchEl!\""); |
3662 | schaersvoo | 74 | int p1=line.indexOf('('),p2=line.indexOf(','),p3=line.indexOf(')'); |
75 | if (p1==0 || p2==0 || p3==0) throw new IOException(GENERIC_ERROR); |
||
76 | |||
77 | int numAtoms=Integer.parseInt(line.substring(p1+1,p2).trim()); |
||
78 | int numBonds=Integer.parseInt(line.substring(p2+1,p3).trim()); |
||
79 | for (int n=0;n<numAtoms;n++) |
||
7246 | schaersvoo | 80 | { |
3662 | schaersvoo | 81 | line=in.readLine(); |
82 | String[] bits=line.split("[\\=\\,\\;]"); |
||
7246 | schaersvoo | 83 | if (bits.length<5) throw new IOException("WIMSchem format error: to few arguments in atomsection line"+n); |
84 | int num=mol.addAtom(bits[0],Double.parseDouble(bits[1].trim()),Double.parseDouble(bits[2].trim()), |
||
3662 | schaersvoo | 85 | Integer.parseInt(bits[3].trim()),Integer.parseInt(bits[4].trim())); |
86 | for (int i=5;i<bits.length;i++) if (bits[i].length()>0) |
||
87 | { |
||
7246 | schaersvoo | 88 | if (bits[i].charAt(0)=='e') mol.setAtomHExplicit(num,Integer.parseInt(bits[i].substring(1))); |
89 | else if (bits[i].charAt(0)=='n') mol.setAtomMapNum(num,Integer.parseInt(bits[i].substring(1))); |
||
3662 | schaersvoo | 90 | } |
91 | } |
||
92 | for (int n=0;n<numBonds;n++) |
||
93 | { |
||
94 | line=in.readLine(); |
||
95 | String[] bits=line.split("[\\-\\=\\,]"); |
||
7246 | schaersvoo | 96 | if (bits.length<4) throw new IOException("WIMSchem fromat error : to few aguments in bondsection line "+n); |
97 | mol.addBond(Integer.parseInt(bits[0].trim()),Integer.parseInt(bits[1].trim()), |
||
3662 | schaersvoo | 98 | Integer.parseInt(bits[2].trim()),Integer.parseInt(bits[3].trim())); |
99 | } |
||
100 | line=in.readLine(); |
||
7246 | schaersvoo | 101 | if (line.compareTo("!End")!=0 && line.compareTo("!FIN")!=0) throw new IOException("could not find the end tag \"!End\""); |
3662 | schaersvoo | 102 | } |
103 | catch (Exception e) |
||
104 | { |
||
105 | throw new IOException(GENERIC_ERROR); |
||
106 | } |
||
107 | |||
108 | return mol; |
||
109 | } |
||
110 | |||
7246 | schaersvoo | 111 | public static void writeNative(OutputStream ostr,Molecule mol) throws IOException |
3662 | schaersvoo | 112 | { |
7246 | schaersvoo | 113 | writeNative(new BufferedWriter(new OutputStreamWriter(ostr)),mol); |
3662 | schaersvoo | 114 | } |
7246 | schaersvoo | 115 | public static void writeNative(BufferedWriter out,Molecule mol) throws IOException |
3662 | schaersvoo | 116 | { |
7292 | schaersvoo | 117 | DecimalFormat fmt = new DecimalFormat("0.0000",new DecimalFormatSymbols(Locale.US)); |
7246 | schaersvoo | 118 | |
119 | out.write("SketchEl!("+mol.numAtoms()+","+mol.numBonds()+")\n"); |
||
120 | for (int n=1;n<=mol.numAtoms();n++) |
||
3662 | schaersvoo | 121 | { |
7246 | schaersvoo | 122 | String hy=mol.atomHExplicit(n)!=Molecule.HEXPLICIT_UNKNOWN ? ("e"+mol.atomHExplicit(n)) : ("i"+mol.atomHydrogens(n)); |
123 | out.write(mol.atomElement(n)+"="+fmt.format(mol.atomX(n))+","+fmt.format(mol.atomY(n))+";"+ |
||
124 | mol.atomCharge(n)+","+mol.atomUnpaired(n)+","+hy); |
||
125 | if (mol.atomMapNum(n)>0) out.write(",n"+mol.atomMapNum(n)); |
||
3662 | schaersvoo | 126 | out.write("\n"); |
127 | } |
||
7246 | schaersvoo | 128 | for (int n=1;n<=mol.numBonds();n++) |
3662 | schaersvoo | 129 | { |
7246 | schaersvoo | 130 | out.write(mol.bondFrom(n)+"-"+mol.bondTo(n)+"="+mol.bondOrder(n)+","+mol.bondType(n)+"\n"); |
3662 | schaersvoo | 131 | } |
7246 | schaersvoo | 132 | out.write("!End\n"); |
3662 | schaersvoo | 133 | |
134 | out.flush(); |
||
135 | } |
||
136 | |||
7246 | schaersvoo | 137 | public static Molecule readMDLMOL(BufferedReader in) throws IOException |
3662 | schaersvoo | 138 | { |
139 | Molecule mol=new Molecule(); |
||
140 | final String GENERIC_ERROR="Invalid MDL MOL file."; |
||
141 | |||
142 | try |
||
143 | { |
||
144 | String line=null; |
||
145 | for (int n=0;n<4;n++) line=in.readLine(); |
||
146 | if (!line.substring(34,39).equals("V2000")) throw new IOException(GENERIC_ERROR); |
||
147 | int numAtoms=Integer.parseInt(line.substring(0,3).trim()); |
||
148 | int numBonds=Integer.parseInt(line.substring(3,6).trim()); |
||
149 | for (int n=0;n<numAtoms;n++) |
||
150 | { |
||
151 | line=in.readLine(); |
||
152 | double x=Double.parseDouble(line.substring(0,10).trim()); |
||
153 | double y=Double.parseDouble(line.substring(10,20).trim()); |
||
154 | String el=line.substring(31,34).trim(); |
||
155 | int chg=Integer.parseInt(line.substring(36,39).trim()),rad=0; |
||
156 | int mapnum=Integer.parseInt(line.substring(60,63).trim()); |
||
157 | if (chg<=3) {} |
||
158 | else if (chg==4) {chg=0; rad=2;} |
||
159 | else chg=4-chg; |
||
7246 | schaersvoo | 160 | mol.addAtom(el,x,y,chg,rad); |
161 | mol.setAtomMapNum(mol.numAtoms(),mapnum); |
||
3662 | schaersvoo | 162 | } |
163 | for (int n=0;n<numBonds;n++) |
||
164 | { |
||
165 | line=in.readLine(); |
||
166 | int from=Integer.parseInt(line.substring(0,3).trim()),to=Integer.parseInt(line.substring(3,6).trim()); |
||
167 | int type=Integer.parseInt(line.substring(6,9).trim()),stereo=Integer.parseInt(line.substring(9,12).trim()); |
||
168 | if (from==to || from<1 || from>numAtoms || to<1 || to>numAtoms) throw new IOException(GENERIC_ERROR); |
||
169 | int order=type>=1 && type<=3 ? type : 1; |
||
170 | int style=Molecule.BONDTYPE_NORMAL; |
||
171 | if (stereo==1) style=Molecule.BONDTYPE_INCLINED; |
||
172 | else if (stereo==6) style=Molecule.BONDTYPE_DECLINED; |
||
173 | // !! supposed to be for double bonds... else if (stereo==3 || stereo==4) style=Molecule.BONDTYPE_UNKNOWN; |
||
7246 | schaersvoo | 174 | mol.addBond(from,to,order,style); |
3662 | schaersvoo | 175 | } |
176 | while (true) |
||
177 | { |
||
178 | line=in.readLine(); |
||
179 | if (line.startsWith("M END")) break; |
||
180 | int type=0; |
||
181 | if (line.startsWith("M CHG")) type=1; |
||
182 | else if (line.startsWith("M RAD")) type=2; |
||
7246 | schaersvoo | 183 | else if (line.startsWith("M RGP")) type=3; |
184 | |||
3662 | schaersvoo | 185 | if (type>0) |
186 | { |
||
187 | int len=Integer.parseInt(line.substring(6,9).trim()); |
||
188 | for (int n=0;n<len;n++) |
||
189 | { |
||
190 | int apos=Integer.parseInt(line.substring(9+8*n,13+8*n).trim()); |
||
191 | int aval=Integer.parseInt(line.substring(13+8*n,17+8*n).trim()); |
||
7246 | schaersvoo | 192 | if (apos<1 || apos>mol.numAtoms()) continue; |
193 | |||
194 | if (type==1) mol.setAtomCharge(apos,aval); |
||
195 | else if (type==2) mol.setAtomUnpaired(apos,aval); |
||
196 | else if (type==3) mol.setAtomElement(apos,"R"+aval); |
||
3662 | schaersvoo | 197 | } |
198 | } |
||
199 | } |
||
200 | } |
||
7246 | schaersvoo | 201 | catch (Exception e) {throw new IOException(GENERIC_ERROR,e);} |
3662 | schaersvoo | 202 | |
203 | return mol; |
||
204 | } |
||
205 | |||
7246 | schaersvoo | 206 | public static void writeMDLMOL(OutputStream ostr,Molecule mol) throws IOException |
3662 | schaersvoo | 207 | { |
7246 | schaersvoo | 208 | writeMDLMOL(new BufferedWriter(new OutputStreamWriter(ostr)),mol); |
3662 | schaersvoo | 209 | } |
7246 | schaersvoo | 210 | public static void writeMDLMOL(BufferedWriter out,Molecule mol) throws IOException |
3662 | schaersvoo | 211 | { |
212 | DecimalFormat fmt=new DecimalFormat("0.0000",new DecimalFormatSymbols(Locale.US)); |
||
7246 | schaersvoo | 213 | |
3662 | schaersvoo | 214 | out.write("\nWIMSchem molfile\n\n"); |
7246 | schaersvoo | 215 | out.write(intrpad(mol.numAtoms(),3)+intrpad(mol.numBonds(),3)+" 0 0 0 0 0 0 0 0999 V2000\n"); |
3662 | schaersvoo | 216 | |
217 | String line; |
||
218 | |||
7246 | schaersvoo | 219 | int numRGroups=0,rgAtom[]=new int[mol.numAtoms()],rgNumber[]=new int[mol.numAtoms()]; |
220 | |||
221 | // export atoms, and make a few notes along the way |
||
222 | |||
223 | for (int n=1;n<=mol.numAtoms();n++) |
||
3662 | schaersvoo | 224 | { |
7246 | schaersvoo | 225 | String str=fmt.format(mol.atomX(n)); |
3662 | schaersvoo | 226 | line=rep(" ",10-str.length())+str; |
7246 | schaersvoo | 227 | str=fmt.format(mol.atomY(n)); |
3662 | schaersvoo | 228 | line+=rep(" ",10-str.length())+str; |
229 | line+=" 0.0000 "; |
||
7246 | schaersvoo | 230 | |
231 | str=mol.atomElement(n); |
||
232 | if (str.length()>1 && str.charAt(0)=='R' && str.charAt(1)>='0' && str.charAt(1)<='9') |
||
233 | { |
||
234 | rgAtom[numRGroups]=n; |
||
235 | rgNumber[numRGroups]=Util.safeInt(str.substring(1)); |
||
236 | numRGroups++; |
||
237 | str="R#"; |
||
238 | } |
||
3662 | schaersvoo | 239 | line+=str+rep(" ",4-str.length())+"0"; |
240 | |||
7246 | schaersvoo | 241 | int chg=mol.atomCharge(n),spin=mol.atomUnpaired(n),mapnum=mol.atomMapNum(n); |
3662 | schaersvoo | 242 | if (chg>=-3 && chg<=-1) chg=4-chg; |
243 | else if (chg==0 && spin==2) chg=4; |
||
244 | else if (chg<1 || chg>3) chg=0; |
||
245 | line+=intrpad(chg,3)+" 0 0 0 0 0 0 0"+intrpad(mapnum,3)+" 0 0"; |
||
246 | |||
247 | out.write(line+"\n"); |
||
248 | } |
||
249 | |||
7246 | schaersvoo | 250 | // export bonds |
251 | |||
252 | for (int n=1;n<=mol.numBonds();n++) |
||
3662 | schaersvoo | 253 | { |
7246 | schaersvoo | 254 | int type=mol.bondOrder(n); |
3662 | schaersvoo | 255 | if (type<1 || type>3) type=1; |
7246 | schaersvoo | 256 | int stereo=mol.bondType(n); |
3662 | schaersvoo | 257 | if (stereo==Molecule.BONDTYPE_NORMAL) {} |
258 | else if (stereo==Molecule.BONDTYPE_INCLINED) {stereo=1; type=1;} |
||
259 | else if (stereo==Molecule.BONDTYPE_DECLINED) {stereo=6; type=1;} |
||
260 | else if (stereo==Molecule.BONDTYPE_UNKNOWN) {stereo=4; type=1;} |
||
261 | else stereo=0; |
||
262 | |||
7246 | schaersvoo | 263 | out.write(intrpad(mol.bondFrom(n),3)+intrpad(mol.bondTo(n),3)+intrpad(type,3)+intrpad(stereo,3)+" 0 0 0\n"); |
3662 | schaersvoo | 264 | } |
265 | |||
7246 | schaersvoo | 266 | // export charges |
267 | |||
3662 | schaersvoo | 268 | int count=0; |
269 | line=""; |
||
7246 | schaersvoo | 270 | for (int n=1;n<=mol.numAtoms();n++) if (mol.atomCharge(n)!=0) |
3662 | schaersvoo | 271 | { |
7246 | schaersvoo | 272 | line+=intrpad(n,4)+intrpad(mol.atomCharge(n),4); |
3662 | schaersvoo | 273 | count++; |
274 | if (count==8) |
||
275 | { |
||
276 | out.write("M CHG"+intrpad(count,3)+line+"\n"); |
||
277 | count=0; line=""; |
||
278 | } |
||
279 | } |
||
280 | if (count>0) out.write("M CHG"+intrpad(count,3)+line+"\n"); |
||
281 | |||
7246 | schaersvoo | 282 | // export "unpaired" atom counts (aka radicals, sort of) |
283 | |||
3662 | schaersvoo | 284 | count=0; |
285 | line=""; |
||
7246 | schaersvoo | 286 | for (int n=1;n<=mol.numAtoms();n++) if (mol.atomUnpaired(n)!=0) |
3662 | schaersvoo | 287 | { |
7246 | schaersvoo | 288 | line+=intrpad(n,4)+intrpad(mol.atomUnpaired(n),4); |
3662 | schaersvoo | 289 | count++; |
290 | if (count==8) |
||
291 | { |
||
292 | out.write("M RAD"+intrpad(count,3)+line+"\n"); |
||
293 | count=0; line=""; |
||
294 | } |
||
295 | } |
||
296 | if (count>0) out.write("M RAD"+intrpad(count,3)+line+"\n"); |
||
297 | |||
7246 | schaersvoo | 298 | // export R-group identifiers |
299 | |||
300 | count=0; |
||
301 | line=""; |
||
302 | for (int n=0;n<numRGroups;n++) |
||
303 | { |
||
304 | line+=intrpad(rgAtom[n],4)+intrpad(rgNumber[n],4); |
||
305 | count++; |
||
306 | if (count==8) |
||
307 | { |
||
308 | out.write("M RGP"+intrpad(count,3)+line+"\n"); |
||
309 | count=0; line=""; |
||
310 | } |
||
311 | } |
||
312 | if (count>0) out.write("M RGP"+intrpad(count,3)+line+"\n"); |
||
313 | |||
3662 | schaersvoo | 314 | out.write("M END\n"); |
315 | out.flush(); |
||
316 | } |
||
317 | |||
7246 | schaersvoo | 318 | public static void writeCMLXML(OutputStream ostr,Molecule mol) throws IOException |
3662 | schaersvoo | 319 | { |
7246 | schaersvoo | 320 | writeCMLXML(new BufferedWriter(new OutputStreamWriter(ostr)),mol); |
3662 | schaersvoo | 321 | } |
7246 | schaersvoo | 322 | public static void writeCMLXML(BufferedWriter out,Molecule mol) throws IOException |
3662 | schaersvoo | 323 | { |
324 | out.write("<cml>\n"); |
||
325 | out.write(" <molecule>\n"); |
||
326 | |||
327 | out.write(" <atomArray>\n"); |
||
7246 | schaersvoo | 328 | for (int n=1;n<=mol.numAtoms();n++) |
3662 | schaersvoo | 329 | { |
7246 | schaersvoo | 330 | out.write(" <atom id=\"a"+n+"\" elementType=\""+mol.atomElement(n)+"\""+ |
331 | " x2=\""+mol.atomX(n)+"\" y2=\""+mol.atomY(n)+"\" hydrogenCount=\""+mol.atomHydrogens(n)+"\"/>\n"); |
||
3662 | schaersvoo | 332 | } |
333 | out.write(" </atomArray>\n"); |
||
334 | |||
335 | out.write(" <bondArray>\n"); |
||
7246 | schaersvoo | 336 | for (int n=1;n<=mol.numBonds();n++) |
3662 | schaersvoo | 337 | { |
7246 | schaersvoo | 338 | out.write(" <bond id=\"b"+n+"\" atomRefs2=\"a"+mol.bondFrom(n)+" a"+mol.bondTo(n)+"\" order=\""+mol.bondOrder(n)+"\"/>\n"); |
3662 | schaersvoo | 339 | } |
340 | out.write(" </bondArray>\n"); |
||
341 | |||
342 | out.write(" </molecule>\n"); |
||
343 | out.write("</cml>\n"); |
||
344 | out.flush(); |
||
345 | } |
||
346 | |||
347 | // examines the beginning of a file and decides whether it can be considered a database of structures which this class is capable |
||
348 | // of reading... |
||
349 | // (NB: currently this includes MDL SD-files, and nothing else) |
||
7246 | schaersvoo | 350 | static boolean examineIsDatabase(FileInputStream istr) throws IOException |
3662 | schaersvoo | 351 | { |
352 | long lastpos=istr.getChannel().position(); |
||
7246 | schaersvoo | 353 | boolean isdb=findNextPosition(istr,0)>=0; |
3662 | schaersvoo | 354 | istr.getChannel().position(lastpos); |
355 | return isdb; |
||
356 | } |
||
357 | |||
7246 | schaersvoo | 358 | static long findNextPosition(FileInputStream istr,long startpos) throws IOException |
3662 | schaersvoo | 359 | { |
360 | FileChannel fch=istr.getChannel(); |
||
361 | fch.position(startpos); |
||
362 | long pos=startpos,size=fch.size(),nextpos=-1; |
||
363 | |||
364 | String rec=""; |
||
365 | while (nextpos<size) |
||
366 | { |
||
367 | int inp=istr.read(); |
||
368 | pos++; |
||
369 | if (inp<0) break; |
||
370 | char ch=(char)inp; |
||
371 | if (ch=='\r') continue; |
||
372 | rec=rec.concat(String.valueOf(ch)); |
||
373 | if (rec.endsWith("$$$$\n")) {nextpos=pos; break;} |
||
374 | } |
||
375 | if (nextpos<0) return -1; |
||
376 | |||
377 | try |
||
378 | { |
||
379 | BufferedReader in=new BufferedReader(new StringReader(rec)); |
||
7246 | schaersvoo | 380 | Molecule mol=readMDLMOL(in); |
3662 | schaersvoo | 381 | if (mol==null) nextpos=-1; |
382 | } |
||
383 | catch (IOException e) {nextpos=-1;} |
||
384 | |||
385 | return nextpos; |
||
386 | } |
||
387 | |||
7246 | schaersvoo | 388 | static Molecule fetchFromPosition(FileInputStream istr,long pos) throws IOException |
3662 | schaersvoo | 389 | { |
390 | istr.getChannel().position(pos); |
||
7246 | schaersvoo | 391 | return readMDLMOL(new BufferedReader(new InputStreamReader(istr))); |
3662 | schaersvoo | 392 | } |
393 | |||
394 | // miscellaneous help |
||
395 | |||
396 | static String intrpad(int Val,int Len) |
||
397 | { |
||
398 | String str=Integer.toString(Val); |
||
399 | str=rep(" ",Len-str.length())+str; |
||
400 | if (str.length()>Len) str=str.substring(0,Len); |
||
401 | return str; |
||
402 | } |
||
403 | static String rep(String Ch,int Len) |
||
404 | { |
||
405 | if (Len<=0) return ""; |
||
406 | String str=Ch; |
||
407 | while (str.length()<Len) str=str+Ch; |
||
408 | return str; |
||
409 | } |
||
410 | } |