Rev 3718 | Rev 8094 | Go to most recent revision | Show entire file | Ignore whitespace | Details | Blame | Last modification | View Log | RSS feed
Rev 3718 | Rev 7676 | ||
---|---|---|---|
Line 13... | Line 13... | ||
13 | * You should have received a copy of the GNU General Public License |
13 | * You should have received a copy of the GNU General Public License |
14 | * along with this program; if not, write to the Free Software |
14 | * along with this program; if not, write to the Free Software |
15 | * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. |
15 | * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. |
16 | */ |
16 | */ |
17 | 17 | ||
18 |
|
18 | /* Check type of a file */ |
19 | 19 | ||
20 | /*************** Customization: change values hereafter ****************/ |
20 | /*************** Customization: change values hereafter ****************/ |
21 | 21 | ||
22 |
|
22 | /* limit of data buffers */ |
23 | #define buflim 1024*1024*16 |
23 | #define buflim 1024*1024*16 |
24 | 24 | ||
25 | /***************** Nothing should need change hereafter *****************/ |
25 | /***************** Nothing should need change hereafter *****************/ |
26 | 26 | ||
27 | #include "../wims.h" |
27 | #include "../wims.h" |
Line 36... | Line 36... | ||
36 | FILE *outf; |
36 | FILE *outf; |
37 | 37 | ||
38 | struct { |
38 | struct { |
39 | char *name, *trans; |
39 | char *name, *trans; |
40 | } backtrans[]={ |
40 | } backtrans[]={ |
41 | {"\\ge\\", |
41 | {"\\ge\\", " >= "}, |
42 | {"\\geq\\", |
42 | {"\\geq\\", " >= "}, |
43 | {"\\le\\", |
43 | {"\\le\\", " <= "}, |
44 | {"\\leq\\", |
44 | {"\\leq\\", " <= "}, |
45 | {"\\to\\", |
45 | {"\\to\\", " -> "}, |
46 | {"\\rightarrow\\", |
46 | {"\\rightarrow\\", " -> "}, |
47 | {"\\longrightarrow\\", " --> "}, |
47 | {"\\longrightarrow\\", " --> "}, |
48 | {"\\Rightarrow\\", |
48 | {"\\Rightarrow\\", " => "}, |
49 | {"\\Longrightarrow\\", " ==> "}, |
49 | {"\\Longrightarrow\\", " ==> "}, |
50 | {"\\Leftrightarrow\\", " <=> "}, |
50 | {"\\Leftrightarrow\\", " <=> "}, |
51 | {"\\Longleftrightarrow\\", " <==> "}, |
51 | {"\\Longleftrightarrow\\", " <==> "}, |
52 | {"\\Longleftarrow\\", " <== "}, |
52 | {"\\Longleftarrow\\", " <== "}, |
53 | }; |
53 | }; |
Line 60... | Line 60... | ||
60 | p=malloc(n); |
60 | p=malloc(n); |
61 | if(p==NULL) exit(1); |
61 | if(p==NULL) exit(1); |
62 | return p; |
62 | return p; |
63 | } |
63 | } |
64 | 64 | ||
65 |
|
65 | /* Points to the end of the word */ |
66 | char *find_word_end(char *p) |
66 | char *find_word_end(char *p) |
67 | { |
67 | { |
68 | int i; |
68 | int i; |
69 | for(i=0;!isspace(*p) && *p!=0 && i<MAX_LINELEN; p++,i++); |
69 | for(i=0;!isspace(*p) && *p!=0 && i<MAX_LINELEN; p++,i++); |
70 | return p; |
70 | return p; |
71 | } |
71 | } |
72 | 72 | ||
73 |
|
73 | /* Strips leading spaces */ |
74 | char *find_word_start(char *p) |
74 | char *find_word_start(char *p) |
75 | { |
75 | { |
76 | int i; |
76 | int i; |
77 | for(i=0; isspace(*p) && i<MAX_LINELEN; p++,i++); |
77 | for(i=0; isspace(*p) && i<MAX_LINELEN; p++,i++); |
78 | return p; |
78 | return p; |
Line 81... | Line 81... | ||
81 | char *find_tag_end(char *p) |
81 | char *find_tag_end(char *p) |
82 | { |
82 | { |
83 | char *pp, *old; |
83 | char *pp, *old; |
84 | pp=p; if(*pp=='<') pp++; |
84 | pp=p; if(*pp=='<') pp++; |
85 | for(; *pp && *pp!='>'; pp++) { |
85 | for(; *pp && *pp!='>'; pp++) { |
86 |
|
86 | if(*pp=='"') { |
87 |
|
87 | pp=strchr(pp+1,'"'); |
88 |
|
88 | if(pp==NULL) {pp=p+strlen(p); break;} else continue; |
89 |
|
89 | } |
90 | } |
90 | } |
91 |
|
91 | /* this is probably a syntax error in the page */ |
92 | if(*pp==0 && pp>p+2048) { |
92 | if(*pp==0 && pp>p+2048) { |
93 |
|
93 | old=p; if(*old=='<') old++; |
94 |
|
94 | pp=strchr(old,'>'); |
95 |
|
95 | if(pp==NULL) pp=strchr(old,'<'); |
96 |
|
96 | if(pp==NULL) pp=find_word_end(find_word_start(old)); |
97 | } |
97 | } |
98 | if(*pp=='>') pp++; return pp; |
98 | if(*pp=='>') pp++; return pp; |
99 | } |
99 | } |
100 | 100 | ||
101 | char *find_tag(char *p, char *tag) |
101 | char *find_tag(char *p, char *tag) |
102 | { |
102 | { |
103 | char *pp; |
103 | char *pp; |
104 | int len; |
104 | int len; |
105 | len=strlen(tag); |
105 | len=strlen(tag); |
106 | for(pp=strchr(p,'<'); pp!=NULL && *pp; pp=strchr(pp+1,'<')) { |
106 | for(pp=strchr(p,'<'); pp!=NULL && *pp; pp=strchr(pp+1,'<')) { |
107 |
|
107 | if(strncasecmp(pp+1,tag,len)==0 && !isalnum(*(pp+1+len))) return pp; |
108 | } |
108 | } |
109 | return p+strlen(p); |
109 | return p+strlen(p); |
110 | } |
110 | } |
111 | 111 | ||
112 |
|
112 | /* modify a string. Buffer length must be at least MAX_LINELEN */ |
113 | void string_modify(char *start, char *bad_beg, char *bad_end, char *good,...) |
113 | void string_modify(char *start, char *bad_beg, char *bad_end, char *good,...) |
114 | { |
114 | { |
115 | char buf[MAX_LINELEN+1]; |
115 | char buf[MAX_LINELEN+1]; |
116 | va_list vp; |
116 | va_list vp; |
117 | 117 | ||
118 | va_start(vp,good); |
118 | va_start(vp,good); |
119 | vsnprintf(buf,sizeof(buf),good,vp); va_end(vp); |
119 | vsnprintf(buf,sizeof(buf),good,vp); va_end(vp); |
120 | if(strlen(start)-(bad_end-bad_beg)+strlen(buf)>=MAX_LINELEN) { |
120 | if(strlen(start)-(bad_end-bad_beg)+strlen(buf)>=MAX_LINELEN) { |
121 |
|
121 | return; |
122 | } |
122 | } |
123 | strcat(buf,bad_end); |
123 | strcat(buf,bad_end); |
124 | ovlstrcpy(bad_beg,buf); |
124 | ovlstrcpy(bad_beg,buf); |
125 | } |
125 | } |
126 | 126 | ||
127 | void cutamp(char *p) |
127 | void cutamp(char *p) |
128 | { |
128 | { |
129 | char *pp; |
129 | char *pp; |
130 | for(pp=strchr(p,'&'); pp; pp=strchr(pp+1,'&')) { |
130 | for(pp=strchr(p,'&'); pp; pp=strchr(pp+1,'&')) { |
131 |
|
131 | if(strncmp(pp,"&amp;",5)==0) { |
132 |
|
132 | ovlstrcpy(pp+1,pp+5); continue; |
133 |
|
133 | } |
134 |
|
134 | if(strncmp(pp,"&lt;",4)==0) { |
135 |
|
135 | *pp='<'; ovlstrcpy(pp+1,pp+4); continue; |
136 |
|
136 | } |
137 |
|
137 | if(strncmp(pp,"&gt;",4)==0) { |
138 |
|
138 | *pp='>'; ovlstrcpy(pp+1,pp+4); continue; |
139 |
|
139 | } |
140 | 140 | ||
141 | } |
141 | } |
142 | } |
142 | } |
143 | 143 | ||
144 |
|
144 | /* get the file */ |
145 | void prepare_file(void) |
145 | void prepare_file(void) |
146 | { |
146 | { |
147 | FILE *f; |
147 | FILE *f; |
148 | long int flen; |
148 | long int flen; |
149 | 149 | ||
Line 164... | Line 164... | ||
164 | char *p1, *p2, buf[256]; |
164 | char *p1, *p2, buf[256]; |
165 | 165 | ||
166 | mathbuf[0]=0; |
166 | mathbuf[0]=0; |
167 | pt=find_word_start(p); |
167 | pt=find_word_start(p); |
168 | if(strncmp(pt,"\\begin{displaymath}", |
168 | if(strncmp(pt,"\\begin{displaymath}", |
169 |
|
169 | strlen("\\begin{displaymath}"))==0) { |
170 |
|
170 | pt=strchr(pt,'}')+1; |
171 |
|
171 | pv=strstr(pt,"\\end{displaymath}"); |
172 |
|
172 | if(pv==NULL) return; |
173 |
|
173 | goto insmath; |
174 | } |
174 | } |
175 | if(*pt=='%') pt=strchr(pt,'$'); if(pt==NULL) return; |
175 | if(*pt=='%') pt=strchr(pt,'$'); if(pt==NULL) return; |
176 | if(*pt!='$') return; do pt++; while(*pt=='$'); |
176 | if(*pt!='$') return; do pt++; while(*pt=='$'); |
177 | pv=strchr(pt,'$'); if(pv==NULL) return; |
177 | pv=strchr(pt,'$'); if(pv==NULL) return; |
178 | insmath: if(pv-pt>=MAX_LINELEN-256) return; |
178 | insmath: if(pv-pt>=MAX_LINELEN-256) return; |
179 | memmove(mathbuf,pt,pv-pt); mathbuf[pv-pt]=0; |
179 | memmove(mathbuf,pt,pv-pt); mathbuf[pv-pt]=0; |
180 | if(strstr(mathbuf,"...\n...")!=NULL) { |
180 | if(strstr(mathbuf,"...\n...")!=NULL) { |
181 |
|
181 | ovlstrcpy(mathbuf,"......"); return; |
182 | } |
182 | } |
183 | cutamp(mathbuf); latex2html=1; |
183 | cutamp(mathbuf); latex2html=1; |
184 | for(p1=strstr(mathbuf,"\\mathbb");p1;p1=strstr(p1+1,"\\mathbb")) { |
184 | for(p1=strstr(mathbuf,"\\mathbb");p1;p1=strstr(p1+1,"\\mathbb")) { |
185 |
|
185 | char c,*d; |
186 |
|
186 | p2=find_word_start(p1+strlen("\\mathbb")); c=0; |
187 |
|
187 | if(strchr("NZQRC",*p2)!=NULL) c=*p2; |
188 |
|
188 | else if(*p2=='{' && *(p2+2)=='}' && strchr("NZQRC",*(p2+1))!=NULL) { |
189 |
|
189 | c=*(p2+1); p2+=2; |
190 |
|
190 | } |
191 |
|
191 | if(c) { |
192 |
|
192 | p2=find_word_start(++p2); |
193 |
|
193 | if(isalnum(*p2)) d=" "; else d=""; |
194 |
|
194 | string_modify(mathbuf,p1,p2,"\\%c%c%s",c,c,d); |
195 |
|
195 | } |
196 | } |
196 | } |
197 | for(p1=strstr(mathbuf,"{\\"); p1; p1=strstr(p1+1,"{\\")) { |
197 | for(p1=strstr(mathbuf,"{\\"); p1; p1=strstr(p1+1,"{\\")) { |
198 |
|
198 | if(p1>mathbuf && isalpha(*(p1-1))) continue; |
199 |
|
199 | for(p2=p1+2; p2<p1+24 && isalpha(*p2); p2++); |
200 |
|
200 | if(*p2!='}' || isalnum(*(p2+1))) continue; |
201 |
|
201 | memmove(buf,p1+1,p2-p1-1); buf[p2-p1-1]='\\'; buf[p2-p1]=0; |
202 |
|
202 | if(strstr(hmsame,buf)==NULL) continue; |
203 |
|
203 | ovlstrcpy(p2,p2+1); ovlstrcpy(p1,p1+1); |
204 | } |
204 | } |
205 | if(strstr(mathbuf,"\\begin{")!=NULL) return; |
205 | if(strstr(mathbuf,"\\begin{")!=NULL) return; |
206 | for(p1=strchr(mathbuf,'{'); p1; p1=strchr(p1+1,'{')) { |
206 | for(p1=strchr(mathbuf,'{'); p1; p1=strchr(p1+1,'{')) { |
207 |
|
207 | if((p1>mathbuf && isalpha(*(p1-1))) || |
208 |
|
208 | !isalnum(*(p1+1)) || *(p1+2)!='}') continue; |
209 |
|
209 | *p1=*(p1+1); ovlstrcpy(p1+1,p1+3); |
210 | } |
210 | } |
211 | if(strchr(mathbuf,'[')!=NULL) { |
211 | if(strchr(mathbuf,'[')!=NULL) { |
212 | char mbuf[MAX_LINELEN+1]; |
212 | char mbuf[MAX_LINELEN+1]; |
213 |
|
213 | snprintf(mbuf,sizeof(mbuf),"{%s}",mathbuf); |
214 |
|
214 | ovlstrcpy(mathbuf,mbuf); |
215 | } |
215 | } |
216 |
|
216 | /* try to simplify */ |
217 | if(strchr(mathbuf,'{')==NULL && strchr(mathbuf,'\\')!=NULL) { |
217 | if(strchr(mathbuf,'{')==NULL && strchr(mathbuf,'\\')!=NULL) { |
218 |
|
218 | int i, tt; |
219 |
|
219 | tt=0; |
220 |
|
220 | for(p1=strchr(mathbuf,'\\'); p1; p1=strchr(p1+1,'\\')) { |
221 |
|
221 | for(p2=p1+1;isalpha(*p2);p2++); |
222 |
|
222 | if(p2==p1+1 || p2>p1+24) {tt=1; break;} |
223 |
|
223 | memmove(buf,p1,p2-p1);buf[p2-p1]='\\';buf[p2-p1+1]=0; |
224 |
|
224 | for(i=0;i<backtransno && strcmp(buf,backtrans[i].name)!=0;i++); |
225 |
|
225 | if(i>=backtransno && strstr(hmsame,buf)==NULL) { |
226 |
|
226 | tt=1; break; |
227 |
|
227 | } |
228 |
|
228 | } |
229 |
|
229 | if(tt==0) { |
230 |
|
230 | for(p1=strchr(mathbuf,'\\'); p1; p1=strchr(p1+1,'\\')) { |
231 |
|
231 | for(p2=p1+1;isalpha(*p2);p2++); |
232 |
|
232 | if(p2==p1+1 || p2>p1+24) break; |
233 |
|
233 | memmove(buf,p1,p2-p1);buf[p2-p1]='\\';buf[p2-p1+1]=0; |
234 |
|
234 | for(i=0;i<backtransno && strcmp(buf,backtrans[i].name)!=0;i++); |
235 |
|
235 | if(i<backtransno) |
236 |
|
236 | string_modify(buf,p1,p2,backtrans[i].trans); |
237 |
|
237 | else *p1=' '; |
238 |
|
238 | } |
239 |
|
239 | } |
240 | } |
240 | } |
241 | } |
241 | } |
242 | 242 | ||
243 | void output(void) |
243 | void output(void) |
244 | { |
244 | { |
245 | char *p, *pp, *p2, *pt; |
245 | char *p, *pp, *p2, *pt; |
246 | char buf[MAX_LINELEN+1]; |
246 | char buf[MAX_LINELEN+1]; |
247 | p=filebuf; |
247 | p=filebuf; |
248 | restart: |
248 | restart: |
249 | pp=find_tag(p,"body"); if(*pp!=0) { |
249 | pp=find_tag(p,"body"); if(*pp!=0) { |
250 |
|
250 | p=find_tag_end(pp); goto restart; |
251 | } |
251 | } |
252 | pp=find_tag(p,"html"); if(*pp!=0) { |
252 | pp=find_tag(p,"html"); if(*pp!=0) { |
253 |
|
253 | p=find_tag_end(pp); goto restart; |
254 | } |
254 | } |
255 | *find_tag(p,"/body")=0; *find_tag(p,"/html")=0; |
255 | *find_tag(p,"/body")=0; *find_tag(p,"/html")=0; |
256 | for(pp=strstr(p,"\n\n"); pp; pp=strstr(pp+1,"\n\n")) *pp=' '; |
256 | for(pp=strstr(p,"\n\n"); pp; pp=strstr(pp+1,"\n\n")) *pp=' '; |
257 | for(pp=strchr(p,'<');pp!=NULL;pp=strchr(find_tag_end(pp),'<')) { |
257 | for(pp=strchr(p,'<');pp!=NULL;pp=strchr(find_tag_end(pp),'<')) { |
258 |
|
258 | if(pp>p) {fwrite(p,1,pp-p,outf); p=pp;} |
259 |
|
259 | if(latex2html && strncasecmp(pp,"<br><hr>",8)==0 && |
260 |
|
260 | *find_word_start(pp+8)==0) break; |
261 |
|
261 | if(strncasecmp(pp+1,"!-- MATH",8)==0) { |
262 |
|
262 | p2=strstr(pp+8,"-->"); if(p2==NULL) continue; |
263 |
|
263 | *p2=0; getmath(pp+9); *p2='-'; |
264 |
|
264 | p=p2+3; pt=find_word_start(p); |
265 |
|
265 | if(mathbuf[0] && strncasecmp(pt,"<IMG",4)==0 && isspace(*(pt+4))) { |
266 |
|
266 | p=find_tag_end(pt); pp=pt; |
267 |
|
267 | fprintf(outf,"\\(%s\\)",mathbuf); |
268 |
|
268 | } |
269 |
|
269 | continue; |
270 |
|
270 | } |
271 |
|
271 | if(strncasecmp(pp+1,"a",1)==0 && isspace(*(pp+2))) { |
272 | 272 | ||
273 | 273 | ||
274 | 274 | ||
275 |
|
275 | continue; |
276 |
|
276 | } |
277 |
|
277 | if(strncasecmp(pp+1,"img",3)==0 && isspace(*(pp+4))) { |
278 |
|
278 | p2=find_tag_end(pp); |
279 |
|
279 | if(p2-pp>=MAX_LINELEN-256) continue; |
280 |
|
280 | memmove(buf,pp+1,p2-pp-2); buf[p2-pp-2]=0; |
281 |
|
281 | pt=strstr(buf,"ALT=\""); if(pt==NULL) pt=strstr(buf,"alt=\""); |
282 |
|
282 | if(pt!=NULL) { |
283 |
|
283 | pt+=strlen("ALT=\""); |
284 |
|
284 | getmath(pt); if(mathbuf[0]) { |
285 |
|
285 | fprintf(outf,"\\(%s\\)",mathbuf); p=p2; |
286 |
|
286 | } |
287 |
|
287 | } |
288 |
|
288 | } |
289 | } |
289 | } |
290 | if(pp==NULL) fprintf(outf,"%s",p); |
290 | if(pp==NULL) fprintf(outf,"%s",p); |
291 | } |
291 | } |
292 | 292 | ||
293 | int main(int argc, char *argv[]) |
293 | int main(int argc, char *argv[]) |
Line 301... | Line 301... | ||
301 | if(p==NULL || *p==0) return 1; |
301 | if(p==NULL || *p==0) return 1; |
302 | p=find_word_start(p); pp=find_word_end(p); |
302 | p=find_word_start(p); pp=find_word_end(p); |
303 | if(pp<=p || pp-p>sizeof(fn1)-1) return 1; |
303 | if(pp<=p || pp-p>sizeof(fn1)-1) return 1; |
304 | memmove(fn1,p,pp-p); fn1[pp-p]=0; |
304 | memmove(fn1,p,pp-p); fn1[pp-p]=0; |
305 | p=find_word_start(pp); pp=find_word_end(p); |
305 | p=find_word_start(pp); pp=find_word_end(p); |
306 | if(pp<=p || pp-p>sizeof(fn2)-1) ovlstrcpy(fn2,fn1); |
306 | if(pp<=p || pp-p>sizeof(fn2)-1) ovlstrcpy(fn2,fn1); |
307 | else {memmove(fn2,p,pp-p); fn2[pp-p]=0;} |
307 | else {memmove(fn2,p,pp-p); fn2[pp-p]=0;} |
308 | prepare_file(); |
308 | prepare_file(); |
309 | output(); |
309 | output(); |
310 | fclose(outf); |
310 | fclose(outf); |
311 | return 0; |
311 | return 0; |
312 | } |
312 | } |
313 | - |