Rev 11124 | Details | Compare with Previous | Last modification | View Log | RSS feed
Rev | Author | Line No. | Line |
---|---|---|---|
10 | reyssat | 1 | /* Copyright (C) 1998-2003 XIAO, Gang of Universite de Nice - Sophia Antipolis |
2 | * |
||
3 | * This program is free software; you can redistribute it and/or modify |
||
4 | * it under the terms of the GNU General Public License as published by |
||
5 | * the Free Software Foundation; either version 2 of the License, or |
||
6 | * (at your option) any later version. |
||
7 | * |
||
8 | * This program is distributed in the hope that it will be useful, |
||
9 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
||
10 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
||
11 | * GNU General Public License for more details. |
||
12 | * |
||
13 | * You should have received a copy of the GNU General Public License |
||
14 | * along with this program; if not, write to the Free Software |
||
15 | * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. |
||
16 | */ |
||
17 | |||
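7676 | bpr | 18 | /* Convert latex2html-style HTML: strip the html/body wrapper and replace math images by their TeX source written as \( ... \) */ |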
10 | reyssat | 19 | |
20 | /*************** Customization: change values hereafter ****************/ |
||
21 | |||
/* Upper limit, in bytes, of the data buffers (16 MiB).
 * The expansion is parenthesized so that the macro behaves as a single
 * value inside any expression (e.g. x % buflim, x / buflim). */
#define buflim (1024*1024*16)
24 | |||
25 | /***************** Nothing should need change hereafter *****************/ |
||
8185 | bpr | 26 | |
8094 | bpr | 27 | #include "../Lib/libwims.h" |
10 | reyssat | 28 | |
/* Backslash-delimited list of TeX command names.  getmath() builds a key
 * of the form "\name\" and looks it up in this list with strstr(). */
#define hmsame \
    "\\CC\\Delta\\Gamma\\Lambda\\NN\\Omega\\Phi\\Pi\\Psi\\QQ\\RR\\Sigma\\Xi\\ZZ" \
    "\\alpha\\beta\\cap\\chi\\cup\\delta\\div\\eta\\exists\\forall\\gamma" \
    "\\in\\infty\\iota\\kappa\\lambda\\mu\\nabla\\nu\\omega\\pi\\pm\\psi\\rho" \
    "\\sigma\\subset\\subseteq\\tau\\theta\\times\\varepsilon\\varphi\\xi\\zeta\\"
30 | |||
10 | reyssat | 31 | char fn1[1024]="", fn2[1024]=""; |
32 | char mathbuf[MAX_LINELEN+1]; |
||
33 | char *filebuf; |
||
34 | int filelen=0; |
||
35 | int latex2html=0; |
||
36 | FILE *outf; |
||
37 | |||
/* Translation table used by getmath(): a TeX relation/arrow command,
 * written with a trailing backslash as lookup delimiter ("\name\"),
 * and the plain-text replacement substituted for it. */
struct {
    char *name, *trans;
} backtrans[]={
    { .name="\\ge\\",                 .trans=" >= "   },
    { .name="\\geq\\",                .trans=" >= "   },
    { .name="\\le\\",                 .trans=" <= "   },
    { .name="\\leq\\",                .trans=" <= "   },
    { .name="\\to\\",                 .trans=" -> "   },
    { .name="\\rightarrow\\",         .trans=" -> "   },
    { .name="\\longrightarrow\\",     .trans=" --> "  },
    { .name="\\Rightarrow\\",         .trans=" => "   },
    { .name="\\Longrightarrow\\",     .trans=" ==> "  },
    { .name="\\Leftrightarrow\\",     .trans=" <=> "  },
    { .name="\\Longleftrightarrow\\", .trans=" <==> " },
    { .name="\\Longleftarrow\\",      .trans=" <== "  }
};

/* Number of entries in backtrans[]. */
#define backtransno (sizeof backtrans / sizeof *backtrans)
||
56 | |||
/* Find the end of the HTML tag starting at p.
 *
 * p may point at the opening '<' (it is skipped) or just after it.
 * A '>' located inside a double-quoted attribute value does not close
 * the tag.  Returns a pointer just past the closing '>', or to the
 * terminating NUL when the tag is never closed.
 */
char *find_tag_end(char *p)
{
    char *start, *end;

    start=p;
    if(*start=='<') start++;
    end=start;
    while(*end!=0 && *end!='>') {
        if(*end=='"') {
            /* jump to the closing quote; an unterminated quote swallows
             * the rest of the string */
            end=strchr(end+1,'"');
            if(end==NULL) {end=p+strlen(p); break;}
        }
        end++;
    }
    /* Ran off the end more than 2048 bytes later: this is probably a
     * syntax error of the page.  Retry ignoring quotes, then settle for
     * the next '<', then for whatever find_word_end() yields on the
     * first word (presumably its end -- helper defined elsewhere). */
    if(*end==0 && end>p+2048) {
        end=strchr(start,'>');
        if(end==NULL) end=strchr(start,'<');
        if(end==NULL) end=find_word_end(find_word_start(start));
    }
    if(*end=='>') end++;
    return end;
}
77 | |||
/* Locate the first occurrence of the HTML tag `tag` in p.
 *
 * The tag name is compared case-insensitively right after a '<' and must
 * not be followed by an alphanumeric character (so "body" does not match
 * "<bodyx>").  Pass "/body" to look for a closing tag.
 *
 * Returns a pointer to the '<' of the tag, or to the terminating NUL of p
 * when the tag is not found.
 */
char *find_tag(char *p, char *tag)
{
    size_t len=strlen(tag);
    char *lt;

    for(lt=strchr(p,'<'); lt!=NULL; lt=strchr(lt+1,'<')) {
        /* The cast keeps isalnum() defined for bytes >= 0x80 where plain
         * char is signed. */
        if(strncasecmp(lt+1,tag,len)==0 &&
           !isalnum((unsigned char)lt[1+len])) return lt;
    }
    return p+strlen(p);
}
88 | |||
/* Decode, in place, the HTML entities &amp; &lt; and &gt; found in p.
 *
 * Each entity is replaced by the single character it stands for and the
 * rest of the string is shifted left.  Scanning resumes after the decoded
 * character, so "&amp;lt;" yields "&lt;" and is not decoded twice.
 *
 * The entity literals must be spelled out in full: comparing 5 (resp. 4)
 * bytes against "&" (resp. "<", ">") can never match a real entity.
 */
void cutamp(char *p)
{
    char *amp;

    for(amp=strchr(p,'&'); amp!=NULL; amp=strchr(amp+1,'&')) {
        char repl;
        size_t elen;

        if(strncmp(amp,"&amp;",5)==0) {repl='&'; elen=5;}
        else if(strncmp(amp,"&lt;",4)==0) {repl='<'; elen=4;}
        else if(strncmp(amp,"&gt;",4)==0) {repl='>'; elen=4;}
        else continue;
        *amp=repl;
        /* overlapping copy of the tail, terminating NUL included */
        memmove(amp+1,amp+elen,strlen(amp+elen)+1);
    }
}
105 | |||
7676 | bpr | 106 | /* get the file */ |
10 | reyssat | 107 | void prepare_file(void) |
108 | { |
||
12248 | bpr | 109 | FILE *f; |
110 | long int flen; |
||
10 | reyssat | 111 | |
12248 | bpr | 112 | filelen=0; |
113 | f=fopen(fn1,"r"); if(f==NULL) return; |
||
114 | fseek(f,0,SEEK_END);flen=ftell(f); fseek(f,0,SEEK_SET); |
||
115 | if(flen>buflim) return; |
||
116 | filebuf=xmalloc(2*flen+1024);flen=fread(filebuf,1,flen,f); |
||
117 | fclose(f); |
||
118 | if(flen<0 || flen>=buflim) flen=0; |
||
119 | filebuf[flen]=0; |
||
120 | filelen=flen; |
||
121 | outf=fopen(fn2,"w"); if(outf==NULL) return; |
||
10 | reyssat | 122 | } |
123 | |||
124 | void getmath(char *p) |
||
125 | { |
||
12248 | bpr | 126 | char *pt, *pv; |
127 | char *p1, *p2, buf[256]; |
||
10 | reyssat | 128 | |
12248 | bpr | 129 | mathbuf[0]=0; |
130 | pt=find_word_start(p); |
||
131 | if(strncmp(pt,"\\begin{displaymath}", |
||
132 | strlen("\\begin{displaymath}"))==0) { |
||
133 | pt=strchr(pt,'}')+1; |
||
134 | pv=strstr(pt,"\\end{displaymath}"); |
||
135 | if(pv==NULL) return; |
||
136 | goto insmath; |
||
137 | } |
||
138 | if(*pt=='%') pt=strchr(pt,'$'); |
||
139 | if(pt==NULL) return; |
||
140 | if(*pt!='$') return; |
||
141 | do pt++; while(*pt=='$'); |
||
142 | pv=strchr(pt,'$'); if(pv==NULL) return; |
||
143 | insmath: if(pv-pt>=MAX_LINELEN-256) return; |
||
144 | memmove(mathbuf,pt,pv-pt); mathbuf[pv-pt]=0; |
||
145 | if(strstr(mathbuf,"...\n...")!=NULL) { |
||
146 | ovlstrcpy(mathbuf,"......"); return; |
||
147 | } |
||
148 | cutamp(mathbuf); latex2html=1; |
||
149 | for(p1=strstr(mathbuf,"\\mathbb");p1;p1=strstr(p1+1,"\\mathbb")) { |
||
150 | char c,*d; |
||
151 | p2=find_word_start(p1+strlen("\\mathbb")); c=0; |
||
152 | if(strchr("NZQRC",*p2)!=NULL) c=*p2; |
||
153 | else |
||
154 | if(*p2=='{' && *(p2+2)=='}' && strchr("NZQRC",*(p2+1))!=NULL) { |
||
155 | c=*(p2+1); p2+=2; |
||
156 | } |
||
157 | if(c) { |
||
158 | p2=find_word_start(++p2); |
||
159 | if(isalnum(*p2)) d=" "; else d=""; |
||
160 | string_modify(mathbuf,p1,p2,"\\%c%c%s",c,c,d); |
||
10 | reyssat | 161 | } |
12248 | bpr | 162 | } |
163 | for(p1=strstr(mathbuf,"{\\"); p1; p1=strstr(p1+1,"{\\")) { |
||
164 | if(p1>mathbuf && isalpha(*(p1-1))) continue; |
||
165 | for(p2=p1+2; p2<p1+24 && isalpha(*p2); p2++); |
||
166 | if(*p2!='}' || isalnum(*(p2+1))) continue; |
||
167 | memmove(buf,p1+1,p2-p1-1); buf[p2-p1-1]='\\'; buf[p2-p1]=0; |
||
168 | if(strstr(hmsame,buf)==NULL) continue; |
||
169 | ovlstrcpy(p2,p2+1); ovlstrcpy(p1,p1+1); |
||
170 | } |
||
171 | if(strstr(mathbuf,"\\begin{")!=NULL) return; |
||
172 | for(p1=strchr(mathbuf,'{'); p1; p1=strchr(p1+1,'{')) { |
||
173 | if((p1>mathbuf && isalpha(*(p1-1))) || |
||
174 | !isalnum(*(p1+1)) || *(p1+2)!='}') continue; |
||
175 | *p1=*(p1+1); ovlstrcpy(p1+1,p1+3); |
||
176 | } |
||
177 | if(strchr(mathbuf,'[')!=NULL) { |
||
178 | char mbuf[MAX_LINELEN+1]; |
||
179 | snprintf(mbuf,sizeof(mbuf),"{%s}",mathbuf); |
||
180 | ovlstrcpy(mathbuf,mbuf); |
||
181 | } |
||
182 | /* try to simplify */ |
||
183 | if(strchr(mathbuf,'{')==NULL && strchr(mathbuf,'\\')!=NULL) { |
||
184 | int i, tt; |
||
185 | tt=0; |
||
186 | for(p1=strchr(mathbuf,'\\'); p1; p1=strchr(p1+1,'\\')) { |
||
187 | for(p2=p1+1;isalpha(*p2);p2++); |
||
188 | if(p2==p1+1 || p2>p1+24) {tt=1; break;} |
||
189 | memmove(buf,p1,p2-p1);buf[p2-p1]='\\';buf[p2-p1+1]=0; |
||
190 | for(i=0;i<backtransno && strcmp(buf,backtrans[i].name)!=0;i++); |
||
191 | if(i>=backtransno && strstr(hmsame,buf)==NULL) { |
||
192 | tt=1; break; |
||
7676 | bpr | 193 | } |
10 | reyssat | 194 | } |
12248 | bpr | 195 | if(tt==0) { |
7676 | bpr | 196 | for(p1=strchr(mathbuf,'\\'); p1; p1=strchr(p1+1,'\\')) { |
12248 | bpr | 197 | for(p2=p1+1;isalpha(*p2);p2++); |
198 | if(p2==p1+1 || p2>p1+24) break; |
||
199 | memmove(buf,p1,p2-p1);buf[p2-p1]='\\';buf[p2-p1+1]=0; |
||
200 | for(i=0;i<backtransno && strcmp(buf,backtrans[i].name)!=0;i++); |
||
201 | if(i<backtransno) |
||
202 | string_modify(buf,p1,p2,backtrans[i].trans); |
||
203 | else *p1=' '; |
||
7676 | bpr | 204 | } |
10 | reyssat | 205 | } |
12248 | bpr | 206 | } |
10 | reyssat | 207 | } |
208 | |||
209 | void output(void) |
||
210 | { |
||
12248 | bpr | 211 | char *p, *pp, *p2, *pt; |
212 | char buf[MAX_LINELEN+1]; |
||
213 | p=filebuf; |
||
214 | restart: |
||
215 | pp=find_tag(p,"body"); |
||
216 | if(*pp!=0) { |
||
217 | p=find_tag_end(pp); goto restart; |
||
218 | } |
||
219 | pp=find_tag(p,"html"); |
||
220 | if(*pp!=0) { |
||
221 | p=find_tag_end(pp); goto restart; |
||
222 | } |
||
223 | *find_tag(p,"/body")=0; *find_tag(p,"/html")=0; |
||
224 | for(pp=strstr(p,"\n\n"); pp; pp=strstr(pp+1,"\n\n")) *pp=' '; |
||
225 | for(pp=strchr(p,'<');pp!=NULL;pp=strchr(find_tag_end(pp),'<')) { |
||
226 | if(pp>p) {fwrite(p,1,pp-p,outf); p=pp;} |
||
227 | if(latex2html && strncasecmp(pp,"<br><hr>",8)==0 && |
||
228 | *find_word_start(pp+8)==0) break; |
||
229 | if(strncasecmp(pp+1,"!-- MATH",8)==0) { |
||
230 | p2=strstr(pp+8,"-->"); if(p2==NULL) continue; |
||
231 | *p2=0; getmath(pp+9); *p2='-'; |
||
232 | p=p2+3; pt=find_word_start(p); |
||
233 | if(mathbuf[0] && strncasecmp(pt,"<IMG",4)==0 && isspace(*(pt+4))) { |
||
234 | p=find_tag_end(pt); pp=pt; |
||
235 | fprintf(outf,"\\(%s\\)",mathbuf); |
||
236 | } |
||
237 | continue; |
||
10 | reyssat | 238 | } |
12248 | bpr | 239 | if(strncasecmp(pp+1,"a",1)==0 && isspace(*(pp+2))) { |
240 | continue; |
||
10 | reyssat | 241 | } |
12248 | bpr | 242 | if(strncasecmp(pp+1,"img",3)==0 && isspace(*(pp+4))) { |
243 | p2=find_tag_end(pp); |
||
244 | if(p2-pp>=MAX_LINELEN-256) continue; |
||
245 | memmove(buf,pp+1,p2-pp-2); buf[p2-pp-2]=0; |
||
246 | pt=strstr(buf,"ALT=\""); if(pt==NULL) pt=strstr(buf,"alt=\""); |
||
247 | if(pt!=NULL) { |
||
248 | pt+=strlen("ALT=\""); |
||
249 | getmath(pt); if(mathbuf[0]) { |
||
250 | fprintf(outf,"\\(%s\\)",mathbuf); p=p2; |
||
251 | } |
||
7676 | bpr | 252 | } |
10 | reyssat | 253 | } |
12248 | bpr | 254 | } |
255 | if(pp==NULL) fprintf(outf,"%s",p); |
||
10 | reyssat | 256 | } |
257 | |||
258 | int main(int argc, char *argv[]) |
||
259 | { |
||
12248 | bpr | 260 | char *p, *pp; |
261 | char *mod; |
||
10 | reyssat | 262 | |
12248 | bpr | 263 | mod=getenv("w_module"); |
264 | if(mod!=NULL && strncmp(mod,"adm/",4)!=0 && strcmp(mod,"home")!=0) return 1; |
||
265 | if(mod==NULL) p=argv[1]; else p=getenv("wims_exec_parm"); |
||
266 | if(p==NULL || *p==0) return 1; |
||
267 | p=find_word_start(p); pp=find_word_end(p); |
||
268 | if(pp<=p || pp-p>sizeof(fn1)-1) return 1; |
||
269 | memmove(fn1,p,pp-p); fn1[pp-p]=0; |
||
270 | p=find_word_start(pp); pp=find_word_end(p); |
||
271 | if(pp<=p || pp-p>sizeof(fn2)-1) ovlstrcpy(fn2,fn1); |
||
272 | else {memmove(fn2,p,pp-p); fn2[pp-p]=0;} |
||
273 | prepare_file(); |
||
274 | output(); |
||
275 | fclose(outf); |
||
276 | return 0; |
||
10 | reyssat | 277 | } |