Rev 8863 | Go to most recent revision | Details | Compare with Previous | Last modification | View Log | RSS feed
Rev | Author | Line No. | Line |
---|---|---|---|
10 | reyssat | 1 | /* Copyright (C) 1998-2003 XIAO, Gang of Universite de Nice - Sophia Antipolis |
2 | * |
||
3 | * This program is free software; you can redistribute it and/or modify |
||
4 | * it under the terms of the GNU General Public License as published by |
||
5 | * the Free Software Foundation; either version 2 of the License, or |
||
6 | * (at your option) any later version. |
||
7 | * |
||
8 | * This program is distributed in the hope that it will be useful, |
||
9 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
||
10 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
||
11 | * GNU General Public License for more details. |
||
12 | * |
||
13 | * You should have received a copy of the GNU General Public License |
||
14 | * along with this program; if not, write to the Free Software |
||
15 | * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. |
||
16 | */ |
||
17 | |||
7676 | bpr | 18 | /* Check type of a file */ |
10 | reyssat | 19 | |
20 | /*************** Customization: change values hereafter ****************/ |
||
21 | |||
/* Upper limit, in bytes, on the size of an input file that prepare_file()
 * accepts. Parenthesized so the macro behaves as a single value inside
 * any expression (e.g. x / buflim). */
#define buflim (1024*1024*16)
24 | |||
25 | /***************** Nothing should need change hereafter *****************/ |
||
8185 | bpr | 26 | |
8094 | bpr | 27 | #include "../Lib/libwims.h" |
10 | reyssat | 28 | |
/* Backslash-delimited list of LaTeX command names. Callers look a command
 * up by building "\name\" and searching for it with strstr(). The list is
 * written as adjacent string literals, which the compiler concatenates
 * into one string. */
#define hmsame \
    "\\CC\\Delta\\Gamma\\Lambda\\NN\\Omega\\Phi\\Pi\\Psi\\QQ\\RR\\Sigma\\Xi\\ZZ" \
    "\\alpha\\beta\\cap\\chi\\cup\\delta\\div\\eta\\exists\\forall\\gamma" \
    "\\in\\infty\\iota\\kappa\\lambda\\mu\\nabla\\nu\\omega\\pi\\pm\\psi" \
    "\\rho\\sigma\\subset\\subseteq\\tau\\theta\\times\\varepsilon\\varphi" \
    "\\xi\\zeta\\"
30 | |||
10 | reyssat | 31 | char fn1[1024]="", fn2[1024]=""; |
32 | char mathbuf[MAX_LINELEN+1]; |
||
33 | char *filebuf; |
||
34 | int filelen=0; |
||
35 | int latex2html=0; |
||
36 | FILE *outf; |
||
37 | |||
/* Table of LaTeX relation/arrow commands and the plain-text spelling that
 * replaces each of them. Command names are stored with a trailing
 * backslash, matching the "\name\" keys that getmath() builds. */
struct {
    char *name;
    char *trans;
} backtrans[] = {
    { "\\ge\\",                 " >= "   },
    { "\\geq\\",                " >= "   },
    { "\\le\\",                 " <= "   },
    { "\\leq\\",                " <= "   },
    { "\\to\\",                 " -> "   },
    { "\\rightarrow\\",         " -> "   },
    { "\\longrightarrow\\",     " --> "  },
    { "\\Rightarrow\\",         " => "   },
    { "\\Longrightarrow\\",     " ==> "  },
    { "\\Leftrightarrow\\",     " <=> "  },
    { "\\Longleftrightarrow\\", " <==> " },
    { "\\Longleftarrow\\",      " <== "  },
};

/* Number of entries in backtrans[]. */
#define backtransno (sizeof(backtrans)/sizeof(backtrans[0]))
||
56 | |||
/*
 * Find the end of the HTML tag that starts at p (p may point at the
 * opening '<' or just after it).
 *
 * Double-quoted sections inside the tag are skipped, so a '>' within
 * quotes does not end the tag.
 *
 * Returns a pointer just past the closing '>'. When no closing '>' is
 * reached, returns a pointer to the string terminator, unless the scan
 * ran for more than 2048 characters; in that case the fallback described
 * in the body is used.
 */
char *find_tag_end(char *p)
{
    char *cur = p;
    char *body;

    if (*cur == '<') cur++;
    while (*cur != 0 && *cur != '>') {
        if (*cur == '"') {
            char *closing = strchr(cur + 1, '"');
            if (closing == NULL) {
                /* unterminated quote: give up at the end of the string */
                cur = p + strlen(p);
                break;
            }
            cur = closing;
        }
        cur++;
    }
    /* Reaching the end of the text far away from the tag start is probably
     * a syntax error of the page: retry ignoring quotes, then settle for
     * the next '<', then for the end of the first word. */
    if (*cur == 0 && cur > p + 2048) {
        body = (*p == '<') ? p + 1 : p;
        cur = strchr(body, '>');
        if (cur == NULL) cur = strchr(body, '<');
        if (cur == NULL) cur = find_word_end(find_word_start(body));
    }
    return (*cur == '>') ? cur + 1 : cur;
}
77 | |||
/*
 * Find the first occurrence of the HTML tag named `tag` in p.
 *
 * The name is compared case-insensitively right after a '<', and must not
 * be followed by an alphanumeric character (so "body" does not match
 * "<bodyguard").
 *
 * Returns a pointer to the '<' of the matching tag, or a pointer to the
 * terminating NUL of p when there is no match.
 */
char *find_tag(char *p, char *tag)
{
    size_t len = strlen(tag);
    char *pp;

    for (pp = strchr(p, '<'); pp != NULL; pp = strchr(pp + 1, '<')) {
        /* ctype functions require an unsigned char value (or EOF) */
        if (strncasecmp(pp + 1, tag, len) == 0 &&
            !isalnum((unsigned char)pp[1 + len]))
            return pp;
    }
    return p + strlen(p);
}
||
88 | |||
/*
 * Decode the HTML entities "&amp;", "&lt;" and "&gt;" in p, in place,
 * into '&', '<' and '>'.
 *
 * Each '&' is examined once: scanning resumes after the decoded
 * character, so "&amp;lt;" becomes "&lt;" and is not decoded a second
 * time. Any other text following '&' is left untouched.
 */
void cutamp(char *p)
{
    static const struct {
        const char *entity;
        size_t len;
        char ch;
    } ents[] = {
        { "&amp;", 5, '&' },
        { "&lt;",  4, '<' },
        { "&gt;",  4, '>' },
    };
    char *pp;
    size_t i;

    for (pp = strchr(p, '&'); pp; pp = strchr(pp + 1, '&')) {
        for (i = 0; i < sizeof(ents) / sizeof(ents[0]); i++) {
            if (strncmp(pp, ents[i].entity, ents[i].len) != 0) continue;
            *pp = ents[i].ch;
            /* close the gap; the regions overlap, hence memmove
             * (the +1 carries the terminating NUL along) */
            memmove(pp + 1, pp + ents[i].len, strlen(pp + ents[i].len) + 1);
            break;
        }
    }
}
||
105 | |||
7676 | bpr | 106 | /* get the file */ |
10 | reyssat | 107 | void prepare_file(void) |
108 | { |
||
109 | FILE *f; |
||
110 | long int flen; |
||
111 | |||
112 | filelen=0; |
||
113 | f=fopen(fn1,"r"); if(f==NULL) return; |
||
114 | fseek(f,0,SEEK_END);flen=ftell(f); fseek(f,0,SEEK_SET); |
||
115 | if(flen>buflim) return; |
||
116 | filebuf=xmalloc(2*flen+1024);flen=fread(filebuf,1,flen,f); |
||
117 | fclose(f); |
||
11124 | georgesk | 118 | if(flen<0 || flen>=buflim) flen=0; |
119 | filebuf[flen]=0; |
||
10 | reyssat | 120 | filelen=flen; |
121 | outf=fopen(fn2,"w"); if(outf==NULL) return; |
||
122 | } |
||
123 | |||
124 | void getmath(char *p) |
||
125 | { |
||
126 | char *pt, *pv; |
||
127 | char *p1, *p2, buf[256]; |
||
128 | |||
129 | mathbuf[0]=0; |
||
130 | pt=find_word_start(p); |
||
131 | if(strncmp(pt,"\\begin{displaymath}", |
||
7676 | bpr | 132 | strlen("\\begin{displaymath}"))==0) { |
133 | pt=strchr(pt,'}')+1; |
||
134 | pv=strstr(pt,"\\end{displaymath}"); |
||
135 | if(pv==NULL) return; |
||
136 | goto insmath; |
||
10 | reyssat | 137 | } |
11124 | georgesk | 138 | if(*pt=='%') pt=strchr(pt,'$'); |
139 | if(pt==NULL) return; |
||
140 | if(*pt!='$') return; |
||
141 | do pt++; while(*pt=='$'); |
||
10 | reyssat | 142 | pv=strchr(pt,'$'); if(pv==NULL) return; |
143 | insmath: if(pv-pt>=MAX_LINELEN-256) return; |
||
144 | memmove(mathbuf,pt,pv-pt); mathbuf[pv-pt]=0; |
||
145 | if(strstr(mathbuf,"...\n...")!=NULL) { |
||
7676 | bpr | 146 | ovlstrcpy(mathbuf,"......"); return; |
10 | reyssat | 147 | } |
148 | cutamp(mathbuf); latex2html=1; |
||
149 | for(p1=strstr(mathbuf,"\\mathbb");p1;p1=strstr(p1+1,"\\mathbb")) { |
||
7676 | bpr | 150 | char c,*d; |
151 | p2=find_word_start(p1+strlen("\\mathbb")); c=0; |
||
152 | if(strchr("NZQRC",*p2)!=NULL) c=*p2; |
||
153 | else if(*p2=='{' && *(p2+2)=='}' && strchr("NZQRC",*(p2+1))!=NULL) { |
||
154 | c=*(p2+1); p2+=2; |
||
155 | } |
||
156 | if(c) { |
||
157 | p2=find_word_start(++p2); |
||
158 | if(isalnum(*p2)) d=" "; else d=""; |
||
159 | string_modify(mathbuf,p1,p2,"\\%c%c%s",c,c,d); |
||
160 | } |
||
10 | reyssat | 161 | } |
162 | for(p1=strstr(mathbuf,"{\\"); p1; p1=strstr(p1+1,"{\\")) { |
||
7676 | bpr | 163 | if(p1>mathbuf && isalpha(*(p1-1))) continue; |
164 | for(p2=p1+2; p2<p1+24 && isalpha(*p2); p2++); |
||
165 | if(*p2!='}' || isalnum(*(p2+1))) continue; |
||
166 | memmove(buf,p1+1,p2-p1-1); buf[p2-p1-1]='\\'; buf[p2-p1]=0; |
||
167 | if(strstr(hmsame,buf)==NULL) continue; |
||
168 | ovlstrcpy(p2,p2+1); ovlstrcpy(p1,p1+1); |
||
10 | reyssat | 169 | } |
170 | if(strstr(mathbuf,"\\begin{")!=NULL) return; |
||
171 | for(p1=strchr(mathbuf,'{'); p1; p1=strchr(p1+1,'{')) { |
||
7676 | bpr | 172 | if((p1>mathbuf && isalpha(*(p1-1))) || |
173 | !isalnum(*(p1+1)) || *(p1+2)!='}') continue; |
||
174 | *p1=*(p1+1); ovlstrcpy(p1+1,p1+3); |
||
10 | reyssat | 175 | } |
176 | if(strchr(mathbuf,'[')!=NULL) { |
||
177 | char mbuf[MAX_LINELEN+1]; |
||
7676 | bpr | 178 | snprintf(mbuf,sizeof(mbuf),"{%s}",mathbuf); |
179 | ovlstrcpy(mathbuf,mbuf); |
||
10 | reyssat | 180 | } |
7676 | bpr | 181 | /* try to simplify */ |
10 | reyssat | 182 | if(strchr(mathbuf,'{')==NULL && strchr(mathbuf,'\\')!=NULL) { |
7676 | bpr | 183 | int i, tt; |
184 | tt=0; |
||
185 | for(p1=strchr(mathbuf,'\\'); p1; p1=strchr(p1+1,'\\')) { |
||
186 | for(p2=p1+1;isalpha(*p2);p2++); |
||
187 | if(p2==p1+1 || p2>p1+24) {tt=1; break;} |
||
188 | memmove(buf,p1,p2-p1);buf[p2-p1]='\\';buf[p2-p1+1]=0; |
||
189 | for(i=0;i<backtransno && strcmp(buf,backtrans[i].name)!=0;i++); |
||
190 | if(i>=backtransno && strstr(hmsame,buf)==NULL) { |
||
191 | tt=1; break; |
||
192 | } |
||
193 | } |
||
194 | if(tt==0) { |
||
195 | for(p1=strchr(mathbuf,'\\'); p1; p1=strchr(p1+1,'\\')) { |
||
196 | for(p2=p1+1;isalpha(*p2);p2++); |
||
197 | if(p2==p1+1 || p2>p1+24) break; |
||
198 | memmove(buf,p1,p2-p1);buf[p2-p1]='\\';buf[p2-p1+1]=0; |
||
199 | for(i=0;i<backtransno && strcmp(buf,backtrans[i].name)!=0;i++); |
||
200 | if(i<backtransno) |
||
201 | string_modify(buf,p1,p2,backtrans[i].trans); |
||
202 | else *p1=' '; |
||
203 | } |
||
204 | } |
||
10 | reyssat | 205 | } |
206 | } |
||
207 | |||
208 | void output(void) |
||
209 | { |
||
210 | char *p, *pp, *p2, *pt; |
||
211 | char buf[MAX_LINELEN+1]; |
||
212 | p=filebuf; |
||
213 | restart: |
||
214 | pp=find_tag(p,"body"); if(*pp!=0) { |
||
7676 | bpr | 215 | p=find_tag_end(pp); goto restart; |
10 | reyssat | 216 | } |
217 | pp=find_tag(p,"html"); if(*pp!=0) { |
||
7676 | bpr | 218 | p=find_tag_end(pp); goto restart; |
10 | reyssat | 219 | } |
220 | *find_tag(p,"/body")=0; *find_tag(p,"/html")=0; |
||
221 | for(pp=strstr(p,"\n\n"); pp; pp=strstr(pp+1,"\n\n")) *pp=' '; |
||
222 | for(pp=strchr(p,'<');pp!=NULL;pp=strchr(find_tag_end(pp),'<')) { |
||
7676 | bpr | 223 | if(pp>p) {fwrite(p,1,pp-p,outf); p=pp;} |
224 | if(latex2html && strncasecmp(pp,"<br><hr>",8)==0 && |
||
225 | *find_word_start(pp+8)==0) break; |
||
226 | if(strncasecmp(pp+1,"!-- MATH",8)==0) { |
||
227 | p2=strstr(pp+8,"-->"); if(p2==NULL) continue; |
||
228 | *p2=0; getmath(pp+9); *p2='-'; |
||
229 | p=p2+3; pt=find_word_start(p); |
||
230 | if(mathbuf[0] && strncasecmp(pt,"<IMG",4)==0 && isspace(*(pt+4))) { |
||
231 | p=find_tag_end(pt); pp=pt; |
||
232 | fprintf(outf,"\\(%s\\)",mathbuf); |
||
233 | } |
||
234 | continue; |
||
235 | } |
||
236 | if(strncasecmp(pp+1,"a",1)==0 && isspace(*(pp+2))) { |
||
10 | reyssat | 237 | |
7676 | bpr | 238 | |
239 | |||
240 | continue; |
||
241 | } |
||
242 | if(strncasecmp(pp+1,"img",3)==0 && isspace(*(pp+4))) { |
||
243 | p2=find_tag_end(pp); |
||
244 | if(p2-pp>=MAX_LINELEN-256) continue; |
||
245 | memmove(buf,pp+1,p2-pp-2); buf[p2-pp-2]=0; |
||
246 | pt=strstr(buf,"ALT=\""); if(pt==NULL) pt=strstr(buf,"alt=\""); |
||
247 | if(pt!=NULL) { |
||
248 | pt+=strlen("ALT=\""); |
||
249 | getmath(pt); if(mathbuf[0]) { |
||
250 | fprintf(outf,"\\(%s\\)",mathbuf); p=p2; |
||
251 | } |
||
252 | } |
||
253 | } |
||
10 | reyssat | 254 | } |
255 | if(pp==NULL) fprintf(outf,"%s",p); |
||
256 | } |
||
257 | |||
258 | int main(int argc, char *argv[]) |
||
259 | { |
||
260 | char *p, *pp; |
||
261 | char *mod; |
||
262 | |||
263 | mod=getenv("w_module"); |
||
264 | if(mod!=NULL && strncmp(mod,"adm/",4)!=0 && strcmp(mod,"home")!=0) return 1; |
||
265 | if(mod==NULL) p=argv[1]; else p=getenv("wims_exec_parm"); |
||
266 | if(p==NULL || *p==0) return 1; |
||
267 | p=find_word_start(p); pp=find_word_end(p); |
||
268 | if(pp<=p || pp-p>sizeof(fn1)-1) return 1; |
||
269 | memmove(fn1,p,pp-p); fn1[pp-p]=0; |
||
270 | p=find_word_start(pp); pp=find_word_end(p); |
||
7676 | bpr | 271 | if(pp<=p || pp-p>sizeof(fn2)-1) ovlstrcpy(fn2,fn1); |
10 | reyssat | 272 | else {memmove(fn2,p,pp-p); fn2[pp-p]=0;} |
273 | prepare_file(); |
||
274 | output(); |
||
275 | fclose(outf); |
||
276 | return 0; |
||
277 | } |