Rev 8100 | Rev 8185 | Go to most recent revision | Details | Compare with Previous | Last modification | View Log | RSS feed
Rev | Author | Line No. | Line |
---|---|---|---|
10 | reyssat | 1 | /* Copyright (C) 1998-2003 XIAO, Gang of Universite de Nice - Sophia Antipolis |
2 | * |
||
3 | * This program is free software; you can redistribute it and/or modify |
||
4 | * it under the terms of the GNU General Public License as published by |
||
5 | * the Free Software Foundation; either version 2 of the License, or |
||
6 | * (at your option) any later version. |
||
7 | * |
||
8 | * This program is distributed in the hope that it will be useful, |
||
9 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
||
10 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
||
11 | * GNU General Public License for more details. |
||
12 | * |
||
13 | * You should have received a copy of the GNU General Public License |
||
14 | * along with this program; if not, write to the Free Software |
||
15 | * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. |
||
16 | */ |
||
17 | |||
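7676 | bpr | 18 | /* Post-process an HTML page (typically latex2html output): keep the body and replace math images by their \( \) TeX source */ |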
10 | reyssat | 19 | |
20 | /*************** Customization: change values hereafter ****************/ |
||
21 | |||
/* limit of data buffers, in bytes (16 MiB).  The value is parenthesized so
 * that the macro expands correctly inside any surrounding expression. */
#define buflim (1024*1024*16)
24 | |||
25 | /***************** Nothing should need change hereafter *****************/ |
||
26 | #include "../wims.h" |
||
8094 | bpr | 27 | #include "../Lib/libwims.h" |
10 | reyssat | 28 | #include "../hmname.c" |
29 | |||
30 | char fn1[1024]="", fn2[1024]=""; |
||
31 | char mathbuf[MAX_LINELEN+1]; |
||
32 | char *filebuf; |
||
33 | int filelen=0; |
||
34 | int latex2html=0; |
||
35 | FILE *outf; |
||
36 | |||
/* TeX relation/arrow macros and the plain-text spelling substituted for
 * them by getmath().  Each name carries a trailing backslash because
 * getmath() appends one to the macro it has isolated before comparing. */
struct {
    char *name;
    char *trans;
} backtrans[] = {
    { .name = "\\ge\\",                 .trans = " >= "   },
    { .name = "\\geq\\",                .trans = " >= "   },
    { .name = "\\le\\",                 .trans = " <= "   },
    { .name = "\\leq\\",                .trans = " <= "   },
    { .name = "\\to\\",                 .trans = " -> "   },
    { .name = "\\rightarrow\\",         .trans = " -> "   },
    { .name = "\\longrightarrow\\",     .trans = " --> "  },
    { .name = "\\Rightarrow\\",         .trans = " => "   },
    { .name = "\\Longrightarrow\\",     .trans = " ==> "  },
    { .name = "\\Leftrightarrow\\",     .trans = " <=> "  },
    { .name = "\\Longleftrightarrow\\", .trans = " <==> " },
    { .name = "\\Longleftarrow\\",      .trans = " <== "  },
};

/* number of entries in backtrans[] */
#define backtransno (sizeof backtrans / sizeof backtrans[0])
||
55 | |||
/* Return a pointer just past the HTML tag starting at p.
 *
 * p may point at the opening '<' or right after it.  A '>' inside a
 * double-quoted attribute value does not end the tag.  When no closing
 * '>' exists the result points at the terminating null of the string,
 * except for the recovery case described below. */
char *find_tag_end(char *p)
{
    char *cur = p;

    if (*cur == '<') cur++;
    while (*cur != 0 && *cur != '>') {
        if (*cur == '"') {
            /* jump to the matching quote; an unterminated quote swallows
             * the rest of the string */
            char *closing = strchr(cur + 1, '"');
            if (closing == NULL) {
                cur = p + strlen(p);
                break;
            }
            cur = closing;
        }
        cur++;
    }
    /* The end of the string was reached more than 2048 characters away:
     * this is probably a syntax error of the page.  Rescan ignoring
     * quotes and stop at the first '>', else the first '<', else the
     * end of the first word. */
    if (*cur == 0 && cur > p + 2048) {
        char *body = p;

        if (*body == '<') body++;
        cur = strchr(body, '>');
        if (cur == NULL) cur = strchr(body, '<');
        if (cur == NULL) cur = find_word_end(find_word_start(body));
    }
    if (*cur == '>') cur++;
    return cur;
}
||
75 | |||
/* Find the first HTML tag named `tag' in the string p.
 *
 * The name is compared case-insensitively right after a '<' and must not
 * be followed by an alphanumeric character, so "body" does not match
 * "<bodyguard>".  `tag' may start with '/' to look for a closing tag.
 *
 * Returns a pointer to the '<' of the tag, or to the terminating null of
 * p when the tag does not occur. */
char *find_tag(char *p, char *tag)
{
    size_t len = strlen(tag);
    char *pp;

    for (pp = strchr(p, '<'); pp != NULL; pp = strchr(pp + 1, '<')) {
        /* the cast matters: handing a negative char to isalnum() is
         * undefined behaviour */
        if (strncasecmp(pp + 1, tag, len) == 0 &&
            !isalnum((unsigned char) pp[1 + len]))
            return pp;
    }
    return p + strlen(p);
}
||
86 | |||
8100 | bpr | 87 | /* modify a string. Bufferlen must be at least MAX_LINELEN */ |
88 | void string_modify3(char *start, char *bad_beg, char *bad_end, char *good,...) |
||
10 | reyssat | 89 | { |
90 | char buf[MAX_LINELEN+1]; |
||
91 | va_list vp; |
||
7676 | bpr | 92 | |
10 | reyssat | 93 | va_start(vp,good); |
94 | vsnprintf(buf,sizeof(buf),good,vp); va_end(vp); |
||
95 | if(strlen(start)-(bad_end-bad_beg)+strlen(buf)>=MAX_LINELEN) { |
||
7676 | bpr | 96 | return; |
10 | reyssat | 97 | } |
98 | strcat(buf,bad_end); |
||
3718 | reyssat | 99 | ovlstrcpy(bad_beg,buf); |
10 | reyssat | 100 | } |
101 | |||
/* Decode, in place, the HTML entities "&amp;", "&lt;" and "&gt;" in p.
 *
 * Every entity is decoded exactly once: the character it produces is not
 * rescanned, so "&amp;lt;" becomes "&lt;" and not "<".  Any other '&' is
 * left untouched. */
void cutamp(char *p)
{
    static const struct {
        const char *entity;
        char ch;
    } table[] = {
        { "&amp;", '&' },
        { "&lt;",  '<' },
        { "&gt;",  '>' },
    };
    char *pp;
    size_t i, n;

    for (pp = strchr(p, '&'); pp != NULL; pp = strchr(pp + 1, '&')) {
        for (i = 0; i < sizeof(table) / sizeof(table[0]); i++) {
            n = strlen(table[i].entity);
            if (strncmp(pp, table[i].entity, n) != 0) continue;
            *pp = table[i].ch;
            /* source and destination overlap: shift the tail (with its
             * terminating null) using memmove */
            memmove(pp + 1, pp + n, strlen(pp + n) + 1);
            break;
        }
    }
}
||
118 | |||
7676 | bpr | 119 | /* get the file */ |
10 | reyssat | 120 | void prepare_file(void) |
121 | { |
||
122 | FILE *f; |
||
123 | long int flen; |
||
124 | |||
125 | filelen=0; |
||
126 | f=fopen(fn1,"r"); if(f==NULL) return; |
||
127 | fseek(f,0,SEEK_END);flen=ftell(f); fseek(f,0,SEEK_SET); |
||
128 | if(flen>buflim) return; |
||
129 | filebuf=xmalloc(2*flen+1024);flen=fread(filebuf,1,flen,f); |
||
130 | fclose(f); |
||
131 | if(flen<0 || flen>=buflim) flen=0; filebuf[flen]=0; |
||
132 | filelen=flen; |
||
133 | outf=fopen(fn2,"w"); if(outf==NULL) return; |
||
134 | } |
||
135 | |||
136 | void getmath(char *p) |
||
137 | { |
||
138 | char *pt, *pv; |
||
139 | char *p1, *p2, buf[256]; |
||
140 | |||
141 | mathbuf[0]=0; |
||
142 | pt=find_word_start(p); |
||
143 | if(strncmp(pt,"\\begin{displaymath}", |
||
7676 | bpr | 144 | strlen("\\begin{displaymath}"))==0) { |
145 | pt=strchr(pt,'}')+1; |
||
146 | pv=strstr(pt,"\\end{displaymath}"); |
||
147 | if(pv==NULL) return; |
||
148 | goto insmath; |
||
10 | reyssat | 149 | } |
150 | if(*pt=='%') pt=strchr(pt,'$'); if(pt==NULL) return; |
||
151 | if(*pt!='$') return; do pt++; while(*pt=='$'); |
||
152 | pv=strchr(pt,'$'); if(pv==NULL) return; |
||
153 | insmath: if(pv-pt>=MAX_LINELEN-256) return; |
||
154 | memmove(mathbuf,pt,pv-pt); mathbuf[pv-pt]=0; |
||
155 | if(strstr(mathbuf,"...\n...")!=NULL) { |
||
7676 | bpr | 156 | ovlstrcpy(mathbuf,"......"); return; |
10 | reyssat | 157 | } |
158 | cutamp(mathbuf); latex2html=1; |
||
159 | for(p1=strstr(mathbuf,"\\mathbb");p1;p1=strstr(p1+1,"\\mathbb")) { |
||
7676 | bpr | 160 | char c,*d; |
161 | p2=find_word_start(p1+strlen("\\mathbb")); c=0; |
||
162 | if(strchr("NZQRC",*p2)!=NULL) c=*p2; |
||
163 | else if(*p2=='{' && *(p2+2)=='}' && strchr("NZQRC",*(p2+1))!=NULL) { |
||
164 | c=*(p2+1); p2+=2; |
||
165 | } |
||
166 | if(c) { |
||
167 | p2=find_word_start(++p2); |
||
168 | if(isalnum(*p2)) d=" "; else d=""; |
||
169 | string_modify(mathbuf,p1,p2,"\\%c%c%s",c,c,d); |
||
170 | } |
||
10 | reyssat | 171 | } |
172 | for(p1=strstr(mathbuf,"{\\"); p1; p1=strstr(p1+1,"{\\")) { |
||
7676 | bpr | 173 | if(p1>mathbuf && isalpha(*(p1-1))) continue; |
174 | for(p2=p1+2; p2<p1+24 && isalpha(*p2); p2++); |
||
175 | if(*p2!='}' || isalnum(*(p2+1))) continue; |
||
176 | memmove(buf,p1+1,p2-p1-1); buf[p2-p1-1]='\\'; buf[p2-p1]=0; |
||
177 | if(strstr(hmsame,buf)==NULL) continue; |
||
178 | ovlstrcpy(p2,p2+1); ovlstrcpy(p1,p1+1); |
||
10 | reyssat | 179 | } |
180 | if(strstr(mathbuf,"\\begin{")!=NULL) return; |
||
181 | for(p1=strchr(mathbuf,'{'); p1; p1=strchr(p1+1,'{')) { |
||
7676 | bpr | 182 | if((p1>mathbuf && isalpha(*(p1-1))) || |
183 | !isalnum(*(p1+1)) || *(p1+2)!='}') continue; |
||
184 | *p1=*(p1+1); ovlstrcpy(p1+1,p1+3); |
||
10 | reyssat | 185 | } |
186 | if(strchr(mathbuf,'[')!=NULL) { |
||
187 | char mbuf[MAX_LINELEN+1]; |
||
7676 | bpr | 188 | snprintf(mbuf,sizeof(mbuf),"{%s}",mathbuf); |
189 | ovlstrcpy(mathbuf,mbuf); |
||
10 | reyssat | 190 | } |
7676 | bpr | 191 | /* try to simplify */ |
10 | reyssat | 192 | if(strchr(mathbuf,'{')==NULL && strchr(mathbuf,'\\')!=NULL) { |
7676 | bpr | 193 | int i, tt; |
194 | tt=0; |
||
195 | for(p1=strchr(mathbuf,'\\'); p1; p1=strchr(p1+1,'\\')) { |
||
196 | for(p2=p1+1;isalpha(*p2);p2++); |
||
197 | if(p2==p1+1 || p2>p1+24) {tt=1; break;} |
||
198 | memmove(buf,p1,p2-p1);buf[p2-p1]='\\';buf[p2-p1+1]=0; |
||
199 | for(i=0;i<backtransno && strcmp(buf,backtrans[i].name)!=0;i++); |
||
200 | if(i>=backtransno && strstr(hmsame,buf)==NULL) { |
||
201 | tt=1; break; |
||
202 | } |
||
203 | } |
||
204 | if(tt==0) { |
||
205 | for(p1=strchr(mathbuf,'\\'); p1; p1=strchr(p1+1,'\\')) { |
||
206 | for(p2=p1+1;isalpha(*p2);p2++); |
||
207 | if(p2==p1+1 || p2>p1+24) break; |
||
208 | memmove(buf,p1,p2-p1);buf[p2-p1]='\\';buf[p2-p1+1]=0; |
||
209 | for(i=0;i<backtransno && strcmp(buf,backtrans[i].name)!=0;i++); |
||
210 | if(i<backtransno) |
||
211 | string_modify(buf,p1,p2,backtrans[i].trans); |
||
212 | else *p1=' '; |
||
213 | } |
||
214 | } |
||
10 | reyssat | 215 | } |
216 | } |
||
217 | |||
218 | void output(void) |
||
219 | { |
||
220 | char *p, *pp, *p2, *pt; |
||
221 | char buf[MAX_LINELEN+1]; |
||
222 | p=filebuf; |
||
223 | restart: |
||
224 | pp=find_tag(p,"body"); if(*pp!=0) { |
||
7676 | bpr | 225 | p=find_tag_end(pp); goto restart; |
10 | reyssat | 226 | } |
227 | pp=find_tag(p,"html"); if(*pp!=0) { |
||
7676 | bpr | 228 | p=find_tag_end(pp); goto restart; |
10 | reyssat | 229 | } |
230 | *find_tag(p,"/body")=0; *find_tag(p,"/html")=0; |
||
231 | for(pp=strstr(p,"\n\n"); pp; pp=strstr(pp+1,"\n\n")) *pp=' '; |
||
232 | for(pp=strchr(p,'<');pp!=NULL;pp=strchr(find_tag_end(pp),'<')) { |
||
7676 | bpr | 233 | if(pp>p) {fwrite(p,1,pp-p,outf); p=pp;} |
234 | if(latex2html && strncasecmp(pp,"<br><hr>",8)==0 && |
||
235 | *find_word_start(pp+8)==0) break; |
||
236 | if(strncasecmp(pp+1,"!-- MATH",8)==0) { |
||
237 | p2=strstr(pp+8,"-->"); if(p2==NULL) continue; |
||
238 | *p2=0; getmath(pp+9); *p2='-'; |
||
239 | p=p2+3; pt=find_word_start(p); |
||
240 | if(mathbuf[0] && strncasecmp(pt,"<IMG",4)==0 && isspace(*(pt+4))) { |
||
241 | p=find_tag_end(pt); pp=pt; |
||
242 | fprintf(outf,"\\(%s\\)",mathbuf); |
||
243 | } |
||
244 | continue; |
||
245 | } |
||
246 | if(strncasecmp(pp+1,"a",1)==0 && isspace(*(pp+2))) { |
||
10 | reyssat | 247 | |
7676 | bpr | 248 | |
249 | |||
250 | continue; |
||
251 | } |
||
252 | if(strncasecmp(pp+1,"img",3)==0 && isspace(*(pp+4))) { |
||
253 | p2=find_tag_end(pp); |
||
254 | if(p2-pp>=MAX_LINELEN-256) continue; |
||
255 | memmove(buf,pp+1,p2-pp-2); buf[p2-pp-2]=0; |
||
256 | pt=strstr(buf,"ALT=\""); if(pt==NULL) pt=strstr(buf,"alt=\""); |
||
257 | if(pt!=NULL) { |
||
258 | pt+=strlen("ALT=\""); |
||
259 | getmath(pt); if(mathbuf[0]) { |
||
260 | fprintf(outf,"\\(%s\\)",mathbuf); p=p2; |
||
261 | } |
||
262 | } |
||
263 | } |
||
10 | reyssat | 264 | } |
265 | if(pp==NULL) fprintf(outf,"%s",p); |
||
266 | } |
||
267 | |||
268 | int main(int argc, char *argv[]) |
||
269 | { |
||
270 | char *p, *pp; |
||
271 | char *mod; |
||
272 | |||
273 | mod=getenv("w_module"); |
||
274 | if(mod!=NULL && strncmp(mod,"adm/",4)!=0 && strcmp(mod,"home")!=0) return 1; |
||
275 | if(mod==NULL) p=argv[1]; else p=getenv("wims_exec_parm"); |
||
276 | if(p==NULL || *p==0) return 1; |
||
277 | p=find_word_start(p); pp=find_word_end(p); |
||
278 | if(pp<=p || pp-p>sizeof(fn1)-1) return 1; |
||
279 | memmove(fn1,p,pp-p); fn1[pp-p]=0; |
||
280 | p=find_word_start(pp); pp=find_word_end(p); |
||
7676 | bpr | 281 | if(pp<=p || pp-p>sizeof(fn2)-1) ovlstrcpy(fn2,fn1); |
10 | reyssat | 282 | else {memmove(fn2,p,pp-p); fn2[pp-p]=0;} |
283 | prepare_file(); |
||
284 | output(); |
||
285 | fclose(outf); |
||
286 | return 0; |
||
287 | } |