Rev 11124 | Show entire file | Ignore whitespace | Details | Blame | Last modification | View Log | RSS feed
Rev 11124 | Rev 12248 | ||
---|---|---|---|
Line 54... | Line 54... | ||
54 | 54 | ||
/* Number of entries in the backtrans[] table (element count, not bytes). */
#define backtransno (sizeof(backtrans)/sizeof(backtrans[0]))
56 | 56 | ||
/* Locate the end of the HTML tag starting at p.
 * p may point at the opening '<' (which is skipped) or just after it.
 * Double-quoted attribute values are skipped as a unit, so a '>' inside
 * quotes does not terminate the tag.
 * Returns a pointer just past the closing '>', or a pointer to the
 * terminating NUL (or to a fallback position, see below) when no closing
 * '>' is found. */
char *find_tag_end(char *p)
{
    char *pp, *old;

    pp=p;
    if(*pp=='<') pp++;
    for(; *pp && *pp!='>'; pp++) {
        if(*pp=='"') {
            pp=strchr(pp+1,'"');
            /* unterminated quote: give up at the end of the string */
            if(pp==NULL) {pp=p+strlen(p); break;}
            /* pp is now on the closing quote; the loop increment steps past it */
        }
    }
    /* Reaching the end of the string more than 2048 bytes away is probably
     * a syntax error of the page (e.g. an unbalanced quote): rescan while
     * ignoring quotes, falling back to the next '<', then to the end of the
     * first word as reported by find_word_start()/find_word_end(). */
    if(*pp==0 && pp>p+2048) {
        old=p;
        if(*old=='<') old++;
        pp=strchr(old,'>');
        if(pp==NULL) pp=strchr(old,'<');
        if(pp==NULL) pp=find_word_end(find_word_start(old));
    }
    if(*pp=='>') pp++;
    return pp;
}
77 | 77 | ||
/* Search p for the first occurrence of "<tag" (case-insensitive) that is
 * not immediately followed by an alphanumeric character, so that "body"
 * does not match "<bodyx>".
 * Returns a pointer to the '<' of the match, or a pointer to the
 * terminating NUL of p when there is no match. */
char *find_tag(char *p, char *tag)
{
    char *pp;
    size_t len;

    len=strlen(tag);
    for(pp=strchr(p,'<'); pp!=NULL && *pp; pp=strchr(pp+1,'<')) {
        /* cast: <ctype.h> functions are undefined for negative char values */
        if(strncasecmp(pp+1,tag,len)==0 &&
           !isalnum((unsigned char)*(pp+1+len))) return pp;
    }
    return p+strlen(p);
}
88 | 88 | ||
/* Decode, in place, the HTML entities "&amp;", "&lt;" and "&gt;" found in
 * the NUL-terminated string p into '&', '<' and '>' respectively.
 * Each entity is decoded once: the scan resumes after the character just
 * produced, so "&amp;lt;" becomes "&lt;" and is not decoded further.
 * Other '&' sequences are left untouched. */
void cutamp(char *p)
{
    char *pp;

    for(pp=strchr(p,'&'); pp; pp=strchr(pp+1,'&')) {
        if(strncmp(pp,"&amp;",5)==0) {
            /* keep the '&' and shift the tail left over "amp;";
             * memmove because source and destination overlap */
            memmove(pp+1,pp+5,strlen(pp+5)+1);
            continue;
        }
        if(strncmp(pp,"&lt;",4)==0) {
            *pp='<';
            memmove(pp+1,pp+4,strlen(pp+4)+1);
            continue;
        }
        if(strncmp(pp,"&gt;",4)==0) {
            *pp='>';
            memmove(pp+1,pp+4,strlen(pp+4)+1);
            continue;
        }
    }
}
105 | 105 | ||
106 | /* get the file */ |
106 | /* get the file */ |
107 | void prepare_file(void) |
107 | void prepare_file(void) |
108 | { |
108 | { |
109 |
|
109 | FILE *f; |
110 |
|
110 | long int flen; |
111 | 111 | ||
112 |
|
112 | filelen=0; |
113 |
|
113 | f=fopen(fn1,"r"); if(f==NULL) return; |
114 |
|
114 | fseek(f,0,SEEK_END);flen=ftell(f); fseek(f,0,SEEK_SET); |
115 |
|
115 | if(flen>buflim) return; |
116 |
|
116 | filebuf=xmalloc(2*flen+1024);flen=fread(filebuf,1,flen,f); |
117 |
|
117 | fclose(f); |
118 |
|
118 | if(flen<0 || flen>=buflim) flen=0; |
119 |
|
119 | filebuf[flen]=0; |
120 |
|
120 | filelen=flen; |
121 |
|
121 | outf=fopen(fn2,"w"); if(outf==NULL) return; |
122 | } |
122 | } |
123 | 123 | ||
124 | void getmath(char *p) |
124 | void getmath(char *p) |
125 | { |
125 | { |
126 |
|
126 | char *pt, *pv; |
127 |
|
127 | char *p1, *p2, buf[256]; |
128 | 128 | ||
129 |
|
129 | mathbuf[0]=0; |
130 |
|
130 | pt=find_word_start(p); |
131 |
|
131 | if(strncmp(pt,"\\begin{displaymath}", |
132 |
|
132 | strlen("\\begin{displaymath}"))==0) { |
133 |
|
133 | pt=strchr(pt,'}')+1; |
134 |
|
134 | pv=strstr(pt,"\\end{displaymath}"); |
135 |
|
135 | if(pv==NULL) return; |
136 |
|
136 | goto insmath; |
137 |
|
137 | } |
138 |
|
138 | if(*pt=='%') pt=strchr(pt,'$'); |
139 |
|
139 | if(pt==NULL) return; |
140 |
|
140 | if(*pt!='$') return; |
141 |
|
141 | do pt++; while(*pt=='$'); |
142 |
|
142 | pv=strchr(pt,'$'); if(pv==NULL) return; |
143 |
|
143 | insmath: if(pv-pt>=MAX_LINELEN-256) return; |
144 |
|
144 | memmove(mathbuf,pt,pv-pt); mathbuf[pv-pt]=0; |
145 |
|
145 | if(strstr(mathbuf,"...\n...")!=NULL) { |
146 |
|
146 | ovlstrcpy(mathbuf,"......"); return; |
147 |
|
147 | } |
148 |
|
148 | cutamp(mathbuf); latex2html=1; |
149 |
|
149 | for(p1=strstr(mathbuf,"\\mathbb");p1;p1=strstr(p1+1,"\\mathbb")) { |
150 |
|
150 | char c,*d; |
151 |
|
151 | p2=find_word_start(p1+strlen("\\mathbb")); c=0; |
152 |
|
152 | if(strchr("NZQRC",*p2)!=NULL) c=*p2; |
- | 153 | else |
|
153 |
|
154 | if(*p2=='{' && *(p2+2)=='}' && strchr("NZQRC",*(p2+1))!=NULL) { |
154 |
|
155 | c=*(p2+1); p2+=2; |
155 | } |
156 | } |
156 |
|
157 | if(c) { |
157 |
|
158 | p2=find_word_start(++p2); |
158 |
|
159 | if(isalnum(*p2)) d=" "; else d=""; |
159 |
|
160 | string_modify(mathbuf,p1,p2,"\\%c%c%s",c,c,d); |
160 |
|
161 | } |
161 |
|
162 | } |
162 |
|
163 | for(p1=strstr(mathbuf,"{\\"); p1; p1=strstr(p1+1,"{\\")) { |
163 |
|
164 | if(p1>mathbuf && isalpha(*(p1-1))) continue; |
164 |
|
165 | for(p2=p1+2; p2<p1+24 && isalpha(*p2); p2++); |
165 |
|
166 | if(*p2!='}' || isalnum(*(p2+1))) continue; |
166 |
|
167 | memmove(buf,p1+1,p2-p1-1); buf[p2-p1-1]='\\'; buf[p2-p1]=0; |
167 |
|
168 | if(strstr(hmsame,buf)==NULL) continue; |
168 |
|
169 | ovlstrcpy(p2,p2+1); ovlstrcpy(p1,p1+1); |
169 |
|
170 | } |
170 |
|
171 | if(strstr(mathbuf,"\\begin{")!=NULL) return; |
171 |
|
172 | for(p1=strchr(mathbuf,'{'); p1; p1=strchr(p1+1,'{')) { |
172 |
|
173 | if((p1>mathbuf && isalpha(*(p1-1))) || |
173 |
|
174 | !isalnum(*(p1+1)) || *(p1+2)!='}') continue; |
174 |
|
175 | *p1=*(p1+1); ovlstrcpy(p1+1,p1+3); |
175 |
|
176 | } |
176 |
|
177 | if(strchr(mathbuf,'[')!=NULL) { |
177 |
|
178 | char mbuf[MAX_LINELEN+1]; |
178 |
|
179 | snprintf(mbuf,sizeof(mbuf),"{%s}",mathbuf); |
179 |
|
180 | ovlstrcpy(mathbuf,mbuf); |
180 |
|
181 | } |
181 | /* try to simplify */ |
182 | /* try to simplify */ |
182 |
|
183 | if(strchr(mathbuf,'{')==NULL && strchr(mathbuf,'\\')!=NULL) { |
183 |
|
184 | int i, tt; |
184 |
|
185 | tt=0; |
- | 186 | for(p1=strchr(mathbuf,'\\'); p1; p1=strchr(p1+1,'\\')) { |
|
- | 187 | for(p2=p1+1;isalpha(*p2);p2++); |
|
- | 188 | if(p2==p1+1 || p2>p1+24) {tt=1; break;} |
|
- | 189 | memmove(buf,p1,p2-p1);buf[p2-p1]='\\';buf[p2-p1+1]=0; |
|
- | 190 | for(i=0;i<backtransno && strcmp(buf,backtrans[i].name)!=0;i++); |
|
- | 191 | if(i>=backtransno && strstr(hmsame,buf)==NULL) { |
|
- | 192 | tt=1; break; |
|
- | 193 | } |
|
- | 194 | } |
|
- | 195 | if(tt==0) { |
|
185 | for(p1=strchr(mathbuf,'\\'); p1; p1=strchr(p1+1,'\\')) { |
196 | for(p1=strchr(mathbuf,'\\'); p1; p1=strchr(p1+1,'\\')) { |
186 |
|
197 | for(p2=p1+1;isalpha(*p2);p2++); |
187 | if(p2==p1+1 || p2>p1+24) {tt=1; break;} |
- | |
188 | memmove(buf,p1,p2-p1);buf[p2-p1]='\\';buf[p2-p1+1]=0; |
- | |
189 | for(i=0;i<backtransno && strcmp(buf,backtrans[i].name)!=0;i++); |
- | |
190 | if(i>=backtransno && strstr(hmsame,buf)==NULL) { |
- | |
191 | tt=1; break; |
- | |
192 | } |
- | |
193 | } |
- | |
194 | if(tt==0) { |
- | |
195 | for(p1=strchr(mathbuf,'\\'); p1; p1=strchr(p1+1,'\\')) { |
- | |
196 | for(p2=p1+1;isalpha(*p2);p2++); |
- | |
197 |
|
198 | if(p2==p1+1 || p2>p1+24) break; |
198 |
|
199 | memmove(buf,p1,p2-p1);buf[p2-p1]='\\';buf[p2-p1+1]=0; |
199 |
|
200 | for(i=0;i<backtransno && strcmp(buf,backtrans[i].name)!=0;i++); |
200 |
|
201 | if(i<backtransno) |
201 |
|
202 | string_modify(buf,p1,p2,backtrans[i].trans); |
202 |
|
203 | else *p1=' '; |
203 | } |
- | |
204 | } |
204 | } |
205 | } |
205 | } |
- | 206 | } |
|
206 | } |
207 | } |
207 | 208 | ||
208 | void output(void) |
209 | void output(void) |
209 | { |
210 | { |
210 |
|
211 | char *p, *pp, *p2, *pt; |
211 |
|
212 | char buf[MAX_LINELEN+1]; |
212 |
|
213 | p=filebuf; |
213 |
|
214 | restart: |
214 |
|
215 | pp=find_tag(p,"body"); |
- | 216 | if(*pp!=0) { |
|
- | 217 | p=find_tag_end(pp); goto restart; |
|
- | 218 | } |
|
- | 219 | pp=find_tag(p,"html"); |
|
- | 220 | if(*pp!=0) { |
|
215 |
|
221 | p=find_tag_end(pp); goto restart; |
- | 222 | } |
|
- | 223 | *find_tag(p,"/body")=0; *find_tag(p,"/html")=0; |
|
- | 224 | for(pp=strstr(p,"\n\n"); pp; pp=strstr(pp+1,"\n\n")) *pp=' '; |
|
- | 225 | for(pp=strchr(p,'<');pp!=NULL;pp=strchr(find_tag_end(pp),'<')) { |
|
- | 226 | if(pp>p) {fwrite(p,1,pp-p,outf); p=pp;} |
|
- | 227 | if(latex2html && strncasecmp(pp,"<br><hr>",8)==0 && |
|
- | 228 | *find_word_start(pp+8)==0) break; |
|
- | 229 | if(strncasecmp(pp+1,"!-- MATH",8)==0) { |
|
- | 230 | p2=strstr(pp+8,"-->"); if(p2==NULL) continue; |
|
- | 231 | *p2=0; getmath(pp+9); *p2='-'; |
|
- | 232 | p=p2+3; pt=find_word_start(p); |
|
- | 233 | if(mathbuf[0] && strncasecmp(pt,"<IMG",4)==0 && isspace(*(pt+4))) { |
|
- | 234 | p=find_tag_end(pt); pp=pt; |
|
- | 235 | fprintf(outf,"\\(%s\\)",mathbuf); |
|
- | 236 | } |
|
- | 237 | continue; |
|
- | 238 | } |
|
- | 239 | if(strncasecmp(pp+1,"a",1)==0 && isspace(*(pp+2))) { |
|
- | 240 | continue; |
|
216 | } |
241 | } |
217 | pp=find_tag(p,"html"); if(*pp!=0) { |
- | |
218 | p=find_tag_end(pp); goto restart; |
- | |
219 | } |
- | |
220 | *find_tag(p,"/body")=0; *find_tag(p,"/html")=0; |
- | |
221 | for(pp=strstr(p,"\n\n"); pp; pp=strstr(pp+1,"\n\n")) *pp=' '; |
- | |
222 | for(pp=strchr(p,'<');pp!=NULL;pp=strchr(find_tag_end(pp),'<')) { |
- | |
223 | if(pp>p) {fwrite(p,1,pp-p,outf); p=pp;} |
- | |
224 | if(latex2html && strncasecmp(pp,"<br><hr>",8)==0 && |
- | |
225 | *find_word_start(pp+8)==0) break; |
- | |
226 | if(strncasecmp(pp+1,"!-- MATH",8)==0) { |
- | |
227 | p2=strstr(pp+8,"-->"); if(p2==NULL) continue; |
- | |
228 | *p2=0; getmath(pp+9); *p2='-'; |
- | |
229 | p=p2+3; pt=find_word_start(p); |
- | |
230 | if(mathbuf[0] && strncasecmp(pt,"<IMG",4)==0 && isspace(*(pt+4))) { |
- | |
231 | p=find_tag_end(pt); pp=pt; |
- | |
232 | fprintf(outf,"\\(%s\\)",mathbuf); |
- | |
233 | } |
- | |
234 | continue; |
- | |
235 | } |
- | |
236 | if(strncasecmp(pp+1,"a",1)==0 && isspace(*(pp+2))) { |
- | |
237 | - | ||
238 | - | ||
239 | - | ||
240 | continue; |
- | |
241 | } |
- | |
242 |
|
242 | if(strncasecmp(pp+1,"img",3)==0 && isspace(*(pp+4))) { |
243 |
|
243 | p2=find_tag_end(pp); |
244 |
|
244 | if(p2-pp>=MAX_LINELEN-256) continue; |
245 |
|
245 | memmove(buf,pp+1,p2-pp-2); buf[p2-pp-2]=0; |
246 |
|
246 | pt=strstr(buf,"ALT=\""); if(pt==NULL) pt=strstr(buf,"alt=\""); |
247 |
|
247 | if(pt!=NULL) { |
248 |
|
248 | pt+=strlen("ALT=\""); |
249 |
|
249 | getmath(pt); if(mathbuf[0]) { |
250 |
|
250 | fprintf(outf,"\\(%s\\)",mathbuf); p=p2; |
251 | } |
- | |
252 |
|
251 | } |
253 | } |
252 | } |
254 | } |
253 | } |
- | 254 | } |
|
255 |
|
255 | if(pp==NULL) fprintf(outf,"%s",p); |
256 | } |
256 | } |
257 | 257 | ||
258 | int main(int argc, char *argv[]) |
258 | int main(int argc, char *argv[]) |
259 | { |
259 | { |
260 |
|
260 | char *p, *pp; |
261 |
|
261 | char *mod; |
262 | 262 | ||
263 |
|
263 | mod=getenv("w_module"); |
264 |
|
264 | if(mod!=NULL && strncmp(mod,"adm/",4)!=0 && strcmp(mod,"home")!=0) return 1; |
265 |
|
265 | if(mod==NULL) p=argv[1]; else p=getenv("wims_exec_parm"); |
266 |
|
266 | if(p==NULL || *p==0) return 1; |
267 |
|
267 | p=find_word_start(p); pp=find_word_end(p); |
268 |
|
268 | if(pp<=p || pp-p>sizeof(fn1)-1) return 1; |
269 |
|
269 | memmove(fn1,p,pp-p); fn1[pp-p]=0; |
270 |
|
270 | p=find_word_start(pp); pp=find_word_end(p); |
271 |
|
271 | if(pp<=p || pp-p>sizeof(fn2)-1) ovlstrcpy(fn2,fn1); |
272 |
|
272 | else {memmove(fn2,p,pp-p); fn2[pp-p]=0;} |
273 |
|
273 | prepare_file(); |
274 |
|
274 | output(); |
275 |
|
275 | fclose(outf); |
276 |
|
276 | return 0; |
277 | } |
277 | } |