Subversion Repositories wimsdev

Rev

Rev 3718 | Rev 8094 | Go to most recent revision | Show entire file | Ignore whitespace | Details | Blame | Last modification | View Log | RSS feed

Rev 3718 Rev 7676
Line 13... Line 13...
13
 *  You should have received a copy of the GNU General Public License
13
 *  You should have received a copy of the GNU General Public License
14
 *  along with this program; if not, write to the Free Software
14
 *  along with this program; if not, write to the Free Software
15
 *  Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
15
 *  Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
16
 */
16
 */
17
 
17
 
18
        /* Check type of a file */
18
/* Check type of a file */
19
 
19
 
20
/*************** Customization: change values hereafter ****************/
20
/*************** Customization: change values hereafter ****************/
21
 
21
 
22
        /* limit of data buffers */
22
/* limit of data buffers */
23
#define buflim 1024*1024*16
23
#define buflim 1024*1024*16
24
 
24
 
25
/***************** Nothing should need change hereafter *****************/
25
/***************** Nothing should need change hereafter *****************/
26
 
26
 
27
#include "../wims.h"
27
#include "../wims.h"
Line 36... Line 36...
36
FILE *outf;
36
FILE *outf;
37
 
37
 
/*
 * Plain-text replacements for simple TeX relation and arrow commands.
 * Each name is the command followed by a terminating backslash, which is
 * the form in which getmath() builds its lookup keys; trans is the ASCII
 * text substituted for the command.
 */
struct {
    char *name, *trans;
} backtrans[]={
    /* order relations */
    {"\\ge\\",                  " >= "},
    {"\\geq\\",                 " >= "},
    {"\\le\\",                  " <= "},
    {"\\leq\\",                 " <= "},
    /* single arrows */
    {"\\to\\",                  " -> "},
    {"\\rightarrow\\",          " -> "},
    {"\\longrightarrow\\",      " --> "},
    /* double arrows */
    {"\\Rightarrow\\",          " => "},
    {"\\Longrightarrow\\",      " ==> "},
    {"\\Leftrightarrow\\",      " <=> "},
    {"\\Longleftrightarrow\\",  " <==> "},
    {"\\Longleftarrow\\",       " <== "},
};
Line 60... Line 60...
60
    p=malloc(n);
60
    p=malloc(n);
61
    if(p==NULL) exit(1);
61
    if(p==NULL) exit(1);
62
    return p;
62
    return p;
63
}
63
}
64
 
64
 
65
        /* Points to the end of the word */
65
/* Points to the end of the word */
66
char *find_word_end(char *p)
66
char *find_word_end(char *p)
67
{
67
{
68
    int i;
68
    int i;
69
    for(i=0;!isspace(*p) && *p!=0 && i<MAX_LINELEN; p++,i++);
69
    for(i=0;!isspace(*p) && *p!=0 && i<MAX_LINELEN; p++,i++);
70
    return p;
70
    return p;
71
}
71
}
72
 
72
 
73
        /* Strips leading spaces */
73
/* Strips leading spaces */
74
char *find_word_start(char *p)
74
char *find_word_start(char *p)
75
{
75
{
76
    int i;
76
    int i;
77
    for(i=0; isspace(*p) && i<MAX_LINELEN; p++,i++);
77
    for(i=0; isspace(*p) && i<MAX_LINELEN; p++,i++);
78
    return p;
78
    return p;
Line 81... Line 81...
/*
 * Return a pointer just past the '>' closing the tag that starts at p
 * (p may point at the opening '<' or just after it).  Double-quoted
 * attribute values are skipped, so a '>' inside quotes does not end the
 * tag.  When no closing '>' exists the result points at the terminating
 * NUL, except for the recovery case described below.
 */
char *find_tag_end(char *p)
{
    char *cur, *body;

    cur=p;
    if(*cur=='<') cur++;
    while(*cur!=0 && *cur!='>') {
        if(*cur=='"') {
            /* jump to the matching quote; unterminated quote ends the scan */
            cur=strchr(cur+1,'"');
            if(cur==NULL) {
                cur=p+strlen(p);
                break;
            }
        }
        cur++;
    }
    /*
     * Ran off the end more than 2048 bytes away: this is probably a syntax
     * error of the page.  Retry without quote handling, falling back to
     * the next '<' and finally to the end of the first word.
     */
    if(*cur==0 && cur>p+2048) {
        body=(*p=='<')? p+1 : p;
        cur=strchr(body,'>');
        if(cur==NULL) cur=strchr(body,'<');
        if(cur==NULL) cur=find_word_end(find_word_start(body));
    }
    if(*cur=='>') cur++;
    return cur;
}
100
 
100
 
/*
 * Locate the first tag named `tag` in p.
 *
 * The tag name is compared case-insensitively against the text following
 * each '<', and must not be followed by an alphanumeric character (so
 * "body" does not match "<bodyguard>").  Returns a pointer to the '<' of
 * the matching tag, or a pointer to the terminating NUL of p when no such
 * tag exists.
 */
char *find_tag(char *p, char *tag)
{
    char *pp;
    size_t len;

    len=strlen(tag);
    for(pp=strchr(p,'<'); pp!=NULL; pp=strchr(pp+1,'<')) {
        /* cast: <ctype.h> functions are undefined for negative char values */
        if(strncasecmp(pp+1,tag,len)==0 &&
           !isalnum((unsigned char)pp[1+len])) return pp;
    }
    return p+strlen(p);
}
111
 
111
 
112
        /* modify a string. Bufferlen must be ast least MAX_LINELEN */
112
/* modify a string. Bufferlen must be ast least MAX_LINELEN */
113
void string_modify(char *start, char *bad_beg, char *bad_end, char *good,...)
113
void string_modify(char *start, char *bad_beg, char *bad_end, char *good,...)
114
{
114
{
115
    char buf[MAX_LINELEN+1];
115
    char buf[MAX_LINELEN+1];
116
    va_list vp;
116
    va_list vp;
117
   
117
 
118
    va_start(vp,good);
118
    va_start(vp,good);
119
    vsnprintf(buf,sizeof(buf),good,vp); va_end(vp);
119
    vsnprintf(buf,sizeof(buf),good,vp); va_end(vp);
120
    if(strlen(start)-(bad_end-bad_beg)+strlen(buf)>=MAX_LINELEN) {
120
    if(strlen(start)-(bad_end-bad_beg)+strlen(buf)>=MAX_LINELEN) {
121
        return;
121
      return;
122
    }
122
    }
123
    strcat(buf,bad_end);
123
    strcat(buf,bad_end);
124
    ovlstrcpy(bad_beg,buf);
124
    ovlstrcpy(bad_beg,buf);
125
}
125
}
126
 
126
 
/*
 * Decode the HTML entities &amp;, &lt; and &gt; in p, in place.
 * The search resumes just after each decoded character, so decoded text
 * is not scanned again ("&amp;lt;" becomes "&lt;").
 */
void cutamp(char *p)
{
    static const struct {
        const char *entity;
        size_t len;
        char plain;
    } ents[]={
        {"&amp;", 5, '&'},
        {"&lt;",  4, '<'},
        {"&gt;",  4, '>'},
    };
    char *amp;
    size_t k;

    for(amp=strchr(p,'&'); amp!=NULL; amp=strchr(amp+1,'&')) {
        for(k=0; k<sizeof(ents)/sizeof(ents[0]); k++) {
            if(strncmp(amp,ents[k].entity,ents[k].len)!=0) continue;
            /* keep one character, close the gap left by the rest */
            *amp=ents[k].plain;
            ovlstrcpy(amp+1,amp+ents[k].len);
            break;
        }
    }
}
143
 
143
 
144
        /* get the file */
144
/* get the file */
145
void prepare_file(void)
145
void prepare_file(void)
146
{
146
{
147
    FILE *f;
147
    FILE *f;
148
    long int flen;
148
    long int flen;
149
 
149
 
Line 164... Line 164...
164
    char *p1, *p2, buf[256];
164
    char *p1, *p2, buf[256];
165
 
165
 
166
    mathbuf[0]=0;
166
    mathbuf[0]=0;
167
    pt=find_word_start(p);
167
    pt=find_word_start(p);
168
    if(strncmp(pt,"\\begin{displaymath}",
168
    if(strncmp(pt,"\\begin{displaymath}",
169
                   strlen("\\begin{displaymath}"))==0) {
169
               strlen("\\begin{displaymath}"))==0) {
170
        pt=strchr(pt,'}')+1;
170
      pt=strchr(pt,'}')+1;
171
        pv=strstr(pt,"\\end{displaymath}");
171
      pv=strstr(pt,"\\end{displaymath}");
172
        if(pv==NULL) return;
172
      if(pv==NULL) return;
173
        goto insmath;
173
      goto insmath;
174
    }
174
    }
175
    if(*pt=='%') pt=strchr(pt,'$'); if(pt==NULL) return;
175
    if(*pt=='%') pt=strchr(pt,'$'); if(pt==NULL) return;
176
    if(*pt!='$') return; do pt++; while(*pt=='$');
176
    if(*pt!='$') return; do pt++; while(*pt=='$');
177
    pv=strchr(pt,'$'); if(pv==NULL) return;
177
    pv=strchr(pt,'$'); if(pv==NULL) return;
178
    insmath: if(pv-pt>=MAX_LINELEN-256) return;
178
    insmath: if(pv-pt>=MAX_LINELEN-256) return;
179
    memmove(mathbuf,pt,pv-pt); mathbuf[pv-pt]=0;
179
    memmove(mathbuf,pt,pv-pt); mathbuf[pv-pt]=0;
180
    if(strstr(mathbuf,"...\n...")!=NULL) {
180
    if(strstr(mathbuf,"...\n...")!=NULL) {
181
        ovlstrcpy(mathbuf,"......"); return;
181
      ovlstrcpy(mathbuf,"......"); return;
182
    }
182
    }
183
    cutamp(mathbuf); latex2html=1;
183
    cutamp(mathbuf); latex2html=1;
184
    for(p1=strstr(mathbuf,"\\mathbb");p1;p1=strstr(p1+1,"\\mathbb")) {
184
    for(p1=strstr(mathbuf,"\\mathbb");p1;p1=strstr(p1+1,"\\mathbb")) {
185
        char c,*d;
185
      char c,*d;
186
        p2=find_word_start(p1+strlen("\\mathbb")); c=0;
186
      p2=find_word_start(p1+strlen("\\mathbb")); c=0;
187
        if(strchr("NZQRC",*p2)!=NULL) c=*p2;
187
      if(strchr("NZQRC",*p2)!=NULL) c=*p2;
188
        else if(*p2=='{' && *(p2+2)=='}' && strchr("NZQRC",*(p2+1))!=NULL) {
188
      else if(*p2=='{' && *(p2+2)=='}' && strchr("NZQRC",*(p2+1))!=NULL) {
189
                c=*(p2+1); p2+=2;
189
            c=*(p2+1); p2+=2;
190
        }
190
      }
191
        if(c) {
191
      if(c) {
192
            p2=find_word_start(++p2);
192
          p2=find_word_start(++p2);
193
            if(isalnum(*p2)) d=" "; else d="";
193
          if(isalnum(*p2)) d=" "; else d="";
194
            string_modify(mathbuf,p1,p2,"\\%c%c%s",c,c,d);
194
          string_modify(mathbuf,p1,p2,"\\%c%c%s",c,c,d);
195
        }
195
      }
196
    }
196
    }
197
    for(p1=strstr(mathbuf,"{\\"); p1; p1=strstr(p1+1,"{\\")) {
197
    for(p1=strstr(mathbuf,"{\\"); p1; p1=strstr(p1+1,"{\\")) {
198
        if(p1>mathbuf && isalpha(*(p1-1))) continue;
198
      if(p1>mathbuf && isalpha(*(p1-1))) continue;
199
        for(p2=p1+2; p2<p1+24 && isalpha(*p2); p2++);
199
      for(p2=p1+2; p2<p1+24 && isalpha(*p2); p2++);
200
        if(*p2!='}' || isalnum(*(p2+1))) continue;
200
      if(*p2!='}' || isalnum(*(p2+1))) continue;
201
        memmove(buf,p1+1,p2-p1-1); buf[p2-p1-1]='\\'; buf[p2-p1]=0;
201
      memmove(buf,p1+1,p2-p1-1); buf[p2-p1-1]='\\'; buf[p2-p1]=0;
202
        if(strstr(hmsame,buf)==NULL) continue;
202
      if(strstr(hmsame,buf)==NULL) continue;
203
        ovlstrcpy(p2,p2+1); ovlstrcpy(p1,p1+1);
203
      ovlstrcpy(p2,p2+1); ovlstrcpy(p1,p1+1);
204
    }
204
    }
205
    if(strstr(mathbuf,"\\begin{")!=NULL) return;
205
    if(strstr(mathbuf,"\\begin{")!=NULL) return;
206
    for(p1=strchr(mathbuf,'{'); p1; p1=strchr(p1+1,'{')) {
206
    for(p1=strchr(mathbuf,'{'); p1; p1=strchr(p1+1,'{')) {
207
        if((p1>mathbuf && isalpha(*(p1-1))) ||
207
      if((p1>mathbuf && isalpha(*(p1-1))) ||
208
           !isalnum(*(p1+1)) || *(p1+2)!='}') continue;
208
         !isalnum(*(p1+1)) || *(p1+2)!='}') continue;
209
        *p1=*(p1+1); ovlstrcpy(p1+1,p1+3);
209
      *p1=*(p1+1); ovlstrcpy(p1+1,p1+3);
210
    }
210
    }
211
    if(strchr(mathbuf,'[')!=NULL) {
211
    if(strchr(mathbuf,'[')!=NULL) {
212
        char mbuf[MAX_LINELEN+1];
212
        char mbuf[MAX_LINELEN+1];
213
        snprintf(mbuf,sizeof(mbuf),"{%s}",mathbuf);
213
      snprintf(mbuf,sizeof(mbuf),"{%s}",mathbuf);
214
        ovlstrcpy(mathbuf,mbuf);
214
      ovlstrcpy(mathbuf,mbuf);
215
    }
215
    }
216
        /* try to simplify */
216
/* try to simplify */
217
    if(strchr(mathbuf,'{')==NULL && strchr(mathbuf,'\\')!=NULL) {
217
    if(strchr(mathbuf,'{')==NULL && strchr(mathbuf,'\\')!=NULL) {
218
        int i, tt;
218
      int i, tt;
219
        tt=0;
219
      tt=0;
220
        for(p1=strchr(mathbuf,'\\'); p1; p1=strchr(p1+1,'\\')) {
220
      for(p1=strchr(mathbuf,'\\'); p1; p1=strchr(p1+1,'\\')) {
221
            for(p2=p1+1;isalpha(*p2);p2++);
221
          for(p2=p1+1;isalpha(*p2);p2++);
222
            if(p2==p1+1 || p2>p1+24) {tt=1; break;}
222
          if(p2==p1+1 || p2>p1+24) {tt=1; break;}
223
            memmove(buf,p1,p2-p1);buf[p2-p1]='\\';buf[p2-p1+1]=0;
223
          memmove(buf,p1,p2-p1);buf[p2-p1]='\\';buf[p2-p1+1]=0;
224
            for(i=0;i<backtransno && strcmp(buf,backtrans[i].name)!=0;i++);
224
          for(i=0;i<backtransno && strcmp(buf,backtrans[i].name)!=0;i++);
225
            if(i>=backtransno && strstr(hmsame,buf)==NULL) {
225
          if(i>=backtransno && strstr(hmsame,buf)==NULL) {
226
                tt=1; break;
226
            tt=1; break;
227
            }
227
          }
228
        }
228
      }
229
        if(tt==0) {
229
      if(tt==0) {
230
            for(p1=strchr(mathbuf,'\\'); p1; p1=strchr(p1+1,'\\')) {
230
          for(p1=strchr(mathbuf,'\\'); p1; p1=strchr(p1+1,'\\')) {
231
                for(p2=p1+1;isalpha(*p2);p2++);
231
            for(p2=p1+1;isalpha(*p2);p2++);
232
                if(p2==p1+1 || p2>p1+24) break;
232
            if(p2==p1+1 || p2>p1+24) break;
233
                memmove(buf,p1,p2-p1);buf[p2-p1]='\\';buf[p2-p1+1]=0;
233
            memmove(buf,p1,p2-p1);buf[p2-p1]='\\';buf[p2-p1+1]=0;
234
                for(i=0;i<backtransno && strcmp(buf,backtrans[i].name)!=0;i++);
234
            for(i=0;i<backtransno && strcmp(buf,backtrans[i].name)!=0;i++);
235
                if(i<backtransno)
235
            if(i<backtransno)
236
                  string_modify(buf,p1,p2,backtrans[i].trans);
236
              string_modify(buf,p1,p2,backtrans[i].trans);
237
                else *p1=' ';
237
            else *p1=' ';
238
            }
238
          }
239
        }
239
      }
240
    }
240
    }
241
}
241
}
242
 
242
 
243
void output(void)
243
void output(void)
244
{
244
{
245
    char *p, *pp, *p2, *pt;
245
    char *p, *pp, *p2, *pt;
246
    char buf[MAX_LINELEN+1];
246
    char buf[MAX_LINELEN+1];
247
    p=filebuf;
247
    p=filebuf;
248
    restart:
248
    restart:
249
    pp=find_tag(p,"body"); if(*pp!=0) {
249
    pp=find_tag(p,"body"); if(*pp!=0) {
250
        p=find_tag_end(pp); goto restart;
250
      p=find_tag_end(pp); goto restart;
251
    }
251
    }
252
    pp=find_tag(p,"html"); if(*pp!=0) {
252
    pp=find_tag(p,"html"); if(*pp!=0) {
253
        p=find_tag_end(pp); goto restart;
253
      p=find_tag_end(pp); goto restart;
254
    }
254
    }
255
    *find_tag(p,"/body")=0; *find_tag(p,"/html")=0;
255
    *find_tag(p,"/body")=0; *find_tag(p,"/html")=0;
256
    for(pp=strstr(p,"\n\n"); pp; pp=strstr(pp+1,"\n\n")) *pp=' ';
256
    for(pp=strstr(p,"\n\n"); pp; pp=strstr(pp+1,"\n\n")) *pp=' ';
257
    for(pp=strchr(p,'<');pp!=NULL;pp=strchr(find_tag_end(pp),'<')) {
257
    for(pp=strchr(p,'<');pp!=NULL;pp=strchr(find_tag_end(pp),'<')) {
258
        if(pp>p) {fwrite(p,1,pp-p,outf); p=pp;}
258
      if(pp>p) {fwrite(p,1,pp-p,outf); p=pp;}
259
        if(latex2html && strncasecmp(pp,"<br><hr>",8)==0 &&
259
      if(latex2html && strncasecmp(pp,"<br><hr>",8)==0 &&
260
           *find_word_start(pp+8)==0) break;
260
         *find_word_start(pp+8)==0) break;
261
        if(strncasecmp(pp+1,"!-- MATH",8)==0) {
261
      if(strncasecmp(pp+1,"!-- MATH",8)==0) {
262
            p2=strstr(pp+8,"-->"); if(p2==NULL) continue;
262
          p2=strstr(pp+8,"-->"); if(p2==NULL) continue;
263
            *p2=0; getmath(pp+9); *p2='-';
263
          *p2=0; getmath(pp+9); *p2='-';
264
            p=p2+3; pt=find_word_start(p);
264
          p=p2+3; pt=find_word_start(p);
265
            if(mathbuf[0] && strncasecmp(pt,"<IMG",4)==0 && isspace(*(pt+4))) {
265
          if(mathbuf[0] && strncasecmp(pt,"<IMG",4)==0 && isspace(*(pt+4))) {
266
                p=find_tag_end(pt); pp=pt;
266
            p=find_tag_end(pt); pp=pt;
267
                fprintf(outf,"\\(%s\\)",mathbuf);
267
            fprintf(outf,"\\(%s\\)",mathbuf);
268
            }
268
          }
269
            continue;
269
          continue;
270
        }
270
      }
271
        if(strncasecmp(pp+1,"a",1)==0 && isspace(*(pp+2))) {
271
      if(strncasecmp(pp+1,"a",1)==0 && isspace(*(pp+2))) {
272
 
272
 
273
           
273
 
274
           
274
 
275
            continue;
275
          continue;
276
        }
276
      }
277
        if(strncasecmp(pp+1,"img",3)==0 && isspace(*(pp+4))) {
277
      if(strncasecmp(pp+1,"img",3)==0 && isspace(*(pp+4))) {
278
            p2=find_tag_end(pp);
278
          p2=find_tag_end(pp);
279
            if(p2-pp>=MAX_LINELEN-256) continue;
279
          if(p2-pp>=MAX_LINELEN-256) continue;
280
            memmove(buf,pp+1,p2-pp-2); buf[p2-pp-2]=0;
280
          memmove(buf,pp+1,p2-pp-2); buf[p2-pp-2]=0;
281
            pt=strstr(buf,"ALT=\""); if(pt==NULL) pt=strstr(buf,"alt=\"");
281
          pt=strstr(buf,"ALT=\""); if(pt==NULL) pt=strstr(buf,"alt=\"");
282
            if(pt!=NULL) {
282
          if(pt!=NULL) {
283
                pt+=strlen("ALT=\"");
283
            pt+=strlen("ALT=\"");
284
                getmath(pt); if(mathbuf[0]) {
284
            getmath(pt); if(mathbuf[0]) {
285
                    fprintf(outf,"\\(%s\\)",mathbuf); p=p2;
285
                fprintf(outf,"\\(%s\\)",mathbuf); p=p2;
286
                }
286
            }
287
            }
287
          }
288
        }
288
      }
289
    }
289
    }
290
    if(pp==NULL) fprintf(outf,"%s",p);
290
    if(pp==NULL) fprintf(outf,"%s",p);
291
}
291
}
292
 
292
 
293
int main(int argc, char *argv[])
293
int main(int argc, char *argv[])
Line 301... Line 301...
301
    if(p==NULL || *p==0) return 1;
301
    if(p==NULL || *p==0) return 1;
302
    p=find_word_start(p); pp=find_word_end(p);
302
    p=find_word_start(p); pp=find_word_end(p);
303
    if(pp<=p || pp-p>sizeof(fn1)-1) return 1;
303
    if(pp<=p || pp-p>sizeof(fn1)-1) return 1;
304
    memmove(fn1,p,pp-p); fn1[pp-p]=0;
304
    memmove(fn1,p,pp-p); fn1[pp-p]=0;
305
    p=find_word_start(pp); pp=find_word_end(p);
305
    p=find_word_start(pp); pp=find_word_end(p);
306
    if(pp<=p || pp-p>sizeof(fn2)-1) ovlstrcpy(fn2,fn1);
306
    if(pp<=p || pp-p>sizeof(fn2)-1) ovlstrcpy(fn2,fn1);
307
    else {memmove(fn2,p,pp-p); fn2[pp-p]=0;}
307
    else {memmove(fn2,p,pp-p); fn2[pp-p]=0;}
308
    prepare_file();
308
    prepare_file();
309
    output();
309
    output();
310
    fclose(outf);
310
    fclose(outf);
311
    return 0;
311
    return 0;
312
}
312
}
313
 
-