Subversion Repositories wimsdev

Rev

Rev 3718 | Go to most recent revision | Blame | Compare with Previous | Last modification | View Log | RSS feed

  1. /*    Copyright (C) 1998-2003 XIAO, Gang of Universite de Nice - Sophia Antipolis
  2.  *
  3.  *  This program is free software; you can redistribute it and/or modify
  4.  *  it under the terms of the GNU General Public License as published by
  5.  *  the Free Software Foundation; either version 2 of the License, or
  6.  *  (at your option) any later version.
  7.  *
  8.  *  This program is distributed in the hope that it will be useful,
  9.  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
  10.  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  11.  *  GNU General Public License for more details.
  12.  *
  13.  *  You should have received a copy of the GNU General Public License
  14.  *  along with this program; if not, write to the Free Software
  15.  *  Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  16.  */
  17.  
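        /* Filter an HTML file (apparently latex2html output): formula images
         * are replaced by their LaTeX source written between \( and \) */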
  19.  
  20. /*************** Customization: change values hereafter ****************/
  21.  
  22.         /* limit of data buffers */
  23. #define buflim 1024*1024*16
  24.  
  25. /***************** Nothing should need change hereafter *****************/
  26.  
  27. #include "../wims.h"
  28. #include "../hmname.c"
  29.  
  30. char fn1[1024]="", fn2[1024]="";
  31. char mathbuf[MAX_LINELEN+1];
  32. char *filebuf;
  33. int filelen=0;
  34. int latex2html=0;
  35. FILE *outf;
  36.  
  37. struct {
  38.     char *name, *trans;
  39. } backtrans[]={
  40.     {"\\ge\\",          " >= "},
  41.     {"\\geq\\",         " >= "},
  42.     {"\\le\\",          " <= "},
  43.     {"\\leq\\",         " <= "},
  44.     {"\\to\\",          " -> "},
  45.     {"\\rightarrow\\",  " -> "},
  46.     {"\\longrightarrow\\", " --> "},
  47.     {"\\Rightarrow\\",  " => "},
  48.     {"\\Longrightarrow\\", " ==> "},
  49.     {"\\Leftrightarrow\\", " <=> "},
  50.     {"\\Longleftrightarrow\\", " <==> "},
  51.     {"\\Longleftarrow\\", " <== "},
  52. };
  53.  
  54. #define backtransno (sizeof(backtrans)/sizeof(backtrans[0]))
  55.  
  56. void *xmalloc(size_t n)
  57. {
  58.     void *p;
  59.     p=malloc(n);
  60.     if(p==NULL) exit(1);
  61.     return p;
  62. }
  63.  
  64.         /* Points to the end of the word */
  65. char *find_word_end(char *p)
  66. {
  67.     int i;
  68.     for(i=0;!isspace(*p) && *p!=0 && i<MAX_LINELEN; p++,i++);
  69.     return p;
  70. }
  71.  
  72.         /* Strips leading spaces */
  73. char *find_word_start(char *p)
  74. {
  75.     int i;
  76.     for(i=0; isspace(*p) && i<MAX_LINELEN; p++,i++);
  77.     return p;
  78. }
  79.  
  80. char *find_tag_end(char *p)
  81. {
  82.     char *pp, *old;
  83.     pp=p; if(*pp=='<') pp++;
  84.     for(; *pp && *pp!='>'; pp++) {
  85.         if(*pp=='"') {
  86.             pp=strchr(pp+1,'"');
  87.             if(pp==NULL) {pp=p+strlen(p); break;} else continue;
  88.         }
  89.     }
  90.         /* this is probably an syntax error of the page */
  91.     if(*pp==0 && pp>p+2048) {
  92.         old=p; if(*old=='<') old++;
  93.         pp=strchr(old,'>');
  94.         if(pp==NULL) pp=strchr(old,'<');
  95.         if(pp==NULL) pp=find_word_end(find_word_start(old));
  96.     }
  97.     if(*pp=='>') pp++; return pp;
  98. }
  99.  
  100. char *find_tag(char *p, char *tag)
  101. {
  102.     char *pp;
  103.     int len;
  104.     len=strlen(tag);
  105.     for(pp=strchr(p,'<'); pp!=NULL && *pp; pp=strchr(pp+1,'<')) {
  106.         if(strncasecmp(pp+1,tag,len)==0 && !isalnum(*(pp+1+len))) return pp;
  107.     }
  108.     return p+strlen(p);
  109. }
  110.  
  111.         /* modify a string. Bufferlen must be ast least MAX_LINELEN */
  112. void string_modify(char *start, char *bad_beg, char *bad_end, char *good,...)
  113. {
  114.     char buf[MAX_LINELEN+1];
  115.     va_list vp;
  116.    
  117.     va_start(vp,good);
  118.     vsnprintf(buf,sizeof(buf),good,vp); va_end(vp);
  119.     if(strlen(start)-(bad_end-bad_beg)+strlen(buf)>=MAX_LINELEN) {
  120.         return;
  121.     }
  122.     strcat(buf,bad_end);
  123.     strcpy(bad_beg,buf);
  124. }
  125.  
  126. void cutamp(char *p)
  127. {
  128.     char *pp;
  129.     for(pp=strchr(p,'&'); pp; pp=strchr(pp+1,'&')) {
  130.         if(strncmp(pp,"&amp;",5)==0) {
  131.             strcpy(pp+1,pp+5); continue;
  132.         }
  133.         if(strncmp(pp,"&lt;",4)==0) {
  134.             *pp='<'; strcpy(pp+1,pp+4); continue;
  135.         }
  136.         if(strncmp(pp,"&gt;",4)==0) {
  137.             *pp='>'; strcpy(pp+1,pp+4); continue;
  138.         }
  139.        
  140.     }
  141. }
  142.  
  143.         /* get the file */
  144. void prepare_file(void)
  145. {
  146.     FILE *f;
  147.     long int flen;
  148.  
  149.     filelen=0;
  150.     f=fopen(fn1,"r"); if(f==NULL) return;
  151.     fseek(f,0,SEEK_END);flen=ftell(f); fseek(f,0,SEEK_SET);
  152.     if(flen>buflim) return;
  153.     filebuf=xmalloc(2*flen+1024);flen=fread(filebuf,1,flen,f);
  154.     fclose(f);
  155.     if(flen<0 || flen>=buflim) flen=0; filebuf[flen]=0;
  156.     filelen=flen;
  157.     outf=fopen(fn2,"w"); if(outf==NULL) return;
  158. }
  159.  
  160. void getmath(char *p)
  161. {
  162.     char *pt, *pv;
  163.     char *p1, *p2, buf[256];
  164.  
  165.     mathbuf[0]=0;
  166.     pt=find_word_start(p);
  167.     if(strncmp(pt,"\\begin{displaymath}",
  168.                    strlen("\\begin{displaymath}"))==0) {
  169.         pt=strchr(pt,'}')+1;
  170.         pv=strstr(pt,"\\end{displaymath}");
  171.         if(pv==NULL) return;
  172.         goto insmath;
  173.     }
  174.     if(*pt=='%') pt=strchr(pt,'$'); if(pt==NULL) return;
  175.     if(*pt!='$') return; do pt++; while(*pt=='$');
  176.     pv=strchr(pt,'$'); if(pv==NULL) return;
  177.     insmath: if(pv-pt>=MAX_LINELEN-256) return;
  178.     memmove(mathbuf,pt,pv-pt); mathbuf[pv-pt]=0;
  179.     if(strstr(mathbuf,"...\n...")!=NULL) {
  180.         strcpy(mathbuf,"......"); return;
  181.     }
  182.     cutamp(mathbuf); latex2html=1;
  183.     for(p1=strstr(mathbuf,"\\mathbb");p1;p1=strstr(p1+1,"\\mathbb")) {
  184.         char c,*d;
  185.         p2=find_word_start(p1+strlen("\\mathbb")); c=0;
  186.         if(strchr("NZQRC",*p2)!=NULL) c=*p2;
  187.         else if(*p2=='{' && *(p2+2)=='}' && strchr("NZQRC",*(p2+1))!=NULL) {
  188.                 c=*(p2+1); p2+=2;
  189.         }
  190.         if(c) {
  191.             p2=find_word_start(++p2);
  192.             if(isalnum(*p2)) d=" "; else d="";
  193.             string_modify(mathbuf,p1,p2,"\\%c%c%s",c,c,d);
  194.         }
  195.     }
  196.     for(p1=strstr(mathbuf,"{\\"); p1; p1=strstr(p1+1,"{\\")) {
  197.         if(p1>mathbuf && isalpha(*(p1-1))) continue;
  198.         for(p2=p1+2; p2<p1+24 && isalpha(*p2); p2++);
  199.         if(*p2!='}' || isalnum(*(p2+1))) continue;
  200.         memmove(buf,p1+1,p2-p1-1); buf[p2-p1-1]='\\'; buf[p2-p1]=0;
  201.         if(strstr(hmsame,buf)==NULL) continue;
  202.         strcpy(p2,p2+1); strcpy(p1,p1+1);
  203.     }
  204.     if(strstr(mathbuf,"\\begin{")!=NULL) return;
  205.     for(p1=strchr(mathbuf,'{'); p1; p1=strchr(p1+1,'{')) {
  206.         if((p1>mathbuf && isalpha(*(p1-1))) ||
  207.            !isalnum(*(p1+1)) || *(p1+2)!='}') continue;
  208.         *p1=*(p1+1); strcpy(p1+1,p1+3);
  209.     }
  210.     if(strchr(mathbuf,'[')!=NULL) {
  211.         char mbuf[MAX_LINELEN+1];
  212.         snprintf(mbuf,sizeof(mbuf),"{%s}",mathbuf);
  213.         strcpy(mathbuf,mbuf);
  214.     }
  215.         /* try to simplify */
  216.     if(strchr(mathbuf,'{')==NULL && strchr(mathbuf,'\\')!=NULL) {
  217.         int i, tt;
  218.         tt=0;
  219.         for(p1=strchr(mathbuf,'\\'); p1; p1=strchr(p1+1,'\\')) {
  220.             for(p2=p1+1;isalpha(*p2);p2++);
  221.             if(p2==p1+1 || p2>p1+24) {tt=1; break;}
  222.             memmove(buf,p1,p2-p1);buf[p2-p1]='\\';buf[p2-p1+1]=0;
  223.             for(i=0;i<backtransno && strcmp(buf,backtrans[i].name)!=0;i++);
  224.             if(i>=backtransno && strstr(hmsame,buf)==NULL) {
  225.                 tt=1; break;
  226.             }
  227.         }
  228.         if(tt==0) {
  229.             for(p1=strchr(mathbuf,'\\'); p1; p1=strchr(p1+1,'\\')) {
  230.                 for(p2=p1+1;isalpha(*p2);p2++);
  231.                 if(p2==p1+1 || p2>p1+24) break;
  232.                 memmove(buf,p1,p2-p1);buf[p2-p1]='\\';buf[p2-p1+1]=0;
  233.                 for(i=0;i<backtransno && strcmp(buf,backtrans[i].name)!=0;i++);
  234.                 if(i<backtransno)
  235.                   string_modify(buf,p1,p2,backtrans[i].trans);
  236.                 else *p1=' ';
  237.             }
  238.         }
  239.     }
  240. }
  241.  
  242. void output(void)
  243. {
  244.     char *p, *pp, *p2, *pt;
  245.     char buf[MAX_LINELEN+1];
  246.     p=filebuf;
  247.     restart:
  248.     pp=find_tag(p,"body"); if(*pp!=0) {
  249.         p=find_tag_end(pp); goto restart;
  250.     }
  251.     pp=find_tag(p,"html"); if(*pp!=0) {
  252.         p=find_tag_end(pp); goto restart;
  253.     }
  254.     *find_tag(p,"/body")=0; *find_tag(p,"/html")=0;
  255.     for(pp=strstr(p,"\n\n"); pp; pp=strstr(pp+1,"\n\n")) *pp=' ';
  256.     for(pp=strchr(p,'<');pp!=NULL;pp=strchr(find_tag_end(pp),'<')) {
  257.         if(pp>p) {fwrite(p,1,pp-p,outf); p=pp;}
  258.         if(latex2html && strncasecmp(pp,"<br><hr>",8)==0 &&
  259.            *find_word_start(pp+8)==0) break;
  260.         if(strncasecmp(pp+1,"!-- MATH",8)==0) {
  261.             p2=strstr(pp+8,"-->"); if(p2==NULL) continue;
  262.             *p2=0; getmath(pp+9); *p2='-';
  263.             p=p2+3; pt=find_word_start(p);
  264.             if(mathbuf[0] && strncasecmp(pt,"<IMG",4)==0 && isspace(*(pt+4))) {
  265.                 p=find_tag_end(pt); pp=pt;
  266.                 fprintf(outf,"\\(%s\\)",mathbuf);
  267.             }
  268.             continue;
  269.         }
  270.         if(strncasecmp(pp+1,"a",1)==0 && isspace(*(pp+2))) {
  271.  
  272.            
  273.            
  274.             continue;
  275.         }
  276.         if(strncasecmp(pp+1,"img",3)==0 && isspace(*(pp+4))) {
  277.             p2=find_tag_end(pp);
  278.             if(p2-pp>=MAX_LINELEN-256) continue;
  279.             memmove(buf,pp+1,p2-pp-2); buf[p2-pp-2]=0;
  280.             pt=strstr(buf,"ALT=\""); if(pt==NULL) pt=strstr(buf,"alt=\"");
  281.             if(pt!=NULL) {
  282.                 pt+=strlen("ALT=\"");
  283.                 getmath(pt); if(mathbuf[0]) {
  284.                     fprintf(outf,"\\(%s\\)",mathbuf); p=p2;
  285.                 }
  286.             }
  287.         }
  288.     }
  289.     if(pp==NULL) fprintf(outf,"%s",p);
  290. }
  291.  
  292. int main(int argc, char *argv[])
  293. {
  294.     char *p, *pp;
  295.     char *mod;
  296.  
  297.     mod=getenv("w_module");
  298.     if(mod!=NULL && strncmp(mod,"adm/",4)!=0 && strcmp(mod,"home")!=0) return 1;
  299.     if(mod==NULL) p=argv[1]; else p=getenv("wims_exec_parm");
  300.     if(p==NULL || *p==0) return 1;
  301.     p=find_word_start(p); pp=find_word_end(p);
  302.     if(pp<=p || pp-p>sizeof(fn1)-1) return 1;
  303.     memmove(fn1,p,pp-p); fn1[pp-p]=0;
  304.     p=find_word_start(pp); pp=find_word_end(p);
  305.     if(pp<=p || pp-p>sizeof(fn2)-1) strcpy(fn2,fn1);
  306.     else {memmove(fn2,p,pp-p); fn2[pp-p]=0;}
  307.     prepare_file();
  308.     output();
  309.     fclose(outf);
  310.     return 0;
  311. }
  312.  
  313.