Subversion Repositories wimsdev

Rev

Rev 11124 | Go to most recent revision | Blame | Compare with Previous | Last modification | View Log | RSS feed

  1. /*    Copyright (C) 1998-2003 XIAO, Gang of Universite de Nice - Sophia Antipolis
  2.  *
  3.  *  This program is free software; you can redistribute it and/or modify
  4.  *  it under the terms of the GNU General Public License as published by
  5.  *  the Free Software Foundation; either version 2 of the License, or
  6.  *  (at your option) any later version.
  7.  *
  8.  *  This program is distributed in the hope that it will be useful,
  9.  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
  10.  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  11.  *  GNU General Public License for more details.
  12.  *
  13.  *  You should have received a copy of the GNU General Public License
  14.  *  along with this program; if not, write to the Free Software
  15.  *  Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  16.  */
  17.  
  18. /* Extract text from phtml file */
  19.  
  20. /*************** Customization: change values hereafter ****************/
  21.  
  22. /* limit of data buffers */
  23. #define buflim 1024*1024*16
  24.  
  25. /***************** Nothing should need change hereafter *****************/
  26.  
  27. #include "../Lib/libwims.h"
  28. char filename[1024]="";
  29. char *filebuf;
  30. int filelen=0;
  31.  
  32. /* get the file */
  33. void prepare_file(void)
  34. {
  35.     FILE *f;
  36.     long int flen;
  37.  
  38.     filelen=0;
  39.     f=fopen(filename,"r"); if(f==NULL) return;
  40.     fseek(f,0,SEEK_END);flen=ftell(f); fseek(f,0,SEEK_SET);
  41.     if(flen>buflim) return;
  42.     filebuf=xmalloc(2*flen+1024);flen=fread(filebuf,1,flen,f);
  43.     fclose(f);
  44.     if(flen>0 && flen<buflim) filebuf[flen]=0; else flen=0;
  45.     filelen=flen;
  46. }
  47.  
  48. void processbuf(void)
  49. {
  50.     char *p;
  51.     deaccent(filebuf);
  52.     for(p=filebuf; *p; p++) *p=tolower(*p);
  53.     for(p=strpbrk(filebuf,"'-"); p!=NULL; p=strpbrk(p+1,"'-")) *p=' ';
  54.     for(p=strstr(filebuf,"&nbsp;"); p!=NULL; p=strstr(p+1,"&nbsp;")) {
  55.       *p=' '; ovlstrcpy(p+1,p+6);
  56.     }
  57. }
  58.  
  59. void output(void)
  60. {
  61.     char *p, *pp, lastc;
  62.     p=find_word_start(filebuf); lastc=0;
  63.     if(*p=='!' || *p==':') goto cont1;
  64.     for(;*p;p++) {
  65.       if(*p=='\n') {
  66.           if(!isspace(lastc)) {printf(" "); lastc=' ';}
  67.           cont2: p=find_word_start(p);
  68.           if(*p=='!' || *p==':') {
  69.             if(lastc!=' ') printf(". ");
  70.             lastc='     ';
  71.             cont1: p=strchr(p,'\n');
  72.             if(p==NULL) return;
  73.             if(*(p-1)=='\\') {p++; goto cont1;}
  74.             goto cont2;
  75.           }
  76.           for(pp=p; isalnum(*pp) || *pp=='_' || *pp=='$'; pp++);
  77.           pp=find_word_start(pp);
  78.           if(*pp=='=') goto cont1;
  79.       }
  80.       if(*p=='\\' && *(p+1)=='\n') {
  81.           printf("\n"); p++; continue;
  82.       }
  83.       if(*p=='<' && (isalpha(*(p+1)) || *(p+1)=='/')) {
  84.           p=strchr(p,'>'); goto nextp;
  85.       }
  86.       if(*p=='$') {
  87.           if(lastc != ' ') {
  88.             if(!isspace(lastc)) printf(" ");
  89.             printf(". "); lastc='       ';
  90.           }
  91.           p++;
  92.           if(*p=='(') {p=find_matching(p+1,')'); goto nextp;}
  93.           if(*p=='[') {p=find_matching(p+1,']'); goto nextp;}
  94.           while(isalnum(*p) || *p=='_') p++;
  95.           p--; continue;
  96.       }
  97.       if(*p=='&') {
  98.           char *p2;
  99.           for(p2=p+1; isalnum(*p2) || *p2=='#'; p2++);
  100.           if(*p2==';') {
  101.             p++; if(isalpha(*p)) {printf("%c",*p); lastc=*p;}
  102.             p=p2; continue;
  103.           }
  104.       }
  105.       if(!isspace(*p) && strchr(":!?.;,\"()[]{}=/\\+*^%@~`<>|",*p)==NULL)
  106.           {printf("%c",*p); lastc=*p;}
  107.       else {
  108.           if(isspace(*p) && !isspace(lastc)) {
  109.             printf(" "); lastc=' ';
  110.           }
  111.           if(!isspace(*p)) {
  112.             switch(lastc) {
  113.                 case ' ': printf(". "); lastc=' '; break;
  114.                 case '  ': break;
  115.                 default: printf(" . "); lastc=' '; break;
  116.             }
  117.           }
  118.       }
  119.       nextp: if(p==NULL || *p==0) break;
  120.     }
  121. }
  122.  
  123. int main(int argc, char *argv[])
  124. {
  125.     if(argc<=1) return 0;
  126.     snprintf(filename,sizeof(filename)-128,"%s",argv[1]);
  127.     prepare_file();
  128.     processbuf();
  129.     output();
  130.     return 0;
  131. }
  132.  
  133.