Subversion Repositories wimsdev

Rev

Rev 11132 | Blame | Compare with Previous | Last modification | View Log | RSS feed

  1. /*    Copyright (C) 1998-2003 XIAO, Gang of Universite de Nice - Sophia Antipolis
  2.  *
  3.  *  This program is free software; you can redistribute it and/or modify
  4.  *  it under the terms of the GNU General Public License as published by
  5.  *  the Free Software Foundation; either version 2 of the License, or
  6.  *  (at your option) any later version.
  7.  *
  8.  *  This program is distributed in the hope that it will be useful,
  9.  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
  10.  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  11.  *  GNU General Public License for more details.
  12.  *
  13.  *  You should have received a copy of the GNU General Public License
  14.  *  along with this program; if not, write to the Free Software
  15.  *  Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  16.  */
  17.  
  18. /* Extract text from phtml file */
  19.  
  20. /*************** Customization: change values hereafter ****************/
  21.  
  22. /* limit of data buffers */
  23. #define buflim 1024*1024*16
  24.  
  25. /***************** Nothing should need change hereafter *****************/
  26.  
  27. #include "../Lib/libwims.h"
  28. char filename[1024]="";
  29. char *filebuf;
  30. int filelen=0;
  31.  
  32. /* get the file */
  33. void prepare_file(void)
  34. {
  35.   FILE *f;
  36.   long int flen;
  37.  
  38.   filelen=0;
  39.   f=fopen(filename,"r"); if(f==NULL) return;
  40.   fseek(f,0,SEEK_END);flen=ftell(f); fseek(f,0,SEEK_SET);
  41.   if(flen>buflim) return;
  42.   filebuf=xmalloc(2*flen+1024);flen=fread(filebuf,1,flen,f);
  43.   fclose(f);
  44.   if(flen>0 && flen<buflim) filebuf[flen]=0; else flen=0;
  45.   filelen=flen;
  46. }
  47.  
  48. void processbuf(void)
  49. {
  50.   char *p;
  51.   deaccent(filebuf);
  52.   for(p=filebuf; *p; p++) *p=tolower(*p);
  53.   for(p=strpbrk(filebuf,"'-"); p!=NULL; p=strpbrk(p+1,"'-")) *p=' ';
  54.   for(p=strstr(filebuf,"&nbsp;"); p!=NULL; p=strstr(p+1,"&nbsp;")) {
  55.     *p=' '; ovlstrcpy(p+1,p+6);
  56.   }
  57. }
  58.  
  59. void output(void)
  60. {
  61.   char *p, *pp, lastc;
  62.   p=find_word_start(filebuf); lastc=0;
  63.   if(*p=='!' || *p==':') goto cont1;
  64.   for(;*p;p++) {
  65.     if(*p=='\n') {
  66.       if(!isspace(lastc)) {printf(" "); lastc=' ';}
  67.       cont2: p=find_word_start(p);
  68.       if(*p=='!' || *p==':') {
  69.         if(lastc!='     ') printf(". ");
  70.         lastc=' ';
  71.         cont1: p=strchr(p,'\n');
  72.         if(p==NULL) return;
  73.         if(*(p-1)=='\\') {p++; goto cont1;}
  74.         goto cont2;
  75.       }
  76.       for(pp=p; isalnum(*pp) || *pp=='_' || *pp=='$'; pp++);
  77.       pp=find_word_start(pp);
  78.       if(*pp=='=') goto cont1;
  79.     }
  80.     if(*p=='\\' && *(p+1)=='\n') {
  81.       printf("\n"); p++; continue;
  82.     }
  83.     if(*p=='<' && (isalpha(*(p+1)) || *(p+1)=='/')) {
  84.         p=strchr(p,'>'); goto nextp;
  85.     }
  86.     if(*p=='$') {
  87.       if(lastc != '     ') {
  88.         if(!isspace(lastc)) printf(" ");
  89.         printf(". "); lastc='   ';
  90.       }
  91.       p++;
  92.       if(*p=='(') {p=find_matching(p+1,')'); goto nextp;}
  93.       if(*p=='[') {p=find_matching(p+1,']'); goto nextp;}
  94.       while(isalnum(*p) || *p=='_') p++;
  95.       p--; continue;
  96.     }
  97.     if(*p=='&') {
  98.       char *p2;
  99.       for(p2=p+1; isalnum(*p2) || *p2=='#'; p2++);
  100.       if(*p2==';') {
  101.         p++; if(isalpha(*p)) {printf("%c",*p); lastc=*p;}
  102.         p=p2; continue;
  103.       }
  104.     }
  105.     if(!isspace(*p) && strchr(":!?.;,\"()[]{}=/\\+*^%@~`<>|",*p)==NULL)
  106.       {printf("%c",*p); lastc=*p;}
  107.     else {
  108.       if(isspace(*p) && !isspace(lastc)) {
  109.         printf(" "); lastc=' ';
  110.       }
  111.       if(!isspace(*p)) {
  112.         switch(lastc) {
  113.           case ' ': printf(". "); lastc='       '; break;
  114.           case '        ': break;
  115.           default: printf(" . "); lastc='       '; break;
  116.         }
  117.       }
  118.     }
  119.     nextp: if(p==NULL || *p==0) break;
  120.   }
  121. }
  122.  
  123. int main(int argc, char *argv[])
  124. {
  125.   if(argc<=1) return 0;
  126.   snprintf(filename,sizeof(filename)-128,"%s",argv[1]);
  127.   prepare_file();
  128.   processbuf();
  129.   output();
  130.   return 0;
  131. }
  132.  
  133.