Subversion Repositories wimsdev

Rev

Rev 3718 | Go to most recent revision | Blame | Compare with Previous | Last modification | View Log | RSS feed

  1. /*    Copyright (C) 1998-2003 XIAO, Gang of Universite de Nice - Sophia Antipolis
  2.  *
  3.  *  This program is free software; you can redistribute it and/or modify
  4.  *  it under the terms of the GNU General Public License as published by
  5.  *  the Free Software Foundation; either version 2 of the License, or
  6.  *  (at your option) any later version.
  7.  *
  8.  *  This program is distributed in the hope that it will be useful,
  9.  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
  10.  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  11.  *  GNU General Public License for more details.
  12.  *
  13.  *  You should have received a copy of the GNU General Public License
  14.  *  along with this program; if not, write to the Free Software
  15.  *  Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  16.  */
  17.  
  18.         /* Extract text from phtml file */
  19.  
  20. /*************** Customization: change values hereafter ****************/
  21.  
  22.         /* limit of data buffers */
  /* Largest input size, in bytes, that will be loaded (16 MiB).
   * The expansion is parenthesized so that it keeps its value when used
   * inside a larger expression such as `x % buflim` or `x / buflim`. */
  #define buflim (1024*1024*16)
  24.  
  25. /***************** Nothing should need change hereafter *****************/
  26.  
  27. #include "../wims.h"
  28. #include "../Lib/libwims.h"
  /* Path of the input file; filled from the first command-line argument. */
  char filename[1024] = "";

  /* Contents of the input file as a NUL-terminated string.
   * Stays NULL until prepare_file() has loaded a file. */
  char *filebuf = NULL;

  /* Number of bytes loaded into filebuf; 0 when nothing was loaded. */
  int filelen = 0;
  32.  
  33.         /* get the file */
  34. void prepare_file(void)
  35. {
  36.     FILE *f;
  37.     long int flen;
  38.  
  39.     filelen=0;
  40.     f=fopen(filename,"r"); if(f==NULL) return;
  41.     fseek(f,0,SEEK_END);flen=ftell(f); fseek(f,0,SEEK_SET);
  42.     if(flen>buflim) return;
  43.     filebuf=xmalloc(2*flen+1024);flen=fread(filebuf,1,flen,f);
  44.     fclose(f);
  45.     if(flen>0 && flen<buflim) filebuf[flen]=0; else flen=0;
  46.     filelen=flen;
  47. }
  48.  
  49. void processbuf(void)
  50. {
  51.     char *p;
  52.     deaccent(filebuf);
  53.     for(p=filebuf; *p; p++) *p=tolower(*p);
  54.     for(p=strpbrk(filebuf,"'-"); p!=NULL; p=strpbrk(p+1,"'-")) *p=' ';
  55.     for(p=strstr(filebuf,"&nbsp;"); p!=NULL; p=strstr(p+1,"&nbsp;")) {
  56.         *p=' '; strcpy(p+1,p+6);
  57.     }
  58. }
  59.  
  60. void output(void)
  61. {
  62.     char *p, *pp, lastc;
  63.     p=find_word_start(filebuf); lastc=0;
  64.     if(*p=='!' || *p==':') goto cont1;
  65.     for(;*p;p++) {
  66.         if(*p=='\n') {
  67.             if(!isspace(lastc)) {printf(" "); lastc=' ';}
  68.             cont2: p=find_word_start(p);
  69.             if(*p=='!' || *p==':') {
  70.                 if(lastc!='     ') printf(". "); lastc='        ';
  71.                 cont1: p=strchr(p,'\n');
  72.                 if(p==NULL) return;
  73.                 if(*(p-1)=='\\') {p++; goto cont1;}
  74.                 goto cont2;
  75.             }
  76.             for(pp=p; isalnum(*pp) || *pp=='_' || *pp=='$'; pp++);
  77.             pp=find_word_start(pp);
  78.             if(*pp=='=') goto cont1;
  79.         }
  80.         if(*p=='\\' && *(p+1)=='\n') {
  81.             printf("\n"); p++; continue;
  82.         }
  83.         if(*p=='<' && (isalpha(*(p+1)) || *(p+1)=='/')) {
  84.             p=strchr(p,'>'); goto nextp;
  85.         }
  86.         if(*p=='$') {
  87.             if(lastc != '       ') {
  88.                 if(!isspace(lastc)) printf(" ");
  89.                 printf(". "); lastc='   ';
  90.             }
  91.             p++;
  92.             if(*p=='(') {p=find_matching(p+1,')'); goto nextp;}
  93.             if(*p=='[') {p=find_matching(p+1,']'); goto nextp;}
  94.             while(isalnum(*p) || *p=='_') p++;
  95.             p--; continue;
  96.         }
  97.         if(*p=='&') {
  98.             char *p2;
  99.             for(p2=p+1; isalnum(*p2) || *p2=='#'; p2++);
  100.             if(*p2==';') {
  101.                 p++; if(isalpha(*p)) {printf("%c",*p); lastc=*p;}
  102.                 p=p2; continue;
  103.             }
  104.         }
  105.         if(!isspace(*p) && strchr(":!?.;,\"()[]{}=/\\+*^%@~`<>|",*p)==NULL)
  106.             {printf("%c",*p); lastc=*p;}
  107.         else {
  108.             if(isspace(*p) && !isspace(lastc)) {
  109.                 printf(" "); lastc=' ';
  110.             }
  111.             if(!isspace(*p)) {
  112.                 switch(lastc) {
  113.                     case ' ': printf(". "); lastc='     '; break;
  114.                     case '      ': break;
  115.                     default: printf(" . "); lastc='     '; break;
  116.                 }
  117.             }
  118.         }
  119.         nextp: if(p==NULL || *p==0) break;
  120.     }
  121. }
  122.  
  123. int main(int argc, char *argv[])
  124. {
  125.     if(argc<=1) return 0;
  126.     snprintf(filename,sizeof(filename)-128,"%s",argv[1]);
  127.     prepare_file();
  128.     processbuf();
  129.     output();
  130.     return 0;
  131. }
  132.  
  133.