Subversion Repositories wimsdev

Rev

Rev 11132 | Details | Compare with Previous | Last modification | View Log | RSS feed

Rev Author Line No. Line
10 reyssat 1
/*    Copyright (C) 1998-2003 XIAO, Gang of Universite de Nice - Sophia Antipolis
 *
 *  This program is free software; you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License as published by
 *  the Free Software Foundation; either version 2 of the License, or
 *  (at your option) any later version.
 *
 *  This program is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU General Public License for more details.
 *
 *  You should have received a copy of the GNU General Public License
 *  along with this program; if not, write to the Free Software
 *  Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 */
17
 
7676 bpr 18
/* Extract text from phtml file */
10 reyssat 19
 
20
/*************** Customization: change values hereafter ****************/
21
 
7676 bpr 22
/* Limit of data buffers, in bytes (16 MiB).
 * The value is parenthesized so that the macro expands as a single operand
 * in any expression (e.g. `x % buflim` or `x / buflim`). */
#define buflim (1024*1024*16)
24
 
25
/***************** Nothing should need change hereafter *****************/
26
 
27
#include "../Lib/libwims.h"

/* Path of the input file; main() copies argv[1] into it. */
char filename[1024] = "";
/* Buffer holding the file contents; assigned by prepare_file(). */
char *filebuf;
/* Number of bytes stored in filebuf; 0 when nothing has been loaded. */
int filelen = 0;
31
 
7676 bpr 32
/* get the file */
10 reyssat 33
void prepare_file(void)
34
{
12248 bpr 35
  FILE *f;
36
  long int flen;
10 reyssat 37
 
12248 bpr 38
  filelen=0;
39
  f=fopen(filename,"r"); if(f==NULL) return;
40
  fseek(f,0,SEEK_END);flen=ftell(f); fseek(f,0,SEEK_SET);
41
  if(flen>buflim) return;
42
  filebuf=xmalloc(2*flen+1024);flen=fread(filebuf,1,flen,f);
43
  fclose(f);
44
  if(flen>0 && flen<buflim) filebuf[flen]=0; else flen=0;
45
  filelen=flen;
10 reyssat 46
}
47
 
48
void processbuf(void)
49
{
12248 bpr 50
  char *p;
51
  deaccent(filebuf);
52
  for(p=filebuf; *p; p++) *p=tolower(*p);
53
  for(p=strpbrk(filebuf,"'-"); p!=NULL; p=strpbrk(p+1,"'-")) *p=' ';
54
  for(p=strstr(filebuf,"&nbsp;"); p!=NULL; p=strstr(p+1,"&nbsp;")) {
55
    *p=' '; ovlstrcpy(p+1,p+6);
56
  }
10 reyssat 57
}
58
 
59
void output(void)
60
{
12248 bpr 61
  char *p, *pp, lastc;
62
  p=find_word_start(filebuf); lastc=0;
63
  if(*p=='!' || *p==':') goto cont1;
64
  for(;*p;p++) {
65
    if(*p=='\n') {
66
      if(!isspace(lastc)) {printf(" "); lastc=' ';}
67
      cont2: p=find_word_start(p);
68
      if(*p=='!' || *p==':') {
69
        if(lastc!='     ') printf(". ");
70
        lastc=' ';
71
        cont1: p=strchr(p,'\n');
72
        if(p==NULL) return;
73
        if(*(p-1)=='\\') {p++; goto cont1;}
74
        goto cont2;
7676 bpr 75
      }
12248 bpr 76
      for(pp=p; isalnum(*pp) || *pp=='_' || *pp=='$'; pp++);
77
      pp=find_word_start(pp);
78
      if(*pp=='=') goto cont1;
79
    }
80
    if(*p=='\\' && *(p+1)=='\n') {
81
      printf("\n"); p++; continue;
82
    }
83
    if(*p=='<' && (isalpha(*(p+1)) || *(p+1)=='/')) {
84
        p=strchr(p,'>'); goto nextp;
85
    }
86
    if(*p=='$') {
87
      if(lastc != '     ') {
88
        if(!isspace(lastc)) printf(" ");
89
        printf(". "); lastc='   ';
7676 bpr 90
      }
12248 bpr 91
      p++;
92
      if(*p=='(') {p=find_matching(p+1,')'); goto nextp;}
93
      if(*p=='[') {p=find_matching(p+1,']'); goto nextp;}
94
      while(isalnum(*p) || *p=='_') p++;
95
      p--; continue;
96
    }
97
    if(*p=='&') {
98
      char *p2;
99
      for(p2=p+1; isalnum(*p2) || *p2=='#'; p2++);
100
      if(*p2==';') {
101
        p++; if(isalpha(*p)) {printf("%c",*p); lastc=*p;}
102
        p=p2; continue;
7676 bpr 103
      }
12248 bpr 104
    }
105
    if(!isspace(*p) && strchr(":!?.;,\"()[]{}=/\\+*^%@~`<>|",*p)==NULL)
106
      {printf("%c",*p); lastc=*p;}
107
    else {
108
      if(isspace(*p) && !isspace(lastc)) {
109
        printf(" "); lastc=' ';
7676 bpr 110
      }
12248 bpr 111
      if(!isspace(*p)) {
112
        switch(lastc) {
113
          case ' ': printf(". "); lastc='       '; break;
114
          case '        ': break;
115
          default: printf(" . "); lastc='       '; break;
116
        }
7676 bpr 117
      }
10 reyssat 118
    }
12248 bpr 119
    nextp: if(p==NULL || *p==0) break;
120
  }
10 reyssat 121
}
122
 
123
int main(int argc, char *argv[])
124
{
12248 bpr 125
  if(argc<=1) return 0;
126
  snprintf(filename,sizeof(filename)-128,"%s",argv[1]);
127
  prepare_file();
128
  processbuf();
129
  output();
130
  return 0;
10 reyssat 131
}
132