Subversion Repositories wimsdev

Rev

Rev 7676 | Rev 11124 | Go to most recent revision | Details | Compare with Previous | Last modification | View Log | RSS feed

Rev Author Line No. Line
10 reyssat 1
/*    Copyright (C) 1998-2003 XIAO, Gang of Universite de Nice - Sophia Antipolis
2
 *
3
 *  This program is free software; you can redistribute it and/or modify
4
 *  it under the terms of the GNU General Public License as published by
5
 *  the Free Software Foundation; either version 2 of the License, or
6
 *  (at your option) any later version.
7
 *
8
 *  This program is distributed in the hope that it will be useful,
9
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
10
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
11
 *  GNU General Public License for more details.
12
 *
13
 *  You should have received a copy of the GNU General Public License
14
 *  along with this program; if not, write to the Free Software
15
 *  Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
16
 */
17
 
7676 bpr 18
/* Extract text from phtml file */
10 reyssat 19
 
20
/*************** Customization: change values hereafter ****************/
21
 
7676 bpr 22
/* limit of data buffers */
10 reyssat 23
/* Maximum accepted file size in bytes (16 MiB). Parenthesized so the macro
 * expands safely inside larger expressions such as x/buflim or x%buflim. */
#define buflim (1024*1024*16)
24
 
25
/***************** Nothing should need change hereafter *****************/
26
 
27
#include "../Lib/libwims.h"
28
/* Path of the input file; filled from argv[1] in main(). */
char filename[1024]="";
29
/* Buffer holding the file content; allocated in prepare_file(). */
char *filebuf;
30
/* Number of bytes loaded into filebuf by prepare_file(); 0 when nothing
 * was loaded. */
int filelen=0;
31
 
7676 bpr 32
/* get the file */
10 reyssat 33
void prepare_file(void)
34
{
35
    FILE *f;
36
    long int flen;
37
 
38
    filelen=0;
39
    f=fopen(filename,"r"); if(f==NULL) return;
40
    fseek(f,0,SEEK_END);flen=ftell(f); fseek(f,0,SEEK_SET);
41
    if(flen>buflim) return;
42
    filebuf=xmalloc(2*flen+1024);flen=fread(filebuf,1,flen,f);
43
    fclose(f);
44
    if(flen>0 && flen<buflim) filebuf[flen]=0; else flen=0;
45
    filelen=flen;
46
}
47
 
48
void processbuf(void)
49
{
50
    char *p;
51
    deaccent(filebuf);
52
    for(p=filebuf; *p; p++) *p=tolower(*p);
53
    for(p=strpbrk(filebuf,"'-"); p!=NULL; p=strpbrk(p+1,"'-")) *p=' ';
54
    for(p=strstr(filebuf,"&nbsp;"); p!=NULL; p=strstr(p+1,"&nbsp;")) {
7676 bpr 55
      *p=' '; ovlstrcpy(p+1,p+6);
10 reyssat 56
    }
57
}
58
 
59
void output(void)
60
{
61
    char *p, *pp, lastc;
62
    p=find_word_start(filebuf); lastc=0;
63
    if(*p=='!' || *p==':') goto cont1;
64
    for(;*p;p++) {
7676 bpr 65
      if(*p=='\n') {
66
          if(!isspace(lastc)) {printf(" "); lastc=' ';}
67
          cont2: p=find_word_start(p);
68
          if(*p=='!' || *p==':') {
69
            if(lastc!=' ') printf(". "); lastc='        ';
70
            cont1: p=strchr(p,'\n');
71
            if(p==NULL) return;
72
            if(*(p-1)=='\\') {p++; goto cont1;}
73
            goto cont2;
74
          }
75
          for(pp=p; isalnum(*pp) || *pp=='_' || *pp=='$'; pp++);
76
          pp=find_word_start(pp);
77
          if(*pp=='=') goto cont1;
78
      }
79
      if(*p=='\\' && *(p+1)=='\n') {
80
          printf("\n"); p++; continue;
81
      }
82
      if(*p=='<' && (isalpha(*(p+1)) || *(p+1)=='/')) {
83
          p=strchr(p,'>'); goto nextp;
84
      }
85
      if(*p=='$') {
86
          if(lastc != ' ') {
87
            if(!isspace(lastc)) printf(" ");
88
            printf(". "); lastc='       ';
89
          }
90
          p++;
91
          if(*p=='(') {p=find_matching(p+1,')'); goto nextp;}
92
          if(*p=='[') {p=find_matching(p+1,']'); goto nextp;}
93
          while(isalnum(*p) || *p=='_') p++;
94
          p--; continue;
95
      }
96
      if(*p=='&') {
97
          char *p2;
98
          for(p2=p+1; isalnum(*p2) || *p2=='#'; p2++);
99
          if(*p2==';') {
100
            p++; if(isalpha(*p)) {printf("%c",*p); lastc=*p;}
101
            p=p2; continue;
102
          }
103
      }
104
      if(!isspace(*p) && strchr(":!?.;,\"()[]{}=/\\+*^%@~`<>|",*p)==NULL)
105
          {printf("%c",*p); lastc=*p;}
106
      else {
107
          if(isspace(*p) && !isspace(lastc)) {
108
            printf(" "); lastc=' ';
109
          }
110
          if(!isspace(*p)) {
111
            switch(lastc) {
112
                case ' ': printf(". "); lastc=' '; break;
113
                case '  ': break;
114
                default: printf(" . "); lastc=' '; break;
115
            }
116
          }
117
      }
118
      nextp: if(p==NULL || *p==0) break;
10 reyssat 119
    }
120
}
121
 
122
int main(int argc, char *argv[])
123
{
124
    if(argc<=1) return 0;
125
    snprintf(filename,sizeof(filename)-128,"%s",argv[1]);
126
    prepare_file();
127
    processbuf();
128
    output();
129
    return 0;
130
}
131