Subversion Repositories wimsdev

Rev

Rev 11124 | Go to most recent revision | Details | Compare with Previous | Last modification | View Log | RSS feed

Rev Author Line No. Line
10 reyssat 1
/*    Copyright (C) 1998-2003 XIAO, Gang of Universite de Nice - Sophia Antipolis
2
 *
3
 *  This program is free software; you can redistribute it and/or modify
4
 *  it under the terms of the GNU General Public License as published by
5
 *  the Free Software Foundation; either version 2 of the License, or
6
 *  (at your option) any later version.
7
 *
8
 *  This program is distributed in the hope that it will be useful,
9
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
10
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
11
 *  GNU General Public License for more details.
12
 *
13
 *  You should have received a copy of the GNU General Public License
14
 *  along with this program; if not, write to the Free Software
15
 *  Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
16
 */
17
 
7676 bpr 18
/* Extract text from phtml file */
10 reyssat 19
 
20
/*************** Customization: change values hereafter ****************/
21
 
7676 bpr 22
/* size limit of data buffers, in bytes; files larger than this are not read */
10 reyssat 23
/* Parenthesized so the macro expands as a single value inside any
 * expression (e.g. x / buflim, x % buflim). */
#define buflim (1024*1024*16)
24
 
25
/***************** Nothing should need change hereafter *****************/
26
 
27
#include "../Lib/libwims.h"
28
/* Path of the input file; main() fills it from argv[1]. */
char filename[1024]="";
29
/* Buffer holding the content of the input file; allocated by prepare_file(),
 * then modified in place by processbuf() and read by output(). */
char *filebuf;
30
/* Number of bytes prepare_file() read into filebuf; 0 when nothing was read. */
int filelen=0;
31
 
7676 bpr 32
/* get the file */
10 reyssat 33
void prepare_file(void)
34
{
35
    FILE *f;
36
    long int flen;
37
 
38
    filelen=0;
39
    f=fopen(filename,"r"); if(f==NULL) return;
40
    fseek(f,0,SEEK_END);flen=ftell(f); fseek(f,0,SEEK_SET);
41
    if(flen>buflim) return;
42
    filebuf=xmalloc(2*flen+1024);flen=fread(filebuf,1,flen,f);
43
    fclose(f);
44
    if(flen>0 && flen<buflim) filebuf[flen]=0; else flen=0;
45
    filelen=flen;
46
}
47
 
48
void processbuf(void)
49
{
50
    char *p;
51
    deaccent(filebuf);
52
    for(p=filebuf; *p; p++) *p=tolower(*p);
53
    for(p=strpbrk(filebuf,"'-"); p!=NULL; p=strpbrk(p+1,"'-")) *p=' ';
54
    for(p=strstr(filebuf,"&nbsp;"); p!=NULL; p=strstr(p+1,"&nbsp;")) {
7676 bpr 55
      *p=' '; ovlstrcpy(p+1,p+6);
10 reyssat 56
    }
57
}
58
 
59
void output(void)
60
{
61
    char *p, *pp, lastc;
62
    p=find_word_start(filebuf); lastc=0;
63
    if(*p=='!' || *p==':') goto cont1;
64
    for(;*p;p++) {
7676 bpr 65
      if(*p=='\n') {
66
          if(!isspace(lastc)) {printf(" "); lastc=' ';}
67
          cont2: p=find_word_start(p);
68
          if(*p=='!' || *p==':') {
11124 georgesk 69
            if(lastc!=' ') printf(". ");
11132 bpr 70
            lastc='     ';
7676 bpr 71
            cont1: p=strchr(p,'\n');
72
            if(p==NULL) return;
73
            if(*(p-1)=='\\') {p++; goto cont1;}
74
            goto cont2;
75
          }
76
          for(pp=p; isalnum(*pp) || *pp=='_' || *pp=='$'; pp++);
77
          pp=find_word_start(pp);
78
          if(*pp=='=') goto cont1;
79
      }
80
      if(*p=='\\' && *(p+1)=='\n') {
81
          printf("\n"); p++; continue;
82
      }
83
      if(*p=='<' && (isalpha(*(p+1)) || *(p+1)=='/')) {
84
          p=strchr(p,'>'); goto nextp;
85
      }
86
      if(*p=='$') {
87
          if(lastc != ' ') {
88
            if(!isspace(lastc)) printf(" ");
89
            printf(". "); lastc='       ';
90
          }
91
          p++;
92
          if(*p=='(') {p=find_matching(p+1,')'); goto nextp;}
93
          if(*p=='[') {p=find_matching(p+1,']'); goto nextp;}
94
          while(isalnum(*p) || *p=='_') p++;
95
          p--; continue;
96
      }
97
      if(*p=='&') {
98
          char *p2;
99
          for(p2=p+1; isalnum(*p2) || *p2=='#'; p2++);
100
          if(*p2==';') {
101
            p++; if(isalpha(*p)) {printf("%c",*p); lastc=*p;}
102
            p=p2; continue;
103
          }
104
      }
105
      if(!isspace(*p) && strchr(":!?.;,\"()[]{}=/\\+*^%@~`<>|",*p)==NULL)
106
          {printf("%c",*p); lastc=*p;}
107
      else {
108
          if(isspace(*p) && !isspace(lastc)) {
109
            printf(" "); lastc=' ';
110
          }
111
          if(!isspace(*p)) {
112
            switch(lastc) {
113
                case ' ': printf(". "); lastc=' '; break;
114
                case '  ': break;
115
                default: printf(" . "); lastc=' '; break;
116
            }
117
          }
118
      }
119
      nextp: if(p==NULL || *p==0) break;
10 reyssat 120
    }
121
}
122
 
123
int main(int argc, char *argv[])
124
{
125
    if(argc<=1) return 0;
126
    snprintf(filename,sizeof(filename)-128,"%s",argv[1]);
127
    prepare_file();
128
    processbuf();
129
    output();
130
    return 0;
131
}
132