Subversion Repositories wimsdev

Rev

Rev 10 | Rev 7676 | Go to most recent revision | Details | Compare with Previous | Last modification | View Log | RSS feed

Rev Author Line No. Line
10 reyssat 1
/*    Copyright (C) 1998-2003 XIAO, Gang of Universite de Nice - Sophia Antipolis
2
 *
3
 *  This program is free software; you can redistribute it and/or modify
4
 *  it under the terms of the GNU General Public License as published by
5
 *  the Free Software Foundation; either version 2 of the License, or
6
 *  (at your option) any later version.
7
 *
8
 *  This program is distributed in the hope that it will be useful,
9
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
10
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
11
 *  GNU General Public License for more details.
12
 *
13
 *  You should have received a copy of the GNU General Public License
14
 *  along with this program; if not, write to the Free Software
15
 *  Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
16
 */
17
 
18
        /* Extract text from phtml file */
19
 
20
/*************** Customization: change values hereafter ****************/
21
 
22
        /* limit of data buffers */
23
#define buflim 1024*1024*16
24
 
25
/***************** Nothing should need change hereafter *****************/
26
 
27
#include "../wims.h"
28
#include "../Lib/libwims.h"
29
/* Path of the input file; main() copies argv[1] into it. */
char filename[1024]="";
/* Contents of the input file; stays NULL until prepare_file() allocates it. */
char *filebuf=NULL;
/* Number of bytes loaded into filebuf; 0 when nothing was loaded. */
int filelen=0;
32
 
33
        /* get the file */
34
void prepare_file(void)
35
{
36
    FILE *f;
37
    long int flen;
38
 
39
    filelen=0;
40
    f=fopen(filename,"r"); if(f==NULL) return;
41
    fseek(f,0,SEEK_END);flen=ftell(f); fseek(f,0,SEEK_SET);
42
    if(flen>buflim) return;
43
    filebuf=xmalloc(2*flen+1024);flen=fread(filebuf,1,flen,f);
44
    fclose(f);
45
    if(flen>0 && flen<buflim) filebuf[flen]=0; else flen=0;
46
    filelen=flen;
47
}
48
 
49
void processbuf(void)
50
{
51
    char *p;
52
    deaccent(filebuf);
53
    for(p=filebuf; *p; p++) *p=tolower(*p);
54
    for(p=strpbrk(filebuf,"'-"); p!=NULL; p=strpbrk(p+1,"'-")) *p=' ';
55
    for(p=strstr(filebuf,"&nbsp;"); p!=NULL; p=strstr(p+1,"&nbsp;")) {
3718 reyssat 56
        *p=' '; ovlstrcpy(p+1,p+6);
10 reyssat 57
    }
58
}
59
 
60
void output(void)
61
{
62
    char *p, *pp, lastc;
63
    p=find_word_start(filebuf); lastc=0;
64
    if(*p=='!' || *p==':') goto cont1;
65
    for(;*p;p++) {
66
        if(*p=='\n') {
67
            if(!isspace(lastc)) {printf(" "); lastc=' ';}
68
            cont2: p=find_word_start(p);
69
            if(*p=='!' || *p==':') {
70
                if(lastc!='     ') printf(". "); lastc='        ';
71
                cont1: p=strchr(p,'\n');
72
                if(p==NULL) return;
73
                if(*(p-1)=='\\') {p++; goto cont1;}
74
                goto cont2;
75
            }
76
            for(pp=p; isalnum(*pp) || *pp=='_' || *pp=='$'; pp++);
77
            pp=find_word_start(pp);
78
            if(*pp=='=') goto cont1;
79
        }
80
        if(*p=='\\' && *(p+1)=='\n') {
81
            printf("\n"); p++; continue;
82
        }
83
        if(*p=='<' && (isalpha(*(p+1)) || *(p+1)=='/')) {
84
            p=strchr(p,'>'); goto nextp;
85
        }
86
        if(*p=='$') {
87
            if(lastc != '       ') {
88
                if(!isspace(lastc)) printf(" ");
89
                printf(". "); lastc='   ';
90
            }
91
            p++;
92
            if(*p=='(') {p=find_matching(p+1,')'); goto nextp;}
93
            if(*p=='[') {p=find_matching(p+1,']'); goto nextp;}
94
            while(isalnum(*p) || *p=='_') p++;
95
            p--; continue;
96
        }
97
        if(*p=='&') {
98
            char *p2;
99
            for(p2=p+1; isalnum(*p2) || *p2=='#'; p2++);
100
            if(*p2==';') {
101
                p++; if(isalpha(*p)) {printf("%c",*p); lastc=*p;}
102
                p=p2; continue;
103
            }
104
        }
105
        if(!isspace(*p) && strchr(":!?.;,\"()[]{}=/\\+*^%@~`<>|",*p)==NULL)
106
            {printf("%c",*p); lastc=*p;}
107
        else {
108
            if(isspace(*p) && !isspace(lastc)) {
109
                printf(" "); lastc=' ';
110
            }
111
            if(!isspace(*p)) {
112
                switch(lastc) {
113
                    case ' ': printf(". "); lastc='     '; break;
114
                    case '      ': break;
115
                    default: printf(" . "); lastc='     '; break;
116
                }
117
            }
118
        }
119
        nextp: if(p==NULL || *p==0) break;
120
    }
121
}
122
 
123
int main(int argc, char *argv[])
124
{
125
    if(argc<=1) return 0;
126
    snprintf(filename,sizeof(filename)-128,"%s",argv[1]);
127
    prepare_file();
128
    processbuf();
129
    output();
130
    return 0;
131
}
132