Rev 3718 | Rev 8185 | Go to most recent revision | Show entire file | Ignore whitespace | Details | Blame | Last modification | View Log | RSS feed
Rev 3718 | Rev 7676 | ||
---|---|---|---|
Line 13... | Line 13... | ||
13 | * You should have received a copy of the GNU General Public License |
13 | * You should have received a copy of the GNU General Public License |
14 | * along with this program; if not, write to the Free Software |
14 | * along with this program; if not, write to the Free Software |
15 | * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. |
15 | * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. |
16 | */ |
16 | */ |
17 | 17 | ||
18 |
|
18 | /* Extract text from phtml file */ |
19 | 19 | ||
20 | /*************** Customization: change values hereafter ****************/ |
20 | /*************** Customization: change values hereafter ****************/ |
21 | 21 | ||
22 |
|
/* Limit of data buffers, in bytes (16 MiB).
 * The replacement list is parenthesized so that the macro keeps its value
 * when used inside a larger expression such as `x % buflim` or `x / buflim`. */
#define buflim (1024*1024*16)
24 | 24 | ||
25 | /***************** Nothing should need change hereafter *****************/ |
25 | /***************** Nothing should need change hereafter *****************/ |
26 | 26 | ||
27 | #include "../wims.h" |
27 | #include "../wims.h" |
28 | #include "../Lib/libwims.h" |
28 | #include "../Lib/libwims.h" |
29 | char filename[1024]=""; |
29 | char filename[1024]=""; |
30 | char *filebuf; |
30 | char *filebuf; |
31 | int filelen=0; |
31 | int filelen=0; |
32 | 32 | ||
33 |
|
33 | /* get the file */ |
34 | void prepare_file(void) |
34 | void prepare_file(void) |
35 | { |
35 | { |
36 | FILE *f; |
36 | FILE *f; |
37 | long int flen; |
37 | long int flen; |
38 | 38 | ||
Line 51... | Line 51... | ||
51 | char *p; |
51 | char *p; |
52 | deaccent(filebuf); |
52 | deaccent(filebuf); |
53 | for(p=filebuf; *p; p++) *p=tolower(*p); |
53 | for(p=filebuf; *p; p++) *p=tolower(*p); |
54 | for(p=strpbrk(filebuf,"'-"); p!=NULL; p=strpbrk(p+1,"'-")) *p=' '; |
54 | for(p=strpbrk(filebuf,"'-"); p!=NULL; p=strpbrk(p+1,"'-")) *p=' '; |
55 | for(p=strstr(filebuf," "); p!=NULL; p=strstr(p+1," ")) { |
55 | for(p=strstr(filebuf," "); p!=NULL; p=strstr(p+1," ")) { |
56 |
|
56 | *p=' '; ovlstrcpy(p+1,p+6); |
57 | } |
57 | } |
58 | } |
58 | } |
59 | 59 | ||
60 | void output(void) |
60 | void output(void) |
61 | { |
61 | { |
62 | char *p, *pp, lastc; |
62 | char *p, *pp, lastc; |
63 | p=find_word_start(filebuf); lastc=0; |
63 | p=find_word_start(filebuf); lastc=0; |
64 | if(*p=='!' || *p==':') goto cont1; |
64 | if(*p=='!' || *p==':') goto cont1; |
65 | for(;*p;p++) { |
65 | for(;*p;p++) { |
66 |
|
66 | if(*p=='\n') { |
67 |
|
67 | if(!isspace(lastc)) {printf(" "); lastc=' ';} |
68 |
|
68 | cont2: p=find_word_start(p); |
69 |
|
69 | if(*p=='!' || *p==':') { |
70 |
|
70 | if(lastc!=' ') printf(". "); lastc=' '; |
71 |
|
71 | cont1: p=strchr(p,'\n'); |
72 |
|
72 | if(p==NULL) return; |
73 |
|
73 | if(*(p-1)=='\\') {p++; goto cont1;} |
74 |
|
74 | goto cont2; |
75 |
|
75 | } |
76 |
|
76 | for(pp=p; isalnum(*pp) || *pp=='_' || *pp=='$'; pp++); |
77 |
|
77 | pp=find_word_start(pp); |
78 |
|
78 | if(*pp=='=') goto cont1; |
79 |
|
79 | } |
80 |
|
80 | if(*p=='\\' && *(p+1)=='\n') { |
81 |
|
81 | printf("\n"); p++; continue; |
82 |
|
82 | } |
83 |
|
83 | if(*p=='<' && (isalpha(*(p+1)) || *(p+1)=='/')) { |
84 |
|
84 | p=strchr(p,'>'); goto nextp; |
85 |
|
85 | } |
86 |
|
86 | if(*p=='$') { |
87 |
|
87 | if(lastc != ' ') { |
88 |
|
88 | if(!isspace(lastc)) printf(" "); |
89 |
|
89 | printf(". "); lastc=' '; |
90 |
|
90 | } |
91 |
|
91 | p++; |
92 |
|
92 | if(*p=='(') {p=find_matching(p+1,')'); goto nextp;} |
93 |
|
93 | if(*p=='[') {p=find_matching(p+1,']'); goto nextp;} |
94 |
|
94 | while(isalnum(*p) || *p=='_') p++; |
95 |
|
95 | p--; continue; |
96 |
|
96 | } |
97 |
|
97 | if(*p=='&') { |
98 |
|
98 | char *p2; |
99 |
|
99 | for(p2=p+1; isalnum(*p2) || *p2=='#'; p2++); |
100 |
|
100 | if(*p2==';') { |
101 |
|
101 | p++; if(isalpha(*p)) {printf("%c",*p); lastc=*p;} |
102 |
|
102 | p=p2; continue; |
103 |
|
103 | } |
104 |
|
104 | } |
105 |
|
105 | if(!isspace(*p) && strchr(":!?.;,\"()[]{}=/\\+*^%@~`<>|",*p)==NULL) |
106 |
|
106 | {printf("%c",*p); lastc=*p;} |
107 |
|
107 | else { |
108 |
|
108 | if(isspace(*p) && !isspace(lastc)) { |
109 |
|
109 | printf(" "); lastc=' '; |
110 |
|
110 | } |
111 |
|
111 | if(!isspace(*p)) { |
112 |
|
112 | switch(lastc) { |
113 |
|
113 | case ' ': printf(". "); lastc=' '; break; |
114 |
|
114 | case ' ': break; |
115 |
|
115 | default: printf(" . "); lastc=' '; break; |
116 |
|
116 | } |
117 |
|
117 | } |
118 |
|
118 | } |
119 |
|
119 | nextp: if(p==NULL || *p==0) break; |
120 | } |
120 | } |
121 | } |
121 | } |
122 | 122 | ||
123 | int main(int argc, char *argv[]) |
123 | int main(int argc, char *argv[]) |
124 | { |
124 | { |