Rev 11132 | Show entire file | Ignore whitespace | Details | Blame | Last modification | View Log | RSS feed
Rev 11132 | Rev 12248 | ||
---|---|---|---|
Line 30... | Line 30... | ||
30 | int filelen=0; |
30 | int filelen=0; |
31 | 31 | ||
32 | /* get the file */ |
32 | /* get the file */ |
33 | void prepare_file(void) |
33 | void prepare_file(void) |
34 | { |
34 | { |
35 |
|
35 | FILE *f; |
36 |
|
36 | long int flen; |
37 | 37 | ||
38 |
|
38 | filelen=0; |
39 |
|
39 | f=fopen(filename,"r"); if(f==NULL) return; |
40 |
|
40 | fseek(f,0,SEEK_END);flen=ftell(f); fseek(f,0,SEEK_SET); |
41 |
|
41 | if(flen>buflim) return; |
42 |
|
42 | filebuf=xmalloc(2*flen+1024);flen=fread(filebuf,1,flen,f); |
43 |
|
43 | fclose(f); |
44 |
|
44 | if(flen>0 && flen<buflim) filebuf[flen]=0; else flen=0; |
45 |
|
45 | filelen=flen; |
46 | } |
46 | } |
47 | 47 | ||
48 | void processbuf(void) |
48 | void processbuf(void) |
49 | { |
49 | { |
50 |
|
50 | char *p; |
51 |
|
51 | deaccent(filebuf); |
52 |
|
52 | for(p=filebuf; *p; p++) *p=tolower(*p); |
53 |
|
53 | for(p=strpbrk(filebuf,"'-"); p!=NULL; p=strpbrk(p+1,"'-")) *p=' '; |
54 |
|
54 | for(p=strstr(filebuf,"&nbsp;"); p!=NULL; p=strstr(p+1,"&nbsp;")) { |
55 |
|
55 | *p=' '; ovlstrcpy(p+1,p+6); |
56 |
|
56 | } |
57 | } |
57 | } |
58 | 58 | ||
59 | void output(void) |
59 | void output(void) |
60 | { |
60 | { |
61 |
|
61 | char *p, *pp, lastc; |
62 |
|
62 | p=find_word_start(filebuf); lastc=0; |
63 |
|
63 | if(*p=='!' || *p==':') goto cont1; |
64 |
|
64 | for(;*p;p++) { |
65 |
|
65 | if(*p=='\n') { |
66 |
|
66 | if(!isspace(lastc)) {printf(" "); lastc=' ';} |
67 |
|
67 | cont2: p=find_word_start(p); |
68 |
|
68 | if(*p=='!' || *p==':') { |
69 |
|
69 | if(lastc!=' ') printf(". "); |
70 |
|
70 | lastc=' '; |
71 |
|
71 | cont1: p=strchr(p,'\n'); |
72 |
|
72 | if(p==NULL) return; |
73 |
|
73 | if(*(p-1)=='\\') {p++; goto cont1;} |
74 |
|
74 | goto cont2; |
75 | } |
- | |
76 | for(pp=p; isalnum(*pp) || *pp=='_' || *pp=='$'; pp++); |
- | |
77 | pp=find_word_start(pp); |
- | |
78 | if(*pp=='=') goto cont1; |
- | |
79 | } |
- | |
80 | if(*p=='\\' && *(p+1)=='\n') { |
- | |
81 | printf("\n"); p++; continue; |
- | |
82 | } |
75 | } |
- | 76 | for(pp=p; isalnum(*pp) || *pp=='_' || *pp=='$'; pp++); |
|
- | 77 | pp=find_word_start(pp); |
|
- | 78 | if(*pp=='=') goto cont1; |
|
- | 79 | } |
|
- | 80 | if(*p=='\\' && *(p+1)=='\n') { |
|
- | 81 | printf("\n"); p++; continue; |
|
- | 82 | } |
|
83 |
|
83 | if(*p=='<' && (isalpha(*(p+1)) || *(p+1)=='/')) { |
84 |
|
84 | p=strchr(p,'>'); goto nextp; |
- | 85 | } |
|
- | 86 | if(*p=='$') { |
|
- | 87 | if(lastc != ' ') { |
|
- | 88 | if(!isspace(lastc)) printf(" "); |
|
- | 89 | printf(". "); lastc=' '; |
|
- | 90 | } |
|
- | 91 | p++; |
|
- | 92 | if(*p=='(') {p=find_matching(p+1,')'); goto nextp;} |
|
- | 93 | if(*p=='[') {p=find_matching(p+1,']'); goto nextp;} |
|
- | 94 | while(isalnum(*p) || *p=='_') p++; |
|
- | 95 | p--; continue; |
|
- | 96 | } |
|
- | 97 | if(*p=='&') { |
|
- | 98 | char *p2; |
|
- | 99 | for(p2=p+1; isalnum(*p2) || *p2=='#'; p2++); |
|
- | 100 | if(*p2==';') { |
|
- | 101 | p++; if(isalpha(*p)) {printf("%c",*p); lastc=*p;} |
|
- | 102 | p=p2; continue; |
|
85 | } |
103 | } |
86 | if(*p=='$') { |
- | |
87 | if(lastc != ' ') { |
- | |
88 | if(!isspace(lastc)) printf(" "); |
- | |
89 | printf(". "); lastc=' '; |
- | |
90 |
|
104 | } |
91 | p++; |
- | |
92 | if(*p=='(') {p=find_matching(p+1,')'); goto nextp;} |
- | |
93 |
|
105 | if(!isspace(*p) && strchr(":!?.;,\"()[]{}=/\\+*^%@~`<>|",*p)==NULL) |
94 | while(isalnum(*p) || *p=='_') p++; |
- | |
95 |
|
106 | {printf("%c",*p); lastc=*p;} |
96 |
|
107 | else { |
97 | if(*p=='&') { |
- | |
98 | char *p2; |
- | |
99 | for(p2=p+1; isalnum(*p2) || *p2=='#'; p2++); |
- | |
100 | if(*p2==';') { |
- | |
101 |
|
108 | if(isspace(*p) && !isspace(lastc)) { |
102 |
|
109 | printf(" "); lastc=' '; |
103 | } |
- | |
104 | } |
110 | } |
105 | if(!isspace(*p) && strchr(":!?.;,\"()[]{}=/\\+*^%@~`<>|",*p)==NULL) |
- | |
106 | {printf("%c",*p); lastc=*p;} |
- | |
107 | else { |
- | |
108 | if(isspace(*p) && !isspace(lastc)) { |
- | |
109 | printf(" "); lastc=' '; |
- | |
110 | } |
- | |
111 |
|
111 | if(!isspace(*p)) { |
112 |
|
112 | switch(lastc) { |
113 |
|
113 | case ' ': printf(". "); lastc=' '; break; |
114 |
|
114 | case ' ': break; |
115 |
|
115 | default: printf(" . "); lastc=' '; break; |
116 | } |
- | |
117 |
|
116 | } |
118 | } |
117 | } |
119 | nextp: if(p==NULL || *p==0) break; |
- | |
120 | } |
118 | } |
- | 119 | nextp: if(p==NULL || *p==0) break; |
|
- | 120 | } |
|
121 | } |
121 | } |
122 | 122 | ||
123 | int main(int argc, char *argv[]) |
123 | int main(int argc, char *argv[]) |
124 | { |
124 | { |
125 |
|
125 | if(argc<=1) return 0; |
126 |
|
126 | snprintf(filename,sizeof(filename)-128,"%s",argv[1]); |
127 |
|
127 | prepare_file(); |
128 |
|
128 | processbuf(); |
129 |
|
129 | output(); |
130 |
|
130 | return 0; |
131 | } |
131 | } |
132 | 132 |