Rev 7676 | Rev 8123 | Go to most recent revision | Show entire file | Ignore whitespace | Details | Blame | Last modification | View Log | RSS feed
Rev 7676 | Rev 8100 | ||
---|---|---|---|
Line 23... | Line 23... | ||
23 | #define entrylim 32768 |
23 | #define entrylim 32768 |
24 | /* limit of dictionary length */ |
24 | /* limit of dictionary length */ |
25 | #define diclim 1024*1024 |
25 | #define diclim 1024*1024 |
26 | 26 | ||
27 | /***************** Nothing should need change hereafter *****************/ |
27 | /***************** Nothing should need change hereafter *****************/ |
28 | - | ||
29 | #include "../Lib/basicstr.c" |
- | |
30 | 28 | ||
31 | 29 | ||
32 | char inpbuf[MAX_LINELEN+1], outbuf[2*MAX_LINELEN+2]; |
30 | char inpbuf[MAX_LINELEN+1], outbuf[2*MAX_LINELEN+2]; |
33 | char *dicbuf; |
31 | char *dicbuf; |
34 | struct entry { |
32 | struct entry { |
Line 45... | Line 43... | ||
45 | int unknown_type=unk_delete; |
43 | int unknown_type=unk_delete; |
46 | int nocase=0,leaveline=0; |
44 | int nocase=0,leaveline=0; |
47 | char *unknown, unkbuf[1024]; |
45 | char *unknown, unkbuf[1024]; |
48 | 46 | ||
49 | int compare(int i1, const char *s2) |
47 | int compare(int i1, const char *s2) |
50 | { |
48 | { |
51 | int k; |
49 | int k; |
52 | if(nocase) k=strncasecmp((char*)entry[i1].original,s2,entry[i1].olen); |
50 | if(nocase) k=strncasecmp((char*)entry[i1].original,s2,entry[i1].olen); |
53 | else k=strncmp((char*)entry[i1].original,s2,entry[i1].olen); |
51 | else k=strncmp((char*)entry[i1].original,s2,entry[i1].olen); |
54 | if(k==0 && (isalnum(*(s2+entry[i1].olen)) || (*(s2+entry[i1].olen)&128)!=0)) return -1; |
52 | if(k==0 && (isalnum(*(s2+entry[i1].olen)) || (*(s2+entry[i1].olen)&128)!=0)) return -1; |
55 | else return k; |
53 | else return k; |
56 | } |
54 | } |
57 | 55 | ||
58 | /* Searches a list. Returns the index if found, -1 if there is no match. |
56 | /* Searches a list. Returns the index if found, -1 if there is no match. |
59 | * Uses binary search; the list must be sorted. */ |
57 | * Uses binary search; the list must be sorted. */ |
60 | int |
58 | int search_list2(struct entry *list, int items, size_t item_size, const char *str) |
61 | { |
59 | { |
62 | int i1,i2,j,k,t,t1; |
60 | int i1,i2,j,k,t,t1; |
63 | unsigned char c; |
61 | unsigned char c; |
64 | 62 | ||
65 | if(items<=0) return -1; |
63 | if(items<=0) return -1; |
Line 79... | Line 77... | ||
79 | more: |
77 | more: |
80 | if((t=list[j].earlier)<0) { |
78 | if((t=list[j].earlier)<0) { |
81 | if(k==0) return j; else return -1; |
79 | if(k==0) return j; else return -1; |
82 | } |
80 | } |
83 | if(compare(t,str)!=0) return -1; |
81 | if(compare(t,str)!=0) return -1; |
84 | for(j=t1=t,k=0;j<items && list[j].earlier==t1 && (k=compare(j,str))<=0; j++) |
82 | for(j=t1=t,k=0;j<items && list[j].earlier==t1 && (k=compare(j,str))<=0; j++) { |
85 | if(k==0) t=j; |
83 | if(k==0) t=j; |
- | 84 | } |
|
86 | return t; |
85 | return t; |
87 | } |
86 | } |
88 | 87 | ||
89 | /* change all spaces into ' ', and collapse multiple occurrences */ |
88 | /* change all spaces into ' ', and collapse multiple occurrences */ |
90 | void |
89 | void singlespace2(char *p) |
91 | { |
90 | { |
92 | char *pp, *p2; |
91 | char *pp, *p2; |
93 | for(pp=p;*pp;pp++) { |
92 | for(pp=p;*pp;pp++) { |
94 | if(!isspace(*pp)) continue; |
93 | if(!isspace(*pp)) continue; |
95 | if(leaveline) { |
94 | if(leaveline) { |
Line 133... | Line 132... | ||
133 | else return; |
132 | else return; |
134 | for(i=0,p1=dicbuf;p1!=NULL && *p1!=0 && i<entrylim;p1=p2) { |
133 | for(i=0,p1=dicbuf;p1!=NULL && *p1!=0 && i<entrylim;p1=p2) { |
135 | p2=strchr(p1+1,'\n'); if(p2>p1) *p2++=0; |
134 | p2=strchr(p1+1,'\n'); if(p2>p1) *p2++=0; |
136 | pp=strchr(p1,':'); if(pp==NULL) continue; |
135 | pp=strchr(p1,':'); if(pp==NULL) continue; |
137 | *pp++=0; |
136 | *pp++=0; |
138 |
|
137 | strip_trailing_spaces2(p1); strip_trailing_spaces2(pp); |
139 |
|
138 | singlespace2(p1); |
140 | p1=find_word_start(p1); pp=find_word_start(pp); |
139 | p1=find_word_start(p1); pp=find_word_start(pp); |
141 | if(*p1==0) continue; |
140 | if(*p1==0) continue; |
142 | if(has_digits==0) { |
141 | if(has_digits==0) { |
143 | char *p; |
142 | char *p; |
144 | for(p=p1;*p!=0 && p<pp && !isdigit(*p);p++); |
143 | for(p=p1;*p!=0 && p<pp && !isdigit(*p);p++); |
Line 178... | Line 177... | ||
178 | strchr("_",*pp)!=NULL);pp++); |
177 | strchr("_",*pp)!=NULL);pp++); |
179 | p2=find_word_start(p2); |
178 | p2=find_word_start(p2); |
180 | if(pp==p1 || |
179 | if(pp==p1 || |
181 | (has_digits==0 && isdigit(*pp)) || |
180 | (has_digits==0 && isdigit(*pp)) || |
182 | (*pp!=0 && !isspace(*pp) && strchr(",.?!/;",*pp)==NULL)) continue; |
181 | (*pp!=0 && !isspace(*pp) && strchr(",.?!/;",*pp)==NULL)) continue; |
183 | t= |
182 | t=search_list2(entry,entrycount,sizeof(entry[0]),p1); |
184 | if(t<0) { |
183 | if(t<0) { |
185 | switch(unknown_type) { |
184 | switch(unknown_type) { |
186 | case unk_leave: break; |
185 | case unk_leave: break; |
187 | case unk_delete: { |
186 | case unk_delete: { |
188 | ovlstrcpy(p1,find_word_start(pp)); p2=p1; |
187 | ovlstrcpy(p1,find_word_start(pp)); p2=p1; |
189 | break; |
188 | break; |
190 | } |
189 | } |
191 | case unk_replace: { |
190 | case unk_replace: { |
192 |
|
191 | string_modify3(outbuf,p1,pp,unkbuf); |
193 | p2=find_word_start(p1+strlen(unkbuf)); |
192 | p2=find_word_start(p1+strlen(unkbuf)); |
194 | } |
193 | } |
195 | } |
194 | } |
196 | continue; |
195 | continue; |
197 | } |
196 | } |
198 |
|
197 | string_modify3(outbuf,p1,p1+strlen((char*)entry[t].original), |
199 | (char*)entry[t].replace); |
198 | (char*)entry[t].replace); |
200 | p2=find_word_start(p1+strlen((char*)entry[t].replace)); |
199 | p2=find_word_start(p1+strlen((char*)entry[t].replace)); |
201 | } |
200 | } |
202 | snprintf(p,MAX_LINELEN,"%s",outbuf); |
201 | snprintf(p,MAX_LINELEN,"%s",outbuf); |
203 | } |
202 | } |