Subversion Repositories wimsdev

Rev

Rev 7676 | Rev 8123 | Go to most recent revision | Show entire file | Ignore whitespace | Details | Blame | Last modification | View Log | RSS feed

Rev 7676 Rev 8100
Line 23... Line 23...
23
#define entrylim 32768
23
#define entrylim 32768
24
/* limit of dictionary length */
24
/* limit of dictionary length */
25
#define diclim 1024*1024
25
#define diclim 1024*1024
26
 
26
 
27
/***************** Nothing should need change hereafter *****************/
27
/***************** Nothing should need change hereafter *****************/
28
 
-
 
29
#include "../Lib/basicstr.c"
-
 
30
 
28
 
31
 
29
 
32
char inpbuf[MAX_LINELEN+1], outbuf[2*MAX_LINELEN+2];
30
char inpbuf[MAX_LINELEN+1], outbuf[2*MAX_LINELEN+2];
33
char *dicbuf;
31
char *dicbuf;
34
struct entry {
32
struct entry {
Line 45... Line 43...
45
int unknown_type=unk_delete;
43
int unknown_type=unk_delete;
46
int nocase=0,leaveline=0;
44
int nocase=0,leaveline=0;
47
char *unknown, unkbuf[1024];
45
char *unknown, unkbuf[1024];
48
 
46
 
49
int compare(int i1, const char *s2)
47
int compare(int i1, const char *s2)
50
{
48
{
51
    int k;
49
    int k;
52
    if(nocase) k=strncasecmp((char*)entry[i1].original,s2,entry[i1].olen);
50
    if(nocase) k=strncasecmp((char*)entry[i1].original,s2,entry[i1].olen);
53
    else k=strncmp((char*)entry[i1].original,s2,entry[i1].olen);
51
    else k=strncmp((char*)entry[i1].original,s2,entry[i1].olen);
54
    if(k==0 && (isalnum(*(s2+entry[i1].olen)) || (*(s2+entry[i1].olen)&128)!=0)) return -1;
52
    if(k==0 && (isalnum(*(s2+entry[i1].olen)) || (*(s2+entry[i1].olen)&128)!=0)) return -1;
55
    else return k;
53
    else return k;
56
}
54
}
57
 
55
 
58
/* Searches a list. Returns the index if found, -1 if there is no match.
56
/* Searches a list. Returns the index if found, -1 if there is no match.
59
 * Uses binary search, list must be sorted. */
57
 * Uses binary search, list must be sorted. */
60
int search_list(struct entry *list, int items, size_t item_size, const char *str)
58
int search_list2(struct entry *list, int items, size_t item_size, const char *str)
61
{
59
{
62
    int i1,i2,j,k,t,t1;
60
    int i1,i2,j,k,t,t1;
63
    unsigned char c;
61
    unsigned char c;
64
 
62
 
65
    if(items<=0) return -1;
63
    if(items<=0) return -1;
Line 79... Line 77...
79
    more:
77
    more:
80
    if((t=list[j].earlier)<0) {
78
    if((t=list[j].earlier)<0) {
81
      if(k==0) return j; else return -1;
79
      if(k==0) return j; else return -1;
82
    }
80
    }
83
    if(compare(t,str)!=0) return -1;
81
    if(compare(t,str)!=0) return -1;
84
    for(j=t1=t,k=0;j<items && list[j].earlier==t1 && (k=compare(j,str))<=0; j++)
82
    for(j=t1=t,k=0;j<items && list[j].earlier==t1 && (k=compare(j,str))<=0; j++) {
85
      if(k==0) t=j;
83
      if(k==0) t=j;
-
 
84
    }
86
    return t;
85
    return t;
87
}
86
}
88
 
87
 
89
/* change all spaces into ' ', and collapse multiple occurrences */
88
/* change all spaces into ' ', and collapse multiple occurrences */
90
void singlespace(char *p)
89
void singlespace2(char *p)
91
{
90
{
92
    char *pp, *p2;
91
    char *pp, *p2;
93
    for(pp=p;*pp;pp++) {
92
    for(pp=p;*pp;pp++) {
94
      if(!isspace(*pp)) continue;
93
      if(!isspace(*pp)) continue;
95
      if(leaveline) {
94
      if(leaveline) {
Line 133... Line 132...
133
    else return;
132
    else return;
134
    for(i=0,p1=dicbuf;p1!=NULL && *p1!=0 && i<entrylim;p1=p2) {
133
    for(i=0,p1=dicbuf;p1!=NULL && *p1!=0 && i<entrylim;p1=p2) {
135
      p2=strchr(p1+1,'\n'); if(p2>p1) *p2++=0;
134
      p2=strchr(p1+1,'\n'); if(p2>p1) *p2++=0;
136
      pp=strchr(p1,':'); if(pp==NULL) continue;
135
      pp=strchr(p1,':'); if(pp==NULL) continue;
137
      *pp++=0;
136
      *pp++=0;
138
      strip_trailing_spaces(p1); strip_trailing_spaces(pp);
137
      strip_trailing_spaces2(p1); strip_trailing_spaces2(pp);
139
      singlespace(p1);
138
      singlespace2(p1);
140
      p1=find_word_start(p1); pp=find_word_start(pp);
139
      p1=find_word_start(p1); pp=find_word_start(pp);
141
      if(*p1==0) continue;
140
      if(*p1==0) continue;
142
      if(has_digits==0) {
141
      if(has_digits==0) {
143
          char *p;
142
          char *p;
144
          for(p=p1;*p!=0 && p<pp && !isdigit(*p);p++);
143
          for(p=p1;*p!=0 && p<pp && !isdigit(*p);p++);
Line 178... Line 177...
178
           strchr("_",*pp)!=NULL);pp++);
177
           strchr("_",*pp)!=NULL);pp++);
179
      p2=find_word_start(p2);
178
      p2=find_word_start(p2);
180
      if(pp==p1 ||
179
      if(pp==p1 ||
181
         (has_digits==0 && isdigit(*pp)) ||
180
         (has_digits==0 && isdigit(*pp)) ||
182
         (*pp!=0 && !isspace(*pp) && strchr(",.?!/;",*pp)==NULL)) continue;
181
         (*pp!=0 && !isspace(*pp) && strchr(",.?!/;",*pp)==NULL)) continue;
183
      t=search_list(entry,entrycount,sizeof(entry[0]),p1);
182
      t=search_list2(entry,entrycount,sizeof(entry[0]),p1);
184
      if(t<0) {
183
      if(t<0) {
185
          switch(unknown_type) {
184
          switch(unknown_type) {
186
            case unk_leave: break;
185
            case unk_leave: break;
187
            case unk_delete: {
186
            case unk_delete: {
188
                ovlstrcpy(p1,find_word_start(pp)); p2=p1;
187
                ovlstrcpy(p1,find_word_start(pp)); p2=p1;
189
                break;
188
                break;
190
            }
189
            }
191
            case unk_replace: {
190
            case unk_replace: {
192
                string_modify(outbuf,p1,pp,unkbuf);
191
                string_modify3(outbuf,p1,pp,unkbuf);
193
                p2=find_word_start(p1+strlen(unkbuf));
192
                p2=find_word_start(p1+strlen(unkbuf));
194
            }
193
            }
195
          }
194
          }
196
          continue;
195
          continue;
197
      }
196
      }
198
      string_modify(outbuf,p1,p1+strlen((char*)entry[t].original),
197
      string_modify3(outbuf,p1,p1+strlen((char*)entry[t].original),
199
                  (char*)entry[t].replace);
198
                  (char*)entry[t].replace);
200
      p2=find_word_start(p1+strlen((char*)entry[t].replace));
199
      p2=find_word_start(p1+strlen((char*)entry[t].replace));
201
    }
200
    }
202
    snprintf(p,MAX_LINELEN,"%s",outbuf);
201
    snprintf(p,MAX_LINELEN,"%s",outbuf);
203
}
202
}