Subversion Repositories wimsdev

Rev

Rev 3808 | Rev 8161 | Go to most recent revision | Show entire file | Ignore whitespace | Details | Blame | Last modification | View Log | RSS feed

Rev 3808 Rev 8113
Line 13... Line 13...
13
 *  You should have received a copy of the GNU General Public License
13
 *  You should have received a copy of the GNU General Public License
14
 *  along with this program; if not, write to the Free Software
14
 *  along with this program; if not, write to the Free Software
15
 *  Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
15
 *  Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
16
 */
16
 */
17
 
17
 
18
#define suflim  256
18
#define suflim    256
19
#define sufbuflim 102400
19
#define sufbuflim 102400
20
 
20
 
21
int suffixcnt;
21
int suffixcnt;
22
struct {
22
struct {
23
    unsigned char *original;
23
    unsigned char *original;
24
    int olen;
24
    int olen;
25
    unsigned char *replace;
25
    unsigned char *replace;
26
}
26
}
27
suf[suflim];
27
suf[suflim];
28
char *sufbuf;
28
char *sufbuf;
29
int sufwordlen, sufminlen;
29
int sufwordlen, sufminlen;
30
 
30
 
31
        /* Suffix translation, to be used within translator. */
31
/* Suffix translation, to be used within translator. */
32
 
32
 
33
int sufcomp(int t, const unsigned char *s2)
33
int sufcomp(int t, const unsigned char *s2)
34
{
34
{
35
    int k;
35
    int k;
36
   
36
 
37
    for(k=0;k<suf[t].olen && k<sufwordlen
37
    for(k=0;k<suf[t].olen && k<sufwordlen
38
        && suf[t].original[k]==s2[sufwordlen-k-1];k++);
38
      && suf[t].original[k]==s2[sufwordlen-k-1];k++);
39
    if(k>=suf[t].olen) {
39
    if(k>=suf[t].olen) {
40
        if(sufwordlen>k) return -1; else return 0;
40
      if(sufwordlen>k) return -1; else return 0;
41
    }
41
    }
42
    else return suf[t].original[k]-s2[sufwordlen-k-1];
42
    else return suf[t].original[k]-s2[sufwordlen-k-1];
43
}
43
}
44
 
44
 
45
        /* searches a list. Returns index if found, -1 if nomatch.
45
/* searches a list. Returns index if found, -1 if nomatch.
46
         * This routine is faster than naive one by one comparisons,
46
 * This routine is faster than naive one by one comparisons,
47
         * and is especially suited for large lists. */
47
 * and is especially suited for large lists.
-
 
48
 */
48
int suffix_list(void *list, int items, size_t item_size, const unsigned char *str)
49
int suffix_list(void *list, int items, size_t item_size, const unsigned char *str)
49
{
50
{
50
    int i1,i2,j,k,t,v;
51
    int i1,i2,j,k,t,v;
51
    unsigned char c,d;
52
    unsigned char c,d;
52
   
53
 
53
    if(items<=0) return -1;
54
    if(items<=0) return -1;
54
    k=sufcomp(0,str);
55
    k=sufcomp(0,str);
55
    if(k==0) return 0; if(k>0) return -1;
56
    if(k==0) return 0; if(k>0) return -1;
56
    j=items-1; k=sufcomp(j,str);
57
    j=items-1; k=sufcomp(j,str);
57
    if(k==0) return j;
58
    if(k==0) return j;
58
    if(k>0) for(i1=0,i2=j;i2>i1+1;) {
59
    if(k>0) for(i1=0,i2=j;i2>i1+1;) {
59
        j=i1+(i2-i1)/2; k=sufcomp(j,str);
60
      j=i1+(i2-i1)/2; k=sufcomp(j,str);
60
        if(k==0) return j;
61
      if(k==0) return j;
61
        if(k>0) {i2=j; continue;}
62
      if(k>0) {i2=j; continue;}
62
        if(k<0) {i1=j; continue;}      
63
      if(k<0) {i1=j; continue;}
63
    }
64
    }
64
    if(k>0 && j>0) j--;
65
    if(k>0 && j>0) j--;
65
    backcheck:
66
    backcheck:
66
    v=j;for(t=0;t<suf[j].olen && t<sufwordlen
67
    v=j;for(t=0;t<suf[j].olen && t<sufwordlen
67
        && suf[j].original[t]==str[sufwordlen-t-1];t++);
68
      && suf[j].original[t]==str[sufwordlen-t-1];t++);
68
    if(t<sufminlen) return -1; if(t>=suf[j].olen) return j;
69
    if(t<sufminlen) return -1; if(t>=suf[j].olen) return j;
69
    for(j--,c=str[sufwordlen-1],d=str[sufwordlen-t];
70
    for(j--,c=str[sufwordlen-1],d=str[sufwordlen-t];
70
        j>=0 && suf[j].original[0]==c && suf[j].olen>t
71
      j>=0 && suf[j].original[0]==c && suf[j].olen>t
71
        && suf[j].original[t-1]==d;j--);
72
      && suf[j].original[t-1]==d;j--);
72
    if(j>=0 && suf[j].original[0]==c &&
73
    if(j>=0 && suf[j].original[0]==c &&
73
       strncmp((char*)suf[j].original,(char*)suf[v].original,suf[j].olen)==0)
74
       strncmp((char*)suf[j].original,(char*)suf[v].original,suf[j].olen)==0)
74
      return j;
75
      return j;
75
    else goto backcheck;
76
    else goto backcheck;
76
}
77
}
77
 
78
 
78
        /* Prepare dictionary.  */
79
/* Prepare dictionary.  */
79
void suffix_dic(char *sdicname)
80
void suffix_dic(char *sdicname)
80
{
81
{
81
    int i,k,l;
82
    int i,k,l;
82
    FILE *suff;
83
    FILE *suff;
83
    char *p1, *p2, *pp;
84
    char *p1, *p2, *pp;
Line 90... Line 91...
90
    sufbuf=xmalloc(flen+16);flen=fread(sufbuf,1,flen,suff);
91
    sufbuf=xmalloc(flen+16);flen=fread(sufbuf,1,flen,suff);
91
    fclose(suff);
92
    fclose(suff);
92
    if(flen>0 && flen<sufbuflim) sufbuf[flen]=0;
93
    if(flen>0 && flen<sufbuflim) sufbuf[flen]=0;
93
    else return;
94
    else return;
94
    for(i=0,p1=sufbuf;p1!=NULL && *p1!=0 && i<suflim;p1=p2) {
95
    for(i=0,p1=sufbuf;p1!=NULL && *p1!=0 && i<suflim;p1=p2) {
95
        p2=strchr(p1+1,'\n'); if(p2>p1) *p2++=0;
96
      p2=strchr(p1+1,'\n'); if(p2>p1) *p2++=0;
96
        pp=strchr(p1,':'); if(pp==NULL) continue;
97
      pp=strchr(p1,':'); if(pp==NULL) continue;
97
        *pp++=0;
98
      *pp++=0;
98
        strip_trailing_spaces(p1); strip_trailing_spaces(pp);
99
      strip_trailing_spaces(p1); strip_trailing_spaces(pp);
99
        p1=find_word_start(p1); pp=find_word_start(pp);
100
      p1=find_word_start(p1); pp=find_word_start(pp);
100
        if(*p1==0) continue;
101
      if(*p1==0) continue;
101
        if(i>0) {
102
      if(i>0) {
102
            k=strcmp((char*)suf[i-1].original,p1);
103
          k=strcmp((char*)suf[i-1].original,p1);
103
            if(k>0) {
104
          if(k>0) {
104
                pp=strrchr(sdicname,'/'); if(pp==NULL) pp=sdicname; else pp++;
105
            pp=strrchr(sdicname,'/'); if(pp==NULL) pp=sdicname; else pp++;
105
                error("unsorted_dictionary %s: %s > %s.\n",
106
            error("unsorted_dictionary %s: %s > %s.\n",
106
                      pp,suf[i-1].original,p1);
107
                  pp,suf[i-1].original,p1);
107
            }
108
          }
108
            if(k==0) {
109
          if(k==0) {
109
                pp=strrchr(sdicname,'/'); if(pp==NULL) pp=sdicname; else pp++;
110
            pp=strrchr(sdicname,'/'); if(pp==NULL) pp=sdicname; else pp++;
110
                error("duplication_in_dictionary %s: %s.\n",pp,p1);
111
            error("duplication_in_dictionary %s: %s.\n",pp,p1);
111
            }
112
          }
112
        }
113
      }
113
        suf[i].original=(unsigned char*)p1; suf[i].olen=l=strlen(p1);
114
      suf[i].original=(unsigned char*)p1; suf[i].olen=l=strlen(p1);
114
        if(l<sufminlen) sufminlen=l;
115
      if(l<sufminlen) sufminlen=l;
115
        suf[i].replace=(unsigned char*)pp; i++;
116
      suf[i].replace=(unsigned char*)pp; i++;
116
    }
117
    }
117
    suffixcnt=i;
118
    suffixcnt=i;
118
}
119
}
119
 
120
 
120
        /* Suffix translation. */
121
/* Suffix translation. */
121
void suffix_translate(char *p)
122
void suffix_translate(char *p)
122
{
123
{
123
    char *p1, *p2;
124
    char *p1, *p2;
124
    int t;
125
    int t;
125
 
126
 
126
    for(p1=find_word_start(p);
127
    for(p1=find_word_start(p);
127
        p1!=NULL && p1-p<MAX_LINELEN && *p1!=0;
128
      p1!=NULL && p1-p<MAX_LINELEN && *p1!=0;
128
        p1=p2) {
129
      p1=p2) {
129
        if(!isalpha(*p1)) {p2=p1+1; continue;}
130
       if(!isalpha(*p1)) {p2=p1+1; continue;}
130
        for(p2=p1;isalpha(*p2);p2++);
131
       for(p2=p1;isalpha(*p2);p2++);
131
        if(*p2!=0 && strchr(" ,.?!'\"\n`:;()[]{}<>",*p2)==NULL) continue;
132
       if(*p2!=0 && strchr(" ,.?!'\"\n`:;()[]{}<>",*p2)==NULL) continue;
132
        sufwordlen=p2-p1;
133
       sufwordlen=p2-p1;
133
        t=suffix_list(suf,suffixcnt,sizeof(suf[0]),(unsigned char*)p1);
134
       t=suffix_list(suf,suffixcnt,sizeof(suf[0]),(unsigned char*)p1);
134
        if(t<0) continue;
135
       if(t<0) continue;
135
        string_modify(p,p2-suf[t].olen,p2,(char*)suf[t].replace);
136
       string_modify(p,p2-suf[t].olen,p2,(char*)suf[t].replace);
136
        p2=p2-suf[t].olen+strlen((char*)suf[t].replace);
137
       p2=p2-suf[t].olen+strlen((char*)suf[t].replace);
137
    }
138
     }
138
    p[MAX_LINELEN]=0;
139
     p[MAX_LINELEN]=0;
139
}
140
}
140
 
141
 
141
void suffix(char *p, char *sdicname)
142
void suffix(char *p, char *sdicname)
142
{
143
{
143
    suffix_dic(sdicname); if(suffixcnt>0) suffix_translate(p);
144
    suffix_dic(sdicname); if(suffixcnt>0) suffix_translate(p);