/* Subversion repository: wimsdev.
 * Text taken from a repository diff view comparing Rev 11124 with Rev 12248
 * (the page also links to Rev 18181 and to the most recent revision);
 * the compared region starts at line 21 of the file.
 */
/* Capacity of the suffix table suf[]. */
#define suflim    256
/* Upper bound, in bytes, on the dictionary file size accepted by suffix_dic(). */
#define sufbuflim 102400

/* Number of entries of suf[] currently in use. */
int suffixcnt;
/* Suffix table: one rule per entry. */
struct {
  unsigned char *original;  /* text matched against the end of a word, read backward */
  int olen;                 /* length of original in bytes */
  unsigned char *replace;   /* text substituted for the matched ending */
}
suf[suflim];
/* Buffer holding the dictionary file contents; suf[] entries point into it. */
char *sufbuf;
/* sufwordlen: length of the word currently being looked up.
 * sufminlen: length of the shortest original in the table.
 */
int sufwordlen, sufminlen;

34
/* Suffix translation, to be used within translator. */
34
/* Suffix translation, to be used within translator. */
35
 
35
 
36
int sufcomp(int t, const unsigned char *s2)
36
int sufcomp(int t, const unsigned char *s2)
37
{
37
{
38
    int k;
38
  int k;
39
 
39
 
40
    for(k=0;k<suf[t].olen && k<sufwordlen
40
  for(k=0;k<suf[t].olen && k<sufwordlen
41
      && suf[t].original[k]==s2[sufwordlen-k-1];k++);
41
    && suf[t].original[k]==s2[sufwordlen-k-1];k++);
42
    if(k>=suf[t].olen) {
42
  if(k>=suf[t].olen) {
43
      if(sufwordlen>k) return -1; else return 0;
43
    if(sufwordlen>k) return -1; else return 0;
44
    }
44
  }
45
    else return suf[t].original[k]-s2[sufwordlen-k-1];
45
  else return suf[t].original[k]-s2[sufwordlen-k-1];
46
}
46
}
47
 
47
 
48
/* searches a list. Returns index if found, -1 if nomatch.
48
/* searches a list. Returns index if found, -1 if nomatch.
49
 * This routine is faster than naive one by one comparisons,
49
 * This routine is faster than naive one by one comparisons,
50
 * and is especially suited for large lists.
50
 * and is especially suited for large lists.
51
 */
51
 */
52
int suffix_list(void *list, int items, size_t item_size, const unsigned char *str)
52
int suffix_list(void *list, int items, size_t item_size, const unsigned char *str)
53
{
53
{
54
    int i1,i2,j,k,t,v;
54
  int i1,i2,j,k,t,v;
55
    unsigned char c,d;
55
  unsigned char c,d;
56
 
56
 
57
    if(items<=0) return -1;
57
  if(items<=0) return -1;
58
    k=sufcomp(0,str);
58
  k=sufcomp(0,str);
59
    if(k==0) return 0;
59
  if(k==0) return 0;
60
    if(k>0) return -1;
60
  if(k>0) return -1;
61
    j=items-1; k=sufcomp(j,str);
61
  j=items-1; k=sufcomp(j,str);
-
 
62
  if(k==0) return j;
-
 
63
  if(k>0) for(i1=0,i2=j;i2>i1+1;) {
-
 
64
    j=i1+(i2-i1)/2; k=sufcomp(j,str);
62
    if(k==0) return j;
65
    if(k==0) return j;
63
    if(k>0) for(i1=0,i2=j;i2>i1+1;) {
-
 
64
      j=i1+(i2-i1)/2; k=sufcomp(j,str);
-
 
65
      if(k==0) return j;
-
 
66
      if(k>0) {i2=j; continue;}
66
    if(k>0) {i2=j; continue;}
67
      if(k<0) {i1=j; continue;}
67
    if(k<0) {i1=j; continue;}
68
    }
68
  }
69
    if(k>0 && j>0) j--;
69
  if(k>0 && j>0) j--;
70
    backcheck:
70
  backcheck:
71
    v=j;for(t=0;t<suf[j].olen && t<sufwordlen
71
  v=j;for(t=0;t<suf[j].olen && t<sufwordlen
72
      && suf[j].original[t]==str[sufwordlen-t-1];t++);
72
    && suf[j].original[t]==str[sufwordlen-t-1];t++);
73
    if(t<sufminlen) return -1;
73
  if(t<sufminlen) return -1;
74
    if(t>=suf[j].olen) return j;
74
  if(t>=suf[j].olen) return j;
75
    for(j--,c=str[sufwordlen-1],d=str[sufwordlen-t];
75
  for(j--,c=str[sufwordlen-1],d=str[sufwordlen-t];
76
      j>=0 && suf[j].original[0]==c && suf[j].olen>t
76
    j>=0 && suf[j].original[0]==c && suf[j].olen>t
77
      && suf[j].original[t-1]==d;j--);
77
    && suf[j].original[t-1]==d;j--);
78
    if(j>=0 && suf[j].original[0]==c &&
78
  if(j>=0 && suf[j].original[0]==c &&
79
       strncmp((char*)suf[j].original,(char*)suf[v].original,suf[j].olen)==0)
79
       strncmp((char*)suf[j].original,(char*)suf[v].original,suf[j].olen)==0)
80
      return j;
80
    return j;
81
    else goto backcheck;
81
  else goto backcheck;
82
}
82
}
83
 
83
 
84
/* Prepare dictionary.  */
84
/* Prepare dictionary.  */
85
void suffix_dic(char *sdicname)
85
void suffix_dic(char *sdicname)
86
{
86
{
87
    int i,l;
87
  int i,l;
88
    FILE *suff;
88
  FILE *suff;
89
    char *p1, *p2, *pp;
89
  char *p1, *p2, *pp;
90
    long int flen;
90
  long int flen;
91
 
91
 
92
    suffixcnt=0; sufminlen=100000;
92
  suffixcnt=0; sufminlen=100000;
93
    suff=fopen(sdicname,"r"); if(suff==NULL) return;
93
  suff=fopen(sdicname,"r"); if(suff==NULL) return;
94
    fseek(suff,0,SEEK_END);flen=ftell(suff); fseek(suff,0,SEEK_SET);
94
  fseek(suff,0,SEEK_END);flen=ftell(suff); fseek(suff,0,SEEK_SET);
95
    if(flen>sufbuflim) return;
95
  if(flen>sufbuflim) return;
96
    sufbuf=xmalloc(flen+16);flen=fread(sufbuf,1,flen,suff);
96
  sufbuf=xmalloc(flen+16);flen=fread(sufbuf,1,flen,suff);
97
    fclose(suff);
97
  fclose(suff);
98
    if(flen>0 && flen<sufbuflim) sufbuf[flen]=0;
98
  if(flen>0 && flen<sufbuflim) sufbuf[flen]=0;
99
    else return;
99
  else return;
100
    for(i=0,p1=sufbuf;p1!=NULL && *p1!=0 && i<suflim;p1=p2) {
100
  for(i=0,p1=sufbuf;p1!=NULL && *p1!=0 && i<suflim;p1=p2) {
101
    p2=strchr(p1+1,'\n'); if(p2>p1) *p2++=0;
101
  p2=strchr(p1+1,'\n'); if(p2>p1) *p2++=0;
102
    pp=strchr(p1,':'); if(pp==NULL) continue;
102
  pp=strchr(p1,':'); if(pp==NULL) continue;
103
    *pp++=0;
103
  *pp++=0;
104
    strip_trailing_spaces2(p1); strip_trailing_spaces2(pp);
104
  strip_trailing_spaces2(p1); strip_trailing_spaces2(pp);
105
    singlespace2(p1);
105
  singlespace2(p1);
106
    p1=find_word_start(p1); pp=find_word_start(pp);
106
  p1=find_word_start(p1); pp=find_word_start(pp);
107
    if(*p1==0) continue;
107
  if(*p1==0) continue;
108
    suf[i].original=(unsigned char*)p1; suf[i].olen=l=strlen(p1);
108
  suf[i].original=(unsigned char*)p1; suf[i].olen=l=strlen(p1);
109
    if(l<sufminlen) sufminlen=l;
109
  if(l<sufminlen) sufminlen=l;
110
    suf[i].replace=(unsigned char*)pp; i++;
110
  suf[i].replace=(unsigned char*)pp; i++;
111
    }
111
  }
112
    suffixcnt=i;
112
  suffixcnt=i;
113
}
113
}
114
 
114
 
115
/* Suffix translation. */
115
/* Suffix translation. */
116
/* FIXME : ne rien faire si le résultat est de longueur inferieur à 2
116
/* FIXME : ne rien faire si le résultat est de longueur inferieur à 2
117
 * car ensuite cela sera neglige.
117
 * car ensuite cela sera neglige.
118
 */
118
 */
119
 
119
 
120
void suffix_translate(char *p)
120
void suffix_translate(char *p)
121
{
121
{
122
    char *p1, *p2;
122
  char *p1, *p2;
123
    int t;
123
  int t;
124
 
124
 
125
    for(p1=find_word_start(p);
125
  for(p1=find_word_start(p);
126
      p1!=NULL && p1-p<MAX_LINELEN && *p1!=0;
126
    p1!=NULL && p1-p<MAX_LINELEN && *p1!=0;
127
      p1=p2) {
127
    p1=p2) {
128
       if(!isalpha(*p1)) {p2=p1+1; continue;}
128
      if(!isalpha(*p1)) {p2=p1+1; continue;}
129
       for(p2=p1;isalpha(*p2);p2++);
129
      for(p2=p1;isalpha(*p2);p2++);
130
       if(*p2!=0 && strchr(" ,.?!'\"\n`:;()[]{}<>",*p2)==NULL) continue;
130
      if(*p2!=0 && strchr(" ,.?!'\"\n`:;()[]{}<>",*p2)==NULL) continue;
131
       sufwordlen=p2-p1;
131
      sufwordlen=p2-p1;
132
       t=suffix_list(suf,suffixcnt,sizeof(suf[0]),(unsigned char*)p1);
132
      t=suffix_list(suf,suffixcnt,sizeof(suf[0]),(unsigned char*)p1);
133
       if(t<0) continue;
133
      if(t<0) continue;
134
       string_modify3(p,p2-suf[t].olen,p2,(char*)suf[t].replace);
134
      string_modify3(p,p2-suf[t].olen,p2,(char*)suf[t].replace);
135
       p2=p2-suf[t].olen+strlen((char*)suf[t].replace);
135
      p2=p2-suf[t].olen+strlen((char*)suf[t].replace);
136
     }
136
   }
137
     p[MAX_LINELEN]=0;
137
   p[MAX_LINELEN]=0;
138
}
138
}
139
 
139
 
140
void suffix(char *p, char *sdicname)
140
void suffix(char *p, char *sdicname)
141
{
141
{
142
    suffix_dic(sdicname); if(suffixcnt>0) suffix_translate(p);
142
  suffix_dic(sdicname); if(suffixcnt>0) suffix_translate(p);
143
}
143
}
144
 
144