Subversion Repositories wimsdev

Compare Revisions

Ignore whitespace Rev 6894 → Rev 6895

/trunk/wims/src/Misc/suffix.c
15,7 → 15,7
* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*/
 
#define suflim 256
#define suflim 256
#define sufbuflim 102400
 
int suffixcnt;
23,33 → 23,33
unsigned char *original;
int olen;
unsigned char *replace;
}
}
suf[suflim];
char *sufbuf;
int sufwordlen, sufminlen;
 
/* Suffix translation, to be used within translator. */

/* Compare dictionary entry suf[t] with the current word, reading the word
 * backwards from its last character.
 *
 * t:  index into the global table suf[].
 * s2: start of the word; its length is taken from the global sufwordlen.
 *
 * Returns 0 when the entry equals the whole reversed word, -1 when the
 * entry matches completely but the word has more characters left, a
 * positive value when the word runs out before the entry does, and
 * otherwise the difference between the first pair of differing characters.
 */
int sufcomp(int t, const unsigned char *s2)
{
    int k;

    /* Length of the common part of the entry and the reversed word. */
    for(k=0; k<suf[t].olen && k<sufwordlen
          && suf[t].original[k]==s2[sufwordlen-k-1]; k++);
    if(k>=suf[t].olen) {
        if(sufwordlen>k) return -1; else return 0;
    }
    /* The word is exhausted but the entry is not: the entry sorts after
     * the word.  Returning here also avoids reading s2[-1], one byte
     * before the start of the word. */
    if(k>=sufwordlen) return suf[t].original[k];
    return suf[t].original[k]-s2[sufwordlen-k-1];
}
 
/* searches a list. Returns index if found, -1 if nomatch.
 * This routine is faster than naive one by one comparisons,
 * and is especially suited for large lists.
 *
 * list, item_size: not used; the search always runs on the global
 *                  table suf[].
 * items:           number of entries of suf[] to search.
 * str:             start of the word; its length is the global sufwordlen.
 *
 * A binary search driven by sufcomp() locates the last entry that sorts
 * before the word.  From there the table is walked backwards looking for
 * an entry that is entirely contained in the end of the word.
 */
int suffix_list(void *list, int items, size_t item_size, const unsigned char *str)
{
    int i1,i2,j,k,t,v;
    unsigned char c,d;

    (void)list; (void)item_size;
    if(items<=0) return -1;
    k=sufcomp(0,str);
    if(k==0) return 0;
    if(k>0) return -1;
    j=items-1; k=sufcomp(j,str);
    if(k==0) return j;
    if(k>0) for(i1=0,i2=j; i2>i1+1;) {
        j=i1+(i2-i1)/2; k=sufcomp(j,str);
        if(k==0) return j;
        /* k is non-zero here: narrow the interval on the proper side. */
        if(k>0) i2=j; else i1=j;
    }
    if(k>0 && j>0) j--;
    for(;;) {
        v=j;
        /* t = number of characters entry j shares with the reversed word. */
        for(t=0; t<suf[j].olen && t<sufwordlen
              && suf[j].original[t]==str[sufwordlen-t-1]; t++);
        if(t<sufminlen) return -1;
        if(t>=suf[j].olen) return j;
        /* Skip the preceding entries that are longer than t and agree
         * with the word on the first and the t-th character. */
        c=str[sufwordlen-1]; d=str[sufwordlen-t];
        for(j--; j>=0 && suf[j].original[0]==c && suf[j].olen>t
              && suf[j].original[t-1]==d; j--);
        /* Ran off the front of the table: nothing left to try.  Without
         * this check the next round would index suf[-1]. */
        if(j<0) return -1;
        if(suf[j].original[0]==c &&
           strncmp((char*)suf[j].original,(char*)suf[v].original,suf[j].olen)==0)
            return j;
        /* Otherwise examine entry j itself on the next round. */
    }
}
 
/* Prepare dictionary. */
/* Prepare dictionary. */
/* Fill the global table suf[] from the dictionary text held in sufbuf.
 * Each usable line has the form "original:replace".  Lines without a ':'
 * and lines whose original part is empty are ignored.  At most suflim
 * entries are stored; the number stored ends up in suffixcnt and the
 * shortest original length seen lowers sufminlen.
 *
 * sdicname: not referenced in the lines visible here -- presumably the
 *           name of the dictionary file; TODO confirm.
 * NOTE(review): the declarations of p1, p2, pp and flen and the code that
 * loads sufbuf fall in lines this revision comparison does not show.
 */
void suffix_dic(char *sdicname)
{
int i,l;
92,21 → 92,25
/* Terminate the buffer at flen; give up when flen is out of range. */
if(flen>0 && flen<sufbuflim) sufbuf[flen]=0;
else return;
/* One round per line of sufbuf: p1 is the current line, p2 the next one.
 * NOTE(review): the loop body below is listed twice because this view
 * shows the old and the new revision of the same lines. */
for(i=0,p1=sufbuf;p1!=NULL && *p1!=0 && i<suflim;p1=p2) {
/* Cut the line at its newline, then split it at the first ':'. */
p2=strchr(p1+1,'\n'); if(p2>p1) *p2++=0;
pp=strchr(p1,':'); if(pp==NULL) continue;
*pp++=0;
/* Helpers are defined elsewhere; presumably they trim and collapse blanks. */
strip_trailing_spaces(p1); strip_trailing_spaces(pp);
singlespace(p1);
p1=find_word_start(p1); pp=find_word_start(pp);
if(*p1==0) continue;
/* The table keeps pointers into sufbuf; nothing is copied. */
suf[i].original=(unsigned char*)p1; suf[i].olen=l=strlen(p1);
if(l<sufminlen) sufminlen=l;
suf[i].replace=(unsigned char*)pp; i++;
p2=strchr(p1+1,'\n'); if(p2>p1) *p2++=0;
pp=strchr(p1,':'); if(pp==NULL) continue;
*pp++=0;
strip_trailing_spaces(p1); strip_trailing_spaces(pp);
singlespace(p1);
p1=find_word_start(p1); pp=find_word_start(pp);
if(*p1==0) continue;
suf[i].original=(unsigned char*)p1; suf[i].olen=l=strlen(p1);
if(l<sufminlen) sufminlen=l;
suf[i].replace=(unsigned char*)pp; i++;
}
/* Publish the number of entries stored. */
suffixcnt=i;
}
 
/* Suffix translation. */
/* FIXME: do nothing if the result would be shorter than 2 characters,
 * since it will be ignored afterwards anyway.
 */

/* Rewrite, in place, the ending of every alphabetic word of p that has a
 * matching entry in the suffix table suf[].
 *
 * p: line buffer, modified in place.  The function writes p[MAX_LINELEN],
 *    so the buffer must hold at least MAX_LINELEN+1 bytes.
 */
void suffix_translate(char *p)
{
    char *p1, *p2;
    int t;

    for(p1=find_word_start(p);
        p1!=NULL && p1-p<MAX_LINELEN && *p1!=0;
        p1=p2) {
        /* <ctype.h> functions need a value representable as unsigned
         * char; a plain char may be negative for bytes >= 0x80. */
        if(!isalpha((unsigned char)*p1)) {p2=p1+1; continue;}
        for(p2=p1; isalpha((unsigned char)*p2); p2++);
        /* Only translate words followed by the end of the string or by
         * one of the listed separators. */
        if(*p2!=0 && strchr(" ,.?!'\"\n`:;()[]{}<>",*p2)==NULL) continue;
        sufwordlen=p2-p1;
        t=suffix_list(suf,suffixcnt,sizeof(suf[0]),(unsigned char*)p1);
        if(t<0) continue;
        /* Replace the last olen characters of the word, then resume the
         * scan right after the replacement text. */
        string_modify(p,p2-suf[t].olen,p2,(char*)suf[t].replace);
        p2=p2-suf[t].olen+strlen((char*)suf[t].replace);
    }
    p[MAX_LINELEN]=0;
}