Subversion Repositories wimsdev

Compare Revisions

Ignore whitespace Rev 7675 → Rev 7676

/trunk/wims/src/Misc/translator.c
15,15 → 15,15
* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*/
 
/* Versatile translation according to a dictionary */
/* Versatile translation according to a dictionary */
 
/*************** Customization: change values hereafter ****************/
 
/* Compile-time limits. The expansions are parenthesized so that each macro
 * behaves as a single value inside any expression (e.g. x % diclim). */

/* limit of dictionary entries */
#define entrylim 32768
/* limit of dictionary length (bytes) */
#define diclim (2*1024*1024)
/* limit of source length (bytes) */
#define sourcelim (16*1024*1024)
 
/***************** Nothing should need change hereafter *****************/
56,13 → 56,13
return p;
}
 
/* Give up on translation: write the input buffer (inpbuf) to standard
 * output unchanged, then terminate the process with status 0.
 * This function does not return. */
void escape(void)
{
    printf("%s", inpbuf);
    exit(0);
}
 
/* Points to the end of the word */
/* Points to the end of the word */
char *find_word_end(char *p)
{
int i;
70,7 → 70,7
return p;
}
 
/* Strips leading spaces */
/* Strips leading spaces */
char *find_word_start(char *p)
{
int i;
78,7 → 78,7
return p;
}
 
/* strip trailing spaces; return string end. */
/* strip trailing spaces; return string end. */
char *strip_trailing_spaces(char *p)
{
char *pp;
96,8 → 96,8
else return k;
}
 
/* searches a list. Returns index if found, -1 if nomatch.
* Uses binary search, list must be sorted. */
/* searches a list. Returns index if found, -1 if nomatch.
* Uses binary search, list must be sorted. */
int search_list(struct entry *list, int items, size_t item_size, const char *str)
{
int i1,i2,j,k,t,t1;
110,68 → 110,68
j=items-1; k=list[j].original[0]-c; if(k==0) k=compare(j,str);
if(k==0) return j;
if(k>0) for(i1=0,i2=j;i2>i1+1;) {
j=i1+(i2-i1)/2;
k=list[j].original[0]-c; if(k==0) k=compare(j,str);
if(k==0) goto more;
if(k>0) {i2=j; continue;}
if(k<0) {i1=j; continue;}
j=i1+(i2-i1)/2;
k=list[j].original[0]-c; if(k==0) k=compare(j,str);
if(k==0) goto more;
if(k>0) {i2=j; continue;}
if(k<0) {i1=j; continue;}
}
if(k>0) {j--;k=compare(j,str);}
more:
if((t=list[j].earlier)<0) {
if(k==0) return j; else return -1;
if(k==0) return j; else return -1;
}
if(compare(t,str)!=0) return -1;
for(j=t1=t,k=0;j<items && list[j].earlier==t1 && (k=compare(j,str))<=0; j++) {
if(k==0) t=j;
if(k==0) t=j;
}
return t;
}
 
/* Modify a string in place: replace the span [bad_beg, bad_end) of the
 * NUL-terminated string `start` by the printf-style expansion of `good`
 * and the variadic arguments that follow it.
 *
 * The buffer holding `start` must be at least 2*MAX_LINELEN bytes long.
 * The expansion is truncated to MAX_LINELEN characters. If the resulting
 * string would reach 2*MAX_LINELEN characters, or the formatting fails,
 * the string is left untouched.
 *
 * NOTE(review): `good` is used as a format string; callers passing text
 * that is not a trusted literal should verify it contains no stray '%'. */
void string_modify(char *start, char *bad_beg, char *bad_end, char *good,...)
{
    char buf[MAX_LINELEN+1];
    va_list vp;
    size_t newlen, taillen;
    int n;

    va_start(vp,good);
    n=vsnprintf(buf,sizeof(buf),good,vp);
    va_end(vp);
    if(n<0) return; /* formatting error: buf content is unspecified. */
    newlen=strlen(buf); taillen=strlen(bad_end);
    if(strlen(start)-(bad_end-bad_beg)+newlen>=2*MAX_LINELEN)
      return; /* this is an error situation. */
    /* Splice directly inside the destination: first shift the tail
     * (terminator included) to its final position, then drop the
     * replacement in front of it. Appending the tail to buf instead could
     * overflow buf, whose size is only MAX_LINELEN+1. */
    memmove(bad_beg+newlen,bad_end,taillen+1);
    memcpy(bad_beg,buf,newlen);
}
 
/* Normalize whitespace of the NUL-terminated string p, in place.
 *
 * When the global flag `leaveline` is zero: every whitespace character
 * becomes ' ' and each run of whitespace collapses to a single ' '.
 *
 * When `leaveline` is non-zero: newlines are kept. A carriage return
 * (13) met at the start of a whitespace run is removed, whitespace
 * directly following a kept '\n' is removed (up to the next '\n'), and a
 * run of whitespace inside a line is reduced to its first character
 * (which is not converted to ' ').
 *
 * Characters are passed to isspace() as unsigned char, since the
 * behavior of isspace() is undefined for negative values (bytes >= 0x80
 * where char is signed).
 *
 * ovlstrcpy() is defined elsewhere; it is presumably an overlap-safe
 * string copy -- TODO confirm. */
void singlespace(char *p)
{
    char *pp, *p2;

    for(pp=p;*pp;pp++) {
      if(!isspace((unsigned char)*pp)) continue;
      if(leaveline) {
          if(*pp==13) ovlstrcpy(pp,pp+1);
          /* the carriage return may have been the last character */
          if(*pp==0) return;
          if(*pp=='\n') pp++;
          else {
            pp++;
            /* never let the loop increment step over the terminator */
            if(*pp==0) return;
            if(!isspace((unsigned char)*pp) || *pp=='\n') continue;
          }
          /* remove the whitespace starting at pp, up to the next '\n' */
          for(p2=pp; isspace((unsigned char)*p2) && *p2!='\n'; p2++);
          if(p2>pp) ovlstrcpy(pp,p2);
          pp--;
      }
      else {
          if(*pp!=' ') *pp=' ';
          pp++;
          /* never let the loop increment step over the terminator */
          if(*pp==0) return;
          if(!isspace((unsigned char)*pp)) continue;
          /* remove the rest of the whitespace run */
          for(p2=pp;isspace((unsigned char)*p2);p2++);
          ovlstrcpy(pp,p2); pp--;
      }
    }
}
 
#include "suffix.c"
 
/* Prepare dictionary */
/* Prepare dictionary */
void prepare_dic(void)
{
int i,l;
180,17 → 180,17
long int flen;
fname=getenv("w_dictionary");
if(fname==NULL || *fname==0 || *fname=='/' || strstr(fname,"..")) {
p1=getenv("w_module"); if(p1 && strncmp(p1,"classes/",strlen("classes/"))==0) {
p1=getenv("w_wims_class"); p2=getenv("w_wims_home");
if(p1 && p2) {
snprintf(buf,sizeof(buf),"%s/log/classes/%s/",p2,p1);
if(strncmp(fname,buf,strlen(buf))!=0) escape();
}
else escape();
}
else {
p1=getenv("untrust"); if(p1 && strstr(p1,"yes")) escape();
}
p1=getenv("w_module"); if(p1 && strncmp(p1,"classes/",strlen("classes/"))==0) {
p1=getenv("w_wims_class"); p2=getenv("w_wims_home");
if(p1 && p2) {
snprintf(buf,sizeof(buf),"%s/log/classes/%s/",p2,p1);
if(strncmp(fname,buf,strlen(buf))!=0) escape();
}
else escape();
}
else {
p1=getenv("untrust"); if(p1 && strstr(p1,"yes")) escape();
}
}
/* replace escape() by return if there is some suffix dictionary, */
 
202,35 → 202,35
if(flen>0 && flen<diclim) dicbuf[flen]=0;
else escape();
for(i=0,p1=dicbuf;p1!=NULL && *p1!=0 && i<entrylim;p1=p2) {
p2=strchr(p1+1,'\n'); if(p2>p1) *p2++=0;
pp=strchr(p1,':'); if(pp==NULL) continue;
*pp++=0;
strip_trailing_spaces(p1); strip_trailing_spaces(pp);
singlespace(p1);
p1=find_word_start(p1); pp=find_word_start(pp);
if(*p1==0) continue;
if(has_digits==0) {
char *p;
for(p=p1;*p!=0 && p<pp && !isdigit(*p);p++);
if(isdigit(*p)) has_digits=1;
}
entry[i].original=(unsigned char*)p1;
entry[i].replace=(unsigned char*)pp;
entry[i].olen=l=strlen(p1); entry[i].earlier=-1;
if(i>0) {
int l1,l2;
l1=entry[i-1].earlier; if(l1>=0) l2=entry[l1].olen;
else {l2=entry[i-1].olen;l1=i-1;}
if(l>l2 && isspace(p1[l2])
&& strncmp((char*)entry[l1].original,p1,l2)==0)
entry[i].earlier=entry[i-1].earlier=l1;
}
i++;
p2=strchr(p1+1,'\n'); if(p2>p1) *p2++=0;
pp=strchr(p1,':'); if(pp==NULL) continue;
*pp++=0;
strip_trailing_spaces(p1); strip_trailing_spaces(pp);
singlespace(p1);
p1=find_word_start(p1); pp=find_word_start(pp);
if(*p1==0) continue;
if(has_digits==0) {
char *p;
for(p=p1;*p!=0 && p<pp && !isdigit(*p);p++);
if(isdigit(*p)) has_digits=1;
}
entry[i].original=(unsigned char*)p1;
entry[i].replace=(unsigned char*)pp;
entry[i].olen=l=strlen(p1); entry[i].earlier=-1;
if(i>0) {
int l1,l2;
l1=entry[i-1].earlier; if(l1>=0) l2=entry[l1].olen;
else {l2=entry[i-1].olen;l1=i-1;}
if(l>l2 && isspace(p1[l2])
&& strncmp((char*)entry[l1].original,p1,l2)==0)
entry[i].earlier=entry[i-1].earlier=l1;
}
i++;
}
entrycount=i; if(entrycount<=0) escape();
}
 
/* now make the translation. */
/* now make the translation. */
void translate(void)
{
char *p1, *p2, *pp;
237,35 → 237,35
int t;
 
for(p1=find_word_start(outbuf);
p1!=NULL && p1-outbuf<MAX_LINELEN && *p1!=0;
p1=p2) {
p2=find_word_end(p1);
for(pp=p1;pp<p2 &&
((!has_digits && isalpha(*pp)) ||
(has_digits && isalnum(*pp)) || (*pp&128)!=0 ||
strchr("_",*pp)!=NULL);pp++);
p2=find_word_start(p2);
if(pp==p1 ||
(has_digits==0 && isdigit(*pp)) ||
(*pp!=0 && !isspace(*pp) && strchr(",.?!/;",*pp)==NULL)) continue;
t=search_list(entry,entrycount,sizeof(entry[0]),p1);
if(t<0) {
switch(unknown_type) {
case unk_leave: break;
case unk_delete: {
ovlstrcpy(p1,find_word_start(pp)); p2=p1;
break;
}
case unk_replace: {
string_modify(outbuf,p1,pp,unkbuf);
p2=find_word_start(p1+strlen(unkbuf));
}
}
continue;
}
string_modify(outbuf,p1,p1+strlen((char*)entry[t].original),
(char*)entry[t].replace);
p2=find_word_start(p1+strlen((char*)entry[t].replace));
p1!=NULL && p1-outbuf<MAX_LINELEN && *p1!=0;
p1=p2) {
p2=find_word_end(p1);
for(pp=p1;pp<p2 &&
((!has_digits && isalpha(*pp)) ||
(has_digits && isalnum(*pp)) || (*pp&128)!=0 ||
strchr("_",*pp)!=NULL);pp++);
p2=find_word_start(p2);
if(pp==p1 ||
(has_digits==0 && isdigit(*pp)) ||
(*pp!=0 && !isspace(*pp) && strchr(",.?!/;",*pp)==NULL)) continue;
t=search_list(entry,entrycount,sizeof(entry[0]),p1);
if(t<0) {
switch(unknown_type) {
case unk_leave: break;
case unk_delete: {
ovlstrcpy(p1,find_word_start(pp)); p2=p1;
break;
}
case unk_replace: {
string_modify(outbuf,p1,pp,unkbuf);
p2=find_word_start(p1+strlen(unkbuf));
}
}
continue;
}
string_modify(outbuf,p1,p1+strlen((char*)entry[t].original),
(char*)entry[t].replace);
p2=find_word_start(p1+strlen((char*)entry[t].replace));
}
outbuf[MAX_LINELEN]=0; printf("%s",outbuf);
}
293,34 → 293,34
 
switches();
if(!fromfile) {
s=getenv("wims_exec_parm");
if(s==NULL || *s==0) return 0; /* Nothing to translate */
l=strlen(s); if(l<=0 || l>sourcelim) return 0; /* too long */
inpbuf=xmalloc(l+16); memmove(inpbuf,s,l+1);
s=getenv("wims_exec_parm");
if(s==NULL || *s==0) return 0; /* Nothing to translate */
l=strlen(s); if(l<=0 || l>sourcelim) return 0; /* too long */
inpbuf=xmalloc(l+16); memmove(inpbuf,s,l+1);
}
else {
FILE *f;
s=getenv("translator_input"); if(s==NULL || *s==0) return 0;
f=fopen(s,"r"); if(f==NULL) return 0; /* no file */
fseek(f,0,SEEK_END); l=ftell(f); fseek(f,0,SEEK_SET);
if(l<=0 || l>sourcelim) return 0; /* too long */
inpbuf=xmalloc(l+16); (void)fread(inpbuf,1,l,f); fclose(f); inpbuf[l]=0;
FILE *f;
s=getenv("translator_input"); if(s==NULL || *s==0) return 0;
f=fopen(s,"r"); if(f==NULL) return 0; /* no file */
fseek(f,0,SEEK_END); l=ftell(f); fseek(f,0,SEEK_SET);
if(l<=0 || l>sourcelim) return 0; /* too long */
inpbuf=xmalloc(l+16); (void)fread(inpbuf,1,l,f); fclose(f); inpbuf[l]=0;
}
p1=inpbuf; prepare_dic();
if(leaveline) c='\n'; else c=' ';
do {
l=strlen(p1);
if(l>MAX_LINELEN-1024) l=MAX_LINELEN-1024; p2=p1+l;
if(*p2) {
while(p2>p1 && *p2!=c) p2--;
}
if(p2<=p1) return 0;
memmove(outbuf,p1,p2-p1); outbuf[p2-p1]=0;
singlespace(outbuf);
s=getenv("w_suffix_dictionary");
if(s!=NULL && *s!=0) suffix(outbuf,s);
translate();
if(*p2==c) {printf("%c",c); p1=++p2;}
l=strlen(p1);
if(l>MAX_LINELEN-1024) l=MAX_LINELEN-1024; p2=p1+l;
if(*p2) {
while(p2>p1 && *p2!=c) p2--;
}
if(p2<=p1) return 0;
memmove(outbuf,p1,p2-p1); outbuf[p2-p1]=0;
singlespace(outbuf);
s=getenv("w_suffix_dictionary");
if(s!=NULL && *s!=0) suffix(outbuf,s);
translate();
if(*p2==c) {printf("%c",c); p1=++p2;}
}
while(*p2);
return 0;