Subversion Repositories wimsdev

Rev

Rev 3718 | Go to most recent revision | Blame | Compare with Previous | Last modification | View Log | RSS feed

  1. /*    Copyright (C) 1998-2003 XIAO, Gang of Universite de Nice - Sophia Antipolis
  2.  *
  3.  *  This program is free software; you can redistribute it and/or modify
  4.  *  it under the terms of the GNU General Public License as published by
  5.  *  the Free Software Foundation; either version 2 of the License, or
  6.  *  (at your option) any later version.
  7.  *
  8.  *  This program is distributed in the hope that it will be useful,
  9.  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
  10.  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  11.  *  GNU General Public License for more details.
  12.  *
  13.  *  You should have received a copy of the GNU General Public License
  14.  *  along with this program; if not, write to the Free Software
  15.  *  Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  16.  */
  17.  
  18.         /* Versatile translation according to a dictionary */
  19.  
  20. char inpbuf[MAX_LINELEN+1], troutbuf[2*MAX_LINELEN+2];
  21. struct entry {
  22.     unsigned char *original, *replace;
  23.     int olen,earlier;
  24. } entry[MAX_DICENTRIES];
  25. int entrycount=0;
  26.  
  27. struct dic {
  28.     char name[MAX_FNAME+1];
  29.     char unknown[256];
  30.     char *buf;
  31.     int unknown_type;
  32.     int start;
  33.     int len;
  34. } dic[MAX_DICS];
  35. int diccnt;
  36. int transdic, macrodic;
  37.  
  38. enum {
  39.     unk_delete, unk_leave, unk_replace
  40. };
  41.  
  42. int compare(struct entry *e, const char *s2)
  43. {
  44.     int k;
  45.     k=strncmp(e->original,s2,e->olen);
  46.     if(k==0 && isalnum(*(s2+e->olen))) return -1;
  47.     else return k;
  48. }
  49.  
  50.         /* searches a list. Returns index if found, -1 if nomatch.
  51.          * Uses binary search, list must be sorted. */
  52. int search_dic(struct entry *list, int items, size_t item_size, const char *str)
  53. {
  54.     int i1,i2,j,k,t,t1;
  55.     unsigned char c;
  56.  
  57.     if(items<=0) return -1;
  58.     j=0; c=str[0];
  59.     k=list[0].original[0]-c; if(k==0) k=compare(list,str);
  60.     if(k==0) goto more; if(k>0) return -1;
  61.     j=items-1; k=list[j].original[0]-c; if(k==0) k=compare(list+j,str);
  62.     if(k==0) return j;
  63.     if(k>0) for(i1=0,i2=j;i2>i1+1;) {
  64.         j=i1+(i2-i1)/2;
  65.         k=list[j].original[0]-c; if(k==0) k=compare(list+j,str);
  66.         if(k==0) goto more;
  67.         if(k>0) {i2=j; continue;}
  68.         if(k<0) {i1=j; continue;}      
  69.     }
  70.     if(k>0) {j--;k=compare(list+j,str);}
  71.     more:
  72.     if((t=list[j].earlier)<0) {
  73.         if(k==0) return j; else return -1;
  74.     }
  75.     if(compare(entry+t,str)!=0) return -1;
  76.     for(j=t1=t,k=0;j<items+(list-entry) && entry[j].earlier==t1 && (k=compare(entry+j,str))<=0; j++)
  77.       if(k==0) t=j;
  78.     return t-(list-entry);
  79. }
  80.  
  81. #include "suffix.c"
  82.  
  83.         /* Prepare dictionary */
  84. struct dic *prepare_dic(char *fname)
  85. {
  86.     int i,l;
  87.     struct dic *thisdic;
  88.     FILE *dicf;
  89.     char *p1, *p2, *pp;
  90.     char tbuf[256], buf[MAX_LINELEN+1];
  91.     long int flen;
  92.    
  93.     if(diccnt>=MAX_DICS) error("too_many_dictionaries");
  94.     thisdic=dic+diccnt; diccnt++;
  95.     thisdic->len=0;
  96.     thisdic->start=entrycount;
  97.     snprintf(thisdic->name,sizeof(thisdic->name),"%s",fname);
  98.     dicf=fopen(mkfname(NULL,"%s/%s",styledir,fname),"r"); if(dicf==NULL) return NULL;
  99.     fseek(dicf,0,SEEK_END);flen=ftell(dicf); fseek(dicf,0,SEEK_SET);
  100.     if(flen>=MAX_DICSIZE) return NULL;
  101.     thisdic->buf=xmalloc(flen+16);flen=fread(thisdic->buf,1,flen,dicf);
  102.     fclose(dicf);
  103.     if(flen>0 && flen<MAX_DICSIZE) thisdic->buf[flen]=0;
  104.     else return NULL;
  105.     for(i=entrycount,p1=thisdic->buf;p1!=NULL && *p1!=0 && i<MAX_DICENTRIES;p1=p2) {
  106.         p2=strchr(p1+1,'\n'); if(p2>p1) *p2++=0;
  107.         pp=strchr(p1,':'); if(pp==NULL) continue;
  108.         *pp++=0;
  109.         strip_trailing_spaces(p1); strip_trailing_spaces(pp);
  110.         singlespace(p1);
  111.         p1=find_word_start(p1); pp=find_word_start(pp);
  112.         if(*p1==0) continue;
  113.         if(i>entrycount && compare(entry+i-1,p1)>0)
  114.           error("unsorted_dictionary %s: %s > %s.\n",
  115.                 fname,entry[i-1].original,p1);
  116.         if(i>entrycount && strcmp(entry[i-1].original,p1)==0)
  117.           error("duplication_in_dictionary %s: %s.\n",
  118.                 fname,p1);
  119.         entry[i].original=p1; entry[i].replace=pp;
  120.         entry[i].olen=l=strlen(p1); entry[i].earlier=-1;
  121.         if(i>0) {
  122.             int l1,l2;
  123.             l1=entry[i-1].earlier; if(l1>=0) l2=entry[l1].olen;
  124.             else {l2=entry[i-1].olen;l1=i-1;}
  125.             if(l>l2 && isspace(p1[l2])
  126.                && strncmp(entry[l1].original,p1,l2)==0)
  127.               entry[i].earlier=entry[i-1].earlier=l1;
  128.         }
  129.         i++;
  130.     }
  131.     thisdic->len=i-entrycount;
  132.     pp=strrchr("fname",'/'); if(pp==NULL) pp=fname;
  133.     snprintf(tbuf,sizeof(tbuf),"unknown_%s",pp);
  134.     _getdef(defbuf,tbuf,buf);
  135.     p1=find_word_start(buf); *find_word_end(p1)=0;
  136.     for(pp=p1; *pp; pp++) *pp=tolower(*pp);
  137.     thisdic->unknown_type=unk_delete;
  138.     if(strcmp(p1,"leave")==0) thisdic->unknown_type=unk_leave;
  139.     else if(strcmp(p1,"delete")!=0) {
  140.         thisdic->unknown_type=unk_replace;
  141.         snprintf(thisdic->unknown,sizeof(thisdic->unknown),"%s",p1);
  142.     }
  143.     entrycount=i;
  144.     if(debug) fprintf(stderr,"Dictionary %d: %s, %d entries.\n",
  145.                       diccnt,fname,thisdic->len);
  146.     return thisdic;
  147. }
  148.  
  149.         /* make the translation. */
  150. void _translate(char *p, int i)
  151. {
  152.     char *p1, *p2, *pp;
  153.     int t;
  154.  
  155.     if(i<0 || i>=diccnt) return;
  156.     if(dic[i].len<=0) return;
  157.     snprintf(troutbuf,sizeof(troutbuf),"%s",p);
  158.     for(p1=find_word_start(troutbuf);
  159.         p1!=NULL && p1-troutbuf<MAX_LINELEN && *p1!=0;
  160.         p1=p2) {
  161.         p2=find_word_end(p1);
  162.         for(pp=p1;pp<p2 && (isalnum(*pp) || strchr("_",*pp)!=NULL);pp++);
  163.         p2=find_word_start(p2);
  164.         if(pp==p1 || (*pp!=0 && strchr(" ,.?!",*pp)==NULL)) continue;
  165.         t=search_dic(entry+dic[i].start,dic[i].len,sizeof(entry[0]),p1);
  166.         if(t<0) {
  167.             switch(dic[i].unknown_type) {
  168.                 case unk_leave: break;
  169.                 case unk_delete: {
  170.                     strcpy(p1,find_word_start(pp)); p2=p1;
  171.                     break;
  172.                 }
  173.                 case unk_replace: {
  174.                     string_modify(troutbuf,p1,pp,dic[i].unknown);
  175.                     p2=find_word_start(p1+strlen(dic[i].unknown));
  176.                 }
  177.             }
  178.             continue;
  179.         }
  180.         t+=dic[i].start;
  181.         string_modify(troutbuf,p1,p1+strlen(entry[t].original),
  182.                       entry[t].replace);
  183.         p2=find_word_start(p1+strlen(entry[t].replace));
  184.     }
  185.     snprintf(p,MAX_LINELEN,"%s",troutbuf);
  186. }
  187.  
  188.         /* make translation using file name */
  189. void translate(char *p, char *dicname)
  190. {
  191.     int i;
  192.     for(i=0;i<diccnt && strcmp(dicname,dic[i].name)!=0;i++);
  193.     if(i<diccnt) _translate(p,i);
  194. }
  195.  
  196.         /* Returns dictionary index, or -1 if not found */
  197. int getdic(char *dicname)
  198. {
  199.     int i;
  200.     char *p1, *p2, buf[MAX_LINELEN+1];
  201.     for(i=0;i<diccnt && strcmp(dicname,dic[i].name)!=0;i++);
  202.     if(i<diccnt) return i;
  203.     _getdef(defbuf,"dictionaries",buf);
  204.     p1=wordchr(buf,dicname); if(p1==NULL) return -1;
  205.     for(p2=p1; myisalnum(*p2) || *p2=='.'; p2++);
  206.     if(p2-p1 >= MAX_NAMELEN) return -1;
  207.     *p2=0; i=diccnt;
  208.     prepare_dic(dicname); return i;
  209. }
  210.  
  211.