Subversion Repositories wimsdev

Rev

Rev 8161 | Blame | Compare with Previous | Last modification | View Log | RSS feed

  1. /*    Copyright (C) 1998-2003 XIAO, Gang of Universite de Nice - Sophia Antipolis
  2.  *
  3.  *  This program is free software; you can redistribute it and/or modify
  4.  *  it under the terms of the GNU General Public License as published by
  5.  *  the Free Software Foundation; either version 2 of the License, or
  6.  *  (at your option) any later version.
  7.  *
  8.  *  This program is distributed in the hope that it will be useful,
  9.  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
  10.  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  11.  *  GNU General Public License for more details.
  12.  *
  13.  *  You should have received a copy of the GNU General Public License
  14.  *  along with this program; if not, write to the Free Software
  15.  *  Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  16.  */
  17.  
  18. /* Versatile translation according to a dictionary */
  19.  
  20. #include "symtext.h"
  21.  
  22. char inpbuf[MAX_LINELEN+1], troutbuf[2*MAX_LINELEN+2];
  23. struct entry entry[MAX_DICENTRIES];
  24. int entrycount=0;
  25.  
  26. struct dic dic[MAX_DICS];
  27. int diccnt;
  28. int transdic, macrodic;
  29.  
  30. int compare(struct entry *e, const char *s2)
  31. {
  32.     int k;
  33.     k=strncmp((char*)e->original, (char*)s2, e->olen);
  34.     if(k==0 && isalnum(*(s2+e->olen))) return -1;
  35.     else return k;
  36. }
  37.  
  38.         /* searches a list. Returns index if found, -1 if nomatch.
  39.          * Uses binary search, list must be sorted. */
  40. int search_dic(struct entry *list, int items, size_t item_size, const char *str)
  41. {
  42.     int i1,i2,j,k,t,t1;
  43.     unsigned char c;
  44.  
  45.     if(items<=0) return -1;
  46.     j=0; c=str[0];
  47.     k=list[0].original[0]-c; if(k==0) k=compare(list,str);
  48.     if(k==0) goto more; if(k>0) return -1;
  49.     j=items-1; k=list[j].original[0]-c; if(k==0) k=compare(list+j,str);
  50.     if(k==0) return j;
  51.     if(k>0) for(i1=0,i2=j;i2>i1+1;) {
  52.         j=i1+(i2-i1)/2;
  53.         k=list[j].original[0]-c; if(k==0) k=compare(list+j,str);
  54.         if(k==0) goto more;
  55.         if(k>0) {i2=j; continue;}
  56.         if(k<0) {i1=j; continue;}
  57.     }
  58.     if(k>0) {j--;k=compare(list+j,str);}
  59.     more:
  60.     if((t=list[j].earlier)<0) {
  61.         if(k==0) return j; else return -1;
  62.     }
  63.     if(compare(entry+t,str)!=0) return -1;
  64.     for(j=t1=t,k=0;j<items+(list-entry) && entry[j].earlier==t1 && (k=compare(entry+j,str))<=0; j++)
  65.       if(k==0) t=j;
  66.     return t-(list-entry);
  67. }
  68.  
  69. /* Prepare dictionary */
  70. struct dic *prepare_dic(char *fname)
  71. {
  72.     int i,l;
  73.     struct dic *thisdic;
  74.     FILE *dicf;
  75.     char *p1, *p2, *pp;
  76.     char tbuf[256], buf[MAX_LINELEN+1];
  77.     long int flen;
  78.  
  79.     if(diccnt>=MAX_DICS) sym_error("too_many_dictionaries");
  80.     thisdic=dic+diccnt; diccnt++;
  81.     thisdic->len=0;
  82.     thisdic->start=entrycount;
  83.     snprintf(thisdic->name,sizeof(thisdic->name),"%s",fname);
  84.     dicf=fopen(mkfname(NULL,"%s/%s",styledir,fname),"r"); if(dicf==NULL) return NULL;
  85.     fseek(dicf,0,SEEK_END);flen=ftell(dicf); fseek(dicf,0,SEEK_SET);
  86.     if(flen>=MAX_DICSIZE) return NULL;
  87.     thisdic->buf=xmalloc(flen+16);flen=fread(thisdic->buf,1,flen,dicf);
  88.     fclose(dicf);
  89.     if(flen>0 && flen<MAX_DICSIZE) thisdic->buf[flen]=0;
  90.     else return NULL;
  91.     for(i=entrycount,p1=thisdic->buf;p1!=NULL && *p1!=0 && i<MAX_DICENTRIES;p1=p2) {
  92.         p2=strchr(p1+1,'\n'); if(p2>p1) *p2++=0;
  93.         pp=strchr(p1,':'); if(pp==NULL) continue;
  94.         *pp++=0;
  95.         strip_trailing_spaces(p1); strip_trailing_spaces(pp);
  96.         singlespace(p1);
  97.         p1=find_word_start(p1); pp=find_word_start(pp);
  98.         if(*p1==0) continue;
  99.         if(i>entrycount && compare(entry+i-1,p1)>0)
  100.           sym_error("unsorted_dictionary %s: %s > %s.\n",
  101.                 fname,entry[i-1].original,p1);
  102.         if(i>entrycount && strcmp((char*)entry[i-1].original,p1)==0)
  103.           sym_error("duplication_in_dictionary %s: %s.\n",
  104.                 fname,p1);
  105.         entry[i].original=(unsigned char*)p1;
  106.         entry[i].replace=(unsigned char*)pp;
  107.         entry[i].olen=l=strlen(p1); entry[i].earlier=-1;
  108.         if(i>0) {
  109.             int l1,l2;
  110.             l1=entry[i-1].earlier; if(l1>=0) l2=entry[l1].olen;
  111.             else {l2=entry[i-1].olen;l1=i-1;}
  112.             if(l>l2 && isspace(p1[l2])
  113.                && strncmp((char*)entry[l1].original,p1,l2)==0)
  114.               entry[i].earlier=entry[i-1].earlier=l1;
  115.         }
  116.         i++;
  117.     }
  118.     thisdic->len=i-entrycount;
  119.     pp=strrchr(fname,'/'); if(pp==NULL) pp=fname;
  120.     snprintf(tbuf,sizeof(tbuf),"unknown_%s",pp);
  121.     _getdef(defbuf,tbuf,buf);
  122.     p1=find_word_start(buf); *find_word_end(p1)=0;
  123.     for(pp=p1; *pp; pp++) *pp=tolower(*pp);
  124.     thisdic->unknown_type=unk_delete;
  125.     if(strcmp(p1,"leave")==0) thisdic->unknown_type=unk_leave;
  126.     else if(strcmp(p1,"delete")!=0) {
  127.         thisdic->unknown_type=unk_replace;
  128.         snprintf(thisdic->unknown,sizeof(thisdic->unknown),"%s",p1);
  129.     }
  130.     entrycount=i;
  131.     if(debug) fprintf(stderr,"Dictionary %d: %s, %d entries.\n",
  132.                       diccnt,fname,thisdic->len);
  133.     return thisdic;
  134. }
  135.  
  136. /* make the translation. */
  137. void _translate(char *p, int i)
  138. {
  139.     char *p1, *p2, *pp;
  140.     int t;
  141.  
  142.     if(i<0 || i>=diccnt) return;
  143.     if(dic[i].len<=0) return;
  144.     snprintf(troutbuf,sizeof(troutbuf),"%s",p);
  145.     for(p1=find_word_start(troutbuf);
  146.         p1!=NULL && p1-troutbuf<MAX_LINELEN && *p1!=0;
  147.         p1=p2) {
  148.         p2=find_word_end(p1);
  149.         for(pp=p1;pp<p2 && (isalnum(*pp) || strchr("_",*pp)!=NULL);pp++);
  150.         p2=find_word_start(p2);
  151.         if(pp==p1 || (*pp!=0 && strchr(" ,.?!",*pp)==NULL)) continue;
  152.         t=search_dic(entry+dic[i].start,dic[i].len,sizeof(entry[0]),p1);
  153.         if(t<0) {
  154.             switch(dic[i].unknown_type) {
  155.                 case unk_leave: break;
  156.                 case unk_delete: {
  157.                     ovlstrcpy(p1,find_word_start(pp)); p2=p1;
  158.                     break;
  159.                 }
  160.                 case unk_replace: {
  161.                     string_modify(troutbuf,p1,pp,dic[i].unknown);
  162.                     p2=find_word_start(p1+strlen(dic[i].unknown));
  163.                 }
  164.             }
  165.             continue;
  166.         }
  167.         t+=dic[i].start;
  168.         string_modify(troutbuf,p1,p1+strlen((char*)entry[t].original),
  169.                       (char*)entry[t].replace);
  170.         p2=find_word_start(p1+strlen((char*)entry[t].replace));
  171.     }
  172.     snprintf(p,MAX_LINELEN,"%s",troutbuf);
  173. }
  174.  
  175.         /* make translation using file name */
  176. void translate(char *p, char *dicname)
  177. {
  178.     int i;
  179.     for(i=0;i<diccnt && strcmp(dicname,dic[i].name)!=0;i++);
  180.     if(i<diccnt) _translate(p,i);
  181. }
  182.  
  183.         /* Returns dictionary index, or -1 if not found */
  184. int getdic(char *dicname)
  185. {
  186.     int i;
  187.     char *p1, *p2, buf[MAX_LINELEN+1];
  188.     for(i=0;i<diccnt && strcmp(dicname,dic[i].name)!=0;i++);
  189.     if(i<diccnt) return i;
  190.     _getdef(defbuf,"dictionaries",buf);
  191.     p1=wordchr(buf,dicname); if(p1==NULL) return -1;
  192.     for(p2=p1; myisalnum(*p2) || *p2=='.'; p2++);
  193.     if(p2-p1 >= MAX_NAMELEN) return -1;
  194.     *p2=0; i=diccnt;
  195.     prepare_dic(dicname); return i;
  196. }
  197.  
  198.