Subversion Repositories wimsdev

Rev

Rev 8161 | Details | Compare with Previous | Last modification | View Log | RSS feed

Rev Author Line No. Line
10 reyssat 1
/*    Copyright (C) 1998-2003 XIAO, Gang of Universite de Nice - Sophia Antipolis
2
 *
3
 *  This program is free software; you can redistribute it and/or modify
4
 *  it under the terms of the GNU General Public License as published by
5
 *  the Free Software Foundation; either version 2 of the License, or
6
 *  (at your option) any later version.
7
 *
8
 *  This program is distributed in the hope that it will be useful,
9
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
10
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
11
 *  GNU General Public License for more details.
12
 *
13
 *  You should have received a copy of the GNU General Public License
14
 *  along with this program; if not, write to the Free Software
15
 *  Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
16
 */
17
 
8161 bpr 18
/* Versatile translation according to a dictionary */
10 reyssat 19
 
8161 bpr 20
#include "symtext.h"
21
 
10 reyssat 22
char inpbuf[MAX_LINELEN+1], troutbuf[2*MAX_LINELEN+2];
8161 bpr 23
struct entry entry[MAX_DICENTRIES];
10 reyssat 24
int entrycount=0;
25
 
8161 bpr 26
struct dic dic[MAX_DICS];
10 reyssat 27
int diccnt;
28
int transdic, macrodic;
29
 
30
int compare(struct entry *e, const char *s2)
31
{
32
    int k;
3808 kbelabas 33
    k=strncmp((char*)e->original, (char*)s2, e->olen);
10 reyssat 34
    if(k==0 && isalnum(*(s2+e->olen))) return -1;
35
    else return k;
36
}
37
 
8161 bpr 38
        /* searches a list. Returns index if found, -1 if nomatch.
10 reyssat 39
         * Uses binary search, list must be sorted. */
40
int search_dic(struct entry *list, int items, size_t item_size, const char *str)
41
{
42
    int i1,i2,j,k,t,t1;
43
    unsigned char c;
44
 
45
    if(items<=0) return -1;
46
    j=0; c=str[0];
47
    k=list[0].original[0]-c; if(k==0) k=compare(list,str);
48
    if(k==0) goto more; if(k>0) return -1;
49
    j=items-1; k=list[j].original[0]-c; if(k==0) k=compare(list+j,str);
50
    if(k==0) return j;
51
    if(k>0) for(i1=0,i2=j;i2>i1+1;) {
52
        j=i1+(i2-i1)/2;
53
        k=list[j].original[0]-c; if(k==0) k=compare(list+j,str);
54
        if(k==0) goto more;
55
        if(k>0) {i2=j; continue;}
8161 bpr 56
        if(k<0) {i1=j; continue;}
10 reyssat 57
    }
58
    if(k>0) {j--;k=compare(list+j,str);}
59
    more:
60
    if((t=list[j].earlier)<0) {
61
        if(k==0) return j; else return -1;
62
    }
63
    if(compare(entry+t,str)!=0) return -1;
8161 bpr 64
    for(j=t1=t,k=0;j<items+(list-entry) && entry[j].earlier==t1 && (k=compare(entry+j,str))<=0; j++)
10 reyssat 65
      if(k==0) t=j;
66
    return t-(list-entry);
67
}
68
 
8161 bpr 69
/* Prepare dictionary */
10 reyssat 70
struct dic *prepare_dic(char *fname)
71
{
72
    int i,l;
73
    struct dic *thisdic;
74
    FILE *dicf;
75
    char *p1, *p2, *pp;
76
    char tbuf[256], buf[MAX_LINELEN+1];
77
    long int flen;
8161 bpr 78
 
8195 bpr 79
    if(diccnt>=MAX_DICS) sym_error("too_many_dictionaries");
10 reyssat 80
    thisdic=dic+diccnt; diccnt++;
81
    thisdic->len=0;
82
    thisdic->start=entrycount;
83
    snprintf(thisdic->name,sizeof(thisdic->name),"%s",fname);
84
    dicf=fopen(mkfname(NULL,"%s/%s",styledir,fname),"r"); if(dicf==NULL) return NULL;
85
    fseek(dicf,0,SEEK_END);flen=ftell(dicf); fseek(dicf,0,SEEK_SET);
86
    if(flen>=MAX_DICSIZE) return NULL;
87
    thisdic->buf=xmalloc(flen+16);flen=fread(thisdic->buf,1,flen,dicf);
88
    fclose(dicf);
89
    if(flen>0 && flen<MAX_DICSIZE) thisdic->buf[flen]=0;
90
    else return NULL;
91
    for(i=entrycount,p1=thisdic->buf;p1!=NULL && *p1!=0 && i<MAX_DICENTRIES;p1=p2) {
92
        p2=strchr(p1+1,'\n'); if(p2>p1) *p2++=0;
93
        pp=strchr(p1,':'); if(pp==NULL) continue;
94
        *pp++=0;
95
        strip_trailing_spaces(p1); strip_trailing_spaces(pp);
96
        singlespace(p1);
97
        p1=find_word_start(p1); pp=find_word_start(pp);
98
        if(*p1==0) continue;
99
        if(i>entrycount && compare(entry+i-1,p1)>0)
8195 bpr 100
          sym_error("unsorted_dictionary %s: %s > %s.\n",
10 reyssat 101
                fname,entry[i-1].original,p1);
3808 kbelabas 102
        if(i>entrycount && strcmp((char*)entry[i-1].original,p1)==0)
8195 bpr 103
          sym_error("duplication_in_dictionary %s: %s.\n",
10 reyssat 104
                fname,p1);
3808 kbelabas 105
        entry[i].original=(unsigned char*)p1;
8161 bpr 106
        entry[i].replace=(unsigned char*)pp;
10 reyssat 107
        entry[i].olen=l=strlen(p1); entry[i].earlier=-1;
108
        if(i>0) {
109
            int l1,l2;
110
            l1=entry[i-1].earlier; if(l1>=0) l2=entry[l1].olen;
111
            else {l2=entry[i-1].olen;l1=i-1;}
112
            if(l>l2 && isspace(p1[l2])
8161 bpr 113
               && strncmp((char*)entry[l1].original,p1,l2)==0)
10 reyssat 114
              entry[i].earlier=entry[i-1].earlier=l1;
115
        }
116
        i++;
117
    }
118
    thisdic->len=i-entrycount;
3808 kbelabas 119
    pp=strrchr(fname,'/'); if(pp==NULL) pp=fname;
10 reyssat 120
    snprintf(tbuf,sizeof(tbuf),"unknown_%s",pp);
121
    _getdef(defbuf,tbuf,buf);
122
    p1=find_word_start(buf); *find_word_end(p1)=0;
123
    for(pp=p1; *pp; pp++) *pp=tolower(*pp);
124
    thisdic->unknown_type=unk_delete;
125
    if(strcmp(p1,"leave")==0) thisdic->unknown_type=unk_leave;
126
    else if(strcmp(p1,"delete")!=0) {
127
        thisdic->unknown_type=unk_replace;
128
        snprintf(thisdic->unknown,sizeof(thisdic->unknown),"%s",p1);
129
    }
130
    entrycount=i;
131
    if(debug) fprintf(stderr,"Dictionary %d: %s, %d entries.\n",
132
                      diccnt,fname,thisdic->len);
133
    return thisdic;
134
}
135
 
8161 bpr 136
/* make the translation. */
10 reyssat 137
void _translate(char *p, int i)
138
{
139
    char *p1, *p2, *pp;
140
    int t;
141
 
142
    if(i<0 || i>=diccnt) return;
143
    if(dic[i].len<=0) return;
144
    snprintf(troutbuf,sizeof(troutbuf),"%s",p);
145
    for(p1=find_word_start(troutbuf);
146
        p1!=NULL && p1-troutbuf<MAX_LINELEN && *p1!=0;
147
        p1=p2) {
148
        p2=find_word_end(p1);
149
        for(pp=p1;pp<p2 && (isalnum(*pp) || strchr("_",*pp)!=NULL);pp++);
150
        p2=find_word_start(p2);
151
        if(pp==p1 || (*pp!=0 && strchr(" ,.?!",*pp)==NULL)) continue;
152
        t=search_dic(entry+dic[i].start,dic[i].len,sizeof(entry[0]),p1);
153
        if(t<0) {
154
            switch(dic[i].unknown_type) {
155
                case unk_leave: break;
156
                case unk_delete: {
3718 reyssat 157
                    ovlstrcpy(p1,find_word_start(pp)); p2=p1;
10 reyssat 158
                    break;
159
                }
160
                case unk_replace: {
161
                    string_modify(troutbuf,p1,pp,dic[i].unknown);
162
                    p2=find_word_start(p1+strlen(dic[i].unknown));
163
                }
164
            }
165
            continue;
166
        }
167
        t+=dic[i].start;
3808 kbelabas 168
        string_modify(troutbuf,p1,p1+strlen((char*)entry[t].original),
169
                      (char*)entry[t].replace);
170
        p2=find_word_start(p1+strlen((char*)entry[t].replace));
10 reyssat 171
    }
172
    snprintf(p,MAX_LINELEN,"%s",troutbuf);
173
}
174
 
175
        /* make translation using file name */
176
void translate(char *p, char *dicname)
177
{
178
    int i;
179
    for(i=0;i<diccnt && strcmp(dicname,dic[i].name)!=0;i++);
180
    if(i<diccnt) _translate(p,i);
181
}
182
 
183
        /* Returns dictionary index, or -1 if not found */
184
int getdic(char *dicname)
185
{
186
    int i;
187
    char *p1, *p2, buf[MAX_LINELEN+1];
188
    for(i=0;i<diccnt && strcmp(dicname,dic[i].name)!=0;i++);
189
    if(i<diccnt) return i;
190
    _getdef(defbuf,"dictionaries",buf);
191
    p1=wordchr(buf,dicname); if(p1==NULL) return -1;
192
    for(p2=p1; myisalnum(*p2) || *p2=='.'; p2++);
193
    if(p2-p1 >= MAX_NAMELEN) return -1;
194
    *p2=0; i=diccnt;
195
    prepare_dic(dicname); return i;
196
}
197