Subversion Repositories wimsdev

Rev

Rev 3718 | Rev 8161 | Go to most recent revision | Details | Compare with Previous | Last modification | View Log | RSS feed

Rev Author Line No. Line
10 reyssat 1
/*    Copyright (C) 1998-2003 XIAO, Gang of Universite de Nice - Sophia Antipolis
2
 *
3
 *  This program is free software; you can redistribute it and/or modify
4
 *  it under the terms of the GNU General Public License as published by
5
 *  the Free Software Foundation; either version 2 of the License, or
6
 *  (at your option) any later version.
7
 *
8
 *  This program is distributed in the hope that it will be useful,
9
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
10
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
11
 *  GNU General Public License for more details.
12
 *
13
 *  You should have received a copy of the GNU General Public License
14
 *  along with this program; if not, write to the Free Software
15
 *  Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
16
 */
17
 
18
        /* Versatile translation according to a dictionary */
19
 
20
/* Line buffers.  troutbuf is the working copy that _translate() edits in
 * place; inpbuf is not referenced in this part of the file. */
char inpbuf[MAX_LINELEN+1];
char troutbuf[2*MAX_LINELEN+2];

/* One dictionary entry.  All dictionaries share the single entry[] table;
 * each dictionary owns a contiguous slice of it (see struct dic). */
struct entry {
    unsigned char *original;    /* text to be recognized */
    unsigned char *replace;     /* text substituted for it */
    int olen;                   /* strlen(original), cached by prepare_dic() */
    int earlier;                /* -1, or index in entry[] of the first entry
                                 * of the group of entries that extend the
                                 * same leading text (set by prepare_dic()) */
} entry[MAX_DICENTRIES];

/* Number of entry[] slots in use, summed over all loaded dictionaries. */
int entrycount=0;
26
 
27
struct dic {
28
    char name[MAX_FNAME+1];
29
    char unknown[256];
30
    char *buf;
31
    int unknown_type;
32
    int start;
33
    int len;
34
} dic[MAX_DICS];
35
int diccnt;
36
int transdic, macrodic;
37
 
38
/* What _translate() does with a word that is not in the dictionary:
 * remove it, keep it as is, or replace it by dic.unknown. */
enum {
    unk_delete,
    unk_leave,
    unk_replace
};
41
 
42
int compare(struct entry *e, const char *s2)
43
{
44
    int k;
3808 kbelabas 45
    k=strncmp((char*)e->original, (char*)s2, e->olen);
10 reyssat 46
    if(k==0 && isalnum(*(s2+e->olen))) return -1;
47
    else return k;
48
}
49
 
50
        /* searches a list. Returns index if found, -1 if nomatch.
51
         * Uses binary search, list must be sorted. */
52
int search_dic(struct entry *list, int items, size_t item_size, const char *str)
53
{
54
    int i1,i2,j,k,t,t1;
55
    unsigned char c;
56
 
57
    if(items<=0) return -1;
58
    j=0; c=str[0];
59
    k=list[0].original[0]-c; if(k==0) k=compare(list,str);
60
    if(k==0) goto more; if(k>0) return -1;
61
    j=items-1; k=list[j].original[0]-c; if(k==0) k=compare(list+j,str);
62
    if(k==0) return j;
63
    if(k>0) for(i1=0,i2=j;i2>i1+1;) {
64
        j=i1+(i2-i1)/2;
65
        k=list[j].original[0]-c; if(k==0) k=compare(list+j,str);
66
        if(k==0) goto more;
67
        if(k>0) {i2=j; continue;}
68
        if(k<0) {i1=j; continue;}      
69
    }
70
    if(k>0) {j--;k=compare(list+j,str);}
71
    more:
72
    if((t=list[j].earlier)<0) {
73
        if(k==0) return j; else return -1;
74
    }
75
    if(compare(entry+t,str)!=0) return -1;
76
    for(j=t1=t,k=0;j<items+(list-entry) && entry[j].earlier==t1 && (k=compare(entry+j,str))<=0; j++)
77
      if(k==0) t=j;
78
    return t-(list-entry);
79
}
80
 
81
#include "suffix.c"
82
 
83
        /* Prepare dictionary */
84
struct dic *prepare_dic(char *fname)
85
{
86
    int i,l;
87
    struct dic *thisdic;
88
    FILE *dicf;
89
    char *p1, *p2, *pp;
90
    char tbuf[256], buf[MAX_LINELEN+1];
91
    long int flen;
92
 
93
    if(diccnt>=MAX_DICS) error("too_many_dictionaries");
94
    thisdic=dic+diccnt; diccnt++;
95
    thisdic->len=0;
96
    thisdic->start=entrycount;
97
    snprintf(thisdic->name,sizeof(thisdic->name),"%s",fname);
98
    dicf=fopen(mkfname(NULL,"%s/%s",styledir,fname),"r"); if(dicf==NULL) return NULL;
99
    fseek(dicf,0,SEEK_END);flen=ftell(dicf); fseek(dicf,0,SEEK_SET);
100
    if(flen>=MAX_DICSIZE) return NULL;
101
    thisdic->buf=xmalloc(flen+16);flen=fread(thisdic->buf,1,flen,dicf);
102
    fclose(dicf);
103
    if(flen>0 && flen<MAX_DICSIZE) thisdic->buf[flen]=0;
104
    else return NULL;
105
    for(i=entrycount,p1=thisdic->buf;p1!=NULL && *p1!=0 && i<MAX_DICENTRIES;p1=p2) {
106
        p2=strchr(p1+1,'\n'); if(p2>p1) *p2++=0;
107
        pp=strchr(p1,':'); if(pp==NULL) continue;
108
        *pp++=0;
109
        strip_trailing_spaces(p1); strip_trailing_spaces(pp);
110
        singlespace(p1);
111
        p1=find_word_start(p1); pp=find_word_start(pp);
112
        if(*p1==0) continue;
113
        if(i>entrycount && compare(entry+i-1,p1)>0)
114
          error("unsorted_dictionary %s: %s > %s.\n",
115
                fname,entry[i-1].original,p1);
3808 kbelabas 116
        if(i>entrycount && strcmp((char*)entry[i-1].original,p1)==0)
10 reyssat 117
          error("duplication_in_dictionary %s: %s.\n",
118
                fname,p1);
3808 kbelabas 119
        entry[i].original=(unsigned char*)p1;
120
        entry[i].replace=(unsigned char*)pp;
10 reyssat 121
        entry[i].olen=l=strlen(p1); entry[i].earlier=-1;
122
        if(i>0) {
123
            int l1,l2;
124
            l1=entry[i-1].earlier; if(l1>=0) l2=entry[l1].olen;
125
            else {l2=entry[i-1].olen;l1=i-1;}
126
            if(l>l2 && isspace(p1[l2])
3808 kbelabas 127
               && strncmp((char*)entry[l1].original,p1,l2)==0)
10 reyssat 128
              entry[i].earlier=entry[i-1].earlier=l1;
129
        }
130
        i++;
131
    }
132
    thisdic->len=i-entrycount;
3808 kbelabas 133
    pp=strrchr(fname,'/'); if(pp==NULL) pp=fname;
10 reyssat 134
    snprintf(tbuf,sizeof(tbuf),"unknown_%s",pp);
135
    _getdef(defbuf,tbuf,buf);
136
    p1=find_word_start(buf); *find_word_end(p1)=0;
137
    for(pp=p1; *pp; pp++) *pp=tolower(*pp);
138
    thisdic->unknown_type=unk_delete;
139
    if(strcmp(p1,"leave")==0) thisdic->unknown_type=unk_leave;
140
    else if(strcmp(p1,"delete")!=0) {
141
        thisdic->unknown_type=unk_replace;
142
        snprintf(thisdic->unknown,sizeof(thisdic->unknown),"%s",p1);
143
    }
144
    entrycount=i;
145
    if(debug) fprintf(stderr,"Dictionary %d: %s, %d entries.\n",
146
                      diccnt,fname,thisdic->len);
147
    return thisdic;
148
}
149
 
150
        /* make the translation. */
151
void _translate(char *p, int i)
152
{
153
    char *p1, *p2, *pp;
154
    int t;
155
 
156
    if(i<0 || i>=diccnt) return;
157
    if(dic[i].len<=0) return;
158
    snprintf(troutbuf,sizeof(troutbuf),"%s",p);
159
    for(p1=find_word_start(troutbuf);
160
        p1!=NULL && p1-troutbuf<MAX_LINELEN && *p1!=0;
161
        p1=p2) {
162
        p2=find_word_end(p1);
163
        for(pp=p1;pp<p2 && (isalnum(*pp) || strchr("_",*pp)!=NULL);pp++);
164
        p2=find_word_start(p2);
165
        if(pp==p1 || (*pp!=0 && strchr(" ,.?!",*pp)==NULL)) continue;
166
        t=search_dic(entry+dic[i].start,dic[i].len,sizeof(entry[0]),p1);
167
        if(t<0) {
168
            switch(dic[i].unknown_type) {
169
                case unk_leave: break;
170
                case unk_delete: {
3718 reyssat 171
                    ovlstrcpy(p1,find_word_start(pp)); p2=p1;
10 reyssat 172
                    break;
173
                }
174
                case unk_replace: {
175
                    string_modify(troutbuf,p1,pp,dic[i].unknown);
176
                    p2=find_word_start(p1+strlen(dic[i].unknown));
177
                }
178
            }
179
            continue;
180
        }
181
        t+=dic[i].start;
3808 kbelabas 182
        string_modify(troutbuf,p1,p1+strlen((char*)entry[t].original),
183
                      (char*)entry[t].replace);
184
        p2=find_word_start(p1+strlen((char*)entry[t].replace));
10 reyssat 185
    }
186
    snprintf(p,MAX_LINELEN,"%s",troutbuf);
187
}
188
 
189
        /* make translation using file name */
190
void translate(char *p, char *dicname)
191
{
192
    int i;
193
    for(i=0;i<diccnt && strcmp(dicname,dic[i].name)!=0;i++);
194
    if(i<diccnt) _translate(p,i);
195
}
196
 
197
        /* Returns dictionary index, or -1 if not found */
198
int getdic(char *dicname)
199
{
200
    int i;
201
    char *p1, *p2, buf[MAX_LINELEN+1];
202
    for(i=0;i<diccnt && strcmp(dicname,dic[i].name)!=0;i++);
203
    if(i<diccnt) return i;
204
    _getdef(defbuf,"dictionaries",buf);
205
    p1=wordchr(buf,dicname); if(p1==NULL) return -1;
206
    for(p2=p1; myisalnum(*p2) || *p2=='.'; p2++);
207
    if(p2-p1 >= MAX_NAMELEN) return -1;
208
    *p2=0; i=diccnt;
209
    prepare_dic(dicname); return i;
210
}
211