Subversion Repositories wimsdev

Rev

Rev 10 | Rev 3808 | Go to most recent revision | Details | Compare with Previous | Last modification | View Log | RSS feed

Rev Author Line No. Line
10 reyssat 1
/*    Copyright (C) 1998-2003 XIAO, Gang of Universite de Nice - Sophia Antipolis
2
 *
3
 *  This program is free software; you can redistribute it and/or modify
4
 *  it under the terms of the GNU General Public License as published by
5
 *  the Free Software Foundation; either version 2 of the License, or
6
 *  (at your option) any later version.
7
 *
8
 *  This program is distributed in the hope that it will be useful,
9
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
10
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
11
 *  GNU General Public License for more details.
12
 *
13
 *  You should have received a copy of the GNU General Public License
14
 *  along with this program; if not, write to the Free Software
15
 *  Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
16
 */
17
 
18
        /* Versatile translation according to a dictionary */
19
 
20
/* General-purpose line buffer (not referenced by the code in this file
 * section). */
char inpbuf[MAX_LINELEN+1];
/* Working buffer in which _translate() edits the text; sized to leave
 * room for growth while replacements are applied. */
char troutbuf[2*MAX_LINELEN+2];

/* One dictionary line "original:replace". */
struct entry {
    /* Key text; points into the owning dictionary's file buffer. */
    unsigned char *original;
    /* Replacement text; points into the same buffer. */
    unsigned char *replace;
    /* strlen(original), cached when the dictionary is loaded. */
    int olen;
    /* Index in entry[] of the first key of the group this key belongs to
     * (keys that extend a shorter key by further words), or -1 when the
     * key is not part of such a group. */
    int earlier;
};

/* All entries of all loaded dictionaries, stored back to back. */
struct entry entry[MAX_DICENTRIES];

/* Number of slots of entry[] currently in use. */
int entrycount = 0;
26
 
27
struct dic {
28
    char name[MAX_FNAME+1];
29
    char unknown[256];
30
    char *buf;
31
    int unknown_type;
32
    int start;
33
    int len;
34
} dic[MAX_DICS];
35
int diccnt;
36
int transdic, macrodic;
37
 
38
/* Policy applied by _translate() to a word that is not in the dictionary. */
enum {
    unk_delete  = 0,    /* remove the word from the text */
    unk_leave   = 1,    /* keep the word unchanged */
    unk_replace = 2     /* substitute the dictionary's `unknown' text */
};
41
 
42
int compare(struct entry *e, const char *s2)
43
{
44
    int k;
45
    k=strncmp(e->original,s2,e->olen);
46
    if(k==0 && isalnum(*(s2+e->olen))) return -1;
47
    else return k;
48
}
49
 
50
        /* Searches a list. Returns the index if found, -1 if there is no match.
51
         * Uses binary search, list must be sorted. */
52
int search_dic(struct entry *list, int items, size_t item_size, const char *str)
53
{
54
    int i1,i2,j,k,t,t1;
55
    unsigned char c;
56
 
57
    if(items<=0) return -1;
58
    j=0; c=str[0];
59
    k=list[0].original[0]-c; if(k==0) k=compare(list,str);
60
    if(k==0) goto more; if(k>0) return -1;
61
    j=items-1; k=list[j].original[0]-c; if(k==0) k=compare(list+j,str);
62
    if(k==0) return j;
63
    if(k>0) for(i1=0,i2=j;i2>i1+1;) {
64
        j=i1+(i2-i1)/2;
65
        k=list[j].original[0]-c; if(k==0) k=compare(list+j,str);
66
        if(k==0) goto more;
67
        if(k>0) {i2=j; continue;}
68
        if(k<0) {i1=j; continue;}      
69
    }
70
    if(k>0) {j--;k=compare(list+j,str);}
71
    more:
72
    if((t=list[j].earlier)<0) {
73
        if(k==0) return j; else return -1;
74
    }
75
    if(compare(entry+t,str)!=0) return -1;
76
    for(j=t1=t,k=0;j<items+(list-entry) && entry[j].earlier==t1 && (k=compare(entry+j,str))<=0; j++)
77
      if(k==0) t=j;
78
    return t-(list-entry);
79
}
80
 
81
#include "suffix.c"
82
 
83
        /* Prepare dictionary */
84
struct dic *prepare_dic(char *fname)
85
{
86
    int i,l;
87
    struct dic *thisdic;
88
    FILE *dicf;
89
    char *p1, *p2, *pp;
90
    char tbuf[256], buf[MAX_LINELEN+1];
91
    long int flen;
92
 
93
    if(diccnt>=MAX_DICS) error("too_many_dictionaries");
94
    thisdic=dic+diccnt; diccnt++;
95
    thisdic->len=0;
96
    thisdic->start=entrycount;
97
    snprintf(thisdic->name,sizeof(thisdic->name),"%s",fname);
98
    dicf=fopen(mkfname(NULL,"%s/%s",styledir,fname),"r"); if(dicf==NULL) return NULL;
99
    fseek(dicf,0,SEEK_END);flen=ftell(dicf); fseek(dicf,0,SEEK_SET);
100
    if(flen>=MAX_DICSIZE) return NULL;
101
    thisdic->buf=xmalloc(flen+16);flen=fread(thisdic->buf,1,flen,dicf);
102
    fclose(dicf);
103
    if(flen>0 && flen<MAX_DICSIZE) thisdic->buf[flen]=0;
104
    else return NULL;
105
    for(i=entrycount,p1=thisdic->buf;p1!=NULL && *p1!=0 && i<MAX_DICENTRIES;p1=p2) {
106
        p2=strchr(p1+1,'\n'); if(p2>p1) *p2++=0;
107
        pp=strchr(p1,':'); if(pp==NULL) continue;
108
        *pp++=0;
109
        strip_trailing_spaces(p1); strip_trailing_spaces(pp);
110
        singlespace(p1);
111
        p1=find_word_start(p1); pp=find_word_start(pp);
112
        if(*p1==0) continue;
113
        if(i>entrycount && compare(entry+i-1,p1)>0)
114
          error("unsorted_dictionary %s: %s > %s.\n",
115
                fname,entry[i-1].original,p1);
116
        if(i>entrycount && strcmp(entry[i-1].original,p1)==0)
117
          error("duplication_in_dictionary %s: %s.\n",
118
                fname,p1);
119
        entry[i].original=p1; entry[i].replace=pp;
120
        entry[i].olen=l=strlen(p1); entry[i].earlier=-1;
121
        if(i>0) {
122
            int l1,l2;
123
            l1=entry[i-1].earlier; if(l1>=0) l2=entry[l1].olen;
124
            else {l2=entry[i-1].olen;l1=i-1;}
125
            if(l>l2 && isspace(p1[l2])
126
               && strncmp(entry[l1].original,p1,l2)==0)
127
              entry[i].earlier=entry[i-1].earlier=l1;
128
        }
129
        i++;
130
    }
131
    thisdic->len=i-entrycount;
132
    pp=strrchr("fname",'/'); if(pp==NULL) pp=fname;
133
    snprintf(tbuf,sizeof(tbuf),"unknown_%s",pp);
134
    _getdef(defbuf,tbuf,buf);
135
    p1=find_word_start(buf); *find_word_end(p1)=0;
136
    for(pp=p1; *pp; pp++) *pp=tolower(*pp);
137
    thisdic->unknown_type=unk_delete;
138
    if(strcmp(p1,"leave")==0) thisdic->unknown_type=unk_leave;
139
    else if(strcmp(p1,"delete")!=0) {
140
        thisdic->unknown_type=unk_replace;
141
        snprintf(thisdic->unknown,sizeof(thisdic->unknown),"%s",p1);
142
    }
143
    entrycount=i;
144
    if(debug) fprintf(stderr,"Dictionary %d: %s, %d entries.\n",
145
                      diccnt,fname,thisdic->len);
146
    return thisdic;
147
}
148
 
149
        /* make the translation. */
150
void _translate(char *p, int i)
151
{
152
    char *p1, *p2, *pp;
153
    int t;
154
 
155
    if(i<0 || i>=diccnt) return;
156
    if(dic[i].len<=0) return;
157
    snprintf(troutbuf,sizeof(troutbuf),"%s",p);
158
    for(p1=find_word_start(troutbuf);
159
        p1!=NULL && p1-troutbuf<MAX_LINELEN && *p1!=0;
160
        p1=p2) {
161
        p2=find_word_end(p1);
162
        for(pp=p1;pp<p2 && (isalnum(*pp) || strchr("_",*pp)!=NULL);pp++);
163
        p2=find_word_start(p2);
164
        if(pp==p1 || (*pp!=0 && strchr(" ,.?!",*pp)==NULL)) continue;
165
        t=search_dic(entry+dic[i].start,dic[i].len,sizeof(entry[0]),p1);
166
        if(t<0) {
167
            switch(dic[i].unknown_type) {
168
                case unk_leave: break;
169
                case unk_delete: {
3718 reyssat 170
                    ovlstrcpy(p1,find_word_start(pp)); p2=p1;
10 reyssat 171
                    break;
172
                }
173
                case unk_replace: {
174
                    string_modify(troutbuf,p1,pp,dic[i].unknown);
175
                    p2=find_word_start(p1+strlen(dic[i].unknown));
176
                }
177
            }
178
            continue;
179
        }
180
        t+=dic[i].start;
181
        string_modify(troutbuf,p1,p1+strlen(entry[t].original),
182
                      entry[t].replace);
183
        p2=find_word_start(p1+strlen(entry[t].replace));
184
    }
185
    snprintf(p,MAX_LINELEN,"%s",troutbuf);
186
}
187
 
188
        /* make translation using file name */
189
void translate(char *p, char *dicname)
190
{
191
    int i;
192
    for(i=0;i<diccnt && strcmp(dicname,dic[i].name)!=0;i++);
193
    if(i<diccnt) _translate(p,i);
194
}
195
 
196
        /* Returns dictionary index, or -1 if not found */
197
int getdic(char *dicname)
198
{
199
    int i;
200
    char *p1, *p2, buf[MAX_LINELEN+1];
201
    for(i=0;i<diccnt && strcmp(dicname,dic[i].name)!=0;i++);
202
    if(i<diccnt) return i;
203
    _getdef(defbuf,"dictionaries",buf);
204
    p1=wordchr(buf,dicname); if(p1==NULL) return -1;
205
    for(p2=p1; myisalnum(*p2) || *p2=='.'; p2++);
206
    if(p2-p1 >= MAX_NAMELEN) return -1;
207
    *p2=0; i=diccnt;
208
    prepare_dic(dicname); return i;
209
}
210