Subversion Repositories wimsdev

Rev

Rev 432 | Rev 3247 | Go to most recent revision | Details | Compare with Previous | Last modification | View Log | RSS feed

Rev Author Line No. Line
10 reyssat 1
/*    Copyright (C) 1998-2003 XIAO, Gang of Universite de Nice - Sophia Antipolis
2
 *
3
 *  This program is free software; you can redistribute it and/or modify
4
 *  it under the terms of the GNU General Public License as published by
5
 *  the Free Software Foundation; either version 2 of the License, or
6
 *  (at your option) any later version.
7
 *
8
 *  This program is distributed in the hope that it will be useful,
9
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
10
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
11
 *  GNU General Public License for more details.
12
 *
13
 *  You should have received a copy of the GNU General Public License
14
 *  along with this program; if not, write to the Free Software
15
 *  Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
16
 */
17
 
18
        /* This is an internal program,
         * used to index modules for the search engine. */
20
 
21
#include "../wims.h"
22
 
23
/* Upper bound on the number of languages handled (from the WIMS header). */
#define MAX_LANGS       MAX_LANGUAGES
/* Capacity of the mod[] table. */
#define MAX_MODULES     65536

/* Locations used by the indexer.  outdir and sheetdir may be replaced at
 * run time by prep() from the environment. */
char *moduledir = "public_html/modules";
char *sheetdir  = "public_html/bases/sheet";
char *dicdir    = "public_html/bases";
char *outdir    = "public_html/bases/site2";

/* Dictionary file stems, relative to dicdir; a ".<lang>" suffix is
 * appended when they are opened. */
char *maindic   = "sys/words";
char *groupdic  = "sys/wgrp/wgrp";
char *suffixdic = "sys/suffix";
char *ignoredic = "sys/indignore";

/* Configuration file searched for the site_languages definition. */
char *conffile  = "log/wims.conf";
char *mlistbase = "list";
35
 
36
char lang[MAX_LANGS][4]={
1792 bpr 37
    "en","fr","cn","es","it","nl","si","ca","pt"
10 reyssat 38
};
39
#define DEFAULT_LANGCNT 6
40
char allang[MAX_LANGS][4]={
1792 bpr 41
    "en","fr","cn","es","it","nl","tw","de","si","ca","pt"
10 reyssat 42
};
43
#define allangcnt 8
44
char ignore[MAX_LANGS][MAX_LINELEN+1];
45
char mlistfile[MAX_LANGS][256];
46
int langcnt;
47
FILE *langf, *titf, *descf, *weightf, *robotf, *indf, *listf, *addrf, *serialf, *authorf, *versionf;
48
 
49
/* Category names looked up in a module's "category" field, each paired
 * with the one-letter code used to name the per-category output files. */
struct cat {
    char *name;
    char typ;
} cat[] = {
    { "all_types",  'A' },
    { "exercise",   'X' },
    { "oef",        'O' },
    { "tool",       'T' },
    { "recreation", 'R' },
    { "reference",  'Y' },
    { "document",   'D' },
    { "popup",      'P' },
    { "datamodule", 'M' }
};
/* Number of entries in cat[]. */
#define catno (sizeof cat / sizeof cat[0])
64
 
65
struct mod {
66
    char *name;
67
    unsigned char langs[MAX_LANGS];
68
    int counts[MAX_LANGS];
69
    int  langcnt;
70
} mod[MAX_MODULES];
71
int modcnt;
72
 
73
char *mlist;
74
 
75
/* Allocate n bytes or terminate the program with exit status 1.
 * Never returns NULL. */
void *xmalloc(size_t n)
{
    void *p;

    /* malloc(0) is allowed to return NULL without being out of memory;
     * request one byte so that NULL always means a real failure. */
    p=malloc(n>0 ? n : 1);
    if(p==NULL) {
        /* diagnostics go to stderr, like the other error message of
         * this program */
        fprintf(stderr,"Malloc failure.\n");
        exit(1);
    }
    return p;
}
85
 
86
/* Accent folding tables.  acctab lists accented letters as single bytes
 * (ISO-8859-1 codes, written as hex escapes so that the table does not
 * depend on the encoding this source file is saved in); deatab gives, at
 * the same index, the unaccented replacement.  deaccent() relies on both
 * strings having the same length. */
char *acctab=
        "\xE7" "\xE9\xE8\xEA\xEB" "\xFA\xF9\xFB\xFC"    /* c, e x4, u x4 */
        "\xE1\xE0\xE2\xE4\xE3" "\xF3\xF2\xF4\xF6\xF5"   /* a x5, o x5 */
        "\xED\xEC\xEF\xEE" "\xF1" "\xFD"                /* i x4, n, y */
        "\xC7" "\xC9\xC8\xCA\xCB" "\xDA\xD9\xDB\xDC"    /* C, E x4, U x4 */
        "\xC1\xC0\xC2\xC3\xC4" "\xD3\xD2\xD4\xD6\xD5"   /* A x5, O x5 */
        "\xCD\xCC\xCF\xCE" "\xD1" "\xDD";               /* I x4, N, Y */
char *deatab=
        "ceeeeuuuuaaaaaoooooiiiiny"
        "CEEEEUUUUAAAAAOOOOOIIIINY";

        /* fold accented letters to unaccented, lower-case everything and
         * turn every character that is neither alphanumeric nor one of
         * , . & $ + * into a space.  Works in place, byte by byte. */
void deaccent(char *p)
{
    unsigned char *sp;
    char *v;

    for(sp=(unsigned char *)p; *sp; sp++) {
        /* only bytes above ASCII can be in the accent table */
        if(*sp>=0x80 && (v=strchr(acctab,*sp))!=NULL)
          *sp=(unsigned char)deatab[v-acctab];
        /* *sp is an unsigned char, as the <ctype.h> functions require */
        if(!isalnum(*sp) && strchr(",.&$+*",*sp)==NULL) *sp=' ';
        else *sp=tolower(*sp);
    }
}
101
 
102
        /* translate everything non-alphanumeric into space */
103
void towords(char *p)
104
{
105
    char *pp;
106
    for(pp=p;*pp;pp++) if(!isalnum(*pp) && strchr("&$+*",*pp)==0) *pp=' ';
107
}
108
 
109
        /* Points to the end of the word */
110
char *find_word_end(char *p)
111
{
112
    int i;
113
    for(i=0;!isspace(*p) && *p!=0 && i<MAX_LINELEN; p++,i++);
114
    return p;
115
}
116
 
117
        /* Strips leading spaces */
118
char *find_word_start(char *p)
119
{
120
    int i;
121
    for(i=0; isspace(*p) && i<MAX_LINELEN; p++,i++);
122
    return p;
123
}
124
 
125
        /* Find first occurrence of word */
126
char *wordchr(char *p, char *w)
127
{
128
    char *r;
129
 
130
    for(r=strstr(p,w);r!=NULL &&
131
        ( (r>p && !isspace(*(r-1))) || (!isspace(*(r+strlen(w))) && *(r+strlen(w))!=0) );
132
        r=strstr(r+1,w));
133
    return r;
134
}
135
 
136
        /* find a variable in a string (math expression).
137
         * Returns the pointer or NULL. */
138
char *varchr(char *p, char *v)
139
{
140
    char *pp; int n=strlen(v);
141
    for(pp=strstr(p,v); pp!=NULL; pp=strstr(pp+1,v)) {
142
        if((pp==p || !isalnum(*(pp-1))) &&
143
           (!isalnum(*(pp+n)) || *(pp+n)==0)) break;
144
    }
145
    return pp;
146
}
147
 
148
        /* strip trailing spaces; return string end. */
149
char *strip_trailing_spaces(char *p)
150
{
151
    char *pp;
152
    if(*p==0) return p;
153
    for(pp=p+strlen(p)-1; pp>=p && isspace(*pp); *(pp--)=0);
154
    return pp;
155
}
156
 
157
/* Return the position just after the '>' closing the tag that starts at p
 * (a leading '<' is skipped).  Nested '<...>' groups are stepped over
 * recursively and quoted sections ("..." or '...') are skipped as a whole.
 * If the tag is not closed, the result points to a terminating NUL; an
 * unterminated quote yields the end of the string p. */
char *find_tag_end(char *p)
{
    char *cur=p, *closing;

    if(*cur=='<') cur++;
    while(*cur!=0 && *cur!='>') {
        switch(*cur) {
            case '<':
                /* resume right after the nested tag */
                cur=find_tag_end(cur);
                break;
            case '"':
            case '\'':
                /* jump to the matching quote of the same kind */
                closing=strchr(cur+1,*cur);
                if(closing==NULL) return p+strlen(p);
                cur=closing+1;
                break;
            default:
                cur++;
                break;
        }
    }
    if(*cur=='>') cur++;
    return cur;
}
176
 
177
/* Find the first tag named `tag` (case-insensitive) in p.
 * Returns a pointer to its '<', or to the terminating NUL of p when no
 * such tag exists.  The name must not be followed by an alphanumeric
 * character, so "b" does not match "<body>". */
char *find_tag(char *p, char *tag)
{
    char *pp;
    size_t len=strlen(tag);

    for(pp=strchr(p,'<'); pp!=NULL; pp=strchr(pp+1,'<')) {
        /* cast: isalnum() is undefined for negative char values */
        if(strncasecmp(pp+1,tag,len)==0 &&
           !isalnum((unsigned char)pp[1+len])) return pp;
    }
    return p+strlen(p);
}
187
 
188
        /* remove all html tags */
189
void detag(char *p)
190
{
191
    char *pp, *p2;
192
    for(pp=strchr(p,'<'); pp!=NULL; pp=strchr(pp,'<')) {
193
        p2=find_tag_end(pp);
194
        if(*p2==0) {*pp=0; return; }
195
        strcpy(pp,p2);
196
    }
197
}
198
 
199
        /* modify a string. Bufferlen must be ast least MAX_LINELEN */
200
void string_modify(char *start, char *bad_beg, char *bad_end, char *good,...)
201
{
202
    char buf[MAX_LINELEN+1];
203
    va_list vp;
204
 
205
    va_start(vp,good);
206
    vsnprintf(buf,sizeof(buf),good,vp); va_end(vp);
207
    if(strlen(start)-(bad_end-bad_beg)+strlen(buf)>=MAX_LINELEN)
208
      return;
209
    strcat(buf,bad_end);
210
    strcpy(bad_beg,buf);
211
}
212
 
213
void _getdef(char buf[], char *name, char value[])
214
{
215
    char *p1, *p2, *p3;
216
 
217
    value[0]=0;
218
    for(p1=strstr(buf,name); p1!=NULL; p1=strstr(p1+1,name)) {
219
        p2=find_word_start(p1+strlen(name));
220
        if((p1>buf && !isspace(*(p1-1))) || *p2!='=') continue;
221
        p3=p1; while(p3>buf && isspace(*(p3-1)) && *(p3-1)!='\n') p3--;
222
        if(p3>buf && *(p3-1)!='\n') continue;
223
        p2=find_word_start(p2+1);
224
        p3=strchr(p2,'\n');
225
        snprintf(value,MAX_LINELEN,"%s",p2);
226
        if(p3!=NULL && p3-p2<MAX_LINELEN) value[p3-p2]=0;
227
        strip_trailing_spaces(value);
228
        break;
229
    }
230
}
231
 
232
        /* Get variable definition from a file.
233
         * Result stored in buffer value of length MAX_LINELEN. */
234
void getdef(char *fname, char *name, char value[])
235
{
236
    FILE *f;
237
    char *buf;
238
    int l;
239
 
240
    value[0]=0;
241
    f=fopen(fname,"r"); if(f==NULL) return;
242
    fseek(f,0,SEEK_END); l=ftell(f); fseek(f,0,SEEK_SET);
243
    buf=xmalloc(l+256); l=fread(buf,1,l,f);
244
    fclose(f);
245
    if(l<=0) return; else buf[l]=0;
246
    _getdef(buf,name,value);
247
    free(buf);
248
}
249
 
250
#include "translator_.c"
251
 
252
char *mdicbuf, *gdicbuf;
253
char gentry[sizeof(entry)], mentry[sizeof(entry)];
254
int gentrycount, mentrycount;
255
 
256
        /* Preparation of data */
257
void prep(void)
258
{
259
    char buf[MAX_LINELEN+1];
260
    char *p1,*p2,*s,*old;
261
    int i,l,thislang,t;
262
    FILE *f;
263
 
264
    s=getenv("modind_outdir"); if(s!=NULL && *s!=0) outdir=s;
265
    s=getenv("modind_sheetdir"); if(s!=NULL && *s!=0) sheetdir=s;
266
    snprintf(buf,sizeof(buf),"%s/addr",outdir);
267
    addrf=fopen(buf,"w");
268
    snprintf(buf,sizeof(buf),"%s/serial",outdir);
269
    serialf=fopen(buf,"w");
270
    modcnt=langcnt=0;
271
    getdef(conffile,"site_languages",buf);
272
    for(p1=buf;*p1;p1++) if(!isalnum(*p1)) *p1=' ';
273
    for(p1=find_word_start(buf); *p1 && langcnt<MAX_LANGS; p1=find_word_start(p2)) {
274
        p2=find_word_end(p1);
275
        if(p2!=p1+2 || !isalpha(*p1) || !isalpha(*(p1+1))) continue;
276
        memmove(lang[langcnt],p1,2); lang[langcnt++][2]=0;
277
    }
278
    if(langcnt==0) {    /* default languages */
279
        langcnt=DEFAULT_LANGCNT;
280
    }
281
    s=getenv("mlist"); if(s==NULL) exit(1);
282
    l=strlen(s); if(l<0 || l>100*MAX_LINELEN) exit(1);
283
    mlist=xmalloc(l+16); strcpy(mlist,s); old="";
284
    for(i=0;i<langcnt;i++) {
285
        snprintf(buf,sizeof(buf),"%s/%s.%s",dicdir,ignoredic,lang[i]);
286
        f=fopen(buf,"r"); if(f==NULL) continue;
287
        l=fread(ignore[i],1,MAX_LINELEN,f);fclose(f);
288
        if(l<0 || l>=MAX_LINELEN) l=0;
289
        ignore[i][l]=0;
290
    }
291
    for(t=0, p1=find_word_start(mlist);
292
        *p1 && modcnt<MAX_MODULES;
293
        p1=find_word_start(p2), t++) {
294
        p2=find_word_end(p1);
295
        l=p2-p1; if(*p2) *p2++=0;
296
        fprintf(addrf,"%d:%s\n",t,p1);
297
        fprintf(serialf,"%s:%d\n",p1,t);
298
        thislang=-1;
299
        if(l>3 && p1[l-3]=='.') {
300
            for(i=0;i<langcnt;i++) if(strcasecmp(lang[i],p1+l-2)==0) break;
301
            if(i<langcnt) {p1[l-3]=0; thislang=i;}
302
            else {      /* unknown language, not referenced */
303
                continue;
304
            }
305
        }
306
        if(modcnt>0 && strcmp(old,p1)==0 && thislang>=0) {
307
            if(mod[modcnt-1].langcnt<langcnt) {
308
                mod[modcnt-1].langs[mod[modcnt-1].langcnt]=thislang;
309
                mod[modcnt-1].counts[mod[modcnt-1].langcnt]=t;
310
                (mod[modcnt-1].langcnt)++;
311
            }
312
        }
313
        else {
314
            mod[modcnt].name=old=p1;
315
            if(thislang>=0) {
316
                mod[modcnt].langs[0]=thislang;
317
                mod[modcnt].langcnt=1;
318
            }
319
            else mod[modcnt].langcnt=0;
320
            mod[modcnt].counts[0]=t;
321
            modcnt++;
322
        }
323
    }
324
    snprintf(buf,sizeof(buf),"%s/language",outdir);
325
    langf=fopen(buf,"w");
326
    snprintf(buf,sizeof(buf),"%s/title",outdir);
327
    titf=fopen(buf,"w");
328
    snprintf(buf,sizeof(buf),"%s/description",outdir);
329
    descf=fopen(buf,"w");
330
    snprintf(buf,sizeof(buf),"%s/author",outdir);
331
    authorf=fopen(buf,"w");
332
    snprintf(buf,sizeof(buf),"%s/version",outdir);
333
    versionf=fopen(buf,"w");
334
    snprintf(buf,sizeof(buf),"%s/lists/robot.phtml",outdir);
335
    robotf=fopen(buf,"w");
336
    fclose(addrf); fclose(serialf);
337
    if(!robotf || !versionf || !authorf || !descf || !titf || !descf) {
338
        fprintf(stderr,"modind: error creating output files.\n");
339
        exit(1);
340
    }
341
}
342
 
343
void sprep(void)
344
{
345
    char *p1,*p2,*s;
346
    int i,l,thislang;
347
 
348
    modcnt=0;
349
    s=getenv("slist"); if(s==NULL) return;
350
    l=strlen(s); if(l<0 || l>100*MAX_LINELEN) return;
351
    mlist=xmalloc(l+16); strcpy(mlist,s);
352
    for(p1=find_word_start(mlist); *p1 && modcnt<MAX_MODULES; p1=find_word_start(p2)) {
353
        p2=find_word_end(p1);
354
        l=p2-p1; if(*p2) *p2++=0;
355
        for(i=0;i<langcnt;i++) if(strncasecmp(lang[i],p1,2)==0) break;
356
        if(i<langcnt) thislang=i; else continue;
357
        mod[modcnt].name=p1;
358
        mod[modcnt].langs[0]=thislang;
359
        mod[modcnt].langcnt=1;
360
        modcnt++;
361
    }
362
}
363
 
364
void clean(void)
365
{
366
    fclose(langf); fclose(titf); fclose(descf); fclose(robotf);
367
    fclose(authorf); fclose(versionf);
368
}
369
 
370
char *sheetindex[]={
371
      "title", "description",
372
      "duration", "severity",
373
      "level", "domain",
374
      "keywords", "reserved1", "reserved2", "remark"
375
};
376
#define SHEETINDEX_NO (sizeof(sheetindex)/sizeof(sheetindex[0]))
377
char sindbuf[SHEETINDEX_NO][MAX_LINELEN+1];
378
enum{s_title, s_description,
379
      s_duration, s_severity,
380
      s_level, s_domain,
381
      s_keywords, s_reserved1, s_reserved2,
382
      s_remark
383
};
384
 
385
char *modindex[]={
386
      "title", "description",
387
      "author", "address", "copyright",
388
      "version", "wims_version", "language",
389
      "category", "level", "domain", "keywords",
390
      "require"
391
};
392
#define MODINDEX_NO (sizeof(modindex)/sizeof(modindex[0]))
393
char indbuf[MODINDEX_NO][MAX_LINELEN+1];
394
enum{i_title, i_description,
395
      i_author,i_address,i_copyright,
396
      i_version,i_wims_version,i_language,
397
      i_category,i_level,i_domain,i_keywords,
398
      i_require
399
};
400
 
401
char *module_special_file[] = {
    "intro",
    "help",
    "about"
};
#define MODSPEC_NO (sizeof module_special_file / sizeof module_special_file[0])

/* Two-letter language code of the module last read by module_index(). */
char module_language[4];
406
 
407
        /* read and treat module's INDEX file */
408
int module_index(const char *name)
409
{
410
    char *p, fbuf[MAX_LINELEN+1], ibuf[MAX_LINELEN+1];
411
    FILE *indf;
412
    int i,l;
413
 
414
    snprintf(fbuf,sizeof(fbuf),"%s/%s/INDEX",moduledir,name);
415
    indf=fopen(fbuf,"r"); if(indf==NULL) return -1;
416
    l=fread(ibuf,1,MAX_LINELEN,indf); fclose(indf);
417
    if(l>0 && l<MAX_LINELEN) ibuf[l]=0; else return -1;
418
    for(i=0;i<MODINDEX_NO;i++) {
419
        _getdef(ibuf,modindex[i],indbuf[i]);
420
                /* compatibility precaution */
421
        if(indbuf[i][0]==':') indbuf[i][0]='.';
422
    }
423
    p=find_word_start(indbuf[i_language]);
424
    if(isalpha(*p) && isalpha(*(p+1))) {
425
        memmove(module_language,p,2); module_language[2]=0;
426
    }
427
    else strcpy(module_language,"en");
428
    return 0;
429
}
430
 
431
int sheet_index(int serial)
432
{
433
    char *p1, *p2, fbuf[MAX_LINELEN+1], ibuf[MAX_LINELEN+1];
434
    FILE *indf;
435
    int i,l;
436
 
437
    snprintf(fbuf,sizeof(fbuf),"%s/%s.def",sheetdir,mod[serial].name);
438
    indf=fopen(fbuf,"r"); if(indf==NULL) return -1;
439
    l=fread(ibuf,1,MAX_LINELEN,indf); fclose(indf);
440
    if(l>0 && l<MAX_LINELEN) ibuf[l]=0; else return -1;
441
    for(i=0;i<SHEETINDEX_NO;i++) sindbuf[i][0]=0;
442
    for(i=0,p1=find_word_start(ibuf);
443
        i<SHEETINDEX_NO-1 && *p1!=':' && *p1!=0;
444
        i++,p1=p2) {
445
        p2=strchr(p1,'\n');
446
        if(p2!=NULL) *p2++=0; else p2=p1+strlen(p1);
447
        p1=find_word_start(p1); strip_trailing_spaces(p1);
448
        snprintf(sindbuf[i],MAX_LINELEN,"%s",p1);
449
    }
450
    p2=strstr(p1,"\n:"); if(p2==NULL) p2=p1+strlen(p1);
451
    else *p2=0;
452
    p1=find_word_start(p1); strip_trailing_spaces(p1);
453
    for(p2=p1;*p2;p2++) if(*p2=='\n') *p2=' ';
454
    strcpy(sindbuf[s_remark],p1);
455
    return 0;
456
}
457
 
458
unsigned char categories[16];
459
char taken[MAX_LINELEN+1];
460
int catcnt, takenlen, tweight;
461
 
462
void appenditem(char *word, int lind, int serial, int weight, char *l)
463
{
464
    char nbuf[MAX_LINELEN+1], buf[MAX_LINELEN+1];
465
    int i, ll;
466
    char *p;
467
    FILE *f;
468
 
469
    if(!isalnum(*word) || (ll=strlen(word))<2 ||
470
       wordchr(taken,word)!=NULL ||
471
       wordchr(ignore[lind],word)!=NULL ||
472
       takenlen>=MAX_LINELEN-ll-16)
473
      return;
474
    if(ll==2 && (!isdigit(word[0]) || !isalpha(word[1]))) return;
475
    for(p=word;*p;p++) if(!isalnum(*p) && *p!=' ') return;
476
    taken[takenlen++]=' '; taken[takenlen++]=' ';
477
    strcpy(taken+takenlen,word);
478
    takenlen+=ll; tweight+=weight;
479
    snprintf(buf,sizeof(buf),"%s:%d?%d\n",word,serial,weight);
480
    for(i=0;i<catcnt;i++) {
481
        snprintf(nbuf,sizeof(nbuf),"%s/%c.%s",
482
                 outdir,categories[i],lang[lind]);
483
        f=fopen(nbuf,"a");
484
        if(f!=NULL) {fputs(buf,f); fclose(f);}
485
    }
486
}
487
 
488
void onemodule(const char *name, int serial, int lind)
489
{
490
    int i;
491
    unsigned char trlist[]={
492
        i_title,i_description,i_category,i_domain,i_keywords,
493
          i_require,i_author
494
    };
495
    #define trcnt (sizeof(trlist)/sizeof(trlist[0]))
496
    char *p1, *p2, *pp, buf[MAX_LINELEN+1], lbuf[16];
497
    FILE *f;
498
 
499
    if(module_index(name)) return;
500
    towords(indbuf[i_category]);
501
    for(i=catcnt=0;i<catno && catcnt<16;i++) {
502
        if(wordchr(indbuf[i_category],cat[i].name)!=NULL)
503
          categories[catcnt++]=cat[i].typ;
504
    }
505
    if(catcnt==0) return;
506
    if(categories[0]!=cat[0].typ)
507
      categories[catcnt++]=cat[0].typ;
508
    for(i=0;i<catcnt;i++) {
509
        snprintf(buf,sizeof(buf),"%s/lists/%c.%s",
510
                 outdir,categories[i],lang[lind]);
511
        f=fopen(buf,"a");
512
        if(f!=NULL) {fprintf(f,"%s\n",name); fclose(f);}
513
    }
514
    fprintf(langf,"%d:%s\n",serial,module_language);
515
    fprintf(titf,"%d:%s\n",serial,indbuf[i_title]);
516
    fprintf(descf,"%d:%s\n",serial,indbuf[i_description]);
517
    fprintf(authorf,"%d:%s\n",serial,indbuf[i_author]);
518
    fprintf(versionf,"%d:%s\n",serial,indbuf[i_version]);
519
    snprintf(buf,sizeof(buf),"%s",indbuf[i_description]);
520
    for(pp=strchr(buf,','); pp; pp=strchr(pp,','))
521
      string_modify(buf,pp,pp+1,"&#44;");
522
    if(strcmp(module_language,lang[lind])==0)
523
      fprintf(robotf,"%s ,%s,%s,%s,%s\n",name,module_language,name,
524
              indbuf[i_title], buf);
525
    entrycount=mentrycount; dicbuf=mdicbuf;
526
    memmove(entry,mentry,mentrycount*sizeof(entry[0]));
527
    unknown_type=unk_leave;
528
    for(i=0;i<trcnt;i++) {
529
        detag(indbuf[trlist[i]]);
530
        deaccent(indbuf[trlist[i]]);
531
        singlespace(indbuf[trlist[i]]);
532
        suffix_translate(indbuf[trlist[i]]);
533
        translate(indbuf[trlist[i]]);
534
    }
535
    taken[0]=0; takenlen=tweight=0;
536
    strcpy(buf,indbuf[i_title]); towords(buf);
537
    for(p1=find_word_start(buf);*p1;
538
        p1=find_word_start(p2)) {
539
        p2=find_word_end(p1); if(*p2) *p2++=0;
540
        appenditem(p1,lind,serial,4,module_language);
541
    }
542
    snprintf(buf,sizeof(buf),"%s %s %s %s %s",
543
             indbuf[i_description],indbuf[i_keywords],
544
             indbuf[i_domain],indbuf[i_require],indbuf[i_author]);
545
    towords(buf);
546
    for(p1=find_word_start(buf);*p1;
547
        p1=find_word_start(p2)) {
548
        p2=find_word_end(p1); if(*p2) *p2++=0;
549
        appenditem(p1,lind,serial,2,module_language);
550
    }
551
    entrycount=gentrycount; dicbuf=gdicbuf;
552
    memmove(entry,gentry,gentrycount*sizeof(entry[0]));
553
    unknown_type=unk_delete;
554
    strcpy(buf,indbuf[i_title]); translate(buf);
555
    for(p1=find_word_start(buf); *p1;
556
        p1=find_word_start(p2)) {
557
        p2=strchr(p1,',');
558
        if(p2!=NULL) *p2++=0; else p2=p1+strlen(p1);
559
        if(strlen(p1)<=0) continue;
560
        appenditem(p1,lind,serial,4,module_language);
561
    }
562
    snprintf(buf,sizeof(buf),"%s, %s, %s",
563
             indbuf[i_description],indbuf[i_keywords],
564
             indbuf[i_domain]);
565
    translate(buf);
566
    for(p1=find_word_start(buf); *p1;
567
        p1=find_word_start(p2)) {
568
        p2=strchr(p1,',');
569
        if(p2!=NULL) *p2++=0; else p2=p1+strlen(p1);
570
        if(strlen(p1)<=0) continue;
571
        appenditem(p1,lind,serial,2,module_language);
572
    }
573
    snprintf(buf,sizeof(buf),"%s",indbuf[i_level]);
574
    strcpy(lbuf,"level");
575
    for(p1=buf; *p1; p1++) if(!isalnum(*p1)) *p1=' ';
576
    for(p1=find_word_start(buf); *p1;
577
        p1=find_word_start(p2)) {
578
        p2=find_word_end(p1);
579
        if(p2!=NULL) *p2++=0; else p2=p1+strlen(p1);
580
        if(!isalpha(*p1) ||
581
           (!isdigit(*(p1+1)) && *(p1+1)!=0) ||
582
           (*(p1+1)!=0 && *(p1+2)!=0))
583
          continue;
584
        *p1=tolower(*p1);
585
        strcpy(lbuf+strlen("level"),p1);
586
        appenditem(lbuf,lind,serial,2,module_language);
587
    }
588
    fprintf(weightf,"%d:%d\n",serial,tweight);
589
}
590
 
591
void modules(void)
592
{
593
    int i,j,k,d;
594
    char namebuf[MAX_LINELEN+1];
595
    char mdic[MAX_LINELEN+1], sdic[MAX_LINELEN+1], gdic[MAX_LINELEN+1];
596
 
597
    for(j=0;j<langcnt;j++) {
598
        snprintf(namebuf,sizeof(namebuf),"%s/weight.%s",outdir,lang[j]);
599
        weightf=fopen(namebuf,"w");
600
        snprintf(mdic,sizeof(mdic),"%s/%s.%s",dicdir,maindic,lang[j]);
601
        snprintf(sdic,sizeof(sdic),"%s/%s.%s",dicdir,suffixdic,lang[j]);
602
        snprintf(gdic,sizeof(gdic),"%s/%s.%s",dicdir,groupdic,lang[j]);
603
        suffix_dic(sdic); prepare_dic(gdic);
604
        gdicbuf=dicbuf; gentrycount=entrycount;
605
        memmove(gentry,entry,gentrycount*sizeof(entry[0]));
606
        prepare_dic(mdic);
607
        mdicbuf=dicbuf; mentrycount=entrycount;
608
        memmove(mentry,entry,mentrycount*sizeof(entry[0]));
609
        unknown_type=unk_leave; translate(ignore[j]);
610
        for(i=0;i<modcnt;i++) {
611
            if(mod[i].langcnt>0) {
612
                for(d=k=0;k<mod[i].langcnt;k++)
613
                  if(mod[i].langs[k]<mod[i].langs[d]) d=k;
614
                for(k=0;k<mod[i].langcnt && mod[i].langs[k]!=j;k++);
615
                if(k>=mod[i].langcnt) k=d;
616
                snprintf(namebuf,MAX_LINELEN,"%s.%s",mod[i].name,
617
                         lang[mod[i].langs[k]]);
618
                onemodule(namebuf,mod[i].counts[k],j);
619
            }
620
            else {
621
                onemodule(mod[i].name,mod[i].counts[0],j);
622
            }
623
        }
624
        if(mentrycount>0) free(mdicbuf);
625
        if(gentrycount>0) free(gdicbuf);
626
        if(suffixcnt>0) free(sufbuf);
627
        if(weightf) fclose(weightf);
628
    }
629
}
630
 
631
void sappenditem(char *word, int lind, int serial, int weight)
632
{
633
    int ll;
634
    char *p;
635
 
636
    if(!isalnum(*word) || (ll=strlen(word))<2 ||
637
       wordchr(taken,word)!=NULL ||
638
       wordchr(ignore[lind],word)!=NULL ||
639
       takenlen>=MAX_LINELEN-ll-16)
640
      return;
641
    if(ll==2 && (!isdigit(word[0]) || !isalpha(word[1]))) return;
642
    for(p=word;*p;p++) if(!isalnum(*p) && *p!=' ') return;
643
    taken[takenlen++]=' ';taken[takenlen++]=' ';
644
    strcpy(taken+takenlen,word);
645
    takenlen+=ll; tweight+=weight;
646
    fprintf(indf,"%s:%d?%d\n",word,serial,weight);
647
}
648
 
649
void onesheet(int serial, int lind)
650
{
651
    int i;
652
    unsigned char trlist[]={
653
        s_title,s_description,s_domain,s_keywords,s_remark
654
    };
655
    #define trcnt (sizeof(trlist)/sizeof(trlist[0]))
656
    char *p1, *p2, buf[MAX_LINELEN+1];
657
 
658
    if(sheet_index(serial)) return;
659
    fprintf(listf,"%s\n",mod[serial].name+3);
660
    fprintf(titf,"%d:%s\n",serial,sindbuf[s_title]);
661
    fprintf(descf,"%d:%s\n",serial,sindbuf[s_description]);
662
    entrycount=mentrycount; dicbuf=mdicbuf;
663
    memmove(entry,mentry,mentrycount*sizeof(entry[0]));
664
    unknown_type=unk_leave;
665
    for(i=0;i<trcnt;i++) {
666
        detag(sindbuf[trlist[i]]);
667
        deaccent(sindbuf[trlist[i]]);
668
        singlespace(sindbuf[trlist[i]]);
669
        suffix_translate(sindbuf[trlist[i]]);
670
        translate(sindbuf[trlist[i]]);
671
    }
672
    taken[0]=0; takenlen=tweight=0;
673
    strcpy(buf,sindbuf[s_title]); towords(buf);
674
    for(p1=find_word_start(buf);*p1;
675
        p1=find_word_start(p2)) {
676
        p2=find_word_end(p1); if(*p2) *p2++=0;
677
        sappenditem(p1,lind,serial,4);
678
    }
679
    snprintf(buf,sizeof(buf),"%s %s %s %s",
680
             sindbuf[s_description],sindbuf[s_keywords],
681
             sindbuf[s_domain],sindbuf[s_remark]);
682
    towords(buf);
683
    for(p1=find_word_start(buf);*p1;
684
        p1=find_word_start(p2)) {
685
        p2=find_word_end(p1); if(*p2) *p2++=0;
686
        sappenditem(p1,lind,serial,2);
687
    }
688
    entrycount=gentrycount; dicbuf=gdicbuf;
689
    memmove(entry,gentry,gentrycount*sizeof(entry[0]));
690
    unknown_type=unk_delete;
691
    strcpy(buf,sindbuf[s_title]); translate(buf);
692
    for(p1=find_word_start(buf); *p1;
693
        p1=find_word_start(p2)) {
694
        p2=strchr(p1,',');
695
        if(p2!=NULL) *p2++=0; else p2=p1+strlen(p1);
696
        if(strlen(p1)<=0) continue;
697
        sappenditem(p1,lind,serial,4);
698
    }
699
    snprintf(buf,sizeof(buf),"%s, %s, %s, %s",
700
             sindbuf[s_description],sindbuf[s_keywords],
701
             sindbuf[s_domain],sindbuf[s_remark]);
702
    translate(buf);
703
    for(p1=find_word_start(buf); *p1;
704
        p1=find_word_start(p2)) {
705
        p2=strchr(p1,',');
706
        if(p2!=NULL) *p2++=0; else p2=p1+strlen(p1);
707
        if(strlen(p1)<=0) continue;
708
        sappenditem(p1,lind,serial,2);
709
    }
710
    fprintf(weightf,"%d:%d\n",serial,tweight);
711
}
712
 
713
void sheets(void)
714
{
715
    int i,j;
716
    char mdic[MAX_LINELEN+1], sdic[MAX_LINELEN+1], gdic[MAX_LINELEN+1];
717
    char buf[MAX_LINELEN+1];
718
 
719
    for(j=0;j<langcnt;j++) {
720
        snprintf(buf,sizeof(buf),"%s/index/title.%s",sheetdir,lang[j]);
721
        titf=fopen(buf,"w");
722
        snprintf(buf,sizeof(buf),"%s/index/description.%s",sheetdir,lang[j]);
723
        descf=fopen(buf,"w");
724
        snprintf(buf,sizeof(buf),"%s/index/%s",sheetdir,lang[j]);
725
        indf=fopen(buf,"w");
726
        snprintf(buf,sizeof(buf),"%s/index/list.%s",sheetdir,lang[j]);
727
        listf=fopen(buf,"w");
728
        snprintf(buf,sizeof(buf),"%s/index/weight.%s",sheetdir,lang[j]);
729
        weightf=fopen(buf,"w");
730
        snprintf(buf,sizeof(buf),"%s/index/addr.%s",sheetdir,lang[j]);
731
        addrf=fopen(buf,"w");
732
        snprintf(buf,sizeof(buf),"%s/index/serial.%s",sheetdir,lang[j]);
733
        serialf=fopen(buf,"w");
734
        snprintf(mdic,sizeof(mdic),"%s/%s.%s",dicdir,maindic,lang[j]);
735
        snprintf(sdic,sizeof(sdic),"%s/%s.%s",dicdir,suffixdic,lang[j]);
736
        snprintf(gdic,sizeof(gdic),"%s/%s.%s",dicdir,groupdic,lang[j]);
737
        suffix_dic(sdic); prepare_dic(gdic);
738
        gdicbuf=dicbuf; gentrycount=entrycount;
739
        memmove(gentry,entry,gentrycount*sizeof(entry[0]));
740
        prepare_dic(mdic);
741
        mdicbuf=dicbuf; mentrycount=entrycount;
742
        memmove(mentry,entry,mentrycount*sizeof(entry[0]));
743
        unknown_type=unk_leave; translate(ignore[j]);
744
        for(i=0;i<modcnt;i++) {
745
            if(mod[i].langs[0]!=j) continue;
746
            fprintf(addrf,"%d:%s\n",i,mod[i].name+3);
747
            fprintf(serialf,"%s:%d\n",mod[i].name+3,i);
748
            onesheet(i,j);
749
        }
750
        if(mentrycount>0) free(mdicbuf);
751
        if(gentrycount>0) free(gdicbuf);
752
        if(suffixcnt>0) free(sufbuf);
753
        fclose(titf); fclose(descf); fclose(indf); fclose(listf);
754
        fclose(weightf); fclose(addrf); fclose(serialf);
755
    }
756
}
757
 
758
int main()
759
{
760
    prep();
761
    if(modcnt>0) modules();
762
    clean();
763
    sprep();
764
    if(modcnt>0) sheets();
765
    return 0;
766
}
767