Subversion Repositories wimsdev

Rev

Rev 6818 | Rev 6881 | Go to most recent revision | Details | Compare with Previous | Last modification | View Log | RSS feed

Rev Author Line No. Line
10 reyssat 1
/*    Copyright (C) 1998-2003 XIAO, Gang of Universite de Nice - Sophia Antipolis
2
 *
3
 *  This program is free software; you can redistribute it and/or modify
4
 *  it under the terms of the GNU General Public License as published by
5
 *  the Free Software Foundation; either version 2 of the License, or
6
 *  (at your option) any later version.
7
 *
8
 *  This program is distributed in the hope that it will be useful,
9
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
10
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
11
 *  GNU General Public License for more details.
12
 *
13
 *  You should have received a copy of the GNU General Public License
14
 *  along with this program; if not, write to the Free Software
15
 *  Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
16
 */
17
 
18
        /* This is an internal program,
19
         * used to index modules for search engine. */
20
 
21
#include "../wims.h"
3718 reyssat 22
#include "../Lib/basicstr.c"
10 reyssat 23
 
24
#define MAX_LANGS       MAX_LANGUAGES
25
#define MAX_MODULES     65536
26
char *moduledir=        "public_html/modules";
27
char *sheetdir=         "public_html/bases/sheet";
28
char *dicdir=           "public_html/bases";
29
char *outdir=           "public_html/bases/site2";
30
char *maindic=          "sys/words";
31
char *groupdic=         "sys/wgrp/wgrp";
32
char *suffixdic=        "sys/suffix";
33
char *ignoredic=        "sys/indignore";
34
char *conffile=         "log/wims.conf";
35
char *mlistbase=        "list";
36
 
37
char lang[MAX_LANGS][4]={
1792 bpr 38
    "en","fr","cn","es","it","nl","si","ca","pt"
10 reyssat 39
};
40
#define DEFAULT_LANGCNT 6
41
char allang[MAX_LANGS][4]={
6564 bpr 42
    "en","fr","cn","es","it","nl","de","si","ca","pt"
10 reyssat 43
};
44
#define allangcnt 8
45
char ignore[MAX_LANGS][MAX_LINELEN+1];
46
char mlistfile[MAX_LANGS][256];
47
int langcnt;
48
FILE *langf, *titf, *descf, *weightf, *robotf, *indf, *listf, *addrf, *serialf, *authorf, *versionf;
49
 
50
/* Module categories: the word looked for in the "category" field of a
 * module's INDEX file, and the one-letter code used to name the per-category
 * output files.  The first entry is the code onemodule() adds to every
 * module that has at least one category. */
struct cat {
    char *name;
    char typ;
} cat[]={
        {"all_types",   'A'},
        {"exercise",    'X'},
        {"oef",         'O'},
        {"tool",        'T'},
        {"recreation",  'R'},
        {"reference",   'Y'},
        {"document",    'D'},
        {"popup",       'P'},
        {"datamodule",  'M'}
};
/* Number of entries in cat[]. */
#define catno (sizeof(cat)/sizeof(cat[0]))
65
 
66
struct mod {
67
    char *name;
68
    unsigned char langs[MAX_LANGS];
69
    int counts[MAX_LANGS];
70
    int  langcnt;
71
} mod[MAX_MODULES];
72
int modcnt;
73
 
74
char *mlist;
75
 
76
/* malloc() that never returns NULL: on failure a message is printed and the
 * program exits with status 1.  The caller owns the returned memory. */
void *xmalloc(size_t n)
{
    void *p;
        /* malloc(0) is allowed to return NULL; ask for one byte so that an
         * empty request is not mistaken for a failure. */
    p=malloc(n>0?n:1);
    if(p==NULL) {
        printf("Malloc failure.\n");
        exit(1);
    }
    return p;
}
86
 
87
/* acctab: accented letters, one ISO-8859-1 byte each; deatab: at the same
 * index, the unaccented replacement.  The accented bytes are written as hex
 * escapes so that the table keeps one byte per letter whatever the encoding
 * in which this source file is stored. */
char *acctab=
        "\xe7"                          /* c */
        "\xe9\xe8\xea\xeb"              /* e e e e */
        "\xfa\xf9\xfb\xfc"              /* u u u u */
        "\xe1\xe0\xe2\xe4\xe3"          /* a a a a a */
        "\xf3\xf2\xf4\xf6\xf5"          /* o o o o o */
        "\xed\xec\xef\xee"              /* i i i i */
        "\xf1"                          /* n */
        "\xfd"                          /* y */
        "\xc7"                          /* C */
        "\xc9\xc8\xca\xcb"              /* E E E E */
        "\xda\xd9\xdb\xdc"              /* U U U U */
        "\xc1\xc0\xc2\xc3\xc4"          /* A A A A A */
        "\xd3\xd2\xd4\xd6\xd5"          /* O O O O O */
        "\xcd\xcc\xcf\xce"              /* I I I I */
        "\xd1"                          /* N */
        "\xdd",                         /* Y */
     *deatab="ceeeeuuuuaaaaaoooooiiiinyCEEEEUUUUAAAAAOOOOOIIIINY";

        /* Fold known accented letters to unaccented ones, turn every other
         * character that is neither alphanumeric nor one of , . & $ + *
         * into a space, and put what remains in lower case.
         * The string p is modified in place. */
void deaccent(char *p)
{
    char *sp;
    char *v;
    for(sp=p;*sp;sp++) {
                /* work on the unsigned value: plain char may be signed or
                 * not, and <ctype.h> functions need an unsigned char value */
        unsigned char c=(unsigned char)*sp;
        if(c>=0x80 && (v=strchr(acctab,*sp))!=NULL) {
            *sp=*(deatab+(v-acctab));
            c=(unsigned char)*sp;
        }
        if(!isalnum(c) && strchr(",.&$+*",c)==0) *sp=' ';
        else *sp=tolower(c);
    }
}
102
 
103
        /* translate everything non-alphanumeric into space */
104
void towords(char *p)
105
{
106
    char *pp;
107
    for(pp=p;*pp;pp++) if(!isalnum(*pp) && strchr("&$+*",*pp)==0) *pp=' ';
108
}
109
 
110
        /* Points to the end of the word */
111
char *find_word_end(char *p)
112
{
113
    int i;
114
    for(i=0;!isspace(*p) && *p!=0 && i<MAX_LINELEN; p++,i++);
115
    return p;
116
}
117
 
118
        /* Strips leading spaces */
119
char *find_word_start(char *p)
120
{
121
    int i;
122
    for(i=0; isspace(*p) && i<MAX_LINELEN; p++,i++);
123
    return p;
124
}
125
 
126
        /* Find first occurrence of word */
127
char *wordchr(char *p, char *w)
128
{
129
    char *r;
130
 
131
    for(r=strstr(p,w);r!=NULL &&
132
        ( (r>p && !isspace(*(r-1))) || (!isspace(*(r+strlen(w))) && *(r+strlen(w))!=0) );
133
        r=strstr(r+1,w));
134
    return r;
135
}
136
 
137
        /* find a variable in a string (math expression).
138
         * Returns the pointer or NULL. */
139
char *varchr(char *p, char *v)
140
{
141
    char *pp; int n=strlen(v);
142
    for(pp=strstr(p,v); pp!=NULL; pp=strstr(pp+1,v)) {
143
        if((pp==p || !isalnum(*(pp-1))) &&
144
           (!isalnum(*(pp+n)) || *(pp+n)==0)) break;
145
    }
146
    return pp;
147
}
148
 
149
        /* strip trailing spaces; return string end. */
150
char *strip_trailing_spaces(char *p)
151
{
152
    char *pp;
153
    if(*p==0) return p;
154
    for(pp=p+strlen(p)-1; pp>=p && isspace(*pp); *(pp--)=0);
155
    return pp;
156
}
157
 
158
/* Find the end of the html tag starting at p.
 * A '>' inside a quoted ("..." or '...') part does not close the tag, and
 * nested '<' ... '>' pairs are skipped as a whole.
 * Returns the address just after the closing '>', or the address of the
 * string terminator when the tag (or a quote inside it) is not closed. */
char *find_tag_end(char *p)
{
    char *pp;
    pp=p; if(*pp=='<') pp++;
    for(; *pp && *pp!='>'; pp++) {
        if(*pp=='<') {
                /* -1 compensates the pp++ of the loop */
            pp=find_tag_end(pp)-1; continue;
        }
        if(*pp=='"' || *pp=='\'') {
                /* jump to the matching quote; the loop then steps over it */
            pp=strchr(pp+1,*pp);
            if(pp==NULL) return p+strlen(p);
        }
    }
    if(*pp=='>') pp++;
    return pp;
}
177
 
178
/* Find the first html tag named tag (case-insensitive) in p.
 * The name must follow '<' immediately and must not be continued by an
 * alphanumeric character.
 * Returns the address of the '<', or the address of the string terminator
 * when there is no such tag. */
char *find_tag(char *p, char *tag)
{
    char *pp;
    size_t len;
    len=strlen(tag);
    for(pp=strchr(p,'<'); pp!=NULL && *pp; pp=strchr(pp+1,'<')) {
                /* cast: <ctype.h> needs an unsigned char value */
        if(strncasecmp(pp+1,tag,len)==0 &&
           !isalnum((unsigned char)*(pp+1+len))) return pp;
    }
    return p+strlen(p);
}
188
 
189
        /* remove all html tags */
190
void detag(char *p)
191
{
192
    char *pp, *p2;
193
    for(pp=strchr(p,'<'); pp!=NULL; pp=strchr(pp,'<')) {
194
        p2=find_tag_end(pp);
195
        if(*p2==0) {*pp=0; return; }
3718 reyssat 196
        ovlstrcpy(pp,p2);
10 reyssat 197
    }
198
}
199
 
200
        /* modify a string. Bufferlen must be ast least MAX_LINELEN */
201
void string_modify(char *start, char *bad_beg, char *bad_end, char *good,...)
202
{
203
    char buf[MAX_LINELEN+1];
204
    va_list vp;
205
 
206
    va_start(vp,good);
207
    vsnprintf(buf,sizeof(buf),good,vp); va_end(vp);
208
    if(strlen(start)-(bad_end-bad_beg)+strlen(buf)>=MAX_LINELEN)
209
      return;
210
    strcat(buf,bad_end);
3718 reyssat 211
    ovlstrcpy(bad_beg,buf);
10 reyssat 212
}
213
 
6819 reyssat 214
/* Add a space after every comma of p so that the end of the preceding word
 * can be seen.  p is modified in place through string_modify(), which
 * leaves it unchanged once the length limit is reached. */
void comma(char *p)
{
    char *pp;
        /* pp stays on the comma after the replacement, hence pp+1 */
    for(pp=strchr(p,','); pp; pp=strchr(pp+1,','))
      string_modify(p,pp,pp+1,", ");
}
222
 
223
 
10 reyssat 224
void _getdef(char buf[], char *name, char value[])
225
{
226
    char *p1, *p2, *p3;
227
 
228
    value[0]=0;
229
    for(p1=strstr(buf,name); p1!=NULL; p1=strstr(p1+1,name)) {
230
        p2=find_word_start(p1+strlen(name));
231
        if((p1>buf && !isspace(*(p1-1))) || *p2!='=') continue;
232
        p3=p1; while(p3>buf && isspace(*(p3-1)) && *(p3-1)!='\n') p3--;
233
        if(p3>buf && *(p3-1)!='\n') continue;
6564 bpr 234
        p3=strchr(p2,'\n');
10 reyssat 235
        p2=find_word_start(p2+1);
6564 bpr 236
        if(p3 <= p2) continue;
10 reyssat 237
        snprintf(value,MAX_LINELEN,"%s",p2);
238
        if(p3!=NULL && p3-p2<MAX_LINELEN) value[p3-p2]=0;
239
        strip_trailing_spaces(value);
240
        break;
241
    }
242
}
243
 
244
        /* Get variable definition from a file.
245
         * Result stored in buffer value of length MAX_LINELEN. */
246
void getdef(char *fname, char *name, char value[])
247
{
248
    FILE *f;
249
    char *buf;
250
    int l;
251
 
252
    value[0]=0;
253
    f=fopen(fname,"r"); if(f==NULL) return;
254
    fseek(f,0,SEEK_END); l=ftell(f); fseek(f,0,SEEK_SET);
255
    buf=xmalloc(l+256); l=fread(buf,1,l,f);
256
    fclose(f);
257
    if(l<=0) return; else buf[l]=0;
258
    _getdef(buf,name,value);
259
    free(buf);
260
}
261
 
262
#include "translator_.c"
263
 
264
char *mdicbuf, *gdicbuf;
265
char gentry[sizeof(entry)], mentry[sizeof(entry)];
266
int gentrycount, mentrycount;
267
 
268
        /* Preparation of data */
269
void prep(void)
270
{
271
    char buf[MAX_LINELEN+1];
272
    char *p1,*p2,*s,*old;
273
    int i,l,thislang,t;
274
    FILE *f;
275
 
276
    s=getenv("modind_outdir"); if(s!=NULL && *s!=0) outdir=s;
277
    s=getenv("modind_sheetdir"); if(s!=NULL && *s!=0) sheetdir=s;
278
    snprintf(buf,sizeof(buf),"%s/addr",outdir);
279
    addrf=fopen(buf,"w");
280
    snprintf(buf,sizeof(buf),"%s/serial",outdir);
281
    serialf=fopen(buf,"w");
282
    modcnt=langcnt=0;
283
    getdef(conffile,"site_languages",buf);
284
    for(p1=buf;*p1;p1++) if(!isalnum(*p1)) *p1=' ';
285
    for(p1=find_word_start(buf); *p1 && langcnt<MAX_LANGS; p1=find_word_start(p2)) {
286
        p2=find_word_end(p1);
287
        if(p2!=p1+2 || !isalpha(*p1) || !isalpha(*(p1+1))) continue;
288
        memmove(lang[langcnt],p1,2); lang[langcnt++][2]=0;
289
    }
290
    if(langcnt==0) {    /* default languages */
291
        langcnt=DEFAULT_LANGCNT;
292
    }
293
    s=getenv("mlist"); if(s==NULL) exit(1);
294
    l=strlen(s); if(l<0 || l>100*MAX_LINELEN) exit(1);
3718 reyssat 295
    mlist=xmalloc(l+16); ovlstrcpy(mlist,s); old="";
10 reyssat 296
    for(i=0;i<langcnt;i++) {
297
        snprintf(buf,sizeof(buf),"%s/%s.%s",dicdir,ignoredic,lang[i]);
298
        f=fopen(buf,"r"); if(f==NULL) continue;
299
        l=fread(ignore[i],1,MAX_LINELEN,f);fclose(f);
300
        if(l<0 || l>=MAX_LINELEN) l=0;
301
        ignore[i][l]=0;
302
    }
303
    for(t=0, p1=find_word_start(mlist);
304
        *p1 && modcnt<MAX_MODULES;
305
        p1=find_word_start(p2), t++) {
306
        p2=find_word_end(p1);
307
        l=p2-p1; if(*p2) *p2++=0;
308
        fprintf(addrf,"%d:%s\n",t,p1);
309
        fprintf(serialf,"%s:%d\n",p1,t);
310
        thislang=-1;
6564 bpr 311
/* language is taken from the address */
10 reyssat 312
        if(l>3 && p1[l-3]=='.') {
313
            for(i=0;i<langcnt;i++) if(strcasecmp(lang[i],p1+l-2)==0) break;
314
            if(i<langcnt) {p1[l-3]=0; thislang=i;}
315
            else {      /* unknown language, not referenced */
316
                continue;
317
            }
318
        }
319
        if(modcnt>0 && strcmp(old,p1)==0 && thislang>=0) {
320
            if(mod[modcnt-1].langcnt<langcnt) {
321
                mod[modcnt-1].langs[mod[modcnt-1].langcnt]=thislang;
322
                mod[modcnt-1].counts[mod[modcnt-1].langcnt]=t;
323
                (mod[modcnt-1].langcnt)++;
324
            }
325
        }
326
        else {
327
            mod[modcnt].name=old=p1;
328
            if(thislang>=0) {
329
                mod[modcnt].langs[0]=thislang;
330
                mod[modcnt].langcnt=1;
331
            }
332
            else mod[modcnt].langcnt=0;
333
            mod[modcnt].counts[0]=t;
334
            modcnt++;
335
        }
336
    }
337
    snprintf(buf,sizeof(buf),"%s/language",outdir);
338
    langf=fopen(buf,"w");
339
    snprintf(buf,sizeof(buf),"%s/title",outdir);
340
    titf=fopen(buf,"w");
341
    snprintf(buf,sizeof(buf),"%s/description",outdir);
342
    descf=fopen(buf,"w");
343
    snprintf(buf,sizeof(buf),"%s/author",outdir);
344
    authorf=fopen(buf,"w");
345
    snprintf(buf,sizeof(buf),"%s/version",outdir);
346
    versionf=fopen(buf,"w");
347
    snprintf(buf,sizeof(buf),"%s/lists/robot.phtml",outdir);
348
    robotf=fopen(buf,"w");
349
    fclose(addrf); fclose(serialf);
350
    if(!robotf || !versionf || !authorf || !descf || !titf || !descf) {
351
        fprintf(stderr,"modind: error creating output files.\n");
352
        exit(1);
353
    }
354
}
355
 
356
void sprep(void)
357
{
358
    char *p1,*p2,*s;
359
    int i,l,thislang;
360
 
361
    modcnt=0;
362
    s=getenv("slist"); if(s==NULL) return;
363
    l=strlen(s); if(l<0 || l>100*MAX_LINELEN) return;
3718 reyssat 364
    mlist=xmalloc(l+16); ovlstrcpy(mlist,s);
10 reyssat 365
    for(p1=find_word_start(mlist); *p1 && modcnt<MAX_MODULES; p1=find_word_start(p2)) {
366
        p2=find_word_end(p1);
367
        l=p2-p1; if(*p2) *p2++=0;
368
        for(i=0;i<langcnt;i++) if(strncasecmp(lang[i],p1,2)==0) break;
369
        if(i<langcnt) thislang=i; else continue;
370
        mod[modcnt].name=p1;
371
        mod[modcnt].langs[0]=thislang;
372
        mod[modcnt].langcnt=1;
373
        modcnt++;
374
    }
375
}
376
 
377
void clean(void)
378
{
379
    fclose(langf); fclose(titf); fclose(descf); fclose(robotf);
380
    fclose(authorf); fclose(versionf);
381
}
382
 
383
char *sheetindex[]={
384
      "title", "description",
385
      "duration", "severity",
386
      "level", "domain",
387
      "keywords", "reserved1", "reserved2", "remark"
388
};
389
#define SHEETINDEX_NO (sizeof(sheetindex)/sizeof(sheetindex[0]))
390
char sindbuf[SHEETINDEX_NO][MAX_LINELEN+1];
391
enum{s_title, s_description,
392
      s_duration, s_severity,
393
      s_level, s_domain,
394
      s_keywords, s_reserved1, s_reserved2,
395
      s_remark
396
};
397
 
398
char *modindex[]={
399
      "title", "description",
400
      "author", "address", "copyright",
401
      "version", "wims_version", "language",
6394 bpr 402
      "category", "level", "domain", "keywords",
6799 bpr 403
      "keywords_ca", "keywords_en", "keywords_fr", "keywords_it", "keywords_nl",
404
      "title_ca", "title_en", "title_fr", "title_it", "title_nl",
10 reyssat 405
      "require"
406
};
407
#define MODINDEX_NO (sizeof(modindex)/sizeof(modindex[0]))
408
char indbuf[MODINDEX_NO][MAX_LINELEN+1];
409
enum{i_title, i_description,
410
      i_author,i_address,i_copyright,
411
      i_version,i_wims_version,i_language,
412
      i_category,i_level,i_domain,i_keywords,
6799 bpr 413
      i_keywords_ca,i_keywords_en,i_keywords_fr,i_keywords_it,i_keywords_nl,
414
      i_title_ca,i_title_en,i_title_fr,i_title_it,i_title_nl,
10 reyssat 415
      i_require
416
};
417
 
418
char *module_special_file[]={
    "intro","help","about"
};
#define MODSPEC_NO (sizeof(module_special_file)/sizeof(module_special_file[0]))
/* Two-letter language code of the module last read by module_index(). */
char module_language[4];
423
 
424
        /* read and treat module's INDEX file */
425
int module_index(const char *name)
426
{
427
    char *p, fbuf[MAX_LINELEN+1], ibuf[MAX_LINELEN+1];
428
    FILE *indf;
429
    int i,l;
430
 
431
    snprintf(fbuf,sizeof(fbuf),"%s/%s/INDEX",moduledir,name);
432
    indf=fopen(fbuf,"r"); if(indf==NULL) return -1;
433
    l=fread(ibuf,1,MAX_LINELEN,indf); fclose(indf);
434
    if(l>0 && l<MAX_LINELEN) ibuf[l]=0; else return -1;
435
    for(i=0;i<MODINDEX_NO;i++) {
436
        _getdef(ibuf,modindex[i],indbuf[i]);
437
                /* compatibility precaution */
438
        if(indbuf[i][0]==':') indbuf[i][0]='.';
439
    }
440
    p=find_word_start(indbuf[i_language]);
441
    if(isalpha(*p) && isalpha(*(p+1))) {
442
        memmove(module_language,p,2); module_language[2]=0;
443
    }
3718 reyssat 444
    else ovlstrcpy(module_language,"en");
10 reyssat 445
    return 0;
446
}
447
 
448
int sheet_index(int serial)
449
{
450
    char *p1, *p2, fbuf[MAX_LINELEN+1], ibuf[MAX_LINELEN+1];
451
    FILE *indf;
452
    int i,l;
453
 
454
    snprintf(fbuf,sizeof(fbuf),"%s/%s.def",sheetdir,mod[serial].name);
455
    indf=fopen(fbuf,"r"); if(indf==NULL) return -1;
456
    l=fread(ibuf,1,MAX_LINELEN,indf); fclose(indf);
457
    if(l>0 && l<MAX_LINELEN) ibuf[l]=0; else return -1;
458
    for(i=0;i<SHEETINDEX_NO;i++) sindbuf[i][0]=0;
459
    for(i=0,p1=find_word_start(ibuf);
460
        i<SHEETINDEX_NO-1 && *p1!=':' && *p1!=0;
461
        i++,p1=p2) {
462
        p2=strchr(p1,'\n');
463
        if(p2!=NULL) *p2++=0; else p2=p1+strlen(p1);
464
        p1=find_word_start(p1); strip_trailing_spaces(p1);
465
        snprintf(sindbuf[i],MAX_LINELEN,"%s",p1);
466
    }
467
    p2=strstr(p1,"\n:"); if(p2==NULL) p2=p1+strlen(p1);
468
    else *p2=0;
469
    p1=find_word_start(p1); strip_trailing_spaces(p1);
470
    for(p2=p1;*p2;p2++) if(*p2=='\n') *p2=' ';
3718 reyssat 471
    ovlstrcpy(sindbuf[s_remark],p1);
10 reyssat 472
    return 0;
473
}
474
 
475
unsigned char categories[16];
476
char taken[MAX_LINELEN+1];
477
int catcnt, takenlen, tweight;
478
 
479
void appenditem(char *word, int lind, int serial, int weight, char *l)
480
{
481
    char nbuf[MAX_LINELEN+1], buf[MAX_LINELEN+1];
482
    int i, ll;
483
    char *p;
484
    FILE *f;
485
 
486
    if(!isalnum(*word) || (ll=strlen(word))<2 ||
487
       wordchr(taken,word)!=NULL ||
488
       wordchr(ignore[lind],word)!=NULL ||
489
       takenlen>=MAX_LINELEN-ll-16)
490
      return;
491
    if(ll==2 && (!isdigit(word[0]) || !isalpha(word[1]))) return;
492
    for(p=word;*p;p++) if(!isalnum(*p) && *p!=' ') return;
493
    taken[takenlen++]=' '; taken[takenlen++]=' ';
3718 reyssat 494
    ovlstrcpy(taken+takenlen,word);
10 reyssat 495
    takenlen+=ll; tweight+=weight;
496
    snprintf(buf,sizeof(buf),"%s:%d?%d\n",word,serial,weight);
497
    for(i=0;i<catcnt;i++) {
498
        snprintf(nbuf,sizeof(nbuf),"%s/%c.%s",
499
                 outdir,categories[i],lang[lind]);
500
        f=fopen(nbuf,"a");
501
        if(f!=NULL) {fputs(buf,f); fclose(f);}
502
    }
503
}
504
 
505
void onemodule(const char *name, int serial, int lind)
506
{
507
    int i;
508
    unsigned char trlist[]={
509
        i_title,i_description,i_category,i_domain,i_keywords,
6394 bpr 510
          i_require,i_author,
6799 bpr 511
          i_keywords_ca,i_keywords_en,i_keywords_fr,i_keywords_it,i_keywords_nl,
512
          i_title_ca,i_title_en,i_title_fr,i_title_it,i_title_nl
10 reyssat 513
    };
514
    #define trcnt (sizeof(trlist)/sizeof(trlist[0]))
6564 bpr 515
    char *p1, *p2, *pp, *q, buf[MAX_LINELEN+1], lbuf[16];
10 reyssat 516
    FILE *f;
517
 
518
    if(module_index(name)) return;
519
    towords(indbuf[i_category]);
6818 reyssat 520
        /*  list the categories (among A=all,X=eXercise,O,D,...) corresponding to this module  */
10 reyssat 521
    for(i=catcnt=0;i<catno && catcnt<16;i++) {
522
        if(wordchr(indbuf[i_category],cat[i].name)!=NULL)
523
          categories[catcnt++]=cat[i].typ;
524
    }
525
    if(catcnt==0) return;
526
    if(categories[0]!=cat[0].typ)
527
      categories[catcnt++]=cat[0].typ;
6818 reyssat 528
        /*  write module's name in the category.language files, for instance lists/X.fr for french exercises  */
10 reyssat 529
    for(i=0;i<catcnt;i++) {
530
        snprintf(buf,sizeof(buf),"%s/lists/%c.%s",
531
                 outdir,categories[i],lang[lind]);
532
        f=fopen(buf,"a");
533
        if(f!=NULL) {fprintf(f,"%s\n",name); fclose(f);}
534
    }
6818 reyssat 535
        /*  add serial number and language (resp.title, ...) to corresponding file  */
10 reyssat 536
    fprintf(langf,"%d:%s\n",serial,module_language);
537
    fprintf(titf,"%d:%s\n",serial,indbuf[i_title]);
538
    fprintf(descf,"%d:%s\n",serial,indbuf[i_description]);
539
    fprintf(authorf,"%d:%s\n",serial,indbuf[i_author]);
540
    fprintf(versionf,"%d:%s\n",serial,indbuf[i_version]);
6818 reyssat 541
        /*  add module's information in html page for robots  */
10 reyssat 542
    snprintf(buf,sizeof(buf),"%s",indbuf[i_description]);
543
    for(pp=strchr(buf,','); pp; pp=strchr(pp,','))
544
      string_modify(buf,pp,pp+1,"&#44;");
545
    if(strcmp(module_language,lang[lind])==0)
546
      fprintf(robotf,"%s ,%s,%s,%s,%s\n",name,module_language,name,
547
              indbuf[i_title], buf);
6818 reyssat 548
        /*  Normalize the information, using main dictionary bases/sys/words.xx */
10 reyssat 549
    entrycount=mentrycount; dicbuf=mdicbuf;
550
    memmove(entry,mentry,mentrycount*sizeof(entry[0]));
6819 reyssat 551
    unknown_type=unk_leave;  /* used in translator_.c */
552
 
10 reyssat 553
    for(i=0;i<trcnt;i++) {
554
        detag(indbuf[trlist[i]]);
555
        deaccent(indbuf[trlist[i]]);
6819 reyssat 556
        comma(indbuf[trlist[i]]);
10 reyssat 557
        singlespace(indbuf[trlist[i]]);
558
        suffix_translate(indbuf[trlist[i]]);
559
        translate(indbuf[trlist[i]]);
560
    }
6818 reyssat 561
        /*  append words of title  */
10 reyssat 562
    taken[0]=0; takenlen=tweight=0;
3718 reyssat 563
    ovlstrcpy(buf,indbuf[i_title]); towords(buf);
10 reyssat 564
    for(p1=find_word_start(buf);*p1;
565
        p1=find_word_start(p2)) {
566
        p2=find_word_end(p1); if(*p2) *p2++=0;
567
        appenditem(p1,lind,serial,4,module_language);
568
    }
6818 reyssat 569
        /*  append words of every other information except level  */
6799 bpr 570
    snprintf(buf,sizeof(buf),"%s %s %s %s %s %s %s %s %s %s %s %s %s %s %s",
10 reyssat 571
             indbuf[i_description],indbuf[i_keywords],
6806 bpr 572
             indbuf[i_keywords_ca],indbuf[i_keywords_en],indbuf[i_keywords_fr],
6394 bpr 573
             indbuf[i_keywords_it],indbuf[i_keywords_nl],
6806 bpr 574
             indbuf[i_title_ca],indbuf[i_title_en],indbuf[i_title_fr],
6799 bpr 575
             indbuf[i_title_it],indbuf[i_title_nl],
10 reyssat 576
             indbuf[i_domain],indbuf[i_require],indbuf[i_author]);
577
    towords(buf);
578
    for(p1=find_word_start(buf);*p1;
579
        p1=find_word_start(p2)) {
580
        p2=find_word_end(p1); if(*p2) *p2++=0;
581
        appenditem(p1,lind,serial,2,module_language);
582
    }
6818 reyssat 583
        /*  this time the dictionary is the group dictionary  sys/wgrp/wgrpwith a g (=global ? general ?), not an m . see below main,suffix,group.
584
        and delete unknown ?? and translate  */
10 reyssat 585
    entrycount=gentrycount; dicbuf=gdicbuf;
586
    memmove(entry,gentry,gentrycount*sizeof(entry[0]));
587
    unknown_type=unk_delete;
6818 reyssat 588
        /*  append words (?) of every other information except level  */
3718 reyssat 589
    ovlstrcpy(buf,indbuf[i_title]); translate(buf);
10 reyssat 590
    for(p1=find_word_start(buf); *p1;
591
        p1=find_word_start(p2)) {
592
        p2=strchr(p1,',');
593
        if(p2!=NULL) *p2++=0; else p2=p1+strlen(p1);
594
        if(strlen(p1)<=0) continue;
595
        appenditem(p1,lind,serial,4,module_language);
596
    }
6818 reyssat 597
        /*  append words (?) of every other information except level  */
6799 bpr 598
    snprintf(buf,sizeof(buf),"%s, %s, %s, %s, %s, %s, %s, %s",
10 reyssat 599
             indbuf[i_description],indbuf[i_keywords],
6799 bpr 600
             indbuf[i_keywords_ca], indbuf[i_keywords_en],indbuf[i_keywords_fr],
6394 bpr 601
             indbuf[i_keywords_it], indbuf[i_keywords_nl],
10 reyssat 602
             indbuf[i_domain]);
603
    translate(buf);
604
    for(p1=find_word_start(buf); *p1;
605
        p1=find_word_start(p2)) {
606
        p2=strchr(p1,',');
607
        if(p2!=NULL) *p2++=0; else p2=p1+strlen(p1);
608
        if(strlen(p1)<=0) continue;
609
        appenditem(p1,lind,serial,2,module_language);
610
    }
6818 reyssat 611
        /*  append level information, with weight 2 */
10 reyssat 612
    snprintf(buf,sizeof(buf),"%s",indbuf[i_level]);
3718 reyssat 613
    ovlstrcpy(lbuf,"level");
10 reyssat 614
    for(p1=buf; *p1; p1++) if(!isalnum(*p1)) *p1=' ';
6564 bpr 615
    q=buf+strlen(buf);
616
    for(p1=find_word_start(buf); (*p1) && (p1 < q) ;
10 reyssat 617
        p1=find_word_start(p2)) {
618
        p2=find_word_end(p1);
619
        if(p2!=NULL) *p2++=0; else p2=p1+strlen(p1);
620
        if(!isalpha(*p1) ||
621
           (!isdigit(*(p1+1)) && *(p1+1)!=0) ||
622
           (*(p1+1)!=0 && *(p1+2)!=0))
623
          continue;
624
        *p1=tolower(*p1);
3718 reyssat 625
        ovlstrcpy(lbuf+strlen("level"),p1);
10 reyssat 626
        appenditem(lbuf,lind,serial,2,module_language);
627
    }
6818 reyssat 628
        /*  append total weight of module to weight file site2/weight.xx  */
10 reyssat 629
    fprintf(weightf,"%d:%d\n",serial,tweight);
630
}
631
 
632
void modules(void)
633
{
634
    int i,j,k,d;
635
    char namebuf[MAX_LINELEN+1];
636
    char mdic[MAX_LINELEN+1], sdic[MAX_LINELEN+1], gdic[MAX_LINELEN+1];
637
 
638
    for(j=0;j<langcnt;j++) {
639
        snprintf(namebuf,sizeof(namebuf),"%s/weight.%s",outdir,lang[j]);
640
        weightf=fopen(namebuf,"w");
641
        snprintf(mdic,sizeof(mdic),"%s/%s.%s",dicdir,maindic,lang[j]);
642
        snprintf(sdic,sizeof(sdic),"%s/%s.%s",dicdir,suffixdic,lang[j]);
643
        snprintf(gdic,sizeof(gdic),"%s/%s.%s",dicdir,groupdic,lang[j]);
644
        suffix_dic(sdic); prepare_dic(gdic);
645
        gdicbuf=dicbuf; gentrycount=entrycount;
646
        memmove(gentry,entry,gentrycount*sizeof(entry[0]));
647
        prepare_dic(mdic);
648
        mdicbuf=dicbuf; mentrycount=entrycount;
649
        memmove(mentry,entry,mentrycount*sizeof(entry[0]));
650
        unknown_type=unk_leave; translate(ignore[j]);
651
        for(i=0;i<modcnt;i++) {
652
            if(mod[i].langcnt>0) {
653
                for(d=k=0;k<mod[i].langcnt;k++)
654
                  if(mod[i].langs[k]<mod[i].langs[d]) d=k;
655
                for(k=0;k<mod[i].langcnt && mod[i].langs[k]!=j;k++);
656
                if(k>=mod[i].langcnt) k=d;
657
                snprintf(namebuf,MAX_LINELEN,"%s.%s",mod[i].name,
658
                         lang[mod[i].langs[k]]);
659
                onemodule(namebuf,mod[i].counts[k],j);
660
            }
661
            else {
662
                onemodule(mod[i].name,mod[i].counts[0],j);
663
            }
664
        }
665
        if(mentrycount>0) free(mdicbuf);
666
        if(gentrycount>0) free(gdicbuf);
667
        if(suffixcnt>0) free(sufbuf);
668
        if(weightf) fclose(weightf);
669
    }
670
}
671
 
672
void sappenditem(char *word, int lind, int serial, int weight)
673
{
674
    int ll;
675
    char *p;
676
 
677
    if(!isalnum(*word) || (ll=strlen(word))<2 ||
678
       wordchr(taken,word)!=NULL ||
679
       wordchr(ignore[lind],word)!=NULL ||
680
       takenlen>=MAX_LINELEN-ll-16)
681
      return;
682
    if(ll==2 && (!isdigit(word[0]) || !isalpha(word[1]))) return;
683
    for(p=word;*p;p++) if(!isalnum(*p) && *p!=' ') return;
684
    taken[takenlen++]=' ';taken[takenlen++]=' ';
3718 reyssat 685
    ovlstrcpy(taken+takenlen,word);
10 reyssat 686
    takenlen+=ll; tweight+=weight;
687
    fprintf(indf,"%s:%d?%d\n",word,serial,weight);
688
}
689
 
690
void onesheet(int serial, int lind)
691
{
692
    int i;
693
    unsigned char trlist[]={
694
        s_title,s_description,s_domain,s_keywords,s_remark
695
    };
696
    #define trcnt (sizeof(trlist)/sizeof(trlist[0]))
697
    char *p1, *p2, buf[MAX_LINELEN+1];
698
 
699
    if(sheet_index(serial)) return;
700
    fprintf(listf,"%s\n",mod[serial].name+3);
701
    fprintf(titf,"%d:%s\n",serial,sindbuf[s_title]);
702
    fprintf(descf,"%d:%s\n",serial,sindbuf[s_description]);
703
    entrycount=mentrycount; dicbuf=mdicbuf;
704
    memmove(entry,mentry,mentrycount*sizeof(entry[0]));
705
    unknown_type=unk_leave;
706
    for(i=0;i<trcnt;i++) {
707
        detag(sindbuf[trlist[i]]);
708
        deaccent(sindbuf[trlist[i]]);
6819 reyssat 709
        comma(sindbuf[trlist[i]]);
10 reyssat 710
        singlespace(sindbuf[trlist[i]]);
711
        suffix_translate(sindbuf[trlist[i]]);
712
        translate(sindbuf[trlist[i]]);
713
    }
714
    taken[0]=0; takenlen=tweight=0;
3718 reyssat 715
    ovlstrcpy(buf,sindbuf[s_title]); towords(buf);
10 reyssat 716
    for(p1=find_word_start(buf);*p1;
717
        p1=find_word_start(p2)) {
718
        p2=find_word_end(p1); if(*p2) *p2++=0;
719
        sappenditem(p1,lind,serial,4);
720
    }
721
    snprintf(buf,sizeof(buf),"%s %s %s %s",
722
             sindbuf[s_description],sindbuf[s_keywords],
723
             sindbuf[s_domain],sindbuf[s_remark]);
724
    towords(buf);
725
    for(p1=find_word_start(buf);*p1;
726
        p1=find_word_start(p2)) {
727
        p2=find_word_end(p1); if(*p2) *p2++=0;
728
        sappenditem(p1,lind,serial,2);
729
    }
730
    entrycount=gentrycount; dicbuf=gdicbuf;
731
    memmove(entry,gentry,gentrycount*sizeof(entry[0]));
732
    unknown_type=unk_delete;
3718 reyssat 733
    ovlstrcpy(buf,sindbuf[s_title]); translate(buf);
10 reyssat 734
    for(p1=find_word_start(buf); *p1;
735
        p1=find_word_start(p2)) {
736
        p2=strchr(p1,',');
737
        if(p2!=NULL) *p2++=0; else p2=p1+strlen(p1);
738
        if(strlen(p1)<=0) continue;
739
        sappenditem(p1,lind,serial,4);
740
    }
741
    snprintf(buf,sizeof(buf),"%s, %s, %s, %s",
742
             sindbuf[s_description],sindbuf[s_keywords],
743
             sindbuf[s_domain],sindbuf[s_remark]);
744
    translate(buf);
745
    for(p1=find_word_start(buf); *p1;
746
        p1=find_word_start(p2)) {
747
        p2=strchr(p1,',');
748
        if(p2!=NULL) *p2++=0; else p2=p1+strlen(p1);
749
        if(strlen(p1)<=0) continue;
750
        sappenditem(p1,lind,serial,2);
751
    }
752
    fprintf(weightf,"%d:%d\n",serial,tweight);
753
}
754
 
755
void sheets(void)
756
{
757
    int i,j;
758
    char mdic[MAX_LINELEN+1], sdic[MAX_LINELEN+1], gdic[MAX_LINELEN+1];
759
    char buf[MAX_LINELEN+1];
760
 
761
    for(j=0;j<langcnt;j++) {
762
        snprintf(buf,sizeof(buf),"%s/index/title.%s",sheetdir,lang[j]);
763
        titf=fopen(buf,"w");
764
        snprintf(buf,sizeof(buf),"%s/index/description.%s",sheetdir,lang[j]);
765
        descf=fopen(buf,"w");
766
        snprintf(buf,sizeof(buf),"%s/index/%s",sheetdir,lang[j]);
767
        indf=fopen(buf,"w");
768
        snprintf(buf,sizeof(buf),"%s/index/list.%s",sheetdir,lang[j]);
769
        listf=fopen(buf,"w");
770
        snprintf(buf,sizeof(buf),"%s/index/weight.%s",sheetdir,lang[j]);
771
        weightf=fopen(buf,"w");
772
        snprintf(buf,sizeof(buf),"%s/index/addr.%s",sheetdir,lang[j]);
773
        addrf=fopen(buf,"w");
774
        snprintf(buf,sizeof(buf),"%s/index/serial.%s",sheetdir,lang[j]);
775
        serialf=fopen(buf,"w");
776
        snprintf(mdic,sizeof(mdic),"%s/%s.%s",dicdir,maindic,lang[j]);
777
        snprintf(sdic,sizeof(sdic),"%s/%s.%s",dicdir,suffixdic,lang[j]);
778
        snprintf(gdic,sizeof(gdic),"%s/%s.%s",dicdir,groupdic,lang[j]);
779
        suffix_dic(sdic); prepare_dic(gdic);
780
        gdicbuf=dicbuf; gentrycount=entrycount;
781
        memmove(gentry,entry,gentrycount*sizeof(entry[0]));
782
        prepare_dic(mdic);
783
        mdicbuf=dicbuf; mentrycount=entrycount;
784
        memmove(mentry,entry,mentrycount*sizeof(entry[0]));
785
        unknown_type=unk_leave; translate(ignore[j]);
786
        for(i=0;i<modcnt;i++) {
787
            if(mod[i].langs[0]!=j) continue;
788
            fprintf(addrf,"%d:%s\n",i,mod[i].name+3);
789
            fprintf(serialf,"%s:%d\n",mod[i].name+3,i);
790
            onesheet(i,j);
791
        }
792
        if(mentrycount>0) free(mdicbuf);
793
        if(gentrycount>0) free(gdicbuf);
794
        if(suffixcnt>0) free(sufbuf);
795
        fclose(titf); fclose(descf); fclose(indf); fclose(listf);
796
        fclose(weightf); fclose(addrf); fclose(serialf);
797
    }
798
}
799
 
800
int main()
801
{
802
    prep();
803
    if(modcnt>0) modules();
804
    clean();
805
    sprep();
806
    if(modcnt>0) sheets();
807
    return 0;
808
}
809