Subversion Repositories wimsdev

Rev

Rev 6819 | Rev 6884 | Go to most recent revision | Details | Compare with Previous | Last modification | View Log | RSS feed

Rev Author Line No. Line
10 reyssat 1
/*    Copyright (C) 1998-2003 XIAO, Gang of Universite de Nice - Sophia Antipolis
2
 *
3
 *  This program is free software; you can redistribute it and/or modify
4
 *  it under the terms of the GNU General Public License as published by
5
 *  the Free Software Foundation; either version 2 of the License, or
6
 *  (at your option) any later version.
7
 *
8
 *  This program is distributed in the hope that it will be useful,
9
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
10
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
11
 *  GNU General Public License for more details.
12
 *
13
 *  You should have received a copy of the GNU General Public License
14
 *  along with this program; if not, write to the Free Software
15
 *  Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
16
 */
17
 
18
        /* This is an internal program,
19
         * used to index modules for search engine. */
20
 
21
#include "../wims.h"
3718 reyssat 22
#include "../Lib/basicstr.c"
10 reyssat 23
 
24
#define MAX_LANGS       MAX_LANGUAGES
25
#define MAX_MODULES     65536
26
char *moduledir=        "public_html/modules";
27
char *sheetdir=         "public_html/bases/sheet";
28
char *dicdir=           "public_html/bases";
29
char *outdir=           "public_html/bases/site2";
30
char *maindic=          "sys/words";
31
char *groupdic=         "sys/wgrp/wgrp";
32
char *suffixdic=        "sys/suffix";
6881 bpr 33
char *domaindic=        "sys/domaindic";
10 reyssat 34
char *ignoredic=        "sys/indignore";
35
char *conffile=         "log/wims.conf";
36
char *mlistbase=        "list";
37
 
38
char lang[MAX_LANGS][4]={
1792 bpr 39
    "en","fr","cn","es","it","nl","si","ca","pt"
10 reyssat 40
};
41
#define DEFAULT_LANGCNT 6
42
char allang[MAX_LANGS][4]={
6564 bpr 43
    "en","fr","cn","es","it","nl","de","si","ca","pt"
10 reyssat 44
};
45
#define allangcnt 8
46
char ignore[MAX_LANGS][MAX_LINELEN+1];
47
char mlistfile[MAX_LANGS][256];
48
int langcnt;
49
FILE *langf, *titf, *descf, *weightf, *robotf, *indf, *listf, *addrf, *serialf, *authorf, *versionf;
50
 
51
/* One module category: the word used in the INDEX "category" field and
 * the one-letter code used to name the per-category output files. */
struct cat {
    char *name;
    char typ;
};

/* Known categories.  Entry 0 ("all_types") is the catch-all that
 * onemodule() adds to every categorized module. */
struct cat cat[] = {
    { "all_types",  'A' },
    { "exercise",   'X' },
    { "oef",        'O' },
    { "tool",       'T' },
    { "recreation", 'R' },
    { "reference",  'Y' },
    { "document",   'D' },
    { "popup",      'P' },
    { "datamodule", 'M' }
};
/* Number of entries in cat[]. */
#define catno (sizeof(cat)/sizeof(cat[0]))
66
 
67
struct mod {
68
    char *name;
69
    unsigned char langs[MAX_LANGS];
70
    int counts[MAX_LANGS];
71
    int  langcnt;
72
} mod[MAX_MODULES];
73
int modcnt;
74
 
75
char *mlist;
76
 
77
/* malloc() wrapper that never returns NULL: when the allocation fails it
 * prints "Malloc failure." on stdout and exits with status 1. */
void *xmalloc(size_t n)
{
    void *mem=malloc(n);

    if(mem!=NULL) return mem;
    printf("Malloc failure.\n");
    exit(1);
}
87
 
88
/* Parallel tables: the accented letter found at byte offset k of acctab is
 * replaced by the plain letter at offset k of deatab.
 * NOTE(review): deaccent() looks letters up byte by byte, so the tables
 * presumably assume a single-byte encoding (one byte per accented letter)
 * for both this file and the indexed text -- confirm. */
char *acctab="çéèêëúùûüáàâäãóòôöõíìïîñýÇÉÈÊËÚÙÛÜÁÀÂÃÄÓÒÔÖÕÍÌÏÎÑÝ",
     *deatab="ceeeeuuuuaaaaaoooooiiiinyCEEEEUUUUAAAAAOOOOOIIIINY";

/* Normalize the string p in place:
 *   - bytes >= 0x80 found in acctab are replaced by their deatab letter;
 *   - every remaining byte that is neither alphanumeric nor one of
 *     , . & $ + *  becomes a space;
 *   - everything else is lower-cased.
 * Bytes are handled as unsigned char so that the result does not depend on
 * the signedness of plain char and the <ctype.h> calls stay defined. */
void deaccent(char *p)
{
    char *sp;
    char *v;
    unsigned char c;

    for(sp=p;*sp;sp++) {
        c=(unsigned char)*sp;
        if(c>=0x80 && (v=strchr(acctab,*sp))!=NULL) {
            *sp=*(deatab+(v-acctab));
            c=(unsigned char)*sp;
        }
        if(!isalnum(c) && strchr(",.&$+*",c)==NULL) *sp=' ';
        else *sp=tolower(c);
    }
}
103
 
104
        /* translate everything non-alphanumeric into space */
105
/* Replace, in place, every character of p that is neither alphanumeric
 * nor one of & $ + * by a space.  The character is passed to isalnum()
 * as unsigned char, which keeps the call defined for bytes >= 0x80. */
void towords(char *p)
{
    char *pp;

    for(pp=p;*pp;pp++) {
        if(!isalnum((unsigned char)*pp) && strchr("&$+*",*pp)==NULL) *pp=' ';
    }
}
110
 
111
        /* Points to the end of the word */
112
char *find_word_end(char *p)
113
{
114
    int i;
115
    for(i=0;!isspace(*p) && *p!=0 && i<MAX_LINELEN; p++,i++);
116
    return p;
117
}
118
 
119
        /* Strips leading spaces */
120
char *find_word_start(char *p)
121
{
122
    int i;
123
    for(i=0; isspace(*p) && i<MAX_LINELEN; p++,i++);
124
    return p;
125
}
126
 
127
        /* Find first occurrence of word */
128
/* Find the first occurrence of w in p that stands as a whole word, i.e.
 * that is preceded by whitespace or the start of p, and followed by
 * whitespace or the end of the string.
 * Returns a pointer to that occurrence, or NULL when there is none.
 * An empty w never matches (returns NULL): the former code would step
 * past the end of p in that case. */
char *wordchr(char *p, char *w)
{
    char *r;
    size_t n=strlen(w);

    if(n==0) return NULL;
    for(r=strstr(p,w); r!=NULL; r=strstr(r+1,w)) {
        int left_ok=(r==p || isspace((unsigned char)*(r-1)));
        int right_ok=(*(r+n)==0 || isspace((unsigned char)*(r+n)));
        if(left_ok && right_ok) break;
    }
    return r;
}
137
 
138
        /* find a variable in a string (math expression).
139
         * Returns the pointer or NULL. */
140
/* Find the first occurrence of the variable name v in the expression p
 * that is not glued to other alphanumeric characters: the character
 * before it (if any) and the character after it must not be alphanumeric.
 * Returns a pointer to the occurrence, or NULL when there is none.
 * An empty v never matches (returns NULL): the former code would step
 * past the end of p in that case. */
char *varchr(char *p, char *v)
{
    char *pp;
    size_t n=strlen(v);

    if(n==0) return NULL;
    for(pp=strstr(p,v); pp!=NULL; pp=strstr(pp+1,v)) {
        if((pp==p || !isalnum((unsigned char)*(pp-1))) &&
           !isalnum((unsigned char)*(pp+n))) break;
    }
    return pp;
}
149
 
150
        /* strip trailing spaces; return string end. */
151
/* Remove trailing whitespace from p in place.
 * Returns a pointer to the last character kept, or p itself when the
 * string is (or becomes) empty.  The former code computed and returned
 * p-1 for an all-blank string, which is outside the buffer. */
char *strip_trailing_spaces(char *p)
{
    char *pp;

    if(*p==0) return p;
    pp=p+strlen(p);
    while(pp>p && isspace((unsigned char)*(pp-1))) *(--pp)=0;
    if(pp>p) pp--;
    return pp;
}
158
 
159
/* Given p pointing at (or inside) an html tag, return a pointer just past
 * the '>' that closes it, or to the terminating NUL if the tag is never
 * closed.  Nested '<...>' groups are skipped recursively and a '>' inside
 * a single- or double-quoted value does not close the tag.  An
 * unterminated quote makes the function return the end of the string. */
char *find_tag_end(char *p)
{
    char *cur=p;
    char *closing;

    if(*cur=='<') cur++;
    while(*cur!=0 && *cur!='>') {
        switch(*cur) {
            case '<':
                /* resume right after the nested tag, without stepping */
                cur=find_tag_end(cur);
                continue;
            case '"':
            case '\'':
                closing=strchr(cur+1,*cur);
                if(closing==NULL) return p+strlen(p);
                cur=closing;
                break;
            default:
                break;
        }
        cur++;
    }
    if(*cur=='>') cur++;
    return cur;
}
178
 
179
/* Find the first "<tag" in p (tag name compared without regard to case)
 * whose name is not continued by an alphanumeric character.
 * Returns a pointer to the '<', or to the terminating NUL of p when no
 * such tag exists.  The character after the name is passed to isalnum()
 * as unsigned char so that the call is defined for bytes >= 0x80. */
char *find_tag(char *p, char *tag)
{
    char *pp;
    size_t len=strlen(tag);

    for(pp=strchr(p,'<'); pp!=NULL; pp=strchr(pp+1,'<')) {
        if(strncasecmp(pp+1,tag,len)==0 &&
           !isalnum((unsigned char)*(pp+1+len))) return pp;
    }
    return p+strlen(p);
}
189
 
190
        /* remove all html tags */
191
/* Delete every html tag from p in place.  A tag that is never closed
 * truncates the string at its '<'. */
void detag(char *p)
{
    char *open, *after;

    open=strchr(p,'<');
    while(open!=NULL) {
        after=find_tag_end(open);
        if(*after==0) {
            /* the tag runs to the end of the string: cut it off */
            *open=0;
            return;
        }
        ovlstrcpy(open,after);
        /* search again from the same spot: the text moved down onto it */
        open=strchr(open,'<');
    }
}
200
 
201
        /* modify a string. Buffer length must be at least MAX_LINELEN */
202
void string_modify(char *start, char *bad_beg, char *bad_end, char *good,...)
203
{
204
    char buf[MAX_LINELEN+1];
205
    va_list vp;
6881 bpr 206
 
10 reyssat 207
    va_start(vp,good);
208
    vsnprintf(buf,sizeof(buf),good,vp); va_end(vp);
209
    if(strlen(start)-(bad_end-bad_beg)+strlen(buf)>=MAX_LINELEN)
210
      return;
211
    strcat(buf,bad_end);
3718 reyssat 212
    ovlstrcpy(bad_beg,buf);
10 reyssat 213
}
214
 
6819 reyssat 215
/* add a space after comma to see end of words */
216
 
217
/* Insert a space after every comma of p (in place), so that a word
 * ending at a comma is followed by whitespace. */
void comma(char *p)
{
    char *at=strchr(p,',');

    while(at!=NULL) {
        string_modify(p,at,at+1,", ");
        at=strchr(at+1,',');
    }
}
223
 
224
 
10 reyssat 225
void _getdef(char buf[], char *name, char value[])
226
{
227
    char *p1, *p2, *p3;
228
 
229
    value[0]=0;
230
    for(p1=strstr(buf,name); p1!=NULL; p1=strstr(p1+1,name)) {
231
        p2=find_word_start(p1+strlen(name));
232
        if((p1>buf && !isspace(*(p1-1))) || *p2!='=') continue;
233
        p3=p1; while(p3>buf && isspace(*(p3-1)) && *(p3-1)!='\n') p3--;
234
        if(p3>buf && *(p3-1)!='\n') continue;
6564 bpr 235
        p3=strchr(p2,'\n');
10 reyssat 236
        p2=find_word_start(p2+1);
6564 bpr 237
        if(p3 <= p2) continue;
10 reyssat 238
        snprintf(value,MAX_LINELEN,"%s",p2);
239
        if(p3!=NULL && p3-p2<MAX_LINELEN) value[p3-p2]=0;
240
        strip_trailing_spaces(value);
241
        break;
242
    }
243
}
244
 
245
        /* Get variable definition from a file.
246
         * Result stored in buffer value of length MAX_LINELEN. */
247
/* Read the whole file fname and store the definition of name found in it
 * into value (see _getdef()).  value is left empty when the file cannot
 * be opened, measured or read, or when name is not defined in it.
 * The file size is validated before it is used as a read length, and the
 * read buffer is released on every path. */
void getdef(char *fname, char *name, char value[])
{
    FILE *f;
    char *buf;
    long size;
    size_t got;

    value[0]=0;
    f=fopen(fname,"r"); if(f==NULL) return;
    if(fseek(f,0,SEEK_END)!=0 || (size=ftell(f))<=0 ||
       fseek(f,0,SEEK_SET)!=0) {
        /* empty file, or a stream whose size cannot be determined */
        fclose(f); return;
    }
    buf=xmalloc((size_t)size+256);
    got=fread(buf,1,(size_t)size,f);
    fclose(f);
    if(got>0) {
        buf[got]=0;
        _getdef(buf,name,value);
    }
    free(buf);
}
262
 
263
#include "translator_.c"
264
 
6881 bpr 265
char *mdicbuf, *gdicbuf, *ddicbuf;
266
char gentry[sizeof(entry)], mentry[sizeof(entry)], dentry[sizeof(entry)];
267
int gentrycount, mentrycount, dentrycount;
10 reyssat 268
 
269
        /* Preparation of data */
270
void prep(void)
271
{
272
    char buf[MAX_LINELEN+1];
273
    char *p1,*p2,*s,*old;
274
    int i,l,thislang,t;
275
    FILE *f;
6881 bpr 276
 
10 reyssat 277
    s=getenv("modind_outdir"); if(s!=NULL && *s!=0) outdir=s;
278
    s=getenv("modind_sheetdir"); if(s!=NULL && *s!=0) sheetdir=s;
279
    snprintf(buf,sizeof(buf),"%s/addr",outdir);
280
    addrf=fopen(buf,"w");
281
    snprintf(buf,sizeof(buf),"%s/serial",outdir);
282
    serialf=fopen(buf,"w");
283
    modcnt=langcnt=0;
284
    getdef(conffile,"site_languages",buf);
285
    for(p1=buf;*p1;p1++) if(!isalnum(*p1)) *p1=' ';
286
    for(p1=find_word_start(buf); *p1 && langcnt<MAX_LANGS; p1=find_word_start(p2)) {
287
        p2=find_word_end(p1);
288
        if(p2!=p1+2 || !isalpha(*p1) || !isalpha(*(p1+1))) continue;
289
        memmove(lang[langcnt],p1,2); lang[langcnt++][2]=0;
290
    }
291
    if(langcnt==0) {    /* default languages */
292
        langcnt=DEFAULT_LANGCNT;
293
    }
294
    s=getenv("mlist"); if(s==NULL) exit(1);
295
    l=strlen(s); if(l<0 || l>100*MAX_LINELEN) exit(1);
3718 reyssat 296
    mlist=xmalloc(l+16); ovlstrcpy(mlist,s); old="";
10 reyssat 297
    for(i=0;i<langcnt;i++) {
298
        snprintf(buf,sizeof(buf),"%s/%s.%s",dicdir,ignoredic,lang[i]);
299
        f=fopen(buf,"r"); if(f==NULL) continue;
300
        l=fread(ignore[i],1,MAX_LINELEN,f);fclose(f);
301
        if(l<0 || l>=MAX_LINELEN) l=0;
302
        ignore[i][l]=0;
303
    }
304
    for(t=0, p1=find_word_start(mlist);
305
        *p1 && modcnt<MAX_MODULES;
306
        p1=find_word_start(p2), t++) {
307
        p2=find_word_end(p1);
308
        l=p2-p1; if(*p2) *p2++=0;
309
        fprintf(addrf,"%d:%s\n",t,p1);
310
        fprintf(serialf,"%s:%d\n",p1,t);
311
        thislang=-1;
6564 bpr 312
/* language is taken from the address */
10 reyssat 313
        if(l>3 && p1[l-3]=='.') {
314
            for(i=0;i<langcnt;i++) if(strcasecmp(lang[i],p1+l-2)==0) break;
315
            if(i<langcnt) {p1[l-3]=0; thislang=i;}
316
            else {      /* unknown language, not referenced */
317
                continue;
318
            }
319
        }
320
        if(modcnt>0 && strcmp(old,p1)==0 && thislang>=0) {
321
            if(mod[modcnt-1].langcnt<langcnt) {
322
                mod[modcnt-1].langs[mod[modcnt-1].langcnt]=thislang;
323
                mod[modcnt-1].counts[mod[modcnt-1].langcnt]=t;
324
                (mod[modcnt-1].langcnt)++;
325
            }
326
        }
327
        else {
328
            mod[modcnt].name=old=p1;
329
            if(thislang>=0) {
330
                mod[modcnt].langs[0]=thislang;
331
                mod[modcnt].langcnt=1;
332
            }
333
            else mod[modcnt].langcnt=0;
334
            mod[modcnt].counts[0]=t;
335
            modcnt++;
336
        }
337
    }
338
    snprintf(buf,sizeof(buf),"%s/language",outdir);
339
    langf=fopen(buf,"w");
340
    snprintf(buf,sizeof(buf),"%s/title",outdir);
341
    titf=fopen(buf,"w");
342
    snprintf(buf,sizeof(buf),"%s/description",outdir);
343
    descf=fopen(buf,"w");
344
    snprintf(buf,sizeof(buf),"%s/author",outdir);
345
    authorf=fopen(buf,"w");
346
    snprintf(buf,sizeof(buf),"%s/version",outdir);
347
    versionf=fopen(buf,"w");
348
    snprintf(buf,sizeof(buf),"%s/lists/robot.phtml",outdir);
349
    robotf=fopen(buf,"w");
350
    fclose(addrf); fclose(serialf);
351
    if(!robotf || !versionf || !authorf || !descf || !titf || !descf) {
352
        fprintf(stderr,"modind: error creating output files.\n");
353
        exit(1);
354
    }
355
}
356
 
357
void sprep(void)
358
{
359
    char *p1,*p2,*s;
360
    int i,l,thislang;
6881 bpr 361
 
10 reyssat 362
    modcnt=0;
363
    s=getenv("slist"); if(s==NULL) return;
364
    l=strlen(s); if(l<0 || l>100*MAX_LINELEN) return;
3718 reyssat 365
    mlist=xmalloc(l+16); ovlstrcpy(mlist,s);
10 reyssat 366
    for(p1=find_word_start(mlist); *p1 && modcnt<MAX_MODULES; p1=find_word_start(p2)) {
367
        p2=find_word_end(p1);
368
        l=p2-p1; if(*p2) *p2++=0;
369
        for(i=0;i<langcnt;i++) if(strncasecmp(lang[i],p1,2)==0) break;
370
        if(i<langcnt) thislang=i; else continue;
371
        mod[modcnt].name=p1;
372
        mod[modcnt].langs[0]=thislang;
373
        mod[modcnt].langcnt=1;
374
        modcnt++;
375
    }
376
}
377
 
378
void clean(void)
379
{
380
    fclose(langf); fclose(titf); fclose(descf); fclose(robotf);
381
    fclose(authorf); fclose(versionf);
382
}
383
 
384
char *sheetindex[]={
6881 bpr 385
      "title", "description",
10 reyssat 386
      "duration", "severity",
387
      "level", "domain",
388
      "keywords", "reserved1", "reserved2", "remark"
389
};
390
#define SHEETINDEX_NO (sizeof(sheetindex)/sizeof(sheetindex[0]))
391
char sindbuf[SHEETINDEX_NO][MAX_LINELEN+1];
392
enum{s_title, s_description,
393
      s_duration, s_severity,
394
      s_level, s_domain,
395
      s_keywords, s_reserved1, s_reserved2,
396
      s_remark
397
};
398
 
399
char *modindex[]={
6881 bpr 400
      "title", "description",
10 reyssat 401
      "author", "address", "copyright",
402
      "version", "wims_version", "language",
6881 bpr 403
      "category", "level", "domain", "keywords",
6799 bpr 404
      "keywords_ca", "keywords_en", "keywords_fr", "keywords_it", "keywords_nl",
405
      "title_ca", "title_en", "title_fr", "title_it", "title_nl",
10 reyssat 406
      "require"
407
};
408
#define MODINDEX_NO (sizeof(modindex)/sizeof(modindex[0]))
409
char indbuf[MODINDEX_NO][MAX_LINELEN+1];
410
enum{i_title, i_description,
411
      i_author,i_address,i_copyright,
412
      i_version,i_wims_version,i_language,
413
      i_category,i_level,i_domain,i_keywords,
6799 bpr 414
      i_keywords_ca,i_keywords_en,i_keywords_fr,i_keywords_it,i_keywords_nl,
415
      i_title_ca,i_title_en,i_title_fr,i_title_it,i_title_nl,
10 reyssat 416
      i_require
417
};
418
 
419
/* Names of special module files (not referenced in the visible part of
 * this file). */
char *module_special_file[]={
    "intro",
    "help",
    "about"
};
#define MODSPEC_NO (sizeof(module_special_file)/sizeof(module_special_file[0]))
/* Two-letter language of the module last read by module_index(). */
char module_language[4];
424
 
425
        /* read and treat module's INDEX file */
426
int module_index(const char *name)
427
{
428
    char *p, fbuf[MAX_LINELEN+1], ibuf[MAX_LINELEN+1];
429
    FILE *indf;
430
    int i,l;
431
 
432
    snprintf(fbuf,sizeof(fbuf),"%s/%s/INDEX",moduledir,name);
433
    indf=fopen(fbuf,"r"); if(indf==NULL) return -1;
434
    l=fread(ibuf,1,MAX_LINELEN,indf); fclose(indf);
435
    if(l>0 && l<MAX_LINELEN) ibuf[l]=0; else return -1;
436
    for(i=0;i<MODINDEX_NO;i++) {
437
        _getdef(ibuf,modindex[i],indbuf[i]);
438
                /* compatibility precaution */
439
        if(indbuf[i][0]==':') indbuf[i][0]='.';
440
    }
441
    p=find_word_start(indbuf[i_language]);
442
    if(isalpha(*p) && isalpha(*(p+1))) {
443
        memmove(module_language,p,2); module_language[2]=0;
444
    }
3718 reyssat 445
    else ovlstrcpy(module_language,"en");
10 reyssat 446
    return 0;
447
}
448
 
449
int sheet_index(int serial)
450
{
451
    char *p1, *p2, fbuf[MAX_LINELEN+1], ibuf[MAX_LINELEN+1];
452
    FILE *indf;
453
    int i,l;
454
 
455
    snprintf(fbuf,sizeof(fbuf),"%s/%s.def",sheetdir,mod[serial].name);
456
    indf=fopen(fbuf,"r"); if(indf==NULL) return -1;
457
    l=fread(ibuf,1,MAX_LINELEN,indf); fclose(indf);
458
    if(l>0 && l<MAX_LINELEN) ibuf[l]=0; else return -1;
459
    for(i=0;i<SHEETINDEX_NO;i++) sindbuf[i][0]=0;
460
    for(i=0,p1=find_word_start(ibuf);
461
        i<SHEETINDEX_NO-1 && *p1!=':' && *p1!=0;
462
        i++,p1=p2) {
463
        p2=strchr(p1,'\n');
464
        if(p2!=NULL) *p2++=0; else p2=p1+strlen(p1);
465
        p1=find_word_start(p1); strip_trailing_spaces(p1);
466
        snprintf(sindbuf[i],MAX_LINELEN,"%s",p1);
467
    }
468
    p2=strstr(p1,"\n:"); if(p2==NULL) p2=p1+strlen(p1);
469
    else *p2=0;
470
    p1=find_word_start(p1); strip_trailing_spaces(p1);
471
    for(p2=p1;*p2;p2++) if(*p2=='\n') *p2=' ';
3718 reyssat 472
    ovlstrcpy(sindbuf[s_remark],p1);
10 reyssat 473
    return 0;
474
}
475
 
476
unsigned char categories[16];
477
char taken[MAX_LINELEN+1];
478
int catcnt, takenlen, tweight;
479
 
480
void appenditem(char *word, int lind, int serial, int weight, char *l)
481
{
482
    char nbuf[MAX_LINELEN+1], buf[MAX_LINELEN+1];
483
    int i, ll;
484
    char *p;
485
    FILE *f;
6881 bpr 486
 
10 reyssat 487
    if(!isalnum(*word) || (ll=strlen(word))<2 ||
488
       wordchr(taken,word)!=NULL ||
489
       wordchr(ignore[lind],word)!=NULL ||
490
       takenlen>=MAX_LINELEN-ll-16)
491
      return;
492
    if(ll==2 && (!isdigit(word[0]) || !isalpha(word[1]))) return;
493
    for(p=word;*p;p++) if(!isalnum(*p) && *p!=' ') return;
494
    taken[takenlen++]=' '; taken[takenlen++]=' ';
3718 reyssat 495
    ovlstrcpy(taken+takenlen,word);
10 reyssat 496
    takenlen+=ll; tweight+=weight;
497
    snprintf(buf,sizeof(buf),"%s:%d?%d\n",word,serial,weight);
498
    for(i=0;i<catcnt;i++) {
499
        snprintf(nbuf,sizeof(nbuf),"%s/%c.%s",
500
                 outdir,categories[i],lang[lind]);
501
        f=fopen(nbuf,"a");
502
        if(f!=NULL) {fputs(buf,f); fclose(f);}
503
    }
504
}
505
 
6881 bpr 506
void appenditem1 (char *buf, int lind, int serial, int weight, char *l )
507
{
508
  char *p1, *p2 ;
509
  for(p1=find_word_start(buf); *p1;
510
        p1=find_word_start(p2)) {
511
        p2=strchr(p1,',');
512
        if(p2!=NULL) *p2++=0; else p2=p1+strlen(p1);
513
        if(strlen(p1)<=0) continue;
514
        appenditem(p1,lind,serial,weight,module_language);
515
  }
516
}
517
void appenditem2 (char *buf, int lind, int serial, int weight, char *l )
518
{
519
  char *p1, *p2 ;
520
  for(p1=find_word_start(buf);*p1;
521
        p1=find_word_start(p2)) {
522
        p2=find_word_end(p1); if(*p2) *p2++=0;
523
        appenditem(p1,lind,serial,weight,module_language);
524
  }
525
}
10 reyssat 526
void onemodule(const char *name, int serial, int lind)
527
{
528
    int i;
529
    unsigned char trlist[]={
530
        i_title,i_description,i_category,i_domain,i_keywords,
6394 bpr 531
          i_require,i_author,
6799 bpr 532
          i_keywords_ca,i_keywords_en,i_keywords_fr,i_keywords_it,i_keywords_nl,
533
          i_title_ca,i_title_en,i_title_fr,i_title_it,i_title_nl
10 reyssat 534
    };
535
    #define trcnt (sizeof(trlist)/sizeof(trlist[0]))
6564 bpr 536
    char *p1, *p2, *pp, *q, buf[MAX_LINELEN+1], lbuf[16];
10 reyssat 537
    FILE *f;
6881 bpr 538
 
10 reyssat 539
    if(module_index(name)) return;
540
    towords(indbuf[i_category]);
6818 reyssat 541
        /*  list the categories (among A=all,X=eXercise,O,D,...) corresponding to this module  */
10 reyssat 542
    for(i=catcnt=0;i<catno && catcnt<16;i++) {
6881 bpr 543
        if(wordchr(indbuf[i_category],cat[i].name)!=NULL)
10 reyssat 544
          categories[catcnt++]=cat[i].typ;
545
    }
546
    if(catcnt==0) return;
547
    if(categories[0]!=cat[0].typ)
548
      categories[catcnt++]=cat[0].typ;
6818 reyssat 549
        /*  write module's name in the category.language files, for instance lists/X.fr for french exercises  */
10 reyssat 550
    for(i=0;i<catcnt;i++) {
551
        snprintf(buf,sizeof(buf),"%s/lists/%c.%s",
552
                 outdir,categories[i],lang[lind]);
553
        f=fopen(buf,"a");
554
        if(f!=NULL) {fprintf(f,"%s\n",name); fclose(f);}
555
    }
6818 reyssat 556
        /*  add serial number and language (resp.title, ...) to corresponding file  */
10 reyssat 557
    fprintf(langf,"%d:%s\n",serial,module_language);
558
    fprintf(titf,"%d:%s\n",serial,indbuf[i_title]);
559
    fprintf(descf,"%d:%s\n",serial,indbuf[i_description]);
560
    fprintf(authorf,"%d:%s\n",serial,indbuf[i_author]);
561
    fprintf(versionf,"%d:%s\n",serial,indbuf[i_version]);
6881 bpr 562
 
6818 reyssat 563
        /*  add module's information in html page for robots  */
10 reyssat 564
    snprintf(buf,sizeof(buf),"%s",indbuf[i_description]);
565
    for(pp=strchr(buf,','); pp; pp=strchr(pp,','))
566
      string_modify(buf,pp,pp+1,"&#44;");
567
    if(strcmp(module_language,lang[lind])==0)
568
      fprintf(robotf,"%s ,%s,%s,%s,%s\n",name,module_language,name,
569
              indbuf[i_title], buf);
6819 reyssat 570
 
6881 bpr 571
        /*  Normalize the information, using dictionary
572
        -- bases/sys/domain.xx without suffix (--> english version)
573
        -- bases/sys/words.xx with suffix */
574
    entrycount=dentrycount; dicbuf=ddicbuf;
575
    memmove(entry,dentry,dentrycount*sizeof(entry[0]));
576
    unknown_type=unk_leave;
10 reyssat 577
    for(i=0;i<trcnt;i++) {
578
        detag(indbuf[trlist[i]]);
579
        deaccent(indbuf[trlist[i]]);
6819 reyssat 580
        comma(indbuf[trlist[i]]);
10 reyssat 581
        singlespace(indbuf[trlist[i]]);
6881 bpr 582
        translate(indbuf[trlist[i]]);
583
    }
584
 
585
    entrycount=mentrycount; dicbuf=mdicbuf;
586
    memmove(entry,mentry,mentrycount*sizeof(entry[0]));
587
    unknown_type=unk_leave;  /* used in translator_.c */
588
    for(i=0;i<trcnt;i++) {
10 reyssat 589
        suffix_translate(indbuf[trlist[i]]);
590
        translate(indbuf[trlist[i]]);
591
    }
6881 bpr 592
 
593
/* taken contains all words already seen in the module index */
10 reyssat 594
    taken[0]=0; takenlen=tweight=0;
6881 bpr 595
/*  append words of title  */
3718 reyssat 596
    ovlstrcpy(buf,indbuf[i_title]); towords(buf);
6881 bpr 597
    appenditem2(buf,lind,serial,4,module_language);
598
 
599
/*  append words of every other information except level  */
6799 bpr 600
    snprintf(buf,sizeof(buf),"%s %s %s %s %s %s %s %s %s %s %s %s %s %s %s",
10 reyssat 601
             indbuf[i_description],indbuf[i_keywords],
6806 bpr 602
             indbuf[i_keywords_ca],indbuf[i_keywords_en],indbuf[i_keywords_fr],
6394 bpr 603
             indbuf[i_keywords_it],indbuf[i_keywords_nl],
6806 bpr 604
             indbuf[i_title_ca],indbuf[i_title_en],indbuf[i_title_fr],
6799 bpr 605
             indbuf[i_title_it],indbuf[i_title_nl],
10 reyssat 606
             indbuf[i_domain],indbuf[i_require],indbuf[i_author]);
607
    towords(buf);
6881 bpr 608
    appenditem2(buf,lind,serial,4,module_language);
609
 
610
        /*  this time the dictionary is the group dictionary  sys/wgrp/wgrp
611
         with a g (groupdic), not an m (maindic) . see below main, suffix, group.
6818 reyssat 612
        and delete unknown ?? and translate  */
10 reyssat 613
    entrycount=gentrycount; dicbuf=gdicbuf;
614
    memmove(entry,gentry,gentrycount*sizeof(entry[0]));
6881 bpr 615
 
616
/*  append words (?) of every other information except level  */
617
    ovlstrcpy(buf,indbuf[i_title]);
10 reyssat 618
    unknown_type=unk_delete;
6881 bpr 619
    translate(buf);
620
    appenditem1(buf,lind,serial,2,module_language);
621
 
622
/*  append words (?) of information of description except level  */
623
    snprintf(buf,sizeof(buf),"%s", indbuf[i_description]);
624
    unknown_type=unk_delete;
625
    translate(buf);
626
    appenditem1(buf,lind,serial,4,module_language);
627
 
628
/*  append words (or group of words) of keywords and domain level  */
629
    snprintf(buf,sizeof(buf),"%s, %s, %s, %s, %s, %s, %s",
630
             indbuf[i_domain],indbuf[i_keywords],
6799 bpr 631
             indbuf[i_keywords_ca], indbuf[i_keywords_en],indbuf[i_keywords_fr],
6881 bpr 632
             indbuf[i_keywords_it], indbuf[i_keywords_nl]);
633
        unknown_type=unk_leave;
10 reyssat 634
    translate(buf);
6881 bpr 635
    appenditem1(buf,lind,serial,2,module_language);
636
 
6818 reyssat 637
        /*  append level information, with weight 2 */
10 reyssat 638
    snprintf(buf,sizeof(buf),"%s",indbuf[i_level]);
3718 reyssat 639
    ovlstrcpy(lbuf,"level");
10 reyssat 640
    for(p1=buf; *p1; p1++) if(!isalnum(*p1)) *p1=' ';
6564 bpr 641
    q=buf+strlen(buf);
642
    for(p1=find_word_start(buf); (*p1) && (p1 < q) ;
10 reyssat 643
        p1=find_word_start(p2)) {
6881 bpr 644
        p2=find_word_end(p1);
10 reyssat 645
        if(p2!=NULL) *p2++=0; else p2=p1+strlen(p1);
6881 bpr 646
        if(!isalpha(*p1) ||
10 reyssat 647
           (!isdigit(*(p1+1)) && *(p1+1)!=0) ||
648
           (*(p1+1)!=0 && *(p1+2)!=0))
649
          continue;
650
        *p1=tolower(*p1);
3718 reyssat 651
        ovlstrcpy(lbuf+strlen("level"),p1);
10 reyssat 652
        appenditem(lbuf,lind,serial,2,module_language);
653
    }
6818 reyssat 654
        /*  append total weight of module to weight file site2/weight.xx  */
10 reyssat 655
    fprintf(weightf,"%d:%d\n",serial,tweight);
656
}
657
 
658
void modules(void)
659
{
660
    int i,j,k,d;
661
    char namebuf[MAX_LINELEN+1];
6881 bpr 662
    char mdic[MAX_LINELEN+1], sdic[MAX_LINELEN+1], gdic[MAX_LINELEN+1], ddic[MAX_LINELEN+1];
10 reyssat 663
 
664
    for(j=0;j<langcnt;j++) {
665
        snprintf(namebuf,sizeof(namebuf),"%s/weight.%s",outdir,lang[j]);
666
        weightf=fopen(namebuf,"w");
667
        snprintf(mdic,sizeof(mdic),"%s/%s.%s",dicdir,maindic,lang[j]);
668
        snprintf(sdic,sizeof(sdic),"%s/%s.%s",dicdir,suffixdic,lang[j]);
669
        snprintf(gdic,sizeof(gdic),"%s/%s.%s",dicdir,groupdic,lang[j]);
6881 bpr 670
        snprintf(ddic,sizeof(ddic),"%s/%s.%s",dicdir,domaindic,lang[j]);
10 reyssat 671
        suffix_dic(sdic); prepare_dic(gdic);
672
        gdicbuf=dicbuf; gentrycount=entrycount;
673
        memmove(gentry,entry,gentrycount*sizeof(entry[0]));
674
        prepare_dic(mdic);
675
        mdicbuf=dicbuf; mentrycount=entrycount;
676
        memmove(mentry,entry,mentrycount*sizeof(entry[0]));
6881 bpr 677
        prepare_dic(ddic);
678
        ddicbuf=dicbuf; dentrycount=entrycount;
679
        memmove(dentry,entry,dentrycount*sizeof(entry[0]));
10 reyssat 680
        unknown_type=unk_leave; translate(ignore[j]);
681
        for(i=0;i<modcnt;i++) {
682
            if(mod[i].langcnt>0) {
683
                for(d=k=0;k<mod[i].langcnt;k++)
684
                  if(mod[i].langs[k]<mod[i].langs[d]) d=k;
685
                for(k=0;k<mod[i].langcnt && mod[i].langs[k]!=j;k++);
686
                if(k>=mod[i].langcnt) k=d;
687
                snprintf(namebuf,MAX_LINELEN,"%s.%s",mod[i].name,
688
                         lang[mod[i].langs[k]]);
689
                onemodule(namebuf,mod[i].counts[k],j);
690
            }
691
            else {
692
                onemodule(mod[i].name,mod[i].counts[0],j);
693
            }
694
        }
695
        if(mentrycount>0) free(mdicbuf);
696
        if(gentrycount>0) free(gdicbuf);
697
        if(suffixcnt>0) free(sufbuf);
6881 bpr 698
        if(dentrycount>0) free(ddicbuf);
10 reyssat 699
        if(weightf) fclose(weightf);
700
    }
701
}
702
 
6881 bpr 703
/* FIXME ? differences with appenditem - use fprintf instead of  snprintf */
10 reyssat 704
void sappenditem(char *word, int lind, int serial, int weight)
705
{
706
    int ll;
707
    char *p;
6881 bpr 708
 
10 reyssat 709
    if(!isalnum(*word) || (ll=strlen(word))<2 ||
710
       wordchr(taken,word)!=NULL ||
711
       wordchr(ignore[lind],word)!=NULL ||
712
       takenlen>=MAX_LINELEN-ll-16)
713
      return;
714
    if(ll==2 && (!isdigit(word[0]) || !isalpha(word[1]))) return;
715
    for(p=word;*p;p++) if(!isalnum(*p) && *p!=' ') return;
716
    taken[takenlen++]=' ';taken[takenlen++]=' ';
3718 reyssat 717
    ovlstrcpy(taken+takenlen,word);
10 reyssat 718
    takenlen+=ll; tweight+=weight;
719
    fprintf(indf,"%s:%d?%d\n",word,serial,weight);
720
}
721
 
722
void onesheet(int serial, int lind)
723
{
724
    int i;
725
    unsigned char trlist[]={
726
        s_title,s_description,s_domain,s_keywords,s_remark
727
    };
728
    #define trcnt (sizeof(trlist)/sizeof(trlist[0]))
729
    char *p1, *p2, buf[MAX_LINELEN+1];
6881 bpr 730
 
10 reyssat 731
    if(sheet_index(serial)) return;
732
    fprintf(listf,"%s\n",mod[serial].name+3);
733
    fprintf(titf,"%d:%s\n",serial,sindbuf[s_title]);
734
    fprintf(descf,"%d:%s\n",serial,sindbuf[s_description]);
6881 bpr 735
    entrycount=dentrycount; dicbuf=ddicbuf;
736
    memmove(entry,dentry,dentrycount*sizeof(entry[0]));
10 reyssat 737
    unknown_type=unk_leave;
738
    for(i=0;i<trcnt;i++) {
739
        detag(sindbuf[trlist[i]]);
740
        deaccent(sindbuf[trlist[i]]);
6819 reyssat 741
        comma(sindbuf[trlist[i]]);
10 reyssat 742
        singlespace(sindbuf[trlist[i]]);
6881 bpr 743
        translate(sindbuf[trlist[i]]);
744
    }
745
 
746
    entrycount=mentrycount; dicbuf=mdicbuf;
747
    memmove(entry,mentry,mentrycount*sizeof(entry[0]));
748
    unknown_type=unk_leave;
749
    for(i=0;i<trcnt;i++) {
10 reyssat 750
        suffix_translate(sindbuf[trlist[i]]);
751
        translate(sindbuf[trlist[i]]);
752
    }
753
    taken[0]=0; takenlen=tweight=0;
3718 reyssat 754
    ovlstrcpy(buf,sindbuf[s_title]); towords(buf);
10 reyssat 755
    for(p1=find_word_start(buf);*p1;
756
        p1=find_word_start(p2)) {
757
        p2=find_word_end(p1); if(*p2) *p2++=0;
758
        sappenditem(p1,lind,serial,4);
759
    }
760
    snprintf(buf,sizeof(buf),"%s %s %s %s",
761
             sindbuf[s_description],sindbuf[s_keywords],
762
             sindbuf[s_domain],sindbuf[s_remark]);
763
    towords(buf);
764
    for(p1=find_word_start(buf);*p1;
765
        p1=find_word_start(p2)) {
766
        p2=find_word_end(p1); if(*p2) *p2++=0;
767
        sappenditem(p1,lind,serial,2);
768
    }
769
    entrycount=gentrycount; dicbuf=gdicbuf;
770
    memmove(entry,gentry,gentrycount*sizeof(entry[0]));
771
    unknown_type=unk_delete;
3718 reyssat 772
    ovlstrcpy(buf,sindbuf[s_title]); translate(buf);
10 reyssat 773
    for(p1=find_word_start(buf); *p1;
774
        p1=find_word_start(p2)) {
775
        p2=strchr(p1,',');
776
        if(p2!=NULL) *p2++=0; else p2=p1+strlen(p1);
777
        if(strlen(p1)<=0) continue;
778
        sappenditem(p1,lind,serial,4);
779
    }
780
    snprintf(buf,sizeof(buf),"%s, %s, %s, %s",
781
             sindbuf[s_description],sindbuf[s_keywords],
782
             sindbuf[s_domain],sindbuf[s_remark]);
783
    translate(buf);
784
    for(p1=find_word_start(buf); *p1;
785
        p1=find_word_start(p2)) {
6881 bpr 786
        p2=strchr(p1,',');
10 reyssat 787
        if(p2!=NULL) *p2++=0; else p2=p1+strlen(p1);
788
        if(strlen(p1)<=0) continue;
789
        sappenditem(p1,lind,serial,2);
790
    }
791
    fprintf(weightf,"%d:%d\n",serial,tweight);
792
}
793
 
6881 bpr 794
 
10 reyssat 795
void sheets(void)
796
{
797
    int i,j;
798
    char mdic[MAX_LINELEN+1], sdic[MAX_LINELEN+1], gdic[MAX_LINELEN+1];
799
    char buf[MAX_LINELEN+1];
800
 
801
    for(j=0;j<langcnt;j++) {
802
        snprintf(buf,sizeof(buf),"%s/index/title.%s",sheetdir,lang[j]);
803
        titf=fopen(buf,"w");
804
        snprintf(buf,sizeof(buf),"%s/index/description.%s",sheetdir,lang[j]);
805
        descf=fopen(buf,"w");
806
        snprintf(buf,sizeof(buf),"%s/index/%s",sheetdir,lang[j]);
807
        indf=fopen(buf,"w");
808
        snprintf(buf,sizeof(buf),"%s/index/list.%s",sheetdir,lang[j]);
809
        listf=fopen(buf,"w");
810
        snprintf(buf,sizeof(buf),"%s/index/weight.%s",sheetdir,lang[j]);
811
        weightf=fopen(buf,"w");
812
        snprintf(buf,sizeof(buf),"%s/index/addr.%s",sheetdir,lang[j]);
813
        addrf=fopen(buf,"w");
814
        snprintf(buf,sizeof(buf),"%s/index/serial.%s",sheetdir,lang[j]);
815
        serialf=fopen(buf,"w");
816
        snprintf(mdic,sizeof(mdic),"%s/%s.%s",dicdir,maindic,lang[j]);
817
        snprintf(sdic,sizeof(sdic),"%s/%s.%s",dicdir,suffixdic,lang[j]);
818
        snprintf(gdic,sizeof(gdic),"%s/%s.%s",dicdir,groupdic,lang[j]);
819
        suffix_dic(sdic); prepare_dic(gdic);
820
        gdicbuf=dicbuf; gentrycount=entrycount;
821
        memmove(gentry,entry,gentrycount*sizeof(entry[0]));
822
        prepare_dic(mdic);
823
        mdicbuf=dicbuf; mentrycount=entrycount;
824
        memmove(mentry,entry,mentrycount*sizeof(entry[0]));
825
        unknown_type=unk_leave; translate(ignore[j]);
826
        for(i=0;i<modcnt;i++) {
827
            if(mod[i].langs[0]!=j) continue;
828
            fprintf(addrf,"%d:%s\n",i,mod[i].name+3);
829
            fprintf(serialf,"%s:%d\n",mod[i].name+3,i);
830
            onesheet(i,j);
831
        }
832
        if(mentrycount>0) free(mdicbuf);
833
        if(gentrycount>0) free(gdicbuf);
834
        if(suffixcnt>0) free(sufbuf);
835
        fclose(titf); fclose(descf); fclose(indf); fclose(listf);
836
        fclose(weightf); fclose(addrf); fclose(serialf);
837
    }
838
}
839
 
840
int main()
841
{
842
    prep();
843
    if(modcnt>0) modules();
844
    clean();
845
    sprep();
846
    if(modcnt>0) sheets();
847
    return 0;
848
}
849