Subversion Repositories wimsdev

Rev

Rev 6973 | Rev 8100 | Go to most recent revision | Details | Compare with Previous | Last modification | View Log | RSS feed

Rev Author Line No. Line
10 reyssat 1
/*    Copyright (C) 1998-2003 XIAO, Gang of Universite de Nice - Sophia Antipolis
 *
 *  This program is free software; you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License as published by
 *  the Free Software Foundation; either version 2 of the License, or
 *  (at your option) any later version.
 *
 *  This program is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU General Public License for more details.
 *
 *  You should have received a copy of the GNU General Public License
 *  along with this program; if not, write to the Free Software
 *  Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 */
17
 
6884 bpr 18
/*  This is an internal program,
 *  used to index modules for the search engine.
 */
10 reyssat 21
 
22
#include "../wims.h"
3718 reyssat 23
#include "../Lib/basicstr.c"
10 reyssat 24
 
6884 bpr 25
#define MAX_LANGS    MAX_LANGUAGES
26
#define MAX_MODULES    65536
27
char *moduledir=    "public_html/modules";
28
char *sheetdir=     "public_html/bases/sheet";
29
char *dicdir=       "public_html/bases";
30
char *outdir=       "public_html/bases/site2";
31
char *maindic=      "sys/words";
32
char *groupdic=     "sys/wgrp/wgrp";
33
char *suffixdic=    "sys/suffix";
34
char *domaindic=    "sys/domaindic";
35
char *ignoredic=    "sys/indignore";
36
char *conffile=     "log/wims.conf";
37
char *mlistbase=    "list";
10 reyssat 38
 
39
char lang[MAX_LANGS][4]={
1792 bpr 40
    "en","fr","cn","es","it","nl","si","ca","pt"
10 reyssat 41
};
6884 bpr 42
#define DEFAULT_LANGCNT    6
10 reyssat 43
char allang[MAX_LANGS][4]={
6564 bpr 44
    "en","fr","cn","es","it","nl","de","si","ca","pt"
10 reyssat 45
};
46
#define allangcnt 8
47
char ignore[MAX_LANGS][MAX_LINELEN+1];
48
char mlistfile[MAX_LANGS][256];
49
int langcnt;
6961 bpr 50
FILE *langf, *titf, *descf, *weightf, *robotf, *indf, *listf, *addrf, *serialf, *authorf, *versionf, *remf;
10 reyssat 51
 
52
/*  Module categories. name is the word looked up in the "category"
 *  field of a module's INDEX file; typ is the one-letter code used to
 *  name the per-category output files (<typ>.<lang>).
 *  Entry 0 ("all_types") is added to every indexed module by onemodule().
 */
struct cat {
    char *name;
    char typ;
} cat[]={
    {"all_types", 'A'},
    {"exercise",  'X'},
    {"oef",       'O'},
    {"tool",      'T'},
    {"recreation",'R'},
    {"reference", 'Y'},
    {"document",  'D'},
    {"popup",     'P'},
    {"datamodule",'M'}
};
#define catno (sizeof(cat)/sizeof(cat[0]))
67
 
68
struct mod {
69
    char *name;
70
    unsigned char langs[MAX_LANGS];
71
    int counts[MAX_LANGS];
72
    int  langcnt;
73
} mod[MAX_MODULES];
74
int modcnt;
75
 
76
char *mlist;
77
 
78
/*  Allocate n bytes or terminate the program.
 *  Never returns NULL: on failure a message is written to stderr and
 *  the program exits with status 1. The caller owns the memory and
 *  releases it with free().
 */
void *xmalloc(size_t n)
{
    void *p;

    /* malloc(0) is allowed to return NULL; request one byte instead so
     * that an empty request is not reported as memory exhaustion. */
    p=malloc(n>0 ? n : 1);
    if(p==NULL) {
        fprintf(stderr,"Malloc failure.\n");
        exit(1);
    }
    return p;
}
88
 
89
/*  Accent folding tables: the byte acctab[i] is folded to deatab[i].
 *  The accented letters are written as ISO-8859-1 byte values so that
 *  the two tables stay in one-to-one correspondence whatever the
 *  encoding of this source file
 *  (NOTE(review): assumes the indexed text is ISO-8859-1 - confirm).
 *  Order: c-cedilla, e(4), u(4), a(5), o(5), i(4), n-tilde, y-acute,
 *  lower case first, then the same letters in upper case.
 */
char *acctab="\xe7\xe9\xe8\xea\xeb\xfa\xf9\xfb\xfc\xe1\xe0\xe2\xe4\xe3"
             "\xf3\xf2\xf4\xf6\xf5\xed\xec\xef\xee\xf1\xfd"
             "\xc7\xc9\xc8\xca\xcb\xda\xd9\xdb\xdc\xc1\xc0\xc2\xc3\xc4"
             "\xd3\xd2\xd4\xd6\xd5\xcd\xcc\xcf\xce\xd1\xdd",
     *deatab="ceeeeuuuuaaaaaoooooiiiinyCEEEEUUUUAAAAAOOOOOIIIINY";

/*  Normalize a string in place:
 *   - known accented letters are folded to their unaccented form,
 *   - letters are converted to lower case,
 *   - every character that is neither alphanumeric nor one of  , . & $ + *
 *     is replaced by a space.
 */
void deaccent(char *p)
{
    char *sp;
    char *v;
    unsigned char c;

    for(sp=p;*sp;sp++) {
        /* work on the unsigned value: plain char may be signed or
         * unsigned, and <ctype.h> functions need an unsigned char value */
        c=(unsigned char)*sp;
        if(c>=0x80 && (v=strchr(acctab,c))!=NULL)
          c=(unsigned char)*(deatab+(v-acctab));
        if(!isalnum(c) && strchr(",.&$+*",c)==NULL) *sp=' ';
        else *sp=tolower(c);
    }
}
106
 
6884 bpr 107
/*  Replace in place every character that is neither alphanumeric
 *  nor one of  & $ + *  by a space.
 */
void towords(char *p)
{
    char *pp;

    for(pp=p;*pp;pp++) {
        /* the cast keeps isalnum() defined for bytes above 0x7f */
        if(!isalnum((unsigned char)*pp) && strchr("&$+*",*pp)==NULL) *pp=' ';
    }
}
113
 
6884 bpr 114
/*  Points to the end of the word */
10 reyssat 115
char *find_word_end(char *p)
116
{
117
    int i;
118
    for(i=0;!isspace(*p) && *p!=0 && i<MAX_LINELEN; p++,i++);
119
    return p;
120
}
121
 
6884 bpr 122
/*  Strips leading spaces */
10 reyssat 123
char *find_word_start(char *p)
124
{
125
    int i;
126
    for(i=0; isspace(*p) && i<MAX_LINELEN; p++,i++);
127
    return p;
128
}
129
 
6884 bpr 130
/*  Find the first occurrence of w in p that is a whole word, that is
 *  preceded by whitespace or the start of p, and followed by whitespace
 *  or the end of the string.
 *  Returns a pointer to the occurrence, or NULL if there is none.
 */
char *wordchr(char *p, char *w)
{
    char *r;
    size_t n=strlen(w);

    /* never restart the search beyond the terminating zero (this can
     * only happen with an empty w) */
    for(r=strstr(p,w); r!=NULL; r=(*r!=0 ? strstr(r+1,w) : NULL)) {
        if((r==p || isspace((unsigned char)*(r-1))) &&
           (*(r+n)==0 || isspace((unsigned char)*(r+n)))) break;
    }
    return r;
}
140
 
6884 bpr 141
/*  Find a variable name in a string (math expression): the first
 *  occurrence of v in p that is neither preceded nor followed by an
 *  alphanumeric character.
 *  Returns the pointer, or NULL if there is no such occurrence.
 */
char *varchr(char *p, char *v)
{
    char *pp;
    size_t n=strlen(v);

    /* never restart the search beyond the terminating zero (this can
     * only happen with an empty v) */
    for(pp=strstr(p,v); pp!=NULL; pp=(*pp!=0 ? strstr(pp+1,v) : NULL)) {
        /* isalnum(0) is false, so the end of the string is a valid boundary */
        if((pp==p || !isalnum((unsigned char)*(pp-1))) &&
           !isalnum((unsigned char)*(pp+n))) break;
    }
    return pp;
}
153
 
6884 bpr 154
/*  Remove trailing whitespace from p, in place.
 *  Returns a pointer to the last remaining character, or p itself when
 *  the resulting string is empty.
 */
char *strip_trailing_spaces(char *p)
{
    char *end;

    end=p+strlen(p);
    /* walk back from the terminator; end never moves below p */
    while(end>p && isspace((unsigned char)*(end-1))) *(--end)=0;
    if(end>p) end--;
    return end;
}
162
 
163
/*  Find the end of the html tag starting at p (p normally points to
 *  the opening '<').
 *  Nested '<...>' pairs are skipped, and text between double or single
 *  quotes is not examined.
 *  Returns a pointer just after the closing '>', or to the terminating
 *  zero when the tag or one of its quotes is not closed.
 */
char *find_tag_end(char *p)
{
    char *pp;

    pp=p; if(*pp=='<') pp++;
    for(; *pp && *pp!='>'; pp++) {
        if(*pp=='<') {
            /* skip the nested tag; -1 compensates the pp++ of the loop */
            pp=find_tag_end(pp)-1; continue;
        }
        if(*pp=='"' || *pp=='\'') {
            /* jump to the matching quote of the same kind */
            pp=strchr(pp+1,*pp);
            if(pp==NULL) return p+strlen(p);
        }
    }
    if(*pp=='>') pp++;
    return pp;
}
182
 
183
/*  Find the first html tag named tag (case-insensitive) in p.
 *  A match is a '<' immediately followed by tag and then by a
 *  non-alphanumeric character.
 *  Returns a pointer to the '<', or to the terminating zero of p when
 *  the tag is not found (never NULL).
 */
char *find_tag(char *p, char *tag)
{
    char *pp;
    size_t len=strlen(tag);

    for(pp=strchr(p,'<'); pp!=NULL; pp=strchr(pp+1,'<')) {
        if(strncasecmp(pp+1,tag,len)==0 &&
           !isalnum((unsigned char)*(pp+1+len))) return pp;
    }
    return p+strlen(p);
}
193
 
6884 bpr 194
/*  Remove all html tags from p, in place.
 *  A tag that is not closed truncates the string at its '<'.
 */
void detag(char *p)
{
    char *pp, *p2;

    /* after a removal the text that followed the tag now starts at pp,
     * so the search restarts at pp itself */
    for(pp=strchr(p,'<'); pp!=NULL; pp=strchr(pp,'<')) {
        p2=find_tag_end(pp);
        if(*p2==0) {*pp=0; return; }
        /* source and destination overlap, hence ovlstrcpy */
        ovlstrcpy(pp,p2);
    }
}
204
 
6884 bpr 205
/*  modify a string. Bufferlen must be at least MAX_LINELEN */
10 reyssat 206
void string_modify(char *start, char *bad_beg, char *bad_end, char *good,...)
207
{
208
    char buf[MAX_LINELEN+1];
209
    va_list vp;
6881 bpr 210
 
10 reyssat 211
    va_start(vp,good);
212
    vsnprintf(buf,sizeof(buf),good,vp); va_end(vp);
213
    if(strlen(start)-(bad_end-bad_beg)+strlen(buf)>=MAX_LINELEN)
214
      return;
215
    strcat(buf,bad_end);
3718 reyssat 216
    ovlstrcpy(bad_beg,buf);
10 reyssat 217
}
218
 
6819 reyssat 219
/*  Add a space after every comma of p (in place), so that a comma
 *  also marks the end of a word. The buffer must be at least
 *  MAX_LINELEN long (see string_modify()).
 */
void comma(char *p)
{
    char *pp;

    /* pp+1 is the inserted space (or the old next character when the
     * buffer is full), so the same comma is never found twice */
    for(pp=strchr(p,','); pp; pp=strchr(pp+1,','))
      string_modify(p,pp,pp+1,", ");
}
227
 
10 reyssat 228
void _getdef(char buf[], char *name, char value[])
229
{
230
    char *p1, *p2, *p3;
231
 
232
    value[0]=0;
233
    for(p1=strstr(buf,name); p1!=NULL; p1=strstr(p1+1,name)) {
6884 bpr 234
    p2=find_word_start(p1+strlen(name));
235
    if((p1>buf && !isspace(*(p1-1))) || *p2!='=') continue;
236
    p3=p1; while(p3>buf && isspace(*(p3-1)) && *(p3-1)!='\n') p3--;
237
    if(p3>buf && *(p3-1)!='\n') continue;
238
    p3=strchr(p2,'\n');
239
    p2=find_word_start(p2+1);
240
    if(p3 <= p2) continue;
241
    snprintf(value,MAX_LINELEN,"%s",p2);
242
    if(p3!=NULL && p3-p2<MAX_LINELEN) value[p3-p2]=0;
243
    strip_trailing_spaces(value);
244
    break;
10 reyssat 245
    }
246
}
247
 
6884 bpr 248
/*  Get a variable definition from a file.
 *  The whole file fname is read and searched with _getdef().
 *  The result is stored in the buffer value, which must have room for
 *  MAX_LINELEN characters; it is empty when the file cannot be read or
 *  does not define name.
 */
void getdef(char *fname, char *name, char value[])
{
    FILE *f;
    char *buf;
    long l;
    size_t got;

    value[0]=0;
    f=fopen(fname,"r"); if(f==NULL) return;
    /* file size; an empty or unseekable file defines nothing */
    if(fseek(f,0,SEEK_END)!=0 || (l=ftell(f))<=0 || fseek(f,0,SEEK_SET)!=0) {
        fclose(f); return;
    }
    buf=xmalloc(l+256);
    got=fread(buf,1,l,f);
    fclose(f);
    if(got>0) {
        buf[got]=0;
        _getdef(buf,name,value);
    }
    free(buf);
}
266
 
267
#include "translator_.c"
268
 
6881 bpr 269
char *mdicbuf, *gdicbuf, *ddicbuf;
270
char gentry[sizeof(entry)], mentry[sizeof(entry)], dentry[sizeof(entry)];
271
int gentrycount, mentrycount, dentrycount;
10 reyssat 272
 
6884 bpr 273
/*  Preparation of data */
10 reyssat 274
void prep(void)
275
{
276
    char buf[MAX_LINELEN+1];
277
    char *p1,*p2,*s,*old;
278
    int i,l,thislang,t;
279
    FILE *f;
6881 bpr 280
 
10 reyssat 281
    s=getenv("modind_outdir"); if(s!=NULL && *s!=0) outdir=s;
282
    s=getenv("modind_sheetdir"); if(s!=NULL && *s!=0) sheetdir=s;
283
    snprintf(buf,sizeof(buf),"%s/addr",outdir);
284
    addrf=fopen(buf,"w");
285
    snprintf(buf,sizeof(buf),"%s/serial",outdir);
286
    serialf=fopen(buf,"w");
287
    modcnt=langcnt=0;
6884 bpr 288
/* take the langs declared in conffile */
10 reyssat 289
    getdef(conffile,"site_languages",buf);
290
    for(p1=buf;*p1;p1++) if(!isalnum(*p1)) *p1=' ';
291
    for(p1=find_word_start(buf); *p1 && langcnt<MAX_LANGS; p1=find_word_start(p2)) {
6884 bpr 292
    p2=find_word_end(p1);
293
    if(p2!=p1+2 || !isalpha(*p1) || !isalpha(*(p1+1))) continue;
294
    memmove(lang[langcnt],p1,2); lang[langcnt++][2]=0;
10 reyssat 295
    }
6884 bpr 296
    if(langcnt==0) {/*  default languages */
297
    langcnt=DEFAULT_LANGCNT;
10 reyssat 298
    }
299
    s=getenv("mlist"); if(s==NULL) exit(1);
300
    l=strlen(s); if(l<0 || l>100*MAX_LINELEN) exit(1);
3718 reyssat 301
    mlist=xmalloc(l+16); ovlstrcpy(mlist,s); old="";
10 reyssat 302
    for(i=0;i<langcnt;i++) {
6884 bpr 303
    snprintf(buf,sizeof(buf),"%s/%s.%s",dicdir,ignoredic,lang[i]);
304
    f=fopen(buf,"r"); if(f==NULL) continue;
305
    l=fread(ignore[i],1,MAX_LINELEN,f);fclose(f);
306
    if(l<0 || l>=MAX_LINELEN) l=0;
307
    ignore[i][l]=0;
10 reyssat 308
    }
309
    for(t=0, p1=find_word_start(mlist);
6884 bpr 310
    *p1 && modcnt<MAX_MODULES;
311
    p1=find_word_start(p2), t++) {
312
    p2=find_word_end(p1);
313
    l=p2-p1; if(*p2) *p2++=0;
314
    fprintf(addrf,"%d:%s\n",t,p1);
315
    fprintf(serialf,"%s:%d\n",p1,t);
316
    thislang=-1;
6564 bpr 317
/* language is taken from the address */
6884 bpr 318
    if(l>3 && p1[l-3]=='.') {
319
        for(i=0;i<langcnt;i++) if(strcasecmp(lang[i],p1+l-2)==0) break;
320
        if(i<langcnt) {p1[l-3]=0; thislang=i;}
321
        else {/*  unknown language, not referenced */
322
        continue;
323
        }
10 reyssat 324
    }
6884 bpr 325
    if(modcnt>0 && strcmp(old,p1)==0 && thislang>=0) {
326
        if(mod[modcnt-1].langcnt<langcnt) {
327
        mod[modcnt-1].langs[mod[modcnt-1].langcnt]=thislang;
328
        mod[modcnt-1].counts[mod[modcnt-1].langcnt]=t;
329
        (mod[modcnt-1].langcnt)++;
330
        }
331
    }
332
    else {
333
        mod[modcnt].name=old=p1;
334
        if(thislang>=0) {
335
        mod[modcnt].langs[0]=thislang;
336
        mod[modcnt].langcnt=1;
337
        }
338
        else mod[modcnt].langcnt=0;
339
        mod[modcnt].counts[0]=t;
340
        modcnt++;
341
    }
342
    }
10 reyssat 343
    snprintf(buf,sizeof(buf),"%s/language",outdir);
344
    langf=fopen(buf,"w");
345
    snprintf(buf,sizeof(buf),"%s/title",outdir);
346
    titf=fopen(buf,"w");
347
    snprintf(buf,sizeof(buf),"%s/description",outdir);
348
    descf=fopen(buf,"w");
349
    snprintf(buf,sizeof(buf),"%s/author",outdir);
350
    authorf=fopen(buf,"w");
351
    snprintf(buf,sizeof(buf),"%s/version",outdir);
352
    versionf=fopen(buf,"w");
353
    snprintf(buf,sizeof(buf),"%s/lists/robot.phtml",outdir);
354
    robotf=fopen(buf,"w");
355
    fclose(addrf); fclose(serialf);
356
    if(!robotf || !versionf || !authorf || !descf || !titf || !descf) {
6884 bpr 357
    fprintf(stderr,"modind: error creating output files.\n");
358
    exit(1);
10 reyssat 359
    }
360
}
361
 
362
void sprep(void)
363
{
364
    char *p1,*p2,*s;
365
    int i,l,thislang;
6881 bpr 366
 
10 reyssat 367
    modcnt=0;
368
    s=getenv("slist"); if(s==NULL) return;
369
    l=strlen(s); if(l<0 || l>100*MAX_LINELEN) return;
3718 reyssat 370
    mlist=xmalloc(l+16); ovlstrcpy(mlist,s);
10 reyssat 371
    for(p1=find_word_start(mlist); *p1 && modcnt<MAX_MODULES; p1=find_word_start(p2)) {
6884 bpr 372
    p2=find_word_end(p1);
373
    l=p2-p1; if(*p2) *p2++=0;
374
    for(i=0;i<langcnt;i++) if(strncasecmp(lang[i],p1,2)==0) break;
375
    if(i<langcnt) thislang=i; else continue;
376
    mod[modcnt].name=p1;
377
    mod[modcnt].langs[0]=thislang;
378
    mod[modcnt].langcnt=1;
379
    modcnt++;
10 reyssat 380
    }
381
}
382
 
383
void clean(void)
384
{
385
    fclose(langf); fclose(titf); fclose(descf); fclose(robotf);
386
    fclose(authorf); fclose(versionf);
387
}
388
 
389
char *sheetindex[]={
6881 bpr 390
      "title", "description",
10 reyssat 391
      "duration", "severity",
392
      "level", "domain",
6967 bpr 393
      "keywords", "reserved1", "reserved2", "information"
10 reyssat 394
};
395
#define SHEETINDEX_NO (sizeof(sheetindex)/sizeof(sheetindex[0]))
396
char sindbuf[SHEETINDEX_NO][MAX_LINELEN+1];
397
enum{s_title, s_description,
398
      s_duration, s_severity,
399
      s_level, s_domain,
400
      s_keywords, s_reserved1, s_reserved2,
6967 bpr 401
      s_information
10 reyssat 402
};
403
 
404
char *modindex[]={
6881 bpr 405
      "title", "description",
10 reyssat 406
      "author", "address", "copyright",
407
      "version", "wims_version", "language",
6881 bpr 408
      "category", "level", "domain", "keywords",
6799 bpr 409
      "keywords_ca", "keywords_en", "keywords_fr", "keywords_it", "keywords_nl",
410
      "title_ca", "title_en", "title_fr", "title_it", "title_nl",
10 reyssat 411
      "require"
412
};
413
#define MODINDEX_NO (sizeof(modindex)/sizeof(modindex[0]))
414
char indbuf[MODINDEX_NO][MAX_LINELEN+1];
415
enum{i_title, i_description,
416
      i_author,i_address,i_copyright,
417
      i_version,i_wims_version,i_language,
418
      i_category,i_level,i_domain,i_keywords,
6799 bpr 419
      i_keywords_ca,i_keywords_en,i_keywords_fr,i_keywords_it,i_keywords_nl,
420
      i_title_ca,i_title_en,i_title_fr,i_title_it,i_title_nl,
10 reyssat 421
      i_require
422
};
423
 
424
char *module_special_file[]={
    "intro","help","about"
};
#define MODSPEC_NO (sizeof(module_special_file)/sizeof(module_special_file[0]))
/*  Two-letter language of the module last read by module_index()
 *  ("en" when its INDEX file declares none).
 */
char module_language[4];
429
 
6884 bpr 430
/*  read and treat module's INDEX file */
10 reyssat 431
int module_index(const char *name)
432
{
433
    char *p, fbuf[MAX_LINELEN+1], ibuf[MAX_LINELEN+1];
434
    FILE *indf;
435
    int i,l;
436
 
437
    snprintf(fbuf,sizeof(fbuf),"%s/%s/INDEX",moduledir,name);
438
    indf=fopen(fbuf,"r"); if(indf==NULL) return -1;
439
    l=fread(ibuf,1,MAX_LINELEN,indf); fclose(indf);
440
    if(l>0 && l<MAX_LINELEN) ibuf[l]=0; else return -1;
6884 bpr 441
/* treate all fields in *modindex */
10 reyssat 442
    for(i=0;i<MODINDEX_NO;i++) {
6884 bpr 443
    _getdef(ibuf,modindex[i],indbuf[i]);
444
/*  compatibility precaution */
445
    if(indbuf[i][0]==':') indbuf[i][0]='.';
10 reyssat 446
    }
447
    p=find_word_start(indbuf[i_language]);
448
    if(isalpha(*p) && isalpha(*(p+1))) {
6884 bpr 449
    memmove(module_language,p,2); module_language[2]=0;
10 reyssat 450
    }
3718 reyssat 451
    else ovlstrcpy(module_language,"en");
10 reyssat 452
    return 0;
453
}
454
 
455
int sheet_index(int serial)
456
{
457
    char *p1, *p2, fbuf[MAX_LINELEN+1], ibuf[MAX_LINELEN+1];
458
    FILE *indf;
459
    int i,l;
460
 
461
    snprintf(fbuf,sizeof(fbuf),"%s/%s.def",sheetdir,mod[serial].name);
462
    indf=fopen(fbuf,"r"); if(indf==NULL) return -1;
463
    l=fread(ibuf,1,MAX_LINELEN,indf); fclose(indf);
464
    if(l>0 && l<MAX_LINELEN) ibuf[l]=0; else return -1;
465
    for(i=0;i<SHEETINDEX_NO;i++) sindbuf[i][0]=0;
466
    for(i=0,p1=find_word_start(ibuf);
6884 bpr 467
    i<SHEETINDEX_NO-1 && *p1!=':' && *p1!=0;
468
    i++,p1=p2) {
469
    p2=strchr(p1,'\n');
470
    if(p2!=NULL) *p2++=0; else p2=p1+strlen(p1);
471
    p1=find_word_start(p1); strip_trailing_spaces(p1);
472
    snprintf(sindbuf[i],MAX_LINELEN,"%s",p1);
10 reyssat 473
    }
474
    p2=strstr(p1,"\n:"); if(p2==NULL) p2=p1+strlen(p1);
475
    else *p2=0;
476
    p1=find_word_start(p1); strip_trailing_spaces(p1);
477
    for(p2=p1;*p2;p2++) if(*p2=='\n') *p2=' ';
6967 bpr 478
    ovlstrcpy(sindbuf[s_information],p1);
10 reyssat 479
    return 0;
480
}
481
 
482
unsigned char categories[16];
483
char taken[MAX_LINELEN+1];
484
int catcnt, takenlen, tweight;
485
 
486
void appenditem(char *word, int lind, int serial, int weight, char *l)
487
{
488
    char nbuf[MAX_LINELEN+1], buf[MAX_LINELEN+1];
489
    int i, ll;
490
    char *p;
491
    FILE *f;
6881 bpr 492
 
10 reyssat 493
    if(!isalnum(*word) || (ll=strlen(word))<2 ||
494
       wordchr(taken,word)!=NULL ||
495
       wordchr(ignore[lind],word)!=NULL ||
496
       takenlen>=MAX_LINELEN-ll-16)
497
      return;
498
    if(ll==2 && (!isdigit(word[0]) || !isalpha(word[1]))) return;
499
    for(p=word;*p;p++) if(!isalnum(*p) && *p!=' ') return;
500
    taken[takenlen++]=' '; taken[takenlen++]=' ';
3718 reyssat 501
    ovlstrcpy(taken+takenlen,word);
10 reyssat 502
    takenlen+=ll; tweight+=weight;
503
    snprintf(buf,sizeof(buf),"%s:%d?%d\n",word,serial,weight);
504
    for(i=0;i<catcnt;i++) {
6884 bpr 505
    snprintf(nbuf,sizeof(nbuf),"%s/%c.%s",
506
         outdir,categories[i],lang[lind]);
507
    f=fopen(nbuf,"a");
508
    if(f!=NULL) {fputs(buf,f); fclose(f);}
10 reyssat 509
    }
510
}
511
 
6881 bpr 512
void appenditem1 (char *buf, int lind, int serial, int weight, char *l )
513
{
514
  char *p1, *p2 ;
515
  for(p1=find_word_start(buf); *p1;
6884 bpr 516
    p1=find_word_start(p2)) {
517
    p2=strchr(p1,',');
518
    if(p2!=NULL) *p2++=0; else p2=p1+strlen(p1);
519
    if(strlen(p1)<=0) continue;
520
    appenditem(p1,lind,serial,weight,module_language);
6881 bpr 521
  }
522
}
523
void appenditem2 (char *buf, int lind, int serial, int weight, char *l )
524
{
525
  char *p1, *p2 ;
526
  for(p1=find_word_start(buf);*p1;
6884 bpr 527
    p1=find_word_start(p2)) {
528
    p2=find_word_end(p1); if(*p2) *p2++=0;
529
    appenditem(p1,lind,serial,weight,module_language);
6881 bpr 530
  }
531
}
10 reyssat 532
void onemodule(const char *name, int serial, int lind)
533
{
534
    int i;
535
    unsigned char trlist[]={
6884 bpr 536
    i_title,i_description,i_category,i_domain,i_keywords,
537
      i_require,i_author,
538
      i_keywords_ca,i_keywords_en,i_keywords_fr,i_keywords_it,i_keywords_nl,
539
      i_title_ca,i_title_en,i_title_fr,i_title_it,i_title_nl
10 reyssat 540
    };
541
    #define trcnt (sizeof(trlist)/sizeof(trlist[0]))
6564 bpr 542
    char *p1, *p2, *pp, *q, buf[MAX_LINELEN+1], lbuf[16];
10 reyssat 543
    FILE *f;
6881 bpr 544
 
10 reyssat 545
    if(module_index(name)) return;
546
    towords(indbuf[i_category]);
7915 bpr 547
/*   list the categories (among A=all,X=eXercise,O,D,...) corresponding
6884 bpr 548
 *   to this module
549
 */
10 reyssat 550
    for(i=catcnt=0;i<catno && catcnt<16;i++) {
6884 bpr 551
    if(wordchr(indbuf[i_category],cat[i].name)!=NULL)
552
      categories[catcnt++]=cat[i].typ;
10 reyssat 553
    }
554
    if(catcnt==0) return;
555
    if(categories[0]!=cat[0].typ)
556
      categories[catcnt++]=cat[0].typ;
6884 bpr 557
/*  write module's name in the category.language files, for instance lists/X.fr
558
 * for french exercises
559
 */
10 reyssat 560
    for(i=0;i<catcnt;i++) {
6884 bpr 561
    snprintf(buf,sizeof(buf),"%s/lists/%c.%s",
562
         outdir,categories[i],lang[lind]);
563
    f=fopen(buf,"a");
564
    if(f!=NULL) {fprintf(f,"%s\n",name); fclose(f);}
10 reyssat 565
    }
6884 bpr 566
/*   add serial number and language (resp.title, ...) to corresponding file  */
10 reyssat 567
    fprintf(langf,"%d:%s\n",serial,module_language);
568
    fprintf(titf,"%d:%s\n",serial,indbuf[i_title]);
569
    fprintf(descf,"%d:%s\n",serial,indbuf[i_description]);
570
    fprintf(authorf,"%d:%s\n",serial,indbuf[i_author]);
571
    fprintf(versionf,"%d:%s\n",serial,indbuf[i_version]);
6881 bpr 572
 
6884 bpr 573
/*   add module's information in html page for robots  */
10 reyssat 574
    snprintf(buf,sizeof(buf),"%s",indbuf[i_description]);
575
    for(pp=strchr(buf,','); pp; pp=strchr(pp,','))
576
      string_modify(buf,pp,pp+1,"&#44;");
577
    if(strcmp(module_language,lang[lind])==0)
578
      fprintf(robotf,"%s ,%s,%s,%s,%s\n",name,module_language,name,
6884 bpr 579
          indbuf[i_title], buf);
6819 reyssat 580
 
6884 bpr 581
/*   Normalize the information of trlist, using dictionary
7915 bpr 582
 *  -- bases/sys/domain.xx without suffix translation (--> english version)
6884 bpr 583
 */
6881 bpr 584
    entrycount=dentrycount; dicbuf=ddicbuf;
585
    memmove(entry,dentry,dentrycount*sizeof(entry[0]));
586
    unknown_type=unk_leave;
10 reyssat 587
    for(i=0;i<trcnt;i++) {
6884 bpr 588
    detag(indbuf[trlist[i]]);
589
    deaccent(indbuf[trlist[i]]);
590
    comma(indbuf[trlist[i]]);
591
    singlespace(indbuf[trlist[i]]);
592
    translate(indbuf[trlist[i]]);
6881 bpr 593
    }
6884 bpr 594
/*   Normalize the information, using dictionary
7915 bpr 595
 *   bases/sys/words.xx with suffix translation
6884 bpr 596
 */
6881 bpr 597
    entrycount=mentrycount; dicbuf=mdicbuf;
598
    memmove(entry,mentry,mentrycount*sizeof(entry[0]));
6884 bpr 599
    unknown_type=unk_leave;/*  used in translator_.c */
6881 bpr 600
    for(i=0;i<trcnt;i++) {
6884 bpr 601
    suffix_translate(indbuf[trlist[i]]);
602
    translate(indbuf[trlist[i]]);
10 reyssat 603
    }
6881 bpr 604
 
605
/* taken contains all words already seen in the module index */
10 reyssat 606
    taken[0]=0; takenlen=tweight=0;
6881 bpr 607
/*  append words of title  */
3718 reyssat 608
    ovlstrcpy(buf,indbuf[i_title]); towords(buf);
6881 bpr 609
    appenditem2(buf,lind,serial,4,module_language);
610
 
6884 bpr 611
/*  extract words of every other information except level */
6799 bpr 612
    snprintf(buf,sizeof(buf),"%s %s %s %s %s %s %s %s %s %s %s %s %s %s %s",
6884 bpr 613
         indbuf[i_description],indbuf[i_keywords],
614
         indbuf[i_keywords_ca],indbuf[i_keywords_en],indbuf[i_keywords_fr],
615
         indbuf[i_keywords_it],indbuf[i_keywords_nl],
616
         indbuf[i_title_ca],indbuf[i_title_en],indbuf[i_title_fr],
617
         indbuf[i_title_it],indbuf[i_title_nl],
618
         indbuf[i_domain],indbuf[i_require],indbuf[i_author]);
10 reyssat 619
    towords(buf);
6884 bpr 620
    appenditem2(buf,lind,serial,2,module_language);
6881 bpr 621
 
6884 bpr 622
/*   this time the dictionary is the group dictionary  sys/wgrp/wgrp
623
 *   with a g (groupdic), not an m (maindic) . see below main, suffix, group.
7915 bpr 624
 *   and delete unknown ?? and translate
6884 bpr 625
 */
10 reyssat 626
    entrycount=gentrycount; dicbuf=gdicbuf;
627
    memmove(entry,gentry,gentrycount*sizeof(entry[0]));
6881 bpr 628
 
6884 bpr 629
/*  append words of every title information  */
6881 bpr 630
    ovlstrcpy(buf,indbuf[i_title]);
10 reyssat 631
    unknown_type=unk_delete;
6881 bpr 632
    translate(buf);
633
    appenditem1(buf,lind,serial,2,module_language);
634
 
6884 bpr 635
/*  append words of information of description except level  */
6881 bpr 636
    snprintf(buf,sizeof(buf),"%s", indbuf[i_description]);
637
    unknown_type=unk_delete;
638
    translate(buf);
639
    appenditem1(buf,lind,serial,4,module_language);
640
 
6884 bpr 641
/*  append words (or group of words) of keywords and domain  */
6881 bpr 642
    snprintf(buf,sizeof(buf),"%s, %s, %s, %s, %s, %s, %s",
6884 bpr 643
         indbuf[i_domain],indbuf[i_keywords],
644
         indbuf[i_keywords_ca], indbuf[i_keywords_en],indbuf[i_keywords_fr],
645
         indbuf[i_keywords_it], indbuf[i_keywords_nl]);
646
    unknown_type=unk_leave;
10 reyssat 647
    translate(buf);
6881 bpr 648
    appenditem1(buf,lind,serial,2,module_language);
649
 
6884 bpr 650
/*   append level information, with weight 2 */
10 reyssat 651
    snprintf(buf,sizeof(buf),"%s",indbuf[i_level]);
3718 reyssat 652
    ovlstrcpy(lbuf,"level");
10 reyssat 653
    for(p1=buf; *p1; p1++) if(!isalnum(*p1)) *p1=' ';
6564 bpr 654
    q=buf+strlen(buf);
655
    for(p1=find_word_start(buf); (*p1) && (p1 < q) ;
6884 bpr 656
    p1=find_word_start(p2)) {
657
    p2=find_word_end(p1);
658
    if(p2!=NULL) *p2++=0; else p2=p1+strlen(p1);
659
    if(!isalpha(*p1) ||
660
       (!isdigit(*(p1+1)) && *(p1+1)!=0) ||
661
       (*(p1+1)!=0 && *(p1+2)!=0))
662
      continue;
663
    *p1=tolower(*p1);
664
    ovlstrcpy(lbuf+strlen("level"),p1);
665
    appenditem(lbuf,lind,serial,2,module_language);
10 reyssat 666
    }
6884 bpr 667
/*   append total weight of module to weight file site2/weight.xx  */
10 reyssat 668
    fprintf(weightf,"%d:%d\n",serial,tweight);
669
}
670
 
671
void modules(void)
672
{
673
    int i,j,k,d;
674
    char namebuf[MAX_LINELEN+1];
6881 bpr 675
    char mdic[MAX_LINELEN+1], sdic[MAX_LINELEN+1], gdic[MAX_LINELEN+1], ddic[MAX_LINELEN+1];
10 reyssat 676
 
677
    for(j=0;j<langcnt;j++) {
6884 bpr 678
    snprintf(namebuf,sizeof(namebuf),"%s/weight.%s",outdir,lang[j]);
679
    weightf=fopen(namebuf,"w");
680
    snprintf(mdic,sizeof(mdic),"%s/%s.%s",dicdir,maindic,lang[j]);
681
    snprintf(sdic,sizeof(sdic),"%s/%s.%s",dicdir,suffixdic,lang[j]);
682
    snprintf(gdic,sizeof(gdic),"%s/%s.%s",dicdir,groupdic,lang[j]);
683
    snprintf(ddic,sizeof(ddic),"%s/%s.%s",dicdir,domaindic,lang[j]);
684
    suffix_dic(sdic); prepare_dic(gdic);
685
    gdicbuf=dicbuf; gentrycount=entrycount;
686
    memmove(gentry,entry,gentrycount*sizeof(entry[0]));
687
    prepare_dic(mdic);
688
    mdicbuf=dicbuf; mentrycount=entrycount;
689
    memmove(mentry,entry,mentrycount*sizeof(entry[0]));
690
    prepare_dic(ddic);
691
    ddicbuf=dicbuf; dentrycount=entrycount;
692
    memmove(dentry,entry,dentrycount*sizeof(entry[0]));
693
    unknown_type=unk_leave; translate(ignore[j]);
694
    for(i=0;i<modcnt;i++) {
695
        if(mod[i].langcnt>0) {
696
        for(d=k=0;k<mod[i].langcnt;k++)
697
          if(mod[i].langs[k]<mod[i].langs[d]) d=k;
698
        for(k=0;k<mod[i].langcnt && mod[i].langs[k]!=j;k++);
699
        if(k>=mod[i].langcnt) k=d;
700
        snprintf(namebuf,MAX_LINELEN,"%s.%s",mod[i].name,
701
             lang[mod[i].langs[k]]);
702
        onemodule(namebuf,mod[i].counts[k],j);
703
        }
704
        else {
705
        onemodule(mod[i].name,mod[i].counts[0],j);
706
        }
10 reyssat 707
    }
6884 bpr 708
    if(mentrycount>0) free(mdicbuf);
709
    if(gentrycount>0) free(gdicbuf);
710
    if(suffixcnt>0) free(sufbuf);
711
    if(dentrycount>0) free(ddicbuf);
712
    if(weightf) fclose(weightf);
713
    }
10 reyssat 714
}
715
 
6881 bpr 716
/* FIXME ? differences with appenditem - use fprintf instead of  snprintf */
10 reyssat 717
void sappenditem(char *word, int lind, int serial, int weight)
718
{
719
    int ll;
720
    char *p;
6881 bpr 721
 
10 reyssat 722
    if(!isalnum(*word) || (ll=strlen(word))<2 ||
723
       wordchr(taken,word)!=NULL ||
724
       wordchr(ignore[lind],word)!=NULL ||
725
       takenlen>=MAX_LINELEN-ll-16)
726
      return;
727
    if(ll==2 && (!isdigit(word[0]) || !isalpha(word[1]))) return;
728
    for(p=word;*p;p++) if(!isalnum(*p) && *p!=' ') return;
729
    taken[takenlen++]=' ';taken[takenlen++]=' ';
3718 reyssat 730
    ovlstrcpy(taken+takenlen,word);
10 reyssat 731
    takenlen+=ll; tweight+=weight;
732
    fprintf(indf,"%s:%d?%d\n",word,serial,weight);
733
}
734
 
735
void onesheet(int serial, int lind)
736
{
737
    int i;
738
    unsigned char trlist[]={
6967 bpr 739
    s_title,s_description,s_domain,s_keywords,s_information
10 reyssat 740
    };
741
    #define trcnt (sizeof(trlist)/sizeof(trlist[0]))
742
    char *p1, *p2, buf[MAX_LINELEN+1];
6881 bpr 743
 
10 reyssat 744
    if(sheet_index(serial)) return;
745
    fprintf(listf,"%s\n",mod[serial].name+3);
746
    fprintf(titf,"%d:%s\n",serial,sindbuf[s_title]);
747
    fprintf(descf,"%d:%s\n",serial,sindbuf[s_description]);
6967 bpr 748
    fprintf(remf,"%d:%s\n",serial,sindbuf[s_information]);
7915 bpr 749
 
6881 bpr 750
    entrycount=dentrycount; dicbuf=ddicbuf;
751
    memmove(entry,dentry,dentrycount*sizeof(entry[0]));
10 reyssat 752
    unknown_type=unk_leave;
753
    for(i=0;i<trcnt;i++) {
6884 bpr 754
    detag(sindbuf[trlist[i]]);
755
    deaccent(sindbuf[trlist[i]]);
756
    comma(sindbuf[trlist[i]]);
757
    singlespace(sindbuf[trlist[i]]);
758
    translate(sindbuf[trlist[i]]);
6881 bpr 759
    }
7915 bpr 760
 
6881 bpr 761
    entrycount=mentrycount; dicbuf=mdicbuf;
762
    memmove(entry,mentry,mentrycount*sizeof(entry[0]));
763
    unknown_type=unk_leave;
764
    for(i=0;i<trcnt;i++) {
6884 bpr 765
    suffix_translate(sindbuf[trlist[i]]);
766
    translate(sindbuf[trlist[i]]);
10 reyssat 767
    }
768
    taken[0]=0; takenlen=tweight=0;
3718 reyssat 769
    ovlstrcpy(buf,sindbuf[s_title]); towords(buf);
10 reyssat 770
    for(p1=find_word_start(buf);*p1;
6884 bpr 771
    p1=find_word_start(p2)) {
772
    p2=find_word_end(p1); if(*p2) *p2++=0;
773
    sappenditem(p1,lind,serial,4);
10 reyssat 774
    }
775
    snprintf(buf,sizeof(buf),"%s %s %s %s",
6884 bpr 776
         sindbuf[s_description],sindbuf[s_keywords],
6967 bpr 777
         sindbuf[s_domain],sindbuf[s_information]);
10 reyssat 778
    towords(buf);
779
    for(p1=find_word_start(buf);*p1;
6884 bpr 780
    p1=find_word_start(p2)) {
781
    p2=find_word_end(p1); if(*p2) *p2++=0;
782
    sappenditem(p1,lind,serial,2);
10 reyssat 783
    }
784
    entrycount=gentrycount; dicbuf=gdicbuf;
785
    memmove(entry,gentry,gentrycount*sizeof(entry[0]));
786
    unknown_type=unk_delete;
3718 reyssat 787
    ovlstrcpy(buf,sindbuf[s_title]); translate(buf);
10 reyssat 788
    for(p1=find_word_start(buf); *p1;
6884 bpr 789
    p1=find_word_start(p2)) {
790
    p2=strchr(p1,',');
791
    if(p2!=NULL) *p2++=0; else p2=p1+strlen(p1);
792
    if(strlen(p1)<=0) continue;
793
    sappenditem(p1,lind,serial,4);
10 reyssat 794
    }
795
    snprintf(buf,sizeof(buf),"%s, %s, %s, %s",
6884 bpr 796
         sindbuf[s_description],sindbuf[s_keywords],
6967 bpr 797
         sindbuf[s_domain],sindbuf[s_information]);
10 reyssat 798
    translate(buf);
799
    for(p1=find_word_start(buf); *p1;
6884 bpr 800
    p1=find_word_start(p2)) {
801
    p2=strchr(p1,',');
802
    if(p2!=NULL) *p2++=0; else p2=p1+strlen(p1);
803
    if(strlen(p1)<=0) continue;
804
    sappenditem(p1,lind,serial,2);
10 reyssat 805
    }
806
    fprintf(weightf,"%d:%d\n",serial,tweight);
807
}
808
 
809
void sheets(void)
810
{
811
    int i,j;
6961 bpr 812
    char mdic[MAX_LINELEN+1], sdic[MAX_LINELEN+1], gdic[MAX_LINELEN+1], ddic[MAX_LINELEN+1];
10 reyssat 813
    char buf[MAX_LINELEN+1];
7915 bpr 814
 
10 reyssat 815
    for(j=0;j<langcnt;j++) {
6884 bpr 816
    snprintf(buf,sizeof(buf),"%s/index/title.%s",sheetdir,lang[j]);
817
    titf=fopen(buf,"w");
818
    snprintf(buf,sizeof(buf),"%s/index/description.%s",sheetdir,lang[j]);
819
    descf=fopen(buf,"w");
820
    snprintf(buf,sizeof(buf),"%s/index/%s",sheetdir,lang[j]);
821
    indf=fopen(buf,"w");
822
    snprintf(buf,sizeof(buf),"%s/index/list.%s",sheetdir,lang[j]);
823
    listf=fopen(buf,"w");
824
    snprintf(buf,sizeof(buf),"%s/index/weight.%s",sheetdir,lang[j]);
825
    weightf=fopen(buf,"w");
826
    snprintf(buf,sizeof(buf),"%s/index/addr.%s",sheetdir,lang[j]);
827
    addrf=fopen(buf,"w");
6967 bpr 828
    snprintf(buf,sizeof(buf),"%s/index/information.%s",sheetdir,lang[j]);
6961 bpr 829
    remf=fopen(buf,"w");
6884 bpr 830
    snprintf(buf,sizeof(buf),"%s/index/serial.%s",sheetdir,lang[j]);
831
    serialf=fopen(buf,"w");
832
    snprintf(mdic,sizeof(mdic),"%s/%s.%s",dicdir,maindic,lang[j]);
833
    snprintf(sdic,sizeof(sdic),"%s/%s.%s",dicdir,suffixdic,lang[j]);
834
    snprintf(gdic,sizeof(gdic),"%s/%s.%s",dicdir,groupdic,lang[j]);
6961 bpr 835
    snprintf(ddic,sizeof(ddic),"%s/%s.%s",dicdir,domaindic,lang[j]);
6884 bpr 836
    suffix_dic(sdic); prepare_dic(gdic);
837
    gdicbuf=dicbuf; gentrycount=entrycount;
838
    memmove(gentry,entry,gentrycount*sizeof(entry[0]));
839
    prepare_dic(mdic);
840
    mdicbuf=dicbuf; mentrycount=entrycount;
841
    memmove(mentry,entry,mentrycount*sizeof(entry[0]));
6973 bpr 842
    prepare_dic(ddic);
843
    ddicbuf=dicbuf; dentrycount=entrycount;
844
    memmove(dentry,entry,dentrycount*sizeof(entry[0]));
6884 bpr 845
    unknown_type=unk_leave; translate(ignore[j]);
846
    for(i=0;i<modcnt;i++) {
847
        if(mod[i].langs[0]!=j) continue;
848
        fprintf(addrf,"%d:%s\n",i,mod[i].name+3);
849
        fprintf(serialf,"%s:%d\n",mod[i].name+3,i);
850
        onesheet(i,j);
10 reyssat 851
    }
6884 bpr 852
    if(mentrycount>0) free(mdicbuf);
853
    if(gentrycount>0) free(gdicbuf);
854
    if(suffixcnt>0) free(sufbuf);
6961 bpr 855
    if(dentrycount>0) free(ddicbuf);
6884 bpr 856
    fclose(titf); fclose(descf); fclose(indf); fclose(listf);
857
    fclose(weightf); fclose(addrf); fclose(serialf);
858
    }
10 reyssat 859
}
860
 
861
int main()
862
{
863
    prep();
864
    if(modcnt>0) modules();
865
    clean();
866
    sprep();
867
    if(modcnt>0) sheets();
868
    return 0;
869
}
870