Subversion Repositories wimsdev

Rev

Rev 7915 | Rev 8123 | Go to most recent revision | Details | Compare with Previous | Last modification | View Log | RSS feed

Rev Author Line No. Line
10 reyssat 1
/*    Copyright (C) 1998-2003 XIAO, Gang of Universite de Nice - Sophia Antipolis
2
 *
3
 *  This program is free software; you can redistribute it and/or modify
4
 *  it under the terms of the GNU General Public License as published by
5
 *  the Free Software Foundation; either version 2 of the License, or
6
 *  (at your option) any later version.
7
 *
8
 *  This program is distributed in the hope that it will be useful,
9
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
10
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
11
 *  GNU General Public License for more details.
12
 *
13
 *  You should have received a copy of the GNU General Public License
14
 *  along with this program; if not, write to the Free Software
15
 *  Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
16
 */
17
 
6884 bpr 18
/*  This is an internal program,
7915 bpr 19
 * used to index modules for search engine.
6884 bpr 20
 */
10 reyssat 21
 
22
#include "../wims.h"
8100 bpr 23
#include "../Lib/libwims.h"
10 reyssat 24
 
6884 bpr 25
/*  Compile-time limits and default file locations.
 *  outdir and sheetdir may be replaced in prep() by the environment
 *  variables modind_outdir and modind_sheetdir.
 *  The *dic names are combined as "<dicdir>/<name>.<lang>" when the
 *  dictionaries are loaded; conffile is read for the site_languages
 *  definition; moduledir is where each module's INDEX file is looked up.
 */
#define MAX_LANGS    MAX_LANGUAGES
26
#define MAX_MODULES    65536
27
char *moduledir=    "public_html/modules";
28
char *sheetdir=     "public_html/bases/sheet";
29
char *dicdir=       "public_html/bases";
30
char *outdir=       "public_html/bases/site2";
31
char *maindic=      "sys/words";
32
char *groupdic=     "sys/wgrp/wgrp";
33
char *suffixdic=    "sys/suffix";
34
char *domaindic=    "sys/domaindic";
35
char *ignoredic=    "sys/indignore";
36
char *conffile=     "log/wims.conf";
37
/*  NOTE(review): mlistbase is not referenced in the visible part of this file */
char *mlistbase=    "list";
10 reyssat 38
 
39
/*  Two-letter codes of the languages to index.
 *  prep() overwrites the first entries with the languages found in the
 *  site_languages definition of conffile; when none is found, the first
 *  DEFAULT_LANGCNT entries of this initializer are used.
 */
char lang[MAX_LANGS][4]={
1792 bpr 40
    "en","fr","cn","es","it","nl","si","ca","pt"
10 reyssat 41
};
6884 bpr 42
#define DEFAULT_LANGCNT    6
10 reyssat 43
/*  NOTE(review): allang and allangcnt are not referenced in the visible
 *  part of this file; allangcnt is 8 while the initializer below lists 10
 *  codes -- confirm against the other users of these symbols.
 */
char allang[MAX_LANGS][4]={
6564 bpr 44
    "en","fr","cn","es","it","nl","de","si","ca","pt"
10 reyssat 45
};
46
#define allangcnt 8
47
/*  ignore[i]: content of the ignore-word file of language i (read in prep()) */
char ignore[MAX_LANGS][MAX_LINELEN+1];
48
/*  NOTE(review): mlistfile is not referenced in the visible part of this file */
char mlistfile[MAX_LANGS][256];
49
/*  number of valid entries in lang[] */
int langcnt;
6961 bpr 50
/*  output streams, opened in prep(), modules() and sheets() */
FILE *langf, *titf, *descf, *weightf, *robotf, *indf, *listf, *addrf, *serialf, *authorf, *versionf, *remf;
10 reyssat 51
 
52
/*  Module categories.
 *  name is the word looked for in the "category" field of a module's INDEX,
 *  typ is the single letter used to build the per-category output file
 *  names ("<outdir>/lists/<typ>.<lang>" and "<outdir>/<typ>.<lang>").
 *  cat[0] ("all_types") is added by onemodule() to every module that has
 *  at least one recognized category.
 */
struct cat {
53
    char *name;
54
    char typ;
55
} cat[]={
6884 bpr 56
    {"all_types", 'A'},
57
    {"exercise",  'X'},
58
    {"oef",       'O'},
59
    {"tool",      'T'},
60
    {"recreation",'R'},
61
    {"reference", 'Y'},
62
    {"document",  'D'},
63
    {"popup",     'P'},
64
    {"datamodule",'M'}
10 reyssat 65
};
66
/*  number of entries in cat[] */
#define catno (sizeof(cat)/sizeof(cat[0]))
67
 
68
/*  One entry per module (or per sheet) to index.
 *  name    points into mlist; prep() cuts off a recognized ".xx" language
 *          suffix.
 *  langs   indices into lang[] of the language versions found.
 *  counts  serial number (position in the module list) of each version;
 *          counts[0] is also set for a module without language suffix.
 *  langcnt number of valid entries in langs[] (0 when the address carries
 *          no language suffix).
 */
struct mod {
69
    char *name;
70
    unsigned char langs[MAX_LANGS];
71
    int counts[MAX_LANGS];
72
    int  langcnt;
73
} mod[MAX_MODULES];
74
/*  number of valid entries in mod[] */
int modcnt;
75
 
76
/*  Writable copy of the module list ("mlist" environment variable in prep(),
 *  "slist" in sprep()); the words are NUL-terminated in place and
 *  mod[].name points into this buffer.
 */
char *mlist;
77
 
8100 bpr 78
/*
10 reyssat 79
void *xmalloc(size_t n)
80
{
81
    void *p;
82
    p=malloc(n);
83
    if(p==NULL) {
6884 bpr 84
    printf("Malloc failure.\n");
85
    exit(1);
10 reyssat 86
    }
87
    return p;
88
}
8100 bpr 89
*/
10 reyssat 90
 
8100 bpr 91
/*
92
char *acctab="çéèêëúùûüáàâäãóòôöõíìïîñýÿÇÉÈÊËÚÙÛÜÁÀÂÃÄÓÒÔÖÕÍÌÏÎÑÝ",
93
     *deatab="ceeeeuuuuaaaaaoooooiiiinyyCEEEEUUUUAAAAAOOOOOIIIINY";
94
*/
6884 bpr 95
/*  fold known accented letters to unaccented, other strange characters to space
7915 bpr 96
 *  apostrophe is among the exceptions to be kept (important for multi-word expressions)
6884 bpr 97
 */
8100 bpr 98
void deaccent2(char *p)
10 reyssat 99
{
3247 bpr 100
    char *sp;
10 reyssat 101
    char *v;
102
    for(sp=p;*sp;sp++) {
6884 bpr 103
    if(*sp<0 && (v=strchr(acctab,*sp))!=NULL)
104
      *sp=*(deatab+(v-acctab));
105
    if(!isalnum(*sp) && strchr(",.&$+*",*sp)==0) *sp=' ';
106
    else *sp=tolower(*sp);
10 reyssat 107
    }
108
}
109
 
6884 bpr 110
/*  Translate every character that is neither alphanumeric nor one of
 *  "&$+*" into a space. The string is modified in place.
 */
void towords(char *p)
{
    char *pp;
    unsigned char c;

    for(pp=p;*pp;pp++) {
/*  <ctype.h> functions require a value representable as unsigned char */
        c=(unsigned char) *pp;
        if(!isalnum(c) && strchr("&$+*",c)==NULL) *pp=' ';
    }
}
116
 
6884 bpr 117
/*  Points to the end of the word */
8100 bpr 118
/*
10 reyssat 119
char *find_word_end(char *p)
120
{
121
    int i;
122
    for(i=0;!isspace(*p) && *p!=0 && i<MAX_LINELEN; p++,i++);
123
    return p;
124
}
8100 bpr 125
*/
6884 bpr 126
/*  Strips leading spaces */
8100 bpr 127
/*
10 reyssat 128
char *find_word_start(char *p)
129
{
130
    int i;
131
    for(i=0; isspace(*p) && i<MAX_LINELEN; p++,i++);
132
    return p;
133
}
8100 bpr 134
*/
6884 bpr 135
/*  Find the first occurrence of w in p that stands as a whole word, i.e.
 *  is preceded by the start of p or a space character and followed by a
 *  space character or the end of p.
 *  Returns a pointer to that occurrence, or NULL when there is none.
 *  An empty w never matches (this also keeps the search from stepping
 *  beyond the end of p).
 */
char *wordchr2(char *p, char *w)
{
    size_t n=strlen(w);
    char *r;

    if(n==0) return NULL;
    for(r=strstr(p,w); r!=NULL; r=strstr(r+1,w)) {
        int left_ok=(r==p) || isspace((unsigned char) r[-1]);
        int right_ok=(r[n]==0) || isspace((unsigned char) r[n]);
        if(left_ok && right_ok) break;
    }
    return r;
}
145
 
6884 bpr 146
/*  find a variable in a string (math expression).
147
 * Returns the pointer or NULL.
148
 */
8100 bpr 149
/*char *varchr(char *p, char *v)
10 reyssat 150
{
151
    char *pp; int n=strlen(v);
152
    for(pp=strstr(p,v); pp!=NULL; pp=strstr(pp+1,v)) {
6884 bpr 153
    if((pp==p || !isalnum(*(pp-1))) &&
154
       (!isalnum(*(pp+n)) || *(pp+n)==0)) break;
10 reyssat 155
    }
156
    return pp;
157
}
8100 bpr 158
*/
6884 bpr 159
/*  Strip trailing space characters from p, in place.
 *  Returns a pointer to the last remaining character, or p itself when the
 *  string is empty or consists only of spaces (the previous code formed
 *  the out-of-bounds pointer p-1 in the latter case).
 */
char *strip_trailing_spaces2(char *p)
{
    char *end;

    if(*p==0) return p;
    end=p+strlen(p);
/*  end always points just past the last character still kept */
    while(end>p && isspace((unsigned char) end[-1])) *--end=0;
    return (end>p)? end-1 : p;
}
167
 
168
/*  Return a pointer just past the '>' closing the tag that starts at p.
 *  Nested '<...>' groups and single- or double-quoted strings are skipped.
 *  When the tag (or a quoted string inside it) is not terminated, the
 *  end of the string is returned.
 */
char *find_tag_end(char *p)
{
    char *cur=p;
    char *closing;

    if(*cur=='<') cur++;
    while(*cur!=0 && *cur!='>') {
        switch(*cur) {
            case '<':
/*  nested tag: resume right after it */
              cur=find_tag_end(cur);
              break;
            case '"':
            case '\'':
/*  quoted text: jump over the matching quote */
              closing=strchr(cur+1,*cur);
              if(closing==NULL) return p+strlen(p);
              cur=closing+1;
              break;
            default:
              cur++;
              break;
        }
    }
    return (*cur=='>')? cur+1 : cur;
}
187
 
188
/*  Find the first tag named tag (compared without regard to case) in p.
 *  Returns a pointer to its '<', or to the end of p when no such tag exists.
 *  The name must not be followed by an alphanumeric character, so that
 *  "b" does not match "<bold>".
 */
char *find_tag(char *p, char *tag)
{
    char *pp;
    size_t len=strlen(tag);

    for(pp=strchr(p,'<'); pp!=NULL; pp=strchr(pp+1,'<')) {
/*  <ctype.h> functions require a value representable as unsigned char */
        if(strncasecmp(pp+1,tag,len)==0 &&
           !isalnum((unsigned char) pp[1+len])) return pp;
    }
    return p+strlen(p);
}
198
 
6884 bpr 199
/*  Remove all html tags from p, in place.
 *  A tag that is not closed truncates the string at its '<'.
 */
void detag(char *p)
{
    char *open, *after;

    open=strchr(p,'<');
    while(open!=NULL) {
        after=find_tag_end(open);
        if(*after==0) {
            *open=0;
            return;
        }
/*  pull the remaining text over the tag, then look again from the same place */
        ovlstrcpy(open,after);
        open=strchr(open,'<');
    }
}
209
 
6884 bpr 210
/*  modify a string. Bufferlen must be at least MAX_LINELEN */
8100 bpr 211
void string_modify3(char *start, char *bad_beg, char *bad_end, char *good,...)
10 reyssat 212
{
213
    char buf[MAX_LINELEN+1];
214
    va_list vp;
6881 bpr 215
 
10 reyssat 216
    va_start(vp,good);
217
    vsnprintf(buf,sizeof(buf),good,vp); va_end(vp);
218
    if(strlen(start)-(bad_end-bad_beg)+strlen(buf)>=MAX_LINELEN)
8100 bpr 219
      return; /* this is an error situation. */
10 reyssat 220
    strcat(buf,bad_end);
3718 reyssat 221
    ovlstrcpy(bad_beg,buf);
10 reyssat 222
}
223
 
6819 reyssat 224
/*  Add a space after every comma of p, so that word ends can be seen.
 *  p is modified in place through string_modify3.
 */
void comma(char *p)
{
    char *c;

    c=strchr(p,',');
    while(c!=NULL) {
        string_modify3(p,c,c+1,", ");
        c=strchr(c+1,',');
    }
}
232
 
10 reyssat 233
void _getdef(char buf[], char *name, char value[])
234
{
235
    char *p1, *p2, *p3;
236
 
237
    value[0]=0;
238
    for(p1=strstr(buf,name); p1!=NULL; p1=strstr(p1+1,name)) {
6884 bpr 239
    p2=find_word_start(p1+strlen(name));
240
    if((p1>buf && !isspace(*(p1-1))) || *p2!='=') continue;
241
    p3=p1; while(p3>buf && isspace(*(p3-1)) && *(p3-1)!='\n') p3--;
242
    if(p3>buf && *(p3-1)!='\n') continue;
243
    p3=strchr(p2,'\n');
244
    p2=find_word_start(p2+1);
245
    if(p3 <= p2) continue;
246
    snprintf(value,MAX_LINELEN,"%s",p2);
247
    if(p3!=NULL && p3-p2<MAX_LINELEN) value[p3-p2]=0;
8100 bpr 248
    strip_trailing_spaces2(value);
6884 bpr 249
    break;
10 reyssat 250
    }
251
}
252
 
6884 bpr 253
/*  Get variable definition from a file.
 *  The whole file fname is read and searched by _getdef for name.
 *  Result stored in buffer value of length MAX_LINELEN; it is empty when
 *  the file cannot be read, is empty, or holds no such definition.
 */
void getdef(char *fname, char *name, char value[])
{
    FILE *f;
    char *buf;
    long size;
    size_t got;

    value[0]=0;
    f=fopen(fname,"r"); if(f==NULL) return;
    size=-1;
    if(fseek(f,0,SEEK_END)==0) size=ftell(f);
    if(size<=0 || fseek(f,0,SEEK_SET)!=0) {
        fclose(f); return;
    }
    buf=xmalloc(size+256);
    got=fread(buf,1,size,f);
    fclose(f);
    if(got>0) {
        buf[got]=0;
        _getdef(buf,name,value);
    }
/*  the buffer is released on every path (it used to leak on a failed read) */
    free(buf);
}
271
 
272
#include "translator_.c"
273
 
6881 bpr 274
/*  Saved state of the translator (dicbuf, entry[] and entrycount) for the
 *  main (m), group (g) and domain (d) dictionaries of the current language.
 *  modules() and sheets() fill them after each prepare_dic() call;
 *  onemodule() and onesheet() copy them back to switch dictionary.
 */
char *mdicbuf, *gdicbuf, *ddicbuf;
275
char gentry[sizeof(entry)], mentry[sizeof(entry)], dentry[sizeof(entry)];
276
int gentrycount, mentrycount, dentrycount;
10 reyssat 277
 
6884 bpr 278
/*  Preparation of data */
10 reyssat 279
void prep(void)
280
{
281
    char buf[MAX_LINELEN+1];
282
    char *p1,*p2,*s,*old;
283
    int i,l,thislang,t;
284
    FILE *f;
6881 bpr 285
 
10 reyssat 286
    s=getenv("modind_outdir"); if(s!=NULL && *s!=0) outdir=s;
287
    s=getenv("modind_sheetdir"); if(s!=NULL && *s!=0) sheetdir=s;
288
    snprintf(buf,sizeof(buf),"%s/addr",outdir);
289
    addrf=fopen(buf,"w");
290
    snprintf(buf,sizeof(buf),"%s/serial",outdir);
291
    serialf=fopen(buf,"w");
292
    modcnt=langcnt=0;
6884 bpr 293
/* take the langs declared in conffile */
10 reyssat 294
    getdef(conffile,"site_languages",buf);
295
    for(p1=buf;*p1;p1++) if(!isalnum(*p1)) *p1=' ';
296
    for(p1=find_word_start(buf); *p1 && langcnt<MAX_LANGS; p1=find_word_start(p2)) {
6884 bpr 297
    p2=find_word_end(p1);
298
    if(p2!=p1+2 || !isalpha(*p1) || !isalpha(*(p1+1))) continue;
299
    memmove(lang[langcnt],p1,2); lang[langcnt++][2]=0;
10 reyssat 300
    }
6884 bpr 301
    if(langcnt==0) {/*  default languages */
302
    langcnt=DEFAULT_LANGCNT;
10 reyssat 303
    }
304
    s=getenv("mlist"); if(s==NULL) exit(1);
305
    l=strlen(s); if(l<0 || l>100*MAX_LINELEN) exit(1);
3718 reyssat 306
    mlist=xmalloc(l+16); ovlstrcpy(mlist,s); old="";
10 reyssat 307
    for(i=0;i<langcnt;i++) {
6884 bpr 308
    snprintf(buf,sizeof(buf),"%s/%s.%s",dicdir,ignoredic,lang[i]);
309
    f=fopen(buf,"r"); if(f==NULL) continue;
310
    l=fread(ignore[i],1,MAX_LINELEN,f);fclose(f);
311
    if(l<0 || l>=MAX_LINELEN) l=0;
312
    ignore[i][l]=0;
10 reyssat 313
    }
314
    for(t=0, p1=find_word_start(mlist);
6884 bpr 315
    *p1 && modcnt<MAX_MODULES;
316
    p1=find_word_start(p2), t++) {
317
    p2=find_word_end(p1);
318
    l=p2-p1; if(*p2) *p2++=0;
319
    fprintf(addrf,"%d:%s\n",t,p1);
320
    fprintf(serialf,"%s:%d\n",p1,t);
321
    thislang=-1;
6564 bpr 322
/* language is taken from the address */
6884 bpr 323
    if(l>3 && p1[l-3]=='.') {
324
        for(i=0;i<langcnt;i++) if(strcasecmp(lang[i],p1+l-2)==0) break;
325
        if(i<langcnt) {p1[l-3]=0; thislang=i;}
326
        else {/*  unknown language, not referenced */
327
        continue;
328
        }
10 reyssat 329
    }
6884 bpr 330
    if(modcnt>0 && strcmp(old,p1)==0 && thislang>=0) {
331
        if(mod[modcnt-1].langcnt<langcnt) {
332
        mod[modcnt-1].langs[mod[modcnt-1].langcnt]=thislang;
333
        mod[modcnt-1].counts[mod[modcnt-1].langcnt]=t;
334
        (mod[modcnt-1].langcnt)++;
335
        }
336
    }
337
    else {
338
        mod[modcnt].name=old=p1;
339
        if(thislang>=0) {
340
        mod[modcnt].langs[0]=thislang;
341
        mod[modcnt].langcnt=1;
342
        }
343
        else mod[modcnt].langcnt=0;
344
        mod[modcnt].counts[0]=t;
345
        modcnt++;
346
    }
347
    }
10 reyssat 348
    snprintf(buf,sizeof(buf),"%s/language",outdir);
349
    langf=fopen(buf,"w");
350
    snprintf(buf,sizeof(buf),"%s/title",outdir);
351
    titf=fopen(buf,"w");
352
    snprintf(buf,sizeof(buf),"%s/description",outdir);
353
    descf=fopen(buf,"w");
354
    snprintf(buf,sizeof(buf),"%s/author",outdir);
355
    authorf=fopen(buf,"w");
356
    snprintf(buf,sizeof(buf),"%s/version",outdir);
357
    versionf=fopen(buf,"w");
358
    snprintf(buf,sizeof(buf),"%s/lists/robot.phtml",outdir);
359
    robotf=fopen(buf,"w");
360
    fclose(addrf); fclose(serialf);
361
    if(!robotf || !versionf || !authorf || !descf || !titf || !descf) {
6884 bpr 362
    fprintf(stderr,"modind: error creating output files.\n");
363
    exit(1);
10 reyssat 364
    }
365
}
366
 
367
void sprep(void)
368
{
369
    char *p1,*p2,*s;
370
    int i,l,thislang;
6881 bpr 371
 
10 reyssat 372
    modcnt=0;
373
    s=getenv("slist"); if(s==NULL) return;
374
    l=strlen(s); if(l<0 || l>100*MAX_LINELEN) return;
3718 reyssat 375
    mlist=xmalloc(l+16); ovlstrcpy(mlist,s);
10 reyssat 376
    for(p1=find_word_start(mlist); *p1 && modcnt<MAX_MODULES; p1=find_word_start(p2)) {
6884 bpr 377
    p2=find_word_end(p1);
378
    l=p2-p1; if(*p2) *p2++=0;
379
    for(i=0;i<langcnt;i++) if(strncasecmp(lang[i],p1,2)==0) break;
380
    if(i<langcnt) thislang=i; else continue;
381
    mod[modcnt].name=p1;
382
    mod[modcnt].langs[0]=thislang;
383
    mod[modcnt].langcnt=1;
384
    modcnt++;
10 reyssat 385
    }
386
}
387
 
388
void clean(void)
389
{
390
    fclose(langf); fclose(titf); fclose(descf); fclose(robotf);
391
    fclose(authorf); fclose(versionf);
392
}
393
 
394
/*  Fields of a sheet definition file.
 *  sheet_index() stores the first SHEETINDEX_NO-1 lines of the file, in this
 *  order, into sindbuf[], and the following text into sindbuf[s_information].
 *  The enum below gives the index of each field in sindbuf[] and must be
 *  kept in the same order as sheetindex[].
 */
char *sheetindex[]={
6881 bpr 395
      "title", "description",
10 reyssat 396
      "duration", "severity",
397
      "level", "domain",
6967 bpr 398
      "keywords", "reserved1", "reserved2", "information"
10 reyssat 399
};
400
#define SHEETINDEX_NO (sizeof(sheetindex)/sizeof(sheetindex[0]))
401
char sindbuf[SHEETINDEX_NO][MAX_LINELEN+1];
402
enum{s_title, s_description,
403
      s_duration, s_severity,
404
      s_level, s_domain,
405
      s_keywords, s_reserved1, s_reserved2,
6967 bpr 406
      s_information
10 reyssat 407
};
408
 
409
/*  Names of the definitions read from a module's INDEX file.
 *  module_index() looks each of them up with _getdef() and stores the value
 *  in indbuf[] at the same position.
 *  The enum below gives the index of each field in indbuf[] and must be
 *  kept in the same order as modindex[].
 */
char *modindex[]={
6881 bpr 410
      "title", "description",
10 reyssat 411
      "author", "address", "copyright",
412
      "version", "wims_version", "language",
6881 bpr 413
      "category", "level", "domain", "keywords",
6799 bpr 414
      "keywords_ca", "keywords_en", "keywords_fr", "keywords_it", "keywords_nl",
415
      "title_ca", "title_en", "title_fr", "title_it", "title_nl",
10 reyssat 416
      "require"
417
};
418
#define MODINDEX_NO (sizeof(modindex)/sizeof(modindex[0]))
419
char indbuf[MODINDEX_NO][MAX_LINELEN+1];
420
enum{i_title, i_description,
421
      i_author,i_address,i_copyright,
422
      i_version,i_wims_version,i_language,
423
      i_category,i_level,i_domain,i_keywords,
6799 bpr 424
      i_keywords_ca,i_keywords_en,i_keywords_fr,i_keywords_it,i_keywords_nl,
425
      i_title_ca,i_title_en,i_title_fr,i_title_it,i_title_nl,
10 reyssat 426
      i_require
427
};
428
 
429
/*  NOTE(review): module_special_file and MODSPEC_NO are not referenced in
 *  the visible part of this file.
 */
char *module_special_file[]={
430
    "intro","help","about"
431
};
432
#define MODSPEC_NO (sizeof(module_special_file)/sizeof(module_special_file[0]))
433
/*  two-letter language of the module last read by module_index() ("en" by default) */
char module_language[4];
434
 
6884 bpr 435
/*  read and treat module's INDEX file */
10 reyssat 436
int module_index(const char *name)
437
{
438
    char *p, fbuf[MAX_LINELEN+1], ibuf[MAX_LINELEN+1];
439
    FILE *indf;
440
    int i,l;
441
 
442
    snprintf(fbuf,sizeof(fbuf),"%s/%s/INDEX",moduledir,name);
443
    indf=fopen(fbuf,"r"); if(indf==NULL) return -1;
444
    l=fread(ibuf,1,MAX_LINELEN,indf); fclose(indf);
445
    if(l>0 && l<MAX_LINELEN) ibuf[l]=0; else return -1;
6884 bpr 446
/* treate all fields in *modindex */
10 reyssat 447
    for(i=0;i<MODINDEX_NO;i++) {
6884 bpr 448
    _getdef(ibuf,modindex[i],indbuf[i]);
449
/*  compatibility precaution */
450
    if(indbuf[i][0]==':') indbuf[i][0]='.';
10 reyssat 451
    }
452
    p=find_word_start(indbuf[i_language]);
453
    if(isalpha(*p) && isalpha(*(p+1))) {
6884 bpr 454
    memmove(module_language,p,2); module_language[2]=0;
10 reyssat 455
    }
3718 reyssat 456
    else ovlstrcpy(module_language,"en");
10 reyssat 457
    return 0;
458
}
459
 
460
int sheet_index(int serial)
461
{
462
    char *p1, *p2, fbuf[MAX_LINELEN+1], ibuf[MAX_LINELEN+1];
463
    FILE *indf;
464
    int i,l;
465
 
466
    snprintf(fbuf,sizeof(fbuf),"%s/%s.def",sheetdir,mod[serial].name);
467
    indf=fopen(fbuf,"r"); if(indf==NULL) return -1;
468
    l=fread(ibuf,1,MAX_LINELEN,indf); fclose(indf);
469
    if(l>0 && l<MAX_LINELEN) ibuf[l]=0; else return -1;
470
    for(i=0;i<SHEETINDEX_NO;i++) sindbuf[i][0]=0;
471
    for(i=0,p1=find_word_start(ibuf);
6884 bpr 472
    i<SHEETINDEX_NO-1 && *p1!=':' && *p1!=0;
473
    i++,p1=p2) {
474
    p2=strchr(p1,'\n');
475
    if(p2!=NULL) *p2++=0; else p2=p1+strlen(p1);
8100 bpr 476
    p1=find_word_start(p1); strip_trailing_spaces2(p1);
6884 bpr 477
    snprintf(sindbuf[i],MAX_LINELEN,"%s",p1);
10 reyssat 478
    }
479
    p2=strstr(p1,"\n:"); if(p2==NULL) p2=p1+strlen(p1);
480
    else *p2=0;
8100 bpr 481
    p1=find_word_start(p1); strip_trailing_spaces2(p1);
10 reyssat 482
    for(p2=p1;*p2;p2++) if(*p2=='\n') *p2=' ';
6967 bpr 483
    ovlstrcpy(sindbuf[s_information],p1);
10 reyssat 484
    return 0;
485
}
486
 
487
/*  State of the module or sheet being indexed.
 *  categories/catcnt: category letters of the current module (set in onemodule()).
 *  taken/takenlen:    words already recorded for the current item, separated
 *                     by spaces, and the length of that text.
 *  tweight:           sum of the weights of the recorded words.
 */
unsigned char categories[16];
488
char taken[MAX_LINELEN+1];
489
int catcnt, takenlen, tweight;
490
 
491
void appenditem(char *word, int lind, int serial, int weight, char *l)
492
{
493
    char nbuf[MAX_LINELEN+1], buf[MAX_LINELEN+1];
494
    int i, ll;
495
    char *p;
496
    FILE *f;
6881 bpr 497
 
10 reyssat 498
    if(!isalnum(*word) || (ll=strlen(word))<2 ||
8100 bpr 499
       wordchr2(taken,word)!=NULL ||
500
       wordchr2(ignore[lind],word)!=NULL ||
10 reyssat 501
       takenlen>=MAX_LINELEN-ll-16)
502
      return;
503
    if(ll==2 && (!isdigit(word[0]) || !isalpha(word[1]))) return;
504
    for(p=word;*p;p++) if(!isalnum(*p) && *p!=' ') return;
505
    taken[takenlen++]=' '; taken[takenlen++]=' ';
3718 reyssat 506
    ovlstrcpy(taken+takenlen,word);
10 reyssat 507
    takenlen+=ll; tweight+=weight;
508
    snprintf(buf,sizeof(buf),"%s:%d?%d\n",word,serial,weight);
509
    for(i=0;i<catcnt;i++) {
6884 bpr 510
    snprintf(nbuf,sizeof(nbuf),"%s/%c.%s",
511
         outdir,categories[i],lang[lind]);
512
    f=fopen(nbuf,"a");
513
    if(f!=NULL) {fputs(buf,f); fclose(f);}
10 reyssat 514
    }
515
}
516
 
6881 bpr 517
void appenditem1 (char *buf, int lind, int serial, int weight, char *l )
518
{
519
  char *p1, *p2 ;
520
  for(p1=find_word_start(buf); *p1;
6884 bpr 521
    p1=find_word_start(p2)) {
522
    p2=strchr(p1,',');
523
    if(p2!=NULL) *p2++=0; else p2=p1+strlen(p1);
524
    if(strlen(p1)<=0) continue;
525
    appenditem(p1,lind,serial,weight,module_language);
6881 bpr 526
  }
527
}
528
void appenditem2 (char *buf, int lind, int serial, int weight, char *l )
529
{
530
  char *p1, *p2 ;
531
  for(p1=find_word_start(buf);*p1;
6884 bpr 532
    p1=find_word_start(p2)) {
533
    p2=find_word_end(p1); if(*p2) *p2++=0;
534
    appenditem(p1,lind,serial,weight,module_language);
6881 bpr 535
  }
536
}
10 reyssat 537
void onemodule(const char *name, int serial, int lind)
538
{
539
    int i;
540
    unsigned char trlist[]={
6884 bpr 541
    i_title,i_description,i_category,i_domain,i_keywords,
542
      i_require,i_author,
543
      i_keywords_ca,i_keywords_en,i_keywords_fr,i_keywords_it,i_keywords_nl,
544
      i_title_ca,i_title_en,i_title_fr,i_title_it,i_title_nl
10 reyssat 545
    };
546
    #define trcnt (sizeof(trlist)/sizeof(trlist[0]))
6564 bpr 547
    char *p1, *p2, *pp, *q, buf[MAX_LINELEN+1], lbuf[16];
10 reyssat 548
    FILE *f;
6881 bpr 549
 
10 reyssat 550
    if(module_index(name)) return;
551
    towords(indbuf[i_category]);
7915 bpr 552
/*   list the categories (among A=all,X=eXercise,O,D,...) corresponding
6884 bpr 553
 *   to this module
554
 */
10 reyssat 555
    for(i=catcnt=0;i<catno && catcnt<16;i++) {
8100 bpr 556
    if(wordchr2(indbuf[i_category],cat[i].name)!=NULL)
6884 bpr 557
      categories[catcnt++]=cat[i].typ;
10 reyssat 558
    }
559
    if(catcnt==0) return;
560
    if(categories[0]!=cat[0].typ)
561
      categories[catcnt++]=cat[0].typ;
6884 bpr 562
/*  write module's name in the category.language files, for instance lists/X.fr
563
 * for french exercises
564
 */
10 reyssat 565
    for(i=0;i<catcnt;i++) {
6884 bpr 566
    snprintf(buf,sizeof(buf),"%s/lists/%c.%s",
567
         outdir,categories[i],lang[lind]);
568
    f=fopen(buf,"a");
569
    if(f!=NULL) {fprintf(f,"%s\n",name); fclose(f);}
10 reyssat 570
    }
6884 bpr 571
/*   add serial number and language (resp.title, ...) to corresponding file  */
10 reyssat 572
    fprintf(langf,"%d:%s\n",serial,module_language);
573
    fprintf(titf,"%d:%s\n",serial,indbuf[i_title]);
574
    fprintf(descf,"%d:%s\n",serial,indbuf[i_description]);
575
    fprintf(authorf,"%d:%s\n",serial,indbuf[i_author]);
576
    fprintf(versionf,"%d:%s\n",serial,indbuf[i_version]);
6881 bpr 577
 
6884 bpr 578
/*   add module's information in html page for robots  */
10 reyssat 579
    snprintf(buf,sizeof(buf),"%s",indbuf[i_description]);
580
    for(pp=strchr(buf,','); pp; pp=strchr(pp,','))
8100 bpr 581
      string_modify3(buf,pp,pp+1,"&#44;");
10 reyssat 582
    if(strcmp(module_language,lang[lind])==0)
583
      fprintf(robotf,"%s ,%s,%s,%s,%s\n",name,module_language,name,
6884 bpr 584
          indbuf[i_title], buf);
6819 reyssat 585
 
6884 bpr 586
/*   Normalize the information of trlist, using dictionary
7915 bpr 587
 *  -- bases/sys/domain.xx without suffix translation (--> english version)
6884 bpr 588
 */
6881 bpr 589
    entrycount=dentrycount; dicbuf=ddicbuf;
590
    memmove(entry,dentry,dentrycount*sizeof(entry[0]));
591
    unknown_type=unk_leave;
10 reyssat 592
    for(i=0;i<trcnt;i++) {
6884 bpr 593
    detag(indbuf[trlist[i]]);
8100 bpr 594
    deaccent2(indbuf[trlist[i]]);
6884 bpr 595
    comma(indbuf[trlist[i]]);
8100 bpr 596
    singlespace2(indbuf[trlist[i]]);
6884 bpr 597
    translate(indbuf[trlist[i]]);
6881 bpr 598
    }
6884 bpr 599
/*   Normalize the information, using dictionary
7915 bpr 600
 *   bases/sys/words.xx with suffix translation
6884 bpr 601
 */
6881 bpr 602
    entrycount=mentrycount; dicbuf=mdicbuf;
603
    memmove(entry,mentry,mentrycount*sizeof(entry[0]));
6884 bpr 604
    unknown_type=unk_leave;/*  used in translator_.c */
6881 bpr 605
    for(i=0;i<trcnt;i++) {
6884 bpr 606
    suffix_translate(indbuf[trlist[i]]);
607
    translate(indbuf[trlist[i]]);
10 reyssat 608
    }
6881 bpr 609
 
610
/* taken contains all words already seen in the module index */
10 reyssat 611
    taken[0]=0; takenlen=tweight=0;
6881 bpr 612
/*  append words of title  */
3718 reyssat 613
    ovlstrcpy(buf,indbuf[i_title]); towords(buf);
6881 bpr 614
    appenditem2(buf,lind,serial,4,module_language);
615
 
6884 bpr 616
/*  extract words of every other information except level */
6799 bpr 617
    snprintf(buf,sizeof(buf),"%s %s %s %s %s %s %s %s %s %s %s %s %s %s %s",
6884 bpr 618
         indbuf[i_description],indbuf[i_keywords],
619
         indbuf[i_keywords_ca],indbuf[i_keywords_en],indbuf[i_keywords_fr],
620
         indbuf[i_keywords_it],indbuf[i_keywords_nl],
621
         indbuf[i_title_ca],indbuf[i_title_en],indbuf[i_title_fr],
622
         indbuf[i_title_it],indbuf[i_title_nl],
623
         indbuf[i_domain],indbuf[i_require],indbuf[i_author]);
10 reyssat 624
    towords(buf);
6884 bpr 625
    appenditem2(buf,lind,serial,2,module_language);
6881 bpr 626
 
6884 bpr 627
/*   this time the dictionary is the group dictionary  sys/wgrp/wgrp
628
 *   with a g (groupdic), not an m (maindic) . see below main, suffix, group.
7915 bpr 629
 *   and delete unknown ?? and translate
6884 bpr 630
 */
10 reyssat 631
    entrycount=gentrycount; dicbuf=gdicbuf;
632
    memmove(entry,gentry,gentrycount*sizeof(entry[0]));
6881 bpr 633
 
6884 bpr 634
/*  append words of every title information  */
6881 bpr 635
    ovlstrcpy(buf,indbuf[i_title]);
10 reyssat 636
    unknown_type=unk_delete;
6881 bpr 637
    translate(buf);
638
    appenditem1(buf,lind,serial,2,module_language);
639
 
6884 bpr 640
/*  append words of information of description except level  */
6881 bpr 641
    snprintf(buf,sizeof(buf),"%s", indbuf[i_description]);
642
    unknown_type=unk_delete;
643
    translate(buf);
644
    appenditem1(buf,lind,serial,4,module_language);
645
 
6884 bpr 646
/*  append words (or group of words) of keywords and domain  */
6881 bpr 647
    snprintf(buf,sizeof(buf),"%s, %s, %s, %s, %s, %s, %s",
6884 bpr 648
         indbuf[i_domain],indbuf[i_keywords],
649
         indbuf[i_keywords_ca], indbuf[i_keywords_en],indbuf[i_keywords_fr],
650
         indbuf[i_keywords_it], indbuf[i_keywords_nl]);
651
    unknown_type=unk_leave;
10 reyssat 652
    translate(buf);
6881 bpr 653
    appenditem1(buf,lind,serial,2,module_language);
654
 
6884 bpr 655
/*   append level information, with weight 2 */
10 reyssat 656
    snprintf(buf,sizeof(buf),"%s",indbuf[i_level]);
3718 reyssat 657
    ovlstrcpy(lbuf,"level");
10 reyssat 658
    for(p1=buf; *p1; p1++) if(!isalnum(*p1)) *p1=' ';
6564 bpr 659
    q=buf+strlen(buf);
660
    for(p1=find_word_start(buf); (*p1) && (p1 < q) ;
6884 bpr 661
    p1=find_word_start(p2)) {
662
    p2=find_word_end(p1);
663
    if(p2!=NULL) *p2++=0; else p2=p1+strlen(p1);
664
    if(!isalpha(*p1) ||
665
       (!isdigit(*(p1+1)) && *(p1+1)!=0) ||
666
       (*(p1+1)!=0 && *(p1+2)!=0))
667
      continue;
668
    *p1=tolower(*p1);
669
    ovlstrcpy(lbuf+strlen("level"),p1);
670
    appenditem(lbuf,lind,serial,2,module_language);
10 reyssat 671
    }
6884 bpr 672
/*   append total weight of module to weight file site2/weight.xx  */
10 reyssat 673
    fprintf(weightf,"%d:%d\n",serial,tweight);
674
}
675
 
676
void modules(void)
677
{
678
    int i,j,k,d;
679
    char namebuf[MAX_LINELEN+1];
6881 bpr 680
    char mdic[MAX_LINELEN+1], sdic[MAX_LINELEN+1], gdic[MAX_LINELEN+1], ddic[MAX_LINELEN+1];
10 reyssat 681
 
682
    for(j=0;j<langcnt;j++) {
6884 bpr 683
    snprintf(namebuf,sizeof(namebuf),"%s/weight.%s",outdir,lang[j]);
684
    weightf=fopen(namebuf,"w");
685
    snprintf(mdic,sizeof(mdic),"%s/%s.%s",dicdir,maindic,lang[j]);
686
    snprintf(sdic,sizeof(sdic),"%s/%s.%s",dicdir,suffixdic,lang[j]);
687
    snprintf(gdic,sizeof(gdic),"%s/%s.%s",dicdir,groupdic,lang[j]);
688
    snprintf(ddic,sizeof(ddic),"%s/%s.%s",dicdir,domaindic,lang[j]);
689
    suffix_dic(sdic); prepare_dic(gdic);
690
    gdicbuf=dicbuf; gentrycount=entrycount;
691
    memmove(gentry,entry,gentrycount*sizeof(entry[0]));
692
    prepare_dic(mdic);
693
    mdicbuf=dicbuf; mentrycount=entrycount;
694
    memmove(mentry,entry,mentrycount*sizeof(entry[0]));
695
    prepare_dic(ddic);
696
    ddicbuf=dicbuf; dentrycount=entrycount;
697
    memmove(dentry,entry,dentrycount*sizeof(entry[0]));
698
    unknown_type=unk_leave; translate(ignore[j]);
699
    for(i=0;i<modcnt;i++) {
700
        if(mod[i].langcnt>0) {
701
        for(d=k=0;k<mod[i].langcnt;k++)
702
          if(mod[i].langs[k]<mod[i].langs[d]) d=k;
703
        for(k=0;k<mod[i].langcnt && mod[i].langs[k]!=j;k++);
704
        if(k>=mod[i].langcnt) k=d;
705
        snprintf(namebuf,MAX_LINELEN,"%s.%s",mod[i].name,
706
             lang[mod[i].langs[k]]);
707
        onemodule(namebuf,mod[i].counts[k],j);
708
        }
709
        else {
710
        onemodule(mod[i].name,mod[i].counts[0],j);
711
        }
10 reyssat 712
    }
6884 bpr 713
    if(mentrycount>0) free(mdicbuf);
714
    if(gentrycount>0) free(gdicbuf);
715
    if(suffixcnt>0) free(sufbuf);
716
    if(dentrycount>0) free(ddicbuf);
717
    if(weightf) fclose(weightf);
718
    }
10 reyssat 719
}
720
 
6881 bpr 721
/* FIXME ? differences with appenditem - use fprintf instead of  snprintf */
10 reyssat 722
void sappenditem(char *word, int lind, int serial, int weight)
723
{
724
    int ll;
725
    char *p;
6881 bpr 726
 
10 reyssat 727
    if(!isalnum(*word) || (ll=strlen(word))<2 ||
8100 bpr 728
       wordchr2(taken,word)!=NULL ||
729
       wordchr2(ignore[lind],word)!=NULL ||
10 reyssat 730
       takenlen>=MAX_LINELEN-ll-16)
731
      return;
732
    if(ll==2 && (!isdigit(word[0]) || !isalpha(word[1]))) return;
733
    for(p=word;*p;p++) if(!isalnum(*p) && *p!=' ') return;
734
    taken[takenlen++]=' ';taken[takenlen++]=' ';
3718 reyssat 735
    ovlstrcpy(taken+takenlen,word);
10 reyssat 736
    takenlen+=ll; tweight+=weight;
737
    fprintf(indf,"%s:%d?%d\n",word,serial,weight);
738
}
739
 
740
void onesheet(int serial, int lind)
741
{
742
    int i;
743
    unsigned char trlist[]={
6967 bpr 744
    s_title,s_description,s_domain,s_keywords,s_information
10 reyssat 745
    };
746
    #define trcnt (sizeof(trlist)/sizeof(trlist[0]))
747
    char *p1, *p2, buf[MAX_LINELEN+1];
6881 bpr 748
 
10 reyssat 749
    if(sheet_index(serial)) return;
750
    fprintf(listf,"%s\n",mod[serial].name+3);
751
    fprintf(titf,"%d:%s\n",serial,sindbuf[s_title]);
752
    fprintf(descf,"%d:%s\n",serial,sindbuf[s_description]);
6967 bpr 753
    fprintf(remf,"%d:%s\n",serial,sindbuf[s_information]);
7915 bpr 754
 
6881 bpr 755
    entrycount=dentrycount; dicbuf=ddicbuf;
756
    memmove(entry,dentry,dentrycount*sizeof(entry[0]));
10 reyssat 757
    unknown_type=unk_leave;
758
    for(i=0;i<trcnt;i++) {
6884 bpr 759
    detag(sindbuf[trlist[i]]);
8100 bpr 760
    deaccent2(sindbuf[trlist[i]]);
6884 bpr 761
    comma(sindbuf[trlist[i]]);
8100 bpr 762
    singlespace2(sindbuf[trlist[i]]);
6884 bpr 763
    translate(sindbuf[trlist[i]]);
6881 bpr 764
    }
7915 bpr 765
 
6881 bpr 766
    entrycount=mentrycount; dicbuf=mdicbuf;
767
    memmove(entry,mentry,mentrycount*sizeof(entry[0]));
768
    unknown_type=unk_leave;
769
    for(i=0;i<trcnt;i++) {
6884 bpr 770
    suffix_translate(sindbuf[trlist[i]]);
771
    translate(sindbuf[trlist[i]]);
10 reyssat 772
    }
773
    taken[0]=0; takenlen=tweight=0;
3718 reyssat 774
    ovlstrcpy(buf,sindbuf[s_title]); towords(buf);
10 reyssat 775
    for(p1=find_word_start(buf);*p1;
6884 bpr 776
    p1=find_word_start(p2)) {
777
    p2=find_word_end(p1); if(*p2) *p2++=0;
778
    sappenditem(p1,lind,serial,4);
10 reyssat 779
    }
780
    snprintf(buf,sizeof(buf),"%s %s %s %s",
6884 bpr 781
         sindbuf[s_description],sindbuf[s_keywords],
6967 bpr 782
         sindbuf[s_domain],sindbuf[s_information]);
10 reyssat 783
    towords(buf);
784
    for(p1=find_word_start(buf);*p1;
6884 bpr 785
    p1=find_word_start(p2)) {
786
    p2=find_word_end(p1); if(*p2) *p2++=0;
787
    sappenditem(p1,lind,serial,2);
10 reyssat 788
    }
789
    entrycount=gentrycount; dicbuf=gdicbuf;
790
    memmove(entry,gentry,gentrycount*sizeof(entry[0]));
791
    unknown_type=unk_delete;
3718 reyssat 792
    ovlstrcpy(buf,sindbuf[s_title]); translate(buf);
10 reyssat 793
    for(p1=find_word_start(buf); *p1;
6884 bpr 794
    p1=find_word_start(p2)) {
795
    p2=strchr(p1,',');
796
    if(p2!=NULL) *p2++=0; else p2=p1+strlen(p1);
797
    if(strlen(p1)<=0) continue;
798
    sappenditem(p1,lind,serial,4);
10 reyssat 799
    }
800
    snprintf(buf,sizeof(buf),"%s, %s, %s, %s",
6884 bpr 801
         sindbuf[s_description],sindbuf[s_keywords],
6967 bpr 802
         sindbuf[s_domain],sindbuf[s_information]);
10 reyssat 803
    translate(buf);
804
    for(p1=find_word_start(buf); *p1;
6884 bpr 805
    p1=find_word_start(p2)) {
806
    p2=strchr(p1,',');
807
    if(p2!=NULL) *p2++=0; else p2=p1+strlen(p1);
808
    if(strlen(p1)<=0) continue;
809
    sappenditem(p1,lind,serial,2);
10 reyssat 810
    }
811
    fprintf(weightf,"%d:%d\n",serial,tweight);
812
}
813
 
814
void sheets(void)
815
{
816
    int i,j;
6961 bpr 817
    char mdic[MAX_LINELEN+1], sdic[MAX_LINELEN+1], gdic[MAX_LINELEN+1], ddic[MAX_LINELEN+1];
10 reyssat 818
    char buf[MAX_LINELEN+1];
7915 bpr 819
 
10 reyssat 820
    for(j=0;j<langcnt;j++) {
6884 bpr 821
    snprintf(buf,sizeof(buf),"%s/index/title.%s",sheetdir,lang[j]);
822
    titf=fopen(buf,"w");
823
    snprintf(buf,sizeof(buf),"%s/index/description.%s",sheetdir,lang[j]);
824
    descf=fopen(buf,"w");
825
    snprintf(buf,sizeof(buf),"%s/index/%s",sheetdir,lang[j]);
826
    indf=fopen(buf,"w");
827
    snprintf(buf,sizeof(buf),"%s/index/list.%s",sheetdir,lang[j]);
828
    listf=fopen(buf,"w");
829
    snprintf(buf,sizeof(buf),"%s/index/weight.%s",sheetdir,lang[j]);
830
    weightf=fopen(buf,"w");
831
    snprintf(buf,sizeof(buf),"%s/index/addr.%s",sheetdir,lang[j]);
832
    addrf=fopen(buf,"w");
6967 bpr 833
    snprintf(buf,sizeof(buf),"%s/index/information.%s",sheetdir,lang[j]);
6961 bpr 834
    remf=fopen(buf,"w");
6884 bpr 835
    snprintf(buf,sizeof(buf),"%s/index/serial.%s",sheetdir,lang[j]);
836
    serialf=fopen(buf,"w");
837
    snprintf(mdic,sizeof(mdic),"%s/%s.%s",dicdir,maindic,lang[j]);
838
    snprintf(sdic,sizeof(sdic),"%s/%s.%s",dicdir,suffixdic,lang[j]);
839
    snprintf(gdic,sizeof(gdic),"%s/%s.%s",dicdir,groupdic,lang[j]);
6961 bpr 840
    snprintf(ddic,sizeof(ddic),"%s/%s.%s",dicdir,domaindic,lang[j]);
6884 bpr 841
    suffix_dic(sdic); prepare_dic(gdic);
842
    gdicbuf=dicbuf; gentrycount=entrycount;
843
    memmove(gentry,entry,gentrycount*sizeof(entry[0]));
844
    prepare_dic(mdic);
845
    mdicbuf=dicbuf; mentrycount=entrycount;
846
    memmove(mentry,entry,mentrycount*sizeof(entry[0]));
6973 bpr 847
    prepare_dic(ddic);
848
    ddicbuf=dicbuf; dentrycount=entrycount;
849
    memmove(dentry,entry,dentrycount*sizeof(entry[0]));
6884 bpr 850
    unknown_type=unk_leave; translate(ignore[j]);
851
    for(i=0;i<modcnt;i++) {
852
        if(mod[i].langs[0]!=j) continue;
853
        fprintf(addrf,"%d:%s\n",i,mod[i].name+3);
854
        fprintf(serialf,"%s:%d\n",mod[i].name+3,i);
855
        onesheet(i,j);
10 reyssat 856
    }
6884 bpr 857
    if(mentrycount>0) free(mdicbuf);
858
    if(gentrycount>0) free(gdicbuf);
859
    if(suffixcnt>0) free(sufbuf);
6961 bpr 860
    if(dentrycount>0) free(ddicbuf);
6884 bpr 861
    fclose(titf); fclose(descf); fclose(indf); fclose(listf);
862
    fclose(weightf); fclose(addrf); fclose(serialf);
863
    }
10 reyssat 864
}
865
 
866
int main()
867
{
868
    prep();
869
    if(modcnt>0) modules();
870
    clean();
871
    sprep();
872
    if(modcnt>0) sheets();
873
    return 0;
874
}
875