Subversion Repositories wimsdev

Rev

Rev 8863 | Rev 9091 | Go to most recent revision | Details | Compare with Previous | Last modification | View Log | RSS feed

Rev Author Line No. Line
10 reyssat 1
/*    Copyright (C) 1998-2003 XIAO, Gang of Universite de Nice - Sophia Antipolis
2
 *
3
 *  This program is free software; you can redistribute it and/or modify
4
 *  it under the terms of the GNU General Public License as published by
5
 *  the Free Software Foundation; either version 2 of the License, or
6
 *  (at your option) any later version.
7
 *
8
 *  This program is distributed in the hope that it will be useful,
9
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
10
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
11
 *  GNU General Public License for more details.
12
 *
13
 *  You should have received a copy of the GNU General Public License
14
 *  along with this program; if not, write to the Free Software
15
 *  Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
16
 */
17
 
6884 bpr 18
/*  This is an internal program,
7915 bpr 19
 * used to index modules for search engine.
6884 bpr 20
 */
10 reyssat 21
 
8100 bpr 22
#include "../Lib/libwims.h"
8123 bpr 23
#include "translator_.h"
24
#include "suffix.h"
10 reyssat 25
 
6884 bpr 26
/* Capacity limits: number of languages and number of indexed modules. */
#define MAX_LANGS    MAX_LANGUAGES
#define MAX_MODULES    65536

/* Default directories. prep() replaces outdir, sheetdir and sheetoutdir by
 * the environment variables modind_outdir, modind_sheetdir and
 * modind_sheetoutdir when those are set and non-empty.
 */
char *moduledir   = "public_html/modules";
char *sheetdir    = "public_html/bases/sheet";
char *dicdir      = "public_html/bases";
char *outdir      = "public_html/bases/site2";
char *sheetoutdir = "public_html/bases/sheet/index";

/* Dictionary base names; files are opened as <dicdir>/<name>.<language>. */
char *maindic     = "sys/words";
char *groupdic    = "sys/wgrp/wgrp";
char *suffixdic   = "sys/suffix";
char *domaindic   = "sys/domaindic";
char *ignoredic   = "sys/indignore";

/* Configuration file in which prep() looks up site_languages. */
char *conffile    = "log/wims.conf";
char *mlistbase   = "list";
10 reyssat 40
 
41
char lang[MAX_LANGS][4]={
1792 bpr 42
    "en","fr","cn","es","it","nl","si","ca","pt"
10 reyssat 43
};
6884 bpr 44
#define DEFAULT_LANGCNT    6
10 reyssat 45
char allang[MAX_LANGS][4]={
6564 bpr 46
    "en","fr","cn","es","it","nl","de","si","ca","pt"
10 reyssat 47
};
48
#define allangcnt 8
49
char ignore[MAX_LANGS][MAX_LINELEN+1];
50
char mlistfile[MAX_LANGS][256];
51
int langcnt;
8650 bpr 52
FILE *langf, *titf, *descf, *weightf, *robotf, *indf, *listf, *addrf, *serialf, *authorf, *versionf, *remf, *titlef;
10 reyssat 53
 
54
/* Module categories: the word searched in the INDEX "category" field and
 * the one-letter code used to build the names of the per-category files.
 * The first entry is the catch-all category that onemodule() adds to every
 * module having at least one recognised category.
 */
struct cat {
    char *name;
    char typ;
} cat[]={
    { .name = "all_types",  .typ = 'A' },
    { .name = "exercise",   .typ = 'X' },
    { .name = "oef",        .typ = 'O' },
    { .name = "tool",       .typ = 'T' },
    { .name = "recreation", .typ = 'R' },
    { .name = "reference",  .typ = 'Y' },
    { .name = "document",   .typ = 'D' },
    { .name = "popup",      .typ = 'P' },
    { .name = "datamodule", .typ = 'M' }
};
/* Number of entries in cat[]. */
#define catno (sizeof(cat)/sizeof(cat[0]))
69
 
70
struct mod {
71
    char *name;
72
    unsigned char langs[MAX_LANGS];
73
    int counts[MAX_LANGS];
74
    int  langcnt;
75
} mod[MAX_MODULES];
76
int modcnt;
77
 
78
char *mlist;
79
 
6884 bpr 80
/*  fold known accented letters to unaccented, other strange characters to space
7915 bpr 81
 *  apostrophe is among the exceptions to be kept (important for multi-word expressions)
6884 bpr 82
 */
8100 bpr 83
void deaccent2(char *p)
10 reyssat 84
{
3247 bpr 85
    char *sp;
10 reyssat 86
    char *v;
87
    for(sp=p;*sp;sp++) {
6884 bpr 88
    if(*sp<0 && (v=strchr(acctab,*sp))!=NULL)
89
      *sp=*(deatab+(v-acctab));
90
    if(!isalnum(*sp) && strchr(",.&$+*",*sp)==0) *sp=' ';
91
    else *sp=tolower(*sp);
10 reyssat 92
    }
93
}
94
 
6884 bpr 95
/*  Replace, in place, every character of p that is neither alphanumeric
 *  nor one of & $ + * by a space.
 */
void towords(char *p)
{
    char *pp;

    for(pp=p;*pp;pp++) {
        /* cast: <ctype.h> functions are undefined for negative char values */
        if(!isalnum((unsigned char)*pp) && strchr("&$+*",*pp)==0) *pp=' ';
    }
}
101
 
6884 bpr 102
/*  Find the first occurrence of w in p that stands as a whole word, i.e.
 *  that starts at the beginning of p or after a whitespace character, and
 *  is followed by a whitespace character or by the end of the string.
 *  Returns a pointer to that occurrence, or NULL if there is none.
 */
char *wordchr2(char *p, char *w)
{
    char *r;
    size_t wlen=strlen(w);
    int left_ok, right_ok;

    for(r=strstr(p,w); r!=NULL; r=strstr(r+1,w)) {
        /* casts: <ctype.h> functions are undefined for negative char values */
        left_ok=(r==p) || isspace((unsigned char)*(r-1));
        right_ok=*(r+wlen)==0 || isspace((unsigned char)*(r+wlen));
        if(left_ok && right_ok) break;
        /* only reachable with an empty w: do not search past the terminator */
        if(*r==0) {r=NULL; break;}
    }
    return r;
}
112
 
113
/*  Return a pointer just past the '>' closing the tag that starts at p
 *  (a leading '<' is skipped). Nested tags are skipped recursively and a
 *  '>' inside a single- or double-quoted string is not taken as the end.
 *  If a quoted string is never closed, the end of p is returned; if no '>'
 *  is found, the returned pointer designates the terminating zero.
 */
char *find_tag_end(char *p)
{
    char *cur=p;
    char *closing;

    if(*cur=='<') cur++;
    while(*cur!=0 && *cur!='>') {
        switch(*cur) {
            case '<':
                /* nested tag: resume right after it */
                cur=find_tag_end(cur);
                break;
            case '"':
            case '\'':
                /* jump over the quoted string */
                closing=strchr(cur+1,*cur);
                if(closing==NULL) return p+strlen(p);
                cur=closing+1;
                break;
            default:
                cur++;
                break;
        }
    }
    if(*cur=='>') cur++;
    return cur;
}
132
 
133
/*  Find the first tag named `tag` (case-insensitive) in p: a '<' directly
 *  followed by the name, itself followed by a non-alphanumeric character.
 *  Returns a pointer to the '<', or to the terminating zero of p when the
 *  tag is not found.
 */
char *find_tag(char *p, char *tag)
{
    char *pp;
    size_t len=strlen(tag);

    for(pp=strchr(p,'<'); pp!=NULL && *pp; pp=strchr(pp+1,'<')) {
        /* cast: <ctype.h> functions are undefined for negative char values */
        if(strncasecmp(pp+1,tag,len)==0 &&
           !isalnum((unsigned char)*(pp+1+len))) return pp;
    }
    return p+strlen(p);
}
143
 
6884 bpr 144
/*  Remove every html tag from p, in place. A tag that is never closed
 *  truncates the string at its '<'.
 */
void detag(char *p)
{
    char *lt, *after;

    lt=strchr(p,'<');
    while(lt!=NULL) {
        after=find_tag_end(lt);
        if(*after==0) {
            /* the tag runs to the end of the string */
            *lt=0;
            return;
        }
        /* pull the text following the tag over the tag itself */
        ovlstrcpy(lt,after);
        lt=strchr(lt,'<');
    }
}
154
 
6819 reyssat 155
/* Replace every "," of p by ", " so that a comma also ends a word. */
void comma(char *p)
{
    char *c;

    c=strchr(p,',');
    while(c!=NULL) {
        string_modify3(p,c,c+1,", ");
        /* resume after the comma just handled */
        c=strchr(c+1,',');
    }
}
163
 
10 reyssat 164
void _getdef(char buf[], char *name, char value[])
165
{
166
    char *p1, *p2, *p3;
167
 
168
    value[0]=0;
169
    for(p1=strstr(buf,name); p1!=NULL; p1=strstr(p1+1,name)) {
6884 bpr 170
    p2=find_word_start(p1+strlen(name));
171
    if((p1>buf && !isspace(*(p1-1))) || *p2!='=') continue;
172
    p3=p1; while(p3>buf && isspace(*(p3-1)) && *(p3-1)!='\n') p3--;
173
    if(p3>buf && *(p3-1)!='\n') continue;
174
    p3=strchr(p2,'\n');
175
    p2=find_word_start(p2+1);
176
    if(p3 <= p2) continue;
177
    snprintf(value,MAX_LINELEN,"%s",p2);
178
    if(p3!=NULL && p3-p2<MAX_LINELEN) value[p3-p2]=0;
8100 bpr 179
    strip_trailing_spaces2(value);
6884 bpr 180
    break;
10 reyssat 181
    }
182
}
183
 
6884 bpr 184
/*  Get variable definition from a file.
 *  The whole file fname is read and searched with _getdef().
 *  Result stored in buffer value of length MAX_LINELEN; value is left empty
 *  when the file cannot be read or holds no definition of name.
 */
void getdef(char *fname, char *name, char value[])
{
    FILE *f;
    char *buf;
    long l;

    value[0]=0;
    f=fopen(fname,"r"); if(f==NULL) return;
    /* size of the file; give up on error or on an empty file */
    if(fseek(f,0,SEEK_END)!=0 || (l=ftell(f))<=0 || fseek(f,0,SEEK_SET)!=0) {
        fclose(f); return;
    }
    buf=xmalloc(l+256);
    l=fread(buf,1,l,f);
    fclose(f);
    if(l>0) {
        buf[l]=0;
        _getdef(buf,name,value);
    }
    /* released on every path, including a failed read */
    free(buf);
}
202
 
8123 bpr 203
/* Saved state of the three dictionaries, so that they can be switched
 * without being reloaded: m = main, g = group, d = domain.
 * xdicbuf keeps the value of dicbuf and xentry a copy of entry[] taken
 * right after the corresponding prepare_dic() call.
 */
char *mdicbuf;
char *gdicbuf;
char *ddicbuf;
char *gentry;
char *mentry;
char *dentry;

/* Number of entries saved in gentry, mentry and dentry. */
int gentrycount;
int mentrycount;
int dentrycount;
10 reyssat 206
 
6884 bpr 207
/*  Preparation of data */
10 reyssat 208
void prep(void)
209
{
210
    char buf[MAX_LINELEN+1];
211
    char *p1,*p2,*s,*old;
212
    int i,l,thislang,t;
213
    FILE *f;
6881 bpr 214
 
10 reyssat 215
    s=getenv("modind_outdir"); if(s!=NULL && *s!=0) outdir=s;
216
    s=getenv("modind_sheetdir"); if(s!=NULL && *s!=0) sheetdir=s;
9090 bpr 217
    s=getenv("modind_sheetoutdir"); if(s!=NULL && *s!=0) sheetoutdir=s;
10 reyssat 218
    snprintf(buf,sizeof(buf),"%s/addr",outdir);
219
    addrf=fopen(buf,"w");
9090 bpr 220
    if(!addrf) { fprintf(stderr,"modind: error creating output files addr.\n"); exit(1);}
10 reyssat 221
    snprintf(buf,sizeof(buf),"%s/serial",outdir);
222
    serialf=fopen(buf,"w");
9090 bpr 223
    if(!serialf) { fprintf(stderr,"modind: error creating output files serial.\n"); exit(1);}
10 reyssat 224
    modcnt=langcnt=0;
6884 bpr 225
/* take the langs declared in conffile */
10 reyssat 226
    getdef(conffile,"site_languages",buf);
227
    for(p1=buf;*p1;p1++) if(!isalnum(*p1)) *p1=' ';
228
    for(p1=find_word_start(buf); *p1 && langcnt<MAX_LANGS; p1=find_word_start(p2)) {
6884 bpr 229
    p2=find_word_end(p1);
230
    if(p2!=p1+2 || !isalpha(*p1) || !isalpha(*(p1+1))) continue;
231
    memmove(lang[langcnt],p1,2); lang[langcnt++][2]=0;
10 reyssat 232
    }
6884 bpr 233
    if(langcnt==0) {/*  default languages */
234
    langcnt=DEFAULT_LANGCNT;
10 reyssat 235
    }
236
    s=getenv("mlist"); if(s==NULL) exit(1);
237
    l=strlen(s); if(l<0 || l>100*MAX_LINELEN) exit(1);
3718 reyssat 238
    mlist=xmalloc(l+16); ovlstrcpy(mlist,s); old="";
10 reyssat 239
    for(i=0;i<langcnt;i++) {
9090 bpr 240
      snprintf(buf,sizeof(buf),"%s/%s.%s",dicdir,ignoredic,lang[i]);
241
      f=fopen(buf,"r"); if(f==NULL) continue;
242
      l=fread(ignore[i],1,MAX_LINELEN,f);fclose(f);
243
      if(l<0 || l>=MAX_LINELEN) l=0;
244
      ignore[i][l]=0;
10 reyssat 245
    }
9090 bpr 246
    for(t=0, p1=find_word_start(mlist); *p1 && modcnt<MAX_MODULES;
247
        p1=find_word_start(p2), t++) {
248
      p2=find_word_end(p1);
249
      l=p2-p1; if(*p2) *p2++=0;
250
      fprintf(addrf,"%d:%s\n",t,p1);
251
      fprintf(serialf,"%s:%d\n",p1,t);
252
      thislang=-1;
6564 bpr 253
/* language is taken from the address */
9090 bpr 254
      if(l>3 && p1[l-3]=='.') {
6884 bpr 255
        for(i=0;i<langcnt;i++) if(strcasecmp(lang[i],p1+l-2)==0) break;
256
        if(i<langcnt) {p1[l-3]=0; thislang=i;}
257
        else {/*  unknown language, not referenced */
258
        continue;
259
        }
9090 bpr 260
      }
261
      if(modcnt>0 && strcmp(old,p1)==0 && thislang>=0) {
6884 bpr 262
        if(mod[modcnt-1].langcnt<langcnt) {
263
        mod[modcnt-1].langs[mod[modcnt-1].langcnt]=thislang;
264
        mod[modcnt-1].counts[mod[modcnt-1].langcnt]=t;
265
        (mod[modcnt-1].langcnt)++;
266
        }
9090 bpr 267
     }
268
     else {
6884 bpr 269
        mod[modcnt].name=old=p1;
270
        if(thislang>=0) {
9090 bpr 271
          mod[modcnt].langs[0]=thislang;
272
          mod[modcnt].langcnt=1;
6884 bpr 273
        }
274
        else mod[modcnt].langcnt=0;
275
        mod[modcnt].counts[0]=t;
276
        modcnt++;
277
    }
278
    }
10 reyssat 279
    snprintf(buf,sizeof(buf),"%s/language",outdir);
280
    langf=fopen(buf,"w");
281
    snprintf(buf,sizeof(buf),"%s/title",outdir);
282
    titf=fopen(buf,"w");
283
    snprintf(buf,sizeof(buf),"%s/description",outdir);
284
    descf=fopen(buf,"w");
285
    snprintf(buf,sizeof(buf),"%s/author",outdir);
286
    authorf=fopen(buf,"w");
287
    snprintf(buf,sizeof(buf),"%s/version",outdir);
288
    versionf=fopen(buf,"w");
289
    snprintf(buf,sizeof(buf),"%s/lists/robot.phtml",outdir);
290
    robotf=fopen(buf,"w");
291
    fclose(addrf); fclose(serialf);
9090 bpr 292
    if(!robotf || !versionf || !authorf || !descf || !titf || !langf) {
293
      fprintf(stderr,"modind: error creating output files.\n");
294
      exit(1);
10 reyssat 295
    }
296
}
297
 
298
void sprep(void)
299
{
300
    char *p1,*p2,*s;
301
    int i,l,thislang;
6881 bpr 302
 
10 reyssat 303
    modcnt=0;
304
    s=getenv("slist"); if(s==NULL) return;
305
    l=strlen(s); if(l<0 || l>100*MAX_LINELEN) return;
3718 reyssat 306
    mlist=xmalloc(l+16); ovlstrcpy(mlist,s);
10 reyssat 307
    for(p1=find_word_start(mlist); *p1 && modcnt<MAX_MODULES; p1=find_word_start(p2)) {
6884 bpr 308
    p2=find_word_end(p1);
309
    l=p2-p1; if(*p2) *p2++=0;
310
    for(i=0;i<langcnt;i++) if(strncasecmp(lang[i],p1,2)==0) break;
311
    if(i<langcnt) thislang=i; else continue;
312
    mod[modcnt].name=p1;
313
    mod[modcnt].langs[0]=thislang;
314
    mod[modcnt].langcnt=1;
315
    modcnt++;
10 reyssat 316
    }
317
}
318
 
319
void clean(void)
320
{
321
    fclose(langf); fclose(titf); fclose(descf); fclose(robotf);
322
    fclose(authorf); fclose(versionf);
323
}
324
 
325
char *sheetindex[]={
6881 bpr 326
      "title", "description",
10 reyssat 327
      "duration", "severity",
328
      "level", "domain",
6967 bpr 329
      "keywords", "reserved1", "reserved2", "information"
10 reyssat 330
};
331
#define SHEETINDEX_NO (sizeof(sheetindex)/sizeof(sheetindex[0]))
332
char sindbuf[SHEETINDEX_NO][MAX_LINELEN+1];
333
enum{s_title, s_description,
334
      s_duration, s_severity,
335
      s_level, s_domain,
336
      s_keywords, s_reserved1, s_reserved2,
6967 bpr 337
      s_information
10 reyssat 338
};
339
 
340
char *modindex[]={
6881 bpr 341
      "title", "description",
10 reyssat 342
      "author", "address", "copyright",
343
      "version", "wims_version", "language",
6881 bpr 344
      "category", "level", "domain", "keywords",
6799 bpr 345
      "keywords_ca", "keywords_en", "keywords_fr", "keywords_it", "keywords_nl",
346
      "title_ca", "title_en", "title_fr", "title_it", "title_nl",
10 reyssat 347
      "require"
348
};
349
#define MODINDEX_NO (sizeof(modindex)/sizeof(modindex[0]))
350
char indbuf[MODINDEX_NO][MAX_LINELEN+1];
351
enum{i_title, i_description,
352
      i_author,i_address,i_copyright,
353
      i_version,i_wims_version,i_language,
354
      i_category,i_level,i_domain,i_keywords,
6799 bpr 355
      i_keywords_ca,i_keywords_en,i_keywords_fr,i_keywords_it,i_keywords_nl,
356
      i_title_ca,i_title_en,i_title_fr,i_title_it,i_title_nl,
10 reyssat 357
      i_require
358
};
359
 
360
char *module_special_file[]={
361
    "intro","help","about"
362
};
363
#define MODSPEC_NO (sizeof(module_special_file)/sizeof(module_special_file[0]))
364
char module_language[4];
365
 
6884 bpr 366
/*  read and treat module's INDEX file */
10 reyssat 367
int module_index(const char *name)
368
{
369
    char *p, fbuf[MAX_LINELEN+1], ibuf[MAX_LINELEN+1];
370
    FILE *indf;
371
    int i,l;
372
 
373
    snprintf(fbuf,sizeof(fbuf),"%s/%s/INDEX",moduledir,name);
9090 bpr 374
    indf=fopen(fbuf,"r");
375
    if(indf==NULL) {
376
      fprintf(stderr,"modind: INDEX of %s not found\n",fbuf); return -1
377
    }
10 reyssat 378
    l=fread(ibuf,1,MAX_LINELEN,indf); fclose(indf);
379
    if(l>0 && l<MAX_LINELEN) ibuf[l]=0; else return -1;
6884 bpr 380
/* treate all fields in *modindex */
10 reyssat 381
    for(i=0;i<MODINDEX_NO;i++) {
9090 bpr 382
      _getdef(ibuf,modindex[i],indbuf[i]);
6884 bpr 383
/*  compatibility precaution */
9090 bpr 384
      if(indbuf[i][0]==':') indbuf[i][0]='.';
10 reyssat 385
    }
386
    p=find_word_start(indbuf[i_language]);
387
    if(isalpha(*p) && isalpha(*(p+1))) {
9090 bpr 388
      memmove(module_language,p,2); module_language[2]=0;
10 reyssat 389
    }
3718 reyssat 390
    else ovlstrcpy(module_language,"en");
10 reyssat 391
    return 0;
392
}
393
 
394
int sheet_index(int serial)
395
{
396
    char *p1, *p2, fbuf[MAX_LINELEN+1], ibuf[MAX_LINELEN+1];
397
    FILE *indf;
398
    int i,l;
399
 
400
    snprintf(fbuf,sizeof(fbuf),"%s/%s.def",sheetdir,mod[serial].name);
401
    indf=fopen(fbuf,"r"); if(indf==NULL) return -1;
402
    l=fread(ibuf,1,MAX_LINELEN,indf); fclose(indf);
403
    if(l>0 && l<MAX_LINELEN) ibuf[l]=0; else return -1;
404
    for(i=0;i<SHEETINDEX_NO;i++) sindbuf[i][0]=0;
405
    for(i=0,p1=find_word_start(ibuf);
9090 bpr 406
      i<SHEETINDEX_NO-1 && *p1!=':' && *p1!=0;
407
      i++,p1=p2) {
408
       p2=strchr(p1,'\n');
409
       if(p2!=NULL) *p2++=0; else p2=p1+strlen(p1);
410
       p1=find_word_start(p1); strip_trailing_spaces2(p1);
411
       snprintf(sindbuf[i],MAX_LINELEN,"%s",p1);
10 reyssat 412
    }
413
    p2=strstr(p1,"\n:"); if(p2==NULL) p2=p1+strlen(p1);
414
    else *p2=0;
8100 bpr 415
    p1=find_word_start(p1); strip_trailing_spaces2(p1);
10 reyssat 416
    for(p2=p1;*p2;p2++) if(*p2=='\n') *p2=' ';
6967 bpr 417
    ovlstrcpy(sindbuf[s_information],p1);
10 reyssat 418
    return 0;
419
}
420
 
421
unsigned char categories[16];
422
char taken[MAX_LINELEN+1];
423
int catcnt, takenlen, tweight;
424
 
425
void appenditem(char *word, int lind, int serial, int weight, char *l)
426
{
427
    char nbuf[MAX_LINELEN+1], buf[MAX_LINELEN+1];
428
    int i, ll;
429
    char *p;
430
    FILE *f;
6881 bpr 431
 
10 reyssat 432
    if(!isalnum(*word) || (ll=strlen(word))<2 ||
8100 bpr 433
       wordchr2(taken,word)!=NULL ||
434
       wordchr2(ignore[lind],word)!=NULL ||
10 reyssat 435
       takenlen>=MAX_LINELEN-ll-16)
436
      return;
437
    if(ll==2 && (!isdigit(word[0]) || !isalpha(word[1]))) return;
438
    for(p=word;*p;p++) if(!isalnum(*p) && *p!=' ') return;
439
    taken[takenlen++]=' '; taken[takenlen++]=' ';
3718 reyssat 440
    ovlstrcpy(taken+takenlen,word);
10 reyssat 441
    takenlen+=ll; tweight+=weight;
442
    snprintf(buf,sizeof(buf),"%s:%d?%d\n",word,serial,weight);
443
    for(i=0;i<catcnt;i++) {
6884 bpr 444
    snprintf(nbuf,sizeof(nbuf),"%s/%c.%s",
445
         outdir,categories[i],lang[lind]);
446
    f=fopen(nbuf,"a");
447
    if(f!=NULL) {fputs(buf,f); fclose(f);}
10 reyssat 448
    }
449
}
450
 
6881 bpr 451
void appenditem1 (char *buf, int lind, int serial, int weight, char *l )
452
{
453
  char *p1, *p2 ;
454
  for(p1=find_word_start(buf); *p1;
6884 bpr 455
    p1=find_word_start(p2)) {
456
    p2=strchr(p1,',');
457
    if(p2!=NULL) *p2++=0; else p2=p1+strlen(p1);
458
    if(strlen(p1)<=0) continue;
459
    appenditem(p1,lind,serial,weight,module_language);
6881 bpr 460
  }
461
}
462
void appenditem2 (char *buf, int lind, int serial, int weight, char *l )
463
{
464
  char *p1, *p2 ;
465
  for(p1=find_word_start(buf);*p1;
6884 bpr 466
    p1=find_word_start(p2)) {
467
    p2=find_word_end(p1); if(*p2) *p2++=0;
468
    appenditem(p1,lind,serial,weight,module_language);
6881 bpr 469
  }
470
}
10 reyssat 471
void onemodule(const char *name, int serial, int lind)
472
{
473
    int i;
474
    unsigned char trlist[]={
6884 bpr 475
    i_title,i_description,i_category,i_domain,i_keywords,
476
      i_require,i_author,
477
      i_keywords_ca,i_keywords_en,i_keywords_fr,i_keywords_it,i_keywords_nl,
478
      i_title_ca,i_title_en,i_title_fr,i_title_it,i_title_nl
10 reyssat 479
    };
480
    #define trcnt (sizeof(trlist)/sizeof(trlist[0]))
6564 bpr 481
    char *p1, *p2, *pp, *q, buf[MAX_LINELEN+1], lbuf[16];
10 reyssat 482
    FILE *f;
6881 bpr 483
 
10 reyssat 484
    if(module_index(name)) return;
485
    towords(indbuf[i_category]);
7915 bpr 486
/*   list the categories (among A=all,X=eXercise,O,D,...) corresponding
6884 bpr 487
 *   to this module
488
 */
10 reyssat 489
    for(i=catcnt=0;i<catno && catcnt<16;i++) {
8100 bpr 490
    if(wordchr2(indbuf[i_category],cat[i].name)!=NULL)
6884 bpr 491
      categories[catcnt++]=cat[i].typ;
10 reyssat 492
    }
493
    if(catcnt==0) return;
494
    if(categories[0]!=cat[0].typ)
495
      categories[catcnt++]=cat[0].typ;
6884 bpr 496
/*  write module's name in the category.language files, for instance lists/X.fr
497
 * for french exercises
498
 */
10 reyssat 499
    for(i=0;i<catcnt;i++) {
9090 bpr 500
      snprintf(buf,sizeof(buf),"%s/lists/%c.%s",
6884 bpr 501
         outdir,categories[i],lang[lind]);
9090 bpr 502
      f=fopen(buf,"a");
503
      if(f!=NULL) {fprintf(f,"%s\n",name); fclose(f);}
10 reyssat 504
    }
6884 bpr 505
/*   add serial number and language (resp.title, ...) to corresponding file  */
10 reyssat 506
    fprintf(langf,"%d:%s\n",serial,module_language);
507
    fprintf(titf,"%d:%s\n",serial,indbuf[i_title]);
508
    fprintf(descf,"%d:%s\n",serial,indbuf[i_description]);
509
    fprintf(authorf,"%d:%s\n",serial,indbuf[i_author]);
510
    fprintf(versionf,"%d:%s\n",serial,indbuf[i_version]);
6881 bpr 511
 
6884 bpr 512
/*   add module's information in html page for robots  */
10 reyssat 513
    snprintf(buf,sizeof(buf),"%s",indbuf[i_description]);
514
    for(pp=strchr(buf,','); pp; pp=strchr(pp,','))
8100 bpr 515
      string_modify3(buf,pp,pp+1,"&#44;");
10 reyssat 516
    if(strcmp(module_language,lang[lind])==0)
517
      fprintf(robotf,"%s ,%s,%s,%s,%s\n",name,module_language,name,
6884 bpr 518
          indbuf[i_title], buf);
6819 reyssat 519
 
6884 bpr 520
/*   Normalize the information of trlist, using dictionary
7915 bpr 521
 *  -- bases/sys/domain.xx without suffix translation (--> english version)
6884 bpr 522
 */
6881 bpr 523
    entrycount=dentrycount; dicbuf=ddicbuf;
524
    memmove(entry,dentry,dentrycount*sizeof(entry[0]));
525
    unknown_type=unk_leave;
10 reyssat 526
    for(i=0;i<trcnt;i++) {
6884 bpr 527
    detag(indbuf[trlist[i]]);
8100 bpr 528
    deaccent2(indbuf[trlist[i]]);
6884 bpr 529
    comma(indbuf[trlist[i]]);
8100 bpr 530
    singlespace2(indbuf[trlist[i]]);
6884 bpr 531
    translate(indbuf[trlist[i]]);
6881 bpr 532
    }
6884 bpr 533
/*   Normalize the information, using dictionary
7915 bpr 534
 *   bases/sys/words.xx with suffix translation
6884 bpr 535
 */
6881 bpr 536
    entrycount=mentrycount; dicbuf=mdicbuf;
537
    memmove(entry,mentry,mentrycount*sizeof(entry[0]));
6884 bpr 538
    unknown_type=unk_leave;/*  used in translator_.c */
6881 bpr 539
    for(i=0;i<trcnt;i++) {
6884 bpr 540
    suffix_translate(indbuf[trlist[i]]);
541
    translate(indbuf[trlist[i]]);
10 reyssat 542
    }
6881 bpr 543
 
544
/* taken contains all words already seen in the module index */
10 reyssat 545
    taken[0]=0; takenlen=tweight=0;
6881 bpr 546
/*  append words of title  */
3718 reyssat 547
    ovlstrcpy(buf,indbuf[i_title]); towords(buf);
6881 bpr 548
    appenditem2(buf,lind,serial,4,module_language);
549
 
6884 bpr 550
/*  extract words of every other information except level */
6799 bpr 551
    snprintf(buf,sizeof(buf),"%s %s %s %s %s %s %s %s %s %s %s %s %s %s %s",
6884 bpr 552
         indbuf[i_description],indbuf[i_keywords],
553
         indbuf[i_keywords_ca],indbuf[i_keywords_en],indbuf[i_keywords_fr],
554
         indbuf[i_keywords_it],indbuf[i_keywords_nl],
555
         indbuf[i_title_ca],indbuf[i_title_en],indbuf[i_title_fr],
556
         indbuf[i_title_it],indbuf[i_title_nl],
557
         indbuf[i_domain],indbuf[i_require],indbuf[i_author]);
10 reyssat 558
    towords(buf);
6884 bpr 559
    appenditem2(buf,lind,serial,2,module_language);
6881 bpr 560
 
6884 bpr 561
/*   this time the dictionary is the group dictionary  sys/wgrp/wgrp
562
 *   with a g (groupdic), not an m (maindic) . see below main, suffix, group.
7915 bpr 563
 *   and delete unknown ?? and translate
6884 bpr 564
 */
10 reyssat 565
    entrycount=gentrycount; dicbuf=gdicbuf;
566
    memmove(entry,gentry,gentrycount*sizeof(entry[0]));
6881 bpr 567
 
6884 bpr 568
/*  append words of every title information  */
6881 bpr 569
    ovlstrcpy(buf,indbuf[i_title]);
10 reyssat 570
    unknown_type=unk_delete;
6881 bpr 571
    translate(buf);
572
    appenditem1(buf,lind,serial,2,module_language);
573
 
6884 bpr 574
/*  append words of information of description except level  */
6881 bpr 575
    snprintf(buf,sizeof(buf),"%s", indbuf[i_description]);
576
    unknown_type=unk_delete;
577
    translate(buf);
578
    appenditem1(buf,lind,serial,4,module_language);
579
 
6884 bpr 580
/*  append words (or group of words) of keywords and domain  */
6881 bpr 581
    snprintf(buf,sizeof(buf),"%s, %s, %s, %s, %s, %s, %s",
6884 bpr 582
         indbuf[i_domain],indbuf[i_keywords],
583
         indbuf[i_keywords_ca], indbuf[i_keywords_en],indbuf[i_keywords_fr],
584
         indbuf[i_keywords_it], indbuf[i_keywords_nl]);
585
    unknown_type=unk_leave;
10 reyssat 586
    translate(buf);
6881 bpr 587
    appenditem1(buf,lind,serial,2,module_language);
588
 
6884 bpr 589
/*   append level information, with weight 2 */
10 reyssat 590
    snprintf(buf,sizeof(buf),"%s",indbuf[i_level]);
3718 reyssat 591
    ovlstrcpy(lbuf,"level");
10 reyssat 592
    for(p1=buf; *p1; p1++) if(!isalnum(*p1)) *p1=' ';
6564 bpr 593
    q=buf+strlen(buf);
594
    for(p1=find_word_start(buf); (*p1) && (p1 < q) ;
6884 bpr 595
    p1=find_word_start(p2)) {
9090 bpr 596
      p2=find_word_end(p1);
597
      if(p2!=NULL) *p2++=0; else p2=p1+strlen(p1);
598
      if(!isalpha(*p1) ||
6884 bpr 599
       (!isdigit(*(p1+1)) && *(p1+1)!=0) ||
600
       (*(p1+1)!=0 && *(p1+2)!=0))
601
      continue;
9090 bpr 602
      *p1=tolower(*p1);
603
      ovlstrcpy(lbuf+strlen("level"),p1);
604
      appenditem(lbuf,lind,serial,2,module_language);
10 reyssat 605
    }
6884 bpr 606
/*   append total weight of module to weight file site2/weight.xx  */
10 reyssat 607
    fprintf(weightf,"%d:%d\n",serial,tweight);
608
}
609
 
610
void modules(void)
611
{
612
    int i,j,k,d;
613
    char namebuf[MAX_LINELEN+1];
6881 bpr 614
    char mdic[MAX_LINELEN+1], sdic[MAX_LINELEN+1], gdic[MAX_LINELEN+1], ddic[MAX_LINELEN+1];
10 reyssat 615
 
616
    for(j=0;j<langcnt;j++) {
6884 bpr 617
    snprintf(namebuf,sizeof(namebuf),"%s/weight.%s",outdir,lang[j]);
618
    weightf=fopen(namebuf,"w");
619
    snprintf(mdic,sizeof(mdic),"%s/%s.%s",dicdir,maindic,lang[j]);
620
    snprintf(sdic,sizeof(sdic),"%s/%s.%s",dicdir,suffixdic,lang[j]);
621
    snprintf(gdic,sizeof(gdic),"%s/%s.%s",dicdir,groupdic,lang[j]);
622
    snprintf(ddic,sizeof(ddic),"%s/%s.%s",dicdir,domaindic,lang[j]);
623
    suffix_dic(sdic); prepare_dic(gdic);
624
    gdicbuf=dicbuf; gentrycount=entrycount;
625
    memmove(gentry,entry,gentrycount*sizeof(entry[0]));
626
    prepare_dic(mdic);
627
    mdicbuf=dicbuf; mentrycount=entrycount;
628
    memmove(mentry,entry,mentrycount*sizeof(entry[0]));
629
    prepare_dic(ddic);
630
    ddicbuf=dicbuf; dentrycount=entrycount;
631
    memmove(dentry,entry,dentrycount*sizeof(entry[0]));
632
    unknown_type=unk_leave; translate(ignore[j]);
633
    for(i=0;i<modcnt;i++) {
634
        if(mod[i].langcnt>0) {
635
        for(d=k=0;k<mod[i].langcnt;k++)
636
          if(mod[i].langs[k]<mod[i].langs[d]) d=k;
637
        for(k=0;k<mod[i].langcnt && mod[i].langs[k]!=j;k++);
638
        if(k>=mod[i].langcnt) k=d;
639
        snprintf(namebuf,MAX_LINELEN,"%s.%s",mod[i].name,
640
             lang[mod[i].langs[k]]);
641
        onemodule(namebuf,mod[i].counts[k],j);
642
        }
643
        else {
644
        onemodule(mod[i].name,mod[i].counts[0],j);
645
        }
10 reyssat 646
    }
6884 bpr 647
    if(mentrycount>0) free(mdicbuf);
648
    if(gentrycount>0) free(gdicbuf);
649
    if(suffixcnt>0) free(sufbuf);
650
    if(dentrycount>0) free(ddicbuf);
651
    if(weightf) fclose(weightf);
652
    }
10 reyssat 653
}
654
 
6881 bpr 655
/* FIXME ? differences with appenditem - use fprintf instead of  snprintf */
10 reyssat 656
void sappenditem(char *word, int lind, int serial, int weight)
657
{
658
    int ll;
659
    char *p;
6881 bpr 660
 
10 reyssat 661
    if(!isalnum(*word) || (ll=strlen(word))<2 ||
8100 bpr 662
       wordchr2(taken,word)!=NULL ||
663
       wordchr2(ignore[lind],word)!=NULL ||
10 reyssat 664
       takenlen>=MAX_LINELEN-ll-16)
665
      return;
666
    if(ll==2 && (!isdigit(word[0]) || !isalpha(word[1]))) return;
667
    for(p=word;*p;p++) if(!isalnum(*p) && *p!=' ') return;
668
    taken[takenlen++]=' ';taken[takenlen++]=' ';
3718 reyssat 669
    ovlstrcpy(taken+takenlen,word);
10 reyssat 670
    takenlen+=ll; tweight+=weight;
671
    fprintf(indf,"%s:%d?%d\n",word,serial,weight);
672
}
673
 
674
void onesheet(int serial, int lind)
675
{
676
    int i;
677
    unsigned char trlist[]={
6967 bpr 678
    s_title,s_description,s_domain,s_keywords,s_information
10 reyssat 679
    };
680
    #define trcnt (sizeof(trlist)/sizeof(trlist[0]))
681
    char *p1, *p2, buf[MAX_LINELEN+1];
6881 bpr 682
 
10 reyssat 683
    if(sheet_index(serial)) return;
684
    fprintf(listf,"%s\n",mod[serial].name+3);
685
    fprintf(titf,"%d:%s\n",serial,sindbuf[s_title]);
686
    fprintf(descf,"%d:%s\n",serial,sindbuf[s_description]);
6967 bpr 687
    fprintf(remf,"%d:%s\n",serial,sindbuf[s_information]);
8650 bpr 688
    fprintf(titlef,"%s:%s\n",mod[serial].name,sindbuf[s_title]);
7915 bpr 689
 
6881 bpr 690
    entrycount=dentrycount; dicbuf=ddicbuf;
691
    memmove(entry,dentry,dentrycount*sizeof(entry[0]));
10 reyssat 692
    unknown_type=unk_leave;
693
    for(i=0;i<trcnt;i++) {
6884 bpr 694
    detag(sindbuf[trlist[i]]);
8100 bpr 695
    deaccent2(sindbuf[trlist[i]]);
6884 bpr 696
    comma(sindbuf[trlist[i]]);
8100 bpr 697
    singlespace2(sindbuf[trlist[i]]);
6884 bpr 698
    translate(sindbuf[trlist[i]]);
6881 bpr 699
    }
7915 bpr 700
 
6881 bpr 701
    entrycount=mentrycount; dicbuf=mdicbuf;
702
    memmove(entry,mentry,mentrycount*sizeof(entry[0]));
703
    unknown_type=unk_leave;
704
    for(i=0;i<trcnt;i++) {
6884 bpr 705
    suffix_translate(sindbuf[trlist[i]]);
706
    translate(sindbuf[trlist[i]]);
10 reyssat 707
    }
708
    taken[0]=0; takenlen=tweight=0;
3718 reyssat 709
    ovlstrcpy(buf,sindbuf[s_title]); towords(buf);
10 reyssat 710
    for(p1=find_word_start(buf);*p1;
6884 bpr 711
    p1=find_word_start(p2)) {
712
    p2=find_word_end(p1); if(*p2) *p2++=0;
713
    sappenditem(p1,lind,serial,4);
10 reyssat 714
    }
715
    snprintf(buf,sizeof(buf),"%s %s %s %s",
6884 bpr 716
         sindbuf[s_description],sindbuf[s_keywords],
6967 bpr 717
         sindbuf[s_domain],sindbuf[s_information]);
10 reyssat 718
    towords(buf);
719
    for(p1=find_word_start(buf);*p1;
6884 bpr 720
    p1=find_word_start(p2)) {
721
    p2=find_word_end(p1); if(*p2) *p2++=0;
722
    sappenditem(p1,lind,serial,2);
10 reyssat 723
    }
724
    entrycount=gentrycount; dicbuf=gdicbuf;
725
    memmove(entry,gentry,gentrycount*sizeof(entry[0]));
726
    unknown_type=unk_delete;
3718 reyssat 727
    ovlstrcpy(buf,sindbuf[s_title]); translate(buf);
10 reyssat 728
    for(p1=find_word_start(buf); *p1;
6884 bpr 729
    p1=find_word_start(p2)) {
730
    p2=strchr(p1,',');
731
    if(p2!=NULL) *p2++=0; else p2=p1+strlen(p1);
732
    if(strlen(p1)<=0) continue;
733
    sappenditem(p1,lind,serial,4);
10 reyssat 734
    }
735
    snprintf(buf,sizeof(buf),"%s, %s, %s, %s",
6884 bpr 736
         sindbuf[s_description],sindbuf[s_keywords],
6967 bpr 737
         sindbuf[s_domain],sindbuf[s_information]);
10 reyssat 738
    translate(buf);
739
    for(p1=find_word_start(buf); *p1;
6884 bpr 740
    p1=find_word_start(p2)) {
741
    p2=strchr(p1,',');
742
    if(p2!=NULL) *p2++=0; else p2=p1+strlen(p1);
743
    if(strlen(p1)<=0) continue;
744
    sappenditem(p1,lind,serial,2);
10 reyssat 745
    }
746
    fprintf(weightf,"%d:%d\n",serial,tweight);
747
}
748
 
749
void sheets(void)
750
{
751
    int i,j;
6961 bpr 752
    char mdic[MAX_LINELEN+1], sdic[MAX_LINELEN+1], gdic[MAX_LINELEN+1], ddic[MAX_LINELEN+1];
10 reyssat 753
    char buf[MAX_LINELEN+1];
7915 bpr 754
 
10 reyssat 755
    for(j=0;j<langcnt;j++) {
9090 bpr 756
      snprintf(buf,sizeof(buf),"%s/title.%s",sheetoutdir,lang[j]);
757
      titf=fopen(buf,"w");
758
      snprintf(buf,sizeof(buf),"%s/description.%s",sheetoutdir,lang[j]);
759
      descf=fopen(buf,"w");
760
      snprintf(buf,sizeof(buf),"%s/%s",sheetoutdir,lang[j]);
761
      indf=fopen(buf,"w");
762
      snprintf(buf,sizeof(buf),"%s/list.%s",sheetoutdir,lang[j]);
763
      listf=fopen(buf,"w");
764
      snprintf(buf,sizeof(buf),"%s/weight.%s",sheetoutdir,lang[j]);
765
      weightf=fopen(buf,"w");
766
      snprintf(buf,sizeof(buf),"%s/addr.%s",sheetoutdir,lang[j]);
767
      addrf=fopen(buf,"w");
768
      snprintf(buf,sizeof(buf),"%s/information.%s",sheetoutdir,lang[j]);
769
      remf=fopen(buf,"w");
770
      snprintf(buf,sizeof(buf),"%s/serial.%s",sheetoutdir,lang[j]);
771
      serialf=fopen(buf,"w");
772
      snprintf(buf,sizeof(buf),"%s/tit.%s",sheetoutdir,lang[j]);
773
      titlef=fopen(buf,"w");
774
      if(!titlef || !serialf || !remf || !addrf || !weightf || !listf
775
        || !indf || !descf || !titf ) {
776
      fprintf(stderr,"modind: error creating output files for sheet %s.\n",sheetoutdir); exit(1);
777
    }
6884 bpr 778
    snprintf(mdic,sizeof(mdic),"%s/%s.%s",dicdir,maindic,lang[j]);
779
    snprintf(sdic,sizeof(sdic),"%s/%s.%s",dicdir,suffixdic,lang[j]);
780
    snprintf(gdic,sizeof(gdic),"%s/%s.%s",dicdir,groupdic,lang[j]);
6961 bpr 781
    snprintf(ddic,sizeof(ddic),"%s/%s.%s",dicdir,domaindic,lang[j]);
6884 bpr 782
    suffix_dic(sdic); prepare_dic(gdic);
783
    gdicbuf=dicbuf; gentrycount=entrycount;
784
    memmove(gentry,entry,gentrycount*sizeof(entry[0]));
785
    prepare_dic(mdic);
786
    mdicbuf=dicbuf; mentrycount=entrycount;
787
    memmove(mentry,entry,mentrycount*sizeof(entry[0]));
6973 bpr 788
    prepare_dic(ddic);
789
    ddicbuf=dicbuf; dentrycount=entrycount;
790
    memmove(dentry,entry,dentrycount*sizeof(entry[0]));
6884 bpr 791
    unknown_type=unk_leave; translate(ignore[j]);
792
    for(i=0;i<modcnt;i++) {
793
        if(mod[i].langs[0]!=j) continue;
794
        fprintf(addrf,"%d:%s\n",i,mod[i].name+3);
795
        fprintf(serialf,"%s:%d\n",mod[i].name+3,i);
796
        onesheet(i,j);
10 reyssat 797
    }
6884 bpr 798
    if(mentrycount>0) free(mdicbuf);
799
    if(gentrycount>0) free(gdicbuf);
800
    if(suffixcnt>0) free(sufbuf);
6961 bpr 801
    if(dentrycount>0) free(ddicbuf);
6884 bpr 802
    fclose(titf); fclose(descf); fclose(indf); fclose(listf);
803
    fclose(weightf); fclose(addrf); fclose(serialf);
804
    }
10 reyssat 805
}
806
 
807
int main()
808
{
8123 bpr 809
    gentry=xmalloc(entry_size);
810
    dentry=xmalloc(entry_size);
811
    mentry=xmalloc(entry_size);
10 reyssat 812
    prep();
813
    if(modcnt>0) modules();
814
    clean();
815
    sprep();
816
    if(modcnt>0) sheets();
817
    return 0;
818
}
819