Subversion Repositories wimsdev

Rev

Rev 8149 | Rev 8650 | Go to most recent revision | Details | Compare with Previous | Last modification | View Log | RSS feed

Rev Author Line No. Line
10 reyssat 1
/*    Copyright (C) 1998-2003 XIAO, Gang of Universite de Nice - Sophia Antipolis
2
 *
3
 *  This program is free software; you can redistribute it and/or modify
4
 *  it under the terms of the GNU General Public License as published by
5
 *  the Free Software Foundation; either version 2 of the License, or
6
 *  (at your option) any later version.
7
 *
8
 *  This program is distributed in the hope that it will be useful,
9
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
10
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
11
 *  GNU General Public License for more details.
12
 *
13
 *  You should have received a copy of the GNU General Public License
14
 *  along with this program; if not, write to the Free Software
15
 *  Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
16
 */
17
 
6884 bpr 18
/*  This is an internal program (modind), used to build the module
 *  and sheet indexes consulted by the search engine.
 */
10 reyssat 21
 
8100 bpr 22
#include "../Lib/libwims.h"
8123 bpr 23
#include "translator_.h"
24
#include "suffix.h"
10 reyssat 25
 
6884 bpr 26
/*  Capacity limits of the static tables below. */
#define MAX_LANGS    MAX_LANGUAGES
#define MAX_MODULES    65536

/*  Default locations. outdir and sheetdir may be overridden at run time
 *  by the environment variables modind_outdir and modind_sheetdir
 *  (see prep()).
 */
char *moduledir = "public_html/modules";      /* one sub-directory per module */
char *sheetdir  = "public_html/bases/sheet";  /* sheet definitions and their index */
char *dicdir    = "public_html/bases";        /* prefix of every dictionary below */
char *outdir    = "public_html/bases/site2";  /* where the module index is written */

/*  Dictionary base names, relative to dicdir; ".<lang>" is appended. */
char *maindic   = "sys/words";
char *groupdic  = "sys/wgrp/wgrp";
char *suffixdic = "sys/suffix";
char *domaindic = "sys/domaindic";
char *ignoredic = "sys/indignore";

/*  Configuration file from which site_languages is read. */
char *conffile  = "log/wims.conf";
char *mlistbase = "list";
10 reyssat 39
 
40
char lang[MAX_LANGS][4]={
1792 bpr 41
    "en","fr","cn","es","it","nl","si","ca","pt"
10 reyssat 42
};
6884 bpr 43
#define DEFAULT_LANGCNT    6
10 reyssat 44
char allang[MAX_LANGS][4]={
6564 bpr 45
    "en","fr","cn","es","it","nl","de","si","ca","pt"
10 reyssat 46
};
47
#define allangcnt 8
48
char ignore[MAX_LANGS][MAX_LINELEN+1];
49
char mlistfile[MAX_LANGS][256];
50
int langcnt;
6961 bpr 51
FILE *langf, *titf, *descf, *weightf, *robotf, *indf, *listf, *addrf, *serialf, *authorf, *versionf, *remf;
10 reyssat 52
 
53
/*  Module categories: the keyword searched for in a module's "category"
 *  field, and the one-letter code used to name the per-category index
 *  files. Entry 0 ("all_types") is the catch-all category.
 */
struct cat {
    char *name;   /* keyword looked up in the category field */
    char typ;     /* letter used in index file names */
} cat[]={
    { .name="all_types",  .typ='A' },
    { .name="exercise",   .typ='X' },
    { .name="oef",        .typ='O' },
    { .name="tool",       .typ='T' },
    { .name="recreation", .typ='R' },
    { .name="reference",  .typ='Y' },
    { .name="document",   .typ='D' },
    { .name="popup",      .typ='P' },
    { .name="datamodule", .typ='M' }
};
/*  Number of entries in cat[]. */
#define catno (sizeof(cat)/sizeof(cat[0]))
68
 
69
struct mod {
70
    char *name;
71
    unsigned char langs[MAX_LANGS];
72
    int counts[MAX_LANGS];
73
    int  langcnt;
74
} mod[MAX_MODULES];
75
int modcnt;
76
 
77
char *mlist;
78
 
6884 bpr 79
/*  fold known accented letters to unaccented, other strange characters to space
7915 bpr 80
 *  apostrophe is among the exceptions to be kept (important for multi-word expressions)
6884 bpr 81
 */
8100 bpr 82
void deaccent2(char *p)
10 reyssat 83
{
3247 bpr 84
    char *sp;
10 reyssat 85
    char *v;
86
    for(sp=p;*sp;sp++) {
6884 bpr 87
    if(*sp<0 && (v=strchr(acctab,*sp))!=NULL)
88
      *sp=*(deatab+(v-acctab));
89
    if(!isalnum(*sp) && strchr(",.&$+*",*sp)==0) *sp=' ';
90
    else *sp=tolower(*sp);
10 reyssat 91
    }
92
}
93
 
6884 bpr 94
/*  Replace, in place, every character that is neither alphanumeric nor
 *  one of & $ + * by a space, so that the string becomes a list of
 *  space-separated words.
 */
void towords(char *p)
{
    char *pp;
    for(pp=p;*pp;pp++) {
        /* <ctype.h> functions are only defined for unsigned char values. */
        if(!isalnum((unsigned char)*pp) && strchr("&$+*",*pp)==0) *pp=' ';
    }
}
100
 
6884 bpr 101
/*  Find the first occurrence of w in p that stands as a whole word,
 *  i.e. that is preceded by white space or the start of p, and followed
 *  by white space or the end of p.
 *  Returns a pointer to that occurrence, or NULL if there is none.
 *  An empty w never matches (it used to make the scan run past the end
 *  of p).
 */
char *wordchr2(char *p, char *w)
{
    char *r;
    size_t wlen=strlen(w);   /* computed once instead of at every test */

    if(wlen==0) return NULL;
    for(r=strstr(p,w); r!=NULL; r=strstr(r+1,w)) {
        int starts_word=(r==p) || isspace((unsigned char)*(r-1));
        int ends_word=(*(r+wlen)==0) || isspace((unsigned char)*(r+wlen));
        if(starts_word && ends_word) break;
    }
    return r;
}
111
 
6884 bpr 112
/*  Remove trailing white space from p, in place.
 *  Returns a pointer to the last remaining character, or p itself when
 *  the string is empty or consisted of white space only (the pointer is
 *  never moved before the start of the buffer).
 */
char *strip_trailing_spaces2(char *p)
{
    char *end;
    if(*p==0) return p;
    end=p+strlen(p);
    while(end>p && isspace((unsigned char)*(end-1))) *(--end)=0;
    return (end>p)? end-1 : p;
}
120
 
121
/*  Return a pointer just past the end of the tag starting at p.
 *  A leading '<' is skipped; nested '<...>' groups and single- or
 *  double-quoted strings are stepped over as a whole. If a quote is
 *  never closed, or no '>' is found, the end of the string is returned.
 */
char *find_tag_end(char *p)
{
    char *cur, *closing;

    cur=(*p=='<')? p+1 : p;
    while(*cur!=0 && *cur!='>') {
        switch(*cur) {
            case '<':
                /* nested tag: resume right after it */
                cur=find_tag_end(cur);
                break;
            case '"':
            case '\'':
                /* quoted text: resume right after the matching quote */
                closing=strchr(cur+1,*cur);
                if(closing==NULL) return p+strlen(p);
                cur=closing+1;
                break;
            default:
                cur++;
                break;
        }
    }
    if(*cur=='>') return cur+1;
    return cur;
}
140
 
141
/*  Find the first tag named `tag` (case-insensitive) in p.
 *  A tag matches when '<' is immediately followed by the name and the
 *  character after the name is not alphanumeric.
 *  Returns a pointer to the '<' of the tag, or to the terminating NUL
 *  of p when there is no such tag.
 */
char *find_tag(char *p, char *tag)
{
    char *pp;
    size_t len=strlen(tag);
    for(pp=strchr(p,'<'); pp!=NULL; pp=strchr(pp+1,'<')) {
        /* <ctype.h> functions are only defined for unsigned char values. */
        if(strncasecmp(pp+1,tag,len)==0 &&
           !isalnum((unsigned char)*(pp+1+len))) return pp;
    }
    return p+strlen(p);
}
151
 
6884 bpr 152
/*  Remove every HTML tag from p, in place. A tag that is not closed
 *  before the end of the string truncates the string at its '<'.
 */
void detag(char *p)
{
    char *open, *after;

    open=strchr(p,'<');
    while(open!=NULL) {
        after=find_tag_end(open);
        if(*after==0) {
            /* the tag runs up to the end: cut the string here */
            *open=0;
            return;
        }
        ovlstrcpy(open,after);
        /* search again from the same position: the text moved down */
        open=strchr(open,'<');
    }
}
162
 
6884 bpr 163
/*  modify a string. Bufferlen must be at least MAX_LINELEN */
8100 bpr 164
void string_modify3(char *start, char *bad_beg, char *bad_end, char *good,...)
10 reyssat 165
{
166
    char buf[MAX_LINELEN+1];
167
    va_list vp;
6881 bpr 168
 
10 reyssat 169
    va_start(vp,good);
170
    vsnprintf(buf,sizeof(buf),good,vp); va_end(vp);
171
    if(strlen(start)-(bad_end-bad_beg)+strlen(buf)>=MAX_LINELEN)
8100 bpr 172
      return; /* this is an error situation. */
10 reyssat 173
    strcat(buf,bad_end);
3718 reyssat 174
    ovlstrcpy(bad_beg,buf);
10 reyssat 175
}
176
 
6819 reyssat 177
/*  Insert a space after every comma of p (in place), so that the end
 *  of the word before the comma can be recognised.
 */
void comma(char *p)
{
    char *hit;

    hit=strchr(p,',');
    while(hit!=NULL) {
        string_modify3(p,hit,hit+1,", ");
        hit=strchr(hit+1,',');
    }
}
185
 
10 reyssat 186
void _getdef(char buf[], char *name, char value[])
187
{
188
    char *p1, *p2, *p3;
189
 
190
    value[0]=0;
191
    for(p1=strstr(buf,name); p1!=NULL; p1=strstr(p1+1,name)) {
6884 bpr 192
    p2=find_word_start(p1+strlen(name));
193
    if((p1>buf && !isspace(*(p1-1))) || *p2!='=') continue;
194
    p3=p1; while(p3>buf && isspace(*(p3-1)) && *(p3-1)!='\n') p3--;
195
    if(p3>buf && *(p3-1)!='\n') continue;
196
    p3=strchr(p2,'\n');
197
    p2=find_word_start(p2+1);
198
    if(p3 <= p2) continue;
199
    snprintf(value,MAX_LINELEN,"%s",p2);
200
    if(p3!=NULL && p3-p2<MAX_LINELEN) value[p3-p2]=0;
8100 bpr 201
    strip_trailing_spaces2(value);
6884 bpr 202
    break;
10 reyssat 203
    }
204
}
205
 
6884 bpr 206
/*  Get variable definition from a file.
 *  The whole file fname is read into memory and searched with _getdef().
 *  Result stored in buffer value of length MAX_LINELEN; it is left empty
 *  when the file cannot be read or does not define name.
 */
void getdef(char *fname, char *name, char value[])
{
    FILE *f;
    char *buf;
    long size;
    size_t got;

    value[0]=0;
    f=fopen(fname,"r"); if(f==NULL) return;
    /* find out the file size; give up on error or on an empty file */
    if(fseek(f,0,SEEK_END)!=0 || (size=ftell(f))<=0 ||
       fseek(f,0,SEEK_SET)!=0) {
        fclose(f); return;
    }
    buf=xmalloc(size+256);
    got=fread(buf,1,size,f);
    fclose(f);
    if(got>0) {
        buf[got]=0;
        _getdef(buf,name,value);
    }
    /* released on every path (it used to leak when nothing was read) */
    free(buf);
}
224
 
8123 bpr 225
/*  Saved state of the three dictionaries loaded for the current
 *  language: m = main (words), g = group, d = domain.
 *  The *dicbuf pointers keep the dictionary text, the *entry buffers
 *  keep a copy of the entry table, the *entrycount variables its size.
 */
char *mdicbuf;
char *gdicbuf;
char *ddicbuf;
char *gentry;
char *mentry;
char *dentry;

int gentrycount;
int mentrycount;
int dentrycount;
10 reyssat 228
 
6884 bpr 229
/*  Preparation of data */
10 reyssat 230
void prep(void)
231
{
232
    char buf[MAX_LINELEN+1];
233
    char *p1,*p2,*s,*old;
234
    int i,l,thislang,t;
235
    FILE *f;
6881 bpr 236
 
10 reyssat 237
    s=getenv("modind_outdir"); if(s!=NULL && *s!=0) outdir=s;
238
    s=getenv("modind_sheetdir"); if(s!=NULL && *s!=0) sheetdir=s;
239
    snprintf(buf,sizeof(buf),"%s/addr",outdir);
240
    addrf=fopen(buf,"w");
241
    snprintf(buf,sizeof(buf),"%s/serial",outdir);
242
    serialf=fopen(buf,"w");
243
    modcnt=langcnt=0;
6884 bpr 244
/* take the langs declared in conffile */
10 reyssat 245
    getdef(conffile,"site_languages",buf);
246
    for(p1=buf;*p1;p1++) if(!isalnum(*p1)) *p1=' ';
247
    for(p1=find_word_start(buf); *p1 && langcnt<MAX_LANGS; p1=find_word_start(p2)) {
6884 bpr 248
    p2=find_word_end(p1);
249
    if(p2!=p1+2 || !isalpha(*p1) || !isalpha(*(p1+1))) continue;
250
    memmove(lang[langcnt],p1,2); lang[langcnt++][2]=0;
10 reyssat 251
    }
6884 bpr 252
    if(langcnt==0) {/*  default languages */
253
    langcnt=DEFAULT_LANGCNT;
10 reyssat 254
    }
255
    s=getenv("mlist"); if(s==NULL) exit(1);
256
    l=strlen(s); if(l<0 || l>100*MAX_LINELEN) exit(1);
3718 reyssat 257
    mlist=xmalloc(l+16); ovlstrcpy(mlist,s); old="";
10 reyssat 258
    for(i=0;i<langcnt;i++) {
6884 bpr 259
    snprintf(buf,sizeof(buf),"%s/%s.%s",dicdir,ignoredic,lang[i]);
260
    f=fopen(buf,"r"); if(f==NULL) continue;
261
    l=fread(ignore[i],1,MAX_LINELEN,f);fclose(f);
262
    if(l<0 || l>=MAX_LINELEN) l=0;
263
    ignore[i][l]=0;
10 reyssat 264
    }
265
    for(t=0, p1=find_word_start(mlist);
6884 bpr 266
    *p1 && modcnt<MAX_MODULES;
267
    p1=find_word_start(p2), t++) {
268
    p2=find_word_end(p1);
269
    l=p2-p1; if(*p2) *p2++=0;
270
    fprintf(addrf,"%d:%s\n",t,p1);
271
    fprintf(serialf,"%s:%d\n",p1,t);
272
    thislang=-1;
6564 bpr 273
/* language is taken from the address */
6884 bpr 274
    if(l>3 && p1[l-3]=='.') {
275
        for(i=0;i<langcnt;i++) if(strcasecmp(lang[i],p1+l-2)==0) break;
276
        if(i<langcnt) {p1[l-3]=0; thislang=i;}
277
        else {/*  unknown language, not referenced */
278
        continue;
279
        }
10 reyssat 280
    }
6884 bpr 281
    if(modcnt>0 && strcmp(old,p1)==0 && thislang>=0) {
282
        if(mod[modcnt-1].langcnt<langcnt) {
283
        mod[modcnt-1].langs[mod[modcnt-1].langcnt]=thislang;
284
        mod[modcnt-1].counts[mod[modcnt-1].langcnt]=t;
285
        (mod[modcnt-1].langcnt)++;
286
        }
287
    }
288
    else {
289
        mod[modcnt].name=old=p1;
290
        if(thislang>=0) {
291
        mod[modcnt].langs[0]=thislang;
292
        mod[modcnt].langcnt=1;
293
        }
294
        else mod[modcnt].langcnt=0;
295
        mod[modcnt].counts[0]=t;
296
        modcnt++;
297
    }
298
    }
10 reyssat 299
    snprintf(buf,sizeof(buf),"%s/language",outdir);
300
    langf=fopen(buf,"w");
301
    snprintf(buf,sizeof(buf),"%s/title",outdir);
302
    titf=fopen(buf,"w");
303
    snprintf(buf,sizeof(buf),"%s/description",outdir);
304
    descf=fopen(buf,"w");
305
    snprintf(buf,sizeof(buf),"%s/author",outdir);
306
    authorf=fopen(buf,"w");
307
    snprintf(buf,sizeof(buf),"%s/version",outdir);
308
    versionf=fopen(buf,"w");
309
    snprintf(buf,sizeof(buf),"%s/lists/robot.phtml",outdir);
310
    robotf=fopen(buf,"w");
311
    fclose(addrf); fclose(serialf);
312
    if(!robotf || !versionf || !authorf || !descf || !titf || !descf) {
6884 bpr 313
    fprintf(stderr,"modind: error creating output files.\n");
314
    exit(1);
10 reyssat 315
    }
316
}
317
 
318
void sprep(void)
319
{
320
    char *p1,*p2,*s;
321
    int i,l,thislang;
6881 bpr 322
 
10 reyssat 323
    modcnt=0;
324
    s=getenv("slist"); if(s==NULL) return;
325
    l=strlen(s); if(l<0 || l>100*MAX_LINELEN) return;
3718 reyssat 326
    mlist=xmalloc(l+16); ovlstrcpy(mlist,s);
10 reyssat 327
    for(p1=find_word_start(mlist); *p1 && modcnt<MAX_MODULES; p1=find_word_start(p2)) {
6884 bpr 328
    p2=find_word_end(p1);
329
    l=p2-p1; if(*p2) *p2++=0;
330
    for(i=0;i<langcnt;i++) if(strncasecmp(lang[i],p1,2)==0) break;
331
    if(i<langcnt) thislang=i; else continue;
332
    mod[modcnt].name=p1;
333
    mod[modcnt].langs[0]=thislang;
334
    mod[modcnt].langcnt=1;
335
    modcnt++;
10 reyssat 336
    }
337
}
338
 
339
void clean(void)
340
{
341
    fclose(langf); fclose(titf); fclose(descf); fclose(robotf);
342
    fclose(authorf); fclose(versionf);
343
}
344
 
345
char *sheetindex[]={
6881 bpr 346
      "title", "description",
10 reyssat 347
      "duration", "severity",
348
      "level", "domain",
6967 bpr 349
      "keywords", "reserved1", "reserved2", "information"
10 reyssat 350
};
351
#define SHEETINDEX_NO (sizeof(sheetindex)/sizeof(sheetindex[0]))
352
char sindbuf[SHEETINDEX_NO][MAX_LINELEN+1];
353
enum{s_title, s_description,
354
      s_duration, s_severity,
355
      s_level, s_domain,
356
      s_keywords, s_reserved1, s_reserved2,
6967 bpr 357
      s_information
10 reyssat 358
};
359
 
360
char *modindex[]={
6881 bpr 361
      "title", "description",
10 reyssat 362
      "author", "address", "copyright",
363
      "version", "wims_version", "language",
6881 bpr 364
      "category", "level", "domain", "keywords",
6799 bpr 365
      "keywords_ca", "keywords_en", "keywords_fr", "keywords_it", "keywords_nl",
366
      "title_ca", "title_en", "title_fr", "title_it", "title_nl",
10 reyssat 367
      "require"
368
};
369
#define MODINDEX_NO (sizeof(modindex)/sizeof(modindex[0]))
370
char indbuf[MODINDEX_NO][MAX_LINELEN+1];
371
enum{i_title, i_description,
372
      i_author,i_address,i_copyright,
373
      i_version,i_wims_version,i_language,
374
      i_category,i_level,i_domain,i_keywords,
6799 bpr 375
      i_keywords_ca,i_keywords_en,i_keywords_fr,i_keywords_it,i_keywords_nl,
376
      i_title_ca,i_title_en,i_title_fr,i_title_it,i_title_nl,
10 reyssat 377
      i_require
378
};
379
 
380
/*  Names of special module files. */
char *module_special_file[]={
    "intro",
    "help",
    "about"
};
#define MODSPEC_NO (sizeof(module_special_file)/sizeof(module_special_file[0]))

/*  Two-letter language code of the module being indexed, set by
 *  module_index().
 */
char module_language[4];
385
 
6884 bpr 386
/*  read and treat module's INDEX file */
10 reyssat 387
int module_index(const char *name)
388
{
389
    char *p, fbuf[MAX_LINELEN+1], ibuf[MAX_LINELEN+1];
390
    FILE *indf;
391
    int i,l;
392
 
393
    snprintf(fbuf,sizeof(fbuf),"%s/%s/INDEX",moduledir,name);
394
    indf=fopen(fbuf,"r"); if(indf==NULL) return -1;
395
    l=fread(ibuf,1,MAX_LINELEN,indf); fclose(indf);
396
    if(l>0 && l<MAX_LINELEN) ibuf[l]=0; else return -1;
6884 bpr 397
/* treate all fields in *modindex */
10 reyssat 398
    for(i=0;i<MODINDEX_NO;i++) {
6884 bpr 399
    _getdef(ibuf,modindex[i],indbuf[i]);
400
/*  compatibility precaution */
401
    if(indbuf[i][0]==':') indbuf[i][0]='.';
10 reyssat 402
    }
403
    p=find_word_start(indbuf[i_language]);
404
    if(isalpha(*p) && isalpha(*(p+1))) {
6884 bpr 405
    memmove(module_language,p,2); module_language[2]=0;
10 reyssat 406
    }
3718 reyssat 407
    else ovlstrcpy(module_language,"en");
10 reyssat 408
    return 0;
409
}
410
 
411
int sheet_index(int serial)
412
{
413
    char *p1, *p2, fbuf[MAX_LINELEN+1], ibuf[MAX_LINELEN+1];
414
    FILE *indf;
415
    int i,l;
416
 
417
    snprintf(fbuf,sizeof(fbuf),"%s/%s.def",sheetdir,mod[serial].name);
418
    indf=fopen(fbuf,"r"); if(indf==NULL) return -1;
419
    l=fread(ibuf,1,MAX_LINELEN,indf); fclose(indf);
420
    if(l>0 && l<MAX_LINELEN) ibuf[l]=0; else return -1;
421
    for(i=0;i<SHEETINDEX_NO;i++) sindbuf[i][0]=0;
422
    for(i=0,p1=find_word_start(ibuf);
6884 bpr 423
    i<SHEETINDEX_NO-1 && *p1!=':' && *p1!=0;
424
    i++,p1=p2) {
425
    p2=strchr(p1,'\n');
426
    if(p2!=NULL) *p2++=0; else p2=p1+strlen(p1);
8100 bpr 427
    p1=find_word_start(p1); strip_trailing_spaces2(p1);
6884 bpr 428
    snprintf(sindbuf[i],MAX_LINELEN,"%s",p1);
10 reyssat 429
    }
430
    p2=strstr(p1,"\n:"); if(p2==NULL) p2=p1+strlen(p1);
431
    else *p2=0;
8100 bpr 432
    p1=find_word_start(p1); strip_trailing_spaces2(p1);
10 reyssat 433
    for(p2=p1;*p2;p2++) if(*p2=='\n') *p2=' ';
6967 bpr 434
    ovlstrcpy(sindbuf[s_information],p1);
10 reyssat 435
    return 0;
436
}
437
 
438
unsigned char categories[16];
439
char taken[MAX_LINELEN+1];
440
int catcnt, takenlen, tweight;
441
 
442
void appenditem(char *word, int lind, int serial, int weight, char *l)
443
{
444
    char nbuf[MAX_LINELEN+1], buf[MAX_LINELEN+1];
445
    int i, ll;
446
    char *p;
447
    FILE *f;
6881 bpr 448
 
10 reyssat 449
    if(!isalnum(*word) || (ll=strlen(word))<2 ||
8100 bpr 450
       wordchr2(taken,word)!=NULL ||
451
       wordchr2(ignore[lind],word)!=NULL ||
10 reyssat 452
       takenlen>=MAX_LINELEN-ll-16)
453
      return;
454
    if(ll==2 && (!isdigit(word[0]) || !isalpha(word[1]))) return;
455
    for(p=word;*p;p++) if(!isalnum(*p) && *p!=' ') return;
456
    taken[takenlen++]=' '; taken[takenlen++]=' ';
3718 reyssat 457
    ovlstrcpy(taken+takenlen,word);
10 reyssat 458
    takenlen+=ll; tweight+=weight;
459
    snprintf(buf,sizeof(buf),"%s:%d?%d\n",word,serial,weight);
460
    for(i=0;i<catcnt;i++) {
6884 bpr 461
    snprintf(nbuf,sizeof(nbuf),"%s/%c.%s",
462
         outdir,categories[i],lang[lind]);
463
    f=fopen(nbuf,"a");
464
    if(f!=NULL) {fputs(buf,f); fclose(f);}
10 reyssat 465
    }
466
}
467
 
6881 bpr 468
void appenditem1 (char *buf, int lind, int serial, int weight, char *l )
469
{
470
  char *p1, *p2 ;
471
  for(p1=find_word_start(buf); *p1;
6884 bpr 472
    p1=find_word_start(p2)) {
473
    p2=strchr(p1,',');
474
    if(p2!=NULL) *p2++=0; else p2=p1+strlen(p1);
475
    if(strlen(p1)<=0) continue;
476
    appenditem(p1,lind,serial,weight,module_language);
6881 bpr 477
  }
478
}
479
void appenditem2 (char *buf, int lind, int serial, int weight, char *l )
480
{
481
  char *p1, *p2 ;
482
  for(p1=find_word_start(buf);*p1;
6884 bpr 483
    p1=find_word_start(p2)) {
484
    p2=find_word_end(p1); if(*p2) *p2++=0;
485
    appenditem(p1,lind,serial,weight,module_language);
6881 bpr 486
  }
487
}
10 reyssat 488
void onemodule(const char *name, int serial, int lind)
489
{
490
    int i;
491
    unsigned char trlist[]={
6884 bpr 492
    i_title,i_description,i_category,i_domain,i_keywords,
493
      i_require,i_author,
494
      i_keywords_ca,i_keywords_en,i_keywords_fr,i_keywords_it,i_keywords_nl,
495
      i_title_ca,i_title_en,i_title_fr,i_title_it,i_title_nl
10 reyssat 496
    };
497
    #define trcnt (sizeof(trlist)/sizeof(trlist[0]))
6564 bpr 498
    char *p1, *p2, *pp, *q, buf[MAX_LINELEN+1], lbuf[16];
10 reyssat 499
    FILE *f;
6881 bpr 500
 
10 reyssat 501
    if(module_index(name)) return;
502
    towords(indbuf[i_category]);
7915 bpr 503
/*   list the categories (among A=all,X=eXercise,O,D,...) corresponding
6884 bpr 504
 *   to this module
505
 */
10 reyssat 506
    for(i=catcnt=0;i<catno && catcnt<16;i++) {
8100 bpr 507
    if(wordchr2(indbuf[i_category],cat[i].name)!=NULL)
6884 bpr 508
      categories[catcnt++]=cat[i].typ;
10 reyssat 509
    }
510
    if(catcnt==0) return;
511
    if(categories[0]!=cat[0].typ)
512
      categories[catcnt++]=cat[0].typ;
6884 bpr 513
/*  write module's name in the category.language files, for instance lists/X.fr
514
 * for french exercises
515
 */
10 reyssat 516
    for(i=0;i<catcnt;i++) {
6884 bpr 517
    snprintf(buf,sizeof(buf),"%s/lists/%c.%s",
518
         outdir,categories[i],lang[lind]);
519
    f=fopen(buf,"a");
520
    if(f!=NULL) {fprintf(f,"%s\n",name); fclose(f);}
10 reyssat 521
    }
6884 bpr 522
/*   add serial number and language (resp.title, ...) to corresponding file  */
10 reyssat 523
    fprintf(langf,"%d:%s\n",serial,module_language);
524
    fprintf(titf,"%d:%s\n",serial,indbuf[i_title]);
525
    fprintf(descf,"%d:%s\n",serial,indbuf[i_description]);
526
    fprintf(authorf,"%d:%s\n",serial,indbuf[i_author]);
527
    fprintf(versionf,"%d:%s\n",serial,indbuf[i_version]);
6881 bpr 528
 
6884 bpr 529
/*   add module's information in html page for robots  */
10 reyssat 530
    snprintf(buf,sizeof(buf),"%s",indbuf[i_description]);
531
    for(pp=strchr(buf,','); pp; pp=strchr(pp,','))
8100 bpr 532
      string_modify3(buf,pp,pp+1,"&#44;");
10 reyssat 533
    if(strcmp(module_language,lang[lind])==0)
534
      fprintf(robotf,"%s ,%s,%s,%s,%s\n",name,module_language,name,
6884 bpr 535
          indbuf[i_title], buf);
6819 reyssat 536
 
6884 bpr 537
/*   Normalize the information of trlist, using dictionary
7915 bpr 538
 *  -- bases/sys/domain.xx without suffix translation (--> english version)
6884 bpr 539
 */
6881 bpr 540
    entrycount=dentrycount; dicbuf=ddicbuf;
541
    memmove(entry,dentry,dentrycount*sizeof(entry[0]));
542
    unknown_type=unk_leave;
10 reyssat 543
    for(i=0;i<trcnt;i++) {
6884 bpr 544
    detag(indbuf[trlist[i]]);
8100 bpr 545
    deaccent2(indbuf[trlist[i]]);
6884 bpr 546
    comma(indbuf[trlist[i]]);
8100 bpr 547
    singlespace2(indbuf[trlist[i]]);
6884 bpr 548
    translate(indbuf[trlist[i]]);
6881 bpr 549
    }
6884 bpr 550
/*   Normalize the information, using dictionary
7915 bpr 551
 *   bases/sys/words.xx with suffix translation
6884 bpr 552
 */
6881 bpr 553
    entrycount=mentrycount; dicbuf=mdicbuf;
554
    memmove(entry,mentry,mentrycount*sizeof(entry[0]));
6884 bpr 555
    unknown_type=unk_leave;/*  used in translator_.c */
6881 bpr 556
    for(i=0;i<trcnt;i++) {
6884 bpr 557
    suffix_translate(indbuf[trlist[i]]);
558
    translate(indbuf[trlist[i]]);
10 reyssat 559
    }
6881 bpr 560
 
561
/* taken contains all words already seen in the module index */
10 reyssat 562
    taken[0]=0; takenlen=tweight=0;
6881 bpr 563
/*  append words of title  */
3718 reyssat 564
    ovlstrcpy(buf,indbuf[i_title]); towords(buf);
6881 bpr 565
    appenditem2(buf,lind,serial,4,module_language);
566
 
6884 bpr 567
/*  extract words of every other information except level */
6799 bpr 568
    snprintf(buf,sizeof(buf),"%s %s %s %s %s %s %s %s %s %s %s %s %s %s %s",
6884 bpr 569
         indbuf[i_description],indbuf[i_keywords],
570
         indbuf[i_keywords_ca],indbuf[i_keywords_en],indbuf[i_keywords_fr],
571
         indbuf[i_keywords_it],indbuf[i_keywords_nl],
572
         indbuf[i_title_ca],indbuf[i_title_en],indbuf[i_title_fr],
573
         indbuf[i_title_it],indbuf[i_title_nl],
574
         indbuf[i_domain],indbuf[i_require],indbuf[i_author]);
10 reyssat 575
    towords(buf);
6884 bpr 576
    appenditem2(buf,lind,serial,2,module_language);
6881 bpr 577
 
6884 bpr 578
/*   this time the dictionary is the group dictionary  sys/wgrp/wgrp
579
 *   with a g (groupdic), not an m (maindic) . see below main, suffix, group.
7915 bpr 580
 *   and delete unknown ?? and translate
6884 bpr 581
 */
10 reyssat 582
    entrycount=gentrycount; dicbuf=gdicbuf;
583
    memmove(entry,gentry,gentrycount*sizeof(entry[0]));
6881 bpr 584
 
6884 bpr 585
/*  append words of every title information  */
6881 bpr 586
    ovlstrcpy(buf,indbuf[i_title]);
10 reyssat 587
    unknown_type=unk_delete;
6881 bpr 588
    translate(buf);
589
    appenditem1(buf,lind,serial,2,module_language);
590
 
6884 bpr 591
/*  append words of information of description except level  */
6881 bpr 592
    snprintf(buf,sizeof(buf),"%s", indbuf[i_description]);
593
    unknown_type=unk_delete;
594
    translate(buf);
595
    appenditem1(buf,lind,serial,4,module_language);
596
 
6884 bpr 597
/*  append words (or group of words) of keywords and domain  */
6881 bpr 598
    snprintf(buf,sizeof(buf),"%s, %s, %s, %s, %s, %s, %s",
6884 bpr 599
         indbuf[i_domain],indbuf[i_keywords],
600
         indbuf[i_keywords_ca], indbuf[i_keywords_en],indbuf[i_keywords_fr],
601
         indbuf[i_keywords_it], indbuf[i_keywords_nl]);
602
    unknown_type=unk_leave;
10 reyssat 603
    translate(buf);
6881 bpr 604
    appenditem1(buf,lind,serial,2,module_language);
605
 
6884 bpr 606
/*   append level information, with weight 2 */
10 reyssat 607
    snprintf(buf,sizeof(buf),"%s",indbuf[i_level]);
3718 reyssat 608
    ovlstrcpy(lbuf,"level");
10 reyssat 609
    for(p1=buf; *p1; p1++) if(!isalnum(*p1)) *p1=' ';
6564 bpr 610
    q=buf+strlen(buf);
611
    for(p1=find_word_start(buf); (*p1) && (p1 < q) ;
6884 bpr 612
    p1=find_word_start(p2)) {
613
    p2=find_word_end(p1);
614
    if(p2!=NULL) *p2++=0; else p2=p1+strlen(p1);
615
    if(!isalpha(*p1) ||
616
       (!isdigit(*(p1+1)) && *(p1+1)!=0) ||
617
       (*(p1+1)!=0 && *(p1+2)!=0))
618
      continue;
619
    *p1=tolower(*p1);
620
    ovlstrcpy(lbuf+strlen("level"),p1);
621
    appenditem(lbuf,lind,serial,2,module_language);
10 reyssat 622
    }
6884 bpr 623
/*   append total weight of module to weight file site2/weight.xx  */
10 reyssat 624
    fprintf(weightf,"%d:%d\n",serial,tweight);
625
}
626
 
627
void modules(void)
628
{
629
    int i,j,k,d;
630
    char namebuf[MAX_LINELEN+1];
6881 bpr 631
    char mdic[MAX_LINELEN+1], sdic[MAX_LINELEN+1], gdic[MAX_LINELEN+1], ddic[MAX_LINELEN+1];
10 reyssat 632
 
633
    for(j=0;j<langcnt;j++) {
6884 bpr 634
    snprintf(namebuf,sizeof(namebuf),"%s/weight.%s",outdir,lang[j]);
635
    weightf=fopen(namebuf,"w");
636
    snprintf(mdic,sizeof(mdic),"%s/%s.%s",dicdir,maindic,lang[j]);
637
    snprintf(sdic,sizeof(sdic),"%s/%s.%s",dicdir,suffixdic,lang[j]);
638
    snprintf(gdic,sizeof(gdic),"%s/%s.%s",dicdir,groupdic,lang[j]);
639
    snprintf(ddic,sizeof(ddic),"%s/%s.%s",dicdir,domaindic,lang[j]);
640
    suffix_dic(sdic); prepare_dic(gdic);
641
    gdicbuf=dicbuf; gentrycount=entrycount;
642
    memmove(gentry,entry,gentrycount*sizeof(entry[0]));
643
    prepare_dic(mdic);
644
    mdicbuf=dicbuf; mentrycount=entrycount;
645
    memmove(mentry,entry,mentrycount*sizeof(entry[0]));
646
    prepare_dic(ddic);
647
    ddicbuf=dicbuf; dentrycount=entrycount;
648
    memmove(dentry,entry,dentrycount*sizeof(entry[0]));
649
    unknown_type=unk_leave; translate(ignore[j]);
650
    for(i=0;i<modcnt;i++) {
651
        if(mod[i].langcnt>0) {
652
        for(d=k=0;k<mod[i].langcnt;k++)
653
          if(mod[i].langs[k]<mod[i].langs[d]) d=k;
654
        for(k=0;k<mod[i].langcnt && mod[i].langs[k]!=j;k++);
655
        if(k>=mod[i].langcnt) k=d;
656
        snprintf(namebuf,MAX_LINELEN,"%s.%s",mod[i].name,
657
             lang[mod[i].langs[k]]);
658
        onemodule(namebuf,mod[i].counts[k],j);
659
        }
660
        else {
661
        onemodule(mod[i].name,mod[i].counts[0],j);
662
        }
10 reyssat 663
    }
6884 bpr 664
    if(mentrycount>0) free(mdicbuf);
665
    if(gentrycount>0) free(gdicbuf);
666
    if(suffixcnt>0) free(sufbuf);
667
    if(dentrycount>0) free(ddicbuf);
668
    if(weightf) fclose(weightf);
669
    }
10 reyssat 670
}
671
 
6881 bpr 672
/* FIXME ? differences with appenditem - use fprintf instead of  snprintf */
10 reyssat 673
void sappenditem(char *word, int lind, int serial, int weight)
674
{
675
    int ll;
676
    char *p;
6881 bpr 677
 
10 reyssat 678
    if(!isalnum(*word) || (ll=strlen(word))<2 ||
8100 bpr 679
       wordchr2(taken,word)!=NULL ||
680
       wordchr2(ignore[lind],word)!=NULL ||
10 reyssat 681
       takenlen>=MAX_LINELEN-ll-16)
682
      return;
683
    if(ll==2 && (!isdigit(word[0]) || !isalpha(word[1]))) return;
684
    for(p=word;*p;p++) if(!isalnum(*p) && *p!=' ') return;
685
    taken[takenlen++]=' ';taken[takenlen++]=' ';
3718 reyssat 686
    ovlstrcpy(taken+takenlen,word);
10 reyssat 687
    takenlen+=ll; tweight+=weight;
688
    fprintf(indf,"%s:%d?%d\n",word,serial,weight);
689
}
690
 
691
void onesheet(int serial, int lind)
692
{
693
    int i;
694
    unsigned char trlist[]={
6967 bpr 695
    s_title,s_description,s_domain,s_keywords,s_information
10 reyssat 696
    };
697
    #define trcnt (sizeof(trlist)/sizeof(trlist[0]))
698
    char *p1, *p2, buf[MAX_LINELEN+1];
6881 bpr 699
 
10 reyssat 700
    if(sheet_index(serial)) return;
701
    fprintf(listf,"%s\n",mod[serial].name+3);
702
    fprintf(titf,"%d:%s\n",serial,sindbuf[s_title]);
703
    fprintf(descf,"%d:%s\n",serial,sindbuf[s_description]);
6967 bpr 704
    fprintf(remf,"%d:%s\n",serial,sindbuf[s_information]);
7915 bpr 705
 
6881 bpr 706
    entrycount=dentrycount; dicbuf=ddicbuf;
707
    memmove(entry,dentry,dentrycount*sizeof(entry[0]));
10 reyssat 708
    unknown_type=unk_leave;
709
    for(i=0;i<trcnt;i++) {
6884 bpr 710
    detag(sindbuf[trlist[i]]);
8100 bpr 711
    deaccent2(sindbuf[trlist[i]]);
6884 bpr 712
    comma(sindbuf[trlist[i]]);
8100 bpr 713
    singlespace2(sindbuf[trlist[i]]);
6884 bpr 714
    translate(sindbuf[trlist[i]]);
6881 bpr 715
    }
7915 bpr 716
 
6881 bpr 717
    entrycount=mentrycount; dicbuf=mdicbuf;
718
    memmove(entry,mentry,mentrycount*sizeof(entry[0]));
719
    unknown_type=unk_leave;
720
    for(i=0;i<trcnt;i++) {
6884 bpr 721
    suffix_translate(sindbuf[trlist[i]]);
722
    translate(sindbuf[trlist[i]]);
10 reyssat 723
    }
724
    taken[0]=0; takenlen=tweight=0;
3718 reyssat 725
    ovlstrcpy(buf,sindbuf[s_title]); towords(buf);
10 reyssat 726
    for(p1=find_word_start(buf);*p1;
6884 bpr 727
    p1=find_word_start(p2)) {
728
    p2=find_word_end(p1); if(*p2) *p2++=0;
729
    sappenditem(p1,lind,serial,4);
10 reyssat 730
    }
731
    snprintf(buf,sizeof(buf),"%s %s %s %s",
6884 bpr 732
         sindbuf[s_description],sindbuf[s_keywords],
6967 bpr 733
         sindbuf[s_domain],sindbuf[s_information]);
10 reyssat 734
    towords(buf);
735
    for(p1=find_word_start(buf);*p1;
6884 bpr 736
    p1=find_word_start(p2)) {
737
    p2=find_word_end(p1); if(*p2) *p2++=0;
738
    sappenditem(p1,lind,serial,2);
10 reyssat 739
    }
740
    entrycount=gentrycount; dicbuf=gdicbuf;
741
    memmove(entry,gentry,gentrycount*sizeof(entry[0]));
742
    unknown_type=unk_delete;
3718 reyssat 743
    ovlstrcpy(buf,sindbuf[s_title]); translate(buf);
10 reyssat 744
    for(p1=find_word_start(buf); *p1;
6884 bpr 745
    p1=find_word_start(p2)) {
746
    p2=strchr(p1,',');
747
    if(p2!=NULL) *p2++=0; else p2=p1+strlen(p1);
748
    if(strlen(p1)<=0) continue;
749
    sappenditem(p1,lind,serial,4);
10 reyssat 750
    }
751
    snprintf(buf,sizeof(buf),"%s, %s, %s, %s",
6884 bpr 752
         sindbuf[s_description],sindbuf[s_keywords],
6967 bpr 753
         sindbuf[s_domain],sindbuf[s_information]);
10 reyssat 754
    translate(buf);
755
    for(p1=find_word_start(buf); *p1;
6884 bpr 756
    p1=find_word_start(p2)) {
757
    p2=strchr(p1,',');
758
    if(p2!=NULL) *p2++=0; else p2=p1+strlen(p1);
759
    if(strlen(p1)<=0) continue;
760
    sappenditem(p1,lind,serial,2);
10 reyssat 761
    }
762
    fprintf(weightf,"%d:%d\n",serial,tweight);
763
}
764
 
765
void sheets(void)
766
{
767
    int i,j;
6961 bpr 768
    char mdic[MAX_LINELEN+1], sdic[MAX_LINELEN+1], gdic[MAX_LINELEN+1], ddic[MAX_LINELEN+1];
10 reyssat 769
    char buf[MAX_LINELEN+1];
7915 bpr 770
 
10 reyssat 771
    for(j=0;j<langcnt;j++) {
6884 bpr 772
    snprintf(buf,sizeof(buf),"%s/index/title.%s",sheetdir,lang[j]);
773
    titf=fopen(buf,"w");
774
    snprintf(buf,sizeof(buf),"%s/index/description.%s",sheetdir,lang[j]);
775
    descf=fopen(buf,"w");
776
    snprintf(buf,sizeof(buf),"%s/index/%s",sheetdir,lang[j]);
777
    indf=fopen(buf,"w");
778
    snprintf(buf,sizeof(buf),"%s/index/list.%s",sheetdir,lang[j]);
779
    listf=fopen(buf,"w");
780
    snprintf(buf,sizeof(buf),"%s/index/weight.%s",sheetdir,lang[j]);
781
    weightf=fopen(buf,"w");
782
    snprintf(buf,sizeof(buf),"%s/index/addr.%s",sheetdir,lang[j]);
783
    addrf=fopen(buf,"w");
6967 bpr 784
    snprintf(buf,sizeof(buf),"%s/index/information.%s",sheetdir,lang[j]);
6961 bpr 785
    remf=fopen(buf,"w");
6884 bpr 786
    snprintf(buf,sizeof(buf),"%s/index/serial.%s",sheetdir,lang[j]);
787
    serialf=fopen(buf,"w");
788
    snprintf(mdic,sizeof(mdic),"%s/%s.%s",dicdir,maindic,lang[j]);
789
    snprintf(sdic,sizeof(sdic),"%s/%s.%s",dicdir,suffixdic,lang[j]);
790
    snprintf(gdic,sizeof(gdic),"%s/%s.%s",dicdir,groupdic,lang[j]);
6961 bpr 791
    snprintf(ddic,sizeof(ddic),"%s/%s.%s",dicdir,domaindic,lang[j]);
6884 bpr 792
    suffix_dic(sdic); prepare_dic(gdic);
793
    gdicbuf=dicbuf; gentrycount=entrycount;
794
    memmove(gentry,entry,gentrycount*sizeof(entry[0]));
795
    prepare_dic(mdic);
796
    mdicbuf=dicbuf; mentrycount=entrycount;
797
    memmove(mentry,entry,mentrycount*sizeof(entry[0]));
6973 bpr 798
    prepare_dic(ddic);
799
    ddicbuf=dicbuf; dentrycount=entrycount;
800
    memmove(dentry,entry,dentrycount*sizeof(entry[0]));
6884 bpr 801
    unknown_type=unk_leave; translate(ignore[j]);
802
    for(i=0;i<modcnt;i++) {
803
        if(mod[i].langs[0]!=j) continue;
804
        fprintf(addrf,"%d:%s\n",i,mod[i].name+3);
805
        fprintf(serialf,"%s:%d\n",mod[i].name+3,i);
806
        onesheet(i,j);
10 reyssat 807
    }
6884 bpr 808
    if(mentrycount>0) free(mdicbuf);
809
    if(gentrycount>0) free(gdicbuf);
810
    if(suffixcnt>0) free(sufbuf);
6961 bpr 811
    if(dentrycount>0) free(ddicbuf);
6884 bpr 812
    fclose(titf); fclose(descf); fclose(indf); fclose(listf);
813
    fclose(weightf); fclose(addrf); fclose(serialf);
814
    }
10 reyssat 815
}
816
 
817
int main()
818
{
8123 bpr 819
    gentry=xmalloc(entry_size);
820
    dentry=xmalloc(entry_size);
821
    mentry=xmalloc(entry_size);
10 reyssat 822
    prep();
823
    if(modcnt>0) modules();
824
    clean();
825
    sprep();
826
    if(modcnt>0) sheets();
827
    return 0;
828
}
829