Subversion Repositories wimsdev

Rev

Rev 8659 | Rev 9090 | Go to most recent revision | Details | Compare with Previous | Last modification | View Log | RSS feed

Rev Author Line No. Line
10 reyssat 1
/*    Copyright (C) 1998-2003 XIAO, Gang of Universite de Nice - Sophia Antipolis
2
 *
3
 *  This program is free software; you can redistribute it and/or modify
4
 *  it under the terms of the GNU General Public License as published by
5
 *  the Free Software Foundation; either version 2 of the License, or
6
 *  (at your option) any later version.
7
 *
8
 *  This program is distributed in the hope that it will be useful,
9
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
10
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
11
 *  GNU General Public License for more details.
12
 *
13
 *  You should have received a copy of the GNU General Public License
14
 *  along with this program; if not, write to the Free Software
15
 *  Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
16
 */
17
 
6884 bpr 18
/*  This is an internal program,
7915 bpr 19
 * used to index modules for search engine.
6884 bpr 20
 */
10 reyssat 21
 
8100 bpr 22
#include "../Lib/libwims.h"
8123 bpr 23
#include "translator_.h"
24
#include "suffix.h"
10 reyssat 25
 
6884 bpr 26
/*  Size limits */
#define MAX_LANGS    MAX_LANGUAGES
#define MAX_MODULES    65536

/*  Locations read by the indexer (outdir and sheetdir may be replaced in
 *  prep() by the environment variables modind_outdir / modind_sheetdir).
 */
char *moduledir = "public_html/modules";
char *sheetdir  = "public_html/bases/sheet";
char *dicdir    = "public_html/bases";
char *outdir    = "public_html/bases/site2";

/*  Dictionary base names; they are appended to dicdir and completed with
 *  ".<language>" when the file names are built.
 */
char *maindic   = "sys/words";
char *groupdic  = "sys/wgrp/wgrp";
char *suffixdic = "sys/suffix";
char *domaindic = "sys/domaindic";
char *ignoredic = "sys/indignore";

/*  Site configuration file, searched for site_languages in prep() */
char *conffile  = "log/wims.conf";
char *mlistbase = "list";
10 reyssat 39
 
40
char lang[MAX_LANGS][4]={
1792 bpr 41
    "en","fr","cn","es","it","nl","si","ca","pt"
10 reyssat 42
};
6884 bpr 43
#define DEFAULT_LANGCNT    6
10 reyssat 44
char allang[MAX_LANGS][4]={
6564 bpr 45
    "en","fr","cn","es","it","nl","de","si","ca","pt"
10 reyssat 46
};
47
#define allangcnt 8
48
char ignore[MAX_LANGS][MAX_LINELEN+1];
49
char mlistfile[MAX_LANGS][256];
50
int langcnt;
8650 bpr 51
FILE *langf, *titf, *descf, *weightf, *robotf, *indf, *listf, *addrf, *serialf, *authorf, *versionf, *remf, *titlef;
10 reyssat 52
 
53
/*  Module categories: the name looked for in the "category" field of a
 *  module INDEX file, and the one-letter code used in the names of the
 *  per-category output files.
 */
struct cat {
    char *name;
    char typ;
};

struct cat cat[]={
    { .name="all_types",  .typ='A' },
    { .name="exercise",   .typ='X' },
    { .name="oef",        .typ='O' },
    { .name="tool",       .typ='T' },
    { .name="recreation", .typ='R' },
    { .name="reference",  .typ='Y' },
    { .name="document",   .typ='D' },
    { .name="popup",      .typ='P' },
    { .name="datamodule", .typ='M' }
};
#define catno (sizeof(cat)/sizeof(cat[0]))
68
 
69
struct mod {
70
    char *name;
71
    unsigned char langs[MAX_LANGS];
72
    int counts[MAX_LANGS];
73
    int  langcnt;
74
} mod[MAX_MODULES];
75
int modcnt;
76
 
77
char *mlist;
78
 
6884 bpr 79
/*  fold known accented letters to unaccented, other strange characters to space
7915 bpr 80
 *  apostrophe is among the exceptions to be kept (important for multi-word expressions)
6884 bpr 81
 */
8100 bpr 82
void deaccent2(char *p)
10 reyssat 83
{
3247 bpr 84
    char *sp;
10 reyssat 85
    char *v;
86
    for(sp=p;*sp;sp++) {
6884 bpr 87
    if(*sp<0 && (v=strchr(acctab,*sp))!=NULL)
88
      *sp=*(deatab+(v-acctab));
89
    if(!isalnum(*sp) && strchr(",.&$+*",*sp)==0) *sp=' ';
90
    else *sp=tolower(*sp);
10 reyssat 91
    }
92
}
93
 
6884 bpr 94
/*  Translate everything non-alphanumeric into space, in place.
 *  The characters & $ + * are kept as they are.
 */
void towords(char *p)
{
    char *pp;

    for(pp=p; *pp; pp++) {
        /* cast: ctype functions are undefined for negative char values */
        if(isalnum((unsigned char)*pp)) continue;
        if(strchr("&$+*",*pp)==NULL) *pp=' ';
    }
}
100
 
6884 bpr 101
/*  Find first occurrence of word w in p.
 *  An occurrence counts only when it is delimited on both sides by
 *  whitespace or by the start/end of p.
 *  Returns a pointer to the occurrence, or NULL when there is none.
 *  An empty w never matches (the unguarded scan could run past the end of p).
 */
char *wordchr2(char *p, char *w)
{
    char *r;
    size_t wlen=strlen(w);

    if(wlen==0) return NULL;
    for(r=strstr(p,w); r!=NULL; r=strstr(r+1,w)) {
        /* casts: ctype functions are undefined for negative char values */
        int left_ok=(r==p) || isspace((unsigned char)*(r-1));
        int right_ok=(*(r+wlen)==0) || isspace((unsigned char)*(r+wlen));
        if(left_ok && right_ok) break;
    }
    return r;
}
111
 
112
/*  Return a pointer just past the '>' closing the tag that starts at p, or
 *  to the end of the string when the tag is not closed.
 *  Nested '<...>' pairs and single- or double-quoted parts are skipped.
 */
char *find_tag_end(char *p)
{
    char *cur=p;

    if(*cur=='<') cur++;
    while(*cur!=0 && *cur!='>') {
        switch(*cur) {
            case '<':
                /* nested tag: land on its last character */
                cur=find_tag_end(cur)-1;
                break;
            case '"':
            case '\'':
                /* jump to the matching quote; none means unterminated tag */
                cur=strchr(cur+1,*cur);
                if(cur==NULL) return p+strlen(p);
                break;
            default:
                break;
        }
        cur++;
    }
    if(*cur=='>') cur++;
    return cur;
}
131
 
132
/*  Find the first tag named tag (case-insensitive) in p.
 *  A match is a '<' immediately followed by tag and then by a character
 *  that is not alphanumeric.
 *  Returns a pointer to the '<', or to the end of p when nothing matches.
 */
char *find_tag(char *p, char *tag)
{
    char *pp;
    size_t len=strlen(tag);

    for(pp=strchr(p,'<'); pp!=NULL; pp=strchr(pp+1,'<')) {
        /* cast: ctype functions are undefined for negative char values */
        if(strncasecmp(pp+1,tag,len)==0 &&
           !isalnum((unsigned char)pp[1+len])) return pp;
    }
    return p+strlen(p);
}
142
 
6884 bpr 143
/*  Remove all html tags from p, in place.
 *  A tag that is never closed cuts the string at its '<'.
 */
void detag(char *p)
{
    char *open=strchr(p,'<');

    while(open!=NULL) {
        char *close=find_tag_end(open);

        if(*close==0) {
            /* the tag runs to the end of the string: drop it and stop */
            *open=0;
            return;
        }
        /* shift the remainder over the tag, then look again from here */
        ovlstrcpy(open,close);
        open=strchr(open,'<');
    }
}
153
 
6819 reyssat 154
/*  Put a space after every comma of p so that a comma always ends a word. */
void comma(char *p)
{
    char *hit=strchr(p,',');

    while(hit!=NULL) {
        /* replace the single character ',' by ", " */
        string_modify3(p,hit,hit+1,", ");
        hit=strchr(hit+1,',');
    }
}
162
 
10 reyssat 163
void _getdef(char buf[], char *name, char value[])
164
{
165
    char *p1, *p2, *p3;
166
 
167
    value[0]=0;
168
    for(p1=strstr(buf,name); p1!=NULL; p1=strstr(p1+1,name)) {
6884 bpr 169
    p2=find_word_start(p1+strlen(name));
170
    if((p1>buf && !isspace(*(p1-1))) || *p2!='=') continue;
171
    p3=p1; while(p3>buf && isspace(*(p3-1)) && *(p3-1)!='\n') p3--;
172
    if(p3>buf && *(p3-1)!='\n') continue;
173
    p3=strchr(p2,'\n');
174
    p2=find_word_start(p2+1);
175
    if(p3 <= p2) continue;
176
    snprintf(value,MAX_LINELEN,"%s",p2);
177
    if(p3!=NULL && p3-p2<MAX_LINELEN) value[p3-p2]=0;
8100 bpr 178
    strip_trailing_spaces2(value);
6884 bpr 179
    break;
10 reyssat 180
    }
181
}
182
 
6884 bpr 183
/*  Get variable definition from a file.
 *  Result stored in buffer value of length MAX_LINELEN; value is left empty
 *  when the file cannot be opened or read, or holds no definition of name.
 */
void getdef(char *fname, char *name, char value[])
{
    FILE *f;
    char *buf;
    long fsize;
    size_t got;

    value[0]=0;
    f=fopen(fname,"r"); if(f==NULL) return;
    /* find the file size; give up on any positioning error or empty file */
    if(fseek(f,0,SEEK_END)!=0 || (fsize=ftell(f))<=0 ||
       fseek(f,0,SEEK_SET)!=0) {
        fclose(f); return;
    }
    buf=xmalloc(fsize+256);
    got=fread(buf,1,fsize,f);
    fclose(f);
    if(got>0) {
        buf[got]=0;
        _getdef(buf,name,value);
    }
    /* buf is released on every path (it used to leak on a failed read) */
    free(buf);
}
201
 
8123 bpr 202
/*  Saved copies of the dictionary state (text buffer, entry table and entry
 *  count), one set per dictionary: m = main, g = group, d = domain.
 */
char *mdicbuf;
char *gdicbuf;
char *ddicbuf;
char *gentry;
char *mentry;
char *dentry;

int gentrycount;
int mentrycount;
int dentrycount;
10 reyssat 205
 
6884 bpr 206
/*  Preparation of data */
10 reyssat 207
void prep(void)
208
{
209
    char buf[MAX_LINELEN+1];
210
    char *p1,*p2,*s,*old;
211
    int i,l,thislang,t;
212
    FILE *f;
6881 bpr 213
 
10 reyssat 214
    s=getenv("modind_outdir"); if(s!=NULL && *s!=0) outdir=s;
215
    s=getenv("modind_sheetdir"); if(s!=NULL && *s!=0) sheetdir=s;
216
    snprintf(buf,sizeof(buf),"%s/addr",outdir);
217
    addrf=fopen(buf,"w");
218
    snprintf(buf,sizeof(buf),"%s/serial",outdir);
219
    serialf=fopen(buf,"w");
220
    modcnt=langcnt=0;
6884 bpr 221
/* take the langs declared in conffile */
10 reyssat 222
    getdef(conffile,"site_languages",buf);
223
    for(p1=buf;*p1;p1++) if(!isalnum(*p1)) *p1=' ';
224
    for(p1=find_word_start(buf); *p1 && langcnt<MAX_LANGS; p1=find_word_start(p2)) {
6884 bpr 225
    p2=find_word_end(p1);
226
    if(p2!=p1+2 || !isalpha(*p1) || !isalpha(*(p1+1))) continue;
227
    memmove(lang[langcnt],p1,2); lang[langcnt++][2]=0;
10 reyssat 228
    }
6884 bpr 229
    if(langcnt==0) {/*  default languages */
230
    langcnt=DEFAULT_LANGCNT;
10 reyssat 231
    }
232
    s=getenv("mlist"); if(s==NULL) exit(1);
233
    l=strlen(s); if(l<0 || l>100*MAX_LINELEN) exit(1);
3718 reyssat 234
    mlist=xmalloc(l+16); ovlstrcpy(mlist,s); old="";
10 reyssat 235
    for(i=0;i<langcnt;i++) {
6884 bpr 236
    snprintf(buf,sizeof(buf),"%s/%s.%s",dicdir,ignoredic,lang[i]);
237
    f=fopen(buf,"r"); if(f==NULL) continue;
238
    l=fread(ignore[i],1,MAX_LINELEN,f);fclose(f);
239
    if(l<0 || l>=MAX_LINELEN) l=0;
240
    ignore[i][l]=0;
10 reyssat 241
    }
242
    for(t=0, p1=find_word_start(mlist);
6884 bpr 243
    *p1 && modcnt<MAX_MODULES;
244
    p1=find_word_start(p2), t++) {
245
    p2=find_word_end(p1);
246
    l=p2-p1; if(*p2) *p2++=0;
247
    fprintf(addrf,"%d:%s\n",t,p1);
248
    fprintf(serialf,"%s:%d\n",p1,t);
249
    thislang=-1;
6564 bpr 250
/* language is taken from the address */
6884 bpr 251
    if(l>3 && p1[l-3]=='.') {
252
        for(i=0;i<langcnt;i++) if(strcasecmp(lang[i],p1+l-2)==0) break;
253
        if(i<langcnt) {p1[l-3]=0; thislang=i;}
254
        else {/*  unknown language, not referenced */
255
        continue;
256
        }
10 reyssat 257
    }
6884 bpr 258
    if(modcnt>0 && strcmp(old,p1)==0 && thislang>=0) {
259
        if(mod[modcnt-1].langcnt<langcnt) {
260
        mod[modcnt-1].langs[mod[modcnt-1].langcnt]=thislang;
261
        mod[modcnt-1].counts[mod[modcnt-1].langcnt]=t;
262
        (mod[modcnt-1].langcnt)++;
263
        }
264
    }
265
    else {
266
        mod[modcnt].name=old=p1;
267
        if(thislang>=0) {
268
        mod[modcnt].langs[0]=thislang;
269
        mod[modcnt].langcnt=1;
270
        }
271
        else mod[modcnt].langcnt=0;
272
        mod[modcnt].counts[0]=t;
273
        modcnt++;
274
    }
275
    }
10 reyssat 276
    snprintf(buf,sizeof(buf),"%s/language",outdir);
277
    langf=fopen(buf,"w");
278
    snprintf(buf,sizeof(buf),"%s/title",outdir);
279
    titf=fopen(buf,"w");
280
    snprintf(buf,sizeof(buf),"%s/description",outdir);
281
    descf=fopen(buf,"w");
282
    snprintf(buf,sizeof(buf),"%s/author",outdir);
283
    authorf=fopen(buf,"w");
284
    snprintf(buf,sizeof(buf),"%s/version",outdir);
285
    versionf=fopen(buf,"w");
286
    snprintf(buf,sizeof(buf),"%s/lists/robot.phtml",outdir);
287
    robotf=fopen(buf,"w");
288
    fclose(addrf); fclose(serialf);
289
    if(!robotf || !versionf || !authorf || !descf || !titf || !descf) {
6884 bpr 290
    fprintf(stderr,"modind: error creating output files.\n");
291
    exit(1);
10 reyssat 292
    }
293
}
294
 
295
void sprep(void)
296
{
297
    char *p1,*p2,*s;
298
    int i,l,thislang;
6881 bpr 299
 
10 reyssat 300
    modcnt=0;
301
    s=getenv("slist"); if(s==NULL) return;
302
    l=strlen(s); if(l<0 || l>100*MAX_LINELEN) return;
3718 reyssat 303
    mlist=xmalloc(l+16); ovlstrcpy(mlist,s);
10 reyssat 304
    for(p1=find_word_start(mlist); *p1 && modcnt<MAX_MODULES; p1=find_word_start(p2)) {
6884 bpr 305
    p2=find_word_end(p1);
306
    l=p2-p1; if(*p2) *p2++=0;
307
    for(i=0;i<langcnt;i++) if(strncasecmp(lang[i],p1,2)==0) break;
308
    if(i<langcnt) thislang=i; else continue;
309
    mod[modcnt].name=p1;
310
    mod[modcnt].langs[0]=thislang;
311
    mod[modcnt].langcnt=1;
312
    modcnt++;
10 reyssat 313
    }
314
}
315
 
316
void clean(void)
317
{
318
    fclose(langf); fclose(titf); fclose(descf); fclose(robotf);
319
    fclose(authorf); fclose(versionf);
320
}
321
 
322
char *sheetindex[]={
6881 bpr 323
      "title", "description",
10 reyssat 324
      "duration", "severity",
325
      "level", "domain",
6967 bpr 326
      "keywords", "reserved1", "reserved2", "information"
10 reyssat 327
};
328
#define SHEETINDEX_NO (sizeof(sheetindex)/sizeof(sheetindex[0]))
329
char sindbuf[SHEETINDEX_NO][MAX_LINELEN+1];
330
enum{s_title, s_description,
331
      s_duration, s_severity,
332
      s_level, s_domain,
333
      s_keywords, s_reserved1, s_reserved2,
6967 bpr 334
      s_information
10 reyssat 335
};
336
 
337
char *modindex[]={
6881 bpr 338
      "title", "description",
10 reyssat 339
      "author", "address", "copyright",
340
      "version", "wims_version", "language",
6881 bpr 341
      "category", "level", "domain", "keywords",
6799 bpr 342
      "keywords_ca", "keywords_en", "keywords_fr", "keywords_it", "keywords_nl",
343
      "title_ca", "title_en", "title_fr", "title_it", "title_nl",
10 reyssat 344
      "require"
345
};
346
#define MODINDEX_NO (sizeof(modindex)/sizeof(modindex[0]))
347
char indbuf[MODINDEX_NO][MAX_LINELEN+1];
348
enum{i_title, i_description,
349
      i_author,i_address,i_copyright,
350
      i_version,i_wims_version,i_language,
351
      i_category,i_level,i_domain,i_keywords,
6799 bpr 352
      i_keywords_ca,i_keywords_en,i_keywords_fr,i_keywords_it,i_keywords_nl,
353
      i_title_ca,i_title_en,i_title_fr,i_title_it,i_title_nl,
10 reyssat 354
      i_require
355
};
356
 
357
/*  Names of special module files (not referenced elsewhere in this file). */
char *module_special_file[]={
    "intro",
    "help",
    "about"
};
#define MODSPEC_NO (sizeof(module_special_file)/sizeof(module_special_file[0]))
/*  Two-letter language of the module last read by module_index(). */
char module_language[4];
362
 
6884 bpr 363
/*  read and treat module's INDEX file */
10 reyssat 364
int module_index(const char *name)
365
{
366
    char *p, fbuf[MAX_LINELEN+1], ibuf[MAX_LINELEN+1];
367
    FILE *indf;
368
    int i,l;
369
 
370
    snprintf(fbuf,sizeof(fbuf),"%s/%s/INDEX",moduledir,name);
371
    indf=fopen(fbuf,"r"); if(indf==NULL) return -1;
372
    l=fread(ibuf,1,MAX_LINELEN,indf); fclose(indf);
373
    if(l>0 && l<MAX_LINELEN) ibuf[l]=0; else return -1;
6884 bpr 374
/* treate all fields in *modindex */
10 reyssat 375
    for(i=0;i<MODINDEX_NO;i++) {
6884 bpr 376
    _getdef(ibuf,modindex[i],indbuf[i]);
377
/*  compatibility precaution */
378
    if(indbuf[i][0]==':') indbuf[i][0]='.';
10 reyssat 379
    }
380
    p=find_word_start(indbuf[i_language]);
381
    if(isalpha(*p) && isalpha(*(p+1))) {
6884 bpr 382
    memmove(module_language,p,2); module_language[2]=0;
10 reyssat 383
    }
3718 reyssat 384
    else ovlstrcpy(module_language,"en");
10 reyssat 385
    return 0;
386
}
387
 
388
int sheet_index(int serial)
389
{
390
    char *p1, *p2, fbuf[MAX_LINELEN+1], ibuf[MAX_LINELEN+1];
391
    FILE *indf;
392
    int i,l;
393
 
394
    snprintf(fbuf,sizeof(fbuf),"%s/%s.def",sheetdir,mod[serial].name);
395
    indf=fopen(fbuf,"r"); if(indf==NULL) return -1;
396
    l=fread(ibuf,1,MAX_LINELEN,indf); fclose(indf);
397
    if(l>0 && l<MAX_LINELEN) ibuf[l]=0; else return -1;
398
    for(i=0;i<SHEETINDEX_NO;i++) sindbuf[i][0]=0;
399
    for(i=0,p1=find_word_start(ibuf);
6884 bpr 400
    i<SHEETINDEX_NO-1 && *p1!=':' && *p1!=0;
401
    i++,p1=p2) {
402
    p2=strchr(p1,'\n');
403
    if(p2!=NULL) *p2++=0; else p2=p1+strlen(p1);
8100 bpr 404
    p1=find_word_start(p1); strip_trailing_spaces2(p1);
6884 bpr 405
    snprintf(sindbuf[i],MAX_LINELEN,"%s",p1);
10 reyssat 406
    }
407
    p2=strstr(p1,"\n:"); if(p2==NULL) p2=p1+strlen(p1);
408
    else *p2=0;
8100 bpr 409
    p1=find_word_start(p1); strip_trailing_spaces2(p1);
10 reyssat 410
    for(p2=p1;*p2;p2++) if(*p2=='\n') *p2=' ';
6967 bpr 411
    ovlstrcpy(sindbuf[s_information],p1);
10 reyssat 412
    return 0;
413
}
414
 
415
unsigned char categories[16];
416
char taken[MAX_LINELEN+1];
417
int catcnt, takenlen, tweight;
418
 
419
void appenditem(char *word, int lind, int serial, int weight, char *l)
420
{
421
    char nbuf[MAX_LINELEN+1], buf[MAX_LINELEN+1];
422
    int i, ll;
423
    char *p;
424
    FILE *f;
6881 bpr 425
 
10 reyssat 426
    if(!isalnum(*word) || (ll=strlen(word))<2 ||
8100 bpr 427
       wordchr2(taken,word)!=NULL ||
428
       wordchr2(ignore[lind],word)!=NULL ||
10 reyssat 429
       takenlen>=MAX_LINELEN-ll-16)
430
      return;
431
    if(ll==2 && (!isdigit(word[0]) || !isalpha(word[1]))) return;
432
    for(p=word;*p;p++) if(!isalnum(*p) && *p!=' ') return;
433
    taken[takenlen++]=' '; taken[takenlen++]=' ';
3718 reyssat 434
    ovlstrcpy(taken+takenlen,word);
10 reyssat 435
    takenlen+=ll; tweight+=weight;
436
    snprintf(buf,sizeof(buf),"%s:%d?%d\n",word,serial,weight);
437
    for(i=0;i<catcnt;i++) {
6884 bpr 438
    snprintf(nbuf,sizeof(nbuf),"%s/%c.%s",
439
         outdir,categories[i],lang[lind]);
440
    f=fopen(nbuf,"a");
441
    if(f!=NULL) {fputs(buf,f); fclose(f);}
10 reyssat 442
    }
443
}
444
 
6881 bpr 445
void appenditem1 (char *buf, int lind, int serial, int weight, char *l )
446
{
447
  char *p1, *p2 ;
448
  for(p1=find_word_start(buf); *p1;
6884 bpr 449
    p1=find_word_start(p2)) {
450
    p2=strchr(p1,',');
451
    if(p2!=NULL) *p2++=0; else p2=p1+strlen(p1);
452
    if(strlen(p1)<=0) continue;
453
    appenditem(p1,lind,serial,weight,module_language);
6881 bpr 454
  }
455
}
456
void appenditem2 (char *buf, int lind, int serial, int weight, char *l )
457
{
458
  char *p1, *p2 ;
459
  for(p1=find_word_start(buf);*p1;
6884 bpr 460
    p1=find_word_start(p2)) {
461
    p2=find_word_end(p1); if(*p2) *p2++=0;
462
    appenditem(p1,lind,serial,weight,module_language);
6881 bpr 463
  }
464
}
10 reyssat 465
void onemodule(const char *name, int serial, int lind)
466
{
467
    int i;
468
    unsigned char trlist[]={
6884 bpr 469
    i_title,i_description,i_category,i_domain,i_keywords,
470
      i_require,i_author,
471
      i_keywords_ca,i_keywords_en,i_keywords_fr,i_keywords_it,i_keywords_nl,
472
      i_title_ca,i_title_en,i_title_fr,i_title_it,i_title_nl
10 reyssat 473
    };
474
    #define trcnt (sizeof(trlist)/sizeof(trlist[0]))
6564 bpr 475
    char *p1, *p2, *pp, *q, buf[MAX_LINELEN+1], lbuf[16];
10 reyssat 476
    FILE *f;
6881 bpr 477
 
10 reyssat 478
    if(module_index(name)) return;
479
    towords(indbuf[i_category]);
7915 bpr 480
/*   list the categories (among A=all,X=eXercise,O,D,...) corresponding
6884 bpr 481
 *   to this module
482
 */
10 reyssat 483
    for(i=catcnt=0;i<catno && catcnt<16;i++) {
8100 bpr 484
    if(wordchr2(indbuf[i_category],cat[i].name)!=NULL)
6884 bpr 485
      categories[catcnt++]=cat[i].typ;
10 reyssat 486
    }
487
    if(catcnt==0) return;
488
    if(categories[0]!=cat[0].typ)
489
      categories[catcnt++]=cat[0].typ;
6884 bpr 490
/*  write module's name in the category.language files, for instance lists/X.fr
491
 * for french exercises
492
 */
10 reyssat 493
    for(i=0;i<catcnt;i++) {
6884 bpr 494
    snprintf(buf,sizeof(buf),"%s/lists/%c.%s",
495
         outdir,categories[i],lang[lind]);
496
    f=fopen(buf,"a");
497
    if(f!=NULL) {fprintf(f,"%s\n",name); fclose(f);}
10 reyssat 498
    }
6884 bpr 499
/*   add serial number and language (resp.title, ...) to corresponding file  */
10 reyssat 500
    fprintf(langf,"%d:%s\n",serial,module_language);
501
    fprintf(titf,"%d:%s\n",serial,indbuf[i_title]);
502
    fprintf(descf,"%d:%s\n",serial,indbuf[i_description]);
503
    fprintf(authorf,"%d:%s\n",serial,indbuf[i_author]);
504
    fprintf(versionf,"%d:%s\n",serial,indbuf[i_version]);
6881 bpr 505
 
6884 bpr 506
/*   add module's information in html page for robots  */
10 reyssat 507
    snprintf(buf,sizeof(buf),"%s",indbuf[i_description]);
508
    for(pp=strchr(buf,','); pp; pp=strchr(pp,','))
8100 bpr 509
      string_modify3(buf,pp,pp+1,"&#44;");
10 reyssat 510
    if(strcmp(module_language,lang[lind])==0)
511
      fprintf(robotf,"%s ,%s,%s,%s,%s\n",name,module_language,name,
6884 bpr 512
          indbuf[i_title], buf);
6819 reyssat 513
 
6884 bpr 514
/*   Normalize the information of trlist, using dictionary
7915 bpr 515
 *  -- bases/sys/domain.xx without suffix translation (--> english version)
6884 bpr 516
 */
6881 bpr 517
    entrycount=dentrycount; dicbuf=ddicbuf;
518
    memmove(entry,dentry,dentrycount*sizeof(entry[0]));
519
    unknown_type=unk_leave;
10 reyssat 520
    for(i=0;i<trcnt;i++) {
6884 bpr 521
    detag(indbuf[trlist[i]]);
8100 bpr 522
    deaccent2(indbuf[trlist[i]]);
6884 bpr 523
    comma(indbuf[trlist[i]]);
8100 bpr 524
    singlespace2(indbuf[trlist[i]]);
6884 bpr 525
    translate(indbuf[trlist[i]]);
6881 bpr 526
    }
6884 bpr 527
/*   Normalize the information, using dictionary
7915 bpr 528
 *   bases/sys/words.xx with suffix translation
6884 bpr 529
 */
6881 bpr 530
    entrycount=mentrycount; dicbuf=mdicbuf;
531
    memmove(entry,mentry,mentrycount*sizeof(entry[0]));
6884 bpr 532
    unknown_type=unk_leave;/*  used in translator_.c */
6881 bpr 533
    for(i=0;i<trcnt;i++) {
6884 bpr 534
    suffix_translate(indbuf[trlist[i]]);
535
    translate(indbuf[trlist[i]]);
10 reyssat 536
    }
6881 bpr 537
 
538
/* taken contains all words already seen in the module index */
10 reyssat 539
    taken[0]=0; takenlen=tweight=0;
6881 bpr 540
/*  append words of title  */
3718 reyssat 541
    ovlstrcpy(buf,indbuf[i_title]); towords(buf);
6881 bpr 542
    appenditem2(buf,lind,serial,4,module_language);
543
 
6884 bpr 544
/*  extract words of every other information except level */
6799 bpr 545
    snprintf(buf,sizeof(buf),"%s %s %s %s %s %s %s %s %s %s %s %s %s %s %s",
6884 bpr 546
         indbuf[i_description],indbuf[i_keywords],
547
         indbuf[i_keywords_ca],indbuf[i_keywords_en],indbuf[i_keywords_fr],
548
         indbuf[i_keywords_it],indbuf[i_keywords_nl],
549
         indbuf[i_title_ca],indbuf[i_title_en],indbuf[i_title_fr],
550
         indbuf[i_title_it],indbuf[i_title_nl],
551
         indbuf[i_domain],indbuf[i_require],indbuf[i_author]);
10 reyssat 552
    towords(buf);
6884 bpr 553
    appenditem2(buf,lind,serial,2,module_language);
6881 bpr 554
 
6884 bpr 555
/*   this time the dictionary is the group dictionary  sys/wgrp/wgrp
556
 *   with a g (groupdic), not an m (maindic) . see below main, suffix, group.
7915 bpr 557
 *   and delete unknown ?? and translate
6884 bpr 558
 */
10 reyssat 559
    entrycount=gentrycount; dicbuf=gdicbuf;
560
    memmove(entry,gentry,gentrycount*sizeof(entry[0]));
6881 bpr 561
 
6884 bpr 562
/*  append words of every title information  */
6881 bpr 563
    ovlstrcpy(buf,indbuf[i_title]);
10 reyssat 564
    unknown_type=unk_delete;
6881 bpr 565
    translate(buf);
566
    appenditem1(buf,lind,serial,2,module_language);
567
 
6884 bpr 568
/*  append words of information of description except level  */
6881 bpr 569
    snprintf(buf,sizeof(buf),"%s", indbuf[i_description]);
570
    unknown_type=unk_delete;
571
    translate(buf);
572
    appenditem1(buf,lind,serial,4,module_language);
573
 
6884 bpr 574
/*  append words (or group of words) of keywords and domain  */
6881 bpr 575
    snprintf(buf,sizeof(buf),"%s, %s, %s, %s, %s, %s, %s",
6884 bpr 576
         indbuf[i_domain],indbuf[i_keywords],
577
         indbuf[i_keywords_ca], indbuf[i_keywords_en],indbuf[i_keywords_fr],
578
         indbuf[i_keywords_it], indbuf[i_keywords_nl]);
579
    unknown_type=unk_leave;
10 reyssat 580
    translate(buf);
6881 bpr 581
    appenditem1(buf,lind,serial,2,module_language);
582
 
6884 bpr 583
/*   append level information, with weight 2 */
10 reyssat 584
    snprintf(buf,sizeof(buf),"%s",indbuf[i_level]);
3718 reyssat 585
    ovlstrcpy(lbuf,"level");
10 reyssat 586
    for(p1=buf; *p1; p1++) if(!isalnum(*p1)) *p1=' ';
6564 bpr 587
    q=buf+strlen(buf);
588
    for(p1=find_word_start(buf); (*p1) && (p1 < q) ;
6884 bpr 589
    p1=find_word_start(p2)) {
590
    p2=find_word_end(p1);
591
    if(p2!=NULL) *p2++=0; else p2=p1+strlen(p1);
592
    if(!isalpha(*p1) ||
593
       (!isdigit(*(p1+1)) && *(p1+1)!=0) ||
594
       (*(p1+1)!=0 && *(p1+2)!=0))
595
      continue;
596
    *p1=tolower(*p1);
597
    ovlstrcpy(lbuf+strlen("level"),p1);
598
    appenditem(lbuf,lind,serial,2,module_language);
10 reyssat 599
    }
6884 bpr 600
/*   append total weight of module to weight file site2/weight.xx  */
10 reyssat 601
    fprintf(weightf,"%d:%d\n",serial,tweight);
602
}
603
 
604
void modules(void)
605
{
606
    int i,j,k,d;
607
    char namebuf[MAX_LINELEN+1];
6881 bpr 608
    char mdic[MAX_LINELEN+1], sdic[MAX_LINELEN+1], gdic[MAX_LINELEN+1], ddic[MAX_LINELEN+1];
10 reyssat 609
 
610
    for(j=0;j<langcnt;j++) {
6884 bpr 611
    snprintf(namebuf,sizeof(namebuf),"%s/weight.%s",outdir,lang[j]);
612
    weightf=fopen(namebuf,"w");
613
    snprintf(mdic,sizeof(mdic),"%s/%s.%s",dicdir,maindic,lang[j]);
614
    snprintf(sdic,sizeof(sdic),"%s/%s.%s",dicdir,suffixdic,lang[j]);
615
    snprintf(gdic,sizeof(gdic),"%s/%s.%s",dicdir,groupdic,lang[j]);
616
    snprintf(ddic,sizeof(ddic),"%s/%s.%s",dicdir,domaindic,lang[j]);
617
    suffix_dic(sdic); prepare_dic(gdic);
618
    gdicbuf=dicbuf; gentrycount=entrycount;
619
    memmove(gentry,entry,gentrycount*sizeof(entry[0]));
620
    prepare_dic(mdic);
621
    mdicbuf=dicbuf; mentrycount=entrycount;
622
    memmove(mentry,entry,mentrycount*sizeof(entry[0]));
623
    prepare_dic(ddic);
624
    ddicbuf=dicbuf; dentrycount=entrycount;
625
    memmove(dentry,entry,dentrycount*sizeof(entry[0]));
626
    unknown_type=unk_leave; translate(ignore[j]);
627
    for(i=0;i<modcnt;i++) {
628
        if(mod[i].langcnt>0) {
629
        for(d=k=0;k<mod[i].langcnt;k++)
630
          if(mod[i].langs[k]<mod[i].langs[d]) d=k;
631
        for(k=0;k<mod[i].langcnt && mod[i].langs[k]!=j;k++);
632
        if(k>=mod[i].langcnt) k=d;
633
        snprintf(namebuf,MAX_LINELEN,"%s.%s",mod[i].name,
634
             lang[mod[i].langs[k]]);
635
        onemodule(namebuf,mod[i].counts[k],j);
636
        }
637
        else {
638
        onemodule(mod[i].name,mod[i].counts[0],j);
639
        }
10 reyssat 640
    }
6884 bpr 641
    if(mentrycount>0) free(mdicbuf);
642
    if(gentrycount>0) free(gdicbuf);
643
    if(suffixcnt>0) free(sufbuf);
644
    if(dentrycount>0) free(ddicbuf);
645
    if(weightf) fclose(weightf);
646
    }
10 reyssat 647
}
648
 
6881 bpr 649
/* FIXME ? differences with appenditem - use fprintf instead of  snprintf */
10 reyssat 650
void sappenditem(char *word, int lind, int serial, int weight)
651
{
652
    int ll;
653
    char *p;
6881 bpr 654
 
10 reyssat 655
    if(!isalnum(*word) || (ll=strlen(word))<2 ||
8100 bpr 656
       wordchr2(taken,word)!=NULL ||
657
       wordchr2(ignore[lind],word)!=NULL ||
10 reyssat 658
       takenlen>=MAX_LINELEN-ll-16)
659
      return;
660
    if(ll==2 && (!isdigit(word[0]) || !isalpha(word[1]))) return;
661
    for(p=word;*p;p++) if(!isalnum(*p) && *p!=' ') return;
662
    taken[takenlen++]=' ';taken[takenlen++]=' ';
3718 reyssat 663
    ovlstrcpy(taken+takenlen,word);
10 reyssat 664
    takenlen+=ll; tweight+=weight;
665
    fprintf(indf,"%s:%d?%d\n",word,serial,weight);
666
}
667
 
668
void onesheet(int serial, int lind)
669
{
670
    int i;
671
    unsigned char trlist[]={
6967 bpr 672
    s_title,s_description,s_domain,s_keywords,s_information
10 reyssat 673
    };
674
    #define trcnt (sizeof(trlist)/sizeof(trlist[0]))
675
    char *p1, *p2, buf[MAX_LINELEN+1];
6881 bpr 676
 
10 reyssat 677
    if(sheet_index(serial)) return;
678
    fprintf(listf,"%s\n",mod[serial].name+3);
679
    fprintf(titf,"%d:%s\n",serial,sindbuf[s_title]);
680
    fprintf(descf,"%d:%s\n",serial,sindbuf[s_description]);
6967 bpr 681
    fprintf(remf,"%d:%s\n",serial,sindbuf[s_information]);
8650 bpr 682
    fprintf(titlef,"%s:%s\n",mod[serial].name,sindbuf[s_title]);
7915 bpr 683
 
6881 bpr 684
    entrycount=dentrycount; dicbuf=ddicbuf;
685
    memmove(entry,dentry,dentrycount*sizeof(entry[0]));
10 reyssat 686
    unknown_type=unk_leave;
687
    for(i=0;i<trcnt;i++) {
6884 bpr 688
    detag(sindbuf[trlist[i]]);
8100 bpr 689
    deaccent2(sindbuf[trlist[i]]);
6884 bpr 690
    comma(sindbuf[trlist[i]]);
8100 bpr 691
    singlespace2(sindbuf[trlist[i]]);
6884 bpr 692
    translate(sindbuf[trlist[i]]);
6881 bpr 693
    }
7915 bpr 694
 
6881 bpr 695
    entrycount=mentrycount; dicbuf=mdicbuf;
696
    memmove(entry,mentry,mentrycount*sizeof(entry[0]));
697
    unknown_type=unk_leave;
698
    for(i=0;i<trcnt;i++) {
6884 bpr 699
    suffix_translate(sindbuf[trlist[i]]);
700
    translate(sindbuf[trlist[i]]);
10 reyssat 701
    }
702
    taken[0]=0; takenlen=tweight=0;
3718 reyssat 703
    ovlstrcpy(buf,sindbuf[s_title]); towords(buf);
10 reyssat 704
    for(p1=find_word_start(buf);*p1;
6884 bpr 705
    p1=find_word_start(p2)) {
706
    p2=find_word_end(p1); if(*p2) *p2++=0;
707
    sappenditem(p1,lind,serial,4);
10 reyssat 708
    }
709
    snprintf(buf,sizeof(buf),"%s %s %s %s",
6884 bpr 710
         sindbuf[s_description],sindbuf[s_keywords],
6967 bpr 711
         sindbuf[s_domain],sindbuf[s_information]);
10 reyssat 712
    towords(buf);
713
    for(p1=find_word_start(buf);*p1;
6884 bpr 714
    p1=find_word_start(p2)) {
715
    p2=find_word_end(p1); if(*p2) *p2++=0;
716
    sappenditem(p1,lind,serial,2);
10 reyssat 717
    }
718
    entrycount=gentrycount; dicbuf=gdicbuf;
719
    memmove(entry,gentry,gentrycount*sizeof(entry[0]));
720
    unknown_type=unk_delete;
3718 reyssat 721
    ovlstrcpy(buf,sindbuf[s_title]); translate(buf);
10 reyssat 722
    for(p1=find_word_start(buf); *p1;
6884 bpr 723
    p1=find_word_start(p2)) {
724
    p2=strchr(p1,',');
725
    if(p2!=NULL) *p2++=0; else p2=p1+strlen(p1);
726
    if(strlen(p1)<=0) continue;
727
    sappenditem(p1,lind,serial,4);
10 reyssat 728
    }
729
    snprintf(buf,sizeof(buf),"%s, %s, %s, %s",
6884 bpr 730
         sindbuf[s_description],sindbuf[s_keywords],
6967 bpr 731
         sindbuf[s_domain],sindbuf[s_information]);
10 reyssat 732
    translate(buf);
733
    for(p1=find_word_start(buf); *p1;
6884 bpr 734
    p1=find_word_start(p2)) {
735
    p2=strchr(p1,',');
736
    if(p2!=NULL) *p2++=0; else p2=p1+strlen(p1);
737
    if(strlen(p1)<=0) continue;
738
    sappenditem(p1,lind,serial,2);
10 reyssat 739
    }
740
    fprintf(weightf,"%d:%d\n",serial,tweight);
741
}
742
 
743
void sheets(void)
744
{
745
    int i,j;
6961 bpr 746
    char mdic[MAX_LINELEN+1], sdic[MAX_LINELEN+1], gdic[MAX_LINELEN+1], ddic[MAX_LINELEN+1];
10 reyssat 747
    char buf[MAX_LINELEN+1];
7915 bpr 748
 
10 reyssat 749
    for(j=0;j<langcnt;j++) {
6884 bpr 750
    snprintf(buf,sizeof(buf),"%s/index/title.%s",sheetdir,lang[j]);
751
    titf=fopen(buf,"w");
752
    snprintf(buf,sizeof(buf),"%s/index/description.%s",sheetdir,lang[j]);
753
    descf=fopen(buf,"w");
754
    snprintf(buf,sizeof(buf),"%s/index/%s",sheetdir,lang[j]);
755
    indf=fopen(buf,"w");
756
    snprintf(buf,sizeof(buf),"%s/index/list.%s",sheetdir,lang[j]);
757
    listf=fopen(buf,"w");
758
    snprintf(buf,sizeof(buf),"%s/index/weight.%s",sheetdir,lang[j]);
759
    weightf=fopen(buf,"w");
760
    snprintf(buf,sizeof(buf),"%s/index/addr.%s",sheetdir,lang[j]);
761
    addrf=fopen(buf,"w");
6967 bpr 762
    snprintf(buf,sizeof(buf),"%s/index/information.%s",sheetdir,lang[j]);
6961 bpr 763
    remf=fopen(buf,"w");
6884 bpr 764
    snprintf(buf,sizeof(buf),"%s/index/serial.%s",sheetdir,lang[j]);
765
    serialf=fopen(buf,"w");
8650 bpr 766
    snprintf(buf,sizeof(buf),"%s/index/tit.%s",sheetdir,lang[j]);
8659 bpr 767
    titlef=fopen(buf,"w");
6884 bpr 768
    snprintf(mdic,sizeof(mdic),"%s/%s.%s",dicdir,maindic,lang[j]);
769
    snprintf(sdic,sizeof(sdic),"%s/%s.%s",dicdir,suffixdic,lang[j]);
770
    snprintf(gdic,sizeof(gdic),"%s/%s.%s",dicdir,groupdic,lang[j]);
6961 bpr 771
    snprintf(ddic,sizeof(ddic),"%s/%s.%s",dicdir,domaindic,lang[j]);
6884 bpr 772
    suffix_dic(sdic); prepare_dic(gdic);
773
    gdicbuf=dicbuf; gentrycount=entrycount;
774
    memmove(gentry,entry,gentrycount*sizeof(entry[0]));
775
    prepare_dic(mdic);
776
    mdicbuf=dicbuf; mentrycount=entrycount;
777
    memmove(mentry,entry,mentrycount*sizeof(entry[0]));
6973 bpr 778
    prepare_dic(ddic);
779
    ddicbuf=dicbuf; dentrycount=entrycount;
780
    memmove(dentry,entry,dentrycount*sizeof(entry[0]));
6884 bpr 781
    unknown_type=unk_leave; translate(ignore[j]);
782
    for(i=0;i<modcnt;i++) {
783
        if(mod[i].langs[0]!=j) continue;
784
        fprintf(addrf,"%d:%s\n",i,mod[i].name+3);
785
        fprintf(serialf,"%s:%d\n",mod[i].name+3,i);
786
        onesheet(i,j);
10 reyssat 787
    }
6884 bpr 788
    if(mentrycount>0) free(mdicbuf);
789
    if(gentrycount>0) free(gdicbuf);
790
    if(suffixcnt>0) free(sufbuf);
6961 bpr 791
    if(dentrycount>0) free(ddicbuf);
6884 bpr 792
    fclose(titf); fclose(descf); fclose(indf); fclose(listf);
793
    fclose(weightf); fclose(addrf); fclose(serialf);
794
    }
10 reyssat 795
}
796
 
797
int main()
798
{
8123 bpr 799
    gentry=xmalloc(entry_size);
800
    dentry=xmalloc(entry_size);
801
    mentry=xmalloc(entry_size);
10 reyssat 802
    prep();
803
    if(modcnt>0) modules();
804
    clean();
805
    sprep();
806
    if(modcnt>0) sheets();
807
    return 0;
808
}
809