Subversion Repositories wimsdev

Rev

Rev 16987 | Go to most recent revision | Details | Compare with Previous | Last modification | View Log | RSS feed

Rev Author Line No. Line
10 reyssat 1
/*    Copyright (C) 1998-2003 XIAO, Gang of Universite de Nice - Sophia Antipolis
2
 *
3
 *  This program is free software; you can redistribute it and/or modify
4
 *  it under the terms of the GNU General Public License as published by
5
 *  the Free Software Foundation; either version 2 of the License, or
6
 *  (at your option) any later version.
7
 *
8
 *  This program is distributed in the hope that it will be useful,
9
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
10
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
11
 *  GNU General Public License for more details.
12
 *
13
 *  You should have received a copy of the GNU General Public License
14
 *  along with this program; if not, write to the Free Software
15
 *  Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
16
 */
17
 
6884 bpr 18
/*  This is an internal program,
7915 bpr 19
 * used to index modules for search engine.
6884 bpr 20
 */
10 reyssat 21
 
8100 bpr 22
#include "../Lib/libwims.h"
8123 bpr 23
#include "translator_.h"
24
#include "suffix.h"
10 reyssat 25
 
6884 bpr 26
#define MAX_LANGS    MAX_LANGUAGES
27
#define MAX_MODULES    65536
28
char *moduledir=    "public_html/modules";
29
char *sheetdir=     "public_html/bases/sheet";
15375 bpr 30
char *glossarydir=  "public_html/scripts/data/glossary";
6884 bpr 31
char *dicdir=       "public_html/bases";
32
char *outdir=       "public_html/bases/site2";
9090 bpr 33
char *sheetoutdir=  "public_html/bases/sheet/index";
15375 bpr 34
char *glossaryoutdir=  "public_html/scripts/data/glossary/index";
6884 bpr 35
char *maindic=      "sys/words";
36
char *groupdic=     "sys/wgrp/wgrp";
37
char *suffixdic=    "sys/suffix";
38
char *domaindic=    "sys/domaindic";
39
char *ignoredic=    "sys/indignore";
40
char *conffile=     "log/wims.conf";
9092 bpr 41
char *mlistbase=    "lists";
10 reyssat 42
 
43
char lang[MAX_LANGS][4]={
1792 bpr 44
    "en","fr","cn","es","it","nl","si","ca","pt"
10 reyssat 45
};
6884 bpr 46
#define DEFAULT_LANGCNT    6
10 reyssat 47
char allang[MAX_LANGS][4]={
6564 bpr 48
    "en","fr","cn","es","it","nl","de","si","ca","pt"
10 reyssat 49
};
50
#define allangcnt 8
51
char ignore[MAX_LANGS][MAX_LINELEN+1];
52
char mlistfile[MAX_LANGS][256];
53
int langcnt;
15444 bpr 54
FILE *langf, *titf, *descf, *weightf, *robotf, *indf, *listf, *addrf, *serialf, *authorf, *versionf, *remf;
16987 bpr 55
FILE *titf_ca,*titf_en,*titf_es,*titf_fr,*titf_it,*titf_nl;
10 reyssat 56
/* Module categories: the name looked for in a module's "category" field and
 * the one-letter code used to build the per-category output file names.
 * The first entry is the catch-all category.
 */
struct cat {
    char *name;
    char typ;
} cat[] = {
    { .name = "all_types",  .typ = 'A' },
    { .name = "exercise",   .typ = 'X' },
    { .name = "oef",        .typ = 'O' },
    { .name = "tool",       .typ = 'T' },
    { .name = "recreation", .typ = 'R' },
    { .name = "reference",  .typ = 'Y' },
    { .name = "document",   .typ = 'D' },
    { .name = "popup",      .typ = 'P' },
    { .name = "datamodule", .typ = 'M' }
};
/* number of entries in cat[] */
#define catno (sizeof(cat)/sizeof(cat[0]))
71
 
72
struct mod {
73
    char *name;
74
    unsigned char langs[MAX_LANGS];
75
    int counts[MAX_LANGS];
15440 bpr 76
    int langcnt;
10 reyssat 77
} mod[MAX_MODULES];
15440 bpr 78
 
79
// serial-> the name of the module indexed by serial, lang and its classe
80
struct revmod {
81
    char name[MAX_MODULELEN+1];
82
    int lang;
83
    int imod;
15482 bpr 84
    char keywords[MAX_FNAME];
15440 bpr 85
} revmod[MAX_MODULES];
10 reyssat 86
int modcnt;
87
 
88
char *mlist;
15442 bpr 89
char *sheetindex[]={
90
  "title", "description",
91
  "duration", "severity",
92
  "level", "domain",
93
  "keywords", "reserved1", "reserved2", "information"
94
};
95
/* correspond to the order of sheetindex */
96
char *glindex[]={
97
  "gl_title", "gl_description",
98
  "", "",
99
  "gl_level", "gl_domain",
100
  "gl_keywords","","",""};
10 reyssat 101
 
15442 bpr 102
#define SHEETINDEX_NO (sizeof(sheetindex)/sizeof(sheetindex[0]))
103
char gsindbuf[SHEETINDEX_NO+1][MAX_LINELEN+1];
104
 
105
/* do not modify the order, correspond to the order in the sheet file */
106
enum{s_title, s_description,
107
      s_duration, s_severity,
108
      s_level, s_domain,
109
      s_keywords, s_reserved1, s_reserved2,
110
      s_information
111
};
112
 
113
char *modindex[]={
114
  "title", "description",
115
  "author", "address", "copyright",
116
  "version", "wims_version", "language",
117
  "category", "level", "domain", "keywords",
118
  "keywords_ca", "keywords_en", "keywords_fr", "keywords_it", "keywords_nl",
16987 bpr 119
  "title_ca", "title_en", "title_es", "title_fr", "title_it", "title_nl",
15442 bpr 120
  "require"
121
};
122
#define MODINDEX_NO (sizeof(modindex)/sizeof(modindex[0]))
123
char indbuf[MODINDEX_NO][MAX_LINELEN+1];
124
enum{i_title, i_description,
125
  i_author,i_address,i_copyright,
126
  i_version,i_wims_version,i_language,
127
  i_category,i_level,i_domain,i_keywords,
128
  i_keywords_ca,i_keywords_en,i_keywords_fr,i_keywords_it,i_keywords_nl,
16987 bpr 129
  i_title_ca,i_title_en,i_title_es,i_title_fr,i_title_it,i_title_nl,
15442 bpr 130
  i_require
131
};
132
 
133
char *module_special_file[]={
134
  "intro","help","about"
135
};
136
#define MODSPEC_NO (sizeof(module_special_file)/sizeof(module_special_file[0]))
137
char module_language[4];
138
 
139
char *mdicbuf, *gdicbuf, *ddicbuf, *gentry, *mentry, *dentry;
140
 
141
int gentrycount, mentrycount, dentrycount;
142
 
143
 
6884 bpr 144
/*  fold known accented letters to unaccented, other strange characters to space
7915 bpr 145
 *  apostrophe is among the exceptions to be kept (important for multi-word expressions)
6884 bpr 146
 */
8100 bpr 147
void deaccent2(char *p)
10 reyssat 148
{
12248 bpr 149
  char *sp;
150
  char *v;
151
  for(sp=p;*sp;sp++) {
152
  if(*sp<0 && (v=strchr(acctab,*sp))!=NULL)
153
    *sp=*(deatab+(v-acctab));
154
  if(!isalnum(*sp) && strchr(",.&$+*",*sp)==0) *sp=' ';
155
  else *sp=tolower(*sp);
156
  }
10 reyssat 157
}
158
 
6884 bpr 159
/*  Replace, in place, every character that is neither alphanumeric nor one
 *  of "&$+*" by a space.
 */
void towords(char *p)
{
  char *pp;
  for(pp=p;*pp;pp++) {
    /* <ctype.h> functions need an unsigned char value */
    if(!isalnum((unsigned char)*pp) && strchr("&$+*",*pp)==0) *pp=' ';
  }
}
165
 
6884 bpr 166
/*  Find the first occurrence of w in p that stands as a whole word, i.e. is
 *  preceded by the start of p or a space character and followed by a space
 *  character or the end of the string.
 *  Returns a pointer to that occurrence, or NULL when there is none.
 */
char *wordchr2(char *p, char *w)
{
  size_t wlen=strlen(w);   /* loop invariant */
  char *r;

  for(r=strstr(p,w); r!=NULL; r=strstr(r+1,w)) {
    int left_ok = (r==p) || isspace((unsigned char)r[-1]);
    int right_ok = r[wlen]==0 || isspace((unsigned char)r[wlen]);
    if(left_ok && right_ok) return r;
    /* Only reachable with an empty w: never step past the terminator. */
    if(*r==0) return NULL;
  }
  return NULL;
}
176
 
177
/*  Return a pointer just past the end of the tag starting at p.
 *  A leading '<' is skipped; nested '<...>' and single- or double-quoted
 *  sections are stepped over as a whole. When a quote is never closed the
 *  end of the string is returned; when no '>' is found the result points
 *  to the terminating 0.
 */
char *find_tag_end(char *p)
{
  char *cur=p;
  char *closing;

  if(*cur=='<') cur++;
  while(*cur!=0 && *cur!='>') {
    switch(*cur) {
      case '<':
        /* nested tag: resume right after it */
        cur=find_tag_end(cur);
        break;
      case '"':
      case '\'':
        /* quoted section: resume after the matching quote */
        closing=strchr(cur+1,*cur);
        if(closing==NULL) return p+strlen(p);
        cur=closing+1;
        break;
      default:
        cur++;
        break;
    }
  }
  if(*cur=='>') cur++;
  return cur;
}
197
 
198
/*  Find the first tag named `tag` (case-insensitive) in p.
 *  A '<' matches when it is followed by `tag` and the next character is not
 *  alphanumeric. Returns a pointer to that '<', or to the terminating 0 of p
 *  when there is no such tag.
 */
char *find_tag(char *p, char *tag)
{
  size_t len=strlen(tag);
  char *pp;

  for(pp=strchr(p,'<'); pp!=NULL; pp=strchr(pp+1,'<')) {
    /* <ctype.h> functions need an unsigned char value */
    if(strncasecmp(pp+1,tag,len)==0 && !isalnum((unsigned char)pp[1+len]))
      return pp;
  }
  return p+strlen(p);
}
208
 
6884 bpr 209
/*  Remove every html tag from p, in place.
 *  A tag whose end coincides with the end of the string (including a tag
 *  that is never closed) truncates the string at its '<'.
 */
void detag(char *p)
{
  char *open=strchr(p,'<');

  while(open!=NULL) {
    char *after=find_tag_end(open);
    if(*after==0) {
      *open=0;
      break;
    }
    /* close the gap, then look again from the same position */
    ovlstrcpy(open,after);
    open=strchr(open,'<');
  }
}
219
 
6819 reyssat 220
/* Put a space after every comma of p, so that the end of a word can be seen. */
void comma(char *p)
{
  char *hit=strchr(p,',');

  while(hit!=NULL) {
    string_modify3(p,hit,hit+1,", ");
    hit=strchr(hit+1,',');
  }
}
15482 bpr 228
/* Replace every '/' of p by ','. (Despite its name, this function deals
 * with forward slashes.)
 */
void backslash(char *p)
{
  char *hit=strchr(p,'/');

  while(hit!=NULL) {
    string_modify3(p,hit,hit+1,",");
    hit=strchr(hit+1,'/');
  }
}
15375 bpr 235
/* _getdef from lines.c except the error msg*/
10 reyssat 236
void _getdef(char buf[], char *name, char value[])
237
{
15375 bpr 238
  char *p1, *p2, *p3, *p4;
10 reyssat 239
 
15375 bpr 240
  if(*name==0) goto nothing;      /* this would create segfault. */
12248 bpr 241
  for(p1=strstr(buf,name); p1!=NULL; p1=strstr(p1+1,name)) {
6884 bpr 242
    p2=find_word_start(p1+strlen(name));
243
    if((p1>buf && !isspace(*(p1-1))) || *p2!='=') continue;
15375 bpr 244
    p3=p1; while(p3>buf && *(p3-1)!='\n') p3--;
245
    p3=find_word_start(p3);
246
    if(p3<p1 && *p3!='!') continue;
247
    if(p3<p1) {
248
      p3++; p4=find_word_end(p3);
249
      if(find_word_start(p4)!=p1) continue;
250
      if(p4-p3!=3 || (strncmp(p3,"set",3)!=0 &&
251
           strncmp(p3,"let",3)!=0 &&
252
           strncmp(p3,"def",3)!=0)) {
253
        if(p4-p3!=6 || strncmp(p3,"define",6)!=0) continue;
254
      }
255
    }
256
    p2++;p3=strchr(p2,'\n'); if(p3==NULL) p3=p2+strlen(p2);
257
    p2=find_word_start(p2);
258
    if(p2>p3) goto nothing;
259
    /*if(p3-p2>=MAX_LINELEN) user_error("cmd_output_too_long");*/
260
    memmove(value,p2,p3-p2); value[p3-p2]=0;
261
    strip_trailing_spaces(value); return;
12248 bpr 262
  }
15375 bpr 263
nothing:
15394 bpr 264
  value[0]=0;
10 reyssat 265
}
266
 
6884 bpr 267
/*  Get variable definition from a file.
 * Result stored in buffer value of length MAX_LINELEN.
 * value is left empty when the file cannot be opened or read, is empty,
 * or does not define name.
 */
void getdef(char *fname, char *name, char value[])
{
  FILE *f;
  char *buf;
  long fsize;
  size_t got;

  value[0]=0;
  f=fopen(fname,"r"); if(f==NULL) return;
  /* the file size is obtained by seeking to the end */
  if(fseek(f,0,SEEK_END)!=0 || (fsize=ftell(f))<=0 || fseek(f,0,SEEK_SET)!=0) {
    fclose(f); return;
  }
  buf=xmalloc(fsize+256);
  got=fread(buf,1,fsize,f);
  fclose(f);
  if(got>0) {
    buf[got]=0;
    _getdef(buf,name,value);
  }
  /* released on every path, including the one where nothing was read */
  free(buf);
}
285
 
15442 bpr 286
void init(void)
10 reyssat 287
{
12248 bpr 288
  char buf[MAX_LINELEN+1];
15442 bpr 289
  char *p1,*p2,*s;
290
  int i,l;
12248 bpr 291
  FILE *f;
6881 bpr 292
 
12248 bpr 293
  s=getenv("modind_outdir"); if(s!=NULL && *s!=0) outdir=s;
294
  s=getenv("modind_sheetdir"); if(s!=NULL && *s!=0) sheetdir=s;
295
  s=getenv("modind_sheetoutdir"); if(s!=NULL && *s!=0) sheetoutdir=s;
15442 bpr 296
  s=getenv("modind_glossaryoutdir"); if(s!=NULL && *s!=0) glossaryoutdir=s;
6884 bpr 297
/* take the langs declared in conffile */
12248 bpr 298
  getdef(conffile,"site_languages",buf);
15442 bpr 299
  langcnt=0;
12248 bpr 300
  for(p1=buf;*p1;p1++) if(!isalnum(*p1)) *p1=' ';
301
  for(p1=find_word_start(buf); *p1 && langcnt<MAX_LANGS; p1=find_word_start(p2)) {
6884 bpr 302
    p2=find_word_end(p1);
303
    if(p2!=p1+2 || !isalpha(*p1) || !isalpha(*(p1+1))) continue;
304
    memmove(lang[langcnt],p1,2); lang[langcnt++][2]=0;
12248 bpr 305
  }
306
  if(langcnt==0) {/*  default languages */
6884 bpr 307
    langcnt=DEFAULT_LANGCNT;
12248 bpr 308
  }
309
  for(i=0;i<langcnt;i++) {
310
    snprintf(buf,sizeof(buf),"%s/%s.%s",dicdir,ignoredic,lang[i]);
311
    f=fopen(buf,"r"); if(f==NULL) continue;
312
    l=fread(ignore[i],1,MAX_LINELEN,f);fclose(f);
313
    if(l<0 || l>=MAX_LINELEN) l=0;
314
    ignore[i][l]=0;
315
  }
15442 bpr 316
}
317
/*  Preparation of data */
318
void prep(void)
319
{
320
  char buf[MAX_LINELEN+1];
321
  char *p1,*p2,*s,*old;
322
  int i,l,thislang,t;
15444 bpr 323
  modcnt=0; old="";
15442 bpr 324
  snprintf(buf,sizeof(buf),"%s/addr",outdir);
325
  addrf=fopen(buf,"w");
326
  if(!addrf) { fprintf(stderr,"modind: error creating output files addr.\n"); exit(1);}
327
  snprintf(buf,sizeof(buf),"%s/serial",outdir);
328
  serialf=fopen(buf,"w");
329
  if(!serialf) { fprintf(stderr,"modind: error creating output files serial.\n"); exit(1);}
330
 
331
  s=getenv("mlist"); if(s==NULL) exit(1);
332
  l=strlen(s); if(l<0 || l>100*MAX_LINELEN) exit(1);
15444 bpr 333
  mlist=xmalloc(l+16); ovlstrcpy(mlist,s);
15442 bpr 334
 
12248 bpr 335
  for(t=0, p1=find_word_start(mlist); *p1 && modcnt<MAX_MODULES;
9090 bpr 336
        p1=find_word_start(p2), t++) {
12248 bpr 337
    p2=find_word_end(p1);
338
    l=p2-p1; if(*p2) *p2++=0;
339
    fprintf(addrf,"%d:%s\n",t,p1);
340
    fprintf(serialf,"%s:%d\n",p1,t);
341
    thislang=-1;
6564 bpr 342
/* language is taken from the address */
12248 bpr 343
    if(l>3 && p1[l-3]=='.') {
344
      for(i=0;i<langcnt;i++) if(strcasecmp(lang[i],p1+l-2)==0) break;
345
      if(i<langcnt) {p1[l-3]=0; thislang=i;}
346
      else {/*  unknown language, not referenced */
6884 bpr 347
        continue;
9090 bpr 348
      }
12248 bpr 349
    }
350
    if(modcnt>0 && strcmp(old,p1)==0 && thislang>=0) {
351
      if(mod[modcnt-1].langcnt<langcnt) {
6884 bpr 352
        mod[modcnt-1].langs[mod[modcnt-1].langcnt]=thislang;
353
        mod[modcnt-1].counts[mod[modcnt-1].langcnt]=t;
354
        (mod[modcnt-1].langcnt)++;
12248 bpr 355
      }
6884 bpr 356
    }
12248 bpr 357
    else {
358
      mod[modcnt].name=old=p1;
359
      if(thislang>=0) {
360
        mod[modcnt].langs[0]=thislang;
361
        mod[modcnt].langcnt=1;
362
      }
363
      else mod[modcnt].langcnt=0;
364
      mod[modcnt].counts[0]=t;
365
      modcnt++;
6884 bpr 366
    }
12248 bpr 367
  }
368
  snprintf(buf,sizeof(buf),"%s/language",outdir);
369
  langf=fopen(buf,"w");
370
  snprintf(buf,sizeof(buf),"%s/title",outdir);
371
  titf=fopen(buf,"w");
16987 bpr 372
  snprintf(buf,sizeof(buf),"%s/title_ca",outdir);
373
  titf_ca=fopen(buf,"w");
374
  snprintf(buf,sizeof(buf),"%s/title_en",outdir);
375
  titf_en=fopen(buf,"w");
376
  snprintf(buf,sizeof(buf),"%s/title_es",outdir);
377
  titf_es=fopen(buf,"w");
378
  snprintf(buf,sizeof(buf),"%s/title_fr",outdir);
379
  titf_fr=fopen(buf,"w");
380
  snprintf(buf,sizeof(buf),"%s/title_it",outdir);
381
  titf_it=fopen(buf,"w");
382
  snprintf(buf,sizeof(buf),"%s/title_nl",outdir);
383
  titf_nl=fopen(buf,"w");
12248 bpr 384
  snprintf(buf,sizeof(buf),"%s/description",outdir);
385
  descf=fopen(buf,"w");
386
  snprintf(buf,sizeof(buf),"%s/author",outdir);
387
  authorf=fopen(buf,"w");
388
  snprintf(buf,sizeof(buf),"%s/version",outdir);
389
  versionf=fopen(buf,"w");
390
  snprintf(buf,sizeof(buf),"%s/%s/robot.phtml",outdir,mlistbase);
391
  robotf=fopen(buf,"w");
392
  fclose(addrf); fclose(serialf);
16987 bpr 393
  if(!robotf || !versionf || !authorf || !descf || !titf
394
    || !titf_ca || !titf_en || !titf_es || !titf_fr || !titf_it || !titf_nl
395
    || !langf) {
12248 bpr 396
    fprintf(stderr,"modind: error creating output files.\n");
397
    exit(1);
398
  }
10 reyssat 399
}
400
 
401
void sprep(void)
402
{
15440 bpr 403
  char buf[MAX_LINELEN+1];
12248 bpr 404
  char *p1,*p2,*s;
15440 bpr 405
  int i,l,t,thislang;
6881 bpr 406
 
12248 bpr 407
  modcnt=0;
15440 bpr 408
  snprintf(buf,sizeof(buf),"%s/addr",sheetoutdir);
409
  addrf=fopen(buf,"w");
410
  if(!addrf) { fprintf(stderr,"modind: error creating output files addr.\n"); exit(1);}
411
  snprintf(buf,sizeof(buf),"%s/serial",sheetoutdir);
412
  serialf=fopen(buf,"w");
12248 bpr 413
  s=getenv("slist"); if(s==NULL) return;
414
  l=strlen(s); if(l<0 || l>100*MAX_LINELEN) return;
415
  mlist=xmalloc(l+16); ovlstrcpy(mlist,s);
15440 bpr 416
  for(t=0,p1=find_word_start(mlist); *p1 && modcnt<MAX_MODULES; p1=find_word_start(p2),t++) {
6884 bpr 417
    p2=find_word_end(p1);
418
    l=p2-p1; if(*p2) *p2++=0;
419
    for(i=0;i<langcnt;i++) if(strncasecmp(lang[i],p1,2)==0) break;
420
    if(i<langcnt) thislang=i; else continue;
15440 bpr 421
    ovlstrcpy(revmod[t].name,p1);
422
    revmod[t].lang=thislang;
6884 bpr 423
    mod[modcnt].name=p1;
424
    mod[modcnt].langs[0]=thislang;
425
    mod[modcnt].langcnt=1;
15440 bpr 426
    revmod[t].imod=modcnt;
17024 bpr 427
    mod[modcnt].counts[0]=t;
15440 bpr 428
    fprintf(addrf,"%d:%s\n",modcnt,p1);
429
    fprintf(serialf,"%s:%d\n",p1,modcnt);
15539 bpr 430
    modcnt++;
12248 bpr 431
  }
17024 bpr 432
  fclose(addrf); fclose(serialf);
10 reyssat 433
}
434
 
15375 bpr 435
void gprep(void)
436
{
15440 bpr 437
  char buf[MAX_LINELEN+1];
438
  char *p1,*p2,*s,*old;
439
  int l,i,t,thislang;
15444 bpr 440
  modcnt=0; old="";
15440 bpr 441
  snprintf(buf,sizeof(buf),"%s/addr",glossaryoutdir);
442
  addrf=fopen(buf,"w");
443
  if(!addrf) { fprintf(stderr,"modind: error creating output files addr.\n"); exit(1);}
444
  snprintf(buf,sizeof(buf),"%s/serial",glossaryoutdir);
445
  serialf=fopen(buf,"w");
15375 bpr 446
  s=getenv("glist"); if(s==NULL) return;
447
  l=strlen(s); if(l<0 || l>100*MAX_LINELEN) return;
448
  mlist=xmalloc(l+16); ovlstrcpy(mlist,s);
15440 bpr 449
  for(t=0,p1=find_word_start(mlist); *p1 && modcnt<MAX_MODULES; p1=find_word_start(p2),t++) {
15375 bpr 450
    p2=find_word_end(p1);
451
    if(*p2) *p2++=0;
15440 bpr 452
    fprintf(addrf,"%d:%s\n",t,p1);
453
    fprintf(serialf,"%s:%d\n",p1,t);
454
    ovlstrcpy(revmod[t].name,p1);
15482 bpr 455
    ovlstrcpy(revmod[t].keywords,p1);
15375 bpr 456
    s=strchr(p1,'/');
457
    if(s != NULL) s=strchr(s+1,'/');
458
    if(s==NULL) {
459
      fprintf(stderr,"modind: no language %s\n",p1); exit(1);
460
    }
15482 bpr 461
    revmod[t].keywords[s-p1]=0;
15375 bpr 462
    s++;
463
    for(i=0;i<langcnt;i++) if(strncasecmp(lang[i],s,2)==0) break;
15440 bpr 464
    thislang = i<langcnt ? i : -1;
465
    revmod[t].lang=i;
466
    s[0]=s[1]='x';
467
    if(modcnt>0 && strcmp(old,p1)==0 && thislang >= 0) {
468
      if(mod[modcnt-1].langcnt<langcnt) {
469
        mod[modcnt-1].langs[mod[modcnt-1].langcnt]=thislang;
470
        mod[modcnt-1].counts[mod[modcnt-1].langcnt]=t;
471
        (mod[modcnt-1].langcnt)++;
472
      }
473
      revmod[t].imod=modcnt-1;
474
    }
475
    else {
476
      mod[modcnt].name=old=p1;
477
      if(thislang>=0) {
478
        mod[modcnt].langs[0]=thislang;
479
        mod[modcnt].langcnt=1;
480
      }
481
      else mod[modcnt].langcnt=0;
482
      mod[modcnt].counts[0]=t;
483
      revmod[t].imod=modcnt;
484
      modcnt++;
485
    }
15375 bpr 486
  }
15440 bpr 487
  fclose(addrf); fclose(serialf);
15375 bpr 488
}
489
 
6884 bpr 490
/*  read and treat module's INDEX file */
10 reyssat 491
int module_index(const char *name)
492
{
12248 bpr 493
  char *p, fbuf[MAX_LINELEN+1], ibuf[MAX_LINELEN+1];
494
  FILE *indf;
495
  int i,l;
10 reyssat 496
 
12248 bpr 497
  snprintf(fbuf,sizeof(fbuf),"%s/%s/INDEX",moduledir,name);
498
  indf=fopen(fbuf,"r");
499
  if(indf==NULL) {
500
    fprintf(stderr,"modind: INDEX of %s not found\n",fbuf); return -1;
501
  }
502
  l=fread(ibuf,1,MAX_LINELEN,indf); fclose(indf);
503
  if(l>0 && l<MAX_LINELEN) ibuf[l]=0; else return -1;
6884 bpr 504
/* treate all fields in *modindex */
12248 bpr 505
  for(i=0;i<MODINDEX_NO;i++) {
506
    _getdef(ibuf,modindex[i],indbuf[i]);
6884 bpr 507
/*  compatibility precaution */
12248 bpr 508
    if(indbuf[i][0]==':') indbuf[i][0]='.';
509
  }
510
  p=find_word_start(indbuf[i_language]);
511
  if(isalpha(*p) && isalpha(*(p+1))) {
512
    memmove(module_language,p,2); module_language[2]=0;
513
  }
514
  else ovlstrcpy(module_language,"en");
515
  return 0;
10 reyssat 516
}
517
 
518
int sheet_index(int serial)
519
{
12248 bpr 520
  char *p1, *p2, fbuf[MAX_LINELEN+1], ibuf[MAX_LINELEN+1];
521
  FILE *indf;
522
  int i,l;
10 reyssat 523
 
12248 bpr 524
  snprintf(fbuf,sizeof(fbuf),"%s/%s.def",sheetdir,mod[serial].name);
525
  indf=fopen(fbuf,"r"); if(indf==NULL) return -1;
526
  l=fread(ibuf,1,MAX_LINELEN,indf); fclose(indf);
527
  if(l>0 && l<MAX_LINELEN) ibuf[l]=0; else return -1;
15375 bpr 528
  for(i=0;i<SHEETINDEX_NO;i++) gsindbuf[i][0]=0;
12248 bpr 529
  for(i=0,p1=find_word_start(ibuf);
9090 bpr 530
      i<SHEETINDEX_NO-1 && *p1!=':' && *p1!=0;
531
      i++,p1=p2) {
12248 bpr 532
    p2=strchr(p1,'\n');
533
    if(p2!=NULL) *p2++=0; else p2=p1+strlen(p1);
8100 bpr 534
    p1=find_word_start(p1); strip_trailing_spaces2(p1);
15375 bpr 535
    snprintf(gsindbuf[i],MAX_LINELEN,"%s",p1);
12248 bpr 536
  }
537
  p2=strstr(p1,"\n:"); if(p2==NULL) p2=p1+strlen(p1);
538
  else *p2=0;
539
  p1=find_word_start(p1); strip_trailing_spaces2(p1);
540
  for(p2=p1;*p2;p2++) if(*p2=='\n') *p2=' ';
15375 bpr 541
  ovlstrcpy(gsindbuf[s_information],p1);
15440 bpr 542
  ovlstrcpy(gsindbuf[SHEETINDEX_NO],revmod[serial].name);
12248 bpr 543
  return 0;
10 reyssat 544
}
545
 
15375 bpr 546
int glossary_index(int serial)
547
{
15440 bpr 548
  char nbuf[MAX_LINELEN+1],fbuf[MAX_LINELEN+1], ibuf[MAX_LINELEN+1],*p,*s;
15375 bpr 549
  FILE *indf;
550
  int i,l;
15440 bpr 551
  s=lang[revmod[serial].lang];
552
  p=strchr(nbuf,'/');
553
  if(p != NULL) p=strchr(p+1,'/');
554
  if(p != NULL) {p[1]=s[0];p[2]=s[1];}
555
  snprintf(fbuf,sizeof(fbuf),"%s/%s",glossarydir,revmod[serial].name);
15375 bpr 556
  indf=fopen(fbuf,"r");
557
  l=fread(ibuf,1,MAX_LINELEN,indf); fclose(indf);
558
  if(l>0 && l<MAX_LINELEN) ibuf[l]=0; else return -1;
559
  for(i=0;i<SHEETINDEX_NO;i++) {
560
    _getdef(ibuf,glindex[i],gsindbuf[i]);
561
  }
15482 bpr 562
  s=gsindbuf[s_keywords]+strlen(gsindbuf[s_keywords]);
563
  *s++ = ',';
564
  ovlstrcpy(s,revmod[serial].keywords);
565
  backslash(revmod[serial].keywords);
15440 bpr 566
  ovlstrcpy(gsindbuf[SHEETINDEX_NO],nbuf);
15375 bpr 567
  return 0;
568
}
569
 
10 reyssat 570
unsigned char categories[16];
571
char taken[MAX_LINELEN+1];
572
int catcnt, takenlen, tweight;
573
 
15394 bpr 574
/* file management for appenditem */
575
#define MAX_FILES (MAX_LANGS*catno)
576
 
577
char *fnames[MAX_FILES];
578
FILE *files[MAX_FILES];
579
int open_files;
580
 
581
FILE * file_from_list(char *name){
582
  int i, l = 0, r = open_files;
583
  while (r>l){
584
    int m = (l+r)/2;
585
    int cmp = strcmp(name,fnames[m]);
586
    if (!cmp) return files[m];
587
    if (cmp < 0) r = m; else l = m+1;
588
  }
589
  for (i=open_files; i > l; i--) {files[i]=files[i-1]; fnames[i]=fnames[i-1];}
590
  fnames[l] = xmalloc(MAX_FNAME);
591
  ovlstrcpy(fnames[l],name);
592
  open_files++;
593
  return files[l]=fopen(name,"a");
594
}
595
 
10 reyssat 596
void appenditem(char *word, int lind, int serial, int weight, char *l)
597
{
12248 bpr 598
  char nbuf[MAX_LINELEN+1], buf[MAX_LINELEN+1];
599
  int i, ll;
600
  char *p;
601
  FILE *f;
6881 bpr 602
 
12248 bpr 603
  if(!isalnum(*word) || (ll=strlen(word))<2 ||
604
     wordchr2(taken,word)!=NULL ||
605
     wordchr2(ignore[lind],word)!=NULL ||
606
     takenlen>=MAX_LINELEN-ll-16)
607
    return;
608
  if(ll==2 && (!isdigit(word[0]) || !isalpha(word[1]))) return;
609
  for(p=word;*p;p++) if(!isalnum(*p) && *p!=' ') return;
610
  taken[takenlen++]=' '; taken[takenlen++]=' ';
611
  ovlstrcpy(taken+takenlen,word);
612
  takenlen+=ll; tweight+=weight;
613
  snprintf(buf,sizeof(buf),"%s:%d?%d\n",word,serial,weight);
614
  for(i=0;i<catcnt;i++) {
6884 bpr 615
    snprintf(nbuf,sizeof(nbuf),"%s/%c.%s",
12248 bpr 616
       outdir,categories[i],lang[lind]);
15394 bpr 617
    f = file_from_list(nbuf);
618
    if(f!=NULL) {fputs(buf,f);}
12248 bpr 619
  }
10 reyssat 620
}
621
 
6881 bpr 622
void appenditem1 (char *buf, int lind, int serial, int weight, char *l )
623
{
624
  char *p1, *p2 ;
625
  for(p1=find_word_start(buf); *p1;
6884 bpr 626
    p1=find_word_start(p2)) {
627
    p2=strchr(p1,',');
628
    if(p2!=NULL) *p2++=0; else p2=p1+strlen(p1);
629
    if(strlen(p1)<=0) continue;
630
    appenditem(p1,lind,serial,weight,module_language);
6881 bpr 631
  }
632
}
633
void appenditem2 (char *buf, int lind, int serial, int weight, char *l )
634
{
635
  char *p1, *p2 ;
636
  for(p1=find_word_start(buf);*p1;
12248 bpr 637
      p1=find_word_start(p2)) {
6884 bpr 638
    p2=find_word_end(p1); if(*p2) *p2++=0;
639
    appenditem(p1,lind,serial,weight,module_language);
6881 bpr 640
  }
641
}
10 reyssat 642
void onemodule(const char *name, int serial, int lind)
643
{
12248 bpr 644
  int i;
645
  unsigned char trlist[]={
646
  i_title,i_description,i_category,i_domain,i_keywords,
647
  i_require,i_author,
648
  i_keywords_ca,i_keywords_en,i_keywords_fr,i_keywords_it,i_keywords_nl,
649
  i_title_ca,i_title_en,i_title_fr,i_title_it,i_title_nl
650
  };
15375 bpr 651
  int trcnt=sizeof(trlist)/sizeof(trlist[0]);
15777 georgesk 652
  char *p1, *p2, *pp, *q, buf[15*MAX_LINELEN+15], lbuf[16];
12248 bpr 653
  FILE *f;
6881 bpr 654
 
12248 bpr 655
  if(module_index(name)) return;
656
  towords(indbuf[i_category]);
7915 bpr 657
/*   list the categories (among A=all,X=eXercise,O,D,...) corresponding
6884 bpr 658
 *   to this module
659
 */
12248 bpr 660
  for(i=catcnt=0;i<catno && catcnt<16;i++) {
15380 bpr 661
    if(wordchr2(indbuf[i_category],cat[i].name)!=NULL)
662
      categories[catcnt++]=cat[i].typ;
12248 bpr 663
  }
664
  if(catcnt==0) return;
665
  if(categories[0]!=cat[0].typ)
666
    categories[catcnt++]=cat[0].typ;
6884 bpr 667
/*  write module's name in the category.language files, for instance lists/X.fr
668
 * for french exercises
669
 */
12248 bpr 670
  for(i=0;i<catcnt;i++) {
671
    snprintf(buf,sizeof(buf),"%s/%s/%c.%s",
672
       outdir,mlistbase,categories[i],lang[lind]);
673
    f=fopen(buf,"a");
674
    if(f!=NULL) {fprintf(f,"%s\n",name); fclose(f);}
675
  }
6884 bpr 676
/*   add serial number and language (resp.title, ...) to corresponding file  */
12248 bpr 677
  fprintf(langf,"%d:%s\n",serial,module_language);
678
  fprintf(titf,"%d:%s\n",serial,indbuf[i_title]);
16987 bpr 679
  if(indbuf[i_title_ca][0]!=0)
680
    fprintf(titf_ca,"%d:%s\n",serial,indbuf[i_title_ca]);
681
  else
682
    fprintf(titf_ca,"%d:%s\n",serial,indbuf[i_title]);
683
  if(indbuf[i_title_en][0]!=0)
684
    fprintf(titf_en,"%d:%s\n",serial,indbuf[i_title_en]);
685
  else
686
    fprintf(titf_en,"%d:%s\n",serial,indbuf[i_title]);
687
  if(indbuf[i_title_es][0]!=0)
688
    fprintf(titf_es,"%d:%s\n",serial,indbuf[i_title_es]);
689
  else
690
    fprintf(titf_es,"%d:%s\n",serial,indbuf[i_title]);
691
  if(indbuf[i_title_fr][0]!=0)
692
    fprintf(titf_fr,"%d:%s\n",serial,indbuf[i_title_fr]);
693
  else
694
    fprintf(titf_fr,"%d:%s\n",serial,indbuf[i_title]);
695
  if(indbuf[i_title_it][0]!=0)
696
    fprintf(titf_it,"%d:%s\n",serial,indbuf[i_title_it]);
697
  else
698
    fprintf(titf_it,"%d:%s\n",serial,indbuf[i_title]);
699
  if(indbuf[i_title_nl][0]!=0)
700
    fprintf(titf_nl,"%d:%s\n",serial,indbuf[i_title_nl]);
701
  else
702
    fprintf(titf_nl,"%d:%s\n",serial,indbuf[i_title]);
703
 
12248 bpr 704
  fprintf(descf,"%d:%s\n",serial,indbuf[i_description]);
705
  fprintf(authorf,"%d:%s\n",serial,indbuf[i_author]);
706
  fprintf(versionf,"%d:%s\n",serial,indbuf[i_version]);
6881 bpr 707
 
6884 bpr 708
/*   add module's information in html page for robots  */
12248 bpr 709
  snprintf(buf,sizeof(buf),"%s",indbuf[i_description]);
710
  for(pp=strchr(buf,','); pp; pp=strchr(pp,','))
711
    string_modify3(buf,pp,pp+1,"&#44;");
712
  if(strcmp(module_language,lang[lind])==0)
713
    fprintf(robotf,"%s ,%s,%s,%s,%s\n",name,module_language,name,
714
        indbuf[i_title], buf);
6819 reyssat 715
 
6884 bpr 716
/*   Normalize the information of trlist, using dictionary
7915 bpr 717
 *  -- bases/sys/domain.xx without suffix translation (--> english version)
6884 bpr 718
 */
15380 bpr 719
  entrycount=dentrycount; dicbuf=ddicbuf;
720
  memmove(entry,dentry,dentrycount*sizeof(entry[0]));
721
  unknown_type=unk_leave;
722
  for(i=0;i<trcnt;i++) {
723
    detag(indbuf[trlist[i]]);
724
    deaccent2(indbuf[trlist[i]]);
725
    comma(indbuf[trlist[i]]);
726
    singlespace2(indbuf[trlist[i]]);
727
    translate(indbuf[trlist[i]]);
728
  }
6884 bpr 729
/*   Normalize the information, using dictionary
7915 bpr 730
 *   bases/sys/words.xx with suffix translation
6884 bpr 731
 */
15380 bpr 732
  entrycount=mentrycount; dicbuf=mdicbuf;
733
  memmove(entry,mentry,mentrycount*sizeof(entry[0]));
734
  unknown_type=unk_leave;/*  used in translator_.c */
735
  for(i=0;i<trcnt;i++) {
736
  suffix_translate(indbuf[trlist[i]]);
737
  translate(indbuf[trlist[i]]);
738
  }
6881 bpr 739
 
740
/* taken contains all words already seen in the module index */
15380 bpr 741
  taken[0]=0; takenlen=tweight=0;
6881 bpr 742
/*  append words of title  */
15380 bpr 743
  ovlstrcpy(buf,indbuf[i_title]); towords(buf);
744
  appenditem2(buf,lind,serial,4,module_language);
6881 bpr 745
 
6884 bpr 746
/*  extract words of every other information except level */
15380 bpr 747
  snprintf(buf,sizeof(buf),"%s %s %s %s %s %s %s %s %s %s %s %s %s %s %s",
748
    indbuf[i_description],indbuf[i_keywords],
749
    indbuf[i_keywords_ca],indbuf[i_keywords_en],indbuf[i_keywords_fr],
750
    indbuf[i_keywords_it],indbuf[i_keywords_nl],
751
    indbuf[i_title_ca],indbuf[i_title_en],indbuf[i_title_fr],
752
    indbuf[i_title_it],indbuf[i_title_nl],
753
    indbuf[i_domain],indbuf[i_require],indbuf[i_author]);
754
  towords(buf);
755
  appenditem2(buf,lind,serial,2,module_language);
6881 bpr 756
 
6884 bpr 757
/*   this time the dictionary is the group dictionary  sys/wgrp/wgrp
758
 *   with a g (groupdic), not an m (maindic) . see below main, suffix, group.
7915 bpr 759
 *   and delete unknown ?? and translate
6884 bpr 760
 */
12248 bpr 761
  entrycount=gentrycount; dicbuf=gdicbuf;
762
  memmove(entry,gentry,gentrycount*sizeof(entry[0]));
6881 bpr 763
 
15380 bpr 764
/* append words of every title information  */
12248 bpr 765
  ovlstrcpy(buf,indbuf[i_title]);
766
  unknown_type=unk_delete;
767
  translate(buf);
768
  appenditem1(buf,lind,serial,2,module_language);
6881 bpr 769
 
15380 bpr 770
/* append words of information of description except level  */
12248 bpr 771
  snprintf(buf,sizeof(buf),"%s", indbuf[i_description]);
772
  unknown_type=unk_delete;
773
  translate(buf);
774
  appenditem1(buf,lind,serial,4,module_language);
6881 bpr 775
 
15380 bpr 776
/* append words (or group of words) of keywords and domain  */
12248 bpr 777
  snprintf(buf,sizeof(buf),"%s, %s, %s, %s, %s, %s, %s",
15380 bpr 778
    indbuf[i_domain],indbuf[i_keywords],
779
    indbuf[i_keywords_ca], indbuf[i_keywords_en],indbuf[i_keywords_fr],
780
    indbuf[i_keywords_it], indbuf[i_keywords_nl]);
12248 bpr 781
  unknown_type=unk_leave;
782
  translate(buf);
783
  appenditem1(buf,lind,serial,2,module_language);
6881 bpr 784
 
15380 bpr 785
/* append level information, with weight 2 */
12248 bpr 786
  snprintf(buf,sizeof(buf),"%s",indbuf[i_level]);
787
  ovlstrcpy(lbuf,"level");
788
  for(p1=buf; *p1; p1++) if(!isalnum(*p1)) *p1=' ';
789
  q=buf+strlen(buf);
15380 bpr 790
  for(p1=find_word_start(buf); (*p1) && (p1 < q) ; p1=find_word_start(p2)) {
12248 bpr 791
    p2=find_word_end(p1);
792
    if(p2!=NULL) *p2++=0; else p2=p1+strlen(p1);
793
    if(strncmp(p1, "Lang" , p2-p1) &&
794
     (!isalpha(*p1) ||
795
     (!isdigit(*(p1+1)) && *(p1+1)!=0) ||
796
     (*(p1+1)!=0 && *(p1+2)!=0)))
797
       continue;
798
    *p1=tolower(*p1);
799
    ovlstrcpy(lbuf+strlen("level"),p1);
800
    appenditem(lbuf,lind,serial,2,module_language);
801
  }
6884 bpr 802
/*   append total weight of module to weight file site2/weight.xx  */
12248 bpr 803
  fprintf(weightf,"%d:%d\n",serial,tweight);
10 reyssat 804
}
805
 
806
void modules(void)
807
{
12248 bpr 808
  int i,j,k,d;
809
  char namebuf[MAX_LINELEN+1];
810
  char mdic[MAX_LINELEN+1], sdic[MAX_LINELEN+1], gdic[MAX_LINELEN+1], ddic[MAX_LINELEN+1];
10 reyssat 811
 
12248 bpr 812
  for(j=0;j<langcnt;j++) {
6884 bpr 813
    snprintf(namebuf,sizeof(namebuf),"%s/weight.%s",outdir,lang[j]);
814
    weightf=fopen(namebuf,"w");
815
    snprintf(mdic,sizeof(mdic),"%s/%s.%s",dicdir,maindic,lang[j]);
816
    snprintf(sdic,sizeof(sdic),"%s/%s.%s",dicdir,suffixdic,lang[j]);
817
    snprintf(gdic,sizeof(gdic),"%s/%s.%s",dicdir,groupdic,lang[j]);
818
    snprintf(ddic,sizeof(ddic),"%s/%s.%s",dicdir,domaindic,lang[j]);
819
    suffix_dic(sdic); prepare_dic(gdic);
820
    gdicbuf=dicbuf; gentrycount=entrycount;
821
    memmove(gentry,entry,gentrycount*sizeof(entry[0]));
822
    prepare_dic(mdic);
823
    mdicbuf=dicbuf; mentrycount=entrycount;
824
    memmove(mentry,entry,mentrycount*sizeof(entry[0]));
825
    prepare_dic(ddic);
826
    ddicbuf=dicbuf; dentrycount=entrycount;
827
    memmove(dentry,entry,dentrycount*sizeof(entry[0]));
828
    unknown_type=unk_leave; translate(ignore[j]);
829
    for(i=0;i<modcnt;i++) {
12248 bpr 830
      if(mod[i].langcnt>0) {
15336 bpr 831
      /* look for another language */
6884 bpr 832
        for(d=k=0;k<mod[i].langcnt;k++)
15336 bpr 833
          if(mod[i].langs[k]<mod[i].langs[d]) d=k;
6884 bpr 834
        for(k=0;k<mod[i].langcnt && mod[i].langs[k]!=j;k++);
835
        if(k>=mod[i].langcnt) k=d;
836
        snprintf(namebuf,MAX_LINELEN,"%s.%s",mod[i].name,
12248 bpr 837
           lang[mod[i].langs[k]]);
6884 bpr 838
        onemodule(namebuf,mod[i].counts[k],j);
12248 bpr 839
      }
840
      else {
6884 bpr 841
        onemodule(mod[i].name,mod[i].counts[0],j);
12248 bpr 842
      }
10 reyssat 843
    }
6884 bpr 844
    if(mentrycount>0) free(mdicbuf);
845
    if(gentrycount>0) free(gdicbuf);
846
    if(suffixcnt>0) free(sufbuf);
847
    if(dentrycount>0) free(ddicbuf);
848
    if(weightf) fclose(weightf);
12248 bpr 849
  }
10 reyssat 850
}
15394 bpr 851
void clean(void)
852
{
853
  int i;
854
  for (i = 0; i < open_files; i++) fclose(files[i]);
855
  fclose(langf); fclose(titf); fclose(descf); fclose(robotf);
856
  fclose(authorf); fclose(versionf);
16987 bpr 857
  fclose(titf_fr); fclose(titf_it);fclose(titf_es);fclose(titf_nl);
858
  fclose(titf_ca);fclose(titf_en);
15394 bpr 859
}
10 reyssat 860
 
6881 bpr 861
/* FIXME? Near-duplicate of appenditem; the difference is the use of fprintf instead of snprintf. */
10 reyssat 862
void sappenditem(char *word, int lind, int serial, int weight)
863
{
12248 bpr 864
  int ll;
865
  char *p;
6881 bpr 866
 
12248 bpr 867
  if(!isalnum(*word) || (ll=strlen(word))<2 ||
868
     wordchr2(taken,word)!=NULL ||
869
     wordchr2(ignore[lind],word)!=NULL ||
870
     takenlen>=MAX_LINELEN-ll-16)
871
    return;
872
  if(ll==2 && (!isdigit(word[0]) || !isalpha(word[1]))) return;
873
  for(p=word;*p;p++) if(!isalnum(*p) && *p!=' ') return;
874
  taken[takenlen++]=' ';taken[takenlen++]=' ';
875
  ovlstrcpy(taken+takenlen,word);
876
  takenlen+=ll; tweight+=weight;
877
  fprintf(indf,"%s:%d?%d\n",word,serial,weight);
10 reyssat 878
}
15380 bpr 879
/* onesg / onemodule are similar */
15375 bpr 880
void onesg(int serial, int lind, int index(int))
10 reyssat 881
{
12248 bpr 882
  int i;
883
  unsigned char trlist[]={
15375 bpr 884
    s_title,s_description,s_domain,s_keywords,s_information
12248 bpr 885
  };
15380 bpr 886
  int trcnt=sizeof(trlist)/sizeof(trlist[0]);
15778 georgesk 887
  char *p1, *p2, *q, buf[4*MAX_LINELEN+4], lbuf[16];
6881 bpr 888
 
15375 bpr 889
  if(index(serial)) return;
890
  fprintf(titf,"%d:%s\n",serial,gsindbuf[s_title]);
891
  fprintf(descf,"%d:%s\n",serial,gsindbuf[s_description]);
892
  fprintf(remf,"%d:%s\n",serial,gsindbuf[s_information]);
7915 bpr 893
 
15380 bpr 894
/*   Normalize the information of trlist, using dictionary
895
 *  -- bases/sys/domain.xx without suffix translation (--> english version)
896
 */
12248 bpr 897
  entrycount=dentrycount; dicbuf=ddicbuf;
898
  memmove(entry,dentry,dentrycount*sizeof(entry[0]));
899
  unknown_type=unk_leave;
15380 bpr 900
  for(i=0;i<trcnt;i++) {
15375 bpr 901
    detag(gsindbuf[trlist[i]]);
902
    deaccent2(gsindbuf[trlist[i]]);
903
    comma(gsindbuf[trlist[i]]);
904
    singlespace2(gsindbuf[trlist[i]]);
905
    translate(gsindbuf[trlist[i]]);
12248 bpr 906
  }
15380 bpr 907
/*   Normalize the information, using dictionary
908
 *   bases/sys/words.xx with suffix translation
909
 */
12248 bpr 910
  entrycount=mentrycount; dicbuf=mdicbuf;
911
  memmove(entry,mentry,mentrycount*sizeof(entry[0]));
15380 bpr 912
  unknown_type=unk_leave;/*  used in translator_.c */
913
  for(i=0;i<trcnt;i++) {
15375 bpr 914
    suffix_translate(gsindbuf[trlist[i]]);
915
    translate(gsindbuf[trlist[i]]);
12248 bpr 916
  }
15380 bpr 917
 
918
/* taken contains all words already seen in the module index */
12248 bpr 919
  taken[0]=0; takenlen=tweight=0;
15380 bpr 920
/*  append words of title  */
15375 bpr 921
  ovlstrcpy(buf,gsindbuf[s_title]); towords(buf);
12248 bpr 922
  for(p1=find_word_start(buf);*p1;
923
      p1=find_word_start(p2)) {
6884 bpr 924
    p2=find_word_end(p1); if(*p2) *p2++=0;
925
    sappenditem(p1,lind,serial,4);
12248 bpr 926
  }
15380 bpr 927
 
928
/*  extract words of every other information except level */
12248 bpr 929
  snprintf(buf,sizeof(buf),"%s %s %s %s",
15375 bpr 930
         gsindbuf[s_description],gsindbuf[s_keywords],
931
         gsindbuf[s_domain],gsindbuf[s_information]);
12248 bpr 932
  towords(buf);
15375 bpr 933
  for(p1=find_word_start(buf);*p1;p1=find_word_start(p2)) {
934
    p2=find_word_end(p1); if(*p2) *p2++=0;
935
    sappenditem(p1,lind,serial,2);
12248 bpr 936
  }
15380 bpr 937
/*   this time the dictionary is the group dictionary  sys/wgrp/wgrp
938
 *   with a g (groupdic), not an m (maindic) . see below main, suffix, group.
939
 *   and delete unknown ?? and translate
940
 */
12248 bpr 941
  entrycount=gentrycount; dicbuf=gdicbuf;
942
  memmove(entry,gentry,gentrycount*sizeof(entry[0]));
15380 bpr 943
 
944
/*  append words of every title information  */
945
  ovlstrcpy(buf,gsindbuf[s_title]);
12248 bpr 946
  unknown_type=unk_delete;
15380 bpr 947
  translate(buf);
15375 bpr 948
  for(p1=find_word_start(buf); *p1; p1=find_word_start(p2)) {
949
    p2=strchr(p1,',');
950
    if(p2!=NULL) *p2++=0; else p2=p1+strlen(p1);
951
    if(strlen(p1)<=0) continue;
952
    sappenditem(p1,lind,serial,4);
12248 bpr 953
  }
15380 bpr 954
 
955
/*  append words (or group of words) of keywords and domain  */
12248 bpr 956
  snprintf(buf,sizeof(buf),"%s, %s",
15375 bpr 957
       gsindbuf[s_keywords],
958
       gsindbuf[s_domain]);
15380 bpr 959
  unknown_type=unk_leave;
12248 bpr 960
  translate(buf);
15380 bpr 961
  for(p1=find_word_start(buf); *p1; p1=find_word_start(p2)) {
6884 bpr 962
    p2=strchr(p1,',');
963
    if(p2!=NULL) *p2++=0; else p2=p1+strlen(p1);
964
    if(strlen(p1)<=0) continue;
965
    sappenditem(p1,lind,serial,2);
12248 bpr 966
  }
15380 bpr 967
 
968
/*   append level information, with weight 2 */
969
  snprintf(buf,sizeof(buf),"%s",gsindbuf[s_level]);
970
  ovlstrcpy(lbuf,"level");
971
  for(p1=buf; *p1; p1++) if(!isalnum(*p1)) *p1=' ';
972
  q=buf+strlen(buf);
973
  for(p1=find_word_start(buf); (*p1) && (p1 < q) ;
974
  p1=find_word_start(p2)) {
975
    p2=find_word_end(p1);
976
    if(p2!=NULL) *p2++=0; else p2=p1+strlen(p1);
977
    if(strncmp(p1, "Lang" , p2-p1) &&
978
        (!isalpha(*p1) || (!isdigit(*(p1+1))) ||
979
        (*(p1+1)!=0 && *(p1+2)!=0)))
980
      continue;
981
    *p1=tolower(*p1);
982
    ovlstrcpy(lbuf+strlen("level"),p1);
983
    sappenditem(lbuf,lind,serial,2);
984
  }
985
/*   append total weight of module to weight file site2/weight.xx  */
12248 bpr 986
  fprintf(weightf,"%d:%d\n",serial,tweight);
10 reyssat 987
}
988
 
15375 bpr 989
void sgs(char *outdir, int index(int))
10 reyssat 990
{
15440 bpr 991
  int i,j,k,d;
12248 bpr 992
  char mdic[MAX_LINELEN+1], sdic[MAX_LINELEN+1], gdic[MAX_LINELEN+1], ddic[MAX_LINELEN+1];
993
  char buf[MAX_LINELEN+1];
7915 bpr 994
 
15442 bpr 995
  //snprintf(buf,sizeof(buf),"%s/list",outdir);
996
  //listf=fopen(buf,"w");
15440 bpr 997
  snprintf(buf,sizeof(buf),"%s/title",outdir);
998
  titf=fopen(buf,"w");
999
  snprintf(buf,sizeof(buf),"%s/description",outdir);
1000
  descf=fopen(buf,"w");
1001
  snprintf(buf,sizeof(buf),"%s/information",outdir);
1002
  remf=fopen(buf,"w");
15442 bpr 1003
  if(!remf || !descf || !titf ) {
15440 bpr 1004
    fprintf(stderr,"modind: error creating output files for %s.\n",outdir); exit(1);
1005
  }
12248 bpr 1006
  for(j=0;j<langcnt;j++) {
15375 bpr 1007
    snprintf(buf,sizeof(buf),"%s/%s",outdir,lang[j]);
12248 bpr 1008
    indf=fopen(buf,"w");
15375 bpr 1009
    snprintf(buf,sizeof(buf),"%s/weight.%s",outdir,lang[j]);
12248 bpr 1010
    weightf=fopen(buf,"w");
15440 bpr 1011
    if(!weightf || !indf ) {
1012
      fprintf(stderr,"modind: error creating output files for %s.\n",outdir); exit(1);
9090 bpr 1013
    }
6884 bpr 1014
    snprintf(mdic,sizeof(mdic),"%s/%s.%s",dicdir,maindic,lang[j]);
1015
    snprintf(sdic,sizeof(sdic),"%s/%s.%s",dicdir,suffixdic,lang[j]);
1016
    snprintf(gdic,sizeof(gdic),"%s/%s.%s",dicdir,groupdic,lang[j]);
6961 bpr 1017
    snprintf(ddic,sizeof(ddic),"%s/%s.%s",dicdir,domaindic,lang[j]);
6884 bpr 1018
    suffix_dic(sdic); prepare_dic(gdic);
1019
    gdicbuf=dicbuf; gentrycount=entrycount;
1020
    memmove(gentry,entry,gentrycount*sizeof(entry[0]));
1021
    prepare_dic(mdic);
1022
    mdicbuf=dicbuf; mentrycount=entrycount;
1023
    memmove(mentry,entry,mentrycount*sizeof(entry[0]));
6973 bpr 1024
    prepare_dic(ddic);
1025
    ddicbuf=dicbuf; dentrycount=entrycount;
1026
    memmove(dentry,entry,dentrycount*sizeof(entry[0]));
6884 bpr 1027
    unknown_type=unk_leave; translate(ignore[j]);
15440 bpr 1028
    for(i=0;i<modcnt;i++)
1029
      if(mod[i].langcnt>0) {
1030
      /* look for another language */
1031
        for(d=k=0;k<mod[i].langcnt;k++)
1032
          if(mod[i].langs[k]<mod[i].langs[d]) d=k;
1033
        for(k=0;k<mod[i].langcnt && mod[i].langs[k]!=j;k++);
1034
        if(k>=mod[i].langcnt) k=d;
1035
        onesg(mod[i].counts[k],mod[i].langs[k],index);
1036
      }
6884 bpr 1037
    if(mentrycount>0) free(mdicbuf);
1038
    if(gentrycount>0) free(gdicbuf);
1039
    if(suffixcnt>0) free(sufbuf);
6961 bpr 1040
    if(dentrycount>0) free(ddicbuf);
15440 bpr 1041
    fclose(indf); fclose(weightf);
12248 bpr 1042
  }
15444 bpr 1043
  fclose(titf); fclose(descf); fclose(remf);
10 reyssat 1044
}
1045
 
1046
int main()
1047
{
12248 bpr 1048
  gentry=xmalloc(entry_size);
1049
  dentry=xmalloc(entry_size);
1050
  mentry=xmalloc(entry_size);
15442 bpr 1051
  init();
12248 bpr 1052
  prep();
1053
  if(modcnt>0) modules();
1054
  clean();
1055
  sprep();
15375 bpr 1056
  if(modcnt>0) sgs(sheetoutdir,sheet_index);
1057
  gprep();
1058
  if(modcnt>0) sgs(glossaryoutdir,glossary_index);
12248 bpr 1059
  return 0;
10 reyssat 1060
}