Subversion Repositories wimsdev

Rev

Rev 17024 | Details | Compare with Previous | Last modification | View Log | RSS feed

Rev Author Line No. Line
10 reyssat 1
/*    Copyright (C) 1998-2003 XIAO, Gang of Universite de Nice - Sophia Antipolis
2
 *
3
 *  This program is free software; you can redistribute it and/or modify
4
 *  it under the terms of the GNU General Public License as published by
5
 *  the Free Software Foundation; either version 2 of the License, or
6
 *  (at your option) any later version.
7
 *
8
 *  This program is distributed in the hope that it will be useful,
9
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
10
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
11
 *  GNU General Public License for more details.
12
 *
13
 *  You should have received a copy of the GNU General Public License
14
 *  along with this program; if not, write to the Free Software
15
 *  Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
16
 */
17
 
6884 bpr 18
/*  This is an internal program,
 *  used to index modules for the search engine.
 */
10 reyssat 21
 
8100 bpr 22
#include "../Lib/libwims.h"
8123 bpr 23
#include "translator_.h"
24
#include "suffix.h"
10 reyssat 25
 
6884 bpr 26
#define MAX_LANGS    MAX_LANGUAGES
27
#define MAX_MODULES    65536
28
char *moduledir=    "public_html/modules";
29
char *sheetdir=     "public_html/bases/sheet";
15375 bpr 30
char *glossarydir=  "public_html/scripts/data/glossary";
6884 bpr 31
char *dicdir=       "public_html/bases";
32
char *outdir=       "public_html/bases/site2";
9090 bpr 33
char *sheetoutdir=  "public_html/bases/sheet/index";
15375 bpr 34
char *glossaryoutdir=  "public_html/scripts/data/glossary/index";
6884 bpr 35
char *maindic=      "sys/words";
36
char *groupdic=     "sys/wgrp/wgrp";
37
char *suffixdic=    "sys/suffix";
38
char *domaindic=    "sys/domaindic";
39
char *ignoredic=    "sys/indignore";
40
char *conffile=     "log/wims.conf";
9092 bpr 41
char *mlistbase=    "lists";
10 reyssat 42
 
43
char lang[MAX_LANGS][4]={
1792 bpr 44
    "en","fr","cn","es","it","nl","si","ca","pt"
10 reyssat 45
};
6884 bpr 46
#define DEFAULT_LANGCNT    6
10 reyssat 47
char allang[MAX_LANGS][4]={
6564 bpr 48
    "en","fr","cn","es","it","nl","de","si","ca","pt"
10 reyssat 49
};
50
#define allangcnt 8
51
char ignore[MAX_LANGS][MAX_LINELEN+1];
52
char mlistfile[MAX_LANGS][256];
53
int langcnt;
15444 bpr 54
FILE *langf, *titf, *descf, *weightf, *robotf, *indf, *listf, *addrf, *serialf, *authorf, *versionf, *remf;
16987 bpr 55
FILE *titf_ca,*titf_en,*titf_es,*titf_fr,*titf_it,*titf_nl;
10 reyssat 56
/* Module categories: the name as it appears in an INDEX "category" field
 * and the one-letter code used to build the per-category file names.
 * The first entry ("all_types") is the catch-all category.
 */
struct cat {
    char *name;
    char typ;
} cat[] = {
    { .name = "all_types",  .typ = 'A' },
    { .name = "exercise",   .typ = 'X' },
    { .name = "oef",        .typ = 'O' },
    { .name = "tool",       .typ = 'T' },
    { .name = "recreation", .typ = 'R' },
    { .name = "reference",  .typ = 'Y' },
    { .name = "document",   .typ = 'D' },
    { .name = "popup",      .typ = 'P' },
    { .name = "datamodule", .typ = 'M' }
};
/* Number of entries in cat[]. */
#define catno (sizeof(cat)/sizeof(cat[0]))
71
 
72
struct mod {
73
    char *name;
74
    unsigned char langs[MAX_LANGS];
75
    int counts[MAX_LANGS];
15440 bpr 76
    int langcnt;
10 reyssat 77
} mod[MAX_MODULES];
15440 bpr 78
 
79
// serial-> the name of the module indexed by serial, lang and its classe
80
struct revmod {
81
    char name[MAX_MODULELEN+1];
82
    int lang;
83
    int imod;
15482 bpr 84
    char keywords[MAX_FNAME];
15440 bpr 85
} revmod[MAX_MODULES];
10 reyssat 86
int modcnt;
87
 
88
char *mlist;
15442 bpr 89
char *sheetindex[]={
90
  "title", "description",
91
  "duration", "severity",
92
  "level", "domain",
93
  "keywords", "reserved1", "reserved2", "information"
94
};
95
/* correspond to the order of sheetindex */
96
char *glindex[]={
97
  "gl_title", "gl_description",
98
  "", "",
99
  "gl_level", "gl_domain",
100
  "gl_keywords","","",""};
10 reyssat 101
 
15442 bpr 102
#define SHEETINDEX_NO (sizeof(sheetindex)/sizeof(sheetindex[0]))
103
char gsindbuf[SHEETINDEX_NO+1][MAX_LINELEN+1];
104
 
105
/* do not modify the order, correspond to the order in the sheet file */
106
enum{s_title, s_description,
107
      s_duration, s_severity,
108
      s_level, s_domain,
109
      s_keywords, s_reserved1, s_reserved2,
110
      s_information
111
};
112
 
113
char *modindex[]={
114
  "title", "description",
115
  "author", "address", "copyright",
116
  "version", "wims_version", "language",
117
  "category", "level", "domain", "keywords",
118
  "keywords_ca", "keywords_en", "keywords_fr", "keywords_it", "keywords_nl",
16987 bpr 119
  "title_ca", "title_en", "title_es", "title_fr", "title_it", "title_nl",
15442 bpr 120
  "require"
121
};
122
#define MODINDEX_NO (sizeof(modindex)/sizeof(modindex[0]))
123
char indbuf[MODINDEX_NO][MAX_LINELEN+1];
124
enum{i_title, i_description,
125
  i_author,i_address,i_copyright,
126
  i_version,i_wims_version,i_language,
127
  i_category,i_level,i_domain,i_keywords,
128
  i_keywords_ca,i_keywords_en,i_keywords_fr,i_keywords_it,i_keywords_nl,
16987 bpr 129
  i_title_ca,i_title_en,i_title_es,i_title_fr,i_title_it,i_title_nl,
15442 bpr 130
  i_require
131
};
132
 
133
char *module_special_file[]={
  "intro",
  "help",
  "about"
};
#define MODSPEC_NO (sizeof(module_special_file)/sizeof(module_special_file[0]))
/* Two-letter language code of the module last read by module_index(). */
char module_language[4];

/* Saved state of the main (m), group (g) and domain (d) dictionaries:
 * text buffer, entry table and entry count for each of them.
 */
char *mdicbuf;
char *gdicbuf;
char *ddicbuf;
char *gentry;
char *mentry;
char *dentry;

int gentrycount;
int mentrycount;
int dentrycount;
142
 
143
 
6884 bpr 144
/*  fold known accented letters to unaccented, other strange characters to space
7915 bpr 145
 *  apostrophe is among the exceptions to be kept (important for multi-word expressions)
6884 bpr 146
 */
8100 bpr 147
void deaccent2(char *p)
10 reyssat 148
{
12248 bpr 149
  char *sp;
150
  char *v;
151
  for(sp=p;*sp;sp++) {
152
  if(*sp<0 && (v=strchr(acctab,*sp))!=NULL)
153
    *sp=*(deatab+(v-acctab));
154
  if(!isalnum(*sp) && strchr(",.&$+*",*sp)==0) *sp=' ';
155
  else *sp=tolower(*sp);
156
  }
10 reyssat 157
}
158
 
6884 bpr 159
/*  Translate in place every character that is neither alphanumeric nor
 *  one of & $ + * into a space.
 */
void towords(char *p)
{
  char *pp;

  for(pp=p;*pp;pp++) {
    /* cast: <ctype.h> functions are undefined for negative char values */
    if(!isalnum((unsigned char)*pp) && strchr("&$+*",*pp)==NULL) *pp=' ';
  }
}
165
 
6884 bpr 166
/*  Find the first occurrence of w in p as a whole word, i.e. preceded by
 *  a space character or the start of p, and followed by a space character
 *  or the end of p.
 *  Returns a pointer to the occurrence, or NULL if there is none.
 */
char *wordchr2(char *p, char *w)
{
  size_t wlen=strlen(w);
  char *r=strstr(p,w);

  while(r!=NULL) {
    int left_ok=(r==p) || isspace((unsigned char)r[-1]);
    int right_ok=(r[wlen]==0) || isspace((unsigned char)r[wlen]);
    if(left_ok && right_ok) return r;
    /* only reachable with an empty w: never search past the terminator */
    if(*r==0) return NULL;
    r=strstr(r+1,w);
  }
  return NULL;
}
176
 
177
/*  Return a pointer just past the '>' closing the tag that starts at p
 *  (a leading '<' is skipped).  Nested tags are skipped recursively and
 *  the content of "..." or '...' strings is ignored.  If a quote is not
 *  closed, or no '>' is found, the end of the string is returned.
 */
char *find_tag_end(char *p)
{
  char *cur=p;
  char *closing;

  if(*cur=='<') cur++;
  while(*cur!=0 && *cur!='>') {
    switch(*cur) {
      case '<':
        /* resume right after the nested tag */
        cur=find_tag_end(cur);
        break;
      case '"':
      case '\'':
        closing=strchr(cur+1,*cur);
        if(closing==NULL) return p+strlen(p);
        cur=closing+1;
        break;
      default:
        cur++;
        break;
    }
  }
  if(*cur=='>') cur++;
  return cur;
}
197
 
198
/*  Find the first tag named tag (case-insensitive) in p.
 *  A match is a '<' immediately followed by tag and then by a
 *  non-alphanumeric character.
 *  Returns a pointer to the '<', or to the end of p if there is no match.
 */
char *find_tag(char *p, char *tag)
{
  char *pp;
  size_t len=strlen(tag);

  for(pp=strchr(p,'<'); pp!=NULL; pp=strchr(pp+1,'<')) {
    /* cast: <ctype.h> functions are undefined for negative char values */
    if(strncasecmp(pp+1,tag,len)==0 && !isalnum((unsigned char)pp[1+len]))
      return pp;
  }
  return p+strlen(p);
}
208
 
6884 bpr 209
/*  Remove all html tags from p, in place.
 *  A tag that is not closed truncates the string at its '<'.
 */
void detag(char *p)
{
  char *open=strchr(p,'<');
  char *after;

  while(open!=NULL) {
    after=find_tag_end(open);
    if(*after==0) {
      *open=0;
      return;
    }
    /* close the gap, then look again from the same position */
    ovlstrcpy(open,after);
    open=strchr(open,'<');
  }
}
219
 
6819 reyssat 220
/* Add a space after every comma of p, so that the end of words is visible. */
void comma(char *p)
{
  char *hit=strchr(p,',');

  while(hit!=NULL) {
    string_modify3(p,hit,hit+1,", ");
    hit=strchr(hit+1,',');
  }
}
15482 bpr 228
/* Replace every '/' of p by ','. */
void slash2comma(char *p)
{
  char *hit=strchr(p,'/');

  while(hit!=NULL) {
    string_modify3(p,hit,hit+1,",");
    hit=strchr(hit+1,'/');
  }
}
15375 bpr 235
/* _getdef from lines.c except the error msg*/
10 reyssat 236
void _getdef(char buf[], char *name, char value[])
237
{
15375 bpr 238
  char *p1, *p2, *p3, *p4;
10 reyssat 239
 
15375 bpr 240
  if(*name==0) goto nothing;      /* this would create segfault. */
12248 bpr 241
  for(p1=strstr(buf,name); p1!=NULL; p1=strstr(p1+1,name)) {
6884 bpr 242
    p2=find_word_start(p1+strlen(name));
243
    if((p1>buf && !isspace(*(p1-1))) || *p2!='=') continue;
15375 bpr 244
    p3=p1; while(p3>buf && *(p3-1)!='\n') p3--;
245
    p3=find_word_start(p3);
246
    if(p3<p1 && *p3!='!') continue;
247
    if(p3<p1) {
248
      p3++; p4=find_word_end(p3);
249
      if(find_word_start(p4)!=p1) continue;
250
      if(p4-p3!=3 || (strncmp(p3,"set",3)!=0 &&
251
           strncmp(p3,"let",3)!=0 &&
252
           strncmp(p3,"def",3)!=0)) {
253
        if(p4-p3!=6 || strncmp(p3,"define",6)!=0) continue;
254
      }
255
    }
256
    p2++;p3=strchr(p2,'\n'); if(p3==NULL) p3=p2+strlen(p2);
257
    p2=find_word_start(p2);
258
    if(p2>p3) goto nothing;
259
    /*if(p3-p2>=MAX_LINELEN) user_error("cmd_output_too_long");*/
260
    memmove(value,p2,p3-p2); value[p3-p2]=0;
261
    strip_trailing_spaces(value); return;
12248 bpr 262
  }
15375 bpr 263
nothing:
15394 bpr 264
  value[0]=0;
10 reyssat 265
}
266
 
6884 bpr 267
/*  Get variable definition from a file.
 * The whole file fname is read and searched with _getdef().
 * Result stored in buffer value of length MAX_LINELEN; value is the
 * empty string when the file cannot be read or name is not defined.
 */
void getdef(char *fname, char *name, char value[])
{
  FILE *f;
  char *buf;
  long size;
  size_t got;

  value[0]=0;
  f=fopen(fname,"r"); if(f==NULL) return;
  if(fseek(f,0,SEEK_END)!=0 || (size=ftell(f))<0 || fseek(f,0,SEEK_SET)!=0) {
    fclose(f); return;
  }
  buf=xmalloc(size+256);
  got=fread(buf,1,size,f);
  fclose(f);
  if(got>0) {
    buf[got]=0;
    _getdef(buf,name,value);
  }
  /* released on every path, including the empty-file one */
  free(buf);
}
285
 
15442 bpr 286
void init(void)
10 reyssat 287
{
12248 bpr 288
  char buf[MAX_LINELEN+1];
15442 bpr 289
  char *p1,*p2,*s;
290
  int i,l;
12248 bpr 291
  FILE *f;
6881 bpr 292
 
12248 bpr 293
  s=getenv("modind_outdir"); if(s!=NULL && *s!=0) outdir=s;
294
  s=getenv("modind_sheetdir"); if(s!=NULL && *s!=0) sheetdir=s;
295
  s=getenv("modind_sheetoutdir"); if(s!=NULL && *s!=0) sheetoutdir=s;
15442 bpr 296
  s=getenv("modind_glossaryoutdir"); if(s!=NULL && *s!=0) glossaryoutdir=s;
6884 bpr 297
/* take the langs declared in conffile */
12248 bpr 298
  getdef(conffile,"site_languages",buf);
15442 bpr 299
  langcnt=0;
12248 bpr 300
  for(p1=buf;*p1;p1++) if(!isalnum(*p1)) *p1=' ';
301
  for(p1=find_word_start(buf); *p1 && langcnt<MAX_LANGS; p1=find_word_start(p2)) {
6884 bpr 302
    p2=find_word_end(p1);
303
    if(p2!=p1+2 || !isalpha(*p1) || !isalpha(*(p1+1))) continue;
304
    memmove(lang[langcnt],p1,2); lang[langcnt++][2]=0;
12248 bpr 305
  }
306
  if(langcnt==0) {/*  default languages */
6884 bpr 307
    langcnt=DEFAULT_LANGCNT;
12248 bpr 308
  }
309
  for(i=0;i<langcnt;i++) {
310
    snprintf(buf,sizeof(buf),"%s/%s.%s",dicdir,ignoredic,lang[i]);
311
    f=fopen(buf,"r"); if(f==NULL) continue;
312
    l=fread(ignore[i],1,MAX_LINELEN,f);fclose(f);
313
    if(l<0 || l>=MAX_LINELEN) l=0;
314
    ignore[i][l]=0;
315
  }
15442 bpr 316
}
317
/*  Preparation of data */
318
void prep(void)
319
{
320
  char buf[MAX_LINELEN+1];
321
  char *p1,*p2,*s,*old;
322
  int i,l,thislang,t;
15444 bpr 323
  modcnt=0; old="";
15442 bpr 324
  snprintf(buf,sizeof(buf),"%s/addr",outdir);
325
  addrf=fopen(buf,"w");
326
  if(!addrf) { fprintf(stderr,"modind: error creating output files addr.\n"); exit(1);}
327
  snprintf(buf,sizeof(buf),"%s/serial",outdir);
328
  serialf=fopen(buf,"w");
329
  if(!serialf) { fprintf(stderr,"modind: error creating output files serial.\n"); exit(1);}
330
 
331
  s=getenv("mlist"); if(s==NULL) exit(1);
332
  l=strlen(s); if(l<0 || l>100*MAX_LINELEN) exit(1);
15444 bpr 333
  mlist=xmalloc(l+16); ovlstrcpy(mlist,s);
15442 bpr 334
 
12248 bpr 335
  for(t=0, p1=find_word_start(mlist); *p1 && modcnt<MAX_MODULES;
9090 bpr 336
        p1=find_word_start(p2), t++) {
12248 bpr 337
    p2=find_word_end(p1);
338
    l=p2-p1; if(*p2) *p2++=0;
339
    fprintf(addrf,"%d:%s\n",t,p1);
340
    fprintf(serialf,"%s:%d\n",p1,t);
341
    thislang=-1;
6564 bpr 342
/* language is taken from the address */
12248 bpr 343
    if(l>3 && p1[l-3]=='.') {
344
      for(i=0;i<langcnt;i++) if(strcasecmp(lang[i],p1+l-2)==0) break;
345
      if(i<langcnt) {p1[l-3]=0; thislang=i;}
346
      else {/*  unknown language, not referenced */
6884 bpr 347
        continue;
9090 bpr 348
      }
12248 bpr 349
    }
350
    if(modcnt>0 && strcmp(old,p1)==0 && thislang>=0) {
351
      if(mod[modcnt-1].langcnt<langcnt) {
6884 bpr 352
        mod[modcnt-1].langs[mod[modcnt-1].langcnt]=thislang;
353
        mod[modcnt-1].counts[mod[modcnt-1].langcnt]=t;
354
        (mod[modcnt-1].langcnt)++;
12248 bpr 355
      }
6884 bpr 356
    }
12248 bpr 357
    else {
358
      mod[modcnt].name=old=p1;
359
      if(thislang>=0) {
360
        mod[modcnt].langs[0]=thislang;
361
        mod[modcnt].langcnt=1;
362
      }
17038 bpr 363
      else
364
        mod[modcnt].langcnt=0;
12248 bpr 365
      mod[modcnt].counts[0]=t;
366
      modcnt++;
6884 bpr 367
    }
12248 bpr 368
  }
369
  snprintf(buf,sizeof(buf),"%s/language",outdir);
370
  langf=fopen(buf,"w");
371
  snprintf(buf,sizeof(buf),"%s/title",outdir);
372
  titf=fopen(buf,"w");
16987 bpr 373
  snprintf(buf,sizeof(buf),"%s/title_ca",outdir);
374
  titf_ca=fopen(buf,"w");
375
  snprintf(buf,sizeof(buf),"%s/title_en",outdir);
376
  titf_en=fopen(buf,"w");
377
  snprintf(buf,sizeof(buf),"%s/title_es",outdir);
378
  titf_es=fopen(buf,"w");
379
  snprintf(buf,sizeof(buf),"%s/title_fr",outdir);
380
  titf_fr=fopen(buf,"w");
381
  snprintf(buf,sizeof(buf),"%s/title_it",outdir);
382
  titf_it=fopen(buf,"w");
383
  snprintf(buf,sizeof(buf),"%s/title_nl",outdir);
384
  titf_nl=fopen(buf,"w");
12248 bpr 385
  snprintf(buf,sizeof(buf),"%s/description",outdir);
386
  descf=fopen(buf,"w");
387
  snprintf(buf,sizeof(buf),"%s/author",outdir);
388
  authorf=fopen(buf,"w");
389
  snprintf(buf,sizeof(buf),"%s/version",outdir);
390
  versionf=fopen(buf,"w");
391
  snprintf(buf,sizeof(buf),"%s/%s/robot.phtml",outdir,mlistbase);
392
  robotf=fopen(buf,"w");
393
  fclose(addrf); fclose(serialf);
16987 bpr 394
  if(!robotf || !versionf || !authorf || !descf || !titf
395
    || !titf_ca || !titf_en || !titf_es || !titf_fr || !titf_it || !titf_nl
396
    || !langf) {
12248 bpr 397
    fprintf(stderr,"modind: error creating output files.\n");
398
    exit(1);
399
  }
10 reyssat 400
}
401
 
402
void sprep(void)
403
{
15440 bpr 404
  char buf[MAX_LINELEN+1];
12248 bpr 405
  char *p1,*p2,*s;
15440 bpr 406
  int i,l,t,thislang;
6881 bpr 407
 
12248 bpr 408
  modcnt=0;
15440 bpr 409
  snprintf(buf,sizeof(buf),"%s/addr",sheetoutdir);
410
  addrf=fopen(buf,"w");
411
  if(!addrf) { fprintf(stderr,"modind: error creating output files addr.\n"); exit(1);}
412
  snprintf(buf,sizeof(buf),"%s/serial",sheetoutdir);
413
  serialf=fopen(buf,"w");
12248 bpr 414
  s=getenv("slist"); if(s==NULL) return;
415
  l=strlen(s); if(l<0 || l>100*MAX_LINELEN) return;
416
  mlist=xmalloc(l+16); ovlstrcpy(mlist,s);
15440 bpr 417
  for(t=0,p1=find_word_start(mlist); *p1 && modcnt<MAX_MODULES; p1=find_word_start(p2),t++) {
6884 bpr 418
    p2=find_word_end(p1);
419
    l=p2-p1; if(*p2) *p2++=0;
420
    for(i=0;i<langcnt;i++) if(strncasecmp(lang[i],p1,2)==0) break;
421
    if(i<langcnt) thislang=i; else continue;
15440 bpr 422
    ovlstrcpy(revmod[t].name,p1);
423
    revmod[t].lang=thislang;
17038 bpr 424
    revmod[t].imod=modcnt;
6884 bpr 425
    mod[modcnt].name=p1;
426
    mod[modcnt].langs[0]=thislang;
427
    mod[modcnt].langcnt=1;
17024 bpr 428
    mod[modcnt].counts[0]=t;
15440 bpr 429
    fprintf(addrf,"%d:%s\n",modcnt,p1);
430
    fprintf(serialf,"%s:%d\n",p1,modcnt);
15539 bpr 431
    modcnt++;
12248 bpr 432
  }
17024 bpr 433
  fclose(addrf); fclose(serialf);
10 reyssat 434
}
435
 
15375 bpr 436
void gprep(void)
437
{
15440 bpr 438
  char buf[MAX_LINELEN+1];
439
  char *p1,*p2,*s,*old;
440
  int l,i,t,thislang;
15444 bpr 441
  modcnt=0; old="";
15440 bpr 442
  snprintf(buf,sizeof(buf),"%s/addr",glossaryoutdir);
443
  addrf=fopen(buf,"w");
444
  if(!addrf) { fprintf(stderr,"modind: error creating output files addr.\n"); exit(1);}
445
  snprintf(buf,sizeof(buf),"%s/serial",glossaryoutdir);
446
  serialf=fopen(buf,"w");
15375 bpr 447
  s=getenv("glist"); if(s==NULL) return;
448
  l=strlen(s); if(l<0 || l>100*MAX_LINELEN) return;
449
  mlist=xmalloc(l+16); ovlstrcpy(mlist,s);
15440 bpr 450
  for(t=0,p1=find_word_start(mlist); *p1 && modcnt<MAX_MODULES; p1=find_word_start(p2),t++) {
15375 bpr 451
    p2=find_word_end(p1);
452
    if(*p2) *p2++=0;
15440 bpr 453
    fprintf(addrf,"%d:%s\n",t,p1);
454
    fprintf(serialf,"%s:%d\n",p1,t);
455
    ovlstrcpy(revmod[t].name,p1);
15482 bpr 456
    ovlstrcpy(revmod[t].keywords,p1);
15375 bpr 457
    s=strchr(p1,'/');
458
    if(s != NULL) s=strchr(s+1,'/');
459
    if(s==NULL) {
460
      fprintf(stderr,"modind: no language %s\n",p1); exit(1);
461
    }
15482 bpr 462
    revmod[t].keywords[s-p1]=0;
15375 bpr 463
    s++;
464
    for(i=0;i<langcnt;i++) if(strncasecmp(lang[i],s,2)==0) break;
15440 bpr 465
    thislang = i<langcnt ? i : -1;
466
    revmod[t].lang=i;
467
    s[0]=s[1]='x';
468
    if(modcnt>0 && strcmp(old,p1)==0 && thislang >= 0) {
469
      if(mod[modcnt-1].langcnt<langcnt) {
470
        mod[modcnt-1].langs[mod[modcnt-1].langcnt]=thislang;
471
        mod[modcnt-1].counts[mod[modcnt-1].langcnt]=t;
472
        (mod[modcnt-1].langcnt)++;
473
      }
474
      revmod[t].imod=modcnt-1;
475
    }
476
    else {
477
      mod[modcnt].name=old=p1;
478
      if(thislang>=0) {
479
        mod[modcnt].langs[0]=thislang;
480
        mod[modcnt].langcnt=1;
481
      }
482
      else mod[modcnt].langcnt=0;
483
      mod[modcnt].counts[0]=t;
484
      revmod[t].imod=modcnt;
485
      modcnt++;
486
    }
15375 bpr 487
  }
15440 bpr 488
  fclose(addrf); fclose(serialf);
15375 bpr 489
}
490
 
6884 bpr 491
/*  read and treat module's INDEX file */
10 reyssat 492
int module_index(const char *name)
493
{
12248 bpr 494
  char *p, fbuf[MAX_LINELEN+1], ibuf[MAX_LINELEN+1];
495
  FILE *indf;
496
  int i,l;
10 reyssat 497
 
12248 bpr 498
  snprintf(fbuf,sizeof(fbuf),"%s/%s/INDEX",moduledir,name);
499
  indf=fopen(fbuf,"r");
500
  if(indf==NULL) {
501
    fprintf(stderr,"modind: INDEX of %s not found\n",fbuf); return -1;
502
  }
503
  l=fread(ibuf,1,MAX_LINELEN,indf); fclose(indf);
504
  if(l>0 && l<MAX_LINELEN) ibuf[l]=0; else return -1;
6884 bpr 505
/* treate all fields in *modindex */
12248 bpr 506
  for(i=0;i<MODINDEX_NO;i++) {
507
    _getdef(ibuf,modindex[i],indbuf[i]);
6884 bpr 508
/*  compatibility precaution */
12248 bpr 509
    if(indbuf[i][0]==':') indbuf[i][0]='.';
510
  }
511
  p=find_word_start(indbuf[i_language]);
512
  if(isalpha(*p) && isalpha(*(p+1))) {
513
    memmove(module_language,p,2); module_language[2]=0;
514
  }
515
  else ovlstrcpy(module_language,"en");
516
  return 0;
10 reyssat 517
}
518
 
519
int sheet_index(int serial)
520
{
12248 bpr 521
  char *p1, *p2, fbuf[MAX_LINELEN+1], ibuf[MAX_LINELEN+1];
522
  FILE *indf;
523
  int i,l;
10 reyssat 524
 
12248 bpr 525
  snprintf(fbuf,sizeof(fbuf),"%s/%s.def",sheetdir,mod[serial].name);
526
  indf=fopen(fbuf,"r"); if(indf==NULL) return -1;
527
  l=fread(ibuf,1,MAX_LINELEN,indf); fclose(indf);
528
  if(l>0 && l<MAX_LINELEN) ibuf[l]=0; else return -1;
15375 bpr 529
  for(i=0;i<SHEETINDEX_NO;i++) gsindbuf[i][0]=0;
12248 bpr 530
  for(i=0,p1=find_word_start(ibuf);
9090 bpr 531
      i<SHEETINDEX_NO-1 && *p1!=':' && *p1!=0;
532
      i++,p1=p2) {
12248 bpr 533
    p2=strchr(p1,'\n');
534
    if(p2!=NULL) *p2++=0; else p2=p1+strlen(p1);
8100 bpr 535
    p1=find_word_start(p1); strip_trailing_spaces2(p1);
15375 bpr 536
    snprintf(gsindbuf[i],MAX_LINELEN,"%s",p1);
12248 bpr 537
  }
538
  p2=strstr(p1,"\n:"); if(p2==NULL) p2=p1+strlen(p1);
539
  else *p2=0;
540
  p1=find_word_start(p1); strip_trailing_spaces2(p1);
541
  for(p2=p1;*p2;p2++) if(*p2=='\n') *p2=' ';
15375 bpr 542
  ovlstrcpy(gsindbuf[s_information],p1);
15440 bpr 543
  ovlstrcpy(gsindbuf[SHEETINDEX_NO],revmod[serial].name);
12248 bpr 544
  return 0;
10 reyssat 545
}
546
 
15375 bpr 547
int glossary_index(int serial)
548
{
15440 bpr 549
  char nbuf[MAX_LINELEN+1],fbuf[MAX_LINELEN+1], ibuf[MAX_LINELEN+1],*p,*s;
15375 bpr 550
  FILE *indf;
551
  int i,l;
15440 bpr 552
  s=lang[revmod[serial].lang];
553
  p=strchr(nbuf,'/');
554
  if(p != NULL) p=strchr(p+1,'/');
555
  if(p != NULL) {p[1]=s[0];p[2]=s[1];}
556
  snprintf(fbuf,sizeof(fbuf),"%s/%s",glossarydir,revmod[serial].name);
15375 bpr 557
  indf=fopen(fbuf,"r");
558
  l=fread(ibuf,1,MAX_LINELEN,indf); fclose(indf);
559
  if(l>0 && l<MAX_LINELEN) ibuf[l]=0; else return -1;
560
  for(i=0;i<SHEETINDEX_NO;i++) {
561
    _getdef(ibuf,glindex[i],gsindbuf[i]);
562
  }
15482 bpr 563
  s=gsindbuf[s_keywords]+strlen(gsindbuf[s_keywords]);
564
  *s++ = ',';
565
  ovlstrcpy(s,revmod[serial].keywords);
17038 bpr 566
  slash2comma(revmod[serial].keywords);
15440 bpr 567
  ovlstrcpy(gsindbuf[SHEETINDEX_NO],nbuf);
15375 bpr 568
  return 0;
569
}
570
 
10 reyssat 571
unsigned char categories[16];
572
char taken[MAX_LINELEN+1];
573
int catcnt, takenlen, tweight;
574
 
15394 bpr 575
/* file management for appenditem */
576
#define MAX_FILES (MAX_LANGS*catno)
577
 
578
char *fnames[MAX_FILES];
579
FILE *files[MAX_FILES];
580
int open_files;
581
 
582
FILE * file_from_list(char *name){
583
  int i, l = 0, r = open_files;
584
  while (r>l){
585
    int m = (l+r)/2;
586
    int cmp = strcmp(name,fnames[m]);
587
    if (!cmp) return files[m];
588
    if (cmp < 0) r = m; else l = m+1;
589
  }
590
  for (i=open_files; i > l; i--) {files[i]=files[i-1]; fnames[i]=fnames[i-1];}
591
  fnames[l] = xmalloc(MAX_FNAME);
592
  ovlstrcpy(fnames[l],name);
593
  open_files++;
594
  return files[l]=fopen(name,"a");
595
}
596
 
10 reyssat 597
void appenditem(char *word, int lind, int serial, int weight, char *l)
598
{
12248 bpr 599
  char nbuf[MAX_LINELEN+1], buf[MAX_LINELEN+1];
600
  int i, ll;
601
  char *p;
602
  FILE *f;
6881 bpr 603
 
12248 bpr 604
  if(!isalnum(*word) || (ll=strlen(word))<2 ||
605
     wordchr2(taken,word)!=NULL ||
606
     wordchr2(ignore[lind],word)!=NULL ||
607
     takenlen>=MAX_LINELEN-ll-16)
608
    return;
609
  if(ll==2 && (!isdigit(word[0]) || !isalpha(word[1]))) return;
610
  for(p=word;*p;p++) if(!isalnum(*p) && *p!=' ') return;
611
  taken[takenlen++]=' '; taken[takenlen++]=' ';
612
  ovlstrcpy(taken+takenlen,word);
613
  takenlen+=ll; tweight+=weight;
614
  snprintf(buf,sizeof(buf),"%s:%d?%d\n",word,serial,weight);
615
  for(i=0;i<catcnt;i++) {
6884 bpr 616
    snprintf(nbuf,sizeof(nbuf),"%s/%c.%s",
12248 bpr 617
       outdir,categories[i],lang[lind]);
15394 bpr 618
    f = file_from_list(nbuf);
619
    if(f!=NULL) {fputs(buf,f);}
12248 bpr 620
  }
10 reyssat 621
}
622
 
6881 bpr 623
void appenditem1 (char *buf, int lind, int serial, int weight, char *l )
624
{
625
  char *p1, *p2 ;
626
  for(p1=find_word_start(buf); *p1;
6884 bpr 627
    p1=find_word_start(p2)) {
628
    p2=strchr(p1,',');
629
    if(p2!=NULL) *p2++=0; else p2=p1+strlen(p1);
630
    if(strlen(p1)<=0) continue;
631
    appenditem(p1,lind,serial,weight,module_language);
6881 bpr 632
  }
633
}
634
void appenditem2 (char *buf, int lind, int serial, int weight, char *l )
635
{
636
  char *p1, *p2 ;
637
  for(p1=find_word_start(buf);*p1;
12248 bpr 638
      p1=find_word_start(p2)) {
6884 bpr 639
    p2=find_word_end(p1); if(*p2) *p2++=0;
640
    appenditem(p1,lind,serial,weight,module_language);
6881 bpr 641
  }
642
}
10 reyssat 643
void onemodule(const char *name, int serial, int lind)
644
{
12248 bpr 645
  int i;
646
  unsigned char trlist[]={
647
  i_title,i_description,i_category,i_domain,i_keywords,
648
  i_require,i_author,
649
  i_keywords_ca,i_keywords_en,i_keywords_fr,i_keywords_it,i_keywords_nl,
650
  i_title_ca,i_title_en,i_title_fr,i_title_it,i_title_nl
651
  };
15375 bpr 652
  int trcnt=sizeof(trlist)/sizeof(trlist[0]);
15777 georgesk 653
  char *p1, *p2, *pp, *q, buf[15*MAX_LINELEN+15], lbuf[16];
12248 bpr 654
  FILE *f;
6881 bpr 655
 
12248 bpr 656
  if(module_index(name)) return;
657
  towords(indbuf[i_category]);
7915 bpr 658
/*   list the categories (among A=all,X=eXercise,O,D,...) corresponding
6884 bpr 659
 *   to this module
660
 */
12248 bpr 661
  for(i=catcnt=0;i<catno && catcnt<16;i++) {
15380 bpr 662
    if(wordchr2(indbuf[i_category],cat[i].name)!=NULL)
663
      categories[catcnt++]=cat[i].typ;
12248 bpr 664
  }
665
  if(catcnt==0) return;
666
  if(categories[0]!=cat[0].typ)
667
    categories[catcnt++]=cat[0].typ;
6884 bpr 668
/*  write module's name in the category.language files, for instance lists/X.fr
669
 * for french exercises
670
 */
12248 bpr 671
  for(i=0;i<catcnt;i++) {
672
    snprintf(buf,sizeof(buf),"%s/%s/%c.%s",
673
       outdir,mlistbase,categories[i],lang[lind]);
674
    f=fopen(buf,"a");
675
    if(f!=NULL) {fprintf(f,"%s\n",name); fclose(f);}
676
  }
6884 bpr 677
/*   add serial number and language (resp.title, ...) to corresponding file  */
12248 bpr 678
  fprintf(langf,"%d:%s\n",serial,module_language);
679
  fprintf(titf,"%d:%s\n",serial,indbuf[i_title]);
16987 bpr 680
  if(indbuf[i_title_ca][0]!=0)
681
    fprintf(titf_ca,"%d:%s\n",serial,indbuf[i_title_ca]);
682
  else
683
    fprintf(titf_ca,"%d:%s\n",serial,indbuf[i_title]);
684
  if(indbuf[i_title_en][0]!=0)
685
    fprintf(titf_en,"%d:%s\n",serial,indbuf[i_title_en]);
686
  else
687
    fprintf(titf_en,"%d:%s\n",serial,indbuf[i_title]);
688
  if(indbuf[i_title_es][0]!=0)
689
    fprintf(titf_es,"%d:%s\n",serial,indbuf[i_title_es]);
690
  else
691
    fprintf(titf_es,"%d:%s\n",serial,indbuf[i_title]);
692
  if(indbuf[i_title_fr][0]!=0)
693
    fprintf(titf_fr,"%d:%s\n",serial,indbuf[i_title_fr]);
694
  else
695
    fprintf(titf_fr,"%d:%s\n",serial,indbuf[i_title]);
696
  if(indbuf[i_title_it][0]!=0)
697
    fprintf(titf_it,"%d:%s\n",serial,indbuf[i_title_it]);
698
  else
699
    fprintf(titf_it,"%d:%s\n",serial,indbuf[i_title]);
700
  if(indbuf[i_title_nl][0]!=0)
701
    fprintf(titf_nl,"%d:%s\n",serial,indbuf[i_title_nl]);
702
  else
703
    fprintf(titf_nl,"%d:%s\n",serial,indbuf[i_title]);
704
 
12248 bpr 705
  fprintf(descf,"%d:%s\n",serial,indbuf[i_description]);
706
  fprintf(authorf,"%d:%s\n",serial,indbuf[i_author]);
707
  fprintf(versionf,"%d:%s\n",serial,indbuf[i_version]);
6881 bpr 708
 
6884 bpr 709
/*   add module's information in html page for robots  */
12248 bpr 710
  snprintf(buf,sizeof(buf),"%s",indbuf[i_description]);
711
  for(pp=strchr(buf,','); pp; pp=strchr(pp,','))
712
    string_modify3(buf,pp,pp+1,"&#44;");
713
  if(strcmp(module_language,lang[lind])==0)
714
    fprintf(robotf,"%s ,%s,%s,%s,%s\n",name,module_language,name,
715
        indbuf[i_title], buf);
6819 reyssat 716
 
6884 bpr 717
/*   Normalize the information of trlist, using dictionary
7915 bpr 718
 *  -- bases/sys/domain.xx without suffix translation (--> english version)
6884 bpr 719
 */
15380 bpr 720
  entrycount=dentrycount; dicbuf=ddicbuf;
721
  memmove(entry,dentry,dentrycount*sizeof(entry[0]));
722
  unknown_type=unk_leave;
723
  for(i=0;i<trcnt;i++) {
724
    detag(indbuf[trlist[i]]);
725
    deaccent2(indbuf[trlist[i]]);
726
    comma(indbuf[trlist[i]]);
727
    singlespace2(indbuf[trlist[i]]);
728
    translate(indbuf[trlist[i]]);
729
  }
6884 bpr 730
/*   Normalize the information, using dictionary
7915 bpr 731
 *   bases/sys/words.xx with suffix translation
6884 bpr 732
 */
15380 bpr 733
  entrycount=mentrycount; dicbuf=mdicbuf;
734
  memmove(entry,mentry,mentrycount*sizeof(entry[0]));
735
  unknown_type=unk_leave;/*  used in translator_.c */
736
  for(i=0;i<trcnt;i++) {
737
  suffix_translate(indbuf[trlist[i]]);
738
  translate(indbuf[trlist[i]]);
739
  }
6881 bpr 740
 
741
/* taken contains all words already seen in the module index */
15380 bpr 742
  taken[0]=0; takenlen=tweight=0;
6881 bpr 743
/*  append words of title  */
15380 bpr 744
  ovlstrcpy(buf,indbuf[i_title]); towords(buf);
745
  appenditem2(buf,lind,serial,4,module_language);
6881 bpr 746
 
6884 bpr 747
/*  extract words of every other information except level */
15380 bpr 748
  snprintf(buf,sizeof(buf),"%s %s %s %s %s %s %s %s %s %s %s %s %s %s %s",
749
    indbuf[i_description],indbuf[i_keywords],
750
    indbuf[i_keywords_ca],indbuf[i_keywords_en],indbuf[i_keywords_fr],
751
    indbuf[i_keywords_it],indbuf[i_keywords_nl],
752
    indbuf[i_title_ca],indbuf[i_title_en],indbuf[i_title_fr],
753
    indbuf[i_title_it],indbuf[i_title_nl],
754
    indbuf[i_domain],indbuf[i_require],indbuf[i_author]);
755
  towords(buf);
756
  appenditem2(buf,lind,serial,2,module_language);
6881 bpr 757
 
6884 bpr 758
/*   this time the dictionary is the group dictionary  sys/wgrp/wgrp
759
 *   with a g (groupdic), not an m (maindic) . see below main, suffix, group.
7915 bpr 760
 *   and delete unknown ?? and translate
6884 bpr 761
 */
12248 bpr 762
  entrycount=gentrycount; dicbuf=gdicbuf;
763
  memmove(entry,gentry,gentrycount*sizeof(entry[0]));
6881 bpr 764
 
15380 bpr 765
/* append words of every title information  */
12248 bpr 766
  ovlstrcpy(buf,indbuf[i_title]);
767
  unknown_type=unk_delete;
768
  translate(buf);
769
  appenditem1(buf,lind,serial,2,module_language);
6881 bpr 770
 
15380 bpr 771
/* append words of information of description except level  */
12248 bpr 772
  snprintf(buf,sizeof(buf),"%s", indbuf[i_description]);
773
  unknown_type=unk_delete;
774
  translate(buf);
775
  appenditem1(buf,lind,serial,4,module_language);
6881 bpr 776
 
15380 bpr 777
/* append words (or group of words) of keywords and domain  */
12248 bpr 778
  snprintf(buf,sizeof(buf),"%s, %s, %s, %s, %s, %s, %s",
15380 bpr 779
    indbuf[i_domain],indbuf[i_keywords],
780
    indbuf[i_keywords_ca], indbuf[i_keywords_en],indbuf[i_keywords_fr],
781
    indbuf[i_keywords_it], indbuf[i_keywords_nl]);
12248 bpr 782
  unknown_type=unk_leave;
783
  translate(buf);
784
  appenditem1(buf,lind,serial,2,module_language);
6881 bpr 785
 
15380 bpr 786
/* append level information, with weight 2 */
12248 bpr 787
  snprintf(buf,sizeof(buf),"%s",indbuf[i_level]);
788
  ovlstrcpy(lbuf,"level");
789
  for(p1=buf; *p1; p1++) if(!isalnum(*p1)) *p1=' ';
790
  q=buf+strlen(buf);
15380 bpr 791
  for(p1=find_word_start(buf); (*p1) && (p1 < q) ; p1=find_word_start(p2)) {
12248 bpr 792
    p2=find_word_end(p1);
793
    if(p2!=NULL) *p2++=0; else p2=p1+strlen(p1);
794
    if(strncmp(p1, "Lang" , p2-p1) &&
795
     (!isalpha(*p1) ||
796
     (!isdigit(*(p1+1)) && *(p1+1)!=0) ||
797
     (*(p1+1)!=0 && *(p1+2)!=0)))
798
       continue;
799
    *p1=tolower(*p1);
800
    ovlstrcpy(lbuf+strlen("level"),p1);
801
    appenditem(lbuf,lind,serial,2,module_language);
802
  }
6884 bpr 803
/*   append total weight of module to weight file site2/weight.xx  */
12248 bpr 804
  fprintf(weightf,"%d:%d\n",serial,tweight);
10 reyssat 805
}
806
 
807
void modules(void)
808
{
12248 bpr 809
  int i,j,k,d;
810
  char namebuf[MAX_LINELEN+1];
811
  char mdic[MAX_LINELEN+1], sdic[MAX_LINELEN+1], gdic[MAX_LINELEN+1], ddic[MAX_LINELEN+1];
10 reyssat 812
 
12248 bpr 813
  for(j=0;j<langcnt;j++) {
6884 bpr 814
    snprintf(namebuf,sizeof(namebuf),"%s/weight.%s",outdir,lang[j]);
815
    weightf=fopen(namebuf,"w");
816
    snprintf(mdic,sizeof(mdic),"%s/%s.%s",dicdir,maindic,lang[j]);
817
    snprintf(sdic,sizeof(sdic),"%s/%s.%s",dicdir,suffixdic,lang[j]);
818
    snprintf(gdic,sizeof(gdic),"%s/%s.%s",dicdir,groupdic,lang[j]);
819
    snprintf(ddic,sizeof(ddic),"%s/%s.%s",dicdir,domaindic,lang[j]);
820
    suffix_dic(sdic); prepare_dic(gdic);
821
    gdicbuf=dicbuf; gentrycount=entrycount;
822
    memmove(gentry,entry,gentrycount*sizeof(entry[0]));
823
    prepare_dic(mdic);
824
    mdicbuf=dicbuf; mentrycount=entrycount;
825
    memmove(mentry,entry,mentrycount*sizeof(entry[0]));
826
    prepare_dic(ddic);
827
    ddicbuf=dicbuf; dentrycount=entrycount;
828
    memmove(dentry,entry,dentrycount*sizeof(entry[0]));
829
    unknown_type=unk_leave; translate(ignore[j]);
830
    for(i=0;i<modcnt;i++) {
12248 bpr 831
      if(mod[i].langcnt>0) {
15336 bpr 832
      /* look for another language */
6884 bpr 833
        for(d=k=0;k<mod[i].langcnt;k++)
15336 bpr 834
          if(mod[i].langs[k]<mod[i].langs[d]) d=k;
6884 bpr 835
        for(k=0;k<mod[i].langcnt && mod[i].langs[k]!=j;k++);
836
        if(k>=mod[i].langcnt) k=d;
837
        snprintf(namebuf,MAX_LINELEN,"%s.%s",mod[i].name,
12248 bpr 838
           lang[mod[i].langs[k]]);
6884 bpr 839
        onemodule(namebuf,mod[i].counts[k],j);
12248 bpr 840
      }
841
      else {
6884 bpr 842
        onemodule(mod[i].name,mod[i].counts[0],j);
12248 bpr 843
      }
10 reyssat 844
    }
6884 bpr 845
    if(mentrycount>0) free(mdicbuf);
846
    if(gentrycount>0) free(gdicbuf);
847
    if(suffixcnt>0) free(sufbuf);
848
    if(dentrycount>0) free(ddicbuf);
849
    if(weightf) fclose(weightf);
12248 bpr 850
  }
10 reyssat 851
}
15394 bpr 852
void clean(void)
853
{
854
  int i;
855
  for (i = 0; i < open_files; i++) fclose(files[i]);
856
  fclose(langf); fclose(titf); fclose(descf); fclose(robotf);
857
  fclose(authorf); fclose(versionf);
16987 bpr 858
  fclose(titf_fr); fclose(titf_it);fclose(titf_es);fclose(titf_nl);
859
  fclose(titf_ca);fclose(titf_en);
15394 bpr 860
}
10 reyssat 861
 
6881 bpr 862
/* FIXME? Nearly the same as appenditem; the difference is that this one uses fprintf instead of snprintf. */
10 reyssat 863
void sappenditem(char *word, int lind, int serial, int weight)
864
{
12248 bpr 865
  int ll;
866
  char *p;
6881 bpr 867
 
12248 bpr 868
  if(!isalnum(*word) || (ll=strlen(word))<2 ||
869
     wordchr2(taken,word)!=NULL ||
870
     wordchr2(ignore[lind],word)!=NULL ||
871
     takenlen>=MAX_LINELEN-ll-16)
872
    return;
873
  if(ll==2 && (!isdigit(word[0]) || !isalpha(word[1]))) return;
874
  for(p=word;*p;p++) if(!isalnum(*p) && *p!=' ') return;
875
  taken[takenlen++]=' ';taken[takenlen++]=' ';
876
  ovlstrcpy(taken+takenlen,word);
877
  takenlen+=ll; tweight+=weight;
878
  fprintf(indf,"%s:%d?%d\n",word,serial,weight);
10 reyssat 879
}
15380 bpr 880
/* onesg / onemodule are similar */
15375 bpr 881
void onesg(int serial, int lind, int index(int))
10 reyssat 882
{
12248 bpr 883
  int i;
884
  unsigned char trlist[]={
15375 bpr 885
    s_title,s_description,s_domain,s_keywords,s_information
12248 bpr 886
  };
15380 bpr 887
  int trcnt=sizeof(trlist)/sizeof(trlist[0]);
15778 georgesk 888
  char *p1, *p2, *q, buf[4*MAX_LINELEN+4], lbuf[16];
6881 bpr 889
 
15375 bpr 890
  if(index(serial)) return;
891
  fprintf(titf,"%d:%s\n",serial,gsindbuf[s_title]);
892
  fprintf(descf,"%d:%s\n",serial,gsindbuf[s_description]);
893
  fprintf(remf,"%d:%s\n",serial,gsindbuf[s_information]);
7915 bpr 894
 
15380 bpr 895
/*   Normalize the information of trlist, using dictionary
896
 *  -- bases/sys/domain.xx without suffix translation (--> english version)
897
 */
12248 bpr 898
  entrycount=dentrycount; dicbuf=ddicbuf;
899
  memmove(entry,dentry,dentrycount*sizeof(entry[0]));
900
  unknown_type=unk_leave;
15380 bpr 901
  for(i=0;i<trcnt;i++) {
15375 bpr 902
    detag(gsindbuf[trlist[i]]);
903
    deaccent2(gsindbuf[trlist[i]]);
904
    comma(gsindbuf[trlist[i]]);
905
    singlespace2(gsindbuf[trlist[i]]);
906
    translate(gsindbuf[trlist[i]]);
12248 bpr 907
  }
15380 bpr 908
/*   Normalize the information, using dictionary
909
 *   bases/sys/words.xx with suffix translation
910
 */
12248 bpr 911
  entrycount=mentrycount; dicbuf=mdicbuf;
912
  memmove(entry,mentry,mentrycount*sizeof(entry[0]));
15380 bpr 913
  unknown_type=unk_leave;/*  used in translator_.c */
914
  for(i=0;i<trcnt;i++) {
15375 bpr 915
    suffix_translate(gsindbuf[trlist[i]]);
916
    translate(gsindbuf[trlist[i]]);
12248 bpr 917
  }
15380 bpr 918
 
919
/* taken contains all words already seen in the module index */
12248 bpr 920
  taken[0]=0; takenlen=tweight=0;
15380 bpr 921
/*  append words of title  */
15375 bpr 922
  ovlstrcpy(buf,gsindbuf[s_title]); towords(buf);
12248 bpr 923
  for(p1=find_word_start(buf);*p1;
924
      p1=find_word_start(p2)) {
6884 bpr 925
    p2=find_word_end(p1); if(*p2) *p2++=0;
926
    sappenditem(p1,lind,serial,4);
12248 bpr 927
  }
15380 bpr 928
 
929
/*  extract words of every other information except level */
12248 bpr 930
  snprintf(buf,sizeof(buf),"%s %s %s %s",
15375 bpr 931
         gsindbuf[s_description],gsindbuf[s_keywords],
932
         gsindbuf[s_domain],gsindbuf[s_information]);
12248 bpr 933
  towords(buf);
15375 bpr 934
  for(p1=find_word_start(buf);*p1;p1=find_word_start(p2)) {
935
    p2=find_word_end(p1); if(*p2) *p2++=0;
936
    sappenditem(p1,lind,serial,2);
12248 bpr 937
  }
15380 bpr 938
/*   this time the dictionary is the group dictionary  sys/wgrp/wgrp
939
 *   with a g (groupdic), not an m (maindic) . see below main, suffix, group.
940
 *   and delete unknown ?? and translate
941
 */
12248 bpr 942
  entrycount=gentrycount; dicbuf=gdicbuf;
943
  memmove(entry,gentry,gentrycount*sizeof(entry[0]));
15380 bpr 944
 
945
/*  append words of every title information  */
946
  ovlstrcpy(buf,gsindbuf[s_title]);
12248 bpr 947
  unknown_type=unk_delete;
15380 bpr 948
  translate(buf);
15375 bpr 949
  for(p1=find_word_start(buf); *p1; p1=find_word_start(p2)) {
950
    p2=strchr(p1,',');
951
    if(p2!=NULL) *p2++=0; else p2=p1+strlen(p1);
952
    if(strlen(p1)<=0) continue;
953
    sappenditem(p1,lind,serial,4);
12248 bpr 954
  }
15380 bpr 955
 
956
/*  append words (or group of words) of keywords and domain  */
12248 bpr 957
  snprintf(buf,sizeof(buf),"%s, %s",
15375 bpr 958
       gsindbuf[s_keywords],
959
       gsindbuf[s_domain]);
15380 bpr 960
  unknown_type=unk_leave;
12248 bpr 961
  translate(buf);
15380 bpr 962
  for(p1=find_word_start(buf); *p1; p1=find_word_start(p2)) {
6884 bpr 963
    p2=strchr(p1,',');
964
    if(p2!=NULL) *p2++=0; else p2=p1+strlen(p1);
965
    if(strlen(p1)<=0) continue;
966
    sappenditem(p1,lind,serial,2);
12248 bpr 967
  }
15380 bpr 968
 
969
/*   append level information, with weight 2 */
970
  snprintf(buf,sizeof(buf),"%s",gsindbuf[s_level]);
971
  ovlstrcpy(lbuf,"level");
972
  for(p1=buf; *p1; p1++) if(!isalnum(*p1)) *p1=' ';
973
  q=buf+strlen(buf);
974
  for(p1=find_word_start(buf); (*p1) && (p1 < q) ;
975
  p1=find_word_start(p2)) {
976
    p2=find_word_end(p1);
977
    if(p2!=NULL) *p2++=0; else p2=p1+strlen(p1);
978
    if(strncmp(p1, "Lang" , p2-p1) &&
979
        (!isalpha(*p1) || (!isdigit(*(p1+1))) ||
980
        (*(p1+1)!=0 && *(p1+2)!=0)))
981
      continue;
982
    *p1=tolower(*p1);
983
    ovlstrcpy(lbuf+strlen("level"),p1);
984
    sappenditem(lbuf,lind,serial,2);
985
  }
986
/*   append total weight of module to weight file site2/weight.xx  */
12248 bpr 987
  fprintf(weightf,"%d:%d\n",serial,tweight);
10 reyssat 988
}
989
 
15375 bpr 990
void sgs(char *outdir, int index(int))
10 reyssat 991
{
15440 bpr 992
  int i,j,k,d;
12248 bpr 993
  char mdic[MAX_LINELEN+1], sdic[MAX_LINELEN+1], gdic[MAX_LINELEN+1], ddic[MAX_LINELEN+1];
994
  char buf[MAX_LINELEN+1];
7915 bpr 995
 
15442 bpr 996
  //snprintf(buf,sizeof(buf),"%s/list",outdir);
997
  //listf=fopen(buf,"w");
15440 bpr 998
  snprintf(buf,sizeof(buf),"%s/title",outdir);
999
  titf=fopen(buf,"w");
1000
  snprintf(buf,sizeof(buf),"%s/description",outdir);
1001
  descf=fopen(buf,"w");
1002
  snprintf(buf,sizeof(buf),"%s/information",outdir);
1003
  remf=fopen(buf,"w");
15442 bpr 1004
  if(!remf || !descf || !titf ) {
15440 bpr 1005
    fprintf(stderr,"modind: error creating output files for %s.\n",outdir); exit(1);
1006
  }
12248 bpr 1007
  for(j=0;j<langcnt;j++) {
15375 bpr 1008
    snprintf(buf,sizeof(buf),"%s/%s",outdir,lang[j]);
12248 bpr 1009
    indf=fopen(buf,"w");
15375 bpr 1010
    snprintf(buf,sizeof(buf),"%s/weight.%s",outdir,lang[j]);
12248 bpr 1011
    weightf=fopen(buf,"w");
15440 bpr 1012
    if(!weightf || !indf ) {
1013
      fprintf(stderr,"modind: error creating output files for %s.\n",outdir); exit(1);
9090 bpr 1014
    }
6884 bpr 1015
    snprintf(mdic,sizeof(mdic),"%s/%s.%s",dicdir,maindic,lang[j]);
1016
    snprintf(sdic,sizeof(sdic),"%s/%s.%s",dicdir,suffixdic,lang[j]);
1017
    snprintf(gdic,sizeof(gdic),"%s/%s.%s",dicdir,groupdic,lang[j]);
6961 bpr 1018
    snprintf(ddic,sizeof(ddic),"%s/%s.%s",dicdir,domaindic,lang[j]);
6884 bpr 1019
    suffix_dic(sdic); prepare_dic(gdic);
1020
    gdicbuf=dicbuf; gentrycount=entrycount;
1021
    memmove(gentry,entry,gentrycount*sizeof(entry[0]));
1022
    prepare_dic(mdic);
1023
    mdicbuf=dicbuf; mentrycount=entrycount;
1024
    memmove(mentry,entry,mentrycount*sizeof(entry[0]));
6973 bpr 1025
    prepare_dic(ddic);
1026
    ddicbuf=dicbuf; dentrycount=entrycount;
1027
    memmove(dentry,entry,dentrycount*sizeof(entry[0]));
6884 bpr 1028
    unknown_type=unk_leave; translate(ignore[j]);
15440 bpr 1029
    for(i=0;i<modcnt;i++)
1030
      if(mod[i].langcnt>0) {
1031
      /* look for another language */
1032
        for(d=k=0;k<mod[i].langcnt;k++)
1033
          if(mod[i].langs[k]<mod[i].langs[d]) d=k;
1034
        for(k=0;k<mod[i].langcnt && mod[i].langs[k]!=j;k++);
1035
        if(k>=mod[i].langcnt) k=d;
1036
        onesg(mod[i].counts[k],mod[i].langs[k],index);
1037
      }
6884 bpr 1038
    if(mentrycount>0) free(mdicbuf);
1039
    if(gentrycount>0) free(gdicbuf);
1040
    if(suffixcnt>0) free(sufbuf);
6961 bpr 1041
    if(dentrycount>0) free(ddicbuf);
15440 bpr 1042
    fclose(indf); fclose(weightf);
12248 bpr 1043
  }
15444 bpr 1044
  fclose(titf); fclose(descf); fclose(remf);
10 reyssat 1045
}
1046
 
1047
int main()
1048
{
12248 bpr 1049
  gentry=xmalloc(entry_size);
1050
  dentry=xmalloc(entry_size);
1051
  mentry=xmalloc(entry_size);
15442 bpr 1052
  init();
12248 bpr 1053
  prep();
1054
  if(modcnt>0) modules();
1055
  clean();
1056
  sprep();
15375 bpr 1057
  if(modcnt>0) sgs(sheetoutdir,sheet_index);
1058
  gprep();
1059
  if(modcnt>0) sgs(glossaryoutdir,glossary_index);
12248 bpr 1060
  return 0;
10 reyssat 1061
}