Subversion Repositories wimsdev

Rev

Rev 15778 | Rev 17024 | Go to most recent revision | Details | Compare with Previous | Last modification | View Log | RSS feed

Rev Author Line No. Line
10 reyssat 1
/*    Copyright (C) 1998-2003 XIAO, Gang of Universite de Nice - Sophia Antipolis
2
 *
3
 *  This program is free software; you can redistribute it and/or modify
4
 *  it under the terms of the GNU General Public License as published by
5
 *  the Free Software Foundation; either version 2 of the License, or
6
 *  (at your option) any later version.
7
 *
8
 *  This program is distributed in the hope that it will be useful,
9
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
10
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
11
 *  GNU General Public License for more details.
12
 *
13
 *  You should have received a copy of the GNU General Public License
14
 *  along with this program; if not, write to the Free Software
15
 *  Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
16
 */
17
 
6884 bpr 18
/*  This is an internal program,
7915 bpr 19
 * used to index modules for search engine.
6884 bpr 20
 */
10 reyssat 21
 
8100 bpr 22
#include "../Lib/libwims.h"
8123 bpr 23
#include "translator_.h"
24
#include "suffix.h"
10 reyssat 25
 
6884 bpr 26
/* Capacity limits for the language and module tables below. */
#define MAX_LANGS    MAX_LANGUAGES
#define MAX_MODULES    65536

/* Directories that are scanned for modules, sheets and glossary entries. */
char *moduledir   = "public_html/modules";
char *sheetdir    = "public_html/bases/sheet";
char *glossarydir = "public_html/scripts/data/glossary";

/* Base directory of the dictionaries named further down. */
char *dicdir      = "public_html/bases";

/* Output directories; init() may replace some of them from the environment. */
char *outdir         = "public_html/bases/site2";
char *sheetoutdir    = "public_html/bases/sheet/index";
char *glossaryoutdir = "public_html/scripts/data/glossary/index";

/* Dictionary names relative to dicdir; ".<lang>" is appended when used. */
char *maindic     = "sys/words";
char *groupdic    = "sys/wgrp/wgrp";
char *suffixdic   = "sys/suffix";
char *domaindic   = "sys/domaindic";
char *ignoredic   = "sys/indignore";

/* Configuration file read by init() for the site_languages setting. */
char *conffile    = "log/wims.conf";

/* Sub-directory of outdir that receives the per-category module lists. */
char *mlistbase   = "lists";
10 reyssat 42
 
43
char lang[MAX_LANGS][4]={
1792 bpr 44
    "en","fr","cn","es","it","nl","si","ca","pt"
10 reyssat 45
};
6884 bpr 46
#define DEFAULT_LANGCNT    6
10 reyssat 47
char allang[MAX_LANGS][4]={
6564 bpr 48
    "en","fr","cn","es","it","nl","de","si","ca","pt"
10 reyssat 49
};
50
#define allangcnt 8
51
char ignore[MAX_LANGS][MAX_LINELEN+1];
52
char mlistfile[MAX_LANGS][256];
53
int langcnt;
15444 bpr 54
FILE *langf, *titf, *descf, *weightf, *robotf, *indf, *listf, *addrf, *serialf, *authorf, *versionf, *remf;
16987 bpr 55
FILE *titf_ca,*titf_en,*titf_es,*titf_fr,*titf_it,*titf_nl;
10 reyssat 56
/* Module categories: the word looked for in the INDEX "category" field and
 * the one-letter code used in output file names.
 * Entry 0 ("all_types") is the catch-all category.
 */
struct cat {
    char *name;
    char typ;
} cat[] = {
    { .name = "all_types",  .typ = 'A' },
    { .name = "exercise",   .typ = 'X' },
    { .name = "oef",        .typ = 'O' },
    { .name = "tool",       .typ = 'T' },
    { .name = "recreation", .typ = 'R' },
    { .name = "reference",  .typ = 'Y' },
    { .name = "document",   .typ = 'D' },
    { .name = "popup",      .typ = 'P' },
    { .name = "datamodule", .typ = 'M' }
};
/* Number of entries in cat[]. */
#define catno (sizeof(cat)/sizeof(cat[0]))
71
 
72
struct mod {
73
    char *name;
74
    unsigned char langs[MAX_LANGS];
75
    int counts[MAX_LANGS];
15440 bpr 76
    int langcnt;
10 reyssat 77
} mod[MAX_MODULES];
15440 bpr 78
 
79
// serial-> the name of the module indexed by serial, lang and its classe
80
struct revmod {
81
    char name[MAX_MODULELEN+1];
82
    int lang;
83
    int imod;
15482 bpr 84
    char keywords[MAX_FNAME];
15440 bpr 85
} revmod[MAX_MODULES];
10 reyssat 86
int modcnt;
87
 
88
char *mlist;
15442 bpr 89
char *sheetindex[]={
90
  "title", "description",
91
  "duration", "severity",
92
  "level", "domain",
93
  "keywords", "reserved1", "reserved2", "information"
94
};
95
/* correspond to the order of sheetindex */
96
char *glindex[]={
97
  "gl_title", "gl_description",
98
  "", "",
99
  "gl_level", "gl_domain",
100
  "gl_keywords","","",""};
10 reyssat 101
 
15442 bpr 102
#define SHEETINDEX_NO (sizeof(sheetindex)/sizeof(sheetindex[0]))
103
char gsindbuf[SHEETINDEX_NO+1][MAX_LINELEN+1];
104
 
105
/* do not modify the order, correspond to the order in the sheet file */
106
enum{s_title, s_description,
107
      s_duration, s_severity,
108
      s_level, s_domain,
109
      s_keywords, s_reserved1, s_reserved2,
110
      s_information
111
};
112
 
113
char *modindex[]={
114
  "title", "description",
115
  "author", "address", "copyright",
116
  "version", "wims_version", "language",
117
  "category", "level", "domain", "keywords",
118
  "keywords_ca", "keywords_en", "keywords_fr", "keywords_it", "keywords_nl",
16987 bpr 119
  "title_ca", "title_en", "title_es", "title_fr", "title_it", "title_nl",
15442 bpr 120
  "require"
121
};
122
#define MODINDEX_NO (sizeof(modindex)/sizeof(modindex[0]))
123
char indbuf[MODINDEX_NO][MAX_LINELEN+1];
124
enum{i_title, i_description,
125
  i_author,i_address,i_copyright,
126
  i_version,i_wims_version,i_language,
127
  i_category,i_level,i_domain,i_keywords,
128
  i_keywords_ca,i_keywords_en,i_keywords_fr,i_keywords_it,i_keywords_nl,
16987 bpr 129
  i_title_ca,i_title_en,i_title_es,i_title_fr,i_title_it,i_title_nl,
15442 bpr 130
  i_require
131
};
132
 
133
char *module_special_file[] = {
  "intro",
  "help",
  "about"
};
#define MODSPEC_NO (sizeof(module_special_file)/sizeof(module_special_file[0]))

/* Two-letter language of the module last read by module_index(). */
char module_language[4];

/* Saved dictionary buffers and entry tables (main, group, domain). */
char *mdicbuf, *gdicbuf, *ddicbuf;
char *gentry, *mentry, *dentry;

/* Entry counts of the saved dictionaries. */
int gentrycount, mentrycount, dentrycount;
142
 
143
 
6884 bpr 144
/*  fold known accented letters to unaccented, other strange characters to space
7915 bpr 145
 *  apostrophe is among the exceptions to be kept (important for multi-word expressions)
6884 bpr 146
 */
8100 bpr 147
void deaccent2(char *p)
10 reyssat 148
{
12248 bpr 149
  char *sp;
150
  char *v;
151
  for(sp=p;*sp;sp++) {
152
  if(*sp<0 && (v=strchr(acctab,*sp))!=NULL)
153
    *sp=*(deatab+(v-acctab));
154
  if(!isalnum(*sp) && strchr(",.&$+*",*sp)==0) *sp=' ';
155
  else *sp=tolower(*sp);
156
  }
10 reyssat 157
}
158
 
6884 bpr 159
/*  Replace, in place, every character that is neither alphanumeric nor one
 *  of & $ + * by a space.
 */
void towords(char *p)
{
  char *pp;

  for(pp=p;*pp;pp++) {
    /* cast: passing a negative char to isalnum() is undefined behaviour */
    if(!isalnum((unsigned char)*pp) && strchr("&$+*",*pp)==NULL) *pp=' ';
  }
}
165
 
6884 bpr 166
/*  Find the first occurrence of w in p that stands as a whole word, i.e. is
 *  preceded by the start of p or a whitespace, and followed by a whitespace
 *  or the end of p. Returns a pointer to it, or NULL if there is none.
 */
char *wordchr2(char *p, char *w)
{
  size_t wlen=strlen(w);   /* loop invariant */
  char *r;

  /* "*r ? ... : NULL" keeps strstr() from being started past the
   * terminator, which could happen with an empty w. */
  for(r=strstr(p,w); r!=NULL; r=(*r ? strstr(r+1,w) : NULL)) {
    int left_ok=(r==p) || isspace((unsigned char)r[-1]);
    int right_ok=(r[wlen]==0) || isspace((unsigned char)r[wlen]);
    if(left_ok && right_ok) return r;
  }
  return NULL;
}
176
 
177
/*  Return a pointer just past the '>' closing the tag that starts at p.
 *  Nested '<...>' groups and quoted strings ("..." or '...') inside the tag
 *  are skipped. If the tag or a quoted string is not closed, the end of the
 *  string is returned.
 */
char *find_tag_end(char *p)
{
  char *cur=p;
  char *closing;

  if(*cur=='<') cur++;
  while(*cur!=0 && *cur!='>') {
    switch(*cur) {
      case '<':
        /* resume right after the nested tag */
        cur=find_tag_end(cur);
        break;
      case '"':
      case '\'':
        closing=strchr(cur+1,*cur);
        if(closing==NULL) return p+strlen(p);
        cur=closing+1;
        break;
      default:
        cur++;
        break;
    }
  }
  return (*cur=='>') ? cur+1 : cur;
}
197
 
198
/*  Find the first tag named `tag` (case-insensitive) in p.
 *  Returns a pointer to its '<', or to the terminating NUL of p when no such
 *  tag exists. The tag name must not be followed by an alphanumeric
 *  character, so "b" does not match "<body>".
 */
char *find_tag(char *p, char *tag)
{
  char *pp;
  size_t len=strlen(tag);

  for(pp=strchr(p,'<'); pp!=NULL; pp=strchr(pp+1,'<')) {
    /* cast: passing a negative char to isalnum() is undefined behaviour */
    if(strncasecmp(pp+1,tag,len)==0 &&
       !isalnum((unsigned char)*(pp+1+len))) return pp;
  }
  return p+strlen(p);
}
208
 
6884 bpr 209
/*  Remove every html tag from p, in place. A tag that is not closed
 *  truncates the string at its '<'.
 */
void detag(char *p)
{
  char *open=strchr(p,'<');

  while(open!=NULL) {
    char *rest=find_tag_end(open);
    if(*rest==0) {
      *open=0;
      return;
    }
    ovlstrcpy(open,rest);
    /* the text moved down may itself start with a tag: rescan from open */
    open=strchr(open,'<');
  }
}
219
 
6819 reyssat 220
/* Insert a space after every comma of p so that the end of the preceding
 * word can be recognised.
 */
void comma(char *p)
{
  char *hit=strchr(p,',');

  while(hit!=NULL) {
    string_modify3(p,hit,hit+1,", ");
    hit=strchr(hit+1,',');
  }
}
15482 bpr 228
/* Replace every '/' of p by ','. Despite the function's name it is the
 * forward slash that is replaced.
 */
void backslash(char *p)
{
  char *hit=strchr(p,'/');

  while(hit!=NULL) {
    string_modify3(p,hit,hit+1,",");
    hit=strchr(hit+1,'/');
  }
}
15375 bpr 235
/* _getdef from lines.c except the error msg*/
10 reyssat 236
void _getdef(char buf[], char *name, char value[])
237
{
15375 bpr 238
  char *p1, *p2, *p3, *p4;
10 reyssat 239
 
15375 bpr 240
  if(*name==0) goto nothing;      /* this would create segfault. */
12248 bpr 241
  for(p1=strstr(buf,name); p1!=NULL; p1=strstr(p1+1,name)) {
6884 bpr 242
    p2=find_word_start(p1+strlen(name));
243
    if((p1>buf && !isspace(*(p1-1))) || *p2!='=') continue;
15375 bpr 244
    p3=p1; while(p3>buf && *(p3-1)!='\n') p3--;
245
    p3=find_word_start(p3);
246
    if(p3<p1 && *p3!='!') continue;
247
    if(p3<p1) {
248
      p3++; p4=find_word_end(p3);
249
      if(find_word_start(p4)!=p1) continue;
250
      if(p4-p3!=3 || (strncmp(p3,"set",3)!=0 &&
251
           strncmp(p3,"let",3)!=0 &&
252
           strncmp(p3,"def",3)!=0)) {
253
        if(p4-p3!=6 || strncmp(p3,"define",6)!=0) continue;
254
      }
255
    }
256
    p2++;p3=strchr(p2,'\n'); if(p3==NULL) p3=p2+strlen(p2);
257
    p2=find_word_start(p2);
258
    if(p2>p3) goto nothing;
259
    /*if(p3-p2>=MAX_LINELEN) user_error("cmd_output_too_long");*/
260
    memmove(value,p2,p3-p2); value[p3-p2]=0;
261
    strip_trailing_spaces(value); return;
12248 bpr 262
  }
15375 bpr 263
nothing:
15394 bpr 264
  value[0]=0;
10 reyssat 265
}
266
 
6884 bpr 267
/*  Get variable definition from a file.
 * Result stored in buffer value of length MAX_LINELEN.
 * value is left empty when the file cannot be read or does not define name.
 */
void getdef(char *fname, char *name, char value[])
{
  FILE *f;
  char *buf;
  long size;
  size_t got;

  value[0]=0;
  f=fopen(fname,"r"); if(f==NULL) return;
  /* ftell() returns -1 on failure; do not use that as a length */
  if(fseek(f,0,SEEK_END)!=0 || (size=ftell(f))<0 || fseek(f,0,SEEK_SET)!=0) {
    fclose(f); return;
  }
  buf=xmalloc(size+256);
  got=fread(buf,1,size,f);
  fclose(f);
  if(got>0) {
    buf[got]=0;
    _getdef(buf,name,value);
  }
  /* released on every path, including the empty-file one */
  free(buf);
}
285
 
15442 bpr 286
void init(void)
10 reyssat 287
{
12248 bpr 288
  char buf[MAX_LINELEN+1];
15442 bpr 289
  char *p1,*p2,*s;
290
  int i,l;
12248 bpr 291
  FILE *f;
6881 bpr 292
 
12248 bpr 293
  s=getenv("modind_outdir"); if(s!=NULL && *s!=0) outdir=s;
294
  s=getenv("modind_sheetdir"); if(s!=NULL && *s!=0) sheetdir=s;
295
  s=getenv("modind_sheetoutdir"); if(s!=NULL && *s!=0) sheetoutdir=s;
15442 bpr 296
  s=getenv("modind_glossaryoutdir"); if(s!=NULL && *s!=0) glossaryoutdir=s;
6884 bpr 297
/* take the langs declared in conffile */
12248 bpr 298
  getdef(conffile,"site_languages",buf);
15442 bpr 299
  langcnt=0;
12248 bpr 300
  for(p1=buf;*p1;p1++) if(!isalnum(*p1)) *p1=' ';
301
  for(p1=find_word_start(buf); *p1 && langcnt<MAX_LANGS; p1=find_word_start(p2)) {
6884 bpr 302
    p2=find_word_end(p1);
303
    if(p2!=p1+2 || !isalpha(*p1) || !isalpha(*(p1+1))) continue;
304
    memmove(lang[langcnt],p1,2); lang[langcnt++][2]=0;
12248 bpr 305
  }
306
  if(langcnt==0) {/*  default languages */
6884 bpr 307
    langcnt=DEFAULT_LANGCNT;
12248 bpr 308
  }
309
  for(i=0;i<langcnt;i++) {
310
    snprintf(buf,sizeof(buf),"%s/%s.%s",dicdir,ignoredic,lang[i]);
311
    f=fopen(buf,"r"); if(f==NULL) continue;
312
    l=fread(ignore[i],1,MAX_LINELEN,f);fclose(f);
313
    if(l<0 || l>=MAX_LINELEN) l=0;
314
    ignore[i][l]=0;
315
  }
15442 bpr 316
}
317
/*  Preparation of data */
318
void prep(void)
319
{
320
  char buf[MAX_LINELEN+1];
321
  char *p1,*p2,*s,*old;
322
  int i,l,thislang,t;
15444 bpr 323
  modcnt=0; old="";
15442 bpr 324
  snprintf(buf,sizeof(buf),"%s/addr",outdir);
325
  addrf=fopen(buf,"w");
326
  if(!addrf) { fprintf(stderr,"modind: error creating output files addr.\n"); exit(1);}
327
  snprintf(buf,sizeof(buf),"%s/serial",outdir);
328
  serialf=fopen(buf,"w");
329
  if(!serialf) { fprintf(stderr,"modind: error creating output files serial.\n"); exit(1);}
330
 
331
  s=getenv("mlist"); if(s==NULL) exit(1);
332
  l=strlen(s); if(l<0 || l>100*MAX_LINELEN) exit(1);
15444 bpr 333
  mlist=xmalloc(l+16); ovlstrcpy(mlist,s);
15442 bpr 334
 
12248 bpr 335
  for(t=0, p1=find_word_start(mlist); *p1 && modcnt<MAX_MODULES;
9090 bpr 336
        p1=find_word_start(p2), t++) {
12248 bpr 337
    p2=find_word_end(p1);
338
    l=p2-p1; if(*p2) *p2++=0;
339
    fprintf(addrf,"%d:%s\n",t,p1);
340
    fprintf(serialf,"%s:%d\n",p1,t);
341
    thislang=-1;
6564 bpr 342
/* language is taken from the address */
12248 bpr 343
    if(l>3 && p1[l-3]=='.') {
344
      for(i=0;i<langcnt;i++) if(strcasecmp(lang[i],p1+l-2)==0) break;
345
      if(i<langcnt) {p1[l-3]=0; thislang=i;}
346
      else {/*  unknown language, not referenced */
6884 bpr 347
        continue;
9090 bpr 348
      }
12248 bpr 349
    }
350
    if(modcnt>0 && strcmp(old,p1)==0 && thislang>=0) {
351
      if(mod[modcnt-1].langcnt<langcnt) {
6884 bpr 352
        mod[modcnt-1].langs[mod[modcnt-1].langcnt]=thislang;
353
        mod[modcnt-1].counts[mod[modcnt-1].langcnt]=t;
354
        (mod[modcnt-1].langcnt)++;
12248 bpr 355
      }
6884 bpr 356
    }
12248 bpr 357
    else {
358
      mod[modcnt].name=old=p1;
359
      if(thislang>=0) {
360
        mod[modcnt].langs[0]=thislang;
361
        mod[modcnt].langcnt=1;
362
      }
363
      else mod[modcnt].langcnt=0;
364
      mod[modcnt].counts[0]=t;
365
      modcnt++;
6884 bpr 366
    }
12248 bpr 367
  }
368
  snprintf(buf,sizeof(buf),"%s/language",outdir);
369
  langf=fopen(buf,"w");
370
  snprintf(buf,sizeof(buf),"%s/title",outdir);
371
  titf=fopen(buf,"w");
16987 bpr 372
  snprintf(buf,sizeof(buf),"%s/title_ca",outdir);
373
  titf_ca=fopen(buf,"w");
374
  snprintf(buf,sizeof(buf),"%s/title_en",outdir);
375
  titf_en=fopen(buf,"w");
376
  snprintf(buf,sizeof(buf),"%s/title_es",outdir);
377
  titf_es=fopen(buf,"w");
378
  snprintf(buf,sizeof(buf),"%s/title_fr",outdir);
379
  titf_fr=fopen(buf,"w");
380
  snprintf(buf,sizeof(buf),"%s/title_it",outdir);
381
  titf_it=fopen(buf,"w");
382
  snprintf(buf,sizeof(buf),"%s/title_nl",outdir);
383
  titf_nl=fopen(buf,"w");
12248 bpr 384
  snprintf(buf,sizeof(buf),"%s/description",outdir);
385
  descf=fopen(buf,"w");
386
  snprintf(buf,sizeof(buf),"%s/author",outdir);
387
  authorf=fopen(buf,"w");
388
  snprintf(buf,sizeof(buf),"%s/version",outdir);
389
  versionf=fopen(buf,"w");
390
  snprintf(buf,sizeof(buf),"%s/%s/robot.phtml",outdir,mlistbase);
391
  robotf=fopen(buf,"w");
392
  fclose(addrf); fclose(serialf);
16987 bpr 393
  if(!robotf || !versionf || !authorf || !descf || !titf
394
    || !titf_ca || !titf_en || !titf_es || !titf_fr || !titf_it || !titf_nl
395
    || !langf) {
12248 bpr 396
    fprintf(stderr,"modind: error creating output files.\n");
397
    exit(1);
398
  }
10 reyssat 399
}
400
 
401
void sprep(void)
402
{
15440 bpr 403
  char buf[MAX_LINELEN+1];
12248 bpr 404
  char *p1,*p2,*s;
15440 bpr 405
  int i,l,t,thislang;
6881 bpr 406
 
12248 bpr 407
  modcnt=0;
15440 bpr 408
  snprintf(buf,sizeof(buf),"%s/addr",sheetoutdir);
409
  addrf=fopen(buf,"w");
410
  if(!addrf) { fprintf(stderr,"modind: error creating output files addr.\n"); exit(1);}
411
  snprintf(buf,sizeof(buf),"%s/serial",sheetoutdir);
412
  serialf=fopen(buf,"w");
12248 bpr 413
  s=getenv("slist"); if(s==NULL) return;
414
  l=strlen(s); if(l<0 || l>100*MAX_LINELEN) return;
415
  mlist=xmalloc(l+16); ovlstrcpy(mlist,s);
15440 bpr 416
  for(t=0,p1=find_word_start(mlist); *p1 && modcnt<MAX_MODULES; p1=find_word_start(p2),t++) {
6884 bpr 417
    p2=find_word_end(p1);
418
    l=p2-p1; if(*p2) *p2++=0;
419
    for(i=0;i<langcnt;i++) if(strncasecmp(lang[i],p1,2)==0) break;
420
    if(i<langcnt) thislang=i; else continue;
15440 bpr 421
    ovlstrcpy(revmod[t].name,p1);
422
    revmod[t].lang=thislang;
6884 bpr 423
    mod[modcnt].name=p1;
424
    mod[modcnt].langs[0]=thislang;
425
    mod[modcnt].langcnt=1;
15440 bpr 426
    revmod[t].imod=modcnt;
427
    fprintf(addrf,"%d:%s\n",modcnt,p1);
428
    fprintf(serialf,"%s:%d\n",p1,modcnt);
15539 bpr 429
    modcnt++;
12248 bpr 430
  }
15440 bpr 431
 fclose(addrf); fclose(serialf);
10 reyssat 432
}
433
 
15375 bpr 434
void gprep(void)
435
{
15440 bpr 436
  char buf[MAX_LINELEN+1];
437
  char *p1,*p2,*s,*old;
438
  int l,i,t,thislang;
15444 bpr 439
  modcnt=0; old="";
15440 bpr 440
  snprintf(buf,sizeof(buf),"%s/addr",glossaryoutdir);
441
  addrf=fopen(buf,"w");
442
  if(!addrf) { fprintf(stderr,"modind: error creating output files addr.\n"); exit(1);}
443
  snprintf(buf,sizeof(buf),"%s/serial",glossaryoutdir);
444
  serialf=fopen(buf,"w");
15375 bpr 445
  s=getenv("glist"); if(s==NULL) return;
446
  l=strlen(s); if(l<0 || l>100*MAX_LINELEN) return;
447
  mlist=xmalloc(l+16); ovlstrcpy(mlist,s);
15440 bpr 448
  for(t=0,p1=find_word_start(mlist); *p1 && modcnt<MAX_MODULES; p1=find_word_start(p2),t++) {
15375 bpr 449
    p2=find_word_end(p1);
450
    if(*p2) *p2++=0;
15440 bpr 451
    fprintf(addrf,"%d:%s\n",t,p1);
452
    fprintf(serialf,"%s:%d\n",p1,t);
453
    ovlstrcpy(revmod[t].name,p1);
15482 bpr 454
    ovlstrcpy(revmod[t].keywords,p1);
15375 bpr 455
    s=strchr(p1,'/');
456
    if(s != NULL) s=strchr(s+1,'/');
457
    if(s==NULL) {
458
      fprintf(stderr,"modind: no language %s\n",p1); exit(1);
459
    }
15482 bpr 460
    revmod[t].keywords[s-p1]=0;
15375 bpr 461
    s++;
462
    for(i=0;i<langcnt;i++) if(strncasecmp(lang[i],s,2)==0) break;
15440 bpr 463
    thislang = i<langcnt ? i : -1;
464
    revmod[t].lang=i;
465
    s[0]=s[1]='x';
466
    if(modcnt>0 && strcmp(old,p1)==0 && thislang >= 0) {
467
      if(mod[modcnt-1].langcnt<langcnt) {
468
        mod[modcnt-1].langs[mod[modcnt-1].langcnt]=thislang;
469
        mod[modcnt-1].counts[mod[modcnt-1].langcnt]=t;
470
        (mod[modcnt-1].langcnt)++;
471
      }
472
      revmod[t].imod=modcnt-1;
473
    }
474
    else {
475
      mod[modcnt].name=old=p1;
476
      if(thislang>=0) {
477
        mod[modcnt].langs[0]=thislang;
478
        mod[modcnt].langcnt=1;
479
      }
480
      else mod[modcnt].langcnt=0;
481
      mod[modcnt].counts[0]=t;
482
      revmod[t].imod=modcnt;
483
      modcnt++;
484
    }
15375 bpr 485
  }
15440 bpr 486
  fclose(addrf); fclose(serialf);
15375 bpr 487
}
488
 
6884 bpr 489
/*  read and treat module's INDEX file */
10 reyssat 490
int module_index(const char *name)
491
{
12248 bpr 492
  char *p, fbuf[MAX_LINELEN+1], ibuf[MAX_LINELEN+1];
493
  FILE *indf;
494
  int i,l;
10 reyssat 495
 
12248 bpr 496
  snprintf(fbuf,sizeof(fbuf),"%s/%s/INDEX",moduledir,name);
497
  indf=fopen(fbuf,"r");
498
  if(indf==NULL) {
499
    fprintf(stderr,"modind: INDEX of %s not found\n",fbuf); return -1;
500
  }
501
  l=fread(ibuf,1,MAX_LINELEN,indf); fclose(indf);
502
  if(l>0 && l<MAX_LINELEN) ibuf[l]=0; else return -1;
6884 bpr 503
/* treate all fields in *modindex */
12248 bpr 504
  for(i=0;i<MODINDEX_NO;i++) {
505
    _getdef(ibuf,modindex[i],indbuf[i]);
6884 bpr 506
/*  compatibility precaution */
12248 bpr 507
    if(indbuf[i][0]==':') indbuf[i][0]='.';
508
  }
509
  p=find_word_start(indbuf[i_language]);
510
  if(isalpha(*p) && isalpha(*(p+1))) {
511
    memmove(module_language,p,2); module_language[2]=0;
512
  }
513
  else ovlstrcpy(module_language,"en");
514
  return 0;
10 reyssat 515
}
516
 
517
int sheet_index(int serial)
518
{
12248 bpr 519
  char *p1, *p2, fbuf[MAX_LINELEN+1], ibuf[MAX_LINELEN+1];
520
  FILE *indf;
521
  int i,l;
10 reyssat 522
 
12248 bpr 523
  snprintf(fbuf,sizeof(fbuf),"%s/%s.def",sheetdir,mod[serial].name);
524
  indf=fopen(fbuf,"r"); if(indf==NULL) return -1;
525
  l=fread(ibuf,1,MAX_LINELEN,indf); fclose(indf);
526
  if(l>0 && l<MAX_LINELEN) ibuf[l]=0; else return -1;
15375 bpr 527
  for(i=0;i<SHEETINDEX_NO;i++) gsindbuf[i][0]=0;
12248 bpr 528
  for(i=0,p1=find_word_start(ibuf);
9090 bpr 529
      i<SHEETINDEX_NO-1 && *p1!=':' && *p1!=0;
530
      i++,p1=p2) {
12248 bpr 531
    p2=strchr(p1,'\n');
532
    if(p2!=NULL) *p2++=0; else p2=p1+strlen(p1);
8100 bpr 533
    p1=find_word_start(p1); strip_trailing_spaces2(p1);
15375 bpr 534
    snprintf(gsindbuf[i],MAX_LINELEN,"%s",p1);
12248 bpr 535
  }
536
  p2=strstr(p1,"\n:"); if(p2==NULL) p2=p1+strlen(p1);
537
  else *p2=0;
538
  p1=find_word_start(p1); strip_trailing_spaces2(p1);
539
  for(p2=p1;*p2;p2++) if(*p2=='\n') *p2=' ';
15375 bpr 540
  ovlstrcpy(gsindbuf[s_information],p1);
15440 bpr 541
  ovlstrcpy(gsindbuf[SHEETINDEX_NO],revmod[serial].name);
12248 bpr 542
  return 0;
10 reyssat 543
}
544
 
15375 bpr 545
int glossary_index(int serial)
546
{
15440 bpr 547
  char nbuf[MAX_LINELEN+1],fbuf[MAX_LINELEN+1], ibuf[MAX_LINELEN+1],*p,*s;
15375 bpr 548
  FILE *indf;
549
  int i,l;
15440 bpr 550
  s=lang[revmod[serial].lang];
551
  p=strchr(nbuf,'/');
552
  if(p != NULL) p=strchr(p+1,'/');
553
  if(p != NULL) {p[1]=s[0];p[2]=s[1];}
554
  snprintf(fbuf,sizeof(fbuf),"%s/%s",glossarydir,revmod[serial].name);
15375 bpr 555
  indf=fopen(fbuf,"r");
556
  l=fread(ibuf,1,MAX_LINELEN,indf); fclose(indf);
557
  if(l>0 && l<MAX_LINELEN) ibuf[l]=0; else return -1;
558
  for(i=0;i<SHEETINDEX_NO;i++) {
559
    _getdef(ibuf,glindex[i],gsindbuf[i]);
560
  }
15482 bpr 561
  s=gsindbuf[s_keywords]+strlen(gsindbuf[s_keywords]);
562
  *s++ = ',';
563
  ovlstrcpy(s,revmod[serial].keywords);
564
  backslash(revmod[serial].keywords);
15440 bpr 565
  ovlstrcpy(gsindbuf[SHEETINDEX_NO],nbuf);
15375 bpr 566
  return 0;
567
}
568
 
10 reyssat 569
unsigned char categories[16];
570
char taken[MAX_LINELEN+1];
571
int catcnt, takenlen, tweight;
572
 
15394 bpr 573
/* file management for appenditem */
574
#define MAX_FILES (MAX_LANGS*catno)
575
 
576
char *fnames[MAX_FILES];
577
FILE *files[MAX_FILES];
578
int open_files;
579
 
580
FILE * file_from_list(char *name){
581
  int i, l = 0, r = open_files;
582
  while (r>l){
583
    int m = (l+r)/2;
584
    int cmp = strcmp(name,fnames[m]);
585
    if (!cmp) return files[m];
586
    if (cmp < 0) r = m; else l = m+1;
587
  }
588
  for (i=open_files; i > l; i--) {files[i]=files[i-1]; fnames[i]=fnames[i-1];}
589
  fnames[l] = xmalloc(MAX_FNAME);
590
  ovlstrcpy(fnames[l],name);
591
  open_files++;
592
  return files[l]=fopen(name,"a");
593
}
594
 
10 reyssat 595
void appenditem(char *word, int lind, int serial, int weight, char *l)
596
{
12248 bpr 597
  char nbuf[MAX_LINELEN+1], buf[MAX_LINELEN+1];
598
  int i, ll;
599
  char *p;
600
  FILE *f;
6881 bpr 601
 
12248 bpr 602
  if(!isalnum(*word) || (ll=strlen(word))<2 ||
603
     wordchr2(taken,word)!=NULL ||
604
     wordchr2(ignore[lind],word)!=NULL ||
605
     takenlen>=MAX_LINELEN-ll-16)
606
    return;
607
  if(ll==2 && (!isdigit(word[0]) || !isalpha(word[1]))) return;
608
  for(p=word;*p;p++) if(!isalnum(*p) && *p!=' ') return;
609
  taken[takenlen++]=' '; taken[takenlen++]=' ';
610
  ovlstrcpy(taken+takenlen,word);
611
  takenlen+=ll; tweight+=weight;
612
  snprintf(buf,sizeof(buf),"%s:%d?%d\n",word,serial,weight);
613
  for(i=0;i<catcnt;i++) {
6884 bpr 614
    snprintf(nbuf,sizeof(nbuf),"%s/%c.%s",
12248 bpr 615
       outdir,categories[i],lang[lind]);
15394 bpr 616
    f = file_from_list(nbuf);
617
    if(f!=NULL) {fputs(buf,f);}
12248 bpr 618
  }
10 reyssat 619
}
620
 
6881 bpr 621
void appenditem1 (char *buf, int lind, int serial, int weight, char *l )
622
{
623
  char *p1, *p2 ;
624
  for(p1=find_word_start(buf); *p1;
6884 bpr 625
    p1=find_word_start(p2)) {
626
    p2=strchr(p1,',');
627
    if(p2!=NULL) *p2++=0; else p2=p1+strlen(p1);
628
    if(strlen(p1)<=0) continue;
629
    appenditem(p1,lind,serial,weight,module_language);
6881 bpr 630
  }
631
}
632
void appenditem2 (char *buf, int lind, int serial, int weight, char *l )
633
{
634
  char *p1, *p2 ;
635
  for(p1=find_word_start(buf);*p1;
12248 bpr 636
      p1=find_word_start(p2)) {
6884 bpr 637
    p2=find_word_end(p1); if(*p2) *p2++=0;
638
    appenditem(p1,lind,serial,weight,module_language);
6881 bpr 639
  }
640
}
10 reyssat 641
void onemodule(const char *name, int serial, int lind)
642
{
12248 bpr 643
  int i;
644
  unsigned char trlist[]={
645
  i_title,i_description,i_category,i_domain,i_keywords,
646
  i_require,i_author,
647
  i_keywords_ca,i_keywords_en,i_keywords_fr,i_keywords_it,i_keywords_nl,
648
  i_title_ca,i_title_en,i_title_fr,i_title_it,i_title_nl
649
  };
15375 bpr 650
  int trcnt=sizeof(trlist)/sizeof(trlist[0]);
15777 georgesk 651
  char *p1, *p2, *pp, *q, buf[15*MAX_LINELEN+15], lbuf[16];
12248 bpr 652
  FILE *f;
6881 bpr 653
 
12248 bpr 654
  if(module_index(name)) return;
655
  towords(indbuf[i_category]);
7915 bpr 656
/*   list the categories (among A=all,X=eXercise,O,D,...) corresponding
6884 bpr 657
 *   to this module
658
 */
12248 bpr 659
  for(i=catcnt=0;i<catno && catcnt<16;i++) {
15380 bpr 660
    if(wordchr2(indbuf[i_category],cat[i].name)!=NULL)
661
      categories[catcnt++]=cat[i].typ;
12248 bpr 662
  }
663
  if(catcnt==0) return;
664
  if(categories[0]!=cat[0].typ)
665
    categories[catcnt++]=cat[0].typ;
6884 bpr 666
/*  write module's name in the category.language files, for instance lists/X.fr
667
 * for french exercises
668
 */
12248 bpr 669
  for(i=0;i<catcnt;i++) {
670
    snprintf(buf,sizeof(buf),"%s/%s/%c.%s",
671
       outdir,mlistbase,categories[i],lang[lind]);
672
    f=fopen(buf,"a");
673
    if(f!=NULL) {fprintf(f,"%s\n",name); fclose(f);}
674
  }
6884 bpr 675
/*   add serial number and language (resp.title, ...) to corresponding file  */
12248 bpr 676
  fprintf(langf,"%d:%s\n",serial,module_language);
677
  fprintf(titf,"%d:%s\n",serial,indbuf[i_title]);
16987 bpr 678
  if(indbuf[i_title_ca][0]!=0)
679
    fprintf(titf_ca,"%d:%s\n",serial,indbuf[i_title_ca]);
680
  else
681
    fprintf(titf_ca,"%d:%s\n",serial,indbuf[i_title]);
682
  if(indbuf[i_title_en][0]!=0)
683
    fprintf(titf_en,"%d:%s\n",serial,indbuf[i_title_en]);
684
  else
685
    fprintf(titf_en,"%d:%s\n",serial,indbuf[i_title]);
686
  if(indbuf[i_title_es][0]!=0)
687
    fprintf(titf_es,"%d:%s\n",serial,indbuf[i_title_es]);
688
  else
689
    fprintf(titf_es,"%d:%s\n",serial,indbuf[i_title]);
690
  if(indbuf[i_title_fr][0]!=0)
691
    fprintf(titf_fr,"%d:%s\n",serial,indbuf[i_title_fr]);
692
  else
693
    fprintf(titf_fr,"%d:%s\n",serial,indbuf[i_title]);
694
  if(indbuf[i_title_it][0]!=0)
695
    fprintf(titf_it,"%d:%s\n",serial,indbuf[i_title_it]);
696
  else
697
    fprintf(titf_it,"%d:%s\n",serial,indbuf[i_title]);
698
  if(indbuf[i_title_nl][0]!=0)
699
    fprintf(titf_nl,"%d:%s\n",serial,indbuf[i_title_nl]);
700
  else
701
    fprintf(titf_nl,"%d:%s\n",serial,indbuf[i_title]);
702
 
12248 bpr 703
  fprintf(descf,"%d:%s\n",serial,indbuf[i_description]);
704
  fprintf(authorf,"%d:%s\n",serial,indbuf[i_author]);
705
  fprintf(versionf,"%d:%s\n",serial,indbuf[i_version]);
6881 bpr 706
 
6884 bpr 707
/*   add module's information in html page for robots  */
12248 bpr 708
  snprintf(buf,sizeof(buf),"%s",indbuf[i_description]);
709
  for(pp=strchr(buf,','); pp; pp=strchr(pp,','))
710
    string_modify3(buf,pp,pp+1,"&#44;");
711
  if(strcmp(module_language,lang[lind])==0)
712
    fprintf(robotf,"%s ,%s,%s,%s,%s\n",name,module_language,name,
713
        indbuf[i_title], buf);
6819 reyssat 714
 
6884 bpr 715
/*   Normalize the information of trlist, using dictionary
7915 bpr 716
 *  -- bases/sys/domain.xx without suffix translation (--> english version)
6884 bpr 717
 */
15380 bpr 718
  entrycount=dentrycount; dicbuf=ddicbuf;
719
  memmove(entry,dentry,dentrycount*sizeof(entry[0]));
720
  unknown_type=unk_leave;
721
  for(i=0;i<trcnt;i++) {
722
    detag(indbuf[trlist[i]]);
723
    deaccent2(indbuf[trlist[i]]);
724
    comma(indbuf[trlist[i]]);
725
    singlespace2(indbuf[trlist[i]]);
726
    translate(indbuf[trlist[i]]);
727
  }
6884 bpr 728
/*   Normalize the information, using dictionary
7915 bpr 729
 *   bases/sys/words.xx with suffix translation
6884 bpr 730
 */
15380 bpr 731
  entrycount=mentrycount; dicbuf=mdicbuf;
732
  memmove(entry,mentry,mentrycount*sizeof(entry[0]));
733
  unknown_type=unk_leave;/*  used in translator_.c */
734
  for(i=0;i<trcnt;i++) {
735
  suffix_translate(indbuf[trlist[i]]);
736
  translate(indbuf[trlist[i]]);
737
  }
6881 bpr 738
 
739
/* taken contains all words already seen in the module index */
15380 bpr 740
  taken[0]=0; takenlen=tweight=0;
6881 bpr 741
/*  append words of title  */
15380 bpr 742
  ovlstrcpy(buf,indbuf[i_title]); towords(buf);
743
  appenditem2(buf,lind,serial,4,module_language);
6881 bpr 744
 
6884 bpr 745
/*  extract words of every other information except level */
15380 bpr 746
  snprintf(buf,sizeof(buf),"%s %s %s %s %s %s %s %s %s %s %s %s %s %s %s",
747
    indbuf[i_description],indbuf[i_keywords],
748
    indbuf[i_keywords_ca],indbuf[i_keywords_en],indbuf[i_keywords_fr],
749
    indbuf[i_keywords_it],indbuf[i_keywords_nl],
750
    indbuf[i_title_ca],indbuf[i_title_en],indbuf[i_title_fr],
751
    indbuf[i_title_it],indbuf[i_title_nl],
752
    indbuf[i_domain],indbuf[i_require],indbuf[i_author]);
753
  towords(buf);
754
  appenditem2(buf,lind,serial,2,module_language);
6881 bpr 755
 
6884 bpr 756
/*   this time the dictionary is the group dictionary  sys/wgrp/wgrp
757
 *   with a g (groupdic), not an m (maindic) . see below main, suffix, group.
7915 bpr 758
 *   and delete unknown ?? and translate
6884 bpr 759
 */
12248 bpr 760
  entrycount=gentrycount; dicbuf=gdicbuf;
761
  memmove(entry,gentry,gentrycount*sizeof(entry[0]));
6881 bpr 762
 
15380 bpr 763
/* append words of every title information  */
12248 bpr 764
  ovlstrcpy(buf,indbuf[i_title]);
765
  unknown_type=unk_delete;
766
  translate(buf);
767
  appenditem1(buf,lind,serial,2,module_language);
6881 bpr 768
 
15380 bpr 769
/* append words of information of description except level  */
12248 bpr 770
  snprintf(buf,sizeof(buf),"%s", indbuf[i_description]);
771
  unknown_type=unk_delete;
772
  translate(buf);
773
  appenditem1(buf,lind,serial,4,module_language);
6881 bpr 774
 
15380 bpr 775
/* append words (or group of words) of keywords and domain  */
12248 bpr 776
  snprintf(buf,sizeof(buf),"%s, %s, %s, %s, %s, %s, %s",
15380 bpr 777
    indbuf[i_domain],indbuf[i_keywords],
778
    indbuf[i_keywords_ca], indbuf[i_keywords_en],indbuf[i_keywords_fr],
779
    indbuf[i_keywords_it], indbuf[i_keywords_nl]);
12248 bpr 780
  unknown_type=unk_leave;
781
  translate(buf);
782
  appenditem1(buf,lind,serial,2,module_language);
6881 bpr 783
 
15380 bpr 784
/* append level information, with weight 2 */
12248 bpr 785
  snprintf(buf,sizeof(buf),"%s",indbuf[i_level]);
786
  ovlstrcpy(lbuf,"level");
787
  for(p1=buf; *p1; p1++) if(!isalnum(*p1)) *p1=' ';
788
  q=buf+strlen(buf);
15380 bpr 789
  for(p1=find_word_start(buf); (*p1) && (p1 < q) ; p1=find_word_start(p2)) {
12248 bpr 790
    p2=find_word_end(p1);
791
    if(p2!=NULL) *p2++=0; else p2=p1+strlen(p1);
792
    if(strncmp(p1, "Lang" , p2-p1) &&
793
     (!isalpha(*p1) ||
794
     (!isdigit(*(p1+1)) && *(p1+1)!=0) ||
795
     (*(p1+1)!=0 && *(p1+2)!=0)))
796
       continue;
797
    *p1=tolower(*p1);
798
    ovlstrcpy(lbuf+strlen("level"),p1);
799
    appenditem(lbuf,lind,serial,2,module_language);
800
  }
6884 bpr 801
/*   append total weight of module to weight file site2/weight.xx  */
12248 bpr 802
  fprintf(weightf,"%d:%d\n",serial,tweight);
10 reyssat 803
}
804
 
805
void modules(void)
806
{
12248 bpr 807
  int i,j,k,d;
808
  char namebuf[MAX_LINELEN+1];
809
  char mdic[MAX_LINELEN+1], sdic[MAX_LINELEN+1], gdic[MAX_LINELEN+1], ddic[MAX_LINELEN+1];
10 reyssat 810
 
12248 bpr 811
  for(j=0;j<langcnt;j++) {
6884 bpr 812
    snprintf(namebuf,sizeof(namebuf),"%s/weight.%s",outdir,lang[j]);
813
    weightf=fopen(namebuf,"w");
814
    snprintf(mdic,sizeof(mdic),"%s/%s.%s",dicdir,maindic,lang[j]);
815
    snprintf(sdic,sizeof(sdic),"%s/%s.%s",dicdir,suffixdic,lang[j]);
816
    snprintf(gdic,sizeof(gdic),"%s/%s.%s",dicdir,groupdic,lang[j]);
817
    snprintf(ddic,sizeof(ddic),"%s/%s.%s",dicdir,domaindic,lang[j]);
818
    suffix_dic(sdic); prepare_dic(gdic);
819
    gdicbuf=dicbuf; gentrycount=entrycount;
820
    memmove(gentry,entry,gentrycount*sizeof(entry[0]));
821
    prepare_dic(mdic);
822
    mdicbuf=dicbuf; mentrycount=entrycount;
823
    memmove(mentry,entry,mentrycount*sizeof(entry[0]));
824
    prepare_dic(ddic);
825
    ddicbuf=dicbuf; dentrycount=entrycount;
826
    memmove(dentry,entry,dentrycount*sizeof(entry[0]));
827
    unknown_type=unk_leave; translate(ignore[j]);
828
    for(i=0;i<modcnt;i++) {
12248 bpr 829
      if(mod[i].langcnt>0) {
15336 bpr 830
      /* look for another language */
6884 bpr 831
        for(d=k=0;k<mod[i].langcnt;k++)
15336 bpr 832
          if(mod[i].langs[k]<mod[i].langs[d]) d=k;
6884 bpr 833
        for(k=0;k<mod[i].langcnt && mod[i].langs[k]!=j;k++);
834
        if(k>=mod[i].langcnt) k=d;
835
        snprintf(namebuf,MAX_LINELEN,"%s.%s",mod[i].name,
12248 bpr 836
           lang[mod[i].langs[k]]);
6884 bpr 837
        onemodule(namebuf,mod[i].counts[k],j);
12248 bpr 838
      }
839
      else {
6884 bpr 840
        onemodule(mod[i].name,mod[i].counts[0],j);
12248 bpr 841
      }
10 reyssat 842
    }
6884 bpr 843
    if(mentrycount>0) free(mdicbuf);
844
    if(gentrycount>0) free(gdicbuf);
845
    if(suffixcnt>0) free(sufbuf);
846
    if(dentrycount>0) free(ddicbuf);
847
    if(weightf) fclose(weightf);
12248 bpr 848
  }
10 reyssat 849
}
15394 bpr 850
void clean(void)
851
{
852
  int i;
853
  for (i = 0; i < open_files; i++) fclose(files[i]);
854
  fclose(langf); fclose(titf); fclose(descf); fclose(robotf);
855
  fclose(authorf); fclose(versionf);
16987 bpr 856
  fclose(titf_fr); fclose(titf_it);fclose(titf_es);fclose(titf_nl);
857
  fclose(titf_ca);fclose(titf_en);
15394 bpr 858
}
10 reyssat 859
 
6881 bpr 860
/* FIXME ? differences with appenditem - use fprintf instead of  snprintf */
10 reyssat 861
void sappenditem(char *word, int lind, int serial, int weight)
862
{
12248 bpr 863
  int ll;
864
  char *p;
6881 bpr 865
 
12248 bpr 866
  if(!isalnum(*word) || (ll=strlen(word))<2 ||
867
     wordchr2(taken,word)!=NULL ||
868
     wordchr2(ignore[lind],word)!=NULL ||
869
     takenlen>=MAX_LINELEN-ll-16)
870
    return;
871
  if(ll==2 && (!isdigit(word[0]) || !isalpha(word[1]))) return;
872
  for(p=word;*p;p++) if(!isalnum(*p) && *p!=' ') return;
873
  taken[takenlen++]=' ';taken[takenlen++]=' ';
874
  ovlstrcpy(taken+takenlen,word);
875
  takenlen+=ll; tweight+=weight;
876
  fprintf(indf,"%s:%d?%d\n",word,serial,weight);
10 reyssat 877
}
15380 bpr 878
/* onesg / onemodule are similar */
15375 bpr 879
void onesg(int serial, int lind, int index(int))
10 reyssat 880
{
12248 bpr 881
  int i;
882
  unsigned char trlist[]={
15375 bpr 883
    s_title,s_description,s_domain,s_keywords,s_information
12248 bpr 884
  };
15380 bpr 885
  int trcnt=sizeof(trlist)/sizeof(trlist[0]);
15778 georgesk 886
  char *p1, *p2, *q, buf[4*MAX_LINELEN+4], lbuf[16];
6881 bpr 887
 
15375 bpr 888
  if(index(serial)) return;
889
  fprintf(titf,"%d:%s\n",serial,gsindbuf[s_title]);
890
  fprintf(descf,"%d:%s\n",serial,gsindbuf[s_description]);
891
  fprintf(remf,"%d:%s\n",serial,gsindbuf[s_information]);
7915 bpr 892
 
15380 bpr 893
/*   Normalize the information of trlist, using dictionary
894
 *  -- bases/sys/domain.xx without suffix translation (--> english version)
895
 */
12248 bpr 896
  entrycount=dentrycount; dicbuf=ddicbuf;
897
  memmove(entry,dentry,dentrycount*sizeof(entry[0]));
898
  unknown_type=unk_leave;
15380 bpr 899
  for(i=0;i<trcnt;i++) {
15375 bpr 900
    detag(gsindbuf[trlist[i]]);
901
    deaccent2(gsindbuf[trlist[i]]);
902
    comma(gsindbuf[trlist[i]]);
903
    singlespace2(gsindbuf[trlist[i]]);
904
    translate(gsindbuf[trlist[i]]);
12248 bpr 905
  }
15380 bpr 906
/*   Normalize the information, using dictionary
907
 *   bases/sys/words.xx with suffix translation
908
 */
12248 bpr 909
  entrycount=mentrycount; dicbuf=mdicbuf;
910
  memmove(entry,mentry,mentrycount*sizeof(entry[0]));
15380 bpr 911
  unknown_type=unk_leave;/*  used in translator_.c */
912
  for(i=0;i<trcnt;i++) {
15375 bpr 913
    suffix_translate(gsindbuf[trlist[i]]);
914
    translate(gsindbuf[trlist[i]]);
12248 bpr 915
  }
15380 bpr 916
 
917
/* taken contains all words already seen in the module index */
12248 bpr 918
  taken[0]=0; takenlen=tweight=0;
15380 bpr 919
/*  append words of title  */
15375 bpr 920
  ovlstrcpy(buf,gsindbuf[s_title]); towords(buf);
12248 bpr 921
  for(p1=find_word_start(buf);*p1;
922
      p1=find_word_start(p2)) {
6884 bpr 923
    p2=find_word_end(p1); if(*p2) *p2++=0;
924
    sappenditem(p1,lind,serial,4);
12248 bpr 925
  }
15380 bpr 926
 
927
/*  extract words of every other information except level */
12248 bpr 928
  snprintf(buf,sizeof(buf),"%s %s %s %s",
15375 bpr 929
         gsindbuf[s_description],gsindbuf[s_keywords],
930
         gsindbuf[s_domain],gsindbuf[s_information]);
12248 bpr 931
  towords(buf);
15375 bpr 932
  for(p1=find_word_start(buf);*p1;p1=find_word_start(p2)) {
933
    p2=find_word_end(p1); if(*p2) *p2++=0;
934
    sappenditem(p1,lind,serial,2);
12248 bpr 935
  }
15380 bpr 936
/*   this time the dictionary is the group dictionary  sys/wgrp/wgrp
937
 *   with a g (groupdic), not an m (maindic) . see below main, suffix, group.
938
 *   and delete unknown ?? and translate
939
 */
12248 bpr 940
  entrycount=gentrycount; dicbuf=gdicbuf;
941
  memmove(entry,gentry,gentrycount*sizeof(entry[0]));
15380 bpr 942
 
943
/*  append words of every title information  */
944
  ovlstrcpy(buf,gsindbuf[s_title]);
12248 bpr 945
  unknown_type=unk_delete;
15380 bpr 946
  translate(buf);
15375 bpr 947
  for(p1=find_word_start(buf); *p1; p1=find_word_start(p2)) {
948
    p2=strchr(p1,',');
949
    if(p2!=NULL) *p2++=0; else p2=p1+strlen(p1);
950
    if(strlen(p1)<=0) continue;
951
    sappenditem(p1,lind,serial,4);
12248 bpr 952
  }
15380 bpr 953
 
954
/*  append words (or group of words) of keywords and domain  */
12248 bpr 955
  snprintf(buf,sizeof(buf),"%s, %s",
15375 bpr 956
       gsindbuf[s_keywords],
957
       gsindbuf[s_domain]);
15380 bpr 958
  unknown_type=unk_leave;
12248 bpr 959
  translate(buf);
15380 bpr 960
  for(p1=find_word_start(buf); *p1; p1=find_word_start(p2)) {
6884 bpr 961
    p2=strchr(p1,',');
962
    if(p2!=NULL) *p2++=0; else p2=p1+strlen(p1);
963
    if(strlen(p1)<=0) continue;
964
    sappenditem(p1,lind,serial,2);
12248 bpr 965
  }
15380 bpr 966
 
967
/*   append level information, with weight 2 */
968
  snprintf(buf,sizeof(buf),"%s",gsindbuf[s_level]);
969
  ovlstrcpy(lbuf,"level");
970
  for(p1=buf; *p1; p1++) if(!isalnum(*p1)) *p1=' ';
971
  q=buf+strlen(buf);
972
  for(p1=find_word_start(buf); (*p1) && (p1 < q) ;
973
  p1=find_word_start(p2)) {
974
    p2=find_word_end(p1);
975
    if(p2!=NULL) *p2++=0; else p2=p1+strlen(p1);
976
    if(strncmp(p1, "Lang" , p2-p1) &&
977
        (!isalpha(*p1) || (!isdigit(*(p1+1))) ||
978
        (*(p1+1)!=0 && *(p1+2)!=0)))
979
      continue;
980
    *p1=tolower(*p1);
981
    ovlstrcpy(lbuf+strlen("level"),p1);
982
    sappenditem(lbuf,lind,serial,2);
983
  }
984
/*   append total weight of module to weight file site2/weight.xx  */
12248 bpr 985
  fprintf(weightf,"%d:%d\n",serial,tweight);
10 reyssat 986
}
987
 
15375 bpr 988
void sgs(char *outdir, int index(int))
10 reyssat 989
{
15440 bpr 990
  int i,j,k,d;
12248 bpr 991
  char mdic[MAX_LINELEN+1], sdic[MAX_LINELEN+1], gdic[MAX_LINELEN+1], ddic[MAX_LINELEN+1];
992
  char buf[MAX_LINELEN+1];
7915 bpr 993
 
15442 bpr 994
  //snprintf(buf,sizeof(buf),"%s/list",outdir);
995
  //listf=fopen(buf,"w");
15440 bpr 996
  snprintf(buf,sizeof(buf),"%s/title",outdir);
997
  titf=fopen(buf,"w");
998
  snprintf(buf,sizeof(buf),"%s/description",outdir);
999
  descf=fopen(buf,"w");
1000
  snprintf(buf,sizeof(buf),"%s/information",outdir);
1001
  remf=fopen(buf,"w");
15442 bpr 1002
  if(!remf || !descf || !titf ) {
15440 bpr 1003
    fprintf(stderr,"modind: error creating output files for %s.\n",outdir); exit(1);
1004
  }
12248 bpr 1005
  for(j=0;j<langcnt;j++) {
15375 bpr 1006
    snprintf(buf,sizeof(buf),"%s/%s",outdir,lang[j]);
12248 bpr 1007
    indf=fopen(buf,"w");
15375 bpr 1008
    snprintf(buf,sizeof(buf),"%s/weight.%s",outdir,lang[j]);
12248 bpr 1009
    weightf=fopen(buf,"w");
15440 bpr 1010
    if(!weightf || !indf ) {
1011
      fprintf(stderr,"modind: error creating output files for %s.\n",outdir); exit(1);
9090 bpr 1012
    }
6884 bpr 1013
    snprintf(mdic,sizeof(mdic),"%s/%s.%s",dicdir,maindic,lang[j]);
1014
    snprintf(sdic,sizeof(sdic),"%s/%s.%s",dicdir,suffixdic,lang[j]);
1015
    snprintf(gdic,sizeof(gdic),"%s/%s.%s",dicdir,groupdic,lang[j]);
6961 bpr 1016
    snprintf(ddic,sizeof(ddic),"%s/%s.%s",dicdir,domaindic,lang[j]);
6884 bpr 1017
    suffix_dic(sdic); prepare_dic(gdic);
1018
    gdicbuf=dicbuf; gentrycount=entrycount;
1019
    memmove(gentry,entry,gentrycount*sizeof(entry[0]));
1020
    prepare_dic(mdic);
1021
    mdicbuf=dicbuf; mentrycount=entrycount;
1022
    memmove(mentry,entry,mentrycount*sizeof(entry[0]));
6973 bpr 1023
    prepare_dic(ddic);
1024
    ddicbuf=dicbuf; dentrycount=entrycount;
1025
    memmove(dentry,entry,dentrycount*sizeof(entry[0]));
6884 bpr 1026
    unknown_type=unk_leave; translate(ignore[j]);
15440 bpr 1027
    for(i=0;i<modcnt;i++)
1028
      if(mod[i].langcnt>0) {
1029
      /* look for another language */
1030
        for(d=k=0;k<mod[i].langcnt;k++)
1031
          if(mod[i].langs[k]<mod[i].langs[d]) d=k;
1032
        for(k=0;k<mod[i].langcnt && mod[i].langs[k]!=j;k++);
1033
        if(k>=mod[i].langcnt) k=d;
1034
        onesg(mod[i].counts[k],mod[i].langs[k],index);
1035
      }
6884 bpr 1036
    if(mentrycount>0) free(mdicbuf);
1037
    if(gentrycount>0) free(gdicbuf);
1038
    if(suffixcnt>0) free(sufbuf);
6961 bpr 1039
    if(dentrycount>0) free(ddicbuf);
15440 bpr 1040
    fclose(indf); fclose(weightf);
12248 bpr 1041
  }
15444 bpr 1042
  fclose(titf); fclose(descf); fclose(remf);
10 reyssat 1043
}
1044
 
1045
int main()
1046
{
12248 bpr 1047
  gentry=xmalloc(entry_size);
1048
  dentry=xmalloc(entry_size);
1049
  mentry=xmalloc(entry_size);
15442 bpr 1050
  init();
12248 bpr 1051
  prep();
1052
  if(modcnt>0) modules();
1053
  clean();
1054
  sprep();
15375 bpr 1055
  if(modcnt>0) sgs(sheetoutdir,sheet_index);
1056
  gprep();
1057
  if(modcnt>0) sgs(glossaryoutdir,glossary_index);
12248 bpr 1058
  return 0;
10 reyssat 1059
}