Rev 17024 | Details | Compare with Previous | Last modification | View Log | RSS feed
Rev | Author | Line No. | Line |
---|---|---|---|
10 | reyssat | 1 | /* Copyright (C) 1998-2003 XIAO, Gang of Universite de Nice - Sophia Antipolis |
2 | * |
||
3 | * This program is free software; you can redistribute it and/or modify |
||
4 | * it under the terms of the GNU General Public License as published by |
||
5 | * the Free Software Foundation; either version 2 of the License, or |
||
6 | * (at your option) any later version. |
||
7 | * |
||
8 | * This program is distributed in the hope that it will be useful, |
||
9 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
||
10 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
||
11 | * GNU General Public License for more details. |
||
12 | * |
||
13 | * You should have received a copy of the GNU General Public License |
||
14 | * along with this program; if not, write to the Free Software |
||
15 | * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. |
||
16 | */ |
||
17 | |||
6884 | bpr | 18 | /* This is an internal program, |
7915 | bpr | 19 | * used to index modules for search engine. |
6884 | bpr | 20 | */ |
10 | reyssat | 21 | |
8100 | bpr | 22 | #include "../Lib/libwims.h" |
8123 | bpr | 23 | #include "translator_.h" |
24 | #include "suffix.h" |
||
10 | reyssat | 25 | |
/* Capacity limits for the language table and for the module tables. */
#define MAX_LANGS MAX_LANGUAGES
#define MAX_MODULES 65536
/* Default locations, relative to the working directory.
 * outdir, sheetdir, sheetoutdir and glossaryoutdir can be overridden through
 * the environment variables modind_outdir, modind_sheetdir,
 * modind_sheetoutdir and modind_glossaryoutdir (see init()).
 */
char *moduledir=   "public_html/modules";      /* holds <module>/INDEX files */
char *sheetdir=   "public_html/bases/sheet";   /* holds <sheet>.def files */
char *glossarydir=   "public_html/scripts/data/glossary";
char *dicdir=      "public_html/bases";        /* base directory of the dictionaries */
char *outdir=      "public_html/bases/site2";  /* where the module index is written */
char *sheetoutdir=   "public_html/bases/sheet/index";
char *glossaryoutdir=   "public_html/scripts/data/glossary/index";
/* Dictionary names, relative to dicdir. */
char *maindic=      "sys/words";
char *groupdic=      "sys/wgrp/wgrp";
char *suffixdic=   "sys/suffix";
char *domaindic=   "sys/domaindic";
char *ignoredic=   "sys/indignore";   /* per-language list of words not to index */
char *conffile=      "log/wims.conf";  /* read for the site_languages definition */
char *mlistbase=   "lists";            /* subdirectory of outdir for module lists */

/* Languages to index. The table is overwritten by init() when the
 * configuration file declares site_languages; otherwise its first
 * DEFAULT_LANGCNT entries are used.
 */
char lang[MAX_LANGS][4]={
    "en","fr","cn","es","it","nl","si","ca","pt"
};
#define DEFAULT_LANGCNT 6
/* NOTE(review): allang lists 10 codes while allangcnt is 8 -- confirm
 * whether the last two entries are meant to be ignored.
 */
char allang[MAX_LANGS][4]={
    "en","fr","cn","es","it","nl","de","si","ca","pt"
};
#define allangcnt 8
/* ignore[i] holds the content of the ignore dictionary for lang[i]. */
char ignore[MAX_LANGS][MAX_LINELEN+1];
char mlistfile[MAX_LANGS][256];
/* number of valid entries in lang[] */
int langcnt;
/* Output streams shared by the preparation and indexing routines. */
FILE *langf, *titf, *descf, *weightf, *robotf, *indf, *listf, *addrf, *serialf, *authorf, *versionf, *remf;
/* Per-language title files (title_ca, title_en, ...). */
FILE *titf_ca,*titf_en,*titf_es,*titf_fr,*titf_it,*titf_nl;
/* Module categories: the keyword looked up in a module's "category" field
 * and the one-letter code used to name the per-category output files.
 */
struct cat {
    char *name;
    char typ;
} cat[]={
    {"all_types", 'A'},
    {"exercise",  'X'},
    {"oef",       'O'},
    {"tool",      'T'},
    {"recreation",'R'},
    {"reference", 'Y'},
    {"document",  'D'},
    {"popup",     'P'},
    {"datamodule",'M'}
};
/* number of entries in cat[] */
#define catno (sizeof(cat)/sizeof(cat[0]))

/* One entry per distinct module (all language versions grouped together). */
struct mod {
    char *name;                      /* points into the mlist buffer */
    unsigned char langs[MAX_LANGS];  /* indices into lang[] of the available versions */
    int counts[MAX_LANGS];           /* serial number of each version */
    int langcnt;                     /* number of valid entries in langs[] / counts[] */
} mod[MAX_MODULES];

// serial-> the name of the module indexed by serial, lang and its classe
struct revmod {
    char name[MAX_MODULELEN+1];  /* name as it appears in the list */
    int lang;                    /* index into lang[] */
    int imod;                    /* index of the corresponding entry of mod[] */
    char keywords[MAX_FNAME];    /* filled by gprep() with the leading path components */
} revmod[MAX_MODULES];
/* number of valid entries in mod[] */
int modcnt;

/* writable copy of the list taken from the environment; mod[].name points into it */
char *mlist;
||
/* Names of the fields of a sheet definition, in file order. */
char *sheetindex[]={
    "title", "description",
    "duration", "severity",
    "level", "domain",
    "keywords", "reserved1", "reserved2", "information"
};
/* correspond to the order of sheetindex;
 * an empty name means the field has no counterpart in a glossary file
 * (_getdef() yields an empty value for an empty name).
 */
char *glindex[]={
    "gl_title", "gl_description",
    "", "",
    "gl_level", "gl_domain",
    "gl_keywords","","",""};

#define SHEETINDEX_NO (sizeof(sheetindex)/sizeof(sheetindex[0]))
/* Field values of the sheet/glossary being indexed; the extra last row
 * (index SHEETINDEX_NO) receives the name of the entry.
 */
char gsindbuf[SHEETINDEX_NO+1][MAX_LINELEN+1];

/* do not modify the order, correspond to the order in the sheet file */
enum{s_title, s_description,
    s_duration, s_severity,
    s_level, s_domain,
    s_keywords, s_reserved1, s_reserved2,
    s_information
};

/* Names of the fields read from a module's INDEX file.
 * NOTE(review): "title_es" is present but there is no "keywords_es" --
 * confirm whether that is intended.
 */
char *modindex[]={
    "title", "description",
    "author", "address", "copyright",
    "version", "wims_version", "language",
    "category", "level", "domain", "keywords",
    "keywords_ca", "keywords_en", "keywords_fr", "keywords_it", "keywords_nl",
    "title_ca", "title_en", "title_es", "title_fr", "title_it", "title_nl",
    "require"
};
#define MODINDEX_NO (sizeof(modindex)/sizeof(modindex[0]))
/* indbuf[k] holds the value of field modindex[k] for the current module */
char indbuf[MODINDEX_NO][MAX_LINELEN+1];
/* indices into modindex[] / indbuf[]; must stay in the same order as modindex[] */
enum{i_title, i_description,
    i_author,i_address,i_copyright,
    i_version,i_wims_version,i_language,
    i_category,i_level,i_domain,i_keywords,
    i_keywords_ca,i_keywords_en,i_keywords_fr,i_keywords_it,i_keywords_nl,
    i_title_ca,i_title_en,i_title_es,i_title_fr,i_title_it,i_title_nl,
    i_require
};

char *module_special_file[]={
    "intro","help","about"
};
#define MODSPEC_NO (sizeof(module_special_file)/sizeof(module_special_file[0]))
/* two-letter language of the module whose INDEX was last read (see module_index()) */
char module_language[4];

/* buffers and entry tables of the main (m), group (g) and domain (d) dictionaries */
char *mdicbuf, *gdicbuf, *ddicbuf, *gentry, *mentry, *dentry;

/* number of entries of each of the three dictionaries */
int gentrycount, mentrycount, dentrycount;
||
142 | |||
143 | |||
6884 | bpr | 144 | /* fold known accented letters to unaccented, other strange characters to space |
7915 | bpr | 145 | * apostrophe is among the exceptions to be kept (important for multi-word expressions) |
6884 | bpr | 146 | */ |
8100 | bpr | 147 | void deaccent2(char *p) |
10 | reyssat | 148 | { |
12248 | bpr | 149 | char *sp; |
150 | char *v; |
||
151 | for(sp=p;*sp;sp++) { |
||
152 | if(*sp<0 && (v=strchr(acctab,*sp))!=NULL) |
||
153 | *sp=*(deatab+(v-acctab)); |
||
154 | if(!isalnum(*sp) && strchr(",.&$+*",*sp)==0) *sp=' '; |
||
155 | else *sp=tolower(*sp); |
||
156 | } |
||
10 | reyssat | 157 | } |
158 | |||
/* Replace, in place, every character that is neither alphanumeric nor one
 * of & $ + * by a space, so that the string becomes a list of words.
 */
void towords(char *p)
{
    char *pp;

    for(pp=p;*pp;pp++) {
        /* cast: passing a negative char to isalnum() is undefined behaviour */
        if(!isalnum((unsigned char)*pp) && strchr("&$+*",*pp)==0) *pp=' ';
    }
}
165 | |||
/* Find the first occurrence of w in p that stands as a whole word, i.e.
 * that is preceded by the start of p or a space character and followed by
 * a space character or the end of the string.
 * Returns a pointer to the occurrence, or NULL when there is none.
 */
char *wordchr2(char *p, char *w)
{
    size_t wlen=strlen(w);   /* loop invariant, computed once */
    char *r;

    for(r=strstr(p,w); r!=NULL; r=strstr(r+1,w)) {
        int left_ok = (r==p) || isspace((unsigned char)*(r-1));
        int right_ok = *(r+wlen)==0 || isspace((unsigned char)*(r+wlen));
        if(left_ok && right_ok) return r;
        /* Only reachable with an empty w: we are on the terminator, and
         * searching from r+1 would read past the end of the string. */
        if(*r==0) return NULL;
    }
    return NULL;
}
176 | |||
/* Return a pointer just past the '>' that closes the tag starting at p
 * (p normally points to the opening '<').
 * Nested '<' ... '>' pairs are skipped, and so is the content of
 * double- or single-quoted strings. When the tag, or a quoted string inside
 * it, is not closed, the returned pointer is the end of the string.
 *
 * Nesting is tracked with a counter rather than by recursion, so that a
 * long run of '<' characters cannot exhaust the stack.
 */
char *find_tag_end(char *p)
{
    char *pp=p;
    size_t depth=1;   /* number of '>' still needed to close the tag */

    if(*pp=='<') pp++;
    for(; *pp; pp++) {
        switch(*pp) {
            case '<':
                depth++;
                break;
            case '>':
                if(--depth==0) return pp+1;
                break;
            case '"':
            case '\'':
                /* jump to the matching closing quote */
                pp=strchr(pp+1,*pp);
                if(pp==NULL) return p+strlen(p);
                break;
            default:
                break;
        }
    }
    return pp;
}
197 | |||
/* Locate the first tag named `tag` (compared case-insensitively) in p.
 * The name must be followed by a non-alphanumeric character, so that
 * looking for "b" does not match "<body>".
 * Returns a pointer to the '<' of the tag, or to the end of the string
 * when no such tag is found.
 */
char *find_tag(char *p, char *tag)
{
    char *pp;
    size_t len=strlen(tag);

    for(pp=strchr(p,'<'); pp!=NULL; pp=strchr(pp+1,'<')) {
        /* cast: passing a negative char to isalnum() is undefined behaviour */
        if(strncasecmp(pp+1,tag,len)==0 &&
           !isalnum((unsigned char)*(pp+1+len))) return pp;
    }
    return p+strlen(p);
}
208 | |||
/* Strip every html tag from p, in place.
 * A tag that is never closed truncates the string where the tag starts.
 */
void detag(char *p)
{
    char *open=strchr(p,'<');

    while(open!=NULL) {
        char *after=find_tag_end(open);
        if(*after==0) {
            /* unterminated tag: drop everything from here on */
            *open=0;
            return;
        }
        /* pull the rest of the string over the tag, then look again from
         * the same position */
        ovlstrcpy(open,after);
        open=strchr(open,'<');
    }
}
219 | |||
/* Put a space after every comma of p so that a comma always ends a word. */
void comma(char *p)
{
    char *hit=strchr(p,',');

    while(hit!=NULL) {
        string_modify3(p,hit,hit+1,", ");
        /* continue after the comma that was just handled */
        hit=strchr(hit+1,',');
    }
}
/* Replace every '/' of p by ',' in place.
 * The replacement has the same length as the original character, so it is
 * written directly instead of going through the generic string-modify helper.
 */
void slash2comma(char *p)
{
    char *pp;

    for(pp=strchr(p,'/'); pp!=NULL; pp=strchr(pp+1,'/')) *pp=',';
}
||
15375 | bpr | 235 | /* _getdef from lines.c except the error msg*/ |
10 | reyssat | 236 | void _getdef(char buf[], char *name, char value[]) |
237 | { |
||
15375 | bpr | 238 | char *p1, *p2, *p3, *p4; |
10 | reyssat | 239 | |
15375 | bpr | 240 | if(*name==0) goto nothing; /* this would create segfault. */ |
12248 | bpr | 241 | for(p1=strstr(buf,name); p1!=NULL; p1=strstr(p1+1,name)) { |
6884 | bpr | 242 | p2=find_word_start(p1+strlen(name)); |
243 | if((p1>buf && !isspace(*(p1-1))) || *p2!='=') continue; |
||
15375 | bpr | 244 | p3=p1; while(p3>buf && *(p3-1)!='\n') p3--; |
245 | p3=find_word_start(p3); |
||
246 | if(p3<p1 && *p3!='!') continue; |
||
247 | if(p3<p1) { |
||
248 | p3++; p4=find_word_end(p3); |
||
249 | if(find_word_start(p4)!=p1) continue; |
||
250 | if(p4-p3!=3 || (strncmp(p3,"set",3)!=0 && |
||
251 | strncmp(p3,"let",3)!=0 && |
||
252 | strncmp(p3,"def",3)!=0)) { |
||
253 | if(p4-p3!=6 || strncmp(p3,"define",6)!=0) continue; |
||
254 | } |
||
255 | } |
||
256 | p2++;p3=strchr(p2,'\n'); if(p3==NULL) p3=p2+strlen(p2); |
||
257 | p2=find_word_start(p2); |
||
258 | if(p2>p3) goto nothing; |
||
259 | /*if(p3-p2>=MAX_LINELEN) user_error("cmd_output_too_long");*/ |
||
260 | memmove(value,p2,p3-p2); value[p3-p2]=0; |
||
261 | strip_trailing_spaces(value); return; |
||
12248 | bpr | 262 | } |
15375 | bpr | 263 | nothing: |
15394 | bpr | 264 | value[0]=0; |
10 | reyssat | 265 | } |
266 | |||
/* Get variable definition from a file.
 * Result stored in buffer value of length MAX_LINELEN.
 * value is left empty when the file cannot be opened or read, is empty,
 * or does not define name.
 */
void getdef(char *fname, char *name, char value[])
{
    FILE *f;
    char *buf;
    long size;
    size_t got;

    value[0]=0;
    f=fopen(fname,"r"); if(f==NULL) return;
    /* determine the file size; give up cleanly if the stream is not seekable */
    if(fseek(f,0,SEEK_END)!=0 || (size=ftell(f))<=0 || fseek(f,0,SEEK_SET)!=0) {
        fclose(f); return;
    }
    buf=xmalloc(size+256);
    got=fread(buf,1,size,f);
    fclose(f);
    if(got>0) {
        buf[got]=0;
        _getdef(buf,name,value);
    }
    /* released on every path, including the empty-read one */
    free(buf);
}
285 | |||
15442 | bpr | 286 | void init(void) |
10 | reyssat | 287 | { |
12248 | bpr | 288 | char buf[MAX_LINELEN+1]; |
15442 | bpr | 289 | char *p1,*p2,*s; |
290 | int i,l; |
||
12248 | bpr | 291 | FILE *f; |
6881 | bpr | 292 | |
12248 | bpr | 293 | s=getenv("modind_outdir"); if(s!=NULL && *s!=0) outdir=s; |
294 | s=getenv("modind_sheetdir"); if(s!=NULL && *s!=0) sheetdir=s; |
||
295 | s=getenv("modind_sheetoutdir"); if(s!=NULL && *s!=0) sheetoutdir=s; |
||
15442 | bpr | 296 | s=getenv("modind_glossaryoutdir"); if(s!=NULL && *s!=0) glossaryoutdir=s; |
6884 | bpr | 297 | /* take the langs declared in conffile */ |
12248 | bpr | 298 | getdef(conffile,"site_languages",buf); |
15442 | bpr | 299 | langcnt=0; |
12248 | bpr | 300 | for(p1=buf;*p1;p1++) if(!isalnum(*p1)) *p1=' '; |
301 | for(p1=find_word_start(buf); *p1 && langcnt<MAX_LANGS; p1=find_word_start(p2)) { |
||
6884 | bpr | 302 | p2=find_word_end(p1); |
303 | if(p2!=p1+2 || !isalpha(*p1) || !isalpha(*(p1+1))) continue; |
||
304 | memmove(lang[langcnt],p1,2); lang[langcnt++][2]=0; |
||
12248 | bpr | 305 | } |
306 | if(langcnt==0) {/* default languages */ |
||
6884 | bpr | 307 | langcnt=DEFAULT_LANGCNT; |
12248 | bpr | 308 | } |
309 | for(i=0;i<langcnt;i++) { |
||
310 | snprintf(buf,sizeof(buf),"%s/%s.%s",dicdir,ignoredic,lang[i]); |
||
311 | f=fopen(buf,"r"); if(f==NULL) continue; |
||
312 | l=fread(ignore[i],1,MAX_LINELEN,f);fclose(f); |
||
313 | if(l<0 || l>=MAX_LINELEN) l=0; |
||
314 | ignore[i][l]=0; |
||
315 | } |
||
15442 | bpr | 316 | } |
317 | /* Preparation of data */ |
||
318 | void prep(void) |
||
319 | { |
||
320 | char buf[MAX_LINELEN+1]; |
||
321 | char *p1,*p2,*s,*old; |
||
322 | int i,l,thislang,t; |
||
15444 | bpr | 323 | modcnt=0; old=""; |
15442 | bpr | 324 | snprintf(buf,sizeof(buf),"%s/addr",outdir); |
325 | addrf=fopen(buf,"w"); |
||
326 | if(!addrf) { fprintf(stderr,"modind: error creating output files addr.\n"); exit(1);} |
||
327 | snprintf(buf,sizeof(buf),"%s/serial",outdir); |
||
328 | serialf=fopen(buf,"w"); |
||
329 | if(!serialf) { fprintf(stderr,"modind: error creating output files serial.\n"); exit(1);} |
||
330 | |||
331 | s=getenv("mlist"); if(s==NULL) exit(1); |
||
332 | l=strlen(s); if(l<0 || l>100*MAX_LINELEN) exit(1); |
||
15444 | bpr | 333 | mlist=xmalloc(l+16); ovlstrcpy(mlist,s); |
15442 | bpr | 334 | |
12248 | bpr | 335 | for(t=0, p1=find_word_start(mlist); *p1 && modcnt<MAX_MODULES; |
9090 | bpr | 336 | p1=find_word_start(p2), t++) { |
12248 | bpr | 337 | p2=find_word_end(p1); |
338 | l=p2-p1; if(*p2) *p2++=0; |
||
339 | fprintf(addrf,"%d:%s\n",t,p1); |
||
340 | fprintf(serialf,"%s:%d\n",p1,t); |
||
341 | thislang=-1; |
||
6564 | bpr | 342 | /* language is taken from the address */ |
12248 | bpr | 343 | if(l>3 && p1[l-3]=='.') { |
344 | for(i=0;i<langcnt;i++) if(strcasecmp(lang[i],p1+l-2)==0) break; |
||
345 | if(i<langcnt) {p1[l-3]=0; thislang=i;} |
||
346 | else {/* unknown language, not referenced */ |
||
6884 | bpr | 347 | continue; |
9090 | bpr | 348 | } |
12248 | bpr | 349 | } |
350 | if(modcnt>0 && strcmp(old,p1)==0 && thislang>=0) { |
||
351 | if(mod[modcnt-1].langcnt<langcnt) { |
||
6884 | bpr | 352 | mod[modcnt-1].langs[mod[modcnt-1].langcnt]=thislang; |
353 | mod[modcnt-1].counts[mod[modcnt-1].langcnt]=t; |
||
354 | (mod[modcnt-1].langcnt)++; |
||
12248 | bpr | 355 | } |
6884 | bpr | 356 | } |
12248 | bpr | 357 | else { |
358 | mod[modcnt].name=old=p1; |
||
359 | if(thislang>=0) { |
||
360 | mod[modcnt].langs[0]=thislang; |
||
361 | mod[modcnt].langcnt=1; |
||
362 | } |
||
17038 | bpr | 363 | else |
364 | mod[modcnt].langcnt=0; |
||
12248 | bpr | 365 | mod[modcnt].counts[0]=t; |
366 | modcnt++; |
||
6884 | bpr | 367 | } |
12248 | bpr | 368 | } |
369 | snprintf(buf,sizeof(buf),"%s/language",outdir); |
||
370 | langf=fopen(buf,"w"); |
||
371 | snprintf(buf,sizeof(buf),"%s/title",outdir); |
||
372 | titf=fopen(buf,"w"); |
||
16987 | bpr | 373 | snprintf(buf,sizeof(buf),"%s/title_ca",outdir); |
374 | titf_ca=fopen(buf,"w"); |
||
375 | snprintf(buf,sizeof(buf),"%s/title_en",outdir); |
||
376 | titf_en=fopen(buf,"w"); |
||
377 | snprintf(buf,sizeof(buf),"%s/title_es",outdir); |
||
378 | titf_es=fopen(buf,"w"); |
||
379 | snprintf(buf,sizeof(buf),"%s/title_fr",outdir); |
||
380 | titf_fr=fopen(buf,"w"); |
||
381 | snprintf(buf,sizeof(buf),"%s/title_it",outdir); |
||
382 | titf_it=fopen(buf,"w"); |
||
383 | snprintf(buf,sizeof(buf),"%s/title_nl",outdir); |
||
384 | titf_nl=fopen(buf,"w"); |
||
12248 | bpr | 385 | snprintf(buf,sizeof(buf),"%s/description",outdir); |
386 | descf=fopen(buf,"w"); |
||
387 | snprintf(buf,sizeof(buf),"%s/author",outdir); |
||
388 | authorf=fopen(buf,"w"); |
||
389 | snprintf(buf,sizeof(buf),"%s/version",outdir); |
||
390 | versionf=fopen(buf,"w"); |
||
391 | snprintf(buf,sizeof(buf),"%s/%s/robot.phtml",outdir,mlistbase); |
||
392 | robotf=fopen(buf,"w"); |
||
393 | fclose(addrf); fclose(serialf); |
||
16987 | bpr | 394 | if(!robotf || !versionf || !authorf || !descf || !titf |
395 | || !titf_ca || !titf_en || !titf_es || !titf_fr || !titf_it || !titf_nl |
||
396 | || !langf) { |
||
12248 | bpr | 397 | fprintf(stderr,"modind: error creating output files.\n"); |
398 | exit(1); |
||
399 | } |
||
10 | reyssat | 400 | } |
401 | |||
402 | void sprep(void) |
||
403 | { |
||
15440 | bpr | 404 | char buf[MAX_LINELEN+1]; |
12248 | bpr | 405 | char *p1,*p2,*s; |
15440 | bpr | 406 | int i,l,t,thislang; |
6881 | bpr | 407 | |
12248 | bpr | 408 | modcnt=0; |
15440 | bpr | 409 | snprintf(buf,sizeof(buf),"%s/addr",sheetoutdir); |
410 | addrf=fopen(buf,"w"); |
||
411 | if(!addrf) { fprintf(stderr,"modind: error creating output files addr.\n"); exit(1);} |
||
412 | snprintf(buf,sizeof(buf),"%s/serial",sheetoutdir); |
||
413 | serialf=fopen(buf,"w"); |
||
12248 | bpr | 414 | s=getenv("slist"); if(s==NULL) return; |
415 | l=strlen(s); if(l<0 || l>100*MAX_LINELEN) return; |
||
416 | mlist=xmalloc(l+16); ovlstrcpy(mlist,s); |
||
15440 | bpr | 417 | for(t=0,p1=find_word_start(mlist); *p1 && modcnt<MAX_MODULES; p1=find_word_start(p2),t++) { |
6884 | bpr | 418 | p2=find_word_end(p1); |
419 | l=p2-p1; if(*p2) *p2++=0; |
||
420 | for(i=0;i<langcnt;i++) if(strncasecmp(lang[i],p1,2)==0) break; |
||
421 | if(i<langcnt) thislang=i; else continue; |
||
15440 | bpr | 422 | ovlstrcpy(revmod[t].name,p1); |
423 | revmod[t].lang=thislang; |
||
17038 | bpr | 424 | revmod[t].imod=modcnt; |
6884 | bpr | 425 | mod[modcnt].name=p1; |
426 | mod[modcnt].langs[0]=thislang; |
||
427 | mod[modcnt].langcnt=1; |
||
17024 | bpr | 428 | mod[modcnt].counts[0]=t; |
15440 | bpr | 429 | fprintf(addrf,"%d:%s\n",modcnt,p1); |
430 | fprintf(serialf,"%s:%d\n",p1,modcnt); |
||
15539 | bpr | 431 | modcnt++; |
12248 | bpr | 432 | } |
17024 | bpr | 433 | fclose(addrf); fclose(serialf); |
10 | reyssat | 434 | } |
435 | |||
15375 | bpr | 436 | void gprep(void) |
437 | { |
||
15440 | bpr | 438 | char buf[MAX_LINELEN+1]; |
439 | char *p1,*p2,*s,*old; |
||
440 | int l,i,t,thislang; |
||
15444 | bpr | 441 | modcnt=0; old=""; |
15440 | bpr | 442 | snprintf(buf,sizeof(buf),"%s/addr",glossaryoutdir); |
443 | addrf=fopen(buf,"w"); |
||
444 | if(!addrf) { fprintf(stderr,"modind: error creating output files addr.\n"); exit(1);} |
||
445 | snprintf(buf,sizeof(buf),"%s/serial",glossaryoutdir); |
||
446 | serialf=fopen(buf,"w"); |
||
15375 | bpr | 447 | s=getenv("glist"); if(s==NULL) return; |
448 | l=strlen(s); if(l<0 || l>100*MAX_LINELEN) return; |
||
449 | mlist=xmalloc(l+16); ovlstrcpy(mlist,s); |
||
15440 | bpr | 450 | for(t=0,p1=find_word_start(mlist); *p1 && modcnt<MAX_MODULES; p1=find_word_start(p2),t++) { |
15375 | bpr | 451 | p2=find_word_end(p1); |
452 | if(*p2) *p2++=0; |
||
15440 | bpr | 453 | fprintf(addrf,"%d:%s\n",t,p1); |
454 | fprintf(serialf,"%s:%d\n",p1,t); |
||
455 | ovlstrcpy(revmod[t].name,p1); |
||
15482 | bpr | 456 | ovlstrcpy(revmod[t].keywords,p1); |
15375 | bpr | 457 | s=strchr(p1,'/'); |
458 | if(s != NULL) s=strchr(s+1,'/'); |
||
459 | if(s==NULL) { |
||
460 | fprintf(stderr,"modind: no language %s\n",p1); exit(1); |
||
461 | } |
||
15482 | bpr | 462 | revmod[t].keywords[s-p1]=0; |
15375 | bpr | 463 | s++; |
464 | for(i=0;i<langcnt;i++) if(strncasecmp(lang[i],s,2)==0) break; |
||
15440 | bpr | 465 | thislang = i<langcnt ? i : -1; |
466 | revmod[t].lang=i; |
||
467 | s[0]=s[1]='x'; |
||
468 | if(modcnt>0 && strcmp(old,p1)==0 && thislang >= 0) { |
||
469 | if(mod[modcnt-1].langcnt<langcnt) { |
||
470 | mod[modcnt-1].langs[mod[modcnt-1].langcnt]=thislang; |
||
471 | mod[modcnt-1].counts[mod[modcnt-1].langcnt]=t; |
||
472 | (mod[modcnt-1].langcnt)++; |
||
473 | } |
||
474 | revmod[t].imod=modcnt-1; |
||
475 | } |
||
476 | else { |
||
477 | mod[modcnt].name=old=p1; |
||
478 | if(thislang>=0) { |
||
479 | mod[modcnt].langs[0]=thislang; |
||
480 | mod[modcnt].langcnt=1; |
||
481 | } |
||
482 | else mod[modcnt].langcnt=0; |
||
483 | mod[modcnt].counts[0]=t; |
||
484 | revmod[t].imod=modcnt; |
||
485 | modcnt++; |
||
486 | } |
||
15375 | bpr | 487 | } |
15440 | bpr | 488 | fclose(addrf); fclose(serialf); |
15375 | bpr | 489 | } |
490 | |||
6884 | bpr | 491 | /* read and treat module's INDEX file */ |
10 | reyssat | 492 | int module_index(const char *name) |
493 | { |
||
12248 | bpr | 494 | char *p, fbuf[MAX_LINELEN+1], ibuf[MAX_LINELEN+1]; |
495 | FILE *indf; |
||
496 | int i,l; |
||
10 | reyssat | 497 | |
12248 | bpr | 498 | snprintf(fbuf,sizeof(fbuf),"%s/%s/INDEX",moduledir,name); |
499 | indf=fopen(fbuf,"r"); |
||
500 | if(indf==NULL) { |
||
501 | fprintf(stderr,"modind: INDEX of %s not found\n",fbuf); return -1; |
||
502 | } |
||
503 | l=fread(ibuf,1,MAX_LINELEN,indf); fclose(indf); |
||
504 | if(l>0 && l<MAX_LINELEN) ibuf[l]=0; else return -1; |
||
6884 | bpr | 505 | /* treate all fields in *modindex */ |
12248 | bpr | 506 | for(i=0;i<MODINDEX_NO;i++) { |
507 | _getdef(ibuf,modindex[i],indbuf[i]); |
||
6884 | bpr | 508 | /* compatibility precaution */ |
12248 | bpr | 509 | if(indbuf[i][0]==':') indbuf[i][0]='.'; |
510 | } |
||
511 | p=find_word_start(indbuf[i_language]); |
||
512 | if(isalpha(*p) && isalpha(*(p+1))) { |
||
513 | memmove(module_language,p,2); module_language[2]=0; |
||
514 | } |
||
515 | else ovlstrcpy(module_language,"en"); |
||
516 | return 0; |
||
10 | reyssat | 517 | } |
518 | |||
519 | int sheet_index(int serial) |
||
520 | { |
||
12248 | bpr | 521 | char *p1, *p2, fbuf[MAX_LINELEN+1], ibuf[MAX_LINELEN+1]; |
522 | FILE *indf; |
||
523 | int i,l; |
||
10 | reyssat | 524 | |
12248 | bpr | 525 | snprintf(fbuf,sizeof(fbuf),"%s/%s.def",sheetdir,mod[serial].name); |
526 | indf=fopen(fbuf,"r"); if(indf==NULL) return -1; |
||
527 | l=fread(ibuf,1,MAX_LINELEN,indf); fclose(indf); |
||
528 | if(l>0 && l<MAX_LINELEN) ibuf[l]=0; else return -1; |
||
15375 | bpr | 529 | for(i=0;i<SHEETINDEX_NO;i++) gsindbuf[i][0]=0; |
12248 | bpr | 530 | for(i=0,p1=find_word_start(ibuf); |
9090 | bpr | 531 | i<SHEETINDEX_NO-1 && *p1!=':' && *p1!=0; |
532 | i++,p1=p2) { |
||
12248 | bpr | 533 | p2=strchr(p1,'\n'); |
534 | if(p2!=NULL) *p2++=0; else p2=p1+strlen(p1); |
||
8100 | bpr | 535 | p1=find_word_start(p1); strip_trailing_spaces2(p1); |
15375 | bpr | 536 | snprintf(gsindbuf[i],MAX_LINELEN,"%s",p1); |
12248 | bpr | 537 | } |
538 | p2=strstr(p1,"\n:"); if(p2==NULL) p2=p1+strlen(p1); |
||
539 | else *p2=0; |
||
540 | p1=find_word_start(p1); strip_trailing_spaces2(p1); |
||
541 | for(p2=p1;*p2;p2++) if(*p2=='\n') *p2=' '; |
||
15375 | bpr | 542 | ovlstrcpy(gsindbuf[s_information],p1); |
15440 | bpr | 543 | ovlstrcpy(gsindbuf[SHEETINDEX_NO],revmod[serial].name); |
12248 | bpr | 544 | return 0; |
10 | reyssat | 545 | } |
546 | |||
15375 | bpr | 547 | int glossary_index(int serial) |
548 | { |
||
15440 | bpr | 549 | char nbuf[MAX_LINELEN+1],fbuf[MAX_LINELEN+1], ibuf[MAX_LINELEN+1],*p,*s; |
15375 | bpr | 550 | FILE *indf; |
551 | int i,l; |
||
15440 | bpr | 552 | s=lang[revmod[serial].lang]; |
553 | p=strchr(nbuf,'/'); |
||
554 | if(p != NULL) p=strchr(p+1,'/'); |
||
555 | if(p != NULL) {p[1]=s[0];p[2]=s[1];} |
||
556 | snprintf(fbuf,sizeof(fbuf),"%s/%s",glossarydir,revmod[serial].name); |
||
15375 | bpr | 557 | indf=fopen(fbuf,"r"); |
558 | l=fread(ibuf,1,MAX_LINELEN,indf); fclose(indf); |
||
559 | if(l>0 && l<MAX_LINELEN) ibuf[l]=0; else return -1; |
||
560 | for(i=0;i<SHEETINDEX_NO;i++) { |
||
561 | _getdef(ibuf,glindex[i],gsindbuf[i]); |
||
562 | } |
||
15482 | bpr | 563 | s=gsindbuf[s_keywords]+strlen(gsindbuf[s_keywords]); |
564 | *s++ = ','; |
||
565 | ovlstrcpy(s,revmod[serial].keywords); |
||
17038 | bpr | 566 | slash2comma(revmod[serial].keywords); |
15440 | bpr | 567 | ovlstrcpy(gsindbuf[SHEETINDEX_NO],nbuf); |
15375 | bpr | 568 | return 0; |
569 | } |
||
570 | |||
/* State of the module currently being indexed. */
/* one-letter codes (cat[].typ) of the categories the module belongs to */
unsigned char categories[16];
/* words already emitted for the module, separated by two spaces */
char taken[MAX_LINELEN+1];
/* number of entries in categories[], length of taken, sum of emitted weights */
int catcnt, takenlen, tweight;

/* file management for appenditem:
 * output files are opened once and kept in a table sorted by name
 * (see file_from_list()).
 */
#define MAX_FILES (MAX_LANGS*catno)

char *fnames[MAX_FILES];   /* file names, sorted with strcmp() */
FILE *files[MAX_FILES];    /* files[k] is the stream opened for fnames[k] */
int open_files;            /* number of valid entries in both tables */
||
581 | |||
582 | FILE * file_from_list(char *name){ |
||
583 | int i, l = 0, r = open_files; |
||
584 | while (r>l){ |
||
585 | int m = (l+r)/2; |
||
586 | int cmp = strcmp(name,fnames[m]); |
||
587 | if (!cmp) return files[m]; |
||
588 | if (cmp < 0) r = m; else l = m+1; |
||
589 | } |
||
590 | for (i=open_files; i > l; i--) {files[i]=files[i-1]; fnames[i]=fnames[i-1];} |
||
591 | fnames[l] = xmalloc(MAX_FNAME); |
||
592 | ovlstrcpy(fnames[l],name); |
||
593 | open_files++; |
||
594 | return files[l]=fopen(name,"a"); |
||
595 | } |
||
596 | |||
10 | reyssat | 597 | void appenditem(char *word, int lind, int serial, int weight, char *l) |
598 | { |
||
12248 | bpr | 599 | char nbuf[MAX_LINELEN+1], buf[MAX_LINELEN+1]; |
600 | int i, ll; |
||
601 | char *p; |
||
602 | FILE *f; |
||
6881 | bpr | 603 | |
12248 | bpr | 604 | if(!isalnum(*word) || (ll=strlen(word))<2 || |
605 | wordchr2(taken,word)!=NULL || |
||
606 | wordchr2(ignore[lind],word)!=NULL || |
||
607 | takenlen>=MAX_LINELEN-ll-16) |
||
608 | return; |
||
609 | if(ll==2 && (!isdigit(word[0]) || !isalpha(word[1]))) return; |
||
610 | for(p=word;*p;p++) if(!isalnum(*p) && *p!=' ') return; |
||
611 | taken[takenlen++]=' '; taken[takenlen++]=' '; |
||
612 | ovlstrcpy(taken+takenlen,word); |
||
613 | takenlen+=ll; tweight+=weight; |
||
614 | snprintf(buf,sizeof(buf),"%s:%d?%d\n",word,serial,weight); |
||
615 | for(i=0;i<catcnt;i++) { |
||
6884 | bpr | 616 | snprintf(nbuf,sizeof(nbuf),"%s/%c.%s", |
12248 | bpr | 617 | outdir,categories[i],lang[lind]); |
15394 | bpr | 618 | f = file_from_list(nbuf); |
619 | if(f!=NULL) {fputs(buf,f);} |
||
12248 | bpr | 620 | } |
10 | reyssat | 621 | } |
622 | |||
6881 | bpr | 623 | void appenditem1 (char *buf, int lind, int serial, int weight, char *l ) |
624 | { |
||
625 | char *p1, *p2 ; |
||
626 | for(p1=find_word_start(buf); *p1; |
||
6884 | bpr | 627 | p1=find_word_start(p2)) { |
628 | p2=strchr(p1,','); |
||
629 | if(p2!=NULL) *p2++=0; else p2=p1+strlen(p1); |
||
630 | if(strlen(p1)<=0) continue; |
||
631 | appenditem(p1,lind,serial,weight,module_language); |
||
6881 | bpr | 632 | } |
633 | } |
||
634 | void appenditem2 (char *buf, int lind, int serial, int weight, char *l ) |
||
635 | { |
||
636 | char *p1, *p2 ; |
||
637 | for(p1=find_word_start(buf);*p1; |
||
12248 | bpr | 638 | p1=find_word_start(p2)) { |
6884 | bpr | 639 | p2=find_word_end(p1); if(*p2) *p2++=0; |
640 | appenditem(p1,lind,serial,weight,module_language); |
||
6881 | bpr | 641 | } |
642 | } |
||
10 | reyssat | 643 | void onemodule(const char *name, int serial, int lind) |
644 | { |
||
12248 | bpr | 645 | int i; |
646 | unsigned char trlist[]={ |
||
647 | i_title,i_description,i_category,i_domain,i_keywords, |
||
648 | i_require,i_author, |
||
649 | i_keywords_ca,i_keywords_en,i_keywords_fr,i_keywords_it,i_keywords_nl, |
||
650 | i_title_ca,i_title_en,i_title_fr,i_title_it,i_title_nl |
||
651 | }; |
||
15375 | bpr | 652 | int trcnt=sizeof(trlist)/sizeof(trlist[0]); |
15777 | georgesk | 653 | char *p1, *p2, *pp, *q, buf[15*MAX_LINELEN+15], lbuf[16]; |
12248 | bpr | 654 | FILE *f; |
6881 | bpr | 655 | |
12248 | bpr | 656 | if(module_index(name)) return; |
657 | towords(indbuf[i_category]); |
||
7915 | bpr | 658 | /* list the categories (among A=all,X=eXercise,O,D,...) corresponding |
6884 | bpr | 659 | * to this module |
660 | */ |
||
12248 | bpr | 661 | for(i=catcnt=0;i<catno && catcnt<16;i++) { |
15380 | bpr | 662 | if(wordchr2(indbuf[i_category],cat[i].name)!=NULL) |
663 | categories[catcnt++]=cat[i].typ; |
||
12248 | bpr | 664 | } |
665 | if(catcnt==0) return; |
||
666 | if(categories[0]!=cat[0].typ) |
||
667 | categories[catcnt++]=cat[0].typ; |
||
6884 | bpr | 668 | /* write module's name in the category.language files, for instance lists/X.fr |
669 | * for french exercises |
||
670 | */ |
||
12248 | bpr | 671 | for(i=0;i<catcnt;i++) { |
672 | snprintf(buf,sizeof(buf),"%s/%s/%c.%s", |
||
673 | outdir,mlistbase,categories[i],lang[lind]); |
||
674 | f=fopen(buf,"a"); |
||
675 | if(f!=NULL) {fprintf(f,"%s\n",name); fclose(f);} |
||
676 | } |
||
6884 | bpr | 677 | /* add serial number and language (resp.title, ...) to corresponding file */ |
12248 | bpr | 678 | fprintf(langf,"%d:%s\n",serial,module_language); |
679 | fprintf(titf,"%d:%s\n",serial,indbuf[i_title]); |
||
16987 | bpr | 680 | if(indbuf[i_title_ca][0]!=0) |
681 | fprintf(titf_ca,"%d:%s\n",serial,indbuf[i_title_ca]); |
||
682 | else |
||
683 | fprintf(titf_ca,"%d:%s\n",serial,indbuf[i_title]); |
||
684 | if(indbuf[i_title_en][0]!=0) |
||
685 | fprintf(titf_en,"%d:%s\n",serial,indbuf[i_title_en]); |
||
686 | else |
||
687 | fprintf(titf_en,"%d:%s\n",serial,indbuf[i_title]); |
||
688 | if(indbuf[i_title_es][0]!=0) |
||
689 | fprintf(titf_es,"%d:%s\n",serial,indbuf[i_title_es]); |
||
690 | else |
||
691 | fprintf(titf_es,"%d:%s\n",serial,indbuf[i_title]); |
||
692 | if(indbuf[i_title_fr][0]!=0) |
||
693 | fprintf(titf_fr,"%d:%s\n",serial,indbuf[i_title_fr]); |
||
694 | else |
||
695 | fprintf(titf_fr,"%d:%s\n",serial,indbuf[i_title]); |
||
696 | if(indbuf[i_title_it][0]!=0) |
||
697 | fprintf(titf_it,"%d:%s\n",serial,indbuf[i_title_it]); |
||
698 | else |
||
699 | fprintf(titf_it,"%d:%s\n",serial,indbuf[i_title]); |
||
700 | if(indbuf[i_title_nl][0]!=0) |
||
701 | fprintf(titf_nl,"%d:%s\n",serial,indbuf[i_title_nl]); |
||
702 | else |
||
703 | fprintf(titf_nl,"%d:%s\n",serial,indbuf[i_title]); |
||
704 | |||
12248 | bpr | 705 | fprintf(descf,"%d:%s\n",serial,indbuf[i_description]); |
706 | fprintf(authorf,"%d:%s\n",serial,indbuf[i_author]); |
||
707 | fprintf(versionf,"%d:%s\n",serial,indbuf[i_version]); |
||
6881 | bpr | 708 | |
6884 | bpr | 709 | /* add module's information in html page for robots */ |
12248 | bpr | 710 | snprintf(buf,sizeof(buf),"%s",indbuf[i_description]); |
711 | for(pp=strchr(buf,','); pp; pp=strchr(pp,',')) |
||
712 | string_modify3(buf,pp,pp+1,","); |
||
713 | if(strcmp(module_language,lang[lind])==0) |
||
714 | fprintf(robotf,"%s ,%s,%s,%s,%s\n",name,module_language,name, |
||
715 | indbuf[i_title], buf); |
||
6819 | reyssat | 716 | |
6884 | bpr | 717 | /* Normalize the information of trlist, using dictionary |
7915 | bpr | 718 | * -- bases/sys/domain.xx without suffix translation (--> english version) |
6884 | bpr | 719 | */ |
15380 | bpr | 720 | entrycount=dentrycount; dicbuf=ddicbuf; |
721 | memmove(entry,dentry,dentrycount*sizeof(entry[0])); |
||
722 | unknown_type=unk_leave; |
||
723 | for(i=0;i<trcnt;i++) { |
||
724 | detag(indbuf[trlist[i]]); |
||
725 | deaccent2(indbuf[trlist[i]]); |
||
726 | comma(indbuf[trlist[i]]); |
||
727 | singlespace2(indbuf[trlist[i]]); |
||
728 | translate(indbuf[trlist[i]]); |
||
729 | } |
||
6884 | bpr | 730 | /* Normalize the information, using dictionary |
7915 | bpr | 731 | * bases/sys/words.xx with suffix translation |
6884 | bpr | 732 | */ |
15380 | bpr | 733 | entrycount=mentrycount; dicbuf=mdicbuf; |
734 | memmove(entry,mentry,mentrycount*sizeof(entry[0])); |
||
735 | unknown_type=unk_leave;/* used in translator_.c */ |
||
736 | for(i=0;i<trcnt;i++) { |
||
737 | suffix_translate(indbuf[trlist[i]]); |
||
738 | translate(indbuf[trlist[i]]); |
||
739 | } |
||
6881 | bpr | 740 | |
741 | /* taken contains all words already seen in the module index */ |
||
15380 | bpr | 742 | taken[0]=0; takenlen=tweight=0; |
6881 | bpr | 743 | /* append words of title */ |
15380 | bpr | 744 | ovlstrcpy(buf,indbuf[i_title]); towords(buf); |
745 | appenditem2(buf,lind,serial,4,module_language); |
||
6881 | bpr | 746 | |
6884 | bpr | 747 | /* extract words of every other information except level */ |
15380 | bpr | 748 | snprintf(buf,sizeof(buf),"%s %s %s %s %s %s %s %s %s %s %s %s %s %s %s", |
749 | indbuf[i_description],indbuf[i_keywords], |
||
750 | indbuf[i_keywords_ca],indbuf[i_keywords_en],indbuf[i_keywords_fr], |
||
751 | indbuf[i_keywords_it],indbuf[i_keywords_nl], |
||
752 | indbuf[i_title_ca],indbuf[i_title_en],indbuf[i_title_fr], |
||
753 | indbuf[i_title_it],indbuf[i_title_nl], |
||
754 | indbuf[i_domain],indbuf[i_require],indbuf[i_author]); |
||
755 | towords(buf); |
||
756 | appenditem2(buf,lind,serial,2,module_language); |
||
6881 | bpr | 757 | |
6884 | bpr | 758 | /* this time the dictionary is the group dictionary sys/wgrp/wgrp |
759 | * with a g (groupdic), not an m (maindic) . see below main, suffix, group. |
||
7915 | bpr | 760 | * and delete unknown ?? and translate |
6884 | bpr | 761 | */ |
12248 | bpr | 762 | entrycount=gentrycount; dicbuf=gdicbuf; |
763 | memmove(entry,gentry,gentrycount*sizeof(entry[0])); |
||
6881 | bpr | 764 | |
15380 | bpr | 765 | /* append words of every title information */ |
12248 | bpr | 766 | ovlstrcpy(buf,indbuf[i_title]); |
767 | unknown_type=unk_delete; |
||
768 | translate(buf); |
||
769 | appenditem1(buf,lind,serial,2,module_language); |
||
6881 | bpr | 770 | |
15380 | bpr | 771 | /* append words of information of description except level */ |
12248 | bpr | 772 | snprintf(buf,sizeof(buf),"%s", indbuf[i_description]); |
773 | unknown_type=unk_delete; |
||
774 | translate(buf); |
||
775 | appenditem1(buf,lind,serial,4,module_language); |
||
6881 | bpr | 776 | |
15380 | bpr | 777 | /* append words (or group of words) of keywords and domain */ |
12248 | bpr | 778 | snprintf(buf,sizeof(buf),"%s, %s, %s, %s, %s, %s, %s", |
15380 | bpr | 779 | indbuf[i_domain],indbuf[i_keywords], |
780 | indbuf[i_keywords_ca], indbuf[i_keywords_en],indbuf[i_keywords_fr], |
||
781 | indbuf[i_keywords_it], indbuf[i_keywords_nl]); |
||
12248 | bpr | 782 | unknown_type=unk_leave; |
783 | translate(buf); |
||
784 | appenditem1(buf,lind,serial,2,module_language); |
||
6881 | bpr | 785 | |
15380 | bpr | 786 | /* append level information, with weight 2 */ |
12248 | bpr | 787 | snprintf(buf,sizeof(buf),"%s",indbuf[i_level]); |
788 | ovlstrcpy(lbuf,"level"); |
||
789 | for(p1=buf; *p1; p1++) if(!isalnum(*p1)) *p1=' '; |
||
790 | q=buf+strlen(buf); |
||
15380 | bpr | 791 | for(p1=find_word_start(buf); (*p1) && (p1 < q) ; p1=find_word_start(p2)) { |
12248 | bpr | 792 | p2=find_word_end(p1); |
793 | if(p2!=NULL) *p2++=0; else p2=p1+strlen(p1); |
||
794 | if(strncmp(p1, "Lang" , p2-p1) && |
||
795 | (!isalpha(*p1) || |
||
796 | (!isdigit(*(p1+1)) && *(p1+1)!=0) || |
||
797 | (*(p1+1)!=0 && *(p1+2)!=0))) |
||
798 | continue; |
||
799 | *p1=tolower(*p1); |
||
800 | ovlstrcpy(lbuf+strlen("level"),p1); |
||
801 | appenditem(lbuf,lind,serial,2,module_language); |
||
802 | } |
||
6884 | bpr | 803 | /* append total weight of module to weight file site2/weight.xx */ |
12248 | bpr | 804 | fprintf(weightf,"%d:%d\n",serial,tweight); |
10 | reyssat | 805 | } |
806 | |||
807 | void modules(void) |
||
808 | { |
||
12248 | bpr | 809 | int i,j,k,d; |
810 | char namebuf[MAX_LINELEN+1]; |
||
811 | char mdic[MAX_LINELEN+1], sdic[MAX_LINELEN+1], gdic[MAX_LINELEN+1], ddic[MAX_LINELEN+1]; |
||
10 | reyssat | 812 | |
12248 | bpr | 813 | for(j=0;j<langcnt;j++) { |
6884 | bpr | 814 | snprintf(namebuf,sizeof(namebuf),"%s/weight.%s",outdir,lang[j]); |
815 | weightf=fopen(namebuf,"w"); |
||
816 | snprintf(mdic,sizeof(mdic),"%s/%s.%s",dicdir,maindic,lang[j]); |
||
817 | snprintf(sdic,sizeof(sdic),"%s/%s.%s",dicdir,suffixdic,lang[j]); |
||
818 | snprintf(gdic,sizeof(gdic),"%s/%s.%s",dicdir,groupdic,lang[j]); |
||
819 | snprintf(ddic,sizeof(ddic),"%s/%s.%s",dicdir,domaindic,lang[j]); |
||
820 | suffix_dic(sdic); prepare_dic(gdic); |
||
821 | gdicbuf=dicbuf; gentrycount=entrycount; |
||
822 | memmove(gentry,entry,gentrycount*sizeof(entry[0])); |
||
823 | prepare_dic(mdic); |
||
824 | mdicbuf=dicbuf; mentrycount=entrycount; |
||
825 | memmove(mentry,entry,mentrycount*sizeof(entry[0])); |
||
826 | prepare_dic(ddic); |
||
827 | ddicbuf=dicbuf; dentrycount=entrycount; |
||
828 | memmove(dentry,entry,dentrycount*sizeof(entry[0])); |
||
829 | unknown_type=unk_leave; translate(ignore[j]); |
||
830 | for(i=0;i<modcnt;i++) { |
||
12248 | bpr | 831 | if(mod[i].langcnt>0) { |
15336 | bpr | 832 | /* look for another language */ |
6884 | bpr | 833 | for(d=k=0;k<mod[i].langcnt;k++) |
15336 | bpr | 834 | if(mod[i].langs[k]<mod[i].langs[d]) d=k; |
6884 | bpr | 835 | for(k=0;k<mod[i].langcnt && mod[i].langs[k]!=j;k++); |
836 | if(k>=mod[i].langcnt) k=d; |
||
837 | snprintf(namebuf,MAX_LINELEN,"%s.%s",mod[i].name, |
||
12248 | bpr | 838 | lang[mod[i].langs[k]]); |
6884 | bpr | 839 | onemodule(namebuf,mod[i].counts[k],j); |
12248 | bpr | 840 | } |
841 | else { |
||
6884 | bpr | 842 | onemodule(mod[i].name,mod[i].counts[0],j); |
12248 | bpr | 843 | } |
10 | reyssat | 844 | } |
6884 | bpr | 845 | if(mentrycount>0) free(mdicbuf); |
846 | if(gentrycount>0) free(gdicbuf); |
||
847 | if(suffixcnt>0) free(sufbuf); |
||
848 | if(dentrycount>0) free(ddicbuf); |
||
849 | if(weightf) fclose(weightf); |
||
12248 | bpr | 850 | } |
10 | reyssat | 851 | } |
15394 | bpr | 852 | void clean(void) |
853 | { |
||
854 | int i; |
||
855 | for (i = 0; i < open_files; i++) fclose(files[i]); |
||
856 | fclose(langf); fclose(titf); fclose(descf); fclose(robotf); |
||
857 | fclose(authorf); fclose(versionf); |
||
16987 | bpr | 858 | fclose(titf_fr); fclose(titf_it);fclose(titf_es);fclose(titf_nl); |
859 | fclose(titf_ca);fclose(titf_en); |
||
15394 | bpr | 860 | } |
10 | reyssat | 861 | |
6881 | bpr | 862 | /* FIXME ? differences with appenditem - use fprintf instead of snprintf */ |
10 | reyssat | 863 | void sappenditem(char *word, int lind, int serial, int weight) |
864 | { |
||
12248 | bpr | 865 | int ll; |
866 | char *p; |
||
6881 | bpr | 867 | |
12248 | bpr | 868 | if(!isalnum(*word) || (ll=strlen(word))<2 || |
869 | wordchr2(taken,word)!=NULL || |
||
870 | wordchr2(ignore[lind],word)!=NULL || |
||
871 | takenlen>=MAX_LINELEN-ll-16) |
||
872 | return; |
||
873 | if(ll==2 && (!isdigit(word[0]) || !isalpha(word[1]))) return; |
||
874 | for(p=word;*p;p++) if(!isalnum(*p) && *p!=' ') return; |
||
875 | taken[takenlen++]=' ';taken[takenlen++]=' '; |
||
876 | ovlstrcpy(taken+takenlen,word); |
||
877 | takenlen+=ll; tweight+=weight; |
||
878 | fprintf(indf,"%s:%d?%d\n",word,serial,weight); |
||
10 | reyssat | 879 | } |
15380 | bpr | 880 | /* onesg / onemodule are similar */ |
15375 | bpr | 881 | void onesg(int serial, int lind, int index(int)) |
10 | reyssat | 882 | { |
12248 | bpr | 883 | int i; |
884 | unsigned char trlist[]={ |
||
15375 | bpr | 885 | s_title,s_description,s_domain,s_keywords,s_information |
12248 | bpr | 886 | }; |
15380 | bpr | 887 | int trcnt=sizeof(trlist)/sizeof(trlist[0]); |
15778 | georgesk | 888 | char *p1, *p2, *q, buf[4*MAX_LINELEN+4], lbuf[16]; |
6881 | bpr | 889 | |
15375 | bpr | 890 | if(index(serial)) return; |
891 | fprintf(titf,"%d:%s\n",serial,gsindbuf[s_title]); |
||
892 | fprintf(descf,"%d:%s\n",serial,gsindbuf[s_description]); |
||
893 | fprintf(remf,"%d:%s\n",serial,gsindbuf[s_information]); |
||
7915 | bpr | 894 | |
15380 | bpr | 895 | /* Normalize the information of trlist, using dictionary |
896 | * -- bases/sys/domain.xx without suffix translation (--> english version) |
||
897 | */ |
||
12248 | bpr | 898 | entrycount=dentrycount; dicbuf=ddicbuf; |
899 | memmove(entry,dentry,dentrycount*sizeof(entry[0])); |
||
900 | unknown_type=unk_leave; |
||
15380 | bpr | 901 | for(i=0;i<trcnt;i++) { |
15375 | bpr | 902 | detag(gsindbuf[trlist[i]]); |
903 | deaccent2(gsindbuf[trlist[i]]); |
||
904 | comma(gsindbuf[trlist[i]]); |
||
905 | singlespace2(gsindbuf[trlist[i]]); |
||
906 | translate(gsindbuf[trlist[i]]); |
||
12248 | bpr | 907 | } |
15380 | bpr | 908 | /* Normalize the information, using dictionary |
909 | * bases/sys/words.xx with suffix translation |
||
910 | */ |
||
12248 | bpr | 911 | entrycount=mentrycount; dicbuf=mdicbuf; |
912 | memmove(entry,mentry,mentrycount*sizeof(entry[0])); |
||
15380 | bpr | 913 | unknown_type=unk_leave;/* used in translator_.c */ |
914 | for(i=0;i<trcnt;i++) { |
||
15375 | bpr | 915 | suffix_translate(gsindbuf[trlist[i]]); |
916 | translate(gsindbuf[trlist[i]]); |
||
12248 | bpr | 917 | } |
15380 | bpr | 918 | |
919 | /* taken contains all words already seen in the module index */ |
||
12248 | bpr | 920 | taken[0]=0; takenlen=tweight=0; |
15380 | bpr | 921 | /* append words of title */ |
15375 | bpr | 922 | ovlstrcpy(buf,gsindbuf[s_title]); towords(buf); |
12248 | bpr | 923 | for(p1=find_word_start(buf);*p1; |
924 | p1=find_word_start(p2)) { |
||
6884 | bpr | 925 | p2=find_word_end(p1); if(*p2) *p2++=0; |
926 | sappenditem(p1,lind,serial,4); |
||
12248 | bpr | 927 | } |
15380 | bpr | 928 | |
929 | /* extract words of every other information except level */ |
||
12248 | bpr | 930 | snprintf(buf,sizeof(buf),"%s %s %s %s", |
15375 | bpr | 931 | gsindbuf[s_description],gsindbuf[s_keywords], |
932 | gsindbuf[s_domain],gsindbuf[s_information]); |
||
12248 | bpr | 933 | towords(buf); |
15375 | bpr | 934 | for(p1=find_word_start(buf);*p1;p1=find_word_start(p2)) { |
935 | p2=find_word_end(p1); if(*p2) *p2++=0; |
||
936 | sappenditem(p1,lind,serial,2); |
||
12248 | bpr | 937 | } |
15380 | bpr | 938 | /* this time the dictionary is the group dictionary sys/wgrp/wgrp |
939 | * with a g (groupdic), not an m (maindic) . see below main, suffix, group. |
||
940 | * and delete unknown ?? and translate |
||
941 | */ |
||
12248 | bpr | 942 | entrycount=gentrycount; dicbuf=gdicbuf; |
943 | memmove(entry,gentry,gentrycount*sizeof(entry[0])); |
||
15380 | bpr | 944 | |
945 | /* append words of every title information */ |
||
946 | ovlstrcpy(buf,gsindbuf[s_title]); |
||
12248 | bpr | 947 | unknown_type=unk_delete; |
15380 | bpr | 948 | translate(buf); |
15375 | bpr | 949 | for(p1=find_word_start(buf); *p1; p1=find_word_start(p2)) { |
950 | p2=strchr(p1,','); |
||
951 | if(p2!=NULL) *p2++=0; else p2=p1+strlen(p1); |
||
952 | if(strlen(p1)<=0) continue; |
||
953 | sappenditem(p1,lind,serial,4); |
||
12248 | bpr | 954 | } |
15380 | bpr | 955 | |
956 | /* append words (or group of words) of keywords and domain */ |
||
12248 | bpr | 957 | snprintf(buf,sizeof(buf),"%s, %s", |
15375 | bpr | 958 | gsindbuf[s_keywords], |
959 | gsindbuf[s_domain]); |
||
15380 | bpr | 960 | unknown_type=unk_leave; |
12248 | bpr | 961 | translate(buf); |
15380 | bpr | 962 | for(p1=find_word_start(buf); *p1; p1=find_word_start(p2)) { |
6884 | bpr | 963 | p2=strchr(p1,','); |
964 | if(p2!=NULL) *p2++=0; else p2=p1+strlen(p1); |
||
965 | if(strlen(p1)<=0) continue; |
||
966 | sappenditem(p1,lind,serial,2); |
||
12248 | bpr | 967 | } |
15380 | bpr | 968 | |
969 | /* append level information, with weight 2 */ |
||
970 | snprintf(buf,sizeof(buf),"%s",gsindbuf[s_level]); |
||
971 | ovlstrcpy(lbuf,"level"); |
||
972 | for(p1=buf; *p1; p1++) if(!isalnum(*p1)) *p1=' '; |
||
973 | q=buf+strlen(buf); |
||
974 | for(p1=find_word_start(buf); (*p1) && (p1 < q) ; |
||
975 | p1=find_word_start(p2)) { |
||
976 | p2=find_word_end(p1); |
||
977 | if(p2!=NULL) *p2++=0; else p2=p1+strlen(p1); |
||
978 | if(strncmp(p1, "Lang" , p2-p1) && |
||
979 | (!isalpha(*p1) || (!isdigit(*(p1+1))) || |
||
980 | (*(p1+1)!=0 && *(p1+2)!=0))) |
||
981 | continue; |
||
982 | *p1=tolower(*p1); |
||
983 | ovlstrcpy(lbuf+strlen("level"),p1); |
||
984 | sappenditem(lbuf,lind,serial,2); |
||
985 | } |
||
986 | /* append total weight of module to weight file site2/weight.xx */ |
||
12248 | bpr | 987 | fprintf(weightf,"%d:%d\n",serial,tweight); |
10 | reyssat | 988 | } |
989 | |||
15375 | bpr | 990 | void sgs(char *outdir, int index(int)) |
10 | reyssat | 991 | { |
15440 | bpr | 992 | int i,j,k,d; |
12248 | bpr | 993 | char mdic[MAX_LINELEN+1], sdic[MAX_LINELEN+1], gdic[MAX_LINELEN+1], ddic[MAX_LINELEN+1]; |
994 | char buf[MAX_LINELEN+1]; |
||
7915 | bpr | 995 | |
15442 | bpr | 996 | //snprintf(buf,sizeof(buf),"%s/list",outdir); |
997 | //listf=fopen(buf,"w"); |
||
15440 | bpr | 998 | snprintf(buf,sizeof(buf),"%s/title",outdir); |
999 | titf=fopen(buf,"w"); |
||
1000 | snprintf(buf,sizeof(buf),"%s/description",outdir); |
||
1001 | descf=fopen(buf,"w"); |
||
1002 | snprintf(buf,sizeof(buf),"%s/information",outdir); |
||
1003 | remf=fopen(buf,"w"); |
||
15442 | bpr | 1004 | if(!remf || !descf || !titf ) { |
15440 | bpr | 1005 | fprintf(stderr,"modind: error creating output files for %s.\n",outdir); exit(1); |
1006 | } |
||
12248 | bpr | 1007 | for(j=0;j<langcnt;j++) { |
15375 | bpr | 1008 | snprintf(buf,sizeof(buf),"%s/%s",outdir,lang[j]); |
12248 | bpr | 1009 | indf=fopen(buf,"w"); |
15375 | bpr | 1010 | snprintf(buf,sizeof(buf),"%s/weight.%s",outdir,lang[j]); |
12248 | bpr | 1011 | weightf=fopen(buf,"w"); |
15440 | bpr | 1012 | if(!weightf || !indf ) { |
1013 | fprintf(stderr,"modind: error creating output files for %s.\n",outdir); exit(1); |
||
9090 | bpr | 1014 | } |
6884 | bpr | 1015 | snprintf(mdic,sizeof(mdic),"%s/%s.%s",dicdir,maindic,lang[j]); |
1016 | snprintf(sdic,sizeof(sdic),"%s/%s.%s",dicdir,suffixdic,lang[j]); |
||
1017 | snprintf(gdic,sizeof(gdic),"%s/%s.%s",dicdir,groupdic,lang[j]); |
||
6961 | bpr | 1018 | snprintf(ddic,sizeof(ddic),"%s/%s.%s",dicdir,domaindic,lang[j]); |
6884 | bpr | 1019 | suffix_dic(sdic); prepare_dic(gdic); |
1020 | gdicbuf=dicbuf; gentrycount=entrycount; |
||
1021 | memmove(gentry,entry,gentrycount*sizeof(entry[0])); |
||
1022 | prepare_dic(mdic); |
||
1023 | mdicbuf=dicbuf; mentrycount=entrycount; |
||
1024 | memmove(mentry,entry,mentrycount*sizeof(entry[0])); |
||
6973 | bpr | 1025 | prepare_dic(ddic); |
1026 | ddicbuf=dicbuf; dentrycount=entrycount; |
||
1027 | memmove(dentry,entry,dentrycount*sizeof(entry[0])); |
||
6884 | bpr | 1028 | unknown_type=unk_leave; translate(ignore[j]); |
15440 | bpr | 1029 | for(i=0;i<modcnt;i++) |
1030 | if(mod[i].langcnt>0) { |
||
1031 | /* look for another language */ |
||
1032 | for(d=k=0;k<mod[i].langcnt;k++) |
||
1033 | if(mod[i].langs[k]<mod[i].langs[d]) d=k; |
||
1034 | for(k=0;k<mod[i].langcnt && mod[i].langs[k]!=j;k++); |
||
1035 | if(k>=mod[i].langcnt) k=d; |
||
1036 | onesg(mod[i].counts[k],mod[i].langs[k],index); |
||
1037 | } |
||
6884 | bpr | 1038 | if(mentrycount>0) free(mdicbuf); |
1039 | if(gentrycount>0) free(gdicbuf); |
||
1040 | if(suffixcnt>0) free(sufbuf); |
||
6961 | bpr | 1041 | if(dentrycount>0) free(ddicbuf); |
15440 | bpr | 1042 | fclose(indf); fclose(weightf); |
12248 | bpr | 1043 | } |
15444 | bpr | 1044 | fclose(titf); fclose(descf); fclose(remf); |
10 | reyssat | 1045 | } |
1046 | |||
1047 | int main() |
||
1048 | { |
||
12248 | bpr | 1049 | gentry=xmalloc(entry_size); |
1050 | dentry=xmalloc(entry_size); |
||
1051 | mentry=xmalloc(entry_size); |
||
15442 | bpr | 1052 | init(); |
12248 | bpr | 1053 | prep(); |
1054 | if(modcnt>0) modules(); |
||
1055 | clean(); |
||
1056 | sprep(); |
||
15375 | bpr | 1057 | if(modcnt>0) sgs(sheetoutdir,sheet_index); |
1058 | gprep(); |
||
1059 | if(modcnt>0) sgs(glossaryoutdir,glossary_index); |
||
12248 | bpr | 1060 | return 0; |
10 | reyssat | 1061 | } |