Rev 6819 | Rev 6884 | Go to most recent revision | Show entire file | Ignore whitespace | Details | Blame | Last modification | View Log | RSS feed
Rev 6819 | Rev 6881 | ||
---|---|---|---|
Line 28... | Line 28... | ||
28 | char *dicdir= "public_html/bases"; |
28 | char *dicdir= "public_html/bases"; |
29 | char *outdir= "public_html/bases/site2"; |
29 | char *outdir= "public_html/bases/site2"; |
30 | char *maindic= "sys/words"; |
30 | char *maindic= "sys/words"; |
31 | char *groupdic= "sys/wgrp/wgrp"; |
31 | char *groupdic= "sys/wgrp/wgrp"; |
32 | char *suffixdic= "sys/suffix"; |
32 | char *suffixdic= "sys/suffix"; |
- | 33 | char *domaindic= "sys/domaindic"; |
|
33 | char *ignoredic= "sys/indignore"; |
34 | char *ignoredic= "sys/indignore"; |
34 | char *conffile= "log/wims.conf"; |
35 | char *conffile= "log/wims.conf"; |
35 | char *mlistbase= "list"; |
36 | char *mlistbase= "list"; |
36 | 37 | ||
37 | char lang[MAX_LANGS][4]={ |
38 | char lang[MAX_LANGS][4]={ |
Line 126... | Line 127... | ||
/* Find the first occurrence of w in p that stands as a whole word.
 *
 * A match counts only when it is delimited on both sides: on the left by
 * the beginning of p or a whitespace character, and on the right by a
 * whitespace character or the terminating NUL.
 *
 * p: NUL-terminated string to search.
 * w: NUL-terminated word to look for.
 * Returns a pointer into p at the start of the first delimited match,
 * or NULL when there is none.
 */
char *wordchr(char *p, char *w)
{
    size_t wlen = strlen(w);    /* invariant, so computed only once */
    char *r;

    /* Only step forward while r is not on the terminating NUL: with an
     * empty w, strstr() matches at the end of p and r+1 would lie outside
     * the string. For a non-empty w the guard never triggers. */
    for (r = strstr(p, w); r != NULL;
         r = (*r != 0) ? strstr(r + 1, w) : NULL) {
        /* <ctype.h> functions need an unsigned char value; a plain char
         * holding a byte >= 0x80 may be negative. */
        int left_ok = (r == p) || isspace((unsigned char)r[-1]);
        int right_ok = (r[wlen] == 0) || isspace((unsigned char)r[wlen]);

        if (left_ok && right_ok)
            return r;
    }
    return NULL;
}
136 | 137 | ||
Line 200... | Line 201... | ||
200 | /* modify a string. Bufferlen must be at least MAX_LINELEN */ |
201 | /* modify a string. Bufferlen must be at least MAX_LINELEN */ |
201 | void string_modify(char *start, char *bad_beg, char *bad_end, char *good,...) |
202 | void string_modify(char *start, char *bad_beg, char *bad_end, char *good,...) |
202 | { |
203 | { |
203 | char buf[MAX_LINELEN+1]; |
204 | char buf[MAX_LINELEN+1]; |
204 | va_list vp; |
205 | va_list vp; |
205 | 206 | ||
206 | va_start(vp,good); |
207 | va_start(vp,good); |
207 | vsnprintf(buf,sizeof(buf),good,vp); va_end(vp); |
208 | vsnprintf(buf,sizeof(buf),good,vp); va_end(vp); |
208 | if(strlen(start)-(bad_end-bad_beg)+strlen(buf)>=MAX_LINELEN) |
209 | if(strlen(start)-(bad_end-bad_beg)+strlen(buf)>=MAX_LINELEN) |
209 | return; |
210 | return; |
210 | strcat(buf,bad_end); |
211 | strcat(buf,bad_end); |
Line 216... | Line 217... | ||
/* Rewrite every ',' found in p as ", " by calling string_modify() on
 * each comma in turn, scanning from left to right. */
void comma(char *p)
{
    char *cur = strchr(p, ',');

    while (cur != NULL) {
        string_modify(p, cur, cur + 1, ", ");
        /* continue the search just after the comma handled above */
        cur = strchr(cur + 1, ',');
    }
}
222 | 223 | ||
223 | 224 | ||
224 | void _getdef(char buf[], char *name, char value[]) |
225 | void _getdef(char buf[], char *name, char value[]) |
225 | { |
226 | { |
226 | char *p1, *p2, *p3; |
227 | char *p1, *p2, *p3; |
227 | 228 | ||
Line 246... | Line 247... | ||
246 | void getdef(char *fname, char *name, char value[]) |
247 | void getdef(char *fname, char *name, char value[]) |
247 | { |
248 | { |
248 | FILE *f; |
249 | FILE *f; |
249 | char *buf; |
250 | char *buf; |
250 | int l; |
251 | int l; |
251 | 252 | ||
252 | value[0]=0; |
253 | value[0]=0; |
253 | f=fopen(fname,"r"); if(f==NULL) return; |
254 | f=fopen(fname,"r"); if(f==NULL) return; |
254 | fseek(f,0,SEEK_END); l=ftell(f); fseek(f,0,SEEK_SET); |
255 | fseek(f,0,SEEK_END); l=ftell(f); fseek(f,0,SEEK_SET); |
255 | buf=xmalloc(l+256); l=fread(buf,1,l,f); |
256 | buf=xmalloc(l+256); l=fread(buf,1,l,f); |
256 | fclose(f); |
257 | fclose(f); |
Line 259... | Line 260... | ||
259 | free(buf); |
260 | free(buf); |
260 | } |
261 | } |
261 | 262 | ||
262 | #include "translator_.c" |
263 | #include "translator_.c" |
263 | 264 | ||
264 | char *mdicbuf, *gdicbuf; |
265 | char *mdicbuf, *gdicbuf, *ddicbuf; |
265 | char gentry[sizeof(entry)], mentry[sizeof(entry)]; |
266 | char gentry[sizeof(entry)], mentry[sizeof(entry)], dentry[sizeof(entry)]; |
266 | int gentrycount, mentrycount; |
267 | int gentrycount, mentrycount, dentrycount; |
267 | 268 | ||
268 | /* Preparation of data */ |
269 | /* Preparation of data */ |
269 | void prep(void) |
270 | void prep(void) |
270 | { |
271 | { |
271 | char buf[MAX_LINELEN+1]; |
272 | char buf[MAX_LINELEN+1]; |
272 | char *p1,*p2,*s,*old; |
273 | char *p1,*p2,*s,*old; |
273 | int i,l,thislang,t; |
274 | int i,l,thislang,t; |
274 | FILE *f; |
275 | FILE *f; |
275 | 276 | ||
276 | s=getenv("modind_outdir"); if(s!=NULL && *s!=0) outdir=s; |
277 | s=getenv("modind_outdir"); if(s!=NULL && *s!=0) outdir=s; |
277 | s=getenv("modind_sheetdir"); if(s!=NULL && *s!=0) sheetdir=s; |
278 | s=getenv("modind_sheetdir"); if(s!=NULL && *s!=0) sheetdir=s; |
278 | snprintf(buf,sizeof(buf),"%s/addr",outdir); |
279 | snprintf(buf,sizeof(buf),"%s/addr",outdir); |
279 | addrf=fopen(buf,"w"); |
280 | addrf=fopen(buf,"w"); |
280 | snprintf(buf,sizeof(buf),"%s/serial",outdir); |
281 | snprintf(buf,sizeof(buf),"%s/serial",outdir); |
Line 355... | Line 356... | ||
355 | 356 | ||
356 | void sprep(void) |
357 | void sprep(void) |
357 | { |
358 | { |
358 | char *p1,*p2,*s; |
359 | char *p1,*p2,*s; |
359 | int i,l,thislang; |
360 | int i,l,thislang; |
360 | 361 | ||
361 | modcnt=0; |
362 | modcnt=0; |
362 | s=getenv("slist"); if(s==NULL) return; |
363 | s=getenv("slist"); if(s==NULL) return; |
363 | l=strlen(s); if(l<0 || l>100*MAX_LINELEN) return; |
364 | l=strlen(s); if(l<0 || l>100*MAX_LINELEN) return; |
364 | mlist=xmalloc(l+16); ovlstrcpy(mlist,s); |
365 | mlist=xmalloc(l+16); ovlstrcpy(mlist,s); |
365 | for(p1=find_word_start(mlist); *p1 && modcnt<MAX_MODULES; p1=find_word_start(p2)) { |
366 | for(p1=find_word_start(mlist); *p1 && modcnt<MAX_MODULES; p1=find_word_start(p2)) { |
Line 379... | Line 380... | ||
379 | fclose(langf); fclose(titf); fclose(descf); fclose(robotf); |
380 | fclose(langf); fclose(titf); fclose(descf); fclose(robotf); |
380 | fclose(authorf); fclose(versionf); |
381 | fclose(authorf); fclose(versionf); |
381 | } |
382 | } |
382 | 383 | ||
383 | char *sheetindex[]={ |
384 | char *sheetindex[]={ |
384 | "title", "description", |
385 | "title", "description", |
385 | "duration", "severity", |
386 | "duration", "severity", |
386 | "level", "domain", |
387 | "level", "domain", |
387 | "keywords", "reserved1", "reserved2", "remark" |
388 | "keywords", "reserved1", "reserved2", "remark" |
388 | }; |
389 | }; |
389 | #define SHEETINDEX_NO (sizeof(sheetindex)/sizeof(sheetindex[0])) |
390 | #define SHEETINDEX_NO (sizeof(sheetindex)/sizeof(sheetindex[0])) |
Line 394... | Line 395... | ||
394 | s_keywords, s_reserved1, s_reserved2, |
395 | s_keywords, s_reserved1, s_reserved2, |
395 | s_remark |
396 | s_remark |
396 | }; |
397 | }; |
397 | 398 | ||
398 | char *modindex[]={ |
399 | char *modindex[]={ |
399 | "title", "description", |
400 | "title", "description", |
400 | "author", "address", "copyright", |
401 | "author", "address", "copyright", |
401 | "version", "wims_version", "language", |
402 | "version", "wims_version", "language", |
402 | "category", "level", "domain", "keywords", |
403 | "category", "level", "domain", "keywords", |
403 | "keywords_ca", "keywords_en", "keywords_fr", "keywords_it", "keywords_nl", |
404 | "keywords_ca", "keywords_en", "keywords_fr", "keywords_it", "keywords_nl", |
404 | "title_ca", "title_en", "title_fr", "title_it", "title_nl", |
405 | "title_ca", "title_en", "title_fr", "title_it", "title_nl", |
405 | "require" |
406 | "require" |
406 | }; |
407 | }; |
407 | #define MODINDEX_NO (sizeof(modindex)/sizeof(modindex[0])) |
408 | #define MODINDEX_NO (sizeof(modindex)/sizeof(modindex[0])) |
Line 480... | Line 481... | ||
480 | { |
481 | { |
481 | char nbuf[MAX_LINELEN+1], buf[MAX_LINELEN+1]; |
482 | char nbuf[MAX_LINELEN+1], buf[MAX_LINELEN+1]; |
482 | int i, ll; |
483 | int i, ll; |
483 | char *p; |
484 | char *p; |
484 | FILE *f; |
485 | FILE *f; |
485 | 486 | ||
486 | if(!isalnum(*word) || (ll=strlen(word))<2 || |
487 | if(!isalnum(*word) || (ll=strlen(word))<2 || |
487 | wordchr(taken,word)!=NULL || |
488 | wordchr(taken,word)!=NULL || |
488 | wordchr(ignore[lind],word)!=NULL || |
489 | wordchr(ignore[lind],word)!=NULL || |
489 | takenlen>=MAX_LINELEN-ll-16) |
490 | takenlen>=MAX_LINELEN-ll-16) |
490 | return; |
491 | return; |
Line 500... | Line 501... | ||
500 | f=fopen(nbuf,"a"); |
501 | f=fopen(nbuf,"a"); |
501 | if(f!=NULL) {fputs(buf,f); fclose(f);} |
502 | if(f!=NULL) {fputs(buf,f); fclose(f);} |
502 | } |
503 | } |
503 | } |
504 | } |
504 | 505 | ||
- | 506 | void appenditem1 (char *buf, int lind, int serial, int weight, char *l ) |
|
- | 507 | { |
|
- | 508 | char *p1, *p2 ; |
|
- | 509 | for(p1=find_word_start(buf); *p1; |
|
- | 510 | p1=find_word_start(p2)) { |
|
- | 511 | p2=strchr(p1,','); |
|
- | 512 | if(p2!=NULL) *p2++=0; else p2=p1+strlen(p1); |
|
- | 513 | if(strlen(p1)<=0) continue; |
|
- | 514 | appenditem(p1,lind,serial,weight,module_language); |
|
- | 515 | } |
|
- | 516 | } |
|
- | 517 | void appenditem2 (char *buf, int lind, int serial, int weight, char *l ) |
|
- | 518 | { |
|
- | 519 | char *p1, *p2 ; |
|
- | 520 | for(p1=find_word_start(buf);*p1; |
|
- | 521 | p1=find_word_start(p2)) { |
|
- | 522 | p2=find_word_end(p1); if(*p2) *p2++=0; |
|
- | 523 | appenditem(p1,lind,serial,weight,module_language); |
|
- | 524 | } |
|
- | 525 | } |
|
505 | void onemodule(const char *name, int serial, int lind) |
526 | void onemodule(const char *name, int serial, int lind) |
506 | { |
527 | { |
507 | int i; |
528 | int i; |
508 | unsigned char trlist[]={ |
529 | unsigned char trlist[]={ |
509 | i_title,i_description,i_category,i_domain,i_keywords, |
530 | i_title,i_description,i_category,i_domain,i_keywords, |
Line 512... | Line 533... | ||
512 | i_title_ca,i_title_en,i_title_fr,i_title_it,i_title_nl |
533 | i_title_ca,i_title_en,i_title_fr,i_title_it,i_title_nl |
513 | }; |
534 | }; |
514 | #define trcnt (sizeof(trlist)/sizeof(trlist[0])) |
535 | #define trcnt (sizeof(trlist)/sizeof(trlist[0])) |
515 | char *p1, *p2, *pp, *q, buf[MAX_LINELEN+1], lbuf[16]; |
536 | char *p1, *p2, *pp, *q, buf[MAX_LINELEN+1], lbuf[16]; |
516 | FILE *f; |
537 | FILE *f; |
517 | 538 | ||
518 | if(module_index(name)) return; |
539 | if(module_index(name)) return; |
519 | towords(indbuf[i_category]); |
540 | towords(indbuf[i_category]); |
520 | /* list the categories (among A=all,X=eXercise,O,D,...) corresponding to this module */ |
541 | /* list the categories (among A=all,X=eXercise,O,D,...) corresponding to this module */ |
521 | for(i=catcnt=0;i<catno && catcnt<16;i++) { |
542 | for(i=catcnt=0;i<catno && catcnt<16;i++) { |
522 | if(wordchr(indbuf[i_category],cat[i].name)!=NULL) |
543 | if(wordchr(indbuf[i_category],cat[i].name)!=NULL) |
523 | categories[catcnt++]=cat[i].typ; |
544 | categories[catcnt++]=cat[i].typ; |
524 | } |
545 | } |
525 | if(catcnt==0) return; |
546 | if(catcnt==0) return; |
526 | if(categories[0]!=cat[0].typ) |
547 | if(categories[0]!=cat[0].typ) |
527 | categories[catcnt++]=cat[0].typ; |
548 | categories[catcnt++]=cat[0].typ; |
Line 536... | Line 557... | ||
536 | fprintf(langf,"%d:%s\n",serial,module_language); |
557 | fprintf(langf,"%d:%s\n",serial,module_language); |
537 | fprintf(titf,"%d:%s\n",serial,indbuf[i_title]); |
558 | fprintf(titf,"%d:%s\n",serial,indbuf[i_title]); |
538 | fprintf(descf,"%d:%s\n",serial,indbuf[i_description]); |
559 | fprintf(descf,"%d:%s\n",serial,indbuf[i_description]); |
539 | fprintf(authorf,"%d:%s\n",serial,indbuf[i_author]); |
560 | fprintf(authorf,"%d:%s\n",serial,indbuf[i_author]); |
540 | fprintf(versionf,"%d:%s\n",serial,indbuf[i_version]); |
561 | fprintf(versionf,"%d:%s\n",serial,indbuf[i_version]); |
- | 562 | ||
541 | /* add module's information in html page for robots */ |
563 | /* add module's information in html page for robots */ |
542 | snprintf(buf,sizeof(buf),"%s",indbuf[i_description]); |
564 | snprintf(buf,sizeof(buf),"%s",indbuf[i_description]); |
543 | for(pp=strchr(buf,','); pp; pp=strchr(pp,',')) |
565 | for(pp=strchr(buf,','); pp; pp=strchr(pp,',')) |
544 | string_modify(buf,pp,pp+1,","); |
566 | string_modify(buf,pp,pp+1,","); |
545 | if(strcmp(module_language,lang[lind])==0) |
567 | if(strcmp(module_language,lang[lind])==0) |
546 | fprintf(robotf,"%s ,%s,%s,%s,%s\n",name,module_language,name, |
568 | fprintf(robotf,"%s ,%s,%s,%s,%s\n",name,module_language,name, |
547 | indbuf[i_title], buf); |
569 | indbuf[i_title], buf); |
548 | /* Normalize the information, using main dictionary bases/sys/words.xx */ |
- | |
549 | entrycount=mentrycount; dicbuf=mdicbuf; |
- | |
550 | memmove(entry,mentry,mentrycount*sizeof(entry[0])); |
- | |
551 | unknown_type=unk_leave; /* used in translator_.c */ |
- | |
552 | 570 | ||
- | 571 | /* Normalize the information, using dictionary |
|
- | 572 | -- bases/sys/domain.xx without suffix (--> english version) |
|
- | 573 | -- bases/sys/words.xx with suffix */ |
|
- | 574 | entrycount=dentrycount; dicbuf=ddicbuf; |
|
- | 575 | memmove(entry,dentry,dentrycount*sizeof(entry[0])); |
|
- | 576 | unknown_type=unk_leave; |
|
553 | for(i=0;i<trcnt;i++) { |
577 | for(i=0;i<trcnt;i++) { |
554 | detag(indbuf[trlist[i]]); |
578 | detag(indbuf[trlist[i]]); |
555 | deaccent(indbuf[trlist[i]]); |
579 | deaccent(indbuf[trlist[i]]); |
556 | comma(indbuf[trlist[i]]); |
580 | comma(indbuf[trlist[i]]); |
557 | singlespace(indbuf[trlist[i]]); |
581 | singlespace(indbuf[trlist[i]]); |
- | 582 | translate(indbuf[trlist[i]]); |
|
- | 583 | } |
|
- | 584 | ||
- | 585 | entrycount=mentrycount; dicbuf=mdicbuf; |
|
- | 586 | memmove(entry,mentry,mentrycount*sizeof(entry[0])); |
|
- | 587 | unknown_type=unk_leave; /* used in translator_.c */ |
|
- | 588 | for(i=0;i<trcnt;i++) { |
|
558 | suffix_translate(indbuf[trlist[i]]); |
589 | suffix_translate(indbuf[trlist[i]]); |
559 | translate(indbuf[trlist[i]]); |
590 | translate(indbuf[trlist[i]]); |
560 | } |
591 | } |
- | 592 | ||
561 |
|
593 | /* taken contains all words already seen in the module index */ |
562 | taken[0]=0; takenlen=tweight=0; |
594 | taken[0]=0; takenlen=tweight=0; |
- | 595 | /* append words of title */ |
|
563 | ovlstrcpy(buf,indbuf[i_title]); towords(buf); |
596 | ovlstrcpy(buf,indbuf[i_title]); towords(buf); |
564 | for(p1=find_word_start(buf);*p1; |
- | |
565 | p1=find_word_start(p2)) { |
- | |
566 | p2=find_word_end(p1); if(*p2) *p2++=0; |
- | |
567 |
|
597 | appenditem2(buf,lind,serial,4,module_language); |
568 | } |
598 | |
569 |
|
599 | /* append words of every other information except level */ |
570 | snprintf(buf,sizeof(buf),"%s %s %s %s %s %s %s %s %s %s %s %s %s %s %s", |
600 | snprintf(buf,sizeof(buf),"%s %s %s %s %s %s %s %s %s %s %s %s %s %s %s", |
571 | indbuf[i_description],indbuf[i_keywords], |
601 | indbuf[i_description],indbuf[i_keywords], |
572 | indbuf[i_keywords_ca],indbuf[i_keywords_en],indbuf[i_keywords_fr], |
602 | indbuf[i_keywords_ca],indbuf[i_keywords_en],indbuf[i_keywords_fr], |
573 | indbuf[i_keywords_it],indbuf[i_keywords_nl], |
603 | indbuf[i_keywords_it],indbuf[i_keywords_nl], |
574 | indbuf[i_title_ca],indbuf[i_title_en],indbuf[i_title_fr], |
604 | indbuf[i_title_ca],indbuf[i_title_en],indbuf[i_title_fr], |
575 | indbuf[i_title_it],indbuf[i_title_nl], |
605 | indbuf[i_title_it],indbuf[i_title_nl], |
576 | indbuf[i_domain],indbuf[i_require],indbuf[i_author]); |
606 | indbuf[i_domain],indbuf[i_require],indbuf[i_author]); |
577 | towords(buf); |
607 | towords(buf); |
578 | for(p1=find_word_start(buf);*p1; |
- | |
579 | p1=find_word_start(p2)) { |
- | |
580 | p2=find_word_end(p1); if(*p2) *p2++=0; |
- | |
581 |
|
608 | appenditem2(buf,lind,serial,4,module_language); |
582 | } |
609 | |
583 | /* this time the dictionary is the group dictionary sys/wgrp/ |
610 | /* this time the dictionary is the group dictionary sys/wgrp/wgrp |
- | 611 | with a g (groupdic), not an m (maindic) . see below main, suffix, group. |
|
584 | and delete unknown ?? and translate */ |
612 | and delete unknown ?? and translate */ |
585 | entrycount=gentrycount; dicbuf=gdicbuf; |
613 | entrycount=gentrycount; dicbuf=gdicbuf; |
586 | memmove(entry,gentry,gentrycount*sizeof(entry[0])); |
614 | memmove(entry,gentry,gentrycount*sizeof(entry[0])); |
- | 615 | ||
- | 616 | /* append words (?) of every other information except level */ |
|
- | 617 | ovlstrcpy(buf,indbuf[i_title]); |
|
587 | unknown_type=unk_delete; |
618 | unknown_type=unk_delete; |
- | 619 | translate(buf); |
|
- | 620 | appenditem1(buf,lind,serial,2,module_language); |
|
- | 621 | ||
588 |
|
622 | /* append words (?) of information of description except level */ |
589 |
|
623 | snprintf(buf,sizeof(buf),"%s", indbuf[i_description]); |
590 |
|
624 | unknown_type=unk_delete; |
591 | p1=find_word_start(p2)) { |
- | |
592 |
|
625 | translate(buf); |
593 | if(p2!=NULL) *p2++=0; else p2=p1+strlen(p1); |
- | |
594 | if(strlen(p1)<=0) continue; |
- | |
595 |
|
626 | appenditem1(buf,lind,serial,4,module_language); |
596 | } |
627 | |
597 |
|
628 | /* append words (or group of words) of keywords and domain level */ |
598 | snprintf(buf,sizeof(buf)," |
629 | snprintf(buf,sizeof(buf),"%s, %s, %s, %s, %s, %s, %s", |
599 | indbuf[ |
630 | indbuf[i_domain],indbuf[i_keywords], |
600 | indbuf[i_keywords_ca], indbuf[i_keywords_en],indbuf[i_keywords_fr], |
631 | indbuf[i_keywords_ca], indbuf[i_keywords_en],indbuf[i_keywords_fr], |
601 | indbuf[i_keywords_it], indbuf[i_keywords_nl] |
632 | indbuf[i_keywords_it], indbuf[i_keywords_nl]); |
602 |
|
633 | unknown_type=unk_leave; |
603 | translate(buf); |
634 | translate(buf); |
604 | for(p1=find_word_start(buf); *p1; |
- | |
605 | p1=find_word_start(p2)) { |
- | |
606 | p2=strchr(p1,','); |
- | |
607 | if(p2!=NULL) *p2++=0; else p2=p1+strlen(p1); |
- | |
608 | if(strlen(p1)<=0) continue; |
- | |
609 |
|
635 | appenditem1(buf,lind,serial,2,module_language); |
610 | } |
636 | |
611 | /* append level information, with weight 2 */ |
637 | /* append level information, with weight 2 */ |
612 | snprintf(buf,sizeof(buf),"%s",indbuf[i_level]); |
638 | snprintf(buf,sizeof(buf),"%s",indbuf[i_level]); |
613 | ovlstrcpy(lbuf,"level"); |
639 | ovlstrcpy(lbuf,"level"); |
614 | for(p1=buf; *p1; p1++) if(!isalnum(*p1)) *p1=' '; |
640 | for(p1=buf; *p1; p1++) if(!isalnum(*p1)) *p1=' '; |
615 | q=buf+strlen(buf); |
641 | q=buf+strlen(buf); |
616 | for(p1=find_word_start(buf); (*p1) && (p1 < q) ; |
642 | for(p1=find_word_start(buf); (*p1) && (p1 < q) ; |
617 | p1=find_word_start(p2)) { |
643 | p1=find_word_start(p2)) { |
618 | p2=find_word_end(p1); |
644 | p2=find_word_end(p1); |
619 | if(p2!=NULL) *p2++=0; else p2=p1+strlen(p1); |
645 | if(p2!=NULL) *p2++=0; else p2=p1+strlen(p1); |
620 | if(!isalpha(*p1) || |
646 | if(!isalpha(*p1) || |
621 | (!isdigit(*(p1+1)) && *(p1+1)!=0) || |
647 | (!isdigit(*(p1+1)) && *(p1+1)!=0) || |
622 | (*(p1+1)!=0 && *(p1+2)!=0)) |
648 | (*(p1+1)!=0 && *(p1+2)!=0)) |
623 | continue; |
649 | continue; |
624 | *p1=tolower(*p1); |
650 | *p1=tolower(*p1); |
625 | ovlstrcpy(lbuf+strlen("level"),p1); |
651 | ovlstrcpy(lbuf+strlen("level"),p1); |
Line 631... | Line 657... | ||
631 | 657 | ||
632 | void modules(void) |
658 | void modules(void) |
633 | { |
659 | { |
634 | int i,j,k,d; |
660 | int i,j,k,d; |
635 | char namebuf[MAX_LINELEN+1]; |
661 | char namebuf[MAX_LINELEN+1]; |
636 | char mdic[MAX_LINELEN+1], sdic[MAX_LINELEN+1], gdic[MAX_LINELEN+1]; |
662 | char mdic[MAX_LINELEN+1], sdic[MAX_LINELEN+1], gdic[MAX_LINELEN+1], ddic[MAX_LINELEN+1]; |
637 | 663 | ||
638 | for(j=0;j<langcnt;j++) { |
664 | for(j=0;j<langcnt;j++) { |
639 | snprintf(namebuf,sizeof(namebuf),"%s/weight.%s",outdir,lang[j]); |
665 | snprintf(namebuf,sizeof(namebuf),"%s/weight.%s",outdir,lang[j]); |
640 | weightf=fopen(namebuf,"w"); |
666 | weightf=fopen(namebuf,"w"); |
641 | snprintf(mdic,sizeof(mdic),"%s/%s.%s",dicdir,maindic,lang[j]); |
667 | snprintf(mdic,sizeof(mdic),"%s/%s.%s",dicdir,maindic,lang[j]); |
642 | snprintf(sdic,sizeof(sdic),"%s/%s.%s",dicdir,suffixdic,lang[j]); |
668 | snprintf(sdic,sizeof(sdic),"%s/%s.%s",dicdir,suffixdic,lang[j]); |
643 | snprintf(gdic,sizeof(gdic),"%s/%s.%s",dicdir,groupdic,lang[j]); |
669 | snprintf(gdic,sizeof(gdic),"%s/%s.%s",dicdir,groupdic,lang[j]); |
- | 670 | snprintf(ddic,sizeof(ddic),"%s/%s.%s",dicdir,domaindic,lang[j]); |
|
644 | suffix_dic(sdic); prepare_dic(gdic); |
671 | suffix_dic(sdic); prepare_dic(gdic); |
645 | gdicbuf=dicbuf; gentrycount=entrycount; |
672 | gdicbuf=dicbuf; gentrycount=entrycount; |
646 | memmove(gentry,entry,gentrycount*sizeof(entry[0])); |
673 | memmove(gentry,entry,gentrycount*sizeof(entry[0])); |
647 | prepare_dic(mdic); |
674 | prepare_dic(mdic); |
648 | mdicbuf=dicbuf; mentrycount=entrycount; |
675 | mdicbuf=dicbuf; mentrycount=entrycount; |
649 | memmove(mentry,entry,mentrycount*sizeof(entry[0])); |
676 | memmove(mentry,entry,mentrycount*sizeof(entry[0])); |
- | 677 | prepare_dic(ddic); |
|
- | 678 | ddicbuf=dicbuf; dentrycount=entrycount; |
|
- | 679 | memmove(dentry,entry,dentrycount*sizeof(entry[0])); |
|
650 | unknown_type=unk_leave; translate(ignore[j]); |
680 | unknown_type=unk_leave; translate(ignore[j]); |
651 | for(i=0;i<modcnt;i++) { |
681 | for(i=0;i<modcnt;i++) { |
652 | if(mod[i].langcnt>0) { |
682 | if(mod[i].langcnt>0) { |
653 | for(d=k=0;k<mod[i].langcnt;k++) |
683 | for(d=k=0;k<mod[i].langcnt;k++) |
654 | if(mod[i].langs[k]<mod[i].langs[d]) d=k; |
684 | if(mod[i].langs[k]<mod[i].langs[d]) d=k; |
Line 663... | Line 693... | ||
663 | } |
693 | } |
664 | } |
694 | } |
665 | if(mentrycount>0) free(mdicbuf); |
695 | if(mentrycount>0) free(mdicbuf); |
666 | if(gentrycount>0) free(gdicbuf); |
696 | if(gentrycount>0) free(gdicbuf); |
667 | if(suffixcnt>0) free(sufbuf); |
697 | if(suffixcnt>0) free(sufbuf); |
- | 698 | if(dentrycount>0) free(ddicbuf); |
|
668 | if(weightf) fclose(weightf); |
699 | if(weightf) fclose(weightf); |
669 | } |
700 | } |
670 | } |
701 | } |
671 | 702 | ||
- | 703 | /* FIXME ? differences with appenditem - use fprintf instead of snprintf */ |
|
672 | void sappenditem(char *word, int lind, int serial, int weight) |
704 | void sappenditem(char *word, int lind, int serial, int weight) |
673 | { |
705 | { |
674 | int ll; |
706 | int ll; |
675 | char *p; |
707 | char *p; |
676 | 708 | ||
677 | if(!isalnum(*word) || (ll=strlen(word))<2 || |
709 | if(!isalnum(*word) || (ll=strlen(word))<2 || |
678 | wordchr(taken,word)!=NULL || |
710 | wordchr(taken,word)!=NULL || |
679 | wordchr(ignore[lind],word)!=NULL || |
711 | wordchr(ignore[lind],word)!=NULL || |
680 | takenlen>=MAX_LINELEN-ll-16) |
712 | takenlen>=MAX_LINELEN-ll-16) |
681 | return; |
713 | return; |
Line 693... | Line 725... | ||
693 | unsigned char trlist[]={ |
725 | unsigned char trlist[]={ |
694 | s_title,s_description,s_domain,s_keywords,s_remark |
726 | s_title,s_description,s_domain,s_keywords,s_remark |
695 | }; |
727 | }; |
696 | #define trcnt (sizeof(trlist)/sizeof(trlist[0])) |
728 | #define trcnt (sizeof(trlist)/sizeof(trlist[0])) |
697 | char *p1, *p2, buf[MAX_LINELEN+1]; |
729 | char *p1, *p2, buf[MAX_LINELEN+1]; |
698 | 730 | ||
699 | if(sheet_index(serial)) return; |
731 | if(sheet_index(serial)) return; |
700 | fprintf(listf,"%s\n",mod[serial].name+3); |
732 | fprintf(listf,"%s\n",mod[serial].name+3); |
701 | fprintf(titf,"%d:%s\n",serial,sindbuf[s_title]); |
733 | fprintf(titf,"%d:%s\n",serial,sindbuf[s_title]); |
702 | fprintf(descf,"%d:%s\n",serial,sindbuf[s_description]); |
734 | fprintf(descf,"%d:%s\n",serial,sindbuf[s_description]); |
703 | entrycount= |
735 | entrycount=dentrycount; dicbuf=ddicbuf; |
704 | memmove(entry, |
736 | memmove(entry,dentry,dentrycount*sizeof(entry[0])); |
705 | unknown_type=unk_leave; |
737 | unknown_type=unk_leave; |
706 | for(i=0;i<trcnt;i++) { |
738 | for(i=0;i<trcnt;i++) { |
707 | detag(sindbuf[trlist[i]]); |
739 | detag(sindbuf[trlist[i]]); |
708 | deaccent(sindbuf[trlist[i]]); |
740 | deaccent(sindbuf[trlist[i]]); |
709 | comma(sindbuf[trlist[i]]); |
741 | comma(sindbuf[trlist[i]]); |
710 | singlespace(sindbuf[trlist[i]]); |
742 | singlespace(sindbuf[trlist[i]]); |
- | 743 | translate(sindbuf[trlist[i]]); |
|
- | 744 | } |
|
- | 745 | ||
- | 746 | entrycount=mentrycount; dicbuf=mdicbuf; |
|
- | 747 | memmove(entry,mentry,mentrycount*sizeof(entry[0])); |
|
- | 748 | unknown_type=unk_leave; |
|
- | 749 | for(i=0;i<trcnt;i++) { |
|
711 | suffix_translate(sindbuf[trlist[i]]); |
750 | suffix_translate(sindbuf[trlist[i]]); |
712 | translate(sindbuf[trlist[i]]); |
751 | translate(sindbuf[trlist[i]]); |
713 | } |
752 | } |
714 | taken[0]=0; takenlen=tweight=0; |
753 | taken[0]=0; takenlen=tweight=0; |
715 | ovlstrcpy(buf,sindbuf[s_title]); towords(buf); |
754 | ovlstrcpy(buf,sindbuf[s_title]); towords(buf); |
716 | for(p1=find_word_start(buf);*p1; |
755 | for(p1=find_word_start(buf);*p1; |
717 | p1=find_word_start(p2)) { |
756 | p1=find_word_start(p2)) { |
718 | p2=find_word_end(p1); if(*p2) *p2++=0; |
757 | p2=find_word_end(p1); if(*p2) *p2++=0; |
719 | sappenditem(p1,lind,serial,4); |
758 | sappenditem(p1,lind,serial,4); |
720 | } |
759 | } |
721 | snprintf(buf,sizeof(buf),"%s %s %s %s", |
760 | snprintf(buf,sizeof(buf),"%s %s %s %s", |
722 | sindbuf[s_description],sindbuf[s_keywords], |
761 | sindbuf[s_description],sindbuf[s_keywords], |
723 | sindbuf[s_domain],sindbuf[s_remark]); |
762 | sindbuf[s_domain],sindbuf[s_remark]); |
Line 742... | Line 781... | ||
742 | sindbuf[s_description],sindbuf[s_keywords], |
781 | sindbuf[s_description],sindbuf[s_keywords], |
743 | sindbuf[s_domain],sindbuf[s_remark]); |
782 | sindbuf[s_domain],sindbuf[s_remark]); |
744 | translate(buf); |
783 | translate(buf); |
745 | for(p1=find_word_start(buf); *p1; |
784 | for(p1=find_word_start(buf); *p1; |
746 | p1=find_word_start(p2)) { |
785 | p1=find_word_start(p2)) { |
747 | p2=strchr(p1,','); |
786 | p2=strchr(p1,','); |
748 | if(p2!=NULL) *p2++=0; else p2=p1+strlen(p1); |
787 | if(p2!=NULL) *p2++=0; else p2=p1+strlen(p1); |
749 | if(strlen(p1)<=0) continue; |
788 | if(strlen(p1)<=0) continue; |
750 | sappenditem(p1,lind,serial,2); |
789 | sappenditem(p1,lind,serial,2); |
751 | } |
790 | } |
752 | fprintf(weightf,"%d:%d\n",serial,tweight); |
791 | fprintf(weightf,"%d:%d\n",serial,tweight); |
753 | } |
792 | } |
- | 793 | ||
754 | 794 | ||
755 | void sheets(void) |
795 | void sheets(void) |
756 | { |
796 | { |
757 | int i,j; |
797 | int i,j; |
758 | char mdic[MAX_LINELEN+1], sdic[MAX_LINELEN+1], gdic[MAX_LINELEN+1]; |
798 | char mdic[MAX_LINELEN+1], sdic[MAX_LINELEN+1], gdic[MAX_LINELEN+1]; |