Rev 6973 | Rev 8100 | Go to most recent revision | Show entire file | Ignore whitespace | Details | Blame | Last modification | View Log | RSS feed
Rev 6973 | Rev 7915 | ||
---|---|---|---|
Line 14... | Line 14... | ||
14 | * along with this program; if not, write to the Free Software |
14 | * along with this program; if not, write to the Free Software |
15 | * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. |
15 | * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. |
16 | */ |
16 | */ |
17 | 17 | ||
18 | /* This is an internal program, |
18 | /* This is an internal program, |
19 | * used to index modules for the search engine. |
19 | * used to index modules for the search engine. |
20 | */ |
20 | */ |
21 | 21 | ||
22 | #include "../wims.h" |
22 | #include "../wims.h" |
23 | #include "../Lib/basicstr.c" |
23 | #include "../Lib/basicstr.c" |
24 | 24 | ||
Line 88... | Line 88... | ||
88 | 88 | ||
89 | char *acctab="çéèêëúùûüáàâäãóòôöõíìïîñýÇÉÈÊËÚÙÛÜÁÀÂÃÄÓÒÔÖÕÍÌÏÎÑÝ", |
89 | char *acctab="çéèêëúùûüáàâäãóòôöõíìïîñýÇÉÈÊËÚÙÛÜÁÀÂÃÄÓÒÔÖÕÍÌÏÎÑÝ", |
90 | *deatab="ceeeeuuuuaaaaaoooooiiiinyCEEEEUUUUAAAAAOOOOOIIIINY"; |
90 | *deatab="ceeeeuuuuaaaaaoooooiiiinyCEEEEUUUUAAAAAOOOOOIIIINY"; |
91 | 91 | ||
92 | /* Fold known accented letters to their unaccented form, and other strange characters to a space. |
92 | /* Fold known accented letters to their unaccented form, and other strange characters to a space. |
93 | * The apostrophe is among the exceptions that are kept (important for multi-word expressions). |
93 | * The apostrophe is among the exceptions that are kept (important for multi-word expressions). |
94 | */ |
94 | */ |
95 | void deaccent(char *p) |
95 | void deaccent(char *p) |
96 | { |
96 | { |
97 | char *sp; |
97 | char *sp; |
98 | char *v; |
98 | char *v; |
Line 542... | Line 542... | ||
542 | char *p1, *p2, *pp, *q, buf[MAX_LINELEN+1], lbuf[16]; |
542 | char *p1, *p2, *pp, *q, buf[MAX_LINELEN+1], lbuf[16]; |
543 | FILE *f; |
543 | FILE *f; |
544 | 544 | ||
545 | if(module_index(name)) return; |
545 | if(module_index(name)) return; |
546 | towords(indbuf[i_category]); |
546 | towords(indbuf[i_category]); |
547 | /* list the categories (among A=all,X=eXercise,O,D,...) corresponding |
547 | /* list the categories (among A=all,X=eXercise,O,D,...) corresponding |
548 | * to this module |
548 | * to this module |
549 | */ |
549 | */ |
550 | for(i=catcnt=0;i<catno && catcnt<16;i++) { |
550 | for(i=catcnt=0;i<catno && catcnt<16;i++) { |
551 | if(wordchr(indbuf[i_category],cat[i].name)!=NULL) |
551 | if(wordchr(indbuf[i_category],cat[i].name)!=NULL) |
552 | categories[catcnt++]=cat[i].typ; |
552 | categories[catcnt++]=cat[i].typ; |
Line 577... | Line 577... | ||
577 | if(strcmp(module_language,lang[lind])==0) |
577 | if(strcmp(module_language,lang[lind])==0) |
578 | fprintf(robotf,"%s ,%s,%s,%s,%s\n",name,module_language,name, |
578 | fprintf(robotf,"%s ,%s,%s,%s,%s\n",name,module_language,name, |
579 | indbuf[i_title], buf); |
579 | indbuf[i_title], buf); |
580 | 580 | ||
581 | /* Normalize the fields listed in trlist, using the dictionary |
581 | /* Normalize the fields listed in trlist, using the dictionary |
582 | * bases/sys/domain.xx, without suffix translation (--> English version). |
582 | * bases/sys/domain.xx, without suffix translation (--> English version). |
583 | */ |
583 | */ |
584 | entrycount=dentrycount; dicbuf=ddicbuf; |
584 | entrycount=dentrycount; dicbuf=ddicbuf; |
585 | memmove(entry,dentry,dentrycount*sizeof(entry[0])); |
585 | memmove(entry,dentry,dentrycount*sizeof(entry[0])); |
586 | unknown_type=unk_leave; |
586 | unknown_type=unk_leave; |
587 | for(i=0;i<trcnt;i++) { |
587 | for(i=0;i<trcnt;i++) { |
Line 590... | Line 590... | ||
590 | comma(indbuf[trlist[i]]); |
590 | comma(indbuf[trlist[i]]); |
591 | singlespace(indbuf[trlist[i]]); |
591 | singlespace(indbuf[trlist[i]]); |
592 | translate(indbuf[trlist[i]]); |
592 | translate(indbuf[trlist[i]]); |
593 | } |
593 | } |
594 | /* Normalize the information, using dictionary |
594 | /* Normalize the information, using dictionary |
595 | * bases/sys/words.xx with suffix translation |
595 | * bases/sys/words.xx with suffix translation |
596 | */ |
596 | */ |
597 | entrycount=mentrycount; dicbuf=mdicbuf; |
597 | entrycount=mentrycount; dicbuf=mdicbuf; |
598 | memmove(entry,mentry,mentrycount*sizeof(entry[0])); |
598 | memmove(entry,mentry,mentrycount*sizeof(entry[0])); |
599 | unknown_type=unk_leave;/* used in translator_.c */ |
599 | unknown_type=unk_leave;/* used in translator_.c */ |
600 | for(i=0;i<trcnt;i++) { |
600 | for(i=0;i<trcnt;i++) { |
Line 619... | Line 619... | ||
619 | towords(buf); |
619 | towords(buf); |
620 | appenditem2(buf,lind,serial,2,module_language); |
620 | appenditem2(buf,lind,serial,2,module_language); |
621 | 621 | ||
622 | /* This time the dictionary is the group dictionary sys/wgrp/wgrp, |
622 | /* This time the dictionary is the group dictionary sys/wgrp/wgrp, |
623 | * with a g (groupdic), not an m (maindic); see main, suffix, group below. |
623 | * with a g (groupdic), not an m (maindic); see main, suffix, group below. |
624 | * Unknown words are deleted (?? to be confirmed), then the text is translated. |
624 | * Unknown words are deleted (?? to be confirmed), then the text is translated. |
625 | */ |
625 | */ |
626 | entrycount=gentrycount; dicbuf=gdicbuf; |
626 | entrycount=gentrycount; dicbuf=gdicbuf; |
627 | memmove(entry,gentry,gentrycount*sizeof(entry[0])); |
627 | memmove(entry,gentry,gentrycount*sizeof(entry[0])); |
628 | 628 | ||
629 | /* append words of every title information */ |
629 | /* append words of every title information */ |
Line 744... | Line 744... | ||
744 | if(sheet_index(serial)) return; |
744 | if(sheet_index(serial)) return; |
745 | fprintf(listf,"%s\n",mod[serial].name+3); |
745 | fprintf(listf,"%s\n",mod[serial].name+3); |
746 | fprintf(titf,"%d:%s\n",serial,sindbuf[s_title]); |
746 | fprintf(titf,"%d:%s\n",serial,sindbuf[s_title]); |
747 | fprintf(descf,"%d:%s\n",serial,sindbuf[s_description]); |
747 | fprintf(descf,"%d:%s\n",serial,sindbuf[s_description]); |
748 | fprintf(remf,"%d:%s\n",serial,sindbuf[s_information]); |
748 | fprintf(remf,"%d:%s\n",serial,sindbuf[s_information]); |
749 | 749 | ||
750 | entrycount=dentrycount; dicbuf=ddicbuf; |
750 | entrycount=dentrycount; dicbuf=ddicbuf; |
751 | memmove(entry,dentry,dentrycount*sizeof(entry[0])); |
751 | memmove(entry,dentry,dentrycount*sizeof(entry[0])); |
752 | unknown_type=unk_leave; |
752 | unknown_type=unk_leave; |
753 | for(i=0;i<trcnt;i++) { |
753 | for(i=0;i<trcnt;i++) { |
754 | detag(sindbuf[trlist[i]]); |
754 | detag(sindbuf[trlist[i]]); |
755 | deaccent(sindbuf[trlist[i]]); |
755 | deaccent(sindbuf[trlist[i]]); |
756 | comma(sindbuf[trlist[i]]); |
756 | comma(sindbuf[trlist[i]]); |
757 | singlespace(sindbuf[trlist[i]]); |
757 | singlespace(sindbuf[trlist[i]]); |
758 | translate(sindbuf[trlist[i]]); |
758 | translate(sindbuf[trlist[i]]); |
759 | } |
759 | } |
760 | 760 | ||
761 | entrycount=mentrycount; dicbuf=mdicbuf; |
761 | entrycount=mentrycount; dicbuf=mdicbuf; |
762 | memmove(entry,mentry,mentrycount*sizeof(entry[0])); |
762 | memmove(entry,mentry,mentrycount*sizeof(entry[0])); |
763 | unknown_type=unk_leave; |
763 | unknown_type=unk_leave; |
764 | for(i=0;i<trcnt;i++) { |
764 | for(i=0;i<trcnt;i++) { |
765 | suffix_translate(sindbuf[trlist[i]]); |
765 | suffix_translate(sindbuf[trlist[i]]); |
766 | translate(sindbuf[trlist[i]]); |
766 | translate(sindbuf[trlist[i]]); |
767 | } |
767 | } |
768 | taken[0]=0; takenlen=tweight=0; |
768 | taken[0]=0; takenlen=tweight=0; |
769 | ovlstrcpy(buf,sindbuf[s_title]); towords(buf); |
769 | ovlstrcpy(buf,sindbuf[s_title]); towords(buf); |
770 | for(p1=find_word_start(buf);*p1; |
770 | for(p1=find_word_start(buf);*p1; |
771 | p1=find_word_start(p2)) { |
771 | p1=find_word_start(p2)) { |
772 | p2=find_word_end(p1); if(*p2) *p2++=0; |
772 | p2=find_word_end(p1); if(*p2) *p2++=0; |
773 | sappenditem(p1,lind,serial,4); |
773 | sappenditem(p1,lind,serial,4); |
774 | } |
774 | } |
775 | snprintf(buf,sizeof(buf),"%s %s %s %s", |
775 | snprintf(buf,sizeof(buf),"%s %s %s %s", |
776 | sindbuf[s_description],sindbuf[s_keywords], |
776 | sindbuf[s_description],sindbuf[s_keywords], |
777 | sindbuf[s_domain],sindbuf[s_information]); |
777 | sindbuf[s_domain],sindbuf[s_information]); |
778 | towords(buf); |
778 | towords(buf); |
779 | for(p1=find_word_start(buf);*p1; |
779 | for(p1=find_word_start(buf);*p1; |
780 | p1=find_word_start(p2)) { |
780 | p1=find_word_start(p2)) { |
781 | p2=find_word_end(p1); if(*p2) *p2++=0; |
781 | p2=find_word_end(p1); if(*p2) *p2++=0; |
782 | sappenditem(p1,lind,serial,2); |
782 | sappenditem(p1,lind,serial,2); |
783 | } |
783 | } |
784 | entrycount=gentrycount; dicbuf=gdicbuf; |
784 | entrycount=gentrycount; dicbuf=gdicbuf; |
785 | memmove(entry,gentry,gentrycount*sizeof(entry[0])); |
785 | memmove(entry,gentry,gentrycount*sizeof(entry[0])); |
786 | unknown_type=unk_delete; |
786 | unknown_type=unk_delete; |
787 | ovlstrcpy(buf,sindbuf[s_title]); translate(buf); |
787 | ovlstrcpy(buf,sindbuf[s_title]); translate(buf); |
788 | for(p1=find_word_start(buf); *p1; |
788 | for(p1=find_word_start(buf); *p1; |
789 | p1=find_word_start(p2)) { |
789 | p1=find_word_start(p2)) { |
790 | p2=strchr(p1,','); |
790 | p2=strchr(p1,','); |
791 | if(p2!=NULL) *p2++=0; else p2=p1+strlen(p1); |
791 | if(p2!=NULL) *p2++=0; else p2=p1+strlen(p1); |
792 | if(strlen(p1)<=0) continue; |
792 | if(strlen(p1)<=0) continue; |
793 | sappenditem(p1,lind,serial,4); |
793 | sappenditem(p1,lind,serial,4); |
794 | } |
794 | } |
795 | snprintf(buf,sizeof(buf),"%s, %s, %s, %s", |
795 | snprintf(buf,sizeof(buf),"%s, %s, %s, %s", |
Line 809... | Line 809... | ||
809 | void sheets(void) |
809 | void sheets(void) |
810 | { |
810 | { |
811 | int i,j; |
811 | int i,j; |
812 | char mdic[MAX_LINELEN+1], sdic[MAX_LINELEN+1], gdic[MAX_LINELEN+1], ddic[MAX_LINELEN+1]; |
812 | char mdic[MAX_LINELEN+1], sdic[MAX_LINELEN+1], gdic[MAX_LINELEN+1], ddic[MAX_LINELEN+1]; |
813 | char buf[MAX_LINELEN+1]; |
813 | char buf[MAX_LINELEN+1]; |
814 | 814 | ||
815 | for(j=0;j<langcnt;j++) { |
815 | for(j=0;j<langcnt;j++) { |
816 | snprintf(buf,sizeof(buf),"%s/index/title.%s",sheetdir,lang[j]); |
816 | snprintf(buf,sizeof(buf),"%s/index/title.%s",sheetdir,lang[j]); |
817 | titf=fopen(buf,"w"); |
817 | titf=fopen(buf,"w"); |
818 | snprintf(buf,sizeof(buf),"%s/index/description.%s",sheetdir,lang[j]); |
818 | snprintf(buf,sizeof(buf),"%s/index/description.%s",sheetdir,lang[j]); |
819 | descf=fopen(buf,"w"); |
819 | descf=fopen(buf,"w"); |