Rev 15379 | Rev 15393 | Go to most recent revision | Show entire file | Ignore whitespace | Details | Blame | Last modification | View Log | RSS feed
Rev 15379 | Rev 15380 | ||
---|---|---|---|
Line 550... | Line 550... | ||
550 | towords(indbuf[i_category]); |
550 | towords(indbuf[i_category]); |
551 | /* list the categories (among A=all,X=eXercise,O,D,...) corresponding |
551 | /* list the categories (among A=all,X=eXercise,O,D,...) corresponding |
552 | * to this module |
552 | * to this module |
553 | */ |
553 | */ |
554 | for(i=catcnt=0;i<catno && catcnt<16;i++) { |
554 | for(i=catcnt=0;i<catno && catcnt<16;i++) { |
555 | if(wordchr2(indbuf[i_category],cat[i].name)!=NULL) |
555 | if(wordchr2(indbuf[i_category],cat[i].name)!=NULL) |
556 | categories[catcnt++]=cat[i].typ; |
556 | categories[catcnt++]=cat[i].typ; |
557 | } |
557 | } |
558 | if(catcnt==0) return; |
558 | if(catcnt==0) return; |
559 | if(categories[0]!=cat[0].typ) |
559 | if(categories[0]!=cat[0].typ) |
560 | categories[catcnt++]=cat[0].typ; |
560 | categories[catcnt++]=cat[0].typ; |
561 | /* write module's name in the category.language files, for instance lists/X.fr |
561 | /* write module's name in the category.language files, for instance lists/X.fr |
Line 583... | Line 583... | ||
583 | indbuf[i_title], buf); |
583 | indbuf[i_title], buf); |
584 | 584 | ||
585 | /* Normalize the information of trlist, using dictionary |
585 | /* Normalize the information of trlist, using dictionary |
586 | * -- bases/sys/domain.xx without suffix translation (--> english version) |
586 | * -- bases/sys/domain.xx without suffix translation (--> english version) |
587 | */ |
587 | */ |
588 |
|
588 | entrycount=dentrycount; dicbuf=ddicbuf; |
589 |
|
589 | memmove(entry,dentry,dentrycount*sizeof(entry[0])); |
590 |
|
590 | unknown_type=unk_leave; |
591 |
|
591 | for(i=0;i<trcnt;i++) { |
592 |
|
592 | detag(indbuf[trlist[i]]); |
593 |
|
593 | deaccent2(indbuf[trlist[i]]); |
594 |
|
594 | comma(indbuf[trlist[i]]); |
595 |
|
595 | singlespace2(indbuf[trlist[i]]); |
596 |
|
596 | translate(indbuf[trlist[i]]); |
597 |
|
597 | } |
598 | /* Normalize the information, using dictionary |
598 | /* Normalize the information, using dictionary |
599 | * bases/sys/words.xx with suffix translation |
599 | * bases/sys/words.xx with suffix translation |
600 | */ |
600 | */ |
601 |
|
601 | entrycount=mentrycount; dicbuf=mdicbuf; |
602 |
|
602 | memmove(entry,mentry,mentrycount*sizeof(entry[0])); |
603 |
|
603 | unknown_type=unk_leave;/* used in translator_.c */ |
604 |
|
604 | for(i=0;i<trcnt;i++) { |
605 |
|
605 | suffix_translate(indbuf[trlist[i]]); |
606 |
|
606 | translate(indbuf[trlist[i]]); |
607 |
|
607 | } |
608 | 608 | ||
609 | /* taken contains all words already seen in the module index */ |
609 | /* taken contains all words already seen in the module index */ |
610 |
|
610 | taken[0]=0; takenlen=tweight=0; |
611 | /* append words of title */ |
611 | /* append words of title */ |
612 |
|
612 | ovlstrcpy(buf,indbuf[i_title]); towords(buf); |
613 |
|
613 | appenditem2(buf,lind,serial,4,module_language); |
614 | 614 | ||
615 | /* extract words of every other information except level */ |
615 | /* extract words of every other information except level */ |
616 |
|
616 | snprintf(buf,sizeof(buf),"%s %s %s %s %s %s %s %s %s %s %s %s %s %s %s", |
617 |
|
617 | indbuf[i_description],indbuf[i_keywords], |
618 |
|
618 | indbuf[i_keywords_ca],indbuf[i_keywords_en],indbuf[i_keywords_fr], |
619 |
|
619 | indbuf[i_keywords_it],indbuf[i_keywords_nl], |
620 |
|
620 | indbuf[i_title_ca],indbuf[i_title_en],indbuf[i_title_fr], |
621 |
|
621 | indbuf[i_title_it],indbuf[i_title_nl], |
622 |
|
622 | indbuf[i_domain],indbuf[i_require],indbuf[i_author]); |
623 |
|
623 | towords(buf); |
624 |
|
624 | appenditem2(buf,lind,serial,2,module_language); |
625 | 625 | ||
626 | /* this time the dictionary is the group dictionary sys/wgrp/wgrp |
626 | /* this time the dictionary is the group dictionary sys/wgrp/wgrp |
627 | * with a g (groupdic), not an m (maindic) . see below main, suffix, group. |
627 | * with a g (groupdic), not an m (maindic) . see below main, suffix, group. |
628 | * and delete unknown ?? and translate |
628 | * and delete unknown ?? and translate |
629 | */ |
629 | */ |
630 | entrycount=gentrycount; dicbuf=gdicbuf; |
630 | entrycount=gentrycount; dicbuf=gdicbuf; |
631 | memmove(entry,gentry,gentrycount*sizeof(entry[0])); |
631 | memmove(entry,gentry,gentrycount*sizeof(entry[0])); |
632 | 632 | ||
633 | /* |
633 | /* append words of every title information */ |
634 | ovlstrcpy(buf,indbuf[i_title]); |
634 | ovlstrcpy(buf,indbuf[i_title]); |
635 | unknown_type=unk_delete; |
635 | unknown_type=unk_delete; |
636 | translate(buf); |
636 | translate(buf); |
637 | appenditem1(buf,lind,serial,2,module_language); |
637 | appenditem1(buf,lind,serial,2,module_language); |
638 | 638 | ||
639 | /* |
639 | /* append words of information of description except level */ |
640 | snprintf(buf,sizeof(buf),"%s", indbuf[i_description]); |
640 | snprintf(buf,sizeof(buf),"%s", indbuf[i_description]); |
641 | unknown_type=unk_delete; |
641 | unknown_type=unk_delete; |
642 | translate(buf); |
642 | translate(buf); |
643 | appenditem1(buf,lind,serial,4,module_language); |
643 | appenditem1(buf,lind,serial,4,module_language); |
644 | 644 | ||
645 | /* |
645 | /* append words (or group of words) of keywords and domain */ |
646 | snprintf(buf,sizeof(buf),"%s, %s, %s, %s, %s, %s, %s", |
646 | snprintf(buf,sizeof(buf),"%s, %s, %s, %s, %s, %s, %s", |
647 |
|
647 | indbuf[i_domain],indbuf[i_keywords], |
648 |
|
648 | indbuf[i_keywords_ca], indbuf[i_keywords_en],indbuf[i_keywords_fr], |
649 |
|
649 | indbuf[i_keywords_it], indbuf[i_keywords_nl]); |
650 | unknown_type=unk_leave; |
650 | unknown_type=unk_leave; |
651 | translate(buf); |
651 | translate(buf); |
652 | appenditem1(buf,lind,serial,2,module_language); |
652 | appenditem1(buf,lind,serial,2,module_language); |
653 | 653 | ||
654 | /* |
654 | /* append level information, with weight 2 */ |
655 | snprintf(buf,sizeof(buf),"%s",indbuf[i_level]); |
655 | snprintf(buf,sizeof(buf),"%s",indbuf[i_level]); |
656 | ovlstrcpy(lbuf,"level"); |
656 | ovlstrcpy(lbuf,"level"); |
657 | for(p1=buf; *p1; p1++) if(!isalnum(*p1)) *p1=' '; |
657 | for(p1=buf; *p1; p1++) if(!isalnum(*p1)) *p1=' '; |
658 | q=buf+strlen(buf); |
658 | q=buf+strlen(buf); |
659 | for(p1=find_word_start(buf); (*p1) && (p1 < q) ; |
659 | for(p1=find_word_start(buf); (*p1) && (p1 < q) ; p1=find_word_start(p2)) { |
660 | p1=find_word_start(p2)) { |
- | |
661 | p2=find_word_end(p1); |
660 | p2=find_word_end(p1); |
662 | if(p2!=NULL) *p2++=0; else p2=p1+strlen(p1); |
661 | if(p2!=NULL) *p2++=0; else p2=p1+strlen(p1); |
663 | if(strncmp(p1, "Lang" , p2-p1) && |
662 | if(strncmp(p1, "Lang" , p2-p1) && |
664 | (!isalpha(*p1) || |
663 | (!isalpha(*p1) || |
665 | (!isdigit(*(p1+1)) && *(p1+1)!=0) || |
664 | (!isdigit(*(p1+1)) && *(p1+1)!=0) || |
Line 735... | Line 734... | ||
735 | taken[takenlen++]=' ';taken[takenlen++]=' '; |
734 | taken[takenlen++]=' ';taken[takenlen++]=' '; |
736 | ovlstrcpy(taken+takenlen,word); |
735 | ovlstrcpy(taken+takenlen,word); |
737 | takenlen+=ll; tweight+=weight; |
736 | takenlen+=ll; tweight+=weight; |
738 | fprintf(indf,"%s:%d?%d\n",word,serial,weight); |
737 | fprintf(indf,"%s:%d?%d\n",word,serial,weight); |
739 | } |
738 | } |
740 | - | ||
- | 739 | /* onesg / onemodule are similar */ |
|
741 | void onesg(int serial, int lind, int index(int)) |
740 | void onesg(int serial, int lind, int index(int)) |
742 | { |
741 | { |
743 | int i; |
742 | int i; |
744 | unsigned char trlist[]={ |
743 | unsigned char trlist[]={ |
745 | s_title,s_description,s_domain,s_keywords,s_information |
744 | s_title,s_description,s_domain,s_keywords,s_information |
746 | }; |
745 | }; |
747 | int |
746 | int trcnt=sizeof(trlist)/sizeof(trlist[0]); |
748 | char *p1, *p2, buf[ |
747 | char *p1, *p2, *q, buf[MAX_LINELEN+1], lbuf[16]; |
749 | 748 | ||
750 | if(index(serial)) return; |
749 | if(index(serial)) return; |
751 | fprintf(listf,"%s\n",mod[serial].name); |
750 | fprintf(listf,"%s\n",mod[serial].name); |
752 | fprintf(titf,"%d:%s\n",serial,gsindbuf[s_title]); |
751 | fprintf(titf,"%d:%s\n",serial,gsindbuf[s_title]); |
753 | fprintf(descf,"%d:%s\n",serial,gsindbuf[s_description]); |
752 | fprintf(descf,"%d:%s\n",serial,gsindbuf[s_description]); |
754 | fprintf(remf,"%d:%s\n",serial,gsindbuf[s_information]); |
753 | fprintf(remf,"%d:%s\n",serial,gsindbuf[s_information]); |
755 | fprintf(titlef,"%s:%s\n",mod[serial].name,gsindbuf[s_title]); |
754 | fprintf(titlef,"%s:%s\n",mod[serial].name,gsindbuf[s_title]); |
756 | 755 | ||
- | 756 | /* Normalize the information of trlist, using dictionary |
|
- | 757 | * -- bases/sys/domain.xx without suffix translation (--> english version) |
|
- | 758 | */ |
|
757 | entrycount=dentrycount; dicbuf=ddicbuf; |
759 | entrycount=dentrycount; dicbuf=ddicbuf; |
758 | memmove(entry,dentry,dentrycount*sizeof(entry[0])); |
760 | memmove(entry,dentry,dentrycount*sizeof(entry[0])); |
759 | unknown_type=unk_leave; |
761 | unknown_type=unk_leave; |
760 | for(i=0;i< |
762 | for(i=0;i<trcnt;i++) { |
761 | detag(gsindbuf[trlist[i]]); |
763 | detag(gsindbuf[trlist[i]]); |
762 | deaccent2(gsindbuf[trlist[i]]); |
764 | deaccent2(gsindbuf[trlist[i]]); |
763 | comma(gsindbuf[trlist[i]]); |
765 | comma(gsindbuf[trlist[i]]); |
764 | singlespace2(gsindbuf[trlist[i]]); |
766 | singlespace2(gsindbuf[trlist[i]]); |
765 | translate(gsindbuf[trlist[i]]); |
767 | translate(gsindbuf[trlist[i]]); |
766 | } |
768 | } |
- | 769 | /* Normalize the information, using dictionary |
|
- | 770 | * bases/sys/words.xx with suffix translation |
|
767 | 771 | */ |
|
768 | entrycount=mentrycount; dicbuf=mdicbuf; |
772 | entrycount=mentrycount; dicbuf=mdicbuf; |
769 | memmove(entry,mentry,mentrycount*sizeof(entry[0])); |
773 | memmove(entry,mentry,mentrycount*sizeof(entry[0])); |
770 | unknown_type=unk_leave; |
774 | unknown_type=unk_leave;/* used in translator_.c */ |
771 | for(i=0;i< |
775 | for(i=0;i<trcnt;i++) { |
772 | suffix_translate(gsindbuf[trlist[i]]); |
776 | suffix_translate(gsindbuf[trlist[i]]); |
773 | translate(gsindbuf[trlist[i]]); |
777 | translate(gsindbuf[trlist[i]]); |
774 | } |
778 | } |
- | 779 | ||
- | 780 | /* taken contains all words already seen in the module index */ |
|
775 | taken[0]=0; takenlen=tweight=0; |
781 | taken[0]=0; takenlen=tweight=0; |
- | 782 | /* append words of title */ |
|
776 | ovlstrcpy(buf,gsindbuf[s_title]); towords(buf); |
783 | ovlstrcpy(buf,gsindbuf[s_title]); towords(buf); |
777 | for(p1=find_word_start(buf);*p1; |
784 | for(p1=find_word_start(buf);*p1; |
778 | p1=find_word_start(p2)) { |
785 | p1=find_word_start(p2)) { |
779 | p2=find_word_end(p1); if(*p2) *p2++=0; |
786 | p2=find_word_end(p1); if(*p2) *p2++=0; |
780 | sappenditem(p1,lind,serial,4); |
787 | sappenditem(p1,lind,serial,4); |
781 | } |
788 | } |
- | 789 | ||
- | 790 | /* extract words of every other information except level */ |
|
782 | snprintf(buf,sizeof(buf),"%s %s %s %s", |
791 | snprintf(buf,sizeof(buf),"%s %s %s %s", |
783 | gsindbuf[s_description],gsindbuf[s_keywords], |
792 | gsindbuf[s_description],gsindbuf[s_keywords], |
784 | gsindbuf[s_domain],gsindbuf[s_information]); |
793 | gsindbuf[s_domain],gsindbuf[s_information]); |
785 | towords(buf); |
794 | towords(buf); |
786 | for(p1=find_word_start(buf);*p1;p1=find_word_start(p2)) { |
795 | for(p1=find_word_start(buf);*p1;p1=find_word_start(p2)) { |
787 | p2=find_word_end(p1); if(*p2) *p2++=0; |
796 | p2=find_word_end(p1); if(*p2) *p2++=0; |
788 | sappenditem(p1,lind,serial,2); |
797 | sappenditem(p1,lind,serial,2); |
789 | } |
798 | } |
- | 799 | /* this time the dictionary is the group dictionary sys/wgrp/wgrp |
|
- | 800 | * with a g (groupdic), not an m (maindic) . see below main, suffix, group. |
|
- | 801 | * and delete unknown ?? and translate |
|
- | 802 | */ |
|
790 | entrycount=gentrycount; dicbuf=gdicbuf; |
803 | entrycount=gentrycount; dicbuf=gdicbuf; |
791 | memmove(entry,gentry,gentrycount*sizeof(entry[0])); |
804 | memmove(entry,gentry,gentrycount*sizeof(entry[0])); |
- | 805 | ||
- | 806 | /* append words of every title information */ |
|
- | 807 | ovlstrcpy(buf,gsindbuf[s_title]); |
|
792 | unknown_type=unk_delete; |
808 | unknown_type=unk_delete; |
793 |
|
809 | translate(buf); |
794 | for(p1=find_word_start(buf); *p1; p1=find_word_start(p2)) { |
810 | for(p1=find_word_start(buf); *p1; p1=find_word_start(p2)) { |
795 | p2=strchr(p1,','); |
811 | p2=strchr(p1,','); |
796 | if(p2!=NULL) *p2++=0; else p2=p1+strlen(p1); |
812 | if(p2!=NULL) *p2++=0; else p2=p1+strlen(p1); |
797 | if(strlen(p1)<=0) continue; |
813 | if(strlen(p1)<=0) continue; |
798 | sappenditem(p1,lind,serial,4); |
814 | sappenditem(p1,lind,serial,4); |
799 | } |
815 | } |
- | 816 | ||
800 |
|
817 | /* append words (or group of words) of keywords and domain */ |
801 | snprintf(buf,sizeof(buf),"%s, %s", |
818 | snprintf(buf,sizeof(buf),"%s, %s", |
802 | gsindbuf[s_keywords], |
819 | gsindbuf[s_keywords], |
803 | gsindbuf[s_domain]); |
820 | gsindbuf[s_domain]); |
- | 821 | unknown_type=unk_leave; |
|
804 | translate(buf); |
822 | translate(buf); |
805 | for(p1=find_word_start(buf); *p1; |
823 | for(p1=find_word_start(buf); *p1; p1=find_word_start(p2)) { |
806 | p1=find_word_start(p2)) { |
- | |
807 | p2=strchr(p1,','); |
824 | p2=strchr(p1,','); |
808 | if(p2!=NULL) *p2++=0; else p2=p1+strlen(p1); |
825 | if(p2!=NULL) *p2++=0; else p2=p1+strlen(p1); |
809 | if(strlen(p1)<=0) continue; |
826 | if(strlen(p1)<=0) continue; |
810 | sappenditem(p1,lind,serial,2); |
827 | sappenditem(p1,lind,serial,2); |
811 | } |
828 | } |
- | 829 | ||
- | 830 | /* append level information, with weight 2 */ |
|
- | 831 | snprintf(buf,sizeof(buf),"%s",gsindbuf[s_level]); |
|
- | 832 | ovlstrcpy(lbuf,"level"); |
|
- | 833 | for(p1=buf; *p1; p1++) if(!isalnum(*p1)) *p1=' '; |
|
- | 834 | q=buf+strlen(buf); |
|
- | 835 | for(p1=find_word_start(buf); (*p1) && (p1 < q) ; |
|
- | 836 | p1=find_word_start(p2)) { |
|
- | 837 | p2=find_word_end(p1); |
|
- | 838 | if(p2!=NULL) *p2++=0; else p2=p1+strlen(p1); |
|
- | 839 | if(strncmp(p1, "Lang" , p2-p1) && |
|
- | 840 | (!isalpha(*p1) || (!isdigit(*(p1+1))) || |
|
- | 841 | (*(p1+1)!=0 && *(p1+2)!=0))) |
|
- | 842 | continue; |
|
- | 843 | *p1=tolower(*p1); |
|
- | 844 | ovlstrcpy(lbuf+strlen("level"),p1); |
|
- | 845 | sappenditem(lbuf,lind,serial,2); |
|
- | 846 | } |
|
- | 847 | /* append total weight of module to weight file site2/weight.xx */ |
|
812 | fprintf(weightf,"%d:%d\n",serial,tweight); |
848 | fprintf(weightf,"%d:%d\n",serial,tweight); |
813 | } |
849 | } |
814 | 850 | ||
815 | void sgs(char *outdir, int index(int)) |
851 | void sgs(char *outdir, int index(int)) |
816 | { |
852 | { |