Rev 3247 | Rev 6394 | Go to most recent revision | Show entire file | Ignore whitespace | Details | Blame | Last modification | View Log | RSS feed
Rev 3247 | Rev 3718 | ||
---|---|---|---|
Line 17... | Line 17... | ||
17 | 17 | ||
18 | /* This is an internal program, |
18 | /* This is an internal program, |
19 | * used to index modules for search engine. */ |
19 | * used to index modules for search engine. */ |
20 | 20 | ||
21 | #include "../wims.h" |
21 | #include "../wims.h" |
- | 22 | #include "../Lib/basicstr.c" |
|
22 | 23 | ||
23 | #define MAX_LANGS MAX_LANGUAGES |
24 | #define MAX_LANGS MAX_LANGUAGES |
24 | #define MAX_MODULES 65536 |
25 | #define MAX_MODULES 65536 |
25 | char *moduledir= "public_html/modules"; |
26 | char *moduledir= "public_html/modules"; |
26 | char *sheetdir= "public_html/bases/sheet"; |
27 | char *sheetdir= "public_html/bases/sheet"; |
Line 190... | Line 191... | ||
190 | { |
191 | { |
191 | char *pp, *p2; |
192 | char *pp, *p2; |
192 | for(pp=strchr(p,'<'); pp!=NULL; pp=strchr(pp,'<')) { |
193 | for(pp=strchr(p,'<'); pp!=NULL; pp=strchr(pp,'<')) { |
193 | p2=find_tag_end(pp); |
194 | p2=find_tag_end(pp); |
194 | if(*p2==0) {*pp=0; return; } |
195 | if(*p2==0) {*pp=0; return; } |
195 |
|
196 | ovlstrcpy(pp,p2); |
196 | } |
197 | } |
197 | } |
198 | } |
198 | 199 | ||
199 | /* modify a string. Bufferlen must be at least MAX_LINELEN */ |
200 | /* modify a string. Bufferlen must be at least MAX_LINELEN */ |
200 | void string_modify(char *start, char *bad_beg, char *bad_end, char *good,...) |
201 | void string_modify(char *start, char *bad_beg, char *bad_end, char *good,...) |
Line 205... | Line 206... | ||
205 | va_start(vp,good); |
206 | va_start(vp,good); |
206 | vsnprintf(buf,sizeof(buf),good,vp); va_end(vp); |
207 | vsnprintf(buf,sizeof(buf),good,vp); va_end(vp); |
207 | if(strlen(start)-(bad_end-bad_beg)+strlen(buf)>=MAX_LINELEN) |
208 | if(strlen(start)-(bad_end-bad_beg)+strlen(buf)>=MAX_LINELEN) |
208 | return; |
209 | return; |
209 | strcat(buf,bad_end); |
210 | strcat(buf,bad_end); |
210 |
|
211 | ovlstrcpy(bad_beg,buf); |
211 | } |
212 | } |
212 | 213 | ||
213 | void _getdef(char buf[], char *name, char value[]) |
214 | void _getdef(char buf[], char *name, char value[]) |
214 | { |
215 | { |
215 | char *p1, *p2, *p3; |
216 | char *p1, *p2, *p3; |
Line 278... | Line 279... | ||
278 | if(langcnt==0) { /* default languages */ |
279 | if(langcnt==0) { /* default languages */ |
279 | langcnt=DEFAULT_LANGCNT; |
280 | langcnt=DEFAULT_LANGCNT; |
280 | } |
281 | } |
281 | s=getenv("mlist"); if(s==NULL) exit(1); |
282 | s=getenv("mlist"); if(s==NULL) exit(1); |
282 | l=strlen(s); if(l<0 || l>100*MAX_LINELEN) exit(1); |
283 | l=strlen(s); if(l<0 || l>100*MAX_LINELEN) exit(1); |
283 | mlist=xmalloc(l+16); |
284 | mlist=xmalloc(l+16); ovlstrcpy(mlist,s); old=""; |
284 | for(i=0;i<langcnt;i++) { |
285 | for(i=0;i<langcnt;i++) { |
285 | snprintf(buf,sizeof(buf),"%s/%s.%s",dicdir,ignoredic,lang[i]); |
286 | snprintf(buf,sizeof(buf),"%s/%s.%s",dicdir,ignoredic,lang[i]); |
286 | f=fopen(buf,"r"); if(f==NULL) continue; |
287 | f=fopen(buf,"r"); if(f==NULL) continue; |
287 | l=fread(ignore[i],1,MAX_LINELEN,f);fclose(f); |
288 | l=fread(ignore[i],1,MAX_LINELEN,f);fclose(f); |
288 | if(l<0 || l>=MAX_LINELEN) l=0; |
289 | if(l<0 || l>=MAX_LINELEN) l=0; |
Line 346... | Line 347... | ||
346 | int i,l,thislang; |
347 | int i,l,thislang; |
347 | 348 | ||
348 | modcnt=0; |
349 | modcnt=0; |
349 | s=getenv("slist"); if(s==NULL) return; |
350 | s=getenv("slist"); if(s==NULL) return; |
350 | l=strlen(s); if(l<0 || l>100*MAX_LINELEN) return; |
351 | l=strlen(s); if(l<0 || l>100*MAX_LINELEN) return; |
351 | mlist=xmalloc(l+16); |
352 | mlist=xmalloc(l+16); ovlstrcpy(mlist,s); |
352 | for(p1=find_word_start(mlist); *p1 && modcnt<MAX_MODULES; p1=find_word_start(p2)) { |
353 | for(p1=find_word_start(mlist); *p1 && modcnt<MAX_MODULES; p1=find_word_start(p2)) { |
353 | p2=find_word_end(p1); |
354 | p2=find_word_end(p1); |
354 | l=p2-p1; if(*p2) *p2++=0; |
355 | l=p2-p1; if(*p2) *p2++=0; |
355 | for(i=0;i<langcnt;i++) if(strncasecmp(lang[i],p1,2)==0) break; |
356 | for(i=0;i<langcnt;i++) if(strncasecmp(lang[i],p1,2)==0) break; |
356 | if(i<langcnt) thislang=i; else continue; |
357 | if(i<langcnt) thislang=i; else continue; |
Line 422... | Line 423... | ||
422 | } |
423 | } |
423 | p=find_word_start(indbuf[i_language]); |
424 | p=find_word_start(indbuf[i_language]); |
424 | if(isalpha(*p) && isalpha(*(p+1))) { |
425 | if(isalpha(*p) && isalpha(*(p+1))) { |
425 | memmove(module_language,p,2); module_language[2]=0; |
426 | memmove(module_language,p,2); module_language[2]=0; |
426 | } |
427 | } |
427 | else |
428 | else ovlstrcpy(module_language,"en"); |
428 | return 0; |
429 | return 0; |
429 | } |
430 | } |
430 | 431 | ||
431 | int sheet_index(int serial) |
432 | int sheet_index(int serial) |
432 | { |
433 | { |
Line 449... | Line 450... | ||
449 | } |
450 | } |
450 | p2=strstr(p1,"\n:"); if(p2==NULL) p2=p1+strlen(p1); |
451 | p2=strstr(p1,"\n:"); if(p2==NULL) p2=p1+strlen(p1); |
451 | else *p2=0; |
452 | else *p2=0; |
452 | p1=find_word_start(p1); strip_trailing_spaces(p1); |
453 | p1=find_word_start(p1); strip_trailing_spaces(p1); |
453 | for(p2=p1;*p2;p2++) if(*p2=='\n') *p2=' '; |
454 | for(p2=p1;*p2;p2++) if(*p2=='\n') *p2=' '; |
454 |
|
455 | ovlstrcpy(sindbuf[s_remark],p1); |
455 | return 0; |
456 | return 0; |
456 | } |
457 | } |
457 | 458 | ||
458 | unsigned char categories[16]; |
459 | unsigned char categories[16]; |
459 | char taken[MAX_LINELEN+1]; |
460 | char taken[MAX_LINELEN+1]; |
Line 472... | Line 473... | ||
472 | takenlen>=MAX_LINELEN-ll-16) |
473 | takenlen>=MAX_LINELEN-ll-16) |
473 | return; |
474 | return; |
474 | if(ll==2 && (!isdigit(word[0]) || !isalpha(word[1]))) return; |
475 | if(ll==2 && (!isdigit(word[0]) || !isalpha(word[1]))) return; |
475 | for(p=word;*p;p++) if(!isalnum(*p) && *p!=' ') return; |
476 | for(p=word;*p;p++) if(!isalnum(*p) && *p!=' ') return; |
476 | taken[takenlen++]=' '; taken[takenlen++]=' '; |
477 | taken[takenlen++]=' '; taken[takenlen++]=' '; |
477 |
|
478 | ovlstrcpy(taken+takenlen,word); |
478 | takenlen+=ll; tweight+=weight; |
479 | takenlen+=ll; tweight+=weight; |
479 | snprintf(buf,sizeof(buf),"%s:%d?%d\n",word,serial,weight); |
480 | snprintf(buf,sizeof(buf),"%s:%d?%d\n",word,serial,weight); |
480 | for(i=0;i<catcnt;i++) { |
481 | for(i=0;i<catcnt;i++) { |
481 | snprintf(nbuf,sizeof(nbuf),"%s/%c.%s", |
482 | snprintf(nbuf,sizeof(nbuf),"%s/%c.%s", |
482 | outdir,categories[i],lang[lind]); |
483 | outdir,categories[i],lang[lind]); |
Line 531... | Line 532... | ||
531 | singlespace(indbuf[trlist[i]]); |
532 | singlespace(indbuf[trlist[i]]); |
532 | suffix_translate(indbuf[trlist[i]]); |
533 | suffix_translate(indbuf[trlist[i]]); |
533 | translate(indbuf[trlist[i]]); |
534 | translate(indbuf[trlist[i]]); |
534 | } |
535 | } |
535 | taken[0]=0; takenlen=tweight=0; |
536 | taken[0]=0; takenlen=tweight=0; |
536 |
|
537 | ovlstrcpy(buf,indbuf[i_title]); towords(buf); |
537 | for(p1=find_word_start(buf);*p1; |
538 | for(p1=find_word_start(buf);*p1; |
538 | p1=find_word_start(p2)) { |
539 | p1=find_word_start(p2)) { |
539 | p2=find_word_end(p1); if(*p2) *p2++=0; |
540 | p2=find_word_end(p1); if(*p2) *p2++=0; |
540 | appenditem(p1,lind,serial,4,module_language); |
541 | appenditem(p1,lind,serial,4,module_language); |
541 | } |
542 | } |
Line 549... | Line 550... | ||
549 | appenditem(p1,lind,serial,2,module_language); |
550 | appenditem(p1,lind,serial,2,module_language); |
550 | } |
551 | } |
551 | entrycount=gentrycount; dicbuf=gdicbuf; |
552 | entrycount=gentrycount; dicbuf=gdicbuf; |
552 | memmove(entry,gentry,gentrycount*sizeof(entry[0])); |
553 | memmove(entry,gentry,gentrycount*sizeof(entry[0])); |
553 | unknown_type=unk_delete; |
554 | unknown_type=unk_delete; |
554 |
|
555 | ovlstrcpy(buf,indbuf[i_title]); translate(buf); |
555 | for(p1=find_word_start(buf); *p1; |
556 | for(p1=find_word_start(buf); *p1; |
556 | p1=find_word_start(p2)) { |
557 | p1=find_word_start(p2)) { |
557 | p2=strchr(p1,','); |
558 | p2=strchr(p1,','); |
558 | if(p2!=NULL) *p2++=0; else p2=p1+strlen(p1); |
559 | if(p2!=NULL) *p2++=0; else p2=p1+strlen(p1); |
559 | if(strlen(p1)<=0) continue; |
560 | if(strlen(p1)<=0) continue; |
Line 569... | Line 570... | ||
569 | if(p2!=NULL) *p2++=0; else p2=p1+strlen(p1); |
570 | if(p2!=NULL) *p2++=0; else p2=p1+strlen(p1); |
570 | if(strlen(p1)<=0) continue; |
571 | if(strlen(p1)<=0) continue; |
571 | appenditem(p1,lind,serial,2,module_language); |
572 | appenditem(p1,lind,serial,2,module_language); |
572 | } |
573 | } |
573 | snprintf(buf,sizeof(buf),"%s",indbuf[i_level]); |
574 | snprintf(buf,sizeof(buf),"%s",indbuf[i_level]); |
574 |
|
575 | ovlstrcpy(lbuf,"level"); |
575 | for(p1=buf; *p1; p1++) if(!isalnum(*p1)) *p1=' '; |
576 | for(p1=buf; *p1; p1++) if(!isalnum(*p1)) *p1=' '; |
576 | for(p1=find_word_start(buf); *p1; |
577 | for(p1=find_word_start(buf); *p1; |
577 | p1=find_word_start(p2)) { |
578 | p1=find_word_start(p2)) { |
578 | p2=find_word_end(p1); |
579 | p2=find_word_end(p1); |
579 | if(p2!=NULL) *p2++=0; else p2=p1+strlen(p1); |
580 | if(p2!=NULL) *p2++=0; else p2=p1+strlen(p1); |
580 | if(!isalpha(*p1) || |
581 | if(!isalpha(*p1) || |
581 | (!isdigit(*(p1+1)) && *(p1+1)!=0) || |
582 | (!isdigit(*(p1+1)) && *(p1+1)!=0) || |
582 | (*(p1+1)!=0 && *(p1+2)!=0)) |
583 | (*(p1+1)!=0 && *(p1+2)!=0)) |
583 | continue; |
584 | continue; |
584 | *p1=tolower(*p1); |
585 | *p1=tolower(*p1); |
585 |
|
586 | ovlstrcpy(lbuf+strlen("level"),p1); |
586 | appenditem(lbuf,lind,serial,2,module_language); |
587 | appenditem(lbuf,lind,serial,2,module_language); |
587 | } |
588 | } |
588 | fprintf(weightf,"%d:%d\n",serial,tweight); |
589 | fprintf(weightf,"%d:%d\n",serial,tweight); |
589 | } |
590 | } |
590 | 591 | ||
Line 639... | Line 640... | ||
639 | takenlen>=MAX_LINELEN-ll-16) |
640 | takenlen>=MAX_LINELEN-ll-16) |
640 | return; |
641 | return; |
641 | if(ll==2 && (!isdigit(word[0]) || !isalpha(word[1]))) return; |
642 | if(ll==2 && (!isdigit(word[0]) || !isalpha(word[1]))) return; |
642 | for(p=word;*p;p++) if(!isalnum(*p) && *p!=' ') return; |
643 | for(p=word;*p;p++) if(!isalnum(*p) && *p!=' ') return; |
643 | taken[takenlen++]=' ';taken[takenlen++]=' '; |
644 | taken[takenlen++]=' ';taken[takenlen++]=' '; |
644 |
|
645 | ovlstrcpy(taken+takenlen,word); |
645 | takenlen+=ll; tweight+=weight; |
646 | takenlen+=ll; tweight+=weight; |
646 | fprintf(indf,"%s:%d?%d\n",word,serial,weight); |
647 | fprintf(indf,"%s:%d?%d\n",word,serial,weight); |
647 | } |
648 | } |
648 | 649 | ||
649 | void onesheet(int serial, int lind) |
650 | void onesheet(int serial, int lind) |
Line 668... | Line 669... | ||
668 | singlespace(sindbuf[trlist[i]]); |
669 | singlespace(sindbuf[trlist[i]]); |
669 | suffix_translate(sindbuf[trlist[i]]); |
670 | suffix_translate(sindbuf[trlist[i]]); |
670 | translate(sindbuf[trlist[i]]); |
671 | translate(sindbuf[trlist[i]]); |
671 | } |
672 | } |
672 | taken[0]=0; takenlen=tweight=0; |
673 | taken[0]=0; takenlen=tweight=0; |
673 |
|
674 | ovlstrcpy(buf,sindbuf[s_title]); towords(buf); |
674 | for(p1=find_word_start(buf);*p1; |
675 | for(p1=find_word_start(buf);*p1; |
675 | p1=find_word_start(p2)) { |
676 | p1=find_word_start(p2)) { |
676 | p2=find_word_end(p1); if(*p2) *p2++=0; |
677 | p2=find_word_end(p1); if(*p2) *p2++=0; |
677 | sappenditem(p1,lind,serial,4); |
678 | sappenditem(p1,lind,serial,4); |
678 | } |
679 | } |
Line 686... | Line 687... | ||
686 | sappenditem(p1,lind,serial,2); |
687 | sappenditem(p1,lind,serial,2); |
687 | } |
688 | } |
688 | entrycount=gentrycount; dicbuf=gdicbuf; |
689 | entrycount=gentrycount; dicbuf=gdicbuf; |
689 | memmove(entry,gentry,gentrycount*sizeof(entry[0])); |
690 | memmove(entry,gentry,gentrycount*sizeof(entry[0])); |
690 | unknown_type=unk_delete; |
691 | unknown_type=unk_delete; |
691 |
|
692 | ovlstrcpy(buf,sindbuf[s_title]); translate(buf); |
692 | for(p1=find_word_start(buf); *p1; |
693 | for(p1=find_word_start(buf); *p1; |
693 | p1=find_word_start(p2)) { |
694 | p1=find_word_start(p2)) { |
694 | p2=strchr(p1,','); |
695 | p2=strchr(p1,','); |
695 | if(p2!=NULL) *p2++=0; else p2=p1+strlen(p1); |
696 | if(p2!=NULL) *p2++=0; else p2=p1+strlen(p1); |
696 | if(strlen(p1)<=0) continue; |
697 | if(strlen(p1)<=0) continue; |