Rev 7915 | Rev 8123 | Go to most recent revision | Show entire file | Ignore whitespace | Details | Blame | Last modification | View Log | RSS feed
Rev 7915 | Rev 8100 | ||
---|---|---|---|
Line 18... | Line 18... | ||
18 | /* This is an internal program, |
18 | /* This is an internal program, |
19 | * used to index modules for search engine. |
19 | * used to index modules for search engine. |
20 | */ |
20 | */ |
21 | 21 | ||
22 | #include "../wims.h" |
22 | #include "../wims.h" |
23 | #include "../Lib/ |
23 | #include "../Lib/libwims.h" |
24 | 24 | ||
25 | #define MAX_LANGS MAX_LANGUAGES |
25 | #define MAX_LANGS MAX_LANGUAGES |
26 | #define MAX_MODULES 65536 |
26 | #define MAX_MODULES 65536 |
27 | char *moduledir= "public_html/modules"; |
27 | char *moduledir= "public_html/modules"; |
28 | char *sheetdir= "public_html/bases/sheet"; |
28 | char *sheetdir= "public_html/bases/sheet"; |
Line 73... | Line 73... | ||
73 | } mod[MAX_MODULES]; |
73 | } mod[MAX_MODULES]; |
74 | int modcnt; |
74 | int modcnt; |
75 | 75 | ||
76 | char *mlist; |
76 | char *mlist; |
77 | 77 | ||
- | 78 | /* |
|
78 | void *xmalloc(size_t n) |
79 | void *xmalloc(size_t n) |
79 | { |
80 | { |
80 | void *p; |
81 | void *p; |
81 | p=malloc(n); |
82 | p=malloc(n); |
82 | if(p==NULL) { |
83 | if(p==NULL) { |
83 | printf("Malloc failure.\n"); |
84 | printf("Malloc failure.\n"); |
84 | exit(1); |
85 | exit(1); |
85 | } |
86 | } |
86 | return p; |
87 | return p; |
87 | } |
88 | } |
- | 89 | */ |
|
88 | 90 | ||
- | 91 | /* |
|
89 |
|
92 | char *acctab="çéèêëúùûüáàâäãóòôöõíìïîñýÿÇÉÈÊËÚÙÛÜÁÀÂÃÄÓÒÔÖÕÍÌÏÎÑÝ", |
90 |
|
93 | *deatab="ceeeeuuuuaaaaaoooooiiiinyyCEEEEUUUUAAAAAOOOOOIIIINY"; |
91 | 94 | */ |
|
92 | /* fold known accented letters to unaccented, other strange characters to space |
95 | /* fold known accented letters to unaccented, other strange characters to space |
93 | * apostrophe is among the exceptions to be kept (important for multi-word expressions) |
96 | * apostrophe is among the exceptions to be kept (important for multi-word expressions) |
94 | */ |
97 | */ |
95 | void |
98 | void deaccent2(char *p) |
96 | { |
99 | { |
97 | char *sp; |
100 | char *sp; |
98 | char *v; |
101 | char *v; |
99 | for(sp=p;*sp;sp++) { |
102 | for(sp=p;*sp;sp++) { |
100 | if(*sp<0 && (v=strchr(acctab,*sp))!=NULL) |
103 | if(*sp<0 && (v=strchr(acctab,*sp))!=NULL) |
Line 110... | Line 113... | ||
110 | char *pp; |
113 | char *pp; |
111 | for(pp=p;*pp;pp++) if(!isalnum(*pp) && strchr("&$+*",*pp)==0) *pp=' '; |
114 | for(pp=p;*pp;pp++) if(!isalnum(*pp) && strchr("&$+*",*pp)==0) *pp=' '; |
112 | } |
115 | } |
113 | 116 | ||
114 | /* Points to the end of the word */ |
117 | /* Points to the end of the word */ |
- | 118 | /* |
|
115 | char *find_word_end(char *p) |
119 | char *find_word_end(char *p) |
116 | { |
120 | { |
117 | int i; |
121 | int i; |
118 | for(i=0;!isspace(*p) && *p!=0 && i<MAX_LINELEN; p++,i++); |
122 | for(i=0;!isspace(*p) && *p!=0 && i<MAX_LINELEN; p++,i++); |
119 | return p; |
123 | return p; |
120 | } |
124 | } |
121 | 125 | */ |
|
122 | /* Strips leading spaces */ |
126 | /* Strips leading spaces */ |
- | 127 | /* |
|
123 | char *find_word_start(char *p) |
128 | char *find_word_start(char *p) |
124 | { |
129 | { |
125 | int i; |
130 | int i; |
126 | for(i=0; isspace(*p) && i<MAX_LINELEN; p++,i++); |
131 | for(i=0; isspace(*p) && i<MAX_LINELEN; p++,i++); |
127 | return p; |
132 | return p; |
128 | } |
133 | } |
129 | 134 | */ |
|
130 | /* Find first occurrence of word */ |
135 | /* Find first occurrence of word */ |
131 | char * |
136 | char *wordchr2(char *p, char *w) |
132 | { |
137 | { |
133 | char *r; |
138 | char *r; |
134 | 139 | ||
135 | for(r=strstr(p,w);r!=NULL && |
140 | for(r=strstr(p,w);r!=NULL && |
136 | ( (r>p && !isspace(*(r-1))) || (!isspace(*(r+strlen(w))) && *(r+strlen(w))!=0) ); |
141 | ( (r>p && !isspace(*(r-1))) || (!isspace(*(r+strlen(w))) && *(r+strlen(w))!=0) ); |
Line 139... | Line 144... | ||
139 | } |
144 | } |
140 | 145 | ||
141 | /* find a variable in a string (math expression). |
146 | /* find a variable in a string (math expression). |
142 | * Returns the pointer or NULL. |
147 | * Returns the pointer or NULL. |
143 | */ |
148 | */ |
144 |
|
149 | /*char *varchr(char *p, char *v) |
145 | { |
150 | { |
146 | char *pp; int n=strlen(v); |
151 | char *pp; int n=strlen(v); |
147 | for(pp=strstr(p,v); pp!=NULL; pp=strstr(pp+1,v)) { |
152 | for(pp=strstr(p,v); pp!=NULL; pp=strstr(pp+1,v)) { |
148 | if((pp==p || !isalnum(*(pp-1))) && |
153 | if((pp==p || !isalnum(*(pp-1))) && |
149 | (!isalnum(*(pp+n)) || *(pp+n)==0)) break; |
154 | (!isalnum(*(pp+n)) || *(pp+n)==0)) break; |
150 | } |
155 | } |
151 | return pp; |
156 | return pp; |
152 | } |
157 | } |
153 | 158 | */ |
|
154 | /* strip trailing spaces; return string end. */ |
159 | /* strip trailing spaces; return string end. */ |
155 | char * |
160 | char *strip_trailing_spaces2(char *p) |
156 | { |
161 | { |
157 | char *pp; |
162 | char *pp; |
158 | if(*p==0) return p; |
163 | if(*p==0) return p; |
159 | for(pp=p+strlen(p)-1; pp>=p && isspace(*pp); *(pp--)=0); |
164 | for(pp=p+strlen(p)-1; pp>=p && isspace(*pp); *(pp--)=0); |
160 | return pp; |
165 | return pp; |
Line 165... | Line 170... | ||
165 | char *pp; |
170 | char *pp; |
166 | pp=p; if(*pp=='<') pp++; |
171 | pp=p; if(*pp=='<') pp++; |
167 | for(; *pp && *pp!='>'; pp++) { |
172 | for(; *pp && *pp!='>'; pp++) { |
168 | if(*pp=='<') { |
173 | if(*pp=='<') { |
169 | pp=find_tag_end(pp)-1; continue; |
174 | pp=find_tag_end(pp)-1; continue; |
170 | } |
175 | } |
171 | if(*pp=='"') { |
176 | if(*pp=='"') { |
172 | pp=strchr(pp+1,'"'); |
177 | pp=strchr(pp+1,'"'); |
173 | if(pp==NULL) return p+strlen(p); else continue; |
178 | if(pp==NULL) return p+strlen(p); else continue; |
174 | } |
179 | } |
175 | if(*pp=='\'') { |
180 | if(*pp=='\'') { |
176 | pp=strchr(pp+1,'\''); |
181 | pp=strchr(pp+1,'\''); |
177 | if(pp==NULL) return p+strlen(p); else continue; |
182 | if(pp==NULL) return p+strlen(p); else continue; |
178 | } |
183 | } |
179 | } |
184 | } |
180 | if(*pp=='>') pp++; return pp; |
185 | if(*pp=='>') pp++; return pp; |
181 | } |
186 | } |
182 | 187 | ||
183 | char *find_tag(char *p, char *tag) |
188 | char *find_tag(char *p, char *tag) |
184 | { |
189 | { |
185 | char *pp; |
190 | char *pp; |
186 | int len; |
191 | int len; |
187 | len=strlen(tag); |
192 | len=strlen(tag); |
188 | for(pp=strchr(p,'<'); pp!=NULL && *pp; pp=strchr(pp+1,'<')) { |
193 | for(pp=strchr(p,'<'); pp!=NULL && *pp; pp=strchr(pp+1,'<')) { |
189 | if(strncasecmp(pp+1,tag,len)==0 && !isalnum(*(pp+1+len))) return pp; |
194 | if(strncasecmp(pp+1,tag,len)==0 && !isalnum(*(pp+1+len))) return pp; |
Line 201... | Line 206... | ||
201 | ovlstrcpy(pp,p2); |
206 | ovlstrcpy(pp,p2); |
202 | } |
207 | } |
203 | } |
208 | } |
204 | 209 | ||
205 | /* modify a string. Bufferlen must be at least MAX_LINELEN */ |
210 | /* modify a string. Bufferlen must be at least MAX_LINELEN */ |
206 | void |
211 | void string_modify3(char *start, char *bad_beg, char *bad_end, char *good,...) |
207 | { |
212 | { |
208 | char buf[MAX_LINELEN+1]; |
213 | char buf[MAX_LINELEN+1]; |
209 | va_list vp; |
214 | va_list vp; |
210 | 215 | ||
211 | va_start(vp,good); |
216 | va_start(vp,good); |
212 | vsnprintf(buf,sizeof(buf),good,vp); va_end(vp); |
217 | vsnprintf(buf,sizeof(buf),good,vp); va_end(vp); |
213 | if(strlen(start)-(bad_end-bad_beg)+strlen(buf)>=MAX_LINELEN) |
218 | if(strlen(start)-(bad_end-bad_beg)+strlen(buf)>=MAX_LINELEN) |
214 | return; |
219 | return; /* this is an error situation. */ |
215 | strcat(buf,bad_end); |
220 | strcat(buf,bad_end); |
216 | ovlstrcpy(bad_beg,buf); |
221 | ovlstrcpy(bad_beg,buf); |
217 | } |
222 | } |
218 | 223 | ||
219 | /* add a space after comma to see end of words */ |
224 | /* add a space after comma to see end of words */ |
220 | 225 | ||
221 | void comma(char *p) |
226 | void comma(char *p) |
222 | { |
227 | { |
223 | char *pp; |
228 | char *pp; |
224 | for(pp=strchr(p,','); pp; pp=strchr(pp+1,',')) |
229 | for(pp=strchr(p,','); pp; pp=strchr(pp+1,',')) |
225 |
|
230 | string_modify3(p,pp,pp+1,", "); |
226 | } |
231 | } |
227 | 232 | ||
228 | void _getdef(char buf[], char *name, char value[]) |
233 | void _getdef(char buf[], char *name, char value[]) |
229 | { |
234 | { |
230 | char *p1, *p2, *p3; |
235 | char *p1, *p2, *p3; |
Line 238... | Line 243... | ||
238 | p3=strchr(p2,'\n'); |
243 | p3=strchr(p2,'\n'); |
239 | p2=find_word_start(p2+1); |
244 | p2=find_word_start(p2+1); |
240 | if(p3 <= p2) continue; |
245 | if(p3 <= p2) continue; |
241 | snprintf(value,MAX_LINELEN,"%s",p2); |
246 | snprintf(value,MAX_LINELEN,"%s",p2); |
242 | if(p3!=NULL && p3-p2<MAX_LINELEN) value[p3-p2]=0; |
247 | if(p3!=NULL && p3-p2<MAX_LINELEN) value[p3-p2]=0; |
243 |
|
248 | strip_trailing_spaces2(value); |
244 | break; |
249 | break; |
245 | } |
250 | } |
246 | } |
251 | } |
247 | 252 | ||
248 | /* Get variable definition from a file. |
253 | /* Get variable definition from a file. |
Line 466... | Line 471... | ||
466 | for(i=0,p1=find_word_start(ibuf); |
471 | for(i=0,p1=find_word_start(ibuf); |
467 | i<SHEETINDEX_NO-1 && *p1!=':' && *p1!=0; |
472 | i<SHEETINDEX_NO-1 && *p1!=':' && *p1!=0; |
468 | i++,p1=p2) { |
473 | i++,p1=p2) { |
469 | p2=strchr(p1,'\n'); |
474 | p2=strchr(p1,'\n'); |
470 | if(p2!=NULL) *p2++=0; else p2=p1+strlen(p1); |
475 | if(p2!=NULL) *p2++=0; else p2=p1+strlen(p1); |
471 | p1=find_word_start(p1); |
476 | p1=find_word_start(p1); strip_trailing_spaces2(p1); |
472 | snprintf(sindbuf[i],MAX_LINELEN,"%s",p1); |
477 | snprintf(sindbuf[i],MAX_LINELEN,"%s",p1); |
473 | } |
478 | } |
474 | p2=strstr(p1,"\n:"); if(p2==NULL) p2=p1+strlen(p1); |
479 | p2=strstr(p1,"\n:"); if(p2==NULL) p2=p1+strlen(p1); |
475 | else *p2=0; |
480 | else *p2=0; |
476 | p1=find_word_start(p1); |
481 | p1=find_word_start(p1); strip_trailing_spaces2(p1); |
477 | for(p2=p1;*p2;p2++) if(*p2=='\n') *p2=' '; |
482 | for(p2=p1;*p2;p2++) if(*p2=='\n') *p2=' '; |
478 | ovlstrcpy(sindbuf[s_information],p1); |
483 | ovlstrcpy(sindbuf[s_information],p1); |
479 | return 0; |
484 | return 0; |
480 | } |
485 | } |
481 | 486 | ||
Line 489... | Line 494... | ||
489 | int i, ll; |
494 | int i, ll; |
490 | char *p; |
495 | char *p; |
491 | FILE *f; |
496 | FILE *f; |
492 | 497 | ||
493 | if(!isalnum(*word) || (ll=strlen(word))<2 || |
498 | if(!isalnum(*word) || (ll=strlen(word))<2 || |
494 |
|
499 | wordchr2(taken,word)!=NULL || |
495 |
|
500 | wordchr2(ignore[lind],word)!=NULL || |
496 | takenlen>=MAX_LINELEN-ll-16) |
501 | takenlen>=MAX_LINELEN-ll-16) |
497 | return; |
502 | return; |
498 | if(ll==2 && (!isdigit(word[0]) || !isalpha(word[1]))) return; |
503 | if(ll==2 && (!isdigit(word[0]) || !isalpha(word[1]))) return; |
499 | for(p=word;*p;p++) if(!isalnum(*p) && *p!=' ') return; |
504 | for(p=word;*p;p++) if(!isalnum(*p) && *p!=' ') return; |
500 | taken[takenlen++]=' '; taken[takenlen++]=' '; |
505 | taken[takenlen++]=' '; taken[takenlen++]=' '; |
Line 546... | Line 551... | ||
546 | towords(indbuf[i_category]); |
551 | towords(indbuf[i_category]); |
547 | /* list the categories (among A=all,X=eXercise,O,D,...) corresponding |
552 | /* list the categories (among A=all,X=eXercise,O,D,...) corresponding |
548 | * to this module |
553 | * to this module |
549 | */ |
554 | */ |
550 | for(i=catcnt=0;i<catno && catcnt<16;i++) { |
555 | for(i=catcnt=0;i<catno && catcnt<16;i++) { |
551 | if( |
556 | if(wordchr2(indbuf[i_category],cat[i].name)!=NULL) |
552 | categories[catcnt++]=cat[i].typ; |
557 | categories[catcnt++]=cat[i].typ; |
553 | } |
558 | } |
554 | if(catcnt==0) return; |
559 | if(catcnt==0) return; |
555 | if(categories[0]!=cat[0].typ) |
560 | if(categories[0]!=cat[0].typ) |
556 | categories[catcnt++]=cat[0].typ; |
561 | categories[catcnt++]=cat[0].typ; |
Line 571... | Line 576... | ||
571 | fprintf(versionf,"%d:%s\n",serial,indbuf[i_version]); |
576 | fprintf(versionf,"%d:%s\n",serial,indbuf[i_version]); |
572 | 577 | ||
573 | /* add module's information in html page for robots */ |
578 | /* add module's information in html page for robots */ |
574 | snprintf(buf,sizeof(buf),"%s",indbuf[i_description]); |
579 | snprintf(buf,sizeof(buf),"%s",indbuf[i_description]); |
575 | for(pp=strchr(buf,','); pp; pp=strchr(pp,',')) |
580 | for(pp=strchr(buf,','); pp; pp=strchr(pp,',')) |
576 |
|
581 | string_modify3(buf,pp,pp+1,"&#44;"); |
577 | if(strcmp(module_language,lang[lind])==0) |
582 | if(strcmp(module_language,lang[lind])==0) |
578 | fprintf(robotf,"%s ,%s,%s,%s,%s\n",name,module_language,name, |
583 | fprintf(robotf,"%s ,%s,%s,%s,%s\n",name,module_language,name, |
579 | indbuf[i_title], buf); |
584 | indbuf[i_title], buf); |
580 | 585 | ||
581 | /* Normalize the information of trlist, using dictionary |
586 | /* Normalize the information of trlist, using dictionary |
Line 584... | Line 589... | ||
584 | entrycount=dentrycount; dicbuf=ddicbuf; |
589 | entrycount=dentrycount; dicbuf=ddicbuf; |
585 | memmove(entry,dentry,dentrycount*sizeof(entry[0])); |
590 | memmove(entry,dentry,dentrycount*sizeof(entry[0])); |
586 | unknown_type=unk_leave; |
591 | unknown_type=unk_leave; |
587 | for(i=0;i<trcnt;i++) { |
592 | for(i=0;i<trcnt;i++) { |
588 | detag(indbuf[trlist[i]]); |
593 | detag(indbuf[trlist[i]]); |
589 |
|
594 | deaccent2(indbuf[trlist[i]]); |
590 | comma(indbuf[trlist[i]]); |
595 | comma(indbuf[trlist[i]]); |
591 |
|
596 | singlespace2(indbuf[trlist[i]]); |
592 | translate(indbuf[trlist[i]]); |
597 | translate(indbuf[trlist[i]]); |
593 | } |
598 | } |
594 | /* Normalize the information, using dictionary |
599 | /* Normalize the information, using dictionary |
595 | * bases/sys/words.xx with suffix translation |
600 | * bases/sys/words.xx with suffix translation |
596 | */ |
601 | */ |
Line 718... | Line 723... | ||
718 | { |
723 | { |
719 | int ll; |
724 | int ll; |
720 | char *p; |
725 | char *p; |
721 | 726 | ||
722 | if(!isalnum(*word) || (ll=strlen(word))<2 || |
727 | if(!isalnum(*word) || (ll=strlen(word))<2 || |
723 |
|
728 | wordchr2(taken,word)!=NULL || |
724 |
|
729 | wordchr2(ignore[lind],word)!=NULL || |
725 | takenlen>=MAX_LINELEN-ll-16) |
730 | takenlen>=MAX_LINELEN-ll-16) |
726 | return; |
731 | return; |
727 | if(ll==2 && (!isdigit(word[0]) || !isalpha(word[1]))) return; |
732 | if(ll==2 && (!isdigit(word[0]) || !isalpha(word[1]))) return; |
728 | for(p=word;*p;p++) if(!isalnum(*p) && *p!=' ') return; |
733 | for(p=word;*p;p++) if(!isalnum(*p) && *p!=' ') return; |
729 | taken[takenlen++]=' ';taken[takenlen++]=' '; |
734 | taken[takenlen++]=' ';taken[takenlen++]=' '; |
Line 750... | Line 755... | ||
750 | entrycount=dentrycount; dicbuf=ddicbuf; |
755 | entrycount=dentrycount; dicbuf=ddicbuf; |
751 | memmove(entry,dentry,dentrycount*sizeof(entry[0])); |
756 | memmove(entry,dentry,dentrycount*sizeof(entry[0])); |
752 | unknown_type=unk_leave; |
757 | unknown_type=unk_leave; |
753 | for(i=0;i<trcnt;i++) { |
758 | for(i=0;i<trcnt;i++) { |
754 | detag(sindbuf[trlist[i]]); |
759 | detag(sindbuf[trlist[i]]); |
755 |
|
760 | deaccent2(sindbuf[trlist[i]]); |
756 | comma(sindbuf[trlist[i]]); |
761 | comma(sindbuf[trlist[i]]); |
757 |
|
762 | singlespace2(sindbuf[trlist[i]]); |
758 | translate(sindbuf[trlist[i]]); |
763 | translate(sindbuf[trlist[i]]); |
759 | } |
764 | } |
760 | 765 | ||
761 | entrycount=mentrycount; dicbuf=mdicbuf; |
766 | entrycount=mentrycount; dicbuf=mdicbuf; |
762 | memmove(entry,mentry,mentrycount*sizeof(entry[0])); |
767 | memmove(entry,mentry,mentrycount*sizeof(entry[0])); |