Subversion Repositories wimsdev

Rev

Rev 7915 | Rev 8123 | Go to most recent revision | Show entire file | Ignore whitespace | Details | Blame | Last modification | View Log | RSS feed

Rev 7915 Rev 8100
Line 18... Line 18...
18
/*  This is an internal program,
18
/*  This is an internal program,
19
 * used to index modules for search engine.
19
 * used to index modules for search engine.
20
 */
20
 */
21
 
21
 
22
#include "../wims.h"
22
#include "../wims.h"
23
#include "../Lib/basicstr.c"
23
#include "../Lib/libwims.h"
24
 
24
 
25
#define MAX_LANGS    MAX_LANGUAGES
25
#define MAX_LANGS    MAX_LANGUAGES
26
#define MAX_MODULES    65536
26
#define MAX_MODULES    65536
27
char *moduledir=    "public_html/modules";
27
char *moduledir=    "public_html/modules";
28
char *sheetdir=     "public_html/bases/sheet";
28
char *sheetdir=     "public_html/bases/sheet";
Line 73... Line 73...
73
} mod[MAX_MODULES];
73
} mod[MAX_MODULES];
74
int modcnt;
74
int modcnt;
75
 
75
 
76
char *mlist;
76
char *mlist;
77
 
77
 
-
 
78
/*
78
void *xmalloc(size_t n)
79
void *xmalloc(size_t n)
79
{
80
{
80
    void *p;
81
    void *p;
81
    p=malloc(n);
82
    p=malloc(n);
82
    if(p==NULL) {
83
    if(p==NULL) {
83
    printf("Malloc failure.\n");
84
    printf("Malloc failure.\n");
84
    exit(1);
85
    exit(1);
85
    }
86
    }
86
    return p;
87
    return p;
87
}
88
}
-
 
89
*/
88
 
90
 
-
 
91
/*
89
char *acctab="çéèêëúùûüáàâäãóòôöõíìïîñýÇÉÈÊËÚÙÛÜÁÀÂÃÄÓÒÔÖÕÍÌÏÎÑÝ",
92
char *acctab="çéèêëúùûüáàâäãóòôöõíìïîñýÿÇÉÈÊËÚÙÛÜÁÀÂÃÄÓÒÔÖÕÍÌÏÎÑÝ",
90
     *deatab="ceeeeuuuuaaaaaoooooiiiinyCEEEEUUUUAAAAAOOOOOIIIINY";
93
     *deatab="ceeeeuuuuaaaaaoooooiiiinyyCEEEEUUUUAAAAAOOOOOIIIINY";
91
 
94
*/
92
/*  fold known accented letters to unaccented, other strange characters to space
95
/*  fold known accented letters to unaccented, other strange characters to space
93
 *  apostrophe is among the exceptions to be kept (important for multi-word expressions)
96
 *  apostrophe is among the exceptions to be kept (important for multi-word expressions)
94
 */
97
 */
95
void deaccent(char *p)
98
void deaccent2(char *p)
96
{
99
{
97
    char *sp;
100
    char *sp;
98
    char *v;
101
    char *v;
99
    for(sp=p;*sp;sp++) {
102
    for(sp=p;*sp;sp++) {
100
    if(*sp<0 && (v=strchr(acctab,*sp))!=NULL)
103
    if(*sp<0 && (v=strchr(acctab,*sp))!=NULL)
Line 110... Line 113...
110
    char *pp;
113
    char *pp;
111
    for(pp=p;*pp;pp++) if(!isalnum(*pp) && strchr("&$+*",*pp)==0) *pp=' ';
114
    for(pp=p;*pp;pp++) if(!isalnum(*pp) && strchr("&$+*",*pp)==0) *pp=' ';
112
}
115
}
113
 
116
 
114
/*  Points to the end of the word */
117
/*  Points to the end of the word */
-
 
118
/*
115
char *find_word_end(char *p)
119
char *find_word_end(char *p)
116
{
120
{
117
    int i;
121
    int i;
118
    for(i=0;!isspace(*p) && *p!=0 && i<MAX_LINELEN; p++,i++);
122
    for(i=0;!isspace(*p) && *p!=0 && i<MAX_LINELEN; p++,i++);
119
    return p;
123
    return p;
120
}
124
}
121
 
125
*/
122
/*  Strips leading spaces */
126
/*  Strips leading spaces */
-
 
127
/*
123
char *find_word_start(char *p)
128
char *find_word_start(char *p)
124
{
129
{
125
    int i;
130
    int i;
126
    for(i=0; isspace(*p) && i<MAX_LINELEN; p++,i++);
131
    for(i=0; isspace(*p) && i<MAX_LINELEN; p++,i++);
127
    return p;
132
    return p;
128
}
133
}
129
 
134
*/
130
/*  Find first occurrence of word */
135
/*  Find first occurrence of word */
131
char *wordchr(char *p, char *w)
136
char *wordchr2(char *p, char *w)
132
{
137
{
133
    char *r;
138
    char *r;
134
 
139
 
135
    for(r=strstr(p,w);r!=NULL &&
140
    for(r=strstr(p,w);r!=NULL &&
136
    ( (r>p && !isspace(*(r-1))) || (!isspace(*(r+strlen(w))) && *(r+strlen(w))!=0) );
141
    ( (r>p && !isspace(*(r-1))) || (!isspace(*(r+strlen(w))) && *(r+strlen(w))!=0) );
Line 139... Line 144...
139
}
144
}
140
 
145
 
141
/*  find a variable in a string (math expression).
146
/*  find a variable in a string (math expression).
142
 * Returns the pointer or NULL.
147
 * Returns the pointer or NULL.
143
 */
148
 */
144
char *varchr(char *p, char *v)
149
/*char *varchr(char *p, char *v)
145
{
150
{
146
    char *pp; int n=strlen(v);
151
    char *pp; int n=strlen(v);
147
    for(pp=strstr(p,v); pp!=NULL; pp=strstr(pp+1,v)) {
152
    for(pp=strstr(p,v); pp!=NULL; pp=strstr(pp+1,v)) {
148
    if((pp==p || !isalnum(*(pp-1))) &&
153
    if((pp==p || !isalnum(*(pp-1))) &&
149
       (!isalnum(*(pp+n)) || *(pp+n)==0)) break;
154
       (!isalnum(*(pp+n)) || *(pp+n)==0)) break;
150
    }
155
    }
151
    return pp;
156
    return pp;
152
}
157
}
153
 
158
*/
154
/*  strip trailing spaces; return string end. */
159
/*  strip trailing spaces; return string end. */
155
char *strip_trailing_spaces(char *p)
160
char *strip_trailing_spaces2(char *p)
156
{
161
{
157
    char *pp;
162
    char *pp;
158
    if(*p==0) return p;
163
    if(*p==0) return p;
159
    for(pp=p+strlen(p)-1; pp>=p && isspace(*pp); *(pp--)=0);
164
    for(pp=p+strlen(p)-1; pp>=p && isspace(*pp); *(pp--)=0);
160
    return pp;
165
    return pp;
Line 165... Line 170...
165
    char *pp;
170
    char *pp;
166
    pp=p; if(*pp=='<') pp++;
171
    pp=p; if(*pp=='<') pp++;
167
    for(; *pp && *pp!='>'; pp++) {
172
    for(; *pp && *pp!='>'; pp++) {
168
    if(*pp=='<') {
173
    if(*pp=='<') {
169
        pp=find_tag_end(pp)-1; continue;
174
        pp=find_tag_end(pp)-1; continue;
170
    }
175
    }
171
    if(*pp=='"') {
176
    if(*pp=='"') {
172
        pp=strchr(pp+1,'"');
177
        pp=strchr(pp+1,'"');
173
        if(pp==NULL) return p+strlen(p); else continue;
178
        if(pp==NULL) return p+strlen(p); else continue;
174
    }
179
    }
175
    if(*pp=='\'') {
180
    if(*pp=='\'') {
176
        pp=strchr(pp+1,'\'');
181
        pp=strchr(pp+1,'\'');
177
        if(pp==NULL) return p+strlen(p); else continue;
182
        if(pp==NULL) return p+strlen(p); else continue;
178
    }
183
    }
179
    }
184
    }
180
    if(*pp=='>') pp++; return pp;
185
    if(*pp=='>') pp++; return pp;
181
}
186
}
182
 
187
 
183
char *find_tag(char *p, char *tag)
188
char *find_tag(char *p, char *tag)
184
{
189
{
185
    char *pp;
190
    char *pp;
186
    int len;
191
    int len;
187
    len=strlen(tag);
192
    len=strlen(tag);
188
    for(pp=strchr(p,'<'); pp!=NULL && *pp; pp=strchr(pp+1,'<')) {
193
    for(pp=strchr(p,'<'); pp!=NULL && *pp; pp=strchr(pp+1,'<')) {
189
    if(strncasecmp(pp+1,tag,len)==0 && !isalnum(*(pp+1+len))) return pp;
194
    if(strncasecmp(pp+1,tag,len)==0 && !isalnum(*(pp+1+len))) return pp;
Line 201... Line 206...
201
    ovlstrcpy(pp,p2);
206
    ovlstrcpy(pp,p2);
202
    }
207
    }
203
}
208
}
204
 
209
 
205
/*  modify a string. Bufferlen must be at least MAX_LINELEN */
210
/*  modify a string. Bufferlen must be at least MAX_LINELEN */
206
void string_modify(char *start, char *bad_beg, char *bad_end, char *good,...)
211
void string_modify3(char *start, char *bad_beg, char *bad_end, char *good,...)
207
{
212
{
208
    char buf[MAX_LINELEN+1];
213
    char buf[MAX_LINELEN+1];
209
    va_list vp;
214
    va_list vp;
210
 
215
 
211
    va_start(vp,good);
216
    va_start(vp,good);
212
    vsnprintf(buf,sizeof(buf),good,vp); va_end(vp);
217
    vsnprintf(buf,sizeof(buf),good,vp); va_end(vp);
213
    if(strlen(start)-(bad_end-bad_beg)+strlen(buf)>=MAX_LINELEN)
218
    if(strlen(start)-(bad_end-bad_beg)+strlen(buf)>=MAX_LINELEN)
214
      return;
219
      return; /* this is an error situation. */
215
    strcat(buf,bad_end);
220
    strcat(buf,bad_end);
216
    ovlstrcpy(bad_beg,buf);
221
    ovlstrcpy(bad_beg,buf);
217
}
222
}
218
 
223
 
219
/* add a space after comma to see end of words */
224
/* add a space after comma to see end of words */
220
 
225
 
221
void comma(char *p)
226
void comma(char *p)
222
{
227
{
223
    char *pp;
228
    char *pp;
224
    for(pp=strchr(p,','); pp; pp=strchr(pp+1,','))
229
    for(pp=strchr(p,','); pp; pp=strchr(pp+1,','))
225
      string_modify(p,pp,pp+1,", ");
230
      string_modify3(p,pp,pp+1,", ");
226
}
231
}
227
 
232
 
228
void _getdef(char buf[], char *name, char value[])
233
void _getdef(char buf[], char *name, char value[])
229
{
234
{
230
    char *p1, *p2, *p3;
235
    char *p1, *p2, *p3;
Line 238... Line 243...
238
    p3=strchr(p2,'\n');
243
    p3=strchr(p2,'\n');
239
    p2=find_word_start(p2+1);
244
    p2=find_word_start(p2+1);
240
    if(p3 <= p2) continue;
245
    if(p3 <= p2) continue;
241
    snprintf(value,MAX_LINELEN,"%s",p2);
246
    snprintf(value,MAX_LINELEN,"%s",p2);
242
    if(p3!=NULL && p3-p2<MAX_LINELEN) value[p3-p2]=0;
247
    if(p3!=NULL && p3-p2<MAX_LINELEN) value[p3-p2]=0;
243
    strip_trailing_spaces(value);
248
    strip_trailing_spaces2(value);
244
    break;
249
    break;
245
    }
250
    }
246
}
251
}
247
 
252
 
248
/*  Get variable definition from a file.
253
/*  Get variable definition from a file.
Line 466... Line 471...
466
    for(i=0,p1=find_word_start(ibuf);
471
    for(i=0,p1=find_word_start(ibuf);
467
    i<SHEETINDEX_NO-1 && *p1!=':' && *p1!=0;
472
    i<SHEETINDEX_NO-1 && *p1!=':' && *p1!=0;
468
    i++,p1=p2) {
473
    i++,p1=p2) {
469
    p2=strchr(p1,'\n');
474
    p2=strchr(p1,'\n');
470
    if(p2!=NULL) *p2++=0; else p2=p1+strlen(p1);
475
    if(p2!=NULL) *p2++=0; else p2=p1+strlen(p1);
471
    p1=find_word_start(p1); strip_trailing_spaces(p1);
476
    p1=find_word_start(p1); strip_trailing_spaces2(p1);
472
    snprintf(sindbuf[i],MAX_LINELEN,"%s",p1);
477
    snprintf(sindbuf[i],MAX_LINELEN,"%s",p1);
473
    }
478
    }
474
    p2=strstr(p1,"\n:"); if(p2==NULL) p2=p1+strlen(p1);
479
    p2=strstr(p1,"\n:"); if(p2==NULL) p2=p1+strlen(p1);
475
    else *p2=0;
480
    else *p2=0;
476
    p1=find_word_start(p1); strip_trailing_spaces(p1);
481
    p1=find_word_start(p1); strip_trailing_spaces2(p1);
477
    for(p2=p1;*p2;p2++) if(*p2=='\n') *p2=' ';
482
    for(p2=p1;*p2;p2++) if(*p2=='\n') *p2=' ';
478
    ovlstrcpy(sindbuf[s_information],p1);
483
    ovlstrcpy(sindbuf[s_information],p1);
479
    return 0;
484
    return 0;
480
}
485
}
481
 
486
 
Line 489... Line 494...
489
    int i, ll;
494
    int i, ll;
490
    char *p;
495
    char *p;
491
    FILE *f;
496
    FILE *f;
492
 
497
 
493
    if(!isalnum(*word) || (ll=strlen(word))<2 ||
498
    if(!isalnum(*word) || (ll=strlen(word))<2 ||
494
       wordchr(taken,word)!=NULL ||
499
       wordchr2(taken,word)!=NULL ||
495
       wordchr(ignore[lind],word)!=NULL ||
500
       wordchr2(ignore[lind],word)!=NULL ||
496
       takenlen>=MAX_LINELEN-ll-16)
501
       takenlen>=MAX_LINELEN-ll-16)
497
      return;
502
      return;
498
    if(ll==2 && (!isdigit(word[0]) || !isalpha(word[1]))) return;
503
    if(ll==2 && (!isdigit(word[0]) || !isalpha(word[1]))) return;
499
    for(p=word;*p;p++) if(!isalnum(*p) && *p!=' ') return;
504
    for(p=word;*p;p++) if(!isalnum(*p) && *p!=' ') return;
500
    taken[takenlen++]=' '; taken[takenlen++]=' ';
505
    taken[takenlen++]=' '; taken[takenlen++]=' ';
Line 546... Line 551...
546
    towords(indbuf[i_category]);
551
    towords(indbuf[i_category]);
547
/*   list the categories (among A=all,X=eXercise,O,D,...) corresponding
552
/*   list the categories (among A=all,X=eXercise,O,D,...) corresponding
548
 *   to this module
553
 *   to this module
549
 */
554
 */
550
    for(i=catcnt=0;i<catno && catcnt<16;i++) {
555
    for(i=catcnt=0;i<catno && catcnt<16;i++) {
551
    if(wordchr(indbuf[i_category],cat[i].name)!=NULL)
556
    if(wordchr2(indbuf[i_category],cat[i].name)!=NULL)
552
      categories[catcnt++]=cat[i].typ;
557
      categories[catcnt++]=cat[i].typ;
553
    }
558
    }
554
    if(catcnt==0) return;
559
    if(catcnt==0) return;
555
    if(categories[0]!=cat[0].typ)
560
    if(categories[0]!=cat[0].typ)
556
      categories[catcnt++]=cat[0].typ;
561
      categories[catcnt++]=cat[0].typ;
Line 571... Line 576...
571
    fprintf(versionf,"%d:%s\n",serial,indbuf[i_version]);
576
    fprintf(versionf,"%d:%s\n",serial,indbuf[i_version]);
572
 
577
 
573
/*   add module's information in html page for robots  */
578
/*   add module's information in html page for robots  */
574
    snprintf(buf,sizeof(buf),"%s",indbuf[i_description]);
579
    snprintf(buf,sizeof(buf),"%s",indbuf[i_description]);
575
    for(pp=strchr(buf,','); pp; pp=strchr(pp,','))
580
    for(pp=strchr(buf,','); pp; pp=strchr(pp,','))
576
      string_modify(buf,pp,pp+1,"&#44;");
581
      string_modify3(buf,pp,pp+1,"&#44;");
577
    if(strcmp(module_language,lang[lind])==0)
582
    if(strcmp(module_language,lang[lind])==0)
578
      fprintf(robotf,"%s ,%s,%s,%s,%s\n",name,module_language,name,
583
      fprintf(robotf,"%s ,%s,%s,%s,%s\n",name,module_language,name,
579
          indbuf[i_title], buf);
584
          indbuf[i_title], buf);
580
 
585
 
581
/*   Normalize the information of trlist, using dictionary
586
/*   Normalize the information of trlist, using dictionary
Line 584... Line 589...
584
    entrycount=dentrycount; dicbuf=ddicbuf;
589
    entrycount=dentrycount; dicbuf=ddicbuf;
585
    memmove(entry,dentry,dentrycount*sizeof(entry[0]));
590
    memmove(entry,dentry,dentrycount*sizeof(entry[0]));
586
    unknown_type=unk_leave;
591
    unknown_type=unk_leave;
587
    for(i=0;i<trcnt;i++) {
592
    for(i=0;i<trcnt;i++) {
588
    detag(indbuf[trlist[i]]);
593
    detag(indbuf[trlist[i]]);
589
    deaccent(indbuf[trlist[i]]);
594
    deaccent2(indbuf[trlist[i]]);
590
    comma(indbuf[trlist[i]]);
595
    comma(indbuf[trlist[i]]);
591
    singlespace(indbuf[trlist[i]]);
596
    singlespace2(indbuf[trlist[i]]);
592
    translate(indbuf[trlist[i]]);
597
    translate(indbuf[trlist[i]]);
593
    }
598
    }
594
/*   Normalize the information, using dictionary
599
/*   Normalize the information, using dictionary
595
 *   bases/sys/words.xx with suffix translation
600
 *   bases/sys/words.xx with suffix translation
596
 */
601
 */
Line 718... Line 723...
718
{
723
{
719
    int ll;
724
    int ll;
720
    char *p;
725
    char *p;
721
 
726
 
722
    if(!isalnum(*word) || (ll=strlen(word))<2 ||
727
    if(!isalnum(*word) || (ll=strlen(word))<2 ||
723
       wordchr(taken,word)!=NULL ||
728
       wordchr2(taken,word)!=NULL ||
724
       wordchr(ignore[lind],word)!=NULL ||
729
       wordchr2(ignore[lind],word)!=NULL ||
725
       takenlen>=MAX_LINELEN-ll-16)
730
       takenlen>=MAX_LINELEN-ll-16)
726
      return;
731
      return;
727
    if(ll==2 && (!isdigit(word[0]) || !isalpha(word[1]))) return;
732
    if(ll==2 && (!isdigit(word[0]) || !isalpha(word[1]))) return;
728
    for(p=word;*p;p++) if(!isalnum(*p) && *p!=' ') return;
733
    for(p=word;*p;p++) if(!isalnum(*p) && *p!=' ') return;
729
    taken[takenlen++]=' ';taken[takenlen++]=' ';
734
    taken[takenlen++]=' ';taken[takenlen++]=' ';
Line 750... Line 755...
750
    entrycount=dentrycount; dicbuf=ddicbuf;
755
    entrycount=dentrycount; dicbuf=ddicbuf;
751
    memmove(entry,dentry,dentrycount*sizeof(entry[0]));
756
    memmove(entry,dentry,dentrycount*sizeof(entry[0]));
752
    unknown_type=unk_leave;
757
    unknown_type=unk_leave;
753
    for(i=0;i<trcnt;i++) {
758
    for(i=0;i<trcnt;i++) {
754
    detag(sindbuf[trlist[i]]);
759
    detag(sindbuf[trlist[i]]);
755
    deaccent(sindbuf[trlist[i]]);
760
    deaccent2(sindbuf[trlist[i]]);
756
    comma(sindbuf[trlist[i]]);
761
    comma(sindbuf[trlist[i]]);
757
    singlespace(sindbuf[trlist[i]]);
762
    singlespace2(sindbuf[trlist[i]]);
758
    translate(sindbuf[trlist[i]]);
763
    translate(sindbuf[trlist[i]]);
759
    }
764
    }
760
 
765
 
761
    entrycount=mentrycount; dicbuf=mdicbuf;
766
    entrycount=mentrycount; dicbuf=mdicbuf;
762
    memmove(entry,mentry,mentrycount*sizeof(entry[0]));
767
    memmove(entry,mentry,mentrycount*sizeof(entry[0]));