Rev 6881 | Rev 6961 | Go to most recent revision | Show entire file | Ignore whitespace | Details | Blame | Last modification | View Log | RSS feed
Rev 6881 | Rev 6884 | ||
---|---|---|---|
Line 13... | Line 13... | ||
13 | * You should have received a copy of the GNU General Public License |
13 | * You should have received a copy of the GNU General Public License |
14 | * along with this program; if not, write to the Free Software |
14 | * along with this program; if not, write to the Free Software |
15 | * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. |
15 | * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. |
16 | */ |
16 | */ |
17 | 17 | ||
18 |
|
18 | /* This is an internal program, |
19 | |
19 | * used to index modules for search engine. |
- | 20 | */ |
|
20 | 21 | ||
21 | #include "../wims.h" |
22 | #include "../wims.h" |
22 | #include "../Lib/basicstr.c" |
23 | #include "../Lib/basicstr.c" |
23 | 24 | ||
24 | #define MAX_LANGS |
25 | #define MAX_LANGS MAX_LANGUAGES |
25 | #define MAX_MODULES |
26 | #define MAX_MODULES 65536 |
26 | char *moduledir= |
27 | char *moduledir= "public_html/modules"; |
27 | char *sheetdir= |
28 | char *sheetdir= "public_html/bases/sheet"; |
28 | char *dicdir= |
29 | char *dicdir= "public_html/bases"; |
29 | char *outdir= |
30 | char *outdir= "public_html/bases/site2"; |
30 | char *maindic= |
31 | char *maindic= "sys/words"; |
31 | char *groupdic= |
32 | char *groupdic= "sys/wgrp/wgrp"; |
32 | char *suffixdic= |
33 | char *suffixdic= "sys/suffix"; |
33 | char *domaindic= |
34 | char *domaindic= "sys/domaindic"; |
34 | char *ignoredic= |
35 | char *ignoredic= "sys/indignore"; |
35 | char *conffile= |
36 | char *conffile= "log/wims.conf"; |
36 | char *mlistbase= |
37 | char *mlistbase= "list"; |
37 | 38 | ||
38 | char lang[MAX_LANGS][4]={ |
39 | char lang[MAX_LANGS][4]={ |
39 | "en","fr","cn","es","it","nl","si","ca","pt" |
40 | "en","fr","cn","es","it","nl","si","ca","pt" |
40 | }; |
41 | }; |
41 | #define DEFAULT_LANGCNT 6 |
42 | #define DEFAULT_LANGCNT 6 |
42 | char allang[MAX_LANGS][4]={ |
43 | char allang[MAX_LANGS][4]={ |
43 | "en","fr","cn","es","it","nl","de","si","ca","pt" |
44 | "en","fr","cn","es","it","nl","de","si","ca","pt" |
44 | }; |
45 | }; |
45 | #define allangcnt 8 |
46 | #define allangcnt 8 |
46 | char ignore[MAX_LANGS][MAX_LINELEN+1]; |
47 | char ignore[MAX_LANGS][MAX_LINELEN+1]; |
Line 50... | Line 51... | ||
50 | 51 | ||
51 | struct cat { |
52 | struct cat { |
52 | char *name; |
53 | char *name; |
53 | char typ; |
54 | char typ; |
54 | } cat[]={ |
55 | } cat[]={ |
55 |
|
56 | {"all_types", 'A'}, |
56 |
|
57 | {"exercise", 'X'}, |
57 |
|
58 | {"oef", 'O'}, |
58 |
|
59 | {"tool", 'T'}, |
59 |
|
60 | {"recreation",'R'}, |
60 |
|
61 | {"reference", 'Y'}, |
61 |
|
62 | {"document", 'D'}, |
62 |
|
63 | {"popup", 'P'}, |
63 |
|
64 | {"datamodule",'M'} |
64 | }; |
65 | }; |
65 | #define catno (sizeof(cat)/sizeof(cat[0])) |
66 | #define catno (sizeof(cat)/sizeof(cat[0])) |
66 | 67 | ||
67 | struct mod { |
68 | struct mod { |
68 | char *name; |
69 | char *name; |
Line 77... | Line 78... | ||
77 | void *xmalloc(size_t n) |
78 | void *xmalloc(size_t n) |
78 | { |
79 | { |
79 | void *p; |
80 | void *p; |
80 | p=malloc(n); |
81 | p=malloc(n); |
81 | if(p==NULL) { |
82 | if(p==NULL) { |
82 |
|
83 | printf("Malloc failure.\n"); |
83 |
|
84 | exit(1); |
84 | } |
85 | } |
85 | return p; |
86 | return p; |
86 | } |
87 | } |
87 | 88 | ||
88 | char *acctab="çéèêëúùûüáàâäãóòôöõíìïîñýÇÉÈÊËÚÙÛÜÁÀÂÃÄÓÒÔÖÕÍÌÏÎÑÝ", |
89 | char *acctab="çéèêëúùûüáàâäãóòôöõíìïîñýÇÉÈÊËÚÙÛÜÁÀÂÃÄÓÒÔÖÕÍÌÏÎÑÝ", |
89 | *deatab="ceeeeuuuuaaaaaoooooiiiinyCEEEEUUUUAAAAAOOOOOIIIINY"; |
90 | *deatab="ceeeeuuuuaaaaaoooooiiiinyCEEEEUUUUAAAAAOOOOOIIIINY"; |
90 | 91 | ||
91 |
|
92 | /* fold known accented letters to unaccented, other strange characters to space |
- | 93 | * apostrophe is among the exceptions to be kept (important for multi-word expressions) |
|
- | 94 | */ |
|
92 | void deaccent(char *p) |
95 | void deaccent(char *p) |
93 | { |
96 | { |
94 | char *sp; |
97 | char *sp; |
95 | char *v; |
98 | char *v; |
96 | for(sp=p;*sp;sp++) { |
99 | for(sp=p;*sp;sp++) { |
97 |
|
100 | if(*sp<0 && (v=strchr(acctab,*sp))!=NULL) |
98 |
|
101 | *sp=*(deatab+(v-acctab)); |
99 |
|
102 | if(!isalnum(*sp) && strchr(",.&$+*",*sp)==0) *sp=' '; |
100 |
|
103 | else *sp=tolower(*sp); |
101 | } |
104 | } |
102 | } |
105 | } |
103 | 106 | ||
104 |
|
107 | /* translate everything non-alphanumeric into space */ |
105 | void towords(char *p) |
108 | void towords(char *p) |
106 | { |
109 | { |
107 | char *pp; |
110 | char *pp; |
108 | for(pp=p;*pp;pp++) if(!isalnum(*pp) && strchr("&$+*",*pp)==0) *pp=' '; |
111 | for(pp=p;*pp;pp++) if(!isalnum(*pp) && strchr("&$+*",*pp)==0) *pp=' '; |
109 | } |
112 | } |
110 | 113 | ||
111 |
|
114 | /* Points to the end of the word */ |
112 | char *find_word_end(char *p) |
115 | char *find_word_end(char *p) |
113 | { |
116 | { |
114 | int i; |
117 | int i; |
115 | for(i=0;!isspace(*p) && *p!=0 && i<MAX_LINELEN; p++,i++); |
118 | for(i=0;!isspace(*p) && *p!=0 && i<MAX_LINELEN; p++,i++); |
116 | return p; |
119 | return p; |
117 | } |
120 | } |
118 | 121 | ||
119 |
|
122 | /* Strips leading spaces */ |
120 | char *find_word_start(char *p) |
123 | char *find_word_start(char *p) |
121 | { |
124 | { |
122 | int i; |
125 | int i; |
123 | for(i=0; isspace(*p) && i<MAX_LINELEN; p++,i++); |
126 | for(i=0; isspace(*p) && i<MAX_LINELEN; p++,i++); |
124 | return p; |
127 | return p; |
125 | } |
128 | } |
126 | 129 | ||
127 |
|
130 | /* Find first occurrence of word */ |
128 | char *wordchr(char *p, char *w) |
131 | char *wordchr(char *p, char *w) |
129 | { |
132 | { |
130 | char *r; |
133 | char *r; |
131 | 134 | ||
132 | for(r=strstr(p,w);r!=NULL && |
135 | for(r=strstr(p,w);r!=NULL && |
133 |
|
136 | ( (r>p && !isspace(*(r-1))) || (!isspace(*(r+strlen(w))) && *(r+strlen(w))!=0) ); |
134 |
|
137 | r=strstr(r+1,w)); |
135 | return r; |
138 | return r; |
136 | } |
139 | } |
137 | 140 | ||
138 |
|
141 | /* find a variable in a string (math expression). |
139 | |
142 | * Returns the pointer or NULL. |
- | 143 | */ |
|
140 | char *varchr(char *p, char *v) |
144 | char *varchr(char *p, char *v) |
141 | { |
145 | { |
142 | char *pp; int n=strlen(v); |
146 | char *pp; int n=strlen(v); |
143 | for(pp=strstr(p,v); pp!=NULL; pp=strstr(pp+1,v)) { |
147 | for(pp=strstr(p,v); pp!=NULL; pp=strstr(pp+1,v)) { |
144 |
|
148 | if((pp==p || !isalnum(*(pp-1))) && |
145 |
|
149 | (!isalnum(*(pp+n)) || *(pp+n)==0)) break; |
146 | } |
150 | } |
147 | return pp; |
151 | return pp; |
148 | } |
152 | } |
149 | 153 | ||
150 |
|
154 | /* strip trailing spaces; return string end. */ |
151 | char *strip_trailing_spaces(char *p) |
155 | char *strip_trailing_spaces(char *p) |
152 | { |
156 | { |
153 | char *pp; |
157 | char *pp; |
154 | if(*p==0) return p; |
158 | if(*p==0) return p; |
155 | for(pp=p+strlen(p)-1; pp>=p && isspace(*pp); *(pp--)=0); |
159 | for(pp=p+strlen(p)-1; pp>=p && isspace(*pp); *(pp--)=0); |
Line 159... | Line 163... | ||
159 | char *find_tag_end(char *p) |
163 | char *find_tag_end(char *p) |
160 | { |
164 | { |
161 | char *pp; |
165 | char *pp; |
162 | pp=p; if(*pp=='<') pp++; |
166 | pp=p; if(*pp=='<') pp++; |
163 | for(; *pp && *pp!='>'; pp++) { |
167 | for(; *pp && *pp!='>'; pp++) { |
164 |
|
168 | if(*pp=='<') { |
165 |
|
169 | pp=find_tag_end(pp)-1; continue; |
166 |
|
170 | } |
167 |
|
171 | if(*pp=='"') { |
168 |
|
172 | pp=strchr(pp+1,'"'); |
169 |
|
173 | if(pp==NULL) return p+strlen(p); else continue; |
170 |
|
174 | } |
171 |
|
175 | if(*pp=='\'') { |
172 |
|
176 | pp=strchr(pp+1,'\''); |
173 |
|
177 | if(pp==NULL) return p+strlen(p); else continue; |
174 |
|
178 | } |
175 | } |
179 | } |
176 | if(*pp=='>') pp++; return pp; |
180 | if(*pp=='>') pp++; return pp; |
177 | } |
181 | } |
178 | 182 | ||
179 | char *find_tag(char *p, char *tag) |
183 | char *find_tag(char *p, char *tag) |
180 | { |
184 | { |
181 | char *pp; |
185 | char *pp; |
182 | int len; |
186 | int len; |
183 | len=strlen(tag); |
187 | len=strlen(tag); |
184 | for(pp=strchr(p,'<'); pp!=NULL && *pp; pp=strchr(pp+1,'<')) { |
188 | for(pp=strchr(p,'<'); pp!=NULL && *pp; pp=strchr(pp+1,'<')) { |
185 |
|
189 | if(strncasecmp(pp+1,tag,len)==0 && !isalnum(*(pp+1+len))) return pp; |
186 | } |
190 | } |
187 | return p+strlen(p); |
191 | return p+strlen(p); |
188 | } |
192 | } |
189 | 193 | ||
190 |
|
194 | /* remove all html tags */ |
191 | void detag(char *p) |
195 | void detag(char *p) |
192 | { |
196 | { |
193 | char *pp, *p2; |
197 | char *pp, *p2; |
194 | for(pp=strchr(p,'<'); pp!=NULL; pp=strchr(pp,'<')) { |
198 | for(pp=strchr(p,'<'); pp!=NULL; pp=strchr(pp,'<')) { |
195 |
|
199 | p2=find_tag_end(pp); |
196 |
|
200 | if(*p2==0) {*pp=0; return; } |
197 |
|
201 | ovlstrcpy(pp,p2); |
198 | } |
202 | } |
199 | } |
203 | } |
200 | 204 | ||
201 |
|
205 | /* modify a string. Bufferlen must be at least MAX_LINELEN */ |
202 | void string_modify(char *start, char *bad_beg, char *bad_end, char *good,...) |
206 | void string_modify(char *start, char *bad_beg, char *bad_end, char *good,...) |
203 | { |
207 | { |
204 | char buf[MAX_LINELEN+1]; |
208 | char buf[MAX_LINELEN+1]; |
205 | va_list vp; |
209 | va_list vp; |
206 | 210 | ||
Line 217... | Line 221... | ||
217 | void comma(char *p) |
221 | void comma(char *p) |
218 | { |
222 | { |
219 | char *pp; |
223 | char *pp; |
220 | for(pp=strchr(p,','); pp; pp=strchr(pp+1,',')) |
224 | for(pp=strchr(p,','); pp; pp=strchr(pp+1,',')) |
221 | string_modify(p,pp,pp+1,", "); |
225 | string_modify(p,pp,pp+1,", "); |
222 | } |
226 | } |
223 | - | ||
224 | 227 | ||
225 | void _getdef(char buf[], char *name, char value[]) |
228 | void _getdef(char buf[], char *name, char value[]) |
226 | { |
229 | { |
227 | char *p1, *p2, *p3; |
230 | char *p1, *p2, *p3; |
228 | 231 | ||
229 | value[0]=0; |
232 | value[0]=0; |
230 | for(p1=strstr(buf,name); p1!=NULL; p1=strstr(p1+1,name)) { |
233 | for(p1=strstr(buf,name); p1!=NULL; p1=strstr(p1+1,name)) { |
231 |
|
234 | p2=find_word_start(p1+strlen(name)); |
232 |
|
235 | if((p1>buf && !isspace(*(p1-1))) || *p2!='=') continue; |
233 |
|
236 | p3=p1; while(p3>buf && isspace(*(p3-1)) && *(p3-1)!='\n') p3--; |
234 |
|
237 | if(p3>buf && *(p3-1)!='\n') continue; |
235 |
|
238 | p3=strchr(p2,'\n'); |
236 |
|
239 | p2=find_word_start(p2+1); |
237 |
|
240 | if(p3 <= p2) continue; |
238 |
|
241 | snprintf(value,MAX_LINELEN,"%s",p2); |
239 |
|
242 | if(p3!=NULL && p3-p2<MAX_LINELEN) value[p3-p2]=0; |
240 |
|
243 | strip_trailing_spaces(value); |
241 |
|
244 | break; |
242 | } |
245 | } |
243 | } |
246 | } |
244 | 247 | ||
245 |
|
248 | /* Get variable definition from a file. |
246 |
|
249 | * Result stored in buffer value of length MAX_LINELEN. |
- | 250 | */ |
|
247 | void getdef(char *fname, char *name, char value[]) |
251 | void getdef(char *fname, char *name, char value[]) |
248 | { |
252 | { |
249 | FILE *f; |
253 | FILE *f; |
250 | char *buf; |
254 | char *buf; |
251 | int l; |
255 | int l; |
252 | 256 | ||
253 | value[0]=0; |
257 | value[0]=0; |
254 | f=fopen(fname,"r"); if(f==NULL) return; |
258 | f=fopen(fname,"r"); if(f==NULL) return; |
255 | fseek(f,0,SEEK_END); l=ftell(f); fseek(f,0,SEEK_SET); |
259 | fseek(f,0,SEEK_END); l=ftell(f); fseek(f,0,SEEK_SET); |
256 | buf=xmalloc(l+256); l=fread(buf,1,l,f); |
260 | buf=xmalloc(l+256); l=fread(buf,1,l,f); |
257 | fclose(f); |
261 | fclose(f); |
Line 259... | Line 263... | ||
259 | _getdef(buf,name,value); |
263 | _getdef(buf,name,value); |
260 | free(buf); |
264 | free(buf); |
261 | } |
265 | } |
262 | 266 | ||
263 | #include "translator_.c" |
267 | #include "translator_.c" |
264 | 268 | ||
265 | char *mdicbuf, *gdicbuf, *ddicbuf; |
269 | char *mdicbuf, *gdicbuf, *ddicbuf; |
266 | char gentry[sizeof(entry)], mentry[sizeof(entry)], dentry[sizeof(entry)]; |
270 | char gentry[sizeof(entry)], mentry[sizeof(entry)], dentry[sizeof(entry)]; |
267 | int gentrycount, mentrycount, dentrycount; |
271 | int gentrycount, mentrycount, dentrycount; |
268 | 272 | ||
269 |
|
273 | /* Preparation of data */ |
270 | void prep(void) |
274 | void prep(void) |
271 | { |
275 | { |
272 | char buf[MAX_LINELEN+1]; |
276 | char buf[MAX_LINELEN+1]; |
273 | char *p1,*p2,*s,*old; |
277 | char *p1,*p2,*s,*old; |
274 | int i,l,thislang,t; |
278 | int i,l,thislang,t; |
Line 279... | Line 283... | ||
279 | snprintf(buf,sizeof(buf),"%s/addr",outdir); |
283 | snprintf(buf,sizeof(buf),"%s/addr",outdir); |
280 | addrf=fopen(buf,"w"); |
284 | addrf=fopen(buf,"w"); |
281 | snprintf(buf,sizeof(buf),"%s/serial",outdir); |
285 | snprintf(buf,sizeof(buf),"%s/serial",outdir); |
282 | serialf=fopen(buf,"w"); |
286 | serialf=fopen(buf,"w"); |
283 | modcnt=langcnt=0; |
287 | modcnt=langcnt=0; |
- | 288 | /* take the langs declared in conffile */ |
|
284 | getdef(conffile,"site_languages",buf); |
289 | getdef(conffile,"site_languages",buf); |
285 | for(p1=buf;*p1;p1++) if(!isalnum(*p1)) *p1=' '; |
290 | for(p1=buf;*p1;p1++) if(!isalnum(*p1)) *p1=' '; |
286 | for(p1=find_word_start(buf); *p1 && langcnt<MAX_LANGS; p1=find_word_start(p2)) { |
291 | for(p1=find_word_start(buf); *p1 && langcnt<MAX_LANGS; p1=find_word_start(p2)) { |
287 |
|
292 | p2=find_word_end(p1); |
288 |
|
293 | if(p2!=p1+2 || !isalpha(*p1) || !isalpha(*(p1+1))) continue; |
289 |
|
294 | memmove(lang[langcnt],p1,2); lang[langcnt++][2]=0; |
290 | } |
295 | } |
291 | if(langcnt==0) { |
296 | if(langcnt==0) {/* default languages */ |
292 |
|
297 | langcnt=DEFAULT_LANGCNT; |
293 | } |
298 | } |
294 | s=getenv("mlist"); if(s==NULL) exit(1); |
299 | s=getenv("mlist"); if(s==NULL) exit(1); |
295 | l=strlen(s); if(l<0 || l>100*MAX_LINELEN) exit(1); |
300 | l=strlen(s); if(l<0 || l>100*MAX_LINELEN) exit(1); |
296 | mlist=xmalloc(l+16); ovlstrcpy(mlist,s); old=""; |
301 | mlist=xmalloc(l+16); ovlstrcpy(mlist,s); old=""; |
297 | for(i=0;i<langcnt;i++) { |
302 | for(i=0;i<langcnt;i++) { |
298 |
|
303 | snprintf(buf,sizeof(buf),"%s/%s.%s",dicdir,ignoredic,lang[i]); |
299 |
|
304 | f=fopen(buf,"r"); if(f==NULL) continue; |
300 |
|
305 | l=fread(ignore[i],1,MAX_LINELEN,f);fclose(f); |
301 |
|
306 | if(l<0 || l>=MAX_LINELEN) l=0; |
302 |
|
307 | ignore[i][l]=0; |
303 | } |
308 | } |
304 | for(t=0, p1=find_word_start(mlist); |
309 | for(t=0, p1=find_word_start(mlist); |
305 |
|
310 | *p1 && modcnt<MAX_MODULES; |
306 |
|
311 | p1=find_word_start(p2), t++) { |
307 |
|
312 | p2=find_word_end(p1); |
308 |
|
313 | l=p2-p1; if(*p2) *p2++=0; |
309 |
|
314 | fprintf(addrf,"%d:%s\n",t,p1); |
310 |
|
315 | fprintf(serialf,"%s:%d\n",p1,t); |
311 |
|
316 | thislang=-1; |
312 | /* language is taken from the address */ |
317 | /* language is taken from the address */ |
313 |
|
318 | if(l>3 && p1[l-3]=='.') { |
314 |
|
319 | for(i=0;i<langcnt;i++) if(strcasecmp(lang[i],p1+l-2)==0) break; |
315 |
|
320 | if(i<langcnt) {p1[l-3]=0; thislang=i;} |
316 |
|
321 | else {/* unknown language, not referenced */ |
317 |
|
322 | continue; |
318 |
|
323 | } |
319 |
|
324 | } |
320 |
|
325 | if(modcnt>0 && strcmp(old,p1)==0 && thislang>=0) { |
321 |
|
326 | if(mod[modcnt-1].langcnt<langcnt) { |
322 |
|
327 | mod[modcnt-1].langs[mod[modcnt-1].langcnt]=thislang; |
323 |
|
328 | mod[modcnt-1].counts[mod[modcnt-1].langcnt]=t; |
324 |
|
329 | (mod[modcnt-1].langcnt)++; |
325 |
|
330 | } |
326 |
|
331 | } |
327 |
|
332 | else { |
328 |
|
333 | mod[modcnt].name=old=p1; |
329 |
|
334 | if(thislang>=0) { |
330 |
|
335 | mod[modcnt].langs[0]=thislang; |
331 |
|
336 | mod[modcnt].langcnt=1; |
332 |
|
337 | } |
333 |
|
338 | else mod[modcnt].langcnt=0; |
334 |
|
339 | mod[modcnt].counts[0]=t; |
335 |
|
340 | modcnt++; |
336 |
|
341 | } |
337 | } |
342 | } |
338 | snprintf(buf,sizeof(buf),"%s/language",outdir); |
343 | snprintf(buf,sizeof(buf),"%s/language",outdir); |
339 | langf=fopen(buf,"w"); |
344 | langf=fopen(buf,"w"); |
340 | snprintf(buf,sizeof(buf),"%s/title",outdir); |
345 | snprintf(buf,sizeof(buf),"%s/title",outdir); |
341 | titf=fopen(buf,"w"); |
346 | titf=fopen(buf,"w"); |
Line 347... | Line 352... | ||
347 | versionf=fopen(buf,"w"); |
352 | versionf=fopen(buf,"w"); |
348 | snprintf(buf,sizeof(buf),"%s/lists/robot.phtml",outdir); |
353 | snprintf(buf,sizeof(buf),"%s/lists/robot.phtml",outdir); |
349 | robotf=fopen(buf,"w"); |
354 | robotf=fopen(buf,"w"); |
350 | fclose(addrf); fclose(serialf); |
355 | fclose(addrf); fclose(serialf); |
351 | if(!robotf || !versionf || !authorf || !descf || !titf || !descf) { |
356 | if(!robotf || !versionf || !authorf || !descf || !titf || !descf) { |
352 |
|
357 | fprintf(stderr,"modind: error creating output files.\n"); |
353 |
|
358 | exit(1); |
354 | } |
359 | } |
355 | } |
360 | } |
356 | 361 | ||
357 | void sprep(void) |
362 | void sprep(void) |
358 | { |
363 | { |
Line 362... | Line 367... | ||
362 | modcnt=0; |
367 | modcnt=0; |
363 | s=getenv("slist"); if(s==NULL) return; |
368 | s=getenv("slist"); if(s==NULL) return; |
364 | l=strlen(s); if(l<0 || l>100*MAX_LINELEN) return; |
369 | l=strlen(s); if(l<0 || l>100*MAX_LINELEN) return; |
365 | mlist=xmalloc(l+16); ovlstrcpy(mlist,s); |
370 | mlist=xmalloc(l+16); ovlstrcpy(mlist,s); |
366 | for(p1=find_word_start(mlist); *p1 && modcnt<MAX_MODULES; p1=find_word_start(p2)) { |
371 | for(p1=find_word_start(mlist); *p1 && modcnt<MAX_MODULES; p1=find_word_start(p2)) { |
367 |
|
372 | p2=find_word_end(p1); |
368 |
|
373 | l=p2-p1; if(*p2) *p2++=0; |
369 |
|
374 | for(i=0;i<langcnt;i++) if(strncasecmp(lang[i],p1,2)==0) break; |
370 |
|
375 | if(i<langcnt) thislang=i; else continue; |
371 |
|
376 | mod[modcnt].name=p1; |
372 |
|
377 | mod[modcnt].langs[0]=thislang; |
373 |
|
378 | mod[modcnt].langcnt=1; |
374 |
|
379 | modcnt++; |
375 | } |
380 | } |
376 | } |
381 | } |
377 | 382 | ||
378 | void clean(void) |
383 | void clean(void) |
379 | { |
384 | { |
Line 420... | Line 425... | ||
420 | "intro","help","about" |
425 | "intro","help","about" |
421 | }; |
426 | }; |
422 | #define MODSPEC_NO (sizeof(module_special_file)/sizeof(module_special_file[0])) |
427 | #define MODSPEC_NO (sizeof(module_special_file)/sizeof(module_special_file[0])) |
423 | char module_language[4]; |
428 | char module_language[4]; |
424 | 429 | ||
425 |
|
430 | /* read and treat module's INDEX file */ |
426 | int module_index(const char *name) |
431 | int module_index(const char *name) |
427 | { |
432 | { |
428 | char *p, fbuf[MAX_LINELEN+1], ibuf[MAX_LINELEN+1]; |
433 | char *p, fbuf[MAX_LINELEN+1], ibuf[MAX_LINELEN+1]; |
429 | FILE *indf; |
434 | FILE *indf; |
430 | int i,l; |
435 | int i,l; |
431 | 436 | ||
432 | snprintf(fbuf,sizeof(fbuf),"%s/%s/INDEX",moduledir,name); |
437 | snprintf(fbuf,sizeof(fbuf),"%s/%s/INDEX",moduledir,name); |
433 | indf=fopen(fbuf,"r"); if(indf==NULL) return -1; |
438 | indf=fopen(fbuf,"r"); if(indf==NULL) return -1; |
434 | l=fread(ibuf,1,MAX_LINELEN,indf); fclose(indf); |
439 | l=fread(ibuf,1,MAX_LINELEN,indf); fclose(indf); |
435 | if(l>0 && l<MAX_LINELEN) ibuf[l]=0; else return -1; |
440 | if(l>0 && l<MAX_LINELEN) ibuf[l]=0; else return -1; |
- | 441 | /* treat all fields in *modindex */ |
|
436 | for(i=0;i<MODINDEX_NO;i++) { |
442 | for(i=0;i<MODINDEX_NO;i++) { |
437 |
|
443 | _getdef(ibuf,modindex[i],indbuf[i]); |
438 |
|
444 | /* compatibility precaution */ |
439 |
|
445 | if(indbuf[i][0]==':') indbuf[i][0]='.'; |
440 | } |
446 | } |
441 | p=find_word_start(indbuf[i_language]); |
447 | p=find_word_start(indbuf[i_language]); |
442 | if(isalpha(*p) && isalpha(*(p+1))) { |
448 | if(isalpha(*p) && isalpha(*(p+1))) { |
443 |
|
449 | memmove(module_language,p,2); module_language[2]=0; |
444 | } |
450 | } |
445 | else ovlstrcpy(module_language,"en"); |
451 | else ovlstrcpy(module_language,"en"); |
446 | return 0; |
452 | return 0; |
447 | } |
453 | } |
448 | 454 | ||
Line 456... | Line 462... | ||
456 | indf=fopen(fbuf,"r"); if(indf==NULL) return -1; |
462 | indf=fopen(fbuf,"r"); if(indf==NULL) return -1; |
457 | l=fread(ibuf,1,MAX_LINELEN,indf); fclose(indf); |
463 | l=fread(ibuf,1,MAX_LINELEN,indf); fclose(indf); |
458 | if(l>0 && l<MAX_LINELEN) ibuf[l]=0; else return -1; |
464 | if(l>0 && l<MAX_LINELEN) ibuf[l]=0; else return -1; |
459 | for(i=0;i<SHEETINDEX_NO;i++) sindbuf[i][0]=0; |
465 | for(i=0;i<SHEETINDEX_NO;i++) sindbuf[i][0]=0; |
460 | for(i=0,p1=find_word_start(ibuf); |
466 | for(i=0,p1=find_word_start(ibuf); |
461 |
|
467 | i<SHEETINDEX_NO-1 && *p1!=':' && *p1!=0; |
462 |
|
468 | i++,p1=p2) { |
463 |
|
469 | p2=strchr(p1,'\n'); |
464 |
|
470 | if(p2!=NULL) *p2++=0; else p2=p1+strlen(p1); |
465 |
|
471 | p1=find_word_start(p1); strip_trailing_spaces(p1); |
466 |
|
472 | snprintf(sindbuf[i],MAX_LINELEN,"%s",p1); |
467 | } |
473 | } |
468 | p2=strstr(p1,"\n:"); if(p2==NULL) p2=p1+strlen(p1); |
474 | p2=strstr(p1,"\n:"); if(p2==NULL) p2=p1+strlen(p1); |
469 | else *p2=0; |
475 | else *p2=0; |
470 | p1=find_word_start(p1); strip_trailing_spaces(p1); |
476 | p1=find_word_start(p1); strip_trailing_spaces(p1); |
471 | for(p2=p1;*p2;p2++) if(*p2=='\n') *p2=' '; |
477 | for(p2=p1;*p2;p2++) if(*p2=='\n') *p2=' '; |
Line 494... | Line 500... | ||
494 | taken[takenlen++]=' '; taken[takenlen++]=' '; |
500 | taken[takenlen++]=' '; taken[takenlen++]=' '; |
495 | ovlstrcpy(taken+takenlen,word); |
501 | ovlstrcpy(taken+takenlen,word); |
496 | takenlen+=ll; tweight+=weight; |
502 | takenlen+=ll; tweight+=weight; |
497 | snprintf(buf,sizeof(buf),"%s:%d?%d\n",word,serial,weight); |
503 | snprintf(buf,sizeof(buf),"%s:%d?%d\n",word,serial,weight); |
498 | for(i=0;i<catcnt;i++) { |
504 | for(i=0;i<catcnt;i++) { |
499 |
|
505 | snprintf(nbuf,sizeof(nbuf),"%s/%c.%s", |
500 |
|
506 | outdir,categories[i],lang[lind]); |
501 |
|
507 | f=fopen(nbuf,"a"); |
502 |
|
508 | if(f!=NULL) {fputs(buf,f); fclose(f);} |
503 | } |
509 | } |
504 | } |
510 | } |
505 | 511 | ||
506 | void appenditem1 (char *buf, int lind, int serial, int weight, char *l ) |
512 | void appenditem1 (char *buf, int lind, int serial, int weight, char *l ) |
507 | { |
513 | { |
508 | char *p1, *p2 ; |
514 | char *p1, *p2 ; |
509 | for(p1=find_word_start(buf); *p1; |
515 | for(p1=find_word_start(buf); *p1; |
510 |
|
516 | p1=find_word_start(p2)) { |
511 |
|
517 | p2=strchr(p1,','); |
512 |
|
518 | if(p2!=NULL) *p2++=0; else p2=p1+strlen(p1); |
513 |
|
519 | if(strlen(p1)<=0) continue; |
514 |
|
520 | appenditem(p1,lind,serial,weight,module_language); |
515 | } |
521 | } |
516 | } |
522 | } |
517 | void appenditem2 (char *buf, int lind, int serial, int weight, char *l ) |
523 | void appenditem2 (char *buf, int lind, int serial, int weight, char *l ) |
518 | { |
524 | { |
519 | char *p1, *p2 ; |
525 | char *p1, *p2 ; |
520 | for(p1=find_word_start(buf);*p1; |
526 | for(p1=find_word_start(buf);*p1; |
521 |
|
527 | p1=find_word_start(p2)) { |
522 |
|
528 | p2=find_word_end(p1); if(*p2) *p2++=0; |
523 |
|
529 | appenditem(p1,lind,serial,weight,module_language); |
524 | } |
530 | } |
525 | } |
531 | } |
526 | void onemodule(const char *name, int serial, int lind) |
532 | void onemodule(const char *name, int serial, int lind) |
527 | { |
533 | { |
528 | int i; |
534 | int i; |
529 | unsigned char trlist[]={ |
535 | unsigned char trlist[]={ |
530 |
|
536 | i_title,i_description,i_category,i_domain,i_keywords, |
531 |
|
537 | i_require,i_author, |
532 |
|
538 | i_keywords_ca,i_keywords_en,i_keywords_fr,i_keywords_it,i_keywords_nl, |
533 |
|
539 | i_title_ca,i_title_en,i_title_fr,i_title_it,i_title_nl |
534 | }; |
540 | }; |
535 | #define trcnt (sizeof(trlist)/sizeof(trlist[0])) |
541 | #define trcnt (sizeof(trlist)/sizeof(trlist[0])) |
536 | char *p1, *p2, *pp, *q, buf[MAX_LINELEN+1], lbuf[16]; |
542 | char *p1, *p2, *pp, *q, buf[MAX_LINELEN+1], lbuf[16]; |
537 | FILE *f; |
543 | FILE *f; |
538 | 544 | ||
539 | if(module_index(name)) return; |
545 | if(module_index(name)) return; |
540 | towords(indbuf[i_category]); |
546 | towords(indbuf[i_category]); |
541 |
|
547 | /* list the categories (among A=all,X=eXercise,O,D,...) corresponding |
- | 548 | * to this module |
|
- | 549 | */ |
|
542 | for(i=catcnt=0;i<catno && catcnt<16;i++) { |
550 | for(i=catcnt=0;i<catno && catcnt<16;i++) { |
543 |
|
551 | if(wordchr(indbuf[i_category],cat[i].name)!=NULL) |
544 |
|
552 | categories[catcnt++]=cat[i].typ; |
545 | } |
553 | } |
546 | if(catcnt==0) return; |
554 | if(catcnt==0) return; |
547 | if(categories[0]!=cat[0].typ) |
555 | if(categories[0]!=cat[0].typ) |
548 | categories[catcnt++]=cat[0].typ; |
556 | categories[catcnt++]=cat[0].typ; |
549 |
|
557 | /* write module's name in the category.language files, for instance lists/X.fr |
- | 558 | * for french exercises |
|
- | 559 | */ |
|
550 | for(i=0;i<catcnt;i++) { |
560 | for(i=0;i<catcnt;i++) { |
551 |
|
561 | snprintf(buf,sizeof(buf),"%s/lists/%c.%s", |
552 |
|
562 | outdir,categories[i],lang[lind]); |
553 |
|
563 | f=fopen(buf,"a"); |
554 |
|
564 | if(f!=NULL) {fprintf(f,"%s\n",name); fclose(f);} |
555 | } |
565 | } |
556 |
|
566 | /* add serial number and language (resp. title, ...) to corresponding file */ |
557 | fprintf(langf,"%d:%s\n",serial,module_language); |
567 | fprintf(langf,"%d:%s\n",serial,module_language); |
558 | fprintf(titf,"%d:%s\n",serial,indbuf[i_title]); |
568 | fprintf(titf,"%d:%s\n",serial,indbuf[i_title]); |
559 | fprintf(descf,"%d:%s\n",serial,indbuf[i_description]); |
569 | fprintf(descf,"%d:%s\n",serial,indbuf[i_description]); |
560 | fprintf(authorf,"%d:%s\n",serial,indbuf[i_author]); |
570 | fprintf(authorf,"%d:%s\n",serial,indbuf[i_author]); |
561 | fprintf(versionf,"%d:%s\n",serial,indbuf[i_version]); |
571 | fprintf(versionf,"%d:%s\n",serial,indbuf[i_version]); |
562 | 572 | ||
563 |
|
573 | /* add module's information in html page for robots */ |
564 | snprintf(buf,sizeof(buf),"%s",indbuf[i_description]); |
574 | snprintf(buf,sizeof(buf),"%s",indbuf[i_description]); |
565 | for(pp=strchr(buf,','); pp; pp=strchr(pp,',')) |
575 | for(pp=strchr(buf,','); pp; pp=strchr(pp,',')) |
566 | string_modify(buf,pp,pp+1,","); |
576 | string_modify(buf,pp,pp+1,","); |
567 | if(strcmp(module_language,lang[lind])==0) |
577 | if(strcmp(module_language,lang[lind])==0) |
568 | fprintf(robotf,"%s ,%s,%s,%s,%s\n",name,module_language,name, |
578 | fprintf(robotf,"%s ,%s,%s,%s,%s\n",name,module_language,name, |
569 |
|
579 | indbuf[i_title], buf); |
570 | 580 | ||
571 |
|
581 | /* Normalize the information of trlist, using dictionary |
572 | |
582 | * -- bases/sys/domain.xx without suffix translation (--> english version) |
573 | |
583 | */ |
574 | entrycount=dentrycount; dicbuf=ddicbuf; |
584 | entrycount=dentrycount; dicbuf=ddicbuf; |
575 | memmove(entry,dentry,dentrycount*sizeof(entry[0])); |
585 | memmove(entry,dentry,dentrycount*sizeof(entry[0])); |
576 | unknown_type=unk_leave; |
586 | unknown_type=unk_leave; |
577 | for(i=0;i<trcnt;i++) { |
587 | for(i=0;i<trcnt;i++) { |
578 |
|
588 | detag(indbuf[trlist[i]]); |
579 |
|
589 | deaccent(indbuf[trlist[i]]); |
580 |
|
590 | comma(indbuf[trlist[i]]); |
581 |
|
591 | singlespace(indbuf[trlist[i]]); |
582 |
|
592 | translate(indbuf[trlist[i]]); |
583 | } |
593 | } |
- | 594 | /* Normalize the information, using dictionary |
|
- | 595 | * bases/sys/words.xx with suffix translation |
|
584 | 596 | */ |
|
585 | entrycount=mentrycount; dicbuf=mdicbuf; |
597 | entrycount=mentrycount; dicbuf=mdicbuf; |
586 | memmove(entry,mentry,mentrycount*sizeof(entry[0])); |
598 | memmove(entry,mentry,mentrycount*sizeof(entry[0])); |
587 | unknown_type=unk_leave; |
599 | unknown_type=unk_leave;/* used in translator_.c */ |
588 | for(i=0;i<trcnt;i++) { |
600 | for(i=0;i<trcnt;i++) { |
589 |
|
601 | suffix_translate(indbuf[trlist[i]]); |
590 |
|
602 | translate(indbuf[trlist[i]]); |
591 | } |
603 | } |
592 | 604 | ||
593 | /* taken contains all words already seen in the module index */ |
605 | /* taken contains all words already seen in the module index */ |
594 | taken[0]=0; takenlen=tweight=0; |
606 | taken[0]=0; takenlen=tweight=0; |
595 | /* append words of title */ |
607 | /* append words of title */ |
596 | ovlstrcpy(buf,indbuf[i_title]); towords(buf); |
608 | ovlstrcpy(buf,indbuf[i_title]); towords(buf); |
597 | appenditem2(buf,lind,serial,4,module_language); |
609 | appenditem2(buf,lind,serial,4,module_language); |
598 | 610 | ||
599 | /* |
611 | /* extract words of every other information except level */ |
600 | snprintf(buf,sizeof(buf),"%s %s %s %s %s %s %s %s %s %s %s %s %s %s %s", |
612 | snprintf(buf,sizeof(buf),"%s %s %s %s %s %s %s %s %s %s %s %s %s %s %s", |
601 |
|
613 | indbuf[i_description],indbuf[i_keywords], |
602 |
|
614 | indbuf[i_keywords_ca],indbuf[i_keywords_en],indbuf[i_keywords_fr], |
603 |
|
615 | indbuf[i_keywords_it],indbuf[i_keywords_nl], |
604 |
|
616 | indbuf[i_title_ca],indbuf[i_title_en],indbuf[i_title_fr], |
605 |
|
617 | indbuf[i_title_it],indbuf[i_title_nl], |
606 |
|
618 | indbuf[i_domain],indbuf[i_require],indbuf[i_author]); |
607 | towords(buf); |
619 | towords(buf); |
608 | appenditem2(buf,lind,serial, |
620 | appenditem2(buf,lind,serial,2,module_language); |
609 | 621 | ||
610 |
|
622 | /* this time the dictionary is the group dictionary sys/wgrp/wgrp |
611 | |
623 | * with a g (groupdic), not an m (maindic). See main, suffix, group below. |
612 | |
624 | * and delete unknown ?? and translate |
- | 625 | */ |
|
613 | entrycount=gentrycount; dicbuf=gdicbuf; |
626 | entrycount=gentrycount; dicbuf=gdicbuf; |
614 | memmove(entry,gentry,gentrycount*sizeof(entry[0])); |
627 | memmove(entry,gentry,gentrycount*sizeof(entry[0])); |
615 | 628 | ||
616 | /* append words |
629 | /* append words of every title information */ |
617 | ovlstrcpy(buf,indbuf[i_title]); |
630 | ovlstrcpy(buf,indbuf[i_title]); |
618 | unknown_type=unk_delete; |
631 | unknown_type=unk_delete; |
619 | translate(buf); |
632 | translate(buf); |
620 | appenditem1(buf,lind,serial,2,module_language); |
633 | appenditem1(buf,lind,serial,2,module_language); |
621 | 634 | ||
622 | /* append words |
635 | /* append words of the description (level excluded) */ |
623 | snprintf(buf,sizeof(buf),"%s", indbuf[i_description]); |
636 | snprintf(buf,sizeof(buf),"%s", indbuf[i_description]); |
624 | unknown_type=unk_delete; |
637 | unknown_type=unk_delete; |
625 | translate(buf); |
638 | translate(buf); |
626 | appenditem1(buf,lind,serial,4,module_language); |
639 | appenditem1(buf,lind,serial,4,module_language); |
627 | 640 | ||
628 | /* append words (or group of words) of keywords and domain |
641 | /* append words (or group of words) of keywords and domain */ |
629 | snprintf(buf,sizeof(buf),"%s, %s, %s, %s, %s, %s, %s", |
642 | snprintf(buf,sizeof(buf),"%s, %s, %s, %s, %s, %s, %s", |
630 |
|
643 | indbuf[i_domain],indbuf[i_keywords], |
631 |
|
644 | indbuf[i_keywords_ca], indbuf[i_keywords_en],indbuf[i_keywords_fr], |
632 |
|
645 | indbuf[i_keywords_it], indbuf[i_keywords_nl]); |
633 |
|
646 | unknown_type=unk_leave; |
634 | translate(buf); |
647 | translate(buf); |
635 | appenditem1(buf,lind,serial,2,module_language); |
648 | appenditem1(buf,lind,serial,2,module_language); |
636 | 649 | ||
637 |
|
650 | /* append level information, with weight 2 */ |
638 | snprintf(buf,sizeof(buf),"%s",indbuf[i_level]); |
651 | snprintf(buf,sizeof(buf),"%s",indbuf[i_level]); |
639 | ovlstrcpy(lbuf,"level"); |
652 | ovlstrcpy(lbuf,"level"); |
640 | for(p1=buf; *p1; p1++) if(!isalnum(*p1)) *p1=' '; |
653 | for(p1=buf; *p1; p1++) if(!isalnum(*p1)) *p1=' '; |
641 | q=buf+strlen(buf); |
654 | q=buf+strlen(buf); |
642 | for(p1=find_word_start(buf); (*p1) && (p1 < q) ; |
655 | for(p1=find_word_start(buf); (*p1) && (p1 < q) ; |
643 |
|
656 | p1=find_word_start(p2)) { |
644 |
|
657 | p2=find_word_end(p1); |
645 |
|
658 | if(p2!=NULL) *p2++=0; else p2=p1+strlen(p1); |
646 |
|
659 | if(!isalpha(*p1) || |
647 |
|
660 | (!isdigit(*(p1+1)) && *(p1+1)!=0) || |
648 |
|
661 | (*(p1+1)!=0 && *(p1+2)!=0)) |
649 |
|
662 | continue; |
650 |
|
663 | *p1=tolower(*p1); |
651 |
|
664 | ovlstrcpy(lbuf+strlen("level"),p1); |
652 |
|
665 | appenditem(lbuf,lind,serial,2,module_language); |
653 | } |
666 | } |
654 |
|
667 | /* append total weight of module to weight file site2/weight.xx */ |
655 | fprintf(weightf,"%d:%d\n",serial,tweight); |
668 | fprintf(weightf,"%d:%d\n",serial,tweight); |
656 | } |
669 | } |
657 | 670 | ||
658 | void modules(void) |
671 | void modules(void) |
659 | { |
672 | { |
660 | int i,j,k,d; |
673 | int i,j,k,d; |
661 | char namebuf[MAX_LINELEN+1]; |
674 | char namebuf[MAX_LINELEN+1]; |
662 | char mdic[MAX_LINELEN+1], sdic[MAX_LINELEN+1], gdic[MAX_LINELEN+1], ddic[MAX_LINELEN+1]; |
675 | char mdic[MAX_LINELEN+1], sdic[MAX_LINELEN+1], gdic[MAX_LINELEN+1], ddic[MAX_LINELEN+1]; |
663 | 676 | ||
664 | for(j=0;j<langcnt;j++) { |
677 | for(j=0;j<langcnt;j++) { |
665 |
|
678 | snprintf(namebuf,sizeof(namebuf),"%s/weight.%s",outdir,lang[j]); |
666 |
|
679 | weightf=fopen(namebuf,"w"); |
667 |
|
680 | snprintf(mdic,sizeof(mdic),"%s/%s.%s",dicdir,maindic,lang[j]); |
668 |
|
681 | snprintf(sdic,sizeof(sdic),"%s/%s.%s",dicdir,suffixdic,lang[j]); |
669 |
|
682 | snprintf(gdic,sizeof(gdic),"%s/%s.%s",dicdir,groupdic,lang[j]); |
670 |
|
683 | snprintf(ddic,sizeof(ddic),"%s/%s.%s",dicdir,domaindic,lang[j]); |
671 |
|
684 | suffix_dic(sdic); prepare_dic(gdic); |
672 |
|
685 | gdicbuf=dicbuf; gentrycount=entrycount; |
673 |
|
686 | memmove(gentry,entry,gentrycount*sizeof(entry[0])); |
674 |
|
687 | prepare_dic(mdic); |
675 |
|
688 | mdicbuf=dicbuf; mentrycount=entrycount; |
676 |
|
689 | memmove(mentry,entry,mentrycount*sizeof(entry[0])); |
677 |
|
690 | prepare_dic(ddic); |
678 |
|
691 | ddicbuf=dicbuf; dentrycount=entrycount; |
679 |
|
692 | memmove(dentry,entry,dentrycount*sizeof(entry[0])); |
680 |
|
693 | unknown_type=unk_leave; translate(ignore[j]); |
681 |
|
694 | for(i=0;i<modcnt;i++) { |
682 |
|
695 | if(mod[i].langcnt>0) { |
683 |
|
696 | for(d=k=0;k<mod[i].langcnt;k++) |
684 |
|
697 | if(mod[i].langs[k]<mod[i].langs[d]) d=k; |
685 |
|
698 | for(k=0;k<mod[i].langcnt && mod[i].langs[k]!=j;k++); |
686 |
|
699 | if(k>=mod[i].langcnt) k=d; |
687 |
|
700 | snprintf(namebuf,MAX_LINELEN,"%s.%s",mod[i].name, |
688 |
|
701 | lang[mod[i].langs[k]]); |
689 |
|
702 | onemodule(namebuf,mod[i].counts[k],j); |
690 |
|
703 | } |
691 |
|
704 | else { |
692 |
|
705 | onemodule(mod[i].name,mod[i].counts[0],j); |
693 |
|
706 | } |
694 |
|
707 | } |
695 |
|
708 | if(mentrycount>0) free(mdicbuf); |
696 |
|
709 | if(gentrycount>0) free(gdicbuf); |
697 |
|
710 | if(suffixcnt>0) free(sufbuf); |
698 |
|
711 | if(dentrycount>0) free(ddicbuf); |
699 |
|
712 | if(weightf) fclose(weightf); |
700 | } |
713 | } |
701 | } |
714 | } |
702 | 715 | ||
703 | /* FIXME ? differences with appenditem - use fprintf instead of snprintf */ |
716 | /* FIXME ? differences with appenditem - use fprintf instead of snprintf */ |
704 | void sappenditem(char *word, int lind, int serial, int weight) |
717 | void sappenditem(char *word, int lind, int serial, int weight) |
Line 721... | Line 734... | ||
721 | 734 | ||
722 | void onesheet(int serial, int lind) |
735 | void onesheet(int serial, int lind) |
723 | { |
736 | { |
724 | int i; |
737 | int i; |
725 | unsigned char trlist[]={ |
738 | unsigned char trlist[]={ |
726 |
|
739 | s_title,s_description,s_domain,s_keywords,s_remark |
727 | }; |
740 | }; |
728 | #define trcnt (sizeof(trlist)/sizeof(trlist[0])) |
741 | #define trcnt (sizeof(trlist)/sizeof(trlist[0])) |
729 | char *p1, *p2, buf[MAX_LINELEN+1]; |
742 | char *p1, *p2, buf[MAX_LINELEN+1]; |
730 | 743 | ||
731 | if(sheet_index(serial)) return; |
744 | if(sheet_index(serial)) return; |
Line 734... | Line 747... | ||
734 | fprintf(descf,"%d:%s\n",serial,sindbuf[s_description]); |
747 | fprintf(descf,"%d:%s\n",serial,sindbuf[s_description]); |
735 | entrycount=dentrycount; dicbuf=ddicbuf; |
748 | entrycount=dentrycount; dicbuf=ddicbuf; |
736 | memmove(entry,dentry,dentrycount*sizeof(entry[0])); |
749 | memmove(entry,dentry,dentrycount*sizeof(entry[0])); |
737 | unknown_type=unk_leave; |
750 | unknown_type=unk_leave; |
738 | for(i=0;i<trcnt;i++) { |
751 | for(i=0;i<trcnt;i++) { |
739 |
|
752 | detag(sindbuf[trlist[i]]); |
740 |
|
753 | deaccent(sindbuf[trlist[i]]); |
741 |
|
754 | comma(sindbuf[trlist[i]]); |
742 |
|
755 | singlespace(sindbuf[trlist[i]]); |
743 |
|
756 | translate(sindbuf[trlist[i]]); |
744 | } |
757 | } |
745 | 758 | ||
746 | entrycount=mentrycount; dicbuf=mdicbuf; |
759 | entrycount=mentrycount; dicbuf=mdicbuf; |
747 | memmove(entry,mentry,mentrycount*sizeof(entry[0])); |
760 | memmove(entry,mentry,mentrycount*sizeof(entry[0])); |
748 | unknown_type=unk_leave; |
761 | unknown_type=unk_leave; |
749 | for(i=0;i<trcnt;i++) { |
762 | for(i=0;i<trcnt;i++) { |
750 |
|
763 | suffix_translate(sindbuf[trlist[i]]); |
751 |
|
764 | translate(sindbuf[trlist[i]]); |
752 | } |
765 | } |
753 | taken[0]=0; takenlen=tweight=0; |
766 | taken[0]=0; takenlen=tweight=0; |
754 | ovlstrcpy(buf,sindbuf[s_title]); towords(buf); |
767 | ovlstrcpy(buf,sindbuf[s_title]); towords(buf); |
755 | for(p1=find_word_start(buf);*p1; |
768 | for(p1=find_word_start(buf);*p1; |
756 |
|
769 | p1=find_word_start(p2)) { |
757 |
|
770 | p2=find_word_end(p1); if(*p2) *p2++=0; |
758 |
|
771 | sappenditem(p1,lind,serial,4); |
759 | } |
772 | } |
760 | snprintf(buf,sizeof(buf),"%s %s %s %s", |
773 | snprintf(buf,sizeof(buf),"%s %s %s %s", |
761 |
|
774 | sindbuf[s_description],sindbuf[s_keywords], |
762 |
|
775 | sindbuf[s_domain],sindbuf[s_remark]); |
763 | towords(buf); |
776 | towords(buf); |
764 | for(p1=find_word_start(buf);*p1; |
777 | for(p1=find_word_start(buf);*p1; |
765 |
|
778 | p1=find_word_start(p2)) { |
766 |
|
779 | p2=find_word_end(p1); if(*p2) *p2++=0; |
767 |
|
780 | sappenditem(p1,lind,serial,2); |
768 | } |
781 | } |
769 | entrycount=gentrycount; dicbuf=gdicbuf; |
782 | entrycount=gentrycount; dicbuf=gdicbuf; |
770 | memmove(entry,gentry,gentrycount*sizeof(entry[0])); |
783 | memmove(entry,gentry,gentrycount*sizeof(entry[0])); |
771 | unknown_type=unk_delete; |
784 | unknown_type=unk_delete; |
772 | ovlstrcpy(buf,sindbuf[s_title]); translate(buf); |
785 | ovlstrcpy(buf,sindbuf[s_title]); translate(buf); |
773 | for(p1=find_word_start(buf); *p1; |
786 | for(p1=find_word_start(buf); *p1; |
774 |
|
787 | p1=find_word_start(p2)) { |
775 |
|
788 | p2=strchr(p1,','); |
776 |
|
789 | if(p2!=NULL) *p2++=0; else p2=p1+strlen(p1); |
777 |
|
790 | if(strlen(p1)<=0) continue; |
778 |
|
791 | sappenditem(p1,lind,serial,4); |
779 | } |
792 | } |
780 | snprintf(buf,sizeof(buf),"%s, %s, %s, %s", |
793 | snprintf(buf,sizeof(buf),"%s, %s, %s, %s", |
781 |
|
794 | sindbuf[s_description],sindbuf[s_keywords], |
782 |
|
795 | sindbuf[s_domain],sindbuf[s_remark]); |
783 | translate(buf); |
796 | translate(buf); |
784 | for(p1=find_word_start(buf); *p1; |
797 | for(p1=find_word_start(buf); *p1; |
785 |
|
798 | p1=find_word_start(p2)) { |
786 |
|
799 | p2=strchr(p1,','); |
787 |
|
800 | if(p2!=NULL) *p2++=0; else p2=p1+strlen(p1); |
788 |
|
801 | if(strlen(p1)<=0) continue; |
789 |
|
802 | sappenditem(p1,lind,serial,2); |
790 | } |
803 | } |
791 | fprintf(weightf,"%d:%d\n",serial,tweight); |
804 | fprintf(weightf,"%d:%d\n",serial,tweight); |
792 | } |
805 | } |
793 | - | ||
794 | 806 | ||
795 | void sheets(void) |
807 | void sheets(void) |
796 | { |
808 | { |
797 | int i,j; |
809 | int i,j; |
798 | char mdic[MAX_LINELEN+1], sdic[MAX_LINELEN+1], gdic[MAX_LINELEN+1]; |
810 | char mdic[MAX_LINELEN+1], sdic[MAX_LINELEN+1], gdic[MAX_LINELEN+1]; |
799 | char buf[MAX_LINELEN+1]; |
811 | char buf[MAX_LINELEN+1]; |
800 | 812 | ||
801 | for(j=0;j<langcnt;j++) { |
813 | for(j=0;j<langcnt;j++) { |
802 |
|
814 | snprintf(buf,sizeof(buf),"%s/index/title.%s",sheetdir,lang[j]); |
803 |
|
815 | titf=fopen(buf,"w"); |
804 |
|
816 | snprintf(buf,sizeof(buf),"%s/index/description.%s",sheetdir,lang[j]); |
805 |
|
817 | descf=fopen(buf,"w"); |
806 |
|
818 | snprintf(buf,sizeof(buf),"%s/index/%s",sheetdir,lang[j]); |
807 |
|
819 | indf=fopen(buf,"w"); |
808 |
|
820 | snprintf(buf,sizeof(buf),"%s/index/list.%s",sheetdir,lang[j]); |
809 |
|
821 | listf=fopen(buf,"w"); |
810 |
|
822 | snprintf(buf,sizeof(buf),"%s/index/weight.%s",sheetdir,lang[j]); |
811 |
|
823 | weightf=fopen(buf,"w"); |
812 |
|
824 | snprintf(buf,sizeof(buf),"%s/index/addr.%s",sheetdir,lang[j]); |
813 |
|
825 | addrf=fopen(buf,"w"); |
814 |
|
826 | snprintf(buf,sizeof(buf),"%s/index/serial.%s",sheetdir,lang[j]); |
815 |
|
827 | serialf=fopen(buf,"w"); |
816 |
|
828 | snprintf(mdic,sizeof(mdic),"%s/%s.%s",dicdir,maindic,lang[j]); |
817 |
|
829 | snprintf(sdic,sizeof(sdic),"%s/%s.%s",dicdir,suffixdic,lang[j]); |
818 |
|
830 | snprintf(gdic,sizeof(gdic),"%s/%s.%s",dicdir,groupdic,lang[j]); |
819 |
|
831 | suffix_dic(sdic); prepare_dic(gdic); |
820 |
|
832 | gdicbuf=dicbuf; gentrycount=entrycount; |
821 |
|
833 | memmove(gentry,entry,gentrycount*sizeof(entry[0])); |
822 |
|
834 | prepare_dic(mdic); |
823 |
|
835 | mdicbuf=dicbuf; mentrycount=entrycount; |
824 |
|
836 | memmove(mentry,entry,mentrycount*sizeof(entry[0])); |
825 |
|
837 | unknown_type=unk_leave; translate(ignore[j]); |
826 |
|
838 | for(i=0;i<modcnt;i++) { |
827 |
|
839 | if(mod[i].langs[0]!=j) continue; |
828 |
|
840 | fprintf(addrf,"%d:%s\n",i,mod[i].name+3); |
829 |
|
841 | fprintf(serialf,"%s:%d\n",mod[i].name+3,i); |
830 |
|
842 | onesheet(i,j); |
831 |
|
843 | } |
832 |
|
844 | if(mentrycount>0) free(mdicbuf); |
833 |
|
845 | if(gentrycount>0) free(gdicbuf); |
834 |
|
846 | if(suffixcnt>0) free(sufbuf); |
835 |
|
847 | fclose(titf); fclose(descf); fclose(indf); fclose(listf); |
836 |
|
848 | fclose(weightf); fclose(addrf); fclose(serialf); |
837 | } |
849 | } |
838 | } |
850 | } |
839 | 851 | ||
840 | int main() |
852 | int main() |
841 | { |
853 | { |