Rev 8086 | Rev 8122 | Go to most recent revision | Details | Compare with Previous | Last modification | View Log | RSS feed
Rev | Author | Line No. | Line |
---|---|---|---|
10 | reyssat | 1 | /* Copyright (C) 2002-2003 XIAO, Gang of Universite de Nice - Sophia Antipolis |
2 | * |
||
3 | * This program is free software; you can redistribute it and/or modify |
||
4 | * it under the terms of the GNU General Public License as published by |
||
5 | * the Free Software Foundation; either version 2 of the License, or |
||
6 | * (at your option) any later version. |
||
7 | * |
||
8 | * This program is distributed in the hope that it will be useful, |
||
9 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
||
10 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
||
11 | * GNU General Public License for more details. |
||
12 | * |
||
13 | * You should have received a copy of the GNU General Public License |
||
14 | * along with this program; if not, write to the Free Software |
||
15 | * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. |
||
16 | */ |
||
17 | |||
8084 | bpr | 18 | /* This program compares two text strings
19 | * according to the symtext syntax. */ |
||
10 | reyssat | 20 | |
21 | /* Input data: via environment variables. |
||
22 | * wims_exec_parm: line 1 = command (comp,expand,wordlist,random,1,2,3,...) |
||
23 | * line 2 = text to examine (for comp). |
||
24 | * line 3 and up = symtext syntax. |
||
25 | * w_symtext: dictionary style. |
||
26 | * w_symtext_option: option words. |
||
8084 | bpr | 27 | * |
10 | reyssat | 28 | * Output: two lines. |
29 | * Line 1: ERROR or OK |
||
30 | * Line 2: result depending on command. |
||
31 | */ |
||
32 | |||
33 | |||
34 | const char *codechar="_0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz"; /* 63 characters: '_', the digits, then upper- and lower-case ASCII letters; not referenced in the visible part of this file */ |
||
35 | |||
36 | #include "symtext.h" |
||
37 | |||
38 | struct block blockbuf[MAX_BLOCKS]; /* static storage for struct block records; the type is declared outside this file */ |
||
39 | int nextblock; /* NOTE(review): presumably the next free index in blockbuf - confirm in compile.c */ |
||
40 | listtype listbuf[MAX_LISTS]; /* static storage for list entries */ |
||
41 | int nextlist; /* NOTE(review): presumably the next free index in listbuf - confirm */ |
||
42 | listtype tagbuf[MAX_BLOCKS]; /* static storage for tags, sized like blockbuf */ |
||
43 | int nexttag; /* NOTE(review): presumably the next free index in tagbuf - confirm */ |
||
44 | |||
45 | struct poolstruct poolbuf[MAX_POOLS]; /* static storage for pool records */ |
||
46 | int nextpool; /* NOTE(review): presumably the next free index in poolbuf - confirm */ |
||
47 | |||
48 | int options; /* bit mask of the op_* flags below; cleared and filled in by getparms() */ |
||
8084 | bpr | 49 | #define op_nocase (1<<0) /* option word "nocase" */
50 | #define op_deaccent (1<<1) /* option word "deaccent" */ |
||
51 | #define op_reaccent (1<<2) /* option word "reaccent" */ |
||
52 | #define op_nopunct (1<<3) /* option word "nopunct" */ |
||
53 | #define op_nomath (1<<4) /* option word "nomath" */ |
||
54 | #define op_noparenth (1<<5) /* option words "noparenthesis" / "noparentheses" */ |
||
55 | #define op_nocs (1<<6) /* option word "nocs" */ |
||
56 | #define op_noquote (1<<7) /* option word "noquote" */ |
||
57 | #define op_matchall (1<<8) /* option word "matchall": main() keeps going after the first match */ |
||
58 | #define op_alphaonly (1<<9) /* option words "abconly" / "onlyabc" */ |
||
59 | #define op_alnumonly (1<<10) /* option words "alnumonly" / "onlyalnum" */ |
||
10 | reyssat | 60 | |
61 | char cmdbuf[256], stbuf[MAX_LINELEN+1], textbuf[MAX_LINELEN+1]; /* filled by getparms() from wims_exec_parm: command word (line 1), symtext syntax (line 3 and up), text to examine (line 2) */ |
||
62 | char wbuf[MAX_LINELEN+1]; /* emptied by main(); printed as "word list" when debug is on; filled outside this file */ |
||
63 | char cmdparm[1024]; /* written once by getparms() */ |
||
64 | char defbuf[MAX_LINELEN+1]; /* content of the style definition file, loaded by getparms() via readfile() */ |
||
65 | char style[MAX_NAMELEN+1]; /* value of the "style" option from line 1 */ |
||
66 | char styledir[MAX_FNAME+1]; /* path of the style definition file, later cut down to its directory */ |
||
67 | char optionbuf[1024]; /* option words of line 1 plus the "option" definition from defbuf; reused for the "debug" option value */ |
||
68 | char outbuf[4096]; /* printed by main() after "MATCH"; filled outside this file */ |
||
69 | char *outptr, *wptr; /* wptr is set to wbuf by main(); outptr is not used in the visible part of this file */ |
||
70 | int debug; /* debug level: 0 = off; set by main() and getparms() */ |
||
71 | |||
72 | enum { /* command codes; cmd_none is 0, the value getparms() resets cmd to */ |
||
73 | cmd_none, cmd_comp, cmd_debug, cmd_random, cmd_1, cmd_wordlist |
||
74 | }; |
||
75 | struct { /* maps a command word to its command code */ |
||
76 | char *name; int value; |
||
77 | } cmdlist[]={ /* entries are listed in ascending strcmp() order of name: verify_tables() checks the order with verify_order() and getparms() looks names up with search_list() */ |
||
8084 | bpr | 78 | {"1", cmd_1},
79 | {"comp", cmd_comp}, |
||
80 | {"compare", cmd_comp}, |
||
81 | {"debug", cmd_debug}, |
||
82 | {"match", cmd_comp}, |
||
83 | {"rand", cmd_random}, |
||
84 | {"random", cmd_random}, |
||
10 | reyssat | 85 | {"wordlist",cmd_wordlist},
8084 | bpr | 86 | {"words", cmd_wordlist}
10 | reyssat | 87 | };
88 | #define cmdcnt (sizeof(cmdlist)/sizeof(cmdlist[0])) /* number of entries in cmdlist */ |
||
89 | int cmd; /* command selected by getparms(); main() switches on it */ |
||
90 | |||
/* Report a fatal error and terminate the program.
 * msg is a printf-style format applied to the variadic arguments; the
 * formatted text is truncated to fit a 1024-byte buffer.
 * Prints "ERROR" on the first line of stdout and the message on the
 * second, then exits with status 1. Never returns.
 */
void error(char *msg,...)
{
    char text[1024];
    va_list args;

    va_start(args,msg);
    vsnprintf(text,sizeof(text),msg,args);
    va_end(args);

    fputs("ERROR\n",stdout);
    puts(text);     /* puts() supplies the trailing newline */
    exit(1);
}
||
102 | |||
/* Fixed-signature front end to error(): reports msg verbatim and exits.
 * main() installs this function as the error1/error2 callback.
 * msg is passed as data, never as a format string, so a '%' inside the
 * message cannot make error() read variadic arguments that were not supplied.
 */
void _error(char *msg)
{
    error("%s",msg);
}
||
107 | |||
8084 | bpr | 108 | /* read-in a file into buffer. Use open() and read(). |
109 | * Return buffer address which will be malloc'ed if buf=NULL. |
||
110 | */ |
||
10 | reyssat | 111 | char *readfile(char *fname, char buf[], long int buflen) |
112 | { |
||
113 | int fd, t; |
||
114 | struct stat st; |
||
115 | long int l, lc; |
||
116 | char *bf; |
||
117 | t=0; if(buf) buf[0]=0; |
||
118 | if(stat(fname,&st)) return NULL; |
||
119 | l=st.st_size; if(l<0) return NULL; |
||
120 | if(l>=buflen) { |
||
8084 | bpr | 121 | if(buflen<MAX_LINELEN) l=buflen-1; |
122 | else error("file_too_long %s",fname); |
||
10 | reyssat | 123 | } |
124 | fd=open(fname,O_RDONLY); if(fd==-1) return NULL; |
||
125 | if(buf==NULL) bf=xmalloc(l+8); else {bf=buf;if(l==0) {t=1; l=buflen-1;}} |
||
126 | lc=read(fd,bf,l); close(fd); |
||
8084 | bpr | 127 | if(lc<0 || lc>l || (lc!=l && t==0)) |
128 | {if(buf==NULL) free(bf); else buf[0]=0; return NULL;} |
||
10 | reyssat | 129 | bf[lc]=0; _tolinux(bf); return bf; |
130 | } |
||
131 | |||
8084 | bpr | 132 | /* get option word in a string */ |
10 | reyssat | 133 | void _getopt(char *name, char *p) |
134 | { |
||
135 | char *p1, *p2, *p3, *p4; |
||
136 | char buf[MAX_LINELEN+1]; |
||
8084 | bpr | 137 | |
10 | reyssat | 138 | snprintf(buf,sizeof(buf),"%s",p); |
139 | p1=find_word_start(name); |
||
140 | for(p2=buf;*p2;p2++) { |
||
8084 | bpr | 141 | if(myisspace(*p2)) *p2=' '; |
142 | if(*p2=='=') *p2=' '; |
||
10 | reyssat | 143 | } |
144 | *p=0; |
||
145 | p2=wordchr(buf,p1); if(p2==NULL) return; |
||
146 | for(p3=find_word_end(p2);myisspace(*p3);p3++) { |
||
8084 | bpr | 147 | if(*p3==' ') { |
148 | p3=find_word_start(p3); |
||
149 | switch(*p3) { |
||
150 | case '"': { |
||
151 | p4=strchr(p3+1,'"'); |
||
152 | goto tested; |
||
153 | } |
||
154 | case '(': { |
||
155 | p4=find_matching(p3+1,')'); |
||
156 | goto tested; |
||
157 | } |
||
158 | case '[': { |
||
159 | p4=find_matching(p3+1,']'); |
||
160 | goto tested; |
||
161 | } |
||
162 | case '{': { |
||
163 | p4=find_matching(p3+1,'}'); |
||
164 | tested: |
||
165 | if(p4) { |
||
166 | p3++; *p4=0; break; |
||
167 | } |
||
168 | else goto nomatch; |
||
169 | } |
||
170 | default: { |
||
171 | nomatch: |
||
172 | *find_word_end(p3)=0; |
||
173 | } |
||
174 | } |
||
175 | mystrncpy(p,p3,MAX_LINELEN); |
||
176 | return; |
||
177 | } |
||
10 | reyssat | 178 | } |
179 | *find_word_end(p2)=0; |
||
180 | memmove(p,p2,strlen(p2)+1); |
||
181 | } |
||
182 | |||
183 | void _getdef(char buf[], char *name, char value[]) |
||
184 | { |
||
185 | char *p1, *p2, *p3, *p4; |
||
186 | |||
8084 | bpr | 187 | if(*name==0) goto nothing; /* this would create segfault. */ |
10 | reyssat | 188 | for(p1=strstr(buf,name); p1!=NULL; p1=strstr(p1+1,name)) { |
8084 | bpr | 189 | p2=find_word_start(p1+strlen(name)); |
190 | if((p1>buf && !isspace(*(p1-1))) || *p2!='=') continue; |
||
191 | p3=p1; while(p3>buf && *(p3-1)!='\n') p3--; |
||
192 | p3=find_word_start(p3); |
||
193 | if(p3<p1 && *p3!='!') continue; |
||
194 | if(p3<p1) { |
||
195 | p3++; p4=find_word_end(p3); |
||
196 | if(find_word_start(p4)!=p1) continue; |
||
197 | if(p4-p3!=3 || (strncmp(p3,"set",3)!=0 && |
||
198 | strncmp(p3,"let",3)!=0 && |
||
199 | strncmp(p3,"def",3)!=0)) { |
||
200 | if(p4-p3!=6 || strncmp(p3,"define",6)!=0) continue; |
||
201 | } |
||
202 | } |
||
203 | p2++;p3=strchr(p2,'\n'); if(p3==NULL) p3=p2+strlen(p2); |
||
204 | p2=find_word_start(p2); |
||
205 | if(p2>p3) goto nothing; |
||
206 | if(p3-p2>=MAX_LINELEN) error("string_too_long def %s",name); |
||
207 | memmove(value,p2,p3-p2); value[p3-p2]=0; |
||
208 | strip_trailing_spaces(value); return; |
||
10 | reyssat | 209 | } |
210 | nothing: |
||
211 | value[0]=0; return; |
||
212 | } |
||
213 | |||
214 | char fnbuf[MAX_FNAME+1]; |
||
215 | |||
8084 | bpr | 216 | /* make a filename and check length */ |
10 | reyssat | 217 | char *mkfname(char buf[], char *s,...) |
218 | { |
||
219 | va_list vp; |
||
220 | char *p; |
||
221 | |||
222 | if(buf==NULL) p=fnbuf; else p=buf; |
||
223 | va_start(vp,s); |
||
224 | vsnprintf(p,MAX_FNAME,s,vp); |
||
225 | va_end(vp); |
||
226 | if(strlen(p)>=MAX_FNAME-1) error("name_too_long %.20s",p); |
||
227 | return p; |
||
228 | } |
||
229 | |||
8100 | bpr | 230 | #include "../../Lib/lines.c" |
10 | reyssat | 231 | #include "translate.c" |
232 | #include "match.c" |
||
233 | #include "compile.c" |
||
234 | |||
235 | void getparms(void) |
||
236 | { |
||
237 | char *p, *p2, *p3, lbuf[8]; |
||
238 | char buf[MAX_LINELEN+1], pbuf[MAX_LINELEN+1]; |
||
239 | struct stat st; |
||
240 | int i; |
||
8084 | bpr | 241 | |
10 | reyssat | 242 | cmd=0; |
243 | p=getenv("wims_exec_parm"); |
||
244 | if(p==NULL) return; |
||
245 | snprintf(pbuf,sizeof(pbuf),"%s",p); |
||
246 | rows2lines(pbuf); |
||
247 | p2=strchr(pbuf,'\n'); if(p2==NULL) return; else *p2++=0; |
||
8084 | bpr | 248 | p=find_word_start(pbuf); |
10 | reyssat | 249 | p3=find_word_end(p); if(p3-p>=sizeof(cmdbuf)) return; |
250 | if(*p==0) return; else *p3++=0; |
||
251 | memmove(cmdbuf,p,p3-p); cmdbuf[p3-p]=0; |
||
252 | p=p2; p2=strchr(p,'\n'); if(p2==NULL) p2=p+strlen(p); else *p2++=0; |
||
253 | if(p2<=find_word_start(p)) return; |
||
254 | if(p2-p<sizeof(textbuf)) { |
||
8084 | bpr | 255 | memmove(textbuf,p,p2-p); textbuf[p2-p]=0; |
10 | reyssat | 256 | } |
257 | p=p2; p2=p+strlen(p); |
||
258 | if(p2>p && p2-p<sizeof(stbuf)) { |
||
8084 | bpr | 259 | memmove(stbuf,p,p2-p); stbuf[p2-p]=0; |
10 | reyssat | 260 | } |
261 | i=search_list(cmdlist,cmdcnt,sizeof(cmdlist[0]),cmdbuf); |
||
262 | if(i>=0) cmd=cmdlist[i].value; |
||
263 | else error("bad_command %.20s",cmdbuf); |
||
264 | snprintf(cmdparm,sizeof(cmdparm),"%s",p2); |
||
8084 | bpr | 265 | |
10 | reyssat | 266 | options=0; |
267 | p=getenv("w_module_language"); if(p==NULL) p=""; |
||
268 | snprintf(lbuf,sizeof(lbuf),"%2s",p); |
||
269 | if(*p3) { |
||
8084 | bpr | 270 | snprintf(buf,sizeof(buf),"%s",p3); |
271 | _getopt("style",buf); |
||
272 | snprintf(style,sizeof(style),"%s",find_word_start(buf)); |
||
273 | *find_word_end(style)=0; |
||
274 | snprintf(buf,sizeof(buf),"%s",p3); |
||
275 | _getopt("language",buf); |
||
276 | if(buf[0]) snprintf(lbuf,sizeof(lbuf),"%2s",buf); |
||
10 | reyssat | 277 | } |
278 | lbuf[2]=0; |
||
3718 | reyssat | 279 | if(!myisalpha(lbuf[0]) || !myisalpha(lbuf[1])) ovlstrcpy(lbuf,"en"); |
10 | reyssat | 280 | styledir[0]=defbuf[0]=optionbuf[0]=buf[0]=0; |
281 | if(*style) { |
||
8084 | bpr | 282 | p=getenv("module_dir"); |
283 | if(p==NULL) { /* non-wims operation */ |
||
284 | snprintf(styledir,sizeof(styledir),"%s",style); |
||
285 | } |
||
286 | else { |
||
287 | for(i=0;i<MAX_NAMELEN && myisalnum(style[i]);i++); |
||
288 | style[i]=0; |
||
289 | if(style[0]) { /* style defined */ |
||
290 | if(*p!='/' && strstr(p,"..")==NULL) { /* check module dir */ |
||
291 | snprintf(styledir,sizeof(styledir),"%s/symtext/%s/%s/def",p,lbuf,style); |
||
292 | if(stat(styledir,&st)) styledir[0]=0; |
||
293 | } |
||
294 | if(styledir[0]==0) { /* check default */ |
||
295 | snprintf(styledir,sizeof(styledir),"%s/symtext/%s/%s/def",defaultdir,lbuf,style); |
||
296 | if(stat(styledir,&st)) error("style_not_found %s",style); |
||
297 | } |
||
298 | } |
||
299 | } |
||
300 | if(styledir[0]) { /* get def */ |
||
301 | readfile(styledir,defbuf,sizeof(defbuf)); |
||
302 | styledir[strlen(styledir)-4]=0; |
||
303 | suffix_dic(mkfname(NULL,"%s/suffix",styledir)); |
||
304 | transdic=diccnt; |
||
305 | if(prepare_dic("trans")==NULL) transdic=-1; |
||
306 | dic[transdic].unknown_type=unk_leave; |
||
307 | macrodic=diccnt; |
||
308 | if(prepare_dic("macros")==NULL) macrodic=-1; |
||
309 | dic[macrodic].unknown_type=unk_delete; |
||
310 | } |
||
10 | reyssat | 311 | } |
312 | _getdef(defbuf,"option",buf); |
||
313 | snprintf(optionbuf,sizeof(optionbuf),"%s %s",p3,buf); |
||
314 | if(wordchr(optionbuf,"nocase")!=NULL) options|=op_nocase; |
||
315 | if(wordchr(optionbuf,"deaccent")!=NULL) options|=op_deaccent; |
||
316 | if(wordchr(optionbuf,"reaccent")!=NULL) options|=op_reaccent; |
||
317 | if(wordchr(optionbuf,"nopunct")!=NULL) options|=op_nopunct; |
||
318 | if(wordchr(optionbuf,"nomath")!=NULL) options|=op_nomath; |
||
319 | if(wordchr(optionbuf,"noparenthesis")!=NULL) options|=op_noparenth; |
||
320 | if(wordchr(optionbuf,"noparentheses")!=NULL) options|=op_noparenth; |
||
321 | if(wordchr(optionbuf,"nocs")!=NULL) options|=op_nocs; |
||
322 | if(wordchr(optionbuf,"noquote")!=NULL) options|=op_noquote; |
||
323 | if(wordchr(optionbuf,"matchall")!=NULL) options|=op_matchall; |
||
324 | if(wordchr(optionbuf,"abconly")!=NULL) options|=op_alphaonly; |
||
325 | if(wordchr(optionbuf,"onlyabc")!=NULL) options|=op_alphaonly; |
||
326 | if(wordchr(optionbuf,"alnumonly")!=NULL) options|=op_alnumonly; |
||
327 | if(wordchr(optionbuf,"onlyalnum")!=NULL) options|=op_alnumonly; |
||
8084 | bpr | 328 | |
10 | reyssat | 329 | if(cmd==cmd_comp || cmd==cmd_debug) { |
8084 | bpr | 330 | _getopt("debug",optionbuf); |
331 | if(optionbuf[0]) { |
||
332 | i=atoi(optionbuf); |
||
333 | if(i>0 || strcmp(optionbuf,"0")==0) debug=i; else debug=1; |
||
334 | if(debug>0) cmd=cmd_debug; |
||
335 | } |
||
10 | reyssat | 336 | } |
337 | strip_enclosing_par(textbuf); |
||
338 | strfold(textbuf); |
||
339 | } |
||
340 | |||
341 | int verify_tables(void) |
||
342 | { |
||
343 | if(verify_order(builtin,builtincnt,sizeof(builtin[0]))) return -1; |
||
344 | if(verify_order(cmdlist,cmdcnt,sizeof(cmdlist[0]))) return -1; |
||
8084 | bpr | 345 | |
10 | reyssat | 346 | return 0; |
347 | } |
||
8086 | bpr | 348 | void (*string_modify)(char *start, char *bad_beg, char *bad_end, char *good,...)=string_modify1; /* function pointer initialised to string_modify1, which is defined outside the visible part of this file */ |
10 | reyssat | 349 | |
350 | int main(int argc, char *argv[]) |
||
351 | { |
||
352 | int i, n, mat; |
||
353 | char *p1, *p2; |
||
354 | char lbuf[MAX_LINELEN+1]; |
||
355 | |||
356 | if(argc>1 && strcmp(argv[1],"-t")==0) { |
||
8084 | bpr | 357 | if(verify_tables()==0) { |
358 | printf("Table orders OK.\n"); |
||
359 | return 0; |
||
360 | } |
||
361 | else return 1; |
||
10 | reyssat | 362 | } |
363 | error1=error2=_error; debug=0; |
||
364 | wptr=wbuf; wbuf[0]=0; |
||
365 | getparms(); |
||
366 | Mnext=Mbuf; Mcnt=0; |
||
367 | switch(cmd) { |
||
8084 | bpr | 368 | case cmd_comp: { |
369 | comp: |
||
370 | n=linenum(stbuf); |
||
371 | for(mat=0,i=1,p1=stbuf;i<=n;i++,p1=p2) { |
||
372 | p2=find_line_end(p1); if(*p2) *p2++=0; |
||
373 | p1=find_word_start(p1); |
||
374 | if(*p1==0) continue; |
||
375 | snprintf(lbuf,sizeof(lbuf),"%s",p1); |
||
376 | compile(lbuf); |
||
377 | mat=match(textbuf); |
||
378 | if(mat) { |
||
379 | printf("MATCH %d %s\n",i,outbuf); |
||
380 | if((options&op_matchall)==0) break; |
||
381 | } |
||
382 | } |
||
383 | if(debug) fprintf(stderr,"word list: %s\n",wbuf); |
||
384 | break; |
||
385 | } |
||
386 | case cmd_debug: { |
||
387 | if(debug==0) debug=1; |
||
388 | fprintf(stderr,"debug=%d.\n",debug); |
||
389 | for(i=0;i<diccnt;i++) |
||
390 | fprintf(stderr,"Dictionary %d: %s, %d entries.\n", |
||
391 | i+1,dic[i].name,dic[i].len); |
||
392 | goto comp; |
||
393 | } |
||
394 | case cmd_random: { |
||
395 | |||
396 | break; |
||
397 | } |
||
398 | case cmd_wordlist: { |
||
399 | |||
400 | break; |
||
401 | } |
||
402 | case cmd_1: { |
||
403 | |||
404 | break; |
||
405 | } |
||
406 | |||
407 | case cmd_none: |
||
408 | default: return 1; |
||
10 | reyssat | 409 | } |
410 | return 0; |
||
411 | } |
||
412 |