Rev 8160 | Rev 11119 | Go to most recent revision | Details | Compare with Previous | Last modification | View Log | RSS feed
Rev | Author | Line No. | Line |
---|---|---|---|
10 | reyssat | 1 | /* Copyright (C) 1998-2003 XIAO, Gang of Universite de Nice - Sophia Antipolis |
2 | * |
||
3 | * This program is free software; you can redistribute it and/or modify |
||
4 | * it under the terms of the GNU General Public License as published by |
||
5 | * the Free Software Foundation; either version 2 of the License, or |
||
6 | * (at your option) any later version. |
||
7 | * |
||
8 | * This program is distributed in the hope that it will be useful, |
||
9 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
||
10 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
||
11 | * GNU General Public License for more details. |
||
12 | * |
||
13 | * You should have received a copy of the GNU General Public License |
||
14 | * along with this program; if not, write to the Free Software |
||
15 | * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. |
||
16 | */ |
||
8160 | bpr | 17 | #include "libwims.h" |
10 | reyssat | 18 | |
3247 | bpr | 19 | char t_buf[4][MAX_LINELEN+1]; |
10 | reyssat | 20 | char maskbuf[MAX_LINELEN+1]; |
21 | |||
7840 | bpr | 22 | /* internal routine. */ |
10 | reyssat | 23 | void _text_cut(char *p, char *w) |
24 | { |
||
25 | char *p1, *p2; |
||
8195 | bpr | 26 | p1=wordchr(p,w); if(p1==NULL) error("syntax_error"); |
10 | reyssat | 27 | *p1=0; p2=find_word_start(p1+strlen(w)); |
3717 | reyssat | 28 | ovlstrcpy(t_buf[0],p); ovlstrcpy(t_buf[1],p2); |
10 | reyssat | 29 | strip_trailing_spaces(t_buf[0]); |
30 | substitute(t_buf[0]); substitute(t_buf[1]); |
||
31 | } |
||
32 | |||
7840 | bpr | 33 | /* Extract characters in buf[0] which are identical to |
34 | * corresponding characters in buf[1]. */ |
||
10 | reyssat | 35 | void text_common(char *p) |
36 | { |
||
37 | int i,j,n1,n2; |
||
38 | _text_cut(p,"and"); |
||
39 | n1=strlen(t_buf[0]);n2=strlen(t_buf[1]); |
||
40 | if(n2<n1) n1=n2; |
||
41 | for(i=j=0;i<n1;i++) { |
||
7840 | bpr | 42 | if(t_buf[0][i]==t_buf[1][i] && maskbuf[i]!='0') p[j++]=t_buf[0][i]; |
10 | reyssat | 43 | } |
44 | p[j]=0; |
||
45 | } |
||
46 | |||
7840 | bpr | 47 | /* Returns a mask string composed of '0's and '1's, where |
48 | * '0' means corresponding positions of buf[0] and buf[1] are |
||
8068 | bpr | 49 | * equal. |
50 | */ |
||
10 | reyssat | 51 | void text_compare(char *p) |
52 | { |
||
53 | int min,max, i; |
||
54 | _text_cut(p,"and"); |
||
55 | min=strlen(t_buf[0]); max=strlen(t_buf[1]); |
||
56 | if(min>max) { |
||
7840 | bpr | 57 | i=min; min=max; max=i; |
10 | reyssat | 58 | } |
59 | for(i=0; i<min; i++) { |
||
7840 | bpr | 60 | if(t_buf[0][i]==t_buf[1][i]) p[i]='0'; else p[i]='1'; |
10 | reyssat | 61 | } |
62 | for(; i<max; i++) p[i]='1'; |
||
63 | p[max]=0; |
||
64 | } |
||
65 | |||
7840 | bpr | 66 | /* copy text according to mask. */ |
10 | reyssat | 67 | void text_copy(char *p) |
68 | { |
||
69 | int i, j, n; |
||
7840 | bpr | 70 | |
10 | reyssat | 71 | snprintf(t_buf[0],MAX_LINELEN,"%s",p); |
72 | strip_trailing_spaces(t_buf[0]); substitute(t_buf[0]); |
||
73 | n=strlen(t_buf[0]); |
||
74 | for(i=j=0;i<n;i++) { |
||
7840 | bpr | 75 | if(maskbuf[i]!='0') p[j++]=t_buf[0][i]; |
10 | reyssat | 76 | } |
77 | p[j]=0; |
||
78 | } |
||
79 | |||
7840 | bpr | 80 | /* returns count of characters in buf[1] which appear in buf[0]. */ |
10 | reyssat | 81 | void text_count(char *p) |
82 | { |
||
83 | int i, n, c; |
||
84 | _text_cut(p,"in"); |
||
85 | n=strlen(t_buf[1]); |
||
86 | for(i=c=0;i<n;i++) { |
||
7840 | bpr | 87 | if(strchr(t_buf[0],t_buf[1][i])!=NULL && maskbuf[i]!='0') c++; |
10 | reyssat | 88 | } |
89 | snprintf(p,MAX_LINELEN,"%d",c); |
||
90 | } |
||
91 | |||
7840 | bpr | 92 | /* Extract characters in buf[0] which are different than |
93 | * corresponding characters in buf[1]. |
||
94 | */ |
||
10 | reyssat | 95 | void text_diff(char *p) |
96 | { |
||
97 | int i,j,n1,n2; |
||
98 | _text_cut(p,"from"); |
||
99 | n1=strlen(t_buf[0]);n2=strlen(t_buf[1]); |
||
100 | if(n2<n1) n1=n2; |
||
101 | for(i=j=0;i<n1;i++) { |
||
7840 | bpr | 102 | if(t_buf[0][i]!=t_buf[1][i] && maskbuf[i]!='0') p[j++]=t_buf[0][i]; |
10 | reyssat | 103 | } |
104 | p[j]=0; |
||
105 | } |
||
106 | |||
7840 | bpr | 107 | /* put chars in buf[0] in a new string, into positions |
108 | * corresponding to '1's in the mask buf[1]. |
||
109 | * Positions corresponding to '0's are filled by space. |
||
110 | * Fill stops at the end of buf[0]. If buf[1] is |
||
111 | * too short, it is reused from the start. |
||
8068 | bpr | 112 | * FIXME: using 0 as a mask gives a bug |
7840 | bpr | 113 | */ |
8068 | bpr | 114 | /* |
115 | @@ !text expand abcdef mask 01 |
||
116 | @@ a b c d e f |
||
117 | @@ !text expand abcdef mask 011 |
||
118 | @@ ab cd e |
||
119 | @@ !text expand abcdefg using 101110 |
||
120 | @@ a bcd e fg |
||
121 | */ |
||
10 | reyssat | 122 | void text_expand(char *p) |
123 | { |
||
124 | int i,j,k,n1,n2; |
||
125 | _text_cut(p,"using"); |
||
126 | n1=strlen(t_buf[0]);n2=strlen(t_buf[1]); |
||
127 | if(n2==0) {p[0]=0; return;} |
||
128 | for(i=j=k=0;i<n1 && j<MAX_LINELEN;j++,k=j%n2) { |
||
7840 | bpr | 129 | if(t_buf[1][k]=='0') p[j]=' '; |
130 | else p[j]=t_buf[0][i++]; |
||
10 | reyssat | 131 | } |
132 | p[j]=0; |
||
133 | } |
||
134 | |||
7840 | bpr | 135 | /* character by character replacement of buf[1] by buf[0], |
136 | * replacing only mask-effective chars. |
||
137 | * The resulting string is as long as buf[1], and the replacement |
||
138 | * stops when chars buf[0] has run out. |
||
139 | */ |
||
8068 | bpr | 140 | /* |
141 | @@ !text insert abcefg into hijkl mask 10100 |
||
142 | @@ aibkl |
||
143 | */ |
||
10 | reyssat | 144 | void text_insert(char *p) |
145 | { |
||
146 | int i,j,n1,n2; |
||
147 | _text_cut(p,"into"); |
||
148 | n1=strlen(t_buf[0]);n2=strlen(t_buf[1]); |
||
149 | for(i=j=0; i<n2 && j<n1; i++) { |
||
7840 | bpr | 150 | if(maskbuf[i]!='0') t_buf[1][i]=t_buf[0][j++]; |
10 | reyssat | 151 | } |
152 | snprintf(p,MAX_LINELEN,"%s",t_buf[1]); |
||
153 | } |
||
154 | |||
155 | #define MAX_TLEN 96 |
||
156 | |||
7840 | bpr | 157 | /* interact of two strings according to rules |
158 | * defined a table. |
||
159 | */ |
||
10 | reyssat | 160 | void text_interact(char *p) |
161 | { |
||
162 | char *table, *dline, *tline[MAX_TLEN]; |
||
163 | char *p1, *p2; |
||
164 | int i,j1,j2,k,l,l2,n; |
||
7840 | bpr | 165 | |
10 | reyssat | 166 | table=wordchr(p,"table"); |
8195 | bpr | 167 | if(table==NULL) error("syntax_error"); |
10 | reyssat | 168 | *table=0; strip_trailing_spaces(p); |
169 | table=find_word_start(table+strlen("table")); |
||
170 | snprintf(t_buf[2],MAX_LINELEN,"%s",table); |
||
171 | _text_cut(p,"and"); |
||
172 | strip_trailing_spaces(t_buf[2]); substitute(t_buf[2]); |
||
173 | n=linenum(t_buf[2])-1; |
||
8195 | bpr | 174 | if(n>=MAX_TLEN) error("text_bad_table"); |
10 | reyssat | 175 | p2=strchr(t_buf[2],'\n'); if(p2!=NULL) *p2++=0; |
8195 | bpr | 176 | if(strlen(t_buf[2])!=n) error("text_bad_table"); |
10 | reyssat | 177 | dline=t_buf[2]; |
178 | for(i=0,p1=p2;i<n;i++,p1=p2) { |
||
8195 | bpr | 179 | if(p1==NULL) error("text_bad_table"); |
7840 | bpr | 180 | p2=strchr(p1,'\n'); |
181 | if(p2!=NULL) *p2++=0; |
||
8195 | bpr | 182 | if(strlen(p1)!=n) error("text_bad_table"); |
7840 | bpr | 183 | tline[i]=p1; |
10 | reyssat | 184 | } |
185 | l=strlen(t_buf[0]); l2=strlen(t_buf[1]); if(l2<l) l=l2; |
||
186 | for(i=k=0;i<l;i++) { |
||
7840 | bpr | 187 | if(maskbuf[i]!='0') { |
188 | p1=strchr(dline,t_buf[0][i]); |
||
189 | p2=strchr(dline,t_buf[1][i]); |
||
190 | if(p1==NULL || p2==NULL) continue; |
||
191 | j1=p1-dline; j2=p2-dline; |
||
192 | if(j1>=n || j2>=n) continue; /* should not occur */ |
||
193 | p[k++]=tline[j1][j2]; |
||
194 | } |
||
10 | reyssat | 195 | } |
196 | p[k]=0; |
||
197 | } |
||
198 | |||
7840 | bpr | 199 | /* returns a mask string composed of '0's and '1's, where |
8068 | bpr | 200 | * '0' means corresponding char in buf[1] appears in buf[0]. |
201 | */ |
||
202 | /* |
||
203 | @@ !text mark a in abcaefa |
||
204 | @@ 001001 |
||
205 | */ |
||
10 | reyssat | 206 | void text_mark(char *p) |
207 | { |
||
208 | int i, n; |
||
209 | _text_cut(p,"in"); |
||
210 | n=strlen(t_buf[1]); |
||
211 | for(i=0;i<n;i++) { |
||
7840 | bpr | 212 | if(strchr(t_buf[0],t_buf[1][i])!=NULL) p[i]='1'; |
213 | else p[i]='0'; |
||
10 | reyssat | 214 | } |
215 | p[i]=0; |
||
216 | } |
||
217 | |||
7840 | bpr | 218 | /* Returns a string whose characters are the maximum |
219 | * of the two corresponding chars in buf[0] and buf[1]. |
||
220 | * Length of the string is the longuest one. |
||
221 | */ |
||
10 | reyssat | 222 | void text_max(char *p) |
223 | { |
||
224 | int min,max, i, j, k; |
||
225 | _text_cut(p,"and"); |
||
226 | min=strlen(t_buf[0]); max=strlen(t_buf[1]); |
||
227 | if(min>max) { |
||
7840 | bpr | 228 | i=min; min=max; max=i; j=0; |
10 | reyssat | 229 | } |
230 | else j=1; |
||
231 | for(i=k=0; i<min; i++) { |
||
7840 | bpr | 232 | if(maskbuf[i]=='0') continue; |
233 | if((unsigned char)t_buf[0][i]>(unsigned char)t_buf[1][i]) |
||
3247 | bpr | 234 | p[k++]=t_buf[0][i]; |
7840 | bpr | 235 | else p[k++]=t_buf[1][i]; |
10 | reyssat | 236 | } |
237 | for(;i<max;i++) { |
||
7840 | bpr | 238 | if(maskbuf[i]!='0') p[k++]=t_buf[j][i]; |
10 | reyssat | 239 | } |
240 | p[k]=0; |
||
241 | } |
||
242 | |||
7840 | bpr | 243 | /* Returns a string whose characters are the minimum |
244 | * of the two corresponding chars in buf[0] and buf[1]. |
||
245 | * Length of the string is the shortest one. |
||
246 | */ |
||
10 | reyssat | 247 | void text_min(char *p) |
248 | { |
||
249 | int min,max, i,k; |
||
250 | _text_cut(p,"and"); |
||
251 | min=strlen(t_buf[0]); max=strlen(t_buf[1]); |
||
252 | if(min>max) { |
||
7840 | bpr | 253 | i=min; min=max; max=i; |
10 | reyssat | 254 | } |
255 | for(i=k=0; i<min; i++) { |
||
7840 | bpr | 256 | if(maskbuf[i]=='0') continue; |
257 | if((unsigned char)t_buf[0][i]< (unsigned char)t_buf[1][i]) |
||
3247 | bpr | 258 | p[k++]=t_buf[0][i]; |
7840 | bpr | 259 | else p[k++]=t_buf[1][i]; |
10 | reyssat | 260 | } |
261 | p[k]=0; |
||
262 | } |
||
263 | |||
7840 | bpr | 264 | /* extract chars in buf[0] which occur in buf[1]. */ |
10 | reyssat | 265 | void text_occur(char *p) |
266 | { |
||
267 | int i,j,n; |
||
268 | char buf[MAX_LINELEN+1]; |
||
269 | memset(buf,0,sizeof(buf)); |
||
270 | _text_cut(p,"in"); |
||
271 | n=strlen(t_buf[1]); |
||
272 | for(i=0;i<n;i++) { |
||
7840 | bpr | 273 | char *pp; |
274 | if(maskbuf[i]=='0') continue; |
||
275 | pp=strchr(t_buf[0],t_buf[1][i]); |
||
276 | if(pp!=NULL) buf[pp - t_buf[0]]=1; |
||
10 | reyssat | 277 | } |
278 | n=strlen(t_buf[0]); |
||
279 | for(i=j=0;i<n;i++) { |
||
7840 | bpr | 280 | if(buf[i]) p[j++]=t_buf[0][i]; |
10 | reyssat | 281 | } |
282 | p[j]=0; |
||
283 | } |
||
284 | |||
7840 | bpr | 285 | /* remove characters of buf[1] in buf[0]. */ |
10 | reyssat | 286 | void text_remove(char *p) |
287 | { |
||
288 | int i, j, n; |
||
289 | _text_cut(p,"in"); |
||
290 | n=strlen(t_buf[1]); |
||
291 | for(i=j=0;i<n;i++) { |
||
7840 | bpr | 292 | if(strchr(t_buf[0],t_buf[1][i])==NULL |
293 | && maskbuf[i]!='0') p[j++]=t_buf[1][i]; |
||
10 | reyssat | 294 | } |
295 | p[j]=0; |
||
296 | } |
||
297 | |||
7840 | bpr | 298 | /* Cyclic reordering of text. */ |
10 | reyssat | 299 | void text_reorder(char *p) |
300 | { |
||
301 | int i,j,k,l,n,t; |
||
302 | int list[10240]; |
||
303 | char buf[MAX_LINELEN+1]; |
||
304 | _text_cut(p,"by"); *p=0; |
||
305 | n=itemnum(t_buf[1]); if(n<=0 || n>=10240) return; |
||
306 | for(i=0;i<n;i++) { |
||
307 | buf[0]=0; fnd_item(t_buf[1],i+1,buf); |
||
7840 | bpr | 308 | j=atoi(buf); if(j<=0 || j>n) return; |
309 | list[i]=j; |
||
10 | reyssat | 310 | } |
311 | t=strlen(t_buf[0]); |
||
312 | for(i=l=0;l<t && i<t+n;i++) { |
||
7840 | bpr | 313 | j=i/n; k=j*n+list[i%n]; |
314 | if(k>t || k<=0) continue; |
||
315 | p[l++]=t_buf[0][k-1]; |
||
10 | reyssat | 316 | } |
317 | p[l]=0; |
||
318 | } |
||
319 | |||
7840 | bpr | 320 | /* repeat a string to a given length. */ |
10 | reyssat | 321 | void text_repeat(char *p) |
322 | { |
||
323 | int n,i,k; |
||
324 | _text_cut(p,"to"); |
||
325 | n=strevalue(t_buf[1]); if(n>MAX_LINELEN) n=MAX_LINELEN; |
||
326 | if(n<0) n=0; |
||
327 | k=strlen(t_buf[0]); if(k<=0) {*p=0; return;} |
||
328 | for(i=0;i<n;i++) { |
||
7840 | bpr | 329 | p[i]=t_buf[0][i%k]; |
10 | reyssat | 330 | } |
331 | p[i]=0; |
||
332 | } |
||
333 | |||
7840 | bpr | 334 | /* reverse a string */ |
10 | reyssat | 335 | void text_reverse(char *p) |
336 | { |
||
337 | int i,n; |
||
338 | char buf[MAX_LINELEN+1]; |
||
339 | snprintf(t_buf[0],sizeof(t_buf[0]),"%s",p); |
||
340 | substitute(t_buf[0]); |
||
341 | n=strlen(t_buf[0]); if(n>MAX_LINELEN) n=MAX_LINELEN; |
||
342 | for(i=0;i<n;i++) buf[i]=t_buf[0][n-1-i]; |
||
343 | buf[n]=0; |
||
3717 | reyssat | 344 | ovlstrcpy(p,buf); |
10 | reyssat | 345 | } |
346 | |||
7840 | bpr | 347 | /* remove characters of buf[1] not in buf[0]. */ |
10 | reyssat | 348 | void text_select(char *p) |
349 | { |
||
350 | int i, j, n; |
||
351 | _text_cut(p,"in"); |
||
352 | n=strlen(t_buf[1]); |
||
353 | for(i=j=0;i<n;i++) { |
||
7840 | bpr | 354 | if(strchr(t_buf[0],t_buf[1][i])!=NULL |
355 | && maskbuf[i]!='0') p[j++]=t_buf[1][i]; |
||
10 | reyssat | 356 | } |
357 | p[j]=0; |
||
358 | } |
||
359 | |||
7840 | bpr | 360 | /* tag: bit 0 is mask. */ |
10 | reyssat | 361 | struct { |
362 | char *name; |
||
363 | int tag; |
||
364 | void (*routine) (char *p); |
||
365 | } text_proc[]={ |
||
7840 | bpr | 366 | {"appear", 1, text_occur}, |
367 | {"common", 1, text_common}, |
||
368 | {"compare", 0, text_compare}, |
||
369 | {"copy", 1, text_copy}, |
||
370 | {"count", 1, text_count}, |
||
371 | {"delete", 1, text_remove}, |
||
372 | {"diff", 1, text_diff}, |
||
373 | {"differ", 1, text_diff}, |
||
374 | {"drop", 1, text_remove}, |
||
375 | {"expand", 0, text_expand}, |
||
376 | {"extract", 1, text_select}, |
||
377 | {"insert", 1, text_insert}, |
||
378 | {"interact", 1, text_interact}, |
||
379 | {"mark", 0, text_mark}, |
||
380 | {"max", 1, text_max}, |
||
381 | {"min", 1, text_min}, |
||
382 | {"occur", 1, text_occur}, |
||
383 | {"occurrence",1, text_occur}, |
||
384 | {"pick", 1, text_select}, |
||
385 | {"pickup", 1, text_select}, |
||
386 | {"remove", 1, text_remove}, |
||
387 | {"reorder", 0, text_reorder}, |
||
388 | {"repeat", 0, text_repeat}, |
||
389 | {"reverse", 0, text_reverse}, |
||
390 | {"select", 1, text_select} |
||
10 | reyssat | 391 | }; |
392 | #define TEXT_PROC_NO (sizeof(text_proc)/sizeof(text_proc[0])) |
||
393 | |||
394 | int textab_verify(void) { |
||
395 | return verify_order(text_proc,TEXT_PROC_NO,sizeof(text_proc[0])); |
||
396 | } |
||
397 | |||
7840 | bpr | 398 | /* main entry point for text routines */ |
10 | reyssat | 399 | void text(char *p) |
400 | { |
||
401 | int i,j,n; |
||
402 | char *p1, *p2; |
||
403 | char c,cc; |
||
404 | char buf[MAX_LINELEN+1]; |
||
405 | p1=find_word_start(p); p2=find_word_end(p1); |
||
8195 | bpr | 406 | if(p2<=p1 || *p2==0) error("syntax_error"); |
10 | reyssat | 407 | *p2=0; |
408 | i=search_list(text_proc,TEXT_PROC_NO,sizeof(text_proc[0]),p1); |
||
8195 | bpr | 409 | if(i<0) error("syntax_error"); |
10 | reyssat | 410 | snprintf(buf,sizeof(buf),"%s",find_word_start(p2+1)); |
411 | if((text_proc[i].tag&1)!=0 && (p1=wordchr(buf,"mask"))!=NULL) { |
||
7840 | bpr | 412 | *p1=0; strip_trailing_spaces(buf); |
413 | p2=find_word_start(p1+strlen("mask")); |
||
414 | strip_trailing_spaces(p2); |
||
415 | snprintf(maskbuf,sizeof(maskbuf),"%s",p2); |
||
416 | substitute(maskbuf); |
||
417 | n=strlen(maskbuf); if(n==0) goto zeromask; |
||
418 | c=maskbuf[n-1]; cc=0; |
||
419 | if(c=='+') cc='1'; if(c=='-') cc='0'; |
||
420 | if(cc!=0) memset(maskbuf+n-1,cc,sizeof(maskbuf)-n); |
||
421 | else for(j=n;j<MAX_LINELEN;j++) maskbuf[j]=maskbuf[j%n]; |
||
422 | maskbuf[sizeof(maskbuf)-1]=0; |
||
10 | reyssat | 423 | } |
424 | else zeromask: memset(maskbuf,0,sizeof(maskbuf)); |
||
425 | text_proc[i].routine(buf); |
||
3717 | reyssat | 426 | buf[MAX_LINELEN]=0;ovlstrcpy(p,buf); |
10 | reyssat | 427 | } |