Subversion Repositories wimsdev

Rev

Rev 7840 | Rev 8195 | Go to most recent revision | Blame | Compare with Previous | Last modification | View Log | RSS feed

  1. /*    Copyright (C) 1998-2003 XIAO, Gang of Universite de Nice - Sophia Antipolis
  2.  *
  3.  *  This program is free software; you can redistribute it and/or modify
  4.  *  it under the terms of the GNU General Public License as published by
  5.  *  the Free Software Foundation; either version 2 of the License, or
  6.  *  (at your option) any later version.
  7.  *
  8.  *  This program is distributed in the hope that it will be useful,
  9.  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
  10.  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  11.  *  GNU General Public License for more details.
  12.  *
  13.  *  You should have received a copy of the GNU General Public License
  14.  *  along with this program; if not, write to the Free Software
  15.  *  Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  16.  */
  17.  
/* Work buffers shared by the text routines in this file.
 * _text_cut() fills t_buf[0] and t_buf[1] with the two operands of a
 * command; text_interact() additionally keeps its table in t_buf[2]. */
char t_buf[4][MAX_LINELEN+1];
/* Current mask, prepared by text() before a routine is called.
 * Routines test maskbuf[i]!='0', so an all-zero buffer (no mask given)
 * lets every position through. */
char maskbuf[MAX_LINELEN+1];
  20.  
  21. /* internal routine. */
  22. void _text_cut(char *p, char *w)
  23. {
  24.     char *p1, *p2;
  25.     p1=wordchr(p,w); if(p1==NULL) error2("syntax_error");
  26.     *p1=0; p2=find_word_start(p1+strlen(w));
  27.     ovlstrcpy(t_buf[0],p); ovlstrcpy(t_buf[1],p2);
  28.     strip_trailing_spaces(t_buf[0]);
  29.     substitute(t_buf[0]); substitute(t_buf[1]);
  30. }
  31.  
  32. /* Extract characters in buf[0] which are identical to
  33.  * corresponding characters in buf[1]. */
  34. void text_common(char *p)
  35. {
  36.     int i,j,n1,n2;
  37.     _text_cut(p,"and");
  38.     n1=strlen(t_buf[0]);n2=strlen(t_buf[1]);
  39.     if(n2<n1) n1=n2;
  40.     for(i=j=0;i<n1;i++) {
  41.       if(t_buf[0][i]==t_buf[1][i] && maskbuf[i]!='0') p[j++]=t_buf[0][i];
  42.     }
  43.     p[j]=0;
  44. }
  45.  
  46. /* Returns a mask string composed of '0's and '1's, where
  47.  * '0' means corresponding positions of buf[0] and buf[1] are
  48.  * equal.
  49.  */
  50. void text_compare(char *p)
  51. {
  52.     int min,max, i;
  53.     _text_cut(p,"and");
  54.     min=strlen(t_buf[0]); max=strlen(t_buf[1]);
  55.     if(min>max) {
  56.       i=min; min=max; max=i;
  57.     }
  58.     for(i=0; i<min; i++) {
  59.       if(t_buf[0][i]==t_buf[1][i]) p[i]='0'; else p[i]='1';
  60.     }
  61.     for(; i<max; i++) p[i]='1';
  62.     p[max]=0;
  63. }
  64.  
  65. /* copy text according to mask. */
  66. void text_copy(char *p)
  67. {
  68.     int i, j, n;
  69.  
  70.     snprintf(t_buf[0],MAX_LINELEN,"%s",p);
  71.     strip_trailing_spaces(t_buf[0]); substitute(t_buf[0]);
  72.     n=strlen(t_buf[0]);
  73.     for(i=j=0;i<n;i++) {
  74.       if(maskbuf[i]!='0') p[j++]=t_buf[0][i];
  75.     }
  76.     p[j]=0;
  77. }
  78.  
  79. /* returns count of characters in buf[1] which appear in buf[0]. */
  80. void text_count(char *p)
  81. {
  82.     int i, n, c;
  83.     _text_cut(p,"in");
  84.     n=strlen(t_buf[1]);
  85.     for(i=c=0;i<n;i++) {
  86.       if(strchr(t_buf[0],t_buf[1][i])!=NULL && maskbuf[i]!='0') c++;
  87.     }
  88.     snprintf(p,MAX_LINELEN,"%d",c);
  89. }
  90.  
  91. /* Extract characters in buf[0] which are different than
  92.  * corresponding characters in buf[1].
  93.  */
  94. void text_diff(char *p)
  95. {
  96.     int i,j,n1,n2;
  97.     _text_cut(p,"from");
  98.     n1=strlen(t_buf[0]);n2=strlen(t_buf[1]);
  99.     if(n2<n1) n1=n2;
  100.     for(i=j=0;i<n1;i++) {
  101.       if(t_buf[0][i]!=t_buf[1][i] && maskbuf[i]!='0') p[j++]=t_buf[0][i];
  102.     }
  103.     p[j]=0;
  104. }
  105.  
  106. /* put chars in buf[0] in a new string, into positions
  107.  * corresponding to '1's in the mask buf[1].
  108.  * Positions corresponding to '0's are filled by space.
  109.  * Fill stops at the end of buf[0]. If buf[1] is
  110.  * too short, it is reused from the start.
  111.  * FIXME: using 0 as a mask gives a bug
  112.  */
  113. /*
  114.    @@ !text expand abcdef mask 01
  115.    @@ a b c d e f
  116.    @@ !text expand abcdef mask 011
  117.    @@ ab cd e
  118.    @@ !text expand abcdefg using 101110
  119.    @@ a bcd e fg
  120.  */
  121. void text_expand(char *p)
  122. {
  123.     int i,j,k,n1,n2;
  124.     _text_cut(p,"using");
  125.     n1=strlen(t_buf[0]);n2=strlen(t_buf[1]);
  126.     if(n2==0) {p[0]=0; return;}
  127.     for(i=j=k=0;i<n1 && j<MAX_LINELEN;j++,k=j%n2) {
  128.       if(t_buf[1][k]=='0') p[j]=' ';
  129.       else p[j]=t_buf[0][i++];
  130.     }
  131.     p[j]=0;
  132. }
  133.  
  134. /* character by character replacement of buf[1] by buf[0],
  135.  * replacing only mask-effective chars.
  136.  * The resulting string is as long as buf[1], and the replacement
  137.  * stops when chars buf[0] has run out.
  138.  */
  139. /*
  140.   @@ !text insert abcefg into hijkl mask 10100
  141.   @@ aibkl
  142.  */
  143. void text_insert(char *p)
  144. {
  145.     int i,j,n1,n2;
  146.     _text_cut(p,"into");
  147.     n1=strlen(t_buf[0]);n2=strlen(t_buf[1]);
  148.     for(i=j=0; i<n2 && j<n1; i++) {
  149.       if(maskbuf[i]!='0') t_buf[1][i]=t_buf[0][j++];
  150.     }
  151.     snprintf(p,MAX_LINELEN,"%s",t_buf[1]);
  152. }
  153.  
  154. #define MAX_TLEN 96
  155.  
  156. /* interact of two strings according to rules
  157.  * defined a table.
  158.  */
  159. void text_interact(char *p)
  160. {
  161.     char *table, *dline, *tline[MAX_TLEN];
  162.     char *p1, *p2;
  163.     int i,j1,j2,k,l,l2,n;
  164.  
  165.     table=wordchr(p,"table");
  166.     if(table==NULL) error2("syntax_error");
  167.     *table=0; strip_trailing_spaces(p);
  168.     table=find_word_start(table+strlen("table"));
  169.     snprintf(t_buf[2],MAX_LINELEN,"%s",table);
  170.     _text_cut(p,"and");
  171.     strip_trailing_spaces(t_buf[2]); substitute(t_buf[2]);
  172.     n=linenum(t_buf[2])-1;
  173.     if(n>=MAX_TLEN) error2("text_bad_table");
  174.     p2=strchr(t_buf[2],'\n'); if(p2!=NULL) *p2++=0;
  175.     if(strlen(t_buf[2])!=n) error2("text_bad_table");
  176.     dline=t_buf[2];
  177.     for(i=0,p1=p2;i<n;i++,p1=p2) {
  178.       if(p1==NULL) error2("text_bad_table");
  179.       p2=strchr(p1,'\n');
  180.       if(p2!=NULL) *p2++=0;
  181.       if(strlen(p1)!=n) error2("text_bad_table");
  182.       tline[i]=p1;
  183.     }
  184.     l=strlen(t_buf[0]); l2=strlen(t_buf[1]); if(l2<l) l=l2;
  185.     for(i=k=0;i<l;i++) {
  186.       if(maskbuf[i]!='0') {
  187.           p1=strchr(dline,t_buf[0][i]);
  188.           p2=strchr(dline,t_buf[1][i]);
  189.           if(p1==NULL || p2==NULL) continue;
  190.           j1=p1-dline; j2=p2-dline;
  191.           if(j1>=n || j2>=n) continue; /* should not occur */
  192.           p[k++]=tline[j1][j2];
  193.       }
  194.     }
  195.     p[k]=0;
  196. }
  197.  
  198. /* returns a mask string composed of '0's and '1's, where
  199.  * '0' means corresponding char in buf[1] appears in buf[0].
  200.  */
  201. /*
  202.    @@ !text mark a in abcaefa
  203.    @@ 001001
  204.  */
  205. void text_mark(char *p)
  206. {
  207.     int i, n;
  208.     _text_cut(p,"in");
  209.     n=strlen(t_buf[1]);
  210.     for(i=0;i<n;i++) {
  211.       if(strchr(t_buf[0],t_buf[1][i])!=NULL) p[i]='1';
  212.       else p[i]='0';
  213.     }
  214.     p[i]=0;
  215. }
  216.  
  217. /* Returns a string whose characters are the maximum
  218.  * of the two corresponding chars in buf[0] and buf[1].
  219.  * Length of the string is the longuest one.
  220.  */
  221. void text_max(char *p)
  222. {
  223.     int min,max, i, j, k;
  224.     _text_cut(p,"and");
  225.     min=strlen(t_buf[0]); max=strlen(t_buf[1]);
  226.     if(min>max) {
  227.       i=min; min=max; max=i; j=0;
  228.     }
  229.     else j=1;
  230.     for(i=k=0; i<min; i++) {
  231.       if(maskbuf[i]=='0') continue;
  232.       if((unsigned char)t_buf[0][i]>(unsigned char)t_buf[1][i])
  233.           p[k++]=t_buf[0][i];
  234.       else p[k++]=t_buf[1][i];
  235.     }
  236.     for(;i<max;i++) {
  237.       if(maskbuf[i]!='0') p[k++]=t_buf[j][i];
  238.     }
  239.     p[k]=0;
  240. }
  241.  
  242. /* Returns a string whose characters are the minimum
  243.  * of the two corresponding chars in buf[0] and buf[1].
  244.  * Length of the string is the shortest one.
  245.  */
  246. void text_min(char *p)
  247. {
  248.     int min,max, i,k;
  249.     _text_cut(p,"and");
  250.     min=strlen(t_buf[0]); max=strlen(t_buf[1]);
  251.     if(min>max) {
  252.       i=min; min=max; max=i;
  253.     }
  254.     for(i=k=0; i<min; i++) {
  255.       if(maskbuf[i]=='0') continue;
  256.       if((unsigned char)t_buf[0][i]< (unsigned char)t_buf[1][i])
  257.           p[k++]=t_buf[0][i];
  258.       else p[k++]=t_buf[1][i];
  259.     }
  260.     p[k]=0;
  261. }
  262.  
  263. /* extract chars in buf[0] which occur in buf[1]. */
  264. void text_occur(char *p)
  265. {
  266.     int i,j,n;
  267.     char buf[MAX_LINELEN+1];
  268.     memset(buf,0,sizeof(buf));
  269.     _text_cut(p,"in");
  270.     n=strlen(t_buf[1]);
  271.     for(i=0;i<n;i++) {
  272.       char *pp;
  273.       if(maskbuf[i]=='0') continue;
  274.       pp=strchr(t_buf[0],t_buf[1][i]);
  275.       if(pp!=NULL) buf[pp - t_buf[0]]=1;
  276.     }
  277.     n=strlen(t_buf[0]);
  278.     for(i=j=0;i<n;i++) {
  279.       if(buf[i]) p[j++]=t_buf[0][i];
  280.     }
  281.     p[j]=0;
  282. }
  283.  
  284. /* remove characters of buf[1] in buf[0]. */
  285. void text_remove(char *p)
  286. {
  287.     int i, j, n;
  288.     _text_cut(p,"in");
  289.     n=strlen(t_buf[1]);
  290.     for(i=j=0;i<n;i++) {
  291.       if(strchr(t_buf[0],t_buf[1][i])==NULL
  292.          && maskbuf[i]!='0') p[j++]=t_buf[1][i];
  293.     }
  294.     p[j]=0;
  295. }
  296.  
  297. /* Cyclic reordering of text. */
  298. void text_reorder(char *p)
  299. {
  300.     int i,j,k,l,n,t;
  301.     int list[10240];
  302.     char buf[MAX_LINELEN+1];
  303.     _text_cut(p,"by"); *p=0;
  304.     n=itemnum(t_buf[1]); if(n<=0 || n>=10240) return;
  305.     for(i=0;i<n;i++) {
  306.         buf[0]=0; fnd_item(t_buf[1],i+1,buf);
  307.       j=atoi(buf); if(j<=0 || j>n) return;
  308.       list[i]=j;
  309.     }
  310.     t=strlen(t_buf[0]);
  311.     for(i=l=0;l<t && i<t+n;i++) {
  312.       j=i/n; k=j*n+list[i%n];
  313.       if(k>t || k<=0) continue;
  314.       p[l++]=t_buf[0][k-1];
  315.     }
  316.     p[l]=0;
  317. }
  318.  
  319. /* repeat a string to a given length. */
  320. void text_repeat(char *p)
  321. {
  322.     int n,i,k;
  323.     _text_cut(p,"to");
  324.     n=strevalue(t_buf[1]); if(n>MAX_LINELEN) n=MAX_LINELEN;
  325.     if(n<0) n=0;
  326.     k=strlen(t_buf[0]); if(k<=0) {*p=0; return;}
  327.     for(i=0;i<n;i++) {
  328.       p[i]=t_buf[0][i%k];
  329.     }
  330.     p[i]=0;
  331. }
  332.  
  333. /* reverse a string */
  334. void text_reverse(char *p)
  335. {
  336.     int i,n;
  337.     char buf[MAX_LINELEN+1];
  338.     snprintf(t_buf[0],sizeof(t_buf[0]),"%s",p);
  339.     substitute(t_buf[0]);
  340.     n=strlen(t_buf[0]); if(n>MAX_LINELEN) n=MAX_LINELEN;
  341.     for(i=0;i<n;i++) buf[i]=t_buf[0][n-1-i];
  342.     buf[n]=0;
  343.     ovlstrcpy(p,buf);
  344. }
  345.  
  346. /* remove characters of buf[1] not in buf[0]. */
  347. void text_select(char *p)
  348. {
  349.     int i, j, n;
  350.     _text_cut(p,"in");
  351.     n=strlen(t_buf[1]);
  352.     for(i=j=0;i<n;i++) {
  353.       if(strchr(t_buf[0],t_buf[1][i])!=NULL
  354.          && maskbuf[i]!='0') p[j++]=t_buf[1][i];
  355.     }
  356.     p[j]=0;
  357. }
  358.  
/* Dispatch table of the text commands.
 * name: command word looked up by text() through search_list().
 * tag: bit 0 is mask; when set, text() parses a "mask" clause for the
 *      command before calling the routine.
 * routine: handler, called with the rest of the command line.
 * Entries are listed in alphabetical order of name; textab_verify()
 * passes the table to verify_order().
 * NOTE(review): presumably search_list() relies on this ordering -
 * confirm before inserting an entry out of order. */
struct {
    char *name;
    int tag;
    void (*routine) (char *p);
} text_proc[]={
      {"appear",    1,  text_occur},
      {"common",    1,  text_common},
      {"compare",   0,  text_compare},
      {"copy",      1,  text_copy},
      {"count",     1,  text_count},
      {"delete",    1,  text_remove},
      {"diff",      1,  text_diff},
      {"differ",    1,  text_diff},
      {"drop",      1,  text_remove},
      {"expand",    0,  text_expand},
      {"extract",   1,  text_select},
      {"insert",    1,  text_insert},
      {"interact",  1,  text_interact},
      {"mark",      0,  text_mark},
      {"max",       1,  text_max},
      {"min",       1,  text_min},
      {"occur",     1,  text_occur},
      {"occurrence",1,  text_occur},
      {"pick",      1,  text_select},
      {"pickup",    1,  text_select},
      {"remove",    1,  text_remove},
      {"reorder",   0,  text_reorder},
      {"repeat",    0,  text_repeat},
      {"reverse",   0,  text_reverse},
      {"select",    1,  text_select}
};
/* Number of entries in text_proc. */
#define TEXT_PROC_NO (sizeof(text_proc)/sizeof(text_proc[0]))
  392.  
  393. int textab_verify(void) {
  394.     return verify_order(text_proc,TEXT_PROC_NO,sizeof(text_proc[0]));
  395. }
  396.  
  397. /* main entry point for text routines */
  398. void text(char *p)
  399. {
  400.     int i,j,n;
  401.     char *p1, *p2;
  402.     char c,cc;
  403.     char buf[MAX_LINELEN+1];
  404.     p1=find_word_start(p); p2=find_word_end(p1);
  405.     if(p2<=p1 || *p2==0) error2("syntax_error");
  406.     *p2=0;
  407.     i=search_list(text_proc,TEXT_PROC_NO,sizeof(text_proc[0]),p1);
  408.     if(i<0) error2("syntax_error");
  409.     snprintf(buf,sizeof(buf),"%s",find_word_start(p2+1));
  410.     if((text_proc[i].tag&1)!=0 && (p1=wordchr(buf,"mask"))!=NULL) {
  411.       *p1=0; strip_trailing_spaces(buf);
  412.       p2=find_word_start(p1+strlen("mask"));
  413.       strip_trailing_spaces(p2);
  414.       snprintf(maskbuf,sizeof(maskbuf),"%s",p2);
  415.       substitute(maskbuf);
  416.       n=strlen(maskbuf); if(n==0) goto zeromask;
  417.       c=maskbuf[n-1]; cc=0;
  418.       if(c=='+') cc='1'; if(c=='-') cc='0';
  419.       if(cc!=0) memset(maskbuf+n-1,cc,sizeof(maskbuf)-n);
  420.       else for(j=n;j<MAX_LINELEN;j++) maskbuf[j]=maskbuf[j%n];
  421.       maskbuf[sizeof(maskbuf)-1]=0;
  422.     }
  423.     else zeromask: memset(maskbuf,0,sizeof(maskbuf));
  424.     text_proc[i].routine(buf);
  425.     buf[MAX_LINELEN]=0;ovlstrcpy(p,buf);
  426. }
  427.