Subversion Repositories wimsdev

Rev

Rev 7676 | Rev 8195 | Go to most recent revision | Blame | Compare with Previous | Last modification | View Log | RSS feed

  1. /*    Copyright (C) 2002-2003 XIAO, Gang of Universite de Nice - Sophia Antipolis
  2.  *
  3.  *  This program is free software; you can redistribute it and/or modify
  4.  *  it under the terms of the GNU General Public License as published by
  5.  *  the Free Software Foundation; either version 2 of the License, or
  6.  *  (at your option) any later version.
  7.  *
  8.  *  This program is distributed in the hope that it will be useful,
  9.  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
  10.  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  11.  *  GNU General Public License for more details.
  12.  *
  13.  *  You should have received a copy of the GNU General Public License
  14.  *  along with this program; if not, write to the Free Software
  15.  *  Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  16.  */
  17.  
  18. /* This program computes an optimal coding of variable lengths
  19.  * on a given distribution of probabilities,
  20.  * using Huffman algorithm */
  21.  
  22. /* Input data: via two environment variables.
  23.  * wims_exec_parm is a comma-separated list of probability distributions.
  24.  * Limited to MAX_ITEMS.
  25.  * The input data will be scaled to unit sum.
  26.  * w_huffman_radix is the encoding radix, between 2 and MAX_RADIX.
  27.  *
  28.  * Output: two lines.
  29.  * Line 1: Entropy and Average code length, comma-separated.
  30.  * Line 2: comma-separated list of codes.
  31.  */
  32.  
  33. #define MAX_ITEMS 2048
  34. #define MAX_CODELEN 100
  35.  
  36. const char *codechar="0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz";
  37. #define MAX_RADIX strlen(codechar)
  38.  
  39. #include "../Lib/libwims.h"
  40.  
  41. struct {
  42.     double prob;
  43.     int ind;
  44.     unsigned char code[MAX_CODELEN];
  45.     int codelen;
  46. } maintab[MAX_ITEMS*2];
  47. int itemcnt, origcnt;
  48.  
  49. int indtab[MAX_ITEMS];
  50. int indcnt;
  51. int radix;
  52. double entropy, avelen;
  53.  
  54. void error(char *msg)
  55. {
  56.     fprintf(stderr,"%s\n",msg);
  57.     printf("ERROR\n");
  58.     exit(1);
  59. }
  60.  
  61. int indcmp(const void *p1, const void *p2)
  62. {
  63.     const int *i1, *i2;
  64.     double d1, d2;
  65.  
  66.     i1=p1; i2=p2;
  67.     d1=maintab[*i1].prob; d2=maintab[*i2].prob;
  68.     if(d1==d2) return 0;
  69.     if(d1>d2) return -1;
  70.     else return 1;
  71. }
  72.  
  73. void huffman(void)
  74. {
  75.     int t, i, j, l;
  76.     double d;
  77.  
  78.     while(indcnt>radix) {
  79.       qsort(indtab,indcnt,sizeof(indtab[0]),indcmp);
  80.       if(radix>2) t=(indcnt+radix-3)%(radix-1)+2;
  81.       else t=2;
  82.       d=0;
  83.       for(i=indcnt-t; i<indcnt; i++) {
  84.           d+=maintab[indtab[i]].prob;
  85.           maintab[indtab[i]].ind=itemcnt;
  86.       }
  87.       maintab[itemcnt].prob=d;
  88.       maintab[itemcnt].ind=-1;
  89.       maintab[itemcnt].codelen=-1;
  90.       indtab[indcnt-t]=itemcnt;
  91.       indcnt-=t-1; itemcnt++;
  92.     }
  93.     for(i=0;i<indcnt;i++) {
  94.       maintab[indtab[i]].codelen=1;
  95.       maintab[indtab[i]].code[0]=i;
  96.       maintab[indtab[i]].ind=0;
  97.     }
  98.     for(i=itemcnt-1;i>=0;i--) {
  99.       if(maintab[i].codelen>0) continue;
  100.       j=maintab[i].ind; l=maintab[j].codelen;
  101.       if(l>=MAX_CODELEN) error("Code too long.");
  102.       memmove(maintab[i].code,maintab[j].code,l);
  103.       maintab[i].code[l]=maintab[j].ind++;
  104.       maintab[i].codelen=l+1;
  105.       maintab[i].ind=0;
  106.     }
  107. }
  108.  
  109. void output(void)
  110. {
  111.     int i, j;
  112.     double d;
  113.  
  114.     d=0;
  115.     for(i=0;i<origcnt;i++) d+=maintab[i].prob*maintab[i].codelen;
  116.     printf("%f,%f\n",entropy,d);
  117.     for(i=0;i<origcnt;i++) {
  118.       for(j=0;j<maintab[i].codelen;j++) printf("%c",codechar[(int) maintab[i].code[j]]);
  119.       if(i<origcnt-1) printf(",");
  120.       else printf("\n");
  121.     }
  122. }
  123.  
  124. void getparm(char *p)
  125. {
  126.     int i;
  127.     char *p1, *p2;
  128.     double d1, dt;
  129.  
  130.     origcnt=0; dt=0;
  131.     for(p1=find_word_start(p);
  132.       *p1; p1=find_word_start(p2)) {
  133.       for(p2=p1; *p2 && strchr(",;",*p2)==NULL; p2++);
  134.       if(*p2) *p2++=0;
  135.       d1=strevalue(p1);
  136.       if(!isfinite(d1) || d1<0) {
  137.           char buf[256];
  138.           snprintf(buf,sizeof(buf),"Bad data: %s",p1);
  139.           error(buf);
  140.       }
  141.       maintab[origcnt++].prob=d1;
  142.       dt+=d1;
  143.     }
  144.     if(dt*1000000<1) error("Empty data sum.");
  145.     if(origcnt<2) error("Insufficient data for encoding.");
  146.     itemcnt=origcnt;
  147.     if(dt!=1) for(i=0; i<origcnt; i++) maintab[i].prob/=dt;
  148.  
  149.     entropy=0;
  150.     for(i=0;i<origcnt;i++) {
  151.       maintab[i].codelen=-1; maintab[i].ind=-1;
  152.       indtab[i]=i;
  153.       d1=maintab[i].prob;
  154.       if(d1>0) entropy-=d1*log(d1);
  155.     }
  156.     entropy=entropy/log(radix);
  157.     indcnt=origcnt;
  158. }
  159.  
  160. int main()
  161. {
  162.     char *p;
  163.     int r;
  164.  
  165.     error1=error; error2=error; error3=error;
  166.     p=getenv("w_huffman_radix");
  167.     if(p==NULL || *p==0) p=getenv("huffman_radix");
  168.     if(p==NULL || *p==0) radix=2;
  169.     else {
  170.       r=atoi(p); if(r!=0) radix=r;
  171.     }
  172.     if(radix<2 || radix>MAX_RADIX) error("Bad radix.");
  173.  
  174.     p=getenv("wims_exec_parm");
  175.     if(p==NULL || *p==0) error("No input data.");
  176.     getparm(p);
  177.  
  178.     huffman();
  179.     output();
  180.  
  181.     return 0;
  182. }
  183.  
  184.