Subversion Repositories wimsdev

Rev

Rev 8171 | Go to most recent revision | Blame | Compare with Previous | Last modification | View Log | RSS feed

  1. /*    Copyright (C) 2002-2003 XIAO, Gang of Universite de Nice - Sophia Antipolis
  2.  *
  3.  *  This program is free software; you can redistribute it and/or modify
  4.  *  it under the terms of the GNU General Public License as published by
  5.  *  the Free Software Foundation; either version 2 of the License, or
  6.  *  (at your option) any later version.
  7.  *
  8.  *  This program is distributed in the hope that it will be useful,
  9.  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
  10.  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  11.  *  GNU General Public License for more details.
  12.  *
  13.  *  You should have received a copy of the GNU General Public License
  14.  *  along with this program; if not, write to the Free Software
  15.  *  Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  16.  */
  17.  
  18. /* This program computes an optimal coding of variable lengths
  19.  * on a given distribution of probabilities,
  20.  * using Huffman algorithm */
  21.  
  22. /* Input data: via two environment variables.
  23.  * wims_exec_parm is a comma-separated list of probability distributions.
  24.  * Limited to MAX_ITEMS.
  25.  * The input data will be scaled to unit sum.
  26.  * w_huffman_radix is the encoding radix, between 2 and MAX_RADIX.
  27.  *
  28.  * Output: two lines.
  29.  * Line 1: Entropy and Average code length, comma-separated.
  30.  * Line 2: comma-separated list of codes.
  31.  */
  32.  
  33. #define MAX_ITEMS 2048
  34. #define MAX_CODELEN 100
  35.  
  36. const char *codechar="0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz";
  37. #define MAX_RADIX strlen(codechar)
  38.  
  39. #include "../Lib/libwims.h"
  40.  
  41. struct {
  42.     double prob;
  43.     int ind;
  44.     unsigned char code[MAX_CODELEN];
  45.     int codelen;
  46. } maintab[MAX_ITEMS*2];
  47. int itemcnt, origcnt;
  48.  
  49. int indtab[MAX_ITEMS];
  50. int indcnt;
  51. int radix;
  52. double entropy, avelen;
  53.  
  54.  
  55. int indcmp(const void *p1, const void *p2)
  56. {
  57.     const int *i1, *i2;
  58.     double d1, d2;
  59.  
  60.     i1=p1; i2=p2;
  61.     d1=maintab[*i1].prob; d2=maintab[*i2].prob;
  62.     if(d1==d2) return 0;
  63.     if(d1>d2) return -1;
  64.     else return 1;
  65. }
  66.  
  67. void huffman(void)
  68. {
  69.     int t, i, j, l;
  70.     double d;
  71.  
  72.     while(indcnt>radix) {
  73.       qsort(indtab,indcnt,sizeof(indtab[0]),indcmp);
  74.       if(radix>2) t=(indcnt+radix-3)%(radix-1)+2;
  75.       else t=2;
  76.       d=0;
  77.       for(i=indcnt-t; i<indcnt; i++) {
  78.           d+=maintab[indtab[i]].prob;
  79.           maintab[indtab[i]].ind=itemcnt;
  80.       }
  81.       maintab[itemcnt].prob=d;
  82.       maintab[itemcnt].ind=-1;
  83.       maintab[itemcnt].codelen=-1;
  84.       indtab[indcnt-t]=itemcnt;
  85.       indcnt-=t-1; itemcnt++;
  86.     }
  87.     for(i=0;i<indcnt;i++) {
  88.       maintab[indtab[i]].codelen=1;
  89.       maintab[indtab[i]].code[0]=i;
  90.       maintab[indtab[i]].ind=0;
  91.     }
  92.     for(i=itemcnt-1;i>=0;i--) {
  93.       if(maintab[i].codelen>0) continue;
  94.       j=maintab[i].ind; l=maintab[j].codelen;
  95.       if(l>=MAX_CODELEN) error("Code too long.");
  96.       memmove(maintab[i].code,maintab[j].code,l);
  97.       maintab[i].code[l]=maintab[j].ind++;
  98.       maintab[i].codelen=l+1;
  99.       maintab[i].ind=0;
  100.     }
  101. }
  102.  
  103. void output(void)
  104. {
  105.     int i, j;
  106.     double d;
  107.  
  108.     d=0;
  109.     for(i=0;i<origcnt;i++) d+=maintab[i].prob*maintab[i].codelen;
  110.     printf("%f,%f\n",entropy,d);
  111.     for(i=0;i<origcnt;i++) {
  112.       for(j=0;j<maintab[i].codelen;j++) printf("%c",codechar[(int) maintab[i].code[j]]);
  113.       if(i<origcnt-1) printf(",");
  114.       else printf("\n");
  115.     }
  116. }
  117.  
  118. void getparm(char *p)
  119. {
  120.     int i;
  121.     char *p1, *p2;
  122.     double d1, dt;
  123.  
  124.     origcnt=0; dt=0;
  125.     for(p1=find_word_start(p);
  126.       *p1; p1=find_word_start(p2)) {
  127.       for(p2=p1; *p2 && strchr(",;",*p2)==NULL; p2++);
  128.       if(*p2) *p2++=0;
  129.       d1=strevalue(p1);
  130.       if(!isfinite(d1) || d1<0) {
  131.           char buf[256];
  132.           snprintf(buf,sizeof(buf),"Bad data: %s",p1);
  133.           error(buf);
  134.       }
  135.       maintab[origcnt++].prob=d1;
  136.       dt+=d1;
  137.     }
  138.     if(dt*1000000<1) error("Empty data sum.");
  139.     if(origcnt<2) error("Insufficient data for encoding.");
  140.     itemcnt=origcnt;
  141.     if(dt!=1) for(i=0; i<origcnt; i++) maintab[i].prob/=dt;
  142.  
  143.     entropy=0;
  144.     for(i=0;i<origcnt;i++) {
  145.       maintab[i].codelen=-1; maintab[i].ind=-1;
  146.       indtab[i]=i;
  147.       d1=maintab[i].prob;
  148.       if(d1>0) entropy-=d1*log(d1);
  149.     }
  150.     entropy=entropy/log(radix);
  151.     indcnt=origcnt;
  152. }
  153.  
  154. int main()
  155. {
  156.     char *p;
  157.     int r;
  158.  
  159.     p=getenv("w_huffman_radix");
  160.     if(p==NULL || *p==0) p=getenv("huffman_radix");
  161.     if(p==NULL || *p==0) radix=2;
  162.     else {
  163.       r=atoi(p); if(r!=0) radix=r;
  164.     }
  165.     if(radix<2 || radix>MAX_RADIX) error("Bad radix.");
  166.  
  167.     p=getenv("wims_exec_parm");
  168.     if(p==NULL || *p==0) error("No input data.");
  169.     getparm(p);
  170.  
  171.     huffman();
  172.     output();
  173.  
  174.     return 0;
  175. }
  176.  
  177.