Subversion Repositories wimsdev

Rev

Rev 7676 | Go to most recent revision | Blame | Compare with Previous | Last modification | View Log | RSS feed

  1. /*    Copyright (C) 2002-2003 XIAO, Gang of Universite de Nice - Sophia Antipolis
  2.  *
  3.  *  This program is free software; you can redistribute it and/or modify
  4.  *  it under the terms of the GNU General Public License as published by
  5.  *  the Free Software Foundation; either version 2 of the License, or
  6.  *  (at your option) any later version.
  7.  *
  8.  *  This program is distributed in the hope that it will be useful,
  9.  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
  10.  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  11.  *  GNU General Public License for more details.
  12.  *
  13.  *  You should have received a copy of the GNU General Public License
  14.  *  along with this program; if not, write to the Free Software
  15.  *  Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  16.  */
  17.  
  18.         /* This program computes an optimal coding of variable lengths
  19.          * on a given distribution of probabilities,
  20.          * using Huffman algorithm */
  21.  
  22. /* Input data: via two environment variables.
  23.  * wims_exec_parm is a comma-separated list of probability distributions.
  24.  *      Limited to MAX_ITEMS.
  25.  *      The input data will be scaled to unit sum.
  26.  * w_huffman_radix is the encoding radix, between 2 and MAX_RADIX.
  27.  *
  28.  * Output: two lines.
  29.  * Line 1: Entropy and Average code length, comma-separated.
  30.  * Line 2: comma-separated list of codes.
  31.  */
  32.  
  33. #define MAX_ITEMS       2048
  34. #define MAX_CODELEN     100
  35.  
  36. const char *codechar="0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz";
  37. #define MAX_RADIX       strlen(codechar)
  38.  
  39. #include "../Lib/libwims.h"
  40.  
  41. struct {
  42.     double prob;
  43.     int ind;
  44.     unsigned char code[MAX_CODELEN];
  45.     int codelen;
  46. } maintab[MAX_ITEMS*2];
  47. int itemcnt, origcnt;
  48.  
  49. int indtab[MAX_ITEMS];
  50. int indcnt;
  51. int radix;
  52. double entropy, avelen;
  53.  
  54. void error(char *msg)
  55. {
  56.     fprintf(stderr,"%s\n",msg);
  57.     printf("ERROR\n");
  58.     exit(1);
  59. }
  60.  
  61. int indcmp(const void *p1, const void *p2)
  62. {
  63.     const int *i1, *i2;
  64.     double d1, d2;
  65.    
  66.     i1=p1; i2=p2;
  67.     d1=maintab[*i1].prob; d2=maintab[*i2].prob;
  68.     if(d1==d2) return 0;
  69.     if(d1>d2) return -1;
  70.     else return 1;
  71. }
  72.  
  73. void huffman(void)
  74. {
  75.     int t, i, j, l;
  76.     double d;
  77.    
  78.     while(indcnt>radix) {
  79.         qsort(indtab,indcnt,sizeof(indtab[0]),indcmp);
  80.         if(radix>2) t=(indcnt+radix-3)%(radix-1)+2;
  81.         else t=2;
  82.         d=0;
  83.         for(i=indcnt-t; i<indcnt; i++) {
  84.             d+=maintab[indtab[i]].prob;
  85.             maintab[indtab[i]].ind=itemcnt;
  86.         }
  87.         maintab[itemcnt].prob=d;
  88.         maintab[itemcnt].ind=-1;
  89.         maintab[itemcnt].codelen=-1;
  90.         indtab[indcnt-t]=itemcnt;
  91.         indcnt-=t-1; itemcnt++;
  92.     }
  93.     for(i=0;i<indcnt;i++) {
  94.         maintab[indtab[i]].codelen=1;
  95.         maintab[indtab[i]].code[0]=i;
  96.         maintab[indtab[i]].ind=0;
  97.     }
  98.     for(i=itemcnt-1;i>=0;i--) {
  99.         if(maintab[i].codelen>0) continue;
  100.         j=maintab[i].ind; l=maintab[j].codelen;
  101.         if(l>=MAX_CODELEN) error("Code too long.");
  102.         memmove(maintab[i].code,maintab[j].code,l);
  103.         maintab[i].code[l]=maintab[j].ind++;
  104.         maintab[i].codelen=l+1;
  105.         maintab[i].ind=0;
  106.     }
  107. }
  108.  
  109. void output(void)
  110. {
  111.     int i, j;
  112.     double d;
  113.  
  114.     d=0;
  115.     for(i=0;i<origcnt;i++) d+=maintab[i].prob*maintab[i].codelen;
  116.     printf("%f,%f\n",entropy,d);
  117.     for(i=0;i<origcnt;i++) {
  118.         for(j=0;j<maintab[i].codelen;j++) printf("%c",codechar[(int) maintab[i].code[j]]);
  119.         if(i<origcnt-1) printf(",");
  120.         else printf("\n");
  121.     }
  122. }
  123.  
  124. void getparm(char *p)
  125. {
  126.     int i;
  127.     char *p1, *p2;
  128.     double d1, dt;
  129.  
  130.     origcnt=0; dt=0;
  131.     for(p1=find_word_start(p);
  132.         *p1; p1=find_word_start(p2)) {
  133.         for(p2=p1; *p2 && strchr(",;",*p2)==NULL; p2++);
  134.         if(*p2) *p2++=0;
  135.         d1=strevalue(p1);
  136.         if(!finite(d1) || d1<0) {
  137.             char buf[256];
  138.             snprintf(buf,sizeof(buf),"Bad data: %s",p1);
  139.             error(buf);
  140.         }
  141.         maintab[origcnt++].prob=d1;
  142.         dt+=d1;
  143.     }
  144.     if(dt*1000000<1) error("Empty data sum.");
  145.     if(origcnt<2) error("Insufficient data for encoding.");
  146.     itemcnt=origcnt;
  147.     if(dt!=1) for(i=0; i<origcnt; i++) maintab[i].prob/=dt;
  148.    
  149.     entropy=0;
  150.     for(i=0;i<origcnt;i++) {
  151.         maintab[i].codelen=-1; maintab[i].ind=-1;
  152.         indtab[i]=i;
  153.         d1=maintab[i].prob;
  154.         if(d1>0) entropy-=d1*log(d1);
  155.     }
  156.     entropy=entropy/log(radix);
  157.     indcnt=origcnt;
  158. }
  159.  
  160. int main()
  161. {
  162.     char *p;
  163.     int r;
  164.    
  165.     error1=error; error2=error; error3=error;
  166.     p=getenv("w_huffman_radix");
  167.     if(p==NULL || *p==0) p=getenv("huffman_radix");
  168.     if(p==NULL || *p==0) radix=2;
  169.     else {
  170.         r=atoi(p); if(r!=0) radix=r;
  171.     }
  172.     if(radix<2 || radix>MAX_RADIX) error("Bad radix.");
  173.    
  174.     p=getenv("wims_exec_parm");
  175.     if(p==NULL || *p==0) error("No input data.");
  176.     getparm(p);
  177.    
  178.     huffman();
  179.     output();
  180.    
  181.     return 0;
  182. }
  183.  
  184.