Rev 8171 | Go to most recent revision | Details | Compare with Previous | Last modification | View Log | RSS feed
Rev | Author | Line No. | Line |
---|---|---|---|
10 | reyssat | 1 | /* Copyright (C) 2002-2003 XIAO, Gang of Universite de Nice - Sophia Antipolis |
2 | * |
||
3 | * This program is free software; you can redistribute it and/or modify |
||
4 | * it under the terms of the GNU General Public License as published by |
||
5 | * the Free Software Foundation; either version 2 of the License, or |
||
6 | * (at your option) any later version. |
||
7 | * |
||
8 | * This program is distributed in the hope that it will be useful, |
||
9 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
||
10 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
||
11 | * GNU General Public License for more details. |
||
12 | * |
||
13 | * You should have received a copy of the GNU General Public License |
||
14 | * along with this program; if not, write to the Free Software |
||
15 | * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. |
||
16 | */ |
||
17 | |||
7676 | bpr | 18 | /* This program computes an optimal coding of variable lengths |
19 | * on a given distribution of probabilities, |
||
20 | * using Huffman algorithm */ |
||
10 | reyssat | 21 | |
22 | /* Input data: via two environment variables. |
||
23 | * wims_exec_parm is a comma-separated list of probability distributions. |
||
7676 | bpr | 24 | * Limited to MAX_ITEMS. |
25 | * The input data will be scaled to unit sum. |
||
10 | reyssat | 26 | * w_huffman_radix is the encoding radix, between 2 and MAX_RADIX. |
7676 | bpr | 27 | * |
10 | reyssat | 28 | * Output: two lines. |
29 | * Line 1: Entropy and Average code length, comma-separated. |
||
30 | * Line 2: comma-separated list of codes. |
||
31 | */ |
||
32 | |||
7676 | bpr | 33 | #define MAX_ITEMS 2048 |
34 | #define MAX_CODELEN 100 |
||
10 | reyssat | 35 | |
36 | const char *codechar="0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz"; |
||
7676 | bpr | 37 | #define MAX_RADIX strlen(codechar) |
10 | reyssat | 38 | |
39 | #include "../Lib/libwims.h" |
||
40 | |||
41 | struct { |
||
42 | double prob; |
||
43 | int ind; |
||
44 | unsigned char code[MAX_CODELEN]; |
||
45 | int codelen; |
||
46 | } maintab[MAX_ITEMS*2]; |
||
47 | int itemcnt, origcnt; |
||
48 | |||
49 | int indtab[MAX_ITEMS]; |
||
50 | int indcnt; |
||
51 | int radix; |
||
52 | double entropy, avelen; |
||
53 | |||
54 | |||
55 | int indcmp(const void *p1, const void *p2) |
||
56 | { |
||
57 | const int *i1, *i2; |
||
58 | double d1, d2; |
||
7676 | bpr | 59 | |
10 | reyssat | 60 | i1=p1; i2=p2; |
61 | d1=maintab[*i1].prob; d2=maintab[*i2].prob; |
||
62 | if(d1==d2) return 0; |
||
63 | if(d1>d2) return -1; |
||
64 | else return 1; |
||
65 | } |
||
66 | |||
67 | void huffman(void) |
||
68 | { |
||
69 | int t, i, j, l; |
||
70 | double d; |
||
7676 | bpr | 71 | |
10 | reyssat | 72 | while(indcnt>radix) { |
7676 | bpr | 73 | qsort(indtab,indcnt,sizeof(indtab[0]),indcmp); |
74 | if(radix>2) t=(indcnt+radix-3)%(radix-1)+2; |
||
75 | else t=2; |
||
76 | d=0; |
||
77 | for(i=indcnt-t; i<indcnt; i++) { |
||
78 | d+=maintab[indtab[i]].prob; |
||
79 | maintab[indtab[i]].ind=itemcnt; |
||
80 | } |
||
81 | maintab[itemcnt].prob=d; |
||
82 | maintab[itemcnt].ind=-1; |
||
83 | maintab[itemcnt].codelen=-1; |
||
84 | indtab[indcnt-t]=itemcnt; |
||
85 | indcnt-=t-1; itemcnt++; |
||
10 | reyssat | 86 | } |
87 | for(i=0;i<indcnt;i++) { |
||
7676 | bpr | 88 | maintab[indtab[i]].codelen=1; |
89 | maintab[indtab[i]].code[0]=i; |
||
90 | maintab[indtab[i]].ind=0; |
||
10 | reyssat | 91 | } |
92 | for(i=itemcnt-1;i>=0;i--) { |
||
7676 | bpr | 93 | if(maintab[i].codelen>0) continue; |
94 | j=maintab[i].ind; l=maintab[j].codelen; |
||
95 | if(l>=MAX_CODELEN) error("Code too long."); |
||
96 | memmove(maintab[i].code,maintab[j].code,l); |
||
97 | maintab[i].code[l]=maintab[j].ind++; |
||
98 | maintab[i].codelen=l+1; |
||
99 | maintab[i].ind=0; |
||
10 | reyssat | 100 | } |
101 | } |
||
102 | |||
103 | void output(void) |
||
104 | { |
||
105 | int i, j; |
||
106 | double d; |
||
107 | |||
108 | d=0; |
||
109 | for(i=0;i<origcnt;i++) d+=maintab[i].prob*maintab[i].codelen; |
||
110 | printf("%f,%f\n",entropy,d); |
||
111 | for(i=0;i<origcnt;i++) { |
||
7676 | bpr | 112 | for(j=0;j<maintab[i].codelen;j++) printf("%c",codechar[(int) maintab[i].code[j]]); |
113 | if(i<origcnt-1) printf(","); |
||
114 | else printf("\n"); |
||
10 | reyssat | 115 | } |
116 | } |
||
117 | |||
118 | void getparm(char *p) |
||
119 | { |
||
120 | int i; |
||
121 | char *p1, *p2; |
||
122 | double d1, dt; |
||
123 | |||
124 | origcnt=0; dt=0; |
||
125 | for(p1=find_word_start(p); |
||
7676 | bpr | 126 | *p1; p1=find_word_start(p2)) { |
127 | for(p2=p1; *p2 && strchr(",;",*p2)==NULL; p2++); |
||
128 | if(*p2) *p2++=0; |
||
129 | d1=strevalue(p1); |
||
8171 | bpr | 130 | if(!isfinite(d1) || d1<0) { |
7676 | bpr | 131 | char buf[256]; |
132 | snprintf(buf,sizeof(buf),"Bad data: %s",p1); |
||
133 | error(buf); |
||
134 | } |
||
135 | maintab[origcnt++].prob=d1; |
||
136 | dt+=d1; |
||
10 | reyssat | 137 | } |
138 | if(dt*1000000<1) error("Empty data sum."); |
||
139 | if(origcnt<2) error("Insufficient data for encoding."); |
||
140 | itemcnt=origcnt; |
||
141 | if(dt!=1) for(i=0; i<origcnt; i++) maintab[i].prob/=dt; |
||
7676 | bpr | 142 | |
10 | reyssat | 143 | entropy=0; |
144 | for(i=0;i<origcnt;i++) { |
||
7676 | bpr | 145 | maintab[i].codelen=-1; maintab[i].ind=-1; |
146 | indtab[i]=i; |
||
147 | d1=maintab[i].prob; |
||
148 | if(d1>0) entropy-=d1*log(d1); |
||
10 | reyssat | 149 | } |
150 | entropy=entropy/log(radix); |
||
151 | indcnt=origcnt; |
||
152 | } |
||
153 | |||
154 | int main() |
||
155 | { |
||
156 | char *p; |
||
157 | int r; |
||
7676 | bpr | 158 | |
10 | reyssat | 159 | p=getenv("w_huffman_radix"); |
160 | if(p==NULL || *p==0) p=getenv("huffman_radix"); |
||
161 | if(p==NULL || *p==0) radix=2; |
||
162 | else { |
||
7676 | bpr | 163 | r=atoi(p); if(r!=0) radix=r; |
10 | reyssat | 164 | } |
165 | if(radix<2 || radix>MAX_RADIX) error("Bad radix."); |
||
7676 | bpr | 166 | |
10 | reyssat | 167 | p=getenv("wims_exec_parm"); |
168 | if(p==NULL || *p==0) error("No input data."); |
||
169 | getparm(p); |
||
7676 | bpr | 170 | |
10 | reyssat | 171 | huffman(); |
172 | output(); |
||
7676 | bpr | 173 | |
10 | reyssat | 174 | return 0; |
175 | } |
||
176 |