Rev 7676 | Go to most recent revision | Details | Last modification | View Log | RSS feed
Rev | Author | Line No. | Line |
---|---|---|---|
10 | reyssat | 1 | /* Copyright (C) 2002-2003 XIAO, Gang of Universite de Nice - Sophia Antipolis |
2 | * |
||
3 | * This program is free software; you can redistribute it and/or modify |
||
4 | * it under the terms of the GNU General Public License as published by |
||
5 | * the Free Software Foundation; either version 2 of the License, or |
||
6 | * (at your option) any later version. |
||
7 | * |
||
8 | * This program is distributed in the hope that it will be useful, |
||
9 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
||
10 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
||
11 | * GNU General Public License for more details. |
||
12 | * |
||
13 | * You should have received a copy of the GNU General Public License |
||
14 | * along with this program; if not, write to the Free Software |
||
15 | * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. |
||
16 | */ |
||
17 | |||
18 | /* This program computes an optimal coding of variable lengths |
||
19 | * on a given distribution of probabilities, |
||
20 | * using Huffman algorithm */ |
||
21 | |||
22 | /* Input data: via two environment variables. |
||
23 | * wims_exec_parm is a comma-separated list of probability distributions. |
||
24 | * Limited to MAX_ITEMS. |
||
25 | * The input data will be scaled to unit sum. |
||
26 | * w_huffman_radix is the encoding radix, between 2 and MAX_RADIX. |
||
27 | * |
||
28 | * Output: two lines. |
||
29 | * Line 1: Entropy and Average code length, comma-separated. |
||
30 | * Line 2: comma-separated list of codes. |
||
31 | */ |
||
32 | |||
33 | #define MAX_ITEMS 2048 |
||
34 | #define MAX_CODELEN 100 |
||
35 | |||
36 | const char *codechar="0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz"; |
||
37 | #define MAX_RADIX strlen(codechar) |
||
38 | |||
39 | #include "../Lib/libwims.h" |
||
40 | |||
41 | struct { |
||
42 | double prob; |
||
43 | int ind; |
||
44 | unsigned char code[MAX_CODELEN]; |
||
45 | int codelen; |
||
46 | } maintab[MAX_ITEMS*2]; |
||
47 | int itemcnt, origcnt; |
||
48 | |||
49 | int indtab[MAX_ITEMS]; |
||
50 | int indcnt; |
||
51 | int radix; |
||
52 | double entropy, avelen; |
||
53 | |||
54 | void error(char *msg) |
||
55 | { |
||
56 | fprintf(stderr,"%s\n",msg); |
||
57 | printf("ERROR\n"); |
||
58 | exit(1); |
||
59 | } |
||
60 | |||
61 | int indcmp(const void *p1, const void *p2) |
||
62 | { |
||
63 | const int *i1, *i2; |
||
64 | double d1, d2; |
||
65 | |||
66 | i1=p1; i2=p2; |
||
67 | d1=maintab[*i1].prob; d2=maintab[*i2].prob; |
||
68 | if(d1==d2) return 0; |
||
69 | if(d1>d2) return -1; |
||
70 | else return 1; |
||
71 | } |
||
72 | |||
73 | void huffman(void) |
||
74 | { |
||
75 | int t, i, j, l; |
||
76 | double d; |
||
77 | |||
78 | while(indcnt>radix) { |
||
79 | qsort(indtab,indcnt,sizeof(indtab[0]),indcmp); |
||
80 | if(radix>2) t=(indcnt+radix-3)%(radix-1)+2; |
||
81 | else t=2; |
||
82 | d=0; |
||
83 | for(i=indcnt-t; i<indcnt; i++) { |
||
84 | d+=maintab[indtab[i]].prob; |
||
85 | maintab[indtab[i]].ind=itemcnt; |
||
86 | } |
||
87 | maintab[itemcnt].prob=d; |
||
88 | maintab[itemcnt].ind=-1; |
||
89 | maintab[itemcnt].codelen=-1; |
||
90 | indtab[indcnt-t]=itemcnt; |
||
91 | indcnt-=t-1; itemcnt++; |
||
92 | } |
||
93 | for(i=0;i<indcnt;i++) { |
||
94 | maintab[indtab[i]].codelen=1; |
||
95 | maintab[indtab[i]].code[0]=i; |
||
96 | maintab[indtab[i]].ind=0; |
||
97 | } |
||
98 | for(i=itemcnt-1;i>=0;i--) { |
||
99 | if(maintab[i].codelen>0) continue; |
||
100 | j=maintab[i].ind; l=maintab[j].codelen; |
||
101 | if(l>=MAX_CODELEN) error("Code too long."); |
||
102 | memmove(maintab[i].code,maintab[j].code,l); |
||
103 | maintab[i].code[l]=maintab[j].ind++; |
||
104 | maintab[i].codelen=l+1; |
||
105 | maintab[i].ind=0; |
||
106 | } |
||
107 | } |
||
108 | |||
109 | void output(void) |
||
110 | { |
||
111 | int i, j; |
||
112 | double d; |
||
113 | |||
114 | d=0; |
||
115 | for(i=0;i<origcnt;i++) d+=maintab[i].prob*maintab[i].codelen; |
||
116 | printf("%f,%f\n",entropy,d); |
||
117 | for(i=0;i<origcnt;i++) { |
||
118 | for(j=0;j<maintab[i].codelen;j++) printf("%c",codechar[(int) maintab[i].code[j]]); |
||
119 | if(i<origcnt-1) printf(","); |
||
120 | else printf("\n"); |
||
121 | } |
||
122 | } |
||
123 | |||
124 | void getparm(char *p) |
||
125 | { |
||
126 | int i; |
||
127 | char *p1, *p2; |
||
128 | double d1, dt; |
||
129 | |||
130 | origcnt=0; dt=0; |
||
131 | for(p1=find_word_start(p); |
||
132 | *p1; p1=find_word_start(p2)) { |
||
133 | for(p2=p1; *p2 && strchr(",;",*p2)==NULL; p2++); |
||
134 | if(*p2) *p2++=0; |
||
135 | d1=strevalue(p1); |
||
136 | if(!finite(d1) || d1<0) { |
||
137 | char buf[256]; |
||
138 | snprintf(buf,sizeof(buf),"Bad data: %s",p1); |
||
139 | error(buf); |
||
140 | } |
||
141 | maintab[origcnt++].prob=d1; |
||
142 | dt+=d1; |
||
143 | } |
||
144 | if(dt*1000000<1) error("Empty data sum."); |
||
145 | if(origcnt<2) error("Insufficient data for encoding."); |
||
146 | itemcnt=origcnt; |
||
147 | if(dt!=1) for(i=0; i<origcnt; i++) maintab[i].prob/=dt; |
||
148 | |||
149 | entropy=0; |
||
150 | for(i=0;i<origcnt;i++) { |
||
151 | maintab[i].codelen=-1; maintab[i].ind=-1; |
||
152 | indtab[i]=i; |
||
153 | d1=maintab[i].prob; |
||
154 | if(d1>0) entropy-=d1*log(d1); |
||
155 | } |
||
156 | entropy=entropy/log(radix); |
||
157 | indcnt=origcnt; |
||
158 | } |
||
159 | |||
160 | int main() |
||
161 | { |
||
162 | char *p; |
||
163 | int r; |
||
164 | |||
165 | error1=error; error2=error; error3=error; |
||
166 | p=getenv("w_huffman_radix"); |
||
167 | if(p==NULL || *p==0) p=getenv("huffman_radix"); |
||
168 | if(p==NULL || *p==0) radix=2; |
||
169 | else { |
||
170 | r=atoi(p); if(r!=0) radix=r; |
||
171 | } |
||
172 | if(radix<2 || radix>MAX_RADIX) error("Bad radix."); |
||
173 | |||
174 | p=getenv("wims_exec_parm"); |
||
175 | if(p==NULL || *p==0) error("No input data."); |
||
176 | getparm(p); |
||
177 | |||
178 | huffman(); |
||
179 | output(); |
||
180 | |||
181 | return 0; |
||
182 | } |
||
183 |