Rev 8195 | Show entire file | Ignore whitespace | Details | Blame | Last modification | View Log | RSS feed
Rev 8195 | Rev 12248 | ||
---|---|---|---|
Line 37... | Line 37... | ||
37 | #define MAX_RADIX strlen(codechar) |
37 | #define MAX_RADIX strlen(codechar) |
38 | 38 | ||
39 | #include "../Lib/libwims.h" |
39 | #include "../Lib/libwims.h" |
40 | 40 | ||
41 | struct { |
41 | struct { |
42 |
|
42 | double prob; |
43 |
|
43 | int ind; |
44 |
|
44 | unsigned char code[MAX_CODELEN]; |
45 |
|
45 | int codelen; |
46 | } maintab[MAX_ITEMS*2]; |
46 | } maintab[MAX_ITEMS*2]; |
47 | int itemcnt, origcnt; |
47 | int itemcnt, origcnt; |
48 | 48 | ||
49 | int indtab[MAX_ITEMS]; |
49 | int indtab[MAX_ITEMS]; |
50 | int indcnt; |
50 | int indcnt; |
Line 52... | Line 52... | ||
52 | double entropy, avelen; |
52 | double entropy, avelen; |
53 | 53 | ||
54 | 54 | ||
55 | int indcmp(const void *p1, const void *p2) |
55 | int indcmp(const void *p1, const void *p2) |
56 | { |
56 | { |
57 |
|
57 | const int *i1, *i2; |
58 |
|
58 | double d1, d2; |
59 | 59 | ||
60 |
|
60 | i1=p1; i2=p2; |
61 |
|
61 | d1=maintab[*i1].prob; d2=maintab[*i2].prob; |
62 |
|
62 | if(d1==d2) return 0; |
63 |
|
63 | if(d1>d2) return -1; |
64 |
|
64 | else return 1; |
65 | } |
65 | } |
66 | 66 | ||
67 | void huffman(void) |
67 | void huffman(void) |
68 | { |
68 | { |
69 |
|
69 | int t, i, j, l; |
70 |
|
70 | double d; |
71 | 71 | ||
72 |
|
72 | while(indcnt>radix) { |
73 |
|
73 | qsort(indtab,indcnt,sizeof(indtab[0]),indcmp); |
74 |
|
74 | if(radix>2) t=(indcnt+radix-3)%(radix-1)+2; |
75 |
|
75 | else t=2; |
76 |
|
76 | d=0; |
77 |
|
77 | for(i=indcnt-t; i<indcnt; i++) { |
78 |
|
78 | d+=maintab[indtab[i]].prob; |
79 |
|
79 | maintab[indtab[i]].ind=itemcnt; |
80 |
|
80 | } |
81 |
|
81 | maintab[itemcnt].prob=d; |
82 |
|
82 | maintab[itemcnt].ind=-1; |
83 |
|
83 | maintab[itemcnt].codelen=-1; |
84 |
|
84 | indtab[indcnt-t]=itemcnt; |
85 |
|
85 | indcnt-=t-1; itemcnt++; |
86 |
|
86 | } |
87 |
|
87 | for(i=0;i<indcnt;i++) { |
88 |
|
88 | maintab[indtab[i]].codelen=1; |
89 |
|
89 | maintab[indtab[i]].code[0]=i; |
90 |
|
90 | maintab[indtab[i]].ind=0; |
91 |
|
91 | } |
92 |
|
92 | for(i=itemcnt-1;i>=0;i--) { |
93 |
|
93 | if(maintab[i].codelen>0) continue; |
94 |
|
94 | j=maintab[i].ind; l=maintab[j].codelen; |
95 |
|
95 | if(l>=MAX_CODELEN) error("Code too long."); |
96 |
|
96 | memmove(maintab[i].code,maintab[j].code,l); |
97 |
|
97 | maintab[i].code[l]=maintab[j].ind++; |
98 |
|
98 | maintab[i].codelen=l+1; |
99 |
|
99 | maintab[i].ind=0; |
100 |
|
100 | } |
101 | } |
101 | } |
102 | 102 | ||
103 | void output(void) |
103 | void output(void) |
104 | { |
104 | { |
105 |
|
105 | int i, j; |
106 |
|
106 | double d; |
- | 107 | ||
- | 108 | d=0; |
|
- | 109 | for(i=0;i<origcnt;i++) d+=maintab[i].prob*maintab[i].codelen; |
|
- | 110 | printf("%f,%f\n",entropy,d); |
|
- | 111 | for(i=0;i<origcnt;i++) { |
|
- | 112 | for(j=0;j<maintab[i].codelen;j++) printf("%c",codechar[(int) maintab[i].code[j]]); |
|
- | 113 | if(i<origcnt-1) printf(","); |
|
- | 114 | else printf("\n"); |
|
- | 115 | } |
|
- | 116 | } |
|
- | 117 | ||
- | 118 | void getparm(char *p) |
|
- | 119 | { |
|
- | 120 | int i; |
|
- | 121 | char *p1, *p2; |
|
- | 122 | double d1, dt; |
|
107 | 123 | ||
108 | d=0; |
- | |
109 | for(i=0;i<origcnt;i++) d+=maintab[i].prob*maintab[i].codelen; |
- | |
110 | printf("%f,%f\n",entropy,d); |
- | |
111 | for(i=0;i<origcnt;i++) { |
- | |
112 | for(j=0;j<maintab[i].codelen;j++) printf("%c",codechar[(int) maintab[i].code[j]]); |
- | |
113 | if(i<origcnt-1) printf(","); |
- | |
114 | else printf("\n"); |
- | |
115 | } |
- | |
116 | } |
- | |
117 | - | ||
118 | void getparm(char *p) |
- | |
119 | { |
- | |
120 | int i; |
- | |
121 | char *p1, *p2; |
- | |
122 | double d1, dt; |
- | |
123 | - | ||
124 |
|
124 | origcnt=0; dt=0; |
125 |
|
125 | for(p1=find_word_start(p); |
126 | *p1; p1=find_word_start(p2)) { |
126 | *p1; p1=find_word_start(p2)) { |
127 |
|
127 | for(p2=p1; *p2 && strchr(",;",*p2)==NULL; p2++); |
128 |
|
128 | if(*p2) *p2++=0; |
129 |
|
129 | d1=strevalue(p1); |
130 |
|
130 | if(!isfinite(d1) || d1<0) { |
131 |
|
131 | char buf[256]; |
132 |
|
132 | snprintf(buf,sizeof(buf),"Bad data: %s",p1); |
133 |
|
133 | error(buf); |
134 |
|
134 | } |
135 |
|
135 | maintab[origcnt++].prob=d1; |
136 |
|
136 | dt+=d1; |
137 |
|
137 | } |
138 |
|
138 | if(dt*1000000<1) error("Empty data sum."); |
139 |
|
139 | if(origcnt<2) error("Insufficient data for encoding."); |
140 |
|
140 | itemcnt=origcnt; |
141 |
|
141 | if(dt!=1) for(i=0; i<origcnt; i++) maintab[i].prob/=dt; |
142 | 142 | ||
143 |
|
143 | entropy=0; |
144 |
|
144 | for(i=0;i<origcnt;i++) { |
145 |
|
145 | maintab[i].codelen=-1; maintab[i].ind=-1; |
146 |
|
146 | indtab[i]=i; |
147 |
|
147 | d1=maintab[i].prob; |
148 |
|
148 | if(d1>0) entropy-=d1*log(d1); |
149 |
|
149 | } |
150 |
|
150 | entropy=entropy/log(radix); |
151 |
|
151 | indcnt=origcnt; |
152 | } |
152 | } |
153 | 153 | ||
154 | int main() |
154 | int main() |
155 | { |
155 | { |
156 |
|
156 | char *p; |
157 |
|
157 | int r; |
158 | 158 | ||
159 |
|
159 | p=getenv("w_huffman_radix"); |
160 |
|
160 | if(p==NULL || *p==0) p=getenv("huffman_radix"); |
161 |
|
161 | if(p==NULL || *p==0) radix=2; |
162 |
|
162 | else { |
163 |
|
163 | r=atoi(p); if(r!=0) radix=r; |
164 |
|
164 | } |
165 |
|
165 | if(radix<2 || radix>MAX_RADIX) error("Bad radix."); |
166 | 166 | ||
167 |
|
167 | p=getenv("wims_exec_parm"); |
168 |
|
168 | if(p==NULL || *p==0) error("No input data."); |
169 |
|
169 | getparm(p); |
170 | 170 | ||
171 |
|
171 | huffman(); |
172 |
|
172 | output(); |
173 | 173 | ||
174 |
|
174 | return 0; |
175 | } |
175 | } |
176 | 176 |