Subversion Repositories wimsdev

Rev

Rev 10 | Rev 8171 | Go to most recent revision | Show entire file | Ignore whitespace | Details | Blame | Last modification | View Log | RSS feed

Rev 10 Rev 7676
Line 13... Line 13...
13
 *  You should have received a copy of the GNU General Public License
13
 *  You should have received a copy of the GNU General Public License
14
 *  along with this program; if not, write to the Free Software
14
 *  along with this program; if not, write to the Free Software
15
 *  Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
15
 *  Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
16
 */
16
 */
17
 
17
 
18
        /* This program computes an optimal coding of variable lengths
18
/* This program computes an optimal coding of variable lengths
19
         * on a given distribution of probabilities,
19
 * on a given distribution of probabilities,
20
         * using Huffman algorithm */
20
 * using Huffman algorithm */
21
 
21
 
22
/* Input data: via two environment variables.
22
/* Input data: via two environment variables.
23
 * wims_exec_parm is a comma-separated list of probability distributions.
23
 * wims_exec_parm is a comma-separated list of probability distributions.
24
 *      Limited to MAX_ITEMS.
24
 * Limited to MAX_ITEMS.
25
 *      The input data will be scaled to unit sum.
25
 * The input data will be scaled to unit sum.
26
 * w_huffman_radix is the encoding radix, between 2 and MAX_RADIX.
26
 * w_huffman_radix is the encoding radix, between 2 and MAX_RADIX.
27
 *
27
 *
28
 * Output: two lines.
28
 * Output: two lines.
29
 * Line 1: Entropy and Average code length, comma-separated.
29
 * Line 1: Entropy and Average code length, comma-separated.
30
 * Line 2: comma-separated list of codes.
30
 * Line 2: comma-separated list of codes.
31
 */
31
 */
32
 
32
 
33
#define MAX_ITEMS       2048
33
#define MAX_ITEMS 2048
34
#define MAX_CODELEN     100
34
#define MAX_CODELEN 100
35
 
35
 
36
const char *codechar="0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz";
36
const char *codechar="0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz";
37
#define MAX_RADIX       strlen(codechar)
37
#define MAX_RADIX strlen(codechar)
38
 
38
 
39
#include "../Lib/libwims.h"
39
#include "../Lib/libwims.h"
40
 
40
 
41
struct {
41
struct {
42
    double prob;
42
    double prob;
Line 60... Line 60...
60
 
60
 
61
int indcmp(const void *p1, const void *p2)
61
int indcmp(const void *p1, const void *p2)
62
{
62
{
63
    const int *i1, *i2;
63
    const int *i1, *i2;
64
    double d1, d2;
64
    double d1, d2;
65
   
65
 
66
    i1=p1; i2=p2;
66
    i1=p1; i2=p2;
67
    d1=maintab[*i1].prob; d2=maintab[*i2].prob;
67
    d1=maintab[*i1].prob; d2=maintab[*i2].prob;
68
    if(d1==d2) return 0;
68
    if(d1==d2) return 0;
69
    if(d1>d2) return -1;
69
    if(d1>d2) return -1;
70
    else return 1;
70
    else return 1;
71
}
71
}
72
 
72
 
73
void huffman(void)
73
void huffman(void)
74
{
74
{
75
    int t, i, j, l;
75
    int t, i, j, l;
76
    double d;
76
    double d;
77
   
77
 
78
    while(indcnt>radix) {
78
    while(indcnt>radix) {
79
        qsort(indtab,indcnt,sizeof(indtab[0]),indcmp);
79
      qsort(indtab,indcnt,sizeof(indtab[0]),indcmp);
80
        if(radix>2) t=(indcnt+radix-3)%(radix-1)+2;
80
      if(radix>2) t=(indcnt+radix-3)%(radix-1)+2;
81
        else t=2;
81
      else t=2;
82
        d=0;
82
      d=0;
83
        for(i=indcnt-t; i<indcnt; i++) {
83
      for(i=indcnt-t; i<indcnt; i++) {
84
            d+=maintab[indtab[i]].prob;
84
          d+=maintab[indtab[i]].prob;
85
            maintab[indtab[i]].ind=itemcnt;
85
          maintab[indtab[i]].ind=itemcnt;
86
        }
86
      }
87
        maintab[itemcnt].prob=d;
87
      maintab[itemcnt].prob=d;
88
        maintab[itemcnt].ind=-1;
88
      maintab[itemcnt].ind=-1;
89
        maintab[itemcnt].codelen=-1;
89
      maintab[itemcnt].codelen=-1;
90
        indtab[indcnt-t]=itemcnt;
90
      indtab[indcnt-t]=itemcnt;
91
        indcnt-=t-1; itemcnt++;
91
      indcnt-=t-1; itemcnt++;
92
    }
92
    }
93
    for(i=0;i<indcnt;i++) {
93
    for(i=0;i<indcnt;i++) {
94
        maintab[indtab[i]].codelen=1;
94
      maintab[indtab[i]].codelen=1;
95
        maintab[indtab[i]].code[0]=i;
95
      maintab[indtab[i]].code[0]=i;
96
        maintab[indtab[i]].ind=0;
96
      maintab[indtab[i]].ind=0;
97
    }
97
    }
98
    for(i=itemcnt-1;i>=0;i--) {
98
    for(i=itemcnt-1;i>=0;i--) {
99
        if(maintab[i].codelen>0) continue;
99
      if(maintab[i].codelen>0) continue;
100
        j=maintab[i].ind; l=maintab[j].codelen;
100
      j=maintab[i].ind; l=maintab[j].codelen;
101
        if(l>=MAX_CODELEN) error("Code too long.");
101
      if(l>=MAX_CODELEN) error("Code too long.");
102
        memmove(maintab[i].code,maintab[j].code,l);
102
      memmove(maintab[i].code,maintab[j].code,l);
103
        maintab[i].code[l]=maintab[j].ind++;
103
      maintab[i].code[l]=maintab[j].ind++;
104
        maintab[i].codelen=l+1;
104
      maintab[i].codelen=l+1;
105
        maintab[i].ind=0;
105
      maintab[i].ind=0;
106
    }
106
    }
107
}
107
}
108
 
108
 
109
void output(void)
109
void output(void)
110
{
110
{
111
    int i, j;
111
    int i, j;
Line 113... Line 113...
113
 
113
 
114
    d=0;
114
    d=0;
115
    for(i=0;i<origcnt;i++) d+=maintab[i].prob*maintab[i].codelen;
115
    for(i=0;i<origcnt;i++) d+=maintab[i].prob*maintab[i].codelen;
116
    printf("%f,%f\n",entropy,d);
116
    printf("%f,%f\n",entropy,d);
117
    for(i=0;i<origcnt;i++) {
117
    for(i=0;i<origcnt;i++) {
118
        for(j=0;j<maintab[i].codelen;j++) printf("%c",codechar[(int) maintab[i].code[j]]);
118
      for(j=0;j<maintab[i].codelen;j++) printf("%c",codechar[(int) maintab[i].code[j]]);
119
        if(i<origcnt-1) printf(",");
119
      if(i<origcnt-1) printf(",");
120
        else printf("\n");
120
      else printf("\n");
121
    }
121
    }
122
}
122
}
123
 
123
 
124
void getparm(char *p)
124
void getparm(char *p)
125
{
125
{
Line 127... Line 127...
127
    char *p1, *p2;
127
    char *p1, *p2;
128
    double d1, dt;
128
    double d1, dt;
129
 
129
 
130
    origcnt=0; dt=0;
130
    origcnt=0; dt=0;
131
    for(p1=find_word_start(p);
131
    for(p1=find_word_start(p);
132
        *p1; p1=find_word_start(p2)) {
132
      *p1; p1=find_word_start(p2)) {
133
        for(p2=p1; *p2 && strchr(",;",*p2)==NULL; p2++);
133
      for(p2=p1; *p2 && strchr(",;",*p2)==NULL; p2++);
134
        if(*p2) *p2++=0;
134
      if(*p2) *p2++=0;
135
        d1=strevalue(p1);
135
      d1=strevalue(p1);
136
        if(!finite(d1) || d1<0) {
136
      if(!finite(d1) || d1<0) {
137
            char buf[256];
137
          char buf[256];
138
            snprintf(buf,sizeof(buf),"Bad data: %s",p1);
138
          snprintf(buf,sizeof(buf),"Bad data: %s",p1);
139
            error(buf);
139
          error(buf);
140
        }
140
      }
141
        maintab[origcnt++].prob=d1;
141
      maintab[origcnt++].prob=d1;
142
        dt+=d1;
142
      dt+=d1;
143
    }
143
    }
144
    if(dt*1000000<1) error("Empty data sum.");
144
    if(dt*1000000<1) error("Empty data sum.");
145
    if(origcnt<2) error("Insufficient data for encoding.");
145
    if(origcnt<2) error("Insufficient data for encoding.");
146
    itemcnt=origcnt;
146
    itemcnt=origcnt;
147
    if(dt!=1) for(i=0; i<origcnt; i++) maintab[i].prob/=dt;
147
    if(dt!=1) for(i=0; i<origcnt; i++) maintab[i].prob/=dt;
148
   
148
 
149
    entropy=0;
149
    entropy=0;
150
    for(i=0;i<origcnt;i++) {
150
    for(i=0;i<origcnt;i++) {
151
        maintab[i].codelen=-1; maintab[i].ind=-1;
151
      maintab[i].codelen=-1; maintab[i].ind=-1;
152
        indtab[i]=i;
152
      indtab[i]=i;
153
        d1=maintab[i].prob;
153
      d1=maintab[i].prob;
154
        if(d1>0) entropy-=d1*log(d1);
154
      if(d1>0) entropy-=d1*log(d1);
155
    }
155
    }
156
    entropy=entropy/log(radix);
156
    entropy=entropy/log(radix);
157
    indcnt=origcnt;
157
    indcnt=origcnt;
158
}
158
}
159
 
159
 
160
int main()
160
int main()
161
{
161
{
162
    char *p;
162
    char *p;
163
    int r;
163
    int r;
164
   
164
 
165
    error1=error; error2=error; error3=error;
165
    error1=error; error2=error; error3=error;
166
    p=getenv("w_huffman_radix");
166
    p=getenv("w_huffman_radix");
167
    if(p==NULL || *p==0) p=getenv("huffman_radix");
167
    if(p==NULL || *p==0) p=getenv("huffman_radix");
168
    if(p==NULL || *p==0) radix=2;
168
    if(p==NULL || *p==0) radix=2;
169
    else {
169
    else {
170
        r=atoi(p); if(r!=0) radix=r;
170
      r=atoi(p); if(r!=0) radix=r;
171
    }
171
    }
172
    if(radix<2 || radix>MAX_RADIX) error("Bad radix.");
172
    if(radix<2 || radix>MAX_RADIX) error("Bad radix.");
173
   
173
 
174
    p=getenv("wims_exec_parm");
174
    p=getenv("wims_exec_parm");
175
    if(p==NULL || *p==0) error("No input data.");
175
    if(p==NULL || *p==0) error("No input data.");
176
    getparm(p);
176
    getparm(p);
177
   
177
 
178
    huffman();
178
    huffman();
179
    output();
179
    output();
180
   
180
 
181
    return 0;
181
    return 0;
182
}
182
}
183
 
183