Rev 11132 | Show entire file | Ignore whitespace | Details | Blame | Last modification | View Log | RSS feed
Rev 11132 | Rev 12248 | ||
---|---|---|---|
Line 34... | Line 34... | ||
34 | char inpbuf[MAX_LINELEN+1], outbuf[2*MAX_LINELEN+2]; |
34 | char inpbuf[MAX_LINELEN+1], outbuf[2*MAX_LINELEN+2]; |
35 | char *dicbuf; |
35 | char *dicbuf; |
36 | char dicname[1024], suffixname[1024]; |
36 | char dicname[1024], suffixname[1024]; |
37 | 37 | ||
38 | struct entry { |
38 | struct entry { |
39 |
|
39 | char *original; |
40 |
|
40 | char *replace; |
41 | } entry[entrylim]; |
41 | } entry[entrylim]; |
42 | int entrycount; |
42 | int entrycount; |
43 | 43 | ||
44 | int nocase=0, hassuffix=0, leaveline=0; |
44 | int nocase=0, hassuffix=0, leaveline=0; |
45 | int entrycount, ocount; |
45 | int entrycount, ocount; |
46 | 46 | ||
47 | int compare(const void *s1, const void *s2) |
47 | int compare(const void *s1, const void *s2) |
48 | { |
48 | { |
49 |
|
49 | const struct entry *p1, *p2; |
50 |
|
50 | p1=s1; p2=s2; |
51 |
|
51 | if(nocase) return strcasecmp(p1->original,p2->original); |
52 |
|
52 | else return strcmp(p1->original,p2->original); |
53 | } |
53 | } |
54 | 54 | ||
55 | void sortdic(void) |
55 | void sortdic(void) |
56 | { |
56 | { |
57 |
|
57 | qsort(entry,entrycount,sizeof(entry[0]),compare); |
58 | } |
58 | } |
59 | 59 | ||
60 | /* change all spaces into ' ', and collapse multiple occurences */ |
60 | /* change all spaces into ' ', and collapse multiple occurences */ |
61 | void singlespace2(char *p) |
61 | void singlespace2(char *p) |
62 | { |
62 | { |
63 |
|
63 | char *pp, *p2; |
64 |
|
64 | for(pp=p;*pp;pp++) { |
65 |
|
65 | if(!isspace(*pp)) continue; |
66 |
|
66 | if(leaveline) { |
67 |
|
67 | if(*pp==13) ovlstrcpy(pp,pp+1); |
68 |
|
68 | if(*pp=='\n') { |
69 |
|
69 | pp++; |
70 |
|
70 | gopt: for(p2=pp; isspace(*p2) && *p2!='\n'; p2++); |
71 |
|
71 | if(p2>pp) ovlstrcpy(pp,p2); |
72 |
|
72 | pp--; |
73 | } |
- | |
74 | else { |
- | |
75 | pp++; if(!isspace(*pp) || *pp=='\n') continue; |
- | |
76 | goto gopt; |
- | |
77 | } |
- | |
78 | } |
73 | } |
79 | else { |
74 | else { |
80 |
|
75 | pp++; |
81 |
|
76 | if(!isspace(*pp) || *pp=='\n') continue; |
82 | for(p2=pp;isspace(*p2);p2++); |
- | |
83 |
|
77 | goto gopt; |
84 | } |
78 | } |
85 | } |
79 | } |
- | 80 | else { |
|
- | 81 | if(*pp!=' ') *pp=' '; |
|
- | 82 | pp++; if(!isspace(*pp)) continue; |
|
- | 83 | for(p2=pp;isspace(*p2);p2++); |
|
- | 84 | ovlstrcpy(pp,p2); pp--; |
|
- | 85 | } |
|
- | 86 | } |
|
86 | } |
87 | } |
87 | 88 | ||
88 | /* Prepare dictionary */ |
89 | /* Prepare dictionary */ |
89 | static |
90 | static |
90 | void prepare_dics(void) |
91 | void prepare_dics(void) |
91 | { |
92 | { |
92 |
|
93 | int i; |
93 |
|
94 | FILE *dicf; |
94 |
|
95 | char *p1, *p2, *pp; |
95 |
|
96 | long int flen; |
96 | 97 | ||
97 |
|
98 | entrycount=0; |
98 |
|
99 | dicf=fopen(dicname,"r"); if(dicf==NULL) return; |
99 |
|
100 | fseek(dicf,0,SEEK_END);flen=ftell(dicf); fseek(dicf,0,SEEK_SET); |
100 |
|
101 | if(flen>diclim) return; |
101 |
|
102 | dicbuf=xmalloc(2*flen+1024);flen=fread(dicbuf,1,flen,dicf); |
102 |
|
103 | fclose(dicf); |
103 |
|
104 | if(flen>0 && flen<diclim) dicbuf[flen]=0; |
104 |
|
105 | else return; |
105 |
|
106 | for(i=0,p1=dicbuf;p1!=NULL && *p1!=0 && i<entrylim;p1=p2) { |
106 |
|
107 | p2=strchr(p1+1,'\n'); if(p2>p1) *p2++=0; |
107 |
|
108 | pp=strchr(p1,sepchar); if(pp==NULL) continue; |
108 |
|
109 | *pp++=0; |
109 |
|
110 | strip_trailing_spaces2(p1); strip_trailing_spaces2(pp); |
110 |
|
111 | singlespace2(p1); |
111 |
|
112 | p1=find_word_start(p1); pp=find_word_start(pp); |
112 |
|
113 | if(*p1==0) continue; |
113 |
|
114 | entry[i].original=p1; entry[i].replace=pp; i++; |
114 |
|
115 | } |
115 |
|
116 | entrycount=i; |
116 | } |
117 | } |
117 | 118 | ||
118 | void output(void) |
119 | void output(void) |
119 | { |
120 | { |
120 |
|
121 | int i; |
121 |
|
122 | FILE *f; |
122 | 123 | ||
123 |
|
124 | ocount=0; |
124 |
|
125 | strcat(dicname,".sorted"); |
125 |
|
126 | f=fopen(dicname,"w"); if(f==NULL) return; |
126 |
|
127 | for(i=0;i<entrycount;i++) { |
127 |
|
128 | if(i>0 && strcmp(entry[i].original,entry[i-1].original)==0 |
128 |
|
129 | && strcmp(entry[i].replace,entry[i-1].replace)==0) |
129 |
|
130 | continue; |
130 |
|
131 | if(grpchar!=0) { |
131 |
|
132 | if(i>0 && strcmp(entry[i].original,entry[i-1].original)==0) |
132 |
|
133 | fprintf(f,"%c%s",grpchar, entry[i].replace); |
133 | else { |
- | |
134 | if(i>0) fprintf(f,"\n"); |
- | |
135 | fprintf(f,"%s%c%s",entry[i].original,sepchar,entry[i].replace); |
- | |
136 | ocount++; |
- | |
137 | } |
- | |
138 | - | ||
139 | } |
- | |
140 | else { |
134 | else { |
- | 135 | if(i>0) fprintf(f,"\n"); |
|
141 |
|
136 | fprintf(f,"%s%c%s",entry[i].original,sepchar,entry[i].replace); |
142 |
|
137 | ocount++; |
143 | } |
138 | } |
144 | } |
139 | } |
- | 140 | else { |
|
- | 141 | fprintf(f,"%s%c%s\n",entry[i].original,sepchar,entry[i].replace); |
|
- | 142 | ocount++; |
|
- | 143 | } |
|
- | 144 | } |
|
145 |
|
145 | if(grpchar!=0) fprintf(f,"\n"); |
146 |
|
146 | fclose(f); |
147 | } |
147 | } |
148 | 148 | ||
149 | int main(int argc, char *argv[]) |
149 | int main(int argc, char *argv[]) |
150 | { |
150 | { |
151 |
|
151 | char *ss, *gr; |
152 |
|
152 | if(argc<2) return -1; |
153 | 153 | ||
154 |
|
154 | ss=getenv("dicsort_separator"); |
155 |
|
155 | if(ss!=NULL && *ss!=0) sepchar=*ss; |
156 |
|
156 | gr=getenv("dicsort_grouping"); |
157 |
|
157 | if(gr!=NULL && *gr!=0) grpchar=*gr; |
158 |
|
158 | snprintf(dicname,sizeof(dicname)-128,"%s",argv[1]); prepare_dics(); |
159 |
|
159 | if(argc>2) { |
160 |
|
160 | snprintf(suffixname,sizeof(suffixname),"%s",argv[2]); |
161 |
|
161 | suffix_dic(suffixname); hassuffix=1; |
162 |
|
162 | } |
163 |
|
163 | else suffixname[0]=hassuffix=0; |
164 |
|
164 | sortdic(); output(); |
165 |
|
165 | printf("%s: sorted %d entries.\n",dicname, ocount); |
166 |
|
166 | return 0; |
167 | } |
167 | } |
168 | 168 |