Rev 10 | Rev 7676 | Go to most recent revision | Details | Compare with Previous | Last modification | View Log | RSS feed
Rev | Author | Line No. | Line |
---|---|---|---|
10 | reyssat | 1 | /* Copyright (C) 1998-2003 XIAO, Gang of Universite de Nice - Sophia Antipolis |
2 | * |
||
3 | * This program is free software; you can redistribute it and/or modify |
||
4 | * it under the terms of the GNU General Public License as published by |
||
5 | * the Free Software Foundation; either version 2 of the License, or |
||
6 | * (at your option) any later version. |
||
7 | * |
||
8 | * This program is distributed in the hope that it will be useful, |
||
9 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
||
10 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
||
11 | * GNU General Public License for more details. |
||
12 | * |
||
13 | * You should have received a copy of the GNU General Public License |
||
14 | * along with this program; if not, write to the Free Software |
||
15 | * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. |
||
16 | */ |
||
17 | |||
18 | /* This is a small program used simply to fetch web pages. |
||
19 | * No fancy functionality such as link redirection or site sucking is |
||
20 | * present. |
||
21 | * The fetched page is written to stdout, or to a file in the session directory when w_webget_output is set. */ |
||
22 | |||
23 | #include "../wims.h" |
||
24 | #include <netdb.h> |
||
25 | #include <sys/socket.h> |
||
26 | #include <netinet/in.h> |
||
27 | |||
/* Fixed request headers added to every request built by main(). */
char *cheater1="User-Agent: WIMS-webget";
char *cheater2=
    "Accept: image/gif, image/x-xbitmap, image/jpeg, image/pjpeg, image/png, */*\r\n"
    "Accept-Encoding: gzip\r\n"
    "Accept-Language: en, fr, it, de, es\r\n"
    "Accept-Charset: iso-8859-1,*,utf-8";

/* pbuf: working copy of the request parameter; later reused by main()
 * as the one-byte buffer for read(). */
char pbuf[4096];
/* tbuf: text of the HTTP request, built by main(). */
char tbuf[4096];
/* tfname: name of the temporary file used in https mode. */
char tfname[1024];
/* tmpdir: directory for the temporary file; gethttps() replaces it by
 * the environment variable tmp_dir when that one is set. */
char *tmpdir="/tmp";
/* soc: descriptor the answer is read from. */
int soc;
/* port: TCP port of the remote server. */
int port;
/* https: non-zero when the URL uses the https scheme. */
int https;
/* charcnt: number of body bytes copied to the output so far. */
int charcnt;
/* outf: stream receiving the fetched body. */
FILE *outf;
||
39 | |||
/* Fatal error exit: prints "<msg>: <description of errno>" on stderr
 * and terminates the program with exit status 1. Never returns. */
void errorquit(char *msg)
{
    const char *reason = strerror(errno);

    fprintf(stderr, "%s: %s\n", msg, reason);
    exit(1);
}
||
44 | |||
45 | /* Points to the end of the word */ |
||
46 | char *find_word_end(char *p) |
||
47 | { |
||
48 | int i; |
||
49 | for(i=0;!isspace(*p) && *p!=0 && i<MAX_LINELEN; p++,i++); |
||
50 | return p; |
||
51 | } |
||
52 | |||
53 | /* Strips leading spaces */ |
||
54 | char *find_word_start(char *p) |
||
55 | { |
||
56 | int i; |
||
57 | for(i=0; isspace(*p) && i<MAX_LINELEN; p++,i++); |
||
58 | return p; |
||
59 | } |
||
60 | |||
61 | /* Secured execution */ |
||
62 | void secure(char *host) |
||
63 | { |
||
64 | char *p1, *p2, *p3, buf[MAX_LINELEN+1]; |
||
65 | long int l; |
||
66 | FILE *f; |
||
67 | |||
68 | p1=getenv("w_module"); if(p1==NULL || *p1==0) return; |
||
69 | p1=getenv("untrust"); if(p1==NULL || *p1==0) return; |
||
70 | f=fopen("webget.sites","r"); if(f==NULL) return; |
||
71 | l=fread(buf,1,MAX_LINELEN,f); fclose(f); |
||
72 | if(l<=0 || l>MAX_LINELEN) return; |
||
73 | buf[l]=0; |
||
74 | for(p1=find_word_start(buf);*p1;p1=find_word_start(p2)) { |
||
75 | p2=find_word_end(p1); if(*p2) *p2++=0; |
||
76 | p3=strstr(host,p1); if(p3==NULL) continue; |
||
77 | if((p3==host || *(p3-1)=='.') && *(p3+strlen(p1))==0) return; |
||
78 | } |
||
79 | exit(1); /* unauthorized sites refused. */ |
||
80 | } |
||
81 | |||
82 | /* open a TCP/IP socket with host/port |
||
83 | * returns the file descriptor for the socket */ |
||
84 | int net_connect(char *host) |
||
85 | { |
||
86 | struct hostent *hp; |
||
87 | struct sockaddr_in sin; |
||
88 | int soc; |
||
89 | |||
90 | secure(host); |
||
91 | if(!(hp = gethostbyname(host))) errorquit("unknown host."); |
||
92 | if((soc = socket(hp->h_addrtype,SOCK_STREAM,0))<0) |
||
93 | errorquit("socket() error"); |
||
94 | memmove(&sin.sin_addr,hp->h_addr,hp->h_length); |
||
95 | sin.sin_port=htons(port); |
||
96 | sin.sin_family = hp->h_addrtype; |
||
97 | if(connect(soc,(struct sockaddr *)&sin,sizeof(sin))<0) { |
||
98 | close(soc); errorquit("connect() error"); |
||
99 | } |
||
100 | return soc; |
||
101 | } |
||
102 | |||
103 | int gethttps(char *host) |
||
104 | { |
||
105 | char buf[65536]; |
||
106 | char *tp; |
||
107 | |||
108 | tp=getenv("tmp_dir"); if(tp!=NULL && *tp!=0) tmpdir=tp; |
||
109 | snprintf(tfname,sizeof(tfname),"%s/https.tmp",tmpdir); |
||
110 | snprintf(buf,sizeof(buf),"\ |
||
111 | mkdir -p %s\n\ |
||
112 | openssl s_client -connect %s:%d -quiet 2>/dev/null >%s <<@\n\ |
||
113 | %s\n\ |
||
114 | @\n", tmpdir,host,port,tfname,tbuf); |
||
3840 | kbelabas | 115 | if (system(buf)) |
116 | errorquit("system() error"); |
||
10 | reyssat | 117 | return open(tfname,O_RDONLY); |
118 | } |
||
119 | |||
120 | int main(int argc, char *argv[]) |
||
121 | { |
||
122 | char *parm, *pt, *p1, *p2, *p3, *p4, *dp, *pre; |
||
123 | char nbuf[4096], *pp1, *pp2; |
||
124 | char c; |
||
125 | |||
126 | parm=getenv("wims_exec_parm"); |
||
127 | if(parm==NULL || *parm==0) errorquit("no_parameter"); |
||
128 | snprintf(pbuf,sizeof(pbuf),"%s",parm); |
||
129 | p1=find_word_start(pbuf); p2=find_word_end(p1); |
||
130 | if(*p2!=0) *p2++=0; https=0; |
||
131 | outf=stdout; pp1=getenv("w_webget_output"); |
||
132 | pp2=getenv("session_dir"); |
||
133 | if(pp1!=NULL && strstr(pp1,"..")==NULL && isalnum(*pp1) && pp2!=NULL) { |
||
134 | snprintf(nbuf,sizeof(nbuf),"%s/%s",pp2,pp1); |
||
135 | outf=fopen(nbuf,"w"); if(outf==NULL) outf=stdout; |
||
136 | } |
||
137 | dp=getenv("w_webget_option"); |
||
138 | if(dp!=NULL && strstr(dp,"direct")!=NULL) { /* direct get */ |
||
139 | p1=getenv("w_webget_host"); |
||
140 | p2=getenv("w_webget_port"); |
||
141 | if(p1==NULL || p2==NULL) errorquit("incomplete_request"); |
||
142 | port=atoi(p2); |
||
143 | soc=net_connect(p1); if(soc==-1) return 1; |
||
144 | c=' '; for(p3=parm; *p3; p3++) { |
||
3840 | kbelabas | 145 | if(*p3=='\n' && c!='\r') (void)write(soc,"\r",1); |
146 | (void)write(soc,p3,1); c=*p3; |
||
10 | reyssat | 147 | } |
3840 | kbelabas | 148 | (void)write(soc,"\r\n\r\n",4); |
10 | reyssat | 149 | pt=getenv("w_module"); |
150 | if(pt==NULL || *pt==0 || strncmp(pt,"adm/",4)==0 ) { /* File to post? */ |
||
151 | pt=getenv("w_webget_post"); if(pt!=NULL && *pt!=0) { |
||
152 | FILE *f; |
||
153 | char buf[4096]; |
||
154 | size_t l; |
||
155 | f=fopen(pt,"r"); if(f!=NULL) { |
||
156 | do { |
||
157 | l=fread(buf,1,sizeof(buf),f); |
||
3840 | kbelabas | 158 | if(l>0 && l<=sizeof(buf)) (void)write(soc,buf,l); |
10 | reyssat | 159 | } while(l==sizeof(buf)); |
160 | fclose(f); |
||
161 | } |
||
162 | } |
||
163 | } |
||
164 | if(strstr(dp,"normalread")!=NULL) goto read; |
||
165 | charcnt=0; |
||
166 | while(read(soc,pbuf,1)>0 && charcnt<10240) { |
||
167 | fputc(pbuf[0],outf); charcnt++; |
||
168 | } |
||
169 | close(soc); |
||
170 | return 0; |
||
171 | } |
||
172 | if(strncasecmp(p1,"http://",strlen("http://"))==0) p1+=strlen("http://"); |
||
173 | else if(strncasecmp(p1,"https://",strlen("https://"))==0) { |
||
174 | https=1; p1+=strlen("https://"); |
||
175 | } |
||
176 | p3=strchr(p1,'/'); if(p3==NULL) p3=""; |
||
177 | else {*p3++=0; while(*p3=='/') p3++;} |
||
178 | if(strncasecmp(p3,"http://",strlen("http://"))==0 || |
||
179 | strncasecmp(p3,"https://",strlen("https://"))==0) pre=""; |
||
180 | else pre="/"; |
||
181 | snprintf(tbuf,sizeof(tbuf),"GET %s%s HTTP/1.0\r\n%s\r\n\ |
||
182 | Host: %s\r\n\ |
||
183 | %s\r\n\r\n", |
||
184 | pre,p3,cheater1,p1,cheater2); |
||
185 | p4=strchr(p1,':'); if(p4==NULL) { |
||
186 | if(https) port=443; else port=80; |
||
187 | } |
||
188 | else {*p4++=0; port=atoi(p4);} |
||
189 | if(https) { |
||
190 | soc=gethttps(p1); goto read; |
||
191 | } |
||
192 | soc=net_connect(p1); |
||
3840 | kbelabas | 193 | (void)write(soc,tbuf,strlen(tbuf)); |
10 | reyssat | 194 | /* header */ |
195 | read: if(soc==-1) return 1; |
||
196 | c=-1; |
||
197 | while(read(soc,pbuf,1)>0) { |
||
198 | if(pbuf[0]=='\r') continue; |
||
199 | fputc(pbuf[0],stderr); |
||
200 | if((c=='\n') && (pbuf[0]=='\n')) break; else c=pbuf[0]; |
||
201 | } |
||
202 | /* body */ |
||
203 | charcnt=0; |
||
204 | while(read(soc,pbuf,1)>0 && charcnt<MAX_WEBGETFLEN) { |
||
205 | fputc(pbuf[0],outf); charcnt++; |
||
206 | } |
||
207 | close(soc); |
||
208 | if(outf!=stdout) fclose(outf); |
||
209 | if(https) unlink(tfname); |
||
210 | return 0; |
||
211 | } |
||
212 |