Rev 11539 | Details | Compare with Previous | Last modification | View Log | RSS feed
Rev | Author | Line No. | Line |
---|---|---|---|
10 | reyssat | 1 | /* Copyright (C) 1998-2003 XIAO, Gang of Universite de Nice - Sophia Antipolis |
2 | * |
||
3 | * This program is free software; you can redistribute it and/or modify |
||
4 | * it under the terms of the GNU General Public License as published by |
||
5 | * the Free Software Foundation; either version 2 of the License, or |
||
6 | * (at your option) any later version. |
||
7 | * |
||
8 | * This program is distributed in the hope that it will be useful, |
||
9 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
||
10 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
||
11 | * GNU General Public License for more details. |
||
12 | * |
||
13 | * You should have received a copy of the GNU General Public License |
||
14 | * along with this program; if not, write to the Free Software |
||
15 | * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. |
||
16 | */ |
||
17 | |||
/* This is a small program used simply to fetch web pages.
 * No fancy functionality such as redirect following or site mirroring
 * is present.
 * The fetched page is written to stdout, unless the environment variables
 * w_webget_output and session_dir designate an output file. */
||
22 | |||
23 | #include <netdb.h> |
||
24 | #include <sys/socket.h> |
||
25 | #include <netinet/in.h> |
||
26 | |||
8185 | bpr | 27 | #include "../includes.h" |
28 | #include "../wimsdef.h" |
||
29 | |||
/* Fixed header lines added to every GET request built by main(). */
char *cheater1="User-Agent: WIMS-webget";
char *cheater2=
  "Accept: image/gif, image/x-xbitmap, image/jpeg, image/pjpeg, image/png, */*\r\n"
  "Accept-Encoding: gzip\r\n"
  "Accept-Language: en, fr, it, de, es\r\n"
  "Accept-Charset: iso-8859-1,*,utf-8";

/* pbuf receives a copy of the request parameter and is later reused as
 * the one-byte read buffer; tbuf holds the request text built by main(). */
char pbuf[4096];
char tbuf[4096];

/* Name of the temporary file used by gethttps(). */
char tfname[1024];

/* Directory for temporary files; gethttps() replaces it by $tmp_dir
 * when that variable is set and non-empty. */
char *tmpdir="/tmp";

int soc;      /* descriptor the reply is read from */
int port;     /* remote port number */
int https;    /* set to 1 when the URL starts with https:// */
int charcnt;  /* count of reply bytes copied to outf */
FILE *outf;   /* destination of the fetched data */
||
41 | |||
/* Report a fatal error and terminate the program.
 * Prints "<msg>: <strerror(errno)>" followed by a newline on stderr,
 * then exits with status 1. Never returns. */
void errorquit(char *msg)
{
  const char *reason=strerror(errno);

  fprintf(stderr,"%s: %s\n",msg,reason);
  exit(1);
}
46 | |||
7676 | bpr | 47 | /* Points to the end of the word */ |
10 | reyssat | 48 | char *find_word_end(char *p) |
49 | { |
||
12248 | bpr | 50 | int i; |
51 | for(i=0;!isspace(*p) && *p!=0 && i<MAX_LINELEN; p++,i++); |
||
52 | return p; |
||
10 | reyssat | 53 | } |
54 | |||
7676 | bpr | 55 | /* Strips leading spaces */ |
10 | reyssat | 56 | char *find_word_start(char *p) |
57 | { |
||
12248 | bpr | 58 | int i; |
59 | for(i=0; isspace(*p) && i<MAX_LINELEN; p++,i++); |
||
60 | return p; |
||
10 | reyssat | 61 | } |
62 | |||
7676 | bpr | 63 | /* Secured execution */ |
10 | reyssat | 64 | void secure(char *host) |
65 | { |
||
12248 | bpr | 66 | char *p1, *p2, *p3, buf[MAX_LINELEN+1]; |
67 | long int l; |
||
68 | FILE *f; |
||
10 | reyssat | 69 | |
12248 | bpr | 70 | p1=getenv("w_module"); if(p1==NULL || *p1==0) return; |
71 | p1=getenv("untrust"); if(p1==NULL || *p1==0) return; |
||
72 | f=fopen("webget.sites","r"); if(f==NULL) return; |
||
73 | l=fread(buf,1,MAX_LINELEN,f); fclose(f); |
||
74 | if(l<=0 || l>MAX_LINELEN) return; |
||
75 | buf[l]=0; |
||
76 | for(p1=find_word_start(buf);*p1;p1=find_word_start(p2)) { |
||
77 | p2=find_word_end(p1); if(*p2) *p2++=0; |
||
78 | p3=strstr(host,p1); if(p3==NULL) continue; |
||
79 | if((p3==host || *(p3-1)=='.') && *(p3+strlen(p1))==0) return; |
||
80 | } |
||
81 | exit(1); /* unauthorized sites refused. */ |
||
10 | reyssat | 82 | } |
83 | |||
7676 | bpr | 84 | /* open a TCP/IP socket with host/port |
85 | * returns the file descriptor for the socket */ |
||
10 | reyssat | 86 | int net_connect(char *host) |
87 | { |
||
12248 | bpr | 88 | struct hostent *hp; |
89 | struct sockaddr_in sin; |
||
90 | int soc; |
||
10 | reyssat | 91 | |
12248 | bpr | 92 | secure(host); |
93 | if(!(hp = gethostbyname(host))) errorquit("unknown host."); |
||
94 | if((soc = socket(hp->h_addrtype,SOCK_STREAM,0))<0) |
||
95 | errorquit("socket() error"); |
||
96 | memmove(&sin.sin_addr,hp->h_addr,hp->h_length); |
||
97 | sin.sin_port=htons(port); |
||
98 | sin.sin_family = hp->h_addrtype; |
||
99 | if(connect(soc,(struct sockaddr *)&sin,sizeof(sin))<0) { |
||
100 | close(soc); errorquit("connect() error"); |
||
101 | } |
||
102 | return soc; |
||
10 | reyssat | 103 | } |
104 | |||
105 | int gethttps(char *host) |
||
106 | { |
||
12248 | bpr | 107 | char buf[65536]; |
108 | char *tp; |
||
7676 | bpr | 109 | |
12248 | bpr | 110 | tp=getenv("tmp_dir"); if(tp!=NULL && *tp!=0) tmpdir=tp; |
111 | snprintf(tfname,sizeof(tfname),"%s/https.tmp",tmpdir); |
||
112 | snprintf(buf,sizeof(buf),"\ |
||
10 | reyssat | 113 | mkdir -p %s\n\ |
114 | openssl s_client -connect %s:%d -quiet 2>/dev/null >%s <<@\n\ |
||
115 | %s\n\ |
||
116 | @\n", tmpdir,host,port,tfname,tbuf); |
||
12248 | bpr | 117 | if (system(buf)) |
118 | errorquit("system() error"); |
||
119 | return open(tfname,O_RDONLY); |
||
10 | reyssat | 120 | } |
121 | |||
122 | int main(int argc, char *argv[]) |
||
123 | { |
||
12248 | bpr | 124 | char *parm, *pt, *p1, *p2, *p3, *p4, *dp, *pre; |
125 | char nbuf[4096], *pp1, *pp2; |
||
126 | char c; |
||
7676 | bpr | 127 | |
12248 | bpr | 128 | parm=getenv("wims_exec_parm"); |
129 | if(parm==NULL || *parm==0) errorquit("no_parameter"); |
||
130 | snprintf(pbuf,sizeof(pbuf),"%s",parm); |
||
131 | p1=find_word_start(pbuf); p2=find_word_end(p1); |
||
132 | if(*p2!=0) *p2++=0; |
||
133 | https=0; |
||
134 | outf=stdout; pp1=getenv("w_webget_output"); |
||
135 | pp2=getenv("session_dir"); |
||
136 | if(pp1!=NULL && strstr(pp1,"..")==NULL && isalnum(*pp1) && pp2!=NULL) { |
||
137 | snprintf(nbuf,sizeof(nbuf),"%s/%s",pp2,pp1); |
||
138 | outf=fopen(nbuf,"w"); if(outf==NULL) outf=stdout; |
||
139 | } |
||
140 | dp=getenv("w_webget_option"); |
||
141 | if(dp!=NULL && strstr(dp,"direct")!=NULL) { /* direct get */ |
||
142 | p1=getenv("w_webget_host"); |
||
143 | p2=getenv("w_webget_port"); |
||
144 | if(p1==NULL || p2==NULL) errorquit("incomplete_request"); |
||
145 | port=atoi(p2); |
||
146 | soc=net_connect(p1); if(soc==-1) return 1; |
||
147 | c=' '; |
||
148 | for(p3=parm; *p3; p3++) { |
||
149 | if(*p3=='\n' && c!='\r') (void)write(soc,"\r",1); |
||
150 | (void)write(soc,p3,1); c=*p3; |
||
10 | reyssat | 151 | } |
12248 | bpr | 152 | (void)write(soc,"\r\n\r\n",4); |
153 | pt=getenv("w_module"); |
||
154 | if(pt==NULL || *pt==0 || strncmp(pt,"adm/",4)==0 ) { /* File to post? */ |
||
155 | pt=getenv("w_webget_post"); |
||
156 | if(pt!=NULL && *pt!=0) { |
||
157 | FILE *f; |
||
158 | char buf[4096]; |
||
159 | size_t l; |
||
160 | f=fopen(pt,"r"); if(f!=NULL) { |
||
161 | do { |
||
162 | l=fread(buf,1,sizeof(buf),f); |
||
163 | if(l>0 && l<=sizeof(buf)) (void)write(soc,buf,l); |
||
164 | } |
||
165 | while(l==sizeof(buf)); |
||
166 | fclose(f); |
||
7676 | bpr | 167 | } |
10 | reyssat | 168 | } |
12248 | bpr | 169 | } |
170 | if(strstr(dp,"normalread")!=NULL) goto read; |
||
171 | charcnt=0; |
||
172 | while(read(soc,pbuf,1)>0 && charcnt<10240) { |
||
173 | fputc(pbuf[0],outf); charcnt++; |
||
174 | } |
||
175 | close(soc); |
||
176 | return 0; |
||
177 | } |
||
178 | if(strncasecmp(p1,"http://",strlen("http://"))==0) p1+=strlen("http://"); |
||
179 | else |
||
180 | if(strncasecmp(p1,"https://",strlen("https://"))==0) { |
||
7676 | bpr | 181 | https=1; p1+=strlen("https://"); |
10 | reyssat | 182 | } |
12248 | bpr | 183 | p3=strchr(p1,'/'); if(p3==NULL) p3=""; |
184 | else {*p3++=0; while(*p3=='/') p3++;} |
||
185 | if(strncasecmp(p3,"http://",strlen("http://"))==0 || |
||
186 | strncasecmp(p3,"https://",strlen("https://"))==0) pre=""; |
||
187 | else pre="/"; |
||
188 | snprintf(tbuf,sizeof(tbuf),"GET %s%s HTTP/1.0\r\n%s\r\n\ |
||
10 | reyssat | 189 | Host: %s\r\n\ |
190 | %s\r\n\r\n", |
||
7676 | bpr | 191 | pre,p3,cheater1,p1,cheater2); |
12248 | bpr | 192 | p4=strchr(p1,':'); |
193 | if(p4==NULL) { |
||
194 | if(https) port=443; else port=80; |
||
195 | } |
||
196 | else {*p4++=0; port=atoi(p4);} |
||
197 | if(https) { |
||
198 | soc=gethttps(p1); goto read; |
||
199 | } |
||
200 | soc=net_connect(p1); |
||
201 | (void)write(soc,tbuf,strlen(tbuf)); |
||
7676 | bpr | 202 | /* header */ |
12248 | bpr | 203 | read: if(soc==-1) return 1; |
204 | c=-1; |
||
205 | while(read(soc,pbuf,1)>0) { |
||
206 | if(pbuf[0]=='\r') continue; |
||
207 | fputc(pbuf[0],stderr); |
||
208 | if((c=='\n') && (pbuf[0]=='\n')) break; else c=pbuf[0]; |
||
209 | } |
||
7676 | bpr | 210 | /* body */ |
12248 | bpr | 211 | charcnt=0; |
212 | while(read(soc,pbuf,1)>0 && charcnt<MAX_WEBGETFLEN) { |
||
213 | fputc(pbuf[0],outf); charcnt++; |
||
214 | } |
||
215 | close(soc); |
||
216 | if(outf!=stdout) fclose(outf); |
||
217 | if(https) unlink(tfname); |
||
218 | return 0; |
||
10 | reyssat | 219 | } |
220 |