Rev 11133 | Rev 11539 | Go to most recent revision | Details | Compare with Previous | Last modification | View Log | RSS feed
Rev | Author | Line No. | Line |
---|---|---|---|
10 | reyssat | 1 | /* Copyright (C) 1998-2003 XIAO, Gang of Universite de Nice - Sophia Antipolis |
2 | * |
||
3 | * This program is free software; you can redistribute it and/or modify |
||
4 | * it under the terms of the GNU General Public License as published by |
||
5 | * the Free Software Foundation; either version 2 of the License, or |
||
6 | * (at your option) any later version. |
||
7 | * |
||
8 | * This program is distributed in the hope that it will be useful, |
||
9 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
||
10 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
||
11 | * GNU General Public License for more details. |
||
12 | * |
||
13 | * You should have received a copy of the GNU General Public License |
||
14 | * along with this program; if not, write to the Free Software |
||
15 | * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. |
||
16 | */ |
||
17 | |||
18 | /* This is a small program used simply to fetch web pages. |
||
19 | * No fancy functionalities such as link redirection or site sucking is |
||
20 | * present. |
||
21 | * Page fetched can only be sent to stdout. */ |
||
22 | |||
23 | #include <netdb.h> |
||
24 | #include <sys/socket.h> |
||
25 | #include <netinet/in.h> |
||
26 | |||
8185 | bpr | 27 | #include "../includes.h" |
28 | #include "../wimsdef.h" |
||
29 | |||
/* Swallow the result of a call whose return value is deliberately not
 * checked (the best-effort write() calls in this file), so that GCC's
 * "unused result" warning stays quiet;
 * see http://stackoverflow.com/a/16245669/490291
 *
 * A real prototype is used instead of an empty parameter list: an empty
 * list is an obsolescent feature and means "no arguments" from C23 on,
 * which would reject every IGNORE(write(...)) call.
 * The inline definition followed by a plain declaration keeps an external
 * definition of the function in this translation unit. */
inline void IGNORE(long result) { (void)result; }
void IGNORE(long result);
32 | |||
/* Header lines added to every GET request built in main(). */
char *cheater1="User-Agent: WIMS-webget";
char *cheater2=
    "Accept: image/gif, image/x-xbitmap, image/jpeg, image/pjpeg, image/png, */*\r\n"
    "Accept-Encoding: gzip\r\n"
    "Accept-Language: en, fr, it, de, es\r\n"
    "Accept-Charset: iso-8859-1,*,utf-8";

/* pbuf: copy of the request parameter, later reused as the 1-byte read buffer. */
char pbuf[4096];
/* tbuf: the complete HTTP request text. */
char tbuf[4096];
/* tfname: path of the temporary file receiving an https response. */
char tfname[1024];
/* tmpdir: directory for that file; replaced by $tmp_dir when it is set. */
char *tmpdir="/tmp";
/* soc: descriptor the response is read from. */
int soc;
/* port: remote port number. */
int port;
/* https: nonzero when the URL scheme is https. */
int https;
/* charcnt: number of body bytes copied to the output so far. */
int charcnt;
/* outf: destination of the fetched page (stdout or a file in the session directory). */
FILE *outf;
44 | |||
/* Report a fatal error and stop.
 * Writes "<msg>: <text for the current errno>" and a newline to stderr,
 * then terminates the program with exit status 1. Never returns. */
void errorquit(char *msg)
{
    const char *reason = strerror(errno);

    fprintf(stderr, "%s: %s\n", msg, reason);
    exit(1);
}
49 | |||
7676 | bpr | 50 | /* Points to the end of the word */ |
10 | reyssat | 51 | char *find_word_end(char *p) |
52 | { |
||
53 | int i; |
||
54 | for(i=0;!isspace(*p) && *p!=0 && i<MAX_LINELEN; p++,i++); |
||
55 | return p; |
||
56 | } |
||
57 | |||
7676 | bpr | 58 | /* Strips leading spaces */ |
10 | reyssat | 59 | char *find_word_start(char *p) |
60 | { |
||
61 | int i; |
||
62 | for(i=0; isspace(*p) && i<MAX_LINELEN; p++,i++); |
||
63 | return p; |
||
64 | } |
||
65 | |||
7676 | bpr | 66 | /* Secured execution */ |
10 | reyssat | 67 | void secure(char *host) |
68 | { |
||
69 | char *p1, *p2, *p3, buf[MAX_LINELEN+1]; |
||
70 | long int l; |
||
71 | FILE *f; |
||
72 | |||
73 | p1=getenv("w_module"); if(p1==NULL || *p1==0) return; |
||
74 | p1=getenv("untrust"); if(p1==NULL || *p1==0) return; |
||
75 | f=fopen("webget.sites","r"); if(f==NULL) return; |
||
76 | l=fread(buf,1,MAX_LINELEN,f); fclose(f); |
||
77 | if(l<=0 || l>MAX_LINELEN) return; |
||
78 | buf[l]=0; |
||
79 | for(p1=find_word_start(buf);*p1;p1=find_word_start(p2)) { |
||
7676 | bpr | 80 | p2=find_word_end(p1); if(*p2) *p2++=0; |
81 | p3=strstr(host,p1); if(p3==NULL) continue; |
||
82 | if((p3==host || *(p3-1)=='.') && *(p3+strlen(p1))==0) return; |
||
10 | reyssat | 83 | } |
84 | exit(1); /* unauthorized sites refused. */ |
||
85 | } |
||
86 | |||
7676 | bpr | 87 | /* open a TCP/IP socket with host/port |
88 | * returns the file descriptor for the socket */ |
||
10 | reyssat | 89 | int net_connect(char *host) |
90 | { |
||
91 | struct hostent *hp; |
||
92 | struct sockaddr_in sin; |
||
93 | int soc; |
||
94 | |||
95 | secure(host); |
||
96 | if(!(hp = gethostbyname(host))) errorquit("unknown host."); |
||
97 | if((soc = socket(hp->h_addrtype,SOCK_STREAM,0))<0) |
||
98 | errorquit("socket() error"); |
||
99 | memmove(&sin.sin_addr,hp->h_addr,hp->h_length); |
||
100 | sin.sin_port=htons(port); |
||
101 | sin.sin_family = hp->h_addrtype; |
||
102 | if(connect(soc,(struct sockaddr *)&sin,sizeof(sin))<0) { |
||
7676 | bpr | 103 | close(soc); errorquit("connect() error"); |
10 | reyssat | 104 | } |
105 | return soc; |
||
106 | } |
||
107 | |||
108 | int gethttps(char *host) |
||
109 | { |
||
110 | char buf[65536]; |
||
111 | char *tp; |
||
7676 | bpr | 112 | |
113 | tp=getenv("tmp_dir"); if(tp!=NULL && *tp!=0) tmpdir=tp; |
||
10 | reyssat | 114 | snprintf(tfname,sizeof(tfname),"%s/https.tmp",tmpdir); |
115 | snprintf(buf,sizeof(buf),"\ |
||
116 | mkdir -p %s\n\ |
||
117 | openssl s_client -connect %s:%d -quiet 2>/dev/null >%s <<@\n\ |
||
118 | %s\n\ |
||
119 | @\n", tmpdir,host,port,tfname,tbuf); |
||
3840 | kbelabas | 120 | if (system(buf)) |
121 | errorquit("system() error"); |
||
10 | reyssat | 122 | return open(tfname,O_RDONLY); |
123 | } |
||
124 | |||
125 | int main(int argc, char *argv[]) |
||
126 | { |
||
127 | char *parm, *pt, *p1, *p2, *p3, *p4, *dp, *pre; |
||
128 | char nbuf[4096], *pp1, *pp2; |
||
129 | char c; |
||
7676 | bpr | 130 | |
10 | reyssat | 131 | parm=getenv("wims_exec_parm"); |
132 | if(parm==NULL || *parm==0) errorquit("no_parameter"); |
||
133 | snprintf(pbuf,sizeof(pbuf),"%s",parm); |
||
134 | p1=find_word_start(pbuf); p2=find_word_end(p1); |
||
11124 | georgesk | 135 | if(*p2!=0) *p2++=0; |
136 | https=0; |
||
10 | reyssat | 137 | outf=stdout; pp1=getenv("w_webget_output"); |
138 | pp2=getenv("session_dir"); |
||
139 | if(pp1!=NULL && strstr(pp1,"..")==NULL && isalnum(*pp1) && pp2!=NULL) { |
||
7676 | bpr | 140 | snprintf(nbuf,sizeof(nbuf),"%s/%s",pp2,pp1); |
141 | outf=fopen(nbuf,"w"); if(outf==NULL) outf=stdout; |
||
10 | reyssat | 142 | } |
143 | dp=getenv("w_webget_option"); |
||
7676 | bpr | 144 | if(dp!=NULL && strstr(dp,"direct")!=NULL) { /* direct get */ |
145 | p1=getenv("w_webget_host"); |
||
146 | p2=getenv("w_webget_port"); |
||
147 | if(p1==NULL || p2==NULL) errorquit("incomplete_request"); |
||
148 | port=atoi(p2); |
||
149 | soc=net_connect(p1); if(soc==-1) return 1; |
||
150 | c=' '; for(p3=parm; *p3; p3++) { |
||
11527 | georgesk | 151 | if(*p3=='\n' && c!='\r') IGNORE(write(soc,"\r",1)); |
152 | IGNORE(write(soc,p3,1)); c=*p3; |
||
7676 | bpr | 153 | } |
11527 | georgesk | 154 | IGNORE(write(soc,"\r\n\r\n",4)); |
7676 | bpr | 155 | pt=getenv("w_module"); |
156 | if(pt==NULL || *pt==0 || strncmp(pt,"adm/",4)==0 ) { /* File to post? */ |
||
157 | pt=getenv("w_webget_post"); if(pt!=NULL && *pt!=0) { |
||
158 | FILE *f; |
||
159 | char buf[4096]; |
||
160 | size_t l; |
||
161 | f=fopen(pt,"r"); if(f!=NULL) { |
||
162 | do { |
||
163 | l=fread(buf,1,sizeof(buf),f); |
||
11527 | georgesk | 164 | if(l>0 && l<=sizeof(buf)) IGNORE(write(soc,buf,l)); |
7676 | bpr | 165 | } while(l==sizeof(buf)); |
166 | fclose(f); |
||
167 | } |
||
168 | } |
||
169 | } |
||
170 | if(strstr(dp,"normalread")!=NULL) goto read; |
||
171 | charcnt=0; |
||
172 | while(read(soc,pbuf,1)>0 && charcnt<10240) { |
||
173 | fputc(pbuf[0],outf); charcnt++; |
||
174 | } |
||
175 | close(soc); |
||
176 | return 0; |
||
10 | reyssat | 177 | } |
178 | if(strncasecmp(p1,"http://",strlen("http://"))==0) p1+=strlen("http://"); |
||
179 | else if(strncasecmp(p1,"https://",strlen("https://"))==0) { |
||
7676 | bpr | 180 | https=1; p1+=strlen("https://"); |
10 | reyssat | 181 | } |
182 | p3=strchr(p1,'/'); if(p3==NULL) p3=""; |
||
183 | else {*p3++=0; while(*p3=='/') p3++;} |
||
184 | if(strncasecmp(p3,"http://",strlen("http://"))==0 || |
||
185 | strncasecmp(p3,"https://",strlen("https://"))==0) pre=""; |
||
186 | else pre="/"; |
||
187 | snprintf(tbuf,sizeof(tbuf),"GET %s%s HTTP/1.0\r\n%s\r\n\ |
||
188 | Host: %s\r\n\ |
||
189 | %s\r\n\r\n", |
||
7676 | bpr | 190 | pre,p3,cheater1,p1,cheater2); |
10 | reyssat | 191 | p4=strchr(p1,':'); if(p4==NULL) { |
7676 | bpr | 192 | if(https) port=443; else port=80; |
10 | reyssat | 193 | } |
194 | else {*p4++=0; port=atoi(p4);} |
||
195 | if(https) { |
||
7676 | bpr | 196 | soc=gethttps(p1); goto read; |
10 | reyssat | 197 | } |
198 | soc=net_connect(p1); |
||
11527 | georgesk | 199 | IGNORE(write(soc,tbuf,strlen(tbuf))); |
7676 | bpr | 200 | /* header */ |
10 | reyssat | 201 | read: if(soc==-1) return 1; |
202 | c=-1; |
||
203 | while(read(soc,pbuf,1)>0) { |
||
7676 | bpr | 204 | if(pbuf[0]=='\r') continue; |
205 | fputc(pbuf[0],stderr); |
||
206 | if((c=='\n') && (pbuf[0]=='\n')) break; else c=pbuf[0]; |
||
10 | reyssat | 207 | } |
7676 | bpr | 208 | /* body */ |
10 | reyssat | 209 | charcnt=0; |
210 | while(read(soc,pbuf,1)>0 && charcnt<MAX_WEBGETFLEN) { |
||
7676 | bpr | 211 | fputc(pbuf[0],outf); charcnt++; |
10 | reyssat | 212 | } |
213 | close(soc); |
||
214 | if(outf!=stdout) fclose(outf); |
||
215 | if(https) unlink(tfname); |
||
216 | return 0; |
||
217 | } |
||
218 |