1 /* wget.c - Simple downloader to get the resource file in HTTP server
2 *
3 * Copyright 2016 Lipi C.H. Lee <lipisoft@gmail.com>
4 *
5
6 USE_WGET(NEWTOY(wget, "(no-check-certificate)O:", TOYFLAG_USR|TOYFLAG_BIN))
7
8 config WGET
9 bool "wget"
10 default n
11 help
12 usage: wget -O filename URL
13 -O filename: specify output filename
    URL: uniform resource locator, FTP/HTTP only, not HTTPS
15
16 examples:
17 wget -O index.html http://www.example.com
18 wget -O sample.jpg ftp://ftp.example.com:21/sample.jpg
19 */
20
21 #define FOR_wget
22 #include "toys.h"
23
GLOBALS(char * filename;)24 GLOBALS(
25 char *filename;
26 )
27
// Copy the authority part of a URL (everything up to the first '/' or the
// end of the string, i.e. "host" or "host:port") into hostname.
//
// url:      text that follows the "scheme://" prefix
// hostname: destination buffer; the caller must provide at least 1024 bytes
//
// Returns the number of characters copied, not counting the terminating NUL.
// Calls error_exit() when the text would not fit in the buffer.
static unsigned get_hn(const char *url, char *hostname) {
  unsigned i;

  for (i = 0; url[i] != '\0' && url[i] != '/'; i++) {
    // Accept at most 1023 characters so that the NUL written after the loop
    // still lands inside the 1024 byte buffer.
    if (i >= 1023) error_exit("too long hostname in URL");
    hostname[i] = url[i];
  }
  hostname[i] = '\0';

  return i;
}
40
// Copy a decimal port number (the digits up to the first '/' or the end of
// the string) from url into port.
//
// url:   text that follows the ':' separating host and port
// port:  destination buffer; the caller must provide at least 6 bytes
//        (five digits plus the terminating NUL)
// url_i: caller's running offset; it is advanced by one per copied digit
//
// Returns the advanced url_i. Calls error_exit() on a non-digit character or
// when more than five digits are present.
static unsigned get_port(const char *url, char *port, unsigned url_i) {
  unsigned i;

  for (i = 0; url[i] != '\0' && url[i] != '/'; i++, url_i++) {
    if (url[i] < '0' || url[i] > '9') error_exit("wrong decimal port number");
    // Check the length before storing so nothing is written past port[5].
    if (i >= 5) error_exit("too long port number");
    port[i] = url[i];
  }
  port[i] = '\0';

  return url_i;
}
53
// Remove the surrounding '[' and ']' from a bracketed IPv6 literal, in place.
//
// The string is only changed when its first ']' is its last character and
// its first '[' is its first character; any other input is left untouched.
// Calls error_exit() when the string is longer than 1023 bytes.
static void strip_v6_brackets(char* hostname) {
  size_t len = strlen(hostname);
  char *closing_bracket;

  // len is a size_t, so it has to be printed with %zu.
  if (len > 1023) error_exit("hostname too long, %zu bytes\n", len);

  closing_bracket = strchr(hostname, ']');
  if (closing_bracket && closing_bracket == hostname + len - 1) {
    if (strchr(hostname, '[') == hostname) {
      // Overwrite ']' with the terminator, then shift everything (including
      // that terminator) one byte to the left to drop the leading '['.
      hostname[len-1] = 0;
      memmove(hostname, hostname + 1, len - 1);
    }
  }
}
67
68 // get http infos in URL
get_info(const char * url,char * hostname,char * port,char * path)69 static void get_info(const char *url, char* hostname, char *port, char *path) {
70 unsigned i = 7, len;
71 char ftp = !strncmp(url, "ftp://", 6);
72
73 if (ftp) i--;
74 else if (strncmp(url, "http://", i)) error_exit("only FTP/HTTP support");
75 len = get_hn(url+i, hostname);
76 i += len;
77
78 // `hostname` now contains `host:port`, where host can be any of: a raw IPv4
79 // address; a bracketed, raw IPv6 address, or a hostname. Extract port, if it exists,
80 // by searching for the last ':' in the hostname string.
81 char *port_delim = strrchr(hostname, ':');
82 char use_default_port = 1;
83 if (port_delim) {
84 // Found a colon; is there a closing bracket after it? If so,
85 // then this colon was in the middle of a bracketed IPv6 address
86 if (!strchr(port_delim, ']')) {
87 // No closing bracket; this is a real port
88 use_default_port = 0;
89 get_port(port_delim + 1, port, 0);
90
91 // Mark the new end of the hostname string
92 *port_delim = 0;
93 }
94 }
95
96 if (use_default_port) {
97 strcpy(port, "80");
98 }
99
100 // This is a NOP if hostname is not a bracketed IPv6 address
101 strip_v6_brackets(hostname);
102
103 // get uri in URL
104 if (url[i] == '\0') strcpy(path, "/");
105 else if (url[i] == '/') {
106 if (strlen(url+i) < 1024) strcpy(path, url+i);
107 else error_exit("too long path in URL");
108 } else error_exit("wrong URL");
109
110 if (ftp) xexec((char *[]){"ftpget", hostname, TT.filename, path, 0});
111 }
112
// Open a TCP connection to hostname:port over IPv4 or IPv6.
//
// Every address returned by getaddrinfo() is tried in order until one
// connects; failed attempts are reported with perror_msg(). Returns the
// connected socket descriptor. Calls error_exit() when name resolution fails
// or no address can be connected.
static int conn_svr(const char *hostname, const char *port) {
  struct addrinfo hints, *list, *ai;
  int fd;

  // Any address family, stream sockets only; all other hints stay zero.
  memset(&hints, 0, sizeof(struct addrinfo));
  hints.ai_family = AF_UNSPEC;
  hints.ai_socktype = SOCK_STREAM;

  errno = getaddrinfo(hostname, port, &hints, &list);
  if (errno) error_exit("getaddrinfo: %s", gai_strerror(errno));

  for (ai = list; ai; ai = ai->ai_next) {
    fd = socket(ai->ai_family, ai->ai_socktype, ai->ai_protocol);
    if (fd == -1) {
      perror_msg("socket error");
      continue;
    }
    // First successful connect wins; ai stays non-null to signal success.
    if (connect(fd, ai->ai_addr, ai->ai_addrlen) != -1) break;
    perror_msg("connect error");
    close(fd);
  }
  freeaddrinfo(list);
  if (!ai) error_exit("can't connect");

  return fd;
}
144
145 // make HTTP request header field
mk_fld(char * name,char * value)146 static void mk_fld(char *name, char *value) {
147 strcat(toybuf, name);
148 strcat(toybuf, ": ");
149 strcat(toybuf, value);
150 strcat(toybuf, "\r\n");
151 }
152
153 // get http response body starting address and its length
get_body(ssize_t len,ssize_t * body_len)154 static char *get_body(ssize_t len, ssize_t *body_len) {
155 int i;
156
157 for (i = 0; i < len-4; i++)
158 if (!strncmp(toybuf+i, "\r\n\r\n", 4)) break;
159
160 *body_len = len - i - 4;
161 return toybuf+i+4;
162 }
163
wget_main(void)164 void wget_main(void)
165 {
166 int sock, redirects = 10;
167 FILE *fp;
168 ssize_t len, body_len;
169 char *body, *result, *rc, *r_str, *redir_loc = 0;
170 char ua[] = "toybox wget/" TOYBOX_VERSION, hostname[1024], port[6], path[1024];
171
172 // TODO extract filename to be saved from URL
173 if (!(toys.optflags & FLAG_O)) help_exit("no filename");
174 if (fopen(TT.filename, "r")) error_exit("'%s' already exists", TT.filename);
175
176 if(!toys.optargs[0]) help_exit("no URL");
177 get_info(toys.optargs[0], hostname, port, path);
178
179 for (;; redirects--) {
180 sock = conn_svr(hostname, port);
181 // compose HTTP request
182 sprintf(toybuf, "GET %s HTTP/1.1\r\n", path);
183 mk_fld("Host", hostname);
184 mk_fld("User-Agent", ua);
185 mk_fld("Connection", "close");
186 strcat(toybuf, "\r\n");
187
188 // send the HTTP request
189 len = strlen(toybuf);
190 if (write(sock, toybuf, len) != len) perror_exit("write error");
191
192 // read HTTP response
193 if ((len = read(sock, toybuf, 4096)) == -1) perror_exit("read error");
194 if (!strstr(toybuf, "\r\n\r\n")) error_exit("too long HTTP response");
195 body = get_body(len, &body_len);
196 redir_loc = strstr(toybuf, "Location: ");
197 result = strtok(toybuf, "\r");
198 strtok(result, " ");
199 rc = strtok(NULL, " ");
200 r_str = strtok(NULL, " ");
201
202 // HTTP res code check
203 if (!strcmp(rc, "301") || !strcmp(rc, "302")) {
204 char* eol = 0;
205 if ((eol = strchr(redir_loc, '\r')) > 0) *eol = 0;
206 else if (redir_loc) error_exit("Could not parse redirect URL");
207 if (redirects < 0) error_exit("Too many redirects");
208
209 printf("Redirection: %s %s \n", rc, r_str);
210 printf("%s \n", redir_loc);
211 redir_loc = redir_loc+strlen("Location: ");
212 close(sock);
213 get_info(redir_loc, hostname, port, path);
214 } else if (!strcmp(rc, "200")) break;
215 else error_exit("res: %s(%s)", rc, r_str);
216 }
217
218
219 if (!(fp = fopen(TT.filename, "w"))) perror_exit("fopen error");
220 if (fwrite(body, 1, body_len, fp) != body_len)
221 error_exit("fwrite error");
222 while ((len = read(sock, toybuf, 4096)) > 0)
223 if (fwrite(toybuf, 1, len, fp) != len)
224 error_exit("fwrite error");
225 if (fclose(fp) == EOF) perror_exit("fclose error");
226 }
227