1 /* wget.c - Simple downloader to get the resource file in HTTP server
2  *
3  * Copyright 2016 Lipi C.H. Lee <lipisoft@gmail.com>
4  *
5 
6 USE_WGET(NEWTOY(wget, "(no-check-certificate)O:", TOYFLAG_USR|TOYFLAG_BIN))
7 
8 config WGET
9   bool "wget"
10   default n
11   help
12     usage: wget -O filename URL
13     -O filename: specify output filename
14     URL: uniform resource location, FTP/HTTP only, not HTTPS
15 
16     examples:
17       wget -O index.html http://www.example.com
18       wget -O sample.jpg ftp://ftp.example.com:21/sample.jpg
19 */
20 
21 #define FOR_wget
22 #include "toys.h"
23 
// Per-command state, accessed as TT.filename. wget_main() uses it as the
// output path (existence check, then opened for writing) and get_info()
// passes it to ftpget. Presumably filled from -O's argument by option
// parsing (the "O:" in the NEWTOY option string) - confirm against toys.h.
GLOBALS(
  char *filename;
)
27 
// Copy the authority part of a URL ("host" or "host:port") into hostname.
//
// url:      text just after the "scheme://" prefix
// hostname: output buffer; the caller in this file provides 1024 bytes
//
// Copies characters up to (not including) the first '/' or the end of the
// string, NUL-terminates the result and returns the number of characters
// copied. Exits with an error if the text does not fit in the buffer.
static unsigned get_hn(const char *url, char *hostname) {
  unsigned i;

  for (i = 0; url[i] != '\0' && url[i] != '/'; i++) {
    // Keep one byte free for the terminator written after the loop:
    // a 1024 byte buffer holds at most 1023 characters.
    if(i >= 1023) error_exit("too long hostname in URL");
    hostname[i] = url[i];
  }
  hostname[i] = '\0';

  return i;
}
40 
// Copy a decimal port number out of a URL.
//
// url:   text starting at the first digit of the port
// port:  output buffer; the caller in this file provides 6 bytes, i.e. room
//        for 5 digits plus the terminator
// url_i: caller's running index, advanced by one for every digit consumed
//
// Reads up to the first '/' or the end of the string. Exits with an error on
// a non-digit character or when there are more than 5 digits. Returns the
// advanced url_i.
static unsigned get_port(const char *url, char *port, unsigned url_i) {
  unsigned i;

  for (i = 0; url[i] != '\0' && url[i] != '/'; i++, url_i++) {
    if (url[i] < '0' || url[i] > '9') error_exit("wrong decimal port number");
    // Check the length before storing so we never write past port[4];
    // port[5] is reserved for the terminator.
    if (i >= 5) error_exit("too long port number");
    port[i] = url[i];
  }
  port[i] = '\0';

  return url_i;
}
53 
// Turn a bracketed IPv6 literal "[addr]" into "addr", in place.
//
// hostname is only modified when its first character is '[' and the first
// ']' in the string is also its last character; any other string is left
// untouched. Exits with an error if hostname is longer than 1023 characters.
static void strip_v6_brackets(char* hostname) {
  size_t len = strlen(hostname);
  char *closing_bracket;

  // len is a size_t, so it must be printed with %zu
  if (len > 1023) error_exit("hostname too long, %zu bytes\n", len);

  closing_bracket = strchr(hostname, ']');
  if (closing_bracket && closing_bracket == hostname + len - 1
      && hostname[0] == '[')
  {
    // Drop the ']' first, then shift the rest (including the new
    // terminator) one byte left to drop the '['.
    hostname[len-1] = 0;
    memmove(hostname, hostname + 1, len - 1);
  }
}
67 
68 // get http infos in URL
get_info(const char * url,char * hostname,char * port,char * path)69 static void get_info(const char *url, char* hostname, char *port, char *path) {
70   unsigned i = 7, len;
71   char ftp = !strncmp(url, "ftp://", 6);
72 
73   if (ftp) i--;
74   else if (strncmp(url, "http://", i)) error_exit("only FTP/HTTP support");
75   len = get_hn(url+i, hostname);
76   i += len;
77 
78   // `hostname` now contains `host:port`, where host can be any of: a raw IPv4
79   // address; a bracketed, raw IPv6 address, or a hostname. Extract port, if it exists,
80   // by searching for the last ':' in the hostname string.
81   char *port_delim = strrchr(hostname, ':');
82   char use_default_port = 1;
83   if (port_delim) {
84     // Found a colon; is there a closing bracket after it? If so,
85     // then this colon was in the middle of a bracketed IPv6 address
86     if (!strchr(port_delim, ']')) {
87       // No closing bracket; this is a real port
88       use_default_port = 0;
89       get_port(port_delim + 1, port, 0);
90 
91       // Mark the new end of the hostname string
92       *port_delim = 0;
93     }
94   }
95 
96   if (use_default_port) {
97     strcpy(port, "80");
98   }
99 
100   // This is a NOP if hostname is not a bracketed IPv6 address
101   strip_v6_brackets(hostname);
102 
103   // get uri in URL
104   if (url[i] == '\0') strcpy(path, "/");
105   else if (url[i] == '/') {
106     if (strlen(url+i) < 1024) strcpy(path, url+i);
107     else error_exit("too long path in URL");
108   } else error_exit("wrong URL");
109 
110   if (ftp) xexec((char *[]){"ftpget", hostname, TT.filename, path, 0});
111 }
112 
// Open a TCP connection to hostname:port over IPv4 or IPv6.
//
// Resolves the name with getaddrinfo() and tries each returned address in
// order until one connects, reporting every failed socket()/connect() with
// perror_msg(). Returns the connected socket. Exits with an error if the
// lookup fails or no address could be connected to.
static int conn_svr(const char *hostname, const char *port) {
  struct addrinfo hints, *list, *ai;
  int fd;

  // All other hint fields (flags, protocol) stay zero
  memset(&hints, 0, sizeof(hints));
  hints.ai_family = AF_UNSPEC;
  hints.ai_socktype = SOCK_STREAM;

  errno = getaddrinfo(hostname, port, &hints, &list);
  if (errno) error_exit("getaddrinfo: %s", gai_strerror(errno));

  // Walk the address list until one of them accepts the connection
  for (ai = list; ai; ai = ai->ai_next) {
    fd = socket(ai->ai_family, ai->ai_socktype, ai->ai_protocol);
    if (fd == -1) {
      perror_msg("socket error");
      continue;
    }
    if (connect(fd, ai->ai_addr, ai->ai_addrlen) != -1) break;
    perror_msg("connect error");
    close(fd);
  }
  freeaddrinfo(list);

  // ai is NULL only when the loop ran off the end of the list
  if (!ai) error_exit("can't connect");

  return fd;
}
144 
145 // make HTTP request header field
mk_fld(char * name,char * value)146 static void mk_fld(char *name, char *value) {
147   strcat(toybuf, name);
148   strcat(toybuf, ": ");
149   strcat(toybuf, value);
150   strcat(toybuf, "\r\n");
151 }
152 
153 // get http response body starting address and its length
get_body(ssize_t len,ssize_t * body_len)154 static char *get_body(ssize_t len, ssize_t *body_len) {
155   int i;
156 
157   for (i = 0; i < len-4; i++)
158     if (!strncmp(toybuf+i, "\r\n\r\n", 4)) break;
159 
160   *body_len = len - i - 4;
161   return toybuf+i+4;
162 }
163 
wget_main(void)164 void wget_main(void)
165 {
166   int sock, redirects = 10;
167   FILE *fp;
168   ssize_t len, body_len;
169   char *body, *result, *rc, *r_str, *redir_loc = 0;
170   char ua[] = "toybox wget/" TOYBOX_VERSION, hostname[1024], port[6], path[1024];
171 
172   // TODO extract filename to be saved from URL
173   if (!(toys.optflags & FLAG_O)) help_exit("no filename");
174   if (fopen(TT.filename, "r")) error_exit("'%s' already exists", TT.filename);
175 
176   if(!toys.optargs[0]) help_exit("no URL");
177   get_info(toys.optargs[0], hostname, port, path);
178 
179   for (;; redirects--) {
180     sock = conn_svr(hostname, port);
181     // compose HTTP request
182     sprintf(toybuf, "GET %s HTTP/1.1\r\n", path);
183     mk_fld("Host", hostname);
184     mk_fld("User-Agent", ua);
185     mk_fld("Connection", "close");
186     strcat(toybuf, "\r\n");
187 
188     // send the HTTP request
189     len = strlen(toybuf);
190     if (write(sock, toybuf, len) != len) perror_exit("write error");
191 
192     // read HTTP response
193     if ((len = read(sock, toybuf, 4096)) == -1) perror_exit("read error");
194     if (!strstr(toybuf, "\r\n\r\n")) error_exit("too long HTTP response");
195     body = get_body(len, &body_len);
196     redir_loc = strstr(toybuf, "Location: ");
197     result = strtok(toybuf, "\r");
198     strtok(result, " ");
199     rc = strtok(NULL, " ");
200     r_str = strtok(NULL, " ");
201 
202     // HTTP res code check
203     if (!strcmp(rc, "301") || !strcmp(rc, "302")) {
204       char* eol = 0;
205       if ((eol = strchr(redir_loc, '\r')) > 0) *eol = 0;
206       else if (redir_loc) error_exit("Could not parse redirect URL");
207       if (redirects < 0) error_exit("Too many redirects");
208 
209       printf("Redirection: %s %s \n", rc, r_str);
210       printf("%s \n", redir_loc);
211       redir_loc = redir_loc+strlen("Location: ");
212       close(sock);
213       get_info(redir_loc, hostname, port, path);
214     } else if (!strcmp(rc, "200")) break;
215     else error_exit("res: %s(%s)", rc, r_str);
216   }
217 
218 
219   if (!(fp = fopen(TT.filename, "w"))) perror_exit("fopen error");
220   if (fwrite(body, 1, body_len, fp) != body_len)
221     error_exit("fwrite error");
222   while ((len = read(sock, toybuf, 4096)) > 0)
223     if (fwrite(toybuf, 1, len, fp) != len)
224       error_exit("fwrite error");
225   if (fclose(fp) == EOF) perror_exit("fclose error");
226 }
227