AmendHub

Download

jcs

/

wikipedia

/

http.c

 

(View History)

jcs   browser+wikipedia: Support UTF8, article redirections, "View Source" mode Latest amendment: 32 on 2022-09-07

1 /*
2 * Copyright (c) 2020-2022 joshua stein <jcs@jcs.org>
3 *
4 * Permission to use, copy, modify, and distribute this software for any
5 * purpose with or without fee is hereby granted, provided that the above
6 * copyright notice and this permission notice appear in all copies.
7 *
8 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
9 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
10 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
11 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
12 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
13 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
14 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
15 */
16
17 #include <stdio.h>
18 #include <string.h>
19 #include "dnr.h"
20 #include "http.h"
21 #include "util.h"
22 #include "wikipedia.h"
23
24 struct url *
25 url_parse(const char *str)
26 {
27 struct url *url = NULL;
28 char *buf, *scheme, *host, *path;
29 unsigned short port;
30 short ret, pos;
31 size_t len, schemelen, hostlen, pathlen;
32
33 len = strlen(str);
34 scheme = xmalloc(len + 1, "url_parse scheme");
35 host = xmalloc(len + 1, "url_parse host");
36 path = xmalloc(len + 1, "url_parse path");
37
38 /* scheme://host:port/path */
39 ret = sscanf(str, "%[^:]://%[^:]:%d%s%n", scheme, host, &port, path,
40 &pos);
41 if (ret == 4) {
42 if (pos > len)
43 panic("url_parse sscanf overflow");
44 goto consolidate;
45 }
46
47 /* scheme://host/path */
48 ret = sscanf(str, "%[^:]://%[^/]%s%n", scheme, host, path, &pos);
49 if (ret == 3) {
50 if (pos > len)
51 panic("url_parse sscanf overflow");
52 if (strcmp(scheme, "http") == 0)
53 port = 80;
54 else if (strcmp(scheme, "https") == 0)
55 port = 443;
56 else
57 goto cleanup;
58 goto consolidate;
59 }
60
61 goto cleanup;
62
63 consolidate:
64 schemelen = strlen(scheme);
65 hostlen = strlen(host);
66 pathlen = strlen(path);
67
68 /*
69 * Put everything in a single chunk of memory so the caller can just
70 * free(url)
71 */
72 len = sizeof(struct url) + schemelen + 1 + hostlen + 1 + pathlen + 1;
73 url = xmalloc(len, "url");
74
75 url->scheme = (char *)url + sizeof(struct url);
76 len = strlcpy(url->scheme, scheme, schemelen + 1);
77
78 url->host = url->scheme + len + 1;
79 len = strlcpy(url->host, host, hostlen + 1);
80
81 url->path = url->host + len + 1;
82 len = strlcpy(url->path, path, pathlen + 1);
83
84 url->port = port;
85
86 cleanup:
87 xfree(&scheme);
88 xfree(&host);
89 xfree(&path);
90
91 return url;
92 }
93
/*
 * Tell whether byte c is copied through url_encode() unchanged: ASCII
 * letters, digits, '-', '_', '.', and '~'.
 */
static int
url_encode_passthrough(unsigned char c)
{
	if (c >= 'A' && c <= 'Z')
		return 1;
	if (c >= 'a' && c <= 'z')
		return 1;
	if (c >= '0' && c <= '9')
		return 1;
	return (c == '-' || c == '_' || c == '.' || c == '~');
}

/*
 * Percent-encode the NUL-terminated string str.
 *
 * Bytes accepted by url_encode_passthrough() are copied as-is; every
 * other byte is written as '%' followed by its value as two uppercase
 * hexadecimal digits.  Returns a newly allocated NUL-terminated string.
 */
char *
url_encode(unsigned char *str)
{
	static const char hexdigits[] = "0123456789ABCDEF";
	const unsigned char *in;
	char *encoded;
	size_t total = 0, w = 0;

	/* First pass: measure the encoded length */
	for (in = str; *in != '\0'; in++)
		total += url_encode_passthrough(*in) ? 1 : 3;

	encoded = xmalloc(total + 1, "url_encode");

	/* Second pass: emit the encoded bytes */
	for (in = str; *in != '\0'; in++) {
		if (url_encode_passthrough(*in)) {
			encoded[w++] = *in;
			continue;
		}
		encoded[w++] = '%';
		encoded[w++] = hexdigits[*in >> 4];
		encoded[w++] = hexdigits[*in & 0x0F];
	}
	encoded[w] = '\0';

	return encoded;
}
129
130 struct http_request *
131 http_get(const char *surl)
132 {
133 struct url *url;
134 struct http_request *req;
135 size_t len, alen;
136 short err;
137 char ip_s[16];
138 ip_addr local_ip;
139 tcp_port local_port;
140
141 url = url_parse(surl);
142 if (url == NULL)
143 return NULL;
144
145 req = xmalloczero(sizeof(struct http_request), "http_get");
146 req->url = url;
147 req->tcp_buf_size = (4 * 1500) + sizeof(req->chunk);
148 req->tcp_buf = xmalloc(req->tcp_buf_size, "http_get buf");
149
150 err = _TCPCreate(&req->tcp_iopb, &req->tcp_stream, (Ptr)req->tcp_buf,
151 req->tcp_buf_size, nil, nil, nil, false);
152 if (err) {
153 warn("TCPCreate failed: %d", err);
154 goto error;
155 }
156
157 err = ResolveName(req->url->host, &req->host_ip);
158 if (err) {
159 warn("Couldn't resolve host %s (%d)", req->url->host, err);
160 goto error;
161 }
162
163 long2ip(req->host_ip, (char *)&ip_s);
164
165 err = _TCPActiveOpen(&req->tcp_iopb, req->tcp_stream, req->host_ip,
166 req->url->port, &local_ip, &local_port, nil, nil, false);
167 if (err) {
168 warn("Failed connecting to %s (%s) port %d: %d",
169 req->url->host, ip_s, req->url->port, err);
170 goto error;
171 }
172
173 err = _TCPStatus(&req->tcp_iopb, req->tcp_stream, &req->tcp_status_pb,
174 nil, nil, false);
175 if (err) {
176 warn("Failed TCPStatus on connection to %s (%s) port %d: %d",
177 req->url->host, ip_s, req->url->port, err);
178 goto error;
179 }
180
181 alen = 256 + strlen(req->url->host) + strlen(req->url->path);
182 req->message = xmalloc(alen, "http_get verb");
183 len = snprintf(req->message, alen,
184 "GET %s HTTP/1.0\r\n"
185 "Host: %s\r\n"
186 "User-Agent: %s\r\n"
187 "Accept: */*\r\n"
188 "\r\n", req->url->path, req->url->host, PROGRAM_NAME);
189 if (len > alen)
190 panic("snprintf overflow");
191
192 memset(&req->tcp_wds, 0, sizeof(req->tcp_wds));
193 req->tcp_wds[0].ptr = req->message;
194 req->tcp_wds[0].length = len;
195
196 err = _TCPSend(&req->tcp_iopb, req->tcp_stream, req->tcp_wds, nil, nil,
197 false);
198 if (err) {
199 warn("TCPSend to %s (%s) failed: %d", req->url->host, ip_s, err);
200 goto error;
201 }
202
203 return req;
204
205 error:
206 http_req_free(&req);
207 return NULL;
208 }
209
210 ssize_t
211 http_req_read(struct http_request *req, char *data, size_t len)
212 {
213 short err;
214 unsigned short rlen;
215
216 if (!req)
217 return -1;
218
219 err = _TCPStatus(&req->tcp_iopb, req->tcp_stream, &req->tcp_status_pb,
220 nil, nil, false);
221 if (err)
222 return -1;
223
224 if (req->tcp_status_pb.amtUnreadData == 0)
225 return 0;
226
227 rlen = MIN(req->tcp_status_pb.amtUnreadData, len);
228
229 err = _TCPRcv(&req->tcp_iopb, req->tcp_stream, data, &rlen, nil, nil,
230 false);
231 if (err)
232 return -1;
233
234 return rlen;
235 }
236
237 bool
238 http_req_skip_header(struct http_request *req)
239 {
240 size_t len, n;
241
242 for (;;) {
243 if (req->chunk_len > 3) {
244 /*
245 * Leave last 3 bytes of previous read in case \r\n\r\n happens
246 * across reads.
247 */
248 memmove(req->chunk, req->chunk + req->chunk_len - 3,
249 req->chunk_len - 3);
250 req->chunk_len = 3;
251 }
252 len = http_req_read(req, req->chunk + req->chunk_len,
253 sizeof(req->chunk) - req->chunk_len);
254 if (len < 0)
255 return false;
256 if (len == 0)
257 continue;
258 req->chunk_len += len;
259
260 for (n = 3; n < req->chunk_len; n++) {
261 if (req->chunk[n - 3] != '\r' || req->chunk[n - 2] != '\n' ||
262 req->chunk[n - 1] != '\r' || req->chunk[n] != '\n')
263 continue;
264
265 req->chunk_len -= n + 1;
266 memmove(req->chunk, req->chunk + n + 1, req->chunk_len);
267 req->chunk_off = 0;
268 return true;
269 }
270 }
271
272 return false;
273 }
274
275 short
276 http_req_chunk_peek(void *cookie)
277 {
278 struct http_request *req = (struct http_request *)cookie;
279
280 if (req->chunk_len == 0 || (req->chunk_off + 1 > req->chunk_len)) {
281 req->chunk_len = http_req_read(req, req->chunk, sizeof(req->chunk));
282 req->chunk_off = 0;
283 }
284
285 if (req->chunk_len == 0 || (req->chunk_off + 1 > req->chunk_len))
286 return EOF;
287
288 return req->chunk[req->chunk_off];
289 }
290
291 short
292 http_req_chunk_read(void *cookie)
293 {
294 struct http_request *req = (struct http_request *)cookie;
295 short c;
296
297 c = http_req_chunk_peek(req);
298 if (c == EOF)
299 return c;
300
301 req->chunk_off++;
302
303 return c;
304 }
305
306 void
307 http_req_free(void *reqptr)
308 {
309 unsigned long *addr = (unsigned long *)reqptr;
310 void *ptr = (void *)*addr;
311 struct http_request *req = (struct http_request *)ptr;
312
313 if (req == NULL)
314 return;
315
316 _TCPRelease(&req->tcp_iopb, req->tcp_stream, nil, nil, false);
317
318 // if (req->message != NULL)
319 // xfree(&req->message);
320 // xfree(&req->tcp_buf);
321 // xfree(&req->url);
322 // xfree(reqptr);
323 }