Download
jcs
/wikipedia
/http.c
(View History)
jcs *: Fix lots of bugs, add progress in fetch dialog | Latest amendment: 44 on 2023-08-30 |
1 | /* |
2 | * Copyright (c) 2020-2022 joshua stein <jcs@jcs.org> |
3 | * |
4 | * Permission to use, copy, modify, and distribute this software for any |
5 | * purpose with or without fee is hereby granted, provided that the above |
6 | * copyright notice and this permission notice appear in all copies. |
7 | * |
8 | * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES |
9 | * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF |
10 | * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR |
11 | * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES |
12 | * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN |
13 | * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF |
14 | * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. |
15 | */ |
16 | |
17 | #include <stdio.h> |
18 | #include <string.h> |
19 | #include "dnr.h" |
20 | #include "http.h" |
21 | #include "util.h" |
22 | #include "wikipedia.h" |
23 | |
24 | struct url * |
25 | url_parse(const char *str) |
26 | { |
27 | struct url *url = NULL; |
28 | char *buf, *scheme, *host, *path; |
29 | unsigned short port; |
30 | short ret, pos; |
31 | size_t len, schemelen, hostlen, pathlen; |
32 | |
33 | len = strlen(str); |
34 | scheme = xmalloc(len + 1); |
35 | if (scheme == NULL) { |
36 | warn("http: Failed allocating %ld", len + 1); |
37 | return NULL; |
38 | } |
39 | host = xmalloc(len + 1); |
40 | if (host == NULL) { |
41 | warn("http: Failed allocating %ld", len + 1); |
42 | xfree(&scheme); |
43 | return NULL; |
44 | } |
45 | path = xmalloc(len + 1); |
46 | if (path == NULL) { |
47 | warn("http: Failed allocating %ld", len + 1); |
48 | xfree(&host); |
49 | xfree(&scheme); |
50 | return NULL; |
51 | } |
52 | |
53 | /* scheme://host:port/path */ |
54 | ret = sscanf(str, "%[^:]://%[^:]:%d%s%n", scheme, host, &port, path, |
55 | &pos); |
56 | if (ret == 4) { |
57 | if (pos > len) |
58 | panic("url_parse sscanf overflow"); |
59 | goto consolidate; |
60 | } |
61 | |
62 | /* scheme://host/path */ |
63 | ret = sscanf(str, "%[^:]://%[^/]%s%n", scheme, host, path, &pos); |
64 | if (ret == 3) { |
65 | if (pos > len) |
66 | panic("url_parse sscanf overflow"); |
67 | if (strcmp(scheme, "http") == 0) |
68 | port = 80; |
69 | else if (strcmp(scheme, "https") == 0) |
70 | port = 443; |
71 | else |
72 | goto cleanup; |
73 | goto consolidate; |
74 | } |
75 | |
76 | goto cleanup; |
77 | |
78 | consolidate: |
79 | schemelen = strlen(scheme); |
80 | hostlen = strlen(host); |
81 | pathlen = strlen(path); |
82 | |
83 | /* |
84 | * Put everything in a single chunk of memory so the caller can just |
85 | * free(url) |
86 | */ |
87 | len = sizeof(struct url) + schemelen + 1 + hostlen + 1 + pathlen + 1; |
88 | url = xmalloc(len); |
89 | if (url == NULL) { |
90 | warn("http: Failed allocating %ld for URL", len); |
91 | goto cleanup; |
92 | } |
93 | |
94 | url->scheme = (char *)url + sizeof(struct url); |
95 | len = strlcpy(url->scheme, scheme, schemelen + 1); |
96 | |
97 | url->host = url->scheme + len + 1; |
98 | len = strlcpy(url->host, host, hostlen + 1); |
99 | |
100 | url->path = url->host + len + 1; |
101 | len = strlcpy(url->path, path, pathlen + 1); |
102 | |
103 | url->port = port; |
104 | |
105 | cleanup: |
106 | xfree(&scheme); |
107 | xfree(&host); |
108 | xfree(&path); |
109 | |
110 | return url; |
111 | } |
112 | |
/*
 * Return non-zero if byte c is copied through url_encode() unchanged:
 * ASCII letters, digits, '-', '_', '.' and '~'.
 */
static int
url_encode_is_plain(unsigned char c)
{
	return (c >= 'A' && c <= 'Z') ||
	    (c >= 'a' && c <= 'z') ||
	    (c >= '0' && c <= '9') ||
	    c == '-' || c == '_' || c == '.' || c == '~';
}

/*
 * Percent-encode a NUL-terminated byte string.
 *
 * ASCII letters, digits and '-', '_', '.', '~' are copied unchanged; every
 * other byte is written as "%XX" with two uppercase hex digits.
 *
 * Returns a newly allocated NUL-terminated string, or NULL if str is NULL
 * or the allocation fails.
 */
char *
url_encode(unsigned char *str)
{
	static const char hex[] = "0123456789ABCDEF";
	char *ret;
	size_t len, n, o;

	if (str == NULL)
		return NULL;

	/* first pass: measure the encoded length */
	for (n = 0, len = 0; str[n] != '\0'; n++)
		len += url_encode_is_plain(str[n]) ? 1 : 3;

	ret = xmalloc(len + 1);
	if (ret == NULL) {
		warn("http: Failed allocating %ld", (long)(len + 1));
		return NULL;
	}

	/* second pass: emit */
	for (n = 0, o = 0; str[n] != '\0'; n++) {
		if (url_encode_is_plain(str[n])) {
			ret[o++] = str[n];
		} else {
			ret[o++] = '%';
			ret[o++] = hex[(str[n] >> 4) & 0x0f];
			ret[o++] = hex[str[n] & 0x0f];
		}
	}
	ret[o] = '\0';

	return ret;
}
152 | |
153 | struct http_request * |
154 | http_get(const char *surl) |
155 | { |
156 | struct url *url; |
157 | struct http_request *req; |
158 | size_t len, alen; |
159 | short err; |
160 | char ip_s[16]; |
161 | ip_addr local_ip; |
162 | tcp_port local_port; |
163 | |
164 | url = url_parse(surl); |
165 | if (url == NULL) |
166 | return NULL; |
167 | |
168 | req = xmalloczero(sizeof(struct http_request)); |
169 | if (req == NULL) { |
170 | warn("http: Failed allocating http_request"); |
171 | return NULL; |
172 | } |
173 | req->url = url; |
174 | req->tcp_buf_size = (4 * 1500) + sizeof(req->chunk); |
175 | req->tcp_buf = xmalloc(req->tcp_buf_size); |
176 | if (req->tcp_buf == NULL) { |
177 | warn("http: Failed allocating tcp_buf"); |
178 | xfree(&req); |
179 | return NULL; |
180 | } |
181 | |
182 | err = _TCPCreate(&req->tcp_iopb, &req->tcp_stream, (Ptr)req->tcp_buf, |
183 | req->tcp_buf_size, nil, nil, nil, false); |
184 | if (err) { |
185 | warn("TCPCreate failed: %d", err); |
186 | goto error; |
187 | } |
188 | |
189 | err = DNSResolveName(&req->url->host, &req->host_ip, NULL); |
190 | if (err) { |
191 | warn("Couldn't resolve host %s (%d)", req->url->host, err); |
192 | goto error; |
193 | } |
194 | |
195 | long2ip(req->host_ip, (char *)&ip_s); |
196 | |
197 | err = _TCPActiveOpen(&req->tcp_iopb, req->tcp_stream, req->host_ip, |
198 | req->url->port, &local_ip, &local_port, nil, nil, false); |
199 | if (err) { |
200 | warn("Failed connecting to %s (%s) port %d: %d", |
201 | req->url->host, ip_s, req->url->port, err); |
202 | goto error; |
203 | } |
204 | |
205 | alen = 256 + strlen(req->url->host) + strlen(req->url->path); |
206 | req->message = xmalloc(alen); |
207 | if (req->message == NULL) { |
208 | warn("http: Failed allocating message"); |
209 | goto error; |
210 | } |
211 | len = snprintf(req->message, alen, |
212 | "GET %s HTTP/1.0\r\n" |
213 | "Host: %s\r\n" |
214 | "User-Agent: %s\r\n" |
215 | "Accept: */*\r\n" |
216 | "\r\n", req->url->path, req->url->host, PROGRAM_NAME); |
217 | if (len > alen) |
218 | panic("snprintf overflow"); |
219 | |
220 | memset(&req->tcp_wds, 0, sizeof(req->tcp_wds)); |
221 | req->tcp_wds[0].ptr = req->message; |
222 | req->tcp_wds[0].length = len; |
223 | |
224 | err = _TCPSend(&req->tcp_iopb, req->tcp_stream, req->tcp_wds, nil, nil, |
225 | false); |
226 | if (err) { |
227 | warn("TCPSend to %s (%s) failed: %d", req->url->host, ip_s, err); |
228 | goto error; |
229 | } |
230 | |
231 | return req; |
232 | |
233 | error: |
234 | http_req_free(&req); |
235 | return NULL; |
236 | } |
237 | |
238 | ssize_t |
239 | http_req_read(struct http_request *req, char *data, size_t len) |
240 | { |
241 | short err; |
242 | unsigned short rlen; |
243 | |
244 | if (!req) |
245 | return -1; |
246 | |
247 | err = _TCPStatus(&req->tcp_iopb, req->tcp_stream, &req->tcp_status_pb, |
248 | nil, nil, false); |
249 | if (err) |
250 | return -1; |
251 | |
252 | if (req->tcp_status_pb.amtUnreadData == 0) |
253 | return 0; |
254 | |
255 | rlen = MIN(req->tcp_status_pb.amtUnreadData, len); |
256 | |
257 | err = _TCPRcv(&req->tcp_iopb, req->tcp_stream, data, &rlen, nil, nil, |
258 | false); |
259 | if (err) |
260 | return -1; |
261 | |
262 | return rlen; |
263 | } |
264 | |
265 | bool |
266 | http_req_skip_header(struct http_request *req) |
267 | { |
268 | size_t len, n; |
269 | bool last_nl = false; |
270 | |
271 | for (;;) { |
272 | if (req->chunk_len == sizeof(req->chunk)) |
273 | /* this sure is a long header... */ |
274 | req->chunk_len = 0; |
275 | |
276 | len = http_req_read(req, req->chunk + req->chunk_len, |
277 | sizeof(req->chunk) - req->chunk_len); |
278 | if (len < 0) |
279 | return false; |
280 | if (len == 0) |
281 | continue; |
282 | req->chunk_len += len; |
283 | |
284 | for (n = 1; n < req->chunk_len; n++) { |
285 | if (req->chunk[n - 1] != '\r' || req->chunk[n] != '\n') { |
286 | last_nl = false; |
287 | continue; |
288 | } |
289 | |
290 | /* newline, shift chunk back */ |
291 | if (strncmp(req->chunk, "Content-Length: ", 16) == 0) { |
292 | if (sscanf(req->chunk, "Content-Length: %ld", &len) == 1) |
293 | req->content_len = len; |
294 | } |
295 | req->chunk_len -= n + 1; |
296 | memmove(req->chunk, req->chunk + n + 1, req->chunk_len); |
297 | req->chunk_off = 0; |
298 | |
299 | if (last_nl) |
300 | return true; |
301 | |
302 | last_nl = true; |
303 | n = 0; /* start at 1 on next iteration */ |
304 | } |
305 | } |
306 | |
307 | return false; |
308 | } |
309 | |
310 | short |
311 | http_req_chunk_peek(void *cookie) |
312 | { |
313 | struct http_request *req = (struct http_request *)cookie; |
314 | |
315 | if (req->chunk_len == 0 || (req->chunk_off + 1 > req->chunk_len)) { |
316 | req->chunk_len = http_req_read(req, req->chunk, sizeof(req->chunk)); |
317 | req->chunk_off = 0; |
318 | } |
319 | |
320 | if (req->chunk_len == 0 || (req->chunk_off + 1 > req->chunk_len)) |
321 | return EOF; |
322 | |
323 | return req->chunk[req->chunk_off]; |
324 | } |
325 | |
326 | short |
327 | http_req_chunk_read(void *cookie) |
328 | { |
329 | struct http_request *req = (struct http_request *)cookie; |
330 | short c; |
331 | |
332 | c = http_req_chunk_peek(req); |
333 | if (c == EOF) |
334 | return c; |
335 | |
336 | req->chunk_off++; |
337 | |
338 | return c; |
339 | } |
340 | |
341 | void |
342 | http_req_free(void *reqptr) |
343 | { |
344 | unsigned long *addr = (unsigned long *)reqptr; |
345 | void *ptr = (void *)*addr; |
346 | struct http_request *req = (struct http_request *)ptr; |
347 | |
348 | if (req == NULL) |
349 | return; |
350 | |
351 | _TCPRelease(&req->tcp_iopb, req->tcp_stream, nil, nil, false); |
352 | |
353 | if (req->message != NULL) |
354 | xfree(&req->message); |
355 | xfree(&req->tcp_buf); |
356 | xfree(&req->url); |
357 | xfree(&req); |
358 | |
359 | *addr = 0L; |
360 | } |