Download
jcs
/wikipedia
/http.c
(View History)
jcs browser+wikipedia: Support UTF8, article redirections, "View Source" mode | Latest amendment: 32 on 2022-09-07 |
1 | /* |
2 | * Copyright (c) 2020-2022 joshua stein <jcs@jcs.org> |
3 | * |
4 | * Permission to use, copy, modify, and distribute this software for any |
5 | * purpose with or without fee is hereby granted, provided that the above |
6 | * copyright notice and this permission notice appear in all copies. |
7 | * |
8 | * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES |
9 | * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF |
10 | * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR |
11 | * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES |
12 | * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN |
13 | * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF |
14 | * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. |
15 | */ |
16 | |
17 | #include <stdio.h> |
18 | #include <string.h> |
19 | #include "dnr.h" |
20 | #include "http.h" |
21 | #include "util.h" |
22 | #include "wikipedia.h" |
23 | |
24 | struct url * |
25 | url_parse(const char *str) |
26 | { |
27 | struct url *url = NULL; |
28 | char *buf, *scheme, *host, *path; |
29 | unsigned short port; |
30 | short ret, pos; |
31 | size_t len, schemelen, hostlen, pathlen; |
32 | |
33 | len = strlen(str); |
34 | scheme = xmalloc(len + 1, "url_parse scheme"); |
35 | host = xmalloc(len + 1, "url_parse host"); |
36 | path = xmalloc(len + 1, "url_parse path"); |
37 | |
38 | /* scheme://host:port/path */ |
39 | ret = sscanf(str, "%[^:]://%[^:]:%d%s%n", scheme, host, &port, path, |
40 | &pos); |
41 | if (ret == 4) { |
42 | if (pos > len) |
43 | panic("url_parse sscanf overflow"); |
44 | goto consolidate; |
45 | } |
46 | |
47 | /* scheme://host/path */ |
48 | ret = sscanf(str, "%[^:]://%[^/]%s%n", scheme, host, path, &pos); |
49 | if (ret == 3) { |
50 | if (pos > len) |
51 | panic("url_parse sscanf overflow"); |
52 | if (strcmp(scheme, "http") == 0) |
53 | port = 80; |
54 | else if (strcmp(scheme, "https") == 0) |
55 | port = 443; |
56 | else |
57 | goto cleanup; |
58 | goto consolidate; |
59 | } |
60 | |
61 | goto cleanup; |
62 | |
63 | consolidate: |
64 | schemelen = strlen(scheme); |
65 | hostlen = strlen(host); |
66 | pathlen = strlen(path); |
67 | |
68 | /* |
69 | * Put everything in a single chunk of memory so the caller can just |
70 | * free(url) |
71 | */ |
72 | len = sizeof(struct url) + schemelen + 1 + hostlen + 1 + pathlen + 1; |
73 | url = xmalloc(len, "url"); |
74 | |
75 | url->scheme = (char *)url + sizeof(struct url); |
76 | len = strlcpy(url->scheme, scheme, schemelen + 1); |
77 | |
78 | url->host = url->scheme + len + 1; |
79 | len = strlcpy(url->host, host, hostlen + 1); |
80 | |
81 | url->path = url->host + len + 1; |
82 | len = strlcpy(url->path, path, pathlen + 1); |
83 | |
84 | url->port = port; |
85 | |
86 | cleanup: |
87 | xfree(&scheme); |
88 | xfree(&host); |
89 | xfree(&path); |
90 | |
91 | return url; |
92 | } |
93 | |
/*
 * Return true for the characters url_encode() copies through verbatim:
 * ASCII letters, digits, '-', '_', '.' and '~'.
 */
static bool
url_encode_is_plain(unsigned char c)
{
	return (c >= 'A' && c <= 'Z') ||
	    (c >= 'a' && c <= 'z') ||
	    (c >= '0' && c <= '9') ||
	    c == '-' || c == '_' || c == '.' || c == '~';
}

/*
 * Percent-encode the NUL-terminated byte string str.
 *
 * Letters, digits, '-', '_', '.' and '~' are copied as-is; every other
 * byte becomes "%XX" with two uppercase hex digits.
 *
 * Returns a newly allocated NUL-terminated string obtained from xmalloc(),
 * or NULL when str is NULL.
 */
char *
url_encode(unsigned char *str)
{
	static const char hex[] = "0123456789ABCDEF";
	char *ret;
	size_t len, n;

	if (str == NULL)
		return NULL;

	/* first pass: work out how long the encoded string will be */
	for (n = 0, len = 0; str[n] != '\0'; n++) {
		if (url_encode_is_plain(str[n]))
			len++;
		else
			len += 3;
	}

	ret = xmalloc(len + 1, "url_encode");

	/* second pass: emit the encoded bytes */
	for (n = 0, len = 0; str[n] != '\0'; n++) {
		if (url_encode_is_plain(str[n])) {
			ret[len++] = str[n];
		} else {
			ret[len++] = '%';
			ret[len++] = hex[(str[n] >> 4) & 0x0F];
			ret[len++] = hex[str[n] & 0x0F];
		}
	}
	ret[len] = '\0';

	return ret;
}
129 | |
130 | struct http_request * |
131 | http_get(const char *surl) |
132 | { |
133 | struct url *url; |
134 | struct http_request *req; |
135 | size_t len, alen; |
136 | short err; |
137 | char ip_s[16]; |
138 | ip_addr local_ip; |
139 | tcp_port local_port; |
140 | |
141 | url = url_parse(surl); |
142 | if (url == NULL) |
143 | return NULL; |
144 | |
145 | req = xmalloczero(sizeof(struct http_request), "http_get"); |
146 | req->url = url; |
147 | req->tcp_buf_size = (4 * 1500) + sizeof(req->chunk); |
148 | req->tcp_buf = xmalloc(req->tcp_buf_size, "http_get buf"); |
149 | |
150 | err = _TCPCreate(&req->tcp_iopb, &req->tcp_stream, (Ptr)req->tcp_buf, |
151 | req->tcp_buf_size, nil, nil, nil, false); |
152 | if (err) { |
153 | warn("TCPCreate failed: %d", err); |
154 | goto error; |
155 | } |
156 | |
157 | err = ResolveName(req->url->host, &req->host_ip); |
158 | if (err) { |
159 | warn("Couldn't resolve host %s (%d)", req->url->host, err); |
160 | goto error; |
161 | } |
162 | |
163 | long2ip(req->host_ip, (char *)&ip_s); |
164 | |
165 | err = _TCPActiveOpen(&req->tcp_iopb, req->tcp_stream, req->host_ip, |
166 | req->url->port, &local_ip, &local_port, nil, nil, false); |
167 | if (err) { |
168 | warn("Failed connecting to %s (%s) port %d: %d", |
169 | req->url->host, ip_s, req->url->port, err); |
170 | goto error; |
171 | } |
172 | |
173 | err = _TCPStatus(&req->tcp_iopb, req->tcp_stream, &req->tcp_status_pb, |
174 | nil, nil, false); |
175 | if (err) { |
176 | warn("Failed TCPStatus on connection to %s (%s) port %d: %d", |
177 | req->url->host, ip_s, req->url->port, err); |
178 | goto error; |
179 | } |
180 | |
181 | alen = 256 + strlen(req->url->host) + strlen(req->url->path); |
182 | req->message = xmalloc(alen, "http_get verb"); |
183 | len = snprintf(req->message, alen, |
184 | "GET %s HTTP/1.0\r\n" |
185 | "Host: %s\r\n" |
186 | "User-Agent: %s\r\n" |
187 | "Accept: */*\r\n" |
188 | "\r\n", req->url->path, req->url->host, PROGRAM_NAME); |
189 | if (len > alen) |
190 | panic("snprintf overflow"); |
191 | |
192 | memset(&req->tcp_wds, 0, sizeof(req->tcp_wds)); |
193 | req->tcp_wds[0].ptr = req->message; |
194 | req->tcp_wds[0].length = len; |
195 | |
196 | err = _TCPSend(&req->tcp_iopb, req->tcp_stream, req->tcp_wds, nil, nil, |
197 | false); |
198 | if (err) { |
199 | warn("TCPSend to %s (%s) failed: %d", req->url->host, ip_s, err); |
200 | goto error; |
201 | } |
202 | |
203 | return req; |
204 | |
205 | error: |
206 | http_req_free(&req); |
207 | return NULL; |
208 | } |
209 | |
210 | ssize_t |
211 | http_req_read(struct http_request *req, char *data, size_t len) |
212 | { |
213 | short err; |
214 | unsigned short rlen; |
215 | |
216 | if (!req) |
217 | return -1; |
218 | |
219 | err = _TCPStatus(&req->tcp_iopb, req->tcp_stream, &req->tcp_status_pb, |
220 | nil, nil, false); |
221 | if (err) |
222 | return -1; |
223 | |
224 | if (req->tcp_status_pb.amtUnreadData == 0) |
225 | return 0; |
226 | |
227 | rlen = MIN(req->tcp_status_pb.amtUnreadData, len); |
228 | |
229 | err = _TCPRcv(&req->tcp_iopb, req->tcp_stream, data, &rlen, nil, nil, |
230 | false); |
231 | if (err) |
232 | return -1; |
233 | |
234 | return rlen; |
235 | } |
236 | |
237 | bool |
238 | http_req_skip_header(struct http_request *req) |
239 | { |
240 | size_t len, n; |
241 | |
242 | for (;;) { |
243 | if (req->chunk_len > 3) { |
244 | /* |
245 | * Leave last 3 bytes of previous read in case \r\n\r\n happens |
246 | * across reads. |
247 | */ |
248 | memmove(req->chunk, req->chunk + req->chunk_len - 3, |
249 | req->chunk_len - 3); |
250 | req->chunk_len = 3; |
251 | } |
252 | len = http_req_read(req, req->chunk + req->chunk_len, |
253 | sizeof(req->chunk) - req->chunk_len); |
254 | if (len < 0) |
255 | return false; |
256 | if (len == 0) |
257 | continue; |
258 | req->chunk_len += len; |
259 | |
260 | for (n = 3; n < req->chunk_len; n++) { |
261 | if (req->chunk[n - 3] != '\r' || req->chunk[n - 2] != '\n' || |
262 | req->chunk[n - 1] != '\r' || req->chunk[n] != '\n') |
263 | continue; |
264 | |
265 | req->chunk_len -= n + 1; |
266 | memmove(req->chunk, req->chunk + n + 1, req->chunk_len); |
267 | req->chunk_off = 0; |
268 | return true; |
269 | } |
270 | } |
271 | |
272 | return false; |
273 | } |
274 | |
275 | short |
276 | http_req_chunk_peek(void *cookie) |
277 | { |
278 | struct http_request *req = (struct http_request *)cookie; |
279 | |
280 | if (req->chunk_len == 0 || (req->chunk_off + 1 > req->chunk_len)) { |
281 | req->chunk_len = http_req_read(req, req->chunk, sizeof(req->chunk)); |
282 | req->chunk_off = 0; |
283 | } |
284 | |
285 | if (req->chunk_len == 0 || (req->chunk_off + 1 > req->chunk_len)) |
286 | return EOF; |
287 | |
288 | return req->chunk[req->chunk_off]; |
289 | } |
290 | |
291 | short |
292 | http_req_chunk_read(void *cookie) |
293 | { |
294 | struct http_request *req = (struct http_request *)cookie; |
295 | short c; |
296 | |
297 | c = http_req_chunk_peek(req); |
298 | if (c == EOF) |
299 | return c; |
300 | |
301 | req->chunk_off++; |
302 | |
303 | return c; |
304 | } |
305 | |
306 | void |
307 | http_req_free(void *reqptr) |
308 | { |
309 | unsigned long *addr = (unsigned long *)reqptr; |
310 | void *ptr = (void *)*addr; |
311 | struct http_request *req = (struct http_request *)ptr; |
312 | |
313 | if (req == NULL) |
314 | return; |
315 | |
316 | _TCPRelease(&req->tcp_iopb, req->tcp_stream, nil, nil, false); |
317 | |
318 | // if (req->message != NULL) |
319 | // xfree(&req->message); |
320 | // xfree(&req->tcp_buf); |
321 | // xfree(&req->url); |
322 | // xfree(reqptr); |
323 | } |