AmendHub

Download:

jcs

/

subtext

/

amendments

/

311

zip: Add PKZIP parser, using puff for inflating

This only supports a small subset of the giant PKZIP format spec, but
it's enough to read archives that use a compression type of 8
(inflate). It requires two callbacks, one for deciding whether to
process each file as it is encountered, and one for processing the
in-memory buffer of decompressed data.

jcs made amendment 311 about 1 year ago
--- zip.c Tue Feb 21 17:44:12 2023 +++ zip.c Thu Feb 23 09:55:35 2023 @@ -0,0 +1,210 @@ +/* + * Basic PKZIP parser + * https://pkware.cachefly.net/webdocs/casestudies/APPNOTE.TXT + * https://users.cs.jmu.edu/buchhofp/forensics/formats/pkzip.html + * + * Copyright (c) 2023 joshua stein <jcs@jcs.org> + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ + +#include <fcntl.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + +#include "logger.h" + +#include "puff.h" +#include "util.h" +#include "zip.h" + +static unsigned char zip_file_magic[] = { 0x50, 0x4b, 0x03, 0x04 }; +static unsigned char zip_dir_magic[] = { 0x50, 0x4b, 0x01, 0x02 }; + +size_t zip_read(short frefnum, void *buf, size_t len); + +size_t +zip_read(short frefnum, void *buf, size_t len) +{ + short error; + long rlen = len; + + error = FSRead(frefnum, &rlen, buf); + if (error) { + warn("error reading zip: %d", frefnum); + return 0; + } + if (rlen != len) + warn("short read on zip: got %ld wanted %ld", rlen, len); + + return rlen; +} + +bool +zip_read_file(Str255 path, zip_extract_decider *decider, + zip_extract_processor *processor) +{ + char buf[32]; + char filename[256]; + unsigned char *comp, *uncomp; + size_t len; + unsigned long comp_len, uncomp_len; + u_int16_t t, fn_len, ex_len; + short ret, error, frefnum; + + error = FSOpen(path, 0, &frefnum); + if (error) { + warn("failed opening %s: %d", PtoCstr(path), error); + CtoPstr(path); + return false; + } + + for (;;) { + if (zip_read(frefnum, &buf, 4) != 4) + goto read_fail; + if (memcmp(buf, &zip_dir_magic, 4) == 0) + break; + if (memcmp(buf, &zip_file_magic, 4) != 0) + goto read_fail; + + /* version */ + if (zip_read(frefnum, &buf, 2) != 2) + goto read_fail; + + /* flags */ + if (zip_read(frefnum, &buf, 2) != 2) + goto read_fail; + t = GET_U16(&buf); + if (t & (1 << 0)) { + logger_printf("[zip] encryption not supported"); + goto read_fail; + } + + /* compression */ + if (zip_read(frefnum, &buf, 2) != 2) + goto read_fail; + t = GET_U16(&buf); + if (t != 8) { + logger_printf("[zip] compression %d not supported", t); + goto read_fail; + } + + /* mod time/date */ + if (zip_read(frefnum, &buf, 4) != 4) + goto read_fail; + + /* crc32 */ + if (zip_read(frefnum, &buf, 4) != 4) + goto read_fail; + + /* compressed size */ + if (zip_read(frefnum, &buf, 4) != 4) + goto read_fail; + comp_len = GET_U32(&buf); + if (comp_len == 0xffffffff) { + logger_printf("[zip] ZIP64 not supported"); + goto read_fail; + } + if (comp_len == 0) { + logger_printf("[zip] data descriptor not supported"); + goto read_fail; + } + + /* uncompressed size */ + if (zip_read(frefnum, &buf, 4) != 4) + goto read_fail; + uncomp_len = GET_U32(&buf); + if (uncomp_len == 0xffffffff) { + logger_printf("[zip] ZIP64 not supported"); + goto read_fail; + } + if (uncomp_len == 0) { + logger_printf("[zip] data descriptor not supported"); + goto read_fail; + } + + /* file name len */ + if (zip_read(frefnum, &buf, 2) != 2) + goto read_fail; + fn_len = GET_U16(&buf); + if (fn_len >= sizeof(buf)) { + logger_printf("[zip] filename len %d too big", fn_len); + goto read_fail; + } + + /* extra field len */ + if (zip_read(frefnum, &buf, 2) != 2) + goto read_fail; + ex_len = GET_U16(&buf); + + /* filename */ + if (fn_len > sizeof(filename)) { + logger_printf("[zip] filename too long (%ld)", fn_len); + goto read_fail; + } + + if (zip_read(frefnum, &filename, fn_len) != fn_len) + goto read_fail; + filename[fn_len] = '\0'; + + /* extra field */ + if (zip_read(frefnum, &buf, ex_len) != ex_len) + goto read_fail; + + if (decider(filename, uncomp_len)) { + /* don't use xmalloc, these are not fatal */ + comp = malloc(comp_len); + if (comp == NULL) { + logger_printf("[zip] failed to malloc(%ld)", comp_len); + goto read_fail; + } + + /* read compressed data */ + if (zip_read(frefnum, comp, comp_len) != comp_len) { + free(comp); + goto read_fail; + } + + uncomp = malloc(uncomp_len); + if (uncomp == NULL) { + logger_printf("[zip] failed to malloc(%ld)", uncomp_len); + free(comp); + goto read_fail; + } + + ret = puff(uncomp, &uncomp_len, comp, &comp_len); + if (ret != 0) { + logger_printf("[zip] unzip failed: %d", ret); + free(comp); + free(uncomp); + goto read_fail; + } + free(comp); + + processor(filename, uncomp_len, uncomp); + free(uncomp); + } else { + /* skip over it */ + SetFPos(frefnum, fsFromMark, comp_len); + } + } + + FSClose(frefnum); + return true; + +read_fail: + FSClose(frefnum); + return false; + +} --- zip.h Tue Feb 21 17:44:22 2023 +++ zip.h Tue Feb 21 17:44:22 2023 @@ -0,0 +1,39 @@ +/* + * Copyright (c) 2023 joshua stein <jcs@jcs.org> + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ + +#ifndef __ZIP_H__ +#define __ZIP_H__ + +#include "puff.h" +#include "util.h" + +#define GET_U16(buf) (u_int16_t)(\ + (((unsigned char *)buf)[1] << 8) | \ + (((unsigned char *)buf)[0])) +#define GET_U32(buf) (u_int32_t)(\ + (((unsigned char *)buf)[3] << 24) | \ + (((unsigned char *)buf)[2] << 16) | \ + (((unsigned char *)buf)[1] << 8) | \ + (((unsigned char *)buf)[0])) + +typedef bool zip_extract_decider(char *filename, size_t extracted_size); +typedef void zip_extract_processor(char *filename, size_t extracted_size, + unsigned char *extracted_data); + +bool zip_read_file(Str255 path, zip_extract_decider *decider, + zip_extract_processor *processor); + +#endif