jcs
/subtext
/amendments
/311
zip: Add PKZIP parser, using puff for inflating
This only supports a small subset of the giant PKZIP format spec, but
it's enough to read archives that use a compression type of 8
(deflate). It requires two callbacks, one for deciding whether to
process each file as it is encountered, and one for processing the
in-memory buffer of decompressed data.
jcs made amendment 311 about 1 year ago
--- zip.c Tue Feb 21 17:44:12 2023
+++ zip.c Thu Feb 23 09:55:35 2023
@@ -0,0 +1,210 @@
+/*
+ * Basic PKZIP parser
+ * https://pkware.cachefly.net/webdocs/casestudies/APPNOTE.TXT
+ * https://users.cs.jmu.edu/buchhofp/forensics/formats/pkzip.html
+ *
+ * Copyright (c) 2023 joshua stein <jcs@jcs.org>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+#include <fcntl.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "logger.h"
+
+#include "puff.h"
+#include "util.h"
+#include "zip.h"
+/* "PK" signatures: 03 04 opens each file entry, 01 02 the central directory */
+static const unsigned char zip_file_magic[] = { 0x50, 0x4b, 0x03, 0x04 };
+static const unsigned char zip_dir_magic[] = { 0x50, 0x4b, 0x01, 0x02 };
+
+size_t zip_read(short frefnum, void *buf, size_t len);
+
+/* Read len bytes from frefnum into buf; returns the count read, 0 on error. */
+size_t
+zip_read(short frefnum, void *buf, size_t len)
+{
+	long rlen = len;
+	short error = FSRead(frefnum, &rlen, buf);
+
+	if (error) {
+		/* report the error code FSRead returned, not the file refnum */
+		warn("error reading zip: %d", error);
+		return 0;
+	}
+	if (rlen != (long)len)
+		warn("short read on zip: got %ld wanted %ld", rlen, (long)len);
+	return rlen;
+}
+
+/*
+ * Walk the local file headers of the PKZIP archive at path.  decider() sees
+ * each entry's name and uncompressed size; when it returns true the entry is
+ * inflated into a temporary buffer, handed to processor(), and then freed.
+ * Returns true on reaching the central directory, false on any read, malloc
+ * or inflate failure or unsupported entry (encrypted, not deflated, ZIP64).
+ */
+bool
+zip_read_file(Str255 path, zip_extract_decider *decider,
+    zip_extract_processor *processor)
+{
+	char buf[32];
+	char filename[256];
+	unsigned char *comp, *uncomp;
+	unsigned long comp_len, uncomp_len;
+	u_int16_t t, fn_len, ex_len;
+	short ret, error, frefnum;
+
+	error = FSOpen(path, 0, &frefnum);
+	if (error) {
+		warn("failed opening %s: %d", PtoCstr(path), error);
+		CtoPstr(path);
+		return false;
+	}
+
+	for (;;) {
+		if (zip_read(frefnum, &buf, 4) != 4)
+			goto read_fail;
+		if (memcmp(buf, &zip_dir_magic, 4) == 0)
+			break;
+		if (memcmp(buf, &zip_file_magic, 4) != 0)
+			goto read_fail;
+
+		/* version */
+		if (zip_read(frefnum, &buf, 2) != 2)
+			goto read_fail;
+
+		/* flags */
+		if (zip_read(frefnum, &buf, 2) != 2)
+			goto read_fail;
+		t = GET_U16(&buf);
+		if (t & (1 << 0)) {
+			logger_printf("[zip] encryption not supported");
+			goto read_fail;
+		}
+
+		/* compression */
+		if (zip_read(frefnum, &buf, 2) != 2)
+			goto read_fail;
+		t = GET_U16(&buf);
+		if (t != 8) {
+			logger_printf("[zip] compression %d not supported", t);
+			goto read_fail;
+		}
+
+		/* mod time/date and crc32, neither of which is used */
+		if (zip_read(frefnum, &buf, 8) != 8)
+			goto read_fail;
+
+		/* compressed size */
+		if (zip_read(frefnum, &buf, 4) != 4)
+			goto read_fail;
+		comp_len = GET_U32(&buf);
+		if (comp_len == 0xffffffff) {
+			logger_printf("[zip] ZIP64 not supported");
+			goto read_fail;
+		}
+		if (comp_len == 0) {
+			logger_printf("[zip] data descriptor not supported");
+			goto read_fail;
+		}
+
+		/* uncompressed size */
+		if (zip_read(frefnum, &buf, 4) != 4)
+			goto read_fail;
+		uncomp_len = GET_U32(&buf);
+		if (uncomp_len == 0xffffffff) {
+			logger_printf("[zip] ZIP64 not supported");
+			goto read_fail;
+		}
+		if (uncomp_len == 0) {
+			logger_printf("[zip] data descriptor not supported");
+			goto read_fail;
+		}
+
+		/* file name len */
+		if (zip_read(frefnum, &buf, 2) != 2)
+			goto read_fail;
+		fn_len = GET_U16(&buf);
+
+		/* extra field len */
+		if (zip_read(frefnum, &buf, 2) != 2)
+			goto read_fail;
+		ex_len = GET_U16(&buf);
+
+		/* filename; bound by its own buffer, leaving room for the NUL */
+		if (fn_len >= sizeof(filename)) {
+			logger_printf("[zip] filename too long (%u)",
+			    (unsigned int)fn_len);
+			goto read_fail;
+		}
+		if (zip_read(frefnum, &filename, fn_len) != fn_len)
+			goto read_fail;
+		filename[fn_len] = '\0';
+
+		/* extra field is unused and can exceed buf, so seek past it */
+		error = SetFPos(frefnum, fsFromMark, ex_len);
+		if (error)
+			goto read_fail;
+
+		if (decider(filename, uncomp_len)) {
+			/* don't use xmalloc, these are not fatal */
+			comp = malloc(comp_len);
+			if (comp == NULL) {
+				logger_printf("[zip] failed to malloc(%lu)", comp_len);
+				goto read_fail;
+			}
+
+			/* read compressed data */
+			if (zip_read(frefnum, comp, comp_len) != comp_len) {
+				free(comp);
+				goto read_fail;
+			}
+
+			uncomp = malloc(uncomp_len);
+			if (uncomp == NULL) {
+				logger_printf("[zip] failed to malloc(%lu)", uncomp_len);
+				free(comp);
+				goto read_fail;
+			}
+
+			ret = puff(uncomp, &uncomp_len, comp, &comp_len);
+			if (ret != 0) {
+				logger_printf("[zip] unzip failed: %d", ret);
+				free(comp);
+				free(uncomp);
+				goto read_fail;
+			}
+			free(comp);
+
+			processor(filename, uncomp_len, uncomp);
+			free(uncomp);
+		} else {
+			/* skip over it */
+			error = SetFPos(frefnum, fsFromMark, comp_len);
+			if (error)
+				goto read_fail;
+		}
+	}
+
+	FSClose(frefnum);
+	return true;
+
+read_fail:
+	FSClose(frefnum);
+	return false;
+}
--- zip.h Tue Feb 21 17:44:22 2023
+++ zip.h Tue Feb 21 17:44:22 2023
@@ -0,0 +1,39 @@
+/*
+ * Copyright (c) 2023 joshua stein <jcs@jcs.org>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+#ifndef __ZIP_H__
+#define __ZIP_H__
+
+#include "puff.h"
+#include "util.h"
+
+/* Little-endian decoders; bytes are widened first so shifts cannot overflow */
+#define GET_U16(buf) ((u_int16_t)(((unsigned char *)(buf))[0] | \
+    ((u_int16_t)((unsigned char *)(buf))[1] << 8)))
+#define GET_U32(buf) ((u_int32_t)( \
+    ((u_int32_t)((unsigned char *)(buf))[3] << 24) | \
+    ((u_int32_t)((unsigned char *)(buf))[2] << 16) | \
+    ((u_int32_t)((unsigned char *)(buf))[1] << 8) | \
+    ((u_int32_t)((unsigned char *)(buf))[0])))
+/* decider returns true to have an entry inflated and handed to processor */
+typedef bool zip_extract_decider(char *filename, size_t extracted_size);
+typedef void zip_extract_processor(char *filename, size_t extracted_size,
+ unsigned char *extracted_data);
+/* returns false if the archive can't be opened or can't be fully parsed */
+bool zip_read_file(Str255 path, zip_extract_decider *decider,
+ zip_extract_processor *processor);
+
+#endif