/*
 * Copyright (c) 2022 joshua stein
 *
 * Permission to use, copy, modify, and distribute this software for any
 * purpose with or without fee is hereby granted, provided that the above
 * copyright notice and this permission notice appear in all copies.
 *
 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 */

#include "utf8.h"

/*
 * Conversion table between single Mac OS Roman bytes and UTF-8 sequences,
 * terminated by an entry whose macroman byte is 0.
 *
 * Mac OS Roman bytes >= 0x80 are written as numeric values so that this
 * file does not depend on the encoding it happens to be saved in.
 *
 * Entries whose macroman byte is plain ASCII ('x', '"', '\'') are lossy
 * fallbacks used only when converting UTF-8 to Mac OS Roman; the reverse
 * lookup skips them (see macroman_to_utf8).
 *
 * NOTE(review): utf8_char is declared in utf8.h; the code below indexes
 * elements 0 through 4, so it is assumed to be an array of at least five
 * unsigned bytes padded with NULs -- confirm against the header.
 */
const struct utf8_macroman_pair {
	unsigned char macroman;
	utf8_char utf8;
} utf8_macroman_pairs[] = {
	{ 'x',  { 0xc3, 0x97 } },	/* U+00D7 multiplication sign */
	{ 0x8f, { 0xc3, 0xa8 } },	/* U+00E8 e with grave */
	{ 0x8e, { 0xc3, 0xa9 } },	/* U+00E9 e with acute */
	{ 0x9a, { 0xc3, 0xb6 } },	/* U+00F6 o with diaeresis */
	{ 0x9f, { 0xc3, 0xbc } },	/* U+00FC u with diaeresis */
	{ '"',  { 0xe2, 0x80, 0x9c } },	/* U+201C left double quote */
	{ 0xd0, { 0xe2, 0x80, 0x93 } },	/* U+2013 en dash */
	{ 0xd1, { 0xe2, 0x80, 0x94 } },	/* U+2014 em dash */
	{ '\'', { 0xe2, 0x80, 0x98 } },	/* U+2018 left single quote */
	{ '\'', { 0xe2, 0x80, 0x99 } },	/* U+2019 right single quote */
	{ 0, { 0 } }
};

/*
 * Map one multi-byte UTF-8 sequence to a single Mac OS Roman byte.
 *
 * utf8 points at a NUL-padded buffer holding the bytes collected so far.
 *
 * Returns:
 *   0    if the buffer does not yet hold a complete multi-byte sequence
 *        for its lead byte (this includes plain ASCII and an empty buffer),
 *   '?'  if the sequence is complete but has no table entry, or if the
 *        lead byte is not a valid multi-byte lead and the first four bytes
 *        are all non-NUL,
 *   otherwise the Mac OS Roman byte from the table.
 */
unsigned char
utf8_to_macroman(utf8_char *utf8)
{
	const struct utf8_macroman_pair *p;
	unsigned char lead = (*utf8)[0];
	short complete = 0;
	short i;

	/* the lead byte determines which trailing byte must be present */
	if (lead >= 0xc2 && lead <= 0xdf)
		complete = ((*utf8)[1] != 0);
	else if (lead >= 0xe0 && lead <= 0xef)
		complete = ((*utf8)[2] != 0);
	else if (lead >= 0xf0 && lead <= 0xf4)
		complete = ((*utf8)[3] != 0);
	else if (lead != 0 && (*utf8)[1] != 0 && (*utf8)[2] != 0 &&
	    (*utf8)[3] != 0)
		return '?';

	if (!complete)
		return 0;

	for (p = utf8_macroman_pairs; p->macroman != 0; p++) {
		for (i = 0; i <= 4; i++) {
			if (p->utf8[i] != (*utf8)[i])
				break;

			/*
			 * Every byte so far agrees.  It is a match once both
			 * sequences have ended (a shared NUL; the shortest
			 * multi-byte sequence is two bytes, so only look from
			 * index 2 on) or the last slot has been compared.
			 */
			if (i == 4 || (i >= 2 && p->utf8[i] == 0))
				return p->macroman;
		}
	}

	return '?';
}

/*
 * Look up the UTF-8 sequence for a single Mac OS Roman byte.
 *
 * Returns a pointer into the conversion table, or NULL when the byte needs
 * no conversion or has no entry.  Bytes below 0x80 are ASCII and identical
 * in both encodings, so they always yield NULL; without this check the
 * ASCII fallback entries in the table would turn every plain 'x', '"' and
 * '\'' into a multiplication sign or a curly quote.
 */
const utf8_char *
macroman_to_utf8(unsigned char c)
{
	const struct utf8_macroman_pair *p;

	if (c < 0x80)
		return NULL;

	for (p = utf8_macroman_pairs; p->macroman != 0; p++) {
		if (p->macroman == c)
			return (const utf8_char *)&p->utf8;
	}

	return NULL;
}

/*
 * Convert len bytes of Mac OS Roman text at str into a newly allocated,
 * NUL-terminated UTF-8 string.
 *
 * Bytes for which macroman_to_utf8() returns NULL are copied through
 * unchanged.  NOTE(review): for unmapped bytes >= 0x80 this produces
 * invalid UTF-8; left as-is because callers may depend on it.
 *
 * Returns the string produced by xstrdup(), or NULL (after calling warn())
 * when an allocation fails.  Presumably the caller releases the result with
 * the project's xfree() -- confirm against the allocator's contract.
 */
unsigned char *
macroman_to_utf8_string(unsigned char *str, size_t len)
{
	const utf8_char *u;
	unsigned char *tmp, *ret;
	size_t ulen, n;
	short i;

	/* worst case is four UTF-8 bytes per input byte, plus the NUL */
	tmp = xmalloc((len * 4) + 1);
	if (tmp == NULL) {
		warn("utf8: Failed allocating (%lu * 4) + 1",
		    (unsigned long)len);
		return NULL;
	}

	ulen = 0;
	for (n = 0; n < len; n++) {
		u = macroman_to_utf8(str[n]);
		if (u == NULL) {
			tmp[ulen++] = str[n];
			continue;
		}

		/*
		 * Copy the sequence up to its NUL padding, but never more
		 * than four bytes so the worst-case sizing above holds.
		 */
		tmp[ulen++] = (*u)[0];
		for (i = 1; i < 4 && (*u)[i] != 0; i++)
			tmp[ulen++] = (*u)[i];
	}
	tmp[ulen] = '\0';

	/* hand back a copy sized to the actual result, drop the scratch */
	ret = (unsigned char *)xstrdup((char *)tmp);
	xfree(&tmp);
	if (ret == NULL)
		warn("utf8: Out of memory!");

	return ret;
}