/* * Copyright (c) 2007-2014, Lloyd Hilaiel <me@lloyd.io> * * Permission to use, copy, modify, and/or distribute this software for any * purpose with or without fee is hereby granted, provided that the above * copyright notice and this permission notice appear in all copies. * * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. */ #include "yajl_encode.h" #include <assert.h> #include <stdlib.h> #include <string.h> #include <stdio.h> static void CharToHex(unsigned char c, char * hexBuf) { const char * hexchar = "0123456789ABCDEF"; hexBuf[0] = hexchar[c >> 4]; hexBuf[1] = hexchar[c & 0x0F]; } void yajl_string_encode(const yajl_print_t print, void * ctx, const unsigned char * str, size_t len, int escape_solidus) { size_t beg = 0; size_t end = 0; char hexBuf[7]; hexBuf[0] = '\\'; hexBuf[1] = 'u'; hexBuf[2] = '0'; hexBuf[3] = '0'; hexBuf[6] = 0; while (end < len) { const char * escaped = NULL; switch (str[end]) { case '\r': escaped = "\\r"; break; case '\n': escaped = "\\n"; break; case '\\': escaped = "\\\\"; break; /* it is not required to escape a solidus in JSON: * read sec. 2.5: http://www.ietf.org/rfc/rfc4627.txt * specifically, this production from the grammar: * unescaped = %x20-21 / %x23-5B / %x5D-10FFFF */ case '/': if (escape_solidus) escaped = "\\/"; break; case '"': escaped = "\\\""; break; case '\f': escaped = "\\f"; break; case '\b': escaped = "\\b"; break; case '\t': escaped = "\\t"; break; default: if ((unsigned char) str[end] < 32) { CharToHex(str[end], hexBuf + 4); escaped = hexBuf; } break; } if (escaped != NULL) { print(ctx, (const char *) (str + beg), end - beg); print(ctx, escaped, (unsigned int)strlen(escaped)); beg = ++end; } else { ++end; } } print(ctx, (const char *) (str + beg), end - beg); } static void hexToDigit(unsigned int * val, const unsigned char * hex) { unsigned int i; for (i=0;i<4;i++) { unsigned char c = hex[i]; if (c >= 'A') c = (c & ~0x20) - 7; c -= '0'; assert(!(c & 0xF0)); *val = (*val << 4) | c; } } static void Utf32toUtf8(unsigned int codepoint, char * utf8Buf) { if (codepoint < 0x80) { utf8Buf[0] = (char) codepoint; utf8Buf[1] = 0; } else if (codepoint < 0x0800) { utf8Buf[0] = (char) ((codepoint >> 6) | 0xC0); utf8Buf[1] = (char) ((codepoint & 0x3F) | 0x80); utf8Buf[2] = 0; } else if (codepoint < 0x10000) { utf8Buf[0] = (char) ((codepoint >> 12) | 0xE0); utf8Buf[1] = (char) (((codepoint >> 6) & 0x3F) | 0x80); utf8Buf[2] = (char) ((codepoint & 0x3F) | 0x80); utf8Buf[3] = 0; } else if (codepoint < 0x200000) { utf8Buf[0] =(char)((codepoint >> 18) | 0xF0); utf8Buf[1] =(char)(((codepoint >> 12) & 0x3F) | 0x80); utf8Buf[2] =(char)(((codepoint >> 6) & 0x3F) | 0x80); utf8Buf[3] =(char)((codepoint & 0x3F) | 0x80); utf8Buf[4] = 0; } else { utf8Buf[0] = '?'; utf8Buf[1] = 0; } } void yajl_string_decode(yajl_buf buf, const unsigned char * str, size_t len) { size_t beg = 0; size_t end = 0; while (end < len) { if (str[end] == '\\') { char utf8Buf[5]; const char * unescaped = "?"; yajl_buf_append(buf, str + beg, end - beg); switch (str[++end]) { case 'r': unescaped = "\r"; break; case 'n': unescaped = "\n"; break; case '\\': unescaped = "\\"; break; case '/': unescaped = "/"; break; case '"': unescaped = "\""; break; case 'f': unescaped = "\f"; break; case 'b': unescaped = "\b"; break; case 't': unescaped = "\t"; break; case 'u': { unsigned int codepoint = 0; hexToDigit(&codepoint, str + ++end); end+=3; /* check if this is a surrogate */ if ((codepoint & 0xFC00) == 0xD800) { end++; if (str[end] == '\\' && str[end + 1] == 'u') { unsigned int surrogate = 0; hexToDigit(&surrogate, str + end + 2); codepoint = (((codepoint & 0x3F) << 10) | ((((codepoint >> 6) & 0xF) + 1) << 16) | (surrogate & 0x3FF)); end += 5; } else { unescaped = "?"; break; } } Utf32toUtf8(codepoint, utf8Buf); unescaped = utf8Buf; if (codepoint == 0) { yajl_buf_append(buf, unescaped, 1); beg = ++end; continue; } break; } default: assert("this should never happen" == NULL); } yajl_buf_append(buf, unescaped, (unsigned int)strlen(unescaped)); beg = ++end; } else { end++; } } yajl_buf_append(buf, str + beg, end - beg); } #define ADV_PTR s++; if (!(len--)) return 0; int yajl_string_validate_utf8(const unsigned char * s, size_t len) { if (!len) return 1; if (!s) return 0; while (len--) { /* single byte */ if (*s <= 0x7f) { /* noop */ } /* two byte */ else if ((*s >> 5) == 0x6) { ADV_PTR; if (!((*s >> 6) == 0x2)) return 0; } /* three byte */ else if ((*s >> 4) == 0x0e) { ADV_PTR; if (!((*s >> 6) == 0x2)) return 0; ADV_PTR; if (!((*s >> 6) == 0x2)) return 0; } /* four byte */ else if ((*s >> 3) == 0x1e) { ADV_PTR; if (!((*s >> 6) == 0x2)) return 0; ADV_PTR; if (!((*s >> 6) == 0x2)) return 0; ADV_PTR; if (!((*s >> 6) == 0x2)) return 0; } else { return 0; } s++; } return 1; }