diff options
author | Daniil Cherednik <dan.cherednik@gmail.com> | 2023-05-05 11:09:01 +0300 |
---|---|---|
committer | Daniil Cherednik <dan.cherednik@gmail.com> | 2023-05-05 11:09:01 +0300 |
commit | b5a989b16cafa8a3b3bc076f1097a0eda6f48c06 (patch) | |
tree | 4da744117a5aab37758921fa43b95a3068e5aec1 /contrib/libs/libfyaml/src/lib/fy-token.c | |
parent | fc1cffcfa7f0497a1f97b384a24bcbf23362f3be (diff) | |
download | ydb-b5a989b16cafa8a3b3bc076f1097a0eda6f48c06.tar.gz |
Ydb stable 23-1-26 (tag: 23.1.26)
x-stable-origin-commit: 22184a7e157553d447f17a2dffc4ea2d32dfd74d
Diffstat (limited to 'contrib/libs/libfyaml/src/lib/fy-token.c')
-rw-r--r-- | contrib/libs/libfyaml/src/lib/fy-token.c | 1869 |
1 files changed, 1869 insertions, 0 deletions
diff --git a/contrib/libs/libfyaml/src/lib/fy-token.c b/contrib/libs/libfyaml/src/lib/fy-token.c new file mode 100644 index 0000000000..91efa89f33 --- /dev/null +++ b/contrib/libs/libfyaml/src/lib/fy-token.c @@ -0,0 +1,1869 @@ +/* + * fy-token.c - YAML token methods + * + * Copyright (c) 2019 Pantelis Antoniou <pantelis.antoniou@konsulko.com> + * + * SPDX-License-Identifier: MIT + */ +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#include <stdio.h> +#include <string.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <assert.h> +#include <stdlib.h> +#include <errno.h> +#include <stdarg.h> + +#include <libfyaml.h> + +#include "fy-parse.h" + +#include "fy-ctype.h" +#include "fy-utf8.h" +#include "fy-emit-accum.h" + +#include "fy-walk.h" + +#include "fy-token.h" + +enum fy_scalar_style fy_token_scalar_style(struct fy_token *fyt) +{ + return fy_token_scalar_style_inline(fyt); +} + +enum fy_token_type fy_token_get_type(struct fy_token *fyt) +{ + return fy_token_get_type_inline(fyt); +} + +void fy_token_clean_rl(struct fy_token_list *fytl, struct fy_token *fyt) +{ + int i; + + if (!fyt) + return; + + /* release reference */ + fy_input_unref(fyt->handle.fyi); + fyt->handle.fyi = NULL; + + /* release comment references */ + if (fyt->comment) { + for (i = 0; i < fycp_max; i++) + fy_input_unref(fyt->comment[i].fyi); + free(fyt->comment); + fyt->comment = NULL; + } + + switch (fyt->type) { + case FYTT_TAG: + fy_token_unref(fyt->tag.fyt_td); + fyt->tag.fyt_td = NULL; + if (fyt->tag.handle0) { + free(fyt->tag.handle0); + fyt->tag.handle0 = NULL; + } + if (fyt->tag.suffix0) { + free(fyt->tag.suffix0); + fyt->tag.suffix0 = NULL; + } + break; + + case FYTT_TAG_DIRECTIVE: + if (fyt->tag_directive.prefix0) { + free(fyt->tag_directive.prefix0); + fyt->tag_directive.prefix0 = NULL; + } + if (fyt->tag_directive.handle0) { + free(fyt->tag_directive.handle0); + fyt->tag_directive.handle0 = NULL; + } + break; + + case FYTT_PE_MAP_KEY: + 
fy_document_destroy(fyt->map_key.fyd); + fyt->map_key.fyd = NULL; + break; + + case FYTT_SCALAR: + if (fyt->scalar.path_key_storage) { + free(fyt->scalar.path_key_storage); + fyt->scalar.path_key_storage = NULL; + } + break; + + case FYTT_ALIAS: + if (fyt->alias.expr) { + fy_path_expr_free(fyt->alias.expr); + fyt->alias.expr = NULL; + } + break; + + default: + break; + } + + if (fyt->text0) { + free(fyt->text0); + fyt->text0 = NULL; + } + + fyt->type = FYTT_NONE; + fyt->analyze_flags = 0; + fyt->text_len = 0; + fyt->text = NULL; +} + +void fy_token_list_unref_all_rl(struct fy_token_list *fytl, struct fy_token_list *fytl_tofree) +{ + struct fy_token *fyt; + + while ((fyt = fy_token_list_pop(fytl_tofree)) != NULL) + fy_token_unref_rl(fytl, fyt); +} + +static bool fy_token_text_needs_rebuild(struct fy_token *fyt) +{ + const struct fy_atom *fya; + + if (!fy_token_text_is_direct(fyt)) + return false; + + fya = fy_token_atom(fyt); + if (!fya || !fya->fyi) + return false; + + return fya->fyi_generation != fya->fyi->generation; +} + +static int fy_tag_token_format_internal(const struct fy_token *fyt, void *out, size_t *outszp) +{ + char *o = NULL, *oe = NULL; + size_t outsz; + const char *handle, *suffix; + size_t handle_size, suffix_size; + int len, code_length, rlen; + uint8_t code[4]; + const char *t, *s, *e; + + if (!fyt || fyt->type != FYTT_TAG) + return 0; + + if (out && *outszp <= 0) + return 0; + + if (out) { + outsz = *outszp; + o = out; + oe = (char *)out + outsz; + } + + if (!fyt->tag.fyt_td) + return -1; + + handle = fy_tag_directive_token_prefix(fyt->tag.fyt_td, &handle_size); + if (!handle) + return -1; + + suffix = fy_atom_data(&fyt->handle) + fyt->tag.skip + fyt->tag.handle_length; + suffix_size = fyt->tag.suffix_length; + +#define O_CPY(_src, _len) \ + do { \ + int _l = (_len); \ + if (o && _l) { \ + int _cl = _l; \ + if (_cl > (oe - o)) \ + _cl = oe - o; \ + memcpy(o, (_src), _cl); \ + o += _cl; \ + } \ + len += _l; \ + } while(0) + + len = 0; + 
O_CPY(handle, handle_size); + + /* escape suffix as a URI */ + s = suffix; + e = s + suffix_size; + while (s < e) { + /* find next escape */ + t = memchr(s, '%', e - s); + rlen = (t ? t : e) - s; + O_CPY(s, rlen); + + /* end of string */ + if (!t) + break; + s = t; + + code_length = sizeof(code); + t = fy_uri_esc(s, e - s, code, &code_length); + if (!t) + break; + + /* output escaped utf8 */ + O_CPY(code, code_length); + s = t; + } + +#undef O_CPY + return len; + +} + +int fy_tag_token_format_text_length(const struct fy_token *fyt) +{ + return fy_tag_token_format_internal(fyt, NULL, NULL); +} + +const char *fy_tag_token_format_text(const struct fy_token *fyt, char *buf, size_t maxsz) +{ + fy_tag_token_format_internal(fyt, buf, &maxsz); + return buf; +} + +static int fy_tag_directive_token_format_internal(const struct fy_token *fyt, + void *out, size_t *outszp) +{ + char *o = NULL, *oe = NULL; + size_t outsz; + int len; + const char *handle, *prefix; + size_t handle_size, prefix_size; + + if (!fyt || fyt->type != FYTT_TAG_DIRECTIVE) + return 0; + + if (out && *outszp <= 0) + return 0; + + if (out) { + outsz = *outszp; + o = out; + oe = (char *)out + outsz; + } + +#define O_CPY(_src, _len) \ + do { \ + int _l = (_len); \ + if (o && _l) { \ + int _cl = _l; \ + if (_cl > (oe - o)) \ + _cl = oe - o; \ + memcpy(o, (_src), _cl); \ + o += _cl; \ + } \ + len += _l; \ + } while(0) + + len = 0; + + handle = fy_atom_data(&fyt->handle); + handle_size = fy_atom_size(&fyt->handle); + + prefix = handle + handle_size - fyt->tag_directive.uri_length; + prefix_size = fyt->tag_directive.uri_length; + handle_size = fyt->tag_directive.tag_length; + + if (handle_size) + O_CPY(handle, handle_size); + else + O_CPY("!<", 2); + O_CPY(prefix, prefix_size); + if (!handle_size) + O_CPY(">", 1); + +#undef O_CPY + return len; + +} + +int fy_tag_directive_token_format_text_length(const struct fy_token *fyt) +{ + return fy_tag_directive_token_format_internal(fyt, NULL, NULL); +} + +const char 
*fy_tag_directive_token_format_text(const struct fy_token *fyt, char *buf, size_t maxsz) +{ + fy_tag_directive_token_format_internal(fyt, buf, &maxsz); + return buf; +} + +const char *fy_tag_directive_token_prefix(struct fy_token *fyt, size_t *lenp) +{ + const char *ptr; + size_t len; + + if (!fyt || fyt->type != FYTT_TAG_DIRECTIVE) { + *lenp = 0; + return NULL; + } + ptr = fy_atom_data(&fyt->handle); + len = fy_atom_size(&fyt->handle); + ptr = ptr + len - fyt->tag_directive.uri_length; + *lenp = fyt->tag_directive.uri_length; + + return ptr; +} + +const char *fy_tag_directive_token_prefix0(struct fy_token *fyt) +{ + char *text0; + const char *text; + size_t len; + + if (!fyt || fyt->type != FYTT_TAG_DIRECTIVE) + return NULL; + + /* use the cache if it's there (and doesn't need a rebuild) */ + if (fyt->tag_directive.prefix0 && !fy_token_text_needs_rebuild(fyt)) + return fyt->tag_directive.prefix0; + + if (fyt->tag_directive.prefix0) { + free(fyt->tag_directive.prefix0); + fyt->tag_directive.prefix0 = NULL; + } + + text = fy_tag_directive_token_prefix(fyt, &len); + if (!text) + return NULL; + + text0 = malloc(len + 1); + if (!text0) + return NULL; + memcpy(text0, text, len); + text0[len] = '\0'; + + fyt->tag_directive.prefix0 = text0; + + return fyt->tag_directive.prefix0; +} + +const char *fy_tag_directive_token_handle(struct fy_token *fyt, size_t *lenp) +{ + const char *ptr; + + if (!fyt || fyt->type != FYTT_TAG_DIRECTIVE) { + *lenp = 0; + return NULL; + } + ptr = fy_atom_data(&fyt->handle); + *lenp = fyt->tag_directive.tag_length; + return ptr; +} + +const char *fy_tag_directive_token_handle0(struct fy_token *fyt) +{ + char *text0; + const char *text; + size_t len; + + if (!fyt || fyt->type != FYTT_TAG_DIRECTIVE) + return NULL; + + /* use the cache if it's there (and doesn't need a rebuild) */ + if (fyt->tag_directive.handle0 && !fy_token_text_needs_rebuild(fyt)) + return fyt->tag_directive.handle0; + + if (fyt->tag_directive.handle0) { + 
free(fyt->tag_directive.handle0); + fyt->tag_directive.handle0 = NULL; + } + + text = fy_tag_directive_token_handle(fyt, &len); + if (!text) + return NULL; + + text0 = malloc(len + 1); + if (!text0) + return NULL; + memcpy(text0, text, len); + text0[len] = '\0'; + + fyt->tag_directive.handle0 = text0; + + return fyt->tag_directive.handle0; +} + +struct fy_token *fy_token_vcreate_rl(struct fy_token_list *fytl, enum fy_token_type type, va_list ap) +{ + struct fy_token *fyt = NULL; + struct fy_atom *handle; + struct fy_token *fyt_td; + + if ((unsigned int)type >= FYTT_COUNT) + goto err_out; + + fyt = fy_token_alloc_rl(fytl); + if (!fyt) + goto err_out; + fyt->type = type; + + handle = va_arg(ap, struct fy_atom *); + if (handle) + fyt->handle = *handle; + else + fy_atom_reset(&fyt->handle); + + switch (fyt->type) { + case FYTT_TAG_DIRECTIVE: + fyt->tag_directive.tag_length = va_arg(ap, unsigned int); + fyt->tag_directive.uri_length = va_arg(ap, unsigned int); + fyt->tag_directive.is_default = va_arg(ap, int) ? 
true : false; + fyt->tag_directive.prefix0 = NULL; + fyt->tag_directive.handle0 = NULL; + break; + case FYTT_SCALAR: + fyt->scalar.style = va_arg(ap, enum fy_scalar_style); + if (fyt->scalar.style != FYSS_ANY && (unsigned int)fyt->scalar.style >= FYSS_MAX) + goto err_out; + fyt->scalar.path_key = NULL; + fyt->scalar.path_key_len = 0; + fyt->scalar.path_key_storage = NULL; + break; + case FYTT_TAG: + fyt->tag.skip = va_arg(ap, unsigned int); + fyt->tag.handle_length = va_arg(ap, unsigned int); + fyt->tag.suffix_length = va_arg(ap, unsigned int); + + fyt_td = va_arg(ap, struct fy_token *); + if (!fyt_td) + goto err_out; + fyt->tag.fyt_td = fy_token_ref(fyt_td); + fyt->tag.handle0 = NULL; + fyt->tag.suffix0 = NULL; + break; + + case FYTT_VERSION_DIRECTIVE: + fyt->version_directive.vers = *va_arg(ap, struct fy_version *); + break; + + case FYTT_ALIAS: + fyt->alias.expr = va_arg(ap, struct fy_path_expr *); + break; + + case FYTT_KEY: + fyt->key.flow_level = va_arg(ap, int); + break; + + case FYTT_PE_MAP_KEY: + fyt->map_key.fyd = va_arg(ap, struct fy_document *); + break; + + case FYTT_PE_SEQ_INDEX: + fyt->seq_index.index = va_arg(ap, int); + break; + + case FYTT_PE_SEQ_SLICE: + fyt->seq_slice.start_index = va_arg(ap, int); + fyt->seq_slice.end_index = va_arg(ap, int); + break; + + case FYTT_NONE: + goto err_out; + + default: + break; + } + + if (fyt->handle.fyi) + fy_input_ref(fyt->handle.fyi); + + return fyt; + +err_out: + fy_token_unref(fyt); + + return NULL; +} + +struct fy_token *fy_token_create_rl(struct fy_token_list *fytl, enum fy_token_type type, ...) +{ + struct fy_token *fyt; + va_list ap; + + va_start(ap, type); + fyt = fy_token_vcreate_rl(fytl, type, ap); + va_end(ap); + + return fyt; +} + +struct fy_token *fy_token_vcreate(enum fy_token_type type, va_list ap) +{ + return fy_token_vcreate_rl(NULL, type, ap); +} + +struct fy_token *fy_token_create(enum fy_token_type type, ...) 
+{ + struct fy_token *fyt; + va_list ap; + + va_start(ap, type); + fyt = fy_token_vcreate_rl(NULL, type, ap); + va_end(ap); + + return fyt; +} + +struct fy_token *fy_parse_token_create(struct fy_parser *fyp, enum fy_token_type type, ...) +{ + struct fy_token *fyt; + va_list ap; + + if (!fyp) + return NULL; + + va_start(ap, type); + fyt = fy_token_vcreate_rl(fyp->recycled_token_list, type, ap); + va_end(ap); + + return fyt; +} + +int fy_token_format_text_length(struct fy_token *fyt) +{ + int length; + + if (!fyt) + return 0; + + switch (fyt->type) { + + case FYTT_TAG: + return fy_tag_token_format_text_length(fyt); + + case FYTT_TAG_DIRECTIVE: + return fy_tag_directive_token_format_text_length(fyt); + + default: + break; + } + + length = fy_atom_format_text_length(&fyt->handle); + + return length; +} + +const char *fy_token_format_text(struct fy_token *fyt, char *buf, size_t maxsz) +{ + const char *str; + + if (maxsz == 0) + return buf; + + if (!fyt) { + if (maxsz > 0) + buf[0] = '\0'; + return buf; + } + + switch (fyt->type) { + + case FYTT_TAG: + return fy_tag_token_format_text(fyt, buf, maxsz); + + case FYTT_TAG_DIRECTIVE: + return fy_tag_directive_token_format_text(fyt, buf, maxsz); + + default: + break; + } + + str = fy_atom_format_text(&fyt->handle, buf, maxsz); + + return str; +} + +int fy_token_format_utf8_length(struct fy_token *fyt) +{ + const char *str; + size_t len; + + if (!fyt) + return 0; + + switch (fyt->type) { + + case FYTT_TAG: + case FYTT_TAG_DIRECTIVE: + str = fy_token_get_text(fyt, &len); + if (!str) + return 0; + return fy_utf8_count(str, len); + + default: + break; + } + + return fy_atom_format_utf8_length(&fyt->handle); +} + + +struct fy_atom *fy_token_atom(struct fy_token *fyt) +{ + return fyt ? 
&fyt->handle : NULL; +} + +const struct fy_mark *fy_token_start_mark(struct fy_token *fyt) +{ + const struct fy_atom *atom; + + atom = fy_token_atom(fyt); + if (atom) + return &atom->start_mark; + + /* something we don't track */ + return NULL; +} + +const struct fy_mark *fy_token_end_mark(struct fy_token *fyt) +{ + const struct fy_atom *atom; + + atom = fy_token_atom(fyt); + if (atom) + return &atom->end_mark; + + /* something we don't track */ + return NULL; +} + +int fy_token_text_analyze(struct fy_token *fyt) +{ + const char *s, *e; + const char *value = NULL; + enum fy_atom_style style; + int c, w, cn, cp, col; + size_t len; + int flags; + + if (!fyt) + return FYTTAF_CAN_BE_SIMPLE_KEY | FYTTAF_DIRECT_OUTPUT | + FYTTAF_EMPTY | FYTTAF_CAN_BE_DOUBLE_QUOTED; + + if (fyt->analyze_flags) + return fyt->analyze_flags; + + /* only tokens that can generate text */ + if (fyt->type != FYTT_SCALAR && + fyt->type != FYTT_TAG && + fyt->type != FYTT_ANCHOR && + fyt->type != FYTT_ALIAS) { + flags = FYTTAF_NO_TEXT_TOKEN; + fyt->analyze_flags = flags; + return flags; + } + + flags = FYTTAF_TEXT_TOKEN; + + style = fy_token_atom_style(fyt); + + /* can this token be a simple key initial condition */ + if (!fy_atom_style_is_block(style) && style != FYAS_URI) + flags |= FYTTAF_CAN_BE_SIMPLE_KEY; + + /* can this token be directly output initial condition */ + if (!fy_atom_style_is_block(style)) + flags |= FYTTAF_DIRECT_OUTPUT; + + /* get value */ + value = fy_token_get_text(fyt, &len); + if (!value || len == 0) { + flags |= FYTTAF_EMPTY | FYTTAF_CAN_BE_DOUBLE_QUOTED | FYTTAF_CAN_BE_UNQUOTED_PATH_KEY; + fyt->analyze_flags = flags; + return flags; + } + + flags |= FYTTAF_CAN_BE_PLAIN | + FYTTAF_CAN_BE_SINGLE_QUOTED | + FYTTAF_CAN_BE_DOUBLE_QUOTED | + FYTTAF_CAN_BE_LITERAL | + FYTTAF_CAN_BE_LITERAL | + FYTTAF_CAN_BE_FOLDED | + FYTTAF_CAN_BE_PLAIN_FLOW | + FYTTAF_CAN_BE_UNQUOTED_PATH_KEY; + + /* start with document indicators must be quoted at indent 0 */ + if (len >= 3 && (!memcmp(value, 
"---", 3) || !memcmp(value, "...", 3))) + flags |= FYTTAF_QUOTE_AT_0; + + s = value; + e = value + len; + + col = 0; + + /* get first character */ + cn = fy_utf8_get(s, e - s, &w); + s += w; + col = fy_token_is_lb(fyt, cn) ? 0 : (col + 1); + + /* disable folded right off the bat, it's a pain */ + flags &= ~FYTTAF_CAN_BE_FOLDED; + + /* plain scalars can't start with any indicator (or space/lb) */ + if ((flags & (FYTTAF_CAN_BE_PLAIN | FYTTAF_CAN_BE_PLAIN_FLOW)) && + (fy_is_indicator(cn) || fy_token_is_lb(fyt, cn) || fy_is_ws(cn))) + flags &= ~(FYTTAF_CAN_BE_PLAIN | + FYTTAF_CAN_BE_PLAIN_FLOW); + + /* plain scalars in flow mode can't start with a flow indicator */ + if ((flags & FYTTAF_CAN_BE_PLAIN_FLOW) && + fy_is_flow_indicator(cn)) + flags &= ~FYTTAF_CAN_BE_PLAIN_FLOW; + + /* plain unquoted path keys can only start with [a-zA-Z_] */ + if ((flags & FYTTAF_CAN_BE_UNQUOTED_PATH_KEY) && + !fy_is_first_alpha(cn)) + flags &= ~FYTTAF_CAN_BE_UNQUOTED_PATH_KEY; + + cp = -1; + for (c = cn; c >= 0; s += w, cp = c, c = cn) { + + /* can be -1 on end */ + cn = fy_utf8_get(s, e - s, &w); + + /* zero can't be output, only in double quoted mode */ + if (c == 0) { + flags &= ~(FYTTAF_DIRECT_OUTPUT | + FYTTAF_CAN_BE_PLAIN | + FYTTAF_CAN_BE_SINGLE_QUOTED | + FYTTAF_CAN_BE_LITERAL | + FYTTAF_CAN_BE_FOLDED | + FYTTAF_CAN_BE_PLAIN_FLOW | + FYTTAF_CAN_BE_UNQUOTED_PATH_KEY); + flags |= FYTTAF_CAN_BE_DOUBLE_QUOTED; + + } else if (fy_is_ws(c)) { + + flags |= FYTTAF_HAS_WS; + if (fy_is_ws(cn)) + flags |= FYTTAF_HAS_CONSECUTIVE_WS; + + } else if (fy_token_is_lb(fyt, c)) { + + flags |= FYTTAF_HAS_LB; + if (fy_token_is_lb(fyt, cn)) + flags |= FYTTAF_HAS_CONSECUTIVE_LB; + + /* only non linebreaks can be simple keys */ + flags &= ~FYTTAF_CAN_BE_SIMPLE_KEY; + + /* anything with linebreaks, can't be direct */ + flags &= ~FYTTAF_DIRECT_OUTPUT; + } + + if ((flags & FYTTAF_CAN_BE_UNQUOTED_PATH_KEY) && !fy_is_alnum(c)) + flags &= ~FYTTAF_CAN_BE_UNQUOTED_PATH_KEY; + + /* illegal plain combination */ + if 
((flags & FYTTAF_CAN_BE_PLAIN) && + ((c == ':' && fy_is_blankz_m(cn, fy_token_atom_lb_mode(fyt))) || + (fy_is_blankz_m(c, fy_token_atom_lb_mode(fyt)) && cn == '#') || + (cp < 0 && c == '#' && cn < 0) || + !fy_is_print(c))) { + flags &= ~(FYTTAF_CAN_BE_PLAIN | + FYTTAF_CAN_BE_PLAIN_FLOW); + } + + /* illegal plain flow combination */ + if ((flags & FYTTAF_CAN_BE_PLAIN_FLOW) && + (fy_is_flow_indicator(c) || (c == ':' && fy_is_flow_indicator(cn)))) + flags &= ~FYTTAF_CAN_BE_PLAIN_FLOW; + + /* non printable characters, turn off these styles */ + if ((flags & (FYTTAF_CAN_BE_SINGLE_QUOTED | + FYTTAF_CAN_BE_LITERAL | + FYTTAF_CAN_BE_FOLDED)) && !fy_is_print(c)) + flags &= ~(FYTTAF_CAN_BE_SINGLE_QUOTED | + FYTTAF_CAN_BE_LITERAL | + FYTTAF_CAN_BE_FOLDED); + + /* if there's an escape, it can't be direct */ + if ((flags & FYTTAF_DIRECT_OUTPUT) && + ((style == FYAS_URI && c == '%') || + (style == FYAS_SINGLE_QUOTED && c == '\'') || + (style == FYAS_DOUBLE_QUOTED && c == '\\'))) + flags &= ~FYTTAF_DIRECT_OUTPUT; + + col = fy_token_is_lb(fyt, c) ? 
0 : (col + 1); + + /* last character */ + if (cn < 0) { + /* if ends with whitespace or linebreak, can't be plain */ + if (fy_is_ws(cn) || fy_token_is_lb(fyt, cn)) + flags &= ~(FYTTAF_CAN_BE_PLAIN | + FYTTAF_CAN_BE_PLAIN_FLOW); + } + } + + fyt->analyze_flags = flags; + return flags; +} + +const char *fy_tag_token_get_directive_handle(struct fy_token *fyt, size_t *td_handle_sizep) +{ + if (!fyt || fyt->type != FYTT_TAG || !fyt->tag.fyt_td) + return NULL; + + return fy_tag_directive_token_handle(fyt->tag.fyt_td, td_handle_sizep); +} + +const char *fy_tag_token_get_directive_prefix(struct fy_token *fyt, size_t *td_prefix_sizep) +{ + if (!fyt || fyt->type != FYTT_TAG || !fyt->tag.fyt_td) + return NULL; + + return fy_tag_directive_token_prefix(fyt->tag.fyt_td, td_prefix_sizep); +} + +const char *fy_token_get_direct_output(struct fy_token *fyt, size_t *sizep) +{ + const struct fy_atom *fya; + + fya = fy_token_atom(fyt); + if (!fya || !fya->direct_output || + (fyt->type == FYTT_TAG || fyt->type == FYTT_TAG_DIRECTIVE) ) { + *sizep = 0; + return NULL; + } + *sizep = fy_atom_size(fya); + return fy_atom_data(fya); +} + +const char *fy_tag_token_handle(struct fy_token *fyt, size_t *lenp) +{ + return fy_tag_token_get_directive_handle(fyt, lenp); +} + +const char *fy_tag_token_suffix(struct fy_token *fyt, size_t *lenp) +{ + const char *tag, *prefix, *handle, *suffix; + size_t tag_len, prefix_len, handle_len, suffix_len; + + if (!fyt || fyt->type != FYTT_TAG) { + *lenp = 0; + return NULL; + } + + tag = fy_token_get_text(fyt, &tag_len); + if (!tag) + return NULL; + prefix = fy_tag_token_get_directive_prefix(fyt, &prefix_len); + if (!prefix) + return NULL; + handle = fy_tag_token_handle(fyt, &handle_len); + if (!handle || !handle_len) { + suffix = tag; + suffix_len = tag_len; + } else { + assert(prefix_len <= tag_len); + assert(tag_len >= prefix_len); + suffix = tag + prefix_len; + suffix_len = tag_len - prefix_len; + } + *lenp = suffix_len; + return suffix; +} + +const char 
*fy_tag_token_handle0(struct fy_token *fyt) +{ + char *text0; + const char *text; + size_t len; + + if (!fyt || fyt->type != FYTT_TAG) + return NULL; + + /* use the cache if it's there (and doesn't need a rebuild) */ + if (fyt->tag.handle0 && !fy_token_text_needs_rebuild(fyt)) + return fyt->tag.handle0; + + if (fyt->tag.handle0) { + free(fyt->tag.handle0); + fyt->tag.handle0 = NULL; + } + + text = fy_tag_token_handle(fyt, &len); + if (!text) + return NULL; + + text0 = malloc(len + 1); + if (!text0) + return NULL; + memcpy(text0, text, len); + text0[len] = '\0'; + + fyt->tag.handle0 = text0; + + return fyt->tag.handle0; +} + +const char *fy_tag_token_suffix0(struct fy_token *fyt) +{ + char *text0; + const char *text; + size_t len; + + if (!fyt || fyt->type != FYTT_TAG) + return NULL; + + /* use the cache if it's there (and doesn't need a rebuild) */ + if (fyt->tag.suffix0 && !fy_token_text_needs_rebuild(fyt)) + return fyt->tag.suffix0; + + if (fyt->tag.suffix0) { + free(fyt->tag.suffix0); + fyt->tag.suffix0 = NULL; + } + + text = fy_tag_token_suffix(fyt, &len); + if (!text) + return NULL; + + text0 = malloc(len + 1); + if (!text0) + return NULL; + memcpy(text0, text, len); + text0[len] = '\0'; + + fyt->tag.suffix0 = text0; + + return fyt->tag.suffix0; +} + +const struct fy_version * fy_version_directive_token_version(struct fy_token *fyt) +{ + if (!fyt || fyt->type != FYTT_VERSION_DIRECTIVE) + return NULL; + return &fyt->version_directive.vers; +} + +static void fy_token_prepare_text(struct fy_token *fyt) +{ + int ret; + + assert(fyt); + + /* get text length of this token */ + ret = fy_token_format_text_length(fyt); + + /* no text on this token? 
*/ + if (ret == -1) { + fyt->text_len = 0; + fyt->text = fyt->text0 = strdup(""); + return; + } + + fyt->text0 = malloc(ret + 1); + if (!fyt->text0) { + fyt->text_len = 0; + fyt->text = fyt->text0 = strdup(""); + return; + } + + fyt->text0[0] = '\0'; + + fyt->text_len = ret; + + fy_token_format_text(fyt, fyt->text0, ret + 1); + fyt->text0[ret] = '\0'; + + fyt->text_len = ret; + fyt->text = fyt->text0; +} + +const char *fy_token_get_text(struct fy_token *fyt, size_t *lenp) +{ + /* return empty */ + if (!fyt) { + *lenp = 0; + return ""; + } + + /* already found something */ + if (fyt->text && !fy_token_text_needs_rebuild(fyt)) { + *lenp = fyt->text_len; + return fyt->text; + } + + /* try direct output first */ + fyt->text = fy_token_get_direct_output(fyt, &fyt->text_len); + if (!fyt->text) + fy_token_prepare_text(fyt); + + *lenp = fyt->text_len; + return fyt->text; +} + +const char *fy_token_get_text0(struct fy_token *fyt) +{ + /* return empty */ + if (!fyt) + return ""; + + /* created text is always zero terminated */ + if (!fyt->text0) + fy_token_prepare_text(fyt); + + return fyt->text0; +} + +size_t fy_token_get_text_length(struct fy_token *fyt) +{ + return fy_token_format_text_length(fyt); +} + +enum comment_out_state { + cos_normal, + cos_lastnl, + cos_lastnlhash, + cos_lastnlhashspc, +}; + +const char *fy_token_get_comment(struct fy_token *fyt, char *buf, size_t maxsz, + enum fy_comment_placement which) +{ + struct fy_atom *handle; + struct fy_atom_iter iter; + const struct fy_iter_chunk *ic; + char *s, *e; + const char *ss, *ee; + int c, w, ret; + enum comment_out_state state; + bool output; + + if (!buf || maxsz == 0 || (unsigned int)which >= fycp_max) + return NULL; + + /* return empty? 
*/ + handle = fy_token_comment_handle(fyt, which, false); + if (!handle || !fy_atom_is_set(handle)) + return NULL; + + /* start expecting # */ + state = cos_lastnl; + + s = buf; + e = s + maxsz; + fy_atom_iter_start(handle, &iter); + ic = NULL; + while ((ic = fy_atom_iter_chunk_next(&iter, ic, &ret)) != NULL) { + ss = ic->str; + ee = ss + ic->len; + + while ((c = fy_utf8_get(ss, ee - ss, &w)) > 0) { + + output = true; + switch (state) { + case cos_normal: + if (fy_is_lb_m(c, handle->lb_mode)) + state = cos_lastnl; + break; + + case cos_lastnl: + if (c == '#') { + state = cos_lastnlhash; + output = false; + break; + } + state = cos_normal; + break; + + case cos_lastnlhash: + if (c == ' ') { + state = cos_lastnlhashspc; + output = false; + break; + } + state = cos_normal; + break; + + case cos_lastnlhashspc: + state = cos_normal; + break; + } + + if (output) { + s = fy_utf8_put(s, (size_t)(e - s), c); + if (!s) + return NULL; + } + + ss += w; + } + } + fy_atom_iter_finish(&iter); + + if (ret != 0 || s >= e) + return NULL; + *s = '\0'; + + return buf; +} + +const char *fy_token_get_scalar_path_key(struct fy_token *fyt, size_t *lenp) +{ + struct fy_atom *atom; + struct fy_atom_iter iter; + struct fy_emit_accum ea; /* use an emit accumulator */ + uint8_t non_utf8[4]; + size_t non_utf8_len, k; + int c, i, w, digit; + int aflags; + + if (!fyt || fyt->type != FYTT_SCALAR) { + *lenp = 0; + return NULL; + } + + /* was it cached? return */ + if (fyt->scalar.path_key) { + *lenp = fyt->scalar.path_key_len; + return fyt->scalar.path_key; + } + + /* analyze the token */ + aflags = fy_token_text_analyze(fyt); + + /* simple one? perfect */ + if ((aflags & FYTTAF_CAN_BE_UNQUOTED_PATH_KEY) == FYTTAF_CAN_BE_UNQUOTED_PATH_KEY) { + fyt->scalar.path_key = fy_token_get_text(fyt, &fyt->scalar.path_key_len); + *lenp = fyt->scalar.path_key_len; + return fyt->scalar.path_key; + } + + /* not possible, need to quote (and escape) */ + + /* no atom? i.e. 
empty */ + atom = fy_token_atom(fyt); + if (!atom) { + fyt->scalar.path_key = ""; + fyt->scalar.path_key_len = 0; + *lenp = 0; + return fyt->scalar.path_key; + } + + /* no inplace buffer; we will need the malloc'ed contents anyway */ + fy_emit_accum_init(&ea, NULL, 0, 0, fylb_cr_nl); + + fy_atom_iter_start(atom, &iter); + fy_emit_accum_start(&ea, 0, fy_token_atom_lb_mode(fyt)); + + /* output in quoted form */ + fy_emit_accum_utf8_put(&ea, '"'); + + for (;;) { + non_utf8_len = sizeof(non_utf8); + c = fy_atom_iter_utf8_quoted_get(&iter, &non_utf8_len, non_utf8); + if (c < 0) + break; + + if (c == 0 && non_utf8_len > 0) { + for (k = 0; k < non_utf8_len; k++) { + c = (int)non_utf8[k] & 0xff; + fy_emit_accum_utf8_put(&ea, '\\'); + fy_emit_accum_utf8_put(&ea, 'x'); + digit = ((unsigned int)c >> 4) & 15; + fy_emit_accum_utf8_put(&ea, + digit <= 9 ? ('0' + digit) : ('A' + digit - 10)); + digit = (unsigned int)c & 15; + fy_emit_accum_utf8_put(&ea, + digit <= 9 ? ('0' + digit) : ('A' + digit - 10)); + } + continue; + } + + if (!fy_is_printq(c) || c == '"' || c == '\\') { + + fy_emit_accum_utf8_put(&ea, '\\'); + + switch (c) { + + /* common YAML & JSON escapes */ + case '\b': + fy_emit_accum_utf8_put(&ea, 'b'); + break; + case '\f': + fy_emit_accum_utf8_put(&ea, 'f'); + break; + case '\n': + fy_emit_accum_utf8_put(&ea, 'n'); + break; + case '\r': + fy_emit_accum_utf8_put(&ea, 'r'); + break; + case '\t': + fy_emit_accum_utf8_put(&ea, 't'); + break; + case '"': + fy_emit_accum_utf8_put(&ea, '"'); + break; + case '\\': + fy_emit_accum_utf8_put(&ea, '\\'); + break; + + /* YAML only escapes */ + case '\0': + fy_emit_accum_utf8_put(&ea, '0'); + break; + case '\a': + fy_emit_accum_utf8_put(&ea, 'a'); + break; + case '\v': + fy_emit_accum_utf8_put(&ea, 'v'); + break; + case '\e': + fy_emit_accum_utf8_put(&ea, 'e'); + break; + case 0x85: + fy_emit_accum_utf8_put(&ea, 'N'); + break; + case 0xa0: + fy_emit_accum_utf8_put(&ea, '_'); + break; + case 0x2028: + fy_emit_accum_utf8_put(&ea, 
'L'); + break; + case 0x2029: + fy_emit_accum_utf8_put(&ea, 'P'); + break; + + default: + /* any kind of binary value */ + if ((unsigned int)c <= 0xff) { + fy_emit_accum_utf8_put(&ea, 'x'); + w = 2; + } else if ((unsigned int)c <= 0xffff) { + fy_emit_accum_utf8_put(&ea, 'u'); + w = 4; + } else if ((unsigned int)c <= 0xffffffff) { + fy_emit_accum_utf8_put(&ea, 'U'); + w = 8; + } + + for (i = w - 1; i >= 0; i--) { + digit = ((unsigned int)c >> (i * 4)) & 15; + fy_emit_accum_utf8_put(&ea, + digit <= 9 ? ('0' + digit) : ('A' + digit - 10)); + } + break; + } + + continue; + } + + /* regular character */ + fy_emit_accum_utf8_put(&ea, c); + } + + fy_atom_iter_finish(&iter); + + /* closing quote */ + fy_emit_accum_utf8_put(&ea, '"'); + + fy_emit_accum_make_0_terminated(&ea); + + /* get the output (note it's now NULL terminated) */ + fyt->scalar.path_key_storage = fy_emit_accum_steal(&ea, &fyt->scalar.path_key_len); + fyt->scalar.path_key = fyt->scalar.path_key_storage; + fy_emit_accum_cleanup(&ea); + + *lenp = fyt->scalar.path_key_len; + + return fyt->scalar.path_key; +} + +size_t fy_token_get_scalar_path_key_length(struct fy_token *fyt) +{ + const char *text; + size_t len; + + text = fy_token_get_scalar_path_key(fyt, &len); + if (!text) + return 0; + return len; +} + +const char *fy_token_get_scalar_path_key0(struct fy_token *fyt) +{ + const char *text; + size_t len; + + if (!fyt || fyt->type != FYTT_SCALAR) { + return NULL; + } + + /* storage is \0 terminated */ + if (fyt->scalar.path_key_storage) + return fyt->scalar.path_key_storage; + + text = fyt->scalar.path_key; + len = fyt->scalar.path_key_len; + if (!text) + text = fy_token_get_scalar_path_key(fyt, &len); + + /* something is catastrophically wrong */ + if (!text) + return NULL; + + if (fyt->scalar.path_key_storage) + return fyt->scalar.path_key_storage; + + fyt->scalar.path_key_storage = malloc(len + 1); + if (!fyt->scalar.path_key_storage) + return NULL; + + memcpy(fyt->scalar.path_key_storage, text, len); + 
fyt->scalar.path_key_storage[len] = '\0'; + + return fyt->scalar.path_key_storage; +} + +unsigned int fy_analyze_scalar_content(const char *data, size_t size, + bool json_mode, enum fy_lb_mode lb_mode, enum fy_flow_ws_mode fws_mode) +{ + const char *s, *e; + int c, lastc, nextc, w, ww, col, break_run; + unsigned int flags; + bool first; + + flags = FYACF_EMPTY | FYACF_BLOCK_PLAIN | FYACF_FLOW_PLAIN | + FYACF_PRINTABLE | FYACF_SINGLE_QUOTED | FYACF_DOUBLE_QUOTED | + FYACF_SIZE0 | FYACF_VALID_ANCHOR; + + s = data; + e = data + size; + + col = 0; + first = true; + lastc = -1; + break_run = 0; + while (s < e && (c = fy_utf8_get(s, e - s, &w)) >= 0) { + + flags &= ~FYACF_SIZE0; + + lastc = c; + + if (first) { + if (fy_is_ws(c)) + flags |= FYACF_STARTS_WITH_WS; + else if (fy_is_generic_lb_m(c, lb_mode)) + flags |= FYACF_STARTS_WITH_LB; + /* scalars starting with & or * must be quoted */ + if (c == '&' || c == '*') + flags &= ~(FYACF_BLOCK_PLAIN | FYACF_FLOW_PLAIN); + first = false; + } + nextc = fy_utf8_get(s + w, e - (s + w), &ww); + + /* anything other than white space or linebreak */ + if ((flags & FYACF_EMPTY) && + !fy_is_ws(c) && !fy_is_generic_lb_m(c, lb_mode)) + flags &= ~FYACF_EMPTY; + + if ((flags & FYACF_VALID_ANCHOR) && + (fy_utf8_strchr(",[]{}&*:", c) || fy_is_ws(c) || + fy_is_any_lb(c) || fy_is_unicode_control(c) || + fy_is_unicode_space(c))) + flags &= ~FYACF_VALID_ANCHOR; + + /* linebreak */ + if (fy_is_generic_lb_m(c, lb_mode)) { + flags |= FYACF_LB; + if (!(flags & FYACF_CONSECUTIVE_LB) && + fy_is_generic_lb_m(nextc, lb_mode)) + flags |= FYACF_CONSECUTIVE_LB; + break_run++; + } else + break_run = 0; + + /* white space */ + if (!(flags & FYACF_WS) && fy_is_ws(c)) { + flags |= FYACF_WS; + flags &= ~FYACF_VALID_ANCHOR; + } + + /* anything not printable (or \r, \n) */ + if ((flags & FYACF_PRINTABLE) && + !fy_is_printq(c)) { + flags &= ~FYACF_PRINTABLE; + flags &= ~(FYACF_BLOCK_PLAIN | FYACF_FLOW_PLAIN | + FYACF_SINGLE_QUOTED | FYACF_VALID_ANCHOR); + } + + /* 
check for document indicators (at column 0) */ + if (!(flags & FYACF_DOC_IND) && + ((col == 0 && (e - s) >= 3 && + (!strncmp(s, "---", 3) || !strncmp(s, "...", 3))))) { + flags |= FYACF_DOC_IND; + flags &= ~(FYACF_BLOCK_PLAIN | FYACF_FLOW_PLAIN | FYACF_VALID_ANCHOR); + } + + /* comment indicator can't be present after a space or lb */ + /* : followed by blank can't be any plain */ + if (flags & (FYACF_BLOCK_PLAIN | FYACF_FLOW_PLAIN) && + (((fy_is_blank(c) || fy_is_generic_lb_m(c, lb_mode)) && nextc == '#') || + (c == ':' && fy_is_blankz_m(nextc, lb_mode)))) + flags &= ~(FYACF_BLOCK_PLAIN | FYACF_FLOW_PLAIN); + + /* : followed by flow markers can't be a plain in flow context */ + if ((flags & FYACF_FLOW_PLAIN) && + (fy_utf8_strchr(",[]{}", c) || (c == ':' && fy_utf8_strchr(",[]{}", nextc)))) + flags &= ~FYACF_FLOW_PLAIN; + + if (!(flags & FYACF_JSON_ESCAPE) && !fy_is_json_unescaped(c)) + flags |= FYACF_JSON_ESCAPE; + + if (fy_is_generic_lb_m(c, lb_mode)) + col = 0; + else + col++; + + s += w; + } + + /* this contains arbitrary binany values, mark it as such */ + if (s < e) + return FYACF_DOUBLE_QUOTED; + + if (fy_is_ws(lastc)) + flags |= FYACF_ENDS_WITH_WS; + else if (fy_is_generic_lb_m(lastc, lb_mode)) + flags |= FYACF_ENDS_WITH_LB; + + if (break_run > 1) + flags |= FYACF_TRAILING_LB; + + return flags; +} + +char *fy_token_debug_text(struct fy_token *fyt) +{ + const char *typetxt; + const char *text; + char *buf; + size_t length; + int wlen; + int rc __FY_DEBUG_UNUSED__; + + if (!fyt || !fy_token_type_is_valid(fyt->type)) { + typetxt = "<NULL>"; + goto out; + } + typetxt = fy_token_type_txt[fyt->type]; + + /* should never happen really */ + assert(typetxt); + +out: + text = fy_token_get_text(fyt, &length); + + wlen = length > 8 ? 8 : length; + + rc = asprintf(&buf, "%s:%.*s%s", typetxt, wlen, text, wlen < (int)length ? "..." 
: ""); + assert(rc != -1); + + return buf; +} + +int fy_token_memcmp(struct fy_token *fyt, const void *ptr, size_t len) +{ + const char *value = NULL; + size_t tlen = 0; + + /* special zero length handling */ + if (len == 0 && fyt && fy_token_get_text_length(fyt) == 0) + return 0; + + /* handle NULL cases */ + if (!fyt && (!ptr || !len)) + return 0; + + if (!fyt && (ptr || len)) + return -1; + + if (fyt && (!ptr || !len)) + return 1; + + /* those two are special */ + if (fyt->type == FYTT_TAG || fyt->type == FYTT_TAG_DIRECTIVE) { + value = fy_token_get_text(fyt, &tlen); + if (!value) + return -1; + return tlen == len ? memcmp(value, ptr, tlen) : tlen < len ? -1 : 1; + } + + return fy_atom_memcmp(fy_token_atom(fyt), ptr, len); +} + +int fy_token_strcmp(struct fy_token *fyt, const char *str) +{ + size_t len; + + len = str ? strlen(str) : 0; + + return fy_token_memcmp(fyt, str, len); +} + +int fy_token_cmp(struct fy_token *fyt1, struct fy_token *fyt2) +{ + const char *t1, *t2; + size_t l1, l2, l; + int ret; + + /* handles both NULL */ + if (fyt1 == fyt2) + return 0; + + /* fyt1 is null, 2 wins */ + if (!fyt1 && fyt2) + return -1; + + /* fyt2 is null, 1 wins */ + if (fyt1 && !fyt2) + return 1; + + /* tokens with different types can't be equal */ + if (fyt1->type != fyt2->type) + return fyt2->type > fyt1->type ? -1 : 1; + + /* special case, these can't use the atom comparisons */ + if (fyt1->type == FYTT_TAG || fyt1->type == FYTT_TAG_DIRECTIVE) { + t1 = fy_token_get_text(fyt1, &l1); + t2 = fy_token_get_text(fyt2, &l2); + l = l1 > l2 ? l2 : l1; + ret = memcmp(t1, t2, l); + if (ret) + return ret; + return l1 == l2 ? 0 : l2 > l1 ? 
-1 : 1; + } + + /* just pass it to the atom comparison methods */ + return fy_atom_cmp(fy_token_atom(fyt1), fy_token_atom(fyt2)); +} + +void fy_token_iter_start(struct fy_token *fyt, struct fy_token_iter *iter) +{ + if (!iter) + return; + + memset(iter, 0, sizeof(*iter)); + + iter->unget_c = -1; + + if (!fyt) + return; + + iter->fyt = fyt; + + /* TAG or TAG_DIRECTIVE may only work by getting the text */ + if (fyt->type == FYTT_TAG || fyt->type == FYTT_TAG_DIRECTIVE) + iter->ic.str = fy_token_get_text(fyt, &iter->ic.len); + else /* try the direct output next */ + iter->ic.str = fy_token_get_direct_output(fyt, &iter->ic.len); + + /* got it */ + if (iter->ic.str) { + memset(&iter->atom_iter, 0, sizeof(iter->atom_iter)); + return; + } + + assert(fyt->type != FYTT_TAG && fyt->type != FYTT_TAG_DIRECTIVE); + + /* fall back to the atom iterator */ + fy_atom_iter_start(fy_token_atom(fyt), &iter->atom_iter); +} + +void fy_token_iter_finish(struct fy_token_iter *iter) +{ + if (!iter) + return; + + if (!iter->ic.str) + fy_atom_iter_finish(&iter->atom_iter); +} + +struct fy_token_iter * +fy_token_iter_create(struct fy_token *fyt) +{ + struct fy_token_iter *iter; + + iter = malloc(sizeof(*iter)); + if (!iter) + return NULL; + fy_token_iter_start(fyt, iter); + return iter; +} + +void fy_token_iter_destroy(struct fy_token_iter *iter) +{ + if (!iter) + return; + + fy_token_iter_finish(iter); + free(iter); +} + +const struct fy_iter_chunk *fy_token_iter_peek_chunk(struct fy_token_iter *iter) +{ + if (!iter) + return NULL; + + /* direct mode? */ + if (iter->ic.str) + return &iter->ic; + + /* fallback to the atom iterator */ + return fy_atom_iter_peek_chunk(&iter->atom_iter); +} + +void fy_token_iter_advance(struct fy_token_iter *iter, size_t len) +{ + if (!iter) + return; + + /* direct mode? 
*/ + if (iter->ic.str) { + if (len > iter->ic.len) + len = iter->ic.len; + iter->ic.str += len; + iter->ic.len -= len; + return; + } + + /* fallback to the atom iterator */ + fy_atom_iter_advance(&iter->atom_iter, len); +} + +const struct fy_iter_chunk * +fy_token_iter_chunk_next(struct fy_token_iter *iter, const struct fy_iter_chunk *curr, int *errp) +{ + if (!iter) + return NULL; + + if (errp) + *errp = 0; + + /* first time in */ + if (!curr) { + if (iter->ic.str) + return iter->ic.len ? &iter->ic : NULL; + return fy_atom_iter_chunk_next(&iter->atom_iter, NULL, errp); + } + + /* direct, all consumed */ + if (curr == &iter->ic) { + iter->ic.str += iter->ic.len; + iter->ic.len = 0; + return NULL; + } + + /* fallback */ + return fy_atom_iter_chunk_next(&iter->atom_iter, curr, errp); +} + +ssize_t fy_token_iter_read(struct fy_token_iter *iter, void *buf, size_t count) +{ + if (!iter || !buf) + return -1; + + /* direct mode */ + if (iter->ic.str) { + if (count > iter->ic.len) + count = iter->ic.len; + memcpy(buf, iter->ic.str, count); + iter->ic.str += count; + iter->ic.len -= count; + return count; + } + + return fy_atom_iter_read(&iter->atom_iter, buf, count); +} + +int fy_token_iter_getc(struct fy_token_iter *iter) +{ + int c; + + if (!iter) + return -1; + + /* first try the pushed ungetc */ + if (iter->unget_c != -1) { + c = iter->unget_c; + iter->unget_c = -1; + return c; + } + + /* direct mode */ + if (iter->ic.str) { + if (!iter->ic.len) + return -1; + c = *iter->ic.str++; + iter->ic.len--; + return c; + } + + return fy_atom_iter_getc(&iter->atom_iter); +} + +int fy_token_iter_ungetc(struct fy_token_iter *iter, int c) +{ + if (iter->unget_c != -1) + return -1; + if (c == -1) { + iter->unget_c = -1; + return 0; + } + iter->unget_c = c & 0xff; + return c & 0xff; +} + +int fy_token_iter_peekc(struct fy_token_iter *iter) +{ + int c; + + c = fy_token_iter_getc(iter); + if (c == -1) + return -1; + + return fy_token_iter_ungetc(iter, c); +} + +int 
fy_token_iter_utf8_get(struct fy_token_iter *iter) +{ + int c, w, w1; + + /* first try the pushed ungetc */ + if (iter->unget_c != -1) { + c = iter->unget_c; + iter->unget_c = -1; + return c; + } + + /* direct */ + if (iter->ic.str) { + + /* not even 1 octet */ + if (!iter->ic.len) + return -1; + + /* get width by the first octet */ + w = fy_utf8_width_by_first_octet((uint8_t)*iter->ic.str); + if (!w || (unsigned int)w > iter->ic.len) + return -1; + + /* get the next character */ + c = fy_utf8_get(iter->ic.str, w, &w1); + + iter->ic.str += w; + iter->ic.len -= w; + + return c; + } + + return fy_atom_iter_utf8_get(&iter->atom_iter); +} + +int fy_token_iter_utf8_unget(struct fy_token_iter *iter, int c) +{ + if (iter->unget_c != -1) + return -1; + + if (c == -1) { + iter->unget_c = -1; + return 0; + } + + iter->unget_c = c; + return c; +} + +int fy_token_iter_utf8_peek(struct fy_token_iter *iter) +{ + int c; + + c = fy_token_iter_utf8_get(iter); + if (c == -1) + return -1; + + return fy_token_iter_utf8_unget(iter, c); +} + +enum fy_scalar_style +fy_scalar_token_get_style(struct fy_token *fyt) +{ + if (!fyt || fyt->type != FYTT_SCALAR) + return FYSS_ANY; + return fyt->scalar.style; +} + +const struct fy_tag *fy_tag_token_tag(struct fy_token *fyt) +{ + if (!fyt || fyt->type != FYTT_TAG) + return NULL; + + /* always refresh, should be relatively infrequent */ + fyt->tag.tag.handle = fy_tag_token_handle0(fyt); + fyt->tag.tag.prefix = fy_tag_token_suffix0(fyt); + + return &fyt->tag.tag; +} + +const struct fy_tag * +fy_tag_directive_token_tag(struct fy_token *fyt) +{ + if (!fyt || fyt->type != FYTT_TAG_DIRECTIVE) + return NULL; + + /* always refresh, should be relatively infrequent */ + fyt->tag_directive.tag.handle = fy_tag_directive_token_handle0(fyt); + fyt->tag_directive.tag.prefix = fy_tag_directive_token_prefix0(fyt); + + return &fyt->tag_directive.tag; +} + +struct fy_atom *fy_token_comment_handle(struct fy_token *fyt, enum fy_comment_placement placement, bool alloc) 
+{ + struct fy_atom *handle; + size_t size; + + if (!fyt || (unsigned int)placement >= fycp_max) + return NULL; + + if (!fyt->comment) { + if (!alloc) + return NULL; + + size = sizeof(*fyt->comment) * fycp_max; + fyt->comment = malloc(size); + if (!fyt->comment) + return NULL; + memset(fyt->comment, 0, size); + } + handle = &fyt->comment[placement]; + + return handle; +} + +bool fy_token_has_any_comment(struct fy_token *fyt) +{ + struct fy_atom *handle; + enum fy_comment_placement placement; + + if (!fyt || !fyt->comment) + return false; + + for (placement = fycp_top; placement <= fycp_bottom; placement++) { + handle = &fyt->comment[placement]; + if (fy_atom_is_set(handle)) + return true; + } + return false; +} |