diff options
author | innokentii <innokentii@yandex-team.com> | 2023-02-01 16:01:14 +0300 |
---|---|---|
committer | innokentii <innokentii@yandex-team.com> | 2023-02-01 16:01:14 +0300 |
commit | 96b135245109ad10d4e6a51d9f7fa61add23e839 (patch) | |
tree | 754dd793a8952d93f8da462c781404f3563114f8 /contrib/libs/libfyaml/src | |
parent | acce22d4812c919616875de449eb6eb69006593a (diff) | |
download | ydb-96b135245109ad10d4e6a51d9f7fa61add23e839.tar.gz |
Add basic yaml config resolver
Diffstat (limited to 'contrib/libs/libfyaml/src')
41 files changed, 40037 insertions, 0 deletions
diff --git a/contrib/libs/libfyaml/src/lib/fy-accel.c b/contrib/libs/libfyaml/src/lib/fy-accel.c new file mode 100644 index 0000000000..1b1ac99f32 --- /dev/null +++ b/contrib/libs/libfyaml/src/lib/fy-accel.c @@ -0,0 +1,416 @@ +/* + * fy-accel.c - YAML accelerated access methods + * + * Copyright (c) 2019 Pantelis Antoniou <pantelis.antoniou@konsulko.com> + * + * SPDX-License-Identifier: MIT + */ +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#include <limits.h> +#include <string.h> + +#include <libfyaml.h> + +#include "fy-parse.h" +#include "fy-doc.h" + +#include "fy-accel.h" + +#define XXH_STATIC_LINKING_ONLY +#include "xxhash.h" + +/* powers of two and the closest primes before + * + * pow2: 1 2 4 8 16 32 64 128 256 512 1024 2048 4096 8192 16384 32768 65536 + * prime: 1 2 3 7 13 31 61 127 251 509 1021 2039 4093 8191 16381 32749 65521 + * + * pow2: 131072 262144 524288 + * prime: 130657 262051 524201 + */ + +/* 64K bucket should be enough for everybody */ +static const uint32_t prime_lt_pow2[] = { + 1, 2, 3, 7, 13, 31, 61, 127, 251, 509, 1021, + 2039, 4093, 8191, 16381, 32749, 65521, + 130657, 262051, 524201 +}; + +static inline unsigned int +fy_accel_hash_to_pos(struct fy_accel *xl, const void *hash, unsigned int nbuckets) +{ + uint64_t pos; + + switch (xl->hd->size) { + case 1: + pos = *(const uint8_t *)hash; + break; + case 2: + assert(!((uintptr_t)hash & 1)); + pos = *(const uint16_t *)hash; + break; + case 4: + assert(!((uintptr_t)hash & 3)); + pos = *(const uint32_t *)hash; + break; + case 8: + assert(!((uintptr_t)hash & 7)); + pos = *(const uint64_t *)hash; + break; + default: + /* sigh, what ever */ + pos = XXH32(hash, xl->hd->size, 0); + break; + } + + return (unsigned int)(pos % nbuckets); +} + +static inline bool +fy_accel_hash_eq(struct fy_accel *xl, const void *hash1, const void *hash2) +{ + switch (xl->hd->size) { + case 1: + return *(const uint8_t *)hash1 == *(const uint8_t *)hash2; + case 2: + assert(!((uintptr_t)hash1 & 1)); + 
assert(!((uintptr_t)hash2 & 1)); + return *(const uint16_t *)hash1 == *(const uint16_t *)hash2; + case 4: + assert(!((uintptr_t)hash1 & 3)); + assert(!((uintptr_t)hash2 & 3)); + return *(const uint32_t *)hash1 == *(const uint32_t *)hash2; + case 8: + assert(!((uintptr_t)hash1 & 7)); + assert(!((uintptr_t)hash2 & 7)); + return *(const uint64_t *)hash1 == *(const uint64_t *)hash2; + default: + break; + } + + return !memcmp(hash1, hash2, xl->hd->size); +} + +/* + * Rehash the table into the prime bucket count that pairs with the + * smallest power of two >= min_buckets, capped at the last entry of + * prime_lt_pow2. Returns 0 on success or when the bucket count does + * not change, -1 on a NULL table or allocation failure. + */ +int fy_accel_resize(struct fy_accel *xl, unsigned int min_buckets) +{ + unsigned int next_pow2, exp, i, nbuckets, pos; + struct fy_accel_entry_list *xlel; + struct fy_accel_entry *xle; + struct fy_accel_entry_list *buckets_new; + + if (!xl) + return -1; + + /* get the next power of two larger or equal; stop on the last table slot so that prime_lt_pow2[exp] below stays in bounds */ + next_pow2 = 1; + exp = 0; + while (next_pow2 < min_buckets && + exp + 1 < sizeof(prime_lt_pow2)/sizeof(prime_lt_pow2[0])) { + next_pow2 <<= 1; + exp++; + } + + nbuckets = prime_lt_pow2[exp]; + if (nbuckets == xl->nbuckets) + return 0; + + buckets_new = malloc(sizeof(*buckets_new) * nbuckets); + if (!buckets_new) + return -1; + + for (i = 0, xlel = buckets_new; i < nbuckets; i++, xlel++) + fy_accel_entry_list_init(xlel); + + /* move every existing entry to its position in the new bucket array */ + if (xl->buckets) { + for (i = 0, xlel = xl->buckets; i < xl->nbuckets; i++, xlel++) { + while ((xle = fy_accel_entry_list_pop(xlel)) != NULL) { + pos = fy_accel_hash_to_pos(xl, xle->hash, nbuckets); + fy_accel_entry_list_add_tail(&buckets_new[pos], xle); + } + } + free(xl->buckets); + } + xl->buckets = buckets_new; + xl->nbuckets = nbuckets; + xl->next_exp2 = exp; + + return 0; +} + +/* + * Move to the next larger bucket count. Returns -1 when xl is NULL or + * the table already uses the last entry of prime_lt_pow2, otherwise + * the result of fy_accel_resize(). + */ +int fy_accel_grow(struct fy_accel *xl) +{ + if (!xl) + return -1; + + /* should not grow indefinitely; next_exp2 + 1 must still index prime_lt_pow2 */ + if (xl->next_exp2 + 1 >= sizeof(prime_lt_pow2)/sizeof(prime_lt_pow2[0])) + return -1; + + return fy_accel_resize(xl, prime_lt_pow2[xl->next_exp2 + 1]); +} + +/* + * Move to the next smaller bucket count. Returns -1 when xl is NULL or + * the table is already at the first entry of prime_lt_pow2. + */ +int fy_accel_shrink(struct fy_accel *xl) +{ + if (!xl) + return -1; + + /* should not shrink indefinitely */ + if (xl->next_exp2 <= 0) + return -1; + + return fy_accel_resize(xl, 
prime_lt_pow2[xl->next_exp2 - 1]); +} + +/* + * Initialize xl for the hash descriptor hd and allocate the initial + * buckets. Returns -1 on invalid arguments, otherwise the result of + * fy_accel_resize(). + */ +int +fy_accel_setup(struct fy_accel *xl, + const struct fy_hash_desc *hd, + void *userdata, + unsigned int min_buckets) +{ + if (!xl || !hd || !hd->size || !hd->hash) + return -1; + + memset(xl, 0, sizeof(*xl)); + xl->hd = hd; + xl->userdata = userdata; + xl->count = 0; + + return fy_accel_resize(xl, min_buckets); +} + +/* + * Free every entry and the bucket array. The bucket bookkeeping is + * reset afterwards so that a repeated cleanup or a later resize never + * walks the freed array. + */ +void fy_accel_cleanup(struct fy_accel *xl) +{ + unsigned int i; + struct fy_accel_entry_list *xlel; + struct fy_accel_entry *xle; + + if (!xl) + return; + + for (i = 0, xlel = xl->buckets; i < xl->nbuckets; i++, xlel++) { + while ((xle = fy_accel_entry_list_pop(xlel)) != NULL) { + free(xle); + assert(xl->count > 0); + xl->count--; + } + } + + free(xl->buckets); + xl->buckets = NULL; + xl->nbuckets = 0; + xl->next_exp2 = 0; +} + +/* + * Allocate an entry for key/value, hash the key into it and append it + * to its bucket; duplicates are not checked here. May grow the table + * once the bucket holds hd->max_bucket_grow_limit entries. Returns the + * new entry, or NULL on failure. + */ +struct fy_accel_entry * +fy_accel_entry_insert(struct fy_accel *xl, const void *key, const void *value) +{ + struct fy_accel_entry *xle, *xlet; + struct fy_accel_entry_list *xlel; + unsigned int pos, bucket_size; + int rc; + + /* without buckets (failed setup or cleaned up table) the position computation would divide by zero */ + if (!xl || !xl->buckets || !xl->nbuckets) + return NULL; + + xle = malloc(sizeof(*xle) + xl->hd->size); + if (!xle) + goto err_out; + + rc = xl->hd->hash(xl, key, xl->userdata, xle->hash); + if (rc) + goto err_out; + xle->key = key; + xle->value = value; + + pos = fy_accel_hash_to_pos(xl, xle->hash, xl->nbuckets); + xlel = &xl->buckets[pos]; + + fy_accel_entry_list_add_tail(xlel, xle); + + assert(xl->count < UINT_MAX); + xl->count++; + + /* if we don't auto-resize, return */ + if (xl->hd->max_bucket_grow_limit) { + bucket_size = 0; + for (xlet = fy_accel_entry_list_first(xlel); xlet; xlet = fy_accel_entry_next(xlel, xlet)) { + bucket_size++; + if (bucket_size >= xl->hd->max_bucket_grow_limit) + break; + } + + /* we don't really care whether the grow up succeeds or not */ + if (bucket_size >= xl->hd->max_bucket_grow_limit) + (void)fy_accel_grow(xl); + } + + return xle; +err_out: + free(xle); + return NULL; +} + +struct fy_accel_entry * +fy_accel_entry_lookup(struct fy_accel *xl, const void *key) +{ + struct fy_accel_entry_iter xli; + struct 
fy_accel_entry *xle; + + xle = fy_accel_entry_iter_start(&xli, xl, key); + fy_accel_entry_iter_finish(&xli); + + return xle; +} + +struct fy_accel_entry * +fy_accel_entry_lookup_key_value(struct fy_accel *xl, const void *key, const void *value) +{ + struct fy_accel_entry_iter xli; + struct fy_accel_entry *xle; + + for (xle = fy_accel_entry_iter_start(&xli, xl, key); xle; + xle = fy_accel_entry_iter_next(&xli)) { + + if (xle->value == value) + break; + } + fy_accel_entry_iter_finish(&xli); + + return xle; +} + +void +fy_accel_entry_remove(struct fy_accel *xl, struct fy_accel_entry *xle) +{ + unsigned int pos; + + if (!xl || !xle) + return; + + pos = fy_accel_hash_to_pos(xl, xle->hash, xl->nbuckets); + + fy_accel_entry_list_del(&xl->buckets[pos], xle); + + assert(xl->count > 0); + xl->count--; + + free(xle); +} + +int +fy_accel_insert(struct fy_accel *xl, const void *key, const void *value) +{ + struct fy_accel_entry *xle; + + xle = fy_accel_entry_lookup(xl, key); + if (xle) + return -1; /* exists */ + + xle = fy_accel_entry_insert(xl, key, value); + if (!xle) + return -1; /* failure to insert */ + + return 0; +} + +const void * +fy_accel_lookup(struct fy_accel *xl, const void *key) +{ + struct fy_accel_entry *xle; + + xle = fy_accel_entry_lookup(xl, key); + return xle ? xle->value : NULL; +} + +int +fy_accel_remove(struct fy_accel *xl, const void *data) +{ + struct fy_accel_entry *xle; + + xle = fy_accel_entry_lookup(xl, data); + if (!xle) + return -1; + + fy_accel_entry_remove(xl, xle); + + return 0; +} + +struct fy_accel_entry * +fy_accel_entry_iter_next_internal(struct fy_accel_entry_iter *xli) +{ + struct fy_accel *xl; + struct fy_accel_entry *xle; + struct fy_accel_entry_list *xlel; + const void *key; + void *hash; + + if (!xli) + return NULL; + + xl = xli->xl; + hash = xli->hash; + xlel = xli->xlel; + if (!xl || !hash || !xlel) + return NULL; + key = xli->key; + + xle = !xli->xle ? 
fy_accel_entry_list_first(xlel) : + fy_accel_entry_next(xlel, xli->xle); + for (; xle; xle = fy_accel_entry_next(xlel, xle)) { + if (fy_accel_hash_eq(xl, hash, xle->hash) && + xl->hd->eq(xl, hash, xle->key, key, xl->userdata)) + break; + } + return xli->xle = xle; +} + +struct fy_accel_entry * +fy_accel_entry_iter_start(struct fy_accel_entry_iter *xli, struct fy_accel *xl, const void *key) +{ + unsigned int pos; + int rc; + + if (!xli || !xl) + return NULL; + xli->xl = xl; + xli->key = key; + if (xl->hd->size <= sizeof(xli->hash_inline)) + xli->hash = xli->hash_inline; + else + xli->hash = malloc(xl->hd->size); + xli->xlel = NULL; + + if (!xli->hash) + goto err_out; + + rc = xl->hd->hash(xl, key, xl->userdata, xli->hash); + if (rc) + goto err_out; + + pos = fy_accel_hash_to_pos(xl, xli->hash, xl->nbuckets); + xli->xlel = &xl->buckets[pos]; + + xli->xle = NULL; + + return fy_accel_entry_iter_next_internal(xli); + +err_out: + fy_accel_entry_iter_finish(xli); + return NULL; +} + +void fy_accel_entry_iter_finish(struct fy_accel_entry_iter *xli) +{ + if (!xli) + return; + + if (xli->hash && xli->hash != xli->hash_inline) + free(xli->hash); +} + +struct fy_accel_entry * +fy_accel_entry_iter_next(struct fy_accel_entry_iter *xli) +{ + if (!xli || !xli->xle) + return NULL; + + return fy_accel_entry_iter_next_internal(xli); +} diff --git a/contrib/libs/libfyaml/src/lib/fy-accel.h b/contrib/libs/libfyaml/src/lib/fy-accel.h new file mode 100644 index 0000000000..2e917cebfb --- /dev/null +++ b/contrib/libs/libfyaml/src/lib/fy-accel.h @@ -0,0 +1,92 @@ +/* + * fy-accel.h - YAML accelerated access methods + * + * Copyright (c) 2020 Pantelis Antoniou <pantelis.antoniou@konsulko.com> + * + * SPDX-License-Identifier: MIT + */ +#ifndef FY_ACCEL_H +#define FY_ACCEL_H + +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#include <stdbool.h> + +#include <libfyaml.h> + +#include "fy-list.h" +#include "fy-typelist.h" + +struct fy_accel_entry { + struct fy_list_head node; + const void 
*key; + const void *value; + uint8_t hash[0]; +}; +FY_TYPE_FWD_DECL_LIST(accel_entry); +FY_TYPE_DECL_LIST(accel_entry); + +struct fy_accel; + +struct fy_hash_desc { + unsigned int size; + unsigned int max_bucket_grow_limit; + bool unique; + int (*hash)(struct fy_accel *xl, const void *key, void *userdata, void *hash); + bool (*eq)(struct fy_accel *xl, const void *hash, const void *key1, const void *key2, void *userdata); +}; + +struct fy_accel { + const struct fy_hash_desc *hd; + void *userdata; + unsigned int count; + unsigned int nbuckets; + unsigned int next_exp2; + struct fy_accel_entry_list *buckets; +}; + +int +fy_accel_setup(struct fy_accel *xl, + const struct fy_hash_desc *hd, + void *userdata, + unsigned int min_buckets); + +void fy_accel_cleanup(struct fy_accel *xl); + +int fy_accel_resize(struct fy_accel *xl, unsigned int min_buckets); +int fy_accel_grow(struct fy_accel *xl); +int fy_accel_shrink(struct fy_accel *xl); + +int fy_accel_insert(struct fy_accel *xl, const void *key, const void *value); +const void *fy_accel_lookup(struct fy_accel *xl, const void *key); +int fy_accel_remove(struct fy_accel *xl, const void *key); + +struct fy_accel_entry_iter { + struct fy_accel *xl; + const void *key; + void *hash; + struct fy_accel_entry_list *xlel; + struct fy_accel_entry *xle; + uint64_t hash_inline[4]; /* to avoid allocation */ +}; + +struct fy_accel_entry * +fy_accel_entry_insert(struct fy_accel *xl, const void *key, const void *value); + +struct fy_accel_entry * +fy_accel_entry_lookup(struct fy_accel *xl, const void *key); +struct fy_accel_entry * +fy_accel_entry_lookup_key_value(struct fy_accel *xl, const void *key, const void *value); + +void fy_accel_entry_remove(struct fy_accel *xl, struct fy_accel_entry *xle); + +struct fy_accel_entry * +fy_accel_entry_iter_start(struct fy_accel_entry_iter *xli, + struct fy_accel *xl, const void *key); +void fy_accel_entry_iter_finish(struct fy_accel_entry_iter *xli); +struct fy_accel_entry * 
+fy_accel_entry_iter_next(struct fy_accel_entry_iter *xli); + +#endif diff --git a/contrib/libs/libfyaml/src/lib/fy-atom.c b/contrib/libs/libfyaml/src/lib/fy-atom.c new file mode 100644 index 0000000000..a38e6fbf7b --- /dev/null +++ b/contrib/libs/libfyaml/src/lib/fy-atom.c @@ -0,0 +1,1877 @@ +/* + * fy-atom.c - YAML atom methods + * + * Copyright (c) 2019 Pantelis Antoniou <pantelis.antoniou@konsulko.com> + * + * SPDX-License-Identifier: MIT + */ +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#include <stdio.h> +#include <string.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <assert.h> +#include <stdlib.h> +#include <errno.h> +#include <stdarg.h> +#include <ctype.h> + +#include <libfyaml.h> + +#include "fy-parse.h" +#include "fy-doc.h" + +struct fy_atom *fy_reader_fill_atom(struct fy_reader *fyr, int advance, struct fy_atom *handle) +{ + /* start mark */ + fy_reader_fill_atom_start(fyr, handle); + + /* advance the given number of characters */ + if (advance > 0) + fy_reader_advance_by(fyr, advance); + + fy_reader_fill_atom_end(fyr, handle); + + return handle; +} + +int fy_reader_advance_mark(struct fy_reader *fyr, int advance, struct fy_mark *m) +{ + int i, c, tabsize; + bool is_line_break; + + tabsize = fy_reader_tabsize(fyr); + i = 0; + while (advance-- > 0) { + c = fy_reader_peek_at(fyr, i++); + if (c == -1) + return -1; + m->input_pos += fy_utf8_width(c); + + /* first check for CR/LF */ + if (c == '\r' && fy_reader_peek_at(fyr, i) == '\n') { + m->input_pos++; + i++; + is_line_break = true; + } else if (fy_reader_is_lb(fyr, c)) + is_line_break = true; + else + is_line_break = false; + + if (is_line_break) { + m->column = 0; + m->line++; + } else if (tabsize > 0 && fy_is_tab(c)) + m->column += (tabsize - (fy_reader_column(fyr) % tabsize)); + else + m->column++; + } + + return 0; +} + +struct fy_atom *fy_reader_fill_atom_mark(struct fy_reader *fyr, + const struct fy_mark *start_mark, + const struct fy_mark *end_mark, + struct fy_atom *handle) 
+{ + if (!fyr || !start_mark || !end_mark || !handle) + return NULL; + + memset(handle, 0, sizeof(*handle)); + + handle->start_mark = *start_mark; + handle->end_mark = *end_mark; + handle->fyi = fy_reader_current_input(fyr); + handle->fyi_generation = fy_reader_current_input_generation(fyr); + + /* default is plain, modify at return */ + handle->style = FYAS_PLAIN; + handle->chomp = FYAC_CLIP; + /* by default we don't do storage hints, it's the job of the caller */ + handle->storage_hint = 0; + handle->storage_hint_valid = false; + + return handle; +} + +struct fy_atom *fy_reader_fill_atom_at(struct fy_reader *fyr, int advance, int count, struct fy_atom *handle) +{ + struct fy_mark start_mark, end_mark; + int rc; + + if (!fyr || !handle) + return NULL; + + /* start mark */ + fy_reader_get_mark(fyr, &start_mark); + rc = fy_reader_advance_mark(fyr, advance, &start_mark); + (void)rc; + /* ignore the return, if the advance failed, it's the end of input */ + + /* end mark */ + end_mark = start_mark; + rc = fy_reader_advance_mark(fyr, count, &end_mark); + (void)rc; + /* ignore the return, if the advance failed, it's the end of input */ + + return fy_reader_fill_atom_mark(fyr, &start_mark, &end_mark, handle); +} + +static inline void +fy_atom_iter_chunk_reset(struct fy_atom_iter *iter) +{ + iter->top = 0; + iter->read = 0; +} + +static int +fy_atom_iter_grow_chunk(struct fy_atom_iter *iter) +{ + struct fy_atom_iter_chunk *chunks, *c; + size_t asz; + const char *old_s, *old_e, *ss; + unsigned int i; + size_t offset; + + old_s = (const char *)iter->chunks; + old_e = (const char *)(iter->chunks + iter->alloc); + + asz = sizeof(*chunks) * iter->alloc * 2; + chunks = realloc(iter->chunks == iter->startup_chunks ? 
NULL : iter->chunks, asz); + if (!chunks) /* out of memory */ + return -1; + if (iter->chunks == iter->startup_chunks) + memcpy(chunks, iter->startup_chunks, sizeof(iter->startup_chunks)); + + /* for chunks that point to the inplace buffer, reassign pointers */ + for (ss = old_s, c = chunks, i = 0; i < iter->top; ss += sizeof(*c), c++, i++) { + if (c->ic.str < old_s || c->ic.str >= old_e || + c->ic.len > sizeof(c->inplace_buf)) + continue; + + /* get offset */ + offset = (size_t)(c->ic.str - ss); + + /* verify that it points to the inplace_buf area */ + assert(offset >= offsetof(struct fy_atom_iter_chunk, inplace_buf)); + offset -= offsetof(struct fy_atom_iter_chunk, inplace_buf); + + c->ic.str = c->inplace_buf + offset; + } + + iter->alloc *= 2; + iter->chunks = chunks; + + return 0; +} + +static int +_fy_atom_iter_add_chunk(struct fy_atom_iter *iter, const char *str, size_t len) +{ + struct fy_atom_iter_chunk *c; + int ret; + + if (!len) + return 0; + + /* grow iter chunks? */ + if (iter->top >= iter->alloc) { + ret = fy_atom_iter_grow_chunk(iter); + if (ret) + return ret; + } + assert(iter->top < iter->alloc); + c = &iter->chunks[iter->top++]; + + c->ic.str = str; + c->ic.len = len; + + return 0; +} + +static int +_fy_atom_iter_add_chunk_copy(struct fy_atom_iter *iter, const char *str, size_t len) +{ + struct fy_atom_iter_chunk *c; + int ret; + + if (!len) + return 0; + + assert(len <= sizeof(c->inplace_buf)); + + if (iter->top >= iter->alloc) { + ret = fy_atom_iter_grow_chunk(iter); + if (ret) + return ret; + } + assert(iter->top < iter->alloc); + c = &iter->chunks[iter->top++]; + + memcpy(c->inplace_buf, str, len); + + c->ic.str = c->inplace_buf; + c->ic.len = len; + + return 0; +} + +/* keep it around without a warning even though it's unused */ +static int +_fy_atom_iter_add_utf8(struct fy_atom_iter *iter, int c) + FY_ATTRIBUTE(__unused__); + +static int +_fy_atom_iter_add_utf8(struct fy_atom_iter *iter, int c) +{ + char buf[FY_UTF8_FORMAT_BUFMIN]; + char 
*e; + + /* only fails if invalid utf8 */ + e = fy_utf8_put(buf, sizeof(buf), c); + if (!e) + return -1; + + return _fy_atom_iter_add_chunk_copy(iter, buf, e - buf); +} + +/* optimized linebreaks */ +static int +_fy_atom_iter_add_lb(struct fy_atom_iter *iter, int c) +{ + switch (c) { + /* those are generic linebreaks */ + case '\r': + case '\n': + case 0x85: + return _fy_atom_iter_add_chunk(iter, "\n", 1); + /* these are specific linebreaks */ + case 0x2028: + return _fy_atom_iter_add_chunk(iter, "\xe2\x80\xa8", 3); + case 0x2029: + return _fy_atom_iter_add_chunk(iter, "\xe2\x80\xa9", 3); + } + /* not a linebreak */ + return -1; +} + +// #define DEBUG_CHUNK + +#ifndef DEBUG_CHUNK +#define fy_atom_iter_add_chunk _fy_atom_iter_add_chunk +#define fy_atom_iter_add_chunk_copy _fy_atom_iter_add_chunk_copy +#define fy_atom_iter_add_utf8 _fy_atom_iter_add_utf8 +#define fy_atom_iter_add_lb _fy_atom_iter_add_lb +#else +#define fy_atom_iter_add_chunk(_iter, _str, _len) \ + ({ \ + const char *__str = (_str); \ + size_t __len2 = (_len); \ + char *__out = NULL; \ + int __ret = 0; \ + \ + if (__len2 > 0) { \ + __out = fy_utf8_format_text_alloc(__str, __len2, fyue_doublequote); \ + assert(__out); \ + fprintf(stderr, "%s:%d chunk #%zu \"%s\"\n", __func__, __LINE__, __len2, __out); \ + __ret = _fy_atom_iter_add_chunk((_iter), __str, __len2); \ + free(__out); \ + } \ + __ret; \ + }) + +#define fy_atom_iter_add_chunk_copy(_iter, _str, _len) \ + ({ \ + const char *__str = (_str); \ + size_t __len2 = (_len); \ + char *__out = NULL; \ + int __ret = 0; \ + \ + if (__len2 > 0) { \ + __out = fy_utf8_format_text_alloc(__str, __len2, fyue_doublequote); \ + assert(__out); \ + fprintf(stderr, "%s:%d chunk-copy #%zu \"%s\"\n", __func__, __LINE__, __len2, __out); \ + /* fprintf(stderr, "%s:%d chunk-copy \"%.*s\"\n", __func__, __LINE__, (int)__len, __str); */ \ + __ret = _fy_atom_iter_add_chunk_copy((_iter), __str, __len2); \ + free(__out); \ + } \ + __ret; \ + }) +#define 
fy_atom_iter_add_utf8(_iter, _c) \ + ({ \ + int __c = (_c); \ + fprintf(stderr, "%s:%d utf8 %d\n", __func__, __LINE__, __c); \ + _fy_atom_iter_add_utf8((_iter), (_c)); \ + }) +#define fy_atom_iter_add_lb(_iter, _c) \ + ({ \ + int __c = (_c); \ + fprintf(stderr, "%s:%d lb 0x%02x\n", __func__, __LINE__, __c); \ + _fy_atom_iter_add_lb((_iter), (_c)); \ + }) +#endif + +static void +fy_atom_iter_line_analyze(struct fy_atom_iter *iter, struct fy_atom_iter_line_info *li, + const char *line_start, size_t len) +{ + const struct fy_atom *atom = iter->atom; + const char *s, *e, *ss; + int col, c, w, ts, cws, advws; + bool last_was_ws, is_block; + int lastc; + + s = line_start; + e = line_start + len; + + is_block = atom->style == FYAS_LITERAL || atom->style == FYAS_FOLDED; + + /* short circuit non multiline, non ws atoms */ + if ((atom->direct_output && !atom->has_lb && !atom->has_ws) || + atom->style == FYAS_DOUBLE_QUOTED_MANUAL) { + li->start = s; + li->end = e; + li->nws_start = s; + li->nws_end = e; + li->chomp_start = s; + li->final = true; + li->empty = atom->empty; + li->trailing_breaks = 0; + li->trailing_breaks_ws = false; + li->start_ws = 0; + li->end_ws = 0; + li->indented = false; + li->lb_end = is_block ? atom->ends_with_lb : false; + li->final = true; + li->actual_lb = -1; + li->s_tb = li->e_tb = NULL; + li->ends_with_backslash = false; + return; + } + + li->start = s; + li->end = NULL; + li->nws_start = NULL; + li->nws_end = NULL; + li->chomp_start = NULL; + li->empty = true; + li->trailing_breaks = 0; + li->trailing_breaks_ws = false; + li->first = false; + li->start_ws = (size_t)-1; + li->end_ws = (size_t)-1; + li->indented = false; + li->lb_end = false; + li->final = false; + li->actual_lb = -1; + li->ends_with_backslash = false; + + last_was_ws = false; + + ts = atom->tabsize ? 
atom->tabsize : 8; /* pick it up from the atom (if there is) */ + + /* consecutive whitespace */ + cws = 0; + + lastc = -1; + li->s_tb = s; + for (col = 0, ss = s; (c = fy_utf8_get(ss, (e - ss), &w)) >= 0; ss += w) { + + lastc = c; + + /* mark start of chomp */ + if (is_block && !li->chomp_start && (unsigned int)col >= iter->chomp) { + li->chomp_start = ss; + + /* if the character at the chomp point is whitespace + * then we're indented + */ + li->indented = fy_is_ws(c); + +#if defined(DEBUG_CHUNK) + fprintf(stderr, "%s:%d chomp_start=%d\n", __FILE__, __LINE__, (int)(li->chomp_start - li->start)); +#endif + + } + + if (fy_is_lb_m(c, atom->lb_mode)) { + +#ifdef DEBUG_CHUNK + fprintf(stderr, "%s:%d lb=0x%x\n", __FILE__, __LINE__, c); +#endif + + col = 0; + if (!li->end) { + + li->end = ss; + li->end_ws = cws; + li->lb_end = true; + li->actual_lb = c; + +#ifdef DEBUG_CHUNK + fprintf(stderr, "%s:%d set actual_lb=0x%x\n", __FILE__, __LINE__, li->actual_lb); +#endif + + cws = 0; + } + + /* no chomp point hit, use whatever we have here */ + if (is_block && !li->chomp_start) { + li->chomp_start = ss; +#if defined(DEBUG_CHUNK) + fprintf(stderr, "%s:%d chomp_start=%d\n", __FILE__, __LINE__, (int)(li->chomp_start - li->start)); +#endif + + } + + if (!last_was_ws) { + cws = 0; + li->nws_end = ss; + last_was_ws = true; + } + + } else if (fy_is_space(c)) { + + col++; + cws++; + + if (!last_was_ws) { + li->nws_end = ss; + last_was_ws = true; + } + + } else if (fy_is_tab(c)) { + + bool can_be_nws_end; + + advws = ts - (col % ts); + col += advws; + +#if defined(DEBUG_CHUNK) + fprintf(stderr, "%s:%d tab col=%d chomp=%d\n", __FILE__, __LINE__, col, (int)(li->chomp_start - li->start)); +#endif + + if (fy_atom_style_is_block(atom->style) && col >= (int)(li->chomp_start - li->start)) { +#if defined(DEBUG_CHUNK) + fprintf(stderr, "%s:%d tab col=%d chomp=%d\n", __FILE__, __LINE__, col, (int)(li->chomp_start - li->start)); +#endif + goto do_nws; + } + + cws += advws; + + can_be_nws_end = 
true; + if (atom->style == FYAS_DOUBLE_QUOTED && ss > li->start && ss[-1] == '\\') { + can_be_nws_end = false; +#ifdef DEBUG_CHUNK + fprintf(stderr, "%s:%d backslashed tab\n", __FILE__, __LINE__); +#endif + } + + if (can_be_nws_end && !last_was_ws) { + li->nws_end = ss; + last_was_ws = true; + } + + } else { + + col++; +do_nws: + /* mark start of non whitespace */ + if (!li->nws_start) + li->nws_start = ss; + + if (li->empty) + li->empty = false; + + if (li->start_ws == (size_t)-1) + li->start_ws = cws; + + last_was_ws = false; + cws = 0; + } + + /* if we got both break */ + if (li->end && iter->chomp >= 0) + break; + } + li->e_tb = ss; + + li->final = c < 0; + + if (li->final && atom->ends_with_eof) { + + /* mark start of chomp */ + if (is_block && !li->chomp_start && (unsigned int)col >= iter->chomp) { + li->chomp_start = ss; + + /* if the character at the chomp point is whitespace + * then we're indented + */ + li->indented = fy_is_ws(lastc); +#ifdef DEBUG_CHUNK + fprintf(stderr, "%s:%d is_block && !li->chomp_start && (unsigned int)col >= iter->chomp\n", __FILE__, __LINE__); +#endif +#if defined(DEBUG_CHUNK) + fprintf(stderr, "%s:%d chomp_start=%d\n", __FILE__, __LINE__, (int)(li->chomp_start - li->start)); +#endif + + } + + if (!li->end) { + li->end = ss; + li->end_ws = cws; + li->lb_end = true; + +#ifdef DEBUG_CHUNK + fprintf(stderr, "%s:%d li->final && atom->ends_with_eof && !li->end\n", __FILE__, __LINE__); +#endif + cws = 0; + } + + /* no chomp point hit, use whatever we have here */ + if (is_block && !li->chomp_start) { + li->chomp_start = ss; +#ifdef DEBUG_CHUNK + fprintf(stderr, "%s:%d li->final && atom->ends_with_eof && !li->chomp_start\n", __FILE__, __LINE__); +#endif +#if defined(DEBUG_CHUNK) + fprintf(stderr, "%s:%d chomp_start=%d\n", __FILE__, __LINE__, (int)(li->chomp_start - li->start)); +#endif + } + + if (!last_was_ws) { + cws = 0; + li->nws_end = ss; + last_was_ws = true; +#ifdef DEBUG_CHUNK + fprintf(stderr, "%s:%d li->final && 
atom->ends_with_eof && !last_was_ws\n", __FILE__, __LINE__); +#endif + } + } + + if (!last_was_ws) + li->nws_end = ss; + + if (!li->nws_start) + li->nws_start = ss; + + if (!li->nws_end) + li->nws_end = ss; + + /* if we haven't hit the chomp, point use whatever we're now */ + if (is_block && !li->chomp_start) { + li->chomp_start = ss; +#if defined(DEBUG_CHUNK) + fprintf(stderr, "%s:%d chomp_start=%d\n", __FILE__, __LINE__, (int)(li->chomp_start - li->start)); +#endif + } + + if (li->start_ws == (size_t)-1) + li->start_ws = 0; + + if (li->end_ws == (size_t)-1) + li->end_ws = 0; + + /* mark next line to the end if no linebreak found */ + if (!li->end) { + li->end = iter->e; + li->last = true; + li->end_ws = cws; + li->lb_end = false; + goto out; + } + + /* find out if any trailing breaks exist afterwards */ + for (; (c = fy_utf8_get(ss, (e - ss), &w)) >= 0 && (fy_is_ws(c) || fy_is_lb_m(c, atom->lb_mode)); ss += w) { + + if (!li->trailing_breaks_ws && is_block && (unsigned int)col > iter->chomp) + li->trailing_breaks_ws = true; + + if (fy_is_lb_m(c, atom->lb_mode)) { + li->trailing_breaks++; + col = 0; + } else { + /* indented whitespace counts as break */ + if (fy_is_tab(c)) + col += (ts - (col % ts)); + else + col++; + } + + } + + /* and mark as last if only whitespace and breaks after this point */ + li->last = ss >= e; + +out: + assert(li->start); + assert(li->end); + assert(li->nws_start); + assert(li->nws_end); + assert(!is_block || li->chomp_start); + + li->ends_with_backslash = atom->style == FYAS_DOUBLE_QUOTED && + !li->empty && + (li->nws_end > li->nws_start && li->nws_end[-1] == '\\') && + ((li->nws_end - li->nws_start) <= 1 || li->nws_end[-2] != '\\'); +#ifdef DEBUG_CHUNK + fprintf(stderr, "%s:%d ends_with_backslash=%s\n", __FILE__, __LINE__, li->ends_with_backslash ? 
"true" : "false"); +#endif +} + +void fy_atom_iter_start(const struct fy_atom *atom, struct fy_atom_iter *iter) +{ + struct fy_atom_iter_line_info *li; + size_t len; + + if (!atom || !iter) + return; + + memset(iter, 0, sizeof(*iter)); + + iter->atom = atom; + iter->s = fy_atom_data(atom); + len = fy_atom_size(atom); + iter->e = iter->s + len; + + iter->chomp = atom->increment; + + /* default tab size is 8 */ + iter->tabsize = atom->tabsize ? atom->tabsize: 8; + + memset(iter->li, 0, sizeof(iter->li)); + li = &iter->li[1]; + fy_atom_iter_line_analyze(iter, li, iter->s, len); + li->first = true; + + /* if there's single quote at the start of a line ending the atom */ + iter->dangling_end_quote = atom->end_mark.column == 0; + iter->single_line = atom->start_mark.line == atom->end_mark.line; + iter->empty = atom->empty; + iter->last_ends_with_backslash = li->ends_with_backslash; +#ifdef DEBUG_CHUNK + fprintf(stderr, "%s:%d single_line=%s empty=%s last_ends_with_backslash=%s\n", __FILE__, __LINE__, + iter->single_line ? "true" : "false", + iter->empty ? "true" : "false", + iter->last_ends_with_backslash ? 
"true" : "false"); +#endif + + /* current is 0, next is 1 */ + iter->current = 0; + + iter->alloc = sizeof(iter->startup_chunks)/sizeof(iter->startup_chunks[0]); + iter->top = 0; + iter->read = 0; + iter->chunks = iter->startup_chunks; + + iter->done = false; + + iter->unget_c = -1; +} + +/* + * Release the chunk array if it was moved off the embedded + * startup_chunks storage; a NULL iterator is ignored. + */ +void fy_atom_iter_finish(struct fy_atom_iter *iter) +{ + if (!iter) + return; + + if (iter->chunks && iter->chunks != iter->startup_chunks) + free(iter->chunks); + + iter->chunks = NULL; +} + +/* + * Advance to the next line of the atom. Swaps the current/next slots + * of iter->li, analyzes the line following the new current one and + * fills in the s/e span and the need_nl/need_sep flags according to + * the atom style. Returns NULL when iter is NULL or no line is left. + */ +static const struct fy_atom_iter_line_info * +fy_atom_iter_line(struct fy_atom_iter *iter) +{ + const struct fy_atom *atom; + struct fy_atom_iter_line_info *li, *nli; + const char *ss; + + /* return while there's a next line */ + if (!iter) + return NULL; + + /* only read through iter once it is known to be non-NULL */ + atom = iter->atom; + + /* make next line the current one */ + iter->current = !iter->current; + + li = &iter->li[iter->current]; + + /* if we're out, we're out */ + if (li->start >= iter->e) + return NULL; + + /* scan next line (special handling for '\r\n') */ + ss = li->end; + if (ss < iter->e) { + if (*ss == '\r' && (ss + 1) < iter->e && ss[1] == '\n') + ss += 2; + else + ss += fy_utf8_width_by_first_octet((uint8_t)*ss); + } + + /* get current and next line */ + fy_atom_iter_line_analyze(iter, &iter->li[!iter->current], ss, iter->e - ss); + + /* if no more, mark the next as NULL */ + nli = &iter->li[!iter->current]; + if (nli->start >= iter->e) + nli = NULL; + + /* for quoted, output the white space start */ + if (atom->style == FYAS_SINGLE_QUOTED || atom->style == FYAS_DOUBLE_QUOTED) { + li->s = li->first ? li->start : li->nws_start; + li->e = li->last ? 
li->end : li->nws_end; + + /* just empty */ + if (li->empty && li->first && li->last && !iter->single_line) + li->s = li->e; + + } else if (atom->style == FYAS_LITERAL || atom->style == FYAS_FOLDED) { + li->s = li->chomp_start; + li->e = li->end; + if (li->empty && li->first && li->last) + li->s = li->e; + } else { + li->s = li->nws_start; + li->e = li->nws_end; + } + + /* bah, I hate this, */ + if (li->s > li->e) + li->s = li->e; + + assert(li->s <= li->e); + + /* we never fold LS or PS linebreaks (on yaml 1.1) */ + li->need_nl = fy_is_lb_LS_PS(li->actual_lb) && fy_is_lb_m(li->actual_lb, iter->atom->lb_mode) && !li->ends_with_backslash; + li->need_sep = false; + +#ifdef DEBUG_CHUNK + fprintf(stderr, "%s:%d need_nl=%s\n", __FILE__, __LINE__, li->need_nl ? "true" : "false"); +#endif + if (li->need_nl) + return li; + + switch (atom->style) { + case FYAS_PLAIN: + case FYAS_URI: + li->need_nl = !li->last && li->empty; + li->need_sep = !li->need_nl && nli && !nli->empty; + break; + + case FYAS_DOUBLE_QUOTED_MANUAL: + li->need_nl = false; + li->need_sep = false; + break; + + case FYAS_COMMENT: + li->need_nl = !li->final; + li->need_sep = false; + break; + + case FYAS_DOUBLE_QUOTED: +#ifdef DEBUG_CHUNK + fprintf(stderr, "%s:%d ends_with_backslash=%s\n", __FILE__, __LINE__, li->ends_with_backslash ? 
"true" : "false"); +#endif + if (li->ends_with_backslash) { + + li->need_nl = false; + li->need_sep = false; + break; + } + + /* fall-through */ + case FYAS_SINGLE_QUOTED: + li->need_nl = (!li->last && !li->first && li->empty) || + (nli && iter->empty && !li->first); + + if (li->need_nl) + break; + + li->need_sep = (nli && !nli->empty) || + (!nli && li->last && iter->dangling_end_quote) || + (nli && nli->final && nli->empty); + + break; + case FYAS_LITERAL: + li->need_nl = true; + break; + case FYAS_FOLDED: + li->need_nl = !li->last && (li->empty || li->indented || li->trailing_breaks_ws || (nli && nli->indented)); + if (li->need_nl) + break; + li->need_sep = nli && !nli->indented && !nli->empty; + break; + default: + break; + } + + return li; +} + +static int +fy_atom_iter_format(struct fy_atom_iter *iter) +{ + const struct fy_atom *atom = iter->atom; + const struct fy_atom_iter_line_info *li; + const char *s, *e, *t; + int value, code_length, rlen, ret; + uint8_t code[4], *tt; + int j, pending_nl; + int *pending_lb = NULL, *pending_lb_new = NULL; + int pending_lb_size = 0; + enum fy_utf8_escape esc_mode; + size_t i; + + /* done? */ + li = fy_atom_iter_line(iter); + if (!li) { + iter->done = true; + return 0; + } + if (iter->done) + return 0; + + s = li->s; + e = li->e; + + switch (atom->style) { + + case FYAS_LITERAL: + case FYAS_PLAIN: + case FYAS_FOLDED: + case FYAS_COMMENT: + if (s < e) { + ret = fy_atom_iter_add_chunk(iter, s, e - s); + if (ret) + goto out; + } + + break; + + case FYAS_SINGLE_QUOTED: + if (li->last) + e = li->nws_end; + while (s < e) { + /* find next single quote */ + t = memchr(s, '\'', e - s); + rlen = (t ? 
t : e) - s; + + ret = fy_atom_iter_add_chunk(iter, s, rlen); + if (ret) + goto out; + + /* end of string */ + if (!t) + break; + + s = t; + /* next character single quote too */ + if ((e - s) >= 2 && s[1] == '\'') + fy_atom_iter_add_chunk(iter, s, 1); + + /* skip over this single quote char */ + s++; + } + break; + + case FYAS_DOUBLE_QUOTED: + if (li->last) + e = li->nws_end; + + esc_mode = atom->json_mode ? fyue_doublequote_json : + atom->lb_mode == fylb_cr_nl ? fyue_doublequote : fyue_doublequote_yaml_1_1; + + while (s < e) { + /* find next escape */ + t = memchr(s, '\\', e - s); + /* copy up to there (or end) */ + rlen = (t ? t : e) - s; + ret = fy_atom_iter_add_chunk(iter, s, rlen); + if (ret) + goto out; + + if (!t || (e - t) < 2) + break; + + ret = fy_utf8_parse_escape(&t, e - t, esc_mode); + if (ret < 0) + goto out; + s = t; + value = ret; + + tt = fy_utf8_put(code, sizeof(code), value); + if (!tt) { + ret = -1; + goto out; + } + + ret = fy_atom_iter_add_chunk_copy(iter, (const char *)code, tt - code); + if (ret) + goto out; + } + break; + + case FYAS_URI: + while (s < e) { + /* find next escape */ + t = memchr(s, '%', e - s); + rlen = (t ? 
t : e) - s; + ret = fy_atom_iter_add_chunk(iter, s, rlen); + if (ret) + goto out; + + /* end of string */ + if (!t) + break; + s = t; + + code_length = sizeof(code); + t = fy_uri_esc(s, e - s, code, &code_length); + if (!t) { + ret = -1; + goto out; + } + + /* output escaped utf8 */ + ret = fy_atom_iter_add_chunk_copy(iter, (const char *)code, code_length); + if (ret) + goto out; + s = t; + } + break; + + case FYAS_DOUBLE_QUOTED_MANUAL: + /* manual scalar just goes out */ + ret = fy_atom_iter_add_chunk(iter, s, e - s); + if (ret) + goto out; + s = e; + break; + + default: + ret = -1; + goto out; + } + + if (li->last) { + + if (fy_atom_style_is_block(atom->style)) { + + switch (atom->chomp) { + case FYAC_STRIP: + case FYAC_CLIP: + + pending_lb_size = 16; + pending_lb = FY_ALLOCA(sizeof(*pending_lb) * pending_lb_size); + + pending_nl = 0; + if (!li->empty) { + pending_lb[0] = li->actual_lb > 0 ? li->actual_lb : '\n'; + pending_nl = 1; + } + while ((li = fy_atom_iter_line(iter)) != NULL) { + if (!iter->empty && li->chomp_start < li->end) { + for (j = 0; j < pending_nl; j++) { + ret = fy_atom_iter_add_lb(iter, pending_lb[j]); + if (ret) + goto out; + } + pending_nl = 0; + + ret = fy_atom_iter_add_chunk(iter, li->chomp_start, li->end - li->chomp_start); + if (ret) + goto out; + } + if (li->lb_end && !iter->empty) { + if (pending_nl >= pending_lb_size) { + pending_lb_new = FY_ALLOCA(sizeof(*pending_lb) * pending_lb_size * 2); + memcpy(pending_lb_new, pending_lb, sizeof(*pending_lb) * pending_lb_size); + pending_lb_size *= 2; + pending_lb = pending_lb_new; + } + pending_lb[pending_nl] = li->actual_lb > 0 ? li->actual_lb : '\n'; + pending_nl++; + } + } + + if (atom->chomp == FYAC_CLIP && (pending_nl || atom->ends_with_eof)) { + ret = fy_atom_iter_add_lb(iter, pending_lb[0]); + if (ret) + goto out; + } + break; + case FYAC_KEEP: + if (li->lb_end || atom->ends_with_eof) { + ret = fy_atom_iter_add_lb(iter, li->actual_lb > 0 ? 
li->actual_lb : '\n'); + if (ret) + goto out; + } + + /* nothing more if it's an EOF */ + if (atom->ends_with_eof && atom->empty) + break; + + while ((li = fy_atom_iter_line(iter)) != NULL) { + if (!iter->empty && li->chomp_start < li->end) { + ret = fy_atom_iter_add_chunk(iter, li->chomp_start, li->end - li->chomp_start); + if (ret) + goto out; + } + if (li->lb_end) { + ret = fy_atom_iter_add_lb(iter, li->actual_lb > 0 ? li->actual_lb : '\n'); + if (ret) + goto out; + } + } + break; + } + + iter->done = true; + + } else { + +#ifdef DEBUG_CHUNK + fprintf(stderr, "%s:%d trailing_breaks=%zu end_ws=%zu empty=%s\n", __func__, __LINE__, + li->trailing_breaks, li->end_ws, + li->empty ? "true" : "false"); +#endif + + if (li->trailing_breaks == 0 && li->end_ws > 0) { + /* end of quote in a non-blank line having white space */ + ret = fy_atom_iter_add_chunk(iter, li->nws_end, (size_t)(li->e - li->nws_end)); + if (ret) + goto out; + } else if (li->trailing_breaks == 1) { + + if (atom->style == FYAS_DOUBLE_QUOTED) { +#ifdef DEBUG_CHUNK + fprintf(stderr, "%s:%d %s <>%d single trailing break %c\n", __FILE__, __LINE__, __func__, + (int)(li->s_tb - li->s), li->s_tb[-1]); +#endif + } + + if (!li->ends_with_backslash) { + ret = fy_atom_iter_add_chunk(iter, " ", 1); + if (ret) + goto out; + } + } else if (li->trailing_breaks > 1) { + for (i = 0; i < li->trailing_breaks - 1; i++) { + ret = fy_atom_iter_add_lb(iter, '\n'); + if (ret) + goto out; + } + } + + iter->done = true; + } + + } else { + if (li->need_sep) { + ret = fy_atom_iter_add_chunk(iter, " ", 1); + if (ret) + goto out; + } + + if (li->need_nl) { + ret = fy_atom_iter_add_lb(iter, li->actual_lb > 0 ? 
li->actual_lb : '\n'); + if (ret) + goto out; + } + } + + /* got more */ + ret = 1; + +out: + return ret; +} + +const struct fy_iter_chunk * +fy_atom_iter_peek_chunk(struct fy_atom_iter *iter) +{ + if (iter->read >= iter->top) + return NULL; + + return &iter->chunks[iter->read].ic; +} + +void fy_atom_iter_advance(struct fy_atom_iter *iter, size_t len) +{ + struct fy_atom_iter_chunk *ac; + size_t rlen; + + /* while more and not out */ + while (len > 0 && iter->read < iter->top) { + + ac = iter->chunks + iter->read; + + /* get next run length */ + rlen = len > ac->ic.len ? ac->ic.len : len; + + /* remove from chunk */ + ac->ic.str += rlen; + ac->ic.len -= rlen; + + /* advance if out of data */ + if (ac->ic.len == 0) + iter->read++; + + /* remove run from length */ + len -= rlen; + } + + /* reset when everything is gone */ + if (iter->read >= iter->top) + fy_atom_iter_chunk_reset(iter); +} + +const struct fy_iter_chunk * +fy_atom_iter_chunk_next(struct fy_atom_iter *iter, const struct fy_iter_chunk *curr, int *errp) +{ + const struct fy_iter_chunk *ic; + int ret; + + ic = fy_atom_iter_peek_chunk(iter); + if (curr && curr == ic) + fy_atom_iter_advance(iter, ic->len); + + /* need to pull in data? */ + ic = fy_atom_iter_peek_chunk(iter); + if (!curr || !ic) { + fy_atom_iter_chunk_reset(iter); + + do { + ret = fy_atom_iter_format(iter); + + /* either end or error, means we don't have data */ + if (ret <= 0) { + if (errp) + *errp = ret < 0 ? 
-1 : 0; + return NULL; + } + + } while (!fy_atom_iter_peek_chunk(iter)); + } + ic = fy_atom_iter_peek_chunk(iter); + if (errp) + *errp = 0; + return ic; +} + +int fy_atom_format_text_length(struct fy_atom *atom) +{ + struct fy_atom_iter iter; + const struct fy_iter_chunk *ic; + size_t len; + int ret; + + if (!atom) + return -1; + + if (atom->storage_hint_valid) + return atom->storage_hint; + + len = 0; + fy_atom_iter_start(atom, &iter); + ic = NULL; + while ((ic = fy_atom_iter_chunk_next(&iter, ic, &ret)) != NULL) + len += ic->len; + fy_atom_iter_finish(&iter); + + /* something funky going on here */ + if ((int)len < 0) + return -1; + + if (ret != 0) + return ret; + + atom->storage_hint = (size_t)len; + atom->storage_hint_valid = true; + return (int)len; +} + +const char *fy_atom_format_text(struct fy_atom *atom, char *buf, size_t maxsz) +{ + struct fy_atom_iter iter; + const struct fy_iter_chunk *ic; + char *s, *e; + int ret; + + if (!atom || !buf) + return NULL; + + s = buf; + e = s + maxsz; + fy_atom_iter_start(atom, &iter); + ic = NULL; + while ((ic = fy_atom_iter_chunk_next(&iter, ic, &ret)) != NULL) { + /* must fit */ + if ((size_t)(e - s) < ic->len) + return NULL; + memcpy(s, ic->str, ic->len); + s += ic->len; + } + fy_atom_iter_finish(&iter); + + if (ret != 0 || s >= e) + return NULL; + *s = '\0'; + + return buf; +} + +int fy_atom_format_utf8_length(struct fy_atom *atom) +{ + struct fy_atom_iter iter; + const struct fy_iter_chunk *ic; + const char *s, *e; + size_t len; + int ret, rem, run, w; + + if (!atom) + return -1; + + len = 0; + rem = 0; + fy_atom_iter_start(atom, &iter); + ic = NULL; + while ((ic = fy_atom_iter_chunk_next(&iter, ic, &ret)) != NULL) { + s = ic->str; + e = s + ic->len; + + /* add the remainder */ + run = (e - s) > rem ? rem : (e - s); + s += run; + + /* count utf8 characters */ + while (s < e) { + w = fy_utf8_width_by_first_octet(*(uint8_t *)s); + + /* how many bytes of this run */ + run = (e - s) > w ? 
w : (e - s); + /* the remainder of this run */ + rem = w - run; + /* one more character */ + len++; + /* and advance */ + s += run; + } + } + fy_atom_iter_finish(&iter); + + /* something funky going on here */ + if ((int)len < 0) + return -1; + + if (ret != 0) + return ret; + + return (int)len; +} + + +struct fy_atom_iter * +fy_atom_iter_create(const struct fy_atom *atom) +{ + struct fy_atom_iter *iter; + + iter = malloc(sizeof(*iter)); + if (!iter) + return NULL; + if (atom) + fy_atom_iter_start(atom, iter); + else + memset(iter, 0, sizeof(*iter)); + return iter; +} + +void fy_atom_iter_destroy(struct fy_atom_iter *iter) +{ + if (!iter) + return; + + fy_atom_iter_finish(iter); + free(iter); +} + +ssize_t fy_atom_iter_read(struct fy_atom_iter *iter, void *buf, size_t count) +{ + ssize_t nread; + size_t nrun; + const struct fy_iter_chunk *ic; + int ret; + + if (!iter || !buf) + return -1; + + ret = 0; + nread = 0; + while (count > 0) { + ic = fy_atom_iter_peek_chunk(iter); + if (ic) { + nrun = count > ic->len ? ic->len : count; + memcpy(buf, ic->str, nrun); + nread += nrun; + count -= nrun; + fy_atom_iter_advance(iter, nrun); + continue; + } + + fy_atom_iter_chunk_reset(iter); + do { + ret = fy_atom_iter_format(iter); + + /* either end or error, means we don't have data */ + if (ret <= 0) + return ret == 0 ? 
nread : -1; + + } while (!fy_atom_iter_peek_chunk(iter)); + } + + return nread; +} + +int fy_atom_iter_getc(struct fy_atom_iter *iter) +{ + uint8_t buf; + ssize_t nread; + int c; + + if (!iter) + return -1; + + /* first try the pushed ungetc */ + if (iter->unget_c != -1) { + c = iter->unget_c; + iter->unget_c = -1; + return c & 0xff; + } + + /* read first octet */ + nread = fy_atom_iter_read(iter, &buf, 1); + if (nread != 1) + return -1; + + return (int)buf & 0xff; +} + +int fy_atom_iter_ungetc(struct fy_atom_iter *iter, int c) +{ + if (!iter) + return -1; + + if (iter->unget_c != -1) + return -1; + if (c == -1) { + iter->unget_c = -1; + return 0; + } + iter->unget_c = c & 0xff; + return c & 0xff; +} + +int fy_atom_iter_peekc(struct fy_atom_iter *iter) +{ + int c; + + c = fy_atom_iter_getc(iter); + if (c == -1) + return -1; + + return fy_atom_iter_ungetc(iter, c); +} + +int fy_atom_iter_utf8_get(struct fy_atom_iter *iter) +{ + uint8_t buf[4]; /* maximum utf8 is 4 octets */ + ssize_t nread; + int c, w; + + if (!iter) + return -1; + + /* first try the pushed ungetc */ + if (iter->unget_c != -1) { + c = iter->unget_c; + iter->unget_c = -1; + return c & 0xff; + } + + /* read first octet */ + nread = fy_atom_iter_read(iter, &buf[0], 1); + if (nread != 1) + return -1; + + /* get width from it (0 means illegal) */ + w = fy_utf8_width_by_first_octet(buf[0]); + if (!w) + return -1; + + /* read the rest octets (if possible) */ + if (w > 1) { + nread = fy_atom_iter_read(iter, buf + 1, w - 1); + if (nread != (w - 1)) + return -1; + } + + /* and return the decoded utf8 character */ + return fy_utf8_get(buf, w, &w); +} + +int fy_atom_iter_utf8_quoted_get(struct fy_atom_iter *iter, size_t *lenp, uint8_t *buf) +{ + ssize_t nread; + int c, w, ww; + + if (!iter || !lenp || !buf || *lenp < 4) + return -1; + + /* first try the pushed ungetc */ + if (iter->unget_c != -1) { + c = iter->unget_c; + iter->unget_c = -1; + *lenp = 0; + return c & 0xff; + } + + /* read first octet */ + nread 
= fy_atom_iter_read(iter, &buf[0], 1); + if (nread != 1) + return -1; + + /* get width from it (0 means illegal) - return it and mark it */ + w = fy_utf8_width_by_first_octet(buf[0]); + if (!w) { + *lenp = 1; + return 0; + } + + /* read the rest octets (if possible) */ + if (w > 1) { + nread = fy_atom_iter_read(iter, buf + 1, w - 1); + if (nread != (w - 1)) { + if (nread != -1 && nread < (w - 1)) + *lenp += nread; + return 0; + } + } + + /* and return the decoded utf8 character */ + c = fy_utf8_get(buf, w, &ww); + if (c >= 0) { + *lenp = 0; + return c; + } + *lenp = w; + return 0; +} + +int fy_atom_iter_utf8_unget(struct fy_atom_iter *iter, int c) +{ + if (iter->unget_c != -1) + return -1; + if (c == -1) { + iter->unget_c = -1; + return 0; + } + iter->unget_c = c; + return c; +} + +int fy_atom_iter_utf8_peek(struct fy_atom_iter *iter) +{ + int c; + + c = fy_atom_iter_utf8_get(iter); + if (c == -1) + return -1; + + return fy_atom_iter_utf8_unget(iter, c); +} + +int fy_atom_memcmp(struct fy_atom *atom, const void *ptr, size_t len) +{ + const char *dstr, *str; + size_t dlen, tlen; + struct fy_atom_iter iter; + int c, ct, ret; + + /* empty? just fine */ + if (!atom && !ptr && !len) + return 0; + + /* empty atom but not ptr */ + if (!atom && (ptr || len)) + return -1; + + /* non empty atom and empty ptr */ + if (atom && (!ptr || !len)) + return 1; + + /* direct output, nice */ + if (atom->direct_output) { + dlen = fy_atom_size(atom); + dstr = fy_atom_data(atom); + tlen = dlen > len ? len : dlen; + ret = memcmp(dstr, ptr, tlen); + if (ret) + return ret; + + return dlen == len ? 0 : len > dlen ? -1 : 1; + } + + str = ptr; + ct = -1; + fy_atom_iter_start(atom, &iter); + while ((c = fy_atom_iter_getc(&iter)) >= 0 && len) { + ct = *str & 0xff; + if (ct != c) + break; + str++; + len--; + } + fy_atom_iter_finish(&iter); + + /* out of data on both */ + if (c == -1 && !len) + return 0; + + return ct > c ? 
-1 : 1; +} + +int fy_atom_strcmp(struct fy_atom *atom, const char *str) +{ + size_t len; + + len = str ? strlen(str) : 0; + + return fy_atom_memcmp(atom, str, len); +} + +bool fy_atom_is_number(struct fy_atom *atom) +{ + struct fy_atom_iter iter; + int c, len, dec, fract, enot; + bool first_zero; + + /* empty? just fine */ + if (!atom || atom->size0) + return false; + + len = 0; + + fy_atom_iter_start(atom, &iter); + + /* skip minus sign if it's there */ + c = fy_atom_iter_peekc(&iter); + if (c == '-') { + (void)fy_atom_iter_getc(&iter); + len++; + } + + /* skip digits */ + first_zero = false; + dec = 0; + while ((c = fy_atom_iter_peekc(&iter)) >= 0 && isdigit(c)) { + if (dec == 0 && c == '0') + first_zero = true; + else if (dec == 1 && first_zero) + goto err_out; /* 0[0-9] is bad */ + (void)fy_atom_iter_getc(&iter); + dec++; + len++; + } + + /* no digits is bad */ + if (!dec) + goto err_out; + + fract = 0; + /* dot? */ + c = fy_atom_iter_peekc(&iter); + if (c == '.') { + + (void)fy_atom_iter_getc(&iter); + len++; + /* skip decimal part */ + while ((c = fy_atom_iter_peekc(&iter)) >= 0 && isdigit(c)) { + (void)fy_atom_iter_getc(&iter); + len++; + fract++; + } + + /* . 
without fractional */ + if (!fract) + goto err_out; + } + + enot = 0; + /* scientific notation */ + c = fy_atom_iter_peekc(&iter); + if (c == 'e' || c == 'E') { + (void)fy_atom_iter_getc(&iter); + len++; + + /* skip sign if it's there */ + c = fy_atom_iter_peekc(&iter); + if (c == '+' || c == '-') { + (void)fy_atom_iter_getc(&iter); + len++; + } + + /* skip exponent part */ + while ((c = fy_atom_iter_peekc(&iter)) >= 0 && isdigit(c)) { + (void)fy_atom_iter_getc(&iter); + len++; + enot++; + } + + if (!enot) + goto err_out; + } + + c = fy_atom_iter_peekc(&iter); + + fy_atom_iter_finish(&iter); + + /* everything must be consumed (and something must) */ + return c < 0 && len > 0; + +err_out: + fy_atom_iter_finish(&iter); + + return false; +} + +int fy_atom_cmp(struct fy_atom *atom1, struct fy_atom *atom2) +{ + struct fy_atom_iter iter1, iter2; + const char *d1, *d2; + size_t l1, l2, l; + int c1, c2, ret; + + /* handles NULL case too */ + if (atom1 == atom2) + return true; + + /* either null, can't do */ + if (!atom1 || !atom2) + return false; + + /* direct output? */ + if (atom1->direct_output) { + d1 = fy_atom_data(atom1); + l1 = fy_atom_size(atom1); + } else { + d1 = NULL; + l1 = 0; + } + if (atom2->direct_output) { + d2 = fy_atom_data(atom2); + l2 = fy_atom_size(atom2); + } else { + d2 = NULL; + l2 = 0; + } + + /* we have both atoms with direct output */ + if (d1 && d2) { + l = l1 > l2 ? l2 : l1; + ret = memcmp(d1, d2, l); + if (ret) + return ret; + return l1 == l2 ? 0 : l2 > l1 ? 
-1 : 1; + } + + /* only atom2 is direct */ + if (d2) + return fy_atom_memcmp(atom1, d2, l2); + + /* only atom1 is direct, (note reversing sign) */ + if (d1) + return -fy_atom_memcmp(atom2, d1, l1); + + /* neither is direct, do it with iterators */ + fy_atom_iter_start(atom1, &iter1); + fy_atom_iter_start(atom2, &iter2); + do { + c1 = fy_atom_iter_getc(&iter1); + c2 = fy_atom_iter_getc(&iter2); + } while (c1 == c2 && c1 >= 0 && c2 >= 0); + fy_atom_iter_finish(&iter2); + fy_atom_iter_finish(&iter1); + + if (c1 == -1 && c2 == -1) + return 0; + + return c2 > c1 ? -1 : 1; +} + +const struct fy_raw_line * +fy_atom_raw_line_iter_next(struct fy_atom_raw_line_iter *iter) +{ + struct fy_raw_line *l; + int c, w, col, col8, count; + unsigned int ts; + const char *s; + + if (!iter || !iter->rs || iter->rs > iter->ae) + return NULL; + + l = &iter->line; + + ts = iter->atom->tabsize; + + /* track back to the start of the line */ + s = iter->rs; + + /* we allow a single zero size iteration */ + if (l->lineno > 0 && iter->rs >= iter->ae) + return NULL; + + while (s > iter->is) { + c = fy_utf8_get_right(iter->is, (int)(s - iter->is), &w); + if (c <= 0 || fy_is_lb_m(c, iter->atom->lb_mode)) + break; + s -= w; + } + + l->line_start = s; + col = col8 = 0; + count = 0; + c = -1; + w = 0; + + /* track until the start of the content */ + while (s < iter->as) { + c = fy_utf8_get(s, (int)(iter->ae - s), &w); + /* we should never hit that */ + if (c <= 0) + return NULL; + if (fy_is_tab(c)) { + col8 += (8 - (col8 % 8)); + if (ts) + col += (ts - (col % ts)); + else + col++; + } else if (!fy_is_lb_m(c, iter->atom->lb_mode)) { + col++; + col8++; + } else + return NULL; + count++; + + s += w; + } + /* mark start of content */ + l->content_start = s; + l->content_start_col = col; + l->content_start_col8 = col8; + l->content_start_count = count; + + /* track until the end of the content (or lb) */ + while (s < iter->ae) { + c = fy_utf8_get(s, (int)(iter->ae - s), &w); + /* we should never hit that 
*/ + if (c <= 0) + return NULL; + if (fy_is_tab(c)) { + col8 += (8 - (col8 % 8)); + if (ts) + col += (ts - (col % ts)); + else + col++; + } else if (!fy_is_lb_m(c, iter->atom->lb_mode)) { + col++; + col8++; + } else + break; + count++; + + s += w; + } + + l->content_len = (size_t)(s - l->content_start); + l->content_count = count - l->content_start_count; + l->content_end_col = col; + l->content_end_col8 = col8; + + /* if the stop was due to end of the atom */ + if (s >= iter->ae) { + while (s < iter->ie) { + c = fy_utf8_get(s, (int)(iter->ie - s), &w); + /* just end of input */ + if (c <= 0) + break; + + if (fy_is_tab(c)) { + col8 += (8 - (col8 % 8)); + if (ts) + col += (ts - (col % ts)); + else + col++; + } else if (!fy_is_lb_m(c, iter->atom->lb_mode)) { + col++; + col8++; + } else + break; + count++; + + s += w; + } + } + + l->line_len = (size_t)(s - l->line_start); + l->line_count = count; + + if (fy_is_lb_m(c, iter->atom->lb_mode)) { + s += w; + /* special case for MSDOS */ + if (c == '\r' && (s < iter->ie && s[1] == '\n')) + s++; + /* len_lb includes the lb */ + l->line_len_lb = (size_t)(s - l->line_start); + } else + l->line_len_lb = l->line_len; + + /* start at line #1 */ + l->lineno++; + + iter->rs = s; + + return l; +} + +void fy_atom_raw_line_iter_start(const struct fy_atom *atom, + struct fy_atom_raw_line_iter *iter) +{ + struct fy_input *fyi; + + if (!atom || !iter) + return; + + memset(iter, 0, sizeof(*iter)); + + fyi = atom->fyi; + if (!fyi) + return; + + iter->atom = atom; + + iter->as = fy_atom_data(atom); + iter->ae = iter->as + fy_atom_size(atom); + + iter->is = fy_input_start(fyi); + iter->ie = iter->is + fy_input_size(fyi); + + iter->rs = iter->as; +} + +void fy_atom_raw_line_iter_finish(struct fy_atom_raw_line_iter *iter) +{ + /* nothing */ +} diff --git a/contrib/libs/libfyaml/src/lib/fy-atom.h b/contrib/libs/libfyaml/src/lib/fy-atom.h new file mode 100644 index 0000000000..2a8b563323 --- /dev/null +++ 
b/contrib/libs/libfyaml/src/lib/fy-atom.h @@ -0,0 +1,330 @@ +/* + * fy-atom.h - internal YAML atom methods + * + * Copyright (c) 2019 Pantelis Antoniou <pantelis.antoniou@konsulko.com> + * + * SPDX-License-Identifier: MIT + */ + +#ifndef FY_ATOM_H +#define FY_ATOM_H + +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#include <stdint.h> +#include <stdbool.h> +#include <stdio.h> +#include <stdarg.h> + +#include <libfyaml.h> + +#include "fy-list.h" +#include "fy-input.h" + +struct fy_reader; +struct fy_input; +struct fy_node; + +enum fy_atom_style { + /* YAML atoms */ + FYAS_PLAIN, + FYAS_SINGLE_QUOTED, + FYAS_DOUBLE_QUOTED, + FYAS_LITERAL, + FYAS_FOLDED, + FYAS_URI, /* special style for URIs */ + FYAS_DOUBLE_QUOTED_MANUAL, + FYAS_COMMENT /* (possibly multi line) comment */ +}; + +static inline bool fy_atom_style_is_quoted(enum fy_atom_style style) +{ + return style == FYAS_SINGLE_QUOTED || style == FYAS_DOUBLE_QUOTED; +} + +static inline bool fy_atom_style_is_block(enum fy_atom_style style) +{ + return style == FYAS_LITERAL || style == FYAS_FOLDED; +} + +enum fy_atom_chomp { + FYAC_STRIP, + FYAC_CLIP, + FYAC_KEEP, +}; + +struct fy_atom { + struct fy_mark start_mark; + struct fy_mark end_mark; + size_t storage_hint; /* guaranteed to fit in this amount of bytes */ + struct fy_input *fyi; /* input on which atom is on */ + uint64_t fyi_generation; /* to detect reallocs */ + unsigned int increment; + union { + uint64_t tozero; /* fast way to zero everything here */ + struct { + /* save a little bit of space with bitfields */ + enum fy_atom_style style : 8; /* note that it's a big perf win for bytes */ + enum fy_atom_chomp chomp : 8; + unsigned int tabsize : 8; + enum fy_lb_mode lb_mode : 1; + enum fy_flow_ws_mode fws_mode : 1; + bool direct_output : 1; /* can directly output */ + bool storage_hint_valid : 1; + bool empty : 1; /* atom contains whitespace and linebreaks only if length > 0 */ + bool has_lb : 1; /* atom contains at least one linebreak */ + bool has_ws : 
1; /* atom contains at least one whitespace */ + bool starts_with_ws : 1; /* atom starts with whitespace */ + bool starts_with_lb : 1; /* atom starts with linebreak */ + bool ends_with_ws : 1; /* atom ends with whitespace */ + bool ends_with_lb : 1; /* atom ends with linebreak */ + bool trailing_lb : 1; /* atom ends with trailing linebreaks > 1 */ + bool size0 : 1; /* atom contains absolutely nothing */ + bool valid_anchor : 1; /* atom is a valid anchor */ + bool json_mode : 1; /* atom was read in json mode */ + bool ends_with_eof : 1; /* atom ends at EOF of input */ + }; + }; +}; + +static inline bool fy_atom_is_set(const struct fy_atom *atom) +{ + return atom && atom->fyi; +} + +static inline void fy_atom_reset(struct fy_atom *atom) +{ + if (atom) + atom->fyi = NULL; +} + +static inline bool fy_atom_json_mode(struct fy_atom *handle) +{ + if (!handle) + return false; + + return handle->json_mode; +} + +static inline enum fy_lb_mode fy_atom_lb_mode(struct fy_atom *handle) +{ + if (!handle) + return fylb_cr_nl; + + return handle->lb_mode; +} + +static inline enum fy_flow_ws_mode fy_atom_flow_ws_mode(struct fy_atom *handle) +{ + if (!handle) + return fyfws_space_tab; + + return handle->fws_mode; +} + +/* all atoms are scalars so... 
*/ +static inline bool fy_atom_is_lb(struct fy_atom *handle, int c) +{ + return fy_is_generic_lb_m(c, fy_atom_lb_mode(handle)); +} + +static inline bool fy_atom_is_flow_ws(struct fy_atom *handle, int c) +{ + return fy_is_flow_ws_m(c, fy_atom_flow_ws_mode(handle)); +} + +int fy_atom_format_text_length(struct fy_atom *atom); +const char *fy_atom_format_text(struct fy_atom *atom, char *buf, size_t maxsz); + +int fy_atom_format_utf8_length(struct fy_atom *atom); + +static inline void +fy_reader_fill_atom_start(struct fy_reader *fyr, struct fy_atom *handle) +{ + /* start mark */ + fy_reader_get_mark(fyr, &handle->start_mark); + handle->fyi = fy_reader_current_input(fyr); + handle->fyi_generation = fy_reader_current_input_generation(fyr); + + handle->increment = 0; + handle->tozero = 0; + + /* note that handle->data may be zero for empty input */ +} + +static inline void +fy_reader_fill_atom_end_at(struct fy_reader *fyr, struct fy_atom *handle, struct fy_mark *end_mark) +{ + if (end_mark) + handle->end_mark = *end_mark; + else + fy_reader_get_mark(fyr, &handle->end_mark); + + /* default is plain, modify at return */ + handle->style = FYAS_PLAIN; + handle->chomp = FYAC_CLIP; + /* by default we don't do storage hints, it's the job of the caller */ + handle->storage_hint = 0; + handle->storage_hint_valid = false; + handle->tabsize = fy_reader_tabsize(fyr); + handle->json_mode = fy_reader_json_mode(fyr); + handle->lb_mode = fy_reader_lb_mode(fyr); + handle->fws_mode = fy_reader_flow_ws_mode(fyr); +} + +static inline void +fy_reader_fill_atom_end(struct fy_reader *fyr, struct fy_atom *handle) +{ + fy_reader_fill_atom_end_at(fyr, handle, NULL); +} + +struct fy_atom *fy_reader_fill_atom(struct fy_reader *fyr, int advance, struct fy_atom *handle); +struct fy_atom *fy_reader_fill_atom_mark(struct fy_reader *fyr, const struct fy_mark *start_mark, + const struct fy_mark *end_mark, struct fy_atom *handle); +struct fy_atom *fy_reader_fill_atom_at(struct fy_reader *fyr, int advance, 
int count, struct fy_atom *handle); + +#define fy_reader_fill_atom_a(_fyr, _advance) fy_reader_fill_atom((_fyr), (_advance), FY_ALLOCA(sizeof(struct fy_atom))) + +struct fy_atom *fy_fill_node_atom(struct fy_node *fyn, struct fy_atom *handle); + +#define fy_fill_node_atom_a(_fyn) fy_fill_node_atom((_fyn), FY_ALLOCA(sizeof(struct fy_atom))) + +struct fy_atom_iter_line_info { + const char *start; + const char *end; + const char *nws_start; + const char *nws_end; + const char *chomp_start; + bool empty : 1; + bool trailing_breaks_ws : 1; + bool first : 1; /* first */ + bool last : 1; /* last (only ws/lb afterwards */ + bool final : 1; /* the final iterator */ + bool indented : 1; + bool lb_end : 1; + bool need_nl : 1; + bool need_sep : 1; + bool ends_with_backslash : 1; /* last ended in \\ */ + size_t trailing_ws; + size_t trailing_breaks; + size_t start_ws, end_ws; + const char *s; + const char *e; + int actual_lb; /* the line break */ + const char *s_tb; /* start of trailing breaks run */ + const char *e_tb; /* end of trailing breaks run */ +}; + +struct fy_atom_iter_chunk { + struct fy_iter_chunk ic; + /* note that it is guaranteed for copied chunks to be + * less or equal to 10 characters (the maximum digitbuf + * for double quoted escapes */ + char inplace_buf[10]; /* small copies in place */ +}; + +#define NR_STARTUP_CHUNKS 8 +#define SZ_STARTUP_COPY_BUFFER 32 + +struct fy_atom_iter { + const struct fy_atom *atom; + const char *s, *e; + unsigned int chomp; + int tabsize; + bool single_line : 1; + bool dangling_end_quote : 1; + bool last_ends_with_backslash : 1; + bool empty : 1; + bool current : 1; + bool done : 1; /* last iteration (for block styles) */ + struct fy_atom_iter_line_info li[2]; + unsigned int alloc; + unsigned int top; + unsigned int read; + struct fy_atom_iter_chunk *chunks; + struct fy_atom_iter_chunk startup_chunks[NR_STARTUP_CHUNKS]; + int unget_c; +}; + +void fy_atom_iter_start(const struct fy_atom *atom, struct fy_atom_iter *iter); +void 
fy_atom_iter_finish(struct fy_atom_iter *iter); +const struct fy_iter_chunk *fy_atom_iter_peek_chunk(struct fy_atom_iter *iter); +const struct fy_iter_chunk *fy_atom_iter_chunk_next(struct fy_atom_iter *iter, const struct fy_iter_chunk *curr, int *errp); +void fy_atom_iter_advance(struct fy_atom_iter *iter, size_t len); + +struct fy_atom_iter *fy_atom_iter_create(const struct fy_atom *atom); +void fy_atom_iter_destroy(struct fy_atom_iter *iter); +ssize_t fy_atom_iter_read(struct fy_atom_iter *iter, void *buf, size_t count); +int fy_atom_iter_getc(struct fy_atom_iter *iter); +int fy_atom_iter_ungetc(struct fy_atom_iter *iter, int c); +int fy_atom_iter_peekc(struct fy_atom_iter *iter); +int fy_atom_iter_utf8_get(struct fy_atom_iter *iter); +int fy_atom_iter_utf8_quoted_get(struct fy_atom_iter *iter, size_t *lenp, uint8_t *buf); +int fy_atom_iter_utf8_unget(struct fy_atom_iter *iter, int c); +int fy_atom_iter_utf8_peek(struct fy_atom_iter *iter); + +int fy_atom_memcmp(struct fy_atom *atom, const void *ptr, size_t len); +int fy_atom_strcmp(struct fy_atom *atom, const char *str); +bool fy_atom_is_number(struct fy_atom *atom); +int fy_atom_cmp(struct fy_atom *atom1, struct fy_atom *atom2); + +static inline const char *fy_atom_data(const struct fy_atom *atom) +{ + if (!atom) + return NULL; + + return (char *)fy_input_start(atom->fyi) + atom->start_mark.input_pos; +} + +static inline size_t fy_atom_size(const struct fy_atom *atom) +{ + if (!atom) + return 0; + + return atom->end_mark.input_pos - atom->start_mark.input_pos; +} + +static inline bool fy_plain_atom_streq(const struct fy_atom *atom, const char *str) +{ + size_t size = strlen(str); + + if (!atom || !str || atom->style != FYAS_PLAIN || fy_atom_size(atom) != size) + return false; + + return !memcmp(str, fy_atom_data(atom), size); +} + +struct fy_raw_line { + int lineno; + const char *line_start; + size_t line_len; + size_t line_len_lb; + size_t line_count; + const char *content_start; + size_t content_len; + 
size_t content_start_count; + size_t content_count; + int content_start_col; + int content_start_col8; /* this is the tab 8 */ + int content_end_col; + int content_end_col8; +}; + +struct fy_atom_raw_line_iter { + const struct fy_atom *atom; + const char *is, *ie; /* input start, end */ + const char *as, *ae; /* atom start, end */ + const char *rs; + struct fy_raw_line line; +}; + +void fy_atom_raw_line_iter_start(const struct fy_atom *atom, + struct fy_atom_raw_line_iter *iter); +void fy_atom_raw_line_iter_finish(struct fy_atom_raw_line_iter *iter); + +const struct fy_raw_line * +fy_atom_raw_line_iter_next(struct fy_atom_raw_line_iter *iter); + +#endif diff --git a/contrib/libs/libfyaml/src/lib/fy-composer.c b/contrib/libs/libfyaml/src/lib/fy-composer.c new file mode 100644 index 0000000000..2202497d69 --- /dev/null +++ b/contrib/libs/libfyaml/src/lib/fy-composer.c @@ -0,0 +1,356 @@ +/* + * fy-composer.c - Composer support + * + * Copyright (c) 2021 Pantelis Antoniou <pantelis.antoniou@konsulko.com> + * + * SPDX-License-Identifier: MIT + */ +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#include <stdio.h> +#include <string.h> +#include <assert.h> +#include <stdlib.h> +#include <errno.h> +#include <stdarg.h> + +#include <libfyaml.h> + +#include "fy-parse.h" +#include "fy-doc.h" + +#include "fy-utils.h" + +struct fy_composer * +fy_composer_create(struct fy_composer_cfg *cfg) +{ + struct fy_composer *fyc; + struct fy_path *fypp; + + /* verify configuration and mandatory ops */ + if (!cfg || !cfg->ops || + !cfg->ops->process_event) + return NULL; + + fyc = malloc(sizeof(*fyc)); + if (!fyc) + return NULL; + memset(fyc, 0, sizeof(*fyc)); + fyc->cfg = *cfg; + + fy_path_list_init(&fyc->paths); + fypp = fy_path_create(); + if (!fypp) + goto err_no_path; + fy_path_list_add_tail(&fyc->paths, fypp); + + return fyc; + +err_no_path: + free(fyc); + return NULL; +} + +void fy_composer_destroy(struct fy_composer *fyc) +{ + struct fy_path *fypp; + + if (!fyc) + return; + + 
fy_diag_unref(fyc->cfg.diag); + while ((fypp = fy_path_list_pop(&fyc->paths)) != NULL) + fy_path_destroy(fypp); + free(fyc); +} + +static enum fy_composer_return +fy_composer_process_event_private(struct fy_composer *fyc, struct fy_event *fye, struct fy_path *fypp) +{ + const struct fy_composer_ops *ops; + struct fy_eventp *fyep; + struct fy_path_component *fypc, *fypc_last; + struct fy_path *fyppt; + struct fy_document *fyd; + bool is_collection, is_map, is_start, is_end; + int rc = 0; + enum fy_composer_return ret; + bool stop_req = false; + + assert(fyc); + assert(fye); + assert(fypp); + + fyep = fy_container_of(fye, struct fy_eventp, e); + + ops = fyc->cfg.ops; + assert(ops); + + rc = 0; + + switch (fye->type) { + case FYET_MAPPING_START: + is_collection = true; + is_start = true; + is_end = false; + is_map = true; + break; + + case FYET_MAPPING_END: + is_collection = true; + is_start = false; + is_end = true; + is_map = true; + break; + + case FYET_SEQUENCE_START: + is_collection = true; + is_start = true; + is_end = false; + is_map = false; + break; + + case FYET_SEQUENCE_END: + is_collection = true; + is_start = false; + is_end = true; + is_map = false; + break; + + case FYET_SCALAR: + is_collection = false; + is_start = true; + is_end = true; + is_map = false; + break; + + case FYET_ALIAS: + is_collection = false; + is_start = true; + is_end = true; + is_map = false; + break; + + case FYET_STREAM_START: + case FYET_STREAM_END: + case FYET_DOCUMENT_START: + case FYET_DOCUMENT_END: + return ops->process_event(fyc, fypp, fye); + + default: + return FYCR_OK_CONTINUE; + } + + fypc_last = fy_path_component_list_tail(&fypp->components); + + if (fy_path_component_is_mapping(fypc_last) && fypc_last->map.accumulating_complex_key) { + + /* get the next one */ + fyppt = fy_path_next(&fyc->paths, fypp); + assert(fyppt); + assert(fyppt != fypp); + assert(fyppt->parent == fypp); + + /* and pass along */ + ret = fy_composer_process_event_private(fyc, fye, fyppt); + if 
(!fy_composer_return_is_ok(ret)) { + /* XXX TODO handle skip */ + return ret; + } + if (!stop_req) + stop_req = ret == FYCR_OK_STOP; + + rc = fy_document_builder_process_event(fypp->fydb, fyep); + if (rc == 0) + return FYCR_OK_CONTINUE; + fyc_error_check(fyc, rc > 0, err_out, + "fy_document_builder_process_event() failed\n"); + + /* get the document */ + fyd = fy_document_builder_take_document(fypp->fydb); + fyc_error_check(fyc, fyd, err_out, + "fy_document_builder_take_document() failed\n"); + + fypc_last->map.is_complex_key = true; + fypc_last->map.accumulating_complex_key = false; + fypc_last->map.complex_key = fyd; + fypc_last->map.has_key = true; + fypc_last->map.await_key = false; + fypc_last->map.complex_key_complete = true; + fypc_last->map.root = false; + + fyppt = fy_path_list_pop_tail(&fyc->paths); + assert(fyppt); + + fy_path_destroy(fyppt); + + fyc_error_check(fyc, rc >= 0, err_out, + "fy_path_component_build_text() failed\n"); + + return !stop_req ? FYCR_OK_CONTINUE : FYCR_OK_STOP; + } + + /* start of something on a mapping */ + if (is_start && fy_path_component_is_mapping(fypc_last) && fypc_last->map.await_key && is_collection) { + + /* the configuration must support a document builder for complex keys */ + FYC_TOKEN_ERROR_CHECK(fyc, fy_event_get_token(fye), FYEM_DOC, + ops->create_document_builder, err_out, + "composer configuration does not support complex keys"); + + /* call out for creating the document builder */ + fypp->fydb = ops->create_document_builder(fyc); + fyc_error_check(fyc, fypp->fydb, err_out, + "ops->create_document_builder() failed\n"); + + /* and pass the current event; must return 0 since we know it's a collection start */ + rc = fy_document_builder_process_event(fypp->fydb, fyep); + fyc_error_check(fyc, !rc, err_out, + "fy_document_builder_process_event() failed\n"); + + fypc_last->map.is_complex_key = true; + fypc_last->map.accumulating_complex_key = true; + fypc_last->map.complex_key = NULL; + 
fypc_last->map.complex_key_complete = false; + + /* create new path */ + fyppt = fy_path_create(); + fyc_error_check(fyc, fyppt, err_out, + "fy_path_create() failed\n"); + + /* append it to the end */ + fyppt->parent = fypp; + fy_path_list_add_tail(&fyc->paths, fyppt); + + /* and pass along */ + ret = fy_composer_process_event_private(fyc, fye, fyppt); + if (!fy_composer_return_is_ok(ret)) { + /* XXX TODO handle skip */ + return ret; + } + if (!stop_req) + stop_req = ret == FYCR_OK_STOP; + + return !stop_req ? FYCR_OK_CONTINUE : FYCR_OK_STOP; + } + + if (is_start && fy_path_component_is_sequence(fypc_last)) { /* start in a sequence */ + + if (fypc_last->seq.idx < 0) + fypc_last->seq.idx = 0; + else + fypc_last->seq.idx++; + } + + if (is_collection && is_start) { + + /* collection start */ + if (is_map) { + fypc = fy_path_component_create_mapping(fypp); + fyc_error_check(fyc, fypc, err_out, + "fy_path_component_create_mapping() failed\n"); + } else { + fypc = fy_path_component_create_sequence(fypp); + fyc_error_check(fyc, fypc, err_out, + "fy_path_component_create_sequence() failed\n"); + } + + /* append to the tail */ + fy_path_component_list_add_tail(&fypp->components, fypc); + + } else if (is_collection && is_end) { + + /* collection end */ + assert(fypc_last); + fy_path_component_clear_state(fypc_last); + + } else if (!is_collection && fy_path_component_is_mapping(fypc_last) && fypc_last->map.await_key) { + + fypc_last->map.is_complex_key = false; + fypc_last->map.scalar.tag = fy_token_ref(fy_event_get_tag_token(fye)); + fypc_last->map.scalar.key = fy_token_ref(fy_event_get_token(fye)); + fypc_last->map.has_key = true; + fypc_last->map.root = false; + + } + + /* process the event */ + ret = ops->process_event(fyc, fypp, fye); + if (!fy_composer_return_is_ok(ret)) { + /* XXX TODO handle skip */ + return ret; + } + if (!stop_req) + stop_req = ret == FYCR_OK_STOP; + + if (is_collection && is_end) { + /* for the end of a collection, pop the last component */ + fypc 
= fy_path_component_list_pop_tail(&fypp->components); + assert(fypc); + + assert(fypc == fypc_last); + + fy_path_component_recycle(fypp, fypc); + + /* and get the new last */ + fypc_last = fy_path_component_list_tail(&fypp->components); + } + + /* at the end of something */ + if (is_end && fy_path_component_is_mapping(fypc_last)) { + if (!fypc_last->map.await_key) { + fy_path_component_clear_state(fypc_last); + fypc_last->map.await_key = true; + } else + fypc_last->map.await_key = false; + } + + return !stop_req ? FYCR_OK_CONTINUE : FYCR_OK_STOP; + +err_out: + return FYCR_ERROR; +} + +enum fy_composer_return +fy_composer_process_event(struct fy_composer *fyc, struct fy_event *fye) +{ + struct fy_path *fypp; + int rc; + + if (!fyc || !fye) + return -1; + + /* start at the head */ + fypp = fy_path_list_head(&fyc->paths); + + /* no top? something's very out of order */ + if (!fypp) + return -1; + + rc = fy_composer_process_event_private(fyc, fye, fypp); + + return rc; +} + +struct fy_composer_cfg *fy_composer_get_cfg(struct fy_composer *fyc) +{ + if (!fyc) + return NULL; + return &fyc->cfg; +} + +void *fy_composer_get_cfg_userdata(struct fy_composer *fyc) +{ + if (!fyc) + return NULL; + return fyc->cfg.userdata; +} + +struct fy_diag *fy_composer_get_diag(struct fy_composer *fyc) +{ + if (!fyc) + return NULL; + return fyc->cfg.diag; +} diff --git a/contrib/libs/libfyaml/src/lib/fy-composer.h b/contrib/libs/libfyaml/src/lib/fy-composer.h new file mode 100644 index 0000000000..d690f22622 --- /dev/null +++ b/contrib/libs/libfyaml/src/lib/fy-composer.h @@ -0,0 +1,57 @@ +/* + * fy-composer.h - YAML composer + * + * Copyright (c) 2021 Pantelis Antoniou <pantelis.antoniou@konsulko.com> + * + * SPDX-License-Identifier: MIT + */ +#ifndef FY_COMPOSER_H +#define FY_COMPOSER_H + +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#include <stdbool.h> + +#include <libfyaml.h> + +#include "fy-list.h" +#include "fy-typelist.h" + +#include "fy-emit-accum.h" +#include "fy-path.h" + 
/*
 * fy_uri_esc() - decode one percent-escaped UTF-8 character
 * @s: start of the escaped text; must begin with '%'
 * @len: number of bytes available at @s
 * @code: buffer receiving the decoded octets
 * @code_len: on entry the capacity of @code, on success the number of
 *            octets stored
 *
 * Decodes consecutive "%XX" triplets. The first decoded octet is handed
 * to fy_utf8_width_by_first_octet() to learn how many triplets make up
 * the character; that many are then consumed.
 *
 * Returns a pointer just past the last triplet consumed, or NULL when
 * an argument is NULL, the input is too short, a triplet does not start
 * with '%', a digit is not hexadecimal, the first octet yields a zero
 * width, or @code is too small.
 */
const char *fy_uri_esc(const char *s, size_t len, uint8_t *code, int *code_len)
{
	const char *e;
	int j, k, width;
	uint8_t octet;
	char c;

	if (!s || !code || !code_len)
		return NULL;

	e = s + len;
	width = 0;
	k = 0;
	do {
		/* check for enough space for %XX */
		if ((e - s) < 3)
			return NULL;

		/* every octet of the sequence is introduced by '%' */
		if (s[0] != '%')
			return NULL;

		octet = 0;
		for (j = 0; j < 2; j++) {
			c = s[1 + j];
			octet <<= 4;
			if (c >= '0' && c <= '9')
				octet |= c - '0';
			else if (c >= 'a' && c <= 'f')
				octet |= 10 + c - 'a';
			else if (c >= 'A' && c <= 'F')
				octet |= 10 + c - 'A';
			else
				return NULL;	/* not a hex digit */
		}

		/* the first octet decides how many triplets follow */
		if (!width) {
			width = fy_utf8_width_by_first_octet(octet);
			if (!width)
				return NULL;
			k = 0;
		}

		/* no room left in the output buffer */
		if (k >= *code_len)
			return NULL;

		code[k++] = octet;

		/* skip over the 3 character escape */
		s += 3;

	} while (--width > 0);

	*code_len = k;

	return s;
}
/*
 * fy_is_indicator() - test whether @c is one of the indicator characters
 *
 * The set is: - ? : , [ ] { } # & * ! | > ' " % @ `
 */
static inline bool fy_is_indicator(int c)
{
	/* a plain string, not a printf format: '%' is listed once, unescaped */
	return !!fy_utf8_strchr("-?:,[]{}#&*!|>'\"%@`", c);
}
/*
 * fy_is_json_unescaped() - may @c appear unescaped inside a JSON string?
 *
 * True for code points from 0x20 up to 0x10ffff (the last Unicode code
 * point), except the double quote and the backslash.
 */
static inline bool fy_is_json_unescaped(int c)
{
	return c >= 0x20 && c <= 0x10ffff && c != '"' && c != '\\';
}

/*
 * fy_is_json_unescaped_range_only() - range part of the check above
 *
 * True for any code point from 0x20 up to 0x10ffff; the double quote
 * and the backslash are not excluded here.
 */
static inline bool fy_is_json_unescaped_range_only(int c)
{
	return c >= 0x20 && c <= 0x10ffff;
}
\ + for (; cs < e && (c = fy_utf8_get(cs, e - cs, &w)) >= 0; cs += w) { \ + assert(w); \ + if (!(fy_is_ ## _kind (c))) \ + return cs; \ + assert(w); \ + } \ + return NULL; \ +} \ +struct useless_struct_for_semicolon + +FY_CTYPE_AT_BUILDER(first_alpha); +FY_CTYPE_AT_BUILDER(alpha); +FY_CTYPE_AT_BUILDER(num); +FY_CTYPE_AT_BUILDER(first_alnum); +FY_CTYPE_AT_BUILDER(alnum); +FY_CTYPE_AT_BUILDER(space); +FY_CTYPE_AT_BUILDER(tab); +FY_CTYPE_AT_BUILDER(ws); +FY_CTYPE_AT_BUILDER(hex); +FY_CTYPE_AT_BUILDER(uri); +FY_CTYPE_AT_BUILDER(z); +FY_CTYPE_AT_BUILDER(any_lb); +FY_CTYPE_AT_BUILDER(blank); +FY_CTYPE_AT_BUILDER(print); +FY_CTYPE_AT_BUILDER(printq); +FY_CTYPE_AT_BUILDER(nb_char); +FY_CTYPE_AT_BUILDER(ns_char); +FY_CTYPE_AT_BUILDER(indicator); +FY_CTYPE_AT_BUILDER(flow_indicator); +FY_CTYPE_AT_BUILDER(path_flow_key_start); +FY_CTYPE_AT_BUILDER(path_flow_key_end); +FY_CTYPE_AT_BUILDER(unicode_control); +FY_CTYPE_AT_BUILDER(unicode_space); +FY_CTYPE_AT_BUILDER(json_unescaped); + +/* + * Very special linebreak/ws methods + * Things get interesting due to \r\n and + * unicode linebreaks/spaces + */ + +/* skip for a _single_ linebreak */ +static inline const void *fy_skip_lb(const void *ptr, int left) +{ + int c, width; + + /* get the utf8 character at this point */ + c = fy_utf8_get(ptr, left, &width); + if (c < 0 || !fy_is_any_lb(c)) + return NULL; + + /* MS-DOS: check if next character is '\n' */ + if (c == '\r' && left > width && *(char *)ptr == '\n') + width++; + + return (char *)ptr + width; +} + +/* given a pointer to a chunk of memory, return pointer to first + * ws character after the last non-ws character, or the end + * of the chunk + */ +static inline const void *fy_last_non_ws(const void *ptr, int left) +{ + const char *s, *e; + int c; + + s = ptr; + e = s + left; + while (e > s) { + c = e[-1]; + if (c != ' ' && c != '\t') + return e; + e--; + + } + return NULL; +} + +const char *fy_uri_esc(const char *s, size_t len, uint8_t *code, int *code_len); + +#endif diff 
--git a/contrib/libs/libfyaml/src/lib/fy-diag.c b/contrib/libs/libfyaml/src/lib/fy-diag.c new file mode 100644 index 0000000000..ca324fa5d0 --- /dev/null +++ b/contrib/libs/libfyaml/src/lib/fy-diag.c @@ -0,0 +1,1386 @@ +/* + * fy-diag.c - diagnostics + * + * Copyright (c) 2019 Pantelis Antoniou <pantelis.antoniou@konsulko.com> + * + * SPDX-License-Identifier: MIT + */ +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#include <stdio.h> +#include <string.h> +#if defined (__unix__) || (defined (__APPLE__) && defined (__MACH__)) +#include <sys/types.h> +#include <sys/stat.h> +#include <unistd.h> +#endif +#include <assert.h> +#include <stdlib.h> +#include <errno.h> +#include <stdarg.h> +#include <ctype.h> + +#include <libfyaml.h> + +#include "fy-parse.h" + +static const char *error_type_txt[] = { + [FYET_DEBUG] = "debug", + [FYET_INFO] = "info", + [FYET_NOTICE] = "notice", + [FYET_WARNING] = "warning", + [FYET_ERROR] = "error", +}; + +int fy_diag_diag(struct fy_diag *diag, enum fy_error_type level, const char* fmt, ...) 
+{ + int ret; + + va_list args; + struct fy_diag_ctx ctx = { + .level = level, + .module = FYEM_UNKNOWN, + .source_func = __func__, + .source_file = __FILE__, + .source_line = __LINE__, + .file = NULL, + .line = 0, + .column = 0, + }; + + va_start(args, fmt); + ret = fy_diagf(diag, &ctx, fmt, args); + va_end(args); + + return ret; +} + +const char *fy_error_type_to_string(enum fy_error_type type) +{ + + if ((unsigned int)type >= FYET_MAX) + return ""; + return error_type_txt[type]; +} + +enum fy_error_type fy_string_to_error_type(const char *str) +{ + unsigned int i; + int level; + + if (!str) + return FYET_MAX; + + if (isdigit(*str)) { + level = atoi(str); + if (level >= 0 && level < FYET_MAX) + return (enum fy_error_type)level; + } + + for (i = 0; i < FYET_MAX; i++) { + if (!strcmp(str, error_type_txt[i])) + return (enum fy_error_type)i; + } + + return FYET_MAX; +} + +static const char *error_module_txt[] = { + [FYEM_UNKNOWN] = "unknown", + [FYEM_ATOM] = "atom", + [FYEM_SCAN] = "scan", + [FYEM_PARSE] = "parse", + [FYEM_DOC] = "doc", + [FYEM_BUILD] = "build", + [FYEM_INTERNAL] = "internal", + [FYEM_SYSTEM] = "system", +}; + +const char *fy_error_module_to_string(enum fy_error_module module) +{ + + if ((unsigned int)module >= FYEM_MAX) + return ""; + return error_module_txt[module]; +} + +enum fy_error_module fy_string_to_error_module(const char *str) +{ + unsigned int i; + + if (!str) + return FYEM_MAX; + + for (i = 0; i < FYEM_MAX; i++) { + if (!strcmp(str, error_module_txt[i])) + return (enum fy_error_module)i; + } + + return FYEM_MAX; +} + +static const char *fy_error_level_str(enum fy_error_type level) +{ + static const char *txt[] = { + [FYET_DEBUG] = "DBG", + [FYET_INFO] = "INF", + [FYET_NOTICE] = "NOT", + [FYET_WARNING] = "WRN", + [FYET_ERROR] = "ERR", + }; + + if ((unsigned int)level >= FYET_MAX) + return "*unknown*"; + return txt[level]; +} + +static const char *fy_error_module_str(enum fy_error_module module) +{ + static const char *txt[] = { + 
[FYEM_UNKNOWN] = "UNKWN", + [FYEM_ATOM] = "ATOM ", + [FYEM_SCAN] = "SCAN ", + [FYEM_PARSE] = "PARSE", + [FYEM_DOC] = "DOC ", + [FYEM_BUILD] = "BUILD", + [FYEM_INTERNAL] = "INTRL", + [FYEM_SYSTEM] = "SYSTM", + }; + + if ((unsigned int)module >= FYEM_MAX) + return "*unknown*"; + return txt[module]; +} + +/* really concervative options */ +static const struct fy_diag_term_info default_diag_term_info_template = { + .rows = 25, + .columns = 80 +}; + +static const struct fy_diag_cfg default_diag_cfg_template = { + .fp = NULL, /* must be overriden */ + .level = FYET_INFO, + .module_mask = (1U << FYEM_MAX) - 1, /* all modules */ + .show_source = false, + .show_position = false, + .show_type = true, + .show_module = false, + .colorize = false, /* can be overriden */ + .source_width = 50, + .position_width = 10, + .type_width = 5, + .module_width = 6, +}; + +void fy_diag_cfg_default(struct fy_diag_cfg *cfg) +{ + if (!cfg) + return; + + *cfg = default_diag_cfg_template; + cfg->fp = stderr; + cfg->colorize = isatty(fileno(stderr)) == 1; +} + +void fy_diag_cfg_from_parser_flags(struct fy_diag_cfg *cfg, enum fy_parse_cfg_flags pflags) +{ + /* nothing */ +} + +static bool fy_diag_isatty(struct fy_diag *diag) +{ + return diag && diag->cfg.fp && isatty(fileno(diag->cfg.fp)); +} + +static void fy_diag_update_term_info(struct fy_diag *diag) +{ + int fd, rows, columns, ret; + + /* start by setting things to the default */ + diag->term_info = default_diag_term_info_template; + + fd = diag->cfg.fp && isatty(fileno(diag->cfg.fp)) ? 
+ fileno(diag->cfg.fp) : -1; + + if (fd == -1) + goto out; + + rows = columns = 0; +#if defined (__unix__) || (defined (__APPLE__) && defined (__MACH__)) + ret = fy_term_query_size(fd, &rows, &columns); + if (ret != 0) + goto out; + + if (rows > 0 && columns > 0) { + diag->term_info.rows = rows; + diag->term_info.columns = columns; + } +#endif +out: + diag->terminal_probed = true; +} + +void fy_diag_errorp_free(struct fy_diag_errorp *errp) +{ + if (errp->space) + free(errp->space); + fy_token_unref(errp->e.fyt); + free(errp); +} + +struct fy_diag *fy_diag_create(const struct fy_diag_cfg *cfg) +{ + struct fy_diag *diag; + + diag = malloc(sizeof(*diag)); + if (!diag) + return NULL; + memset(diag, 0, sizeof(*diag)); + + if (!cfg) + fy_diag_cfg_default(&diag->cfg); + else + diag->cfg = *cfg; + diag->on_error = false; + diag->refs = 1; + + diag->terminal_probed = false; + if (!fy_diag_isatty(diag)) + fy_diag_update_term_info(diag); + + fy_diag_errorp_list_init(&diag->errors); + + return diag; +} + +void fy_diag_destroy(struct fy_diag *diag) +{ + struct fy_diag_errorp *errp; + + if (!diag) + return; + + diag->destroyed = true; + + /* free everything */ + while ((errp = fy_diag_errorp_list_pop(&diag->errors)) != NULL) + fy_diag_errorp_free(errp); + + return fy_diag_unref(diag); +} + +bool fy_diag_got_error(struct fy_diag *diag) +{ + return diag && diag->on_error; +} + +void fy_diag_reset_error(struct fy_diag *diag) +{ + struct fy_diag_errorp *errp; + + if (!diag) + return; + + diag->on_error = false; + + while ((errp = fy_diag_errorp_list_pop(&diag->errors)) != NULL) + fy_diag_errorp_free(errp); +} + +void fy_diag_set_collect_errors(struct fy_diag *diag, bool collect_errors) +{ + struct fy_diag_errorp *errp; + + if (!diag || diag->destroyed) + return; + + diag->collect_errors = collect_errors; + + /* clear collected errors on disable */ + if (!diag->collect_errors) { + while ((errp = fy_diag_errorp_list_pop(&diag->errors)) != NULL) + fy_diag_errorp_free(errp); + } +} + 
+struct fy_diag_error *fy_diag_errors_iterate(struct fy_diag *diag, void **prevp) +{ + struct fy_diag_errorp *errp; + + if (!diag || !prevp) + return NULL; + + if (!*prevp) + errp = fy_diag_errorp_list_head(&diag->errors); + else { + errp = *prevp; + errp = fy_diag_errorp_next(&diag->errors, errp); + } + + if (!errp) + return NULL; + *prevp = errp; + return &errp->e; +} + +void fy_diag_free(struct fy_diag *diag) +{ + if (!diag) + return; + free(diag); +} + +const struct fy_diag_cfg *fy_diag_get_cfg(struct fy_diag *diag) +{ + if (!diag) + return NULL; + return &diag->cfg; +} + +void fy_diag_set_cfg(struct fy_diag *diag, const struct fy_diag_cfg *cfg) +{ + if (!diag) + return; + + if (!cfg) + fy_diag_cfg_default(&diag->cfg); + else + diag->cfg = *cfg; + + fy_diag_update_term_info(diag); +} + +void fy_diag_set_level(struct fy_diag *diag, enum fy_error_type level) +{ + if (!diag || (unsigned int)level >= FYET_MAX) + return; + diag->cfg.level = level; +} + +void fy_diag_set_colorize(struct fy_diag *diag, bool colorize) +{ + if (!diag) + return; + diag->cfg.colorize = colorize; +} + +struct fy_diag *fy_diag_ref(struct fy_diag *diag) +{ + if (!diag) + return NULL; + + assert(diag->refs + 1 > 0); + diag->refs++; + + return diag; +} + +void fy_diag_unref(struct fy_diag *diag) +{ + if (!diag) + return; + + assert(diag->refs > 0); + + if (diag->refs == 1) + fy_diag_free(diag); + else + diag->refs--; +} + +ssize_t fy_diag_write(struct fy_diag *diag, const void *buf, size_t count) +{ + size_t ret; + + if (!diag || !buf) + return -1; + + /* no more output */ + if (diag->destroyed) + return 0; + + ret = 0; + if (diag->cfg.fp) { + ret = fwrite(buf, 1, count, diag->cfg.fp); + } else if (diag->cfg.output_fn) { + diag->cfg.output_fn(diag, diag->cfg.user, buf, count); + ret = count; + } + + return ret == count ? 
(ssize_t)count : -1; +} + +int fy_diag_vprintf(struct fy_diag *diag, const char *fmt, va_list ap) +{ + char *buf; + int rc; + + if (!diag || !fmt) + return -1; + + /* no more output */ + if (diag->destroyed) + return 0; + + if (diag->cfg.fp) + return vfprintf(diag->cfg.fp, fmt, ap); + + if (diag->cfg.output_fn) { + rc = vasprintf(&buf, fmt, ap); + if (rc < 0) + return rc; + diag->cfg.output_fn(diag, diag->cfg.user, buf, (size_t)rc); + free(buf); + return rc; + } + + return -1; +} + +int fy_diag_printf(struct fy_diag *diag, const char *fmt, ...) +{ + va_list ap; + int rc; + + va_start(ap, fmt); + rc = fy_diag_vprintf(diag, fmt, ap); + va_end(ap); + + return rc; +} + +int fy_vdiag(struct fy_diag *diag, const struct fy_diag_ctx *fydc, + const char *fmt, va_list ap) +{ + char *msg = NULL; + char *source = NULL, *position = NULL, *typestr = NULL, *modulestr = NULL; + const char *file_stripped = NULL; + const char *color_start = NULL, *color_end = NULL; + enum fy_error_type level; + int rc; + + if (!diag || !fydc || !fmt) + return -1; + + level = fydc->level; + + /* turn errors into debugs while not reset */ + if (level >= FYET_ERROR && diag->on_error) + level = FYET_DEBUG; + + if (level < diag->cfg.level) { + rc = 0; + goto out; + } + + /* check module enable mask */ + if (!(diag->cfg.module_mask & FY_BIT(fydc->module))) { + rc = 0; + goto out; + } + + alloca_vsprintf(&msg, fmt, ap); + + /* source part */ + if (diag->cfg.show_source) { + if (fydc->source_file) { + file_stripped = strrchr(fydc->source_file, '/'); + if (!file_stripped) + file_stripped = fydc->source_file; + else + file_stripped++; + } else + file_stripped = ""; + alloca_sprintf(&source, "%s:%d @%s()%s", + file_stripped, fydc->source_line, fydc->source_func, " "); + } + + /* position part */ + if (diag->cfg.show_position && fydc->line >= 0 && fydc->column >= 0) + alloca_sprintf(&position, "<%3d:%2d>%s", fydc->line, fydc->column, ": "); + + /* type part */ + if (diag->cfg.show_type) + 
alloca_sprintf(&typestr, "[%s]%s", fy_error_level_str(level), ": "); + + /* module part */ + if (diag->cfg.show_module) + alloca_sprintf(&modulestr, "<%s>%s", fy_error_module_str(fydc->module), ": "); + + if (diag->cfg.colorize) { + switch (level) { + case FYET_DEBUG: + color_start = "\x1b[37m"; /* normal white */ + break; + case FYET_INFO: + color_start = "\x1b[37;1m"; /* bright white */ + break; + case FYET_NOTICE: + color_start = "\x1b[34;1m"; /* bright blue */ + break; + case FYET_WARNING: + color_start = "\x1b[33;1m"; /* bright yellow */ + break; + case FYET_ERROR: + color_start = "\x1b[31;1m"; /* bright red */ + break; + default: /* handles FYET_MAX */ + break; + } + if (color_start) + color_end = "\x1b[0m"; + } + + rc = fy_diag_printf(diag, "%s" "%*s" "%*s" "%*s" "%*s" "%s" "%s\n", + color_start ? color_start : "", + source ? diag->cfg.source_width : 0, source ? source : "", + position ? diag->cfg.position_width : 0, position ? position : "", + typestr ? diag->cfg.type_width : 0, typestr ? typestr : "", + modulestr ? diag->cfg.module_width : 0, modulestr ? modulestr : "", + msg, + color_end ? color_end : ""); + + if (rc > 0) + rc++; + +out: + /* if it's the first error we're generating set the + * on_error flag until the top caller clears it + */ + if (!diag->on_error && fydc->level >= FYET_ERROR) + diag->on_error = true; + + return rc; +} + +int fy_diagf(struct fy_diag *diag, const struct fy_diag_ctx *fydc, + const char *fmt, ...) 
+{ + va_list ap; + int rc; + + va_start(ap, fmt); + rc = fy_vdiag(diag, fydc, fmt, ap); + va_end(ap); + + return rc; +} + +static void fy_diag_get_error_colors(struct fy_diag *diag, enum fy_error_type type, + const char **start, const char **end, const char **white) +{ + if (!diag->cfg.colorize) { + *start = *end = *white = ""; + return; + } + + switch (type) { + case FYET_DEBUG: + *start = "\x1b[37m"; /* normal white */ + break; + case FYET_INFO: + *start = "\x1b[37;1m"; /* bright white */ + break; + case FYET_NOTICE: + *start = "\x1b[34;1m"; /* bright blue */ + break; + case FYET_WARNING: + *start = "\x1b[33;1m"; /* bright yellow */ + break; + case FYET_ERROR: + *start = "\x1b[31;1m"; /* bright red */ + break; + default: + *start = "\x1b[0m"; /* catch-all reset */ + break; + } + *end = "\x1b[0m"; + *white = "\x1b[37;1m"; +} + +void fy_diag_error_atom_display(struct fy_diag *diag, enum fy_error_type type, struct fy_atom *atom) +{ + const struct fy_raw_line *l, *ln; + struct fy_raw_line l_tmp; + struct fy_atom_raw_line_iter iter; + int content_start_col, content_end_col, content_width; + int pass, cols, min_col, max_col, total_lines, max_line_count, max_line_col8, max_width; + int start_col, end_col; + const char *color_start, *color_end, *white; + bool first_line, last_line; + const char *display; + int display_len, line_shift; + char qc, first_mark; + char *rowbuf = NULL, *rbs = NULL, *rbe = NULL; + const char *s, *e; + int col8, c, w; + int tab8_len, tilde_start, tilde_width, tilde_width_m1; + size_t rowbufsz; + + (void)end_col; + + if (!diag || !atom) + return; + + fy_diag_get_error_colors(diag, type, &color_start, &color_end, &white); + + /* two passes, first one collects extents */ + + start_col = -1; + end_col = -1; + min_col = -1; + max_col = -1; + max_line_count = -1; + max_line_col8 = -1; + total_lines = 0; + line_shift = -1; + for (pass = 0; pass < 2; pass++) { + + /* on the start of the second pass */ + if (pass > 0) { + + cols = 0; + + /* if it's 
probed, use what's there */ + if (diag->terminal_probed && diag->term_info.columns > 0) + cols = diag->term_info.columns; + + /* heuristic, avoid probing terminal size if maximum column is less than 80 + * columns. This is faster and avoid problems with terminals... + */ + if (!cols && max_line_col8 < 80) + cols = 80; + + /* no choice but to probe */ + if (!cols) { + /* only need the terminal width when outputting an error */ + if (!diag->terminal_probed && fy_diag_isatty(diag)) + fy_diag_update_term_info(diag); + + cols = diag->term_info.columns; + } + + /* worse case utf8 + 2 color sequences + zero terminated */ + rowbufsz = cols * 4 + 2 * 16 + 1; + rowbuf = FY_ALLOCA(rowbufsz); + rbe = rowbuf + rowbufsz; + + /* if the maximum column number is less than the terminal + * width everything fits, and we're fine */ + if (max_line_col8 < cols) { + line_shift = 0; + } else { + max_width = max_col - min_col; + + /* try to center */ + line_shift = min_col + (max_width - cols) / 2; + + /* the start of the content must always be included */ + if (start_col < line_shift) + line_shift = start_col; + } + } + + fy_atom_raw_line_iter_start(atom, &iter); + l = fy_atom_raw_line_iter_next(&iter); + for (; l != NULL; l = ln) { + + /* save it */ + l_tmp = *l; + l = &l_tmp; + + /* get the next too */ + ln = fy_atom_raw_line_iter_next(&iter); + + first_line = l->lineno <= 1; + last_line = ln == NULL; + + content_start_col = l->content_start_col8; + content_end_col = l->content_end_col8; + + /* adjust for single and double quoted to include the quote marks (usually works) */ + if (fy_atom_style_is_quoted(atom->style)) { + qc = atom->style == FYAS_SINGLE_QUOTED ? 
'\'' : '"'; + if (first_line && l->content_start > l->line_start && + l->content_start[-1] == qc) + content_start_col--; + if (last_line && (l->content_start + l->content_len) < (l->line_start + l->line_len) && + l->content_start[l->content_len] == qc) + content_end_col++; + } + + content_width = content_end_col - content_start_col; + + if (pass == 0) { + + total_lines++; + + if (min_col < 0 || content_start_col < min_col) + min_col = content_start_col; + if (max_col < 0 || content_end_col > max_col) + max_col = content_end_col; + if (max_line_count < 0 || (int)l->line_count > max_line_count) + max_line_count = (int)l->line_count; + if (first_line) + start_col = content_start_col; + if (last_line) + end_col = content_end_col; + + /* optimize by using the content end as a starting point */ + s = l->content_start + l->content_len; + e = l->line_start + l->line_count; + col8 = l->content_end_col8; + while ((c = fy_utf8_get(s, (e - s), &w)) >= 0) { + s += w; + if (fy_is_tab(c)) + col8 += 8 - (col8 % 8); + else + col8++; + } + /* update the max column number of the lines */ + if (max_line_col8 < 0 || col8 > max_line_col8) + max_line_col8 = col8; + + continue; + } + + /* output pass */ + + /* the defaults if everything fits */ + first_mark = first_line ? 
'^' : '~'; + + tab8_len = 0; + + /* find the starting point */ + s = l->line_start; + e = s + l->line_len; + col8 = 0; + while (col8 < line_shift && (c = fy_utf8_get(s, (e - s), &w)) >= 0) { + if (fy_is_tab(c)) + col8 += 8 - (col8 % 8); + else + col8++; + s += w; + } + if (col8 > line_shift) + tab8_len = col8 - line_shift; /* the remaining of the tab */ + else + tab8_len = 0; + + /* start filling the row buffer */ + assert(rowbuf); + rbs = rowbuf; + rbe = rowbuf + rowbufsz; + + /* remaining tabs */ + while (tab8_len > 0) { + *rbs++ = ' '; + tab8_len--; + } + + /* go forward until end of line or cols */ + while (col8 < (line_shift + cols) && (c = fy_utf8_get(s, (e - s), &w)) >= 0 && rbs < rbe) { + if (fy_is_tab(c)) { + s++; + tab8_len = 8 - (col8 % 8); + col8 += tab8_len; + while (tab8_len > 0 && rbs < rbe) { + *rbs++ = ' '; + tab8_len--; + } + } else { + while (w > 0 && rbs < rbe) { + *rbs++ = *s++; + w--; + } + col8++; + } + } + display = rowbuf; + display_len = rbs - rowbuf; + + tilde_start = content_start_col - line_shift; + tilde_width = content_width; + if (tilde_start + tilde_width > cols) + tilde_width = cols - tilde_start; + if ((size_t)tilde_width >= rowbufsz) + tilde_width = rowbufsz - 1; /* guard */ + tilde_width_m1 = tilde_width > 0 ? 
(tilde_width - 1) : 0;
+
+			/* output the line */
+			fy_diag_write(diag, display, display_len);
+
+			/* set the tildes */
+			assert((int)rowbufsz > tilde_width_m1 + 1);
+			memset(rowbuf, '~', tilde_width_m1);
+			rowbuf[tilde_width_m1] = '\0';
+
+			fy_diag_printf(diag, "\n%*s%s%c%.*s%s\n",
+				       tilde_start, "",
+				       color_start, first_mark, tilde_width_m1, rowbuf, color_end);
+
+		}
+		fy_atom_raw_line_iter_finish(&iter);
+	}
+}
+
+/*
+ * Display the source excerpt for a token by handing its atom to
+ * fy_diag_error_atom_display(). Does nothing when diag or fyt is NULL.
+ */
+void fy_diag_error_token_display(struct fy_diag *diag, enum fy_error_type type, struct fy_token *fyt)
+{
+	if (!diag || !fyt)
+		return;
+
+	fy_diag_error_atom_display(diag, type, fy_token_atom(fyt));
+}
+
+/*
+ * fy_diag_vreport() - print or collect one diagnostic report
+ *
+ * @diag:  diag object to report through
+ * @fydrc: report context (type, module, token, optional location override)
+ * @fmt:   printf-style format of the message
+ * @ap:    arguments for @fmt
+ *
+ * Returns without doing anything when diag, fydrc, fmt or fydrc->fyt is NULL.
+ *
+ * The location is taken from the override fields when fydrc->has_override is
+ * set, otherwise from the token (input file name, 1-based line and column).
+ *
+ * When diag->collect_errors is clear the report is printed, followed by the
+ * source excerpt of the token, and the token reference is dropped. When it is
+ * set the report is appended to diag->errors and the entry keeps the token
+ * reference; if the entry cannot be allocated the reference is dropped so
+ * that it does not leak.
+ *
+ * A report of type FYET_ERROR sets diag->on_error.
+ */
+void fy_diag_vreport(struct fy_diag *diag,
+		     const struct fy_diag_report_ctx *fydrc,
+		     const char *fmt, va_list ap)
+{
+	const char *name, *file, *color_start = NULL, *color_end = NULL, *white = NULL;
+	char *msg_str = NULL, *name_str = NULL;
+	const struct fy_mark *start_mark;
+	int line, column;
+	struct fy_diag_errorp *errp;
+	struct fy_diag_error *err;
+	size_t spacesz, msgsz, filesz;
+	char *s;
+
+	if (!diag || !fydrc || !fmt || !fydrc->fyt)
+		return;
+
+	start_mark = fy_token_start_mark(fydrc->fyt);
+
+	if (fydrc->has_override) {
+		name = fydrc->override_file;
+		line = fydrc->override_line;
+		column = fydrc->override_column;
+	} else {
+		name = fy_input_get_filename(fy_token_get_input(fydrc->fyt));
+		line = start_mark->line + 1;
+		column = start_mark->column + 1;
+	}
+
+	/*
+	 * The file name may be missing (NULL); never hand NULL to "%s",
+	 * strlen() or memcpy(). 'name' is kept for the "is there a name" test.
+	 */
+	file = name ? name : "";
+
+	/* it will strip trailing newlines */
+	alloca_vsprintf(&msg_str, fmt, ap);
+
+	/* get the colors */
+	fy_diag_get_error_colors(diag, fydrc->type, &color_start, &color_end, &white);
+
+	if (name || (line > 0 && column > 0)) {
+		if (line > 0 && column > 0)
+			alloca_sprintf(&name_str, "%s%s:%d:%d: ", white, file, line, column);
+		else
+			alloca_sprintf(&name_str, "%s%s: ", white, file);
+	}
+
+	if (!diag->collect_errors) {
+		fy_diag_printf(diag, "%s" "%s%s: %s" "%s\n",
+			       name_str ? name_str : "",
+			       color_start, fy_error_type_to_string(fydrc->type), color_end,
+			       msg_str);
+
+		fy_diag_error_token_display(diag, fydrc->type, fydrc->fyt);
+
+		fy_token_unref(fydrc->fyt);
+
+	} else {
+		errp = malloc(sizeof(*errp));
+		if (!errp) {
+			/* nothing will own the token; drop the reference we were given */
+			fy_token_unref(fydrc->fyt);
+			goto out;
+		}
+
+		/* message and file name share one allocation: "msg\0file\0" */
+		msgsz = strlen(msg_str) + 1;
+		filesz = strlen(file) + 1;
+		spacesz = msgsz + filesz;
+
+		errp->space = malloc(spacesz);
+		if (!errp->space) {
+			free(errp);
+			fy_token_unref(fydrc->fyt);
+			goto out;
+		}
+		s = errp->space;
+
+		err = &errp->e;
+		memset(err, 0, sizeof(*err));
+		err->type = fydrc->type;
+		err->module = fydrc->module;
+		err->fyt = fydrc->fyt;	/* the entry now holds the token reference */
+		err->msg = s;
+		memcpy(s, msg_str, msgsz);
+		s += msgsz;
+		err->file = s;
+		memcpy(s, file, filesz);
+		s += filesz;
+		err->line = line;
+		err->column = column;
+
+		fy_diag_errorp_list_add_tail(&diag->errors, errp);
+	}
+out:
+	if (!diag->on_error && fydrc->type == FYET_ERROR)
+		diag->on_error = true;
+}
+
+/* Variadic front end for fy_diag_vreport(). */
+void fy_diag_report(struct fy_diag *diag,
+		    const struct fy_diag_report_ctx *fydrc,
+		    const char *fmt, ...)
+{
+	va_list ap;
+
+	va_start(ap, fmt);
+	fy_diag_vreport(diag, fydrc, fmt, ap);
+	va_end(ap);
+}
+
+/* parser */
+
+/*
+ * fy_parser_vdiag() - emit a diagnostic message on behalf of a parser
+ *
+ * @flags carries the level (FYDF_LEVEL_MASK) and module (FYDF_MODULE_MASK);
+ * @file, @line and @func describe the call site in the library source.
+ * The input position is taken from fyp_line()/fyp_column().
+ *
+ * Returns -1 when fyp, fyp->diag or fmt is NULL, 0 when the message level is
+ * below the configured level, otherwise the result of fy_vdiag(). After the
+ * message is emitted the parser stream is marked as failed if the diag
+ * object has its on_error flag set.
+ */
+int fy_parser_vdiag(struct fy_parser *fyp, unsigned int flags,
+		    const char *file, int line, const char *func,
+		    const char *fmt, va_list ap)
+{
+	struct fy_diag_ctx fydc;
+	int rc;
+
+	if (!fyp || !fyp->diag || !fmt)
+		return -1;
+
+	/* perform the enable tests early to avoid the overhead */
+	if (((flags & FYDF_LEVEL_MASK) >> FYDF_LEVEL_SHIFT) < fyp->diag->cfg.level)
+		return 0;
+
+	/* fill in fy_diag_ctx */
+	memset(&fydc, 0, sizeof(fydc));
+
+	fydc.level = (flags & FYDF_LEVEL_MASK) >> FYDF_LEVEL_SHIFT;
+	fydc.module = (flags & FYDF_MODULE_MASK) >> FYDF_MODULE_SHIFT;
+	fydc.source_file = file;
+	fydc.source_line = line;
+	fydc.source_func = func;
+	fydc.line = fyp_line(fyp);
+	fydc.column = fyp_column(fyp);
+
+	rc = fy_vdiag(fyp->diag, &fydc, fmt, ap);
+
+	if (fyp && !fyp->stream_error && fyp->diag->on_error)
+		fyp->stream_error = true;
+
+	return rc;
+}
+
+int 
fy_parser_diag(struct fy_parser *fyp, unsigned int flags,
+		   const char *file, int line, const char *func,
+		   const char *fmt, ...)
+{
+	va_list ap;
+	int rc;
+	/* variadic front end: forward the argument list to fy_parser_vdiag() */
+	va_start(ap, fmt);
+	rc = fy_parser_vdiag(fyp, flags, file, line, func, fmt, ap);
+	va_end(ap);
+
+	return rc;
+}
+/*
+ * Forward a report to the parser's diag object via fy_diag_vreport().
+ * Does nothing when fyp, fyp->diag, fydrc or fmt is NULL.
+ */
+void fy_parser_diag_vreport(struct fy_parser *fyp,
+			    const struct fy_diag_report_ctx *fydrc,
+			    const char *fmt, va_list ap)
+{
+	struct fy_diag *diag;
+
+	if (!fyp || !fyp->diag || !fydrc || !fmt)
+		return;
+
+	diag = fyp->diag;
+
+	fy_diag_vreport(diag, fydrc, fmt, ap);
+	/* mark the parser stream as failed once the diag object has on_error set */
+	if (fyp && !fyp->stream_error && diag->on_error)
+		fyp->stream_error = true;
+}
+/* Variadic front end for fy_parser_diag_vreport(). */
+void fy_parser_diag_report(struct fy_parser *fyp,
+			   const struct fy_diag_report_ctx *fydrc,
+			   const char *fmt, ...)
+{
+	va_list ap;
+
+	va_start(ap, fmt);
+	fy_parser_diag_vreport(fyp, fydrc, fmt, ap);
+	va_end(ap);
+}
+
+/* document */
+/*
+ * fy_document_vdiag() - emit a diagnostic message on behalf of a document
+ *
+ * @flags carries the level (FYDF_LEVEL_MASK) and module (FYDF_MODULE_MASK);
+ * @file, @line and @func describe the call site in the library source.
+ *
+ * Returns -1 when fyd, fmt or fyd->diag is NULL, 0 when the message level is
+ * below the configured level, otherwise the result of fy_vdiag().
+ */
+int fy_document_vdiag(struct fy_document *fyd, unsigned int flags,
+		      const char *file, int line, const char *func,
+		      const char *fmt, va_list ap)
+{
+	struct fy_diag_ctx fydc;
+	int rc;
+
+	if (!fyd || !fmt || !fyd->diag)
+		return -1;
+
+	/* perform the enable tests early to avoid the overhead */
+	if (((flags & FYDF_LEVEL_MASK) >> FYDF_LEVEL_SHIFT) < fyd->diag->cfg.level)
+		return 0;
+
+	/* fill in fy_diag_ctx */
+	memset(&fydc, 0, sizeof(fydc));
+
+	fydc.level = (flags & FYDF_LEVEL_MASK) >> FYDF_LEVEL_SHIFT;
+	fydc.module = (flags & FYDF_MODULE_MASK) >> FYDF_MODULE_SHIFT;
+	fydc.source_file = file;
+	fydc.source_line = line;
+	fydc.source_func = func;
+	fydc.line = -1;		/* -1: no input line/column is supplied here */
+	fydc.column = -1;
+
+	rc = fy_vdiag(fyd->diag, &fydc, fmt, ap);
+
+	return rc;
+}
+/* Variadic front end for fy_document_vdiag(). */
+int fy_document_diag(struct fy_document *fyd, unsigned int flags,
+		     const char *file, int line, const char *func,
+		     const char *fmt, ...) 
+{
+	va_list ap;
+	int rc;
+	/* variadic front end: forward the argument list to fy_document_vdiag() */
+	va_start(ap, fmt);
+	rc = fy_document_vdiag(fyd, flags, file, line, func, fmt, ap);
+	va_end(ap);
+
+	return rc;
+}
+/*
+ * Forward a report to the document's diag object via fy_diag_vreport().
+ * Does nothing when fyd, fyd->diag, fydrc or fmt is NULL.
+ */
+void fy_document_diag_vreport(struct fy_document *fyd,
+			      const struct fy_diag_report_ctx *fydrc,
+			      const char *fmt, va_list ap)
+{
+	if (!fyd || !fyd->diag || !fydrc || !fmt)
+		return;
+
+	fy_diag_vreport(fyd->diag, fydrc, fmt, ap);
+}
+/* Variadic front end for fy_document_diag_vreport(). */
+void fy_document_diag_report(struct fy_document *fyd,
+			     const struct fy_diag_report_ctx *fydrc,
+			     const char *fmt, ...)
+{
+	va_list ap;
+
+	va_start(ap, fmt);
+	fy_document_diag_vreport(fyd, fydrc, fmt, ap);
+	va_end(ap);
+}
+
+/*
+ * composer
+ *
+ * fy_composer_vdiag() - emit a diagnostic message on behalf of a composer
+ *
+ * The diag object is taken from fyc->cfg.diag. @flags carries the level
+ * (FYDF_LEVEL_MASK) and module (FYDF_MODULE_MASK); @file, @line and @func
+ * describe the call site in the library source.
+ *
+ * Returns -1 when fyc, fmt or fyc->cfg.diag is NULL, 0 when the message level
+ * is below the configured level, otherwise the result of fy_vdiag().
+ */
+int fy_composer_vdiag(struct fy_composer *fyc, unsigned int flags,
+		      const char *file, int line, const char *func,
+		      const char *fmt, va_list ap)
+{
+	struct fy_diag_ctx fydc;
+	int rc;
+
+	if (!fyc || !fmt || !fyc->cfg.diag)
+		return -1;
+
+	/* perform the enable tests early to avoid the overhead */
+	if (((flags & FYDF_LEVEL_MASK) >> FYDF_LEVEL_SHIFT) < fyc->cfg.diag->cfg.level)
+		return 0;
+
+	/* fill in fy_diag_ctx */
+	memset(&fydc, 0, sizeof(fydc));
+
+	fydc.level = (flags & FYDF_LEVEL_MASK) >> FYDF_LEVEL_SHIFT;
+	fydc.module = (flags & FYDF_MODULE_MASK) >> FYDF_MODULE_SHIFT;
+	fydc.source_file = file;
+	fydc.source_line = line;
+	fydc.source_func = func;
+	fydc.line = -1;		/* -1: no input line/column is supplied here */
+	fydc.column = -1;
+
+	rc = fy_vdiag(fyc->cfg.diag, &fydc, fmt, ap);
+
+	return rc;
+}
+/* Variadic front end for fy_composer_vdiag(). */
+int fy_composer_diag(struct fy_composer *fyc, unsigned int flags,
+		     const char *file, int line, const char *func,
+		     const char *fmt, ...) 
+{
+	va_list ap;
+	int rc;
+	/* variadic front end: forward the argument list to fy_composer_vdiag() */
+	va_start(ap, fmt);
+	rc = fy_composer_vdiag(fyc, flags, file, line, func, fmt, ap);
+	va_end(ap);
+
+	return rc;
+}
+/*
+ * Forward a report to the composer's diag object (fyc->cfg.diag) via
+ * fy_diag_vreport(). Does nothing when fyc, fyc->cfg.diag, fydrc or fmt
+ * is NULL.
+ */
+void fy_composer_diag_vreport(struct fy_composer *fyc,
+			      const struct fy_diag_report_ctx *fydrc,
+			      const char *fmt, va_list ap)
+{
+	if (!fyc || !fyc->cfg.diag || !fydrc || !fmt)
+		return;
+
+	fy_diag_vreport(fyc->cfg.diag, fydrc, fmt, ap);
+}
+/* Variadic front end for fy_composer_diag_vreport(). */
+void fy_composer_diag_report(struct fy_composer *fyc,
+			     const struct fy_diag_report_ctx *fydrc,
+			     const char *fmt, ...)
+{
+	va_list ap;
+
+	va_start(ap, fmt);
+	fy_composer_diag_vreport(fyc, fydrc, fmt, ap);
+	va_end(ap);
+}
+
+/*
+ * document_builder
+ *
+ * fy_document_builder_vdiag() - emit a diagnostic message on behalf of a
+ * document builder
+ *
+ * The diag object is taken from fydb->cfg.diag. @flags carries the level
+ * (FYDF_LEVEL_MASK) and module (FYDF_MODULE_MASK); @file, @line and @func
+ * describe the call site in the library source.
+ *
+ * Returns -1 when fydb, fmt or fydb->cfg.diag is NULL, 0 when the message
+ * level is below the configured level, otherwise the result of fy_vdiag().
+ */
+int fy_document_builder_vdiag(struct fy_document_builder *fydb, unsigned int flags,
+			      const char *file, int line, const char *func,
+			      const char *fmt, va_list ap)
+{
+	struct fy_diag_ctx fydc;
+	int rc;
+
+	if (!fydb || !fmt || !fydb->cfg.diag)
+		return -1;
+
+	/* perform the enable tests early to avoid the overhead */
+	if (((flags & FYDF_LEVEL_MASK) >> FYDF_LEVEL_SHIFT) < fydb->cfg.diag->cfg.level)
+		return 0;
+
+	/* fill in fy_diag_ctx */
+	memset(&fydc, 0, sizeof(fydc));
+
+	fydc.level = (flags & FYDF_LEVEL_MASK) >> FYDF_LEVEL_SHIFT;
+	fydc.module = (flags & FYDF_MODULE_MASK) >> FYDF_MODULE_SHIFT;
+	fydc.source_file = file;
+	fydc.source_line = line;
+	fydc.source_func = func;
+	fydc.line = -1;		/* -1: no input line/column is supplied here */
+	fydc.column = -1;
+
+	rc = fy_vdiag(fydb->cfg.diag, &fydc, fmt, ap);
+
+	return rc;
+}
+/* Variadic front end for fy_document_builder_vdiag(). */
+int fy_document_builder_diag(struct fy_document_builder *fydb, unsigned int flags,
+			     const char *file, int line, const char *func,
+			     const char *fmt, ...) 
+{
+	va_list ap;
+	int rc;
+	/* variadic front end: forward the argument list to fy_document_builder_vdiag() */
+	va_start(ap, fmt);
+	rc = fy_document_builder_vdiag(fydb, flags, file, line, func, fmt, ap);
+	va_end(ap);
+
+	return rc;
+}
+/*
+ * Forward a report to the document builder's diag object (fydb->cfg.diag)
+ * via fy_diag_vreport(). Does nothing when fydb, fydb->cfg.diag, fydrc or
+ * fmt is NULL.
+ */
+void fy_document_builder_diag_vreport(struct fy_document_builder *fydb,
+				      const struct fy_diag_report_ctx *fydrc,
+				      const char *fmt, va_list ap)
+{
+	if (!fydb || !fydb->cfg.diag || !fydrc || !fmt)
+		return;
+
+	fy_diag_vreport(fydb->cfg.diag, fydrc, fmt, ap);
+}
+/* Variadic front end for fy_document_builder_diag_vreport(). */
+void fy_document_builder_diag_report(struct fy_document_builder *fydb,
+				     const struct fy_diag_report_ctx *fydrc,
+				     const char *fmt, ...)
+{
+	va_list ap;
+
+	va_start(ap, fmt);
+	fy_document_builder_diag_vreport(fydb, fydrc, fmt, ap);
+	va_end(ap);
+}
+
+/* reader */
+/*
+ * fy_reader_vdiag() - emit a diagnostic message on behalf of a reader
+ *
+ * Only the level bits of @flags are used; the module is always FYEM_SCAN.
+ * The input position is taken from fyr->line and fyr->column.
+ *
+ * Returns -1 when fyr, fyr->diag or fmt is NULL, 0 when the message level is
+ * below the configured level, otherwise the result of fy_vdiag().
+ */
+int fy_reader_vdiag(struct fy_reader *fyr, unsigned int flags,
+		    const char *file, int line, const char *func,
+		    const char *fmt, va_list ap)
+{
+	struct fy_diag_ctx fydc;
+	int fydc_level, fyd_level;
+
+	if (!fyr || !fyr->diag || !fmt)
+		return -1;
+
+	/* perform the enable tests early to avoid the overhead */
+	fydc_level = (flags & FYDF_LEVEL_MASK) >> FYDF_LEVEL_SHIFT;
+	fyd_level = fyr->diag->cfg.level;
+
+	if (fydc_level < fyd_level)
+		return 0;
+
+	/* fill in fy_diag_ctx */
+	memset(&fydc, 0, sizeof(fydc));
+
+	fydc.level = fydc_level;
+	fydc.module = FYEM_SCAN;	/* reader is always scanner */
+	fydc.source_file = file;
+	fydc.source_line = line;
+	fydc.source_func = func;
+	fydc.line = fyr->line;
+	fydc.column = fyr->column;
+
+	return fy_vdiag(fyr->diag, &fydc, fmt, ap);
+}
+/* Variadic front end for fy_reader_vdiag(). */
+int fy_reader_diag(struct fy_reader *fyr, unsigned int flags,
+		   const char *file, int line, const char *func,
+		   const char *fmt, ...) 
+{
+	va_list ap;
+	int rc;
+	/* variadic front end: forward the argument list to fy_reader_vdiag() */
+	va_start(ap, fmt);
+	rc = fy_reader_vdiag(fyr, flags, file, line, func, fmt, ap);
+	va_end(ap);
+
+	return rc;
+}
+/*
+ * Forward a report to the reader's diag object via fy_diag_vreport().
+ * Does nothing when fyr, fyr->diag, fydrc or fmt is NULL.
+ */
+void fy_reader_diag_vreport(struct fy_reader *fyr,
+			    const struct fy_diag_report_ctx *fydrc,
+			    const char *fmt, va_list ap)
+{
+	if (!fyr || !fyr->diag || !fydrc || !fmt)
+		return;
+
+	fy_diag_vreport(fyr->diag, fydrc, fmt, ap);
+}
+/* Variadic front end for fy_reader_diag_vreport(). */
+void fy_reader_diag_report(struct fy_reader *fyr,
+			   const struct fy_diag_report_ctx *fydrc,
+			   const char *fmt, ...)
+{
+	va_list ap;
+
+	va_start(ap, fmt);
+	fy_reader_diag_vreport(fyr, fydrc, fmt, ap);
+	va_end(ap);
+}
+/*
+ * fy_diag_node_vreport() - report a message located at a document node
+ *
+ * Builds a report context with module FYEM_UNKNOWN and the token returned by
+ * fy_node_token(fyn), then calls fy_diag_vreport(). The diag object's
+ * on_error flag is saved before and restored after the call, so this report
+ * does not change it. Does nothing when fyn or diag is NULL.
+ */
+void fy_diag_node_vreport(struct fy_diag *diag, struct fy_node *fyn,
+			  enum fy_error_type type, const char *fmt, va_list ap)
+{
+	struct fy_diag_report_ctx drc;
+	bool save_on_error;
+
+	if (!fyn || !diag)
+		return;
+
+	save_on_error = diag->on_error;
+	diag->on_error = false;
+
+	memset(&drc, 0, sizeof(drc));
+	drc.type = type;
+	drc.module = FYEM_UNKNOWN;
+	drc.fyt = fy_node_token(fyn);
+	fy_diag_vreport(diag, &drc, fmt, ap);
+
+	diag->on_error = save_on_error;
+}
+/* Variadic front end for fy_diag_node_vreport(). */
+void fy_diag_node_report(struct fy_diag *diag, struct fy_node *fyn,
+			 enum fy_error_type type, const char *fmt, ...) 
+{
+	va_list ap;
+	/* variadic front end: forward the argument list to fy_diag_node_vreport() */
+	va_start(ap, fmt);
+	fy_diag_node_vreport(diag, fyn, type, fmt, ap);
+	va_end(ap);
+}
+/*
+ * fy_diag_node_override_vreport() - like fy_diag_node_vreport(), but the
+ * reported location is replaced by the caller supplied @file, @line and
+ * @column (passed through the override fields of the report context).
+ * The diag object's on_error flag is saved and restored around the call.
+ * Does nothing when fyn or diag is NULL.
+ */
+void fy_diag_node_override_vreport(struct fy_diag *diag, struct fy_node *fyn,
+				   enum fy_error_type type, const char *file,
+				   int line, int column,
+				   const char *fmt, va_list ap)
+{
+	struct fy_diag_report_ctx drc;
+	bool save_on_error;
+
+	if (!fyn || !diag)
+		return;
+
+	save_on_error = diag->on_error;
+	diag->on_error = false;
+
+	memset(&drc, 0, sizeof(drc));
+	drc.type = type;
+	drc.module = FYEM_UNKNOWN;
+	drc.fyt = fy_node_token(fyn);
+	drc.has_override = true;
+	drc.override_file = file;
+	drc.override_line = line;
+	drc.override_column = column;
+	fy_diag_vreport(diag, &drc, fmt, ap);
+
+	diag->on_error = save_on_error;
+}
+/* Variadic front end for fy_diag_node_override_vreport(). */
+void fy_diag_node_override_report(struct fy_diag *diag, struct fy_node *fyn,
+				  enum fy_error_type type, const char *file,
+				  int line, int column, const char *fmt, ...)
+{
+	va_list ap;
+
+	va_start(ap, fmt);
+	fy_diag_node_override_vreport(diag, fyn, type, file, line, column, fmt, ap);
+	va_end(ap);
+}
+/*
+ * Report at a node using the diag object of the node's document
+ * (fyn->fyd->diag). Does nothing when fyn or fyn->fyd is NULL.
+ */
+void fy_node_vreport(struct fy_node *fyn, enum fy_error_type type,
+		     const char *fmt, va_list ap)
+{
+	if (!fyn || !fyn->fyd)
+		return;
+
+	fy_diag_node_vreport(fyn->fyd->diag, fyn, type, fmt, ap);
+}
+/* Variadic front end for fy_node_vreport(). */
+void fy_node_report(struct fy_node *fyn, enum fy_error_type type,
+		    const char *fmt, ...)
+{
+	va_list ap;
+
+	va_start(ap, fmt);
+	fy_node_vreport(fyn, type, fmt, ap);
+	va_end(ap);
+}
+/*
+ * Report at a node with a caller supplied location, using the diag object of
+ * the node's document. Does nothing when fyn or fyn->fyd is NULL.
+ */
+void fy_node_override_vreport(struct fy_node *fyn, enum fy_error_type type,
+			      const char *file, int line, int column,
+			      const char *fmt, va_list ap)
+{
+	if (!fyn || !fyn->fyd)
+		return;
+
+	fy_diag_node_override_vreport(fyn->fyd->diag, fyn, type,
+				      file, line, column, fmt, ap);
+}
+/* Variadic front end for fy_node_override_vreport(). */
+void fy_node_override_report(struct fy_node *fyn, enum fy_error_type type,
+			     const char *file, int line, int column,
+			     const char *fmt, ...) 
+{
+	va_list ap;
+	/* variadic front end: forward the argument list to fy_node_override_vreport() */
+	va_start(ap, fmt);
+	fy_node_override_vreport(fyn, type, file, line, column, fmt, ap);
+	va_end(ap);
+}
diff --git a/contrib/libs/libfyaml/src/lib/fy-diag.h b/contrib/libs/libfyaml/src/lib/fy-diag.h
new file mode 100644
index 0000000000..2834c0a041
--- /dev/null
+++ b/contrib/libs/libfyaml/src/lib/fy-diag.h
@@ -0,0 +1,730 @@
+/*
+ * fy-diag.h - diagnostics
+ *
+ * Copyright (c) 2019 Pantelis Antoniou <pantelis.antoniou@konsulko.com>
+ *
+ * SPDX-License-Identifier: MIT
+ */
+#ifndef FY_DIAG_H
+#define FY_DIAG_H
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include <stdint.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <stdarg.h>
+
+#include <libfyaml.h>
+
+#include "fy-list.h"
+#include "fy-token.h"
+/* FY_DEVMODE is defined only when NDEBUG is not defined and HAVE_DEVMODE is set */
+#if !defined(NDEBUG) && defined(HAVE_DEVMODE) && HAVE_DEVMODE
+#define FY_DEVMODE
+#else
+#undef FY_DEVMODE
+#endif
+
+/* error flags (above 0x100 is library specific) */
+#define FYEF_SOURCE	0x0001
+#define FYEF_POSITION	0x0002
+#define FYEF_TYPE	0x0004
+#define FYEF_USERSTART	0x0100
+/* diag flags, bits 0-3: the message level, an FYET_* value */
+#define FYDF_LEVEL_SHIFT	0
+#define FYDF_LEVEL_MASK		(0x0f << FYDF_LEVEL_SHIFT)
+#define FYDF_LEVEL(x)		(((unsigned int)(x) << FYDF_LEVEL_SHIFT) & FYDF_LEVEL_MASK)
+#define FYDF_DEBUG		FYDF_LEVEL(FYET_DEBUG)
+#define FYDF_INFO		FYDF_LEVEL(FYET_INFO)
+#define FYDF_NOTICE		FYDF_LEVEL(FYET_NOTICE)
+#define FYDF_WARNING		FYDF_LEVEL(FYET_WARNING)
+#define FYDF_ERROR		FYDF_LEVEL(FYET_ERROR)
+/*
+ * diag flags, bits 4-7: the originating module, an FYEM_* value.
+ * FYDF_MODULE_USER(x) maps x (masked to 0-7) onto module values 8-15.
+ */
+#define FYDF_MODULE_SHIFT	4
+#define FYDF_MODULE_MASK	(0x0f << FYDF_MODULE_SHIFT)
+#define FYDF_MODULE(x)		(((unsigned int)(x) << FYDF_MODULE_SHIFT) & FYDF_MODULE_MASK)
+#define FYDF_ATOM		FYDF_MODULE(FYEM_ATOM)
+#define FYDF_SCANNER		FYDF_MODULE(FYEM_SCANNER)
+#define FYDF_PARSER		FYDF_MODULE(FYEM_PARSER)
+#define FYDF_TREE		FYDF_MODULE(FYEM_TREE)
+#define FYDF_BUILDER		FYDF_MODULE(FYEM_BUILDER)
+#define FYDF_INTERNAL		FYDF_MODULE(FYEM_INTERNAL)
+#define FYDF_SYSTEM		FYDF_MODULE(FYEM_SYSTEM)
+#define FYDF_MODULE_USER_MASK	7
+#define FYDF_MODULE_USER(x)	FYDF_MODULE(8 + ((x) & 
FYDF_MODULE_USER_MASK))
+/* terminal dimensions, as stored in struct fy_diag's term_info */
+struct fy_diag_term_info {
+	int rows;
+	int columns;
+};
+/* everything fy_diag_vreport() needs to know about one report */
+struct fy_diag_report_ctx {
+	enum fy_error_type type;
+	enum fy_error_module module;
+	struct fy_token *fyt;		/* token locating the report; required by fy_diag_vreport() */
+	bool has_override;		/* when set, the override_* fields replace the token's location */
+	const char *override_file;
+	int override_line;
+	int override_column;
+};
+/* list entry for a report queued on struct fy_diag's errors list */
+FY_TYPE_FWD_DECL_LIST(diag_errorp);
+struct fy_diag_errorp {
+	struct fy_list_head node;
+	char *space;			/* one allocation backing e.msg and e.file */
+	struct fy_diag_error e;
+};
+FY_TYPE_DECL_LIST(diag_errorp);
+/* the diag object */
+struct fy_diag {
+	struct fy_diag_cfg cfg;
+	int refs;
+	bool on_error : 1;		/* set by fy_diag_vreport() on an FYET_ERROR report */
+	bool destroyed : 1;
+	bool collect_errors : 1;	/* fy_diag_vreport() queues reports on errors instead of printing */
+	bool terminal_probed : 1;
+	struct fy_diag_term_info term_info;
+	struct fy_diag_errorp_list errors;
+};
+
+void fy_diag_free(struct fy_diag *diag);
+/* report through a diag object; see the definitions in fy-diag.c */
+void fy_diag_vreport(struct fy_diag *diag,
+		     const struct fy_diag_report_ctx *fydrc,
+		     const char *fmt, va_list ap);
+void fy_diag_report(struct fy_diag *diag,
+		    const struct fy_diag_report_ctx *fydrc,
+		    const char *fmt, ...)
+			FY_ATTRIBUTE(format(printf, 3, 4));
+/* expands to an "unused" attribute when FY_DEVMODE is off, to nothing otherwise */
+#ifdef FY_DEVMODE
+#define __FY_DEBUG_UNUSED__	/* nothing */
+#else
+#define __FY_DEBUG_UNUSED__	FY_ATTRIBUTE(__unused__)
+#endif
+
+/* parser diagnostics */
+
+struct fy_parser;
+
+void fy_diag_cfg_from_parser_flags(struct fy_diag_cfg *cfg, enum fy_parse_cfg_flags pflags);
+/* message output on behalf of a parser; flags = FYDF level and module bits */
+int fy_parser_vdiag(struct fy_parser *fyp, unsigned int flags,
+		    const char *file, int line, const char *func,
+		    const char *fmt, va_list ap);
+
+int fy_parser_diag(struct fy_parser *fyp, unsigned int flags,
+		   const char *file, int line, const char *func,
+		   const char *fmt, ...) 
+			FY_ATTRIBUTE(format(printf, 6, 7));
+/* display the source excerpt that an atom or token refers to */
+void fy_diag_error_atom_display(struct fy_diag *diag, enum fy_error_type type,
+				struct fy_atom *atom);
+void fy_diag_error_token_display(struct fy_diag *diag, enum fy_error_type type,
+				 struct fy_token *fyt);
+/* token based reports issued through a parser */
+void fy_parser_diag_vreport(struct fy_parser *fyp,
+			    const struct fy_diag_report_ctx *fydrc,
+			    const char *fmt, va_list ap);
+void fy_parser_diag_report(struct fy_parser *fyp,
+			   const struct fy_diag_report_ctx *fydrc,
+			   const char *fmt, ...)
+			FY_ATTRIBUTE(format(printf, 3, 4));
+/*
+ * fyp_debug(): debug message tagged with a module; expands to an empty
+ * statement (arguments are not evaluated) unless FY_DEVMODE is defined.
+ */
+#ifdef FY_DEVMODE
+
+#define fyp_debug(_fyp, _module, _fmt, ...) \
+	fy_parser_diag((_fyp), FYET_DEBUG | FYDF_MODULE(_module), \
+			__FILE__, __LINE__, __func__, \
+			(_fmt) , ## __VA_ARGS__)
+#else
+
+#define fyp_debug(_fyp, _module, _fmt, ...) \
+	do { } while(0)
+
+#endif
+/* fixed-level message helpers; the call site file/line/function is recorded */
+#define fyp_info(_fyp, _fmt, ...) \
+	fy_parser_diag((_fyp), FYET_INFO, __FILE__, __LINE__, __func__, \
+			(_fmt) , ## __VA_ARGS__)
+#define fyp_notice(_fyp, _fmt, ...) \
+	fy_parser_diag((_fyp), FYET_NOTICE, __FILE__, __LINE__, __func__, \
+			(_fmt) , ## __VA_ARGS__)
+#define fyp_warning(_fyp, _fmt, ...) \
+	fy_parser_diag((_fyp), FYET_WARNING, __FILE__, __LINE__, __func__, \
+			(_fmt) , ## __VA_ARGS__)
+#define fyp_error(_fyp, _fmt, ...) \
+	fy_parser_diag((_fyp), FYET_ERROR, __FILE__, __LINE__, __func__, \
+			(_fmt) , ## __VA_ARGS__)
+/* fyp_debug() shorthands with the module preset */
+#define fyp_scan_debug(_fyp, _fmt, ...) \
+	fyp_debug((_fyp), FYEM_SCAN, (_fmt) , ## __VA_ARGS__)
+#define fyp_parse_debug(_fyp, _fmt, ...) \
+	fyp_debug((_fyp), FYEM_PARSE, (_fmt) , ## __VA_ARGS__)
+#define fyp_doc_debug(_fyp, _fmt, ...) \
+	fyp_debug((_fyp), FYEM_DOC, (_fmt) , ## __VA_ARGS__)
+/* when _cond is false: emit an error message and jump to _label */
+#define fyp_error_check(_fyp, _cond, _label, _fmt, ...) \
+	do { \
+		if (!(_cond)) { \
+			fyp_error((_fyp), _fmt, ## __VA_ARGS__); \
+			goto _label ; \
+		} \
+	} while(0)
+/*
+ * Build a zeroed report context from type, module and token and report it
+ * through the parser. The token is handed over exactly as given.
+ */
+#define _FYP_TOKEN_DIAG(_fyp, _fyt, _type, _module, _fmt, ...) 
\
+	do { \
+		struct fy_diag_report_ctx _drc; \
+		memset(&_drc, 0, sizeof(_drc)); \
+		_drc.type = (_type); \
+		_drc.module = (_module); \
+		_drc.fyt = (_fyt); \
+		fy_parser_diag_report((_fyp), &_drc, (_fmt) , ## __VA_ARGS__); \
+	} while(0)
+/* report at an existing token; fy_token_ref() is applied to it before it is handed over */
+#define FYP_TOKEN_DIAG(_fyp, _fyt, _type, _module, _fmt, ...) \
+	_FYP_TOKEN_DIAG(_fyp, fy_token_ref(_fyt), _type, _module, _fmt, ## __VA_ARGS__)
+/*
+ * report at a parser position: an FYTT_INPUT_MARKER token is created from an
+ * atom filled in by fy_fill_atom_at() with (_adv, _cnt)
+ */
+#define FYP_PARSE_DIAG(_fyp, _adv, _cnt, _type, _module, _fmt, ...) \
+	_FYP_TOKEN_DIAG(_fyp, \
+		fy_token_create(FYTT_INPUT_MARKER, \
+			fy_fill_atom_at((_fyp), (_adv), (_cnt), \
+			FY_ALLOCA(sizeof(struct fy_atom)))), \
+		_type, _module, _fmt, ## __VA_ARGS__)
+/*
+ * report between two marks: an FYTT_INPUT_MARKER token is created from an
+ * atom filled in by fy_fill_atom_mark() with (_sm, _em)
+ */
+#define FYP_MARK_DIAG(_fyp, _sm, _em, _type, _module, _fmt, ...) \
+	_FYP_TOKEN_DIAG(_fyp, \
+		fy_token_create(FYTT_INPUT_MARKER, \
+			fy_fill_atom_mark(((_fyp)), (_sm), (_em), \
+			FY_ALLOCA(sizeof(struct fy_atom)))), \
+		_type, _module, _fmt, ## __VA_ARGS__)
+/* report at a node, using the token returned by fy_node_token() */
+#define FYP_NODE_DIAG(_fyp, _fyn, _type, _module, _fmt, ...) \
+	_FYP_TOKEN_DIAG(_fyp, fy_node_token(_fyn), _type, _module, _fmt, ## __VA_ARGS__)
+/* FYET_ERROR variants of the reports above */
+#define FYP_TOKEN_ERROR(_fyp, _fyt, _module, _fmt, ...) \
+	FYP_TOKEN_DIAG(_fyp, _fyt, FYET_ERROR, _module, _fmt, ## __VA_ARGS__)
+
+#define FYP_PARSE_ERROR(_fyp, _adv, _cnt, _module, _fmt, ...) \
+	FYP_PARSE_DIAG(_fyp, _adv, _cnt, FYET_ERROR, _module, _fmt, ## __VA_ARGS__)
+
+#define FYP_MARK_ERROR(_fyp, _sm, _em, _module, _fmt, ...) \
+	FYP_MARK_DIAG(_fyp, _sm, _em, FYET_ERROR, _module, _fmt, ## __VA_ARGS__)
+
+#define FYP_NODE_ERROR(_fyp, _fyn, _module, _fmt, ...) \
+	FYP_NODE_DIAG(_fyp, _fyn, FYET_ERROR, _module, _fmt, ## __VA_ARGS__)
+/* *_ERROR_CHECK: when _cond is false, issue the error report and jump to _label */
+#define FYP_TOKEN_ERROR_CHECK(_fyp, _fyt, _module, _cond, _label, _fmt, ...) \
+	do { \
+		if (!(_cond)) { \
+			FYP_TOKEN_ERROR(_fyp, _fyt, _module, _fmt, ## __VA_ARGS__); \
+			goto _label; \
+		} \
+	} while(0)
+
+#define FYP_PARSE_ERROR_CHECK(_fyp, _adv, _cnt, _module, _cond, _label, _fmt, ...) 
\
+	do { \
+		if (!(_cond)) { \
+			FYP_PARSE_ERROR(_fyp, _adv, _cnt, _module, _fmt, ## __VA_ARGS__); \
+			goto _label; \
+		} \
+	} while(0)
+/* when _cond is false: report an error between two marks and jump to _label */
+#define FYP_MARK_ERROR_CHECK(_fyp, _sm, _em, _module, _cond, _label, _fmt, ...) \
+	do { \
+		if (!(_cond)) { \
+			FYP_MARK_ERROR(_fyp, _sm, _em, _module, _fmt, ## __VA_ARGS__); \
+			goto _label; \
+		} \
+	} while(0)
+/* when _cond is false: report an error at a node and jump to _label */
+#define FYP_NODE_ERROR_CHECK(_fyp, _fyn, _module, _cond, _label, _fmt, ...) \
+	do { \
+		if (!(_cond)) { \
+			FYP_NODE_ERROR(_fyp, _fyn, _module, _fmt, ## __VA_ARGS__); \
+			goto _label; \
+		} \
+	} while(0)
+/* FYET_WARNING variants of the token/position/mark/node reports */
+#define FYP_TOKEN_WARNING(_fyp, _fyt, _module, _fmt, ...) \
+	FYP_TOKEN_DIAG(_fyp, _fyt, FYET_WARNING, _module, _fmt, ## __VA_ARGS__)
+
+#define FYP_PARSE_WARNING(_fyp, _adv, _cnt, _module, _fmt, ...) \
+	FYP_PARSE_DIAG(_fyp, _adv, _cnt, FYET_WARNING, _module, _fmt, ## __VA_ARGS__)
+
+#define FYP_MARK_WARNING(_fyp, _sm, _em, _module, _fmt, ...) \
+	FYP_MARK_DIAG(_fyp, _sm, _em, FYET_WARNING, _module, _fmt, ## __VA_ARGS__)
+/* NOTE(review): unlike the other *_WARNING macros this one takes a _type argument, which the expansion ignores */
+#define FYP_NODE_WARNING(_fyp, _fyn, _type, _module, _fmt, ...) \
+	FYP_NODE_DIAG(_fyp, _fyn, FYET_WARNING, _module, _fmt, ## __VA_ARGS__)
+
+/* reader diagnostics */
+
+struct fy_reader;
+/* message output on behalf of a reader; only the level bits of flags are used */
+int fy_reader_vdiag(struct fy_reader *fyr, unsigned int flags,
+		    const char *file, int line, const char *func,
+		    const char *fmt, va_list ap);
+
+int fy_reader_diag(struct fy_reader *fyr, unsigned int flags,
+		   const char *file, int line, const char *func,
+		   const char *fmt, ...)
+			FY_ATTRIBUTE(format(printf, 6, 7));
+/* token based reports issued through a reader */
+void fy_reader_diag_vreport(struct fy_reader *fyr,
+			    const struct fy_diag_report_ctx *fydrc,
+			    const char *fmt, va_list ap);
+void fy_reader_diag_report(struct fy_reader *fyr,
+			   const struct fy_diag_report_ctx *fydrc,
+			   const char *fmt, ...)
+			FY_ATTRIBUTE(format(printf, 3, 4));
+/*
+ * fyr_debug(): debug message from a reader; expands to an empty statement
+ * (arguments are not evaluated) unless FY_DEVMODE is defined.
+ */
+#ifdef FY_DEVMODE
+
+#define fyr_debug(_fyr, _fmt, ...) \
+	fy_reader_diag((_fyr), FYET_DEBUG, \
+			__FILE__, __LINE__, __func__, \
+			(_fmt) , ## __VA_ARGS__)
+#else
+
+#define fyr_debug(_fyr, _fmt, ...) 
\
+	do { } while(0)
+
+#endif
+/* fixed-level message helpers; the call site file/line/function is recorded */
+#define fyr_info(_fyr, _fmt, ...) \
+	fy_reader_diag((_fyr), FYET_INFO, __FILE__, __LINE__, __func__, \
+			(_fmt) , ## __VA_ARGS__)
+#define fyr_notice(_fyr, _fmt, ...) \
+	fy_reader_diag((_fyr), FYET_NOTICE, __FILE__, __LINE__, __func__, \
+			(_fmt) , ## __VA_ARGS__)
+#define fyr_warning(_fyr, _fmt, ...) \
+	fy_reader_diag((_fyr), FYET_WARNING, __FILE__, __LINE__, __func__, \
+			(_fmt) , ## __VA_ARGS__)
+#define fyr_error(_fyr, _fmt, ...) \
+	fy_reader_diag((_fyr), FYET_ERROR, __FILE__, __LINE__, __func__, \
+			(_fmt) , ## __VA_ARGS__)
+/* when _cond is false: emit an error message and jump to _label */
+#define fyr_error_check(_fyr, _cond, _label, _fmt, ...) \
+	do { \
+		if (!(_cond)) { \
+			fyr_error((_fyr), _fmt, ## __VA_ARGS__); \
+			goto _label ; \
+		} \
+	} while(0)
+/*
+ * Build a zeroed report context from type, module and token and report it
+ * through the reader. The token is handed over exactly as given.
+ */
+#define _FYR_TOKEN_DIAG(_fyr, _fyt, _type, _module, _fmt, ...) \
+	do { \
+		struct fy_diag_report_ctx _drc; \
+		memset(&_drc, 0, sizeof(_drc)); \
+		_drc.type = (_type); \
+		_drc.module = (_module); \
+		_drc.fyt = (_fyt); \
+		fy_reader_diag_report((_fyr), &_drc, (_fmt) , ## __VA_ARGS__); \
+	} while(0)
+/* report at an existing token; fy_token_ref() is applied to it before it is handed over */
+#define FYR_TOKEN_DIAG(_fyr, _fyt, _type, _module, _fmt, ...) \
+	_FYR_TOKEN_DIAG(_fyr, fy_token_ref(_fyt), _type, _module, _fmt, ## __VA_ARGS__)
+/*
+ * report at a reader position: an FYTT_INPUT_MARKER token is created from an
+ * atom filled in by fy_reader_fill_atom_at() with (_adv, _cnt)
+ */
+#define FYR_PARSE_DIAG(_fyr, _adv, _cnt, _type, _module, _fmt, ...) \
+	_FYR_TOKEN_DIAG(_fyr, \
+		fy_token_create(FYTT_INPUT_MARKER, \
+			fy_reader_fill_atom_at((_fyr), (_adv), (_cnt), \
+			FY_ALLOCA(sizeof(struct fy_atom)))), \
+		_type, _module, _fmt, ## __VA_ARGS__)
+/*
+ * report between two marks: an FYTT_INPUT_MARKER token is created from an
+ * atom filled in by fy_reader_fill_atom_mark() with (_sm, _em)
+ */
+#define FYR_MARK_DIAG(_fyr, _sm, _em, _type, _module, _fmt, ...) \
+	_FYR_TOKEN_DIAG(_fyr, \
+		fy_token_create(FYTT_INPUT_MARKER, \
+			fy_reader_fill_atom_mark(((_fyr)), (_sm), (_em), \
+			FY_ALLOCA(sizeof(struct fy_atom)))), \
+		_type, _module, _fmt, ## __VA_ARGS__)
+/* report at a node, using the token returned by fy_node_token() */
+#define FYR_NODE_DIAG(_fyr, _fyn, _type, _module, _fmt, ...) \
+	_FYR_TOKEN_DIAG(_fyr, fy_node_token(_fyn), _type, _module, _fmt, ## __VA_ARGS__)
+/* FYET_ERROR variants of the reports above */
+#define FYR_TOKEN_ERROR(_fyr, _fyt, _module, _fmt, ...) 
\
+	FYR_TOKEN_DIAG(_fyr, _fyt, FYET_ERROR, _module, _fmt, ## __VA_ARGS__)
+
+#define FYR_PARSE_ERROR(_fyr, _adv, _cnt, _module, _fmt, ...) \
+	FYR_PARSE_DIAG(_fyr, _adv, _cnt, FYET_ERROR, _module, _fmt, ## __VA_ARGS__)
+
+#define FYR_MARK_ERROR(_fyr, _sm, _em, _module, _fmt, ...) \
+	FYR_MARK_DIAG(_fyr, _sm, _em, FYET_ERROR, _module, _fmt, ## __VA_ARGS__)
+
+#define FYR_NODE_ERROR(_fyr, _fyn, _module, _fmt, ...) \
+	FYR_NODE_DIAG(_fyr, _fyn, FYET_ERROR, _module, _fmt, ## __VA_ARGS__)
+/* *_ERROR_CHECK: when _cond is false, issue the error report and jump to _label */
+#define FYR_TOKEN_ERROR_CHECK(_fyr, _fyt, _module, _cond, _label, _fmt, ...) \
+	do { \
+		if (!(_cond)) { \
+			FYR_TOKEN_ERROR(_fyr, _fyt, _module, _fmt, ## __VA_ARGS__); \
+			goto _label; \
+		} \
+	} while(0)
+
+#define FYR_PARSE_ERROR_CHECK(_fyr, _adv, _cnt, _module, _cond, _label, _fmt, ...) \
+	do { \
+		if (!(_cond)) { \
+			FYR_PARSE_ERROR(_fyr, _adv, _cnt, _module, _fmt, ## __VA_ARGS__); \
+			goto _label; \
+		} \
+	} while(0)
+
+#define FYR_MARK_ERROR_CHECK(_fyr, _sm, _em, _module, _cond, _label, _fmt, ...) \
+	do { \
+		if (!(_cond)) { \
+			FYR_MARK_ERROR(_fyr, _sm, _em, _module, _fmt, ## __VA_ARGS__); \
+			goto _label; \
+		} \
+	} while(0)
+
+#define FYR_NODE_ERROR_CHECK(_fyr, _fyn, _module, _cond, _label, _fmt, ...) \
+	do { \
+		if (!(_cond)) { \
+			FYR_NODE_ERROR(_fyr, _fyn, _module, _fmt, ## __VA_ARGS__); \
+			goto _label; \
+		} \
+	} while(0)
+/* FYET_WARNING variants of the token/position/mark/node reports */
+#define FYR_TOKEN_WARNING(_fyr, _fyt, _module, _fmt, ...) \
+	FYR_TOKEN_DIAG(_fyr, _fyt, FYET_WARNING, _module, _fmt, ## __VA_ARGS__)
+
+#define FYR_PARSE_WARNING(_fyr, _adv, _cnt, _module, _fmt, ...) \
+	FYR_PARSE_DIAG(_fyr, _adv, _cnt, FYET_WARNING, _module, _fmt, ## __VA_ARGS__)
+
+#define FYR_MARK_WARNING(_fyr, _sm, _em, _module, _fmt, ...) \
+	FYR_MARK_DIAG(_fyr, _sm, _em, FYET_WARNING, _module, _fmt, ## __VA_ARGS__)
+/* NOTE(review): unlike the other *_WARNING macros this one takes a _type argument, which the expansion ignores */
+#define FYR_NODE_WARNING(_fyr, _fyn, _type, _module, _fmt, ...) 
\
+	FYR_NODE_DIAG(_fyr, _fyn, FYET_WARNING, _module, _fmt, ## __VA_ARGS__)
+
+/* doc */
+struct fy_document;
+/* message output on behalf of a document; flags = FYDF level and module bits */
+int fy_document_vdiag(struct fy_document *fyd, unsigned int flags,
+		      const char *file, int line, const char *func,
+		      const char *fmt, va_list ap);
+
+int fy_document_diag(struct fy_document *fyd, unsigned int flags,
+		     const char *file, int line, const char *func,
+		     const char *fmt, ...)
+			FY_ATTRIBUTE(format(printf, 6, 7));
+/* token based reports issued through a document */
+void fy_document_diag_vreport(struct fy_document *fyd,
+			      const struct fy_diag_report_ctx *fydrc,
+			      const char *fmt, va_list ap);
+void fy_document_diag_report(struct fy_document *fyd,
+			     const struct fy_diag_report_ctx *fydrc,
+			     const char *fmt, ...)
+			FY_ATTRIBUTE(format(printf, 3, 4));
+/*
+ * fyd_debug(): debug message tagged with a module; expands to an empty
+ * statement (arguments are not evaluated) unless FY_DEVMODE is defined.
+ */
+#ifdef FY_DEVMODE
+
+#define fyd_debug(_fyd, _module, _fmt, ...) \
+	fy_document_diag((_fyd), FYET_DEBUG | FYDF_MODULE(_module), \
+			__FILE__, __LINE__, __func__, \
+			(_fmt) , ## __VA_ARGS__)
+
+#else
+
+#define fyd_debug(_fyd, _module, _fmt, ...) \
+	do { } while(0)
+
+#endif
+/* fixed-level message helpers; the call site file/line/function is recorded */
+#define fyd_info(_fyd, _fmt, ...) \
+	fy_document_diag((_fyd), FYET_INFO, __FILE__, __LINE__, __func__, \
+			(_fmt) , ## __VA_ARGS__)
+#define fyd_notice(_fyd, _fmt, ...) \
+	fy_document_diag((_fyd), FYET_NOTICE, __FILE__, __LINE__, __func__, \
+			(_fmt) , ## __VA_ARGS__)
+#define fyd_warning(_fyd, _fmt, ...) \
+	fy_document_diag((_fyd), FYET_WARNING, __FILE__, __LINE__, __func__, \
+			(_fmt) , ## __VA_ARGS__)
+#define fyd_error(_fyd, _fmt, ...) \
+	fy_document_diag((_fyd), FYET_ERROR, __FILE__, __LINE__, __func__, \
+			(_fmt) , ## __VA_ARGS__)
+/* fyd_debug() shorthand with the module preset to FYEM_DOC */
+#define fyd_doc_debug(_fyd, _fmt, ...) \
+	fyd_debug((_fyd), FYEM_DOC, (_fmt) , ## __VA_ARGS__)
+/* when _cond is false: emit an error message and jump to _label */
+#define fyd_error_check(_fyd, _cond, _label, _fmt, ...) \
+	do { \
+		if (!(_cond)) { \
+			fyd_error((_fyd), _fmt, ## __VA_ARGS__); \
+			goto _label ; \
+		} \
+	} while(0)
+/*
+ * Build a zeroed report context from type, module and token and report it
+ * through the document. The token is handed over exactly as given.
+ */
+#define _FYD_TOKEN_DIAG(_fyd, _fyt, _type, _module, _fmt, ...) 
\
+	do { \
+		struct fy_diag_report_ctx _drc; \
+		memset(&_drc, 0, sizeof(_drc)); \
+		_drc.type = (_type); \
+		_drc.module = (_module); \
+		_drc.fyt = (_fyt); \
+		fy_document_diag_report((_fyd), &_drc, (_fmt) , ## __VA_ARGS__); \
+	} while(0)
+/* report at an existing token; fy_token_ref() is applied to it before it is handed over */
+#define FYD_TOKEN_DIAG(_fyd, _fyt, _type, _module, _fmt, ...) \
+	_FYD_TOKEN_DIAG(_fyd, fy_token_ref(_fyt), _type, _module, _fmt, ## __VA_ARGS__)
+/* report at a node, using the token returned by fy_node_token() */
+#define FYD_NODE_DIAG(_fyd, _fyn, _type, _module, _fmt, ...) \
+	_FYD_TOKEN_DIAG(_fyd, fy_node_token(_fyn), _type, _module, _fmt, ## __VA_ARGS__)
+/* FYET_ERROR variants of the reports above */
+#define FYD_TOKEN_ERROR(_fyd, _fyt, _module, _fmt, ...) \
+	FYD_TOKEN_DIAG(_fyd, _fyt, FYET_ERROR, _module, _fmt, ## __VA_ARGS__)
+
+#define FYD_NODE_ERROR(_fyd, _fyn, _module, _fmt, ...) \
+	FYD_NODE_DIAG(_fyd, _fyn, FYET_ERROR, _module, _fmt, ## __VA_ARGS__)
+/* *_ERROR_CHECK: when _cond is false, issue the error report and jump to _label */
+#define FYD_TOKEN_ERROR_CHECK(_fyd, _fyt, _module, _cond, _label, _fmt, ...) \
+	do { \
+		if (!(_cond)) { \
+			FYD_TOKEN_ERROR(_fyd, _fyt, _module, _fmt, ## __VA_ARGS__); \
+			goto _label; \
+		} \
+	} while(0)
+
+#define FYD_NODE_ERROR_CHECK(_fyd, _fyn, _module, _cond, _label, _fmt, ...) \
+	do { \
+		if (!(_cond)) { \
+			FYD_NODE_ERROR(_fyd, _fyn, _module, _fmt, ## __VA_ARGS__); \
+			goto _label; \
+		} \
+	} while(0)
+/* FYET_WARNING variants of the token/node reports */
+#define FYD_TOKEN_WARNING(_fyd, _fyt, _module, _fmt, ...) \
+	FYD_TOKEN_DIAG(_fyd, _fyt, FYET_WARNING, _module, _fmt, ## __VA_ARGS__)
+/* NOTE(review): unlike FYD_TOKEN_WARNING this one takes a _type argument, which the expansion ignores */
+#define FYD_NODE_WARNING(_fyd, _fyn, _type, _module, _fmt, ...) \
+	FYD_NODE_DIAG(_fyd, _fyn, FYET_WARNING, _module, _fmt, ## __VA_ARGS__)
+
+/* composer */
+struct fy_composer;
+/* message output on behalf of a composer; flags = FYDF level and module bits */
+int fy_composer_vdiag(struct fy_composer *fyc, unsigned int flags,
+		      const char *file, int line, const char *func,
+		      const char *fmt, va_list ap);
+
+int fy_composer_diag(struct fy_composer *fyc, unsigned int flags,
+		     const char *file, int line, const char *func,
+		     const char *fmt, ...) 
+			FY_ATTRIBUTE(format(printf, 6, 7));
+/* token based reports issued through a composer */
+void fy_composer_diag_vreport(struct fy_composer *fyc,
+			      const struct fy_diag_report_ctx *fydrc,
+			      const char *fmt, va_list ap);
+void fy_composer_diag_report(struct fy_composer *fyc,
+			     const struct fy_diag_report_ctx *fydrc,
+			     const char *fmt, ...)
+			FY_ATTRIBUTE(format(printf, 3, 4));
+/*
+ * fyc_debug(): debug message tagged with a module; expands to an empty
+ * statement (arguments are not evaluated) unless FY_DEVMODE is defined.
+ */
+#ifdef FY_DEVMODE
+
+#define fyc_debug(_fyc, _module, _fmt, ...) \
+	fy_composer_diag((_fyc), FYET_DEBUG | FYDF_MODULE(_module), \
+			__FILE__, __LINE__, __func__, \
+			(_fmt) , ## __VA_ARGS__)
+
+#else
+
+#define fyc_debug(_fyc, _module, _fmt, ...) \
+	do { } while(0)
+
+#endif
+/* fixed-level message helpers; the call site file/line/function is recorded */
+#define fyc_info(_fyc, _fmt, ...) \
+	fy_composer_diag((_fyc), FYET_INFO, __FILE__, __LINE__, __func__, \
+			(_fmt) , ## __VA_ARGS__)
+#define fyc_notice(_fyc, _fmt, ...) \
+	fy_composer_diag((_fyc), FYET_NOTICE, __FILE__, __LINE__, __func__, \
+			(_fmt) , ## __VA_ARGS__)
+#define fyc_warning(_fyc, _fmt, ...) \
+	fy_composer_diag((_fyc), FYET_WARNING, __FILE__, __LINE__, __func__, \
+			(_fmt) , ## __VA_ARGS__)
+#define fyc_error(_fyc, _fmt, ...) \
+	fy_composer_diag((_fyc), FYET_ERROR, __FILE__, __LINE__, __func__, \
+			(_fmt) , ## __VA_ARGS__)
+/* when _cond is false: emit an error message and jump to _label */
+#define fyc_error_check(_fyc, _cond, _label, _fmt, ...) \
+	do { \
+		if (!(_cond)) { \
+			fyc_error((_fyc), _fmt, ## __VA_ARGS__); \
+			goto _label ; \
+		} \
+	} while(0)
+/*
+ * Build a zeroed report context from type, module and token and report it
+ * through the composer. The token is handed over exactly as given.
+ */
+#define _FYC_TOKEN_DIAG(_fyc, _fyt, _type, _module, _fmt, ...) \
+	do { \
+		struct fy_diag_report_ctx _drc; \
+		memset(&_drc, 0, sizeof(_drc)); \
+		_drc.type = (_type); \
+		_drc.module = (_module); \
+		_drc.fyt = (_fyt); \
+		fy_composer_diag_report((_fyc), &_drc, (_fmt) , ## __VA_ARGS__); \
+	} while(0)
+/* report at an existing token; fy_token_ref() is applied to it before it is handed over */
+#define FYC_TOKEN_DIAG(_fyc, _fyt, _type, _module, _fmt, ...) \
+	_FYC_TOKEN_DIAG(_fyc, fy_token_ref(_fyt), _type, _module, _fmt, ## __VA_ARGS__)
+/* report at a node, using the token returned by fy_node_token() */
+#define FYC_NODE_DIAG(_fyc, _fyn, _type, _module, _fmt, ...) \
+	_FYC_TOKEN_DIAG(_fyc, fy_node_token(_fyn), _type, _module, _fmt, ## __VA_ARGS__)
+/* FYET_ERROR variant of FYC_TOKEN_DIAG */
+#define FYC_TOKEN_ERROR(_fyc, _fyt, _module, _fmt, ...) 
\ + FYC_TOKEN_DIAG(_fyc, _fyt, FYET_ERROR, _module, _fmt, ## __VA_ARGS__) + +#define FYC_TOKEN_ERROR_CHECK(_fyc, _fyt, _module, _cond, _label, _fmt, ...) \ + do { \ + if (!(_cond)) { \ + FYC_TOKEN_ERROR(_fyc, _fyt, _module, _fmt, ## __VA_ARGS__); \ + goto _label; \ + } \ + } while(0) + +#define FYC_TOKEN_WARNING(_fyc, _fyt, _module, _fmt, ...) \ + FYC_TOKEN_DIAG(_fyc, _fyt, FYET_WARNING, _module, _fmt, ## __VA_ARGS__) + +/* document builder */ +struct fy_document_builder; + +int fy_document_builder_vdiag(struct fy_document_builder *fydb, unsigned int flags, + const char *file, int line, const char *func, + const char *fmt, va_list ap); + +int fy_document_builder_diag(struct fy_document_builder *fydb, unsigned int flags, + const char *file, int line, const char *func, + const char *fmt, ...) + FY_ATTRIBUTE(format(printf, 6, 7)); + +void fy_document_builder_diag_vreport(struct fy_document_builder *fydb, + const struct fy_diag_report_ctx *fydrc, + const char *fmt, va_list ap); +void fy_document_builder_diag_report(struct fy_document_builder *fydb, + const struct fy_diag_report_ctx *fydrc, + const char *fmt, ...) + FY_ATTRIBUTE(format(printf, 3, 4)); + +#ifdef FY_DEVMODE + +#define fydb_debug(_fydb, _module, _fmt, ...) \ + fy_document_builder_diag((_fydb), FYET_DEBUG | FYDF_MODULE(_module), \ + __FILE__, __LINE__, __func__, \ + (_fmt) , ## __VA_ARGS__) + +#else + +#define fydb_debug(_fydb, _module, _fmt, ...) \ + do { } while(0) + +#endif + +#define fydb_info(_fydb, _fmt, ...) \ + fy_document_builder_diag((_fydb), FYET_INFO, __FILE__, __LINE__, __func__, \ + (_fmt) , ## __VA_ARGS__) +#define fydb_notice(_fydb, _fmt, ...) \ + fy_document_builder_diag((_fydb), FYET_NOTICE, __FILE__, __LINE__, __func__, \ + (_fmt) , ## __VA_ARGS__) +#define fydb_warning(_fydb, _fmt, ...) \ + fy_document_builder_diag((_fydb), FYET_WARNING, __FILE__, __LINE__, __func__, \ + (_fmt) , ## __VA_ARGS__) +#define fydb_error(_fydb, _fmt, ...) 
\ + fy_document_builder_diag((_fydb), FYET_ERROR, __FILE__, __LINE__, __func__, \ + (_fmt) , ## __VA_ARGS__) + +#define fydb_error_check(_fydb, _cond, _label, _fmt, ...) \ + do { \ + if (!(_cond)) { \ + fydb_error((_fydb), _fmt, ## __VA_ARGS__); \ + goto _label ; \ + } \ + } while(0) + +#define _FYDB_TOKEN_DIAG(_fydb, _fyt, _type, _module, _fmt, ...) \ + do { \ + struct fy_diag_report_ctx _drc; \ + memset(&_drc, 0, sizeof(_drc)); \ + _drc.type = (_type); \ + _drc.module = (_module); \ + _drc.fyt = (_fyt); \ + fy_document_builder_diag_report((_fydb), &_drc, (_fmt) , ## __VA_ARGS__); \ + } while(0) + +#define FYDB_TOKEN_DIAG(_fydb, _fyt, _type, _module, _fmt, ...) \ + _FYDB_TOKEN_DIAG(_fydb, fy_token_ref(_fyt), _type, _module, _fmt, ## __VA_ARGS__) + +#define FYDB_NODE_DIAG(_fydb, _fyn, _type, _module, _fmt, ...) \ + _FYDB_TOKEN_DIAG(_fydb, fy_node_token(_fyn), _type, _module, _fmt, ## __VA_ARGS__) + +#define FYDB_TOKEN_ERROR(_fydb, _fyt, _module, _fmt, ...) \ + FYDB_TOKEN_DIAG(_fydb, _fyt, FYET_ERROR, _module, _fmt, ## __VA_ARGS__) + +#define FYDB_NODE_ERROR(_fydb, _fyn, _module, _fmt, ...) \ + FYDB_NODE_DIAG(_fydb, _fyn, FYET_ERROR, _module, _fmt, ## __VA_ARGS__) + +#define FYDB_TOKEN_ERROR_CHECK(_fydb, _fyt, _module, _cond, _label, _fmt, ...) \ + do { \ + if (!(_cond)) { \ + FYDB_TOKEN_ERROR(_fydb, _fyt, _module, _fmt, ## __VA_ARGS__); \ + goto _label; \ + } \ + } while(0) + +#define FYDB_NODE_ERROR_CHECK(_fydb, _fyn, _module, _cond, _label, _fmt, ...) \ + do { \ + if (!(_cond)) { \ + FYDB_NODE_ERROR(_fydb, _fyn, _module, _fmt, ## __VA_ARGS__); \ + goto _label; \ + } \ + } while(0) + +#define FYDB_TOKEN_WARNING(_fydb, _fyt, _module, _fmt, ...) 
\ + FYDB_TOKEN_DIAG(_fydb, _fyt, FYET_WARNING, _module, _fmt, ## __VA_ARGS__) + +/* alloca formatted print methods */ +#define alloca_vsprintf(_res, _fmt, _ap) \ + do { \ + const char *__fmt = (_fmt); \ + va_list _ap_orig; \ + int _size; \ + int _sizew __FY_DEBUG_UNUSED__; \ + char *_buf = NULL, *_s; \ + \ + va_copy(_ap_orig, (_ap)); \ + _size = vsnprintf(NULL, 0, __fmt, _ap_orig); \ + va_end(_ap_orig); \ + if (_size != -1) { \ + _buf = FY_ALLOCA(_size + 1); \ + _sizew = vsnprintf(_buf, _size + 1, __fmt, _ap); \ + assert(_size == _sizew); \ + _s = _buf + strlen(_buf); \ + while (_s > _buf && _s[-1] == '\n') \ + *--_s = '\0'; \ + } \ + *(_res) = _buf; \ + } while(false) + +#define alloca_sprintf(_res, _fmt, ...) \ + do { \ + const char *__fmt = (_fmt); \ + int _size; \ + int _sizew __FY_DEBUG_UNUSED__; \ + char *_buf = NULL, *_s; \ + \ + _size = snprintf(NULL, 0, __fmt, ## __VA_ARGS__); \ + if (_size != -1) { \ + _buf = FY_ALLOCA(_size + 1); \ + _sizew = snprintf(_buf, _size + 1, __fmt, __VA_ARGS__); \ + assert(_size == _sizew); \ + _s = _buf + strlen(_buf); \ + while (_s > _buf && _s[-1] == '\n') \ + *--_s = '\0'; \ + } \ + *(_res) = _buf; \ + } while(false) + +#endif diff --git a/contrib/libs/libfyaml/src/lib/fy-doc.c b/contrib/libs/libfyaml/src/lib/fy-doc.c new file mode 100644 index 0000000000..8cb6a39b86 --- /dev/null +++ b/contrib/libs/libfyaml/src/lib/fy-doc.c @@ -0,0 +1,7340 @@ +/* + * fy-doc.c - YAML document methods + * + * Copyright (c) 2019 Pantelis Antoniou <pantelis.antoniou@konsulko.com> + * + * SPDX-License-Identifier: MIT + */ +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#include <stdio.h> +#include <string.h> +#include <assert.h> +#include <stdlib.h> +#include <ctype.h> +#include <errno.h> +#if defined (__unix__) || (defined (__APPLE__) && defined (__MACH__)) +#include <unistd.h> +#endif + +#include <libfyaml.h> + +#include "fy-parse.h" +#include "fy-doc.h" + +#include "fy-utils.h" + +#define XXH_STATIC_LINKING_ONLY +#include "xxhash.h" + 
+static const struct fy_hash_desc hd_anchor; +static const struct fy_hash_desc hd_nanchor; +static const struct fy_hash_desc hd_mapping; + +int fy_node_hash_uint(struct fy_node *fyn, unsigned int *hashp); + +static struct fy_node * +fy_node_by_path_internal(struct fy_node *fyn, + const char *path, size_t pathlen, + enum fy_node_walk_flags flags); + +#define FY_NODE_PATH_WALK_DEPTH_DEFAULT 16 + +static inline unsigned int +fy_node_walk_max_depth_from_flags(enum fy_node_walk_flags flags) +{ + unsigned int max_depth; + + max_depth = ((unsigned int)flags >> FYNWF_MAXDEPTH_SHIFT) & FYNWF_MAXDEPTH_MASK; + if (max_depth == 0) + max_depth = FY_NODE_PATH_WALK_DEPTH_DEFAULT; + + return max_depth; +} + +static inline unsigned int +fy_node_walk_marker_from_flags(enum fy_node_walk_flags flags) +{ + return ((unsigned int)flags >> FYNWF_MARKER_SHIFT) & FYNWF_MARKER_MASK; +} + +/* internal simple key to optimize string lookups */ +static inline bool is_simple_key(const char *str, size_t len) +{ + const char *s, *e; + char c; + + if (!str) + return false; + + if (len == (size_t)-1) + len = strlen(str); + + for (s = str, e = s + len; s < e; s++) { + + c = *s; + + /* note no isalpha() it's locale specific */ + if (!((c >= 'A' && c <= 'Z') || + (c >= 'a' && c <= 'z') || + (c >= '0' && c <= '9') || + (c == '_'))) + break; + } + + return s == e; +} + +static void fy_resolve_parent_node(struct fy_document *fyd, struct fy_node *fyn, struct fy_node *fyn_parent); + +void fy_anchor_destroy(struct fy_anchor *fya) +{ + if (!fya) + return; + fy_token_unref(fya->anchor); + free(fya); +} + +struct fy_anchor *fy_anchor_create(struct fy_document *fyd, + struct fy_node *fyn, struct fy_token *anchor) +{ + struct fy_anchor *fya = NULL; + + fya = malloc(sizeof(*fya)); + if (!fya) + return NULL; + + fya->fyn = fyn; + fya->anchor = anchor; + fya->multiple = false; + + return fya; +} + +struct fy_anchor *fy_document_anchor_iterate(struct fy_document *fyd, void **prevp) +{ + struct fy_anchor_list *fyal; + 
+ if (!fyd || !prevp) + return NULL; + + fyal = &fyd->anchors; + + return *prevp = *prevp ? fy_anchor_next(fyal, *prevp) : fy_anchor_list_head(fyal); +} + +#define FYDSAF_COPY FY_BIT(0) +#define FYDSAF_MALLOCED FY_BIT(1) + +static int fy_document_set_anchor_internal(struct fy_document *fyd, struct fy_node *fyn, const char *text, size_t len, + unsigned int flags) +{ + const bool copy = !!(flags & FYDSAF_COPY); + const bool malloced = !!(flags & FYDSAF_MALLOCED); + struct fy_anchor *fya = NULL, *fyam = NULL; + struct fy_input *fyi = NULL; + struct fy_token *fyt = NULL; + struct fy_accel_entry *xle; + struct fy_atom handle; + char *data_copy = NULL; + const char *origtext; + size_t origlen; + int rc; + + if (!fyd || !fyn || fyn->fyd != fyd) + return -1; + + if (text && len == (size_t)-1) + len = strlen(text); + + fya = fy_document_lookup_anchor_by_node(fyd, fyn); + + if (!text) { + /* no anchor, and trying to delete? OK */ + if (fya) + return 0; + /* remove the anchor */ + fy_anchor_list_del(&fyd->anchors, fya); + + if (fy_document_is_accelerated(fyd)) { + xle = fy_accel_entry_lookup_key_value(fyd->axl, fya->anchor, fya); + fy_accel_entry_remove(fyd->axl, xle); + + xle = fy_accel_entry_lookup_key_value(fyd->naxl, fya->fyn, fya); + fy_accel_entry_remove(fyd->naxl, xle); + } + + fy_anchor_destroy(fya); + return 0; + } + + /* trying to add duplicate anchor */ + if (fya) { + origtext = fy_token_get_text(fya->anchor, &origlen); + fyd_error_check(fyd, origtext, err_out, + "fy_token_get_text() failed"); + + FYD_NODE_ERROR(fyd, fyn, FYEM_DOC, + "cannot set anchor %.*s (anchor %.*s already exists)", + (int)len, text, (int)origlen, origtext); + if (malloced && text) + free((void *)text); + fya = NULL; + goto err_out; + } + + if (copy) { + data_copy = malloc(len); + fyd_error_check(fyd, data_copy, err_out, + "malloc() failed"); + memcpy(data_copy, text, len); + fyi = fy_input_from_malloc_data(data_copy, len, &handle, true); + } else if (malloced) + data_copy = (char *)text; + 
else + data_copy = NULL; + + if (data_copy) + fyi = fy_input_from_malloc_data((void *)text, len, &handle, true); + else + fyi = fy_input_from_data(text, len, &handle, true); + fyd_error_check(fyd, fyi, err_out, + "fy_input_from_data() failed"); + data_copy = NULL; + + /* it must not be something funky */ + if (!handle.valid_anchor) + goto err_out; + + fyt = fy_token_create(FYTT_ANCHOR, &handle); + if (!fyt) + goto err_out; + + fya = fy_anchor_create(fyd, fyn, fyt); + if (!fya) + goto err_out; + + fy_anchor_list_add(&fyd->anchors, fya); + if (fy_document_is_accelerated(fyd)) { + xle = fy_accel_entry_lookup(fyd->axl, fya->anchor); + if (xle) { + fyam = (void *)xle->value; + /* multiple */ + if (!fyam->multiple) + fyam->multiple = true; + fya->multiple = true; + + fyd_notice(fyd, "register anchor %.*s is multiple", (int)len, text); + } + + xle = fy_accel_entry_insert(fyd->axl, fya->anchor, fya); + fyd_error_check(fyd, xle, err_out, + "fy_accel_entry_insert() fyd->axl failed"); + } + + if (fy_document_is_accelerated(fyd)) { + rc = fy_accel_insert(fyd->naxl, fyn, fya); + fyd_error_check(fyd, !rc, err_out_rc, + "fy_accel_insert() fyd->naxl failed"); + } + + /* take away the input reference */ + fy_input_unref(fyi); + + return 0; +err_out: + rc = -1; +err_out_rc: + if (data_copy) + free(data_copy); + fy_anchor_destroy(fya); + fy_token_unref(fyt); + fy_input_unref(fyi); + fyd->diag->on_error = false; + return rc; +} + +int fy_document_set_anchor(struct fy_document *fyd, struct fy_node *fyn, const char *text, size_t len) +{ + return fy_document_set_anchor_internal(fyd, fyn, text, len, 0); +} + +int fy_node_set_anchor(struct fy_node *fyn, const char *text, size_t len) +{ + if (!fyn) + return -1; + return fy_document_set_anchor_internal(fyn->fyd, fyn, text, len, 0); +} + +int fy_node_set_anchor_copy(struct fy_node *fyn, const char *text, size_t len) +{ + if (!fyn) + return -1; + return fy_document_set_anchor_internal(fyn->fyd, fyn, text, len, FYDSAF_COPY); +} + +int 
fy_node_set_vanchorf(struct fy_node *fyn, const char *fmt, va_list ap) +{ + char *str; + + if (!fyn || !fmt) + return -1; + + alloca_vsprintf(&str, fmt, ap); + return fy_document_set_anchor_internal(fyn->fyd, fyn, str, FY_NT, FYDSAF_COPY); +} + +int fy_node_set_anchorf(struct fy_node *fyn, const char *fmt, ...) +{ + va_list ap; + int ret; + + va_start(ap, fmt); + ret = fy_node_set_vanchorf(fyn, fmt, ap); + va_end(ap); + + return ret; +} + +int fy_node_remove_anchor(struct fy_node *fyn) +{ + return fy_node_set_anchor(fyn, NULL, 0); +} + +struct fy_anchor *fy_node_get_anchor(struct fy_node *fyn) +{ + if (!fyn) + return NULL; + + return fy_document_lookup_anchor_by_node(fyn->fyd, fyn); +} + +struct fy_anchor *fy_node_get_nearest_anchor(struct fy_node *fyn) +{ + struct fy_anchor *fya; + struct fy_node *fynt; + + while ((fya = fy_node_get_anchor(fyn)) == NULL && (fynt = fy_node_get_parent(fyn))) + fyn = fynt; + + return fya; +} + +struct fy_node *fy_node_get_nearest_child_of(struct fy_node *fyn_base, + struct fy_node *fyn) +{ + struct fy_node *fynp; + + if (!fyn) + return NULL; + if (!fyn_base) + fyn_base = fy_document_root(fy_node_document(fyn)); + if (!fyn_base) + return NULL; + + /* move up until we hit a node that's a child of fyn_base */ + fynp = fyn; + while (fyn && (fynp = fy_node_get_parent(fyn)) != NULL && fyn_base != fynp) + fyn = fynp; + + return fyn; +} + +void fy_parse_document_destroy(struct fy_parser *fyp, struct fy_document *fyd) +{ + struct fy_node *fyn; + struct fy_anchor *fya; + struct fy_anchor *fyan; + struct fy_accel_entry *xle; + + if (!fyd) + return; + + fy_document_cleanup_path_expr_data(fyd); + + fyn = fyd->root; + fyd->root = NULL; + fy_node_detach_and_free(fyn); + + /* remove all anchors */ + for (fya = fy_anchor_list_head(&fyd->anchors); fya; fya = fyan) { + fyan = fy_anchor_next(&fyd->anchors, fya); + fy_anchor_list_del(&fyd->anchors, fya); + + if (fy_document_is_accelerated(fyd)) { + xle = fy_accel_entry_lookup_key_value(fyd->axl, 
fya->anchor, fya); + fy_accel_entry_remove(fyd->axl, xle); + + xle = fy_accel_entry_lookup_key_value(fyd->naxl, fya->fyn, fya); + fy_accel_entry_remove(fyd->naxl, xle); + } + + fy_anchor_destroy(fya); + } + + if (fy_document_is_accelerated(fyd)) { + fy_accel_cleanup(fyd->axl); + free(fyd->axl); + + fy_accel_cleanup(fyd->naxl); + free(fyd->naxl); + } + + fy_document_state_unref(fyd->fyds); + + fy_diag_unref(fyd->diag); + + free(fyd); +} + +struct fy_document *fy_parse_document_create(struct fy_parser *fyp, struct fy_eventp *fyep) +{ + struct fy_document *fyd = NULL; + struct fy_document_state *fyds; + struct fy_event *fye = NULL; + int rc; + + if (!fyp || !fyep) + return NULL; + + fye = &fyep->e; + + FYP_TOKEN_ERROR_CHECK(fyp, fy_event_get_token(fye), FYEM_DOC, + fye->type == FYET_DOCUMENT_START, err_out, + "invalid start of event stream"); + + fyd = malloc(sizeof(*fyd)); + fyp_error_check(fyp, fyd, err_out, + "malloc() failed"); + + memset(fyd, 0, sizeof(*fyd)); + + fyd->diag = fy_diag_ref(fyp->diag); + fyd->parse_cfg = fyp->cfg; + + fy_anchor_list_init(&fyd->anchors); + if (fy_document_can_be_accelerated(fyd)) { + fyd->axl = malloc(sizeof(*fyd->axl)); + fyp_error_check(fyp, fyd->axl, err_out, + "malloc() failed"); + + /* start with a very small bucket list */ + rc = fy_accel_setup(fyd->axl, &hd_anchor, fyd, 8); + fyp_error_check(fyp, !rc, err_out, + "fy_accel_setup() failed"); + + fyd->naxl = malloc(sizeof(*fyd->naxl)); + fyp_error_check(fyp, fyd->axl, err_out, + "malloc() failed"); + + /* start with a very small bucket list */ + rc = fy_accel_setup(fyd->naxl, &hd_nanchor, fyd, 8); + fyp_error_check(fyp, !rc, err_out, + "fy_accel_setup() failed"); + } + + fyd->root = NULL; + + fyds = fye->document_start.document_state; + fye->document_start.document_state = NULL; + + /* and we're done with this event */ + fy_parse_eventp_recycle(fyp, fyep); + + /* drop the old reference */ + fy_document_state_unref(fyd->fyds); + + /* note that we keep the reference */ + fyd->fyds 
= fyds; + + fy_document_list_init(&fyd->children); + + return fyd; + +err_out: + fy_parse_document_destroy(fyp, fyd); + fy_parse_eventp_recycle(fyp, fyep); + fyd->diag->on_error = false; + return NULL; +} + +const struct fy_parse_cfg *fy_document_get_cfg(struct fy_document *fyd) +{ + if (!fyd) + return NULL; + return &fyd->parse_cfg; +} + +struct fy_diag *fy_document_get_diag(struct fy_document *fyd) +{ + if (!fyd || !fyd->diag) + return NULL; + return fy_diag_ref(fyd->diag); +} + +int fy_document_set_diag(struct fy_document *fyd, struct fy_diag *diag) +{ + struct fy_diag_cfg dcfg; + + if (!fyd) + return -1; + + /* default? */ + if (!diag) { + fy_diag_cfg_default(&dcfg); + diag = fy_diag_create(&dcfg); + if (!diag) + return -1; + } + + fy_diag_unref(fyd->diag); + fyd->diag = fy_diag_ref(diag); + + return 0; +} + +struct fy_document *fy_node_document(struct fy_node *fyn) +{ + return fyn ? fyn->fyd : NULL; +} + +static inline struct fy_anchor * +fy_document_accel_lookup_anchor_by_token(struct fy_document *fyd, struct fy_token *fyt) +{ + assert(fyd); + assert(fyd->axl); + return (void *)fy_accel_lookup(fyd->axl, fyt); +} + +static inline struct fy_anchor * +fy_document_accel_lookup_anchor_by_node(struct fy_document *fyd, struct fy_node *fyn) +{ + assert(fyd); + assert(fyd->naxl); + return (void *)fy_accel_lookup(fyd->naxl, fyn); +} + +static inline struct fy_node_pair * +fy_node_accel_lookup_by_node(struct fy_node *fyn, struct fy_node *fyn_key) +{ + assert(fyn); + assert(fyn->xl); + + return (void *)fy_accel_lookup(fyn->xl, (const void *)fyn_key); +} + +struct fy_anchor * +fy_document_lookup_anchor(struct fy_document *fyd, const char *anchor, size_t len) +{ + struct fy_anchor *fya; + struct fy_anchor_list *fyal; + struct fy_input *fyi; + struct fy_atom handle; + struct fy_token *fyt; + const char *text; + size_t text_len; + + if (!fyd || !anchor) + return NULL; + + if (len == (size_t)-1) + len = strlen(anchor); + + if (fy_document_is_accelerated(fyd)) { + fyi = 
fy_input_from_data(anchor, len, &handle, true); + if (!fyi) + return NULL; + + fyt = fy_token_create(FYTT_ANCHOR, &handle); + + if (!fyt) { + fy_input_unref(fyi); + return NULL; + } + + fya = fy_document_accel_lookup_anchor_by_token(fyd, fyt); + + fy_input_unref(fyi); + fy_token_unref(fyt); + + if (!fya) + return NULL; + + /* single anchor? return it */ + if (!fya->multiple) + return fya; + + /* multiple anchors, fall-through */ + } + + /* note that we're performing the lookup in reverse creation order + * so that we pick the most recent + */ + fyal = &fyd->anchors; + for (fya = fy_anchor_list_tail(fyal); fya; fya = fy_anchor_prev(fyal, fya)) { + text = fy_anchor_get_text(fya, &text_len); + if (!text) + return NULL; + + if (len == text_len && !memcmp(anchor, text, len)) + return fya; + } + + return NULL; +} + +struct fy_anchor * +fy_document_lookup_anchor_by_token(struct fy_document *fyd, + struct fy_token *anchor) +{ + struct fy_anchor *fya, *fya_found, *fya_found2; + struct fy_anchor_list *fyal; + const char *anchor_text, *text; + size_t anchor_len, text_len; + int count; + + if (!fyd || !anchor) + return NULL; + + /* first try direct match (it's faster and the common case) */ + if (fy_document_is_accelerated(fyd)) { + fya = fy_document_accel_lookup_anchor_by_token(fyd, anchor); + if (!fya) + return NULL; + + /* single anchor? return it */ + if (!fya->multiple) + return fya; + + /* multiple anchors, fall-through */ + } + + anchor_text = fy_token_get_text(anchor, &anchor_len); + if (!anchor_text) + return NULL; + + fyal = &fyd->anchors; + + /* first pass, try with a single match */ + count = 0; + fya_found = NULL; + for (fya = fy_anchor_list_head(fyal); fya; fya = fy_anchor_next(fyal, fya)) { + text = fy_anchor_get_text(fya, &text_len); + if (!text) + return NULL; + + if (anchor_len == text_len && !memcmp(anchor_text, text, anchor_len)) { + count++; + fya_found = fya; + } + } + + /* not found */ + if (!count) + return NULL; + + /* single one? 
fine */ + if (count == 1) + return fya_found; + + /* multiple ones, must pick the one that's the last one before + * the requesting token */ + /* fyd_notice(fyd, "multiple anchors for %.*s", (int)anchor_len, anchor_text); */ + + /* only try the ones on the same input + * we don't try to cover the case where the label is referenced + * by other constructed documents + */ + fya_found2 = NULL; + for (fya = fy_anchor_list_head(fyal); fya; fya = fy_anchor_next(fyal, fya)) { + + /* only on the same input */ + if (fy_token_get_input(fya->anchor) != fy_token_get_input(anchor)) + continue; + + text = fy_anchor_get_text(fya, &text_len); + if (!text) + return NULL; + + if (anchor_len == text_len && !memcmp(anchor_text, text, anchor_len) && + fy_token_start_pos(fya->anchor) < fy_token_start_pos(anchor)) { + fya_found2 = fya; + } + } + + /* just return the one find earlier */ + if (!fya_found2) + return fya_found; + + /* return the one that was the latest */ + return fya_found2; +} + +struct fy_anchor *fy_document_lookup_anchor_by_node(struct fy_document *fyd, struct fy_node *fyn) +{ + struct fy_anchor *fya; + struct fy_anchor_list *fyal; + + if (!fyd || !fyn) + return NULL; + + if (fy_document_is_accelerated(fyd)) { + fya = fy_document_accel_lookup_anchor_by_node(fyd, fyn); + } else { + fyal = &fyd->anchors; + for (fya = fy_anchor_list_head(fyal); fya; fya = fy_anchor_next(fyal, fya)) { + if (fya->fyn == fyn) + break; + } + } + + return fya; +} + +const char *fy_anchor_get_text(struct fy_anchor *fya, size_t *lenp) +{ + if (!fya || !lenp) + return NULL; + return fy_token_get_text(fya->anchor, lenp); +} + +struct fy_node *fy_anchor_node(struct fy_anchor *fya) +{ + if (!fya) + return NULL; + return fya->fyn; +} + +int fy_node_pair_free(struct fy_node_pair *fynp) +{ + int rc, rc_ret = 0; + + if (!fynp) + return 0; + + rc = fy_node_free(fynp->key); + if (rc) + rc_ret = -1; + rc = fy_node_free(fynp->value); + if (rc) + rc_ret = -1; + + free(fynp); + + return rc_ret; +} + +void 
fy_node_pair_detach_and_free(struct fy_node_pair *fynp) +{ + if (!fynp) + return; + + fy_node_detach_and_free(fynp->key); + fy_node_detach_and_free(fynp->value); + free(fynp); +} + +struct fy_node_pair *fy_node_pair_alloc(struct fy_document *fyd) +{ + struct fy_node_pair *fynp = NULL; + + fynp = malloc(sizeof(*fynp)); + if (!fynp) + return NULL; + + fynp->key = NULL; + fynp->value = NULL; + fynp->fyd = fyd; + fynp->parent = NULL; + return fynp; +} + +int fy_node_free(struct fy_node *fyn) +{ + struct fy_document *fyd; + struct fy_node *fyni; + struct fy_node_pair *fynp; + struct fy_anchor *fya, *fyan; + struct fy_accel_entry_iter xli; + struct fy_accel_entry *xle, *xlen; + + if (!fyn) + return 0; + + /* a document must exist */ + fyd = fyn->fyd; + if (!fyd) + return -1; + + if (fyn->attached) + return -1; + + if (fy_document_is_accelerated(fyd)) { + for (xle = fy_accel_entry_iter_start(&xli, fyd->naxl, fyn); + xle; xle = xlen) { + xlen = fy_accel_entry_iter_next(&xli); + + fya = (void *)xle->value; + + fy_anchor_list_del(&fyd->anchors, fya); + + xle = fy_accel_entry_lookup_key_value(fyd->axl, fya->anchor, fya); + fy_accel_entry_remove(fyd->axl, xle); + + xle = fy_accel_entry_lookup_key_value(fyd->naxl, fya->fyn, fya); + fy_accel_entry_remove(fyd->naxl, xle); + + fy_anchor_destroy(fya); + } + fy_accel_entry_iter_finish(&xli); + } else { + /* remove anchors that are located on this node */ + for (fya = fy_anchor_list_head(&fyd->anchors); fya; fya = fyan) { + fyan = fy_anchor_next(&fyd->anchors, fya); + if (fya->fyn == fyn) { + fy_anchor_list_del(&fyd->anchors, fya); + fy_anchor_destroy(fya); + } + } + } + + /* clear the meta data of this node */ + fy_node_clear_meta(fyn); + + fy_token_unref(fyn->tag); + fyn->tag = NULL; + switch (fyn->type) { + case FYNT_SCALAR: + fy_token_unref(fyn->scalar); + fyn->scalar = NULL; + break; + case FYNT_SEQUENCE: + while ((fyni = fy_node_list_pop(&fyn->sequence)) != NULL) + fy_node_detach_and_free(fyni); + 
fy_token_unref(fyn->sequence_start); + fy_token_unref(fyn->sequence_end); + fyn->sequence_start = NULL; + fyn->sequence_end = NULL; + break; + case FYNT_MAPPING: + while ((fynp = fy_node_pair_list_pop(&fyn->mapping)) != NULL) { + if (fyn->xl) + fy_accel_remove(fyn->xl, fynp->key); + fy_node_pair_detach_and_free(fynp); + } + fy_token_unref(fyn->mapping_start); + fy_token_unref(fyn->mapping_end); + fyn->mapping_start = NULL; + fyn->mapping_end = NULL; + break; + } + + if (fyn->xl) { + fy_accel_cleanup(fyn->xl); + free(fyn->xl); + } + + fy_node_cleanup_path_expr_data(fyn); + + free(fyn); + + return 0; +} + +void fy_node_detach_and_free(struct fy_node *fyn) +{ + int rc __FY_DEBUG_UNUSED__; + + if (!fyn || !fyn->fyd) + return; + + fyn->attached = false; + + /* it must always succeed */ + rc = fy_node_free(fyn); + assert(!rc); +} + +struct fy_node *fy_node_alloc(struct fy_document *fyd, enum fy_node_type type) +{ + struct fy_node *fyn = NULL; + int rc; + + fyn = malloc(sizeof(*fyn)); + if (!fyn) + return NULL; + + memset(fyn, 0, sizeof(*fyn)); + + fyn->style = FYNS_ANY; + fyn->fyd = fyd; + fyn->type = type; + + switch (fyn->type) { + case FYNT_SCALAR: + break; + + case FYNT_SEQUENCE: + fy_node_list_init(&fyn->sequence); + break; + case FYNT_MAPPING: + fy_node_pair_list_init(&fyn->mapping); + + if (fy_document_is_accelerated(fyd)) { + fyn->xl = malloc(sizeof(*fyn->xl)); + fyd_error_check(fyd, fyn->xl, err_out, + "malloc() failed"); + + /* start with a very small bucket list */ + rc = fy_accel_setup(fyn->xl, &hd_mapping, fyd, 8); + fyd_error_check(fyd, !rc, err_out, + "fy_accel_setup() failed"); + } + break; + } + return fyn; + +err_out: + if (fyn) { + if (fyn->xl) { + fy_accel_cleanup(fyn->xl); + free(fyn->xl); + } + free(fyn); + } + return NULL; +} + +struct fy_token *fy_node_non_synthesized_token(struct fy_node *fyn) +{ + struct fy_token *fyt_start = NULL, *fyt_end = NULL; + struct fy_token *fyt; + struct fy_input *fyi; + struct fy_atom handle; + unsigned int aflags; + 
const char *s, *e; + size_t size; + + if (!fyn) + return NULL; + + fyi = fy_node_get_input(fyn); + if (!fyi) + return NULL; + + switch (fyn->type) { + case FYNT_SCALAR: + return fy_token_ref(fyn->scalar); + + case FYNT_SEQUENCE: + fyt_start = fyn->sequence_start; + fyt_end = fyn->sequence_end; + break; + + case FYNT_MAPPING: + fyt_start = fyn->mapping_start; + fyt_end = fyn->mapping_end; + break; + + } + + if (!fyt_start || !fyt_end) + return NULL; + + s = (char *)fy_input_start(fyi) + fyt_start->handle.start_mark.input_pos; + e = (char *)fy_input_start(fyi) + fyt_end->handle.end_mark.input_pos; + size = (size_t)(e - s); + + if (size > 0) + aflags = fy_analyze_scalar_content(s, size, + fy_token_atom_json_mode(fyt_start), + fy_token_atom_lb_mode(fyt_start), + fy_token_atom_flow_ws_mode(fyt_start)); + else + aflags = FYACF_EMPTY | FYACF_FLOW_PLAIN | FYACF_BLOCK_PLAIN; + + memset(&handle, 0, sizeof(handle)); + handle.start_mark = fyt_start->handle.start_mark; + handle.end_mark = fyt_end->handle.end_mark; + + /* if it's plain, all is good */ + if (aflags & FYACF_FLOW_PLAIN) { + handle.storage_hint = size; /* maximum */ + handle.storage_hint_valid = false; + handle.direct_output = !!(aflags & FYACF_JSON_ESCAPE); /* direct only when no json escape */ + handle.style = FYAS_PLAIN; + } else { + handle.storage_hint = 0; /* just calculate */ + handle.storage_hint_valid = false; + handle.direct_output = false; + handle.style = FYAS_DOUBLE_QUOTED_MANUAL; + } + handle.empty = !!(aflags & FYACF_EMPTY); + handle.has_lb = !!(aflags & FYACF_LB); + handle.has_ws = !!(aflags & FYACF_WS); + handle.starts_with_ws = !!(aflags & FYACF_STARTS_WITH_WS); + handle.starts_with_lb = !!(aflags & FYACF_STARTS_WITH_LB); + handle.ends_with_ws = !!(aflags & FYACF_ENDS_WITH_WS); + handle.ends_with_lb = !!(aflags & FYACF_ENDS_WITH_LB); + handle.trailing_lb = !!(aflags & FYACF_TRAILING_LB); + handle.size0 = !!(aflags & FYACF_SIZE0); + handle.valid_anchor = !!(aflags & FYACF_VALID_ANCHOR); + 
handle.json_mode = false; /* always false */ + handle.lb_mode = fylb_cr_nl; /* always \r\n */ + handle.fws_mode = fyfws_space_tab; /* always space + tab */ + + handle.chomp = FYAC_STRIP; + handle.increment = 0; + handle.fyi = fyi; + handle.tabsize = 0; + + fyt = fy_token_create(FYTT_INPUT_MARKER, &handle); + if (!fyt) + return NULL; + + return fyt; +} + +struct fy_token *fy_node_token(struct fy_node *fyn) +{ + struct fy_atom atom; + struct fy_input *fyi = NULL; + struct fy_token *fyt = NULL; + char *buf = NULL; + + if (!fyn) + return NULL; + + /* if it's non synthetic we can use the node extends */ + if (!fy_node_is_synthetic(fyn)) + return fy_node_non_synthesized_token(fyn); + + /* emit to a string and create the token there */ + buf = fy_emit_node_to_string(fyn, FYECF_MODE_FLOW_ONELINE | FYECF_WIDTH_INF); + if (!buf) + goto err_out; + + fyi = fy_input_from_malloc_data(buf, FY_NT, &atom, true); + if (!fyi) + goto err_out; + + fyt = fy_token_create(FYTT_INPUT_MARKER, &atom); + if (!fyt) + goto err_out; + + /* take away the input reference */ + fy_input_unref(fyi); + + return fyt; + +err_out: + fy_input_unref(fyi); + if (buf) + free(buf); + return NULL; +} + +bool fy_node_uses_single_input_only(struct fy_node *fyn, struct fy_input *fyi) +{ + struct fy_node *fyni; + struct fy_node_pair *fynp; + + if (!fyn || !fyi) + return false; + + switch (fyn->type) { + case FYNT_SCALAR: + return fy_token_get_input(fyn->scalar) == fyi; + + case FYNT_SEQUENCE: + if (fy_token_get_input(fyn->sequence_start) != fyi) + return false; + + for (fyni = fy_node_list_head(&fyn->sequence); fyni; + fyni = fy_node_next(&fyn->sequence, fyni)) { + + if (!fy_node_uses_single_input_only(fyni, fyi)) + return false; + } + + if (fy_token_get_input(fyn->sequence_end) != fyi) + return false; + break; + + case FYNT_MAPPING: + if (fy_token_get_input(fyn->mapping_start) != fyi) + return false; + + for (fynp = fy_node_pair_list_head(&fyn->mapping); fynp; + fynp = fy_node_pair_next(&fyn->mapping, fynp)) { + 
+ if (fynp->key && !fy_node_uses_single_input_only(fynp->key, fyi)) + return false; + + if (fynp->value && !fy_node_uses_single_input_only(fynp->value, fyi)) + return false; + } + + if (fy_token_get_input(fyn->mapping_end) != fyi) + return false; + + break; + } + + return true; +} + +struct fy_input *fy_node_get_first_input(struct fy_node *fyn) +{ + if (!fyn) + return NULL; + + switch (fyn->type) { + case FYNT_SCALAR: + return fy_token_get_input(fyn->scalar); + + case FYNT_SEQUENCE: + return fy_token_get_input(fyn->sequence_start); + + case FYNT_MAPPING: + return fy_token_get_input(fyn->mapping_start); + } + + /* should never happen */ + return NULL; +} + +/* a node is synthetic if any of it's tokens reside in + * different inputs, or any sequence/mapping has been + * created via the manual sequence/mapping creation methods + */ +bool fy_node_is_synthetic(struct fy_node *fyn) +{ + return fyn && fyn->synthetic; +} + +/* map this node and all of it's parents synthetic */ +void fy_node_mark_synthetic(struct fy_node *fyn) +{ + if (!fyn) + return; + fyn->synthetic = true; + while ((fyn = fy_node_get_document_parent(fyn)) != NULL) + fyn->synthetic = true; +} + +struct fy_input *fy_node_get_input(struct fy_node *fyn) +{ + struct fy_input *fyi = NULL; + + fyi = fy_node_get_first_input(fyn); + if (!fyi) + return NULL; + + return fy_node_uses_single_input_only(fyn, fyi) ? 
fyi : NULL; +} + +int fy_document_register_anchor(struct fy_document *fyd, + struct fy_node *fyn, struct fy_token *anchor) +{ + struct fy_anchor *fya, *fyam; + struct fy_accel_entry *xle; + const char *text; + size_t text_len; + int rc; + + fya = fy_anchor_create(fyd, fyn, anchor); + fyd_error_check(fyd, fya, err_out, + "fy_anchor_create() failed"); + + fy_anchor_list_add_tail(&fyd->anchors, fya); + if (fy_document_is_accelerated(fyd)) { + xle = fy_accel_entry_lookup(fyd->axl, fya->anchor); + if (xle) { + fyam = (void *)xle->value; + /* multiple */ + if (!fyam->multiple) + fyam->multiple = true; + fya->multiple = true; + + text = fy_anchor_get_text(fya, &text_len); + fyd_notice(fyd, "register anchor %.*s is multiple", (int)text_len, text); + } + + xle = fy_accel_entry_insert(fyd->axl, fya->anchor, fya); + fyd_error_check(fyd, xle, err_out, + "fy_accel_entry_insert() fyd->axl failed"); + } + + if (fy_document_is_accelerated(fyd)) { + rc = fy_accel_insert(fyd->naxl, fyn, fya); + fyd_error_check(fyd, !rc, err_out_rc, + "fy_accel_insert() fyd->naxl failed"); + } + + return 0; + +err_out: + rc = -1; +err_out_rc: + fyd->diag->on_error = false; + return rc; +} + +struct fy_node_cmp_arg { + fy_node_scalar_compare_fn cmp_fn; + void *arg; +}; + +static int fy_node_scalar_cmp_default(struct fy_node *fyn_a, + struct fy_node *fyn_b, + void *arg); + +static int fy_node_mapping_sort_cmp_default(const struct fy_node_pair *fynp_a, + const struct fy_node_pair *fynp_b, + void *arg); + +bool fy_node_compare_user(struct fy_node *fyn1, struct fy_node *fyn2, + fy_node_mapping_sort_fn sort_fn, void *sort_fn_arg, + fy_node_scalar_compare_fn cmp_fn, void *cmp_fn_arg) +{ + struct fy_node *fyni1, *fyni2; + struct fy_node_pair *fynp1, *fynp2; + bool ret, null1, null2; + struct fy_node_pair **fynpp1, **fynpp2; + int i, count1, count2; + bool alias1, alias2; + struct fy_node_cmp_arg def_arg; + + if (!cmp_fn) { + cmp_fn = fy_node_scalar_cmp_default; + cmp_fn_arg = NULL; + } + if (!sort_fn) { + 
sort_fn = fy_node_mapping_sort_cmp_default; + def_arg.cmp_fn = cmp_fn; + def_arg.arg = cmp_fn_arg; + sort_fn_arg = &def_arg; + } else { + def_arg.cmp_fn = NULL; + def_arg.arg = NULL; + } + /* equal pointers? */ + if (fyn1 == fyn2) + return true; + + null1 = !fyn1 || (fyn1->type == FYNT_SCALAR && fy_token_get_text_length(fyn1->scalar) == 0); + null2 = !fyn2 || (fyn2->type == FYNT_SCALAR && fy_token_get_text_length(fyn2->scalar) == 0); + + /* both null */ + if (null1 && null2) + return true; + + /* either is NULL, no match */ + if (null1 || null2) + return false; + + /* types must match */ + if (fyn1->type != fyn2->type) + return false; + + ret = true; + + switch (fyn1->type) { + case FYNT_SEQUENCE: + fyni1 = fy_node_list_head(&fyn1->sequence); + fyni2 = fy_node_list_head(&fyn2->sequence); + while (fyni1 && fyni2) { + + ret = fy_node_compare(fyni1, fyni2); + if (!ret) + break; + + fyni1 = fy_node_next(&fyn1->sequence, fyni1); + fyni2 = fy_node_next(&fyn2->sequence, fyni2); + } + if (ret && fyni1 != fyni2 && (!fyni1 || !fyni2)) + ret = false; + + break; + + case FYNT_MAPPING: + count1 = fy_node_mapping_item_count(fyn1); + count2 = fy_node_mapping_item_count(fyn2); + + /* mapping counts must match */ + if (count1 != count2) { + ret = false; + break; + } + + fynpp1 = FY_ALLOCA(sizeof(*fynpp1) * (count1 + 1)); + fy_node_mapping_fill_array(fyn1, fynpp1, count1); + fy_node_mapping_perform_sort(fyn1, sort_fn, sort_fn_arg, fynpp1, count1); + + fynpp2 = FY_ALLOCA(sizeof(*fynpp2) * (count2 + 1)); + fy_node_mapping_fill_array(fyn2, fynpp2, count2); + fy_node_mapping_perform_sort(fyn2, sort_fn, sort_fn_arg, fynpp2, count2); + + for (i = 0; i < count1; i++) { + fynp1 = fynpp1[i]; + fynp2 = fynpp2[i]; + + ret = fy_node_compare(fynp1->key, fynp2->key); + if (!ret) + break; + + ret = fy_node_compare(fynp1->value, fynp2->value); + if (!ret) + break; + } + if (i >= count1) + ret = true; + + break; + + case FYNT_SCALAR: + alias1 = fy_node_is_alias(fyn1); + alias2 = 
fy_node_is_alias(fyn2); + + /* either both must be aliases or both not */ + if (alias1 != alias2) + return false; + + ret = !cmp_fn(fyn1, fyn2, cmp_fn_arg); + break; + } + + return ret; +} + +bool fy_node_compare(struct fy_node *fyn1, struct fy_node *fyn2) +{ + return fy_node_compare_user(fyn1, fyn2, NULL, NULL, NULL, NULL); +} + +bool fy_node_compare_string(struct fy_node *fyn, const char *str, size_t len) +{ + struct fy_document *fyd = NULL; + bool ret; + + fyd = fy_document_build_from_string(NULL, str, len); + if (!fyd) + return false; + + ret = fy_node_compare(fyn, fy_document_root(fyd)); + + fy_document_destroy(fyd); + + return ret; +} + +bool fy_node_compare_token(struct fy_node *fyn, struct fy_token *fyt) +{ + /* check if there's NULL */ + if (!fyn || !fyt) + return false; + + /* only valid for scalars */ + if (!fy_node_is_scalar(fyn) || fyt->type != FYTT_SCALAR) + return false; + + return fy_token_cmp(fyn->scalar, fyt) == 0; +} + +bool fy_node_compare_text(struct fy_node *fyn, const char *text, size_t len) +{ + const char *textn; + size_t lenn; + + if (!fyn || !text) + return false; + + textn = fy_node_get_scalar(fyn, &lenn); + if (!textn) + return false; + + if (len == FY_NT) + len = strlen(text); + + if (len != lenn) + return false; + + return memcmp(text, textn, len) == 0; +} + +struct fy_node_pair *fy_node_mapping_lookup_pair(struct fy_node *fyn, struct fy_node *fyn_key) +{ + struct fy_node_pair *fynpi, *fynp; + + /* sanity check */ + if (!fy_node_is_mapping(fyn)) + return NULL; + + fynp = NULL; + + + if (fyn->xl) { + fynp = fy_node_accel_lookup_by_node(fyn, fyn_key); + } else { + for (fynpi = fy_node_pair_list_head(&fyn->mapping); fynpi; + fynpi = fy_node_pair_next(&fyn->mapping, fynpi)) { + if (fy_node_compare(fynpi->key, fyn_key)) { + fynp = fynpi; + break; + } + } + } + + return fynp; +} + +int fy_node_mapping_get_pair_index(struct fy_node *fyn, const struct fy_node_pair *fynp) +{ + struct fy_node_pair *fynpi; + int i; + + for (i = 0, fynpi = 
fy_node_pair_list_head(&fyn->mapping); fynpi; + fynpi = fy_node_pair_next(&fyn->mapping, fynpi), i++) { + + if (fynpi == fynp) + return i; + } + + return -1; +} + +bool fy_node_mapping_key_is_duplicate(struct fy_node *fyn, struct fy_node *fyn_key) +{ + return fy_node_mapping_lookup_pair(fyn, fyn_key) != NULL; +} + +static int +fy_parse_document_load_node(struct fy_parser *fyp, struct fy_document *fyd, + struct fy_eventp *fyep, struct fy_node **fynp, + int *depthp); + +int fy_parse_document_load_alias(struct fy_parser *fyp, struct fy_document *fyd, struct fy_eventp *fyep, struct fy_node **fynp) +{ + *fynp = NULL; + + fyp_doc_debug(fyp, "in %s", __func__); + + /* TODO verify aliases etc */ + fy_parse_eventp_recycle(fyp, fyep); + return 0; +} + +static int +fy_parse_document_load_scalar(struct fy_parser *fyp, struct fy_document *fyd, + struct fy_eventp *fyep, struct fy_node **fynp, + int *depthp) +{ + struct fy_node *fyn = NULL; + struct fy_event *fye; + int rc; + + if (!fyd) + return -1; + + fyp_error_check(fyp, fyep || !fyp->stream_error, err_out, + "no event to process"); + + FYP_PARSE_ERROR_CHECK(fyp, 0, 0, FYEM_DOC, + fyep, err_out, + "premature end of event stream"); + + fyp_doc_debug(fyp, "in %s [%s]", __func__, fy_event_type_txt[fyep->e.type]); + + *fynp = NULL; + + fye = &fyep->e; + + /* we don't free nodes that often, so no need for recycling */ + fyn = fy_node_alloc(fyd, FYNT_SCALAR); + fyp_error_check(fyp, fyn, err_out, + "fy_node_alloc() failed"); + + if (fye->type == FYET_SCALAR) { + + /* move the tags and value to the node */ + if (fye->scalar.value) + fyn->style = fy_node_style_from_scalar_style(fye->scalar.value->scalar.style); + else + fyn->style = FYNS_PLAIN; + fyn->tag = fye->scalar.tag; + fye->scalar.tag = NULL; + + fyn->scalar = fye->scalar.value; + fye->scalar.value = NULL; + + if (fye->scalar.anchor) { + rc = fy_document_register_anchor(fyd, fyn, fye->scalar.anchor); + fyp_error_check(fyp, !rc, err_out_rc, + "fy_document_register_anchor() 
failed"); + fye->scalar.anchor = NULL; + } + + } else if (fye->type == FYET_ALIAS) { + fyn->style = FYNS_ALIAS; + fyn->scalar = fye->alias.anchor; + fye->alias.anchor = NULL; + } else + assert(0); + + *fynp = fyn; + fyn = NULL; + + /* everything OK */ + fy_parse_eventp_recycle(fyp, fyep); + return 0; + +err_out: + rc = -1; +err_out_rc: + fy_parse_eventp_recycle(fyp, fyep); + fyd->diag->on_error = false; + return rc; +} + +static int +fy_parse_document_load_sequence(struct fy_parser *fyp, struct fy_document *fyd, + struct fy_eventp *fyep, struct fy_node **fynp, + int *depthp) +{ + struct fy_node *fyn = NULL, *fyn_item = NULL; + struct fy_event *fye = NULL; + struct fy_token *fyt_ss = NULL; + int rc; + + fyp_error_check(fyp, fyep || !fyp->stream_error, err_out, + "no event to process"); + + FYP_PARSE_ERROR_CHECK(fyp, 0, 0, FYEM_DOC, + fyep, err_out, + "premature end of event stream"); + + fyp_doc_debug(fyp, "in %s [%s]", __func__, fy_event_type_txt[fyep->e.type]); + + *fynp = NULL; + + fye = &fyep->e; + + fyt_ss = fye->sequence_start.sequence_start; + + /* we don't free nodes that often, so no need for recycling */ + fyn = fy_node_alloc(fyd, FYNT_SEQUENCE); + fyp_error_check(fyp, fyn, err_out, + "fy_node_alloc() failed"); + + fyn->style = fyt_ss && fyt_ss->type == FYTT_FLOW_SEQUENCE_START ? 
FYNS_FLOW : FYNS_BLOCK; + + fyn->tag = fye->sequence_start.tag; + fye->sequence_start.tag = NULL; + + if (fye->sequence_start.anchor) { + rc = fy_document_register_anchor(fyd, fyn, fye->sequence_start.anchor); + fyp_error_check(fyp, !rc, err_out_rc, + "fy_document_register_anchor() failed"); + fye->sequence_start.anchor = NULL; + } + + if (fye->sequence_start.sequence_start) { + fyn->sequence_start = fye->sequence_start.sequence_start; + fye->sequence_start.sequence_start = NULL; + } else + fyn->sequence_start = NULL; + + assert(fyn->sequence_start); + + /* done with this */ + fy_parse_eventp_recycle(fyp, fyep); + fyep = NULL; + + while ((fyep = fy_parse_private(fyp)) != NULL) { + fye = &fyep->e; + if (fye->type == FYET_SEQUENCE_END) + break; + + rc = fy_parse_document_load_node(fyp, fyd, fyep, &fyn_item, depthp); + fyep = NULL; + fyp_error_check(fyp, !rc, err_out_rc, + "fy_parse_document_load_node() failed"); + + fy_node_list_add_tail(&fyn->sequence, fyn_item); + fyn_item->attached = true; + fyn_item = NULL; + } + + if (!fyep) + goto err_out; + + if (fye->sequence_end.sequence_end) { + fyn->sequence_end = fye->sequence_end.sequence_end; + fye->sequence_end.sequence_end = NULL; + } else + fyn->sequence_end = NULL; + + assert(fyn->sequence_end); + + *fynp = fyn; + fyn = NULL; + + fy_parse_eventp_recycle(fyp, fyep); + return 0; + + /* fallthrough */ +err_out: + rc = -1; +err_out_rc: + fy_parse_eventp_recycle(fyp, fyep); + fy_node_detach_and_free(fyn_item); + fy_node_detach_and_free(fyn); + return rc; +} + +static int +fy_parse_document_load_mapping(struct fy_parser *fyp, struct fy_document *fyd, + struct fy_eventp *fyep, struct fy_node **fynp, + int *depthp) +{ + struct fy_node *fyn = NULL, *fyn_key = NULL, *fyn_value = NULL; + struct fy_node_pair *fynp_item = NULL; + struct fy_event *fye = NULL; + struct fy_token *fyt_ms = NULL; + bool duplicate; + int rc; + + fyp_error_check(fyp, fyep || !fyp->stream_error, err_out, + "no event to process"); + + 
FYP_PARSE_ERROR_CHECK(fyp, 0, 0, FYEM_DOC, + fyep, err_out, + "premature end of event stream"); + + fyp_doc_debug(fyp, "in %s [%s]", __func__, fy_event_type_txt[fyep->e.type]); + + *fynp = NULL; + + fye = &fyep->e; + + fyt_ms = fye->mapping_start.mapping_start; + + /* we don't free nodes that often, so no need for recycling */ + fyn = fy_node_alloc(fyd, FYNT_MAPPING); + fyp_error_check(fyp, fyn, err_out, + "fy_node_alloc() failed"); + + fyn->style = fyt_ms && fyt_ms->type == FYTT_FLOW_MAPPING_START ? FYNS_FLOW : FYNS_BLOCK; + + fyn->tag = fye->mapping_start.tag; + fye->mapping_start.tag = NULL; + + if (fye->mapping_start.anchor) { + rc = fy_document_register_anchor(fyd, fyn, fye->mapping_start.anchor); + fyp_error_check(fyp, !rc, err_out_rc, + "fy_document_register_anchor() failed"); + fye->mapping_start.anchor = NULL; + } + + if (fye->mapping_start.mapping_start) { + fyn->mapping_start = fye->mapping_start.mapping_start; + fye->mapping_start.mapping_start = NULL; + } + + assert(fyn->mapping_start); + + /* done with this */ + fy_parse_eventp_recycle(fyp, fyep); + fyep = NULL; + + while ((fyep = fy_parse_private(fyp)) != NULL) { + fye = &fyep->e; + if (fye->type == FYET_MAPPING_END) + break; + + fynp_item = fy_node_pair_alloc(fyd); + fyp_error_check(fyp, fynp_item, err_out, + "fy_node_pair_alloc() failed"); + + fyn_key = NULL; + fyn_value = NULL; + + rc = fy_parse_document_load_node(fyp, fyd, + fyep, &fyn_key, depthp); + fyep = NULL; + + assert(fyn_key); + + fyp_error_check(fyp, !rc, err_out_rc, + "fy_parse_document_load_node() failed"); + + /* if we don't allow duplicate keys */ + if (!(fyd->parse_cfg.flags & FYPCF_ALLOW_DUPLICATE_KEYS)) { + + /* make sure we don't add an already existing key */ + duplicate = fy_node_mapping_key_is_duplicate(fyn, fyn_key); + + FYP_NODE_ERROR_CHECK(fyp, fyn_key, FYEM_DOC, + !duplicate, err_out, + "duplicate key"); + } + + fyep = fy_parse_private(fyp); + + fyp_error_check(fyp, fyep || !fyp->stream_error, err_out, + 
"fy_parse_private() failed"); + + FYP_PARSE_ERROR_CHECK(fyp, 0, 0, FYEM_DOC, + fyep, err_out, + "missing mapping value"); + + fye = &fyep->e; + + rc = fy_parse_document_load_node(fyp, fyd, + fyep, &fyn_value, depthp); + fyep = NULL; + fyp_error_check(fyp, !rc, err_out_rc, + "fy_parse_document_load_node() failed"); + + assert(fyn_value); + + fynp_item->key = fyn_key; + fynp_item->value = fyn_value; + fy_node_pair_list_add_tail(&fyn->mapping, fynp_item); + if (fyn->xl) { + rc = fy_accel_insert(fyn->xl, fynp_item->key, fynp_item); + fyp_error_check(fyp, !rc, err_out_rc, + "fy_accel_insert() failed"); + } + + if (fynp_item->key) + fynp_item->key->attached = true; + if (fynp_item->value) + fynp_item->value->attached = true; + fynp_item = NULL; + fyn_key = NULL; + fyn_value = NULL; + } + + if (!fyep) + goto err_out; + + if (fye->mapping_end.mapping_end) { + fyn->mapping_end = fye->mapping_end.mapping_end; + fye->mapping_end.mapping_end = NULL; + } + + assert(fyn->mapping_end); + + *fynp = fyn; + fyn = NULL; + + fy_parse_eventp_recycle(fyp, fyep); + + return 0; + +err_out: + rc = -1; +err_out_rc: + fy_parse_eventp_recycle(fyp, fyep); + fy_node_pair_free(fynp_item); + fy_node_detach_and_free(fyn_key); + fy_node_detach_and_free(fyn_value); + fy_node_detach_and_free(fyn); + return rc; +} + +static int +fy_parse_document_load_node(struct fy_parser *fyp, struct fy_document *fyd, + struct fy_eventp *fyep, struct fy_node **fynp, + int *depthp) +{ + struct fy_event *fye; + enum fy_event_type type; + int ret; + + *fynp = NULL; + + fyp_error_check(fyp, fyep || !fyp->stream_error, err_out, + "no event to process"); + + FYP_PARSE_ERROR_CHECK(fyp, 0, 0, FYEM_DOC, + fyep, err_out, + "premature end of event stream"); + + fyp_doc_debug(fyp, "in %s [%s]", __func__, fy_event_type_txt[fyep->e.type]); + + fye = &fyep->e; + + type = fye->type; + + FYP_TOKEN_ERROR_CHECK(fyp, fy_event_get_token(fye), FYEM_DOC, + type == FYET_ALIAS || type == FYET_SCALAR || + type == FYET_SEQUENCE_START || type 
== FYET_MAPPING_START, err_out, + "bad event"); + + (*depthp)++; + + FYP_TOKEN_ERROR_CHECK(fyp, fy_event_get_token(fye), FYEM_DOC, + ((fyp->cfg.flags & FYPCF_DISABLE_DEPTH_LIMIT) || + *depthp <= fy_depth_limit()), err_out, + "depth limit exceeded"); + + switch (type) { + + case FYET_ALIAS: + case FYET_SCALAR: + ret = fy_parse_document_load_scalar(fyp, fyd, + fyep, fynp, depthp); + break; + + case FYET_SEQUENCE_START: + ret = fy_parse_document_load_sequence(fyp, fyd, + fyep, fynp, depthp); + break; + + case FYET_MAPPING_START: + ret = fy_parse_document_load_mapping(fyp, fyd, + fyep, fynp, depthp); + break; + + default: + ret = 0; + break; + } + + --(*depthp); + return ret; + +err_out: + fy_parse_eventp_recycle(fyp, fyep); + return -1; +} + +int fy_parse_document_load_end(struct fy_parser *fyp, struct fy_document *fyd, struct fy_eventp *fyep) +{ + struct fy_event *fye; + int rc; + + fyp_error_check(fyp, fyep || !fyp->stream_error, err_out, + "no event to process"); + + FYP_PARSE_ERROR_CHECK(fyp, 0, 0, FYEM_DOC, + fyep, err_out, + "premature end of event stream"); + + fyp_doc_debug(fyp, "in %s [%s]", __func__, fy_event_type_txt[fyep->e.type]); + + fye = &fyep->e; + + FYP_TOKEN_ERROR_CHECK(fyp, fy_event_get_token(fye), FYEM_DOC, + fye->type == FYET_DOCUMENT_END, err_out, + "bad event"); + + /* recycle the document end event */ + fy_parse_eventp_recycle(fyp, fyep); + + return 0; +err_out: + rc = -1; + fy_parse_eventp_recycle(fyp, fyep); + return rc; +} + +struct fy_document *fy_parse_load_document_recursive(struct fy_parser *fyp) +{ + struct fy_document *fyd = NULL; + struct fy_eventp *fyep = NULL; + struct fy_event *fye = NULL; + int rc, depth; + bool was_stream_start; + +again: + was_stream_start = false; + do { + /* get next event */ + fyep = fy_parse_private(fyp); + + /* no more */ + if (!fyep) + return NULL; + + was_stream_start = fyep->e.type == FYET_STREAM_START; + + if (was_stream_start) { + fy_parse_eventp_recycle(fyp, fyep); + fyep = NULL; + } + + } while 
(was_stream_start); + + fye = &fyep->e; + + /* STREAM_END */ + if (fye->type == FYET_STREAM_END) { + fy_parse_eventp_recycle(fyp, fyep); + + /* final STREAM_END? */ + if (fyp->state == FYPS_END) + return NULL; + + /* multi-stream */ + goto again; + } + + FYP_TOKEN_ERROR_CHECK(fyp, fy_event_get_token(fye), FYEM_DOC, + fye->type == FYET_DOCUMENT_START, err_out, + "bad event"); + + fyd = fy_parse_document_create(fyp, fyep); + fyep = NULL; + + fyp_error_check(fyp, fyd, err_out, + "fy_parse_document_create() failed"); + + fyp_doc_debug(fyp, "calling load_node() for root"); + depth = 0; + rc = fy_parse_document_load_node(fyp, fyd, fy_parse_private(fyp), + &fyd->root, &depth); + fyp_error_check(fyp, !rc, err_out, + "fy_parse_document_load_node() failed"); + + rc = fy_parse_document_load_end(fyp, fyd, fy_parse_private(fyp)); + fyp_error_check(fyp, !rc, err_out, + "fy_parse_document_load_node() failed"); + + /* always resolve parents */ + fy_resolve_parent_node(fyd, fyd->root, NULL); + + if (fyp->cfg.flags & FYPCF_RESOLVE_DOCUMENT) { + rc = fy_document_resolve(fyd); + fyp_error_check(fyp, !rc, err_out, + "fy_document_resolve() failed"); + } + + return fyd; + +err_out: + fy_parse_eventp_recycle(fyp, fyep); + fy_parse_document_destroy(fyp, fyd); + return NULL; +} + +struct fy_document *fy_parse_load_document_with_builder(struct fy_parser *fyp) +{ + struct fy_document_builder_cfg cfg; + struct fy_document *fyd; + int rc; + + if (!fyp) + return NULL; + + if (!fyp->fydb) { + memset(&cfg, 0, sizeof(cfg)); + cfg.parse_cfg = fyp->cfg; + cfg.userdata = fyp; + cfg.diag = fy_diag_ref(fyp->diag); + + fyp->fydb = fy_document_builder_create(&cfg); + if (!fyp->fydb) + return NULL; + } + + fyd = fy_document_builder_load_document(fyp->fydb, fyp); + if (!fyd) + return NULL; + + if (fyp->cfg.flags & FYPCF_RESOLVE_DOCUMENT) { + rc = fy_document_resolve(fyd); + if (rc) { + fy_document_destroy(fyd); + fyp->stream_error = true; + return NULL; + } + } + + return fyd; +} + +struct fy_document 
*fy_parse_load_document(struct fy_parser *fyp) +{ + if (!fyp) + return NULL; + + return !(fyp->cfg.flags & FYPCF_PREFER_RECURSIVE) ? + fy_parse_load_document_with_builder(fyp) : + fy_parse_load_document_recursive(fyp); +} + +struct fy_node *fy_node_copy_internal(struct fy_document *fyd, struct fy_node *fyn_from, + struct fy_node *fyn_parent) +{ + struct fy_document *fyd_from; + struct fy_node *fyn, *fyni, *fynit; + struct fy_node_pair *fynp, *fynpt; + struct fy_anchor *fya, *fya_from; + const char *anchor; + size_t anchor_len; + int rc; + + if (!fyd || !fyn_from || !fyn_from->fyd) + return NULL; + + fyd_from = fyn_from->fyd; + + fyn = fy_node_alloc(fyd, fyn_from->type); + fyd_error_check(fyd, fyn, err_out, + "fy_node_alloc() failed"); + + fyn->tag = fy_token_ref(fyn_from->tag); + fyn->style = fyn_from->style; + fyn->parent = fyn_parent; + + switch (fyn->type) { + case FYNT_SCALAR: + fyn->scalar = fy_token_ref(fyn_from->scalar); + break; + + case FYNT_SEQUENCE: + for (fyni = fy_node_list_head(&fyn_from->sequence); fyni; + fyni = fy_node_next(&fyn_from->sequence, fyni)) { + + fynit = fy_node_copy_internal(fyd, fyni, fyn); + fyd_error_check(fyd, fynit, err_out, + "fy_node_copy_internal() failed"); + + fy_node_list_add_tail(&fyn->sequence, fynit); + fynit->attached = true; + } + + break; + case FYNT_MAPPING: + for (fynp = fy_node_pair_list_head(&fyn_from->mapping); fynp; + fynp = fy_node_pair_next(&fyn_from->mapping, fynp)) { + + fynpt = fy_node_pair_alloc(fyd); + fyd_error_check(fyd, fynpt, err_out, + "fy_node_pair_alloc() failed"); + + fynpt->key = fy_node_copy_internal(fyd, fynp->key, fyn); + fynpt->value = fy_node_copy_internal(fyd, fynp->value, fyn); + fynp->parent = fyn; + + fy_node_pair_list_add_tail(&fyn->mapping, fynpt); + if (fyn->xl) { + rc = fy_accel_insert(fyn->xl, fynpt->key, fynpt); + fyd_error_check(fyd, !rc, err_out, + "fy_accel_insert() failed"); + } + if (fynpt->key) { + fynpt->key->attached = true; + fynpt->key->key_root = true; + } + if 
(fynpt->value) + fynpt->value->attached = true; + } + break; + } + + /* drop an anchor to the copy */ + for (fya_from = fy_anchor_list_head(&fyd_from->anchors); fya_from; + fya_from = fy_anchor_next(&fyd_from->anchors, fya_from)) { + if (fyn_from == fya_from->fyn) + break; + } + + /* source node has an anchor */ + if (fya_from) { + fya = fy_document_lookup_anchor_by_token(fyd, fya_from->anchor); + if (!fya) { + fyd_doc_debug(fyd, "new anchor"); + /* update the new anchor position */ + rc = fy_document_register_anchor(fyd, fyn, fya_from->anchor); + fyd_error_check(fyd, !rc, err_out, + "fy_document_register_anchor() failed"); + + fy_token_ref(fya_from->anchor); + } else { + anchor = fy_anchor_get_text(fya, &anchor_len); + fyd_error_check(fyd, anchor, err_out, + "fy_anchor_get_text() failed"); + fyd_doc_debug(fyd, "not overwritting anchor %.*s", (int)anchor_len, anchor); + } + } + + return fyn; + +err_out: + return NULL; +} + +struct fy_node *fy_node_copy(struct fy_document *fyd, struct fy_node *fyn_from) +{ + struct fy_node *fyn; + + if (!fyd) + return NULL; + + fyn = fy_node_copy_internal(fyd, fyn_from, NULL); + if (!fyn) { + fyd->diag->on_error = false; + return NULL; + } + + return fyn; +} + +struct fy_document *fy_document_clone(struct fy_document *fydsrc) +{ + struct fy_document *fyd = NULL; + + if (!fydsrc) + return NULL; + + fyd = fy_document_create(&fydsrc->parse_cfg); + if (!fyd) + return NULL; + + /* drop the default document state */ + fy_document_state_unref(fyd->fyds); + /* and use the source document state (and ref it) */ + fyd->fyds = fy_document_state_ref(fydsrc->fyds); + assert(fyd->fyds); + + if (fydsrc->root) { + fyd->root = fy_node_copy(fyd, fydsrc->root); + if (!fyd->root) + goto err_out; + } + + return fyd; +err_out: + fy_document_destroy(fyd); + return NULL; +} + +int fy_node_copy_to_scalar(struct fy_document *fyd, struct fy_node *fyn_to, struct fy_node *fyn_from) +{ + struct fy_node *fyn, *fyni; + struct fy_node_pair *fynp; + + fyn = 
fy_node_copy(fyd, fyn_from); + if (!fyn) + return -1; + + /* the node is guaranteed to be a scalar */ + fy_token_unref(fyn_to->tag); + fyn_to->tag = NULL; + fy_token_unref(fyn_to->scalar); + fyn_to->scalar = NULL; + + fyn_to->type = fyn->type; + fyn_to->tag = fy_token_ref(fyn->tag); + fyn_to->style = fyn->style; + + switch (fyn->type) { + case FYNT_SCALAR: + fyn_to->scalar = fyn->scalar; + fyn->scalar = NULL; + break; + case FYNT_SEQUENCE: + fy_node_list_init(&fyn_to->sequence); + while ((fyni = fy_node_list_pop(&fyn->sequence)) != NULL) + fy_node_list_add_tail(&fyn_to->sequence, fyni); + break; + case FYNT_MAPPING: + fy_node_pair_list_init(&fyn_to->mapping); + while ((fynp = fy_node_pair_list_pop(&fyn->mapping)) != NULL) { + if (fyn->xl) + fy_accel_remove(fyn->xl, fynp->key); + fy_node_pair_list_add_tail(&fyn_to->mapping, fynp); + if (fyn_to->xl) + fy_accel_insert(fyn_to->xl, fynp->key, fynp); + } + break; + } + + /* and free */ + fy_node_free(fyn); + + return 0; +} + +static int fy_document_node_update_tags(struct fy_document *fyd, struct fy_node *fyn) +{ + struct fy_node *fyni; + struct fy_node_pair *fynp, *fynpi; + struct fy_token *fyt_td; + const char *handle; + size_t handle_size; + int rc; + + if (!fyd || !fyn) + return 0; + + /* replace tag reference with the one that the document contains */ + if (fyn->tag) { + fyd_error_check(fyd, fyn->tag->type == FYTT_TAG, err_out, + "bad node tag"); + + handle = fy_tag_directive_token_handle(fyn->tag->tag.fyt_td, &handle_size); + fyd_error_check(fyd, handle, err_out, + "bad tag directive token"); + + fyt_td = fy_document_state_lookup_tag_directive(fyd->fyds, handle, handle_size); + fyd_error_check(fyd, fyt_td, err_out, + "Missing tag directive with handle=%.*s", (int)handle_size, handle); + + /* need to replace this */ + if (fyt_td != fyn->tag->tag.fyt_td) { + fy_token_unref(fyn->tag->tag.fyt_td); + fyn->tag->tag.fyt_td = fy_token_ref(fyt_td); + + } + } + + + switch (fyn->type) { + case FYNT_SCALAR: + break; + + case 
FYNT_SEQUENCE: + for (fyni = fy_node_list_head(&fyn->sequence); fyni; + fyni = fy_node_next(&fyn->sequence, fyni)) { + + rc = fy_document_node_update_tags(fyd, fyni); + if (rc) + goto err_out_rc; + } + break; + + case FYNT_MAPPING: + for (fynp = fy_node_pair_list_head(&fyn->mapping); fynp; fynp = fynpi) { + + fynpi = fy_node_pair_next(&fyn->mapping, fynp); + + /* the parent of the key is always NULL */ + rc = fy_document_node_update_tags(fyd, fynp->key); + if (rc) + goto err_out_rc; + + rc = fy_document_node_update_tags(fyd, fynp->value); + if (rc) + goto err_out_rc; + } + break; + } + + return 0; + +err_out: + rc = -1; +err_out_rc: + return rc; +} + +int fy_node_insert(struct fy_node *fyn_to, struct fy_node *fyn_from) +{ + struct fy_document *fyd; + struct fy_node *fyn_parent, *fyn_cpy, *fyni, *fyn_prev; + struct fy_node_pair *fynp, *fynpi, *fynpj; + int rc; + + if (!fyn_to || !fyn_to->fyd) + return -1; + + fyd = fyn_to->fyd; + assert(fyd); + + fyn_parent = fy_node_get_document_parent(fyn_to); + fynp = NULL; + if (fyn_parent) { + fyd_error_check(fyd, fyn_parent->type != FYNT_SCALAR, err_out, + "Illegal scalar parent node type"); + + fyd_error_check(fyd, fyn_from, err_out, + "Illegal NULL source node"); + + if (fyn_parent->type == FYNT_MAPPING) { + /* find mapping pair that contains the `to` node */ + for (fynp = fy_node_pair_list_head(&fyn_parent->mapping); fynp; + fynp = fy_node_pair_next(&fyn_parent->mapping, fynp)) { + if (fynp->value == fyn_to) + break; + } + } + } + + /* verify no funkiness on root */ + assert(fyn_parent || fyn_to == fyd->root); + + /* deleting target */ + if (!fyn_from) { + fyn_to->parent = NULL; + + if (!fyn_parent) { + fyd_doc_debug(fyd, "Deleting root node"); + fy_node_detach_and_free(fyn_to); + fyd->root = NULL; + } else if (fyn_parent->type == FYNT_SEQUENCE) { + fyd_doc_debug(fyd, "Deleting sequence node"); + fy_node_list_del(&fyn_parent->sequence, fyn_to); + fy_node_detach_and_free(fyn_to); + } else { + fyd_doc_debug(fyd, "Deleting 
mapping node"); + /* should never happen, it's checked right above, but play safe */ + assert(fyn_parent->type == FYNT_MAPPING); + + fyd_error_check(fyd, fynp, err_out, + "Illegal mapping node found"); + + fy_node_pair_list_del(&fyn_parent->mapping, fynp); + if (fyn_parent->xl) + fy_accel_remove(fyn_parent->xl, fynp->key); + /* this will also delete fyn_to */ + fy_node_pair_detach_and_free(fynp); + } + return 0; + } + + /* + * from: scalar + * + * to: another-scalar -> scalar + * to: { key: value } -> scalar + * to: [ seq0, seq1 ] -> scalar + * + * from: [ seq2 ] + * to: scalar -> [ seq2 ] + * to: { key: value } -> [ seq2 ] + * to: [ seq0, seq1 ] -> [ seq0, seq1, sec2 ] + * + * from: { another-key: another-value } + * to: scalar -> { another-key: another-value } + * to: { key: value } -> { key: value, another-key: another-value } + * to: [ seq0, seq1 ] -> { another-key: another-value } + * + * from: { key: another-value } + * to: scalar -> { key: another-value } + * to: { key: value } -> { key: another-value } + * to: [ seq0, seq1 ] -> { key: another-value } + * + */ + + /* if types of `from` and `to` differ (or it's a scalar), it's a replace */ + if (fyn_from->type != fyn_to->type || fyn_from->type == FYNT_SCALAR) { + + fyn_cpy = fy_node_copy(fyd, fyn_from); + fyd_error_check(fyd, fyn_cpy, err_out, + "fy_node_copy() failed"); + + if (!fyn_parent) { + fyd_doc_debug(fyd, "Replacing root node"); + fy_node_detach_and_free(fyd->root); + fyd->root = fyn_cpy; + } else if (fyn_parent->type == FYNT_SEQUENCE) { + fyd_doc_debug(fyd, "Replacing sequence node"); + + /* get previous */ + fyn_prev = fy_node_prev(&fyn_parent->sequence, fyn_to); + + /* delete */ + fy_node_list_del(&fyn_parent->sequence, fyn_to); + fy_node_detach_and_free(fyn_to); + + /* if there's no previous insert to head */ + if (!fyn_prev) + fy_node_list_add(&fyn_parent->sequence, fyn_cpy); + else + fy_node_list_insert_after(&fyn_parent->sequence, fyn_prev, fyn_cpy); + } else { + fyd_doc_debug(fyd, "Replacing 
mapping node value"); + /* should never happen, it's checked right above, but play safe */ + assert(fyn_parent->type == FYNT_MAPPING); + fyd_error_check(fyd, fynp, err_out, + "Illegal mapping node found"); + + fy_node_detach_and_free(fynp->value); + fynp->value = fyn_cpy; + } + + return 0; + } + + /* types match, if it's a sequence append */ + if (fyn_to->type == FYNT_SEQUENCE) { + + fyd_doc_debug(fyd, "Appending to sequence node"); + + for (fyni = fy_node_list_head(&fyn_from->sequence); fyni; + fyni = fy_node_next(&fyn_from->sequence, fyni)) { + + fyn_cpy = fy_node_copy(fyd, fyni); + fyd_error_check(fyd, fyn_cpy, err_out, + "fy_node_copy() failed"); + + fy_node_list_add_tail(&fyn_to->sequence, fyn_cpy); + fyn_cpy->attached = true; + } + } else { + /* only mapping is possible here */ + + /* iterate over all the keys in the `from` */ + for (fynpi = fy_node_pair_list_head(&fyn_from->mapping); fynpi; + fynpi = fy_node_pair_next(&fyn_from->mapping, fynpi)) { + + if (fyn_to->xl) { + fynpj = fy_node_accel_lookup_by_node(fyn_to, fynpi->key); + } else { + /* find whether the key already exists */ + for (fynpj = fy_node_pair_list_head(&fyn_to->mapping); fynpj; + fynpj = fy_node_pair_next(&fyn_to->mapping, fynpj)) { + + if (fy_node_compare(fynpi->key, fynpj->key)) + break; + } + } + + if (!fynpj) { + fyd_doc_debug(fyd, "Appending to mapping node"); + + /* not found? 
append it */ + fynpj = fy_node_pair_alloc(fyd); + fyd_error_check(fyd, fynpj, err_out, + "fy_node_pair_alloc() failed"); + + fynpj->key = fy_node_copy(fyd, fynpi->key); + fyd_error_check(fyd, !fynpi->key || fynpj->key, err_out, + "fy_node_copy() failed"); + fynpj->value = fy_node_copy(fyd, fynpi->value); + fyd_error_check(fyd, !fynpi->value || fynpj->value, err_out, + "fy_node_copy() failed"); + + fy_node_pair_list_add_tail(&fyn_to->mapping, fynpj); + if (fyn_to->xl) + fy_accel_insert(fyn_to->xl, fynpj->key, fynpj); + + if (fynpj->key) + fynpj->key->attached = true; + if (fynpj->value) + fynpj->value->attached = true; + + } else { + fyd_doc_debug(fyd, "Updating mapping node value (deep merge)"); + + rc = fy_node_insert(fynpj->value, fynpi->value); + fyd_error_check(fyd, !rc, err_out_rc, + "fy_node_insert() failed"); + } + } + } + + /* adjust parents */ + switch (fyn_to->type) { + case FYNT_SCALAR: + break; + + case FYNT_SEQUENCE: + for (fyni = fy_node_list_head(&fyn_to->sequence); fyni; + fyni = fy_node_next(&fyn_to->sequence, fyni)) { + + fyni->parent = fyn_to; + } + break; + + case FYNT_MAPPING: + for (fynp = fy_node_pair_list_head(&fyn_to->mapping); fynp; fynp = fynpi) { + + fynpi = fy_node_pair_next(&fyn_to->mapping, fynp); + + if (fynp->key) { + fynp->key->parent = fyn_to; + fynp->key->key_root = true; + } + if (fynp->value) + fynp->value->parent = fyn_to; + fynp->parent = fyn_to; + } + break; + } + + /* if the documents differ, merge their states */ + if (fyn_to->fyd != fyn_from->fyd) { + rc = fy_document_state_merge(fyn_to->fyd->fyds, fyn_from->fyd->fyds); + fyd_error_check(fyd, !rc, err_out_rc, + "fy_document_state_merge() failed"); + + rc = fy_document_node_update_tags(fyd, fy_document_root(fyd)); + fyd_error_check(fyd, !rc, err_out_rc, + "fy_document_node_update_tags() failed"); + } + + return 0; + +err_out: + rc = -1; +err_out_rc: + return rc; +} + +int fy_document_insert_at(struct fy_document *fyd, + const char *path, size_t pathlen, + struct fy_node 
*fyn) +{ + int rc; + struct fy_node *fyn2; + + fyn2 = fy_node_by_path(fy_document_root(fyd), path, pathlen, FYNWF_DONT_FOLLOW); + rc = fy_node_insert(fyn2, fyn); + + fy_node_free(fyn); + + return rc; +} + +struct fy_token *fy_document_tag_directive_iterate(struct fy_document *fyd, void **prevp) +{ + struct fy_token_list *fytl; + + if (!fyd || !fyd->fyds || !prevp) + return NULL; + + fytl = &fyd->fyds->fyt_td; + + return *prevp = *prevp ? fy_token_next(fytl, *prevp) : fy_token_list_head(fytl); +} + +struct fy_token *fy_document_tag_directive_lookup(struct fy_document *fyd, const char *handle) +{ + struct fy_token *fyt; + void *iter; + const char *h; + size_t h_size, len; + + if (!fyd || !handle) + return NULL; + len = strlen(handle); + + iter = NULL; + while ((fyt = fy_document_tag_directive_iterate(fyd, &iter)) != NULL) { + h = fy_tag_directive_token_handle(fyt, &h_size); + if (!h) + continue; + if (h_size == len && !memcmp(h, handle, len)) + return fyt; + } + return NULL; +} + +int fy_document_tag_directive_add(struct fy_document *fyd, const char *handle, const char *prefix) +{ + struct fy_token *fyt; + + if (!fyd || !fyd->fyds || !handle || !prefix) + return -1; + + /* it must not exist */ + fyt = fy_document_tag_directive_lookup(fyd, handle); + if (fyt) + return -1; + + return fy_document_state_append_tag(fyd->fyds, handle, prefix, false); +} + +int fy_document_tag_directive_remove(struct fy_document *fyd, const char *handle) +{ + struct fy_token *fyt; + + if (!fyd || !fyd->fyds || !handle) + return -1; + + /* it must not exist */ + fyt = fy_document_tag_directive_lookup(fyd, handle); + if (!fyt || fyt->refs != 1) + return -1; + + fy_token_list_del(&fyd->fyds->fyt_td, fyt); + fy_token_unref(fyt); + + return 0; +} + +static int fy_resolve_alias(struct fy_document *fyd, struct fy_node *fyn) +{ + struct fy_node *fyn_copy = NULL; + int rc; + + fyn_copy = fy_node_resolve_alias(fyn); + FYD_NODE_ERROR_CHECK(fyd, fyn, FYEM_DOC, + fyn_copy, err_out, + "invalid alias"); + 
+ rc = fy_node_copy_to_scalar(fyd, fyn, fyn_copy); + fyd_error_check(fyd, !rc, err_out, + "fy_node_copy_to_scalar() failed"); + + return 0; + +err_out: + fyd->diag->on_error = false; + return -1; +} + +static struct fy_node * +fy_node_follow_alias(struct fy_node *fyn, enum fy_node_walk_flags flags) +{ + enum fy_node_walk_flags ptr_flags; + struct fy_anchor *fya; + const char *anchor_text, *s, *e, *p, *path; + size_t anchor_len, path_len; + struct fy_node *fyn_path_root; + unsigned int marker; + + if (!fyn || !fy_node_is_alias(fyn)) + return NULL; + + ptr_flags = flags & FYNWF_PTR(FYNWF_PTR_MASK); + if (ptr_flags == FYNWF_PTR_YPATH) + return fy_node_alias_resolve_by_ypath(fyn); + + /* try regular label target */ + fya = fy_document_lookup_anchor_by_token(fyn->fyd, fyn->scalar); + if (fya) + return fya->fyn; + + anchor_text = fy_token_get_text(fyn->scalar, &anchor_len); + if (!anchor_text) + return NULL; + + s = anchor_text; + e = s + anchor_len; + + fyn_path_root = NULL; + + if (ptr_flags == FYNWF_PTR_YAML && (p = memchr(s, '/', e - s)) != NULL) { + /* fyd_notice(fyn->fyd, "%s: alias contains a path component %.*s", + __func__, (int)(e - p - 1), p + 1); */ + + if (p > s) { + + fya = fy_document_lookup_anchor(fyn->fyd, s, p - s); + if (!fya) { + /* fyd_notice(fyn->fyd, "%s: unable to resolve alias %.*s @%s", + __func__, (int)(p - s), s, fy_node_get_path(fya->fyn)); */ + return NULL; + } + + /* fyd_notice(fyn->fyd, "%s: alias base %.*s @%s", + __func__, (int)(p - s), s, fy_node_get_path(fya->fyn)); */ + path = ++p; + path_len = e - p; + + fyn_path_root = fya->fyn; + + } else { + /* fyd_notice(fyn->fyd, "%s: absolute %.*s @%s", + __func__, (int)(p - s), s, fy_node_get_path(fya->fyn)); */ + path = s; + path_len = e - s; + + fyn_path_root = fyn->fyd->root; + } + } + + if (!fyn_path_root) + return NULL; + + marker = fy_node_walk_marker_from_flags(flags); + if (marker >= FYNWF_MAX_USER_MARKER) + return NULL; + + /* use the next marker */ + flags &= 
~FYNWF_MARKER(FYNWF_MARKER_MASK); + flags |= FYNWF_MARKER(marker + 1); + + return fy_node_by_path_internal(fyn_path_root, path, path_len, flags); +} + +static bool fy_node_pair_is_merge_key(struct fy_node_pair *fynp) +{ + struct fy_node *fyn = fynp->key; + + return fyn && fyn->type == FYNT_SCALAR && fyn->style == FYNS_PLAIN && + fy_plain_atom_streq(fy_token_atom(fyn->scalar), "<<"); +} + +static struct fy_node *fy_alias_get_merge_mapping(struct fy_document *fyd, struct fy_node *fyn) +{ + struct fy_anchor *fya; + + /* must be an alias */ + if (!fy_node_is_alias(fyn)) + return NULL; + + /* anchor must exist */ + fya = fy_document_lookup_anchor_by_token(fyd, fyn->scalar); + if (!fya) + return NULL; + + /* and it must be a mapping */ + if (fya->fyn->type != FYNT_MAPPING) + return NULL; + + return fya->fyn; +} + +static bool fy_node_pair_is_valid_merge_key(struct fy_document *fyd, struct fy_node_pair *fynp) +{ + struct fy_node *fyn, *fyni, *fynm; + + fyn = fynp->value; + + /* value must exist */ + if (!fyn) + return false; + + /* scalar alias */ + fynm = fy_alias_get_merge_mapping(fyd, fyn); + if (fynm) + return true; + + /* it must be a sequence then */ + if (fyn->type != FYNT_SEQUENCE) + return false; + + /* the sequence must only contain valid aliases for mapping */ + for (fyni = fy_node_list_head(&fyn->sequence); fyni; + fyni = fy_node_next(&fyn->sequence, fyni)) { + + /* sequence of aliases only! 
*/ + fynm = fy_alias_get_merge_mapping(fyd, fyni); + if (!fynm) + return false; + + } + + return true; +} + +static int fy_resolve_merge_key_populate(struct fy_document *fyd, struct fy_node *fyn, + struct fy_node_pair *fynp, struct fy_node *fynm) +{ + struct fy_node_pair *fynpi, *fynpn; + + if (!fyd) + return -1; + + fyd_error_check(fyd, + fyn && fynp && fynm && fyn->type == FYNT_MAPPING && fynm->type == FYNT_MAPPING, + err_out, "bad inputs to %s", __func__); + + for (fynpi = fy_node_pair_list_head(&fynm->mapping); fynpi; + fynpi = fy_node_pair_next(&fynm->mapping, fynpi)) { + + /* if we don't allow duplicate keys */ + if (!(fyd->parse_cfg.flags & FYPCF_ALLOW_DUPLICATE_KEYS)) { + + /* make sure we don't override an already existing key */ + if (fy_node_mapping_key_is_duplicate(fyn, fynpi->key)) + continue; + } + + fynpn = fy_node_pair_alloc(fyd); + fyd_error_check(fyd, fynpn, err_out, + "fy_node_pair_alloc() failed"); + + fynpn->key = fy_node_copy(fyd, fynpi->key); + fynpn->value = fy_node_copy(fyd, fynpi->value); + + fy_node_pair_list_insert_after(&fyn->mapping, fynp, fynpn); + if (fyn->xl) + fy_accel_insert(fyn->xl, fynpn->key, fynpn); + } + + return 0; + +err_out: + return -1; +} + +static int fy_resolve_merge_key(struct fy_document *fyd, struct fy_node *fyn, struct fy_node_pair *fynp) +{ + struct fy_node *fynv, *fyni, *fynm; + int rc; + + /* it must be a valid merge key value */ + FYD_NODE_ERROR_CHECK(fyd, fynp->value, FYEM_DOC, + fy_node_pair_is_valid_merge_key(fyd, fynp), err_out, + "invalid merge key value"); + + fynv = fynp->value; + fynm = fy_alias_get_merge_mapping(fyd, fynv); + if (fynm) { + rc = fy_resolve_merge_key_populate(fyd, fyn, fynp, fynm); + fyd_error_check(fyd, !rc, err_out_rc, + "fy_resolve_merge_key_populate() failed"); + + return 0; + } + + /* it must be a sequence then */ + fyd_error_check(fyd, fynv->type == FYNT_SEQUENCE, err_out, + "invalid node type to use for merge key"); + + /* the sequence must only contain valid aliases for mapping 
*/ + for (fyni = fy_node_list_head(&fynv->sequence); fyni; + fyni = fy_node_next(&fynv->sequence, fyni)) { + + fynm = fy_alias_get_merge_mapping(fyd, fyni); + fyd_error_check(fyd, fynm, err_out, + "invalid merge key sequence item (not an alias)"); + + rc = fy_resolve_merge_key_populate(fyd, fyn, fynp, fynm); + fyd_error_check(fyd, !rc, err_out_rc, + "fy_resolve_merge_key_populate() failed"); + } + + return 0; + +err_out: + rc = -1; +err_out_rc: + return rc; +} + +/* the anchors are scalars that have the FYNS_ALIAS style */ +static int fy_resolve_anchor_node(struct fy_document *fyd, struct fy_node *fyn) +{ + struct fy_node *fyni; + struct fy_node_pair *fynp, *fynpi, *fynpit; + int rc, ret_rc = 0; + struct fy_token *fyt; + + if (!fyn) + return 0; + + if (fy_node_is_alias(fyn)) + return fy_resolve_alias(fyd, fyn); + + if (fyn->type == FYNT_SEQUENCE) { + + for (fyni = fy_node_list_head(&fyn->sequence); fyni; + fyni = fy_node_next(&fyn->sequence, fyni)) { + + rc = fy_resolve_anchor_node(fyd, fyni); + if (rc && !ret_rc) + ret_rc = rc; + } + + } else if (fyn->type == FYNT_MAPPING) { + + for (fynp = fy_node_pair_list_head(&fyn->mapping); fynp; fynp = fynpi) { + + fynpi = fy_node_pair_next(&fyn->mapping, fynp); + + if (fy_node_pair_is_merge_key(fynp)) { + rc = fy_resolve_merge_key(fyd, fyn, fynp); + if (rc && !ret_rc) + ret_rc = rc; + + /* remove this node pair */ + if (!rc) { + fy_node_pair_list_del(&fyn->mapping, fynp); + if (fyn->xl) + fy_accel_remove(fyn->xl, fynp->key); + fy_node_pair_detach_and_free(fynp); + } + + } else { + + rc = fy_resolve_anchor_node(fyd, fynp->key); + + if (!rc) { + + /* check whether the keys are duplicate */ + for (fynpit = fy_node_pair_list_head(&fyn->mapping); fynpit; + fynpit = fy_node_pair_next(&fyn->mapping, fynpit)) { + + /* skip this node pair */ + if (fynpit == fynp) + continue; + + if (!fy_node_compare(fynpit->key, fynp->key)) + continue; + + /* whoops, duplicate key after resolution */ + fyt = NULL; + switch (fyn->type) { + case 
FYNT_SCALAR: + fyt = fyn->scalar; + break; + case FYNT_SEQUENCE: + fyt = fyn->sequence_start; + break; + case FYNT_MAPPING: + fyt = fyn->mapping_start; + break; + } + + FYD_TOKEN_ERROR_CHECK(fyd, fyt, FYEM_DOC, + false, err_out, + "duplicate key after resolving"); + + } + + } + + if (rc && !ret_rc) + ret_rc = rc; + + rc = fy_resolve_anchor_node(fyd, fynp->value); + if (rc && !ret_rc) + ret_rc = rc; + + } + } + } + + return ret_rc; + +err_out: + return -1; +} + +static void fy_resolve_parent_node(struct fy_document *fyd, struct fy_node *fyn, struct fy_node *fyn_parent) +{ + struct fy_node *fyni; + struct fy_node_pair *fynp, *fynpi; + + if (!fyn) + return; + + fyn->parent = fyn_parent; + + switch (fyn->type) { + case FYNT_SCALAR: + break; + + case FYNT_SEQUENCE: + for (fyni = fy_node_list_head(&fyn->sequence); fyni; + fyni = fy_node_next(&fyn->sequence, fyni)) { + + fy_resolve_parent_node(fyd, fyni, fyn); + } + break; + + case FYNT_MAPPING: + for (fynp = fy_node_pair_list_head(&fyn->mapping); fynp; fynp = fynpi) { + + fynpi = fy_node_pair_next(&fyn->mapping, fynp); + + fy_resolve_parent_node(fyd, fynp->key, fyn); + fy_resolve_parent_node(fyd, fynp->value, fyn); + fynp->parent = fyn; + } + break; + } +} + +typedef void (*fy_node_applyf)(struct fy_node *fyn); + +void fy_node_apply(struct fy_node *fyn, fy_node_applyf func) +{ + struct fy_node *fyni; + struct fy_node_pair *fynp; + + if (!fyn || !func) + return; + + (*func)(fyn); + + switch (fyn->type) { + case FYNT_SCALAR: + break; + + case FYNT_SEQUENCE: + for (fyni = fy_node_list_head(&fyn->sequence); fyni; + fyni = fy_node_next(&fyn->sequence, fyni)) + fy_node_apply(fyni, func); + break; + + case FYNT_MAPPING: + for (fynp = fy_node_pair_list_head(&fyn->mapping); fynp; + fynp = fy_node_pair_next(&fyn->mapping, fynp)) { + + fy_node_apply(fynp->key, func); + fy_node_apply(fynp->value, func); + } + break; + } +} + +static void clear_system_marks(struct fy_node *fyn) +{ + fyn->marks &= ~FYNWF_SYSTEM_MARKS; +} + +/* clear 
all the system markers */ +void fy_node_clear_system_marks(struct fy_node *fyn) +{ + fy_node_apply(fyn, clear_system_marks); +} + +int fy_document_resolve(struct fy_document *fyd) +{ + int rc; + bool ret; + + if (!fyd) + return 0; + + fy_node_clear_system_marks(fyd->root); + + /* for resolution to work, no reference loops should exist */ + ret = fy_check_ref_loop(fyd, fyd->root, + FYNWF_MAXDEPTH_DEFAULT | FYNWF_FOLLOW, NULL); + + fy_node_clear_system_marks(fyd->root); + + if (ret) + goto err_out; + + + /* now resolve any anchor nodes */ + rc = fy_resolve_anchor_node(fyd, fyd->root); + if (rc) + goto err_out_rc; + + /* redo parent resolution */ + fy_resolve_parent_node(fyd, fyd->root, NULL); + + return 0; + +err_out: + rc = -1; +err_out_rc: + fyd->diag->on_error = false; + return rc; +} + +void fy_document_free_nodes(struct fy_document *fyd) +{ + struct fy_document *fyd_child; + + for (fyd_child = fy_document_list_first(&fyd->children); fyd_child; + fyd_child = fy_document_next(&fyd->children, fyd_child)) + fy_document_free_nodes(fyd_child); + + fy_node_detach_and_free(fyd->root); + fyd->root = NULL; +} + +void fy_document_destroy(struct fy_document *fyd) +{ + struct fy_document *fyd_child; + + /* both the document and the parser object must exist */ + if (!fyd) + return; + + /* we have to free the nodes first */ + fy_document_free_nodes(fyd); + + /* recursively delete children */ + while ((fyd_child = fy_document_list_pop(&fyd->children)) != NULL) { + fyd_child->parent = NULL; + fy_document_destroy(fyd_child); + } + + fy_parse_document_destroy(NULL, fyd); +} + +int fy_document_set_parent(struct fy_document *fyd, struct fy_document *fyd_child) +{ + + if (!fyd || !fyd_child || fyd_child->parent) + return -1; + + fyd_child->parent = fyd; + fy_document_list_add_tail(&fyd->children, fyd_child); + + return 0; +} + +static const struct fy_parse_cfg doc_parse_default_cfg = { + .flags = FYPCF_DEFAULT_DOC, +}; + +struct fy_document *fy_document_create(const struct 
fy_parse_cfg *cfg) +{ + struct fy_document *fyd = NULL; + struct fy_diag *diag; + int rc; + + if (!cfg) + cfg = &doc_parse_default_cfg; + + fyd = malloc(sizeof(*fyd)); + if (!fyd) + goto err_out; + + memset(fyd, 0, sizeof(*fyd)); + fyd->parse_cfg = *cfg; + + diag = cfg->diag; + if (!diag) { + diag = fy_diag_create(NULL); + if (!diag) + goto err_out; + } else + fy_diag_ref(diag); + + fyd->diag = diag; + + fy_anchor_list_init(&fyd->anchors); + if (fy_document_is_accelerated(fyd)) { + fyd->axl = malloc(sizeof(*fyd->axl)); + fyd_error_check(fyd, fyd->axl, err_out, + "malloc() failed"); + + /* start with a very small bucket list */ + rc = fy_accel_setup(fyd->axl, &hd_anchor, fyd, 8); + fyd_error_check(fyd, !rc, err_out, + "fy_accel_setup() failed"); + + fyd->naxl = malloc(sizeof(*fyd->naxl)); + fyd_error_check(fyd, fyd->axl, err_out, + "malloc() failed"); + + /* start with a very small bucket list */ + rc = fy_accel_setup(fyd->naxl, &hd_nanchor, fyd, 8); + fyd_error_check(fyd, !rc, err_out, + "fy_accel_setup() failed"); + } + fyd->root = NULL; + + /* we don't do document create version setting, + * perhaps we should in the future + */ + fyd->fyds = fy_document_state_default(NULL, NULL); + fyd_error_check(fyd, fyd->fyds, err_out, + "fy_document_state_default() failed"); + + /* turn on JSON mode if it's forced */ + fyd->fyds->json_mode = (cfg->flags & + (FYPCF_JSON_MASK << FYPCF_JSON_SHIFT)) == FYPCF_JSON_FORCE; + + fy_document_list_init(&fyd->children); + + return fyd; + +err_out: + fy_parse_document_destroy(NULL, fyd); + return NULL; +} + +struct fy_document_build_string_ctx { + const char *str; + size_t len; +}; + +static int parser_setup_from_string(struct fy_parser *fyp, void *user) +{ + struct fy_document_build_string_ctx *ctx = user; + + return fy_parser_set_string(fyp, ctx->str, ctx->len); +} + +struct fy_document_build_malloc_string_ctx { + char *str; + size_t len; +}; + +static int parser_setup_from_malloc_string(struct fy_parser *fyp, void *user) +{ + struct 
fy_document_build_malloc_string_ctx *ctx = user; + + return fy_parser_set_malloc_string(fyp, ctx->str, ctx->len); +} + +struct fy_document_build_file_ctx { + const char *file; +}; + +static int parser_setup_from_file(struct fy_parser *fyp, void *user) +{ + struct fy_document_build_file_ctx *ctx = user; + + return fy_parser_set_input_file(fyp, ctx->file); +} + +struct fy_document_build_fp_ctx { + const char *name; + FILE *fp; +}; + +static int parser_setup_from_fp(struct fy_parser *fyp, void *user) +{ + struct fy_document_build_fp_ctx *ctx = user; + + return fy_parser_set_input_fp(fyp, ctx->name, ctx->fp); +} + +struct fy_document_vbuildf_ctx { + const char *fmt; + va_list ap; +}; + +static int parser_setup_from_fmt_ap(struct fy_parser *fyp, void *user) +{ + struct fy_document_vbuildf_ctx *vctx = user; + va_list ap, ap_orig; + int size, sizew; + char *buf; + + /* first try without allocating */ + va_copy(ap_orig, vctx->ap); + size = vsnprintf(NULL, 0, vctx->fmt, ap_orig); + va_end(ap_orig); + + fyp_error_check(fyp, size >= 0, err_out, + "vsnprintf() failed"); + + buf = malloc(size + 1); + fyp_error_check(fyp, buf, err_out, + "malloc() failed"); + + va_copy(ap, vctx->ap); + sizew = vsnprintf(buf, size + 1, vctx->fmt, ap); + fyp_error_check(fyp, sizew == size, err_out, + "vsnprintf() failed"); + va_end(ap); + + buf[size] = '\0'; + + return fy_parser_set_malloc_string(fyp, buf, size); + +err_out: + return -1; +} + +static struct fy_document *fy_document_build_internal(const struct fy_parse_cfg *cfg, + int (*parser_setup)(struct fy_parser *fyp, void *user), + void *user) +{ + struct fy_parser fyp_data, *fyp = &fyp_data; + struct fy_document *fyd = NULL; + struct fy_eventp *fyep; + bool got_stream_end; + int rc; + + if (!parser_setup) + return NULL; + + if (!cfg) + cfg = &doc_parse_default_cfg; + + rc = fy_parse_setup(fyp, cfg); + if (rc) + return NULL; + + rc = (*parser_setup)(fyp, user); + fyp_error_check(fyp, !rc, err_out, + "parser_setup() failed"); + + fyd = 
fy_parse_load_document(fyp); + + /* we're going to handle stream errors from now */ + if (!fyd) + fyp->stream_error = false; + + /* if we collect diagnostics, we can continue */ + fyp_error_check(fyp, fyd || (fyp->cfg.flags & FYPCF_COLLECT_DIAG), err_out, + "fy_parse_load_document() failed"); + + /* no document, but we're collecting diagnostics */ + if (!fyd) { + + fyp_error(fyp, "fy_parse_load_document() failed"); + + fyp->stream_error = false; + fyd = fy_parse_document_create(fyp, NULL); + fyp_error_check(fyp, fyd, err_out, + "fy_parse_document_create() failed"); + fyd->parse_error = true; + + /* XXX */ + goto out; + } + + got_stream_end = false; + while (!got_stream_end && (fyep = fy_parse_private(fyp)) != NULL) { + if (fyep->e.type == FYET_STREAM_END) + got_stream_end = true; + fy_parse_eventp_recycle(fyp, fyep); + } + + if (got_stream_end) { + fyep = fy_parse_private(fyp); + fyp_error_check(fyp, !fyep, err_out, + "more events after stream end"); + fy_parse_eventp_recycle(fyp, fyep); + } + +out: + fy_parse_cleanup(fyp); + return fyd; + +err_out: + fy_document_destroy(fyd); + fy_parse_cleanup(fyp); + return NULL; +} + +struct fy_document *fy_document_build_from_string(const struct fy_parse_cfg *cfg, + const char *str, size_t len) +{ + struct fy_document_build_string_ctx ctx = { + .str = str, + .len = len, + }; + + return fy_document_build_internal(cfg, parser_setup_from_string, &ctx); +} + +struct fy_document *fy_document_build_from_malloc_string(const struct fy_parse_cfg *cfg, + char *str, size_t len) +{ + struct fy_document_build_malloc_string_ctx ctx = { + .str = str, + .len = len, + }; + + return fy_document_build_internal(cfg, parser_setup_from_malloc_string, &ctx); +} + +struct fy_document *fy_document_build_from_file(const struct fy_parse_cfg *cfg, + const char *file) +{ + struct fy_document_build_file_ctx ctx = { + .file = file, + }; + + return fy_document_build_internal(cfg, parser_setup_from_file, &ctx); +} + +struct fy_document 
*fy_document_build_from_fp(const struct fy_parse_cfg *cfg, + FILE *fp) +{ + struct fy_document_build_fp_ctx ctx = { + .name = NULL, + .fp = fp, + }; + + return fy_document_build_internal(cfg, parser_setup_from_fp, &ctx); +} + +enum fy_node_type fy_node_get_type(struct fy_node *fyn) +{ + /* a NULL is a plain scalar node */ + return fyn ? fyn->type : FYNT_SCALAR; +} + +enum fy_node_style fy_node_get_style(struct fy_node *fyn) +{ + /* a NULL is a plain scalar node */ + return fyn ? fyn->style : FYNS_PLAIN; +} + +bool fy_node_is_null(struct fy_node *fyn) +{ + if (!fyn) + return true; + + if (fyn->type != FYNT_SCALAR) + return false; + + return fyn->scalar == NULL; +} + +bool fy_node_is_attached(struct fy_node *fyn) +{ + return fyn ? fyn->attached : false; +} + +struct fy_node *fy_node_get_parent(struct fy_node *fyn) +{ + return fyn && !fyn->key_root ? fyn->parent : NULL; +} + +struct fy_node *fy_node_get_document_parent(struct fy_node *fyn) +{ + return fyn ? fyn->parent : NULL; +} + +struct fy_token *fy_node_get_tag_token(struct fy_node *fyn) +{ + return fyn ? fyn->tag : NULL; +} + +struct fy_token *fy_node_get_scalar_token(struct fy_node *fyn) +{ + return fyn && fyn->type == FYNT_SCALAR ? fyn->scalar : NULL; +} + +struct fy_node *fy_node_pair_key(struct fy_node_pair *fynp) +{ + return fynp ? fynp->key : NULL; +} + +struct fy_node *fy_node_pair_value(struct fy_node_pair *fynp) +{ + return fynp ? 
fynp->value : NULL; +} + +int fy_node_pair_set_key(struct fy_node_pair *fynp, struct fy_node *fyn) +{ + struct fy_node *fyn_map; + struct fy_node_pair *fynpi; + + if (!fynp) + return -1; + + /* the node must not be attached */ + if (fyn && fyn->attached) + return -1; + + /* sanity check and duplication check */ + fyn_map = fynp->parent; + if (fyn_map) { + + /* (in)sanity check */ + if (!fy_node_is_mapping(fyn_map)) + return -1; + + if (fyn_map->xl) { + /* either we're already on the parent list (and it's OK) */ + /* or we're not and we have a duplicate key */ + fynpi = fy_node_accel_lookup_by_node(fyn_map, fyn); + if (fynpi && fynpi != fynp) + return -1; + /* remove that key */ + fy_accel_remove(fyn_map->xl, fynp->key); + } else { + /* check whether the key is a duplicate + * skipping ourselves since our key gets replaced + */ + for (fynpi = fy_node_pair_list_head(&fyn_map->mapping); fynpi; + fynpi = fy_node_pair_next(&fyn_map->mapping, fynpi)) { + + if (fynpi != fynp && fy_node_compare(fynpi->key, fyn)) + return -1; + } + } + + fy_node_mark_synthetic(fyn_map); + } + + fy_node_detach_and_free(fynp->key); + fynp->key = fyn; + + if (fyn_map && fyn_map->xl) + fy_accel_insert(fyn_map->xl, fynp->key, fynp); + + fyn->attached = true; + + return 0; +} + +int fy_node_pair_set_value(struct fy_node_pair *fynp, struct fy_node *fyn) +{ + if (!fynp) + return -1; + /* the node must not be attached */ + if (fyn && fyn->attached) + return -1; + fy_node_detach_and_free(fynp->value); + fynp->value = fyn; + fyn->attached = true; + + if (fynp->parent) + fy_node_mark_synthetic(fynp->parent); + + return 0; +} + +struct fy_node *fy_document_root(struct fy_document *fyd) +{ + return fyd->root; +} + +const char *fy_node_get_tag(struct fy_node *fyn, size_t *lenp) +{ + size_t tmplen; + + if (!lenp) + lenp = &tmplen; + + if (!fyn || !fyn->tag) { + *lenp = 0; + return NULL; + } + + return fy_token_get_text(fyn->tag, lenp); +} + +const char *fy_node_get_scalar(struct fy_node *fyn, size_t *lenp) 
+{ + size_t tmplen; + + if (!lenp) + lenp = &tmplen; + + if (!fyn || fyn->type != FYNT_SCALAR) { + *lenp = 0; + return NULL; + } + + return fy_token_get_text(fyn->scalar, lenp); +} + +const char *fy_node_get_scalar0(struct fy_node *fyn) +{ + if (!fyn || fyn->type != FYNT_SCALAR) + return NULL; + + return fy_token_get_text0(fyn->scalar); +} + +size_t fy_node_get_scalar_length(struct fy_node *fyn) +{ + + if (!fyn || fyn->type != FYNT_SCALAR) + return 0; + + return fy_token_get_text_length(fyn->scalar); +} + +size_t fy_node_get_scalar_utf8_length(struct fy_node *fyn) +{ + + if (!fyn || fyn->type != FYNT_SCALAR) + return 0; + + return fy_token_format_utf8_length(fyn->scalar); +} + +struct fy_node *fy_node_sequence_iterate(struct fy_node *fyn, void **prevp) +{ + if (!fyn || fyn->type != FYNT_SEQUENCE || !prevp) + return NULL; + + return *prevp = *prevp ? fy_node_next(&fyn->sequence, *prevp) : fy_node_list_head(&fyn->sequence); +} + +struct fy_node *fy_node_sequence_reverse_iterate(struct fy_node *fyn, void **prevp) +{ + if (!fyn || fyn->type != FYNT_SEQUENCE || !prevp) + return NULL; + + return *prevp = *prevp ? 
fy_node_prev(&fyn->sequence, *prevp) : fy_node_list_tail(&fyn->sequence); +} + +int fy_node_sequence_item_count(struct fy_node *fyn) +{ + struct fy_node *fyni; + int count; + + if (!fyn || fyn->type != FYNT_SEQUENCE) + return -1; + + count = 0; + for (fyni = fy_node_list_head(&fyn->sequence); fyni; fyni = fy_node_next(&fyn->sequence, fyni)) + count++; + return count; +} + +bool fy_node_sequence_is_empty(struct fy_node *fyn) +{ + return !fyn || fyn->type != FYNT_SEQUENCE || fy_node_list_empty(&fyn->sequence); +} + +struct fy_node *fy_node_sequence_get_by_index(struct fy_node *fyn, int index) +{ + struct fy_node *fyni; + void *iterp = NULL; + + if (!fyn || fyn->type != FYNT_SEQUENCE) + return NULL; + + if (index >= 0) { + do { + fyni = fy_node_sequence_iterate(fyn, &iterp); + } while (fyni && --index >= 0); + } else { + do { + fyni = fy_node_sequence_reverse_iterate(fyn, &iterp); + } while (fyni && ++index < 0); + } + + return fyni; +} + +struct fy_node_pair *fy_node_mapping_iterate(struct fy_node *fyn, void **prevp) +{ + if (!fyn || fyn->type != FYNT_MAPPING || !prevp) + return NULL; + + return *prevp = *prevp ? fy_node_pair_next(&fyn->mapping, *prevp) : fy_node_pair_list_head(&fyn->mapping); +} + +struct fy_node_pair *fy_node_mapping_reverse_iterate(struct fy_node *fyn, void **prevp) +{ + if (!fyn || fyn->type != FYNT_MAPPING || !prevp) + return NULL; + + return *prevp = *prevp ? fy_node_pair_prev(&fyn->mapping, *prevp) : fy_node_pair_list_tail(&fyn->mapping); +} + +struct fy_node *fy_node_collection_iterate(struct fy_node *fyn, void **prevp) +{ + struct fy_node_pair *fynp; + + if (!fyn || !prevp) + return NULL; + + switch (fyn->type) { + case FYNT_SEQUENCE: + return fy_node_sequence_iterate(fyn, prevp); + + case FYNT_MAPPING: + fynp = fy_node_mapping_iterate(fyn, prevp); + if (!fynp) + return NULL; + return fynp->value; + + case FYNT_SCALAR: + fyn = !*prevp ? 
fyn : NULL; + *prevp = fyn; + return fyn; + } + + return NULL; +} + + +int fy_node_mapping_item_count(struct fy_node *fyn) +{ + struct fy_node_pair *fynpi; + int count; + + if (!fyn || fyn->type != FYNT_MAPPING) + return -1; + + count = 0; + for (fynpi = fy_node_pair_list_head(&fyn->mapping); fynpi; fynpi = fy_node_pair_next(&fyn->mapping, fynpi)) + count++; + return count; +} + +bool fy_node_mapping_is_empty(struct fy_node *fyn) +{ + return !fyn || fyn->type != FYNT_MAPPING || fy_node_pair_list_empty(&fyn->mapping); +} + +struct fy_node_pair *fy_node_mapping_get_by_index(struct fy_node *fyn, int index) +{ + struct fy_node_pair *fynpi; + void *iterp = NULL; + + if (!fyn || fyn->type != FYNT_MAPPING) + return NULL; + + if (index >= 0) { + do { + fynpi = fy_node_mapping_iterate(fyn, &iterp); + } while (fynpi && --index >= 0); + } else { + do { + fynpi = fy_node_mapping_reverse_iterate(fyn, &iterp); + } while (fynpi && ++index < 0); + } + + return fynpi; +} + +struct fy_node_pair * +fy_node_mapping_lookup_pair_by_simple_key(struct fy_node *fyn, + const char *key, size_t len) +{ + struct fy_node_pair *fynpi; + struct fy_node *fyn_scalar; + + if (!fyn || fyn->type != FYNT_MAPPING || !key) + return NULL; + + if (len == (size_t)-1) + len = strlen(key); + + if (fyn->xl) { + fyn_scalar = fy_node_create_scalar(fyn->fyd, key, len); + if (!fyn_scalar) + return NULL; + + fynpi = fy_node_accel_lookup_by_node(fyn, fyn_scalar); + + fy_node_free(fyn_scalar); + + if (fynpi) + return fynpi; + } else { + for (fynpi = fy_node_pair_list_head(&fyn->mapping); fynpi; + fynpi = fy_node_pair_next(&fyn->mapping, fynpi)) { + + if (!fy_node_is_scalar(fynpi->key) || fy_node_is_alias(fynpi->key)) + continue; + + if (!fynpi->key && len == 0) + return fynpi; + + if (fynpi->key && !fy_token_memcmp(fynpi->key->scalar, key, len)) + return fynpi; + } + } + + return NULL; +} + +struct fy_node * +fy_node_mapping_lookup_value_by_simple_key(struct fy_node *fyn, + const char *key, size_t len) +{ + struct 
fy_node_pair *fynp; + + fynp = fy_node_mapping_lookup_pair_by_simple_key(fyn, key, len); + return fynp ? fy_node_pair_value(fynp) : NULL; +} + +struct fy_node_pair * +fy_node_mapping_lookup_pair_by_null_key(struct fy_node *fyn) +{ + struct fy_node_pair *fynpi; + + if (!fyn || fyn->type != FYNT_MAPPING) + return NULL; + + /* no acceleration for NULLs */ + for (fynpi = fy_node_pair_list_head(&fyn->mapping); fynpi; + fynpi = fy_node_pair_next(&fyn->mapping, fynpi)) { + + if (fy_node_is_null(fynpi->key)) + return fynpi; + } + + return NULL; +} + +struct fy_node * +fy_node_mapping_lookup_value_by_null_key(struct fy_node *fyn) +{ + struct fy_node_pair *fynp; + + fynp = fy_node_mapping_lookup_pair_by_null_key(fyn); + return fynp ? fy_node_pair_value(fynp) : NULL; +} + +const char * +fy_node_mapping_lookup_scalar_by_simple_key(struct fy_node *fyn, size_t *lenp, + const char *key, size_t keylen) +{ + struct fy_node *fyn_value; + + fyn_value = fy_node_mapping_lookup_value_by_simple_key(fyn, key, keylen); + if (!fyn_value || !fy_node_is_scalar(fyn_value)) + return NULL; + return fy_node_get_scalar(fyn_value, lenp); +} + +const char * +fy_node_mapping_lookup_scalar0_by_simple_key(struct fy_node *fyn, + const char *key, size_t keylen) +{ + struct fy_node *fyn_value; + + fyn_value = fy_node_mapping_lookup_value_by_simple_key(fyn, key, keylen); + if (!fyn_value || !fy_node_is_scalar(fyn_value)) + return NULL; + return fy_node_get_scalar0(fyn_value); +} + +struct fy_node *fy_node_mapping_lookup_value_by_key(struct fy_node *fyn, struct fy_node *fyn_key) +{ + struct fy_node_pair *fynp; + + fynp = fy_node_mapping_lookup_pair(fyn, fyn_key); + return fynp ? fynp->value : NULL; +} + +struct fy_node *fy_node_mapping_lookup_key_by_key(struct fy_node *fyn, struct fy_node *fyn_key) +{ + struct fy_node_pair *fynp; + + fynp = fy_node_mapping_lookup_pair(fyn, fyn_key); + return fynp ? 
fynp->key : NULL; +} + +struct fy_node_pair * +fy_node_mapping_lookup_pair_by_string(struct fy_node *fyn, const char *key, size_t len) +{ + struct fy_document *fyd; + struct fy_node_pair *fynp; + + /* try quick and dirty simple scan */ + if (is_simple_key(key, len)) + return fy_node_mapping_lookup_pair_by_simple_key(fyn, key, len); + + fyd = fy_document_build_from_string(NULL, key, len); + if (!fyd) + return NULL; + + fynp = fy_node_mapping_lookup_pair(fyn, fy_document_root(fyd)); + + fy_document_destroy(fyd); + + return fynp; +} + +struct fy_node * +fy_node_mapping_lookup_by_string(struct fy_node *fyn, + const char *key, size_t len) +{ + struct fy_node_pair *fynp; + + fynp = fy_node_mapping_lookup_pair_by_string(fyn, key, len); + return fynp ? fynp->value : NULL; +} + +struct fy_node * +fy_node_mapping_lookup_value_by_string(struct fy_node *fyn, + const char *key, size_t len) +{ + return fy_node_mapping_lookup_by_string(fyn, key, len); +} + +struct fy_node * +fy_node_mapping_lookup_key_by_string(struct fy_node *fyn, + const char *key, size_t len) +{ + struct fy_node_pair *fynp; + + fynp = fy_node_mapping_lookup_pair_by_string(fyn, key, len); + return fynp ? 
fynp->key : NULL; +} + +bool fy_node_is_empty(struct fy_node *fyn) +{ + struct fy_node *fyni; + struct fy_node_pair *fynp; + struct fy_atom *atom; + + /* skip if no value node or token */ + if (!fyn) + return true; + + switch (fyn->type) { + case FYNT_SCALAR: + atom = fy_token_atom(fyn->scalar); + if (atom && !atom->size0 && !atom->empty) + return false; + break; + case FYNT_SEQUENCE: + for (fyni = fy_node_list_head(&fyn->sequence); fyni; + fyni = fy_node_next(&fyn->sequence, fyni)) { + if (!fy_node_is_empty(fyni)) + return false; + } + break; + case FYNT_MAPPING: + for (fynp = fy_node_pair_list_head(&fyn->mapping); fynp; + fynp = fy_node_pair_next(&fyn->mapping, fynp)) { + if (!fy_node_is_empty(fynp->value)) + return false; + } + break; + } + return true; +} + +#define fy_node_walk_ctx_create_a(_max_depth, _mark, _res) \ + do { \ + unsigned int __max_depth = (_max_depth); \ + struct fy_node_walk_ctx *_ctx; \ + \ + _ctx = FY_ALLOCA(sizeof(*_ctx) + sizeof(struct fy_node *) * __max_depth); \ + _ctx->max_depth = _max_depth; \ + _ctx->next_slot = 0; \ + _ctx->mark = (_mark); \ + *(_res) = _ctx; \ + } while(false) + +static inline void fy_node_walk_mark_start(struct fy_node_walk_ctx *ctx) +{ + ctx->next_slot = 0; +} + +static inline void fy_node_walk_mark_end(struct fy_node_walk_ctx *ctx) +{ + struct fy_node *fyn; + + while (ctx->next_slot > 0) { + fyn = ctx->marked[--ctx->next_slot]; + fyn->marks &= ~ctx->mark; + } +} + +/* fyn is guaranteed to be non NULL and an alias */ +static inline bool fy_node_walk_mark(struct fy_node_walk_ctx *ctx, struct fy_node *fyn) +{ + struct fy_document *fyd = fyn->fyd; + struct fy_token *fyt = NULL; + + switch (fyn->type) { + case FYNT_SCALAR: + fyt = fyn->scalar; + break; + case FYNT_SEQUENCE: + fyt = fyn->sequence_start; + break; + case FYNT_MAPPING: + fyt = fyn->mapping_start; + break; + } + + /* depth error */ + FYD_TOKEN_ERROR_CHECK(fyd, fyt, FYEM_DOC, + ctx->next_slot < ctx->max_depth, err_out, + "max recursion depth exceeded (%u)", 
ctx->max_depth); + + /* mark found, loop */ + FYD_TOKEN_ERROR_CHECK(fyd, fyt, FYEM_DOC, + !(fyn->marks & ctx->mark), err_out, + "cyclic reference detected"); + + fyn->marks |= ctx->mark; + ctx->marked[ctx->next_slot++] = fyn; + + return true; + +err_out: + return false; +} + +static struct fy_node * +fy_node_follow_aliases(struct fy_node *fyn, enum fy_node_walk_flags flags, bool single) +{ + enum fy_node_walk_flags ptr_flags; + struct fy_ptr_node_list nl; + struct fy_ptr_node *fypn; + + if (!fyn || !fy_node_is_alias(fyn) || !(flags & FYNWF_FOLLOW)) + return fyn; + + ptr_flags = flags & FYNWF_PTR(FYNWF_PTR_MASK); + if (ptr_flags != FYNWF_PTR_YAML && ptr_flags != FYNWF_PTR_YPATH) + return fyn; + + fy_ptr_node_list_init(&nl); + + while (fyn && fy_node_is_alias(fyn)) { + + // fprintf(stderr, "%s: %s\n", __func__, fy_node_get_path_FY_ALLOCA(fyn)); + + /* check for loops */ + if (fy_ptr_node_list_contains(&nl, fyn)) { + fyn = NULL; + break; + } + + /* out of memory? */ + fypn = fy_ptr_node_create(fyn); + if (!fypn) { + fyn = NULL; + break; + } + fy_ptr_node_list_add_tail(&nl, fypn); + + fyn = fy_node_follow_alias(fyn, flags); + + if (single) + break; + } + + /* release */ + while ((fypn = fy_ptr_node_list_pop(&nl)) != NULL) + fy_ptr_node_destroy(fypn); + + return fyn; +} + +struct fy_node *fy_node_resolve_alias(struct fy_node *fyn) +{ + enum fy_node_walk_flags flags; + + if (!fyn) + return NULL; + + flags = FYNWF_FOLLOW | FYNWF_MAXDEPTH_DEFAULT | FYNWF_MARKER_DEFAULT; + if (fyn->fyd->parse_cfg.flags & FYPCF_YPATH_ALIASES) + flags |= FYNWF_PTR_YPATH; + else + flags |= FYNWF_PTR_YAML; + return fy_node_follow_aliases(fyn, flags, false); +} + +struct fy_node *fy_node_dereference(struct fy_node *fyn) +{ + enum fy_node_walk_flags flags; + + if (!fyn || !fy_node_is_alias(fyn)) + return NULL; + + flags = FYNWF_FOLLOW | FYNWF_MAXDEPTH_DEFAULT | FYNWF_MARKER_DEFAULT; + if (fyn->fyd->parse_cfg.flags & FYPCF_YPATH_ALIASES) + flags |= FYNWF_PTR_YPATH; + else + flags |= 
FYNWF_PTR_YAML; + return fy_node_follow_aliases(fyn, flags, true); +} + +static struct fy_node * +fy_node_by_path_internal(struct fy_node *fyn, + const char *path, size_t pathlen, + enum fy_node_walk_flags flags) +{ + enum fy_node_walk_flags ptr_flags; + struct fy_node *fynt, *fyni; + const char *s, *e, *ss, *ee; + char *end_idx, *json_key, *t, *p, *uri_path; + char c; + int idx, rlen; + size_t len, json_key_len, uri_path_len; + bool has_json_key_esc; + uint8_t code[4]; + int code_length; + bool trailing_slash; + + if (!fyn || !path) + return NULL; + + ptr_flags = flags & FYNWF_PTR(FYNWF_PTR_MASK); + if (ptr_flags == FYNWF_PTR_YPATH) + return fy_node_by_ypath(fyn, path, pathlen); + + s = path; + if (pathlen == (size_t)-1) + pathlen = strlen(path); + e = s + pathlen; + + /* a trailing slash works just like unix and symbolic links + * if it does not exist no symbolic link lookups are performed + * at the end of the operation. + * if it exists they are followed upon resolution + */ + trailing_slash = pathlen > 0 && path[pathlen - 1] == '/'; + + /* and continue on path lookup with the rest */ + + /* skip all prefixed / */ + switch (ptr_flags) { + default: + case FYNWF_PTR_YAML: + while (s < e && *s == '/') + s++; + /* for a last component / always match this one */ + if (s >= e) + goto out; + break; + + case FYNWF_PTR_JSON: + /* "" -> everything here */ + if (s == e) + return fyn; + /* it must have a separator here */ + if (*s != '/') + return NULL; + s++; + break; + + case FYNWF_PTR_RELJSON: + break; + } + + /* fyd_notice(fyn->fyd, "%s:%d following alias @%s \"%.*s\"", + __func__, __LINE__, fy_node_get_path(fyn), (int)(e - s), s); */ + fyn = fy_node_follow_aliases(fyn, flags, true); + + /* scalar can be only last element in the path (it has no key) */ + if (fy_node_is_scalar(fyn)) { + if (*s) + fyn = NULL; /* not end of the path - fail */ + goto out; + } + + /* for a sequence the only allowed key is [n] where n is the index to follow */ + if (fy_node_is_sequence(fyn)) 
{ + + c = -1; + switch (ptr_flags) { + default: + case FYNWF_PTR_YAML: + while (s < e && isspace(*s)) + s++; + + c = *s; + if (c == '[') + s++; + else if (!isdigit(c) && c != '-') + return NULL; + + idx = (int)strtol(s, &end_idx, 10); + + /* no digits found at all */ + if (idx == 0 && end_idx == s) + return NULL; + + s = end_idx; + + while (s < e && isspace(*s)) + s++; + + if (c == '[' && *s++ != ']') + return NULL; + + while (s < e && isspace(*s)) + s++; + + break; + + case FYNWF_PTR_JSON: + case FYNWF_PTR_RELJSON: + + /* special array end - always fails */ + if (*s == '-') + return NULL; + + idx = (int)strtol(s, &end_idx, 10); + + /* no digits found at all */ + if (idx == 0 && end_idx == s) + return NULL; + + /* no negatives */ + if (idx < 0) + return NULL; + + s = end_idx; + + if (s < e && *s == '/') + s++; + + break; + } + + len = e - s; + + fyn = fy_node_sequence_get_by_index(fyn, idx); + if (trailing_slash) + fyn = fy_node_follow_aliases(fyn, flags, false); + fyn = fy_node_by_path_internal(fyn, s, len, flags); + goto out; + } + + /* be a little bit paranoid */ + assert(fy_node_is_mapping(fyn)); + + path = s; + pathlen = (size_t)(e - s); + + switch (ptr_flags) { + default: + case FYNWF_PTR_YAML: + + /* scan ahead for the end of the path component + * note that we don't do UTF8 here, because all the + * escapes are regular ascii characters, i.e. + * '/', '*', '&', '.', '{', '}', '[', ']' and '\\' + */ + + while (s < e) { + c = *s; + /* end of path component? 
*/ + if (c == '/') + break; + s++; + + if (c == '\\') { + /* it must be a valid escape */ + if (s >= e || !strchr("/*&.{}[]\\", *s)) + return NULL; + s++; + } else if (c == '"') { + while (s < e && *s != '"') { + c = *s++; + if (c == '\\' && (s < e && *s == '"')) + s++; + } + /* not a normal double quote end */ + if (s >= e || *s != '"') + return NULL; + s++; + } else if (c == '\'') { + while (s < e && *s != '\'') { + c = *s++; + if (c == '\'' && (s < e && *s == '\'')) + s++; + } + /* not a normal single quote end */ + if (s >= e || *s != '\'') + return NULL; + s++; + } + } + len = s - path; + + fynt = fyn; + fyn = fy_node_mapping_lookup_by_string(fyn, path, len); + + /* failed! last ditch attempt, is there a merge key? */ + if (!fyn && fynt && (flags & FYNWF_FOLLOW) && ptr_flags == FYNWF_PTR_YAML) { + fyn = fy_node_mapping_lookup_by_string(fynt, "<<", 2); + if (!fyn) + goto out; + + if (fy_node_is_alias(fyn)) { + + /* single alias '<<: *foo' */ + fyn = fy_node_mapping_lookup_by_string( + fy_node_follow_aliases(fyn, flags, false), path, len); + + } else if (fy_node_is_sequence(fyn)) { + + /* multi aliases '<<: [ *foo, *bar ]' */ + fynt = fyn; + for (fyni = fy_node_list_head(&fynt->sequence); fyni; + fyni = fy_node_next(&fynt->sequence, fyni)) { + if (!fy_node_is_alias(fyni)) + continue; + fyn = fy_node_mapping_lookup_by_string( + fy_node_follow_aliases(fyni, flags, false), + path, len); + if (fyn) + break; + } + } else + fyn = NULL; + } + break; + + case FYNWF_PTR_JSON: + case FYNWF_PTR_RELJSON: + + has_json_key_esc = false; + while (s < e) { + c = *s; + /* end of path component? 
*/ + if (c == '/') + break; + s++; + if (c == '~') + has_json_key_esc = true; + } + len = s - path; + + if (has_json_key_esc) { + /* note that the escapes reduce the length, so allocating the + * same size is guaranteed safe */ + json_key = FY_ALLOCA(len + 1); + + ss = path; + ee = s; + t = json_key; + while (ss < ee) { + if (*ss != '~') { + *t++ = *ss++; + continue; + } + /* unterminated ~ escape, or neither ~0, ~1 */ + if (ss + 1 >= ee || (ss[1] < '0' && ss[1] > '1')) + return NULL; + *t++ = ss[1] == '0' ? '~' : '/'; + ss += 2; + } + json_key_len = t - json_key; + + path = json_key; + len = json_key_len; + } + + /* URI encoded escaped */ + if ((flags & FYNWF_URI_ENCODED) && memchr(path, '%', len)) { + /* escapes shrink, so safe to allocate as much */ + uri_path = FY_ALLOCA(len + 1); + + ss = path; + ee = path + len; + t = uri_path; + while (ss < ee) { + /* copy run until '%' or end */ + p = memchr(ss, '%', ee - ss); + rlen = (p ? p : ee) - ss; + memcpy(t, ss, rlen); + ss += rlen; + t += rlen; + + /* if end, break */ + if (!p) + break; + + /* collect a utf8 character sequence */ + code_length = sizeof(code); + ss = fy_uri_esc(ss, ee - ss, code, &code_length); + if (!ss) { + /* bad % escape sequence */ + return NULL; + } + memcpy(t, code, code_length); + t += code_length; + } + uri_path_len = t - uri_path; + + path = uri_path; + len = uri_path_len; + } + + fynt = fyn; + fyn = fy_node_mapping_lookup_value_by_simple_key(fyn, path, len); + break; + } + + len = e - s; + + if (len > 0 && trailing_slash) { + /* fyd_notice(fyn->fyd, "%s:%d following alias @%s \"%.*s\"", + __func__, __LINE__, fy_node_get_path(fyn), (int)(e - s), s); */ + fyn = fy_node_follow_aliases(fyn, flags, true); + } + + fyn = fy_node_by_path_internal(fyn, s, len, flags); + +out: + len = e - s; + + if (len > 0 && trailing_slash) { + /* fyd_notice(fyn->fyd, "%s:%d following alias @%s \"%.*s\"", + __func__, __LINE__, fy_node_get_path(fyn), (int)(e - s), s); */ + fyn = fy_node_follow_aliases(fyn, flags, 
true); + } + + return fyn; +} + +struct fy_node *fy_node_by_path(struct fy_node *fyn, + const char *path, size_t len, + enum fy_node_walk_flags flags) +{ + struct fy_document *fyd; + struct fy_anchor *fya; + const char *s, *e, *t, *anchor; + size_t alen; + char c; + int idx, w; + char *end_idx; + + if (!fyn || !path) + return NULL; + + if (len == (size_t)-1) + len = strlen(path); + + /* verify that the path string is well formed UTF8 */ + s = path; + e = s + len; + + fyd = fyn->fyd; + while (s < e) { + c = fy_utf8_get(s, e - s, &w); + if (c < 0) { + fyd_error(fyd, "fy_node_by_path() malformed path string\n"); + return NULL; + } + s += w; + } + + /* rewind */ + s = path; + + /* if it's a YPATH, just punt to that method */ + if ((flags & FYNWF_PTR(FYNWF_PTR_MASK)) == FYNWF_PTR_YPATH) + return fy_node_by_ypath(fyn, path, len); + + /* fyd_notice(fyn->fyd, "%s: %.*s", __func__, (int)(len), s); */ + + /* first path component may be an alias */ + if ((flags & FYNWF_FOLLOW) && fyn && path) { + while (s < e && isspace(*s)) + s++; + + if (s >= e || *s != '*') + goto regular_path_lookup; + + s++; + + c = -1; + for (t = s; t < e; t++) { + c = *t; + /* it ends on anything non alias */ + if (c == '[' || c == ']' || + c == '{' || c == '}' || + c == ',' || c == ' ' || c == '\t' || + c == '/') + break; + } + + /* bad alias form for path */ + if (c == '[' || c == ']' || c == '{' || c == '}' || c == ',') + return NULL; + + anchor = s; + alen = t - s; + + if (alen) { + /* we must be terminated by '/' or space followed by '/' */ + /* strip until spaces and '/' end */ + while (t < e && (*t == ' ' || *t == '\t')) + t++; + + while (t < e && *t == '/') + t++; + + /* update path */ + path = t; + len = e - t; + + /* fyd_notice(fyn->fyd, "%s: looking up anchor=%.*s", __func__, (int)(alen), anchor); */ + + /* lookup anchor */ + fya = fy_document_lookup_anchor(fyn->fyd, anchor, alen); + if (!fya) { + /* fyd_notice(fyn->fyd, "%s: failed to lookup anchor=%.*s", __func__, (int)(alen), anchor); */ 
+ return NULL; + } + + /* fyd_notice(fyn->fyd, "%s: found anchor=%.*s at %s", + __func__, (int)(alen), anchor, fy_node_get_path(fya->fyn)); */ + + /* nothing more? we're done */ + if (*path == '\0') + return fya->fyn; + + /* anchor found... all good */ + + fyn = fya->fyn; + } else { + /* no anchor it must be of the form *\/ */ + + path = s; + len = e - s; + } + + /* fyd_notice(fyn->fyd, "%s: continuing looking for %.*s", + __func__, (int)(len), path); */ + + } + +regular_path_lookup: + + /* if it's a relative json pointer... */ + if ((flags & FYNWF_PTR(FYNWF_PTR_MASK)) == FYNWF_PTR_RELJSON) { + + /* it must at least be one digit */ + if (len == 0) + return NULL; + + idx = (int)strtol(path, &end_idx, 10); + + /* at least one digit must exist */ + if (idx == 0 && path == end_idx) + return NULL; + + e = path + len; + len = e - end_idx; + path = end_idx; + + /* we don't do the trailing # here */ + if (len == 1 && *path == '#') + return NULL; + + while (idx-- > 0) + fyn = fy_node_get_parent(fyn); + + /* convert to regular json pointer from now on */ + flags &= ~FYNWF_PTR(FYNWF_PTR_MASK); + flags |= FYNWF_PTR_JSON; + } + + return fy_node_by_path_internal(fyn, path, len, flags); +} + +static char * +fy_node_get_reference_internal(struct fy_node *fyn_base, struct fy_node *fyn, bool near) +{ + struct fy_anchor *fya; + const char *path; + char *path2, *path3; + const char *text; + size_t len; + + if (!fyn) + return NULL; + + path2 = NULL; + + /* if the node has an anchor use it (ie return *foo) */ + if (!fyn_base && (fya = fy_node_get_anchor(fyn)) != NULL) { + text = fy_anchor_get_text(fya, &len); + if (!text) + return NULL; + path2 = FY_ALLOCA(1 + len + 1); + path2[0] = '*'; + memcpy(path2 + 1, text, len); + path2[len + 1] = '\0'; + + } else { + + fya = fyn_base ? 
fy_node_get_anchor(fyn_base) : NULL; + if (!fya && near) + fya = fy_node_get_nearest_anchor(fyn); + if (!fya) { + /* no anchor, direct reference (ie return *\/foo\/bar */ + fy_node_get_path_alloca(fyn, &path); + if (!*path) + return NULL; + path2 = FY_ALLOCA(1 + strlen(path) + 1); + path2[0] = '*'; + strcpy(path2 + 1, path); + } else { + text = fy_anchor_get_text(fya, &len); + if (!text) + return NULL; + if (fy_anchor_node(fya) != fyn) { + fy_node_get_path_relative_to_alloca(fy_anchor_node(fya), fyn, &path); + if (*path) { + /* we have a relative path */ + path2 = FY_ALLOCA(1 + len + 1 + strlen(path) + 1); + path2[0] = '*'; + memcpy(path2 + 1, text, len); + path2[len + 1] = '/'; + memcpy(1 + path2 + len + 1, path, strlen(path) + 1); + } else { + /* absolute path */ + fy_node_get_path_alloca(fyn, &path); + if (!*path) + return NULL; + path2 = FY_ALLOCA(1 + strlen(path) + 1); + path2[0] = '*'; + strcpy(path2 + 1, path); + } + } else { + path2 = FY_ALLOCA(1 + len + 1); + path2[0] = '*'; + memcpy(path2 + 1, text, len); + path2[len + 1] = '\0'; + } + } + } + + if (!path2) + return NULL; + + path3 = strdup(path2); + if (!path3) + return NULL; + + return path3; +} + +char *fy_node_get_reference(struct fy_node *fyn) +{ + return fy_node_get_reference_internal(NULL, fyn, false); +} + +struct fy_node *fy_node_create_reference(struct fy_node *fyn) +{ + struct fy_node *fyn_ref; + char *path, *alias; + + path = fy_node_get_reference(fyn); + if (!path) + return NULL; + + alias = path; + if (*alias == '*') + alias++; + + fyn_ref = fy_node_create_alias_copy(fy_node_document(fyn), alias, FY_NT); + + free(path); + + return fyn_ref; +} + +char *fy_node_get_relative_reference(struct fy_node *fyn_base, struct fy_node *fyn) +{ + return fy_node_get_reference_internal(fyn_base, fyn, false); +} + +struct fy_node *fy_node_create_relative_reference(struct fy_node *fyn_base, struct fy_node *fyn) +{ + struct fy_node *fyn_ref; + char *path, *alias; + + path = 
fy_node_get_relative_reference(fyn_base, fyn); + if (!path) + return NULL; + + alias = path; + if (*alias == '*') + alias++; + + fyn_ref = fy_node_create_alias_copy(fy_node_document(fyn), alias, FY_NT); + + free(path); + + return fyn_ref; +} + +bool fy_check_ref_loop(struct fy_document *fyd, struct fy_node *fyn, + enum fy_node_walk_flags flags, + struct fy_node_walk_ctx *ctx) +{ + struct fy_node *fyni; + struct fy_node_pair *fynp, *fynpi; + struct fy_node_walk_ctx *ctxn; + bool ret; + + if (!fyn) + return false; + + /* visited? no need to check */ + if (fyn->marks & FY_BIT(FYNWF_VISIT_MARKER)) + return false; + + /* marked node, it's a loop */ + if (ctx && !fy_node_walk_mark(ctx, fyn)) + return true; + + ret = false; + + switch (fyn->type) { + case FYNT_SCALAR: + + /* if it's not an alias, we're done */ + if (!fy_node_is_alias(fyn)) + break; + + ctxn = ctx; + if (!ctxn) + fy_node_walk_ctx_create_a( + fy_node_walk_max_depth_from_flags(flags), FYNWF_REF_MARKER, &ctxn); + + + if (!ctx) { + fy_node_walk_mark_start(ctxn); + + /* mark this node */ + fy_node_walk_mark(ctxn, fyn); + } + + fyni = fy_node_follow_alias(fyn, flags); + + ret = fy_check_ref_loop(fyd, fyni, flags, ctxn); + + if (!ctx) + fy_node_walk_mark_end(ctxn); + + if (ret) + break; + + break; + + case FYNT_SEQUENCE: + for (fyni = fy_node_list_head(&fyn->sequence); fyni; + fyni = fy_node_next(&fyn->sequence, fyni)) { + + ret = fy_check_ref_loop(fyd, fyni, flags, ctx); + if (ret) + break; + } + break; + + case FYNT_MAPPING: + for (fynp = fy_node_pair_list_head(&fyn->mapping); fynp; fynp = fynpi) { + + fynpi = fy_node_pair_next(&fyn->mapping, fynp); + + ret = fy_check_ref_loop(fyd, fynp->key, flags, ctx); + if (ret) + break; + + ret = fy_check_ref_loop(fyd, fynp->value, flags, ctx); + if (ret) + break; + } + break; + } + + /* mark as visited */ + fyn->marks |= FY_BIT(FYNWF_VISIT_MARKER); + + return ret; +} + +char *fy_node_get_parent_address(struct fy_node *fyn) +{ + struct fy_node *parent, *fyni; + struct 
fy_node_pair *fynp; + struct fy_node *fyna; + char *path = NULL; + const char *str; + size_t len; + int idx; + bool is_key_root; + int ret; + const char *fmt; + char *new_path, *old_path; + + if (!fyn) + return NULL; + + parent = fy_node_get_document_parent(fyn); + if (!parent) + return NULL; + + if (fy_node_is_sequence(parent)) { + + /* for a sequence, find the index */ + idx = 0; + for (fyni = fy_node_list_head(&parent->sequence); fyni; + fyni = fy_node_next(&parent->sequence, fyni)) { + if (fyni == fyn) + break; + idx++; + } + + if (!fyni) + return NULL; + + ret = asprintf(&path, "%d", idx); + if (ret == -1) + return NULL; + } + + if (fy_node_is_mapping(parent)) { + + is_key_root = fyn->key_root; + + idx = 0; + fyna = NULL; + for (fynp = fy_node_pair_list_head(&parent->mapping); fynp; + fynp = fy_node_pair_next(&parent->mapping, fynp)) { + + if ((!is_key_root && fynp->value == fyn) || (is_key_root && fynp->key == fyn)) + break; + idx++; + } + + if (!fynp) + return NULL; + + fyna = fynp->key; + if (!fyna) + return NULL; + + /* if key is a plain scalar try to not use a complex style (even for quoted) */ + if (fyna && fy_node_is_scalar(fyna) && !fy_node_is_alias(fyna) && + (str = fy_token_get_scalar_path_key(fyna->scalar, &len)) != NULL) { + + fmt = !is_key_root ? 
"%.*s" : ".key(%.*s)"; + ret = asprintf(&path, fmt, (int)len, str); + if (ret == -1) + return NULL; + + } else { + + /* something complex, emit it */ + path = fy_emit_node_to_string(fyna, + FYECF_MODE_FLOW_ONELINE | FYECF_WIDTH_INF | + FYECF_STRIP_LABELS | FYECF_STRIP_TAGS | + FYECF_NO_ENDING_NEWLINE); + if (!path) + return NULL; + + if (is_key_root) { + old_path = path; + ret = asprintf(&new_path, ".key(%s)", path); + if (ret == -1) { + free(path); + return NULL; + } + free(old_path); + path = new_path; + } + } + } + + return path; +} + +char *fy_node_get_path(struct fy_node *fyn) +{ + struct path_track { + struct path_track *prev; + char *path; + }; + struct path_track *track, *newtrack; + char *path, *s, *path_mem; + size_t len; + struct fy_node *parent; + + if (!fyn) + return NULL; + + /* easy on the root */ + parent = fy_node_get_document_parent(fyn); + if (!parent) { + path_mem = strdup("/"); + return path_mem; + } + + track = NULL; + len = 0; + while ((path = fy_node_get_parent_address(fyn))) { + newtrack = FY_ALLOCA(sizeof(*newtrack)); + newtrack->prev = track; + newtrack->path = path; + + track = newtrack; + + len += strlen(path) + 1; + + fyn = fy_node_get_document_parent(fyn); + } + len += 2; + + path_mem = malloc(len); + + s = path_mem; + + while (track) { + len = strlen(track->path); + if (s) { + *s++ = '/'; + memcpy(s, track->path, len); + s += len; + } + free(track->path); + track = track->prev; + } + + if (s) + *s = '\0'; + + return path_mem; +} + +char *fy_node_get_path_relative_to(struct fy_node *fyn_parent, struct fy_node *fyn) +{ + char *path, *ppath, *path2, *path_ret; + size_t pathlen, ppathlen; + struct fy_node *ni, *nj; + + if (!fyn) + return NULL; + + /* must be on the same document */ + if (fyn_parent && (fyn_parent->fyd != fyn->fyd)) + return NULL; + + if (!fyn_parent) + fyn_parent = fyn->fyd->root; + + /* verify that it's a parent */ + ni = fyn; + while ((nj = fy_node_get_parent(ni)) != NULL && nj != fyn_parent) + ni = nj; + + /* not a 
parent, illegal */ + if (!nj) + return NULL; + + /* here we go... */ + path = ""; + pathlen = 0; + + ni = fyn; + while ((nj = fy_node_get_parent(ni)) != NULL) { + ppath = fy_node_get_parent_address(ni); + if (!ppath) + return NULL; + + ppathlen = strlen(ppath); + + if (pathlen > 0) { + path2 = FY_ALLOCA(pathlen + 1 + ppathlen + 1); + memcpy(path2, ppath, ppathlen); + path2[ppathlen] = '/'; + memcpy(path2 + ppathlen + 1, path, pathlen); + path2[ppathlen + 1 + pathlen] = '\0'; + } else { + path2 = FY_ALLOCA(ppathlen + 1); + memcpy(path2, ppath, ppathlen); + path2[ppathlen] = '\0'; + } + + path = path2; + pathlen = strlen(path); + + free(ppath); + ni = nj; + + if (ni == fyn_parent) + break; + } + + path_ret = strdup(path); + return path_ret; +} + +char *fy_node_get_short_path(struct fy_node *fyn) +{ + struct fy_node *fyn_anchor; + struct fy_anchor *fya; + const char *text; + size_t len; + const char *str; + char *path; + + if (!fyn) + return NULL; + + /* get the nearest anchor traversing upwards */ + fya = fy_node_get_nearest_anchor(fyn); + if (!fya) + return fy_node_get_path(fyn); + + fyn_anchor = fy_anchor_node(fya); + + text = fy_anchor_get_text(fya, &len); + if (!text) + return NULL; + + if (fyn_anchor == fyn) { + alloca_sprintf(&str, "*%.*s", (int)len, text); + } else { + fy_node_get_path_relative_to_alloca(fyn_anchor, fyn, &path); + alloca_sprintf(&str, "*%.*s/%s", (int)len, text, path); + } + + path = strdup(str); + return path; +} + +static struct fy_node * +fy_document_load_node(struct fy_document *fyd, struct fy_parser *fyp, + struct fy_document_state **fydsp) +{ + struct fy_eventp *fyep = NULL; + struct fy_event *fye = NULL; + struct fy_node *fyn = NULL; + int rc, depth; + bool was_stream_start; + + if (!fyd || !fyp) + return NULL; + + /* only single documents */ + fy_parser_set_next_single_document(fyp); + fy_parser_set_default_document_state(fyp, fyd->fyds); + +again: + was_stream_start = false; + do { + /* get next event */ + fyep = 
fy_parse_private(fyp); + + /* no more */ + if (!fyep) + return NULL; + + was_stream_start = fyep->e.type == FYET_STREAM_START; + + if (was_stream_start) { + fy_parse_eventp_recycle(fyp, fyep); + fyep = NULL; + } + + } while (was_stream_start); + + fye = &fyep->e; + + /* STREAM_END */ + if (fye->type == FYET_STREAM_END) { + fy_parse_eventp_recycle(fyp, fyep); + + /* final STREAM_END? */ + if (fyp->state == FYPS_END) + return NULL; + + /* multi-stream */ + goto again; + } + + FYD_TOKEN_ERROR_CHECK(fyd, fy_event_get_token(fye), FYEM_DOC, + fye->type == FYET_DOCUMENT_START, err_out, + "bad event"); + + fy_parse_eventp_recycle(fyp, fyep); + fyep = NULL; + fye = NULL; + + fyd_doc_debug(fyd, "calling load_node() for root"); + depth = 0; + rc = fy_parse_document_load_node(fyp, fyd, fy_parse_private(fyp), &fyn, &depth); + fyd_error_check(fyd, !rc, err_out, + "fy_parse_document_load_node() failed"); + + rc = fy_parse_document_load_end(fyp, fyd, fy_parse_private(fyp)); + fyd_error_check(fyd, !rc, err_out, + "fy_parse_document_load_node() failed"); + + /* always resolve parents */ + fy_resolve_parent_node(fyd, fyn, NULL); + + if (fydsp) + *fydsp = fy_document_state_ref(fyp->current_document_state); + + return fyn; + +err_out: + fy_parse_eventp_recycle(fyp, fyep); + fyd->diag->on_error = false; + return NULL; +} + +static struct fy_node * +fy_node_build_internal(struct fy_document *fyd, + int (*parser_setup)(struct fy_parser *fyp, void *user), + void *user) +{ + struct fy_document_state *fyds = NULL; + struct fy_node *fyn = NULL; + struct fy_parser fyp_data, *fyp = &fyp_data; + struct fy_parse_cfg cfg; + struct fy_eventp *fyep; + int rc; + bool got_stream_end; + + if (!fyd || !parser_setup) + return NULL; + + cfg = fyd->parse_cfg; + cfg.diag = fyd->diag; + rc = fy_parse_setup(fyp, &cfg); + if (rc) { + fyd->diag->on_error = false; + return NULL; + } + + rc = (*parser_setup)(fyp, user); + fyd_error_check(fyd, !rc, err_out, + "parser_setup() failed"); + + fyn = 
fy_document_load_node(fyd, fyp, &fyds); + fyd_error_check(fyd, fyn, err_out, + "fy_document_load_node() failed"); + + got_stream_end = false; + while (!got_stream_end && (fyep = fy_parse_private(fyp)) != NULL) { + if (fyep->e.type == FYET_STREAM_END) + got_stream_end = true; + fy_parse_eventp_recycle(fyp, fyep); + } + + if (got_stream_end) { + fyep = fy_parse_private(fyp); + + FYD_TOKEN_ERROR_CHECK(fyd, fy_event_get_token(&fyep->e), FYEM_DOC, + !fyep, err_out, + "trailing events after the last"); + + fy_parse_eventp_recycle(fyp, fyep); + } + + rc = fy_document_state_merge(fyd->fyds, fyds); + fyd_error_check(fyd, !rc, err_out, + "fy_document_state_merge() failed"); + + fy_document_state_unref(fyds); + + fy_parse_cleanup(fyp); + + return fyn; + +err_out: + fy_node_detach_and_free(fyn); + fy_document_state_unref(fyds); + fy_parse_cleanup(fyp); + fyd->diag->on_error = false; + return NULL; +} + +struct fy_node *fy_node_build_from_string(struct fy_document *fyd, const char *str, size_t len) +{ + struct fy_document_build_string_ctx ctx = { + .str = str, + .len = len, + }; + + return fy_node_build_internal(fyd, parser_setup_from_string, &ctx); +} + +struct fy_node *fy_node_build_from_malloc_string(struct fy_document *fyd, char *str, size_t len) +{ + struct fy_document_build_malloc_string_ctx ctx = { + .str = str, + .len = len, + }; + + return fy_node_build_internal(fyd, parser_setup_from_malloc_string, &ctx); +} + +struct fy_node *fy_node_build_from_file(struct fy_document *fyd, const char *file) +{ + struct fy_document_build_file_ctx ctx = { + .file = file, + }; + + return fy_node_build_internal(fyd, parser_setup_from_file, &ctx); +} + +struct fy_node *fy_node_build_from_fp(struct fy_document *fyd, FILE *fp) +{ + struct fy_document_build_fp_ctx ctx = { + .name = NULL, + .fp = fp, + }; + + return fy_node_build_internal(fyd, parser_setup_from_fp, &ctx); +} + +int fy_document_set_root(struct fy_document *fyd, struct fy_node *fyn) +{ + if (!fyd) + return -1; + + if (fyn && 
fyn->attached) + return -1; + + fy_node_detach_and_free(fyd->root); + fyd->root = NULL; + + fyn->parent = NULL; + fyd->root = fyn; + + if (fyn) + fyn->attached = true; + + return 0; +} + +#define FYNCSIF_ALIAS FY_BIT(0) +#define FYNCSIF_SIMPLE FY_BIT(1) +#define FYNCSIF_COPY FY_BIT(2) +#define FYNCSIF_MALLOCED FY_BIT(3) + +static struct fy_node * +fy_node_create_scalar_internal(struct fy_document *fyd, const char *data, size_t size, + unsigned int flags) +{ + const bool alias = !!(flags & FYNCSIF_ALIAS); + const bool simple = !!(flags & FYNCSIF_SIMPLE); + const bool copy = !!(flags & FYNCSIF_COPY); + const bool malloced = !!(flags & FYNCSIF_MALLOCED); + struct fy_node *fyn = NULL; + struct fy_input *fyi; + struct fy_atom handle; + enum fy_scalar_style style; + char *data_copy = NULL; + + if (!fyd) + return NULL; + + if (data && size == (size_t)-1) + size = strlen(data); + + fyn = fy_node_alloc(fyd, FYNT_SCALAR); + fyd_error_check(fyd, fyn, err_out, + "fy_node_alloc() failed"); + + if (copy) { + data_copy = malloc(size); + fyd_error_check(fyd, data_copy, err_out, + "malloc() failed"); + memcpy(data_copy, data, size); + fyi = fy_input_from_malloc_data(data_copy, size, &handle, simple); + } else if (malloced) + fyi = fy_input_from_malloc_data((void *)data, size, &handle, simple); + else + fyi = fy_input_from_data(data, size, &handle, simple); + fyd_error_check(fyd, fyi, err_out, + "fy_input_from_data() failed"); + data_copy = NULL; + + if (!alias) { + style = handle.style == FYAS_PLAIN ? FYSS_PLAIN : FYSS_DOUBLE_QUOTED; + fyn->scalar = fy_token_create(FYTT_SCALAR, &handle, style); + } else + fyn->scalar = fy_token_create(FYTT_ALIAS, &handle, NULL); + + fyd_error_check(fyd, fyn->scalar, err_out, + "fy_token_create() failed"); + + fyn->style = !alias ? (style == FYSS_PLAIN ? 
FYNS_PLAIN : FYNS_DOUBLE_QUOTED) : FYNS_ALIAS; + + /* take away the input reference */ + fy_input_unref(fyi); + + return fyn; + +err_out: + if (data_copy) + free(data_copy); + fy_node_detach_and_free(fyn); + fyd->diag->on_error = false; + return NULL; +} + +struct fy_node *fy_node_create_scalar(struct fy_document *fyd, const char *data, size_t size) +{ + return fy_node_create_scalar_internal(fyd, data, size, 0); +} + +struct fy_node *fy_node_create_alias(struct fy_document *fyd, const char *data, size_t size) +{ + return fy_node_create_scalar_internal(fyd, data, size, FYNCSIF_ALIAS); +} + +struct fy_node *fy_node_create_scalar_copy(struct fy_document *fyd, const char *data, size_t size) +{ + return fy_node_create_scalar_internal(fyd, data, size, FYNCSIF_COPY); +} + +struct fy_node *fy_node_create_alias_copy(struct fy_document *fyd, const char *data, size_t size) +{ + return fy_node_create_scalar_internal(fyd, data, size, FYNCSIF_ALIAS | FYNCSIF_COPY); +} + +struct fy_node *fy_node_create_vscalarf(struct fy_document *fyd, const char *fmt, va_list ap) +{ + char *str; + + if (!fyd || !fmt) + return NULL; + + alloca_vsprintf(&str, fmt, ap); + return fy_node_create_scalar_internal(fyd, str, FY_NT, FYNCSIF_COPY); +} + +struct fy_node *fy_node_create_scalarf(struct fy_document *fyd, const char *fmt, ...) 
+{ + va_list ap; + struct fy_node *fyn; + + va_start(ap, fmt); + fyn = fy_node_create_vscalarf(fyd, fmt, ap); + va_end(ap); + + return fyn; +} + +int fy_node_set_tag(struct fy_node *fyn, const char *data, size_t len) +{ + struct fy_document *fyd; + struct fy_tag_scan_info info; + int handle_length, uri_length, prefix_length; + const char *handle_start; + int rc; + struct fy_atom handle; + struct fy_input *fyi = NULL; + struct fy_token *fyt = NULL, *fyt_td = NULL; + + if (!fyn || !data || !len || !fyn->fyd) + return -1; + + fyd = fyn->fyd; + + if (len == (size_t)-1) + len = strlen(data); + + memset(&info, 0, sizeof(info)); + + rc = fy_tag_scan(data, len, &info); + if (rc) + goto err_out; + + handle_length = info.handle_length; + uri_length = info.uri_length; + prefix_length = info.prefix_length; + + handle_start = data + prefix_length; + + fyt_td = fy_document_state_lookup_tag_directive(fyd->fyds, + handle_start, handle_length); + if (!fyt_td) + goto err_out; + + fyi = fy_input_from_data(data, len, &handle, true); + if (!fyi) + goto err_out; + + handle.style = FYAS_URI; + handle.direct_output = false; + handle.storage_hint = 0; + handle.storage_hint_valid = false; + + fyt = fy_token_create(FYTT_TAG, &handle, prefix_length, + handle_length, uri_length, fyt_td); + if (!fyt) + goto err_out; + + fy_token_unref(fyn->tag); + fyn->tag = fyt; + + /* take away the input reference */ + fy_input_unref(fyi); + + return 0; +err_out: + fyd->diag->on_error = false; + return -1; +} + +int fy_node_remove_tag(struct fy_node *fyn) +{ + if (!fyn || !fyn->tag) + return -1; + + fy_token_unref(fyn->tag); + fyn->tag = NULL; + + return 0; +} + +struct fy_node *fy_node_create_sequence(struct fy_document *fyd) +{ + struct fy_node *fyn; + + fyn = fy_node_alloc(fyd, FYNT_SEQUENCE); + if (!fyn) + return NULL; + + return fyn; +} + +struct fy_node *fy_node_create_mapping(struct fy_document *fyd) +{ + struct fy_node *fyn; + + fyn = fy_node_alloc(fyd, FYNT_MAPPING); + if (!fyn) + return NULL; + + 
return fyn; +} + +static int fy_node_sequence_insert_prepare(struct fy_node *fyn_seq, struct fy_node *fyn) +{ + struct fy_document *fyd; + + if (!fyn_seq || !fyn || fyn_seq->type != FYNT_SEQUENCE) + return -1; + + /* can't insert a node that's attached already */ + if (fyn->attached) + return -1; + + /* a document must be associated with the sequence */ + fyd = fyn_seq->fyd; + if (!fyd) + return -1; + + /* the documents of the nodes must match */ + if (fyn->fyd != fyd) + return -1; + + fyn->parent = fyn_seq; + + return 0; +} + +int fy_node_sequence_append(struct fy_node *fyn_seq, struct fy_node *fyn) +{ + int ret; + + ret = fy_node_sequence_insert_prepare(fyn_seq, fyn); + if (ret) + return ret; + + fy_node_mark_synthetic(fyn_seq); + fy_node_list_add_tail(&fyn_seq->sequence, fyn); + fyn->attached = true; + return 0; +} + +int fy_node_sequence_prepend(struct fy_node *fyn_seq, struct fy_node *fyn) +{ + int ret; + + ret = fy_node_sequence_insert_prepare(fyn_seq, fyn); + if (ret) + return ret; + + fy_node_mark_synthetic(fyn_seq); + fy_node_list_add(&fyn_seq->sequence, fyn); + fyn->attached = true; + return 0; +} + +static bool fy_node_sequence_contains_node(struct fy_node *fyn_seq, struct fy_node *fyn) +{ + struct fy_node *fyni; + + if (!fyn_seq || !fyn || fyn_seq->type != FYNT_SEQUENCE) + return false; + + for (fyni = fy_node_list_head(&fyn_seq->sequence); fyni; fyni = fy_node_next(&fyn_seq->sequence, fyni)) + if (fyni == fyn) + return true; + + return false; +} + +int fy_node_sequence_insert_before(struct fy_node *fyn_seq, + struct fy_node *fyn_mark, struct fy_node *fyn) +{ + int ret; + + if (!fy_node_sequence_contains_node(fyn_seq, fyn_mark)) + return -1; + + ret = fy_node_sequence_insert_prepare(fyn_seq, fyn); + if (ret) + return ret; + + fy_node_mark_synthetic(fyn_seq); + fy_node_list_insert_before(&fyn_seq->sequence, fyn_mark, fyn); + fyn->attached = true; + + return 0; +} + +int fy_node_sequence_insert_after(struct fy_node *fyn_seq, + struct fy_node *fyn_mark, 
struct fy_node *fyn) +{ + int ret; + + if (!fy_node_sequence_contains_node(fyn_seq, fyn_mark)) + return -1; + + ret = fy_node_sequence_insert_prepare(fyn_seq, fyn); + if (ret) + return ret; + + fy_node_mark_synthetic(fyn_seq); + fy_node_list_insert_after(&fyn_seq->sequence, fyn_mark, fyn); + fyn->attached = true; + + return 0; +} + +/* + * Unlink @fyn from @fyn_seq and return it detached (parent cleared, + * attached flag reset); NULL when @fyn is not an item of @fyn_seq. + */ +struct fy_node *fy_node_sequence_remove(struct fy_node *fyn_seq, struct fy_node *fyn) +{ + if (!fy_node_sequence_contains_node(fyn_seq, fyn)) + return NULL; + + fy_node_list_del(&fyn_seq->sequence, fyn); + fyn->parent = NULL; + fyn->attached = false; + + fy_node_mark_synthetic(fyn_seq); + + return fyn; +} + +/* + * Common validation for mapping insertions; allocates and fills the pair. + * Key and value may each be NULL; when present they must belong to the + * mapping's document and be unattached. Duplicate keys are rejected unless + * FYPCF_ALLOW_DUPLICATE_KEYS is set. The pair is NOT linked into the mapping + * here and the attached flags are left for the caller to set. + * Returns the new pair or NULL. + */ +static struct fy_node_pair * +fy_node_mapping_pair_insert_prepare(struct fy_node *fyn_map, + struct fy_node *fyn_key, struct fy_node *fyn_value) +{ + struct fy_document *fyd; + struct fy_node_pair *fynp; + + if (!fyn_map || fyn_map->type != FYNT_MAPPING) + return NULL; + + /* a document must be associated with the mapping */ + fyd = fyn_map->fyd; + if (!fyd) + return NULL; + + /* if not NULL, the documents of the nodes must match */ + if ((fyn_key && fyn_key->fyd != fyd) || + (fyn_value && fyn_value->fyd != fyd)) + return NULL; + + /* if not NULL neither the key nor the value must be attached */ + if ((fyn_key && fyn_key->attached) || + (fyn_value && fyn_value->attached)) + return NULL; + + /* if we don't allow duplicate keys */ + if (!(fyd->parse_cfg.flags & FYPCF_ALLOW_DUPLICATE_KEYS)) { + + if (fy_node_mapping_key_is_duplicate(fyn_map, fyn_key)) + return NULL; + } + + fynp = fy_node_pair_alloc(fyd); + if (!fynp) + return NULL; + + if (fyn_key) { + fyn_key->parent = fyn_map; + fyn_key->key_root = true; + } + if (fyn_value) + fyn_value->parent = fyn_map; + + fynp->key = fyn_key; + fynp->value = fyn_value; + fynp->parent = fyn_map; + + return fynp; +} + +/* Append a key/value pair at the tail of @fyn_map; returns 0 or -1. */ +int fy_node_mapping_append(struct fy_node *fyn_map, + struct fy_node *fyn_key, struct fy_node *fyn_value) +{ + struct fy_node_pair *fynp; + + fynp = 
fy_node_mapping_pair_insert_prepare(fyn_map, fyn_key, fyn_value); + if (!fynp) + return -1; + + fy_node_pair_list_add_tail(&fyn_map->mapping, fynp); + /* keep the accelerator (when present) in sync; NOTE(review): the insert result is not checked here */ + if (fyn_map->xl) + fy_accel_insert(fyn_map->xl , fyn_key, fynp); + + if (fyn_key) + fyn_key->attached = true; + if (fyn_value) + fyn_value->attached = true; + + fy_node_mark_synthetic(fyn_map); + + return 0; +} + +/* Insert a key/value pair at the head of @fyn_map; returns 0 or -1. */ +int fy_node_mapping_prepend(struct fy_node *fyn_map, + struct fy_node *fyn_key, struct fy_node *fyn_value) +{ + struct fy_node_pair *fynp; + + fynp = fy_node_mapping_pair_insert_prepare(fyn_map, fyn_key, fyn_value); + if (!fynp) + return -1; + + if (fyn_key) + fyn_key->attached = true; + if (fyn_value) + fyn_value->attached = true; + fy_node_pair_list_add(&fyn_map->mapping, fynp); + if (fyn_map->xl) + fy_accel_insert(fyn_map->xl, fyn_key, fynp); + + fy_node_mark_synthetic(fyn_map); + + return 0; +} + +/* + * True when @fynp is a pair of mapping @fyn_map. Uses the accelerator + * lookup by key when the mapping has one, a linear list scan otherwise. + */ +bool fy_node_mapping_contains_pair(struct fy_node *fyn_map, struct fy_node_pair *fynp) +{ + struct fy_node_pair *fynpi; + + if (!fyn_map || !fynp || fyn_map->type != FYNT_MAPPING) + return false; + + if (fyn_map->xl) { + fynpi = fy_node_accel_lookup_by_node(fyn_map, fynp->key); + if (fynpi == fynp) + return true; + } else { + for (fynpi = fy_node_pair_list_head(&fyn_map->mapping); fynpi; fynpi = fy_node_pair_next(&fyn_map->mapping, fynpi)) + if (fynpi == fynp) + return true; + } + + return false; +} + +/* + * Unlink @fynp from @fyn_map and detach its key and value (parent cleared, + * attached reset). Nothing is freed here. Returns 0, or -1 when @fynp is + * not a pair of @fyn_map. + */ +int fy_node_mapping_remove(struct fy_node *fyn_map, struct fy_node_pair *fynp) +{ + if (!fy_node_mapping_contains_pair(fyn_map, fynp)) + return -1; + + fy_node_pair_list_del(&fyn_map->mapping, fynp); + if (fyn_map->xl) + fy_accel_remove(fyn_map->xl, fynp->key); + + if (fynp->key) { + fynp->key->parent = NULL; + fynp->key->attached = false; + } + + if (fynp->value) { + fynp->value->parent = NULL; + fynp->value->attached = false; + } + + fynp->parent = NULL; + + return 0; +} + +/* + * Remove the pair matching @fyn_key from @fyn_map and return its detached + * value (may be NULL); the pair itself is freed. + */ +/* returns value */ +struct fy_node *fy_node_mapping_remove_by_key(struct fy_node *fyn_map, struct fy_node *fyn_key) +{ + struct 
fy_node_pair *fynp; + struct fy_node *fyn_value; + + fynp = fy_node_mapping_lookup_pair(fyn_map, fyn_key); + if (!fynp) + return NULL; + + /* detach the value so that it survives the pair being freed */ + fyn_value = fynp->value; + if (fyn_value) { + fyn_value->parent = NULL; + fyn_value->attached = false; + } + + /* do not free the key if it's the same pointer */ + if (fyn_key != fynp->key) + fy_node_detach_and_free(fyn_key); + fynp->value = NULL; + + fy_node_pair_list_del(&fyn_map->mapping, fynp); + if (fyn_map->xl) + fy_accel_remove(fyn_map->xl, fynp->key); + + fy_node_pair_detach_and_free(fynp); + + fy_node_mark_synthetic(fyn_map); + + return fyn_value; +} + +/* Accessor for the user argument stored in a sort context; @ctx must not be NULL. */ +void *fy_node_mapping_sort_ctx_arg(struct fy_node_mapping_sort_ctx *ctx) +{ + return ctx->arg; +} + +/* + * qsort_r() style comparator trampoline: unpacks the sort context and calls + * the user's key_cmp on the two pairs. The argument order differs on + * __APPLE__, where the context comes first. + */ +static int fy_node_mapping_sort_cmp( +#ifdef __APPLE__ +void *arg, const void *a, const void *b +#else +const void *a, const void *b, void *arg +#endif +) +{ + struct fy_node_mapping_sort_ctx *ctx = arg; + struct fy_node_pair * const *fynppa = a, * const *fynppb = b; + + /* both elements must point inside the array being sorted */ + assert(fynppa >= ctx->fynpp && fynppa < ctx->fynpp + ctx->count); + assert(fynppb >= ctx->fynpp && fynppb < ctx->fynpp + ctx->count); + + return ctx->key_cmp(*fynppa, *fynppb, ctx->arg); +} + +/* not! thread safe! 
*/ +#if !defined(HAVE_QSORT_R) || !HAVE_QSORT_R || defined(__EMSCRIPTEN__) || defined(_MSC_VER) +/* context handed to the plain qsort() comparator through a file-scope variable */ +static struct fy_node_mapping_sort_ctx *fy_node_mapping_sort_ctx_no_qsort_r; + +/* plain qsort() comparator: forwards to fy_node_mapping_sort_cmp with the file-scope context */ +static int fy_node_mapping_sort_cmp_no_qsort_r(const void *a, const void *b) +{ +#ifdef __APPLE__ + return fy_node_mapping_sort_cmp( + fy_node_mapping_sort_ctx_no_qsort_r, + a, b); +#else + return fy_node_mapping_sort_cmp( a, b, + fy_node_mapping_sort_ctx_no_qsort_r); +#endif +} + +#endif + +/* + * Default scalar comparison: equal pointers (including both NULL) compare + * equal, a NULL node sorts after a non-NULL one, otherwise the scalar + * tokens are compared with fy_token_cmp(). @arg is unused. + */ +static int fy_node_scalar_cmp_default(struct fy_node *fyn_a, + struct fy_node *fyn_b, + void *arg) +{ + /* handles NULL cases */ + if (fyn_a == fyn_b) + return 0; + if (!fyn_a) + return 1; + if (!fyn_b) + return -1; + return fy_token_cmp(fyn_a->scalar, fyn_b->scalar); +} + +/* + * Default pair ordering by key. @arg is an optional struct fy_node_cmp_arg + * supplying the scalar compare function and its argument; when NULL, + * fy_node_scalar_cmp_default is used. + */ +/* the default sort method */ +static int fy_node_mapping_sort_cmp_default(const struct fy_node_pair *fynp_a, + const struct fy_node_pair *fynp_b, + void *arg) +{ + int idx_a, idx_b; + bool alias_a, alias_b, scalar_a, scalar_b; + struct fy_node_cmp_arg *cmp_arg; + fy_node_scalar_compare_fn cmp_fn; + void *cmp_fn_arg; + + cmp_arg = arg; + cmp_fn = cmp_arg ? cmp_arg->cmp_fn : fy_node_scalar_cmp_default; + cmp_fn_arg = cmp_arg ? cmp_arg->arg : NULL; + + /* order is: maps first, followed by sequences, and last scalars sorted */ + /* a NULL key is treated as a scalar */ + scalar_a = !fynp_a->key || fy_node_is_scalar(fynp_a->key); + scalar_b = !fynp_b->key || fy_node_is_scalar(fynp_b->key); + + /* scalar? 
perform comparison */ + if (scalar_a && scalar_b) { + + /* if both are aliases, sort skipping the '*' */ + alias_a = fy_node_is_alias(fynp_a->key); + alias_b = fy_node_is_alias(fynp_b->key); + + /* aliases win */ + if (alias_a && !alias_b) + return -1; + + if (!alias_a && alias_b) + return 1; + + return cmp_fn(fynp_a->key, fynp_b->key, cmp_fn_arg); + } + + /* b is scalar, a is not */ + if (!scalar_a && scalar_b) + return -1; + + /* a is scalar, b is not */ + if (scalar_a && !scalar_b) + return 1; + + /* different types, mappings win */ + if (fynp_a->key->type != fynp_b->key->type) + return fynp_a->key->type == FYNT_MAPPING ? -1 : 1; + + /* ok, need to compare indices now */ + /* same collection type: fall back to the pairs' positions in their parent mappings */ + idx_a = fy_node_mapping_get_pair_index(fynp_a->parent, fynp_a); + idx_b = fy_node_mapping_get_pair_index(fynp_b->parent, fynp_b); + + return idx_a > idx_b ? 1 : (idx_a < idx_b ? -1 : 0); +} + +/* + * Copy up to @count pair pointers of @fyn_map, in list order, into @fynpp. + * When the mapping holds fewer than @count pairs a single NULL is stored + * after the last one; the trailing assert expects at most one slot to be + * left over. + */ +void fy_node_mapping_fill_array(struct fy_node *fyn_map, + struct fy_node_pair **fynpp, int count) +{ + struct fy_node_pair *fynpi; + int i; + + for (i = 0, fynpi = fy_node_pair_list_head(&fyn_map->mapping); i < count && fynpi; + fynpi = fy_node_pair_next(&fyn_map->mapping, fynpi), i++) + fynpp[i] = fynpi; + + /* if there's enough space, put down a NULL at the end */ + if (i < count) + fynpp[i++] = NULL; + assert(i == count); + +} + +/* + * Sort the @count pair pointers in @fynpp in place. With a NULL @key_cmp + * the default pair ordering is used and @arg is forwarded to the default + * scalar comparison; otherwise @key_cmp is called with @arg. + * Uses qsort_r() where available, else qsort() through a file-scope + * context pointer (not thread safe). + */ +void fy_node_mapping_perform_sort(struct fy_node *fyn_map, + fy_node_mapping_sort_fn key_cmp, void *arg, + struct fy_node_pair **fynpp, int count) +{ + struct fy_node_mapping_sort_ctx ctx; + struct fy_node_cmp_arg def_arg; + + if (!key_cmp) { + def_arg.cmp_fn = fy_node_scalar_cmp_default; + def_arg.arg = arg; + } else { + def_arg.cmp_fn = NULL; + def_arg.arg = NULL; + } + ctx.key_cmp = key_cmp ? key_cmp : fy_node_mapping_sort_cmp_default; + ctx.arg = key_cmp ? 
arg : &def_arg; + ctx.fynpp = fynpp; + ctx.count = count; +#if defined(HAVE_QSORT_R) && HAVE_QSORT_R && !defined(__EMSCRIPTEN__) && !defined(_MSC_VER) +#ifdef __APPLE__ + qsort_r(fynpp, count, sizeof(*fynpp), &ctx, fy_node_mapping_sort_cmp); +#else + qsort_r(fynpp, count, sizeof(*fynpp), fy_node_mapping_sort_cmp, &ctx); +#endif +#else + /* caution, not thread safe */ + fy_node_mapping_sort_ctx_no_qsort_r = &ctx; + qsort(fynpp, count, sizeof(*fynpp), fy_node_mapping_sort_cmp_no_qsort_r); + fy_node_mapping_sort_ctx_no_qsort_r = NULL; +#endif +} + +/* + * Return a malloc()ed, NULL terminated array with the pairs of @fyn_map in + * sorted order; the mapping itself is not modified. The pair count is + * stored in *@countp when non-NULL. Returns NULL when the item count is + * negative or on allocation failure. Release the array with + * fy_node_mapping_release_array(). + */ +struct fy_node_pair **fy_node_mapping_sort_array(struct fy_node *fyn_map, + fy_node_mapping_sort_fn key_cmp, void *arg, int *countp) +{ + int count; + struct fy_node_pair **fynpp; + + count = fy_node_mapping_item_count(fyn_map); + if (count < 0) + return NULL; + + /* one extra slot for the NULL terminator */ + fynpp = malloc((count + 1) * sizeof(*fynpp)); + if (!fynpp) + return NULL; + + memset(fynpp, 0, (count + 1) * sizeof(*fynpp)); + + fy_node_mapping_fill_array(fyn_map, fynpp, count); + fy_node_mapping_perform_sort(fyn_map, key_cmp, arg, fynpp, count); + + if (countp) + *countp = count; + + return fynpp; +} + +/* Free an array returned by fy_node_mapping_sort_array(); no-op when either argument is NULL. */ +void fy_node_mapping_release_array(struct fy_node *fyn_map, struct fy_node_pair **fynpp) +{ + if (!fyn_map || !fynpp) + return; + + free(fynpp); +} + +/* + * Sort the pairs of @fyn_map in place: a sorted array is built, then the + * mapping's list is re-initialized and the pairs re-added in sorted order. + * Returns 0 on success, -1 when the sorted array could not be built. + */ +int fy_node_mapping_sort(struct fy_node *fyn_map, + fy_node_mapping_sort_fn key_cmp, + void *arg) +{ + int count, i; + struct fy_node_pair **fynpp, *fynpi; + + fynpp = fy_node_mapping_sort_array(fyn_map, key_cmp, arg, &count); + if (!fynpp) + return -1; + + fy_node_pair_list_init(&fyn_map->mapping); + for (i = 0; i < count; i++) { + fynpi = fynpp[i]; + fy_node_pair_list_add_tail(&fyn_map->mapping, fynpi); + } + + fy_node_mapping_release_array(fyn_map, fynpp); + + return 0; +} + +/* + * Recursively sort every mapping reachable from @fyn (sequence items, + * mapping keys and values). A NULL @fyn is a no-op that returns 0. + */ +int fy_node_sort(struct fy_node *fyn, fy_node_mapping_sort_fn key_cmp, void *arg) +{ + struct fy_node *fyni; + struct fy_node_pair *fynp, *fynpi; + int ret; + + if (!fyn) + return 0; + + switch (fyn->type) { + case FYNT_SCALAR: + break; 
+ + case FYNT_SEQUENCE: + for (fyni = fy_node_list_head(&fyn->sequence); fyni; + fyni = fy_node_next(&fyn->sequence, fyni)) { + + fy_node_sort(fyni, key_cmp, arg); + } + break; + + case FYNT_MAPPING: + ret = fy_node_mapping_sort(fyn, key_cmp, arg); + if (ret) + return ret; + + for (fynp = fy_node_pair_list_head(&fyn->mapping); fynp; fynp = fynpi) { + + fynpi = fy_node_pair_next(&fyn->mapping, fynp); + + /* the parent of the key is always NULL */ + ret = fy_node_sort(fynp->key, key_cmp, arg); + if (ret) + return ret; + + ret = fy_node_sort(fynp->value, key_cmp, arg); + if (ret) + return ret; + + fynp->parent = fyn; + } + break; + } + + return 0; +} + +/* + * Build a node for @fyd from a printf-style format and a va_list. + * The caller's @ap is copied; the caller keeps ownership of @ap and is + * responsible for its own va_end(). Returns the result of + * fy_node_build_internal(). + */ +struct fy_node *fy_node_vbuildf(struct fy_document *fyd, const char *fmt, va_list ap) +{ + struct fy_document_vbuildf_ctx vctx; + struct fy_node *fyn; + + vctx.fmt = fmt; + va_copy(vctx.ap, ap); + fyn = fy_node_build_internal(fyd, parser_setup_from_fmt_ap, &vctx); + /* release the copy made above, not the caller's list */ + va_end(vctx.ap); + + return fyn; +} + +/* Variadic front end of fy_node_vbuildf(). */ +struct fy_node *fy_node_buildf(struct fy_document *fyd, const char *fmt, ...) +{ + struct fy_node *fyn; + va_list ap; + + va_start(ap, fmt); + fyn = fy_node_vbuildf(fyd, fmt, ap); + va_end(ap); + + return fyn; +} + +/* + * Build a document with parse configuration @cfg from a printf-style + * format and a va_list. The caller's @ap is copied; the caller keeps + * ownership of @ap. Returns the result of fy_document_build_internal(). + */ +struct fy_document *fy_document_vbuildf(const struct fy_parse_cfg *cfg, const char *fmt, va_list ap) +{ + struct fy_document *fyd; + struct fy_document_vbuildf_ctx vctx; + + vctx.fmt = fmt; + va_copy(vctx.ap, ap); + fyd = fy_document_build_internal(cfg, parser_setup_from_fmt_ap, &vctx); + /* release the copy made above, not the caller's list */ + va_end(vctx.ap); + + return fyd; +} + +/* Variadic front end of fy_document_vbuildf(). */ +struct fy_document *fy_document_buildf(const struct fy_parse_cfg *cfg, const char *fmt, ...) 
+{ + struct fy_document *fyd; + va_list ap; + + va_start(ap, fmt); + fyd = fy_document_vbuildf(cfg, fmt, ap); + va_end(ap); + + return fyd; +} + +/* A reader bundled with the parse configuration, so the reader callbacks can reach the diag. */ +struct flow_reader_container { + struct fy_reader reader; + const struct fy_parse_cfg *cfg; +}; + +/* Reader callback: return the diag of the enclosing container's configuration, or NULL. */ +static struct fy_diag *flow_reader_get_diag(struct fy_reader *fyr) +{ + struct flow_reader_container *frc = fy_container_of(fyr, struct flow_reader_container, reader); + return frc->cfg ? frc->cfg->diag : NULL; +} + +/* reader operations used by fy_flow_document_build_from_string() */ +static const struct fy_reader_ops reader_ops = { + .get_diag = flow_reader_get_diag, +}; + +/* + * Parse a single document from the @len bytes at @str with the parser in + * flow-only mode. When @cfg is NULL a zeroed configuration with + * FYPCF_DEFAULT_PARSE flags is used. If @consumed is non-NULL it is set + * to 0 up front and, when a document was loaded, to the reader's input + * position afterwards. Returns the document, or NULL on failure. + */ +struct fy_document * +fy_flow_document_build_from_string(const struct fy_parse_cfg *cfg, + const char *str, size_t len, size_t *consumed) +{ + struct flow_reader_container frc; + struct fy_reader *fyr = NULL; + struct fy_parser fyp_data, *fyp = &fyp_data; + struct fy_parse_cfg cfg_data; + struct fy_input *fyi; + struct fy_document *fyd; + struct fy_mark mark; + int rc; + + if (!str) + return NULL; + + if (consumed) + *consumed = 0; + + if (!cfg) { + memset(&cfg_data, 0, sizeof(cfg_data)); + cfg_data.flags = FYPCF_DEFAULT_PARSE; + cfg = &cfg_data; + } + + memset(&frc, 0, sizeof(frc)); + fyr = &frc.reader; + frc.cfg = cfg; + + fy_reader_setup(fyr, &reader_ops); + + rc = fy_parse_setup(fyp, cfg); + if (rc) + goto err_no_parse; + + fyi = fy_input_from_data(str, len, NULL, false); + if (!fyi) + goto err_no_input; + + rc = fy_reader_input_open(fyr, fyi, NULL); + if (rc) + goto err_no_input_open; + + /* make the parser read through our own reader */ + fy_parser_set_reader(fyp, fyr); + fy_parser_set_flow_only_mode(fyp, true); + + fyd = fy_parse_load_document(fyp); + + fy_parse_cleanup(fyp); + + if (fyd && consumed) { + fy_reader_get_mark(fyr, &mark); + *consumed = mark.input_pos; + } + + fy_reader_cleanup(fyr); + fy_input_unref(fyi); + + return fyd; + + /* unwind in reverse order of acquisition */ +err_no_input_open: + fy_input_unref(fyi); +err_no_input: + fy_parse_cleanup(fyp); +err_no_parse: + fy_reader_cleanup(fyr); + return NULL; +} + +/* + * scanf-like extraction from the tree below @fyn. @fmt is a sequence of + * "<path> %<conversion>" items; each path is looked up relative to @fyn + * and the scalar text found there is handed to vsscanf() with that single + * conversion. Returns the number of items converted; scanning stops at the + * first item that cannot be resolved or converted. Returns -1 with errno + * set for NULL arguments or an item without a '%' conversion. + */ +int fy_node_vscanf(struct fy_node *fyn, const char *fmt, va_list ap) +{ + size_t len; + 
char *fmt_cpy, *s, *e, *t, *te, *key, *fmtspec; + const char *value; + char *value0; + size_t value_len, value0_len; + int count, ret; + struct fy_node *fynv; + va_list apt; + + if (!fyn || !fmt) + goto err_out; + + /* work on a private, writable copy of the format */ + len = strlen(fmt); + fmt_cpy = FY_ALLOCA(len + 1); + memcpy(fmt_cpy, fmt, len + 1); + s = fmt_cpy; + e = s + len; + + /* the format is of the form 'access key' %fmt[...] */ + /* so we search for a non-escaped '%' */ + value0 = NULL; + value0_len = 0; + count = 0; + while (s < e) { + /* a '%' format must exist */ + t = strchr(s, '%'); + if (!t) + goto err_out; + + /* skip escaped % */ + if (t + 1 < e && t[1] == '%') { + s = t + 2; + continue; + } + + /* trim spaces from key (ctype functions need an unsigned char value) */ + while (isspace((unsigned char)*s)) + s++; + te = t; + while (te > s && isspace((unsigned char)te[-1])) + *--te = '\0'; + + key = s; + + /* we have to scan until the next space that's not in char set */ + fmtspec = t; + while (t < e) { + if (isspace((unsigned char)*t)) + break; + /* character set (may include space) */ + if (*t == '[') { + t++; + /* skip caret */ + if (t < e && *t == '^') + t++; + /* if first character in the set is ']' accept it */ + if (t < e && *t == ']') + t++; + /* now skip until end of character set */ + while (t < e && *t != ']') + t++; + continue; + } + t++; + } + if (t < e) + *t++ = '\0'; + + /* find by (relative) path */ + fynv = fy_node_by_path(fyn, key, t - s, FYNWF_DONT_FOLLOW); + if (!fynv || fynv->type != FYNT_SCALAR) + break; + + /* there must be a text */ + value = fy_token_get_text(fynv->scalar, &value_len); + if (!value) + break; + + /* allocate a new buffer if the one we already have is too small */ + if (!value0 || value0_len < value_len) { + value0 = FY_ALLOCA(value_len + 1); + value0_len = value_len; + } + + /* vsscanf needs a NUL terminated string */ + memcpy(value0, value, value_len); + value0[value_len] = '\0'; + + va_copy(apt, ap); + /* scanf, all arguments are pointers */ + (void)va_arg(ap, void *); /* advance argument pointer */ + + /* pass it to the system's scanf method */ + ret = vsscanf(value0, fmtspec, apt); + /* every va_copy must be paired with a va_end */ + va_end(apt); + + /* since 
it's a single specifier, it must be one on success */ + if (ret != 1) + break; + + s = t; + count++; + } + + return count; + +err_out: + /* errno values are positive */ + errno = EINVAL; + return -1; +} + +/* Variadic front end of fy_node_vscanf(). */ +int fy_node_scanf(struct fy_node *fyn, const char *fmt, ...) +{ + va_list ap; + int ret; + + va_start(ap, fmt); + ret = fy_node_vscanf(fyn, fmt, ap); + va_end(ap); + + return ret; +} + +/* + * fy_node_vscanf() applied to the root of @fyd. A NULL document is passed + * on as a NULL node, which fy_node_vscanf() rejects with -1. + */ +int fy_document_vscanf(struct fy_document *fyd, const char *fmt, va_list ap) +{ + return fy_node_vscanf(fyd ? fyd->root : NULL, fmt, ap); +} + +/* Variadic front end of fy_document_vscanf(). */ +int fy_document_scanf(struct fy_document *fyd, const char *fmt, ...) +{ + va_list ap; + int ret; + + va_start(ap, fmt); + ret = fy_document_vscanf(fyd, fmt, ap); + va_end(ap); + + return ret; +} + +/* True when the document state carries a version directive or any tag directive. */ +bool fy_document_has_directives(const struct fy_document *fyd) +{ + struct fy_document_state *fyds; + + if (!fyd) + return false; + + fyds = fyd->fyds; + if (!fyds) + return false; + + return fyds->fyt_vd || !fy_token_list_empty(&fyds->fyt_td); +} + +/* True when the document start was explicit; false for a NULL document or one without state. */ +bool fy_document_has_explicit_document_start(const struct fy_document *fyd) +{ + return fyd && fyd->fyds ? !fyd->fyds->start_implicit : false; +} + +/* True when the document end was explicit; false for a NULL document or one without state. */ +bool fy_document_has_explicit_document_end(const struct fy_document *fyd) +{ + return fyd && fyd->fyds ? !fyd->fyds->end_implicit : false; +} + +/* Return the user meta pointer of @fyn, or NULL when @fyn is NULL or has no meta. */ +void *fy_node_get_meta(struct fy_node *fyn) +{ + return fyn && fyn->has_meta ? 
fyn->meta : NULL; +} + +/* + * Attach the user pointer @meta to @fyn. When the node already carries + * meta and the document has a clear callback registered, the callback is + * invoked on the old value first. Returns 0, or -1 when @fyn or its + * document is NULL. + */ +int fy_node_set_meta(struct fy_node *fyn, void *meta) +{ + struct fy_document *fyd; + + if (!fyn || !fyn->fyd) + return -1; + + fyd = fyn->fyd; + if (fyn->has_meta && fyd->meta_clear_fn) + fyd->meta_clear_fn(fyn, fyn->meta, fyd->meta_user); + fyn->meta = meta; + fyn->has_meta = true; + + return 0; +} + +/* + * Remove the meta of @fyn, invoking the document's clear callback (if any) + * on it. No-op when the node is NULL, has no meta or has no document. + */ +void fy_node_clear_meta(struct fy_node *fyn) +{ + struct fy_document *fyd; + + if (!fyn || !fyn->has_meta || !fyn->fyd) + return; + + fyd = fyn->fyd; + if (fyd->meta_clear_fn) + fyd->meta_clear_fn(fyn, fyn->meta, fyd->meta_user); + fyn->meta = NULL; + fyn->has_meta = false; +} + +/* Clear the meta of every node below @fyn (children first) and then of @fyn itself. */ +static void fy_node_clear_meta_internal(struct fy_node *fyn) +{ + struct fy_node *fyni; + struct fy_node_pair *fynp, *fynpi; + + if (!fyn) + return; + + switch (fyn->type) { + case FYNT_SCALAR: + break; + + case FYNT_SEQUENCE: + for (fyni = fy_node_list_head(&fyn->sequence); fyni; + fyni = fy_node_next(&fyn->sequence, fyni)) { + + fy_node_clear_meta_internal(fyni); + } + break; + + case FYNT_MAPPING: + for (fynp = fy_node_pair_list_head(&fyn->mapping); fynp; fynp = fynpi) { + + fynpi = fy_node_pair_next(&fyn->mapping, fynp); + + fy_node_clear_meta_internal(fynp->key); + fy_node_clear_meta_internal(fynp->value); + } + break; + } + + fy_node_clear_meta(fyn); +} + +/* + * Register the callback invoked whenever a node's meta is cleared or + * replaced, together with its @user argument. Returns -1 when @fyd or + * @clear_fn is NULL or a callback is already registered, 0 otherwise. + */ +int fy_document_register_meta(struct fy_document *fyd, + fy_node_meta_clear_fn clear_fn, + void *user) +{ + if (!fyd || !clear_fn || fyd->meta_clear_fn) + return -1; + + fyd->meta_clear_fn = clear_fn; + fyd->meta_user = user; + + return 0; +} + +/* Clear the meta of all nodes reachable from the document root, then drop the callback. */ +void fy_document_unregister_meta(struct fy_document *fyd) +{ + if (!fyd) + return; + + fy_node_clear_meta_internal(fy_document_root(fyd)); + + fyd->meta_clear_fn = NULL; + fyd->meta_user = NULL; +} + +/* + * Set user marker bit @marker on @fyn. Returns whether the bit was already + * set; false for a NULL node or a marker above FYNWF_MAX_USER_MARKER. + */ +bool fy_node_set_marker(struct fy_node *fyn, unsigned int marker) +{ + unsigned int prev_marks; + + if (!fyn || marker > FYNWF_MAX_USER_MARKER) + return false; + prev_marks = fyn->marks; + fyn->marks |= FY_BIT(marker); + return !!(prev_marks & FY_BIT(marker)); +} + +/* + * Clear user marker bit @marker on @fyn. Returns whether the bit was set + * before; false for a NULL node or a marker above FYNWF_MAX_USER_MARKER. + */ +bool 
fy_node_clear_marker(struct fy_node *fyn, unsigned int marker) +{ + unsigned int prev_marks; + + if (!fyn || marker > FYNWF_MAX_USER_MARKER) + return false; + prev_marks = fyn->marks; + fyn->marks &= ~FY_BIT(marker); + return !!(prev_marks & FY_BIT(marker)); +} + +/* True when user marker bit @marker is set on @fyn; false for a NULL node or an out of range marker. */ +bool fy_node_is_marker_set(struct fy_node *fyn, unsigned int marker) +{ + if (!fyn || marker > FYNWF_MAX_USER_MARKER) + return false; + return !!(fyn->marks & FY_BIT(marker)); +} + +/* Stream for error output; currently always stderr, @fyd is not consulted. */ +FILE *fy_document_get_error_fp(struct fy_document *fyd) +{ + /* just this for now */ + return stderr; +} + +/* Parse configuration flags of @fyd; for a NULL document, whatever fy_parser_get_cfg_flags(NULL) returns. */ +enum fy_parse_cfg_flags fy_document_get_cfg_flags(const struct fy_document *fyd) +{ + if (!fyd) + return fy_parser_get_cfg_flags(NULL); + + return fyd->parse_cfg.flags; +} + +/* True unless the document was configured with FYPCF_DISABLE_ACCELERATORS. */ +bool fy_document_can_be_accelerated(struct fy_document *fyd) +{ + if (!fyd) + return false; + + return !(fyd->parse_cfg.flags & FYPCF_DISABLE_ACCELERATORS); +} + +/* True when both document accelerators (axl and naxl) are present. */ +bool fy_document_is_accelerated(struct fy_document *fyd) +{ + if (!fyd) + return false; + + return fyd->axl && fyd->naxl; +} + +/* + * Hash callback keyed by an anchor token: XXH32 over the token text, + * written to @hash as an unsigned int. Returns -1 when the token has no text. + */ +static int hd_anchor_hash(struct fy_accel *xl, const void *key, void *userdata, void *hash) +{ + struct fy_token *fyt = (void *)key; + unsigned int *hashp = hash; + const char *text; + size_t len; + + text = fy_token_get_text(fyt, &len); + if (!text) + return -1; + + *hashp = XXH32(text, len, 2654435761U); + return 0; +} + +/* Equality callback for anchor tokens: same text length and same bytes. */ +static bool hd_anchor_eq(struct fy_accel *xl, const void *hash, const void *key1, const void *key2, void *userdata) +{ + struct fy_token *fyt1 = (void *)key1, *fyt2 = (void *)key2; + const char *text1, *text2; + size_t len1, len2; + + text1 = fy_token_get_text(fyt1, &len1); + if (!text1) + return false; + text2 = fy_token_get_text(fyt2, &len2); + if (!text2) + return false; + + return len1 == len2 && !memcmp(text1, text2, len1); +} + +/* hash descriptor keyed by anchor token text */ +static const struct fy_hash_desc hd_anchor = { + .size = sizeof(unsigned int), + .max_bucket_grow_limit = 6, /* TODO allow tuning */ + .hash = hd_anchor_hash, + .eq = hd_anchor_eq, +}; + +/* Hash callback keyed by node identity: XXH32 over the pointer value, not the node contents. */ +static int 
hd_nanchor_hash(struct fy_accel *xl, const void *key, void *userdata, void *hash) +{ + struct fy_node *fyn = (void *)key; + unsigned int *hashp = hash; + uintptr_t ptr = (uintptr_t)fyn; + + *hashp = XXH32(&ptr, sizeof(ptr), 2654435761U); + + return 0; +} + +/* Equality callback keyed by node identity: plain pointer comparison. */ +static bool hd_nanchor_eq(struct fy_accel *xl, const void *hash, const void *key1, const void *key2, void *userdata) +{ + struct fy_node *fyn1 = (void *)key1, *fyn2 = (void *)key2; + + return fyn1 == fyn2; +} + +/* hash descriptor keyed by node pointer */ +static const struct fy_hash_desc hd_nanchor = { + .size = sizeof(unsigned int), + .max_bucket_grow_limit = 6, /* TODO allow tuning */ + .hash = hd_nanchor_hash, + .eq = hd_nanchor_eq, +}; + + +/* Hash callback for mapping keys: content hash of the key node via fy_node_hash_uint(). */ +static int hd_mapping_hash(struct fy_accel *xl, const void *key, void *userdata, void *hash) +{ + return fy_node_hash_uint((struct fy_node *)key, hash); +} + +/* Equality callback for mapping keys: delegates to fy_node_compare(). */ +static bool hd_mapping_eq(struct fy_accel *xl, const void *hash, const void *key1, const void *key2, void *userdata) +{ + return fy_node_compare((struct fy_node *)key1, (struct fy_node *)key2); +} + +/* hash descriptor keyed by mapping key node contents */ +static const struct fy_hash_desc hd_mapping = { + .size = sizeof(unsigned int), + .max_bucket_grow_limit = 6, /* TODO allow tuning */ + .hash = hd_mapping_hash, + .eq = hd_mapping_eq, +}; + +/* sink for fy_node_hash_internal(): feed @size bytes at @ptr into the hash @state */ +typedef void (*fy_hash_update_fn)(void *state, const void *ptr, size_t size); + +/* + * Feed a description of @fyn and everything below it into @update_fn: + * a one character type marker ("S" sequence, "M" mapping with "K"/"V" + * before each key/value, "s" scalar, "A" alias) followed by the contents. + * A NULL node is fed as "s", i.e. like an empty scalar. Mapping pairs are + * visited in the default sort order rather than in list order. + * Returns 0, or the first non-zero result of a nested call. + */ +static int +fy_node_hash_internal(struct fy_node *fyn, fy_hash_update_fn update_fn, void *state) +{ + struct fy_node *fyni; + struct fy_node_pair *fynp; + struct fy_node_pair **fynpp; + struct fy_token_iter iter; + int i, count, rc; + const struct fy_iter_chunk *ic; + + if (!fyn) { + /* NULL */ + update_fn(state, "s", 1); /* as zero length scalar */ + return 0; + } + + switch (fyn->type) { + case FYNT_SEQUENCE: + /* SEQUENCE */ + update_fn(state, "S", 1); + + for (fyni = fy_node_list_head(&fyn->sequence); fyni; + fyni = fy_node_next(&fyn->sequence, fyni)) { + + rc = fy_node_hash_internal(fyni, update_fn, state); + if (rc) + return rc; + } + + break; + + case FYNT_MAPPING: + count = 
fy_node_mapping_item_count(fyn); + + /* temporary array of pair pointers, sorted below with the default comparator */ + fynpp = FY_ALLOCA(sizeof(*fynpp) * (count + 1)); + + fy_node_mapping_fill_array(fyn, fynpp, count); + fy_node_mapping_perform_sort(fyn, NULL, NULL, fynpp, count); + + /* MAPPING */ + update_fn(state, "M", 1); + + for (i = 0; i < count; i++) { + fynp = fynpp[i]; + + /* MAPPING KEY */ + update_fn(state, "K", 1); + rc = fy_node_hash_internal(fynp->key, update_fn, state); + if (rc) + return rc; + + /* MAPPING VALUE */ + update_fn(state, "V", 1); + rc = fy_node_hash_internal(fynp->value, update_fn, state); + if (rc) + return rc; + } + + break; + + case FYNT_SCALAR: + update_fn(state, !fy_node_is_alias(fyn) ? "s" : "A", 1); + + /* feed the scalar text chunk by chunk */ + fy_token_iter_start(fyn->scalar, &iter); + ic = NULL; + while ((ic = fy_token_iter_chunk_next(&iter, ic, &rc)) != NULL) + update_fn(state, ic->str, ic->len); + fy_token_iter_finish(&iter); + + break; + } + + return 0; +} + +/* fy_hash_update_fn adapter around XXH32_update(); @state is an XXH32_state_t. */ +static void update_xx32(void *state, const void *ptr, size_t size) +{ + XXH32_update(state, ptr, size); +} + +/* + * Compute a 32 bit XXH32 content hash of @fyn (NULL allowed) into *@hashp. + * Returns 0 on success or the error of fy_node_hash_internal(), in which + * case *@hashp is not written. + */ +int fy_node_hash_uint(struct fy_node *fyn, unsigned int *hashp) +{ + XXH32_state_t state; + int rc; + + XXH32_reset(&state, 2654435761U); + + rc = fy_node_hash_internal(fyn, update_xx32, &state); + if (rc) + return rc; + + *hashp = XXH32_digest(&state); + return 0; +} + +/* Return the document state of @fyd, or NULL for a NULL document. */ +struct fy_document_state *fy_document_get_document_state(struct fy_document *fyd) +{ + return fyd ? 
fyd->fyds : NULL; +} + +/* + * Replace the document state of an empty document. With a NULL @fyds a + * default state is created, otherwise a reference to @fyds is taken; the + * previous state is unreferenced. Returns -1 when @fyd is NULL, already + * has a root, or no state could be obtained; 0 otherwise. + */ +int fy_document_set_document_state(struct fy_document *fyd, struct fy_document_state *fyds) +{ + /* document must exist and not have any contents */ + if (!fyd || fyd->root) + return -1; + + if (!fyds) + fyds = fy_document_state_default(NULL, NULL); + else + fyds = fy_document_state_ref(fyds); + + if (!fyds) + return -1; + + /* drop the previous document state */ + fy_document_state_unref(fyd->fyds); + /* and use the new document state from now on */ + fyd->fyds = fyds; + + return 0; +} + +/* Allocate a list entry pointing at @fyn; NULL for a NULL node or on allocation failure. */ +struct fy_ptr_node *fy_ptr_node_create(struct fy_node *fyn) +{ + struct fy_ptr_node *fypn; + + if (!fyn) + return NULL; + + fypn = malloc(sizeof(*fypn)); + if (!fypn) + return NULL; + memset(&fypn->node, 0, sizeof(fypn->node)); + fypn->fyn = fyn; + return fypn; +} + +/* Free a list entry; the node it points at is not touched. */ +void fy_ptr_node_destroy(struct fy_ptr_node *fypn) +{ + free(fypn); +} + +/* Pop and destroy every entry of @fypnl. */ +void fy_ptr_node_list_free_all(struct fy_ptr_node_list *fypnl) +{ + struct fy_ptr_node *fypn; + + while ((fypn = fy_ptr_node_list_pop(fypnl)) != NULL) + fy_ptr_node_destroy(fypn); +} + +/* Linear scan: true when some entry of @fypnl points at @fyn. */ +bool fy_ptr_node_list_contains(struct fy_ptr_node_list *fypnl, struct fy_node *fyn) +{ + struct fy_ptr_node *fypn; + + if (!fypnl || !fyn) + return false; + for (fypn = fy_ptr_node_list_head(fypnl); fypn; fypn = fy_ptr_node_next(fypnl, fypn)) { + if (fypn->fyn == fyn) + return true; + } + return false; +} + +/* + * Create a document from a FYET_DOCUMENT_START event, using the parser's + * configuration and the document state carried by the event. + * Returns NULL for any other event type or on failure. + */ +struct fy_document * +fy_document_create_from_event(struct fy_parser *fyp, struct fy_event *fye) +{ + struct fy_document *fyd; + int rc; + + if (!fyp || !fye || fye->type != FYET_DOCUMENT_START) + return NULL; + + /* TODO update document end */ + fyd = fy_document_create(&fyp->cfg); + fyp_error_check(fyp, fyd, err_out, + "fy_document_create() failed"); + + rc = fy_document_set_document_state(fyd, fye->document_start.document_state); + fyp_error_check(fyp, !rc, err_out, + "fy_document_set_document_state() failed"); + + return fyd; + +err_out: + fy_document_destroy(fyd); + return NULL; +} + +/* + * Handle a FYET_DOCUMENT_END event for @fyd. Currently this only validates + * its arguments: 0 when all are non-NULL and the event type matches, -1 otherwise. + */ +int +fy_document_update_from_event(struct 
fy_document *fyd, struct fy_parser *fyp, struct fy_event *fye) +{ + if (!fyd || !fyp || !fye || fye->type != FYET_DOCUMENT_END) + return -1; + + /* nothing besides checks */ + return 0; +} + +/* + * Create the node that corresponds to a scalar, alias, mapping start or + * sequence start event, taking references on the event's tokens. An anchor + * carried by the event is registered with the document. Any other event + * type yields NULL. Returns NULL on failure as well. + */ +struct fy_node * +fy_node_create_from_event(struct fy_document *fyd, struct fy_parser *fyp, struct fy_event *fye) +{ + struct fy_node *fyn = NULL; + struct fy_token *value = NULL, *anchor = NULL; + int rc; + + if (!fyd || !fye) + return NULL; + + switch (fye->type) { + default: + break; + + case FYET_SCALAR: + fyn = fy_node_alloc(fyd, FYNT_SCALAR); + fyp_error_check(fyp, fyn, err_out, + "fy_node_alloc() scalar failed"); + + value = fye->scalar.value; + + /* a scalar event without a value token is a NULL scalar and gets plain style */ + if (value) /* NULL scalar */ + fyn->style = fy_node_style_from_scalar_style(value->scalar.style); + else + fyn->style = FYNS_PLAIN; + + /* NULLs are OK */ + fyn->tag = fy_token_ref(fye->scalar.tag); + fyn->scalar = fy_token_ref(value); + anchor = fye->scalar.anchor; + break; + + case FYET_ALIAS: + /* an alias is stored as a scalar node with alias style */ + fyn = fy_node_alloc(fyd, FYNT_SCALAR); + fyp_error_check(fyp, fyn, err_out, + "fy_node_alloc() alias failed"); + + value = fye->alias.anchor; + fyn->style = FYNS_ALIAS; + fyn->scalar = fy_token_ref(value); + anchor = NULL; + break; + + case FYET_MAPPING_START: + fyn = fy_node_create_mapping(fyd); + fyp_error_check(fyp, fyn, err_out, + "fy_node_create_mapping() failed"); + + value = fye->mapping_start.mapping_start; + fyn->style = value->type == FYTT_FLOW_MAPPING_START ? FYNS_FLOW : FYNS_BLOCK; + + fyn->tag = fy_token_ref(fye->mapping_start.tag); + fyn->mapping_start = fy_token_ref(value); + fyn->mapping_end = NULL; + anchor = fye->mapping_start.anchor; + break; + + case FYET_SEQUENCE_START: + fyn = fy_node_create_sequence(fyd); + fyp_error_check(fyp, fyn, err_out, + "fy_node_create_sequence() failed"); + + value = fye->sequence_start.sequence_start; + + fyn->style = value->type == FYTT_FLOW_SEQUENCE_START ? 
FYNS_FLOW : FYNS_BLOCK; + + fyn->tag = fy_token_ref(fye->sequence_start.tag); + fyn->sequence_start = fy_token_ref(value); + fyn->sequence_end = NULL; + anchor = fye->sequence_start.anchor; + + break; + + } + + /* the document receives its own reference on the anchor token */ + if (fyn && anchor) { + rc = fy_document_register_anchor(fyd, fyn, fy_token_ref(anchor)); + fyp_error_check(fyp, !rc, err_out, + "fy_document_register_anchor() failed"); + } + + return fyn; + +err_out: + /* NULL OK */ + fy_node_free(fyn); + return NULL; +} + +/* + * Record the end token of a collection: a mapping end event updates a + * mapping node, a sequence end event a sequence node. The previous end + * token is unreferenced. Returns 0, or -1 for NULL arguments, a node of + * the wrong kind, or any other event type. + */ +int +fy_node_update_from_event(struct fy_node *fyn, struct fy_parser *fyp, struct fy_event *fye) +{ + if (!fyn || !fyp || !fye) + return -1; + + switch (fye->type) { + + case FYET_MAPPING_END: + if (!fy_node_is_mapping(fyn)) + return -1; + fy_token_unref(fyn->mapping_end); + fyn->mapping_end = fy_token_ref(fye->mapping_end.mapping_end); + + break; + + case FYET_SEQUENCE_END: + if (!fy_node_is_sequence(fyn)) + return -1; + fy_token_unref(fyn->sequence_end); + fyn->sequence_end = fy_token_ref(fye->sequence_end.sequence_end); + + break; + + default: + return -1; + } + + return 0; +} + +/* + * Allocate a pair for mapping @fyn_parent with key @fyn (may be NULL) and + * no value yet. The key is marked attached, but the pair is not linked + * into the mapping here. Unless FYPCF_ALLOW_DUPLICATE_KEYS is set, a key + * already present in the mapping is reported as an error. + * Returns the pair or NULL. + */ +struct fy_node_pair * +fy_node_pair_create_with_key(struct fy_document *fyd, struct fy_node *fyn_parent, struct fy_node *fyn) +{ + struct fy_node_pair *fynp; + bool is_duplicate; + + if (!fyd || !fyn_parent || !fy_node_is_mapping(fyn_parent)) + return NULL; + + /* if we don't allow duplicate keys */ + if (!(fyd->parse_cfg.flags & FYPCF_ALLOW_DUPLICATE_KEYS)) { + + /* make sure we don't add an already existing key */ + is_duplicate = fy_node_mapping_key_is_duplicate(fyn_parent, fyn); + if (is_duplicate) { + FYD_NODE_ERROR(fyd, fyn, FYEM_DOC, + "duplicate mapping key"); + return NULL; + } + } + + fynp = fy_node_pair_alloc(fyd); + fyd_error_check(fyd, fynp, err_out, + "fy_node_pair_alloc() failed"); + + fynp->parent = fyn_parent; + + fynp->key = fyn; + if (fynp->key) + fynp->key->attached = true; + + return fynp; + +err_out: + fy_node_pair_free(fynp); + return NULL; + +} + +/* + * Set the value of a pair that has none yet and link the pair at the tail + * of its parent mapping (and into the mapping's accelerator, if present). + * Returns 0 on success, -1 on error. + */ +int +fy_node_pair_update_with_value(struct fy_node_pair 
*fynp, struct fy_node *fyn) +{ + struct fy_node *fyn_parent; + struct fy_document *fyd; + int rc; + + /* node pair must exist, have a mapping parent and its value must be NULL */ + if (!fynp || fynp->value || !fynp->parent || !fy_node_is_mapping(fynp->parent)) + return -1; + + /* a NULL value is allowed; the document then comes from the parent mapping */ + fyd = fyn ? fyn->fyd : fynp->parent->fyd; + if (!fyd) + return -1; + + fynp->value = fyn; + if (fynp->value) + fynp->value->attached = true; + + fyn_parent = fynp->parent; + + fy_node_pair_list_add_tail(&fyn_parent->mapping, fynp); + if (fyn_parent->xl) { + rc = fy_accel_insert(fyn_parent->xl, fynp->key, fynp); + fyd_error_check(fyd, !rc, err_out, + "fy_accel_insert() failed"); + } + + return 0; + +err_out: + /* undo the list insertion and the value assignment */ + fy_node_pair_list_del(&fyn_parent->mapping, fynp); + if (fyn) + fyn->attached = false; + fynp->value = NULL; + return -1; +} + +/* + * Append @fyn to sequence @fyn_parent and mark it attached. Unlike the + * fy_node_sequence_append() path no document match or attached check is + * made here. Returns 0, or -1 on invalid arguments. + */ +int +fy_node_sequence_add_item(struct fy_node *fyn_parent, struct fy_node *fyn) +{ + /* parent must be a sequence and the item must belong to a document */ + if (!fyn_parent || !fyn || !fy_node_is_sequence(fyn_parent) || !fyn->fyd) + return -1; + + fyn->parent = fyn_parent; + fy_node_list_add_tail(&fyn_parent->sequence, fyn); + fyn->attached = true; + return 0; +} + +/* + * Initialize a caller provided iterator: zeroed, waiting for stream start, + * with the stack pointing at the embedded in-place storage. + * @fydi must not be NULL. + */ +void fy_document_iterator_setup(struct fy_document_iterator *fydi) +{ + memset(fydi, 0, sizeof(*fydi)); + fydi->state = FYDIS_WAITING_STREAM_START; + fydi->fyd = NULL; + fydi->iterate_root = NULL; + + /* suppress recycling if we must */ + fydi->suppress_recycling_force = getenv("FY_VALGRIND") && !getenv("FY_VALGRIND_RECYCLING"); + fydi->suppress_recycling = fydi->suppress_recycling_force; + + fy_eventp_list_init(&fydi->recycled_eventp); + fy_token_list_init(&fydi->recycled_token); + + if (!fydi->suppress_recycling) { + fydi->recycled_eventp_list = &fydi->recycled_eventp; + fydi->recycled_token_list = &fydi->recycled_token; + } else { + fydi->recycled_eventp_list = NULL; + fydi->recycled_token_list = NULL; + } + + /* start with the stack pointing to the in place data */ + fydi->stack_top = (unsigned int)-1; + fydi->stack_alloc = sizeof(fydi->in_place) / sizeof(fydi->in_place[0]); + fydi->stack = fydi->in_place; +} + +/* + * Release what an iterator holds (a separately allocated stack, recycled + * tokens and events) and reset it to its initial state. + * @fydi must not be NULL. + */ +void 
fy_document_iterator_cleanup(struct fy_document_iterator *fydi) +{ + struct fy_token *fyt; + struct fy_eventp *fyep; + + /* free the stack if it's not the inplace one */ + if (fydi->stack != fydi->in_place) + free(fydi->stack); + fydi->stack_top = (unsigned int)-1; + fydi->stack_alloc = sizeof(fydi->in_place) / sizeof(fydi->in_place[0]); + fydi->stack = fydi->in_place; + + while ((fyt = fy_token_list_pop(&fydi->recycled_token)) != NULL) + fy_token_free(fyt); + + while ((fyep = fy_eventp_list_pop(&fydi->recycled_eventp)) != NULL) + fy_eventp_free(fyep); + + fydi->state = FYDIS_WAITING_STREAM_START; + fydi->fyd = NULL; + fydi->iterate_root = NULL; +} + +struct fy_document_iterator *fy_document_iterator_create(void) +{ + struct fy_document_iterator *fydi; + + fydi = malloc(sizeof(*fydi)); + if (!fydi) + return NULL; + fy_document_iterator_setup(fydi); + return fydi; +} + +void fy_document_iterator_destroy(struct fy_document_iterator *fydi) +{ + if (!fydi) + return; + fy_document_iterator_cleanup(fydi); + free(fydi); +} + +static struct fy_event * +fydi_event_create(struct fy_document_iterator *fydi, struct fy_node *fyn, bool start) +{ + struct fy_eventp *fyep; + struct fy_event *fye; + struct fy_anchor *fya; + struct fy_token *anchor = NULL; + + fyep = fy_document_iterator_eventp_alloc(fydi); + if (!fyep) { + fydi->state = FYDIS_ERROR; + return NULL; + } + fye = &fyep->e; + + if (start) { + fya = fy_node_get_anchor(fyn); + anchor = fya ? 
fya->anchor : NULL; + } + + switch (fyn->type) { + + case FYNT_SCALAR: + if (fyn->style != FYNS_ALIAS) { + fye->type = FYET_SCALAR; + fye->scalar.anchor = fy_token_ref(anchor); + fye->scalar.tag = fy_token_ref(fyn->tag); + fye->scalar.value = fy_token_ref(fyn->scalar); + } else { + fye->type = FYET_ALIAS; + fye->alias.anchor = fy_token_ref(fyn->scalar); + } + break; + + case FYNT_SEQUENCE: + if (start) { + fye->type = FYET_SEQUENCE_START; + fye->sequence_start.anchor = fy_token_ref(anchor); + fye->sequence_start.tag = fy_token_ref(fyn->tag); + fye->sequence_start.sequence_start = fy_token_ref(fyn->sequence_start); + } else { + fye->type = FYET_SEQUENCE_END; + fye->sequence_end.sequence_end = fy_token_ref(fyn->sequence_end); + } + break; + + case FYNT_MAPPING: + if (start) { + fye->type = FYET_MAPPING_START; + fye->mapping_start.anchor = fy_token_ref(anchor); + fye->mapping_start.tag = fy_token_ref(fyn->tag); + fye->mapping_start.mapping_start = fy_token_ref(fyn->mapping_start); + } else { + fye->type = FYET_MAPPING_END; + fye->mapping_end.mapping_end = fy_token_ref(fyn->mapping_end); + } + break; + } + + return fye; +} + +struct fy_event * +fy_document_iterator_stream_start(struct fy_document_iterator *fydi) +{ + struct fy_event *fye; + + if (!fydi || fydi->state == FYDIS_ERROR) + return NULL; + + /* both none and stream start are the same for this */ + if (fydi->state != FYDIS_WAITING_STREAM_START && + fydi->state != FYDIS_WAITING_STREAM_END_OR_DOCUMENT_START) + goto err_out; + + fye = fy_document_iterator_event_create(fydi, FYET_STREAM_START); + if (!fye) + goto err_out; + + fydi->state = FYDIS_WAITING_DOCUMENT_START; + return fye; + +err_out: + fydi->state = FYDIS_ERROR; + return NULL; +} + +struct fy_event * +fy_document_iterator_stream_end(struct fy_document_iterator *fydi) +{ + struct fy_event *fye; + + if (!fydi || fydi->state == FYDIS_ERROR) + return NULL; + + if (fydi->state != FYDIS_WAITING_STREAM_END_OR_DOCUMENT_START && + fydi->state != 
FYDIS_WAITING_DOCUMENT_START) + goto err_out; + + fye = fy_document_iterator_event_create(fydi, FYET_STREAM_END); + if (!fye) + goto err_out; + + fydi->state = FYDIS_WAITING_STREAM_START; + return fye; + +err_out: + fydi->state = FYDIS_ERROR; + return NULL; +} + +struct fy_event * +fy_document_iterator_document_start(struct fy_document_iterator *fydi, struct fy_document *fyd) +{ + struct fy_event *fye = NULL; + struct fy_eventp *fyep; + + if (!fydi || fydi->state == FYDIS_ERROR) + return NULL; + + if (!fyd) + goto err_out; + + /* we can transition to document start only from document start or stream end */ + if (fydi->state != FYDIS_WAITING_DOCUMENT_START && + fydi->state != FYDIS_WAITING_STREAM_END_OR_DOCUMENT_START) + goto err_out; + + fyep = fy_document_iterator_eventp_alloc(fydi); + if (!fyep) + goto err_out; + fye = &fyep->e; + + fydi->fyd = fyd; + + /* the iteration root is the document root */ + fydi->iterate_root = fyd->root; + + /* suppress recycling if we must */ + fydi->suppress_recycling = (fyd->parse_cfg.flags & FYPCF_DISABLE_RECYCLING) || + fydi->suppress_recycling_force; + + if (!fydi->suppress_recycling) { + fydi->recycled_eventp_list = &fydi->recycled_eventp; + fydi->recycled_token_list = &fydi->recycled_token; + } else { + fydi->recycled_eventp_list = NULL; + fydi->recycled_token_list = NULL; + } + + fye->type = FYET_DOCUMENT_START; + fye->document_start.document_start = NULL; + fye->document_start.document_state = fy_document_state_ref(fyd->fyds); + fye->document_start.implicit = fyd->fyds->start_implicit; + + /* and go into body */ + fydi->state = FYDIS_WAITING_BODY_START_OR_DOCUMENT_END; + + return fye; + +err_out: + fy_document_iterator_event_free(fydi, fye); + fydi->state = FYDIS_ERROR; + return NULL; +} + +struct fy_event * +fy_document_iterator_document_end(struct fy_document_iterator *fydi) +{ + struct fy_event *fye; + + if (!fydi || fydi->state == FYDIS_ERROR) + return NULL; + + if (!fydi->fyd || !fydi->fyd->fyds || + fydi->state != 
FYDIS_WAITING_DOCUMENT_END) + goto err_out; + + fye = fy_document_iterator_event_create(fydi, FYET_DOCUMENT_END, (int)fydi->fyd->fyds->end_implicit); + if (!fye) + goto err_out; + + fydi->fyd = NULL; + fydi->iterate_root = NULL; + + fydi->state = FYDIS_WAITING_STREAM_END_OR_DOCUMENT_START; + return fye; + +err_out: + fydi->state = FYDIS_ERROR; + return NULL; +} + +static bool +fy_document_iterator_ensure_space(struct fy_document_iterator *fydi, unsigned int space) +{ + struct fy_document_iterator_body_state *new_stack; + size_t new_size, copy_size; + unsigned int new_stack_alloc; + + /* empty stack should always have enough space */ + if (fydi->stack_top == (unsigned int)-1) { + assert(fydi->stack_alloc >= space); + return true; + } + + if (fydi->stack_top + space < fydi->stack_alloc) + return true; + + /* make sure we have enough space */ + new_stack_alloc = fydi->stack_alloc * 2; + while (fydi->stack_top + space >= new_stack_alloc) + new_stack_alloc *= 2; + + new_size = new_stack_alloc * sizeof(*new_stack); + + if (fydi->stack == fydi->in_place) { + new_stack = malloc(new_size); + if (!new_stack) + return false; + copy_size = (fydi->stack_top + 1) * sizeof(*new_stack); + memcpy(new_stack, fydi->stack, copy_size); + } else { + new_stack = realloc(fydi->stack, new_size); + if (!new_stack) + return false; + } + fydi->stack = new_stack; + fydi->stack_alloc = new_stack_alloc; + return true; +} + +static bool +fydi_push_collection(struct fy_document_iterator *fydi, struct fy_node *fyn) +{ + struct fy_document_iterator_body_state *s; + + /* make sure there's enough space */ + if (!fy_document_iterator_ensure_space(fydi, 1)) + return false; + + /* get the next */ + fydi->stack_top++; + s = &fydi->stack[fydi->stack_top]; + s->fyn = fyn; + + switch (fyn->type) { + case FYNT_SEQUENCE: + s->fyni = fy_node_list_head(&fyn->sequence); + break; + + case FYNT_MAPPING: + s->fynp = fy_node_pair_list_head(&fyn->mapping); + s->processed_key = false; + break; + + default: + assert(0); 
+ break; + } + + return true; +} + +static inline void +fydi_pop_collection(struct fy_document_iterator *fydi) +{ + assert(fydi->stack_top != (unsigned int)-1); + fydi->stack_top--; +} + +static inline struct fy_document_iterator_body_state * +fydi_last_collection(struct fy_document_iterator *fydi) +{ + if (fydi->stack_top == (unsigned int)-1) + return NULL; + return &fydi->stack[fydi->stack_top]; +} + +bool +fy_document_iterator_body_next_internal(struct fy_document_iterator *fydi, + struct fy_document_iterator_body_result *res) +{ + struct fy_document_iterator_body_state *s; + struct fy_node *fyn, *fyn_col; + bool end; + + if (!fydi || !res || fydi->state == FYDIS_ERROR) + return false; + + if (fydi->state != FYDIS_WAITING_BODY_START_OR_DOCUMENT_END && + fydi->state != FYDIS_BODY) + goto err_out; + + end = false; + s = fydi_last_collection(fydi); + if (!s) { + + fyn = fydi->iterate_root; + /* empty root, or last */ + if (!fyn || fydi->state == FYDIS_BODY) { + fydi->state = FYDIS_WAITING_DOCUMENT_END; + return false; + } + + /* ok, in body proper */ + fydi->state = FYDIS_BODY; + + } else { + + fyn_col = s->fyn; + assert(fyn_col); + + fyn = NULL; + if (fyn_col->type == FYNT_SEQUENCE) { + fyn = s->fyni; + if (fyn) + s->fyni = fy_node_next(&fyn_col->sequence, s->fyni); + } else { + assert(fyn_col->type == FYNT_MAPPING); + if (s->fynp) { + if (!s->processed_key) { + fyn = s->fynp->key; + s->processed_key = true; + } else { + fyn = s->fynp->value; + s->processed_key = false; + + /* next in mapping after value */ + s->fynp = fy_node_pair_next(&fyn_col->mapping, s->fynp); + } + } + } + + /* if no next node in the collection, it's the end of the collection */ + if (!fyn) { + fyn = fyn_col; + end = true; + } + } + + assert(fyn); + + /* only for collections */ + if (fyn->type != FYNT_SCALAR) { + if (!end) { + /* push the new sequence */ + if (!fydi_push_collection(fydi, fyn)) + goto err_out; + } else + fydi_pop_collection(fydi); + } + + res->fyn = fyn; + res->end = end; + 
return true; + +err_out: + fydi->state = FYDIS_ERROR; + return false; +} + +struct fy_event *fy_document_iterator_body_next(struct fy_document_iterator *fydi) +{ + struct fy_document_iterator_body_result res; + + if (!fydi) + return NULL; + + if (!fy_document_iterator_body_next_internal(fydi, &res)) + return NULL; + + return fydi_event_create(fydi, res.fyn, !res.end); +} + +void +fy_document_iterator_node_start(struct fy_document_iterator *fydi, struct fy_node *fyn) +{ + /* do nothing on error */ + if (!fydi || fydi->state == FYDIS_ERROR) + return; + + /* and go into body */ + fydi->state = FYDIS_WAITING_BODY_START_OR_DOCUMENT_END; + fydi->iterate_root = fyn; + fydi->fyd = NULL; +} + +struct fy_node *fy_document_iterator_node_next(struct fy_document_iterator *fydi) +{ + struct fy_document_iterator_body_result res; + + if (!fydi) + return NULL; + + /* do not return ending nodes, are not interested in them */ + do { + if (!fy_document_iterator_body_next_internal(fydi, &res)) + return NULL; + + } while (res.end); + + return res.fyn; +} + +bool fy_document_iterator_get_error(struct fy_document_iterator *fydi) +{ + if (!fydi) + return true; + + if (fydi->state != FYDIS_ERROR) + return false; + + fy_document_iterator_cleanup(fydi); + + return true; +} diff --git a/contrib/libs/libfyaml/src/lib/fy-doc.h b/contrib/libs/libfyaml/src/lib/fy-doc.h new file mode 100644 index 0000000000..d8535c1a1e --- /dev/null +++ b/contrib/libs/libfyaml/src/lib/fy-doc.h @@ -0,0 +1,262 @@ +/* + * fy-doc.h - YAML document internal header file + * + * Copyright (c) 2019 Pantelis Antoniou <pantelis.antoniou@konsulko.com> + * + * SPDX-License-Identifier: MIT + */ +#ifndef FY_DOC_H +#define FY_DOC_H + +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#include <stdint.h> +#include <stdbool.h> +#include <stdio.h> +#include <stdarg.h> + +#include <libfyaml.h> + +#include "fy-ctype.h" +#include "fy-utf8.h" +#include "fy-list.h" +#include "fy-typelist.h" +#include "fy-types.h" +#include "fy-diag.h" 
+#include "fy-dump.h" +#include "fy-docstate.h" +#include "fy-accel.h" +#include "fy-walk.h" +#include "fy-path.h" + +struct fy_eventp; + +/* TODO vary according to platfom */ +static inline int fy_depth_limit(void) +{ + return FYPCF_GUARANTEED_MINIMUM_DEPTH_LIMIT; +} + +FY_TYPE_FWD_DECL_LIST(document); + +struct fy_node; + +struct fy_node_pair { + struct fy_list_head node; + struct fy_node *key; + struct fy_node *value; + struct fy_document *fyd; + struct fy_node *parent; +}; +FY_TYPE_FWD_DECL_LIST(node_pair); +FY_TYPE_DECL_LIST(node_pair); + +FY_TYPE_FWD_DECL_LIST(node); +struct fy_node { + struct fy_list_head node; + struct fy_token *tag; + enum fy_node_style style; + struct fy_node *parent; + struct fy_document *fyd; + unsigned int marks; +#if !defined(_MSC_VER) + enum fy_node_type type : 2; /* 2 bits are enough for 3 types */ +#else + enum fy_node_type type; /* it converted incorrectly and can't be used as supposed to be */ +#endif + bool has_meta : 1; + bool attached : 1; /* when it's attached somewhere */ + bool synthetic : 1; /* node has been modified programmaticaly */ + bool key_root : 1; /* node is the root of key fy_node_get_parent() will return NULL */ + void *meta; + struct fy_accel *xl; /* mapping access accelerator */ + struct fy_path_expr_node_data *pxnd; + union { + struct fy_token *scalar; + struct fy_node_list sequence; + struct fy_node_pair_list mapping; + }; + union { + struct fy_token *sequence_start; + struct fy_token *mapping_start; + }; + union { + struct fy_token *sequence_end; + struct fy_token *mapping_end; + }; +}; +FY_TYPE_DECL_LIST(node); + +struct fy_node *fy_node_alloc(struct fy_document *fyd, enum fy_node_type type); +struct fy_node_pair *fy_node_pair_alloc(struct fy_document *fyd); +int fy_node_pair_free(struct fy_node_pair *fynp); + +void fy_node_detach_and_free(struct fy_node *fyn); +void fy_node_pair_detach_and_free(struct fy_node_pair *fynp); + +struct fy_anchor { + struct fy_list_head node; + struct fy_node *fyn; + struct 
fy_token *anchor; + bool multiple : 1; +}; +FY_TYPE_FWD_DECL_LIST(anchor); +FY_TYPE_DECL_LIST(anchor); + +struct fy_document { + struct fy_list_head node; + struct fy_anchor_list anchors; + struct fy_accel *axl; /* name -> anchor access accelerator */ + struct fy_accel *naxl; /* node -> anchor access accelerator */ + struct fy_document_state *fyds; + struct fy_diag *diag; + struct fy_parse_cfg parse_cfg; + struct fy_node *root; + bool parse_error : 1; + + struct fy_document *parent; + struct fy_document_list children; + + fy_node_meta_clear_fn meta_clear_fn; + void *meta_user; + + struct fy_path_expr_document_data *pxdd; +}; +/* only the list declaration/methods */ +FY_TYPE_DECL_LIST(document); + +struct fy_document *fy_parse_document_create(struct fy_parser *fyp, struct fy_eventp *fyep); + +struct fy_node_mapping_sort_ctx { + fy_node_mapping_sort_fn key_cmp; + void *arg; + struct fy_node_pair **fynpp; + int count; +}; + +void fy_node_mapping_perform_sort(struct fy_node *fyn_map, + fy_node_mapping_sort_fn key_cmp, void *arg, + struct fy_node_pair **fynpp, int count); + +void fy_node_mapping_fill_array(struct fy_node *fyn_map, + struct fy_node_pair **fynpp, int count); + +struct fy_node_pair **fy_node_mapping_sort_array(struct fy_node *fyn_map, + fy_node_mapping_sort_fn key_cmp, + void *arg, int *countp); + +void fy_node_mapping_release_array(struct fy_node *fyn_map, struct fy_node_pair **fynpp); + +struct fy_node_walk_ctx { + unsigned int max_depth; + unsigned int next_slot; + unsigned int mark; + struct fy_node *marked[0]; +}; + +bool fy_node_is_empty(struct fy_node *fyn); + +bool fy_check_ref_loop(struct fy_document *fyd, struct fy_node *fyn, + enum fy_node_walk_flags flags, + struct fy_node_walk_ctx *ctx); + +#define FYNWF_VISIT_MARKER (FYNWF_MAX_USER_MARKER + 1) +#define FYNWF_REF_MARKER (FYNWF_MAX_USER_MARKER + 2) + +#define FYNWF_SYSTEM_MARKS (FY_BIT(FYNWF_VISIT_MARKER) | \ + FY_BIT(FYNWF_REF_MARKER)) + +bool fy_node_uses_single_input_only(struct fy_node 
*fyn, struct fy_input *fyi); +struct fy_input *fy_node_get_first_input(struct fy_node *fyn); +bool fy_node_is_synthetic(struct fy_node *fyn); +void fy_node_mark_synthetic(struct fy_node *fyn); +struct fy_input *fy_node_get_input(struct fy_node *fyn); +int fy_document_register_anchor(struct fy_document *fyd, + struct fy_node *fyn, struct fy_token *anchor); +bool fy_node_mapping_key_is_duplicate(struct fy_node *fyn, struct fy_node *fyn_key); + +struct fy_token *fy_node_non_synthesized_token(struct fy_node *fyn); +struct fy_token *fy_node_token(struct fy_node *fyn); + +FILE *fy_document_get_error_fp(struct fy_document *fyd); +enum fy_parse_cfg_flags fy_document_get_cfg_flags(const struct fy_document *fyd); +bool fy_document_is_accelerated(struct fy_document *fyd); +bool fy_document_can_be_accelerated(struct fy_document *fyd); + +/* TODO move to main include */ +struct fy_node *fy_node_collection_iterate(struct fy_node *fyn, void **prevp); + +/* indirect node */ +FY_TYPE_FWD_DECL_LIST(ptr_node); +struct fy_ptr_node { + struct fy_list_head node; + struct fy_node *fyn; +}; +FY_TYPE_DECL_LIST(ptr_node); + +struct fy_ptr_node *fy_ptr_node_create(struct fy_node *fyn); +void fy_ptr_node_destroy(struct fy_ptr_node *fypn); +void fy_ptr_node_list_free_all(struct fy_ptr_node_list *fypnl); +bool fy_ptr_node_list_contains(struct fy_ptr_node_list *fypnl, struct fy_node *fyn); +int fy_node_linearize_recursive(struct fy_ptr_node_list *fypnl, struct fy_node *fyn); +int fy_node_linearize(struct fy_ptr_node_list *fypnl, struct fy_node *fyn); +void fy_node_iterator_check(struct fy_node *fyn); + + +enum fy_document_iterator_state { + FYDIS_WAITING_STREAM_START, + FYDIS_WAITING_DOCUMENT_START, + FYDIS_WAITING_BODY_START_OR_DOCUMENT_END, + FYDIS_BODY, + FYDIS_WAITING_DOCUMENT_END, + FYDIS_WAITING_STREAM_END_OR_DOCUMENT_START, + FYDIS_ERROR, +}; + +struct fy_document_iterator_body_state { + struct fy_node *fyn; /* the collection node */ + bool processed_key : 1; /* for mapping only */ + 
union { + struct fy_node *fyni; /* for sequence */ + struct fy_node_pair *fynp; /* for mapping */ + }; +}; + +struct fy_document_iterator { + enum fy_document_iterator_state state; + struct fy_document *fyd; + struct fy_node *iterate_root; + bool suppress_recycling_force : 1; + bool suppress_recycling : 1; + + struct fy_eventp_list recycled_eventp; + struct fy_token_list recycled_token; + + struct fy_eventp_list *recycled_eventp_list; /* NULL when suppressing */ + struct fy_token_list *recycled_token_list; /* NULL when suppressing */ + + unsigned int stack_top; + unsigned int stack_alloc; + struct fy_document_iterator_body_state *stack; + struct fy_document_iterator_body_state in_place[FYPCF_GUARANTEED_MINIMUM_DEPTH_LIMIT]; +}; + +void fy_document_iterator_setup(struct fy_document_iterator *fydi); +void fy_document_iterator_cleanup(struct fy_document_iterator *fydi); +struct fy_document_iterator *fy_document_iterator_create(void); +void fy_document_iterator_destroy(struct fy_document_iterator *fydi); +void fy_document_iterator_start(struct fy_document_iterator *fydi, struct fy_document *fyd); +void fy_document_iterator_end(struct fy_document_iterator *fydi); + +struct fy_document_iterator_body_result { + struct fy_node *fyn; + bool end; +}; + +bool +fy_document_iterator_body_next_internal(struct fy_document_iterator *fydi, + struct fy_document_iterator_body_result *res); + +#endif diff --git a/contrib/libs/libfyaml/src/lib/fy-docbuilder.c b/contrib/libs/libfyaml/src/lib/fy-docbuilder.c new file mode 100644 index 0000000000..08152f0988 --- /dev/null +++ b/contrib/libs/libfyaml/src/lib/fy-docbuilder.c @@ -0,0 +1,542 @@ +/* + * fy-docbuilder.c - YAML document builder methods + * + * Copyright (c) 2022 Pantelis Antoniou <pantelis.antoniou@konsulko.com> + * + * SPDX-License-Identifier: MIT + */ +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#include <stdio.h> +#include <string.h> +#include <assert.h> +#include <stdlib.h> +#include <ctype.h> +#include <errno.h> 
+#if defined (__unix__) || (defined (__APPLE__) && defined (__MACH__)) +#include <unistd.h> +#endif + +#include <libfyaml.h> + +#include "fy-utils.h" + +#include "fy-docbuilder.h" + +#include "fy-parse.h" +#include "fy-doc.h" + +const char *fy_document_builder_state_txt[] = { + [FYDBS_NODE] = "node", + [FYDBS_MAP_KEY] = "map-key", + [FYDBS_MAP_VAL] = "map-val", + [FYDBS_SEQ] = "seq", +}; + +void +fy_document_builder_reset(struct fy_document_builder *fydb) +{ + struct fy_document_builder_ctx *c; + unsigned int i; + + if (!fydb) + return; + + for (i = 0, c = fydb->stack; i < fydb->next; i++, c++) { + fy_node_free(c->fyn); + c->fyn = NULL; + fy_node_pair_free(c->fynp); + c->fynp = NULL; + } + fydb->next = 0; + + if (fydb->fyd) { + fy_document_destroy(fydb->fyd); + fydb->fyd = NULL; + } + fydb->in_stream = false; + fydb->doc_done = false; +} + +static const struct fy_document_builder_cfg docbuilder_default_cfg = { + .parse_cfg = { + .flags = FYPCF_DEFAULT_DOC, + } +}; + +struct fy_document_builder * +fy_document_builder_create(const struct fy_document_builder_cfg *cfg) +{ + struct fy_document_builder *fydb = NULL; + + if (!cfg) + cfg = &docbuilder_default_cfg; + + fydb = malloc(sizeof(*fydb)); + if (!fydb) + goto err_out; + + memset(fydb, 0, sizeof(*fydb)); + fydb->cfg = *cfg; + fydb->next = 0; + fydb->in_stream = false; + fydb->doc_done = false; + fydb->alloc = fy_depth_limit(); /* always start with this */ + fydb->max_depth = (cfg->parse_cfg.flags & FYPCF_DISABLE_DEPTH_LIMIT) ? 
0 : fy_depth_limit(); + + fydb->stack = malloc(fydb->alloc * sizeof(*fydb->stack)); + if (!fydb->stack) + goto err_out; + + return fydb; + +err_out: + if (fydb) { + if (fydb->stack) + free(fydb->stack); + free(fydb); + } + + return NULL; +} + +void +fy_document_builder_destroy(struct fy_document_builder *fydb) +{ + if (!fydb) + return; + + fy_document_builder_reset(fydb); + + fy_diag_unref(fydb->cfg.diag); + if (fydb->stack) + free(fydb->stack); + free(fydb); +} + +struct fy_document * +fy_document_builder_get_document(struct fy_document_builder *fydb) +{ + return fydb ? fydb->fyd : NULL; +} + +bool +fy_document_builder_is_in_stream(struct fy_document_builder *fydb) +{ + return fydb && fydb->in_stream; +} + +bool +fy_document_builder_is_in_document(struct fy_document_builder *fydb) +{ + return fydb && fydb->fyd != NULL && !fydb->doc_done; +} + +bool +fy_document_builder_is_document_complete(struct fy_document_builder *fydb) +{ + return fydb && fydb->fyd != NULL && fydb->doc_done; +} + +struct fy_document * +fy_document_builder_take_document(struct fy_document_builder *fydb) +{ + struct fy_document *fyd; + + if (!fy_document_builder_is_document_complete(fydb)) + return NULL; + fyd = fydb->fyd; + fydb->fyd = NULL; + fydb->doc_done = false; + return fyd; +} + +struct fy_document * +fy_document_builder_peek_document(struct fy_document_builder *fydb) +{ + struct fy_document *fyd; + struct fy_document_builder_ctx *c; + + if (!fydb) + return NULL; + + /* just peek; may be incomplete */ + fyd = fydb->fyd; + + assert(fydb->next > 0); + c = &fydb->stack[0]; + + /* wire the root */ + if (!fyd->root) + fyd->root = c->fyn; + + return fyd; +} + + +void +fy_document_builder_set_in_stream(struct fy_document_builder *fydb) +{ + if (!fydb) + return; + /* reset */ + fy_document_builder_reset(fydb); + + fydb->in_stream = true; +} + +int +fy_document_builder_set_in_document(struct fy_document_builder *fydb, struct fy_document_state *fyds, bool single) +{ + struct 
fy_document_builder_ctx *c; + int rc; + + if (!fydb) + return -1; + + /* reset */ + fy_document_builder_reset(fydb); + + fydb->in_stream = true; + + fydb->fyd = fy_document_create(&fydb->cfg.parse_cfg); + if (!fydb->fyd) + return -1; + + if (fyds) { + rc = fy_document_set_document_state(fydb->fyd, fyds); + if (rc) + return rc; + } + + fydb->doc_done = false; + fydb->single_mode = single; + + /* be paranoid */ + assert(fydb->next < fydb->alloc); + + c = &fydb->stack[++fydb->next - 1]; + memset(c, 0, sizeof(*c)); + c->s = FYDBS_NODE; + + return 0; +} + +int +fy_document_builder_process_event(struct fy_document_builder *fydb, struct fy_eventp *fyep) +{ + struct fy_event *fye; + enum fy_event_type etype; + struct fy_document *fyd; + struct fy_document_builder_ctx *c, *cp; + struct fy_node *fyn, *fyn_parent; + struct fy_node_pair *fynp; + struct fy_document_builder_ctx *newc; + struct fy_token *fyt; + int rc; + + fye = fyep ? &fyep->e : NULL; + etype = fye ? fye->type : FYET_NONE; + fyt = fye ? 
fy_event_get_token(fye) : NULL; + + /* not in document */ + if (!fydb->next) { + switch (etype) { + case FYET_STREAM_START: + FYDB_TOKEN_ERROR_CHECK(fydb, fyt, FYEM_DOC, + !fydb->in_stream, err_out, + "STREAM_START while in stream error"); + fydb->in_stream = true; + break; + + case FYET_STREAM_END: + FYDB_TOKEN_ERROR_CHECK(fydb, fyt, FYEM_DOC, + fydb->in_stream, err_out, + "STREAM_END while not in stream error"); + fydb->in_stream = false; + return 1; + + case FYET_DOCUMENT_START: + FYDB_TOKEN_ERROR_CHECK(fydb, fyt, FYEM_DOC, + fydb->in_stream, err_out, + "DOCUMENT_START while not in stream error"); + + /* no-one cares, destroy the document */ + if (!fydb->fyd) + fy_document_destroy(fydb->fyd); + + fydb->fyd = fy_document_create(&fydb->cfg.parse_cfg); + fydb_error_check(fydb, fydb->fyd, err_out, + "fy_document_create() failed"); + + rc = fy_document_set_document_state(fydb->fyd, fyep->e.document_start.document_state); + fydb_error_check(fydb, !rc, err_out, + "fy_document_set_document_state() failed"); + + fydb->doc_done = false; + goto push; + + case FYET_DOCUMENT_END: + FYDB_TOKEN_ERROR_CHECK(fydb, fyt, FYEM_DOC, + fydb->in_stream, err_out, + "DOCUMENT_END while not in stream error"); + + FYDB_TOKEN_ERROR_CHECK(fydb, fyt, FYEM_DOC, + fydb->fyd, err_out, + "DOCUMENT_END without a document"); + + fydb->doc_done = true; + break; + + default: + /* unexpected event */ + FYDB_TOKEN_ERROR(fydb, fyt, FYEM_DOC, + "Unexpected event %s in non-build mode\n", + fy_event_type_txt[etype]); + goto err_out; + } + + return 0; + } + + fyd = fydb->fyd; + + c = &fydb->stack[fydb->next - 1]; + fyn = NULL; + + /* verify that we have a document */ + assert(fydb->fyd); + /* the top state must always be NODE for processing the event */ + assert(c->s == FYDBS_NODE); + + switch (etype) { + case FYET_SCALAR: + case FYET_ALIAS: + fyn = fy_node_alloc(fyd, FYNT_SCALAR); + fydb_error_check(fydb, fyn, err_out, + "fy_node_alloc() SCALAR failed"); + + if (etype == FYET_SCALAR) { + if 
(fye->scalar.value) + fyn->style = fy_node_style_from_scalar_style(fye->scalar.value->scalar.style); + else + fyn->style = FYNS_PLAIN; + fyn->tag = fy_token_ref(fye->scalar.tag); + if (fye->scalar.anchor) { + rc = fy_document_register_anchor(fyd, fyn, fy_token_ref(fye->scalar.anchor)); + fydb_error_check(fydb, !rc, err_out, + "fy_document_register_anchor() failed"); + } + fyn->scalar = fy_token_ref(fye->scalar.value); + } else { + fyn->style = FYNS_ALIAS; + fyn->scalar = fy_token_ref(fye->alias.anchor); + } + goto complete; + + case FYET_MAPPING_START: + c->s = FYDBS_MAP_KEY; + + fyn = fy_node_alloc(fyd, FYNT_MAPPING); + fydb_error_check(fydb, fyn, err_out, + "fy_node_alloc() MAPPING failed"); + + c->fyn = fyn; + fyn->style = fye->mapping_start.mapping_start->type == FYTT_FLOW_MAPPING_START ? FYNS_FLOW : FYNS_BLOCK; + fyn->tag = fy_token_ref(fye->mapping_start.tag); + if (fye->mapping_start.anchor) { + rc = fy_document_register_anchor(fyd, fyn, fy_token_ref(fye->mapping_start.anchor)); + fydb_error_check(fydb, !rc, err_out, + "fy_document_register_anchor() failed"); + } + fyn->mapping_start = fy_token_ref(fye->mapping_start.mapping_start); + break; + + case FYET_MAPPING_END: + + FYDB_TOKEN_ERROR_CHECK(fydb, fyt, FYEM_DOC, fydb->next > 1, err_out, + "Unexpected MAPPING_END (unexpected end of mapping)"); + + cp = &fydb->stack[fydb->next - 2]; + + FYDB_TOKEN_ERROR_CHECK(fydb, fyt, FYEM_DOC, cp->s == FYDBS_MAP_KEY, err_out, + "Unexpected MAPPING_END (not in mapping)"); + + fyn = cp->fyn; + fyn->mapping_end = fy_token_ref(fye->mapping_end.mapping_end); + fydb->next--; + goto complete; + + case FYET_SEQUENCE_START: + c->s = FYDBS_SEQ; + fyn = fy_node_alloc(fyd, FYNT_SEQUENCE); + fydb_error_check(fydb, fyn, err_out, + "fy_node_alloc() SEQUENCE failed"); + + c->fyn = fyn; + fyn->style = fye->sequence_start.sequence_start->type == FYTT_FLOW_SEQUENCE_START ? 
FYNS_FLOW : FYNS_BLOCK; + fyn->tag = fy_token_ref(fye->sequence_start.tag); + if (fye->sequence_start.anchor) { + rc = fy_document_register_anchor(fyd, fyn, fy_token_ref(fye->sequence_start.anchor)); + fydb_error_check(fydb, !rc, err_out, + "fy_document_register_anchor() failed"); + } + fyn->sequence_start = fy_token_ref(fye->sequence_start.sequence_start); + break; + + case FYET_SEQUENCE_END: + + FYDB_TOKEN_ERROR_CHECK(fydb, fyt, FYEM_DOC, fydb->next > 1, err_out, + "Unexpected SEQUENCE_END (unexpected end of sequence)"); + + cp = &fydb->stack[fydb->next - 2]; + + FYDB_TOKEN_ERROR_CHECK(fydb, fyt, FYEM_DOC, cp->s == FYDBS_SEQ, err_out, + "Unexpected MAPPING_SEQUENCE (not in sequence)"); + + fyn = cp->fyn; + fyn->sequence_end = fy_token_ref(fye->sequence_end.sequence_end); + fydb->next--; + goto complete; + + default: + /* unexpected event */ + FYDB_TOKEN_ERROR(fydb, fyt, FYEM_DOC, + "Unexpected event %s in build mode\n", + fy_event_type_txt[etype]); + goto err_out; + } + +push: + FYDB_TOKEN_ERROR_CHECK(fydb, fyt, FYEM_DOC, + !fydb->max_depth || fydb->next < fydb->max_depth, err_out, + "Max depth (%d) exceeded\n", fydb->next); + + /* grow the stack? 
*/ + if (fydb->next >= fydb->alloc) { + newc = realloc(fydb->stack, fydb->alloc * 2 * sizeof(*fydb->stack)); + fydb_error_check(fydb, newc, err_out, + "Unable to grow the context stack"); + fydb->alloc *= 2; + fydb->stack = newc; + } + assert(fydb->next < fydb->alloc); + + c = &fydb->stack[++fydb->next - 1]; + memset(c, 0, sizeof(*c)); + c->s = FYDBS_NODE; + return 0; + +err_out: + return -1; + +complete: + assert(fydb->next > 0); + c = &fydb->stack[fydb->next - 1]; + c->fyn = fyn; + assert(fydb->next > 0); + fydb->next--; + + /* root */ + if (fydb->next == 0) { + fyd->root = fyn; + /* if we're in single mode, don't wait for doc end */ + if (fydb->single_mode) + fydb->doc_done = true; + return 1; + } + + c = &fydb->stack[fydb->next - 1]; + + fyn_parent = c->fyn; + + switch (c->s) { + + case FYDBS_MAP_KEY: + fynp = fy_node_pair_alloc(fyd); + assert(fynp); + fynp->key = fyn; + c->fynp = fynp; + + /* if we don't allow duplicate keys */ + if (!(fyd->parse_cfg.flags & FYPCF_ALLOW_DUPLICATE_KEYS)) { + + /* make sure we don't add an already existing key */ + if (fy_node_mapping_key_is_duplicate(fyn_parent, fyn)) { + FYDB_NODE_ERROR(fydb, fyn, FYEM_DOC, "duplicate key"); + goto err_out; + } + } + + c->s = FYDBS_MAP_VAL; + goto push; + + case FYDBS_MAP_VAL: + fynp = c->fynp; + assert(fynp); + fynp->value = fyn; + + /* set the parent of the node pair and value */ + fynp->parent = fyn_parent; + if (fynp->key) { + fynp->key->parent = fyn_parent; + fynp->key->key_root = true; + } + if (fynp->value) + fynp->value->parent = fyn_parent; + + fy_node_pair_list_add_tail(&c->fyn->mapping, fynp); + if (fyn->xl) { + rc = fy_accel_insert(fyn->xl, fynp->key, fynp); + assert(!rc); + } + if (fynp->key) + fynp->key->attached = true; + if (fynp->value) + fynp->value->attached = true; + + c->fynp = NULL; + c->s = FYDBS_MAP_KEY; + goto push; + + case FYDBS_SEQ: + /* append sequence */ + fyn->parent = fyn_parent; + fy_node_list_add_tail(&c->fyn->sequence, fyn); + fyn->attached = true; + goto 
push; + + case FYDBS_NODE: + /* complete is a scalar */ + fyn->parent = fyn_parent; + return 0; + } + + return 0; +} + +struct fy_document * +fy_document_builder_load_document(struct fy_document_builder *fydb, + struct fy_parser *fyp) +{ + struct fy_eventp *fyep = NULL; + int rc; + + if (fyp->state == FYPS_END) + return NULL; + + while (!fy_document_builder_is_document_complete(fydb) && + (fyep = fy_parse_private(fyp)) != NULL) { + rc = fy_document_builder_process_event(fydb, fyep); + fy_parse_eventp_recycle(fyp, fyep); + if (rc < 0) { + fyp->stream_error = true; + return NULL; + } + } + + /* get ownership of the document */ + return fy_document_builder_take_document(fydb); +} diff --git a/contrib/libs/libfyaml/src/lib/fy-docbuilder.h b/contrib/libs/libfyaml/src/lib/fy-docbuilder.h new file mode 100644 index 0000000000..aa506d6845 --- /dev/null +++ b/contrib/libs/libfyaml/src/lib/fy-docbuilder.h @@ -0,0 +1,98 @@ +/* + * fy-docbuilder.h - YAML document builder internal header file + * + * Copyright (c) 2022 Pantelis Antoniou <pantelis.antoniou@konsulko.com> + * + * SPDX-License-Identifier: MIT + */ +#ifndef FY_DOCBUILDER_H +#define FY_DOCBUILDER_H + +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#include <stdint.h> +#include <stdbool.h> +#include <stdio.h> +#include <stdarg.h> + +#include <libfyaml.h> + +#include "fy-doc.h" + +enum fy_document_builder_state { + FYDBS_NODE, + FYDBS_MAP_KEY, + FYDBS_MAP_VAL, + FYDBS_SEQ, +}; + +struct fy_document_builder_ctx { + enum fy_document_builder_state s; + struct fy_node *fyn; + struct fy_node_pair *fynp; /* for mapping */ +}; + +struct fy_document_builder_cfg { + struct fy_parse_cfg parse_cfg; + void *userdata; + struct fy_diag *diag; +}; + +struct fy_document_builder { + struct fy_document_builder_cfg cfg; + struct fy_document *fyd; + bool single_mode; + bool in_stream; + bool doc_done; + unsigned int next; + unsigned int alloc; + unsigned int max_depth; + struct fy_document_builder_ctx *stack; +}; + +struct 
fy_document_builder * +fy_document_builder_create(const struct fy_document_builder_cfg *cfg); + +void +fy_document_builder_reset(struct fy_document_builder *fydb); + +void +fy_document_builder_destroy(struct fy_document_builder *fydb); + +struct fy_document * +fy_document_builder_get_document(struct fy_document_builder *fydb); + +bool +fy_document_builder_is_in_stream(struct fy_document_builder *fydb); + +bool +fy_document_builder_is_in_document(struct fy_document_builder *fydb); + +bool +fy_document_builder_is_document_complete(struct fy_document_builder *fydb); + +struct fy_document * +fy_document_builder_take_document(struct fy_document_builder *fydb); + +struct fy_document * +fy_document_builder_peek_document(struct fy_document_builder *fydb); + +void +fy_document_builder_set_in_stream(struct fy_document_builder *fydb); + +int +fy_document_builder_set_in_document(struct fy_document_builder *fydb, struct fy_document_state *fyds, bool single); + +int +fy_document_builder_process_event(struct fy_document_builder *fydb, struct fy_eventp *fyep); + +struct fy_document * +fy_document_builder_load_document(struct fy_document_builder *fydb, + struct fy_parser *fyp); + +struct fy_document * +fy_parse_load_document_with_builder(struct fy_parser *fyp); + +#endif diff --git a/contrib/libs/libfyaml/src/lib/fy-docstate.c b/contrib/libs/libfyaml/src/lib/fy-docstate.c new file mode 100644 index 0000000000..e7876e4ea6 --- /dev/null +++ b/contrib/libs/libfyaml/src/lib/fy-docstate.c @@ -0,0 +1,394 @@ +/* + * fy-docstate.c - YAML document state methods + * + * Copyright (c) 2019 Pantelis Antoniou <pantelis.antoniou@konsulko.com> + * + * SPDX-License-Identifier: MIT + */ +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#include <stdio.h> +#include <string.h> +#include <assert.h> +#include <stdlib.h> +#include <ctype.h> +#include <errno.h> + +#include <libfyaml.h> + +#include "fy-parse.h" +#include "fy-doc.h" + +#include "fy-docstate.h" + +struct fy_document_state 
*fy_document_state_alloc(void) +{ + struct fy_document_state *fyds; + + fyds = malloc(sizeof(*fyds)); + if (!fyds) + return NULL; + memset(fyds, 0, sizeof(*fyds)); + + fyds->fyt_vd = NULL; + fy_token_list_init(&fyds->fyt_td); + + fyds->refs = 1; + + return fyds; +} + +void fy_document_state_free(struct fy_document_state *fyds) +{ + if (!fyds) + return; + + assert(fyds->refs == 1); + + fy_token_unref(fyds->fyt_vd); + fy_token_list_unref_all(&fyds->fyt_td); + + free(fyds); +} + +struct fy_document_state *fy_document_state_ref(struct fy_document_state *fyds) +{ + if (!fyds) + return NULL; + + assert(fyds->refs + 1 > 0); + + fyds->refs++; + + return fyds; +} + +void fy_document_state_unref(struct fy_document_state *fyds) +{ + if (!fyds) + return; + + assert(fyds->refs > 0); + + if (fyds->refs == 1) + fy_document_state_free(fyds); + else + fyds->refs--; +} + +int fy_document_state_append_tag(struct fy_document_state *fyds, + const char *handle, const char *prefix, + bool is_default) +{ + struct fy_token *fyt = NULL; + struct fy_input *fyi = NULL; + char *data; + size_t size, handle_size, prefix_size; + struct fy_atom atom; + + size = strlen(handle) + 1 + strlen(prefix); + data = malloc(size + 1); + if (!data) + goto err_out; + + snprintf(data, size + 1, "%s %s", handle, prefix); + + fyi = fy_input_from_malloc_data(data, size, &atom, true); + if (!fyi) + goto err_out; + data = NULL; /* ownership now at input */ + + handle_size = strlen(handle); + prefix_size = strlen(prefix); + + fyt = fy_token_create(FYTT_TAG_DIRECTIVE, &atom, + handle_size, prefix_size, + is_default); + if (!fyt) + goto err_out; + + fy_token_list_add_tail(&fyds->fyt_td, fyt); + + if (!fy_tag_is_default_internal(handle, handle_size, prefix, prefix_size)) + fyds->tags_explicit = true; + + /* take away the input reference */ + fy_input_unref(fyi); + + return 0; + +err_out: + fy_token_unref(fyt); + fy_input_unref(fyi); + if (data) + free(data); + return -1; +} + +struct fy_document_state 
*fy_document_state_default( + const struct fy_version *default_version, + const struct fy_tag * const *default_tags) +{ + struct fy_document_state *fyds = NULL; + const struct fy_tag *fytag; + int i, rc; + + if (!default_version) + default_version = &fy_default_version; + + if (!default_tags) + default_tags = fy_default_tags; + + fyds = fy_document_state_alloc(); + if (!fyds) + goto err_out; + + fyds->version = *default_version; + + fyds->version_explicit = false; + fyds->tags_explicit = false; + fyds->start_implicit = true; + fyds->end_implicit = true; + fyds->json_mode = false; + + memset(&fyds->start_mark, 0, sizeof(fyds->start_mark)); + memset(&fyds->end_mark, 0, sizeof(fyds->end_mark)); + + fyds->fyt_vd = NULL; + fy_token_list_init(&fyds->fyt_td); + + for (i = 0; (fytag = default_tags[i]) != NULL; i++) { + + rc = fy_document_state_append_tag(fyds, fytag->handle, fytag->prefix, true); + if (rc) + goto err_out; + } + + return fyds; +err_out: + fy_document_state_unref(fyds); + return NULL; +} + +struct fy_document_state *fy_document_state_copy(struct fy_document_state *fyds) +{ + struct fy_document_state *fyds_new = NULL; + struct fy_token *fyt_td, *fyt; + + fyds_new = fy_document_state_alloc(); + if (!fyds_new) + goto err_out; + + fyds_new->version = fyds->version; + fyds_new->version_explicit = fyds->version_explicit; + fyds_new->tags_explicit = fyds->tags_explicit; + fyds_new->start_implicit = fyds->start_implicit; + fyds_new->end_implicit = fyds->end_implicit; + fyds_new->json_mode = fyds->json_mode; + + fyds_new->start_mark = fyds->start_mark; + fyds_new->end_mark = fyds->end_mark; + + if (fyds->fyt_vd) { + fyt = fy_token_alloc(); + if (!fyt) + goto err_out; + + fyt->type = FYTT_VERSION_DIRECTIVE; + fyt->handle = fyds->fyt_vd->handle; + fyt->version_directive.vers = fyds->fyt_vd->version_directive.vers; + + /* take reference */ + fy_input_ref(fyt->handle.fyi); + + fyds_new->fyt_vd = fyt; + } + + for (fyt = fy_token_list_first(&fyds->fyt_td); fyt; fyt = 
fy_token_next(&fyds->fyt_td, fyt)) { + fyt_td = fy_token_alloc(); + if (!fyt_td) + goto err_out; + + fyt_td->type = FYTT_TAG_DIRECTIVE; + fyt_td->tag_directive.tag_length = fyt->tag_directive.tag_length; + fyt_td->tag_directive.uri_length = fyt->tag_directive.uri_length; + fyt_td->tag_directive.is_default = fyt->tag_directive.is_default; + fyt_td->handle = fyt->handle; + fyt_td->tag_directive.prefix0 = NULL; + fyt_td->tag_directive.handle0 = NULL; + + /* take reference */ + fy_input_ref(fyt_td->handle.fyi); + + /* append to the new document state */ + fy_token_list_add_tail(&fyds_new->fyt_td, fyt_td); + } + + return fyds_new; + +err_out: + fy_document_state_unref(fyds_new); + return NULL; +} + +struct fy_token *fy_document_state_lookup_tag_directive(struct fy_document_state *fyds, + const char *handle, size_t handle_size) +{ + const char *td_handle; + size_t td_handle_size; + struct fy_token *fyt; + + if (!fyds) + return NULL; + + for (fyt = fy_token_list_first(&fyds->fyt_td); fyt; fyt = fy_token_next(&fyds->fyt_td, fyt)) { + + td_handle = fy_tag_directive_token_handle(fyt, &td_handle_size); + assert(td_handle); + + if (handle_size == td_handle_size && !memcmp(handle, td_handle, handle_size)) + return fyt; + + } + + return NULL; +} + +int fy_document_state_merge(struct fy_document_state *fyds, + struct fy_document_state *fydsc) +{ + const char *td_prefix, *tdc_handle, *tdc_prefix; + size_t td_prefix_size, tdc_handle_size, tdc_prefix_size; + struct fy_token *fyt, *fytc_td, *fyt_td; + + if (!fyds || !fydsc) + return -1; + + /* check if there's a duplicate handle (which differs */ + for (fytc_td = fy_token_list_first(&fydsc->fyt_td); fytc_td; + fytc_td = fy_token_next(&fydsc->fyt_td, fytc_td)) { + + tdc_handle = fy_tag_directive_token_handle(fytc_td, &tdc_handle_size); + if (!tdc_handle) + goto err_out; + tdc_prefix = fy_tag_directive_token_prefix(fytc_td, &tdc_prefix_size); + if (!tdc_prefix) + goto err_out; + + fyt_td = fy_document_state_lookup_tag_directive(fyds, 
tdc_handle, tdc_handle_size); + if (fyt_td) { + /* exists, must check whether the prefixes match */ + td_prefix = fy_tag_directive_token_prefix(fyt_td, &td_prefix_size); + assert(td_prefix); + + /* match? do nothing */ + if (tdc_prefix_size == td_prefix_size && + !memcmp(tdc_prefix, td_prefix, td_prefix_size)) + continue; + + if (!fy_token_tag_directive_is_overridable(fyt_td)) + goto err_out; + + /* override tag directive */ + fy_token_list_del(&fyds->fyt_td, fyt_td); + fy_token_unref(fyt_td); + } + + fyt = fy_token_create(FYTT_TAG_DIRECTIVE, + &fytc_td->handle, + fytc_td->tag_directive.tag_length, + fytc_td->tag_directive.uri_length, + fytc_td->tag_directive.is_default); + if (!fyt) + goto err_out; + + fy_token_list_add_tail(&fyds->fyt_td, fyt); + } + + /* merge other document state */ + fyds->version_explicit |= fydsc->version_explicit; + fyds->tags_explicit |= fydsc->tags_explicit; + /* NOTE: json mode is not carried over */ + + if (fyds->version.major < fydsc->version.major || + (fyds->version.major == fydsc->version.major && + fyds->version.minor < fydsc->version.minor)) + fyds->version = fydsc->version; + + return 0; + +err_out: + return -1; +} + +const struct fy_version * +fy_document_state_version(struct fy_document_state *fyds) +{ + /* return the default if not set */ + return fyds ? &fyds->version : &fy_default_version; +} + +const struct fy_mark *fy_document_state_start_mark(struct fy_document_state *fyds) +{ + return fyds ? &fyds->start_mark : NULL; +} + +const struct fy_mark *fy_document_state_end_mark(struct fy_document_state *fyds) +{ + return fyds ? &fyds->end_mark : NULL; +} + +bool fy_document_state_version_explicit(struct fy_document_state *fyds) +{ + return fyds ? fyds->version_explicit : false; +} + +bool fy_document_state_tags_explicit(struct fy_document_state *fyds) +{ + return fyds ? fyds->tags_explicit : false; +} + +bool fy_document_state_start_implicit(struct fy_document_state *fyds) +{ + return fyds ? 
fyds->start_implicit : true; +} + +bool fy_document_state_end_implicit(struct fy_document_state *fyds) +{ + return fyds ? fyds->end_implicit : true; +} + +bool fy_document_state_json_mode(struct fy_document_state *fyds) +{ + return fyds ? fyds->json_mode : true; +} + +const struct fy_tag * +fy_document_state_tag_directive_iterate(struct fy_document_state *fyds, void **iterp) +{ + struct fy_token *fyt; + const struct fy_tag *tag; + + if (!fyds || !iterp) + return NULL; + + fyt = *iterp; + fyt = !fyt ? fy_token_list_head(&fyds->fyt_td) : fy_token_next(&fyds->fyt_td, fyt); + if (!fyt) + return NULL; + + /* sanity check */ + assert(fyt->type == FYTT_TAG_DIRECTIVE); + + /* always refresh, should be relatively infrequent */ + fyt->tag_directive.tag.handle = fy_tag_directive_token_handle0(fyt); + fyt->tag_directive.tag.prefix = fy_tag_directive_token_prefix0(fyt); + + tag = &fyt->tag_directive.tag; + + *iterp = fyt; + + return tag; +} diff --git a/contrib/libs/libfyaml/src/lib/fy-docstate.h b/contrib/libs/libfyaml/src/lib/fy-docstate.h new file mode 100644 index 0000000000..380c5f0d8c --- /dev/null +++ b/contrib/libs/libfyaml/src/lib/fy-docstate.h @@ -0,0 +1,63 @@ +/* + * fy-docstate.h - YAML document state header. 
+ * + * Copyright (c) 2019 Pantelis Antoniou <pantelis.antoniou@konsulko.com> + * + * SPDX-License-Identifier: MIT + */ +#ifndef FY_DOCSTATE_H +#define FY_DOCSTATE_H + +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#include <stdint.h> +#include <stdbool.h> +#include <stdio.h> +#include <stdarg.h> + +#include <libfyaml.h> + +#include "fy-ctype.h" +#include "fy-list.h" +#include "fy-typelist.h" +#include "fy-token.h" + +struct fy_document; + +struct fy_document_state { + int refs; + struct fy_version version; + bool version_explicit : 1; + bool tags_explicit : 1; + bool start_implicit : 1; + bool end_implicit : 1; + bool json_mode : 1; + struct fy_mark start_mark; + struct fy_mark end_mark; + struct fy_token *fyt_vd; /* version directive */ + struct fy_token_list fyt_td; /* tag directives */ +}; + +struct fy_document_state *fy_document_state_alloc(void); +void fy_document_state_free(struct fy_document_state *fyds); +struct fy_document_state *fy_document_state_ref(struct fy_document_state *fyds); +void fy_document_state_unref(struct fy_document_state *fyds); + +int fy_document_state_append_tag(struct fy_document_state *fyds, + const char *handle, const char *prefix, + bool is_default); + +struct fy_document_state *fy_document_state_default( + const struct fy_version *default_version, + const struct fy_tag * const *default_tags); + +struct fy_document_state *fy_document_state_copy(struct fy_document_state *fyds); +int fy_document_state_merge(struct fy_document_state *fyds, + struct fy_document_state *fydsc); + +struct fy_token *fy_document_state_lookup_tag_directive(struct fy_document_state *fyds, + const char *handle, size_t handle_size); + +#endif diff --git a/contrib/libs/libfyaml/src/lib/fy-dump.c b/contrib/libs/libfyaml/src/lib/fy-dump.c new file mode 100644 index 0000000000..ac1980d879 --- /dev/null +++ b/contrib/libs/libfyaml/src/lib/fy-dump.c @@ -0,0 +1,313 @@ +/* + * fy-dump.c - various debugging methods + * + * Copyright (c) 2019 Pantelis Antoniou 
<pantelis.antoniou@konsulko.com> + * + * SPDX-License-Identifier: MIT + */ +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#include <stdio.h> +#include <string.h> +#if defined (__unix__) || (defined (__APPLE__) && defined (__MACH__)) +#include <sys/mman.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <fcntl.h> +#include <unistd.h> +#endif +#include <assert.h> +#include <stdlib.h> +#include <errno.h> +#include <stdarg.h> +#include <limits.h> + +#include <libfyaml.h> + +#include "fy-parse.h" +#include "fy-ctype.h" +#include "fy-utf8.h" + +const char *fy_token_type_txt[FYTT_COUNT] = { + [FYTT_NONE] = "<NONE>", + [FYTT_STREAM_START] = "STRM+", + [FYTT_STREAM_END] = "STRM-", + [FYTT_VERSION_DIRECTIVE] = "VRSD", + [FYTT_TAG_DIRECTIVE] = "TAGD", + [FYTT_DOCUMENT_START] = "DOC+", + [FYTT_DOCUMENT_END] = "DOC-", + [FYTT_BLOCK_SEQUENCE_START] = "BSEQ+", + [FYTT_BLOCK_MAPPING_START] = "BMAP+", + [FYTT_BLOCK_END] = "BEND", + [FYTT_FLOW_SEQUENCE_START] = "FSEQ+", + [FYTT_FLOW_SEQUENCE_END] = "FSEQ-", + [FYTT_FLOW_MAPPING_START] = "FMAP+", + [FYTT_FLOW_MAPPING_END] = "FMAP-", + [FYTT_BLOCK_ENTRY] = "BENTR", + [FYTT_FLOW_ENTRY] = "FENTR", + [FYTT_KEY] = "KEY", + [FYTT_SCALAR] = "SCLR", + [FYTT_VALUE] = "VAL", + [FYTT_ALIAS] = "ALIAS", + [FYTT_ANCHOR] = "ANCHR", + [FYTT_TAG] = "TAG", + [FYTT_INPUT_MARKER] = "INPUT_MARKER", + + [FYTT_PE_SLASH] = "PE_SLASH", + [FYTT_PE_ROOT] = "PE_ROOT", + [FYTT_PE_THIS] = "PE_THIS", + [FYTT_PE_PARENT] = "PE_PARENT", + [FYTT_PE_MAP_KEY] = "PE_MAP_KEY", + [FYTT_PE_SEQ_INDEX] = "PE_SEQ_INDEX", + [FYTT_PE_SEQ_SLICE] = "PE_SEQ_SLICE", + [FYTT_PE_SCALAR_FILTER] = "PE_SCALAR_FILTER", + [FYTT_PE_COLLECTION_FILTER] = "PE_COLLECTION_FILTER", + [FYTT_PE_SEQ_FILTER] = "PE_SEQ_FILTER", + [FYTT_PE_MAP_FILTER] = "PE_MAP_FILTER", + [FYTT_PE_UNIQUE_FILTER] = "PE_UNIQUE_FILTER", + [FYTT_PE_EVERY_CHILD] = "PE_EVERY_CHILD", + [FYTT_PE_EVERY_CHILD_R] = "PE_EVERY_CHILD_R", + [FYTT_PE_ALIAS] = "PE_ALIAS", + [FYTT_PE_SIBLING] = "PE_SIBLING", + 
[FYTT_PE_COMMA] = "PE_COMMA", + [FYTT_PE_BARBAR] = "PE_BARBAR", + [FYTT_PE_AMPAMP] = "PE_AMPAMP", + [FYTT_PE_LPAREN] = "PE_LPAREN", + [FYTT_PE_RPAREN] = "PE_RPAREN", + + [FYTT_PE_EQEQ] = "PE_EQEQ", + [FYTT_PE_NOTEQ] = "PE_NOTEQ", + [FYTT_PE_LT] = "PE_LT", + [FYTT_PE_GT] = "PE_GT", + [FYTT_PE_LTE] = "PE_LTE", + [FYTT_PE_GTE] = "PE_GTE", + + [FYTT_SE_PLUS] = "SE_PLUS", + [FYTT_SE_MINUS] = "SE_MINUS", + [FYTT_SE_MULT] = "SE_MULT", + [FYTT_SE_DIV] = "SE_DIV", + + [FYTT_PE_METHOD] = "PE_METHOD", + [FYTT_SE_METHOD] = "SE_METHOD", +}; + +char *fy_token_dump_format(struct fy_token *fyt, char *buf, size_t bufsz) +{ + const char *typetxt, *text; + size_t size; + enum fy_token_type type; + const char *pfx, *sfx; + + if (fyt && (unsigned int)fyt->type < sizeof(fy_token_type_txt)/ + sizeof(fy_token_type_txt[0])) { + typetxt = fy_token_type_txt[fyt->type]; + type = fyt->type; + } else { + typetxt = "<NULL>"; + type = FYTT_NONE; + } + + size = 0; + switch (type) { + case FYTT_SCALAR: + case FYTT_ALIAS: + case FYTT_ANCHOR: + text = fy_token_get_text(fyt, &size); + break; + default: + text = NULL; + break; + } + + if (!text) { + snprintf(buf, bufsz, "%s", typetxt); + return buf; + } + + pfx = typetxt; + sfx = ""; + switch (type) { + case FYTT_SCALAR: + + pfx = "\""; + + /* not too large */ + if (size > 20) + size = 20; + fy_utf8_format_text_a(text, size, fyue_doublequote, &text); + size = strlen(text); + if (size > 10) { + sfx = "...\""; + size = 7; + } else { + sfx = "\""; + } + break; + case FYTT_ALIAS: + case FYTT_ANCHOR: + sfx = type == FYTT_ALIAS ? 
"*" : "&"; + if (size > 10) { + sfx = "..."; + size = 7; + } else + sfx = ""; + break; + + default: + break; + } + + snprintf(buf, bufsz, "%s%.*s%s", pfx, (int)size, text, sfx); + + return buf; +} + +char *fy_token_list_dump_format(struct fy_token_list *fytl, + struct fy_token *fyt_highlight, char *buf, size_t bufsz) +{ + char *s, *e; + struct fy_token *fyt; + + s = buf; + e = buf + bufsz - 1; + for (fyt = fy_token_list_first(fytl); fyt; fyt = fy_token_next(fytl, fyt)) { + + if (s >= (e - 1)) + break; + + s += snprintf(s, e - s, "%s%s", + fyt != fy_token_list_first(fytl) ? "," : "", + fyt_highlight == fyt ? "*" : ""); + + fy_token_dump_format(fyt, s, e - s); + + s += strlen(s); + } + *s = '\0'; + + return buf; +} + +char *fy_simple_key_dump_format(struct fy_parser *fyp, struct fy_simple_key *fysk, char *buf, size_t bufsz) +{ + char tbuf[80]; + + if (!fysk) { + if (bufsz > 0) + *buf = '\0'; + return buf; + } + + fy_token_dump_format(fysk->token, tbuf, sizeof(tbuf)); + + snprintf(buf, bufsz, "%s/%c%c/%d/<%d-%d,%d-%d>", tbuf, + fysk->required ? 'R' : '-', + fysk->implicit_complex ? 'C' : '-', + fysk->flow_level, + fysk->mark.line, fysk->mark.column, + fysk->end_mark.line, fysk->end_mark.column); + return buf; +} + +char *fy_simple_key_list_dump_format(struct fy_parser *fyp, struct fy_simple_key_list *fyskl, + struct fy_simple_key *fysk_highlight, char *buf, size_t bufsz) +{ + char *s, *e; + struct fy_simple_key *fysk; + + s = buf; + e = buf + bufsz - 1; + for (fysk = fy_simple_key_list_first(fyskl); fysk; fysk = fy_simple_key_next(fyskl, fysk)) { + + if (s >= (e - 1)) + break; + + s += snprintf(s, e - s, "%s%s", + fysk != fy_simple_key_list_first(fyskl) ? "," : "", + fysk_highlight == fysk ? 
"*" : ""); + + fy_simple_key_dump_format(fyp, fysk, s, e - s); + + s += strlen(s); + } + *s = '\0'; + + return buf; +} + +#ifdef FY_DEVMODE + +void fyp_debug_dump_token_list(struct fy_parser *fyp, struct fy_token_list *fytl, + struct fy_token *fyt_highlight, const char *banner) +{ + char buf[4096]; + + if (!fyp || !fyp->diag || FYET_DEBUG < fyp->diag->cfg.level) + return; + + fyp_scan_debug(fyp, "%s%s\n", banner, + fy_token_list_dump_format(fytl, fyt_highlight, buf, sizeof(buf))); +} + +void fyp_debug_dump_token(struct fy_parser *fyp, struct fy_token *fyt, const char *banner) +{ + char buf[80]; + + if (!fyp || !fyp->diag || FYET_DEBUG < fyp->diag->cfg.level) + return; + + fyp_scan_debug(fyp, "%s%s\n", banner, + fy_token_dump_format(fyt, buf, sizeof(buf))); +} + +void fyp_debug_dump_simple_key_list(struct fy_parser *fyp, struct fy_simple_key_list *fyskl, + struct fy_simple_key *fysk_highlight, const char *banner) +{ + char buf[4096]; + + if (!fyp || !fyp->diag || FYET_DEBUG < fyp->diag->cfg.level) + return; + + fyp_scan_debug(fyp, "%s%s\n", banner, + fy_simple_key_list_dump_format(fyp, fyskl, fysk_highlight, buf, sizeof(buf))); +} + +void fyp_debug_dump_simple_key(struct fy_parser *fyp, struct fy_simple_key *fysk, const char *banner) +{ + char buf[80]; + + if (!fyp || !fyp->diag || FYET_DEBUG < fyp->diag->cfg.level) + return; + + fyp_scan_debug(fyp, "%s%s\n", banner, + fy_simple_key_dump_format(fyp, fysk, buf, sizeof(buf))); +} + +void fyp_debug_dump_input(struct fy_parser *fyp, const struct fy_input_cfg *fyic, + const char *banner) +{ + switch (fyic->type) { + case fyit_file: + fyp_scan_debug(fyp, "%s: filename=\"%s\"\n", banner, + fyic->file.filename); + break; + case fyit_stream: + fyp_scan_debug(fyp, "%s: stream=\"%s\" fileno=%d\n", banner, + fyic->stream.name, fileno(fyic->stream.fp)); + break; + case fyit_memory: + fyp_scan_debug(fyp, "%s: start=%p size=%zu\n", banner, + fyic->memory.data, fyic->memory.size); + break; + case fyit_alloc: + fyp_scan_debug(fyp, 
"%s: start=%p size=%zu\n", banner, + fyic->alloc.data, fyic->alloc.size); + break; + default: + break; + } +} + +#endif diff --git a/contrib/libs/libfyaml/src/lib/fy-dump.h b/contrib/libs/libfyaml/src/lib/fy-dump.h new file mode 100644 index 0000000000..13b1c9a823 --- /dev/null +++ b/contrib/libs/libfyaml/src/lib/fy-dump.h @@ -0,0 +1,92 @@ +/* + * fy-dump.h - dumps for various internal structures + * + * Copyright (c) 2019 Pantelis Antoniou <pantelis.antoniou@konsulko.com> + * + * SPDX-License-Identifier: MIT + */ +#ifndef FY_DUMP_H +#define FY_DUMP_H + +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#include <stdint.h> +#include <stdbool.h> +#include <stdio.h> +#include <stdarg.h> + +#include <libfyaml.h> + +#include "fy-list.h" +#include "fy-diag.h" + +struct fy_parser; +struct fy_token; +struct fy_token_list; +struct fy_simple_key; +struct fy_simple_key_list; +struct fy_input_cfg; + +extern const char *fy_token_type_txt[]; + +char *fy_token_dump_format(struct fy_token *fyt, char *buf, size_t bufsz); +char *fy_token_list_dump_format(struct fy_token_list *fytl, + struct fy_token *fyt_highlight, char *buf, size_t bufsz); + +char *fy_simple_key_dump_format(struct fy_parser *fyp, struct fy_simple_key *fysk, char *buf, size_t bufsz); +char *fy_simple_key_list_dump_format(struct fy_parser *fyp, struct fy_simple_key_list *fyskl, + struct fy_simple_key *fysk_highlight, char *buf, size_t bufsz); + +#ifdef FY_DEVMODE + +void fyp_debug_dump_token_list(struct fy_parser *fyp, struct fy_token_list *fytl, + struct fy_token *fyt_highlight, const char *banner); +void fyp_debug_dump_token(struct fy_parser *fyp, struct fy_token *fyt, const char *banner); + +void fyp_debug_dump_simple_key_list(struct fy_parser *fyp, struct fy_simple_key_list *fyskl, + struct fy_simple_key *fysk_highlight, const char *banner); +void fyp_debug_dump_simple_key(struct fy_parser *fyp, struct fy_simple_key *fysk, const char *banner); + +void fyp_debug_dump_input(struct fy_parser *fyp, const struct 
fy_input_cfg *fyic, + const char *banner); + +#else + +static inline void +fyp_debug_dump_token_list(struct fy_parser *fyp, struct fy_token_list *fytl, + struct fy_token *fyt_highlight, const char *banner) +{ + /* nothing */ +} + +static inline void +fyp_debug_dump_token(struct fy_parser *fyp, struct fy_token *fyt, const char *banner) +{ + /* nothing */ +} + +static inline void +fyp_debug_dump_simple_key_list(struct fy_parser *fyp, struct fy_simple_key_list *fyskl, + struct fy_simple_key *fysk_highlight, const char *banner) +{ + /* nothing */ +} + +static inline void +fyp_debug_dump_simple_key(struct fy_parser *fyp, struct fy_simple_key *fysk, const char *banner) +{ + /* nothing */ +} + +static inline void +fy_debug_dump_input(struct fy_parser *fyp, const struct fy_input_cfg *fyic, + const char *banner) +{ + /* nothing */ +} + +#endif + +#endif diff --git a/contrib/libs/libfyaml/src/lib/fy-emit-accum.h b/contrib/libs/libfyaml/src/lib/fy-emit-accum.h new file mode 100644 index 0000000000..acc8ae7491 --- /dev/null +++ b/contrib/libs/libfyaml/src/lib/fy-emit-accum.h @@ -0,0 +1,383 @@ +/* + * fy-emit-accum.h - internal YAML emitter accumulator header + * + * Copyright (c) 2019 Pantelis Antoniou <pantelis.antoniou@konsulko.com> + * + * SPDX-License-Identifier: MIT + */ +#ifndef FY_EMIT_ACCUM_H +#define FY_EMIT_ACCUM_H + +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#include <stdint.h> +#include <stdbool.h> +#include <stdio.h> + +#include <libfyaml.h> + +#include "fy-utf8.h" +#include "fy-event.h" + +struct fy_emit_accum { + char *accum; + size_t alloc; + size_t next; + char *inplace; + size_t inplacesz; + int col, row; + int ts; + enum fy_lb_mode lb_mode; +}; + +static inline void +fy_emit_accum_init(struct fy_emit_accum *ea, + void *inplace, size_t inplacesz, + int ts, enum fy_lb_mode lb_mode) +{ + memset(ea, 0, sizeof(*ea)); + + ea->inplace = inplace; + ea->inplacesz = inplacesz; + ea->accum = ea->inplace; + ea->alloc = ea->inplacesz; + ea->ts = ts ? 
ts : 8; + ea->lb_mode = lb_mode; +} + +static inline void +fy_emit_accum_reset(struct fy_emit_accum *ea) +{ + ea->next = 0; + ea->col = 0; + ea->row = 0; +} + +static inline void +fy_emit_accum_cleanup(struct fy_emit_accum *ea) +{ + if (ea->accum && ea->accum != ea->inplace) + free(ea->accum); + ea->accum = ea->inplace; + ea->alloc = ea->inplacesz; + fy_emit_accum_reset(ea); +} + +static inline void +fy_emit_accum_start(struct fy_emit_accum *ea, int col, enum fy_lb_mode lb_mode) +{ + fy_emit_accum_reset(ea); + ea->col = col; + ea->lb_mode = lb_mode; +} + +static inline void +fy_emit_accum_finish(struct fy_emit_accum *ea) +{ + fy_emit_accum_reset(ea); +} + +static inline int +fy_emit_accum_grow(struct fy_emit_accum *ea, size_t need) +{ + size_t atleast, asz; + char *new_accum; + + atleast = ea->alloc + need; + asz = ea->alloc; + /* minimum buffer is 32 */ + if (asz < 32) + asz = 32; + do { + asz *= 2; + } while (asz < atleast); + assert(asz > ea->inplacesz); + new_accum = realloc(ea->accum == ea->inplace ? 
NULL : ea->accum, asz); + if (!new_accum) /* out of memory */ + return -1; + if (ea->accum && ea->accum == ea->inplace) + memcpy(new_accum, ea->accum, ea->next); + ea->alloc = asz; + ea->accum = new_accum; + + return 0; +} + +static inline int +fy_emit_accum_utf8_put_raw(struct fy_emit_accum *ea, int c) +{ + size_t w, avail; + int ret; + + /* grow if needed */ + w = fy_utf8_width(c); + if (w > (avail = (ea->alloc - ea->next))) { + ret = fy_emit_accum_grow(ea, w - avail); + if (ret != 0) + return ret; + } + (void)fy_utf8_put_unchecked(ea->accum + ea->next, c); + ea->next += w; + + return 0; +} + +static inline int +fy_emit_accum_put_raw(struct fy_emit_accum *ea, int c) +{ + int ret; + + /* only lower ascii please */ + if (c >= 0x80) + return -1; + + /* grow if needed */ + if (ea->next >= ea->alloc) { + ret = fy_emit_accum_grow(ea, 1); + if (ret != 0) + return ret; + } + *(ea->accum + ea->next) = (char)c; + ea->next++; + + return 0; +} + +static inline int +fy_emit_accum_utf8_put(struct fy_emit_accum *ea, int c) +{ + int ret; + + if (!fy_utf8_is_valid(c)) + return -1; + + if (fy_is_lb_m(c, ea->lb_mode)) { + ret = fy_emit_accum_put_raw(ea, '\n'); + if (ret) + return ret; + ea->col = 0; + ea->row++; + } else if (fy_is_tab(c)) { + ret = fy_emit_accum_put_raw(ea, '\t'); + if (ret) + return ret; + ea->col += (ea->ts - (ea->col % ea->ts)); + } else { + if (c < 0x80) { + ret = fy_emit_accum_put_raw(ea, c); + if (ret) + return ret; + } else { + ret = fy_emit_accum_utf8_put_raw(ea, c); + } + ea->col++; + } + + return 0; +} + +static inline int +fy_emit_accum_utf8_write_raw(struct fy_emit_accum *ea, const void *data, size_t len) +{ + size_t avail; + int ret; + + /* grow if needed */ + if (len > (avail = (ea->alloc - ea->next))) { + ret = fy_emit_accum_grow(ea, len - avail); + if (ret != 0) + return ret; + } + memcpy(ea->accum + ea->next, data, len); + ea->next += len; + + return 0; +} + +static inline int +fy_emit_accum_utf8_write(struct fy_emit_accum *ea, const void *data, 
size_t len) +{ + const char *s, *e; + int c, w, ret; + + for (s = data, e = s + len; (c = fy_utf8_get(s, (e - s), &w)) >= 0; s += w) { + ret = fy_emit_accum_utf8_put(ea, c); + if (ret) + break; + } + return c == FYUG_EOF ? 0 : -1; +} + +static inline int +fy_emit_accum_utf8_printf_raw(struct fy_emit_accum *ea, const char *fmt, ...) + FY_ATTRIBUTE(format(printf, 2, 3)); + +static inline int +fy_emit_accum_utf8_printf_raw(struct fy_emit_accum *ea, const char *fmt, ...) +{ + va_list ap; + size_t avail, len; + int ret; + + /* get the size of the string */ + va_start(ap, fmt); + len = vsnprintf(NULL, 0, fmt, ap); + va_end(ap); + + /* grow if needed */ + if ((len + 1) > (avail = (ea->alloc - ea->next))) { + ret = fy_emit_accum_grow(ea, (len + 1) - avail); + if (ret != 0) + return ret; + } + + va_start(ap, fmt); + (void)vsnprintf(ea->accum + ea->next, len + 1, fmt, ap); + va_end(ap); + + ea->next += len; + + return 0; +} + +static inline const char * +fy_emit_accum_get(struct fy_emit_accum *ea, size_t *lenp) +{ + *lenp = ea->next; + if (!ea->next) { + return ""; + } + return ea->accum; +} + +static inline int +fy_emit_accum_make_0_terminated(struct fy_emit_accum *ea) +{ + int ret; + + /* the empty case is special cased */ + if (!ea->next) + return 0; + + /* grow if needed for the '\0' */ + if (ea->next >= ea->alloc) { + ret = fy_emit_accum_grow(ea, 1); + if (ret != 0) + return ret; + } + assert(ea->next < ea->alloc); + *(ea->accum + ea->next) = '\0'; + return 0; +} + +static inline const char * +fy_emit_accum_get0(struct fy_emit_accum *ea) +{ + int ret; + + ret = fy_emit_accum_make_0_terminated(ea); + if (ret) + return NULL; + return ea->accum; +} + +static inline char * +fy_emit_accum_steal(struct fy_emit_accum *ea, size_t *lenp) +{ + int ret; + char *buf; + + /* empty, return a malloc'ed buffer to "" */ + if (!ea->next) { + buf = strdup(""); + if (!buf) { + *lenp = 0; + return NULL; + } + *lenp = ea->next; + } else if (ea->inplace && ea->accum == ea->inplace) { + buf = 
malloc(ea->next + 1); + if (!buf) { + *lenp = 0; + return NULL; + } + memcpy(buf, ea->accum, ea->next); + buf[ea->next] = '\0'; + *lenp = ea->next; + } else { + ret = fy_emit_accum_make_0_terminated(ea); + if (ret) { + *lenp = 0; + return NULL; + } + assert(ea->accum && ea->accum != ea->inplace); + buf = ea->accum; + *lenp = ea->next; + /* reset to inplace */ + ea->accum = ea->inplace; + ea->alloc = ea->inplacesz; + } + + fy_emit_accum_cleanup(ea); + return buf; +} + +static inline char * +fy_emit_accum_steal0(struct fy_emit_accum *ea) +{ + size_t len; + + return fy_emit_accum_steal(ea, &len); +} + +static inline bool +fy_emit_accum_empty(struct fy_emit_accum *ea) +{ + return ea->next == 0; +} + +static inline int +fy_emit_accum_size(struct fy_emit_accum *ea) +{ + return ea->next; +} + +static inline int +fy_emit_accum_column(struct fy_emit_accum *ea) +{ + return ea->col; +} + +static inline int +fy_emit_accum_row(struct fy_emit_accum *ea) +{ + return ea->row; +} + +struct fy_emit_accum_state { + int col; + int row; + size_t next; +}; + +static inline void +fy_emit_accum_get_state(struct fy_emit_accum *ea, struct fy_emit_accum_state *s) +{ + s->col = ea->col; + s->row = ea->row; + s->next = ea->next; +} + +static inline void +fy_emit_accum_rewind_state(struct fy_emit_accum *ea, const struct fy_emit_accum_state *s) +{ + /* we can only go back */ + assert(s->next <= ea->next); + ea->col = s->col; + ea->row = s->row; + ea->next = s->next; +} + +#endif diff --git a/contrib/libs/libfyaml/src/lib/fy-emit.c b/contrib/libs/libfyaml/src/lib/fy-emit.c new file mode 100644 index 0000000000..ad0a18dd57 --- /dev/null +++ b/contrib/libs/libfyaml/src/lib/fy-emit.c @@ -0,0 +1,3554 @@ +/* + * fy-emit.c - Internal YAML emitter methods + * + * Copyright (c) 2019 Pantelis Antoniou <pantelis.antoniou@konsulko.com> + * + * SPDX-License-Identifier: MIT + */ +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#include <stdio.h> +#include <string.h> +#include <assert.h> +#include 
<stdlib.h> +#include <limits.h> +#if defined (__unix__) || (defined (__APPLE__) && defined (__MACH__)) +#include <unistd.h> +#elif defined (_MSC_VER) +#define STDOUT_FILENO _fileno(stdin) +#endif +#include <ctype.h> +#include <errno.h> + +#include <libfyaml.h> + +#include "fy-parse.h" +#include "fy-emit.h" + +/* fwd decl */ +void fy_emit_write(struct fy_emitter *emit, enum fy_emitter_write_type type, const char *str, int len); +void fy_emit_printf(struct fy_emitter *emit, enum fy_emitter_write_type type, const char *fmt, ...) + FY_ATTRIBUTE(format(printf, 3, 4)); + +static inline bool fy_emit_is_json_mode(const struct fy_emitter *emit) +{ + enum fy_emitter_cfg_flags flags; + + if (emit->force_json) + return true; + + flags = emit->cfg.flags & FYECF_MODE(FYECF_MODE_MASK); + return flags == FYECF_MODE_JSON || flags == FYECF_MODE_JSON_TP || flags == FYECF_MODE_JSON_ONELINE; +} + +static inline bool fy_emit_is_flow_mode(const struct fy_emitter *emit) +{ + enum fy_emitter_cfg_flags flags = emit->cfg.flags & FYECF_MODE(FYECF_MODE_MASK); + + return flags == FYECF_MODE_FLOW || flags == FYECF_MODE_FLOW_ONELINE; +} + +static inline bool fy_emit_is_block_mode(const struct fy_emitter *emit) +{ + enum fy_emitter_cfg_flags flags = emit->cfg.flags & FYECF_MODE(FYECF_MODE_MASK); + + return flags == FYECF_MODE_BLOCK || flags == FYECF_MODE_DEJSON || flags == FYECF_MODE_PRETTY; +} + +static inline bool fy_emit_is_oneline(const struct fy_emitter *emit) +{ + enum fy_emitter_cfg_flags flags = emit->cfg.flags & FYECF_MODE(FYECF_MODE_MASK); + + return flags == FYECF_MODE_FLOW_ONELINE || flags == FYECF_MODE_JSON_ONELINE; +} + +static inline bool fy_emit_is_dejson_mode(const struct fy_emitter *emit) +{ + enum fy_emitter_cfg_flags flags = emit->cfg.flags & FYECF_MODE(FYECF_MODE_MASK); + + return flags == FYECF_MODE_DEJSON; +} + +static inline bool fy_emit_is_pretty_mode(const struct fy_emitter *emit) +{ + enum fy_emitter_cfg_flags flags = emit->cfg.flags & FYECF_MODE(FYECF_MODE_MASK); + + 
return flags == FYECF_MODE_PRETTY; +} + +static inline int fy_emit_indent(struct fy_emitter *emit) +{ + int indent; + + indent = (emit->cfg.flags & FYECF_INDENT(FYECF_INDENT_MASK)) >> FYECF_INDENT_SHIFT; + return indent ? indent : 2; +} + +static inline int fy_emit_width(struct fy_emitter *emit) +{ + int width; + + width = (emit->cfg.flags & FYECF_WIDTH(FYECF_WIDTH_MASK)) >> FYECF_WIDTH_SHIFT; + if (width == 0) + return 80; + if (width == FYECF_WIDTH_MASK) + return INT_MAX; + return width; +} + +static inline bool fy_emit_output_comments(struct fy_emitter *emit) +{ + return !!(emit->cfg.flags & FYECF_OUTPUT_COMMENTS); +} + +static int fy_emit_node_check_json(struct fy_emitter *emit, struct fy_node *fyn) +{ + struct fy_document *fyd; + struct fy_node *fyni; + struct fy_node_pair *fynp, *fynpi; + int ret; + + if (!fyn) + return 0; + + fyd = fyn->fyd; + + switch (fyn->type) { + case FYNT_SCALAR: + FYD_TOKEN_ERROR_CHECK(fyd, fyn->scalar, FYEM_INTERNAL, + !fy_node_is_alias(fyn), err_out, + "aliases not allowed in JSON emit mode"); + break; + + case FYNT_SEQUENCE: + for (fyni = fy_node_list_head(&fyn->sequence); fyni; + fyni = fy_node_next(&fyn->sequence, fyni)) { + ret = fy_emit_node_check_json(emit, fyni); + if (ret) + return ret; + } + break; + + case FYNT_MAPPING: + for (fynp = fy_node_pair_list_head(&fyn->mapping); fynp; fynp = fynpi) { + + fynpi = fy_node_pair_next(&fyn->mapping, fynp); + + ret = fy_emit_node_check_json(emit, fynp->key); + if (ret) + return ret; + ret = fy_emit_node_check_json(emit, fynp->value); + if (ret) + return ret; + } + break; + } + return 0; +err_out: + return -1; +} + +static int fy_emit_node_check(struct fy_emitter *emit, struct fy_node *fyn) +{ + int ret; + + if (!fyn) + return 0; + + if (fy_emit_is_json_mode(emit) && !emit->source_json) { + ret = fy_emit_node_check_json(emit, fyn); + if (ret) + return ret; + } + + return 0; +} + +void fy_emit_node_internal(struct fy_emitter *emit, struct fy_node *fyn, int flags, int indent, bool 
is_key); +void fy_emit_scalar(struct fy_emitter *emit, struct fy_node *fyn, int flags, int indent, bool is_key); +void fy_emit_sequence(struct fy_emitter *emit, struct fy_node *fyn, int flags, int indent); +void fy_emit_mapping(struct fy_emitter *emit, struct fy_node *fyn, int flags, int indent); + +void fy_emit_write(struct fy_emitter *emit, enum fy_emitter_write_type type, const char *str, int len) +{ + int c, w; + const char *m, *e; + int outlen; + + if (!len) + return; + + outlen = emit->cfg.output(emit, type, str, len, emit->cfg.userdata); + if (outlen != len) + emit->output_error = true; + + e = str + len; + while ((c = fy_utf8_get(str, (e - str), &w)) >= 0) { + + /* special handling for MSDOS */ + if (c == '\r' && (e - str) > 1 && str[1] == '\n') { + str += 2; + emit->column = 0; + emit->line++; + continue; + } + + /* regular line break */ + if (fy_is_lb_r_n(c)) { + emit->column = 0; + emit->line++; + str += w; + continue; + } + + /* completely ignore ANSI color escape sequences */ + if (c == '\x1b' && (e - str) > 2 && str[1] == '[' && + (m = memchr(str, 'm', e - str)) != NULL) { + str = m + 1; + continue; + } + + emit->column++; + str += w; + } +} + +void fy_emit_puts(struct fy_emitter *emit, enum fy_emitter_write_type type, const char *str) +{ + fy_emit_write(emit, type, str, strlen(str)); +} + +void fy_emit_putc(struct fy_emitter *emit, enum fy_emitter_write_type type, int c) +{ + char buf[FY_UTF8_FORMAT_BUFMIN]; + + fy_utf8_format(c, buf, fyue_none); + fy_emit_puts(emit, type, buf); +} + +/* format into a stack buffer sized by a first measuring pass, then emit it */ +void fy_emit_vprintf(struct fy_emitter *emit, enum fy_emitter_write_type type, const char *fmt, va_list ap) +{ + char *str; + int size; + va_list ap2; + + /* ap is consumed by the measuring pass; ap2 feeds the real one */ + va_copy(ap2, ap); + + size = vsnprintf(NULL, 0, fmt, ap); + if (size < 0) { + /* every va_copy() needs a matching va_end(), even on failure */ + va_end(ap2); + return; + } + + str = FY_ALLOCA(size + 1); + size = vsnprintf(str, size + 1, fmt, ap2); + va_end(ap2); + if (size < 0) + return; + + fy_emit_write(emit, type, str, size); +} + +void fy_emit_printf(struct fy_emitter *emit, enum fy_emitter_write_type type, const
char *fmt, ...) +{ + va_list ap; + + va_start(ap, fmt); + fy_emit_vprintf(emit, type, fmt, ap); + va_end(ap); +} + +void fy_emit_write_ws(struct fy_emitter *emit) +{ + fy_emit_putc(emit, fyewt_whitespace, ' '); + emit->flags |= FYEF_WHITESPACE; +} + +void fy_emit_write_indent(struct fy_emitter *emit, int indent) +{ + int len; + char *ws; + + indent = indent > 0 ? indent : 0; + + if (!fy_emit_indentation(emit) || emit->column > indent || + (emit->column == indent && !fy_emit_whitespace(emit))) + fy_emit_putc(emit, fyewt_linebreak, '\n'); + + if (emit->column < indent) { + len = indent - emit->column; + ws = FY_ALLOCA(len + 1); + memset(ws, ' ', len); + ws[len] = '\0'; + fy_emit_write(emit, fyewt_indent, ws, len); + } + + emit->flags |= FYEF_WHITESPACE | FYEF_INDENTATION; +} + +enum document_indicator { + di_question_mark, + di_colon, + di_dash, + di_left_bracket, + di_right_bracket, + di_left_brace, + di_right_brace, + di_comma, + di_bar, + di_greater, + di_single_quote_start, + di_single_quote_end, + di_double_quote_start, + di_double_quote_end, + di_ambersand, + di_star, +}; + +void fy_emit_write_indicator(struct fy_emitter *emit, + enum document_indicator indicator, + int flags, int indent, + enum fy_emitter_write_type wtype) +{ + switch (indicator) { + + case di_question_mark: + if (!fy_emit_whitespace(emit)) + fy_emit_write_ws(emit); + fy_emit_putc(emit, wtype, '?'); + emit->flags &= ~(FYEF_WHITESPACE | FYEF_OPEN_ENDED); + break; + + case di_colon: + if (!(flags & DDNF_SIMPLE)) { + if (!emit->flow_level && !fy_emit_is_oneline(emit)) + fy_emit_write_indent(emit, indent); + if (!fy_emit_whitespace(emit)) + fy_emit_write_ws(emit); + } + fy_emit_putc(emit, wtype, ':'); + emit->flags &= ~(FYEF_WHITESPACE | FYEF_OPEN_ENDED); + break; + + case di_dash: + if (!fy_emit_whitespace(emit)) + fy_emit_write_ws(emit); + fy_emit_putc(emit, wtype, '-'); + emit->flags &= ~(FYEF_WHITESPACE | FYEF_OPEN_ENDED); + break; + + case di_left_bracket: + case di_left_brace: + 
emit->flow_level++; + if (!fy_emit_whitespace(emit)) + fy_emit_write_ws(emit); + fy_emit_putc(emit, wtype, indicator == di_left_bracket ? '[' : '{'); + emit->flags |= FYEF_WHITESPACE; + emit->flags &= ~(FYEF_INDENTATION | FYEF_OPEN_ENDED); + break; + + case di_right_bracket: + case di_right_brace: + emit->flow_level--; + fy_emit_putc(emit, wtype, indicator == di_right_bracket ? ']' : '}'); + emit->flags &= ~(FYEF_WHITESPACE | FYEF_INDENTATION | FYEF_OPEN_ENDED); + break; + + case di_comma: + fy_emit_putc(emit, wtype, ','); + emit->flags &= ~(FYEF_WHITESPACE | FYEF_INDENTATION | FYEF_OPEN_ENDED); + break; + + case di_bar: + case di_greater: + if (!fy_emit_whitespace(emit)) + fy_emit_write_ws(emit); + fy_emit_putc(emit, wtype, indicator == di_bar ? '|' : '>'); + emit->flags &= ~(FYEF_INDENTATION | FYEF_WHITESPACE | FYEF_OPEN_ENDED); + break; + + case di_single_quote_start: + case di_double_quote_start: + if (!fy_emit_whitespace(emit)) + fy_emit_write_ws(emit); + fy_emit_putc(emit, wtype, indicator == di_single_quote_start ? '\'' : '"'); + emit->flags &= ~(FYEF_WHITESPACE | FYEF_INDENTATION | FYEF_OPEN_ENDED); + break; + + case di_single_quote_end: + case di_double_quote_end: + fy_emit_putc(emit, wtype, indicator == di_single_quote_end ? '\'' : '"'); + emit->flags &= ~(FYEF_WHITESPACE | FYEF_INDENTATION | FYEF_OPEN_ENDED); + break; + + case di_ambersand: + if (!fy_emit_whitespace(emit)) + fy_emit_write_ws(emit); + fy_emit_putc(emit, wtype, '&'); + emit->flags &= ~(FYEF_WHITESPACE | FYEF_INDENTATION); + break; + + case di_star: + if (!fy_emit_whitespace(emit)) + fy_emit_write_ws(emit); + fy_emit_putc(emit, wtype, '*'); + emit->flags &= ~(FYEF_WHITESPACE | FYEF_INDENTATION); + break; + } +} + +int fy_emit_increase_indent(struct fy_emitter *emit, int flags, int indent) +{ + if (indent < 0) + return (flags & DDNF_FLOW) ? 
fy_emit_indent(emit) : 0; + + if (!(flags & DDNF_INDENTLESS)) + return indent + fy_emit_indent(emit); + + return indent; +} + +void fy_emit_write_comment(struct fy_emitter *emit, int flags, int indent, const char *str, size_t len, struct fy_atom *handle) +{ + const char *s, *e, *sr; + int c, w; + bool breaks; + + if (!str || !len) + return; + + if (len == (size_t)-1) + len = strlen(str); + + if (!fy_emit_whitespace(emit)) + fy_emit_write_ws(emit); + indent = emit->column; + + s = str; + e = str + len; + + sr = s; /* start of normal output run */ + breaks = false; + while (s < e && (c = fy_utf8_get(s, e - s, &w)) > 0) { + + if (fy_is_lb_m(c, fy_atom_lb_mode(handle))) { + + /* output run */ + fy_emit_write(emit, fyewt_comment, sr, s - sr); + sr = s + w; + fy_emit_write_indent(emit, indent); + emit->flags |= FYEF_INDENTATION; + breaks = true; + } else { + + if (breaks) { + fy_emit_write(emit, fyewt_comment, sr, s - sr); + sr = s; + fy_emit_write_indent(emit, indent); + } + emit->flags &= ~FYEF_INDENTATION; + breaks = false; + } + + s += w; + } + + /* dump what's remaining */ + fy_emit_write(emit, fyewt_comment, sr, s - sr); + + emit->flags |= (FYEF_WHITESPACE | FYEF_INDENTATION); +} + +struct fy_atom *fy_emit_token_comment_handle(struct fy_emitter *emit, struct fy_token *fyt, enum fy_comment_placement placement) +{ + struct fy_atom *handle; + + handle = fy_token_comment_handle(fyt, placement, false); + return handle && fy_atom_is_set(handle) ? 
handle : NULL; +} + +void fy_emit_document_start_indicator(struct fy_emitter *emit) +{ + /* do not emit twice */ + if (emit->flags & FYEF_HAD_DOCUMENT_START) + return; + + /* do not try to emit if it's json mode */ + if (fy_emit_is_json_mode(emit)) + goto no_doc_emit; + + /* output linebreak anyway */ + if (emit->column) + fy_emit_putc(emit, fyewt_linebreak, '\n'); + + /* stripping doc indicators, do not emit */ + if (emit->cfg.flags & FYECF_STRIP_DOC) + goto no_doc_emit; + + /* ok, emit document start indicator */ + fy_emit_puts(emit, fyewt_document_indicator, "---"); + emit->flags &= ~FYEF_WHITESPACE; + emit->flags |= FYEF_HAD_DOCUMENT_START; + return; + +no_doc_emit: + emit->flags &= ~FYEF_HAD_DOCUMENT_START; +} + +struct fy_token *fy_node_value_token(struct fy_node *fyn) +{ + struct fy_token *fyt; + + if (!fyn) + return NULL; + + switch (fyn->type) { + case FYNT_SCALAR: + fyt = fyn->scalar; + break; + case FYNT_SEQUENCE: + fyt = fyn->sequence_start; + break; + case FYNT_MAPPING: + fyt = fyn->mapping_start; + break; + default: + fyt = NULL; + break; + } + + return fyt; +} + +bool fy_emit_token_has_comment(struct fy_emitter *emit, struct fy_token *fyt, enum fy_comment_placement placement) +{ + return fy_emit_token_comment_handle(emit, fyt, placement) ? 
true : false; +} + +bool fy_emit_node_has_comment(struct fy_emitter *emit, struct fy_node *fyn, enum fy_comment_placement placement) +{ + return fy_emit_token_has_comment(emit, fy_node_value_token(fyn), placement); +} + +void fy_emit_token_comment(struct fy_emitter *emit, struct fy_token *fyt, int flags, int indent, + enum fy_comment_placement placement) +{ + struct fy_atom *handle; + char *text; + const char *t; + int len; + + handle = fy_emit_token_comment_handle(emit, fyt, placement); + if (!handle) + return; + + len = fy_atom_format_text_length(handle); + if (len < 0) + return; + + text = FY_ALLOCA(len + 1); + + if (placement == fycp_top || placement == fycp_bottom) { + fy_emit_write_indent(emit, indent); + emit->flags |= FYEF_WHITESPACE; + } + + t = fy_atom_format_text(handle, text, len + 1); + + fy_emit_write_comment(emit, flags, indent, t, len, handle); + + emit->flags &= ~FYEF_INDENTATION; + + if (placement == fycp_top || placement == fycp_bottom) { + fy_emit_write_indent(emit, indent); + emit->flags |= FYEF_WHITESPACE; + } +} + +void fy_emit_node_comment(struct fy_emitter *emit, struct fy_node *fyn, int flags, int indent, + enum fy_comment_placement placement) +{ + struct fy_token *fyt; + + if (!fy_emit_output_comments(emit) || (unsigned int)placement >= fycp_max) + return; + + fyt = fy_node_value_token(fyn); + if (!fyt) + return; + + fy_emit_token_comment(emit, fyt, flags, indent, placement); +} + +void fy_emit_common_node_preamble(struct fy_emitter *emit, + struct fy_token *fyt_anchor, + struct fy_token *fyt_tag, + int flags, int indent) +{ + const char *anchor = NULL; + const char *tag = NULL; + const char *td_prefix __FY_DEBUG_UNUSED__; + const char *td_handle; + size_t td_prefix_size, td_handle_size; + size_t tag_len = 0, anchor_len = 0; + bool json_mode = false; + + json_mode = fy_emit_is_json_mode(emit); + + if (!json_mode) { + if (!(emit->cfg.flags & FYECF_STRIP_LABELS)) { + if (fyt_anchor) + anchor = fy_token_get_text(fyt_anchor, &anchor_len); + } 
+ + if (!(emit->cfg.flags & FYECF_STRIP_TAGS)) { + if (fyt_tag) + tag = fy_token_get_text(fyt_tag, &tag_len); + } + + if (anchor) { + fy_emit_write_indicator(emit, di_ambersand, flags, indent, fyewt_anchor); + fy_emit_write(emit, fyewt_anchor, anchor, anchor_len); + } + + if (tag) { + if (!fy_emit_whitespace(emit)) + fy_emit_write_ws(emit); + + td_handle = fy_tag_token_get_directive_handle(fyt_tag, &td_handle_size); + assert(td_handle); + td_prefix = fy_tag_token_get_directive_prefix(fyt_tag, &td_prefix_size); + assert(td_prefix); + + if (!td_handle_size) + fy_emit_printf(emit, fyewt_tag, "!<%.*s>", (int)tag_len, tag); + else + fy_emit_printf(emit, fyewt_tag, "%.*s%.*s", + (int)td_handle_size, td_handle, + (int)(tag_len - td_prefix_size), tag + td_prefix_size); + + emit->flags &= ~(FYEF_WHITESPACE | FYEF_INDENTATION); + } + } + + /* content for root always starts on a new line */ + if ((flags & DDNF_ROOT) && emit->column != 0 && + !(emit->flags & FYEF_HAD_DOCUMENT_START)) { + fy_emit_putc(emit, fyewt_linebreak, '\n'); + emit->flags = FYEF_WHITESPACE | FYEF_INDENTATION; + } +} + +/* emit anchor/tag preamble for a node, then dispatch on its type (a NULL node is emitted as an empty scalar) */ +void fy_emit_node_internal(struct fy_emitter *emit, struct fy_node *fyn, int flags, int indent, bool is_key) +{ + enum fy_node_type type; + struct fy_anchor *fya; + struct fy_token *fyt_anchor = NULL; + + if (!(emit->cfg.flags & FYECF_STRIP_LABELS)) { + fya = fy_document_lookup_anchor_by_node(emit->fyd, fyn); + if (fya) + fyt_anchor = fya->anchor; + } + + /* fyn may be NULL (the type selection right below allows it); do not dereference it for the tag */ + fy_emit_common_node_preamble(emit, fyt_anchor, fyn ? fyn->tag : NULL, flags, indent); + + type = fyn ?
fyn->type : FYNT_SCALAR; + + if (type != FYNT_SCALAR && (flags & DDNF_ROOT) && emit->column != 0) { + fy_emit_putc(emit, fyewt_linebreak, '\n'); + emit->flags = FYEF_WHITESPACE | FYEF_INDENTATION; + } + + switch (type) { + case FYNT_SCALAR: + /* if we're pretty and root at column 0 (meaning it's a single scalar document) output --- */ + if ((flags & DDNF_ROOT) && fy_emit_is_pretty_mode(emit) && !emit->column && + !fy_emit_is_flow_mode(emit) && !(flags & DDNF_FLOW)) + fy_emit_document_start_indicator(emit); + fy_emit_scalar(emit, fyn, flags, indent, is_key); + break; + case FYNT_SEQUENCE: + FYD_TOKEN_ERROR_CHECK(fyn->fyd, fyn->sequence_start, FYEM_INTERNAL, + !is_key || !fy_emit_is_json_mode(emit), err_out, + "JSON does not allow sequences as keys"); + fy_emit_sequence(emit, fyn, flags, indent); + break; + case FYNT_MAPPING: + FYD_TOKEN_ERROR_CHECK(fyn->fyd, fyn->mapping_start, FYEM_INTERNAL, + !is_key || !fy_emit_is_json_mode(emit), err_out, + "JSON does not allow mappings as keys"); + fy_emit_mapping(emit, fyn, flags, indent); + break; + } +err_out: + /* nothing */ + return; +} + +void fy_emit_token_write_plain(struct fy_emitter *emit, struct fy_token *fyt, int flags, int indent) +{ + bool allow_breaks, should_indent, spaces, breaks; + int c; + enum fy_emitter_write_type wtype; + const char *str = NULL; + size_t len = 0; + struct fy_atom *atom; + struct fy_atom_iter iter; + + /* null and not json mode */ + if (!fyt && !fy_emit_is_json_mode(emit)) + goto out; + + wtype = (flags & DDNF_SIMPLE_SCALAR_KEY) ? 
fyewt_plain_scalar_key : fyewt_plain_scalar; + + atom = fy_token_atom(fyt); + + /* null and json mode */ + if (fy_emit_is_json_mode(emit) && (!fyt || !atom || atom->size0)) { + fy_emit_puts(emit, wtype, "null"); + goto out; + } + + /* simple case first (90% of cases) */ + str = fy_token_get_direct_output(fyt, &len); + if (str && fy_token_atom_style(fyt) == FYAS_PLAIN) { + fy_emit_write(emit, wtype, str, len); + goto out; + } + + if (!atom) + goto out; + + allow_breaks = !(flags & DDNF_SIMPLE) && !fy_emit_is_json_mode(emit) && !fy_emit_is_oneline(emit); + + spaces = false; + breaks = false; + + fy_atom_iter_start(atom, &iter); + fy_emit_accum_start(&emit->ea, emit->column, fy_token_atom_lb_mode(fyt)); + while ((c = fy_atom_iter_utf8_get(&iter)) > 0) { + + if (fy_is_ws(c)) { + + should_indent = allow_breaks && !spaces && + fy_emit_accum_column(&emit->ea) > fy_emit_width(emit); + + if (should_indent && !fy_is_ws(fy_atom_iter_utf8_peek(&iter))) { + fy_emit_output_accum(emit, wtype, &emit->ea); + emit->flags &= ~FYEF_INDENTATION; + fy_emit_write_indent(emit, indent); + } else + fy_emit_accum_utf8_put(&emit->ea, c); + + spaces = true; + + } else if (fy_is_lb_m(c, fy_token_atom_lb_mode(fyt))) { + + /* blergh */ + if (!allow_breaks) + break; + + /* output run */ + if (!breaks) { + fy_emit_output_accum(emit, wtype, &emit->ea); + fy_emit_write_indent(emit, indent); + } + + emit->flags &= ~FYEF_INDENTATION; + fy_emit_write_indent(emit, indent); + + breaks = true; + + } else { + + if (breaks) + fy_emit_write_indent(emit, indent); + + fy_emit_accum_utf8_put(&emit->ea, c); + + emit->flags &= ~FYEF_INDENTATION; + + spaces = false; + breaks = false; + } + } + fy_emit_output_accum(emit, wtype, &emit->ea); + fy_emit_accum_finish(&emit->ea); + fy_atom_iter_finish(&iter); + +out: + emit->flags &= ~(FYEF_WHITESPACE | FYEF_INDENTATION); +} + +void fy_emit_token_write_alias(struct fy_emitter *emit, struct fy_token *fyt, int flags, int indent) +{ + const char *str = NULL; + size_t len = 
0; + struct fy_atom_iter iter; + int c; + + if (!fyt) + return; + + fy_emit_write_indicator(emit, di_star, flags, indent, fyewt_alias); + + /* try direct output first (99% of cases) */ + str = fy_token_get_direct_output(fyt, &len); + if (str) { + fy_emit_write(emit, fyewt_alias, str, len); + return; + } + + /* corner case, use iterator */ + fy_atom_iter_start(fy_token_atom(fyt), &iter); + fy_emit_accum_start(&emit->ea, emit->column, fy_token_atom_lb_mode(fyt)); + while ((c = fy_atom_iter_utf8_get(&iter)) > 0) + fy_emit_accum_utf8_put(&emit->ea, c); + fy_emit_output_accum(emit, fyewt_alias, &emit->ea); + fy_emit_accum_finish(&emit->ea); + fy_atom_iter_finish(&iter); +} + +void fy_emit_token_write_quoted(struct fy_emitter *emit, struct fy_token *fyt, int flags, int indent, char qc) +{ + bool allow_breaks, spaces, breaks; + int c, i, w, digit; + enum fy_emitter_write_type wtype; + const char *str = NULL; + size_t len = 0; + bool should_indent, done_esc; + struct fy_atom *atom; + struct fy_atom_iter iter; + enum fy_atom_style target_style; + uint32_t hi_surrogate, lo_surrogate; + uint8_t non_utf8[4]; + size_t non_utf8_len, k; + + wtype = qc == '\'' ? + ((flags & DDNF_SIMPLE_SCALAR_KEY) ? + fyewt_single_quoted_scalar_key : fyewt_single_quoted_scalar) : + ((flags & DDNF_SIMPLE_SCALAR_KEY) ? + fyewt_double_quoted_scalar_key : fyewt_double_quoted_scalar); + + fy_emit_write_indicator(emit, + qc == '\'' ? di_single_quote_start : di_double_quote_start, + flags, indent, wtype); + + /* XXX check whether this is ever the case */ + if (!fyt) + goto out; + + /* note that if the original target style and the target differ + * we can note use direct output + */ + target_style = qc == '"' ? FYAS_DOUBLE_QUOTED : FYAS_SINGLE_QUOTED; + + /* simple case of direct output (large amount of cases) */ + str = fy_token_get_direct_output(fyt, &len); + if (str && fy_token_atom_style(fyt) == target_style) { + fy_emit_write(emit, wtype, str, len); + goto out; + } + + /* no atom? i.e. 
empty */ + atom = fy_token_atom(fyt); + if (!atom) + goto out; + + allow_breaks = !(flags & DDNF_SIMPLE) && !fy_emit_is_json_mode(emit) && !fy_emit_is_oneline(emit); + + spaces = false; + breaks = false; + + fy_atom_iter_start(atom, &iter); + fy_emit_accum_start(&emit->ea, emit->column, fy_token_atom_lb_mode(fyt)); + for (;;) { + non_utf8_len = sizeof(non_utf8); + c = fy_atom_iter_utf8_quoted_get(&iter, &non_utf8_len, non_utf8); + if (c < 0) + break; + + if (c == 0 && non_utf8_len > 0) { + for (k = 0; k < non_utf8_len; k++) { + c = (int)non_utf8[k] & 0xff; + fy_emit_accum_utf8_put(&emit->ea, '\\'); + fy_emit_accum_utf8_put(&emit->ea, 'x'); + digit = ((unsigned int)c >> 4) & 15; + fy_emit_accum_utf8_put(&emit->ea, + digit <= 9 ? ('0' + digit) : ('A' + digit - 10)); + digit = (unsigned int)c & 15; + fy_emit_accum_utf8_put(&emit->ea, + digit <= 9 ? ('0' + digit) : ('A' + digit - 10)); + } + continue; + } + + if (fy_is_space(c) || (qc == '\'' && fy_is_ws(c))) { + should_indent = allow_breaks && !spaces && + fy_emit_accum_column(&emit->ea) > fy_emit_width(emit); + + if (should_indent && + ((qc == '\'' && fy_is_ws(fy_atom_iter_utf8_peek(&iter))) || + qc == '"')) { + fy_emit_output_accum(emit, wtype, &emit->ea); + + if (qc == '"' && fy_is_ws(fy_atom_iter_utf8_peek(&iter))) + fy_emit_putc(emit, wtype, '\\'); + + emit->flags &= ~FYEF_INDENTATION; + fy_emit_write_indent(emit, indent); + } else + fy_emit_accum_utf8_put(&emit->ea, c); + + spaces = true; + breaks = false; + + } else if (qc == '\'' && fy_is_lb_m(c, fy_token_atom_lb_mode(fyt))) { + + /* blergh */ + if (!allow_breaks) + break; + + /* output run */ + if (!breaks) { + fy_emit_output_accum(emit, wtype, &emit->ea); + fy_emit_write_indent(emit, indent); + } + + emit->flags &= ~FYEF_INDENTATION; + fy_emit_write_indent(emit, indent); + + breaks = true; + } else { + /* output run */ + if (breaks) { + fy_emit_output_accum(emit, wtype, &emit->ea); + fy_emit_write_indent(emit, indent); + } + + /* escape */ + if (qc == '\'' 
&& c == '\'') { + fy_emit_accum_utf8_put(&emit->ea, '\''); + fy_emit_accum_utf8_put(&emit->ea, '\''); + } else if (qc == '"' && + ((!fy_is_printq(c) || c == '"' || c == '\\') || + (fy_emit_is_json_mode(emit) && !fy_is_json_unescaped(c)))) { + + fy_emit_accum_utf8_put(&emit->ea, '\\'); + + /* common YAML and JSON escapes */ + done_esc = false; + switch (c) { + case '\b': + fy_emit_accum_utf8_put(&emit->ea, 'b'); + done_esc = true; + break; + case '\f': + fy_emit_accum_utf8_put(&emit->ea, 'f'); + done_esc = true; + break; + case '\n': + fy_emit_accum_utf8_put(&emit->ea, 'n'); + done_esc = true; + break; + case '\r': + fy_emit_accum_utf8_put(&emit->ea, 'r'); + done_esc = true; + break; + case '\t': + fy_emit_accum_utf8_put(&emit->ea, 't'); + done_esc = true; + break; + case '"': + fy_emit_accum_utf8_put(&emit->ea, '"'); + done_esc = true; + break; + case '\\': + fy_emit_accum_utf8_put(&emit->ea, '\\'); + done_esc = true; + break; + } + + if (done_esc) + goto done; + + if (!fy_emit_is_json_mode(emit)) { + switch (c) { + case '\0': + fy_emit_accum_utf8_put(&emit->ea, '0'); + break; + case '\a': + fy_emit_accum_utf8_put(&emit->ea, 'a'); + break; + case '\v': + fy_emit_accum_utf8_put(&emit->ea, 'v'); + break; + /* ESC; spelled numerically because '\e' is a GNU extension, not ISO C */ + case '\x1b': + fy_emit_accum_utf8_put(&emit->ea, 'e'); + break; + case 0x85: + fy_emit_accum_utf8_put(&emit->ea, 'N'); + break; + case 0xa0: + fy_emit_accum_utf8_put(&emit->ea, '_'); + break; + case 0x2028: + fy_emit_accum_utf8_put(&emit->ea, 'L'); + break; + case 0x2029: + fy_emit_accum_utf8_put(&emit->ea, 'P'); + break; + default: + if ((unsigned int)c <= 0xff) { + fy_emit_accum_utf8_put(&emit->ea, 'x'); + w = 2; + } else if ((unsigned int)c <= 0xffff) { + fy_emit_accum_utf8_put(&emit->ea, 'u'); + w = 4; + } else if ((unsigned int)c <= 0xffffffff) { + fy_emit_accum_utf8_put(&emit->ea, 'U'); + w = 8; + } + + for (i = w - 1; i >= 0; i--) { + digit = ((unsigned int)c >> (i * 4)) & 15; + fy_emit_accum_utf8_put(&emit->ea, + digit <= 9 ?
('0' + digit) : ('A' + digit - 10)); + } + break; + } + + } else { + /* JSON escapes all others in \uXXXX and \uXXXX\uXXXX */ + w = 4; + if ((unsigned int)c <= 0xffff) { + fy_emit_accum_utf8_put(&emit->ea, 'u'); + for (i = w - 1; i >= 0; i--) { + digit = ((unsigned int)c >> (i * 4)) & 15; + fy_emit_accum_utf8_put(&emit->ea, + digit <= 9 ? ('0' + digit) : ('A' + digit - 10)); + } + } else { + hi_surrogate = 0xd800 | ((((c >> 16) & 0x1f) - 1) << 6) | ((c >> 10) & 0x3f); + lo_surrogate = 0xdc00 | (c & 0x3ff); + + fy_emit_accum_utf8_put(&emit->ea, 'u'); + for (i = w - 1; i >= 0; i--) { + digit = ((unsigned int)hi_surrogate >> (i * 4)) & 15; + fy_emit_accum_utf8_put(&emit->ea, + digit <= 9 ? ('0' + digit) : ('A' + digit - 10)); + } + + fy_emit_accum_utf8_put(&emit->ea, '\\'); + fy_emit_accum_utf8_put(&emit->ea, 'u'); + for (i = w - 1; i >= 0; i--) { + digit = ((unsigned int)lo_surrogate >> (i * 4)) & 15; + fy_emit_accum_utf8_put(&emit->ea, + digit <= 9 ? ('0' + digit) : ('A' + digit - 10)); + } + } + } + } else + fy_emit_accum_utf8_put(&emit->ea, c); +done: + emit->flags &= ~FYEF_INDENTATION; + spaces = false; + breaks = false; + } + } + fy_emit_output_accum(emit, wtype, &emit->ea); + fy_emit_accum_finish(&emit->ea); + fy_atom_iter_finish(&iter); + +out: + fy_emit_write_indicator(emit, + qc == '\'' ? 
di_single_quote_end : di_double_quote_end, + flags, indent, wtype); +} + +bool fy_emit_token_write_block_hints(struct fy_emitter *emit, struct fy_token *fyt, int flags, int indent, char *chompp) +{ + char chomp = '\0'; + bool explicit_chomp = false; + struct fy_atom *atom; + + atom = fy_token_atom(fyt); + if (!atom) { + emit->flags &= ~FYEF_OPEN_ENDED; + chomp = '-'; + goto out; + } + + if (atom->starts_with_ws || atom->starts_with_lb) { + fy_emit_putc(emit, fyewt_indicator, '0' + fy_emit_indent(emit)); + explicit_chomp = true; + } + + if (!atom->ends_with_lb) { + emit->flags &= ~FYEF_OPEN_ENDED; + chomp = '-'; + goto out; + } + + if (atom->trailing_lb) { + emit->flags |= FYEF_OPEN_ENDED; + chomp = '+'; + goto out; + } + emit->flags &= ~FYEF_OPEN_ENDED; + +out: + if (chomp) + fy_emit_putc(emit, fyewt_indicator, chomp); + *chompp = chomp; + return explicit_chomp; +} + +void fy_emit_token_write_literal(struct fy_emitter *emit, struct fy_token *fyt, int flags, int indent) +{ + bool breaks; + int c; + char chomp; + struct fy_atom *atom; + struct fy_atom_iter iter; + + fy_emit_write_indicator(emit, di_bar, flags, indent, fyewt_indicator); + + fy_emit_token_write_block_hints(emit, fyt, flags, indent, &chomp); + if (flags & DDNF_ROOT) + indent += fy_emit_indent(emit); + + fy_emit_putc(emit, fyewt_linebreak, '\n'); + emit->flags |= FYEF_WHITESPACE | FYEF_INDENTATION; + + atom = fy_token_atom(fyt); + if (!atom) + goto out; + + breaks = true; + + fy_atom_iter_start(atom, &iter); + fy_emit_accum_start(&emit->ea, emit->column, fy_token_atom_lb_mode(fyt)); + while ((c = fy_atom_iter_utf8_get(&iter)) > 0) { + + if (breaks) { + fy_emit_write_indent(emit, indent); + breaks = false; + } + + if (fy_is_lb_m(c, fy_token_atom_lb_mode(fyt))) { + fy_emit_output_accum(emit, fyewt_literal_scalar, &emit->ea); + emit->flags &= ~FYEF_INDENTATION; + breaks = true; + } else + fy_emit_accum_utf8_put(&emit->ea, c); + } + fy_emit_output_accum(emit, fyewt_literal_scalar, &emit->ea); + 
fy_emit_accum_finish(&emit->ea); + fy_atom_iter_finish(&iter); + +out: + emit->flags &= ~FYEF_INDENTATION; +} + +/* write a scalar token in folded block style ('>' indicator, block hints, then the folded content) */ +void fy_emit_token_write_folded(struct fy_emitter *emit, struct fy_token *fyt, int flags, int indent) +{ + bool leading_spaces, breaks; + int c, nrbreaks, nrbreakslim; + char chomp; + struct fy_atom *atom; + struct fy_atom_iter iter; + + fy_emit_write_indicator(emit, di_greater, flags, indent, fyewt_indicator); + + fy_emit_token_write_block_hints(emit, fyt, flags, indent, &chomp); + if (flags & DDNF_ROOT) + indent += fy_emit_indent(emit); + + fy_emit_putc(emit, fyewt_linebreak, '\n'); + emit->flags |= FYEF_WHITESPACE | FYEF_INDENTATION; + + atom = fy_token_atom(fyt); + if (!atom) + return; + + breaks = true; + leading_spaces = true; + + fy_atom_iter_start(atom, &iter); + fy_emit_accum_start(&emit->ea, emit->column, fy_token_atom_lb_mode(fyt)); + while ((c = fy_atom_iter_utf8_get(&iter)) > 0) { + + if (fy_is_lb_m(c, fy_token_atom_lb_mode(fyt))) { + + /* output run; this is folded content, so report it with the folded write type like the other runs in this function */ + if (!fy_emit_accum_empty(&emit->ea)) { + fy_emit_output_accum(emit, fyewt_folded_scalar, &emit->ea); + /* do not output a newline (indent) if at the end or + * this is a leading spaces line */ + if (!fy_is_z(fy_atom_iter_utf8_peek(&iter)) && !leading_spaces) + fy_emit_write_indent(emit, indent); + } + + /* count the number of consecutive breaks */ + nrbreaks = 1; + while (fy_is_lb_m((c = fy_atom_iter_utf8_peek(&iter)), fy_token_atom_lb_mode(fyt))) { + nrbreaks++; + (void)fy_atom_iter_utf8_get(&iter); + } + + /* NOTE: Because the number of indents is tricky + * if it's a non blank, non end, it's the number of breaks + * if it's a blank, it's the number of breaks minus 1 + * if it's the end, it's the number of breaks minus 2 + */ + nrbreakslim = fy_is_z(c) ? 2 : fy_is_blank(c) ?
1 : 0; + while (nrbreaks-- > nrbreakslim) { + emit->flags &= ~FYEF_INDENTATION; + fy_emit_write_indent(emit, indent); + } + + breaks = true; + + } else { + + /* if we had a break, output an indent */ + if (breaks) { + fy_emit_write_indent(emit, indent); + + /* if this line starts with whitespace we need to know */ + leading_spaces = fy_is_ws(c); + } + + if (!breaks && fy_is_space(c) && + !fy_is_space(fy_atom_iter_utf8_peek(&iter)) && + fy_emit_accum_column(&emit->ea) > fy_emit_width(emit)) { + fy_emit_output_accum(emit, fyewt_folded_scalar, &emit->ea); + emit->flags &= ~FYEF_INDENTATION; + fy_emit_write_indent(emit, indent); + } else + fy_emit_accum_utf8_put(&emit->ea, c); + + breaks = false; + } + } + fy_emit_output_accum(emit, fyewt_folded_scalar, &emit->ea); + fy_emit_accum_finish(&emit->ea); + fy_atom_iter_finish(&iter); +} + +static enum fy_node_style +fy_emit_token_scalar_style(struct fy_emitter *emit, struct fy_token *fyt, + int flags, int indent, enum fy_node_style style, + struct fy_token *fyt_tag) +{ + const char *value = NULL; + size_t len = 0; + bool json, flow, is_json_plain; + struct fy_atom *atom; + int aflags = -1; + const char *tag; + size_t tag_len; + + atom = fy_token_atom(fyt); + + flow = fy_emit_is_flow_mode(emit) || (flags & DDNF_FLOW); + + /* check if style is allowed (i.e. no block styles in flow context) */ + if (flow && (style == FYNS_LITERAL || style == FYNS_FOLDED)) + style = FYNS_ANY; + + json = fy_emit_is_json_mode(emit); + + /* literal in JSON mode is output as quoted */ + if (json && (style == FYNS_LITERAL || style == FYNS_FOLDED)) + return FYNS_DOUBLE_QUOTED; + + /* is this a plain json atom? 
*/ + is_json_plain = (json || emit->source_json || fy_emit_is_dejson_mode(emit)) && + (!atom || atom->size0 || + !fy_atom_strcmp(atom, "false") || + !fy_atom_strcmp(atom, "true") || + !fy_atom_strcmp(atom, "null") || + fy_atom_is_number(atom)); + + if (is_json_plain) { + tag = fy_token_get_text(fyt_tag, &tag_len); + + /* XXX hardcoded string tag resultion */ + if (tag && tag_len && + ((tag_len == 1 && *tag == '!') || + (tag_len == 21 && !memcmp(tag, "tag:yaml.org,2002:str", 21)))) + return FYNS_DOUBLE_QUOTED; + } + + /* JSON NULL, but with plain style */ + if (json && (style == FYNS_PLAIN || style == FYNS_ANY) && (!atom || (is_json_plain && !atom->size0))) + return FYNS_PLAIN; + + if (json) + return FYNS_DOUBLE_QUOTED; + + aflags = fy_token_text_analyze(fyt); + + if (flow && (style == FYNS_ANY || style == FYNS_LITERAL || style == FYNS_FOLDED)) { + + if (fyt && !value) + value = fy_token_get_text(fyt, &len); + + /* if there's a linebreak, use double quoted style */ + if (fy_find_any_lb(value, len)) { + style = FYNS_DOUBLE_QUOTED; + goto out; + } + + /* check if there's a non printable */ + if (!fy_find_non_print(value, len)) { + style = FYNS_SINGLE_QUOTED; + goto out; + } + + /* anything not empty is double quoted here */ + style = !(aflags & FYTTAF_EMPTY) ? 
FYNS_PLAIN : FYNS_DOUBLE_QUOTED; + } + + /* try to pretify */ + if (!flow && fy_emit_is_pretty_mode(emit) && + (style == FYNS_ANY || style == FYNS_DOUBLE_QUOTED || style == FYNS_SINGLE_QUOTED)) { + + /* any original style can be a plain, but contains linebreaks, do a literal */ + if ((aflags & (FYTTAF_CAN_BE_PLAIN | FYTTAF_HAS_LB)) == (FYTTAF_CAN_BE_PLAIN | FYTTAF_HAS_LB)) { + style = FYNS_LITERAL; + goto out; + } + + /* any style, can be just a plain, just make it so */ + if (style == FYNS_ANY && (aflags & (FYTTAF_CAN_BE_PLAIN | FYTTAF_HAS_LB)) == FYTTAF_CAN_BE_PLAIN) { + style = FYNS_PLAIN; + goto out; + } + + } + + if (!flow && emit->source_json && fy_emit_is_dejson_mode(emit)) { + if (is_json_plain || (aflags & (FYTTAF_CAN_BE_PLAIN | FYTTAF_HAS_LB)) == FYTTAF_CAN_BE_PLAIN) { + style = FYNS_PLAIN; + goto out; + } + } + +out: + if (style == FYNS_ANY) { + if (fyt) + value = fy_token_get_text(fyt, &len); + + style = (aflags & FYTTAF_CAN_BE_PLAIN) ? + FYNS_PLAIN : FYNS_DOUBLE_QUOTED; + } + + /* special handling for plains on start of line */ + if ((aflags & FYTTAF_QUOTE_AT_0) && indent == 0 && style == FYNS_PLAIN) + style = FYNS_DOUBLE_QUOTED; + + return style; +} + +void fy_emit_token_scalar(struct fy_emitter *emit, struct fy_token *fyt, int flags, int indent, + enum fy_node_style style, struct fy_token *fyt_tag) +{ + assert(style != FYNS_FLOW && style != FYNS_BLOCK); + + indent = fy_emit_increase_indent(emit, flags, indent); + + if (!fy_emit_whitespace(emit)) + fy_emit_write_ws(emit); + + style = fy_emit_token_scalar_style(emit, fyt, flags, indent, style, fyt_tag); + + switch (style) { + case FYNS_ALIAS: + fy_emit_token_write_alias(emit, fyt, flags, indent); + break; + case FYNS_PLAIN: + fy_emit_token_write_plain(emit, fyt, flags, indent); + break; + case FYNS_DOUBLE_QUOTED: + fy_emit_token_write_quoted(emit, fyt, flags, indent, '"'); + break; + case FYNS_SINGLE_QUOTED: + fy_emit_token_write_quoted(emit, fyt, flags, indent, '\''); + break; + case FYNS_LITERAL: + 
fy_emit_token_write_literal(emit, fyt, flags, indent); + break; + case FYNS_FOLDED: + fy_emit_token_write_folded(emit, fyt, flags, indent); + break; + default: + break; + } +} + +void fy_emit_scalar(struct fy_emitter *emit, struct fy_node *fyn, int flags, int indent, bool is_key) +{ + enum fy_node_style style; + + /* default style */ + style = fyn ? fyn->style : FYNS_ANY; + + /* all JSON keys are double quoted */ + if (fy_emit_is_json_mode(emit) && is_key) + style = FYNS_DOUBLE_QUOTED; + + fy_emit_token_scalar(emit, + fyn ? fyn->scalar : NULL, + flags, indent, + style, fyn->tag); +} + +static void fy_emit_sequence_prolog(struct fy_emitter *emit, struct fy_emit_save_ctx *sc) +{ + bool json = fy_emit_is_json_mode(emit); + bool oneline = fy_emit_is_oneline(emit); + bool was_flow = sc->flow; + + sc->old_indent = sc->indent; + if (!json) { + if (fy_emit_is_block_mode(emit)) + sc->flow = sc->empty; + else + sc->flow = fy_emit_is_flow_mode(emit) || emit->flow_level || sc->flow_token || sc->empty; + + if (sc->flow) { + if (!emit->flow_level) { + sc->indent = fy_emit_increase_indent(emit, sc->flags, sc->indent); + sc->old_indent = sc->indent; + } + + sc->flags = (sc->flags | DDNF_FLOW) | (sc->flags & ~DDNF_INDENTLESS); + fy_emit_write_indicator(emit, di_left_bracket, sc->flags, sc->indent, fyewt_indicator); + } else { + sc->flags = (sc->flags & ~DDNF_FLOW); + } + } else { + sc->flags = (sc->flags | DDNF_FLOW) | (sc->flags & ~DDNF_INDENTLESS); + fy_emit_write_indicator(emit, di_left_bracket, sc->flags, sc->indent, fyewt_indicator); + } + + if (!oneline) { + if (was_flow || (sc->flags & (DDNF_ROOT | DDNF_SEQ))) + sc->indent = fy_emit_increase_indent(emit, sc->flags, sc->indent); + } + + sc->flags &= ~DDNF_ROOT; +} + +static void fy_emit_sequence_epilog(struct fy_emitter *emit, struct fy_emit_save_ctx *sc) +{ + if (sc->flow || fy_emit_is_json_mode(emit)) { + if (!fy_emit_is_oneline(emit) && !sc->empty) + fy_emit_write_indent(emit, sc->old_indent); + 
fy_emit_write_indicator(emit, di_right_bracket, sc->flags, sc->old_indent, fyewt_indicator); + } +} + +static void fy_emit_sequence_item_prolog(struct fy_emitter *emit, struct fy_emit_save_ctx *sc, + struct fy_token *fyt_value) +{ + int tmp_indent; + + sc->flags |= DDNF_SEQ; + + if (!fy_emit_is_oneline(emit)) + fy_emit_write_indent(emit, sc->indent); + + if (!sc->flow && !fy_emit_is_json_mode(emit)) + fy_emit_write_indicator(emit, di_dash, sc->flags, sc->indent, fyewt_indicator); + + tmp_indent = sc->indent; + if (fy_emit_token_has_comment(emit, fyt_value, fycp_top)) { + if (!sc->flow && !fy_emit_is_json_mode(emit)) + tmp_indent = fy_emit_increase_indent(emit, sc->flags, sc->indent); + fy_emit_token_comment(emit, fyt_value, sc->flags, tmp_indent, fycp_top); + } +} + +static void fy_emit_sequence_item_epilog(struct fy_emitter *emit, struct fy_emit_save_ctx *sc, + bool last, struct fy_token *fyt_value) +{ + if ((sc->flow || fy_emit_is_json_mode(emit)) && !last) + fy_emit_write_indicator(emit, di_comma, sc->flags, sc->indent, fyewt_indicator); + + fy_emit_token_comment(emit, fyt_value, sc->flags, sc->indent, fycp_right); + + if (last && (sc->flow || fy_emit_is_json_mode(emit)) && !fy_emit_is_oneline(emit) && !sc->empty) + fy_emit_write_indent(emit, sc->old_indent); + + sc->flags &= ~DDNF_SEQ; +} + +void fy_emit_sequence(struct fy_emitter *emit, struct fy_node *fyn, int flags, int indent) +{ + struct fy_node *fyni, *fynin; + struct fy_token *fyt_value; + bool last; + struct fy_emit_save_ctx sct, *sc = &sct; + + memset(sc, 0, sizeof(*sc)); + + sc->flags = flags; + sc->indent = indent; + sc->empty = fy_node_list_empty(&fyn->sequence); + sc->flow_token = fyn->style == FYNS_FLOW; + sc->flow = !!(flags & DDNF_FLOW); + sc->old_indent = sc->indent; + + fy_emit_sequence_prolog(emit, sc); + + for (fyni = fy_node_list_head(&fyn->sequence); fyni; fyni = fynin) { + + fynin = fy_node_next(&fyn->sequence, fyni); + last = !fynin; + fyt_value = fy_node_value_token(fyni); + + 
fy_emit_sequence_item_prolog(emit, sc, fyt_value); + fy_emit_node_internal(emit, fyni, (sc->flags & ~DDNF_ROOT), sc->indent, false); + fy_emit_sequence_item_epilog(emit, sc, last, fyt_value); + } + + fy_emit_sequence_epilog(emit, sc); +} + +static void fy_emit_mapping_prolog(struct fy_emitter *emit, struct fy_emit_save_ctx *sc) +{ + bool json = fy_emit_is_json_mode(emit); + bool oneline = fy_emit_is_oneline(emit); + + sc->old_indent = sc->indent; + if (!json) { + if (fy_emit_is_block_mode(emit)) + sc->flow = sc->empty; + else + sc->flow = fy_emit_is_flow_mode(emit) || emit->flow_level || sc->flow_token || sc->empty; + + if (sc->flow) { + if (!emit->flow_level) { + sc->indent = fy_emit_increase_indent(emit, sc->flags, sc->indent); + sc->old_indent = sc->indent; + } + + sc->flags = (sc->flags | DDNF_FLOW) | (sc->flags & ~DDNF_INDENTLESS); + fy_emit_write_indicator(emit, di_left_brace, sc->flags, sc->indent, fyewt_indicator); + } else { + sc->flags &= ~(DDNF_FLOW | DDNF_INDENTLESS); + } + } else { + sc->flags = (sc->flags | DDNF_FLOW) | (sc->flags & ~DDNF_INDENTLESS); + fy_emit_write_indicator(emit, di_left_brace, sc->flags, sc->indent, fyewt_indicator); + } + + if (!oneline && !sc->empty) + sc->indent = fy_emit_increase_indent(emit, sc->flags, sc->indent); + + sc->flags &= ~DDNF_ROOT; +} + +static void fy_emit_mapping_epilog(struct fy_emitter *emit, struct fy_emit_save_ctx *sc) +{ + if (sc->flow || fy_emit_is_json_mode(emit)) { + if (!fy_emit_is_oneline(emit) && !sc->empty) + fy_emit_write_indent(emit, sc->old_indent); + fy_emit_write_indicator(emit, di_right_brace, sc->flags, sc->old_indent, fyewt_indicator); + } +} + +static void fy_emit_mapping_key_prolog(struct fy_emitter *emit, struct fy_emit_save_ctx *sc, + struct fy_token *fyt_key, bool simple_key) +{ + sc->flags = DDNF_MAP | (sc->flags & DDNF_FLOW); + + if (simple_key) { + sc->flags |= DDNF_SIMPLE; + if (fyt_key && fyt_key->type == FYTT_SCALAR) + sc->flags |= DDNF_SIMPLE_SCALAR_KEY; + } else { + /* do not 
emit the ? in flow modes at all */ + if (fy_emit_is_flow_mode(emit)) + sc->flags |= DDNF_SIMPLE; + } + + if (!fy_emit_is_oneline(emit)) + fy_emit_write_indent(emit, sc->indent); + + /* complex? */ + if (!(sc->flags & DDNF_SIMPLE)) + fy_emit_write_indicator(emit, di_question_mark, sc->flags, sc->indent, fyewt_indicator); +} + +static void fy_emit_mapping_key_epilog(struct fy_emitter *emit, struct fy_emit_save_ctx *sc, + struct fy_token *fyt_key) +{ + int tmp_indent; + + /* if the key is an alias, always output an extra whitespace */ + if (fyt_key && fyt_key->type == FYTT_ALIAS) + fy_emit_write_ws(emit); + + sc->flags &= ~DDNF_MAP; + + fy_emit_write_indicator(emit, di_colon, sc->flags, sc->indent, fyewt_indicator); + + tmp_indent = sc->indent; + if (fy_emit_token_has_comment(emit, fyt_key, fycp_right)) { + + if (!sc->flow && !fy_emit_is_json_mode(emit)) + tmp_indent = fy_emit_increase_indent(emit, sc->flags, sc->indent); + + fy_emit_token_comment(emit, fyt_key, sc->flags, tmp_indent, fycp_right); + fy_emit_write_indent(emit, tmp_indent); + } + + sc->flags = DDNF_MAP | (sc->flags & DDNF_FLOW); +} + +static void fy_emit_mapping_value_prolog(struct fy_emitter *emit, struct fy_emit_save_ctx *sc, + struct fy_token *fyt_value) +{ + /* nothing */ +} + +static void fy_emit_mapping_value_epilog(struct fy_emitter *emit, struct fy_emit_save_ctx *sc, + bool last, struct fy_token *fyt_value) +{ + if ((sc->flow || fy_emit_is_json_mode(emit)) && !last) + fy_emit_write_indicator(emit, di_comma, sc->flags, sc->indent, fyewt_indicator); + + fy_emit_token_comment(emit, fyt_value, sc->flags, sc->indent, fycp_right); + + if (last && (sc->flow || fy_emit_is_json_mode(emit)) && !fy_emit_is_oneline(emit) && !sc->empty) + fy_emit_write_indent(emit, sc->old_indent); + + sc->flags &= ~DDNF_MAP; +} + +void fy_emit_mapping(struct fy_emitter *emit, struct fy_node *fyn, int flags, int indent) +{ + struct fy_node_pair *fynp, *fynpn, **fynpp = NULL; + struct fy_token *fyt_key, *fyt_value; + bool 
last, simple_key, used_malloc = false; + int aflags, i, count; + struct fy_emit_save_ctx sct, *sc = &sct; + + memset(sc, 0, sizeof(*sc)); + + sc->flags = flags; + sc->indent = indent; + sc->empty = fy_node_pair_list_empty(&fyn->mapping); + sc->flow_token = fyn->style == FYNS_FLOW; + sc->flow = !!(flags & DDNF_FLOW); + sc->old_indent = sc->indent; + + fy_emit_mapping_prolog(emit, sc); + + if (!(emit->cfg.flags & (FYECF_SORT_KEYS | FYECF_STRIP_EMPTY_KV))) { + fynp = fy_node_pair_list_head(&fyn->mapping); + fynpp = NULL; + } else { + count = fy_node_mapping_item_count(fyn); + + /* heuristic, avoid allocation for small maps */ + if (count > 64) { + fynpp = malloc((count + 1) * sizeof(*fynpp)); + fyd_error_check(fyn->fyd, fynpp, err_out, + "malloc() failed"); + used_malloc = true; + } else + fynpp = FY_ALLOCA((count + 1) * sizeof(*fynpp)); + + /* fill (removing empty KVs) */ + i = 0; + for (fynp = fy_node_pair_list_head(&fyn->mapping); fynp; + fynp = fy_node_pair_next(&fyn->mapping, fynp)) { + + /* strip key/value pair from the output if it's empty */ + if ((emit->cfg.flags & FYECF_STRIP_EMPTY_KV) && fy_node_is_empty(fynp->value)) + continue; + + fynpp[i++] = fynp; + } + count = i; + fynpp[count] = NULL; + + /* sort the keys */ + if (emit->cfg.flags & FYECF_SORT_KEYS) + fy_node_mapping_perform_sort(fyn, NULL, NULL, fynpp, count); + + i = 0; + fynp = fynpp[i]; + } + + for (; fynp; fynp = fynpn) { + + if (!fynpp) + fynpn = fy_node_pair_next(&fyn->mapping, fynp); + else + fynpn = fynpp[++i]; + + last = !fynpn; + fyt_key = fy_node_value_token(fynp->key); + fyt_value = fy_node_value_token(fynp->value); + + FYD_NODE_ERROR_CHECK(fynp->fyd, fynp->key, FYEM_INTERNAL, + !fy_emit_is_json_mode(emit) || + (fynp->key && fynp->key->type == FYNT_SCALAR), + err_out, "Non scalar keys are not allowed in JSON emit mode"); + + simple_key = false; + if (fynp->key) { + switch (fynp->key->type) { + case FYNT_SCALAR: + aflags = fy_token_text_analyze(fynp->key->scalar); + simple_key = 
fy_emit_is_json_mode(emit) || + !!(aflags & FYTTAF_CAN_BE_SIMPLE_KEY); + break; + case FYNT_SEQUENCE: + simple_key = fy_node_list_empty(&fynp->key->sequence); + break; + case FYNT_MAPPING: + simple_key = fy_node_pair_list_empty(&fynp->key->mapping); + break; + } + } + + fy_emit_mapping_key_prolog(emit, sc, fyt_key, simple_key); + if (fynp->key) + fy_emit_node_internal(emit, fynp->key, (sc->flags & ~DDNF_ROOT), sc->indent, true); + fy_emit_mapping_key_epilog(emit, sc, fyt_key); + + fy_emit_mapping_value_prolog(emit, sc, fyt_value); + if (fynp->value) + fy_emit_node_internal(emit, fynp->value, (sc->flags & ~DDNF_ROOT), sc->indent, false); + fy_emit_mapping_value_epilog(emit, sc, last, fyt_value); + } + + if (fynpp && used_malloc) + free(fynpp); + + fy_emit_mapping_epilog(emit, sc); + +err_out: + return; +} + +int fy_emit_common_document_start(struct fy_emitter *emit, + struct fy_document_state *fyds, + bool root_tag_or_anchor) +{ + struct fy_token *fyt_chk; + const char *td_handle, *td_prefix; + size_t td_handle_size, td_prefix_size; + enum fy_emitter_cfg_flags flags = emit->cfg.flags; + enum fy_emitter_cfg_flags vd_flags = flags & FYECF_VERSION_DIR(FYECF_VERSION_DIR_MASK); + enum fy_emitter_cfg_flags td_flags = flags & FYECF_TAG_DIR(FYECF_TAG_DIR_MASK); + enum fy_emitter_cfg_flags dsm_flags = flags & FYECF_DOC_START_MARK(FYECF_DOC_START_MARK_MASK); + bool vd, td, dsm; + bool had_non_default_tag = false; + + if (!emit || !fyds || emit->fyds) + return -1; + + emit->fyds = fyds; + + vd = ((vd_flags == FYECF_VERSION_DIR_AUTO && fyds->version_explicit) || + vd_flags == FYECF_VERSION_DIR_ON) && + !(emit->cfg.flags & FYECF_STRIP_DOC); + td = ((td_flags == FYECF_TAG_DIR_AUTO && fyds->tags_explicit) || + td_flags == FYECF_TAG_DIR_ON) && + !(emit->cfg.flags & FYECF_STRIP_DOC); + + /* if either a version or directive tags exist, and no previous + * explicit document end existed, output one now + */ + if (!fy_emit_is_json_mode(emit) && (vd || td) && !(emit->flags & 
FYEF_HAD_DOCUMENT_END)) { + if (emit->column) + fy_emit_putc(emit, fyewt_linebreak, '\n'); + if (!(emit->cfg.flags & FYECF_STRIP_DOC)) { + fy_emit_puts(emit, fyewt_document_indicator, "..."); + emit->flags &= ~FYEF_WHITESPACE; + emit->flags |= FYEF_HAD_DOCUMENT_END; + } + } + + if (!fy_emit_is_json_mode(emit) && vd) { + if (emit->column) + fy_emit_putc(emit, fyewt_linebreak, '\n'); + fy_emit_printf(emit, fyewt_version_directive, "%%YAML %d.%d", + fyds->version.major, fyds->version.minor); + fy_emit_putc(emit, fyewt_linebreak, '\n'); + emit->flags = FYEF_WHITESPACE | FYEF_INDENTATION; + } + + if (!fy_emit_is_json_mode(emit) && td) { + + for (fyt_chk = fy_token_list_first(&fyds->fyt_td); fyt_chk; fyt_chk = fy_token_next(&fyds->fyt_td, fyt_chk)) { + + td_handle = fy_tag_directive_token_handle(fyt_chk, &td_handle_size); + td_prefix = fy_tag_directive_token_prefix(fyt_chk, &td_prefix_size); + assert(td_handle && td_prefix); + + if (fy_tag_is_default_internal(td_handle, td_handle_size, td_prefix, td_prefix_size)) + continue; + + had_non_default_tag = true; + + if (emit->column) + fy_emit_putc(emit, fyewt_linebreak, '\n'); + fy_emit_printf(emit, fyewt_tag_directive, "%%TAG %.*s %.*s", + (int)td_handle_size, td_handle, + (int)td_prefix_size, td_prefix); + fy_emit_putc(emit, fyewt_linebreak, '\n'); + emit->flags = FYEF_WHITESPACE | FYEF_INDENTATION; + } + } + + /* always output document start indicator: + * - was explicit + * - document has tags + * - document has an explicit version + * - root exists & has a tag or an anchor + */ + dsm = (dsm_flags == FYECF_DOC_START_MARK_AUTO && + (!fyds->start_implicit || + fyds->tags_explicit || fyds->version_explicit || + had_non_default_tag)) || + dsm_flags == FYECF_DOC_START_MARK_ON; + + /* if there was previous output without document end */ + if (!dsm && (emit->flags & FYEF_HAD_DOCUMENT_OUTPUT) && + !(emit->flags & FYEF_HAD_DOCUMENT_END)) + dsm = true; + + /* output document start indicator if we should */ + if (dsm) + 
fy_emit_document_start_indicator(emit); + + /* clear that in any case */ + emit->flags &= ~FYEF_HAD_DOCUMENT_END; + + return 0; +} + +int fy_emit_document_start(struct fy_emitter *emit, struct fy_document *fyd, + struct fy_node *fyn_root) +{ + struct fy_node *root; + bool root_tag_or_anchor; + int ret; + + if (!emit || !fyd || !fyd->fyds) + return -1; + + root = fyn_root ? fyn_root : fy_document_root(fyd); + + root_tag_or_anchor = root && (root->tag || fy_document_lookup_anchor_by_node(fyd, root)); + + ret = fy_emit_common_document_start(emit, fyd->fyds, root_tag_or_anchor); + if (ret) + return ret; + + emit->fyd = fyd; + + return 0; +} + +int fy_emit_common_document_end(struct fy_emitter *emit, bool override_state, bool implicit_override) +{ + const struct fy_document_state *fyds; + enum fy_emitter_cfg_flags flags = emit->cfg.flags; + enum fy_emitter_cfg_flags dem_flags = flags & FYECF_DOC_END_MARK(FYECF_DOC_END_MARK_MASK); + bool implicit, dem; + + if (!emit || !emit->fyds) + return -1; + + fyds = emit->fyds; + + implicit = fyds->end_implicit; + if (override_state) + implicit = implicit_override; + + dem = ((dem_flags == FYECF_DOC_END_MARK_AUTO && !implicit) || + dem_flags == FYECF_DOC_END_MARK_ON) && + !(emit->cfg.flags & FYECF_STRIP_DOC); + + if (!(emit->cfg.flags & FYECF_NO_ENDING_NEWLINE)) { + if (emit->column != 0) { + fy_emit_putc(emit, fyewt_linebreak, '\n'); + emit->flags = FYEF_WHITESPACE | FYEF_INDENTATION; + } + + if (!fy_emit_is_json_mode(emit) && dem) { + fy_emit_puts(emit, fyewt_document_indicator, "..."); + fy_emit_putc(emit, fyewt_linebreak, '\n'); + emit->flags = FYEF_WHITESPACE | FYEF_INDENTATION; + emit->flags |= FYEF_HAD_DOCUMENT_END; + } else + emit->flags &= ~FYEF_HAD_DOCUMENT_END; + } else { + if (!fy_emit_is_json_mode(emit) && dem) { + if (emit->column != 0) { + fy_emit_putc(emit, fyewt_linebreak, '\n'); + emit->flags = FYEF_WHITESPACE | FYEF_INDENTATION; + } + fy_emit_puts(emit, fyewt_document_indicator, "..."); + emit->flags &= 
~(FYEF_WHITESPACE | FYEF_INDENTATION); + emit->flags |= FYEF_HAD_DOCUMENT_END; + } else + emit->flags &= ~FYEF_HAD_DOCUMENT_END; + } + + /* mark that we did output a document earlier */ + emit->flags |= FYEF_HAD_DOCUMENT_OUTPUT; + + /* stop our association with the document */ + emit->fyds = NULL; + + return 0; +} + +int fy_emit_document_end(struct fy_emitter *emit) +{ + int ret; + + ret = fy_emit_common_document_end(emit, false, false); + if (ret) + return ret; + + emit->fyd = NULL; + return 0; +} + +int fy_emit_common_explicit_document_end(struct fy_emitter *emit) +{ + if (!emit) + return -1; + + if (emit->column != 0) { + fy_emit_putc(emit, fyewt_linebreak, '\n'); + emit->flags = FYEF_WHITESPACE | FYEF_INDENTATION; + } + + if (!fy_emit_is_json_mode(emit)) { + fy_emit_puts(emit, fyewt_document_indicator, "..."); + fy_emit_putc(emit, fyewt_linebreak, '\n'); + emit->flags = FYEF_WHITESPACE | FYEF_INDENTATION; + emit->flags |= FYEF_HAD_DOCUMENT_END; + } else + emit->flags &= ~FYEF_HAD_DOCUMENT_END; + + /* mark that we did output a document earlier */ + emit->flags |= FYEF_HAD_DOCUMENT_OUTPUT; + + /* stop our association with the document */ + emit->fyds = NULL; + + return 0; +} + +int fy_emit_explicit_document_end(struct fy_emitter *emit) +{ + int ret; + + ret = fy_emit_common_explicit_document_end(emit); + if (ret) + return ret; + + emit->fyd = NULL; + return 0; +} + +void fy_emit_reset(struct fy_emitter *emit, bool reset_events) +{ + struct fy_eventp *fyep; + + emit->line = 0; + emit->column = 0; + emit->flow_level = 0; + emit->output_error = 0; + /* start as if there was a previous document with an explicit end */ + /* this allows implicit documents start without an indicator */ + emit->flags = FYEF_WHITESPACE | FYEF_INDENTATION | FYEF_HAD_DOCUMENT_END; + + emit->state = FYES_NONE; + + /* reset the accumulator */ + fy_emit_accum_reset(&emit->ea); + + /* streaming mode indent */ + emit->s_indent = -1; + /* streaming mode flags */ + emit->s_flags = DDNF_ROOT; + + 
emit->state_stack_top = 0; + emit->sc_stack_top = 0; + + /* and release any queued events */ + if (reset_events) { + while ((fyep = fy_eventp_list_pop(&emit->queued_events)) != NULL) + fy_eventp_release(fyep); + } +} + +int fy_emit_setup(struct fy_emitter *emit, const struct fy_emitter_cfg *cfg) +{ + struct fy_diag *diag; + + if (!cfg) + return -1; + + memset(emit, 0, sizeof(*emit)); + + emit->cfg = *cfg; + if (!emit->cfg.output) + emit->cfg.output = fy_emitter_default_output; + + diag = cfg->diag; + + if (!diag) { + diag = fy_diag_create(NULL); + if (!diag) + return -1; + } else + fy_diag_ref(diag); + + emit->diag = diag; + + fy_emit_accum_init(&emit->ea, emit->ea_inplace_buf, sizeof(emit->ea_inplace_buf), 0, fylb_cr_nl); + fy_eventp_list_init(&emit->queued_events); + + emit->state_stack = emit->state_stack_inplace; + emit->state_stack_alloc = sizeof(emit->state_stack_inplace)/sizeof(emit->state_stack_inplace[0]); + + emit->sc_stack = emit->sc_stack_inplace; + emit->sc_stack_alloc = sizeof(emit->sc_stack_inplace)/sizeof(emit->sc_stack_inplace[0]); + + fy_eventp_list_init(&emit->recycled_eventp); + fy_token_list_init(&emit->recycled_token); + + /* suppress recycling if we must */ + emit->suppress_recycling_force = getenv("FY_VALGRIND") && !getenv("FY_VALGRIND_RECYCLING"); + emit->suppress_recycling = emit->suppress_recycling_force; + + if (!emit->suppress_recycling) { + emit->recycled_eventp_list = &emit->recycled_eventp; + emit->recycled_token_list = &emit->recycled_token; + } else { + emit->recycled_eventp_list = NULL; + emit->recycled_token_list = NULL; + } + + fy_emit_reset(emit, false); + + return 0; +} + +void fy_emit_cleanup(struct fy_emitter *emit) +{ + struct fy_eventp *fyep; + struct fy_token *fyt; + + /* call the finalizer if it exists */ + if (emit->finalizer) + emit->finalizer(emit); + + while ((fyt = fy_token_list_pop(&emit->recycled_token)) != NULL) + fy_token_free(fyt); + + while ((fyep = fy_eventp_list_pop(&emit->recycled_eventp)) != NULL) + 
fy_eventp_free(fyep); + + if (!emit->fyd && emit->fyds) + fy_document_state_unref(emit->fyds); + + fy_emit_accum_cleanup(&emit->ea); + + while ((fyep = fy_eventp_list_pop(&emit->queued_events)) != NULL) + fy_eventp_release(fyep); + + if (emit->state_stack && emit->state_stack != emit->state_stack_inplace) + free(emit->state_stack); + + if (emit->sc_stack && emit->sc_stack != emit->sc_stack_inplace) + free(emit->sc_stack); + + fy_diag_unref(emit->diag); +} + +int fy_emit_node_no_check(struct fy_emitter *emit, struct fy_node *fyn) +{ + if (fyn) + fy_emit_node_internal(emit, fyn, DDNF_ROOT, -1, false); + return 0; +} + +int fy_emit_node(struct fy_emitter *emit, struct fy_node *fyn) +{ + int ret; + + ret = fy_emit_node_check(emit, fyn); + if (ret) + return ret; + + return fy_emit_node_no_check(emit, fyn); +} + +int fy_emit_root_node_no_check(struct fy_emitter *emit, struct fy_node *fyn) +{ + if (!emit || !fyn) + return -1; + + /* top comment first */ + fy_emit_node_comment(emit, fyn, DDNF_ROOT, -1, fycp_top); + + fy_emit_node_internal(emit, fyn, DDNF_ROOT, -1, false); + + /* right comment next */ + fy_emit_node_comment(emit, fyn, DDNF_ROOT, -1, fycp_right); + + /* bottom comment last */ + fy_emit_node_comment(emit, fyn, DDNF_ROOT, -1, fycp_bottom); + + return 0; +} + +int fy_emit_root_node(struct fy_emitter *emit, struct fy_node *fyn) +{ + int ret; + + if (!emit || !fyn) + return -1; + + ret = fy_emit_node_check(emit, fyn); + if (ret) + return ret; + + return fy_emit_root_node_no_check(emit, fyn); +} + +void fy_emit_prepare_document_state(struct fy_emitter *emit, struct fy_document_state *fyds) +{ + if (!emit || !fyds) + return; + + /* if the original document was JSON and the mode is ORIGINAL turn on JSON mode */ + emit->source_json = fyds && fyds->json_mode; + emit->force_json = (emit->cfg.flags & FYECF_MODE(FYECF_MODE_MASK)) == FYECF_MODE_ORIGINAL && + emit->source_json; +} + +int fy_emit_document_no_check(struct fy_emitter *emit, struct fy_document *fyd) +{ + int 
rc; + + rc = fy_emit_document_start(emit, fyd, NULL); + if (rc) + return rc; + + rc = fy_emit_root_node_no_check(emit, fyd->root); + if (rc) + return rc; + + rc = fy_emit_document_end(emit); + + return rc; +} + +int fy_emit_document(struct fy_emitter *emit, struct fy_document *fyd) +{ + int ret; + + if (!emit) + return -1; + + if (fyd) { + fy_emit_prepare_document_state(emit, fyd->fyds); + + if (fyd->root) { + ret = fy_emit_node_check(emit, fyd->root); + if (ret) + return ret; + } + } + + return fy_emit_document_no_check(emit, fyd); +} + +struct fy_emitter *fy_emitter_create(const struct fy_emitter_cfg *cfg) +{ + struct fy_emitter *emit; + int rc; + + if (!cfg) + return NULL; + + emit = malloc(sizeof(*emit)); + if (!emit) + return NULL; + + rc = fy_emit_setup(emit, cfg); + if (rc) { + free(emit); + return NULL; + } + + return emit; +} + +void fy_emitter_destroy(struct fy_emitter *emit) +{ + if (!emit) + return; + + fy_emit_cleanup(emit); + + free(emit); +} + +const struct fy_emitter_cfg *fy_emitter_get_cfg(struct fy_emitter *emit) +{ + if (!emit) + return NULL; + + return &emit->cfg; +} + +struct fy_diag *fy_emitter_get_diag(struct fy_emitter *emit) +{ + if (!emit || !emit->diag) + return NULL; + return fy_diag_ref(emit->diag); +} + +int fy_emitter_set_diag(struct fy_emitter *emit, struct fy_diag *diag) +{ + struct fy_diag_cfg dcfg; + + if (!emit) + return -1; + + /* default? 
*/ + if (!diag) { + fy_diag_cfg_default(&dcfg); + diag = fy_diag_create(&dcfg); + if (!diag) + return -1; + } + + fy_diag_unref(emit->diag); + emit->diag = fy_diag_ref(diag); + + return 0; +} + +void fy_emitter_set_finalizer(struct fy_emitter *emit, + void (*finalizer)(struct fy_emitter *emit)) +{ + if (!emit) + return; + emit->finalizer = finalizer; +} + +struct fy_emit_buffer_state { + char **bufp; + size_t *sizep; + char *buf; + size_t size; + size_t pos; + size_t need; + bool allocate_buffer; +}; + +static int do_buffer_output(struct fy_emitter *emit, enum fy_emitter_write_type type, const char *str, int leni, void *userdata) +{ + struct fy_emit_buffer_state *state = emit->cfg.userdata; + size_t left, pagesize, size, len; + char *bufnew; + + /* convert to unsigned and use that */ + len = (size_t)leni; + + /* no funky business */ + if (len < 0) + return -1; + + state->need += len; + left = state->size - state->pos; + if (left < len) { + if (!state->allocate_buffer) + return 0; + + pagesize = fy_get_pagesize(); + size = state->need + pagesize - 1; + size = size - size % pagesize; + + bufnew = realloc(state->buf, size); + if (!bufnew) + return -1; + state->buf = bufnew; + state->size = size; + left = state->size - state->pos; + + } + + if (len > left) + len = left; + if (state->buf) + memcpy(state->buf + state->pos, str, len); + state->pos += len; + + return len; +} + +static void +fy_emitter_str_finalizer(struct fy_emitter *emit) +{ + struct fy_emit_buffer_state *state; + + if (!emit || !(state = emit->cfg.userdata)) + return; + + /* if the buffer is allowed to allocate_buffer... 
*/ + if (state->allocate_buffer && state->buf) + free(state->buf); + free(state); + + emit->cfg.userdata = NULL; +} + +static struct fy_emitter * +fy_emitter_create_str_internal(enum fy_emitter_cfg_flags flags, char **bufp, size_t *sizep, bool allocate_buffer) +{ + struct fy_emitter *emit; + struct fy_emitter_cfg emit_cfg; + struct fy_emit_buffer_state *state; + + state = malloc(sizeof(*state)); + if (!state) + return NULL; + + /* if any of these NULL, it's a allocation case */ + if ((!bufp || !sizep) && !allocate_buffer) + return NULL; + + if (bufp && sizep) { + state->bufp = bufp; + state->buf = *bufp; + state->sizep = sizep; + state->size = *sizep; + } else { + state->bufp = NULL; + state->buf = NULL; + state->sizep = NULL; + state->size = 0; + } + state->pos = 0; + state->need = 0; + state->allocate_buffer = allocate_buffer; + + memset(&emit_cfg, 0, sizeof(emit_cfg)); + emit_cfg.output = do_buffer_output; + emit_cfg.userdata = state; + emit_cfg.flags = flags; + + emit = fy_emitter_create(&emit_cfg); + if (!emit) + goto err_out; + + /* set finalizer to cleanup */ + fy_emitter_set_finalizer(emit, fy_emitter_str_finalizer); + + return emit; + +err_out: + if (state) + free(state); + return NULL; +} + +static int +fy_emitter_collect_str_internal(struct fy_emitter *emit, char **bufp, size_t *sizep) +{ + struct fy_emit_buffer_state *state; + char *buf; + int rc; + + state = emit->cfg.userdata; + assert(state); + + /* if NULL, then use the values stored on the state */ + if (!bufp) + bufp = state->bufp; + if (!sizep) + sizep = state->sizep; + + /* terminating zero */ + rc = do_buffer_output(emit, fyewt_terminating_zero, "\0", 1, state); + if (rc != 1) + goto err_out; + + state->size = state->need; + + if (state->allocate_buffer) { + /* resize */ + buf = realloc(state->buf, state->size); + /* very likely since we shrink the buffer, but make sure we don't error out */ + if (buf) + state->buf = buf; + } + + /* retreive the buffer and size */ + *sizep = state->size; + 
*bufp = state->buf; + + /* reset the buffer, ownership now to the caller */ + state->buf = NULL; + state->size = 0; + state->pos = 0; + state->bufp = NULL; + state->sizep = NULL; + + return 0; + +err_out: + *bufp = NULL; + *sizep = 0; + return -1; +} + +static int fy_emit_str_internal(struct fy_document *fyd, + enum fy_emitter_cfg_flags flags, + struct fy_node *fyn, char **bufp, size_t *sizep, + bool allocate_buffer) +{ + struct fy_emitter *emit = NULL; + int rc = -1; + + emit = fy_emitter_create_str_internal(flags, bufp, sizep, allocate_buffer); + if (!emit) + goto out_err; + + if (fyd) { + fy_emit_prepare_document_state(emit, fyd->fyds); + rc = 0; + if (fyd->root) + rc = fy_emit_node_check(emit, fyd->root); + if (!rc) + rc = fy_emit_document_no_check(emit, fyd); + } else { + rc = fy_emit_node_check(emit, fyn); + if (!rc) + rc = fy_emit_node_no_check(emit, fyn); + } + + if (rc) + goto out_err; + + rc = fy_emitter_collect_str_internal(emit, NULL, NULL); + if (rc) + goto out_err; + + /* OK, all done */ + +out_err: + fy_emitter_destroy(emit); + return rc; +} + +int fy_emit_document_to_buffer(struct fy_document *fyd, enum fy_emitter_cfg_flags flags, char *buf, size_t size) +{ + int rc; + + rc = fy_emit_str_internal(fyd, flags, NULL, &buf, &size, false); + if (rc != 0) + return -1; + return size; +} + +char *fy_emit_document_to_string(struct fy_document *fyd, enum fy_emitter_cfg_flags flags) +{ + char *buf; + size_t size; + int rc; + + buf = NULL; + size = 0; + rc = fy_emit_str_internal(fyd, flags, NULL, &buf, &size, true); + if (rc != 0) + return NULL; + return buf; +} + +struct fy_emitter * +fy_emit_to_buffer(enum fy_emitter_cfg_flags flags, char *buf, size_t size) +{ + if (!buf) + return NULL; + + return fy_emitter_create_str_internal(flags, &buf, &size, false); +} + +char * +fy_emit_to_buffer_collect(struct fy_emitter *emit, size_t *sizep) +{ + int rc; + char *buf; + + if (!emit || !sizep) + return NULL; + + rc = fy_emitter_collect_str_internal(emit, &buf, sizep); 
+ if (rc) { + *sizep = 0; + return NULL; + } + return buf; +} + +struct fy_emitter * +fy_emit_to_string(enum fy_emitter_cfg_flags flags) +{ + return fy_emitter_create_str_internal(flags, NULL, NULL, true); +} + +char * +fy_emit_to_string_collect(struct fy_emitter *emit, size_t *sizep) +{ + int rc; + char *buf; + + if (!emit || !sizep) + return NULL; + + rc = fy_emitter_collect_str_internal(emit, &buf, sizep); + if (rc) { + *sizep = 0; + return NULL; + } + return buf; +} + +static int do_file_output(struct fy_emitter *emit, enum fy_emitter_write_type type, const char *str, int leni, void *userdata) +{ + FILE *fp = userdata; + size_t len; + + len = (size_t)leni; + + /* no funky stuff */ + if (len < 0) + return -1; + + return fwrite(str, 1, len, fp); +} + +int fy_emit_document_to_fp(struct fy_document *fyd, enum fy_emitter_cfg_flags flags, + FILE *fp) +{ + struct fy_emitter emit_state, *emit = &emit_state; + struct fy_emitter_cfg emit_cfg; + int rc; + + if (!fp) + return -1; + + memset(&emit_cfg, 0, sizeof(emit_cfg)); + emit_cfg.output = do_file_output; + emit_cfg.userdata = fp; + emit_cfg.flags = flags; + fy_emit_setup(emit, &emit_cfg); + + fy_emit_prepare_document_state(emit, fyd->fyds); + + rc = 0; + if (fyd->root) + rc = fy_emit_node_check(emit, fyd->root); + + rc = fy_emit_document_no_check(emit, fyd); + + fy_emit_cleanup(emit); + + return rc ? rc : 0; +} + +int fy_emit_document_to_file(struct fy_document *fyd, + enum fy_emitter_cfg_flags flags, + const char *filename) +{ + FILE *fp; + int rc; + + fp = filename ? fopen(filename, "wa") : stdout; + if (!fp) + return -1; + + rc = fy_emit_document_to_fp(fyd, flags, fp); + + if (fp != stdout) + fclose(fp); + + return rc ? 
rc : 0; +} + +static int do_fd_output(struct fy_emitter *emit, enum fy_emitter_write_type type, const char *str, int leni, void *userdata) +{ + size_t len; + int fd; + ssize_t wrn; + int total; + + len = (size_t)leni; + + /* no funky stuff */ + if (len < 0) + return -1; + + /* get the file descriptor */ + fd = (int)(uintptr_t)userdata; + if (fd < 0) + return -1; + + /* loop output to fd */ + total = 0; + while (len > 0) { + + do { + wrn = write(fd, str, len); + } while (wrn == -1 && errno == EAGAIN); + + if (wrn == -1) + return -1; + + if (wrn == 0) + return total; + + len -= wrn; + str += wrn; + total += wrn; + } + + return total; +} + +int fy_emit_document_to_fd(struct fy_document *fyd, enum fy_emitter_cfg_flags flags, int fd) +{ + struct fy_emitter emit_state, *emit = &emit_state; + struct fy_emitter_cfg emit_cfg; + int rc; + + if (fd < 0) + return -1; + + memset(&emit_cfg, 0, sizeof(emit_cfg)); + emit_cfg.output = do_fd_output; + emit_cfg.userdata = (void *)(uintptr_t)fd; + emit_cfg.flags = flags; + fy_emit_setup(emit, &emit_cfg); + + fy_emit_prepare_document_state(emit, fyd->fyds); + + rc = 0; + if (fyd->root) + rc = fy_emit_node_check(emit, fyd->root); + + rc = fy_emit_document_no_check(emit, fyd); + + fy_emit_cleanup(emit); + + return rc ? 
rc : 0; +} + +int fy_emit_node_to_buffer(struct fy_node *fyn, enum fy_emitter_cfg_flags flags, char *buf, size_t size) +{ + int rc; + + rc = fy_emit_str_internal(NULL, flags, fyn, &buf, &size, false); + if (rc != 0) + return -1; + return size; +} + +char *fy_emit_node_to_string(struct fy_node *fyn, enum fy_emitter_cfg_flags flags) +{ + char *buf; + size_t size; + int rc; + + buf = NULL; + size = 0; + rc = fy_emit_str_internal(NULL, flags, fyn, &buf, &size, true); + if (rc != 0) + return NULL; + return buf; +} + +static bool fy_emit_ready(struct fy_emitter *emit) +{ + struct fy_eventp *fyep; + int need, count, level; + + /* no events in the list, not ready */ + fyep = fy_eventp_list_head(&emit->queued_events); + if (!fyep) + return false; + + /* some events need more than one */ + switch (fyep->e.type) { + case FYET_DOCUMENT_START: + need = 1; + break; + case FYET_SEQUENCE_START: + need = 2; + break; + case FYET_MAPPING_START: + need = 3; + break; + default: + need = 0; + break; + } + + /* if we don't need any more, that's enough */ + if (!need) + return true; + + level = 0; + count = 0; + for (; fyep; fyep = fy_eventp_next(&emit->queued_events, fyep)) { + count++; + + if (count > need) + return true; + + switch (fyep->e.type) { + case FYET_STREAM_START: + case FYET_DOCUMENT_START: + case FYET_SEQUENCE_START: + case FYET_MAPPING_START: + level++; + break; + case FYET_STREAM_END: + case FYET_DOCUMENT_END: + case FYET_SEQUENCE_END: + case FYET_MAPPING_END: + level--; + break; + default: + break; + } + + if (!level) + return true; + } + + return false; +} + +extern const char *fy_event_type_txt[]; + +const char *fy_emitter_state_txt[] = { + [FYES_NONE] = "NONE", + [FYES_STREAM_START] = "STREAM_START", + [FYES_FIRST_DOCUMENT_START] = "FIRST_DOCUMENT_START", + [FYES_DOCUMENT_START] = "DOCUMENT_START", + [FYES_DOCUMENT_CONTENT] = "DOCUMENT_CONTENT", + [FYES_DOCUMENT_END] = "DOCUMENT_END", + [FYES_SEQUENCE_FIRST_ITEM] = "SEQUENCE_FIRST_ITEM", + [FYES_SEQUENCE_ITEM] = 
"SEQUENCE_ITEM", + [FYES_MAPPING_FIRST_KEY] = "MAPPING_FIRST_KEY", + [FYES_MAPPING_KEY] = "MAPPING_KEY", + [FYES_MAPPING_SIMPLE_VALUE] = "MAPPING_SIMPLE_VALUE", + [FYES_MAPPING_VALUE] = "MAPPING_VALUE", + [FYES_END] = "END", +}; + +struct fy_eventp * +fy_emit_next_event(struct fy_emitter *emit) +{ + if (!fy_emit_ready(emit)) + return NULL; + + return fy_eventp_list_pop(&emit->queued_events); +} + +struct fy_eventp * +fy_emit_peek_next_event(struct fy_emitter *emit) +{ + if (!fy_emit_ready(emit)) + return NULL; + + return fy_eventp_list_head(&emit->queued_events); +} + +bool fy_emit_streaming_sequence_empty(struct fy_emitter *emit) +{ + struct fy_eventp *fyepn; + struct fy_event *fyen; + + fyepn = fy_emit_peek_next_event(emit); + fyen = fyepn ? &fyepn->e : NULL; + + return !fyen || fyen->type == FYET_SEQUENCE_END; +} + +bool fy_emit_streaming_mapping_empty(struct fy_emitter *emit) +{ + struct fy_eventp *fyepn; + struct fy_event *fyen; + + fyepn = fy_emit_peek_next_event(emit); + fyen = fyepn ? &fyepn->e : NULL; + + return !fyen || fyen->type == FYET_MAPPING_END; +} + +static void fy_emit_goto_state(struct fy_emitter *emit, enum fy_emitter_state state) +{ + if (emit->state == state) + return; + + emit->state = state; +} + +static int fy_emit_push_state(struct fy_emitter *emit, enum fy_emitter_state state) +{ + enum fy_emitter_state *states; + + if (emit->state_stack_top >= emit->state_stack_alloc) { + states = realloc(emit->state_stack == emit->state_stack_inplace ? 
NULL : emit->state_stack, + sizeof(emit->state_stack[0]) * emit->state_stack_alloc * 2); + if (!states) + return -1; + + if (emit->state_stack == emit->state_stack_inplace) + memcpy(states, emit->state_stack, sizeof(emit->state_stack[0]) * emit->state_stack_top); + emit->state_stack = states; + emit->state_stack_alloc *= 2; + } + emit->state_stack[emit->state_stack_top++] = state; + + return 0; +} + +enum fy_emitter_state fy_emit_pop_state(struct fy_emitter *emit) +{ + if (!emit->state_stack_top) + return FYES_NONE; + + return emit->state_stack[--emit->state_stack_top]; +} + +int fy_emit_push_sc(struct fy_emitter *emit, struct fy_emit_save_ctx *sc) +{ + struct fy_emit_save_ctx *scs; + + if (emit->sc_stack_top >= emit->sc_stack_alloc) { + scs = realloc(emit->sc_stack == emit->sc_stack_inplace ? NULL : emit->sc_stack, + sizeof(emit->sc_stack[0]) * emit->sc_stack_alloc * 2); + if (!scs) + return -1; + + if (emit->sc_stack == emit->sc_stack_inplace) + memcpy(scs, emit->sc_stack, sizeof(emit->sc_stack[0]) * emit->sc_stack_top); + emit->sc_stack = scs; + emit->sc_stack_alloc *= 2; + } + emit->sc_stack[emit->sc_stack_top++] = *sc; + + return 0; +} + +int fy_emit_pop_sc(struct fy_emitter *emit, struct fy_emit_save_ctx *sc) +{ + if (!emit->sc_stack_top) + return -1; + + *sc = emit->sc_stack[--emit->sc_stack_top]; + + return 0; +} + +static int fy_emit_streaming_node(struct fy_emitter *emit, struct fy_eventp *fyep, int flags) +{ + struct fy_event *fye = &fyep->e; + struct fy_emit_save_ctx *sc = &emit->s_sc; + enum fy_node_style style; + int ret, s_flags, s_indent; + + if (fye->type != FYET_ALIAS && fye->type != FYET_SCALAR && + (emit->s_flags & DDNF_ROOT) && emit->column != 0) { + fy_emit_putc(emit, fyewt_linebreak, '\n'); + emit->flags = FYEF_WHITESPACE | FYEF_INDENTATION; + } + + emit->s_flags = flags; + + switch (fye->type) { + case FYET_ALIAS: + fy_emit_token_write_alias(emit, fye->alias.anchor, emit->s_flags, emit->s_indent); + fy_emit_goto_state(emit, 
fy_emit_pop_state(emit)); + break; + + case FYET_SCALAR: + /* if we're pretty and at column 0 (meaning it's a single scalar document) output --- */ + if ((emit->s_flags & DDNF_ROOT) && fy_emit_is_pretty_mode(emit) && !emit->column && + !fy_emit_is_flow_mode(emit) && !(emit->s_flags & DDNF_FLOW)) + fy_emit_document_start_indicator(emit); + fy_emit_common_node_preamble(emit, fye->scalar.anchor, fye->scalar.tag, emit->s_flags, emit->s_indent); + style = fye->scalar.value ? + fy_node_style_from_scalar_style(fye->scalar.value->scalar.style) : + FYNS_PLAIN; + fy_emit_token_scalar(emit, fye->scalar.value, emit->s_flags, emit->s_indent, style, fye->scalar.tag); + fy_emit_goto_state(emit, fy_emit_pop_state(emit)); + break; + + case FYET_SEQUENCE_START: + + /* save this context */ + ret = fy_emit_push_sc(emit, sc); + if (ret) + return ret; + + s_flags = emit->s_flags; + s_indent = emit->s_indent; + + fy_emit_common_node_preamble(emit, fye->sequence_start.anchor, fye->sequence_start.tag, emit->s_flags, emit->s_indent); + + /* create new context */ + memset(sc, 0, sizeof(*sc)); + sc->flags = emit->s_flags & (DDNF_ROOT | DDNF_SEQ | DDNF_MAP); + sc->indent = emit->s_indent; + sc->empty = fy_emit_streaming_sequence_empty(emit); + sc->flow_token = fye->sequence_start.sequence_start && + fye->sequence_start.sequence_start->type == FYTT_FLOW_SEQUENCE_START; + sc->flow = !!(s_flags & DDNF_FLOW); + sc->old_indent = sc->indent; + sc->s_flags = s_flags; + sc->s_indent = s_indent; + sc->s_flags = s_flags; + sc->s_indent = s_indent; + + fy_emit_sequence_prolog(emit, sc); + sc->flags &= ~DDNF_MAP; + sc->flags |= DDNF_SEQ; + + emit->s_flags = sc->flags; + emit->s_indent = sc->indent; + + fy_emit_goto_state(emit, FYES_SEQUENCE_FIRST_ITEM); + break; + + case FYET_MAPPING_START: + /* save this context */ + ret = fy_emit_push_sc(emit, sc); + if (ret) + return ret; + + s_flags = emit->s_flags; + s_indent = emit->s_indent; + + fy_emit_common_node_preamble(emit, fye->mapping_start.anchor, 
fye->mapping_start.tag, emit->s_flags, emit->s_indent); + + /* create new context */ + memset(sc, 0, sizeof(*sc)); + sc->flags = emit->s_flags & (DDNF_ROOT | DDNF_SEQ | DDNF_MAP); + sc->indent = emit->s_indent; + sc->empty = fy_emit_streaming_mapping_empty(emit); + sc->flow_token = fye->mapping_start.mapping_start && + fye->mapping_start.mapping_start->type == FYTT_FLOW_MAPPING_START; + sc->flow = !!(s_flags & DDNF_FLOW); + sc->old_indent = sc->indent; + sc->s_flags = s_flags; + sc->s_indent = s_indent; + sc->s_flags = s_flags; + sc->s_indent = s_indent; + + fy_emit_mapping_prolog(emit, sc); + sc->flags &= ~DDNF_SEQ; + sc->flags |= DDNF_MAP; + + emit->s_flags = sc->flags; + emit->s_indent = sc->indent; + + fy_emit_goto_state(emit, FYES_MAPPING_FIRST_KEY); + break; + + default: + fy_error(emit->diag, "%s: expected ALIAS|SCALAR|SEQUENCE_START|MAPPING_START", __func__); + return -1; + } + + return 0; +} + +static int fy_emit_handle_stream_start(struct fy_emitter *emit, struct fy_eventp *fyep) +{ + struct fy_event *fye = &fyep->e; + + if (fye->type != FYET_STREAM_START) { + fy_error(emit->diag, "%s: expected FYET_STREAM_START", __func__); + return -1; + } + + fy_emit_reset(emit, false); + + fy_emit_goto_state(emit, FYES_FIRST_DOCUMENT_START); + + return 0; +} + +static int fy_emit_handle_document_start(struct fy_emitter *emit, struct fy_eventp *fyep, bool first) +{ + struct fy_event *fye = &fyep->e; + struct fy_document_state *fyds; + + if (fye->type != FYET_DOCUMENT_START && + fye->type != FYET_STREAM_END) { + fy_error(emit->diag, "%s: expected FYET_DOCUMENT_START|FYET_STREAM_END", __func__); + return -1; + } + + if (fye->type == FYET_STREAM_END) { + fy_emit_goto_state(emit, FYES_END); + return 0; + } + + /* transfer ownership to the emitter */ + fyds = fye->document_start.document_state; + fye->document_start.document_state = NULL; + + /* prepare (i.e. 
adapt to the document state) */ + fy_emit_prepare_document_state(emit, fyds); + + fy_emit_common_document_start(emit, fyds, false); + + fy_emit_goto_state(emit, FYES_DOCUMENT_CONTENT); + + return 0; +} + +static int fy_emit_handle_document_end(struct fy_emitter *emit, struct fy_eventp *fyep) +{ + struct fy_document_state *fyds; + struct fy_event *fye = &fyep->e; + int ret; + + if (fye->type != FYET_DOCUMENT_END) { + fy_error(emit->diag, "%s: expected FYET_DOCUMENT_END", __func__); + return -1; + } + + fyds = emit->fyds; + + ret = fy_emit_common_document_end(emit, true, fye->document_end.implicit); + if (ret) + return ret; + + fy_document_state_unref(fyds); + + fy_emit_reset(emit, false); + fy_emit_goto_state(emit, FYES_DOCUMENT_START); + return 0; +} + +static int fy_emit_handle_document_content(struct fy_emitter *emit, struct fy_eventp *fyep) +{ + struct fy_event *fye = &fyep->e; + int ret; + + /* empty document? */ + if (fye->type == FYET_DOCUMENT_END) + return fy_emit_handle_document_end(emit, fyep); + + ret = fy_emit_push_state(emit, FYES_DOCUMENT_END); + if (ret) + return ret; + + return fy_emit_streaming_node(emit, fyep, DDNF_ROOT); +} + +static int fy_emit_handle_sequence_item(struct fy_emitter *emit, struct fy_eventp *fyep, bool first) +{ + struct fy_event *fye = &fyep->e; + struct fy_emit_save_ctx *sc = &emit->s_sc; + struct fy_token *fyt_item = NULL; + int ret; + + fy_token_unref_rl(emit->recycled_token_list, sc->fyt_last_value); + sc->fyt_last_value = NULL; + + switch (fye->type) { + case FYET_SEQUENCE_END: + fy_emit_sequence_item_epilog(emit, sc, true, sc->fyt_last_value); + + /* emit epilog */ + fy_emit_sequence_epilog(emit, sc); + /* pop state */ + ret = fy_emit_pop_sc(emit, sc); + /* pop state */ + fy_emit_goto_state(emit, fy_emit_pop_state(emit)); + + /* restore indent and flags */ + emit->s_indent = sc->s_indent; + emit->s_flags = sc->s_flags; + return ret; + + case FYET_ALIAS: + fyt_item = fye->alias.anchor; + break; + case FYET_SCALAR: + fyt_item 
= fye->scalar.value; + break; + case FYET_SEQUENCE_START: + fyt_item = fye->sequence_start.sequence_start; + break; + case FYET_MAPPING_START: + fyt_item = fye->mapping_start.mapping_start; + break; + default: + fy_error(emit->diag, "%s: expected SEQUENCE_END|ALIAS|SCALAR|SEQUENCE_START|MAPPING_START", __func__); + return -1; + } + + ret = fy_emit_push_state(emit, FYES_SEQUENCE_ITEM); + if (ret) + return ret; + + /* reset indent and flags for each item */ + emit->s_indent = sc->indent; + emit->s_flags = sc->flags; + + if (!first) + fy_emit_sequence_item_epilog(emit, sc, false, sc->fyt_last_value); + + sc->fyt_last_value = fyt_item; + + fy_emit_sequence_item_prolog(emit, sc, fyt_item); + + ret = fy_emit_streaming_node(emit, fyep, sc->flags); + + switch (fye->type) { + case FYET_ALIAS: + fye->alias.anchor = NULL; /* take ownership */ + break; + case FYET_SCALAR: + fye->scalar.value = NULL; /* take ownership */ + break; + case FYET_SEQUENCE_START: + fye->sequence_start.sequence_start = NULL; /* take ownership */ + break; + case FYET_MAPPING_START: + fye->mapping_start.mapping_start = NULL; /* take ownership */ + break; + default: + break; + } + + return ret; +} + +static int fy_emit_handle_mapping_key(struct fy_emitter *emit, struct fy_eventp *fyep, bool first) +{ + struct fy_event *fye = &fyep->e; + struct fy_emit_save_ctx *sc = &emit->s_sc; + struct fy_token *fyt_key = NULL; + int ret, aflags; + bool simple_key; + + fy_token_unref_rl(emit->recycled_token_list, sc->fyt_last_key); + sc->fyt_last_key = NULL; + fy_token_unref_rl(emit->recycled_token_list, sc->fyt_last_value); + sc->fyt_last_value = NULL; + + simple_key = false; + + switch (fye->type) { + case FYET_MAPPING_END: + fy_emit_mapping_value_epilog(emit, sc, true, sc->fyt_last_value); + + /* emit epilog */ + fy_emit_mapping_epilog(emit, sc); + /* pop state */ + ret = fy_emit_pop_sc(emit, sc); + /* pop state */ + fy_emit_goto_state(emit, fy_emit_pop_state(emit)); + + /* restore indent and flags */ + 
emit->s_indent = sc->s_indent; + emit->s_flags = sc->s_flags; + return ret; + + case FYET_ALIAS: + fyt_key = fye->alias.anchor; + simple_key = true; + break; + case FYET_SCALAR: + fyt_key = fye->scalar.value; + aflags = fy_token_text_analyze(fyt_key); + simple_key = !!(aflags & FYTTAF_CAN_BE_SIMPLE_KEY); + break; + case FYET_SEQUENCE_START: + fyt_key = fye->sequence_start.sequence_start; + simple_key = fy_emit_streaming_sequence_empty(emit); + break; + case FYET_MAPPING_START: + fyt_key = fye->mapping_start.mapping_start; + simple_key = fy_emit_streaming_mapping_empty(emit); + break; + default: + fy_error(emit->diag, "%s: expected MAPPING_END|ALIAS|SCALAR|SEQUENCE_START|MAPPING_START", __func__); + return -1; + } + + ret = fy_emit_push_state(emit, FYES_MAPPING_VALUE); + if (ret) + return ret; + + /* reset indent and flags for each key/value pair */ + emit->s_indent = sc->indent; + emit->s_flags = sc->flags; + + if (!first) + fy_emit_mapping_value_epilog(emit, sc, false, sc->fyt_last_value); + + sc->fyt_last_key = fyt_key; + + fy_emit_mapping_key_prolog(emit, sc, fyt_key, simple_key); + + ret = fy_emit_streaming_node(emit, fyep, sc->flags); + + switch (fye->type) { + case FYET_ALIAS: + fye->alias.anchor = NULL; /* take ownership */ + break; + case FYET_SCALAR: + fye->scalar.value = NULL; /* take ownership */ + break; + case FYET_SEQUENCE_START: + fye->sequence_start.sequence_start = NULL; /* take ownership */ + break; + case FYET_MAPPING_START: + fye->mapping_start.mapping_start = NULL; /* take ownership */ + break; + default: + break; + } + + return ret; +} + +static int fy_emit_handle_mapping_value(struct fy_emitter *emit, struct fy_eventp *fyep, bool simple) +{ + struct fy_event *fye = &fyep->e; + struct fy_emit_save_ctx *sc = &emit->s_sc; + struct fy_token *fyt_value = NULL; + int ret; + + switch (fye->type) { + case FYET_ALIAS: + fyt_value = fye->alias.anchor; + break; + case FYET_SCALAR: + fyt_value = fye->scalar.value; /* take ownership */ + break; + case 
FYET_SEQUENCE_START: + fyt_value = fye->sequence_start.sequence_start; + break; + case FYET_MAPPING_START: + fyt_value = fye->mapping_start.mapping_start; + break; + default: + fy_error(emit->diag, "%s: expected ALIAS|SCALAR|SEQUENCE_START|MAPPING_START", __func__); + return -1; + } + + ret = fy_emit_push_state(emit, FYES_MAPPING_KEY); + if (ret) + return ret; + + fy_emit_mapping_key_epilog(emit, sc, sc->fyt_last_key); + + sc->fyt_last_value = fyt_value; + + fy_emit_mapping_value_prolog(emit, sc, fyt_value); + + ret = fy_emit_streaming_node(emit, fyep, sc->flags); + + switch (fye->type) { + case FYET_ALIAS: + fye->alias.anchor = NULL; /* take ownership */ + break; + case FYET_SCALAR: + fye->scalar.value = NULL; /* take ownership */ + break; + case FYET_SEQUENCE_START: + fye->sequence_start.sequence_start = NULL; /* take ownership */ + break; + case FYET_MAPPING_START: + fye->mapping_start.mapping_start = NULL; /* take ownership */ + break; + default: + break; + } + + return ret; +} + +int fy_emit_event_from_parser(struct fy_emitter *emit, struct fy_parser *fyp, struct fy_event *fye) +{ + struct fy_eventp *fyep; + int ret; + + if (!emit || !fye) + return -1; + + /* we're using the raw emitter interface, now mark first state */ + if (emit->state == FYES_NONE) + emit->state = FYES_STREAM_START; + + fyep = fy_container_of(fye, struct fy_eventp, e); + + fy_eventp_list_add_tail(&emit->queued_events, fyep); + + ret = 0; + while ((fyep = fy_emit_next_event(emit)) != NULL) { + + switch (emit->state) { + case FYES_STREAM_START: + ret = fy_emit_handle_stream_start(emit, fyep); + break; + + case FYES_FIRST_DOCUMENT_START: + case FYES_DOCUMENT_START: + ret = fy_emit_handle_document_start(emit, fyep, + emit->state == FYES_FIRST_DOCUMENT_START); + break; + + case FYES_DOCUMENT_CONTENT: + ret = fy_emit_handle_document_content(emit, fyep); + break; + + case FYES_DOCUMENT_END: + ret = fy_emit_handle_document_end(emit, fyep); + break; + + case FYES_SEQUENCE_FIRST_ITEM: + case 
FYES_SEQUENCE_ITEM: + ret = fy_emit_handle_sequence_item(emit, fyep, + emit->state == FYES_SEQUENCE_FIRST_ITEM); + break; + + case FYES_MAPPING_FIRST_KEY: + case FYES_MAPPING_KEY: + ret = fy_emit_handle_mapping_key(emit, fyep, + emit->state == FYES_MAPPING_FIRST_KEY); + break; + + case FYES_MAPPING_SIMPLE_VALUE: + case FYES_MAPPING_VALUE: + ret = fy_emit_handle_mapping_value(emit, fyep, + emit->state == FYES_MAPPING_SIMPLE_VALUE); + break; + + case FYES_END: + ret = -1; + break; + + default: + assert(1); /* Invalid state. */ + } + + /* always release the event */ + if (!fyp) + fy_eventp_release(fyep); + else + fy_parse_eventp_recycle(fyp, fyep); + + if (ret) + break; + } + + return ret; +} + +int fy_emit_event(struct fy_emitter *emit, struct fy_event *fye) +{ + return fy_emit_event_from_parser(emit, NULL, fye); +} + +struct fy_document_state * +fy_emitter_get_document_state(struct fy_emitter *emit) +{ + return emit ? emit->fyds : NULL; +} + +int fy_emitter_default_output(struct fy_emitter *fye, enum fy_emitter_write_type type, const char *str, int len, void *userdata) +{ + struct fy_emitter_default_output_data d_local, *d; + FILE *fp; + int ret, w; + const char *color = NULL; + const char *s, *e; + + d = userdata; + if (!d) { + /* kinda inneficient but should not matter */ + d = &d_local; + d->fp = stdout; + d->colorize = isatty(STDOUT_FILENO); + d->visible = false; + } + fp = d->fp; + + s = str; + e = str + len; + if (d->colorize) { + switch (type) { + case fyewt_document_indicator: + color = "\x1b[36m"; + break; + case fyewt_tag_directive: + case fyewt_version_directive: + color = "\x1b[33m"; + break; + case fyewt_indent: + if (d->visible) { + fputs("\x1b[32m", fp); + while (s < e && (w = fy_utf8_width_by_first_octet(((uint8_t)*s))) > 0) { + /* open box - U+2423 */ + fputs("\xe2\x90\xa3", fp); + s += w; + } + fputs("\x1b[0m", fp); + return len; + } + break; + case fyewt_indicator: + if (len == 1 && (str[0] == '\'' || str[0] == '"')) + color = "\x1b[33m"; + else 
if (len == 1 && str[0] == '&') + color = "\x1b[32;1m"; + else + color = "\x1b[35m"; + break; + case fyewt_whitespace: + if (d->visible) { + fputs("\x1b[32m", fp); + while (s < e && (w = fy_utf8_width_by_first_octet(((uint8_t)*s))) > 0) { + /* symbol for space - U+2420 */ + /* symbol for interpunct - U+00B7 */ + fputs("\xc2\xb7", fp); + s += w; + } + fputs("\x1b[0m", fp); + return len; + } + break; + case fyewt_plain_scalar: + color = "\x1b[37;1m"; + break; + case fyewt_single_quoted_scalar: + case fyewt_double_quoted_scalar: + color = "\x1b[33m"; + break; + case fyewt_literal_scalar: + case fyewt_folded_scalar: + color = "\x1b[33m"; + break; + case fyewt_anchor: + case fyewt_tag: + case fyewt_alias: + color = "\x1b[32;1m"; + break; + case fyewt_linebreak: + if (d->visible) { + fputs("\x1b[32m", fp); + while (s < e && (w = fy_utf8_width_by_first_octet(((uint8_t)*s))) > 0) { + /* symbol for space - ^M */ + /* fprintf(fp, "^M\n"); */ + /* down arrow - U+2193 */ + fputs("\xe2\x86\x93\n", fp); + s += w; + } + fputs("\x1b[0m", fp); + return len; + } + color = NULL; + break; + case fyewt_terminating_zero: + color = NULL; + break; + case fyewt_plain_scalar_key: + case fyewt_single_quoted_scalar_key: + case fyewt_double_quoted_scalar_key: + color = "\x1b[36;1m"; + break; + case fyewt_comment: + color = "\x1b[34;1m"; + break; + } + } + + /* don't output the terminating zero */ + if (type == fyewt_terminating_zero) + return len; + + if (color) + fputs(color, fp); + + ret = fwrite(str, 1, len, fp); + + if (color) + fputs("\x1b[0m", fp); + + return ret; +} + +int fy_document_default_emit_to_fp(struct fy_document *fyd, FILE *fp) +{ + struct fy_emitter emit_local, *emit = &emit_local; + struct fy_emitter_cfg ecfg_local, *ecfg = &ecfg_local; + struct fy_emitter_default_output_data d_local, *d = &d_local; + int rc; + + memset(d, 0, sizeof(*d)); + d->fp = fp; + d->colorize = isatty(fileno(fp)); + d->visible = false; + + memset(ecfg, 0, sizeof(*ecfg)); + ecfg->diag = fyd->diag; + 
ecfg->userdata = d; + + rc = fy_emit_setup(emit, ecfg); + if (rc) + goto err_setup; + + fy_emit_prepare_document_state(emit, fyd->fyds); + + rc = 0; + if (fyd->root) + rc = fy_emit_node_check(emit, fyd->root); + + rc = fy_emit_document_no_check(emit, fyd); + if (rc) + goto err_emit; + + fy_emit_cleanup(emit); + + return 0; + +err_emit: + fy_emit_cleanup(emit); +err_setup: + return -1; +} diff --git a/contrib/libs/libfyaml/src/lib/fy-emit.h b/contrib/libs/libfyaml/src/lib/fy-emit.h new file mode 100644 index 0000000000..3bbd8b5f1c --- /dev/null +++ b/contrib/libs/libfyaml/src/lib/fy-emit.h @@ -0,0 +1,151 @@ +/* + * fy-emit.h - internal YAML emitter header + * + * Copyright (c) 2019 Pantelis Antoniou <pantelis.antoniou@konsulko.com> + * + * SPDX-License-Identifier: MIT + */ +#ifndef FY_EMIT_H +#define FY_EMIT_H + +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#include <stdint.h> +#include <stdbool.h> +#include <stdio.h> + +#include <libfyaml.h> + +#include "fy-utf8.h" +#include "fy-event.h" +#include "fy-emit-accum.h" + +#define FYEF_WHITESPACE 0x0001 +#define FYEF_INDENTATION 0x0002 +#define FYEF_OPEN_ENDED 0x0004 +#define FYEF_HAD_DOCUMENT_START 0x0008 +#define FYEF_HAD_DOCUMENT_END 0x0010 +#define FYEF_HAD_DOCUMENT_OUTPUT 0x0020 + +struct fy_document; +struct fy_emitter; +struct fy_document_state; + +enum fy_emitter_state { + FYES_NONE, /* when not using the raw emitter interface */ + FYES_STREAM_START, + FYES_FIRST_DOCUMENT_START, + FYES_DOCUMENT_START, + FYES_DOCUMENT_CONTENT, + FYES_DOCUMENT_END, + FYES_SEQUENCE_FIRST_ITEM, + FYES_SEQUENCE_ITEM, + FYES_MAPPING_FIRST_KEY, + FYES_MAPPING_KEY, + FYES_MAPPING_SIMPLE_VALUE, + FYES_MAPPING_VALUE, + FYES_END, +}; + +struct fy_emit_save_ctx { + bool flow_token : 1; + bool flow : 1; + bool empty : 1; + int old_indent; + int flags; + int indent; + struct fy_token *fyt_last_key; + struct fy_token *fyt_last_value; + int s_flags; + int s_indent; +}; + +/* internal flags */ +#define DDNF_ROOT 0x0001 +#define DDNF_SEQ 
0x0002 +#define DDNF_MAP 0x0004 +#define DDNF_SIMPLE 0x0008 +#define DDNF_FLOW 0x0010 +#define DDNF_INDENTLESS 0x0020 +#define DDNF_SIMPLE_SCALAR_KEY 0x0040 + +struct fy_emitter { + int line; + int column; + int flow_level; + unsigned int flags; + bool output_error : 1; + bool source_json : 1; /* the source was json */ + bool force_json : 1; /* force JSON mode unconditionally */ + bool suppress_recycling_force : 1; + bool suppress_recycling : 1; + + /* current document */ + struct fy_emitter_cfg cfg; /* yeah, it isn't worth just to save a few bytes */ + struct fy_document *fyd; + struct fy_document_state *fyds; /* fyd->fyds when fyd != NULL */ + struct fy_emit_accum ea; + char ea_inplace_buf[256]; /* the in place accumulator buffer before allocating */ + struct fy_diag *diag; + + /* streaming event mode */ + enum fy_emitter_state state; + enum fy_emitter_state *state_stack; + unsigned int state_stack_alloc; + unsigned int state_stack_top; + enum fy_emitter_state state_stack_inplace[64]; + struct fy_eventp_list queued_events; + int s_indent; + int s_flags; + struct fy_emit_save_ctx s_sc; + struct fy_emit_save_ctx *sc_stack; + unsigned int sc_stack_alloc; + unsigned int sc_stack_top; + struct fy_emit_save_ctx sc_stack_inplace[16]; + + /* recycled */ + struct fy_eventp_list recycled_eventp; + struct fy_token_list recycled_token; + + struct fy_eventp_list *recycled_eventp_list; /* NULL when suppressing */ + struct fy_token_list *recycled_token_list; /* NULL when suppressing */ + + /* for special needs */ + void (*finalizer)(struct fy_emitter *emit); +}; + +int fy_emit_setup(struct fy_emitter *emit, const struct fy_emitter_cfg *cfg); +void fy_emit_cleanup(struct fy_emitter *emit); + +void fy_emit_write(struct fy_emitter *emit, enum fy_emitter_write_type type, const char *str, int len); + +static inline bool fy_emit_whitespace(struct fy_emitter *emit) +{ + return !!(emit->flags & FYEF_WHITESPACE); +} + +static inline bool fy_emit_indentation(struct fy_emitter *emit) +{ + 
return !!(emit->flags & FYEF_INDENTATION); +} + +static inline bool fy_emit_open_ended(struct fy_emitter *emit) +{ + return !!(emit->flags & FYEF_OPEN_ENDED); +} + +static inline void +fy_emit_output_accum(struct fy_emitter *emit, enum fy_emitter_write_type type, struct fy_emit_accum *ea) +{ + const char *text; + size_t len; + + text = fy_emit_accum_get(ea, &len); + if (text && len > 0) + fy_emit_write(emit, type, text, len); + fy_emit_accum_reset(ea); +} + +#endif diff --git a/contrib/libs/libfyaml/src/lib/fy-event.c b/contrib/libs/libfyaml/src/lib/fy-event.c new file mode 100644 index 0000000000..0e049b475e --- /dev/null +++ b/contrib/libs/libfyaml/src/lib/fy-event.c @@ -0,0 +1,914 @@ +/* + * fy-event.c - YAML event methods + * + * Copyright (c) 2021 Pantelis Antoniou <pantelis.antoniou@konsulko.com> + * + * SPDX-License-Identifier: MIT + */ +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#include <stdio.h> +#include <string.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <assert.h> +#include <stdlib.h> +#include <errno.h> +#include <stdarg.h> + +#include <libfyaml.h> + +#include "fy-parse.h" +#include "fy-emit.h" +#include "fy-doc.h" + +#include "fy-ctype.h" +#include "fy-utf8.h" +#include "fy-utils.h" + +#include "fy-event.h" + +struct fy_eventp *fy_eventp_alloc(void) +{ + struct fy_eventp *fyep; + + fyep = malloc(sizeof(*fyep)); + if (!fyep) + return NULL; + + fyep->e.type = FYET_NONE; + + return fyep; +} + +void fy_eventp_clean_rl(struct fy_token_list *fytl, struct fy_eventp *fyep) +{ + struct fy_event *fye; + + if (!fyep) + return; + + fye = &fyep->e; + switch (fye->type) { + case FYET_NONE: + break; + case FYET_STREAM_START: + fy_token_unref_rl(fytl, fye->stream_start.stream_start); + break; + case FYET_STREAM_END: + fy_token_unref_rl(fytl, fye->stream_end.stream_end); + break; + case FYET_DOCUMENT_START: + fy_token_unref_rl(fytl, fye->document_start.document_start); + fy_document_state_unref(fye->document_start.document_state); + break; + 
case FYET_DOCUMENT_END: + fy_token_unref_rl(fytl, fye->document_end.document_end); + break; + case FYET_MAPPING_START: + fy_token_unref_rl(fytl, fye->mapping_start.anchor); + fy_token_unref_rl(fytl, fye->mapping_start.tag); + fy_token_unref_rl(fytl, fye->mapping_start.mapping_start); + break; + case FYET_MAPPING_END: + fy_token_unref_rl(fytl, fye->mapping_end.mapping_end); + break; + case FYET_SEQUENCE_START: + fy_token_unref_rl(fytl, fye->sequence_start.anchor); + fy_token_unref_rl(fytl, fye->sequence_start.tag); + fy_token_unref_rl(fytl, fye->sequence_start.sequence_start); + break; + case FYET_SEQUENCE_END: + fy_token_unref_rl(fytl, fye->sequence_end.sequence_end); + break; + case FYET_SCALAR: + fy_token_unref_rl(fytl, fye->scalar.anchor); + fy_token_unref_rl(fytl, fye->scalar.tag); + fy_token_unref_rl(fytl, fye->scalar.value); + break; + case FYET_ALIAS: + fy_token_unref_rl(fytl, fye->alias.anchor); + break; + } + + fye->type = FYET_NONE; +} + +void fy_parse_eventp_clean(struct fy_parser *fyp, struct fy_eventp *fyep) +{ + if (!fyp || !fyep) + return; + + fy_eventp_clean_rl(fyp->recycled_token_list, fyep); +} + +void fy_emit_eventp_clean(struct fy_emitter *emit, struct fy_eventp *fyep) +{ + if (!emit || !fyep) + return; + + fy_eventp_clean_rl(emit->recycled_token_list, fyep); +} + +void fy_eventp_free(struct fy_eventp *fyep) +{ + if (!fyep) + return; + + /* clean, safe to do */ + fy_eventp_clean_rl(NULL, fyep); + + free(fyep); +} + +void fy_eventp_release(struct fy_eventp *fyep) +{ + fy_eventp_free(fyep); +} + +struct fy_eventp *fy_parse_eventp_alloc(struct fy_parser *fyp) +{ + struct fy_eventp *fyep = NULL; + + if (!fyp) + return NULL; + + if (fyp->recycled_eventp_list) + fyep = fy_eventp_list_pop(fyp->recycled_eventp_list); + if (!fyep) + fyep = fy_eventp_alloc(); + if (!fyep) + return NULL; + + fyep->e.type = FYET_NONE; + + return fyep; +} + +void fy_parse_eventp_recycle(struct fy_parser *fyp, struct fy_eventp *fyep) +{ + if (!fyp || !fyep) + return; + + /* 
clean, safe to do */ + fy_parse_eventp_clean(fyp, fyep); + + /* and push to the parser recycle list */ + if (fyp->recycled_eventp_list) + fy_eventp_list_push(fyp->recycled_eventp_list, fyep); + else + fy_eventp_free(fyep); +} + +void fy_parser_event_free(struct fy_parser *fyp, struct fy_event *fye) +{ + struct fy_eventp *fyep; + + if (!fyp || !fye) + return; + + fyep = fy_container_of(fye, struct fy_eventp, e); + + fy_parse_eventp_recycle(fyp, fyep); +} + +void fy_emit_eventp_recycle(struct fy_emitter *emit, struct fy_eventp *fyep) +{ + if (!emit || !fyep) + return; + + /* clean, safe to do */ + fy_emit_eventp_clean(emit, fyep); + + if (emit->recycled_eventp_list) + fy_eventp_list_push(emit->recycled_eventp_list, fyep); + else + fy_eventp_free(fyep); +} + +void fy_emit_event_free(struct fy_emitter *emit, struct fy_event *fye) +{ + struct fy_eventp *fyep; + + if (!emit || !fye) + return; + + fyep = fy_container_of(fye, struct fy_eventp, e); + + fy_emit_eventp_recycle(emit, fyep); +} + +struct fy_eventp * +fy_eventp_vcreate_internal(struct fy_eventp_list *recycled_list, struct fy_diag *diag, + struct fy_document_state *fyds, enum fy_event_type type, va_list ap) +{ + struct fy_eventp *fyep = NULL; + struct fy_event *fye = NULL; + struct fy_document_state *fyds_new = NULL; + const struct fy_version *vers; + const struct fy_tag *tag, * const *tagp; + struct fy_token *fyt; + enum fy_token_type ttype; + int rc, tag_count = 0; + enum fy_node_style style; + enum fy_scalar_style sstyle; + struct fy_token **fyt_anchorp = NULL, **fyt_tagp = NULL; + struct fy_input *fyi = NULL; + struct fy_atom handle; + const char *value; + size_t len; + char *data = NULL; + struct fy_tag_scan_info info; + struct fy_token *fyt_td; + + /* try the recycled list first */ + if (recycled_list) + fyep = fy_eventp_list_pop(recycled_list); + /* if not there yet, allocate a fresh one */ + if (!fyep) + fyep = fy_eventp_alloc(); + if (!fyep) + return NULL; + + fye = &fyep->e; + + fye->type = type; + + 
switch (type) { + case FYET_NONE: + break; + case FYET_STREAM_START: + fye->stream_start.stream_start = NULL; + break; + case FYET_STREAM_END: + fye->stream_end.stream_end = NULL; + break; + case FYET_DOCUMENT_START: + fye->document_start.document_start = NULL; + fyds_new = fy_document_state_default(fy_document_state_version(fyds), NULL); /* start with the default state */ + if (!fyds_new) { + fy_error(diag, "fy_document_state_alloc() failed\n"); + goto err_out; + } + fye->document_start.implicit = va_arg(ap, int); + vers = va_arg(ap, const struct fy_version *); + if (vers) { + fyds_new->version = *vers; + fyds_new->version_explicit = true; + } + fyds_new->start_implicit = fye->document_start.implicit; + fyds_new->end_implicit = false; /* this is not used right now */ + tag_count = 0; + tagp = va_arg(ap, const struct fy_tag * const *); + if (tagp) { + while ((tag = tagp[tag_count]) != NULL) { + tag_count++; + rc = fy_document_state_append_tag(fyds_new, tag->handle, tag->prefix, false); + if (rc) { + fy_error(diag, "fy_document_state_append_tag() failed on handle='%s' prefix='%s'\n", + tag->handle, tag->prefix); + goto err_out; + } + } + } + if (tag_count) + fyds_new->tags_explicit = true; + fye->document_start.document_state = fyds_new; + fyds_new = NULL; + break; + case FYET_DOCUMENT_END: + fye->document_end.document_end = NULL; + fye->document_end.implicit = va_arg(ap, int); + break; + case FYET_MAPPING_START: + case FYET_SEQUENCE_START: + style = va_arg(ap, enum fy_node_style); + ttype = FYTT_NONE; + + if (style != FYNS_ANY && style != FYNS_FLOW && style != FYNS_BLOCK) { + fy_error(diag, "illegal style for %s_START\n", + type == FYET_MAPPING_START ? "MAPPING" : "SEQUENCE"); + goto err_out; + } + + if (style != FYNS_ANY) { + if (style == FYNS_FLOW) + ttype = type == FYET_MAPPING_START ? + FYTT_FLOW_MAPPING_START : + FYTT_FLOW_SEQUENCE_START; + else + ttype = type == FYET_MAPPING_START ? 
+ FYTT_BLOCK_MAPPING_START : + FYTT_BLOCK_SEQUENCE_START; + fyt = fy_token_create(ttype, NULL); + if (!fyt) { + fy_error(diag, "fy_token_create() failed for %s_START\n", + type == FYET_MAPPING_START ? "MAPPING" : "SEQUENCE"); + goto err_out; + } + } else + fyt = NULL; + + if (type == FYET_MAPPING_START) { + fye->mapping_start.mapping_start = fyt; + fye->mapping_start.anchor = NULL; + fye->mapping_start.tag = NULL; + fyt_anchorp = &fye->mapping_start.anchor; + fyt_tagp = &fye->mapping_start.tag; + } else { + fye->sequence_start.sequence_start = fyt; + fye->sequence_start.anchor = NULL; + fye->sequence_start.tag = NULL; + fyt_anchorp = &fye->sequence_start.anchor; + fyt_tagp = &fye->sequence_start.tag; + } + fyt = NULL; + break; + case FYET_MAPPING_END: + fye->mapping_end.mapping_end = NULL; + break; + case FYET_SEQUENCE_END: + fye->sequence_end.sequence_end = NULL; + break; + + case FYET_SCALAR: + case FYET_ALIAS: + + if (type == FYET_SCALAR) { + sstyle = va_arg(ap, enum fy_scalar_style); + value = va_arg(ap, const char *); + len = va_arg(ap, size_t); + if (!value && len) { + fy_error(diag, "NULL value with len > 0, illegal SCALAR\n"); + goto err_out; + } + if (len == FY_NT) + len = strlen(value); + } else { + sstyle = FYSS_PLAIN; + value = va_arg(ap, const char *); + if (!value) { + fy_error(diag, "NULL value, illegal ALIAS\n"); + goto err_out; + } + len = strlen(value); + } + + fyt = NULL; + fyi = NULL; + + data = malloc(len + 1); + if (!data) { + fy_error(diag, "malloc() failed\n"); + goto err_out; + } + memcpy(data, value, len); + /* always NULL terminate */ + data[len] = '\0'; + fyi = fy_input_from_malloc_data(data, len, &handle, sstyle == FYSS_PLAIN); + if (!fyi) { + fy_error(diag, "fy_input_from_malloc_data() failed\n"); + goto err_out; + } + data = NULL; + + if (type == FYET_SCALAR) { + fyt = fy_token_create(FYTT_SCALAR, &handle, sstyle); + if (!fyt) { + fy_error(diag, "fy_token_create() failed for %s\n", + "SCALAR"); + goto err_out; + } + + 
fye->scalar.value = fyt; + fyt = NULL; + + fye->scalar.anchor = NULL; + fye->scalar.tag = NULL; + fyt_anchorp = &fye->scalar.anchor; + fyt_tagp = &fye->scalar.tag; + } else { + fyt = fy_token_create(FYTT_ALIAS, &handle, NULL); + if (!fyt) { + fy_error(diag, "fy_token_create() failed for %s\n", + "ALIAS"); + goto err_out; + } + + fye->alias.anchor = fyt; + fyt = NULL; + } + fy_input_unref(fyi); + fyi = NULL; + break; + } + + if (fyt_anchorp && (value = va_arg(ap, const char *)) != NULL) { + + len = strlen(value); + data = malloc(len + 1); + if (!data) { + fy_error(diag, "malloc() failed\n"); + goto err_out; + } + memcpy(data, value, len); + /* always NULL terminate */ + data[len] = '\0'; + + fyi = fy_input_from_malloc_data(data, len, &handle, true); + if (!fyi) { + fy_error(diag, "fy_input_from_malloc_data() failed\n"); + goto err_out; + } + data = NULL; + + /* make sure the input as valid as an anchor */ + if (!handle.valid_anchor) { + fy_error(diag, "input was not valid as anchor\n"); + goto err_out; + } + + fyt = fy_token_create(FYTT_ANCHOR, &handle); + if (!fyt) { + fy_error(diag, "fy_token_create() failed\n"); + goto err_out; + } + *fyt_anchorp = fyt; + fyt = NULL; + fy_input_unref(fyi); + fyi = NULL; + } + + if (fyt_tagp && (value = va_arg(ap, const char *)) != NULL) { + + len = strlen(value); + data = malloc(len + 1); + if (!data) { + fy_error(diag, "malloc() failed\n"); + goto err_out; + } + memcpy(data, value, len); + /* always NULL terminate */ + data[len] = '\0'; + + rc = fy_tag_scan(data, len, &info); + if (rc) { + fy_error(diag, "invalid tag %s (tag_scan)\n", value); + goto err_out; + } + + fyt_td = fy_document_state_lookup_tag_directive(fyds, data + info.prefix_length, info.handle_length); + if (!fyt_td) { + fy_error(diag, "invalid tag %s (lookup tag directive)\n", value); + goto err_out; + } + + fyi = fy_input_from_data(data, len, &handle, true); + if (!fyi) + goto err_out; + data = NULL; + + handle.style = FYAS_URI; + handle.direct_output = false; + 
handle.storage_hint = 0; + handle.storage_hint_valid = false; + + fyt = fy_token_create(FYTT_TAG, &handle, info.prefix_length, + info.handle_length, info.uri_length, fyt_td); + if (!fyt) { + fy_error(diag, "fy_token_create() failed\n"); + goto err_out; + } + *fyt_tagp = fyt; + fyt = NULL; + fy_input_unref(fyi); + fyi = NULL; + } + + return fyep; + +err_out: + fy_input_unref(fyi); + if (data) + free(data); + fy_document_state_unref(fyds_new); + /* don't bother with recycling on error */ + fy_eventp_free(fyep); + return NULL; +} + +struct fy_eventp * +fy_eventp_create_internal(struct fy_eventp_list *recycled_list, struct fy_diag *diag, + struct fy_document_state *fyds, + enum fy_event_type type, ...) +{ + struct fy_eventp *fyep; + va_list ap; + + va_start(ap, type); + fyep = fy_eventp_vcreate_internal(recycled_list, diag, fyds, type, ap); + va_end(ap); + + return fyep; +} + +struct fy_event * +fy_emit_event_vcreate(struct fy_emitter *emit, enum fy_event_type type, va_list ap) +{ + struct fy_eventp *fyep; + + if (!emit) + return NULL; + + fyep = fy_eventp_vcreate_internal(emit->recycled_eventp_list, emit->diag, emit->fyds, type, ap); + if (!fyep) + return NULL; + + return &fyep->e; +} + +struct fy_event * +fy_emit_event_create(struct fy_emitter *emit, enum fy_event_type type, ...) +{ + struct fy_event *fye; + va_list ap; + + va_start(ap, type); + fye = fy_emit_event_vcreate(emit, type, ap); + va_end(ap); + + return fye; +} + +struct fy_event * +fy_parse_event_vcreate(struct fy_parser *fyp, enum fy_event_type type, va_list ap) +{ + struct fy_eventp *fyep; + + if (!fyp) + return NULL; + + fyep = fy_eventp_vcreate_internal(fyp->recycled_eventp_list, fyp->diag, fyp->current_document_state, type, ap); + if (!fyep) + return NULL; + + return &fyep->e; +} + +struct fy_event * +fy_parse_event_create(struct fy_parser *fyp, enum fy_event_type type, ...) 
+{ + struct fy_event *fye; + va_list ap; + + va_start(ap, type); + fye = fy_parse_event_vcreate(fyp, type, ap); + va_end(ap); + + return fye; +} + +bool fy_event_is_implicit(struct fy_event *fye) +{ + /* NULL event is implicit */ + if (!fye) + return true; + + switch (fye->type) { + + case FYET_DOCUMENT_START: + return fye->document_start.implicit; + + case FYET_DOCUMENT_END: + return fye->document_end.implicit; + + case FYET_MAPPING_START: + case FYET_MAPPING_END: + case FYET_SEQUENCE_START: + case FYET_SEQUENCE_END: + return fy_event_get_node_style(fye) == FYNS_BLOCK; + + default: + break; + } + + return false; +} + +bool fy_document_event_is_implicit(const struct fy_event *fye) +{ + if (fye->type == FYET_DOCUMENT_START) + return fye->document_start.implicit; + + if (fye->type == FYET_DOCUMENT_END) + return fye->document_end.implicit; + + return false; +} + +struct fy_token *fy_event_get_token(struct fy_event *fye) +{ + if (!fye) + return NULL; + + switch (fye->type) { + case FYET_NONE: + break; + + case FYET_STREAM_START: + return fye->stream_start.stream_start; + + case FYET_STREAM_END: + return fye->stream_end.stream_end; + + case FYET_DOCUMENT_START: + return fye->document_start.document_start; + + case FYET_DOCUMENT_END: + return fye->document_end.document_end; + + case FYET_MAPPING_START: + return fye->mapping_start.mapping_start; + + case FYET_MAPPING_END: + return fye->mapping_end.mapping_end; + + case FYET_SEQUENCE_START: + return fye->sequence_start.sequence_start; + + case FYET_SEQUENCE_END: + return fye->sequence_end.sequence_end; + + case FYET_SCALAR: + return fye->scalar.value; + + case FYET_ALIAS: + return fye->alias.anchor; + + } + + return NULL; +} + +struct fy_token *fy_event_get_anchor_token(struct fy_event *fye) +{ + if (!fye) + return NULL; + + switch (fye->type) { + case FYET_MAPPING_START: + return fye->mapping_start.anchor; + case FYET_SEQUENCE_START: + return fye->sequence_start.anchor; + case FYET_SCALAR: + return fye->scalar.anchor; + 
default: + break; + } + + return NULL; +} + +struct fy_token *fy_event_get_tag_token(struct fy_event *fye) +{ + if (!fye) + return NULL; + + switch (fye->type) { + case FYET_MAPPING_START: + return fye->mapping_start.tag; + case FYET_SEQUENCE_START: + return fye->sequence_start.tag; + case FYET_SCALAR: + return fye->scalar.tag; + default: + break; + } + + return NULL; +} + +const struct fy_mark *fy_event_start_mark(struct fy_event *fye) +{ + if (!fye) + return NULL; + + switch (fye->type) { + case FYET_NONE: + break; + + case FYET_STREAM_START: + return fy_token_start_mark(fye->stream_start.stream_start); + + case FYET_STREAM_END: + return fy_token_start_mark(fye->stream_end.stream_end); + + case FYET_DOCUMENT_START: + return fy_token_start_mark(fye->document_start.document_start); + + case FYET_DOCUMENT_END: + return fy_token_start_mark(fye->document_end.document_end); + + case FYET_MAPPING_START: + return fy_token_start_mark(fye->mapping_start.mapping_start); + + case FYET_MAPPING_END: + return fy_token_start_mark(fye->mapping_end.mapping_end); + + case FYET_SEQUENCE_START: + return fy_token_start_mark(fye->sequence_start.sequence_start); + + case FYET_SEQUENCE_END: + return fy_token_start_mark(fye->sequence_end.sequence_end); + + case FYET_SCALAR: + return fy_token_start_mark(fye->scalar.value); + + case FYET_ALIAS: + return fy_token_start_mark(fye->alias.anchor); + + } + + return NULL; +} + +const struct fy_mark *fy_event_end_mark(struct fy_event *fye) +{ + if (!fye) + return NULL; + + switch (fye->type) { + case FYET_NONE: + break; + + case FYET_STREAM_START: + return fy_token_end_mark(fye->stream_start.stream_start); + + case FYET_STREAM_END: + return fy_token_end_mark(fye->stream_end.stream_end); + + case FYET_DOCUMENT_START: + return fy_token_end_mark(fye->document_start.document_start); + + case FYET_DOCUMENT_END: + return fy_token_end_mark(fye->document_end.document_end); + + case FYET_MAPPING_START: + return 
fy_token_end_mark(fye->mapping_start.mapping_start); + + case FYET_MAPPING_END: + return fy_token_end_mark(fye->mapping_end.mapping_end); + + case FYET_SEQUENCE_START: + return fy_token_end_mark(fye->sequence_start.sequence_start); + + case FYET_SEQUENCE_END: + return fy_token_end_mark(fye->sequence_end.sequence_end); + + case FYET_SCALAR: + return fy_token_end_mark(fye->scalar.value); + + case FYET_ALIAS: + return fy_token_end_mark(fye->alias.anchor); + + } + + return NULL; +} + +enum fy_node_style +fy_event_get_node_style(struct fy_event *fye) +{ + struct fy_token *fyt; + + fyt = fy_event_get_token(fye); + if (!fyt) + return FYNS_ANY; + + switch (fye->type) { + /* unstyled events */ + case FYET_NONE: + case FYET_STREAM_START: + case FYET_STREAM_END: + case FYET_DOCUMENT_START: + case FYET_DOCUMENT_END: + return FYNS_ANY; + + case FYET_MAPPING_START: + return fyt && fyt->type == FYTT_FLOW_MAPPING_START ? FYNS_FLOW : FYNS_BLOCK; + + case FYET_MAPPING_END: + return fyt && fyt->type == FYTT_FLOW_MAPPING_END ? FYNS_FLOW : FYNS_BLOCK; + + case FYET_SEQUENCE_START: + return fyt && fyt->type == FYTT_FLOW_SEQUENCE_START ? FYNS_FLOW : FYNS_BLOCK; + + case FYET_SEQUENCE_END: + return fyt && fyt->type == FYTT_FLOW_SEQUENCE_END ? FYNS_FLOW : FYNS_BLOCK; + + case FYET_SCALAR: + return fyt ? 
fy_node_style_from_scalar_style(fyt->scalar.style) : FYNS_PLAIN; + + case FYET_ALIAS: + return FYNS_ALIAS; + + } + + return FYNS_ANY; +} + +const struct fy_version * +fy_document_start_event_version(struct fy_event *fye) +{ + /* return the default if not set */ + if (!fye || fye->type != FYET_DOCUMENT_START) + return &fy_default_version; + return fy_document_state_version(fye->document_start.document_state); +} + +struct fy_eventp * +fy_document_iterator_eventp_alloc(struct fy_document_iterator *fydi) +{ + struct fy_eventp *fyep = NULL; + + if (!fydi) + return NULL; + + if (fydi->recycled_eventp_list) + fyep = fy_eventp_list_pop(fydi->recycled_eventp_list); + if (!fyep) + fyep = fy_eventp_alloc(); + if (!fyep) + return NULL; + + fyep->e.type = FYET_NONE; + + return fyep; +} + +void fy_document_iterator_eventp_clean(struct fy_document_iterator *fydi, struct fy_eventp *fyep) +{ + if (!fydi || !fyep) + return; + + fy_eventp_clean_rl(fydi->recycled_token_list, fyep); +} + +void fy_document_iterator_eventp_recycle(struct fy_document_iterator *fydi, struct fy_eventp *fyep) +{ + if (!fydi || !fyep) + return; + + /* clean, safe to do */ + fy_document_iterator_eventp_clean(fydi, fyep); + + if (fydi->recycled_eventp_list) + fy_eventp_list_push(fydi->recycled_eventp_list, fyep); + else + fy_eventp_free(fyep); +} + +struct fy_event * +fy_document_iterator_event_vcreate(struct fy_document_iterator *fydi, enum fy_event_type type, va_list ap) +{ + struct fy_eventp *fyep; + + if (!fydi) + return NULL; + + fyep = fy_eventp_vcreate_internal(fydi->recycled_eventp_list, + fydi->fyd ? fydi->fyd->diag : NULL, + fydi->fyd ? fydi->fyd->fyds : NULL, + type, ap); + if (!fyep) + return NULL; + + return &fyep->e; +} + +struct fy_event * +fy_document_iterator_event_create(struct fy_document_iterator *fydi, enum fy_event_type type, ...) 
+{ + struct fy_event *fye; + va_list ap; + + va_start(ap, type); + fye = fy_document_iterator_event_vcreate(fydi, type, ap); + va_end(ap); + + return fye; +} + +void fy_document_iterator_event_free(struct fy_document_iterator *fydi, struct fy_event *fye) +{ + struct fy_eventp *fyep; + + if (!fydi || !fye) + return; + + fyep = fy_container_of(fye, struct fy_eventp, e); + + fy_document_iterator_eventp_recycle(fydi, fyep); +} diff --git a/contrib/libs/libfyaml/src/lib/fy-event.h b/contrib/libs/libfyaml/src/lib/fy-event.h new file mode 100644 index 0000000000..4da3c8f58b --- /dev/null +++ b/contrib/libs/libfyaml/src/lib/fy-event.h @@ -0,0 +1,51 @@ +/* + * fy-event.h - YAML parser private event definition + * + * Copyright (c) 2019 Pantelis Antoniou <pantelis.antoniou@konsulko.com> + * + * SPDX-License-Identifier: MIT + */ +#ifndef FY_EVENT_H +#define FY_EVENT_H + +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#include <stdbool.h> + +#include <libfyaml.h> + +#include "fy-list.h" +#include "fy-typelist.h" + +/* private event type */ +FY_TYPE_FWD_DECL_LIST(eventp); +struct fy_eventp { + struct fy_list_head node; + struct fy_event e; +}; +FY_TYPE_DECL_LIST(eventp); +FY_PARSE_TYPE_DECL_ALLOC(eventp); + +struct fy_eventp *fy_eventp_alloc(void); +void fy_eventp_free(struct fy_eventp *fyep); + +/* called from internal emitter */ +void fy_eventp_release(struct fy_eventp *fyep); + +struct fy_eventp *fy_parse_eventp_alloc(struct fy_parser *fyp); +void fy_parse_eventp_recycle(struct fy_parser *fyp, struct fy_eventp *fyep); + +struct fy_eventp *fy_emit_eventp_alloc(struct fy_emitter *fye); +void fy_emit_eventp_recycle(struct fy_emitter *emit, struct fy_eventp *fyep); + +struct fy_document_iterator; + +struct fy_eventp *fy_document_iterator_eventp_alloc(struct fy_document_iterator *fydi); +void fy_document_iterator_eventp_recycle(struct fy_document_iterator *fydi, struct fy_eventp *fyep); +struct fy_event *fy_document_iterator_event_create(struct fy_document_iterator 
*document_iterator, enum fy_event_type type, ...); +struct fy_event *fy_document_iterator_event_vcreate(struct fy_document_iterator *document_iterator, enum fy_event_type type, va_list ap); +void fy_document_iterator_event_free(struct fy_document_iterator *document_iterator, struct fy_event *fye); + +#endif diff --git a/contrib/libs/libfyaml/src/lib/fy-input.c b/contrib/libs/libfyaml/src/lib/fy-input.c new file mode 100644 index 0000000000..ac021ad534 --- /dev/null +++ b/contrib/libs/libfyaml/src/lib/fy-input.c @@ -0,0 +1,1035 @@ +/* + * fy-input.c - YAML input methods + * + * Copyright (c) 2019 Pantelis Antoniou <pantelis.antoniou@konsulko.com> + * + * SPDX-License-Identifier: MIT + */ +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#include <stdio.h> +#include <string.h> +#include <assert.h> +#include <stdlib.h> +#include <stdarg.h> +#include <fcntl.h> +#if defined (__unix__) || (defined (__APPLE__) && defined (__MACH__)) +#include <unistd.h> +#include <sys/mman.h> +#endif +#include <sys/types.h> +#include <sys/stat.h> +#if defined (__unix__) || (defined (__APPLE__) && defined (__MACH__)) +#include <sys/ioctl.h> +#endif +#include <errno.h> + +#include <libfyaml.h> + +#include "fy-parse.h" +#include "fy-ctype.h" + +#include "fy-input.h" + +/* amount of multiplication of page size for CHOP size + * for a 4K page this is 64K blocks + */ +#ifndef FYI_CHOP_MULT +#define FYI_CHOP_MULT 16 +#endif + +struct fy_input *fy_input_alloc(void) +{ + struct fy_input *fyi; + + fyi = malloc(sizeof(*fyi)); + if (!fyi) + return NULL; + memset(fyi, 0, sizeof(*fyi)); + + fyi->state = FYIS_NONE; + fyi->refs = 1; + + return fyi; +} + +void fy_input_free(struct fy_input *fyi) +{ + if (!fyi) + return; + + assert(fyi->refs == 1); + + switch (fyi->state) { + case FYIS_NONE: + case FYIS_QUEUED: + /* nothing to do */ + break; + case FYIS_PARSE_IN_PROGRESS: + case FYIS_PARSED: + fy_input_close(fyi); + break; + } + + /* always release the memory of the alloc memory */ + switch 
(fyi->cfg.type) { + case fyit_alloc: + free(fyi->cfg.alloc.data); + break; + + default: + break; + } + if (fyi->name) + free(fyi->name); + + free(fyi); +} + +const char *fy_input_get_filename(struct fy_input *fyi) +{ + if (!fyi) + return NULL; + + return fyi->name; +} + +static void fy_input_from_data_setup(struct fy_input *fyi, + struct fy_atom *handle, bool simple) +{ + const char *data; + size_t size; + unsigned int aflags; + + /* this is an internal method, you'd better to pass garbage */ + data = fy_input_start(fyi); + size = fy_input_size(fyi); + + fyi->buffer = NULL; + fyi->allocated = 0; + fyi->read = 0; + fyi->chunk = 0; + fyi->chop = 0; + fyi->fp = NULL; + + if (!handle) + goto out; + + memset(handle, 0, sizeof(*handle)); + + if (size > 0) + aflags = fy_analyze_scalar_content(data, size, + false, fylb_cr_nl, fyfws_space_tab); /* hardcoded yaml mode */ + else + aflags = FYACF_EMPTY | FYACF_FLOW_PLAIN | FYACF_BLOCK_PLAIN | FYACF_SIZE0; + + handle->start_mark.input_pos = 0; + handle->start_mark.line = 0; + handle->start_mark.column = 0; + handle->end_mark.input_pos = size; + handle->end_mark.line = 0; + handle->end_mark.column = fy_utf8_count(data, size); + /* if it's plain, all is good */ + if (simple || (aflags & FYACF_FLOW_PLAIN)) { + handle->storage_hint = size; /* maximum */ + handle->storage_hint_valid = false; + handle->direct_output = !!(aflags & FYACF_JSON_ESCAPE); + handle->style = FYAS_PLAIN; + } else { + handle->storage_hint = 0; /* just calculate */ + handle->storage_hint_valid = false; + handle->direct_output = false; + handle->style = FYAS_DOUBLE_QUOTED_MANUAL; + } + handle->empty = !!(aflags & FYACF_EMPTY); + handle->has_lb = !!(aflags & FYACF_LB); + handle->has_ws = !!(aflags & FYACF_WS); + handle->starts_with_ws = !!(aflags & FYACF_STARTS_WITH_WS); + handle->starts_with_lb = !!(aflags & FYACF_STARTS_WITH_LB); + handle->ends_with_ws = !!(aflags & FYACF_ENDS_WITH_WS); + handle->ends_with_lb = !!(aflags & FYACF_ENDS_WITH_LB); + 
handle->trailing_lb = !!(aflags & FYACF_TRAILING_LB); + handle->size0 = !!(aflags & FYACF_SIZE0); + handle->valid_anchor = !!(aflags & FYACF_VALID_ANCHOR); + + handle->chomp = FYAC_STRIP; + handle->increment = 0; + handle->fyi = fyi; + handle->fyi_generation = fyi->generation; + handle->tabsize = 0; + handle->json_mode = false; /* XXX hardcoded */ + handle->lb_mode = fylb_cr_nl; + handle->fws_mode = fyfws_space_tab; +out: + fyi->state = FYIS_PARSED; +} + +struct fy_input *fy_input_from_data(const char *data, size_t size, + struct fy_atom *handle, bool simple) +{ + struct fy_input *fyi; + + if (data && size == (size_t)-1) + size = strlen(data); + + fyi = fy_input_alloc(); + if (!fyi) + return NULL; + + fyi->cfg.type = fyit_memory; + fyi->cfg.userdata = NULL; + fyi->cfg.memory.data = data; + fyi->cfg.memory.size = size; + + fy_input_from_data_setup(fyi, handle, simple); + + return fyi; +} + +struct fy_input *fy_input_from_malloc_data(char *data, size_t size, + struct fy_atom *handle, bool simple) +{ + struct fy_input *fyi; + + if (data && size == (size_t)-1) + size = strlen(data); + + fyi = fy_input_alloc(); + if (!fyi) + return NULL; + + fyi->cfg.type = fyit_alloc; + fyi->cfg.userdata = NULL; + fyi->cfg.alloc.data = data; + fyi->cfg.alloc.size = size; + + fy_input_from_data_setup(fyi, handle, simple); + + return fyi; +} + +void fy_input_close(struct fy_input *fyi) +{ + if (!fyi) + return; + + switch (fyi->cfg.type) { + + case fyit_file: + case fyit_fd: + +#if !defined(_MSC_VER) + if (fyi->addr) { + munmap(fyi->addr, fyi->length); + fyi->addr = NULL; + } +#endif + + if (fyi->fd != -1) { + if (!fyi->cfg.no_close_fd) + close(fyi->fd); + fyi->fd = -1; + } + + if (fyi->buffer) { + free(fyi->buffer); + fyi->buffer = NULL; + } + if (fyi->fp) { + if (!fyi->cfg.no_fclose_fp) + fclose(fyi->fp); + fyi->fp = NULL; + } + break; + + case fyit_stream: + case fyit_callback: + if (fyi->buffer) { + free(fyi->buffer); + fyi->buffer = NULL; + } + break; + + case fyit_memory: + /* 
nothing */ + break; + + case fyit_alloc: + /* nothing */ + break; + + default: + break; + } +} + +struct fy_diag *fy_reader_get_diag(struct fy_reader *fyr) +{ + if (fyr && fyr->ops && fyr->ops->get_diag) + return fyr->ops->get_diag(fyr); + + return NULL; +} + +int fy_reader_file_open(struct fy_reader *fyr, const char *filename) +{ + if (!fyr || !filename) + return -1; + + if (fyr->ops && fyr->ops->file_open) + return fyr->ops->file_open(fyr, filename); + + return open(filename, O_RDONLY); +} + +void fy_reader_reset(struct fy_reader *fyr) +{ + const struct fy_reader_ops *ops; + struct fy_diag *diag; + + if (!fyr) + return; + + ops = fyr->ops; + diag = fyr->diag; + + fy_input_unref(fyr->current_input); + + memset(fyr, 0, sizeof(*fyr)); + + /* by default we're always in yaml mode */ + fyr->mode = fyrm_yaml; + fyr->ops = ops; + fyr->diag = diag; + fyr->current_c = -1; +} + +void fy_reader_setup(struct fy_reader *fyr, const struct fy_reader_ops *ops) +{ + if (!fyr) + return; + + fyr->ops = ops; + fyr->diag = fy_reader_get_diag(fyr); + fyr->current_input = NULL; + fy_reader_reset(fyr); +} + +void fy_reader_cleanup(struct fy_reader *fyr) +{ + if (!fyr) + return; + + fy_input_unref(fyr->current_input); + fyr->current_input = NULL; + fy_reader_reset(fyr); +} + +void fy_reader_apply_mode(struct fy_reader *fyr) +{ + struct fy_input *fyi; + + assert(fyr); + + /* set input mode from the current reader settings */ + switch (fyr->mode) { + case fyrm_yaml: + fyr->json_mode = false; + fyr->lb_mode = fylb_cr_nl; + fyr->fws_mode = fyfws_space_tab; + break; + case fyrm_json: + fyr->json_mode = true; + fyr->lb_mode = fylb_cr_nl; + fyr->fws_mode = fyfws_space; + break; + case fyrm_yaml_1_1: + fyr->json_mode = false; + fyr->lb_mode = fylb_cr_nl_N_L_P; + fyr->fws_mode = fyfws_space_tab; + break; + } + fyi = fyr->current_input; + if (fyi) { + fyi->json_mode = fyr->json_mode; + fyi->lb_mode = fyr->lb_mode; + fyi->fws_mode = fyr->fws_mode; + } +} + +int fy_reader_input_open(struct fy_reader 
*fyr, struct fy_input *fyi, const struct fy_reader_input_cfg *icfg) +{ + struct stat sb; + int rc; + + if (!fyi) + return -1; + + /* unref any previous input */ + fy_input_unref(fyr->current_input); + fyr->current_input = fy_input_ref(fyi); + + fy_reader_apply_mode(fyr); + + if (!icfg) + memset(&fyr->current_input_cfg, 0, sizeof(fyr->current_input_cfg)); + else + fyr->current_input_cfg = *icfg; + + /* reset common data */ + fyi->buffer = NULL; + fyi->allocated = 0; + fyi->read = 0; + fyi->chunk = 0; + fyi->chop = 0; + fyi->fp = NULL; + + switch (fyi->cfg.type) { + + case fyit_file: + case fyit_fd: + + switch (fyi->cfg.type) { + case fyit_file: + fyi->fd = fy_reader_file_open(fyr, fyi->cfg.file.filename); + fyr_error_check(fyr, fyi->fd != -1, err_out, + "failed to open %s", fyi->cfg.file.filename); + break; + + case fyit_fd: + fyi->fd = fyi->cfg.fd.fd; + fyr_error_check(fyr, fyi->fd >= 0, err_out, + "bad file.fd %d", fyi->cfg.fd.fd); + break; + default: + assert(0); // will never happen + } + + rc = fstat(fyi->fd, &sb); + fyr_error_check(fyr, rc != -1, err_out, + "failed to fstat %s", fyi->cfg.file.filename); + + fyi->length = sb.st_size; + + /* only map if not zero (and is not disabled) */ +#if !defined(_MSC_VER) + if (sb.st_size > 0 && !fyr->current_input_cfg.disable_mmap_opt) { + fyi->addr = mmap(NULL, sb.st_size, PROT_READ, MAP_PRIVATE, fyi->fd, 0); + + /* convert from MAP_FAILED to NULL */ + if (fyi->addr == MAP_FAILED) { + fyr_debug(fyr, "mmap failed for file %s", + fyi->cfg.file.filename); + fyi->addr = NULL; + } + } +#endif + /* if we've managed to mmap, we' good */ + if (fyi->addr) + break; + + /* if we're not ignoring stdio, open a FILE* using the fd */ + if (!fyi->cfg.ignore_stdio) { + fyi->fp = fdopen(fyi->fd, "r"); + fyr_error_check(fyr, rc != -1, err_out, "failed to fdopen %s", fyi->name); + } else + fyi->fp = NULL; + + break; + + case fyit_stream: + if (!fyi->cfg.ignore_stdio) + fyi->fp = fyi->cfg.stream.fp; + else + fyi->fd = 
fileno(fyi->cfg.stream.fp); + break; + + case fyit_memory: + /* nothing to do for memory */ + break; + + case fyit_alloc: + /* nothing to do for memory */ + break; + + case fyit_callback: + break; + + default: + assert(0); + break; + } + + switch (fyi->cfg.type) { + + /* those two need no memory */ + case fyit_memory: + case fyit_alloc: + break; + + /* all the rest need it */ + default: + /* if we're not in mmap mode */ +#if !defined(_MSC_VER) + if (fyi->addr && !fyr->current_input_cfg.disable_mmap_opt) + break; +#endif + + fyi->chunk = fyi->cfg.chunk; + if (!fyi->chunk) + fyi->chunk = fy_get_pagesize(); + fyi->chop = fyi->chunk * FYI_CHOP_MULT; + fyi->buffer = malloc(fyi->chunk); + fyr_error_check(fyr, fyi->buffer, err_out, + "fy_alloc() failed"); + fyi->allocated = fyi->chunk; + break; + } + + fyr->this_input_start = 0; + fyr->current_input_pos = 0; + fyr->line = 0; + fyr->column = 0; + fyr->current_c = -1; + fyr->current_ptr = NULL; + fyr->current_w = 0; + fyr->current_left = 0; + + fyi->state = FYIS_PARSE_IN_PROGRESS; + + return 0; + +err_out: + fy_input_close(fyi); + return -1; +} + +int fy_reader_input_done(struct fy_reader *fyr) +{ + struct fy_input *fyi; + void *buf; + + if (!fyr) + return -1; + + fyi = fyr->current_input; + if (!fyi) + return 0; + + switch (fyi->cfg.type) { + case fyit_file: + case fyit_fd: + if (fyi->addr) + break; + + /* fall-through */ + + case fyit_stream: + case fyit_callback: + /* chop extra buffer */ + buf = realloc(fyi->buffer, fyr->current_input_pos); + fyr_error_check(fyr, buf || !fyr->current_input_pos, err_out, + "realloc() failed"); + + fyi->buffer = buf; + fyi->allocated = fyr->current_input_pos; + /* increate input generation; required for direct input to work */ + fyi->generation++; + break; + + default: + break; + + } + + fyi->state = FYIS_PARSED; + fy_input_unref(fyi); + fyr->current_input = NULL; + + return 0; + +err_out: + return -1; +} + +int fy_reader_input_scan_token_mark_slow_path(struct fy_reader *fyr) +{ + struct 
fy_input *fyi, *fyi_new = NULL; + + assert(fyr); + + if (!fy_reader_input_chop_active(fyr)) + return 0; + + fyi = fyr->current_input; + assert(fyi); + + fyi_new = fy_input_alloc(); + fyr_error_check(fyr, fyi_new, err_out, + "fy_input_alloc() failed\n"); + + /* copy the config over */ + fyi_new->cfg = fyi->cfg; + fyi_new->name = strdup(fyi->name); + fyr_error_check(fyr, fyi_new->name, err_out, + "strdup() failed\n"); + + fyi_new->chunk = fyi->chunk; + fyi_new->chop = fyi->chop; + fyi_new->buffer = malloc(fyi->chunk); + fyr_error_check(fyr, fyi_new->buffer, err_out, + "fy_alloc() failed"); + fyi_new->allocated = fyi->chunk; + fyi_new->fp = fyi->fp; + + fyi->fp = NULL; /* the file pointer now assigned to the new */ + + fyi_new->lb_mode = fyi->lb_mode; + fyi_new->fws_mode = fyi->fws_mode; + + fyi_new->state = FYIS_PARSE_IN_PROGRESS; + + /* adjust and copy the left over reads */ + assert(fyi->read >= fyr->current_input_pos); + fyi_new->read = fyi->read - fyr->current_input_pos; + if (fyi_new->read > 0) + memcpy(fyi_new->buffer, (char *)fyi->buffer + fyr->current_input_pos, fyi_new->read); + + fyr->this_input_start += fyr->current_input_pos; + + /* update the reader to point to the new input */ + fyr->current_input = fyi_new; + fyr->current_input_pos = 0; + fyr->current_ptr = fyi_new->buffer; + + fyr_debug(fyr, "chop at this_input_start=%zu chop=%zu\n", fyr->this_input_start, fyi->chop); + + /* free the old input - while references to it exist it will hang around */ + fyi->state = FYIS_PARSED; + fy_input_unref(fyi); + fyi = NULL; + + return 0; +err_out: + fy_input_unref(fyi_new); + return -1; +} + +const void *fy_reader_ptr_slow_path(struct fy_reader *fyr, size_t *leftp) +{ + struct fy_input *fyi; + const void *p; + int left; + + if (fyr->current_ptr) { + if (leftp) + *leftp = fyr->current_left; + return fyr->current_ptr; + } + + fyi = fyr->current_input; + if (!fyi) + return NULL; + + /* tokens cannot cross boundaries */ + switch (fyi->cfg.type) { + case fyit_file: + 
case fyit_fd: + if (fyi->addr) { + left = fyi->length - (fyr->this_input_start + fyr->current_input_pos); + p = (char *)fyi->addr + fyr->current_input_pos; + break; + } + + /* fall-through */ + + case fyit_stream: + case fyit_callback: + left = fyi->read - (fyr->this_input_start + fyr->current_input_pos); + p = (char *)fyi->buffer + fyr->current_input_pos; + break; + + case fyit_memory: + left = fyi->cfg.memory.size - fyr->current_input_pos; + p = (char *)fyi->cfg.memory.data + fyr->current_input_pos; + break; + + case fyit_alloc: + left = fyi->cfg.alloc.size - fyr->current_input_pos; + p = (char *)fyi->cfg.alloc.data + fyr->current_input_pos; + break; + + + default: + assert(0); /* no streams */ + p = NULL; + left = 0; + break; + } + + if (leftp) + *leftp = left; + + fyr->current_ptr = p; + fyr->current_left = left; + fyr->current_c = fy_utf8_get(p, left, &fyr->current_w); + + return p; +} + +const void *fy_reader_input_try_pull(struct fy_reader *fyr, struct fy_input *fyi, + size_t pull, size_t *leftp) +{ + const void *p; + size_t left, pos, size, nread, nreadreq, missing; + ssize_t snread; + size_t space __FY_DEBUG_UNUSED__; + void *buf; + + if (!fyr || !fyi) { + if (leftp) + *leftp = 0; + return NULL; + } + + p = NULL; + left = 0; + pos = fyr->current_input_pos; + + switch (fyi->cfg.type) { + case fyit_file: + case fyit_fd: + + if (fyi->addr) { + assert(fyi->length >= (fyr->this_input_start + pos)); + + left = fyi->length - (fyr->this_input_start + pos); + if (!left) { + fyr_debug(fyr, "file input exhausted"); + break; + } + p = (char *)fyi->addr + pos; + break; + } + + /* fall-through */ + + case fyit_stream: + case fyit_callback: + + assert(fyi->read >= pos); + + left = fyi->read - pos; + p = (char *)fyi->buffer + pos; + + /* enough to satisfy directly */ + if (left >= pull) + break; + + /* no more */ + if (fyi->eof) { + if (!left) { + fyr_debug(fyr, "input exhausted (EOF)"); + p = NULL; + } + break; + } + + space = fyi->allocated - pos; + + /* if we're 
missing more than the buffer space */ + missing = pull - left; + + fyr_debug(fyr, "input: allocated=%zu read=%zu pos=%zu pull=%zu left=%zu space=%zu missing=%zu", + fyi->allocated, fyi->read, pos, pull, left, space, missing); + + if (pos + pull > fyi->allocated) { + + /* align size to chunk */ + size = fyi->allocated + missing + fyi->chunk - 1; + size = size - size % fyi->chunk; + + fyr_debug(fyr, "input buffer missing %zu bytes (pull=%zu)", missing, pull); + + buf = realloc(fyi->buffer, size); + if (!buf) { + fyr_error(fyr, "realloc() failed"); + goto err_out; + } + + fyr_debug(fyr, "input read allocated=%zu new-size=%zu", fyi->allocated, size); + + fyi->buffer = buf; + fyi->allocated = size; + fyi->generation++; + + space = fyi->allocated - pos; + p = (char *)fyi->buffer + pos; + } + + /* always try to read up to the allocated space */ + do { + nreadreq = fyi->allocated - fyi->read; + assert(nreadreq > 0); + + if (fyi->cfg.type == fyit_callback) { + + fyr_debug(fyr, "performing callback request of %zu", nreadreq); + + nread = fyi->cfg.callback.input(fyi->cfg.userdata, (char *)fyi->buffer + fyi->read, nreadreq); + + fyr_debug(fyr, "callback returned %zu", nread); + + if (nread <= 0) { + if (!nread) { + fyi->eof = true; + fyr_debug(fyr, "callback got EOF"); + } else { + fyi->err = true; + fyr_debug(fyr, "callback got error"); + } + break; + } + + } else if (fyi->fp) { + + fyr_debug(fyr, "performing fread request of %zu", nreadreq); + + nread = fread((char *)fyi->buffer + fyi->read, 1, nreadreq, fyi->fp); + + fyr_debug(fyr, "fread returned %zu", nread); + + if (nread <= 0) { + fyi->err = ferror(fyi->fp); + if (fyi->err) { + fyi->eof = true; + fyr_debug(fyr, "fread got ERROR"); + goto err_out; + } + + fyi->eof = feof(fyi->fp); + if (fyi->eof) + fyr_debug(fyr, "fread got EOF"); + nread = 0; + break; + } + + } else if (fyi->fd >= 0) { + + fyr_debug(fyr, "performing read request of %zu", nreadreq); + + do { + snread = read(fyi->fd, (char *)fyi->buffer + fyi->read, 
nreadreq); + } while (snread == -1 && errno == EAGAIN); + + fyr_debug(fyr, "read returned %zd", snread); + + if (snread == -1) { + fyi->err = true; + fyi->eof = true; + fyr_error(fyr, "read() failed: %s", strerror(errno)); + goto err_out; + } + + if (!snread) { + fyi->eof = true; + nread = 0; + break; + } + + nread = snread; + } else { + fyr_error(fyr, "No FILE* nor fd available?"); + fyi->eof = true; + nread = 0; + goto err_out; + } + + assert(nread > 0); + + fyi->read += nread; + left = fyi->read - pos; + + } while (left < pull); + + /* no more, move it to parsed input chunk list */ + if (!left) { + fyr_debug(fyr, "input exhausted"); + p = NULL; + } + break; + + case fyit_memory: + assert(fyi->cfg.memory.size >= pos); + + left = fyi->cfg.memory.size - pos; + if (!left) { + fyr_debug(fyr, "memory input exhausted"); + break; + } + p = (char *)fyi->cfg.memory.data + pos; + break; + + case fyit_alloc: + assert(fyi->cfg.alloc.size >= pos); + + left = fyi->cfg.alloc.size - pos; + if (!left) { + fyr_debug(fyr, "alloc input exhausted"); + break; + } + p = (char *)fyi->cfg.alloc.data + pos; + break; + + + default: + assert(0); + break; + } + + if (leftp) + *leftp = left; + return p; + +err_out: + if (leftp) + *leftp = 0; + return NULL; +} + +void +fy_reader_advance_slow_path(struct fy_reader *fyr, int c) +{ + bool is_line_break = false; + size_t w; + + /* skip this character (optimize case of being the current) */ + w = c == fyr->current_c ? 
(size_t)fyr->current_w : fy_utf8_width(c); + fy_reader_advance_octets(fyr, w); + + /* first check for CR/LF */ + if (c == '\r' && fy_reader_peek(fyr) == '\n') { + fy_reader_advance_octets(fyr, 1); + is_line_break = true; + } else if (fy_reader_is_lb(fyr, c)) + is_line_break = true; + + if (is_line_break) { + fyr->column = 0; + fyr->line++; + } else if (fyr->tabsize && fy_is_tab(c)) + fyr->column += (fyr->tabsize - (fyr->column % fyr->tabsize)); + else + fyr->column++; +} + +struct fy_input *fy_input_create(const struct fy_input_cfg *fyic) +{ + struct fy_input *fyi = NULL; + int ret; + + fyi = fy_input_alloc(); + if (!fyi) + return NULL; + fyi->cfg = *fyic; + + /* copy filename pointers and switch */ + switch (fyic->type) { + + case fyit_file: + fyi->name = strdup(fyic->file.filename); + break; + + case fyit_fd: + ret = asprintf(&fyi->name, "<fd-%d>", fyic->fd.fd); + if (ret == -1) + fyi->name = NULL; + break; + + case fyit_stream: + if (fyic->stream.name) + fyi->name = strdup(fyic->stream.name); + else if (fyic->stream.fp == stdin) + fyi->name = strdup("<stdin>"); + else { + ret = asprintf(&fyi->name, "<stream-%d>", + fileno(fyic->stream.fp)); + if (ret == -1) + fyi->name = NULL; + } + break; + case fyit_memory: + ret = asprintf(&fyi->name, "<memory-@%p-%p>", + fyic->memory.data, (char *)fyic->memory.data + fyic->memory.size - 1); + if (ret == -1) + fyi->name = NULL; + break; + case fyit_alloc: + ret = asprintf(&fyi->name, "<alloc-@%p-%p>", + fyic->memory.data, (char *)fyic->memory.data + fyic->memory.size - 1); + if (ret == -1) + fyi->name = NULL; + break; + case fyit_callback: + ret = asprintf(&fyi->name, "<callback>"); + if (ret == -1) + fyi->name = NULL; + break; + default: + assert(0); + break; + } + if (!fyi->name) + goto err_out; + + fyi->buffer = NULL; + fyi->allocated = 0; + fyi->read = 0; + fyi->chunk = 0; + fyi->chop = 0; + fyi->fp = NULL; + fyi->fd = -1; + fyi->addr = NULL; + fyi->length = -1; + + /* default modes */ + fyi->lb_mode = fylb_cr_nl; + 
fyi->fws_mode = fyfws_space_tab; + + return fyi; + +err_out: + fy_input_unref(fyi); + return NULL; +} + +/* ensure that there are at least size octets available; returns the read pointer (storing the available count in *leftp when it is non-NULL), or NULL when fewer than size octets can be pulled */ +const void *fy_reader_ensure_lookahead_slow_path(struct fy_reader *fyr, size_t size, size_t *leftp) +{ + const void *p; + size_t left; + + if (!leftp) + leftp = &left; /* callers may not care about the count */ + + p = fy_reader_ptr(fyr, leftp); + if (!p || *leftp < size) { + + /* size and *leftp are size_t, so they are printed with %zu */ + fyr_debug(fyr, "ensure lookahead size=%zu left=%zu (%s - %zu)", + size, *leftp, + fy_input_get_filename(fyr->current_input), + fyr->current_input_pos); + + p = fy_reader_input_try_pull(fyr, fyr->current_input, size, leftp); + if (!p || *leftp < size) + return NULL; + + /* refresh the cached read position and current character after the pull */ + fyr->current_ptr = p; + fyr->current_left = *leftp; + fyr->current_c = fy_utf8_get(fyr->current_ptr, fyr->current_left, &fyr->current_w); + } + return p; +} + diff --git a/contrib/libs/libfyaml/src/lib/fy-input.h b/contrib/libs/libfyaml/src/lib/fy-input.h new file mode 100644 index 0000000000..f8032ec64a --- /dev/null +++ b/contrib/libs/libfyaml/src/lib/fy-input.h @@ -0,0 +1,683 @@ +/* + * fy-input.h - YAML input methods + * + * Copyright (c) 2019 Pantelis Antoniou <pantelis.antoniou@konsulko.com> + * + * SPDX-License-Identifier: MIT + */ +#ifndef FY_INPUT_H +#define FY_INPUT_H + +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#include <stdint.h> +#include <stdbool.h> +#include <stdio.h> +#include <stdarg.h> +#include <stdlib.h> + +#include <libfyaml.h> + +#include "fy-utils.h" +#include "fy-typelist.h" +#include "fy-ctype.h" + +struct fy_atom; +struct fy_parser; + +enum fy_input_type { + fyit_file, + fyit_stream, + fyit_memory, + fyit_alloc, + fyit_callback, + fyit_fd, +}; + +struct fy_input_cfg { + enum fy_input_type type; + void *userdata; + size_t chunk; + bool ignore_stdio : 1; + bool no_fclose_fp : 1; + bool no_close_fd : 1; + union { + struct { + const char *filename; + } file; + struct { + const char *name; + FILE *fp; + } stream; + struct { + const void *data; + size_t size; + } memory; +
struct { + void *data; + size_t size; + } alloc; + struct { + /* negative return is error, 0 is EOF */ + ssize_t (*input)(void *user, void *buf, size_t count); + } callback; + struct { + int fd; + } fd; + }; +}; + +enum fy_input_state { + FYIS_NONE, + FYIS_QUEUED, + FYIS_PARSE_IN_PROGRESS, + FYIS_PARSED, +}; + +FY_TYPE_FWD_DECL_LIST(input); +struct fy_input { + struct fy_list_head node; + enum fy_input_state state; + struct fy_input_cfg cfg; + int refs; /* number of referers */ + char *name; + void *buffer; /* when the file can't be mmaped */ + uint64_t generation; + size_t allocated; + size_t read; + size_t chunk; + size_t chop; + FILE *fp; /* FILE* for the input if it exists */ + int fd; /* fd for file and stream */ + size_t length; /* length of file */ + void *addr; /* mmaped for files, allocated for streams */ + bool eof : 1; /* got EOF */ + bool err : 1; /* got an error */ + + /* propagated */ + bool json_mode; + enum fy_lb_mode lb_mode; + enum fy_flow_ws_mode fws_mode; +}; +FY_TYPE_DECL_LIST(input); + +static inline const void *fy_input_start(const struct fy_input *fyi) +{ + const void *ptr = NULL; + + switch (fyi->cfg.type) { + case fyit_file: + if (fyi->addr) { + ptr = fyi->addr; + break; + } + /* fall-through */ + + case fyit_stream: + case fyit_callback: + ptr = fyi->buffer; + break; + + case fyit_memory: + ptr = fyi->cfg.memory.data; + break; + + case fyit_alloc: + ptr = fyi->cfg.alloc.data; + break; + + default: + break; + } + assert(ptr); + return ptr; +} + +static inline size_t fy_input_size(const struct fy_input *fyi) +{ + size_t size; + + switch (fyi->cfg.type) { + case fyit_file: + if (fyi->addr) { + size = fyi->length; + break; + } + /* fall-through */ + + case fyit_stream: + case fyit_callback: + size = fyi->read; + break; + + case fyit_memory: + size = fyi->cfg.memory.size; + break; + + case fyit_alloc: + size = fyi->cfg.alloc.size; + break; + + default: + size = 0; + break; + } + return size; +} + +struct fy_input *fy_input_alloc(void); +void 
fy_input_free(struct fy_input *fyi); + +static inline enum fy_input_state fy_input_get_state(struct fy_input *fyi) +{ + return fyi->state; +} + +struct fy_input *fy_input_create(const struct fy_input_cfg *fyic); + +const char *fy_input_get_filename(struct fy_input *fyi); + +struct fy_input *fy_input_from_data(const char *data, size_t size, + struct fy_atom *handle, bool simple); +struct fy_input *fy_input_from_malloc_data(char *data, size_t size, + struct fy_atom *handle, bool simple); + +void fy_input_close(struct fy_input *fyi); + +static inline struct fy_input * +fy_input_ref(struct fy_input *fyi) +{ + if (!fyi) + return NULL; + + + assert(fyi->refs + 1 > 0); + + fyi->refs++; + + return fyi; +} + +static inline void +fy_input_unref(struct fy_input *fyi) +{ + if (!fyi) + return; + + assert(fyi->refs > 0); + + if (fyi->refs == 1) + fy_input_free(fyi); + else + fyi->refs--; +} + +struct fy_reader; + +enum fy_reader_mode { + fyrm_yaml, + fyrm_json, + fyrm_yaml_1_1, /* yaml 1.1 mode */ +}; + +struct fy_reader_ops { + struct fy_diag *(*get_diag)(struct fy_reader *fyr); + int (*file_open)(struct fy_reader *fyr, const char *filename); +}; + +struct fy_reader_input_cfg { + bool disable_mmap_opt; +}; + +struct fy_reader { + const struct fy_reader_ops *ops; + enum fy_reader_mode mode; + + struct fy_reader_input_cfg current_input_cfg; + struct fy_input *current_input; + + size_t this_input_start; /* this input start */ + size_t current_input_pos; /* from start of input */ + const void *current_ptr; /* current pointer into the buffer */ + int current_c; /* current utf8 character at current_ptr (-1 if not cached) */ + int current_w; /* current utf8 character width */ + size_t current_left; /* currently left characters into the buffer */ + + int line; /* always on input */ + int column; + + int tabsize; /* very experimental tab size for indent purposes */ + + struct fy_diag *diag; + + /* decoded mode variables; update when changing modes */ + bool json_mode; + enum fy_lb_mode 
lb_mode; + enum fy_flow_ws_mode fws_mode; +}; + +void fy_reader_reset(struct fy_reader *fyr); +void fy_reader_setup(struct fy_reader *fyr, const struct fy_reader_ops *ops); +void fy_reader_cleanup(struct fy_reader *fyr); + +int fy_reader_input_open(struct fy_reader *fyr, struct fy_input *fyi, const struct fy_reader_input_cfg *icfg); +int fy_reader_input_done(struct fy_reader *fyr); +int fy_reader_input_scan_token_mark_slow_path(struct fy_reader *fyr); + +static inline bool +fy_reader_input_chop_active(struct fy_reader *fyr) +{ + struct fy_input *fyi; + + assert(fyr); + + fyi = fyr->current_input; + assert(fyi); + + if (!fyi->chop) + return false; + + switch (fyi->cfg.type) { + case fyit_file: + return !fyi->addr && fyi->fp; /* non-mmap mode */ + + case fyit_stream: + case fyit_callback: + return true; + + default: + /* all the others do not support chop */ + break; + } + + return false; +} + +static inline int +fy_reader_input_scan_token_mark(struct fy_reader *fyr) +{ + /* don't chop until ready */ + if (!fy_reader_input_chop_active(fyr) || + fyr->current_input->chop > fyr->current_input_pos) + return 0; + + return fy_reader_input_scan_token_mark_slow_path(fyr); +} + +const void *fy_reader_ptr_slow_path(struct fy_reader *fyr, size_t *leftp); +const void *fy_reader_ensure_lookahead_slow_path(struct fy_reader *fyr, size_t size, size_t *leftp); + +void fy_reader_apply_mode(struct fy_reader *fyr); + +static FY_ALWAYS_INLINE inline enum fy_reader_mode +fy_reader_get_mode(const struct fy_reader *fyr) +{ + assert(fyr); + return fyr->mode; +} + +static FY_ALWAYS_INLINE inline void +fy_reader_set_mode(struct fy_reader *fyr, enum fy_reader_mode mode) +{ + assert(fyr); + fyr->mode = mode; + fy_reader_apply_mode(fyr); +} + +static FY_ALWAYS_INLINE inline struct fy_input * +fy_reader_current_input(const struct fy_reader *fyr) +{ + assert(fyr); + return fyr->current_input; +} + +static FY_ALWAYS_INLINE inline uint64_t +fy_reader_current_input_generation(const struct fy_reader 
*fyr) +{ + assert(fyr); + assert(fyr->current_input); + return fyr->current_input->generation; +} + +static FY_ALWAYS_INLINE inline int +fy_reader_column(const struct fy_reader *fyr) +{ + assert(fyr); + return fyr->column; +} + +static FY_ALWAYS_INLINE inline int +fy_reader_tabsize(const struct fy_reader *fyr) +{ + assert(fyr); + return fyr->tabsize; +} + +static FY_ALWAYS_INLINE inline int +fy_reader_line(const struct fy_reader *fyr) +{ + assert(fyr); + return fyr->line; +} + +/* force new line at the end of stream */ +static inline void fy_reader_stream_end(struct fy_reader *fyr) +{ + assert(fyr); + + /* force new line */ + if (fyr->column) { + fyr->column = 0; + fyr->line++; + } +} + +static FY_ALWAYS_INLINE inline void +fy_reader_get_mark(struct fy_reader *fyr, struct fy_mark *fym) +{ + assert(fyr); + fym->input_pos = fyr->current_input_pos; + fym->line = fyr->line; + fym->column = fyr->column; +} + +static FY_ALWAYS_INLINE inline const void * +fy_reader_ptr(struct fy_reader *fyr, size_t *leftp) +{ + if (fyr->current_ptr) { + if (leftp) + *leftp = fyr->current_left; + return fyr->current_ptr; + } + + return fy_reader_ptr_slow_path(fyr, leftp); +} + +static FY_ALWAYS_INLINE inline bool +fy_reader_json_mode(const struct fy_reader *fyr) +{ + assert(fyr); + return fyr->json_mode; +} + +static FY_ALWAYS_INLINE inline enum fy_lb_mode +fy_reader_lb_mode(const struct fy_reader *fyr) +{ + assert(fyr); + return fyr->lb_mode; +} + +static FY_ALWAYS_INLINE inline enum fy_flow_ws_mode +fy_reader_flow_ws_mode(const struct fy_reader *fyr) +{ + assert(fyr); + return fyr->fws_mode; +} + +static FY_ALWAYS_INLINE inline bool +fy_reader_is_lb(const struct fy_reader *fyr, int c) +{ + return fy_is_lb_m(c, fy_reader_lb_mode(fyr)); +} + +static FY_ALWAYS_INLINE inline bool +fy_reader_is_lbz(const struct fy_reader *fyr, int c) +{ + return fy_is_lbz_m(c, fy_reader_lb_mode(fyr)); +} + +static FY_ALWAYS_INLINE inline bool +fy_reader_is_blankz(const struct fy_reader *fyr, int c) +{ + 
return fy_is_blankz_m(c, fy_reader_lb_mode(fyr)); +} + +static FY_ALWAYS_INLINE inline bool +fy_reader_is_generic_lb(const struct fy_reader *fyr, int c) +{ + return fy_is_generic_lb_m(c, fy_reader_lb_mode(fyr)); +} + +static FY_ALWAYS_INLINE inline bool +fy_reader_is_generic_lbz(const struct fy_reader *fyr, int c) +{ + return fy_is_generic_lbz_m(c, fy_reader_lb_mode(fyr)); +} + +static FY_ALWAYS_INLINE inline bool +fy_reader_is_generic_blankz(const struct fy_reader *fyr, int c) +{ + return fy_is_generic_blankz_m(c, fy_reader_lb_mode(fyr)); +} + +static FY_ALWAYS_INLINE inline bool +fy_reader_is_flow_ws(const struct fy_reader *fyr, int c) +{ + return fy_is_flow_ws_m(c, fy_reader_flow_ws_mode(fyr)); +} + +static FY_ALWAYS_INLINE inline bool +fy_reader_is_flow_blank(const struct fy_reader *fyr, int c) +{ + return fy_reader_is_flow_ws(fyr, c); /* same */ +} + +static FY_ALWAYS_INLINE inline bool +fy_reader_is_flow_blankz(const struct fy_reader *fyr, int c) +{ + return fy_is_flow_ws_m(c, fy_reader_flow_ws_mode(fyr)) || + fy_is_generic_lbz_m(c, fy_reader_lb_mode(fyr)); +} + +static FY_ALWAYS_INLINE inline const void * +fy_reader_ensure_lookahead(struct fy_reader *fyr, size_t size, size_t *leftp) +{ + if (fyr->current_ptr && fyr->current_left >= size) { + if (leftp) + *leftp = fyr->current_left; + return fyr->current_ptr; + } + return fy_reader_ensure_lookahead_slow_path(fyr, size, leftp); +} + +/* compare string at the current point (n max) */ +static inline int +fy_reader_strncmp(struct fy_reader *fyr, const char *str, size_t n) +{ + const char *p; + int ret; + + assert(fyr); + p = fy_reader_ensure_lookahead(fyr, n, NULL); + if (!p) + return -1; + ret = strncmp(p, str, n); + return ret ? 
1 : 0; +} + +static FY_ALWAYS_INLINE inline int +fy_reader_peek_at_offset(struct fy_reader *fyr, size_t offset) +{ + const uint8_t *p; + size_t left; + int w; + + assert(fyr); + if (offset == 0 && fyr->current_c >= 0) + return fyr->current_c; + + /* ensure that the first octet at least is pulled in */ + p = fy_reader_ensure_lookahead(fyr, offset + 1, &left); + if (!p) + return FYUG_EOF; + + /* get width by first octet */ + w = fy_utf8_width_by_first_octet(p[offset]); + if (!w) + return FYUG_INV; + + /* make sure that there's enough to cover the utf8 width */ + if (offset + w > left) { + p = fy_reader_ensure_lookahead(fyr, offset + w, &left); + if (!p) + return FYUG_PARTIAL; + } + + return fy_utf8_get(p + offset, left - offset, &w); +} + +static FY_ALWAYS_INLINE inline int +fy_reader_peek_at_internal(struct fy_reader *fyr, int pos, ssize_t *offsetp) +{ + int i, c; + size_t offset; + + assert(fyr); + if (!offsetp || *offsetp < 0) { + for (i = 0, offset = 0; i < pos; i++, offset += fy_utf8_width(c)) { + c = fy_reader_peek_at_offset(fyr, offset); + if (c < 0) + return c; + } + } else + offset = (size_t)*offsetp; + + c = fy_reader_peek_at_offset(fyr, offset); + + if (offsetp) + *offsetp = offset + fy_utf8_width(c); + + return c; +} + +static FY_ALWAYS_INLINE inline bool +fy_reader_is_blank_at_offset(struct fy_reader *fyr, size_t offset) +{ + return fy_is_blank(fy_reader_peek_at_offset(fyr, offset)); +} + +static FY_ALWAYS_INLINE inline bool +fy_reader_is_blankz_at_offset(struct fy_reader *fyr, size_t offset) +{ + return fy_reader_is_blankz(fyr, fy_reader_peek_at_offset(fyr, offset)); +} + +static FY_ALWAYS_INLINE inline int +fy_reader_peek_at(struct fy_reader *fyr, int pos) +{ + return fy_reader_peek_at_internal(fyr, pos, NULL); +} + +static FY_ALWAYS_INLINE inline int +fy_reader_peek(struct fy_reader *fyr) +{ + if (fyr->current_c >= 0) + return fyr->current_c; + + return fy_reader_peek_at_offset(fyr, 0); +} + +static FY_ALWAYS_INLINE inline const void * 
+fy_reader_peek_block(struct fy_reader *fyr, size_t *lenp) +{ + const void *p; + + /* try to pull at least one utf8 character usually */ + p = fy_reader_ensure_lookahead(fyr, 4, lenp); + + /* not a utf8 character available? try a single byte */ + if (!p) + p = fy_reader_ensure_lookahead(fyr, 1, lenp); + if (!*lenp) + p = NULL; + return p; +} + +static FY_ALWAYS_INLINE inline void +fy_reader_advance_octets(struct fy_reader *fyr, size_t advance) +{ + assert(fyr); + assert(fyr->current_left >= advance); + + fyr->current_input_pos += advance; + fyr->current_ptr = (char *)fyr->current_ptr + advance; + fyr->current_left -= advance; + + fyr->current_c = fy_utf8_get(fyr->current_ptr, fyr->current_left, &fyr->current_w); +} + +void fy_reader_advance_slow_path(struct fy_reader *fyr, int c); + +static FY_ALWAYS_INLINE inline void +fy_reader_advance_printable_ascii(struct fy_reader *fyr, int c) +{ + assert(fyr); + fy_reader_advance_octets(fyr, 1); + fyr->column++; +} + +static FY_ALWAYS_INLINE inline void +fy_reader_advance(struct fy_reader *fyr, int c) +{ + if (fy_utf8_is_printable_ascii(c)) + fy_reader_advance_printable_ascii(fyr, c); + else + fy_reader_advance_slow_path(fyr, c); +} + +static FY_ALWAYS_INLINE inline void +fy_reader_advance_ws(struct fy_reader *fyr, int c) +{ + /* skip this character */ + fy_reader_advance_octets(fyr, fy_utf8_width(c)); + + if (fyr->tabsize && fy_is_tab(c)) + fyr->column += (fyr->tabsize - (fyr->column % fyr->tabsize)); + else + fyr->column++; +} + +static FY_ALWAYS_INLINE inline void +fy_reader_advance_space(struct fy_reader *fyr) +{ + fy_reader_advance_octets(fyr, 1); + fyr->column++; +} + +static FY_ALWAYS_INLINE inline int +fy_reader_get(struct fy_reader *fyr) +{ + int value; + + value = fy_reader_peek(fyr); + if (value < 0) + return value; + + fy_reader_advance(fyr, value); + + return value; +} + +static FY_ALWAYS_INLINE inline int +fy_reader_advance_by(struct fy_reader *fyr, int count) +{ + int i, c; + + for (i = 0; i < count; i++) { + 
c = fy_reader_get(fyr); + if (c < 0) + break; + } + return i ? i : -1; +} + +/* compare string at the current point */ +static inline bool +fy_reader_strcmp(struct fy_reader *fyr, const char *str) +{ + return fy_reader_strncmp(fyr, str, strlen(str)); +} + +#endif diff --git a/contrib/libs/libfyaml/src/lib/fy-list.h b/contrib/libs/libfyaml/src/lib/fy-list.h new file mode 100644 index 0000000000..df35cb28e0 --- /dev/null +++ b/contrib/libs/libfyaml/src/lib/fy-list.h @@ -0,0 +1,79 @@ +/* + * fy-list.h - simple doubly linked list implementation + * + * Copyright (c) 2022 Innokentii Mokin <iam@justregular.dev> + * + * SPDX-License-Identifier: MIT + */ +#ifndef FY_LIST_H +#define FY_LIST_H + +#include <stdbool.h> /* bool, returned by fy_list_is_empty() and fy_list_is_singular() */ +#include <stddef.h> + +#define fy_container_of(ptr, type, member) \ + ( (void)sizeof(0 ? (ptr) : &((type *)0)->member), \ + (type *)((char*)(ptr) - offsetof(type, member)) ) + +struct fy_list_head { + struct fy_list_head *prev; + struct fy_list_head *next; +}; + +/* make lh an empty list: both links point back at the head itself */ +static inline void fy_list_init_head(struct fy_list_head *lh) +{ + lh->prev = lh; + lh->next = lh; +} + +/* link ln right after the head lh */ +static inline void fy_list_add_head(struct fy_list_head *ln, struct fy_list_head *lh) +{ + struct fy_list_head *second = lh->next; + + second->prev = ln; + ln->next = second; + lh->next = ln; + ln->prev = lh; +} + +/* link ln right before the head lh, i.e. at the end of the list */ +static inline void fy_list_add_tail(struct fy_list_head *ln, struct fy_list_head *lh) +{ + struct fy_list_head *tail = lh->prev; + + lh->prev = ln; + ln->next = lh; + tail->next = ln; + ln->prev = tail; +} + +/* true when the head links only to itself */ +static inline bool fy_list_is_empty(struct fy_list_head *lh) +{ + return lh == lh->next; +} + +/* true when the list holds exactly one node besides the head */ +static inline bool fy_list_is_singular(struct fy_list_head *lh) +{ + return lh != lh->next && lh == lh->next->next; +} + +/* unlink ln from its neighbours and clear its own links */ +static inline void fy_list_del(struct fy_list_head *ln) { + ln->prev->next = ln->next; + ln->next->prev = ln->prev; + ln->prev = NULL; + ln->next = NULL; +} + +static inline void fy_list_splice(struct fy_list_head *nlh, struct fy_list_head *lh) { + struct fy_list_head *prev = lh, *next =
lh->next, + *head = nlh->next, *tail = nlh->prev; + + if (nlh == nlh->next) { + return; + } + + head->prev = prev; + tail->next = next; + prev->next = head; + next->prev = tail; +} + +#endif diff --git a/contrib/libs/libfyaml/src/lib/fy-parse.c b/contrib/libs/libfyaml/src/lib/fy-parse.c new file mode 100644 index 0000000000..07b5045efc --- /dev/null +++ b/contrib/libs/libfyaml/src/lib/fy-parse.c @@ -0,0 +1,7044 @@ +/* + * fy-parse.c - Internal parse interface + * + * Copyright (c) 2019 Pantelis Antoniou <pantelis.antoniou@konsulko.com> + * + * SPDX-License-Identifier: MIT + */ +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#include <stdio.h> +#include <string.h> +#if defined (__unix__) || (defined (__APPLE__) && defined (__MACH__)) +#include <sys/mman.h> +#endif +#include <sys/types.h> +#include <sys/stat.h> +#if defined (__unix__) || (defined (__APPLE__) && defined (__MACH__)) +#include <sys/ioctl.h> +#endif +#include <fcntl.h> +#if defined (__unix__) || (defined (__APPLE__) && defined (__MACH__)) +#include <unistd.h> +#endif +#include <assert.h> +#include <stdlib.h> +#include <errno.h> +#include <stdarg.h> +#include <limits.h> + +#include <libfyaml.h> + +#include "fy-parse.h" + +#include "fy-utils.h" + +/* only check atom sizes on debug */ +#ifndef NDEBUG +#define ATOM_SIZE_CHECK +#endif + +/* return the VERSION string the library was built with, or "UNKNOWN" when none was defined */ +const char *fy_library_version(void) +{ +#ifndef VERSION +#warning No version defined + return "UNKNOWN"; +#else + return VERSION; +#endif +} + +/* + * fy_parse_input_append() - create an input from fyic and queue it on the parser + * + * The new input is marked FYIS_QUEUED and added to the tail of + * fyp->queued_inputs. Returns 0 on success, -1 when the input could not + * be created. + */ +int fy_parse_input_append(struct fy_parser *fyp, const struct fy_input_cfg *fyic) +{ + struct fy_input *fyi = NULL; + + fyi = fy_input_create(fyic); + /* check the created input (not the parser) before it is dereferenced below */ + fyp_error_check(fyp, fyi != NULL, err_out, + "fy_parse_input_create() failed!"); + + fyi->state = FYIS_QUEUED; + fy_input_list_add_tail(&fyp->queued_inputs, fyi); + + return 0; + +err_out: + fy_input_unref(fyi); /* accepts NULL */ + return -1; +} + +bool fy_parse_have_more_inputs(struct fy_parser *fyp) +{ + return !fy_input_list_empty(&fyp->queued_inputs); +} + +int fy_parse_get_next_input(struct
fy_parser *fyp) +{ + const char *s; + struct fy_reader_input_cfg icfg; + struct fy_input *fyi; + int rc; + bool json_mode; + enum fy_reader_mode rdmode; + + assert(fyp); + + if (fy_reader_current_input(fyp->reader)) { + fyp_scan_debug(fyp, "get next input: already exists"); + return 1; + } + + /* get next queued input */ + fyi = fy_input_list_pop(&fyp->queued_inputs); + + /* none left? we're done */ + if (!fyi) { + fyp_scan_debug(fyp, "get next input: all inputs exhausted"); + return 0; + } + + json_mode = false; + if ((fyp->cfg.flags & (FYPCF_JSON_MASK << FYPCF_JSON_SHIFT)) == FYPCF_JSON_AUTO) { + /* detection only works for filenames (sucks) */ + if (fyi->cfg.type == fyit_file) { + s = fyi->cfg.file.filename; + if (s) + s = strrchr(s, '.'); + json_mode = s && !strcmp(s, ".json"); + } + } else if ((fyp->cfg.flags & (FYPCF_JSON_MASK << FYPCF_JSON_SHIFT)) == FYPCF_JSON_FORCE) + json_mode = true; + + /* set the initial reader mode according to json option and default version */ + if (!json_mode) + rdmode = fy_version_compare(&fyp->default_version, fy_version_make(1, 1)) <= 0 ? fyrm_yaml_1_1 : fyrm_yaml; + else + rdmode = fyrm_json; + + fy_reader_set_mode(fyp->reader, rdmode); + + memset(&icfg, 0, sizeof(icfg)); + icfg.disable_mmap_opt = !!(fyp->cfg.flags & FYPCF_DISABLE_MMAP_OPT); + + rc = fy_reader_input_open(fyp->reader, fyi, &icfg); + fyp_error_check(fyp, !rc, err_out, + "failed to open input"); + + /* take off the reference; reader now owns */ + fy_input_unref(fyi); + + // inherit the JSON mode + if (fyp->current_document_state) + fyp->current_document_state->json_mode = fyp_json_mode(fyp); + + fyp_scan_debug(fyp, "get next input: new input - %s mode", json_mode ? 
"JSON" : "YAML"); + + return 1; + +err_out: + fy_input_unref(fyi); + return -1; +} + +static inline void +fy_token_queue_epilogue(struct fy_parser *fyp, struct fy_token *fyt) +{ + /* special handling for zero indented scalars */ + fyp->token_activity_counter++; + if (fyt->type == FYTT_DOCUMENT_START) + fyp->document_first_content_token = true; + else if (fyp->document_first_content_token && fy_token_type_is_content(fyt->type)) + fyp->document_first_content_token = false; +} + +static inline struct fy_token * +fy_token_queue_simple_internal(struct fy_parser *fyp, struct fy_token_list *fytl, enum fy_token_type type, int advance_octets) +{ + struct fy_reader *fyr = fyp->reader; + struct fy_token *fyt; + + /* allocate and copy in place */ + fyt = fy_token_alloc_rl(fyp->recycled_token_list); + if (!fyt) + return NULL; + + fyt->type = type; + + /* the advance is always octets */ + fy_reader_fill_atom_start(fyr, &fyt->handle); + if (advance_octets > 0) { + fy_reader_advance_octets(fyr, advance_octets); + fyr->column += advance_octets; + } + fy_reader_fill_atom_end(fyr, &fyt->handle); + + fy_input_ref(fyt->handle.fyi); + + fy_token_list_add_tail(fytl, fyt); + + return fyt; +} + +static inline struct fy_token * +fy_token_queue_simple(struct fy_parser *fyp, struct fy_token_list *fytl, enum fy_token_type type, int advance_octets) +{ + struct fy_token *fyt; + + fyt = fy_token_queue_simple_internal(fyp, fytl, type, advance_octets); + if (!fyt) + return NULL; + + fy_token_queue_epilogue(fyp, fyt); + return fyt; +} + +struct fy_token * +fy_token_vqueue_internal(struct fy_parser *fyp, struct fy_token_list *fytl, + enum fy_token_type type, va_list ap) +{ + struct fy_token *fyt; + + fyt = fy_token_vcreate_rl(fyp->recycled_token_list, type, ap); + if (!fyt) + return NULL; + fy_token_list_add_tail(fytl, fyt); + + fy_token_queue_epilogue(fyp, fyt); + return fyt; +} + +struct fy_token *fy_token_queue_internal(struct fy_parser *fyp, struct fy_token_list *fytl, + enum fy_token_type type, 
...) +{ + va_list ap; + struct fy_token *fyt; + + va_start(ap, type); + fyt = fy_token_vqueue_internal(fyp, fytl, type, ap); + va_end(ap); + + return fyt; +} + +struct fy_token *fy_token_vqueue(struct fy_parser *fyp, enum fy_token_type type, va_list ap) +{ + struct fy_token *fyt; + + fyt = fy_token_vqueue_internal(fyp, &fyp->queued_tokens, type, ap); + if (fyt) + fyp->token_activity_counter++; + return fyt; +} + +struct fy_token *fy_token_queue(struct fy_parser *fyp, enum fy_token_type type, ...) +{ + va_list ap; + struct fy_token *fyt; + + va_start(ap, type); + fyt = fy_token_vqueue(fyp, type, ap); + va_end(ap); + + return fyt; +} + +const struct fy_version fy_default_version = { + .major = 1, + .minor = 2 +}; + +int fy_version_compare(const struct fy_version *va, const struct fy_version *vb) +{ + unsigned int vanum, vbnum; + + if (!va) + va = &fy_default_version; + if (!vb) + vb = &fy_default_version; + +#define FY_VERSION_UINT(_major, _minor) \ + ((((unsigned int)(_major) & 0xff) << 8) | ((unsigned int)((_minor) & 0xff))) + + vanum = FY_VERSION_UINT(va->major, va->minor); + vbnum = FY_VERSION_UINT(vb->major, vb->minor); + +#undef FY_VERSION_UINT + + return vanum == vbnum ? 0 : + vanum < vbnum ? 
-1 : 1; +} + +const struct fy_version * +fy_version_default(void) +{ + return &fy_default_version; +} + +static const struct fy_version * const fy_map_option_to_version[] = { + [FYPCF_DEFAULT_VERSION_AUTO >> FYPCF_DEFAULT_VERSION_SHIFT] = &fy_default_version, + [FYPCF_DEFAULT_VERSION_1_1 >> FYPCF_DEFAULT_VERSION_SHIFT] = fy_version_make(1, 1), + [FYPCF_DEFAULT_VERSION_1_2 >> FYPCF_DEFAULT_VERSION_SHIFT] = fy_version_make(1, 2), + [FYPCF_DEFAULT_VERSION_1_3 >> FYPCF_DEFAULT_VERSION_SHIFT] = fy_version_make(1, 3), +}; + +bool fy_version_is_supported(const struct fy_version *vers) +{ + unsigned int i; + const struct fy_version *vers_chk; + + if (!vers) + return true; /* NULL means default, which is supported */ + + for (i = 0; i < sizeof(fy_map_option_to_version)/sizeof(fy_map_option_to_version[0]); i++) { + + vers_chk = fy_map_option_to_version[i]; + if (!vers_chk) + continue; + + if (fy_version_compare(vers, vers_chk) == 0) + return true; + } + + return false; +} + +static const struct fy_version * +fy_parse_cfg_to_version(enum fy_parse_cfg_flags flags) +{ + unsigned int idx; + + idx = (flags >> FYPCF_DEFAULT_VERSION_SHIFT) & FYPCF_DEFAULT_VERSION_MASK; + + if (idx >= sizeof(fy_map_option_to_version)/sizeof(fy_map_option_to_version[0])) + return NULL; + + return fy_map_option_to_version[idx]; +} + +const struct fy_version *fy_version_supported_iterate(void **prevp) +{ + const struct fy_version * const *versp; + const struct fy_version *vers; + unsigned int idx; + + if (!prevp) + return NULL; + + versp = (const struct fy_version * const *)*prevp; + if (!versp) { + /* we skip over the first (which is the default) */ + versp = fy_map_option_to_version; + } + + versp++; + + idx = versp - fy_map_option_to_version; + if (idx >= sizeof(fy_map_option_to_version)/sizeof(fy_map_option_to_version[0])) + return NULL; + + vers = *versp; + *prevp = (void **)versp; + + return vers; +} + +const struct fy_tag * const fy_default_tags[] = { + &(struct fy_tag) { .handle = "!", .prefix 
= "!", }, + &(struct fy_tag) { .handle = "!!", .prefix = "tag:yaml.org,2002:", }, + &(struct fy_tag) { .handle = "", .prefix = "", }, + NULL +}; + +bool fy_tag_handle_is_default(const char *handle, size_t handle_size) +{ + int i; + const struct fy_tag *fytag; + + if (handle_size == (size_t)-1) + handle_size = strlen(handle); + + for (i = 0; (fytag = fy_default_tags[i]) != NULL; i++) { + + if (handle_size == strlen(fytag->handle) && + !memcmp(handle, fytag->handle, handle_size)) + return true; + + } + return false; +} + +bool fy_tag_is_default_internal(const char *handle, size_t handle_size, + const char *prefix, size_t prefix_size) +{ + int i; + const struct fy_tag *fytag; + + if (handle_size == (size_t)-1) + handle_size = strlen(handle); + + if (prefix_size == (size_t)-1) + prefix_size = strlen(prefix); + + for (i = 0; (fytag = fy_default_tags[i]) != NULL; i++) { + + if (handle_size == strlen(fytag->handle) && + !memcmp(handle, fytag->handle, handle_size) && + prefix_size == strlen(fytag->prefix) && + !memcmp(prefix, fytag->prefix, prefix_size)) + return true; + + } + return false; +} + +bool fy_document_state_tag_is_default(struct fy_document_state *fyds, const struct fy_tag *tag) +{ + struct fy_token *fyt_td; + + /* default tag, but it might be overriden */ + fyt_td = fy_document_state_lookup_tag_directive(fyds, tag->handle, strlen(tag->handle)); + if (!fyt_td) + return false; /* Huh? 
*/ + + return fyt_td->tag_directive.is_default; +} + +bool fy_token_tag_directive_is_overridable(struct fy_token *fyt_td) +{ + const struct fy_tag *fytag; + const char *handle, *prefix; + size_t handle_size, prefix_size; + int i; + + if (!fyt_td) + return false; + + handle = fy_tag_directive_token_handle(fyt_td, &handle_size); + prefix = fy_tag_directive_token_prefix(fyt_td, &prefix_size); + if (!handle || !prefix) + return false; + + for (i = 0; (fytag = fy_default_tags[i]) != NULL; i++) { + + if (handle_size == strlen(fytag->handle) && + !memcmp(handle, fytag->handle, handle_size) && + prefix_size == strlen(fytag->prefix) && + !memcmp(prefix, fytag->prefix, prefix_size)) + return true; + + } + return false; +} + +int fy_reset_document_state(struct fy_parser *fyp) +{ + struct fy_document_state *fyds_new = NULL; + + fyp_scan_debug(fyp, "resetting document state"); + + if (!fyp->default_document_state) { + fyds_new = fy_document_state_default(&fyp->default_version, NULL); + fyp_error_check(fyp, fyds_new, err_out, + "fy_document_state_default() failed"); + } else { + fyds_new = fy_document_state_copy(fyp->default_document_state); + fyp_error_check(fyp, fyds_new, err_out, + "fy_document_state_copy() failed"); + } + // inherit the JSON mode + fyds_new->json_mode = fyp_json_mode(fyp); + + if (fyp->current_document_state) + fy_document_state_unref(fyp->current_document_state); + fyp->current_document_state = fyds_new; + + /* TODO check when cleaning flow lists */ + fyp->flow_level = 0; + fyp->flow = FYFT_NONE; + fy_parse_flow_list_recycle_all(fyp, &fyp->flow_stack); + + return 0; + +err_out: + return -1; +} + +int fy_parser_set_default_document_state(struct fy_parser *fyp, + struct fy_document_state *fyds) +{ + if (!fyp) + return -1; + + /* only in a safe state */ + if (fyp->state != FYPS_NONE && fyp->state != FYPS_END) + return -1; + + if (fyp->default_document_state != fyds) { + if (fyp->default_document_state) { + fy_document_state_unref(fyp->default_document_state); 
+ fyp->default_document_state = NULL; + } + + if (fyds) + fyp->default_document_state = fy_document_state_ref(fyds); + } + + fy_reset_document_state(fyp); + + return 0; +} + +void fy_parser_set_next_single_document(struct fy_parser *fyp) +{ + if (!fyp) + return; + + fyp->next_single_document = true; +} + +int fy_check_document_version(struct fy_parser *fyp) +{ + int major, minor; + + major = fyp->current_document_state->version.major; + minor = fyp->current_document_state->version.minor; + + /* we only support YAML version 1.x */ + if (major == 1) { + /* 1.1 is supported without warnings */ + if (minor == 1) + goto ok; + + if (minor == 2 || minor == 3) + goto experimental; + } + + return -1; + +experimental: + fyp_scan_debug(fyp, "Experimental support for version %d.%d", + major, minor); +ok: + return 0; +} + +int fy_parse_version_directive(struct fy_parser *fyp, struct fy_token *fyt, bool scan_mode) +{ + struct fy_document_state *fyds; + const char *vs; + size_t vs_len; + char *vs0; + char *s, *e; + long v; + int rc; + + fyp_error_check(fyp, fyt && fyt->type == FYTT_VERSION_DIRECTIVE, err_out, + "illegal token (or missing) version directive token"); + + fyds = fyp->current_document_state; + fyp_error_check(fyp, fyds, err_out, + "no current document state error"); + + if (!scan_mode) { + FYP_TOKEN_ERROR_CHECK(fyp, fyt, FYEM_PARSE, + !fyds->fyt_vd, err_out, + "duplicate version directive"); + } else { + /* in scan mode, we just override everything */ + fy_token_unref_rl(fyp->recycled_token_list, fyds->fyt_vd); + fyds->fyt_vd = NULL; + } + + /* version directive of the form: MAJ.MIN */ + vs = fy_token_get_text(fyt, &vs_len); + fyp_error_check(fyp, vs, err_out, + "fy_token_get_text() failed"); + vs0 = FY_ALLOCA(vs_len + 1); + memcpy(vs0, vs, vs_len); + vs0[vs_len] = '\0'; + + /* parse version numbers */ + v = strtol(vs0, &e, 10); + fyp_error_check(fyp, e > vs0 && v >= 0 && v <= INT_MAX, err_out, + "illegal major version number (%s)", vs0); + fyp_error_check(fyp, *e == 
'.', err_out, + "illegal version separator"); + fyds->version.major = (int)v; + + s = e + 1; + v = strtol(s, &e, 10); + fyp_error_check(fyp, e > s && v >= 0 && v <= INT_MAX, err_out, + "illegal minor version number"); + fyp_error_check(fyp, *e == '\0', err_out, + "garbage after version number"); + fyds->version.minor = (int)v; + + fyp_scan_debug(fyp, "document parsed YAML version: %d.%d", + fyds->version.major, + fyds->version.minor); + + rc = fy_check_document_version(fyp); + fyp_error_check(fyp, !rc, err_out_rc, + "unsupport version number %d.%d", + fyds->version.major, + fyds->version.minor); + + fyds->version_explicit = true; + fyds->fyt_vd = fyt; + + return 0; +err_out: + rc = -1; +err_out_rc: + fy_token_unref_rl(fyp->recycled_token_list, fyt); + return rc; +} + +int fy_parse_tag_directive(struct fy_parser *fyp, struct fy_token *fyt, bool scan_mode) +{ + struct fy_document_state *fyds; + struct fy_token *fyt_td; + const char *handle, *prefix; + size_t handle_size, prefix_size; + bool can_override; + + fyds = fyp->current_document_state; + fyp_error_check(fyp, fyds, err_out, + "no current document state error"); + + handle = fy_tag_directive_token_handle(fyt, &handle_size); + fyp_error_check(fyp, handle, err_out, + "bad tag directive token (handle)"); + + prefix = fy_tag_directive_token_prefix(fyt, &prefix_size); + fyp_error_check(fyp, prefix, err_out, + "bad tag directive token (prefix)"); + + fyt_td = fy_document_state_lookup_tag_directive(fyds, handle, handle_size); + + can_override = fyt_td && (fy_token_tag_directive_is_overridable(fyt_td) || scan_mode); + + FYP_TOKEN_ERROR_CHECK(fyp, fyt, FYEM_PARSE, + !fyt_td || can_override, err_out, + "duplicate tag directive"); + + if (fyt_td) { + /* fyp_notice(fyp, "overriding tag"); */ + fy_token_list_del(&fyds->fyt_td, fyt_td); + fy_token_unref_rl(fyp->recycled_token_list, fyt_td); + /* when we override a default tag the tags are explicit */ + fyds->tags_explicit = true; + } + + fy_token_list_add_tail(&fyds->fyt_td, 
fyt); + fyt = NULL; + + fyp_scan_debug(fyp, "document parsed tag directive with handle=%.*s", + (int)handle_size, handle); + + if (!fy_tag_is_default_internal(handle, handle_size, prefix, prefix_size)) + fyds->tags_explicit = true; + + return 0; +err_out: + fy_token_unref_rl(fyp->recycled_token_list, fyt); + return -1; +} + +static const struct fy_parse_cfg default_parse_cfg = { + .flags = FYPCF_DEFAULT_PARSE, +}; + +static struct fy_diag *fy_parser_reader_get_diag(struct fy_reader *fyr) +{ + struct fy_parser *fyp = fy_container_of(fyr, struct fy_parser, builtin_reader); + return fyp->diag; +} + +/* + * Reader callback: open `name` read-only and return its file descriptor, or -1 on failure. + * A name starting with '/', or a parser with no (or an empty) cfg.search_path, is opened as given; + * otherwise each non-empty ':'-separated entry of cfg.search_path is tried in order as "<entry>/<name>". + */ +static int fy_parser_reader_file_open(struct fy_reader *fyr, const char *name) +{ + struct fy_parser *fyp = fy_container_of(fyr, struct fy_parser, builtin_reader); + char *sp, *s, *e, *t, *newp; + size_t len, maxlen; + int fd; + + if (!fyp || !name || name[0] == '\0') + return -1; + + /* for a full path, or no search path, open directly */ + if (name[0] == '/' || !fyp->cfg.search_path || !fyp->cfg.search_path[0]) { + fd = open(name, O_RDONLY); + if (fd == -1) + fyp_scan_debug(fyp, "failed to open file %s\n", name); + else + fyp_scan_debug(fyp, "opened file %s\n", name); + return fd; + } + + /* work on a private copy, the separators are overwritten with NULs below */ + len = strlen(fyp->cfg.search_path); + sp = FY_ALLOCA(len + 1); + memcpy(sp, fyp->cfg.search_path, len + 1); + + /* allocate the maximum possible so that we don't deal with reallocations */ + maxlen = len + 1 + strlen(name); + newp = malloc(maxlen + 1); + if (!newp) + return -1; + + s = sp; + e = sp + strlen(s); + while (s < e) { + /* skip completely empty */ + if (*s == ':') { + s++; + continue; + } + + /* terminate this entry; t is where the next one starts */ + t = strchr(s, ':'); + if (t) + *t++ = '\0'; + else + t = e; + + /* the buffer holds maxlen + 1 bytes; passing only maxlen cut the last character of the longest candidate (a single-entry search path) */ + snprintf(newp, maxlen + 1, "%s/%s", s, name); + + /* try opening */ + fd = open(newp, O_RDONLY); + if (fd != -1) { + fyp_scan_debug(fyp, "opened file %s at %s", name, newp); + free(newp); + return fd; + } + + s = t; + } + + /* no entry of the search path had the file */ + free(newp); + return -1; +} + +static const struct 
fy_reader_ops fy_parser_reader_ops = { + .get_diag = fy_parser_reader_get_diag, + .file_open = fy_parser_reader_file_open, +}; + +int fy_parse_setup(struct fy_parser *fyp, const struct fy_parse_cfg *cfg) +{ + struct fy_diag *diag; + struct fy_diag_cfg dcfg; + const struct fy_version *vers; + int rc; + + if (!fyp) + return -1; + + memset(fyp, 0, sizeof(*fyp)); + + diag = cfg ? cfg->diag : NULL; + fyp->cfg = cfg ? *cfg : default_parse_cfg; + + /* supported version? */ + vers = fy_parse_cfg_to_version(fyp->cfg.flags); + if (!vers) + return -1; + + if (!diag) { + fy_diag_cfg_default(&dcfg); + diag = fy_diag_create(&dcfg); + if (!diag) + return -1; + } else + fy_diag_ref(diag); + + fyp->diag = diag; + + fy_reader_setup(&fyp->builtin_reader, &fy_parser_reader_ops); + fyp->reader = &fyp->builtin_reader; + + fyp->default_version = *vers; + + fy_indent_list_init(&fyp->indent_stack); + fy_indent_list_init(&fyp->recycled_indent); + fyp->indent = -2; + fyp->indent_line = -1; + fyp->generated_block_map = false; + fyp->last_was_comma = false; + + fy_simple_key_list_init(&fyp->simple_keys); + fy_simple_key_list_init(&fyp->recycled_simple_key); + + fy_token_list_init(&fyp->queued_tokens); + + fy_input_list_init(&fyp->queued_inputs); + + fyp->state = FYPS_NONE; + fy_parse_state_log_list_init(&fyp->state_stack); + fy_parse_state_log_list_init(&fyp->recycled_parse_state_log); + + fy_eventp_list_init(&fyp->recycled_eventp); + fy_token_list_init(&fyp->recycled_token); + + fy_flow_list_init(&fyp->flow_stack); + fyp->flow = FYFT_NONE; + fy_flow_list_init(&fyp->recycled_flow); + + fyp->pending_complex_key_column = -1; + fyp->last_block_mapping_key_line = -1; + + fyp->suppress_recycling = !!(fyp->cfg.flags & FYPCF_DISABLE_RECYCLING) || + (getenv("FY_VALGRIND") && + !getenv("FY_VALGRIND_RECYCLING")); + + if (fyp->suppress_recycling) + fyp_parse_debug(fyp, "Suppressing recycling"); + + if (!fyp->suppress_recycling) { + fyp->recycled_eventp_list = &fyp->recycled_eventp; + 
fyp->recycled_token_list = &fyp->recycled_token; + } else { + fyp->recycled_eventp_list = NULL; + fyp->recycled_token_list = NULL; + } + + fyp->current_document_state = NULL; + + rc = fy_reset_document_state(fyp); + fyp_error_check(fyp, !rc, err_out_rc, + "fy_reset_document_state() failed"); + return 0; + +err_out_rc: + return rc; +} + +void fy_parse_cleanup(struct fy_parser *fyp) +{ + struct fy_input *fyi, *fyin; + struct fy_eventp *fyep; + struct fy_token *fyt; + + fy_composer_destroy(fyp->fyc); + fy_document_builder_destroy(fyp->fydb); + + fy_parse_indent_list_recycle_all(fyp, &fyp->indent_stack); + fy_parse_simple_key_list_recycle_all(fyp, &fyp->simple_keys); + fy_token_list_unref_all(&fyp->queued_tokens); + + fy_parse_parse_state_log_list_recycle_all(fyp, &fyp->state_stack); + fy_parse_flow_list_recycle_all(fyp, &fyp->flow_stack); + + fy_token_unref_rl(fyp->recycled_token_list, fyp->stream_end_token); + + fy_document_state_unref(fyp->current_document_state); + fy_document_state_unref(fyp->default_document_state); + + for (fyi = fy_input_list_head(&fyp->queued_inputs); fyi; fyi = fyin) { + fyin = fy_input_next(&fyp->queued_inputs, fyi); + fy_input_unref(fyi); + } + + /* clean the builtin reader */ + fy_reader_cleanup(&fyp->builtin_reader); + + /* and vacuum (free everything) */ + fy_parse_indent_vacuum(fyp); + fy_parse_simple_key_vacuum(fyp); + fy_parse_parse_state_log_vacuum(fyp); + fy_parse_flow_vacuum(fyp); + + /* free the recycled events */ + while ((fyep = fy_eventp_list_pop(&fyp->recycled_eventp)) != NULL) { + /* catch double recycles */ + /* assert(fy_eventp_list_head(&fyp->recycled_eventp)!= fyep); */ + fy_eventp_free(fyep); + } + + /* and the recycled tokens */ + while ((fyt = fy_token_list_pop(&fyp->recycled_token)) != NULL) + fy_token_free(fyt); + + fy_diag_unref(fyp->diag); +} + +static const char *state_txt[] __FY_DEBUG_UNUSED__ = { + [FYPS_NONE] = "NONE", + [FYPS_STREAM_START] = "STREAM_START", + [FYPS_IMPLICIT_DOCUMENT_START] = 
"IMPLICIT_DOCUMENT_START", + [FYPS_DOCUMENT_START] = "DOCUMENT_START", + [FYPS_DOCUMENT_CONTENT] = "DOCUMENT_CONTENT", + [FYPS_DOCUMENT_END] = "DOCUMENT_END", + [FYPS_BLOCK_NODE] = "BLOCK_NODE", + [FYPS_BLOCK_SEQUENCE_FIRST_ENTRY] = "BLOCK_SEQUENCE_FIRST_ENTRY", + [FYPS_BLOCK_SEQUENCE_ENTRY] = "BLOCK_SEQUENCE_ENTRY", + [FYPS_INDENTLESS_SEQUENCE_ENTRY] = "INDENTLESS_SEQUENCE_ENTRY", + [FYPS_BLOCK_MAPPING_FIRST_KEY] = "BLOCK_MAPPING_FIRST_KEY", + [FYPS_BLOCK_MAPPING_KEY] = "BLOCK_MAPPING_KEY", + [FYPS_BLOCK_MAPPING_VALUE] = "BLOCK_MAPPING_VALUE", + [FYPS_FLOW_SEQUENCE_FIRST_ENTRY] = "FLOW_SEQUENCE_FIRST_ENTRY", + [FYPS_FLOW_SEQUENCE_ENTRY] = "FLOW_SEQUENCE_ENTRY", + [FYPS_FLOW_SEQUENCE_ENTRY_MAPPING_KEY] = "FLOW_SEQUENCE_ENTRY_MAPPING_KEY", + [FYPS_FLOW_SEQUENCE_ENTRY_MAPPING_VALUE] = "FLOW_SEQUENCE_ENTRY_MAPPING_VALUE", + [FYPS_FLOW_SEQUENCE_ENTRY_MAPPING_END] = "FLOW_SEQUENCE_ENTRY_MAPPING_END", + [FYPS_FLOW_MAPPING_FIRST_KEY] = "FLOW_MAPPING_FIRST_KEY", + [FYPS_FLOW_MAPPING_KEY] = "FLOW_MAPPING_KEY", + [FYPS_FLOW_MAPPING_VALUE] = "FLOW_MAPPING_VALUE", + [FYPS_FLOW_MAPPING_EMPTY_VALUE] = "FLOW_MAPPING_EMPTY_VALUE", + [FYPS_SINGLE_DOCUMENT_END] = "SINGLE_DOCUMENT_END", + [FYPS_END] = "END" +}; + +int fy_scan_comment(struct fy_parser *fyp, struct fy_atom *handle, bool single_line) +{ + int c, column, start_column, lines, scan_ahead; + bool has_ws; + + c = fy_parse_peek(fyp); + if (c != '#') + return -1; + + /* if it's no comment parsing is enabled just consume it */ + if (!(fyp->cfg.flags & FYPCF_PARSE_COMMENTS) || !handle) { + fy_advance(fyp, c); + while (!(fyp_is_lbz(fyp, c = fy_parse_peek(fyp)))) + fy_advance(fyp, c); + return 0; + } + + if (handle->fyi) + fy_input_unref(handle->fyi); + + memset(handle, 0, sizeof(*handle)); + + fy_fill_atom_start(fyp, handle); + + lines = 0; + start_column = fyp_column(fyp); + column = fyp_column(fyp); + scan_ahead = 0; + + has_ws = false; + + /* continuation must be a # on the same column */ + while (c == '#' && column == 
start_column) { + + lines++; + if (c == '#') { + /* chomp until line break */ + fy_advance(fyp, c); + while (!(fyp_is_lbz(fyp, c = fy_parse_peek(fyp)))) { + if (fy_is_ws(c)) + has_ws = true; + fy_advance(fyp, c); + } + + /* end of input break */ + if (fy_is_z(c)) + break; + } + + if (fy_is_ws(c)) + has_ws = true; + + if (!fyp_is_lb(fyp, c)) + break; + + column = 0; + + scan_ahead = 1; /* skipping over lb */ + while (fy_is_blank(c = fy_parse_peek_at(fyp, scan_ahead))) { + scan_ahead++; + column++; + } + + if (fy_is_z(c) || single_line) + break; + + if (c == '#' && column == start_column) { + fy_advance_by(fyp, scan_ahead); + c = fy_parse_peek(fyp); + } + } + + fy_fill_atom_end(fyp, handle); + handle->style = FYAS_COMMENT; + handle->direct_output = false; + handle->storage_hint = 0; + handle->storage_hint_valid = false; + handle->empty = false; + handle->has_lb = true; + handle->has_ws = has_ws; + handle->starts_with_ws = false; /* no-one cares for those */ + handle->starts_with_lb = false; + handle->ends_with_ws = false; + handle->ends_with_lb = false; + handle->trailing_lb = false; + handle->size0 = lines > 0; + handle->valid_anchor = false; + + /* and take the ref */ + fy_input_ref(handle->fyi); + + return 0; +} + +int fy_attach_comments_if_any(struct fy_parser *fyp, struct fy_token *fyt) +{ + struct fy_atom *handle; + struct fy_mark fym; + int c, rc; + + if (!fyp || !fyt) + return -1; + + if (!(fyp->cfg.flags & FYPCF_PARSE_COMMENTS)) + return 0; + + /* if a last comment exists and is valid */ + if (fy_atom_is_set(&fyp->last_comment) && + (handle = fy_token_comment_handle(fyt, fycp_top, true)) != NULL) { + assert (!fy_atom_is_set(handle)); + *handle = fyp->last_comment; + /* erase last comment */ + fy_atom_reset(&fyp->last_comment); + } + + /* right hand comment */ + + /* skip white space */ + while (fy_is_ws(c = fy_parse_peek(fyp))) + fy_advance(fyp, c); + + if (c == '#') { + fy_get_mark(fyp, &fym); + + /* it's a right comment only if it's on the same line */ + 
if (fym.line == fyt->handle.end_mark.line) + handle = fy_token_comment_handle(fyt, fycp_right, true); + else + handle = &fyp->last_comment; /* otherwise, last comment */ + + rc = fy_scan_comment(fyp, handle, false); + fyp_error_check(fyp, !rc, err_out_rc, + "fy_scan_comment() failed"); + } + return 0; + +err_out_rc: + return rc; +} + +int fy_scan_to_next_token(struct fy_parser *fyp) +{ + int c, c_after_ws, i, rc = 0; + bool tabs_allowed, sloppy_flow, no_indent; + ssize_t offset; + struct fy_atom *handle; + struct fy_reader *fyr; + char *str; + + fyr = fyp->reader; + + rc = fy_reader_input_scan_token_mark(fyr); + fyp_error_check(fyp, !rc, err_out_rc, + "fy_reader_input_scan_token_mark() failed"); + + /* skip BOM at the start of the stream */ + if (fyr->current_input_pos == 0 && (c = fy_parse_peek(fyp)) == FY_UTF8_BOM) { + + fy_advance(fyp, c); + /* reset column */ + fyr->column = 0; + } + + /* json does not have comments or tab handling... */ + if (fyp_json_mode(fyp)) { + fy_reader_skip_ws_cr_nl(fyr); + goto done; + } + + tabs_allowed = fyp->flow_level > 0 || !fyp->simple_key_allowed || fyp_tabsize(fyp) > 0; + sloppy_flow = fyp->flow_level > 0 && (fyp->cfg.flags & FYPCF_SLOPPY_FLOW_INDENTATION); + + for (;;) { + + /* skip white space, tabs are allowed in flow context */ + /* tabs also allowed in block context but not at start of line or after -?: */ + + /* if we're not in sloppy flow indent mode, a tab may not be used as indentation */ + if (!sloppy_flow) { + // fyp_notice(fyp, "not sloppy flow check c='%c' col=%d indent=%d\n", fy_parse_peek(fyp), fyp_column(fyp), fyp->indent); + c = -1; + while (fyp_column(fyp) <= fyp->indent && fy_is_ws(c = fy_parse_peek(fyp))) { + if (fy_is_tab(c)) + break; + fy_advance(fyp, c); + } + + /* it's an error, only if it is used for intentation */ + /* comments and empty lines are OK */ + if (fy_is_tab(c)) { + + /* skip all space and tabs */ + i = 0; + offset = -1; + while (fy_is_ws(c_after_ws = fy_parse_peek_at_internal(fyp, i, 
&offset))) + i++; + + no_indent = c_after_ws == '#' || fyp_is_lb(fyp, c_after_ws); + + FYP_PARSE_ERROR_CHECK(fyp, 0, 1, FYEM_SCAN, + no_indent, err_out, + "tab character may not be used as indentation"); + + /* advance by that amount */ + fy_advance_by(fyp, i); + } + } + + if (!tabs_allowed) { + /* skip space only */ + fy_reader_skip_space(fyr); + c = fy_parse_peek(fyp); + + /* it's a tab, here we go */ + if (fy_is_tab(c)) { + + /* we need to see if after ws follows a flow start marker */ + + /* skip all space and tabs */ + i = 0; + offset = -1; + while (fy_is_ws(c_after_ws = fy_parse_peek_at_internal(fyp, i, &offset))) + i++; + + /* flow start marker after spaces? allow tabs */ + if (c_after_ws == '{' || c_after_ws == '[') { + fy_advance_by(fyp, i); + c = fy_parse_peek(fyp); + } + } + } else { + fy_reader_skip_ws(fyr); + c = fy_parse_peek(fyp); + } + + /* comment? */ + if (c == '#') { + + handle = NULL; + if (fyp->cfg.flags & FYPCF_PARSE_COMMENTS) + handle = &fyp->last_comment; + + rc = fy_scan_comment(fyp, handle, false); + fyp_error_check(fyp, !rc, err_out_rc, + "fy_scan_comment() failed"); + + tabs_allowed = (fyp->flow_level || !fyp->simple_key_allowed) || fyp_tabsize(fyp); + } + + c = fy_parse_peek(fyp); + + /* not linebreak? we're done */ + if (!fyp_is_lb(fyp, c)) + goto done; + + /* line break */ + fy_advance(fyp, c); + + /* may start simple key (in block ctx) */ + if (!fyp->flow_level && !fyp->simple_key_allowed) { + fyp->simple_key_allowed = true; + fyp->tab_used_for_ws = false; + tabs_allowed = fyp->flow_level || !fyp->simple_key_allowed || fyp_tabsize(fyp); + fyp_scan_debug(fyp, "simple_key_allowed -> %s\n", fyp->simple_key_allowed ? 
"true" : "false"); + } + } + + fyp_scan_debug(fyp, "%s: no-next-token", __func__); + return 0; + +err_out: + rc = -1; +err_out_rc: + return rc; + +done: + rc = fy_reader_input_scan_token_mark(fyr); + fyp_error_check(fyp, !rc, err_out_rc, + "fy_reader_input_scan_token_mark() failed"); + + fy_utf8_format_a(fy_parse_peek(fyp), fyue_singlequote, &str); + fyp_scan_debug(fyp, "%s: next token starts with c='%s'", __func__, str); + return 0; +} + +static void fy_purge_required_simple_key_report(struct fy_parser *fyp, + struct fy_token *fyt, enum fy_token_type next_type) +{ + bool is_anchor, is_tag; + + is_anchor = fyt && fyt->type == FYTT_ANCHOR; + is_tag = fyt && fyt->type == FYTT_TAG; + + if (is_anchor || is_tag) { + if ((fyp->state == FYPS_BLOCK_MAPPING_VALUE || + fyp->state == FYPS_BLOCK_MAPPING_FIRST_KEY) && + next_type == FYTT_BLOCK_ENTRY) { + + FYP_TOKEN_ERROR(fyp, fyt, FYEM_SCAN, + "invalid %s indent for sequence", + is_anchor ? "anchor" : "tag"); + return; + } + + if (fyp->state == FYPS_BLOCK_MAPPING_VALUE && next_type == FYTT_SCALAR) { + FYP_TOKEN_ERROR(fyp, fyt, FYEM_SCAN, + "invalid %s indent for mapping", + is_anchor ? 
"anchor" : "tag"); + return; + } + } + + if (fyt) + FYP_TOKEN_ERROR(fyp, fyt, FYEM_SCAN, + "could not find expected ':'"); + else + FYP_PARSE_ERROR(fyp, 0, 1, FYEM_SCAN, + "could not find expected ':'"); +} + +static inline bool +fy_any_simple_keys(struct fy_parser *fyp) +{ + return !fy_simple_key_list_empty(&fyp->simple_keys); +} + +static int fy_purge_stale_simple_keys(struct fy_parser *fyp, bool *did_purgep, + enum fy_token_type next_type) +{ + struct fy_simple_key *fysk; + bool purge; + int line; + + *did_purgep = false; + while ((fysk = fy_simple_key_list_head(&fyp->simple_keys)) != NULL) { + +#ifdef FY_DEVMODE + fyp_scan_debug(fyp, "purge-check: flow_level=%d fysk->flow_level=%d fysk->mark.line=%d line=%d", + fyp->flow_level, fysk->flow_level, + fysk->mark.line, fyp_line(fyp)); + + fyp_debug_dump_simple_key(fyp, fysk, "purge-check: "); +#endif + + line = fysk->mark.line; + + /* in non-flow context we purge keys that are on different line */ + /* in flow context we purge only those with higher flow level */ + if (!fyp->flow_level) { + purge = fyp_line(fyp) > line; + } else { + purge = fyp->flow_level < fysk->flow_level; + /* also purge implicit complex keys on a different line */ + if (!purge && fysk->implicit_complex) { + purge = fyp_line(fyp) > line; + } + } + + if (!purge) + break; + + if (fysk->required) { + fy_purge_required_simple_key_report(fyp, fysk->token, next_type); + goto err_out; + } + +#ifdef FY_DEVMODE + fyp_debug_dump_simple_key(fyp, fysk, "purging: "); +#endif + + fy_simple_key_list_del(&fyp->simple_keys, fysk); + fy_parse_simple_key_recycle(fyp, fysk); + + *did_purgep = true; + } + + if (*did_purgep && fy_simple_key_list_empty(&fyp->simple_keys)) + fyp_scan_debug(fyp, "(purge) simple key list is now empty!"); + + return 0; + +err_out: + return -1; +} + +int fy_push_indent(struct fy_parser *fyp, int indent, bool generated_block_map, int indent_line) +{ + struct fy_indent *fyit; + + fyit = fy_parse_indent_alloc(fyp); + fyp_error_check(fyp, fyit 
!= NULL, err_out, + "fy_indent_alloc() failed"); + + fyit->indent = fyp->indent; + fyit->indent_line = fyp->indent_line; + fyit->generated_block_map = fyp->generated_block_map; + + /* push */ + fy_indent_list_push(&fyp->indent_stack, fyit); + + /* update current state */ + fyp->parent_indent = fyp->indent; + fyp->indent = indent; + fyp->indent_line = indent_line; + fyp->generated_block_map = generated_block_map; + + fyp_scan_debug(fyp, "push_indent %d -> %d - generated_block_map=%s\n", + fyp->parent_indent, fyp->indent, + fyp->generated_block_map ? "true" : "false"); + + return 0; + +err_out: + return -1; +} + +int fy_pop_indent(struct fy_parser *fyp) +{ + struct fy_indent *fyit; + int prev_indent __FY_DEBUG_UNUSED__; + + fyit = fy_indent_list_pop(&fyp->indent_stack); + if (!fyit) + return -1; + + prev_indent = fyp->indent; + + /* pop the indent and update */ + fyp->indent = fyit->indent; + fyp->generated_block_map = fyit->generated_block_map; + fyp->indent_line = fyit->indent_line; + + /* pop and recycle */ + fy_parse_indent_recycle(fyp, fyit); + + /* update the parent indent */ + fyit = fy_indent_list_head(&fyp->indent_stack); + fyp->parent_indent = fyit ? fyit->indent : -2; + + fyp_scan_debug(fyp, "pop indent %d -> %d (parent %d) - generated_block_map=%s\n", + prev_indent, fyp->indent, fyp->parent_indent, + fyp->generated_block_map ? 
"true" : "false"); + + return 0; +} + +int fy_parse_unroll_indent(struct fy_parser *fyp, int column) +{ + struct fy_token *fyt; + int rc; + + /* do nothing in flow context */ + if (fyp->flow_level) + return 0; + + /* pop while indentation level greater than argument */ + while (fyp->indent > column) { + + fyp_scan_debug(fyp, "unrolling: %d/%d", fyp->indent, column); + + /* create a block end token */ + fyt = fy_token_queue_simple(fyp, &fyp->queued_tokens, FYTT_BLOCK_END, 0); + fyp_error_check(fyp, fyt, err_out, + "fy_token_queue_simple() failed"); + + rc = fy_pop_indent(fyp); + fyp_error_check(fyp, !rc, err_out, + "fy_pop_indent() failed"); + + /* the ident line has now moved */ + fyp->indent_line = fyp_line(fyp); + } + return 0; +err_out: + return -1; +} + +void fy_remove_all_simple_keys(struct fy_parser *fyp) +{ + struct fy_simple_key *fysk; + + fyp_scan_debug(fyp, "SK: removing all"); + + while ((fysk = fy_simple_key_list_pop(&fyp->simple_keys)) != NULL) + fy_parse_simple_key_recycle(fyp, fysk); + + fyp->simple_key_allowed = true; + fyp->tab_used_for_ws = false; + fyp_scan_debug(fyp, "simple_key_allowed -> %s\n", fyp->simple_key_allowed ? "true" : "false"); +} + +struct fy_simple_key *fy_would_remove_required_simple_key(struct fy_parser *fyp) +{ + struct fy_simple_key *fysk; + + /* no simple key? */ + for (fysk = fy_simple_key_list_head(&fyp->simple_keys); + fysk && fysk->flow_level >= fyp->flow_level; + fysk = fy_simple_key_next(&fyp->simple_keys, fysk)) { + if (fysk->required) + return fysk; + } + + return NULL; +} + +int fy_remove_simple_key(struct fy_parser *fyp, enum fy_token_type next_type) +{ + struct fy_simple_key *fysk; + + /* no simple key? 
*/ + while ((fysk = fy_simple_key_list_first(&fyp->simple_keys)) != NULL && + fysk->flow_level >= fyp->flow_level) { + +#ifdef FY_DEVMODE + fyp_debug_dump_simple_key(fyp, fysk, "removing: "); +#endif + + /* remove it from the list */ + fy_simple_key_list_del(&fyp->simple_keys, fysk); + + if (fysk->required) { + fy_purge_required_simple_key_report(fyp, fysk->token, next_type); + goto err_out; + } + + fy_parse_simple_key_recycle(fyp, fysk); + } + + return 0; + +err_out: + fy_parse_simple_key_recycle(fyp, fysk); + return -1; +} + +struct fy_simple_key *fy_simple_key_find(struct fy_parser *fyp, const struct fy_token *fyt) +{ + struct fy_simple_key *fysk; + + if (!fyt) + return NULL; + + /* no simple key? */ + for (fysk = fy_simple_key_list_head(&fyp->simple_keys); fysk; + fysk = fy_simple_key_next(&fyp->simple_keys, fysk)) + if (fysk->token == fyt) + return fysk; + + return NULL; +} + +int fy_save_simple_key(struct fy_parser *fyp, struct fy_mark *mark, struct fy_mark *end_mark, + struct fy_token *fyt, bool required, int flow_level, + enum fy_token_type next_type) +{ + struct fy_simple_key *fysk; + bool did_purge; + int rc; + + fyp_error_check(fyp, fyt && mark && end_mark, err_out, + "illegal arguments to fy_save_simple_key"); + + if (fy_any_simple_keys(fyp)) { + rc = fy_purge_stale_simple_keys(fyp, &did_purge, next_type); + fyp_error_check(fyp, !rc, err_out_rc, + "fy_purge_stale_simple_keys() failed"); + } + + /* if no simple key is allowed, don't save */ + if (!fyp->simple_key_allowed) { + fyp_scan_debug(fyp, "not saving simple key; not allowed"); + return 0; + } + + /* remove pending complex key mark if in non flow context and a new line */ + if (!fyp->flow_level && fyp->pending_complex_key_column >= 0 && + mark->line > fyp->pending_complex_key_mark.line && + mark->column <= fyp->pending_complex_key_mark.column ) { + + fyp_scan_debug(fyp, "resetting pending_complex_key mark->line=%d line=%d\n", + mark->line, fyp->pending_complex_key_mark.line); + + 
fyp->pending_complex_key_column = -1; + fyp_scan_debug(fyp, "pending_complex_key_column -> %d", + fyp->pending_complex_key_column); + } + + fysk = fy_simple_key_list_head(&fyp->simple_keys); + + /* create new simple key if it does not exist or if has flow level less */ + if (!fysk || fysk->flow_level < fyp->flow_level) { + + fysk = fy_parse_simple_key_alloc(fyp); + fyp_error_check(fyp, fysk != NULL, err_out, + "fy_simple_key_alloc()"); + + fyp_scan_debug(fyp, "new simple key"); + + fy_simple_key_list_push(&fyp->simple_keys, fysk); + + } else { + fyp_error_check(fyp, !fysk->required, err_out, + "cannot save simple key, top is required"); + + if (fysk == fy_simple_key_list_tail(&fyp->simple_keys)) + fyp_scan_debug(fyp, "(reuse) simple key list is now empty!"); + + fyp_scan_debug(fyp, "reusing simple key"); + + } + + fysk->mark = *mark; + fysk->end_mark = *end_mark; + + fysk->required = required; + fysk->token = fyt; + fysk->flow_level = flow_level; + + /* if this is a an implicit flow collection key */ + fysk->implicit_complex = fyp->pending_complex_key_column < 0 && + (fyt->type == FYTT_FLOW_MAPPING_START || fyt->type == FYTT_FLOW_SEQUENCE_START); + + +#ifdef FY_DEVMODE + fyp_debug_dump_simple_key_list(fyp, &fyp->simple_keys, fysk, "fyp->simple_keys (saved): "); +#endif + + return 0; + +err_out: + rc = -1; +err_out_rc: + return rc; +} + +struct fy_simple_key_mark { + struct fy_mark mark; + bool required; + int flow_level; +}; + +void fy_get_simple_key_mark(struct fy_parser *fyp, struct fy_simple_key_mark *fyskm) +{ + fy_get_mark(fyp, &fyskm->mark); + fyskm->flow_level = fyp->flow_level; + fyskm->required = !fyp->flow_level && fyp->indent == fyp_column(fyp); +} + +int fy_save_simple_key_mark(struct fy_parser *fyp, + struct fy_simple_key_mark *fyskm, + enum fy_token_type next_type, + struct fy_mark *end_markp) +{ + struct fy_mark end_mark; + + if (!end_markp) { + fy_get_mark(fyp, &end_mark); + end_markp = &end_mark; + } + + return fy_save_simple_key(fyp, &fyskm->mark, 
end_markp, + fy_token_list_last(&fyp->queued_tokens), + fyskm->required, fyskm->flow_level, + next_type); +} + +int fy_parse_flow_push(struct fy_parser *fyp) +{ + struct fy_flow *fyf; + + fyf = fy_parse_flow_alloc(fyp); + fyp_error_check(fyp, fyf != NULL, err_out, + "fy_flow_alloc() failed!"); + fyf->flow = fyp->flow; + + fyf->pending_complex_key_column = fyp->pending_complex_key_column; + fyf->pending_complex_key_mark = fyp->pending_complex_key_mark; + + fyp_scan_debug(fyp, "flow_push: flow=%d pending_complex_key_column=%d", + (int)fyf->flow, + fyf->pending_complex_key_column); + + fy_flow_list_push(&fyp->flow_stack, fyf); + + if (fyp->pending_complex_key_column >= 0) { + fyp->pending_complex_key_column = -1; + fyp_scan_debug(fyp, "pending_complex_key_column -> %d", + fyp->pending_complex_key_column); + } + + return 0; +err_out: + return -1; +} + +int fy_parse_flow_pop(struct fy_parser *fyp) +{ + struct fy_flow *fyf; + + fyf = fy_flow_list_pop(&fyp->flow_stack); + fyp_error_check(fyp, fyf, err_out, + "no flow to pop"); + + fyp->flow = fyf->flow; + fyp->pending_complex_key_column = fyf->pending_complex_key_column; + fyp->pending_complex_key_mark = fyf->pending_complex_key_mark; + + fy_parse_flow_recycle(fyp, fyf); + + fyp_scan_debug(fyp, "flow_pop: flow=%d pending_complex_key_column=%d", + (int)fyp->flow, + fyp->pending_complex_key_column); + + return 0; + +err_out: + return -1; +} + +/* special case for allowing whitespace (including tabs) after -?: */ +static int fy_ws_indentation_check(struct fy_parser *fyp, bool *found_tabp, struct fy_mark *tab_mark) +{ + int c, adv, tab_adv; + bool indentation, found_tab; + + found_tab = false; + + /* not meaning in flow mode */ + if (fyp->flow_level) + goto out; + + /* scan forward, keeping track if we found a tab */ + adv = 0; + tab_adv = -1; + if (tab_mark) + fy_get_mark(fyp, tab_mark); + while (fy_is_ws(c = fy_parse_peek_at(fyp, adv))) { + if (!found_tab && fy_is_tab(c)) { + found_tab = true; + tab_adv = adv; + /* XXX 
somewhat hacky, space is 1 char only so adjust */ + if (tab_mark) { + tab_mark->input_pos += adv; + tab_mark->column += adv; + } + } + adv++; + } + + if (found_tab) { + indentation = fy_utf8_strchr("?:|>", c) || + (c == '-' && fyp_is_blankz(fyp, fy_parse_peek_at(fyp, adv + 1))); + + /* any kind of block indentation is not allowed */ + FYP_PARSE_ERROR_CHECK(fyp, tab_adv, 1, FYEM_SCAN, + !indentation, err_out, + "cannot use tab for indentation of block entry"); + fy_advance_by(fyp, tab_adv + 1); + } + + /* now chomp spaces only afterwards */ + while (fy_is_space(c = fy_parse_peek(fyp))) + fy_advance(fyp, c); + +out: + if (found_tabp) + *found_tabp = found_tab; + + return 0; + +err_out: + return -1; +} + +int fy_fetch_stream_start(struct fy_parser *fyp) +{ + struct fy_token *fyt; + + /* simple key is allowed */ + fyp->simple_key_allowed = true; + fyp->tab_used_for_ws = false; + fyp_scan_debug(fyp, "simple_key_allowed -> %s\n", fyp->simple_key_allowed ? "true" : "false"); + + fyt = fy_token_queue_simple(fyp, &fyp->queued_tokens, FYTT_STREAM_START, 0); + fyp_error_check(fyp, fyt, err_out, + "fy_token_queue_simple() failed"); + return 0; + +err_out: + return -1; +} + +int fy_fetch_stream_end(struct fy_parser *fyp) +{ + struct fy_token *fyt; + int rc; + + /* only reset the stream in regular mode */ + if (!fyp->parse_flow_only) + fy_reader_stream_end(fyp->reader); + + fy_remove_all_simple_keys(fyp); + + if (fyp_block_mode(fyp)) { + rc = fy_parse_unroll_indent(fyp, -1); + fyp_error_check(fyp, !rc, err_out_rc, + "fy_parse_unroll_indent() failed"); + } + + fyt = fy_token_queue_simple(fyp, &fyp->queued_tokens, FYTT_STREAM_END, 0); + fyp_error_check(fyp, fyt, err_out, + "fy_token_queue_simple() failed"); + + return 0; + +err_out: + rc = -1; +err_out_rc: + return rc; +} + +int fy_scan_tag_uri_length(struct fy_parser *fyp, int start) +{ + int c, cn, length; + ssize_t offset, offset1; + + /* first find the utf8 length of the uri */ + length = 0; + offset = -1; + while (fy_is_uri(c 
= fy_parse_peek_at_internal(fyp, start + length, &offset))) { + + offset1 = offset; + cn = fy_parse_peek_at_internal(fyp, start + length + 1, &offset1); + + /* special handling for detecting URIs ending in ,}] */ + if (fyp_is_blankz(fyp, cn) && fy_utf8_strchr(",}]", c)) + break; + + length++; + } + + return length; +} + +/* + * Check the %XX escapes found in the `length` characters peeked from offset `start`. + * Every escape must be two hex digits, and each run of escapes (its size taken from the first octet) + * must decode through fy_utf8_get() to a non-negative code point. + * Returns true when all escapes pass, false as soon as one check fails (each check goes through FYP_PARSE_ERROR_CHECK). + */ +bool fy_scan_tag_uri_is_valid(struct fy_parser *fyp, int start, int length) +{ + int i, j, k, width, c, w; + uint8_t octet, esc_octets[4]; + ssize_t offset; + + offset = -1; + for (i = 0; i < length; i++) { + c = fy_parse_peek_at_internal(fyp, start + i, &offset); + if (c != '%') + continue; + /* reset cursor */ + offset = -1; + + width = 0; + k = 0; + do { + /* % escape */ + FYP_PARSE_ERROR_CHECK(fyp, start + i, 1, FYEM_SCAN, + (length - i) >= 3, err_out, + "short URI escape"); + + /* continuation octets of a multi-byte sequence must be escapes too */ + if (width > 0) { + c = fy_parse_peek_at(fyp, start + i); + + FYP_PARSE_ERROR_CHECK(fyp, start + i, 1, FYEM_SCAN, + c == '%', err_out, + "missing URI escape"); + } + + octet = 0; + + for (j = 0; j < 2; j++) { + c = fy_parse_peek_at(fyp, start + i + 1 + j); + + FYP_PARSE_ERROR_CHECK(fyp, start + i + 1 + j, 1, FYEM_SCAN, + fy_is_hex(c), err_out, + "non hex URI escape"); + octet <<= 4; + if (c >= '0' && c <= '9') + octet |= c - '0'; + else if (c >= 'a' && c <= 'f') + octet |= 10 + c - 'a'; + else + octet |= 10 + c - 'A'; + } + /* the first octet decides how many escapes make up this character */ + if (!width) { + width = fy_utf8_width_by_first_octet(octet); + + FYP_PARSE_ERROR_CHECK(fyp, start + i + 1 + j, 1, FYEM_SCAN, + width >= 1 && width <= 4, err_out, + "bad width for hex URI escape"); + k = 0; + } + esc_octets[k++] = octet; + + /* skip over the 3 character escape */ + i += 3; + + } while (--width > 0); + + /* now convert to utf8 */ + c = fy_utf8_get(esc_octets, k, &w); + + FYP_PARSE_ERROR_CHECK(fyp, start + i, 1 + j, FYEM_SCAN, + c >= 0, err_out, + "bad utf8 URI escape"); + + /* i already points at the first character after the escape run; step back so the loop increment examines it instead of skipping it */ + i--; + } + return true; +err_out: + return false; +} + +int fy_scan_tag_handle_length(struct fy_parser *fyp, int start) +{ + int c, length, i, width; + ssize_t offset; + 
uint8_t octet; + bool first, was_esc; + + length = 0; + + offset = -1; + c = fy_parse_peek_at_internal(fyp, start + length, &offset); + + FYP_PARSE_ERROR_CHECK(fyp, start + length, 1, FYEM_SCAN, + c == '!', err_out, + "invalid tag handle start"); + length++; + + /* get first character of the tag */ + c = fy_parse_peek_at_internal(fyp, start + length, &offset); + + if (fy_is_ws(c)) + return length; + + /* if first character is !, empty handle */ + if (c == '!') { + length++; + return length; + } + + first = true; + was_esc = false; + + /* now loop while it's alphanumeric */ + for (;;) { + if (c == '%') { + + octet = 0; + + for (i = 0; i < 2; i++) { + c = fy_parse_peek_at_internal(fyp, start + length + 1 + i, &offset); + FYP_PARSE_ERROR_CHECK(fyp, start + length + 1 + i, 1, FYEM_SCAN, + fy_is_hex(c), err_out, + "non hex URI escape"); + octet <<= 4; + if (c >= '0' && c <= '9') + octet |= c - '0'; + else if (c >= 'a' && c <= 'f') + octet |= 10 + c - 'a'; + else + octet |= 10 + c - 'A'; + } + + width = fy_utf8_width_by_first_octet(octet); + FYP_PARSE_ERROR_CHECK(fyp, start + length, 3, FYEM_SCAN, + width == 1, err_out, + "Illegal non 1 byte utf8 tag handle character"); + c = octet; + was_esc = true; + + } else + was_esc = false; + + if ((first && fy_is_first_alnum(c)) || (!first && fy_is_alnum(c))) + length += was_esc ? 
3 : 1; + else + break; + + first = false; + c = fy_parse_peek_at_internal(fyp, start + length, &offset); + } + + /* if last character is !, copy it */ + if (c == '!') + length++; + + return length; + +err_out: + return -1; +} + +int fy_scan_yaml_version(struct fy_parser *fyp, struct fy_version *vers) +{ + int c, length, start_length, num; + ssize_t offset; + + memset(vers, 0, sizeof(*vers)); + + /* now loop while it's numeric */ + length = 0; + offset = -1; + num = 0; + while (fy_is_num(c = fy_parse_peek_at_internal(fyp, length, &offset))) { + length++; + num = num * 10; + num += c - '0'; + } + vers->major = num; + + FYP_PARSE_ERROR_CHECK(fyp, length, 1, FYEM_SCAN, + length > 0, err_out, + "version directive missing major number"); + + FYP_PARSE_ERROR_CHECK(fyp, length, 1, FYEM_SCAN, + c == '.', err_out, + "version directive missing dot separator"); + length++; + + start_length = length; + num = 0; + while (fy_is_num(c = fy_parse_peek_at_internal(fyp, length, &offset))) { + length++; + num = num * 10; + num += c - '0'; + } + vers->minor = num; + + /* note that the version is not checked for validity here */ + + FYP_PARSE_ERROR_CHECK(fyp, length, 1, FYEM_SCAN, + length > start_length, err_out, + "version directive missing minor number"); + + return length; + +err_out: + return -1; +} + +int fy_scan_tag_handle(struct fy_parser *fyp, bool is_directive, + struct fy_atom *handle) +{ + int length; + + length = fy_scan_tag_handle_length(fyp, 0); + fyp_error_check(fyp, length > 0, err_out, + "fy_scan_tag_handle_length() failed"); + + fy_fill_atom(fyp, length, handle); + + return 0; + +err_out: + return -1; +} + + +int fy_scan_tag_uri(struct fy_parser *fyp, bool is_directive, + struct fy_atom *handle) +{ + int length; + bool is_valid; + + length = fy_scan_tag_uri_length(fyp, 0); + fyp_error_check(fyp, length > 0, err_out, + "fy_scan_tag_uri_length() failed"); + + is_valid = fy_scan_tag_uri_is_valid(fyp, 0, length); + fyp_error_check(fyp, is_valid, err_out, + "tag URI is 
invalid"); + + fy_fill_atom(fyp, length, handle); + handle->style = FYAS_URI; /* this is a URI, need to handle URI escapes */ + + return 0; + +err_out: + return -1; +} + +int fy_scan_directive(struct fy_parser *fyp) +{ + int c, advance, version_length, tag_length, uri_length; + struct fy_version vers; + enum fy_reader_mode rdmode; + enum fy_token_type type = FYTT_NONE; + struct fy_atom handle; + bool is_uri_valid; + struct fy_token *fyt; + int i, lastc; + + if (!fy_parse_strcmp(fyp, "YAML") && fy_is_ws(fy_parse_peek_at(fyp, 4))) { + advance = 5; + type = FYTT_VERSION_DIRECTIVE; + } else if (!fy_parse_strcmp(fyp, "TAG") && fy_is_ws(fy_parse_peek_at(fyp, 3))) { + advance = 4; + type = FYTT_TAG_DIRECTIVE; + } else { + /* skip until linebreak (or #) */ + i = 0; + lastc = -1; + while ((c = fy_parse_peek_at(fyp, i)) != -1 && !fyp_is_lb(fyp, c)) { + if (fy_is_ws(lastc) && c == '#') + break; + lastc = c; + i++; + } + + FYP_PARSE_WARNING(fyp, 0, i, FYEM_SCAN, + "Unsupported directive"); + + if (fy_is_ws(lastc) && c == '#') { + while ((c = fy_parse_peek_at(fyp, i)) != -1 && !fyp_is_lb(fyp, c)) + i++; + } + + fy_advance_by(fyp, i); + + /* skip over linebreak too */ + if (fyp_is_lb(fyp, c)) + fy_advance(fyp, c); + + /* bump activity counter */ + fyp->token_activity_counter++; + + return 0; + } + + fyp_error_check(fyp, type != FYTT_NONE, err_out, + "neither YAML|TAG found"); + + /* advance */ + fy_advance_by(fyp, advance); + + /* skip white space */ + while (fy_is_ws(c = fy_parse_peek(fyp))) + fy_advance(fyp, c); + + fy_fill_atom_start(fyp, &handle); + + /* for version directive, parse it */ + if (type == FYTT_VERSION_DIRECTIVE) { + + version_length = fy_scan_yaml_version(fyp, &vers); + fyp_error_check(fyp, version_length > 0, err_out, + "fy_scan_yaml_version() failed"); + + /* set the reader's mode according to the version just scanned */ + rdmode = fy_version_compare(&vers, fy_version_make(1, 1)) <= 0 ? 
fyrm_yaml_1_1 : fyrm_yaml; + fy_reader_set_mode(fyp->reader, rdmode); + + fy_advance_by(fyp, version_length); + + fy_fill_atom_end(fyp, &handle); + + fyt = fy_token_queue(fyp, FYTT_VERSION_DIRECTIVE, &handle, &vers); + fyp_error_check(fyp, fyt, err_out, + "fy_token_queue() failed"); + + } else { + + tag_length = fy_scan_tag_handle_length(fyp, 0); + fyp_error_check(fyp, tag_length > 0, err_out, + "fy_scan_tag_handle_length() failed"); + + fy_advance_by(fyp, tag_length); + + c = fy_parse_peek(fyp); + FYP_PARSE_ERROR_CHECK(fyp, 0, 1, FYEM_SCAN, + fy_is_ws(c), err_out, + "missing whitespace after TAG"); + + /* skip white space */ + while (fy_is_ws(c = fy_parse_peek(fyp))) + fy_advance(fyp, c); + + uri_length = fy_scan_tag_uri_length(fyp, 0); + fyp_error_check(fyp, uri_length > 0, err_out, + "fy_scan_tag_uri_length() failed"); + + is_uri_valid = fy_scan_tag_uri_is_valid(fyp, 0, uri_length); + fyp_error_check(fyp, is_uri_valid, err_out, + "tag URI is invalid"); + + fy_advance_by(fyp, uri_length); + + fy_fill_atom_end(fyp, &handle); + handle.style = FYAS_URI; + + fyt = fy_token_queue(fyp, FYTT_TAG_DIRECTIVE, &handle, tag_length, uri_length, false); + fyp_error_check(fyp, fyt, err_out, + "fy_token_queue() failed"); + } + + /* skip until linebreak (or #) */ + i = 0; + lastc = -1; + while ((c = fy_parse_peek_at(fyp, i)) != -1 && !fyp_is_lb(fyp, c)) { + if (fy_is_ws(lastc) && c == '#') + break; + + FYP_PARSE_ERROR_CHECK(fyp, i, 1, FYEM_SCAN, + fy_is_ws(c) || fyp_is_lb(fyp, c), err_out, + "garbage after %s directive", + type == FYTT_VERSION_DIRECTIVE ? 
"version" : "tag"); + lastc = c; + i++; + } + + fy_advance_by(fyp, i); + + /* skip over linebreak */ + if (fyp_is_lb(fyp, c)) + fy_advance(fyp, c); + + return 0; +err_out: + return -1; +} + +int fy_fetch_directive(struct fy_parser *fyp) +{ + int rc; + + fy_remove_all_simple_keys(fyp); + + if (fyp_block_mode(fyp)) { + rc = fy_parse_unroll_indent(fyp, -1); + fyp_error_check(fyp, !rc, err_out_rc, + "fy_parse_unroll_indent() failed"); + } + + rc = fy_scan_directive(fyp); + fyp_error_check(fyp, !rc, err_out_rc, + "fy_scan_directive() failed"); + + return 0; + +err_out_rc: + return rc; +} + +int fy_fetch_document_indicator(struct fy_parser *fyp, enum fy_token_type type) +{ + int rc, c; + struct fy_token *fyt; + + fy_remove_all_simple_keys(fyp); + + if (fyp_block_mode(fyp)) { + rc = fy_parse_unroll_indent(fyp, -1); + fyp_error_check(fyp, !rc, err_out_rc, + "fy_parse_unroll_indent() failed"); + } + + fyp->simple_key_allowed = false; + fyp->tab_used_for_ws = false; + fyp_scan_debug(fyp, "simple_key_allowed -> %s\n", fyp->simple_key_allowed ? 
"true" : "false"); + + fyt = fy_token_queue_simple(fyp, &fyp->queued_tokens, type, 3); + fyp_error_check(fyp, fyt, err_out, + "fy_token_queue_simple() failed"); + + /* skip whitespace after the indicator */ + while (fy_is_ws(c = fy_parse_peek(fyp))) + fy_advance(fyp, c); + + return 0; + +err_out: + rc = -1; +err_out_rc: + return rc; +} + +static inline bool fy_flow_indent_check_internal(struct fy_parser *fyp, int column, int indent) +{ + return (!fyp->flow_level || column > indent) || + ((fyp->cfg.flags & FYPCF_SLOPPY_FLOW_INDENTATION) && fyp->flow_level); +} + +static inline bool fy_flow_indent_check(struct fy_parser *fyp) +{ + return fy_flow_indent_check_internal(fyp, fyp_column(fyp), fyp->indent); +} + +static inline bool fy_block_indent_check(struct fy_parser *fyp) +{ + return fyp->flow_level > 0 || fyp_column(fyp) > fyp->indent; +} + +int fy_fetch_flow_collection_mark_start(struct fy_parser *fyp, int c) +{ + enum fy_token_type type; + struct fy_simple_key_mark skm; + const char *typestr; + int rc = -1; + struct fy_token *fyt; + + if (c == '[') { + type = FYTT_FLOW_SEQUENCE_START; + typestr = "sequence"; + } else { + type = FYTT_FLOW_MAPPING_START; + typestr = "mapping"; + } + + FYP_PARSE_ERROR_CHECK(fyp, 0, 1, FYEM_SCAN, + fy_flow_indent_check(fyp), err_out, + "wrongly indented %s start in flow mode", typestr); + + fy_get_simple_key_mark(fyp, &skm); + + fyt = fy_token_queue_simple(fyp, &fyp->queued_tokens, type, 1); + fyp_error_check(fyp, fyt, err_out, + "fy_token_queue_simple() failed"); + + rc = fy_save_simple_key_mark(fyp, &skm, type, NULL); + fyp_error_check(fyp, !rc, err_out_rc, + "fy_save_simple_key_mark() failed"); + + /* increase flow level */ + fyp->flow_level++; + fyp_error_check(fyp, fyp->flow_level, err_out, + "overflow for the flow level counter"); + + /* push the current flow to the stack */ + rc = fy_parse_flow_push(fyp); + fyp_error_check(fyp, !rc, err_out_rc, + "fy_parse_flow_push() failed"); + /* set the current flow mode */ + fyp->flow = c 
== '[' ? FYFT_SEQUENCE : FYFT_MAP; + + fyp->simple_key_allowed = true; + fyp->tab_used_for_ws = false; + fyp_scan_debug(fyp, "simple_key_allowed -> %s\n", fyp->simple_key_allowed ? "true" : "false"); + + /* the comment indicator must have at least a space */ + c = fy_parse_peek(fyp); + + FYP_PARSE_ERROR_CHECK(fyp, 0, 1, FYEM_SCAN, + c != '#', err_out, + "invalid comment after %s start", typestr); + return 0; + +err_out: + rc = -1; + +err_out_rc: + return rc; +} + +int fy_fetch_flow_collection_mark_end(struct fy_parser *fyp, int c) +{ + enum fy_token_type type = FYTT_NONE; + enum fy_flow_type flow; + const char *typestr, *markerstr; + int i, rc; + bool did_purge; + struct fy_mark mark; + struct fy_token *fyt; + + fy_get_mark(fyp, &mark); + + if (c == ']') { + flow = FYFT_SEQUENCE; + type = FYTT_FLOW_SEQUENCE_END; + typestr = "sequence"; + markerstr = "bracket"; + } else { + flow = FYFT_MAP; + type = FYTT_FLOW_MAPPING_END; + typestr = "mapping"; + markerstr = "brace"; + } + + FYP_MARK_ERROR_CHECK(fyp, &fyp->last_comma_mark, &fyp->last_comma_mark, FYEM_SCAN, + !fyp_json_mode(fyp) || !fyp->last_was_comma, err_out, + "JSON disallows trailing comma before closing %s", markerstr); + + FYP_PARSE_ERROR_CHECK(fyp, 0, 1, FYEM_SCAN, + fy_flow_indent_check(fyp), err_out, + "wrongly indented %s end in flow mode", typestr); + + rc = fy_remove_simple_key(fyp, type); + fyp_error_check(fyp, !rc, err_out_rc, + "fy_remove_simple_key() failed"); + + FYP_PARSE_ERROR_CHECK(fyp, 0, 1, FYEM_SCAN, + fyp->flow_level, err_out, + "flow %s with invalid extra closing %s", + typestr, markerstr); + + fyp->flow_level--; + + FYP_PARSE_ERROR_CHECK(fyp, 0, 1, FYEM_SCAN, + fyp->flow == flow, err_out, + "mismatched flow %s end", typestr); + + /* pop the flow type */ + rc = fy_parse_flow_pop(fyp); + fyp_error_check(fyp, !rc, err_out_rc, + "fy_parse_flow_pop() failed"); + + fyp->simple_key_allowed = false; + fyp->tab_used_for_ws = false; + fyp_scan_debug(fyp, "simple_key_allowed -> %s\n", + 
fyp->simple_key_allowed ? "true" : "false"); + + fyt = fy_token_queue_simple(fyp, &fyp->queued_tokens, type, 1); + fyp_error_check(fyp, fyt, err_out, + "fy_token_queue_simple() failed"); + + if (fyp->parse_flow_only && fyp->flow_level == 0) { + rc = fy_fetch_stream_end(fyp); + fyp_error_check(fyp, !rc, err_out_rc, + "fy_fetch_stream_end() failed"); + return 0; + } + + /* the comment indicator must have at least a space */ + c = fy_parse_peek(fyp); + + FYP_PARSE_ERROR_CHECK(fyp, 0, 1, FYEM_SCAN, + c != '#', err_out, + "invalid comment after end of flow %s", typestr); + + /* due to the weirdness with simple keys and multiline flow keys scan forward + * until a linebreak, ';', or anything else */ + for (i = 0; ; i++) { + c = fy_parse_peek_at(fyp, i); + if (c < 0 || c == ':' || fyp_is_lb(fyp, c) || !fy_is_ws(c)) + break; + } + + /* we must be a key, purge */ + if (c == ':') { + if (fy_any_simple_keys(fyp)) { + rc = fy_purge_stale_simple_keys(fyp, &did_purge, type); + fyp_error_check(fyp, !rc, err_out_rc, + "fy_purge_stale_simple_keys() failed"); + + /* if we did purge and the the list is now empty, we're hosed */ + if (did_purge && fy_simple_key_list_empty(&fyp->simple_keys)) { + FYP_PARSE_ERROR(fyp, 0, 1, FYEM_SCAN, + "invalid multiline flow %s key ", typestr); + goto err_out; + } + } + } + + return 0; + +err_out: + rc = -1; +err_out_rc: + return rc; +} + +int fy_fetch_flow_collection_entry(struct fy_parser *fyp, int c) +{ + enum fy_token_type type = FYTT_NONE; + struct fy_token *fyt, *fyt_last; + struct fy_atom *handle; + int rc; + + type = FYTT_FLOW_ENTRY; + + FYP_PARSE_ERROR_CHECK(fyp, 0, 1, FYEM_SCAN, + fy_flow_indent_check(fyp), err_out, + "wrongly indented entry seperator in flow mode"); + + /* transform '? a,' to '? 
a: ,' */ + if (fyp->pending_complex_key_column >= 0) { + + fyt = fy_token_queue_simple(fyp, &fyp->queued_tokens, FYTT_VALUE, 0); + fyp_error_check(fyp, fyt, err_out, + "fy_token_queue_simple() failed"); + + fyp->pending_complex_key_column = -1; + + } + + rc = fy_remove_simple_key(fyp, type); + fyp_error_check(fyp, !rc, err_out_rc, + "fy_remove_simple_key() failed"); + + fyp->simple_key_allowed = true; + fyp->tab_used_for_ws = false; + fyp_scan_debug(fyp, "simple_key_allowed -> %s\n", fyp->simple_key_allowed ? "true" : "false"); + + fyt_last = fy_token_list_tail(&fyp->queued_tokens); + + fyt = fy_token_queue_simple(fyp, &fyp->queued_tokens, type, 1); + fyp_error_check(fyp, fyt, err_out, + "fy_token_queue_simple() failed"); + + /* the comment indicator must have at least a space */ + c = fy_parse_peek(fyp); + + FYP_PARSE_ERROR_CHECK(fyp, 0, 1, FYEM_SCAN, + c != '#', err_out, + "invalid comment after comma"); + + /* skip white space */ + while (fy_is_ws(c = fy_parse_peek(fyp))) + fy_advance(fyp, c); + + if (c == '#') { + if (fyt_last) + fyt = fyt_last; + + handle = NULL; + if (fyp->cfg.flags & FYPCF_PARSE_COMMENTS) + handle = fy_token_comment_handle(fyt, fycp_right, true); + + rc = fy_scan_comment(fyp, handle, true); + fyp_error_check(fyp, !rc, err_out_rc, + "fy_scan_comment() failed"); + } + + return 0; +err_out: + rc = -1; +err_out_rc: + return rc; +} + +int fy_fetch_block_entry(struct fy_parser *fyp, int c) +{ + int rc; + struct fy_mark mark; + struct fy_simple_key *fysk; + struct fy_token *fyt; + + fyp_error_check(fyp, c == '-', err_out, + "illegal block entry"); + + FYP_PARSE_ERROR_CHECK(fyp, 0, 1, FYEM_SCAN, + (!fyp->flow_level || (fyp_column(fyp) + 2) > fyp->indent) || + ((fyp->cfg.flags & FYPCF_SLOPPY_FLOW_INDENTATION) && fyp->flow_level), err_out, + "wrongly indented block sequence in flow mode"); + + if (!(fyp->flow_level || fyp->simple_key_allowed)) { + if (!fyp->simple_key_allowed && fyp->state == FYPS_BLOCK_MAPPING_VALUE) + FYP_PARSE_ERROR(fyp, 0, 1, 
FYEM_SCAN, + "block sequence on the same line as a mapping key"); + else if (fyp->state == FYPS_BLOCK_SEQUENCE_FIRST_ENTRY || + fyp->state == FYPS_BLOCK_SEQUENCE_ENTRY) + FYP_PARSE_ERROR(fyp, 0, 1, FYEM_SCAN, + "block sequence on the same line as a previous item"); + else + FYP_PARSE_ERROR(fyp, 0, 1, FYEM_SCAN, + "block sequence entries not allowed in this context"); + goto err_out; + } + + /* we have to save the start mark */ + fy_get_mark(fyp, &mark); + + if (fyp_block_mode(fyp) && fyp->indent < fyp_column(fyp)) { + + /* push the new indent level */ + rc = fy_push_indent(fyp, fyp_column(fyp), false, fyp_line(fyp)); + fyp_error_check(fyp, !rc, err_out_rc, + "fy_push_indent() failed"); + + fyt = fy_token_queue_simple_internal(fyp, &fyp->queued_tokens, FYTT_BLOCK_SEQUENCE_START, 0); + fyp_error_check(fyp, fyt, err_out, + "fy_token_queue_simple_internal() failed"); + } + + if (c == '-' && fyp->flow_level) { + /* this is an error, but we let the parser catch it */ + ; + } + + fysk = fy_would_remove_required_simple_key(fyp); + + if (fysk) { + if (fysk->token) { + if (fysk->token->type == FYTT_ANCHOR || fysk->token->type == FYTT_TAG) + FYP_TOKEN_ERROR(fyp, fysk->token, FYEM_SCAN, + "invalid %s indent for sequence", + fysk->token->type == FYTT_ANCHOR ? + "anchor" : "tag"); + else + FYP_TOKEN_ERROR(fyp, fysk->token, FYEM_SCAN, + "missing ':'"); + } else + FYP_PARSE_ERROR(fyp, 0, 1, FYEM_SCAN, + "missing ':'"); + goto err_out; + } + + rc = fy_remove_simple_key(fyp, FYTT_BLOCK_ENTRY); + fyp_error_check(fyp, !rc, err_out_rc, + "fy_remove_simple_key() failed"); + + fyp->simple_key_allowed = true; + fyp->tab_used_for_ws = false; + fyp_scan_debug(fyp, "simple_key_allowed -> %s\n", fyp->simple_key_allowed ? 
"true" : "false"); + + fyt = fy_token_queue_simple(fyp, &fyp->queued_tokens, FYTT_BLOCK_ENTRY, 1); + fyp_error_check(fyp, fyt, err_out, + "fy_token_queue_simple() failed"); + + rc = fy_ws_indentation_check(fyp, NULL, NULL); + fyp_error_check(fyp, !rc, err_out_rc, + "fy_ws_indentation_check() failed"); + + return 0; + +err_out: + rc = -1; +err_out_rc: + return rc; +} + +int fy_fetch_key(struct fy_parser *fyp, int c) +{ + int rc; + struct fy_mark mark; + struct fy_simple_key_mark skm; + bool target_simple_key_allowed; + struct fy_token *fyt; + struct fy_atom *handle; + bool found_tab; + struct fy_mark tab_mark; + + fyp_error_check(fyp, c == '?', err_out, + "illegal block entry or key mark"); + + FYP_PARSE_ERROR_CHECK(fyp, 0, 1, FYEM_SCAN, + fy_flow_indent_check(fyp), err_out, + "wrongly indented mapping key in flow mode"); + + fy_get_simple_key_mark(fyp, &skm); + + /* we have to save the start mark */ + fy_get_mark(fyp, &mark); + + FYP_PARSE_ERROR_CHECK(fyp, 0, 1, FYEM_SCAN, + fyp->flow_level || fyp->simple_key_allowed, err_out, + "invalid mapping key (not allowed in this context)"); + + if (fyp_block_mode(fyp) && fyp->indent < fyp_column(fyp)) { + + /* push the new indent level */ + rc = fy_push_indent(fyp, fyp_column(fyp), true, fyp_line(fyp)); + fyp_error_check(fyp, !rc, err_out_rc, + "fy_push_indent() failed"); + + fyt = fy_token_queue_simple_internal(fyp, &fyp->queued_tokens, FYTT_BLOCK_MAPPING_START, 0); + fyp_error_check(fyp, fyt, err_out, + "fy_token_queue_simple_internal() failed"); + } + + rc = fy_remove_simple_key(fyp, FYTT_KEY); + fyp_error_check(fyp, !rc, err_out_rc, + "fy_remove_simple_key() failed"); + + target_simple_key_allowed = !fyp->flow_level; + + fyp->pending_complex_key_column = fyp_column(fyp); + fyp->pending_complex_key_mark = mark; + fyp_scan_debug(fyp, "pending_complex_key_column %d", + fyp->pending_complex_key_column); + + fyt = fy_token_queue_simple(fyp, &fyp->queued_tokens, FYTT_KEY, 1); + fyp_error_check(fyp, fyt, err_out_rc, + 
"fy_token_queue_simple() failed"); + /* extra KEY data */ + fyt->key.flow_level = fyp->flow_level; + + fyp->simple_key_allowed = target_simple_key_allowed; + fyp_scan_debug(fyp, "simple_key_allowed -> %s\n", fyp->simple_key_allowed ? "true" : "false"); + + rc = fy_ws_indentation_check(fyp, &found_tab, &tab_mark); + fyp_error_check(fyp, !rc, err_out_rc, + "fy_ws_indentation_check() failed"); + + /* record whether a tab was used for indentation */ + if (fyp->simple_key_allowed && found_tab) { + fyp->tab_used_for_ws = true; + fyp->last_tab_used_for_ws_mark = tab_mark; + } else + fyp->tab_used_for_ws = false; // XXX + + /* comment? */ + if (c == '#') { + + handle = NULL; + if (fyp->cfg.flags & FYPCF_PARSE_COMMENTS) + handle = fy_token_comment_handle(fyt, fycp_right, true); + + rc = fy_scan_comment(fyp, handle, false); + fyp_error_check(fyp, !rc, err_out_rc, + "fy_scan_comment() failed"); + } + + return 0; + +err_out: + rc = -1; +err_out_rc: + return rc; +} + +int fy_fetch_value(struct fy_parser *fyp, int c) +{ + struct fy_token_list sk_tl; + struct fy_simple_key *fysk = NULL; + struct fy_mark mark, mark_insert, mark_end_insert; + struct fy_token *fyt_insert, *fyt; + bool target_simple_key_allowed, is_complex, has_bmap; + bool push_bmap_start, push_key_only, did_purge, final_complex_key; + bool is_multiline __FY_DEBUG_UNUSED__; + struct fy_atom *chandle; + bool found_tab; + struct fy_mark tab_mark; + int rc; + + fyp_error_check(fyp, c == ':', err_out, + "illegal value mark"); + + FYP_PARSE_ERROR_CHECK(fyp, 0, 1, FYEM_SCAN, + !fyp_json_mode(fyp) || fyp->flow == FYFT_MAP, err_out, + "JSON considers keys when not in mapping context invalid"); + + /* special handling for :: weirdness */ + if (!fyp_json_mode(fyp) && fyp->flow_level > 0) { + int adv, nextc, nextcol, tabsize, indent; + + /* this requires some explanation... 
+ * we need to detect x::x, x: :x, and x:\n:x as the same + */ + adv = 1; + indent = fyp->indent; + nextcol = fyp_column(fyp) + 1; + tabsize = fyp_tabsize(fyp); + + while ((nextc = fy_parse_peek_at(fyp, adv)) > 0) { + + if (fyp_is_lb(fyp, nextc)) + nextcol = 0; + else if (fy_is_tab(nextc)) { + if (tabsize) + nextcol += tabsize - (nextcol % tabsize); + else + nextcol++; + } else if (fy_is_space(nextc)) + nextcol++; + else { + if (!fy_flow_indent_check_internal(fyp, nextcol, indent)) + nextc = -1; + break; + } + + adv++; + } + + fyp->colon_follows_colon = nextc == ':'; + } else + fyp->colon_follows_colon = false; + + fy_get_mark(fyp, &mark); + + fy_token_list_init(&sk_tl); + + FYP_PARSE_ERROR_CHECK(fyp, 0, 1, FYEM_SCAN, + fy_flow_indent_check(fyp), err_out, + "wrongly indented mapping value in flow mode"); + + if (fy_any_simple_keys(fyp)) { + rc = fy_purge_stale_simple_keys(fyp, &did_purge, FYTT_VALUE); + fyp_error_check(fyp, !rc, err_out_rc, + "fy_purge_stale_simple_keys() failed"); + } + + /* get the simple key (if available) for the value */ + fysk = fy_simple_key_list_head(&fyp->simple_keys); + if (fysk && fysk->flow_level == fyp->flow_level) + fy_simple_key_list_del(&fyp->simple_keys, fysk); + else + fysk = NULL; + + if (!fysk) { + fyp_scan_debug(fyp, "no simple key flow_level=%d", fyp->flow_level); + + fyt_insert = fy_token_list_tail(&fyp->queued_tokens); + mark_insert = mark; + mark_end_insert = mark; + } else { + assert(fysk->flow_level == fyp->flow_level); + fyt_insert = fysk->token; + mark_insert = fysk->mark; + mark_end_insert = fysk->end_mark; + + fyp_scan_debug(fyp, "have simple key flow_level=%d", fyp->flow_level); + } + + fyp_scan_debug(fyp, "flow_level=%d, column=%d parse_indent=%d", + fyp->flow_level, mark_insert.column, fyp->indent); + + is_complex = fyp->pending_complex_key_column >= 0; + final_complex_key = is_complex && (fyp->flow_level || fyp_column(fyp) <= fyp->pending_complex_key_mark.column); + is_multiline = mark_end_insert.line < 
fyp_line(fyp); + has_bmap = fyp->generated_block_map; + push_bmap_start = (!fyp->flow_level && mark_insert.column > fyp->indent); + push_key_only = (!is_complex && (fyp->flow_level || has_bmap)) || + (is_complex && !final_complex_key); + + fyp_scan_debug(fyp, "mark_insert.line=%d/%d mark_end_insert.line=%d/%d fyp->line=%d", + mark_insert.line, mark_insert.column, + mark_end_insert.line, mark_end_insert.column, + fyp_line(fyp)); + + fyp_scan_debug(fyp, "simple_key_allowed=%s is_complex=%s final_complex_key=%s is_multiline=%s has_bmap=%s push_bmap_start=%s push_key_only=%s", + fyp->simple_key_allowed ? "true" : "false", + is_complex ? "true" : "false", + final_complex_key ? "true" : "false", + is_multiline ? "true" : "false", + has_bmap ? "true" : "false", + push_bmap_start ? "true" : "false", + push_key_only ? "true" : "false"); + + if (!is_complex && is_multiline && (!fyp->flow_level || fyp->flow != FYFT_MAP)) { + FYP_PARSE_ERROR(fyp, 0, 1, FYEM_SCAN, "Illegal placement of ':' indicator"); + goto err_out; + } + + /* special handling for ?: */ + if (fyp->tab_used_for_ws) { + FYP_PARSE_ERROR(fyp, 0, 1, FYEM_SCAN, "Indentation used tabs for ':' indicator"); + goto err_out; + } + + if (push_bmap_start) { + + assert(!fyp->flow_level); + + fyp_scan_debug(fyp, "--- parse_roll"); + + /* push the new indent level */ + rc = fy_push_indent(fyp, mark_insert.column, true, mark_insert.line); + fyp_error_check(fyp, !rc, err_out_rc, + "fy_push_indent() failed"); + + fyt = fy_token_queue_simple_internal(fyp, &sk_tl, FYTT_BLOCK_MAPPING_START, 0); + fyp_error_check(fyp, fyt, err_out, + "fy_token_queue_simple_internal() failed"); + + /* update with this mark */ + fyt->handle.start_mark = fyt->handle.end_mark = mark_insert; + } + + if (push_bmap_start || push_key_only) { + + fyt = fy_token_queue_simple_internal(fyp, &sk_tl, FYTT_KEY, 0); + fyp_error_check(fyp, fyt, err_out, + "fy_token_queue_simple_internal() failed"); + + /* update with the flow level */ + fyt->key.flow_level = 
fyp->flow_level; + } + +#ifdef FY_DEVMODE + fyp_debug_dump_token(fyp, fyt_insert, "insert-token: "); + fyp_debug_dump_token_list(fyp, &fyp->queued_tokens, fyt_insert, "fyp->queued_tokens (before): "); + fyp_debug_dump_token_list(fyp, &sk_tl, NULL, "sk_tl: "); +#endif + + if (fyt_insert) { + + if (fysk) + fy_token_list_splice_before(&fyp->queued_tokens, fyt_insert, &sk_tl); + else + fy_token_list_splice_after(&fyp->queued_tokens, fyt_insert, &sk_tl); + } else + fy_token_lists_splice(&fyp->queued_tokens, &sk_tl); + +#ifdef FY_DEVMODE + fyp_debug_dump_token_list(fyp, &fyp->queued_tokens, fyt_insert, "fyp->queued_tokens (after): "); +#endif + + target_simple_key_allowed = fysk ? false : !fyp->flow_level; + + fyt = fy_token_queue_simple(fyp, &fyp->queued_tokens, FYTT_VALUE, 1); + fyp_error_check(fyp, fyt, err_out, + "fy_token_queue_simple() failed"); + + if (fysk) { + fy_parse_simple_key_recycle(fyp, fysk); + fysk = NULL; + } + + + fyp->simple_key_allowed = target_simple_key_allowed; + fyp_scan_debug(fyp, "simple_key_allowed -> %s\n", fyp->simple_key_allowed ? "true" : "false"); + + if (is_complex) { + rc = fy_ws_indentation_check(fyp, &found_tab, &tab_mark); + fyp_error_check(fyp, !rc, err_out_rc, + "fy_ws_indentation_check() failed"); + + /* record whether a tab was used for indentation */ + if (fyp->simple_key_allowed && found_tab) { + fyp->tab_used_for_ws = true; + fyp->last_tab_used_for_ws_mark = tab_mark; + } else + fyp->tab_used_for_ws = false; // XXX + } else + fyp->tab_used_for_ws = false; + + if (final_complex_key) { + fyp->pending_complex_key_column = -1; + fyp_scan_debug(fyp, "pending_complex_key_column -> %d", + fyp->pending_complex_key_column); + } + + if (fyt_insert) { + /* eat whitespace */ + while (fy_is_blank(c = fy_parse_peek(fyp))) + fy_advance(fyp, c); + + /* comment? 
*/ + if (c == '#') { + chandle = NULL; + if (fyp->cfg.flags & FYPCF_PARSE_COMMENTS) + chandle = fy_token_comment_handle(fyt_insert, fycp_right, true); + + rc = fy_scan_comment(fyp, chandle, false); + fyp_error_check(fyp, !rc, err_out_rc, + "fy_scan_comment() failed"); + } + + } + + return 0; + +err_out: + rc = -1; +err_out_rc: + fy_parse_simple_key_recycle(fyp, fysk); + return rc; +} + +int fy_fetch_anchor_or_alias(struct fy_parser *fyp, int c) +{ + struct fy_atom handle; + enum fy_token_type type; + int i = 0, rc = -1, length; + struct fy_simple_key_mark skm; + struct fy_token *fyt; + const char *typestr; + + fyp_error_check(fyp, c == '*' || c == '&', err_out, + "illegal anchor mark (not '*' or '&')"); + + if (c == '*') { + type = FYTT_ALIAS; + typestr = "alias"; + } else { + type = FYTT_ANCHOR; + typestr = "anchor"; + } + + FYP_PARSE_ERROR_CHECK(fyp, 0, 1, FYEM_SCAN, + fy_flow_indent_check(fyp), err_out, + "wrongly indented %s in flow mode", typestr); + + /* we have to save the start mark (including the anchor/alias start) */ + fy_get_simple_key_mark(fyp, &skm); + + /* skip over the anchor mark */ + fy_advance(fyp, c); + + /* start mark */ + fy_fill_atom_start(fyp, &handle); + + length = 0; + + while ((c = fy_parse_peek(fyp)) >= 0) { + if (fyp_is_blankz(fyp, c) || fy_is_flow_indicator(c) || + fy_is_unicode_control(c) || fy_is_unicode_space(c)) + break; + fy_advance(fyp, c); + length++; + } + + if (!fyp_is_blankz(fyp, c) && !fy_is_flow_indicator(c)) { + + FYP_PARSE_ERROR_CHECK(fyp, 0, 1, FYEM_SCAN, + fy_is_unicode_control(c), err_out, + "illegal unicode control character in %s", typestr); + + FYP_PARSE_ERROR_CHECK(fyp, 0, 1, FYEM_SCAN, + fy_is_unicode_space(c), err_out, + "illegal unicode space character in %s", typestr); + } + + FYP_PARSE_ERROR_CHECK(fyp, 0, 1, FYEM_SCAN, + c != FYUG_INV, err_out, + "invalid character in %s", typestr); + + FYP_PARSE_ERROR_CHECK(fyp, 0, 1, FYEM_SCAN, + c != FYUG_PARTIAL, err_out, + "partial character in %s", typestr); + + 
FYP_PARSE_ERROR_CHECK(fyp, 0, 1, FYEM_SCAN, + length > 0, err_out, + "invalid %s detected", typestr); + + fy_fill_atom_end(fyp, &handle); + + handle.storage_hint = length; + handle.storage_hint_valid = true; + handle.direct_output = true; + handle.empty = false; + handle.has_lb = false; + handle.has_ws = false; + handle.starts_with_ws = false; + handle.starts_with_lb = false; + handle.ends_with_ws = false; + handle.ends_with_lb = false; + handle.trailing_lb = false; + handle.size0 = false; + handle.valid_anchor = true; + + if (type == FYTT_ALIAS) + fyt = fy_token_queue(fyp, type, &handle, NULL); + else + fyt = fy_token_queue(fyp, type, &handle); + fyp_error_check(fyp, fyt, err_out_rc, + "fy_token_queue() failed"); + + /* scan forward for '-' block sequence indicator */ + if (type == FYTT_ANCHOR && !fyp->flow_level) { + for (i = 0; ; i++) { + c = fy_parse_peek_at(fyp, i); + if (c < 0 || fyp_is_lb(fyp, c) || !fy_is_ws(c)) + break; + } + + /* if it's '-' followed by ws we have a problem */ + FYP_PARSE_ERROR_CHECK(fyp, i, 1, FYEM_SCAN, + !(c == '-' && fy_is_ws(fy_parse_peek_at(fyp, i + 1))), err_out, + "illegal block sequence on the same line as anchor"); + } + + + rc = fy_save_simple_key_mark(fyp, &skm, type, NULL); + fyp_error_check(fyp, !rc, err_out_rc, + "fy_save_simple_key_mark() failed"); + + fyp->simple_key_allowed = false; + fyp->tab_used_for_ws = false; + fyp_scan_debug(fyp, "simple_key_allowed -> %s\n", fyp->simple_key_allowed ? 
"true" : "false"); + + return 0; + +err_out: + rc = -1; +err_out_rc: + return rc; +} + +int fy_fetch_tag(struct fy_parser *fyp, int c) +{ + struct fy_atom handle; + int rc = -1, total_length, handle_length, uri_length, i, prefix_length, suffix_length; + const char *handlep; + bool is_valid; + struct fy_simple_key_mark skm; + struct fy_document_state *fyds; + struct fy_token *fyt_td; + struct fy_token *fyt; + + fyp_error_check(fyp, c == '!', err_out, + "illegal tag mark (not '!')"); + + FYP_PARSE_ERROR_CHECK(fyp, 0 ,1, FYEM_SCAN, + fy_flow_indent_check(fyp), err_out, + "wrongly indented tag in flow mode"); + + fyds = fyp->current_document_state; + + fy_get_simple_key_mark(fyp, &skm); + + if (fy_parse_peek_at(fyp, 1) == '<') { + /* skip over '!<' and '>' */ + prefix_length = 2; + suffix_length = 1; + } else + prefix_length = suffix_length = 0; + + if (prefix_length) + handle_length = 0; /* set the handle to '' */ + else { + /* either !suffix or !handle!suffix */ + /* we scan back to back, and split handle/suffix */ + handle_length = fy_scan_tag_handle_length(fyp, prefix_length); + fyp_error_check(fyp, handle_length > 0, err_out, + "fy_scan_tag_handle_length() failed"); + } + + uri_length = fy_scan_tag_uri_length(fyp, prefix_length + handle_length); + fyp_error_check(fyp, uri_length >= 0, err_out, + "fy_scan_tag_uri_length() failed"); + + /* a handle? */ + if (!prefix_length && (handle_length == 0 || fy_parse_peek_at(fyp, handle_length - 1) != '!')) { + /* special case, '!', handle set to '' and suffix to '!' 
*/ + if (handle_length == 1 && uri_length == 0) { + handle_length = 0; + uri_length = 1; + } else { + uri_length = handle_length - 1 + uri_length; + handle_length = 1; + } + } + + is_valid = fy_scan_tag_uri_is_valid(fyp, prefix_length + handle_length, uri_length); + fyp_error_check(fyp, is_valid, err_out, + "tag URI is invalid"); + + if (suffix_length > 0) { + c = fy_parse_peek_at(fyp, prefix_length + handle_length + uri_length); + + FYP_PARSE_ERROR_CHECK(fyp, prefix_length + handle_length + uri_length, 1, FYEM_SCAN, + c == '>', err_out, + "missing '>' uri terminator"); + } + + total_length = prefix_length + handle_length + uri_length + suffix_length; + fy_fill_atom(fyp, total_length, &handle); + handle.style = FYAS_URI; /* this is a URI, need to handle URI escapes */ + + c = fy_parse_peek(fyp); + + FYP_PARSE_ERROR_CHECK(fyp, 0, 1, FYEM_SCAN, + fyp_is_blankz(fyp, c) || fy_utf8_strchr(",}]", c), err_out, + "invalid tag terminator"); + + handlep = fy_atom_data(&handle) + prefix_length; + + fyt_td = fy_document_state_lookup_tag_directive(fyds, handlep, handle_length); + + FYP_MARK_ERROR_CHECK(fyp, &handle.start_mark, &handle.end_mark, FYEM_PARSE, + fyt_td, err_out, + "undefined tag prefix"); + + fyt = fy_token_queue(fyp, FYTT_TAG, &handle, prefix_length, handle_length, uri_length, fyt_td); + fyp_error_check(fyp, fyt, err_out_rc, + "fy_token_queue() failed"); + + /* scan forward for '-' block sequence indicator */ + if (!fyp->flow_level) { + for (i = 0; ; i++) { + c = fy_parse_peek_at(fyp, i); + if (c < 0 || fyp_is_lb(fyp, c) || !fy_is_ws(c)) + break; + } + + /* if it's '-' followed by ws we have a problem */ + FYP_PARSE_ERROR_CHECK(fyp, i ,1, FYEM_SCAN, + !(c == '-' && fy_is_ws(fy_parse_peek_at(fyp, i + 1))), err_out, + "illegal block sequence on the same line as the tag"); + } + + rc = fy_save_simple_key_mark(fyp, &skm, FYTT_TAG, NULL); + fyp_error_check(fyp, !rc, err_out_rc, + "fy_save_simple_key_mark() failed"); + + fyp->simple_key_allowed = false; + 
fyp->tab_used_for_ws = false; + fyp_scan_debug(fyp, "simple_key_allowed -> %s\n", fyp->simple_key_allowed ? "true" : "false"); + + return 0; + +err_out: + rc = -1; +err_out_rc: + return rc; +} + +int fy_scan_block_scalar_indent(struct fy_parser *fyp, int indent, int *breaks, int *breaks_length, + int *presentation_breaks_length, int *first_break_length, int *lastc) +{ + int c, max_indent = 0, min_indent, break_length; + + *breaks = 0; + *breaks_length = 0; + *presentation_breaks_length = 0; + *first_break_length = 0; + + /* minimum indent is 0 for zero indent scalars */ + min_indent = fyp->document_first_content_token ? 0 : 1; + + /* scan over the indentation spaces */ + /* we don't format content for display */ + for (;;) { + + /* skip over indentation */ + + if (!fyp_tabsize(fyp)) { + /* we must respect the enclosed indent */ + while (fyp_column(fyp) <= fyp->indent && fy_is_ws(c = fy_parse_peek(fyp))) { + FYP_PARSE_ERROR_CHECK(fyp, 0, 1, FYEM_SCAN, + !fy_is_tab(c), err_out, + "invalid tab character as indent instead of space"); + fy_advance(fyp, c); + } + + /* skip over spaces only */ + while ((c = fy_parse_peek(fyp)) == ' ' && + (!indent || fyp_column(fyp) < indent)) { + fy_advance(fyp, c); + } + } else { + while (fy_is_ws((c = fy_parse_peek(fyp))) && + (!indent || fyp_column(fyp) < indent)) + fy_advance(fyp, c); + } + + if (fyp_column(fyp) > max_indent) + max_indent = fyp_column(fyp); + + /* non-empty line or EOF */ + if (!fyp_is_lb(fyp, c)) { + *lastc = c; + break; + } + + fy_advance(fyp, c); + + break_length = fy_utf8_width(c); + + (*breaks)++; + (*breaks_length) += 1; + + if (fy_is_lb_LS_PS(c)) + (*presentation_breaks_length) += break_length; + + if (!*first_break_length) + *first_break_length = break_length; + } + + if (!indent) { + indent = max_indent; + if (indent < fyp->indent) + indent = fyp->indent; + if (indent < min_indent) + indent = min_indent; + } + + return indent; + +err_out: + return -1; +} + +int fy_fetch_block_scalar(struct fy_parser *fyp, 
bool is_literal, int c) +{ + struct fy_atom handle; + enum fy_atom_chomp chomp = FYAC_CLIP; /* default */ + int lastc, rc, increment = 0, current_indent, new_indent, indent = 0; + int breaks, breaks_length, presentation_breaks_length, first_break_length; + bool doc_start_end_detected, empty, empty_line, prev_empty_line, indented, prev_indented, first; + bool has_ws, has_lb, starts_with_ws, starts_with_lb, ends_with_ws, ends_with_lb, trailing_lb; + bool pending_nl, ends_with_eof, starts_with_eof; + struct fy_token *fyt; + size_t length, line_length, trailing_ws, trailing_breaks_length; + size_t leading_ws; + size_t prefix_length, suffix_length; + unsigned int chomp_amt; + int actual_lb_length, pending_lb_length; + struct fy_mark indicator_mark; + bool generated_indent; + char *str; +#ifdef ATOM_SIZE_CHECK + size_t tlength; +#endif + + fy_utf8_format_a(c, fyue_singlequote, &str); + fyp_error_check(fyp, c == '|' || c == '>', err_out, + "bad start of block scalar ('%s')", + str); + + fy_get_mark(fyp, &indicator_mark); + + FYP_PARSE_ERROR_CHECK(fyp, 0, 1, FYEM_SCAN, + fy_flow_indent_check(fyp), err_out, + "wrongly indented block scalar in flow mode"); + + FYP_PARSE_ERROR_CHECK(fyp, 0, 1, FYEM_SCAN, + fy_block_indent_check(fyp), err_out, + "wrongly indented block scalar in block mode"); + + rc = fy_remove_simple_key(fyp, FYTT_SCALAR); + fyp_error_check(fyp, !rc, err_out_rc, + "fy_remove_simple_key() failed"); + + fyp->simple_key_allowed = true; + fyp->tab_used_for_ws = false; + fyp_scan_debug(fyp, "simple_key_allowed -> %s\n", fyp->simple_key_allowed ? "true" : "false"); + + /* skip over block scalar start */ + fy_advance(fyp, c); + + /* intentation indicator (either [-+]<digit> or <digit>[-+] */ + c = fy_parse_peek(fyp); + if (c == '+' || c == '-') { + + chomp = c == '+' ? 
FYAC_KEEP : FYAC_STRIP; + + fy_advance(fyp, c); + + c = fy_parse_peek(fyp); + if (fy_is_num(c)) { + increment = c - '0'; + fyp_error_check(fyp, increment != 0, err_out, + "indentation indicator 0"); + fy_advance(fyp, c); + } + } else if (fy_is_num(c)) { + + increment = c - '0'; + fyp_error_check(fyp, increment != 0, err_out, + "indentation indicator 0"); + fy_advance(fyp, c); + + c = fy_parse_peek(fyp); + if (c == '+' || c == '-') { + chomp = c == '+' ? FYAC_KEEP : FYAC_STRIP; + fy_advance(fyp, c); + } + } + + /* the comment indicator must have at least a space */ + FYP_PARSE_ERROR_CHECK(fyp, 0, 1, FYEM_SCAN, + c != '#', err_out, + "invalid comment without whitespace after block scalar indicator"); + + /* eat whitespace */ + while (fy_is_blank(c = fy_parse_peek(fyp))) + fy_advance(fyp, c); + + /* comment? */ + if (c == '#') { + /* XXX just ignore this one */ + rc = fy_scan_comment(fyp, NULL, true); + fyp_error_check(fyp, !rc, err_out_rc, + "fy_scan_comment() failed"); + } + + c = fy_parse_peek(fyp); + + /* end of the line? */ + FYP_PARSE_ERROR_CHECK(fyp, 0, 1, FYEM_SCAN, + fyp_is_lbz(fyp, c), err_out, + "block scalar no linebreak found"); + + /* if the block scalar indicator is on a different line we need a new indent */ + generated_indent = false; + if (!increment && indicator_mark.line != fyp->indent_line) { + fyp_scan_debug(fyp, "generating indent %d/%d\n", indicator_mark.line, fyp->indent_line); + rc = fy_push_indent(fyp, indicator_mark.column, false, indicator_mark.line); + fyp_error_check(fyp, !rc, err_out_rc, + "fy_push_indent() failed"); + generated_indent = true; + } + + /* advance */ + fy_advance(fyp, c); + + fy_fill_atom_start(fyp, &handle); + + starts_with_eof = c < 0; + + current_indent = fyp->indent >= 0 ? fyp->indent : 0; + indent = increment ? 
current_indent + increment : 0; + + length = 0; + trailing_breaks_length = 0; + + empty = true; + has_ws = false; + has_lb = false; + starts_with_ws = false; + starts_with_lb = false; + ends_with_ws = false; + ends_with_lb = false; + trailing_lb = false; + + new_indent = fy_scan_block_scalar_indent(fyp, indent, &breaks, &breaks_length, &presentation_breaks_length, &first_break_length, &lastc); + fyp_error_check(fyp, new_indent >= 0, err_out, + "fy_scan_block_scalar_indent() failed"); + + length = breaks_length; + length += presentation_breaks_length; + indent = new_indent; + + doc_start_end_detected = false; + prev_empty_line = true; + + prefix_length = 0; + suffix_length = 0; + + prev_indented = false; + first = true; + pending_nl = false; + pending_lb_length = 0; + + chomp_amt = increment ? (unsigned int)(current_indent + increment) : (unsigned int)-1; + + actual_lb_length = 1; + while ((c = fy_parse_peek(fyp)) > 0 && fyp_column(fyp) >= indent) { + + lastc = c; + + if (first) { + if (fy_is_ws(c)) + starts_with_ws = true; + else if (fyp_is_lb(fyp, c)) + starts_with_lb = true; + } + + /* consume the list */ + line_length = 0; + trailing_ws = 0; + empty_line = true; + leading_ws = 0; + + indented = fy_is_ws(fy_parse_peek(fyp)); + + while (!(fyp_is_lbz(fyp, c = fy_parse_peek(fyp)))) { + + lastc = c; + + if (fyp_column(fyp) == 0 && + (!fy_parse_strncmp(fyp, "...", 3) || + !fy_parse_strncmp(fyp, "---", 3)) && + fy_is_blankz_at_offset(fyp, 3)) { + doc_start_end_detected = true; + break; + } + + if (!fy_is_space(c)) { + empty = false; + empty_line = false; + trailing_ws = 0; + if (chomp_amt == (unsigned int)-1) + chomp_amt = fyp_column(fyp); + } else { + has_ws = true; + if (empty_line) + leading_ws++; + trailing_ws++; + } + + fy_advance(fyp, c); + + line_length += fy_utf8_width(c); + } + + if (doc_start_end_detected) + break; + + if (!fy_is_z(c)) { + /* eat line break */ + actual_lb_length = fy_utf8_width(c); + fy_advance(fyp, c); + + has_lb = true; + new_indent = 
fy_scan_block_scalar_indent(fyp, indent, &breaks, &breaks_length, &presentation_breaks_length, &first_break_length, &lastc); + fyp_error_check(fyp, new_indent >= 0, err_out, + "fy_scan_block_scalar_indent() failed"); + if (fy_is_lb_LS_PS(c)) + presentation_breaks_length += actual_lb_length; + } else { + has_lb = false; + new_indent = indent; + // was chomp = FYAC_STRIP, very very wrong + + breaks = 0; + breaks_length = 0; + presentation_breaks_length = 0; + first_break_length = 0; + + actual_lb_length = 0; + + } + + if (is_literal) { + + prefix_length = 0; + + if (pending_nl) { + pending_nl = false; + prefix_length += pending_lb_length; + pending_lb_length = 0; + } + + prefix_length += trailing_breaks_length; + trailing_breaks_length = 0; + + suffix_length = 0; + + if (fy_is_lb_LS_PS(c)) { + trailing_breaks_length += breaks_length; + trailing_breaks_length += presentation_breaks_length; + + if (actual_lb_length > 1) + presentation_breaks_length -= actual_lb_length; + + pending_nl = true; + pending_lb_length = 0; + } else { + trailing_breaks_length += breaks_length; + trailing_breaks_length += presentation_breaks_length; + + pending_nl = !empty_line || indented; + pending_lb_length = pending_nl ? 
1 : 0; + } + + } else { + + prefix_length = 0; + + if (!trailing_breaks_length) { + if (prev_indented || (prev_empty_line && !first) || indented) { + /* previous line was indented or empty, force output newline */ + if (pending_nl) { + pending_nl = false; + prefix_length += 1; // pending_lb_length; + pending_lb_length = 0; + } + } else if (!prev_empty_line && !prev_indented && !indented && !empty_line) { + /* previous line was not empty and not indented + * while this is not indented and not empty need sep */ + if (pending_nl) { + pending_nl = false; + prefix_length += 1; // pending_lb_length; + pending_lb_length = 0; + } + } + } else { + prefix_length += trailing_breaks_length; + if (prev_indented || indented) + prefix_length++; + } + pending_nl = true; + pending_lb_length = actual_lb_length; + + trailing_breaks_length = 0; + + suffix_length = 0; + trailing_breaks_length += breaks_length; + trailing_breaks_length += presentation_breaks_length; + } + + + length += prefix_length + line_length + suffix_length; + indent = new_indent; + + prev_empty_line = empty_line; + prev_indented = indented; + + prefix_length = 0; + suffix_length = 0; + + first = false; + } + + if (empty) { + trailing_breaks_length = breaks_length; + trailing_breaks_length += presentation_breaks_length; + length = 0; + } + + /* end... */ + fy_fill_atom_end(fyp, &handle); + + if (c == FYUG_INV || c == FYUG_PARTIAL) { + FYP_MARK_ERROR(fyp, &handle.start_mark, &handle.end_mark, FYEM_SCAN, + "block scalar is malformed UTF8"); + goto err_out; + } + + /* are we ended with EOF? 
*/ + ends_with_eof = starts_with_eof || (c == FYUG_EOF && !fyp_is_lb(fyp, lastc) && !breaks); + + /* detect wrongly indented block scalar */ + if (c != FYUG_EOF && !(!empty || fyp_column(fyp) <= fyp->indent || c == '#' || doc_start_end_detected)) { + FYP_MARK_ERROR(fyp, &handle.start_mark, &handle.end_mark, FYEM_SCAN, + "block scalar with wrongly indented line after spaces only"); + goto err_out; + } + + if (empty && c == '#' && fyp_column(fyp) > fyp->indent) { + FYP_MARK_ERROR(fyp, &handle.start_mark, &handle.end_mark, FYEM_SCAN, + "empty block scalar with wrongly indented comment line after spaces only"); + goto err_out; + } + + if (chomp_amt == (unsigned int)-1) + chomp_amt = current_indent; + + switch (chomp) { + case FYAC_CLIP: + if (pending_nl || (!starts_with_eof && ends_with_eof)) { + if (actual_lb_length <= 2) + length += 1; + else + length += actual_lb_length; + + ends_with_lb = true; + ends_with_ws = false; + } else { + if (trailing_breaks_length > 0) + ends_with_lb = true; + else if (fy_is_ws(lastc)) + ends_with_ws = true; + } + break; + case FYAC_KEEP: + if (pending_nl || (!starts_with_eof && ends_with_eof)) + length += actual_lb_length; + + length += breaks + presentation_breaks_length; + + trailing_lb = trailing_breaks_length > 0; + if (pending_nl || (!starts_with_eof && ends_with_eof) || trailing_breaks_length) { + ends_with_lb = true; + ends_with_ws = false; + } else if (fy_is_ws(lastc)) { + ends_with_ws = true; + ends_with_lb = false; + } + break; + case FYAC_STRIP: + ends_with_lb = false; + if (fy_is_ws(lastc)) + ends_with_ws = true; + break; + } + + /* need to process to present */ + handle.style = is_literal ? FYAS_LITERAL : FYAS_FOLDED; + handle.chomp = chomp; + handle.increment = increment ? 
(unsigned int)(current_indent + increment) : chomp_amt; + + /* no point in trying to do direct output in a block scalar */ + /* TODO maybe revisit in the future */ + handle.direct_output = false; + handle.empty = empty; + handle.has_lb = has_lb; + handle.has_ws = has_ws; + handle.starts_with_ws = starts_with_ws; + handle.starts_with_lb = starts_with_lb; + handle.ends_with_ws = ends_with_ws; + handle.ends_with_lb = ends_with_lb; + handle.trailing_lb = trailing_lb; + handle.size0 = length == 0; + handle.valid_anchor = false; + handle.json_mode = fyp_json_mode(fyp); + handle.lb_mode = fyp_lb_mode(fyp); + handle.fws_mode = fyp_fws_mode(fyp); + handle.tabsize = fyp_tabsize(fyp); + handle.ends_with_eof = ends_with_eof; + +#ifdef ATOM_SIZE_CHECK + tlength = fy_atom_format_text_length(&handle); + if (tlength != length) { + fy_utf8_format_text_a(fy_atom_data(&handle), fy_atom_size(&handle), fyue_doublequote, &str); + fyp_warning(fyp, "%s: storage hint calculation failed real %zu != hint %zu - \"%s\"", __func__, + tlength, length, + str); + length = tlength; + } +#endif + + handle.storage_hint = length; + handle.storage_hint_valid = true; + + fyt = fy_token_queue(fyp, FYTT_SCALAR, &handle, is_literal ? 
/*
 * Scan a quoted flow scalar (single or double quoted) from the reader and
 * fill in the atom describing it.
 *
 * @fyr:           the reader
 * @c:             the opening quote at the current position (' or ")
 * @indent:        enclosing indentation; when >= 0 the first non-blank of a
 *                 new line must be at a column greater than it
 * @handle:        out, the atom to fill
 * @sloppy_indent: when true the indentation check above is skipped
 *
 * No formatted output is produced; escapes are only validated and the
 * length of the text they would produce is accumulated into the atom's
 * storage hint. The closing quote is consumed.
 *
 * Returns 0 on success, -1 on error (a diagnostic has been emitted).
 */
int fy_reader_fetch_flow_scalar_handle(struct fy_reader *fyr, int c, int indent, struct fy_atom *handle, bool sloppy_indent)
{
	size_t length;
	int code_length, i = 0, j, end_c, last_line, lastc;
	int breaks_found, blanks_found, break_run, total_code_length;
	int breaks_found_length, first_break_length, value;
	uint32_t hi_surrogate, lo_surrogate;
	bool is_single, is_multiline, esc_lb, ws_lb_only, has_ws, has_lb, has_esc;
	bool first, starts_with_ws, starts_with_lb, ends_with_ws, ends_with_lb, trailing_lb = false;
	bool unicode_esc, is_json_unesc, has_json_esc;
	int last_esc_lb, break_length, presentation_breaks_length;
	struct fy_mark mark, mark2;
	char escbuf[1 + FY_UTF8_FORMAT_BUFMIN];
	size_t escbuf_len;
	enum fy_utf8_escape esc_mode;
	const char *ep;
	char *str;
#ifdef ATOM_SIZE_CHECK
	size_t tlength;
#endif

	/* last_esc_lb is only ever written; this silences the unused warning */
	(void)last_esc_lb;

	is_single = c == '\'';
	/* the scalar ends at the same quote character that opened it */
	end_c = c;

	fy_utf8_format_a(c, fyue_singlequote, &str);
	fyr_error_check(fyr, c == '\'' || c == '"', err_out,
			"bad start of flow scalar ('%s')",
			str);

	fy_reader_get_mark(fyr, &mark);

	/* skip over the opening quote */
	fy_reader_advance(fyr, c);

	fy_reader_fill_atom_start(fyr, handle);

	length = 0;
	breaks_found = 0;
	breaks_found_length = 0;
	first_break_length = 0;
	presentation_breaks_length = 0;
	blanks_found = 0;
	esc_lb = false;
	last_esc_lb = -1;
	ws_lb_only = true;
	has_ws = false;
	has_lb = false;
	starts_with_ws = false;
	starts_with_lb = false;
	ends_with_ws = false;
	ends_with_lb = false;
	has_esc = false;
	break_run = 0;
	first = true;
	has_json_esc = false;

	/* pick the escape dialect used to validate backslash escapes */
	esc_mode = fy_reader_json_mode(fyr) ? fyue_doublequote_json :
		   fy_reader_lb_mode(fyr) == fylb_cr_nl ? fyue_doublequote : fyue_doublequote_yaml_1_1;

	last_line = -1;
	lastc = -1;
	/* each iteration handles one run of non-blanks followed by one run of blanks/breaks */
	for (;;) {
		if (!fy_reader_json_mode(fyr)) {
			/* no document indicators please */
			FYR_PARSE_ERROR_CHECK(fyr, 0, 3, FYEM_SCAN,
				!(fy_reader_column(fyr) == 0 &&
					(!fy_reader_strncmp(fyr, "---", 3) ||
					!fy_reader_strncmp(fyr, "...", 3)) &&
					fy_reader_is_blankz_at_offset(fyr, 3)), err_out,
				"invalid document-%s marker in %s scalar",
					c == '-' ? "start" : "end",
					is_single ? "single-quoted" : "double-quoted");
		}

		/* no EOF either */
		c = fy_reader_peek(fyr);

		if (c <= 0) {
			fy_reader_get_mark(fyr, &mark);

			/* NUL or EOF means the quote was never closed; other negatives are decode errors */
			if (!c || c == FYUG_EOF)
				FYR_MARK_ERROR(fyr, &handle->start_mark, &mark, FYEM_SCAN,
					"%s scalar without closing quote",
						is_single ? "single-quoted" : "double-quoted");
			else
				FYR_MARK_ERROR(fyr, &handle->start_mark, &mark, FYEM_SCAN,
					"%s scalar is malformed UTF8",
						is_single ? "single-quoted" : "double-quoted");
			goto err_out;
		}

		if (first) {
			if (fy_reader_is_flow_ws(fyr, c))
				starts_with_ws = true;
			else if (fy_reader_is_lb(fyr, c))
				starts_with_lb = true;
		}

		/* consume the run of non-blank characters */
		while (!fy_reader_is_flow_blankz(fyr, c = fy_reader_peek(fyr))) {

			if (ws_lb_only && !(fy_reader_is_flow_ws(fyr, c) || fy_reader_is_lb(fyr, c)) && c != end_c)
				ws_lb_only = false;

			esc_lb = false;
			last_esc_lb = -1;
			/* track line change (and first non blank) */
			if (last_line != fy_reader_line(fyr)) {
				last_line = fy_reader_line(fyr);

				if ((indent >= 0 && fy_reader_column(fyr) <= indent) && !sloppy_indent) {

					/* advance by one so the error mark covers the offending character */
					fy_reader_advance(fyr, c);
					fy_reader_get_mark(fyr, &mark2);
					FYR_MARK_ERROR(fyr, &mark, &mark2, FYEM_SCAN,
						"wrongly indented %s scalar",
							is_single ? "single-quoted" : "double-quoted");
					goto err_out;
				}
			}

			/* account for the blanks/breaks that preceded this non-blank */
			if (breaks_found) {
				/* a single break counts as 1; for several, the first one is not counted */
				length += breaks_found > 1 ? (breaks_found_length - first_break_length) : 1;
				length += presentation_breaks_length;
				breaks_found = 0;
				blanks_found = 0;
				presentation_breaks_length = 0;
			} else if (blanks_found) {
				length += blanks_found;
				lastc = ' ';
				blanks_found = 0;
			}

			/* fast path: bulk-consume bytes flagged F_SIMPLE_SCALAR */
			if (c >= 0 && c <= 0x7f && (fy_utf8_low_ascii_flags[c] & F_SIMPLE_SCALAR)) {
				size_t len, consumed;
				const char *p, *s, *e;
				int8_t cc;
				int run;

				run = 0;
				while ((p = fy_reader_ensure_lookahead(fyr, 1, &len)) != NULL) {

					s = p;
					e = s + len;

					while (s < e && (cc = (int8_t)*s) >= 0 && (fy_utf8_low_ascii_flags[cc] & F_SIMPLE_SCALAR))
						s++;

					consumed = s - p;
					if (consumed) {
						fy_reader_advance_octets(fyr, consumed);
						/* column is bumped by the byte count, one per byte consumed */
						fyr->column += consumed;
						/* NOTE(review): cc is the byte that stopped the scan when s < e,
						 * not the last byte consumed — confirm this is intended */
						lastc = (int)cc;
					}
					run += consumed;

					/* we're done if stopped earlier */
					if (s < e)
						break;
				}
				length += run;
				break_run = 0;
				continue;
			}

			/* escaped single quote? */
			if (is_single && c == '\'' && fy_reader_peek_at(fyr, 1) == '\'') {
				length++;
				fy_reader_advance_by(fyr, 2);
				break_run = 0;
				lastc = '\'';
				continue;
			}

			/* right quote? */
			if (c == end_c)
				break;

			/* escaped line break (any linebreak will do) */
			if (!is_single && c == '\\' && fy_reader_is_lb(fyr, fy_reader_peek_at(fyr, 1))) {

				esc_lb = true;
				last_esc_lb = fy_reader_peek_at(fyr, 1);

				fy_reader_advance_by(fyr, 2);

				c = fy_reader_peek(fyr);
				break_run = 0;
				lastc = c;

				has_esc = true;

				break;
			}

			/* escaped sequence? */
			if (!is_single && c == '\\') {

				/* note we don't generate formatted output */
				/* we are merely checking for validity */
				c = fy_reader_peek_at(fyr, 1);

				/* hex, unicode marks - json only supports u */
				unicode_esc = !fy_reader_json_mode(fyr) ?
						(c == 'x' || c == 'u' || c == 'U') :
						c == 'u';
				if (unicode_esc) {

					total_code_length = 0;
					j = 0;
					hi_surrogate = lo_surrogate = 0;
					/* runs once, or twice when a \uXXXX high surrogate is followed by another \u */
					for (;;) {
						/* the backslash and the escape letter */
						total_code_length += 2;

						code_length = c == 'x' ? 2 :
							c == 'u' ? 4 : 8;
						value = 0;
						for (i = 0; i < code_length; i++) {
							c = fy_reader_peek_at(fyr, total_code_length + i);

							FYR_PARSE_ERROR_CHECK(fyr, 0, total_code_length + i + 1, FYEM_SCAN,
								fy_is_hex(c), err_out,
								"double-quoted scalar has invalid hex escape");

							/* NOTE(review): value is a signed int; an 8 digit \U escape with
							 * the top bit set overflows this shift — confirm this is acceptable */
							value <<= 4;
							if (c >= '0' && c <= '9')
								value |= c - '0';
							else if (c >= 'a' && c <= 'f')
								value |= 10 + c - 'a';
							else
								value |= 10 + c - 'A';
						}

						total_code_length += code_length;
						j++;

						/* 0x10000 + (HI - 0xd800) * 0x400 + (LO - 0xdc00) */

						/* high surrogate */
						if (j == 1 && code_length == 4 && value >= 0xd800 && value <= 0xdbff &&
						    fy_reader_peek_at(fyr, total_code_length) == '\\' &&
						    fy_reader_peek_at(fyr, total_code_length + 1) == 'u') {
							hi_surrogate = value;
							/* go around again to parse the following \uXXXX */
							c = 'u';
							continue;
						}

						if (j == 2 && code_length == 4 && hi_surrogate) {

							/* the error span is the 6 characters of the second \uXXXX */
							FYR_PARSE_ERROR_CHECK(fyr, total_code_length - 6, 6, FYEM_SCAN,
								value >= 0xdc00 && value <= 0xdfff, err_out,
								"Invalid low surrogate value");

							lo_surrogate = value;
							value = 0x10000 + (hi_surrogate - 0xd800) * 0x400 + (lo_surrogate - 0xdc00);
						}

						break;
					}

					/* check for validity */
					FYR_PARSE_ERROR_CHECK(fyr, 0, total_code_length, FYEM_SCAN,
						!(value < 0 || (value >= 0xd800 && value <= 0xdfff) ||
						  value > 0x10ffff), err_out,
						"double-quoted scalar has invalid UTF8 escape");

					fy_reader_advance_by(fyr, total_code_length);

				} else {
					/* rebuild the two character escape and let the escape parser judge it */
					escbuf[0] = '\\';
					fy_utf8_put_unchecked(escbuf + 1, c);
					escbuf_len = 1 + fy_utf8_width(c);

					ep = escbuf;

					fy_utf8_format_a(c, fyue_singlequote, &str);
					value = fy_utf8_parse_escape(&ep, escbuf_len, esc_mode);
					FYR_PARSE_ERROR_CHECK(fyr, 0, 2, FYEM_SCAN,
						value >= 0, err_out,
						"invalid escape '%s' in %s string",
							str,
							is_single ? "single-quoted" : "double-quoted");

					fy_reader_advance_by(fyr, 2);
				}

				/* the escape contributes the width of the character it denotes */
				length += fy_utf8_width(value);

				lastc = value;

				if (lastc == '\n')
					break_run++;

				has_esc = true;

				continue;
			}

			/* check whether we have a JSON unescaped character */
			is_json_unesc = fy_is_json_unescaped_range_only(c);
			if (!is_json_unesc)
				has_json_esc = true;

			if (!is_single && fy_reader_json_mode(fyr) && has_json_esc) {
				FYR_PARSE_ERROR(fyr, 0, 2, FYEM_SCAN,
						"Invalid JSON unescaped character");
				goto err_out;
			}

			lastc = c;

			/* regular character */
			fy_reader_advance(fyr, c);

			length += fy_utf8_width(c);
			break_run = 0;
		}

		/* end of scalar */
		if (c == end_c)
			break;

		/* consume blanks */
		breaks_found = 0;
		breaks_found_length = 0;
		blanks_found = 0;
		while (fy_reader_is_flow_blank(fyr, c = fy_reader_peek(fyr)) || fy_reader_is_lb(fyr, c)) {

			if (!has_json_esc && !fy_is_json_unescaped(c))
				has_json_esc = true;

			break_run = 0;

			/* check for tab used as indentation */
			if (!fy_reader_tabsize(fyr) && fy_is_tab(c)) {
				FYR_PARSE_ERROR_CHECK(fyr, 0, 1, FYEM_SCAN,
						fy_reader_column(fyr) > indent, err_out,
						"invalid tab used as indentation");
			}

			fy_reader_advance(fyr, c);

			if (fy_reader_is_lb(fyr, c)) {

				/* LS/PS breaks count with their utf8 width, all others as 1 */
				if (!fy_is_lb_LS_PS(c)) {
					break_length = 1;
				} else {
					break_length = fy_utf8_width(c);
					presentation_breaks_length += break_length;
				}

				has_lb = true;
				if (!breaks_found)
					first_break_length = break_length;
				breaks_found++;
				breaks_found_length += break_length;
				/* blanks before a break are not counted */
				blanks_found = 0;
				esc_lb = false;
			} else {
				has_ws = true;
				/* blanks right after an escaped line break are not counted */
				if (!esc_lb)
					blanks_found++;
			}
		}
		first = false;
	}

	if (break_run > 0)
		ends_with_lb = true;
	else if (fy_reader_is_flow_ws(fyr, lastc))
		ends_with_ws = true;
	trailing_lb = break_run > 1;

	/* end... */
	fy_reader_fill_atom_end(fyr, handle);

	is_multiline = handle->end_mark.line > handle->start_mark.line;

	/* need to process to present */
	handle->style = is_single ? FYAS_SINGLE_QUOTED : FYAS_DOUBLE_QUOTED;
	/* direct output only when the raw bytes already equal the formatted text */
	handle->direct_output = !is_multiline && !has_esc && !has_json_esc &&
				fy_atom_size(handle) == length;
	handle->empty = ws_lb_only;
	handle->has_lb = has_lb;
	handle->has_ws = has_ws;
	handle->starts_with_ws = starts_with_ws;
	handle->starts_with_lb = starts_with_lb;
	handle->ends_with_ws = ends_with_ws;
	handle->ends_with_lb = ends_with_lb;
	handle->trailing_lb = trailing_lb;
	handle->size0 = length == 0;
	handle->valid_anchor = false;
	handle->json_mode = fy_reader_json_mode(fyr);
	handle->lb_mode = fy_reader_lb_mode(fyr);
	handle->fws_mode = fy_reader_flow_ws_mode(fyr);
	handle->tabsize = fy_reader_tabsize(fyr);
	handle->ends_with_eof = false;	/* flow scalars never end with EOF and be valid */

	/* skip over the closing quote */
	fy_reader_advance_by(fyr, 1);

#ifdef ATOM_SIZE_CHECK
	tlength = fy_atom_format_text_length(handle);
	fy_utf8_format_text_a(fy_atom_data(handle), fy_atom_size(handle), fyue_doublequote, &str);
	if (tlength != length) {
		fyr_warning(fyr, "%s: storage hint calculation failed real %zu != hint %zu - \"%s\"", __func__,
			tlength, length,
			str);
		length = tlength;
	}
#endif

	handle->storage_hint = length;
	handle->storage_hint_valid = true;

	/* checked last, so the error marks cover the complete scalar */
	FYR_MARK_ERROR_CHECK(fyr, &handle->start_mark, &handle->end_mark, FYEM_SCAN,
			!fy_reader_json_mode(fyr) || !is_multiline, err_out,
			"Multi line double quoted scalars not supported in JSON mode");

	return 0;

err_out:
	return -1;
}
/*
 * Scan a plain (unquoted) scalar from the reader and fill in the atom
 * describing it.
 *
 * @fyr:        the reader
 * @c:          the first character of the scalar (at the current position)
 * @indent:     enclosing indentation; in block context (flow_level <= 0)
 *              the scalar stops at a line whose column is < indent + 1
 * @flow_level: > 0 when inside a flow collection
 * @handle:     out, the atom to fill
 * @directive0: when true a '%' at column 0 terminates the scalar
 *
 * The atom ends after the last non-blank run consumed; trailing blanks and
 * line breaks are consumed from the reader but are not part of the atom.
 *
 * Returns 0 on success, -1 on error (a diagnostic has been emitted).
 */
int fy_reader_fetch_plain_scalar_handle(struct fy_reader *fyr, int c, int indent, int flow_level, struct fy_atom *handle, bool directive0)
{
	size_t length;
	int rc = -1, run, nextc, lastc, breaks_found, blanks_found;
	int breaks_found_length, first_break_length, break_length, presentation_breaks_length;
	bool has_leading_blanks;
	bool last_ptr;
	struct fy_mark mark, last_mark;
	bool is_multiline, has_lb, has_ws, ends_with_eof;
	bool has_json_esc;
	char *str;
#ifdef ATOM_SIZE_CHECK
	size_t tlength;
#endif

	FYR_PARSE_ERROR_CHECK(fyr, 0, 1, FYEM_SCAN,
			!fy_reader_is_blankz(fyr, c), err_out,
			"plain scalar cannot start with blank or zero");

	/* may not start with any of ,[]{}#&*!|>'\"%@` */
	FYR_PARSE_ERROR_CHECK(fyr, 0, 1, FYEM_SCAN,
			!fy_utf8_strchr(",[]{}#&*!|>'\"%@`", c), err_out,
			"plain scalar cannot start with '%c'", c);

	/* may not start with - followed by a blank */
	FYR_PARSE_ERROR_CHECK(fyr, 0, 2, FYEM_SCAN,
			c != '-' || !fy_reader_is_blank_at_offset(fyr, 1), err_out,
			"plain scalar cannot start with '%c' followed by blank", c);

	/* may not start with ? or : followed by a blank (in block context) */
	FYR_PARSE_ERROR_CHECK(fyr, 0, 2, FYEM_SCAN,
			flow_level > 0 || !((c == '?' || c == ':') && fy_reader_is_blank_at_offset(fyr, 1)), err_out,
			"plain scalar cannot start with '%c' followed by blank (in block context)", c);

	/* may not start with - followed by ",[]{}" in flow context */
	FYR_PARSE_ERROR_CHECK(fyr, 0, 2, FYEM_SCAN,
			flow_level == 0 || !(c == '-' && fy_utf8_strchr(",[]{}", fy_reader_peek_at(fyr, 1))), err_out,
			"plain scalar cannot start with '%c' followed by ,[]{} (in flow context)", c);

	fy_reader_get_mark(fyr, &mark);

	fy_reader_fill_atom_start(fyr, handle);

	has_leading_blanks = false;
	has_lb = false;
	has_ws = false;
	has_json_esc = false;

	length = 0;
	breaks_found = 0;
	breaks_found_length = 0;
	first_break_length = 0;
	presentation_breaks_length = 0;
	blanks_found = 0;
	last_ptr = false;
	memset(&last_mark, 0, sizeof(last_mark));
	/* from here on c tracks the character under the cursor */
	c = FYUG_EOF;
	lastc = FYUG_EOF;

	/* each iteration handles one run of non-blanks followed by one run of blanks/breaks */
	for (;;) {
		/* break for document indicators */
		if (fy_reader_column(fyr) == 0 &&
		   ((!fy_reader_strncmp(fyr, "---", 3) || !fy_reader_strncmp(fyr, "...", 3)) &&
		     fy_reader_is_blankz_at_offset(fyr, 3)))
			break;

		/* a '#' reached here (start of scalar or after blanks) starts a comment */
		c = fy_reader_peek(fyr);
		if (c == '#')
			break;

		/* for YAML 1.1 check % directive break */
		if (directive0 && fy_reader_column(fyr) == 0 && c == '%')
			break;

		/* quickly deal with runs */
		run = 0;
		if (c >= 0 && c <= 0x7f && (fy_utf8_low_ascii_flags[c] & F_SIMPLE_SCALAR)) {
			size_t len, consumed;
			const char *p, *s, *e;
			int8_t cc;

			while ((p = fy_reader_ensure_lookahead(fyr, 1, &len)) != NULL) {

				s = p;
				e = s + len;

				while (s < e && (cc = (int8_t)*s) >= 0 && (fy_utf8_low_ascii_flags[cc] & F_SIMPLE_SCALAR))
					s++;

				consumed = s - p;
				if (consumed) {
					fy_reader_advance_octets(fyr, consumed);
					/* column is bumped by the byte count, one per byte consumed */
					fyr->column += consumed;
				}
				run += consumed;

				/* we're done if stopped earlier */
				if (s < e)
					break;
			}

		}
		if (run > 0) {
			length += run;
			if (breaks_found) {
				/* minimum 1 sep, or more for consecutive */
				length += breaks_found > 1 ? (breaks_found_length - first_break_length) : 1;
				length += presentation_breaks_length;
				breaks_found = 0;
				blanks_found = 0;
				presentation_breaks_length = 0;
			} else if (blanks_found) {
				/* just the blanks */
				length += blanks_found;
				blanks_found = 0;
			}
		}

		/* slow path: one character at a time until a blank, break or end of input */
		while (!fy_reader_is_blankz(fyr, c = fy_reader_peek(fyr))) {


			if (c == ':') {

				nextc = fy_reader_peek_at(fyr, 1);

				/* ':' followed by space terminates */
				if (fy_reader_is_blankz(fyr, nextc)) {
					/* NOTE(review): nextc was already found to be blankz by the
					 * enclosing test, so this condition is always true and the
					 * lastc != ':' term has no effect — confirm the intent */
					if (lastc != ':' || fy_reader_is_blankz(fyr, nextc))
						break;
				}

				/* in flow context ':' followed by flow markers */
				if (flow_level > 0 && fy_utf8_strchr(",[]{}", nextc))
					break;
			}

			/* in flow context any of , [ ] { } */
			if (flow_level > 0 && (c == ',' || c == '[' || c == ']' || c == '{' || c == '}'))
				break;

			if (breaks_found) {
				/* minimum 1 sep, or more for consecutive */
				length += breaks_found > 1 ? (breaks_found_length - first_break_length) : 1;
				length += presentation_breaks_length;
				breaks_found = 0;
				blanks_found = 0;
				presentation_breaks_length = 0;
			} else if (blanks_found) {
				/* just the blanks */
				length += blanks_found;
				blanks_found = 0;
			}

			/* check whether we have a JSON unescaped character */
			if (!has_json_esc && !fy_is_json_unescaped(c))
				has_json_esc = true;

			fy_reader_advance(fyr, c);
			run++;

			length += fy_utf8_width(c);

			lastc = c;
		}

		/* save the end mark if any non-blank was consumed in this iteration */
		if (run > 0) {
			/* fyp_scan_debug(fyp, "saving mark"); */
			last_ptr = true;
			fy_reader_get_mark(fyr, &last_mark);
		}

		/* end? */
		if (!(fy_is_blank(c) || fy_reader_is_lb(fyr, c)))
			break;

		/* blanks/breaks follow: direct output is disabled from here on */
		has_json_esc = true;

		/* consume blanks */
		breaks_found = 0;
		breaks_found_length = 0;
		first_break_length = 0;
		blanks_found = 0;
		do {
			fy_reader_advance(fyr, c);

			if (!fy_reader_tabsize(fyr)) {
				/* check for tab */
				FYR_PARSE_ERROR_CHECK(fyr, 0, 1, FYEM_SCAN,
						c != '\t' || !has_leading_blanks || indent < 0 || fy_reader_column(fyr) >= (indent + 1), err_out,
						"invalid tab used as indentation");
			}

			nextc = fy_reader_peek(fyr);

			/* if it's a break */
			if (fy_reader_is_lb(fyr, c)) {

				/* LS/PS breaks count with their utf8 width, all others as 1 */
				if (!fy_is_lb_LS_PS(c)) {
					break_length = 1;
				} else {
					break_length = fy_utf8_width(c);
					presentation_breaks_length += break_length;
				}

				/* first break, turn on leading blanks */
				if (!has_leading_blanks)
					has_leading_blanks = true;
				if (!breaks_found)
					first_break_length = break_length;
				breaks_found++;
				breaks_found_length += break_length;
				/* blanks before a break are not counted */
				blanks_found = 0;
				has_lb = true;
			} else {
				blanks_found++;
				has_ws = true;
			}

			c = nextc;

		} while (fy_is_blank(c) || fy_reader_is_lb(fyr, c));

		/* break out if indentation is less */
		if (flow_level <= 0 && indent >= 0 && fy_reader_column(fyr) < (indent + 1))
			break;
	}

	/* end... the atom stops at the last saved mark, excluding trailing blanks */
	if (!last_ptr)
		fy_reader_fill_atom_end(fyr, handle);
	else
		fy_reader_fill_atom_end_at(fyr, handle, &last_mark);

	if (c == FYUG_INV || c == FYUG_PARTIAL) {
		FYR_MARK_ERROR(fyr, &handle->start_mark, &handle->end_mark, FYEM_SCAN,
			"plain scalar is malformed UTF8");
		goto err_out;
	}
	ends_with_eof = c == FYUG_EOF && !fy_reader_is_lb(fyr, lastc);

	is_multiline = handle->end_mark.line > handle->start_mark.line;

	handle->style = FYAS_PLAIN;
	handle->chomp = FYAC_STRIP;
	/* direct output only when the raw bytes already equal the formatted text */
	handle->direct_output = !is_multiline && !has_json_esc && fy_atom_size(handle) == length;
	handle->empty = false;
	handle->has_lb = has_lb;
	handle->has_ws = has_ws;
	handle->starts_with_ws = false;
	handle->starts_with_lb = false;
	handle->ends_with_ws = false;
	handle->ends_with_lb = false;
	handle->trailing_lb = false;
	handle->size0 = length == 0;
	handle->valid_anchor = false;
	handle->json_mode = fy_reader_json_mode(fyr);
	handle->lb_mode = fy_reader_lb_mode(fyr);
	handle->fws_mode = fy_reader_flow_ws_mode(fyr);
	handle->tabsize = fy_reader_tabsize(fyr);
	handle->ends_with_eof = ends_with_eof;

#ifdef ATOM_SIZE_CHECK
	tlength = fy_atom_format_text_length(handle);
	if (tlength != length) {
		fy_utf8_format_text_a(fy_atom_data(handle), fy_atom_size(handle), fyue_doublequote, &str);
		fyr_warning(fyr, "%s: storage hint calculation failed real %zu != hint %zu - \"%s\"", __func__,
			tlength, length,
			str);
		length = tlength;
	}
#endif

	handle->storage_hint = length;
	handle->storage_hint_valid = true;

	/* extra check in json mode */
	if (fy_reader_json_mode(fyr)) {
		FYR_MARK_ERROR_CHECK(fyr, &handle->start_mark, &handle->end_mark, FYEM_SCAN,
				!is_multiline, err_out,
				"Multi line plain scalars not supported in JSON mode");

		/* in JSON mode only false, true, null and numbers are accepted */
		FYR_MARK_ERROR_CHECK(fyr, &handle->start_mark, &handle->end_mark, FYEM_SCAN,
				!fy_atom_strcmp(handle, "false") ||
				!fy_atom_strcmp(handle, "true") ||
				!fy_atom_strcmp(handle, "null") ||
				fy_atom_is_number(handle), err_out,
				"Invalid JSON plain scalar");
	}

	return 0;

err_out:
	rc = -1;
	return rc;
}
err_out, + "Invalid JSON plain scalar"); + } + + return 0; + +err_out: + rc = -1; + return rc; +} + + +int fy_fetch_flow_scalar(struct fy_parser *fyp, int c) +{ + struct fy_atom handle; + bool is_single, is_complex, is_multiline; + struct fy_mark mark; + struct fy_simple_key_mark skm; + struct fy_token *fyt; + int i = 0, rc = -1; + char *str; + + is_single = c == '\''; + + fy_utf8_format_a(c, fyue_singlequote, &str); + fyp_error_check(fyp, c == '\'' || c == '"', err_out, + "bad start of flow scalar ('%s')", + str); + + FYP_PARSE_ERROR_CHECK(fyp, 0, 1, FYEM_SCAN, + fy_flow_indent_check(fyp), err_out, + "wrongly indented %s scalar in flow mode", + is_single ? "single-quoted" : "double-quoted"); + + fy_get_mark(fyp, &mark); + fy_get_simple_key_mark(fyp, &skm); + + /* errors are generated by reader */ + rc = fy_reader_fetch_flow_scalar_handle(fyp->reader, c, fyp->indent, &handle, !!(fyp->cfg.flags & FYPCF_SLOPPY_FLOW_INDENTATION)); + if (rc) { + fyp->stream_error = true; + goto err_out_rc; + } + + /* and we're done */ + fyt = fy_token_queue(fyp, FYTT_SCALAR, &handle, is_single ? FYSS_SINGLE_QUOTED : FYSS_DOUBLE_QUOTED); + fyp_error_check(fyp, fyt, err_out_rc, + "fy_token_queue() failed"); + + if (fyp->parse_flow_only && fyp->flow_level == 0) { + rc = fy_fetch_stream_end(fyp); + fyp_error_check(fyp, !rc, err_out_rc, + "fy_fetch_stream_end() failed"); + return 0; + } + + is_complex = fyp->pending_complex_key_column >= 0; + is_multiline = handle.end_mark.line > handle.start_mark.line; + + if (!fyp->flow_level) { + /* due to the weirdness with simple keys scan forward + * until a linebreak, ';', or anything else */ + for (i = 0; ; i++) { + c = fy_parse_peek_at(fyp, i); + if (c < 0 || c == ':' || fyp_is_lb(fyp, c) || !fyp_is_flow_ws(fyp, c)) + break; + } + + /* if we're a multiline key that's bad */ + FYP_MARK_ERROR_CHECK(fyp, &mark, &mark, FYEM_SCAN, + !(is_multiline && !is_complex && c == ':'), err_out, + "invalid multiline %s scalar used as key", + is_single ? 
"single-quoted" : "double-quoted"); + + FYP_PARSE_ERROR_CHECK(fyp, i, 1, FYEM_SCAN, + c < 0 || c == ':' || c == '#' || fyp_is_lb(fyp, c), err_out, + "invalid trailing content after %s scalar", + is_single ? "single-quoted" : "double-quoted"); + } + + /* a plain scalar could be simple key */ + rc = fy_save_simple_key_mark(fyp, &skm, FYTT_SCALAR, &handle.end_mark); + fyp_error_check(fyp, !rc, err_out_rc, + "fy_save_simple_key_mark() failed"); + + /* cannot follow a flow scalar */ + fyp->simple_key_allowed = false; + fyp_scan_debug(fyp, "simple_key_allowed -> %s\n", fyp->simple_key_allowed ? "true" : "false"); + + /* make sure that no comment follows directly afterwards */ + c = fy_parse_peek(fyp); + + FYP_PARSE_ERROR_CHECK(fyp, 0, 1, FYEM_SCAN, + c != '#', err_out, + "invalid comment without whitespace after %s scalar", + is_single ? "single-quoted" : "double-quoted"); + + if (fyp->cfg.flags & FYPCF_PARSE_COMMENTS) { + rc = fy_attach_comments_if_any(fyp, fyt); + fyp_error_check(fyp, !rc, err_out_rc, + "fy_attach_right_hand_comment() failed"); + } + + return 0; + +err_out: + rc = -1; +err_out_rc: + return rc; +} + +void fy_reader_skip_ws_cr_nl(struct fy_reader *fyr) +{ + const char *p, *s, *e; + char cc; + size_t len; + int line, column; + + assert(fyr); + + column = fyr->column; + line = fyr->line; + while ((p = fy_reader_ensure_lookahead(fyr, 1, &len)) != NULL) { + + s = p; + e = s + len; + + while (s < e) { + cc = *s; + if (cc == ' ') { + column++; + } else if (cc == '\n') { + column = 0; + line++; + } else if (cc == '\t') { + if (fyr->tabsize) + column += (fyr->tabsize - (column % fyr->tabsize)); + else + column++; + } else if (cc == '\r') { + column = 0; + line++; + + if (s + 1 > e) { + /* we have a dangling cr at the end of a block */ + + /* advance up to the point here */ + fy_reader_advance_octets(fyr, s - p); + + /* try again (should return enough or NULL) */ + p = fy_reader_ensure_lookahead(fyr, 1, &len); + + /* if we couldn't pull enough we're done */ + if 
(!p || len < 1) + goto done; + + s = p; + e = s + len; + + if (*s == '\n') + s++; + } + /* \n followed, gulp it down */ + if (*s == '\n') + s++; + } else { + if (s > p) + fy_reader_advance_octets(fyr, s - p); + goto done; + } + + s++; + } + + fy_reader_advance_octets(fyr, s - p); + } + +done: + fyr->line = line; + fyr->column = column; +} + +void fy_reader_skip_ws(struct fy_reader *fyr) +{ + const char *p, *s, *e; + size_t len, consumed; + int column; + + assert(fyr); + + while ((p = fy_reader_ensure_lookahead(fyr, 1, &len)) != NULL) { + + s = p; + e = s + len; + + column = fyr->column; + if (!fyr->tabsize) { + while (s < e && fy_is_ws(*s)) { + column++; + s++; + } + } else { + while (s < e && fy_is_ws(*s)) { + if (fy_is_tab(*s)) + column += fyr->tabsize - (column % fyr->tabsize); + else + column++; + s++; + } + } + + consumed = s - p; + if (consumed) { + fy_reader_advance_octets(fyr, consumed); + fyr->column = column; + } + + /* we're done if stopped earlier */ + if (s < e) + break; + } +} + +void fy_reader_skip_space(struct fy_reader *fyr) +{ + const char *p, *s, *e; + size_t len, consumed; + + assert(fyr); + + while ((p = fy_reader_ensure_lookahead(fyr, 1, &len)) != NULL) { + + s = p; + e = s + len; + + while (s < e && fy_is_space(*s)) + s++; + + consumed = s - p; + if (consumed) { + fy_reader_advance_octets(fyr, consumed); + fyr->column += consumed; + } + + if (s < e) + break; + } +} + +void fy_reader_skip_ws_lb(struct fy_reader *fyr) +{ + const char *p, *s, *e; + size_t len, consumed; + int line, column, c, w; + bool dangling_cr; + enum fy_lb_mode lb_mode; + + assert(fyr); + + /* punt to json mode */ + lb_mode = fy_reader_lb_mode(fyr); + + if (fy_reader_json_mode(fyr) || lb_mode == fylb_cr_nl) { + fy_reader_skip_ws_cr_nl(fyr); + return; + } + + column = fyr->column; + line = fyr->line; + dangling_cr = false; + while ((p = fy_reader_ensure_lookahead(fyr, 1, &len)) != NULL) { + + s = p; + e = s + len; + + if (dangling_cr) { + if (*s == '\n') + s++; + dangling_cr 
= false; + } + + while (s < e) { + c = (int)*s; + + /* single byte utf8? */ + if (c < 0x80) { + if (c == ' ') { + column++; + } else if (c == '\n') { + column = 0; + line++; + } else if (c == '\t') { + if (fyr->tabsize) + column += (fyr->tabsize - (column % fyr->tabsize)); + else + column++; + } else if (c == '\r') { + column = 0; + line++; + /* check for '\n' following */ + if (s < e) { + if (*s == '\n') + s++; + } else { + /* we have a dangling cr at the end of a block */ + dangling_cr = true; + } + } else { + consumed = s - p; + if (consumed) + fy_reader_advance_octets(fyr, consumed); + goto done; + } + s++; + } else { + c = fy_utf8_get(s, (int)(e - s), &w); + + if (c == FYUG_PARTIAL) { + /* get the width (from the first octet */ + w = fy_utf8_width_by_first_octet((uint8_t)*s); + /* copy the partial utf8 in the buffer */ + + /* advance up to the point here */ + consumed = s - p; + if (consumed) + fy_reader_advance_octets(fyr, consumed); + + /* try again (should return enough or NULL) */ + p = fy_reader_ensure_lookahead(fyr, w, &len); + if (!p) + break; + + /* if we couldn't pull enough we're done */ + if (len < (size_t)w) + goto done; + + continue; + } + + if (lb_mode == fylb_cr_nl_N_L_P && fy_is_unicode_lb(c)) { + column = 0; + line++; + } else { + consumed = s - p; + if (consumed) + fy_reader_advance_octets(fyr, consumed); + goto done; + } + + s += w; + } + } + + consumed = s - p; + if (consumed) + fy_reader_advance_octets(fyr, consumed); + } + +done: + fyr->line = line; + fyr->column = column; +} + +int fy_fetch_plain_scalar(struct fy_parser *fyp, int c) +{ + struct fy_atom handle; + struct fy_simple_key_mark skm; + struct fy_token *fyt; + bool is_multiline, is_complex, is_tab_start = false; + struct fy_mark tab_mark; + int rc = -1, i; + + /* Extremely bad case, a tab... 
so, either an indentation or separation space in block mode */ + if (!fyp->flow && fy_is_tab(c)) { + + fy_get_mark(fyp, &tab_mark); + is_tab_start = true; + + /* skip all whitespace now */ + fy_reader_skip_ws(fyp->reader); + c = fy_parse_peek(fyp); + + /* if it's a linebreak or a comment start, just try again */ + if (fyp_is_lb(fyp, c) || c == '#') { + /* will need to scan more */ + fyp->token_activity_counter++; + return 0; + } + } + + /* check indentation */ + FYP_PARSE_ERROR_CHECK(fyp, 0, 1, FYEM_SCAN, + fy_flow_indent_check(fyp), err_out, + "wrongly indented flow %s", + fyp->flow == FYFT_SEQUENCE ? "sequence" : "mapping"); + + fy_get_simple_key_mark(fyp, &skm); + + rc = fy_reader_fetch_plain_scalar_handle(fyp->reader, c, fyp->indent, fyp->flow_level, &handle, + fy_document_state_version_compare(fyp->current_document_state, fy_version_make(1, 1)) <= 0); + if (rc) { + fyp->stream_error = true; + goto err_out_rc; + } + + is_multiline = handle.end_mark.line > handle.start_mark.line; + is_complex = fyp->pending_complex_key_column >= 0; + + /* and we're done */ + fyt = fy_token_queue(fyp, FYTT_SCALAR, &handle, FYSS_PLAIN); + fyp_error_check(fyp, fyt, err_out_rc, + "fy_token_queue() failed"); + + if (fyp->parse_flow_only && fyp->flow_level == 0) { + rc = fy_fetch_stream_end(fyp); + fyp_error_check(fyp, !rc, err_out_rc, + "fy_fetch_stream_end() failed"); + return 0; + } + + if (!fyp->flow_level && !is_complex && (is_multiline || is_tab_start)) { + /* due to the weirdness with simple keys scan forward + * until a linebreak, ':', or anything else */ + for (i = 0; ; i++) { + c = fy_parse_peek_at(fyp, i); + if (c < 0 || (c == ':' && fy_is_blankz_at_offset(fyp, i + 1)) || + fyp_is_lb(fyp, c) || !fy_is_ws(c)) + break; + } + + /* if we're a key, that's invalid */ + if (c == ':') { + + if (is_multiline) + FYP_MARK_ERROR(fyp, &handle.start_mark, &handle.end_mark, FYEM_SCAN, + "invalid multiline plain key"); + else + FYP_MARK_ERROR(fyp, &tab_mark, &tab_mark, FYEM_SCAN, + 
"invalid tab as indendation in a mapping"); + + goto err_out; + } + } + + rc = fy_save_simple_key_mark(fyp, &skm, FYTT_SCALAR, &handle.end_mark); + fyp_error_check(fyp, !rc, err_out_rc, + "fy_save_simple_key_mark() failed"); + + fyp->simple_key_allowed = handle.has_lb; + fyp_scan_debug(fyp, "simple_key_allowed -> %s\n", fyp->simple_key_allowed ? "true" : "false"); + + if (fyp->cfg.flags & FYPCF_PARSE_COMMENTS) { + rc = fy_attach_comments_if_any(fyp, fyt); + fyp_error_check(fyp, !rc, err_out_rc, + "fy_attach_right_hand_comment() failed"); + } + + return 0; + +err_out: + rc = -1; +err_out_rc: + return rc; +} + +int fy_fetch_tokens(struct fy_parser *fyp) +{ + struct fy_mark m; + bool was_double_colon; + int c, rc; + + /* do not fetch any more when stream end is reached */ + if (fyp->stream_end_reached) + return 0; + + if (!fyp->stream_start_produced) { + rc = fy_parse_get_next_input(fyp); + fyp_error_check(fyp, rc >= 0, err_out_rc, + "fy_parse_get_next_input() failed"); + + if (rc > 0) { + rc = fy_fetch_stream_start(fyp); + fyp_error_check(fyp, !rc, err_out_rc, + "fy_fetch_stream_start() failed"); + } + return 0; + } + + fyp_scan_debug(fyp, "-------------------------------------------------"); + rc = fy_scan_to_next_token(fyp); + fyp_error_check(fyp, !rc, err_out_rc, + "fy_scan_to_next_token() failed"); + + if (fyp_block_mode(fyp)) { + rc = fy_parse_unroll_indent(fyp, fyp_column(fyp)); + fyp_error_check(fyp, !rc, err_out_rc, + "fy_parse_unroll_indent() failed"); + } + + c = fy_parse_peek(fyp); + if (c < 0 || c == '\0') { + + fyp->stream_end_reached = true; + + FYP_PARSE_ERROR_CHECK(fyp, 0, 1, FYEM_SCAN, + !fyp_json_mode(fyp) || c != '\0', err_out, + "JSON disallows '\\0' in the input stream"); + + if (c >= 0) + fy_advance(fyp, c); + rc = fy_fetch_stream_end(fyp); + fyp_error_check(fyp, !rc, err_out_rc, + "fy_fetch_stream_end() failed"); + return 0; + } + + if (fyp_column(fyp) == 0 && c == '%') { + + FYP_PARSE_ERROR_CHECK(fyp, 0, 1, FYEM_SCAN, + !fyp_json_mode(fyp), 
err_out, + "directives not supported in JSON mode"); + + FYP_PARSE_ERROR_CHECK(fyp, 0, 1, FYEM_SCAN, + !fyp->bare_document_only, err_out, + "invalid directive in bare document mode"); + + fy_advance(fyp, c); + rc = fy_fetch_directive(fyp); + fyp_error_check(fyp, !rc, err_out_rc, + "fy_fetch_directive() failed"); + goto out; + } + + /* probable document start/end indicator */ + if (fyp_column(fyp) == 0 && + (!fy_parse_strncmp(fyp, "---", 3) || + !fy_parse_strncmp(fyp, "...", 3)) && + fy_is_blankz_at_offset(fyp, 3)) { + + FYP_PARSE_ERROR_CHECK(fyp, 0, 3, FYEM_SCAN, + !fyp_json_mode(fyp), err_out, + "document %s indicator not supported in JSON mode", + c == '-' ? "start" : "end"); + + FYP_PARSE_ERROR_CHECK(fyp, 0, 3, FYEM_SCAN, + !fyp->bare_document_only, err_out, + "invalid document %s indicator in bare document mode", + c == '-' ? "start" : "end"); + + rc = fy_fetch_document_indicator(fyp, + c == '-' ? FYTT_DOCUMENT_START : + FYTT_DOCUMENT_END); + fyp_error_check(fyp, !rc, err_out_rc, + "fy_fetch_document_indicator() failed"); + + fyp->indent_line = fyp_line(fyp); + + /* for document end, nothing must follow except whitespace and comment */ + if (c == '.') { + c = fy_parse_peek(fyp); + + FYP_PARSE_ERROR_CHECK(fyp, 0, 1, FYEM_SCAN, + c == -1 || c == '#' || fyp_is_lb(fyp, c), err_out, + "invalid content after document end marker"); + } + + goto out; + } + + fyp_scan_debug(fyp, "indent=%d, parent indent=%d\n", + fyp->indent, fyp->parent_indent); + + if (c == '[' || c == '{') { + + fyp->indent_line = fyp_line(fyp); + + fyp_scan_debug(fyp, "calling fy_fetch_flow_collection_mark_start(%c)", c); + rc = fy_fetch_flow_collection_mark_start(fyp, c); + fyp_error_check(fyp, !rc, err_out_rc, + "fy_fetch_flow_collection_mark_start() failed"); + goto out; + } + + if (c == ']' || c == '}') { + + fyp->indent_line = fyp_line(fyp); + + fyp_scan_debug(fyp, "fy_fetch_flow_collection_mark_end(%c)", c); + rc = fy_fetch_flow_collection_mark_end(fyp, c); + fyp_error_check(fyp, !rc, 
err_out_rc, + "fy_fetch_flow_collection_mark_end() failed"); + goto out; + } + + + if (c == ',') { + + fyp->indent_line = fyp_line(fyp); + + fy_get_mark(fyp, &m); + + fyp_scan_debug(fyp, "fy_fetch_flow_collection_entry(%c)", c); + rc = fy_fetch_flow_collection_entry(fyp, c); + fyp_error_check(fyp, !rc, err_out_rc, + "fy_fetch_flow_collection_entry() failed"); + + fyp->last_was_comma = true; + fyp->last_comma_mark = m; + + goto out; + } + + if (c == '-' && fy_is_blankz_at_offset(fyp, 1)) { + + FYP_PARSE_ERROR_CHECK(fyp, 0, 1, FYEM_SCAN, + !fyp_json_mode(fyp), err_out, + "block entries not supported in JSON mode"); + + fyp->indent_line = fyp_line(fyp); + + fyp_scan_debug(fyp, "fy_fetch_block_entry(%c)", c); + rc = fy_fetch_block_entry(fyp, c); + fyp_error_check(fyp, !rc, err_out_rc, + "fy_fetch_block_entry() failed"); + goto out; + } + + if (c == '?' && fy_is_blankz_at_offset(fyp, 1)) { + + FYP_PARSE_ERROR_CHECK(fyp, 0, 1, FYEM_SCAN, + !fyp_json_mode(fyp), err_out, + "complex keys not supported in JSON mode"); + + fyp->indent_line = fyp_line(fyp); + + fyp_scan_debug(fyp, "fy_fetch_key(%c)", c); + rc = fy_fetch_key(fyp, c); + fyp_error_check(fyp, !rc, err_out_rc, + "fy_fetch_key() failed"); + goto out; + } + + if (c == ':') { + was_double_colon = c == ':' && fyp->colon_follows_colon && fyp->flow_level > 0; + fyp->colon_follows_colon = false; + + if (((fyp->flow_level && !fyp->simple_key_allowed) || fy_is_blankz_at_offset(fyp, 1)) && + !was_double_colon) { + + fyp->indent_line = fyp_line(fyp); + + fyp_scan_debug(fyp, "fy_fetch_value(%c)", c); + rc = fy_fetch_value(fyp, c); + fyp_error_check(fyp, !rc, err_out_rc, + "fy_fetch_value() failed"); + goto out; + } + } + + if (c == '*' || c == '&') { + + FYP_PARSE_ERROR_CHECK(fyp, 0, 1, FYEM_SCAN, + !fyp_json_mode(fyp), err_out, + "%s not supported in JSON mode", + c == '&' ? 
"anchor" : "alias"); + + fyp_scan_debug(fyp, "fy_fetch_anchor_or_alias(%c)", c); + rc = fy_fetch_anchor_or_alias(fyp, c); + fyp_error_check(fyp, !rc, err_out_rc, + "fy_fetch_anchor_or_alias() failed"); + goto out; + } + + if (c == '!') { + + FYP_PARSE_ERROR_CHECK(fyp, 0, 1, FYEM_SCAN, + !fyp_json_mode(fyp), err_out, + "tag not supported in JSON mode"); + + fyp_scan_debug(fyp, "fy_fetch_tag(%c)", c); + rc = fy_fetch_tag(fyp, c); + fyp_error_check(fyp, !rc, err_out_rc, + "fy_fetch_tag() failed"); + goto out; + } + + if (!fyp->flow_level && (c == '|' || c == '>')) { + + FYP_PARSE_ERROR_CHECK(fyp, 0, 1, FYEM_SCAN, + !fyp_json_mode(fyp), err_out, + "block scalars not supported in JSON mode"); + + fyp_scan_debug(fyp, "fy_fetch_block_scalar(%c)", c); + rc = fy_fetch_block_scalar(fyp, c == '|', c); + fyp_error_check(fyp, !rc, err_out_rc, + "fy_fetch_block_scalar() failed"); + goto out; + } + + if (c == '\'' || c == '"') { + + FYP_PARSE_ERROR_CHECK(fyp, 0, 1, FYEM_SCAN, + c == '"' || !fyp_json_mode(fyp), err_out, + "single quoted scalars not supported in JSON mode"); + + fyp_scan_debug(fyp, "fy_fetch_flow_scalar(%c)", c); + rc = fy_fetch_flow_scalar(fyp, c); + fyp_error_check(fyp, !rc, err_out_rc, + "fy_fetch_flow_scalar() failed"); + goto out; + } + + fyp_scan_debug(fyp, "fy_fetch_plain_scalar(%c)", c); + rc = fy_fetch_plain_scalar(fyp, c); + fyp_error_check(fyp, !rc, err_out_rc, + "fy_fetch_plain_scalar() failed"); + +out: + if (c != ',' && fyp->last_was_comma) + fyp->last_was_comma = false; + + return 0; + +err_out: + rc = -1; +err_out_rc: + return rc; +} + +struct fy_token *fy_scan_peek(struct fy_parser *fyp) +{ + struct fy_token *fyt; + int rc, last_token_activity_counter; + bool have_simple_keys; + + /* nothing if stream end produced (and no stream end token in queue) */ + if (fyp->stream_end_produced) { + fyt = fy_token_list_head(&fyp->queued_tokens); + if (fyt && fyt->type == FYTT_STREAM_END) + return fyt; + + /* OK, we're done, flush everything */ + 
fy_token_list_unref_all(&fyp->queued_tokens); + + /* try to get the next input */ + rc = fy_parse_get_next_input(fyp); + fyp_error_check(fyp, rc >= 0, err_out, + "fy_parse_get_next_input() failed"); + + /* no more inputs */ + if (rc == 0) { + fyp_scan_debug(fyp, "token stream ends"); + return NULL; + } + + fyp_scan_debug(fyp, "starting new token stream"); + + fyp->stream_start_produced = false; + fyp->stream_end_produced = false; + fyp->stream_end_reached = false; + } + + /* we loop until we have a token and the simple key list is empty */ + for (;;) { + fyt = fy_token_list_head(&fyp->queued_tokens); + have_simple_keys = !fy_simple_key_list_empty(&fyp->simple_keys); + + /* we can produce a token when: + * a) one exists + * b) no simple keys exist at all + */ + if (fyt && !have_simple_keys) + break; + + /* on stream error we're done */ + if (fyp->stream_error) + return NULL; + + /* keep track of token activity, if it didn't change + * after the fetch tokens call, the state machine is stuck + */ + last_token_activity_counter = fyp->token_activity_counter; + + /* fetch more then */ + rc = fy_fetch_tokens(fyp); + fyp_error_check(fyp, !rc, err_out, + "fy_fetch_tokens() failed"); + + fyp_error_check(fyp, last_token_activity_counter != fyp->token_activity_counter, err_out, + "out of tokens and failed to produce anymore"); + } + + switch (fyt->type) { + case FYTT_STREAM_START: + fyp_scan_debug(fyp, "setting stream_start_produced to true"); + fyp->stream_start_produced = true; + break; + case FYTT_STREAM_END: + fyp_scan_debug(fyp, "setting stream_end_produced to true"); + fyp->stream_end_produced = true; + + if (!fyp->parse_flow_only) { + rc = fy_reader_input_done(fyp->reader); + fyp_error_check(fyp, !rc, err_out, + "fy_parse_input_done() failed"); + } + break; + default: + break; + } + + return fyt; + +err_out: + return NULL; +} + +static inline struct fy_token * +fy_scan_remove(struct fy_parser *fyp, struct fy_token *fyt) +{ + if (!fyp || !fyt) + return NULL; + + 
fy_token_list_del(&fyp->queued_tokens, fyt); + + return fyt; +} + +static inline struct fy_token * +fy_scan_remove_peek(struct fy_parser *fyp, struct fy_token *fyt) +{ + if (fyt != NULL) { + (void)fy_scan_remove(fyp, fyt); + fy_token_unref_rl(fyp->recycled_token_list, fyt); + } + + return fy_scan_peek(fyp); +} + +struct fy_token *fy_scan(struct fy_parser *fyp) +{ + struct fy_token *fyt; + + fyt = fy_scan_remove(fyp, fy_scan_peek(fyp)); + + if (fyt && (fyt->type == FYTT_VERSION_DIRECTIVE || fyt->type == FYTT_TAG_DIRECTIVE)) { + + /* + * NOTE: we need to update the document state with the contents of + * directives, so that tags etc, work correctly. + * This is arguably a big hack, but so is using the scanner in such + * a low level. + * + * This is not very good because we don't keep track of parser state + * so tag directives in the middle of the document are AOK. + * But we don't really care, if you care about stream validity do + * a proper parse. + */ + + /* we take a reference because the parse methods take ownership */ + fy_token_ref(fyt); + + /* we ignore errors, because... 
they are parse errors, not scan errors */ + + if (fyt->type == FYTT_VERSION_DIRECTIVE) + (void)fy_parse_version_directive(fyp, fyt, true); + else + (void)fy_parse_tag_directive(fyp, fyt, true); + } + +#ifdef FY_DEVMODE + if (fyt) + fyp_debug_dump_token(fyp, fyt, "producing: "); +#endif + return fyt; +} + +void fy_scan_token_free(struct fy_parser *fyp, struct fy_token *fyt) +{ + fy_token_unref_rl(fyp->recycled_token_list, fyt); +} + +int fy_parse_state_push(struct fy_parser *fyp, enum fy_parser_state state) +{ + struct fy_parse_state_log *fypsl; + + fypsl = fy_parse_parse_state_log_alloc(fyp); + fyp_error_check(fyp, fypsl != NULL, err_out, + "fy_parse_state_log_alloc() failed!"); + fypsl->state = state; + fy_parse_state_log_list_push(&fyp->state_stack, fypsl); + + return 0; +err_out: + return -1; +} + +enum fy_parser_state fy_parse_state_pop(struct fy_parser *fyp) +{ + struct fy_parse_state_log *fypsl; + enum fy_parser_state state; + + fypsl = fy_parse_state_log_list_pop(&fyp->state_stack); + if (!fypsl) + return FYPS_NONE; + + state = fypsl->state; + + fy_parse_parse_state_log_recycle(fyp, fypsl); + + return state; +} + +void fy_parse_state_set(struct fy_parser *fyp, enum fy_parser_state state) +{ + fyp_parse_debug(fyp, "state %s -> %s\n", state_txt[fyp->state], state_txt[state]); + fyp->state = state; +} + +enum fy_parser_state fy_parse_state_get(struct fy_parser *fyp) +{ + return fyp->state; +} + +static struct fy_eventp * +fy_parse_node(struct fy_parser *fyp, struct fy_token *fyt, bool is_block) +{ + struct fy_eventp *fyep = NULL; + struct fy_event *fye = NULL; + struct fy_document_state *fyds = NULL; + struct fy_token *anchor = NULL, *tag = NULL; + const char *handle; + size_t handle_size; + struct fy_token *fyt_td; + struct fy_token *fytn; + + fyds = fyp->current_document_state; + assert(fyds); + + fyp_parse_debug(fyp, "parse_node: is_block=%s - fyt %s", + is_block ? 
"true" : "false", + fy_token_type_txt[fyt->type]); + + if (fyt->type == FYTT_ALIAS) { + fy_parse_state_set(fyp, fy_parse_state_pop(fyp)); + + fyep = fy_parse_eventp_alloc(fyp); + fyp_error_check(fyp, fyep, err_out, + "fy_eventp_alloc() failed!"); + fye = &fyep->e; + + fye->type = FYET_ALIAS; + fye->alias.anchor = fy_scan_remove(fyp, fyt); + + goto return_ok; + } + + while ((!anchor && fyt->type == FYTT_ANCHOR) || (!tag && fyt->type == FYTT_TAG)) { + if (fyt->type == FYTT_ANCHOR) + anchor = fy_scan_remove(fyp, fyt); + else + tag = fy_scan_remove(fyp, fyt); + + fyt = fy_scan_peek(fyp); + fyp_error_check(fyp, fyt, err_out, + "failed to peek token"); + + fyp_parse_debug(fyp, "parse_node: ANCHOR|TAG got - fyt %s", + fy_token_type_txt[fyt->type]); + + FYP_TOKEN_ERROR_CHECK(fyp, fyt, FYEM_PARSE, + fyt->type != FYTT_ALIAS, err_out, + "unexpected alias"); + } + + /* check tag prefix */ + if (tag && tag->tag.handle_length) { + handle = fy_atom_data(&tag->handle) + tag->tag.skip; + handle_size = tag->tag.handle_length; + + fyt_td = fy_document_state_lookup_tag_directive(fyds, handle, handle_size); + + FYP_TOKEN_ERROR_CHECK(fyp, tag, FYEM_PARSE, + fyt_td, err_out, + "undefined tag prefix '%.*s'", (int)handle_size, handle); + } + + if ((fyp->state == FYPS_BLOCK_MAPPING_VALUE || + fyp->state == FYPS_BLOCK_MAPPING_FIRST_KEY) + && fyt->type == FYTT_BLOCK_ENTRY) { + + fyep = fy_parse_eventp_alloc(fyp); + fyp_error_check(fyp, fyep, err_out, + "fy_eventp_alloc() failed!"); + fye = &fyep->e; + + fye->type = FYET_SEQUENCE_START; + fye->sequence_start.anchor = anchor; + fye->sequence_start.tag = tag; + + /* allocate and copy in place */ + fytn = fy_token_alloc_rl(fyp->recycled_token_list); + fyp_error_check(fyp, fytn, err_out, + "fy_token_alloc_rl() failed!"); + fytn->type = FYTT_BLOCK_SEQUENCE_START; + fytn->handle = fyt->handle; + fytn->handle.end_mark = fytn->handle.start_mark; /* no extent */ + fy_input_ref(fytn->handle.fyi); + + fye->sequence_start.sequence_start = fytn; + + 
fy_parse_state_set(fyp, FYPS_INDENTLESS_SEQUENCE_ENTRY); + goto return_ok; + } + + if (fyt->type == FYTT_SCALAR) { + fy_parse_state_set(fyp, fy_parse_state_pop(fyp)); + + fyep = fy_parse_eventp_alloc(fyp); + fyp_error_check(fyp, fyep, err_out, + "fy_eventp_alloc() failed!"); + fye = &fyep->e; + + fye->type = FYET_SCALAR; + fye->scalar.anchor = anchor; + fye->scalar.tag = tag; + fye->scalar.value = fy_scan_remove(fyp, fyt); + goto return_ok; + } + + if (fyt->type == FYTT_FLOW_SEQUENCE_START) { + + fyep = fy_parse_eventp_alloc(fyp); + fyp_error_check(fyp, fyep, err_out, + "fy_eventp_alloc() failed!"); + fye = &fyep->e; + + fye->type = FYET_SEQUENCE_START; + fye->sequence_start.anchor = anchor; + fye->sequence_start.tag = tag; + fye->sequence_start.sequence_start = fy_scan_remove(fyp, fyt); + fy_parse_state_set(fyp, FYPS_FLOW_SEQUENCE_FIRST_ENTRY); + goto return_ok; + } + + if (fyt->type == FYTT_FLOW_MAPPING_START) { + + fyep = fy_parse_eventp_alloc(fyp); + fyp_error_check(fyp, fyep, err_out, + "fy_eventp_alloc() failed!"); + fye = &fyep->e; + + fye->type = FYET_MAPPING_START; + fye->mapping_start.anchor = anchor; + fye->mapping_start.tag = tag; + fye->mapping_start.mapping_start = fy_scan_remove(fyp, fyt); + fy_parse_state_set(fyp, FYPS_FLOW_MAPPING_FIRST_KEY); + goto return_ok; + } + + if (is_block && fyt->type == FYTT_BLOCK_SEQUENCE_START) { + + fyep = fy_parse_eventp_alloc(fyp); + fyp_error_check(fyp, fyep, err_out, + "fy_eventp_alloc() failed!"); + fye = &fyep->e; + + fye->type = FYET_SEQUENCE_START; + fye->sequence_start.anchor = anchor; + fye->sequence_start.tag = tag; + fye->sequence_start.sequence_start = fy_scan_remove(fyp, fyt); + fy_parse_state_set(fyp, FYPS_BLOCK_SEQUENCE_FIRST_ENTRY); + goto return_ok; + } + + if (is_block && fyt->type == FYTT_BLOCK_MAPPING_START) { + + fyep = fy_parse_eventp_alloc(fyp); + fyp_error_check(fyp, fyep, err_out, + "fy_eventp_alloc() failed!"); + fye = &fyep->e; + + fye->type = FYET_MAPPING_START; + fye->mapping_start.anchor 
= anchor; + fye->mapping_start.tag = tag; + fye->mapping_start.mapping_start = fy_scan_remove(fyp, fyt); + fy_parse_state_set(fyp, FYPS_BLOCK_MAPPING_FIRST_KEY); + goto return_ok; + } + + if (!anchor && !tag) { + + if (fyt->type == FYTT_FLOW_ENTRY && + (fyp->state == FYPS_FLOW_SEQUENCE_FIRST_ENTRY || + fyp->state == FYPS_FLOW_SEQUENCE_ENTRY)) + + FYP_TOKEN_ERROR(fyp, fyt, FYEM_PARSE, + "flow sequence with invalid %s", + fyp->state == FYPS_FLOW_SEQUENCE_FIRST_ENTRY ? + "comma in the beginning" : "extra comma"); + + else if ((fyt->type == FYTT_DOCUMENT_START || fyt->type == FYTT_DOCUMENT_END) && + (fyp->state == FYPS_FLOW_SEQUENCE_FIRST_ENTRY || + fyp->state == FYPS_FLOW_SEQUENCE_ENTRY)) + + FYP_TOKEN_ERROR(fyp, fyt, FYEM_PARSE, + "invalid document %s indicator in a flow sequence", + fyt->type == FYTT_DOCUMENT_START ? + "start" : "end"); + else + FYP_TOKEN_ERROR(fyp, fyt, FYEM_PARSE, + "did not find expected node content"); + + goto err_out; + } + + fyp_parse_debug(fyp, "parse_node: empty scalar..."); + + /* empty scalar */ + fy_parse_state_set(fyp, fy_parse_state_pop(fyp)); + + fyep = fy_parse_eventp_alloc(fyp); + fyp_error_check(fyp, fyep, err_out, + "fy_eventp_alloc() failed!"); + fye = &fyep->e; + + fye->type = FYET_SCALAR; + fye->scalar.anchor = anchor; + fye->scalar.tag = tag; + fye->scalar.value = NULL; + +return_ok: + fyp_parse_debug(fyp, "parse_node: > %s", + fy_event_type_txt[fye->type]); + + return fyep; + +err_out: + fy_token_unref_rl(fyp->recycled_token_list, anchor); + fy_token_unref_rl(fyp->recycled_token_list, tag); + fy_parse_eventp_recycle(fyp, fyep); + + return NULL; +} + +static struct fy_eventp * +fy_parse_empty_scalar(struct fy_parser *fyp) +{ + struct fy_eventp *fyep; + struct fy_event *fye; + + fyep = fy_parse_eventp_alloc(fyp); + fyp_error_check(fyp, fyep, err_out, + "fy_eventp_alloc() failed!"); + fye = &fyep->e; + + fye->type = FYET_SCALAR; + fye->scalar.anchor = NULL; + fye->scalar.tag = NULL; + fye->scalar.value = NULL; + return fyep; 
+err_out: + return NULL; +} + +int fy_parse_stream_start(struct fy_parser *fyp) +{ + fyp->indent = -2; + fyp->indent_line = -1; + fyp->generated_block_map = false; + fyp->last_was_comma = false; + fyp->flow = FYFT_NONE; + fyp->pending_complex_key_column = -1; + + fy_parse_indent_list_recycle_all(fyp, &fyp->indent_stack); + fy_parse_simple_key_list_recycle_all(fyp, &fyp->simple_keys); + fy_parse_parse_state_log_list_recycle_all(fyp, &fyp->state_stack); + fy_parse_flow_list_recycle_all(fyp, &fyp->flow_stack); + + fy_token_unref_rl(fyp->recycled_token_list, fyp->stream_end_token); + fyp->stream_end_token = NULL; + + return 0; +} + +int fy_parse_stream_end(struct fy_parser *fyp) +{ + fy_token_unref_rl(fyp->recycled_token_list, fyp->stream_end_token); + fyp->stream_end_token = NULL; + + return 0; +} + +static struct fy_eventp *fy_parse_internal(struct fy_parser *fyp) +{ + struct fy_eventp *fyep = NULL; + struct fy_event *fye = NULL; + struct fy_token *fyt = NULL; + struct fy_document_state *fyds = NULL; + bool is_block, is_seq, is_value, is_first, had_doc_end, had_directives; + enum fy_parser_state orig_state; + struct fy_token *version_directive; + struct fy_token_list tag_directives; + const struct fy_mark *fym; + struct fy_token *fytn; + char tbuf[16] __FY_DEBUG_UNUSED__; + int rc; + + version_directive = NULL; + fy_token_list_init(&tag_directives); + + /* are we done? 
*/ + if (fyp->stream_error || fyp->state == FYPS_END) + return NULL; + + fyt = fy_scan_peek(fyp); + + /* special case without an error message for start */ + if (!fyt && fyp->state == FYPS_NONE) + return NULL; + + /* keep a copy of stream end */ + if (fyt && fyt->type == FYTT_STREAM_END && !fyp->stream_end_token) { + fyp->stream_end_token = fy_token_ref(fyt); + fyp_parse_debug(fyp, "kept copy of STRM-"); + } + + /* keep on producing STREAM_END */ + if (!fyt && fyp->stream_end_token) { + fyt = fyp->stream_end_token; + fy_token_list_add_tail(&fyp->queued_tokens, fyt); + + fyp_parse_debug(fyp, "generated copy of STRM-"); + } + + fyp_error_check(fyp, fyt, err_out, + "failed to peek token"); + + assert(fyt->handle.fyi); + + fyp_parse_debug(fyp, "[%s] <- %s", state_txt[fyp->state], + fy_token_dump_format(fyt, tbuf, sizeof(tbuf))); + + is_first = false; + had_doc_end = false; + + fyep = NULL; + fye = NULL; + + orig_state = fyp->state; + switch (fyp->state) { + case FYPS_NONE: + fy_parse_state_set(fyp, FYPS_STREAM_START); + /* fallthrough */ + + case FYPS_STREAM_START: + + fyp_error_check(fyp, fyt->type == FYTT_STREAM_START, err_out, + "failed to get valid stream start token"); + + fyep = fy_parse_eventp_alloc(fyp); + fyp_error_check(fyp, fyep, err_out, + "fy_eventp_alloc() failed!"); + fye = &fyep->e; + + fye->type = FYET_STREAM_START; + + fye->stream_start.stream_start = fy_scan_remove(fyp, fyt); + + rc = fy_parse_stream_start(fyp); + fyp_error_check(fyp, !rc, err_out, + "stream start failed"); + + fy_parse_state_set(fyp, FYPS_IMPLICIT_DOCUMENT_START); + + fyp->stream_has_content = false; + + return fyep; + + case FYPS_IMPLICIT_DOCUMENT_START: + + /* fallthrough */ + + case FYPS_DOCUMENT_START: + + had_doc_end = false; + + if (!fyp->stream_has_content && fyt->type != FYTT_STREAM_END) + fyp->stream_has_content = true; + + /* remove all extra document end indicators */ + while (fyt->type == FYTT_DOCUMENT_END) { + + /* reset document has content flag */ + 
fyp->document_has_content = false; + fyp->document_first_content_token = true; + + /* explicit end indicator, no more directives checking */ + fyp->had_directives = false; + + fyt = fy_scan_remove_peek(fyp, fyt); + fyp_error_check(fyp, fyt, err_out, + "failed to peek token"); + +#ifdef FY_DEVMODE + fyp_debug_dump_token(fyp, fyt, "next: "); +#endif + + had_doc_end = true; + } + + if (!fyp->current_document_state) { + + rc = fy_reset_document_state(fyp); + fyp_error_check(fyp, !rc, err_out, + "fy_reset_document_state() failed"); + } + + fyds = fyp->current_document_state; + fyp_error_check(fyp, fyds, err_out, + "no current document state error"); + + /* process directives */ + had_directives = false; + while (fyt->type == FYTT_VERSION_DIRECTIVE || + fyt->type == FYTT_TAG_DIRECTIVE) { + + had_directives = true; + fyp->had_directives = true; + + if (fyt->type == FYTT_VERSION_DIRECTIVE) { + + rc = fy_parse_version_directive(fyp, fy_scan_remove(fyp, fyt), false); + fyt = NULL; + fyp_error_check(fyp, !rc, err_out, + "failed to fy_parse_version_directive()"); + } else { + rc = fy_parse_tag_directive(fyp, fy_scan_remove(fyp, fyt), false); + fyt = NULL; + + fyp_error_check(fyp, !rc, err_out, + "failed to fy_parse_tag_directive()"); + } + + fyt = fy_scan_peek(fyp); + fyp_error_check(fyp, fyt, err_out, + "failed to peek token"); +#ifdef FY_DEVMODE + fyp_debug_dump_token(fyp, fyt, "next: "); +#endif + } + + /* the end */ + if (fyt->type == FYTT_STREAM_END) { + + /* empty content is not allowed in JSON mode */ + FYP_TOKEN_ERROR_CHECK(fyp, fyt, FYEM_PARSE, + !fyp_json_mode(fyp) || + fyp->stream_has_content, err_out, + "JSON does not allow empty root content"); + + FYP_TOKEN_ERROR_CHECK(fyp, fyt, FYEM_PARSE, + !fyp->had_directives || fyp->document_has_content || + !fyds->start_implicit, err_out, + "stream with directives without content"); + + rc = fy_parse_stream_end(fyp); + fyp_error_check(fyp, !rc, err_out, + "stream end failed"); + + fyep = fy_parse_eventp_alloc(fyp); + 
fyp_error_check(fyp, fyep, err_out, + "fy_eventp_alloc() failed!"); + fye = &fyep->e; + + fye->type = FYET_STREAM_END; + fye->stream_end.stream_end = fy_scan_remove(fyp, fyt); + + fy_parse_state_set(fyp, + fy_parse_have_more_inputs(fyp) ? FYPS_NONE : FYPS_END); + + return fyep; + } + + fyep = fy_parse_eventp_alloc(fyp); + fyp_error_check(fyp, fyep, err_out, + "fy_eventp_alloc() failed!"); + fye = &fyep->e; + + /* document start */ + fye->type = FYET_DOCUMENT_START; + fye->document_start.document_start = NULL; + fye->document_start.document_state = NULL; + + if (!(fyp->state == FYPS_IMPLICIT_DOCUMENT_START || had_doc_end || fyt->type == FYTT_DOCUMENT_START)) { + fyds = fyp->current_document_state; + + /* not BLOCK_MAPPING_START */ + FYP_TOKEN_ERROR_CHECK(fyp, fyt, FYEM_PARSE, + fyt->type == FYTT_BLOCK_MAPPING_START, err_out, + "missing document start"); + + FYP_TOKEN_ERROR_CHECK(fyp, fyt, FYEM_PARSE, + fyds->start_implicit || + fyds->start_mark.line != fy_token_start_line(fyt), err_out, + "invalid mapping starting at --- line"); + + FYP_TOKEN_ERROR_CHECK(fyp, fyt, FYEM_PARSE, + false, err_out, + "invalid mapping in plain multiline"); + } + + fym = fy_token_start_mark(fyt); + if (fym) + fyds->start_mark = *fym; + else + memset(&fyds->start_mark, 0, sizeof(fyds->start_mark)); + + if (fyt->type != FYTT_DOCUMENT_START) { + fye->document_start.document_start = NULL; + + fyds->start_implicit = true; + fyp_parse_debug(fyp, "document_start_implicit=true"); + + FYP_TOKEN_ERROR_CHECK(fyp, fyt, FYEM_PARSE, + fyt->type != FYTT_DOCUMENT_END || !had_directives, err_out, + "directive(s) without a document"); + + fy_parse_state_set(fyp, FYPS_BLOCK_NODE); + } else { + fye->document_start.document_start = fy_scan_remove(fyp, fyt); + + fyds->start_implicit = false; + fyp_parse_debug(fyp, "document_start_implicit=false"); + + fy_parse_state_set(fyp, FYPS_DOCUMENT_CONTENT); + } + + rc = fy_parse_state_push(fyp, FYPS_DOCUMENT_END); + fyp_error_check(fyp, !rc, err_out, + "failed to 
fy_parse_state_push()"); + + // update document state with json mode + fyds->json_mode = fyp_json_mode(fyp); + fye->document_start.document_state = fy_document_state_ref(fyds); + fye->document_start.implicit = fyds->start_implicit; + + return fyep; + + case FYPS_DOCUMENT_END: + + fyds = fyp->current_document_state; + fyp_error_check(fyp, fyds, err_out, + "no current document state error"); + + if (fyt && (fyt->type == FYTT_VERSION_DIRECTIVE || + fyt->type == FYTT_TAG_DIRECTIVE)) { + int cmpval = fy_document_state_version_compare(fyds, fy_version_make(1, 1)); + + fyp_scan_debug(fyp, "version %d.%d %s %d.%d\n", + fyds->version.major, fyds->version.minor, + cmpval == 0 ? "=" : cmpval > 0 ? ">" : "<", + 1, 1); + + /* YAML 1.1 allows directives without document end */ + FYP_TOKEN_ERROR_CHECK(fyp, fyt, FYEM_PARSE, + cmpval <= 0, err_out, + "missing explicit document end marker before directive(s)"); + + } + + fym = fy_token_end_mark(fyt); + if (fym) + fyds->end_mark = *fym; + else + memset(&fyds->end_mark, 0, sizeof(fyds->end_mark)); + + fyep = fy_parse_eventp_alloc(fyp); + fyp_error_check(fyp, fyep, err_out, + "fy_eventp_alloc() failed!"); + fye = &fyep->e; + + /* document end */ + fye->type = FYET_DOCUMENT_END; + if (fyt->type == FYTT_DOCUMENT_END) { + /* TODO pull the document end token and deliver */ + fye->document_end.document_end = NULL; + fyds->end_implicit = false; + + /* reset document has content flag */ + fyp->document_has_content = false; + fyp->document_first_content_token = true; + + /* reset directives */ + fyp->had_directives = false; + + } else { + fye->document_end.document_end = NULL; + fyds->end_implicit = true; + } + + fye->document_end.implicit = fyds->end_implicit; + + if (!fyp->next_single_document) { + /* multi document mode */ + fy_parse_state_set(fyp, FYPS_DOCUMENT_START); + fyp->had_directives = false; + + /* and reset document state */ + rc = fy_reset_document_state(fyp); + fyp_error_check(fyp, !rc, err_out, + "fy_reset_document_state() 
failed"); + } else { + /* single document mode */ + fyp->next_single_document = false; + + fy_parse_state_set(fyp, FYPS_SINGLE_DOCUMENT_END); + } + + return fyep; + + case FYPS_DOCUMENT_CONTENT: + + if (fyt->type == FYTT_VERSION_DIRECTIVE || + fyt->type == FYTT_TAG_DIRECTIVE || + fyt->type == FYTT_DOCUMENT_START || + fyt->type == FYTT_DOCUMENT_END || + fyt->type == FYTT_STREAM_END) { + + if (fyt->type == FYTT_DOCUMENT_START || + fyt->type == FYTT_DOCUMENT_END) { + fyp->document_has_content = false; + fyp->document_first_content_token = true; + fyp->had_directives = false; + } + + fy_parse_state_set(fyp, fy_parse_state_pop(fyp)); + + fyep = fy_parse_empty_scalar(fyp); + fyp_error_check(fyp, fyep, err_out, + "fy_parse_empty_scalar() failed"); + return fyep; + } + + fyp->document_has_content = true; + fyp_parse_debug(fyp, "document has content now"); + /* fallthrough */ + + case FYPS_BLOCK_NODE: + + fyep = fy_parse_node(fyp, fyt, + fyp->state == FYPS_BLOCK_NODE || + fyp->state == FYPS_DOCUMENT_CONTENT); + fyp_error_check(fyp, fyep, err_out, + "fy_parse_node() failed"); + return fyep; + + case FYPS_BLOCK_SEQUENCE_FIRST_ENTRY: + is_first = true; + /* fallthrough */ + + case FYPS_BLOCK_SEQUENCE_ENTRY: + case FYPS_INDENTLESS_SEQUENCE_ENTRY: + + if ((fyp->state == FYPS_BLOCK_SEQUENCE_ENTRY || + fyp->state == FYPS_BLOCK_SEQUENCE_FIRST_ENTRY) && + !(fyt->type == FYTT_BLOCK_ENTRY || + fyt->type == FYTT_BLOCK_END)) { + + + FYP_TOKEN_ERROR_CHECK(fyp, fyt, FYEM_PARSE, + !(fyt->type == FYTT_SCALAR), err_out, + "invalid scalar at the end of block sequence"); + + FYP_TOKEN_ERROR_CHECK(fyp, fyt, FYEM_PARSE, + !(fyt->type == FYTT_BLOCK_SEQUENCE_START), err_out, + "wrongly indented sequence item"); + + FYP_TOKEN_ERROR_CHECK(fyp, fyt, FYEM_PARSE, + false, err_out, + "did not find expected '-' indicator"); + } + + if (fyt->type == FYTT_BLOCK_ENTRY) { + + /* BLOCK entry */ + fyt = fy_scan_remove_peek(fyp, fyt); + fyp_error_check(fyp, fyt, err_out, + "failed to peek token"); +#ifdef 
FY_DEVMODE + fyp_debug_dump_token(fyp, fyt, "next: "); +#endif + + /* check whether it's a sequence entry or not */ + is_seq = fyt->type != FYTT_BLOCK_ENTRY && fyt->type != FYTT_BLOCK_END; + if (!is_seq && fyp->state == FYPS_INDENTLESS_SEQUENCE_ENTRY) + is_seq = fyt->type != FYTT_KEY && fyt->type != FYTT_VALUE; + + if (is_seq) { + rc = fy_parse_state_push(fyp, fyp->state); + fyp_error_check(fyp, !rc, err_out, + "failed to push state"); + + fyep = fy_parse_node(fyp, fyt, true); + fyp_error_check(fyp, fyep, err_out, + "fy_parse_node() failed"); + return fyep; + } + fy_parse_state_set(fyp, FYPS_BLOCK_SEQUENCE_ENTRY); + + fyep = fy_parse_empty_scalar(fyp); + fyp_error_check(fyp, fyep, err_out, + "fy_parse_empty_scalar() failed"); + return fyep; + } + + /* FYTT_BLOCK_END */ + fy_parse_state_set(fyp, fy_parse_state_pop(fyp)); + + fyep = fy_parse_eventp_alloc(fyp); + fyp_error_check(fyp, fyep, err_out, + "fy_eventp_alloc() failed!"); + fye = &fyep->e; + + fye->type = FYET_SEQUENCE_END; + if (orig_state == FYPS_INDENTLESS_SEQUENCE_ENTRY) { + + /* allocate and copy in place */ + fytn = fy_token_alloc_rl(fyp->recycled_token_list); + fyp_error_check(fyp, fytn, err_out, + "fy_token_alloc_rl() failed!"); + fytn->type = FYTT_BLOCK_END; + fytn->handle = fyt->handle; + fytn->handle.end_mark = fytn->handle.start_mark; /* no extent */ + fy_input_ref(fytn->handle.fyi); + + fye->sequence_end.sequence_end = fytn; + } else + fye->sequence_end.sequence_end = fy_scan_remove(fyp, fyt); + + return fyep; + + case FYPS_BLOCK_MAPPING_FIRST_KEY: + is_first = true; + /* fallthrough */ + + case FYPS_BLOCK_MAPPING_KEY: + + if (!(fyt->type == FYTT_KEY || fyt->type == FYTT_BLOCK_END || fyt->type == FYTT_STREAM_END)) { + + if (fyt->type == FYTT_SCALAR) + FYP_TOKEN_ERROR(fyp, fyt, FYEM_PARSE, + !fyp->simple_key_allowed && !fyp->flow_level && fy_parse_peek(fyp) == ':' ? 
+ "invalid block mapping key on same line as previous key" : + "invalid value after mapping"); + else if (fyt->type == FYTT_BLOCK_SEQUENCE_START) + FYP_TOKEN_ERROR(fyp, fyt, FYEM_PARSE, + "wrong indendation in sequence while in mapping"); + else if (fyt->type == FYTT_ANCHOR) + FYP_TOKEN_ERROR(fyp, fyt, FYEM_PARSE, + "two anchors for a single value while in mapping"); + else if (fyt->type == FYTT_BLOCK_MAPPING_START) + FYP_TOKEN_ERROR(fyp, fyt, FYEM_PARSE, + !fyp->flow_level && fyp->last_block_mapping_key_line == fy_token_start_line(fyt) ? + "invalid nested block mapping on the same line" : + "invalid indentation in mapping"); + else if (fyt->type == FYTT_ALIAS) + FYP_TOKEN_ERROR(fyp, fyt, FYEM_PARSE, + "invalid combination of anchor plus alias"); + else + FYP_TOKEN_ERROR(fyp, fyt, FYEM_PARSE, + "did not find expected key"); + goto err_out; + } + + if (fyt->type == FYTT_KEY) { + + fyp->last_block_mapping_key_line = fy_token_end_line(fyt); + + /* KEY entry */ + fyt = fy_scan_remove_peek(fyp, fyt); + fyp_error_check(fyp, fyt, err_out, + "failed to peek token"); +#ifdef FY_DEVMODE + fyp_debug_dump_token(fyp, fyt, "next: "); +#endif + + /* check whether it's a block entry or not */ + is_block = fyt->type != FYTT_KEY && fyt->type != FYTT_VALUE && + fyt->type != FYTT_BLOCK_END; + + if (is_block) { + rc = fy_parse_state_push(fyp, FYPS_BLOCK_MAPPING_VALUE); + fyp_error_check(fyp, !rc, err_out, + "failed to push state"); + + fyep = fy_parse_node(fyp, fyt, true); + fyp_error_check(fyp, fyep, err_out, + "fy_parse_node() failed"); + return fyep; + } + fy_parse_state_set(fyp, FYPS_BLOCK_MAPPING_VALUE); + + fyep = fy_parse_empty_scalar(fyp); + fyp_error_check(fyp, fyep, err_out, + "fy_parse_empty_scalar() failed"); + return fyep; + } + + fyep = fy_parse_eventp_alloc(fyp); + fyp_error_check(fyp, fyep, err_out, + "fy_eventp_alloc() failed!"); + fye = &fyep->e; + + /* FYTT_BLOCK_END */ + fy_parse_state_set(fyp, fy_parse_state_pop(fyp)); + fye->type = FYET_MAPPING_END; + 
fye->mapping_end.mapping_end = fy_scan_remove(fyp, fyt); + return fyep; + + case FYPS_BLOCK_MAPPING_VALUE: + + if (fyt->type == FYTT_VALUE) { + + /* VALUE entry */ + fyt = fy_scan_remove_peek(fyp, fyt); + fyp_error_check(fyp, fyt, err_out, + "failed to peek token"); +#ifdef FY_DEVMODE + fyp_debug_dump_token(fyp, fyt, "next: "); +#endif + + /* check whether it's a block entry or not */ + is_value = fyt->type != FYTT_KEY && fyt->type != FYTT_VALUE && + fyt->type != FYTT_BLOCK_END; + + if (is_value) { + rc = fy_parse_state_push(fyp, FYPS_BLOCK_MAPPING_KEY); + fyp_error_check(fyp, !rc, err_out, + "failed to push state"); + + fyep = fy_parse_node(fyp, fyt, true); + fyp_error_check(fyp, fyep, err_out, + "fy_parse_node() failed"); + return fyep; + } + } + + fy_parse_state_set(fyp, FYPS_BLOCK_MAPPING_KEY); + + fyep = fy_parse_empty_scalar(fyp); + fyp_error_check(fyp, fyep, err_out, + "fy_parse_empty_scalar() failed"); + return fyep; + + case FYPS_FLOW_SEQUENCE_FIRST_ENTRY: + is_first = true; + /* fallthrough */ + + case FYPS_FLOW_SEQUENCE_ENTRY: + + if (fyt->type != FYTT_FLOW_SEQUENCE_END && + fyt->type != FYTT_STREAM_END) { + + if (!is_first) { + FYP_TOKEN_ERROR_CHECK(fyp, fyt, FYEM_PARSE, + fyt->type == FYTT_FLOW_ENTRY, err_out, + "missing comma in flow %s", + fyp->state == FYPS_FLOW_SEQUENCE_ENTRY ? 
+ "sequence" : "mapping"); + + fyt = fy_scan_remove_peek(fyp, fyt); + fyp_error_check(fyp, fyt, err_out, + "failed to peek token"); +#ifdef FY_DEVMODE + fyp_debug_dump_token(fyp, fyt, "next: "); +#endif + } + + if (fyt->type == FYTT_KEY) { + fy_parse_state_set(fyp, FYPS_FLOW_SEQUENCE_ENTRY_MAPPING_KEY); + + fyep = fy_parse_eventp_alloc(fyp); + fyp_error_check(fyp, fyep, err_out, + "fy_eventp_alloc() failed!"); + fye = &fyep->e; + + /* convert KEY token to either block or flow mapping start */ + if (!fyt->key.flow_level) + fyt->type = FYTT_BLOCK_MAPPING_START; + else + fyt->type = FYTT_FLOW_MAPPING_START; + + fye->type = FYET_MAPPING_START; + fye->mapping_start.anchor = NULL; + fye->mapping_start.tag = NULL; + fye->mapping_start.mapping_start = fy_scan_remove(fyp, fyt); + return fyep; + } + + if (fyt->type != FYTT_FLOW_SEQUENCE_END) { + rc = fy_parse_state_push(fyp, FYPS_FLOW_SEQUENCE_ENTRY); + fyp_error_check(fyp, !rc, err_out, + "failed to push state"); + + fyep = fy_parse_node(fyp, fyt, false); + fyp_error_check(fyp, fyep, err_out, + "fy_parse_node() failed"); + return fyep; + } + } + + if (fyt->type == FYTT_STREAM_END && fyp->flow_level) { + FYP_TOKEN_ERROR(fyp, fyt, FYEM_PARSE, + "flow sequence without a closing bracket"); + goto err_out; + } + + /* FYTT_FLOW_SEQUENCE_END */ + fy_parse_state_set(fyp, fy_parse_state_pop(fyp)); + + fyep = fy_parse_eventp_alloc(fyp); + fyp_error_check(fyp, fyep, err_out, + "fy_eventp_alloc() failed!"); + fye = &fyep->e; + + fye->type = FYET_SEQUENCE_END; + fye->sequence_end.sequence_end = fy_scan_remove(fyp, fyt); + return fyep; + + case FYPS_FLOW_SEQUENCE_ENTRY_MAPPING_KEY: + if (fyt->type != FYTT_VALUE && fyt->type != FYTT_FLOW_ENTRY && + fyt->type != FYTT_FLOW_SEQUENCE_END) { + rc = fy_parse_state_push(fyp, FYPS_FLOW_SEQUENCE_ENTRY_MAPPING_VALUE); + fyp_error_check(fyp, !rc, err_out, + "failed to push state"); + + fyep = fy_parse_node(fyp, fyt, false); + fyp_error_check(fyp, fyep, err_out, + "fy_parse_node() failed"); + return 
fyep; + } + + /* empty keys are not allowed in JSON mode */ + FYP_TOKEN_ERROR_CHECK(fyp, fyt, FYEM_PARSE, + !fyp_json_mode(fyp), err_out, + "JSON does not allow empty keys of a mapping"); + + fy_parse_state_set(fyp, FYPS_FLOW_SEQUENCE_ENTRY_MAPPING_VALUE); + + fyep = fy_parse_empty_scalar(fyp); + fyp_error_check(fyp, fyep, err_out, + "fy_parse_empty_scalar() failed"); + return fyep; + + case FYPS_FLOW_SEQUENCE_ENTRY_MAPPING_VALUE: + if (fyt->type == FYTT_VALUE) { + fyt = fy_scan_remove_peek(fyp, fyt); + fyp_error_check(fyp, fyt, err_out, + "failed to peek token"); +#ifdef FY_DEVMODE + fyp_debug_dump_token(fyp, fyt, "next: "); +#endif + + if (fyt->type != FYTT_FLOW_ENTRY && fyt->type != FYTT_FLOW_SEQUENCE_END) { + rc = fy_parse_state_push(fyp, FYPS_FLOW_SEQUENCE_ENTRY_MAPPING_END); + fyp_error_check(fyp, !rc, err_out, + "failed to push state"); + + fyep = fy_parse_node(fyp, fyt, false); + fyp_error_check(fyp, fyep, err_out, + "fy_parse_node() failed"); + return fyep; + } + } + + /* empty values are not allowed in JSON mode */ + FYP_TOKEN_ERROR_CHECK(fyp, fyt, FYEM_PARSE, + !fyp_json_mode(fyp), err_out, + "JSON does not allow empty values in a mapping"); + + fy_parse_state_set(fyp, FYPS_FLOW_SEQUENCE_ENTRY_MAPPING_END); + + fyep = fy_parse_empty_scalar(fyp); + fyp_error_check(fyp, fyep, err_out, + "fy_parse_empty_scalar() failed"); + return fyep; + + case FYPS_FLOW_SEQUENCE_ENTRY_MAPPING_END: + fy_parse_state_set(fyp, FYPS_FLOW_SEQUENCE_ENTRY); + + fyep = fy_parse_eventp_alloc(fyp); + fyp_error_check(fyp, fyep, err_out, + "fy_eventp_alloc() failed!"); + fye = &fyep->e; + + fye->type = FYET_MAPPING_END; + + /* allocate and copy in place */ + fytn = fy_token_alloc_rl(fyp->recycled_token_list); + fyp_error_check(fyp, fytn, err_out, + "fy_token_alloc_rl() failed!"); + fytn->type = FYTT_BLOCK_END; + fytn->handle = fyt->handle; + fytn->handle.end_mark = fytn->handle.start_mark; /* no extent */ + fy_input_ref(fytn->handle.fyi); + + fye->mapping_end.mapping_end = fytn; + + 
return fyep; + + case FYPS_FLOW_MAPPING_FIRST_KEY: + is_first = true; + /* fallthrough */ + + case FYPS_FLOW_MAPPING_KEY: + if (fyt->type != FYTT_FLOW_MAPPING_END) { + + if (!is_first) { + FYP_TOKEN_ERROR_CHECK(fyp, fyt, FYEM_PARSE, + fyt->type == FYTT_FLOW_ENTRY, err_out, + "missing comma in flow %s", + fyp->state == FYPS_FLOW_SEQUENCE_ENTRY ? + "sequence" : "mapping"); + + fyt = fy_scan_remove_peek(fyp, fyt); + fyp_error_check(fyp, fyt, err_out, + "failed to peek token"); +#ifdef FY_DEVMODE + fyp_debug_dump_token(fyp, fyt, "next: "); +#endif + } + + if (fyt->type == FYTT_KEY) { + /* next token */ + fyt = fy_scan_remove_peek(fyp, fyt); + fyp_error_check(fyp, fyt, err_out, + "failed to peek token"); +#ifdef FY_DEVMODE + fyp_debug_dump_token(fyp, fyt, "next: "); +#endif + + /* JSON key checks */ + FYP_TOKEN_ERROR_CHECK(fyp, fyt, FYEM_PARSE, + !fyp_json_mode(fyp) || fyt->type != FYTT_VALUE, + err_out, "JSON does not allow empty keys"); + FYP_TOKEN_ERROR_CHECK(fyp, fyt, FYEM_PARSE, + !fyp_json_mode(fyp) || + (fyt->type == FYTT_SCALAR && + fyt->scalar.style == FYSS_DOUBLE_QUOTED), + err_out, "JSON only allows double quoted scalar keys"); + + if (fyt->type != FYTT_VALUE && + fyt->type != FYTT_FLOW_ENTRY && + fyt->type != FYTT_FLOW_MAPPING_END) { + + rc = fy_parse_state_push(fyp, FYPS_FLOW_MAPPING_VALUE); + fyp_error_check(fyp, !rc, err_out, + "failed to push state"); + + fyep = fy_parse_node(fyp, fyt, false); + fyp_error_check(fyp, fyep, err_out, + "fy_parse_node() failed"); + return fyep; + } + + fy_parse_state_set(fyp, FYPS_FLOW_MAPPING_VALUE); + + fyep = fy_parse_empty_scalar(fyp); + fyp_error_check(fyp, fyep, err_out, + "fy_parse_empty_scalar() failed"); + return fyep; + } + + if (fyt->type != FYTT_FLOW_MAPPING_END) { + + /* empty values are not allowed in JSON mode */ + FYP_TOKEN_ERROR_CHECK(fyp, fyt, FYEM_PARSE, + !fyp_json_mode(fyp), err_out, + "JSON does not allow empty values in a mapping"); + + rc = fy_parse_state_push(fyp, FYPS_FLOW_MAPPING_EMPTY_VALUE); + 
fyp_error_check(fyp, !rc, err_out, + "failed to push state"); + + fyep = fy_parse_node(fyp, fyt, false); + fyp_error_check(fyp, fyep, err_out, + "fy_parse_node() failed"); + return fyep; + } + } + + /* FYTT_FLOW_MAPPING_END */ + fy_parse_state_set(fyp, fy_parse_state_pop(fyp)); + + fyep = fy_parse_eventp_alloc(fyp); + fyp_error_check(fyp, fyep, err_out, "fy_eventp_alloc() failed!"); + fye = &fyep->e; + + fye->type = FYET_MAPPING_END; + fye->mapping_end.mapping_end = fy_scan_remove(fyp, fyt); + + return fyep; + + case FYPS_FLOW_MAPPING_VALUE: + if (fyt->type == FYTT_VALUE) { + /* next token */ + fyt = fy_scan_remove_peek(fyp, fyt); + fyp_error_check(fyp, fyt, err_out, + "failed to peek token"); +#ifdef FY_DEVMODE + fyp_debug_dump_token(fyp, fyt, "next: "); +#endif + + if (fyt->type != FYTT_FLOW_ENTRY && + fyt->type != FYTT_FLOW_MAPPING_END) { + + rc = fy_parse_state_push(fyp, FYPS_FLOW_MAPPING_KEY); + fyp_error_check(fyp, !rc, err_out, + "failed to push state"); + + fyep = fy_parse_node(fyp, fyt, false); + fyp_error_check(fyp, fyep, err_out, + "fy_parse_node() failed"); + return fyep; + } + } + + /* empty values are not allowed in JSON mode */ + FYP_TOKEN_ERROR_CHECK(fyp, fyt, FYEM_PARSE, + !fyp_json_mode(fyp), err_out, + "JSON does not allow empty values in a mapping"); + + fy_parse_state_set(fyp, FYPS_FLOW_MAPPING_KEY); + + fyep = fy_parse_empty_scalar(fyp); + fyp_error_check(fyp, fyep, err_out, + "fy_parse_empty_scalar() failed"); + return fyep; + + case FYPS_FLOW_MAPPING_EMPTY_VALUE: + fy_parse_state_set(fyp, FYPS_FLOW_MAPPING_KEY); + + fyep = fy_parse_empty_scalar(fyp); + fyp_error_check(fyp, fyep, err_out, + "fy_parse_empty_scalar() failed"); + return fyep; + + case FYPS_SINGLE_DOCUMENT_END: + + FYP_TOKEN_ERROR_CHECK(fyp, fyt, FYEM_PARSE, + fyt->type == FYTT_STREAM_END, err_out, + "Did not find expected stream end"); + + rc = fy_parse_stream_end(fyp); + fyp_error_check(fyp, !rc, err_out, + "stream end failed"); + + fyep = fy_parse_eventp_alloc(fyp); + 
fyp_error_check(fyp, fyep, err_out, + "fy_eventp_alloc() failed!"); + fye = &fyep->e; + + fye->type = FYET_STREAM_END; + fye->stream_end.stream_end = fy_scan_remove(fyp, fyt); + + fy_parse_state_set(fyp, + fy_parse_have_more_inputs(fyp) ? FYPS_NONE : FYPS_END); + + return fyep; + + case FYPS_END: + /* should never happen */ + assert(0); + break; + } + +err_out: + fy_token_unref_rl(fyp->recycled_token_list, version_directive); + fy_token_list_unref_all_rl(fyp->recycled_token_list, &tag_directives); + fy_parse_eventp_recycle(fyp, fyep); + fyp->stream_error = true; + return NULL; +} + +const char *fy_event_type_txt[] = { + [FYET_NONE] = "NONE", + [FYET_STREAM_START] = "+STR", + [FYET_STREAM_END] = "-STR", + [FYET_DOCUMENT_START] = "+DOC", + [FYET_DOCUMENT_END] = "-DOC", + [FYET_MAPPING_START] = "+MAP", + [FYET_MAPPING_END] = "-MAP", + [FYET_SEQUENCE_START] = "+SEQ", + [FYET_SEQUENCE_END] = "-SEQ", + [FYET_SCALAR] = "=VAL", + [FYET_ALIAS] = "=ALI", +}; + +const char *fy_event_type_get_text(enum fy_event_type type) +{ + if ((unsigned int)type >= ARRAY_SIZE(fy_event_type_txt)) + return "*BAD"; + return fy_event_type_txt[type]; +} + +struct fy_eventp *fy_parse_private(struct fy_parser *fyp) +{ + struct fy_eventp *fyep = NULL; + + fyep = fy_parse_internal(fyp); + fyp_parse_debug(fyp, "> %s", fyep ? 
fy_event_type_txt[fyep->e.type] : "NULL"); + + return fyep; +} + +struct fy_parser *fy_parser_create(const struct fy_parse_cfg *cfg) +{ + struct fy_parser *fyp; + int rc; + + if (!cfg) + return NULL; + + fyp = malloc(sizeof(*fyp)); + if (!fyp) + return NULL; + + rc = fy_parse_setup(fyp, cfg); + if (rc) { + free(fyp); + return NULL; + } + + return fyp; +} + +void fy_parser_destroy(struct fy_parser *fyp) +{ + if (!fyp) + return; + + fy_parse_cleanup(fyp); + + free(fyp); +} + +const struct fy_parse_cfg *fy_parser_get_cfg(struct fy_parser *fyp) +{ + if (!fyp) + return NULL; + return &fyp->cfg; +} + +struct fy_diag *fy_parser_get_diag(struct fy_parser *fyp) +{ + if (!fyp || !fyp->diag) + return NULL; + return fy_diag_ref(fyp->diag); +} + +int fy_parser_set_diag(struct fy_parser *fyp, struct fy_diag *diag) +{ + struct fy_diag_cfg dcfg; + + if (!fyp) + return -1; + + /* default? */ + if (!diag) { + fy_diag_cfg_default(&dcfg); + diag = fy_diag_create(&dcfg); + if (!diag) + return -1; + } + + fy_diag_unref(fyp->diag); + fyp->diag = fy_diag_ref(diag); + + return 0; +} + +static void fy_parse_input_reset(struct fy_parser *fyp) +{ + struct fy_input *fyi, *fyin; + + for (fyi = fy_input_list_head(&fyp->queued_inputs); fyi; fyi = fyin) { + fyin = fy_input_next(&fyp->queued_inputs, fyi); + fy_input_unref(fyi); + } + + fy_parse_parse_state_log_list_recycle_all(fyp, &fyp->state_stack); + + fyp->stream_start_produced = false; + fyp->stream_end_produced = false; + fyp->stream_end_reached = false; + fyp->state = FYPS_NONE; + + fyp->pending_complex_key_column = -1; + fyp->last_block_mapping_key_line = -1; +} + +int fy_parser_set_input_file(struct fy_parser *fyp, const char *file) +{ + struct fy_input_cfg fyic; + int rc; + + if (!fyp || !file) + return -1; + + memset(&fyic, 0, sizeof(fyic)); + + if (!strcmp(file, "-")) { + fyic.type = fyit_stream; + fyic.stream.name = "stdin"; + fyic.stream.fp = stdin; + } else { + fyic.type = fyit_file; + fyic.file.filename = file; + } + 
fyic.ignore_stdio = !!(fyp->cfg.flags & FYPCF_DISABLE_BUFFERING); + + /* must not be in the middle of something */ + fyp_error_check(fyp, fyp->state == FYPS_NONE || fyp->state == FYPS_END, + err_out, "parser cannot be reset at state '%s'", + state_txt[fyp->state]); + + fy_parse_input_reset(fyp); + + rc = fy_parse_input_append(fyp, &fyic); + fyp_error_check(fyp, !rc, err_out_rc, + "fy_parse_input_append() failed"); + + return 0; +err_out: + rc = -1; +err_out_rc: + return rc; +} + +int fy_parser_set_string(struct fy_parser *fyp, const char *str, size_t len) +{ + struct fy_input_cfg fyic; + int rc; + + if (!fyp || !str) + return -1; + + if (len == (size_t)-1) + len = strlen(str); + + memset(&fyic, 0, sizeof(fyic)); + + fyic.type = fyit_memory; + fyic.memory.data = str; + fyic.memory.size = len; + + /* must not be in the middle of something */ + fyp_error_check(fyp, fyp->state == FYPS_NONE || fyp->state == FYPS_END, + err_out, "parser cannot be reset at state '%s'", + state_txt[fyp->state]); + + fy_parse_input_reset(fyp); + + rc = fy_parse_input_append(fyp, &fyic); + fyp_error_check(fyp, !rc, err_out_rc, + "fy_parse_input_append() failed"); + + return 0; +err_out: + rc = -1; +err_out_rc: + return rc; +} + +int fy_parser_set_malloc_string(struct fy_parser *fyp, char *str, size_t len) +{ + struct fy_input_cfg fyic; + int rc; + + if (!fyp || !str) + return -1; + + if (len == (size_t)-1) + len = strlen(str); + + memset(&fyic, 0, sizeof(fyic)); + + fyic.type = fyit_alloc; + fyic.alloc.data = str; + fyic.alloc.size = len; + + /* must not be in the middle of something */ + fyp_error_check(fyp, fyp->state == FYPS_NONE || fyp->state == FYPS_END, + err_out, "parser cannot be reset at state '%s'", + state_txt[fyp->state]); + + fy_parse_input_reset(fyp); + + rc = fy_parse_input_append(fyp, &fyic); + fyp_error_check(fyp, !rc, err_out_rc, + "fy_parse_input_append() failed"); + + return 0; +err_out: + rc = -1; +err_out_rc: + return rc; +} + +int fy_parser_set_input_fp(struct 
fy_parser *fyp, const char *name, FILE *fp) +{ + struct fy_input_cfg fyic; + int rc; + + if (!fyp || !fp) + return -1; + + memset(&fyic, 0, sizeof(fyic)); + + fyic.type = fyit_stream; + fyic.stream.name = name ? name : "<stream>"; + fyic.stream.fp = fp; + fyic.ignore_stdio = !!(fyp->cfg.flags & FYPCF_DISABLE_BUFFERING); + + /* must not be in the middle of something */ + fyp_error_check(fyp, fyp->state == FYPS_NONE || fyp->state == FYPS_END, + err_out, "parser cannot be reset at state '%s'", + state_txt[fyp->state]); + + fy_parse_input_reset(fyp); + + rc = fy_parse_input_append(fyp, &fyic); + fyp_error_check(fyp, !rc, err_out_rc, + "fy_parse_input_append() failed"); + + return 0; +err_out: + rc = -1; +err_out_rc: + return rc; +} + +int fy_parser_set_input_callback(struct fy_parser *fyp, void *user, + ssize_t (*callback)(void *user, void *buf, size_t count)) +{ + struct fy_input_cfg fyic; + int rc; + + if (!fyp || !callback) + return -1; + + memset(&fyic, 0, sizeof(fyic)); + + fyic.type = fyit_callback; + fyic.userdata = user; + fyic.callback.input = callback; + fyic.ignore_stdio = !!(fyp->cfg.flags & FYPCF_DISABLE_BUFFERING); + + /* must not be in the middle of something */ + fyp_error_check(fyp, fyp->state == FYPS_NONE || fyp->state == FYPS_END, + err_out, "parser cannot be reset at state '%s'", + state_txt[fyp->state]); + + fy_parse_input_reset(fyp); + + rc = fy_parse_input_append(fyp, &fyic); + fyp_error_check(fyp, !rc, err_out_rc, + "fy_parse_input_append() failed"); + + return 0; +err_out: + rc = -1; +err_out_rc: + return rc; +} + +int fy_parser_set_input_fd(struct fy_parser *fyp, int fd) +{ + struct fy_input_cfg fyic; + int rc; + + if (!fyp || fd < 0) + return -1; + + memset(&fyic, 0, sizeof(fyic)); + + fyic.type = fyit_fd; + fyic.fd.fd = fd; + fyic.ignore_stdio = !!(fyp->cfg.flags & FYPCF_DISABLE_BUFFERING); + + /* must not be in the middle of something */ + fyp_error_check(fyp, fyp->state == FYPS_NONE || fyp->state == FYPS_END, + err_out, "parser cannot be 
reset at state '%s'", + state_txt[fyp->state]); + + fy_parse_input_reset(fyp); + + rc = fy_parse_input_append(fyp, &fyic); + fyp_error_check(fyp, !rc, err_out_rc, + "fy_parse_input_append() failed"); + + return 0; +err_out: + rc = -1; +err_out_rc: + return rc; +} + +int fy_parser_reset(struct fy_parser *fyp) +{ + int rc; + + if (!fyp) + return -1; + + fy_parse_input_reset(fyp); + + fy_reader_reset(fyp->reader); + + fyp->next_single_document = false; + fyp->stream_error = false; + fyp->generated_block_map = false; + fyp->last_was_comma = false; + fyp->document_has_content = false; + fyp->document_first_content_token = false; + fyp->bare_document_only = false; + fyp->stream_has_content = false; + fyp->had_directives = false; + + assert(fyp->diag); + fyp->diag->on_error = false; + + rc = fy_reset_document_state(fyp); + fyp_error_check(fyp, !rc, err_out_rc, + "fy_parse_input_reset() failed"); + + return 0; +err_out_rc: + return rc; +} + +struct fy_event *fy_parser_parse(struct fy_parser *fyp) +{ + struct fy_eventp *fyep; + enum fy_composer_return ret; + + if (!fyp) + return NULL; + + fyep = fy_parse_private(fyp); + if (!fyep) + return NULL; + + if (fyp->fyc) { + ret = fy_composer_process_event(fyp->fyc, &fyep->e); + if (ret == FYCR_ERROR) { + fyp->stream_error = true; + fy_parse_eventp_recycle(fyp, fyep); + return NULL; + } + /* note that the stop should be handled by + * an out of band mechanism */ + } + + return &fyep->e; +} + +bool fy_parser_get_stream_error(struct fy_parser *fyp) +{ + if (!fyp) + return true; + + return fyp->stream_error; +} + +enum fy_parse_cfg_flags fy_parser_get_cfg_flags(const struct fy_parser *fyp) +{ + if (!fyp) + return 0; + + return fyp->cfg.flags; +} + +struct fy_document_state *fy_parser_get_document_state(struct fy_parser *fyp) +{ + return fyp ? 
fyp->current_document_state : NULL; +} + +static enum fy_composer_return +parse_process_event(struct fy_composer *fyc, struct fy_path *path, struct fy_event *fye) +{ + struct fy_parser *fyp = fy_composer_get_cfg_userdata(fyc); + + assert(fyp); + assert(fyp->fyc_cb); + return fyp->fyc_cb(fyp, fye, path, fyp->fyc_userdata); +} + +struct fy_document_builder * +parse_create_document_builder(struct fy_composer *fyc) +{ + struct fy_parser *fyp = fy_composer_get_cfg_userdata(fyc); + struct fy_document_builder *fydb = NULL; + struct fy_document_builder_cfg cfg; + struct fy_document_state *fyds; + int rc; + + memset(&cfg, 0, sizeof(cfg)); + cfg.parse_cfg = fyp->cfg; + cfg.diag = fy_diag_ref(fyp->diag); + + fydb = fy_document_builder_create(&cfg); + fyp_error_check(fyp, fydb, err_out, + "fy_document_builder_create() failed\n"); + + /* start with this document state */ + fyds = fy_parser_get_document_state(fyp); + rc = fy_document_builder_set_in_document(fydb, fyds, true); + fyp_error_check(fyp, !rc, err_out, + "fy_document_builder_set_in_document() failed\n"); + + return fydb; + +err_out: + fy_document_builder_destroy(fydb); + return NULL; +} + +static const struct fy_composer_ops parser_composer_ops = { + .process_event = parse_process_event, + .create_document_builder = parse_create_document_builder, +}; + +int fy_parse_set_composer(struct fy_parser *fyp, fy_parse_composer_cb cb, void *userdata) +{ + struct fy_composer_cfg ccfg; + + if (!fyp) + return -1; + + /* must not be in the middle of something */ + fyp_error_check(fyp, fyp->state == FYPS_NONE || fyp->state == FYPS_END, + err_out, "cannot change composer state at state '%s'", + state_txt[fyp->state]); + + /* clear */ + if (!cb) { + if (fyp->fyc) { + fy_composer_destroy(fyp->fyc); + fyp->fyc = NULL; + } + fyp->fyc_cb = NULL; + fyp->fyc_userdata = NULL; + return 0; + } + + /* already exists */ + if (fyp->fyc) { + fyp->fyc_cb = cb; + fyp->fyc_userdata = userdata; + return 0; + } + + /* prepare the composer configuration 
*/ + memset(&ccfg, 0, sizeof(ccfg)); + ccfg.ops = &parser_composer_ops; + ccfg.userdata = fyp; + ccfg.diag = fy_parser_get_diag(fyp); + fyp->fyc = fy_composer_create(&ccfg); + fyp_error_check(fyp, fyp->fyc, err_out, + "fy_composer_create() failed"); + + fyp->fyc_cb = cb; + fyp->fyc_userdata = userdata; + + return 0; +err_out: + return -1; +} + +static enum fy_composer_return fy_parse_compose_internal(struct fy_parser *fyp) +{ + struct fy_composer *fyc; + struct fy_document_iterator *fydi; + struct fy_event *fye; + struct fy_eventp *fyep; + struct fy_document *fyd = NULL; + enum fy_composer_return ret; + + assert(fyp); + + fyc = fyp->fyc; + assert(fyc); + + /* simple, without resolution */ + if (!(fyp->cfg.flags & FYPCF_RESOLVE_DOCUMENT)) { + + ret = FYCR_OK_STOP; + while ((fyep = fy_parse_private(fyp)) != NULL) { + ret = fy_composer_process_event(fyc, &fyep->e); + fy_parse_eventp_recycle(fyp, fyep); + if (ret != FYCR_OK_CONTINUE) + break; + } + return ret; + } + + fydi = fy_document_iterator_create(); + fyp_error_check(fyp, fydi, err_out, + "fy_document_iterator_create() failed"); + + /* stream start event generation and processing */ + fye = fy_document_iterator_stream_start(fydi); + fyp_error_check(fyp, fye, err_out, + "fy_document_iterator_stream_start() failed"); + ret = fy_composer_process_event(fyc, fye); + fy_document_iterator_event_free(fydi, fye); + fye = NULL; + if (ret != FYCR_OK_CONTINUE) + goto out; + + /* convert to document and then process the generator event stream it */ + while ((fyd = fy_parse_load_document(fyp)) != NULL) { + + /* document start event generation and processing */ + fye = fy_document_iterator_document_start(fydi, fyd); + fyp_error_check(fyp, fye, err_out, + "fy_document_iterator_document_start() failed"); + ret = fy_composer_process_event(fyc, fye); + fy_document_iterator_event_free(fydi, fye); + fye = NULL; + if (ret != FYCR_OK_CONTINUE) + goto out; + + /* and now process the body */ + ret = FYCR_OK_CONTINUE; + while ((fye = 
fy_document_iterator_body_next(fydi)) != NULL) { + ret = fy_composer_process_event(fyc, fye); + fy_document_iterator_event_free(fydi, fye); + fye = NULL; + if (ret != FYCR_OK_CONTINUE) + goto out; + } + + /* document end event generation and processing */ + fye = fy_document_iterator_document_end(fydi); + fyp_error_check(fyp, fye, err_out, + "fy_document_iterator_document_end() failed"); + ret = fy_composer_process_event(fyc, fye); + fy_document_iterator_event_free(fydi, fye); + fye = NULL; + if (ret != FYCR_OK_CONTINUE) + goto out; + + /* and destroy the document */ + fy_parse_document_destroy(fyp, fyd); + fyd = NULL; + } + + /* stream end event generation and processing */ + fye = fy_document_iterator_stream_end(fydi); + fyp_error_check(fyp, fye, err_out, + "fy_document_iterator_stream_end() failed"); + ret = fy_composer_process_event(fyc, fye); + fy_document_iterator_event_free(fydi, fye); + fye = NULL; + if (ret != FYCR_OK_CONTINUE) + goto out; + +out: + /* NULLs are OK */ + fy_parse_document_destroy(fyp, fyd); + fy_document_iterator_destroy(fydi); + return ret; + +err_out: + ret = FYCR_ERROR; + goto out; +} + +int fy_parse_compose(struct fy_parser *fyp, fy_parse_composer_cb cb, void *userdata) +{ + enum fy_composer_return ret; + int rc, rc_out; + + if (!fyp || !cb) + return -1; + + /* set the composer callback */ + rc = fy_parse_set_composer(fyp, cb, userdata); + fyp_error_check(fyp, !rc, err_out, + "fy_parse_set_composer() failed\n"); + + /* use the composer to parse */ + ret = fy_parse_compose_internal(fyp); + /* on error set the stream error */ + if (ret == FYCR_ERROR) { + fyp->stream_error = true; + rc_out = -1; + } else + rc_out = 0; + + /* reset the parser; the composer clear must always succeed */ + fy_parser_reset(fyp); + + /* clear composer */ + rc = fy_parse_set_composer(fyp, NULL, NULL); + fyp_error_check(fyp, !rc, err_out, + "fy_parse_set_composer() failed\n"); + + return rc_out; + +err_out: + return -1; +} diff --git 
a/contrib/libs/libfyaml/src/lib/fy-parse.h b/contrib/libs/libfyaml/src/lib/fy-parse.h new file mode 100644 index 0000000000..8ffae8c93d --- /dev/null +++ b/contrib/libs/libfyaml/src/lib/fy-parse.h @@ -0,0 +1,592 @@ +/* + * fy-parse.h - YAML parser internal header file + * + * Copyright (c) 2019 Pantelis Antoniou <pantelis.antoniou@konsulko.com> + * + * SPDX-License-Identifier: MIT + */ +#ifndef FY_PARSE_H +#define FY_PARSE_H + +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#include <stdint.h> +#include <stdbool.h> +#include <stdio.h> + +#include <libfyaml.h> + +#include "fy-ctype.h" +#include "fy-utf8.h" +#include "fy-list.h" +#include "fy-typelist.h" +#include "fy-types.h" +#include "fy-diag.h" +#include "fy-dump.h" +#include "fy-atom.h" +#include "fy-input.h" +#include "fy-ctype.h" +#include "fy-token.h" +#include "fy-event.h" +#include "fy-docstate.h" +#include "fy-doc.h" +#include "fy-docbuilder.h" +#include "fy-emit.h" +#include "fy-accel.h" +#include "fy-emit-accum.h" +#include "fy-path.h" +#include "fy-composer.h" + +struct fy_parser; +struct fy_input; + +enum fy_flow_type { + FYFT_NONE, + FYFT_MAP, + FYFT_SEQUENCE, +}; + +struct fy_flow { + struct fy_list_head node; + enum fy_flow_type flow; + int pending_complex_key_column; + struct fy_mark pending_complex_key_mark; + int parent_indent; +}; +FY_PARSE_TYPE_DECL(flow); + +struct fy_indent { + struct fy_list_head node; + int indent; + int indent_line; + bool generated_block_map : 1; +}; +FY_PARSE_TYPE_DECL(indent); + +struct fy_token; + +struct fy_simple_key { + struct fy_list_head node; + struct fy_mark mark; + struct fy_mark end_mark; + struct fy_token *token; /* associated token */ + int flow_level; + bool required : 1; + bool implicit_complex : 1; +}; +FY_PARSE_TYPE_DECL(simple_key); + +struct fy_document_state; + +enum fy_parser_state { + /** none state */ + FYPS_NONE, + /** Expect STREAM-START. */ + FYPS_STREAM_START, + /** Expect the beginning of an implicit document. 
*/ + FYPS_IMPLICIT_DOCUMENT_START, + /** Expect DOCUMENT-START. */ + FYPS_DOCUMENT_START, + /** Expect the content of a document. */ + FYPS_DOCUMENT_CONTENT, + /** Expect DOCUMENT-END. */ + FYPS_DOCUMENT_END, + /** Expect a block node. */ + FYPS_BLOCK_NODE, + /** Expect the first entry of a block sequence. */ + FYPS_BLOCK_SEQUENCE_FIRST_ENTRY, + /** Expect an entry of a block sequence. */ + FYPS_BLOCK_SEQUENCE_ENTRY, + /** Expect an entry of an indentless sequence. */ + FYPS_INDENTLESS_SEQUENCE_ENTRY, + /** Expect the first key of a block mapping. */ + FYPS_BLOCK_MAPPING_FIRST_KEY, + /** Expect a block mapping key. */ + FYPS_BLOCK_MAPPING_KEY, + /** Expect a block mapping value. */ + FYPS_BLOCK_MAPPING_VALUE, + /** Expect the first entry of a flow sequence. */ + FYPS_FLOW_SEQUENCE_FIRST_ENTRY, + /** Expect an entry of a flow sequence. */ + FYPS_FLOW_SEQUENCE_ENTRY, + /** Expect a key of an ordered mapping. */ + FYPS_FLOW_SEQUENCE_ENTRY_MAPPING_KEY, + /** Expect a value of an ordered mapping. */ + FYPS_FLOW_SEQUENCE_ENTRY_MAPPING_VALUE, + /** Expect the end of an ordered mapping entry. */ + FYPS_FLOW_SEQUENCE_ENTRY_MAPPING_END, + /** Expect the first key of a flow mapping. */ + FYPS_FLOW_MAPPING_FIRST_KEY, + /** Expect a key of a flow mapping. */ + FYPS_FLOW_MAPPING_KEY, + /** Expect a value of a flow mapping. */ + FYPS_FLOW_MAPPING_VALUE, + /** Expect an empty value of a flow mapping. */ + FYPS_FLOW_MAPPING_EMPTY_VALUE, + /** Expect only stream end */ + FYPS_SINGLE_DOCUMENT_END, + /** Expect nothing. 
*/ + FYPS_END +}; + +struct fy_parse_state_log { + struct fy_list_head node; + enum fy_parser_state state; +}; +FY_PARSE_TYPE_DECL(parse_state_log); + +struct fy_parser { + struct fy_parse_cfg cfg; + + struct fy_input_list queued_inputs; /* all the inputs queued */ + struct fy_reader builtin_reader; /* the builtin reader */ + struct fy_reader *reader; /* the external reader, or ptr to builtin_reader */ + + struct fy_version default_version; + bool suppress_recycling : 1; + bool stream_start_produced : 1; + bool stream_end_produced : 1; + bool stream_end_reached : 1; + bool simple_key_allowed : 1; + bool tab_used_for_ws : 1; + bool stream_error : 1; + bool generated_block_map : 1; + bool last_was_comma : 1; + bool document_has_content : 1; + bool document_first_content_token : 1; + bool bare_document_only : 1; /* no document start indicators allowed, no directives */ + bool stream_has_content : 1; + bool parse_flow_only : 1; /* document is in flow form, and stop parsing at the end */ + bool colon_follows_colon : 1; /* "foo"::bar -> "foo": :bar */ + bool had_directives : 1; /* document had directives */ + int flow_level; + int pending_complex_key_column; + struct fy_mark pending_complex_key_mark; + int last_block_mapping_key_line; + struct fy_mark last_comma_mark; + struct fy_mark last_tab_used_for_ws_mark; + + /* copy of stream_end token */ + struct fy_token *stream_end_token; + + /* produced tokens, but not yet consumed */ + struct fy_token_list queued_tokens; + int token_activity_counter; + + /* last comment */ + struct fy_atom last_comment; + + /* indent stack */ + struct fy_indent_list indent_stack; + int indent; + int parent_indent; + int indent_line; + /* simple key stack */ + struct fy_simple_key_list simple_keys; + /* state stack */ + enum fy_parser_state state; + struct fy_parse_state_log_list state_stack; + + /* current parse document */ + struct fy_document_state *current_document_state; + struct fy_document_state *default_document_state; + bool 
next_single_document; + + /* flow stack */ + enum fy_flow_type flow; + struct fy_flow_list flow_stack; + + /* recycling lists */ + struct fy_indent_list recycled_indent; + struct fy_simple_key_list recycled_simple_key; + struct fy_parse_state_log_list recycled_parse_state_log; + struct fy_flow_list recycled_flow; + + struct fy_eventp_list recycled_eventp; + struct fy_token_list recycled_token; + + struct fy_eventp_list *recycled_eventp_list; + struct fy_token_list *recycled_token_list; + + /* the diagnostic object */ + struct fy_diag *diag; + + int err_term_width; + int err_term_height; + + /* for when using the built-in document builder */ + struct fy_document_builder *fydb; + + /* when using the composer interface */ + struct fy_composer *fyc; + fy_parse_composer_cb fyc_cb; + void *fyc_userdata; +}; + +static inline struct fy_input * +fyp_current_input(const struct fy_parser *fyp) +{ + assert(fyp); + return fy_reader_current_input(fyp->reader); +} + +static inline uint64_t +fyp_current_input_generation(const struct fy_parser *fyp) +{ + assert(fyp); + return fy_reader_current_input_generation(fyp->reader); +} + +static inline int fyp_column(const struct fy_parser *fyp) +{ + assert(fyp); + return fy_reader_column(fyp->reader); +} + +static inline int fyp_line(const struct fy_parser *fyp) +{ + return fy_reader_line(fyp->reader); +} + +static inline int fyp_tabsize(const struct fy_parser *fyp) +{ + return fy_reader_tabsize(fyp->reader); +} + +static inline bool fyp_json_mode(const struct fy_parser *fyp) +{ + assert(fyp); + return fy_reader_json_mode(fyp->reader); +} + +static inline enum fy_lb_mode fyp_lb_mode(const struct fy_parser *fyp) +{ + assert(fyp); + return fy_reader_lb_mode(fyp->reader); +} + +static inline enum fy_flow_ws_mode fyp_fws_mode(const struct fy_parser *fyp) +{ + assert(fyp); + return fy_reader_flow_ws_mode(fyp->reader); +} + +static inline bool fyp_block_mode(struct fy_parser *fyp) +{ + return !fyp_json_mode(fyp) && !fyp->flow_level; +} + +static 
inline bool fyp_is_lb(const struct fy_parser *fyp, int c) +{ + assert(fyp); + return fy_reader_is_lb(fyp->reader, c); +} + +static inline bool fyp_is_lbz(const struct fy_parser *fyp, int c) +{ + assert(fyp); + return fy_reader_is_lbz(fyp->reader, c); +} + +static inline bool fyp_is_blankz(const struct fy_parser *fyp, int c) +{ + assert(fyp); + return fy_reader_is_blankz(fyp->reader, c); +} + +static inline bool fyp_is_generic_lb(const struct fy_parser *fyp, int c) +{ + assert(fyp); + return fy_reader_is_generic_lb(fyp->reader, c); +} + +static inline bool fyp_is_generic_lbz(const struct fy_parser *fyp, int c) +{ + assert(fyp); + return fy_reader_is_generic_lbz(fyp->reader, c); +} + +static inline bool fyp_is_generic_blankz(const struct fy_parser *fyp, int c) +{ + assert(fyp); + return fy_reader_is_generic_blankz(fyp->reader, c); +} + +static inline bool fyp_is_flow_ws(const struct fy_parser *fyp, int c) +{ + assert(fyp); + return fy_reader_is_flow_ws(fyp->reader, c); +} + +static inline bool fyp_is_flow_blank(const struct fy_parser *fyp, int c) +{ + assert(fyp); + return fy_reader_is_flow_blank(fyp->reader, c); +} + +static inline bool fyp_is_flow_blankz(const struct fy_parser *fyp, int c) +{ + assert(fyp); + return fy_reader_is_flow_blankz(fyp->reader, c); +} + +static inline const void * +fy_ptr_slow_path(struct fy_parser *fyp, size_t *leftp) +{ + assert(fyp); + return fy_reader_ptr_slow_path(fyp->reader, leftp); +} + +static inline const void * +fy_ensure_lookahead_slow_path(struct fy_parser *fyp, size_t size, size_t *leftp) +{ + assert(fyp); + return fy_reader_ensure_lookahead_slow_path(fyp->reader, size, leftp); +} + +/* only allowed if input does not update */ +static inline void +fy_get_mark(struct fy_parser *fyp, struct fy_mark *fym) +{ + assert(fyp); + return fy_reader_get_mark(fyp->reader, fym); +} + +static inline const void * +fy_ptr(struct fy_parser *fyp, size_t *leftp) +{ + assert(fyp); + return fy_reader_ptr(fyp->reader, leftp); +} + +static inline 
const void * +fy_ensure_lookahead(struct fy_parser *fyp, size_t size, size_t *leftp) +{ + assert(fyp); + return fy_reader_ensure_lookahead(fyp->reader, size, leftp); +} + +/* advance the given number of ascii characters, not utf8 */ +static inline void +fy_advance_octets(struct fy_parser *fyp, size_t advance) +{ + assert(fyp); + return fy_reader_advance_octets(fyp->reader, advance); +} + +/* compare string at the current point (n max) */ +static inline int +fy_parse_strncmp(struct fy_parser *fyp, const char *str, size_t n) +{ + assert(fyp); + return fy_reader_strncmp(fyp->reader, str, n); +} + +static FY_ALWAYS_INLINE inline int +fy_parse_peek_at_offset(struct fy_parser *fyp, size_t offset) +{ + assert(fyp); + return fy_reader_peek_at_offset(fyp->reader, offset); +} + +static FY_ALWAYS_INLINE inline int +fy_parse_peek_at_internal(struct fy_parser *fyp, int pos, ssize_t *offsetp) +{ + assert(fyp); + return fy_reader_peek_at_internal(fyp->reader, pos, offsetp); +} + +static inline bool +fy_is_blank_at_offset(struct fy_parser *fyp, size_t offset) +{ + assert(fyp); + return fy_is_blank(fy_reader_peek_at_offset(fyp->reader, offset)); +} + +static inline bool +fy_is_blankz_at_offset(struct fy_parser *fyp, size_t offset) +{ + assert(fyp); + return fy_reader_is_blankz(fyp->reader, fy_reader_peek_at_offset(fyp->reader, offset)); +} + +static inline bool +fy_is_generic_blankz_at_offset(struct fy_parser *fyp, size_t offset) +{ + assert(fyp); + return fy_reader_is_generic_blankz(fyp->reader, fy_reader_peek_at_offset(fyp->reader, offset)); +} + +static FY_ALWAYS_INLINE inline int +fy_parse_peek_at(struct fy_parser *fyp, int pos) +{ + assert(fyp); + return fy_reader_peek_at_internal(fyp->reader, pos, NULL); +} + +static FY_ALWAYS_INLINE inline int +fy_parse_peek(struct fy_parser *fyp) +{ + assert(fyp); + return fy_reader_peek(fyp->reader); +} + +static FY_ALWAYS_INLINE inline void +fy_advance(struct fy_parser *fyp, int c) +{ + assert(fyp); + fy_reader_advance(fyp->reader, c); +} 
+ +static FY_ALWAYS_INLINE inline void +fy_advance_ws(struct fy_parser *fyp, int c) +{ + assert(fyp); + fy_reader_advance_ws(fyp->reader, c); +} + +static FY_ALWAYS_INLINE inline void +fy_advance_space(struct fy_parser *fyp) +{ + assert(fyp); + fy_reader_advance_space(fyp->reader); +} + +static FY_ALWAYS_INLINE inline int +fy_parse_get(struct fy_parser *fyp) +{ + assert(fyp); + return fy_reader_get(fyp->reader); +} + +static FY_ALWAYS_INLINE inline int +fy_advance_by(struct fy_parser *fyp, int count) +{ + assert(fyp); + return fy_reader_advance_by(fyp->reader, count); +} + +/* compare string at the current point */ +static inline bool +fy_parse_strcmp(struct fy_parser *fyp, const char *str) +{ + assert(fyp); + return fy_reader_strcmp(fyp->reader, str); +} + +static inline void +fy_fill_atom_start(struct fy_parser *fyp, struct fy_atom *handle) +{ + assert(fyp); + fy_reader_fill_atom_start(fyp->reader, handle); +} + +static inline void +fy_fill_atom_end_at(struct fy_parser *fyp, struct fy_atom *handle, struct fy_mark *end_mark) +{ + assert(fyp); + fy_reader_fill_atom_end_at(fyp->reader, handle, end_mark); +} + +static inline void +fy_fill_atom_end(struct fy_parser *fyp, struct fy_atom *handle) +{ + assert(fyp); + fy_reader_fill_atom_end(fyp->reader, handle); +} + +static inline struct fy_atom * +fy_fill_atom(struct fy_parser *fyp, int advance, struct fy_atom *handle) +{ + assert(fyp); + return fy_reader_fill_atom(fyp->reader, advance, handle); +} + +static inline struct fy_atom * +fy_fill_atom_mark(struct fy_parser *fyp, const struct fy_mark *start_mark, + const struct fy_mark *end_mark, struct fy_atom *handle) +{ + assert(fyp); + return fy_reader_fill_atom_mark(fyp->reader, start_mark, end_mark, handle); +} + +static inline struct fy_atom * +fy_fill_atom_at(struct fy_parser *fyp, int advance, int count, struct fy_atom *handle) +{ + assert(fyp); + return fy_reader_fill_atom_at(fyp->reader, advance, count, handle); +} + +static inline void +fy_parser_set_reader(struct 
fy_parser *fyp, struct fy_reader *fyr) +{ + if (!fyp) + return; + fyp->reader = fyr ? fyr : &fyp->builtin_reader; +} + +static inline void +fy_parser_set_flow_only_mode(struct fy_parser *fyp, bool flow_only_mode) +{ + fyp->parse_flow_only = flow_only_mode; +} + +#define fy_fill_atom_a(_fyp, _advance) \ + fy_fill_atom((_fyp), (_advance), FY_ALLOCA(sizeof(struct fy_atom))) + +struct fy_token *fy_token_vqueue(struct fy_parser *fyp, enum fy_token_type type, va_list ap); +struct fy_token *fy_token_queue(struct fy_parser *fyp, enum fy_token_type type, ...); + +struct fy_token * +fy_token_vqueue_internal(struct fy_parser *fyp, struct fy_token_list *fytl, + enum fy_token_type type, va_list ap); +struct fy_token * +fy_token_queue_internal(struct fy_parser *fyp, struct fy_token_list *fytl, + enum fy_token_type type, ...); + +int fy_parse_setup(struct fy_parser *fyp, const struct fy_parse_cfg *cfg); +void fy_parse_cleanup(struct fy_parser *fyp); + +int fy_parse_input_append(struct fy_parser *fyp, const struct fy_input_cfg *fyic); + +struct fy_eventp *fy_parse_private(struct fy_parser *fyp); + +extern const char *fy_event_type_txt[]; + +enum fy_parse_cfg_flags fy_parser_get_cfg_flags(const struct fy_parser *fyp); + +extern const struct fy_tag * const fy_default_tags[]; + +extern const struct fy_version fy_default_version; /* usually highest stable */ + +bool fy_tag_handle_is_default(const char *handle, size_t handle_size); +bool fy_tag_is_default_internal(const char *handle, size_t handle_size, + const char *prefix, size_t prefix_size); +bool fy_token_tag_directive_is_overridable(struct fy_token *fyt_td); + +int fy_parser_set_default_document_state(struct fy_parser *fyp, + struct fy_document_state *fyds); +void fy_parser_set_next_single_document(struct fy_parser *fyp); + +void *fy_alloc_default(void *userdata, size_t size); +void fy_free_default(void *userdata, void *ptr); +void *fy_realloc_default(void *userdata, void *ptr, size_t size); + +int 
fy_reader_fetch_flow_scalar_handle(struct fy_reader *fyr, int c, int indent, struct fy_atom *handle, bool sloppy_indent); +int fy_reader_fetch_plain_scalar_handle(struct fy_reader *fyr, int c, int indent, int flow_level, struct fy_atom *handle, bool directive0); + +void fy_reader_skip_ws_cr_nl(struct fy_reader *fyr); + +void fy_reader_skip_ws(struct fy_reader *fyr); +void fy_reader_skip_space(struct fy_reader *fyr); + +static inline int fy_document_state_version_compare(struct fy_document_state *fyds, const struct fy_version *vb) +{ + return fy_version_compare(fy_document_state_version(fyds), vb); +} + +int fy_parse_set_composer(struct fy_parser *fyp, fy_parse_composer_cb cb, void *userdata); + +#endif diff --git a/contrib/libs/libfyaml/src/lib/fy-path.c b/contrib/libs/libfyaml/src/lib/fy-path.c new file mode 100644 index 0000000000..f9d9f99a41 --- /dev/null +++ b/contrib/libs/libfyaml/src/lib/fy-path.c @@ -0,0 +1,660 @@ +/* + * fy-path.c - Internal ypath support + * + * Copyright (c) 2019 Pantelis Antoniou <pantelis.antoniou@konsulko.com> + * + * SPDX-License-Identifier: MIT + */ +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#include <stdio.h> +#include <string.h> +#include <assert.h> +#include <stdlib.h> +#include <errno.h> +#include <stdarg.h> + +#include <libfyaml.h> + +#include "fy-parse.h" +#include "fy-doc.h" + +#include "fy-utils.h" + +#undef DBG +// #define DBG fyp_notice +#define DBG fyp_scan_debug + +static int fy_path_setup(struct fy_path *fypp) +{ + memset(fypp, 0, sizeof(*fypp)); + + fy_path_component_list_init(&fypp->recycled_component); + fy_path_component_list_init(&fypp->components); + + return 0; +} + +static void fy_path_cleanup(struct fy_path *fypp) +{ + struct fy_path_component *fypc; + + if (!fypp) + return; + + if (fypp->fydb) { + fy_document_builder_destroy(fypp->fydb); + fypp->fydb = NULL; + } + + while ((fypc = fy_path_component_list_pop(&fypp->components)) != NULL) + fy_path_component_free(fypc); + + while ((fypc = 
fy_path_component_list_pop(&fypp->recycled_component)) != NULL) + fy_path_component_free(fypc); +} + +/* Allocate a path with malloc() and initialise it through fy_path_setup(); returns NULL if either step fails. */ +struct fy_path *fy_path_create(void) +{ + struct fy_path *fypp; + int rc; + + fypp = malloc(sizeof(*fypp)); + if (!fypp) + return NULL; + + rc = fy_path_setup(fypp); + /* NOTE(review): fypp is not freed on this error return; fy_path_setup() as written always returns 0, so the branch is not taken today */ + if (rc) + return NULL; + + return fypp; +} + +/* Run fy_path_cleanup() on the path and then free() it; a NULL path is ignored. */ +void fy_path_destroy(struct fy_path *fypp) +{ + if (!fypp) + return; + + fy_path_cleanup(fypp); + + free(fypp); +} + +/* Pop and free every entry of the components list; the recycled list and fydb are left alone. */ +void fy_path_reset(struct fy_path *fypp) +{ + struct fy_path_component *fypc; + + if (!fypp) + return; + + while ((fypc = fy_path_component_list_pop(&fypp->components)) != NULL) + fy_path_component_free(fypc); +} + +/* Hand out a component of type FYPCT_NONE: reuse one from the recycled list, else malloc() a zeroed one. Returns NULL when fypp is NULL or malloc() fails. */ +struct fy_path_component *fy_path_component_alloc(struct fy_path *fypp) +{ + struct fy_path_component *fypc; + + if (!fypp) + return NULL; + + /* a recycled component is returned as it was left; only a freshly allocated one is zeroed */ + fypc = fy_path_component_list_pop(&fypp->recycled_component); + if (!fypc) { + fypc = malloc(sizeof(*fypc)); + if (!fypc) + return NULL; + memset(fypc, 0, sizeof(*fypc)); + } + + /* not yet instantiated */ + fypc->type = FYPCT_NONE; + + return fypc; +} + +/* Reset the per-type state of a component without changing its type: a mapping drops its key and returns to the root/await-key flags, a sequence gets idx -1. NULL is ignored. */ +void fy_path_component_clear_state(struct fy_path_component *fypc) +{ + if (!fypc) + return; + + switch (fypc->type) { + case FYPCT_NONE: + /* nothing */ + break; + + case FYPCT_MAP: + if (fypc->map.has_key) { + if (fypc->map.is_complex_key) { + /* the key document is destroyed here only once it is marked complete */ + if (fypc->map.complex_key_complete) + fy_document_destroy(fypc->map.complex_key); + fypc->map.complex_key = NULL; + } else { + fy_token_unref(fypc->map.scalar.tag); + fy_token_unref(fypc->map.scalar.key); + fypc->map.scalar.tag = NULL; + fypc->map.scalar.key = NULL; + } + } + fypc->map.root = true; + fypc->map.has_key = false; + fypc->map.await_key = true; + fypc->map.is_complex_key = false; + fypc->map.accumulating_complex_key = false; + fypc->map.complex_key_complete = false; + break; + + case FYPCT_SEQ: + fypc->seq.idx = -1; + break; + } +} + +/* Clear the component's state and mark it FYPCT_NONE; the component itself is not freed. NULL is ignored. */ +void fy_path_component_cleanup(struct fy_path_component *fypc) +{ + if (!fypc) + return; + + fy_path_component_clear_state(fypc); + fypc->type = 
FYPCT_NONE; +} + +/* Clean up the component and free() it; NULL is ignored. */ +void fy_path_component_free(struct fy_path_component *fypc) +{ + if (!fypc) + return; + + fy_path_component_cleanup(fypc); + free(fypc); +} + +/* Clean up and free the component; NULL is ignored. */ +/* NOTE(review): fy_path_component_free() already calls fy_path_component_cleanup(), so the cleanup runs twice here; the second pass sees FYPCT_NONE and does nothing */ +void fy_path_component_destroy(struct fy_path_component *fypc) +{ + if (!fypc) + return; + + fy_path_component_cleanup(fypc); + fy_path_component_free(fypc); +} + +/* Clean up the component and push it on fypp's recycled list for reuse; with a NULL path it is freed instead. */ +void fy_path_component_recycle(struct fy_path *fypp, struct fy_path_component *fypc) +{ + if (!fypc) + return; + + fy_path_component_cleanup(fypc); + + if (!fypp) + fy_path_component_free(fypc); + else + fy_path_component_list_push(&fypp->recycled_component, fypc); +} + +/* Obtain a component from fy_path_component_alloc() and set it up as a mapping in its initial state (root, awaiting a key). Returns NULL when fypp is NULL or no component is available. */ +struct fy_path_component *fy_path_component_create_mapping(struct fy_path *fypp) +{ + struct fy_path_component *fypc; + + if (!fypp) + return NULL; + + fypc = fy_path_component_alloc(fypp); + if (!fypc) + return NULL; + + fypc->type = FYPCT_MAP; + + /* NOTE(review): map.has_key is not assigned below. A recycled component keeps whatever its previous use left, and map shares a union with seq (whose cleanup leaves idx == -1) in struct fy_path_component; confirm has_key cannot be observed set on a fresh mapping */ + fypc->map.root = true; + fypc->map.await_key = true; + fypc->map.is_complex_key = false; + fypc->map.accumulating_complex_key = false; + fypc->map.complex_key_complete = false; + + return fypc; +} + +/* Obtain a component from fy_path_component_alloc() and set it up as a sequence with idx -1. Returns NULL when fypp is NULL or no component is available. */ +struct fy_path_component *fy_path_component_create_sequence(struct fy_path *fypp) +{ + struct fy_path_component *fypc; + + if (!fypp) + return NULL; + + fypc = fy_path_component_alloc(fypp); + if (!fypc) + return NULL; + + fypc->type = FYPCT_SEQ; + + fypc->seq.idx = -1; + + return fypc; +} + +/* True when fypc is non-NULL and of type FYPCT_MAP. */ +bool fy_path_component_is_mapping(struct fy_path_component *fypc) +{ + return fypc && fypc->type == FYPCT_MAP; +} + +/* seq.idx of a sequence component; -1 for NULL or any other component type. */ +int fy_path_component_sequence_get_index(struct fy_path_component *fypc) +{ + return fypc && fypc->type == FYPCT_SEQ ? fypc->seq.idx : -1; +} + +/* Scalar key token of a mapping component; NULL when fypc is NULL, not a mapping, has no key, or the key is complex. */ +struct fy_token *fy_path_component_mapping_get_scalar_key(struct fy_path_component *fypc) +{ + return fypc && fypc->type == FYPCT_MAP && + fypc->map.has_key && !fypc->map.is_complex_key ? 
fypc->map.scalar.key : NULL; +} + +/* Tag token of a mapping component's scalar key; NULL when fypc is NULL, not a mapping, has no key, or the key is complex. */ +struct fy_token *fy_path_component_mapping_get_scalar_key_tag(struct fy_path_component *fypc) +{ + return fypc && fypc->type == FYPCT_MAP && + fypc->map.has_key && !fypc->map.is_complex_key ? fypc->map.scalar.tag : NULL; +} + +/* Document holding a mapping component's complex key; NULL when fypc is NULL, not a mapping, has no key, or the key is a scalar. */ +struct fy_document *fy_path_component_mapping_get_complex_key(struct fy_path_component *fypc) +{ + return fypc && fypc->type == FYPCT_MAP && + fypc->map.has_key && fypc->map.is_complex_key ? fypc->map.complex_key : NULL; +} + +/* True when fypc is non-NULL and of type FYPCT_SEQ. */ +bool fy_path_component_is_sequence(struct fy_path_component *fypc) +{ + return fypc && fypc->type == FYPCT_SEQ; +} + +/* Append the text of a single component to ea: the key of a mapping (scalar text, '*'-prefixed for an alias, or the complex key emitted as one-line flow) or the index of a sequence. Returns 0 on success, -1 when the component has nothing to print yet or the key text is unavailable; aborts on FYPCT_NONE. fypc is dereferenced without a NULL check. */ +static int fy_path_component_get_text_internal(struct fy_emit_accum *ea, struct fy_path_component *fypc) +{ + char *doctxt; + const char *text; + size_t len; + + switch (fypc->type) { + case FYPCT_NONE: + abort(); + + case FYPCT_MAP: + + /* we don't handle transitionals */ + if (!fypc->map.has_key || fypc->map.await_key || fypc->map.root) + return -1; + + if (!fypc->map.is_complex_key && fypc->map.scalar.key) { + text = fy_token_get_text(fypc->map.scalar.key, &len); + if (!text) + return -1; + + if (fypc->map.scalar.key->type == FYTT_ALIAS) + fy_emit_accum_utf8_put_raw(ea, '*'); + fy_emit_accum_utf8_write_raw(ea, text, len); + + /* NOTE(review): the branch below is also reached when is_complex_key is false and scalar.key is NULL; complex_key is declared in the same union as scalar.tag (struct fy_path_mapping_state), so a tag pointer would then be read as a document - confirm */ + } else if (fypc->map.complex_key) { + /* complex key */ + doctxt = fy_emit_document_to_string(fypc->map.complex_key, + FYECF_WIDTH_INF | FYECF_INDENT_DEFAULT | + FYECF_MODE_FLOW_ONELINE | FYECF_NO_ENDING_NEWLINE); + /* NOTE(review): doctxt goes to strlen() unchecked, while fy_path_get_text_internal() tests the same call's result for NULL first - confirm it cannot fail here */ + fy_emit_accum_utf8_write_raw(ea, doctxt, strlen(doctxt)); + free(doctxt); + } + break; + + case FYPCT_SEQ: + + /* not started filling yet */ + if (fypc->seq.idx < 0) + return -1; + + fy_emit_accum_utf8_printf_raw(ea, "%d", fypc->seq.idx); + break; + } + + return 0; +} + +/* Append the text of a whole path to ea: the parent path first, then one '/'-prefixed entry per component. Returns 0 on success, -1 on failure. */ +static int fy_path_get_text_internal(struct fy_emit_accum *ea, struct fy_path *fypp) +{ + struct fy_path_component *fypc; + struct fy_document *fyd; + char *doctxt; + const char *text; + size_t len; + bool local_key = false; + int rc, count; + + if (fypp->parent) { + 
rc = fy_path_get_text_internal(ea, fypp->parent); + assert(!rc); + if (rc) + return -1; + } + + /* OK, we have to iterate and rebuild the paths */ + for (fypc = fy_path_component_list_head(&fypp->components), count = 0; fypc; + fypc = fy_path_component_next(&fypp->components, fypc), count++) { + + fy_emit_accum_utf8_put_raw(ea, '/'); + + switch (fypc->type) { + case FYPCT_NONE: + abort(); + + case FYPCT_MAP: + + if (!fypc->map.has_key || fypc->map.root) + break; + + /* key reference ? wrap in .key(X)*/ + local_key = false; + if (fypc->map.await_key) + local_key = true; + + if (local_key) + fy_emit_accum_utf8_write_raw(ea, ".key(", 5); + + if (!fypc->map.is_complex_key) { + + if (fypc->map.scalar.key) { + text = fy_token_get_text(fypc->map.scalar.key, &len); + assert(text); + if (!text) + return -1; + if (fypc->map.scalar.key->type == FYTT_ALIAS) + fy_emit_accum_utf8_put_raw(ea, '*'); + fy_emit_accum_utf8_write_raw(ea, text, len); + } else { + fy_emit_accum_utf8_write_raw(ea, ".null()", 7); + } + } else { + if (fypc->map.complex_key) + fyd = fypc->map.complex_key; + else + fyd = fy_document_builder_peek_document(fypp->fydb); + + /* complex key */ + if (fyd) { + doctxt = fy_emit_document_to_string(fyd, + FYECF_WIDTH_INF | FYECF_INDENT_DEFAULT | + FYECF_MODE_FLOW_ONELINE | FYECF_NO_ENDING_NEWLINE); + } else + doctxt = NULL; + + if (doctxt) { + fy_emit_accum_utf8_write_raw(ea, doctxt, strlen(doctxt)); + free(doctxt); + } else { + fy_emit_accum_utf8_write_raw(ea, "<X>", 3); + } + } + + if (local_key) + fy_emit_accum_utf8_put_raw(ea, ')'); + + break; + + case FYPCT_SEQ: + + /* not started filling yet */ + if (fypc->seq.idx < 0) + break; + + fy_emit_accum_utf8_printf_raw(ea, "%d", fypc->seq.idx); + break; + } + } + + return 0; +} + +char *fy_path_get_text(struct fy_path *fypp) +{ + struct fy_emit_accum ea; /* use an emit accumulator */ + char *path = NULL; + size_t len; + int rc; + + /* no inplace buffer; we will need the malloc'ed contents anyway */ + 
fy_emit_accum_init(&ea, NULL, 0, 0, fylb_cr_nl); + + fy_emit_accum_start(&ea, 0, fylb_cr_nl); + + rc = fy_path_get_text_internal(&ea, fypp); + if (rc) + goto err_out; + + if (fy_emit_accum_empty(&ea)) + fy_emit_accum_utf8_printf_raw(&ea, "/"); + + fy_emit_accum_make_0_terminated(&ea); + + path = fy_emit_accum_steal(&ea, &len); + +err_out: + fy_emit_accum_cleanup(&ea); + + return path; +} + +char *fy_path_component_get_text(struct fy_path_component *fypc) +{ + struct fy_emit_accum ea; /* use an emit accumulator */ + char *text = NULL; + size_t len; + int rc; + + /* no inplace buffer; we will need the malloc'ed contents anyway */ + fy_emit_accum_init(&ea, NULL, 0, 0, fylb_cr_nl); + + fy_emit_accum_start(&ea, 0, fylb_cr_nl); + + rc = fy_path_component_get_text_internal(&ea, fypc); + if (rc) + goto err_out; + + fy_emit_accum_make_0_terminated(&ea); + + text = fy_emit_accum_steal(&ea, &len); + +err_out: + fy_emit_accum_cleanup(&ea); + + return text; +} + +int fy_path_depth(struct fy_path *fypp) +{ + struct fy_path_component *fypc; + int depth; + + if (!fypp) + return 0; + + depth = fy_path_depth(fypp->parent); + for (fypc = fy_path_component_list_head(&fypp->components); fypc; + fypc = fy_path_component_next(&fypp->components, fypc)) { + + depth++; + } + + return depth; +} + +struct fy_path *fy_path_parent(struct fy_path *fypp) +{ + if (!fypp) + return NULL; + return fypp->parent; +} + +struct fy_path_component * +fy_path_last_component(struct fy_path *fypp) +{ + return fy_path_component_list_tail(&fypp->components); +} + +struct fy_path_component * +fy_path_last_not_collection_root_component(struct fy_path *fypp) +{ + struct fy_path_component *fypc_last; + + fypc_last = fy_path_component_list_tail(&fypp->components); + if (!fypc_last) + return NULL; + + if (!fy_path_component_is_collection_root(fypc_last)) + return fypc_last; + + fypc_last = fy_path_component_prev(&fypp->components, fypc_last); + if (fypc_last) + return fypc_last; + + if (fypp->parent) + return 
fy_path_component_list_tail(&fypp->parent->components); + + return NULL; +} + +bool fy_path_in_root(struct fy_path *fypp) +{ + struct fy_path_component *fypc_last; + + if (!fypp) + return true; + + fypc_last = fy_path_last_not_collection_root_component(fypp); + return fypc_last == NULL; +} + + +bool fy_path_in_mapping(struct fy_path *fypp) +{ + struct fy_path_component *fypc_last; + + if (!fypp) + return false; + + fypc_last = fy_path_last_not_collection_root_component(fypp); + if (!fypc_last) + return false; + + return fypc_last->type == FYPCT_MAP; +} + +bool fy_path_in_sequence(struct fy_path *fypp) +{ + struct fy_path_component *fypc_last; + + if (!fypp) + return false; + + fypc_last = fy_path_last_not_collection_root_component(fypp); + if (!fypc_last) + return false; + + return fypc_last->type == FYPCT_SEQ; +} + +bool fy_path_in_mapping_key(struct fy_path *fypp) +{ + struct fy_path_component *fypc_last; + + if (!fypp) + return false; + + fypc_last = fy_path_last_not_collection_root_component(fypp); + if (!fypc_last) + return false; + + return fypc_last->type == FYPCT_MAP && fypc_last->map.await_key; +} + +bool fy_path_in_mapping_value(struct fy_path *fypp) +{ + struct fy_path_component *fypc_last; + + if (!fypp) + return false; + + fypc_last = fy_path_last_not_collection_root_component(fypp); + if (!fypc_last) + return false; + + return fypc_last->type == FYPCT_MAP && !fypc_last->map.await_key; +} + +bool fy_path_in_collection_root(struct fy_path *fypp) +{ + struct fy_path_component *fypc_last; + + if (!fypp) + return false; + + fypc_last = fy_path_component_list_tail(&fypp->components); + if (!fypc_last) + return false; + + return fy_path_component_is_collection_root(fypc_last); +} + +void *fy_path_get_root_user_data(struct fy_path *fypp) +{ + if (!fypp) + return NULL; + + if (!fypp->parent) + return fypp->user_data; + + return fy_path_get_root_user_data(fypp->parent); +} + +void fy_path_set_root_user_data(struct fy_path *fypp, void *data) +{ + if (!fypp) + 
return; + + if (!fypp->parent) { + fypp->user_data = data; + return; + } + + fy_path_set_root_user_data(fypp->parent, data); +} + +void *fy_path_component_get_mapping_user_data(struct fy_path_component *fypc) +{ + return fypc && fypc->type == FYPCT_MAP ? fypc->user_data : NULL; +} + +void *fy_path_component_get_mapping_key_user_data(struct fy_path_component *fypc) +{ + return fypc && fypc->type == FYPCT_MAP ? fypc->map.key_user_data : NULL; +} + +void *fy_path_component_get_sequence_user_data(struct fy_path_component *fypc) +{ + return fypc && fypc->type == FYPCT_SEQ ? fypc->user_data : NULL; +} + +void fy_path_component_set_mapping_user_data(struct fy_path_component *fypc, void *data) +{ + if (!fypc || fypc->type != FYPCT_MAP) + return; + + fypc->user_data = data; +} + +void fy_path_component_set_mapping_key_user_data(struct fy_path_component *fypc, void *data) +{ + if (!fypc || fypc->type != FYPCT_MAP) + return; + + fypc->map.key_user_data = data; +} + +void fy_path_component_set_sequence_user_data(struct fy_path_component *fypc, void *data) +{ + if (!fypc || fypc->type != FYPCT_SEQ) + return; + + fypc->user_data = data; +} diff --git a/contrib/libs/libfyaml/src/lib/fy-path.h b/contrib/libs/libfyaml/src/lib/fy-path.h new file mode 100644 index 0000000000..e551d5a7e4 --- /dev/null +++ b/contrib/libs/libfyaml/src/lib/fy-path.h @@ -0,0 +1,114 @@ +/* + * fy-path.h - YAML parser private path definitions + * + * Copyright (c) 2021 Pantelis Antoniou <pantelis.antoniou@konsulko.com> + * + * SPDX-License-Identifier: MIT + */ +#ifndef FY_PATH_H +#define FY_PATH_H + +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#include <stdbool.h> + +#include <libfyaml.h> + +#include "fy-list.h" +#include "fy-typelist.h" + +#include "fy-emit-accum.h" + +FY_TYPE_FWD_DECL_LIST(path_component); + +enum fy_path_component_type { + FYPCT_NONE, /* not yet instantiated */ + FYPCT_MAP, /* it's a mapping */ + FYPCT_SEQ, /* it's a sequence */ +}; + +/* fwd declaration */ +struct fy_document; 
+struct fy_document_builder; + +#define FY_PATH_MAPPING_SHORT_KEY 32 + +struct fy_path_mapping_state { + bool root : 1; /* no keys, values yet */ + bool await_key : 1; + bool accumulating_complex_key : 1; + bool has_key : 1; /* has a key */ + bool is_complex_key : 1; + bool complex_key_complete : 1; + union { + struct { + struct fy_token *tag; + struct fy_token *key; + } scalar; + struct fy_document *complex_key; + }; + void *key_user_data; +}; + +struct fy_path_sequence_state { + int idx; +}; + +struct fy_path_component { + struct fy_list_head node; + enum fy_path_component_type type; + union { + struct fy_path_mapping_state map; + struct fy_path_sequence_state seq; + }; + void *user_data; +}; +FY_TYPE_DECL_LIST(path_component); + +static inline bool +fy_path_component_is_collection_root(struct fy_path_component *fypc) +{ + if (!fypc) + return false; + + switch (fypc->type) { + case FYPCT_NONE: + break; + case FYPCT_SEQ: + return fypc->seq.idx < 0; + case FYPCT_MAP: + return fypc->map.root; + } + + return false; +} + +FY_TYPE_FWD_DECL_LIST(path); +struct fy_path { + struct fy_list_head node; + struct fy_path_component_list recycled_component; + struct fy_path_component_list components; + struct fy_document_builder *fydb; /* for complex keys */ + struct fy_path *parent; /* when we have a parent */ + void *user_data; +}; +FY_TYPE_DECL_LIST(path); + +struct fy_path *fy_path_create(void); +void fy_path_destroy(struct fy_path *fypp); + +void fy_path_reset(struct fy_path *fypp); + +struct fy_path_component *fy_path_component_alloc(struct fy_path *fypp); +void fy_path_component_cleanup(struct fy_path_component *fypc); +void fy_path_component_free(struct fy_path_component *fypc); +void fy_path_component_destroy(struct fy_path_component *fypc); +void fy_path_component_recycle(struct fy_path *fypp, struct fy_path_component *fypc); +void fy_path_component_clear_state(struct fy_path_component *fypc); + +struct fy_path_component *fy_path_component_create_mapping(struct fy_path 
*fypp); +struct fy_path_component *fy_path_component_create_sequence(struct fy_path *fypp); + +#endif diff --git a/contrib/libs/libfyaml/src/lib/fy-token.c b/contrib/libs/libfyaml/src/lib/fy-token.c new file mode 100644 index 0000000000..91efa89f33 --- /dev/null +++ b/contrib/libs/libfyaml/src/lib/fy-token.c @@ -0,0 +1,1869 @@ +/* + * fy-token.c - YAML token methods + * + * Copyright (c) 2019 Pantelis Antoniou <pantelis.antoniou@konsulko.com> + * + * SPDX-License-Identifier: MIT + */ +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#include <stdio.h> +#include <string.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <assert.h> +#include <stdlib.h> +#include <errno.h> +#include <stdarg.h> + +#include <libfyaml.h> + +#include "fy-parse.h" + +#include "fy-ctype.h" +#include "fy-utf8.h" +#include "fy-emit-accum.h" + +#include "fy-walk.h" + +#include "fy-token.h" + +enum fy_scalar_style fy_token_scalar_style(struct fy_token *fyt) +{ + return fy_token_scalar_style_inline(fyt); +} + +enum fy_token_type fy_token_get_type(struct fy_token *fyt) +{ + return fy_token_get_type_inline(fyt); +} + +void fy_token_clean_rl(struct fy_token_list *fytl, struct fy_token *fyt) +{ + int i; + + if (!fyt) + return; + + /* release reference */ + fy_input_unref(fyt->handle.fyi); + fyt->handle.fyi = NULL; + + /* release comment references */ + if (fyt->comment) { + for (i = 0; i < fycp_max; i++) + fy_input_unref(fyt->comment[i].fyi); + free(fyt->comment); + fyt->comment = NULL; + } + + switch (fyt->type) { + case FYTT_TAG: + fy_token_unref(fyt->tag.fyt_td); + fyt->tag.fyt_td = NULL; + if (fyt->tag.handle0) { + free(fyt->tag.handle0); + fyt->tag.handle0 = NULL; + } + if (fyt->tag.suffix0) { + free(fyt->tag.suffix0); + fyt->tag.suffix0 = NULL; + } + break; + + case FYTT_TAG_DIRECTIVE: + if (fyt->tag_directive.prefix0) { + free(fyt->tag_directive.prefix0); + fyt->tag_directive.prefix0 = NULL; + } + if (fyt->tag_directive.handle0) { + free(fyt->tag_directive.handle0); + 
fyt->tag_directive.handle0 = NULL; + } + break; + + case FYTT_PE_MAP_KEY: + fy_document_destroy(fyt->map_key.fyd); + fyt->map_key.fyd = NULL; + break; + + case FYTT_SCALAR: + if (fyt->scalar.path_key_storage) { + free(fyt->scalar.path_key_storage); + fyt->scalar.path_key_storage = NULL; + } + break; + + case FYTT_ALIAS: + if (fyt->alias.expr) { + fy_path_expr_free(fyt->alias.expr); + fyt->alias.expr = NULL; + } + break; + + default: + break; + } + + if (fyt->text0) { + free(fyt->text0); + fyt->text0 = NULL; + } + + fyt->type = FYTT_NONE; + fyt->analyze_flags = 0; + fyt->text_len = 0; + fyt->text = NULL; +} + +void fy_token_list_unref_all_rl(struct fy_token_list *fytl, struct fy_token_list *fytl_tofree) +{ + struct fy_token *fyt; + + while ((fyt = fy_token_list_pop(fytl_tofree)) != NULL) + fy_token_unref_rl(fytl, fyt); +} + +static bool fy_token_text_needs_rebuild(struct fy_token *fyt) +{ + const struct fy_atom *fya; + + if (!fy_token_text_is_direct(fyt)) + return false; + + fya = fy_token_atom(fyt); + if (!fya || !fya->fyi) + return false; + + return fya->fyi_generation != fya->fyi->generation; +} + +static int fy_tag_token_format_internal(const struct fy_token *fyt, void *out, size_t *outszp) +{ + char *o = NULL, *oe = NULL; + size_t outsz; + const char *handle, *suffix; + size_t handle_size, suffix_size; + int len, code_length, rlen; + uint8_t code[4]; + const char *t, *s, *e; + + if (!fyt || fyt->type != FYTT_TAG) + return 0; + + if (out && *outszp <= 0) + return 0; + + if (out) { + outsz = *outszp; + o = out; + oe = (char *)out + outsz; + } + + if (!fyt->tag.fyt_td) + return -1; + + handle = fy_tag_directive_token_prefix(fyt->tag.fyt_td, &handle_size); + if (!handle) + return -1; + + suffix = fy_atom_data(&fyt->handle) + fyt->tag.skip + fyt->tag.handle_length; + suffix_size = fyt->tag.suffix_length; + +#define O_CPY(_src, _len) \ + do { \ + int _l = (_len); \ + if (o && _l) { \ + int _cl = _l; \ + if (_cl > (oe - o)) \ + _cl = oe - o; \ + memcpy(o, (_src), 
_cl); \ + o += _cl; \ + } \ + len += _l; \ + } while(0) + + len = 0; + O_CPY(handle, handle_size); + + /* escape suffix as a URI */ + s = suffix; + e = s + suffix_size; + while (s < e) { + /* find next escape */ + t = memchr(s, '%', e - s); + rlen = (t ? t : e) - s; + O_CPY(s, rlen); + + /* end of string */ + if (!t) + break; + s = t; + + code_length = sizeof(code); + t = fy_uri_esc(s, e - s, code, &code_length); + if (!t) + break; + + /* output escaped utf8 */ + O_CPY(code, code_length); + s = t; + } + +#undef O_CPY + return len; + +} + +int fy_tag_token_format_text_length(const struct fy_token *fyt) +{ + return fy_tag_token_format_internal(fyt, NULL, NULL); +} + +const char *fy_tag_token_format_text(const struct fy_token *fyt, char *buf, size_t maxsz) +{ + fy_tag_token_format_internal(fyt, buf, &maxsz); + return buf; +} + +static int fy_tag_directive_token_format_internal(const struct fy_token *fyt, + void *out, size_t *outszp) +{ + char *o = NULL, *oe = NULL; + size_t outsz; + int len; + const char *handle, *prefix; + size_t handle_size, prefix_size; + + if (!fyt || fyt->type != FYTT_TAG_DIRECTIVE) + return 0; + + if (out && *outszp <= 0) + return 0; + + if (out) { + outsz = *outszp; + o = out; + oe = (char *)out + outsz; + } + +#define O_CPY(_src, _len) \ + do { \ + int _l = (_len); \ + if (o && _l) { \ + int _cl = _l; \ + if (_cl > (oe - o)) \ + _cl = oe - o; \ + memcpy(o, (_src), _cl); \ + o += _cl; \ + } \ + len += _l; \ + } while(0) + + len = 0; + + handle = fy_atom_data(&fyt->handle); + handle_size = fy_atom_size(&fyt->handle); + + prefix = handle + handle_size - fyt->tag_directive.uri_length; + prefix_size = fyt->tag_directive.uri_length; + handle_size = fyt->tag_directive.tag_length; + + if (handle_size) + O_CPY(handle, handle_size); + else + O_CPY("!<", 2); + O_CPY(prefix, prefix_size); + if (!handle_size) + O_CPY(">", 1); + +#undef O_CPY + return len; + +} + +int fy_tag_directive_token_format_text_length(const struct fy_token *fyt) +{ + return 
fy_tag_directive_token_format_internal(fyt, NULL, NULL); +} + +const char *fy_tag_directive_token_format_text(const struct fy_token *fyt, char *buf, size_t maxsz) +{ + fy_tag_directive_token_format_internal(fyt, buf, &maxsz); + return buf; +} + +const char *fy_tag_directive_token_prefix(struct fy_token *fyt, size_t *lenp) +{ + const char *ptr; + size_t len; + + if (!fyt || fyt->type != FYTT_TAG_DIRECTIVE) { + *lenp = 0; + return NULL; + } + ptr = fy_atom_data(&fyt->handle); + len = fy_atom_size(&fyt->handle); + ptr = ptr + len - fyt->tag_directive.uri_length; + *lenp = fyt->tag_directive.uri_length; + + return ptr; +} + +const char *fy_tag_directive_token_prefix0(struct fy_token *fyt) +{ + char *text0; + const char *text; + size_t len; + + if (!fyt || fyt->type != FYTT_TAG_DIRECTIVE) + return NULL; + + /* use the cache if it's there (and doesn't need a rebuild) */ + if (fyt->tag_directive.prefix0 && !fy_token_text_needs_rebuild(fyt)) + return fyt->tag_directive.prefix0; + + if (fyt->tag_directive.prefix0) { + free(fyt->tag_directive.prefix0); + fyt->tag_directive.prefix0 = NULL; + } + + text = fy_tag_directive_token_prefix(fyt, &len); + if (!text) + return NULL; + + text0 = malloc(len + 1); + if (!text0) + return NULL; + memcpy(text0, text, len); + text0[len] = '\0'; + + fyt->tag_directive.prefix0 = text0; + + return fyt->tag_directive.prefix0; +} + +const char *fy_tag_directive_token_handle(struct fy_token *fyt, size_t *lenp) +{ + const char *ptr; + + if (!fyt || fyt->type != FYTT_TAG_DIRECTIVE) { + *lenp = 0; + return NULL; + } + ptr = fy_atom_data(&fyt->handle); + *lenp = fyt->tag_directive.tag_length; + return ptr; +} + +const char *fy_tag_directive_token_handle0(struct fy_token *fyt) +{ + char *text0; + const char *text; + size_t len; + + if (!fyt || fyt->type != FYTT_TAG_DIRECTIVE) + return NULL; + + /* use the cache if it's there (and doesn't need a rebuild) */ + if (fyt->tag_directive.handle0 && !fy_token_text_needs_rebuild(fyt)) + return 
fyt->tag_directive.handle0; + + if (fyt->tag_directive.handle0) { + free(fyt->tag_directive.handle0); + fyt->tag_directive.handle0 = NULL; + } + + text = fy_tag_directive_token_handle(fyt, &len); + if (!text) + return NULL; + + text0 = malloc(len + 1); + if (!text0) + return NULL; + memcpy(text0, text, len); + text0[len] = '\0'; + + fyt->tag_directive.handle0 = text0; + + return fyt->tag_directive.handle0; +} + +struct fy_token *fy_token_vcreate_rl(struct fy_token_list *fytl, enum fy_token_type type, va_list ap) +{ + struct fy_token *fyt = NULL; + struct fy_atom *handle; + struct fy_token *fyt_td; + + if ((unsigned int)type >= FYTT_COUNT) + goto err_out; + + fyt = fy_token_alloc_rl(fytl); + if (!fyt) + goto err_out; + fyt->type = type; + + handle = va_arg(ap, struct fy_atom *); + if (handle) + fyt->handle = *handle; + else + fy_atom_reset(&fyt->handle); + + switch (fyt->type) { + case FYTT_TAG_DIRECTIVE: + fyt->tag_directive.tag_length = va_arg(ap, unsigned int); + fyt->tag_directive.uri_length = va_arg(ap, unsigned int); + fyt->tag_directive.is_default = va_arg(ap, int) ? 
true : false; + fyt->tag_directive.prefix0 = NULL; + fyt->tag_directive.handle0 = NULL; + break; + case FYTT_SCALAR: + fyt->scalar.style = va_arg(ap, enum fy_scalar_style); + if (fyt->scalar.style != FYSS_ANY && (unsigned int)fyt->scalar.style >= FYSS_MAX) + goto err_out; + fyt->scalar.path_key = NULL; + fyt->scalar.path_key_len = 0; + fyt->scalar.path_key_storage = NULL; + break; + case FYTT_TAG: + fyt->tag.skip = va_arg(ap, unsigned int); + fyt->tag.handle_length = va_arg(ap, unsigned int); + fyt->tag.suffix_length = va_arg(ap, unsigned int); + + fyt_td = va_arg(ap, struct fy_token *); + if (!fyt_td) + goto err_out; + fyt->tag.fyt_td = fy_token_ref(fyt_td); + fyt->tag.handle0 = NULL; + fyt->tag.suffix0 = NULL; + break; + + case FYTT_VERSION_DIRECTIVE: + fyt->version_directive.vers = *va_arg(ap, struct fy_version *); + break; + + case FYTT_ALIAS: + fyt->alias.expr = va_arg(ap, struct fy_path_expr *); + break; + + case FYTT_KEY: + fyt->key.flow_level = va_arg(ap, int); + break; + + case FYTT_PE_MAP_KEY: + fyt->map_key.fyd = va_arg(ap, struct fy_document *); + break; + + case FYTT_PE_SEQ_INDEX: + fyt->seq_index.index = va_arg(ap, int); + break; + + case FYTT_PE_SEQ_SLICE: + fyt->seq_slice.start_index = va_arg(ap, int); + fyt->seq_slice.end_index = va_arg(ap, int); + break; + + case FYTT_NONE: + goto err_out; + + default: + break; + } + + if (fyt->handle.fyi) + fy_input_ref(fyt->handle.fyi); + + return fyt; + +err_out: + fy_token_unref(fyt); + + return NULL; +} + +struct fy_token *fy_token_create_rl(struct fy_token_list *fytl, enum fy_token_type type, ...) +{ + struct fy_token *fyt; + va_list ap; + + va_start(ap, type); + fyt = fy_token_vcreate_rl(fytl, type, ap); + va_end(ap); + + return fyt; +} + +struct fy_token *fy_token_vcreate(enum fy_token_type type, va_list ap) +{ + return fy_token_vcreate_rl(NULL, type, ap); +} + +struct fy_token *fy_token_create(enum fy_token_type type, ...) 
+{ + struct fy_token *fyt; + va_list ap; + + va_start(ap, type); + fyt = fy_token_vcreate_rl(NULL, type, ap); + va_end(ap); + + return fyt; +} + +struct fy_token *fy_parse_token_create(struct fy_parser *fyp, enum fy_token_type type, ...) +{ + struct fy_token *fyt; + va_list ap; + + if (!fyp) + return NULL; + + va_start(ap, type); + fyt = fy_token_vcreate_rl(fyp->recycled_token_list, type, ap); + va_end(ap); + + return fyt; +} + +int fy_token_format_text_length(struct fy_token *fyt) +{ + int length; + + if (!fyt) + return 0; + + switch (fyt->type) { + + case FYTT_TAG: + return fy_tag_token_format_text_length(fyt); + + case FYTT_TAG_DIRECTIVE: + return fy_tag_directive_token_format_text_length(fyt); + + default: + break; + } + + length = fy_atom_format_text_length(&fyt->handle); + + return length; +} + +const char *fy_token_format_text(struct fy_token *fyt, char *buf, size_t maxsz) +{ + const char *str; + + if (maxsz == 0) + return buf; + + if (!fyt) { + if (maxsz > 0) + buf[0] = '\0'; + return buf; + } + + switch (fyt->type) { + + case FYTT_TAG: + return fy_tag_token_format_text(fyt, buf, maxsz); + + case FYTT_TAG_DIRECTIVE: + return fy_tag_directive_token_format_text(fyt, buf, maxsz); + + default: + break; + } + + str = fy_atom_format_text(&fyt->handle, buf, maxsz); + + return str; +} + +int fy_token_format_utf8_length(struct fy_token *fyt) +{ + const char *str; + size_t len; + + if (!fyt) + return 0; + + switch (fyt->type) { + + case FYTT_TAG: + case FYTT_TAG_DIRECTIVE: + str = fy_token_get_text(fyt, &len); + if (!str) + return 0; + return fy_utf8_count(str, len); + + default: + break; + } + + return fy_atom_format_utf8_length(&fyt->handle); +} + + +struct fy_atom *fy_token_atom(struct fy_token *fyt) +{ + return fyt ? 
&fyt->handle : NULL; +} + +const struct fy_mark *fy_token_start_mark(struct fy_token *fyt) +{ + const struct fy_atom *atom; + + atom = fy_token_atom(fyt); + if (atom) + return &atom->start_mark; + + /* something we don't track */ + return NULL; +} + +const struct fy_mark *fy_token_end_mark(struct fy_token *fyt) +{ + const struct fy_atom *atom; + + atom = fy_token_atom(fyt); + if (atom) + return &atom->end_mark; + + /* something we don't track */ + return NULL; +} + +int fy_token_text_analyze(struct fy_token *fyt) +{ + const char *s, *e; + const char *value = NULL; + enum fy_atom_style style; + int c, w, cn, cp, col; + size_t len; + int flags; + + if (!fyt) + return FYTTAF_CAN_BE_SIMPLE_KEY | FYTTAF_DIRECT_OUTPUT | + FYTTAF_EMPTY | FYTTAF_CAN_BE_DOUBLE_QUOTED; + + if (fyt->analyze_flags) + return fyt->analyze_flags; + + /* only tokens that can generate text */ + if (fyt->type != FYTT_SCALAR && + fyt->type != FYTT_TAG && + fyt->type != FYTT_ANCHOR && + fyt->type != FYTT_ALIAS) { + flags = FYTTAF_NO_TEXT_TOKEN; + fyt->analyze_flags = flags; + return flags; + } + + flags = FYTTAF_TEXT_TOKEN; + + style = fy_token_atom_style(fyt); + + /* can this token be a simple key initial condition */ + if (!fy_atom_style_is_block(style) && style != FYAS_URI) + flags |= FYTTAF_CAN_BE_SIMPLE_KEY; + + /* can this token be directly output initial condition */ + if (!fy_atom_style_is_block(style)) + flags |= FYTTAF_DIRECT_OUTPUT; + + /* get value */ + value = fy_token_get_text(fyt, &len); + if (!value || len == 0) { + flags |= FYTTAF_EMPTY | FYTTAF_CAN_BE_DOUBLE_QUOTED | FYTTAF_CAN_BE_UNQUOTED_PATH_KEY; + fyt->analyze_flags = flags; + return flags; + } + + flags |= FYTTAF_CAN_BE_PLAIN | + FYTTAF_CAN_BE_SINGLE_QUOTED | + FYTTAF_CAN_BE_DOUBLE_QUOTED | + FYTTAF_CAN_BE_LITERAL | + FYTTAF_CAN_BE_LITERAL | + FYTTAF_CAN_BE_FOLDED | + FYTTAF_CAN_BE_PLAIN_FLOW | + FYTTAF_CAN_BE_UNQUOTED_PATH_KEY; + + /* start with document indicators must be quoted at indent 0 */ + if (len >= 3 && (!memcmp(value, 
"---", 3) || !memcmp(value, "...", 3))) + flags |= FYTTAF_QUOTE_AT_0; + + s = value; + e = value + len; + + col = 0; + + /* get first character */ + cn = fy_utf8_get(s, e - s, &w); + s += w; + col = fy_token_is_lb(fyt, cn) ? 0 : (col + 1); + + /* disable folded right off the bat, it's a pain */ + flags &= ~FYTTAF_CAN_BE_FOLDED; + + /* plain scalars can't start with any indicator (or space/lb) */ + if ((flags & (FYTTAF_CAN_BE_PLAIN | FYTTAF_CAN_BE_PLAIN_FLOW)) && + (fy_is_indicator(cn) || fy_token_is_lb(fyt, cn) || fy_is_ws(cn))) + flags &= ~(FYTTAF_CAN_BE_PLAIN | + FYTTAF_CAN_BE_PLAIN_FLOW); + + /* plain scalars in flow mode can't start with a flow indicator */ + if ((flags & FYTTAF_CAN_BE_PLAIN_FLOW) && + fy_is_flow_indicator(cn)) + flags &= ~FYTTAF_CAN_BE_PLAIN_FLOW; + + /* plain unquoted path keys can only start with [a-zA-Z_] */ + if ((flags & FYTTAF_CAN_BE_UNQUOTED_PATH_KEY) && + !fy_is_first_alpha(cn)) + flags &= ~FYTTAF_CAN_BE_UNQUOTED_PATH_KEY; + + cp = -1; + for (c = cn; c >= 0; s += w, cp = c, c = cn) { + + /* can be -1 on end */ + cn = fy_utf8_get(s, e - s, &w); + + /* zero can't be output, only in double quoted mode */ + if (c == 0) { + flags &= ~(FYTTAF_DIRECT_OUTPUT | + FYTTAF_CAN_BE_PLAIN | + FYTTAF_CAN_BE_SINGLE_QUOTED | + FYTTAF_CAN_BE_LITERAL | + FYTTAF_CAN_BE_FOLDED | + FYTTAF_CAN_BE_PLAIN_FLOW | + FYTTAF_CAN_BE_UNQUOTED_PATH_KEY); + flags |= FYTTAF_CAN_BE_DOUBLE_QUOTED; + + } else if (fy_is_ws(c)) { + + flags |= FYTTAF_HAS_WS; + if (fy_is_ws(cn)) + flags |= FYTTAF_HAS_CONSECUTIVE_WS; + + } else if (fy_token_is_lb(fyt, c)) { + + flags |= FYTTAF_HAS_LB; + if (fy_token_is_lb(fyt, cn)) + flags |= FYTTAF_HAS_CONSECUTIVE_LB; + + /* only non linebreaks can be simple keys */ + flags &= ~FYTTAF_CAN_BE_SIMPLE_KEY; + + /* anything with linebreaks, can't be direct */ + flags &= ~FYTTAF_DIRECT_OUTPUT; + } + + if ((flags & FYTTAF_CAN_BE_UNQUOTED_PATH_KEY) && !fy_is_alnum(c)) + flags &= ~FYTTAF_CAN_BE_UNQUOTED_PATH_KEY; + + /* illegal plain combination */ + if 
((flags & FYTTAF_CAN_BE_PLAIN) && + ((c == ':' && fy_is_blankz_m(cn, fy_token_atom_lb_mode(fyt))) || + (fy_is_blankz_m(c, fy_token_atom_lb_mode(fyt)) && cn == '#') || + (cp < 0 && c == '#' && cn < 0) || + !fy_is_print(c))) { + flags &= ~(FYTTAF_CAN_BE_PLAIN | + FYTTAF_CAN_BE_PLAIN_FLOW); + } + + /* illegal plain flow combination */ + if ((flags & FYTTAF_CAN_BE_PLAIN_FLOW) && + (fy_is_flow_indicator(c) || (c == ':' && fy_is_flow_indicator(cn)))) + flags &= ~FYTTAF_CAN_BE_PLAIN_FLOW; + + /* non printable characters, turn off these styles */ + if ((flags & (FYTTAF_CAN_BE_SINGLE_QUOTED | + FYTTAF_CAN_BE_LITERAL | + FYTTAF_CAN_BE_FOLDED)) && !fy_is_print(c)) + flags &= ~(FYTTAF_CAN_BE_SINGLE_QUOTED | + FYTTAF_CAN_BE_LITERAL | + FYTTAF_CAN_BE_FOLDED); + + /* if there's an escape, it can't be direct */ + if ((flags & FYTTAF_DIRECT_OUTPUT) && + ((style == FYAS_URI && c == '%') || + (style == FYAS_SINGLE_QUOTED && c == '\'') || + (style == FYAS_DOUBLE_QUOTED && c == '\\'))) + flags &= ~FYTTAF_DIRECT_OUTPUT; + + col = fy_token_is_lb(fyt, c) ? 
0 : (col + 1); + + /* last character */ + if (cn < 0) { + /* if ends with whitespace or linebreak, can't be plain */ + if (fy_is_ws(cn) || fy_token_is_lb(fyt, cn)) + flags &= ~(FYTTAF_CAN_BE_PLAIN | + FYTTAF_CAN_BE_PLAIN_FLOW); + } + } + + fyt->analyze_flags = flags; + return flags; +} + +const char *fy_tag_token_get_directive_handle(struct fy_token *fyt, size_t *td_handle_sizep) +{ + if (!fyt || fyt->type != FYTT_TAG || !fyt->tag.fyt_td) + return NULL; + + return fy_tag_directive_token_handle(fyt->tag.fyt_td, td_handle_sizep); +} + +const char *fy_tag_token_get_directive_prefix(struct fy_token *fyt, size_t *td_prefix_sizep) +{ + if (!fyt || fyt->type != FYTT_TAG || !fyt->tag.fyt_td) + return NULL; + + return fy_tag_directive_token_prefix(fyt->tag.fyt_td, td_prefix_sizep); +} + +const char *fy_token_get_direct_output(struct fy_token *fyt, size_t *sizep) +{ + const struct fy_atom *fya; + + fya = fy_token_atom(fyt); + if (!fya || !fya->direct_output || + (fyt->type == FYTT_TAG || fyt->type == FYTT_TAG_DIRECTIVE) ) { + *sizep = 0; + return NULL; + } + *sizep = fy_atom_size(fya); + return fy_atom_data(fya); +} + +const char *fy_tag_token_handle(struct fy_token *fyt, size_t *lenp) +{ + return fy_tag_token_get_directive_handle(fyt, lenp); +} + +const char *fy_tag_token_suffix(struct fy_token *fyt, size_t *lenp) +{ + const char *tag, *prefix, *handle, *suffix; + size_t tag_len, prefix_len, handle_len, suffix_len; + + if (!fyt || fyt->type != FYTT_TAG) { + *lenp = 0; + return NULL; + } + + tag = fy_token_get_text(fyt, &tag_len); + if (!tag) + return NULL; + prefix = fy_tag_token_get_directive_prefix(fyt, &prefix_len); + if (!prefix) + return NULL; + handle = fy_tag_token_handle(fyt, &handle_len); + if (!handle || !handle_len) { + suffix = tag; + suffix_len = tag_len; + } else { + assert(prefix_len <= tag_len); + assert(tag_len >= prefix_len); + suffix = tag + prefix_len; + suffix_len = tag_len - prefix_len; + } + *lenp = suffix_len; + return suffix; +} + +const char 
*fy_tag_token_handle0(struct fy_token *fyt) +{ + char *text0; + const char *text; + size_t len; + + if (!fyt || fyt->type != FYTT_TAG) + return NULL; + + /* use the cache if it's there (and doesn't need a rebuild) */ + if (fyt->tag.handle0 && !fy_token_text_needs_rebuild(fyt)) + return fyt->tag.handle0; + + if (fyt->tag.handle0) { + free(fyt->tag.handle0); + fyt->tag.handle0 = NULL; + } + + text = fy_tag_token_handle(fyt, &len); + if (!text) + return NULL; + + text0 = malloc(len + 1); + if (!text0) + return NULL; + memcpy(text0, text, len); + text0[len] = '\0'; + + fyt->tag.handle0 = text0; + + return fyt->tag.handle0; +} + +const char *fy_tag_token_suffix0(struct fy_token *fyt) +{ + char *text0; + const char *text; + size_t len; + + if (!fyt || fyt->type != FYTT_TAG) + return NULL; + + /* use the cache if it's there (and doesn't need a rebuild) */ + if (fyt->tag.suffix0 && !fy_token_text_needs_rebuild(fyt)) + return fyt->tag.suffix0; + + if (fyt->tag.suffix0) { + free(fyt->tag.suffix0); + fyt->tag.suffix0 = NULL; + } + + text = fy_tag_token_suffix(fyt, &len); + if (!text) + return NULL; + + text0 = malloc(len + 1); + if (!text0) + return NULL; + memcpy(text0, text, len); + text0[len] = '\0'; + + fyt->tag.suffix0 = text0; + + return fyt->tag.suffix0; +} + +const struct fy_version * fy_version_directive_token_version(struct fy_token *fyt) +{ + if (!fyt || fyt->type != FYTT_VERSION_DIRECTIVE) + return NULL; + return &fyt->version_directive.vers; +} + +static void fy_token_prepare_text(struct fy_token *fyt) +{ + int ret; + + assert(fyt); + + /* get text length of this token */ + ret = fy_token_format_text_length(fyt); + + /* no text on this token? 
*/ + if (ret == -1) { + fyt->text_len = 0; + fyt->text = fyt->text0 = strdup(""); + return; + } + + fyt->text0 = malloc(ret + 1); + if (!fyt->text0) { + fyt->text_len = 0; + fyt->text = fyt->text0 = strdup(""); + return; + } + + fyt->text0[0] = '\0'; + + fyt->text_len = ret; + + fy_token_format_text(fyt, fyt->text0, ret + 1); + fyt->text0[ret] = '\0'; + + fyt->text_len = ret; + fyt->text = fyt->text0; +} + +const char *fy_token_get_text(struct fy_token *fyt, size_t *lenp) +{ + /* return empty */ + if (!fyt) { + *lenp = 0; + return ""; + } + + /* already found something */ + if (fyt->text && !fy_token_text_needs_rebuild(fyt)) { + *lenp = fyt->text_len; + return fyt->text; + } + + /* try direct output first */ + fyt->text = fy_token_get_direct_output(fyt, &fyt->text_len); + if (!fyt->text) + fy_token_prepare_text(fyt); + + *lenp = fyt->text_len; + return fyt->text; +} + +const char *fy_token_get_text0(struct fy_token *fyt) +{ + /* return empty */ + if (!fyt) + return ""; + + /* created text is always zero terminated */ + if (!fyt->text0) + fy_token_prepare_text(fyt); + + return fyt->text0; +} + +size_t fy_token_get_text_length(struct fy_token *fyt) +{ + return fy_token_format_text_length(fyt); +} + +enum comment_out_state { + cos_normal, + cos_lastnl, + cos_lastnlhash, + cos_lastnlhashspc, +}; + +const char *fy_token_get_comment(struct fy_token *fyt, char *buf, size_t maxsz, + enum fy_comment_placement which) +{ + struct fy_atom *handle; + struct fy_atom_iter iter; + const struct fy_iter_chunk *ic; + char *s, *e; + const char *ss, *ee; + int c, w, ret; + enum comment_out_state state; + bool output; + + if (!buf || maxsz == 0 || (unsigned int)which >= fycp_max) + return NULL; + + /* return empty? 
*/ + handle = fy_token_comment_handle(fyt, which, false); + if (!handle || !fy_atom_is_set(handle)) + return NULL; + + /* start expecting # */ + state = cos_lastnl; + + s = buf; + e = s + maxsz; + fy_atom_iter_start(handle, &iter); + ic = NULL; + while ((ic = fy_atom_iter_chunk_next(&iter, ic, &ret)) != NULL) { + ss = ic->str; + ee = ss + ic->len; + + while ((c = fy_utf8_get(ss, ee - ss, &w)) > 0) { + + output = true; + switch (state) { + case cos_normal: + if (fy_is_lb_m(c, handle->lb_mode)) + state = cos_lastnl; + break; + + case cos_lastnl: + if (c == '#') { + state = cos_lastnlhash; + output = false; + break; + } + state = cos_normal; + break; + + case cos_lastnlhash: + if (c == ' ') { + state = cos_lastnlhashspc; + output = false; + break; + } + state = cos_normal; + break; + + case cos_lastnlhashspc: + state = cos_normal; + break; + } + + if (output) { + s = fy_utf8_put(s, (size_t)(e - s), c); + if (!s) + return NULL; + } + + ss += w; + } + } + fy_atom_iter_finish(&iter); + + if (ret != 0 || s >= e) + return NULL; + *s = '\0'; + + return buf; +} + +const char *fy_token_get_scalar_path_key(struct fy_token *fyt, size_t *lenp) +{ + struct fy_atom *atom; + struct fy_atom_iter iter; + struct fy_emit_accum ea; /* use an emit accumulator */ + uint8_t non_utf8[4]; + size_t non_utf8_len, k; + int c, i, w, digit; + int aflags; + + if (!fyt || fyt->type != FYTT_SCALAR) { + *lenp = 0; + return NULL; + } + + /* was it cached? return */ + if (fyt->scalar.path_key) { + *lenp = fyt->scalar.path_key_len; + return fyt->scalar.path_key; + } + + /* analyze the token */ + aflags = fy_token_text_analyze(fyt); + + /* simple one? perfect */ + if ((aflags & FYTTAF_CAN_BE_UNQUOTED_PATH_KEY) == FYTTAF_CAN_BE_UNQUOTED_PATH_KEY) { + fyt->scalar.path_key = fy_token_get_text(fyt, &fyt->scalar.path_key_len); + *lenp = fyt->scalar.path_key_len; + return fyt->scalar.path_key; + } + + /* not possible, need to quote (and escape) */ + + /* no atom? i.e. 
empty */ + atom = fy_token_atom(fyt); + if (!atom) { + fyt->scalar.path_key = ""; + fyt->scalar.path_key_len = 0; + *lenp = 0; + return fyt->scalar.path_key; + } + + /* no inplace buffer; we will need the malloc'ed contents anyway */ + fy_emit_accum_init(&ea, NULL, 0, 0, fylb_cr_nl); + + fy_atom_iter_start(atom, &iter); + fy_emit_accum_start(&ea, 0, fy_token_atom_lb_mode(fyt)); + + /* output in quoted form */ + fy_emit_accum_utf8_put(&ea, '"'); + + for (;;) { + non_utf8_len = sizeof(non_utf8); + c = fy_atom_iter_utf8_quoted_get(&iter, &non_utf8_len, non_utf8); + if (c < 0) + break; + + if (c == 0 && non_utf8_len > 0) { + for (k = 0; k < non_utf8_len; k++) { + c = (int)non_utf8[k] & 0xff; + fy_emit_accum_utf8_put(&ea, '\\'); + fy_emit_accum_utf8_put(&ea, 'x'); + digit = ((unsigned int)c >> 4) & 15; + fy_emit_accum_utf8_put(&ea, + digit <= 9 ? ('0' + digit) : ('A' + digit - 10)); + digit = (unsigned int)c & 15; + fy_emit_accum_utf8_put(&ea, + digit <= 9 ? ('0' + digit) : ('A' + digit - 10)); + } + continue; + } + + if (!fy_is_printq(c) || c == '"' || c == '\\') { + + fy_emit_accum_utf8_put(&ea, '\\'); + + switch (c) { + + /* common YAML & JSON escapes */ + case '\b': + fy_emit_accum_utf8_put(&ea, 'b'); + break; + case '\f': + fy_emit_accum_utf8_put(&ea, 'f'); + break; + case '\n': + fy_emit_accum_utf8_put(&ea, 'n'); + break; + case '\r': + fy_emit_accum_utf8_put(&ea, 'r'); + break; + case '\t': + fy_emit_accum_utf8_put(&ea, 't'); + break; + case '"': + fy_emit_accum_utf8_put(&ea, '"'); + break; + case '\\': + fy_emit_accum_utf8_put(&ea, '\\'); + break; + + /* YAML only escapes */ + case '\0': + fy_emit_accum_utf8_put(&ea, '0'); + break; + case '\a': + fy_emit_accum_utf8_put(&ea, 'a'); + break; + case '\v': + fy_emit_accum_utf8_put(&ea, 'v'); + break; + case '\e': + fy_emit_accum_utf8_put(&ea, 'e'); + break; + case 0x85: + fy_emit_accum_utf8_put(&ea, 'N'); + break; + case 0xa0: + fy_emit_accum_utf8_put(&ea, '_'); + break; + case 0x2028: + fy_emit_accum_utf8_put(&ea, 
'L'); + break; + case 0x2029: + fy_emit_accum_utf8_put(&ea, 'P'); + break; + + default: + /* any kind of binary value */ + if ((unsigned int)c <= 0xff) { + fy_emit_accum_utf8_put(&ea, 'x'); + w = 2; + } else if ((unsigned int)c <= 0xffff) { + fy_emit_accum_utf8_put(&ea, 'u'); + w = 4; + } else if ((unsigned int)c <= 0xffffffff) { + fy_emit_accum_utf8_put(&ea, 'U'); + w = 8; + } + + for (i = w - 1; i >= 0; i--) { + digit = ((unsigned int)c >> (i * 4)) & 15; + fy_emit_accum_utf8_put(&ea, + digit <= 9 ? ('0' + digit) : ('A' + digit - 10)); + } + break; + } + + continue; + } + + /* regular character */ + fy_emit_accum_utf8_put(&ea, c); + } + + fy_atom_iter_finish(&iter); + + /* closing quote */ + fy_emit_accum_utf8_put(&ea, '"'); + + fy_emit_accum_make_0_terminated(&ea); + + /* get the output (note it's now NULL terminated) */ + fyt->scalar.path_key_storage = fy_emit_accum_steal(&ea, &fyt->scalar.path_key_len); + fyt->scalar.path_key = fyt->scalar.path_key_storage; + fy_emit_accum_cleanup(&ea); + + *lenp = fyt->scalar.path_key_len; + + return fyt->scalar.path_key; +} + +size_t fy_token_get_scalar_path_key_length(struct fy_token *fyt) +{ + const char *text; + size_t len; + + text = fy_token_get_scalar_path_key(fyt, &len); + if (!text) + return 0; + return len; +} + +const char *fy_token_get_scalar_path_key0(struct fy_token *fyt) +{ + const char *text; + size_t len; + + if (!fyt || fyt->type != FYTT_SCALAR) { + return NULL; + } + + /* storage is \0 terminated */ + if (fyt->scalar.path_key_storage) + return fyt->scalar.path_key_storage; + + text = fyt->scalar.path_key; + len = fyt->scalar.path_key_len; + if (!text) + text = fy_token_get_scalar_path_key(fyt, &len); + + /* something is catastrophically wrong */ + if (!text) + return NULL; + + if (fyt->scalar.path_key_storage) + return fyt->scalar.path_key_storage; + + fyt->scalar.path_key_storage = malloc(len + 1); + if (!fyt->scalar.path_key_storage) + return NULL; + + memcpy(fyt->scalar.path_key_storage, text, len); + 
fyt->scalar.path_key_storage[len] = '\0'; + + return fyt->scalar.path_key_storage; +} + +unsigned int fy_analyze_scalar_content(const char *data, size_t size, + bool json_mode, enum fy_lb_mode lb_mode, enum fy_flow_ws_mode fws_mode) +{ + const char *s, *e; + int c, lastc, nextc, w, ww, col, break_run; + unsigned int flags; + bool first; + + flags = FYACF_EMPTY | FYACF_BLOCK_PLAIN | FYACF_FLOW_PLAIN | + FYACF_PRINTABLE | FYACF_SINGLE_QUOTED | FYACF_DOUBLE_QUOTED | + FYACF_SIZE0 | FYACF_VALID_ANCHOR; + + s = data; + e = data + size; + + col = 0; + first = true; + lastc = -1; + break_run = 0; + while (s < e && (c = fy_utf8_get(s, e - s, &w)) >= 0) { + + flags &= ~FYACF_SIZE0; + + lastc = c; + + if (first) { + if (fy_is_ws(c)) + flags |= FYACF_STARTS_WITH_WS; + else if (fy_is_generic_lb_m(c, lb_mode)) + flags |= FYACF_STARTS_WITH_LB; + /* scalars starting with & or * must be quoted */ + if (c == '&' || c == '*') + flags &= ~(FYACF_BLOCK_PLAIN | FYACF_FLOW_PLAIN); + first = false; + } + nextc = fy_utf8_get(s + w, e - (s + w), &ww); + + /* anything other than white space or linebreak */ + if ((flags & FYACF_EMPTY) && + !fy_is_ws(c) && !fy_is_generic_lb_m(c, lb_mode)) + flags &= ~FYACF_EMPTY; + + if ((flags & FYACF_VALID_ANCHOR) && + (fy_utf8_strchr(",[]{}&*:", c) || fy_is_ws(c) || + fy_is_any_lb(c) || fy_is_unicode_control(c) || + fy_is_unicode_space(c))) + flags &= ~FYACF_VALID_ANCHOR; + + /* linebreak */ + if (fy_is_generic_lb_m(c, lb_mode)) { + flags |= FYACF_LB; + if (!(flags & FYACF_CONSECUTIVE_LB) && + fy_is_generic_lb_m(nextc, lb_mode)) + flags |= FYACF_CONSECUTIVE_LB; + break_run++; + } else + break_run = 0; + + /* white space */ + if (!(flags & FYACF_WS) && fy_is_ws(c)) { + flags |= FYACF_WS; + flags &= ~FYACF_VALID_ANCHOR; + } + + /* anything not printable (or \r, \n) */ + if ((flags & FYACF_PRINTABLE) && + !fy_is_printq(c)) { + flags &= ~FYACF_PRINTABLE; + flags &= ~(FYACF_BLOCK_PLAIN | FYACF_FLOW_PLAIN | + FYACF_SINGLE_QUOTED | FYACF_VALID_ANCHOR); + } + + /* 
check for document indicators (at column 0) */ + if (!(flags & FYACF_DOC_IND) && + ((col == 0 && (e - s) >= 3 && + (!strncmp(s, "---", 3) || !strncmp(s, "...", 3))))) { + flags |= FYACF_DOC_IND; + flags &= ~(FYACF_BLOCK_PLAIN | FYACF_FLOW_PLAIN | FYACF_VALID_ANCHOR); + } + + /* comment indicator can't be present after a space or lb */ + /* : followed by blank can't be any plain */ + if (flags & (FYACF_BLOCK_PLAIN | FYACF_FLOW_PLAIN) && + (((fy_is_blank(c) || fy_is_generic_lb_m(c, lb_mode)) && nextc == '#') || + (c == ':' && fy_is_blankz_m(nextc, lb_mode)))) + flags &= ~(FYACF_BLOCK_PLAIN | FYACF_FLOW_PLAIN); + + /* : followed by flow markers can't be a plain in flow context */ + if ((flags & FYACF_FLOW_PLAIN) && + (fy_utf8_strchr(",[]{}", c) || (c == ':' && fy_utf8_strchr(",[]{}", nextc)))) + flags &= ~FYACF_FLOW_PLAIN; + + if (!(flags & FYACF_JSON_ESCAPE) && !fy_is_json_unescaped(c)) + flags |= FYACF_JSON_ESCAPE; + + if (fy_is_generic_lb_m(c, lb_mode)) + col = 0; + else + col++; + + s += w; + } + + /* this contains arbitrary binany values, mark it as such */ + if (s < e) + return FYACF_DOUBLE_QUOTED; + + if (fy_is_ws(lastc)) + flags |= FYACF_ENDS_WITH_WS; + else if (fy_is_generic_lb_m(lastc, lb_mode)) + flags |= FYACF_ENDS_WITH_LB; + + if (break_run > 1) + flags |= FYACF_TRAILING_LB; + + return flags; +} + +char *fy_token_debug_text(struct fy_token *fyt) +{ + const char *typetxt; + const char *text; + char *buf; + size_t length; + int wlen; + int rc __FY_DEBUG_UNUSED__; + + if (!fyt || !fy_token_type_is_valid(fyt->type)) { + typetxt = "<NULL>"; + goto out; + } + typetxt = fy_token_type_txt[fyt->type]; + + /* should never happen really */ + assert(typetxt); + +out: + text = fy_token_get_text(fyt, &length); + + wlen = length > 8 ? 8 : length; + + rc = asprintf(&buf, "%s:%.*s%s", typetxt, wlen, text, wlen < (int)length ? "..." 
: ""); + assert(rc != -1); + + /* assert() vanishes under NDEBUG and asprintf() leaves buf unspecified on failure; never hand that back */ + if (rc == -1) + return NULL; + + return buf; +} + +/* + * Compare the token's text with the len bytes at ptr, memcmp() style. + * A NULL token equals a NULL/empty buffer and sorts before anything else; + * a token with zero length text equals an empty buffer, any other token + * sorts after a NULL/empty buffer. + * TAG and TAG_DIRECTIVE tokens are compared through fy_token_get_text() + * (texts of different length are ordered by length), all other tokens + * through fy_atom_memcmp(). + */ +int fy_token_memcmp(struct fy_token *fyt, const void *ptr, size_t len) +{ + const char *value = NULL; + size_t tlen = 0; + + /* special zero length handling */ + if (len == 0 && fyt && fy_token_get_text_length(fyt) == 0) + return 0; + + /* handle NULL cases */ + if (!fyt && (!ptr || !len)) + return 0; + + if (!fyt && (ptr || len)) + return -1; + + if (fyt && (!ptr || !len)) + return 1; + + /* those two are special */ + if (fyt->type == FYTT_TAG || fyt->type == FYTT_TAG_DIRECTIVE) { + value = fy_token_get_text(fyt, &tlen); + if (!value) + return -1; + return tlen == len ? memcmp(value, ptr, tlen) : tlen < len ? -1 : 1; + } + + return fy_atom_memcmp(fy_token_atom(fyt), ptr, len); +} + +/* strcmp() flavour of fy_token_memcmp(); a NULL str is compared as a zero length buffer */ +int fy_token_strcmp(struct fy_token *fyt, const char *str) +{ + size_t len; + + len = str ? strlen(str) : 0; + + return fy_token_memcmp(fyt, str, len); +} + +/* order two tokens: NULL sorts first, then by token type, then by content */ +int fy_token_cmp(struct fy_token *fyt1, struct fy_token *fyt2) +{ + const char *t1, *t2; + size_t l1, l2, l; + int ret; + + /* handles both NULL */ + if (fyt1 == fyt2) + return 0; + + /* fyt1 is null, 2 wins */ + if (!fyt1 && fyt2) + return -1; + + /* fyt2 is null, 1 wins */ + if (fyt1 && !fyt2) + return 1; + + /* tokens with different types can't be equal */ + if (fyt1->type != fyt2->type) + return fyt2->type > fyt1->type ? -1 : 1; + + /* special case, these can't use the atom comparisons */ + if (fyt1->type == FYTT_TAG || fyt1->type == FYTT_TAG_DIRECTIVE) { + t1 = fy_token_get_text(fyt1, &l1); + t2 = fy_token_get_text(fyt2, &l2); + l = l1 > l2 ? l2 : l1; + ret = memcmp(t1, t2, l); + if (ret) + return ret; + return l1 == l2 ? 0 : l2 > l1 ? 
-1 : 1; + } + + /* just pass it to the atom comparison methods */ + return fy_atom_cmp(fy_token_atom(fyt1), fy_token_atom(fyt2)); +} + +void fy_token_iter_start(struct fy_token *fyt, struct fy_token_iter *iter) +{ + if (!iter) + return; + + memset(iter, 0, sizeof(*iter)); + + iter->unget_c = -1; + + if (!fyt) + return; + + iter->fyt = fyt; + + /* TAG or TAG_DIRECTIVE may only work by getting the text */ + if (fyt->type == FYTT_TAG || fyt->type == FYTT_TAG_DIRECTIVE) + iter->ic.str = fy_token_get_text(fyt, &iter->ic.len); + else /* try the direct output next */ + iter->ic.str = fy_token_get_direct_output(fyt, &iter->ic.len); + + /* got it */ + if (iter->ic.str) { + memset(&iter->atom_iter, 0, sizeof(iter->atom_iter)); + return; + } + + assert(fyt->type != FYTT_TAG && fyt->type != FYTT_TAG_DIRECTIVE); + + /* fall back to the atom iterator */ + fy_atom_iter_start(fy_token_atom(fyt), &iter->atom_iter); +} + +void fy_token_iter_finish(struct fy_token_iter *iter) +{ + if (!iter) + return; + + if (!iter->ic.str) + fy_atom_iter_finish(&iter->atom_iter); +} + +struct fy_token_iter * +fy_token_iter_create(struct fy_token *fyt) +{ + struct fy_token_iter *iter; + + iter = malloc(sizeof(*iter)); + if (!iter) + return NULL; + fy_token_iter_start(fyt, iter); + return iter; +} + +void fy_token_iter_destroy(struct fy_token_iter *iter) +{ + if (!iter) + return; + + fy_token_iter_finish(iter); + free(iter); +} + +const struct fy_iter_chunk *fy_token_iter_peek_chunk(struct fy_token_iter *iter) +{ + if (!iter) + return NULL; + + /* direct mode? */ + if (iter->ic.str) + return &iter->ic; + + /* fallback to the atom iterator */ + return fy_atom_iter_peek_chunk(&iter->atom_iter); +} + +void fy_token_iter_advance(struct fy_token_iter *iter, size_t len) +{ + if (!iter) + return; + + /* direct mode? 
*/ + if (iter->ic.str) { + if (len > iter->ic.len) + len = iter->ic.len; + iter->ic.str += len; + iter->ic.len -= len; + return; + } + + /* fallback to the atom iterator */ + fy_atom_iter_advance(&iter->atom_iter, len); +} + +/* + * Chunk iteration: pass curr == NULL to get the first chunk and the + * previously returned chunk afterwards; NULL means no more chunks. + * In direct mode the whole remaining text is one single chunk. + * *errp (when given) is cleared before delegating to the atom iterator. + */ +const struct fy_iter_chunk * +fy_token_iter_chunk_next(struct fy_token_iter *iter, const struct fy_iter_chunk *curr, int *errp) +{ + if (!iter) + return NULL; + + if (errp) + *errp = 0; + + /* first time in */ + if (!curr) { + if (iter->ic.str) + return iter->ic.len ? &iter->ic : NULL; + return fy_atom_iter_chunk_next(&iter->atom_iter, NULL, errp); + } + + /* direct, all consumed */ + if (curr == &iter->ic) { + iter->ic.str += iter->ic.len; + iter->ic.len = 0; + return NULL; + } + + /* fallback */ + return fy_atom_iter_chunk_next(&iter->atom_iter, curr, errp); +} + +/* + * Copy up to count bytes of the token's text into buf and consume them. + * Returns the number of bytes copied in direct mode, the result of + * fy_atom_iter_read() otherwise, and -1 when iter or buf is NULL. + */ +ssize_t fy_token_iter_read(struct fy_token_iter *iter, void *buf, size_t count) +{ + if (!iter || !buf) + return -1; + + /* direct mode */ + if (iter->ic.str) { + if (count > iter->ic.len) + count = iter->ic.len; + memcpy(buf, iter->ic.str, count); + iter->ic.str += count; + iter->ic.len -= count; + return count; + } + + return fy_atom_iter_read(&iter->atom_iter, buf, count); +} + +/* + * Return the next octet of the token's text, or -1 when there is nothing + * left (or iter is NULL). A pushed back octet is returned first. + */ +int fy_token_iter_getc(struct fy_token_iter *iter) +{ + int c; + + if (!iter) + return -1; + + /* first try the pushed ungetc */ + if (iter->unget_c != -1) { + c = iter->unget_c; + iter->unget_c = -1; + return c; + } + + /* direct mode */ + if (iter->ic.str) { + if (!iter->ic.len) + return -1; + /* read as an unsigned octet: a plain (possibly signed) char would turn 0xff into the -1 end marker and disagree with ungetc's 0xff mask */ + c = (uint8_t)*iter->ic.str++; + iter->ic.len--; + return c; + } + + return fy_atom_iter_getc(&iter->atom_iter); +} + +/* + * Push back a single octet; there is only one slot, so -1 is returned + * when it is already occupied (or iter is NULL). Pushing back -1 is a + * no-op that returns 0, otherwise the stored octet (c & 0xff) is returned. + */ +int fy_token_iter_ungetc(struct fy_token_iter *iter, int c) +{ + if (!iter) + return -1; + if (iter->unget_c != -1) + return -1; + if (c == -1) { + iter->unget_c = -1; + return 0; + } + iter->unget_c = c & 0xff; + return c & 0xff; +} + +/* look at the next octet without consuming it (getc followed by ungetc) */ +int fy_token_iter_peekc(struct fy_token_iter *iter) +{ + int c; + + c = fy_token_iter_getc(iter); + if (c == -1) + return -1; + + return fy_token_iter_ungetc(iter, c); +} + +int 
fy_token_iter_utf8_get(struct fy_token_iter *iter) +{ + int c, w, w1; + + /* same NULL contract as fy_token_iter_getc() */ + if (!iter) + return -1; + + /* first try the pushed ungetc */ + if (iter->unget_c != -1) { + c = iter->unget_c; + iter->unget_c = -1; + return c; + } + + /* direct */ + if (iter->ic.str) { + + /* not even 1 octet */ + if (!iter->ic.len) + return -1; + + /* get width by the first octet */ + w = fy_utf8_width_by_first_octet((uint8_t)*iter->ic.str); + if (!w || (unsigned int)w > iter->ic.len) + return -1; + + /* get the next character */ + c = fy_utf8_get(iter->ic.str, w, &w1); + + /* the w octets are consumed whatever fy_utf8_get() made of them */ + iter->ic.str += w; + iter->ic.len -= w; + + return c; + } + + return fy_atom_iter_utf8_get(&iter->atom_iter); +} + +/* + * Push back a single character; the slot is shared with the octet + * ungetc, so -1 is returned when it is occupied (or iter is NULL). + * Pushing back -1 is a no-op that returns 0, otherwise c is returned. + */ +int fy_token_iter_utf8_unget(struct fy_token_iter *iter, int c) +{ + if (!iter) + return -1; + + if (iter->unget_c != -1) + return -1; + + if (c == -1) { + iter->unget_c = -1; + return 0; + } + + iter->unget_c = c; + return c; +} + +/* look at the next character without consuming it (utf8_get followed by utf8_unget) */ +int fy_token_iter_utf8_peek(struct fy_token_iter *iter) +{ + int c; + + c = fy_token_iter_utf8_get(iter); + if (c == -1) + return -1; + + return fy_token_iter_utf8_unget(iter, c); +} + +/* style of a scalar token; FYSS_ANY for NULL or non-scalar tokens */ +enum fy_scalar_style +fy_scalar_token_get_style(struct fy_token *fyt) +{ + if (!fyt || fyt->type != FYTT_SCALAR) + return FYSS_ANY; + return fyt->scalar.style; +} + +/* + * Return the struct fy_tag stored in a TAG token (NULL for anything else). + * Note that the prefix member is filled with the tag's suffix here. + */ +const struct fy_tag *fy_tag_token_tag(struct fy_token *fyt) +{ + if (!fyt || fyt->type != FYTT_TAG) + return NULL; + + /* always refresh, should be relatively infrequent */ + fyt->tag.tag.handle = fy_tag_token_handle0(fyt); + fyt->tag.tag.prefix = fy_tag_token_suffix0(fyt); + + return &fyt->tag.tag; +} + +/* return the struct fy_tag stored in a TAG_DIRECTIVE token (NULL for anything else) */ +const struct fy_tag * +fy_tag_directive_token_tag(struct fy_token *fyt) +{ + if (!fyt || fyt->type != FYTT_TAG_DIRECTIVE) + return NULL; + + /* always refresh, should be relatively infrequent */ + fyt->tag_directive.tag.handle = fy_tag_directive_token_handle0(fyt); + fyt->tag_directive.tag.prefix = fy_tag_directive_token_prefix0(fyt); + + return &fyt->tag_directive.tag; +} + +struct fy_atom *fy_token_comment_handle(struct fy_token *fyt, enum fy_comment_placement placement, bool alloc) 
+{ + struct fy_atom *handle; + size_t size; + + if (!fyt || (unsigned int)placement >= fycp_max) + return NULL; + + if (!fyt->comment) { + if (!alloc) + return NULL; + + size = sizeof(*fyt->comment) * fycp_max; + fyt->comment = malloc(size); + if (!fyt->comment) + return NULL; + memset(fyt->comment, 0, size); + } + handle = &fyt->comment[placement]; + + return handle; +} + +/* true when at least one of the token's comment atoms (top..bottom) is set */ +bool fy_token_has_any_comment(struct fy_token *fyt) +{ + struct fy_atom *handle; + enum fy_comment_placement placement; + + if (!fyt || !fyt->comment) + return false; + + for (placement = fycp_top; placement <= fycp_bottom; placement++) { + handle = &fyt->comment[placement]; + if (fy_atom_is_set(handle)) + return true; + } + return false; +} diff --git a/contrib/libs/libfyaml/src/lib/fy-token.h b/contrib/libs/libfyaml/src/lib/fy-token.h new file mode 100644 index 0000000000..8f904095f5 --- /dev/null +++ b/contrib/libs/libfyaml/src/lib/fy-token.h @@ -0,0 +1,522 @@ +/* + * fy-token.h - YAML token methods header + * + * Copyright (c) 2019 Pantelis Antoniou <pantelis.antoniou@konsulko.com> + * + * SPDX-License-Identifier: MIT + */ +#ifndef FY_TOKEN_H +#define FY_TOKEN_H + +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#include <stdint.h> +#include <stdbool.h> +#include <stdio.h> +#include <stdarg.h> + +#include <libfyaml.h> + +#include "fy-utils.h" +#include "fy-atom.h" + +extern const char *fy_token_type_txt[FYTT_COUNT]; + +struct fy_document; +struct fy_path_expr; + +/* true for the token types that open a (block or flow) sequence */ +static inline bool fy_token_type_is_sequence_start(enum fy_token_type type) +{ + return type == FYTT_BLOCK_SEQUENCE_START || type == FYTT_FLOW_SEQUENCE_START; +} + +/* + * true for the token types that close a sequence. + * This used to test the *_START types (copy of the helper above), which + * made it indistinguishable from fy_token_type_is_sequence_start(). + * NOTE(review): FYTT_BLOCK_END comes from <libfyaml.h> and presumably + * closes block mappings as well - confirm callers can live with that. + */ +static inline bool fy_token_type_is_sequence_end(enum fy_token_type type) +{ + return type == FYTT_BLOCK_END || type == FYTT_FLOW_SEQUENCE_END; +} + +/* true for any token type that opens or closes a sequence */ +static inline bool fy_token_type_is_sequence_marker(enum fy_token_type type) +{ + return fy_token_type_is_sequence_start(type) || fy_token_type_is_sequence_end(type); +} + +static inline bool 
fy_token_type_is_mapping_start(enum fy_token_type type) +{ + return type == FYTT_BLOCK_MAPPING_START || type == FYTT_FLOW_MAPPING_START; +} + +/* + * true for the token types that close a mapping. + * This used to test the *_START types (copy of the helper above), which + * made it indistinguishable from fy_token_type_is_mapping_start(). + * NOTE(review): FYTT_BLOCK_END comes from <libfyaml.h> and presumably + * closes block sequences as well - confirm callers can live with that. + */ +static inline bool fy_token_type_is_mapping_end(enum fy_token_type type) +{ + return type == FYTT_BLOCK_END || type == FYTT_FLOW_MAPPING_END; +} + +/* true for any token type that opens or closes a mapping */ +static inline bool fy_token_type_is_mapping_marker(enum fy_token_type type) +{ + return fy_token_type_is_mapping_start(type) || fy_token_type_is_mapping_end(type); +} + +/* analyze content flags */ +#define FYACF_EMPTY 0x000001 /* is empty (only ws & lb) */ +#define FYACF_LB 0x000002 /* has a linebreak */ +#define FYACF_BLOCK_PLAIN 0x000004 /* can be a plain scalar in block context */ +#define FYACF_FLOW_PLAIN 0x000008 /* can be a plain scalar in flow context */ +#define FYACF_PRINTABLE 0x000010 /* every character is printable */ +#define FYACF_SINGLE_QUOTED 0x000020 /* can be a single quoted scalar */ +#define FYACF_DOUBLE_QUOTED 0x000040 /* can be a double quoted scalar */ +#define FYACF_CONTAINS_ZERO 0x000080 /* contains a zero */ +#define FYACF_DOC_IND 0x000100 /* contains document indicators */ +#define FYACF_CONSECUTIVE_LB 0x000200 /* has consecutive linebreaks */ +#define FYACF_SIMPLE_KEY 0x000400 /* can be a simple key */ +#define FYACF_WS 0x000800 /* has at least one whitespace */ +#define FYACF_STARTS_WITH_WS 0x001000 /* starts with whitespace */ +#define FYACF_STARTS_WITH_LB 0x002000 /* starts with linebreak */ +#define FYACF_ENDS_WITH_WS 0x004000 /* ends with whitespace */ +#define FYACF_ENDS_WITH_LB 0x008000 /* ends with linebreak */ +#define FYACF_TRAILING_LB 0x010000 /* ends with trailing lb > 1 */ +#define FYACF_SIZE0 0x020000 /* contains absolutely nothing */ +#define FYACF_VALID_ANCHOR 0x040000 /* contains valid anchor (without & prefix) */ +#define FYACF_JSON_ESCAPE 0x080000 /* contains a character that JSON escapes */ + +FY_TYPE_FWD_DECL_LIST(token); +struct fy_token { + struct fy_list_head node; + enum fy_token_type type; + int refs; /* when on 
document, we switch to reference counting */ + int analyze_flags; /* cache of the analysis flags */ + size_t text_len; + const char *text; + char *text0; /* this is allocated */ + struct fy_atom handle; + struct fy_atom *comment; /* only when enabled */ + union { + struct { + unsigned int tag_length; /* from start */ + unsigned int uri_length; /* from end */ + char *prefix0; + char *handle0; + struct fy_tag tag; + bool is_default; /* true when default */ + } tag_directive; + struct { + enum fy_scalar_style style; + /* path key (if requested only) */ + const char *path_key; + size_t path_key_len; + char *path_key_storage; /* if this is not null, it's \0 terminated */ + } scalar; + struct { + unsigned int skip; + unsigned int handle_length; + unsigned int suffix_length; + struct fy_token *fyt_td; + char *handle0; /* zero terminated and allocated, only used by binding */ + char *suffix0; + struct fy_tag tag; /* prefix is now suffix */ + } tag; + struct { + struct fy_version vers; /* parsed version number */ + } version_directive; + /* path expressions */ + struct { + struct fy_document *fyd; /* when key is complex */ + } map_key; + struct { + int index; + } seq_index; + struct { + int start_index; + int end_index; + } seq_slice; + struct { + struct fy_path_expr *expr; + } alias; + struct { + int flow_level; + } key; + }; +}; +FY_TYPE_DECL_LIST(token); + +static inline bool fy_token_text_is_direct(struct fy_token *fyt) +{ + if (!fyt || !fyt->text) + return false; + return fyt->text && fyt->text != fyt->text0; +} + +void fy_token_clean_rl(struct fy_token_list *fytl, struct fy_token *fyt); +void fy_token_list_unref_all_rl(struct fy_token_list *fytl, struct fy_token_list *fytl_tofree); + +static inline FY_ALWAYS_INLINE struct fy_token * +fy_token_alloc_rl(struct fy_token_list *fytl) +{ + struct fy_token *fyt; + + fyt = NULL; + if (fytl) + fyt = fy_token_list_pop(fytl); + if (!fyt) { + fyt = malloc(sizeof(*fyt)); + if (!fyt) + return NULL; + } + + fyt->type = FYTT_NONE; + 
fyt->refs = 1; + + fyt->analyze_flags = 0; + fyt->text_len = 0; + fyt->text = NULL; + fyt->text0 = NULL; + fyt->handle.fyi = NULL; + fyt->comment = NULL; + + return fyt; +} + +static inline FY_ALWAYS_INLINE void +fy_token_free_rl(struct fy_token_list *fytl, struct fy_token *fyt) +{ + if (!fyt) + return; + + fy_token_clean_rl(fytl, fyt); + + if (fytl) + fy_token_list_push(fytl, fyt); + else + free(fyt); +} + +static inline FY_ALWAYS_INLINE void +fy_token_unref_rl(struct fy_token_list *fytl, struct fy_token *fyt) +{ + if (!fyt) + return; + + assert(fyt->refs > 0); + + if (--fyt->refs == 0) + fy_token_free_rl(fytl, fyt); +} + +static inline FY_ALWAYS_INLINE struct fy_token * +fy_token_alloc(void) +{ + return fy_token_alloc_rl(NULL); +} + +static inline FY_ALWAYS_INLINE void +fy_token_clean(struct fy_token *fyt) +{ + return fy_token_clean_rl(NULL, fyt); +} + +static inline FY_ALWAYS_INLINE void +fy_token_free(struct fy_token *fyt) +{ + return fy_token_free_rl(NULL, fyt); +} + +static inline FY_ALWAYS_INLINE struct fy_token * +fy_token_ref(struct fy_token *fyt) +{ + /* take care of overflow */ + if (!fyt) + return NULL; + assert(fyt->refs + 1 > 0); + fyt->refs++; + + return fyt; +} + +static inline FY_ALWAYS_INLINE void +fy_token_unref(struct fy_token *fyt) +{ + return fy_token_unref_rl(NULL, fyt); +} + +static inline void +fy_token_list_unref_all(struct fy_token_list *fytl_tofree) +{ + return fy_token_list_unref_all_rl(NULL, fytl_tofree); +} + +/* recycling aware */ +struct fy_token *fy_token_vcreate_rl(struct fy_token_list *fytl, enum fy_token_type type, va_list ap); +struct fy_token *fy_token_create_rl(struct fy_token_list *fytl, enum fy_token_type type, ...); + +struct fy_token *fy_token_vcreate(enum fy_token_type type, va_list ap); +struct fy_token *fy_token_create(enum fy_token_type type, ...); + +static inline struct fy_token * +fy_token_list_vqueue(struct fy_token_list *fytl, enum fy_token_type type, va_list ap) +{ + struct fy_token *fyt; + + fyt = 
fy_token_vcreate(type, ap); + if (!fyt) + return NULL; + fy_token_list_add_tail(fytl, fyt); + return fyt; +} + +static inline struct fy_token * +fy_token_list_queue(struct fy_token_list *fytl, enum fy_token_type type, ...) +{ + va_list ap; + struct fy_token *fyt; + + va_start(ap, type); + fyt = fy_token_list_vqueue(fytl, type, ap); + va_end(ap); + + return fyt; +} + +int fy_tag_token_format_text_length(const struct fy_token *fyt); +const char *fy_tag_token_format_text(const struct fy_token *fyt, char *buf, size_t maxsz); +int fy_token_format_utf8_length(struct fy_token *fyt); + +int fy_token_format_text_length(struct fy_token *fyt); +const char *fy_token_format_text(struct fy_token *fyt, char *buf, size_t maxsz); + +/* non-parser token methods */ +struct fy_atom *fy_token_atom(struct fy_token *fyt); + +static inline size_t fy_token_start_pos(struct fy_token *fyt) +{ + const struct fy_mark *start_mark; + + if (!fyt) + return (size_t)-1; + + start_mark = fy_token_start_mark(fyt); + return start_mark ? start_mark->input_pos : (size_t)-1; +} + +static inline size_t fy_token_end_pos(struct fy_token *fyt) +{ + const struct fy_mark *end_mark; + + if (!fyt) + return (size_t)-1; + + end_mark = fy_token_end_mark(fyt); + return end_mark ? end_mark->input_pos : (size_t)-1; +} + +static inline int fy_token_start_line(struct fy_token *fyt) +{ + const struct fy_mark *start_mark; + + if (!fyt) + return -1; + + start_mark = fy_token_start_mark(fyt); + return start_mark ? start_mark->line : -1; +} + +static inline int fy_token_start_column(struct fy_token *fyt) +{ + const struct fy_mark *start_mark; + + if (!fyt) + return -1; + + start_mark = fy_token_start_mark(fyt); + return start_mark ? start_mark->column : -1; +} + +static inline int fy_token_end_line(struct fy_token *fyt) +{ + const struct fy_mark *end_mark; + + if (!fyt) + return -1; + + end_mark = fy_token_end_mark(fyt); + return end_mark ? 
end_mark->line : -1; +} + +static inline int fy_token_end_column(struct fy_token *fyt) +{ + const struct fy_mark *end_mark; + + if (!fyt) + return -1; + + end_mark = fy_token_end_mark(fyt); + return end_mark ? end_mark->column : -1; +} + +static inline bool fy_token_is_multiline(struct fy_token *fyt) +{ + const struct fy_mark *start_mark, *end_mark; + + if (!fyt) + return false; + + start_mark = fy_token_start_mark(fyt); + end_mark = fy_token_end_mark(fyt); + return start_mark && end_mark ? end_mark->line > start_mark->line : false; +} + +const char *fy_token_get_direct_output(struct fy_token *fyt, size_t *sizep); + +static inline struct fy_input *fy_token_get_input(struct fy_token *fyt) +{ + return fyt ? fyt->handle.fyi : NULL; +} + +static inline enum fy_atom_style fy_token_atom_style(struct fy_token *fyt) +{ + if (!fyt) + return FYAS_PLAIN; + + if (fyt->type == FYTT_TAG) + return FYAS_URI; + + return fyt->handle.style; +} + +static inline bool fy_token_atom_json_mode(struct fy_token *fyt) +{ + if (!fyt) + return false; + + return fy_atom_json_mode(&fyt->handle); +} + +static inline enum fy_lb_mode fy_token_atom_lb_mode(struct fy_token *fyt) +{ + if (!fyt) + return fylb_cr_nl; + + return fy_atom_lb_mode(&fyt->handle); +} + +static inline enum fy_flow_ws_mode fy_token_atom_flow_ws_mode(struct fy_token *fyt) +{ + if (!fyt) + return fyfws_space_tab; + + return fy_atom_flow_ws_mode(&fyt->handle); +} + +static inline bool fy_token_is_lb(struct fy_token *fyt, int c) +{ + if (!fyt) + return false; + + return fy_atom_is_lb(&fyt->handle, c); +} + +static inline bool fy_token_is_flow_ws(struct fy_token *fyt, int c) +{ + if (!fyt) + return false; + + return fy_atom_is_flow_ws(&fyt->handle, c); +} + +#define FYTTAF_HAS_LB FY_BIT(0) +#define FYTTAF_HAS_WS FY_BIT(1) +#define FYTTAF_HAS_CONSECUTIVE_LB FY_BIT(2) +#define FYTTAF_HAS_CONSECUTIVE_WS FY_BIT(4) +#define FYTTAF_EMPTY FY_BIT(5) +#define FYTTAF_CAN_BE_SIMPLE_KEY FY_BIT(6) +#define FYTTAF_DIRECT_OUTPUT FY_BIT(7) +#define 
FYTTAF_NO_TEXT_TOKEN FY_BIT(8) +#define FYTTAF_TEXT_TOKEN FY_BIT(9) +#define FYTTAF_CAN_BE_PLAIN FY_BIT(10) +#define FYTTAF_CAN_BE_SINGLE_QUOTED FY_BIT(11) +#define FYTTAF_CAN_BE_DOUBLE_QUOTED FY_BIT(12) +#define FYTTAF_CAN_BE_LITERAL FY_BIT(13) +#define FYTTAF_CAN_BE_FOLDED FY_BIT(14) +#define FYTTAF_CAN_BE_PLAIN_FLOW FY_BIT(15) +#define FYTTAF_QUOTE_AT_0 FY_BIT(16) +#define FYTTAF_CAN_BE_UNQUOTED_PATH_KEY FY_BIT(17) + +int fy_token_text_analyze(struct fy_token *fyt); + +unsigned int fy_analyze_scalar_content(const char *data, size_t size, + bool json_mode, enum fy_lb_mode lb_mode, enum fy_flow_ws_mode fws_mode); + +/* must be freed */ +char *fy_token_debug_text(struct fy_token *fyt); + +#define fy_token_debug_text_a(_fyt, _res) \ + do { \ + struct fy_token *__fyt = (_fyt); \ + char *_buf, *_rbuf = ""; \ + size_t _len; \ + _buf = fy_token_debug_text(__fyt); \ + if (_buf) { \ + _len = strlen(_buf); \ + _rbuf = FY_ALLOCA(_len + 1); \ + memcpy(_rbuf, _buf, _len + 1); \ + free(_buf); \ + } \ + *(_res) = _rbuf; \ + } while(false) + +int fy_token_memcmp(struct fy_token *fyt, const void *ptr, size_t len); +int fy_token_strcmp(struct fy_token *fyt, const char *str); +int fy_token_cmp(struct fy_token *fyt1, struct fy_token *fyt2); + +struct fy_token_iter { + struct fy_token *fyt; + struct fy_iter_chunk ic; /* direct mode */ + struct fy_atom_iter atom_iter; + int unget_c; +}; + +void fy_token_iter_start(struct fy_token *fyt, struct fy_token_iter *iter); +void fy_token_iter_finish(struct fy_token_iter *iter); + +const char *fy_tag_token_get_directive_handle(struct fy_token *fyt, size_t *td_handle_sizep); +const char *fy_tag_token_get_directive_prefix(struct fy_token *fyt, size_t *td_prefix_sizep); + +static inline bool fy_token_is_number(struct fy_token *fyt) +{ + struct fy_atom *atom; + + if (!fyt || fyt->type != FYTT_SCALAR || fyt->scalar.style != FYSS_PLAIN) + return false; + atom = fy_token_atom(fyt); + if (!atom) + return false; + return fy_atom_is_number(atom); +} + 
+struct fy_atom *fy_token_comment_handle(struct fy_token *fyt, enum fy_comment_placement placement, bool alloc); +bool fy_token_has_any_comment(struct fy_token *fyt); + +const char *fy_token_get_scalar_path_key(struct fy_token *fyt, size_t *lenp); +size_t fy_token_get_scalar_path_key_length(struct fy_token *fyt); +const char *fy_token_get_scalar_path_key0(struct fy_token *fyt); + +struct fy_atom *fy_token_comment_handle(struct fy_token *fyt, enum fy_comment_placement placement, bool alloc); + +static inline FY_ALWAYS_INLINE enum fy_scalar_style +fy_token_scalar_style_inline(struct fy_token *fyt) +{ + if (!fyt || fyt->type != FYTT_SCALAR) + return FYSS_PLAIN; + + if (fyt->type == FYTT_SCALAR) + return fyt->scalar.style; + + return FYSS_PLAIN; +} + +static inline FY_ALWAYS_INLINE enum fy_token_type +fy_token_get_type_inline(struct fy_token *fyt) +{ + return fyt ? fyt->type : FYTT_NONE; +} + +#endif diff --git a/contrib/libs/libfyaml/src/lib/fy-typelist.h b/contrib/libs/libfyaml/src/lib/fy-typelist.h new file mode 100644 index 0000000000..12acd2dee4 --- /dev/null +++ b/contrib/libs/libfyaml/src/lib/fy-typelist.h @@ -0,0 +1,163 @@ +/* + * fy-typelist.h - typed list method builders + * + * Copyright (c) 2019 Pantelis Antoniou <pantelis.antoniou@konsulko.com> + * + * SPDX-License-Identifier: MIT + */ +#ifndef FY_TYPELIST_H +#define FY_TYPELIST_H + +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#include <stdint.h> +#include <stdbool.h> +#include <stdio.h> + +#include <libfyaml.h> + +#include "fy-list.h" + +/* declare type methods */ + +#define FY_TYPE_FWD_DECL_LIST(_type) \ +/* type safe list wrapper */ \ +struct fy_ ## _type ## _list { struct fy_list_head _lh; }; \ +\ +struct __useless_struct_to_allow_semicolon + +#define FY_TYPE_DECL_LIST(_type) \ +static inline void fy_ ## _type ## _list_init(struct fy_ ## _type ## _list *_l) \ +{ \ + fy_list_init_head(&_l->_lh); \ +} \ +static inline void fy_ ## _type ## _list_add(struct fy_ ## _type ## _list *_l, struct fy_ ## 
_type *_n) \ +{ \ + if (_l && _n) \ + fy_list_add_head(&_n->node, &_l->_lh); \ +} \ +static inline void fy_ ## _type ## _list_add_tail(struct fy_ ## _type ## _list *_l, struct fy_ ## _type *_n) \ +{ \ + if (_l && _n) \ + fy_list_add_tail(&_n->node, &_l->_lh); \ +} \ +static inline void fy_ ## _type ## _list_push(struct fy_ ## _type ## _list *_l, struct fy_ ## _type *_n) \ +{ \ + if (_l && _n) \ + fy_ ## _type ## _list_add(_l, _n); \ +} \ +static inline void fy_ ## _type ## _list_push_tail(struct fy_ ## _type ## _list *_l, struct fy_ ## _type *_n) \ +{ \ + if (_l && _n) \ + fy_ ## _type ## _list_add_tail(_l, _n); \ +} \ +static inline bool fy_ ## _type ## _list_empty(struct fy_ ## _type ## _list *_l) \ +{ \ + return _l ? fy_list_is_empty(&_l->_lh) : true; \ +} \ +static inline bool fy_ ## _type ## _list_is_singular(struct fy_ ## _type ## _list *_l) \ +{ \ + return _l ? fy_list_is_singular(&_l->_lh) : true; \ +} \ +static inline void fy_ ## _type ## _list_del(struct fy_ ## _type ## _list *_l, struct fy_ ## _type *_n) \ +{ \ + if (_l && _n) { \ + fy_list_del(&_n->node); \ + fy_list_init_head(&_n->node); \ + } \ +} \ +static inline void fy_ ## _type ## _list_insert_after(struct fy_ ## _type ## _list *_l, \ + struct fy_ ## _type *_p, struct fy_ ## _type *_n) \ +{ \ + if (_l && _p && _n) \ + fy_list_add_head(&_n->node, &_p->node); \ +} \ +static inline void fy_ ## _type ## _list_insert_before(struct fy_ ## _type ## _list *_l, \ + struct fy_ ## _type *_p, struct fy_ ## _type *_n) \ +{ \ + if (_l && _p && _n) \ + fy_list_add_tail(&_n->node, &_p->node); \ +} \ +static inline struct fy_ ## _type *fy_ ## _type ## _list_head(struct fy_ ## _type ## _list *_l) \ +{ \ + return !fy_ ## _type ## _list_empty(_l) ? fy_container_of(_l->_lh.next, struct fy_ ## _type, node) : NULL; \ +} \ +static inline struct fy_ ## _type *fy_ ## _type ## _list_tail(struct fy_ ## _type ## _list *_l) \ +{ \ + return !fy_ ## _type ## _list_empty(_l) ? 
fy_container_of(_l->_lh.prev, struct fy_ ## _type, node) : NULL; \ +} \ +static inline struct fy_ ## _type *fy_ ## _type ## _list_first(struct fy_ ## _type ## _list *_l) \ +{ \ + return fy_ ## _type ## _list_head(_l); \ +} \ +static inline struct fy_ ## _type *fy_ ## _type ## _list_last(struct fy_ ## _type ## _list *_l) \ +{ \ + return fy_ ## _type ## _list_tail(_l); \ +} \ +static inline struct fy_ ## _type *fy_ ## _type ## _list_pop(struct fy_ ## _type ## _list *_l) \ +{ \ + struct fy_ ## _type *_n; \ + \ + _n = fy_ ## _type ## _list_head(_l); \ + if (!_n) \ + return NULL; \ + fy_ ## _type ## _list_del(_l, _n); \ + return _n; \ +} \ +static inline struct fy_ ## _type *fy_ ## _type ## _list_pop_tail(struct fy_ ## _type ## _list *_l) \ +{ \ + struct fy_ ## _type *_n; \ + \ + _n = fy_ ## _type ## _list_tail(_l); \ + if (!_n) \ + return NULL; \ + fy_ ## _type ## _list_del(_l, _n); \ + return _n; \ +} \ +static inline struct fy_ ## _type *fy_ ## _type ## _next(struct fy_ ## _type ## _list *_l, struct fy_ ## _type *_n) \ +{ \ + if (!_n || !_l || _n->node.next == &_l->_lh) \ + return NULL; \ + return fy_container_of(_n->node.next, struct fy_ ## _type, node); \ +} \ +static inline struct fy_ ## _type *fy_ ## _type ## _prev(struct fy_ ## _type ## _list *_l, struct fy_ ## _type *_n) \ +{ \ + if (!_n || !_l || _n->node.prev == &_l->_lh) \ + return NULL; \ + return fy_container_of(_n->node.prev, struct fy_ ## _type, node); \ +} \ +static inline void fy_ ## _type ## _lists_splice( \ + struct fy_ ## _type ## _list *_l, \ + struct fy_ ## _type ## _list *_lfrom) \ +{ \ + /* check arguments for sanity and lists are not empty */ \ + if (!_l || !_lfrom || \ + fy_ ## _type ## _list_empty(_lfrom)) \ + return; \ + fy_list_splice(&_lfrom->_lh, &_l->_lh); \ +} \ +static inline void fy_ ## _type ## _list_splice_after( \ + struct fy_ ## _type ## _list *_l, struct fy_ ## _type *_n, \ + struct fy_ ## _type ## _list *_lfrom) \ +{ \ + /* check arguments for sanity and lists are not empty */ \ 
+ if (!_l || !_n || !_lfrom || \ + fy_ ## _type ## _list_empty(_lfrom)) \ + return; \ + fy_list_splice(&_lfrom->_lh, &_n->node); \ +} \ +static inline void fy_ ## _type ## _list_splice_before( \ + struct fy_ ## _type ## _list *_l, struct fy_ ## _type *_n, \ + struct fy_ ## _type ## _list *_lfrom) \ +{ \ + /* check arguments for sanity and lists are not empty */ \ + if (!_l || !_n || !_lfrom || \ + fy_ ## _type ## _list_empty(_lfrom)) \ + return; \ + fy_list_splice(&_lfrom->_lh, _n->node.prev); \ +} \ +struct __useless_struct_to_allow_semicolon + +#endif diff --git a/contrib/libs/libfyaml/src/lib/fy-types.c b/contrib/libs/libfyaml/src/lib/fy-types.c new file mode 100644 index 0000000000..5e0da300c6 --- /dev/null +++ b/contrib/libs/libfyaml/src/lib/fy-types.c @@ -0,0 +1,35 @@ +/* + * fy-types.c - types definition + * + * Copyright (c) 2019 Pantelis Antoniou <pantelis.antoniou@konsulko.com> + * + * SPDX-License-Identifier: MIT + */ +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#include <stdio.h> +#include <string.h> +#if defined (__unix__) || (defined (__APPLE__) && defined (__MACH__)) +#include <sys/mman.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <fcntl.h> +#include <unistd.h> +#endif +#include <assert.h> +#include <stdlib.h> +#include <errno.h> +#include <stdarg.h> +#include <limits.h> + +#include <libfyaml.h> + +#include "fy-parse.h" + +/* parse only types */ +FY_PARSE_TYPE_DEFINE_SIMPLE(indent); +FY_PARSE_TYPE_DEFINE_SIMPLE(simple_key); +FY_PARSE_TYPE_DEFINE_SIMPLE(parse_state_log); +FY_PARSE_TYPE_DEFINE_SIMPLE(flow); diff --git a/contrib/libs/libfyaml/src/lib/fy-types.h b/contrib/libs/libfyaml/src/lib/fy-types.h new file mode 100644 index 0000000000..889f2dcbbc --- /dev/null +++ b/contrib/libs/libfyaml/src/lib/fy-types.h @@ -0,0 +1,143 @@ +/* + * fy-types.h - common types builder + * + * Copyright (c) 2019 Pantelis Antoniou <pantelis.antoniou@konsulko.com> + * + * SPDX-License-Identifier: MIT + */ +#ifndef FY_TYPES_H +#define FY_TYPES_H + 
+#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#include <stdint.h> +#include <stdbool.h> +#include <stdio.h> + +#include <libfyaml.h> + +#include "fy-list.h" + +struct fy_parser; + +/* define type methods */ +#define FY_ALLOC_TYPE_DEFINE(_type) \ +\ +struct fy_ ## _type *fy_ ## _type ## _alloc_simple_internal( \ + struct fy_ ## _type ## _list *_rl) \ +{ \ + struct fy_ ## _type *_n; \ + \ + _n = fy_ ## _type ## _list_pop(_rl); \ + if (_n) \ + return _n; \ + _n = malloc(sizeof(*_n)); \ + if (_n) \ + fy_list_init_head(&_n->node); \ + return _n; \ +} \ +\ +void fy_ ## _type ## _recycle_internal(struct fy_ ## _type ## _list *_rl, \ + struct fy_ ## _type *_n) \ +{ \ + if (_n) \ + fy_ ## _type ## _list_push(_rl, _n); \ +} \ +\ +void fy_ ## _type ## _vacuum_internal(struct fy_ ## _type ## _list *_rl) \ +{ \ + struct fy_ ## _type *_n; \ + \ + while ((_n = fy_ ## _type ## _list_pop(_rl)) != NULL) \ + free(_n); \ +} \ +\ +struct __useless_struct_to_allow_semicolon + +/* declarations for alloc */ + +#define FY_ALLOC_TYPE_ALLOC(_type) \ +struct fy_ ## _type *fy_ ## _type ## _alloc_simple_internal( \ + struct fy_ ## _type ## _list *_rl); \ +void fy_ ## _type ## _recycle_internal(struct fy_ ## _type ## _list *_rl, \ + struct fy_ ## _type *_n); \ +void fy_ ## _type ## _vacuum_internal(struct fy_ ## _type ## _list *_rl); \ +struct __useless_struct_to_allow_semicolon + +/* parser type methods */ +#define FY_PARSE_TYPE_DECL_ALLOC(_type) \ +\ +struct fy_ ## _type *fy_parse_ ## _type ## _alloc(struct fy_parser *fyp); \ +void fy_parse_ ## _type ## _vacuum(struct fy_parser *fyp); \ +void fy_parse_ ## _type ## _recycle(struct fy_parser *fyp, struct fy_ ## _type *_n); \ +void fy_parse_ ## _type ## _list_recycle_all(struct fy_parser *fyp, struct fy_ ## _type ## _list *_l); \ +\ +struct __useless_struct_to_allow_semicolon + +#define FY_PARSE_TYPE_DECL(_type) \ +FY_TYPE_FWD_DECL_LIST(_type); \ +FY_TYPE_DECL_LIST(_type); \ +FY_PARSE_TYPE_DECL_ALLOC(_type); \ +struct 
__useless_struct_to_allow_semicolon + +#define FY_PARSE_TYPE_DECL_AFTER_FWD(_type) \ +FY_TYPE_DECL_LIST(_type); \ +FY_PARSE_TYPE_DECL_ALLOC(_type); \ +struct __useless_struct_to_allow_semicolon + +/* define type methods */ +#define FY_PARSE_TYPE_DEFINE(_type) \ +\ +struct fy_ ## _type *fy_parse_ ## _type ## _alloc_simple(struct fy_parser *fyp) \ +{ \ + return fy_ ## _type ## _alloc_simple_internal(&fyp->recycled_ ## _type); \ +} \ +\ +void fy_parse_ ## _type ## _vacuum(struct fy_parser *fyp) \ +{ \ + fy_ ## _type ## _vacuum_internal(&fyp->recycled_ ## _type); \ +} \ +\ +void fy_parse_ ## _type ## _list_recycle_all(struct fy_parser *fyp, struct fy_ ## _type ## _list *_l) \ +{ \ + struct fy_ ## _type *_n; \ + \ + while ((_n = fy_ ## _type ## _list_pop(_l)) != NULL) \ + fy_parse_ ## _type ## _recycle(fyp, _n); \ +} \ +\ +void fy_parse_ ## _type ## _recycle_simple(struct fy_parser *fyp, struct fy_ ## _type *_n) \ +{ \ + if (!fyp->suppress_recycling) \ + fy_ ## _type ## _recycle_internal(&fyp->recycled_ ## _type, _n); \ + else \ + free(_n); \ +} \ +\ +struct __useless_struct_to_allow_semicolon + +#define FY_PARSE_TYPE_DEFINE_ALLOC_SIMPLE(_type) \ +struct fy_ ## _type *fy_parse_ ## _type ## _alloc(struct fy_parser *_fyp) \ +{ \ + return fy_parse_ ## _type ## _alloc_simple(_fyp); \ +} \ +\ +void fy_parse_ ## _type ## _recycle(struct fy_parser *_fyp, struct fy_ ## _type *_n) \ +{ \ + if (_n) \ + fy_parse_ ## _type ## _recycle_simple(_fyp, _n); \ +} \ +\ +struct __useless_struct_to_allow_semicolon + +#define FY_PARSE_TYPE_DEFINE_SIMPLE(_type) \ +\ +FY_ALLOC_TYPE_DEFINE(_type); \ +FY_PARSE_TYPE_DEFINE(_type); \ +FY_PARSE_TYPE_DEFINE_ALLOC_SIMPLE(_type); \ +\ +struct __useless_struct_to_allow_semicolon + +#endif diff --git a/contrib/libs/libfyaml/src/lib/fy-utf8.c b/contrib/libs/libfyaml/src/lib/fy-utf8.c new file mode 100644 index 0000000000..8105f71173 --- /dev/null +++ b/contrib/libs/libfyaml/src/lib/fy-utf8.c @@ -0,0 +1,591 @@ +/* + * fy-utf8.c - utf8 handling methods + * 
+ * Copyright (c) 2019 Pantelis Antoniou <pantelis.antoniou@konsulko.com> + * + * SPDX-License-Identifier: MIT + */ +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#include <stdbool.h> +#include <string.h> +#include <stdlib.h> + +#include <libfyaml.h> + +#include "fy-utf8.h" + +const int8_t fy_utf8_width_table[32] = { + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 0, 0, 0, 0, 0, 0, 0, 0, + 2, 2, 2, 2, 3, 3, 4, 0, +}; + +/* decode one UTF-8 sequence starting at ptr (at most left bytes); returns the code point or a negative FYUG_* code; the width is stored in *widthp only on success and widthp may be NULL */ +int fy_utf8_get_generic(const void *ptr, int left, int *widthp) +{ + const uint8_t *p = ptr; + int i, width, value; + + if (left < 1) + return FYUG_EOF; + + /* this is the slow path */ + width = fy_utf8_width_by_first_octet(p[0]); + if (!width) + return FYUG_INV; + if (width > left) + return FYUG_PARTIAL; + + /* initial value */ + value = *p++ & (0xff >> width); + for (i = 1; i < width; i++) { + if ((*p & 0xc0) != 0x80) + return FYUG_INV; + value = (value << 6) | (*p++ & 0x3f); + } + + /* check for validity */ + if ((width == 4 && value < 0x10000) || + (width == 3 && value < 0x800) || + (width == 2 && value < 0x80) || + (value >= 0xd800 && value <= 0xdfff) || value >= 0x110000) + return FYUG_INV; + + /* widthp is optional: fy_utf8_get_right_generic() forwards a possibly NULL pointer here */ + if (widthp) + *widthp = width; + + return value; +} + +int fy_utf8_get_right_generic(const void *ptr, int left, int *widthp) +{ + const uint8_t *s, *e; + uint8_t v; + + s = ptr; + e = s + left; + + if (left < 1) + return FYUG_EOF; + + /* single byte sequence */ + v = e[-1]; + if ((v & 0x80) == 0) { + if (widthp) + *widthp = 1; + return (int)v & 0x7f; + } + + /* the last byte must be & 0xc0 == 0x80 */ + if ((v & 0xc0) != 0x80) + return FYUG_INV; + + /* at least two byte sequence */ + if (left < 2) + return FYUG_EOF; + + v = e[-2]; + /* the first byte of the sequence (must be a two byte sequence) */ + if ((v & 0xc0) != 0x80) { + /* two byte start is 110x_xxxx */ + if ((v & 0xe0) != 0xc0) + return FYUG_INV; + return fy_utf8_get(e - 2, 2, widthp); + } + + /* at least three byte sequence */ + if (left < 3) + return FYUG_EOF; + + v = e[-3]; + /* the first byte 
of the sequence (must be a three byte sequence) */ + if ((v & 0xc0) != 0x80) { + /* three byte start is 1110_xxxx */ + if ((v & 0xf0) != 0xe0) + return FYUG_INV; + return fy_utf8_get(e - 3, 3, widthp); + } + + /* at least four byte sequence */ + if (left < 4) + return FYUG_EOF; + + v = e[-4]; + + /* the first byte of the sequence (must be a four byte sequence) */ + /* four byte start is 1111_0xxx */ + if ((v & 0xf8) != 0xf0) { + return FYUG_INV; + } + return fy_utf8_get(e - 4, 4, widthp); +} + +struct fy_utf8_fmt_esc_map { + const int *ch; + const int *map; +}; + +static const struct fy_utf8_fmt_esc_map esc_all = { + .ch = (const int []){ '\\', '\0', '\b', '\r', '\t', '\f', '\n', '\v', '\a', '\e', 0x85, 0xa0, 0x2028, 0x2029, -1 }, + .map = (const int []){ '\\', '0', 'b', 'r', 't', 'f', 'n', 'v', 'a', 'e', 'N', '_', 'L', 'P', 0 } +}; + +static inline int esc_map(const struct fy_utf8_fmt_esc_map *esc_map, int c) +{ + const int *ch; + int cc; + + ch = esc_map->ch; + while ((cc = *ch++) >= 0) { + if (cc == c) + return esc_map->map[(ch - esc_map->ch) - 1]; + } + return -1; +} + +static inline int fy_utf8_esc_map(int c, enum fy_utf8_escape esc) +{ + if (esc == fyue_none) + return -1; + if (esc == fyue_singlequote && c == '\'') + return '\''; + if (fy_utf8_escape_is_any_doublequote(esc) && c == '"') + return '"'; + return esc_map(&esc_all, c); +} + +int fy_utf8_format_text_length(const char *buf, size_t len, + enum fy_utf8_escape esc) +{ + int c, w, l; + const char *s, *e; + + s = buf; + e = buf + len; + l = 0; + while (s < e) { + c = fy_utf8_get(s, e - s, &w); + if (!w || c < 0) + break; + s += w; + if (fy_utf8_esc_map(c, esc)) + w = 2; + l += w; + } + return l + 1; +} + +char *fy_utf8_format_text(const char *buf, size_t len, + char *out, size_t maxsz, + enum fy_utf8_escape esc) +{ + int c, w, cc; + const char *s, *e; + char *os, *oe; + + s = buf; + e = buf + len; + os = out; + oe = out + maxsz - 1; + while (s < e) { + c = fy_utf8_get(s, e - s, &w); + if (!w || c < 0) + 
break; + s += w; + + if ((cc = fy_utf8_esc_map(c, esc)) > 0) { + if (os + 2 > oe) + break; + *os++ = '\\'; + *os++ = cc; + continue; + } + + if (os + w > oe) + break; + + os = fy_utf8_put_unchecked(os, c); + } + *os++ = '\0'; + return out; +} + +char *fy_utf8_format(int c, char *buf, enum fy_utf8_escape esc) +{ + int cc; + char *s; + + if (!fy_utf8_is_valid(c)) { + *buf = '\0'; + return buf; + } + + s = buf; + if ((cc = fy_utf8_esc_map(c, esc)) > 0) { + *s++ = '\\'; + *s++ = cc; + } else + s = fy_utf8_put_unchecked(s, c); + *s = '\0'; + return buf; +} + +char *fy_utf8_format_text_alloc(const char *buf, size_t len, enum fy_utf8_escape esc) +{ + int outsz; + char *out; + + outsz = fy_utf8_format_text_length(buf, len, esc); + if (outsz < 0) + return NULL; + out = malloc(outsz); + if (!out) + return NULL; + fy_utf8_format_text(buf, len, out, outsz, esc); + + return out; +} + +const void *fy_utf8_memchr_generic(const void *s, int c, size_t n) +{ + int cc, w; + const void *e; + + e = (char*)s + n; + while (s < e && (cc = fy_utf8_get(s, (char*)e - (char*)s, &w)) >= 0) { + if (c == cc) + return s; + s = (char*)s + w; + } + + return NULL; +} + +/* parse an escape and return utf8 value */ +int fy_utf8_parse_escape(const char **strp, size_t len, enum fy_utf8_escape esc) +{ + const char *s, *e; + char c; + int i, value, code_length, cc, w; + unsigned int hi_surrogate, lo_surrogate; + + /* why do you bother us? 
*/ + if (esc == fyue_none) + return -1; + + if (!strp || !*strp || len < 2) + return -1; + + value = -1; + + s = *strp; + e = s + len; + + c = *s++; + + if (esc == fyue_singlequote) { + if (c != '\'') + goto out; + c = *s++; + if (c != '\'') + goto out; + + value = '\''; + goto out; + } + + /* get '\\' */ + if (c != '\\') + goto out; + + c = *s++; + + /* common YAML & JSON escapes */ + switch (c) { + case 'b': + value = '\b'; + break; + case 'f': + value = '\f'; + break; + case 'n': + value = '\n'; + break; + case 'r': + value = '\r'; + break; + case 't': + value = '\t'; + break; + case '"': + value = '"'; + break; + case '/': + value = '/'; + break; + case '\\': + value = '\\'; + break; + default: + break; + } + + if (value >= 0) + goto out; + + if (esc == fyue_doublequote || esc == fyue_doublequote_yaml_1_1) { + switch (c) { + case '0': + value = '\0'; + break; + case 'a': + value = '\a'; + break; + case '\t': + value = '\t'; + break; + case 'v': + value = '\v'; + break; + case 'e': + value = '\e'; + break; + case ' ': + value = ' '; + break; + case 'N': + value = 0x85; /* NEL */ + break; + case '_': + value = 0xa0; + break; + case 'L': + value = 0x2028; /* LS */ + break; + case 'P': /* PS 0x2029 */ + value = 0x2029; /* PS */ + break; + default: + /* weird unicode escapes */ + if ((uint8_t)c >= 0x80) { + /* in non yaml-1.1 mode we don't allow this craziness */ + if (esc == fyue_doublequote) + goto out; + + cc = fy_utf8_get(s - 1, e - (s - 1), &w); + switch (cc) { + case 0x2028: + case 0x2029: + case 0x85: + case 0xa0: + value = cc; + break; + default: + break; + } + } + break; + } + if (value >= 0) + goto out; + } + + /* finally try the unicode escapes */ + code_length = 0; + + if (esc == fyue_doublequote || esc == fyue_doublequote_yaml_1_1) { + switch (c) { + case 'x': + code_length = 2; + break; + case 'u': + code_length = 4; + break; + case 'U': + code_length = 8; + break; + default: + return -1; + } + } else if (esc == fyue_doublequote_json && c == 'u') + 
code_length = 4; + + if (!code_length || code_length > (e - s)) + goto out; + + value = 0; + for (i = 0; i < code_length; i++) { + c = *s++; + value <<= 4; + if (c >= '0' && c <= '9') + value |= c - '0'; + else if (c >= 'a' && c <= 'f') + value |= 10 + c - 'a'; + else if (c >= 'A' && c <= 'F') + value |= 10 + c - 'A'; + else { + /* not a hex digit: report failure instead of the partially accumulated value */ + value = -1; + goto out; + } + } + + /* hi/lo surrogate pair */ + if (code_length == 4 && value >= 0xd800 && value <= 0xdbff && + (e - s) >= 6 && s[0] == '\\' && s[1] == 'u') { + hi_surrogate = value; + + s += 2; + + value = 0; + for (i = 0; i < code_length; i++) { + c = *s++; + value <<= 4; + if (c >= '0' && c <= '9') + value |= c - '0'; + else if (c >= 'a' && c <= 'f') + value |= 10 + c - 'a'; + else if (c >= 'A' && c <= 'F') + value |= 10 + c - 'A'; + else + return -1; + } + lo_surrogate = value; + /* the second half must be a low surrogate, otherwise the combination below yields a bogus code point */ + if (lo_surrogate < 0xdc00 || lo_surrogate > 0xdfff) + return -1; + value = 0x10000 + (hi_surrogate - 0xd800) * 0x400 + (lo_surrogate - 0xdc00); + } + +out: + *strp = s; + + return value; +} + +uint8_t fy_utf8_low_ascii_flags[0x80] = { + [0x00] = F_NON_PRINT, // NUL '\0' (null character) + [0x01] = F_NON_PRINT, // SOH (start of heading) + [0x02] = F_NON_PRINT, // STX (start of text) + [0x03] = F_NON_PRINT, // ETX (end of text) + [0x04] = F_NON_PRINT, // EOT (end of transmission) + [0x05] = F_NON_PRINT, // ENQ (enquiry) + [0x06] = F_NON_PRINT, // ACK (acknowledge) + [0x07] = F_NON_PRINT | F_QUOTE_ESC, // BEL '\a' (bell) + [0x08] = F_NON_PRINT | F_QUOTE_ESC, // BS '\b' (backspace) + [0x09] = F_NON_PRINT | F_QUOTE_ESC | F_WS, // HT '\t' (horizontal tab) + [0x0A] = F_NON_PRINT | F_QUOTE_ESC | F_LB, // LF '\n' (new line) + [0x0B] = F_NON_PRINT | F_QUOTE_ESC, // VT '\v' (vertical tab) + [0x0C] = F_NON_PRINT | F_QUOTE_ESC, // FF '\f' (form feed) + [0x0D] = F_NON_PRINT | F_QUOTE_ESC | F_LB, // CR '\r' (carriage ret) + [0x0E] = F_NON_PRINT, // SO (shift out) + [0x0F] = F_NON_PRINT, // SI (shift in) + [0x10] = F_NON_PRINT, // DLE (data link escape) + [0x11] = F_NON_PRINT, // DC1 (device control 1) + [0x12] = F_NON_PRINT, // DC2 (device 
control 2) + [0x13] = F_NON_PRINT, // DC3 (device control 3) + [0x14] = F_NON_PRINT, // DC4 (device control 4) + [0x15] = F_NON_PRINT, // NAK (negative ack.) + [0x16] = F_NON_PRINT, // SYN (synchronous idle) + [0x17] = F_NON_PRINT, // ETB (end of trans. blk) + [0x18] = F_NON_PRINT, // CAN (cancel) + [0x19] = F_NON_PRINT, // EM (end of medium) + [0x1A] = F_NON_PRINT, // SUB (substitute) + [0x1B] = F_NON_PRINT, // ESC (escape) + [0x1C] = F_NON_PRINT, // FS (file separator) + [0x1D] = F_NON_PRINT, // GS (group separator) + [0x1E] = F_NON_PRINT, // RS (record separator) + [0x1F] = F_NON_PRINT, // US (unit separator) + [' '] = F_WS, + ['!'] = F_PUNCT, + ['"'] = F_PUNCT, + ['#'] = F_PUNCT, + ['$'] = F_PUNCT, + ['%'] = F_PUNCT, + ['&'] = F_PUNCT, + ['\''] = F_PUNCT, + ['('] = F_PUNCT, + [')'] = F_PUNCT, + ['*'] = F_PUNCT, + ['+'] = F_PUNCT, + [','] = F_PUNCT, + ['-'] = F_PUNCT, + ['.'] = F_PUNCT, + ['/'] = F_PUNCT, + ['0'] = F_DIGIT | F_SIMPLE_SCALAR, + ['1'] = F_DIGIT | F_SIMPLE_SCALAR, + ['2'] = F_DIGIT | F_SIMPLE_SCALAR, + ['3'] = F_DIGIT | F_SIMPLE_SCALAR, + ['4'] = F_DIGIT | F_SIMPLE_SCALAR, + ['5'] = F_DIGIT | F_SIMPLE_SCALAR, + ['6'] = F_DIGIT | F_SIMPLE_SCALAR, + ['7'] = F_DIGIT | F_SIMPLE_SCALAR, + ['8'] = F_DIGIT | F_SIMPLE_SCALAR, + ['9'] = F_DIGIT | F_SIMPLE_SCALAR, + [':'] = F_PUNCT, + [';'] = F_PUNCT, + ['<'] = F_PUNCT, + ['='] = F_PUNCT, + ['>'] = F_PUNCT, + ['?'] = F_PUNCT, + ['@'] = F_PUNCT, + ['A'] = F_LETTER | F_SIMPLE_SCALAR, + ['B'] = F_LETTER | F_SIMPLE_SCALAR, + ['C'] = F_LETTER | F_SIMPLE_SCALAR, + ['D'] = F_LETTER | F_SIMPLE_SCALAR, + ['E'] = F_LETTER | F_SIMPLE_SCALAR, + ['F'] = F_LETTER | F_SIMPLE_SCALAR, + ['G'] = F_LETTER | F_SIMPLE_SCALAR, + ['H'] = F_LETTER | F_SIMPLE_SCALAR, + ['I'] = F_LETTER | F_SIMPLE_SCALAR, + ['J'] = F_LETTER | F_SIMPLE_SCALAR, + ['K'] = F_LETTER | F_SIMPLE_SCALAR, + ['L'] = F_LETTER | F_SIMPLE_SCALAR, + ['M'] = F_LETTER | F_SIMPLE_SCALAR, + ['N'] = F_LETTER | F_SIMPLE_SCALAR, + ['O'] = F_LETTER | F_SIMPLE_SCALAR, + 
['P'] = F_LETTER | F_SIMPLE_SCALAR, + ['Q'] = F_LETTER | F_SIMPLE_SCALAR, + ['R'] = F_LETTER | F_SIMPLE_SCALAR, + ['S'] = F_LETTER | F_SIMPLE_SCALAR, + ['T'] = F_LETTER | F_SIMPLE_SCALAR, + ['U'] = F_LETTER | F_SIMPLE_SCALAR, + ['V'] = F_LETTER | F_SIMPLE_SCALAR, + ['W'] = F_LETTER | F_SIMPLE_SCALAR, + ['X'] = F_LETTER | F_SIMPLE_SCALAR, + ['Y'] = F_LETTER | F_SIMPLE_SCALAR, + ['Z'] = F_LETTER | F_SIMPLE_SCALAR, + ['['] = F_PUNCT, + ['\\'] = F_PUNCT, // '\\' + [']'] = F_PUNCT, + ['^'] = F_PUNCT, + ['_'] = F_PUNCT | F_SIMPLE_SCALAR, + ['`'] = F_PUNCT, + ['a'] = F_LETTER | F_SIMPLE_SCALAR, + ['b'] = F_LETTER | F_SIMPLE_SCALAR, + ['c'] = F_LETTER | F_SIMPLE_SCALAR, + ['d'] = F_LETTER | F_SIMPLE_SCALAR, + ['e'] = F_LETTER | F_SIMPLE_SCALAR, + ['f'] = F_LETTER | F_SIMPLE_SCALAR, + ['g'] = F_LETTER | F_SIMPLE_SCALAR, + ['h'] = F_LETTER | F_SIMPLE_SCALAR, + ['i'] = F_LETTER | F_SIMPLE_SCALAR, + ['j'] = F_LETTER | F_SIMPLE_SCALAR, + ['k'] = F_LETTER | F_SIMPLE_SCALAR, + ['l'] = F_LETTER | F_SIMPLE_SCALAR, + ['m'] = F_LETTER | F_SIMPLE_SCALAR, + ['n'] = F_LETTER | F_SIMPLE_SCALAR, + ['o'] = F_LETTER | F_SIMPLE_SCALAR, + ['p'] = F_LETTER | F_SIMPLE_SCALAR, + ['q'] = F_LETTER | F_SIMPLE_SCALAR, + ['r'] = F_LETTER | F_SIMPLE_SCALAR, + ['s'] = F_LETTER | F_SIMPLE_SCALAR, + ['t'] = F_LETTER | F_SIMPLE_SCALAR, + ['u'] = F_LETTER | F_SIMPLE_SCALAR, + ['v'] = F_LETTER | F_SIMPLE_SCALAR, + ['w'] = F_LETTER | F_SIMPLE_SCALAR, + ['x'] = F_LETTER | F_SIMPLE_SCALAR, + ['y'] = F_LETTER | F_SIMPLE_SCALAR, + ['z'] = F_LETTER | F_SIMPLE_SCALAR, + ['{'] = F_PUNCT, + ['|'] = F_PUNCT, + ['}'] = F_PUNCT, + ['~'] = F_PUNCT, + [0x7F] = F_NON_PRINT, // DEL +}; diff --git a/contrib/libs/libfyaml/src/lib/fy-utf8.h b/contrib/libs/libfyaml/src/lib/fy-utf8.h new file mode 100644 index 0000000000..fa4faa1062 --- /dev/null +++ b/contrib/libs/libfyaml/src/lib/fy-utf8.h @@ -0,0 +1,229 @@ +/* + * fy-utf8.h - UTF-8 methods + * + * Copyright (c) 2019 Pantelis Antoniou <pantelis.antoniou@konsulko.com> + * + * 
SPDX-License-Identifier: MIT + */ +#ifndef FY_UTF8_H +#define FY_UTF8_H + +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#include <stdint.h> +#include <stdlib.h> +#include <assert.h> + +#include <libfyaml.h> + +#include "fy-utils.h" + +extern const int8_t fy_utf8_width_table[32]; + +static inline int +fy_utf8_width_by_first_octet_no_table(uint8_t c) +{ + return (c & 0x80) == 0x00 ? 1 : + (c & 0xe0) == 0xc0 ? 2 : + (c & 0xf0) == 0xe0 ? 3 : + (c & 0xf8) == 0xf0 ? 4 : 0; +} + +static inline FY_ALWAYS_INLINE int +fy_utf8_width_by_first_octet(uint8_t c) +{ + return fy_utf8_width_table[c >> 3]; +} + +/* assumes valid utf8 character */ +static inline size_t +fy_utf8_width(int c) +{ + return c < 0x80 ? 1 : + c < 0x800 ? 2 : + c < 0x10000 ? 3 : 4; +} + +static inline bool +fy_utf8_is_valid(int c) +{ + return c >= 0 && !((c >= 0xd800 && c <= 0xdfff) || c >= 0x110000); +} + +static inline bool +fy_utf8_is_printable_ascii(int c) +{ + return c >= 0x20 && c <= 0x7e; +} + +/* generic utf8 decoder (not inlined) */ +int fy_utf8_get_generic(const void *ptr, int left, int *widthp); + +/* -1 for end of input, -2 for invalid character, -3 for partial */ +#define FYUG_EOF -1 +#define FYUG_INV -2 +#define FYUG_PARTIAL -3 + +static inline int fy_utf8_get(const void *ptr, int left, int *widthp) +{ + const uint8_t *p = ptr; + + /* single byte (hot path) */ + if (left <= 0) { + *widthp = 0; + return FYUG_EOF; + } + + if (!(p[0] & 0x80)) { + *widthp = 1; + return p[0] & 0x7f; + } + return fy_utf8_get_generic(ptr, left, widthp); +} + +int fy_utf8_get_right_generic(const void *ptr, int left, int *widthp); + +static inline int fy_utf8_get_right(const void *ptr, int left, int *widthp) +{ + const uint8_t *p = (const uint8_t*)ptr + left; + + /* single byte (hot path) */ + if (left > 0 && !(p[-1] & 0x80)) { + if (widthp) + *widthp = 1; + return p[-1] & 0x7f; + } + return fy_utf8_get_right_generic(ptr, left, widthp); +} + + +/* for when you _know_ that there's enough room and c is valid */ 
+static inline void *fy_utf8_put_unchecked(void *ptr, int c) +{ + uint8_t *s = ptr; + + assert(c >= 0); + if (c < 0x80) + *s++ = c; + else if (c < 0x800) { + *s++ = (c >> 6) | 0xc0; + *s++ = (c & 0x3f) | 0x80; + } else if (c < 0x10000) { + *s++ = (c >> 12) | 0xe0; + *s++ = ((c >> 6) & 0x3f) | 0x80; + *s++ = (c & 0x3f) | 0x80; + } else { + *s++ = (c >> 18) | 0xf0; + *s++ = ((c >> 12) & 0x3f) | 0x80; + *s++ = ((c >> 6) & 0x3f) | 0x80; + *s++ = (c & 0x3f) | 0x80; + } + return s; +} + +static inline void *fy_utf8_put(void *ptr, size_t left, int c) +{ + if (!fy_utf8_is_valid(c) || fy_utf8_width(c) > left) + return NULL; + + return fy_utf8_put_unchecked(ptr, c); +} + +/* buffer must contain at least 5 characters */ +#define FY_UTF8_FORMAT_BUFMIN 5 +enum fy_utf8_escape { + fyue_none, + fyue_singlequote, + fyue_doublequote, + fyue_doublequote_json, + fyue_doublequote_yaml_1_1, +}; + +static inline bool fy_utf8_escape_is_any_doublequote(enum fy_utf8_escape esc) +{ + return esc >= fyue_doublequote && esc <= fyue_doublequote_yaml_1_1; +} + +char *fy_utf8_format(int c, char *buf, enum fy_utf8_escape esc); + +#define fy_utf8_format_a(_c, _esc, _res) \ + do { \ + char *_buf = FY_ALLOCA(FY_UTF8_FORMAT_BUFMIN); \ + *(_res) = fy_utf8_format((_c), _buf, _esc); \ + } while(false) + +int fy_utf8_format_text_length(const char *buf, size_t len, + enum fy_utf8_escape esc); +char *fy_utf8_format_text(const char *buf, size_t len, + char *out, size_t maxsz, + enum fy_utf8_escape esc); + +#define fy_utf8_format_text_a(_buf, _len, _esc, _res) \ + do { \ + const char *__buf = (_buf); \ + size_t __len = (_len); \ + enum fy_utf8_escape __esc = (_esc); \ + size_t _outsz = fy_utf8_format_text_length(__buf, __len, __esc); \ + char *_out = FY_ALLOCA(_outsz + 1); \ + *(_res) = fy_utf8_format_text(__buf, __len, _out, _outsz, __esc); \ + } while(false) + +char *fy_utf8_format_text_alloc(const char *buf, size_t len, enum fy_utf8_escape esc); + +const void *fy_utf8_memchr_generic(const void *s, int c, 
size_t n); + +static inline const void *fy_utf8_memchr(const void *s, int c, size_t n) +{ + if (c < 0 || !n) + return NULL; + if (c < 0x80) + return memchr(s, c, n); + return fy_utf8_memchr_generic(s, c, n); +} + +static inline const void *fy_utf8_strchr(const void *s, int c) +{ + if (c < 0) + return NULL; + if (c < 0x80) + return strchr(s, c); + return fy_utf8_memchr_generic(s, c, strlen(s)); +} + +static inline int fy_utf8_count(const void *ptr, size_t len) +{ + const uint8_t *s = ptr, *e = (const uint8_t *)ptr + len; + int w, count; + + count = 0; + while (s < e) { + w = fy_utf8_width_by_first_octet(*s); + + /* malformed? */ + if (!w || s + w > e) + break; + s += w; + + count++; + } + + return count; +} + +int fy_utf8_parse_escape(const char **strp, size_t len, enum fy_utf8_escape esc); + +#define F_NONE 0 +#define F_NON_PRINT FY_BIT(0) /* non printable */ +#define F_SIMPLE_SCALAR FY_BIT(1) /* part of simple scalar */ +#define F_QUOTE_ESC FY_BIT(2) /* escape form, i.e \n */ +#define F_LB FY_BIT(3) /* is a linebreak */ +#define F_WS FY_BIT(4) /* is a whitespace */ +#define F_PUNCT FY_BIT(5) /* is a punctuation mark */ +#define F_LETTER FY_BIT(6) /* is a letter a..z A..Z */ +#define F_DIGIT FY_BIT(7) /* is a digit 0..9 */ + +extern uint8_t fy_utf8_low_ascii_flags[0x80]; + +#endif diff --git a/contrib/libs/libfyaml/src/lib/fy-utils.c b/contrib/libs/libfyaml/src/lib/fy-utils.c new file mode 100644 index 0000000000..8f87ae77d2 --- /dev/null +++ b/contrib/libs/libfyaml/src/lib/fy-utils.c @@ -0,0 +1,721 @@ +/* + * fy-utils.c - Generic utilities for functionality that's missing + * from platforms. + * + * For now only used to implement memstream for Apple platforms. 
+ * + * Copyright (c) 2019 Pantelis Antoniou <pantelis.antoniou@konsulko.com> + * + * SPDX-License-Identifier: MIT + */ +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#include <stdlib.h> +#include <string.h> +#include <errno.h> +#if defined (__unix__) || (defined (__APPLE__) && defined (__MACH__)) +#include <termios.h> +#include <unistd.h> +#include <sys/select.h> +#include <sys/time.h> +#include <sys/types.h> +#elif defined(_MSC_VER) +#include <windows.h> +#endif + +#include "fy-utf8.h" +#include "fy-ctype.h" +#include "fy-utils.h" + +int fy_get_pagesize() { +#if defined (__unix__) || (defined (__APPLE__) && defined (__MACH__)) + return sysconf(_SC_PAGESIZE); +#elif defined (_MSC_VER) + SYSTEM_INFO si; + GetSystemInfo(&si); + return si.dwPageSize; +#endif +} + +#if defined(_MSC_VER) +#ifndef VA_COPY +# ifdef HAVE_VA_COPY +# define VA_COPY(dest, src) va_copy(dest, src) +# else +# ifdef HAVE___VA_COPY +# define VA_COPY(dest, src) __va_copy(dest, src) +# else +# define VA_COPY(dest, src) (dest) = (src) +# endif +# endif +#endif + +#define INIT_SZ 128 + +int +vasprintf(char **str, const char *fmt, va_list ap) +{ + int ret; + va_list ap2; + char *string, *newstr; + size_t len; + + if ((string = malloc(INIT_SZ)) == NULL) + goto fail; + + VA_COPY(ap2, ap); + ret = vsnprintf(string, INIT_SZ, fmt, ap2); + va_end(ap2); + if (ret >= 0 && ret < INIT_SZ) { /* succeeded with initial alloc */ + *str = string; + } else if (ret == INT_MAX || ret < 0) { /* Bad length */ + free(string); + goto fail; + } else { /* bigger than initial, realloc allowing for nul */ + len = (size_t)ret + 1; + if ((newstr = realloc(string, len)) == NULL) { + free(string); + goto fail; + } + VA_COPY(ap2, ap); + ret = vsnprintf(newstr, len, fmt, ap2); + va_end(ap2); + if (ret < 0 || (size_t)ret >= len) { /* failed with realloc'ed string */ + free(newstr); + goto fail; + } + *str = newstr; + } + return (ret); + +fail: + *str = NULL; + errno = ENOMEM; + return (-1); +} + +int asprintf(char **str, const 
char *fmt, ...) +{ + va_list ap; + int ret; + + *str = NULL; + va_start(ap, fmt); + ret = vasprintf(str, fmt, ap); + va_end(ap); + + return ret; +} +#endif + +#if defined(__APPLE__) && (_POSIX_C_SOURCE < 200809L) + +/* + * adapted from http://piumarta.com/software/memstream/ + * + * Under the MIT license. + */ + +/* + * ---------------------------------------------------------------------------- + * + * OPEN_MEMSTREAM(3) BSD and Linux Library Functions OPEN_MEMSTREAM(3) + * + * SYNOPSIS + * #include "memstream.h" + * + * FILE *open_memstream(char **bufp, size_t *sizep); + * + * DESCRIPTION + * The open_memstream() function opens a stream for writing to a buffer. + * The buffer is dynamically allocated (as with malloc(3)), and + * automatically grows as required. After closing the stream, the caller + * should free(3) this buffer. + * + * When the stream is closed (fclose(3)) or flushed (fflush(3)), the + * locations pointed to by bufp and sizep are updated to contain, + * respectively, a pointer to the buffer and the current size of the + * buffer. These values remain valid only as long as the caller performs + * no further output on the stream. If further output is performed, then + * the stream must again be flushed before trying to access these + * variables. + * + * A null byte is maintained at the end of the buffer. This byte is not + * included in the size value stored at sizep. + * + * The stream's file position can be changed with fseek(3) or fseeko(3). + * Moving the file position past the end of the data already written fills + * the intervening space with zeros. + * + * RETURN VALUE + * Upon successful completion open_memstream() returns a FILE pointer. + * Otherwise, NULL is returned and errno is set to indicate the error. + * + * CONFORMING TO + * POSIX.1-2008 + * + * ---------------------------------------------------------------------------- + */ + +#ifndef min +#define min(X, Y) (((X) < (Y)) ? 
(X) : (Y)) +#endif + +struct memstream { + size_t position; + size_t size; + size_t capacity; + char *contents; + char **ptr; + size_t *sizeloc; +}; + +static int memstream_grow(struct memstream *ms, size_t minsize) +{ + size_t newcap; + char *newcontents; + + newcap = ms->capacity * 2; + while (newcap <= minsize + 1) + newcap *= 2; + newcontents = realloc(ms->contents, newcap); + if (!newcontents) + return -1; + ms->contents = newcontents; + memset(ms->contents + ms->capacity, 0, newcap - ms->capacity); + ms->capacity = newcap; + *ms->ptr = ms->contents; + return 0; +} + +/* read callback: copies up to count bytes starting at the current position into buf and advances the position; returns the number of bytes copied, 0 when nothing is left */ +static int memstream_read(void *cookie, char *buf, int count) +{ + struct memstream *ms = cookie; + size_t n; + + n = min(ms->size - ms->position, (size_t)count); + if (n < 1) + return 0; + /* copy from the current position, not from the start of the buffer */ + memcpy(buf, ms->contents + ms->position, n); + ms->position += n; + return n; +} + +static int memstream_write(void *cookie, const char *buf, int count) +{ + struct memstream *ms = cookie; + + if (ms->capacity <= ms->position + (size_t)count && + memstream_grow(ms, ms->position + (size_t)count) < 0) + return -1; + memcpy(ms->contents + ms->position, buf, count); + ms->position += count; + ms->contents[ms->position] = '\0'; + if (ms->size < ms->position) + *ms->sizeloc = ms->size = ms->position; + + return count; +} + +static fpos_t memstream_seek(void *cookie, fpos_t offset, int whence) +{ + struct memstream *ms = cookie; + fpos_t pos= 0; + + switch (whence) { + case SEEK_SET: + pos = offset; + break; + case SEEK_CUR: + pos = ms->position + offset; + break; + case SEEK_END: + pos = ms->size + offset; + break; + default: + errno= EINVAL; + return -1; + } + if (pos >= (fpos_t)ms->capacity && memstream_grow(ms, pos) < 0) + return -1; + ms->position = pos; + if (ms->size < ms->position) + *ms->sizeloc = ms->size = ms->position; + return pos; +} + +static int memstream_close(void *cookie) +{ + struct memstream *ms = cookie; + + ms->size = min(ms->size, ms->position); + *ms->ptr = ms->contents; + *ms->sizeloc = ms->size; + 
ms->contents[ms->size]= 0; + /* ms->contents is what's returned */ + free(ms); + return 0; +} + +FILE *open_memstream(char **ptr, size_t *sizeloc) +{ + struct memstream *ms; + FILE *fp; + + if (!ptr || !sizeloc) { + errno= EINVAL; + goto err_out; + } + + ms = calloc(1, sizeof(struct memstream)); + if (!ms) + goto err_out; + + ms->position = ms->size= 0; + ms->capacity = 4096; + ms->contents = calloc(ms->capacity, 1); + if (!ms->contents) + goto err_free_ms; + ms->ptr = ptr; + ms->sizeloc = sizeloc; + fp= funopen(ms, memstream_read, memstream_write, + memstream_seek, memstream_close); + if (!fp) + goto err_free_all; + *ptr = ms->contents; + *sizeloc = ms->size; + return fp; + +err_free_all: + free(ms->contents); +err_free_ms: + free(ms); +err_out: + return NULL; +} + +#endif /* __APPLE__ && _POSIX_C_SOURCE < 200809L */ + +bool fy_tag_uri_is_valid(const char *data, size_t len) +{ + const char *s, *e; + int w, j, k, width, c; + uint8_t octet, esc_octets[4]; + + s = data; + e = s + len; + + while ((c = fy_utf8_get(s, e - s, &w)) >= 0) { + if (c != '%') { + s += w; + continue; + } + + width = 0; + k = 0; + do { + /* short URI escape */ + if ((e - s) < 3) + return false; + + if (width > 0) { + c = fy_utf8_get(s, e - s, &w); + if (c != '%') + return false; + } + + s += w; + + octet = 0; + + for (j = 0; j < 2; j++) { + c = fy_utf8_get(s, e - s, &w); + if (!fy_is_hex(c)) + return false; + s += w; + + octet <<= 4; + if (c >= '0' && c <= '9') + octet |= c - '0'; + else if (c >= 'a' && c <= 'f') + octet |= 10 + c - 'a'; + else + octet |= 10 + c - 'A'; + } + if (!width) { + width = fy_utf8_width_by_first_octet(octet); + + if (width < 1 || width > 4) + return false; + k = 0; + } + esc_octets[k++] = octet; + + } while (--width > 0); + + /* now convert to utf8 */ + c = fy_utf8_get(esc_octets, k, &w); + + if (c < 0) + return false; + } + + return true; +} + +int fy_tag_handle_length(const char *data, size_t len) +{ + const char *s, *e; + int c, w; + + s = data; + e = s + len; + + c 
= fy_utf8_get(s, e - s, &w); + if (c != '!') + return -1; + s += w; + + c = fy_utf8_get(s, e - s, &w); + if (fy_is_ws(c)) + return s - data; + /* if first character is !, empty handle */ + if (c == '!') { + s += w; + return s - data; + } + if (!fy_is_first_alpha(c)) + return -1; + s += w; + while (fy_is_alnum(c = fy_utf8_get(s, e - s, &w))) + s += w; + if (c == '!') + s += w; + + return s - data; +} + +int fy_tag_uri_length(const char *data, size_t len) +{ + const char *s, *e; + int c, w, cn, wn, uri_length; + + s = data; + e = s + len; + + while (fy_is_uri(c = fy_utf8_get(s, e - s, &w))) { + cn = fy_utf8_get(s + w, e - (s + w), &wn); + if ((fy_is_z(cn) || fy_is_blank(cn) || fy_is_any_lb(cn)) && fy_utf8_strchr(",}]", c)) + break; + s += w; + } + uri_length = s - data; + + if (!fy_tag_uri_is_valid(data, uri_length)) + return -1; + + return uri_length; +} + +int fy_tag_scan(const char *data, size_t len, struct fy_tag_scan_info *info) +{ + const char *s, *e; + int total_length, handle_length, uri_length, prefix_length, suffix_length; + int c, cn, w, wn; + + s = data; + e = s + len; + + prefix_length = 0; + + /* it must start with '!' */ + c = fy_utf8_get(s, e - s, &w); + if (c != '!') + return -1; + cn = fy_utf8_get(s + w, e - (s + w), &wn); + if (cn == '<') { + prefix_length = 2; + suffix_length = 1; + } else + prefix_length = suffix_length = 0; + + if (prefix_length) { + handle_length = 0; /* set the handle to '' */ + s += prefix_length; + } else { + /* either !suffix or !handle!suffix */ + /* we scan back to back, and split handle/suffix */ + handle_length = fy_tag_handle_length(s, e - s); + if (handle_length <= 0) + return -1; + s += handle_length; + } + + uri_length = fy_tag_uri_length(s, e - s); + if (uri_length < 0) + return -1; + + /* a handle? */ + if (!prefix_length && (handle_length == 0 || data[handle_length - 1] != '!')) { + /* special case, '!', handle set to '' and suffix to '!' 
*/ + if (handle_length == 1 && uri_length == 0) { + handle_length = 0; + uri_length = 1; + } else { + uri_length = handle_length - 1 + uri_length; + handle_length = 1; + } + } + total_length = prefix_length + handle_length + uri_length + suffix_length; + + if (total_length != (int)len) + return -1; + + info->total_length = total_length; + info->handle_length = handle_length; + info->uri_length = uri_length; + info->prefix_length = prefix_length; + info->suffix_length = suffix_length; + + return 0; +} + +#if defined (__unix__) || (defined (__APPLE__) && defined (__MACH__)) +/* simple terminal methods; mainly for getting size of terminal */ +int fy_term_set_raw(int fd, struct termios *oldt) +{ + struct termios newt, t; + int ret; + + /* must be a terminal */ + if (!isatty(fd)) + return -1; + + ret = tcgetattr(fd, &t); + if (ret != 0) + return ret; + + newt = t; + + cfmakeraw(&newt); + + ret = tcsetattr(fd, TCSANOW, &newt); + if (ret != 0) + return ret; + + if (oldt) + *oldt = t; + + return 0; +} + +int fy_term_restore(int fd, const struct termios *oldt) +{ + /* must be a terminal */ + if (!isatty(fd)) + return -1; + + return tcsetattr(fd, TCSANOW, oldt); +} + +ssize_t fy_term_write(int fd, const void *data, size_t count) +{ + ssize_t wrn, r; + + if (!isatty(fd)) + return -1; + r = 0; + wrn = 0; + while (count > 0) { + do { + r = write(fd, data, count); + } while (r == -1 && errno == EAGAIN); + if (r < 0) + break; + wrn += r; + data += r; + count -= r; + } + + /* return the amount written, or the last error code */ + return wrn > 0 ? wrn : r; +} + +int fy_term_safe_write(int fd, const void *data, size_t count) +{ + if (!isatty(fd)) + return -1; + + return fy_term_write(fd, data, count) == (ssize_t)count ? 
0 : -1; +} + +ssize_t fy_term_read(int fd, void *data, size_t count, int timeout_us) +{ + ssize_t rdn, r; + struct timeval tv, tvto, *tvp; + fd_set rdfds; + + if (!isatty(fd)) + return -1; + + FD_ZERO(&rdfds); + + memset(&tvto, 0, sizeof(tvto)); + memset(&tv, 0, sizeof(tv)); + + if (timeout_us >= 0) { + tvto.tv_sec = timeout_us / 1000000; + tvto.tv_usec = timeout_us % 1000000; + tvp = &tv; + } else { + tvp = NULL; + } + + r = 0; + rdn = 0; + while (count > 0) { + do { + FD_SET(fd, &rdfds); + if (tvp) + *tvp = tvto; + r = select(fd + 1, &rdfds, NULL, NULL, tvp); + } while (r == -1 && errno == EAGAIN); + + /* select ends, or something weird */ + if (r <= 0 || !FD_ISSET(fd, &rdfds)) + break; + + /* now read */ + do { + r = read(fd, data, count); + } while (r == -1 && errno == EAGAIN); + if (r < 0) + break; + + rdn += r; + data += r; + count -= r; + } + + /* return the amount written, or the last error code */ + return rdn > 0 ? rdn : r; +} + +ssize_t fy_term_read_escape(int fd, void *buf, size_t count) +{ + char *p; + int r, rdn; + char c; + + /* at least 3 characters */ + if (count < 3) + return -1; + + p = buf; + rdn = 0; + + /* ESC */ + r = fy_term_read(fd, &c, 1, 100 * 1000); + if (r != 1 || c != '\x1b') + return -1; + *p++ = c; + count--; + rdn++; + + /* [ */ + r = fy_term_read(fd, &c, 1, 100 * 1000); + if (r != 1 || c != '[') + return rdn; + *p++ = c; + count--; + rdn++; + + /* read until error, out of buffer, or < 0x40 || > 0x7e */ + r = -1; + while (count > 0) { + r = fy_term_read(fd, &c, 1, 100 * 1000); + if (r != 1) + r = -1; + if (r != 1) + break; + *p++ = c; + count--; + rdn++; + + /* end of escape */ + if (c >= 0x40 && c <= 0x7e) + break; + } + + return rdn; +} + +int fy_term_query_size_raw(int fd, int *rows, int *cols) +{ + char buf[32]; + char *s, *e; + ssize_t r; + + /* must be a terminal */ + if (!isatty(fd)) + return -1; + + *rows = *cols = 0; + + /* query text area */ + r = fy_term_safe_write(fd, "\x1b[18t", 5); + if (r != 0) + return r; + + /* read 
/*
 * fy_term_query_size() - query the terminal size, handling raw mode
 *
 * Switches the terminal on fd to raw mode, performs the raw size
 * query and restores the previous terminal settings afterwards.
 *
 * Returns the result of the raw query, or -1 when fd is not a
 * terminal, raw mode could not be entered, or the previous settings
 * could not be restored.
 */
int fy_term_query_size(int fd, int *rows, int *cols)
{
	struct termios saved;
	int query_rc;

	if (!isatty(fd) || fy_term_set_raw(fd, &saved) != 0)
		return -1;

	query_rc = fy_term_query_size_raw(fd, rows, cols);

	/* a failed restore overrides whatever the query returned */
	return fy_term_restore(fd, &saved) != 0 ? -1 : query_rc;
}
len); +int fy_tag_uri_length(const char *data, size_t len); + +struct fy_tag_scan_info { + int total_length; + int handle_length; + int uri_length; + int prefix_length; + int suffix_length; +}; + +int fy_tag_scan(const char *data, size_t len, struct fy_tag_scan_info *info); + +#ifndef ARRAY_SIZE +#define ARRAY_SIZE(x) ((sizeof(x)/sizeof((x)[0]))) +#endif + +#if !defined(NDEBUG) && (defined(__GNUC__) && __GNUC__ >= 4) +#define FY_ALWAYS_INLINE __attribute__((always_inline)) +#else +#define FY_ALWAYS_INLINE /* nothing */ +#endif + +#if defined(__GNUC__) && __GNUC__ >= 4 +#define FY_UNUSED __attribute__((unused)) +#else +#define FY_UNUSED /* nothing */ +#endif + +int fy_term_set_raw(int fd, struct termios *oldt); +int fy_term_restore(int fd, const struct termios *oldt); +ssize_t fy_term_write(int fd, const void *data, size_t count); +int fy_term_safe_write(int fd, const void *data, size_t count); +ssize_t fy_term_read(int fd, void *data, size_t count, int timeout_us); +ssize_t fy_term_read_escape(int fd, void *buf, size_t count); + +/* the raw methods require the terminal to be in raw mode */ +int fy_term_query_size_raw(int fd, int *rows, int *cols); + +/* the non raw methods will set the terminal to raw and then restore */ +int fy_term_query_size(int fd, int *rows, int *cols); + +#endif diff --git a/contrib/libs/libfyaml/src/lib/fy-walk.c b/contrib/libs/libfyaml/src/lib/fy-walk.c new file mode 100644 index 0000000000..d76125071e --- /dev/null +++ b/contrib/libs/libfyaml/src/lib/fy-walk.c @@ -0,0 +1,5236 @@ +/* + * fy-walk.c - path walker + * + * Copyright (c) 2021 Pantelis Antoniou <pantelis.antoniou@konsulko.com> + * + * SPDX-License-Identifier: MIT + */ +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#include <stdio.h> +#include <string.h> +#include <assert.h> +#include <stdlib.h> +#include <ctype.h> +#include <errno.h> +#if defined (__unix__) || (defined (__APPLE__) && defined (__MACH__)) +#include <unistd.h> +#endif +#include <math.h> +#include <limits.h> + 
+#include <libfyaml.h> + +#include "fy-parse.h" +#include "fy-doc.h" +#include "fy-walk.h" + +#include "fy-utils.h" + +#undef DEBUG_EXPR +// #define DEBUG_EXPR + +const char *fy_walk_result_type_txt[FWRT_COUNT] = { + [fwrt_none] = "none", + [fwrt_node_ref] = "node-ref", + [fwrt_number] = "number", + [fwrt_string] = "string", + [fwrt_doc] = "doc", + [fwrt_refs] = "refs", +}; + +void fy_walk_result_dump(struct fy_walk_result *fwr, struct fy_diag *diag, enum fy_error_type errlevel, int level, + const char *fmt, ...); + +void fy_walk_result_vdump(struct fy_walk_result *fwr, struct fy_diag *diag, enum fy_error_type errlevel, int level, + const char *fmt, va_list ap) +{ + struct fy_walk_result *fwr2; + char *banner; + char *texta = NULL; + const char *text = ""; + size_t len; + bool save_on_error; + char buf[30]; + int rc __FY_DEBUG_UNUSED__; + + if (!diag) + return; + + if (errlevel < diag->cfg.level) + return; + + save_on_error = diag->on_error; + diag->on_error = true; + + if (fmt) { + banner = NULL; + rc = vasprintf(&banner, fmt, ap); + assert(rc != -1); + assert(banner); + fy_diag_diag(diag, errlevel, "%-*s%s", level*2, "", banner); + free(banner); + } + if (!fwr) + goto out; + + switch (fwr->type) { + case fwrt_none: + text=""; + break; + case fwrt_node_ref: + texta = fy_node_get_path(fwr->fyn); + assert(texta); + text = texta; + break; + case fwrt_number: + snprintf(buf, sizeof(buf), "%f", fwr->number); + text = buf; + break; + case fwrt_string: + text = fwr->string; + break; + case fwrt_doc: + texta = fy_emit_document_to_string(fwr->fyd, FYECF_WIDTH_INF | FYECF_INDENT_DEFAULT | FYECF_MODE_FLOW_ONELINE); + assert(texta); + text = texta; + break; + case fwrt_refs: + text=""; + break; + } + + len = strlen(text); + + fy_diag_diag(diag, errlevel, "%-*s%s%s%.*s", + (level + 1) * 2, "", + fy_walk_result_type_txt[fwr->type], + len ? 
" " : "", + (int)len, text); + + if (texta) + free(texta); + + if (fwr->type == fwrt_refs) { + for (fwr2 = fy_walk_result_list_head(&fwr->refs); fwr2; fwr2 = fy_walk_result_next(&fwr->refs, fwr2)) + fy_walk_result_dump(fwr2, diag, errlevel, level + 1, NULL); + } +out: + diag->on_error = save_on_error; +} + +void fy_walk_result_dump(struct fy_walk_result *fwr, struct fy_diag *diag, enum fy_error_type errlevel, int level, + const char *fmt, ...) +{ + va_list ap; + + va_start(ap, fmt); + fy_walk_result_vdump(fwr, diag, errlevel, level, fmt, ap); + va_end(ap); +} + +/* NOTE that walk results do not take references and it is invalid to + * use _any_ call that modifies the document structure + */ +struct fy_walk_result *fy_walk_result_alloc_rl(struct fy_walk_result_list *fwrl) +{ + struct fy_walk_result *fwr = NULL; + + if (fwrl) + fwr = fy_walk_result_list_pop(fwrl); + + if (!fwr) { + fwr = malloc(sizeof(*fwr)); + if (!fwr) + return NULL; + memset(fwr, 0, sizeof(*fwr)); + } + fwr->type = fwrt_none; + return fwr; +} + +struct fy_walk_result *fy_walk_result_clone_rl(struct fy_walk_result_list *fwrl, struct fy_walk_result *fwr) +{ + struct fy_walk_result *fwrn = NULL, *fwrn2 = NULL, *fwrn3; + + if (!fwr) + return NULL; + + fwrn = fy_walk_result_alloc_rl(fwrl); + if (!fwrn) + return NULL; + + fwrn->type = fwr->type; + switch (fwr->type) { + case fwrt_none: + break; + case fwrt_node_ref: + fwrn->fyn = fwr->fyn; + break; + case fwrt_number: + fwrn->number = fwr->number; + break; + case fwrt_string: + fwrn->string = strdup(fwr->string); + if (!fwrn->string) + goto err_out; + break; + case fwrt_doc: + fwrn->fyd = fy_document_clone(fwr->fyd); + if (!fwrn->fyd) + goto err_out; + break; + case fwrt_refs: + + fy_walk_result_list_init(&fwrn->refs); + + for (fwrn2 = fy_walk_result_list_head(&fwr->refs); fwrn2; + fwrn2 = fy_walk_result_next(&fwr->refs, fwrn2)) { + + fwrn3 = fy_walk_result_clone_rl(fwrl, fwrn2); + if (!fwrn3) + goto err_out; + + 
fy_walk_result_list_add_tail(&fwrn->refs, fwrn3); + } + break; + } + return fwrn; +err_out: + if (fwrn) + fy_walk_result_free_rl(fwrl, fwrn); + return NULL; +} + +struct fy_walk_result *fy_walk_result_clone(struct fy_walk_result *fwr) +{ + struct fy_walk_result_list *fwrl; + + if (!fwr) + return NULL; + + fwrl = fy_path_exec_walk_result_rl(fwr->fypx); + return fy_walk_result_clone_rl(fwrl, fwr); +} + +void fy_walk_result_clean_rl(struct fy_walk_result_list *fwrl, struct fy_walk_result *fwr) +{ + struct fy_walk_result *fwrn; + + if (!fwr) + return; + + switch (fwr->type) { + case fwrt_none: + break; + case fwrt_node_ref: + case fwrt_number: + break; + case fwrt_string: + if (fwr->string) + free(fwr->string); + break; + case fwrt_doc: + if (fwr->fyd) + fy_document_destroy(fwr->fyd); + break; + case fwrt_refs: + while ((fwrn = fy_walk_result_list_pop(&fwr->refs)) != NULL) + fy_walk_result_free_rl(fwrl, fwrn); + break; + } + + fwr->type = fwrt_none; +} + +void fy_walk_result_clean(struct fy_walk_result *fwr) +{ + struct fy_walk_result_list *fwrl; + + if (!fwr) + return; + + fwrl = fy_path_exec_walk_result_rl(fwr->fypx); + fy_walk_result_clean_rl(fwrl, fwr); +} + +void fy_walk_result_free_rl(struct fy_walk_result_list *fwrl, struct fy_walk_result *fwr) +{ + struct fy_path_exec *fypx; + + if (!fwr) + return; + + fypx = fwr->fypx; + + fy_walk_result_clean_rl(fwrl, fwr); + + if (fwrl) + fy_walk_result_list_push(fwrl, fwr); + else + free(fwr); + + fy_path_exec_unref(fypx); /* NULL is OK */ +} + +void fy_walk_result_free(struct fy_walk_result *fwr) +{ + struct fy_walk_result_list *fwrl; + + if (!fwr) + return; + + fwrl = fy_path_exec_walk_result_rl(fwr->fypx); + fy_walk_result_free_rl(fwrl, fwr); +} + +void fy_walk_result_list_free_rl(struct fy_walk_result_list *fwrl, struct fy_walk_result_list *results) +{ + struct fy_walk_result *fwr; + + while ((fwr = fy_walk_result_list_pop(results)) != NULL) + fy_walk_result_free_rl(fwrl, fwr); +} + +struct fy_walk_result 
*fy_walk_result_vcreate_rl(struct fy_walk_result_list *fwrl, enum fy_walk_result_type type, va_list ap) +{ + struct fy_walk_result *fwr = NULL; + + if ((unsigned int)type >= FWRT_COUNT) + goto err_out; + + fwr = fy_walk_result_alloc_rl(fwrl); + if (!fwr) + goto err_out; + + fwr->type = type; + + switch (fwr->type) { + case fwrt_none: + break; + case fwrt_node_ref: + fwr->fyn = va_arg(ap, struct fy_node *); + break; + case fwrt_number: + fwr->number = va_arg(ap, double); + break; + case fwrt_string: + fwr->string = strdup(va_arg(ap, const char *)); + if (!fwr->string) + goto err_out; + break; + case fwrt_doc: + fwr->fyd = va_arg(ap, struct fy_document *); + break; + case fwrt_refs: + fy_walk_result_list_init(&fwr->refs); + break; + } + + return fwr; + +err_out: + fy_walk_result_free_rl(fwrl, fwr); + + return NULL; +} + +struct fy_walk_result *fy_walk_result_create_rl(struct fy_walk_result_list *fwrl, enum fy_walk_result_type type, ...) +{ + struct fy_walk_result *fwr; + va_list ap; + + va_start(ap, type); + fwr = fy_walk_result_vcreate_rl(fwrl, type, ap); + va_end(ap); + + return fwr; +} + +void fy_walk_result_flatten_internal(struct fy_walk_result *fwr, struct fy_walk_result *fwrf) +{ + struct fy_walk_result *fwr2, *fwr2n; + + if (!fwr || !fwrf || fwr->type != fwrt_refs) + return; + + for (fwr2 = fy_walk_result_list_head(&fwr->refs); fwr2; fwr2 = fwr2n) { + + fwr2n = fy_walk_result_next(&fwr->refs, fwr2); + + if (fwr2->type != fwrt_refs) { + fy_walk_result_list_del(&fwr->refs, fwr2); + fy_walk_result_list_add_tail(&fwrf->refs, fwr2); + continue; + } + fy_walk_result_flatten_internal(fwr2, fwrf); + } +} + +bool fy_walk_result_has_leaves_only(struct fy_walk_result *fwr) +{ + struct fy_walk_result *fwrn; + + if (!fwr || fwr->type != fwrt_refs) + return false; + + if (fy_walk_result_list_empty(&fwr->refs)) + return false; + + for (fwrn = fy_walk_result_list_head(&fwr->refs); fwrn; + fwrn = fy_walk_result_next(&fwr->refs, fwrn)) { + + if (fwrn->type == fwrt_refs) + 
return false; + } + + return true; +} + +struct fy_walk_result * +fy_walk_result_flatten_rl(struct fy_walk_result_list *fwrl, struct fy_walk_result *fwr) +{ + struct fy_walk_result *fwrf; + + if (!fwr) + return NULL; + + fwrf = fy_walk_result_create_rl(fwrl, fwrt_refs); + assert(fwrf); + + fy_walk_result_flatten_internal(fwr, fwrf); + fy_walk_result_free_rl(fwrl, fwr); + return fwrf; +} + +struct fy_walk_result * +fy_walk_result_flatten(struct fy_walk_result *fwr) +{ + struct fy_walk_result_list *fwrl; + + if (!fwr) + return NULL; + + fwrl = fy_path_exec_walk_result_rl(fwr->fypx); + return fy_walk_result_flatten_rl(fwrl, fwr); +} + +struct fy_node * +fy_walk_result_node_iterate(struct fy_walk_result *fwr, void **prevp) +{ + struct fy_walk_result *fwrn; + + if (!fwr || !prevp) + return NULL; + + switch (fwr->type) { + case fwrt_node_ref: + if (!*prevp) { + *prevp = fwr; + return fwr->fyn; + } + *prevp = NULL; + return NULL; + + case fwrt_refs: + if (!*prevp) + fwrn = fy_walk_result_list_head(&fwr->refs); + else + fwrn = fy_walk_result_next(&fwr->refs, *prevp); + + /* skip over any non node refs */ + while (fwrn && fwrn->type != fwrt_node_ref) + fwrn = fy_walk_result_next(&fwr->refs, fwrn); + *prevp = fwrn; + return fwrn ? 
fwrn->fyn : NULL; + + default: + break; + } + + return NULL; +} + +const char *fy_path_expr_type_txt[FPET_COUNT] = { + [fpet_none] = "none", + /* */ + [fpet_root] = "root", + [fpet_this] = "this", + [fpet_parent] = "parent", + [fpet_every_child] = "every-child", + [fpet_every_child_r] = "every-child-recursive", + [fpet_filter_collection] = "filter-collection", + [fpet_filter_scalar] = "filter-scalar", + [fpet_filter_sequence] = "filter-sequence", + [fpet_filter_mapping] = "filter-mapping", + [fpet_filter_unique] = "filter-unique", + [fpet_seq_index] = "seq-index", + [fpet_seq_slice] = "seq-slice", + [fpet_alias] = "alias", + + [fpet_map_key] = "map-key", + + [fpet_multi] = "multi", + [fpet_chain] = "chain", + [fpet_logical_or] = "logical-or", + [fpet_logical_and] = "logical-and", + + [fpet_eq] = "equals", + [fpet_neq] = "not-equals", + [fpet_lt] = "less-than", + [fpet_gt] = "greater-than", + [fpet_lte] = "less-or-equal-than", + [fpet_gte] = "greater-or-equal-than", + + [fpet_scalar] = "scalar", + + [fpet_plus] = "plus", + [fpet_minus] = "minus", + [fpet_mult] = "multiply", + [fpet_div] = "divide", + + [fpet_lparen] = "left-parentheses", + [fpet_rparen] = "right-parentheses", + [fpet_method] = "method", + + [fpet_scalar_expr] = "scalar-expression", + [fpet_path_expr] = "path-expression", + [fpet_arg_separator] = "argument-separator", +}; + +struct fy_path_expr *fy_path_expr_alloc(void) +{ + struct fy_path_expr *expr = NULL; + + expr = malloc(sizeof(*expr)); + if (!expr) + return NULL; + memset(expr, 0, sizeof(*expr)); + fy_path_expr_list_init(&expr->children); + + return expr; +} + +void fy_path_expr_free(struct fy_path_expr *expr) +{ + struct fy_path_expr *exprn; + + if (!expr) + return; + + while ((exprn = fy_path_expr_list_pop(&expr->children)) != NULL) + fy_path_expr_free(exprn); + + fy_token_unref(expr->fyt); + + free(expr); +} + +struct fy_path_expr *fy_path_expr_alloc_recycle(struct fy_path_parser *fypp) +{ + struct fy_path_expr *expr = NULL; + + if (!fypp || 
fypp->suppress_recycling) + expr = fy_path_expr_alloc(); + + if (!expr) { + expr = fy_path_expr_list_pop(&fypp->expr_recycle); + if (expr) { + memset(expr, 0, sizeof(*expr)); + fy_path_expr_list_init(&expr->children); + } else + expr = fy_path_expr_alloc(); + } + + if (!expr) + return NULL; + + expr->expr_mode = fypp->expr_mode; + + return expr; +} + +void fy_path_expr_free_recycle(struct fy_path_parser *fypp, struct fy_path_expr *expr) +{ + struct fy_path_expr *exprn; + + if (!fypp || fypp->suppress_recycling) { + fy_path_expr_free(expr); + return; + } + + while ((exprn = fy_path_expr_list_pop(&expr->children)) != NULL) + fy_path_expr_free_recycle(fypp, exprn); + + if (expr->fyt) { + fy_token_unref(expr->fyt); + expr->fyt = NULL; + } + fy_path_expr_list_add_tail(&fypp->expr_recycle, expr); +} + +void fy_expr_stack_setup(struct fy_expr_stack *stack) +{ + if (!stack) + return; + + memset(stack, 0, sizeof(*stack)); + stack->items = stack->items_static; + stack->alloc = ARRAY_SIZE(stack->items_static); +} + +void fy_expr_stack_cleanup(struct fy_expr_stack *stack) +{ + if (!stack) + return; + + while (stack->top > 0) + fy_path_expr_free(stack->items[--stack->top]); + + if (stack->items != stack->items_static) + free(stack->items); + stack->items = stack->items_static; + stack->alloc = ARRAY_SIZE(stack->items_static); +} + +void fy_expr_stack_dump(struct fy_diag *diag, struct fy_expr_stack *stack) +{ + struct fy_path_expr *expr; + unsigned int i; + + if (!stack) + return; + + if (!stack->top) + return; + + i = stack->top; + do { + expr = stack->items[--i]; + fy_path_expr_dump(expr, diag, FYET_NOTICE, 0, NULL); + } while (i > 0); +} + +int fy_expr_stack_size(struct fy_expr_stack *stack) +{ + if (!stack || stack->top >= (unsigned int)INT_MAX) + return -1; + return (int)stack->top; +} + +int fy_expr_stack_push(struct fy_expr_stack *stack, struct fy_path_expr *expr) +{ + struct fy_path_expr **items_new; + unsigned int alloc; + size_t size; + + if (!stack || !expr) + return 
-1; + + assert(stack->items); + assert(stack->alloc > 0); + + assert(expr->fyt); + + /* grow the stack if required */ + if (stack->top >= stack->alloc) { + alloc = stack->alloc; + size = alloc * sizeof(*items_new); + + if (stack->items == stack->items_static) { + items_new = malloc(size * 2); + if (items_new) + memcpy(items_new, stack->items_static, size); + } else + items_new = realloc(stack->items, size * 2); + + if (!items_new) + return -1; + + stack->alloc = alloc * 2; + stack->items = items_new; + } + + stack->items[stack->top++] = expr; + + return 0; +} + +struct fy_path_expr *fy_expr_stack_peek_at(struct fy_expr_stack *stack, unsigned int pos) +{ + if (!stack || stack->top <= pos) + return NULL; + return stack->items[stack->top - 1 - pos]; +} + +struct fy_path_expr *fy_expr_stack_peek(struct fy_expr_stack *stack) +{ + return fy_expr_stack_peek_at(stack, 0); +} + +struct fy_path_expr *fy_expr_stack_pop(struct fy_expr_stack *stack) +{ + if (!stack || !stack->top) + return NULL; + + return stack->items[--stack->top]; +} + +bool fy_token_type_can_be_path_expr(enum fy_token_type type) +{ + return type == FYTT_NONE || + type == FYTT_PE_LPAREN || + type == FYTT_PE_RPAREN || + type == FYTT_PE_EQEQ || + type == FYTT_PE_NOTEQ || + type == FYTT_PE_GT || + type == FYTT_PE_LT || + type == FYTT_PE_GTE || + type == FYTT_PE_LTE || + type == FYTT_PE_METHOD; +} + +bool fy_token_type_can_be_before_negative_number(enum fy_token_type type) +{ + return type == FYTT_NONE || + type == FYTT_PE_LPAREN || + type == FYTT_PE_RPAREN || + type == FYTT_PE_EQEQ || + type == FYTT_PE_NOTEQ || + type == FYTT_PE_GT || + type == FYTT_PE_LT || + type == FYTT_PE_GTE || + type == FYTT_PE_LTE || + type == FYTT_SE_PLUS || + type == FYTT_SE_MINUS || + type == FYTT_SE_MULT || + type == FYTT_SE_DIV || + + type == FYTT_SE_METHOD; +} + +const char *fy_expr_mode_txt[FYEM_COUNT] = { + [fyem_none] = "none", + [fyem_path] = "path", + [fyem_scalar] = "scalar", +}; + +static struct fy_diag 
*fy_path_parser_reader_get_diag(struct fy_reader *fyr) +{ + struct fy_path_parser *fypp = fy_container_of(fyr, struct fy_path_parser, reader); + return fypp->cfg.diag; +} + +static const struct fy_reader_ops fy_path_parser_reader_ops = { + .get_diag = fy_path_parser_reader_get_diag, +}; + +void fy_path_parser_setup(struct fy_path_parser *fypp, const struct fy_path_parse_cfg *pcfg) +{ + if (!fypp) + return; + + memset(fypp, 0, sizeof(*fypp)); + if (pcfg) + fypp->cfg = *pcfg; + fy_reader_setup(&fypp->reader, &fy_path_parser_reader_ops); + fy_token_list_init(&fypp->queued_tokens); + fypp->last_queued_token_type = FYTT_NONE; + + fy_expr_stack_setup(&fypp->operators); + fy_expr_stack_setup(&fypp->operands); + + fy_path_expr_list_init(&fypp->expr_recycle); + fypp->suppress_recycling = (fypp->cfg.flags & FYPPCF_DISABLE_RECYCLING) || getenv("FY_VALGRIND"); + + fypp->expr_mode = fyem_path; + fypp->paren_nest_level = 0; +} + +void fy_path_parser_cleanup(struct fy_path_parser *fypp) +{ + struct fy_path_expr *expr; + + if (!fypp) + return; + + fy_expr_stack_cleanup(&fypp->operands); + fy_expr_stack_cleanup(&fypp->operators); + + fy_reader_cleanup(&fypp->reader); + fy_token_list_unref_all(&fypp->queued_tokens); + + while ((expr = fy_path_expr_list_pop(&fypp->expr_recycle)) != NULL) + fy_path_expr_free(expr); + + fypp->last_queued_token_type = FYTT_NONE; + fypp->stream_start_produced = false; + fypp->stream_end_produced = false; + fypp->stream_error = false; + fypp->token_activity_counter = 0; + fypp->paren_nest_level = 0; +} + +int fy_path_parser_open(struct fy_path_parser *fypp, + struct fy_input *fyi, const struct fy_reader_input_cfg *icfg) +{ + int ret; + if (!fypp) + return -1; + + ret = fy_reader_input_open(&fypp->reader, fyi, icfg); + if (ret) + return ret; + /* take a reference to the input */ + fypp->fyi = fy_input_ref(fyi); + return 0; +} + +void fy_path_parser_close(struct fy_path_parser *fypp) +{ + if (!fypp) + return; + + fy_input_unref(fypp->fyi); + + 
fy_reader_input_done(&fypp->reader); +} + +struct fy_token *fy_path_token_vqueue(struct fy_path_parser *fypp, enum fy_token_type type, va_list ap) +{ + struct fy_token *fyt; + + fyt = fy_token_list_vqueue(&fypp->queued_tokens, type, ap); + if (fyt) { + fypp->token_activity_counter++; + fypp->last_queued_token_type = type; + } + return fyt; +} + +struct fy_token *fy_path_token_queue(struct fy_path_parser *fypp, enum fy_token_type type, ...) +{ + va_list ap; + struct fy_token *fyt; + + va_start(ap, type); + fyt = fy_path_token_vqueue(fypp, type, ap); + va_end(ap); + + return fyt; +} + +int fy_path_fetch_seq_index_or_slice(struct fy_path_parser *fypp, int c) +{ + struct fy_reader *fyr; + struct fy_token *fyt; + bool neg; + int i, j, val, nval, digits, indices[2]; + + fyr = &fypp->reader; + + /* verify that the called context is correct */ + assert(fy_is_num(c) || (c == '-' && fy_is_num(fy_reader_peek_at(fyr, 1)))); + + i = 0; + indices[0] = indices[1] = -1; + + j = 0; + while (j < 2) { + + neg = false; + if (c == '-') { + neg = true; + i++; + } + + digits = 0; + val = 0; + while (fy_is_num((c = fy_reader_peek_at(fyr, i)))) { + nval = (val * 10) | (c - '0'); + FYR_PARSE_ERROR_CHECK(fyr, 0, i, FYEM_SCAN, + nval >= val && nval >= 0, err_out, + "illegal sequence index (overflow)"); + val = nval; + i++; + digits++; + } + FYR_PARSE_ERROR_CHECK(fyr, 0, i, FYEM_SCAN, + (val == 0 && digits == 1) || (val > 0), err_out, + "bad number"); + if (neg) + val = -val; + + indices[j] = val; + + /* continue only on slice : */ + if (c == ':') { + c = fy_reader_peek_at(fyr, i + 1); + if (fy_is_num(c) || (c == '-' && fy_is_num(fy_reader_peek_at(fyr, i + 2)))) { + i++; + j++; + continue; + } + } + + break; + } + + if (j >= 1) + fyt = fy_path_token_queue(fypp, FYTT_PE_SEQ_SLICE, fy_reader_fill_atom_a(fyr, i), indices[0], indices[1]); + else + fyt = fy_path_token_queue(fypp, FYTT_PE_SEQ_INDEX, fy_reader_fill_atom_a(fyr, i), indices[0]); + + fyr_error_check(fyr, fyt, err_out, 
"fy_path_token_queue() failed\n"); + + return 0; + +err_out: + fypp->stream_error = true; + return -1; +} + +int fy_path_fetch_plain_or_method(struct fy_path_parser *fypp, int c, + enum fy_token_type fytt_plain, + enum fy_token_type fytt_method) +{ + struct fy_reader *fyr; + struct fy_token *fyt; + struct fy_atom *handlep; + int i; + enum fy_token_type type; + + fyr = &fypp->reader; + + assert(fy_is_first_alpha(c)); + + type = fytt_plain; + + i = 1; + while (fy_is_alnum(fy_reader_peek_at(fyr, i))) + i++; + + if (fy_reader_peek_at(fyr, i) == '(') + type = fytt_method; + + handlep = fy_reader_fill_atom_a(fyr, i); + if (type == FYTT_SCALAR) { + fyt = fy_path_token_queue(fypp, FYTT_SCALAR, handlep, FYSS_PLAIN, NULL); + fyr_error_check(fyr, fyt, err_out, "fy_path_token_queue() failed\n"); + } else { + fyt = fy_path_token_queue(fypp, type, handlep, NULL); + fyr_error_check(fyr, fyt, err_out, "fy_path_token_queue() failed\n"); + } + + return 0; + +err_out: + fypp->stream_error = true; + return -1; +} + +int fy_path_fetch_dot_method(struct fy_path_parser *fypp, int c, enum fy_token_type fytt) +{ + struct fy_reader *fyr; + struct fy_token *fyt; + struct fy_atom *handlep; + int i; + + fyr = &fypp->reader; + + assert(c == '.'); + fy_reader_advance(fyr, c); + c = fy_reader_peek(fyr); + assert(fy_is_first_alpha(c)); + + /* verify that the called context is correct */ + i = 1; + while (fy_is_alnum(fy_reader_peek_at(fyr, i))) + i++; + + handlep = fy_reader_fill_atom_a(fyr, i); + + fyt = fy_path_token_queue(fypp, fytt, handlep, NULL); + fyr_error_check(fyr, fyt, err_out, "fy_path_token_queue() failed\n"); + + return 0; + +err_out: + fypp->stream_error = true; + return -1; +} + +int fy_path_fetch_flow_document(struct fy_path_parser *fypp, int c, enum fy_token_type fytt) +{ + struct fy_reader *fyr; + struct fy_token *fyt; + struct fy_document *fyd; + struct fy_atom handle; + struct fy_parser fyp_data, *fyp = &fyp_data; + struct fy_parse_cfg cfg_data, *cfg = NULL; + int rc; + + fyr = 
&fypp->reader; + + /* verify that the called context is correct */ + assert(fy_is_path_flow_key_start(c)); + + fy_reader_fill_atom_start(fyr, &handle); + + cfg = &cfg_data; + memset(cfg, 0, sizeof(*cfg)); + cfg->flags = FYPCF_DEFAULT_PARSE; + cfg->diag = fypp->cfg.diag; + + rc = fy_parse_setup(fyp, cfg); + fyr_error_check(fyr, !rc, err_out, "fy_parse_setup() failed\n"); + + /* associate with reader and set flow mode */ + fy_parser_set_reader(fyp, fyr); + fy_parser_set_flow_only_mode(fyp, true); + + fyd = fy_parse_load_document(fyp); + + /* cleanup the parser no matter what */ + fy_parse_cleanup(fyp); + + fyr_error_check(fyr, fyd, err_out, "fy_parse_load_document() failed\n"); + + fy_reader_fill_atom_end(fyr, &handle); + + /* document is NULL, is a simple key */ + fyt = fy_path_token_queue(fypp, fytt, &handle, fyd); + fyr_error_check(fyr, fyt, err_out, "fy_path_token_queue() failed\n"); + + return 0; + +err_out: + fypp->stream_error = true; + return -1; +} + +int fy_path_fetch_flow_map_key(struct fy_path_parser *fypp, int c) +{ + return fy_path_fetch_flow_document(fypp, c, FYTT_PE_MAP_KEY); +} + +int fy_path_fetch_flow_scalar(struct fy_path_parser *fypp, int c) +{ + struct fy_reader *fyr; + struct fy_token *fyt; + struct fy_atom handle; + bool is_single; + int rc = -1; + + fyr = &fypp->reader; + + /* verify that the called context is correct */ + assert(fy_is_path_flow_scalar_start(c)); + + is_single = c == '\''; + + rc = fy_reader_fetch_flow_scalar_handle(fyr, c, 0, &handle, false); + if (rc) + goto err_out_rc; + + /* document is NULL, is a simple key */ + fyt = fy_path_token_queue(fypp, FYTT_SCALAR, &handle, is_single ? 
FYSS_SINGLE_QUOTED : FYSS_DOUBLE_QUOTED); + fyr_error_check(fyr, fyt, err_out, "fy_path_token_queue() failed\n"); + + return 0; + +err_out: + rc = -1; +err_out_rc: + fypp->stream_error = true; + return rc; +} + +int fy_path_fetch_number(struct fy_path_parser *fypp, int c) +{ + struct fy_reader *fyr; + struct fy_token *fyt; + int i, digits; + + fyr = &fypp->reader; + + /* verify that the called context is correct */ + assert(fy_is_num(c) || (c == '-' && fy_is_num(fy_reader_peek_at(fyr, 1)))); + + i = 0; + if (c == '-') + i++; + + digits = 0; + while (fy_is_num((c = fy_reader_peek_at(fyr, i)))) { + i++; + digits++; + } + FYR_PARSE_ERROR_CHECK(fyr, 0, i, FYEM_SCAN, + digits > 0, err_out, + "bad number"); + + fyt = fy_path_token_queue(fypp, FYTT_SCALAR, fy_reader_fill_atom_a(fyr, i), FYSS_PLAIN); + fyr_error_check(fyr, fyt, err_out, "fy_path_token_queue() failed\n"); + + return 0; + +err_out: + fypp->stream_error = true; + return -1; +} + +int fy_path_fetch_tokens(struct fy_path_parser *fypp) +{ + enum fy_token_type type; + struct fy_token *fyt; + struct fy_reader *fyr; + int c, cn, rc, simple_token_count; + + fyr = &fypp->reader; + if (!fypp->stream_start_produced) { + + fyt = fy_path_token_queue(fypp, FYTT_STREAM_START, fy_reader_fill_atom_a(fyr, 0)); + fyr_error_check(fyr, fyt, err_out, "fy_path_token_queue() failed\n"); + + fypp->stream_start_produced = true; + return 0; + } + + /* XXX scan to next token? 
*/ + + c = fy_reader_peek(fyr); + + if (fy_is_z(c)) { + + if (c >= 0) + fy_reader_advance(fyr, c); + + /* produce stream end continuously */ + fyt = fy_path_token_queue(fypp, FYTT_STREAM_END, fy_reader_fill_atom_a(fyr, 0)); + fyr_error_check(fyr, fyt, err_out, "fy_path_token_queue() failed\n"); + + return 0; + } + + fyt = NULL; + type = FYTT_NONE; + simple_token_count = 0; + + /* first do the common tokens */ + switch (c) { + case ',': + type = FYTT_PE_COMMA; + simple_token_count = 1; + break; + + case '|': + if (fy_reader_peek_at(fyr, 1) == '|') { + type = FYTT_PE_BARBAR; + simple_token_count = 2; + break; + } + break; + + case '&': + if (fy_reader_peek_at(fyr, 1) == '&') { + type = FYTT_PE_AMPAMP; + simple_token_count = 2; + break; + } + break; + + case '(': + type = FYTT_PE_LPAREN; + simple_token_count = 1; + break; + + case ')': + type = FYTT_PE_RPAREN; + simple_token_count = 1; + break; + + case '=': + cn = fy_reader_peek_at(fyr, 1); + if (cn == '=') { + type = FYTT_PE_EQEQ; + simple_token_count = 2; + break; + } + break; + + case '>': + cn = fy_reader_peek_at(fyr, 1); + if (cn == '=') { + type = FYTT_PE_GTE; + simple_token_count = 2; + break; + } + type = FYTT_PE_GT; + simple_token_count = 1; + break; + + case '<': + cn = fy_reader_peek_at(fyr, 1); + if (cn == '=') { + type = FYTT_PE_LTE; + simple_token_count = 2; + break; + } + type = FYTT_PE_LT; + simple_token_count = 1; + break; + + case '!': + cn = fy_reader_peek_at(fyr, 1); + if (cn == '=') { + type = FYTT_PE_NOTEQ; + simple_token_count = 2; + break; + } + /* may still be something else */ + break; + + default: + break; + } + + if (type != FYTT_NONE) + goto do_token; + +again: + + switch (fypp->expr_mode) { + case fyem_none: + assert(0); /* should never happen */ + break; + + case fyem_path: + + switch (c) { + case '/': + type = FYTT_PE_SLASH; + simple_token_count = 1; + break; + + case '^': + type = FYTT_PE_ROOT; + simple_token_count = 1; + break; + + case ':': + type = FYTT_PE_SIBLING; + 
simple_token_count = 1; + break; + + case '$': + type = FYTT_PE_SCALAR_FILTER; + simple_token_count = 1; + break; + + case '%': + type = FYTT_PE_COLLECTION_FILTER; + simple_token_count = 1; + break; + + case '[': + if (fy_reader_peek_at(fyr, 1) == ']') { + type = FYTT_PE_SEQ_FILTER; + simple_token_count = 2; + } + break; + + case '{': + if (fy_reader_peek_at(fyr, 1) == '}') { + type = FYTT_PE_MAP_FILTER; + simple_token_count = 2; + } + break; + + + case '.': + cn = fy_reader_peek_at(fyr, 1); + if (cn == '.') { + type = FYTT_PE_PARENT; + simple_token_count = 2; + } else if (!fy_is_first_alpha(cn)) { + type = FYTT_PE_THIS; + simple_token_count = 1; + } + break; + + case '*': + if (fy_reader_peek_at(fyr, 1) == '*') { + type = FYTT_PE_EVERY_CHILD_R; + simple_token_count = 2; + } else if (!fy_is_first_alpha(fy_reader_peek_at(fyr, 1))) { + type = FYTT_PE_EVERY_CHILD; + simple_token_count = 1; + } else { + type = FYTT_PE_ALIAS; + simple_token_count = 2; + while (fy_is_alnum(fy_reader_peek_at(fyr, simple_token_count))) + simple_token_count++; + } + break; + + case '!': + cn = fy_reader_peek_at(fyr, 1); + if (cn == '=') { + type = FYTT_PE_NOTEQ; + simple_token_count = 2; + break; + } + type = FYTT_PE_UNIQUE_FILTER; + simple_token_count = 1; + break; + + default: + break; + } + break; + + case fyem_scalar: + + + /* it is possible for the expression to be a path + * we only detect a few cases (doing all too complex) + * (/ , (./ , (* + */ + if (fy_token_type_can_be_path_expr(fypp->last_queued_token_type)) { + cn = fy_reader_peek_at(fyr, 1); + if (c == '/' || + (c == '.' && (cn == '/' || cn == ')' || cn == '>' || cn == '<' || cn == '!' 
|| cn == '='))) { + fypp->expr_mode = fyem_path; +#ifdef DEBUG_EXPR + fyr_notice(fyr, "switching to path expr\n"); +#endif + goto again; + } + } + + switch (c) { + case '+': + type = FYTT_SE_PLUS; + simple_token_count = 1; + break; + + case '-': + cn = fy_reader_peek_at(fyr, 1); + if (fy_is_num(cn) && + fy_token_type_can_be_before_negative_number(fypp->last_queued_token_type)) + break; + + type = FYTT_SE_MINUS; + simple_token_count = 1; + break; + + case '*': + type = FYTT_SE_MULT; + simple_token_count = 1; + break; + + case '/': + type = FYTT_SE_DIV; + simple_token_count = 1; + break; + + default: + break; + } + break; + } + +do_token: + /* simple tokens */ + if (simple_token_count > 0) { + fyt = fy_path_token_queue(fypp, type, fy_reader_fill_atom_a(fyr, simple_token_count)); + fyr_error_check(fyr, fyt, err_out, "fy_path_token_queue() failed\n"); + + return 0; + } + + switch (fypp->expr_mode) { + case fyem_none: + assert(0); /* should never happen */ + break; + + case fyem_path: + if (fy_is_first_alpha(c)) + return fy_path_fetch_plain_or_method(fypp, c, FYTT_PE_MAP_KEY, FYTT_PE_METHOD); + + if (fy_is_path_flow_key_start(c)) + return fy_path_fetch_flow_map_key(fypp, c); + + if (fy_is_num(c) || (c == '-' && fy_is_num(fy_reader_peek_at(fyr, 1)))) + return fy_path_fetch_seq_index_or_slice(fypp, c); + + if (c == '.' && fy_is_first_alpha(fy_reader_peek_at(fyr, 1))) + return fy_path_fetch_dot_method(fypp, c, FYTT_PE_METHOD); + + break; + + case fyem_scalar: + + if (fy_is_first_alpha(c)) + return fy_path_fetch_plain_or_method(fypp, c, FYTT_SCALAR, FYTT_SE_METHOD); + + if (fy_is_path_flow_scalar_start(c)) + return fy_path_fetch_flow_scalar(fypp, c); + + if (fy_is_num(c) || (c == '-' && fy_is_num(fy_reader_peek_at(fyr, 1)))) + return fy_path_fetch_number(fypp, c); + +#if 0 + if (c == '.' 
&& fy_is_first_alpha(fy_reader_peek_at(fyr, 1))) + return fy_path_fetch_dot_method(fypp, c, FYTT_SE_METHOD); +#endif + + break; + } + + FYR_PARSE_ERROR(fyr, 0, 1, FYEM_SCAN, "bad path expression starts here c=%d", c); + +err_out: + fypp->stream_error = true; + rc = -1; + return rc; +} + +struct fy_token *fy_path_scan_peek(struct fy_path_parser *fypp, struct fy_token *fyt_prev) +{ + struct fy_token *fyt; + struct fy_reader *fyr; + int rc, last_token_activity_counter; + + fyr = &fypp->reader; + + /* nothing if stream end produced (and no stream end token in queue) */ + if (!fyt_prev && fypp->stream_end_produced && fy_token_list_empty(&fypp->queued_tokens)) { + + fyt = fy_token_list_head(&fypp->queued_tokens); + if (fyt && fyt->type == FYTT_STREAM_END) + return fyt; + + return NULL; + } + + for (;;) { + if (!fyt_prev) + fyt = fy_token_list_head(&fypp->queued_tokens); + else + fyt = fy_token_next(&fypp->queued_tokens, fyt_prev); + if (fyt) + break; + + /* on stream error we're done */ + if (fypp->stream_error) + return NULL; + + /* keep track of token activity, if it didn't change + * after the fetch tokens call, the state machine is stuck + */ + last_token_activity_counter = fypp->token_activity_counter; + + /* fetch more then */ + rc = fy_path_fetch_tokens(fypp); + if (rc) { + fy_error(fypp->cfg.diag, "fy_path_fetch_tokens() failed\n"); + goto err_out; + } + if (last_token_activity_counter == fypp->token_activity_counter) { + fy_error(fypp->cfg.diag, "out of tokens and failed to produce anymore"); + goto err_out; + } + } + + switch (fyt->type) { + case FYTT_STREAM_START: + fypp->stream_start_produced = true; + break; + case FYTT_STREAM_END: + fypp->stream_end_produced = true; + + rc = fy_reader_input_done(fyr); + if (rc) { + fy_error(fypp->cfg.diag, "fy_parse_input_done() failed"); + goto err_out; + } + break; + default: + break; + } + + return fyt; + +err_out: + fypp->stream_error = true; + return NULL; +} + +struct fy_token *fy_path_scan_remove(struct 
fy_path_parser *fypp, struct fy_token *fyt) +{ + if (!fypp || !fyt) + return NULL; + + fy_token_list_del(&fypp->queued_tokens, fyt); + + return fyt; +} + +struct fy_token *fy_path_scan_remove_peek(struct fy_path_parser *fypp, struct fy_token *fyt) +{ + fy_token_unref(fy_path_scan_remove(fypp, fyt)); + + return fy_path_scan_peek(fypp, NULL); +} + +struct fy_token *fy_path_scan(struct fy_path_parser *fypp) +{ + return fy_path_scan_remove(fypp, fy_path_scan_peek(fypp, NULL)); +} + +void fy_path_expr_dump(struct fy_path_expr *expr, struct fy_diag *diag, enum fy_error_type errlevel, int level, const char *banner) +{ + struct fy_path_expr *expr2; + const char *style = ""; + const char *text; + size_t len; + bool save_on_error; + + if (errlevel < diag->cfg.level) + return; + + save_on_error = diag->on_error; + diag->on_error = true; + + if (banner) + fy_diag_diag(diag, errlevel, "%-*s%s", level*2, "", banner); + + text = fy_token_get_text(expr->fyt, &len); + + style = ""; + if (expr->type == fpet_scalar) { + switch (fy_scalar_token_get_style(expr->fyt)) { + case FYSS_SINGLE_QUOTED: + style = "'"; + break; + case FYSS_DOUBLE_QUOTED: + style = "\""; + break; + default: + style = ""; + break; + } + } + + fy_diag_diag(diag, errlevel, "> %-*s%s:%s %s%.*s%s", + level*2, "", + fy_path_expr_type_txt[expr->type], + fy_expr_mode_txt[expr->expr_mode], + style, (int)len, text, style); + + for (expr2 = fy_path_expr_list_head(&expr->children); expr2; expr2 = fy_path_expr_next(&expr->children, expr2)) + fy_path_expr_dump(expr2, diag, errlevel, level + 1, NULL); + + diag->on_error = save_on_error; +} + +static struct fy_node * +fy_path_expr_to_node_internal(struct fy_document *fyd, struct fy_path_expr *expr) +{ + struct fy_path_expr *expr2; + const char *style = ""; + const char *text; + size_t len; + struct fy_node *fyn = NULL, *fyn2, *fyn_seq = NULL; + int rc; + + text = fy_token_get_text(expr->fyt, &len); + + /* by default use double quoted style */ + style = "\""; + switch 
(expr->type) { + case fpet_scalar: + switch (fy_scalar_token_get_style(expr->fyt)) { + case FYSS_SINGLE_QUOTED: + style = "'"; + break; + case FYSS_DOUBLE_QUOTED: + style = "\""; + break; + default: + style = ""; + break; + } + break; + + case fpet_map_key: + /* no styles for complex map keys */ + if (expr->fyt->map_key.fyd) + style = ""; + break; + + default: + break; + } + + /* list is empty this is a terminal */ + if (fy_path_expr_list_empty(&expr->children) && + expr->type != fpet_method) { + + fyn = fy_node_buildf(fyd, "%s: %s%.*s%s", + fy_path_expr_type_txt[expr->type], + style, (int)len, text, style); + if (!fyn) + return NULL; + + return fyn; + } + + fyn = fy_node_create_mapping(fyd); + if (!fyn) + goto err_out; + + fyn_seq = fy_node_create_sequence(fyd); + if (!fyn_seq) + goto err_out; + + for (expr2 = fy_path_expr_list_head(&expr->children); expr2; expr2 = fy_path_expr_next(&expr->children, expr2)) { + fyn2 = fy_path_expr_to_node_internal(fyd, expr2); + if (!fyn2) + goto err_out; + rc = fy_node_sequence_append(fyn_seq, fyn2); + if (rc) + goto err_out; + } + + if (expr->type != fpet_method) { + rc = fy_node_mapping_append(fyn, + fy_node_create_scalar(fyd, fy_path_expr_type_txt[expr->type], FY_NT), + fyn_seq); + } else { + rc = fy_node_mapping_append(fyn, + fy_node_create_scalarf(fyd, "%s()", expr->fym->name), + fyn_seq); + } + if (rc) + goto err_out; + + return fyn; + +err_out: + fy_node_free(fyn_seq); + fy_node_free(fyn); + return NULL; +} + +struct fy_document *fy_path_expr_to_document(struct fy_path_expr *expr) +{ + struct fy_document *fyd = NULL; + + if (!expr) + return NULL; + + fyd = fy_document_create(NULL); + if (!fyd) + return NULL; + + fyd->root = fy_path_expr_to_node_internal(fyd, expr); + if (!fyd->root) + goto err_out; + + return fyd; + +err_out: + fy_document_destroy(fyd); + return NULL; +} + +enum fy_path_expr_type fy_map_token_to_path_expr_type(enum fy_token_type type, enum fy_expr_mode mode) +{ + switch (type) { + case FYTT_PE_ROOT: + 
return fpet_root; + case FYTT_PE_THIS: + return fpet_this; + case FYTT_PE_PARENT: + case FYTT_PE_SIBLING: /* sibling maps to a chain of fpet_parent */ + return fpet_parent; + case FYTT_PE_MAP_KEY: + return fpet_map_key; + case FYTT_PE_SEQ_INDEX: + return fpet_seq_index; + case FYTT_PE_SEQ_SLICE: + return fpet_seq_slice; + case FYTT_PE_EVERY_CHILD: + return fpet_every_child; + case FYTT_PE_EVERY_CHILD_R: + return fpet_every_child_r; + case FYTT_PE_ALIAS: + return fpet_alias; + case FYTT_PE_SCALAR_FILTER: + return fpet_filter_scalar; + case FYTT_PE_COLLECTION_FILTER: + return fpet_filter_collection; + case FYTT_PE_SEQ_FILTER: + return fpet_filter_sequence; + case FYTT_PE_MAP_FILTER: + return fpet_filter_mapping; + case FYTT_PE_UNIQUE_FILTER: + return fpet_filter_unique; + case FYTT_PE_COMMA: + return mode == fyem_path ? fpet_multi : fpet_arg_separator; + case FYTT_PE_SLASH: + return fpet_chain; + case FYTT_PE_BARBAR: + return fpet_logical_or; + case FYTT_PE_AMPAMP: + return fpet_logical_and; + + case FYTT_PE_EQEQ: + return fpet_eq; + case FYTT_PE_NOTEQ: + return fpet_neq; + case FYTT_PE_LT: + return fpet_lt; + case FYTT_PE_GT: + return fpet_gt; + case FYTT_PE_LTE: + return fpet_lte; + case FYTT_PE_GTE: + return fpet_gte; + + case FYTT_SCALAR: + return fpet_scalar; + + case FYTT_SE_PLUS: + return fpet_plus; + case FYTT_SE_MINUS: + return fpet_minus; + case FYTT_SE_MULT: + return fpet_mult; + case FYTT_SE_DIV: + return fpet_div; + + case FYTT_PE_LPAREN: + return fpet_lparen; + case FYTT_PE_RPAREN: + return fpet_rparen; + + case FYTT_SE_METHOD: + case FYTT_PE_METHOD: + return fpet_method; + + default: + /* note parentheses do not have an expression */ + assert(0); + break; + } + return fpet_none; +} + +bool fy_token_type_is_operand(enum fy_token_type type) +{ + return type == FYTT_PE_ROOT || + type == FYTT_PE_THIS || + type == FYTT_PE_PARENT || + type == FYTT_PE_MAP_KEY || + type == FYTT_PE_SEQ_INDEX || + type == FYTT_PE_SEQ_SLICE || + type == FYTT_PE_EVERY_CHILD || + 
type == FYTT_PE_EVERY_CHILD_R || + type == FYTT_PE_ALIAS || + + type == FYTT_SCALAR; +} + +bool fy_token_type_is_operator(enum fy_token_type type) +{ + return type == FYTT_PE_SLASH || + type == FYTT_PE_SCALAR_FILTER || + type == FYTT_PE_COLLECTION_FILTER || + type == FYTT_PE_SEQ_FILTER || + type == FYTT_PE_MAP_FILTER || + type == FYTT_PE_UNIQUE_FILTER || + type == FYTT_PE_SIBLING || + type == FYTT_PE_COMMA || + type == FYTT_PE_BARBAR || + type == FYTT_PE_AMPAMP || + type == FYTT_PE_LPAREN || + type == FYTT_PE_RPAREN || + + type == FYTT_PE_EQEQ || + type == FYTT_PE_NOTEQ || + type == FYTT_PE_LT || + type == FYTT_PE_GT || + type == FYTT_PE_LTE || + type == FYTT_PE_GTE || + + type == FYTT_SE_PLUS || + type == FYTT_SE_MINUS || + type == FYTT_SE_MULT || + type == FYTT_SE_DIV; +} + +bool fy_token_type_is_operand_or_operator(enum fy_token_type type) +{ + return fy_token_type_is_operand(type) || + fy_token_type_is_operator(type); +} + +int fy_path_expr_type_prec(enum fy_path_expr_type type) +{ + switch (type) { + default: + return -1; /* terminals */ + case fpet_filter_collection: + case fpet_filter_scalar: + case fpet_filter_sequence: + case fpet_filter_mapping: + case fpet_filter_unique: + return 5; + case fpet_logical_or: + case fpet_logical_and: + return 4; + case fpet_multi: + return 11; + case fpet_eq: + case fpet_neq: + case fpet_lt: + case fpet_gt: + case fpet_lte: + case fpet_gte: + return 7; + case fpet_mult: + case fpet_div: + return 9; + case fpet_plus: + case fpet_minus: + return 8; + case fpet_chain: + return 10; + case fpet_lparen: + case fpet_rparen: + case fpet_method: + return 1000; + case fpet_arg_separator: + return 1; /* lowest */ + } + return -1; +} + +static inline FY_UNUSED void +dump_operand_stack(struct fy_path_parser *fypp) +{ + return fy_expr_stack_dump(fypp->cfg.diag, &fypp->operands); +} + +static inline int +push_operand(struct fy_path_parser *fypp, struct fy_path_expr *expr) +{ + return fy_expr_stack_push(&fypp->operands, expr); +} + +static 
inline FY_UNUSED struct fy_path_expr * +peek_operand_at(struct fy_path_parser *fypp, unsigned int pos) +{ + return fy_expr_stack_peek_at(&fypp->operands, pos); +} + +static inline FY_UNUSED struct fy_path_expr * +peek_operand(struct fy_path_parser *fypp) +{ + return fy_expr_stack_peek(&fypp->operands); +} + +static inline FY_UNUSED struct fy_path_expr * +pop_operand(struct fy_path_parser *fypp) +{ + return fy_expr_stack_pop(&fypp->operands); +} + +#define PREFIX 0 +#define INFIX 1 +#define SUFFIX 2 + +int fy_token_type_operator_placement(enum fy_token_type type) +{ + switch (type) { + case FYTT_PE_SLASH: /* SLASH is special at the start of the expression */ + case FYTT_PE_COMMA: + case FYTT_PE_BARBAR: + case FYTT_PE_AMPAMP: + case FYTT_PE_EQEQ: + case FYTT_PE_NOTEQ: + case FYTT_PE_LT: + case FYTT_PE_GT: + case FYTT_PE_LTE: + case FYTT_PE_GTE: + case FYTT_SE_PLUS: + case FYTT_SE_MINUS: + case FYTT_SE_MULT: + case FYTT_SE_DIV: + return INFIX; + case FYTT_PE_SCALAR_FILTER: + case FYTT_PE_COLLECTION_FILTER: + case FYTT_PE_SEQ_FILTER: + case FYTT_PE_MAP_FILTER: + case FYTT_PE_UNIQUE_FILTER: + return SUFFIX; + case FYTT_PE_SIBLING: + return PREFIX; + default: + break; + } + return -1; +} + +const struct fy_mark *fy_path_expr_start_mark(struct fy_path_expr *expr) +{ + if (!expr) + return NULL; + + return fy_token_start_mark(expr->fyt); +} + +const struct fy_mark *fy_path_expr_end_mark(struct fy_path_expr *expr) +{ + if (!expr) + return NULL; + + return fy_token_end_mark(expr->fyt); +} + +struct fy_token * +expr_to_token_mark(struct fy_path_expr *expr, struct fy_input *fyi) +{ + const struct fy_mark *ms, *me; + struct fy_atom handle; + + if (!expr || !fyi) + return NULL; + + ms = fy_path_expr_start_mark(expr); + assert(ms); + me = fy_path_expr_end_mark(expr); + assert(me); + + memset(&handle, 0, sizeof(handle)); + handle.start_mark = *ms; + handle.end_mark = *me; + handle.fyi = fyi; + handle.style = FYAS_PLAIN; + handle.chomp = FYAC_CLIP; + + return 
fy_token_create(FYTT_INPUT_MARKER, &handle); +} + +struct fy_token * +expr_lr_to_token_mark(struct fy_path_expr *exprl, struct fy_path_expr *exprr, struct fy_input *fyi) +{ + const struct fy_mark *ms, *me; + struct fy_atom handle; + + if (!exprl || !exprr || !fyi) + return NULL; + + ms = fy_path_expr_start_mark(exprl); + assert(ms); + me = fy_path_expr_end_mark(exprr); + assert(me); + + memset(&handle, 0, sizeof(handle)); + handle.start_mark = *ms; + handle.end_mark = *me; + handle.fyi = fyi; + handle.style = FYAS_PLAIN; + handle.chomp = FYAC_CLIP; + + return fy_token_create(FYTT_INPUT_MARKER, &handle); +} + +int +fy_path_expr_order(struct fy_path_expr *expr1, struct fy_path_expr *expr2) +{ + const struct fy_mark *m1 = NULL, *m2 = NULL; + + if (expr1) + m1 = fy_path_expr_start_mark(expr1); + + if (expr2) + m2 = fy_path_expr_start_mark(expr2); + + if (m1 == m2) + return 0; + + if (!m1) + return -1; + + if (!m2) + return 1; + + return m1->input_pos == m2->input_pos ? 0 : + m1->input_pos < m2->input_pos ? 
-1 : 1; +} + +int push_operand_lr(struct fy_path_parser *fypp, + enum fy_path_expr_type type, + struct fy_path_expr *exprl, struct fy_path_expr *exprr, + bool optimize) +{ + struct fy_reader *fyr; + struct fy_path_expr *expr = NULL, *exprt; + const struct fy_mark *ms = NULL, *me = NULL; + struct fy_atom handle; + int ret; + + optimize = false; + assert(exprl || exprr); + + fyr = &fypp->reader; + +#if 0 + fyr_notice(fyr, ">>> %s <%s> l=<%s> r=<%s>\n", __func__, + fy_path_expr_type_txt[type], + exprl ?fy_path_expr_type_txt[exprl->type] : "NULL", + exprr ?fy_path_expr_type_txt[exprr->type] : "NULL"); +#endif + + expr = fy_path_expr_alloc_recycle(fypp); + fyr_error_check(fyr, expr, err_out, + "fy_path_expr_alloc_recycle() failed\n"); + + expr->type = type; + expr->fyt = NULL; + + if (exprl) { + assert(exprl->fyt); + ms = fy_token_start_mark(exprl->fyt); + assert(ms); + } else { + ms = fy_token_start_mark(exprr->fyt); + assert(ms); + } + + if (exprr) { + assert(exprr->fyt); + me = fy_token_end_mark(exprr->fyt); + assert(me); + } else { + me = fy_token_end_mark(exprr->fyt); + assert(me); + } + + assert(ms && me); + + memset(&handle, 0, sizeof(handle)); + handle.start_mark = *ms; + handle.end_mark = *me; + handle.fyi = fypp->fyi; + handle.style = FYAS_PLAIN; + handle.chomp = FYAC_CLIP; + + if (exprl) { + if (type == exprl->type && fy_path_expr_type_is_mergeable(type)) { + while ((exprt = fy_path_expr_list_pop(&exprl->children)) != NULL) { + fy_path_expr_list_add_tail(&expr->children, exprt); + exprt->parent = expr; + } + fy_path_expr_free_recycle(fypp, exprl); + } else { + fy_path_expr_list_add_tail(&expr->children, exprl); + exprl->parent = expr; + } + exprl = NULL; + } + + if (exprr) { + if (type == exprr->type && fy_path_expr_type_is_mergeable(type)) { + while ((exprt = fy_path_expr_list_pop(&exprr->children)) != NULL) { + fy_path_expr_list_add_tail(&expr->children, exprt); + exprt->parent = expr; + } + fy_path_expr_free_recycle(fypp, exprr); + } else { + 
fy_path_expr_list_add_tail(&expr->children, exprr); + exprr->parent = expr; + } + exprr = NULL; + } + + expr->fyt = fy_token_create(FYTT_INPUT_MARKER, &handle); + fyr_error_check(fyr, expr->fyt, err_out, + "expr_to_token_mark() failed\n"); + + ret = push_operand(fypp, expr); + fyr_error_check(fyr, !ret, err_out, + "push_operand() failed\n"); + +#ifdef DEBUG_EXPR + FYR_TOKEN_DIAG(fyr, expr->fyt, + FYDF_NOTICE, FYEM_PARSE, "pushed operand lr"); +#endif + + return 0; +err_out: + fy_path_expr_free(expr); + fy_path_expr_free(exprl); + fy_path_expr_free(exprr); + return -1; +} + +enum fy_method_idx { + fymi_test, + fymi_sum, + fymi_this, + fymi_parent, + fymi_root, + fymi_any, + fymi_all, + fymi_select, + fymi_key, + fymi_value, + fymi_index, + fymi_null, +}; + +static struct fy_walk_result * +common_builtin_ref_exec(const struct fy_method *fym, + struct fy_path_exec *fypx, int level, + struct fy_path_expr *expr, + struct fy_walk_result *input, + struct fy_walk_result **args, int nargs); + +static struct fy_walk_result * +common_builtin_collection_exec(const struct fy_method *fym, + struct fy_path_exec *fypx, int level, + struct fy_path_expr *expr, + struct fy_walk_result *input, + struct fy_walk_result **args, int nargs); + + +static struct fy_walk_result * +test_exec(const struct fy_method *fym, + struct fy_path_exec *fypx, int level, + struct fy_path_expr *expr, + struct fy_walk_result *input, + struct fy_walk_result **args, int nargs); + +static struct fy_walk_result * +sum_exec(const struct fy_method *fym, + struct fy_path_exec *fypx, int level, + struct fy_path_expr *expr, + struct fy_walk_result *input, + struct fy_walk_result **args, int nargs); + +static const struct fy_method fy_methods[] = { + [fymi_test] = { + .name = "test", + .len = 4, + .mode = fyem_scalar, + .nargs = 1, + .exec = test_exec, + }, + [fymi_sum] = { + .name = "sum", + .len = 3, + .mode = fyem_scalar, + .nargs = 2, + .exec = sum_exec, + }, + [fymi_this] = { + .name = "this", + .len = 4, + 
.mode = fyem_path, + .nargs = 0, + .exec = common_builtin_ref_exec, + }, + [fymi_parent] = { + .name = "parent", + .len = 6, + .mode = fyem_path, + .nargs = 0, + .exec = common_builtin_ref_exec, + }, + [fymi_root] = { + .name = "root", + .len = 4, + .mode = fyem_path, + .nargs = 0, + .exec = common_builtin_ref_exec, + }, + [fymi_any] = { + .name = "any", + .len = 3, + .mode = fyem_path, + .nargs = 1, + .exec = common_builtin_collection_exec, + }, + [fymi_all] = { + .name = "all", + .len = 3, + .mode = fyem_path, + .nargs = 1, + .exec = common_builtin_collection_exec, + }, + [fymi_select] = { + .name = "select", + .len = 6, + .mode = fyem_path, + .nargs = 1, + .exec = common_builtin_collection_exec, + }, + [fymi_key] = { + .name = "key", + .len = 3, + .mode = fyem_path, + .nargs = 1, + .exec = common_builtin_ref_exec, + }, + [fymi_value] = { + .name = "value", + .len = 5, + .mode = fyem_path, + .nargs = 1, + .exec = common_builtin_ref_exec, + }, + [fymi_index] = { + .name = "index", + .len = 5, + .mode = fyem_path, + .nargs = 1, + .exec = common_builtin_ref_exec, + }, + [fymi_null] = { + .name = "null", + .len = 4, + .mode = fyem_path, + .nargs = 0, + .exec = common_builtin_ref_exec, + }, +}; + +static inline int fy_method_to_builtin_idx(const struct fy_method *fym) +{ + if (!fym || fym < fy_methods || fym >= &fy_methods[ARRAY_SIZE(fy_methods)]) + return -1; + return fym - fy_methods; +} + +static struct fy_walk_result * +common_builtin_ref_exec(const struct fy_method *fym, + struct fy_path_exec *fypx, int level, + struct fy_path_expr *expr, + struct fy_walk_result *input, + struct fy_walk_result **args, int nargs) +{ + enum fy_method_idx midx; + struct fy_walk_result *output = NULL; + struct fy_walk_result *fwr, *fwrn; + struct fy_node *fyn, *fynt; + int i; + + if (!fypx || !input) + goto out; + + i = fy_method_to_builtin_idx(fym); + if (i < 0) + goto out; + midx = (enum fy_method_idx)i; + + switch (midx) { + case fymi_key: + case fymi_value: + if (!args || nargs 
!= 1 || !args[0] || args[0]->type != fwrt_string) + goto out; + break; + case fymi_index: + if (!args || nargs != 1 || !args[0] || args[0]->type != fwrt_number) + goto out; + break; + case fymi_root: + case fymi_parent: + case fymi_this: + case fymi_null: + if (nargs != 0) + goto out; + break; + default: + goto out; + } + + output = fy_path_exec_walk_result_create(fypx, fwrt_refs); + assert(output); + + for (fwr = fy_walk_result_iter_start(input); fwr; + fwr = fy_walk_result_iter_next(input, fwr)) { + + if (fwr->type != fwrt_node_ref || !fwr->fyn) + continue; + + fynt = fwr->fyn; + + switch (midx) { + case fymi_key: + case fymi_value: + case fymi_index: + case fymi_this: + case fymi_null: + /* dereference alias */ + if (fy_node_is_alias(fynt)) { + // fprintf(stderr, "%s: %s calling fy_node_alias_resolve_by_ypath()\n", __func__, fy_node_get_path_alloca(fyn)); + fynt = fy_node_alias_resolve_by_ypath(fynt); + } + break; + default: + break; + } + + fyn = NULL; + switch (midx) { + case fymi_key: + if (!fy_node_is_mapping(fynt)) + break; + fyn = fy_node_mapping_lookup_key_by_string(fynt, args[0]->string, FY_NT); + break; + + case fymi_value: + if (!fy_node_is_mapping(fynt)) + break; + fyn = fy_node_mapping_lookup_by_string(fynt, args[0]->string, FY_NT); + break; + + case fymi_index: + if (!fy_node_is_sequence(fynt)) + break; + fyn = fy_node_sequence_get_by_index(fynt, (int)args[0]->number); + break; + + case fymi_root: + fyn = fy_document_root(fy_node_document(fynt)); + break; + + case fymi_parent: + fyn = fy_node_get_parent(fynt); + break; + + case fymi_null: + if (!fy_node_is_mapping(fynt)) + break; + fyn = fy_node_mapping_lookup_value_by_null_key(fynt); + break; + + case fymi_this: + fyn = fynt; + break; + + default: + break; + } + + if (!fyn) + continue; + + fwrn = fy_path_exec_walk_result_create(fypx, fwrt_node_ref, fyn); + assert(fwrn); + + fy_walk_result_list_add_tail(&output->refs, fwrn); + } + + /* output convert zero to NULL, singular to node_ref */ + if 
(output && output->type == fwrt_refs) { + if (fy_walk_result_list_empty(&output->refs)) { + fy_walk_result_free(output); + output = NULL; + } else if (fy_walk_result_list_is_singular(&output->refs)) { + fwr = fy_walk_result_list_pop(&output->refs); + assert(fwr); + fy_walk_result_free(output); + output = fwr; + } + } + +out: + fy_walk_result_free(input); + if (args) { + for (i = 0; i < nargs; i++) + fy_walk_result_free(args[i]); + } + return output; +} + +static struct fy_walk_result * +common_builtin_collection_exec(const struct fy_method *fym, + struct fy_path_exec *fypx, int level, + struct fy_path_expr *expr, + struct fy_walk_result *input, + struct fy_walk_result **args, int nargs) +{ + enum fy_method_idx midx; + struct fy_walk_result *output = NULL; + struct fy_walk_result *fwr, *fwrn, *fwrt; + struct fy_path_expr *expr_arg; + bool match, done; + int input_count, match_count; + int i; + + if (!fypx || !input) + goto out; + + i = fy_method_to_builtin_idx(fym); + if (i < 0) + goto out; + midx = (enum fy_method_idx)i; + + switch (midx) { + case fymi_any: + case fymi_all: + case fymi_select: + if (!args || nargs != 1 || !args[0]) + goto out; + break; + default: + goto out; + } + + expr_arg = fy_path_expr_list_head(&expr->children); + assert(expr_arg); + + /* only handle inputs of node and refs */ + if (input->type != fwrt_node_ref && input->type != fwrt_refs) + goto out; + + output = NULL; + + switch (midx) { + case fymi_select: + output = fy_path_exec_walk_result_create(fypx, fwrt_refs); + assert(output); + break; + default: + break; + } + + + done = false; + match_count = input_count = 0; + for (fwr = fy_walk_result_iter_start(input); fwr && !done; fwr = fy_walk_result_iter_next(input, fwr)) { + + input_count++; + + fwrt = fy_walk_result_clone(fwr); + assert(fwrt); + + fwrn = fy_path_expr_execute(fypx, level + 1, expr_arg, fwrt, expr->type); + + match = fwrn != NULL; + if (match) + match_count++; + + switch (midx) { + case fymi_any: + /* on any match, we're 
done */ + if (match) + done = true; + break; + + case fymi_all: + /* on any non match, we're done */ + if (!match) + done = true; + break; + + case fymi_select: + /* select only works on node refs */ + if (fwr->type != fwrt_node_ref) + break; + if (match) { + fwrt = fy_walk_result_clone(fwr); + assert(fwrt); + fy_walk_result_list_add_tail(&output->refs, fwrt); + } + break; + + default: + break; + } + + if (fwrn) + fy_walk_result_free(fwrn); + } + + switch (midx) { + case fymi_any: + if (input_count > 0 && match_count <= input_count) { + output = input; + input = NULL; + } + break; + case fymi_all: + if (input_count > 0 && match_count == input_count) { + output = input; + input = NULL; + } + break; + + default: + break; + } + + /* output convert zero to NULL, singular to node_ref */ + if (output && output->type == fwrt_refs) { + if (fy_walk_result_list_empty(&output->refs)) { + fy_walk_result_free(output); + output = NULL; + } else if (fy_walk_result_list_is_singular(&output->refs)) { + fwr = fy_walk_result_list_pop(&output->refs); + assert(fwr); + fy_walk_result_free(output); + output = fwr; + } + } + +out: + if (input) + fy_walk_result_free(input); + if (args) { + for (i = 0; i < nargs; i++) + fy_walk_result_free(args[i]); + } + + return output; +} + +static struct fy_walk_result * +test_exec(const struct fy_method *fym, + struct fy_path_exec *fypx, int level, + struct fy_path_expr *expr, + struct fy_walk_result *input, + struct fy_walk_result **args, int nargs) +{ + int i; + struct fy_walk_result *output = NULL; + + if (!fypx || !args || nargs != 1) + goto out; + + /* require a single number argument */ + if (!args[0] || args[0]->type != fwrt_number) + goto out; + + /* reuse argument */ + output = args[0]; + args[0] = NULL; + + /* add 1 to the number */ + output->number += 1; + +out: + fy_walk_result_free(input); + if (args) { + for (i = 0; i < nargs; i++) + fy_walk_result_free(args[i]); + } + return output; +} + +static struct fy_walk_result * +sum_exec(const 
struct fy_method *fym, + struct fy_path_exec *fypx, int level, + struct fy_path_expr *expr, + struct fy_walk_result *input, + struct fy_walk_result **args, int nargs) +{ + int i; + struct fy_walk_result *output = NULL; + + if (!fypx || !args || nargs != 2) + goto out; + + /* require two number argument */ + if (!args[0] || args[0]->type != fwrt_number || + !args[1] || args[1]->type != fwrt_number) + goto out; + + /* reuse argument */ + output = args[0]; + args[0] = NULL; + + /* add 1 to the number */ + output->number += args[1]->number; + +out: + fy_walk_result_free(input); + if (args) { + for (i = 0; i < nargs; i++) + fy_walk_result_free(args[i]); + } + return output; +} + +int evaluate_method(struct fy_path_parser *fypp, struct fy_path_expr *exprm, + struct fy_path_expr *exprl, struct fy_path_expr *exprr) +{ + struct fy_reader *fyr; + struct fy_path_expr *exprt; + struct fy_token *fyt; + const char *text; + size_t len; + const struct fy_method *fym; + unsigned int i, count; + int ret; + + fyr = &fypp->reader; + +#ifdef DEBUG_EXPR + FYR_TOKEN_DIAG(fyr, exprm->fyt, + FYDF_NOTICE, FYEM_PARSE, "evaluating method"); +#endif + + text = fy_token_get_text(exprm->fyt, &len); + fyr_error_check(fyr, text, err_out, + "fy_token_get_text() failed\n"); + + for (i = 0, fym = fy_methods; i < ARRAY_SIZE(fy_methods); i++, fym++) { + if (fym->len == len && !memcmp(text, fym->name, len)) + break; + } + + FYR_TOKEN_ERROR_CHECK(fyr, exprm->fyt, FYEM_PARSE, + i < ARRAY_SIZE(fy_methods), err_out, + "invalid method %.*s\n", (int)len, text); + + /* reuse exprm */ + count = 0; + while ((exprt = fy_expr_stack_peek(&fypp->operands)) != NULL && + fy_path_expr_order(exprm, exprt) < 0) { + + exprt = fy_expr_stack_pop(&fypp->operands); + assert(exprt); + +#ifdef DEBUG_EXPR + FYR_TOKEN_DIAG(fyr, exprt->fyt, + FYDF_NOTICE, FYEM_PARSE, "poped argument %d", count); +#endif + + /* add in reverse order */ + fy_path_expr_list_add(&exprm->children, exprt); + exprt->parent = exprm; + count++; + + } + + if 
(exprr) { + fyt = expr_lr_to_token_mark(exprm, exprr, fypp->fyi); + fyr_error_check(fyr, fyt, err_out, + "expr_lr_to_token_mark() failed\n"); + + fy_token_unref(exprm->fyt); + exprm->fyt = fyt; + } + + FYR_TOKEN_ERROR_CHECK(fyr, exprm->fyt, FYEM_PARSE, + fym->nargs == count, err_out, + "too %s argument for method %s, expected %d, got %d\n", + fym->nargs < count ? "many" : "few", + fym->name, fym->nargs, count); + + exprm->fym = fym; + + if (exprl) + fy_path_expr_free_recycle(fypp, exprl); + if (exprr) + fy_path_expr_free_recycle(fypp, exprr); + + /* and push as an operand */ + ret = push_operand(fypp, exprm); + fyr_error_check(fyr, !ret, err_out, + "push_operand() failed\n"); + +#ifdef DEBUG_EXPR + FYR_TOKEN_DIAG(fyr, exprm->fyt, + FYDF_NOTICE, FYEM_PARSE, "pushed operand evaluate_method"); +#endif + return 0; + +err_out: + /* we don't need the parentheses operators */ + fy_path_expr_free_recycle(fypp, exprm); + if (exprl) + fy_path_expr_free_recycle(fypp, exprl); + if (exprr) + fy_path_expr_free_recycle(fypp, exprr); + + return -1; +} + +int evaluate_new(struct fy_path_parser *fypp) +{ + struct fy_reader *fyr; + struct fy_path_expr *expr = NULL, *expr_peek, *exprt; + struct fy_path_expr *exprl = NULL, *exprr = NULL, *chain = NULL, *exprm = NULL; + struct fy_path_expr *parent = NULL; + struct fy_token *fyt; + enum fy_path_expr_type type, etype; + int ret; + + fyr = &fypp->reader; + + expr = fy_expr_stack_pop(&fypp->operators); + fyr_error_check(fyr, expr, err_out, + "pop_operator() failed to find token operator to evaluate\n"); + + assert(expr->fyt); + +#ifdef DEBUG_EXPR + FYR_TOKEN_DIAG(fyr, expr->fyt, + FYDF_NOTICE, FYEM_PARSE, "poped operator expression"); +#endif + + exprl = NULL; + exprr = NULL; + type = expr->type; + switch (type) { + + case fpet_chain: + + /* dump_operand_stack(fypp); */ + /* dump_operator_stack(fypp); */ + + /* peek the next operator */ + expr_peek = fy_expr_stack_peek(&fypp->operators); + + /* pop the top in either case */ + exprr = 
fy_expr_stack_pop(&fypp->operands); + if (!exprr) { + // fyr_notice(fyr, "ROOT value (with no arguments)\n"); + + /* conver to root and push to operands */ + expr->type = fpet_root; + + ret = push_operand(fypp, expr); + fyr_error_check(fyr, !ret, err_out, + "push_operand() failed\n"); + return 0; + } + +#ifdef DEBUG_EXPR + FYR_TOKEN_DIAG(fyr, exprr->fyt, + FYDF_NOTICE, FYEM_PARSE, "exprr"); +#endif + + /* expression is to the left, that means it's a root chain */ + if (fy_path_expr_order(expr, exprr) < 0 && + (!(exprl = fy_expr_stack_peek(&fypp->operands)) || + (expr_peek && fy_path_expr_order(exprl, expr_peek) <= 0))) { + + // fyr_notice(fyr, "ROOT operator (with arguments)\n"); + + exprl = fy_path_expr_alloc_recycle(fypp); + fyr_error_check(fyr, exprl, err_out, + "fy_path_expr_alloc_recycle() failed\n"); + exprl->type = fpet_root; + + /* move token to the root */ + exprl->fyt = expr->fyt; + expr->fyt = NULL; + + } else if (!(exprl = fy_expr_stack_pop(&fypp->operands))) { + + // fyr_notice(fyr, "COLLECTION operator\n"); + + exprl = exprr; + + exprr = fy_path_expr_alloc_recycle(fypp); + fyr_error_check(fyr, exprr, err_out, + "fy_path_expr_alloc_recycle() failed\n"); + exprr->type = fpet_filter_collection; + + /* move token to the filter collection */ + exprr->fyt = expr->fyt; + expr->fyt = NULL; + + } else { + assert(exprr && exprl); + + // fyr_notice(fyr, "CHAIN operator\n"); + } + + /* we don't need the chain operator now */ + fy_path_expr_free_recycle(fypp, expr); + expr = NULL; + + ret = push_operand_lr(fypp, fpet_chain, exprl, exprr, true); + fyr_error_check(fyr, !ret, err_out, + "push_operand_lr() failed\n"); + return 0; + + case fpet_multi: + case fpet_logical_or: + case fpet_logical_and: + + case fpet_eq: + case fpet_neq: + case fpet_lt: + case fpet_gt: + case fpet_lte: + case fpet_gte: + + case fpet_plus: + case fpet_minus: + case fpet_mult: + case fpet_div: + + exprl = NULL; + exprr = NULL; + exprt = fy_expr_stack_peek(&fypp->operators); + + // 
fyr_error(fyr, "mode=%s top-mode=%s\n", + // fy_expr_mode_txt[fypp->expr_mode], + // exprt ? fy_expr_mode_txt[exprt->expr_mode] : ""); + +#if 0 + exprr = fy_expr_stack_peek(&fypp->operands); + if (exprr && exprt && fy_path_expr_order(exprr, exprt) <= 0) + exprr = NULL; + else +#endif + exprr = fy_expr_stack_pop(&fypp->operands); + fyr_error_check(fyr, exprr, err_out, + "fy_expr_stack_pop() failed for exprr\n"); + +#if 0 + exprl = fy_expr_stack_peek_at(&fypp->operands, 1); + if (exprl && exprt && fy_path_expr_order(exprl, exprt) <= 0) + exprl = NULL; + else +#endif + exprl = fy_expr_stack_pop(&fypp->operands); + fyr_error_check(fyr, exprl, err_out, + "fy_expr_stack_pop() failed for exprl\n"); + + /* we don't need the operator now */ + fy_path_expr_free_recycle(fypp, expr); + expr = NULL; + + ret = push_operand_lr(fypp, type, exprl, exprr, true); + fyr_error_check(fyr, !ret, err_out, + "push_operand_lr() failed\n"); + + break; + + case fpet_filter_collection: + case fpet_filter_scalar: + case fpet_filter_sequence: + case fpet_filter_mapping: + case fpet_filter_unique: + + exprl = fy_expr_stack_pop(&fypp->operands); + FYR_TOKEN_ERROR_CHECK(fyr, expr->fyt, FYEM_PARSE, + exprl, err_out, + "filter operator without argument"); + + exprr = fy_path_expr_alloc_recycle(fypp); + fyr_error_check(fyr, exprr, err_out, + "fy_path_expr_alloc_recycle() failed\n"); + exprr->type = type; + + /* move token to the filter collection */ + exprr->fyt = expr->fyt; + expr->fyt = NULL; + + /* we don't need the operator now */ + fy_path_expr_free_recycle(fypp, expr); + expr = NULL; + + /* push as a chain */ + ret = push_operand_lr(fypp, fpet_chain, exprl, exprr, true); + fyr_error_check(fyr, !ret, err_out, + "push_operand_lr() failed\n"); + + break; + + case fpet_lparen: + abort(); + assert(0); + +#ifdef DEBUG_EXPR + FYR_TOKEN_DIAG(fyr, expr->fyt, + FYDF_NOTICE, FYEM_PARSE, "("); +#endif + + return 0; + + case fpet_arg_separator: + + /* separator is right hand side of the expression now */ + 
exprr = expr; + expr = NULL; + + /* evaluate until we hit a match to the rparen */ + exprl = fy_expr_stack_peek(&fypp->operators); + + if (!fy_path_expr_type_is_lparen(exprl->type)) { + ret = evaluate_new(fypp); + if (ret) + goto err_out; + } + exprl = NULL; + + fy_path_expr_free_recycle(fypp, exprr); + exprr = NULL; + + break; + + case fpet_rparen: + + /* rparen is right hand side of the expression now */ + exprr = expr; + expr = NULL; + + /* evaluate until we hit a match to the rparen */ + while ((exprl = fy_expr_stack_peek(&fypp->operators)) != NULL) { + + if (fy_path_expr_type_is_lparen(exprl->type)) + break; + + ret = evaluate_new(fypp); + if (ret) + goto err_out; + } + + FYR_TOKEN_ERROR_CHECK(fyr, exprr->fyt, FYEM_PARSE, + exprl, err_out, + "missing matching left parentheses"); + + exprl = fy_expr_stack_pop(&fypp->operators); + assert(exprl); + + exprt = fy_expr_stack_peek(&fypp->operands); + + etype = exprl->expr_mode == fyem_scalar ? fpet_scalar_expr : fpet_path_expr; + + /* already is an expression, reuse */ + if (exprt && exprt->type == etype) { + + fyt = expr_lr_to_token_mark(exprl, exprr, fypp->fyi); + fyr_error_check(fyr, fyt, err_out, + "expr_lr_to_token_mark() failed\n"); + fy_token_unref(exprt->fyt); + exprt->fyt = fyt; + exprt->expr_mode = exprl->expr_mode; + + /* we don't need the parentheses operators */ + fy_path_expr_free_recycle(fypp, exprl); + exprl = NULL; + fy_path_expr_free_recycle(fypp, exprr); + exprr = NULL; + + return 0; + } + + /* if it's method, evaluate */ + exprm = fy_expr_stack_peek(&fypp->operators); + if (exprm && exprm->type == fpet_method) { + + exprm = fy_expr_stack_pop(&fypp->operators); + assert(exprm); + + return evaluate_method(fypp, exprm, exprl, exprr); + } + + expr = fy_path_expr_alloc_recycle(fypp); + fyr_error_check(fyr, expr, err_out, + "fy_path_expr_alloc_recycle() failed\n"); + expr->type = etype; + expr->expr_mode = exprl->expr_mode; + + expr->fyt = expr_lr_to_token_mark(exprl, exprr, fypp->fyi); + + exprt = 
fy_expr_stack_pop(&fypp->operands); + + FYR_TOKEN_ERROR_CHECK(fyr, exprr->fyt, FYEM_PARSE, + exprt, err_out, + "empty expression in parentheses"); + + fy_path_expr_list_add_tail(&expr->children, exprt); + exprt->parent = expr; + + /* pop all operands that after exprl */ + while ((exprt = fy_expr_stack_peek(&fypp->operands)) != NULL && + fy_path_expr_order(exprt, exprl) >= 0) { +#ifdef DEBUG_EXPR + FYR_TOKEN_DIAG(fyr, exprt->fyt, + FYDF_NOTICE, FYEM_PARSE, "discarding argument"); +#endif + fy_path_expr_free_recycle(fypp, fy_expr_stack_pop(&fypp->operands)); + } + + if (exprl->expr_mode != fyem_none) { + fypp->expr_mode = exprl->expr_mode; +#ifdef DEBUG_EXPR + fyr_notice(fyr, "poping expr_mode %s\n", fy_expr_mode_txt[fypp->expr_mode]); +#endif + } + + /* we don't need the parentheses operators */ + fy_path_expr_free_recycle(fypp, exprl); + exprl = NULL; + fy_path_expr_free_recycle(fypp, exprr); + exprr = NULL; + + ret = push_operand(fypp, expr); + fyr_error_check(fyr, !ret, err_out, + "push_operand() failed\n"); + +#ifdef DEBUG_EXPR + FYR_TOKEN_DIAG(fyr, expr->fyt, + FYDF_NOTICE, FYEM_PARSE, "pushed operand evaluate_new"); +#endif + return 0; + + case fpet_method: + return evaluate_method(fypp, expr, NULL, NULL); + + /* shoud never */ + case fpet_scalar_expr: + case fpet_path_expr: + assert(0); + abort(); + + default: + fyr_error(fyr, "Unknown expression %s\n", fy_path_expr_type_txt[expr->type]); + goto err_out; + } + + return 0; + +err_out: + +#ifdef DEBUG_EXPR + if (expr) + fy_path_expr_dump(expr, fypp->cfg.diag, FYET_NOTICE, 0, "expr:"); + if (exprl) + fy_path_expr_dump(exprl, fypp->cfg.diag, FYET_NOTICE, 0, "exprl:"); + if (exprr) + fy_path_expr_dump(exprr, fypp->cfg.diag, FYET_NOTICE, 0, "exprr:"); + if (chain) + fy_path_expr_dump(chain, fypp->cfg.diag, FYET_NOTICE, 0, "chain:"); + if (parent) + fy_path_expr_dump(parent, fypp->cfg.diag, FYET_NOTICE, 0, "parent:"); + + fy_notice(fypp->cfg.diag, "operator stack\n"); + fy_expr_stack_dump(fypp->cfg.diag, 
&fypp->operators); + fy_notice(fypp->cfg.diag, "operand stack\n"); + fy_expr_stack_dump(fypp->cfg.diag, &fypp->operands); +#endif + + fy_path_expr_free(expr); + fy_path_expr_free(exprl); + fy_path_expr_free(exprr); + fy_path_expr_free(chain); + fy_path_expr_free(parent); + + return -1; +} + +int fy_path_check_expression_alias(struct fy_path_parser *fypp, struct fy_path_expr *expr) +{ + struct fy_reader *fyr; + struct fy_path_expr *exprn; + int rc; + + if (!expr) + return 0; + + fyr = &fypp->reader; + + /* an alias with a parent.. must be the first one */ + if (expr->type == fpet_alias && expr->parent) { + + exprn = fy_path_expr_list_head(&expr->parent->children); + + /* an alias may only be the first of a path expression */ + FYR_TOKEN_ERROR_CHECK(fyr, expr->fyt, FYEM_PARSE, + expr == exprn, err_out, + "alias is not first in the path expresion"); + } + + for (exprn = fy_path_expr_list_head(&expr->children); exprn; + exprn = fy_path_expr_next(&expr->children, exprn)) { + + rc = fy_path_check_expression_alias(fypp, exprn); + if (rc) + return rc; + } + + return 0; + +err_out: + return -1; +} + +/* check expression for validity */ +int fy_path_check_expression(struct fy_path_parser *fypp, struct fy_path_expr *expr) +{ + int rc; + + rc = fy_path_check_expression_alias(fypp, expr); + if (rc) + return rc; + + return 0; +} + +struct fy_path_expr * +fy_path_parse_expression(struct fy_path_parser *fypp) +{ + struct fy_reader *fyr; + struct fy_token *fyt = NULL; + enum fy_token_type fytt; + struct fy_path_expr *expr, *expr_top, *exprt; + enum fy_expr_mode old_scan_mode, prev_scan_mode; + int ret, rc; +#ifdef DEBUG_EXPR + char *dbg; +#endif + + /* the parser must be in the correct state */ + if (!fypp || fy_expr_stack_size(&fypp->operators) > 0 || fy_expr_stack_size(&fypp->operands) > 0) + return NULL; + + fyr = &fypp->reader; + + /* find stream start */ + fyt = fy_path_scan_peek(fypp, NULL); + FYR_PARSE_ERROR_CHECK(fyr, 0, 1, FYEM_PARSE, + fyt && fyt->type == 
FYTT_STREAM_START, err_out, + "no tokens available or start without stream start"); + + /* remove stream start */ + fy_token_unref(fy_path_scan_remove(fypp, fyt)); + fyt = NULL; + + prev_scan_mode = fypp->expr_mode; + + while ((fyt = fy_path_scan_peek(fypp, NULL)) != NULL) { + + if (fyt->type == FYTT_STREAM_END) + break; + +#ifdef DEBUG_EXPR + fy_token_debug_text_a(fyt, &dbg); + FYR_TOKEN_DIAG(fyr, fyt, FYET_NOTICE, FYEM_PARSE, "next token %s", dbg); +#endif + fytt = fyt->type; + + /* create an expression in either operator/operand case */ + expr = fy_path_expr_alloc_recycle(fypp); + fyr_error_check(fyr, expr, err_out, + "fy_path_expr_alloc_recycle() failed\n"); + + expr->fyt = fy_path_scan_remove(fypp, fyt); + /* this it the first attempt, it might not be the final one */ + expr->type = fy_map_token_to_path_expr_type(fyt->type, fypp->expr_mode); + fyt = NULL; + +#ifdef DEBUG_EXPR + fy_path_expr_dump(expr, fypp->cfg.diag, FYET_NOTICE, 0, "-> expr"); +#endif + + if (prev_scan_mode != fypp->expr_mode) { +#ifdef DEBUG_EXPR + fyr_warning(fyr, "switched expr_mode %s -> %s\n", + fy_expr_mode_txt[prev_scan_mode], + fy_expr_mode_txt[fypp->expr_mode]); +#endif + expr_top = fy_expr_stack_peek(&fypp->operators); + +#ifdef DEBUG_EXPR + if (expr_top) + fy_path_expr_dump(expr_top, fypp->cfg.diag, FYET_NOTICE, 0, NULL); +#endif + + if (expr_top && fy_path_expr_type_is_lparen(expr_top->type) && + expr_top->expr_mode != fypp->expr_mode) { +#ifdef DEBUG_EXPR + fyr_warning(fyr, "switched top lparen expr_mode %s -> %s\n", + fy_expr_mode_txt[expr_top->expr_mode], + fy_expr_mode_txt[fypp->expr_mode]); + expr_top->expr_mode = fypp->expr_mode; +#endif + } + } + + prev_scan_mode = fypp->expr_mode; + + /* if it's an operand convert it to expression and push */ + if (fy_token_type_is_operand(fytt)) { + + ret = fy_expr_stack_push(&fypp->operands, expr); + fyr_error_check(fyr, !ret, err_out, "push_operand() failed\n"); + expr = NULL; + + continue; + } + + /* specials for SLASH */ + + if 
(expr->fyt->type == FYTT_PE_SLASH) { + + /* try to get next token */ + fyt = fy_path_scan_peek(fypp, NULL); + if (!fyt) { + if (!fypp->stream_error) { + (void)fy_path_fetch_tokens(fypp); + fyt = fy_path_scan_peek(fypp, NULL); + } + } + + /* last token, it means it's a collection filter (or a root) */ + if (!fyt || fyt->type == FYTT_STREAM_END || fyt->type == FYTT_PE_RPAREN) { + + exprt = fy_expr_stack_peek(&fypp->operands); + + /* if no argument exists it's a root */ + if (!exprt) { + expr->type = fpet_root; + + ret = fy_expr_stack_push(&fypp->operands, expr); + fyr_error_check(fyr, !ret, err_out, "push_operand() failed\n"); + expr = NULL; + continue; + } + + expr->type = fpet_filter_collection; + } + } + +#ifdef DEBUG_EXPR + fy_notice(fypp->cfg.diag, "operator stack (before)\n"); + fy_expr_stack_dump(fypp->cfg.diag, &fypp->operators); + fy_notice(fypp->cfg.diag, "operand stack (before)\n"); + fy_expr_stack_dump(fypp->cfg.diag, &fypp->operands); +#endif + + old_scan_mode = fypp->expr_mode; + + /* for rparen, need to push before */ + if (expr->type == fpet_rparen) { + + FYR_TOKEN_ERROR_CHECK(fyr, expr->fyt, FYEM_PARSE, + fypp->paren_nest_level > 0, err_out, + "Mismatched right parentheses"); + + fypp->paren_nest_level--; + + ret = fy_expr_stack_push(&fypp->operators, expr); + fyr_error_check(fyr, !ret, err_out, "push_operator() failed\n"); + expr = NULL; + + ret = evaluate_new(fypp); + /* evaluate will print diagnostic on error */ + if (ret < 0) + goto err_out; + + } else if (fy_path_expr_type_is_lparen(expr->type)) { + + expr->expr_mode = fypp->expr_mode; + fypp->expr_mode = fyem_scalar; + + fypp->paren_nest_level++; + + /* push the operator */ + ret = fy_expr_stack_push(&fypp->operators, expr); + fyr_error_check(fyr, !ret, err_out, "push_operator() failed\n"); + expr = NULL; + +#ifdef DEBUG_EXPR + if (old_scan_mode != fypp->expr_mode) + fyr_notice(fyr, "expr_mode %s -> %s\n", + fy_expr_mode_txt[old_scan_mode], + fy_expr_mode_txt[fypp->expr_mode]); +#endif + } else 
{ + switch (fypp->expr_mode) { + case fyem_none: + assert(0); /* should never happen */ + break; + + case fyem_path: + if (fy_path_expr_type_is_conditional(expr->type)) { + /* switch to scalar mode */ + fypp->expr_mode = fyem_scalar; + break; + } + break; + case fyem_scalar: + if (expr->type == fpet_root) { + fypp->expr_mode = fyem_path; + break; + } + + /* div out of parentheses, it's a chain */ + if (expr->type == fpet_div && fypp->paren_nest_level == 0) { + expr->type = fpet_chain; + fypp->expr_mode = fyem_path; + + /* mode change means evaluate */ + ret = evaluate_new(fypp); + /* evaluate will print diagnostic on error */ + if (ret < 0) + goto err_out; + + break; + } + break; + } + + if (old_scan_mode != fypp->expr_mode) { +#ifdef DEBUG_EXPR + fyr_notice(fyr, "expr_mode %s -> %s\n", + fy_expr_mode_txt[old_scan_mode], + fy_expr_mode_txt[fypp->expr_mode]); +#endif + } + + ret = -1; + while ((expr_top = fy_expr_stack_peek(&fypp->operators)) != NULL && + fy_path_expr_type_prec(expr->type) <= fy_path_expr_type_prec(expr_top->type) && + !fy_path_expr_type_is_lparen(expr_top->type)) { + + ret = evaluate_new(fypp); + /* evaluate will print diagnostic on error */ + if (ret < 0) + goto err_out; + } + + /* push the operator */ + ret = fy_expr_stack_push(&fypp->operators, expr); + fyr_error_check(fyr, !ret, err_out, "push_operator() failed\n"); + expr = NULL; + } + +#ifdef DEBUG_EXPR + fy_notice(fypp->cfg.diag, "operator stack (after)\n"); + fy_expr_stack_dump(fypp->cfg.diag, &fypp->operators); + fy_notice(fypp->cfg.diag, "operand stack (after)\n"); + fy_expr_stack_dump(fypp->cfg.diag, &fypp->operands); +#endif + + prev_scan_mode = fypp->expr_mode; + } + + if (fypp->stream_error) + goto err_out; + + FYR_PARSE_ERROR_CHECK(fyr, 0, 1, FYEM_PARSE, + fypp->stream_error || (fyt && fyt->type == FYTT_STREAM_END), err_out, + "stream ended without STREAM_END"); + + /* remove stream end */ + fy_token_unref(fy_path_scan_remove(fypp, fyt)); + fyt = NULL; + +#if 0 + 
FYR_PARSE_ERROR_CHECK(fyr, 0, 1, FYEM_PARSE, + fypp->paren_nest_level == 0, err_out, + "Missing right parenthesis"); +#endif + + /* drain */ + while ((expr_top = fy_expr_stack_peek(&fypp->operators)) != NULL && + !fy_path_expr_type_is_lparen(expr_top->type)) { + + ret = evaluate_new(fypp); + /* evaluate will print diagnostic on error */ + if (ret < 0) + goto err_out; + + } + +#ifdef DEBUG_EXPR + expr_top = fy_expr_stack_peek(&fypp->operators); + if (expr_top) + fy_path_expr_dump(expr_top, fypp->cfg.diag, FYET_NOTICE, 0, "operator top left"); +#endif + + expr = fy_expr_stack_pop(&fypp->operands); + + FYR_PARSE_ERROR_CHECK(fyr, 0, 1, FYEM_PARSE, + expr != NULL, err_out, + "No operands left on operand stack"); + + FYR_TOKEN_ERROR_CHECK(fyr, expr->fyt, FYEM_PARSE, + fy_expr_stack_size(&fypp->operands) == 0, err_out, + "Operand stack contains more than 1 value at end"); + + /* check the expression for validity */ + rc = fy_path_check_expression(fypp, expr); + if (rc) { + fy_path_expr_free(expr); + expr = NULL; + } + + return expr; + +err_out: +#ifdef DEBUG_EXPR + fy_notice(fypp->cfg.diag, "operator stack (error)\n"); + fy_expr_stack_dump(fypp->cfg.diag, &fypp->operators); + fy_notice(fypp->cfg.diag, "operand stack (error)\n"); + fy_expr_stack_dump(fypp->cfg.diag, &fypp->operands); +#endif + fypp->stream_error = true; + return NULL; +} + +static struct fy_node * +fy_path_expr_execute_single_result(struct fy_diag *diag, struct fy_path_expr *expr, struct fy_node *fyn) +{ + struct fy_token *fyt; + struct fy_anchor *fya; + const char *text; + size_t len; + + assert(expr); + + switch (expr->type) { + case fpet_root: + return fyn->fyd->root; + + case fpet_this: + if (fy_node_is_alias(fyn)) { + // fprintf(stderr, "%s:%d %s calling fy_node_alias_resolve_by_ypath()\n", __func__, __LINE__, fy_node_get_path_alloca(fyn)); + fyn = fy_node_alias_resolve_by_ypath(fyn); + } + + return fyn; + + case fpet_parent: + return fy_node_get_parent(fyn); + + case fpet_alias: + fyt = expr->fyt; + 
assert(fyt); + assert(fyt->type == FYTT_PE_ALIAS); + + text = fy_token_get_text(fyt, &len); + if (!text || len < 1) + break; + + if (*text == '*') { + text++; + len--; + } + fya = fy_document_lookup_anchor(fyn->fyd, text, len); + if (!fya) + break; + return fya->fyn; + + case fpet_seq_index: + fyt = expr->fyt; + assert(fyt); + assert(fyt->type == FYTT_PE_SEQ_INDEX); + + if (fy_node_is_alias(fyn)) { + // fprintf(stderr, "%s:%d %s calling fy_node_alias_resolve_by_ypath()\n", __func__, __LINE__, fy_node_get_path_alloca(fyn)); + fyn = fy_node_alias_resolve_by_ypath(fyn); + } + + /* only on sequence */ + if (!fy_node_is_sequence(fyn)) + break; + + return fy_node_sequence_get_by_index(fyn, fyt->seq_index.index); + + case fpet_map_key: + fyt = expr->fyt; + assert(fyt); + assert(fyt->type == FYTT_PE_MAP_KEY); + + if (fy_node_is_alias(fyn)) { + // fprintf(stderr, "%s:%d %s calling fy_node_alias_resolve_by_ypath()\n", __func__, __LINE__, fy_node_get_path_alloca(fyn)); + fyn = fy_node_alias_resolve_by_ypath(fyn); + } + + if (!fy_node_is_mapping(fyn)) + break; + + if (!fyt->map_key.fyd) { + /* simple key */ + text = fy_token_get_text(fyt, &len); + if (!text || len < 1) + break; + return fy_node_mapping_lookup_value_by_simple_key(fyn, text, len); + } + + return fy_node_mapping_lookup_value_by_key(fyn, fyt->map_key.fyd->root); + + case fpet_filter_scalar: + if (!(fy_node_is_scalar(fyn) || fy_node_is_alias(fyn))) + break; + return fyn; + + case fpet_filter_collection: + + if (fy_node_is_alias(fyn)) { + // fprintf(stderr, "%s:%d %s calling fy_node_alias_resolve_by_ypath()\n", __func__, __LINE__, fy_node_get_path_alloca(fyn)); + fyn = fy_node_alias_resolve_by_ypath(fyn); + } + + if (!(fy_node_is_mapping(fyn) || fy_node_is_sequence(fyn))) + break; + return fyn; + + case fpet_filter_sequence: + + if (fy_node_is_alias(fyn)) { + // fprintf(stderr, "%s:%d %s calling fy_node_alias_resolve_by_ypath()\n", __func__, __LINE__, fy_node_get_path_alloca(fyn)); + fyn = 
fy_node_alias_resolve_by_ypath(fyn); + } + + if (!fy_node_is_sequence(fyn)) + break; + return fyn; + + case fpet_filter_mapping: + + if (fy_node_is_alias(fyn)) { + // fprintf(stderr, "%s:%d %s calling fy_node_alias_resolve_by_ypath()\n", __func__, __LINE__, fy_node_get_path_alloca(fyn)); + fyn = fy_node_alias_resolve_by_ypath(fyn); + } + + if (!fy_node_is_mapping(fyn)) + break; + return fyn; + + default: + break; + } + + return NULL; +} + +static double +token_number(struct fy_token *fyt) +{ + const char *value; + + if (!fyt || fyt->type != FYTT_SCALAR || (value = fy_token_get_text0(fyt)) == NULL) + return NAN; + return strtod(value, NULL); +} + +void fy_path_exec_cleanup(struct fy_path_exec *fypx) +{ + if (!fypx) + return; + + fy_walk_result_free(fypx->result); + fypx->result = NULL; + fypx->fyn_start = NULL; +} + +/* publicly exported methods */ +struct fy_path_parser *fy_path_parser_create(const struct fy_path_parse_cfg *pcfg) +{ + struct fy_path_parser *fypp; + + fypp = malloc(sizeof(*fypp)); + if (!fypp) + return NULL; + fy_path_parser_setup(fypp, pcfg); + return fypp; +} + +void fy_path_parser_destroy(struct fy_path_parser *fypp) +{ + if (!fypp) + return; + fy_path_parser_cleanup(fypp); + free(fypp); +} + +int fy_path_parser_reset(struct fy_path_parser *fypp) +{ + if (!fypp) + return -1; + fy_path_parser_cleanup(fypp); + return 0; +} + +struct fy_path_expr * +fy_path_parse_expr_from_string(struct fy_path_parser *fypp, + const char *str, size_t len) +{ + struct fy_path_expr *expr = NULL; + struct fy_input *fyi = NULL; + int rc; + + if (!fypp || !str || !len) + return NULL; + + fy_path_parser_reset(fypp); + + fyi = fy_input_from_data(str, len, NULL, false); + if (!fyi) { + fy_error(fypp->cfg.diag, "failed to create ypath input from %.*s\n", + (int)len, str); + goto err_out; + } + + rc = fy_path_parser_open(fypp, fyi, NULL); + if (rc) { + fy_error(fypp->cfg.diag, "failed to open path parser input from %.*s\n", + (int)len, str); + goto err_out; + } + + expr = 
fy_path_parse_expression(fypp); + if (!expr) { + fy_error(fypp->cfg.diag, "failed to parse path expression %.*s\n", + (int)len, str); + goto err_out; + } + + fy_path_parser_close(fypp); + + fy_input_unref(fyi); + + return expr; + +err_out: + fy_path_expr_free(expr); + fy_path_parser_close(fypp); + fy_input_unref(fyi); + return NULL; +} + +struct fy_path_expr * +fy_path_expr_build_from_string(const struct fy_path_parse_cfg *pcfg, + const char *str, size_t len) +{ + struct fy_path_parser fypp_data, *fypp = &fypp_data; + struct fy_path_expr *expr = NULL; + + if (!str) + return NULL; + + fy_path_parser_setup(fypp, pcfg); + expr = fy_path_parse_expr_from_string(fypp, str, len); + fy_path_parser_cleanup(fypp); + + return expr; +} + +struct fy_path_exec *fy_path_exec_create(const struct fy_path_exec_cfg *xcfg) +{ + struct fy_path_exec *fypx; + + fypx = malloc(sizeof(*fypx)); + if (!fypx) + return NULL; + + memset(fypx, 0, sizeof(*fypx)); + if (xcfg) + fypx->cfg = *xcfg; + fypx->fwr_recycle = NULL; /* initially no recycling list */ + fypx->refs = 1; + + fypx->supress_recycling = !!(fypx->cfg.flags & FYPXCF_DISABLE_RECYCLING) || + (getenv("FY_VALGRIND") && + !getenv("FY_VALGRIND_RECYCLING")); + return fypx; +} + +struct fy_path_exec *fy_path_exec_create_on_document(struct fy_document *fyd) +{ + struct fy_path_exec_cfg xcfg_local, *xcfg = &xcfg_local; + struct fy_path_exec *fypx; + + memset(xcfg, 0, sizeof(*xcfg)); + xcfg->diag = fyd ? fyd->diag : NULL; + + xcfg->flags = (fyd->parse_cfg.flags & FYPCF_DISABLE_RECYCLING) ? 
+ FYPXCF_DISABLE_RECYCLING : 0; + + fypx = fy_path_exec_create(xcfg); + if (!fypx) + return NULL; + return fypx; +} + +void fy_path_exec_destroy(struct fy_path_exec *fypx) +{ + if (!fypx) + return; + fy_path_exec_cleanup(fypx); + free(fypx); +} + +int fy_path_exec_reset(struct fy_path_exec *fypx) +{ + if (!fypx) + return -1; + fy_path_exec_cleanup(fypx); + return 0; +} + +struct fy_walk_result *fy_walk_result_simplify(struct fy_walk_result *fwr) +{ + struct fy_walk_result *fwr2; +#if 0 + struct fy_walk_result *fwrf; + bool recursive; +#endif + + /* no fwr */ + if (!fwr) + return NULL; + + /* non recursive */ + if (fwr->type != fwrt_refs) + return fwr; + + /* refs, if empty, return NULL */ + if (fy_walk_result_list_empty(&fwr->refs)) { + fy_walk_result_free(fwr); + return NULL; + } + + /* single element, switch it out */ + if (fy_walk_result_list_is_singular(&fwr->refs)) { + fwr2 = fy_walk_result_list_pop(&fwr->refs); + assert(fwr2); + + fy_walk_result_free(fwr); + fwr = fwr2; + } + + return fwr; +#if 0 + /* non recursive return immediately */ + if (fwr->type != fwrt_refs) + return fwr; + + /* flatten if recursive */ + recursive = false; + for (fwr2 = fy_walk_result_list_head(&fwr->refs); fwr2; + fwr2 = fy_walk_result_next(&fwr->refs, fwr2)) { + + /* refs, recursive */ + if (fwr2->type == fwrt_refs) { + recursive = true; + break; + } + } + + if (!recursive) + return fwr; + + fwrf = fy_path_exec_walk_result_create(fypx, fwrt_refs); + assert(fwrf); + + fy_walk_result_flatten_internal(fwr, fwrf); + + fy_walk_result_free(fwr); + return fwrf; +#endif + +} + +int fy_walk_result_all_children_recursive_internal(struct fy_path_exec *fypx, struct fy_node *fyn, struct fy_walk_result *output) +{ + struct fy_node *fyni; + struct fy_walk_result *fwr; + void *prevp; + int ret; + + if (!fyn) + return 0; + + assert(output); + assert(output->type == fwrt_refs); + + /* this node */ + fwr = fy_path_exec_walk_result_create(fypx, fwrt_node_ref, fyn); + if (!fwr) + return -1; + 
fy_walk_result_list_add_tail(&output->refs, fwr); + + if (fy_node_is_scalar(fyn)) + return 0; + + prevp = NULL; + while ((fyni = fy_node_collection_iterate(fyn, &prevp)) != NULL) { + ret = fy_walk_result_all_children_recursive_internal(fypx, fyni, output); + if (ret) + return ret; + } + + return 0; +} + +bool +fy_walk_result_compare_simple(struct fy_path_exec *fypx, enum fy_path_expr_type type, + struct fy_walk_result *fwrl, struct fy_walk_result *fwrr) +{ + struct fy_token *fyt; + struct fy_walk_result *fwrt; + const char *str; + bool match; + + /* both NULL */ + if (!fwrl && !fwrr) { + switch (type) { + case fpet_eq: + return true; + default: + break; + } + return false; + } + + /* any NULL */ + if (!fwrl || !fwrr) { + switch (type) { + case fpet_neq: + return true; + default: + break; + } + return false; + } + + /* both are non NULL */ + + /* none should be multiple */ + assert(fwrl->type != fwrt_refs && fwrr->type != fwrt_refs); + + /* both are the same type */ + if (fwrl->type == fwrr->type) { + + switch (fwrl->type) { + case fwrt_none: + abort(); /* should never happen */ + break; + + case fwrt_node_ref: + switch (type) { + case fpet_eq: + /* simple and fast direct node comparison */ + if (fwrl->fyn == fwrr->fyn) + return true; + return fy_node_compare(fwrl->fyn, fwrr->fyn); + case fpet_neq: + /* simple and fast direct node comparison */ + if (fwrl->fyn != fwrr->fyn) + return true; + return !fy_node_compare(fwrl->fyn, fwrr->fyn); + default: + break; + } + break; + + case fwrt_refs: + assert(0); /* should not get here */ + break; + + case fwrt_doc: + switch (type) { + case fpet_eq: + case fpet_neq: + match = false; + if (fwrl->fyd == fwrr->fyd) + match = true; + else if (!fwrl->fyd || !fwrr->fyd) + match = false; + else + match = fy_node_compare(fwrl->fyd->root, fwrr->fyd->root); + if (type == fpet_neq) + match = !match; + return match; + default: + break; + } + break; + + case fwrt_number: + switch (type) { + case fpet_eq: + return fwrl->number == 
fwrr->number; + case fpet_neq: + return fwrl->number != fwrr->number; + case fpet_lt: + return fwrl->number < fwrr->number; + case fpet_gt: + return fwrl->number > fwrr->number; + case fpet_lte: + return fwrl->number <= fwrr->number; + case fpet_gte: + return fwrl->number >= fwrr->number; + default: + break; + } + break; + + case fwrt_string: + switch (type) { + case fpet_eq: + return strcmp(fwrl->string, fwrr->string) == 0; + case fpet_neq: + return strcmp(fwrl->string, fwrr->string) != 0; + case fpet_lt: + return strcmp(fwrl->string, fwrr->string) < 0; + case fpet_gt: + return strcmp(fwrl->string, fwrr->string) > 0; + case fpet_lte: + return strcmp(fwrl->string, fwrr->string) <= 0; + case fpet_gte: + return strcmp(fwrl->string, fwrr->string) >= 0; + default: + break; + } + break; + } + return false; + } + + /* only handle the node refs at the left */ + if (fwrr->type == fwrt_node_ref) { + switch (type) { + case fpet_lt: + type = fpet_gte; + break; + case fpet_gt: + type = fpet_lte; + break; + case fpet_lte: + type = fpet_gt; + break; + case fpet_gte: + type = fpet_lt; + break; + default: + break; + } + + /* swap left with right */ + return fy_walk_result_compare_simple(fypx, type, fwrr, fwrl); + } + + switch (fwrl->type) { + case fwrt_node_ref: + + /* non scalar mode, only returns true for non-eq */ + if (!fy_node_is_scalar(fwrl->fyn)) { + /* XXX case of rhs being a document not handled */ + return type == fpet_neq; + } + + fyt = fy_node_get_scalar_token(fwrl->fyn); + assert(fyt); + + str = fy_token_get_text0(fyt); + assert(str); + + fwrt = NULL; + /* node ref against */ + switch (fwrr->type) { + case fwrt_string: + /* create a new temporary walk result */ + fwrt = fy_path_exec_walk_result_create(fypx, fwrt_string, str); + assert(fwrt); + + break; + + case fwrt_number: + /* if it's not a number return true only for non-eq */ + if (!fy_token_is_number(fyt)) + return type == fpet_neq; + + /* create a new temporary walk result */ + fwrt = 
fy_path_exec_walk_result_create(fypx, fwrt_number, strtod(str, NULL)); + assert(fwrt); + break; + + default: + break; + } + + if (!fwrt) + return false; + + match = fy_walk_result_compare_simple(fypx, type, fwrt, fwrr); + + /* free the temporary result */ + fy_walk_result_free(fwrt); + + return match; + + default: + break; + } + + return false; +} + +struct fy_walk_result * +fy_walk_result_arithmetic_simple(struct fy_path_exec *fypx, + struct fy_path_expr *expr, + struct fy_path_expr *exprl, struct fy_walk_result *fwrl, + struct fy_path_expr *exprr, struct fy_walk_result *fwrr) +{ + struct fy_diag *diag; + struct fy_walk_result *output = NULL; + char *str; + size_t len, len1, len2; + + if (!fwrl || !fwrr) + goto out; + + diag = fypx->cfg.diag; + + /* node refs are not handled yet */ + if (fwrl->type == fwrt_node_ref || fwrr->type == fwrt_node_ref) + goto out; + + /* same type */ + if (fwrl->type == fwrr->type) { + + switch (fwrl->type) { + + case fwrt_string: + /* for strings, only concatenation */ + if (expr->type != fpet_plus) + break; + len1 = strlen(fwrl->string); + len2 = strlen(fwrr->string); + len = len1 + len2; + str = malloc(len + 1); + assert(str); + memcpy(str, fwrl->string, len1); + memcpy(str + len1, fwrr->string, len2); + str[len] = '\0'; + + free(fwrl->string); + fwrl->string = str; + + /* reuse */ + output = fwrl; + fwrl = NULL; + + break; + + case fwrt_number: + /* reuse fwrl */ + output = fwrl; + switch (expr->type) { + case fpet_plus: + output->number = fwrl->number + fwrr->number; + break; + case fpet_minus: + output->number = fwrl->number - fwrr->number; + break; + case fpet_mult: + output->number = fwrl->number * fwrr->number; + break; + case fpet_div: + output->number = fwrr->number ? 
(fwrl->number / fwrr->number) : INFINITY; + break; + default: + assert(0); + break; + } + fwrl = NULL; + break; + + default: + fy_error(diag, "fwrl->type=%s\n", fy_walk_result_type_txt[fwrl->type]); + assert(0); + break; + } + } + +out: + fy_walk_result_free(fwrl); + fy_walk_result_free(fwrr); + return output; +} + +struct fy_walk_result * +fy_walk_result_conditional_simple(struct fy_path_exec *fypx, + struct fy_path_expr *expr, + struct fy_path_expr *exprl, struct fy_walk_result *fwrl, + struct fy_path_expr *exprr, struct fy_walk_result *fwrr) +{ + bool match; + + match = fy_walk_result_compare_simple(fypx, expr->type, fwrl, fwrr); + + if (!match) { + fy_walk_result_free(fwrl); + fy_walk_result_free(fwrr); + return NULL; + } + + /* path expr, return left hand side result */ + fy_walk_result_free(fwrr); + return fwrl; +} + +struct fy_walk_result * +fy_walk_result_lhs_rhs(struct fy_path_exec *fypx, + struct fy_path_expr *expr, + struct fy_path_expr *exprl, struct fy_walk_result *fwrl, + struct fy_path_expr *exprr, struct fy_walk_result *fwrr) +{ + struct fy_walk_result *output = NULL, *fwr, *fwrrt, *fwrlt, *fwrlc, *fwrrc; + struct fy_walk_result *outputl = NULL, *outputr = NULL; + + assert(expr); + assert(exprl); + assert(exprr); + + /* only supports those */ + if (!fy_path_expr_type_is_conditional(expr->type) && + !fy_path_expr_type_is_arithmetic(expr->type)) + goto out; + + /* both NULL */ + if (!fwrl && !fwrr) + goto out; + + /* any NULL */ + if (!fwrl || !fwrr) { + if (expr->type == fpet_neq) { + output = fwrl; + fwrl = NULL; + } + goto out; + } + + output = fy_path_exec_walk_result_create(fypx, fwrt_refs); + assert(output); + + for (fwrlt = fy_walk_result_iter_start(fwrl); fwrlt; + fwrlt = fy_walk_result_iter_next(fwrl, fwrlt)) { + + /* for recursive ones */ + if (fwrlt->type == fwrt_refs) { + + fwrlc = fy_walk_result_clone(fwrlt); + assert(fwrlc); + fwrrc = fy_walk_result_clone(fwrr); + assert(fwrrc); + + outputl = fy_walk_result_lhs_rhs(fypx, expr, exprl, 
fwrlc, exprr, fwrrc); + if (outputl) + fy_walk_result_list_add_tail(&output->refs, outputl); + else + fy_walk_result_free(outputl); + continue; + } + + /* non-recursive case */ + for (fwrrt = fy_walk_result_iter_start(fwrr); fwrrt; + fwrrt = fy_walk_result_iter_next(fwrr, fwrrt)) { + + /* for recursive ones */ + if (fwrrt->type == fwrt_refs) { + + fwrlc = fy_walk_result_clone(fwrlt); + assert(fwrlc); + fwrrc = fy_walk_result_clone(fwrrt); + assert(fwrrc); + + outputr = fy_walk_result_lhs_rhs(fypx, expr, exprl, fwrlc, exprr, fwrrc); + if (outputr) + fy_walk_result_list_add_tail(&output->refs, outputr); + else + fy_walk_result_free(outputr); + continue; + } + + + fwrlc = fy_walk_result_clone(fwrlt); + assert(fwrlc); + fwrrc = fy_walk_result_clone(fwrrt); + assert(fwrrc); + + fwr = NULL; + + if (fy_path_expr_type_is_conditional(expr->type)) + fwr = fy_walk_result_conditional_simple(fypx, expr, exprl, fwrlc, exprr, fwrrc); + else if (fy_path_expr_type_is_arithmetic(expr->type)) + fwr = fy_walk_result_arithmetic_simple(fypx, expr, exprl, fwrlc, exprr, fwrrc); + else { + assert(0); + } + + fwrlc = NULL; + fwrrc = NULL; + + if (fwr) + fy_walk_result_list_add_tail(&output->refs, fwr); + } + } + +out: + fy_walk_result_free(fwrl); + fy_walk_result_free(fwrr); + + return fy_walk_result_simplify(output); +} + +struct fy_path_expr * +fy_scalar_walk_result_to_expr(struct fy_path_exec *fypx, struct fy_walk_result *fwr, enum fy_path_expr_type ptype) +{ + struct fy_input *fyit = NULL; + struct fy_path_expr *exprt = NULL; + struct fy_atom handle; + bool collection_addressing; + char *buf; + int rc __FY_DEBUG_UNUSED__; + + exprt = NULL; + + if (!fwr) + return NULL; + + collection_addressing = ptype == fpet_chain || ptype == fpet_multi; + + switch (fwr->type) { + case fwrt_string: + fyit = fy_input_from_malloc_data(fwr->string, FY_NT, &handle, true); + assert(fyit); + + fwr->string = NULL; + fy_walk_result_free(fwr); + fwr = NULL; + + exprt = fy_path_expr_alloc(); + assert(exprt); + 
if (collection_addressing) { + exprt->type = fpet_map_key; + exprt->fyt = fy_token_create(FYTT_PE_MAP_KEY, &handle, NULL); + assert(exprt->fyt); + } else { + exprt->type = fpet_scalar; + exprt->fyt = fy_token_create(FYTT_SCALAR, &handle, FYSS_PLAIN, NULL); + assert(exprt->fyt); + } + break; + + case fwrt_number: + + rc = asprintf(&buf, "%d", (int)fwr->number); + assert(rc != -1); + + fyit = fy_input_from_malloc_data(buf, FY_NT, &handle, true); + assert(fyit); + + exprt = fy_path_expr_alloc(); + assert(exprt); + if (collection_addressing) { + exprt->type = fpet_seq_index; + exprt->fyt = fy_token_create(FYTT_PE_SEQ_INDEX, &handle, (int)fwr->number); + assert(exprt->fyt); + } else { + exprt->type = fpet_scalar; + exprt->fyt = fy_token_create(FYTT_SCALAR, &handle, FYSS_PLAIN, NULL); + assert(exprt->fyt); + } + + break; + + default: + break; + } + + fy_walk_result_free(fwr); + fy_input_unref(fyit); + + return exprt; +} + +struct fy_walk_result * +fy_path_expr_execute(struct fy_path_exec *fypx, int level, struct fy_path_expr *expr, + struct fy_walk_result *input, enum fy_path_expr_type ptype) +{ + struct fy_diag *diag; + struct fy_walk_result *fwr, *fwrn, *fwrt, *fwrtn; + struct fy_walk_result *output = NULL, *input1, *output1, *input2, *output2; + struct fy_path_expr *exprn, *exprl, *exprr; + struct fy_node *fyn, *fynn, *fyni; + struct fy_token *fyt; + int start, end, count, i; + bool match; + struct fy_path_expr *exprt; + unsigned int nargs; + struct fy_walk_result **fwr_args; + void *prevp; + int rc __FY_DEBUG_UNUSED__; + + /* error */ + if (!fypx || !expr) + goto out; + + diag = fypx->cfg.diag; + +#ifdef DEBUG_EXPR + if (input) + fy_walk_result_dump(input, diag, FYET_NOTICE, level, "input %s\n", fy_path_expr_type_txt[expr->type]); +#endif + + /* recursive */ + if (input && input->type == fwrt_refs && !fy_path_expr_type_handles_refs(expr->type)) { + + output = fy_path_exec_walk_result_create(fypx, fwrt_refs); + assert(output); + + while ((fwr = 
fy_walk_result_list_pop(&input->refs)) != NULL) { + + fwrn = fy_path_expr_execute(fypx, level + 1, expr, fwr, ptype); + if (fwrn) + fy_walk_result_list_add_tail(&output->refs, fwrn); + } + fy_walk_result_free(input); + input = NULL; + goto out; + } + + + /* single result case is common enough to optimize */ + if (fy_path_expr_type_is_single_result(expr->type)) { + + if (input && input->type == fwrt_node_ref) { + + fynn = fy_path_expr_execute_single_result(diag, expr, input->fyn); + if (!fynn) + goto out; + + fy_walk_result_clean(input); + output = input; + output->type = fwrt_node_ref; + output->fyn = fynn; + input = NULL; + } + + goto out; + } + + /* handle the remaining multi result cases */ + switch (expr->type) { + + case fpet_chain: + + if (!input) + goto out; + + /* iterate over each chain item */ + output = input; + input = NULL; + for (exprn = fy_path_expr_list_head(&expr->children); exprn; + exprn = fy_path_expr_next(&expr->children, exprn)) { + + output = fy_path_expr_execute(fypx, level + 1, exprn, output, expr->type); + if (!output) + break; + } + + break; + + case fpet_multi: + + if (!input) + goto out; + + /* allocate a refs output result */ + output = fy_path_exec_walk_result_create(fypx, fwrt_refs); + assert(output); + + /* iterate over each multi item */ + for (exprn = fy_path_expr_list_head(&expr->children); exprn; + exprn = fy_path_expr_next(&expr->children, exprn)) { + + input2 = fy_walk_result_clone(input); + assert(input2); + + output2 = fy_path_expr_execute(fypx, level + 1, exprn, input2, expr->type); + if (!output2) + continue; + + fy_walk_result_list_add_tail(&output->refs, output2); + } + fy_walk_result_free(input); + input = NULL; + break; + + case fpet_every_child: + + if (!input) + goto out; + + /* only valid for node ref */ + if (input->type != fwrt_node_ref) + break; + + fyn = input->fyn; + + /* every scalar/alias is a single result (although it should not happen) */ + if (fy_node_is_scalar(fyn) || fy_node_is_alias(fyn)) { + output = 
input; + input = NULL; + break; + + } + + /* re-use input for output root */ + fy_walk_result_clean(input); + output = input; + input = NULL; + + output->type = fwrt_refs; + fy_walk_result_list_init(&output->refs); + + prevp = NULL; + while ((fyni = fy_node_collection_iterate(fyn, &prevp)) != NULL) { + fwr = fy_path_exec_walk_result_create(fypx, fwrt_node_ref, fyni); + assert(fwr); + + fy_walk_result_list_add_tail(&output->refs, fwr); + } + + break; + + case fpet_every_child_r: + + if (!input) + goto out; + + /* only valid for node ref */ + if (input->type != fwrt_node_ref) + break; + + fyn = input->fyn; + + /* re-use input for output root */ + fy_walk_result_clean(input); + output = input; + input = NULL; + + output->type = fwrt_refs; + fy_walk_result_list_init(&output->refs); + + rc = fy_walk_result_all_children_recursive_internal(fypx, fyn, output); + assert(!rc); + + break; + + case fpet_seq_slice: + + if (!input) + goto out; + + /* only valid for node ref on a sequence */ + if (input->type != fwrt_node_ref || !fy_node_is_sequence(input->fyn)) { + break; + } + fyn = input->fyn; + + fyt = expr->fyt; + assert(fyt); + assert(fyt->type == FYTT_PE_SEQ_SLICE); + + start = fyt->seq_slice.start_index; + end = fyt->seq_slice.end_index; + count = fy_node_sequence_item_count(fyn); + + /* don't handle negative slices yet */ + if (start < 0 || end < 1 || start >= end) + break; + + if (count < end) + end = count; + + /* re-use input for output root */ + fy_walk_result_clean(input); + output = input; + input = NULL; + + output->type = fwrt_refs; + fy_walk_result_list_init(&output->refs); + + for (i = start; i < end; i++) { + + fynn = fy_node_sequence_get_by_index(fyn, i); + if (!fynn) + continue; + + fwr = fy_path_exec_walk_result_create(fypx, fwrt_node_ref, fynn); + assert(fwr); + + fy_walk_result_list_add_tail(&output->refs, fwr); + } + + break; + + case fpet_eq: + case fpet_neq: + case fpet_lt: + case fpet_gt: + case fpet_lte: + case fpet_gte: + case fpet_plus: + case 
fpet_minus: + case fpet_mult: + case fpet_div: + + exprl = fy_path_expr_lhs(expr); + assert(exprl); + + exprr = fy_path_expr_rhs(expr); + assert(exprr); + + if (input) { + input1 = fy_walk_result_clone(input); + assert(input1); + + input2 = input; + input = NULL; + + } else { + input1 = NULL; + input2 = NULL; + } + + /* execute LHS and RHS */ + output1 = fy_path_expr_execute(fypx, level + 1, exprl, input1, expr->type); + output2 = fy_path_expr_execute(fypx, level + 1, exprr, input2, expr->type); + + output = fy_walk_result_lhs_rhs(fypx, expr, exprl, output1, exprr, output2); + + break; + + case fpet_scalar: + + /* duck typing! */ + if (fy_token_is_number(expr->fyt)) { + output = fy_path_exec_walk_result_create(fypx, fwrt_number, token_number(expr->fyt)); + assert(output); + } else { + output = fy_path_exec_walk_result_create(fypx, fwrt_string, fy_token_get_text0(expr->fyt)); + assert(output); + } + + fy_walk_result_free(input); + input = NULL; + + break; + + case fpet_logical_or: + + /* return the first that is not NULL */ + for (exprn = fy_path_expr_list_head(&expr->children); exprn; + exprn = fy_path_expr_next(&expr->children, exprn)) { + + if (input) { + input1 = fy_walk_result_clone(input); + assert(input1); + } else { + input1 = NULL; + } + + output = fy_path_expr_execute(fypx, level + 1, exprn, input1, expr->type); + if (output) + break; + } + break; + + case fpet_logical_and: + output = NULL; + + /* return the last that was not NULL */ + for (exprn = fy_path_expr_list_head(&expr->children); exprn; + exprn = fy_path_expr_next(&expr->children, exprn)) { + + if (input) { + input1 = fy_walk_result_clone(input); + assert(input1); + } else { + input1 = NULL; + } + + output1 = fy_path_expr_execute(fypx, level + 1, exprn, input1, expr->type); + if (output1) { + fy_walk_result_free(output); + output = output1; + } else + break; + } + break; + + case fpet_filter_unique: + + if (!input) + goto out; + + /* flatten input */ + input = fy_walk_result_flatten(input); + 
assert(input); /* must work */ + + /* for non refs, return input */ + if (input->type != fwrt_refs) { + output = input; + input = NULL; + break; + } + + /* remove duplicates filter */ + for (fwr = fy_walk_result_list_head(&input->refs); fwr; + fwr = fy_walk_result_next(&input->refs, fwr)) { + + /* do not check recursively */ + if (fwr->type == fwrt_refs) + continue; + + /* check the entries from this point forward */ + for (fwrt = fy_walk_result_next(&input->refs, fwr); fwrt; fwrt = fwrtn) { + + fwrtn = fy_walk_result_next(&input->refs, fwrt); + + /* do not check recursively (or the same result) */ + if (fwrt->type == fwrt_refs) + continue; + + assert(fwrt != fwr); + + match = fy_walk_result_compare_simple(fypx, fpet_eq, fwr, fwrt); + + if (match) { + fy_walk_result_list_del(&input->refs, fwrt); + fy_walk_result_free(fwrt); + } + } + } + output = input; + input = NULL; + + break; + + case fpet_scalar_expr: + + exprl = fy_path_expr_list_head(&expr->children); + if (!exprl) { + fy_warning(diag, "%s:%d\n", __FILE__, __LINE__); + goto out; + } + + output = fy_path_expr_execute(fypx, level + 1, exprl, NULL, ptype); + if (!output) { + fy_warning(diag, "%s:%d\n", __FILE__, __LINE__); + goto out; + } + + exprt = fy_scalar_walk_result_to_expr(fypx, output, ptype); + output = NULL; + if (!exprt) { + fy_warning(diag, "%s:%d\n", __FILE__, __LINE__); + break; + } + + output = fy_path_expr_execute(fypx, level + 1, exprt, input, ptype); + if (!output) { + fy_warning(diag, "%s:%d\n", __FILE__, __LINE__); + } + input = NULL; + + fy_path_expr_free(exprt); + break; + + case fpet_path_expr: + exprl = fy_path_expr_list_head(&expr->children); + if (!exprl) + goto out; + + output = fy_path_expr_execute(fypx, level + 1, exprl, input, ptype); + input = NULL; + break; + + case fpet_method: + + assert(expr->fym); + + /* execute the arguments */ + nargs = expr->fym->nargs; + if (nargs > 0) { + fwr_args = FY_ALLOCA(sizeof(*fwr_args) * nargs); + memset(fwr_args, 0, sizeof(*fwr_args) * nargs); + 
for (i = 0, exprt = fy_path_expr_list_head(&expr->children); exprt; + exprt = fy_path_expr_next(&expr->children, exprt), i++) { + + if (input) { + input1 = fy_walk_result_clone(input); + assert(input1); + } else + input1 = NULL; + + fwr_args[i] = fy_path_expr_execute(fypx, level + 1, exprt, input1, expr->type); + } + } else + fwr_args = NULL; + + output = expr->fym->exec(expr->fym, fypx, level + 1, expr, input, fwr_args, nargs); + input = NULL; + + break; + + default: + fy_error(diag, "%s\n", fy_path_expr_type_txt[expr->type]); + assert(0); + break; + } + +out: + fy_walk_result_free(input); + output = fy_walk_result_simplify(output); + +#ifdef DEBUG_EXPR + if (output) + fy_walk_result_dump(output, diag, FYET_NOTICE, level, "output %s\n", fy_path_expr_type_txt[expr->type]); +#endif + return output; +} + +static int fy_path_exec_execute_internal(struct fy_path_exec *fypx, + struct fy_path_expr *expr, struct fy_node *fyn_start) +{ + struct fy_walk_result *fwr; + + if (!fypx || !expr || !fyn_start) + return -1; + + fy_walk_result_free(fypx->result); + fypx->result = NULL; + + fwr = fy_path_exec_walk_result_create(fypx, fwrt_node_ref, fyn_start); + assert(fwr); + + fwr = fy_path_expr_execute(fypx, 0, expr, fwr, fpet_none); + if (!fwr) + return 0; + + /* flatten results */ + if (fwr->type == fwrt_refs) { + fwr = fy_walk_result_flatten(fwr); + if (!fwr) + return -1; + } + fypx->result = fwr; + + return 0; +} + +int fy_path_exec_execute(struct fy_path_exec *fypx, struct fy_path_expr *expr, struct fy_node *fyn_start) +{ + if (!fypx || !expr || !fyn_start) + return -1; + + fypx->fyn_start = fyn_start; + return fy_path_exec_execute_internal(fypx, expr, fypx->fyn_start); +} + +struct fy_node * +fy_path_exec_results_iterate(struct fy_path_exec *fypx, void **prevp) +{ + struct fy_walk_result *fwr; + + if (!fypx || !prevp) + return NULL; + + if (!fypx->result) + return NULL; + + if (fypx->result->type != fwrt_refs) { + fwr = fypx->result; + + if (fwr->type != fwrt_node_ref) + 
return NULL; + + if (!*prevp) { + *prevp = fwr; + return fwr->fyn; + } + *prevp = NULL; + return NULL; + } + + /* loop over non node refs for now */ + do { + if (!*prevp) + fwr = fy_walk_result_list_head(&fypx->result->refs); + else + fwr = fy_walk_result_next(&fypx->result->refs, *prevp); + *prevp = fwr; + } while (fwr && fwr->type != fwrt_node_ref); + + return fwr ? fwr->fyn : NULL; +} + +struct fy_walk_result * +fy_path_exec_take_results(struct fy_path_exec *fypx) +{ + struct fy_walk_result *fwr; + + if (!fypx || !fypx->result) + return NULL; + fwr = fypx->result; + fypx->result = NULL; + return fwr; +} + +struct fy_walk_result * +fy_path_exec_walk_result_vcreate(struct fy_path_exec *fypx, enum fy_walk_result_type type, va_list ap) +{ + struct fy_walk_result_list *fwrl; + + if (!fypx) + return NULL; + + fwrl = fy_path_exec_walk_result_rl(fypx); + return fy_walk_result_vcreate_rl(fwrl, type, ap); +} + +struct fy_walk_result * +fy_path_exec_walk_result_create(struct fy_path_exec *fypx, enum fy_walk_result_type type, ...) +{ + struct fy_walk_result_list *fwrl; + struct fy_walk_result *fwr; + va_list ap; + + if (!fypx) + return NULL; + + fwrl = fy_path_exec_walk_result_rl(fypx); + + va_start(ap, type); + fwr = fy_walk_result_vcreate_rl(fwrl, type, ap); + va_end(ap); + + if (!fwr) + return NULL; + + fwr->fypx = fy_path_exec_ref(fypx); + + return fwr; +} + +void +fy_path_exec_walk_result_free(struct fy_path_exec *fypx, struct fy_walk_result *fwr) +{ + struct fy_walk_result_list *fwrl; + + fwrl = fypx ? 
fy_path_exec_walk_result_rl(fypx) : NULL; + fy_walk_result_free_rl(fwrl, fwr); +} + +int fy_document_setup_path_expr_data(struct fy_document *fyd) +{ + struct fy_path_parse_cfg pcfg_local, *pcfg = &pcfg_local; + struct fy_path_expr_document_data *pxdd; + + if (!fyd || fyd->pxdd) + return 0; + + pxdd = malloc(sizeof(*pxdd)); + if (!pxdd) + goto err_no_mem; + + memset(pxdd, 0, sizeof(*pxdd)); + + fy_walk_result_list_init(&pxdd->fwr_recycle); + + memset(pcfg, 0, sizeof(*pcfg)); + pcfg->diag = fyd->diag; + pxdd->fypp = fy_path_parser_create(pcfg); + if (!pxdd->fypp) + goto err_no_fypp; + + fyd->pxdd = pxdd; + + return 0; + +err_no_fypp: + free(pxdd); +err_no_mem: + return -1; +} + +void fy_document_cleanup_path_expr_data(struct fy_document *fyd) +{ + struct fy_path_expr_document_data *pxdd; + struct fy_walk_result *fwr; + + if (!fyd || !fyd->pxdd) + return; + + pxdd = fyd->pxdd; + + fy_path_parser_destroy(pxdd->fypp); + + while ((fwr = fy_walk_result_list_pop(&pxdd->fwr_recycle)) != NULL) + free(fwr); + + free(fyd->pxdd); + fyd->pxdd = NULL; +} + +int fy_node_setup_path_expr_data(struct fy_node *fyn) +{ + struct fy_path_expr_document_data *pxdd; + struct fy_path_expr_node_data *pxnd; + const char *text; + size_t len; + char *alloc = NULL; + int rc; + + if (!fyn || fyn->pxnd) + return 0; + + /* only on alias nodes */ + if (!fy_node_is_alias(fyn)) + return 0; + + /* a document must exist */ + if (!fyn->fyd) + return -1; + + if (!fyn->fyd->pxdd) { + rc = fy_document_setup_path_expr_data(fyn->fyd); + if (rc) + return rc; + } + pxdd = fyn->fyd->pxdd; + assert(pxdd); + + pxnd = malloc(sizeof(*pxnd)); + if (!pxnd) + goto err_no_mem; + memset(pxnd, 0, sizeof(*pxnd)); + + text = fy_token_get_text(fyn->scalar, &len); + if (!text) + goto err_no_text; + + if (!fy_is_first_alpha(*text)) { + pxnd->fyi = fy_input_from_data(text, len, NULL, false); + if (!pxnd->fyi) + goto err_no_input; + } else { + alloc = malloc(len + 2); + if (!alloc) + goto err_no_input; + alloc[0] = '*'; + 
memcpy(alloc + 1, text, len); + alloc[len + 1] = '\0'; + + pxnd->fyi = fy_input_from_malloc_data(alloc, len + 1, NULL, false); + if (!pxnd->fyi) + goto err_no_input; + } + + fy_path_parser_reset(pxdd->fypp); + + rc = fy_path_parser_open(pxdd->fypp, pxnd->fyi, NULL); + if (rc) + goto err_no_open; + + pxnd->expr = fy_path_parse_expression(pxdd->fypp); + if (!pxnd->expr) + goto err_parse; + + fy_path_parser_close(pxdd->fypp); + + fyn->pxnd = pxnd; + + return 0; +err_parse: + fy_path_parser_close(pxdd->fypp); +err_no_open: + fy_input_unref(pxnd->fyi); +err_no_input: + if (alloc) + free(alloc); +err_no_text: + free(pxnd); +err_no_mem: + return -1; +} + +void fy_node_cleanup_path_expr_data(struct fy_node *fyn) +{ + struct fy_path_expr_node_data *pxnd; + + if (!fyn || !fyn->pxnd) + return; + + pxnd = fyn->pxnd; + + if (pxnd->expr) + fy_path_expr_free(pxnd->expr); + + if (pxnd->fyi) + fy_input_unref(pxnd->fyi); + + free(pxnd); + fyn->pxnd = NULL; +} + +struct fy_walk_result * +fy_node_alias_resolve_by_ypath_result(struct fy_node *fyn) +{ + struct fy_document *fyd; + struct fy_path_expr_document_data *pxdd = NULL; + struct fy_path_expr_node_data *pxnd = NULL; + struct fy_walk_result *fwr; + struct fy_anchor *fya; + struct fy_path_exec *fypx = NULL; + int rc; + char* path; + + if (!fyn || !fy_node_is_alias(fyn)) + return NULL; + + fyd = fyn->fyd; + if (!fyd) + return NULL; + + /* simple */ + fya = fy_document_lookup_anchor_by_token(fyd, fyn->scalar); + if (fya) { + + fwr = fy_path_exec_walk_result_create(fypx, fwrt_node_ref, fya->fyn); + fyd_error_check(fyd, fwr, err_out, + "fy_walk_result_alloc_rl() failed"); + + return fwr; + } + + /* ok, complex, setup the node data */ + rc = fy_node_setup_path_expr_data(fyn); + fyd_error_check(fyd, !rc, err_out, + "fy_node_setup_path_expr_data() failed"); + + pxnd = fyn->pxnd; + assert(pxnd); + + pxdd = fyd->pxdd; + assert(pxdd); + + if (pxnd->traversals++ > 0) { + fy_node_get_path_alloca(fyn, &path); + FYD_NODE_ERROR(fyd, fyn, FYEM_DOC, 
+ "recursive reference detected at %s\n", + path); + pxnd->traversals--; + return NULL; + } + + fypx = fy_path_exec_create_on_document(fyd); + fyd_error_check(fyd, !rc, err_out, + "fy_path_exec_create_on_document() failed"); + + fy_path_exec_set_result_recycle_list(fypx, &pxdd->fwr_recycle); + +#if 0 + { + struct fy_document *fyd_pe; + const char *text; + size_t len; + + text = fy_token_get_text(fyn->scalar, &len); + if (text) { + fyd_pe = fy_path_expr_to_document(pxnd->expr); + if (fyd_pe) { + fprintf(stderr, "%s: %.*s\n", __func__, (int)len, text); + fy_document_default_emit_to_fp(fyd_pe, stderr); + fy_document_destroy(fyd_pe); + } + } + } +#endif + + // fprintf(stderr, "%s: %s 2\n", __func__, fy_node_get_path_alloca(fyn)); + + /* execute, starting at this */ + rc = fy_path_exec_execute(fypx, pxnd->expr, fyn); + fyd_error_check(fyd, !rc, err_out, + "fy_path_exec_execute() failed"); + + // fprintf(stderr, "%s: %s 3\n", __func__, fy_node_get_path_alloca(fyn)); + + fwr = fy_path_exec_take_results(fypx); + + fy_path_exec_unref(fypx); + + pxnd->traversals--; + + if (!fwr) + return NULL; + + // fprintf(stderr, "%s: %s 4\n", __func__, fy_node_get_path_alloca(fyn)); + + return fwr; + +err_out: + if (pxnd) + pxnd->traversals--; + fy_path_exec_unref(fypx); /* NULL OK */ + return NULL; +} + +struct fy_node *fy_node_alias_resolve_by_ypath(struct fy_node *fyn) +{ + struct fy_anchor *fya; + struct fy_walk_result *fwr; + void *iterp; + + if (!fyn || !fy_node_is_alias(fyn)) + return NULL; + + /* simple and common enough to do it now */ + fya = fy_document_lookup_anchor_by_token(fyn->fyd, fyn->scalar); + if (fya) + return fya->fyn; + + fwr = fy_node_alias_resolve_by_ypath_result(fyn); + if (!fwr) + return NULL; + + iterp = NULL; + fyn = fy_walk_result_node_iterate(fwr, &iterp); + + fy_walk_result_free(fwr); + + return fyn; +} + +struct fy_walk_result * +fy_node_by_ypath_result(struct fy_node *fyn, const char *path, size_t len) +{ + struct fy_path_expr_document_data *pxdd; + 
struct fy_document *fyd; + struct fy_walk_result *fwr; + struct fy_anchor *fya; + struct fy_input *fyi; + struct fy_path_expr *expr; + struct fy_path_exec *fypx = NULL; + int rc; + + if (!fyn || !path || !len) + return NULL; + + fyd = fyn->fyd; + if (!fyd) + return NULL; + + if (len == FY_NT) + len = strlen(path); + + /* simple */ + fya = fy_document_lookup_anchor(fyn->fyd, path, len); + if (fya) { + fwr = fy_path_exec_walk_result_create(fypx, fwrt_node_ref, fya->fyn); + fyd_error_check(fyd, fwr, err_out, + "fy_walk_result_alloc_rl() failed"); + return fwr; + } + + /* ok, complex, setup the document data */ + rc = fy_document_setup_path_expr_data(fyd); + fyd_error_check(fyd, !rc, err_setup, + "fy_node_setup_path_expr_data() failed"); + + pxdd = fyd->pxdd; + assert(pxdd); + + fyi = fy_input_from_data(path, len, NULL, false); + fyd_error_check(fyd, fyi, err_no_input, + "fy_input_from_data() failed"); + + fy_path_parser_reset(pxdd->fypp); + + rc = fy_path_parser_open(pxdd->fypp, fyi, NULL); + fyd_error_check(fyd, !rc, err_no_open, + "fy_path_parser_open() failed"); + + expr = fy_path_parse_expression(pxdd->fypp); + fyd_error_check(fyd, expr, err_parse, + "fy_path_parse_expression() failed"); + + fy_path_parser_close(pxdd->fypp); + + fypx = fy_path_exec_create_on_document(fyd); + fyd_error_check(fyd, !rc, err_no_fypx, + "fy_path_exec_create_on_document() failed"); + + /* execute, starting at this */ + rc = fy_path_exec_execute(fypx, expr, fyn); + fyd_error_check(fyd, !rc, err_exec, + "fy_path_parse_expression() failed"); + + fwr = fy_path_exec_take_results(fypx); + + fy_path_exec_unref(fypx); + + fy_path_expr_free(expr); + fy_input_unref(fyi); + + return fwr; + +err_exec: + fy_path_expr_free(expr); +err_no_fypx: + fy_path_exec_unref(fypx); +err_parse: + fy_path_parser_close(pxdd->fypp); + +err_no_open: + fy_input_unref(fyi); + +err_no_input: +err_setup: +err_out: + return NULL; +} + +struct fy_node *fy_node_by_ypath(struct fy_node *fyn, const char *path, size_t len) +{ 
+ struct fy_walk_result *fwr; + struct fy_anchor *fya; + void *iterp; + + if (!fyn || !path || !len) + return NULL; + + /* simple */ + fya = fy_document_lookup_anchor(fyn->fyd, path, len); + if (fya) + return fya->fyn; + + fwr = fy_node_by_ypath_result(fyn, path, len); + if (!fwr) + return NULL; + + iterp = NULL; + fyn = fy_walk_result_node_iterate(fwr, &iterp); + + fy_walk_result_free(fwr); + + return fyn; +} diff --git a/contrib/libs/libfyaml/src/lib/fy-walk.h b/contrib/libs/libfyaml/src/lib/fy-walk.h new file mode 100644 index 0000000000..bc200f66eb --- /dev/null +++ b/contrib/libs/libfyaml/src/lib/fy-walk.h @@ -0,0 +1,438 @@ +/* + * fy-walk.h - walker internal header file + * + * Copyright (c) 2021 Pantelis Antoniou <pantelis.antoniou@konsulko.com> + * + * SPDX-License-Identifier: MIT + */ +#ifndef FY_WALK_H +#define FY_WALK_H + +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#include <stdint.h> +#include <stdbool.h> +#include <stdio.h> +#include <stdarg.h> +#include <assert.h> + +#include <libfyaml.h> + +#include "fy-ctype.h" +#include "fy-utf8.h" +#include "fy-list.h" +#include "fy-typelist.h" +#include "fy-types.h" +#include "fy-diag.h" +#include "fy-dump.h" +#include "fy-docstate.h" +#include "fy-accel.h" +#include "fy-token.h" + +struct fy_document; + +enum fy_walk_result_type { + fwrt_none, + fwrt_node_ref, + fwrt_number, + fwrt_string, + fwrt_doc, + fwrt_refs, +}; + +#define FWRT_COUNT (fwrt_refs + 1) +extern const char *fy_walk_result_type_txt[FWRT_COUNT]; + +struct fy_path_exec; + +FY_TYPE_FWD_DECL_LIST(walk_result); +struct fy_walk_result { + struct fy_list_head node; + struct fy_path_exec *fypx; + enum fy_walk_result_type type; + union { + struct fy_node *fyn; + double number; + char *string; + struct fy_walk_result_list refs; + struct fy_document *fyd; + }; +}; +FY_TYPE_DECL_LIST(walk_result); + +struct fy_walk_result *fy_walk_result_alloc_rl(struct fy_walk_result_list *fwrl); +void fy_walk_result_free_rl(struct fy_walk_result_list *fwrl, 
struct fy_walk_result *fwr); +void fy_walk_result_list_free_rl(struct fy_walk_result_list *fwrl, struct fy_walk_result_list *results); + +void fy_walk_result_free(struct fy_walk_result *fwr); + +struct fy_walk_result *fy_walk_result_vcreate_rl(struct fy_walk_result_list *fwrl, enum fy_walk_result_type type, va_list ap); +struct fy_walk_result *fy_walk_result_create_rl(struct fy_walk_result_list *fwrl, enum fy_walk_result_type type, ...); + +static inline struct fy_walk_result * +fy_walk_result_iter_start(struct fy_walk_result *fwr) +{ + struct fy_walk_result *fwri; + + if (!fwr) + return NULL; + if (fwr->type != fwrt_refs) + return fwr; + fwri = fy_walk_result_list_head(&fwr->refs); + if (!fwri) + return NULL; + return fwri; +} + +static inline struct fy_walk_result * +fy_walk_result_iter_next(struct fy_walk_result *fwr, struct fy_walk_result *fwri) +{ + if (!fwr || !fwri || fwr->type != fwrt_refs) + return NULL; + fwri = fy_walk_result_next(&fwr->refs, fwri); + if (!fwri) + return NULL; + return fwri; +} + +struct fy_node * +fy_walk_result_node_iterate(struct fy_walk_result *fwr, void **prevp); + +enum fy_path_expr_type { + fpet_none, + /* ypath */ + fpet_root, /* /^ or / at the beginning of the expr */ + fpet_this, /* /. */ + fpet_parent, /* /.. 
*/ + fpet_every_child, // /* every immediate child + fpet_every_child_r, // /** every recursive child + fpet_filter_collection, /* match only collection (at the end only) */ + fpet_filter_scalar, /* match only scalars (leaves) */ + fpet_filter_sequence, /* match only sequences */ + fpet_filter_mapping, /* match only mappings */ + fpet_filter_unique, /* removes duplicates */ + fpet_seq_index, + fpet_map_key, /* complex map key (quoted, flow seq or map) */ + fpet_seq_slice, + fpet_alias, + + fpet_multi, /* merge results of children */ + fpet_chain, /* children move in sequence */ + fpet_logical_or, /* first non null result set */ + fpet_logical_and, /* the last non null result set */ + + fpet_eq, /* equal expression */ + fpet_neq, /* not equal */ + fpet_lt, /* less than */ + fpet_gt, /* greater than */ + fpet_lte, /* less or equal than */ + fpet_gte, /* greater or equal than */ + + fpet_scalar, /* scalar */ + + fpet_plus, /* add */ + fpet_minus, /* subtract */ + fpet_mult, /* multiply */ + fpet_div, /* divide */ + + fpet_lparen, /* left paren (they do not appear in final expression) */ + fpet_rparen, /* right parent */ + fpet_method, /* method (or parentheses) */ + + fpet_scalar_expr, /* non-eval phase scalar expression */ + fpet_path_expr, /* non-eval phase path expression */ + fpet_arg_separator, /* argument separator (comma in scalar mode) */ +}; + +#define FPET_COUNT (fpet_arg_separator + 1) + +extern const char *path_expr_type_txt[FPET_COUNT]; + +static inline bool fy_path_expr_type_is_valid(enum fy_path_expr_type type) +{ + return type >= fpet_root && type < FPET_COUNT; +} + +static inline bool fy_path_expr_type_is_single_result(enum fy_path_expr_type type) +{ + return type == fpet_root || + type == fpet_this || + type == fpet_parent || + type == fpet_map_key || + type == fpet_seq_index || + type == fpet_alias || + type == fpet_filter_collection || + type == fpet_filter_scalar || + type == fpet_filter_sequence || + type == fpet_filter_mapping; +} + +static 
inline bool fy_path_expr_type_is_parent(enum fy_path_expr_type type) +{ + return type == fpet_multi || + type == fpet_chain || + type == fpet_logical_or || + type == fpet_logical_and || + type == fpet_eq || + type == fpet_method || + type == fpet_scalar_expr || + type == fpet_path_expr; +} + +static inline bool fy_path_expr_type_is_mergeable(enum fy_path_expr_type type) +{ + return type == fpet_multi || + type == fpet_chain || + type == fpet_logical_or || + type == fpet_logical_and; +} + +/* type handles refs by itself */ +static inline bool fy_path_expr_type_handles_refs(enum fy_path_expr_type type) +{ + return type == fpet_filter_unique || + type == fpet_method; +} + +static inline bool fy_path_expr_type_is_parent_lhs_rhs(enum fy_path_expr_type type) +{ + return type == fpet_eq || + type == fpet_neq || + type == fpet_lt || + type == fpet_gt || + type == fpet_lte || + type == fpet_gte || + + type == fpet_plus || + type == fpet_minus || + type == fpet_mult || + type == fpet_div; +} + +static inline bool +fy_path_expr_type_is_conditional(enum fy_path_expr_type type) +{ + return type == fpet_eq || + type == fpet_neq || + type == fpet_lt || + type == fpet_gt || + type == fpet_lte || + type == fpet_gte; +} + +static inline bool +fy_path_expr_type_is_arithmetic(enum fy_path_expr_type type) +{ + return type == fpet_plus || + type == fpet_minus || + type == fpet_mult || + type == fpet_div; +} + +static inline bool +fy_path_expr_type_is_lparen(enum fy_path_expr_type type) +{ + return type == fpet_lparen /* || + type == fpet_method */ ; +} + +enum fy_expr_mode { + fyem_none, /* invalid mode */ + fyem_path, /* expression is path */ + fyem_scalar, /* expression is scalar */ +}; + +#define FYEM_COUNT (fyem_scalar + 1) + +extern const char *fy_expr_mode_txt[FYEM_COUNT]; + +struct fy_path_expr; + +struct fy_method { + const char *name; + size_t len; + enum fy_expr_mode mode; + unsigned int nargs; + struct fy_walk_result *(*exec)(const struct fy_method *fym, + struct fy_path_exec 
*fypx, int level, + struct fy_path_expr *expr, + struct fy_walk_result *input, + struct fy_walk_result **args, int nargs); +}; + +FY_TYPE_FWD_DECL_LIST(path_expr); +struct fy_path_expr { + struct fy_list_head node; + struct fy_path_expr *parent; + enum fy_path_expr_type type; + struct fy_token *fyt; + struct fy_path_expr_list children; + enum fy_expr_mode expr_mode; /* for parens */ + const struct fy_method *fym; +}; +FY_TYPE_DECL_LIST(path_expr); + +static inline struct fy_path_expr * +fy_path_expr_lhs(struct fy_path_expr *expr) +{ + if (!expr || !fy_path_expr_type_is_parent_lhs_rhs(expr->type)) + return NULL; + return fy_path_expr_list_head(&expr->children); +} + +static inline struct fy_path_expr * +fy_path_expr_rhs(struct fy_path_expr *expr) +{ + if (!expr || !fy_path_expr_type_is_parent_lhs_rhs(expr->type)) + return NULL; + return fy_path_expr_list_tail(&expr->children); +} + +const struct fy_mark *fy_path_expr_start_mark(struct fy_path_expr *expr); +const struct fy_mark *fy_path_expr_end_mark(struct fy_path_expr *expr); + +struct fy_expr_stack { + unsigned int top; + unsigned int alloc; + struct fy_path_expr **items; + struct fy_path_expr *items_static[32]; +}; + +void fy_expr_stack_setup(struct fy_expr_stack *stack); +void fy_expr_stack_cleanup(struct fy_expr_stack *stack); +void fy_expr_stack_dump(struct fy_diag *diag, struct fy_expr_stack *stack); +int fy_expr_stack_push(struct fy_expr_stack *stack, struct fy_path_expr *expr); +struct fy_path_expr *fy_expr_stack_peek_at(struct fy_expr_stack *stack, unsigned int pos); +struct fy_path_expr *fy_expr_stack_peek(struct fy_expr_stack *stack); +struct fy_path_expr *fy_expr_stack_pop(struct fy_expr_stack *stack); + +struct fy_path_parser { + struct fy_path_parse_cfg cfg; + struct fy_reader reader; + struct fy_token_list queued_tokens; + enum fy_token_type last_queued_token_type; + bool stream_start_produced; + bool stream_end_produced; + bool stream_error; + int token_activity_counter; + + struct fy_input *fyi; + 
struct fy_expr_stack operators; + struct fy_expr_stack operands; + + /* to avoid allocating */ + struct fy_path_expr_list expr_recycle; + bool suppress_recycling; + + enum fy_expr_mode expr_mode; + int paren_nest_level; + +}; + +struct fy_path_expr *fy_path_expr_alloc(void); +/* fy_path_expr_free is declared in libfyaml.h */ +// void fy_path_expr_free(struct fy_path_expr *expr); + +void fy_path_parser_setup(struct fy_path_parser *fypp, const struct fy_path_parse_cfg *pcfg); +void fy_path_parser_cleanup(struct fy_path_parser *fypp); +int fy_path_parser_open(struct fy_path_parser *fypp, + struct fy_input *fyi, const struct fy_reader_input_cfg *icfg); +void fy_path_parser_close(struct fy_path_parser *fypp); + +struct fy_token *fy_path_scan(struct fy_path_parser *fypp); + +struct fy_path_expr *fy_path_parse_expression(struct fy_path_parser *fypp); + +void fy_path_expr_dump(struct fy_path_expr *expr, struct fy_diag *diag, enum fy_error_type errlevel, int level, const char *banner); + +struct fy_path_exec { + struct fy_path_exec_cfg cfg; + struct fy_node *fyn_start; + struct fy_walk_result *result; + struct fy_walk_result_list *fwr_recycle; + int refs; + bool supress_recycling; +}; + +struct fy_path_exec *fy_path_exec_create(const struct fy_path_exec_cfg *xcfg); +struct fy_path_exec *fy_path_exec_create_on_document(struct fy_document *fyd); +void fy_path_exec_destroy(struct fy_path_exec *fypx); +void fy_path_exec_cleanup(struct fy_path_exec *fypx); + +static inline struct fy_path_exec * +fy_path_exec_ref(struct fy_path_exec *fypx) +{ + /* take care of overflow */ + if (!fypx) + return NULL; + assert(fypx->refs + 1 > 0); + fypx->refs++; + + return fypx; +} + +static inline void +fy_path_exec_unref(struct fy_path_exec *fypx) +{ + if (!fypx) + return; + + assert(fypx->refs > 0); + + if (--fypx->refs == 0) + fy_path_exec_destroy(fypx); +} + +struct fy_walk_result * +fy_path_expr_execute(struct fy_path_exec *fypx, int level, struct fy_path_expr *expr, + struct fy_walk_result 
*input, enum fy_path_expr_type ptype); + +static inline struct fy_walk_result_list * +fy_path_exec_walk_result_rl(struct fy_path_exec *fypx) +{ + return fypx && !fypx->supress_recycling ? fypx->fwr_recycle : NULL; +} + +static inline void +fy_path_exec_set_result_recycle_list(struct fy_path_exec *fypx, + struct fy_walk_result_list *fwrl) +{ + if (!fypx) + return; + fypx->fwr_recycle = fwrl; +} + +struct fy_walk_result * +fy_path_exec_walk_result_create(struct fy_path_exec *fypx, enum fy_walk_result_type type, ...); + +void +fy_path_exec_walk_result_free(struct fy_path_exec *fypx, struct fy_walk_result *fwr); + +struct fy_path_expr_document_data { + struct fy_path_parser *fypp; + struct fy_walk_result_list fwr_recycle; +}; + +struct fy_path_expr_node_data { + struct fy_input *fyi; + struct fy_path_expr *expr; + struct fy_node *fyn_target; + int traversals; +}; + +int fy_document_setup_path_expr_data(struct fy_document *fyd); +void fy_document_cleanup_path_expr_data(struct fy_document *fyd); +int fy_node_setup_path_expr_data(struct fy_node *fyn); +void fy_node_cleanup_path_expr_data(struct fy_node *fyn); + +struct fy_walk_result * +fy_node_alias_resolve_by_ypath_result(struct fy_node *fyn); +struct fy_node *fy_node_alias_resolve_by_ypath(struct fy_node *fyn); + +struct fy_walk_result * +fy_node_by_ypath_result(struct fy_node *fyn, const char *path, size_t len); +struct fy_node *fy_node_by_ypath(struct fy_node *fyn, const char *path, size_t len); + +#endif |