/* * fy-atom.h - internal YAML atom methods * * Copyright (c) 2019 Pantelis Antoniou <pantelis.antoniou@konsulko.com> * * SPDX-License-Identifier: MIT */ #ifndef FY_ATOM_H #define FY_ATOM_H #ifdef HAVE_CONFIG_H #include "config.h" #endif #include <stdint.h> #include <stdbool.h> #include <stdio.h> #include <stdarg.h> #include <libfyaml.h> #include "fy-list.h" #include "fy-input.h" struct fy_reader; struct fy_input; struct fy_node; enum fy_atom_style { /* YAML atoms */ FYAS_PLAIN, FYAS_SINGLE_QUOTED, FYAS_DOUBLE_QUOTED, FYAS_LITERAL, FYAS_FOLDED, FYAS_URI, /* special style for URIs */ FYAS_DOUBLE_QUOTED_MANUAL, FYAS_COMMENT /* (possibly multi line) comment */ }; static inline bool fy_atom_style_is_quoted(enum fy_atom_style style) { return style == FYAS_SINGLE_QUOTED || style == FYAS_DOUBLE_QUOTED; } static inline bool fy_atom_style_is_block(enum fy_atom_style style) { return style == FYAS_LITERAL || style == FYAS_FOLDED; } enum fy_atom_chomp { FYAC_STRIP, FYAC_CLIP, FYAC_KEEP, }; struct fy_atom { struct fy_mark start_mark; struct fy_mark end_mark; size_t storage_hint; /* guaranteed to fit in this amount of bytes */ struct fy_input *fyi; /* input on which atom is on */ uint64_t fyi_generation; /* to detect reallocs */ unsigned int increment; union { uint64_t tozero; /* fast way to zero everything here */ struct { /* save a little bit of space with bitfields */ enum fy_atom_style style : 8; /* note that it's a big perf win for bytes */ enum fy_atom_chomp chomp : 8; unsigned int tabsize : 8; enum fy_lb_mode lb_mode : 1; enum fy_flow_ws_mode fws_mode : 1; bool direct_output : 1; /* can directly output */ bool storage_hint_valid : 1; bool empty : 1; /* atom contains whitespace and linebreaks only if length > 0 */ bool has_lb : 1; /* atom contains at least one linebreak */ bool has_ws : 1; /* atom contains at least one whitespace */ bool starts_with_ws : 1; /* atom starts with whitespace */ bool starts_with_lb : 1; /* atom starts with linebreak */ bool ends_with_ws : 1; /* atom ends with whitespace */ bool ends_with_lb : 1; /* atom ends with linebreak */ bool trailing_lb : 1; /* atom ends with trailing linebreaks > 1 */ bool size0 : 1; /* atom contains absolutely nothing */ bool valid_anchor : 1; /* atom is a valid anchor */ bool json_mode : 1; /* atom was read in json mode */ bool ends_with_eof : 1; /* atom ends at EOF of input */ }; }; }; static inline bool fy_atom_is_set(const struct fy_atom *atom) { return atom && atom->fyi; } static inline void fy_atom_reset(struct fy_atom *atom) { if (atom) atom->fyi = NULL; } static inline bool fy_atom_json_mode(struct fy_atom *handle) { if (!handle) return false; return handle->json_mode; } static inline enum fy_lb_mode fy_atom_lb_mode(struct fy_atom *handle) { if (!handle) return fylb_cr_nl; return handle->lb_mode; } static inline enum fy_flow_ws_mode fy_atom_flow_ws_mode(struct fy_atom *handle) { if (!handle) return fyfws_space_tab; return handle->fws_mode; } /* all atoms are scalars so... */ static inline bool fy_atom_is_lb(struct fy_atom *handle, int c) { return fy_is_generic_lb_m(c, fy_atom_lb_mode(handle)); } static inline bool fy_atom_is_flow_ws(struct fy_atom *handle, int c) { return fy_is_flow_ws_m(c, fy_atom_flow_ws_mode(handle)); } int fy_atom_format_text_length(struct fy_atom *atom); const char *fy_atom_format_text(struct fy_atom *atom, char *buf, size_t maxsz); int fy_atom_format_utf8_length(struct fy_atom *atom); static inline void fy_reader_fill_atom_start(struct fy_reader *fyr, struct fy_atom *handle) { /* start mark */ fy_reader_get_mark(fyr, &handle->start_mark); handle->fyi = fy_reader_current_input(fyr); handle->fyi_generation = fy_reader_current_input_generation(fyr); handle->increment = 0; handle->tozero = 0; /* note that handle->data may be zero for empty input */ } static inline void fy_reader_fill_atom_end_at(struct fy_reader *fyr, struct fy_atom *handle, struct fy_mark *end_mark) { if (end_mark) handle->end_mark = *end_mark; else fy_reader_get_mark(fyr, &handle->end_mark); /* default is plain, modify at return */ handle->style = FYAS_PLAIN; handle->chomp = FYAC_CLIP; /* by default we don't do storage hints, it's the job of the caller */ handle->storage_hint = 0; handle->storage_hint_valid = false; handle->tabsize = fy_reader_tabsize(fyr); handle->json_mode = fy_reader_json_mode(fyr); handle->lb_mode = fy_reader_lb_mode(fyr); handle->fws_mode = fy_reader_flow_ws_mode(fyr); } static inline void fy_reader_fill_atom_end(struct fy_reader *fyr, struct fy_atom *handle) { fy_reader_fill_atom_end_at(fyr, handle, NULL); } static inline void fy_atom_reset_storage_hints(struct fy_atom *handle) { handle->storage_hint = 0; handle->storage_hint_valid = false; } struct fy_atom *fy_reader_fill_atom(struct fy_reader *fyr, int advance, struct fy_atom *handle); struct fy_atom *fy_reader_fill_atom_mark(struct fy_reader *fyr, const struct fy_mark *start_mark, const struct fy_mark *end_mark, struct fy_atom *handle); struct fy_atom *fy_reader_fill_atom_at(struct fy_reader *fyr, int advance, int count, struct fy_atom *handle); #define fy_reader_fill_atom_a(_fyr, _advance) fy_reader_fill_atom((_fyr), (_advance), FY_ALLOCA(sizeof(struct fy_atom))) struct fy_atom *fy_fill_node_atom(struct fy_node *fyn, struct fy_atom *handle); #define fy_fill_node_atom_a(_fyn) fy_fill_node_atom((_fyn), FY_ALLOCA(sizeof(struct fy_atom))) struct fy_atom_iter_line_info { const char *start; const char *end; const char *nws_start; const char *nws_end; const char *chomp_start; bool empty : 1; bool trailing_breaks_ws : 1; bool first : 1; /* first */ bool last : 1; /* last (only ws/lb afterwards */ bool final : 1; /* the final iterator */ bool indented : 1; bool lb_end : 1; bool need_nl : 1; bool need_sep : 1; bool ends_with_backslash : 1; /* last ended in \\ */ size_t trailing_ws; size_t trailing_breaks; size_t start_ws, end_ws; const char *s; const char *e; int actual_lb; /* the line break */ const char *s_tb; /* start of trailing breaks run */ const char *e_tb; /* end of trailing breaks run */ }; struct fy_atom_iter_chunk { struct fy_iter_chunk ic; /* note that it is guaranteed for copied chunks to be * less or equal to 10 characters (the maximum digitbuf * for double quoted escapes */ char inplace_buf[10]; /* small copies in place */ }; #define NR_STARTUP_CHUNKS 8 #define SZ_STARTUP_COPY_BUFFER 32 struct fy_atom_iter { const struct fy_atom *atom; const char *s, *e; unsigned int chomp; int tabsize; bool single_line : 1; bool dangling_end_quote : 1; bool last_ends_with_backslash : 1; bool empty : 1; bool current : 1; bool done : 1; /* last iteration (for block styles) */ struct fy_atom_iter_line_info li[2]; unsigned int alloc; unsigned int top; unsigned int read; struct fy_atom_iter_chunk *chunks; struct fy_atom_iter_chunk startup_chunks[NR_STARTUP_CHUNKS]; int unget_c; }; void fy_atom_iter_start(const struct fy_atom *atom, struct fy_atom_iter *iter); void fy_atom_iter_finish(struct fy_atom_iter *iter); const struct fy_iter_chunk *fy_atom_iter_peek_chunk(struct fy_atom_iter *iter); const struct fy_iter_chunk *fy_atom_iter_chunk_next(struct fy_atom_iter *iter, const struct fy_iter_chunk *curr, int *errp); void fy_atom_iter_advance(struct fy_atom_iter *iter, size_t len); struct fy_atom_iter *fy_atom_iter_create(const struct fy_atom *atom); void fy_atom_iter_destroy(struct fy_atom_iter *iter); ssize_t fy_atom_iter_read(struct fy_atom_iter *iter, void *buf, size_t count); int fy_atom_iter_getc(struct fy_atom_iter *iter); int fy_atom_iter_ungetc(struct fy_atom_iter *iter, int c); int fy_atom_iter_peekc(struct fy_atom_iter *iter); int fy_atom_iter_utf8_get(struct fy_atom_iter *iter); int fy_atom_iter_utf8_quoted_get(struct fy_atom_iter *iter, size_t *lenp, uint8_t *buf); int fy_atom_iter_utf8_unget(struct fy_atom_iter *iter, int c); int fy_atom_iter_utf8_peek(struct fy_atom_iter *iter); int fy_atom_memcmp(struct fy_atom *atom, const void *ptr, size_t len); int fy_atom_strcmp(struct fy_atom *atom, const char *str); bool fy_atom_is_number(struct fy_atom *atom); int fy_atom_cmp(struct fy_atom *atom1, struct fy_atom *atom2); static inline const char *fy_atom_data(const struct fy_atom *atom) { if (!atom) return NULL; return (char *)fy_input_start(atom->fyi) + atom->start_mark.input_pos; } static inline size_t fy_atom_size(const struct fy_atom *atom) { if (!atom) return 0; return atom->end_mark.input_pos - atom->start_mark.input_pos; } static inline bool fy_plain_atom_streq(const struct fy_atom *atom, const char *str) { size_t size = strlen(str); if (!atom || !str || atom->style != FYAS_PLAIN || fy_atom_size(atom) != size) return false; return !memcmp(str, fy_atom_data(atom), size); } struct fy_raw_line { int lineno; const char *line_start; size_t line_len; size_t line_len_lb; size_t line_count; const char *content_start; size_t content_len; size_t content_start_count; size_t content_count; int content_start_col; int content_start_col8; /* this is the tab 8 */ int content_end_col; int content_end_col8; }; struct fy_atom_raw_line_iter { const struct fy_atom *atom; const char *is, *ie; /* input start, end */ const char *as, *ae; /* atom start, end */ const char *rs; struct fy_raw_line line; }; void fy_atom_raw_line_iter_start(const struct fy_atom *atom, struct fy_atom_raw_line_iter *iter); void fy_atom_raw_line_iter_finish(struct fy_atom_raw_line_iter *iter); const struct fy_raw_line * fy_atom_raw_line_iter_next(struct fy_atom_raw_line_iter *iter); #endif