aboutsummaryrefslogtreecommitdiffstats
path: root/contrib/libs/libfyaml/src/lib/fy-atom.h
blob: 2a8b5633232aca9a6728c00a5bd1a08536caa762 (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
/*
 * fy-atom.h - internal YAML atom methods
 *
 * Copyright (c) 2019 Pantelis Antoniou <pantelis.antoniou@konsulko.com>
 *
 * SPDX-License-Identifier: MIT
 */

#ifndef FY_ATOM_H
#define FY_ATOM_H

#ifdef HAVE_CONFIG_H
#include "config.h"
#endif

#include <stdint.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdarg.h>

#include <libfyaml.h>

#include "fy-list.h"
#include "fy-input.h"

struct fy_reader;
struct fy_input;
struct fy_node;

enum fy_atom_style {
	/* YAML atoms */
	FYAS_PLAIN,
	FYAS_SINGLE_QUOTED,
	FYAS_DOUBLE_QUOTED,
	FYAS_LITERAL,
	FYAS_FOLDED,
	FYAS_URI,	/* special style for URIs */
	FYAS_DOUBLE_QUOTED_MANUAL,
	FYAS_COMMENT	/* (possibly multi line) comment */
};

static inline bool fy_atom_style_is_quoted(enum fy_atom_style style)
{
	return style == FYAS_SINGLE_QUOTED || style == FYAS_DOUBLE_QUOTED;
}

static inline bool fy_atom_style_is_block(enum fy_atom_style style)
{
	return style == FYAS_LITERAL || style == FYAS_FOLDED;
}

enum fy_atom_chomp {
	FYAC_STRIP,
	FYAC_CLIP,
	FYAC_KEEP,
};

struct fy_atom {
	struct fy_mark start_mark;
	struct fy_mark end_mark;
	size_t storage_hint;	/* guaranteed to fit in this amount of bytes */
	struct fy_input *fyi;	/* input on which atom is on */
	uint64_t fyi_generation;	/* to detect reallocs */
	unsigned int increment;
	union {
		uint64_t tozero;			/* fast way to zero everything here */
		struct {
			/* save a little bit of space with bitfields */
			enum fy_atom_style style : 8;	/* note that it's a big perf win for bytes */
			enum fy_atom_chomp chomp : 8;
			unsigned int tabsize : 8;
			enum fy_lb_mode lb_mode : 1;
			enum fy_flow_ws_mode fws_mode : 1;
			bool direct_output : 1;		/* can directly output */
			bool storage_hint_valid : 1;
			bool empty : 1;			/* atom contains whitespace and linebreaks only if length > 0 */
			bool has_lb : 1;		/* atom contains at least one linebreak */
			bool has_ws : 1;		/* atom contains at least one whitespace */
			bool starts_with_ws : 1;	/* atom starts with whitespace */
			bool starts_with_lb : 1;	/* atom starts with linebreak */
			bool ends_with_ws : 1;		/* atom ends with whitespace */
			bool ends_with_lb : 1;		/* atom ends with linebreak */
			bool trailing_lb : 1;		/* atom ends with trailing linebreaks > 1 */
			bool size0 : 1;			/* atom contains absolutely nothing */
			bool valid_anchor : 1;		/* atom is a valid anchor */
			bool json_mode : 1;		/* atom was read in json mode */
			bool ends_with_eof : 1;		/* atom ends at EOF of input */
		};
	};
};

static inline bool fy_atom_is_set(const struct fy_atom *atom)
{
	return atom && atom->fyi;
}

static inline void fy_atom_reset(struct fy_atom *atom)
{
	if (atom)
		atom->fyi = NULL;
}

static inline bool fy_atom_json_mode(struct fy_atom *handle)
{
	if (!handle)
		return false;

	return handle->json_mode;
}

static inline enum fy_lb_mode fy_atom_lb_mode(struct fy_atom *handle)
{
	if (!handle)
		return fylb_cr_nl;

	return handle->lb_mode;
}

static inline enum fy_flow_ws_mode fy_atom_flow_ws_mode(struct fy_atom *handle)
{
	if (!handle)
		return fyfws_space_tab;

	return handle->fws_mode;
}

/* all atoms are scalars so... */
static inline bool fy_atom_is_lb(struct fy_atom *handle, int c)
{
	return fy_is_generic_lb_m(c, fy_atom_lb_mode(handle));
}

static inline bool fy_atom_is_flow_ws(struct fy_atom *handle, int c)
{
	return fy_is_flow_ws_m(c, fy_atom_flow_ws_mode(handle));
}

int fy_atom_format_text_length(struct fy_atom *atom);
const char *fy_atom_format_text(struct fy_atom *atom, char *buf, size_t maxsz);

int fy_atom_format_utf8_length(struct fy_atom *atom);

static inline void
fy_reader_fill_atom_start(struct fy_reader *fyr, struct fy_atom *handle)
{
	/* start mark */
	fy_reader_get_mark(fyr, &handle->start_mark);
	handle->fyi = fy_reader_current_input(fyr);
	handle->fyi_generation = fy_reader_current_input_generation(fyr);

	handle->increment = 0;
	handle->tozero = 0;

	/* note that handle->data may be zero for empty input */
}

static inline void
fy_reader_fill_atom_end_at(struct fy_reader *fyr, struct fy_atom *handle, struct fy_mark *end_mark)
{
	if (end_mark)
		handle->end_mark = *end_mark;
	else
		fy_reader_get_mark(fyr, &handle->end_mark);

	/* default is plain, modify at return */
	handle->style = FYAS_PLAIN;
	handle->chomp = FYAC_CLIP;
	/* by default we don't do storage hints, it's the job of the caller */
	handle->storage_hint = 0;
	handle->storage_hint_valid = false;
	handle->tabsize = fy_reader_tabsize(fyr);
	handle->json_mode = fy_reader_json_mode(fyr);
	handle->lb_mode = fy_reader_lb_mode(fyr);
	handle->fws_mode = fy_reader_flow_ws_mode(fyr);
}

static inline void
fy_reader_fill_atom_end(struct fy_reader *fyr, struct fy_atom *handle)
{
	fy_reader_fill_atom_end_at(fyr, handle, NULL);
}

struct fy_atom *fy_reader_fill_atom(struct fy_reader *fyr, int advance, struct fy_atom *handle);
struct fy_atom *fy_reader_fill_atom_mark(struct fy_reader *fyr, const struct fy_mark *start_mark,
					 const struct fy_mark *end_mark, struct fy_atom *handle);
struct fy_atom *fy_reader_fill_atom_at(struct fy_reader *fyr, int advance, int count, struct fy_atom *handle);

#define fy_reader_fill_atom_a(_fyr, _advance)  fy_reader_fill_atom((_fyr), (_advance), FY_ALLOCA(sizeof(struct fy_atom)))

struct fy_atom *fy_fill_node_atom(struct fy_node *fyn, struct fy_atom *handle);

#define fy_fill_node_atom_a(_fyn)  fy_fill_node_atom((_fyn), FY_ALLOCA(sizeof(struct fy_atom)))

struct fy_atom_iter_line_info {
	const char *start;
	const char *end;
	const char *nws_start;
	const char *nws_end;
	const char *chomp_start;
	bool empty : 1;
	bool trailing_breaks_ws : 1;
	bool first : 1;		/* first */
	bool last : 1;		/* last (only ws/lb afterwards */
	bool final : 1;		/* the final iterator */
	bool indented : 1;
	bool lb_end : 1;
	bool need_nl : 1;
	bool need_sep : 1;
	bool ends_with_backslash : 1;	/* last ended in \\ */
	size_t trailing_ws;
	size_t trailing_breaks;
	size_t start_ws, end_ws;
	const char *s;
	const char *e;
	int actual_lb;		/* the line break */
	const char *s_tb;	/* start of trailing breaks run */
	const char *e_tb;	/* end of trailing breaks run */
};

struct fy_atom_iter_chunk {
	struct fy_iter_chunk ic;
	/* note that it is guaranteed for copied chunks to be
	 * less or equal to 10 characters (the maximum digitbuf
	 * for double quoted escapes */
	char inplace_buf[10];	/* small copies in place */
};

#define NR_STARTUP_CHUNKS	8
#define SZ_STARTUP_COPY_BUFFER	32

struct fy_atom_iter {
	const struct fy_atom *atom;
	const char *s, *e;
	unsigned int chomp;
	int tabsize;
	bool single_line : 1;
	bool dangling_end_quote : 1;
	bool last_ends_with_backslash : 1;
	bool empty : 1;
	bool current : 1;
	bool done : 1;	/* last iteration (for block styles) */
	struct fy_atom_iter_line_info li[2];
	unsigned int alloc;
	unsigned int top;
	unsigned int read;
	struct fy_atom_iter_chunk *chunks;
	struct fy_atom_iter_chunk startup_chunks[NR_STARTUP_CHUNKS];
	int unget_c;
};

void fy_atom_iter_start(const struct fy_atom *atom, struct fy_atom_iter *iter);
void fy_atom_iter_finish(struct fy_atom_iter *iter);
const struct fy_iter_chunk *fy_atom_iter_peek_chunk(struct fy_atom_iter *iter);
const struct fy_iter_chunk *fy_atom_iter_chunk_next(struct fy_atom_iter *iter, const struct fy_iter_chunk *curr, int *errp);
void fy_atom_iter_advance(struct fy_atom_iter *iter, size_t len);

struct fy_atom_iter *fy_atom_iter_create(const struct fy_atom *atom);
void fy_atom_iter_destroy(struct fy_atom_iter *iter);
ssize_t fy_atom_iter_read(struct fy_atom_iter *iter, void *buf, size_t count);
int fy_atom_iter_getc(struct fy_atom_iter *iter);
int fy_atom_iter_ungetc(struct fy_atom_iter *iter, int c);
int fy_atom_iter_peekc(struct fy_atom_iter *iter);
int fy_atom_iter_utf8_get(struct fy_atom_iter *iter);
int fy_atom_iter_utf8_quoted_get(struct fy_atom_iter *iter, size_t *lenp, uint8_t *buf);
int fy_atom_iter_utf8_unget(struct fy_atom_iter *iter, int c);
int fy_atom_iter_utf8_peek(struct fy_atom_iter *iter);

int fy_atom_memcmp(struct fy_atom *atom, const void *ptr, size_t len);
int fy_atom_strcmp(struct fy_atom *atom, const char *str);
bool fy_atom_is_number(struct fy_atom *atom);
int fy_atom_cmp(struct fy_atom *atom1, struct fy_atom *atom2);

static inline const char *fy_atom_data(const struct fy_atom *atom)
{
	if (!atom)
		return NULL;

	return (char *)fy_input_start(atom->fyi) + atom->start_mark.input_pos;
}

static inline size_t fy_atom_size(const struct fy_atom *atom)
{
	if (!atom)
		return 0;

	return atom->end_mark.input_pos - atom->start_mark.input_pos;
}

static inline bool fy_plain_atom_streq(const struct fy_atom *atom, const char *str)
{
	size_t size = strlen(str);

	if (!atom || !str || atom->style != FYAS_PLAIN || fy_atom_size(atom) != size)
		return false;

	return !memcmp(str, fy_atom_data(atom), size);
}

struct fy_raw_line {
	int lineno;
	const char *line_start;
	size_t line_len;
	size_t line_len_lb;
	size_t line_count;
	const char *content_start;
	size_t content_len;
	size_t content_start_count;
	size_t content_count;
	int content_start_col;
	int content_start_col8;	/* this is the tab 8 */
	int content_end_col;
	int content_end_col8;
};

struct fy_atom_raw_line_iter {
	const struct fy_atom *atom;
	const char *is, *ie;	/* input start, end */
	const char *as, *ae;	/* atom start, end */
	const char *rs;
	struct fy_raw_line line;
};

void fy_atom_raw_line_iter_start(const struct fy_atom *atom,
			     struct fy_atom_raw_line_iter *iter);
void fy_atom_raw_line_iter_finish(struct fy_atom_raw_line_iter *iter);

const struct fy_raw_line *
fy_atom_raw_line_iter_next(struct fy_atom_raw_line_iter *iter);

#endif