aboutsummaryrefslogtreecommitdiffstats
path: root/library/cpp/deprecated/fgood/ffb.h
blob: ca229eb65a118711b63b8439f47160f127648415 (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
#pragma once

#include "fgood.h"

#include <util/string/util.h>  // str_spn
#include <util/string/split.h> // str_spn
#include <util/memory/segmented_string_pool.h>
#include <util/generic/string.h>
#include <util/generic/vector.h>
#include <util/generic/noncopyable.h>

#include <utility>

#include <cstdarg>
#include <cstring>

// Thin convenience wrapper around TFILEPtr that adds short-named output helpers.
// The write helpers forward to fsput()/fputc(), which are not declared in this
// struct (presumably inherited from TFILEPtr - confirm in fgood.h).
struct ffb: public TFILEPtr {
    ffb() {
    }
    ffb(FILE* file);
    // Constructs the object and immediately calls open(name, mode).
    ffb(const char* name, const char* mode) {
        open(name, mode);
    }
    void operator=(FILE* f); // take ownership
    void open(const char* name, const char* mode);
    // printf-style formatted output to this file.
    // Returns whatever vfprintf returns.
    int f(const char* c, ...) {
        va_list args;
        va_start(args, c);
        const int written = vfprintf(*this, c, args);
        va_end(args); // va_start must always be paired with va_end before returning
        return written;
    }
    // Writes the zero-terminated string c (the terminator itself is not passed on).
    void s(const char* c) {
        fsput(c, strlen(c));
    }
    // Writes n bytes starting at cc.
    void b(const void* cc, int n) {
        fsput((const char*)cc, n);
    }
    // Same body as b(): writes N bytes starting at cc.
    void B(const void* cc, int N) {
        fsput((const char*)cc, N);
    }
    // Writes a single char.
    void c(char c) {
        fputc(c);
    }
    // Writes one UTF-16 code unit in big-endian byte order: high byte first, then low byte.
    void cbe(wchar16 c) {     // big endian utf-16
        fputc(char(c >> 8));  //Hi8
        fputc(char(c & 255)); //Lo8
    }
    // Writes every code unit of the zero-terminated UTF-16 string c via cbe();
    // the terminating zero is not written.
    void sbe(const wchar16* c) {
        for (; *c; c++)
            cbe(*c);
    }
    // Alias for close().
    void fclose() {
        close();
    }
};

// Split the fields of a tab-delimited line of text.
// Here and below, the actual size of fb must be fb_sz + 1 so that fb[fb_sz] can be zero.
// NOTE(review): the int result is presumably the number of fields found - confirm
// against the implementation.
int sf(char** fb, char* buf, size_t fb_sz);
int sf(char** fb, char* buf /* fb_sz == 32 */);

// Split the fields of a char-delimited line of text.
// Caution: delim = ' ' imitates awk: initial separators are skipped,
// repeated separators are treated as one, all chars less than ' ' are treated as separators.
int sf(char fs, char** fb, char* buf, size_t fb_sz = 32);

// Split the fields of a string-delimited line of text (fs is NOT a regexp).
// (Usually fs is "@@".)
int sf(const char* fs, char** fb, char* buf, size_t fb_sz = 32);

// Split the fields of a char-delimited line of text; a set of separator chars is given.
// Caution: repeated separators are treated as one, initial separators are skipped,
// newlines are NOT ignored.
// Note the parameter order: unlike the overloads above, buf comes BEFORE fb here.
int sf(const char* seps, char* buf, char** fb, size_t fb_sz = 32);

// Strips one trailing '\n' from buf in place and returns buf.
// When _win32_ is defined, a '\r' that directly preceded the removed '\n'
// is stripped as well. A string that does not end in '\n' is left untouched.
inline char* chomp(char* buf) {
    size_t len = strlen(buf);
    if (len == 0 || buf[len - 1] != '\n')
        return buf;
    buf[--len] = 0;
#ifdef _win32_
    if (len != 0 && buf[len - 1] == '\r')
        buf[--len] = 0;
#endif
    return buf;
}

// Strips, in place, at most one trailing '\n' and then at most one trailing '\r'
// from buf, on every platform. The '\r' check runs even when no '\n' was removed.
// Returns buf.
inline char* chomp_cr(char* buf) {
    size_t len = strlen(buf);
    if (len != 0 && buf[len - 1] == '\n')
        buf[--len] = 0;
    if (len != 0 && buf[len - 1] == '\r')
        buf[--len] = 0;
    return buf;
}

class TLineSplitter {
protected:
    enum {                 // Default: Split string by SepStr
        SplitByAnySep = 1, // Split string by Sep
        NoEmptyFields = 2  // Skip all empty fields between separators
    };

private:
    ui32 Flags;
    const str_spn Sep;  // collection of separators
    const char* SepStr; // pointer exact string to separate by
    size_t SepStrLen;   // length of separator string

public:
    TLineSplitter(const char* sep, bool noEmpty)
        : Flags(noEmpty ? NoEmptyFields : 0)
        , Sep(TString(sep, 1).data())
        , SepStr(sep)
        , SepStrLen(strlen(sep))
    {
    }
    TLineSplitter(const str_spn& sep, bool noEmpty = false)
        : Flags(SplitByAnySep | (noEmpty ? NoEmptyFields : 0))
        , Sep(sep)
        , SepStr(nullptr)
        , SepStrLen(1)
    {
    }
    bool AnySep() const {
        return Flags & SplitByAnySep;
    }
    bool SkipEmpty() const {
        return Flags & NoEmptyFields;
    }
    /// Separates string onto tokens
    /// Expecting a zero-terminated string
    /// By default returns empty fields between sequential separators
    void operator()(char* p, TVector<char*>& fields) const;
    /// Same, but for const string - fills vector of pairs (pointer, length)
    void operator()(const char* p, TVector<std::pair<const char*, size_t>>& fields) const;
};

/**
 * Use library/cpp/map_text_file/map_tsv_file.h instead.
 *
 * Reads a file line by line and exposes the fields of the current line,
 * split by a TLineSplitter.
 */
class TSFReader {
    TString Buf; // buffer used for non-'\n'-terminated string and for non-freebsd work
    TLineSplitter Split;
    TVector<char*> Fields;
    size_t NF; // Fields.size()
    size_t NR; // value reported by Line()

    TFILEPtr File;

    bool OpenPipe; // internal flag that turns open() to popen()

    i32 FieldsRequired; // if != -1, != nf, terminate program

public:
    // Single-char separator.
    // Caution: delim = ' ' imitates awk: initial separators are skipped,
    // all chars less than ' ' are treated as separators.
    TSFReader(const char* fname = nullptr, char sep = '\t', i32 nf_reqired = -1);
    // Exact string separator.
    TSFReader(const char* fname, const char* sep, i32 nf_reqired = -1);
    // Fully customizable splitter.
    TSFReader(const char* fname, const TLineSplitter& spl, i32 nf_reqired = -1);

    void Open(const char* fname, i32 nf_reqired = -1, size_t vbufsize = 1u << 21); // use "/dev/stdin" for stdin
    void Popen(const char* pname, i32 nf_reqired = -1, size_t vbufsize = 1u << 21);

    bool NextLine(segmented_string_pool* pool = nullptr);

    // True when the underlying FILE* is non-null.
    bool IsOpen() const {
        return static_cast<FILE*>(File) != nullptr;
    }
    // True when feof() reports end-of-file on the underlying stream.
    bool IsEof() const {
        return feof(File) != 0;
    }
    void Close() {
        File.close();
    }
    // Seeks back to offset 0 from the start of the file.
    void Rewind() {
        Seek(0, SEEK_SET);
    }
    void Seek(i64 offset, int mode = SEEK_SET) {
        File.seek(offset, mode);
    }
    // Current stream position as reported by ftell().
    i64 Tell() const {
        return static_cast<i64>(ftell(File));
    }
    // Mutable access to a field. No range check is performed here
    // (the check below is commented out).
    char*& operator[](size_t ind) {
        //if (ind >= NF)
        //    throw yexception("Can't return reference to unexisting field %" PRISZT, ind);
        return Fields[ind];
    }
    // Read-only access to a field; returns nullptr when ind >= NF.
    const char* operator[](size_t ind) const {
        return ind < NF ? Fields[ind] : nullptr;
    }
    // Converts to NF. Note: an empty input line makes 0 fields.
    operator int() const {
        return static_cast<int>(NF);
    }
    const char* Name() const {
        return File.name().data();
    }
    size_t Line() const {
        return NR;
    }
    const TVector<char*>& GetFields() const {
        return Fields;
    }
};

// String builder that owns a heap-allocated character buffer.
//   buf   - owned buffer, allocated with new[] and released in the destructor
//   pos   - current position, returned by unary operator+
//           (presumably the number of chars written so far - confirm in the implementation)
//   asize - number of bytes allocated for buf
// Copying performs a deep copy of the buffer, so two objects never share
// (and never double-delete) the same allocation.
struct prnstr {
    char* buf;
    int pos;
    int asize;
    // Creates a buffer with the default capacity of 32 bytes.
    prnstr()
        : prnstr(32)
    {
    }
    // Creates a buffer of asz bytes.
    explicit prnstr(int asz)
        : buf(new char[asz])
        , pos(0)
        , asize(asz)
    {
        // Start out as an empty C string so that operator~ is safe to read
        // before anything has been written.
        if (asize > 0)
            *buf = 0;
    }
    // Deep copy: allocates a separate buffer of the same capacity and copies its bytes.
    prnstr(const prnstr& other)
        : buf(new char[other.asize])
        , pos(other.pos)
        , asize(other.asize)
    {
        if (other.buf && asize > 0)
            memcpy(buf, other.buf, asize);
    }
    // Deep-copy assignment. The new buffer is allocated before the old one is
    // released, so *this stays intact if the allocation throws.
    prnstr& operator=(const prnstr& other) {
        if (this != &other) {
            char* fresh = new char[other.asize];
            if (other.buf && other.asize > 0)
                memcpy(fresh, other.buf, other.asize);
            delete[] buf;
            buf = fresh;
            pos = other.pos;
            asize = other.asize;
        }
        return *this;
    }
    int f(const char* c, ...);
    int s(const char* c1, const char* c2);
    int s(const char* c1, const char* c2, const char* c3);
    int s(const char* c, size_t len);
    //int s(const char *c);
    // Forwards a zero-terminated string to s(c, strlen(c)); a null pointer is a no-op that returns 0.
    int s(const char* c) {
        return c ? s(c, strlen(c)) : 0;
    }
    int s(const TString& c);
    int s_htmesc(const char* c, bool enc_utf = false);
    int s_htmesc_w(const char* c);
    int c(char c);
    int cu(wchar32 c); //for utf-8
    // Resets to an empty string while keeping the current allocation.
    void restart() {
        *buf = 0;
        pos = 0;
    }
    // Unary ~ yields the underlying buffer.
    const char* operator~() const {
        return buf;
    }
    // Unary + yields the current position.
    int operator+() const {
        return pos;
    }
    ~prnstr() {
        delete[] buf;
    }
    void clear();
    void swap(prnstr& w);
};

// Functions that terminate the program upon failure.
// NOTE(review): judging by the names, read_or_die/write_or_die open fname for
// reading/writing and fopen_or_die takes an fopen-style mode - confirm against
// the implementation.
FILE* read_or_die(const char* fname);
FILE* write_or_die(const char* fname);
FILE* fopen_or_die(const char* fname, const char* mode);

// Functions that throw upon failure.
// NOTE(review): fname_dbg is presumably used only for diagnostics - confirm.
FILE* fopen_chk(const char* fname, const char* mode);
void fclose_chk(FILE* f, const char* fname_dbg);