1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
|
#pragma once
#include "exthttpcodes.h"
#include <library/cpp/mime/types/mime.h>
#include <util/system/defaults.h>
#include <util/system/compat.h>
#include <util/generic/string.h>
#include <util/generic/ylimits.h>
#include <util/system/maxlen.h>
#include <ctime>
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <algorithm>
// This is ugly solution but here a lot of work to do it the right way.
#define FETCHER_URL_MAX 8192
extern const i64 DEFAULT_RETRY_AFTER; /// == -1
extern const i64 DEFAULT_IF_MODIFIED_SINCE; /// == -1
extern const i32 DEFAULT_MAX_AGE; /// == -1
extern const i8 DEFAULT_REQUEST_PRIORITY; /// == -1
extern const i32 DEFAULT_RESPONSE_TIMEOUT; /// == -1
#define HTTP_PREFIX "http://"
#define MAX_LANGREGION_LEN 4
#define MAXWORD_LEN 55
enum HTTP_COMPRESSION {
HTTP_COMPRESSION_UNSET = 0,
HTTP_COMPRESSION_ERROR = 1,
HTTP_COMPRESSION_IDENTITY = 2,
HTTP_COMPRESSION_GZIP = 3,
HTTP_COMPRESSION_DEFLATE = 4,
HTTP_COMPRESSION_COMPRESS = 5,
HTTP_COMPRESSION_MAX = 6
};
enum HTTP_METHOD {
HTTP_METHOD_UNDEFINED = -1,
HTTP_METHOD_OPTIONS,
HTTP_METHOD_GET,
HTTP_METHOD_HEAD,
HTTP_METHOD_POST,
HTTP_METHOD_PUT,
HTTP_METHOD_DELETE,
HTTP_METHOD_TRACE,
HTTP_METHOD_CONNECT,
HTTP_METHOD_EXTENSION
};
enum HTTP_CONNECTION {
HTTP_CONNECTION_UNDEFINED = -1,
HTTP_CONNECTION_KEEP_ALIVE = 0,
HTTP_CONNECTION_CLOSE = 1
};
/// Class represents general http header fields.
struct THttpBaseHeader {
public:
i16 error;
i32 header_size;
i32 entity_size;
i64 content_length;
i64 http_time; // seconds since epoch
i64 content_range_start; // Content-Range: first-byte-pos
i64 content_range_end; // Content-Range: last-byte-pos
i64 content_range_entity_length; // Content-Range: entity-length
i8 http_minor;
i8 mime_type;
i8 charset;
i8 compression_method;
i8 transfer_chunked;
i8 connection_closed;
TString base;
public:
void Init() {
error = 0;
header_size = 0;
entity_size = 0;
content_length = -1;
http_time = -1;
http_minor = -1;
mime_type = -1;
charset = -1;
compression_method = HTTP_COMPRESSION_UNSET;
transfer_chunked = -1;
connection_closed = HTTP_CONNECTION_UNDEFINED;
content_range_start = -1;
content_range_end = -1;
content_range_entity_length = -1;
base.clear();
}
void Print() const {
printf("content_length: %" PRIi64 "\n", content_length);
printf("http_time: %" PRIi64 "\n", http_time);
printf("http_minor: %" PRIi8 "\n", http_minor);
printf("mime_type: %" PRIi8 "\n", mime_type);
printf("charset: %" PRIi8 "\n", charset);
printf("compression_method: %" PRIi8 "\n", compression_method);
printf("transfer_chunked: %" PRIi8 "\n", transfer_chunked);
printf("connection_closed: %" PRIi8 "\n", connection_closed);
printf("content_range_start: %" PRIi64 "\n", content_range_start);
printf("content_range_end: %" PRIi64 "\n", content_range_end);
printf("content_range_entity_length: %" PRIi64 "\n", content_range_entity_length);
printf("base: \"%s\"\n", base.c_str());
printf("error: %" PRIi16 "\n", error);
}
int SetBase(const char* path,
const char* hostNamePtr = nullptr,
int hostNameLength = 0) {
if (*path == '/') {
base = "http://";
base += TStringBuf(hostNamePtr, hostNameLength);
base += path;
} else {
base = path;
}
return error;
}
};
enum { HREFLANG_MAX = FETCHER_URL_MAX * 2 };
/// Class represents Http Response Header.
struct THttpHeader: public THttpBaseHeader {
public:
i8 accept_ranges;
i8 squid_error;
i8 x_robots_tag; // deprecated, use x_robots_state instead
i16 http_status;
TString location;
TString rel_canonical;
char hreflangs[HREFLANG_MAX];
i64 retry_after;
TString x_robots_state; // 'xxxxx' format, see `library/html/zoneconf/parsefunc.cpp`
public:
void Init() {
THttpBaseHeader::Init();
accept_ranges = -1;
squid_error = 0;
x_robots_tag = 0;
rel_canonical.clear();
http_status = -1;
location.clear();
hreflangs[0] = 0;
retry_after = DEFAULT_RETRY_AFTER;
x_robots_state = "xxxxx";
}
void Print() const {
THttpBaseHeader::Print();
printf("http_status: %" PRIi16 "\n", http_status);
printf("squid_error: %" PRIi8 "\n", squid_error);
printf("accept_ranges: %" PRIi8 "\n", accept_ranges);
printf("location: \"%s\"\n", location.c_str());
printf("retry_after: %" PRIi64 "\n", retry_after);
}
};
struct THttpRequestHeader: public THttpBaseHeader {
public:
TString request_uri;
char host[HOST_MAX];
char from[MAXWORD_LEN];
char user_agent[MAXWORD_LEN];
char x_yandex_langregion[MAX_LANGREGION_LEN];
char x_yandex_sourcename[MAXWORD_LEN];
char x_yandex_requesttype[MAXWORD_LEN];
char x_yandex_fetchoptions[MAXWORD_LEN];
i8 http_method;
i8 x_yandex_request_priority;
i32 x_yandex_response_timeout;
i32 max_age;
i64 if_modified_since;
public:
THttpRequestHeader() {
Init();
}
void Init() {
request_uri.clear();
host[0] = 0;
from[0] = 0;
user_agent[0] = 0;
x_yandex_langregion[0] = 0;
x_yandex_sourcename[0] = 0;
x_yandex_requesttype[0] = 0;
x_yandex_fetchoptions[0] = 0;
http_method = HTTP_METHOD_UNDEFINED;
x_yandex_request_priority = DEFAULT_REQUEST_PRIORITY;
x_yandex_response_timeout = DEFAULT_RESPONSE_TIMEOUT;
max_age = DEFAULT_MAX_AGE;
if_modified_since = DEFAULT_IF_MODIFIED_SINCE;
THttpBaseHeader::Init();
}
void Print() const {
THttpBaseHeader::Print();
printf("request_uri: \"%s\"\n", request_uri.c_str());
printf("host: \"%s\"\n", host);
printf("from: \"%s\"\n", from);
printf("user_agent: \"%s\"\n", user_agent);
printf("http_method: %" PRIi8 "\n", http_method);
printf("response_timeout: %" PRIi32 "\n", x_yandex_response_timeout);
printf("max_age: %" PRIi32 "\n", max_age);
printf("if_modified_since: %" PRIi64 "\n", if_modified_since);
}
/// It doesn't care about errors in request or headers, where
/// request_uri equals to '*'.
/// This returns copy of the string, which you have to delete.
TString GetUrl() {
TString url;
if (host[0] == 0 || !strcmp(host, "")) {
url = request_uri;
} else {
url = HTTP_PREFIX;
url += host;
url += request_uri;
}
return url;
}
char* GetUrl(char* buffer, size_t size) {
if (host[0] == 0 || !strcmp(host, "")) {
strlcpy(buffer, request_uri.c_str(), size);
} else {
snprintf(buffer, size, "http://%s%s", host, request_uri.c_str());
}
return buffer;
}
};
class THttpAuthHeader: public THttpHeader {
public:
char* realm;
char* nonce;
char* opaque;
bool stale;
int algorithm;
bool qop_auth;
bool use_auth;
//we do not provide auth-int variant as too heavy
//bool qop_auth_int;
THttpAuthHeader()
: realm(nullptr)
, nonce(nullptr)
, opaque(nullptr)
, stale(false)
, algorithm(0)
, qop_auth(false)
, use_auth(true)
{
THttpHeader::Init();
}
~THttpAuthHeader() {
free(realm);
free(nonce);
free(opaque);
}
void Print() {
THttpHeader::Print();
if (use_auth) {
if (realm)
printf("realm: \"%s\"\n", realm);
if (nonce)
printf("nonce: \"%s\"\n", nonce);
if (opaque)
printf("opaque: \"%s\"\n", opaque);
printf("stale: %d\n", stale);
printf("algorithm: %d\n", algorithm);
printf("qop_auth: %d\n", qop_auth);
}
}
};
|