1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
|
#include "fetch_request.h"
#include <library/cpp/deprecated/atomic/atomic.h>
// TRequest
namespace NHttpFetcher {
// Defaults applied by every TRequest constructor in this file.

// Initial value of TRequest::AcceptEncoding.
const TString DEFAULT_ACCEPT_ENCODING = "gzip, deflate";
// Initial value of TRequest::MaxHeaderSize: 100 << 10 == 102400 (presumably bytes, i.e. 100 KiB).
const size_t DEFAULT_MAX_HEADER_SIZE = 100 << 10;
// Initial value of TRequest::MaxBodySize: 1 << 29 == 536870912 (presumably bytes, i.e. 512 MiB).
const size_t DEFAULT_MAX_BODY_SIZE = 1 << 29;
// Builds a request for `url` using the default settings:
//   - deadline:        now + DEFAULT_REQUEST_TIMEOUT
//   - freshness:       DEFAULT_REQUEST_FRESHNESS
//   - priority:        40
//   - login/password:  not set
//   - IgnoreRobotsTxt: false
//   - LangRegion:      ELR_RU
// `onFetch` is stored in OnFetch.
//
// Delegates to the full constructor so that the remaining defaults
// (accept-encoding, size limits, sequence generation) are defined in exactly
// one place instead of being copied into every overload.
TRequest::TRequest(const TString& url, TCallBack onFetch)
    : TRequest(url,
               DEFAULT_REQUEST_TIMEOUT,
               DEFAULT_REQUEST_FRESHNESS,
               /* ignoreRobots = */ false,
               /* priority = */ 40,
               /* login = */ TMaybe<TString>(),
               /* password = */ TMaybe<TString>(),
               ELR_RU,
               onFetch)
{
    // Nothing to do: the target constructor has already called GenerateSequence().
}
// Builds a request for `url` with an explicit robots.txt flag, timeout and
// freshness. The deadline becomes now + `timeout`. Everything else uses the
// same defaults as the other overloads: priority 40, no login/password,
// LangRegion ELR_RU. `onFetch` is stored in OnFetch.
//
// Delegates to the full constructor so that the remaining defaults
// (accept-encoding, size limits, sequence generation) are defined in exactly
// one place instead of being copied into every overload.
TRequest::TRequest(const TString& url, bool ignoreRobotsTxt, TDuration timeout, TDuration freshness, TCallBack onFetch)
    : TRequest(url,
               timeout,
               freshness,
               ignoreRobotsTxt,
               /* priority = */ 40,
               /* login = */ TMaybe<TString>(),
               /* password = */ TMaybe<TString>(),
               ELR_RU,
               onFetch)
{
    // Nothing to do: the target constructor has already called GenerateSequence().
}
// Fully parameterised constructor.
//
// url          - stored in Url.
// timeout      - relative timeout; converted to an absolute Deadline (Now() + timeout).
// freshness    - stored in Freshness.
// ignoreRobots - stored in IgnoreRobotsTxt.
// priority     - stored in Priority.
// login        - stored in Login (may be empty).
// password     - stored in Password (may be empty).
// langRegion   - stored in LangRegion.
// onFetch      - stored in OnFetch.
//
// The remaining fields get fixed defaults: AcceptEncoding = DEFAULT_ACCEPT_ENCODING,
// OnlyHeaders = false, MaxHeaderSize = DEFAULT_MAX_HEADER_SIZE,
// MaxBodySize = DEFAULT_MAX_BODY_SIZE. Members not listed here are default-initialised.
TRequest::TRequest(const TString& url, TDuration timeout, TDuration freshness, bool ignoreRobots,
size_t priority, const TMaybe<TString>& login, const TMaybe<TString>& password,
ELangRegion langRegion, TCallBack onFetch)
: Url(url)
// The deadline is fixed at construction time, not when the request is sent.
, Deadline(Now() + timeout)
, Freshness(freshness)
, Priority(priority)
, Login(login)
, Password(password)
, IgnoreRobotsTxt(ignoreRobots)
, LangRegion(langRegion)
, OnFetch(onFetch)
, AcceptEncoding(DEFAULT_ACCEPT_ENCODING)
, OnlyHeaders(false)
, MaxHeaderSize(DEFAULT_MAX_HEADER_SIZE)
, MaxBodySize(DEFAULT_MAX_BODY_SIZE)
{
// Assign this request its own Sequence value.
GenerateSequence();
}
void TRequest::GenerateSequence() {
static TAtomic nextSeq = 0;
Sequence = AtomicIncrement(nextSeq);
}
// Returns a copy of this request that carries its own Sequence value.
// All other fields are copied by TRequest's copy constructor.
TRequestRef TRequest::Clone() {
    // The holder owns the copy until it is handed over to the returned reference.
    THolder<TRequest> copy(new TRequest(*this));
    // The copy constructor duplicated our Sequence; replace it with a new one.
    copy->GenerateSequence();
    return copy.Release();
}
// Writes a human-readable description of the request to `out`, one
// "<label><value>\n" line per field.
//
// Url, remaining timeout, freshness, priority, language region and the two size
// limits are always written. Optional fields are written only when they hold a
// value, and the boolean flags only when they are true.
//
// NOTE(review): Password and OAuthToken are printed verbatim, so be careful
// about where this output ends up.
void TRequest::Dump(IOutputStream& out) {
    // Prints one optional field, skipping it entirely when it is empty.
    const auto dumpIfSet = [&out](const char* label, const auto& field) {
        if (!!field) {
            out << label << *field << "\n";
        }
    };

    out << "url: " << Url << "\n";
    // Time left until the deadline, measured at the moment of the dump.
    out << "timeout: " << (Deadline - Now()).MilliSeconds() << " ms\n";
    out << "freshness: " << Freshness.Seconds() << "\n";
    out << "priority: " << Priority << "\n";

    dumpIfSet("login: ", Login);
    dumpIfSet("password: ", Password);
    dumpIfSet("oauth token: ", OAuthToken);

    if (IgnoreRobotsTxt) {
        out << "ignore robots: " << IgnoreRobotsTxt << "\n";
    }
    out << "lang reg: " << LangRegion2Str(LangRegion) << "\n";

    dumpIfSet("custom host: ", CustomHost);
    dumpIfSet("user agent: ", UserAgent);
    dumpIfSet("accept enc: ", AcceptEncoding);

    if (OnlyHeaders) {
        out << "only headers: " << OnlyHeaders << "\n";
    }
    out << "max header sz: " << MaxHeaderSize << "\n";
    out << "max body sz: " << MaxBodySize << "\n";

    dumpIfSet("post data: ", PostData);
    dumpIfSet("content type: ", ContentType);
}
}
|