#include "exthttpcodes.h"
#include <cstring>
const ui16 CrazyServer = ShouldDelete | MarkSuspect;
struct http_flag {
ui16 http;
ui16 flag;
};
static http_flag HTTP_FLAG[] = {
{HTTP_CONTINUE, MarkSuspect}, // 100
{HTTP_SWITCHING_PROTOCOLS, CrazyServer}, // 101
{HTTP_PROCESSING, CrazyServer}, // 102
{HTTP_OK, ShouldReindex}, // 200
{HTTP_CREATED, CrazyServer}, // 201
{HTTP_ACCEPTED, ShouldDelete}, // 202
{HTTP_NON_AUTHORITATIVE_INFORMATION, ShouldReindex}, // 203
{HTTP_NO_CONTENT, ShouldDelete}, // 204
{HTTP_RESET_CONTENT, ShouldDelete}, // 205
{HTTP_PARTIAL_CONTENT, ShouldReindex}, // 206
{HTTP_MULTI_STATUS, CrazyServer}, // 207
{HTTP_ALREADY_REPORTED, CrazyServer}, // 208
{HTTP_IM_USED, CrazyServer}, // 226
{HTTP_MULTIPLE_CHOICES, CheckLinks | ShouldDelete}, // 300
{HTTP_MOVED_PERMANENTLY, CheckLocation | ShouldDelete | MoveRedir}, // 301
{HTTP_FOUND, CheckLocation | ShouldDelete | MoveRedir}, // 302
{HTTP_SEE_OTHER, CheckLocation | ShouldDelete | MoveRedir}, // 303
{HTTP_NOT_MODIFIED, 0}, // 304
{HTTP_USE_PROXY, ShouldDelete}, // 305
{HTTP_TEMPORARY_REDIRECT, CheckLocation | ShouldDelete | MoveRedir}, // 307
{HTTP_PERMANENT_REDIRECT, CheckLocation | ShouldDelete | MoveRedir}, // 308
{HTTP_BAD_REQUEST, CrazyServer}, // 400
{HTTP_UNAUTHORIZED, ShouldDelete}, // 401
{HTTP_PAYMENT_REQUIRED, ShouldDelete}, // 402
{HTTP_FORBIDDEN, ShouldDelete}, // 403
{HTTP_NOT_FOUND, ShouldDelete}, // 404
{HTTP_METHOD_NOT_ALLOWED, ShouldDelete}, // 405
{HTTP_NOT_ACCEPTABLE, ShouldDelete}, // 406
{HTTP_PROXY_AUTHENTICATION_REQUIRED, CrazyServer}, // 407
{HTTP_REQUEST_TIME_OUT, ShouldDisconnect | ShouldRetry | MarkSuspect}, // 408
{HTTP_CONFLICT, MarkSuspect}, // 409
{HTTP_GONE, ShouldDelete}, // 410
{HTTP_LENGTH_REQUIRED, CrazyServer}, // 411
{HTTP_PRECONDITION_FAILED, CrazyServer}, // 412
{HTTP_REQUEST_ENTITY_TOO_LARGE, CrazyServer}, // 413
{HTTP_REQUEST_URI_TOO_LARGE, ShouldDelete}, // 414
{HTTP_UNSUPPORTED_MEDIA_TYPE, CrazyServer}, // 415
{HTTP_REQUESTED_RANGE_NOT_SATISFIABLE, CrazyServer}, // 416
{HTTP_EXPECTATION_FAILED, ShouldDelete}, // 417
{HTTP_I_AM_A_TEAPOT, CrazyServer}, // 418
{HTTP_AUTHENTICATION_TIMEOUT, ShouldDelete}, // 419
{HTTP_MISDIRECTED_REQUEST, CrazyServer}, // 421
{HTTP_UNPROCESSABLE_ENTITY, CrazyServer}, // 422
{HTTP_LOCKED, ShouldDelete}, // 423
{HTTP_FAILED_DEPENDENCY, CrazyServer}, // 424
{HTTP_UPGRADE_REQUIRED, ShouldDelete}, // 426
{HTTP_PRECONDITION_REQUIRED, ShouldDelete}, // 428
{HTTP_TOO_MANY_REQUESTS, ShouldDisconnect | ShouldRetry | MarkSuspect}, // 429
{HTTP_UNAVAILABLE_FOR_LEGAL_REASONS, ShouldDelete}, // 451
{HTTP_INTERNAL_SERVER_ERROR, MarkSuspect}, // 500
{HTTP_NOT_IMPLEMENTED, ShouldDelete | ShouldDisconnect}, // 501
{HTTP_BAD_GATEWAY, MarkSuspect}, // 502
{HTTP_SERVICE_UNAVAILABLE, ShouldDisconnect | ShouldRetry | MarkSuspect}, // 503
{HTTP_GATEWAY_TIME_OUT, ShouldDisconnect | ShouldRetry | MarkSuspect}, // 504
{HTTP_HTTP_VERSION_NOT_SUPPORTED, CrazyServer | ShouldDisconnect}, // 505
{HTTP_VARIANT_ALSO_NEGOTIATES, CrazyServer | ShouldDisconnect}, // 506
{HTTP_INSUFFICIENT_STORAGE, CrazyServer | ShouldDisconnect}, // 507
{HTTP_LOOP_DETECTED, CrazyServer | ShouldDisconnect}, // 508
{HTTP_BANDWIDTH_LIMIT_EXCEEDED, ShouldDisconnect | ShouldRetry | MarkSuspect}, // 509
{HTTP_NOT_EXTENDED, ShouldDelete}, // 510
{HTTP_NETWORK_AUTHENTICATION_REQUIRED, ShouldDelete}, // 511
// custom
{HTTP_BAD_RESPONSE_HEADER, CrazyServer}, // 1000
{HTTP_CONNECTION_LOST, ShouldRetry}, // 1001
{HTTP_BODY_TOO_LARGE, ShouldDelete | CanBeFake}, // 1002
{HTTP_ROBOTS_TXT_DISALLOW, ShouldDelete}, // 1003
{HTTP_BAD_URL, ShouldDelete}, // 1004
{HTTP_BAD_MIME, ShouldDelete}, // 1005
{HTTP_DNS_FAILURE, ShouldDisconnect | MarkSuspect}, // 1006
{HTTP_BAD_STATUS_CODE, CrazyServer}, // 1007
{HTTP_BAD_HEADER_STRING, CrazyServer}, // 1008
{HTTP_BAD_CHUNK, CrazyServer}, // 1009
{HTTP_CONNECT_FAILED, ShouldDisconnect | ShouldRetry | MarkSuspect}, // 1010
{HTTP_FILTER_DISALLOW, ShouldDelete}, // 1011
{HTTP_LOCAL_EIO, ShouldRetry}, // 1012
{HTTP_BAD_CONTENT_LENGTH, ShouldDelete}, // 1013
{HTTP_BAD_ENCODING, ShouldDelete}, // 1014
{HTTP_LENGTH_UNKNOWN, ShouldDelete}, // 1015
{HTTP_HEADER_EOF, ShouldRetry | CanBeFake}, // 1016
{HTTP_MESSAGE_EOF, ShouldRetry | CanBeFake}, // 1017
{HTTP_CHUNK_EOF, ShouldRetry | CanBeFake}, // 1018
{HTTP_PAST_EOF, ShouldRetry | ShouldDelete | CanBeFake}, // 1019
{HTTP_HEADER_TOO_LARGE, ShouldDelete}, // 1020
{HTTP_URL_TOO_LARGE, ShouldDelete}, // 1021
{HTTP_INTERRUPTED, 0}, // 1022
{HTTP_CUSTOM_NOT_MODIFIED, 0}, // 1023
{HTTP_BAD_CONTENT_ENCODING, ShouldDelete}, // 1024
{HTTP_PROXY_UNKNOWN, 0}, // 1030
{HTTP_PROXY_REQUEST_TIME_OUT, 0}, // 1031
{HTTP_PROXY_INTERNAL_ERROR, 0}, // 1032
{HTTP_PROXY_CONNECT_FAILED, 0}, // 1033
{HTTP_PROXY_CONNECTION_LOST, 0}, // 1034
{HTTP_PROXY_NO_PROXY, 0}, // 1035
{HTTP_PROXY_ERROR, 0}, // 1036
{HTTP_SSL_ERROR, 0}, // 1037
{HTTP_CACHED_COPY_NOT_FOUND, 0}, // 1038
{HTTP_TIMEDOUT_WHILE_BYTES_RECEIVING, ShouldRetry}, // 1039
{HTTP_FETCHER_BAD_RESPONSE, 0}, // 1040
{HTTP_FETCHER_MB_ERROR, 0}, // 1041
{HTTP_SSL_CERT_ERROR, 0}, // 1042
// Custom (replace HTTP 200/304)
{EXT_HTTP_MIRRMOVE, 0}, // 2000
{EXT_HTTP_MANUAL_DELETE, ShouldDelete}, // 2001
{EXT_HTTP_NOTUSED2, ShouldDelete}, // 2002
{EXT_HTTP_NOTUSED3, ShouldDelete}, // 2003
{EXT_HTTP_REFRESH, ShouldDelete | CheckLinks | MoveRedir}, // 2004
{EXT_HTTP_NOINDEX, ShouldDelete | CheckLinks}, // 2005
{EXT_HTTP_BADCODES, ShouldDelete}, // 2006
{EXT_HTTP_SITESTAT, ShouldDelete}, // 2007
{EXT_HTTP_IOERROR, ShouldDelete}, // 2008
{EXT_HTTP_BASEERROR, ShouldDelete}, // 2009
{EXT_HTTP_PARSERROR, ShouldDelete | CanBeFake}, // 2010
{EXT_HTTP_BAD_CHARSET, ShouldDelete | CheckLinks}, // 2011
{EXT_HTTP_BAD_LANGUAGE, ShouldDelete | CheckLinks}, // 2012
{EXT_HTTP_NUMERERROR, ShouldDelete}, // 2013
{EXT_HTTP_EMPTYDOC, ShouldDelete | CheckLinks}, // 2014
{EXT_HTTP_HUGEDOC, ShouldDelete}, // 2015
{EXT_HTTP_LINKGARBAGE, ShouldDelete}, // 2016
{EXT_HTTP_PARSERFAIL, ShouldDelete}, // 2019
{EXT_HTTP_GZIPERROR, ShouldDelete}, // 2020
{EXT_HTTP_MANUAL_DELETE_URL, ShouldDelete}, // 2022
{EXT_HTTP_CUSTOM_PARTIAL_CONTENT, ShouldReindex}, // 2023
{EXT_HTTP_EMPTY_RESPONSE, ShouldDelete}, // 2024
{EXT_HTTP_REL_CANONICAL, ShouldDelete | CheckLinks | MoveRedir}, // 2025
{0, 0}};
static ui16* prepare_flags(http_flag* arg) {
static ui16 flags[EXT_HTTP_CODE_MAX];
http_flag* ptr;
size_t i;
// устанавливаем значение по умолчанию для кодов не перечисленных в таблице выше
for (i = 0; i < EXT_HTTP_CODE_MAX; ++i)
flags[i] = CrazyServer;
// устанавливаем флаги для перечисленных кодов
for (ptr = arg; ptr->http; ++ptr)
flags[ptr->http & (EXT_HTTP_CODE_MAX - 1)] = ptr->flag;
// для стандартных кодов ошибок берем флаги из первого кода каждой группы и проставляем их
// всем кодам не перечисленным в таблице выше
for (size_t group = 0; group < 1000; group += 100)
for (size_t j = group + 1; j < group + 100; ++j)
flags[j] = flags[group];
// предыдущий цикл затер некоторые флаги перечисленные в таблице выше
// восстанавливаем их
for (ptr = arg; ptr->http; ++ptr)
flags[ptr->http & (EXT_HTTP_CODE_MAX - 1)] = ptr->flag;
return flags;
}
ui16* http2status = prepare_flags(HTTP_FLAG);
TStringBuf ExtHttpCodeStr(int code) noexcept {
if (code < HTTP_CODE_MAX) {
return HttpCodeStr(code);
}
switch (code) {
case HTTP_BAD_RESPONSE_HEADER:
return TStringBuf("Bad response header");
case HTTP_CONNECTION_LOST:
return TStringBuf("Connection lost");
case HTTP_BODY_TOO_LARGE:
return TStringBuf("Body too large");
case HTTP_ROBOTS_TXT_DISALLOW:
return TStringBuf("robots.txt disallow");
case HTTP_BAD_URL:
return TStringBuf("Bad url");
case HTTP_BAD_MIME:
return TStringBuf("Bad mime type");
case HTTP_DNS_FAILURE:
return TStringBuf("Dns failure");
case HTTP_BAD_STATUS_CODE:
return TStringBuf("Bad status code");
case HTTP_BAD_HEADER_STRING:
return TStringBuf("Bad header string");
case HTTP_BAD_CHUNK:
return TStringBuf("Bad chunk");
case HTTP_CONNECT_FAILED:
return TStringBuf("Connect failed");
case HTTP_FILTER_DISALLOW:
return TStringBuf("Filter disallow");
case HTTP_LOCAL_EIO:
return TStringBuf("Local eio");
case HTTP_BAD_CONTENT_LENGTH:
return TStringBuf("Bad content length");
case HTTP_BAD_ENCODING:
return TStringBuf("Bad encoding");
case HTTP_LENGTH_UNKNOWN:
return TStringBuf("Length unknown");
case HTTP_HEADER_EOF:
return TStringBuf("Header EOF");
case HTTP_MESSAGE_EOF:
return TStringBuf("Message EOF");
case HTTP_CHUNK_EOF:
return TStringBuf("Chunk EOF");
case HTTP_PAST_EOF:
return TStringBuf("Past EOF");
case HTTP_HEADER_TOO_LARGE:
return TStringBuf("Header is too large");
case HTTP_URL_TOO_LARGE:
return TStringBuf("Url is too large");
case HTTP_INTERRUPTED:
return TStringBuf("Interrupted");
case HTTP_CUSTOM_NOT_MODIFIED:
return TStringBuf("Signature detector thinks that doc is not modified");
case HTTP_BAD_CONTENT_ENCODING:
return TStringBuf("Bad content encoding");
case HTTP_NO_RESOURCES:
return TStringBuf("No resources");
case HTTP_FETCHER_SHUTDOWN:
return TStringBuf("Fetcher shutdown");
case HTTP_CHUNK_TOO_LARGE:
return TStringBuf("Chunk size is too big");
case HTTP_SERVER_BUSY:
return TStringBuf("Server is busy");
case HTTP_SERVICE_UNKNOWN:
return TStringBuf("Service is unknown");
case HTTP_PROXY_UNKNOWN:
return TStringBuf("Zora: unknown error");
case HTTP_PROXY_REQUEST_TIME_OUT:
return TStringBuf("Zora: request time out");
case HTTP_PROXY_INTERNAL_ERROR:
return TStringBuf("Zora: internal server error");
case HTTP_PROXY_CONNECT_FAILED:
return TStringBuf("Spider proxy connect failed");
case HTTP_PROXY_CONNECTION_LOST:
return TStringBuf("Spider proxy connection lost");
case HTTP_PROXY_NO_PROXY:
return TStringBuf("Spider proxy no proxy alive in region");
case HTTP_PROXY_ERROR:
return TStringBuf("Spider proxy returned custom error");
case HTTP_SSL_ERROR:
return TStringBuf("Ssl library returned error");
case HTTP_CACHED_COPY_NOT_FOUND:
return TStringBuf("Cached copy for the url is not available");
case HTTP_TIMEDOUT_WHILE_BYTES_RECEIVING:
return TStringBuf("Timed out while bytes receiving");
// TODO: messages for >2000 codes
default:
return TStringBuf("Unknown HTTP code");
}
}