aboutsummaryrefslogblamecommitdiffstats
path: root/library/cpp/http/fetch/exthttpcodes.cpp
blob: acc05650c890b768c95e34535463892201c1b576 (plain) (tree)
1
2
3
4
5
6
7
8
9
10
                         
                  
                                                    
 


                  
                                

                                                   
 








                                                               
 






                                                                               
 


















                                                                                 
 






                                                                                  
 




                                                                                    
 




                                                                                         
 





































                                                                                
 























                                                                            
 
                                            
                                         
                   
             
                                                                                                                                                       
                                           
                               
                                                                                         
                                                               
 
                                                                                                                                                                        
                                                                                     
                                                        
                                    
 
                                                                                                                                   
                                                               


                 
                                             
 
                                              
                               
                                 
                   
                                      
                                                     
                                  
                                                 
                                 
                                                
                                      
                                                     
                          
                                         
                           
                                               
                              
                                             
                                  
                                                 
                                    
                                                   
                            
                                           
                                 
                                                
                                  
                                                 
                            
                                           
                                     
                                                    
                               
                                              
                                 
                                                
                             
                                            
                              
                                             
                            
                                           
                           
                                          
                                   
                                                     
                                
                                                  
                              
                                             
                                      
                                                                                    
                                       
                                                      
                               
                                              
                                   
                                                  
                                  
                                                       
                              
                                                
                                  
                                                    
                                
                                                     
                                         
                                                        
                                       
                                                             
                                       
                                                             
                                        
                                                              
                                 
                                                                       
                              
                                                                    
                            
                                                            
                                        
                                                                          
                                                 
                                                                 
 
                                             
 
                
                                                   
     
#include "exthttpcodes.h"

#include <cstring>

// Flag combination ShouldDelete | MarkSuspect. Besides being used for many
// entries of HTTP_FLAG below, prepare_flags() uses it as the initial value
// for every slot of the lookup table.
const ui16 CrazyServer = ShouldDelete | MarkSuspect;

// One row of the code-to-flags table: a status code paired with its flag bitmask.
struct http_flag {
    ui16 http; // status code; a zero value marks the end of the HTTP_FLAG table
    ui16 flag; // bitwise OR of flag constants (e.g. ShouldDelete, MarkSuspect), or 0
};
// Table assigning a flag bitmask to each explicitly known status code:
// standard HTTP codes first, then the custom 1000+ codes, then the 2000+ codes.
// The list is terminated by a {0, 0} sentinel; prepare_flags() stops at the
// first entry whose `http` field is zero. Codes that are not listed here get
// their flags assigned inside prepare_flags().
static http_flag HTTP_FLAG[] = {
    {HTTP_CONTINUE, MarkSuspect},            // 100
    {HTTP_SWITCHING_PROTOCOLS, CrazyServer}, // 101
    {HTTP_PROCESSING, CrazyServer},          // 102

    {HTTP_OK, ShouldReindex},                            // 200
    {HTTP_CREATED, CrazyServer},                         // 201
    {HTTP_ACCEPTED, ShouldDelete},                       // 202
    {HTTP_NON_AUTHORITATIVE_INFORMATION, ShouldReindex}, // 203
    {HTTP_NO_CONTENT, ShouldDelete},                     // 204
    {HTTP_RESET_CONTENT, ShouldDelete},                  // 205
    {HTTP_PARTIAL_CONTENT, ShouldReindex},               // 206
    {HTTP_MULTI_STATUS, CrazyServer},                    // 207
    {HTTP_ALREADY_REPORTED, CrazyServer},                // 208
    {HTTP_IM_USED, CrazyServer},                         // 226

    {HTTP_MULTIPLE_CHOICES, CheckLinks | ShouldDelete},                  // 300
    {HTTP_MOVED_PERMANENTLY, CheckLocation | ShouldDelete | MoveRedir},  // 301
    {HTTP_FOUND, CheckLocation | ShouldDelete | MoveRedir},              // 302
    {HTTP_SEE_OTHER, CheckLocation | ShouldDelete | MoveRedir},          // 303
    {HTTP_NOT_MODIFIED, 0},                                              // 304
    {HTTP_USE_PROXY, ShouldDelete},                                      // 305
    {HTTP_TEMPORARY_REDIRECT, CheckLocation | ShouldDelete | MoveRedir}, // 307
    {HTTP_PERMANENT_REDIRECT, CheckLocation | ShouldDelete | MoveRedir}, // 308

    {HTTP_BAD_REQUEST, CrazyServer},                                       // 400
    {HTTP_UNAUTHORIZED, ShouldDelete},                                     // 401
    {HTTP_PAYMENT_REQUIRED, ShouldDelete},                                 // 402
    {HTTP_FORBIDDEN, ShouldDelete},                                        // 403
    {HTTP_NOT_FOUND, ShouldDelete},                                        // 404
    {HTTP_METHOD_NOT_ALLOWED, ShouldDelete},                               // 405
    {HTTP_NOT_ACCEPTABLE, ShouldDelete},                                   // 406
    {HTTP_PROXY_AUTHENTICATION_REQUIRED, CrazyServer},                     // 407
    {HTTP_REQUEST_TIME_OUT, ShouldDisconnect | ShouldRetry | MarkSuspect}, // 408
    {HTTP_CONFLICT, MarkSuspect},                                          // 409
    {HTTP_GONE, ShouldDelete},                                             // 410
    {HTTP_LENGTH_REQUIRED, CrazyServer},                                   // 411
    {HTTP_PRECONDITION_FAILED, CrazyServer},                               // 412
    {HTTP_REQUEST_ENTITY_TOO_LARGE, CrazyServer},                          // 413
    {HTTP_REQUEST_URI_TOO_LARGE, ShouldDelete},                            // 414
    {HTTP_UNSUPPORTED_MEDIA_TYPE, CrazyServer},                            // 415
    {HTTP_REQUESTED_RANGE_NOT_SATISFIABLE, CrazyServer},                   // 416
    {HTTP_EXPECTATION_FAILED, ShouldDelete},                               // 417
    {HTTP_I_AM_A_TEAPOT, CrazyServer},                                     // 418
    {HTTP_AUTHENTICATION_TIMEOUT, ShouldDelete},                           // 419

    {HTTP_MISDIRECTED_REQUEST, CrazyServer},                                // 421
    {HTTP_UNPROCESSABLE_ENTITY, CrazyServer},                               // 422
    {HTTP_LOCKED, ShouldDelete},                                            // 423
    {HTTP_FAILED_DEPENDENCY, CrazyServer},                                  // 424
    {HTTP_UPGRADE_REQUIRED, ShouldDelete},                                  // 426
    {HTTP_PRECONDITION_REQUIRED, ShouldDelete},                             // 428
    {HTTP_TOO_MANY_REQUESTS, ShouldDisconnect | ShouldRetry | MarkSuspect}, // 429
    {HTTP_UNAVAILABLE_FOR_LEGAL_REASONS, ShouldDelete},                     // 451

    {HTTP_INTERNAL_SERVER_ERROR, MarkSuspect},                                // 500
    {HTTP_NOT_IMPLEMENTED, ShouldDelete | ShouldDisconnect},                  // 501
    {HTTP_BAD_GATEWAY, MarkSuspect},                                          // 502
    {HTTP_SERVICE_UNAVAILABLE, ShouldDisconnect | ShouldRetry | MarkSuspect}, // 503
    {HTTP_GATEWAY_TIME_OUT, ShouldDisconnect | ShouldRetry | MarkSuspect},    // 504
    {HTTP_HTTP_VERSION_NOT_SUPPORTED, CrazyServer | ShouldDisconnect},        // 505

    {HTTP_VARIANT_ALSO_NEGOTIATES, CrazyServer | ShouldDisconnect},                // 506
    {HTTP_INSUFFICIENT_STORAGE, CrazyServer | ShouldDisconnect},                   // 507
    {HTTP_LOOP_DETECTED, CrazyServer | ShouldDisconnect},                          // 508
    {HTTP_BANDWIDTH_LIMIT_EXCEEDED, ShouldDisconnect | ShouldRetry | MarkSuspect}, // 509
    {HTTP_NOT_EXTENDED, ShouldDelete},                                             // 510
    {HTTP_NETWORK_AUTHENTICATION_REQUIRED, ShouldDelete},                          // 511

    // custom
    {HTTP_BAD_RESPONSE_HEADER, CrazyServer},                             // 1000
    {HTTP_CONNECTION_LOST, ShouldRetry},                                 // 1001
    {HTTP_BODY_TOO_LARGE, ShouldDelete | CanBeFake},                     // 1002
    {HTTP_ROBOTS_TXT_DISALLOW, ShouldDelete},                            // 1003
    {HTTP_BAD_URL, ShouldDelete},                                        // 1004
    {HTTP_BAD_MIME, ShouldDelete},                                       // 1005
    {HTTP_DNS_FAILURE, ShouldDisconnect | MarkSuspect},                  // 1006
    {HTTP_BAD_STATUS_CODE, CrazyServer},                                 // 1007
    {HTTP_BAD_HEADER_STRING, CrazyServer},                               // 1008
    {HTTP_BAD_CHUNK, CrazyServer},                                       // 1009
    {HTTP_CONNECT_FAILED, ShouldDisconnect | ShouldRetry | MarkSuspect}, // 1010
    {HTTP_FILTER_DISALLOW, ShouldDelete},                                // 1011
    {HTTP_LOCAL_EIO, ShouldRetry},                                       // 1012
    {HTTP_BAD_CONTENT_LENGTH, ShouldDelete},                             // 1013
    {HTTP_BAD_ENCODING, ShouldDelete},                                   // 1014
    {HTTP_LENGTH_UNKNOWN, ShouldDelete},                                 // 1015
    {HTTP_HEADER_EOF, ShouldRetry | CanBeFake},                          // 1016
    {HTTP_MESSAGE_EOF, ShouldRetry | CanBeFake},                         // 1017
    {HTTP_CHUNK_EOF, ShouldRetry | CanBeFake},                           // 1018
    {HTTP_PAST_EOF, ShouldRetry | ShouldDelete | CanBeFake},             // 1019
    {HTTP_HEADER_TOO_LARGE, ShouldDelete},                               // 1020
    {HTTP_URL_TOO_LARGE, ShouldDelete},                                  // 1021
    {HTTP_INTERRUPTED, 0},                                               // 1022
    {HTTP_CUSTOM_NOT_MODIFIED, 0},                                       // 1023
    {HTTP_BAD_CONTENT_ENCODING, ShouldDelete},                           // 1024
    {HTTP_PROXY_UNKNOWN, 0},                                             // 1030
    {HTTP_PROXY_REQUEST_TIME_OUT, 0},                                    // 1031
    {HTTP_PROXY_INTERNAL_ERROR, 0},                                      // 1032
    {HTTP_PROXY_CONNECT_FAILED, 0},                                      // 1033
    {HTTP_PROXY_CONNECTION_LOST, 0},                                     // 1034
    {HTTP_PROXY_NO_PROXY, 0},                                            // 1035
    {HTTP_PROXY_ERROR, 0},                                               // 1036
    {HTTP_SSL_ERROR, 0},                                                 // 1037
    {HTTP_CACHED_COPY_NOT_FOUND, 0},                                     // 1038
    {HTTP_TIMEDOUT_WHILE_BYTES_RECEIVING, ShouldRetry},                  // 1039
    {HTTP_FETCHER_BAD_RESPONSE, 0},                                      // 1040
    {HTTP_FETCHER_MB_ERROR, 0},                                          // 1041
    {HTTP_SSL_CERT_ERROR, 0},                                            // 1042

    // Custom (replace HTTP 200/304)
    {EXT_HTTP_MIRRMOVE, 0},                                          // 2000
    {EXT_HTTP_MANUAL_DELETE, ShouldDelete},                          // 2001
    {EXT_HTTP_NOTUSED2, ShouldDelete},                               // 2002
    {EXT_HTTP_NOTUSED3, ShouldDelete},                               // 2003
    {EXT_HTTP_REFRESH, ShouldDelete | CheckLinks | MoveRedir},       // 2004
    {EXT_HTTP_NOINDEX, ShouldDelete | CheckLinks},                   // 2005
    {EXT_HTTP_BADCODES, ShouldDelete},                               // 2006
    {EXT_HTTP_SITESTAT, ShouldDelete},                               // 2007
    {EXT_HTTP_IOERROR, ShouldDelete},                                // 2008
    {EXT_HTTP_BASEERROR, ShouldDelete},                              // 2009
    {EXT_HTTP_PARSERROR, ShouldDelete | CanBeFake},                  // 2010
    {EXT_HTTP_BAD_CHARSET, ShouldDelete | CheckLinks},               // 2011
    {EXT_HTTP_BAD_LANGUAGE, ShouldDelete | CheckLinks},              // 2012
    {EXT_HTTP_NUMERERROR, ShouldDelete},                             // 2013
    {EXT_HTTP_EMPTYDOC, ShouldDelete | CheckLinks},                  // 2014
    {EXT_HTTP_HUGEDOC, ShouldDelete},                                // 2015
    {EXT_HTTP_LINKGARBAGE, ShouldDelete},                            // 2016
    {EXT_HTTP_PARSERFAIL, ShouldDelete},                             // 2019
    {EXT_HTTP_GZIPERROR, ShouldDelete},                              // 2020
    {EXT_HTTP_MANUAL_DELETE_URL, ShouldDelete},                      // 2022
    {EXT_HTTP_CUSTOM_PARTIAL_CONTENT, ShouldReindex},                // 2023
    {EXT_HTTP_EMPTY_RESPONSE, ShouldDelete},                         // 2024
    {EXT_HTTP_REL_CANONICAL, ShouldDelete | CheckLinks | MoveRedir}, // 2025
    {0, 0}};

// Builds the code -> flags lookup table from a zero-terminated list of entries.
//
// `arg` points at an array of http_flag whose final element has http == 0.
// Returns a pointer to a function-local static array of EXT_HTTP_CODE_MAX
// elements; every call refills and returns that same array.
static ui16* prepare_flags(http_flag* arg) {
    static ui16 table[EXT_HTTP_CODE_MAX];

    // Writes every listed entry into the table. The code is masked with
    // (EXT_HTTP_CODE_MAX - 1) before being used as an index.
    // NOTE(review): presumably EXT_HTTP_CODE_MAX is a power of two, so the
    // mask keeps the index in range - confirm against the header.
    const auto applyListed = [arg]() {
        for (const http_flag* entry = arg; entry->http; ++entry) {
            table[entry->http & (EXT_HTTP_CODE_MAX - 1)] = entry->flag;
        }
    };

    // Default for codes that are not listed in the table at all.
    for (size_t code = 0; code != EXT_HTTP_CODE_MAX; ++code) {
        table[code] = CrazyServer;
    }

    // First pass: the flags of the listed codes, so that the first code of
    // each hundred-group carries its real value before it is propagated.
    applyListed();

    // For the standard range (below 1000), every code inherits the flags of
    // the first code of its hundred-group (x00). Group heads stay untouched.
    for (size_t code = 0; code < 1000; ++code) {
        const size_t offset = code % 100;
        if (offset != 0) {
            table[code] = table[code - offset];
        }
    }

    // The propagation above overwrote some explicitly listed codes;
    // a second pass restores them.
    applyListed();

    return table;
}

// Code -> flags lookup table, filled from HTTP_FLAG by prepare_flags() during
// initialization of this global. Points at an array of EXT_HTTP_CODE_MAX elements.
ui16* http2status = prepare_flags(HTTP_FLAG);

// Returns a human-readable description of a status code.
//
// Codes below HTTP_CODE_MAX are delegated to HttpCodeStr(). For the remaining
// codes the text is looked up in the local table; a code without an entry
// yields "Unknown HTTP code".
TStringBuf ExtHttpCodeStr(int code) noexcept {
    if (code < HTTP_CODE_MAX) {
        return HttpCodeStr(code);
    }

    struct TCodeText {
        int Code;
        const char* Text;
    };

    static const TCodeText EXT_CODE_TEXTS[] = {
        {HTTP_BAD_RESPONSE_HEADER, "Bad response header"},
        {HTTP_CONNECTION_LOST, "Connection lost"},
        {HTTP_BODY_TOO_LARGE, "Body too large"},
        {HTTP_ROBOTS_TXT_DISALLOW, "robots.txt disallow"},
        {HTTP_BAD_URL, "Bad url"},
        {HTTP_BAD_MIME, "Bad mime type"},
        {HTTP_DNS_FAILURE, "Dns failure"},
        {HTTP_BAD_STATUS_CODE, "Bad status code"},
        {HTTP_BAD_HEADER_STRING, "Bad header string"},
        {HTTP_BAD_CHUNK, "Bad chunk"},
        {HTTP_CONNECT_FAILED, "Connect failed"},
        {HTTP_FILTER_DISALLOW, "Filter disallow"},
        {HTTP_LOCAL_EIO, "Local eio"},
        {HTTP_BAD_CONTENT_LENGTH, "Bad content length"},
        {HTTP_BAD_ENCODING, "Bad encoding"},
        {HTTP_LENGTH_UNKNOWN, "Length unknown"},
        {HTTP_HEADER_EOF, "Header EOF"},
        {HTTP_MESSAGE_EOF, "Message EOF"},
        {HTTP_CHUNK_EOF, "Chunk EOF"},
        {HTTP_PAST_EOF, "Past EOF"},
        {HTTP_HEADER_TOO_LARGE, "Header is too large"},
        {HTTP_URL_TOO_LARGE, "Url is too large"},
        {HTTP_INTERRUPTED, "Interrupted"},
        {HTTP_CUSTOM_NOT_MODIFIED, "Signature detector thinks that doc is not modified"},
        {HTTP_BAD_CONTENT_ENCODING, "Bad content encoding"},
        {HTTP_NO_RESOURCES, "No resources"},
        {HTTP_FETCHER_SHUTDOWN, "Fetcher shutdown"},
        {HTTP_CHUNK_TOO_LARGE, "Chunk size is too big"},
        {HTTP_SERVER_BUSY, "Server is busy"},
        {HTTP_SERVICE_UNKNOWN, "Service is unknown"},
        {HTTP_PROXY_UNKNOWN, "Zora: unknown error"},
        {HTTP_PROXY_REQUEST_TIME_OUT, "Zora: request time out"},
        {HTTP_PROXY_INTERNAL_ERROR, "Zora: internal server error"},
        {HTTP_PROXY_CONNECT_FAILED, "Spider proxy connect failed"},
        {HTTP_PROXY_CONNECTION_LOST, "Spider proxy connection lost"},
        {HTTP_PROXY_NO_PROXY, "Spider proxy no proxy alive in region"},
        {HTTP_PROXY_ERROR, "Spider proxy returned custom error"},
        {HTTP_SSL_ERROR, "Ssl library returned error"},
        {HTTP_CACHED_COPY_NOT_FOUND, "Cached copy for the url is not available"},
        {HTTP_TIMEDOUT_WHILE_BYTES_RECEIVING, "Timed out while bytes receiving"},

        // TODO: messages for >2000 codes
    };

    for (const TCodeText& item : EXT_CODE_TEXTS) {
        if (item.Code == code) {
            return TStringBuf(item.Text);
        }
    }

    return TStringBuf("Unknown HTTP code");
}