aboutsummaryrefslogblamecommitdiffstats
path: root/library/cpp/json/json_reader.cpp
blob: 072c8deafee8bdb499bd6613ab212eb5478f1228 (plain) (tree)
1
2
3
4
5
6
7
8
9
10
                        
                              
                                                                 
                                                            
 
                               
                                
                                
                 
                                                                  

                                                                                                  

         
























                                                                     
                         















                                                    

                                                                                                          
                                      
                                                          


























































                                                               







                                       


                                          
 

                                                                         
 

                                                     
 
                                  
             
                                      
 

                                                          
     

                                         
 







                                                          

             
                                 
 
                            
 



                                                   
 






                                                   
             


                                      
 


                               
 


                             
 






                                                                   
             

                                      
 


                               
 

                                      
 


                                   
 





                                                                                    
 



                                                                                 
 





                                                                             
             

                                                                              
                                                                    
                                                
      


                                                  
             
                                                     
             
                         
     
                                                 
      
             
 



                                                                                                  
 



















                                                                               

                                                                                
                                       











                                                                             
                                                                                    
                                                                           
                                                                                                                            









                                                                                                                                                                     
                                                                               

             






                                                                                                                                  
                                                                  



                                 

                        





                                                                                                                                    
         



                                                                                                                             
 



                                                                                                  
         
 

                                                                              
         
                 
 
                                                                          
     

                                                                                              
 
                                                                                                           
     
                                                                             
     
                                                                                                 
     
                                                                                                              
     
                                                                                                            
 
                                     
 
                                                                              
                                                                                                              
                                                                                         

                     

                                      
 



























                                                       
 

                                         
 

                                      
 

                                        
 




                                                                                    
             


                                                                                 
 

                                        
 


                                                                              
 


                                                             
 

                                          
 




                                                             
 

                                                          
 



                                                                              
 





                                                                                                           

                                                                                           
 
                                                         
 
                               
                                                              
 
                         
         
                            
 



                                                                  
 



                                                                                      
 


                                                                                                   

     
#include "json_reader.h"

#include "rapidjson_helpers.h"

#include <contrib/libs/rapidjson/include/rapidjson/error/en.h>
#include <contrib/libs/rapidjson/include/rapidjson/error/error.h>
#include <contrib/libs/rapidjson/include/rapidjson/reader.h>

#include <util/generic/stack.h>
#include <util/string/cast.h>
#include <util/system/yassert.h>
#include <util/string/builder.h>

namespace NJson {
    namespace {
        // Formats a rapidjson parse result as
        // "Offset: <offset>, Code: <numeric code>, Error: <English message>".
        TString PrintError(const rapidjson::ParseResult& result) {
            const auto code = result.Code();

            TStringBuilder message;
            message << TStringBuf("Offset: ") << result.Offset();
            message << TStringBuf(", Code: ") << static_cast<int>(code);
            message << TStringBuf(", Error: ") << GetParseError_En(code);
            return message;
        }
    }

    // Initial value of TJsonReaderConfig::BufferSize and the upper bound that
    // TJsonReaderConfig::SetBufferSize clamps requested sizes to.
    static const size_t DEFAULT_BUFFER_LEN = 65536;

    // Begins a new container value of the given type at the current position and
    // pushes it onto ValuesStack.
    // Returns false when the current state does not allow a container to start.
    // Note: for START and IN_ARRAY the state is left untouched here; the callers
    // (OnOpenArray / OnOpenMap) set it after a successful return.
    bool TParserCallbacks::OpenComplexValue(EJsonValueType type) {
        if (CurrentState == START) {
            // The top-level container is the output value itself.
            Value.SetType(type);
            ValuesStack.push_back(&Value);
            return true;
        }

        if (CurrentState == IN_ARRAY) {
            TJsonValue* const element = &ValuesStack.back()->AppendValue(type);
            ValuesStack.push_back(element);
            return true;
        }

        if (CurrentState == AFTER_MAP_KEY) {
            // The container becomes the value of the most recently seen key.
            TJsonValue* const member = &ValuesStack.back()->InsertValue(Key, type);
            ValuesStack.push_back(member);
            CurrentState = IN_MAP;
            return true;
        }

        return false;
    }

    // Finishes the container on top of ValuesStack and restores the state that
    // matches its parent (IN_ARRAY / IN_MAP), or FINISH when no parent remains.
    // Returns false if there is nothing to close or the parent is not a container.
    bool TParserCallbacks::CloseComplexValue() {
        if (ValuesStack.empty()) {
            return false;
        }
        ValuesStack.pop_back();

        if (ValuesStack.empty()) {
            // The outermost container has just been closed.
            CurrentState = FINISH;
            return true;
        }

        const auto parentType = ValuesStack.back()->GetType();
        if (parentType == JSON_ARRAY) {
            CurrentState = IN_ARRAY;
        } else if (parentType == JSON_MAP) {
            CurrentState = IN_MAP;
        } else {
            return false;
        }
        return true;
    }

    // Creates callbacks that build the parsed document into `value`.
    //   value                   - output tree; kept by reference.
    //   throwOnError            - passed through to the TJsonCallbacks base.
    //   notClosedBracketIsError - when true, OnEnd() fails if containers are still open.
    // Parsing starts in the START state.
    TParserCallbacks::TParserCallbacks(TJsonValue& value, bool throwOnError, bool notClosedBracketIsError)
        : TJsonCallbacks(throwOnError)
        , Value(value)
        , NotClosedBracketIsError(notClosedBracketIsError)
        , CurrentState(START)
    {
    }

    // Callback for a JSON null: forwards JSON_NULL to SetValue and returns its result.
    bool TParserCallbacks::OnNull() {
        const bool accepted = SetValue(JSON_NULL);
        return accepted;
    }

    // Callback for a boolean: forwards it to SetValue and returns its result.
    bool TParserCallbacks::OnBoolean(bool val) {
        const bool accepted = SetValue(val);
        return accepted;
    }

    // Callback for a signed integer: forwards it to SetValue and returns its result.
    bool TParserCallbacks::OnInteger(long long val) {
        const bool accepted = SetValue(val);
        return accepted;
    }

    // Callback for an unsigned integer: forwards it to SetValue and returns its result.
    bool TParserCallbacks::OnUInteger(unsigned long long val) {
        const bool accepted = SetValue(val);
        return accepted;
    }

    // Callback for a string: forwards it to SetValue and returns its result.
    bool TParserCallbacks::OnString(const TStringBuf& val) {
        const bool accepted = SetValue(val);
        return accepted;
    }

    // Callback for a floating-point number: forwards it to SetValue and returns its result.
    bool TParserCallbacks::OnDouble(double val) {
        const bool accepted = SetValue(val);
        return accepted;
    }

    // Callback for '[': opens a new array and, on success, switches to IN_ARRAY.
    bool TParserCallbacks::OnOpenArray() {
        if (!OpenComplexValue(JSON_ARRAY)) {
            return false;
        }
        CurrentState = IN_ARRAY;
        return true;
    }

    // Callback for ']': closes the container on top of the stack.
    bool TParserCallbacks::OnCloseArray() {
        const bool closed = CloseComplexValue();
        return closed;
    }

    // Callback for '{': opens a new map and, on success, switches to IN_MAP.
    bool TParserCallbacks::OnOpenMap() {
        if (!OpenComplexValue(JSON_MAP)) {
            return false;
        }
        CurrentState = IN_MAP;
        return true;
    }

    // Callback for '}': closes the container on top of the stack.
    bool TParserCallbacks::OnCloseMap() {
        const bool closed = CloseComplexValue();
        return closed;
    }

    // Callback for a map key: remembers the key for the value that follows.
    // A key is only accepted while inside a map (IN_MAP); otherwise returns false.
    bool TParserCallbacks::OnMapKey(const TStringBuf& val) {
        if (CurrentState != IN_MAP) {
            return false;
        }
        Key = val;
        CurrentState = AFTER_MAP_KEY;
        return true;
    }

    // Called when the input is exhausted.
    // Fails only if unclosed containers are treated as an error and some remain open.
    bool TParserCallbacks::OnEnd() {
        return !NotClosedBracketIsError || ValuesStack.empty();
    }

    // Creates a reader configuration whose buffer size is DEFAULT_BUFFER_LEN.
    TJsonReaderConfig::TJsonReaderConfig()
        : BufferSize(DEFAULT_BUFFER_LEN)
    {
    }

    // Stores the requested buffer size clamped to the range [1, DEFAULT_BUFFER_LEN].
    void TJsonReaderConfig::SetBufferSize(size_t bufferSize) {
        const size_t capped = Min(bufferSize, DEFAULT_BUFFER_LEN);
        BufferSize = Max(static_cast<size_t>(1), capped);
    }

    // Returns the currently configured buffer size.
    size_t TJsonReaderConfig::GetBufferSize() const {
        const size_t current = BufferSize;
        return current;
    }

    namespace {
        // SAX handler for rapidjson::Reader that assembles the parsed document
        // directly into a TJsonValue tree.
        //
        // The stack S tracks the values that are still being filled. Its top is
        // one of:
        //   * an array that receives appended elements,
        //   * a map whose members are being read,
        //   * a single slot (the root or a map member) waiting for its value.
        // Every handler method returns true, i.e. the builder never asks the
        // reader to stop.
        struct TJsonValueBuilder {
#ifdef NDEBUG
            // Release builds keep only the pointer to the value being filled.
            using TItem = TJsonValue*;

            inline TJsonValue& Access(TItem& item) const {
                return *item;
            }
#else
            // Debug builds additionally count duplicate keys per stack entry so
            // that EndObject can cross-check the member count reported by rapidjson.
            struct TItem {
                TJsonValue* V;
                size_t DuplicateKeyCount;

                explicit TItem(TJsonValue* v)
                    : V(v)
                    , DuplicateKeyCount(0)
                {
                }
            };

            inline TJsonValue& Access(TItem& item) const {
                return *item.V;
            }
#endif

            // Root of the tree being built.
            NJson::TJsonValue& V;

            // Values currently being filled; see the struct comment.
            TStack<TItem> S;

            // Starts building into `v`; the root is the first slot awaiting a value.
            explicit TJsonValueBuilder(NJson::TJsonValue& v)
                : V(v)
            {
                S.emplace(&V);
            }

            // Stores a scalar at the current position: appended when the top of
            // the stack is an array, otherwise assigned to the waiting slot,
            // which is then complete and popped.
            template <class T>
            void Set(const T& t) {
                if (Access(S.top()).IsArray()) {
                    Access(S.top()).AppendValue(t);
                } else {
                    Access(S.top()) = t;
                    S.pop();
                }
            }

            bool Null() {
                Set(NJson::JSON_NULL);
                return true;
            }

            bool Bool(bool b) {
                Set(b);
                return true;
            }

            bool Int(int i) {
                Set(i);
                return true;
            }

            // Stores unsigned input as a signed i64 whenever it fits, and as an
            // unsigned value only when it exceeds the i64 range.
            template <class U>
            bool ProcessUint(U u) {
                if (Y_LIKELY(u <= static_cast<ui64>(Max<i64>()))) {
                    Set(i64(u));
                } else {
                    Set(u);
                }
                return true;
            }

            bool Uint(unsigned u) {
                return ProcessUint(u);
            }

            bool Int64(i64 i) {
                Set(i);
                return true;
            }

            bool Uint64(ui64 u) {
                return ProcessUint(u);
            }

            bool Double(double d) {
                Set(d);
                return true;
            }

            // Part of the rapidjson handler interface; asserts because it is not
            // expected to be invoked with the parse flags used in this file.
            bool RawNumber(const char* str, rapidjson::SizeType length, bool copy) {
                Y_ASSERT(false && "this method should never be called");
                Y_UNUSED(str);
                Y_UNUSED(length);
                Y_UNUSED(copy);
                return true;
            }

            bool String(const char* str, rapidjson::SizeType length, bool copy) {
                Y_ASSERT(copy);
                Set(TStringBuf(str, length));
                return true;
            }

            // Inside an array a new map element is appended and pushed; otherwise
            // the waiting slot on top of the stack itself becomes the map.
            bool StartObject() {
                if (Access(S.top()).IsArray()) {
                    S.emplace(&Access(S.top()).AppendValue(NJson::JSON_MAP));
                } else {
                    Access(S.top()).SetType(NJson::JSON_MAP);
                }
                return true;
            }

            // Pushes the member slot for the given key. If the key already holds
            // a value, the slot is reset to JSON_UNDEFINED so that the value that
            // follows replaces the earlier one.
            bool Key(const char* str, rapidjson::SizeType length, bool copy) {
                Y_ASSERT(copy);
                auto& value = Access(S.top())[TStringBuf(str, length)];
                if (Y_UNLIKELY(value.GetType() != JSON_UNDEFINED)) {
#ifndef NDEBUG
                    ++S.top().DuplicateKeyCount;
#endif
                    value.SetType(JSON_UNDEFINED);
                }
                S.emplace(&value);
                return true;
            }

            // Number of duplicate keys seen in the map on top of the stack
            // (always 0 in release builds, where the counter is not tracked).
            // Returns size_t to match the counter and avoid narrowing to int.
            inline size_t GetDuplicateKeyCount() const {
#ifdef NDEBUG
                return 0;
#else
                return S.top().DuplicateKeyCount;
#endif
            }

            bool EndObject(rapidjson::SizeType memberCount) {
                // rapidjson counts every key it saw, duplicates included.
                Y_ASSERT(memberCount == Access(S.top()).GetMap().size() + GetDuplicateKeyCount());
                S.pop();
                return true;
            }

            // Inside an array a new array element is appended and pushed;
            // otherwise the waiting slot on top of the stack itself becomes the array.
            bool StartArray() {
                if (Access(S.top()).IsArray()) {
                    S.emplace(&Access(S.top()).AppendValue(NJson::JSON_ARRAY));
                } else {
                    Access(S.top()).SetType(NJson::JSON_ARRAY);
                }
                return true;
            }

            bool EndArray(rapidjson::SizeType elementCount) {
                Y_ASSERT(elementCount == Access(S.top()).GetArray().size());
                S.pop();
                return true;
            }
        };

        template <class TRapidJsonCompliantInputStream, class THandler>
        auto Read(const TJsonReaderConfig& config,
                  rapidjson::Reader& reader,
                  TRapidJsonCompliantInputStream& is,
                  THandler& handler) {

            ui8 flags = ReaderConfigToRapidJsonFlags::NOCOMMENTS_VALID_NOESCAPE;

            if (config.AllowComments) {
                flags |= ReaderConfigFlags::COMMENTS;
            }

            if (config.DontValidateUtf8) {
                flags &= ~(ReaderConfigFlags::VALIDATE);
            }

            if (config.AllowEscapedApostrophe) {
                flags |= ReaderConfigFlags::ESCAPE;
            }

            switch (flags) {
                case ReaderConfigToRapidJsonFlags::COMMENTS_NOVALID_NOESCAPE:
                    return reader.Parse<rapidjson::kParseCommentsFlag>(is, handler);
                case ReaderConfigToRapidJsonFlags::COMMENTS_VALID_NOESCAPE:
                    return reader.Parse<rapidjson::kParseCommentsFlag | rapidjson::kParseValidateEncodingFlag>(is, handler);
                case ReaderConfigToRapidJsonFlags::COMMENTS_VALID_ESCAPE:
                    return reader.Parse<rapidjson::kParseCommentsFlag | rapidjson::kParseValidateEncodingFlag | rapidjson::kParseEscapedApostropheFlag>(is, handler);
                case ReaderConfigToRapidJsonFlags::COMMENTS_NOVALID_ESCAPE:
                    return reader.Parse<rapidjson::kParseCommentsFlag | rapidjson::kParseEscapedApostropheFlag>(is, handler);
                case ReaderConfigToRapidJsonFlags::NOCOMMENTS_VALID_NOESCAPE:
                    return reader.Parse<rapidjson::kParseValidateEncodingFlag>(is, handler);
                case ReaderConfigToRapidJsonFlags::NOCOMMENTS_VALID_ESCAPE:
                    return reader.Parse<rapidjson::kParseValidateEncodingFlag | rapidjson::kParseEscapedApostropheFlag>(is, handler);
                case  ReaderConfigToRapidJsonFlags::NOCOMMENTS_NOVALID_ESCAPE:
                    return reader.Parse<rapidjson::kParseEscapedApostropheFlag>(is, handler);
                default:
                    return reader.Parse<rapidjson::kParseNoFlags>(is, handler);
            }
        }

        // Parses `is` with a fresh rapidjson reader, feeding events to `handler`.
        // Returns true on success. On a parse error either throws TJsonException
        // with the formatted error (throwOnError == true) or returns false.
        template <class TRapidJsonCompliantInputStream, class THandler>
        bool ReadJson(TRapidJsonCompliantInputStream& is, const TJsonReaderConfig* config, THandler& handler, bool throwOnError) {
            rapidjson::Reader reader;
            const auto outcome = Read(*config, reader, is, handler);

            if (!outcome.IsError()) {
                return true;
            }

            if (throwOnError) {
                ythrow TJsonException() << PrintError(outcome);
            }
            return false;
        }

        template <class TRapidJsonCompliantInputStream>
        bool ReadJsonTree(TRapidJsonCompliantInputStream& is, const TJsonReaderConfig* config, TJsonValue* out, bool throwOnError) {
            out->SetType(NJson::JSON_NULL);

            TJsonValueBuilder handler(*out);

            return ReadJson(is, config, handler, throwOnError);
        }

        template <class TData>
        bool ReadJsonTreeImpl(TData* in, const TJsonReaderConfig* config, TJsonValue* out, bool throwOnError) {
            std::conditional_t<std::is_same<TData, TStringBuf>::value, TStringBufStreamWrapper, TInputStreamWrapper> is(*in);
            return ReadJsonTree(is, config, out, throwOnError);
        }

        template <class TData>
        bool ReadJsonTreeImpl(TData* in, bool allowComments, TJsonValue* out, bool throwOnError) {
            TJsonReaderConfig config;
            config.AllowComments = allowComments;
            return ReadJsonTreeImpl(in, &config, out, throwOnError);
        }

        template <class TData>
        bool ReadJsonTreeImpl(TData* in, TJsonValue* out, bool throwOnError) {
            return ReadJsonTreeImpl(in, false, out, throwOnError);
        }
    } //namespace

    bool ReadJsonTree(TStringBuf in, TJsonValue* out, bool throwOnError) {
        return ReadJsonTreeImpl(&in, out, throwOnError);
    }

    bool ReadJsonTree(TStringBuf in, bool allowComments, TJsonValue* out, bool throwOnError) {
        return ReadJsonTreeImpl(&in, allowComments, out, throwOnError);
    }

    bool ReadJsonTree(TStringBuf in, const TJsonReaderConfig* config, TJsonValue* out, bool throwOnError) {
        return ReadJsonTreeImpl(&in, config, out, throwOnError);
    }

    bool ReadJsonTree(IInputStream* in, TJsonValue* out, bool throwOnError) {
        return ReadJsonTreeImpl(in, out, throwOnError);
    }

    bool ReadJsonTree(IInputStream* in, bool allowComments, TJsonValue* out, bool throwOnError) {
        return ReadJsonTreeImpl(in, allowComments, out, throwOnError);
    }

    bool ReadJsonTree(IInputStream* in, const TJsonReaderConfig* config, TJsonValue* out, bool throwOnError) {
        return ReadJsonTreeImpl(in, config, out, throwOnError);
    }

    // Parses `in` with ReadJsonFast, building the tree into `*out` through
    // TParserCallbacks. Returns the result of ReadJsonFast.
    bool ReadJsonFastTree(TStringBuf in, TJsonValue* out, bool throwOnError, bool notClosedBracketIsError) {
        TParserCallbacks callbacks(*out, throwOnError, notClosedBracketIsError);
        const bool parsed = ReadJsonFast(in, &callbacks);
        return parsed;
    }

    // Parses `in` and returns the resulting tree by value.
    TJsonValue ReadJsonFastTree(TStringBuf in, bool notClosedBracketIsError) {
        // Returning the result by value leaves no channel for an error status,
        // so errors are always requested as exceptions.
        constexpr bool throwOnError = true;

        TJsonValue parsed;
        ReadJsonFastTree(in, &parsed, throwOnError, notClosedBracketIsError);
        return parsed;
    }

    namespace {
        struct TJsonCallbacksWrapper {
            TJsonCallbacks& Impl;

            TJsonCallbacksWrapper(TJsonCallbacks& impl)
                : Impl(impl)
            {
            }

            bool Null() {
                return Impl.OnNull();
            }

            bool Bool(bool b) {
                return Impl.OnBoolean(b);
            }

            template <class U>
            bool ProcessUint(U u) {
                if (Y_LIKELY(u <= ui64(Max<i64>()))) {
                    return Impl.OnInteger(i64(u));
                } else {
                    return Impl.OnUInteger(u);
                }
            }

            bool Int(int i) {
                return Impl.OnInteger(i);
            }

            bool Uint(unsigned u) {
                return ProcessUint(u);
            }

            bool Int64(i64 i) {
                return Impl.OnInteger(i);
            }

            bool Uint64(ui64 u) {
                return ProcessUint(u);
            }

            bool Double(double d) {
                return Impl.OnDouble(d);
            }

            bool RawNumber(const char* str, rapidjson::SizeType length, bool copy) {
                Y_ASSERT(false && "this method should never be called");
                Y_UNUSED(str);
                Y_UNUSED(length);
                Y_UNUSED(copy);
                return true;
            }

            bool String(const char* str, rapidjson::SizeType length, bool copy) {
                Y_ASSERT(copy);
                return Impl.OnString(TStringBuf(str, length));
            }

            bool StartObject() {
                return Impl.OnOpenMap();
            }

            bool Key(const char* str, rapidjson::SizeType length, bool copy) {
                Y_ASSERT(copy);
                return Impl.OnMapKey(TStringBuf(str, length));
            }

            bool EndObject(rapidjson::SizeType memberCount) {
                Y_UNUSED(memberCount);
                return Impl.OnCloseMap();
            }

            bool StartArray() {
                return Impl.OnOpenArray();
            }

            bool EndArray(rapidjson::SizeType elementCount) {
                Y_UNUSED(elementCount);
                return Impl.OnCloseArray();
            }
        };
    }

    // Convenience overloads of the callback-based reader. Each one builds a
    // default TJsonReaderConfig, overrides the listed options and delegates to
    // the overload that takes a configuration.

    // Comments disallowed.
    bool ReadJson(IInputStream* in, TJsonCallbacks* cbs) {
        return ReadJson(in, /* allowComments = */ false, cbs);
    }

    // Comments optionally allowed.
    bool ReadJson(IInputStream* in, bool allowComments, TJsonCallbacks* cbs) {
        TJsonReaderConfig readerConfig;
        readerConfig.AllowComments = allowComments;

        const TJsonReaderConfig* const configPtr = &readerConfig;
        return ReadJson(in, configPtr, cbs);
    }

    // Comments and escaped apostrophes optionally allowed.
    bool ReadJson(IInputStream* in, bool allowComments, bool allowEscapedApostrophe, TJsonCallbacks* cbs) {
        TJsonReaderConfig readerConfig;
        readerConfig.AllowComments = allowComments;
        readerConfig.AllowEscapedApostrophe = allowEscapedApostrophe;

        const TJsonReaderConfig* const configPtr = &readerConfig;
        return ReadJson(in, configPtr, cbs);
    }

    // Parses the stream `in` and reports every event to `cbs`.
    // On a parse error, cbs->OnError is called with the error offset and the
    // formatted message, and false is returned. Otherwise the result of
    // cbs->OnEnd() is returned.
    bool ReadJson(IInputStream* in, const TJsonReaderConfig* config, TJsonCallbacks* cbs) {
        TJsonCallbacksWrapper handler(*cbs);
        TInputStreamWrapper stream(*in);
        rapidjson::Reader reader;

        const auto outcome = Read(*config, reader, stream, handler);
        if (!outcome.IsError()) {
            return cbs->OnEnd();
        }

        cbs->OnError(outcome.Offset(), PrintError(outcome));
        return false;
    }

    // Overloads that return the parsed tree by value.
    // The boolean result of the underlying ReadJsonTree call is discarded, so
    // with throwOnError == false a parse failure is not signalled to the caller.

    // Comments disallowed.
    TJsonValue ReadJsonTree(IInputStream* in, bool throwOnError) {
        TJsonValue tree;
        ReadJsonTree(in, &tree, throwOnError);
        return tree;
    }

    // Comments optionally allowed.
    TJsonValue ReadJsonTree(IInputStream* in, bool allowComments, bool throwOnError) {
        TJsonValue tree;
        ReadJsonTree(in, allowComments, &tree, throwOnError);
        return tree;
    }

    // Explicit reader configuration.
    TJsonValue ReadJsonTree(IInputStream* in, const TJsonReaderConfig* config, bool throwOnError) {
        TJsonValue tree;
        ReadJsonTree(in, config, &tree, throwOnError);
        return tree;
    }

}