aboutsummaryrefslogblamecommitdiffstats
path: root/library/cpp/uri/uri.h
blob: b7550a9e2c9d75a6ce07f299a2c768c80e892cf9 (plain) (tree)
1
2
3
4
5
6
7
8
9
            
 
                   
 
                                         
                                
                                   
                                
                              
                            
                               
                                
 
                  
                
                                                              
                          









                           
 
            
                       





                                                                            
 

                                               
                               
                        
                           
         
 




                                 
 





                                          
 

                                                 
 
                          
         
 


                                         
 


                                                 
 

                                                  
 

                                        
 

                                               
 


                                      
 

                                       
 



                                                                            
 


                                                          
 

                                                    
 









                                                             
 








                                                             
 

                                                            
 
                                                                                                  
 




                                 
 




                                            
 



                                                                                 
 



                                                                                   
 


                                                                         
 

                                                                 
                                                                        
         
 









                                                   
 





                                             
 
                                                
                                                                               
         
 


                                    
 

                                                     
 


                                        
 

                                         
 
                           
 
                                                                 
 

                                                                                              
 
                                                               
 
                                                                                                                                          
 
                                                                                                                               
 







                                                         
 
                                                                                                                                                                                                                        
 
                             
                                      





                                          
 

                    
 




                                    
 

                        
         

                                   
 

                                                  
 

                                   
 
                                  
 
                          
 


                              
 
                                                                                                  
 
                                                                                                                                                                                         
 





                                                                                               
 




                                                                                                                                                 
 

                                                                                                                                                                                             
 

                                                                                                                                                          
 
                                                                                                                                                               
 

                                                                                                  
 
                                                                                                                                                       
 


                                                                                                                                                                                            
 


                                                     
 
                                                          
 
                                                                                                                                                                              
 



                                             
         
 
                                           
 
                                                                      
 



                                                                  
 

                                                        
 

                                                  
 



                                                                                   
 



                                                                                   
 




                                                                                  
 



                                                                            
 





                                                                   
 



                                                         
 





                                                                                            
 
                                                                    
 
                                                            
 

                                                                                          
 

                                                  
 





                                               
 

                                                     
 




                                                  
 

                                                                          
 

                                       
 



                                                         
 





                                              
 

                                                                                                          
 


                                     
 



                                        
 

                                
 

                                                                    
 

                                                                   
 

                                              
 

                                              
 

                                             
 



                                                                            
 



                                                                            
 





                                                     
 
                                                                                                                                       
                                                                                             
 
                                                                                                                                                      
                                                                                                                                                 
 
                                                                                                                      
                                                               
 


                                                                                                   
 
                                                                                  
 


                                                                   
                                  




                                                                 
 

                                                               
                                  





                                                             
 
                      
 






                             
 


                                                                 
 


                                                                   
 


                                        
 
                                                           
 
 
                                                  

                   
                                                           
                                     
 
#pragma once

#include "common.h"
#include "encode.h"

#include <library/cpp/charset/doccodes.h>
#include <util/generic/buffer.h>
#include <util/generic/ptr.h>
#include <util/generic/singleton.h>
#include <util/generic/string.h>
#include <util/memory/alloc.h>
#include <util/stream/mem.h>
#include <util/stream/output.h>
#include <util/stream/str.h>
#include <util/system/yassert.h>

#include <cstdlib>

namespace NUri {
    /********************************************************/
    /// URI value object.
    ///
    /// Field values are kept as TStringBuf views (Fields) together with two bit
    /// masks indexed by EField: FieldsSet (which fields currently have a value)
    /// and FieldsDirty (which fields reference memory outside of Buffer and thus
    /// need a Rewrite() before the object is self-contained).
    class TUri
        : public TFeature,
          public TField,
          public TScheme,
          public TState {
    public:
        /// Link classification returned by Normalize() and Locality().
        enum TLinkType {
            LinkIsBad,
            LinkBadAbs,
            LinkIsFragment,
            LinkIsLocal,
            LinkIsGlobal
        };

    private:
        /// storage owned by this object
        TBuffer Buffer;
        /// per-field value views, indexed by EField
        TStringBuf Fields[FieldAllMAX];
        /// bit mask (see FldFlag) of the fields that currently have a value
        ui32 FieldsSet;
        /// explicit port; 0 means "use DefaultPort" (see GetPort)
        ui16 Port;
        ui16 DefaultPort;
        TScheme::EKind Scheme;
        /// contains fields out of buffer (and possibly not null-terminated)
        ui32 FieldsDirty;

    private:
        /// Drops the current buffer contents and resizes the buffer to len bytes.
        void Alloc(size_t len) {
            Dealloc(); // to prevent copy below
            Buffer.Resize(len);
        }

        /// Clears the owned buffer.
        void Dealloc() {
            Buffer.Clear();
        }

        /// Resets port, scheme and both field masks.
        /// DefaultPort and the Fields array itself are left untouched.
        void ClearImpl() {
            Port = 0;
            FieldsSet = 0;
            Scheme = SchemeEmpty;
            FieldsDirty = 0;
        }

        /// Copies the scalar state (masks, ports, scheme) from url.
        void CopyData(const TUri& url) {
            FieldsSet = url.FieldsSet;
            Port = url.Port;
            DefaultPort = url.DefaultPort;
            Scheme = url.Scheme;
            FieldsDirty = url.FieldsDirty;
        }

        /// Copies the field views from url and then unconditionally rewrites.
        void CopyImpl(const TUri& url) {
            for (int i = 0; i < FieldAllMAX; ++i)
                Fields[i] = url.Fields[i];

            RewriteImpl();
        }

    private:
        /// Bit corresponding to fld in the FieldsSet / FieldsDirty masks.
        static ui32 FldFlag(EField fld) {
            // shift an unsigned value: "1 << fld" is computed in signed int and
            // overflows when it reaches the sign bit
            return ui32(1) << fld;
        }

    public:
        /// True when fld is a usable index into the field table.
        static bool FldIsValid(EField fld) {
            return 0 <= fld && FieldAllMAX > fld;
        }

        /// True when the bits of FieldsSet selected by chk are exactly exp.
        bool FldSetCmp(ui32 chk, ui32 exp) const {
            return (FieldsSet & chk) == exp;
        }

        /// True when every field in chk is set.
        bool FldSetCmp(ui32 chk) const {
            return FldSetCmp(chk, chk);
        }

        /// True when fld has a value. fld is not range-checked here.
        bool FldIsSet(EField fld) const {
            return !FldSetCmp(FldFlag(fld), 0);
        }

    private:
        void FldMarkSet(EField fld) {
            FieldsSet |= FldFlag(fld);
        }

        void FldMarkUnset(EField fld) {
            FieldsSet &= ~FldFlag(fld);
        }

        // use when we know the field is dirty or RewriteImpl will be called
        void FldSetNoDirty(EField fld, const TStringBuf& value) {
            Fields[fld] = value;
            FldMarkSet(fld);
        }

        /// Stores the view, marks the field as set and as dirty.
        void FldSet(EField fld, const TStringBuf& value) {
            FldSetNoDirty(fld, value);
            FldMarkDirty(fld);
        }

        /// Raw access to the stored view; ignores the "set" mask.
        const TStringBuf& FldGet(EField fld) const {
            return Fields[fld];
        }

    private:
        /// depending on value, clears or sets it
        void FldChkSet(EField fld, const TStringBuf& value) {
            if (value.IsInited())
                FldSet(fld, value);
            else
                FldClr(fld);
        }
        void FldChkSet(EField fld, const TUri& other) {
            FldChkSet(fld, other.GetField(fld));
        }

        /// set only if initialized
        bool FldTrySet(EField fld, const TStringBuf& value) {
            const bool ok = value.IsInited();
            if (ok)
                FldSet(fld, value);
            return ok;
        }
        bool FldTrySet(EField fld, const TUri& other) {
            return FldTrySet(fld, other.GetField(fld));
        }

    private:
        /// copies the value if it fits
        bool FldTryCpy(EField fld, const TStringBuf& value);

        // main method: sets the field value, possibly copies, etc.
        bool FldSetImpl(EField fld, TStringBuf value, bool strconst = false, bool nocopy = false);

    public: // clear a field
        /// Clears the field value and removes it from both masks.
        /// An out-of-range fld is ignored instead of indexing past Fields.
        void FldClr(EField fld) {
            if (!FldIsValid(fld))
                return;
            Fields[fld].Clear();
            FldMarkUnset(fld);
            FldMarkClean(fld);
        }

        /// Clears the field if it is set; returns whether anything was cleared.
        bool FldTryClr(EField field) {
            // validate first: FldIsSet shifts by the field index
            const bool ok = FldIsValid(field) && FldIsSet(field);
            if (ok)
                FldClr(field);
            return ok;
        }

    public: // set a field value: might leave state dirty and require a Rewrite()
        // copies if fits and not dirty, sets and marks dirty otherwise
        bool FldMemCpy(EField field, const TStringBuf& value) {
            return FldSetImpl(field, value, false);
        }

        // uses directly, marks dirty
        /// @note client MUST guarantee value will be alive until Rewrite is called
        bool FldMemSet(EField field, const TStringBuf& value) {
            return FldSetImpl(field, value, false, true);
        }

        // uses directly, doesn't mark dirty (value scope exceeds "this")
        bool FldMemUse(EField field, const TStringBuf& value) {
            return FldSetImpl(field, value, true);
        }

        // uses directly, doesn't mark dirty
        /// @note the array is taken as a constant string of size - 1 characters
        ///       (i.e. the last element is assumed to be the terminator)
        template <size_t size>
        bool FldMemSet(EField field, const char (&value)[size]) {
            static_assert(size > 0);
            return FldSetImpl(field, TStringBuf(value, size - 1), true);
        }

        // duplicate one field to another
        /// Returns false when either index is out of range or src is not set.
        /// The dirty state of dst follows the dirty state of src.
        bool FldDup(EField src, EField dst) {
            // both indices are validated before any mask/array access
            if (!FldIsValid(src) || !FldIsValid(dst) || !FldIsSet(src))
                return false;
            FldSetNoDirty(dst, FldGet(src));
            if (FldIsDirty(src))
                FldMarkDirty(dst);
            else
                FldMarkClean(dst);
            return true;
        }

        // move one field to another
        /// Moving a field onto itself keeps the value (it is not cleared).
        bool FldMov(EField src, EField dst) {
            if (!FldDup(src, dst))
                return false;
            if (src != dst)
                FldClr(src);
            return true;
        }

    private:
        /// True when buf points inside the owned buffer.
        bool IsInBuffer(const char* buf) const {
            return buf >= Buffer.data() && buf < Buffer.data() + Buffer.size();
        }

    public:
        /// True when at least one field is dirty.
        bool FldIsDirty() const {
            return 0 != FieldsDirty;
        }

        /// True when fld is dirty.
        bool FldIsDirty(EField fld) const {
            return 0 != (FieldsDirty & FldFlag(fld));
        }

    private:
        void FldMarkDirty(EField fld) {
            FieldsDirty |= FldFlag(fld);
        }

        void FldMarkClean(EField fld) {
            FieldsDirty &= ~FldFlag(fld);
        }

        void RewriteImpl();

    public:
        static TState::EParsed CheckHost(const TStringBuf& host);

        // convert a [potential] IDN to ascii
        static TMallocPtr<char> IDNToAscii(const wchar32* idna);
        static TMallocPtr<char> IDNToAscii(const TStringBuf& host, ECharset enc = CODES_UTF8);

        // convert hosts with percent-encoded or extended chars

        // returns non-empty string if host can be converted to ASCII with given parameters
        static TStringBuf HostToAscii(TStringBuf host, TMallocPtr<char>& buf, bool hasExtended, bool allowIDN, ECharset enc = CODES_UTF8);

        // returns host if already ascii, or non-empty if it can be converted
        static TStringBuf HostToAscii(const TStringBuf& host, TMallocPtr<char>& buf, bool allowIDN, ECharset enc = CODES_UTF8);

    public:
        /// Creates an empty URI; defaultPort is narrowed to 16 bits.
        explicit TUri(unsigned defaultPort = 0)
            : FieldsSet(0)
            , Port(0)
            , DefaultPort(static_cast<ui16>(defaultPort))
            , Scheme(SchemeEmpty)
            , FieldsDirty(0)
        {
        }

        TUri(const TStringBuf& host, ui16 port, const TStringBuf& path, const TStringBuf& query = TStringBuf(), const TStringBuf& scheme = "http", unsigned defaultPort = 0, const TStringBuf& hashbang = TStringBuf());

        /// Copies the scalar state and field views of url, then rewrites.
        TUri(const TUri& url)
            : FieldsSet(url.FieldsSet)
            , Port(url.Port)
            , DefaultPort(url.DefaultPort)
            , Scheme(url.Scheme)
            , FieldsDirty(url.FieldsDirty)
        {
            CopyImpl(url);
        }

        ~TUri() {
            Clear();
        }

        /// Makes this object a copy of url; copying from itself is a no-op.
        void Copy(const TUri& url) {
            if (&url != this) {
                CopyData(url);
                CopyImpl(url);
            }
        }

        /// Clears the buffer and resets the state (see ClearImpl).
        void Clear() {
            Dealloc();
            ClearImpl();
        }

        /// Mask of all fields that are set.
        ui32 GetFieldMask() const {
            return FieldsSet;
        }

        /// Mask of the set fields restricted to FlagUrlFields.
        ui32 GetUrlFieldMask() const {
            return GetFieldMask() & FlagUrlFields;
        }

        /// Mask of all dirty fields.
        ui32 GetDirtyMask() const {
            return FieldsDirty;
        }

        void CheckMissingFields();

        // Process methods

        /// Runs RewriteImpl() only when some field is dirty.
        void Rewrite() {
            if (FldIsDirty())
                RewriteImpl();
        }

    private:
        TState::EParsed AssignImpl(const TParser& parser, TScheme::EKind defscheme = SchemeEmpty);

        TState::EParsed ParseImpl(const TStringBuf& url, const TParseFlags& flags = FeaturesDefault, ui32 maxlen = 0, TScheme::EKind defscheme = SchemeEmpty, ECharset enc = CODES_UTF8);

    public:
        /// Assigns from a parser; on ParsedOK the object is rewritten.
        TState::EParsed Assign(const TParser& parser, TScheme::EKind defscheme = SchemeEmpty) {
            const TState::EParsed ret = AssignImpl(parser, defscheme);
            if (ParsedOK == ret)
                Rewrite();
            return ret;
        }

        /// Parses url without a default scheme; on ParsedOK the object is rewritten.
        TState::EParsed ParseUri(const TStringBuf& url, const TParseFlags& flags = FeaturesDefault, ui32 maxlen = 0, ECharset enc = CODES_UTF8) {
            const TState::EParsed ret = ParseImpl(url, flags, maxlen, SchemeEmpty, enc);
            if (ParsedOK == ret)
                Rewrite();
            return ret;
        }

        // parses absolute URIs
        // prepends default scheme (unless unknown) if URI has none
        TState::EParsed ParseAbsUri(const TStringBuf& url, const TParseFlags& flags = FeaturesDefault, ui32 maxlen = 0, TScheme::EKind defscheme = SchemeUnknown, ECharset enc = CODES_UTF8);

        /// ParseAbsUri() with SchemeHTTP as the default scheme.
        TState::EParsed ParseAbsOrHttpUri(const TStringBuf& url, const TParseFlags& flags = FeaturesDefault, ui32 maxlen = 0, ECharset enc = CODES_UTF8) {
            return ParseAbsUri(url, flags, maxlen, SchemeHTTP, enc);
        }

        TState::EParsed Parse(const TStringBuf& url, const TUri& base, const TParseFlags& flags = FeaturesDefault, ui32 maxlen = 0, ECharset enc = CODES_UTF8);

        /// Shorthand for ParseUri(url, flags) with default maxlen and encoding.
        TState::EParsed Parse(const TStringBuf& url, const TParseFlags& flags = FeaturesDefault) {
            return ParseUri(url, flags);
        }

        TState::EParsed Parse(const TStringBuf& url, const TParseFlags& flags, const TStringBuf& base_url, ui32 maxlen = 0, ECharset enc = CODES_UTF8);

        /// Parses url against base_url; a successful parse that does not yield
        /// a valid global URI is reported as ParsedBadFormat.
        TState::EParsed ParseAbs(const TStringBuf& url, const TParseFlags& flags = FeaturesDefault, const TStringBuf& base_url = TStringBuf(), ui32 maxlen = 0, ECharset enc = CODES_UTF8) {
            const TState::EParsed result = Parse(url, flags, base_url, maxlen, enc);
            return ParsedOK != result || IsValidGlobal() ? result : ParsedBadFormat;
        }

        // correctAbs works with head "/.." portions:
        //  1 - reject URL
        //  0 - keep portions
        // -1 - ignore portions

        void Merge(const TUri& base, int correctAbs = -1);

        TLinkType Normalize(const TUri& base, const TStringBuf& link, const TStringBuf& codebase = TStringBuf(), ui64 careFlags = FeaturesDefault, ECharset enc = CODES_UTF8);

    private:
        /// If flags selects none of the URL fields, all of FlagUrlFields is added.
        int PrintFlags(int flags) const {
            if (0 == (FlagUrlFields & flags))
                flags |= FlagUrlFields;
            return flags;
        }

    protected:
        size_t PrintSize(ui32 flags) const;

        // Output method, prints to stream
        IOutputStream& PrintImpl(IOutputStream& out, int flags) const;

        /// Prints into the caller-provided memory [str, str + size) and appends
        /// a terminating '\0'. Callers are expected to have checked the size.
        char* PrintImpl(char* str, size_t size, int flags) const {
            TMemoryOutput out(str, size);
            PrintImpl(out, flags) << '\0';
            return str;
        }

        /// True when path is non-empty and starts with '/'.
        static bool IsAbsPath(const TStringBuf& path) {
            return 1 <= path.length() && path[0] == '/';
        }

        bool IsAbsPathImpl() const {
            return IsAbsPath(GetField(FieldPath));
        }

    public:
        // Output method, prints to stream
        IOutputStream& Print(IOutputStream& out, int flags = FlagUrlFields) const {
            return PrintImpl(out, PrintFlags(flags));
        }

        // Output method, print to str, allocate memory if str is NULL
        // Should be deprecated
        char* Print(char* str, size_t size, int flags = FlagUrlFields) const {
            return nullptr == str ? Serialize(flags) : Serialize(str, size, flags);
        }

        /// Prints into str (must be non-null); returns nullptr when size is too
        /// small for the printed form plus the terminator.
        char* Serialize(char* str, size_t size, int flags = FlagUrlFields) const {
            Y_ASSERT(str);
            flags = PrintFlags(flags);
            const size_t printSize = PrintSize(flags) + 1;
            return printSize > size ? nullptr : PrintImpl(str, size, flags);
        }

        /// Prints into a freshly malloc()-ed, null-terminated buffer which the
        /// caller must release with free(). Returns nullptr if the allocation
        /// fails.
        char* Serialize(int flags = FlagUrlFields) const {
            flags = PrintFlags(flags);
            const size_t size = PrintSize(flags) + 1;
            char* const str = static_cast<char*>(malloc(size));
            // do not hand a null buffer to the memory stream
            return nullptr == str ? nullptr : PrintImpl(str, size, flags);
        }

        // Output method to str
        /// Appends the printed form to str.
        void Print(TString& str, int flags = FlagUrlFields) const {
            flags = PrintFlags(flags);
            str.reserve(str.length() + PrintSize(flags));
            TStringOutput out(str);
            PrintImpl(out, flags);
        }

        /// Returns the printed form as a new string.
        TString PrintS(int flags = FlagUrlFields) const {
            TString str;
            Print(str, flags);
            return str;
        }

        // Only non-default scheme and port are printed
        // (the scheme is requested only when it is not SchemeHTTP)
        char* PrintHost(char* str, size_t size) const {
            return Print(str, size, (Scheme != SchemeHTTP ? FlagScheme : 0) | FlagHostPort);
        }
        TString PrintHostS() const {
            return PrintS((Scheme != SchemeHTTP ? FlagScheme : 0) | FlagHostPort);
        }

        // Info methods
        int Compare(const TUri& A, int flags = FlagUrlFields) const;

        int CompareField(EField fld, const TUri& url) const;

        /// Value of fld, or a default (uninitialized) TStringBuf when fld is
        /// out of range or not set.
        const TStringBuf& GetField(EField fld) const {
            return FldIsValid(fld) && FldIsSet(fld) ? FldGet(fld) : Default<TStringBuf>();
        }

        /// Explicit port if non-zero, otherwise the default port.
        ui16 GetPort() const {
            return 0 == Port ? DefaultPort : Port;
        }

        /// Host value, preferring FieldHostAscii over FieldHost when both are set.
        const TStringBuf& GetHost() const {
            if (GetFieldMask() & FlagHostAscii)
                return FldGet(FieldHostAscii);
            if (GetFieldMask() & FlagHost)
                return FldGet(FieldHost);
            return Default<TStringBuf>();
        }

        /// Moves FieldHostAscii into FieldHost; false when FieldHostAscii is not set.
        bool UseHostAscii() {
            return FldMov(FieldHostAscii, FieldHost);
        }

        TScheme::EKind GetScheme() const {
            return Scheme;
        }
        const TSchemeInfo& GetSchemeInfo() const {
            return TSchemeInfo::Get(Scheme);
        }

        /// True when at least one of the fields in flags is NOT set.
        bool IsNull(ui32 flags = FlagScheme | FlagHost | FlagPath) const {
            return !FldSetCmp(flags);
        }

        bool IsNull(EField fld) const {
            return !FldIsSet(fld);
        }

        /// Scheme, host and path are all set and the path is absolute.
        bool IsValidAbs() const {
            if (IsNull(FlagScheme | FlagHost | FlagPath))
                return false;
            return IsAbsPathImpl();
        }

        /// Scheme and host are set; the path is either missing or absolute.
        bool IsValidGlobal() const {
            if (IsNull(FlagScheme | FlagHost))
                return false;
            if (IsNull(FlagPath))
                return true;
            return IsAbsPathImpl();
        }

        /// Scheme and path are set, host is not, and the path is not absolute.
        bool IsRootless() const {
            return FldSetCmp(FlagScheme | FlagHost | FlagPath, FlagScheme | FlagPath) && !IsAbsPathImpl();
        }

        // for RFC 2396 compatibility
        bool IsOpaque() const {
            return IsRootless();
        }

        // Inline helpers
        TUri& operator=(const TUri& u) {
            Copy(u);
            return *this;
        }

        /// Same as IsNull() with its default flags.
        bool operator!() const {
            return IsNull();
        }

        bool Equal(const TUri& A, int flags = FlagUrlFields) const {
            return (Compare(A, flags) == 0);
        }

        bool Less(const TUri& A, int flags = FlagUrlFields) const {
            return (Compare(A, flags) < 0);
        }

        // the comparison operators below compare with FlagNoFrag
        bool operator==(const TUri& A) const {
            return Equal(A, FlagNoFrag);
        }

        bool operator!=(const TUri& A) const {
            return !Equal(A, FlagNoFrag);
        }

        bool operator<(const TUri& A) const {
            return Less(A, FlagNoFrag);
        }

        bool IsSameDocument(const TUri& other) const {
            // pre: both *this and 'other' should be normalized to valid abs
            // (only *this is checked by the assertion)
            Y_ASSERT(IsValidAbs());
            return Equal(other, FlagNoFrag);
        }

        bool IsLocal(const TUri& other) const {
            // pre: both *this and 'other' should be normalized to valid abs
            Y_ASSERT(IsValidAbs() && other.IsValidAbs());
            return Equal(other, FlagScheme | FlagHostPort);
        }

        /// Classifies other relative to *this: same document, same
        /// scheme/host/port, or global.
        TLinkType Locality(const TUri& other) const {
            if (IsSameDocument(other))
                return LinkIsFragment;
            else if (IsLocal(other))
                return LinkIsLocal;
            return LinkIsGlobal;
        }

        static IOutputStream& ReEncodeField(IOutputStream& out, const TStringBuf& val, EField fld, ui64 flags = FeaturesEncodeDecode) {
            return NEncode::TEncoder::ReEncode(out, val, NEncode::TEncodeMapper(flags, fld));
        }

        static IOutputStream& ReEncodeToField(IOutputStream& out, const TStringBuf& val, EField srcfld, ui64 srcflags, EField dstfld, ui64 dstflags) {
            return NEncode::TEncoder::ReEncodeTo(out, val, NEncode::TEncodeMapper(srcflags, srcfld), NEncode::TEncodeToMapper(dstflags, dstfld));
        }

        /// ReEncodeField() with FieldAllMAX passed as the field.
        static IOutputStream& ReEncode(IOutputStream& out, const TStringBuf& val, ui64 flags = FeaturesEncodeDecode) {
            return ReEncodeField(out, val, FieldAllMAX, flags);
        }

        /// Maps parse flags to a correctAbs value (see Merge):
        /// 1 for FeaturePathDenyRootParent, -1 for FeaturePathStripRootParent,
        /// 0 otherwise.
        static int PathOperationFlag(const TParseFlags& flags) {
            return flags & FeaturePathDenyRootParent ? 1
                                                     : flags & FeaturePathStripRootParent ? -1 : 0;
        }

        static bool PathOperation(char*& pathBeg, char*& pathEnd, int correctAbs);

    private:
        /// Applies scheme kind and default port from info; when info carries a
        /// non-empty scheme string it is stored in FieldScheme without touching
        /// the dirty mask.
        const TSchemeInfo& SetSchemeImpl(const TSchemeInfo& info) {
            Scheme = info.Kind;
            DefaultPort = info.Port;
            if (!info.Str.empty())
                FldSetNoDirty(FieldScheme, info.Str);
            return info;
        }
        const TSchemeInfo& SetSchemeImpl(TScheme::EKind scheme) {
            return SetSchemeImpl(TSchemeInfo::Get(scheme));
        }

    public:
        /// Same as SetSchemeImpl(), additionally marking FieldScheme clean when
        /// the scheme string is non-empty.
        const TSchemeInfo& SetScheme(const TSchemeInfo& info) {
            SetSchemeImpl(info);
            if (!info.Str.empty())
                FldMarkClean(FieldScheme);
            return info;
        }
        const TSchemeInfo& SetScheme(TScheme::EKind scheme) {
            return SetScheme(TSchemeInfo::Get(scheme));
        }
    };

    class TUriUpdate {
        TUri& Uri_;

    public:
        TUriUpdate(TUri& uri)
            : Uri_(uri)
        {
        }
        ~TUriUpdate() {
            Uri_.Rewrite();
        }

    public:
        bool Set(TField::EField field, const TStringBuf& value) {
            return Uri_.FldMemSet(field, value);
        }

        template <size_t size>
        bool Set(TField::EField field, const char (&value)[size]) {
            return Uri_.FldMemSet(field, value);
        }

        void Clr(TField::EField field) {
            Uri_.FldClr(field);
        }
    };

    /// Returns a C string for the given link type; the definition is not in
    /// this header. Used by the TLinkType stream-output specialization below.
    const char* LinkTypeToString(const TUri::TLinkType& t);

}

// Stream output for NUri::TUri: prints the URI via TUri::Print(out) with that
// method's default flags.
Y_DECLARE_OUT_SPEC(inline, NUri::TUri, out, url) {
    url.Print(out);
}

// Stream output for NUri::TUri::TLinkType: writes the string returned by
// NUri::LinkTypeToString for the value.
Y_DECLARE_OUT_SPEC(inline, NUri::TUri::TLinkType, out, t) {
    out << NUri::LinkTypeToString(t);
}