1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
|
package rulesfn
import (
"fmt"
"net"
"net/url"
"strings"
smithyhttp "github.com/aws/smithy-go/transport/http"
)
// IsValidHostLabel reports whether input is a valid [RFC 1123] host label.
//
// When allowSubDomains is false, input is validated as one single label. When
// it is true, input is split on "." and every resulting label must be valid
// for the function to return true.
//
// [RFC 1123]: https://www.ietf.org/rfc/rfc1123.txt
func IsValidHostLabel(input string, allowSubDomains bool) bool {
	// Single-label mode: the whole input is the only candidate.
	if !allowSubDomains {
		return smithyhttp.ValidHostLabel(input)
	}

	// Sub-domain mode: reject as soon as any dot-separated part is invalid.
	for _, part := range strings.Split(input, ".") {
		if !smithyhttp.ValidHostLabel(part) {
			return false
		}
	}
	return true
}
// ParseURL parses input as an http or https URL and returns its components as
// a [URL]. It returns nil when the string cannot be parsed, when it carries a
// non-empty query string, or when its scheme is anything other than "http" or
// "https".
//
// When the host is an IPv6 literal with a zone index, the returned
// [URL.Authority] holds the zone separator in its percent-escaped form (%25).
func ParseURL(input string) *URL {
	parsed, err := url.Parse(input)
	if err != nil || parsed.RawQuery != "" {
		return nil
	}

	switch parsed.Scheme {
	case "http", "https":
		// supported schemes
	default:
		return nil
	}

	// The normalized path always starts and ends with a slash.
	normalized := parsed.Path
	if !strings.HasPrefix(normalized, "/") {
		normalized = "/" + normalized
	}
	if !strings.HasSuffix(normalized, "/") {
		normalized += "/"
	}

	// The Go URL parser unescapes the `%25` of an IPv6 zone index into a bare
	// `%`. Re-escape it, because the result is later concatenated into URI
	// strings where a bare `%` would be invalid.
	escapedHost := strings.ReplaceAll(parsed.Host, "%", "%25")

	// The zone ID is dropped before the IP check so zoned IPv6 hosts still
	// count as IP addresses.
	isIP := net.ParseIP(hostnameWithoutZone(parsed)) != nil

	return &URL{
		Scheme:         parsed.Scheme,
		Authority:      escapedHost,
		Path:           parsed.Path,
		NormalizedPath: normalized,
		IsIp:           isIP,
	}
}
// URL provides the structure describing the parts of a parsed URL returned by
// [ParseURL].
type URL struct {
// Scheme is the URL scheme; ParseURL only returns "http" or "https".
Scheme string // https://www.rfc-editor.org/rfc/rfc3986#section-3.1
// Authority is the parsed host component with every "%" re-escaped as
// "%25", so an IPv6 zone separator stays valid when the value is used to
// build a URI string.
Authority string // https://www.rfc-editor.org/rfc/rfc3986#section-3.2
// Path is the path exactly as reported by the URL parser.
Path string // https://www.rfc-editor.org/rfc/rfc3986#section-3.3
// NormalizedPath is Path with a leading and a trailing "/" added when
// either is missing.
NormalizedPath string // https://www.rfc-editor.org/rfc/rfc3986#section-6.2.3
// IsIp is true when the host, ignoring any IPv6 zone ID, parses as an IP
// address.
IsIp bool
}
// URIEncode returns a percent-encoded [RFC3986 section 2.1] version of the
// input string.
//
// The input is processed byte by byte. Unreserved characters (ASCII letters,
// digits, '-', '_', '.' and '~') are copied through unchanged; every other
// byte is written as '%' followed by exactly two uppercase hexadecimal
// digits, so multi-byte UTF-8 sequences become one escape per byte.
//
// [RFC3986 section 2.1]: https://www.rfc-editor.org/rfc/rfc3986#section-2.1
func URIEncode(input string) string {
	var output strings.Builder
	for _, c := range []byte(input) {
		if validPercentEncodedChar(c) {
			output.WriteByte(c)
			continue
		}

		// The 02 width pads bytes below 0x10 with a leading zero; without it
		// "\n" would encode as the malformed "%A" instead of "%0A".
		fmt.Fprintf(&output, "%%%02X", c)
	}

	return output.String()
}

// validPercentEncodedChar reports whether c is an RFC 3986 unreserved
// character, i.e. one that may appear in the output without being escaped.
func validPercentEncodedChar(c byte) bool {
	return (c >= 'a' && c <= 'z') ||
		(c >= 'A' && c <= 'Z') ||
		(c >= '0' && c <= '9') ||
		c == '-' || c == '_' || c == '.' || c == '~'
}
// hostnameWithoutZone returns u.Hostname() with any IPv6 zone ID removed, so
// that net.ParseIP can still recognize IPv6 addresses that carry a zone ID.
// Everything from the last "%" onward is discarded; a hostname without "%" is
// returned unchanged.
//
// FUTURE(10/2023): netip.ParseAddr handles zones natively, but that package
// only exists from go 1.18 and the minimum supported go version here is 1.15.
// Once the minimum version is aligned with the go runtime deprecation policy
// (10/2023) this helper can be removed.
func hostnameWithoutZone(u *url.URL) string {
	host := u.Hostname()

	// Roughly mirrors the unexported splitHostZone in package net, except
	// that the zone part is thrown away because it is not needed here.
	cut := strings.LastIndex(host, "%")
	if cut < 0 {
		return host
	}
	return host[:cut]
}
|