diff options
author | hiddenpath <hiddenpath@yandex-team.com> | 2024-04-02 23:50:23 +0300 |
---|---|---|
committer | hiddenpath <hiddenpath@yandex-team.com> | 2024-04-03 00:02:31 +0300 |
commit | 8923c6d2c438e0aeed2e06b8b0275e1864eeee33 (patch) | |
tree | 6b5e476699fc0be5091cb650654ef5f602c8afff /contrib/go/_std_1.22/src/encoding/csv | |
parent | d18afd09df2a08cd023012593b46109b77713a6c (diff) | |
download | ydb-8923c6d2c438e0aeed2e06b8b0275e1864eeee33.tar.gz |
Update golang to 1.22.1
2967d19c907adf59101a1f47b4208bd0b04a6186
Diffstat (limited to 'contrib/go/_std_1.22/src/encoding/csv')
-rw-r--r-- | contrib/go/_std_1.22/src/encoding/csv/reader.go | 466 | ||||
-rw-r--r-- | contrib/go/_std_1.22/src/encoding/csv/writer.go | 184 | ||||
-rw-r--r-- | contrib/go/_std_1.22/src/encoding/csv/ya.make | 8 |
3 files changed, 658 insertions, 0 deletions
diff --git a/contrib/go/_std_1.22/src/encoding/csv/reader.go b/contrib/go/_std_1.22/src/encoding/csv/reader.go new file mode 100644 index 0000000000..d9cab86572 --- /dev/null +++ b/contrib/go/_std_1.22/src/encoding/csv/reader.go @@ -0,0 +1,466 @@ +// Copyright 2011 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// Package csv reads and writes comma-separated values (CSV) files. +// There are many kinds of CSV files; this package supports the format +// described in RFC 4180. +// +// A csv file contains zero or more records of one or more fields per record. +// Each record is separated by the newline character. The final record may +// optionally be followed by a newline character. +// +// field1,field2,field3 +// +// White space is considered part of a field. +// +// Carriage returns before newline characters are silently removed. +// +// Blank lines are ignored. A line with only whitespace characters (excluding +// the ending newline character) is not considered a blank line. +// +// Fields which start and stop with the quote character " are called +// quoted-fields. The beginning and ending quote are not part of the +// field. +// +// The source: +// +// normal string,"quoted-field" +// +// results in the fields +// +// {`normal string`, `quoted-field`} +// +// Within a quoted-field a quote character followed by a second quote +// character is considered a single quote. +// +// "the ""word"" is true","a ""quoted-field""" +// +// results in +// +// {`the "word" is true`, `a "quoted-field"`} +// +// Newlines and commas may be included in a quoted-field +// +// "Multi-line +// field","comma is ," +// +// results in +// +// {`Multi-line +// field`, `comma is ,`} +package csv + +import ( + "bufio" + "bytes" + "errors" + "fmt" + "io" + "unicode" + "unicode/utf8" +) + +// A ParseError is returned for parsing errors. +// Line and column numbers are 1-indexed. +type ParseError struct { + StartLine int // Line where the record starts + Line int // Line where the error occurred + Column int // Column (1-based byte index) where the error occurred + Err error // The actual error +} + +func (e *ParseError) Error() string { + if e.Err == ErrFieldCount { + return fmt.Sprintf("record on line %d: %v", e.Line, e.Err) + } + if e.StartLine != e.Line { + return fmt.Sprintf("record on line %d; parse error on line %d, column %d: %v", e.StartLine, e.Line, e.Column, e.Err) + } + return fmt.Sprintf("parse error on line %d, column %d: %v", e.Line, e.Column, e.Err) +} + +func (e *ParseError) Unwrap() error { return e.Err } + +// These are the errors that can be returned in [ParseError.Err]. +var ( + ErrBareQuote = errors.New("bare \" in non-quoted-field") + ErrQuote = errors.New("extraneous or missing \" in quoted-field") + ErrFieldCount = errors.New("wrong number of fields") + + // Deprecated: ErrTrailingComma is no longer used. + ErrTrailingComma = errors.New("extra delimiter at end of line") +) + +var errInvalidDelim = errors.New("csv: invalid field or comment delimiter") + +func validDelim(r rune) bool { + return r != 0 && r != '"' && r != '\r' && r != '\n' && utf8.ValidRune(r) && r != utf8.RuneError +} + +// A Reader reads records from a CSV-encoded file. +// +// As returned by [NewReader], a Reader expects input conforming to RFC 4180. +// The exported fields can be changed to customize the details before the +// first call to [Reader.Read] or [Reader.ReadAll]. +// +// The Reader converts all \r\n sequences in its input to plain \n, +// including in multiline field values, so that the returned data does +// not depend on which line-ending convention an input file uses. +type Reader struct { + // Comma is the field delimiter. + // It is set to comma (',') by NewReader. + // Comma must be a valid rune and must not be \r, \n, + // or the Unicode replacement character (0xFFFD). + Comma rune + + // Comment, if not 0, is the comment character. Lines beginning with the + // Comment character without preceding whitespace are ignored. + // With leading whitespace the Comment character becomes part of the + // field, even if TrimLeadingSpace is true. + // Comment must be a valid rune and must not be \r, \n, + // or the Unicode replacement character (0xFFFD). + // It must also not be equal to Comma. + Comment rune + + // FieldsPerRecord is the number of expected fields per record. + // If FieldsPerRecord is positive, Read requires each record to + // have the given number of fields. If FieldsPerRecord is 0, Read sets it to + // the number of fields in the first record, so that future records must + // have the same field count. If FieldsPerRecord is negative, no check is + // made and records may have a variable number of fields. + FieldsPerRecord int + + // If LazyQuotes is true, a quote may appear in an unquoted field and a + // non-doubled quote may appear in a quoted field. + LazyQuotes bool + + // If TrimLeadingSpace is true, leading white space in a field is ignored. + // This is done even if the field delimiter, Comma, is white space. + TrimLeadingSpace bool + + // ReuseRecord controls whether calls to Read may return a slice sharing + // the backing array of the previous call's returned slice for performance. + // By default, each call to Read returns newly allocated memory owned by the caller. + ReuseRecord bool + + // Deprecated: TrailingComma is no longer used. + TrailingComma bool + + r *bufio.Reader + + // numLine is the current line being read in the CSV file. + numLine int + + // offset is the input stream byte offset of the current reader position. + offset int64 + + // rawBuffer is a line buffer only used by the readLine method. + rawBuffer []byte + + // recordBuffer holds the unescaped fields, one after another. + // The fields can be accessed by using the indexes in fieldIndexes. + // E.g., For the row `a,"b","c""d",e`, recordBuffer will contain `abc"de` + // and fieldIndexes will contain the indexes [1, 2, 5, 6]. + recordBuffer []byte + + // fieldIndexes is an index of fields inside recordBuffer. + // The i'th field ends at offset fieldIndexes[i] in recordBuffer. + fieldIndexes []int + + // fieldPositions is an index of field positions for the + // last record returned by Read. + fieldPositions []position + + // lastRecord is a record cache and only used when ReuseRecord == true. + lastRecord []string +} + +// NewReader returns a new Reader that reads from r. +func NewReader(r io.Reader) *Reader { + return &Reader{ + Comma: ',', + r: bufio.NewReader(r), + } +} + +// Read reads one record (a slice of fields) from r. +// If the record has an unexpected number of fields, +// Read returns the record along with the error [ErrFieldCount]. +// If the record contains a field that cannot be parsed, +// Read returns a partial record along with the parse error. +// The partial record contains all fields read before the error. +// If there is no data left to be read, Read returns nil, [io.EOF]. +// If [Reader.ReuseRecord] is true, the returned slice may be shared +// between multiple calls to Read. +func (r *Reader) Read() (record []string, err error) { + if r.ReuseRecord { + record, err = r.readRecord(r.lastRecord) + r.lastRecord = record + } else { + record, err = r.readRecord(nil) + } + return record, err +} + +// FieldPos returns the line and column corresponding to +// the start of the field with the given index in the slice most recently +// returned by [Reader.Read]. Numbering of lines and columns starts at 1; +// columns are counted in bytes, not runes. +// +// If this is called with an out-of-bounds index, it panics. +func (r *Reader) FieldPos(field int) (line, column int) { + if field < 0 || field >= len(r.fieldPositions) { + panic("out of range index passed to FieldPos") + } + p := &r.fieldPositions[field] + return p.line, p.col +} + +// InputOffset returns the input stream byte offset of the current reader +// position. The offset gives the location of the end of the most recently +// read row and the beginning of the next row. +func (r *Reader) InputOffset() int64 { + return r.offset +} + +// pos holds the position of a field in the current line. +type position struct { + line, col int +} + +// ReadAll reads all the remaining records from r. +// Each record is a slice of fields. +// A successful call returns err == nil, not err == [io.EOF]. Because ReadAll is +// defined to read until EOF, it does not treat end of file as an error to be +// reported. +func (r *Reader) ReadAll() (records [][]string, err error) { + for { + record, err := r.readRecord(nil) + if err == io.EOF { + return records, nil + } + if err != nil { + return nil, err + } + records = append(records, record) + } +} + +// readLine reads the next line (with the trailing endline). +// If EOF is hit without a trailing endline, it will be omitted. +// If some bytes were read, then the error is never [io.EOF]. +// The result is only valid until the next call to readLine. +func (r *Reader) readLine() ([]byte, error) { + line, err := r.r.ReadSlice('\n') + if err == bufio.ErrBufferFull { + r.rawBuffer = append(r.rawBuffer[:0], line...) + for err == bufio.ErrBufferFull { + line, err = r.r.ReadSlice('\n') + r.rawBuffer = append(r.rawBuffer, line...) + } + line = r.rawBuffer + } + readSize := len(line) + if readSize > 0 && err == io.EOF { + err = nil + // For backwards compatibility, drop trailing \r before EOF. + if line[readSize-1] == '\r' { + line = line[:readSize-1] + } + } + r.numLine++ + r.offset += int64(readSize) + // Normalize \r\n to \n on all input lines. + if n := len(line); n >= 2 && line[n-2] == '\r' && line[n-1] == '\n' { + line[n-2] = '\n' + line = line[:n-1] + } + return line, err +} + +// lengthNL reports the number of bytes for the trailing \n. +func lengthNL(b []byte) int { + if len(b) > 0 && b[len(b)-1] == '\n' { + return 1 + } + return 0 +} + +// nextRune returns the next rune in b or utf8.RuneError. +func nextRune(b []byte) rune { + r, _ := utf8.DecodeRune(b) + return r +} + +func (r *Reader) readRecord(dst []string) ([]string, error) { + if r.Comma == r.Comment || !validDelim(r.Comma) || (r.Comment != 0 && !validDelim(r.Comment)) { + return nil, errInvalidDelim + } + + // Read line (automatically skipping past empty lines and any comments). + var line []byte + var errRead error + for errRead == nil { + line, errRead = r.readLine() + if r.Comment != 0 && nextRune(line) == r.Comment { + line = nil + continue // Skip comment lines + } + if errRead == nil && len(line) == lengthNL(line) { + line = nil + continue // Skip empty lines + } + break + } + if errRead == io.EOF { + return nil, errRead + } + + // Parse each field in the record. + var err error + const quoteLen = len(`"`) + commaLen := utf8.RuneLen(r.Comma) + recLine := r.numLine // Starting line for record + r.recordBuffer = r.recordBuffer[:0] + r.fieldIndexes = r.fieldIndexes[:0] + r.fieldPositions = r.fieldPositions[:0] + pos := position{line: r.numLine, col: 1} +parseField: + for { + if r.TrimLeadingSpace { + i := bytes.IndexFunc(line, func(r rune) bool { + return !unicode.IsSpace(r) + }) + if i < 0 { + i = len(line) + pos.col -= lengthNL(line) + } + line = line[i:] + pos.col += i + } + if len(line) == 0 || line[0] != '"' { + // Non-quoted string field + i := bytes.IndexRune(line, r.Comma) + field := line + if i >= 0 { + field = field[:i] + } else { + field = field[:len(field)-lengthNL(field)] + } + // Check to make sure a quote does not appear in field. + if !r.LazyQuotes { + if j := bytes.IndexByte(field, '"'); j >= 0 { + col := pos.col + j + err = &ParseError{StartLine: recLine, Line: r.numLine, Column: col, Err: ErrBareQuote} + break parseField + } + } + r.recordBuffer = append(r.recordBuffer, field...) + r.fieldIndexes = append(r.fieldIndexes, len(r.recordBuffer)) + r.fieldPositions = append(r.fieldPositions, pos) + if i >= 0 { + line = line[i+commaLen:] + pos.col += i + commaLen + continue parseField + } + break parseField + } else { + // Quoted string field + fieldPos := pos + line = line[quoteLen:] + pos.col += quoteLen + for { + i := bytes.IndexByte(line, '"') + if i >= 0 { + // Hit next quote. + r.recordBuffer = append(r.recordBuffer, line[:i]...) + line = line[i+quoteLen:] + pos.col += i + quoteLen + switch rn := nextRune(line); { + case rn == '"': + // `""` sequence (append quote). + r.recordBuffer = append(r.recordBuffer, '"') + line = line[quoteLen:] + pos.col += quoteLen + case rn == r.Comma: + // `",` sequence (end of field). + line = line[commaLen:] + pos.col += commaLen + r.fieldIndexes = append(r.fieldIndexes, len(r.recordBuffer)) + r.fieldPositions = append(r.fieldPositions, fieldPos) + continue parseField + case lengthNL(line) == len(line): + // `"\n` sequence (end of line). + r.fieldIndexes = append(r.fieldIndexes, len(r.recordBuffer)) + r.fieldPositions = append(r.fieldPositions, fieldPos) + break parseField + case r.LazyQuotes: + // `"` sequence (bare quote). + r.recordBuffer = append(r.recordBuffer, '"') + default: + // `"*` sequence (invalid non-escaped quote). + err = &ParseError{StartLine: recLine, Line: r.numLine, Column: pos.col - quoteLen, Err: ErrQuote} + break parseField + } + } else if len(line) > 0 { + // Hit end of line (copy all data so far). + r.recordBuffer = append(r.recordBuffer, line...) + if errRead != nil { + break parseField + } + pos.col += len(line) + line, errRead = r.readLine() + if len(line) > 0 { + pos.line++ + pos.col = 1 + } + if errRead == io.EOF { + errRead = nil + } + } else { + // Abrupt end of file (EOF or error). + if !r.LazyQuotes && errRead == nil { + err = &ParseError{StartLine: recLine, Line: pos.line, Column: pos.col, Err: ErrQuote} + break parseField + } + r.fieldIndexes = append(r.fieldIndexes, len(r.recordBuffer)) + r.fieldPositions = append(r.fieldPositions, fieldPos) + break parseField + } + } + } + } + if err == nil { + err = errRead + } + + // Create a single string and create slices out of it. + // This pins the memory of the fields together, but allocates once. + str := string(r.recordBuffer) // Convert to string once to batch allocations + dst = dst[:0] + if cap(dst) < len(r.fieldIndexes) { + dst = make([]string, len(r.fieldIndexes)) + } + dst = dst[:len(r.fieldIndexes)] + var preIdx int + for i, idx := range r.fieldIndexes { + dst[i] = str[preIdx:idx] + preIdx = idx + } + + // Check or update the expected fields per record. + if r.FieldsPerRecord > 0 { + if len(dst) != r.FieldsPerRecord && err == nil { + err = &ParseError{ + StartLine: recLine, + Line: recLine, + Column: 1, + Err: ErrFieldCount, + } + } + } else if r.FieldsPerRecord == 0 { + r.FieldsPerRecord = len(dst) + } + return dst, err +} diff --git a/contrib/go/_std_1.22/src/encoding/csv/writer.go b/contrib/go/_std_1.22/src/encoding/csv/writer.go new file mode 100644 index 0000000000..ff3142f0bb --- /dev/null +++ b/contrib/go/_std_1.22/src/encoding/csv/writer.go @@ -0,0 +1,184 @@ +// Copyright 2011 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package csv + +import ( + "bufio" + "io" + "strings" + "unicode" + "unicode/utf8" +) + +// A Writer writes records using CSV encoding. +// +// As returned by [NewWriter], a Writer writes records terminated by a +// newline and uses ',' as the field delimiter. The exported fields can be +// changed to customize the details before +// the first call to [Writer.Write] or [Writer.WriteAll]. +// +// [Writer.Comma] is the field delimiter. +// +// If [Writer.UseCRLF] is true, +// the Writer ends each output line with \r\n instead of \n. +// +// The writes of individual records are buffered. +// After all data has been written, the client should call the +// [Writer.Flush] method to guarantee all data has been forwarded to +// the underlying [io.Writer]. Any errors that occurred should +// be checked by calling the [Writer.Error] method. +type Writer struct { + Comma rune // Field delimiter (set to ',' by NewWriter) + UseCRLF bool // True to use \r\n as the line terminator + w *bufio.Writer +} + +// NewWriter returns a new Writer that writes to w. +func NewWriter(w io.Writer) *Writer { + return &Writer{ + Comma: ',', + w: bufio.NewWriter(w), + } +} + +// Write writes a single CSV record to w along with any necessary quoting. +// A record is a slice of strings with each string being one field. +// Writes are buffered, so [Writer.Flush] must eventually be called to ensure +// that the record is written to the underlying [io.Writer]. +func (w *Writer) Write(record []string) error { + if !validDelim(w.Comma) { + return errInvalidDelim + } + + for n, field := range record { + if n > 0 { + if _, err := w.w.WriteRune(w.Comma); err != nil { + return err + } + } + + // If we don't have to have a quoted field then just + // write out the field and continue to the next field. + if !w.fieldNeedsQuotes(field) { + if _, err := w.w.WriteString(field); err != nil { + return err + } + continue + } + + if err := w.w.WriteByte('"'); err != nil { + return err + } + for len(field) > 0 { + // Search for special characters. + i := strings.IndexAny(field, "\"\r\n") + if i < 0 { + i = len(field) + } + + // Copy verbatim everything before the special character. + if _, err := w.w.WriteString(field[:i]); err != nil { + return err + } + field = field[i:] + + // Encode the special character. + if len(field) > 0 { + var err error + switch field[0] { + case '"': + _, err = w.w.WriteString(`""`) + case '\r': + if !w.UseCRLF { + err = w.w.WriteByte('\r') + } + case '\n': + if w.UseCRLF { + _, err = w.w.WriteString("\r\n") + } else { + err = w.w.WriteByte('\n') + } + } + field = field[1:] + if err != nil { + return err + } + } + } + if err := w.w.WriteByte('"'); err != nil { + return err + } + } + var err error + if w.UseCRLF { + _, err = w.w.WriteString("\r\n") + } else { + err = w.w.WriteByte('\n') + } + return err +} + +// Flush writes any buffered data to the underlying [io.Writer]. +// To check if an error occurred during Flush, call [Writer.Error]. +func (w *Writer) Flush() { + w.w.Flush() +} + +// Error reports any error that has occurred during +// a previous [Writer.Write] or [Writer.Flush]. +func (w *Writer) Error() error { + _, err := w.w.Write(nil) + return err +} + +// WriteAll writes multiple CSV records to w using [Writer.Write] and +// then calls [Writer.Flush], returning any error from the Flush. +func (w *Writer) WriteAll(records [][]string) error { + for _, record := range records { + err := w.Write(record) + if err != nil { + return err + } + } + return w.w.Flush() +} + +// fieldNeedsQuotes reports whether our field must be enclosed in quotes. +// Fields with a Comma, fields with a quote or newline, and +// fields which start with a space must be enclosed in quotes. +// We used to quote empty strings, but we do not anymore (as of Go 1.4). +// The two representations should be equivalent, but Postgres distinguishes +// quoted vs non-quoted empty string during database imports, and it has +// an option to force the quoted behavior for non-quoted CSV but it has +// no option to force the non-quoted behavior for quoted CSV, making +// CSV with quoted empty strings strictly less useful. +// Not quoting the empty string also makes this package match the behavior +// of Microsoft Excel and Google Drive. +// For Postgres, quote the data terminating string `\.`. +func (w *Writer) fieldNeedsQuotes(field string) bool { + if field == "" { + return false + } + + if field == `\.` { + return true + } + + if w.Comma < utf8.RuneSelf { + for i := 0; i < len(field); i++ { + c := field[i] + if c == '\n' || c == '\r' || c == '"' || c == byte(w.Comma) { + return true + } + } + } else { + if strings.ContainsRune(field, w.Comma) || strings.ContainsAny(field, "\"\r\n") { + return true + } + } + + r1, _ := utf8.DecodeRuneInString(field) + return unicode.IsSpace(r1) +} diff --git a/contrib/go/_std_1.22/src/encoding/csv/ya.make b/contrib/go/_std_1.22/src/encoding/csv/ya.make new file mode 100644 index 0000000000..b3fe9b7d97 --- /dev/null +++ b/contrib/go/_std_1.22/src/encoding/csv/ya.make @@ -0,0 +1,8 @@ +GO_LIBRARY() +IF (TRUE) + SRCS( + reader.go + writer.go + ) +ENDIF() +END() |