// Copyright 2010 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
//

/*
Package multipart implements MIME multipart parsing, as defined in RFC
2046.

The implementation is sufficient for HTTP (RFC 2388) and the multipart
bodies generated by popular browsers.

# Limits

To protect against malicious inputs, this package sets limits on the size
of the MIME data it processes.

Reader.NextPart and Reader.NextRawPart limit the number of headers in a part
to 10000 and Reader.ReadForm limits the total number of headers in all
FileHeaders to 10000.
These limits may be adjusted with the GODEBUG=multipartmaxheaders=<value>
setting.

Reader.ReadForm further limits the number of parts in a form to 1000.
This limit may be adjusted with the GODEBUG=multipartmaxparts=<value>
setting.
*/
package multipart

import (
	"bufio"
	"bytes"
	"fmt"
	"internal/godebug"
	"io"
	"mime"
	"mime/quotedprintable"
	"net/textproto"
	"path/filepath"
	"strconv"
	"strings"
)

// emptyParams is the shared, empty parameter map installed on a Part whose
// Content-Disposition header could not be parsed. Being non-nil, it also
// marks the header as "already parsed" so parsing is not retried.
var emptyParams = make(map[string]string)

// This constant needs to be at least 76 for this package to work correctly.
// This is because \r\n--separator_of_len_70- would fill the buffer and it
// wouldn't be safe to consume a single byte from it.
const peekBufferSize = 4096

// A Part represents a single part in a multipart body.
type Part struct {
	// The headers of the body, if any, with the keys canonicalized
	// in the same fashion that the Go http.Request headers are.
	// For example, "foo-bar" changes case to "Foo-Bar"
	Header textproto.MIMEHeader

	mr *Reader

	disposition       string
	dispositionParams map[string]string

	// r is either a reader directly reading from mr, or it's a
	// wrapper around such a reader, decoding the
	// Content-Transfer-Encoding
	r io.Reader

	n       int   // known data bytes waiting in mr.bufReader
	total   int64 // total data bytes read already
	err     error // error to return when n == 0
	readErr error // read error observed from mr.bufReader
}

// FormName returns the name parameter if p has a Content-Disposition
// of type "form-data". Otherwise it returns the empty string.
func (p *Part) FormName() string {
	// See https://tools.ietf.org/html/rfc2183 section 2 for EBNF
	// of Content-Disposition value format.
	if p.dispositionParams == nil {
		p.parseContentDisposition()
	}
	if p.disposition != "form-data" {
		return ""
	}
	return p.dispositionParams["name"]
}

// FileName returns the filename parameter of the Part's Content-Disposition
// header. If not empty, the filename is passed through filepath.Base (which is
// platform dependent) before being returned.
func (p *Part) FileName() string {
	if p.dispositionParams == nil {
		p.parseContentDisposition()
	}
	filename := p.dispositionParams["filename"]
	if filename == "" {
		return ""
	}
	// RFC 7578, Section 4.2 requires that if a filename is provided, the
	// directory path information must not be used.
	return filepath.Base(filename)
}

// parseContentDisposition parses the part's Content-Disposition header into
// p.disposition and p.dispositionParams. If parsing fails, dispositionParams
// is set to emptyParams so that it is non-nil and the parse is not repeated.
func (p *Part) parseContentDisposition() {
	v := p.Header.Get("Content-Disposition")
	var err error
	p.disposition, p.dispositionParams, err = mime.ParseMediaType(v)
	if err != nil {
		p.dispositionParams = emptyParams
	}
}

// NewReader creates a new multipart Reader reading from r using the
// given MIME boundary.
//
// The boundary is usually obtained from the "boundary" parameter of
// the message's "Content-Type" header. Use mime.ParseMediaType to
// parse such headers.
func NewReader(r io.Reader, boundary string) *Reader {
	// All four delimiter slices are views into this one buffer.
	b := []byte("\r\n--" + boundary + "--")
	return &Reader{
		bufReader:        bufio.NewReaderSize(&stickyErrorReader{r: r}, peekBufferSize),
		nl:               b[:2],
		nlDashBoundary:   b[:len(b)-2],
		dashBoundaryDash: b[2:],
		dashBoundary:     b[2 : len(b)-2],
	}
}

// stickyErrorReader is an io.Reader which never calls Read on its
// underlying Reader once an error has been seen. (the io.Reader
// interface's contract promises nothing about the return values of
// Read calls after an error, yet this package does do multiple Reads
// after error)
type stickyErrorReader struct {
	r   io.Reader
	err error
}

// Read reads from the underlying reader, remembering the first error it
// returns. Once an error has been recorded, every later call returns
// 0 and that same error without touching the underlying reader.
func (r *stickyErrorReader) Read(p []byte) (n int, _ error) {
	if r.err != nil {
		return 0, r.err
	}
	n, r.err = r.r.Read(p)
	return n, r.err
}

// newPart reads the headers of the next part from mr and returns a Part
// positioned at the start of its body. maxHeaderSize and maxHeaders are
// passed through to the header parser as limits. Unless rawPart is set, a
// "Content-Transfer-Encoding: quoted-printable" header is removed and the
// body reader is wrapped in a quoted-printable decoder.
func newPart(mr *Reader, rawPart bool, maxHeaderSize, maxHeaders int64) (*Part, error) {
	bp := &Part{
		Header: make(map[string][]string),
		mr:     mr,
	}
	if err := bp.populateHeaders(maxHeaderSize, maxHeaders); err != nil {
		return nil, err
	}
	bp.r = partReader{bp}

	// rawPart is used to switch between Part.NextPart and Part.NextRawPart.
	if !rawPart {
		const cte = "Content-Transfer-Encoding"
		if strings.EqualFold(bp.Header.Get(cte), "quoted-printable") {
			bp.Header.Del(cte)
			bp.r = quotedprintable.NewReader(bp.r)
		}
	}
	return bp, nil
}

// populateHeaders reads the part's MIME header block from the Reader's
// buffered input and stores it in p.Header on success. An error whose text
// is "message too large" is translated to ErrMessageTooLarge.
func (p *Part) populateHeaders(maxHeaderSize, maxHeaders int64) error {
	r := textproto.NewReader(p.mr.bufReader)
	header, err := readMIMEHeader(r, maxHeaderSize, maxHeaders)
	if err == nil {
		p.Header = header
	}
	// TODO: Add a distinguishable error to net/textproto.
	if err != nil && err.Error() == "message too large" {
		err = ErrMessageTooLarge
	}
	return err
}

// Read reads the body of a part, after its headers and before the
// next part (if any) begins.
func (p *Part) Read(d []byte) (n int, err error) {
	return p.r.Read(d)
}

// partReader implements io.Reader by reading raw bytes directly from the
// wrapped *Part, without doing any Transfer-Encoding decoding.
type partReader struct {
	p *Part
}

// Read copies body bytes of the wrapped Part into d. It scans the buffered
// input for the boundary, hands out only the bytes known to belong to the
// body, and returns the Part's pending error once those bytes are exhausted.
func (pr partReader) Read(d []byte) (int, error) {
	p := pr.p
	br := p.mr.bufReader

	// Read into buffer until we identify some data to return,
	// or we find a reason to stop (boundary or read error).
	for p.n == 0 && p.err == nil {
		peek, _ := br.Peek(br.Buffered())
		p.n, p.err = scanUntilBoundary(peek, p.mr.dashBoundary, p.mr.nlDashBoundary, p.total, p.readErr)
		if p.n == 0 && p.err == nil {
			// Force buffered I/O to read more into buffer.
			_, p.readErr = br.Peek(len(peek) + 1)
			if p.readErr == io.EOF {
				// Running out of input before a boundary is seen
				// is reported as an unexpected EOF.
				p.readErr = io.ErrUnexpectedEOF
			}
		}
	}

	// Read out from "data to return" part of buffer.
	if p.n == 0 {
		return 0, p.err
	}
	n := len(d)
	if n > p.n {
		n = p.n
	}
	// The bytes were already peeked, so the error is deliberately ignored.
	n, _ = br.Read(d[:n])
	p.total += int64(n)
	p.n -= n
	if p.n == 0 {
		return n, p.err
	}
	return n, nil
}

// scanUntilBoundary scans buf to identify how much of it can be safely
// returned as part of the Part body.
// dashBoundary is "--boundary".
// nlDashBoundary is "\r\n--boundary" or "\n--boundary", depending on what mode we are in.
// The comments below (and the name) assume "\n--boundary", but either is accepted.
// total is the number of bytes read out so far. If total == 0, then a leading "--boundary" is recognized.
// readErr is the read error, if any, that followed reading the bytes in buf.
// scanUntilBoundary returns the number of data bytes from buf that can be
// returned as part of the Part body and also the error to return (if any)
// once those data bytes are done.
func scanUntilBoundary(buf, dashBoundary, nlDashBoundary []byte, total int64, readErr error) (int, error) {
	if total == 0 {
		// At beginning of body, allow dashBoundary.
		if bytes.HasPrefix(buf, dashBoundary) {
			switch matchAfterPrefix(buf, dashBoundary, readErr) {
			case -1:
				return len(dashBoundary), nil
			case 0:
				return 0, nil
			case +1:
				return 0, io.EOF
			}
		}
		// buf is (so far) only a prefix of the boundary: wait for more.
		if bytes.HasPrefix(dashBoundary, buf) {
			return 0, readErr
		}
	}

	// Search for "\n--boundary".
	if i := bytes.Index(buf, nlDashBoundary); i >= 0 {
		switch matchAfterPrefix(buf[i:], nlDashBoundary, readErr) {
		case -1:
			return i + len(nlDashBoundary), nil
		case 0:
			return i, nil
		case +1:
			return i, io.EOF
		}
	}
	if bytes.HasPrefix(nlDashBoundary, buf) {
		return 0, readErr
	}

	// Otherwise, anything up to the final \n is not part of the boundary
	// and so must be part of the body.
	// Also if the section from the final \n onward is not a prefix of the boundary,
	// it too must be part of the body.
	i := bytes.LastIndexByte(buf, nlDashBoundary[0])
	if i >= 0 && bytes.HasPrefix(nlDashBoundary, buf[i:]) {
		return i, nil
	}
	return len(buf), readErr
}

// matchAfterPrefix checks whether buf should be considered to match the boundary.
// The prefix is "--boundary" or "\r\n--boundary" or "\n--boundary",
// and the caller has verified already that bytes.HasPrefix(buf, prefix) is true.
//
// matchAfterPrefix returns +1 if the buffer does match the boundary,
// meaning the prefix is followed by a double dash, space, tab, cr, nl,
// or end of input.
// It returns -1 if the buffer definitely does NOT match the boundary,
// meaning the prefix is followed by some other character.
// For example, "--foobar" does not match "--foo".
// It returns 0 if more input needs to be read to make the decision,
// meaning that len(buf) == len(prefix) and readErr == nil.
func matchAfterPrefix(buf, prefix []byte, readErr error) int {
	if len(buf) == len(prefix) {
		if readErr != nil {
			return +1
		}
		return 0
	}
	c := buf[len(prefix)]

	if c == ' ' || c == '\t' || c == '\r' || c == '\n' {
		return +1
	}

	// Try to detect boundaryDash
	if c == '-' {
		if len(buf) == len(prefix)+1 {
			if readErr != nil {
				// Prefix + "-" does not match
				return -1
			}
			return 0
		}
		if buf[len(prefix)+1] == '-' {
			return +1
		}
	}

	return -1
}

// Close discards whatever remains unread of the part's body by copying it
// to io.Discard. It always returns nil.
func (p *Part) Close() error {
	// Best-effort drain: any read error is intentionally not reported.
	io.Copy(io.Discard, p)
	return nil
}

// Reader is an iterator over parts in a MIME multipart body.
// Reader's underlying parser consumes its input as needed. Seeking
// isn't supported.
type Reader struct {
	bufReader *bufio.Reader
	tempDir   string // used in tests

	currentPart *Part
	partsRead   int

	nl               []byte // "\r\n" or "\n" (set after seeing first boundary line)
	nlDashBoundary   []byte // nl + "--boundary"
	dashBoundaryDash []byte // "--boundary--"
	dashBoundary     []byte // "--boundary"
}

// maxMIMEHeaderSize is the maximum size of a MIME header we will parse,
// including header keys, values, and map overhead.
const maxMIMEHeaderSize = 10 << 20

// multipartMaxHeaders is the maximum number of header entries NextPart will return,
// as well as the maximum combined total of header entries Reader.ReadForm will return
// in FileHeaders.
var multipartMaxHeaders = godebug.New("multipartmaxheaders")

// maxMIMEHeaders returns the header-count limit currently in effect: the
// value of the multipartmaxheaders GODEBUG setting when it is set and parses
// as a non-negative base-10 integer, and 10000 otherwise.
func maxMIMEHeaders() int64 {
	if s := multipartMaxHeaders.Value(); s != "" {
		if v, err := strconv.ParseInt(s, 10, 64); err == nil && v >= 0 {
			multipartMaxHeaders.IncNonDefault()
			return v
		}
	}
	return 10000
}

// NextPart returns the next part in the multipart or an error.
// When there are no more parts, the error io.EOF is returned.
//
// As a special case, if the "Content-Transfer-Encoding" header
// has a value of "quoted-printable", that header is instead
// hidden and the body is transparently decoded during Read calls.
func (r *Reader) NextPart() (*Part, error) {
	return r.nextPart(false, maxMIMEHeaderSize, maxMIMEHeaders())
}

// NextRawPart returns the next part in the multipart or an error.
// When there are no more parts, the error io.EOF is returned.
//
// Unlike NextPart, it does not have special handling for
// "Content-Transfer-Encoding: quoted-printable".
func (r *Reader) NextRawPart() (*Part, error) {
	return r.nextPart(true, maxMIMEHeaderSize, maxMIMEHeaders())
}

// nextPart closes the current part (if any), then reads lines until it finds
// a boundary delimiter line, at which point it parses and returns the part
// that follows. rawPart, maxHeaderSize and maxHeaders are forwarded to
// newPart. It returns io.EOF when the final boundary is reached.
func (r *Reader) nextPart(rawPart bool, maxHeaderSize, maxHeaders int64) (*Part, error) {
	if r.currentPart != nil {
		r.currentPart.Close()
	}
	if string(r.dashBoundary) == "--" {
		return nil, fmt.Errorf("multipart: boundary is empty")
	}
	expectNewPart := false
	for {
		line, err := r.bufReader.ReadSlice('\n')

		if err == io.EOF && r.isFinalBoundary(line) {
			// If the buffer ends in "--boundary--" without the
			// trailing "\r\n", ReadSlice will return an error
			// (since it's missing the '\n'), but this is a valid
			// multipart EOF so we need to return io.EOF instead of
			// a fmt-wrapped one.
			return nil, io.EOF
		}
		if err != nil {
			return nil, fmt.Errorf("multipart: NextPart: %w", err)
		}

		if r.isBoundaryDelimiterLine(line) {
			r.partsRead++
			bp, err := newPart(r, rawPart, maxHeaderSize, maxHeaders)
			if err != nil {
				return nil, err
			}
			r.currentPart = bp
			return bp, nil
		}

		if r.isFinalBoundary(line) {
			// Expected EOF
			return nil, io.EOF
		}

		if expectNewPart {
			return nil, fmt.Errorf("multipart: expecting a new Part; got line %q", string(line))
		}

		if r.partsRead == 0 {
			// skip line
			continue
		}

		// Consume the "\n" or "\r\n" separator between the
		// body of the previous part and the boundary line we
		// now expect will follow. (either a new part or the
		// end boundary)
		if bytes.Equal(line, r.nl) {
			expectNewPart = true
			continue
		}

		return nil, fmt.Errorf("multipart: unexpected line in Next(): %q", line)
	}
}

// isFinalBoundary reports whether line is the final boundary line
// indicating that all parts are over.
// It matches `^--boundary--[ \t]*(\r\n)?$`
func (r *Reader) isFinalBoundary(line []byte) bool {
	if !bytes.HasPrefix(line, r.dashBoundaryDash) {
		return false
	}
	rest := line[len(r.dashBoundaryDash):]
	rest = skipLWSPChar(rest)
	return len(rest) == 0 || bytes.Equal(rest, r.nl)
}

// isBoundaryDelimiterLine reports whether line is a (non-final) boundary
// delimiter line: "--boundary", optional spaces or tabs, then the line
// terminator. As a side effect, on the first boundary it may switch the
// Reader from "\r\n" to bare "\n" line endings.
func (r *Reader) isBoundaryDelimiterLine(line []byte) bool {
	// https://tools.ietf.org/html/rfc2046#section-5.1
	//   The boundary delimiter line is then defined as a line
	//   consisting entirely of two hyphen characters ("-",
	//   decimal value 45) followed by the boundary parameter
	//   value from the Content-Type header field, optional linear
	//   whitespace, and a terminating CRLF.
	if !bytes.HasPrefix(line, r.dashBoundary) {
		return false
	}
	rest := line[len(r.dashBoundary):]
	rest = skipLWSPChar(rest)

	// On the first part, see our lines are ending in \n instead of \r\n
	// and switch into that mode if so. This is a violation of the spec,
	// but occurs in practice.
	if r.partsRead == 0 && len(rest) == 1 && rest[0] == '\n' {
		r.nl = r.nl[1:]
		r.nlDashBoundary = r.nlDashBoundary[1:]
	}
	return bytes.Equal(rest, r.nl)
}

// skipLWSPChar returns b with leading spaces and tabs removed.
// RFC 822 defines:
//
//	LWSP-char = SPACE / HTAB
func skipLWSPChar(b []byte) []byte {
	for len(b) > 0 && (b[0] == ' ' || b[0] == '\t') {
		b = b[1:]
	}
	return b
}