Source file src/net/http/internal/chunked.go

     1  // Copyright 2009 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  // The wire protocol for HTTP's "chunked" Transfer-Encoding.
     6  
     7  // Package internal contains HTTP internals shared by net/http and
     8  // net/http/httputil.
     9  package internal
    10  
    11  import (
    12  	"bufio"
    13  	"bytes"
    14  	"errors"
    15  	"fmt"
    16  	"io"
    17  )
    18  
// maxLineLength caps the length of a chunk-size line accepted by
// readChunkLine. It is assumed <= bufio.defaultBufSize so that a full
// line always fits in the bufio.Reader's buffer.
const maxLineLength = 4096 // assumed <= bufio.defaultBufSize

// ErrLineTooLong is returned when reading a chunk header line that
// exceeds maxLineLength (or overflows the bufio.Reader's buffer).
var ErrLineTooLong = errors.New("header line too long")
    22  
    23  // NewChunkedReader returns a new chunkedReader that translates the data read from r
    24  // out of HTTP "chunked" format before returning it.
    25  // The chunkedReader returns [io.EOF] when the final 0-length chunk is read.
    26  //
    27  // NewChunkedReader is not needed by normal applications. The http package
    28  // automatically decodes chunking when reading response bodies.
    29  func NewChunkedReader(r io.Reader) io.Reader {
    30  	br, ok := r.(*bufio.Reader)
    31  	if !ok {
    32  		br = bufio.NewReader(r)
    33  	}
    34  	return &chunkedReader{r: br}
    35  }
    36  
// chunkedReader decodes an HTTP "chunked" transfer-encoded stream read
// from r. Create one with NewChunkedReader.
type chunkedReader struct {
	r        *bufio.Reader
	n        uint64 // unread bytes in chunk
	err      error  // sticky error; io.EOF once the final 0-length chunk is seen
	buf      [2]byte // scratch space for reading the "\r\n" chunk terminator
	checkEnd bool  // whether need to check for \r\n chunk footer
	excess   int64 // "excessive" chunk overhead, for malicious sender detection
}
    45  
    46  func (cr *chunkedReader) beginChunk() {
    47  	// chunk-size CRLF
    48  	var line []byte
    49  	line, cr.err = readChunkLine(cr.r)
    50  	if cr.err != nil {
    51  		return
    52  	}
    53  	cr.excess += int64(len(line)) + 2 // header, plus \r\n after the chunk data
    54  	line = trimTrailingWhitespace(line)
    55  	line, cr.err = removeChunkExtension(line)
    56  	if cr.err != nil {
    57  		return
    58  	}
    59  	cr.n, cr.err = parseHexUint(line)
    60  	if cr.err != nil {
    61  		return
    62  	}
    63  	// A sender who sends one byte per chunk will send 5 bytes of overhead
    64  	// for every byte of data. ("1\r\nX\r\n" to send "X".)
    65  	// We want to allow this, since streaming a byte at a time can be legitimate.
    66  	//
    67  	// A sender can use chunk extensions to add arbitrary amounts of additional
    68  	// data per byte read. ("1;very long extension\r\nX\r\n" to send "X".)
    69  	// We don't want to disallow extensions (although we discard them),
    70  	// but we also don't want to allow a sender to reduce the signal/noise ratio
    71  	// arbitrarily.
    72  	//
    73  	// We track the amount of excess overhead read,
    74  	// and produce an error if it grows too large.
    75  	//
    76  	// Currently, we say that we're willing to accept 16 bytes of overhead per chunk,
    77  	// plus twice the amount of real data in the chunk.
    78  	cr.excess -= 16 + (2 * int64(cr.n))
    79  	cr.excess = max(cr.excess, 0)
    80  	if cr.excess > 16*1024 {
    81  		cr.err = errors.New("chunked encoding contains too much non-data")
    82  	}
    83  	if cr.n == 0 {
    84  		cr.err = io.EOF
    85  	}
    86  }
    87  
    88  func (cr *chunkedReader) chunkHeaderAvailable() bool {
    89  	n := cr.r.Buffered()
    90  	if n > 0 {
    91  		peek, _ := cr.r.Peek(n)
    92  		return bytes.IndexByte(peek, '\n') >= 0
    93  	}
    94  	return false
    95  }
    96  
// Read implements io.Reader, copying decoded chunk data into b. It
// returns io.EOF (via cr.err, set in beginChunk) after the final
// zero-length chunk is consumed. Per the io.Reader contract, once some
// data has been read (n > 0) it returns early rather than block waiting
// for the next chunk terminator or header. Errors are sticky in cr.err,
// so all subsequent calls return the same error.
func (cr *chunkedReader) Read(b []uint8) (n int, err error) {
	for cr.err == nil {
		// Pending "\r\n" terminator from the previously finished chunk.
		if cr.checkEnd {
			if n > 0 && cr.r.Buffered() < 2 {
				// We have some data. Return early (per the io.Reader
				// contract) instead of potentially blocking while
				// reading more.
				break
			}
			if _, cr.err = io.ReadFull(cr.r, cr.buf[:2]); cr.err == nil {
				if string(cr.buf[:]) != "\r\n" {
					cr.err = errors.New("malformed chunked encoding")
					break
				}
			} else {
				if cr.err == io.EOF {
					cr.err = io.ErrUnexpectedEOF
				}
				break
			}
			cr.checkEnd = false
		}
		// Current chunk exhausted: parse the next chunk header.
		if cr.n == 0 {
			if n > 0 && !cr.chunkHeaderAvailable() {
				// We've read enough. Don't potentially block
				// reading a new chunk header.
				break
			}
			cr.beginChunk()
			continue
		}
		if len(b) == 0 {
			break
		}
		// Read at most the remainder of the current chunk into b.
		rbuf := b
		if uint64(len(rbuf)) > cr.n {
			rbuf = rbuf[:cr.n]
		}
		var n0 int
		n0, cr.err = cr.r.Read(rbuf)
		n += n0
		b = b[n0:]
		cr.n -= uint64(n0)
		// If we're at the end of a chunk, read the next two
		// bytes to verify they are "\r\n".
		if cr.n == 0 && cr.err == nil {
			cr.checkEnd = true
		} else if cr.err == io.EOF {
			// The stream ended mid-chunk: report it as unexpected.
			cr.err = io.ErrUnexpectedEOF
		}
	}
	return n, cr.err
}
   150  
   151  // Read a line of bytes (up to \n) from b.
   152  // Give up if the line exceeds maxLineLength.
   153  // The returned bytes are owned by the bufio.Reader
   154  // so they are only valid until the next bufio read.
   155  func readChunkLine(b *bufio.Reader) ([]byte, error) {
   156  	p, err := b.ReadSlice('\n')
   157  	if err != nil {
   158  		// We always know when EOF is coming.
   159  		// If the caller asked for a line, there should be a line.
   160  		if err == io.EOF {
   161  			err = io.ErrUnexpectedEOF
   162  		} else if err == bufio.ErrBufferFull {
   163  			err = ErrLineTooLong
   164  		}
   165  		return nil, err
   166  	}
   167  
   168  	// RFC 9112 permits parsers to accept a bare \n as a line ending in headers,
   169  	// but not in chunked encoding lines. See https://www.rfc-editor.org/errata/eid7633,
   170  	// which explicitly rejects a clarification permitting \n as a chunk terminator.
   171  	//
   172  	// Verify that the line ends in a CRLF, and that no CRs appear before the end.
   173  	if idx := bytes.IndexByte(p, '\r'); idx == -1 {
   174  		return nil, errors.New("chunked line ends with bare LF")
   175  	} else if idx != len(p)-2 {
   176  		return nil, errors.New("invalid CR in chunked line")
   177  	}
   178  	p = p[:len(p)-2] // trim CRLF
   179  
   180  	if len(p) >= maxLineLength {
   181  		return nil, ErrLineTooLong
   182  	}
   183  	return p, nil
   184  }
   185  
   186  func trimTrailingWhitespace(b []byte) []byte {
   187  	for len(b) > 0 && isOWS(b[len(b)-1]) {
   188  		b = b[:len(b)-1]
   189  	}
   190  	return b
   191  }
   192  
   193  func isOWS(b byte) bool {
   194  	return b == ' ' || b == '\t'
   195  }
   196  
   197  var semi = []byte(";")
   198  
   199  // removeChunkExtension removes any chunk-extension from p.
   200  // For example,
   201  //
   202  //	"0" => "0"
   203  //	"0;token" => "0"
   204  //	"0;token=val" => "0"
   205  //	`0;token="quoted string"` => "0"
   206  func removeChunkExtension(p []byte) ([]byte, error) {
   207  	p, _, _ = bytes.Cut(p, semi)
   208  	// TODO: care about exact syntax of chunk extensions? We're
   209  	// ignoring and stripping them anyway. For now just never
   210  	// return an error.
   211  	return p, nil
   212  }
   213  
   214  // NewChunkedWriter returns a new chunkedWriter that translates writes into HTTP
   215  // "chunked" format before writing them to w. Closing the returned chunkedWriter
   216  // sends the final 0-length chunk that marks the end of the stream but does
   217  // not send the final CRLF that appears after trailers; trailers and the last
   218  // CRLF must be written separately.
   219  //
   220  // NewChunkedWriter is not needed by normal applications. The http
   221  // package adds chunking automatically if handlers don't set a
   222  // Content-Length header. Using newChunkedWriter inside a handler
   223  // would result in double chunking or chunking with a Content-Length
   224  // length, both of which are wrong.
   225  func NewChunkedWriter(w io.Writer) io.WriteCloser {
   226  	return &chunkedWriter{w}
   227  }
   228  
// chunkedWriter translates each Write into HTTP chunked Transfer-Encoding
// wire format ("size CRLF data CRLF") on the underlying Wire writer.
type chunkedWriter struct {
	Wire io.Writer
}
   234  
   235  // Write the contents of data as one chunk to Wire.
   236  // NOTE: Note that the corresponding chunk-writing procedure in Conn.Write has
   237  // a bug since it does not check for success of [io.WriteString]
   238  func (cw *chunkedWriter) Write(data []byte) (n int, err error) {
   239  
   240  	// Don't send 0-length data. It looks like EOF for chunked encoding.
   241  	if len(data) == 0 {
   242  		return 0, nil
   243  	}
   244  
   245  	if _, err = fmt.Fprintf(cw.Wire, "%x\r\n", len(data)); err != nil {
   246  		return 0, err
   247  	}
   248  	if n, err = cw.Wire.Write(data); err != nil {
   249  		return
   250  	}
   251  	if n != len(data) {
   252  		err = io.ErrShortWrite
   253  		return
   254  	}
   255  	if _, err = io.WriteString(cw.Wire, "\r\n"); err != nil {
   256  		return
   257  	}
   258  	if bw, ok := cw.Wire.(*FlushAfterChunkWriter); ok {
   259  		err = bw.Flush()
   260  	}
   261  	return
   262  }
   263  
   264  func (cw *chunkedWriter) Close() error {
   265  	_, err := io.WriteString(cw.Wire, "0\r\n")
   266  	return err
   267  }
   268  
// FlushAfterChunkWriter signals from the caller of [NewChunkedWriter]
// that each chunk should be followed by a flush. It is used by the
// [net/http.Transport] code to keep the buffering behavior for headers and
// trailers, but flush out chunks aggressively in the middle for
// request bodies which may be generated slowly. See Issue 6574.
//
// chunkedWriter.Write type-asserts its Wire writer against this type and
// calls Flush after every chunk when it matches.
type FlushAfterChunkWriter struct {
	*bufio.Writer
}
   277  
   278  func parseHexUint(v []byte) (n uint64, err error) {
   279  	if len(v) == 0 {
   280  		return 0, errors.New("empty hex number for chunk length")
   281  	}
   282  	for i, b := range v {
   283  		switch {
   284  		case '0' <= b && b <= '9':
   285  			b = b - '0'
   286  		case 'a' <= b && b <= 'f':
   287  			b = b - 'a' + 10
   288  		case 'A' <= b && b <= 'F':
   289  			b = b - 'A' + 10
   290  		default:
   291  			return 0, errors.New("invalid byte in chunk length")
   292  		}
   293  		if i == 16 {
   294  			return 0, errors.New("http chunk length too large")
   295  		}
   296  		n <<= 4
   297  		n |= uint64(b)
   298  	}
   299  	return
   300  }
   301  

View as plain text