Source file src/net/mail/message.go

     1  // Copyright 2011 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  /*
     6  Package mail implements parsing of mail messages.
     7  
     8  For the most part, this package follows the syntax as specified by RFC 5322 and
     9  extended by RFC 6532.
    10  Notable divergences:
    11    - Obsolete address formats are not parsed, including addresses with
    12      embedded route information.
    13    - The full range of spacing (the CFWS syntax element) is not supported,
    14      such as breaking addresses across lines.
    15    - No unicode normalization is performed.
    16    - A leading From line is permitted, as in mbox format (RFC 4155).
    17  */
    18  package mail
    19  
    20  import (
    21  	"bufio"
    22  	"errors"
    23  	"fmt"
    24  	"io"
    25  	"log"
    26  	"mime"
    27  	"net"
    28  	"net/textproto"
    29  	"strings"
    30  	"sync"
    31  	"time"
    32  	"unicode/utf8"
    33  )
    34  
    35  var debug = debugT(false)
    36  
    37  type debugT bool
    38  
    39  func (d debugT) Printf(format string, args ...any) {
    40  	if d {
    41  		log.Printf(format, args...)
    42  	}
    43  }
    44  
// A Message represents a parsed mail message.
// ReadMessage fills Header from the parsed header block and sets Body to
// the reader positioned after that block.
type Message struct {
	Header Header
	Body   io.Reader
}
    50  
    51  // ReadMessage reads a message from r.
    52  // The headers are parsed, and the body of the message will be available
    53  // for reading from msg.Body.
    54  func ReadMessage(r io.Reader) (msg *Message, err error) {
    55  	tp := textproto.NewReader(bufio.NewReader(r))
    56  
    57  	hdr, err := readHeader(tp)
    58  	if err != nil && (err != io.EOF || len(hdr) == 0) {
    59  		return nil, err
    60  	}
    61  
    62  	return &Message{
    63  		Header: Header(hdr),
    64  		Body:   tp.R,
    65  	}, nil
    66  }
    67  
    68  // readHeader reads the message headers from r.
    69  // This is like textproto.ReadMIMEHeader, but doesn't validate.
    70  // The fix for issue #53188 tightened up net/textproto to enforce
    71  // restrictions of RFC 7230.
    72  // This package implements RFC 5322, which does not have those restrictions.
    73  // This function copies the relevant code from net/textproto,
    74  // simplified for RFC 5322.
    75  func readHeader(r *textproto.Reader) (map[string][]string, error) {
    76  	m := make(map[string][]string)
    77  
    78  	// The first line cannot start with a leading space.
    79  	if buf, err := r.R.Peek(1); err == nil && (buf[0] == ' ' || buf[0] == '\t') {
    80  		line, err := r.ReadLine()
    81  		if err != nil {
    82  			return m, err
    83  		}
    84  		return m, errors.New("malformed initial line: " + line)
    85  	}
    86  
    87  	for {
    88  		kv, err := r.ReadContinuedLine()
    89  		if kv == "" {
    90  			return m, err
    91  		}
    92  
    93  		// Key ends at first colon.
    94  		k, v, ok := strings.Cut(kv, ":")
    95  		if !ok {
    96  			return m, errors.New("malformed header line: " + kv)
    97  		}
    98  		key := textproto.CanonicalMIMEHeaderKey(k)
    99  
   100  		// Permit empty key, because that is what we did in the past.
   101  		if key == "" {
   102  			continue
   103  		}
   104  
   105  		// Skip initial spaces in value.
   106  		value := strings.TrimLeft(v, " \t")
   107  
   108  		m[key] = append(m[key], value)
   109  
   110  		if err != nil {
   111  			return m, err
   112  		}
   113  	}
   114  }
   115  
   116  // Layouts suitable for passing to time.Parse.
   117  // These are tried in order.
   118  var dateLayouts = sync.OnceValue(func() []string {
   119  	// Generate layouts based on RFC 5322, section 3.3.
   120  
   121  	dows := [...]string{"", "Mon, "}   // day-of-week
   122  	days := [...]string{"2", "02"}     // day = 1*2DIGIT
   123  	years := [...]string{"2006", "06"} // year = 4*DIGIT / 2*DIGIT
   124  	seconds := [...]string{":05", ""}  // second
   125  	// "-0700 (MST)" is not in RFC 5322, but is common.
   126  	zones := [...]string{"-0700", "MST", "UT"} // zone = (("+" / "-") 4DIGIT) / "UT" / "GMT" / ...
   127  
   128  	total := len(dows) * len(days) * len(years) * len(seconds) * len(zones)
   129  	layouts := make([]string, 0, total)
   130  
   131  	for _, dow := range dows {
   132  		for _, day := range days {
   133  			for _, year := range years {
   134  				for _, second := range seconds {
   135  					for _, zone := range zones {
   136  						s := dow + day + " Jan " + year + " 15:04" + second + " " + zone
   137  						layouts = append(layouts, s)
   138  					}
   139  				}
   140  			}
   141  		}
   142  	}
   143  
   144  	return layouts
   145  })
   146  
   147  // ParseDate parses an RFC 5322 date string.
   148  func ParseDate(date string) (time.Time, error) {
   149  	// CR and LF must match and are tolerated anywhere in the date field.
   150  	date = strings.ReplaceAll(date, "\r\n", "")
   151  	if strings.Contains(date, "\r") {
   152  		return time.Time{}, errors.New("mail: header has a CR without LF")
   153  	}
   154  	// Re-using some addrParser methods which support obsolete text, i.e. non-printable ASCII
   155  	p := addrParser{date, nil}
   156  	p.skipSpace()
   157  
   158  	// RFC 5322: zone = (FWS ( "+" / "-" ) 4DIGIT) / obs-zone
   159  	// zone length is always 5 chars unless obsolete (obs-zone)
   160  	if ind := strings.IndexAny(p.s, "+-"); ind != -1 && len(p.s) >= ind+5 {
   161  		date = p.s[:ind+5]
   162  		p.s = p.s[ind+5:]
   163  	} else {
   164  		ind := strings.Index(p.s, "T")
   165  		if ind == 0 {
   166  			// In this case we have the following date formats:
   167  			// * Thu, 20 Nov 1997 09:55:06 MDT
   168  			// * Thu, 20 Nov 1997 09:55:06 MDT (MDT)
   169  			// * Thu, 20 Nov 1997 09:55:06 MDT (This comment)
   170  			ind = strings.Index(p.s[1:], "T")
   171  			if ind != -1 {
   172  				ind++
   173  			}
   174  		}
   175  
   176  		if ind != -1 && len(p.s) >= ind+5 {
   177  			// The last letter T of the obsolete time zone is checked when no standard time zone is found.
   178  			// If T is misplaced, the date to parse is garbage.
   179  			date = p.s[:ind+1]
   180  			p.s = p.s[ind+1:]
   181  		}
   182  	}
   183  	if !p.skipCFWS() {
   184  		return time.Time{}, errors.New("mail: misformatted parenthetical comment")
   185  	}
   186  	for _, layout := range dateLayouts() {
   187  		t, err := time.Parse(layout, date)
   188  		if err == nil {
   189  			return t, nil
   190  		}
   191  	}
   192  	return time.Time{}, errors.New("mail: header could not be parsed")
   193  }
   194  
// A Header represents the key-value pairs in a mail message header.
// Each key maps to the list of values recorded for that field name.
type Header map[string][]string
   197  
   198  // Get gets the first value associated with the given key.
   199  // It is case insensitive; CanonicalMIMEHeaderKey is used
   200  // to canonicalize the provided key.
   201  // If there are no values associated with the key, Get returns "".
   202  // To access multiple values of a key, or to use non-canonical keys,
   203  // access the map directly.
   204  func (h Header) Get(key string) string {
   205  	return textproto.MIMEHeader(h).Get(key)
   206  }
   207  
// ErrHeaderNotPresent is returned by Header.Date and Header.AddressList
// when the requested field has no value (Get returns "").
var ErrHeaderNotPresent = errors.New("mail: header not in message")
   209  
   210  // Date parses the Date header field.
   211  func (h Header) Date() (time.Time, error) {
   212  	hdr := h.Get("Date")
   213  	if hdr == "" {
   214  		return time.Time{}, ErrHeaderNotPresent
   215  	}
   216  	return ParseDate(hdr)
   217  }
   218  
   219  // AddressList parses the named header field as a list of addresses.
   220  func (h Header) AddressList(key string) ([]*Address, error) {
   221  	hdr := h.Get(key)
   222  	if hdr == "" {
   223  		return nil, ErrHeaderNotPresent
   224  	}
   225  	return ParseAddressList(hdr)
   226  }
   227  
// Address represents a single mail address.
// An address such as "Barry Gibbs <bg@example.com>" is represented
// as Address{Name: "Barry Gibbs", Address: "bg@example.com"}.
// The String method renders it back into header form.
type Address struct {
	Name    string // Proper name; may be empty.
	Address string // user@domain
}
   235  
   236  // ParseAddress parses a single RFC 5322 address, e.g. "Barry Gibbs <bg@example.com>"
   237  func ParseAddress(address string) (*Address, error) {
   238  	return (&addrParser{s: address}).parseSingleAddress()
   239  }
   240  
   241  // ParseAddressList parses the given string as a list of addresses.
   242  func ParseAddressList(list string) ([]*Address, error) {
   243  	return (&addrParser{s: list}).parseAddressList()
   244  }
   245  
// An AddressParser is an RFC 5322 address parser.
// Its Parse and ParseList methods behave like ParseAddress and
// ParseAddressList but pass WordDecoder on to the underlying parser.
type AddressParser struct {
	// WordDecoder optionally specifies a decoder for RFC 2047 encoded-words.
	// When it is nil the package falls back to its own rfc2047Decoder.
	WordDecoder *mime.WordDecoder
}
   251  
   252  // Parse parses a single RFC 5322 address of the
   253  // form "Gogh Fir <gf@example.com>" or "foo@example.com".
   254  func (p *AddressParser) Parse(address string) (*Address, error) {
   255  	return (&addrParser{s: address, dec: p.WordDecoder}).parseSingleAddress()
   256  }
   257  
   258  // ParseList parses the given string as a list of comma-separated addresses
   259  // of the form "Gogh Fir <gf@example.com>" or "foo@example.com".
   260  func (p *AddressParser) ParseList(list string) ([]*Address, error) {
   261  	return (&addrParser{s: list, dec: p.WordDecoder}).parseAddressList()
   262  }
   263  
   264  // String formats the address as a valid RFC 5322 address.
   265  // If the address's name contains non-ASCII characters
   266  // the name will be rendered according to RFC 2047.
   267  func (a *Address) String() string {
   268  	// Format address local@domain
   269  	at := strings.LastIndex(a.Address, "@")
   270  	var local, domain string
   271  	if at < 0 {
   272  		// This is a malformed address ("@" is required in addr-spec);
   273  		// treat the whole address as local-part.
   274  		local = a.Address
   275  	} else {
   276  		local, domain = a.Address[:at], a.Address[at+1:]
   277  	}
   278  
   279  	// Add quotes if needed
   280  	quoteLocal := false
   281  	for i, r := range local {
   282  		if isAtext(r, false) {
   283  			continue
   284  		}
   285  		if r == '.' {
   286  			// Dots are okay if they are surrounded by atext.
   287  			// We only need to check that the previous byte is
   288  			// not a dot, and this isn't the end of the string.
   289  			if i > 0 && local[i-1] != '.' && i < len(local)-1 {
   290  				continue
   291  			}
   292  		}
   293  		quoteLocal = true
   294  		break
   295  	}
   296  	if quoteLocal {
   297  		local = quoteString(local)
   298  
   299  	}
   300  
   301  	s := "<" + local + "@" + domain + ">"
   302  
   303  	if a.Name == "" {
   304  		return s
   305  	}
   306  
   307  	// If every character is printable ASCII, quoting is simple.
   308  	allPrintable := true
   309  	for _, r := range a.Name {
   310  		// isWSP here should actually be isFWS,
   311  		// but we don't support folding yet.
   312  		if !isVchar(r) && !isWSP(r) || isMultibyte(r) {
   313  			allPrintable = false
   314  			break
   315  		}
   316  	}
   317  	if allPrintable {
   318  		return quoteString(a.Name) + " " + s
   319  	}
   320  
   321  	// Text in an encoded-word in a display-name must not contain certain
   322  	// characters like quotes or parentheses (see RFC 2047 section 5.3).
   323  	// When this is the case encode the name using base64 encoding.
   324  	if strings.ContainsAny(a.Name, "\"#$%&'(),.:;<>@[]^`{|}~") {
   325  		return mime.BEncoding.Encode("utf-8", a.Name) + " " + s
   326  	}
   327  	return mime.QEncoding.Encode("utf-8", a.Name) + " " + s
   328  }
   329  
// addrParser is the state of an address parse: s holds the input that has
// not been consumed yet, and the consume* methods advance it.
type addrParser struct {
	s   string
	dec *mime.WordDecoder // may be nil
}
   334  
   335  func (p *addrParser) parseAddressList() ([]*Address, error) {
   336  	var list []*Address
   337  	for {
   338  		p.skipSpace()
   339  
   340  		// allow skipping empty entries (RFC5322 obs-addr-list)
   341  		if p.consume(',') {
   342  			continue
   343  		}
   344  
   345  		addrs, err := p.parseAddress(true)
   346  		if err != nil {
   347  			return nil, err
   348  		}
   349  		list = append(list, addrs...)
   350  
   351  		if !p.skipCFWS() {
   352  			return nil, errors.New("mail: misformatted parenthetical comment")
   353  		}
   354  		if p.empty() {
   355  			break
   356  		}
   357  		if p.peek() != ',' {
   358  			return nil, errors.New("mail: expected comma")
   359  		}
   360  
   361  		// Skip empty entries for obs-addr-list.
   362  		for p.consume(',') {
   363  			p.skipSpace()
   364  		}
   365  		if p.empty() {
   366  			break
   367  		}
   368  	}
   369  	return list, nil
   370  }
   371  
   372  func (p *addrParser) parseSingleAddress() (*Address, error) {
   373  	addrs, err := p.parseAddress(true)
   374  	if err != nil {
   375  		return nil, err
   376  	}
   377  	if !p.skipCFWS() {
   378  		return nil, errors.New("mail: misformatted parenthetical comment")
   379  	}
   380  	if !p.empty() {
   381  		return nil, fmt.Errorf("mail: expected single address, got %q", p.s)
   382  	}
   383  	if len(addrs) == 0 {
   384  		return nil, errors.New("mail: empty group")
   385  	}
   386  	if len(addrs) > 1 {
   387  		return nil, errors.New("mail: group with multiple addresses")
   388  	}
   389  	return addrs[0], nil
   390  }
   391  
   392  // parseAddress parses a single RFC 5322 address at the start of p.
   393  func (p *addrParser) parseAddress(handleGroup bool) ([]*Address, error) {
   394  	debug.Printf("parseAddress: %q", p.s)
   395  	p.skipSpace()
   396  	if p.empty() {
   397  		return nil, errors.New("mail: no address")
   398  	}
   399  
   400  	// address = mailbox / group
   401  	// mailbox = name-addr / addr-spec
   402  	// group = display-name ":" [group-list] ";" [CFWS]
   403  
   404  	// addr-spec has a more restricted grammar than name-addr,
   405  	// so try parsing it first, and fallback to name-addr.
   406  	// TODO(dsymonds): Is this really correct?
   407  	spec, err := p.consumeAddrSpec()
   408  	if err == nil {
   409  		var displayName string
   410  		p.skipSpace()
   411  		if !p.empty() && p.peek() == '(' {
   412  			displayName, err = p.consumeDisplayNameComment()
   413  			if err != nil {
   414  				return nil, err
   415  			}
   416  		}
   417  
   418  		return []*Address{{
   419  			Name:    displayName,
   420  			Address: spec,
   421  		}}, err
   422  	}
   423  	debug.Printf("parseAddress: not an addr-spec: %v", err)
   424  	debug.Printf("parseAddress: state is now %q", p.s)
   425  
   426  	// display-name
   427  	var displayName string
   428  	if p.peek() != '<' {
   429  		displayName, err = p.consumePhrase()
   430  		if err != nil {
   431  			return nil, err
   432  		}
   433  	}
   434  	debug.Printf("parseAddress: displayName=%q", displayName)
   435  
   436  	p.skipSpace()
   437  	if handleGroup {
   438  		if p.consume(':') {
   439  			return p.consumeGroupList()
   440  		}
   441  	}
   442  	// angle-addr = "<" addr-spec ">"
   443  	if !p.consume('<') {
   444  		atext := true
   445  		for _, r := range displayName {
   446  			if !isAtext(r, true) {
   447  				atext = false
   448  				break
   449  			}
   450  		}
   451  		if atext {
   452  			// The input is like "foo.bar"; it's possible the input
   453  			// meant to be "foo.bar@domain", or "foo.bar <...>".
   454  			return nil, errors.New("mail: missing '@' or angle-addr")
   455  		}
   456  		// The input is like "Full Name", which couldn't possibly be a
   457  		// valid email address if followed by "@domain"; the input
   458  		// likely meant to be "Full Name <...>".
   459  		return nil, errors.New("mail: no angle-addr")
   460  	}
   461  	spec, err = p.consumeAddrSpec()
   462  	if err != nil {
   463  		return nil, err
   464  	}
   465  	if !p.consume('>') {
   466  		return nil, errors.New("mail: unclosed angle-addr")
   467  	}
   468  	debug.Printf("parseAddress: spec=%q", spec)
   469  
   470  	return []*Address{{
   471  		Name:    displayName,
   472  		Address: spec,
   473  	}}, nil
   474  }
   475  
   476  func (p *addrParser) consumeGroupList() ([]*Address, error) {
   477  	var group []*Address
   478  	// handle empty group.
   479  	p.skipSpace()
   480  	if p.consume(';') {
   481  		if !p.skipCFWS() {
   482  			return nil, errors.New("mail: misformatted parenthetical comment")
   483  		}
   484  		return group, nil
   485  	}
   486  
   487  	for {
   488  		p.skipSpace()
   489  		// embedded groups not allowed.
   490  		addrs, err := p.parseAddress(false)
   491  		if err != nil {
   492  			return nil, err
   493  		}
   494  		group = append(group, addrs...)
   495  
   496  		if !p.skipCFWS() {
   497  			return nil, errors.New("mail: misformatted parenthetical comment")
   498  		}
   499  		if p.consume(';') {
   500  			if !p.skipCFWS() {
   501  				return nil, errors.New("mail: misformatted parenthetical comment")
   502  			}
   503  			break
   504  		}
   505  		if !p.consume(',') {
   506  			return nil, errors.New("mail: expected comma")
   507  		}
   508  	}
   509  	return group, nil
   510  }
   511  
   512  // consumeAddrSpec parses a single RFC 5322 addr-spec at the start of p.
   513  func (p *addrParser) consumeAddrSpec() (spec string, err error) {
   514  	debug.Printf("consumeAddrSpec: %q", p.s)
   515  
   516  	orig := *p
   517  	defer func() {
   518  		if err != nil {
   519  			*p = orig
   520  		}
   521  	}()
   522  
   523  	// local-part = dot-atom / quoted-string
   524  	var localPart string
   525  	p.skipSpace()
   526  	if p.empty() {
   527  		return "", errors.New("mail: no addr-spec")
   528  	}
   529  	if p.peek() == '"' {
   530  		// quoted-string
   531  		debug.Printf("consumeAddrSpec: parsing quoted-string")
   532  		localPart, err = p.consumeQuotedString()
   533  		if localPart == "" {
   534  			err = errors.New("mail: empty quoted string in addr-spec")
   535  		}
   536  	} else {
   537  		// dot-atom
   538  		debug.Printf("consumeAddrSpec: parsing dot-atom")
   539  		localPart, err = p.consumeAtom(true, false)
   540  	}
   541  	if err != nil {
   542  		debug.Printf("consumeAddrSpec: failed: %v", err)
   543  		return "", err
   544  	}
   545  
   546  	if !p.consume('@') {
   547  		return "", errors.New("mail: missing @ in addr-spec")
   548  	}
   549  
   550  	// domain = dot-atom / domain-literal
   551  	var domain string
   552  	p.skipSpace()
   553  	if p.empty() {
   554  		return "", errors.New("mail: no domain in addr-spec")
   555  	}
   556  
   557  	if p.peek() == '[' {
   558  		// domain-literal
   559  		domain, err = p.consumeDomainLiteral()
   560  		if err != nil {
   561  			return "", err
   562  		}
   563  	} else {
   564  		// dot-atom
   565  		domain, err = p.consumeAtom(true, false)
   566  		if err != nil {
   567  			return "", err
   568  		}
   569  	}
   570  
   571  	return localPart + "@" + domain, nil
   572  }
   573  
   574  // consumePhrase parses the RFC 5322 phrase at the start of p.
   575  func (p *addrParser) consumePhrase() (phrase string, err error) {
   576  	debug.Printf("consumePhrase: [%s]", p.s)
   577  	// phrase = 1*word
   578  	var words []string
   579  	var isPrevEncoded bool
   580  	for {
   581  		// obs-phrase allows CFWS after one word
   582  		if len(words) > 0 {
   583  			if !p.skipCFWS() {
   584  				return "", errors.New("mail: misformatted parenthetical comment")
   585  			}
   586  		}
   587  		// word = atom / quoted-string
   588  		var word string
   589  		p.skipSpace()
   590  		if p.empty() {
   591  			break
   592  		}
   593  		isEncoded := false
   594  		if p.peek() == '"' {
   595  			// quoted-string
   596  			word, err = p.consumeQuotedString()
   597  		} else {
   598  			// atom
   599  			// We actually parse dot-atom here to be more permissive
   600  			// than what RFC 5322 specifies.
   601  			word, err = p.consumeAtom(true, true)
   602  			if err == nil {
   603  				word, isEncoded, err = p.decodeRFC2047Word(word)
   604  			}
   605  		}
   606  
   607  		if err != nil {
   608  			break
   609  		}
   610  		debug.Printf("consumePhrase: consumed %q", word)
   611  		if isPrevEncoded && isEncoded {
   612  			words[len(words)-1] += word
   613  		} else {
   614  			words = append(words, word)
   615  		}
   616  		isPrevEncoded = isEncoded
   617  	}
   618  	// Ignore any error if we got at least one word.
   619  	if err != nil && len(words) == 0 {
   620  		debug.Printf("consumePhrase: hit err: %v", err)
   621  		return "", fmt.Errorf("mail: missing word in phrase: %v", err)
   622  	}
   623  	phrase = strings.Join(words, " ")
   624  	return phrase, nil
   625  }
   626  
   627  // consumeQuotedString parses the quoted string at the start of p.
   628  func (p *addrParser) consumeQuotedString() (qs string, err error) {
   629  	// Assume first byte is '"'.
   630  	i := 1
   631  	qsb := make([]rune, 0, 10)
   632  
   633  	escaped := false
   634  
   635  Loop:
   636  	for {
   637  		r, size := utf8.DecodeRuneInString(p.s[i:])
   638  
   639  		switch {
   640  		case size == 0:
   641  			return "", errors.New("mail: unclosed quoted-string")
   642  
   643  		case size == 1 && r == utf8.RuneError:
   644  			return "", fmt.Errorf("mail: invalid utf-8 in quoted-string: %q", p.s)
   645  
   646  		case escaped:
   647  			//  quoted-pair = ("\" (VCHAR / WSP))
   648  
   649  			if !isVchar(r) && !isWSP(r) {
   650  				return "", fmt.Errorf("mail: bad character in quoted-string: %q", r)
   651  			}
   652  
   653  			qsb = append(qsb, r)
   654  			escaped = false
   655  
   656  		case isQtext(r) || isWSP(r):
   657  			// qtext (printable US-ASCII excluding " and \), or
   658  			// FWS (almost; we're ignoring CRLF)
   659  			qsb = append(qsb, r)
   660  
   661  		case r == '"':
   662  			break Loop
   663  
   664  		case r == '\\':
   665  			escaped = true
   666  
   667  		default:
   668  			return "", fmt.Errorf("mail: bad character in quoted-string: %q", r)
   669  
   670  		}
   671  
   672  		i += size
   673  	}
   674  	p.s = p.s[i+1:]
   675  	return string(qsb), nil
   676  }
   677  
   678  // consumeAtom parses an RFC 5322 atom at the start of p.
   679  // If dot is true, consumeAtom parses an RFC 5322 dot-atom instead.
   680  // If permissive is true, consumeAtom will not fail on:
   681  // - leading/trailing/double dots in the atom (see golang.org/issue/4938)
   682  func (p *addrParser) consumeAtom(dot bool, permissive bool) (atom string, err error) {
   683  	i := 0
   684  
   685  Loop:
   686  	for {
   687  		r, size := utf8.DecodeRuneInString(p.s[i:])
   688  		switch {
   689  		case size == 1 && r == utf8.RuneError:
   690  			return "", fmt.Errorf("mail: invalid utf-8 in address: %q", p.s)
   691  
   692  		case size == 0 || !isAtext(r, dot):
   693  			break Loop
   694  
   695  		default:
   696  			i += size
   697  
   698  		}
   699  	}
   700  
   701  	if i == 0 {
   702  		return "", errors.New("mail: invalid string")
   703  	}
   704  	atom, p.s = p.s[:i], p.s[i:]
   705  	if !permissive {
   706  		if strings.HasPrefix(atom, ".") {
   707  			return "", errors.New("mail: leading dot in atom")
   708  		}
   709  		if strings.Contains(atom, "..") {
   710  			return "", errors.New("mail: double dot in atom")
   711  		}
   712  		if strings.HasSuffix(atom, ".") {
   713  			return "", errors.New("mail: trailing dot in atom")
   714  		}
   715  	}
   716  	return atom, nil
   717  }
   718  
   719  // consumeDomainLiteral parses an RFC 5322 domain-literal at the start of p.
   720  func (p *addrParser) consumeDomainLiteral() (string, error) {
   721  	// Skip the leading [
   722  	if !p.consume('[') {
   723  		return "", errors.New(`mail: missing "[" in domain-literal`)
   724  	}
   725  
   726  	// Parse the dtext
   727  	var dtext string
   728  	for {
   729  		if p.empty() {
   730  			return "", errors.New("mail: unclosed domain-literal")
   731  		}
   732  		if p.peek() == ']' {
   733  			break
   734  		}
   735  
   736  		r, size := utf8.DecodeRuneInString(p.s)
   737  		if size == 1 && r == utf8.RuneError {
   738  			return "", fmt.Errorf("mail: invalid utf-8 in domain-literal: %q", p.s)
   739  		}
   740  		if !isDtext(r) {
   741  			return "", fmt.Errorf("mail: bad character in domain-literal: %q", r)
   742  		}
   743  
   744  		dtext += p.s[:size]
   745  		p.s = p.s[size:]
   746  	}
   747  
   748  	// Skip the trailing ]
   749  	if !p.consume(']') {
   750  		return "", errors.New("mail: unclosed domain-literal")
   751  	}
   752  
   753  	// Check if the domain literal is an IP address
   754  	if net.ParseIP(dtext) == nil {
   755  		return "", fmt.Errorf("mail: invalid IP address in domain-literal: %q", dtext)
   756  	}
   757  
   758  	return "[" + dtext + "]", nil
   759  }
   760  
   761  func (p *addrParser) consumeDisplayNameComment() (string, error) {
   762  	if !p.consume('(') {
   763  		return "", errors.New("mail: comment does not start with (")
   764  	}
   765  	comment, ok := p.consumeComment()
   766  	if !ok {
   767  		return "", errors.New("mail: misformatted parenthetical comment")
   768  	}
   769  
   770  	// TODO(stapelberg): parse quoted-string within comment
   771  	words := strings.FieldsFunc(comment, func(r rune) bool { return r == ' ' || r == '\t' })
   772  	for idx, word := range words {
   773  		decoded, isEncoded, err := p.decodeRFC2047Word(word)
   774  		if err != nil {
   775  			return "", err
   776  		}
   777  		if isEncoded {
   778  			words[idx] = decoded
   779  		}
   780  	}
   781  
   782  	return strings.Join(words, " "), nil
   783  }
   784  
   785  func (p *addrParser) consume(c byte) bool {
   786  	if p.empty() || p.peek() != c {
   787  		return false
   788  	}
   789  	p.s = p.s[1:]
   790  	return true
   791  }
   792  
   793  // skipSpace skips the leading space and tab characters.
   794  func (p *addrParser) skipSpace() {
   795  	p.s = strings.TrimLeft(p.s, " \t")
   796  }
   797  
// peek returns the next byte of the remaining input without consuming it.
// It indexes p.s directly, so the input must not be empty when it is called.
func (p *addrParser) peek() byte {
	return p.s[0]
}
   801  
   802  func (p *addrParser) empty() bool {
   803  	return p.len() == 0
   804  }
   805  
// len returns the number of bytes of input that remain to be consumed.
func (p *addrParser) len() int {
	return len(p.s)
}
   809  
   810  // skipCFWS skips CFWS as defined in RFC5322.
   811  func (p *addrParser) skipCFWS() bool {
   812  	p.skipSpace()
   813  
   814  	for {
   815  		if !p.consume('(') {
   816  			break
   817  		}
   818  
   819  		if _, ok := p.consumeComment(); !ok {
   820  			return false
   821  		}
   822  
   823  		p.skipSpace()
   824  	}
   825  
   826  	return true
   827  }
   828  
   829  func (p *addrParser) consumeComment() (string, bool) {
   830  	// '(' already consumed.
   831  	depth := 1
   832  
   833  	var comment string
   834  	for {
   835  		if p.empty() || depth == 0 {
   836  			break
   837  		}
   838  
   839  		if p.peek() == '\\' && p.len() > 1 {
   840  			p.s = p.s[1:]
   841  		} else if p.peek() == '(' {
   842  			depth++
   843  		} else if p.peek() == ')' {
   844  			depth--
   845  		}
   846  		if depth > 0 {
   847  			comment += p.s[:1]
   848  		}
   849  		p.s = p.s[1:]
   850  	}
   851  
   852  	return comment, depth == 0
   853  }
   854  
   855  func (p *addrParser) decodeRFC2047Word(s string) (word string, isEncoded bool, err error) {
   856  	dec := p.dec
   857  	if dec == nil {
   858  		dec = &rfc2047Decoder
   859  	}
   860  
   861  	// Substitute our own CharsetReader function so that we can tell
   862  	// whether an error from the Decode method was due to the
   863  	// CharsetReader (meaning the charset is invalid).
   864  	// We used to look for the charsetError type in the error result,
   865  	// but that behaves badly with CharsetReaders other than the
   866  	// one in rfc2047Decoder.
   867  	adec := *dec
   868  	charsetReaderError := false
   869  	adec.CharsetReader = func(charset string, input io.Reader) (io.Reader, error) {
   870  		if dec.CharsetReader == nil {
   871  			charsetReaderError = true
   872  			return nil, charsetError(charset)
   873  		}
   874  		r, err := dec.CharsetReader(charset, input)
   875  		if err != nil {
   876  			charsetReaderError = true
   877  		}
   878  		return r, err
   879  	}
   880  	word, err = adec.Decode(s)
   881  	if err == nil {
   882  		return word, true, nil
   883  	}
   884  
   885  	// If the error came from the character set reader
   886  	// (meaning the character set itself is invalid
   887  	// but the decoding worked fine until then),
   888  	// return the original text and the error,
   889  	// with isEncoded=true.
   890  	if charsetReaderError {
   891  		return s, true, err
   892  	}
   893  
   894  	// Ignore invalid RFC 2047 encoded-word errors.
   895  	return s, false, nil
   896  }
   897  
// rfc2047Decoder is the decoder decodeRFC2047Word uses when the parser has
// no WordDecoder of its own. Its CharsetReader never converts anything: it
// reports every charset handed to it as a charsetError.
var rfc2047Decoder = mime.WordDecoder{
	CharsetReader: func(charset string, input io.Reader) (io.Reader, error) {
		return nil, charsetError(charset)
	},
}
   903  
   904  type charsetError string
   905  
   906  func (e charsetError) Error() string {
   907  	return fmt.Sprintf("charset not supported: %q", string(e))
   908  }
   909  
   910  // isAtext reports whether r is an RFC 5322 atext character.
   911  // If dot is true, period is included.
   912  func isAtext(r rune, dot bool) bool {
   913  	switch r {
   914  	case '.':
   915  		return dot
   916  
   917  	// RFC 5322 3.2.3. specials
   918  	case '(', ')', '<', '>', '[', ']', ':', ';', '@', '\\', ',', '"': // RFC 5322 3.2.3. specials
   919  		return false
   920  	}
   921  	return isVchar(r)
   922  }
   923  
   924  // isQtext reports whether r is an RFC 5322 qtext character.
   925  func isQtext(r rune) bool {
   926  	// Printable US-ASCII, excluding backslash or quote.
   927  	if r == '\\' || r == '"' {
   928  		return false
   929  	}
   930  	return isVchar(r)
   931  }
   932  
   933  // quoteString renders a string as an RFC 5322 quoted-string.
   934  func quoteString(s string) string {
   935  	var b strings.Builder
   936  	b.WriteByte('"')
   937  	for _, r := range s {
   938  		if isQtext(r) || isWSP(r) {
   939  			b.WriteRune(r)
   940  		} else if isVchar(r) {
   941  			b.WriteByte('\\')
   942  			b.WriteRune(r)
   943  		}
   944  	}
   945  	b.WriteByte('"')
   946  	return b.String()
   947  }
   948  
   949  // isVchar reports whether r is an RFC 5322 VCHAR character.
   950  func isVchar(r rune) bool {
   951  	// Visible (printing) characters.
   952  	return '!' <= r && r <= '~' || isMultibyte(r)
   953  }
   954  
   955  // isMultibyte reports whether r is a multi-byte UTF-8 character
   956  // as supported by RFC 6532.
   957  func isMultibyte(r rune) bool {
   958  	return r >= utf8.RuneSelf
   959  }
   960  
   961  // isWSP reports whether r is a WSP (white space).
   962  // WSP is a space or horizontal tab (RFC 5234 Appendix B).
   963  func isWSP(r rune) bool {
   964  	return r == ' ' || r == '\t'
   965  }
   966  
   967  // isDtext reports whether r is an RFC 5322 dtext character.
   968  func isDtext(r rune) bool {
   969  	// Printable US-ASCII, excluding "[", "]", or "\".
   970  	if r == '[' || r == ']' || r == '\\' {
   971  		return false
   972  	}
   973  	return isVchar(r)
   974  }
   975  

View as plain text