Source file src/bytes/iter.go

     1  // Copyright 2024 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package bytes
     6  
     7  import (
     8  	"iter"
     9  	"unicode"
    10  	"unicode/utf8"
    11  )
    12  
    13  // Lines returns an iterator over the newline-terminated lines in the byte slice s.
    14  // The lines yielded by the iterator include their terminating newlines.
    15  // If s is empty, the iterator yields no lines at all.
    16  // If s does not end in a newline, the final yielded line will not end in a newline.
    17  // It returns a single-use iterator.
    18  func Lines(s []byte) iter.Seq[[]byte] {
    19  	return func(yield func([]byte) bool) {
    20  		for len(s) > 0 {
    21  			var line []byte
    22  			if i := IndexByte(s, '\n'); i >= 0 {
    23  				line, s = s[:i+1], s[i+1:]
    24  			} else {
    25  				line, s = s, nil
    26  			}
    27  			if !yield(line[:len(line):len(line)]) {
    28  				return
    29  			}
    30  		}
    31  	}
    32  }
    33  
    34  // explodeSeq returns an iterator over the runes in s.
    35  func explodeSeq(s []byte) iter.Seq[[]byte] {
    36  	return func(yield func([]byte) bool) {
    37  		for len(s) > 0 {
    38  			_, size := utf8.DecodeRune(s)
    39  			if !yield(s[:size:size]) {
    40  				return
    41  			}
    42  			s = s[size:]
    43  		}
    44  	}
    45  }
    46  
    47  // splitSeq is SplitSeq or SplitAfterSeq, configured by how many
    48  // bytes of sep to include in the results (none or all).
    49  func splitSeq(s, sep []byte, sepSave int) iter.Seq[[]byte] {
    50  	if len(sep) == 0 {
    51  		return explodeSeq(s)
    52  	}
    53  	return func(yield func([]byte) bool) {
    54  		for {
    55  			i := Index(s, sep)
    56  			if i < 0 {
    57  				break
    58  			}
    59  			frag := s[:i+sepSave]
    60  			if !yield(frag[:len(frag):len(frag)]) {
    61  				return
    62  			}
    63  			s = s[i+len(sep):]
    64  		}
    65  		yield(s[:len(s):len(s)])
    66  	}
    67  }
    68  
    69  // SplitSeq returns an iterator over all subslices of s separated by sep.
    70  // The iterator yields the same subslices that would be returned by [Split](s, sep),
    71  // but without constructing a new slice containing the subslices.
    72  // It returns a single-use iterator.
    73  func SplitSeq(s, sep []byte) iter.Seq[[]byte] {
    74  	return splitSeq(s, sep, 0)
    75  }
    76  
    77  // SplitAfterSeq returns an iterator over subslices of s split after each instance of sep.
    78  // The iterator yields the same subslices that would be returned by [SplitAfter](s, sep),
    79  // but without constructing a new slice containing the subslices.
    80  // It returns a single-use iterator.
    81  func SplitAfterSeq(s, sep []byte) iter.Seq[[]byte] {
    82  	return splitSeq(s, sep, len(sep))
    83  }
    84  
    85  // FieldsSeq returns an iterator over subslices of s split around runs of
    86  // whitespace characters, as defined by [unicode.IsSpace].
    87  // The iterator yields the same subslices that would be returned by [Fields](s),
    88  // but without constructing a new slice containing the subslices.
    89  func FieldsSeq(s []byte) iter.Seq[[]byte] {
    90  	return func(yield func([]byte) bool) {
    91  		start := -1
    92  		for i := 0; i < len(s); {
    93  			size := 1
    94  			r := rune(s[i])
    95  			isSpace := asciiSpace[s[i]] != 0
    96  			if r >= utf8.RuneSelf {
    97  				r, size = utf8.DecodeRune(s[i:])
    98  				isSpace = unicode.IsSpace(r)
    99  			}
   100  			if isSpace {
   101  				if start >= 0 {
   102  					if !yield(s[start:i:i]) {
   103  						return
   104  					}
   105  					start = -1
   106  				}
   107  			} else if start < 0 {
   108  				start = i
   109  			}
   110  			i += size
   111  		}
   112  		if start >= 0 {
   113  			yield(s[start:len(s):len(s)])
   114  		}
   115  	}
   116  }
   117  
   118  // FieldsFuncSeq returns an iterator over subslices of s split around runs of
   119  // Unicode code points satisfying f(c).
   120  // The iterator yields the same subslices that would be returned by [FieldsFunc](s),
   121  // but without constructing a new slice containing the subslices.
   122  func FieldsFuncSeq(s []byte, f func(rune) bool) iter.Seq[[]byte] {
   123  	return func(yield func([]byte) bool) {
   124  		start := -1
   125  		for i := 0; i < len(s); {
   126  			size := 1
   127  			r := rune(s[i])
   128  			if r >= utf8.RuneSelf {
   129  				r, size = utf8.DecodeRune(s[i:])
   130  			}
   131  			if f(r) {
   132  				if start >= 0 {
   133  					if !yield(s[start:i:i]) {
   134  						return
   135  					}
   136  					start = -1
   137  				}
   138  			} else if start < 0 {
   139  				start = i
   140  			}
   141  			i += size
   142  		}
   143  		if start >= 0 {
   144  			yield(s[start:len(s):len(s)])
   145  		}
   146  	}
   147  }
   148  

View as plain text