Source file src/cmd/vendor/golang.org/x/mod/zip/zip.go

     1  // Copyright 2019 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  // Package zip provides functions for creating and extracting module zip files.
     6  //
     7  // Module zip files have several restrictions listed below. These are necessary
     8  // to ensure that module zip files can be extracted consistently on supported
     9  // platforms and file systems.
    10  //
    11  // • All file paths within a zip file must start with "<module>@<version>/",
    12  // where "<module>" is the module path and "<version>" is the version.
    13  // The module path must be valid (see [golang.org/x/mod/module.CheckPath]).
    14  // The version must be valid and canonical (see
    15  // [golang.org/x/mod/module.CanonicalVersion]). The path must have a major
    16  // version suffix consistent with the version (see
    17  // [golang.org/x/mod/module.Check]). The part of the file path after the
    18  // "<module>@<version>/" prefix must be valid (see
    19  // [golang.org/x/mod/module.CheckFilePath]).
    20  //
    21  // • No two file paths may be equal under Unicode case-folding (see
    22  // [strings.EqualFold]).
    23  //
    24  // • A go.mod file may or may not appear in the top-level directory. If present,
    25  // it must be named "go.mod", not any other case. Files named "go.mod"
    26  // are not allowed in any other directory.
    27  //
    28  // • The total size in bytes of a module zip file may be at most [MaxZipFile]
    29  // bytes (500 MiB). The total uncompressed size of the files within the
    30  // zip may also be at most [MaxZipFile] bytes.
    31  //
    32  // • Each file's uncompressed size must match its declared 64-bit uncompressed
    33  // size in the zip file header.
    34  //
    35  // • If the zip contains files named "<module>@<version>/go.mod" or
    36  // "<module>@<version>/LICENSE", their sizes in bytes may be at most
    37  // [MaxGoMod] or [MaxLICENSE], respectively (both are 16 MiB).
    38  //
    39  // • Empty directories are ignored. File permissions and timestamps are also
    40  // ignored.
    41  //
    42  // • Symbolic links and other irregular files are not allowed.
    43  //
    44  // Note that this package does not provide hashing functionality. See
    45  // [golang.org/x/mod/sumdb/dirhash].
    46  package zip
    47  
    48  import (
    49  	"archive/zip"
    50  	"bytes"
    51  	"errors"
    52  	"fmt"
    53  	"go/version"
    54  	"io"
    55  	"os"
    56  	"os/exec"
    57  	"path"
    58  	"path/filepath"
    59  	"strings"
    60  	"time"
    61  	"unicode"
    62  	"unicode/utf8"
    63  
    64  	"golang.org/x/mod/modfile"
    65  	"golang.org/x/mod/module"
    66  )
    67  
const (
	// MaxZipFile is the maximum size in bytes of a module zip file
	// (500 << 20 bytes, i.e. 500 MiB). The go command will report an error
	// if either the zip file or its extracted content is larger than this.
	MaxZipFile = 500 << 20

	// MaxGoMod is the maximum size in bytes of a go.mod file within a
	// module zip file (16 << 20 bytes, i.e. 16 MiB).
	MaxGoMod = 16 << 20

	// MaxLICENSE is the maximum size in bytes of a LICENSE file within a
	// module zip file (16 << 20 bytes, i.e. 16 MiB).
	MaxLICENSE = 16 << 20
)
    82  
// File provides an abstraction for a file in a directory, zip, or anything
// else that looks like a file.
//
// The functions in this package that accept a list of File values use Path
// to name the entry, Lstat to classify it and obtain its size, and Open to
// read its content.
type File interface {
	// Path returns a clean slash-separated relative path from the module root
	// directory to the file.
	Path() string

	// Lstat returns information about the file. If the file is a symbolic link,
	// Lstat returns information about the link itself, not the file it points to.
	Lstat() (os.FileInfo, error)

	// Open provides access to the data within a regular file. Open may return
	// an error if called on a directory or symbolic link.
	// The caller is expected to close the returned reader.
	Open() (io.ReadCloser, error)
}
    98  
// CheckedFiles reports whether a set of files satisfy the name and size
// constraints required by module zip files. The constraints are listed in the
// package documentation.
//
// Functions that produce this report may include slightly different sets of
// files. See documentation for CheckFiles, CheckDir, and CheckZip for details.
//
// Use the Err method to turn a report into a single error value.
type CheckedFiles struct {
	// Valid is a list of file paths that should be included in a zip file.
	Valid []string

	// Omitted is a list of files that are ignored when creating a module zip
	// file, along with the reason each file is ignored.
	// Omitted files do not make the report invalid.
	Omitted []FileError

	// Invalid is a list of files that should not be included in a module zip
	// file, along with the reason each file is invalid.
	// A non-empty Invalid list makes Err return a FileErrorList.
	Invalid []FileError

	// SizeError is non-nil if the total uncompressed size of the valid files
	// exceeds the module zip size limit or if the zip file itself exceeds the
	// limit.
	SizeError error
}
   122  
   123  // Err returns an error if [CheckedFiles] does not describe a valid module zip
   124  // file. [CheckedFiles.SizeError] is returned if that field is set.
   125  // A [FileErrorList] is returned
   126  // if there are one or more invalid files. Other errors may be returned in the
   127  // future.
   128  func (cf CheckedFiles) Err() error {
   129  	if cf.SizeError != nil {
   130  		return cf.SizeError
   131  	}
   132  	if len(cf.Invalid) > 0 {
   133  		return FileErrorList(cf.Invalid)
   134  	}
   135  	return nil
   136  }
   137  
   138  type FileErrorList []FileError
   139  
   140  func (el FileErrorList) Error() string {
   141  	buf := &strings.Builder{}
   142  	sep := ""
   143  	for _, e := range el {
   144  		buf.WriteString(sep)
   145  		buf.WriteString(e.Error())
   146  		sep = "\n"
   147  	}
   148  	return buf.String()
   149  }
   150  
   151  type FileError struct {
   152  	Path string
   153  	Err  error
   154  }
   155  
   156  func (e FileError) Error() string {
   157  	return fmt.Sprintf("%s: %s", e.Path, e.Err)
   158  }
   159  
   160  func (e FileError) Unwrap() error {
   161  	return e.Err
   162  }
   163  
var (
	// Predefined error messages for invalid files. Not exhaustive.
	// The two size errors embed the corresponding limit (MaxGoMod,
	// MaxLICENSE) in their message.
	errPathNotClean    = errors.New("file path is not clean")
	errPathNotRelative = errors.New("file path is not relative")
	errGoModCase       = errors.New("go.mod files must have lowercase names")
	errGoModSize       = fmt.Errorf("go.mod file too large (max size is %d bytes)", MaxGoMod)
	errLICENSESize     = fmt.Errorf("LICENSE file too large (max size is %d bytes)", MaxLICENSE)

	// Predefined error messages for omitted files. Not exhaustive.
	// These explain why a file is left out of a zip; they do not by
	// themselves make a set of files invalid.
	errVCS           = errors.New("directory is a version control repository")
	errVendored      = errors.New("file is in vendor directory")
	errSubmoduleFile = errors.New("file is in another module")
	errSubmoduleDir  = errors.New("directory is in another module")
	errHgArchivalTxt = errors.New("file is inserted by 'hg archive' and is always omitted")
	errSymlink       = errors.New("file is a symbolic link")
	errNotRegular    = errors.New("not a regular file")
)
   181  
   182  // CheckFiles reports whether a list of files satisfy the name and size
   183  // constraints listed in the package documentation. The returned CheckedFiles
   184  // record contains lists of valid, invalid, and omitted files. Every file in
   185  // the given list will be included in exactly one of those lists.
   186  //
   187  // CheckFiles returns an error if the returned CheckedFiles does not describe
   188  // a valid module zip file (according to CheckedFiles.Err). The returned
   189  // CheckedFiles is still populated when an error is returned.
   190  //
   191  // Note that CheckFiles will not open any files, so Create may still fail when
   192  // CheckFiles is successful due to I/O errors and reported size differences.
   193  func CheckFiles(files []File) (CheckedFiles, error) {
   194  	cf, _, _ := checkFiles(files)
   195  	return cf, cf.Err()
   196  }
   197  
   198  // parseGoVers extracts the Go version specified in the given go.mod file.
   199  // It returns an empty string if the version is not found or if an error
   200  // occurs during file parsing.
   201  //
   202  // The version string is in Go toolchain name syntax, prefixed with "go".
   203  // Examples: "go1.21", "go1.22rc2", "go1.23.0"
   204  func parseGoVers(file string, data []byte) string {
   205  	mfile, err := modfile.ParseLax(file, data, nil)
   206  	if err != nil || mfile.Go == nil {
   207  		return ""
   208  	}
   209  	return "go" + mfile.Go.Version
   210  }
   211  
   212  // checkFiles implements CheckFiles and also returns lists of valid files and
   213  // their sizes, corresponding to cf.Valid. It omits files in submodules, files
   214  // in vendored packages, symlinked files, and various other unwanted files.
   215  //
   216  // The lists returned are used in Create to avoid repeated calls to File.Lstat.
   217  func checkFiles(files []File) (cf CheckedFiles, validFiles []File, validSizes []int64) {
   218  	errPaths := make(map[string]struct{})
   219  	addError := func(path string, omitted bool, err error) {
   220  		if _, ok := errPaths[path]; ok {
   221  			return
   222  		}
   223  		errPaths[path] = struct{}{}
   224  		fe := FileError{Path: path, Err: err}
   225  		if omitted {
   226  			cf.Omitted = append(cf.Omitted, fe)
   227  		} else {
   228  			cf.Invalid = append(cf.Invalid, fe)
   229  		}
   230  	}
   231  
   232  	// Find directories containing go.mod files (other than the root).
   233  	// Files in these directories will be omitted.
   234  	// These directories will not be included in the output zip.
   235  	haveGoMod := make(map[string]bool)
   236  	var vers string
   237  	for _, f := range files {
   238  		p := f.Path()
   239  		dir, base := path.Split(p)
   240  		if strings.EqualFold(base, "go.mod") {
   241  			info, err := f.Lstat()
   242  			if err != nil {
   243  				addError(p, false, err)
   244  				continue
   245  			}
   246  			if !info.Mode().IsRegular() {
   247  				continue
   248  			}
   249  			haveGoMod[dir] = true
   250  			// Extract the Go language version from the root "go.mod" file.
   251  			// This ensures we correctly interpret Go version-specific file omissions.
   252  			// We use f.Open() to handle potential custom Open() implementations
   253  			// that the underlying File type might have.
   254  			if base == "go.mod" && dir == "" {
   255  				if file, err := f.Open(); err == nil {
   256  					if data, err := io.ReadAll(file); err == nil {
   257  						vers = version.Lang(parseGoVers("go.mod", data))
   258  					}
   259  					file.Close()
   260  				}
   261  			}
   262  		}
   263  	}
   264  
   265  	inSubmodule := func(p string) bool {
   266  		for {
   267  			dir, _ := path.Split(p)
   268  			if dir == "" {
   269  				return false
   270  			}
   271  			if haveGoMod[dir] {
   272  				return true
   273  			}
   274  			p = dir[:len(dir)-1]
   275  		}
   276  	}
   277  
   278  	collisions := make(collisionChecker)
   279  	maxSize := int64(MaxZipFile)
   280  	for _, f := range files {
   281  		p := f.Path()
   282  		if p != path.Clean(p) {
   283  			addError(p, false, errPathNotClean)
   284  			continue
   285  		}
   286  		if path.IsAbs(p) {
   287  			addError(p, false, errPathNotRelative)
   288  			continue
   289  		}
   290  		if isVendoredPackage(p, vers) {
   291  			// Skip files in vendored packages.
   292  			addError(p, true, errVendored)
   293  			continue
   294  		}
   295  		if inSubmodule(p) {
   296  			// Skip submodule files.
   297  			addError(p, true, errSubmoduleFile)
   298  			continue
   299  		}
   300  		if p == ".hg_archival.txt" {
   301  			// Inserted by hg archive.
   302  			// The go command drops this regardless of the VCS being used.
   303  			addError(p, true, errHgArchivalTxt)
   304  			continue
   305  		}
   306  		if err := module.CheckFilePath(p); err != nil {
   307  			addError(p, false, err)
   308  			continue
   309  		}
   310  		if strings.ToLower(p) == "go.mod" && p != "go.mod" {
   311  			addError(p, false, errGoModCase)
   312  			continue
   313  		}
   314  		info, err := f.Lstat()
   315  		if err != nil {
   316  			addError(p, false, err)
   317  			continue
   318  		}
   319  		if err := collisions.check(p, info.IsDir()); err != nil {
   320  			addError(p, false, err)
   321  			continue
   322  		}
   323  		if info.Mode()&os.ModeType == os.ModeSymlink {
   324  			// Skip symbolic links (golang.org/issue/27093).
   325  			addError(p, true, errSymlink)
   326  			continue
   327  		}
   328  		if !info.Mode().IsRegular() {
   329  			addError(p, true, errNotRegular)
   330  			continue
   331  		}
   332  		size := info.Size()
   333  		if size >= 0 && size <= maxSize {
   334  			maxSize -= size
   335  		} else if cf.SizeError == nil {
   336  			cf.SizeError = fmt.Errorf("module source tree too large (max size is %d bytes)", MaxZipFile)
   337  		}
   338  		if p == "go.mod" && size > MaxGoMod {
   339  			addError(p, false, errGoModSize)
   340  			continue
   341  		}
   342  		if p == "LICENSE" && size > MaxLICENSE {
   343  			addError(p, false, errLICENSESize)
   344  			continue
   345  		}
   346  
   347  		cf.Valid = append(cf.Valid, p)
   348  		validFiles = append(validFiles, f)
   349  		validSizes = append(validSizes, info.Size())
   350  	}
   351  
   352  	return cf, validFiles, validSizes
   353  }
   354  
   355  // CheckDir reports whether the files in dir satisfy the name and size
   356  // constraints listed in the package documentation. The returned [CheckedFiles]
   357  // record contains lists of valid, invalid, and omitted files. If a directory is
   358  // omitted (for example, a nested module or vendor directory), it will appear in
   359  // the omitted list, but its files won't be listed.
   360  //
   361  // CheckDir returns an error if it encounters an I/O error or if the returned
   362  // [CheckedFiles] does not describe a valid module zip file (according to
   363  // [CheckedFiles.Err]). The returned [CheckedFiles] is still populated when such
   364  // an error is returned.
   365  //
   366  // Note that CheckDir will not open any files, so [CreateFromDir] may still fail
   367  // when CheckDir is successful due to I/O errors.
   368  func CheckDir(dir string) (CheckedFiles, error) {
   369  	// List files (as CreateFromDir would) and check which ones are omitted
   370  	// or invalid.
   371  	files, omitted, err := listFilesInDir(dir)
   372  	if err != nil {
   373  		return CheckedFiles{}, err
   374  	}
   375  	cf, cfErr := CheckFiles(files)
   376  	_ = cfErr // ignore this error; we'll generate our own after rewriting paths.
   377  
   378  	// Replace all paths with file system paths.
   379  	// Paths returned by CheckFiles will be slash-separated paths relative to dir.
   380  	// That's probably not appropriate for error messages.
   381  	for i := range cf.Valid {
   382  		cf.Valid[i] = filepath.Join(dir, cf.Valid[i])
   383  	}
   384  	cf.Omitted = append(cf.Omitted, omitted...)
   385  	for i := range cf.Omitted {
   386  		cf.Omitted[i].Path = filepath.Join(dir, cf.Omitted[i].Path)
   387  	}
   388  	for i := range cf.Invalid {
   389  		cf.Invalid[i].Path = filepath.Join(dir, cf.Invalid[i].Path)
   390  	}
   391  	return cf, cf.Err()
   392  }
   393  
   394  // CheckZip reports whether the files contained in a zip file satisfy the name
   395  // and size constraints listed in the package documentation.
   396  //
   397  // CheckZip returns an error if the returned [CheckedFiles] does not describe
   398  // a valid module zip file (according to [CheckedFiles.Err]). The returned
   399  // CheckedFiles is still populated when an error is returned. CheckZip will
   400  // also return an error if the module path or version is malformed or if it
   401  // encounters an error reading the zip file.
   402  //
   403  // Note that CheckZip does not read individual files, so [Unzip] may still fail
   404  // when CheckZip is successful due to I/O errors.
   405  func CheckZip(m module.Version, zipFile string) (CheckedFiles, error) {
   406  	f, err := os.Open(zipFile)
   407  	if err != nil {
   408  		return CheckedFiles{}, err
   409  	}
   410  	defer f.Close()
   411  	_, cf, err := checkZip(m, f)
   412  	return cf, err
   413  }
   414  
   415  // checkZip implements checkZip and also returns the *zip.Reader. This is
   416  // used in Unzip to avoid redundant I/O.
   417  func checkZip(m module.Version, f *os.File) (*zip.Reader, CheckedFiles, error) {
   418  	// Make sure the module path and version are valid.
   419  	if vers := module.CanonicalVersion(m.Version); vers != m.Version {
   420  		return nil, CheckedFiles{}, fmt.Errorf("version %q is not canonical (should be %q)", m.Version, vers)
   421  	}
   422  	if err := module.Check(m.Path, m.Version); err != nil {
   423  		return nil, CheckedFiles{}, err
   424  	}
   425  
   426  	// Check the total file size.
   427  	info, err := f.Stat()
   428  	if err != nil {
   429  		return nil, CheckedFiles{}, err
   430  	}
   431  	zipSize := info.Size()
   432  	if zipSize > MaxZipFile {
   433  		cf := CheckedFiles{SizeError: fmt.Errorf("module zip file is too large (%d bytes; limit is %d bytes)", zipSize, MaxZipFile)}
   434  		return nil, cf, cf.Err()
   435  	}
   436  
   437  	// Check for valid file names, collisions.
   438  	var cf CheckedFiles
   439  	addError := func(zf *zip.File, err error) {
   440  		cf.Invalid = append(cf.Invalid, FileError{Path: zf.Name, Err: err})
   441  	}
   442  	z, err := zip.NewReader(f, zipSize)
   443  	if err != nil {
   444  		return nil, CheckedFiles{}, err
   445  	}
   446  	prefix := fmt.Sprintf("%s@%s/", m.Path, m.Version)
   447  	collisions := make(collisionChecker)
   448  	var size int64
   449  	for _, zf := range z.File {
   450  		if !strings.HasPrefix(zf.Name, prefix) {
   451  			addError(zf, fmt.Errorf("path does not have prefix %q", prefix))
   452  			continue
   453  		}
   454  		name := zf.Name[len(prefix):]
   455  		if name == "" {
   456  			continue
   457  		}
   458  		isDir := strings.HasSuffix(name, "/")
   459  		if isDir {
   460  			name = name[:len(name)-1]
   461  		}
   462  		if path.Clean(name) != name {
   463  			addError(zf, errPathNotClean)
   464  			continue
   465  		}
   466  		if err := module.CheckFilePath(name); err != nil {
   467  			addError(zf, err)
   468  			continue
   469  		}
   470  		if err := collisions.check(name, isDir); err != nil {
   471  			addError(zf, err)
   472  			continue
   473  		}
   474  		if isDir {
   475  			continue
   476  		}
   477  		if base := path.Base(name); strings.EqualFold(base, "go.mod") {
   478  			if base != name {
   479  				addError(zf, fmt.Errorf("go.mod file not in module root directory"))
   480  				continue
   481  			}
   482  			if name != "go.mod" {
   483  				addError(zf, errGoModCase)
   484  				continue
   485  			}
   486  		}
   487  		sz := int64(zf.UncompressedSize64)
   488  		if sz >= 0 && MaxZipFile-size >= sz {
   489  			size += sz
   490  		} else if cf.SizeError == nil {
   491  			cf.SizeError = fmt.Errorf("total uncompressed size of module contents too large (max size is %d bytes)", MaxZipFile)
   492  		}
   493  		if name == "go.mod" && sz > MaxGoMod {
   494  			addError(zf, fmt.Errorf("go.mod file too large (max size is %d bytes)", MaxGoMod))
   495  			continue
   496  		}
   497  		if name == "LICENSE" && sz > MaxLICENSE {
   498  			addError(zf, fmt.Errorf("LICENSE file too large (max size is %d bytes)", MaxLICENSE))
   499  			continue
   500  		}
   501  		cf.Valid = append(cf.Valid, zf.Name)
   502  	}
   503  
   504  	return z, cf, cf.Err()
   505  }
   506  
   507  // Create builds a zip archive for module m from an abstract list of files
   508  // and writes it to w.
   509  //
   510  // Create verifies the restrictions described in the package documentation
   511  // and should not produce an archive that [Unzip] cannot extract. Create does not
   512  // include files in the output archive if they don't belong in the module zip.
   513  // In particular, Create will not include files in modules found in
   514  // subdirectories, most files in vendor directories, or irregular files (such
   515  // as symbolic links) in the output archive.
   516  func Create(w io.Writer, m module.Version, files []File) (err error) {
   517  	defer func() {
   518  		if err != nil {
   519  			err = &zipError{verb: "create zip", err: err}
   520  		}
   521  	}()
   522  
   523  	// Check that the version is canonical, the module path is well-formed, and
   524  	// the major version suffix matches the major version.
   525  	if vers := module.CanonicalVersion(m.Version); vers != m.Version {
   526  		return fmt.Errorf("version %q is not canonical (should be %q)", m.Version, vers)
   527  	}
   528  	if err := module.Check(m.Path, m.Version); err != nil {
   529  		return err
   530  	}
   531  
   532  	// Check whether files are valid, not valid, or should be omitted.
   533  	// Also check that the valid files don't exceed the maximum size.
   534  	cf, validFiles, validSizes := checkFiles(files)
   535  	if err := cf.Err(); err != nil {
   536  		return err
   537  	}
   538  
   539  	// Create the module zip file.
   540  	zw := zip.NewWriter(w)
   541  	prefix := fmt.Sprintf("%s@%s/", m.Path, m.Version)
   542  
   543  	addFile := func(f File, path string, size int64) error {
   544  		rc, err := f.Open()
   545  		if err != nil {
   546  			return err
   547  		}
   548  		defer rc.Close()
   549  		w, err := zw.Create(prefix + path)
   550  		if err != nil {
   551  			return err
   552  		}
   553  		lr := &io.LimitedReader{R: rc, N: size + 1}
   554  		if _, err := io.Copy(w, lr); err != nil {
   555  			return err
   556  		}
   557  		if lr.N <= 0 {
   558  			return fmt.Errorf("file %q is larger than declared size", path)
   559  		}
   560  		return nil
   561  	}
   562  
   563  	for i, f := range validFiles {
   564  		p := f.Path()
   565  		size := validSizes[i]
   566  		if err := addFile(f, p, size); err != nil {
   567  			return err
   568  		}
   569  	}
   570  
   571  	return zw.Close()
   572  }
   573  
   574  // CreateFromDir creates a module zip file for module m from the contents of
   575  // a directory, dir. The zip content is written to w.
   576  //
   577  // CreateFromDir verifies the restrictions described in the package
   578  // documentation and should not produce an archive that [Unzip] cannot extract.
   579  // CreateFromDir does not include files in the output archive if they don't
   580  // belong in the module zip. In particular, CreateFromDir will not include
   581  // files in modules found in subdirectories, most files in vendor directories,
   582  // or irregular files (such as symbolic links) in the output archive.
   583  // Additionally, unlike [Create], CreateFromDir will not include directories
   584  // named ".bzr", ".git", ".hg", or ".svn".
   585  func CreateFromDir(w io.Writer, m module.Version, dir string) (err error) {
   586  	defer func() {
   587  		if zerr, ok := err.(*zipError); ok {
   588  			zerr.path = dir
   589  		} else if err != nil {
   590  			err = &zipError{verb: "create zip from directory", path: dir, err: err}
   591  		}
   592  	}()
   593  
   594  	files, _, err := listFilesInDir(dir)
   595  	if err != nil {
   596  		return err
   597  	}
   598  
   599  	return Create(w, m, files)
   600  }
   601  
   602  // CreateFromVCS creates a module zip file for module m from the contents of a
   603  // VCS repository stored locally. The zip content is written to w.
   604  //
   605  // repoRoot must be an absolute path to the base of the repository, such as
   606  // "/Users/some-user/some-repo". If the repository is a Git repository,
   607  // this path is expected to point to its worktree: it can't be a bare git
   608  // repo.
   609  //
   610  // revision is the revision of the repository to create the zip from. Examples
   611  // include HEAD or SHA sums for git repositories.
   612  //
   613  // subdir must be the relative path from the base of the repository, such as
   614  // "sub/dir". To create a zip from the base of the repository, pass an empty
   615  // string.
   616  //
   617  // If CreateFromVCS returns [UnrecognizedVCSError], consider falling back to
   618  // [CreateFromDir].
   619  func CreateFromVCS(w io.Writer, m module.Version, repoRoot, revision, subdir string) (err error) {
   620  	defer func() {
   621  		if zerr, ok := err.(*zipError); ok {
   622  			zerr.path = repoRoot
   623  		} else if err != nil {
   624  			err = &zipError{verb: "create zip from version control system", path: repoRoot, err: err}
   625  		}
   626  	}()
   627  
   628  	var filesToCreate []File
   629  
   630  	switch {
   631  	case isGitRepo(repoRoot):
   632  		files, err := filesInGitRepo(repoRoot, revision, subdir)
   633  		if err != nil {
   634  			return err
   635  		}
   636  
   637  		filesToCreate = files
   638  	default:
   639  		return &UnrecognizedVCSError{RepoRoot: repoRoot}
   640  	}
   641  
   642  	return Create(w, m, filesToCreate)
   643  }
   644  
   645  // UnrecognizedVCSError indicates that no recognized version control system was
   646  // found in the given directory.
   647  type UnrecognizedVCSError struct {
   648  	RepoRoot string
   649  }
   650  
   651  func (e *UnrecognizedVCSError) Error() string {
   652  	return fmt.Sprintf("could not find a recognized version control system at %q", e.RepoRoot)
   653  }
   654  
   655  // filesInGitRepo filters out any files that are git ignored in the directory.
   656  func filesInGitRepo(dir, rev, subdir string) ([]File, error) {
   657  	stderr := bytes.Buffer{}
   658  	stdout := bytes.Buffer{}
   659  
   660  	// Incredibly, git produces different archives depending on whether
   661  	// it is running on a Windows system or not, in an attempt to normalize
   662  	// text file line endings. Setting -c core.autocrlf=input means only
   663  	// translate files on the way into the repo, not on the way out (archive).
   664  	// The -c core.eol=lf should be unnecessary but set it anyway.
   665  	//
   666  	// Note: We use git archive to understand which files are actually included,
   667  	// ignoring things like .gitignore'd files. We could also use other
   668  	// techniques like git ls-files, but this approach most closely matches what
   669  	// the Go command does, which is beneficial.
   670  	//
   671  	// Note: some of this code copied from https://go.googlesource.com/go/+/refs/tags/go1.16.5/src/cmd/go/internal/modfetch/codehost/git.go#826.
   672  	cmd := exec.Command("git", "-c", "core.autocrlf=input", "-c", "core.eol=lf", "archive", "--format=zip", rev)
   673  	if subdir != "" {
   674  		cmd.Args = append(cmd.Args, subdir)
   675  	}
   676  	cmd.Dir = dir
   677  	cmd.Env = append(os.Environ(), "PWD="+dir)
   678  	cmd.Stdout = &stdout
   679  	cmd.Stderr = &stderr
   680  	if err := cmd.Run(); err != nil {
   681  		return nil, fmt.Errorf("error running `git archive`: %w, %s", err, stderr.String())
   682  	}
   683  
   684  	rawReader := bytes.NewReader(stdout.Bytes())
   685  	zipReader, err := zip.NewReader(rawReader, int64(stdout.Len()))
   686  	if err != nil {
   687  		return nil, err
   688  	}
   689  
   690  	haveLICENSE := false
   691  	var fs []File
   692  	for _, zf := range zipReader.File {
   693  		if !strings.HasPrefix(zf.Name, subdir) || strings.HasSuffix(zf.Name, "/") {
   694  			continue
   695  		}
   696  
   697  		n := strings.TrimPrefix(zf.Name, subdir)
   698  		if n == "" {
   699  			continue
   700  		}
   701  		n = strings.TrimPrefix(n, "/")
   702  
   703  		fs = append(fs, zipFile{
   704  			name: n,
   705  			f:    zf,
   706  		})
   707  		if n == "LICENSE" {
   708  			haveLICENSE = true
   709  		}
   710  	}
   711  
   712  	if !haveLICENSE && subdir != "" {
   713  		// Note: this method of extracting the license from the root copied from
   714  		// https://go.googlesource.com/go/+/refs/tags/go1.20.4/src/cmd/go/internal/modfetch/coderepo.go#1118
   715  		// https://go.googlesource.com/go/+/refs/tags/go1.20.4/src/cmd/go/internal/modfetch/codehost/git.go#657
   716  		cmd := exec.Command("git", "cat-file", "blob", rev+":LICENSE")
   717  		cmd.Dir = dir
   718  		cmd.Env = append(os.Environ(), "PWD="+dir)
   719  		stdout := bytes.Buffer{}
   720  		cmd.Stdout = &stdout
   721  		if err := cmd.Run(); err == nil {
   722  			fs = append(fs, dataFile{name: "LICENSE", data: stdout.Bytes()})
   723  		}
   724  	}
   725  
   726  	return fs, nil
   727  }
   728  
   729  // isGitRepo reports whether the given directory is a git repo.
   730  func isGitRepo(dir string) bool {
   731  	stdout := &bytes.Buffer{}
   732  	cmd := exec.Command("git", "rev-parse", "--git-dir")
   733  	cmd.Dir = dir
   734  	cmd.Env = append(os.Environ(), "PWD="+dir)
   735  	cmd.Stdout = stdout
   736  	if err := cmd.Run(); err != nil {
   737  		return false
   738  	}
   739  	gitDir := strings.TrimSpace(stdout.String())
   740  	if !filepath.IsAbs(gitDir) {
   741  		gitDir = filepath.Join(dir, gitDir)
   742  	}
   743  	wantDir := filepath.Join(dir, ".git")
   744  	return wantDir == gitDir
   745  }
   746  
   747  type dirFile struct {
   748  	filePath, slashPath string
   749  	info                os.FileInfo
   750  }
   751  
   752  func (f dirFile) Path() string                 { return f.slashPath }
   753  func (f dirFile) Lstat() (os.FileInfo, error)  { return f.info, nil }
   754  func (f dirFile) Open() (io.ReadCloser, error) { return os.Open(f.filePath) }
   755  
   756  type zipFile struct {
   757  	name string
   758  	f    *zip.File
   759  }
   760  
   761  func (f zipFile) Path() string                 { return f.name }
   762  func (f zipFile) Lstat() (os.FileInfo, error)  { return f.f.FileInfo(), nil }
   763  func (f zipFile) Open() (io.ReadCloser, error) { return f.f.Open() }
   764  
   765  type dataFile struct {
   766  	name string
   767  	data []byte
   768  }
   769  
   770  func (f dataFile) Path() string                 { return f.name }
   771  func (f dataFile) Lstat() (os.FileInfo, error)  { return dataFileInfo{f}, nil }
   772  func (f dataFile) Open() (io.ReadCloser, error) { return io.NopCloser(bytes.NewReader(f.data)), nil }
   773  
   774  type dataFileInfo struct {
   775  	f dataFile
   776  }
   777  
   778  func (fi dataFileInfo) Name() string       { return path.Base(fi.f.name) }
   779  func (fi dataFileInfo) Size() int64        { return int64(len(fi.f.data)) }
   780  func (fi dataFileInfo) Mode() os.FileMode  { return 0644 }
   781  func (fi dataFileInfo) ModTime() time.Time { return time.Time{} }
   782  func (fi dataFileInfo) IsDir() bool        { return false }
   783  func (fi dataFileInfo) Sys() interface{}   { return nil }
   784  
   785  // isVendoredPackage attempts to report whether the given filename is contained
   786  // in a package whose import path contains (but does not end with) the component
   787  // "vendor".
   788  //
   789  // The 'vers' parameter specifies the Go version declared in the module's
   790  // go.mod file and must be a valid Go version according to the
   791  // go/version.IsValid function.
   792  // Vendoring behavior has evolved across Go versions, so this function adapts
   793  // its logic accordingly.
   794  func isVendoredPackage(name string, vers string) bool {
   795  	// vendor/modules.txt is a vendored package but was included in 1.23 and earlier.
   796  	// Remove vendor/modules.txt only for 1.24 and beyond to preserve older checksums.
   797  	if version.Compare(vers, "go1.24") >= 0 && name == "vendor/modules.txt" {
   798  		return true
   799  	}
   800  	var i int
   801  	if strings.HasPrefix(name, "vendor/") {
   802  		i += len("vendor/")
   803  	} else if j := strings.Index(name, "/vendor/"); j >= 0 {
   804  		// Calculate the correct starting position within the import path
   805  		// to determine if a package is vendored.
   806  		//
   807  		// Due to a bug in Go versions before 1.24
   808  		// (see https://golang.org/issue/37397), the "/vendor/" prefix within
   809  		// a package path was not always correctly interpreted.
   810  		//
   811  		// This bug affected how vendored packages were identified in cases like:
   812  		//
   813  		//   - "pkg/vendor/vendor.go"   (incorrectly identified as vendored in pre-1.24)
   814  		//   - "pkg/vendor/foo/foo.go" (correctly identified as vendored)
   815  		//
   816  		// To correct this, in Go 1.24 and later, we skip the entire "/vendor/" prefix
   817  		// when it's part of a nested package path (as in the first example above).
   818  		// In earlier versions, we only skipped the length of "/vendor/", leading
   819  		// to the incorrect behavior.
   820  		if version.Compare(vers, "go1.24") >= 0 {
   821  			i = j + len("/vendor/")
   822  		} else {
   823  			i += len("/vendor/")
   824  		}
   825  	} else {
   826  		return false
   827  	}
   828  	return strings.Contains(name[i:], "/")
   829  }
   830  
   831  // Unzip extracts the contents of a module zip file to a directory.
   832  //
   833  // Unzip checks all restrictions listed in the package documentation and returns
   834  // an error if the zip archive is not valid. In some cases, files may be written
   835  // to dir before an error is returned (for example, if a file's uncompressed
   836  // size does not match its declared size).
   837  //
   838  // dir may or may not exist: Unzip will create it and any missing parent
   839  // directories if it doesn't exist. If dir exists, it must be empty.
   840  func Unzip(dir string, m module.Version, zipFile string) (err error) {
   841  	defer func() {
   842  		if err != nil {
   843  			err = &zipError{verb: "unzip", path: zipFile, err: err}
   844  		}
   845  	}()
   846  
   847  	// Check that the directory is empty. Don't create it yet in case there's
   848  	// an error reading the zip.
   849  	if files, _ := os.ReadDir(dir); len(files) > 0 {
   850  		return fmt.Errorf("target directory %v exists and is not empty", dir)
   851  	}
   852  
   853  	// Open the zip and check that it satisfies all restrictions.
   854  	f, err := os.Open(zipFile)
   855  	if err != nil {
   856  		return err
   857  	}
   858  	defer f.Close()
   859  	z, cf, err := checkZip(m, f)
   860  	if err != nil {
   861  		return err
   862  	}
   863  	if err := cf.Err(); err != nil {
   864  		return err
   865  	}
   866  
   867  	// Unzip, enforcing sizes declared in the zip file.
   868  	prefix := fmt.Sprintf("%s@%s/", m.Path, m.Version)
   869  	if err := os.MkdirAll(dir, 0777); err != nil {
   870  		return err
   871  	}
   872  	for _, zf := range z.File {
   873  		name := zf.Name[len(prefix):]
   874  		if name == "" || strings.HasSuffix(name, "/") {
   875  			continue
   876  		}
   877  		dst := filepath.Join(dir, name)
   878  		if err := os.MkdirAll(filepath.Dir(dst), 0777); err != nil {
   879  			return err
   880  		}
   881  		w, err := os.OpenFile(dst, os.O_WRONLY|os.O_CREATE|os.O_EXCL, 0444)
   882  		if err != nil {
   883  			return err
   884  		}
   885  		r, err := zf.Open()
   886  		if err != nil {
   887  			w.Close()
   888  			return err
   889  		}
   890  		lr := &io.LimitedReader{R: r, N: int64(zf.UncompressedSize64) + 1}
   891  		_, err = io.Copy(w, lr)
   892  		r.Close()
   893  		if err != nil {
   894  			w.Close()
   895  			return err
   896  		}
   897  		if err := w.Close(); err != nil {
   898  			return err
   899  		}
   900  		if lr.N <= 0 {
   901  			return fmt.Errorf("uncompressed size of file %s is larger than declared size (%d bytes)", zf.Name, zf.UncompressedSize64)
   902  		}
   903  	}
   904  
   905  	return nil
   906  }
   907  
   908  // collisionChecker finds case-insensitive name collisions and paths that
   909  // are listed as both files and directories.
   910  //
   911  // The keys of this map are processed with strToFold. pathInfo has the original
   912  // path for each folded path.
   913  type collisionChecker map[string]pathInfo
   914  
   915  type pathInfo struct {
   916  	path  string
   917  	isDir bool
   918  }
   919  
   920  func (cc collisionChecker) check(p string, isDir bool) error {
   921  	fold := strToFold(p)
   922  	if other, ok := cc[fold]; ok {
   923  		if p != other.path {
   924  			return fmt.Errorf("case-insensitive file name collision: %q and %q", other.path, p)
   925  		}
   926  		if isDir != other.isDir {
   927  			return fmt.Errorf("entry %q is both a file and a directory", p)
   928  		}
   929  		if !isDir {
   930  			return fmt.Errorf("multiple entries for file %q", p)
   931  		}
   932  		// It's not an error if check is called with the same directory multiple
   933  		// times. check is called recursively on parent directories, so check
   934  		// may be called on the same directory many times.
   935  	} else {
   936  		cc[fold] = pathInfo{path: p, isDir: isDir}
   937  	}
   938  
   939  	if parent := path.Dir(p); parent != "." {
   940  		return cc.check(parent, true)
   941  	}
   942  	return nil
   943  }
   944  
   945  // listFilesInDir walks the directory tree rooted at dir and returns a list of
   946  // files, as well as a list of directories and files that were skipped (for
   947  // example, nested modules and symbolic links).
   948  func listFilesInDir(dir string) (files []File, omitted []FileError, err error) {
   949  	// Extract the Go language version from the root "go.mod" file.
   950  	// This ensures we correctly interpret Go version-specific file omissions.
   951  	var vers string
   952  	if data, err := os.ReadFile(filepath.Join(dir, "go.mod")); err == nil {
   953  		vers = version.Lang(parseGoVers("go.mod", data))
   954  	}
   955  	err = filepath.Walk(dir, func(filePath string, info os.FileInfo, err error) error {
   956  		if err != nil {
   957  			return err
   958  		}
   959  		relPath, err := filepath.Rel(dir, filePath)
   960  		if err != nil {
   961  			return err
   962  		}
   963  		slashPath := filepath.ToSlash(relPath)
   964  
   965  		// Skip some subdirectories inside vendor.
   966  		// We would like Create and CreateFromDir to produce the same result
   967  		// for a set of files, whether expressed as a directory tree or zip.
   968  		if isVendoredPackage(slashPath, vers) {
   969  			omitted = append(omitted, FileError{Path: slashPath, Err: errVendored})
   970  			return nil
   971  		}
   972  
   973  		if info.IsDir() {
   974  			if filePath == dir {
   975  				// Don't skip the top-level directory.
   976  				return nil
   977  			}
   978  
   979  			// Skip VCS directories.
   980  			// fossil repos are regular files with arbitrary names, so we don't try
   981  			// to exclude them.
   982  			switch filepath.Base(filePath) {
   983  			case ".bzr", ".git", ".hg", ".svn":
   984  				omitted = append(omitted, FileError{Path: slashPath, Err: errVCS})
   985  				return filepath.SkipDir
   986  			}
   987  
   988  			// Skip submodules (directories containing go.mod files).
   989  			if goModInfo, err := os.Lstat(filepath.Join(filePath, "go.mod")); err == nil && !goModInfo.IsDir() {
   990  				omitted = append(omitted, FileError{Path: slashPath, Err: errSubmoduleDir})
   991  				return filepath.SkipDir
   992  			}
   993  			return nil
   994  		}
   995  
   996  		// Skip irregular files and files in vendor directories.
   997  		// Irregular files are ignored. They're typically symbolic links.
   998  		if !info.Mode().IsRegular() {
   999  			omitted = append(omitted, FileError{Path: slashPath, Err: errNotRegular})
  1000  			return nil
  1001  		}
  1002  
  1003  		files = append(files, dirFile{
  1004  			filePath:  filePath,
  1005  			slashPath: slashPath,
  1006  			info:      info,
  1007  		})
  1008  		return nil
  1009  	})
  1010  	if err != nil {
  1011  		return nil, nil, err
  1012  	}
  1013  	return files, omitted, nil
  1014  }
  1015  
  1016  type zipError struct {
  1017  	verb, path string
  1018  	err        error
  1019  }
  1020  
  1021  func (e *zipError) Error() string {
  1022  	if e.path == "" {
  1023  		return fmt.Sprintf("%s: %v", e.verb, e.err)
  1024  	} else {
  1025  		return fmt.Sprintf("%s %s: %v", e.verb, e.path, e.err)
  1026  	}
  1027  }
  1028  
  1029  func (e *zipError) Unwrap() error {
  1030  	return e.err
  1031  }
  1032  
  1033  // strToFold returns a string with the property that
  1034  //
  1035  //	strings.EqualFold(s, t) iff strToFold(s) == strToFold(t)
  1036  //
  1037  // This lets us test a large set of strings for fold-equivalent
  1038  // duplicates without making a quadratic number of calls
  1039  // to EqualFold. Note that strings.ToUpper and strings.ToLower
  1040  // do not have the desired property in some corner cases.
  1041  func strToFold(s string) string {
  1042  	// Fast path: all ASCII, no upper case.
  1043  	// Most paths look like this already.
  1044  	for i := 0; i < len(s); i++ {
  1045  		c := s[i]
  1046  		if c >= utf8.RuneSelf || 'A' <= c && c <= 'Z' {
  1047  			goto Slow
  1048  		}
  1049  	}
  1050  	return s
  1051  
  1052  Slow:
  1053  	var buf bytes.Buffer
  1054  	for _, r := range s {
  1055  		// SimpleFold(x) cycles to the next equivalent rune > x
  1056  		// or wraps around to smaller values. Iterate until it wraps,
  1057  		// and we've found the minimum value.
  1058  		for {
  1059  			r0 := r
  1060  			r = unicode.SimpleFold(r0)
  1061  			if r <= r0 {
  1062  				break
  1063  			}
  1064  		}
  1065  		// Exception to allow fast path above: A-Z => a-z
  1066  		if 'A' <= r && r <= 'Z' {
  1067  			r += 'a' - 'A'
  1068  		}
  1069  		buf.WriteRune(r)
  1070  	}
  1071  	return buf.String()
  1072  }
  1073  

View as plain text