diff options
Diffstat (limited to 'vendor/github.com/mholt/archiver/v3/zip.go')
-rw-r--r-- | vendor/github.com/mholt/archiver/v3/zip.go | 605 |
1 files changed, 605 insertions, 0 deletions
diff --git a/vendor/github.com/mholt/archiver/v3/zip.go b/vendor/github.com/mholt/archiver/v3/zip.go new file mode 100644 index 0000000000..192bf60766 --- /dev/null +++ b/vendor/github.com/mholt/archiver/v3/zip.go @@ -0,0 +1,605 @@ +package archiver + +import ( + "archive/zip" + "bytes" + "compress/flate" + "fmt" + "io" + "log" + "os" + "path" + "path/filepath" + "strings" +) + +// Zip provides facilities for operating ZIP archives. +// See https://pkware.cachefly.net/webdocs/casestudies/APPNOTE.TXT. +type Zip struct { + // The compression level to use, as described + // in the compress/flate package. + CompressionLevel int + + // Whether to overwrite existing files; if false, + // an error is returned if the file exists. + OverwriteExisting bool + + // Whether to make all the directories necessary + // to create a zip archive in the desired path. + MkdirAll bool + + // If enabled, selective compression will only + // compress files which are not already in a + // compressed format; this is decided based + // simply on file extension. + SelectiveCompression bool + + // A single top-level folder can be implicitly + // created by the Archive or Unarchive methods + // if the files to be added to the archive + // or the files to be extracted from the archive + // do not all have a common root. This roughly + // mimics the behavior of archival tools integrated + // into OS file browsers which create a subfolder + // to avoid unexpectedly littering the destination + // folder with potentially many files, causing a + // problematic cleanup/organization situation. + // This feature is available for both creation + // and extraction of archives, but may be slightly + // inefficient with lots and lots of files, + // especially on extraction. + ImplicitTopLevelFolder bool + + // If true, errors encountered during reading + // or writing a single file will be logged and + // the operation will continue on remaining files. + ContinueOnError bool + + zw *zip.Writer + zr *zip.Reader + ridx int +} + +// CheckExt ensures the file extension matches the format. +func (*Zip) CheckExt(filename string) error { + if !strings.HasSuffix(filename, ".zip") { + return fmt.Errorf("filename must have a .zip extension") + } + return nil +} + +// Archive creates a .zip file at destination containing +// the files listed in sources. The destination must end +// with ".zip". File paths can be those of regular files +// or directories. Regular files are stored at the 'root' +// of the archive, and directories are recursively added. +func (z *Zip) Archive(sources []string, destination string) error { + err := z.CheckExt(destination) + if err != nil { + return fmt.Errorf("checking extension: %v", err) + } + if !z.OverwriteExisting && fileExists(destination) { + return fmt.Errorf("file already exists: %s", destination) + } + + // make the folder to contain the resulting archive + // if it does not already exist + destDir := filepath.Dir(destination) + if z.MkdirAll && !fileExists(destDir) { + err := mkdir(destDir, 0755) + if err != nil { + return fmt.Errorf("making folder for destination: %v", err) + } + } + + out, err := os.Create(destination) + if err != nil { + return fmt.Errorf("creating %s: %v", destination, err) + } + defer out.Close() + + err = z.Create(out) + if err != nil { + return fmt.Errorf("creating zip: %v", err) + } + defer z.Close() + + var topLevelFolder string + if z.ImplicitTopLevelFolder && multipleTopLevels(sources) { + topLevelFolder = folderNameFromFileName(destination) + } + + for _, source := range sources { + err := z.writeWalk(source, topLevelFolder, destination) + if err != nil { + return fmt.Errorf("walking %s: %v", source, err) + } + } + + return nil +} + +// Unarchive unpacks the .zip file at source to destination. +// Destination will be treated as a folder name. +func (z *Zip) Unarchive(source, destination string) error { + if !fileExists(destination) && z.MkdirAll { + err := mkdir(destination, 0755) + if err != nil { + return fmt.Errorf("preparing destination: %v", err) + } + } + + file, err := os.Open(source) + if err != nil { + return fmt.Errorf("opening source file: %v", err) + } + defer file.Close() + + fileInfo, err := file.Stat() + if err != nil { + return fmt.Errorf("statting source file: %v", err) + } + + err = z.Open(file, fileInfo.Size()) + if err != nil { + return fmt.Errorf("opening zip archive for reading: %v", err) + } + defer z.Close() + + // if the files in the archive do not all share a common + // root, then make sure we extract to a single subfolder + // rather than potentially littering the destination... + if z.ImplicitTopLevelFolder { + files := make([]string, len(z.zr.File)) + for i := range z.zr.File { + files[i] = z.zr.File[i].Name + } + if multipleTopLevels(files) { + destination = filepath.Join(destination, folderNameFromFileName(source)) + } + } + + for { + err := z.extractNext(destination) + if err == io.EOF { + break + } + if err != nil { + if z.ContinueOnError { + log.Printf("[ERROR] Reading file in zip archive: %v", err) + continue + } + return fmt.Errorf("reading file in zip archive: %v", err) + } + } + + return nil +} + +func (z *Zip) extractNext(to string) error { + f, err := z.Read() + if err != nil { + return err // don't wrap error; calling loop must break on io.EOF + } + defer f.Close() + return z.extractFile(f, to) +} + +func (z *Zip) extractFile(f File, to string) error { + header, ok := f.Header.(zip.FileHeader) + if !ok { + return fmt.Errorf("expected header to be zip.FileHeader but was %T", f.Header) + } + + to = filepath.Join(to, header.Name) + + // if a directory, no content; simply make the directory and return + if f.IsDir() { + return mkdir(to, f.Mode()) + } + + // do not overwrite existing files, if configured + if !z.OverwriteExisting && fileExists(to) { + return fmt.Errorf("file already exists: %s", to) + } + + // extract symbolic links as symbolic links + if isSymlink(header.FileInfo()) { + // symlink target is the contents of the file + buf := new(bytes.Buffer) + _, err := io.Copy(buf, f) + if err != nil { + return fmt.Errorf("%s: reading symlink target: %v", header.Name, err) + } + return writeNewSymbolicLink(to, strings.TrimSpace(buf.String())) + } + + return writeNewFile(to, f, f.Mode()) +} + +func (z *Zip) writeWalk(source, topLevelFolder, destination string) error { + sourceInfo, err := os.Stat(source) + if err != nil { + return fmt.Errorf("%s: stat: %v", source, err) + } + destAbs, err := filepath.Abs(destination) + if err != nil { + return fmt.Errorf("%s: getting absolute path of destination %s: %v", source, destination, err) + } + + return filepath.Walk(source, func(fpath string, info os.FileInfo, err error) error { + handleErr := func(err error) error { + if z.ContinueOnError { + log.Printf("[ERROR] Walking %s: %v", fpath, err) + return nil + } + return err + } + if err != nil { + return handleErr(fmt.Errorf("traversing %s: %v", fpath, err)) + } + if info == nil { + return handleErr(fmt.Errorf("%s: no file info", fpath)) + } + + // make sure we do not copy the output file into the output + // file; that results in an infinite loop and disk exhaustion! + fpathAbs, err := filepath.Abs(fpath) + if err != nil { + return handleErr(fmt.Errorf("%s: getting absolute path: %v", fpath, err)) + } + if within(fpathAbs, destAbs) { + return nil + } + + // build the name to be used within the archive + nameInArchive, err := makeNameInArchive(sourceInfo, source, topLevelFolder, fpath) + if err != nil { + return handleErr(err) + } + + var file io.ReadCloser + if info.Mode().IsRegular() { + file, err = os.Open(fpath) + if err != nil { + return handleErr(fmt.Errorf("%s: opening: %v", fpath, err)) + } + defer file.Close() + } + err = z.Write(File{ + FileInfo: FileInfo{ + FileInfo: info, + CustomName: nameInArchive, + }, + ReadCloser: file, + }) + if err != nil { + return handleErr(fmt.Errorf("%s: writing: %s", fpath, err)) + } + + return nil + }) +} + +// Create opens z for writing a ZIP archive to out. +func (z *Zip) Create(out io.Writer) error { + if z.zw != nil { + return fmt.Errorf("zip archive is already created for writing") + } + z.zw = zip.NewWriter(out) + if z.CompressionLevel != flate.DefaultCompression { + z.zw.RegisterCompressor(zip.Deflate, func(out io.Writer) (io.WriteCloser, error) { + return flate.NewWriter(out, z.CompressionLevel) + }) + } + return nil +} + +// Write writes f to z, which must have been opened for writing first. +func (z *Zip) Write(f File) error { + if z.zw == nil { + return fmt.Errorf("zip archive was not created for writing first") + } + if f.FileInfo == nil { + return fmt.Errorf("no file info") + } + if f.FileInfo.Name() == "" { + return fmt.Errorf("missing file name") + } + + header, err := zip.FileInfoHeader(f) + if err != nil { + return fmt.Errorf("%s: getting header: %v", f.Name(), err) + } + + if f.IsDir() { + header.Name += "/" // required - strangely no mention of this in zip spec? but is in godoc... + header.Method = zip.Store + } else { + ext := strings.ToLower(path.Ext(header.Name)) + if _, ok := compressedFormats[ext]; ok && z.SelectiveCompression { + header.Method = zip.Store + } else { + header.Method = zip.Deflate + } + } + + writer, err := z.zw.CreateHeader(header) + if err != nil { + return fmt.Errorf("%s: making header: %v", f.Name(), err) + } + + return z.writeFile(f, writer) +} + +func (z *Zip) writeFile(f File, writer io.Writer) error { + if f.IsDir() { + return nil // directories have no contents + } + if isSymlink(f) { + // file body for symlinks is the symlink target + linkTarget, err := os.Readlink(f.Name()) + if err != nil { + return fmt.Errorf("%s: readlink: %v", f.Name(), err) + } + _, err = writer.Write([]byte(filepath.ToSlash(linkTarget))) + if err != nil { + return fmt.Errorf("%s: writing symlink target: %v", f.Name(), err) + } + return nil + } + + if f.ReadCloser == nil { + return fmt.Errorf("%s: no way to read file contents", f.Name()) + } + _, err := io.Copy(writer, f) + if err != nil { + return fmt.Errorf("%s: copying contents: %v", f.Name(), err) + } + + return nil +} + +// Open opens z for reading an archive from in, +// which is expected to have the given size and +// which must be an io.ReaderAt. +func (z *Zip) Open(in io.Reader, size int64) error { + inRdrAt, ok := in.(io.ReaderAt) + if !ok { + return fmt.Errorf("reader must be io.ReaderAt") + } + if z.zr != nil { + return fmt.Errorf("zip archive is already open for reading") + } + var err error + z.zr, err = zip.NewReader(inRdrAt, size) + if err != nil { + return fmt.Errorf("creating reader: %v", err) + } + z.ridx = 0 + return nil +} + +// Read reads the next file from z, which must have +// already been opened for reading. If there are no +// more files, the error is io.EOF. The File must +// be closed when finished reading from it. +func (z *Zip) Read() (File, error) { + if z.zr == nil { + return File{}, fmt.Errorf("zip archive is not open") + } + if z.ridx >= len(z.zr.File) { + return File{}, io.EOF + } + + // access the file and increment counter so that + // if there is an error processing this file, the + // caller can still iterate to the next file + zf := z.zr.File[z.ridx] + z.ridx++ + + file := File{ + FileInfo: zf.FileInfo(), + Header: zf.FileHeader, + } + + rc, err := zf.Open() + if err != nil { + return file, fmt.Errorf("%s: open compressed file: %v", zf.Name, err) + } + file.ReadCloser = rc + + return file, nil +} + +// Close closes the zip archive(s) opened by Create and Open. +func (z *Zip) Close() error { + if z.zr != nil { + z.zr = nil + } + if z.zw != nil { + zw := z.zw + z.zw = nil + return zw.Close() + } + return nil +} + +// Walk calls walkFn for each visited item in archive. +func (z *Zip) Walk(archive string, walkFn WalkFunc) error { + zr, err := zip.OpenReader(archive) + if err != nil { + return fmt.Errorf("opening zip reader: %v", err) + } + defer zr.Close() + + for _, zf := range zr.File { + zfrc, err := zf.Open() + if err != nil { + zfrc.Close() + if z.ContinueOnError { + log.Printf("[ERROR] Opening %s: %v", zf.Name, err) + continue + } + return fmt.Errorf("opening %s: %v", zf.Name, err) + } + + err = walkFn(File{ + FileInfo: zf.FileInfo(), + Header: zf.FileHeader, + ReadCloser: zfrc, + }) + zfrc.Close() + if err != nil { + if err == ErrStopWalk { + break + } + if z.ContinueOnError { + log.Printf("[ERROR] Walking %s: %v", zf.Name, err) + continue + } + return fmt.Errorf("walking %s: %v", zf.Name, err) + } + } + + return nil +} + +// Extract extracts a single file from the zip archive. +// If the target is a directory, the entire folder will +// be extracted into destination. +func (z *Zip) Extract(source, target, destination string) error { + // target refers to a path inside the archive, which should be clean also + target = path.Clean(target) + + // if the target ends up being a directory, then + // we will continue walking and extracting files + // until we are no longer within that directory + var targetDirPath string + + return z.Walk(source, func(f File) error { + zfh, ok := f.Header.(zip.FileHeader) + if !ok { + return fmt.Errorf("expected header to be zip.FileHeader but was %T", f.Header) + } + + // importantly, cleaning the path strips tailing slash, + // which must be appended to folders within the archive + name := path.Clean(zfh.Name) + if f.IsDir() && target == name { + targetDirPath = path.Dir(name) + } + + if within(target, zfh.Name) { + // either this is the exact file we want, or is + // in the directory we want to extract + + // build the filename we will extract to + end, err := filepath.Rel(targetDirPath, zfh.Name) + if err != nil { + return fmt.Errorf("relativizing paths: %v", err) + } + joined := filepath.Join(destination, end) + + err = z.extractFile(f, joined) + if err != nil { + return fmt.Errorf("extracting file %s: %v", zfh.Name, err) + } + + // if our target was not a directory, stop walk + if targetDirPath == "" { + return ErrStopWalk + } + } else if targetDirPath != "" { + // finished walking the entire directory + return ErrStopWalk + } + + return nil + }) +} + +// Match returns true if the format of file matches this +// type's format. It should not affect reader position. +func (*Zip) Match(file io.ReadSeeker) (bool, error) { + currentPos, err := file.Seek(0, io.SeekCurrent) + if err != nil { + return false, err + } + _, err = file.Seek(0, 0) + if err != nil { + return false, err + } + defer file.Seek(currentPos, io.SeekStart) + + buf := make([]byte, 4) + if n, err := file.Read(buf); err != nil || n < 4 { + return false, nil + } + return bytes.Equal(buf, []byte("PK\x03\x04")), nil +} + +func (z *Zip) String() string { return "zip" } + +// NewZip returns a new, default instance ready to be customized and used. +func NewZip() *Zip { + return &Zip{ + CompressionLevel: flate.DefaultCompression, + MkdirAll: true, + SelectiveCompression: true, + } +} + +// Compile-time checks to ensure type implements desired interfaces. +var ( + _ = Reader(new(Zip)) + _ = Writer(new(Zip)) + _ = Archiver(new(Zip)) + _ = Unarchiver(new(Zip)) + _ = Walker(new(Zip)) + _ = Extractor(new(Zip)) + _ = Matcher(new(Zip)) + _ = ExtensionChecker(new(Zip)) +) + +// compressedFormats is a (non-exhaustive) set of lowercased +// file extensions for formats that are typically already +// compressed. Compressing files that are already compressed +// is inefficient, so use this set of extension to avoid that. +var compressedFormats = map[string]struct{}{ + ".7z": {}, + ".avi": {}, + ".br": {}, + ".bz2": {}, + ".cab": {}, + ".docx": {}, + ".gif": {}, + ".gz": {}, + ".jar": {}, + ".jpeg": {}, + ".jpg": {}, + ".lz": {}, + ".lz4": {}, + ".lzma": {}, + ".m4v": {}, + ".mov": {}, + ".mp3": {}, + ".mp4": {}, + ".mpeg": {}, + ".mpg": {}, + ".png": {}, + ".pptx": {}, + ".rar": {}, + ".sz": {}, + ".tbz2": {}, + ".tgz": {}, + ".tsz": {}, + ".txz": {}, + ".xlsx": {}, + ".xz": {}, + ".zip": {}, + ".zipx": {}, +} + +// DefaultZip is a default instance that is conveniently ready to use. +var DefaultZip = NewZip() |