Go语言学习#14-实现tar.gz格式的压缩解压


=Start=

缘由:

在学习Go 语言的过程中整理总结一下常见功能的实现代码,方便以后需要的时候参考。

正文:

参考解答:

话不多说,先上代码。先让代码能够运行起来,再按需逐步理解和改进。

package main

import (
    "archive/tar"
    "compress/gzip"
    "flag"
    "fmt"
    "io"
    "log"
    "os"
    "path"
)

// main shows how to TarGz a directory and UnTarGz the resulting archive.
//
// Flags:
//
//	-srcDir      source directory (or file) to archive, default "testdata"
//	-targetName  path of the .tar.gz file to create, default "testdata.tar.gz"
func main() {
	// Keep the pointers returned by flag.String and dereference them only
	// after flag.Parse; dereferencing earlier always yields the defaults.
	srcDirPath := flag.String("srcDir", "testdata", "The source directory name.")
	targetFilePath := flag.String("targetName", "testdata.tar.gz", "The target file name.")
	flag.Parse()

	fmt.Printf("srcDirPath: %s, type: %T\n", *srcDirPath, *srcDirPath)
	fmt.Printf("targetFilePath: %s, type: %T\n", *targetFilePath, *targetFilePath)

	TarGz(*srcDirPath, *targetFilePath)           // compress
	UnTarGz(*targetFilePath, *srcDirPath+"_temp") // extract
}

// TarGz packs srcDirPath, which may be a directory or a single file, into a
// gzip-compressed tar archive written to destFilePath.
//
// Failures are reported through handleError. When the source cannot be
// inspected or the destination cannot be created the function returns
// without writing anything.
func TarGz(srcDirPath string, destFilePath string) {
	// Inspect the source first so that no empty archive is left behind when
	// the source does not exist.
	fi, err := os.Stat(srcDirPath)
	if err != nil {
		handleError(err)
		return
	}

	fw, err := os.Create(destFilePath)
	if err != nil {
		handleError(err)
		return
	}
	defer func() {
		if err := fw.Close(); err != nil {
			handleError(err)
		}
	}()

	// Gzip writer. Deferred closes run in reverse order, so the tar writer
	// is flushed first, then gzip, then the file.
	gw := gzip.NewWriter(fw)
	defer func() {
		if err := gw.Close(); err != nil {
			handleError(err)
		}
	}()

	// Tar writer
	tw := tar.NewWriter(gw)
	defer func() {
		if err := tw.Close(); err != nil {
			handleError(err)
		}
	}()

	if fi.IsDir() {
		// Archive the directory tree under its own base name.
		fmt.Println("Creating tar.gz from directory...")
		tarGzDir(srcDirPath, path.Base(srcDirPath), tw)
	} else {
		// Archive the single file directly.
		fmt.Println("Creating tar.gz from " + fi.Name() + "...")
		tarGzFile(srcDirPath, fi.Name(), tw, fi)
	}
	fmt.Println("Well done!")
}

// tarGzDir recursively adds the contents of srcDirPath to tw.
//
// recPath is the path of srcDirPath inside the archive; each level of
// recursion appends the entry name to it. Sub-directories are descended into
// first and their own directory entry is written afterwards. Errors are
// reported through handleError.
func tarGzDir(srcDirPath string, recPath string, tw *tar.Writer) {
	// Open source directory
	dir, err := os.Open(srcDirPath)
	if err != nil {
		handleError(err)
		return
	}

	// Read all entries, then release the handle right away so that deep
	// trees do not keep one descriptor open per recursion level.
	fis, err := dir.Readdir(0)
	if err != nil {
		// Readdir may still return the entries read so far; keep going
		// with those on a best-effort basis.
		handleError(err)
	}
	if err := dir.Close(); err != nil {
		handleError(err)
	}

	for _, fi := range fis {
		curPath := srcDirPath + "/" + fi.Name()
		entryPath := recPath + "/" + fi.Name()

		if fi.IsDir() {
			// The directory entry itself is added after all of its
			// children have been added.
			fmt.Printf("Adding path...%s\n", curPath)
			tarGzDir(curPath, entryPath, tw)
		} else {
			fmt.Printf("Adding file...%s\n", curPath)
		}

		tarGzFile(curPath, entryPath, tw, fi)
	}
}

// tarGzFile writes a single entry to tw.
//
// srcFile is the path on disk, recPath is the entry name inside the archive
// and fi describes the entry. For a directory only a header (name with a
// trailing "/") is written; for anything else the header is followed by the
// file content. Errors are reported through handleError, and nothing is
// written to the archive when the source file cannot be opened.
func tarGzFile(srcFile string, recPath string, tw *tar.Writer, fi os.FileInfo) {
	hdr := &tar.Header{
		// Only the permission bits belong in a tar header; Go's file-type
		// bits (such as os.ModeDir) are expressed through Typeflag instead.
		Mode:    int64(fi.Mode().Perm()),
		ModTime: fi.ModTime(),
	}

	if fi.IsDir() {
		// A trailing '/' alone can mark a directory, but Typeflag must be
		// set as well or extraction runs into errors.
		hdr.Name = recPath + "/"
		hdr.Typeflag = tar.TypeDir
		if err := tw.WriteHeader(hdr); err != nil {
			handleError(err)
		}
		return
	}

	// Open the source before writing the header: a header that announces
	// Size bytes without the data following would corrupt the archive.
	fr, err := os.Open(srcFile)
	if err != nil {
		handleError(err)
		return
	}
	defer fr.Close()

	hdr.Name = recPath
	hdr.Typeflag = tar.TypeReg
	hdr.Size = fi.Size()
	if err := tw.WriteHeader(hdr); err != nil {
		handleError(err)
		return
	}

	// Write file data
	if _, err := io.Copy(tw, fr); err != nil {
		handleError(err)
	}
}

// UnTarGz extracts the gzip-compressed tar archive srcFilePath into the
// directory destDirPath, creating that directory when needed.
//
// Directory entries are created as directories; every other entry is written
// as a file. Entries whose name is absolute or climbs out of destDirPath via
// ".." are skipped, because archive names are untrusted input. Errors are
// reported through handleError; a failure to read the archive stops the
// extraction.
func UnTarGz(srcFilePath string, destDirPath string) {
	fmt.Println("UnTarGzing " + srcFilePath + "...")

	fr, err := os.Open(srcFilePath)
	if err != nil {
		handleError(err)
		return
	}
	defer fr.Close()

	// Gzip reader
	gr, err := gzip.NewReader(fr)
	if err != nil {
		handleError(err)
		return
	}
	defer gr.Close()

	// Create destination directory
	if err := os.MkdirAll(destDirPath, os.ModePerm); err != nil {
		handleError(err)
		return
	}

	// Tar reader
	tr := tar.NewReader(gr)
	for {
		hdr, err := tr.Next()
		if err == io.EOF {
			// End of tar archive
			break
		}
		if err != nil {
			// hdr is not usable after a read error, so stop here.
			handleError(err)
			return
		}
		fmt.Println("UnTarGzing file..." + hdr.Name)

		// Tar entry names are slash-separated; normalise and refuse
		// anything that would land outside destDirPath.
		name := path.Clean(hdr.Name)
		if path.IsAbs(name) || name == ".." || (len(name) > 2 && name[:3] == "../") {
			handleError(fmt.Errorf("skipping %q: entry escapes destination directory", hdr.Name))
			continue
		}
		target := destDirPath + "/" + name

		if hdr.Typeflag == tar.TypeDir {
			// Create the directory so that empty directories survive.
			if err := os.MkdirAll(target, os.ModePerm); err != nil {
				handleError(err)
			}
			continue
		}

		// Create the parent directory before creating the file.
		if err := os.MkdirAll(path.Dir(target), os.ModePerm); err != nil {
			handleError(err)
			continue
		}
		if err := writeTarEntry(target, tr); err != nil {
			handleError(err)
		}
	}
	fmt.Println("Well done!")
}

// writeTarEntry copies r into a newly created file at target and closes the
// file before returning, so descriptors do not pile up across entries.
func writeTarEntry(target string, r io.Reader) (err error) {
	fw, err := os.Create(target)
	if err != nil {
		return err
	}
	defer func() {
		if cerr := fw.Close(); err == nil {
			err = cerr
		}
	}()

	_, err = io.Copy(fw, r)
	return err
}

// handleError logs err through the standard logger. A nil error is ignored,
// so callers may pass the result of any call without producing "<nil>" noise.
// It does not stop the program.
func handleError(err error) {
	if err != nil {
		log.Println(err)
	}
}

另一种实现(仅压缩,基于 filepath.Walk 遍历目录):

package main

import (
    "archive/tar"
    "compress/gzip"
    "flag"
    "fmt"
    "io"
    "os"
    "path/filepath"
    "strings"
)

// main archives the directory given by -srcDir (default "testdata") into the
// .tar.gz file given by -targetName (default "testdata.tar.gz"). It exits
// with status 1 when the archive cannot be created.
func main() {
	// Keep the pointers returned by flag.String and dereference them only
	// after flag.Parse; dereferencing earlier always yields the defaults.
	srcDirPath := flag.String("srcDir", "testdata", "The source directory name.")
	targetFilePath := flag.String("targetName", "testdata.tar.gz", "The target file name.")
	flag.Parse()

	fmt.Printf("srcDirPath: %s, type: %T\n", *srcDirPath, *srcDirPath)
	fmt.Printf("targetFilePath: %s, type: %T\n", *targetFilePath, *targetFilePath)

	// Compression only; extraction can be done with the tar command.
	if err := TarFilesDirs(*srcDirPath, *targetFilePath); err != nil {
		fmt.Fprintln(os.Stderr, err)
		os.Exit(1)
	}
}

// TarFilesDirs writes the file or directory tree at path into a
// gzip-compressed tar archive created at tarFilePath.
//
// It returns the first error encountered, including errors from closing the
// tar, gzip and file writers: a failed close means the archive is incomplete
// and must not be reported as success.
func TarFilesDirs(path string, tarFilePath string) (err error) {
	file, err := os.Create(tarFilePath)
	if err != nil {
		return err
	}
	// Deferred closes run in reverse order: tar first, then gzip, then the
	// file. Each one keeps an earlier error and otherwise records its own.
	defer func() {
		if cerr := file.Close(); err == nil {
			err = cerr
		}
	}()

	gz := gzip.NewWriter(file)
	defer func() {
		if cerr := gz.Close(); err == nil {
			err = cerr
		}
	}()

	tw := tar.NewWriter(gz)
	defer func() {
		if cerr := tw.Close(); err == nil {
			err = cerr
		}
	}()

	return tarit(path, tw)
}

// tarit walks source and writes every entry it finds to tw.
//
// When source is a directory, entry names are prefixed with the directory's
// base name, use forward slashes, and directory names end in "/". When source
// is a single file the entry is named after the file. Directories and
// symlinks are written as header-only entries; regular files are followed by
// their content. The first error stops the walk and is returned.
func tarit(source string, tw *tar.Writer) error {
	info, err := os.Stat(source)
	if err != nil {
		return err
	}
	var baseDir string
	if info.IsDir() {
		baseDir = filepath.Base(source)
	}

	return filepath.Walk(source,
		func(path string, info os.FileInfo, err error) error {
			if err != nil {
				return err
			}

			// Symlinks need their target recorded in the header.
			var link string
			if info.Mode()&os.ModeSymlink != 0 {
				if link, err = os.Readlink(path); err != nil {
					return err
				}
			}
			header, err := tar.FileInfoHeader(info, link)
			if err != nil {
				return err
			}
			if baseDir != "" {
				// Tar names are slash-separated regardless of the host OS.
				name := filepath.ToSlash(filepath.Join(baseDir, strings.TrimPrefix(path, source)))
				if info.IsDir() {
					name += "/"
				}
				header.Name = name
			}

			// The header is written for every entry type, so directories
			// and symlinks make it into the archive too.
			if err := tw.WriteHeader(header); err != nil {
				return err
			}
			if !info.Mode().IsRegular() {
				// Nothing more to do for non-regular entries.
				return nil
			}

			file, err := os.Open(path)
			if err != nil {
				return err
			}
			// The defer fires when this callback returns, i.e. once per file.
			defer file.Close()

			_, err = io.Copy(tw, file)
			return err
		})
}

 

参考链接:

=END=

, ,

《 “Go语言学习#14-实现tar.gz格式的压缩解压” 》 有 3 条评论

  1. 【Go】使用压缩文件优化io (一)
    https://blog.thinkeridea.com/201906/go/compress_file_io_optimization1.html
    `
    最近遇到一个日志备份 io 过高的问题,业务日志每十分钟备份一次,本来是用 Python 写一个根据规则扫描备份日志问题不大,但是随着业务越来越多,单机上的日志文件越来越大,文件数量也越来越多,导致每每备份的瞬间 io 阻塞严重, CPU 和 load 异常的高,好在备份速度很快,对业务影响不是很大,这个问题会随着业务增长,越来越明显,这段时间抽空对备份方式做了优化,效果十分显著,整理篇文章记录一下。
    `
    【Go】使用压缩文件优化io (二)
    https://blog.thinkeridea.com/201907/go/compress_file_io_optimization2.html

发表回复

您的邮箱地址不会被公开。 必填项已用 * 标注