// Copyright 2023 The Gitea Authors. All rights reserved.
// SPDX-License-Identifier: MIT

package actions

import (
	"crypto/md5"
	"encoding/base64"
	"fmt"
	"io"
	"path/filepath"
	"sort"
	"time"

	"code.gitea.io/gitea/models/actions"
	"code.gitea.io/gitea/models/db"
	"code.gitea.io/gitea/modules/log"
	"code.gitea.io/gitea/modules/storage"
)

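// saveUploadChunk streams one uploaded chunk from the request body into
// object storage while computing its MD5 on the fly, then verifies the sum
// against the artifactXActionsResultsMD5Header sent by the client. It returns
// the total artifact size declared by the Content-Range header, or -1 on error.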
func saveUploadChunk(st storage.ObjectStorage, ctx *ArtifactContext,
	artifact *actions.ActionArtifact,
	contentSize, runID int64,
) (int64, error) {
	// parse content-range header, format: bytes 0-1023/146515
	contentRange := ctx.Req.Header.Get("Content-Range")
	start, end, length := int64(0), int64(0), int64(0)
	if _, err := fmt.Sscanf(contentRange, "bytes %d-%d/%d", &start, &end, &length); err != nil {
		log.Warn("parse content range error: %v, content-range: %s", err, contentRange)
		return -1, fmt.Errorf("parse content range error: %v", err)
	}
	// build the chunk storage path, e.g. "tmp{runID}/{runID}-{artifactID}-{start}-{end}.chunk"
	storagePath := fmt.Sprintf("tmp%d/%d-%d-%d-%d.chunk", runID, runID, artifact.ID, start, end)
	// use io.TeeReader so the MD5 sum is computed while the body streams to
	// storage, instead of buffering the whole body just to hash it.
	// if the hash does not match, the stored chunk is deleted below.
	hasher := md5.New()
	r := io.TeeReader(ctx.Req.Body, hasher)
	// save chunk to storage
	writtenSize, err := st.Save(storagePath, r, -1)
	if err != nil {
		return -1, fmt.Errorf("save chunk to storage error: %v", err)
	}
	// check md5
	reqMd5String := ctx.Req.Header.Get(artifactXActionsResultsMD5Header)
	chunkMd5String := base64.StdEncoding.EncodeToString(hasher.Sum(nil))
	log.Info("[artifact] check chunk md5, sum: %s, header: %s", chunkMd5String, reqMd5String)
	// if the md5 does not match or the written size differs from the declared content size, delete the chunk
	if reqMd5String != chunkMd5String || writtenSize != contentSize {
		if err := st.Delete(storagePath); err != nil {
			log.Error("Error deleting chunk: %s, %v", storagePath, err)
		}
		return -1, fmt.Errorf("md5 not match")
	}
	log.Info("[artifact] save chunk %s, size: %d, artifact id: %d, start: %d, end: %d",
		storagePath, contentSize, artifact.ID, start, end)
	// return the total artifact size declared in the Content-Range header
	return length, nil
}

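// chunkFileItem describes one stored chunk file; all fields except Path are
// parsed from the file name, which encodes the run id, artifact id and byte range.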
type chunkFileItem struct {
	RunID      int64
	ArtifactID int64
	Start      int64
	End        int64
	Path       string
}

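// listChunksByRunID scans the run's temporary storage directory ("tmp{runID}")
// and returns the chunk files found there, grouped by artifact id.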
func listChunksByRunID(st storage.ObjectStorage, runID int64) (map[int64][]*chunkFileItem, error) {
	storageDir := fmt.Sprintf("tmp%d", runID)
	var chunks []*chunkFileItem
	if err := st.IterateObjects(storageDir, func(fpath string, obj storage.Object) error {
		baseName := filepath.Base(fpath)
		// paths read back from storage contain only the storage dir and the
		// basename, regardless of any subdirectory setting in the storage config
		item := chunkFileItem{Path: storageDir + "/" + baseName}
		if _, err := fmt.Sscanf(baseName, "%d-%d-%d-%d.chunk", &item.RunID, &item.ArtifactID, &item.Start, &item.End); err != nil {
			return fmt.Errorf("parse chunk file name error: %v, %s", err, baseName)
		}
		chunks = append(chunks, &item)
		return nil
	}); err != nil {
		return nil, err
	}
	// group the chunks by artifact id
	chunksMap := make(map[int64][]*chunkFileItem)
	for _, c := range chunks {
		chunksMap[c.ArtifactID] = append(chunksMap[c.ArtifactID], c)
	}
	return chunksMap, nil
}

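// mergeChunksForRun looks up the run's artifacts with the given name in the
// database and merges the uploaded chunks of each one into its final file.
// Artifacts that have no chunks in storage yet are skipped.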
func mergeChunksForRun(ctx *ArtifactContext, st storage.ObjectStorage, runID int64, artifactName string) error {
	// read all db artifacts by name
	artifacts, err := db.Find[actions.ActionArtifact](ctx, actions.FindArtifactsOptions{
		RunID:        runID,
		ArtifactName: artifactName,
	})
	if err != nil {
		return err
	}
	// read all uploaded chunks for this run from storage
	chunksMap, err := listChunksByRunID(st, runID)
	if err != nil {
		return err
	}
	// merge the chunks of each db artifact
	for _, art := range artifacts {
		chunks, ok := chunksMap[art.ID]
		if !ok {
			log.Debug("artifact %d chunks not found", art.ID)
			continue
		}
		if err := mergeChunksForArtifact(ctx, chunks, st, art); err != nil {
			return err
		}
	}
	return nil
}

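// mergeChunksForArtifact concatenates an artifact's chunks, in byte order and
// with duplicates dropped, into a single object in storage and marks the
// artifact's upload as confirmed. If the chunks do not yet cover the full
// compressed size, it returns nil so a later call can retry the merge.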
func mergeChunksForArtifact(ctx *ArtifactContext, chunks []*chunkFileItem, st storage.ObjectStorage, artifact *actions.ActionArtifact) error {
	sort.Slice(chunks, func(i, j int) bool {
		return chunks[i].Start < chunks[j].Start
	})
	allChunks := make([]*chunkFileItem, 0)
	startAt := int64(-1)
	// check that the chunks are in order and complete, skipping repeated chunks
	for _, c := range chunks {
		// startAt == -1 means this is the first chunk, which must start at 0.
		// a chunk is in order when c.Start == startAt+1, i.e. it continues
		// directly after the previous chunk's End; repeated chunks fail this
		// check and are dropped, while a gap (c.Start > startAt+1) leaves the
		// sequence incomplete and is caught below.
		if c.Start == (startAt + 1) {
			allChunks = append(allChunks, c)
			startAt = c.End
		}
	}
	// if the last chunk's End + 1 does not equal artifact.FileCompressedSize,
	// not all chunks have been uploaded yet; leave them for a later merge
	if startAt+1 != artifact.FileCompressedSize {
		log.Debug("[artifact] chunks not fully uploaded yet, artifact_id: %d", artifact.ID)
		return nil
	}
	// chain the chunk readers into a single stream with io.MultiReader
	readers := make([]io.Reader, 0, len(allChunks))
	closeReaders := func() {
		for _, r := range readers {
			_ = r.(io.Closer).Close() // each reader is an io.ReadCloser returned by st.Open below
		}
		readers = nil
	}
	defer closeReaders()
	for _, c := range allChunks {
		var readCloser io.ReadCloser
		var err error
		if readCloser, err = st.Open(c.Path); err != nil {
			return fmt.Errorf("open chunk error: %v, %s", err, c.Path)
		}
		readers = append(readers, readCloser)
	}
	mergedReader := io.MultiReader(readers...)

	// if the chunks are gzip-compressed, use "chunk.gz" as the extension;
	// the download-artifact action uses the Content-Encoding header to decide
	// whether it needs to decompress the file
	extension := "chunk"
	if artifact.ContentEncoding == "gzip" {
		extension = "chunk.gz"
	}

	// save the merged file under a path sharded by run id and artifact id (modulo 255)
	storagePath := fmt.Sprintf("%d/%d/%d.%s", artifact.RunID%255, artifact.ID%255, time.Now().UnixNano(), extension)
	written, err := st.Save(storagePath, mergedReader, -1)
	if err != nil {
		return fmt.Errorf("save merged file error: %v", err)
	}
	if written != artifact.FileCompressedSize {
		return fmt.Errorf("merged file size %d does not match expected compressed size %d", written, artifact.FileCompressedSize)
	}

	defer func() {
		closeReaders() // readers must be closed before their underlying chunks are deleted
		// the merge succeeded, so remove the now-redundant chunks
		for _, c := range chunks {
			if err := st.Delete(c.Path); err != nil {
				log.Warn("Error deleting chunk: %s, %v", c.Path, err)
			}
		}
	}()

	// record the storage path on the artifact and mark the upload as confirmed
	log.Debug("[artifact] merged chunks into artifact: %d, %s", artifact.ID, storagePath)
	artifact.StoragePath = storagePath
	artifact.Status = int64(actions.ArtifactStatusUploadConfirmed)
	if err := actions.UpdateArtifactByID(ctx, artifact.ID, artifact); err != nil {
		return fmt.Errorf("update artifact error: %v", err)
	}

	return nil
}