aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorzeripath <art27@cantab.net>2023-02-28 22:55:43 +0000
committerGitHub <noreply@github.com>2023-02-28 17:55:43 -0500
commit27e49cd01cf33c7adfd7e1a897d95884f7714aca (patch)
tree149f4712bdfaf48172996c477ecf7cbf6e72113e
parent04347eb810689db799003cc342bbbc756716ff12 (diff)
downloadgitea-27e49cd01cf33c7adfd7e1a897d95884f7714aca.tar.gz
gitea-27e49cd01cf33c7adfd7e1a897d95884f7714aca.zip
Properly flush unique queues on startup (#23154)
There have been a number of reports of PRs being blocked whilst being checked which have been difficult to debug. In investigating #23050 I have realised that whilst the Warn there is somewhat of a miscall there was a real bug in the way that the LevelUniqueQueue was being restored on start-up of the PersistableChannelUniqueQueue. Next there is a conflict in the setting of the internal leveldb queue name - This wasn't being set so it was being overridden by other unique queues. This PR fixes these bugs and adds a testcase. Thanks to @brechtvl for noticing the second issue. Fix #23050 and others --------- Signed-off-by: Andrew Thornton <art27@cantab.net> Co-authored-by: techknowlogick <techknowlogick@gitea.io>
-rw-r--r--modules/queue/queue_channel.go5
-rw-r--r--modules/queue/queue_disk_channel.go29
-rw-r--r--modules/queue/queue_disk_channel_test.go8
-rw-r--r--modules/queue/unique_queue_channel.go4
-rw-r--r--modules/queue/unique_queue_channel_test.go7
-rw-r--r--modules/queue/unique_queue_disk_channel.go41
-rw-r--r--modules/queue/unique_queue_disk_channel_test.go259
7 files changed, 332 insertions, 21 deletions
diff --git a/modules/queue/queue_channel.go b/modules/queue/queue_channel.go
index 6f75b8357e..baac097393 100644
--- a/modules/queue/queue_channel.go
+++ b/modules/queue/queue_channel.go
@@ -124,7 +124,10 @@ func (q *ChannelQueue) Shutdown() {
log.Trace("ChannelQueue: %s Flushing", q.name)
// We can't use Cleanup here because that will close the channel
if err := q.FlushWithContext(q.terminateCtx); err != nil {
- log.Warn("ChannelQueue: %s Terminated before completed flushing", q.name)
+ count := atomic.LoadInt64(&q.numInQueue)
+ if count > 0 {
+ log.Warn("ChannelQueue: %s Terminated before completed flushing", q.name)
+ }
return
}
log.Debug("ChannelQueue: %s Flushed", q.name)
diff --git a/modules/queue/queue_disk_channel.go b/modules/queue/queue_disk_channel.go
index c7526714c6..91f91f0dfc 100644
--- a/modules/queue/queue_disk_channel.go
+++ b/modules/queue/queue_disk_channel.go
@@ -94,7 +94,8 @@ func NewPersistableChannelQueue(handle HandlerFunc, cfg, exemplar interface{}) (
},
Workers: 0,
},
- DataDir: config.DataDir,
+ DataDir: config.DataDir,
+ QueueName: config.Name + "-level",
}
levelQueue, err := NewLevelQueue(wrappedHandle, levelCfg, exemplar)
@@ -172,16 +173,18 @@ func (q *PersistableChannelQueue) Run(atShutdown, atTerminate func(func())) {
atShutdown(q.Shutdown)
atTerminate(q.Terminate)
- if lq, ok := q.internal.(*LevelQueue); ok && lq.byteFIFO.Len(lq.shutdownCtx) != 0 {
+ if lq, ok := q.internal.(*LevelQueue); ok && lq.byteFIFO.Len(lq.terminateCtx) != 0 {
// Just run the level queue - we shut it down once it's flushed
go q.internal.Run(func(_ func()) {}, func(_ func()) {})
go func() {
- for !q.IsEmpty() {
- _ = q.internal.Flush(0)
+ for !lq.IsEmpty() {
+ _ = lq.Flush(0)
select {
case <-time.After(100 * time.Millisecond):
- case <-q.internal.(*LevelQueue).shutdownCtx.Done():
- log.Warn("LevelQueue: %s shut down before completely flushed", q.internal.(*LevelQueue).Name())
+ case <-lq.shutdownCtx.Done():
+ if lq.byteFIFO.Len(lq.terminateCtx) > 0 {
+ log.Warn("LevelQueue: %s shut down before completely flushed", q.internal.(*LevelQueue).Name())
+ }
return
}
}
@@ -316,10 +319,22 @@ func (q *PersistableChannelQueue) Shutdown() {
// Redirect all remaining data in the chan to the internal channel
log.Trace("PersistableChannelQueue: %s Redirecting remaining data", q.delayedStarter.name)
close(q.channelQueue.dataChan)
+ countOK, countLost := 0, 0
for data := range q.channelQueue.dataChan {
- _ = q.internal.Push(data)
+ err := q.internal.Push(data)
+ if err != nil {
+ log.Error("PersistableChannelQueue: %s Unable redirect %v due to: %v", q.delayedStarter.name, data, err)
+ countLost++
+ } else {
+ countOK++
+ }
atomic.AddInt64(&q.channelQueue.numInQueue, -1)
}
+ if countLost > 0 {
+ log.Warn("PersistableChannelQueue: %s %d will be restored on restart, %d lost", q.delayedStarter.name, countOK, countLost)
+ } else if countOK > 0 {
+ log.Warn("PersistableChannelQueue: %s %d will be restored on restart", q.delayedStarter.name, countOK)
+ }
log.Trace("PersistableChannelQueue: %s Done Redirecting remaining data", q.delayedStarter.name)
log.Debug("PersistableChannelQueue: %s Shutdown", q.delayedStarter.name)
diff --git a/modules/queue/queue_disk_channel_test.go b/modules/queue/queue_disk_channel_test.go
index 318610355e..4f14a5d79d 100644
--- a/modules/queue/queue_disk_channel_test.go
+++ b/modules/queue/queue_disk_channel_test.go
@@ -39,7 +39,7 @@ func TestPersistableChannelQueue(t *testing.T) {
Workers: 1,
BoostWorkers: 0,
MaxWorkers: 10,
- Name: "first",
+ Name: "test-queue",
}, &testData{})
assert.NoError(t, err)
@@ -135,7 +135,7 @@ func TestPersistableChannelQueue(t *testing.T) {
Workers: 1,
BoostWorkers: 0,
MaxWorkers: 10,
- Name: "second",
+ Name: "test-queue",
}, &testData{})
assert.NoError(t, err)
@@ -227,7 +227,7 @@ func TestPersistableChannelQueue_Pause(t *testing.T) {
Workers: 1,
BoostWorkers: 0,
MaxWorkers: 10,
- Name: "first",
+ Name: "test-queue",
}, &testData{})
assert.NoError(t, err)
@@ -433,7 +433,7 @@ func TestPersistableChannelQueue_Pause(t *testing.T) {
Workers: 1,
BoostWorkers: 0,
MaxWorkers: 10,
- Name: "second",
+ Name: "test-queue",
}, &testData{})
assert.NoError(t, err)
pausable, ok = queue.(Pausable)
diff --git a/modules/queue/unique_queue_channel.go b/modules/queue/unique_queue_channel.go
index c43bd1db3f..62c051aa39 100644
--- a/modules/queue/unique_queue_channel.go
+++ b/modules/queue/unique_queue_channel.go
@@ -177,7 +177,9 @@ func (q *ChannelUniqueQueue) Shutdown() {
go func() {
log.Trace("ChannelUniqueQueue: %s Flushing", q.name)
if err := q.FlushWithContext(q.terminateCtx); err != nil {
- log.Warn("ChannelUniqueQueue: %s Terminated before completed flushing", q.name)
+ if !q.IsEmpty() {
+ log.Warn("ChannelUniqueQueue: %s Terminated before completed flushing", q.name)
+ }
return
}
log.Debug("ChannelUniqueQueue: %s Flushed", q.name)
diff --git a/modules/queue/unique_queue_channel_test.go b/modules/queue/unique_queue_channel_test.go
index 9372694b87..824015b834 100644
--- a/modules/queue/unique_queue_channel_test.go
+++ b/modules/queue/unique_queue_channel_test.go
@@ -8,10 +8,13 @@ import (
"testing"
"time"
+ "code.gitea.io/gitea/modules/log"
+
"github.com/stretchr/testify/assert"
)
func TestChannelUniqueQueue(t *testing.T) {
+ _ = log.NewLogger(1000, "console", "console", `{"level":"warn","stacktracelevel":"NONE","stderr":true}`)
handleChan := make(chan *testData)
handle := func(data ...Data) []Data {
for _, datum := range data {
@@ -52,6 +55,8 @@ func TestChannelUniqueQueue(t *testing.T) {
}
func TestChannelUniqueQueue_Batch(t *testing.T) {
+ _ = log.NewLogger(1000, "console", "console", `{"level":"warn","stacktracelevel":"NONE","stderr":true}`)
+
handleChan := make(chan *testData)
handle := func(data ...Data) []Data {
for _, datum := range data {
@@ -98,6 +103,8 @@ func TestChannelUniqueQueue_Batch(t *testing.T) {
}
func TestChannelUniqueQueue_Pause(t *testing.T) {
+ _ = log.NewLogger(1000, "console", "console", `{"level":"warn","stacktracelevel":"NONE","stderr":true}`)
+
lock := sync.Mutex{}
var queue Queue
var err error
diff --git a/modules/queue/unique_queue_disk_channel.go b/modules/queue/unique_queue_disk_channel.go
index 405726182d..cc8a807c67 100644
--- a/modules/queue/unique_queue_disk_channel.go
+++ b/modules/queue/unique_queue_disk_channel.go
@@ -94,7 +94,8 @@ func NewPersistableChannelUniqueQueue(handle HandlerFunc, cfg, exemplar interfac
},
Workers: 0,
},
- DataDir: config.DataDir,
+ DataDir: config.DataDir,
+ QueueName: config.Name + "-level",
}
queue.channelQueue = channelUniqueQueue.(*ChannelUniqueQueue)
@@ -209,17 +210,29 @@ func (q *PersistableChannelUniqueQueue) Run(atShutdown, atTerminate func(func())
atTerminate(q.Terminate)
_ = q.channelQueue.AddWorkers(q.channelQueue.workers, 0)
- if luq, ok := q.internal.(*LevelUniqueQueue); ok && luq.ByteFIFOUniqueQueue.byteFIFO.Len(luq.shutdownCtx) != 0 {
+ if luq, ok := q.internal.(*LevelUniqueQueue); ok && !luq.IsEmpty() {
// Just run the level queue - we shut it down once it's flushed
- go q.internal.Run(func(_ func()) {}, func(_ func()) {})
+ go luq.Run(func(_ func()) {}, func(_ func()) {})
go func() {
- _ = q.internal.Flush(0)
- log.Debug("LevelUniqueQueue: %s flushed so shutting down", q.internal.(*LevelUniqueQueue).Name())
- q.internal.(*LevelUniqueQueue).Shutdown()
- GetManager().Remove(q.internal.(*LevelUniqueQueue).qid)
+ _ = luq.Flush(0)
+ for !luq.IsEmpty() {
+ _ = luq.Flush(0)
+ select {
+ case <-time.After(100 * time.Millisecond):
+ case <-luq.shutdownCtx.Done():
+ if luq.byteFIFO.Len(luq.terminateCtx) > 0 {
+ log.Warn("LevelUniqueQueue: %s shut down before completely flushed", luq.Name())
+ }
+ return
+ }
+ }
+ log.Debug("LevelUniqueQueue: %s flushed so shutting down", luq.Name())
+ luq.Shutdown()
+ GetManager().Remove(luq.qid)
}()
} else {
log.Debug("PersistableChannelUniqueQueue: %s Skipping running the empty level queue", q.delayedStarter.name)
+ _ = q.internal.Flush(0)
q.internal.(*LevelUniqueQueue).Shutdown()
GetManager().Remove(q.internal.(*LevelUniqueQueue).qid)
}
@@ -285,8 +298,20 @@ func (q *PersistableChannelUniqueQueue) Shutdown() {
// Redirect all remaining data in the chan to the internal channel
close(q.channelQueue.dataChan)
log.Trace("PersistableChannelUniqueQueue: %s Redirecting remaining data", q.delayedStarter.name)
+ countOK, countLost := 0, 0
for data := range q.channelQueue.dataChan {
- _ = q.internal.Push(data)
+ err := q.internal.(*LevelUniqueQueue).Push(data)
+ if err != nil {
+ log.Error("PersistableChannelUniqueQueue: %s Unable redirect %v due to: %v", q.delayedStarter.name, data, err)
+ countLost++
+ } else {
+ countOK++
+ }
+ }
+ if countLost > 0 {
+ log.Warn("PersistableChannelUniqueQueue: %s %d will be restored on restart, %d lost", q.delayedStarter.name, countOK, countLost)
+ } else if countOK > 0 {
+ log.Warn("PersistableChannelUniqueQueue: %s %d will be restored on restart", q.delayedStarter.name, countOK)
}
log.Trace("PersistableChannelUniqueQueue: %s Done Redirecting remaining data", q.delayedStarter.name)
diff --git a/modules/queue/unique_queue_disk_channel_test.go b/modules/queue/unique_queue_disk_channel_test.go
new file mode 100644
index 0000000000..fd76163f4a
--- /dev/null
+++ b/modules/queue/unique_queue_disk_channel_test.go
@@ -0,0 +1,259 @@
+// Copyright 2023 The Gitea Authors. All rights reserved.
+// SPDX-License-Identifier: MIT
+
+package queue
+
+import (
+ "fmt"
+ "strconv"
+ "sync"
+ "testing"
+ "time"
+
+ "code.gitea.io/gitea/modules/log"
+
+ "github.com/stretchr/testify/assert"
+)
+
+func TestPersistableChannelUniqueQueue(t *testing.T) {
+ tmpDir := t.TempDir()
+ fmt.Printf("TempDir %s\n", tmpDir)
+ _ = log.NewLogger(1000, "console", "console", `{"level":"warn","stacktracelevel":"NONE","stderr":true}`)
+
+ // Common function to create the Queue
+ newQueue := func(name string, handle func(data ...Data) []Data) Queue {
+ q, err := NewPersistableChannelUniqueQueue(handle,
+ PersistableChannelUniqueQueueConfiguration{
+ Name: name,
+ DataDir: tmpDir,
+ QueueLength: 200,
+ MaxWorkers: 1,
+ BlockTimeout: 1 * time.Second,
+ BoostTimeout: 5 * time.Minute,
+ BoostWorkers: 1,
+ Workers: 0,
+ }, "task-0")
+ assert.NoError(t, err)
+ return q
+ }
+
+ // runs the provided queue and provides some timer function
+ type channels struct {
+ readyForShutdown chan struct{} // closed when shutdown functions have been assigned
+ readyForTerminate chan struct{} // closed when terminate functions have been assigned
+ signalShutdown chan struct{} // Should close to signal shutdown
+ doneShutdown chan struct{} // closed when shutdown function is done
+ queueTerminate []func() // list of atTerminate functions to call atTerminate - need to be accessed with lock
+ }
+ runQueue := func(q Queue, lock *sync.Mutex) *channels {
+ chans := &channels{
+ readyForShutdown: make(chan struct{}),
+ readyForTerminate: make(chan struct{}),
+ signalShutdown: make(chan struct{}),
+ doneShutdown: make(chan struct{}),
+ }
+ go q.Run(func(atShutdown func()) {
+ go func() {
+ lock.Lock()
+ select {
+ case <-chans.readyForShutdown:
+ default:
+ close(chans.readyForShutdown)
+ }
+ lock.Unlock()
+ <-chans.signalShutdown
+ atShutdown()
+ close(chans.doneShutdown)
+ }()
+ }, func(atTerminate func()) {
+ lock.Lock()
+ defer lock.Unlock()
+ select {
+ case <-chans.readyForTerminate:
+ default:
+ close(chans.readyForTerminate)
+ }
+ chans.queueTerminate = append(chans.queueTerminate, atTerminate)
+ })
+
+ return chans
+ }
+
+ // call to shutdown and terminate the queue associated with the channels
+ doTerminate := func(chans *channels, lock *sync.Mutex) {
+ <-chans.readyForTerminate
+
+ lock.Lock()
+ callbacks := []func(){}
+ callbacks = append(callbacks, chans.queueTerminate...)
+ lock.Unlock()
+
+ for _, callback := range callbacks {
+ callback()
+ }
+ }
+
+ mapLock := sync.Mutex{}
+ executedInitial := map[string][]string{}
+ hasInitial := map[string][]string{}
+
+ fillQueue := func(name string, done chan struct{}) {
+ t.Run("Initial Filling: "+name, func(t *testing.T) {
+ lock := sync.Mutex{}
+
+ startAt100Queued := make(chan struct{})
+ stopAt20Shutdown := make(chan struct{}) // stop and shutdown at the 20th item
+
+ handle := func(data ...Data) []Data {
+ <-startAt100Queued
+ for _, datum := range data {
+ s := datum.(string)
+ mapLock.Lock()
+ executedInitial[name] = append(executedInitial[name], s)
+ mapLock.Unlock()
+ if s == "task-20" {
+ close(stopAt20Shutdown)
+ }
+ }
+ return nil
+ }
+
+ q := newQueue(name, handle)
+
+ // add 100 tasks to the queue
+ for i := 0; i < 100; i++ {
+ _ = q.Push("task-" + strconv.Itoa(i))
+ }
+ close(startAt100Queued)
+
+ chans := runQueue(q, &lock)
+
+ <-chans.readyForShutdown
+ <-stopAt20Shutdown
+ close(chans.signalShutdown)
+ <-chans.doneShutdown
+ _ = q.Push("final")
+
+ // check which tasks are still in the queue
+ for i := 0; i < 100; i++ {
+ if has, _ := q.(UniqueQueue).Has("task-" + strconv.Itoa(i)); has {
+ mapLock.Lock()
+ hasInitial[name] = append(hasInitial[name], "task-"+strconv.Itoa(i))
+ mapLock.Unlock()
+ }
+ }
+ if has, _ := q.(UniqueQueue).Has("final"); has {
+ mapLock.Lock()
+ hasInitial[name] = append(hasInitial[name], "final")
+ mapLock.Unlock()
+ } else {
+ assert.Fail(t, "UnqueQueue %s should have \"final\"", name)
+ }
+ doTerminate(chans, &lock)
+ mapLock.Lock()
+ assert.Equal(t, 101, len(executedInitial[name])+len(hasInitial[name]))
+ mapLock.Unlock()
+ })
+ close(done)
+ }
+
+ doneA := make(chan struct{})
+ doneB := make(chan struct{})
+
+ go fillQueue("QueueA", doneA)
+ go fillQueue("QueueB", doneB)
+
+ <-doneA
+ <-doneB
+
+ executedEmpty := map[string][]string{}
+ hasEmpty := map[string][]string{}
+ emptyQueue := func(name string, done chan struct{}) {
+ t.Run("Empty Queue: "+name, func(t *testing.T) {
+ lock := sync.Mutex{}
+ stop := make(chan struct{})
+
+ // collect the tasks that have been executed
+ handle := func(data ...Data) []Data {
+ lock.Lock()
+ for _, datum := range data {
+ mapLock.Lock()
+ executedEmpty[name] = append(executedEmpty[name], datum.(string))
+ mapLock.Unlock()
+ if datum.(string) == "final" {
+ close(stop)
+ }
+ }
+ lock.Unlock()
+ return nil
+ }
+
+ q := newQueue(name, handle)
+ chans := runQueue(q, &lock)
+
+ <-chans.readyForShutdown
+ <-stop
+ close(chans.signalShutdown)
+ <-chans.doneShutdown
+
+ // check which tasks are still in the queue
+ for i := 0; i < 100; i++ {
+ if has, _ := q.(UniqueQueue).Has("task-" + strconv.Itoa(i)); has {
+ mapLock.Lock()
+ hasEmpty[name] = append(hasEmpty[name], "task-"+strconv.Itoa(i))
+ mapLock.Unlock()
+ }
+ }
+ doTerminate(chans, &lock)
+
+ mapLock.Lock()
+ assert.Equal(t, 101, len(executedInitial[name])+len(executedEmpty[name]))
+ assert.Equal(t, 0, len(hasEmpty[name]))
+ mapLock.Unlock()
+ })
+ close(done)
+ }
+
+ doneA = make(chan struct{})
+ doneB = make(chan struct{})
+
+ go emptyQueue("QueueA", doneA)
+ go emptyQueue("QueueB", doneB)
+
+ <-doneA
+ <-doneB
+
+ mapLock.Lock()
+ t.Logf("TestPersistableChannelUniqueQueue executedInitiallyA=%v, executedInitiallyB=%v, executedToEmptyA=%v, executedToEmptyB=%v",
+ len(executedInitial["QueueA"]), len(executedInitial["QueueB"]), len(executedEmpty["QueueA"]), len(executedEmpty["QueueB"]))
+
+ // reset and rerun
+ executedInitial = map[string][]string{}
+ hasInitial = map[string][]string{}
+ executedEmpty = map[string][]string{}
+ hasEmpty = map[string][]string{}
+ mapLock.Unlock()
+
+ doneA = make(chan struct{})
+ doneB = make(chan struct{})
+
+ go fillQueue("QueueA", doneA)
+ go fillQueue("QueueB", doneB)
+
+ <-doneA
+ <-doneB
+
+ doneA = make(chan struct{})
+ doneB = make(chan struct{})
+
+ go emptyQueue("QueueA", doneA)
+ go emptyQueue("QueueB", doneB)
+
+ <-doneA
+ <-doneB
+
+ mapLock.Lock()
+ t.Logf("TestPersistableChannelUniqueQueue executedInitiallyA=%v, executedInitiallyB=%v, executedToEmptyA=%v, executedToEmptyB=%v",
+ len(executedInitial["QueueA"]), len(executedInitial["QueueB"]), len(executedEmpty["QueueA"]), len(executedEmpty["QueueB"]))
+ mapLock.Unlock()
+}