path: root/modules/queue
author      wxiaoguang <wxiaoguang@gmail.com>  2023-05-26 15:31:55 +0800
committer   GitHub <noreply@github.com>        2023-05-26 07:31:55 +0000
commit      18f26cfbf7f9b36b838c0e8762bfba98c89b9797 (patch)
tree        f3bcf0ef2b76601980ab6144e9ce9acd5316a0da /modules/queue
parent      e4922d484b9ee94ba22e5ff08b3c25c8be09d9c8 (diff)
download    gitea-18f26cfbf7f9b36b838c0e8762bfba98c89b9797.tar.gz
            gitea-18f26cfbf7f9b36b838c0e8762bfba98c89b9797.zip
Improve queue and logger context (#24924)
Previously there was a "graceful function", `RunWithShutdownFns`, mainly for modules that do not support context. The old queue system did not work well with context, so the old queues needed it. After the queue refactoring, the new queue works well with context, so Go contexts are used as much as possible: `RunWithShutdownFns` can be removed (replaced by `RunWithCancel` for the context-cancel mechanism), and the related code can be simplified.

This PR also fixes some legacy queue-init problems, e.g.:

* typo in the archiver's error message: "unable to create codes indexer queue" => "unable to create repo-archive queue"
* no nil check for failed queues, which caused an unfriendly panic

After this PR, many goroutines get better display names:

![image](https://github.com/go-gitea/gitea/assets/2114189/701b2a9b-8065-4137-aeaa-0bda2b34604a)
![image](https://github.com/go-gitea/gitea/assets/2114189/f1d5f50f-0534-40f0-b0be-f2c9daa5fe92)
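As a minimal sketch (not part of this patch) of what a call site looks like after the change: the constructor now takes a context and may return nil, so callers are expected to check the result before starting the queue. The queue name, item type, and handler below are hypothetical.

```go
import (
	"context"
	"errors"
	"fmt"

	"code.gitea.io/gitea/modules/queue"
)

// initMailQueue is a hypothetical call site, not taken from this patch.
func initMailQueue(ctx context.Context) error {
	q := queue.CreateSimpleQueue(ctx, "mail", func(items ...string) (unhandled []string) {
		for _, it := range items {
			fmt.Println("processing", it) // placeholder work
		}
		return nil // nothing to requeue
	})
	if q == nil { // the constructor logs and returns nil on bad config, so a check avoids the old panic
		return errors.New("unable to create mail queue")
	}
	go q.Run() // stopped by cancelling ctx, calling q.Cancel(), or q.ShutdownWait(timeout)
	return nil
}
```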
Diffstat (limited to 'modules/queue')
-rw-r--r--   modules/queue/manager.go           12
-rw-r--r--   modules/queue/manager_test.go       6
-rw-r--r--   modules/queue/workergroup.go       24
-rw-r--r--   modules/queue/workerqueue.go       33
-rw-r--r--   modules/queue/workerqueue_test.go  30
5 files changed, 53 insertions(+), 52 deletions(-)
diff --git a/modules/queue/manager.go b/modules/queue/manager.go
index 95b3bad57b..8b964c0c28 100644
--- a/modules/queue/manager.go
+++ b/modules/queue/manager.go
@@ -88,22 +88,22 @@ func (m *Manager) FlushAll(ctx context.Context, timeout time.Duration) error {
}
// CreateSimpleQueue creates a simple queue from global setting config provider by name
-func CreateSimpleQueue[T any](name string, handler HandlerFuncT[T]) *WorkerPoolQueue[T] {
- return createWorkerPoolQueue(name, setting.CfgProvider, handler, false)
+func CreateSimpleQueue[T any](ctx context.Context, name string, handler HandlerFuncT[T]) *WorkerPoolQueue[T] {
+ return createWorkerPoolQueue(ctx, name, setting.CfgProvider, handler, false)
}
// CreateUniqueQueue creates a unique queue from global setting config provider by name
-func CreateUniqueQueue[T any](name string, handler HandlerFuncT[T]) *WorkerPoolQueue[T] {
- return createWorkerPoolQueue(name, setting.CfgProvider, handler, true)
+func CreateUniqueQueue[T any](ctx context.Context, name string, handler HandlerFuncT[T]) *WorkerPoolQueue[T] {
+ return createWorkerPoolQueue(ctx, name, setting.CfgProvider, handler, true)
}
-func createWorkerPoolQueue[T any](name string, cfgProvider setting.ConfigProvider, handler HandlerFuncT[T], unique bool) *WorkerPoolQueue[T] {
+func createWorkerPoolQueue[T any](ctx context.Context, name string, cfgProvider setting.ConfigProvider, handler HandlerFuncT[T], unique bool) *WorkerPoolQueue[T] {
queueSetting, err := setting.GetQueueSettings(cfgProvider, name)
if err != nil {
log.Error("Failed to get queue settings for %q: %v", name, err)
return nil
}
- w, err := NewWorkerPoolQueueBySetting(name, queueSetting, handler, unique)
+ w, err := NewWorkerPoolQueueWithContext(ctx, name, queueSetting, handler, unique)
if err != nil {
log.Error("Failed to create queue %q: %v", name, err)
return nil
diff --git a/modules/queue/manager_test.go b/modules/queue/manager_test.go
index 50265e27b6..1fd29f813f 100644
--- a/modules/queue/manager_test.go
+++ b/modules/queue/manager_test.go
@@ -29,7 +29,7 @@ func TestManager(t *testing.T) {
if err != nil {
return nil, err
}
- return NewWorkerPoolQueueBySetting(name, qs, func(s ...int) (unhandled []int) { return nil }, false)
+ return newWorkerPoolQueueForTest(name, qs, func(s ...int) (unhandled []int) { return nil }, false)
}
// test invalid CONN_STR
@@ -80,7 +80,7 @@ MAX_WORKERS = 2
assert.NoError(t, err)
- q1 := createWorkerPoolQueue[string]("no-such", cfgProvider, nil, false)
+ q1 := createWorkerPoolQueue[string](context.Background(), "no-such", cfgProvider, nil, false)
assert.Equal(t, "no-such", q1.GetName())
assert.Equal(t, "dummy", q1.GetType()) // no handler, so it becomes dummy
assert.Equal(t, filepath.Join(setting.AppDataPath, "queues/dir1"), q1.baseConfig.DataFullDir)
@@ -96,7 +96,7 @@ MAX_WORKERS = 2
assert.Equal(t, "string", q1.GetItemTypeName())
qid1 := GetManager().qidCounter
- q2 := createWorkerPoolQueue("sub", cfgProvider, func(s ...int) (unhandled []int) { return nil }, false)
+ q2 := createWorkerPoolQueue(context.Background(), "sub", cfgProvider, func(s ...int) (unhandled []int) { return nil }, false)
assert.Equal(t, "sub", q2.GetName())
assert.Equal(t, "level", q2.GetType())
assert.Equal(t, filepath.Join(setting.AppDataPath, "queues/dir2"), q2.baseConfig.DataFullDir)
diff --git a/modules/queue/workergroup.go b/modules/queue/workergroup.go
index 7127ea1117..147a4f335e 100644
--- a/modules/queue/workergroup.go
+++ b/modules/queue/workergroup.go
@@ -5,6 +5,7 @@ package queue
import (
"context"
+ "runtime/pprof"
"sync"
"sync/atomic"
"time"
@@ -13,9 +14,10 @@ import (
)
var (
- infiniteTimerC = make(chan time.Time)
- batchDebounceDuration = 100 * time.Millisecond
- workerIdleDuration = 1 * time.Second
+ infiniteTimerC = make(chan time.Time)
+ batchDebounceDuration = 100 * time.Millisecond
+ workerIdleDuration = 1 * time.Second
+ shutdownDefaultTimeout = 2 * time.Second
unhandledItemRequeueDuration atomic.Int64 // to avoid data race during test
)
@@ -116,13 +118,15 @@ func (q *WorkerPoolQueue[T]) doWorkerHandle(batch []T) {
// If the queue is shutting down, it returns true and try to push the items
// Otherwise it does nothing and returns false
func (q *WorkerPoolQueue[T]) basePushForShutdown(items ...T) bool {
- ctxShutdown := q.ctxShutdown.Load()
- if ctxShutdown == nil {
+ shutdownTimeout := time.Duration(q.shutdownTimeout.Load())
+ if shutdownTimeout == 0 {
return false
}
+ ctxShutdown, ctxShutdownCancel := context.WithTimeout(context.Background(), shutdownTimeout)
+ defer ctxShutdownCancel()
for _, item := range items {
// if there is still any error, the queue can do nothing instead of losing the items
- if err := q.baseQueue.PushItem(*ctxShutdown, q.marshal(item)); err != nil {
+ if err := q.baseQueue.PushItem(ctxShutdown, q.marshal(item)); err != nil {
log.Error("Failed to requeue item for queue %q when shutting down: %v", q.GetName(), err)
}
}
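Instead of a stored shutdown context, the requeue path now derives a short-lived timeout context on the spot. A hedged, generic sketch of the same pattern; the helper and its names are illustrative only, not this patch's code:

```go
import (
	"context"
	"log"
	"time"
)

// pushBackWithTimeout requeues leftover raw items but never blocks longer than
// timeout, so a hung base queue cannot stall shutdown.
func pushBackWithTimeout(push func(context.Context, []byte) error, items [][]byte, timeout time.Duration) {
	ctx, cancel := context.WithTimeout(context.Background(), timeout)
	defer cancel()
	for _, it := range items {
		if err := push(ctx, it); err != nil {
			log.Printf("failed to requeue item during shutdown: %v", err)
		}
	}
}
```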
@@ -246,6 +250,8 @@ var skipFlushChan = make(chan flushType) // an empty flush chan, used to skip re
// doRun is the main loop of the queue. All related "doXxx" functions are executed in its context.
func (q *WorkerPoolQueue[T]) doRun() {
+ pprof.SetGoroutineLabels(q.ctxRun)
+
log.Debug("Queue %q starts running", q.GetName())
defer log.Debug("Queue %q stops running", q.GetName())
@@ -271,8 +277,8 @@ func (q *WorkerPoolQueue[T]) doRun() {
}
}
- ctxShutdownPtr := q.ctxShutdown.Load()
- if ctxShutdownPtr != nil {
+ shutdownTimeout := time.Duration(q.shutdownTimeout.Load())
+ if shutdownTimeout != 0 {
// if there is a shutdown context, try to push the items back to the base queue
q.basePushForShutdown(unhandled...)
workerDone := make(chan struct{})
@@ -280,7 +286,7 @@ func (q *WorkerPoolQueue[T]) doRun() {
go func() { wg.wg.Wait(); close(workerDone) }()
select {
case <-workerDone:
- case <-(*ctxShutdownPtr).Done():
+ case <-time.After(shutdownTimeout):
log.Error("Queue %q is shutting down, but workers are still running after timeout", q.GetName())
}
} else {
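The `pprof.SetGoroutineLabels(q.ctxRun)` call is what makes the queue goroutines show up with readable names in goroutine dumps, as the commit message's screenshots suggest. A hedged sketch of the underlying mechanism; the label key here is illustrative, since the real label set comes from the process manager's typed context:

```go
import (
	"context"
	"runtime/pprof"
)

// runLabeled attaches a pprof label set to the context and to the current
// goroutine, so profiles and dumps show queue-name=<name>.
func runLabeled(name string, run func(ctx context.Context)) {
	ctx := pprof.WithLabels(context.Background(), pprof.Labels("queue-name", name))
	pprof.SetGoroutineLabels(ctx)
	run(ctx)
}
```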
diff --git a/modules/queue/workerqueue.go b/modules/queue/workerqueue.go
index 5695c6cc23..e0d5183bd9 100644
--- a/modules/queue/workerqueue.go
+++ b/modules/queue/workerqueue.go
@@ -10,9 +10,9 @@ import (
"sync/atomic"
"time"
- "code.gitea.io/gitea/modules/graceful"
"code.gitea.io/gitea/modules/json"
"code.gitea.io/gitea/modules/log"
+ "code.gitea.io/gitea/modules/process"
"code.gitea.io/gitea/modules/setting"
)
@@ -21,8 +21,9 @@ import (
type WorkerPoolQueue[T any] struct {
ctxRun context.Context
ctxRunCancel context.CancelFunc
- ctxShutdown atomic.Pointer[context.Context]
- shutdownDone chan struct{}
+
+ shutdownDone chan struct{}
+ shutdownTimeout atomic.Int64 // in case some buggy handlers (workers) would hang forever, "shutdown" should finish in predictable time
origHandler HandlerFuncT[T]
safeHandler HandlerFuncT[T]
@@ -175,22 +176,19 @@ func (q *WorkerPoolQueue[T]) Has(data T) (bool, error) {
return q.baseQueue.HasItem(q.ctxRun, q.marshal(data))
}
-func (q *WorkerPoolQueue[T]) Run(atShutdown, atTerminate func(func())) {
- atShutdown(func() {
- // in case some queue handlers are slow or have hanging bugs, at most wait for a short time
- q.ShutdownWait(1 * time.Second)
- })
+func (q *WorkerPoolQueue[T]) Run() {
q.doRun()
}
+func (q *WorkerPoolQueue[T]) Cancel() {
+ q.ctxRunCancel()
+}
+
// ShutdownWait shuts down the queue, waits for all workers to finish their jobs, and pushes the unhandled items back to the base queue
// It waits for all workers (handlers) to finish their jobs, in case some buggy handlers would hang forever, a reasonable timeout is needed
func (q *WorkerPoolQueue[T]) ShutdownWait(timeout time.Duration) {
- shutdownCtx, shutdownCtxCancel := context.WithTimeout(context.Background(), timeout)
- defer shutdownCtxCancel()
- if q.ctxShutdown.CompareAndSwap(nil, &shutdownCtx) {
- q.ctxRunCancel()
- }
+ q.shutdownTimeout.Store(int64(timeout))
+ q.ctxRunCancel()
<-q.shutdownDone
}
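`Run` and `Cancel` together form the run-with-cancel contract that the commit message says replaces `RunWithShutdownFns`. A hedged sketch of what a caller of such a pair could look like; the `runCanceler` interface and `runWithCancel` helper are invented here for illustration and may differ from the real graceful-manager API:

```go
// runCanceler is a hypothetical view of what a graceful runner expects:
// something that runs until done and can be asked to stop.
type runCanceler interface {
	Run()
	Cancel()
}

// runWithCancel runs rc until it finishes or stop is closed; on stop it cancels
// and then waits for Run to return.
func runWithCancel(rc runCanceler, stop <-chan struct{}) {
	done := make(chan struct{})
	go func() {
		rc.Run()
		close(done)
	}()
	select {
	case <-stop:
		rc.Cancel()
		<-done
	case <-done:
	}
}
```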
@@ -207,7 +205,11 @@ func getNewQueueFn(t string) (string, func(cfg *BaseConfig, unique bool) (baseQu
}
}
-func NewWorkerPoolQueueBySetting[T any](name string, queueSetting setting.QueueSettings, handler HandlerFuncT[T], unique bool) (*WorkerPoolQueue[T], error) {
+func newWorkerPoolQueueForTest[T any](name string, queueSetting setting.QueueSettings, handler HandlerFuncT[T], unique bool) (*WorkerPoolQueue[T], error) {
+ return NewWorkerPoolQueueWithContext(context.Background(), name, queueSetting, handler, unique)
+}
+
+func NewWorkerPoolQueueWithContext[T any](ctx context.Context, name string, queueSetting setting.QueueSettings, handler HandlerFuncT[T], unique bool) (*WorkerPoolQueue[T], error) {
if handler == nil {
log.Debug("Use dummy queue for %q because handler is nil and caller doesn't want to process the queue items", name)
queueSetting.Type = "dummy"
@@ -224,10 +226,11 @@ func NewWorkerPoolQueueBySetting[T any](name string, queueSetting setting.QueueS
}
log.Trace("Created queue %q of type %q", name, queueType)
- w.ctxRun, w.ctxRunCancel = context.WithCancel(graceful.GetManager().ShutdownContext())
+ w.ctxRun, _, w.ctxRunCancel = process.GetManager().AddTypedContext(ctx, "Queue: "+w.GetName(), process.SystemProcessType, false)
w.batchChan = make(chan []T)
w.flushChan = make(chan flushType)
w.shutdownDone = make(chan struct{})
+ w.shutdownTimeout.Store(int64(shutdownDefaultTimeout))
w.workerMaxNum = queueSetting.MaxWorkers
w.batchLength = queueSetting.BatchLength
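The shutdown timeout is kept in an `atomic.Int64` (a `time.Duration` stored as nanoseconds) so that `ShutdownWait` can change it from another goroutine without a data race; reading it back is just a conversion. A tiny hedged sketch of that pattern:

```go
import (
	"sync/atomic"
	"time"
)

var shutdownTimeout atomic.Int64 // a time.Duration stored as int64 nanoseconds

func setAndGet() time.Duration {
	shutdownTimeout.Store(int64(2 * time.Second)) // e.g. the default set at construction time
	return time.Duration(shutdownTimeout.Load())  // safe to read from any goroutine
}
```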
diff --git a/modules/queue/workerqueue_test.go b/modules/queue/workerqueue_test.go
index da9451cd77..e60120162a 100644
--- a/modules/queue/workerqueue_test.go
+++ b/modules/queue/workerqueue_test.go
@@ -16,17 +16,9 @@ import (
)
func runWorkerPoolQueue[T any](q *WorkerPoolQueue[T]) func() {
- var stop func()
- started := make(chan struct{})
- stopped := make(chan struct{})
- go func() {
- q.Run(func(f func()) { stop = f; close(started) }, nil)
- close(stopped)
- }()
- <-started
+ go q.Run()
return func() {
- stop()
- <-stopped
+ q.ShutdownWait(1 * time.Second)
}
}
@@ -57,7 +49,7 @@ func TestWorkerPoolQueueUnhandled(t *testing.T) {
return unhandled
}
- q, _ := NewWorkerPoolQueueBySetting("test-workpoolqueue", queueSetting, handler, false)
+ q, _ := newWorkerPoolQueueForTest("test-workpoolqueue", queueSetting, handler, false)
stop := runWorkerPoolQueue(q)
for i := 0; i < queueSetting.Length; i++ {
testRecorder.Record("push:%v", i)
@@ -145,7 +137,7 @@ func testWorkerPoolQueuePersistence(t *testing.T, queueSetting setting.QueueSett
return nil
}
- q, _ := NewWorkerPoolQueueBySetting("pr_patch_checker_test", queueSetting, testHandler, true)
+ q, _ := newWorkerPoolQueueForTest("pr_patch_checker_test", queueSetting, testHandler, true)
stop := runWorkerPoolQueue(q)
for i := 0; i < testCount; i++ {
_ = q.Push("task-" + strconv.Itoa(i))
@@ -169,7 +161,7 @@ func testWorkerPoolQueuePersistence(t *testing.T, queueSetting setting.QueueSett
return nil
}
- q, _ := NewWorkerPoolQueueBySetting("pr_patch_checker_test", queueSetting, testHandler, true)
+ q, _ := newWorkerPoolQueueForTest("pr_patch_checker_test", queueSetting, testHandler, true)
stop := runWorkerPoolQueue(q)
assert.NoError(t, q.FlushWithContext(context.Background(), 0))
stop()
@@ -194,7 +186,7 @@ func TestWorkerPoolQueueActiveWorkers(t *testing.T) {
return nil
}
- q, _ := NewWorkerPoolQueueBySetting("test-workpoolqueue", setting.QueueSettings{Type: "channel", BatchLength: 1, MaxWorkers: 1, Length: 100}, handler, false)
+ q, _ := newWorkerPoolQueueForTest("test-workpoolqueue", setting.QueueSettings{Type: "channel", BatchLength: 1, MaxWorkers: 1, Length: 100}, handler, false)
stop := runWorkerPoolQueue(q)
for i := 0; i < 5; i++ {
assert.NoError(t, q.Push(i))
@@ -210,7 +202,7 @@ func TestWorkerPoolQueueActiveWorkers(t *testing.T) {
assert.EqualValues(t, 1, q.GetWorkerNumber()) // there is at least one worker after the queue begins working
stop()
- q, _ = NewWorkerPoolQueueBySetting("test-workpoolqueue", setting.QueueSettings{Type: "channel", BatchLength: 1, MaxWorkers: 3, Length: 100}, handler, false)
+ q, _ = newWorkerPoolQueueForTest("test-workpoolqueue", setting.QueueSettings{Type: "channel", BatchLength: 1, MaxWorkers: 3, Length: 100}, handler, false)
stop = runWorkerPoolQueue(q)
for i := 0; i < 15; i++ {
assert.NoError(t, q.Push(i))
@@ -238,23 +230,23 @@ func TestWorkerPoolQueueShutdown(t *testing.T) {
if items[0] == 0 {
close(handlerCalled)
}
- time.Sleep(100 * time.Millisecond)
+ time.Sleep(400 * time.Millisecond)
return items
}
qs := setting.QueueSettings{Type: "level", Datadir: t.TempDir() + "/queue", BatchLength: 3, MaxWorkers: 4, Length: 20}
- q, _ := NewWorkerPoolQueueBySetting("test-workpoolqueue", qs, handler, false)
+ q, _ := newWorkerPoolQueueForTest("test-workpoolqueue", qs, handler, false)
stop := runWorkerPoolQueue(q)
for i := 0; i < qs.Length; i++ {
assert.NoError(t, q.Push(i))
}
<-handlerCalled
- time.Sleep(50 * time.Millisecond) // wait for a while to make sure all workers are active
+ time.Sleep(200 * time.Millisecond) // wait for a while to make sure all workers are active
assert.EqualValues(t, 4, q.GetWorkerActiveNumber())
stop() // stop triggers shutdown
assert.EqualValues(t, 0, q.GetWorkerActiveNumber())
// no item was ever handled, so we still get all of them again
- q, _ = NewWorkerPoolQueueBySetting("test-workpoolqueue", qs, handler, false)
+ q, _ = newWorkerPoolQueueForTest("test-workpoolqueue", qs, handler, false)
assert.EqualValues(t, 20, q.GetQueueItemNumber())
}