author     wxiaoguang <wxiaoguang@gmail.com>  2023-05-26 15:31:55 +0800
committer  GitHub <noreply@github.com>        2023-05-26 07:31:55 +0000
commit     18f26cfbf7f9b36b838c0e8762bfba98c89b9797 (patch)
tree       f3bcf0ef2b76601980ab6144e9ce9acd5316a0da /modules/queue
parent     e4922d484b9ee94ba22e5ff08b3c25c8be09d9c8 (diff)
download   gitea-18f26cfbf7f9b36b838c0e8762bfba98c89b9797.tar.gz
           gitea-18f26cfbf7f9b36b838c0e8762bfba98c89b9797.zip
Improve queue and logger context (#24924)
Previously there was a "graceful function", `RunWithShutdownFns`, mainly for modules that didn't support context. The old queue system didn't work well with context, so the old queues needed it.

After the queue refactoring, the new queue works well with context, so Go contexts can be used as much as possible: `RunWithShutdownFns` can be removed (replaced by `RunWithCancel`, which relies on the context-cancel mechanism), and the related code can be simplified.
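To make the mechanism concrete, here is a minimal sketch of the context-cancel pattern; the names `runnable` and `runWithCancel` are illustrative, not Gitea's actual graceful API:

```go
package queueexample

import "context"

// runnable is anything that blocks in Run until its context is done and
// exposes Cancel to cancel that context; the refactored WorkerPoolQueue
// fits this shape via its new Run/Cancel methods (see the diff below).
type runnable interface {
	Run()
	Cancel()
}

// runWithCancel replaces the old callback registration: when shutdown
// begins, the manager simply cancels the runnable, instead of calling
// back into functions previously registered via atShutdown.
func runWithCancel(shutdown context.Context, r runnable) {
	go func() {
		<-shutdown.Done() // graceful shutdown signal
		r.Cancel()
	}()
	r.Run()
}
```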
This PR also fixes some legacy queue-init problems, e.g.:
* a typo: the repo-archive queue logged "unable to create codes indexer queue" instead of "unable to create repo-archive queue"
* no nil check for queues that failed to initialize, which caused an unfriendly panic (see the sketch after this list)
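For the nil-check fix, a hedged sketch of the call-site pattern; the queue name and surrounding code are illustrative, based on the `CreateUniqueQueue` signature in this diff:

```go
// CreateUniqueQueue logs the error and returns nil when creation fails,
// so callers must check for nil instead of panicking on first use.
archiverQueue := queue.CreateUniqueQueue(graceful.GetManager().ShutdownContext(), "repo-archive", handler)
if archiverQueue == nil {
	return errors.New("unable to create repo-archive queue")
}
```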
After this PR, many goroutines have better display names:
![image](https://github.com/go-gitea/gitea/assets/2114189/701b2a9b-8065-4137-aeaa-0bda2b34604a)
![image](https://github.com/go-gitea/gitea/assets/2114189/f1d5f50f-0534-40f0-b0be-f2c9daa5fe92)
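The better display names come from goroutine labels attached to the queue's context. A minimal, runnable sketch of the standard-library mechanism (Gitea wires this through `process.GetManager().AddTypedContext`, as shown in the diff below; the label key and value here are illustrative):

```go
package main

import (
	"context"
	"runtime/pprof"
)

func main() {
	// Labels set on the current goroutine show up in goroutine profiles
	// and stack dumps, so a queue goroutine becomes identifiable by name.
	ctx := pprof.WithLabels(context.Background(), pprof.Labels("queue", "example"))
	pprof.SetGoroutineLabels(ctx)
}
```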
Diffstat (limited to 'modules/queue')
-rw-r--r--  modules/queue/manager.go           12
-rw-r--r--  modules/queue/manager_test.go       6
-rw-r--r--  modules/queue/workergroup.go       24
-rw-r--r--  modules/queue/workerqueue.go       33
-rw-r--r--  modules/queue/workerqueue_test.go  30
5 files changed, 53 insertions, 52 deletions
diff --git a/modules/queue/manager.go b/modules/queue/manager.go
index 95b3bad57b..8b964c0c28 100644
--- a/modules/queue/manager.go
+++ b/modules/queue/manager.go
@@ -88,22 +88,22 @@ func (m *Manager) FlushAll(ctx context.Context, timeout time.Duration) error {
 }
 
 // CreateSimpleQueue creates a simple queue from global setting config provider by name
-func CreateSimpleQueue[T any](name string, handler HandlerFuncT[T]) *WorkerPoolQueue[T] {
-    return createWorkerPoolQueue(name, setting.CfgProvider, handler, false)
+func CreateSimpleQueue[T any](ctx context.Context, name string, handler HandlerFuncT[T]) *WorkerPoolQueue[T] {
+    return createWorkerPoolQueue(ctx, name, setting.CfgProvider, handler, false)
 }
 
 // CreateUniqueQueue creates a unique queue from global setting config provider by name
-func CreateUniqueQueue[T any](name string, handler HandlerFuncT[T]) *WorkerPoolQueue[T] {
-    return createWorkerPoolQueue(name, setting.CfgProvider, handler, true)
+func CreateUniqueQueue[T any](ctx context.Context, name string, handler HandlerFuncT[T]) *WorkerPoolQueue[T] {
+    return createWorkerPoolQueue(ctx, name, setting.CfgProvider, handler, true)
 }
 
-func createWorkerPoolQueue[T any](name string, cfgProvider setting.ConfigProvider, handler HandlerFuncT[T], unique bool) *WorkerPoolQueue[T] {
+func createWorkerPoolQueue[T any](ctx context.Context, name string, cfgProvider setting.ConfigProvider, handler HandlerFuncT[T], unique bool) *WorkerPoolQueue[T] {
     queueSetting, err := setting.GetQueueSettings(cfgProvider, name)
     if err != nil {
         log.Error("Failed to get queue settings for %q: %v", name, err)
         return nil
     }
-    w, err := NewWorkerPoolQueueBySetting(name, queueSetting, handler, unique)
+    w, err := NewWorkerPoolQueueWithContext(ctx, name, queueSetting, handler, unique)
     if err != nil {
         log.Error("Failed to create queue %q: %v", name, err)
         return nil
diff --git a/modules/queue/manager_test.go b/modules/queue/manager_test.go
index 50265e27b6..1fd29f813f 100644
--- a/modules/queue/manager_test.go
+++ b/modules/queue/manager_test.go
@@ -29,7 +29,7 @@ func TestManager(t *testing.T) {
         if err != nil {
             return nil, err
         }
-        return NewWorkerPoolQueueBySetting(name, qs, func(s ...int) (unhandled []int) { return nil }, false)
+        return newWorkerPoolQueueForTest(name, qs, func(s ...int) (unhandled []int) { return nil }, false)
     }
 
     // test invalid CONN_STR
@@ -80,7 +80,7 @@ MAX_WORKERS = 2
     assert.NoError(t, err)
 
-    q1 := createWorkerPoolQueue[string]("no-such", cfgProvider, nil, false)
+    q1 := createWorkerPoolQueue[string](context.Background(), "no-such", cfgProvider, nil, false)
     assert.Equal(t, "no-such", q1.GetName())
     assert.Equal(t, "dummy", q1.GetType()) // no handler, so it becomes dummy
     assert.Equal(t, filepath.Join(setting.AppDataPath, "queues/dir1"), q1.baseConfig.DataFullDir)
@@ -96,7 +96,7 @@ MAX_WORKERS = 2
     assert.Equal(t, "string", q1.GetItemTypeName())
     qid1 := GetManager().qidCounter
 
-    q2 := createWorkerPoolQueue("sub", cfgProvider, func(s ...int) (unhandled []int) { return nil }, false)
+    q2 := createWorkerPoolQueue(context.Background(), "sub", cfgProvider, func(s ...int) (unhandled []int) { return nil }, false)
     assert.Equal(t, "sub", q2.GetName())
     assert.Equal(t, "level", q2.GetType())
     assert.Equal(t, filepath.Join(setting.AppDataPath, "queues/dir2"), q2.baseConfig.DataFullDir)
diff --git a/modules/queue/workergroup.go b/modules/queue/workergroup.go
index 7127ea1117..147a4f335e 100644
--- a/modules/queue/workergroup.go
+++ b/modules/queue/workergroup.go
@@ -5,6 +5,7 @@ package queue
 
 import (
     "context"
+    "runtime/pprof"
     "sync"
     "sync/atomic"
     "time"
@@ -13,9 +14,10 @@ import (
 )
 
 var (
-    infiniteTimerC        = make(chan time.Time)
-    batchDebounceDuration = 100 * time.Millisecond
-    workerIdleDuration    = 1 * time.Second
+    infiniteTimerC         = make(chan time.Time)
+    batchDebounceDuration  = 100 * time.Millisecond
+    workerIdleDuration     = 1 * time.Second
+    shutdownDefaultTimeout = 2 * time.Second
 
     unhandledItemRequeueDuration atomic.Int64 // to avoid data race during test
 )
@@ -116,13 +118,15 @@ func (q *WorkerPoolQueue[T]) doWorkerHandle(batch []T) {
 // If the queue is shutting down, it returns true and try to push the items
 // Otherwise it does nothing and returns false
 func (q *WorkerPoolQueue[T]) basePushForShutdown(items ...T) bool {
-    ctxShutdown := q.ctxShutdown.Load()
-    if ctxShutdown == nil {
+    shutdownTimeout := time.Duration(q.shutdownTimeout.Load())
+    if shutdownTimeout == 0 {
         return false
     }
+    ctxShutdown, ctxShutdownCancel := context.WithTimeout(context.Background(), shutdownTimeout)
+    defer ctxShutdownCancel()
     for _, item := range items {
         // if there is still any error, the queue can do nothing instead of losing the items
-        if err := q.baseQueue.PushItem(*ctxShutdown, q.marshal(item)); err != nil {
+        if err := q.baseQueue.PushItem(ctxShutdown, q.marshal(item)); err != nil {
             log.Error("Failed to requeue item for queue %q when shutting down: %v", q.GetName(), err)
         }
     }
@@ -246,6 +250,8 @@ var skipFlushChan = make(chan flushType) // an empty flush chan, used to skip re
 
 // doRun is the main loop of the queue. All related "doXxx" functions are executed in its context.
 func (q *WorkerPoolQueue[T]) doRun() {
+    pprof.SetGoroutineLabels(q.ctxRun)
+
     log.Debug("Queue %q starts running", q.GetName())
     defer log.Debug("Queue %q stops running", q.GetName())
@@ -271,8 +277,8 @@ func (q *WorkerPoolQueue[T]) doRun() {
                 }
             }
 
-            ctxShutdownPtr := q.ctxShutdown.Load()
-            if ctxShutdownPtr != nil {
+            shutdownTimeout := time.Duration(q.shutdownTimeout.Load())
+            if shutdownTimeout != 0 {
                 // if there is a shutdown context, try to push the items back to the base queue
                 q.basePushForShutdown(unhandled...)
 
                 workerDone := make(chan struct{})
@@ -280,7 +286,7 @@ func (q *WorkerPoolQueue[T]) doRun() {
                 go func() { wg.wg.Wait(); close(workerDone) }()
                 select {
                 case <-workerDone:
-                case <-(*ctxShutdownPtr).Done():
+                case <-time.After(shutdownTimeout):
                     log.Error("Queue %q is shutting down, but workers are still running after timeout", q.GetName())
                 }
             } else {
diff --git a/modules/queue/workerqueue.go b/modules/queue/workerqueue.go
index 5695c6cc23..e0d5183bd9 100644
--- a/modules/queue/workerqueue.go
+++ b/modules/queue/workerqueue.go
@@ -10,9 +10,9 @@ import (
     "sync/atomic"
     "time"
 
-    "code.gitea.io/gitea/modules/graceful"
     "code.gitea.io/gitea/modules/json"
     "code.gitea.io/gitea/modules/log"
+    "code.gitea.io/gitea/modules/process"
     "code.gitea.io/gitea/modules/setting"
 )
 
@@ -21,8 +21,9 @@ import (
 type WorkerPoolQueue[T any] struct {
     ctxRun       context.Context
     ctxRunCancel context.CancelFunc
-    ctxShutdown  atomic.Pointer[context.Context]
-    shutdownDone chan struct{}
+
+    shutdownDone    chan struct{}
+    shutdownTimeout atomic.Int64 // in case some buggy handlers (workers) would hang forever, "shutdown" should finish in predictable time
 
     origHandler HandlerFuncT[T]
     safeHandler HandlerFuncT[T]
@@ -175,22 +176,19 @@ func (q *WorkerPoolQueue[T]) Has(data T) (bool, error) {
     return q.baseQueue.HasItem(q.ctxRun, q.marshal(data))
 }
 
-func (q *WorkerPoolQueue[T]) Run(atShutdown, atTerminate func(func())) {
-    atShutdown(func() {
-        // in case some queue handlers are slow or have hanging bugs, at most wait for a short time
-        q.ShutdownWait(1 * time.Second)
-    })
+func (q *WorkerPoolQueue[T]) Run() {
     q.doRun()
 }
 
+func (q *WorkerPoolQueue[T]) Cancel() {
+    q.ctxRunCancel()
+}
+
 // ShutdownWait shuts down the queue, waits for all workers to finish their jobs, and pushes the unhandled items back to the base queue
 // It waits for all workers (handlers) to finish their jobs, in case some buggy handlers would hang forever, a reasonable timeout is needed
 func (q *WorkerPoolQueue[T]) ShutdownWait(timeout time.Duration) {
-    shutdownCtx, shutdownCtxCancel := context.WithTimeout(context.Background(), timeout)
-    defer shutdownCtxCancel()
-    if q.ctxShutdown.CompareAndSwap(nil, &shutdownCtx) {
-        q.ctxRunCancel()
-    }
+    q.shutdownTimeout.Store(int64(timeout))
+    q.ctxRunCancel()
     <-q.shutdownDone
 }
 
@@ -207,7 +205,11 @@ func getNewQueueFn(t string) (string, func(cfg *BaseConfig, unique bool) (baseQu
     }
 }
 
-func NewWorkerPoolQueueBySetting[T any](name string, queueSetting setting.QueueSettings, handler HandlerFuncT[T], unique bool) (*WorkerPoolQueue[T], error) {
+func newWorkerPoolQueueForTest[T any](name string, queueSetting setting.QueueSettings, handler HandlerFuncT[T], unique bool) (*WorkerPoolQueue[T], error) {
+    return NewWorkerPoolQueueWithContext(context.Background(), name, queueSetting, handler, unique)
+}
+
+func NewWorkerPoolQueueWithContext[T any](ctx context.Context, name string, queueSetting setting.QueueSettings, handler HandlerFuncT[T], unique bool) (*WorkerPoolQueue[T], error) {
     if handler == nil {
         log.Debug("Use dummy queue for %q because handler is nil and caller doesn't want to process the queue items", name)
         queueSetting.Type = "dummy"
@@ -224,10 +226,11 @@ func NewWorkerPoolQueueBySetting[T any](name string, queueSetting setting.QueueS
     }
     log.Trace("Created queue %q of type %q", name, queueType)
 
-    w.ctxRun, w.ctxRunCancel = context.WithCancel(graceful.GetManager().ShutdownContext())
+    w.ctxRun, _, w.ctxRunCancel = process.GetManager().AddTypedContext(ctx, "Queue: "+w.GetName(), process.SystemProcessType, false)
     w.batchChan = make(chan []T)
     w.flushChan = make(chan flushType)
     w.shutdownDone = make(chan struct{})
+    w.shutdownTimeout.Store(int64(shutdownDefaultTimeout))
 
     w.workerMaxNum = queueSetting.MaxWorkers
     w.batchLength = queueSetting.BatchLength
diff --git a/modules/queue/workerqueue_test.go b/modules/queue/workerqueue_test.go
index da9451cd77..e60120162a 100644
--- a/modules/queue/workerqueue_test.go
+++ b/modules/queue/workerqueue_test.go
@@ -16,17 +16,9 @@ import (
 )
 
 func runWorkerPoolQueue[T any](q *WorkerPoolQueue[T]) func() {
-    var stop func()
-    started := make(chan struct{})
-    stopped := make(chan struct{})
-    go func() {
-        q.Run(func(f func()) { stop = f; close(started) }, nil)
-        close(stopped)
-    }()
-    <-started
+    go q.Run()
     return func() {
-        stop()
-        <-stopped
+        q.ShutdownWait(1 * time.Second)
     }
 }
 
@@ -57,7 +49,7 @@ func TestWorkerPoolQueueUnhandled(t *testing.T) {
         return unhandled
     }
 
-    q, _ := NewWorkerPoolQueueBySetting("test-workpoolqueue", queueSetting, handler, false)
+    q, _ := newWorkerPoolQueueForTest("test-workpoolqueue", queueSetting, handler, false)
     stop := runWorkerPoolQueue(q)
     for i := 0; i < queueSetting.Length; i++ {
         testRecorder.Record("push:%v", i)
@@ -145,7 +137,7 @@ func testWorkerPoolQueuePersistence(t *testing.T, queueSetting setting.QueueSett
         return nil
     }
 
-    q, _ := NewWorkerPoolQueueBySetting("pr_patch_checker_test", queueSetting, testHandler, true)
+    q, _ := newWorkerPoolQueueForTest("pr_patch_checker_test", queueSetting, testHandler, true)
     stop := runWorkerPoolQueue(q)
     for i := 0; i < testCount; i++ {
         _ = q.Push("task-" + strconv.Itoa(i))
@@ -169,7 +161,7 @@ func testWorkerPoolQueuePersistence(t *testing.T, queueSetting setting.QueueSett
         return nil
     }
 
-    q, _ := NewWorkerPoolQueueBySetting("pr_patch_checker_test", queueSetting, testHandler, true)
+    q, _ := newWorkerPoolQueueForTest("pr_patch_checker_test", queueSetting, testHandler, true)
     stop := runWorkerPoolQueue(q)
     assert.NoError(t, q.FlushWithContext(context.Background(), 0))
     stop()
@@ -194,7 +186,7 @@ func TestWorkerPoolQueueActiveWorkers(t *testing.T) {
         return nil
     }
 
-    q, _ := NewWorkerPoolQueueBySetting("test-workpoolqueue", setting.QueueSettings{Type: "channel", BatchLength: 1, MaxWorkers: 1, Length: 100}, handler, false)
+    q, _ := newWorkerPoolQueueForTest("test-workpoolqueue", setting.QueueSettings{Type: "channel", BatchLength: 1, MaxWorkers: 1, Length: 100}, handler, false)
     stop := runWorkerPoolQueue(q)
     for i := 0; i < 5; i++ {
         assert.NoError(t, q.Push(i))
@@ -210,7 +202,7 @@ func TestWorkerPoolQueueActiveWorkers(t *testing.T) {
     assert.EqualValues(t, 1, q.GetWorkerNumber()) // there is at least one worker after the queue begins working
     stop()
 
-    q, _ = NewWorkerPoolQueueBySetting("test-workpoolqueue", setting.QueueSettings{Type: "channel", BatchLength: 1, MaxWorkers: 3, Length: 100}, handler, false)
+    q, _ = newWorkerPoolQueueForTest("test-workpoolqueue", setting.QueueSettings{Type: "channel", BatchLength: 1, MaxWorkers: 3, Length: 100}, handler, false)
     stop = runWorkerPoolQueue(q)
     for i := 0; i < 15; i++ {
         assert.NoError(t, q.Push(i))
@@ -238,23 +230,23 @@ func TestWorkerPoolQueueShutdown(t *testing.T) {
         if items[0] == 0 {
             close(handlerCalled)
         }
-        time.Sleep(100 * time.Millisecond)
+        time.Sleep(400 * time.Millisecond)
         return items
     }
 
     qs := setting.QueueSettings{Type: "level", Datadir: t.TempDir() + "/queue", BatchLength: 3, MaxWorkers: 4, Length: 20}
-    q, _ := NewWorkerPoolQueueBySetting("test-workpoolqueue", qs, handler, false)
+    q, _ := newWorkerPoolQueueForTest("test-workpoolqueue", qs, handler, false)
     stop := runWorkerPoolQueue(q)
     for i := 0; i < qs.Length; i++ {
         assert.NoError(t, q.Push(i))
     }
 
     <-handlerCalled
-    time.Sleep(50 * time.Millisecond) // wait for a while to make sure all workers are active
+    time.Sleep(200 * time.Millisecond) // wait for a while to make sure all workers are active
     assert.EqualValues(t, 4, q.GetWorkerActiveNumber())
     stop() // stop triggers shutdown
     assert.EqualValues(t, 0, q.GetWorkerActiveNumber())
 
     // no item was ever handled, so we still get all of them again
-    q, _ = NewWorkerPoolQueueBySetting("test-workpoolqueue", qs, handler, false)
+    q, _ = newWorkerPoolQueueForTest("test-workpoolqueue", qs, handler, false)
     assert.EqualValues(t, 20, q.GetQueueItemNumber())
 }
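Putting it together, the queue lifecycle that these tests exercise now looks roughly like this (a sketch; the queue name, settings, and error handling are illustrative):

```go
// Construct with a caller-supplied context, run in a goroutine, then shut
// down with a bounded wait that requeues unhandled items to the base queue.
q, err := queue.NewWorkerPoolQueueWithContext(ctx, "example", queueSetting, handler, false)
if err != nil {
	log.Error("Failed to create queue: %v", err)
	return
}
go q.Run()
// ... producers call q.Push(item) ...
q.ShutdownWait(1 * time.Second) // waits at most 1s for in-flight handlers
```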