]> source.dussan.org Git - gitea.git/commitdiff
Fix queue worker incorrectly stopped when there are still more items in the queue...
authorwxiaoguang <wxiaoguang@gmail.com>
Sat, 2 Mar 2024 16:07:54 +0000 (00:07 +0800)
committerGitHub <noreply@github.com>
Sat, 2 Mar 2024 16:07:54 +0000 (16:07 +0000)
Without `case <-t.C`, the workers would stop incorrectly and the test
won't pass. In the worst case, there might be only one running worker
processing the queue items for a long time because the other workers have
stopped. The root cause is related to the logic of doDispatchBatchToWorker.
It isn't a serious problem at the moment, so it is kept as-is.

modules/queue/workergroup.go
modules/queue/workerqueue.go
modules/queue/workerqueue_test.go

index 147a4f335e1012a3f77e6511df6bed38f3d07fbc..e3801ef2b2dd0a02f4a30425cd48e3cbd131e272 100644 (file)
@@ -60,6 +60,9 @@ func (q *WorkerPoolQueue[T]) doDispatchBatchToWorker(wg *workerGroup[T], flushCh
                full = true
        }
 
+       // TODO: the logic could be improved in the future, to avoid a data-race between "doStartNewWorker" and "workerNum"
+       // The root problem is that if we skip "doStartNewWorker" here, the "workerNum" might be decreased by other workers later
+       // So ideally, it should check whether there are enough workers by some approaches, and start new workers if necessary.
        q.workerNumMu.Lock()
        noWorker := q.workerNum == 0
        if full || noWorker {
@@ -143,7 +146,11 @@ func (q *WorkerPoolQueue[T]) doStartNewWorker(wp *workerGroup[T]) {
                log.Debug("Queue %q starts new worker", q.GetName())
                defer log.Debug("Queue %q stops idle worker", q.GetName())
 
+               atomic.AddInt32(&q.workerStartedCounter, 1) // Only increase counter, used for debugging
+
                t := time.NewTicker(workerIdleDuration)
+               defer t.Stop()
+
                keepWorking := true
                stopWorking := func() {
                        q.workerNumMu.Lock()
@@ -158,13 +165,18 @@ func (q *WorkerPoolQueue[T]) doStartNewWorker(wp *workerGroup[T]) {
                        case batch, ok := <-q.batchChan:
                                if !ok {
                                        stopWorking()
-                               } else {
-                                       q.doWorkerHandle(batch)
-                                       t.Reset(workerIdleDuration)
+                                       continue
+                               }
+                               q.doWorkerHandle(batch)
+                               // reset the idle ticker, and drain the tick after reset in case a tick is already triggered
+                               t.Reset(workerIdleDuration)
+                               select {
+                               case <-t.C:
+                               default:
                                }
                        case <-t.C:
                                q.workerNumMu.Lock()
-                               keepWorking = q.workerNum <= 1
+                               keepWorking = q.workerNum <= 1 // keep the last worker running
                                if !keepWorking {
                                        q.workerNum--
                                }
index b28fd880270ab32383ccc51d28679be58c815a83..4160622d8138809d0e3671a32ed4db4740867e72 100644 (file)
@@ -40,6 +40,8 @@ type WorkerPoolQueue[T any] struct {
        workerMaxNum    int
        workerActiveNum int
        workerNumMu     sync.Mutex
+
+       workerStartedCounter int32
 }
 
 type flushType chan struct{}
index e60120162a7062f2a979f88fe6ffe78053039dd9..e09669c54255e0a8f4ab0e86cb7fa8c4f5358999 100644 (file)
@@ -11,6 +11,7 @@ import (
        "time"
 
        "code.gitea.io/gitea/modules/setting"
+       "code.gitea.io/gitea/modules/test"
 
        "github.com/stretchr/testify/assert"
 )
@@ -175,11 +176,7 @@ func testWorkerPoolQueuePersistence(t *testing.T, queueSetting setting.QueueSett
 }
 
 func TestWorkerPoolQueueActiveWorkers(t *testing.T) {
-       oldWorkerIdleDuration := workerIdleDuration
-       workerIdleDuration = 300 * time.Millisecond
-       defer func() {
-               workerIdleDuration = oldWorkerIdleDuration
-       }()
+       defer test.MockVariableValue(&workerIdleDuration, 300*time.Millisecond)()
 
        handler := func(items ...int) (unhandled []int) {
                time.Sleep(100 * time.Millisecond)
@@ -250,3 +247,25 @@ func TestWorkerPoolQueueShutdown(t *testing.T) {
        q, _ = newWorkerPoolQueueForTest("test-workpoolqueue", qs, handler, false)
        assert.EqualValues(t, 20, q.GetQueueItemNumber())
 }
+
+func TestWorkerPoolQueueWorkerIdleReset(t *testing.T) {
+       defer test.MockVariableValue(&workerIdleDuration, 10*time.Millisecond)()
+
+       handler := func(items ...int) (unhandled []int) {
+               time.Sleep(50 * time.Millisecond)
+               return nil
+       }
+
+       q, _ := newWorkerPoolQueueForTest("test-workpoolqueue", setting.QueueSettings{Type: "channel", BatchLength: 1, MaxWorkers: 2, Length: 100}, handler, false)
+       stop := runWorkerPoolQueue(q)
+       for i := 0; i < 20; i++ {
+               assert.NoError(t, q.Push(i))
+       }
+
+       time.Sleep(500 * time.Millisecond)
+       assert.EqualValues(t, 2, q.GetWorkerNumber())
+       assert.EqualValues(t, 2, q.GetWorkerActiveNumber())
+       // when the queue never becomes empty, the existing workers should keep working
+       assert.EqualValues(t, 2, q.workerStartedCounter)
+       stop()
+}