]> source.dussan.org Git - gitea.git/commitdiff
Cron job to cleanup hook_task table (#13080)
authorBrad Albright <32200834+bhalbright@users.noreply.github.com>
Tue, 26 Jan 2021 21:02:42 +0000 (15:02 -0600)
committerGitHub <noreply@github.com>
Tue, 26 Jan 2021 21:02:42 +0000 (22:02 +0100)
Close **Prune hook_task Table (#10741)**

Added a cron job to delete webhook deliveries in the hook_task table. It can be turned on/off and the schedule controlled globally via app.ini. The data can be deleted by either the age of the delivery which is the default or by deleting the all but the most recent deliveries _per webhook_.

Note: I had previously submitted pr #11416  but I closed it when I realized that I had deleted per repository instead of per webhook. Also, I decided allowing the settings to be overridden via the ui was overkill. Also this version allows the deletion by age which is probably what most people would want.

custom/conf/app.example.ini
docs/content/doc/advanced/config-cheat-sheet.en-us.md
models/webhook.go
models/webhook_test.go
modules/cron/setting.go
modules/cron/tasks_basic.go
options/locale/locale_en-US.ini

index f2b65a0963db6221f17bfcf489266c7f76e123d9..8bc4a2c20475cf0e687a4ff3713e04a531471836 100644 (file)
@@ -1023,6 +1023,21 @@ SCHEDULE = @every 24h
 ; deleted branches than OLDER_THAN ago are subject to deletion
 OLDER_THAN = 24h
 
+; Cleanup hook_task table
+[cron.cleanup_hook_task_table]
+; Whether to enable the job
+ENABLED = true
+; Whether to always run at start up time (if ENABLED)
+RUN_AT_START = false
+; Time interval for job to run
+SCHEDULE = @every 24h
+; OlderThan or PerWebhook. How the records are removed, either by age (i.e. how long ago hook_task record was delivered) or by the number to keep per webhook (i.e. keep most recent x deliveries per webhook).
+CLEANUP_TYPE = OlderThan
+; If CLEANUP_TYPE is set to OlderThan, then any delivered hook_task records older than this expression will be deleted.
+OLDER_THAN = 168h
+; If CLEANUP_TYPE is set to PerWebhook, this is number of hook_task records to keep for a webhook (i.e. keep the most recent x deliveries).
+NUMBER_TO_KEEP = 10
+
 ; Extended cron task - not enabled by default
 
 ; Delete all unactivated accounts
index e8866ab333d6c2b65b5b98480d290b86ece23c80..1d9326ad6ea7824bdc0b90f6b345d6c053052e33 100644 (file)
@@ -694,6 +694,15 @@ NB: You must `REDIRECT_MACARON_LOG` and have `DISABLE_ROUTER_LOG` set to `false`
 - `RUN_AT_START`: **true**: Run repository statistics check at start time.
 - `SCHEDULE`: **@every 24h**: Cron syntax for scheduling repository statistics check.
 
+### Cron - Cleanup hook_task Table (`cron.cleanup_hook_task_table`)
+
+- `ENABLED`: **true**: Enable cleanup hook_task job.
+- `RUN_AT_START`: **false**: Run cleanup hook_task at start time (if ENABLED).
+- `SCHEDULE`: **@every 24h**: Cron syntax for cleaning hook_task table.
+- `CLEANUP_TYPE` **OlderThan** OlderThan or PerWebhook Method to cleanup hook_task, either by age (i.e. how long ago hook_task record was delivered) or by the number to keep per webhook (i.e. keep most recent x deliveries per webhook).
+- `OLDER_THAN`: **168h**: If CLEANUP_TYPE is set to OlderThan, then any delivered hook_task records older than this expression will be deleted.
+- `NUMBER_TO_KEEP`: **10**: If CLEANUP_TYPE is set to PerWebhook, this is number of hook_task records to keep for a webhook (i.e. keep the most recent x deliveries).
+
 #### Cron - Update Migration Poster ID (`cron.update_migration_poster_id`)
 
 - `SCHEDULE`: **@every 24h** : Interval as a duration between each synchronization, it will always attempt synchronization when the instance starts.
index 5174bb6128c821296e2a7cbfc14d86567be7db3f..c7fcfba49e7824b3e91131fbaed28147ab87b697 100644 (file)
@@ -6,6 +6,7 @@
 package models
 
 import (
+       "context"
        "encoding/json"
        "fmt"
        "strings"
@@ -39,6 +40,26 @@ func ToHookContentType(name string) HookContentType {
        return hookContentTypes[name]
 }
 
+// HookTaskCleanupType is the type of cleanup to perform on hook_task
+type HookTaskCleanupType int
+
+const (
+       // OlderThan hook_task rows will be cleaned up by the age of the row
+       OlderThan HookTaskCleanupType = iota
+       // PerWebhook hook_task rows will be cleaned up by leaving the most recent deliveries for each webhook
+       PerWebhook
+)
+
+var hookTaskCleanupTypes = map[string]HookTaskCleanupType{
+       "OlderThan":  OlderThan,
+       "PerWebhook": PerWebhook,
+}
+
+// ToHookTaskCleanupType returns HookTaskCleanupType by given name.
+func ToHookTaskCleanupType(name string) HookTaskCleanupType {
+       return hookTaskCleanupTypes[name]
+}
+
 // Name returns the name of a given web hook's content type
 func (t HookContentType) Name() string {
        switch t {
@@ -738,3 +759,69 @@ func FindRepoUndeliveredHookTasks(repoID int64) ([]*HookTask, error) {
        }
        return tasks, nil
 }
+
+// CleanupHookTaskTable deletes rows from hook_task as needed.
+func CleanupHookTaskTable(ctx context.Context, cleanupType HookTaskCleanupType, olderThan time.Duration, numberToKeep int) error {
+       log.Trace("Doing: CleanupHookTaskTable")
+
+       if cleanupType == OlderThan {
+               deleteOlderThan := time.Now().Add(-olderThan).UnixNano()
+               deletes, err := x.
+                       Where("is_delivered = ? and delivered < ?", true, deleteOlderThan).
+                       Delete(new(HookTask))
+               if err != nil {
+                       return err
+               }
+               log.Trace("Deleted %d rows from hook_task", deletes)
+       } else if cleanupType == PerWebhook {
+               hookIDs := make([]int64, 0, 10)
+               err := x.Table("webhook").
+                       Where("id > 0").
+                       Cols("id").
+                       Find(&hookIDs)
+               if err != nil {
+                       return err
+               }
+               for _, hookID := range hookIDs {
+                       select {
+                       case <-ctx.Done():
+                               return ErrCancelledf("Before deleting hook_task records for hook id %d", hookID)
+                       default:
+                       }
+                       if err = deleteDeliveredHookTasksByWebhook(hookID, numberToKeep); err != nil {
+                               return err
+                       }
+               }
+       }
+       log.Trace("Finished: CleanupHookTaskTable")
+       return nil
+}
+
+func deleteDeliveredHookTasksByWebhook(hookID int64, numberDeliveriesToKeep int) error {
+       log.Trace("Deleting hook_task rows for webhook %d, keeping the most recent %d deliveries", hookID, numberDeliveriesToKeep)
+       var deliveryDates = make([]int64, 0, 10)
+       err := x.Table("hook_task").
+               Where("hook_task.hook_id = ? AND hook_task.is_delivered = ? AND hook_task.delivered is not null", hookID, true).
+               Cols("hook_task.delivered").
+               Join("INNER", "webhook", "hook_task.hook_id = webhook.id").
+               OrderBy("hook_task.delivered desc").
+               Limit(1, int(numberDeliveriesToKeep)).
+               Find(&deliveryDates)
+       if err != nil {
+               return err
+       }
+
+       if len(deliveryDates) > 0 {
+               deletes, err := x.
+                       Where("hook_id = ? and is_delivered = ? and delivered <= ?", hookID, true, deliveryDates[0]).
+                       Delete(new(HookTask))
+               if err != nil {
+                       return err
+               }
+               log.Trace("Deleted %d hook_task rows for webhook %d", deletes, hookID)
+       } else {
+               log.Trace("No hook_task rows to delete for webhook %d", hookID)
+       }
+
+       return nil
+}
index 20acb4e93c0a23c0efae74f52e569ecf0b327a0d..1baf6ef44bfa5f477d8a7fb568fc23dd7fe70732 100644 (file)
@@ -5,8 +5,10 @@
 package models
 
 import (
+       "context"
        "encoding/json"
        "testing"
+       "time"
 
        api "code.gitea.io/gitea/modules/structs"
 
@@ -223,3 +225,115 @@ func TestUpdateHookTask(t *testing.T) {
        assert.NoError(t, UpdateHookTask(hook))
        AssertExistsAndLoadBean(t, hook)
 }
+
+func TestCleanupHookTaskTable_PerWebhook_DeletesDelivered(t *testing.T) {
+       assert.NoError(t, PrepareTestDatabase())
+       hookTask := &HookTask{
+               RepoID:      3,
+               HookID:      3,
+               Typ:         GITEA,
+               URL:         "http://www.example.com/unit_test",
+               Payloader:   &api.PushPayload{},
+               IsDelivered: true,
+               Delivered:   time.Now().UnixNano(),
+       }
+       AssertNotExistsBean(t, hookTask)
+       assert.NoError(t, CreateHookTask(hookTask))
+       AssertExistsAndLoadBean(t, hookTask)
+
+       assert.NoError(t, CleanupHookTaskTable(context.Background(), PerWebhook, 168*time.Hour, 0))
+       AssertNotExistsBean(t, hookTask)
+}
+
+func TestCleanupHookTaskTable_PerWebhook_LeavesUndelivered(t *testing.T) {
+       assert.NoError(t, PrepareTestDatabase())
+       hookTask := &HookTask{
+               RepoID:      2,
+               HookID:      4,
+               Typ:         GITEA,
+               URL:         "http://www.example.com/unit_test",
+               Payloader:   &api.PushPayload{},
+               IsDelivered: false,
+       }
+       AssertNotExistsBean(t, hookTask)
+       assert.NoError(t, CreateHookTask(hookTask))
+       AssertExistsAndLoadBean(t, hookTask)
+
+       assert.NoError(t, CleanupHookTaskTable(context.Background(), PerWebhook, 168*time.Hour, 0))
+       AssertExistsAndLoadBean(t, hookTask)
+}
+
+func TestCleanupHookTaskTable_PerWebhook_LeavesMostRecentTask(t *testing.T) {
+       assert.NoError(t, PrepareTestDatabase())
+       hookTask := &HookTask{
+               RepoID:      2,
+               HookID:      4,
+               Typ:         GITEA,
+               URL:         "http://www.example.com/unit_test",
+               Payloader:   &api.PushPayload{},
+               IsDelivered: true,
+               Delivered:   time.Now().UnixNano(),
+       }
+       AssertNotExistsBean(t, hookTask)
+       assert.NoError(t, CreateHookTask(hookTask))
+       AssertExistsAndLoadBean(t, hookTask)
+
+       assert.NoError(t, CleanupHookTaskTable(context.Background(), PerWebhook, 168*time.Hour, 1))
+       AssertExistsAndLoadBean(t, hookTask)
+}
+
+func TestCleanupHookTaskTable_OlderThan_DeletesDelivered(t *testing.T) {
+       assert.NoError(t, PrepareTestDatabase())
+       hookTask := &HookTask{
+               RepoID:      3,
+               HookID:      3,
+               Typ:         GITEA,
+               URL:         "http://www.example.com/unit_test",
+               Payloader:   &api.PushPayload{},
+               IsDelivered: true,
+               Delivered:   time.Now().AddDate(0, 0, -8).UnixNano(),
+       }
+       AssertNotExistsBean(t, hookTask)
+       assert.NoError(t, CreateHookTask(hookTask))
+       AssertExistsAndLoadBean(t, hookTask)
+
+       assert.NoError(t, CleanupHookTaskTable(context.Background(), OlderThan, 168*time.Hour, 0))
+       AssertNotExistsBean(t, hookTask)
+}
+
+func TestCleanupHookTaskTable_OlderThan_LeavesUndelivered(t *testing.T) {
+       assert.NoError(t, PrepareTestDatabase())
+       hookTask := &HookTask{
+               RepoID:      2,
+               HookID:      4,
+               Typ:         GITEA,
+               URL:         "http://www.example.com/unit_test",
+               Payloader:   &api.PushPayload{},
+               IsDelivered: false,
+       }
+       AssertNotExistsBean(t, hookTask)
+       assert.NoError(t, CreateHookTask(hookTask))
+       AssertExistsAndLoadBean(t, hookTask)
+
+       assert.NoError(t, CleanupHookTaskTable(context.Background(), OlderThan, 168*time.Hour, 0))
+       AssertExistsAndLoadBean(t, hookTask)
+}
+
+func TestCleanupHookTaskTable_OlderThan_LeavesTaskEarlierThanAgeToDelete(t *testing.T) {
+       assert.NoError(t, PrepareTestDatabase())
+       hookTask := &HookTask{
+               RepoID:      2,
+               HookID:      4,
+               Typ:         GITEA,
+               URL:         "http://www.example.com/unit_test",
+               Payloader:   &api.PushPayload{},
+               IsDelivered: true,
+               Delivered:   time.Now().AddDate(0, 0, -6).UnixNano(),
+       }
+       AssertNotExistsBean(t, hookTask)
+       assert.NoError(t, CreateHookTask(hookTask))
+       AssertExistsAndLoadBean(t, hookTask)
+
+       assert.NoError(t, CleanupHookTaskTable(context.Background(), OlderThan, 168*time.Hour, 0))
+       AssertExistsAndLoadBean(t, hookTask)
+}
index 5fe9ca6e14d82ad423ce65417a7bf9d9fcaa2143..d55e5b60ad69e6b749ba854d19a61d3d914c0d66 100644 (file)
@@ -40,6 +40,14 @@ type UpdateExistingConfig struct {
        UpdateExisting bool
 }
 
+// CleanupHookTaskConfig represents a cron task with settings to cleanup hook_task
+type CleanupHookTaskConfig struct {
+       BaseConfig
+       CleanupType  string
+       OlderThan    time.Duration
+       NumberToKeep int
+}
+
 // GetSchedule returns the schedule for the base config
 func (b *BaseConfig) GetSchedule() string {
        return b.Schedule
index a45704e889394b803cde2b22857c01230a4f2087..391cda0f891f53a3c230d69779b0b661de1f3426 100644 (file)
@@ -109,6 +109,22 @@ func registerUpdateMigrationPosterID() {
        })
 }
 
+func registerCleanupHookTaskTable() {
+       RegisterTaskFatal("cleanup_hook_task_table", &CleanupHookTaskConfig{
+               BaseConfig: BaseConfig{
+                       Enabled:    true,
+                       RunAtStart: false,
+                       Schedule:   "@every 24h",
+               },
+               CleanupType:  "OlderThan",
+               OlderThan:    168 * time.Hour,
+               NumberToKeep: 10,
+       }, func(ctx context.Context, _ *models.User, config Config) error {
+               realConfig := config.(*CleanupHookTaskConfig)
+               return models.CleanupHookTaskTable(ctx, models.ToHookTaskCleanupType(realConfig.CleanupType), realConfig.OlderThan, realConfig.NumberToKeep)
+       })
+}
+
 func initBasicTasks() {
        registerUpdateMirrorTask()
        registerRepoHealthCheck()
@@ -119,4 +135,5 @@ func initBasicTasks() {
        if !setting.Repository.DisableMigrations {
                registerUpdateMigrationPosterID()
        }
+       registerCleanupHookTaskTable()
 }
index 6c9604bf8babbb5a45562ae312c1cb0f8ba43e51..1fe43ce29bbf163dad5f407698b236ef775dac2e 100644 (file)
@@ -2065,6 +2065,7 @@ dashboard.resync_all_sshprincipals.desc = (Not needed for the built-in SSH serve
 dashboard.resync_all_hooks = Resynchronize pre-receive, update and post-receive hooks of all repositories.
 dashboard.reinit_missing_repos = Reinitialize all missing Git repositories for which records exist
 dashboard.sync_external_users = Synchronize external user data
+dashboard.cleanup_hook_task_table = Cleanup hook_task table
 dashboard.server_uptime = Server Uptime
 dashboard.current_goroutine = Current Goroutines
 dashboard.current_memory_usage = Current Memory Usage