aboutsummaryrefslogtreecommitdiffstats
path: root/modules/indexer
diff options
context:
space:
mode:
authordark-angel <70754989+inferno-umar@users.noreply.github.com>2024-02-07 14:27:16 +0530
committerGitHub <noreply@github.com>2024-02-07 08:57:16 +0000
commit5c0fc9087211f01375f208d679a1e6de0685320c (patch)
treee35f59688febc6b66e8bc0a5be2fbe0b4e61d59f /modules/indexer
parent2bac85dc3339ecb1cb1418524b2a7966aed9963a (diff)
downloadgitea-5c0fc9087211f01375f208d679a1e6de0685320c.tar.gz
gitea-5c0fc9087211f01375f208d679a1e6de0685320c.zip
fix: Elasticsearch: Request Entity Too Large #28117 (#29062)
Fix for Gitea putting everything into one request without batching and sending it to Elasticsearch for indexing, as reported in #28117. This issue occurred in large repositories when Gitea tried to index the code using Elasticsearch. I've applied the necessary changes, which take the batch length from the config below (app.ini) ``` [queue.code_indexer] BATCH_LENGTH=<length_int> ``` and batch all requests to Elasticsearch in chunks as configured in the above config.
Diffstat (limited to 'modules/indexer')
-rw-r--r--modules/indexer/code/elasticsearch/elasticsearch.go16
1 files changed, 11 insertions, 5 deletions
diff --git a/modules/indexer/code/elasticsearch/elasticsearch.go b/modules/indexer/code/elasticsearch/elasticsearch.go
index 2fadbfeb06..0f70f13485 100644
--- a/modules/indexer/code/elasticsearch/elasticsearch.go
+++ b/modules/indexer/code/elasticsearch/elasticsearch.go
@@ -180,11 +180,17 @@ func (b *Indexer) Index(ctx context.Context, repo *repo_model.Repository, sha st
}
if len(reqs) > 0 {
- _, err := b.inner.Client.Bulk().
- Index(b.inner.VersionedIndexName()).
- Add(reqs...).
- Do(ctx)
- return err
+ esBatchSize := 50
+
+ for i := 0; i < len(reqs); i += esBatchSize {
+ _, err := b.inner.Client.Bulk().
+ Index(b.inner.VersionedIndexName()).
+ Add(reqs[i:min(i+esBatchSize, len(reqs))]...).
+ Do(ctx)
+ if err != nil {
+ return err
+ }
+ }
}
return nil
}