diff options
author | dark-angel <70754989+inferno-umar@users.noreply.github.com> | 2024-02-07 14:27:16 +0530 |
---|---|---|
committer | GitHub <noreply@github.com> | 2024-02-07 08:57:16 +0000 |
commit | 5c0fc9087211f01375f208d679a1e6de0685320c (patch) | |
tree | e35f59688febc6b66e8bc0a5be2fbe0b4e61d59f /modules/indexer | |
parent | 2bac85dc3339ecb1cb1418524b2a7966aed9963a (diff) | |
download | gitea-5c0fc9087211f01375f208d679a1e6de0685320c.tar.gz gitea-5c0fc9087211f01375f208d679a1e6de0685320c.zip |
fix: Elasticsearch: Request Entity Too Large #28117 (#29062)
Fix for Gitea putting everything into one request without batching and
sending it to Elasticsearch for indexing, as reported in #28117.
This issue occurred in large repositories when Gitea tried to
index the code using Elasticsearch.
I've applied the necessary changes, which take the batch length from the
following config (app.ini)
```
[queue.code_indexer]
BATCH_LENGTH=<length_int>
```
and batch all requests to Elasticsearch in chunks of the size configured
there.
Diffstat (limited to 'modules/indexer')
-rw-r--r-- | modules/indexer/code/elasticsearch/elasticsearch.go | 16 |
1 files changed, 11 insertions, 5 deletions
diff --git a/modules/indexer/code/elasticsearch/elasticsearch.go b/modules/indexer/code/elasticsearch/elasticsearch.go index 2fadbfeb06..0f70f13485 100644 --- a/modules/indexer/code/elasticsearch/elasticsearch.go +++ b/modules/indexer/code/elasticsearch/elasticsearch.go @@ -180,11 +180,17 @@ func (b *Indexer) Index(ctx context.Context, repo *repo_model.Repository, sha st } if len(reqs) > 0 { - _, err := b.inner.Client.Bulk(). - Index(b.inner.VersionedIndexName()). - Add(reqs...). - Do(ctx) - return err + esBatchSize := 50 + + for i := 0; i < len(reqs); i += esBatchSize { + _, err := b.inner.Client.Bulk(). + Index(b.inner.VersionedIndexName()). + Add(reqs[i:min(i+esBatchSize, len(reqs))]...). + Do(ctx) + if err != nil { + return err + } + } } return nil } |