From 80a25531f73c436660458fbe88acd07c9ed6434b Mon Sep 17 00:00:00 2001 From: Robin Appelman Date: Thu, 25 Jul 2024 18:46:47 +0200 Subject: fix: make background scan job compatible with sharding Signed-off-by: Robin Appelman --- apps/files/lib/BackgroundJob/ScanFiles.php | 70 +++++++++++++++++++++++++----- 1 file changed, 58 insertions(+), 12 deletions(-) diff --git a/apps/files/lib/BackgroundJob/ScanFiles.php b/apps/files/lib/BackgroundJob/ScanFiles.php index 97122e738e2..42beb70aaf5 100644 --- a/apps/files/lib/BackgroundJob/ScanFiles.php +++ b/apps/files/lib/BackgroundJob/ScanFiles.php @@ -33,11 +33,11 @@ class ScanFiles extends TimedJob { public const USERS_PER_SESSION = 500; public function __construct( - IConfig $config, + IConfig $config, IEventDispatcher $dispatcher, - LoggerInterface $logger, - IDBConnection $connection, - ITimeFactory $time + LoggerInterface $logger, + IDBConnection $connection, + ITimeFactory $time ) { parent::__construct($time); // Run once per 10 minutes @@ -70,15 +70,61 @@ class ScanFiles extends TimedJob { * @return string|false */ private function getUserToScan() { + if ($this->connection->getShardDefinition("filecache")) { + // for sharded filecache, the "LIMIT" from the normal query doesn't work + + // first we try it with a "LEFT JOIN" on mounts, this is fast, but might return a storage that isn't mounted. + // we also ask for up to 10 results from different storages to increase the odds of finding a result that is mounted + $query = $this->connection->getQueryBuilder(); + $query->select('m.user_id') + ->from('filecache', 'f') + ->leftJoin('f', 'mounts', 'm', $query->expr()->eq('m.storage_id', 'f.storage')) + ->where($query->expr()->lt('f.size', $query->createNamedParameter(0, IQueryBuilder::PARAM_INT))) + ->andWhere($query->expr()->gt('f.parent', $query->createNamedParameter(-1, IQueryBuilder::PARAM_INT))) + ->setMaxResults(10) + ->groupBy("f.storage") + ->runAcrossAllShards(); + + $result = $query->executeQuery(); + while ($res = $result->fetch()) { + if ($res['user_id']) { + return $res['user_id']; + } + } + + // as a fallback, we try a slower approach where we find all mounted storages first + // this is essentially doing the inner join manually + $storages = $this->getAllMountedStorages(); + + $query = $this->connection->getQueryBuilder(); + $query->select('m.user_id') + ->from('filecache', 'f') + ->leftJoin('f', 'mounts', 'm', $query->expr()->eq('m.storage_id', 'f.storage')) + ->where($query->expr()->lt('f.size', $query->createNamedParameter(0, IQueryBuilder::PARAM_INT))) + ->andWhere($query->expr()->gt('f.parent', $query->createNamedParameter(-1, IQueryBuilder::PARAM_INT))) + ->andWhere($query->expr()->in('f.storage', $query->createNamedParameter($storages, IQueryBuilder::PARAM_INT_ARRAY))) + ->setMaxResults(1) + ->runAcrossAllShards(); + return $query->executeQuery()->fetchOne(); + } else { + $query = $this->connection->getQueryBuilder(); + $query->select('m.user_id') + ->from('filecache', 'f') + ->innerJoin('f', 'mounts', 'm', $query->expr()->eq('m.storage_id', 'f.storage')) + ->where($query->expr()->lt('f.size', $query->createNamedParameter(0, IQueryBuilder::PARAM_INT))) + ->andWhere($query->expr()->gt('f.parent', $query->createNamedParameter(-1, IQueryBuilder::PARAM_INT))) + ->setMaxResults(1) + ->runAcrossAllShards(); + + return $query->executeQuery()->fetchOne(); + } + } + + private function getAllMountedStorages(): array { $query = $this->connection->getQueryBuilder(); - $query->select('user_id') - ->from('filecache', 'f') - ->innerJoin('f', 'mounts', 'm', $query->expr()->eq('storage_id', 'storage')) - ->where($query->expr()->lt('size', $query->createNamedParameter(0, IQueryBuilder::PARAM_INT))) - ->andWhere($query->expr()->gt('parent', $query->createNamedParameter(-1, IQueryBuilder::PARAM_INT))) - ->setMaxResults(1); - - return $query->executeQuery()->fetchOne(); + $query->selectDistinct('storage_id') + ->from('mounts'); + return $query->executeQuery()->fetchAll(\PDO::FETCH_COLUMN); } /** -- cgit v1.2.3