diff options
author | Robin Appelman <robin@icewind.nl> | 2024-08-15 18:04:55 +0200 |
---|---|---|
committer | Louis <louis@chmn.me> | 2024-08-28 14:54:14 +0200 |
commit | b2645590cef45a99ee7c7d549f65b3432b81e422 (patch) | |
tree | cb78deb24c83ca555c77c75fab8f9cf470081d96 | |
parent | dc5f0f5fb73f9848ce0fa3f797ac4c817802d33f (diff) | |
download | nextcloud-server-b2645590cef45a99ee7c7d549f65b3432b81e422.tar.gz nextcloud-server-b2645590cef45a99ee7c7d549f65b3432b81e422.zip |
fix: implement sharding compatible cleanup for various bits
Signed-off-by: Robin Appelman <robin@icewind.nl>
-rw-r--r-- | apps/files/lib/BackgroundJob/DeleteOrphanedItems.php | 86 | ||||
-rw-r--r-- | apps/files/lib/Command/DeleteOrphanedFiles.php | 88 | ||||
-rw-r--r-- | apps/files/tests/Command/DeleteOrphanedFilesTest.php | 14 | ||||
-rw-r--r-- | apps/files_sharing/lib/DeleteOrphanedSharesJob.php | 41 | ||||
-rw-r--r-- | lib/private/DB/QueryBuilder/ExtendedQueryBuilder.php | 4 | ||||
-rw-r--r-- | lib/private/DB/QueryBuilder/Sharded/ShardedQueryBuilder.php | 6 | ||||
-rw-r--r-- | lib/private/Preview/BackgroundCleanupJob.php | 57 | ||||
-rw-r--r-- | lib/private/Repair/CleanTags.php | 17 | ||||
-rw-r--r-- | lib/public/DB/QueryBuilder/IQueryBuilder.php | 2 | ||||
-rw-r--r-- | tests/lib/Preview/BackgroundCleanupJobTest.php | 8 | ||||
-rw-r--r-- | tests/lib/Repair/CleanTagsTest.php | 9 |
11 files changed, 264 insertions, 68 deletions
diff --git a/apps/files/lib/BackgroundJob/DeleteOrphanedItems.php b/apps/files/lib/BackgroundJob/DeleteOrphanedItems.php index 32fb569a3d4..b1a795b775c 100644 --- a/apps/files/lib/BackgroundJob/DeleteOrphanedItems.php +++ b/apps/files/lib/BackgroundJob/DeleteOrphanedItems.php @@ -52,35 +52,87 @@ class DeleteOrphanedItems extends TimedJob { * @param string $typeCol * @return int Number of deleted entries */ - protected function cleanUp($table, $idCol, $typeCol) { + protected function cleanUp(string $table, string $idCol, string $typeCol): int { $deletedEntries = 0; - $query = $this->connection->getQueryBuilder(); - $query->select('t1.' . $idCol) - ->from($table, 't1') - ->where($query->expr()->eq($typeCol, $query->expr()->literal('files'))) - ->leftJoin('t1', 'filecache', 't2', $query->expr()->eq($query->expr()->castColumn('t1.' . $idCol, IQueryBuilder::PARAM_INT), 't2.fileid')) - ->andWhere($query->expr()->isNull('t2.fileid')) - ->groupBy('t1.' . $idCol) - ->setMaxResults(self::CHUNK_SIZE); - $deleteQuery = $this->connection->getQueryBuilder(); $deleteQuery->delete($table) - ->where($deleteQuery->expr()->in($idCol, $deleteQuery->createParameter('objectid'))); + ->where($deleteQuery->expr()->eq($idCol, $deleteQuery->createParameter('objectid'))); + + if ($this->connection->getShardDefinition('filecache')) { + $sourceIdChunks = $this->getItemIds($table, $idCol, $typeCol, 1000); + foreach ($sourceIdChunks as $sourceIdChunk) { + $deletedSources = $this->findMissingSources($sourceIdChunk); + $deleteQuery->setParameter('objectid', $deletedSources, IQueryBuilder::PARAM_INT_ARRAY); + $deletedEntries += $deleteQuery->executeStatement(); + } + } else { + $query = $this->connection->getQueryBuilder(); + $query->select('t1.' . $idCol) + ->from($table, 't1') + ->where($query->expr()->eq($typeCol, $query->expr()->literal('files'))) + ->leftJoin('t1', 'filecache', 't2', $query->expr()->eq($query->expr()->castColumn('t1.' . $idCol, IQueryBuilder::PARAM_INT), 't2.fileid')) + ->andWhere($query->expr()->isNull('t2.fileid')) + ->groupBy('t1.' . $idCol) + ->setMaxResults(self::CHUNK_SIZE); + + $deleteQuery = $this->connection->getQueryBuilder(); + $deleteQuery->delete($table) + ->where($deleteQuery->expr()->in($idCol, $deleteQuery->createParameter('objectid'))); - $deletedInLastChunk = self::CHUNK_SIZE; - while ($deletedInLastChunk === self::CHUNK_SIZE) { - $chunk = $query->executeQuery()->fetchAll(\PDO::FETCH_COLUMN); - $deletedInLastChunk = count($chunk); + $deletedInLastChunk = self::CHUNK_SIZE; + while ($deletedInLastChunk === self::CHUNK_SIZE) { + $chunk = $query->executeQuery()->fetchAll(\PDO::FETCH_COLUMN); + $deletedInLastChunk = count($chunk); - $deleteQuery->setParameter('objectid', $chunk, IQueryBuilder::PARAM_INT_ARRAY); - $deletedEntries += $deleteQuery->executeStatement(); + $deleteQuery->setParameter('objectid', $chunk, IQueryBuilder::PARAM_INT_ARRAY); + $deletedEntries += $deleteQuery->executeStatement(); + } } return $deletedEntries; } /** + * @param string $table + * @param string $idCol + * @param string $typeCol + * @param int $chunkSize + * @return \Iterator<int[]> + * @throws \OCP\DB\Exception + */ + private function getItemIds(string $table, string $idCol, string $typeCol, int $chunkSize): \Iterator { + $query = $this->connection->getQueryBuilder(); + $query->select($idCol) + ->from($table) + ->where($query->expr()->eq($typeCol, $query->expr()->literal('files'))) + ->groupBy($idCol) + ->andWhere($query->expr()->gt($idCol, $query->createParameter('min_id'))) + ->setMaxResults($chunkSize); + + $minId = 0; + while (true) { + $query->setParameter('min_id', $minId); + $rows = $query->executeQuery()->fetchAll(\PDO::FETCH_COLUMN); + if (count($rows) > 0) { + $minId = $rows[count($rows) - 1]; + yield $rows; + } else { + break; + } + } + } + + private function findMissingSources(array $ids): array { + $qb = $this->connection->getQueryBuilder(); + $qb->select('fileid') + ->from('filecache') + ->where($qb->expr()->in('fileid', $qb->createNamedParameter($ids, IQueryBuilder::PARAM_INT_ARRAY))); + $found = $qb->executeQuery()->fetchAll(\PDO::FETCH_COLUMN); + return array_diff($ids, $found); + } + + /** * Deleting orphaned system tag mappings * * @return int Number of deleted entries diff --git a/apps/files/lib/Command/DeleteOrphanedFiles.php b/apps/files/lib/Command/DeleteOrphanedFiles.php index b7101c07258..8f93242b255 100644 --- a/apps/files/lib/Command/DeleteOrphanedFiles.php +++ b/apps/files/lib/Command/DeleteOrphanedFiles.php @@ -35,17 +35,18 @@ class DeleteOrphanedFiles extends Command { public function execute(InputInterface $input, OutputInterface $output): int { $deletedEntries = 0; + $fileIdsByStorage = []; - $query = $this->connection->getQueryBuilder(); - $query->select('fc.fileid') - ->from('filecache', 'fc') - ->where($query->expr()->isNull('s.numeric_id')) - ->leftJoin('fc', 'storages', 's', $query->expr()->eq('fc.storage', 's.numeric_id')) - ->setMaxResults(self::CHUNK_SIZE); + $deletedStorages = array_diff($this->getReferencedStorages(), $this->getExistingStorages()); + + $deleteExtended = !$input->getOption('skip-filecache-extended'); + if ($deleteExtended) { + $fileIdsByStorage = $this->getFileIdsForStorages($deletedStorages); + } $deleteQuery = $this->connection->getQueryBuilder(); $deleteQuery->delete('filecache') - ->where($deleteQuery->expr()->eq('fileid', $deleteQuery->createParameter('objectid'))); + ->where($deleteQuery->expr()->in('storage', $deleteQuery->createParameter('storage_ids'))); $deletedInLastChunk = self::CHUNK_SIZE; while ($deletedInLastChunk === self::CHUNK_SIZE) { @@ -61,8 +62,8 @@ class DeleteOrphanedFiles extends Command { $output->writeln("$deletedEntries orphaned file cache entries deleted"); - if (!$input->getOption('skip-filecache-extended')) { - $deletedFileCacheExtended = $this->cleanupOrphanedFileCacheExtended(); + if ($deleteExtended) { + $deletedFileCacheExtended = $this->cleanupOrphanedFileCacheExtended($fileIdsByStorage); $output->writeln("$deletedFileCacheExtended orphaned file cache extended entries deleted"); } @@ -72,28 +73,63 @@ class DeleteOrphanedFiles extends Command { return self::SUCCESS; } - private function cleanupOrphanedFileCacheExtended(): int { - $deletedEntries = 0; - + private function getReferencedStorages(): array { $query = $this->connection->getQueryBuilder(); - $query->select('fce.fileid') - ->from('filecache_extended', 'fce') - ->leftJoin('fce', 'filecache', 'fc', $query->expr()->eq('fce.fileid', 'fc.fileid')) - ->where($query->expr()->isNull('fc.fileid')) - ->setMaxResults(self::CHUNK_SIZE); + $query->select('storage') + ->from('filecache') + ->groupBy('storage') + ->runAcrossAllShards(); + return $query->executeQuery()->fetchAll(\PDO::FETCH_COLUMN); + } - $deleteQuery = $this->connection->getQueryBuilder(); - $deleteQuery->delete('filecache_extended') - ->where($deleteQuery->expr()->in('fileid', $deleteQuery->createParameter('idsToDelete'))); + private function getExistingStorages(): array { + $query = $this->connection->getQueryBuilder(); + $query->select('numeric_id') + ->from('storages') + ->groupBy('numeric_id'); + return $query->executeQuery()->fetchAll(\PDO::FETCH_COLUMN); + } - $result = $query->executeQuery(); - while ($result->rowCount() > 0) { - $idsToDelete = $result->fetchAll(\PDO::FETCH_COLUMN); + /** + * @param int[] $storageIds + * @return array<int, int[]> + */ + private function getFileIdsForStorages(array $storageIds): array { + $query = $this->connection->getQueryBuilder(); + $query->select('storage', 'fileid') + ->from('filecache') + ->where($query->expr()->in('storage', $query->createParameter('storage_ids'))); + + $result = []; + $storageIdChunks = array_chunk($storageIds, self::CHUNK_SIZE); + foreach ($storageIdChunks as $storageIdChunk) { + $query->setParameter('storage_ids', $storageIdChunk, IQueryBuilder::PARAM_INT_ARRAY); + $chunk = $query->executeQuery()->fetchAll(); + foreach ($chunk as $row) { + $result[$row['storage']][] = $row['fileid']; + } + } + return $result; + } - $deleteQuery->setParameter('idsToDelete', $idsToDelete, IQueryBuilder::PARAM_INT_ARRAY); - $deletedEntries += $deleteQuery->executeStatement(); + /** + * @param array<int, int[]> $fileIdsByStorage + * @return int + */ + private function cleanupOrphanedFileCacheExtended(array $fileIdsByStorage): int { + $deletedEntries = 0; - $result = $query->executeQuery(); + $deleteQuery = $this->connection->getQueryBuilder(); + $deleteQuery->delete('filecache_extended') + ->where($deleteQuery->expr()->in('fileid', $deleteQuery->createParameter('file_ids'))); + + foreach ($fileIdsByStorage as $storageId => $fileIds) { + $deleteQuery->hintShardKey('storage', $storageId, true); + $fileChunks = array_chunk($fileIds, self::CHUNK_SIZE); + foreach ($fileChunks as $fileChunk) { + $deleteQuery->setParameter('file_ids', $fileChunk, IQueryBuilder::PARAM_INT_ARRAY); + $deletedEntries += $deleteQuery->executeStatement(); + } } return $deletedEntries; diff --git a/apps/files/tests/Command/DeleteOrphanedFilesTest.php b/apps/files/tests/Command/DeleteOrphanedFilesTest.php index e52f9e1e130..ed9a1866d26 100644 --- a/apps/files/tests/Command/DeleteOrphanedFilesTest.php +++ b/apps/files/tests/Command/DeleteOrphanedFilesTest.php @@ -64,13 +64,19 @@ class DeleteOrphanedFilesTest extends TestCase { } protected function getFile($fileId) { - $stmt = $this->connection->executeQuery('SELECT * FROM `*PREFIX*filecache` WHERE `fileid` = ?', [$fileId]); - return $stmt->fetchAll(); + $query = $this->connection->getQueryBuilder(); + $query->select('*') + ->from('filecache') + ->where($query->expr()->eq('fileid', $query->createNamedParameter($fileId))); + return $query->executeQuery()->fetchAll(); } protected function getMounts($storageId) { - $stmt = $this->connection->executeQuery('SELECT * FROM `*PREFIX*mounts` WHERE `storage_id` = ?', [$storageId]); - return $stmt->fetchAll(); + $query = $this->connection->getQueryBuilder(); + $query->select('*') + ->from('mounts') + ->where($query->expr()->eq('storage_id', $query->createNamedParameter($storageId))); + return $query->executeQuery()->fetchAll(); } /** diff --git a/apps/files_sharing/lib/DeleteOrphanedSharesJob.php b/apps/files_sharing/lib/DeleteOrphanedSharesJob.php index 9a052b3d126..bcbdbaba531 100644 --- a/apps/files_sharing/lib/DeleteOrphanedSharesJob.php +++ b/apps/files_sharing/lib/DeleteOrphanedSharesJob.php @@ -55,6 +55,11 @@ class DeleteOrphanedSharesJob extends TimedJob { * @param array $argument unused argument */ public function run($argument) { + if ($this->db->getShardDefinition('filecache')) { + $this->shardingCleanup(); + return; + } + $qbSelect = $this->db->getQueryBuilder(); $qbSelect->select('id') ->from('share', 's') @@ -96,4 +101,40 @@ class DeleteOrphanedSharesJob extends TimedJob { }, $this->db); } while ($deleted >= self::CHUNK_SIZE && $this->time->getTime() <= $cutOff); } + + private function shardingCleanup(): void { + $qb = $this->db->getQueryBuilder(); + $qb->selectDistinct('file_source') + ->from('share', 's'); + $sourceFiles = $qb->executeQuery()->fetchAll(PDO::FETCH_COLUMN); + + $deleteQb = $this->db->getQueryBuilder(); + $deleteQb->delete('share') + ->where( + $deleteQb->expr()->in('file_source', $deleteQb->createParameter('ids'), IQueryBuilder::PARAM_INT_ARRAY) + ); + + $chunks = array_chunk($sourceFiles, self::CHUNK_SIZE); + foreach ($chunks as $chunk) { + $deletedFiles = $this->findMissingSources($chunk); + $this->atomic(function () use ($deletedFiles, $deleteQb) { + $deleteQb->setParameter('ids', $deletedFiles, IQueryBuilder::PARAM_INT_ARRAY); + $deleted = $deleteQb->executeStatement(); + $this->logger->debug("{deleted} orphaned share(s) deleted", [ + 'app' => 'DeleteOrphanedSharesJob', + 'deleted' => $deleted, + ]); + return $deleted; + }, $this->db); + } + } + + private function findMissingSources(array $ids): array { + $qb = $this->db->getQueryBuilder(); + $qb->select('fileid') + ->from('filecache') + ->where($qb->expr()->in('fileid', $qb->createNamedParameter($ids, IQueryBuilder::PARAM_INT_ARRAY))); + $found = $qb->executeQuery()->fetchAll(\PDO::FETCH_COLUMN); + return array_diff($ids, $found); + } } diff --git a/lib/private/DB/QueryBuilder/ExtendedQueryBuilder.php b/lib/private/DB/QueryBuilder/ExtendedQueryBuilder.php index 8ed88198c19..c40cadfbdb5 100644 --- a/lib/private/DB/QueryBuilder/ExtendedQueryBuilder.php +++ b/lib/private/DB/QueryBuilder/ExtendedQueryBuilder.php @@ -289,8 +289,8 @@ abstract class ExtendedQueryBuilder implements IQueryBuilder { return $this->builder->executeStatement($connection); } - public function hintShardKey(string $column, mixed $value) { - $this->builder->hintShardKey($column, $value); + public function hintShardKey(string $column, mixed $value, bool $overwrite = false) { + $this->builder->hintShardKey($column, $value, $overwrite); return $this; } diff --git a/lib/private/DB/QueryBuilder/Sharded/ShardedQueryBuilder.php b/lib/private/DB/QueryBuilder/Sharded/ShardedQueryBuilder.php index 6496453a1a6..650e414096e 100644 --- a/lib/private/DB/QueryBuilder/Sharded/ShardedQueryBuilder.php +++ b/lib/private/DB/QueryBuilder/Sharded/ShardedQueryBuilder.php @@ -296,7 +296,11 @@ class ShardedQueryBuilder extends ExtendedQueryBuilder { ]; } - public function hintShardKey(string $column, mixed $value) { + public function hintShardKey(string $column, mixed $value, bool $overwrite = false) { + if ($overwrite) { + $this->primaryKeys = []; + $this->shardKeys = []; + } if ($this->shardDefinition?->isKey($column)) { $this->primaryKeys[] = $value; } diff --git a/lib/private/Preview/BackgroundCleanupJob.php b/lib/private/Preview/BackgroundCleanupJob.php index deadcd007b1..acf7bf22f52 100644 --- a/lib/private/Preview/BackgroundCleanupJob.php +++ b/lib/private/Preview/BackgroundCleanupJob.php @@ -16,6 +16,7 @@ use OCP\Files\IMimeTypeLoader; use OCP\Files\NotFoundException; use OCP\Files\NotPermittedException; use OCP\IDBConnection; +use function Symfony\Component\Translation\t; class BackgroundCleanupJob extends TimedJob { /** @var IDBConnection */ @@ -64,6 +65,11 @@ class BackgroundCleanupJob extends TimedJob { } private function getOldPreviewLocations(): \Iterator { + if ($this->connection->getShardDefinition('filecache')) { + // sharding is new enough that we don't need to support this + return; + } + $qb = $this->connection->getQueryBuilder(); $qb->select('a.name') ->from('filecache', 'a') @@ -106,6 +112,15 @@ class BackgroundCleanupJob extends TimedJob { return []; } + if ($this->connection->getShardDefinition('filecache')) { + $chunks = $this->getAllPreviewIds($data['path'], 1000); + foreach ($chunks as $chunk) { + yield from $this->findMissingSources($chunk); + } + + return; + } + /* * This lovely like is the result of the way the new previews are stored * We take the md5 of the name (fileid) and split the first 7 chars. That way @@ -155,4 +170,46 @@ class BackgroundCleanupJob extends TimedJob { $cursor->closeCursor(); } + + private function getAllPreviewIds(string $previewRoot, int $chunkSize): \Iterator { + // See `getNewPreviewLocations` for some more info about the logic here + $like = $this->connection->escapeLikeParameter($previewRoot). '/_/_/_/_/_/_/_/%'; + + $qb = $this->connection->getQueryBuilder(); + $qb->select('name', 'fileid') + ->from('filecache') + ->where( + $qb->expr()->andX( + $qb->expr()->eq('storage', $qb->createNamedParameter($this->previewFolder->getStorageId())), + $qb->expr()->like('path', $qb->createNamedParameter($like)), + $qb->expr()->eq('mimetype', $qb->createNamedParameter($this->mimeTypeLoader->getId('httpd/unix-directory'))), + $qb->expr()->gt('fileid', $qb->createParameter('min_id')), + ) + ) + ->orderBy('fileid', 'ASC') + ->setMaxResults($chunkSize); + + $minId = 0; + while (true) { + $qb->setParameter('min_id', $minId); + $rows = $qb->executeQuery()->fetchAll(); + if (count($rows) > 0) { + $minId = $rows[count($rows) - 1]['fileid']; + yield array_map(function ($row) { + return (int)$row['name']; + }, $rows); + } else { + break; + } + } + } + + private function findMissingSources(array $ids): array { + $qb = $this->connection->getQueryBuilder(); + $qb->select('fileid') + ->from('filecache') + ->where($qb->expr()->in('fileid', $qb->createNamedParameter($ids, IQueryBuilder::PARAM_INT_ARRAY))); + $found = $qb->executeQuery()->fetchAll(\PDO::FETCH_COLUMN); + return array_diff($ids, $found); + } } diff --git a/lib/private/Repair/CleanTags.php b/lib/private/Repair/CleanTags.php index f2fc8156f29..b7960a1def1 100644 --- a/lib/private/Repair/CleanTags.php +++ b/lib/private/Repair/CleanTags.php @@ -107,7 +107,7 @@ class CleanTags implements IRepairStep { $output, '%d tags for delete files have been removed.', 'vcategory_to_object', 'objid', - 'filecache', 'fileid', 'path_hash' + 'filecache', 'fileid', 'fileid' ); } @@ -169,16 +169,17 @@ class CleanTags implements IRepairStep { $orphanItems[] = (int) $row[$deleteId]; } + $deleteQuery = $this->connection->getQueryBuilder(); + $deleteQuery->delete($deleteTable) + ->where( + $deleteQuery->expr()->eq('type', $deleteQuery->expr()->literal('files')) + ) + ->andWhere($deleteQuery->expr()->in($deleteId, $deleteQuery->createParameter('ids'))); if (!empty($orphanItems)) { $orphanItemsBatch = array_chunk($orphanItems, 200); foreach ($orphanItemsBatch as $items) { - $qb->delete($deleteTable) - ->where( - $qb->expr()->eq('type', $qb->expr()->literal('files')) - ) - ->andWhere($qb->expr()->in($deleteId, $qb->createParameter('ids'))); - $qb->setParameter('ids', $items, IQueryBuilder::PARAM_INT_ARRAY); - $qb->execute(); + $deleteQuery->setParameter('ids', $items, IQueryBuilder::PARAM_INT_ARRAY); + $deleteQuery->executeStatement(); } } diff --git a/lib/public/DB/QueryBuilder/IQueryBuilder.php b/lib/public/DB/QueryBuilder/IQueryBuilder.php index 048de26c22a..b673c5ef6ec 100644 --- a/lib/public/DB/QueryBuilder/IQueryBuilder.php +++ b/lib/public/DB/QueryBuilder/IQueryBuilder.php @@ -1036,7 +1036,7 @@ interface IQueryBuilder { * @return $this * @since 30.0.0 */ - public function hintShardKey(string $column, mixed $value); + public function hintShardKey(string $column, mixed $value, bool $overwrite = false); /** * Set the query to run across all shards if sharding is enabled. diff --git a/tests/lib/Preview/BackgroundCleanupJobTest.php b/tests/lib/Preview/BackgroundCleanupJobTest.php index c07ec42b36b..ccd5dba69cf 100644 --- a/tests/lib/Preview/BackgroundCleanupJobTest.php +++ b/tests/lib/Preview/BackgroundCleanupJobTest.php @@ -146,6 +146,10 @@ class BackgroundCleanupJobTest extends \Test\TestCase { } public function testCleanupAjax() { + if ($this->connection->getShardDefinition('filecache')) { + $this->markTestSkipped("ajax cron is not supported for sharded setups"); + return; + } $files = $this->setup11Previews(); $fileIds = array_map(function (File $f) { return $f->getId(); @@ -174,6 +178,10 @@ class BackgroundCleanupJobTest extends \Test\TestCase { } public function testOldPreviews() { + if ($this->connection->getShardDefinition('filecache')) { + $this->markTestSkipped("old previews are not supported for sharded setups"); + return; + } $appdata = \OC::$server->getAppDataDir('preview'); $f1 = $appdata->newFolder('123456781'); diff --git a/tests/lib/Repair/CleanTagsTest.php b/tests/lib/Repair/CleanTagsTest.php index 5e7b82d2198..80c0b2c9770 100644 --- a/tests/lib/Repair/CleanTagsTest.php +++ b/tests/lib/Repair/CleanTagsTest.php @@ -194,13 +194,4 @@ class CleanTagsTest extends \Test\TestCase { $this->createdFile = (int) $this->getLastInsertID('filecache', 'fileid'); return $this->createdFile; } - - /** - * @param $tableName - * @param $idName - * @return int - */ - protected function getLastInsertID($tableName, $idName) { - return $this->connection->lastInsertId("*PREFIX*$tableName"); - } } |