diff options
Diffstat (limited to 'lib/private/DB')
22 files changed, 2256 insertions, 3 deletions
<?php

declare(strict_types=1);
/**
 * SPDX-FileCopyrightText: 2024 Nextcloud GmbH and Nextcloud contributors
 * SPDX-License-Identifier: AGPL-3.0-only
 */

namespace OC\DB;

use OCP\DB\IResult;
use PDO;

/**
 * Result implementation backed by an in-memory list of rows
 *
 * `fetch()` consumes rows from the front of the list, while `rowCount()` keeps
 * reporting the number of rows the result was created with.
 */
class ArrayResult implements IResult {
	/** Number of rows handed to the constructor */
	protected int $count;

	public function __construct(
		protected array $rows,
	) {
		$this->count = count($rows);
	}

	/**
	 * Nothing to release for an in-memory result
	 */
	public function closeCursor(): bool {
		return true;
	}

	/**
	 * Take the next row off the list
	 *
	 * @return mixed the row in the requested shape, or false once no (non-empty) row is left
	 * @throws \InvalidArgumentException for fetch modes other than ASSOC, NUM and COLUMN
	 */
	public function fetch(int $fetchMode = PDO::FETCH_ASSOC) {
		$next = array_shift($this->rows);
		if ($next) {
			// the fetch mode is only validated when there is a row to convert
			if ($fetchMode === PDO::FETCH_ASSOC) {
				return $next;
			}
			if ($fetchMode === PDO::FETCH_NUM) {
				return array_values($next);
			}
			if ($fetchMode === PDO::FETCH_COLUMN) {
				return current($next);
			}
			throw new \InvalidArgumentException('Fetch mode not supported for array result');
		}
		return false;
	}

	/**
	 * Return all rows that have not been consumed by `fetch()` yet, without consuming them
	 *
	 * @throws \InvalidArgumentException for fetch modes other than ASSOC, NUM and COLUMN
	 */
	public function fetchAll(int $fetchMode = PDO::FETCH_ASSOC): array {
		if ($fetchMode === PDO::FETCH_ASSOC) {
			return $this->rows;
		}
		if ($fetchMode === PDO::FETCH_NUM) {
			return array_map(static fn ($row) => array_values($row), $this->rows);
		}
		if ($fetchMode === PDO::FETCH_COLUMN) {
			return array_map(static fn ($row) => current($row), $this->rows);
		}
		throw new \InvalidArgumentException('Fetch mode not supported for array result');
	}

	/**
	 * Alias of `fetchOne()`
	 */
	public function fetchColumn() {
		return $this->fetchOne();
	}

	/**
	 * Consume the next row and return its first value, or false when no row is left
	 */
	public function fetchOne() {
		$next = $this->fetch();
		return $next ? current($next) : false;
	}

	public function rowCount(): int {
		return $this->count;
	}
}
Doctrine\DBAL\Platforms\SqlitePlatform; use Doctrine\DBAL\Result; use Doctrine\DBAL\Schema\Schema; use Doctrine\DBAL\Statement; +use OC\DB\QueryBuilder\Partitioned\PartitionedQueryBuilder; +use OC\DB\QueryBuilder\Partitioned\PartitionSplit; use OC\DB\QueryBuilder\QueryBuilder; +use OC\DB\QueryBuilder\Sharded\AutoIncrementHandler; +use OC\DB\QueryBuilder\Sharded\CrossShardMoveHelper; +use OC\DB\QueryBuilder\Sharded\RoundRobinShardMapper; +use OC\DB\QueryBuilder\Sharded\ShardConnectionManager; +use OC\DB\QueryBuilder\Sharded\ShardDefinition; use OC\SystemConfig; use OCP\DB\QueryBuilder\IQueryBuilder; +use OCP\DB\QueryBuilder\Sharded\IShardMapper; use OCP\Diagnostics\IEventLogger; +use OCP\ICacheFactory; use OCP\IDBConnection; use OCP\ILogger; use OCP\IRequestId; @@ -76,6 +85,28 @@ class Connection extends PrimaryReadReplicaConnection { protected bool $logRequestId; protected string $requestId; + /** @var array<string, list<string>> */ + protected array $partitions; + /** @var ShardDefinition[] */ + protected array $shards = []; + protected ShardConnectionManager $shardConnectionManager; + protected AutoIncrementHandler $autoIncrementHandler; + + public const SHARD_PRESETS = [ + 'filecache' => [ + 'companion_keys' => [ + 'file_id', + ], + 'companion_tables' => [ + 'filecache_extended', + 'files_metadata', + ], + 'primary_key' => 'fileid', + 'shard_key' => 'storage', + 'table' => 'filecache', + ], + ]; + /** * Initializes a new instance of the Connection class. * @@ -100,6 +131,13 @@ class Connection extends PrimaryReadReplicaConnection { $this->adapter = new $params['adapter']($this); $this->tablePrefix = $params['tablePrefix']; + /** @psalm-suppress InvalidArrayOffset */ + $this->shardConnectionManager = $this->params['shard_connection_manager'] ?? Server::get(ShardConnectionManager::class); + /** @psalm-suppress InvalidArrayOffset */ + $this->autoIncrementHandler = $this->params['auto_increment_handler'] ?? 
new AutoIncrementHandler( + Server::get(ICacheFactory::class), + $this->shardConnectionManager, + ); $this->systemConfig = \OC::$server->getSystemConfig(); $this->clock = Server::get(ClockInterface::class); $this->logger = Server::get(LoggerInterface::class); @@ -118,10 +156,52 @@ class Connection extends PrimaryReadReplicaConnection { $this->_config->setSQLLogger($debugStack); } + /** @var array<string, array{shards: array[], mapper: ?string}> $shardConfig */ + $shardConfig = $this->params['sharding'] ?? []; + $shardNames = array_keys($shardConfig); + $this->shards = array_map(function (array $config, string $name) { + if (!isset(self::SHARD_PRESETS[$name])) { + throw new \Exception("Shard preset $name not found"); + } + + $shardMapperClass = $config['mapper'] ?? RoundRobinShardMapper::class; + $shardMapper = Server::get($shardMapperClass); + if (!$shardMapper instanceof IShardMapper) { + throw new \Exception("Invalid shard mapper: $shardMapperClass"); + } + return new ShardDefinition( + self::SHARD_PRESETS[$name]['table'], + self::SHARD_PRESETS[$name]['primary_key'], + self::SHARD_PRESETS[$name]['companion_keys'], + self::SHARD_PRESETS[$name]['shard_key'], + $shardMapper, + self::SHARD_PRESETS[$name]['companion_tables'], + $config['shards'] + ); + }, $shardConfig, $shardNames); + $this->shards = array_combine($shardNames, $this->shards); + $this->partitions = array_map(function (ShardDefinition $shard) { + return array_merge([$shard->table], $shard->companionTables); + }, $this->shards); + $this->setNestTransactionsWithSavepoints(true); } /** + * @return IDBConnection[] + */ + public function getShardConnections(): array { + $connections = []; + foreach ($this->shards as $shardDefinition) { + foreach ($shardDefinition->getAllShards() as $shard) { + /** @var ConnectionAdapter $connection */ + $connections[] = $this->shardConnectionManager->getConnection($shardDefinition, $shard); + } + } + return $connections; + } + + /** * @throws Exception */ public function 
connect($connectionName = null) { @@ -169,11 +249,27 @@ class Connection extends PrimaryReadReplicaConnection { */ public function getQueryBuilder(): IQueryBuilder { $this->queriesBuilt++; - return new QueryBuilder( + + $builder = new QueryBuilder( new ConnectionAdapter($this), $this->systemConfig, $this->logger ); + if (count($this->partitions) > 0) { + $builder = new PartitionedQueryBuilder( + $builder, + $this->shards, + $this->shardConnectionManager, + $this->autoIncrementHandler, + ); + foreach ($this->partitions as $name => $tables) { + $partition = new PartitionSplit($name, $tables); + $builder->addPartition($partition); + } + return $builder; + } else { + return $builder; + } } /** @@ -687,6 +783,9 @@ class Connection extends PrimaryReadReplicaConnection { return $migrator->generateChangeScript($toSchema); } else { $migrator->migrate($toSchema); + foreach ($this->getShardConnections() as $shardConnection) { + $shardConnection->migrateToSchema($toSchema); + } } } @@ -829,4 +928,12 @@ class Connection extends PrimaryReadReplicaConnection { } } } + + public function getShardDefinition(string $name): ?ShardDefinition { + return $this->shards[$name] ?? 
null; + } + + public function getCrossShardMoveHelper(): CrossShardMoveHelper { + return new CrossShardMoveHelper($this->shardConnectionManager); + } } diff --git a/lib/private/DB/ConnectionAdapter.php b/lib/private/DB/ConnectionAdapter.php index 88083711195..2baeda9cfb7 100644 --- a/lib/private/DB/ConnectionAdapter.php +++ b/lib/private/DB/ConnectionAdapter.php @@ -12,6 +12,8 @@ use Doctrine\DBAL\Exception; use Doctrine\DBAL\Platforms\AbstractPlatform; use Doctrine\DBAL\Schema\Schema; use OC\DB\Exceptions\DbalException; +use OC\DB\QueryBuilder\Sharded\CrossShardMoveHelper; +use OC\DB\QueryBuilder\Sharded\ShardDefinition; use OCP\DB\IPreparedStatement; use OCP\DB\IResult; use OCP\DB\QueryBuilder\IQueryBuilder; @@ -244,4 +246,12 @@ class ConnectionAdapter implements IDBConnection { public function logDatabaseException(\Exception $exception) { $this->inner->logDatabaseException($exception); } + + public function getShardDefinition(string $name): ?ShardDefinition { + return $this->inner->getShardDefinition($name); + } + + public function getCrossShardMoveHelper(): CrossShardMoveHelper { + return $this->inner->getCrossShardMoveHelper(); + } } diff --git a/lib/private/DB/ConnectionFactory.php b/lib/private/DB/ConnectionFactory.php index af182243787..8f161b68ecb 100644 --- a/lib/private/DB/ConnectionFactory.php +++ b/lib/private/DB/ConnectionFactory.php @@ -11,7 +11,11 @@ use Doctrine\Common\EventManager; use Doctrine\DBAL\Configuration; use Doctrine\DBAL\DriverManager; use Doctrine\DBAL\Event\Listeners\OracleSessionInit; +use OC\DB\QueryBuilder\Sharded\AutoIncrementHandler; +use OC\DB\QueryBuilder\Sharded\ShardConnectionManager; use OC\SystemConfig; +use OCP\ICacheFactory; +use OCP\Server; /** * Takes care of creating and configuring Doctrine connections. 
@@ -54,9 +58,12 @@ class ConnectionFactory { ], ]; + private ShardConnectionManager $shardConnectionManager; + private ICacheFactory $cacheFactory; public function __construct( - private SystemConfig $config + private SystemConfig $config, + ?ICacheFactory $cacheFactory = null, ) { if ($this->config->getValue('mysql.utf8mb4', false)) { $this->defaultConnectionParams['mysql']['charset'] = 'utf8mb4'; @@ -65,6 +72,8 @@ class ConnectionFactory { if ($collationOverride) { $this->defaultConnectionParams['mysql']['collation'] = $collationOverride; } + $this->shardConnectionManager = new ShardConnectionManager($this->config, $this); + $this->cacheFactory = $cacheFactory ?? Server::get(ICacheFactory::class); } /** @@ -214,6 +223,14 @@ class ConnectionFactory { if ($this->config->getValue('dbpersistent', false)) { $connectionParams['persistent'] = true; } + + $connectionParams['sharding'] = $this->config->getValue('dbsharding', []); + $connectionParams['shard_connection_manager'] = $this->shardConnectionManager; + $connectionParams['auto_increment_handler'] = new AutoIncrementHandler( + $this->cacheFactory, + $this->shardConnectionManager, + ); + $connectionParams = array_merge($connectionParams, $additionalConnectionParams); $replica = $this->config->getValue($configPrefix . 
'dbreplica', $this->config->getValue('dbreplica', [])) ?: [$connectionParams]; diff --git a/lib/private/DB/QueryBuilder/ExtendedQueryBuilder.php b/lib/private/DB/QueryBuilder/ExtendedQueryBuilder.php index bde6523567f..c40cadfbdb5 100644 --- a/lib/private/DB/QueryBuilder/ExtendedQueryBuilder.php +++ b/lib/private/DB/QueryBuilder/ExtendedQueryBuilder.php @@ -288,4 +288,22 @@ abstract class ExtendedQueryBuilder implements IQueryBuilder { public function executeStatement(?IDBConnection $connection = null): int { return $this->builder->executeStatement($connection); } + + public function hintShardKey(string $column, mixed $value, bool $overwrite = false) { + $this->builder->hintShardKey($column, $value, $overwrite); + return $this; + } + + public function runAcrossAllShards() { + $this->builder->runAcrossAllShards(); + return $this; + } + + public function getOutputColumns(): array { + return $this->builder->getOutputColumns(); + } + + public function prefixTableName(string $table): string { + return $this->builder->prefixTableName($table); + } } diff --git a/lib/private/DB/QueryBuilder/Partitioned/InvalidPartitionedQueryException.php b/lib/private/DB/QueryBuilder/Partitioned/InvalidPartitionedQueryException.php new file mode 100644 index 00000000000..3a5aa2f3e0e --- /dev/null +++ b/lib/private/DB/QueryBuilder/Partitioned/InvalidPartitionedQueryException.php @@ -0,0 +1,79 @@ +<?php + +declare(strict_types=1); +/** + * SPDX-FileCopyrightText: 2024 Nextcloud GmbH and Nextcloud contributors + * SPDX-License-Identifier: AGPL-3.0-only + */ + +namespace OC\DB\QueryBuilder\Partitioned; + +/** + * Partitioned queries impose limitations that queries have to follow: + * + * 1. Any reference to columns not in the "main table" (the table referenced by "FROM"), needs to explicitly include the + * table or alias the column belongs to. 
+ * + * For example: + * ``` + * $query->select("mount_point", "mimetype") + * ->from("mounts", "m") + * ->innerJoin("m", "filecache", "f", $query->expr()->eq("root_id", "fileid")); + * ``` + * will not work, as the query builder doesn't know that the `mimetype` column belongs to the "filecache partition". + * Instead, you need to do + * ``` + * $query->select("mount_point", "f.mimetype") + * ->from("mounts", "m") + * ->innerJoin("m", "filecache", "f", $query->expr()->eq("m.root_id", "f.fileid")); + * ``` + * + * 2. The "ON" condition for the join can only perform a comparison between both sides of the join once. + * + * For example: + * ``` + * $query->select("mount_point", "f.mimetype") + * ->from("mounts", "m") + * ->innerJoin("m", "filecache", "f", $query->expr()->andX($query->expr()->eq("m.root_id", "f.fileid"), $query->expr()->eq("m.storage_id", "f.storage"))); + * ``` + * will not work. + * + * 3. An "OR" expression in the "WHERE" cannot mention both sides of the join; this does not apply to "AND" expressions. + * + * For example: + * ``` + * $query->select("mount_point", "f.mimetype") + * ->from("mounts", "m") + * ->innerJoin("m", "filecache", "f", $query->expr()->eq("m.root_id", "f.fileid")) + * ->where($query->expr()->orX( + * $query->expr()->eq("m.user_id", $query->createNamedParameter("test")), + * $query->expr()->eq("f.name", $query->createNamedParameter("test")), + * )); + * ``` + * will not work, but + * ``` + * $query->select("mount_point", "f.mimetype") + * ->from("mounts", "m") + * ->innerJoin("m", "filecache", "f", $query->expr()->eq("m.root_id", "f.fileid")) + * ->where($query->expr()->andX( + * $query->expr()->eq("m.user_id", $query->createNamedParameter("test")), + * $query->expr()->eq("f.name", $query->createNamedParameter("test")), + * )); + * ``` + * will. + * + * 4. Queries that join cross-partition cannot use positional parameters, only named parameters are allowed. + * 5. The "ON" condition of a join cannot contain an "OR" expression. + * 6. 
Right-joins are not allowed. + * 7. Update, delete and insert statements aren't allowed to contain cross-partition joins. + * 8. Queries that "GROUP BY" a column from the joined partition are not allowed. + * 9. Any `join` call needs to be made before any `where` call. + * 10. Queries that join cross-partition with an "INNER JOIN" or "LEFT JOIN" with a condition on the left side + * cannot use "LIMIT" or "OFFSET" in queries. + * + * The part of the query running on the sharded table has some additional limitations, + * see the `InvalidShardedQueryException` documentation for more information. + */ +class InvalidPartitionedQueryException extends \Exception { + +} diff --git a/lib/private/DB/QueryBuilder/Partitioned/JoinCondition.php b/lib/private/DB/QueryBuilder/Partitioned/JoinCondition.php new file mode 100644 index 00000000000..a08858d1d6b --- /dev/null +++ b/lib/private/DB/QueryBuilder/Partitioned/JoinCondition.php @@ -0,0 +1,173 @@ +<?php + +declare(strict_types=1); +/** + * SPDX-FileCopyrightText: 2024 Robin Appelman <robin@icewind.nl> + * SPDX-License-Identifier: AGPL-3.0-or-later + */ + +namespace OC\DB\QueryBuilder\Partitioned; + +use OC\DB\QueryBuilder\CompositeExpression; +use OC\DB\QueryBuilder\QueryFunction; +use OCP\DB\QueryBuilder\IQueryFunction; + +/** + * Utility class for working with join conditions + */ +class JoinCondition { + public function __construct( + public string|IQueryFunction $fromColumn, + public ?string $fromAlias, + public string|IQueryFunction $toColumn, + public ?string $toAlias, + public array $fromConditions, + public array $toConditions, + ) { + if (is_string($this->fromColumn) && str_starts_with($this->fromColumn, '(')) { + $this->fromColumn = new QueryFunction($this->fromColumn); + } + if (is_string($this->toColumn) && str_starts_with($this->toColumn, '(')) { + $this->toColumn = new QueryFunction($this->toColumn); + } + } + + /** + * @param JoinCondition[] $conditions + * @return JoinCondition + */ + public static function 
merge(array $conditions): JoinCondition { + $fromColumn = ''; + $toColumn = ''; + $fromAlias = null; + $toAlias = null; + $fromConditions = []; + $toConditions = []; + foreach ($conditions as $condition) { + if (($condition->fromColumn && $fromColumn) || ($condition->toColumn && $toColumn)) { + throw new InvalidPartitionedQueryException("Can't join from {$condition->fromColumn} to {$condition->toColumn} as it already join froms {$fromColumn} to {$toColumn}"); + } + if ($condition->fromColumn) { + $fromColumn = $condition->fromColumn; + } + if ($condition->toColumn) { + $toColumn = $condition->toColumn; + } + if ($condition->fromAlias) { + $fromAlias = $condition->fromAlias; + } + if ($condition->toAlias) { + $toAlias = $condition->toAlias; + } + $fromConditions = array_merge($fromConditions, $condition->fromConditions); + $toConditions = array_merge($toConditions, $condition->toConditions); + } + return new JoinCondition($fromColumn, $fromAlias, $toColumn, $toAlias, $fromConditions, $toConditions); + } + + /** + * @param null|string|CompositeExpression $condition + * @param string $join + * @param string $alias + * @param string $fromAlias + * @return JoinCondition + * @throws InvalidPartitionedQueryException + */ + public static function parse($condition, string $join, string $alias, string $fromAlias): JoinCondition { + if ($condition === null) { + throw new InvalidPartitionedQueryException("Can't join on $join without a condition"); + } + + $result = self::parseSubCondition($condition, $join, $alias, $fromAlias); + if (!$result->fromColumn || !$result->toColumn) { + throw new InvalidPartitionedQueryException("No join condition found from $fromAlias to $alias"); + } + return $result; + } + + private static function parseSubCondition($condition, string $join, string $alias, string $fromAlias): JoinCondition { + if ($condition instanceof CompositeExpression) { + if ($condition->getType() === CompositeExpression::TYPE_OR) { + throw new 
InvalidPartitionedQueryException("Cannot join on $join with an OR expression"); + } + return self::merge(array_map(function ($subCondition) use ($join, $alias, $fromAlias) { + return self::parseSubCondition($subCondition, $join, $alias, $fromAlias); + }, $condition->getParts())); + } + + $condition = (string)$condition; + $isSubCondition = self::isExtraCondition($condition); + if ($isSubCondition) { + if (self::mentionsAlias($condition, $fromAlias)) { + return new JoinCondition('', null, '', null, [$condition], []); + } else { + return new JoinCondition('', null, '', null, [], [$condition]); + } + } + + $condition = str_replace('`', '', $condition); + + // expect a condition in the form of 'alias1.column1 = alias2.column2' + if (!str_contains($condition, ' = ')) { + throw new InvalidPartitionedQueryException("Can only join on $join with an `eq` condition"); + } + $parts = explode(' = ', $condition, 2); + $parts = array_map(function (string $part) { + return self::clearConditionPart($part); + }, $parts); + + if (!self::isSingleCondition($parts[0]) || !self::isSingleCondition($parts[1])) { + throw new InvalidPartitionedQueryException("Can only join on $join with a single condition"); + } + + + if (self::mentionsAlias($parts[0], $fromAlias)) { + return new JoinCondition($parts[0], self::getAliasForPart($parts[0]), $parts[1], self::getAliasForPart($parts[1]), [], []); + } elseif (self::mentionsAlias($parts[1], $fromAlias)) { + return new JoinCondition($parts[1], self::getAliasForPart($parts[1]), $parts[0], self::getAliasForPart($parts[0]), [], []); + } else { + throw new InvalidPartitionedQueryException("join condition for $join needs to explicitly refer to the table by alias"); + } + } + + private static function isSingleCondition(string $condition): bool { + return !(str_contains($condition, ' OR ') || str_contains($condition, ' AND ')); + } + + private static function getAliasForPart(string $part): ?string { + if (str_contains($part, ' ')) { + return 
uniqid('join_alias_'); + } else { + return null; + } + } + + /** + * Strip a database specific cast wrapper from one side of a join condition so only the wrapped expression remains + * + * Handles `CAST(x AS ...)`, `to_number(to_char(x))` and `to_char(x)`; any other input is returned unchanged. + */ + private static function clearConditionPart(string $part): string { + if (str_starts_with($part, 'CAST(')) { + // pgsql/mysql cast + $part = substr($part, strlen('CAST(')); + [$part] = explode(' AS ', $part); + } elseif (str_starts_with($part, 'to_number(to_char(')) { + // oracle cast to int + $part = substr($part, strlen('to_number(to_char('), -2); + } elseif (str_starts_with($part, 'to_char(')) { + // oracle cast to string + $part = substr($part, strlen('to_char('), -1); + } + return $part; + } + + /** + * Check that a condition is an extra limit on the from/to part, and not the join condition + * + * This is done by checking that only one of the halves of the condition references a column + */ + private static function isExtraCondition(string $condition): bool { + $parts = explode(' ', $condition, 2); + return str_contains($parts[0], '`') xor str_contains($parts[1], '`'); + } + + private static function mentionsAlias(string $condition, string $alias): bool { + return str_contains($condition, "$alias."); + } +} diff --git a/lib/private/DB/QueryBuilder/Partitioned/PartitionQuery.php b/lib/private/DB/QueryBuilder/Partitioned/PartitionQuery.php new file mode 100644 index 00000000000..a5024b478d3 --- /dev/null +++ b/lib/private/DB/QueryBuilder/Partitioned/PartitionQuery.php @@ -0,0 +1,75 @@ +<?php + +declare(strict_types=1); +/** + * SPDX-FileCopyrightText: 2024 Nextcloud GmbH and Nextcloud contributors + * SPDX-License-Identifier: AGPL-3.0-only + */ + +namespace OC\DB\QueryBuilder\Partitioned; + +use OCP\DB\QueryBuilder\IQueryBuilder; + +/** + * A sub-query from a partitioned join + */ +class PartitionQuery { + public const JOIN_MODE_INNER = 'inner'; + public const JOIN_MODE_LEFT = 'left'; + // left-join where the left side IS NULL + public const JOIN_MODE_LEFT_NULL = 'left_null'; + + public const JOIN_MODE_RIGHT = 'right'; + + public function __construct( + public IQueryBuilder $query, + public 
string $joinFromColumn, + public string $joinToColumn, + public string $joinMode, + ) { + if ($joinMode !== self::JOIN_MODE_LEFT && $joinMode !== self::JOIN_MODE_INNER) { + throw new InvalidPartitionedQueryException("$joinMode joins aren't allowed in partitioned queries"); + } + } + + public function mergeWith(array $rows): array { + if (empty($rows)) { + return []; + } + // strip table/alias from column names + $joinFromColumn = preg_replace('/\w+\./', '', $this->joinFromColumn); + $joinToColumn = preg_replace('/\w+\./', '', $this->joinToColumn); + + $joinFromValues = array_map(function (array $row) use ($joinFromColumn) { + return $row[$joinFromColumn]; + }, $rows); + $joinFromValues = array_filter($joinFromValues, function ($value) { + return $value !== null; + }); + $this->query->andWhere($this->query->expr()->in($this->joinToColumn, $this->query->createNamedParameter($joinFromValues, IQueryBuilder::PARAM_STR_ARRAY, ':' . uniqid()))); + + $s = $this->query->getSQL(); + $partitionedRows = $this->query->executeQuery()->fetchAll(); + + $columns = $this->query->getOutputColumns(); + $nullResult = array_combine($columns, array_fill(0, count($columns), null)); + + $partitionedRowsByKey = []; + foreach ($partitionedRows as $partitionedRow) { + $partitionedRowsByKey[$partitionedRow[$joinToColumn]][] = $partitionedRow; + } + $result = []; + foreach ($rows as $row) { + if (isset($partitionedRowsByKey[$row[$joinFromColumn]])) { + if ($this->joinMode !== self::JOIN_MODE_LEFT_NULL) { + foreach ($partitionedRowsByKey[$row[$joinFromColumn]] as $partitionedRow) { + $result[] = array_merge($row, $partitionedRow); + } + } + } elseif ($this->joinMode === self::JOIN_MODE_LEFT || $this->joinMode === self::JOIN_MODE_LEFT_NULL) { + $result[] = array_merge($nullResult, $row); + } + } + return $result; + } +} diff --git a/lib/private/DB/QueryBuilder/Partitioned/PartitionSplit.php b/lib/private/DB/QueryBuilder/Partitioned/PartitionSplit.php new file mode 100644 index 
<?php

declare(strict_types=1);
/**
 * SPDX-FileCopyrightText: 2024 Robin Appelman <robin@icewind.nl>
 * SPDX-License-Identifier: AGPL-3.0-or-later
 */

namespace OC\DB\QueryBuilder\Partitioned;

/**
 * Information about a database partition, containing the tables in the partition and any active alias
 */
class PartitionSplit {
	/** @var array<string, string> alias => table the alias refers to */
	public array $aliases = [];

	/**
	 * @param string $name name of the partition
	 * @param string[] $tables tables that belong to the partition
	 */
	public function __construct(
		public string $name,
		public array $tables,
	) {
	}

	/**
	 * Register an alias for a table; ignored when the table is not part of this partition
	 */
	public function addAlias(string $table, string $alias): void {
		if ($this->containsTable($table)) {
			$this->aliases[$alias] = $table;
		}
	}

	/**
	 * Add a table to the partition unless it is already listed
	 */
	public function addTable(string $table): void {
		if (!$this->containsTable($table)) {
			$this->tables[] = $table;
		}
	}

	public function containsTable(string $table): bool {
		// strict comparison: table names are strings and must not be compared as numbers
		return in_array($table, $this->tables, true);
	}

	public function containsAlias(string $alias): bool {
		return array_key_exists($alias, $this->aliases);
	}

	/**
	 * All names (aliases and table names) a query can use to refer to this partition
	 *
	 * @return list<int|string>
	 */
	private function getTablesAndAliases(): array {
		// array_merge instead of the `+` union: both operands are lists, and the union
		// would drop every table whose numeric index is also used by an alias
		return array_merge(array_keys($this->aliases), $this->tables);
	}

	/**
	 * Check if a query predicate mentions a table or alias from this partition
	 *
	 * @param string $predicate
	 * @return bool
	 */
	public function checkPredicateForTable(string $predicate): bool {
		foreach ($this->getTablesAndAliases() as $name) {
			if (str_contains($predicate, "`$name`.`")) {
				return true;
			}
		}
		return false;
	}

	/**
	 * Check if a column reference is prefixed with a table or alias from this partition
	 *
	 * @param string $column column in `table.column` or `alias.column` form
	 * @return bool
	 */
	public function isColumnInPartition(string $column): bool {
		foreach ($this->getTablesAndAliases() as $name) {
			if (str_starts_with($column, "$name.")) {
				return true;
			}
		}
		return false;
	}
}
00000000000..175b7c1a42e --- /dev/null +++ b/lib/private/DB/QueryBuilder/Partitioned/PartitionedQueryBuilder.php @@ -0,0 +1,426 @@ +<?php + +declare(strict_types=1); +/** + * SPDX-FileCopyrightText: 2024 Nextcloud GmbH and Nextcloud contributors + * SPDX-License-Identifier: AGPL-3.0-only + */ + +namespace OC\DB\QueryBuilder\Partitioned; + +use OC\DB\QueryBuilder\CompositeExpression; +use OC\DB\QueryBuilder\QuoteHelper; +use OC\DB\QueryBuilder\Sharded\AutoIncrementHandler; +use OC\DB\QueryBuilder\Sharded\ShardConnectionManager; +use OC\DB\QueryBuilder\Sharded\ShardedQueryBuilder; +use OCP\DB\IResult; +use OCP\DB\QueryBuilder\IQueryBuilder; +use OCP\DB\QueryBuilder\IQueryFunction; +use OCP\IDBConnection; + +/** + * A special query builder that automatically splits queries that span across multiple database partitions[1]. + * + * This is done by inspecting the query as it's being built, and when a cross-partition join is detected, + * the part of the query that touches the partition is split off into a different sub-query. + * Then, when the query is executed, the results from the sub-queries are automatically merged. + * + * This whole process is intended to be transparent to any code using the query builder; however, it does impose some extra + * limitations for queries that work cross-partition. See the documentation from `InvalidPartitionedQueryException` for more details. + * + * When a join is created in the query, this builder checks if it belongs to the same partition as the table from the + * original FROM/UPDATE/DELETE/INSERT and if not, creates a new "sub query" for the partition. + * Then for every part that is added to the query, the part is analyzed to determine which partition the query part is referencing + * and the query part is added to the sub query for that partition. + * + * [1]: A set of tables which can't be queried together with the rest of the tables, such as when sharding is used. 
+ */ +class PartitionedQueryBuilder extends ShardedQueryBuilder { + /** @var array<string, PartitionQuery> $splitQueries */ + private array $splitQueries = []; + /** @var list<PartitionSplit> */ + private array $partitions = []; + + /** @var array{'select': string|array, 'alias': ?string}[] */ + private array $selects = []; + private ?PartitionSplit $mainPartition = null; + private bool $hasPositionalParameter = false; + private QuoteHelper $quoteHelper; + private ?int $limit = null; + private ?int $offset = null; + + public function __construct( + IQueryBuilder $builder, + array $shardDefinitions, + ShardConnectionManager $shardConnectionManager, + AutoIncrementHandler $autoIncrementHandler, + ) { + parent::__construct($builder, $shardDefinitions, $shardConnectionManager, $autoIncrementHandler); + $this->quoteHelper = new QuoteHelper(); + } + + private function newQuery(): IQueryBuilder { + // get a fresh, non-partitioning query builder + $builder = $this->builder->getConnection()->getQueryBuilder(); + if ($builder instanceof PartitionedQueryBuilder) { + $builder = $builder->builder; + } + + return new ShardedQueryBuilder( + $builder, + $this->shardDefinitions, + $this->shardConnectionManager, + $this->autoIncrementHandler, + ); + } + + // we need to save selects until we know all the table aliases + public function select(...$selects) { + $this->selects = []; + $this->addSelect(...$selects); + return $this; + } + + public function addSelect(...$select) { + $select = array_map(function ($select) { + return ['select' => $select, 'alias' => null]; + }, $select); + $this->selects = array_merge($this->selects, $select); + return $this; + } + + public function selectAlias($select, $alias) { + $this->selects[] = ['select' => $select, 'alias' => $alias]; + return $this; + } + + /** + * Ensure that a column is being selected by the query + * + * This is mainly used to ensure that the returned rows from both sides of a partition contains the columns of the join predicate + 
* + * @param string $column + * @return void + */ + private function ensureSelect(string|IQueryFunction $column, ?string $alias = null): void { + $checkColumn = $alias ?: $column; + if (str_contains($checkColumn, '.')) { + [, $checkColumn] = explode('.', $checkColumn); + } + foreach ($this->selects as $select) { + if ($select['select'] === $checkColumn || $select['select'] === '*' || str_ends_with($select['select'], '.' . $checkColumn)) { + return; + } + } + if ($alias) { + $this->selectAlias($column, $alias); + } else { + $this->addSelect($column); + } + } + + /** + * Distribute the select statements to the correct partition + * + * This is done at the end instead of when the `select` call is made, because the `select` calls are generally done + * before we know what tables are involved in the query + * + * @return void + */ + private function applySelects(): void { + foreach ($this->selects as $select) { + foreach ($this->partitions as $partition) { + if (is_string($select['select']) && ( + $select['select'] === '*' || + $partition->isColumnInPartition($select['select'])) + ) { + if (isset($this->splitQueries[$partition->name])) { + if ($select['alias']) { + $this->splitQueries[$partition->name]->query->selectAlias($select['select'], $select['alias']); + } else { + $this->splitQueries[$partition->name]->query->addSelect($select['select']); + } + if ($select['select'] !== '*') { + continue 2; + } + } + } + } + + if ($select['alias']) { + parent::selectAlias($select['select'], $select['alias']); + } else { + parent::addSelect($select['select']); + } + } + $this->selects = []; + } + + + public function addPartition(PartitionSplit $partition): void { + $this->partitions[] = $partition; + } + + private function getPartition(string $table): ?PartitionSplit { + foreach ($this->partitions as $partition) { + if ($partition->containsTable($table) || $partition->containsAlias($table)) { + return $partition; + } + } + return null; + } + + public function from($from, $alias = 
null) {
+		// selecting from a table that belongs to a registered partition makes that partition the "main" one
+		if (is_string($from) && $partition = $this->getPartition($from)) {
+			$this->mainPartition = $partition;
+			if ($alias) {
+				$this->mainPartition->addAlias($from, $alias);
+			}
+		}
+		return parent::from($from, $alias);
+	}
+
+	/**
+	 * Inner join, delegated to `join()` with the default (inner) join mode
+	 */
+	public function innerJoin($fromAlias, $join, $alias, $condition = null): self {
+		return $this->join($fromAlias, $join, $alias, $condition);
+	}
+
+	/**
+	 * Left join, delegated to `join()` with `PartitionQuery::JOIN_MODE_LEFT`
+	 */
+	public function leftJoin($fromAlias, $join, $alias, $condition = null): self {
+		return $this->join($fromAlias, $join, $alias, $condition, PartitionQuery::JOIN_MODE_LEFT);
+	}
+
+	/**
+	 * Add a join, splitting it off into a separate query when it crosses a partition boundary
+	 *
+	 * Three cases are handled:
+	 * - the joined table is in a partition other than the main one: the join is moved into that partition's split query
+	 * - the table joined from is in a partition and the joined table is not in that same partition:
+	 *   the joined table is moved into a split query named `from_<partition name>`
+	 * - otherwise: the join is passed on to the parent query builder unchanged
+	 *
+	 * @throws InvalidPartitionedQueryException when a second join into an existing split query doesn't start from an alias of that partition
+	 * @throws \InvalidArgumentException when `$joinMode` is not one of the inner, left or right join modes
+	 */
+	public function join($fromAlias, $join, $alias, $condition = null, $joinMode = PartitionQuery::JOIN_MODE_INNER): self {
+		$partition = $this->getPartition($join);
+		$fromPartition = $this->getPartition($fromAlias);
+		if ($partition && $partition !== $this->mainPartition) {
+			// join from the main db to a partition
+
+			$joinCondition = JoinCondition::parse($condition, $join, $alias, $fromAlias);
+			$partition->addAlias($join, $alias);
+
+			if (!isset($this->splitQueries[$partition->name])) {
+				// first join into this partition: create its split query and make sure both join columns get selected
+				$this->splitQueries[$partition->name] = new PartitionQuery(
+					$this->newQuery(),
+					$joinCondition->fromAlias ?? $joinCondition->fromColumn, $joinCondition->toAlias ?? $joinCondition->toColumn,
+					$joinMode
+				);
+				$this->splitQueries[$partition->name]->query->from($join, $alias);
+				$this->ensureSelect($joinCondition->fromColumn, $joinCondition->fromAlias);
+				$this->ensureSelect($joinCondition->toColumn, $joinCondition->toAlias);
+			} else {
+				// the split query already exists: further joins are only possible inside that partition
+				$query = $this->splitQueries[$partition->name]->query;
+				if ($partition->containsAlias($fromAlias)) {
+					$query->innerJoin($fromAlias, $join, $alias, $condition);
+				} else {
+					throw new InvalidPartitionedQueryException("Can't join across partition boundaries more than once");
+				}
+			}
+			// the parts of the join condition are distributed: `toConditions` go to the split query, `fromConditions` to the main query
+			$this->splitQueries[$partition->name]->query->andWhere(...$joinCondition->toConditions);
+			parent::andWhere(...$joinCondition->fromConditions);
+			return $this;
+		} elseif ($fromPartition && $fromPartition !== $partition) {
+			// join from partition, to the main db
+
+			$joinCondition = JoinCondition::parse($condition, $join, $alias, $fromAlias);
+			// the split query for this direction is keyed by the partition name with a single `from_` prefix
+			if (str_starts_with($fromPartition->name, 'from_')) {
+				$partitionName = $fromPartition->name;
+			} else {
+				$partitionName = 'from_' . $fromPartition->name;
+			}
+
+			if (!isset($this->splitQueries[$partitionName])) {
+				// register the joined table as a partition of its own so later lookups resolve to the new split query
+				$newPartition = new PartitionSplit($partitionName, [$join]);
+				$newPartition->addAlias($join, $alias);
+				$this->partitions[] = $newPartition;
+
+				$this->splitQueries[$partitionName] = new PartitionQuery(
+					$this->newQuery(),
+					$joinCondition->fromAlias ?? $joinCondition->fromColumn, $joinCondition->toAlias ?? $joinCondition->toColumn,
+					$joinMode
+				);
+				$this->ensureSelect($joinCondition->fromColumn, $joinCondition->fromAlias);
+				$this->ensureSelect($joinCondition->toColumn, $joinCondition->toAlias);
+				$this->splitQueries[$partitionName]->query->from($join, $alias);
+				$this->splitQueries[$partitionName]->query->andWhere(...$joinCondition->toConditions);
+				parent::andWhere(...$joinCondition->fromConditions);
+			} else {
+				// the split query already exists: add the table to the partition joined from and join inside the split query
+				$fromPartition->addTable($join);
+				$fromPartition->addAlias($join, $alias);
+
+				$query = $this->splitQueries[$partitionName]->query;
+				$query->innerJoin($fromAlias, $join, $alias, $condition);
+			}
+			return $this;
+		} else {
+			// join within the main db or a partition
+			if ($joinMode === PartitionQuery::JOIN_MODE_INNER) {
+				return parent::innerJoin($fromAlias, $join, $alias, $condition);
+			} elseif ($joinMode === PartitionQuery::JOIN_MODE_LEFT) {
+				return parent::leftJoin($fromAlias, $join, $alias, $condition);
+			} elseif ($joinMode === PartitionQuery::JOIN_MODE_RIGHT) {
+				return parent::rightJoin($fromAlias, $join, $alias, $condition);
+			} else {
+				throw new \InvalidArgumentException("Invalid join mode: $joinMode");
+			}
+		}
+	}
+
+	/**
+	 * Flatten a list of predicates by merging the parts of any "AND" expression into the list of predicates
+	 *
+	 * Nested "AND" expressions are flattened recursively, every other predicate is kept as is.
+	 *
+	 * @param array $predicates
+	 * @return array
+	 */
+	private function flattenPredicates(array $predicates): array {
+		$result = [];
+		foreach ($predicates as $predicate) {
+			if ($predicate instanceof CompositeExpression && $predicate->getType() === CompositeExpression::TYPE_AND) {
+				$result = array_merge($result, $this->flattenPredicates($predicate->getParts()));
+			} else {
+				$result[] = $predicate;
+			}
+		}
+		return $result;
+	}
+
+	/**
+	 * Split an array of predicates (WHERE query parts) by the partition they reference
+	 * @param array $predicates
+	 * @return array<string, array>
+	 */
+	private function splitPredicatesByParts(array $predicates): array {
+		$predicates = $this->flattenPredicates($predicates);
+
+
$partitionPredicates = [];
+		foreach ($predicates as $predicate) {
+			$partition = $this->getPartitionForPredicate((string)$predicate);
+			// predicates on the main partition and predicates that match no partition both stay
+			// on the main query, which is represented by the empty key
+			$key = ($partition && $partition !== $this->mainPartition) ? $partition->name : '';
+			$partitionPredicates[$key][] = $predicate;
+		}
+		return $partitionPredicates;
+	}
+
+	/**
+	 * Same as `andWhere()`: predicates are added to the existing ones
+	 */
+	public function where(...$predicates) {
+		return $this->andWhere(...$predicates);
+	}
+
+	/**
+	 * Add predicates, routing each one to the split query of the partition it references
+	 *
+	 * Predicates that belong to no split query are added to the main query.
+	 */
+	public function andWhere(...$where) {
+		if ($where) {
+			foreach ($this->splitPredicatesByParts($where) as $alias => $predicates) {
+				if (isset($this->splitQueries[$alias])) {
+					// when there is a condition on a table being left-joined it starts to behave as if it's an inner join
+					// since any joined column that doesn't have the left part will not match the condition
+					// when the condition is `$joinToColumn IS NULL` we instead mark the query as excluding the left half
+					if ($this->splitQueries[$alias]->joinMode === PartitionQuery::JOIN_MODE_LEFT) {
+						$this->splitQueries[$alias]->joinMode = PartitionQuery::JOIN_MODE_INNER;
+
+						$column = $this->quoteHelper->quoteColumnName($this->splitQueries[$alias]->joinToColumn);
+						foreach ($predicates as $predicate) {
+							if ((string)$predicate === "$column IS NULL") {
+								$this->splitQueries[$alias]->joinMode = PartitionQuery::JOIN_MODE_LEFT_NULL;
+							} else {
+								$this->splitQueries[$alias]->query->andWhere($predicate);
+							}
+						}
+					} else {
+						$this->splitQueries[$alias]->query->andWhere(...$predicates);
+					}
+				} else {
+					parent::andWhere(...$predicates);
+				}
+			}
+		}
+		return $this;
+	}
+
+
+	/**
+	 * Find the first registered partition whose tables are referenced by the predicate
+	 *
+	 * As a side effect, a predicate containing a `?` marks the query as using positional parameters.
+	 *
+	 * @param string $predicate the predicate, converted to its string form
+	 * @return PartitionSplit|null the matching partition, or null when no partition matches
+	 */
+	private function getPartitionForPredicate(string $predicate): ?PartitionSplit {
+		// this check does not depend on the partitions, so it is done once and also when no
+		// partition has been registered yet (partitions can still be added after the `where` call)
+		if (str_contains($predicate, '?')) {
+			$this->hasPositionalParameter = true;
+		}
+		foreach ($this->partitions as $partition) {
+			if ($partition->checkPredicateForTable($predicate)) {
+				return $partition;
+			}
+		}
+		return null;
+	}
+
+	public function update($update = null,
$alias = null) {
+		return parent::update($update, $alias);
+	}
+
+	public function insert($insert = null) {
+		return parent::insert($insert);
+	}
+
+	public function delete($delete = null, $alias = null) {
+		return parent::delete($delete, $alias);
+	}
+
+	/**
+	 * Set the limit and remember it, so `executeQuery()` can reject limits on partitioned queries
+	 *
+	 * A non-positive or null value clears the remembered limit again, so that resetting the limit
+	 * on a reused builder doesn't leave a stale value behind.
+	 * NOTE(review): assumes `$this->limit` is declared nullable, as the `is_int()` check in `executeQuery()` suggests.
+	 */
+	public function setMaxResults($maxResults) {
+		$this->limit = $maxResults > 0 ? (int)$maxResults : null;
+		return parent::setMaxResults($maxResults);
+	}
+
+	/**
+	 * Set the offset and remember it, so `executeQuery()` can reject offsets on partitioned queries
+	 *
+	 * A non-positive or null value clears the remembered offset again.
+	 * NOTE(review): assumes `$this->offset` is declared nullable, as the `is_int()` check in `executeQuery()` suggests.
+	 */
+	public function setFirstResult($firstResult) {
+		$this->offset = $firstResult > 0 ? (int)$firstResult : null;
+		return parent::setFirstResult($firstResult);
+	}
+
+	/**
+	 * Run the main query and wrap the result so the split queries get merged in
+	 *
+	 * @throws InvalidPartitionedQueryException when the query is split and uses positional parameters,
+	 *                                          or uses a limit/offset together with a join that is not a plain left join
+	 */
+	public function executeQuery(?IDBConnection $connection = null): IResult {
+		$this->applySelects();
+		if ($this->splitQueries && $this->hasPositionalParameter) {
+			throw new InvalidPartitionedQueryException("Partitioned queries aren't allowed to use positional arguments");
+		}
+		// the split queries were built from parts of this query, so they need the same parameters
+		foreach ($this->splitQueries as $split) {
+			$split->query->setParameters($this->getParameters(), $this->getParameterTypes());
+		}
+		if (count($this->splitQueries) > 0) {
+			$hasNonLeftJoins = array_reduce($this->splitQueries, function (bool $hasNonLeftJoins, PartitionQuery $query) {
+				return $hasNonLeftJoins || $query->joinMode !== PartitionQuery::JOIN_MODE_LEFT;
+			}, false);
+			if ($hasNonLeftJoins) {
+				if (is_int($this->limit)) {
+					throw new InvalidPartitionedQueryException('Limit is not allowed in partitioned queries');
+				}
+				if (is_int($this->offset)) {
+					throw new InvalidPartitionedQueryException('Offset is not allowed in partitioned queries');
+				}
+			}
+		}
+
+		$result = parent::executeQuery($connection);
+		if (count($this->splitQueries) > 0) {
+			return new PartitionedResult($this->splitQueries, $result);
+		} else {
+			return $result;
+		}
+	}
+
+	public function executeStatement(?IDBConnection $connection = null): int {
+		if (count($this->splitQueries)) {
+			throw new InvalidPartitionedQueryException("Partitioning write queries isn't supported");
+		}
+		return
parent::executeStatement($connection);
+	}
+
+	public function getSQL() {
+		// the recorded selects have to be applied before the SQL of the main query is generated
+		$this->applySelects();
+		$sql = parent::getSQL();
+		return $sql;
+	}
+
+	public function getPartitionCount(): int {
+		// the main query plus one query per split-off partition
+		return 1 + count($this->splitQueries);
+	}
+}
diff --git a/lib/private/DB/QueryBuilder/Partitioned/PartitionedResult.php b/lib/private/DB/QueryBuilder/Partitioned/PartitionedResult.php
new file mode 100644
index 00000000000..aa9cc43b38b
--- /dev/null
+++ b/lib/private/DB/QueryBuilder/Partitioned/PartitionedResult.php
@@ -0,0 +1,61 @@
+<?php
+
+declare(strict_types=1);
+/**
+ * SPDX-FileCopyrightText: 2024 Nextcloud GmbH and Nextcloud contributors
+ * SPDX-License-Identifier: AGPL-3.0-only
+ */
+
+namespace OC\DB\QueryBuilder\Partitioned;
+
+use OC\DB\ArrayResult;
+use OCP\DB\IResult;
+use PDO;
+
+/**
+ * Result of a partitioned query: the rows of the main query with every split-off part merged in
+ *
+ * The rows are loaded and merged on first access, not in the constructor.
+ */
+class PartitionedResult extends ArrayResult {
+	private bool $fetched = false;
+
+	/**
+	 * @param PartitionQuery[] $splitOfParts the split-off queries to merge into the main result
+	 * @param IResult $result the result of the main query
+	 */
+	public function __construct(
+		private array $splitOfParts,
+		private IResult $result
+	) {
+		parent::__construct([]);
+	}
+
+	public function closeCursor(): bool {
+		return $this->result->closeCursor();
+	}
+
+	public function fetch(int $fetchMode = PDO::FETCH_ASSOC) {
+		$this->fetchRows();
+		return parent::fetch($fetchMode);
+	}
+
+	public function fetchAll(int $fetchMode = PDO::FETCH_ASSOC): array {
+		$this->fetchRows();
+		return parent::fetchAll($fetchMode);
+	}
+
+	public function rowCount(): int {
+		$this->fetchRows();
+		return parent::rowCount();
+	}
+
+	/**
+	 * Load the main rows and merge all parts into them, only the first call does any work
+	 */
+	private function fetchRows(): void {
+		if ($this->fetched) {
+			return;
+		}
+		$this->fetched = true;
+
+		$this->rows = $this->result->fetchAll();
+		foreach ($this->splitOfParts as $partitionQuery) {
+			$this->rows = $partitionQuery->mergeWith($this->rows);
+		}
+		$this->count = count($this->rows);
+	}
+}
diff --git a/lib/private/DB/QueryBuilder/QueryBuilder.php b/lib/private/DB/QueryBuilder/QueryBuilder.php
index 98280d610b1..5c7e273c9ec
100644
--- a/lib/private/DB/QueryBuilder/QueryBuilder.php
+++ b/lib/private/DB/QueryBuilder/QueryBuilder.php
@@ -49,6 +49,7 @@ class QueryBuilder implements IQueryBuilder {
 	/** @var string */
 	protected $lastInsertedTable;
+	private array $selectedColumns = [];
 	/**
 	 * Initializes a new QueryBuilder.
@@ -470,6 +471,7 @@ class QueryBuilder implements IQueryBuilder {
 		if (count($selects) === 1 && is_array($selects[0])) {
 			$selects = $selects[0];
 		}
+		$this->addOutputColumns($selects);
 		$this->queryBuilder->select(
 			$this->helper->quoteColumnNames($selects)
@@ -497,6 +499,7 @@ class QueryBuilder implements IQueryBuilder {
 		$this->queryBuilder->addSelect(
 			$this->helper->quoteColumnName($select) . ' AS ' . $this->helper->quoteColumnName($alias)
 		);
+		$this->addOutputColumns([$alias]);
 		return $this;
 	}
@@ -518,6 +521,7 @@ class QueryBuilder implements IQueryBuilder {
 		if (!is_array($select)) {
 			$select = [$select];
 		}
+		$this->addOutputColumns($select);
 		$quotedSelect = $this->helper->quoteColumnNames($select);
@@ -547,6 +551,7 @@ class QueryBuilder implements IQueryBuilder {
 		if (count($selects) === 1 && is_array($selects[0])) {
 			$selects = $selects[0];
 		}
+		$this->addOutputColumns($selects);
 		$this->queryBuilder->addSelect(
 			$this->helper->quoteColumnNames($selects)
@@ -555,6 +560,30 @@ class QueryBuilder implements IQueryBuilder {
 		return $this;
 	}
+	/**
+	 * Record the names of the selected columns
+	 *
+	 * Nested arrays are walked recursively. Only string columns that don't contain a `*` are recorded,
+	 * and for `table.column` only the part after the dot is kept.
+	 *
+	 * @param array $columns
+	 */
+	private function addOutputColumns(array $columns) {
+		foreach ($columns as $column) {
+			if (is_array($column)) {
+				$this->addOutputColumns($column);
+			} elseif (is_string($column) && !str_contains($column, '*')) {
+				if (str_contains($column, '.')) {
+					[, $column] = explode('.', $column);
+				}
+				$this->selectedColumns[] = $column;
+			}
+		}
+	}
+
+	/**
+	 * Get the recorded column names without duplicates
+	 *
+	 * The keys of the returned array are the ones left over by `array_unique`, so they are not guaranteed to be sequential.
+	 *
+	 * @return string[]
+	 */
+	public function getOutputColumns(): array {
+		return array_unique(array_map(function (string $column) {
+			if (str_contains($column, '.')) {
+				[, $column] = explode('.', $column);
+				return $column;
+			} else {
+				return $column;
+			}
+		}, $this->selectedColumns));
+	}
+
 	/**
 	 * Turns the query being built into a bulk delete query that ranges over
 	 * a certain table.
@@ -1300,7 +1329,7 @@ class QueryBuilder implements IQueryBuilder {
 	 * @param string $table
 	 * @return string
 	 */
-	protected function prefixTableName($table) {
+	public function prefixTableName(string $table): string {
 		if ($this->automaticTablePrefix === false || str_starts_with($table, '*PREFIX*')) {
 			return $table;
 		}
@@ -1340,4 +1369,14 @@ class QueryBuilder implements IQueryBuilder {
 	public function escapeLikeParameter(string $parameter): string {
 		return $this->connection->escapeLikeParameter($parameter);
 	}
+
+	/**
+	 * The arguments are ignored by this (non-sharded) query builder, the builder itself is returned
+	 */
+	public function hintShardKey(string $column, mixed $value, bool $overwrite = false) {
+		return $this;
+	}
+
+	/**
+	 * Does nothing in this (non-sharded) query builder, the builder itself is returned
+	 */
+	public function runAcrossAllShards() {
+		// noop
+		return $this;
+	}
+
 }
diff --git a/lib/private/DB/QueryBuilder/Sharded/AutoIncrementHandler.php b/lib/private/DB/QueryBuilder/Sharded/AutoIncrementHandler.php
new file mode 100644
index 00000000000..d40934669d7
--- /dev/null
+++ b/lib/private/DB/QueryBuilder/Sharded/AutoIncrementHandler.php
@@ -0,0 +1,152 @@
+<?php
+
+declare(strict_types=1);
+/**
+ * SPDX-FileCopyrightText: 2024 Robin Appelman <robin@icewind.nl>
+ * SPDX-License-Identifier: AGPL-3.0-or-later
+ */
+
+namespace OC\DB\QueryBuilder\Sharded;
+
+use OCP\ICacheFactory;
+use OCP\IMemcache;
+use OCP\IMemcacheTTL;
+
+/**
+ * A helper to atomically determine the next auto increment value for a sharded table
+ *
+ * Since we can't use the database's auto-increment (since each db doesn't know about the keys in the other shards)
+ * we need external logic for doing the auto increment
+ */
+class AutoIncrementHandler {
+	// counter values below this are treated as invalid and trigger a re-initialization from the database
+	public const MIN_VALID_KEY = 1000;
+	// time to live of the cached counter: one year, in seconds
+	public const TTL = 365 * 24 * 60 * 60;
+
+	// created lazily by getCache()
+	private ?IMemcache $cache = null;
+
+	/**
+	 * @throws \Exception when php uses integers smaller than 64bit
+	 */
+	public function __construct(
+		private ICacheFactory $cacheFactory,
+		private ShardConnectionManager $shardConnectionManager,
+	) {
+		if (PHP_INT_SIZE < 8) {
+			throw new \Exception('sharding is only supported with 64bit php');
+		}
+	}
+
+	private function
getCache(): IMemcache {
+		// the distributed cache is created on first use and has to support the IMemcache operations used below
+		if(is_null($this->cache)) {
+			$cache = $this->cacheFactory->createDistributed('shared_autoincrement');
+			if ($cache instanceof IMemcache) {
+				$this->cache = $cache;
+			} else {
+				throw new \Exception('Distributed cache ' . get_class($cache) . ' is not suitable');
+			}
+		}
+		return $this->cache;
+	}
+
+	/**
+	 * Get the next value for the given shard definition
+	 *
+	 * The returned key is unique and incrementing, but not sequential.
+	 * The shard id is encoded in the lowest byte of the returned value
+	 *
+	 * @param ShardDefinition $shardDefinition
+	 * @param int $shard the shard the row is inserted into, stored in the lowest 8 bits of the key
+	 * @return int
+	 * @throws \Exception when the key space is exhausted or no key could be determined after 5 attempts
+	 */
+	public function getNextPrimaryKey(ShardDefinition $shardDefinition, int $shard): int {
+		$retries = 0;
+		while ($retries < 5) {
+			$next = $this->getNextInner($shardDefinition);
+			if ($next !== null) {
+				if ($next > ShardDefinition::MAX_PRIMARY_KEY) {
+					throw new \Exception('Max primary key of ' . ShardDefinition::MAX_PRIMARY_KEY . ' exceeded');
+				}
+				// we encode the shard the primary key was originally inserted into to allow guessing the shard by primary key later on
+				return ($next << 8) | $shard;
+			} else {
+				$retries++;
+			}
+		}
+		throw new \Exception('Failed to get next primary key');
+	}
+
+	/**
+	 * auto increment logic without retry
+	 *
+	 * @param ShardDefinition $shardDefinition
+	 * @return int|null either the next primary key or null if the call needs to be retried
+	 */
+	private function getNextInner(ShardDefinition $shardDefinition): ?int {
+		$cache = $this->getCache();
+		// because this function will likely be called concurrently from different requests
+		// the implementation needs to ensure that the cached value can be cleared, invalidated or re-calculated at any point between our cache calls
+		// care must be taken that the logic remains fully resilient against race conditions
+
+		// in the ideal case, the last primary key is stored in the cache and we can just do an `inc`
+		// if that is not the case we find the highest used id in the database, increment it, and save it in the cache
+
+		// prevent inc from returning `1` if the key doesn't exist by setting it to a non-numeric value
+		$cache->add($shardDefinition->table, 'empty-placeholder', self::TTL);
+		$next = $cache->inc($shardDefinition->table);
+
+		if ($cache instanceof IMemcacheTTL) {
+			$cache->setTTL($shardDefinition->table, self::TTL);
+		}
+
+		// the "add + inc" trick above isn't strictly atomic, so as a safety we reject any result that is too small
+		// to handle the edge case of the stored value disappearing between the add and inc
+		if (is_int($next) && $next >= self::MIN_VALID_KEY) {
+			return $next;
+		} elseif (is_int($next)) {
+			// we hit the edge case, so invalidate the cached value
+			if (!$cache->cas($shardDefinition->table, $next, 'empty-placeholder')) {
+				// someone else is changing the value concurrently, give up and retry
+				return null;
+			}
+		}
+
+		// at this point the cache is expected to hold the placeholder, so the counter is initialized from the database
+		// discard the encoded initial shard
+		$current = $this->getMaxFromDb($shardDefinition) >> 8;
+		$next = max($current, self::MIN_VALID_KEY) + 1;
+		if ($cache->cas($shardDefinition->table, 'empty-placeholder', $next)) {
+			return $next;
+		}
+
+		// another request set the cached value before us, so we should just be able to inc
+		$next = $cache->inc($shardDefinition->table);
+		if (is_int($next) && $next >= self::MIN_VALID_KEY) {
+			return $next;
+		} elseif(is_int($next)) {
+			// key got cleared, invalidate and retry
+			$cache->cas($shardDefinition->table, $next, 'empty-placeholder');
+			return null;
+		} else {
+			// cleanup any non-numeric value other than the placeholder if that got stored somehow
+			$cache->ncad($shardDefinition->table, 'empty-placeholder');
+			// retry
+			return null;
+		}
+	}
+
+	/**
+	 * Get the maximum primary key value from the shards
+	 *
+	 * Every configured shard is queried for its highest primary key.
+	 */
+	private function getMaxFromDb(ShardDefinition $shardDefinition): int {
+		$max = 0;
+		foreach ($shardDefinition->getAllShards() as $shard) {
+			$connection = $this->shardConnectionManager->getConnection($shardDefinition, $shard);
+			$query =
$connection->getQueryBuilder();
+			$query->select($shardDefinition->primaryKey)
+				->from($shardDefinition->table)
+				->orderBy($shardDefinition->primaryKey, 'DESC')
+				->setMaxResults(1);
+			$result = $query->executeQuery()->fetchOne();
+			if ($result) {
+				// database drivers may return integer columns as numeric strings; without the cast `max()` would
+				// hand back that string and the `int` return type would be violated under strict_types
+				$max = max($max, (int)$result);
+			}
+		}
+		return $max;
+	}
+}
diff --git a/lib/private/DB/QueryBuilder/Sharded/CrossShardMoveHelper.php b/lib/private/DB/QueryBuilder/Sharded/CrossShardMoveHelper.php
new file mode 100644
index 00000000000..45f24e32685
--- /dev/null
+++ b/lib/private/DB/QueryBuilder/Sharded/CrossShardMoveHelper.php
@@ -0,0 +1,162 @@
+<?php
+
+declare(strict_types=1);
+/**
+ * SPDX-FileCopyrightText: 2024 Robin Appelman <robin@icewind.nl>
+ * SPDX-License-Identifier: AGPL-3.0-or-later
+ */
+
+namespace OC\DB\QueryBuilder\Sharded;
+
+use OCP\DB\QueryBuilder\IQueryBuilder;
+use OCP\IDBConnection;
+
+/**
+ * Utility methods for implementing logic that moves data across shards
+ */
+class CrossShardMoveHelper {
+	public function __construct(
+		private ShardConnectionManager $connectionManager
+	) {
+	}
+
+	/**
+	 * Get the connection to the shard that the given shard key maps to
+	 */
+	public function getConnection(ShardDefinition $shardDefinition, int $shardKey): IDBConnection {
+		return $this->connectionManager->getConnection($shardDefinition, $shardDefinition->getShardForKey($shardKey));
+	}
+
+	/**
+	 * Update the shard key of a set of rows, moving them to a different shard if needed
+	 *
+	 * @param ShardDefinition $shardDefinition
+	 * @param string $table
+	 * @param string $shardColumn
+	 * @param int $sourceShardKey
+	 * @param int $targetShardKey
+	 * @param string $primaryColumn
+	 * @param int[] $primaryKeys
+	 * @return void
+	 */
+	public function moveCrossShards(ShardDefinition $shardDefinition, string $table, string $shardColumn, int $sourceShardKey, int $targetShardKey, string $primaryColumn, array $primaryKeys): void {
+		$sourceShard = $shardDefinition->getShardForKey($sourceShardKey);
+		$targetShard = $shardDefinition->getShardForKey($targetShardKey);
+		$sourceConnection =
$this->connectionManager->getConnection($shardDefinition, $sourceShard);
+		if ($sourceShard === $targetShard) {
+			// both keys live on the same shard, so the shard key can be updated in place
+			$this->updateItems($sourceConnection, $table, $shardColumn, $targetShardKey, $primaryColumn, $primaryKeys);
+
+			return;
+		}
+		$targetConnection = $this->connectionManager->getConnection($shardDefinition, $targetShard);
+
+		$sourceItems = $this->loadItems($sourceConnection, $table, $primaryColumn, $primaryKeys);
+		if (!$sourceItems) {
+			return;
+		}
+		foreach ($sourceItems as &$sourceItem) {
+			$sourceItem[$shardColumn] = $targetShardKey;
+		}
+		// break the reference, otherwise the last row stays aliased by `$sourceItem`
+		unset($sourceItem);
+
+		$sourceConnection->beginTransaction();
+		$targetConnection->beginTransaction();
+		try {
+			$this->saveItems($targetConnection, $table, $sourceItems);
+			$this->deleteItems($sourceConnection, $table, $primaryColumn, $primaryKeys);
+
+			$targetConnection->commit();
+			$sourceConnection->commit();
+		} catch (\Exception $e) {
+			$sourceConnection->rollback();
+			$targetConnection->rollback();
+			throw $e;
+		}
+	}
+
+	/**
+	 * Load rows from a table to move
+	 *
+	 * The keys are queried in chunks of 1000.
+	 *
+	 * @param IDBConnection $connection
+	 * @param string $table
+	 * @param string $primaryColumn
+	 * @param int[] $primaryKeys
+	 * @return array[]
+	 */
+	public function loadItems(IDBConnection $connection, string $table, string $primaryColumn, array $primaryKeys): array {
+		$query = $connection->getQueryBuilder();
+		$query->select('*')
+			->from($table)
+			->where($query->expr()->in($primaryColumn, $query->createParameter('keys')));
+
+		$chunks = array_chunk($primaryKeys, 1000);
+
+		$results = [];
+		foreach ($chunks as $chunk) {
+			$query->setParameter('keys', $chunk, IQueryBuilder::PARAM_INT_ARRAY);
+			// `executeQuery` instead of the deprecated `execute`, like the other methods of this class
+			$result = $query->executeQuery();
+			$results[] = $result->fetchAll();
+			$result->closeCursor();
+		}
+
+		// merge once at the end instead of re-copying the accumulated rows for every chunk
+		return array_merge([], ...$results);
+	}
+
+	/**
+	 * Save modified rows
+	 *
+	 * The columns of the first row determine the columns of the insert statement.
+	 *
+	 * @param IDBConnection $connection
+	 * @param string $table
+	 * @param array[] $items
+	 * @return void
+	 */
+	public function saveItems(IDBConnection $connection, string $table, array $items): void {
+		if (count($items) === 0) {
+			return;
+		}
+		$query = $connection->getQueryBuilder();
+		$query->insert($table);
+		// don't rely on the rows being indexed from 0
+		$firstItem = $items[array_key_first($items)];
+		foreach ($firstItem as $column => $value) {
+			$query->setValue($column, $query->createParameter($column));
+		}
+
+		foreach ($items as $item) {
+			foreach ($item as $column => $value) {
+				// the parameter type depends on the value, not on the column name
+				if (is_int($value)) {
+					$query->setParameter($column, $value, IQueryBuilder::PARAM_INT);
+				} else {
+					$query->setParameter($column, $value);
+				}
+			}
+			$query->executeStatement();
+		}
+	}
+
+	/**
+	 * Set the shard key of a set of rows that stay on the same shard
+	 *
+	 * The keys are updated in chunks of 1000, like in `loadItems` and `deleteItems`.
+	 *
+	 * @param IDBConnection $connection
+	 * @param string $table
+	 * @param string $shardColumn
+	 * @param int $targetShardKey
+	 * @param string $primaryColumn
+	 * @param int[] $primaryKeys
+	 * @return void
+	 */
+	public function updateItems(IDBConnection $connection, string $table, string $shardColumn, int $targetShardKey, string $primaryColumn, array $primaryKeys): void {
+		$query = $connection->getQueryBuilder();
+		$query->update($table)
+			->set($shardColumn, $query->createNamedParameter($targetShardKey, IQueryBuilder::PARAM_INT))
+			->where($query->expr()->in($primaryColumn, $query->createParameter('keys')));
+		$chunks = array_chunk($primaryKeys, 1000);
+
+		foreach ($chunks as $chunk) {
+			$query->setParameter('keys', $chunk, IQueryBuilder::PARAM_INT_ARRAY);
+			// an UPDATE is a statement, it has no result set to fetch
+			$query->executeStatement();
+		}
+	}
+
+	/**
+	 * Delete a set of rows by primary key, in chunks of 1000
+	 *
+	 * @param IDBConnection $connection
+	 * @param string $table
+	 * @param string $primaryColumn
+	 * @param int[] $primaryKeys
+	 * @return void
+	 */
+	public function deleteItems(IDBConnection $connection, string $table, string $primaryColumn, array $primaryKeys): void {
+		$query = $connection->getQueryBuilder();
+		$query->delete($table)
+			->where($query->expr()->in($primaryColumn, $query->createParameter('keys')));
+		$chunks = array_chunk($primaryKeys, 1000);
+
+		foreach ($chunks as $chunk) {
+			$query->setParameter('keys', $chunk, IQueryBuilder::PARAM_INT_ARRAY);
+			$query->executeStatement();
+		}
+	}
+}
diff --git a/lib/private/DB/QueryBuilder/Sharded/HashShardMapper.php b/lib/private/DB/QueryBuilder/Sharded/HashShardMapper.php
new file mode 100644
index 00000000000..af778489a2d
--- /dev/null
+++ b/lib/private/DB/QueryBuilder/Sharded/HashShardMapper.php
@@ -0,0 +1,21 @@
+<?php
+
+declare(strict_types=1);
+/**
+ * SPDX-FileCopyrightText: 2024 Robin Appelman <robin@icewind.nl>
+ * SPDX-License-Identifier: AGPL-3.0-or-later
+ */
+
+namespace OC\DB\QueryBuilder\Sharded;
+
+use OCP\DB\QueryBuilder\Sharded\IShardMapper;
+
+/**
+ * Map an integer key to a shard by hashing the key
+ */
+class HashShardMapper implements IShardMapper {
+	/**
+	 * @param int $key the key to map
+	 * @param int $count the number of shards, has to be greater than 0
+	 * @return int the shard index, between 0 and `$count - 1`
+	 */
+	public function getShardForKey(int $key, int $count): int {
+		// interpret the first 4 bytes of the md5 hash as an unsigned 32bit integer
+		// the byte order is fixed to little endian ('V') instead of machine order ('L'),
+		// so the same key maps to the same shard on every architecture
+		$int = unpack('V', substr(md5((string)$key, true), 0, 4))[1];
+		return $int % $count;
+	}
+}
diff --git a/lib/private/DB/QueryBuilder/Sharded/InvalidShardedQueryException.php b/lib/private/DB/QueryBuilder/Sharded/InvalidShardedQueryException.php
new file mode 100644
index 00000000000..733a6acaf9d
--- /dev/null
+++ b/lib/private/DB/QueryBuilder/Sharded/InvalidShardedQueryException.php
@@ -0,0 +1,29 @@
+<?php
+
+declare(strict_types=1);
+/**
+ * SPDX-FileCopyrightText: 2024 Nextcloud GmbH and Nextcloud contributors
+ * SPDX-License-Identifier: AGPL-3.0-only
+ */
+
+namespace OC\DB\QueryBuilder\Sharded;
+
+/**
+ * Queries on sharded tables have the following limitations:
+ *
+ * 1. Either the shard key (e.g. "storage") or primary key (e.g. "fileid") must be mentioned in the query.
+ *    Or the query must be explicitly marked as running across all shards.
+ *
+ *    For queries where it isn't possible to set one of these keys in the query normally, you can set it using `hintShardKey`
+ *
+ * 2. Insert statements must always explicitly set the shard key
+ * 3. A query on a sharded table is not allowed to join on the same table
+ * 4. Right joins are not allowed on sharded tables
+ * 5. Updating the shard key where the new shard key maps to a different shard is not allowed
+ *
+ * Moving rows to a different shard needs to be implemented manually. `CrossShardMoveHelper` provides
+ * some tools to help make this easier.
+ */
+class InvalidShardedQueryException extends \Exception {
+
+}
diff --git a/lib/private/DB/QueryBuilder/Sharded/RoundRobinShardMapper.php b/lib/private/DB/QueryBuilder/Sharded/RoundRobinShardMapper.php
new file mode 100644
index 00000000000..a5694b06507
--- /dev/null
+++ b/lib/private/DB/QueryBuilder/Sharded/RoundRobinShardMapper.php
@@ -0,0 +1,20 @@
+<?php
+
+declare(strict_types=1);
+/**
+ * SPDX-FileCopyrightText: 2024 Robin Appelman <robin@icewind.nl>
+ * SPDX-License-Identifier: AGPL-3.0-or-later
+ */
+
+namespace OC\DB\QueryBuilder\Sharded;
+
+use OCP\DB\QueryBuilder\Sharded\IShardMapper;
+
+/**
+ * Map an integer key to a shard by taking the key modulo the number of shards
+ *
+ * Consecutive keys end up in consecutive shards, no hashing is involved.
+ */
+class RoundRobinShardMapper implements IShardMapper {
+	/**
+	 * @param int $key the key to map
+	 * @param int $count the number of shards, has to be greater than 0
+	 * @return int the shard index, between 0 and `$count - 1`
+	 */
+	public function getShardForKey(int $key, int $count): int {
+		// php's `%` takes the sign of the left operand, normalize so a negative key can't produce a negative shard
+		return (($key % $count) + $count) % $count;
+	}
+}
diff --git a/lib/private/DB/QueryBuilder/Sharded/ShardConnectionManager.php b/lib/private/DB/QueryBuilder/Sharded/ShardConnectionManager.php
new file mode 100644
index 00000000000..87cac58bc57
--- /dev/null
+++ b/lib/private/DB/QueryBuilder/Sharded/ShardConnectionManager.php
@@ -0,0 +1,43 @@
+<?php
+
+declare(strict_types=1);
+/**
+ * SPDX-FileCopyrightText: 2024 Robin Appelman <robin@icewind.nl>
+ * SPDX-License-Identifier: AGPL-3.0-or-later
+ */
+
+namespace OC\DB\QueryBuilder\Sharded;
+
+use OC\DB\ConnectionAdapter;
+use OC\DB\ConnectionFactory;
+use OC\SystemConfig;
+use OCP\IDBConnection;
+
+/**
+ * Keeps track of the db connections to the various shards
+ */
+class ShardConnectionManager {
+	/** @var array<string, IDBConnection> */
+	private array $connections = [];
+
+	public function __construct(
+		private SystemConfig $config,
+		private ConnectionFactory $factory,
+	) {
+	}
+
+	public function getConnection(ShardDefinition $shardDefinition, int $shard): IDBConnection {
+		$connectionKey = $shardDefinition->table . '_' .
$shard;
+		// connections are created on first use and re-used afterwards, one per table and shard
+		if (!isset($this->connections[$connectionKey])) {
+			$this->connections[$connectionKey] = $this->createConnection($shardDefinition->shards[$shard]);
+		}
+
+		return $this->connections[$connectionKey];
+	}
+
+	/**
+	 * Create a connection from the configuration of a single shard
+	 *
+	 * The database type is taken from the `dbtype` system config value, the `sharding` option is
+	 * emptied for the shard connection (presumably so the shard itself isn't set up as sharded again - TODO confirm).
+	 *
+	 * @param array $shardConfig connection parameters of the shard
+	 */
+	private function createConnection(array $shardConfig): IDBConnection {
+		$shardConfig['sharding'] = [];
+		$type = $this->config->getValue('dbtype', 'sqlite');
+		return new ConnectionAdapter($this->factory->getConnection($type, $shardConfig));
+	}
+}
diff --git a/lib/private/DB/QueryBuilder/Sharded/ShardDefinition.php b/lib/private/DB/QueryBuilder/Sharded/ShardDefinition.php
new file mode 100644
index 00000000000..ebccbb639a6
--- /dev/null
+++ b/lib/private/DB/QueryBuilder/Sharded/ShardDefinition.php
@@ -0,0 +1,66 @@
+<?php
+
+declare(strict_types=1);
+/**
+ * SPDX-FileCopyrightText: 2024 Robin Appelman <robin@icewind.nl>
+ * SPDX-License-Identifier: AGPL-3.0-or-later
+ */
+
+namespace OC\DB\QueryBuilder\Sharded;
+
+use OCP\DB\QueryBuilder\Sharded\IShardMapper;
+
+/**
+ * Configuration for a shard setup
+ */
+class ShardDefinition {
+	// we reserve the bottom byte of the primary key for the initial shard, so the total shard count is limited to what we can fit there
+	public const MAX_SHARDS = 256;
+
+	// all bits of a (positive) primary key except the bottom byte
+	public const PRIMARY_KEY_MASK = 0x7F_FF_FF_FF_FF_FF_FF_00;
+	// the bottom byte of a primary key, which holds the initial shard
+	public const PRIMARY_KEY_SHARD_MASK = 0x00_00_00_00_00_00_00_FF;
+	// since we reserve 1 byte for the shard index, we only have 56 bits of primary key space
+	public const MAX_PRIMARY_KEY = PHP_INT_MAX >> 8;
+
+	/**
+	 * @param string $table
+	 * @param string $primaryKey
+	 * @param string[] $companionKeys
+	 * @param string $shardKey
+	 * @param IShardMapper $shardMapper
+	 * @param string[] $companionTables
+	 * @param array $shards
+	 */
+	public function __construct(
+		public string $table,
+		public string $primaryKey,
+		public array $companionKeys,
+		public string $shardKey,
+		public IShardMapper $shardMapper,
+		public array $companionTables = [],
+		public array $shards = [],
+	) {
+		if
(count($this->shards) > self::MAX_SHARDS) {
+			// shard indexes 0..255 fit in the reserved byte, so exactly MAX_SHARDS shards are still fine
+			throw new \Exception('Only allowed maximum of ' . self::MAX_SHARDS . ' shards allowed');
+		}
+	}
+
+	/**
+	 * Check if the table is the sharded table itself or one of its companion tables
+	 */
+	public function hasTable(string $table): bool {
+		if ($this->table === $table) {
+			return true;
+		}
+		return in_array($table, $this->companionTables, true);
+	}
+
+	/**
+	 * Get the index of the shard the shard key maps to, as decided by the configured shard mapper
+	 */
+	public function getShardForKey(int $key): int {
+		return $this->shardMapper->getShardForKey($key, count($this->shards));
+	}
+
+	/**
+	 * Get the indexes of all configured shards
+	 */
+	public function getAllShards(): array {
+		return array_keys($this->shards);
+	}
+
+	/**
+	 * Check if the column is the primary key or one of the companion keys
+	 */
+	public function isKey(string $column): bool {
+		return $column === $this->primaryKey || in_array($column, $this->companionKeys, true);
+	}
+}
diff --git a/lib/private/DB/QueryBuilder/Sharded/ShardQueryRunner.php b/lib/private/DB/QueryBuilder/Sharded/ShardQueryRunner.php
new file mode 100644
index 00000000000..51cd055e801
--- /dev/null
+++ b/lib/private/DB/QueryBuilder/Sharded/ShardQueryRunner.php
@@ -0,0 +1,197 @@
+<?php
+
+declare(strict_types=1);
+/**
+ * SPDX-FileCopyrightText: 2024 Robin Appelman <robin@icewind.nl>
+ * SPDX-License-Identifier: AGPL-3.0-or-later
+ */
+
+namespace OC\DB\QueryBuilder\Sharded;
+
+use OC\DB\ArrayResult;
+use OCP\DB\IResult;
+use OCP\DB\QueryBuilder\IQueryBuilder;
+
+/**
+ * Logic for running a query across a number of shards, combining the results
+ */
+class ShardQueryRunner {
+	public function __construct(
+		private ShardConnectionManager $shardConnectionManager,
+		private ShardDefinition $shardDefinition,
+	) {
+	}
+
+	/**
+	 * Get the shards for a specific query or null if the shards aren't known in advance
+	 *
+	 * @param bool $allShards
+	 * @param int[] $shardKeys
+	 * @return null|int[]
+	 */
+	public function getShards(bool $allShards, array $shardKeys): ?array {
+		if ($allShards) {
+			return $this->shardDefinition->getAllShards();
+		}
+		$allConfiguredShards = $this->shardDefinition->getAllShards();
+		if (count($allConfiguredShards) === 1) {
+			return $allConfiguredShards;
+		}
+		if (empty($shardKeys)) {
+			return null;
+		}
+
$shards = array_map(function ($shardKey) {
+			return $this->shardDefinition->getShardForKey((int)$shardKey);
+		}, $shardKeys);
+		return array_values(array_unique($shards));
+	}
+
+	/**
+	 * Try to get the shards that the keys are likely to be in, based on the shard the row was created
+	 *
+	 * The shard is read from the lowest byte of each primary key, values that aren't a configured shard are skipped.
+	 *
+	 * @param int[] $primaryKeys
+	 * @return int[]
+	 */
+	private function getLikelyShards(array $primaryKeys): array {
+		$shards = [];
+		foreach ($primaryKeys as $primaryKey) {
+			$encodedShard = $primaryKey & ShardDefinition::PRIMARY_KEY_SHARD_MASK;
+			if ($encodedShard < count($this->shardDefinition->shards) && !in_array($encodedShard, $shards, true)) {
+				$shards[] = $encodedShard;
+			}
+		}
+		return $shards;
+	}
+
+	/**
+	 * Execute a SELECT statement across the configured shards
+	 *
+	 * When the query only touches a single shard, the result of that shard is returned directly.
+	 * Otherwise the rows of all queried shards are merged, and sorting, limit and offset are applied in php.
+	 *
+	 * @param IQueryBuilder $query
+	 * @param bool $allShards
+	 * @param int[] $shardKeys
+	 * @param int[] $primaryKeys
+	 * @param array{column: string, order: string}[] $sortList
+	 * @param int|null $limit
+	 * @param int|null $offset
+	 * @return IResult
+	 */
+	public function executeQuery(
+		IQueryBuilder $query,
+		bool $allShards,
+		array $shardKeys,
+		array $primaryKeys,
+		?array $sortList = null,
+		?int $limit = null,
+		?int $offset = null,
+	): IResult {
+		$shards = $this->getShards($allShards, $shardKeys);
+		$results = [];
+		if ($shards && count($shards) === 1) {
+			// trivial case
+			return $query->executeQuery($this->shardConnectionManager->getConnection($this->shardDefinition, $shards[0]));
+		}
+		// we have to emulate limit and offset, so we select offset+limit from all shards to ensure we have enough rows
+		// and then filter them down after we merged the results
+		if ($limit !== null && $offset !== null) {
+			$query->setMaxResults($limit + $offset);
+		}
+
+		if ($shards) {
+			// we know exactly what shards we need to query
+			foreach ($shards as $shard) {
+				$shardConnection = $this->shardConnectionManager->getConnection($this->shardDefinition, $shard);
+				$subResult = $query->executeQuery($shardConnection);
+				$results = array_merge($results, $subResult->fetchAll());
+				$subResult->closeCursor();
+			}
+		} else {
+			// we don't know for sure what shards we need to query,
+			// we first try the shards that are "likely" to have the rows we want, based on the shard that the row was
+			// originally created in. If we then still haven't found all rows we try the rest of the shards
+			$likelyShards = $this->getLikelyShards($primaryKeys);
+			$unlikelyShards = array_diff($this->shardDefinition->getAllShards(), $likelyShards);
+			$shards = array_merge($likelyShards, $unlikelyShards);
+
+			// NOTE(review): with an empty `$primaryKeys` this stops after the first shard, confirm that callers
+			// never get here without shard keys and without primary keys
+			foreach ($shards as $shard) {
+				$shardConnection = $this->shardConnectionManager->getConnection($this->shardDefinition, $shard);
+				$subResult = $query->executeQuery($shardConnection);
+				$rows = $subResult->fetchAll();
+				$results = array_merge($results, $rows);
+				$subResult->closeCursor();
+
+				// compare against everything found so far, not just the rows of the last shard,
+				// the same way `executeStatement` counts the affected rows
+				if (count($results) >= count($primaryKeys)) {
+					// we have all the rows we're looking for
+					break;
+				}
+			}
+		}
+
+		if ($sortList) {
+			usort($results, function ($a, $b) use ($sortList) {
+				foreach ($sortList as $sort) {
+					$valueA = $a[$sort['column']] ?? null;
+					$valueB = $b[$sort['column']] ?? null;
+					$cmp = $valueA <=> $valueB;
+					if ($cmp === 0) {
+						continue;
+					}
+					if ($sort['order'] === 'DESC') {
+						$cmp = -$cmp;
+					}
+					return $cmp;
+				}
+				// the rows are equal for every sort column
+				return 0;
+			});
+		}
+
+		if ($limit !== null && $offset !== null) {
+			$results = array_slice($results, $offset, $limit);
+		} elseif ($limit !== null) {
+			$results = array_slice($results, 0, $limit);
+		} elseif ($offset !== null) {
+			$results = array_slice($results, $offset);
+		}
+
+		return new ArrayResult($results);
+	}
+
+	/**
+	 * Execute an UPDATE or DELETE statement
+	 *
+	 * When the shards aren't known in advance, the shards are tried one by one, likely shards first,
+	 * until as many rows were affected as there are primary keys.
+	 *
+	 * @param IQueryBuilder $query
+	 * @param bool $allShards
+	 * @param int[] $shardKeys
+	 * @param int[] $primaryKeys
+	 * @return int the total number of affected rows
+	 * @throws \OCP\DB\Exception
+	 * @throws \Exception when called with an INSERT query
+	 */
+	public function executeStatement(IQueryBuilder $query, bool $allShards, array $shardKeys, array $primaryKeys): int {
+		if ($query->getType() === \Doctrine\DBAL\Query\QueryBuilder::INSERT) {
+			throw new \Exception('insert queries need special handling');
+		}
+
+		$shards = $this->getShards($allShards, $shardKeys);
+		$maxCount = count($primaryKeys);
+		if ($shards && count($shards) === 1) {
+			return $query->executeStatement($this->shardConnectionManager->getConnection($this->shardDefinition, $shards[0]));
+		} elseif ($shards) {
+			// the shards are known, all of them have to be processed
+			$maxCount = PHP_INT_MAX;
+		} else {
+			// sort the likely shards before the rest, similar logic to `self::executeQuery`
+			$likelyShards = $this->getLikelyShards($primaryKeys);
+			$unlikelyShards = array_diff($this->shardDefinition->getAllShards(), $likelyShards);
+			$shards = array_merge($likelyShards, $unlikelyShards);
+		}
+
+		$count = 0;
+
+		foreach ($shards as $shard) {
+			$shardConnection = $this->shardConnectionManager->getConnection($this->shardDefinition, $shard);
+			$count += $query->executeStatement($shardConnection);
+
+			if ($count >= $maxCount) {
+				break;
+			}
+		}
+		return $count;
+	}
+}
diff --git a/lib/private/DB/QueryBuilder/Sharded/ShardedQueryBuilder.php b/lib/private/DB/QueryBuilder/Sharded/ShardedQueryBuilder.php
new file mode 100644
index 00000000000..e7bc70ce440
--- /dev/null
+++ b/lib/private/DB/QueryBuilder/Sharded/ShardedQueryBuilder.php
@@ -0,0 +1,451 @@
+<?php
+
+declare(strict_types=1);
+/**
+ * SPDX-FileCopyrightText: 2024 Robin Appelman <robin@icewind.nl>
+ * SPDX-License-Identifier: AGPL-3.0-or-later
+ */
+
+namespace OC\DB\QueryBuilder\Sharded;
+
+use OC\DB\QueryBuilder\CompositeExpression;
+use OC\DB\QueryBuilder\ExtendedQueryBuilder;
+use OC\DB\QueryBuilder\Parameter;
+use OCP\DB\IResult;
+use OCP\DB\QueryBuilder\IQueryBuilder;
+use OCP\IDBConnection;
+
+/**
+ * A special query builder that automatically distributes queries over multiple database shards.
+ *
+ * This relies on `PartitionedQueryBuilder` to handle splitting of parts of the query that touch the sharded tables
+ * from the non-sharded tables. So the query built here should either touch only sharded tables or only non-sharded tables.
+ *
+ * Most of the logic in this class is concerned with extracting either the shard key (e.g. "storage") or primary key (e.g. "fileid")
+ * from the query. The logic for actually running the query across the shards is mostly delegated to `ShardQueryRunner`.
+ */
+class ShardedQueryBuilder extends ExtendedQueryBuilder {
+	private array $shardKeys = [];
+	private array $primaryKeys = [];
+	private ?ShardDefinition $shardDefinition = null;
+	/** @var bool Run the query across all shards */
+	private bool $allShards = false;
+	private ?string $insertTable = null;
+	private mixed $lastInsertId = null;
+	private ?IDBConnection $lastInsertConnection = null;
+	/** @var mixed New shard key value as passed to `set()`, resolved through `getKeyValue()` in `validate()` */
+	private mixed $updateShardKey = null;
+	private ?int $limit = null;
+	private ?int $offset = null;
+	/** @var array{column: string, order: string}[] */
+	private array $sortList = [];
+	private string $mainTable = '';
+
+	public function __construct(
+		IQueryBuilder $builder,
+		protected array $shardDefinitions,
+		protected ShardConnectionManager $shardConnectionManager,
+		protected AutoIncrementHandler $autoIncrementHandler,
+	) {
+		parent::__construct($builder);
+	}
+
+	/**
+	 * Get the resolved, unique shard key values collected from the query
+	 */
+	public function getShardKeys(): array {
+		return $this->getKeyValues($this->shardKeys);
+	}
+
+	/**
+	 * Get the resolved, unique primary and companion key values collected from the query
+	 */
+	public function getPrimaryKeys(): array {
+		return $this->getKeyValues($this->primaryKeys);
+	}
+
+	/**
+	 * Resolve a list of collected keys to their unique values
+	 */
+	private function getKeyValues(array $keys): array {
+		$values = [];
+		foreach ($keys as $key) {
+			$values = array_merge($values, $this->getKeyValue($key));
+		}
+		return array_values(array_unique($values));
+	}
+
+	/**
+	 * Resolve a single collected key to its value(s)
+	 *
+	 * Named parameter placeholders (`:name`) are replaced by the value of the parameter, anything else is used as is.
+	 */
+	private function getKeyValue($value): array {
+		if ($value instanceof Parameter) {
+			$value = (string)$value;
+		}
+		if (is_string($value) && str_starts_with($value, ':')) {
+			$param = $this->getParameter(substr($value, 1));
+			if (is_array($param)) {
+				return $param;
+			} else {
+				return [$param];
+			}
+		} elseif ($value !== null) {
+			return [$value];
+		} else {
+			return [];
+		}
+	}
+
+	public function where(...$predicates) {
+		return $this->andWhere(...$predicates);
+	}
+
+	public function andWhere(...$where) {
+		if ($where) {
+			foreach ($where as $predicate) {
+				$this->tryLoadShardKey($predicate);
+			}
+			parent::andWhere(...$where);
+		}
+		return $this;
+	}
+
+	/**
+	 * Record any shard key, primary key or companion key comparison found in the predicate
+	 */
+	private function tryLoadShardKey($predicate): void {
+		if (!$this->shardDefinition) {
+			return;
+		}
+		// append instead of using the array union operator: both sides are lists,
+		// so `+=` would silently drop the new keys whose index is already taken
+		if ($keys = $this->tryExtractShardKeys($predicate, $this->shardDefinition->shardKey)) {
+			$this->shardKeys = array_merge($this->shardKeys, $keys);
+		}
+		if ($keys = $this->tryExtractShardKeys($predicate, $this->shardDefinition->primaryKey)) {
+			$this->primaryKeys = array_merge($this->primaryKeys, $keys);
+		}
+		foreach ($this->shardDefinition->companionKeys as $companionKey) {
+			if ($keys = $this->tryExtractShardKeys($predicate, $companionKey)) {
+				$this->primaryKeys = array_merge($this->primaryKeys, $keys);
+			}
+		}
+	}
+
+	/**
+	 * Extract the values that the given column is compared to in the predicate
+	 *
+	 * @param mixed $predicate
+	 * @param string $column
+	 * @return string[] the right-hand sides of the matching `=` and `IN` comparisons
+	 */
+	private function tryExtractShardKeys($predicate, string $column): array {
+		if ($predicate instanceof CompositeExpression) {
+			$values = [];
+			foreach ($predicate->getParts() as $part) {
+				$partValues = $this->tryExtractShardKeys($part, $column);
+				// for OR expressions, we can only rely on the predicate if all parts contain the comparison
+				if ($predicate->getType() === CompositeExpression::TYPE_OR && !$partValues) {
+					return [];
+				}
+				$values = array_merge($values, $partValues);
+			}
+			return $values;
+		}
+		$predicate = (string)$predicate;
+		// expect a condition in the form of 'alias1.column1 = placeholder' or 'alias1.column1 in placeholder'
+		if (substr_count($predicate, ' ') > 2) {
+			return [];
+		}
+		if (str_contains($predicate, ' = ')) {
+			$parts = explode(' = ', $predicate);
+			if ($parts[0] === "`{$column}`" || str_ends_with($parts[0], "`.`{$column}`")) {
+				return [$parts[1]];
+			} else {
+				return [];
+			}
+		}
+
+		if (str_contains($predicate, ' IN ')) {
+			$parts = explode(' IN ', $predicate);
+			if ($parts[0] === "`{$column}`" || str_ends_with($parts[0], "`.`{$column}`")) {
+				return [trim(trim($parts[1], '('), ')')];
+			} else {
+				return [];
+			}
+		}
+
+		return [];
+	}
+
+	public function set($key, $value) {
+		if ($this->shardDefinition && $key === $this->shardDefinition->shardKey) {
+			// remember the new shard key so `validate()` can check whether the update would move rows to another shard
+			$this->updateShardKey = $value;
+		}
+		return parent::set($key, $value);
+	}
+
+	public function setValue($column, $value) {
+		if ($this->shardDefinition) {
+			if ($this->shardDefinition->isKey($column)) {
+				$this->primaryKeys[] = $value;
+			}
+			if ($column === $this->shardDefinition->shardKey) {
+				$this->shardKeys[] = $value;
+			}
+		}
+		return parent::setValue($column, $value);
+	}
+
+	public function values(array $values) {
+		foreach ($values as $column => $value) {
+			$this->setValue($column, $value);
+		}
+		return $this;
+	}
+
+	/**
+	 * Remember the main table of the query and pick up the shard definition that contains it, if any
+	 */
+	private function actOnTable(string $table): void {
+		$this->mainTable = $table;
+		foreach ($this->shardDefinitions as $shardDefinition) {
+			if ($shardDefinition->hasTable($table)) {
+				$this->shardDefinition = $shardDefinition;
+			}
+		}
+	}
+
+	public function from($from, $alias = null) {
+		if (is_string($from) && $from) {
+			$this->actOnTable($from);
+		}
+		return parent::from($from, $alias);
+	}
+
+	public function update($update = null, $alias = null) {
+		if (is_string($update) && $update) {
+			$this->actOnTable($update);
+		}
+		return parent::update($update, $alias);
+	}
+
+	public function insert($insert = null) {
+		if (is_string($insert) && $insert) {
+			$this->insertTable = $insert;
+			$this->actOnTable($insert);
+		}
+		return parent::insert($insert);
+	}
+
+	public function delete($delete = null, $alias = null) {
+		if (is_string($delete) && $delete) {
+			$this->actOnTable($delete);
+		}
+		return parent::delete($delete, $alias);
+	}
+
+	/**
+	 * Ensure a sharded query only joins other tables that belong to the same shard definition
+	 *
+	 * @throws InvalidShardedQueryException
+	 */
+	private function checkJoin(string $table): void {
+		if ($this->shardDefinition) {
+			if ($table === $this->mainTable) {
+				throw new InvalidShardedQueryException("Sharded query on {$this->mainTable} isn't allowed to join on itself");
+			}
+			if (!$this->shardDefinition->hasTable($table)) {
+				// this generally shouldn't happen as the partitioning logic should prevent this
+				// but the check is here just in case
+				throw new InvalidShardedQueryException("Sharded query on {$this->shardDefinition->table} isn't allowed to join on $table");
+			}
+		}
+	}
+
+	public function innerJoin($fromAlias, $join, $alias, $condition = null) {
+		$this->checkJoin($join);
+		return parent::innerJoin($fromAlias, $join, $alias, $condition);
+	}
+
+	public function leftJoin($fromAlias, $join, $alias, $condition = null) {
+		$this->checkJoin($join);
+		return parent::leftJoin($fromAlias, $join, $alias, $condition);
+	}
+
+	public function rightJoin($fromAlias, $join, $alias, $condition = null) {
+		if ($this->shardDefinition) {
+			throw new InvalidShardedQueryException("Sharded query on {$this->shardDefinition->table} isn't allowed to right join");
+		}
+		return parent::rightJoin($fromAlias, $join, $alias, $condition);
+	}
+
+	public function join($fromAlias, $join, $alias, $condition = null) {
+		return $this->innerJoin($fromAlias, $join, $alias, $condition);
+	}
+
+	public function setMaxResults($maxResults) {
+		// also forget a previously set limit when it's removed again, so it isn't applied to the merged rows
+		$this->limit = $maxResults > 0 ? (int)$maxResults : null;
+		return parent::setMaxResults($maxResults);
+	}
+
+	public function setFirstResult($firstResult) {
+		// also forget a previously set offset when it's reset, so it isn't applied to the merged rows
+		$this->offset = $firstResult > 0 ? (int)$firstResult : null;
+		if ($this->shardDefinition && count($this->shardDefinition->shards) > 1) {
+			// we have to emulate offset
+			return $this;
+		} else {
+			return parent::setFirstResult($firstResult);
+		}
+	}
+
+	public function addOrderBy($sort, $order = null) {
+		$this->registerOrder((string)$sort, (string)($order ?? 'ASC'));
+		// append to the existing ordering instead of replacing it
+		return parent::addOrderBy($sort, $order);
+	}
+
+	public function orderBy($sort, $order = null) {
+		$this->sortList = [];
+		$this->registerOrder((string)$sort, (string)($order ?? 'ASC'));
+		return parent::orderBy($sort, $order);
+	}
+
+	/**
+	 * Remember a sort column so the ordering can be re-applied to rows merged from multiple shards
+	 */
+	private function registerOrder(string $column, string $order): void {
+		// handle `mime + 0` and similar by just sorting on the first part of the expression
+		[$column] = explode(' ', $column);
+		$column = trim($column, '`');
+		$this->sortList[] = [
+			'column' => $column,
+			'order' => strtoupper($order),
+		];
+	}
+
+	/**
+	 * Explicitly provide the value of a shard key, primary key or companion key for this query
+	 *
+	 * @param string $column
+	 * @param mixed $value
+	 * @param bool $overwrite discard the shard keys and primary keys collected so far
+	 * @return $this
+	 */
+	public function hintShardKey(string $column, mixed $value, bool $overwrite = false) {
+		if ($overwrite) {
+			$this->primaryKeys = [];
+			$this->shardKeys = [];
+		}
+		if ($this->shardDefinition?->isKey($column)) {
+			$this->primaryKeys[] = $value;
+		}
+		if ($column === $this->shardDefinition?->shardKey) {
+			$this->shardKeys[] = $value;
+		}
+		return $this;
+	}
+
+	public function runAcrossAllShards() {
+		$this->allShards = true;
+		return $this;
+	}
+
+	/**
+	 * Check that the query can be distributed over the shards
+	 *
+	 * @throws InvalidShardedQueryException
+	 */
+	public function validate(): void {
+		if ($this->shardDefinition && $this->insertTable) {
+			if ($this->allShards) {
+				throw new InvalidShardedQueryException("Can't insert across all shards");
+			}
+			if (empty($this->getShardKeys())) {
+				throw new InvalidShardedQueryException("Can't insert without shard key");
+			}
+		}
+		if ($this->shardDefinition && !$this->allShards) {
+			if (empty($this->getShardKeys()) && empty($this->getPrimaryKeys())) {
+				throw new InvalidShardedQueryException('No shard key or primary key set for query');
+			}
+		}
+		if ($this->shardDefinition && $this->updateShardKey !== null) {
+			$newShardKey = $this->getKeyValue($this->updateShardKey);
+			$oldShardKeys = $this->getShardKeys();
+			if (count($newShardKey) !== 1) {
+				throw new InvalidShardedQueryException("Can't set shard key to an array");
+			}
+			$newShardKey = current($newShardKey);
+			if (empty($oldShardKeys)) {
+				throw new InvalidShardedQueryException("Can't update without shard key");
+			}
+			$oldShards = array_values(array_unique(array_map(function ($shardKey) {
+				return $this->shardDefinition->getShardForKey((int)$shardKey);
+			}, $oldShardKeys)));
+			$newShard = $this->shardDefinition->getShardForKey((int)$newShardKey);
+			// the update is only safe if every affected row already lives in the shard of the new key
+			if ($oldShards !== [$newShard]) {
+				throw new InvalidShardedQueryException('Update statement would move rows to a different shard');
+			}
+		}
+	}
+
+	public function executeQuery(?IDBConnection $connection = null): IResult {
+		$this->validate();
+		if ($this->shardDefinition) {
+			$runner = new ShardQueryRunner($this->shardConnectionManager, $this->shardDefinition);
+			return $runner->executeQuery($this->builder, $this->allShards, $this->getShardKeys(), $this->getPrimaryKeys(), $this->sortList, $this->limit, $this->offset);
+		}
+		return parent::executeQuery($connection);
+	}
+
+	public function executeStatement(?IDBConnection $connection = null): int {
+		$this->validate();
+		if ($this->shardDefinition) {
+			$runner = new ShardQueryRunner($this->shardConnectionManager, $this->shardDefinition);
+			if ($this->insertTable) {
+				$shards = $runner->getShards($this->allShards, $this->getShardKeys());
+				if (!$shards) {
+					throw new InvalidShardedQueryException("Can't insert without shard key");
+				}
+				$count = 0;
+				foreach ($shards as $shard) {
+					$shardConnection = $this->shardConnectionManager->getConnection($this->shardDefinition, $shard);
+					if (!$this->primaryKeys && $this->shardDefinition->table === $this->insertTable) {
+						// no primary key was provided for the main table, so generate one for the target shard
+						$id = $this->autoIncrementHandler->getNextPrimaryKey($this->shardDefinition, $shard);
+						parent::setValue($this->shardDefinition->primaryKey, $this->createParameter('__generated_primary_key'));
+						$this->setParameter('__generated_primary_key', $id, self::PARAM_INT);
+						$this->lastInsertId = $id;
+					}
+					$count += parent::executeStatement($shardConnection);
+
+					$this->lastInsertConnection = $shardConnection;
+				}
+				return $count;
+			} else {
+				return $runner->executeStatement($this->builder, $this->allShards, $this->getShardKeys(), $this->getPrimaryKeys());
+			}
+		}
+		return parent::executeStatement($connection);
+	}
+
+	public function getLastInsertId(): int {
+		if ($this->lastInsertId) {
+			return $this->lastInsertId;
+		}
+		if ($this->lastInsertConnection) {
+			$table = $this->builder->prefixTableName($this->insertTable);
+			return $this->lastInsertConnection->lastInsertId($table);
+		} else {
+			return parent::getLastInsertId();
+		}
+	}
+
+}
diff --git a/lib/private/DB/SchemaWrapper.php b/lib/private/DB/SchemaWrapper.php
index 5720e10fbdb..473c0009237 100644
--- a/lib/private/DB/SchemaWrapper.php
+++ b/lib/private/DB/SchemaWrapper.php
@@ -36,6 +36,9 @@ class SchemaWrapper implements ISchemaWrapper {
 	public function performDropTableCalls() {
 		foreach ($this->tablesToDelete as $tableName => $true) {
 			$this->connection->dropTable($tableName);
+			foreach ($this->connection->getShardConnections() as $shardConnection) {
+				$shardConnection->dropTable($tableName);
+			}
 			unset($this->tablesToDelete[$tableName]);
 		}
 	}
|