diff options
Diffstat (limited to 'lib/private/DB/QueryBuilder/Partitioned')
6 files changed, 924 insertions, 0 deletions
<?php

declare(strict_types=1);

// ---------------------------------------------------------------------------------------------------------------------
// lib/private/DB/QueryBuilder/Partitioned/InvalidPartitionedQueryException.php (new file)
// ---------------------------------------------------------------------------------------------------------------------
/**
 * SPDX-FileCopyrightText: 2024 Nextcloud GmbH and Nextcloud contributors
 * SPDX-License-Identifier: AGPL-3.0-only
 */

namespace OC\DB\QueryBuilder\Partitioned;

/**
 * Partitioned queries impose limitations that queries have to follow:
 *
 * 1. Any reference to columns not in the "main table" (the table referenced by "FROM"), needs to explicitly include the
 *    table or alias the column belongs to.
 *
 *    For example:
 *    ```
 *    $query->select("mount_point", "mimetype")
 *        ->from("mounts", "m")
 *        ->innerJoin("m", "filecache", "f", $query->expr()->eq("root_id", "fileid"));
 *    ```
 *    will not work, as the query builder doesn't know that the `mimetype` column belongs to the "filecache partition".
 *    Instead, you need to do
 *    ```
 *    $query->select("mount_point", "f.mimetype")
 *        ->from("mounts", "m")
 *        ->innerJoin("m", "filecache", "f", $query->expr()->eq("m.root_id", "f.fileid"));
 *    ```
 *
 * 2. The "ON" condition for the join can only perform a comparison between both sides of the join once.
 *
 *    For example:
 *    ```
 *    $query->select("mount_point", "f.mimetype")
 *        ->from("mounts", "m")
 *        ->innerJoin("m", "filecache", "f", $query->expr()->andX(
 *            $query->expr()->eq("m.root_id", "f.fileid"),
 *            $query->expr()->eq("m.storage_id", "f.storage"),
 *        ));
 *    ```
 *    will not work.
 *
 * 3. An "OR" expression in the "WHERE" cannot mention both sides of the join, this does not apply to "AND" expressions.
 *
 *    For example:
 *    ```
 *    $query->select("mount_point", "f.mimetype")
 *        ->from("mounts", "m")
 *        ->innerJoin("m", "filecache", "f", $query->expr()->eq("m.root_id", "f.fileid"))
 *        ->where($query->expr()->orX(
 *            $query->expr()->eq("m.user_id", $query->createNamedParameter("test")),
 *            $query->expr()->eq("f.name", $query->createNamedParameter("test")),
 *        ));
 *    ```
 *    will not work, but
 *    ```
 *    $query->select("mount_point", "f.mimetype")
 *        ->from("mounts", "m")
 *        ->innerJoin("m", "filecache", "f", $query->expr()->eq("m.root_id", "f.fileid"))
 *        ->where($query->expr()->andX(
 *            $query->expr()->eq("m.user_id", $query->createNamedParameter("test")),
 *            $query->expr()->eq("f.name", $query->createNamedParameter("test")),
 *        ));
 *    ```
 *    will.
 *
 * 4. Queries that join cross-partition cannot use positional parameters, only named parameters are allowed.
 * 5. The "ON" condition of a join cannot contain an "OR" expression.
 * 6. Right-joins are not allowed.
 * 7. Update, delete and insert statements aren't allowed to contain cross-partition joins.
 * 8. Queries that "GROUP BY" a column from the joined partition are not allowed.
 * 9. Any `join` call needs to be made before any `where` call.
 * 10. Queries that join cross-partition with an "INNER JOIN" or "LEFT JOIN" with a condition on the left side
 *     cannot use "LIMIT" or "OFFSET" in queries.
 *
 * The part of the query running on the sharded table has some additional limitations,
 * see the `InvalidShardedQueryException` documentation for more information.
 */
class InvalidPartitionedQueryException extends \Exception {

}

// ---------------------------------------------------------------------------------------------------------------------
// lib/private/DB/QueryBuilder/Partitioned/JoinCondition.php (new file)
// ---------------------------------------------------------------------------------------------------------------------
/**
 * SPDX-FileCopyrightText: 2024 Robin Appelman <robin@icewind.nl>
 * SPDX-License-Identifier: AGPL-3.0-or-later
 */

namespace OC\DB\QueryBuilder\Partitioned;

use OC\DB\QueryBuilder\CompositeExpression;
use OC\DB\QueryBuilder\QueryFunction;
use OCP\DB\QueryBuilder\IQueryFunction;

/**
 * Utility class for working with join conditions
 *
 * A join condition consists of the column on the "from" side, the column on the "to" side, an optional generated
 * alias for each of them, and extra predicates that only restrict one of the two sides.
 */
class JoinCondition {
	/**
	 * Column expressions given as a string starting with `(` are wrapped in a `QueryFunction`.
	 *
	 * @param string|IQueryFunction $fromColumn column (or expression) on the side the join starts from
	 * @param ?string $fromAlias alias to select `$fromColumn` as, or null when the column can be used directly
	 * @param string|IQueryFunction $toColumn column (or expression) on the side being joined
	 * @param ?string $toAlias alias to select `$toColumn` as, or null when the column can be used directly
	 * @param array $fromConditions extra predicates that only limit the "from" side
	 * @param array $toConditions extra predicates that only limit the "to" side
	 */
	public function __construct(
		public string|IQueryFunction $fromColumn,
		public ?string $fromAlias,
		public string|IQueryFunction $toColumn,
		public ?string $toAlias,
		public array $fromConditions,
		public array $toConditions,
	) {
		if (is_string($this->fromColumn) && str_starts_with($this->fromColumn, '(')) {
			$this->fromColumn = new QueryFunction($this->fromColumn);
		}
		if (is_string($this->toColumn) && str_starts_with($this->toColumn, '(')) {
			$this->toColumn = new QueryFunction($this->toColumn);
		}
	}

	/**
	 * Combine the parts of an "AND" expression into a single join condition
	 *
	 * At most one of the parts may define the from/to columns; the extra conditions of all parts are concatenated.
	 *
	 * @param JoinCondition[] $conditions
	 * @return JoinCondition
	 * @throws InvalidPartitionedQueryException when more than one part compares both sides of the join
	 */
	public static function merge(array $conditions): JoinCondition {
		$fromColumn = '';
		$toColumn = '';
		$fromAlias = null;
		$toAlias = null;
		$fromConditions = [];
		$toConditions = [];
		foreach ($conditions as $condition) {
			if (($condition->fromColumn && $fromColumn) || ($condition->toColumn && $toColumn)) {
				throw new InvalidPartitionedQueryException("Can't join from {$condition->fromColumn} to {$condition->toColumn} as it already joins from {$fromColumn} to {$toColumn}");
			}
			if ($condition->fromColumn) {
				$fromColumn = $condition->fromColumn;
			}
			if ($condition->toColumn) {
				$toColumn = $condition->toColumn;
			}
			if ($condition->fromAlias) {
				$fromAlias = $condition->fromAlias;
			}
			if ($condition->toAlias) {
				$toAlias = $condition->toAlias;
			}
			$fromConditions = array_merge($fromConditions, $condition->fromConditions);
			$toConditions = array_merge($toConditions, $condition->toConditions);
		}
		return new JoinCondition($fromColumn, $fromAlias, $toColumn, $toAlias, $fromConditions, $toConditions);
	}

	/**
	 * Parse the "ON" condition of a join
	 *
	 * @param null|string|CompositeExpression $condition
	 * @param string $join name of the table being joined, only used in error messages
	 * @param string $alias alias of the table being joined, only used in error messages
	 * @param string $fromAlias alias of the table the join starts from
	 * @return JoinCondition
	 * @throws InvalidPartitionedQueryException when there is no condition, or it doesn't compare both sides of the join
	 */
	public static function parse($condition, string $join, string $alias, string $fromAlias): JoinCondition {
		if ($condition === null) {
			throw new InvalidPartitionedQueryException("Can't join on $join without a condition");
		}

		$result = self::parseSubCondition($condition, $join, $alias, $fromAlias);
		if (!$result->fromColumn || !$result->toColumn) {
			throw new InvalidPartitionedQueryException("No join condition found from $fromAlias to $alias");
		}
		return $result;
	}

	/**
	 * Parse a single (possibly composite) part of a join condition
	 *
	 * "AND" expressions are parsed part by part and merged, "OR" expressions are rejected. A plain condition is
	 * either an extra limit on one side of the join, or an `a.x = b.y` comparison between both sides.
	 *
	 * @param string|CompositeExpression $condition anything else is cast to string
	 * @throws InvalidPartitionedQueryException for any condition shape that isn't supported
	 */
	private static function parseSubCondition($condition, string $join, string $alias, string $fromAlias): JoinCondition {
		if ($condition instanceof CompositeExpression) {
			if ($condition->getType() === CompositeExpression::TYPE_OR) {
				throw new InvalidPartitionedQueryException("Cannot join on $join with an OR expression");
			}
			return self::merge(array_map(function ($subCondition) use ($join, $alias, $fromAlias) {
				return self::parseSubCondition($subCondition, $join, $alias, $fromAlias);
			}, $condition->getParts()));
		}

		$condition = (string)$condition;
		$isSubCondition = self::isExtraCondition($condition);
		if ($isSubCondition) {
			// a limit on one side only: attribute it to the side whose alias it mentions
			if (self::mentionsAlias($condition, $fromAlias)) {
				return new JoinCondition('', null, '', null, [$condition], []);
			} else {
				return new JoinCondition('', null, '', null, [], [$condition]);
			}
		}

		// quoting is only needed for the extra-condition detection above
		$condition = str_replace('`', '', $condition);

		// expect a condition in the form of 'alias1.column1 = alias2.column2'
		if (!str_contains($condition, ' = ')) {
			throw new InvalidPartitionedQueryException("Can only join on $join with an `eq` condition");
		}
		$parts = explode(' = ', $condition, 2);
		$parts = array_map(function (string $part) {
			return self::clearConditionPart($part);
		}, $parts);

		if (!self::isSingleCondition($parts[0]) || !self::isSingleCondition($parts[1])) {
			throw new InvalidPartitionedQueryException("Can only join on $join with a single condition");
		}


		// normalize so the "from" side always comes first, whichever way around the comparison was written
		if (self::mentionsAlias($parts[0], $fromAlias)) {
			return new JoinCondition($parts[0], self::getAliasForPart($parts[0]), $parts[1], self::getAliasForPart($parts[1]), [], []);
		} elseif (self::mentionsAlias($parts[1], $fromAlias)) {
			return new JoinCondition($parts[1], self::getAliasForPart($parts[1]), $parts[0], self::getAliasForPart($parts[0]), [], []);
		} else {
			throw new InvalidPartitionedQueryException("join condition for $join needs to explicitly refer to the table by alias");
		}
	}

	/**
	 * Check that a condition string contains neither ` OR ` nor ` AND `
	 */
	private static function isSingleCondition(string $condition): bool {
		return !(str_contains($condition, ' OR ') || str_contains($condition, ' AND '));
	}

	/**
	 * Generate a unique alias for a part that contains a space (i.e. isn't a plain column reference)
	 *
	 * @return ?string the generated alias, or null when the part contains no space
	 */
	private static function getAliasForPart(string $part): ?string {
		if (str_contains($part, ' ')) {
			return uniqid('join_alias_');
		} else {
			return null;
		}
	}

	/**
	 * Strip a cast wrapped around one half of the join comparison, leaving the inner expression
	 */
	private static function clearConditionPart(string $part): string {
		if (str_starts_with($part, 'CAST(')) {
			// pgsql/mysql cast
			$part = substr($part, strlen('CAST('));
			[$part] = explode(' AS ', $part);
		} elseif (str_starts_with($part, 'to_number(to_char(')) {
			// oracle cast to int
			$part = substr($part, strlen('to_number(to_char('), -2);
		} elseif (str_starts_with($part, 'to_char(')) {
			// oracle cast to string
			// (this branch previously repeated the 'to_number(to_char(' test and could never be reached)
			$part = substr($part, strlen('to_char('), -1);
		}
		return $part;
	}

	/**
	 * Check that a condition is an extra limit on the from/to part, and not the join condition
	 *
	 * This is done by checking that only one of the halves of the condition references a column
	 * (a column reference being recognized by its backtick quoting).
	 */
	private static function isExtraCondition(string $condition): bool {
		$parts = explode(' ', $condition, 2);
		if (count($parts) < 2) {
			// no space means there is no second half to inspect; let the caller reject the condition
			return false;
		}
		return str_contains($parts[0], '`') xor str_contains($parts[1], '`');
	}

	/**
	 * Check if a condition refers to a column through the given alias (`alias.`)
	 */
	private static function mentionsAlias(string $condition, string $alias): bool {
		return str_contains($condition, "$alias.");
	}
}

// ---------------------------------------------------------------------------------------------------------------------
// lib/private/DB/QueryBuilder/Partitioned/PartitionQuery.php (new file)
// ---------------------------------------------------------------------------------------------------------------------
/**
 * SPDX-FileCopyrightText: 2024 Nextcloud GmbH and Nextcloud contributors
 * SPDX-License-Identifier: AGPL-3.0-only
 */

namespace OC\DB\QueryBuilder\Partitioned;

use OCP\DB\QueryBuilder\IQueryBuilder;

/**
 * A sub-query from a partitioned join
 */
class PartitionQuery {
	public const JOIN_MODE_INNER = 'inner';
	public const JOIN_MODE_LEFT = 'left';
	// left-join where the left side IS NULL
	public const JOIN_MODE_LEFT_NULL = 'left_null';

	public const JOIN_MODE_RIGHT = 'right';

	/**
	 * @param IQueryBuilder $query the query to run against the partition
	 * @param string $joinFromColumn column (or alias) of the already fetched rows that is used for the join
	 * @param string $joinToColumn column (or alias) of the partition query that is used for the join
	 * @param string $joinMode one of the `JOIN_MODE_*` constants
	 * @throws InvalidPartitionedQueryException when constructed with a mode other than inner or left
	 */
	public function __construct(
		public IQueryBuilder $query,
		public string $joinFromColumn,
		public string $joinToColumn,
		public string $joinMode,
	) {
		if ($joinMode !== self::JOIN_MODE_LEFT && $joinMode !== self::JOIN_MODE_INNER) {
			throw new InvalidPartitionedQueryException("$joinMode joins aren't allowed in partitioned queries");
		}
	}

	/**
	 * Run the partition query for the given rows and join its result onto them
	 *
	 * @param array $rows rows fetched so far, each row containing the join "from" column
	 * @return array the joined rows; an empty input yields an empty result without running the query
	 */
	public function mergeWith(array $rows): array {
		if (empty($rows)) {
			return [];
		}
		// strip table/alias from column names
		$joinFromColumn = preg_replace('/\w+\./', '', $this->joinFromColumn);
		$joinToColumn = preg_replace('/\w+\./', '', $this->joinToColumn);

		$joinFromValues = array_map(function (array $row) use ($joinFromColumn) {
			return $row[$joinFromColumn];
		}, $rows);
		$joinFromValues = array_filter($joinFromValues, function ($value) {
			return $value !== null;
		});
		// restrict the partition query to the join keys that are present in the rows we already have
		$this->query->andWhere($this->query->expr()->in($this->joinToColumn, $this->query->createNamedParameter($joinFromValues, IQueryBuilder::PARAM_STR_ARRAY, ':' . uniqid())));

		// NOTE(review): the return value was assigned to an unused local; looks like a debugging leftover.
		// The call itself is kept in case getSQL() has side effects — confirm and remove.
		$this->query->getSQL();
		$partitionedRows = $this->query->executeQuery()->fetchAll();

		// row of nulls used for the left part of a left join when there is no match
		$columns = $this->query->getOutputColumns();
		$nullResult = array_combine($columns, array_fill(0, count($columns), null));

		$partitionedRowsByKey = [];
		foreach ($partitionedRows as $partitionedRow) {
			$partitionedRowsByKey[$partitionedRow[$joinToColumn]][] = $partitionedRow;
		}
		$result = [];
		foreach ($rows as $row) {
			if (isset($partitionedRowsByKey[$row[$joinFromColumn]])) {
				// matched rows are dropped when only rows without a match were requested
				if ($this->joinMode !== self::JOIN_MODE_LEFT_NULL) {
					foreach ($partitionedRowsByKey[$row[$joinFromColumn]] as $partitionedRow) {
						$result[] = array_merge($row, $partitionedRow);
					}
				}
			} elseif ($this->joinMode === self::JOIN_MODE_LEFT || $this->joinMode === self::JOIN_MODE_LEFT_NULL) {
				$result[] = array_merge($nullResult, $row);
			}
		}
		return $result;
	}
}

// ---------------------------------------------------------------------------------------------------------------------
// lib/private/DB/QueryBuilder/Partitioned/PartitionSplit.php (new file)
// ---------------------------------------------------------------------------------------------------------------------
/**
 * SPDX-FileCopyrightText: 2024 Robin Appelman <robin@icewind.nl>
 * SPDX-License-Identifier: AGPL-3.0-or-later
 */

namespace OC\DB\QueryBuilder\Partitioned;

/**
 * Information about a database partition, containing the tables in the partition and any active alias
 */
class PartitionSplit {
	/** @var array<string, string> alias => table */
	public array $aliases = [];

	/**
	 * @param string $name
	 * @param string[] $tables
	 */
	public function __construct(
		public string $name,
		public array $tables,
	) {
	}

	/**
	 * Register an alias for a table; ignored when the table isn't part of this partition
	 */
	public function addAlias(string $table, string $alias): void {
		if ($this->containsTable($table)) {
			$this->aliases[$alias] = $table;
		}
	}

	/**
	 * Add a table to the partition, unless it is already part of it
	 */
	public function addTable(string $table): void {
		if (!$this->containsTable($table)) {
			$this->tables[] = $table;
		}
	}

	public function containsTable(string $table): bool {
		return in_array($table, $this->tables, true);
	}

	public function containsAlias(string $alias): bool {
		return array_key_exists($alias, $this->aliases);
	}

	/**
	 * All names a column of this partition can be prefixed with: every alias followed by every table
	 *
	 * @return string[]
	 */
	private function getTablesAndAliases(): array {
		// array_merge instead of the `+` union operator: both operands are integer-indexed lists, so a union
		// would silently drop every table whose index is already taken by an alias
		return array_merge(array_keys($this->aliases), $this->tables);
	}

	/**
	 * Check if a query predicate mentions a table or alias from this partition
	 *
	 * @param string $predicate
	 * @return bool
	 */
	public function checkPredicateForTable(string $predicate): bool {
		foreach ($this->getTablesAndAliases() as $name) {
			if (str_contains($predicate, "`$name`.`")) {
				return true;
			}
		}
		return false;
	}

	/**
	 * Check if a column is explicitly prefixed with a table or alias from this partition
	 */
	public function isColumnInPartition(string $column): bool {
		foreach ($this->getTablesAndAliases() as $name) {
			if (str_starts_with($column, "$name.")) {
				return true;
			}
		}
		return false;
	}
}

// ---------------------------------------------------------------------------------------------------------------------
// lib/private/DB/QueryBuilder/Partitioned/PartitionedQueryBuilder.php (new file)
// ---------------------------------------------------------------------------------------------------------------------
/**
 * SPDX-FileCopyrightText: 2024 Nextcloud GmbH and Nextcloud contributors
 * SPDX-License-Identifier: AGPL-3.0-only
 */

namespace OC\DB\QueryBuilder\Partitioned;

use OC\DB\QueryBuilder\CompositeExpression;
use OC\DB\QueryBuilder\QuoteHelper;
use OC\DB\QueryBuilder\Sharded\AutoIncrementHandler;
use OC\DB\QueryBuilder\Sharded\ShardConnectionManager;
use OC\DB\QueryBuilder\Sharded\ShardedQueryBuilder;
use OCP\DB\IResult;
use OCP\DB\QueryBuilder\IQueryBuilder;
use OCP\DB\QueryBuilder\IQueryFunction;
use OCP\IDBConnection;

/**
 * A special query builder that automatically splits queries that span across multiple database partitions[1].
 *
 * This is done by inspecting the query as it's being built, and when a cross-partition join is detected,
 * the part of the query that touches the partition is split off into a different sub-query.
 * Then, when the query is executed, the results from the sub-queries are automatically merged.
 *
 * This whole process is intended to be transparent to any code using the query builder, however it does impose some extra
 * limitations for queries that work cross-partition. See the documentation from `InvalidPartitionedQueryException` for more details.
 *
 * When a join is created in the query, this builder checks if it belongs to the same partition as the table from the
 * original FROM/UPDATE/DELETE/INSERT and if not, creates a new "sub query" for the partition.
 * Then for every part that is added to the query, the part is analyzed to determine which partition the query part is referencing
 * and the query part is added to the sub query for that partition.
 *
 * [1]: A set of tables which can't be queried together with the rest of the tables, such as when sharding is used.
 */
class PartitionedQueryBuilder extends ShardedQueryBuilder {
	/** @var array<string, PartitionQuery> $splitQueries */
	private array $splitQueries = [];
	/** @var list<PartitionSplit> */
	private array $partitions = [];

	/** @var array{'select': string|array, 'alias': ?string}[] */
	private array $selects = [];
	private ?PartitionSplit $mainPartition = null;
	private bool $hasPositionalParameter = false;
	private QuoteHelper $quoteHelper;
	private ?int $limit = null;
	private ?int $offset = null;

	public function __construct(
		IQueryBuilder $builder,
		array $shardDefinitions,
		ShardConnectionManager $shardConnectionManager,
		AutoIncrementHandler $autoIncrementHandler,
	) {
		parent::__construct($builder, $shardDefinitions, $shardConnectionManager, $autoIncrementHandler);
		$this->quoteHelper = new QuoteHelper();
	}

	/**
	 * Create the query builder used for a sub-query
	 */
	private function newQuery(): IQueryBuilder {
		// get a fresh, non-partitioning query builder
		$builder = $this->builder->getConnection()->getQueryBuilder();
		if ($builder instanceof PartitionedQueryBuilder) {
			$builder = $builder->builder;
		}

		return new ShardedQueryBuilder(
			$builder,
			$this->shardDefinitions,
			$this->shardConnectionManager,
			$this->autoIncrementHandler,
		);
	}

	// we need to save selects until we know all the table aliases
	public function select(...$selects) {
		if (count($selects) === 1 && is_array($selects[0])) {
			$selects = $selects[0];
		}
		$this->selects = [];
		$this->addSelect(...$selects);
		return $this;
	}

	/**
	 * Queue additional selects; they are only applied to the queries by `applySelects`
	 */
	public function addSelect(...$select) {
		$select = array_map(function ($select) {
			return ['select' => $select, 'alias' => null];
		}, $select);
		$this->selects = array_merge($this->selects, $select);
		return $this;
	}

	/**
	 * Queue an aliased select; it is only applied to the queries by `applySelects`
	 */
	public function selectAlias($select, $alias) {
		$this->selects[] = ['select' => $select, 'alias' => $alias];
		return $this;
	}

	/**
	 * Ensure that a column is being selected by the query
	 *
	 * This is mainly used to ensure that the returned rows from both sides of a partition contains the columns of the join predicate
	 *
	 * @param string|IQueryFunction $column
	 * @param ?string $alias alias to select the column as, if any
	 * @return void
	 */
	private function ensureSelect(string|IQueryFunction $column, ?string $alias = null): void {
		// explicit cast: with strict_types an IQueryFunction can't be passed to the string functions below
		// (assumes IQueryFunction is stringable, as JoinCondition already interpolates it into messages)
		$checkColumn = (string)($alias ?: $column);
		if (str_contains($checkColumn, '.')) {
			[$table, $checkColumn] = explode('.', $checkColumn);
			$partition = $this->getPartition($table);
		} else {
			$partition = null;
		}
		foreach ($this->selects as $select) {
			$select = $select['select'];
			if (!is_string($select)) {
				continue;
			}

			if (str_contains($select, '.')) {
				[$table, $select] = explode('.', $select);
				$selectPartition = $this->getPartition($table);
			} else {
				$selectPartition = null;
			}
			if (
				($select === $checkColumn || $select === '*')
				&& $selectPartition === $partition
			) {
				// already selected, nothing to do
				return;
			}
		}
		if ($alias) {
			$this->selectAlias($column, $alias);
		} else {
			$this->addSelect($column);
		}
	}

	/**
	 * Distribute the select statements to the correct partition
	 *
	 * This is done at the end instead of when the `select` call is made, because the `select` calls are generally done
	 * before we know what tables are involved in the query
	 *
	 * @return void
	 */
	private function applySelects(): void {
		foreach ($this->selects as $select) {
			foreach ($this->partitions as $partition) {
				if (is_string($select['select']) && (
					$select['select'] === '*'
					|| $partition->isColumnInPartition($select['select']))
				) {
					if (isset($this->splitQueries[$partition->name])) {
						if ($select['alias']) {
							$this->splitQueries[$partition->name]->query->selectAlias($select['select'], $select['alias']);
						} else {
							$this->splitQueries[$partition->name]->query->addSelect($select['select']);
						}
						// a `*` goes to every sub-query and the main query, anything else only to its own partition
						if ($select['select'] !== '*') {
							continue 2;
						}
					}
				}
			}

			if ($select['alias']) {
				parent::selectAlias($select['select'], $select['alias']);
			} else {
				parent::addSelect($select['select']);
			}
		}
		$this->selects = [];
	}


	public function addPartition(PartitionSplit $partition): void {
		$this->partitions[] = $partition;
	}

	/**
	 * Find the partition that contains the given table or alias
	 */
	private function getPartition(string $table): ?PartitionSplit {
		foreach ($this->partitions as $partition) {
			if ($partition->containsTable($table) || $partition->containsAlias($table)) {
				return $partition;
			}
		}
		return null;
	}

	/**
	 * Set the main table of the query; if it belongs to a partition, that partition becomes the main partition
	 */
	public function from($from, $alias = null) {
		if (is_string($from) && $partition = $this->getPartition($from)) {
			$this->mainPartition = $partition;
			if ($alias) {
				$this->mainPartition->addAlias($from, $alias);
			}
		}
		return parent::from($from, $alias);
	}

	public function innerJoin($fromAlias, $join, $alias, $condition = null): self {
		return $this->join($fromAlias, $join, $alias, $condition);
	}

	public function leftJoin($fromAlias, $join, $alias, $condition = null): self {
		return $this->join($fromAlias, (string)$join, $alias, $condition, PartitionQuery::JOIN_MODE_LEFT);
	}

	/**
	 * Add a join, splitting it off into a sub-query when it crosses a partition boundary
	 *
	 * @param string $joinMode one of the `PartitionQuery::JOIN_MODE_*` constants
	 * @throws InvalidPartitionedQueryException when the cross-partition join isn't supported
	 * @throws \InvalidArgumentException for an unknown join mode on a join that stays within one partition
	 */
	public function join($fromAlias, $join, $alias, $condition = null, $joinMode = PartitionQuery::JOIN_MODE_INNER): self {
		$partition = $this->getPartition($join);
		$fromPartition = $this->getPartition($fromAlias);
		if ($partition && $partition !== $this->mainPartition) {
			// join from the main db to a partition

			$joinCondition = JoinCondition::parse($condition, $join, $alias, $fromAlias);
			$partition->addAlias($join, $alias);

			if (!isset($this->splitQueries[$partition->name])) {
				$this->splitQueries[$partition->name] = new PartitionQuery(
					$this->newQuery(),
					$joinCondition->fromAlias ?? $joinCondition->fromColumn, $joinCondition->toAlias ?? $joinCondition->toColumn,
					$joinMode
				);
				$this->splitQueries[$partition->name]->query->from($join, $alias);
				// both sides need to return the join columns for the results to be merged
				$this->ensureSelect($joinCondition->fromColumn, $joinCondition->fromAlias);
				$this->ensureSelect($joinCondition->toColumn, $joinCondition->toAlias);
			} else {
				$query = $this->splitQueries[$partition->name]->query;
				if ($partition->containsAlias($fromAlias)) {
					$query->innerJoin($fromAlias, $join, $alias, $condition);
				} else {
					throw new InvalidPartitionedQueryException("Can't join across partition boundaries more than once");
				}
			}
			$this->splitQueries[$partition->name]->query->andWhere(...$joinCondition->toConditions);
			parent::andWhere(...$joinCondition->fromConditions);
			return $this;
		} elseif ($fromPartition && $fromPartition !== $partition) {
			// join from partition, to the main db

			$joinCondition = JoinCondition::parse($condition, $join, $alias, $fromAlias);
			if (str_starts_with($fromPartition->name, 'from_')) {
				$partitionName = $fromPartition->name;
			} else {
				$partitionName = 'from_' . $fromPartition->name;
			}

			if (!isset($this->splitQueries[$partitionName])) {
				$newPartition = new PartitionSplit($partitionName, [$join]);
				$newPartition->addAlias($join, $alias);
				$this->partitions[] = $newPartition;

				$this->splitQueries[$partitionName] = new PartitionQuery(
					$this->newQuery(),
					$joinCondition->fromAlias ?? $joinCondition->fromColumn, $joinCondition->toAlias ?? $joinCondition->toColumn,
					$joinMode
				);
				$this->ensureSelect($joinCondition->fromColumn, $joinCondition->fromAlias);
				$this->ensureSelect($joinCondition->toColumn, $joinCondition->toAlias);
				$this->splitQueries[$partitionName]->query->from($join, $alias);
				$this->splitQueries[$partitionName]->query->andWhere(...$joinCondition->toConditions);
				parent::andWhere(...$joinCondition->fromConditions);
			} else {
				$fromPartition->addTable($join);
				$fromPartition->addAlias($join, $alias);

				$query = $this->splitQueries[$partitionName]->query;
				$query->innerJoin($fromAlias, $join, $alias, $condition);
			}
			return $this;
		} else {
			// join within the main db or a partition
			if ($joinMode === PartitionQuery::JOIN_MODE_INNER) {
				return parent::innerJoin($fromAlias, $join, $alias, $condition);
			} elseif ($joinMode === PartitionQuery::JOIN_MODE_LEFT) {
				return parent::leftJoin($fromAlias, $join, $alias, $condition);
			} elseif ($joinMode === PartitionQuery::JOIN_MODE_RIGHT) {
				return parent::rightJoin($fromAlias, $join, $alias, $condition);
			} else {
				throw new \InvalidArgumentException("Invalid join mode: $joinMode");
			}
		}
	}

	/**
	 * Flatten a list of predicates by merging the parts of any "AND" expression into the list of predicates
	 *
	 * @param array $predicates
	 * @return array
	 */
	private function flattenPredicates(array $predicates): array {
		$result = [];
		foreach ($predicates as $predicate) {
			if ($predicate instanceof CompositeExpression && $predicate->getType() === CompositeExpression::TYPE_AND) {
				$result = array_merge($result, $this->flattenPredicates($predicate->getParts()));
			} else {
				$result[] = $predicate;
			}
		}
		return $result;
	}

	/**
	 * Split an array of predicates (WHERE query parts) by the partition they reference
	 *
	 * @param array $predicates
	 * @return array<string, array> predicates by partition name, with `''` as key for the main query
	 */
	private function splitPredicatesByParts(array $predicates): array {
		$predicates = $this->flattenPredicates($predicates);

		$partitionPredicates = [];
		foreach ($predicates as $predicate) {
			$partition = $this->getPartitionForPredicate((string)$predicate);
			if ($this->mainPartition === $partition) {
				$partitionPredicates[''][] = $predicate;
			} elseif ($partition) {
				$partitionPredicates[$partition->name][] = $predicate;
			} else {
				$partitionPredicates[''][] = $predicate;
			}
		}
		return $partitionPredicates;
	}

	public function where(...$predicates) {
		return $this->andWhere(...$predicates);
	}

	/**
	 * Add predicates to the query, routing each one to the sub-query of the partition it references
	 */
	public function andWhere(...$where) {
		if ($where) {
			foreach ($this->splitPredicatesByParts($where) as $alias => $predicates) {
				if (isset($this->splitQueries[$alias])) {
					// when there is a condition on a table being left-joined it starts to behave as if it's an inner join
					// since any joined column that doesn't have the left part will not match the condition
					// when the condition is `$joinToColumn IS NULL` we instead mark the query as excluding the left half
					if ($this->splitQueries[$alias]->joinMode === PartitionQuery::JOIN_MODE_LEFT) {
						$this->splitQueries[$alias]->joinMode = PartitionQuery::JOIN_MODE_INNER;

						$column = $this->quoteHelper->quoteColumnName($this->splitQueries[$alias]->joinToColumn);
						foreach ($predicates as $predicate) {
							if ((string)$predicate === "$column IS NULL") {
								$this->splitQueries[$alias]->joinMode = PartitionQuery::JOIN_MODE_LEFT_NULL;
							} else {
								$this->splitQueries[$alias]->query->andWhere($predicate);
							}
						}
					} else {
						$this->splitQueries[$alias]->query->andWhere(...$predicates);
					}
				} else {
					parent::andWhere(...$predicates);
				}
			}
		}
		return $this;
	}


	/**
	 * Find the first partition that has a table or alias mentioned in the predicate
	 *
	 * As a side effect, this records whether any predicate uses a positional (`?`) parameter.
	 */
	private function getPartitionForPredicate(string $predicate): ?PartitionSplit {
		// doesn't depend on the partition, so check once instead of on every loop iteration
		if (str_contains($predicate, '?')) {
			$this->hasPositionalParameter = true;
		}
		foreach ($this->partitions as $partition) {
			if ($partition->checkPredicateForTable($predicate)) {
				return $partition;
			}
		}
		return null;
	}

	public function update($update = null, $alias = null) {
		return parent::update($update, $alias);
	}

	public function insert($insert = null) {
		return parent::insert($insert);
	}

	public function delete($delete = null, $alias = null) {
		return parent::delete($delete, $alias);
	}

	/**
	 * Set the limit, keeping track of it since limits aren't supported by all partitioned queries
	 */
	public function setMaxResults($maxResults) {
		// also forget a previously tracked limit when it is reset, so it can't block a later execute
		$this->limit = ($maxResults > 0) ? (int)$maxResults : null;
		return parent::setMaxResults($maxResults);
	}

	/**
	 * Set the offset, keeping track of it since offsets aren't supported by all partitioned queries
	 */
	public function setFirstResult($firstResult) {
		// also forget a previously tracked offset when it is reset, so it can't block a later execute
		$this->offset = ($firstResult > 0) ? (int)$firstResult : null;
		return parent::setFirstResult($firstResult);
	}

	/**
	 * Execute the main query; when there are sub-queries the result merges them in lazily
	 *
	 * @throws InvalidPartitionedQueryException when a cross-partition query uses positional parameters, or uses a
	 *                                          limit or offset while not all of its sub-queries are plain left joins
	 */
	public function executeQuery(?IDBConnection $connection = null): IResult {
		$this->applySelects();
		if ($this->splitQueries && $this->hasPositionalParameter) {
			throw new InvalidPartitionedQueryException("Partitioned queries aren't allowed to use positional arguments");
		}
		// the sub-queries share the named parameters of the main query
		foreach ($this->splitQueries as $split) {
			$split->query->setParameters($this->getParameters(), $this->getParameterTypes());
		}
		if (count($this->splitQueries) > 0) {
			$hasNonLeftJoins = array_reduce($this->splitQueries, function (bool $hasNonLeftJoins, PartitionQuery $query) {
				return $hasNonLeftJoins || $query->joinMode !== PartitionQuery::JOIN_MODE_LEFT;
			}, false);
			if ($hasNonLeftJoins) {
				if (is_int($this->limit)) {
					throw new InvalidPartitionedQueryException('Limit is not allowed in partitioned queries');
				}
				if (is_int($this->offset)) {
					throw new InvalidPartitionedQueryException('Offset is not allowed in partitioned queries');
				}
			}
		}

		// NOTE(review): the return value was assigned to an unused local; looks like a debugging leftover.
		// The call itself is kept in case getSQL() has side effects — confirm and remove.
		$this->getSQL();
		$result = parent::executeQuery($connection);
		if (count($this->splitQueries) > 0) {
			return new PartitionedResult($this->splitQueries, $result);
		} else {
			return $result;
		}
	}

	/**
	 * @throws InvalidPartitionedQueryException when the statement contains a cross-partition join
	 */
	public function executeStatement(?IDBConnection $connection = null): int {
		if (count($this->splitQueries)) {
			throw new InvalidPartitionedQueryException("Partitioning write queries isn't supported");
		}
		return parent::executeStatement($connection);
	}

	public function getSQL() {
		$this->applySelects();
		return parent::getSQL();
	}

	/**
	 * Number of queries this query is split into: the main query plus one for every sub-query
	 */
	public function getPartitionCount(): int {
		return count($this->splitQueries) + 1;
	}

	/**
	 * Pass a shard key hint to the query it belongs to
	 *
	 * A hint for `alias.column` goes to the sub-query of the partition containing that alias (or table), if such a
	 * sub-query exists. All other hints go to the main query.
	 */
	public function hintShardKey(string $column, mixed $value, bool $overwrite = false): self {
		if (str_contains($column, '.')) {
			[$alias, $column] = explode('.', $column);
			$partition = $this->getPartition($alias);
			// there is no sub-query for the main partition, or for a partition that hasn't been joined (yet)
			if ($partition && isset($this->splitQueries[$partition->name])) {
				$this->splitQueries[$partition->name]->query->hintShardKey($column, $value, $overwrite);
			} else {
				parent::hintShardKey($column, $value, $overwrite);
			}
		} else {
			parent::hintShardKey($column, $value, $overwrite);
		}
		return $this;
	}
}

// ---------------------------------------------------------------------------------------------------------------------
// lib/private/DB/QueryBuilder/Partitioned/PartitionedResult.php (new file)
// ---------------------------------------------------------------------------------------------------------------------
/**
 * SPDX-FileCopyrightText: 2024 Nextcloud GmbH and Nextcloud contributors
 * SPDX-License-Identifier: AGPL-3.0-only
 */

namespace OC\DB\QueryBuilder\Partitioned;

use OC\DB\ArrayResult;
use OCP\DB\IResult;
use PDO;

/**
 * Combine the results of multiple join parts into a single result
 *
 * The rows are only fetched and merged on the first call to `fetch`, `fetchAll` or `rowCount`.
 */
class PartitionedResult extends ArrayResult {
	private bool $fetched = false;

	/**
	 * @param PartitionQuery[] $splitOfParts
	 * @param IResult $result
	 */
	public function __construct(
		private array $splitOfParts,
		private IResult $result,
	) {
		parent::__construct([]);
	}

	public function closeCursor(): bool {
		return $this->result->closeCursor();
	}

	public function fetch(int $fetchMode = PDO::FETCH_ASSOC) {
		$this->fetchRows();
		return parent::fetch($fetchMode);
	}

	public function fetchAll(int $fetchMode = PDO::FETCH_ASSOC): array {
		$this->fetchRows();
		return parent::fetchAll($fetchMode);
	}

	public function rowCount(): int {
		$this->fetchRows();
		return parent::rowCount();
	}

	/**
	 * Fetch all rows of the main result and merge every sub-query into them, once
	 */
	private function fetchRows(): void {
		if (!$this->fetched) {
			$this->fetched = true;
			$this->rows = $this->result->fetchAll();
			foreach ($this->splitOfParts as $part) {
				$this->rows = $part->mergeWith($this->rows);
			}
			$this->count = count($this->rows);
		}
	}
}