summaryrefslogtreecommitdiffstats
path: root/lib/private/Files/Search/QueryOptimizer/MergeDistributiveOperations.php
blob: 4949ca7396b04fd919f5b530ade781e3e77c1436 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
<?php

/**
 * SPDX-FileCopyrightText: 2023 Nextcloud GmbH and Nextcloud contributors
 * SPDX-License-Identifier: AGPL-3.0-or-later
 */
namespace OC\Files\Search\QueryOptimizer;

use OC\Files\Search\SearchBinaryOperator;
use OC\Files\Search\SearchComparison;
use OCP\Files\Search\ISearchBinaryOperator;
use OCP\Files\Search\ISearchOperator;

/**
 * Attempt to transform
 *
 * (A AND B) OR (A AND C) OR (A AND D AND E) into A AND (B OR C OR (D AND E))
 *
 * This is always valid because logical 'AND' and 'OR' are distributive[1].
 *
 * [1]: https://en.wikipedia.org/wiki/Distributive_property
 */
class MergeDistributiveOperations extends ReplacingOptimizerStep {
	public function processOperator(ISearchOperator &$operator): bool {
		if ($operator instanceof SearchBinaryOperator) {
			// either 'AND' or 'OR'
			$topLevelType = $operator->getType();

			// split the arguments into groups that share a first argument
			$groups = $this->groupBinaryOperatorsByChild($operator->getArguments(), 0);
			$outerOperations = array_map(function (array $operators) use ($topLevelType) {
				// no common operations, no need to change anything
				if (count($operators) === 1) {
					return $operators[0];
				}

				// for groups with size >1 we know they are binary operators with at least 1 child
				/** @var ISearchBinaryOperator $firstArgument */
				$firstArgument = $operators[0];

				// we already checked that all arguments have the same type, so this type applies for all, either 'AND' or 'OR'
				$innerType = $firstArgument->getType();

				// the common operation we move out ('A' from the example)
				$extractedLeftHand = $firstArgument->getArguments()[0];

				// for each argument we remove the extracted operation to get the leftovers ('B', 'C' and '(D AND E)' in the example)
				// note that we leave them inside the "inner" binary operation for when the "inner" operation contained more than two parts
				// in the (common) case where the "inner" operation only has 1 item left it will be cleaned up in a follow up step
				$rightHandArguments = array_map(function (ISearchOperator $inner) {
					/** @var ISearchBinaryOperator $inner */
					$arguments = $inner->getArguments();
					array_shift($arguments);
					if (count($arguments) === 1) {
						return $arguments[0];
					}
					return new SearchBinaryOperator($inner->getType(), $arguments);
				}, $operators);

				// combine the extracted operation ('A') with the remaining bit ('(B OR C OR (D AND E))')
				// note that because of how distribution work, we use the "outer" type "inside" and the "inside" type "outside".
				$extractedRightHand = new SearchBinaryOperator($topLevelType, $rightHandArguments);
				return new SearchBinaryOperator(
					$innerType,
					[$extractedLeftHand, $extractedRightHand]
				);
			}, $groups);

			// combine all groups again
			$operator = new SearchBinaryOperator($topLevelType, $outerOperations);
			parent::processOperator($operator);
			return true;
		}
		return parent::processOperator($operator);
	}

	/**
	 * Group a list of binary search operators that have a common argument
	 *
	 * Non-binary operators, or empty binary operators will each get their own 1-sized group
	 *
	 * @param ISearchOperator[] $operators
	 * @return ISearchOperator[][]
	 */
	private function groupBinaryOperatorsByChild(array $operators, int $index = 0): array {
		$result = [];
		foreach ($operators as $operator) {
			if ($operator instanceof ISearchBinaryOperator && count($operator->getArguments()) > 0) {
				/** @var SearchBinaryOperator|SearchComparison $child */
				$child = $operator->getArguments()[$index];
				$childKey = (string)$child;
				$result[$childKey][] = $operator;
			} else {
				$result[] = [$operator];
			}
		}
		return array_values($result);
	}
}