aboutsummaryrefslogtreecommitdiffstats
path: root/apps/files/lib/BackgroundJob/ScanFiles.php
blob: 1d72ed6cde4adbf53b120301bf2d5002bc5a6c5d (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
<?php

/**
 * SPDX-FileCopyrightText: 2019-2024 Nextcloud GmbH and Nextcloud contributors
 * SPDX-FileCopyrightText: 2016 ownCloud, Inc.
 * SPDX-License-Identifier: AGPL-3.0-only
 */

namespace OCA\Files\BackgroundJob;

use OC\Files\Utils\Scanner;
use OCP\AppFramework\Utility\ITimeFactory;
use OCP\BackgroundJob\TimedJob;
use OCP\DB\QueryBuilder\IQueryBuilder;
use OCP\EventDispatcher\IEventDispatcher;
use OCP\IConfig;
use OCP\IDBConnection;
use Psr\Log\LoggerInterface;

/**
 * Class ScanFiles is a background job used to run the file scanner over the user
 * accounts to ensure integrity of the file cache.
 *
 * @package OCA\Files\BackgroundJob
 */
class ScanFiles extends TimedJob {
	/** Amount of users that should get scanned per execution */
	public const USERS_PER_SESSION = 500;

	/**
	 * @param IConfig $config system configuration, read for the `files_no_background_scan` switch
	 * @param IEventDispatcher $dispatcher event dispatcher handed to the file scanner
	 * @param LoggerInterface $logger logger used for scan errors and the "stopped prematurely" warning
	 * @param IDBConnection $connection database connection used to look up users with unscanned files
	 * @param ITimeFactory $time time factory required by the parent job
	 */
	public function __construct(
		private IConfig $config,
		private IEventDispatcher $dispatcher,
		private LoggerInterface $logger,
		private IDBConnection $connection,
		ITimeFactory $time,
	) {
		parent::__construct($time);
		// Run once per 10 minutes
		$this->setInterval(60 * 10);
	}

	/**
	 * Run the background scan for a single user.
	 *
	 * Exceptions raised by the scanner are logged and swallowed so that one
	 * failing user does not abort the whole job. The filesystem is torn down
	 * afterwards in every case, including when something other than an
	 * \Exception propagates out of the scan.
	 *
	 * @param string $user id of the user whose files should be scanned
	 */
	protected function runScanner(string $user): void {
		try {
			$scanner = new Scanner(
				$user,
				null,
				$this->dispatcher,
				$this->logger
			);
			$scanner->backgroundScan('');
		} catch (\Exception $e) {
			$this->logger->error($e->getMessage(), ['exception' => $e, 'app' => 'files']);
		} finally {
			// always release the filesystem set up for this user before the next one is scanned
			\OC_Util::tearDownFS();
		}
	}

	/**
	 * Find a storage which has unindexed files and return a user with access to the storage
	 *
	 * "Unindexed" is expressed by the queries below as a filecache row with a
	 * negative size and a parent greater than -1.
	 *
	 * @return string|false the user id, or false when no such user was found
	 */
	private function getUserToScan(): string|false {
		if ($this->connection->getShardDefinition('filecache')) {
			// for sharded filecache, the "LIMIT" from the normal query doesn't work

			// first we try it with a "LEFT JOIN" on mounts, this is fast, but might return a storage that isn't mounted.
			// we also ask for up to 10 results from different storages to increase the odds of finding a result that is mounted
			$query = $this->connection->getQueryBuilder();
			$query->select('m.user_id')
				->from('filecache', 'f')
				->leftJoin('f', 'mounts', 'm', $query->expr()->eq('m.storage_id', 'f.storage'))
				->where($query->expr()->lt('f.size', $query->createNamedParameter(0, IQueryBuilder::PARAM_INT)))
				->andWhere($query->expr()->gt('f.parent', $query->createNamedParameter(-1, IQueryBuilder::PARAM_INT)))
				->setMaxResults(10)
				->groupBy('f.storage')
				->runAcrossAllShards();

			$result = $query->executeQuery();
			$userId = false;
			while (($row = $result->fetch()) !== false) {
				// rows for storages without a mount carry no user id, skip those
				$userId = $this->normalizeUserId($row['user_id'] ?? null);
				if ($userId !== false) {
					break;
				}
			}
			// release the cursor before returning or running the fallback query
			$result->closeCursor();
			if ($userId !== false) {
				return $userId;
			}

			// as a fallback, we try a slower approach where we find all mounted storages first
			// this is essentially doing the inner join manually
			$storages = $this->getAllMountedStorages();
			if ($storages === []) {
				// nothing is mounted, so no storage can match; skip the query across all shards
				return false;
			}

			$query = $this->connection->getQueryBuilder();
			$query->select('m.user_id')
				->from('filecache', 'f')
				->leftJoin('f', 'mounts', 'm', $query->expr()->eq('m.storage_id', 'f.storage'))
				->where($query->expr()->lt('f.size', $query->createNamedParameter(0, IQueryBuilder::PARAM_INT)))
				->andWhere($query->expr()->gt('f.parent', $query->createNamedParameter(-1, IQueryBuilder::PARAM_INT)))
				->andWhere($query->expr()->in('f.storage', $query->createNamedParameter($storages, IQueryBuilder::PARAM_INT_ARRAY)))
				->setMaxResults(1)
				->runAcrossAllShards();

			return $this->fetchFirstUserId($query);
		}

		$query = $this->connection->getQueryBuilder();
		$query->select('m.user_id')
			->from('filecache', 'f')
			->innerJoin('f', 'mounts', 'm', $query->expr()->eq('m.storage_id', 'f.storage'))
			->where($query->expr()->lt('f.size', $query->createNamedParameter(0, IQueryBuilder::PARAM_INT)))
			->andWhere($query->expr()->gt('f.parent', $query->createNamedParameter(-1, IQueryBuilder::PARAM_INT)))
			->setMaxResults(1)
			->runAcrossAllShards();

		return $this->fetchFirstUserId($query);
	}

	/**
	 * Execute the query, read the first column of the first row and close the cursor.
	 *
	 * @param IQueryBuilder $query fully built query selecting a user id
	 * @return string|false the user id, or false when the query produced no usable value
	 */
	private function fetchFirstUserId(IQueryBuilder $query): string|false {
		$result = $query->executeQuery();
		$userId = $result->fetchOne();
		$result->closeCursor();

		return $this->normalizeUserId($userId);
	}

	/**
	 * Turn a raw database value into a user id.
	 *
	 * Only false, null and the empty string count as "no user". A plain
	 * truthiness check is deliberately avoided because the string "0" is
	 * falsy in PHP and would otherwise be mistaken for a missing user.
	 *
	 * @param mixed $value raw value read from the result set
	 * @return string|false
	 */
	private function normalizeUserId(mixed $value): string|false {
		if ($value === false || $value === null || $value === '') {
			return false;
		}

		return (string)$value;
	}

	/**
	 * List the distinct storage ids present in the mounts table.
	 *
	 * @return array storage ids as returned by the database driver
	 */
	private function getAllMountedStorages(): array {
		$query = $this->connection->getQueryBuilder();
		$query->selectDistinct('storage_id')
			->from('mounts');

		$result = $query->executeQuery();
		$storages = $result->fetchAll(\PDO::FETCH_COLUMN);
		$result->closeCursor();

		return $storages;
	}

	/**
	 * Scan users with unindexed files, one after another, until none is left,
	 * USERS_PER_SESSION users have been handled, or the same user is returned
	 * twice in a row (meaning the scan did not clear that user's entries).
	 *
	 * Does nothing when the `files_no_background_scan` system value is true.
	 *
	 * @param $argument
	 * @throws \Exception
	 */
	protected function run($argument) {
		if ($this->config->getSystemValueBool('files_no_background_scan', false)) {
			return;
		}

		$usersScanned = 0;
		$lastUser = false;
		$user = $this->getUserToScan();
		while ($user !== false && $usersScanned < self::USERS_PER_SESSION && $lastUser !== $user) {
			$this->runScanner($user);
			$lastUser = $user;
			$user = $this->getUserToScan();
			$usersScanned += 1;
		}

		// the same user coming back right after being scanned means the loop stopped to avoid spinning on them
		if ($user !== false && $lastUser === $user) {
			$this->logger->warning("User $user still has unscanned files after running background scan, background scan might be stopped prematurely");
		}
	}
}