aboutsummaryrefslogtreecommitdiffstats
path: root/lib/private/Files/Cache/Scanner.php
diff options
context:
space:
mode:
Diffstat (limited to 'lib/private/Files/Cache/Scanner.php')
-rw-r--r--lib/private/Files/Cache/Scanner.php360
1 files changed, 197 insertions, 163 deletions
diff --git a/lib/private/Files/Cache/Scanner.php b/lib/private/Files/Cache/Scanner.php
index 2711fc8ad19..b067f70b8cb 100644
--- a/lib/private/Files/Cache/Scanner.php
+++ b/lib/private/Files/Cache/Scanner.php
@@ -1,37 +1,9 @@
<?php
+
/**
- * @copyright Copyright (c) 2016, ownCloud, Inc.
- *
- * @author Ari Selseng <ari@selseng.net>
- * @author Arthur Schiwon <blizzz@arthur-schiwon.de>
- * @author Björn Schießle <bjoern@schiessle.org>
- * @author Christoph Wurst <christoph@winzerhof-wurst.at>
- * @author Daniel Jagszent <daniel@jagszent.de>
- * @author Joas Schilling <coding@schilljs.com>
- * @author Jörn Friedrich Dreyer <jfd@butonic.de>
- * @author Lukas Reschke <lukas@statuscode.ch>
- * @author Martin Mattel <martin.mattel@diemattels.at>
- * @author Morris Jobke <hey@morrisjobke.de>
- * @author Owen Winkler <a_github@midnightcircus.com>
- * @author Robin Appelman <robin@icewind.nl>
- * @author Robin McCorkell <robin@mccorkell.me.uk>
- * @author Thomas Müller <thomas.mueller@tmit.eu>
- * @author Vincent Petry <vincent@nextcloud.com>
- *
- * @license AGPL-3.0
- *
- * This code is free software: you can redistribute it and/or modify
- * it under the terms of the GNU Affero General Public License, version 3,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU Affero General Public License for more details.
- *
- * You should have received a copy of the GNU Affero General Public License, version 3,
- * along with this program. If not, see <http://www.gnu.org/licenses/>
- *
+ * SPDX-FileCopyrightText: 2016-2024 Nextcloud GmbH and Nextcloud contributors
+ * SPDX-FileCopyrightText: 2016 ownCloud, Inc.
+ * SPDX-License-Identifier: AGPL-3.0-only
*/
namespace OC\Files\Cache;
@@ -39,9 +11,11 @@ use Doctrine\DBAL\Exception;
use OC\Files\Storage\Wrapper\Encryption;
use OC\Files\Storage\Wrapper\Jail;
use OC\Hooks\BasicEmitter;
+use OC\SystemConfig;
use OCP\Files\Cache\IScanner;
use OCP\Files\ForbiddenException;
use OCP\Files\NotFoundException;
+use OCP\Files\Storage\ILockingStorage;
use OCP\Files\Storage\IReliableEtagStorage;
use OCP\IDBConnection;
use OCP\Lock\ILockingProvider;
@@ -95,8 +69,11 @@ class Scanner extends BasicEmitter implements IScanner {
$this->storage = $storage;
$this->storageId = $this->storage->getId();
$this->cache = $storage->getCache();
- $this->cacheActive = !\OC::$server->getConfig()->getSystemValueBool('filesystem_cache_readonly', false);
- $this->lockingProvider = \OC::$server->getLockingProvider();
+ /** @var SystemConfig $config */
+ $config = \OC::$server->get(SystemConfig::class);
+ $this->cacheActive = !$config->getValue('filesystem_cache_readonly', false);
+ $this->useTransactions = !$config->getValue('filescanner_no_transactions', false);
+ $this->lockingProvider = \OC::$server->get(ILockingProvider::class);
$this->connection = \OC::$server->get(IDBConnection::class);
}
@@ -106,7 +83,7 @@ class Scanner extends BasicEmitter implements IScanner {
*
* @param bool $useTransactions
*/
- public function setUseTransactions($useTransactions) {
+ public function setUseTransactions($useTransactions): void {
$this->useTransactions = $useTransactions;
}
@@ -131,9 +108,9 @@ class Scanner extends BasicEmitter implements IScanner {
* @param string $file
* @param int $reuseExisting
* @param int $parentId
- * @param array|null|false $cacheData existing data in the cache for the file to be scanned
+ * @param array|CacheEntry|null|false $cacheData existing data in the cache for the file to be scanned
* @param bool $lock set to false to disable getting an additional read lock during scanning
- * @param null $data the metadata for the file, as returned by the storage
+ * @param array|null $data the metadata for the file, as returned by the storage
* @return array|null an array of metadata of the scanned file
* @throws \OCP\Lock\LockedException
*/
@@ -145,131 +122,130 @@ class Scanner extends BasicEmitter implements IScanner {
return null;
}
}
+
// only proceed if $file is not a partial file, blacklist is handled by the storage
- if (!self::isPartialFile($file)) {
- // acquire a lock
+ if (self::isPartialFile($file)) {
+ return null;
+ }
+
+ // acquire a lock
+ if ($lock) {
+ if ($this->storage->instanceOfStorage(ILockingStorage::class)) {
+ $this->storage->acquireLock($file, ILockingProvider::LOCK_SHARED, $this->lockingProvider);
+ }
+ }
+
+ try {
+ $data = $data ?? $this->getData($file);
+ } catch (ForbiddenException $e) {
if ($lock) {
- if ($this->storage->instanceOfStorage('\OCP\Files\Storage\ILockingStorage')) {
- $this->storage->acquireLock($file, ILockingProvider::LOCK_SHARED, $this->lockingProvider);
+ if ($this->storage->instanceOfStorage(ILockingStorage::class)) {
+ $this->storage->releaseLock($file, ILockingProvider::LOCK_SHARED, $this->lockingProvider);
}
}
- try {
- $data = $data ?? $this->getData($file);
- } catch (ForbiddenException $e) {
- if ($lock) {
- if ($this->storage->instanceOfStorage('\OCP\Files\Storage\ILockingStorage')) {
- $this->storage->releaseLock($file, ILockingProvider::LOCK_SHARED, $this->lockingProvider);
- }
+ return null;
+ }
+
+ try {
+ if ($data === null) {
+ $this->removeFromCache($file);
+ } else {
+ // pre-emit only if it was a file. By that we avoid counting/treating folders as files
+ if ($data['mimetype'] !== 'httpd/unix-directory') {
+ $this->emit('\OC\Files\Cache\Scanner', 'scanFile', [$file, $this->storageId]);
+ \OC_Hook::emit('\OC\Files\Cache\Scanner', 'scan_file', ['path' => $file, 'storage' => $this->storageId]);
}
- return null;
- }
+ $parent = dirname($file);
+ if ($parent === '.' || $parent === '/') {
+ $parent = '';
+ }
+ if ($parentId === -1) {
+ $parentId = $this->cache->getParentId($file);
+ }
- try {
- if ($data) {
- // pre-emit only if it was a file. By that we avoid counting/treating folders as files
- if ($data['mimetype'] !== 'httpd/unix-directory') {
- $this->emit('\OC\Files\Cache\Scanner', 'scanFile', [$file, $this->storageId]);
- \OC_Hook::emit('\OC\Files\Cache\Scanner', 'scan_file', ['path' => $file, 'storage' => $this->storageId]);
+ // scan the parent if it's not in the cache (id -1) and the current file is not the root folder
+ if ($file && $parentId === -1) {
+ $parentData = $this->scanFile($parent);
+ if ($parentData === null) {
+ return null;
}
- $parent = dirname($file);
- if ($parent === '.' || $parent === '/') {
- $parent = '';
- }
- if ($parentId === -1) {
- $parentId = $this->cache->getParentId($file);
- }
+ $parentId = $parentData['fileid'];
+ }
+ if ($parent) {
+ $data['parent'] = $parentId;
+ }
- // scan the parent if it's not in the cache (id -1) and the current file is not the root folder
- if ($file && $parentId === -1) {
- $parentData = $this->scanFile($parent);
- if (!$parentData) {
- return null;
- }
- $parentId = $parentData['fileid'];
- }
- if ($parent) {
- $data['parent'] = $parentId;
- }
- if (is_null($cacheData)) {
- /** @var CacheEntry $cacheData */
- $cacheData = $this->cache->get($file);
- }
- if ($cacheData && $reuseExisting && isset($cacheData['fileid'])) {
- // prevent empty etag
- $etag = empty($cacheData['etag']) ? $data['etag'] : $cacheData['etag'];
- $fileId = $cacheData['fileid'];
- $data['fileid'] = $fileId;
- // only reuse data if the file hasn't explicitly changed
- if (isset($data['storage_mtime']) && isset($cacheData['storage_mtime']) && $data['storage_mtime'] === $cacheData['storage_mtime']) {
- $data['mtime'] = $cacheData['mtime'];
- if (($reuseExisting & self::REUSE_SIZE) && ($data['size'] === -1)) {
- $data['size'] = $cacheData['size'];
- }
- if ($reuseExisting & self::REUSE_ETAG && !$this->storage->instanceOfStorage(IReliableEtagStorage::class)) {
- $data['etag'] = $etag;
- }
+ $cacheData = $cacheData ?? $this->cache->get($file);
+ if ($reuseExisting && $cacheData !== false && isset($cacheData['fileid'])) {
+ // prevent empty etag
+ $etag = empty($cacheData['etag']) ? $data['etag'] : $cacheData['etag'];
+ $fileId = $cacheData['fileid'];
+ $data['fileid'] = $fileId;
+ // only reuse data if the file hasn't explicitly changed
+ $mtimeUnchanged = isset($data['storage_mtime']) && isset($cacheData['storage_mtime']) && $data['storage_mtime'] === $cacheData['storage_mtime'];
+ // if the folder is marked as unscanned, never reuse etags
+ if ($mtimeUnchanged && $cacheData['size'] !== -1) {
+ $data['mtime'] = $cacheData['mtime'];
+ if (($reuseExisting & self::REUSE_SIZE) && ($data['size'] === -1)) {
+ $data['size'] = $cacheData['size'];
}
-
- // we only updated unencrypted_size if it's already set
- if ($cacheData['unencrypted_size'] === 0) {
- unset($data['unencrypted_size']);
+ if ($reuseExisting & self::REUSE_ETAG && !$this->storage->instanceOfStorage(IReliableEtagStorage::class)) {
+ $data['etag'] = $etag;
}
-
- // Only update metadata that has changed
- $newData = array_diff_assoc($data, $cacheData->getData());
- } else {
- // we only updated unencrypted_size if it's already set
- unset($data['unencrypted_size']);
- $newData = $data;
- $fileId = -1;
- }
- if (!empty($newData)) {
- // Reset the checksum if the data has changed
- $newData['checksum'] = '';
- $newData['parent'] = $parentId;
- $data['fileid'] = $this->addToCache($file, $newData, $fileId);
}
- $data['oldSize'] = ($cacheData && isset($cacheData['size'])) ? $cacheData['size'] : 0;
-
- if ($cacheData && isset($cacheData['encrypted'])) {
- $data['encrypted'] = $cacheData['encrypted'];
+ // we only updated unencrypted_size if it's already set
+ if (isset($cacheData['unencrypted_size']) && $cacheData['unencrypted_size'] === 0) {
+ unset($data['unencrypted_size']);
}
- // post-emit only if it was a file. By that we avoid counting/treating folders as files
- if ($data['mimetype'] !== 'httpd/unix-directory') {
- $this->emit('\OC\Files\Cache\Scanner', 'postScanFile', [$file, $this->storageId]);
- \OC_Hook::emit('\OC\Files\Cache\Scanner', 'post_scan_file', ['path' => $file, 'storage' => $this->storageId]);
+ /**
+ * Only update metadata that has changed.
+ * i.e. get all the values in $data that are not present in the cache already
+ *
+ * We need the OC implementation for usage of "getData" method below.
+ * @var \OC\Files\Cache\CacheEntry $cacheData
+ */
+ $newData = $this->array_diff_assoc_multi($data, $cacheData->getData());
+
+ // make it known to the caller that etag has been changed and needs propagation
+ if (isset($newData['etag'])) {
+ $data['etag_changed'] = true;
}
} else {
- $this->removeFromCache($file);
+ unset($data['unencrypted_size']);
+ $newData = $data;
+ $fileId = -1;
}
- } catch (\Exception $e) {
- if ($lock) {
- if ($this->storage->instanceOfStorage('\OCP\Files\Storage\ILockingStorage')) {
- $this->storage->releaseLock($file, ILockingProvider::LOCK_SHARED, $this->lockingProvider);
- }
+ if (!empty($newData)) {
+ // Reset the checksum if the data has changed
+ $newData['checksum'] = '';
+ $newData['parent'] = $parentId;
+ $data['fileid'] = $this->addToCache($file, $newData, $fileId);
}
- throw $e;
- }
- // release the acquired lock
- if ($lock) {
- if ($this->storage->instanceOfStorage('\OCP\Files\Storage\ILockingStorage')) {
- $this->storage->releaseLock($file, ILockingProvider::LOCK_SHARED, $this->lockingProvider);
+ if ($cacheData !== false) {
+ $data['oldSize'] = $cacheData['size'] ?? 0;
+ $data['encrypted'] = $cacheData['encrypted'] ?? false;
}
- }
- if ($data && !isset($data['encrypted'])) {
- $data['encrypted'] = false;
+ // post-emit only if it was a file. By that we avoid counting/treating folders as files
+ if ($data['mimetype'] !== 'httpd/unix-directory') {
+ $this->emit('\OC\Files\Cache\Scanner', 'postScanFile', [$file, $this->storageId]);
+ \OC_Hook::emit('\OC\Files\Cache\Scanner', 'post_scan_file', ['path' => $file, 'storage' => $this->storageId]);
+ }
+ }
+ } finally {
+ // release the acquired lock
+ if ($lock && $this->storage->instanceOfStorage(ILockingStorage::class)) {
+ $this->storage->releaseLock($file, ILockingProvider::LOCK_SHARED, $this->lockingProvider);
}
- return $data;
}
- return null;
+ return $data;
}
protected function removeFromCache($path) {
@@ -334,41 +310,82 @@ class Scanner extends BasicEmitter implements IScanner {
if ($reuse === -1) {
$reuse = ($recursive === self::SCAN_SHALLOW) ? self::REUSE_ETAG | self::REUSE_SIZE : self::REUSE_ETAG;
}
- if ($lock) {
- if ($this->storage->instanceOfStorage('\OCP\Files\Storage\ILockingStorage')) {
- $this->storage->acquireLock('scanner::' . $path, ILockingProvider::LOCK_EXCLUSIVE, $this->lockingProvider);
- $this->storage->acquireLock($path, ILockingProvider::LOCK_SHARED, $this->lockingProvider);
- }
+
+ if ($lock && $this->storage->instanceOfStorage(ILockingStorage::class)) {
+ $this->storage->acquireLock('scanner::' . $path, ILockingProvider::LOCK_EXCLUSIVE, $this->lockingProvider);
+ $this->storage->acquireLock($path, ILockingProvider::LOCK_SHARED, $this->lockingProvider);
}
+
try {
- try {
- $data = $this->scanFile($path, $reuse, -1, null, $lock);
- if ($data && $data['mimetype'] === 'httpd/unix-directory') {
- $size = $this->scanChildren($path, $recursive, $reuse, $data['fileid'], $lock, $data['size']);
- $data['size'] = $size;
- }
- } catch (NotFoundException $e) {
- $this->removeFromCache($path);
- return null;
+ $data = $this->scanFile($path, $reuse, -1, lock: $lock);
+
+ if ($data !== null && $data['mimetype'] === 'httpd/unix-directory') {
+ $size = $this->scanChildren($path, $recursive, $reuse, $data['fileid'], $lock, $data['size']);
+ $data['size'] = $size;
}
+ } catch (NotFoundException $e) {
+ $this->removeFromCache($path);
+ return null;
} finally {
- if ($lock) {
- if ($this->storage->instanceOfStorage('\OCP\Files\Storage\ILockingStorage')) {
- $this->storage->releaseLock($path, ILockingProvider::LOCK_SHARED, $this->lockingProvider);
- $this->storage->releaseLock('scanner::' . $path, ILockingProvider::LOCK_EXCLUSIVE, $this->lockingProvider);
- }
+ if ($lock && $this->storage->instanceOfStorage(ILockingStorage::class)) {
+ $this->storage->releaseLock($path, ILockingProvider::LOCK_SHARED, $this->lockingProvider);
+ $this->storage->releaseLock('scanner::' . $path, ILockingProvider::LOCK_EXCLUSIVE, $this->lockingProvider);
}
}
return $data;
}
/**
+ * Compares $array1 against $array2 and returns all the values in $array1 that are not in $array2
+ * Note this is a one-way check - i.e. we don't care about things that are in $array2 that aren't in $array1
+ *
+ * Supports multi-dimensional arrays
+ * Also checks keys/indexes
+ * Comparisons are strict just like array_diff_assoc
+ * Order of keys/values does not matter
+ *
+ * @param array $array1
+ * @param array $array2
+ * @return array with the differences between $array1 and $array1
+ * @throws \InvalidArgumentException if $array1 isn't an actual array
+ *
+ */
+ protected function array_diff_assoc_multi(array $array1, array $array2) {
+
+ $result = [];
+
+ foreach ($array1 as $key => $value) {
+
+ // if $array2 doesn't have the same key, that's a result
+ if (!array_key_exists($key, $array2)) {
+ $result[$key] = $value;
+ continue;
+ }
+
+ // if $array2's value for the same key is different, that's a result
+ if ($array2[$key] !== $value && !is_array($value)) {
+ $result[$key] = $value;
+ continue;
+ }
+
+ if (is_array($value)) {
+ $nestedDiff = $this->array_diff_assoc_multi($value, $array2[$key]);
+ if (!empty($nestedDiff)) {
+ $result[$key] = $nestedDiff;
+ continue;
+ }
+ }
+ }
+ return $result;
+ }
+
+ /**
* Get the children currently in the cache
*
* @param int $folderId
- * @return array[]
+ * @return array<string, \OCP\Files\Cache\ICacheEntry>
*/
- protected function getExistingChildren($folderId) {
+ protected function getExistingChildren($folderId): array {
$existingChildren = [];
$children = $this->cache->getFolderContentsById($folderId);
foreach ($children as $child) {
@@ -388,16 +405,20 @@ class Scanner extends BasicEmitter implements IScanner {
* @param int|float $oldSize the size of the folder before (re)scanning the children
* @return int|float the size of the scanned folder or -1 if the size is unknown at this stage
*/
- protected function scanChildren(string $path, $recursive, int $reuse, int $folderId, bool $lock, int|float $oldSize) {
+ protected function scanChildren(string $path, $recursive, int $reuse, int $folderId, bool $lock, int|float $oldSize, &$etagChanged = false) {
if ($reuse === -1) {
$reuse = ($recursive === self::SCAN_SHALLOW) ? self::REUSE_ETAG | self::REUSE_SIZE : self::REUSE_ETAG;
}
$this->emit('\OC\Files\Cache\Scanner', 'scanFolder', [$path, $this->storageId]);
$size = 0;
- $childQueue = $this->handleChildren($path, $recursive, $reuse, $folderId, $lock, $size);
+ $childQueue = $this->handleChildren($path, $recursive, $reuse, $folderId, $lock, $size, $etagChanged);
foreach ($childQueue as $child => [$childId, $childSize]) {
- $childSize = $this->scanChildren($child, $recursive, $reuse, $childId, $lock, $childSize);
+ // "etag changed" propagates up, but not down, so we pass `false` to the children even if we already know that the etag of the current folder changed
+ $childEtagChanged = false;
+ $childSize = $this->scanChildren($child, $recursive, $reuse, $childId, $lock, $childSize, $childEtagChanged);
+ $etagChanged |= $childEtagChanged;
+
if ($childSize === -1) {
$size = -1;
} elseif ($size !== -1) {
@@ -410,8 +431,17 @@ class Scanner extends BasicEmitter implements IScanner {
if ($this->storage->instanceOfStorage(Encryption::class)) {
$this->cache->calculateFolderSize($path);
} else {
- if ($this->cacheActive && $oldSize !== $size) {
- $this->cache->update($folderId, ['size' => $size]);
+ if ($this->cacheActive) {
+ $updatedData = [];
+ if ($oldSize !== $size) {
+ $updatedData['size'] = $size;
+ }
+ if ($etagChanged) {
+ $updatedData['etag'] = uniqid();
+ }
+ if ($updatedData) {
+ $this->cache->update($folderId, $updatedData);
+ }
}
}
$this->emit('\OC\Files\Cache\Scanner', 'postScanFolder', [$path, $this->storageId]);
@@ -421,7 +451,7 @@ class Scanner extends BasicEmitter implements IScanner {
/**
* @param bool|IScanner::SCAN_RECURSIVE_INCOMPLETE $recursive
*/
- private function handleChildren(string $path, $recursive, int $reuse, int $folderId, bool $lock, int|float &$size): array {
+ private function handleChildren(string $path, $recursive, int $reuse, int $folderId, bool $lock, int|float &$size, bool &$etagChanged): array {
// we put this in it's own function so it cleans up the memory before we start recursing
$existingChildren = $this->getExistingChildren($folderId);
$newChildren = iterator_to_array($this->storage->getDirectoryContent($path));
@@ -447,7 +477,7 @@ class Scanner extends BasicEmitter implements IScanner {
$file = trim(\OC\Files\Filesystem::normalizePath($originalFile), '/');
if (trim($originalFile, '/') !== $file) {
// encoding mismatch, might require compatibility wrapper
- \OC::$server->get(LoggerInterface::class)->debug('Scanner: Skipping non-normalized file name "'. $originalFile . '" in path "' . $path . '".', ['app' => 'core']);
+ \OC::$server->get(LoggerInterface::class)->debug('Scanner: Skipping non-normalized file name "' . $originalFile . '" in path "' . $path . '".', ['app' => 'core']);
$this->emit('\OC\Files\Cache\Scanner', 'normalizedNameMismatch', [$path ? $path . '/' . $originalFile : $originalFile]);
// skip this entry
continue;
@@ -469,6 +499,10 @@ class Scanner extends BasicEmitter implements IScanner {
} elseif ($size !== -1) {
$size += $data['size'];
}
+
+ if (isset($data['etag_changed']) && $data['etag_changed']) {
+ $etagChanged = true;
+ }
}
} catch (Exception $ex) {
// might happen if inserting duplicate while a scanning