diff options
Diffstat (limited to 'lib/private/Files/Cache/Scanner.php')
-rw-r--r-- | lib/private/Files/Cache/Scanner.php | 360 |
1 files changed, 197 insertions, 163 deletions
diff --git a/lib/private/Files/Cache/Scanner.php b/lib/private/Files/Cache/Scanner.php index 2711fc8ad19..b067f70b8cb 100644 --- a/lib/private/Files/Cache/Scanner.php +++ b/lib/private/Files/Cache/Scanner.php @@ -1,37 +1,9 @@ <?php + /** - * @copyright Copyright (c) 2016, ownCloud, Inc. - * - * @author Ari Selseng <ari@selseng.net> - * @author Arthur Schiwon <blizzz@arthur-schiwon.de> - * @author Björn Schießle <bjoern@schiessle.org> - * @author Christoph Wurst <christoph@winzerhof-wurst.at> - * @author Daniel Jagszent <daniel@jagszent.de> - * @author Joas Schilling <coding@schilljs.com> - * @author Jörn Friedrich Dreyer <jfd@butonic.de> - * @author Lukas Reschke <lukas@statuscode.ch> - * @author Martin Mattel <martin.mattel@diemattels.at> - * @author Morris Jobke <hey@morrisjobke.de> - * @author Owen Winkler <a_github@midnightcircus.com> - * @author Robin Appelman <robin@icewind.nl> - * @author Robin McCorkell <robin@mccorkell.me.uk> - * @author Thomas Müller <thomas.mueller@tmit.eu> - * @author Vincent Petry <vincent@nextcloud.com> - * - * @license AGPL-3.0 - * - * This code is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License, version 3, - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Affero General Public License for more details. - * - * You should have received a copy of the GNU Affero General Public License, version 3, - * along with this program. If not, see <http://www.gnu.org/licenses/> - * + * SPDX-FileCopyrightText: 2016-2024 Nextcloud GmbH and Nextcloud contributors + * SPDX-FileCopyrightText: 2016 ownCloud, Inc. + * SPDX-License-Identifier: AGPL-3.0-only */ namespace OC\Files\Cache; @@ -39,9 +11,11 @@ use Doctrine\DBAL\Exception; use OC\Files\Storage\Wrapper\Encryption; use OC\Files\Storage\Wrapper\Jail; use OC\Hooks\BasicEmitter; +use OC\SystemConfig; use OCP\Files\Cache\IScanner; use OCP\Files\ForbiddenException; use OCP\Files\NotFoundException; +use OCP\Files\Storage\ILockingStorage; use OCP\Files\Storage\IReliableEtagStorage; use OCP\IDBConnection; use OCP\Lock\ILockingProvider; @@ -95,8 +69,11 @@ class Scanner extends BasicEmitter implements IScanner { $this->storage = $storage; $this->storageId = $this->storage->getId(); $this->cache = $storage->getCache(); - $this->cacheActive = !\OC::$server->getConfig()->getSystemValueBool('filesystem_cache_readonly', false); - $this->lockingProvider = \OC::$server->getLockingProvider(); + /** @var SystemConfig $config */ + $config = \OC::$server->get(SystemConfig::class); + $this->cacheActive = !$config->getValue('filesystem_cache_readonly', false); + $this->useTransactions = !$config->getValue('filescanner_no_transactions', false); + $this->lockingProvider = \OC::$server->get(ILockingProvider::class); $this->connection = \OC::$server->get(IDBConnection::class); } @@ -106,7 +83,7 @@ class Scanner extends BasicEmitter implements IScanner { * * @param bool $useTransactions */ - public function setUseTransactions($useTransactions) { + public function setUseTransactions($useTransactions): void { $this->useTransactions = $useTransactions; } @@ -131,9 +108,9 @@ class Scanner extends BasicEmitter implements IScanner { * @param string $file * @param int $reuseExisting * @param int $parentId - * @param array|null|false $cacheData existing data in the cache for the file to be scanned + * @param array|CacheEntry|null|false $cacheData existing data in the cache for the file to be scanned * @param bool $lock set to false to disable getting an additional read lock during scanning - * @param null $data the metadata for the file, as returned by the storage + * @param array|null $data the metadata for the file, as returned by the storage * @return array|null an array of metadata of the scanned file * @throws \OCP\Lock\LockedException */ @@ -145,131 +122,130 @@ class Scanner extends BasicEmitter implements IScanner { return null; } } + // only proceed if $file is not a partial file, blacklist is handled by the storage - if (!self::isPartialFile($file)) { - // acquire a lock + if (self::isPartialFile($file)) { + return null; + } + + // acquire a lock + if ($lock) { + if ($this->storage->instanceOfStorage(ILockingStorage::class)) { + $this->storage->acquireLock($file, ILockingProvider::LOCK_SHARED, $this->lockingProvider); + } + } + + try { + $data = $data ?? $this->getData($file); + } catch (ForbiddenException $e) { if ($lock) { - if ($this->storage->instanceOfStorage('\OCP\Files\Storage\ILockingStorage')) { - $this->storage->acquireLock($file, ILockingProvider::LOCK_SHARED, $this->lockingProvider); + if ($this->storage->instanceOfStorage(ILockingStorage::class)) { + $this->storage->releaseLock($file, ILockingProvider::LOCK_SHARED, $this->lockingProvider); } } - try { - $data = $data ?? $this->getData($file); - } catch (ForbiddenException $e) { - if ($lock) { - if ($this->storage->instanceOfStorage('\OCP\Files\Storage\ILockingStorage')) { - $this->storage->releaseLock($file, ILockingProvider::LOCK_SHARED, $this->lockingProvider); - } + return null; + } + + try { + if ($data === null) { + $this->removeFromCache($file); + } else { + // pre-emit only if it was a file. By that we avoid counting/treating folders as files + if ($data['mimetype'] !== 'httpd/unix-directory') { + $this->emit('\OC\Files\Cache\Scanner', 'scanFile', [$file, $this->storageId]); + \OC_Hook::emit('\OC\Files\Cache\Scanner', 'scan_file', ['path' => $file, 'storage' => $this->storageId]); } - return null; - } + $parent = dirname($file); + if ($parent === '.' || $parent === '/') { + $parent = ''; + } + if ($parentId === -1) { + $parentId = $this->cache->getParentId($file); + } - try { - if ($data) { - // pre-emit only if it was a file. By that we avoid counting/treating folders as files - if ($data['mimetype'] !== 'httpd/unix-directory') { - $this->emit('\OC\Files\Cache\Scanner', 'scanFile', [$file, $this->storageId]); - \OC_Hook::emit('\OC\Files\Cache\Scanner', 'scan_file', ['path' => $file, 'storage' => $this->storageId]); + // scan the parent if it's not in the cache (id -1) and the current file is not the root folder + if ($file && $parentId === -1) { + $parentData = $this->scanFile($parent); + if ($parentData === null) { + return null; } - $parent = dirname($file); - if ($parent === '.' || $parent === '/') { - $parent = ''; - } - if ($parentId === -1) { - $parentId = $this->cache->getParentId($file); - } + $parentId = $parentData['fileid']; + } + if ($parent) { + $data['parent'] = $parentId; + } - // scan the parent if it's not in the cache (id -1) and the current file is not the root folder - if ($file && $parentId === -1) { - $parentData = $this->scanFile($parent); - if (!$parentData) { - return null; - } - $parentId = $parentData['fileid']; - } - if ($parent) { - $data['parent'] = $parentId; - } - if (is_null($cacheData)) { - /** @var CacheEntry $cacheData */ - $cacheData = $this->cache->get($file); - } - if ($cacheData && $reuseExisting && isset($cacheData['fileid'])) { - // prevent empty etag - $etag = empty($cacheData['etag']) ? $data['etag'] : $cacheData['etag']; - $fileId = $cacheData['fileid']; - $data['fileid'] = $fileId; - // only reuse data if the file hasn't explicitly changed - if (isset($data['storage_mtime']) && isset($cacheData['storage_mtime']) && $data['storage_mtime'] === $cacheData['storage_mtime']) { - $data['mtime'] = $cacheData['mtime']; - if (($reuseExisting & self::REUSE_SIZE) && ($data['size'] === -1)) { - $data['size'] = $cacheData['size']; - } - if ($reuseExisting & self::REUSE_ETAG && !$this->storage->instanceOfStorage(IReliableEtagStorage::class)) { - $data['etag'] = $etag; - } + $cacheData = $cacheData ?? $this->cache->get($file); + if ($reuseExisting && $cacheData !== false && isset($cacheData['fileid'])) { + // prevent empty etag + $etag = empty($cacheData['etag']) ? $data['etag'] : $cacheData['etag']; + $fileId = $cacheData['fileid']; + $data['fileid'] = $fileId; + // only reuse data if the file hasn't explicitly changed + $mtimeUnchanged = isset($data['storage_mtime']) && isset($cacheData['storage_mtime']) && $data['storage_mtime'] === $cacheData['storage_mtime']; + // if the folder is marked as unscanned, never reuse etags + if ($mtimeUnchanged && $cacheData['size'] !== -1) { + $data['mtime'] = $cacheData['mtime']; + if (($reuseExisting & self::REUSE_SIZE) && ($data['size'] === -1)) { + $data['size'] = $cacheData['size']; } - - // we only updated unencrypted_size if it's already set - if ($cacheData['unencrypted_size'] === 0) { - unset($data['unencrypted_size']); + if ($reuseExisting & self::REUSE_ETAG && !$this->storage->instanceOfStorage(IReliableEtagStorage::class)) { + $data['etag'] = $etag; } - - // Only update metadata that has changed - $newData = array_diff_assoc($data, $cacheData->getData()); - } else { - // we only updated unencrypted_size if it's already set - unset($data['unencrypted_size']); - $newData = $data; - $fileId = -1; - } - if (!empty($newData)) { - // Reset the checksum if the data has changed - $newData['checksum'] = ''; - $newData['parent'] = $parentId; - $data['fileid'] = $this->addToCache($file, $newData, $fileId); } - $data['oldSize'] = ($cacheData && isset($cacheData['size'])) ? $cacheData['size'] : 0; - - if ($cacheData && isset($cacheData['encrypted'])) { - $data['encrypted'] = $cacheData['encrypted']; + // we only updated unencrypted_size if it's already set + if (isset($cacheData['unencrypted_size']) && $cacheData['unencrypted_size'] === 0) { + unset($data['unencrypted_size']); } - // post-emit only if it was a file. By that we avoid counting/treating folders as files - if ($data['mimetype'] !== 'httpd/unix-directory') { - $this->emit('\OC\Files\Cache\Scanner', 'postScanFile', [$file, $this->storageId]); - \OC_Hook::emit('\OC\Files\Cache\Scanner', 'post_scan_file', ['path' => $file, 'storage' => $this->storageId]); + /** + * Only update metadata that has changed. + * i.e. get all the values in $data that are not present in the cache already + * + * We need the OC implementation for usage of "getData" method below. + * @var \OC\Files\Cache\CacheEntry $cacheData + */ + $newData = $this->array_diff_assoc_multi($data, $cacheData->getData()); + + // make it known to the caller that etag has been changed and needs propagation + if (isset($newData['etag'])) { + $data['etag_changed'] = true; } } else { - $this->removeFromCache($file); + unset($data['unencrypted_size']); + $newData = $data; + $fileId = -1; } - } catch (\Exception $e) { - if ($lock) { - if ($this->storage->instanceOfStorage('\OCP\Files\Storage\ILockingStorage')) { - $this->storage->releaseLock($file, ILockingProvider::LOCK_SHARED, $this->lockingProvider); - } + if (!empty($newData)) { + // Reset the checksum if the data has changed + $newData['checksum'] = ''; + $newData['parent'] = $parentId; + $data['fileid'] = $this->addToCache($file, $newData, $fileId); } - throw $e; - } - // release the acquired lock - if ($lock) { - if ($this->storage->instanceOfStorage('\OCP\Files\Storage\ILockingStorage')) { - $this->storage->releaseLock($file, ILockingProvider::LOCK_SHARED, $this->lockingProvider); + if ($cacheData !== false) { + $data['oldSize'] = $cacheData['size'] ?? 0; + $data['encrypted'] = $cacheData['encrypted'] ?? false; } - } - if ($data && !isset($data['encrypted'])) { - $data['encrypted'] = false; + // post-emit only if it was a file. By that we avoid counting/treating folders as files + if ($data['mimetype'] !== 'httpd/unix-directory') { + $this->emit('\OC\Files\Cache\Scanner', 'postScanFile', [$file, $this->storageId]); + \OC_Hook::emit('\OC\Files\Cache\Scanner', 'post_scan_file', ['path' => $file, 'storage' => $this->storageId]); + } + } + } finally { + // release the acquired lock + if ($lock && $this->storage->instanceOfStorage(ILockingStorage::class)) { + $this->storage->releaseLock($file, ILockingProvider::LOCK_SHARED, $this->lockingProvider); } - return $data; } - return null; + return $data; } protected function removeFromCache($path) { @@ -334,41 +310,82 @@ class Scanner extends BasicEmitter implements IScanner { if ($reuse === -1) { $reuse = ($recursive === self::SCAN_SHALLOW) ? self::REUSE_ETAG | self::REUSE_SIZE : self::REUSE_ETAG; } - if ($lock) { - if ($this->storage->instanceOfStorage('\OCP\Files\Storage\ILockingStorage')) { - $this->storage->acquireLock('scanner::' . $path, ILockingProvider::LOCK_EXCLUSIVE, $this->lockingProvider); - $this->storage->acquireLock($path, ILockingProvider::LOCK_SHARED, $this->lockingProvider); - } + + if ($lock && $this->storage->instanceOfStorage(ILockingStorage::class)) { + $this->storage->acquireLock('scanner::' . $path, ILockingProvider::LOCK_EXCLUSIVE, $this->lockingProvider); + $this->storage->acquireLock($path, ILockingProvider::LOCK_SHARED, $this->lockingProvider); } + try { - try { - $data = $this->scanFile($path, $reuse, -1, null, $lock); - if ($data && $data['mimetype'] === 'httpd/unix-directory') { - $size = $this->scanChildren($path, $recursive, $reuse, $data['fileid'], $lock, $data['size']); - $data['size'] = $size; - } - } catch (NotFoundException $e) { - $this->removeFromCache($path); - return null; + $data = $this->scanFile($path, $reuse, -1, lock: $lock); + + if ($data !== null && $data['mimetype'] === 'httpd/unix-directory') { + $size = $this->scanChildren($path, $recursive, $reuse, $data['fileid'], $lock, $data['size']); + $data['size'] = $size; } + } catch (NotFoundException $e) { + $this->removeFromCache($path); + return null; } finally { - if ($lock) { - if ($this->storage->instanceOfStorage('\OCP\Files\Storage\ILockingStorage')) { - $this->storage->releaseLock($path, ILockingProvider::LOCK_SHARED, $this->lockingProvider); - $this->storage->releaseLock('scanner::' . $path, ILockingProvider::LOCK_EXCLUSIVE, $this->lockingProvider); - } + if ($lock && $this->storage->instanceOfStorage(ILockingStorage::class)) { + $this->storage->releaseLock($path, ILockingProvider::LOCK_SHARED, $this->lockingProvider); + $this->storage->releaseLock('scanner::' . $path, ILockingProvider::LOCK_EXCLUSIVE, $this->lockingProvider); } } return $data; } /** + * Compares $array1 against $array2 and returns all the values in $array1 that are not in $array2 + * Note this is a one-way check - i.e. we don't care about things that are in $array2 that aren't in $array1 + * + * Supports multi-dimensional arrays + * Also checks keys/indexes + * Comparisons are strict just like array_diff_assoc + * Order of keys/values does not matter + * + * @param array $array1 + * @param array $array2 + * @return array with the differences between $array1 and $array1 + * @throws \InvalidArgumentException if $array1 isn't an actual array + * + */ + protected function array_diff_assoc_multi(array $array1, array $array2) { + + $result = []; + + foreach ($array1 as $key => $value) { + + // if $array2 doesn't have the same key, that's a result + if (!array_key_exists($key, $array2)) { + $result[$key] = $value; + continue; + } + + // if $array2's value for the same key is different, that's a result + if ($array2[$key] !== $value && !is_array($value)) { + $result[$key] = $value; + continue; + } + + if (is_array($value)) { + $nestedDiff = $this->array_diff_assoc_multi($value, $array2[$key]); + if (!empty($nestedDiff)) { + $result[$key] = $nestedDiff; + continue; + } + } + } + return $result; + } + + /** * Get the children currently in the cache * * @param int $folderId - * @return array[] + * @return array<string, \OCP\Files\Cache\ICacheEntry> */ - protected function getExistingChildren($folderId) { + protected function getExistingChildren($folderId): array { $existingChildren = []; $children = $this->cache->getFolderContentsById($folderId); foreach ($children as $child) { @@ -388,16 +405,20 @@ class Scanner extends BasicEmitter implements IScanner { * @param int|float $oldSize the size of the folder before (re)scanning the children * @return int|float the size of the scanned folder or -1 if the size is unknown at this stage */ - protected function scanChildren(string $path, $recursive, int $reuse, int $folderId, bool $lock, int|float $oldSize) { + protected function scanChildren(string $path, $recursive, int $reuse, int $folderId, bool $lock, int|float $oldSize, &$etagChanged = false) { if ($reuse === -1) { $reuse = ($recursive === self::SCAN_SHALLOW) ? self::REUSE_ETAG | self::REUSE_SIZE : self::REUSE_ETAG; } $this->emit('\OC\Files\Cache\Scanner', 'scanFolder', [$path, $this->storageId]); $size = 0; - $childQueue = $this->handleChildren($path, $recursive, $reuse, $folderId, $lock, $size); + $childQueue = $this->handleChildren($path, $recursive, $reuse, $folderId, $lock, $size, $etagChanged); foreach ($childQueue as $child => [$childId, $childSize]) { - $childSize = $this->scanChildren($child, $recursive, $reuse, $childId, $lock, $childSize); + // "etag changed" propagates up, but not down, so we pass `false` to the children even if we already know that the etag of the current folder changed + $childEtagChanged = false; + $childSize = $this->scanChildren($child, $recursive, $reuse, $childId, $lock, $childSize, $childEtagChanged); + $etagChanged |= $childEtagChanged; + if ($childSize === -1) { $size = -1; } elseif ($size !== -1) { @@ -410,8 +431,17 @@ class Scanner extends BasicEmitter implements IScanner { if ($this->storage->instanceOfStorage(Encryption::class)) { $this->cache->calculateFolderSize($path); } else { - if ($this->cacheActive && $oldSize !== $size) { - $this->cache->update($folderId, ['size' => $size]); + if ($this->cacheActive) { + $updatedData = []; + if ($oldSize !== $size) { + $updatedData['size'] = $size; + } + if ($etagChanged) { + $updatedData['etag'] = uniqid(); + } + if ($updatedData) { + $this->cache->update($folderId, $updatedData); + } } } $this->emit('\OC\Files\Cache\Scanner', 'postScanFolder', [$path, $this->storageId]); @@ -421,7 +451,7 @@ class Scanner extends BasicEmitter implements IScanner { /** * @param bool|IScanner::SCAN_RECURSIVE_INCOMPLETE $recursive */ - private function handleChildren(string $path, $recursive, int $reuse, int $folderId, bool $lock, int|float &$size): array { + private function handleChildren(string $path, $recursive, int $reuse, int $folderId, bool $lock, int|float &$size, bool &$etagChanged): array { // we put this in it's own function so it cleans up the memory before we start recursing $existingChildren = $this->getExistingChildren($folderId); $newChildren = iterator_to_array($this->storage->getDirectoryContent($path)); @@ -447,7 +477,7 @@ class Scanner extends BasicEmitter implements IScanner { $file = trim(\OC\Files\Filesystem::normalizePath($originalFile), '/'); if (trim($originalFile, '/') !== $file) { // encoding mismatch, might require compatibility wrapper - \OC::$server->get(LoggerInterface::class)->debug('Scanner: Skipping non-normalized file name "'. $originalFile . '" in path "' . $path . '".', ['app' => 'core']); + \OC::$server->get(LoggerInterface::class)->debug('Scanner: Skipping non-normalized file name "' . $originalFile . '" in path "' . $path . '".', ['app' => 'core']); $this->emit('\OC\Files\Cache\Scanner', 'normalizedNameMismatch', [$path ? $path . '/' . $originalFile : $originalFile]); // skip this entry continue; @@ -469,6 +499,10 @@ class Scanner extends BasicEmitter implements IScanner { } elseif ($size !== -1) { $size += $data['size']; } + + if (isset($data['etag_changed']) && $data['etag_changed']) { + $etagChanged = true; + } } } catch (Exception $ex) { // might happen if inserting duplicate while a scanning |