diff options
Diffstat (limited to 'lib/private/Files/Cache/Scanner.php')
-rw-r--r-- | lib/private/Files/Cache/Scanner.php | 503 |
1 files changed, 503 insertions, 0 deletions
diff --git a/lib/private/Files/Cache/Scanner.php b/lib/private/Files/Cache/Scanner.php new file mode 100644 index 00000000000..8730707f1c2 --- /dev/null +++ b/lib/private/Files/Cache/Scanner.php @@ -0,0 +1,503 @@ +<?php +/** + * @author Arthur Schiwon <blizzz@owncloud.com> + * @author Björn Schießle <schiessle@owncloud.com> + * @author Joas Schilling <nickvergessen@owncloud.com> + * @author Jörn Friedrich Dreyer <jfd@butonic.de> + * @author Lukas Reschke <lukas@owncloud.com> + * @author Martin Mattel <martin.mattel@diemattels.at> + * @author Michael Gapczynski <GapczynskiM@gmail.com> + * @author Morris Jobke <hey@morrisjobke.de> + * @author Owen Winkler <a_github@midnightcircus.com> + * @author Robin Appelman <icewind@owncloud.com> + * @author Robin McCorkell <robin@mccorkell.me.uk> + * @author Thomas Müller <thomas.mueller@tmit.eu> + * @author Vincent Petry <pvince81@owncloud.com> + * + * @copyright Copyright (c) 2016, ownCloud, Inc. + * @license AGPL-3.0 + * + * This code is free software: you can redistribute it and/or modify + * it under the terms of the GNU Affero General Public License, version 3, + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Affero General Public License for more details. + * + * You should have received a copy of the GNU Affero General Public License, version 3, + * along with this program. If not, see <http://www.gnu.org/licenses/> + * + */ + +namespace OC\Files\Cache; + +use OC\Files\Filesystem; +use OC\Hooks\BasicEmitter; +use OCP\Config; +use OCP\Files\Cache\IScanner; +use OCP\Files\Storage\ILockingStorage; +use OCP\Lock\ILockingProvider; + +/** + * Class Scanner + * + * Hooks available in scope \OC\Files\Cache\Scanner: + * - scanFile(string $path, string $storageId) + * - scanFolder(string $path, string $storageId) + * - postScanFile(string $path, string $storageId) + * - postScanFolder(string $path, string $storageId) + * + * @package OC\Files\Cache + */ +class Scanner extends BasicEmitter implements IScanner { + /** + * @var \OC\Files\Storage\Storage $storage + */ + protected $storage; + + /** + * @var string $storageId + */ + protected $storageId; + + /** + * @var \OC\Files\Cache\Cache $cache + */ + protected $cache; + + /** + * @var boolean $cacheActive If true, perform cache operations, if false, do not affect cache + */ + protected $cacheActive; + + /** + * @var bool $useTransactions whether to use transactions + */ + protected $useTransactions = true; + + /** + * @var \OCP\Lock\ILockingProvider + */ + protected $lockingProvider; + + public function __construct(\OC\Files\Storage\Storage $storage) { + $this->storage = $storage; + $this->storageId = $this->storage->getId(); + $this->cache = $storage->getCache(); + $this->cacheActive = !Config::getSystemValue('filesystem_cache_readonly', false); + $this->lockingProvider = \OC::$server->getLockingProvider(); + } + + /** + * Whether to wrap the scanning of a folder in a database transaction + * On default transactions are used + * + * @param bool $useTransactions + */ + public function setUseTransactions($useTransactions) { + $this->useTransactions = $useTransactions; + } + + /** + * get all the metadata of a file or folder + * * + * + * @param string $path + * @return array an array of metadata of the file + */ + protected function getData($path) { + $data = $this->storage->getMetaData($path); + if (is_null($data)) { + \OCP\Util::writeLog('OC\Files\Cache\Scanner', "!!! Path '$path' is not accessible or present !!!", \OCP\Util::DEBUG); + } + return $data; + } + + /** + * scan a single file and store it in the cache + * + * @param string $file + * @param int $reuseExisting + * @param int $parentId + * @param array | null $cacheData existing data in the cache for the file to be scanned + * @param bool $lock set to false to disable getting an additional read lock during scanning + * @return array an array of metadata of the scanned file + * @throws \OC\ServerNotAvailableException + * @throws \OCP\Lock\LockedException + */ + public function scanFile($file, $reuseExisting = 0, $parentId = -1, $cacheData = null, $lock = true) { + + // only proceed if $file is not a partial file nor a blacklisted file + if (!self::isPartialFile($file) and !Filesystem::isFileBlacklisted($file)) { + + //acquire a lock + if ($lock) { + if ($this->storage->instanceOfStorage('\OCP\Files\Storage\ILockingStorage')) { + $this->storage->acquireLock($file, ILockingProvider::LOCK_SHARED, $this->lockingProvider); + } + } + + $data = $this->getData($file); + + if ($data) { + + // pre-emit only if it was a file. By that we avoid counting/treating folders as files + if ($data['mimetype'] !== 'httpd/unix-directory') { + $this->emit('\OC\Files\Cache\Scanner', 'scanFile', array($file, $this->storageId)); + \OC_Hook::emit('\OC\Files\Cache\Scanner', 'scan_file', array('path' => $file, 'storage' => $this->storageId)); + } + + $parent = dirname($file); + if ($parent === '.' or $parent === '/') { + $parent = ''; + } + if ($parentId === -1) { + $parentId = $this->cache->getId($parent); + } + + // scan the parent if it's not in the cache (id -1) and the current file is not the root folder + if ($file and $parentId === -1) { + $parentData = $this->scanFile($parent); + $parentId = $parentData['fileid']; + } + if ($parent) { + $data['parent'] = $parentId; + } + if (is_null($cacheData)) { + /** @var CacheEntry $cacheData */ + $cacheData = $this->cache->get($file); + } + if ($cacheData and $reuseExisting and isset($cacheData['fileid'])) { + // prevent empty etag + if (empty($cacheData['etag'])) { + $etag = $data['etag']; + } else { + $etag = $cacheData['etag']; + } + $fileId = $cacheData['fileid']; + $data['fileid'] = $fileId; + // only reuse data if the file hasn't explicitly changed + if (isset($data['storage_mtime']) && isset($cacheData['storage_mtime']) && $data['storage_mtime'] === $cacheData['storage_mtime']) { + $data['mtime'] = $cacheData['mtime']; + if (($reuseExisting & self::REUSE_SIZE) && ($data['size'] === -1)) { + $data['size'] = $cacheData['size']; + } + if ($reuseExisting & self::REUSE_ETAG) { + $data['etag'] = $etag; + } + } + // Only update metadata that has changed + $newData = array_diff_assoc($data, $cacheData->getData()); + } else { + $newData = $data; + $fileId = -1; + } + if (!empty($newData)) { + // Reset the checksum if the data has changed + $newData['checksum'] = ''; + $data['fileid'] = $this->addToCache($file, $newData, $fileId); + } + if (isset($cacheData['size'])) { + $data['oldSize'] = $cacheData['size']; + } else { + $data['oldSize'] = 0; + } + + if (isset($cacheData['encrypted'])) { + $data['encrypted'] = $cacheData['encrypted']; + } + + // post-emit only if it was a file. By that we avoid counting/treating folders as files + if ($data['mimetype'] !== 'httpd/unix-directory') { + $this->emit('\OC\Files\Cache\Scanner', 'postScanFile', array($file, $this->storageId)); + \OC_Hook::emit('\OC\Files\Cache\Scanner', 'post_scan_file', array('path' => $file, 'storage' => $this->storageId)); + } + + } else { + $this->removeFromCache($file); + } + + //release the acquired lock + if ($lock) { + if ($this->storage->instanceOfStorage('\OCP\Files\Storage\ILockingStorage')) { + $this->storage->releaseLock($file, ILockingProvider::LOCK_SHARED, $this->lockingProvider); + } + } + + if ($data && !isset($data['encrypted'])) { + $data['encrypted'] = false; + } + return $data; + } + + return null; + } + + protected function removeFromCache($path) { + \OC_Hook::emit('Scanner', 'removeFromCache', array('file' => $path)); + $this->emit('\OC\Files\Cache\Scanner', 'removeFromCache', array($path)); + if ($this->cacheActive) { + $this->cache->remove($path); + } + } + + /** + * @param string $path + * @param array $data + * @param int $fileId + * @return int the id of the added file + */ + protected function addToCache($path, $data, $fileId = -1) { + \OC_Hook::emit('Scanner', 'addToCache', array('file' => $path, 'data' => $data)); + $this->emit('\OC\Files\Cache\Scanner', 'addToCache', array($path, $this->storageId, $data)); + if ($this->cacheActive) { + if ($fileId !== -1) { + $this->cache->update($fileId, $data); + return $fileId; + } else { + return $this->cache->put($path, $data); + } + } else { + return -1; + } + } + + /** + * @param string $path + * @param array $data + * @param int $fileId + */ + protected function updateCache($path, $data, $fileId = -1) { + \OC_Hook::emit('Scanner', 'addToCache', array('file' => $path, 'data' => $data)); + $this->emit('\OC\Files\Cache\Scanner', 'updateCache', array($path, $this->storageId, $data)); + if ($this->cacheActive) { + if ($fileId !== -1) { + $this->cache->update($fileId, $data); + } else { + $this->cache->put($path, $data); + } + } + } + + /** + * scan a folder and all it's children + * + * @param string $path + * @param bool $recursive + * @param int $reuse + * @param bool $lock set to false to disable getting an additional read lock during scanning + * @return array an array of the meta data of the scanned file or folder + */ + public function scan($path, $recursive = self::SCAN_RECURSIVE, $reuse = -1, $lock = true) { + if ($reuse === -1) { + $reuse = ($recursive === self::SCAN_SHALLOW) ? self::REUSE_ETAG | self::REUSE_SIZE : self::REUSE_ETAG; + } + if ($lock) { + if ($this->storage->instanceOfStorage('\OCP\Files\Storage\ILockingStorage')) { + $this->storage->acquireLock($path, ILockingProvider::LOCK_SHARED, $this->lockingProvider); + } + } + $data = $this->scanFile($path, $reuse, -1, null, $lock); + if ($data and $data['mimetype'] === 'httpd/unix-directory') { + $size = $this->scanChildren($path, $recursive, $reuse, $data, $lock); + $data['size'] = $size; + } + if ($lock) { + if ($this->storage->instanceOfStorage('\OCP\Files\Storage\ILockingStorage')) { + $this->storage->releaseLock($path, ILockingProvider::LOCK_SHARED, $this->lockingProvider); + } + } + return $data; + } + + /** + * Get the children currently in the cache + * + * @param int $folderId + * @return array[] + */ + protected function getExistingChildren($folderId) { + $existingChildren = array(); + $children = $this->cache->getFolderContentsById($folderId); + foreach ($children as $child) { + $existingChildren[$child['name']] = $child; + } + return $existingChildren; + } + + /** + * Get the children from the storage + * + * @param string $folder + * @return string[] + */ + protected function getNewChildren($folder) { + $children = array(); + if ($dh = $this->storage->opendir($folder)) { + if (is_resource($dh)) { + while (($file = readdir($dh)) !== false) { + if (!Filesystem::isIgnoredDir($file)) { + $children[] = $file; + } + } + } + } + return $children; + } + + /** + * scan all the files and folders in a folder + * + * @param string $path + * @param bool $recursive + * @param int $reuse + * @param array $folderData existing cache data for the folder to be scanned + * @param bool $lock set to false to disable getting an additional read lock during scanning + * @return int the size of the scanned folder or -1 if the size is unknown at this stage + */ + protected function scanChildren($path, $recursive = self::SCAN_RECURSIVE, $reuse = -1, $folderData = null, $lock = true) { + if ($reuse === -1) { + $reuse = ($recursive === self::SCAN_SHALLOW) ? self::REUSE_ETAG | self::REUSE_SIZE : self::REUSE_ETAG; + } + $this->emit('\OC\Files\Cache\Scanner', 'scanFolder', array($path, $this->storageId)); + $size = 0; + $childQueue = array(); + if (is_array($folderData) and isset($folderData['fileid'])) { + $folderId = $folderData['fileid']; + } else { + $folderId = $this->cache->getId($path); + } + $existingChildren = $this->getExistingChildren($folderId); + $newChildren = $this->getNewChildren($path); + + if ($this->useTransactions) { + \OC::$server->getDatabaseConnection()->beginTransaction(); + } + $exceptionOccurred = false; + foreach ($newChildren as $file) { + $child = ($path) ? $path . '/' . $file : $file; + try { + $existingData = isset($existingChildren[$file]) ? $existingChildren[$file] : null; + $data = $this->scanFile($child, $reuse, $folderId, $existingData, $lock); + if ($data) { + if ($data['mimetype'] === 'httpd/unix-directory' and $recursive === self::SCAN_RECURSIVE) { + $childQueue[$child] = $data; + } else if ($data['size'] === -1) { + $size = -1; + } else if ($size !== -1) { + $size += $data['size']; + } + } + } catch (\Doctrine\DBAL\DBALException $ex) { + // might happen if inserting duplicate while a scanning + // process is running in parallel + // log and ignore + \OCP\Util::writeLog('core', 'Exception while scanning file "' . $child . '": ' . $ex->getMessage(), \OCP\Util::DEBUG); + $exceptionOccurred = true; + } catch (\OCP\Lock\LockedException $e) { + if ($this->useTransactions) { + \OC::$server->getDatabaseConnection()->rollback(); + } + throw $e; + } + } + $removedChildren = \array_diff(array_keys($existingChildren), $newChildren); + foreach ($removedChildren as $childName) { + $child = ($path) ? $path . '/' . $childName : $childName; + $this->removeFromCache($child); + } + if ($this->useTransactions) { + \OC::$server->getDatabaseConnection()->commit(); + } + if ($exceptionOccurred) { + // It might happen that the parallel scan process has already + // inserted mimetypes but those weren't available yet inside the transaction + // To make sure to have the updated mime types in such cases, + // we reload them here + \OC::$server->getMimeTypeLoader()->reset(); + } + + foreach ($childQueue as $child => $childData) { + $childSize = $this->scanChildren($child, self::SCAN_RECURSIVE, $reuse, $childData, $lock); + if ($childSize === -1) { + $size = -1; + } else if ($size !== -1) { + $size += $childSize; + } + } + if (!is_array($folderData) or !isset($folderData['size']) or $folderData['size'] !== $size) { + $this->updateCache($path, array('size' => $size), $folderId); + } + $this->emit('\OC\Files\Cache\Scanner', 'postScanFolder', array($path, $this->storageId)); + return $size; + } + + /** + * check if the file should be ignored when scanning + * NOTE: files with a '.part' extension are ignored as well! + * prevents unfinished put requests to be scanned + * + * @param string $file + * @return boolean + */ + public static function isPartialFile($file) { + if (pathinfo($file, PATHINFO_EXTENSION) === 'part') { + return true; + } + if (strpos($file, '.part/') !== false) { + return true; + } + + return false; + } + + /** + * walk over any folders that are not fully scanned yet and scan them + */ + public function backgroundScan() { + if (!$this->cache->inCache('')) { + $this->runBackgroundScanJob(function () { + $this->scan('', self::SCAN_RECURSIVE, self::REUSE_ETAG); + }, ''); + } else { + $lastPath = null; + while (($path = $this->cache->getIncomplete()) !== false && $path !== $lastPath) { + $this->runBackgroundScanJob(function() use ($path) { + $this->scan($path, self::SCAN_RECURSIVE, self::REUSE_ETAG); + }, $path); + // FIXME: this won't proceed with the next item, needs revamping of getIncomplete() + // to make this possible + $lastPath = $path; + } + } + } + + private function runBackgroundScanJob(callable $callback, $path) { + try { + $callback(); + \OC_Hook::emit('Scanner', 'correctFolderSize', array('path' => $path)); + if ($this->cacheActive) { + $this->cache->correctFolderSize($path); + } + } catch (\OCP\Files\StorageInvalidException $e) { + // skip unavailable storages + } catch (\OCP\Files\StorageNotAvailableException $e) { + // skip unavailable storages + } catch (\OCP\Files\ForbiddenException $e) { + // skip forbidden storages + } catch (\OCP\Lock\LockedException $e) { + // skip unavailable storages + } + } + + /** + * Set whether the cache is affected by scan operations + * + * @param boolean $active The active state of the cache + */ + public function setCacheActive($active) { + $this->cacheActive = $active; + } +} |