You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

scanner.php 9.8KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323
  1. <?php
  2. /**
  3. * Copyright (c) 2012 Robin Appelman <icewind@owncloud.com>
  4. * This file is licensed under the Affero General Public License version 3 or
  5. * later.
  6. * See the COPYING-README file.
  7. */
  8. namespace OC\Files\Cache;
  9. use OC\Files\Filesystem;
  10. use OC\Hooks\BasicEmitter;
  11. use OCP\Config;
  12. /**
  13. * Class Scanner
  14. *
  15. * Hooks available in scope \OC\Files\Cache\Scanner:
  16. * - scanFile(string $path, string $storageId)
  17. * - scanFolder(string $path, string $storageId)
  18. * - postScanFile(string $path, string $storageId)
  19. * - postScanFolder(string $path, string $storageId)
  20. *
  21. * @package OC\Files\Cache
  22. */
  23. class Scanner extends BasicEmitter {
  24. /**
  25. * @var \OC\Files\Storage\Storage $storage
  26. */
  27. protected $storage;
  28. /**
  29. * @var string $storageId
  30. */
  31. protected $storageId;
  32. /**
  33. * @var \OC\Files\Cache\Cache $cache
  34. */
  35. protected $cache;
  36. /**
  37. * @var boolean $cacheActive If true, perform cache operations, if false, do not affect cache
  38. */
  39. protected $cacheActive;
  40. const SCAN_RECURSIVE = true;
  41. const SCAN_SHALLOW = false;
  42. const REUSE_ETAG = 1;
  43. const REUSE_SIZE = 2;
  /**
   * Create a scanner bound to the given storage.
   *
   * The storage id and the cache are obtained from the storage itself. Cache
   * writes are enabled unless the 'filesystem_cache_readonly' system value is
   * set to a truthy value.
   *
   * @param \OC\Files\Storage\Storage $storage the storage to scan
   */
  public function __construct(\OC\Files\Storage\Storage $storage) {
      $this->storage = $storage;
      $this->storageId = $storage->getId();
      $this->cache = $storage->getCache();

      $cacheIsReadOnly = Config::getSystemValue('filesystem_cache_readonly', false);
      $this->cacheActive = !$cacheIsReadOnly;
  }
  /**
   * Collect the metadata of a file or folder directly from the storage.
   *
   * The result holds the keys 'mimetype', 'mtime', 'size', 'etag',
   * 'storage_mtime' and 'permissions'. Folders (mimetype
   * 'httpd/unix-directory') get a size of -1, meaning "unknown".
   *
   * @param string $path
   * @return array|null the metadata, or null when the path is not readable
   */
  public function getData($path) {
      if (!$this->storage->isReadable($path)) {
          // not readable, so there is nothing to collect
          \OCP\Util::writeLog('OC\Files\Cache\Scanner', "!!! Path '$path' is not readable !!!", \OCP\Util::DEBUG);
          return null;
      }

      $mimeType = $this->storage->getMimeType($path);
      $modified = $this->storage->filemtime($path);
      $isFolder = ($mimeType == 'httpd/unix-directory');
      // folder sizes are not known at this stage
      $size = $isFolder ? -1 : $this->storage->filesize($path);

      return array(
          'mimetype' => $mimeType,
          'mtime' => $modified,
          'size' => $size,
          'etag' => $this->storage->getETag($path),
          'storage_mtime' => $modified,
          'permissions' => $this->storage->getPermissions($path),
      );
  }
  /**
   * Scan a single file or folder and store its metadata in the cache.
   *
   * Partial ('.part') and blacklisted files are skipped. When the path cannot
   * be read, its cache entry is removed instead. Unless $parentExistsInCache is
   * set, a parent folder that is missing from the cache is scanned first.
   *
   * @param string $file path of the entry to scan
   * @param int $reuseExisting bitmask of self::REUSE_ETAG and self::REUSE_SIZE, 0 to reuse nothing
   * @param bool $parentExistsInCache true when the caller knows the parent is already cached
   * @return array|null metadata of the scanned entry, or null when it was skipped or is not readable
   */
  public function scanFile($file, $reuseExisting = 0, $parentExistsInCache = false) {
      if (self::isPartialFile($file) or Filesystem::isFileBlacklisted($file)) {
          return null;
      }

      $this->emit('\OC\Files\Cache\Scanner', 'scanFile', array($file, $this->storageId));
      \OC_Hook::emit('\OC\Files\Cache\Scanner', 'scan_file', array('path' => $file, 'storage' => $this->storageId));

      $data = $this->getData($file);
      if (!$data) {
          // nothing could be read from the storage: drop the stale cache entry
          \OC_Hook::emit('Scanner', 'removeFromCache', array('file' => $file));
          if ($this->cacheActive) {
              $this->cache->remove($file);
          }
          return $data;
      }

      if ($file and !$parentExistsInCache) {
          $parent = dirname($file);
          if ($parent === '.' or $parent === '/') {
              $parent = '';
          }
          if (!$this->cache->inCache($parent)) {
              $this->scanFile($parent);
          }
      }

      $newData = $data;
      $cacheData = $this->cache->get($file);
      if ($cacheData and $reuseExisting) {
          // prevent empty etag
          if (empty($cacheData['etag'])) {
              $etag = $data['etag'];
              $propagateETagChange = true;
          } else {
              $etag = $cacheData['etag'];
              $propagateETagChange = false;
          }
          // only reuse data if the file hasn't explicitly changed
          if (isset($data['storage_mtime']) && isset($cacheData['storage_mtime']) && $data['storage_mtime'] === $cacheData['storage_mtime']) {
              if (($reuseExisting & self::REUSE_SIZE) && ($data['size'] === -1)) {
                  $data['size'] = $cacheData['size'];
              }
              if ($reuseExisting & self::REUSE_ETAG) {
                  $data['etag'] = $etag;
                  if ($propagateETagChange) {
                      // walk up to the root and refresh the etag of every cached ancestor
                      $parent = $file;
                      while ($parent !== '') {
                          $parent = dirname($parent);
                          // dirname() settles on '.' for relative and '/' for absolute
                          // paths; both mean the storage root. Without the '/' case a
                          // path with a leading slash would loop forever.
                          if ($parent === '.' or $parent === '/') {
                              $parent = '';
                          }
                          $parentCacheData = $this->cache->get($parent);
                          // NOTE(review): the hook reports $file/$data although the update
                          // below targets $parent - confirm what listeners expect.
                          \OC_Hook::emit('Scanner', 'updateCache', array('file' => $file, 'data' => $data));
                          // an ancestor without a cache entry has no fileid to update
                          if ($this->cacheActive and $parentCacheData) {
                              $this->cache->update($parentCacheData['fileid'], array(
                                  'etag' => $this->storage->getETag($parent),
                              ));
                          }
                      }
                  }
              }
          }
          // Only update metadata that has changed
          $newData = array_diff_assoc($data, $cacheData);
          if (isset($newData['etag'])) {
              $cacheDataString = print_r($cacheData, true);
              $dataString = print_r($data, true);
              \OCP\Util::writeLog('OC\Files\Cache\Scanner',
                  "!!! No reuse of etag for '$file' !!! \ncache: $cacheDataString \ndata: $dataString",
                  \OCP\Util::DEBUG);
          }
      }

      if (!empty($newData)) {
          \OC_Hook::emit('Scanner', 'addToCache', array('file' => $file, 'data' => $newData));
          if ($this->cacheActive) {
              $data['fileid'] = $this->cache->put($file, $newData);
          }
          $this->emit('\OC\Files\Cache\Scanner', 'postScanFile', array($file, $this->storageId));
          \OC_Hook::emit('\OC\Files\Cache\Scanner', 'post_scan_file', array('path' => $file, 'storage' => $this->storageId));
      }

      return $data;
  }
  /**
   * Scan a file, or a folder together with its children.
   *
   * The entry itself is scanned with scanFile(). Children are only scanned
   * when the entry is a folder; in that case the 'size' of the returned
   * metadata is replaced by the size computed by scanChildren().
   *
   * @param string $path
   * @param bool $recursive self::SCAN_RECURSIVE or self::SCAN_SHALLOW
   * @param int $reuse bitmask of self::REUSE_ETAG and self::REUSE_SIZE; -1 picks
   *                   both for a shallow scan and none for a recursive scan
   * @return array|null the metadata of the scanned file or folder, or null when
   *                    scanFile() skipped the path or could not read it
   */
  public function scan($path, $recursive = self::SCAN_RECURSIVE, $reuse = -1) {
      if ($reuse === -1) {
          $reuse = ($recursive === self::SCAN_SHALLOW) ? self::REUSE_ETAG | self::REUSE_SIZE : 0;
      }

      $data = $this->scanFile($path, $reuse);
      if ($data === null) {
          // skipped or unreadable: do not turn null into array('size' => ...)
          // and do not descend into a path that could not be scanned
          return null;
      }

      // Only folders have children. For a regular file scanChildren() would
      // report 0 and overwrite the real file size in the returned metadata.
      if ($data['mimetype'] === 'httpd/unix-directory') {
          $data['size'] = $this->scanChildren($path, $recursive, $reuse);
      }

      return $data;
  }
  /**
   * Scan all the files and folders inside a folder.
   *
   * Every entry returned by the storage is scanned with scanFile(). Entries
   * that are listed in the cache but no longer present in the storage are
   * removed from the cache. In recursive mode sub folders are queued and
   * scanned after the directory listing has been processed.
   *
   * @param string $path
   * @param bool $recursive self::SCAN_RECURSIVE or self::SCAN_SHALLOW
   * @param int $reuse bitmask of self::REUSE_ETAG and self::REUSE_SIZE; -1 picks
   *                   both for a shallow scan and none for a recursive scan
   * @return int the size of the scanned folder or -1 if the size is unknown at this stage
   */
  public function scanChildren($path, $recursive = self::SCAN_RECURSIVE, $reuse = -1) {
      if ($reuse === -1) {
          $reuse = ($recursive === self::SCAN_SHALLOW) ? self::REUSE_ETAG | self::REUSE_SIZE : 0;
      }
      $this->emit('\OC\Files\Cache\Scanner', 'scanFolder', array($path, $this->storageId));
      $size = 0;
      $childQueue = array();

      // names of the children the cache currently knows about
      $existingChildren = array();
      if ($this->cache->inCache($path)) {
          $children = $this->cache->getFolderContents($path);
          foreach ($children as $child) {
              $existingChildren[] = $child['name'];
          }
      }

      $newChildren = array();
      if ($this->storage->is_dir($path) && ($dh = $this->storage->opendir($path))) {
          $exceptionOccurred = false;
          \OC_DB::beginTransaction();
          if (is_resource($dh)) {
              while (($file = readdir($dh)) !== false) {
                  if (Filesystem::isIgnoredDir($file)) {
                      continue;
                  }
                  $child = ($path) ? $path . '/' . $file : $file;
                  $newChildren[] = $file;
                  try {
                      $data = $this->scanFile($child, $reuse, true);
                      if ($data) {
                          if ($data['size'] === -1) {
                              // a folder whose size is not known yet
                              if ($recursive === self::SCAN_RECURSIVE) {
                                  $childQueue[] = $child;
                              } else {
                                  $size = -1;
                              }
                          } else if ($size !== -1) {
                              $size += $data['size'];
                          }
                      }
                  }
                  catch (\Doctrine\DBAL\DBALException $ex){
                      // might happen if inserting duplicate while a scanning
                      // process is running in parallel
                      // log and ignore
                      \OC_Log::write('core', 'Exception while scanning file "' . $child . '": ' . $ex->getMessage(), \OC_Log::DEBUG);
                      $exceptionOccurred = true;
                  }
              }
              // release the directory handle once the listing has been read
              closedir($dh);
          }

          // whatever the cache lists but the storage no longer has is gone
          $removedChildren = \array_diff($existingChildren, $newChildren);
          foreach ($removedChildren as $childName) {
              $child = ($path) ? $path . '/' . $childName : $childName;
              \OC_Hook::emit('Scanner', 'removeFromCache', array('file' => $child));
              if ($this->cacheActive) {
                  $this->cache->remove($child);
              }
          }
          \OC_DB::commit();
          if ($exceptionOccurred){
              // It might happen that the parallel scan process has already
              // inserted mimetypes but those weren't available yet inside the transaction
              // To make sure to have the updated mime types in such cases,
              // we reload them here
              $this->cache->loadMimetypes();
          }

          foreach ($childQueue as $child) {
              $childSize = $this->scanChildren($child, self::SCAN_RECURSIVE, $reuse);
              if ($childSize === -1) {
                  $size = -1;
              } else if ($size !== -1) {
                  // once the total is unknown (-1) it must stay unknown
                  $size += $childSize;
              }
          }

          $newData = array('size' => $size);
          // report the folder that is written below, not the last child visited
          \OC_Hook::emit('Scanner', 'addToCache', array('file' => $path, 'data' => $newData));
          if ($this->cacheActive) {
              $this->cache->put($path, $newData);
          }
      }
      $this->emit('\OC\Files\Cache\Scanner', 'postScanFolder', array($path, $this->storageId));
      return $size;
  }
  /**
   * Tell whether a file must be ignored while scanning because it is a
   * partial upload.
   *
   * A file counts as partial when its extension is exactly 'part'; skipping
   * these prevents unfinished put requests from being scanned.
   *
   * @param string $file
   * @return boolean true when the extension of $file is 'part'
   */
  public static function isPartialFile($file) {
      $extension = pathinfo($file, PATHINFO_EXTENSION);
      return $extension === 'part';
  }
  /**
   * Walk over the folders the cache reports as incomplete and scan each one
   * recursively, reusing existing etags.
   *
   * The loop ends when the cache has no incomplete folder left, or when it
   * returns the same path twice in a row.
   */
  public function backgroundScan() {
      $previous = null;
      for (;;) {
          $incomplete = $this->cache->getIncomplete();
          if ($incomplete === false || $incomplete === $previous) {
              break;
          }

          $this->scan($incomplete, self::SCAN_RECURSIVE, self::REUSE_ETAG);
          \OC_Hook::emit('Scanner', 'correctFolderSize', array('path' => $incomplete));
          if ($this->cacheActive) {
              $this->cache->correctFolderSize($incomplete);
          }

          $previous = $incomplete;
      }
  }
  /**
   * Switch cache writes of this scanner on or off.
   *
   * The methods of this class only call the cache's put, update, remove and
   * correctFolderSize operations while the flag is true.
   *
   * @param boolean $active true to let scans modify the cache, false to leave it untouched
   */
  public function setCacheActive($active) {
      $this->cacheActive = $active;
  }
  306. }