You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

HSLFSlideShowImpl.java 44KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099
  1. /* ====================================================================
  2. Licensed to the Apache Software Foundation (ASF) under one or more
  3. contributor license agreements. See the NOTICE file distributed with
  4. this work for additional information regarding copyright ownership.
  5. The ASF licenses this file to You under the Apache License, Version 2.0
  6. (the "License"); you may not use this file except in compliance with
  7. the License. You may obtain a copy of the License at
  8. http://www.apache.org/licenses/LICENSE-2.0
  9. Unless required by applicable law or agreed to in writing, software
  10. distributed under the License is distributed on an "AS IS" BASIS,
  11. WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. See the License for the specific language governing permissions and
  13. limitations under the License.
  14. ==================================================================== */
  15. package org.apache.poi.hslf.usermodel;
  16. import static org.apache.logging.log4j.util.Unbox.box;
  17. import static org.apache.poi.hslf.usermodel.HSLFSlideShow.POWERPOINT_DOCUMENT;
  18. import static org.apache.poi.hslf.usermodel.HSLFSlideShow.PP95_DOCUMENT;
  19. import static org.apache.poi.hslf.usermodel.HSLFSlideShow.PP97_DOCUMENT;
  20. import java.io.ByteArrayInputStream;
  21. import java.io.ByteArrayOutputStream;
  22. import java.io.Closeable;
  23. import java.io.File;
  24. import java.io.IOException;
  25. import java.io.InputStream;
  26. import java.io.OutputStream;
  27. import java.util.ArrayList;
  28. import java.util.Arrays;
  29. import java.util.Collection;
  30. import java.util.Collections;
  31. import java.util.Comparator;
  32. import java.util.HashMap;
  33. import java.util.Iterator;
  34. import java.util.LinkedList;
  35. import java.util.List;
  36. import java.util.Map;
  37. import java.util.NavigableMap;
  38. import java.util.Objects;
  39. import java.util.TreeMap;
  40. import java.util.stream.Collectors;
  41. import org.apache.logging.log4j.LogManager;
  42. import org.apache.logging.log4j.Logger;
  43. import org.apache.poi.POIDocument;
  44. import org.apache.poi.ddf.EscherBSERecord;
  45. import org.apache.poi.ddf.EscherContainerRecord;
  46. import org.apache.poi.ddf.EscherOptRecord;
  47. import org.apache.poi.ddf.EscherRecord;
  48. import org.apache.poi.hpsf.PropertySet;
  49. import org.apache.poi.hslf.exceptions.CorruptPowerPointFileException;
  50. import org.apache.poi.hslf.exceptions.HSLFException;
  51. import org.apache.poi.hslf.exceptions.OldPowerPointFormatException;
  52. import org.apache.poi.hslf.record.CurrentUserAtom;
  53. import org.apache.poi.hslf.record.Document;
  54. import org.apache.poi.hslf.record.DocumentEncryptionAtom;
  55. import org.apache.poi.hslf.record.ExOleObjStg;
  56. import org.apache.poi.hslf.record.PersistPtrHolder;
  57. import org.apache.poi.hslf.record.PersistRecord;
  58. import org.apache.poi.hslf.record.PositionDependentRecord;
  59. import org.apache.poi.hslf.record.Record;
  60. import org.apache.poi.hslf.record.RecordTypes;
  61. import org.apache.poi.hslf.record.UserEditAtom;
  62. import org.apache.poi.poifs.crypt.EncryptionInfo;
  63. import org.apache.poi.poifs.filesystem.DirectoryNode;
  64. import org.apache.poi.poifs.filesystem.DocumentEntry;
  65. import org.apache.poi.poifs.filesystem.DocumentInputStream;
  66. import org.apache.poi.poifs.filesystem.EntryUtils;
  67. import org.apache.poi.poifs.filesystem.POIFSFileSystem;
  68. import org.apache.poi.sl.usermodel.PictureData;
  69. import org.apache.poi.sl.usermodel.PictureData.PictureType;
  70. import org.apache.poi.util.IOUtils;
  71. import org.apache.poi.util.LittleEndian;
  72. import org.apache.poi.util.LittleEndianConsts;
  73. /**
  74. * This class contains the main functionality for the Powerpoint file
  75. * "reader". It is only a very basic class for now
  76. */
  77. public final class HSLFSlideShowImpl extends POIDocument implements Closeable {
  78. private static final Logger LOG = LogManager.getLogger(HSLFSlideShowImpl.class);
  79. static final int UNSET_OFFSET = -1;
  80. //arbitrarily selected; may need to increase
  81. private static final int MAX_RECORD_LENGTH = 200_000_000;
  82. // Holds metadata on where things are in our document
  83. private CurrentUserAtom currentUser;
  84. // Low level contents of the file
  85. private byte[] _docstream;
  86. // Low level contents
  87. private Record[] _records;
  88. // Raw Pictures contained in the pictures stream
  89. private List<HSLFPictureData> _pictures;
  90. // Embedded objects stored in storage records in the document stream, lazily populated.
  91. private HSLFObjectData[] _objects;
  /**
   * Opens the PowerPoint file with the given name and parses it.
   * <p>
   * The file is wrapped in a {@link POIFSFileSystem} and handed on to
   * {@link #HSLFSlideShowImpl(POIFSFileSystem)}.
   *
   * @param fileName name of the file to read
   * @throws IOException if the document cannot be read or parsed
   */
  @SuppressWarnings("resource")
  public HSLFSlideShowImpl(String fileName) throws IOException {
    this(new POIFSFileSystem(new File(fileName)));
  }
  /**
   * Reads a PowerPoint document from a stream and parses it.
   * <p>
   * The stream is first loaded into a {@link POIFSFileSystem} (the OLE2
   * container), which is then passed to
   * {@link #HSLFSlideShowImpl(POIFSFileSystem)}.
   *
   * @param inputStream stream supplying the document bytes
   * @throws IOException if the document cannot be read or parsed
   */
  @SuppressWarnings("resource")
  public HSLFSlideShowImpl(InputStream inputStream) throws IOException {
    this(new POIFSFileSystem(inputStream));
  }
  /**
   * Parses the PowerPoint document stored at the root of an already opened
   * POIFS filesystem.
   *
   * @param filesystem the POIFS filesystem whose root directory is read
   * @throws IOException if the document cannot be read or parsed
   */
  public HSLFSlideShowImpl(POIFSFileSystem filesystem) throws IOException {
    this(filesystem.getRoot());
  }
  /**
   * Constructs a Powerpoint document from a specific point in a
   * POIFS Filesystem. Parses the document and places all the
   * important stuff into data structures.
   * <p>
   * If {@code dir} contains a dual-storage entry, that entry is used as the
   * document directory instead (see {@code handleDualStorage}).
   * <p>
   * When parsing fails, the filesystem owning {@code dir} is closed before the
   * failure is rethrown, so that no file handles are leaked. A failure of that
   * cleanup is attached to the rethrown exception as a suppressed exception
   * instead of replacing it.
   *
   * @param dir the POIFS directory to read from
   * @throws IOException if there is a problem while parsing the document.
   */
  public HSLFSlideShowImpl(DirectoryNode dir) throws IOException {
    super(handleDualStorage(dir));

    try {
      // The "Current User" stream comes first: it is needed before
      // encrypted documents can be detected.
      readCurrentUserStream();

      // Next, load the raw bytes of the PowerPoint document stream ...
      readPowerPointStream();

      // ... and turn them into records.
      buildRecords();

      // Finally look for any other streams.
      readOtherStreams();
    } catch (RuntimeException | IOException e) {
      // Clean up the filesystem when we cannot read it, to avoid leaking
      // file handles. The parse failure is the interesting error, so a
      // failing close() must not mask it.
      try {
        dir.getFileSystem().close();
      } catch (IOException | RuntimeException closeFailure) {
        e.addSuppressed(closeFailure);
      }
      throw e;
    }
  }
  153. private static DirectoryNode handleDualStorage(DirectoryNode dir) throws IOException {
  154. // when there's a dual storage entry, use it, as the outer document can't be read quite probably ...
  155. if (!dir.hasEntry(PP97_DOCUMENT)) {
  156. return dir;
  157. }
  158. return (DirectoryNode) dir.getEntry(PP97_DOCUMENT);
  159. }
  160. /**
  161. * Constructs a new, empty, Powerpoint document.
  162. */
  163. public static HSLFSlideShowImpl create() {
  164. try (InputStream is = HSLFSlideShowImpl.class.getResourceAsStream("/org/apache/poi/hslf/data/empty.ppt")) {
  165. if (is == null) {
  166. throw new HSLFException("Missing resource 'empty.ppt'");
  167. }
  168. return new HSLFSlideShowImpl(is);
  169. } catch (IOException e) {
  170. throw new HSLFException(e);
  171. }
  172. }
  173. /**
  174. * Extracts the main PowerPoint document stream from the
  175. * POI file, ready to be passed
  176. *
  177. * @throws IOException when the powerpoint can't be read
  178. */
  179. private void readPowerPointStream() throws IOException {
  180. final DirectoryNode dir = getDirectory();
  181. if (!dir.hasEntry(POWERPOINT_DOCUMENT) && dir.hasEntry(PP95_DOCUMENT)) {
  182. throw new OldPowerPointFormatException("You seem to have supplied a PowerPoint95 file, which isn't supported");
  183. }
  184. // Get the main document stream
  185. DocumentEntry docProps = (DocumentEntry)dir.getEntry(POWERPOINT_DOCUMENT);
  186. // Grab the document stream
  187. int len = docProps.getSize();
  188. try (InputStream is = dir.createDocumentInputStream(docProps)) {
  189. _docstream = IOUtils.toByteArray(is, len);
  190. }
  191. }
  192. /**
  193. * Builds the list of records, based on the contents
  194. * of the PowerPoint stream
  195. */
  196. private void buildRecords() throws IOException {
  197. // The format of records in a powerpoint file are:
  198. // <little endian 2 byte "info">
  199. // <little endian 2 byte "type">
  200. // <little endian 4 byte "length">
  201. // If it has a zero length, following it will be another record
  202. // <xx xx yy yy 00 00 00 00> <xx xx yy yy zz zz zz zz>
  203. // If it has a length, depending on its type it may have children or data
  204. // If it has children, these will follow straight away
  205. // <xx xx yy yy zz zz zz zz <xx xx yy yy zz zz zz zz>>
  206. // If it has data, this will come straigh after, and run for the length
  207. // <xx xx yy yy zz zz zz zz dd dd dd dd dd dd dd>
  208. // All lengths given exclude the 8 byte record header
  209. // (Data records are known as Atoms)
  210. // Document should start with:
  211. // 0F 00 E8 03 ## ## ## ##
  212. // (type 1000 = document, info 00 0f is normal, rest is document length)
  213. // 01 00 E9 03 28 00 00 00
  214. // (type 1001 = document atom, info 00 01 normal, 28 bytes long)
  215. // 80 16 00 00 E0 10 00 00 xx xx xx xx xx xx xx xx
  216. // 05 00 00 00 0A 00 00 00 xx xx xx
  217. // (the contents of the document atom, not sure what it means yet)
  218. // (records then follow)
  219. // When parsing a document, look to see if you know about that type
  220. // of the current record. If you know it's a type that has children,
  221. // process the record's data area looking for more records
  222. // If you know about the type and it doesn't have children, either do
  223. // something with the data (eg TextRun) or skip over it
  224. // If you don't know about the type, play safe and skip over it (using
  225. // its length to know where the next record will start)
  226. //
  227. _records = read(_docstream, (int) currentUser.getCurrentEditOffset());
  228. }
  229. private Record[] read(byte[] docstream, int usrOffset) throws IOException {
  230. //sort found records by offset.
  231. //(it is not necessary but SlideShow.findMostRecentCoreRecords() expects them sorted)
  232. NavigableMap<Integer, Record> records = new TreeMap<>(); // offset -> record
  233. Map<Integer, Integer> persistIds = new HashMap<>(); // offset -> persistId
  234. initRecordOffsets(docstream, usrOffset, records, persistIds);
  235. HSLFSlideShowEncrypted decryptData = new HSLFSlideShowEncrypted(docstream, records);
  236. for (Map.Entry<Integer, Record> entry : records.entrySet()) {
  237. Integer offset = entry.getKey();
  238. Record record = entry.getValue();
  239. Integer persistId = persistIds.get(offset);
  240. if (record == null) {
  241. // all plain records have been already added,
  242. // only new records need to be decrypted (tbd #35897)
  243. decryptData.decryptRecord(docstream, persistId, offset);
  244. record = Record.buildRecordAtOffset(docstream, offset);
  245. entry.setValue(record);
  246. }
  247. if (record instanceof PersistRecord) {
  248. ((PersistRecord) record).setPersistId(persistId);
  249. }
  250. }
  251. decryptData.close();
  252. return records.values().toArray(new Record[0]);
  253. }
  254. private void initRecordOffsets(byte[] docstream, int usrOffset, NavigableMap<Integer, Record> recordMap, Map<Integer, Integer> offset2id) {
  255. while (usrOffset != 0) {
  256. UserEditAtom usr = (UserEditAtom) Record.buildRecordAtOffset(docstream, usrOffset);
  257. recordMap.put(usrOffset, usr);
  258. int psrOffset = usr.getPersistPointersOffset();
  259. PersistPtrHolder ptr = (PersistPtrHolder) Record.buildRecordAtOffset(docstream, psrOffset);
  260. recordMap.put(psrOffset, ptr);
  261. for (Map.Entry<Integer, Integer> entry : ptr.getSlideLocationsLookup().entrySet()) {
  262. Integer offset = entry.getValue();
  263. Integer id = entry.getKey();
  264. recordMap.put(offset, null); // reserve a slot for the record
  265. offset2id.put(offset, id);
  266. }
  267. usrOffset = usr.getLastUserEditAtomOffset();
  268. // check for corrupted user edit atom and try to repair it
  269. // if the next user edit atom offset is already known, we would go into an endless loop
  270. if (usrOffset > 0 && recordMap.containsKey(usrOffset)) {
  271. // a user edit atom is usually located 36 byte before the smallest known record offset
  272. usrOffset = recordMap.firstKey() - 36;
  273. // check that we really are located on a user edit atom
  274. int ver_inst = LittleEndian.getUShort(docstream, usrOffset);
  275. int type = LittleEndian.getUShort(docstream, usrOffset + 2);
  276. int len = LittleEndian.getInt(docstream, usrOffset + 4);
  277. if (ver_inst == 0 && type == 4085 && (len == 0x1C || len == 0x20)) {
  278. LOG.atWarn().log("Repairing invalid user edit atom");
  279. usr.setLastUserEditAtomOffset(usrOffset);
  280. } else {
  281. throw new CorruptPowerPointFileException("Powerpoint document contains invalid user edit atom");
  282. }
  283. }
  284. }
  285. }
  286. public DocumentEncryptionAtom getDocumentEncryptionAtom() {
  287. for (Record r : _records) {
  288. if (r instanceof DocumentEncryptionAtom) {
  289. return (DocumentEncryptionAtom) r;
  290. }
  291. }
  292. return null;
  293. }
  294. /**
  295. * Find the "Current User" stream, and load it
  296. */
  297. private void readCurrentUserStream() {
  298. try {
  299. currentUser = new CurrentUserAtom(getDirectory());
  300. } catch (IOException ie) {
  301. LOG.atError().withThrowable(ie).log("Error finding Current User Atom");
  302. currentUser = new CurrentUserAtom();
  303. }
  304. }
  305. /**
  306. * Find any other streams from the filesystem, and load them
  307. */
  308. private void readOtherStreams() {
  309. // Currently, there aren't any
  310. }
  311. /**
  312. * Find and read in pictures contained in this presentation.
  313. * This is lazily called as and when we want to touch pictures.
  314. */
  315. private void readPictures() throws IOException {
  316. // if the presentation doesn't contain pictures, will use an empty collection instead
  317. if (!getDirectory().hasEntry("Pictures")) {
  318. _pictures = new ArrayList<>();
  319. return;
  320. }
  321. DocumentEntry entry = (DocumentEntry) getDirectory().getEntry("Pictures");
  322. EscherContainerRecord blipStore = getBlipStore();
  323. byte[] pictstream;
  324. try (DocumentInputStream is = getDirectory().createDocumentInputStream(entry)) {
  325. pictstream = IOUtils.toByteArray(is, entry.getSize());
  326. }
  327. List<PictureFactory> factories = new ArrayList<>();
  328. try (HSLFSlideShowEncrypted decryptData = new HSLFSlideShowEncrypted(getDocumentEncryptionAtom())) {
  329. int pos = 0;
  330. // An empty picture record (length 0) will take up 8 bytes
  331. while (pos <= (pictstream.length - HSLFPictureData.PREAMBLE_SIZE)) {
  332. int offset = pos;
  333. decryptData.decryptPicture(pictstream, offset);
  334. // Image signature
  335. int signature = LittleEndian.getUShort(pictstream, pos);
  336. pos += LittleEndianConsts.SHORT_SIZE;
  337. // Image type + 0xF018
  338. int type = LittleEndian.getUShort(pictstream, pos);
  339. pos += LittleEndianConsts.SHORT_SIZE;
  340. // Image size (excluding the 8 byte header)
  341. int imgsize = LittleEndian.getInt(pictstream, pos);
  342. pos += LittleEndianConsts.INT_SIZE;
  343. // When parsing the BStoreDelay stream, [MS-ODRAW] says that we
  344. // should terminate if the type isn't 0xf007 or 0xf018->0xf117
  345. if (!((type == 0xf007) || (type >= 0xf018 && type <= 0xf117))) {
  346. break;
  347. }
  348. // The image size must be 0 or greater
  349. // (0 is allowed, but odd, since we do wind on by the header each
  350. // time, so we won't get stuck)
  351. if (imgsize < 0) {
  352. throw new CorruptPowerPointFileException("The file contains a picture, at position " + factories.size() + ", which has a negatively sized data length, so we can't trust any of the picture data");
  353. }
  354. // If the type (including the bonus 0xF018) is 0, skip it
  355. PictureType pt = PictureType.forNativeID(type - 0xF018);
  356. if (pt == null) {
  357. LOG.atError().log("Problem reading picture: Invalid image type 0, on picture with length {}.\nYour document will probably become corrupted if you save it! Position: {}", box(imgsize),box(pos));
  358. } else {
  359. //The pictstream can be truncated halfway through a picture.
  360. //This is not a problem if the pictstream contains extra pictures
  361. //that are not used in any slide -- BUG-60305
  362. if (pos + imgsize > pictstream.length) {
  363. LOG.atWarn().log("\"Pictures\" stream may have ended early. In some circumstances, this is not a problem; " +
  364. "in others, this could indicate a corrupt file");
  365. break;
  366. }
  367. // Copy the data, ready to pass to PictureData
  368. byte[] imgdata = IOUtils.safelyClone(pictstream, pos, imgsize, MAX_RECORD_LENGTH);
  369. factories.add(new PictureFactory(blipStore, pt, imgdata, offset, signature));
  370. }
  371. pos += imgsize;
  372. }
  373. }
  374. matchPicturesAndRecords(factories, blipStore);
  375. List<HSLFPictureData> pictures = new ArrayList<>();
  376. for (PictureFactory it : factories) {
  377. try {
  378. HSLFPictureData pict = it.build();
  379. pict.setIndex(pictures.size() + 1); // index is 1-based
  380. pictures.add(pict);
  381. } catch (IllegalArgumentException e) {
  382. LOG.atError().withThrowable(e).log("Problem reading picture. Your document will probably become corrupted if you save it!");
  383. }
  384. }
  385. _pictures = pictures;
  386. }
  387. /**
  388. * Matches all of the {@link PictureFactory PictureFactories} for a slideshow with {@link EscherBSERecord}s in the
  389. * Blip Store for the slideshow.
  390. * <p>
  391. * When reading a slideshow into memory, we have to match the records in the Blip Store with the factories
  392. * representing picture in the pictures stream. This can be difficult, as presentations might have incorrectly
  393. * formatted data. This function attempts to perform matching using multiple heuristics to increase the likelihood
  394. * of finding all pairs, while aiming to reduce the likelihood of associating incorrect pairs.
  395. *
  396. * @param factories Factories for creating {@link HSLFPictureData} out of the pictures stream.
  397. * @param blipStore Blip Store of the presentation being loaded.
  398. */
  399. private static void matchPicturesAndRecords(List<PictureFactory> factories, EscherContainerRecord blipStore) {
  400. // LinkedList because we're sorting and removing.
  401. LinkedList<PictureFactory> unmatchedFactories = new LinkedList<>(factories);
  402. unmatchedFactories.sort(Comparator.comparingInt(PictureFactory::getOffset));
  403. // Arrange records by offset. In the common case of a well-formed slideshow, where every factory has a
  404. // matching record, this is somewhat wasteful, but is necessary to handle the uncommon case where multiple
  405. // records share an offset.
  406. Map<Integer, List<EscherBSERecord>> unmatchedRecords = new HashMap<>();
  407. for (EscherRecord child : blipStore) {
  408. EscherBSERecord record = (EscherBSERecord) child;
  409. unmatchedRecords.computeIfAbsent(record.getOffset(), k -> new ArrayList<>()).add(record);
  410. }
  411. // The first pass through the factories only pairs a factory with a record if we're very confident that they
  412. // are a match. Confidence comes from a perfect match on the offset, and if necessary, the UID. Matched
  413. // factories and records are removed from the unmatched collections.
  414. for (Iterator<PictureFactory> iterator = unmatchedFactories.iterator(); iterator.hasNext(); ) {
  415. PictureFactory factory = iterator.next();
  416. int physicalOffset = factory.getOffset();
  417. List<EscherBSERecord> recordsAtOffset = unmatchedRecords.get(physicalOffset);
  418. if (recordsAtOffset == null || recordsAtOffset.isEmpty()) {
  419. // There are no records that have an offset matching the physical offset in the stream. We'll do
  420. // more complicated and less reliable matching for this factory after all "well known"
  421. // image <-> record pairs have been found.
  422. LOG.atDebug().log("No records with offset {}", box(physicalOffset));
  423. } else if (recordsAtOffset.size() == 1) {
  424. // Only 1 record has the same offset as the target image. Assume these are a pair.
  425. factory.setRecord(recordsAtOffset.get(0));
  426. unmatchedRecords.remove(physicalOffset);
  427. iterator.remove();
  428. } else {
  429. // Multiple records share an offset. Perform additional matching based on UID.
  430. for (int i = 0; i < recordsAtOffset.size(); i++) {
  431. EscherBSERecord record = recordsAtOffset.get(i);
  432. byte[] recordUid = record.getUid();
  433. byte[] imageHeader = Arrays.copyOf(factory.imageData, HSLFPictureData.CHECKSUM_SIZE);
  434. if (Arrays.equals(recordUid, imageHeader)) {
  435. factory.setRecord(record);
  436. recordsAtOffset.remove(i);
  437. iterator.remove();
  438. break;
  439. }
  440. }
  441. }
  442. }
  443. // At this point, any factories remaining didn't have a record with a matching offset. The second pass
  444. // through the factories pairs based on the UID. Factories for which a record with a matching UID cannot be
  445. // found will get a new record.
  446. List<EscherBSERecord> remainingRecords = unmatchedRecords.values()
  447. .stream()
  448. .flatMap(Collection::stream)
  449. .collect(Collectors.toList());
  450. for (PictureFactory factory : unmatchedFactories) {
  451. boolean matched = false;
  452. for (int i = remainingRecords.size() - 1; i >= 0; i--) {
  453. EscherBSERecord record = remainingRecords.get(i);
  454. byte[] recordUid = record.getUid();
  455. byte[] imageHeader = Arrays.copyOf(factory.imageData, HSLFPictureData.CHECKSUM_SIZE);
  456. if (Arrays.equals(recordUid, imageHeader)) {
  457. remainingRecords.remove(i);
  458. factory.setRecord(record);
  459. record.setOffset(factory.getOffset());
  460. matched = true;
  461. }
  462. }
  463. if (!matched) {
  464. // Synthesize a new record
  465. LOG.atDebug().log("No record found for picture at offset {}", box(factory.offset));
  466. EscherBSERecord record = HSLFSlideShow.addNewEscherBseRecord(blipStore, factory.type, factory.imageData, factory.offset);
  467. factory.setRecord(record);
  468. }
  469. }
  470. LOG.atDebug().log("Found {} unmatched records.", box(remainingRecords.size()));
  471. }
  472. /**
  473. * remove duplicated UserEditAtoms and merge PersistPtrHolder, i.e.
  474. * remove document edit history
  475. */
  476. public void normalizeRecords() {
  477. try {
  478. updateAndWriteDependantRecords(null, null);
  479. } catch (IOException e) {
  480. throw new CorruptPowerPointFileException(e);
  481. }
  482. _records = HSLFSlideShowEncrypted.normalizeRecords(_records);
  483. }
  484. /**
  485. * This is a helper functions, which is needed for adding new position dependent records
  486. * or finally write the slideshow to a file.
  487. *
  488. * @param os the stream to write to, if null only the references are updated
  489. * @param interestingRecords a map of interesting records (PersistPtrHolder and UserEditAtom)
  490. * referenced by their RecordType. Only the very last of each type will be saved to the map.
  491. * May be null, if not needed.
  492. */
  493. @SuppressWarnings("WeakerAccess")
  494. public void updateAndWriteDependantRecords(OutputStream os, Map<RecordTypes, PositionDependentRecord> interestingRecords)
  495. throws IOException {
  496. // For position dependent records, hold where they were and now are
  497. // As we go along, update, and hand over, to any Position Dependent
  498. // records we happen across
  499. Map<Integer, Integer> oldToNewPositions = new HashMap<>();
  500. // First pass - figure out where all the position dependent
  501. // records are going to end up, in the new scheme
  502. // (Annoyingly, some powerpoint files have PersistPtrHolders
  503. // that reference slides after the PersistPtrHolder)
  504. UserEditAtom usr = null;
  505. PersistPtrHolder ptr = null;
  506. CountingOS cos = new CountingOS();
  507. for (Record record : _records) {
  508. // all top level records are position dependent
  509. assert (record instanceof PositionDependentRecord);
  510. PositionDependentRecord pdr = (PositionDependentRecord) record;
  511. int oldPos = pdr.getLastOnDiskOffset();
  512. int newPos = cos.size();
  513. pdr.setLastOnDiskOffset(newPos);
  514. if (oldPos != UNSET_OFFSET) {
  515. // new records don't need a mapping, as they aren't in a relation yet
  516. oldToNewPositions.put(oldPos, newPos);
  517. }
  518. // Grab interesting records as they come past
  519. // this will only save the very last record of each type
  520. RecordTypes saveme = null;
  521. int recordType = (int) record.getRecordType();
  522. if (recordType == RecordTypes.PersistPtrIncrementalBlock.typeID) {
  523. saveme = RecordTypes.PersistPtrIncrementalBlock;
  524. ptr = (PersistPtrHolder) pdr;
  525. } else if (recordType == RecordTypes.UserEditAtom.typeID) {
  526. saveme = RecordTypes.UserEditAtom;
  527. usr = (UserEditAtom) pdr;
  528. }
  529. if (interestingRecords != null && saveme != null) {
  530. interestingRecords.put(saveme, pdr);
  531. }
  532. // Dummy write out, so the position winds on properly
  533. record.writeOut(cos);
  534. }
  535. cos.close();
  536. if (usr == null || ptr == null) {
  537. throw new HSLFException("UserEditAtom or PersistPtr can't be determined.");
  538. }
  539. Map<Integer, Integer> persistIds = new HashMap<>();
  540. for (Map.Entry<Integer, Integer> entry : ptr.getSlideLocationsLookup().entrySet()) {
  541. persistIds.put(oldToNewPositions.get(entry.getValue()), entry.getKey());
  542. }
  543. try (HSLFSlideShowEncrypted encData = new HSLFSlideShowEncrypted(getDocumentEncryptionAtom())) {
  544. for (Record record : _records) {
  545. assert (record instanceof PositionDependentRecord);
  546. // We've already figured out their new location, and
  547. // told them that
  548. // Tell them of the positions of the other records though
  549. PositionDependentRecord pdr = (PositionDependentRecord) record;
  550. Integer persistId = persistIds.get(pdr.getLastOnDiskOffset());
  551. if (persistId == null) {
  552. persistId = 0;
  553. }
  554. // For now, we're only handling PositionDependentRecord's that
  555. // happen at the top level.
  556. // In future, we'll need the handle them everywhere, but that's
  557. // a bit trickier
  558. pdr.updateOtherRecordReferences(oldToNewPositions);
  559. // Whatever happens, write out that record tree
  560. if (os != null) {
  561. record.writeOut(encData.encryptRecord(os, persistId, record));
  562. }
  563. }
  564. }
  565. // Update and write out the Current User atom
  566. int oldLastUserEditAtomPos = (int) currentUser.getCurrentEditOffset();
  567. Integer newLastUserEditAtomPos = oldToNewPositions.get(oldLastUserEditAtomPos);
  568. if (newLastUserEditAtomPos == null || usr.getLastOnDiskOffset() != newLastUserEditAtomPos) {
  569. throw new HSLFException("Couldn't find the new location of the last UserEditAtom that used to be at " + oldLastUserEditAtomPos);
  570. }
  571. currentUser.setCurrentEditOffset(usr.getLastOnDiskOffset());
  572. }
  573. /**
  574. * Writes out the slideshow to the currently open file.
  575. * <p>
  576. * <p>This will fail (with an {@link IllegalStateException} if the
  577. * slideshow was opened read-only, opened from an {@link InputStream}
  578. * instead of a File, or if this is not the root document. For those cases,
  579. * you must use {@link #write(OutputStream)} or {@link #write(File)} to
  580. * write to a brand new document.
  581. *
  582. * @throws IOException thrown on errors writing to the file
  583. * @throws IllegalStateException if this isn't from a writable File
  584. * @since POI 3.15 beta 3
  585. */
  586. @Override
  587. public void write() throws IOException {
  588. validateInPlaceWritePossible();
  589. // Write the PowerPoint streams to the current FileSystem
  590. // No need to do anything to other streams, already there!
  591. write(getDirectory().getFileSystem(), false);
  592. // Sync with the File on disk
  593. getDirectory().getFileSystem().writeFilesystem();
  594. }
  595. /**
  596. * Writes out the slideshow file the is represented by an instance
  597. * of this class.
  598. * <p>This will write out only the common OLE2 streams. If you require all
  599. * streams to be written out, use {@link #write(File, boolean)}
  600. * with <code>preserveNodes</code> set to <code>true</code>.
  601. *
  602. * @param newFile The File to write to.
  603. * @throws IOException If there is an unexpected IOException from writing to the File
  604. */
  605. @Override
  606. public void write(File newFile) throws IOException {
  607. // Write out, but only the common streams
  608. write(newFile, false);
  609. }
  610. /**
  611. * Writes out the slideshow file the is represented by an instance
  612. * of this class.
  613. * If you require all streams to be written out (eg Marcos, embeded
  614. * documents), then set <code>preserveNodes</code> set to <code>true</code>
  615. *
  616. * @param newFile The File to write to.
  617. * @param preserveNodes Should all OLE2 streams be written back out, or only the common ones?
  618. * @throws IOException If there is an unexpected IOException from writing to the File
  619. */
  620. public void write(File newFile, boolean preserveNodes) throws IOException {
  621. // Get a new FileSystem to write into
  622. try (POIFSFileSystem outFS = POIFSFileSystem.create(newFile)) {
  623. // Write into the new FileSystem
  624. write(outFS, preserveNodes);
  625. // Send the POIFSFileSystem object out to the underlying stream
  626. outFS.writeFilesystem();
  627. }
  628. }
  629. /**
  630. * Writes out the slideshow file the is represented by an instance
  631. * of this class.
  632. * <p>This will write out only the common OLE2 streams. If you require all
  633. * streams to be written out, use {@link #write(OutputStream, boolean)}
  634. * with <code>preserveNodes</code> set to <code>true</code>.
  635. *
  636. * @param out The OutputStream to write to.
  637. * @throws IOException If there is an unexpected IOException from
  638. * the passed in OutputStream
  639. */
  640. @Override
  641. public void write(OutputStream out) throws IOException {
  642. // Write out, but only the common streams
  643. write(out, false);
  644. }
  645. /**
  646. * Writes out the slideshow file the is represented by an instance
  647. * of this class.
  648. * If you require all streams to be written out (eg Marcos, embeded
  649. * documents), then set <code>preserveNodes</code> set to <code>true</code>
  650. *
  651. * @param out The OutputStream to write to.
  652. * @param preserveNodes Should all OLE2 streams be written back out, or only the common ones?
  653. * @throws IOException If there is an unexpected IOException from
  654. * the passed in OutputStream
  655. */
  656. public void write(OutputStream out, boolean preserveNodes) throws IOException {
  657. // Get a new FileSystem to write into
  658. try (POIFSFileSystem outFS = new POIFSFileSystem()) {
  659. // Write into the new FileSystem
  660. write(outFS, preserveNodes);
  661. // Send the POIFSFileSystem object out to the underlying stream
  662. outFS.writeFilesystem(out);
  663. }
  664. }
  665. private void write(POIFSFileSystem outFS, boolean copyAllOtherNodes) throws IOException {
  666. // read properties and pictures, with old encryption settings where appropriate
  667. if (_pictures == null) {
  668. readPictures();
  669. }
  670. getDocumentSummaryInformation();
  671. // The list of entries we've written out
  672. final List<String> writtenEntries = new ArrayList<>(1);
  673. // set new encryption settings
  674. try (HSLFSlideShowEncrypted encryptedSS = new HSLFSlideShowEncrypted(getDocumentEncryptionAtom())) {
  675. _records = encryptedSS.updateEncryptionRecord(_records);
  676. // Write out the Property Streams
  677. writeProperties(outFS, writtenEntries);
  678. BufAccessBAOS baos = new BufAccessBAOS();
  679. // For position dependent records, hold where they were and now are
  680. // As we go along, update, and hand over, to any Position Dependent
  681. // records we happen across
  682. updateAndWriteDependantRecords(baos, null);
  683. // Update our cached copy of the bytes that make up the PPT stream
  684. _docstream = baos.toByteArray();
  685. baos.close();
  686. // Write the PPT stream into the POIFS layer
  687. ByteArrayInputStream bais = new ByteArrayInputStream(_docstream);
  688. outFS.createOrUpdateDocument(bais, POWERPOINT_DOCUMENT);
  689. writtenEntries.add(POWERPOINT_DOCUMENT);
  690. currentUser.setEncrypted(encryptedSS.getDocumentEncryptionAtom() != null);
  691. currentUser.writeToFS(outFS);
  692. writtenEntries.add("Current User");
  693. if (_pictures.size() > 0) {
  694. BufAccessBAOS pict = new BufAccessBAOS();
  695. for (HSLFPictureData p : _pictures) {
  696. int offset = pict.size();
  697. p.write(pict);
  698. encryptedSS.encryptPicture(pict.getBuf(), offset);
  699. }
  700. outFS.createOrUpdateDocument(
  701. new ByteArrayInputStream(pict.getBuf(), 0, pict.size()), "Pictures"
  702. );
  703. writtenEntries.add("Pictures");
  704. pict.close();
  705. }
  706. }
  707. // If requested, copy over any other streams we spot, eg Macros
  708. if (copyAllOtherNodes) {
  709. EntryUtils.copyNodes(getDirectory().getFileSystem(), outFS, writtenEntries);
  710. }
  711. }
  712. @Override
  713. public EncryptionInfo getEncryptionInfo() {
  714. DocumentEncryptionAtom dea = getDocumentEncryptionAtom();
  715. return (dea != null) ? dea.getEncryptionInfo() : null;
  716. }
  717. /* ******************* adding methods follow ********************* */
  718. /**
  719. * Adds a new root level record, at the end, but before the last
  720. * PersistPtrIncrementalBlock.
  721. */
  722. @SuppressWarnings({"UnusedReturnValue", "WeakerAccess"})
  723. public synchronized int appendRootLevelRecord(Record newRecord) {
  724. int addedAt = -1;
  725. Record[] r = new Record[_records.length + 1];
  726. boolean added = false;
  727. for (int i = (_records.length - 1); i >= 0; i--) {
  728. if (added) {
  729. // Just copy over
  730. r[i] = _records[i];
  731. } else {
  732. r[(i + 1)] = _records[i];
  733. if (_records[i] instanceof PersistPtrHolder) {
  734. r[i] = newRecord;
  735. added = true;
  736. addedAt = i;
  737. }
  738. }
  739. }
  740. _records = r;
  741. return addedAt;
  742. }
  743. /**
  744. * Add a new picture to this presentation.
  745. *
  746. * @return offset of this picture in the Pictures stream
  747. */
  748. public int addPicture(HSLFPictureData img) {
  749. // Process any existing pictures if we haven't yet
  750. if (_pictures == null) {
  751. try {
  752. readPictures();
  753. } catch (IOException e) {
  754. throw new CorruptPowerPointFileException(e.getMessage());
  755. }
  756. }
  757. // Add the new picture in
  758. int offset = 0;
  759. if (_pictures.size() > 0) {
  760. HSLFPictureData prev = _pictures.get(_pictures.size() - 1);
  761. offset = prev.getOffset() + prev.getBseSize();
  762. }
  763. img.setIndex(_pictures.size() + 1); // index is 1-based
  764. _pictures.add(img);
  765. return offset;
  766. }
  767. /* ******************* fetching methods follow ********************* */
  768. /**
  769. * Returns an array of all the records found in the slideshow
  770. */
  771. public Record[] getRecords() {
  772. return _records;
  773. }
  774. /**
  775. * Returns an array of the bytes of the file. Only correct after a
  776. * call to open or write - at all other times might be wrong!
  777. */
  778. public byte[] getUnderlyingBytes() {
  779. return _docstream;
  780. }
  781. /**
  782. * Fetch the Current User Atom of the document
  783. */
  784. public CurrentUserAtom getCurrentUserAtom() {
  785. return currentUser;
  786. }
  787. /**
  788. * Return list of pictures contained in this presentation
  789. *
  790. * @return list with the read pictures or an empty list if the
  791. * presentation doesn't contain pictures.
  792. */
  793. public List<HSLFPictureData> getPictureData() {
  794. if (_pictures == null) {
  795. try {
  796. readPictures();
  797. } catch (IOException e) {
  798. throw new CorruptPowerPointFileException(e.getMessage());
  799. }
  800. }
  801. return Collections.unmodifiableList(_pictures);
  802. }
  803. /**
  804. * Gets embedded object data from the slide show.
  805. *
  806. * @return the embedded objects.
  807. */
  808. public HSLFObjectData[] getEmbeddedObjects() {
  809. if (_objects == null) {
  810. List<HSLFObjectData> objects = new ArrayList<>();
  811. for (Record r : _records) {
  812. if (r instanceof ExOleObjStg) {
  813. objects.add(new HSLFObjectData((ExOleObjStg) r));
  814. }
  815. }
  816. _objects = objects.toArray(new HSLFObjectData[0]);
  817. }
  818. return _objects;
  819. }
  820. private EscherContainerRecord getBlipStore() {
  821. Document documentRecord = null;
  822. for (Record record : _records) {
  823. if (record.getRecordType() == RecordTypes.Document.typeID) {
  824. documentRecord = (Document) record;
  825. break;
  826. }
  827. }
  828. if (documentRecord == null) {
  829. throw new CorruptPowerPointFileException("Document record is missing");
  830. }
  831. EscherContainerRecord blipStore;
  832. EscherContainerRecord dggContainer = documentRecord.getPPDrawingGroup().getDggContainer();
  833. blipStore = HSLFShape.getEscherChild(dggContainer, EscherContainerRecord.BSTORE_CONTAINER);
  834. if (blipStore == null) {
  835. blipStore = new EscherContainerRecord();
  836. blipStore.setRecordId(EscherContainerRecord.BSTORE_CONTAINER);
  837. dggContainer.addChildBefore(blipStore, EscherOptRecord.RECORD_ID);
  838. }
  839. return blipStore;
  840. }
  841. @Override
  842. public void close() throws IOException {
  843. // only close the filesystem, if we are based on the root node.
  844. // embedded documents/slideshows shouldn't close the parent container
  845. if (getDirectory().getParent() == null ||
  846. PP97_DOCUMENT.equals(getDirectory().getName())) {
  847. POIFSFileSystem fs = getDirectory().getFileSystem();
  848. if (fs != null) {
  849. fs.close();
  850. }
  851. }
  852. }
  853. @Override
  854. protected String getEncryptedPropertyStreamName() {
  855. return "EncryptedSummary";
  856. }
  857. void writePropertiesImpl() throws IOException {
  858. super.writeProperties();
  859. }
  860. PropertySet getPropertySetImpl(String setName) throws IOException {
  861. return super.getPropertySet(setName);
  862. }
  863. PropertySet getPropertySetImpl(String setName, EncryptionInfo encryptionInfo) throws IOException {
  864. return super.getPropertySet(setName, encryptionInfo);
  865. }
  866. void writePropertiesImpl(POIFSFileSystem outFS, List<String> writtenEntries) throws IOException {
  867. super.writeProperties(outFS, writtenEntries);
  868. }
  869. void validateInPlaceWritePossibleImpl() throws IllegalStateException {
  870. super.validateInPlaceWritePossible();
  871. }
  872. void clearDirectoryImpl() {
  873. super.clearDirectory();
  874. }
  875. boolean initDirectoryImpl() {
  876. return super.initDirectory();
  877. }
  878. void replaceDirectoryImpl(DirectoryNode newDirectory) throws IOException {
  879. super.replaceDirectory(newDirectory);
  880. }
  881. private static class BufAccessBAOS extends ByteArrayOutputStream {
  882. public byte[] getBuf() {
  883. return buf;
  884. }
  885. }
  886. private static class CountingOS extends OutputStream {
  887. int count;
  888. @Override
  889. public void write(int b) throws IOException {
  890. count++;
  891. }
  892. @Override
  893. public void write(byte[] b) throws IOException {
  894. count += b.length;
  895. }
  896. @Override
  897. public void write(byte[] b, int off, int len) throws IOException {
  898. count += len;
  899. }
  900. public int size() {
  901. return count;
  902. }
  903. }
  904. /**
  905. * Assists in creating {@link HSLFPictureData} when parsing a slideshow.
  906. *
  907. * This class is relied upon heavily by {@link #matchPicturesAndRecords(List, EscherContainerRecord)}.
  908. */
  909. static final class PictureFactory {
  910. final byte[] imageData;
  911. private final EscherContainerRecord recordContainer;
  912. private final PictureData.PictureType type;
  913. private final int offset;
  914. private final int signature;
  915. private EscherBSERecord record;
  916. PictureFactory(
  917. EscherContainerRecord recordContainer,
  918. PictureData.PictureType type,
  919. byte[] imageData,
  920. int offset,
  921. int signature
  922. ) {
  923. this.recordContainer = Objects.requireNonNull(recordContainer);
  924. this.type = Objects.requireNonNull(type);
  925. this.imageData = Objects.requireNonNull(imageData);
  926. this.offset = offset;
  927. this.signature = signature;
  928. }
  929. int getOffset() {
  930. return offset;
  931. }
  932. /**
  933. * Constructs a new {@link HSLFPictureData}.
  934. * <p>
  935. * The {@link EscherBSERecord} must have been set via {@link #setRecord(EscherBSERecord)} prior to invocation.
  936. */
  937. HSLFPictureData build() {
  938. Objects.requireNonNull(record, "Can't build an instance until the record has been assigned.");
  939. return HSLFPictureData.createFromSlideshowData(type, recordContainer, record, imageData, signature);
  940. }
  941. /**
  942. * Sets the {@link EscherBSERecord} with which this factory should create a {@link HSLFPictureData}.
  943. */
  944. PictureFactory setRecord(EscherBSERecord bse) {
  945. record = bse;
  946. return this;
  947. }
  948. }
  949. }