You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

HSLFSlideShowImpl.java 47KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013101410151016101710181019102010211022102310241025102610271028102910301031103210331034103510361037103810391040104110421043104410451046104710481049105010511052105310541055105610571058105910601061106210631064106510661067106810691070107110721073107410751076107710781079108010811082108310841085108610871088108910901091109210931094109510961097109810991100110111021103110411051106110711081109111011111112111311141115111611171118111911201121112211231124112511261127112811291130113111321133113411351136113711381139114011411142114311441145114611471148114911501151115211531154115511561157
  1. /* ====================================================================
  2. Licensed to the Apache Software Foundation (ASF) under one or more
  3. contributor license agreements. See the NOTICE file distributed with
  4. this work for additional information regarding copyright ownership.
  5. The ASF licenses this file to You under the Apache License, Version 2.0
  6. (the "License"); you may not use this file except in compliance with
  7. the License. You may obtain a copy of the License at
  8. http://www.apache.org/licenses/LICENSE-2.0
  9. Unless required by applicable law or agreed to in writing, software
  10. distributed under the License is distributed on an "AS IS" BASIS,
  11. WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. See the License for the specific language governing permissions and
  13. limitations under the License.
  14. ==================================================================== */
  15. package org.apache.poi.hslf.usermodel;
  16. import static org.apache.logging.log4j.util.Unbox.box;
  17. import static org.apache.poi.hslf.usermodel.HSLFSlideShow.POWERPOINT_DOCUMENT;
  18. import static org.apache.poi.hslf.usermodel.HSLFSlideShow.PP95_DOCUMENT;
  19. import static org.apache.poi.hslf.usermodel.HSLFSlideShow.PP97_DOCUMENT;
  20. import java.io.ByteArrayInputStream;
  21. import java.io.Closeable;
  22. import java.io.File;
  23. import java.io.IOException;
  24. import java.io.InputStream;
  25. import java.io.OutputStream;
  26. import java.io.SequenceInputStream;
  27. import java.util.ArrayList;
  28. import java.util.Arrays;
  29. import java.util.Collection;
  30. import java.util.Collections;
  31. import java.util.Comparator;
  32. import java.util.Enumeration;
  33. import java.util.HashMap;
  34. import java.util.Iterator;
  35. import java.util.LinkedList;
  36. import java.util.List;
  37. import java.util.Map;
  38. import java.util.NavigableMap;
  39. import java.util.Objects;
  40. import java.util.TreeMap;
  41. import java.util.stream.Collectors;
  42. import org.apache.commons.collections4.IteratorUtils;
  43. import org.apache.commons.io.output.UnsynchronizedByteArrayOutputStream;
  44. import org.apache.logging.log4j.LogManager;
  45. import org.apache.logging.log4j.Logger;
  46. import org.apache.poi.POIDocument;
  47. import org.apache.poi.ddf.EscherBSERecord;
  48. import org.apache.poi.ddf.EscherContainerRecord;
  49. import org.apache.poi.ddf.EscherOptRecord;
  50. import org.apache.poi.ddf.EscherRecord;
  51. import org.apache.poi.hpsf.PropertySet;
  52. import org.apache.poi.hslf.exceptions.CorruptPowerPointFileException;
  53. import org.apache.poi.hslf.exceptions.HSLFException;
  54. import org.apache.poi.hslf.exceptions.OldPowerPointFormatException;
  55. import org.apache.poi.hslf.record.CurrentUserAtom;
  56. import org.apache.poi.hslf.record.Document;
  57. import org.apache.poi.hslf.record.DocumentEncryptionAtom;
  58. import org.apache.poi.hslf.record.ExOleObjStg;
  59. import org.apache.poi.hslf.record.PersistPtrHolder;
  60. import org.apache.poi.hslf.record.PersistRecord;
  61. import org.apache.poi.hslf.record.PositionDependentRecord;
  62. import org.apache.poi.hslf.record.Record;
  63. import org.apache.poi.hslf.record.RecordTypes;
  64. import org.apache.poi.hslf.record.UserEditAtom;
  65. import org.apache.poi.poifs.crypt.EncryptionInfo;
  66. import org.apache.poi.poifs.filesystem.DirectoryNode;
  67. import org.apache.poi.poifs.filesystem.DocumentEntry;
  68. import org.apache.poi.poifs.filesystem.DocumentInputStream;
  69. import org.apache.poi.poifs.filesystem.Entry;
  70. import org.apache.poi.poifs.filesystem.EntryUtils;
  71. import org.apache.poi.poifs.filesystem.POIFSFileSystem;
  72. import org.apache.poi.sl.usermodel.PictureData;
  73. import org.apache.poi.sl.usermodel.PictureData.PictureType;
  74. import org.apache.poi.util.IOUtils;
  75. import org.apache.poi.util.LittleEndian;
  76. import org.apache.poi.util.LittleEndianConsts;
  77. /**
  78. * This class contains the main functionality for the Powerpoint file
  79. * "reader". It is only a very basic class for now
  80. */
  81. public final class HSLFSlideShowImpl extends POIDocument implements Closeable {
  82. private static final Logger LOG = LogManager.getLogger(HSLFSlideShowImpl.class);
  83. static final int UNSET_OFFSET = -1;
  84. //arbitrarily selected; may need to increase
  85. private static final int DEFAULT_MAX_RECORD_LENGTH = 200_000_000;
  86. private static final int MAX_DOCUMENT_SIZE = 100_000_000;
  87. private static int MAX_RECORD_LENGTH = DEFAULT_MAX_RECORD_LENGTH;
  88. private static final int MAX_IMAGE_LENGTH = 10_000_000;
  89. // Holds metadata on where things are in our document
  90. private CurrentUserAtom currentUser;
  91. // Low level contents of the file
  92. private byte[] _docstream;
  93. // Low level contents
  94. private Record[] _records;
  95. // Raw Pictures contained in the pictures stream
  96. private List<HSLFPictureData> _pictures;
  97. // Embedded objects stored in storage records in the document stream, lazily populated.
  98. private HSLFObjectData[] _objects;
  99. /**
  100. * @param length the max record length allowed for HSLFSlideShowImpl
  101. */
  102. public static void setMaxRecordLength(int length) {
  103. MAX_RECORD_LENGTH = length;
  104. }
  105. /**
  106. * @return the max record length allowed for HSLFSlideShowImpl
  107. */
  108. public static int getMaxRecordLength() {
  109. return MAX_RECORD_LENGTH;
  110. }
  /**
   * Opens the file with the given name as a POIFS filesystem and parses the
   * Powerpoint document stored in it into this object's data structures.
   *
   * @param fileName The name of the file to read.
   * @throws IOException if there is a problem while parsing the document.
   */
  @SuppressWarnings("resource")
  public HSLFSlideShowImpl(String fileName) throws IOException {
    // the filesystem is handed on to the POIFSFileSystem based constructor
    this(new POIFSFileSystem(new File(fileName)));
  }
  /**
   * Wraps the given stream in a POIFS filesystem and parses the Powerpoint
   * document stored in it into this object's data structures.
   *
   * @param inputStream the source of the data
   * @throws IOException if there is a problem while parsing the document.
   */
  @SuppressWarnings("resource")
  public HSLFSlideShowImpl(InputStream inputStream) throws IOException {
    // let POIFS deal with the OLE2 container first
    this(new POIFSFileSystem(inputStream));
  }
  /**
   * Parses the Powerpoint document found at the root of the given POIFS
   * filesystem into this object's data structures.
   *
   * @param filesystem the POIFS FileSystem to read from
   * @throws IOException if there is a problem while parsing the document.
   */
  public HSLFSlideShowImpl(POIFSFileSystem filesystem) throws IOException {
    // parsing always starts from the root directory of the filesystem
    this(filesystem.getRoot());
  }
  /**
   * Parses the Powerpoint document located at a specific directory of a
   * POIFS filesystem and places all the important stuff into data structures.
   * <p>
   * If any of the read steps fails, the filesystem owning {@code dir} is
   * closed before the exception is passed on to the caller.
   *
   * @param dir the POIFS directory to read from
   * @throws IOException if there is a problem while parsing the document.
   */
  public HSLFSlideShowImpl(DirectoryNode dir) throws IOException {
    super(handleDualStorage(dir));
    try {
      // The "Current User" stream goes first, as it is needed
      // before encrypted documents can be detected
      readCurrentUserStream();

      // Then pull in the bytes that make up the PowerPoint stream ...
      readPowerPointStream();

      // ... and turn those bytes into records
      buildRecords();

      // Finally pick up any other streams
      readOtherStreams();
    } catch (IOException | RuntimeException e) {
      // the document could not be read: release the filesystem here,
      // otherwise its file handles would leak
      dir.getFileSystem().close();
      throw e;
    }
  }
  172. private static DirectoryNode handleDualStorage(DirectoryNode dir) throws IOException {
  173. // when there's a dual storage entry, use it, as the outer document can't be read quite probably ...
  174. if (!dir.hasEntry(PP97_DOCUMENT)) {
  175. return dir;
  176. }
  177. return (DirectoryNode) dir.getEntry(PP97_DOCUMENT);
  178. }
  179. /**
  180. * Constructs a new, empty, Powerpoint document.
  181. */
  182. public static HSLFSlideShowImpl create() {
  183. try (InputStream is = HSLFSlideShowImpl.class.getResourceAsStream("/org/apache/poi/hslf/data/empty.ppt")) {
  184. if (is == null) {
  185. throw new HSLFException("Missing resource 'empty.ppt'");
  186. }
  187. return new HSLFSlideShowImpl(is);
  188. } catch (IOException e) {
  189. throw new HSLFException(e);
  190. }
  191. }
  192. /**
  193. * Extracts the main PowerPoint document stream from the
  194. * POI file, ready to be passed
  195. *
  196. * @throws IOException when the powerpoint can't be read
  197. */
  198. private void readPowerPointStream() throws IOException {
  199. final DirectoryNode dir = getDirectory();
  200. if (!dir.hasEntry(POWERPOINT_DOCUMENT) && dir.hasEntry(PP95_DOCUMENT)) {
  201. throw new OldPowerPointFormatException("You seem to have supplied a PowerPoint95 file, which isn't supported");
  202. }
  203. // Get the main document stream
  204. final Entry entry = dir.getEntry(POWERPOINT_DOCUMENT);
  205. if (!(entry instanceof DocumentEntry)) {
  206. throw new IllegalArgumentException("Had unexpected type of entry for name: " + POWERPOINT_DOCUMENT + ": " + entry.getClass());
  207. }
  208. DocumentEntry docProps = (DocumentEntry) entry;
  209. // Grab the document stream
  210. int len = docProps.getSize();
  211. try (InputStream is = dir.createDocumentInputStream(docProps)) {
  212. _docstream = IOUtils.toByteArray(is, len, MAX_DOCUMENT_SIZE);
  213. }
  214. }
  215. /**
  216. * Builds the list of records, based on the contents
  217. * of the PowerPoint stream
  218. */
  219. private void buildRecords() throws IOException {
  220. // The format of records in a powerpoint file are:
  221. // <little endian 2 byte "info">
  222. // <little endian 2 byte "type">
  223. // <little endian 4 byte "length">
  224. // If it has a zero length, following it will be another record
  225. // <xx xx yy yy 00 00 00 00> <xx xx yy yy zz zz zz zz>
  226. // If it has a length, depending on its type it may have children or data
  227. // If it has children, these will follow straight away
  228. // <xx xx yy yy zz zz zz zz <xx xx yy yy zz zz zz zz>>
  229. // If it has data, this will come straigh after, and run for the length
  230. // <xx xx yy yy zz zz zz zz dd dd dd dd dd dd dd>
  231. // All lengths given exclude the 8 byte record header
  232. // (Data records are known as Atoms)
  233. // Document should start with:
  234. // 0F 00 E8 03 ## ## ## ##
  235. // (type 1000 = document, info 00 0f is normal, rest is document length)
  236. // 01 00 E9 03 28 00 00 00
  237. // (type 1001 = document atom, info 00 01 normal, 28 bytes long)
  238. // 80 16 00 00 E0 10 00 00 xx xx xx xx xx xx xx xx
  239. // 05 00 00 00 0A 00 00 00 xx xx xx
  240. // (the contents of the document atom, not sure what it means yet)
  241. // (records then follow)
  242. // When parsing a document, look to see if you know about that type
  243. // of the current record. If you know it's a type that has children,
  244. // process the record's data area looking for more records
  245. // If you know about the type and it doesn't have children, either do
  246. // something with the data (eg TextRun) or skip over it
  247. // If you don't know about the type, play safe and skip over it (using
  248. // its length to know where the next record will start)
  249. //
  250. _records = read(_docstream, (int) currentUser.getCurrentEditOffset());
  251. }
  252. private Record[] read(byte[] docstream, int usrOffset) throws IOException {
  253. //sort found records by offset.
  254. //(it is not necessary but SlideShow.findMostRecentCoreRecords() expects them sorted)
  255. NavigableMap<Integer, Record> records = new TreeMap<>(); // offset -> record
  256. Map<Integer, Integer> persistIds = new HashMap<>(); // offset -> persistId
  257. initRecordOffsets(docstream, usrOffset, records, persistIds);
  258. HSLFSlideShowEncrypted decryptData = new HSLFSlideShowEncrypted(docstream, records);
  259. for (Map.Entry<Integer, Record> entry : records.entrySet()) {
  260. Integer offset = entry.getKey();
  261. Record record = entry.getValue();
  262. Integer persistId = persistIds.get(offset);
  263. if (record == null) {
  264. // all plain records have been already added,
  265. // only new records need to be decrypted (tbd #35897)
  266. decryptData.decryptRecord(docstream, persistId, offset);
  267. record = Record.buildRecordAtOffset(docstream, offset);
  268. entry.setValue(record);
  269. }
  270. if (record instanceof PersistRecord) {
  271. ((PersistRecord) record).setPersistId(persistId);
  272. }
  273. }
  274. decryptData.close();
  275. return records.values().toArray(new Record[0]);
  276. }
  277. private void initRecordOffsets(byte[] docstream, int usrOffset, NavigableMap<Integer, Record> recordMap, Map<Integer, Integer> offset2id) {
  278. while (usrOffset != 0) {
  279. Record builtRecord = Record.buildRecordAtOffset(docstream, usrOffset);
  280. if (!(builtRecord instanceof UserEditAtom)) {
  281. throw new CorruptPowerPointFileException("Did not have a user edit atom: " + builtRecord);
  282. }
  283. UserEditAtom usr = (UserEditAtom) builtRecord;
  284. recordMap.put(usrOffset, usr);
  285. int psrOffset = usr.getPersistPointersOffset();
  286. Record record = Record.buildRecordAtOffset(docstream, psrOffset);
  287. if (record == null) {
  288. throw new CorruptPowerPointFileException("Powerpoint document is missing a PersistPtrHolder at " + psrOffset);
  289. }
  290. if (!(record instanceof PersistPtrHolder)) {
  291. throw new CorruptPowerPointFileException("Record is not a PersistPtrHolder: " + record + " at " + psrOffset);
  292. }
  293. PersistPtrHolder ptr = (PersistPtrHolder) record;
  294. recordMap.put(psrOffset, ptr);
  295. for (Map.Entry<Integer, Integer> entry : ptr.getSlideLocationsLookup().entrySet()) {
  296. Integer offset = entry.getValue();
  297. Integer id = entry.getKey();
  298. recordMap.put(offset, null); // reserve a slot for the record
  299. offset2id.put(offset, id);
  300. }
  301. usrOffset = usr.getLastUserEditAtomOffset();
  302. // check for corrupted user edit atom and try to repair it
  303. // if the next user edit atom offset is already known, we would go into an endless loop
  304. if (usrOffset > 0 && recordMap.containsKey(usrOffset)) {
  305. // a user edit atom is usually located 36 byte before the smallest known record offset
  306. usrOffset = recordMap.firstKey() - 36;
  307. // check that we really are located on a user edit atom
  308. int ver_inst = LittleEndian.getUShort(docstream, usrOffset);
  309. int type = LittleEndian.getUShort(docstream, usrOffset + 2);
  310. int len = LittleEndian.getInt(docstream, usrOffset + 4);
  311. if (ver_inst == 0 && type == 4085 && (len == 0x1C || len == 0x20)) {
  312. LOG.atWarn().log("Repairing invalid user edit atom");
  313. usr.setLastUserEditAtomOffset(usrOffset);
  314. } else {
  315. throw new CorruptPowerPointFileException("Powerpoint document contains invalid user edit atom");
  316. }
  317. }
  318. }
  319. }
  320. public DocumentEncryptionAtom getDocumentEncryptionAtom() {
  321. for (Record r : _records) {
  322. if (r instanceof DocumentEncryptionAtom) {
  323. return (DocumentEncryptionAtom) r;
  324. }
  325. }
  326. return null;
  327. }
  328. /**
  329. * Find the "Current User" stream, and load it
  330. */
  331. private void readCurrentUserStream() {
  332. try {
  333. currentUser = new CurrentUserAtom(getDirectory());
  334. } catch (IOException ie) {
  335. LOG.atError().withThrowable(ie).log("Error finding Current User Atom");
  336. currentUser = new CurrentUserAtom();
  337. }
  338. }
  339. /**
  340. * Find any other streams from the filesystem, and load them
  341. */
  342. private void readOtherStreams() {
  343. // Currently, there aren't any
  344. }
  345. /**
  346. * Find and read in pictures contained in this presentation.
  347. * This is lazily called as and when we want to touch pictures.
  348. */
  349. private void readPictures() throws IOException {
  350. // if the presentation doesn't contain pictures, will use an empty collection instead
  351. if (!getDirectory().hasEntry("Pictures")) {
  352. _pictures = new ArrayList<>();
  353. return;
  354. }
  355. final Entry en = getDirectory().getEntry("Pictures");
  356. if (!(en instanceof DocumentEntry)) {
  357. throw new IllegalArgumentException("Had unexpected type of entry for name: Pictures: " + en.getClass());
  358. }
  359. DocumentEntry entry = (DocumentEntry) en;
  360. EscherContainerRecord blipStore = getBlipStore();
  361. byte[] pictstream;
  362. try (DocumentInputStream is = getDirectory().createDocumentInputStream(entry)) {
  363. pictstream = IOUtils.toByteArray(is, entry.getSize(), MAX_IMAGE_LENGTH);
  364. }
  365. List<PictureFactory> factories = new ArrayList<>();
  366. try (HSLFSlideShowEncrypted decryptData = new HSLFSlideShowEncrypted(getDocumentEncryptionAtom())) {
  367. int pos = 0;
  368. // An empty picture record (length 0) will take up 8 bytes
  369. while (pos <= (pictstream.length - HSLFPictureData.PREAMBLE_SIZE)) {
  370. int offset = pos;
  371. decryptData.decryptPicture(pictstream, offset);
  372. // Image signature
  373. int signature = LittleEndian.getUShort(pictstream, pos);
  374. pos += LittleEndianConsts.SHORT_SIZE;
  375. // Image type + 0xF018
  376. int type = LittleEndian.getUShort(pictstream, pos);
  377. pos += LittleEndianConsts.SHORT_SIZE;
  378. // Image size (excluding the 8 byte header)
  379. int imgsize = LittleEndian.getInt(pictstream, pos);
  380. pos += LittleEndianConsts.INT_SIZE;
  381. // When parsing the BStoreDelay stream, [MS-ODRAW] says that we
  382. // should terminate if the type isn't 0xf007 or 0xf018->0xf117
  383. if (!((type == 0xf007) || (type >= 0xf018 && type <= 0xf117))) {
  384. break;
  385. }
  386. // The image size must be 0 or greater
  387. // (0 is allowed, but odd, since we do wind on by the header each
  388. // time, so we won't get stuck)
  389. if (imgsize < 0) {
  390. throw new CorruptPowerPointFileException("The file contains a picture, at position " + factories.size() + ", which has a negatively sized data length, so we can't trust any of the picture data");
  391. }
  392. // If the type (including the bonus 0xF018) is 0, skip it
  393. PictureType pt = PictureType.forNativeID(type - 0xF018);
  394. if (pt == null) {
  395. LOG.atError().log("Problem reading picture: Invalid image type 0, on picture with length {}.\nYour document will probably become corrupted if you save it! Position: {}", box(imgsize),box(pos));
  396. } else {
  397. //The pictstream can be truncated halfway through a picture.
  398. //This is not a problem if the pictstream contains extra pictures
  399. //that are not used in any slide -- BUG-60305
  400. if (pos + imgsize > pictstream.length) {
  401. LOG.atWarn().log("\"Pictures\" stream may have ended early. In some circumstances, this is not a problem; " +
  402. "in others, this could indicate a corrupt file");
  403. break;
  404. }
  405. // Copy the data, ready to pass to PictureData
  406. byte[] imgdata = IOUtils.safelyClone(pictstream, pos, imgsize, MAX_RECORD_LENGTH);
  407. factories.add(new PictureFactory(blipStore, pt, imgdata, offset, signature));
  408. }
  409. pos += imgsize;
  410. }
  411. }
  412. matchPicturesAndRecords(factories, blipStore);
  413. List<HSLFPictureData> pictures = new ArrayList<>();
  414. for (PictureFactory it : factories) {
  415. try {
  416. HSLFPictureData pict = it.build();
  417. pict.setIndex(pictures.size() + 1); // index is 1-based
  418. pictures.add(pict);
  419. } catch (IllegalArgumentException e) {
  420. LOG.atError().withThrowable(e).log("Problem reading picture. Your document will probably become corrupted if you save it!");
  421. }
  422. }
  423. _pictures = pictures;
  424. }
  425. /**
  426. * Matches all of the {@link PictureFactory PictureFactories} for a slideshow with {@link EscherBSERecord}s in the
  427. * Blip Store for the slideshow.
  428. * <p>
  429. * When reading a slideshow into memory, we have to match the records in the Blip Store with the factories
  430. * representing picture in the pictures stream. This can be difficult, as presentations might have incorrectly
  431. * formatted data. This function attempts to perform matching using multiple heuristics to increase the likelihood
  432. * of finding all pairs, while aiming to reduce the likelihood of associating incorrect pairs.
  433. *
  434. * @param factories Factories for creating {@link HSLFPictureData} out of the pictures stream.
  435. * @param blipStore Blip Store of the presentation being loaded.
  436. */
  437. private static void matchPicturesAndRecords(List<PictureFactory> factories, EscherContainerRecord blipStore) {
  438. // LinkedList because we're sorting and removing.
  439. LinkedList<PictureFactory> unmatchedFactories = new LinkedList<>(factories);
  440. unmatchedFactories.sort(Comparator.comparingInt(PictureFactory::getOffset));
  441. // Arrange records by offset. In the common case of a well-formed slideshow, where every factory has a
  442. // matching record, this is somewhat wasteful, but is necessary to handle the uncommon case where multiple
  443. // records share an offset.
  444. Map<Integer, List<EscherBSERecord>> unmatchedRecords = new HashMap<>();
  445. for (EscherRecord child : blipStore) {
  446. if (!(child instanceof EscherBSERecord)) {
  447. throw new CorruptPowerPointFileException("Did not have a EscherBSERecord: " + child);
  448. }
  449. EscherBSERecord record = (EscherBSERecord) child;
  450. unmatchedRecords.computeIfAbsent(record.getOffset(), k -> new ArrayList<>()).add(record);
  451. }
  452. // The first pass through the factories only pairs a factory with a record if we're very confident that they
  453. // are a match. Confidence comes from a perfect match on the offset, and if necessary, the UID. Matched
  454. // factories and records are removed from the unmatched collections.
  455. for (Iterator<PictureFactory> iterator = unmatchedFactories.iterator(); iterator.hasNext(); ) {
  456. PictureFactory factory = iterator.next();
  457. int physicalOffset = factory.getOffset();
  458. List<EscherBSERecord> recordsAtOffset = unmatchedRecords.get(physicalOffset);
  459. if (recordsAtOffset == null || recordsAtOffset.isEmpty()) {
  460. // There are no records that have an offset matching the physical offset in the stream. We'll do
  461. // more complicated and less reliable matching for this factory after all "well known"
  462. // image <-> record pairs have been found.
  463. LOG.atDebug().log("No records with offset {}", box(physicalOffset));
  464. } else if (recordsAtOffset.size() == 1) {
  465. // Only 1 record has the same offset as the target image. Assume these are a pair.
  466. factory.setRecord(recordsAtOffset.get(0));
  467. unmatchedRecords.remove(physicalOffset);
  468. iterator.remove();
  469. } else {
  470. // Multiple records share an offset. Perform additional matching based on UID.
  471. for (int i = 0; i < recordsAtOffset.size(); i++) {
  472. EscherBSERecord record = recordsAtOffset.get(i);
  473. byte[] recordUid = record.getUid();
  474. byte[] imageHeader = Arrays.copyOf(factory.imageData, HSLFPictureData.CHECKSUM_SIZE);
  475. if (Arrays.equals(recordUid, imageHeader)) {
  476. factory.setRecord(record);
  477. recordsAtOffset.remove(i);
  478. iterator.remove();
  479. break;
  480. }
  481. }
  482. }
  483. }
  484. // At this point, any factories remaining didn't have a record with a matching offset. The second pass
  485. // through the factories pairs based on the UID. Factories for which a record with a matching UID cannot be
  486. // found will get a new record.
  487. List<EscherBSERecord> remainingRecords = unmatchedRecords.values()
  488. .stream()
  489. .flatMap(Collection::stream)
  490. .collect(Collectors.toList());
  491. for (PictureFactory factory : unmatchedFactories) {
  492. boolean matched = false;
  493. for (int i = remainingRecords.size() - 1; i >= 0; i--) {
  494. EscherBSERecord record = remainingRecords.get(i);
  495. byte[] recordUid = record.getUid();
  496. byte[] imageHeader = Arrays.copyOf(factory.imageData, HSLFPictureData.CHECKSUM_SIZE);
  497. if (Arrays.equals(recordUid, imageHeader)) {
  498. remainingRecords.remove(i);
  499. factory.setRecord(record);
  500. record.setOffset(factory.getOffset());
  501. matched = true;
  502. }
  503. }
  504. if (!matched) {
  505. // Synthesize a new record
  506. LOG.atDebug().log("No record found for picture at offset {}", box(factory.offset));
  507. EscherBSERecord record = HSLFSlideShow.addNewEscherBseRecord(blipStore, factory.type, factory.imageData, factory.offset);
  508. factory.setRecord(record);
  509. }
  510. }
  511. LOG.atDebug().log("Found {} unmatched records.", box(remainingRecords.size()));
  512. }
  513. /**
  514. * remove duplicated UserEditAtoms and merge PersistPtrHolder, i.e.
  515. * remove document edit history
  516. */
  517. public void normalizeRecords() {
  518. try {
  519. updateAndWriteDependantRecords(null, null);
  520. } catch (IOException e) {
  521. throw new CorruptPowerPointFileException(e);
  522. }
  523. _records = HSLFSlideShowEncrypted.normalizeRecords(_records);
  524. }
  525. /**
  526. * This is a helper functions, which is needed for adding new position dependent records
  527. * or finally write the slideshow to a file.
  528. *
  529. * @param os the stream to write to, if null only the references are updated
  530. * @param interestingRecords a map of interesting records (PersistPtrHolder and UserEditAtom)
  531. * referenced by their RecordType. Only the very last of each type will be saved to the map.
  532. * May be null, if not needed.
  533. */
  534. @SuppressWarnings("WeakerAccess")
  535. public void updateAndWriteDependantRecords(OutputStream os, Map<RecordTypes, PositionDependentRecord> interestingRecords)
  536. throws IOException {
  537. // For position dependent records, hold where they were and now are
  538. // As we go along, update, and hand over, to any Position Dependent
  539. // records we happen across
  540. Map<Integer, Integer> oldToNewPositions = new HashMap<>();
  541. // First pass - figure out where all the position dependent
  542. // records are going to end up, in the new scheme
  543. // (Annoyingly, some powerpoint files have PersistPtrHolders
  544. // that reference slides after the PersistPtrHolder)
  545. UserEditAtom usr = null;
  546. PersistPtrHolder ptr = null;
  547. CountingOS cos = new CountingOS();
  548. for (Record record : _records) {
  549. // all top level records are position dependent
  550. if (!(record instanceof PositionDependentRecord)) {
  551. throw new CorruptPowerPointFileException("Record is not a position dependent record: " + record);
  552. }
  553. PositionDependentRecord pdr = (PositionDependentRecord) record;
  554. int oldPos = pdr.getLastOnDiskOffset();
  555. int newPos = cos.size();
  556. pdr.setLastOnDiskOffset(newPos);
  557. if (oldPos != UNSET_OFFSET) {
  558. // new records don't need a mapping, as they aren't in a relation yet
  559. oldToNewPositions.put(oldPos, newPos);
  560. }
  561. // Grab interesting records as they come past
  562. // this will only save the very last record of each type
  563. RecordTypes saveme = null;
  564. int recordType = (int) record.getRecordType();
  565. if (recordType == RecordTypes.PersistPtrIncrementalBlock.typeID) {
  566. saveme = RecordTypes.PersistPtrIncrementalBlock;
  567. ptr = (PersistPtrHolder) pdr;
  568. } else if (recordType == RecordTypes.UserEditAtom.typeID) {
  569. saveme = RecordTypes.UserEditAtom;
  570. usr = (UserEditAtom) pdr;
  571. }
  572. if (interestingRecords != null && saveme != null) {
  573. interestingRecords.put(saveme, pdr);
  574. }
  575. // Dummy write out, so the position winds on properly
  576. record.writeOut(cos);
  577. }
  578. cos.close();
  579. if (usr == null || ptr == null) {
  580. throw new HSLFException("UserEditAtom or PersistPtr can't be determined.");
  581. }
  582. Map<Integer, Integer> persistIds = new HashMap<>();
  583. for (Map.Entry<Integer, Integer> entry : ptr.getSlideLocationsLookup().entrySet()) {
  584. persistIds.put(oldToNewPositions.get(entry.getValue()), entry.getKey());
  585. }
  586. try (HSLFSlideShowEncrypted encData = new HSLFSlideShowEncrypted(getDocumentEncryptionAtom())) {
  587. for (Record record : _records) {
  588. // We've already figured out their new location, and
  589. // told them that
  590. // Tell them of the positions of the other records though
  591. PositionDependentRecord pdr = (PositionDependentRecord) record;
  592. Integer persistId = persistIds.get(pdr.getLastOnDiskOffset());
  593. if (persistId == null) {
  594. persistId = 0;
  595. }
  596. // For now, we're only handling PositionDependentRecord's that
  597. // happen at the top level.
  598. // In future, we'll need the handle them everywhere, but that's
  599. // a bit trickier
  600. pdr.updateOtherRecordReferences(oldToNewPositions);
  601. // Whatever happens, write out that record tree
  602. if (os != null) {
  603. record.writeOut(encData.encryptRecord(os, persistId, record));
  604. }
  605. }
  606. }
  607. // Update and write out the Current User atom
  608. int oldLastUserEditAtomPos = (int) currentUser.getCurrentEditOffset();
  609. Integer newLastUserEditAtomPos = oldToNewPositions.get(oldLastUserEditAtomPos);
  610. if (newLastUserEditAtomPos == null || usr.getLastOnDiskOffset() != newLastUserEditAtomPos) {
  611. throw new HSLFException("Couldn't find the new location of the last UserEditAtom that used to be at " + oldLastUserEditAtomPos);
  612. }
  613. currentUser.setCurrentEditOffset(usr.getLastOnDiskOffset());
  614. }
  615. /**
  616. * Writes out the slideshow to the currently open file.
  617. * <p>
  618. * This will fail (with an {@link IllegalStateException} if the
  619. * slideshow was opened read-only, opened from an {@link InputStream}
  620. * instead of a File, or if this is not the root document. For those cases,
  621. * you must use {@link #write(OutputStream)} or {@link #write(File)} to
  622. * write to a brand new document.
  623. *
  624. * @throws IOException thrown on errors writing to the file
  625. * @throws IllegalStateException if this isn't from a writable File
  626. * @since POI 3.15 beta 3
  627. */
  628. @Override
  629. public void write() throws IOException {
  630. validateInPlaceWritePossible();
  631. // Write the PowerPoint streams to the current FileSystem
  632. // No need to do anything to other streams, already there!
  633. write(getDirectory().getFileSystem(), false);
  634. // Sync with the File on disk
  635. getDirectory().getFileSystem().writeFilesystem();
  636. }
  637. /**
  638. * Writes out the slideshow file the is represented by an instance
  639. * of this class.
  640. * <p>This will write out only the common OLE2 streams. If you require all
  641. * streams to be written out, use {@link #write(File, boolean)}
  642. * with {@code preserveNodes} set to {@code true}.
  643. *
  644. * @param newFile The File to write to.
  645. * @throws IOException If there is an unexpected IOException from writing to the File
  646. */
  647. @Override
  648. public void write(File newFile) throws IOException {
  649. // Write out, but only the common streams
  650. write(newFile, false);
  651. }
  652. /**
  653. * Writes out the slideshow file the is represented by an instance
  654. * of this class.
  655. * If you require all streams to be written out (eg Marcos, embedded
  656. * documents), then set {@code preserveNodes} set to {@code true}
  657. *
  658. * @param newFile The File to write to.
  659. * @param preserveNodes Should all OLE2 streams be written back out, or only the common ones?
  660. * @throws IOException If there is an unexpected IOException from writing to the File
  661. */
  662. public void write(File newFile, boolean preserveNodes) throws IOException {
  663. // Get a new FileSystem to write into
  664. try (POIFSFileSystem outFS = POIFSFileSystem.create(newFile)) {
  665. // Write into the new FileSystem
  666. write(outFS, preserveNodes);
  667. // Send the POIFSFileSystem object out to the underlying stream
  668. outFS.writeFilesystem();
  669. }
  670. }
  671. /**
  672. * Writes out the slideshow file the is represented by an instance
  673. * of this class.
  674. * <p>This will write out only the common OLE2 streams. If you require all
  675. * streams to be written out, use {@link #write(OutputStream, boolean)}
  676. * with {@code preserveNodes} set to {@code true}.
  677. *
  678. * @param out The OutputStream to write to.
  679. * @throws IOException If there is an unexpected IOException from
  680. * the passed in OutputStream
  681. */
  682. @Override
  683. public void write(OutputStream out) throws IOException {
  684. // Write out, but only the common streams
  685. write(out, false);
  686. }
  687. /**
  688. * Writes out the slideshow file the is represented by an instance
  689. * of this class.
  690. * If you require all streams to be written out (eg Macros, embedded
  691. * documents), then set {@code preserveNodes} set to {@code true}
  692. *
  693. * @param out The OutputStream to write to.
  694. * @param preserveNodes Should all OLE2 streams be written back out, or only the common ones?
  695. * @throws IOException If there is an unexpected IOException from
  696. * the passed in OutputStream
  697. */
  698. public void write(OutputStream out, boolean preserveNodes) throws IOException {
  699. // Get a new FileSystem to write into
  700. try (POIFSFileSystem outFS = new POIFSFileSystem()) {
  701. // Write into the new FileSystem
  702. write(outFS, preserveNodes);
  703. // Send the POIFSFileSystem object out to the underlying stream
  704. outFS.writeFilesystem(out);
  705. }
  706. }
  707. private void write(POIFSFileSystem outFS, boolean copyAllOtherNodes) throws IOException {
  708. // read properties and pictures, with old encryption settings where appropriate
  709. if (_pictures == null) {
  710. readPictures();
  711. }
  712. getDocumentSummaryInformation();
  713. // The list of entries we've written out
  714. final List<String> writtenEntries = new ArrayList<>(1);
  715. // set new encryption settings
  716. try (HSLFSlideShowEncrypted encryptedSS = new HSLFSlideShowEncrypted(getDocumentEncryptionAtom())) {
  717. _records = encryptedSS.updateEncryptionRecord(_records);
  718. // Write out the Property Streams
  719. writeProperties(outFS, writtenEntries);
  720. try (UnsynchronizedByteArrayOutputStream baos = UnsynchronizedByteArrayOutputStream.builder().get()) {
  721. // For position dependent records, hold where they were and now are
  722. // As we go along, update, and hand over, to any Position Dependent
  723. // records we happen across
  724. updateAndWriteDependantRecords(baos, null);
  725. // Update our cached copy of the bytes that make up the PPT stream
  726. _docstream = baos.toByteArray();
  727. }
  728. // Write the PPT stream into the POIFS layer
  729. ByteArrayInputStream bais = new ByteArrayInputStream(_docstream);
  730. outFS.createOrUpdateDocument(bais, POWERPOINT_DOCUMENT);
  731. writtenEntries.add(POWERPOINT_DOCUMENT);
  732. currentUser.setEncrypted(encryptedSS.getDocumentEncryptionAtom() != null);
  733. currentUser.writeToFS(outFS);
  734. writtenEntries.add("Current User");
  735. if (!_pictures.isEmpty()) {
  736. Enumeration<InputStream> pictEnum = IteratorUtils.asEnumeration(
  737. _pictures.stream().map(data -> encryptOnePicture(encryptedSS, data)).iterator()
  738. );
  739. try (SequenceInputStream sis = new SequenceInputStream(pictEnum)) {
  740. outFS.createOrUpdateDocument(sis, "Pictures");
  741. writtenEntries.add("Pictures");
  742. } catch (IllegalStateException e) {
  743. throw (IOException)e.getCause();
  744. }
  745. }
  746. }
  747. // If requested, copy over any other streams we spot, eg Macros
  748. if (copyAllOtherNodes) {
  749. EntryUtils.copyNodes(getDirectory().getFileSystem(), outFS, writtenEntries);
  750. }
  751. }
  752. private static InputStream encryptOnePicture(HSLFSlideShowEncrypted encryptedSS, HSLFPictureData data) {
  753. try (UnsynchronizedByteArrayOutputStream baos = UnsynchronizedByteArrayOutputStream.builder().get()) {
  754. data.write(baos);
  755. byte[] pictBytes = baos.toByteArray();
  756. encryptedSS.encryptPicture(pictBytes, 0);
  757. return new ByteArrayInputStream(pictBytes);
  758. } catch (IOException e) {
  759. throw new IllegalStateException(e);
  760. }
  761. }
  762. @Override
  763. public EncryptionInfo getEncryptionInfo() {
  764. DocumentEncryptionAtom dea = getDocumentEncryptionAtom();
  765. return (dea != null) ? dea.getEncryptionInfo() : null;
  766. }
  767. /* ******************* adding methods follow ********************* */
  768. /**
  769. * Adds a new root level record, at the end, but before the last
  770. * PersistPtrIncrementalBlock.
  771. */
  772. @SuppressWarnings({"UnusedReturnValue", "WeakerAccess"})
  773. public synchronized int appendRootLevelRecord(Record newRecord) {
  774. int addedAt = -1;
  775. Record[] r = new Record[_records.length + 1];
  776. boolean added = false;
  777. for (int i = (_records.length - 1); i >= 0; i--) {
  778. if (added) {
  779. // Just copy over
  780. r[i] = _records[i];
  781. } else {
  782. r[(i + 1)] = _records[i];
  783. if (_records[i] instanceof PersistPtrHolder) {
  784. r[i] = newRecord;
  785. added = true;
  786. addedAt = i;
  787. }
  788. }
  789. }
  790. _records = r;
  791. return addedAt;
  792. }
  793. /**
  794. * Add a new picture to this presentation.
  795. *
  796. * @return offset of this picture in the Pictures stream
  797. */
  798. public int addPicture(HSLFPictureData img) {
  799. // Process any existing pictures if we haven't yet
  800. if (_pictures == null) {
  801. try {
  802. readPictures();
  803. } catch (IOException e) {
  804. throw new CorruptPowerPointFileException(e.getMessage());
  805. }
  806. }
  807. // Add the new picture in
  808. int offset = 0;
  809. if (!_pictures.isEmpty()) {
  810. HSLFPictureData prev = _pictures.get(_pictures.size() - 1);
  811. offset = prev.getOffset() + prev.getBseSize();
  812. }
  813. img.setIndex(_pictures.size() + 1); // index is 1-based
  814. _pictures.add(img);
  815. return offset;
  816. }
  817. /* ******************* fetching methods follow ********************* */
  818. /**
  819. * Returns an array of all the records found in the slideshow
  820. */
  821. public Record[] getRecords() {
  822. return _records;
  823. }
  824. /**
  825. * Returns an array of the bytes of the file. Only correct after a
  826. * call to open or write - at all other times might be wrong!
  827. */
  828. public byte[] getUnderlyingBytes() {
  829. return _docstream;
  830. }
  831. /**
  832. * Fetch the Current User Atom of the document
  833. */
  834. public CurrentUserAtom getCurrentUserAtom() {
  835. return currentUser;
  836. }
  837. /**
  838. * Return list of pictures contained in this presentation
  839. *
  840. * @return list with the read pictures or an empty list if the
  841. * presentation doesn't contain pictures.
  842. */
  843. public List<HSLFPictureData> getPictureData() {
  844. if (_pictures == null) {
  845. try {
  846. readPictures();
  847. } catch (IOException e) {
  848. throw new CorruptPowerPointFileException(e.getMessage());
  849. }
  850. }
  851. return Collections.unmodifiableList(_pictures);
  852. }
  853. /**
  854. * Gets embedded object data from the slide show.
  855. *
  856. * @return the embedded objects.
  857. */
  858. public HSLFObjectData[] getEmbeddedObjects() {
  859. if (_objects == null) {
  860. List<HSLFObjectData> objects = new ArrayList<>();
  861. for (Record r : _records) {
  862. if (r instanceof ExOleObjStg) {
  863. objects.add(new HSLFObjectData((ExOleObjStg) r));
  864. }
  865. }
  866. _objects = objects.toArray(new HSLFObjectData[0]);
  867. }
  868. return _objects;
  869. }
  870. private EscherContainerRecord getBlipStore() {
  871. Document documentRecord = null;
  872. for (Record record : _records) {
  873. if (record == null) {
  874. throw new CorruptPowerPointFileException("Did not have a valid record: " + record);
  875. }
  876. if (record.getRecordType() == RecordTypes.Document.typeID) {
  877. if (!(record instanceof Document)) {
  878. throw new CorruptPowerPointFileException("Did not have a Document: " + record);
  879. }
  880. documentRecord = (Document) record;
  881. break;
  882. }
  883. }
  884. if (documentRecord == null) {
  885. throw new CorruptPowerPointFileException("Document record is missing");
  886. }
  887. if (documentRecord.getPPDrawingGroup() == null) {
  888. throw new CorruptPowerPointFileException("Drawing group is missing");
  889. }
  890. EscherContainerRecord blipStore;
  891. EscherContainerRecord dggContainer = documentRecord.getPPDrawingGroup().getDggContainer();
  892. blipStore = HSLFShape.getEscherChild(dggContainer, EscherContainerRecord.BSTORE_CONTAINER);
  893. if (blipStore == null) {
  894. blipStore = new EscherContainerRecord();
  895. blipStore.setRecordId(EscherContainerRecord.BSTORE_CONTAINER);
  896. dggContainer.addChildBefore(blipStore, EscherOptRecord.RECORD_ID);
  897. }
  898. return blipStore;
  899. }
  900. @Override
  901. public void close() throws IOException {
  902. // only close the filesystem, if we are based on the root node.
  903. // embedded documents/slideshows shouldn't close the parent container
  904. if (getDirectory().getParent() == null ||
  905. PP97_DOCUMENT.equals(getDirectory().getName())) {
  906. POIFSFileSystem fs = getDirectory().getFileSystem();
  907. if (fs != null) {
  908. fs.close();
  909. }
  910. }
  911. }
  912. @Override
  913. protected String getEncryptedPropertyStreamName() {
  914. return "EncryptedSummary";
  915. }
  916. void writePropertiesImpl() throws IOException {
  917. super.writeProperties();
  918. }
  919. PropertySet getPropertySetImpl(String setName) throws IOException {
  920. return super.getPropertySet(setName);
  921. }
  922. PropertySet getPropertySetImpl(String setName, EncryptionInfo encryptionInfo) throws IOException {
  923. return super.getPropertySet(setName, encryptionInfo);
  924. }
  925. void writePropertiesImpl(POIFSFileSystem outFS, List<String> writtenEntries) throws IOException {
  926. super.writeProperties(outFS, writtenEntries);
  927. }
  928. void validateInPlaceWritePossibleImpl() throws IllegalStateException {
  929. super.validateInPlaceWritePossible();
  930. }
  931. void clearDirectoryImpl() {
  932. super.clearDirectory();
  933. }
  934. boolean initDirectoryImpl() {
  935. return super.initDirectory();
  936. }
  937. void replaceDirectoryImpl(DirectoryNode newDirectory) throws IOException {
  938. super.replaceDirectory(newDirectory);
  939. }
  940. private static class CountingOS extends OutputStream {
  941. int count;
  942. @Override
  943. public void write(int b) throws IOException {
  944. count++;
  945. }
  946. @Override
  947. public void write(byte[] b) throws IOException {
  948. count += b.length;
  949. }
  950. @Override
  951. public void write(byte[] b, int off, int len) throws IOException {
  952. count += len;
  953. }
  954. public int size() {
  955. return count;
  956. }
  957. }
  958. /**
  959. * Assists in creating {@link HSLFPictureData} when parsing a slideshow.
  960. *
  961. * This class is relied upon heavily by {@link #matchPicturesAndRecords(List, EscherContainerRecord)}.
  962. */
  963. static final class PictureFactory {
  964. final byte[] imageData;
  965. private final EscherContainerRecord recordContainer;
  966. private final PictureData.PictureType type;
  967. private final int offset;
  968. private final int signature;
  969. private EscherBSERecord record;
  970. PictureFactory(
  971. EscherContainerRecord recordContainer,
  972. PictureData.PictureType type,
  973. byte[] imageData,
  974. int offset,
  975. int signature
  976. ) {
  977. this.recordContainer = Objects.requireNonNull(recordContainer);
  978. this.type = Objects.requireNonNull(type);
  979. this.imageData = Objects.requireNonNull(imageData);
  980. this.offset = offset;
  981. this.signature = signature;
  982. }
  983. int getOffset() {
  984. return offset;
  985. }
  986. /**
  987. * Constructs a new {@link HSLFPictureData}.
  988. * <p>
  989. * The {@link EscherBSERecord} must have been set via {@link #setRecord(EscherBSERecord)} prior to invocation.
  990. */
  991. HSLFPictureData build() {
  992. Objects.requireNonNull(record, "Can't build an instance until the record has been assigned.");
  993. return HSLFPictureData.createFromSlideshowData(type, recordContainer, record, imageData, signature);
  994. }
  995. /**
  996. * Sets the {@link EscherBSERecord} with which this factory should create a {@link HSLFPictureData}.
  997. */
  998. PictureFactory setRecord(EscherBSERecord bse) {
  999. record = bse;
  1000. return this;
  1001. }
  1002. }
  1003. }