Vous ne pouvez pas sélectionner plus de 25 sujets Les noms de sujets doivent commencer par une lettre ou un nombre, peuvent contenir des tirets ('-') et peuvent comporter jusqu'à 35 caractères.

HSLFSlideShowImpl.java 46KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035103610371038103910401041104210431044104510461047104810491050105110521053105410551056105710581059106010611062106310641065106610671068106910701071107210731074107510761077107810791080108110821083108410851086108710881089109010911092109310941095109610971098109911001101110211031104110511061107110811091110111111121113111411151116111711181119112011211122112311241125112611271128112911301131113211331134113511361137
  1. /* ====================================================================
  2. Licensed to the Apache Software Foundation (ASF) under one or more
  3. contributor license agreements. See the NOTICE file distributed with
  4. this work for additional information regarding copyright ownership.
  5. The ASF licenses this file to You under the Apache License, Version 2.0
  6. (the "License"); you may not use this file except in compliance with
  7. the License. You may obtain a copy of the License at
  8. http://www.apache.org/licenses/LICENSE-2.0
  9. Unless required by applicable law or agreed to in writing, software
  10. distributed under the License is distributed on an "AS IS" BASIS,
  11. WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. See the License for the specific language governing permissions and
  13. limitations under the License.
  14. ==================================================================== */
  15. package org.apache.poi.hslf.usermodel;
  16. import static org.apache.logging.log4j.util.Unbox.box;
  17. import static org.apache.poi.hslf.usermodel.HSLFSlideShow.POWERPOINT_DOCUMENT;
  18. import static org.apache.poi.hslf.usermodel.HSLFSlideShow.PP95_DOCUMENT;
  19. import static org.apache.poi.hslf.usermodel.HSLFSlideShow.PP97_DOCUMENT;
  20. import java.io.ByteArrayInputStream;
  21. import java.io.Closeable;
  22. import java.io.File;
  23. import java.io.IOException;
  24. import java.io.InputStream;
  25. import java.io.OutputStream;
  26. import java.io.SequenceInputStream;
  27. import java.util.ArrayList;
  28. import java.util.Arrays;
  29. import java.util.Collection;
  30. import java.util.Collections;
  31. import java.util.Comparator;
  32. import java.util.Enumeration;
  33. import java.util.HashMap;
  34. import java.util.Iterator;
  35. import java.util.LinkedList;
  36. import java.util.List;
  37. import java.util.Map;
  38. import java.util.NavigableMap;
  39. import java.util.Objects;
  40. import java.util.TreeMap;
  41. import java.util.stream.Collectors;
  42. import org.apache.commons.collections4.IteratorUtils;
  43. import org.apache.commons.io.output.UnsynchronizedByteArrayOutputStream;
  44. import org.apache.logging.log4j.LogManager;
  45. import org.apache.logging.log4j.Logger;
  46. import org.apache.poi.POIDocument;
  47. import org.apache.poi.ddf.EscherBSERecord;
  48. import org.apache.poi.ddf.EscherContainerRecord;
  49. import org.apache.poi.ddf.EscherOptRecord;
  50. import org.apache.poi.ddf.EscherRecord;
  51. import org.apache.poi.hpsf.PropertySet;
  52. import org.apache.poi.hslf.exceptions.CorruptPowerPointFileException;
  53. import org.apache.poi.hslf.exceptions.HSLFException;
  54. import org.apache.poi.hslf.exceptions.OldPowerPointFormatException;
  55. import org.apache.poi.hslf.record.CurrentUserAtom;
  56. import org.apache.poi.hslf.record.Document;
  57. import org.apache.poi.hslf.record.DocumentEncryptionAtom;
  58. import org.apache.poi.hslf.record.ExOleObjStg;
  59. import org.apache.poi.hslf.record.PersistPtrHolder;
  60. import org.apache.poi.hslf.record.PersistRecord;
  61. import org.apache.poi.hslf.record.PositionDependentRecord;
  62. import org.apache.poi.hslf.record.Record;
  63. import org.apache.poi.hslf.record.RecordTypes;
  64. import org.apache.poi.hslf.record.UserEditAtom;
  65. import org.apache.poi.poifs.crypt.EncryptionInfo;
  66. import org.apache.poi.poifs.filesystem.DirectoryNode;
  67. import org.apache.poi.poifs.filesystem.DocumentEntry;
  68. import org.apache.poi.poifs.filesystem.DocumentInputStream;
  69. import org.apache.poi.poifs.filesystem.EntryUtils;
  70. import org.apache.poi.poifs.filesystem.POIFSFileSystem;
  71. import org.apache.poi.sl.usermodel.PictureData;
  72. import org.apache.poi.sl.usermodel.PictureData.PictureType;
  73. import org.apache.poi.util.IOUtils;
  74. import org.apache.poi.util.LittleEndian;
  75. import org.apache.poi.util.LittleEndianConsts;
  76. /**
  77. * This class contains the main functionality for the Powerpoint file
  78. * "reader". It is only a very basic class for now
  79. */
  80. public final class HSLFSlideShowImpl extends POIDocument implements Closeable {
  81. private static final Logger LOG = LogManager.getLogger(HSLFSlideShowImpl.class);
  82. static final int UNSET_OFFSET = -1;
  83. //arbitrarily selected; may need to increase
  84. private static final int DEFAULT_MAX_RECORD_LENGTH = 200_000_000;
  85. private static final int MAX_DOCUMENT_SIZE = 100_000_000;
  86. private static int MAX_RECORD_LENGTH = DEFAULT_MAX_RECORD_LENGTH;
  87. // Holds metadata on where things are in our document
  88. private CurrentUserAtom currentUser;
  89. // Low level contents of the file
  90. private byte[] _docstream;
  91. // Low level contents
  92. private Record[] _records;
  93. // Raw Pictures contained in the pictures stream
  94. private List<HSLFPictureData> _pictures;
  95. // Embedded objects stored in storage records in the document stream, lazily populated.
  96. private HSLFObjectData[] _objects;
  97. /**
  98. * @param length the max record length allowed for HSLFSlideShowImpl
  99. */
  100. public static void setMaxRecordLength(int length) {
  101. MAX_RECORD_LENGTH = length;
  102. }
  103. /**
  104. * @return the max record length allowed for HSLFSlideShowImpl
  105. */
  106. public static int getMaxRecordLength() {
  107. return MAX_RECORD_LENGTH;
  108. }
  /**
   * Opens the named file as a POIFS filesystem and parses the Powerpoint
   * document it contains, placing all the important stuff into data structures.
   *
   * @param fileName The name of the file to read.
   * @throws IOException if there is a problem while parsing the document.
   */
  @SuppressWarnings("resource")
  public HSLFSlideShowImpl(String fileName) throws IOException {
    this(new POIFSFileSystem(new File(fileName)));
  }
  /**
   * Reads a POIFS filesystem from the given stream and parses the Powerpoint
   * document it contains, placing all the important stuff into data structures.
   *
   * @param inputStream the source of the data
   * @throws IOException if there is a problem while parsing the document.
   */
  @SuppressWarnings("resource")
  public HSLFSlideShowImpl(InputStream inputStream) throws IOException {
    // wrap the raw bytes in an OLE2 filesystem first
    this(new POIFSFileSystem(inputStream));
  }
  /**
   * Parses the Powerpoint document found at the root of a POIFS filesystem
   * and places all the important stuff into data structures.
   *
   * @param filesystem the POIFS FileSystem to read from
   * @throws IOException if there is a problem while parsing the document.
   */
  public HSLFSlideShowImpl(POIFSFileSystem filesystem) throws IOException {
    this(filesystem.getRoot());
  }
  /**
   * Parses the Powerpoint document located at a specific directory of a
   * POIFS filesystem and places all the important stuff into data structures.
   * <p>
   * If any parsing step fails, the filesystem owning {@code dir} is closed
   * before the exception is rethrown.
   *
   * @param dir the POIFS directory to read from
   * @throws IOException if there is a problem while parsing the document.
   */
  public HSLFSlideShowImpl(DirectoryNode dir) throws IOException {
    super(handleDualStorage(dir));
    try {
      // The "Current User" stream comes first: it is required before
      // encrypted documents can be detected.
      readCurrentUserStream();

      // Then load the raw bytes of the PowerPoint stream ...
      readPowerPointStream();

      // ... turn those bytes into records ...
      buildRecords();

      // ... and finally pick up any other streams.
      readOtherStreams();
    } catch (RuntimeException | IOException ex) {
      // Reading failed: release the filesystem here so that no file
      // handles are leaked, then let the caller see the failure.
      dir.getFileSystem().close();
      throw ex;
    }
  }
  170. private static DirectoryNode handleDualStorage(DirectoryNode dir) throws IOException {
  171. // when there's a dual storage entry, use it, as the outer document can't be read quite probably ...
  172. if (!dir.hasEntry(PP97_DOCUMENT)) {
  173. return dir;
  174. }
  175. return (DirectoryNode) dir.getEntry(PP97_DOCUMENT);
  176. }
  177. /**
  178. * Constructs a new, empty, Powerpoint document.
  179. */
  180. public static HSLFSlideShowImpl create() {
  181. try (InputStream is = HSLFSlideShowImpl.class.getResourceAsStream("/org/apache/poi/hslf/data/empty.ppt")) {
  182. if (is == null) {
  183. throw new HSLFException("Missing resource 'empty.ppt'");
  184. }
  185. return new HSLFSlideShowImpl(is);
  186. } catch (IOException e) {
  187. throw new HSLFException(e);
  188. }
  189. }
  190. /**
  191. * Extracts the main PowerPoint document stream from the
  192. * POI file, ready to be passed
  193. *
  194. * @throws IOException when the powerpoint can't be read
  195. */
  196. private void readPowerPointStream() throws IOException {
  197. final DirectoryNode dir = getDirectory();
  198. if (!dir.hasEntry(POWERPOINT_DOCUMENT) && dir.hasEntry(PP95_DOCUMENT)) {
  199. throw new OldPowerPointFormatException("You seem to have supplied a PowerPoint95 file, which isn't supported");
  200. }
  201. // Get the main document stream
  202. DocumentEntry docProps = (DocumentEntry)dir.getEntry(POWERPOINT_DOCUMENT);
  203. // Grab the document stream
  204. int len = docProps.getSize();
  205. try (InputStream is = dir.createDocumentInputStream(docProps)) {
  206. _docstream = IOUtils.toByteArray(is, len, MAX_DOCUMENT_SIZE);
  207. }
  208. }
  209. /**
  210. * Builds the list of records, based on the contents
  211. * of the PowerPoint stream
  212. */
  213. private void buildRecords() throws IOException {
  214. // The format of records in a powerpoint file are:
  215. // <little endian 2 byte "info">
  216. // <little endian 2 byte "type">
  217. // <little endian 4 byte "length">
  218. // If it has a zero length, following it will be another record
  219. // <xx xx yy yy 00 00 00 00> <xx xx yy yy zz zz zz zz>
  220. // If it has a length, depending on its type it may have children or data
  221. // If it has children, these will follow straight away
  222. // <xx xx yy yy zz zz zz zz <xx xx yy yy zz zz zz zz>>
  223. // If it has data, this will come straigh after, and run for the length
  224. // <xx xx yy yy zz zz zz zz dd dd dd dd dd dd dd>
  225. // All lengths given exclude the 8 byte record header
  226. // (Data records are known as Atoms)
  227. // Document should start with:
  228. // 0F 00 E8 03 ## ## ## ##
  229. // (type 1000 = document, info 00 0f is normal, rest is document length)
  230. // 01 00 E9 03 28 00 00 00
  231. // (type 1001 = document atom, info 00 01 normal, 28 bytes long)
  232. // 80 16 00 00 E0 10 00 00 xx xx xx xx xx xx xx xx
  233. // 05 00 00 00 0A 00 00 00 xx xx xx
  234. // (the contents of the document atom, not sure what it means yet)
  235. // (records then follow)
  236. // When parsing a document, look to see if you know about that type
  237. // of the current record. If you know it's a type that has children,
  238. // process the record's data area looking for more records
  239. // If you know about the type and it doesn't have children, either do
  240. // something with the data (eg TextRun) or skip over it
  241. // If you don't know about the type, play safe and skip over it (using
  242. // its length to know where the next record will start)
  243. //
  244. _records = read(_docstream, (int) currentUser.getCurrentEditOffset());
  245. }
  246. private Record[] read(byte[] docstream, int usrOffset) throws IOException {
  247. //sort found records by offset.
  248. //(it is not necessary but SlideShow.findMostRecentCoreRecords() expects them sorted)
  249. NavigableMap<Integer, Record> records = new TreeMap<>(); // offset -> record
  250. Map<Integer, Integer> persistIds = new HashMap<>(); // offset -> persistId
  251. initRecordOffsets(docstream, usrOffset, records, persistIds);
  252. HSLFSlideShowEncrypted decryptData = new HSLFSlideShowEncrypted(docstream, records);
  253. for (Map.Entry<Integer, Record> entry : records.entrySet()) {
  254. Integer offset = entry.getKey();
  255. Record record = entry.getValue();
  256. Integer persistId = persistIds.get(offset);
  257. if (record == null) {
  258. // all plain records have been already added,
  259. // only new records need to be decrypted (tbd #35897)
  260. decryptData.decryptRecord(docstream, persistId, offset);
  261. record = Record.buildRecordAtOffset(docstream, offset);
  262. entry.setValue(record);
  263. }
  264. if (record instanceof PersistRecord) {
  265. ((PersistRecord) record).setPersistId(persistId);
  266. }
  267. }
  268. decryptData.close();
  269. return records.values().toArray(new Record[0]);
  270. }
  271. private void initRecordOffsets(byte[] docstream, int usrOffset, NavigableMap<Integer, Record> recordMap, Map<Integer, Integer> offset2id) {
  272. while (usrOffset != 0) {
  273. UserEditAtom usr = (UserEditAtom) Record.buildRecordAtOffset(docstream, usrOffset);
  274. if (usr == null) {
  275. throw new CorruptPowerPointFileException("Powerpoint document contains no user edit atom");
  276. }
  277. recordMap.put(usrOffset, usr);
  278. int psrOffset = usr.getPersistPointersOffset();
  279. Record record = Record.buildRecordAtOffset(docstream, psrOffset);
  280. if (record == null) {
  281. throw new CorruptPowerPointFileException("Powerpoint document is missing a PersistPtrHolder at " + psrOffset);
  282. }
  283. if (!(record instanceof PersistPtrHolder)) {
  284. throw new CorruptPowerPointFileException("Record is not a PersistPtrHolder: " + record + " at " + psrOffset);
  285. }
  286. PersistPtrHolder ptr = (PersistPtrHolder) record;
  287. recordMap.put(psrOffset, ptr);
  288. for (Map.Entry<Integer, Integer> entry : ptr.getSlideLocationsLookup().entrySet()) {
  289. Integer offset = entry.getValue();
  290. Integer id = entry.getKey();
  291. recordMap.put(offset, null); // reserve a slot for the record
  292. offset2id.put(offset, id);
  293. }
  294. usrOffset = usr.getLastUserEditAtomOffset();
  295. // check for corrupted user edit atom and try to repair it
  296. // if the next user edit atom offset is already known, we would go into an endless loop
  297. if (usrOffset > 0 && recordMap.containsKey(usrOffset)) {
  298. // a user edit atom is usually located 36 byte before the smallest known record offset
  299. usrOffset = recordMap.firstKey() - 36;
  300. // check that we really are located on a user edit atom
  301. int ver_inst = LittleEndian.getUShort(docstream, usrOffset);
  302. int type = LittleEndian.getUShort(docstream, usrOffset + 2);
  303. int len = LittleEndian.getInt(docstream, usrOffset + 4);
  304. if (ver_inst == 0 && type == 4085 && (len == 0x1C || len == 0x20)) {
  305. LOG.atWarn().log("Repairing invalid user edit atom");
  306. usr.setLastUserEditAtomOffset(usrOffset);
  307. } else {
  308. throw new CorruptPowerPointFileException("Powerpoint document contains invalid user edit atom");
  309. }
  310. }
  311. }
  312. }
  313. public DocumentEncryptionAtom getDocumentEncryptionAtom() {
  314. for (Record r : _records) {
  315. if (r instanceof DocumentEncryptionAtom) {
  316. return (DocumentEncryptionAtom) r;
  317. }
  318. }
  319. return null;
  320. }
  321. /**
  322. * Find the "Current User" stream, and load it
  323. */
  324. private void readCurrentUserStream() {
  325. try {
  326. currentUser = new CurrentUserAtom(getDirectory());
  327. } catch (IOException ie) {
  328. LOG.atError().withThrowable(ie).log("Error finding Current User Atom");
  329. currentUser = new CurrentUserAtom();
  330. }
  331. }
  332. /**
  333. * Find any other streams from the filesystem, and load them
  334. */
  335. private void readOtherStreams() {
  336. // Currently, there aren't any
  337. }
  338. /**
  339. * Find and read in pictures contained in this presentation.
  340. * This is lazily called as and when we want to touch pictures.
  341. */
  342. private void readPictures() throws IOException {
  343. // if the presentation doesn't contain pictures, will use an empty collection instead
  344. if (!getDirectory().hasEntry("Pictures")) {
  345. _pictures = new ArrayList<>();
  346. return;
  347. }
  348. DocumentEntry entry = (DocumentEntry) getDirectory().getEntry("Pictures");
  349. EscherContainerRecord blipStore = getBlipStore();
  350. byte[] pictstream;
  351. try (DocumentInputStream is = getDirectory().createDocumentInputStream(entry)) {
  352. pictstream = IOUtils.toByteArray(is, entry.getSize());
  353. }
  354. List<PictureFactory> factories = new ArrayList<>();
  355. try (HSLFSlideShowEncrypted decryptData = new HSLFSlideShowEncrypted(getDocumentEncryptionAtom())) {
  356. int pos = 0;
  357. // An empty picture record (length 0) will take up 8 bytes
  358. while (pos <= (pictstream.length - HSLFPictureData.PREAMBLE_SIZE)) {
  359. int offset = pos;
  360. decryptData.decryptPicture(pictstream, offset);
  361. // Image signature
  362. int signature = LittleEndian.getUShort(pictstream, pos);
  363. pos += LittleEndianConsts.SHORT_SIZE;
  364. // Image type + 0xF018
  365. int type = LittleEndian.getUShort(pictstream, pos);
  366. pos += LittleEndianConsts.SHORT_SIZE;
  367. // Image size (excluding the 8 byte header)
  368. int imgsize = LittleEndian.getInt(pictstream, pos);
  369. pos += LittleEndianConsts.INT_SIZE;
  370. // When parsing the BStoreDelay stream, [MS-ODRAW] says that we
  371. // should terminate if the type isn't 0xf007 or 0xf018->0xf117
  372. if (!((type == 0xf007) || (type >= 0xf018 && type <= 0xf117))) {
  373. break;
  374. }
  375. // The image size must be 0 or greater
  376. // (0 is allowed, but odd, since we do wind on by the header each
  377. // time, so we won't get stuck)
  378. if (imgsize < 0) {
  379. throw new CorruptPowerPointFileException("The file contains a picture, at position " + factories.size() + ", which has a negatively sized data length, so we can't trust any of the picture data");
  380. }
  381. // If the type (including the bonus 0xF018) is 0, skip it
  382. PictureType pt = PictureType.forNativeID(type - 0xF018);
  383. if (pt == null) {
  384. LOG.atError().log("Problem reading picture: Invalid image type 0, on picture with length {}.\nYour document will probably become corrupted if you save it! Position: {}", box(imgsize),box(pos));
  385. } else {
  386. //The pictstream can be truncated halfway through a picture.
  387. //This is not a problem if the pictstream contains extra pictures
  388. //that are not used in any slide -- BUG-60305
  389. if (pos + imgsize > pictstream.length) {
  390. LOG.atWarn().log("\"Pictures\" stream may have ended early. In some circumstances, this is not a problem; " +
  391. "in others, this could indicate a corrupt file");
  392. break;
  393. }
  394. // Copy the data, ready to pass to PictureData
  395. byte[] imgdata = IOUtils.safelyClone(pictstream, pos, imgsize, MAX_RECORD_LENGTH);
  396. factories.add(new PictureFactory(blipStore, pt, imgdata, offset, signature));
  397. }
  398. pos += imgsize;
  399. }
  400. }
  401. matchPicturesAndRecords(factories, blipStore);
  402. List<HSLFPictureData> pictures = new ArrayList<>();
  403. for (PictureFactory it : factories) {
  404. try {
  405. HSLFPictureData pict = it.build();
  406. pict.setIndex(pictures.size() + 1); // index is 1-based
  407. pictures.add(pict);
  408. } catch (IllegalArgumentException e) {
  409. LOG.atError().withThrowable(e).log("Problem reading picture. Your document will probably become corrupted if you save it!");
  410. }
  411. }
  412. _pictures = pictures;
  413. }
  414. /**
  415. * Matches all of the {@link PictureFactory PictureFactories} for a slideshow with {@link EscherBSERecord}s in the
  416. * Blip Store for the slideshow.
  417. * <p>
  418. * When reading a slideshow into memory, we have to match the records in the Blip Store with the factories
  419. * representing picture in the pictures stream. This can be difficult, as presentations might have incorrectly
  420. * formatted data. This function attempts to perform matching using multiple heuristics to increase the likelihood
  421. * of finding all pairs, while aiming to reduce the likelihood of associating incorrect pairs.
  422. *
  423. * @param factories Factories for creating {@link HSLFPictureData} out of the pictures stream.
  424. * @param blipStore Blip Store of the presentation being loaded.
  425. */
  426. private static void matchPicturesAndRecords(List<PictureFactory> factories, EscherContainerRecord blipStore) {
  427. // LinkedList because we're sorting and removing.
  428. LinkedList<PictureFactory> unmatchedFactories = new LinkedList<>(factories);
  429. unmatchedFactories.sort(Comparator.comparingInt(PictureFactory::getOffset));
  430. // Arrange records by offset. In the common case of a well-formed slideshow, where every factory has a
  431. // matching record, this is somewhat wasteful, but is necessary to handle the uncommon case where multiple
  432. // records share an offset.
  433. Map<Integer, List<EscherBSERecord>> unmatchedRecords = new HashMap<>();
  434. for (EscherRecord child : blipStore) {
  435. EscherBSERecord record = (EscherBSERecord) child;
  436. unmatchedRecords.computeIfAbsent(record.getOffset(), k -> new ArrayList<>()).add(record);
  437. }
  438. // The first pass through the factories only pairs a factory with a record if we're very confident that they
  439. // are a match. Confidence comes from a perfect match on the offset, and if necessary, the UID. Matched
  440. // factories and records are removed from the unmatched collections.
  441. for (Iterator<PictureFactory> iterator = unmatchedFactories.iterator(); iterator.hasNext(); ) {
  442. PictureFactory factory = iterator.next();
  443. int physicalOffset = factory.getOffset();
  444. List<EscherBSERecord> recordsAtOffset = unmatchedRecords.get(physicalOffset);
  445. if (recordsAtOffset == null || recordsAtOffset.isEmpty()) {
  446. // There are no records that have an offset matching the physical offset in the stream. We'll do
  447. // more complicated and less reliable matching for this factory after all "well known"
  448. // image <-> record pairs have been found.
  449. LOG.atDebug().log("No records with offset {}", box(physicalOffset));
  450. } else if (recordsAtOffset.size() == 1) {
  451. // Only 1 record has the same offset as the target image. Assume these are a pair.
  452. factory.setRecord(recordsAtOffset.get(0));
  453. unmatchedRecords.remove(physicalOffset);
  454. iterator.remove();
  455. } else {
  456. // Multiple records share an offset. Perform additional matching based on UID.
  457. for (int i = 0; i < recordsAtOffset.size(); i++) {
  458. EscherBSERecord record = recordsAtOffset.get(i);
  459. byte[] recordUid = record.getUid();
  460. byte[] imageHeader = Arrays.copyOf(factory.imageData, HSLFPictureData.CHECKSUM_SIZE);
  461. if (Arrays.equals(recordUid, imageHeader)) {
  462. factory.setRecord(record);
  463. recordsAtOffset.remove(i);
  464. iterator.remove();
  465. break;
  466. }
  467. }
  468. }
  469. }
  470. // At this point, any factories remaining didn't have a record with a matching offset. The second pass
  471. // through the factories pairs based on the UID. Factories for which a record with a matching UID cannot be
  472. // found will get a new record.
  473. List<EscherBSERecord> remainingRecords = unmatchedRecords.values()
  474. .stream()
  475. .flatMap(Collection::stream)
  476. .collect(Collectors.toList());
  477. for (PictureFactory factory : unmatchedFactories) {
  478. boolean matched = false;
  479. for (int i = remainingRecords.size() - 1; i >= 0; i--) {
  480. EscherBSERecord record = remainingRecords.get(i);
  481. byte[] recordUid = record.getUid();
  482. byte[] imageHeader = Arrays.copyOf(factory.imageData, HSLFPictureData.CHECKSUM_SIZE);
  483. if (Arrays.equals(recordUid, imageHeader)) {
  484. remainingRecords.remove(i);
  485. factory.setRecord(record);
  486. record.setOffset(factory.getOffset());
  487. matched = true;
  488. }
  489. }
  490. if (!matched) {
  491. // Synthesize a new record
  492. LOG.atDebug().log("No record found for picture at offset {}", box(factory.offset));
  493. EscherBSERecord record = HSLFSlideShow.addNewEscherBseRecord(blipStore, factory.type, factory.imageData, factory.offset);
  494. factory.setRecord(record);
  495. }
  496. }
  497. LOG.atDebug().log("Found {} unmatched records.", box(remainingRecords.size()));
  498. }
  499. /**
  500. * remove duplicated UserEditAtoms and merge PersistPtrHolder, i.e.
  501. * remove document edit history
  502. */
  503. public void normalizeRecords() {
  504. try {
  505. updateAndWriteDependantRecords(null, null);
  506. } catch (IOException e) {
  507. throw new CorruptPowerPointFileException(e);
  508. }
  509. _records = HSLFSlideShowEncrypted.normalizeRecords(_records);
  510. }
  511. /**
  512. * This is a helper functions, which is needed for adding new position dependent records
  513. * or finally write the slideshow to a file.
  514. *
  515. * @param os the stream to write to, if null only the references are updated
  516. * @param interestingRecords a map of interesting records (PersistPtrHolder and UserEditAtom)
  517. * referenced by their RecordType. Only the very last of each type will be saved to the map.
  518. * May be null, if not needed.
  519. */
  520. @SuppressWarnings("WeakerAccess")
  521. public void updateAndWriteDependantRecords(OutputStream os, Map<RecordTypes, PositionDependentRecord> interestingRecords)
  522. throws IOException {
  523. // For position dependent records, hold where they were and now are
  524. // As we go along, update, and hand over, to any Position Dependent
  525. // records we happen across
  526. Map<Integer, Integer> oldToNewPositions = new HashMap<>();
  527. // First pass - figure out where all the position dependent
  528. // records are going to end up, in the new scheme
  529. // (Annoyingly, some powerpoint files have PersistPtrHolders
  530. // that reference slides after the PersistPtrHolder)
  531. UserEditAtom usr = null;
  532. PersistPtrHolder ptr = null;
  533. CountingOS cos = new CountingOS();
  534. for (Record record : _records) {
  535. // all top level records are position dependent
  536. if (!(record instanceof PositionDependentRecord)) {
  537. throw new CorruptPowerPointFileException("Record is not a position dependent record: " + record);
  538. }
  539. PositionDependentRecord pdr = (PositionDependentRecord) record;
  540. int oldPos = pdr.getLastOnDiskOffset();
  541. int newPos = cos.size();
  542. pdr.setLastOnDiskOffset(newPos);
  543. if (oldPos != UNSET_OFFSET) {
  544. // new records don't need a mapping, as they aren't in a relation yet
  545. oldToNewPositions.put(oldPos, newPos);
  546. }
  547. // Grab interesting records as they come past
  548. // this will only save the very last record of each type
  549. RecordTypes saveme = null;
  550. int recordType = (int) record.getRecordType();
  551. if (recordType == RecordTypes.PersistPtrIncrementalBlock.typeID) {
  552. saveme = RecordTypes.PersistPtrIncrementalBlock;
  553. ptr = (PersistPtrHolder) pdr;
  554. } else if (recordType == RecordTypes.UserEditAtom.typeID) {
  555. saveme = RecordTypes.UserEditAtom;
  556. usr = (UserEditAtom) pdr;
  557. }
  558. if (interestingRecords != null && saveme != null) {
  559. interestingRecords.put(saveme, pdr);
  560. }
  561. // Dummy write out, so the position winds on properly
  562. record.writeOut(cos);
  563. }
  564. cos.close();
  565. if (usr == null || ptr == null) {
  566. throw new HSLFException("UserEditAtom or PersistPtr can't be determined.");
  567. }
  568. Map<Integer, Integer> persistIds = new HashMap<>();
  569. for (Map.Entry<Integer, Integer> entry : ptr.getSlideLocationsLookup().entrySet()) {
  570. persistIds.put(oldToNewPositions.get(entry.getValue()), entry.getKey());
  571. }
  572. try (HSLFSlideShowEncrypted encData = new HSLFSlideShowEncrypted(getDocumentEncryptionAtom())) {
  573. for (Record record : _records) {
  574. // We've already figured out their new location, and
  575. // told them that
  576. // Tell them of the positions of the other records though
  577. PositionDependentRecord pdr = (PositionDependentRecord) record;
  578. Integer persistId = persistIds.get(pdr.getLastOnDiskOffset());
  579. if (persistId == null) {
  580. persistId = 0;
  581. }
  582. // For now, we're only handling PositionDependentRecord's that
  583. // happen at the top level.
  584. // In future, we'll need the handle them everywhere, but that's
  585. // a bit trickier
  586. pdr.updateOtherRecordReferences(oldToNewPositions);
  587. // Whatever happens, write out that record tree
  588. if (os != null) {
  589. record.writeOut(encData.encryptRecord(os, persistId, record));
  590. }
  591. }
  592. }
  593. // Update and write out the Current User atom
  594. int oldLastUserEditAtomPos = (int) currentUser.getCurrentEditOffset();
  595. Integer newLastUserEditAtomPos = oldToNewPositions.get(oldLastUserEditAtomPos);
  596. if (newLastUserEditAtomPos == null || usr.getLastOnDiskOffset() != newLastUserEditAtomPos) {
  597. throw new HSLFException("Couldn't find the new location of the last UserEditAtom that used to be at " + oldLastUserEditAtomPos);
  598. }
  599. currentUser.setCurrentEditOffset(usr.getLastOnDiskOffset());
  600. }
  601. /**
  602. * Writes out the slideshow to the currently open file.
  603. * <p>
  604. * This will fail (with an {@link IllegalStateException} if the
  605. * slideshow was opened read-only, opened from an {@link InputStream}
  606. * instead of a File, or if this is not the root document. For those cases,
  607. * you must use {@link #write(OutputStream)} or {@link #write(File)} to
  608. * write to a brand new document.
  609. *
  610. * @throws IOException thrown on errors writing to the file
  611. * @throws IllegalStateException if this isn't from a writable File
  612. * @since POI 3.15 beta 3
  613. */
  614. @Override
  615. public void write() throws IOException {
  616. validateInPlaceWritePossible();
  617. // Write the PowerPoint streams to the current FileSystem
  618. // No need to do anything to other streams, already there!
  619. write(getDirectory().getFileSystem(), false);
  620. // Sync with the File on disk
  621. getDirectory().getFileSystem().writeFilesystem();
  622. }
  623. /**
  624. * Writes out the slideshow file the is represented by an instance
  625. * of this class.
  626. * <p>This will write out only the common OLE2 streams. If you require all
  627. * streams to be written out, use {@link #write(File, boolean)}
  628. * with {@code preserveNodes} set to {@code true}.
  629. *
  630. * @param newFile The File to write to.
  631. * @throws IOException If there is an unexpected IOException from writing to the File
  632. */
  633. @Override
  634. public void write(File newFile) throws IOException {
  635. // Write out, but only the common streams
  636. write(newFile, false);
  637. }
  638. /**
  639. * Writes out the slideshow file the is represented by an instance
  640. * of this class.
  641. * If you require all streams to be written out (eg Marcos, embedded
  642. * documents), then set {@code preserveNodes} set to {@code true}
  643. *
  644. * @param newFile The File to write to.
  645. * @param preserveNodes Should all OLE2 streams be written back out, or only the common ones?
  646. * @throws IOException If there is an unexpected IOException from writing to the File
  647. */
  648. public void write(File newFile, boolean preserveNodes) throws IOException {
  649. // Get a new FileSystem to write into
  650. try (POIFSFileSystem outFS = POIFSFileSystem.create(newFile)) {
  651. // Write into the new FileSystem
  652. write(outFS, preserveNodes);
  653. // Send the POIFSFileSystem object out to the underlying stream
  654. outFS.writeFilesystem();
  655. }
  656. }
  657. /**
  658. * Writes out the slideshow file the is represented by an instance
  659. * of this class.
  660. * <p>This will write out only the common OLE2 streams. If you require all
  661. * streams to be written out, use {@link #write(OutputStream, boolean)}
  662. * with {@code preserveNodes} set to {@code true}.
  663. *
  664. * @param out The OutputStream to write to.
  665. * @throws IOException If there is an unexpected IOException from
  666. * the passed in OutputStream
  667. */
  668. @Override
  669. public void write(OutputStream out) throws IOException {
  670. // Write out, but only the common streams
  671. write(out, false);
  672. }
  673. /**
  674. * Writes out the slideshow file the is represented by an instance
  675. * of this class.
  676. * If you require all streams to be written out (eg Macros, embedded
  677. * documents), then set {@code preserveNodes} set to {@code true}
  678. *
  679. * @param out The OutputStream to write to.
  680. * @param preserveNodes Should all OLE2 streams be written back out, or only the common ones?
  681. * @throws IOException If there is an unexpected IOException from
  682. * the passed in OutputStream
  683. */
  684. public void write(OutputStream out, boolean preserveNodes) throws IOException {
  685. // Get a new FileSystem to write into
  686. try (POIFSFileSystem outFS = new POIFSFileSystem()) {
  687. // Write into the new FileSystem
  688. write(outFS, preserveNodes);
  689. // Send the POIFSFileSystem object out to the underlying stream
  690. outFS.writeFilesystem(out);
  691. }
  692. }
  693. private void write(POIFSFileSystem outFS, boolean copyAllOtherNodes) throws IOException {
  694. // read properties and pictures, with old encryption settings where appropriate
  695. if (_pictures == null) {
  696. readPictures();
  697. }
  698. getDocumentSummaryInformation();
  699. // The list of entries we've written out
  700. final List<String> writtenEntries = new ArrayList<>(1);
  701. // set new encryption settings
  702. try (HSLFSlideShowEncrypted encryptedSS = new HSLFSlideShowEncrypted(getDocumentEncryptionAtom())) {
  703. _records = encryptedSS.updateEncryptionRecord(_records);
  704. // Write out the Property Streams
  705. writeProperties(outFS, writtenEntries);
  706. try (UnsynchronizedByteArrayOutputStream baos = new UnsynchronizedByteArrayOutputStream()) {
  707. // For position dependent records, hold where they were and now are
  708. // As we go along, update, and hand over, to any Position Dependent
  709. // records we happen across
  710. updateAndWriteDependantRecords(baos, null);
  711. // Update our cached copy of the bytes that make up the PPT stream
  712. _docstream = baos.toByteArray();
  713. }
  714. // Write the PPT stream into the POIFS layer
  715. ByteArrayInputStream bais = new ByteArrayInputStream(_docstream);
  716. outFS.createOrUpdateDocument(bais, POWERPOINT_DOCUMENT);
  717. writtenEntries.add(POWERPOINT_DOCUMENT);
  718. currentUser.setEncrypted(encryptedSS.getDocumentEncryptionAtom() != null);
  719. currentUser.writeToFS(outFS);
  720. writtenEntries.add("Current User");
  721. if (!_pictures.isEmpty()) {
  722. Enumeration<InputStream> pictEnum = IteratorUtils.asEnumeration(
  723. _pictures.stream().map(data -> encryptOnePicture(encryptedSS, data)).iterator()
  724. );
  725. try (SequenceInputStream sis = new SequenceInputStream(pictEnum)) {
  726. outFS.createOrUpdateDocument(sis, "Pictures");
  727. writtenEntries.add("Pictures");
  728. } catch (IllegalStateException e) {
  729. throw (IOException)e.getCause();
  730. }
  731. }
  732. }
  733. // If requested, copy over any other streams we spot, eg Macros
  734. if (copyAllOtherNodes) {
  735. EntryUtils.copyNodes(getDirectory().getFileSystem(), outFS, writtenEntries);
  736. }
  737. }
  738. private static InputStream encryptOnePicture(HSLFSlideShowEncrypted encryptedSS, HSLFPictureData data) {
  739. try (UnsynchronizedByteArrayOutputStream baos = new UnsynchronizedByteArrayOutputStream()) {
  740. data.write(baos);
  741. byte[] pictBytes = baos.toByteArray();
  742. encryptedSS.encryptPicture(pictBytes, 0);
  743. return new ByteArrayInputStream(pictBytes);
  744. } catch (IOException e) {
  745. throw new IllegalStateException(e);
  746. }
  747. }
  748. @Override
  749. public EncryptionInfo getEncryptionInfo() {
  750. DocumentEncryptionAtom dea = getDocumentEncryptionAtom();
  751. return (dea != null) ? dea.getEncryptionInfo() : null;
  752. }
  753. /* ******************* adding methods follow ********************* */
  754. /**
  755. * Adds a new root level record, at the end, but before the last
  756. * PersistPtrIncrementalBlock.
  757. */
  758. @SuppressWarnings({"UnusedReturnValue", "WeakerAccess"})
  759. public synchronized int appendRootLevelRecord(Record newRecord) {
  760. int addedAt = -1;
  761. Record[] r = new Record[_records.length + 1];
  762. boolean added = false;
  763. for (int i = (_records.length - 1); i >= 0; i--) {
  764. if (added) {
  765. // Just copy over
  766. r[i] = _records[i];
  767. } else {
  768. r[(i + 1)] = _records[i];
  769. if (_records[i] instanceof PersistPtrHolder) {
  770. r[i] = newRecord;
  771. added = true;
  772. addedAt = i;
  773. }
  774. }
  775. }
  776. _records = r;
  777. return addedAt;
  778. }
  779. /**
  780. * Add a new picture to this presentation.
  781. *
  782. * @return offset of this picture in the Pictures stream
  783. */
  784. public int addPicture(HSLFPictureData img) {
  785. // Process any existing pictures if we haven't yet
  786. if (_pictures == null) {
  787. try {
  788. readPictures();
  789. } catch (IOException e) {
  790. throw new CorruptPowerPointFileException(e.getMessage());
  791. }
  792. }
  793. // Add the new picture in
  794. int offset = 0;
  795. if (!_pictures.isEmpty()) {
  796. HSLFPictureData prev = _pictures.get(_pictures.size() - 1);
  797. offset = prev.getOffset() + prev.getBseSize();
  798. }
  799. img.setIndex(_pictures.size() + 1); // index is 1-based
  800. _pictures.add(img);
  801. return offset;
  802. }
  803. /* ******************* fetching methods follow ********************* */
  804. /**
  805. * Returns an array of all the records found in the slideshow
  806. */
  807. public Record[] getRecords() {
  808. return _records;
  809. }
  810. /**
  811. * Returns an array of the bytes of the file. Only correct after a
  812. * call to open or write - at all other times might be wrong!
  813. */
  814. public byte[] getUnderlyingBytes() {
  815. return _docstream;
  816. }
  817. /**
  818. * Fetch the Current User Atom of the document
  819. */
  820. public CurrentUserAtom getCurrentUserAtom() {
  821. return currentUser;
  822. }
  823. /**
  824. * Return list of pictures contained in this presentation
  825. *
  826. * @return list with the read pictures or an empty list if the
  827. * presentation doesn't contain pictures.
  828. */
  829. public List<HSLFPictureData> getPictureData() {
  830. if (_pictures == null) {
  831. try {
  832. readPictures();
  833. } catch (IOException e) {
  834. throw new CorruptPowerPointFileException(e.getMessage());
  835. }
  836. }
  837. return Collections.unmodifiableList(_pictures);
  838. }
  839. /**
  840. * Gets embedded object data from the slide show.
  841. *
  842. * @return the embedded objects.
  843. */
  844. public HSLFObjectData[] getEmbeddedObjects() {
  845. if (_objects == null) {
  846. List<HSLFObjectData> objects = new ArrayList<>();
  847. for (Record r : _records) {
  848. if (r instanceof ExOleObjStg) {
  849. objects.add(new HSLFObjectData((ExOleObjStg) r));
  850. }
  851. }
  852. _objects = objects.toArray(new HSLFObjectData[0]);
  853. }
  854. return _objects;
  855. }
  856. private EscherContainerRecord getBlipStore() {
  857. Document documentRecord = null;
  858. for (Record record : _records) {
  859. if (record.getRecordType() == RecordTypes.Document.typeID) {
  860. documentRecord = (Document) record;
  861. break;
  862. }
  863. }
  864. if (documentRecord == null) {
  865. throw new CorruptPowerPointFileException("Document record is missing");
  866. }
  867. if (documentRecord.getPPDrawingGroup() == null) {
  868. throw new CorruptPowerPointFileException("Drawing group is missing");
  869. }
  870. EscherContainerRecord blipStore;
  871. EscherContainerRecord dggContainer = documentRecord.getPPDrawingGroup().getDggContainer();
  872. blipStore = HSLFShape.getEscherChild(dggContainer, EscherContainerRecord.BSTORE_CONTAINER);
  873. if (blipStore == null) {
  874. blipStore = new EscherContainerRecord();
  875. blipStore.setRecordId(EscherContainerRecord.BSTORE_CONTAINER);
  876. dggContainer.addChildBefore(blipStore, EscherOptRecord.RECORD_ID);
  877. }
  878. return blipStore;
  879. }
  880. @Override
  881. public void close() throws IOException {
  882. // only close the filesystem, if we are based on the root node.
  883. // embedded documents/slideshows shouldn't close the parent container
  884. if (getDirectory().getParent() == null ||
  885. PP97_DOCUMENT.equals(getDirectory().getName())) {
  886. POIFSFileSystem fs = getDirectory().getFileSystem();
  887. if (fs != null) {
  888. fs.close();
  889. }
  890. }
  891. }
  892. @Override
  893. protected String getEncryptedPropertyStreamName() {
  894. return "EncryptedSummary";
  895. }
  896. void writePropertiesImpl() throws IOException {
  897. super.writeProperties();
  898. }
  899. PropertySet getPropertySetImpl(String setName) throws IOException {
  900. return super.getPropertySet(setName);
  901. }
  902. PropertySet getPropertySetImpl(String setName, EncryptionInfo encryptionInfo) throws IOException {
  903. return super.getPropertySet(setName, encryptionInfo);
  904. }
  905. void writePropertiesImpl(POIFSFileSystem outFS, List<String> writtenEntries) throws IOException {
  906. super.writeProperties(outFS, writtenEntries);
  907. }
  908. void validateInPlaceWritePossibleImpl() throws IllegalStateException {
  909. super.validateInPlaceWritePossible();
  910. }
  911. void clearDirectoryImpl() {
  912. super.clearDirectory();
  913. }
  914. boolean initDirectoryImpl() {
  915. return super.initDirectory();
  916. }
  917. void replaceDirectoryImpl(DirectoryNode newDirectory) throws IOException {
  918. super.replaceDirectory(newDirectory);
  919. }
  920. private static class CountingOS extends OutputStream {
  921. int count;
  922. @Override
  923. public void write(int b) throws IOException {
  924. count++;
  925. }
  926. @Override
  927. public void write(byte[] b) throws IOException {
  928. count += b.length;
  929. }
  930. @Override
  931. public void write(byte[] b, int off, int len) throws IOException {
  932. count += len;
  933. }
  934. public int size() {
  935. return count;
  936. }
  937. }
  938. /**
  939. * Assists in creating {@link HSLFPictureData} when parsing a slideshow.
  940. *
  941. * This class is relied upon heavily by {@link #matchPicturesAndRecords(List, EscherContainerRecord)}.
  942. */
  943. static final class PictureFactory {
  944. final byte[] imageData;
  945. private final EscherContainerRecord recordContainer;
  946. private final PictureData.PictureType type;
  947. private final int offset;
  948. private final int signature;
  949. private EscherBSERecord record;
  950. PictureFactory(
  951. EscherContainerRecord recordContainer,
  952. PictureData.PictureType type,
  953. byte[] imageData,
  954. int offset,
  955. int signature
  956. ) {
  957. this.recordContainer = Objects.requireNonNull(recordContainer);
  958. this.type = Objects.requireNonNull(type);
  959. this.imageData = Objects.requireNonNull(imageData);
  960. this.offset = offset;
  961. this.signature = signature;
  962. }
  963. int getOffset() {
  964. return offset;
  965. }
  966. /**
  967. * Constructs a new {@link HSLFPictureData}.
  968. * <p>
  969. * The {@link EscherBSERecord} must have been set via {@link #setRecord(EscherBSERecord)} prior to invocation.
  970. */
  971. HSLFPictureData build() {
  972. Objects.requireNonNull(record, "Can't build an instance until the record has been assigned.");
  973. return HSLFPictureData.createFromSlideshowData(type, recordContainer, record, imageData, signature);
  974. }
  975. /**
  976. * Sets the {@link EscherBSERecord} with which this factory should create a {@link HSLFPictureData}.
  977. */
  978. PictureFactory setRecord(EscherBSERecord bse) {
  979. record = bse;
  980. return this;
  981. }
  982. }
  983. }