You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

POIDocument.java 18KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497
  1. /* ====================================================================
  2. Licensed to the Apache Software Foundation (ASF) under one or more
  3. contributor license agreements. See the NOTICE file distributed with
  4. this work for additional information regarding copyright ownership.
  5. The ASF licenses this file to You under the Apache License, Version 2.0
  6. (the "License"); you may not use this file except in compliance with
  7. the License. You may obtain a copy of the License at
  8. http://www.apache.org/licenses/LICENSE-2.0
  9. Unless required by applicable law or agreed to in writing, software
  10. distributed under the License is distributed on an "AS IS" BASIS,
  11. WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. See the License for the specific language governing permissions and
  13. limitations under the License.
  14. ==================================================================== */
  15. package org.apache.poi;
  16. import static org.apache.logging.log4j.util.Unbox.box;
  17. import static org.apache.poi.hpsf.PropertySetFactory.newDocumentSummaryInformation;
  18. import java.io.BufferedOutputStream;
  19. import java.io.Closeable;
  20. import java.io.File;
  21. import java.io.FileOutputStream;
  22. import java.io.IOException;
  23. import java.io.InputStream;
  24. import java.io.OutputStream;
  25. import java.security.GeneralSecurityException;
  26. import java.util.List;
  27. import org.apache.commons.io.output.UnsynchronizedByteArrayOutputStream;
  28. import org.apache.logging.log4j.LogManager;
  29. import org.apache.logging.log4j.Logger;
  30. import org.apache.poi.hpsf.DocumentSummaryInformation;
  31. import org.apache.poi.hpsf.PropertySet;
  32. import org.apache.poi.hpsf.PropertySetFactory;
  33. import org.apache.poi.hpsf.SummaryInformation;
  34. import org.apache.poi.hpsf.WritingNotSupportedException;
  35. import org.apache.poi.poifs.crypt.EncryptionInfo;
  36. import org.apache.poi.poifs.crypt.Encryptor;
  37. import org.apache.poi.poifs.crypt.cryptoapi.CryptoAPIDecryptor;
  38. import org.apache.poi.poifs.crypt.cryptoapi.CryptoAPIEncryptor;
  39. import org.apache.poi.poifs.filesystem.DirectoryNode;
  40. import org.apache.poi.poifs.filesystem.DocumentInputStream;
  41. import org.apache.poi.poifs.filesystem.POIFSFileSystem;
  42. import org.apache.poi.util.IOUtils;
  43. import org.apache.poi.util.Internal;
  44. /**
  45. * This holds the common functionality for all POI
  46. * Document classes.
  47. * Currently, this relates to Document Information Properties
  48. */
  49. public abstract class POIDocument implements Closeable {
  50. /** Holds metadata on our document */
  51. private SummaryInformation sInf;
  52. /** Holds further metadata on our document */
  53. private DocumentSummaryInformation dsInf;
  54. /** The directory that our document lives in */
  55. private DirectoryNode directory;
  56. /** For our own logging use */
  57. private static final Logger LOG = LogManager.getLogger(POIDocument.class);
  58. /* Have the property streams been read yet? (Only done on-demand) */
  59. private boolean initialized;
  /**
   * Creates a document attached to the supplied directory node.
   *
   * @param dir the {@link DirectoryNode} the document's information is read from
   */
  protected POIDocument(DirectoryNode dir) {
    directory = dir;
  }
  /**
   * Creates a document attached to the root directory of the given filesystem.
   *
   * @param fs the filesystem the document is read from
   */
  protected POIDocument(POIFSFileSystem fs) {
    this(fs.getRoot());
  }
  76. /**
  77. * Fetch the Document Summary Information of the document
  78. *
  79. * @return The Document Summary Information or null
  80. * if it could not be read for this document.
  81. */
  82. public DocumentSummaryInformation getDocumentSummaryInformation() {
  83. if(!initialized) {
  84. readProperties();
  85. }
  86. return dsInf;
  87. }
  88. /**
  89. * Fetch the Summary Information of the document
  90. *
  91. * @return The Summary information for the document or null
  92. * if it could not be read for this document.
  93. */
  94. public SummaryInformation getSummaryInformation() {
  95. if(!initialized) {
  96. readProperties();
  97. }
  98. return sInf;
  99. }
  100. /**
  101. * Will create whichever of SummaryInformation
  102. * and DocumentSummaryInformation (HPSF) properties
  103. * are not already part of your document.
  104. * This is normally useful when creating a new
  105. * document from scratch.
  106. * If the information properties are already there,
  107. * then nothing will happen.
  108. */
  109. public void createInformationProperties() {
  110. if (!initialized) {
  111. readProperties();
  112. }
  113. if (sInf == null) {
  114. sInf = PropertySetFactory.newSummaryInformation();
  115. }
  116. if (dsInf == null) {
  117. dsInf = newDocumentSummaryInformation();
  118. }
  119. }
  120. /**
  121. * Find, and create objects for, the standard
  122. * Document Information Properties (HPSF).
  123. * If a given property set is missing or corrupt,
  124. * it will remain null;
  125. */
  126. @Internal
  127. public void readProperties() {
  128. if (initialized) {
  129. return;
  130. }
  131. DocumentSummaryInformation dsi = readPropertySet(DocumentSummaryInformation.class, DocumentSummaryInformation.DEFAULT_STREAM_NAME);
  132. if (dsi != null) {
  133. dsInf = dsi;
  134. }
  135. SummaryInformation si = readPropertySet(SummaryInformation.class, SummaryInformation.DEFAULT_STREAM_NAME);
  136. if (si != null) {
  137. sInf = si;
  138. }
  139. // Mark the fact that we've now loaded up the properties
  140. initialized = true;
  141. }
  142. @SuppressWarnings("unchecked")
  143. private <T> T readPropertySet(Class<T> clazz, String name) {
  144. String localName = clazz.getName().substring(clazz.getName().lastIndexOf('.')+1);
  145. try {
  146. PropertySet ps = getPropertySet(name);
  147. if (clazz.isInstance(ps)) {
  148. return (T)ps;
  149. } else if (ps != null) {
  150. LOG.atWarn().log("{} property set came back with wrong class - {}", localName, ps.getClass().getName());
  151. } else {
  152. LOG.atWarn().log("{} property set came back as null", localName);
  153. }
  154. } catch (IOException e) {
  155. LOG.atError().withThrowable(e).log("can't retrieve property set");
  156. }
  157. return null;
  158. }
  159. /**
  160. * For a given named property entry, either return it or null if
  161. * if it wasn't found
  162. *
  163. * @param setName The property to read
  164. * @return The value of the given property or null if it wasn't found.
  165. *
  166. * @throws IOException If retrieving properties fails
  167. */
  168. @SuppressWarnings("WeakerAccess")
  169. protected PropertySet getPropertySet(String setName) throws IOException {
  170. return getPropertySet(setName, getEncryptionInfo());
  171. }
  172. /**
  173. * For a given named property entry, either return it or null if
  174. * if it wasn't found
  175. *
  176. * @param setName The property to read
  177. * @param encryptionInfo the encryption descriptor in case of cryptoAPI encryption
  178. * @return The value of the given property or null if it wasn't found.
  179. *
  180. * @throws IOException If retrieving properties fails
  181. */
  182. @SuppressWarnings("WeakerAccess")
  183. protected PropertySet getPropertySet(String setName, EncryptionInfo encryptionInfo) throws IOException {
  184. DirectoryNode dirNode = directory;
  185. POIFSFileSystem encPoifs = null;
  186. String step = "getting";
  187. try {
  188. if (encryptionInfo != null && encryptionInfo.isDocPropsEncrypted()) {
  189. step = "getting encrypted";
  190. String encryptedStream = getEncryptedPropertyStreamName();
  191. if (!dirNode.hasEntry(encryptedStream)) {
  192. throw new EncryptedDocumentException("can't find encrypted property stream '"+encryptedStream+"'");
  193. }
  194. CryptoAPIDecryptor dec = (CryptoAPIDecryptor)encryptionInfo.getDecryptor();
  195. encPoifs = dec.getSummaryEntries(dirNode, encryptedStream);
  196. dirNode = encPoifs.getRoot();
  197. }
  198. //directory can be null when creating new documents
  199. if (dirNode == null || !dirNode.hasEntry(setName)) {
  200. return null;
  201. }
  202. // Find the entry, and get an input stream for it
  203. step = "getting";
  204. try (DocumentInputStream dis = dirNode.createDocumentInputStream(dirNode.getEntry(setName))) {
  205. // Create the Property Set
  206. step = "creating";
  207. return PropertySetFactory.create(dis);
  208. }
  209. } catch (IOException e) {
  210. throw e;
  211. } catch (Exception e) {
  212. throw new IOException("Error "+step+" property set with name " + setName, e);
  213. } finally {
  214. IOUtils.closeQuietly(encPoifs);
  215. }
  216. }
  217. /**
  218. * Writes out the updated standard Document Information Properties (HPSF)
  219. * into the currently open POIFSFileSystem
  220. *
  221. * @throws IOException if an error when writing to the open
  222. * {@link POIFSFileSystem} occurs
  223. */
  224. protected void writeProperties() throws IOException {
  225. validateInPlaceWritePossible();
  226. writeProperties(directory.getFileSystem(), null);
  227. }
  228. /**
  229. * Writes out the standard Document Information Properties (HPSF)
  230. * @param outFS the POIFSFileSystem to write the properties into
  231. *
  232. * @throws IOException if an error when writing to the
  233. * {@link POIFSFileSystem} occurs
  234. */
  235. @Internal
  236. public void writeProperties(POIFSFileSystem outFS) throws IOException {
  237. writeProperties(outFS, null);
  238. }
  239. /**
  240. * Writes out the standard Document Information Properties (HPSF)
  241. * @param outFS the {@link POIFSFileSystem} to write the properties into
  242. * @param writtenEntries a list of POIFS entries to add the property names too
  243. *
  244. * @throws IOException if an error when writing to the
  245. * {@link POIFSFileSystem} occurs
  246. */
  247. protected void writeProperties(POIFSFileSystem outFS, List<String> writtenEntries) throws IOException {
  248. final EncryptionInfo ei = getEncryptionInfo();
  249. Encryptor encGen = (ei == null) ? null : ei.getEncryptor();
  250. final boolean encryptProps = (ei != null && ei.isDocPropsEncrypted() && encGen instanceof CryptoAPIEncryptor);
  251. try (POIFSFileSystem tmpFS = new POIFSFileSystem()) {
  252. final POIFSFileSystem fs = (encryptProps) ? tmpFS : outFS;
  253. writePropertySet(SummaryInformation.DEFAULT_STREAM_NAME, getSummaryInformation(), fs, writtenEntries);
  254. writePropertySet(DocumentSummaryInformation.DEFAULT_STREAM_NAME, getDocumentSummaryInformation(), fs, writtenEntries);
  255. if (!encryptProps) {
  256. return;
  257. }
  258. // Only CryptoAPI encryption supports encrypted property sets
  259. // create empty document summary
  260. writePropertySet(DocumentSummaryInformation.DEFAULT_STREAM_NAME, newDocumentSummaryInformation(), outFS);
  261. // remove summary, if previously available
  262. if (outFS.getRoot().hasEntry(SummaryInformation.DEFAULT_STREAM_NAME)) {
  263. outFS.getRoot().getEntry(SummaryInformation.DEFAULT_STREAM_NAME).delete();
  264. }
  265. CryptoAPIEncryptor enc = (CryptoAPIEncryptor) encGen;
  266. try {
  267. enc.setSummaryEntries(outFS.getRoot(), getEncryptedPropertyStreamName(), fs);
  268. } catch (GeneralSecurityException e) {
  269. throw new IOException(e);
  270. }
  271. }
  272. }
  273. private void writePropertySet(String name, PropertySet ps, POIFSFileSystem outFS, List<String> writtenEntries)
  274. throws IOException {
  275. if (ps == null) {
  276. return;
  277. }
  278. writePropertySet(name, ps, outFS);
  279. if (writtenEntries != null) {
  280. writtenEntries.add(name);
  281. }
  282. }
  283. /**
  284. * Writes out a given PropertySet
  285. *
  286. * @param name the (POIFS Level) name of the property to write
  287. * @param set the PropertySet to write out
  288. * @param outFS the {@link POIFSFileSystem} to write the property into
  289. *
  290. * @throws IOException if an error when writing to the
  291. * {@link POIFSFileSystem} occurs
  292. */
  293. private void writePropertySet(String name, PropertySet set, POIFSFileSystem outFS) throws IOException {
  294. try (UnsynchronizedByteArrayOutputStream bOut = new UnsynchronizedByteArrayOutputStream()) {
  295. PropertySet mSet = new PropertySet(set);
  296. mSet.write(bOut);
  297. try (InputStream bIn = bOut.toInputStream()) {
  298. // Create or Update the Property Set stream in the POIFS
  299. outFS.createOrUpdateDocument(bIn, name);
  300. }
  301. LOG.atInfo().log("Wrote property set {} of size {}", name, box(bOut.size()));
  302. } catch(WritingNotSupportedException ignored) {
  303. LOG.atError().log("Couldn't write property set with name {} as not supported by HPSF yet", name);
  304. }
  305. }
  306. /**
  307. * Called during a {@link #write()} to ensure that the Document (and
  308. * associated {@link POIFSFileSystem}) was opened in a way compatible
  309. * with an in-place write.
  310. *
  311. * @throws IllegalStateException if the document was opened suitably
  312. */
  313. protected void validateInPlaceWritePossible() throws IllegalStateException {
  314. if (directory == null) {
  315. throw new IllegalStateException("Newly created Document, cannot save in-place");
  316. }
  317. if (directory.getParent() != null) {
  318. throw new IllegalStateException("This is not the root Document, cannot save embedded resource in-place");
  319. }
  320. if (directory.getFileSystem() == null ||
  321. !directory.getFileSystem().isInPlaceWriteable()) {
  322. throw new IllegalStateException("Opened read-only or via an InputStream, a Writeable File is required");
  323. }
  324. }
  325. /**
  326. * Writes the document out to the currently open {@link File}, via the
  327. * writeable {@link POIFSFileSystem} it was opened from.
  328. *
  329. * <p>This will fail (with an {@link IllegalStateException} if the
  330. * document was opened read-only, opened from an {@link InputStream}
  331. * instead of a File, or if this is not the root document. For those cases,
  332. * you must use {@link #write(OutputStream)} or {@link #write(File)} to
  333. * write to a brand new document.
  334. *
  335. * @since POI 3.15 beta 3
  336. *
  337. * @throws IOException thrown on errors writing to the file
  338. * @throws IllegalStateException if this isn't from a writable File
  339. */
  340. public abstract void write() throws IOException;
  341. /**
  342. * Writes the document out to the specified new {@link File}. If the file
  343. * exists, it will be replaced, otherwise a new one will be created
  344. *
  345. * @since POI 3.15 beta 3
  346. *
  347. * @param newFile The new File to write to.
  348. *
  349. * @throws IOException thrown on errors writing to the file
  350. */
  351. public abstract void write(File newFile) throws IOException;
  352. /**
  353. * Writes the document out to the specified output stream. The
  354. * stream is not closed as part of this operation.
  355. *
  356. * Note - if the Document was opened from a {@link File} rather
  357. * than an {@link InputStream}, you <b>must</b> write out using
  358. * {@link #write()} or to a different File. Overwriting the currently
  359. * open file via an OutputStream isn't possible.
  360. *
  361. * If {@code stream} is a {@link FileOutputStream} on a networked drive
  362. * or has a high cost/latency associated with each written byte,
  363. * consider wrapping the OutputStream in a {@link BufferedOutputStream}
  364. * to improve write performance, or use {@link #write()} / {@link #write(File)}
  365. * if possible.
  366. *
  367. * @param out The stream to write to.
  368. *
  369. * @throws IOException thrown on errors writing to the stream
  370. */
  371. public abstract void write(OutputStream out) throws IOException;
  372. /**
  373. * Closes the underlying {@link POIFSFileSystem} from which
  374. * the document was read, if any. Has no effect on documents
  375. * opened from an InputStream, or newly created ones.<p>
  376. *
  377. * Once {@code close()} has been called, no further operations
  378. * should be called on the document.
  379. */
  380. @Override
  381. public void close() throws IOException {
  382. if (directory != null) {
  383. if (directory.getFileSystem() != null) {
  384. directory.getFileSystem().close();
  385. clearDirectory();
  386. }
  387. }
  388. }
  389. @Internal
  390. public DirectoryNode getDirectory() {
  391. return directory;
  392. }
  393. /**
  394. * Clear/unlink the attached directory entry
  395. */
  396. @Internal
  397. protected void clearDirectory() {
  398. directory = null;
  399. }
  400. /**
  401. * check if we were created by POIFS otherwise create a new dummy POIFS
  402. * for storing the package data
  403. *
  404. * @return {@code true} if dummy directory was created, {@code false} otherwise
  405. */
  406. @SuppressWarnings("resource")
  407. @Internal
  408. protected boolean initDirectory() {
  409. if (directory == null) {
  410. directory = new POIFSFileSystem().getRoot(); // NOSONAR
  411. return true;
  412. }
  413. return false;
  414. }
  415. /**
  416. * Replaces the attached directory, e.g. if this document is written
  417. * to a new POIFSFileSystem
  418. *
  419. * @param newDirectory the new directory
  420. */
  421. @Internal
  422. protected void replaceDirectory(DirectoryNode newDirectory) throws IOException {
  423. if (
  424. // do not close if it is actually the same directory or
  425. newDirectory == directory ||
  426. // also for different directories, but same FileSystem
  427. (newDirectory != null && directory != null && newDirectory.getFileSystem() == directory.getFileSystem())) {
  428. return;
  429. }
  430. // close any previous opened DataSource
  431. if (directory != null && directory.getFileSystem() != null) {
  432. directory.getFileSystem().close();
  433. }
  434. directory = newDirectory;
  435. }
  436. /**
  437. * @return the stream name of the property set collection, if the document is encrypted
  438. */
  439. protected String getEncryptedPropertyStreamName() {
  440. return "encryption";
  441. }
  442. /**
  443. * @return the encryption info if the document is encrypted, otherwise {@code null}
  444. *
  445. * @throws IOException If retrieving the encryption information fails
  446. */
  447. public EncryptionInfo getEncryptionInfo() throws IOException {
  448. return null;
  449. }
  450. }