You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

POIDocument.java 18KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502
  1. /* ====================================================================
  2. Licensed to the Apache Software Foundation (ASF) under one or more
  3. contributor license agreements. See the NOTICE file distributed with
  4. this work for additional information regarding copyright ownership.
  5. The ASF licenses this file to You under the Apache License, Version 2.0
  6. (the "License"); you may not use this file except in compliance with
  7. the License. You may obtain a copy of the License at
  8. http://www.apache.org/licenses/LICENSE-2.0
  9. Unless required by applicable law or agreed to in writing, software
  10. distributed under the License is distributed on an "AS IS" BASIS,
  11. WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. See the License for the specific language governing permissions and
  13. limitations under the License.
  14. ==================================================================== */
  15. package org.apache.poi;
  16. import static org.apache.logging.log4j.util.Unbox.box;
  17. import static org.apache.poi.hpsf.PropertySetFactory.newDocumentSummaryInformation;
  18. import java.io.BufferedOutputStream;
  19. import java.io.ByteArrayInputStream;
  20. import java.io.ByteArrayOutputStream;
  21. import java.io.Closeable;
  22. import java.io.File;
  23. import java.io.FileOutputStream;
  24. import java.io.IOException;
  25. import java.io.InputStream;
  26. import java.io.OutputStream;
  27. import java.security.GeneralSecurityException;
  28. import java.util.List;
  29. import org.apache.logging.log4j.LogManager;
  30. import org.apache.logging.log4j.Logger;
  31. import org.apache.poi.hpsf.DocumentSummaryInformation;
  32. import org.apache.poi.hpsf.PropertySet;
  33. import org.apache.poi.hpsf.PropertySetFactory;
  34. import org.apache.poi.hpsf.SummaryInformation;
  35. import org.apache.poi.hpsf.WritingNotSupportedException;
  36. import org.apache.poi.poifs.crypt.EncryptionInfo;
  37. import org.apache.poi.poifs.crypt.Encryptor;
  38. import org.apache.poi.poifs.crypt.cryptoapi.CryptoAPIDecryptor;
  39. import org.apache.poi.poifs.crypt.cryptoapi.CryptoAPIEncryptor;
  40. import org.apache.poi.poifs.filesystem.DirectoryNode;
  41. import org.apache.poi.poifs.filesystem.DocumentInputStream;
  42. import org.apache.poi.poifs.filesystem.POIFSFileSystem;
  43. import org.apache.poi.util.IOUtils;
  44. import org.apache.poi.util.Internal;
  45. /**
  46. * This holds the common functionality for all POI
  47. * Document classes.
  48. * Currently, this relates to Document Information Properties
  49. */
  50. public abstract class POIDocument implements Closeable {
  51. /** Holds metadata on our document */
  52. private SummaryInformation sInf;
  53. /** Holds further metadata on our document */
  54. private DocumentSummaryInformation dsInf;
  55. /** The directory that our document lives in */
  56. private DirectoryNode directory;
  57. /** For our own logging use */
  58. private static final Logger LOG = LogManager.getLogger(POIDocument.class);
  59. /* Have the property streams been read yet? (Only done on-demand) */
  60. private boolean initialized;
  61. /**
  62. * Constructs a POIDocument with the given directory node.
  63. *
  64. * @param dir The {@link DirectoryNode} where information is read from.
  65. */
  66. protected POIDocument(DirectoryNode dir) {
  67. this.directory = dir;
  68. }
  69. /**
  70. * Constructs from the default POIFS
  71. *
  72. * @param fs the filesystem the document is read from
  73. */
  74. protected POIDocument(POIFSFileSystem fs) {
  75. this(fs.getRoot());
  76. }
  77. /**
  78. * Fetch the Document Summary Information of the document
  79. *
  80. * @return The Document Summary Information or null
  81. * if it could not be read for this document.
  82. */
  83. public DocumentSummaryInformation getDocumentSummaryInformation() {
  84. if(!initialized) {
  85. readProperties();
  86. }
  87. return dsInf;
  88. }
  89. /**
  90. * Fetch the Summary Information of the document
  91. *
  92. * @return The Summary information for the document or null
  93. * if it could not be read for this document.
  94. */
  95. public SummaryInformation getSummaryInformation() {
  96. if(!initialized) {
  97. readProperties();
  98. }
  99. return sInf;
  100. }
  101. /**
  102. * Will create whichever of SummaryInformation
  103. * and DocumentSummaryInformation (HPSF) properties
  104. * are not already part of your document.
  105. * This is normally useful when creating a new
  106. * document from scratch.
  107. * If the information properties are already there,
  108. * then nothing will happen.
  109. */
  110. public void createInformationProperties() {
  111. if (!initialized) {
  112. readProperties();
  113. }
  114. if (sInf == null) {
  115. sInf = PropertySetFactory.newSummaryInformation();
  116. }
  117. if (dsInf == null) {
  118. dsInf = newDocumentSummaryInformation();
  119. }
  120. }
  121. /**
  122. * Find, and create objects for, the standard
  123. * Document Information Properties (HPSF).
  124. * If a given property set is missing or corrupt,
  125. * it will remain null;
  126. */
  127. @Internal
  128. public void readProperties() {
  129. if (initialized) {
  130. return;
  131. }
  132. DocumentSummaryInformation dsi = readPropertySet(DocumentSummaryInformation.class, DocumentSummaryInformation.DEFAULT_STREAM_NAME);
  133. if (dsi != null) {
  134. dsInf = dsi;
  135. }
  136. SummaryInformation si = readPropertySet(SummaryInformation.class, SummaryInformation.DEFAULT_STREAM_NAME);
  137. if (si != null) {
  138. sInf = si;
  139. }
  140. // Mark the fact that we've now loaded up the properties
  141. initialized = true;
  142. }
  143. @SuppressWarnings("unchecked")
  144. private <T> T readPropertySet(Class<T> clazz, String name) {
  145. String localName = clazz.getName().substring(clazz.getName().lastIndexOf('.')+1);
  146. try {
  147. PropertySet ps = getPropertySet(name);
  148. if (clazz.isInstance(ps)) {
  149. return (T)ps;
  150. } else if (ps != null) {
  151. LOG.atWarn().log("{} property set came back with wrong class - {}", localName, ps.getClass().getName());
  152. } else {
  153. LOG.atWarn().log("{} property set came back as null {}", localName, box(5));
  154. }
  155. } catch (IOException e) {
  156. LOG.atError().withThrowable(e).log("can't retrieve property set");
  157. }
  158. return null;
  159. }
  160. /**
  161. * For a given named property entry, either return it or null if
  162. * if it wasn't found
  163. *
  164. * @param setName The property to read
  165. * @return The value of the given property or null if it wasn't found.
  166. *
  167. * @throws IOException If retrieving properties fails
  168. */
  169. @SuppressWarnings("WeakerAccess")
  170. protected PropertySet getPropertySet(String setName) throws IOException {
  171. return getPropertySet(setName, getEncryptionInfo());
  172. }
  173. /**
  174. * For a given named property entry, either return it or null if
  175. * if it wasn't found
  176. *
  177. * @param setName The property to read
  178. * @param encryptionInfo the encryption descriptor in case of cryptoAPI encryption
  179. * @return The value of the given property or null if it wasn't found.
  180. *
  181. * @throws IOException If retrieving properties fails
  182. */
  183. @SuppressWarnings("WeakerAccess")
  184. protected PropertySet getPropertySet(String setName, EncryptionInfo encryptionInfo) throws IOException {
  185. DirectoryNode dirNode = directory;
  186. POIFSFileSystem encPoifs = null;
  187. String step = "getting";
  188. try {
  189. if (encryptionInfo != null && encryptionInfo.isDocPropsEncrypted()) {
  190. step = "getting encrypted";
  191. String encryptedStream = getEncryptedPropertyStreamName();
  192. if (!dirNode.hasEntry(encryptedStream)) {
  193. throw new EncryptedDocumentException("can't find encrypted property stream '"+encryptedStream+"'");
  194. }
  195. CryptoAPIDecryptor dec = (CryptoAPIDecryptor)encryptionInfo.getDecryptor();
  196. encPoifs = dec.getSummaryEntries(dirNode, encryptedStream);
  197. dirNode = encPoifs.getRoot();
  198. }
  199. //directory can be null when creating new documents
  200. if (dirNode == null || !dirNode.hasEntry(setName)) {
  201. return null;
  202. }
  203. // Find the entry, and get an input stream for it
  204. step = "getting";
  205. try (DocumentInputStream dis = dirNode.createDocumentInputStream(dirNode.getEntry(setName))) {
  206. // Create the Property Set
  207. step = "creating";
  208. return PropertySetFactory.create(dis);
  209. }
  210. } catch (IOException e) {
  211. throw e;
  212. } catch (Exception e) {
  213. throw new IOException("Error "+step+" property set with name " + setName, e);
  214. } finally {
  215. IOUtils.closeQuietly(encPoifs);
  216. }
  217. }
  218. /**
  219. * Writes out the updated standard Document Information Properties (HPSF)
  220. * into the currently open POIFSFileSystem
  221. *
  222. * @throws IOException if an error when writing to the open
  223. * {@link POIFSFileSystem} occurs
  224. */
  225. protected void writeProperties() throws IOException {
  226. validateInPlaceWritePossible();
  227. writeProperties(directory.getFileSystem(), null);
  228. }
  229. /**
  230. * Writes out the standard Document Information Properties (HPSF)
  231. * @param outFS the POIFSFileSystem to write the properties into
  232. *
  233. * @throws IOException if an error when writing to the
  234. * {@link POIFSFileSystem} occurs
  235. */
  236. @Internal
  237. public void writeProperties(POIFSFileSystem outFS) throws IOException {
  238. writeProperties(outFS, null);
  239. }
  240. /**
  241. * Writes out the standard Document Information Properties (HPSF)
  242. * @param outFS the {@link POIFSFileSystem} to write the properties into
  243. * @param writtenEntries a list of POIFS entries to add the property names too
  244. *
  245. * @throws IOException if an error when writing to the
  246. * {@link POIFSFileSystem} occurs
  247. */
  248. protected void writeProperties(POIFSFileSystem outFS, List<String> writtenEntries) throws IOException {
  249. final EncryptionInfo ei = getEncryptionInfo();
  250. final boolean encryptProps = (ei != null && ei.isDocPropsEncrypted());
  251. try (POIFSFileSystem tmpFS = new POIFSFileSystem()) {
  252. final POIFSFileSystem fs = (encryptProps) ? tmpFS : outFS;
  253. writePropertySet(SummaryInformation.DEFAULT_STREAM_NAME, getSummaryInformation(), fs, writtenEntries);
  254. writePropertySet(DocumentSummaryInformation.DEFAULT_STREAM_NAME, getDocumentSummaryInformation(), fs, writtenEntries);
  255. if (!encryptProps) {
  256. return;
  257. }
  258. // create empty document summary
  259. writePropertySet(DocumentSummaryInformation.DEFAULT_STREAM_NAME, newDocumentSummaryInformation(), outFS);
  260. // remove summary, if previously available
  261. if (outFS.getRoot().hasEntry(SummaryInformation.DEFAULT_STREAM_NAME)) {
  262. outFS.getRoot().getEntry(SummaryInformation.DEFAULT_STREAM_NAME).delete();
  263. }
  264. Encryptor encGen = ei.getEncryptor();
  265. if (!(encGen instanceof CryptoAPIEncryptor)) {
  266. throw new EncryptedDocumentException(
  267. "Using " + ei.getEncryptionMode() + " encryption. Only CryptoAPI encryption supports encrypted property sets!");
  268. }
  269. CryptoAPIEncryptor enc = (CryptoAPIEncryptor) encGen;
  270. try {
  271. enc.setSummaryEntries(outFS.getRoot(), getEncryptedPropertyStreamName(), fs);
  272. } catch (GeneralSecurityException e) {
  273. throw new IOException(e);
  274. }
  275. }
  276. }
  277. private void writePropertySet(String name, PropertySet ps, POIFSFileSystem outFS, List<String> writtenEntries)
  278. throws IOException {
  279. if (ps == null) {
  280. return;
  281. }
  282. writePropertySet(name, ps, outFS);
  283. if (writtenEntries != null) {
  284. writtenEntries.add(name);
  285. }
  286. }
  287. /**
  288. * Writes out a given PropertySet
  289. *
  290. * @param name the (POIFS Level) name of the property to write
  291. * @param set the PropertySet to write out
  292. * @param outFS the {@link POIFSFileSystem} to write the property into
  293. *
  294. * @throws IOException if an error when writing to the
  295. * {@link POIFSFileSystem} occurs
  296. */
  297. private void writePropertySet(String name, PropertySet set, POIFSFileSystem outFS) throws IOException {
  298. try {
  299. PropertySet mSet = new PropertySet(set);
  300. ByteArrayOutputStream bOut = new ByteArrayOutputStream();
  301. mSet.write(bOut);
  302. byte[] data = bOut.toByteArray();
  303. ByteArrayInputStream bIn = new ByteArrayInputStream(data);
  304. // Create or Update the Property Set stream in the POIFS
  305. outFS.createOrUpdateDocument(bIn, name);
  306. LOG.atInfo().log("Wrote property set {} of size {}", name, box(data.length));
  307. } catch(WritingNotSupportedException ignored) {
  308. LOG.atError().log("Couldn't write property set with name {} as not supported by HPSF yet", name);
  309. }
  310. }
  311. /**
  312. * Called during a {@link #write()} to ensure that the Document (and
  313. * associated {@link POIFSFileSystem}) was opened in a way compatible
  314. * with an in-place write.
  315. *
  316. * @throws IllegalStateException if the document was opened suitably
  317. */
  318. protected void validateInPlaceWritePossible() throws IllegalStateException {
  319. if (directory == null) {
  320. throw new IllegalStateException("Newly created Document, cannot save in-place");
  321. }
  322. if (directory.getParent() != null) {
  323. throw new IllegalStateException("This is not the root Document, cannot save embedded resource in-place");
  324. }
  325. if (directory.getFileSystem() == null ||
  326. !directory.getFileSystem().isInPlaceWriteable()) {
  327. throw new IllegalStateException("Opened read-only or via an InputStream, a Writeable File is required");
  328. }
  329. }
  330. /**
  331. * Writes the document out to the currently open {@link File}, via the
  332. * writeable {@link POIFSFileSystem} it was opened from.
  333. *
  334. * <p>This will fail (with an {@link IllegalStateException} if the
  335. * document was opened read-only, opened from an {@link InputStream}
  336. * instead of a File, or if this is not the root document. For those cases,
  337. * you must use {@link #write(OutputStream)} or {@link #write(File)} to
  338. * write to a brand new document.
  339. *
  340. * @since POI 3.15 beta 3
  341. *
  342. * @throws IOException thrown on errors writing to the file
  343. * @throws IllegalStateException if this isn't from a writable File
  344. */
  345. public abstract void write() throws IOException;
  346. /**
  347. * Writes the document out to the specified new {@link File}. If the file
  348. * exists, it will be replaced, otherwise a new one will be created
  349. *
  350. * @since POI 3.15 beta 3
  351. *
  352. * @param newFile The new File to write to.
  353. *
  354. * @throws IOException thrown on errors writing to the file
  355. */
  356. public abstract void write(File newFile) throws IOException;
  357. /**
  358. * Writes the document out to the specified output stream. The
  359. * stream is not closed as part of this operation.
  360. *
  361. * Note - if the Document was opened from a {@link File} rather
  362. * than an {@link InputStream}, you <b>must</b> write out using
  363. * {@link #write()} or to a different File. Overwriting the currently
  364. * open file via an OutputStream isn't possible.
  365. *
  366. * If {@code stream} is a {@link FileOutputStream} on a networked drive
  367. * or has a high cost/latency associated with each written byte,
  368. * consider wrapping the OutputStream in a {@link BufferedOutputStream}
  369. * to improve write performance, or use {@link #write()} / {@link #write(File)}
  370. * if possible.
  371. *
  372. * @param out The stream to write to.
  373. *
  374. * @throws IOException thrown on errors writing to the stream
  375. */
  376. public abstract void write(OutputStream out) throws IOException;
  377. /**
  378. * Closes the underlying {@link POIFSFileSystem} from which
  379. * the document was read, if any. Has no effect on documents
  380. * opened from an InputStream, or newly created ones.<p>
  381. *
  382. * Once {@code close()} has been called, no further operations
  383. * should be called on the document.
  384. */
  385. @Override
  386. public void close() throws IOException {
  387. if (directory != null) {
  388. if (directory.getFileSystem() != null) {
  389. directory.getFileSystem().close();
  390. clearDirectory();
  391. }
  392. }
  393. }
  394. @Internal
  395. public DirectoryNode getDirectory() {
  396. return directory;
  397. }
  398. /**
  399. * Clear/unlink the attached directory entry
  400. */
  401. @Internal
  402. protected void clearDirectory() {
  403. directory = null;
  404. }
  405. /**
  406. * check if we were created by POIFS otherwise create a new dummy POIFS
  407. * for storing the package data
  408. *
  409. * @return {@code true} if dummy directory was created, {@code false} otherwise
  410. */
  411. @SuppressWarnings("resource")
  412. @Internal
  413. protected boolean initDirectory() {
  414. if (directory == null) {
  415. directory = new POIFSFileSystem().getRoot(); // NOSONAR
  416. return true;
  417. }
  418. return false;
  419. }
  420. /**
  421. * Replaces the attached directory, e.g. if this document is written
  422. * to a new POIFSFileSystem
  423. *
  424. * @param newDirectory the new directory
  425. */
  426. @Internal
  427. protected void replaceDirectory(DirectoryNode newDirectory) throws IOException {
  428. if (
  429. // do not close if it is actually the same directory or
  430. newDirectory == directory ||
  431. // also for different directories, but same FileSystem
  432. (newDirectory != null && directory != null && newDirectory.getFileSystem() == directory.getFileSystem())) {
  433. return;
  434. }
  435. // close any previous opened DataSource
  436. if (directory != null && directory.getFileSystem() != null) {
  437. directory.getFileSystem().close();
  438. }
  439. directory = newDirectory;
  440. }
  441. /**
  442. * @return the stream name of the property set collection, if the document is encrypted
  443. */
  444. protected String getEncryptedPropertyStreamName() {
  445. return "encryption";
  446. }
  447. /**
  448. * @return the encryption info if the document is encrypted, otherwise {@code null}
  449. *
  450. * @throws IOException If retrieving the encryption information fails
  451. */
  452. public EncryptionInfo getEncryptionInfo() throws IOException {
  453. return null;
  454. }
  455. }