You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

POIDocument.java 18KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483
  1. /* ====================================================================
  2. Licensed to the Apache Software Foundation (ASF) under one or more
  3. contributor license agreements. See the NOTICE file distributed with
  4. this work for additional information regarding copyright ownership.
  5. The ASF licenses this file to You under the Apache License, Version 2.0
  6. (the "License"); you may not use this file except in compliance with
  7. the License. You may obtain a copy of the License at
  8. http://www.apache.org/licenses/LICENSE-2.0
  9. Unless required by applicable law or agreed to in writing, software
  10. distributed under the License is distributed on an "AS IS" BASIS,
  11. WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. See the License for the specific language governing permissions and
  13. limitations under the License.
  14. ==================================================================== */
  15. package org.apache.poi;
  16. import static org.apache.poi.hpsf.PropertySetFactory.newDocumentSummaryInformation;
  17. import java.io.ByteArrayInputStream;
  18. import java.io.ByteArrayOutputStream;
  19. import java.io.Closeable;
  20. import java.io.File;
  21. import java.io.IOException;
  22. import java.io.InputStream;
  23. import java.io.OutputStream;
  24. import java.security.GeneralSecurityException;
  25. import java.util.List;
  26. import org.apache.poi.hpsf.DocumentSummaryInformation;
  27. import org.apache.poi.hpsf.PropertySet;
  28. import org.apache.poi.hpsf.PropertySetFactory;
  29. import org.apache.poi.hpsf.SummaryInformation;
  30. import org.apache.poi.hpsf.WritingNotSupportedException;
  31. import org.apache.poi.poifs.crypt.EncryptionInfo;
  32. import org.apache.poi.poifs.crypt.Encryptor;
  33. import org.apache.poi.poifs.crypt.cryptoapi.CryptoAPIDecryptor;
  34. import org.apache.poi.poifs.crypt.cryptoapi.CryptoAPIEncryptor;
  35. import org.apache.poi.poifs.filesystem.DirectoryNode;
  36. import org.apache.poi.poifs.filesystem.DocumentInputStream;
  37. import org.apache.poi.poifs.filesystem.POIFSFileSystem;
  38. import org.apache.poi.util.IOUtils;
  39. import org.apache.poi.util.Internal;
  40. import org.apache.poi.util.POILogFactory;
  41. import org.apache.poi.util.POILogger;
  42. /**
  43. * This holds the common functionality for all POI
  44. * Document classes.
  45. * Currently, this relates to Document Information Properties
  46. */
  47. public abstract class POIDocument implements Closeable {
  48. /** Holds metadata on our document */
  49. private SummaryInformation sInf;
  50. /** Holds further metadata on our document */
  51. private DocumentSummaryInformation dsInf;
  52. /** The directory that our document lives in */
  53. private DirectoryNode directory;
  54. /** For our own logging use */
  55. private static final POILogger logger = POILogFactory.getLogger(POIDocument.class);
  56. /* Have the property streams been read yet? (Only done on-demand) */
  57. private boolean initialized;
  58. /**
  59. * Constructs a POIDocument with the given directory node.
  60. *
  61. * @param dir The {@link DirectoryNode} where information is read from.
  62. */
  63. protected POIDocument(DirectoryNode dir) {
  64. this.directory = dir;
  65. }
  66. /**
  67. * Constructs from the default POIFS
  68. *
  69. * @param fs the filesystem the document is read from
  70. */
  71. protected POIDocument(POIFSFileSystem fs) {
  72. this(fs.getRoot());
  73. }
  74. /**
  75. * Fetch the Document Summary Information of the document
  76. *
  77. * @return The Document Summary Information or null
  78. * if it could not be read for this document.
  79. */
  80. public DocumentSummaryInformation getDocumentSummaryInformation() {
  81. if(!initialized) {
  82. readProperties();
  83. }
  84. return dsInf;
  85. }
  86. /**
  87. * Fetch the Summary Information of the document
  88. *
  89. * @return The Summary information for the document or null
  90. * if it could not be read for this document.
  91. */
  92. public SummaryInformation getSummaryInformation() {
  93. if(!initialized) {
  94. readProperties();
  95. }
  96. return sInf;
  97. }
  98. /**
  99. * Will create whichever of SummaryInformation
  100. * and DocumentSummaryInformation (HPSF) properties
  101. * are not already part of your document.
  102. * This is normally useful when creating a new
  103. * document from scratch.
  104. * If the information properties are already there,
  105. * then nothing will happen.
  106. */
  107. public void createInformationProperties() {
  108. if (!initialized) {
  109. readProperties();
  110. }
  111. if (sInf == null) {
  112. sInf = PropertySetFactory.newSummaryInformation();
  113. }
  114. if (dsInf == null) {
  115. dsInf = newDocumentSummaryInformation();
  116. }
  117. }
  118. /**
  119. * Find, and create objects for, the standard
  120. * Document Information Properties (HPSF).
  121. * If a given property set is missing or corrupt,
  122. * it will remain null;
  123. */
  124. protected void readProperties() {
  125. if (initialized) {
  126. return;
  127. }
  128. DocumentSummaryInformation dsi = readPropertySet(DocumentSummaryInformation.class, DocumentSummaryInformation.DEFAULT_STREAM_NAME);
  129. if (dsi != null) {
  130. dsInf = dsi;
  131. }
  132. SummaryInformation si = readPropertySet(SummaryInformation.class, SummaryInformation.DEFAULT_STREAM_NAME);
  133. if (si != null) {
  134. sInf = si;
  135. }
  136. // Mark the fact that we've now loaded up the properties
  137. initialized = true;
  138. }
  139. @SuppressWarnings("unchecked")
  140. private <T> T readPropertySet(Class<T> clazz, String name) {
  141. String localName = clazz.getName().substring(clazz.getName().lastIndexOf('.')+1);
  142. try {
  143. PropertySet ps = getPropertySet(name);
  144. if (clazz.isInstance(ps)) {
  145. return (T)ps;
  146. } else if (ps != null) {
  147. logger.log(POILogger.WARN, localName+" property set came back with wrong class - "+ps.getClass().getName());
  148. } else {
  149. logger.log(POILogger.WARN, localName+" property set came back as null");
  150. }
  151. } catch (IOException e) {
  152. logger.log(POILogger.ERROR, "can't retrieve property set", e);
  153. }
  154. return null;
  155. }
  156. /**
  157. * For a given named property entry, either return it or null if
  158. * if it wasn't found
  159. *
  160. * @param setName The property to read
  161. * @return The value of the given property or null if it wasn't found.
  162. *
  163. * @throws IOException If retrieving properties fails
  164. */
  165. @SuppressWarnings("WeakerAccess")
  166. protected PropertySet getPropertySet(String setName) throws IOException {
  167. return getPropertySet(setName, getEncryptionInfo());
  168. }
  169. /**
  170. * For a given named property entry, either return it or null if
  171. * if it wasn't found
  172. *
  173. * @param setName The property to read
  174. * @param encryptionInfo the encryption descriptor in case of cryptoAPI encryption
  175. * @return The value of the given property or null if it wasn't found.
  176. *
  177. * @throws IOException If retrieving properties fails
  178. */
  179. @SuppressWarnings("WeakerAccess")
  180. protected PropertySet getPropertySet(String setName, EncryptionInfo encryptionInfo) throws IOException {
  181. DirectoryNode dirNode = directory;
  182. POIFSFileSystem encPoifs = null;
  183. String step = "getting";
  184. try {
  185. if (encryptionInfo != null && encryptionInfo.isDocPropsEncrypted()) {
  186. step = "getting encrypted";
  187. String encryptedStream = getEncryptedPropertyStreamName();
  188. if (!dirNode.hasEntry(encryptedStream)) {
  189. throw new EncryptedDocumentException("can't find encrypted property stream '"+encryptedStream+"'");
  190. }
  191. CryptoAPIDecryptor dec = (CryptoAPIDecryptor)encryptionInfo.getDecryptor();
  192. encPoifs = dec.getSummaryEntries(dirNode, encryptedStream);
  193. dirNode = encPoifs.getRoot();
  194. }
  195. //directory can be null when creating new documents
  196. if (dirNode == null || !dirNode.hasEntry(setName)) {
  197. return null;
  198. }
  199. // Find the entry, and get an input stream for it
  200. step = "getting";
  201. try (DocumentInputStream dis = dirNode.createDocumentInputStream(dirNode.getEntry(setName))) {
  202. // Create the Property Set
  203. step = "creating";
  204. return PropertySetFactory.create(dis);
  205. }
  206. } catch (IOException e) {
  207. throw e;
  208. } catch (Exception e) {
  209. throw new IOException("Error "+step+" property set with name " + setName, e);
  210. } finally {
  211. IOUtils.closeQuietly(encPoifs);
  212. }
  213. }
  214. /**
  215. * Writes out the updated standard Document Information Properties (HPSF)
  216. * into the currently open POIFSFileSystem
  217. *
  218. * @throws IOException if an error when writing to the open
  219. * {@link POIFSFileSystem} occurs
  220. */
  221. protected void writeProperties() throws IOException {
  222. validateInPlaceWritePossible();
  223. writeProperties(directory.getFileSystem(), null);
  224. }
  225. /**
  226. * Writes out the standard Document Information Properties (HPSF)
  227. * @param outFS the POIFSFileSystem to write the properties into
  228. *
  229. * @throws IOException if an error when writing to the
  230. * {@link POIFSFileSystem} occurs
  231. */
  232. protected void writeProperties(POIFSFileSystem outFS) throws IOException {
  233. writeProperties(outFS, null);
  234. }
  235. /**
  236. * Writes out the standard Document Information Properties (HPSF)
  237. * @param outFS the {@link POIFSFileSystem} to write the properties into
  238. * @param writtenEntries a list of POIFS entries to add the property names too
  239. *
  240. * @throws IOException if an error when writing to the
  241. * {@link POIFSFileSystem} occurs
  242. */
  243. protected void writeProperties(POIFSFileSystem outFS, List<String> writtenEntries) throws IOException {
  244. final EncryptionInfo ei = getEncryptionInfo();
  245. final boolean encryptProps = (ei != null && ei.isDocPropsEncrypted());
  246. try (POIFSFileSystem tmpFS = new POIFSFileSystem()) {
  247. final POIFSFileSystem fs = (encryptProps) ? tmpFS : outFS;
  248. writePropertySet(SummaryInformation.DEFAULT_STREAM_NAME, getSummaryInformation(), fs, writtenEntries);
  249. writePropertySet(DocumentSummaryInformation.DEFAULT_STREAM_NAME, getDocumentSummaryInformation(), fs, writtenEntries);
  250. if (!encryptProps) {
  251. return;
  252. }
  253. // create empty document summary
  254. writePropertySet(DocumentSummaryInformation.DEFAULT_STREAM_NAME, newDocumentSummaryInformation(), outFS);
  255. // remove summary, if previously available
  256. if (outFS.getRoot().hasEntry(SummaryInformation.DEFAULT_STREAM_NAME)) {
  257. outFS.getRoot().getEntry(SummaryInformation.DEFAULT_STREAM_NAME).delete();
  258. }
  259. Encryptor encGen = ei.getEncryptor();
  260. if (!(encGen instanceof CryptoAPIEncryptor)) {
  261. throw new EncryptedDocumentException(
  262. "Using " + ei.getEncryptionMode() + " encryption. Only CryptoAPI encryption supports encrypted property sets!");
  263. }
  264. CryptoAPIEncryptor enc = (CryptoAPIEncryptor) encGen;
  265. try {
  266. enc.setSummaryEntries(outFS.getRoot(), getEncryptedPropertyStreamName(), fs);
  267. } catch (GeneralSecurityException e) {
  268. throw new IOException(e);
  269. }
  270. }
  271. }
  272. private void writePropertySet(String name, PropertySet ps, POIFSFileSystem outFS, List<String> writtenEntries)
  273. throws IOException {
  274. if (ps == null) {
  275. return;
  276. }
  277. writePropertySet(name, ps, outFS);
  278. if (writtenEntries != null) {
  279. writtenEntries.add(name);
  280. }
  281. }
  282. /**
  283. * Writes out a given PropertySet
  284. *
  285. * @param name the (POIFS Level) name of the property to write
  286. * @param set the PropertySet to write out
  287. * @param outFS the {@link POIFSFileSystem} to write the property into
  288. *
  289. * @throws IOException if an error when writing to the
  290. * {@link POIFSFileSystem} occurs
  291. */
  292. private void writePropertySet(String name, PropertySet set, POIFSFileSystem outFS) throws IOException {
  293. try {
  294. PropertySet mSet = new PropertySet(set);
  295. ByteArrayOutputStream bOut = new ByteArrayOutputStream();
  296. mSet.write(bOut);
  297. byte[] data = bOut.toByteArray();
  298. ByteArrayInputStream bIn = new ByteArrayInputStream(data);
  299. // Create or Update the Property Set stream in the POIFS
  300. outFS.createOrUpdateDocument(bIn, name);
  301. logger.log(POILogger.INFO, "Wrote property set " + name + " of size " + data.length);
  302. } catch(WritingNotSupportedException ignored) {
  303. logger.log( POILogger.ERROR, "Couldn't write property set with name " + name + " as not supported by HPSF yet");
  304. }
  305. }
  306. /**
  307. * Called during a {@link #write()} to ensure that the Document (and
  308. * associated {@link POIFSFileSystem}) was opened in a way compatible
  309. * with an in-place write.
  310. *
  311. * @throws IllegalStateException if the document was opened suitably
  312. */
  313. protected void validateInPlaceWritePossible() throws IllegalStateException {
  314. if (directory == null) {
  315. throw new IllegalStateException("Newly created Document, cannot save in-place");
  316. }
  317. if (directory.getParent() != null) {
  318. throw new IllegalStateException("This is not the root Document, cannot save embedded resource in-place");
  319. }
  320. if (directory.getFileSystem() == null ||
  321. !directory.getFileSystem().isInPlaceWriteable()) {
  322. throw new IllegalStateException("Opened read-only or via an InputStream, a Writeable File is required");
  323. }
  324. }
  325. /**
  326. * Writes the document out to the currently open {@link File}, via the
  327. * writeable {@link POIFSFileSystem} it was opened from.
  328. *
  329. * <p>This will fail (with an {@link IllegalStateException} if the
  330. * document was opened read-only, opened from an {@link InputStream}
  331. * instead of a File, or if this is not the root document. For those cases,
  332. * you must use {@link #write(OutputStream)} or {@link #write(File)} to
  333. * write to a brand new document.
  334. *
  335. * @since POI 3.15 beta 3
  336. *
  337. * @throws IOException thrown on errors writing to the file
  338. * @throws IllegalStateException if this isn't from a writable File
  339. */
  340. public abstract void write() throws IOException;
  341. /**
  342. * Writes the document out to the specified new {@link File}. If the file
  343. * exists, it will be replaced, otherwise a new one will be created
  344. *
  345. * @since POI 3.15 beta 3
  346. *
  347. * @param newFile The new File to write to.
  348. *
  349. * @throws IOException thrown on errors writing to the file
  350. */
  351. public abstract void write(File newFile) throws IOException;
  352. /**
  353. * Writes the document out to the specified output stream. The
  354. * stream is not closed as part of this operation.
  355. *
  356. * Note - if the Document was opened from a {@link File} rather
  357. * than an {@link InputStream}, you <b>must</b> write out using
  358. * {@link #write()} or to a different File. Overwriting the currently
  359. * open file via an OutputStream isn't possible.
  360. *
  361. * If {@code stream} is a {@link java.io.FileOutputStream} on a networked drive
  362. * or has a high cost/latency associated with each written byte,
  363. * consider wrapping the OutputStream in a {@link java.io.BufferedOutputStream}
  364. * to improve write performance, or use {@link #write()} / {@link #write(File)}
  365. * if possible.
  366. *
  367. * @param out The stream to write to.
  368. *
  369. * @throws IOException thrown on errors writing to the stream
  370. */
  371. public abstract void write(OutputStream out) throws IOException;
  372. /**
  373. * Closes the underlying {@link POIFSFileSystem} from which
  374. * the document was read, if any. Has no effect on documents
  375. * opened from an InputStream, or newly created ones.<p>
  376. *
  377. * Once {@code close()} has been called, no further operations
  378. * should be called on the document.
  379. */
  380. @Override
  381. public void close() throws IOException {
  382. if (directory != null) {
  383. if (directory.getFileSystem() != null) {
  384. directory.getFileSystem().close();
  385. clearDirectory();
  386. }
  387. }
  388. }
  389. @Internal
  390. public DirectoryNode getDirectory() {
  391. return directory;
  392. }
  393. /**
  394. * Clear/unlink the attached directory entry
  395. */
  396. @Internal
  397. protected void clearDirectory() {
  398. directory = null;
  399. }
  400. /**
  401. * check if we were created by POIFS otherwise create a new dummy POIFS
  402. * for storing the package data
  403. *
  404. * @return {@code true} if dummy directory was created, {@code false} otherwise
  405. */
  406. @SuppressWarnings("resource")
  407. @Internal
  408. protected boolean initDirectory() {
  409. if (directory == null) {
  410. directory = new POIFSFileSystem().getRoot(); // NOSONAR
  411. return true;
  412. }
  413. return false;
  414. }
  415. /**
  416. * Replaces the attached directory, e.g. if this document is written
  417. * to a new POIFSFileSystem
  418. *
  419. * @param newDirectory the new directory
  420. */
  421. @Internal
  422. protected void replaceDirectory(DirectoryNode newDirectory) {
  423. directory = newDirectory;
  424. }
  425. /**
  426. * @return the stream name of the property set collection, if the document is encrypted
  427. */
  428. protected String getEncryptedPropertyStreamName() {
  429. return "encryption";
  430. }
  431. /**
  432. * @return the encryption info if the document is encrypted, otherwise {@code null}
  433. *
  434. * @throws IOException If retrieving the encryption information fails
  435. */
  436. public EncryptionInfo getEncryptionInfo() throws IOException {
  437. return null;
  438. }
  439. }