You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

Picture.java 19KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632
  1. /* ====================================================================
  2. Licensed to the Apache Software Foundation (ASF) under one or more
  3. contributor license agreements. See the NOTICE file distributed with
  4. this work for additional information regarding copyright ownership.
  5. The ASF licenses this file to You under the Apache License, Version 2.0
  6. (the "License"); you may not use this file except in compliance with
  7. the License. You may obtain a copy of the License at
  8. http://www.apache.org/licenses/LICENSE-2.0
  9. Unless required by applicable law or agreed to in writing, software
  10. distributed under the License is distributed on an "AS IS" BASIS,
  11. WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. See the License for the specific language governing permissions and
  13. limitations under the License.
  14. ==================================================================== */
  15. package org.apache.poi.hwpf.usermodel;
  16. import java.io.ByteArrayInputStream;
  17. import java.io.ByteArrayOutputStream;
  18. import java.io.IOException;
  19. import java.io.OutputStream;
  20. import java.util.Arrays;
  21. import java.util.List;
  22. import java.util.zip.InflaterInputStream;
  23. import org.apache.poi.ddf.EscherBSERecord;
  24. import org.apache.poi.ddf.EscherBlipRecord;
  25. import org.apache.poi.ddf.EscherComplexProperty;
  26. import org.apache.poi.ddf.EscherOptRecord;
  27. import org.apache.poi.ddf.EscherProperties;
  28. import org.apache.poi.ddf.EscherProperty;
  29. import org.apache.poi.ddf.EscherRecord;
  30. import org.apache.poi.hwpf.model.PICF;
  31. import org.apache.poi.hwpf.model.PICFAndOfficeArtData;
  32. import org.apache.poi.util.PngUtils;
  33. import org.apache.poi.util.POILogFactory;
  34. import org.apache.poi.util.POILogger;
  35. import org.apache.poi.util.StringUtil;
  36. /**
  37. * Represents embedded picture extracted from Word Document
  38. */
  39. public final class Picture
  40. {
  41. @Deprecated
  42. public static final byte[] BMP = new byte[] { 'B', 'M' };
  43. public static final byte[] COMPRESSED1 = { (byte) 0xFE, 0x78, (byte) 0xDA };
  44. public static final byte[] COMPRESSED2 = { (byte) 0xFE, 0x78, (byte) 0x9C };
  45. @Deprecated
  46. public static final byte[] EMF = { 0x01, 0x00, 0x00, 0x00 };
  47. @Deprecated
  48. public static final byte[] GIF = new byte[] { 'G', 'I', 'F' };
  49. public static final byte[] IHDR = new byte[] { 'I', 'H', 'D', 'R' };
  50. @Deprecated
  51. public static final byte[] JPG = new byte[] { (byte) 0xFF, (byte) 0xD8 };
  52. private static final POILogger log = POILogFactory
  53. .getLogger( Picture.class );
  54. @Deprecated
  55. public static final byte[] PNG = new byte[] { (byte) 0x89, 0x50, 0x4E,
  56. 0x47, 0x0D, 0x0A, 0x1A, 0x0A };
  57. @Deprecated
  58. public static final byte[] TIFF = new byte[] { 0x49, 0x49, 0x2A, 0x00 };
  59. @Deprecated
  60. public static final byte[] TIFF1 = new byte[] { 0x4D, 0x4D, 0x00, 0x2A };
  61. @Deprecated
  62. public static final byte[] WMF1 = { (byte) 0xD7, (byte) 0xCD, (byte) 0xC6,
  63. (byte) 0x9A, 0x00, 0x00 };
  64. // Windows 3.x
  65. @Deprecated
  66. public static final byte[] WMF2 = { 0x01, 0x00, 0x09, 0x00, 0x00, 0x03 }; // Windows
  67. // 3.x
  68. private static int getBigEndianInt( byte[] data, int offset )
  69. {
  70. return ( ( ( data[offset] & 0xFF ) << 24 )
  71. + ( ( data[offset + 1] & 0xFF ) << 16 )
  72. + ( ( data[offset + 2] & 0xFF ) << 8 ) + ( data[offset + 3] & 0xFF ) );
  73. }
  74. private static int getBigEndianShort( byte[] data, int offset )
  75. {
  76. return ( ( ( data[offset] & 0xFF ) << 8 ) + ( data[offset + 1] & 0xFF ) );
  77. }
  78. private static boolean matchSignature( byte[] pictureData,
  79. byte[] signature, int offset )
  80. {
  81. boolean matched = offset < pictureData.length;
  82. for ( int i = 0; ( i + offset ) < pictureData.length
  83. && i < signature.length; i++ )
  84. {
  85. if ( pictureData[i + offset] != signature[i] )
  86. {
  87. matched = false;
  88. break;
  89. }
  90. }
  91. return matched;
  92. }
  93. private PICF _picf;
  94. private PICFAndOfficeArtData _picfAndOfficeArtData;
  95. private List<? extends EscherRecord> _blipRecords;
  96. private byte[] content;
  97. private int dataBlockStartOfsset;
  98. private int height = -1;
  99. private int width = -1;
  100. /**
  101. * Builds a Picture object for a Picture stored as
  102. * Escher.
  103. * TODO We need to pass in the PICF data too somehow!
  104. */
  105. public Picture( EscherBlipRecord blipRecord )
  106. {
  107. this._blipRecords = Arrays.asList(new EscherBlipRecord[] {blipRecord});
  108. }
  109. /**
  110. * Builds a Picture object for a Picture stored in the
  111. * DataStream
  112. */
  113. public Picture( int dataBlockStartOfsset, byte[] _dataStream,
  114. boolean fillBytes )
  115. {
  116. _picfAndOfficeArtData = new PICFAndOfficeArtData( _dataStream,
  117. dataBlockStartOfsset );
  118. _picf = _picfAndOfficeArtData.getPicf();
  119. this.dataBlockStartOfsset = dataBlockStartOfsset;
  120. if ( _picfAndOfficeArtData != null && _picfAndOfficeArtData.getBlipRecords() != null) {
  121. _blipRecords = _picfAndOfficeArtData.getBlipRecords();
  122. }
  123. if ( fillBytes ) {
  124. fillImageContent();
  125. }
  126. }
  127. private void fillImageContent()
  128. {
  129. if ( content != null && content.length > 0 )
  130. return;
  131. byte[] rawContent = getRawContent();
  132. /*
  133. * HACK: Detect compressed images. In reality there should be some way
  134. * to determine this from the first 32 bytes, but I can't see any
  135. * similarity between all the samples I have obtained, nor any
  136. * similarity in the data block contents.
  137. */
  138. if ( matchSignature( rawContent, COMPRESSED1, 32 )
  139. || matchSignature( rawContent, COMPRESSED2, 32 ) )
  140. {
  141. try
  142. {
  143. InflaterInputStream in = new InflaterInputStream(
  144. new ByteArrayInputStream( rawContent, 33,
  145. rawContent.length - 33 ) );
  146. ByteArrayOutputStream out = new ByteArrayOutputStream();
  147. byte[] buf = new byte[4096];
  148. int readBytes;
  149. while ( ( readBytes = in.read( buf ) ) > 0 )
  150. {
  151. out.write( buf, 0, readBytes );
  152. }
  153. content = out.toByteArray();
  154. }
  155. catch ( IOException e )
  156. {
  157. /*
  158. * Problems reading from the actual ByteArrayInputStream should
  159. * never happen so this will only ever be a ZipException.
  160. */
  161. log.log( POILogger.INFO,
  162. "Possibly corrupt compression or non-compressed data",
  163. e );
  164. }
  165. }
  166. else
  167. {
  168. // Raw data is not compressed.
  169. content = rawContent;
  170. //PNG created on MAC may have a 16-byte prefix which prevents successful reading.
  171. //Just cut it off!.
  172. if (PngUtils.matchesPngHeader(content, 16))
  173. {
  174. byte[] png = new byte[content.length-16];
  175. System.arraycopy(content, 16, png, 0, png.length);
  176. content = png;
  177. }
  178. }
  179. }
  180. private void fillJPGWidthHeight()
  181. {
  182. /*
  183. * http://www.codecomments.com/archive281-2004-3-158083.html
  184. *
  185. * Algorhitm proposed by Patrick TJ McPhee:
  186. *
  187. * read 2 bytes make sure they are 'ffd8'x repeatedly: read 2 bytes make
  188. * sure the first one is 'ff'x if the second one is 'd9'x stop else if
  189. * the second one is c0 or c2 (or possibly other values ...) skip 2
  190. * bytes read one byte into depth read two bytes into height read two
  191. * bytes into width else read two bytes into length skip forward
  192. * length-2 bytes
  193. *
  194. * Also used Ruby code snippet from:
  195. * http://www.bigbold.com/snippets/posts/show/805 for reference
  196. */
  197. byte[] jpegContent = getContent();
  198. int pointer = 2;
  199. int firstByte = jpegContent[pointer];
  200. int secondByte = jpegContent[pointer + 1];
  201. int endOfPicture = jpegContent.length;
  202. while ( pointer < endOfPicture - 1 )
  203. {
  204. do
  205. {
  206. firstByte = jpegContent[pointer];
  207. secondByte = jpegContent[pointer + 1];
  208. pointer += 2;
  209. }
  210. while ( !( firstByte == (byte) 0xFF ) && pointer < endOfPicture - 1 );
  211. if ( firstByte == ( (byte) 0xFF ) && pointer < endOfPicture - 1 )
  212. {
  213. if ( secondByte == (byte) 0xD9 || secondByte == (byte) 0xDA )
  214. {
  215. break;
  216. }
  217. else if ( ( secondByte & 0xF0 ) == 0xC0
  218. && secondByte != (byte) 0xC4
  219. && secondByte != (byte) 0xC8
  220. && secondByte != (byte) 0xCC )
  221. {
  222. pointer += 5;
  223. this.height = getBigEndianShort( jpegContent, pointer );
  224. this.width = getBigEndianShort( jpegContent, pointer + 2 );
  225. break;
  226. }
  227. else
  228. {
  229. pointer++;
  230. pointer++;
  231. int length = getBigEndianShort( jpegContent, pointer );
  232. pointer += length;
  233. }
  234. }
  235. else
  236. {
  237. pointer++;
  238. }
  239. }
  240. }
  241. void fillPNGWidthHeight()
  242. {
  243. byte[] pngContent = getContent();
  244. /*
  245. * Used PNG file format description from
  246. * http://www.wotsit.org/download.asp?f=png
  247. */
  248. int HEADER_START = PNG.length + 4;
  249. if ( matchSignature( pngContent, IHDR, HEADER_START ) )
  250. {
  251. int IHDR_CHUNK_WIDTH = HEADER_START + 4;
  252. this.width = getBigEndianInt( pngContent, IHDR_CHUNK_WIDTH );
  253. this.height = getBigEndianInt( pngContent, IHDR_CHUNK_WIDTH + 4 );
  254. }
  255. }
  256. private void fillWidthHeight()
  257. {
  258. PictureType pictureType = suggestPictureType();
  259. // trying to extract width and height from pictures content:
  260. switch ( pictureType )
  261. {
  262. case JPEG:
  263. fillJPGWidthHeight();
  264. break;
  265. case PNG:
  266. fillPNGWidthHeight();
  267. break;
  268. default:
  269. // unsupported;
  270. break;
  271. }
  272. }
  273. /**
  274. * @return the horizontal aspect ratio for picture provided by user
  275. * @deprecated use more precise {@link #getHorizontalScalingFactor()}
  276. */
  277. @Deprecated
  278. public int getAspectRatioX()
  279. {
  280. return _picf.getMx() / 10;
  281. }
  282. /**
  283. * @return the vertical aspect ratio for picture provided by user
  284. * @deprecated use more precise {@link #getVerticalScalingFactor()}
  285. */
  286. @Deprecated
  287. public int getAspectRatioY()
  288. {
  289. return _picf.getMy() / 10;
  290. }
  291. /**
  292. * @return picture's content as byte array
  293. */
  294. public byte[] getContent()
  295. {
  296. fillImageContent();
  297. return content;
  298. }
  299. /**
  300. * @return The amount the picture has been cropped on the left in twips
  301. */
  302. @Deprecated
  303. public int getDxaCropLeft()
  304. {
  305. // TODO: use new properties
  306. // if (_picfAndOfficeArtData == null || _picfAndOfficeArtData.getShape()
  307. // == null)
  308. // return 0;
  309. //
  310. // final EscherContainerRecord shape = _picfAndOfficeArtData.getShape();
  311. // EscherOptRecord optRecord = shape.getChildById( (short) 0xF00B );
  312. // if (optRecord == null)
  313. // return 0;
  314. //
  315. // EscherProperty property = optRecord.lookup( 0x0102 );
  316. // if (property == null || !(property instanceof EscherSimpleProperty))
  317. // return 0;
  318. //
  319. // EscherSimpleProperty simpleProperty = (EscherSimpleProperty)
  320. // property;
  321. // return simpleProperty.getPropertyValue();
  322. return _picf.getDxaReserved1();
  323. }
  324. /**
  325. * @return The amount the picture has been cropped on the right in twips
  326. */
  327. @Deprecated
  328. public int getDxaCropRight()
  329. {
  330. return _picf.getDxaReserved2();
  331. }
  332. /**
  333. * Gets the initial width of the picture, in twips, prior to cropping or
  334. * scaling.
  335. *
  336. * @return the initial width of the picture in twips
  337. */
  338. public int getDxaGoal()
  339. {
  340. return _picf.getDxaGoal();
  341. }
  342. /**
  343. * @return The amount the picture has been cropped on the bottom in twips
  344. */
  345. @Deprecated
  346. public int getDyaCropBottom()
  347. {
  348. return _picf.getDyaReserved2();
  349. }
  350. /**
  351. * @return The amount the picture has been cropped on the top in twips
  352. */
  353. @Deprecated
  354. public int getDyaCropTop()
  355. {
  356. return _picf.getDyaReserved1();
  357. }
  358. /**
  359. * Gets the initial height of the picture, in twips, prior to cropping or
  360. * scaling.
  361. *
  362. * @return the initial width of the picture in twips
  363. */
  364. public int getDyaGoal()
  365. {
  366. return _picf.getDyaGoal();
  367. }
  368. /**
  369. * returns pixel height of the picture or -1 if dimensions determining was
  370. * failed
  371. */
  372. public int getHeight()
  373. {
  374. if ( height == -1 )
  375. {
  376. fillWidthHeight();
  377. }
  378. return height;
  379. }
  380. /**
  381. * @return Horizontal scaling factor supplied by user expressed in .001%
  382. * units
  383. */
  384. public int getHorizontalScalingFactor()
  385. {
  386. return _picf.getMx();
  387. }
  388. /**
  389. * Returns the MIME type for the image
  390. *
  391. * @return MIME-type for known types of image or "image/unknown" if unknown
  392. */
  393. public String getMimeType()
  394. {
  395. return suggestPictureType().getMime();
  396. }
  397. /**
  398. * Returns picture's content as it stored in Word file, i.e. possibly in
  399. * compressed form.
  400. *
  401. * @return picture's content as it stored in Word file
  402. */
  403. public byte[] getRawContent()
  404. {
  405. if (_blipRecords == null || _blipRecords.size() != 1) {
  406. return new byte[0];
  407. }
  408. EscherRecord escherRecord = _blipRecords.get( 0 );
  409. if ( escherRecord instanceof EscherBlipRecord )
  410. {
  411. return ( (EscherBlipRecord) escherRecord ).getPicturedata();
  412. }
  413. if ( escherRecord instanceof EscherBSERecord )
  414. {
  415. return ( (EscherBSERecord) escherRecord ).getBlipRecord()
  416. .getPicturedata();
  417. }
  418. return new byte[0];
  419. }
  420. /**
  421. *
  422. * @return size in bytes of the picture
  423. */
  424. public int getSize()
  425. {
  426. return getContent().length;
  427. }
  428. /**
  429. * @return The offset of this picture in the picture bytes, used when
  430. * matching up with {@link CharacterRun#getPicOffset()}
  431. */
  432. public int getStartOffset()
  433. {
  434. return dataBlockStartOfsset;
  435. }
  436. /**
  437. * @return Vertical scaling factor supplied by user expressed in .001% units
  438. */
  439. public int getVerticalScalingFactor()
  440. {
  441. return _picf.getMy();
  442. }
  443. /**
  444. * returns pixel width of the picture or -1 if dimensions determining was
  445. * failed
  446. */
  447. public int getWidth()
  448. {
  449. if ( width == -1 )
  450. {
  451. fillWidthHeight();
  452. }
  453. return width;
  454. }
  455. /**
  456. * returns the description stored in the alternative text
  457. *
  458. * @return pictue description
  459. */
  460. public String getDescription()
  461. {
  462. for(EscherRecord escherRecord : _picfAndOfficeArtData.getShape().getChildRecords()){
  463. if(escherRecord instanceof EscherOptRecord){
  464. EscherOptRecord escherOptRecord = (EscherOptRecord) escherRecord;
  465. for(EscherProperty property : escherOptRecord.getEscherProperties()){
  466. if(EscherProperties.GROUPSHAPE__DESCRIPTION == property.getPropertyNumber()){
  467. byte[] complexData = ((EscherComplexProperty)property).getComplexData();
  468. return StringUtil.getFromUnicodeLE(complexData,0,complexData.length/2-1);
  469. }
  470. }
  471. }
  472. }
  473. return null;
  474. }
  475. /**
  476. * tries to suggest extension for picture's file by matching signatures of
  477. * popular image formats to first bytes of picture's contents
  478. *
  479. * @return suggested file extension
  480. */
  481. public String suggestFileExtension()
  482. {
  483. return suggestPictureType().getExtension();
  484. }
  485. /**
  486. * Tries to suggest a filename: hex representation of picture structure
  487. * offset in "Data" stream plus extension that is tried to determine from
  488. * first byte of picture's content.
  489. *
  490. * @return suggested file name
  491. */
  492. public String suggestFullFileName()
  493. {
  494. String fileExt = suggestFileExtension();
  495. return Integer.toHexString( dataBlockStartOfsset )
  496. + ( fileExt.length() > 0 ? "." + fileExt : "" );
  497. }
  498. public PictureType suggestPictureType()
  499. {
  500. if (_blipRecords == null || _blipRecords.size() != 1 ) {
  501. return PictureType.UNKNOWN;
  502. }
  503. EscherRecord escherRecord = _blipRecords.get( 0 );
  504. switch ( escherRecord.getRecordId() )
  505. {
  506. case (short) 0xF007:
  507. {
  508. EscherBSERecord bseRecord = (EscherBSERecord) escherRecord;
  509. switch ( bseRecord.getBlipTypeWin32() )
  510. {
  511. case 0x00:
  512. return PictureType.UNKNOWN;
  513. case 0x01:
  514. return PictureType.UNKNOWN;
  515. case 0x02:
  516. return PictureType.EMF;
  517. case 0x03:
  518. return PictureType.WMF;
  519. case 0x04:
  520. return PictureType.PICT;
  521. case 0x05:
  522. return PictureType.JPEG;
  523. case 0x06:
  524. return PictureType.PNG;
  525. case 0x07:
  526. return PictureType.BMP;
  527. case 0x11:
  528. return PictureType.TIFF;
  529. case 0x12:
  530. return PictureType.JPEG;
  531. default:
  532. return PictureType.UNKNOWN;
  533. }
  534. }
  535. case (short) 0xF01A:
  536. return PictureType.EMF;
  537. case (short) 0xF01B:
  538. return PictureType.WMF;
  539. case (short) 0xF01C:
  540. return PictureType.PICT;
  541. case (short) 0xF01D:
  542. return PictureType.JPEG;
  543. case (short) 0xF01E:
  544. return PictureType.PNG;
  545. case (short) 0xF01F:
  546. return PictureType.BMP;
  547. case (short) 0xF029:
  548. return PictureType.TIFF;
  549. case (short) 0xF02A:
  550. return PictureType.JPEG;
  551. default:
  552. return PictureType.UNKNOWN;
  553. }
  554. }
  555. /**
  556. * Writes Picture's content bytes to specified OutputStream. Is useful when
  557. * there is need to write picture bytes directly to stream, omitting its
  558. * representation in memory as distinct byte array.
  559. *
  560. * @param out
  561. * a stream to write to
  562. * @throws IOException
  563. * if some exception is occured while writing to specified out
  564. */
  565. public void writeImageContent( OutputStream out ) throws IOException
  566. {
  567. byte[] content = getContent();
  568. if ( content != null && content.length > 0 )
  569. {
  570. out.write( content, 0, content.length );
  571. }
  572. }
  573. }