123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545 |
- /* ====================================================================
- Licensed to the Apache Software Foundation (ASF) under one or more
- contributor license agreements. See the NOTICE file distributed with
- this work for additional information regarding copyright ownership.
- The ASF licenses this file to You under the Apache License, Version 2.0
- (the "License"); you may not use this file except in compliance with
- the License. You may obtain a copy of the License at
-
- http://www.apache.org/licenses/LICENSE-2.0
-
- Unless required by applicable law or agreed to in writing, software
- distributed under the License is distributed on an "AS IS" BASIS,
- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- See the License for the specific language governing permissions and
- limitations under the License.
- ==================================================================== */
-
- package org.apache.poi.hwpf.usermodel;
-
- import java.io.ByteArrayInputStream;
- import java.io.ByteArrayOutputStream;
- import java.io.IOException;
- import java.io.OutputStream;
- import java.util.zip.InflaterInputStream;
-
- import org.apache.poi.util.LittleEndian;
- import org.apache.poi.util.POILogFactory;
- import org.apache.poi.util.POILogger;
-
- /**
- * Represents embedded picture extracted from Word Document
- * @author Dmitry Romanov
- */
- public final class Picture
- {
- private static final POILogger log = POILogFactory.getLogger(Picture.class);
-
- // public static final int FILENAME_OFFSET = 0x7C;
- // public static final int FILENAME_SIZE_OFFSET = 0x6C;
- static final int PICF_OFFSET = 0x0;
- static final int PICT_HEADER_OFFSET = 0x4;
- static final int MFPMM_OFFSET = 0x6;
- static final int PICF_SHAPE_OFFSET = 0xE;
- static final int DXAGOAL_OFFSET = 0x1C;
- static final int DYAGOAL_OFFSET = 0x1E;
- static final int MX_OFFSET = 0x20;
- static final int MY_OFFSET = 0x22;
- static final int DXACROPLEFT_OFFSET = 0x24;
- static final int DYACROPTOP_OFFSET = 0x26;
- static final int DXACROPRIGHT_OFFSET = 0x28;
- static final int DYACROPBOTTOM_OFFSET = 0x2A;
- static final int UNKNOWN_HEADER_SIZE = 0x49;
-
- public static final byte[] GIF = new byte[]{'G', 'I', 'F'};
- public static final byte[] PNG = new byte[]{ (byte)0x89, 0x50, 0x4E, 0x47,0x0D,0x0A,0x1A,0x0A};
- public static final byte[] JPG = new byte[]{(byte)0xFF, (byte)0xD8};
- public static final byte[] BMP = new byte[]{'B', 'M'};
- public static final byte[] TIFF = new byte[]{0x49, 0x49, 0x2A, 0x00};
- public static final byte[] TIFF1 = new byte[]{0x4D, 0x4D, 0x00, 0x2A};
-
- public static final byte[] EMF = { 0x01, 0x00, 0x00, 0x00 };
- public static final byte[] WMF1 = { (byte)0xD7, (byte)0xCD, (byte)0xC6, (byte)0x9A, 0x00, 0x00 };
- public static final byte[] WMF2 = { 0x01, 0x00, 0x09, 0x00, 0x00, 0x03 }; // Windows 3.x
- // TODO: DIB, PICT
-
- public static final byte[] IHDR = new byte[]{'I', 'H', 'D', 'R'};
-
- public static final byte[] COMPRESSED1 = { (byte)0xFE, 0x78, (byte)0xDA };
- public static final byte[] COMPRESSED2 = { (byte)0xFE, 0x78, (byte)0x9C };
-
- private int dataBlockStartOfsset;
- private int pictureBytesStartOffset;
- private int dataBlockSize;
- private int size;
- // private String fileName;
- private byte[] rawContent;
- private byte[] content;
- private byte[] _dataStream;
- private int aspectRatioX;
- private int aspectRatioY;
- private int height = -1;
- private int width = -1;
-
- private int dxaGoal = -1;
- private int dyaGoal = -1;
-
- private int dxaCropLeft = -1;
- private int dyaCropTop = -1;
- private int dxaCropRight = -1;
- private int dyaCropBottom = -1;
-
- public Picture(int dataBlockStartOfsset, byte[] _dataStream, boolean fillBytes)
- {
- this._dataStream = _dataStream;
- this.dataBlockStartOfsset = dataBlockStartOfsset;
- this.dataBlockSize = LittleEndian.getInt(_dataStream, dataBlockStartOfsset);
- this.pictureBytesStartOffset = getPictureBytesStartOffset(dataBlockStartOfsset, _dataStream, dataBlockSize);
- this.size = dataBlockSize - (pictureBytesStartOffset - dataBlockStartOfsset);
-
- if (size<0) {
-
- }
-
- this.dxaGoal = LittleEndian.getShort(_dataStream, dataBlockStartOfsset+DXAGOAL_OFFSET);
- this.dyaGoal = LittleEndian.getShort(_dataStream, dataBlockStartOfsset+DYAGOAL_OFFSET);
-
- this.aspectRatioX = LittleEndian.getShort(_dataStream, dataBlockStartOfsset+MX_OFFSET)/10;
- this.aspectRatioY = LittleEndian.getShort(_dataStream, dataBlockStartOfsset+MY_OFFSET)/10;
-
- this.dxaCropLeft = LittleEndian.getShort(_dataStream, dataBlockStartOfsset+DXACROPLEFT_OFFSET);
- this.dyaCropTop = LittleEndian.getShort(_dataStream, dataBlockStartOfsset+DYACROPTOP_OFFSET);
- this.dxaCropRight = LittleEndian.getShort(_dataStream, dataBlockStartOfsset+DXACROPRIGHT_OFFSET);
- this.dyaCropBottom = LittleEndian.getShort(_dataStream, dataBlockStartOfsset+DYACROPBOTTOM_OFFSET);
-
- if (fillBytes)
- {
- fillImageContent();
- }
- }
-
- public Picture(byte[] _dataStream)
- {
- this._dataStream = _dataStream;
- this.dataBlockStartOfsset = 0;
- this.dataBlockSize = _dataStream.length;
- this.pictureBytesStartOffset = 0;
- this.size = _dataStream.length;
- }
-
- private void fillWidthHeight()
- {
- String ext = suggestFileExtension();
- // trying to extract width and height from pictures content:
- if ("jpg".equalsIgnoreCase(ext)) {
- fillJPGWidthHeight();
- } else if ("png".equalsIgnoreCase(ext)) {
- fillPNGWidthHeight();
- }
- }
-
- /**
- * Tries to suggest a filename: hex representation of picture structure offset in "Data" stream plus extension that
- * is tried to determine from first byte of picture's content.
- *
- * @return suggested file name
- */
- public String suggestFullFileName()
- {
- String fileExt = suggestFileExtension();
- return Integer.toHexString(dataBlockStartOfsset) + (fileExt.length()>0 ? "."+fileExt : "");
- }
-
- /**
- * Writes Picture's content bytes to specified OutputStream.
- * Is useful when there is need to write picture bytes directly to stream, omitting its representation in
- * memory as distinct byte array.
- *
- * @param out a stream to write to
- * @throws IOException if some exception is occured while writing to specified out
- */
- public void writeImageContent(OutputStream out) throws IOException
- {
- if (rawContent!=null && rawContent.length>0) {
- out.write(rawContent, 0, size);
- } else {
- out.write(_dataStream, pictureBytesStartOffset, size);
- }
- }
-
- /**
- * @return The offset of this picture in the picture bytes, used
- * when matching up with {@link CharacterRun#getPicOffset()}
- */
- public int getStartOffset() {
- return dataBlockStartOfsset;
- }
-
- /**
- * @return picture's content as byte array
- */
- public byte[] getContent()
- {
- if (content == null || content.length<=0)
- {
- fillImageContent();
- }
- return content;
- }
-
- public byte[] getRawContent()
- {
- if (rawContent == null || rawContent.length <= 0)
- {
- fillRawImageContent();
- }
- return rawContent;
- }
-
- /**
- *
- * @return size in bytes of the picture
- */
- public int getSize()
- {
- return size;
- }
-
- /**
- * @return the horizontal aspect ratio for picture provided by user
- */
- public int getAspectRatioX() {
- return aspectRatioX;
- }
-
- /**
- * @retrn the vertical aspect ratio for picture provided by user
- */
- public int getAspectRatioY() {
- return aspectRatioY;
- }
-
- /**
- * Gets the initial width of the picture, in twips, prior to cropping or scaling.
- *
- * @return the initial width of the picture in twips
- */
- public int getDxaGoal() {
- return dxaGoal;
- }
-
- /**
- * Gets the initial height of the picture, in twips, prior to cropping or scaling.
- *
- * @return the initial width of the picture in twips
- */
- public int getDyaGoal() {
- return dyaGoal;
- }
-
- /**
- * @return The amount the picture has been cropped on the left in twips
- */
- public int getDxaCropLeft() {
- return dxaCropLeft;
- }
-
- /**
- * @return The amount the picture has been cropped on the top in twips
- */
- public int getDyaCropTop() {
- return dyaCropTop;
- }
-
- /**
- * @return The amount the picture has been cropped on the right in twips
- */
- public int getDxaCropRight() {
- return dxaCropRight;
- }
-
- /**
- * @return The amount the picture has been cropped on the bottom in twips
- */
- public int getDyaCropBottom() {
- return dyaCropBottom;
- }
-
- /**
- * tries to suggest extension for picture's file by matching signatures of popular image formats to first bytes
- * of picture's contents
- * @return suggested file extension
- */
- public String suggestFileExtension()
- {
- String extension = suggestFileExtension(_dataStream, pictureBytesStartOffset);
- if ("".equals(extension)) {
- // May be compressed. Get the uncompressed content and inspect that.
- extension = suggestFileExtension(getContent(), 0);
- }
- return extension;
- }
-
- /**
- * Returns the mime type for the image
- */
- public String getMimeType() {
- String extension = suggestFileExtension();
- if("jpg".equals(extension)) {
- return "image/jpeg";
- }
- if("png".equals(extension)) {
- return "image/png";
- }
- if("gif".equals(extension)) {
- return "image/gif";
- }
- if("bmp".equals(extension)) {
- return "image/bmp";
- }
- if("tiff".equals(extension)) {
- return "image/tiff";
- }
- if("wmf".equals(extension)) {
- return "image/x-wmf";
- }
- if("emf".equals(extension)) {
- return "image/x-emf";
- }
- return "image/unknown";
- }
-
-
- private String suggestFileExtension(byte[] _dataStream, int pictureBytesStartOffset)
- {
- if (matchSignature(_dataStream, JPG, pictureBytesStartOffset)) {
- return "jpg";
- } else if (matchSignature(_dataStream, PNG, pictureBytesStartOffset)) {
- return "png";
- } else if (matchSignature(_dataStream, GIF, pictureBytesStartOffset)) {
- return "gif";
- } else if (matchSignature(_dataStream, BMP, pictureBytesStartOffset)) {
- return "bmp";
- } else if (matchSignature(_dataStream, TIFF, pictureBytesStartOffset) ||
- matchSignature(_dataStream, TIFF1, pictureBytesStartOffset)) {
- return "tiff";
- } else {
- // Need to load the image content before we can try the following tests
- fillImageContent();
-
- if (matchSignature(content, WMF1, 0) || matchSignature(content, WMF2, 0)) {
- return "wmf";
- } else if (matchSignature(content, EMF, 0)) {
- return "emf";
- }
- }
- // TODO: DIB, PICT
- return "";
- }
-
- private static boolean matchSignature(byte[] dataStream, byte[] signature, int pictureBytesOffset)
- {
- boolean matched = pictureBytesOffset < dataStream.length;
- for (int i = 0; (i+pictureBytesOffset) < dataStream.length && i < signature.length; i++)
- {
- if (dataStream[i+pictureBytesOffset] != signature[i])
- {
- matched = false;
- break;
- }
- }
- return matched;
- }
-
- // public String getFileName()
- // {
- // return fileName;
- // }
-
- // private static String extractFileName(int blockStartIndex, byte[] dataStream) {
- // int fileNameStartOffset = blockStartIndex + 0x7C;
- // int fileNameSizeOffset = blockStartIndex + FILENAME_SIZE_OFFSET;
- // int fileNameSize = LittleEndian.getShort(dataStream, fileNameSizeOffset);
- //
- // int fileNameIndex = fileNameStartOffset;
- // char[] fileNameChars = new char[(fileNameSize-1)/2];
- // int charIndex = 0;
- // while(charIndex<fileNameChars.length) {
- // short aChar = LittleEndian.getShort(dataStream, fileNameIndex);
- // fileNameChars[charIndex] = (char)aChar;
- // charIndex++;
- // fileNameIndex += 2;
- // }
- // String fileName = new String(fileNameChars);
- // return fileName.trim();
- // }
-
- private void fillRawImageContent()
- {
- this.rawContent = new byte[size];
- System.arraycopy(_dataStream, pictureBytesStartOffset, rawContent, 0, size);
- }
-
- private void fillImageContent()
- {
- byte[] rawContent = getRawContent();
-
- // HACK: Detect compressed images. In reality there should be some way to determine
- // this from the first 32 bytes, but I can't see any similarity between all the
- // samples I have obtained, nor any similarity in the data block contents.
- if (matchSignature(rawContent, COMPRESSED1, 32) || matchSignature(rawContent, COMPRESSED2, 32))
- {
- try
- {
- InflaterInputStream in = new InflaterInputStream(
- new ByteArrayInputStream(rawContent, 33, rawContent.length - 33));
- ByteArrayOutputStream out = new ByteArrayOutputStream();
- byte[] buf = new byte[4096];
- int readBytes;
- while ((readBytes = in.read(buf)) > 0)
- {
- out.write(buf, 0, readBytes);
- }
- content = out.toByteArray();
- }
- catch (IOException e)
- {
- // Problems reading from the actual ByteArrayInputStream should never happen
- // so this will only ever be a ZipException.
- log.log(POILogger.INFO, "Possibly corrupt compression or non-compressed data", e);
- }
- } else {
- // Raw data is not compressed.
- content = rawContent;
- }
- }
-
- private static int getPictureBytesStartOffset(int dataBlockStartOffset, byte[] _dataStream, int dataBlockSize)
- {
- int realPicoffset = dataBlockStartOffset;
- final int dataBlockEndOffset = dataBlockSize + dataBlockStartOffset;
-
- // Skip over the PICT block
- int PICTFBlockSize = LittleEndian.getShort(_dataStream, dataBlockStartOffset +PICT_HEADER_OFFSET); // Should be 68 bytes
-
- // Now the PICTF1
- int PICTF1BlockOffset = PICTFBlockSize + PICT_HEADER_OFFSET;
- short MM_TYPE = LittleEndian.getShort(_dataStream, dataBlockStartOffset + PICT_HEADER_OFFSET + 2);
- if(MM_TYPE == 0x66) {
- // Skip the stPicName
- int cchPicName = LittleEndian.getUnsignedByte(_dataStream, PICTF1BlockOffset);
- PICTF1BlockOffset += 1 + cchPicName;
- }
- int PICTF1BlockSize = LittleEndian.getShort(_dataStream, dataBlockStartOffset +PICTF1BlockOffset);
-
- int unknownHeaderOffset = (PICTF1BlockSize + PICTF1BlockOffset) < dataBlockEndOffset ? (PICTF1BlockSize + PICTF1BlockOffset) : PICTF1BlockOffset;
- realPicoffset += (unknownHeaderOffset + UNKNOWN_HEADER_SIZE);
- if (realPicoffset>=dataBlockEndOffset) {
- realPicoffset -= UNKNOWN_HEADER_SIZE;
- }
- return realPicoffset;
- }
-
- private void fillJPGWidthHeight() {
- /*
- http://www.codecomments.com/archive281-2004-3-158083.html
-
- Algorhitm proposed by Patrick TJ McPhee:
-
- read 2 bytes
- make sure they are 'ffd8'x
- repeatedly:
- read 2 bytes
- make sure the first one is 'ff'x
- if the second one is 'd9'x stop
- else if the second one is c0 or c2 (or possibly other values ...)
- skip 2 bytes
- read one byte into depth
- read two bytes into height
- read two bytes into width
- else
- read two bytes into length
- skip forward length-2 bytes
-
- Also used Ruby code snippet from: http://www.bigbold.com/snippets/posts/show/805 for reference
- */
- int pointer = pictureBytesStartOffset+2;
- int firstByte = _dataStream[pointer];
- int secondByte = _dataStream[pointer+1];
-
- int endOfPicture = pictureBytesStartOffset + size;
- while(pointer<endOfPicture-1) {
- do {
- firstByte = _dataStream[pointer];
- secondByte = _dataStream[pointer+1];
- pointer += 2;
- } while (!(firstByte==(byte)0xFF) && pointer<endOfPicture-1);
-
- if (firstByte==((byte)0xFF) && pointer<endOfPicture-1) {
- if (secondByte==(byte)0xD9 || secondByte==(byte)0xDA) {
- break;
- } else if ( (secondByte & 0xF0) == 0xC0 && secondByte!=(byte)0xC4 && secondByte!=(byte)0xC8 && secondByte!=(byte)0xCC) {
- pointer += 5;
- this.height = getBigEndianShort(_dataStream, pointer);
- this.width = getBigEndianShort(_dataStream, pointer+2);
- break;
- } else {
- pointer++;
- pointer++;
- int length = getBigEndianShort(_dataStream, pointer);
- pointer+=length;
- }
- } else {
- pointer++;
- }
- }
- }
-
- private void fillPNGWidthHeight()
- {
- /*
- Used PNG file format description from http://www.wotsit.org/download.asp?f=png
- */
- int HEADER_START = pictureBytesStartOffset + PNG.length + 4;
- if (matchSignature(_dataStream, IHDR, HEADER_START)) {
- int IHDR_CHUNK_WIDTH = HEADER_START + 4;
- this.width = getBigEndianInt(_dataStream, IHDR_CHUNK_WIDTH);
- this.height = getBigEndianInt(_dataStream, IHDR_CHUNK_WIDTH + 4);
- }
- }
-
- /**
- * returns pixel width of the picture or -1 if dimensions determining was failed
- */
- public int getWidth()
- {
- if (width == -1)
- {
- fillWidthHeight();
- }
- return width;
- }
-
- /**
- * returns pixel height of the picture or -1 if dimensions determining was failed
- */
- public int getHeight()
- {
- if (height == -1)
- {
- fillWidthHeight();
- }
- return height;
- }
-
- private static int getBigEndianInt(byte[] data, int offset)
- {
- return (((data[offset] & 0xFF)<< 24) + ((data[offset +1] & 0xFF) << 16) + ((data[offset + 2] & 0xFF) << 8) + (data[offset +3] & 0xFF));
- }
-
- private static int getBigEndianShort(byte[] data, int offset)
- {
- return (((data[offset] & 0xFF)<< 8) + (data[offset +1] & 0xFF));
- }
-
- }
|