From: Sergey Vladimirov Date: Fri, 30 Sep 2011 15:49:19 +0000 (+0000) Subject: picture loading completely rewritten, bugs 51902 and 51890 fixed X-Git-Tag: REL_3_8_BETA5~118 X-Git-Url: https://source.dussan.org/?a=commitdiff_plain;h=e975236ab1bab81c0a73cbd57fbe68e7b97eb76e;p=poi.git picture loading completely rewritten, bugs 51902 and 51890 fixed git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@1177709 13f79535-47bb-0310-9956-ffa450edef68 --- diff --git a/src/documentation/content/xdocs/status.xml b/src/documentation/content/xdocs/status.xml index e976ca7f1f..ef8c2d1b71 100644 --- a/src/documentation/content/xdocs/status.xml +++ b/src/documentation/content/xdocs/status.xml @@ -34,6 +34,8 @@ + 51902 - Picture.fillRawImageContent - ArrayIndexOutOfBoundsException (duplicate) + 51890 - ArrayIndexOutOfBounds ExceptionPicture.fillRawImageContent Allow the passing of a File object to WorkbookFactory.create, which permits lower memory processing than the InputStream version 51873 - update HSMF to ignore Outlook 2002 Olk10SideProp entries, which don't behave like normal chunks 51850 - support creating comments in XSSF on an earlier slide when later ones already have them diff --git a/src/java/org/apache/poi/util/LittleEndian.java b/src/java/org/apache/poi/util/LittleEndian.java index ae8a6cdf09..ef6edf6f66 100644 --- a/src/java/org/apache/poi/util/LittleEndian.java +++ b/src/java/org/apache/poi/util/LittleEndian.java @@ -420,11 +420,26 @@ public class LittleEndian implements LittleEndianConsts { * @param data the byte array. * @param offset a starting offset into the byte array. * @return the unsigned value of the byte as a 32 bit integer + * @deprecated Use {@link #getUByte(byte[], int)} instead */ + @Deprecated public static int getUnsignedByte(byte[] data, int offset) { return data[offset] & 0xFF; } + /** + * get the unsigned value of a byte. + * + * @param data + * the byte array. + * @param offset + * a starting offset into the byte array. 
+ * @return the unsigned value of the byte as a 16 bit short + */ + public static short getUByte( byte[] data, int offset ) + { + return (short) ( data[offset] & 0xFF ); + } /** * Copy a portion of a byte array diff --git a/src/scratchpad/src/org/apache/poi/hwpf/model/PICF.java b/src/scratchpad/src/org/apache/poi/hwpf/model/PICF.java new file mode 100644 index 0000000000..ee7a11a8c8 --- /dev/null +++ b/src/scratchpad/src/org/apache/poi/hwpf/model/PICF.java @@ -0,0 +1,148 @@ +/* ==================================================================== + Licensed to the Apache Software Foundation (ASF) under one or more + contributor license agreements. See the NOTICE file distributed with + this work for additional information regarding copyright ownership. + The ASF licenses this file to You under the Apache License, Version 2.0 + (the "License"); you may not use this file except in compliance with + the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +==================================================================== */ +package org.apache.poi.hwpf.model; + +import java.util.Arrays; + +import org.apache.poi.hwpf.model.types.PICFAbstractType; +import org.apache.poi.util.Internal; + +/** + * The PICF structure specifies the type of a picture, as well as the size of + * the picture and information about its border. + *

+ * Class and fields descriptions are quoted from Microsoft Office Word 97-2007 + * Binary File Format and [MS-DOC] - v20110608 Word (.doc) Binary File Format + * + * @author Sergey Vladimirov (vlsergey {at} gmail {dot} com) + */ +@Internal +public class PICF extends PICFAbstractType +{ + + public PICF() + { + } + + public PICF( byte[] std, int offset ) + { + fillFields( std, offset ); + } + + @Override + public boolean equals( Object obj ) + { + if ( this == obj ) + return true; + if ( obj == null ) + return false; + if ( getClass() != obj.getClass() ) + return false; + PICF other = (PICF) obj; + if ( field_10_padding2 != other.field_10_padding2 ) + return false; + if ( field_11_dxaGoal != other.field_11_dxaGoal ) + return false; + if ( field_12_dyaGoal != other.field_12_dyaGoal ) + return false; + if ( field_13_mx != other.field_13_mx ) + return false; + if ( field_14_my != other.field_14_my ) + return false; + if ( field_15_dxaReserved1 != other.field_15_dxaReserved1 ) + return false; + if ( field_16_dyaReserved1 != other.field_16_dyaReserved1 ) + return false; + if ( field_17_dxaReserved2 != other.field_17_dxaReserved2 ) + return false; + if ( field_18_dyaReserved2 != other.field_18_dyaReserved2 ) + return false; + if ( field_19_fReserved != other.field_19_fReserved ) + return false; + if ( field_1_lcb != other.field_1_lcb ) + return false; + if ( field_20_bpp != other.field_20_bpp ) + return false; + if ( !Arrays.equals( field_21_brcTop80, other.field_21_brcTop80 ) ) + return false; + if ( !Arrays.equals( field_22_brcLeft80, other.field_22_brcLeft80 ) ) + return false; + if ( !Arrays.equals( field_23_brcBottom80, other.field_23_brcBottom80 ) ) + return false; + if ( !Arrays.equals( field_24_brcRight80, other.field_24_brcRight80 ) ) + return false; + if ( field_25_dxaReserved3 != other.field_25_dxaReserved3 ) + return false; + if ( field_26_dyaReserved3 != other.field_26_dyaReserved3 ) + return false; + if ( field_27_cProps != other.field_27_cProps ) + return 
false; + if ( field_2_cbHeader != other.field_2_cbHeader ) + return false; + if ( field_3_mm != other.field_3_mm ) + return false; + if ( field_4_xExt != other.field_4_xExt ) + return false; + if ( field_5_yExt != other.field_5_yExt ) + return false; + if ( field_6_swHMF != other.field_6_swHMF ) + return false; + if ( field_7_grf != other.field_7_grf ) + return false; + if ( field_8_padding != other.field_8_padding ) + return false; + if ( field_9_mmPM != other.field_9_mmPM ) + return false; + return true; + } + + @Override + public int hashCode() + { + final int prime = 31; + int result = 1; + result = prime * result + field_10_padding2; + result = prime * result + field_11_dxaGoal; + result = prime * result + field_12_dyaGoal; + result = prime * result + field_13_mx; + result = prime * result + field_14_my; + result = prime * result + field_15_dxaReserved1; + result = prime * result + field_16_dyaReserved1; + result = prime * result + field_17_dxaReserved2; + result = prime * result + field_18_dyaReserved2; + result = prime * result + field_19_fReserved; + result = prime * result + field_1_lcb; + result = prime * result + field_20_bpp; + result = prime * result + Arrays.hashCode( field_21_brcTop80 ); + result = prime * result + Arrays.hashCode( field_22_brcLeft80 ); + result = prime * result + Arrays.hashCode( field_23_brcBottom80 ); + result = prime * result + Arrays.hashCode( field_24_brcRight80 ); + result = prime * result + field_25_dxaReserved3; + result = prime * result + field_26_dyaReserved3; + result = prime * result + field_27_cProps; + result = prime * result + field_2_cbHeader; + result = prime * result + field_3_mm; + result = prime * result + field_4_xExt; + result = prime * result + field_5_yExt; + result = prime * result + field_6_swHMF; + result = prime * result + field_7_grf; + result = prime * result + field_8_padding; + result = prime * result + field_9_mmPM; + return result; + } + +} diff --git 
a/src/scratchpad/src/org/apache/poi/hwpf/model/PICFAndOfficeArtData.java b/src/scratchpad/src/org/apache/poi/hwpf/model/PICFAndOfficeArtData.java new file mode 100644 index 0000000000..b17ec44391 --- /dev/null +++ b/src/scratchpad/src/org/apache/poi/hwpf/model/PICFAndOfficeArtData.java @@ -0,0 +1,102 @@ +/* ==================================================================== + Licensed to the Apache Software Foundation (ASF) under one or more + contributor license agreements. See the NOTICE file distributed with + this work for additional information regarding copyright ownership. + The ASF licenses this file to You under the Apache License, Version 2.0 + (the "License"); you may not use this file except in compliance with + the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
+==================================================================== */ +package org.apache.poi.hwpf.model; + +import java.util.LinkedList; +import java.util.List; + +import org.apache.poi.ddf.DefaultEscherRecordFactory; +import org.apache.poi.ddf.EscherContainerRecord; +import org.apache.poi.ddf.EscherRecord; +import org.apache.poi.util.Internal; +import org.apache.poi.util.LittleEndian; + +@Internal +public class PICFAndOfficeArtData +{ + + private List _blipRecords; + + private short _cchPicName; + + private PICF _picf; + + private EscherContainerRecord _shape; + + private byte[] _stPicName; + + public PICFAndOfficeArtData( byte[] dataStream, int startOffset ) + { + int offset = startOffset; + + _picf = new PICF( dataStream, offset ); + offset += PICF.getSize(); + + if ( _picf.getMm() == 0x0066 ) + { + _cchPicName = LittleEndian.getUByte( dataStream, offset ); + offset += 1; + + _stPicName = LittleEndian.getByteArray( dataStream, offset, + _cchPicName ); + offset += _cchPicName; + } + + final DefaultEscherRecordFactory escherRecordFactory = new DefaultEscherRecordFactory(); + _shape = new EscherContainerRecord(); + int recordSize = _shape.fillFields( dataStream, offset, + escherRecordFactory ); + offset += recordSize; + + _blipRecords = new LinkedList(); + while ( ( offset - startOffset ) < _picf.getLcb() ) + { + EscherRecord nextRecord = escherRecordFactory.createRecord( + dataStream, offset ); + if ( nextRecord.getRecordId() != (short) 0xF007 + && ( nextRecord.getRecordId() < (short) 0xF018 || nextRecord + .getRecordId() > (short) 0xF117 ) ) + break; + + int blipRecordSize = nextRecord.fillFields( dataStream, offset, + escherRecordFactory ); + offset += blipRecordSize; + + _blipRecords.add( nextRecord ); + } + } + + public List getBlipRecords() + { + return _blipRecords; + } + + public PICF getPicf() + { + return _picf; + } + + public EscherContainerRecord getShape() + { + return _shape; + } + + public byte[] getStPicName() + { + return _stPicName; + } +} 
diff --git a/src/scratchpad/src/org/apache/poi/hwpf/model/PictureDescriptor.java b/src/scratchpad/src/org/apache/poi/hwpf/model/PictureDescriptor.java deleted file mode 100644 index e59e769c5d..0000000000 --- a/src/scratchpad/src/org/apache/poi/hwpf/model/PictureDescriptor.java +++ /dev/null @@ -1,221 +0,0 @@ -/* ==================================================================== - Licensed to the Apache Software Foundation (ASF) under one or more - contributor license agreements. See the NOTICE file distributed with - this work for additional information regarding copyright ownership. - The ASF licenses this file to You under the Apache License, Version 2.0 - (the "License"); you may not use this file except in compliance with - the License. You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. -==================================================================== */ -package org.apache.poi.hwpf.model; - -import java.util.Arrays; - -import org.apache.poi.util.Internal; -import org.apache.poi.util.LittleEndian; - -/** - * Picture Descriptor (on File) (PICF) - *

- * Based on Microsoft Office Word 97-2007 Binary File Format (.doc) - * Specification; Page 181 of 210 - * - * @author Sergey Vladimirov ( vlsergey {at} gmail {dot} com ) - */ -@Internal -public class PictureDescriptor -{ - private static final int LCB_OFFSET = 0x00; - private static final int CBHEADER_OFFSET = 0x04; - - private static final int MFP_MM_OFFSET = 0x06; - private static final int MFP_XEXT_OFFSET = 0x08; - private static final int MFP_YEXT_OFFSET = 0x0A; - private static final int MFP_HMF_OFFSET = 0x0C; - - private static final int DXAGOAL_OFFSET = 0x1C; - private static final int DYAGOAL_OFFSET = 0x1E; - - private static final int MX_OFFSET = 0x20; - private static final int MY_OFFSET = 0x22; - - private static final int DXACROPLEFT_OFFSET = 0x24; - private static final int DYACROPTOP_OFFSET = 0x26; - private static final int DXACROPRIGHT_OFFSET = 0x28; - private static final int DYACROPBOTTOM_OFFSET = 0x2A; - - /** - * Number of bytes in the PIC structure plus size of following picture data - * which may be a Window's metafile, a bitmap, or the filename of a TIFF - * file. In the case of a Macintosh PICT picture, this includes the size of - * the PIC, the standard "x" metafile, and the Macintosh PICT data. See - * Appendix B for more information. - */ - protected int lcb; - - /** - * Number of bytes in the PIC (to allow for future expansion). - */ - protected int cbHeader; - - /* - * Microsoft Office Word 97-2007 Binary File Format (.doc) Specification - * - * Page 181 of 210 - * - * If a Windows metafile is stored immediately following the PIC structure, - * the mfp is a Window's METAFILEPICT structure. See - * http://msdn2.microsoft.com/en-us/library/ms649017(VS.85).aspx for more - * information about the METAFILEPICT structure and - * http://download.microsoft.com/download/0/B/E/0BE8BDD7-E5E8-422A-ABFD- - * 4342ED7AD886/WindowsMetafileFormat(wmf)Specification.pdf for Windows - * Metafile Format specification. 
- * - * When the data immediately following the PIC is a TIFF filename, - * mfp.mm==98 If a bitmap is stored after the pic, mfp.mm==99. - * - * When the PIC describes a bitmap, mfp.xExt is the width of the bitmap in - * pixels and mfp.yExt is the height of the bitmap in pixels. - */ - - protected int mfp_mm; - protected int mfp_xExt; - protected int mfp_yExt; - protected int mfp_hMF; - - /** - *

  • Window's bitmap structure when PIC describes a BITMAP (14 bytes) - * - *
  • Rectangle for window origin and extents when metafile is stored -- - * ignored if 0 (8 bytes) - */ - protected byte[] offset14 = new byte[14]; - - /** - * Horizontal measurement in twips of the rectangle the picture should be - * imaged within - */ - protected short dxaGoal = 0; - - /** - * Vertical measurement in twips of the rectangle the picture should be - * imaged within - */ - protected short dyaGoal = 0; - - /** - * Horizontal scaling factor supplied by user expressed in .001% units - */ - protected short mx; - - /** - * Vertical scaling factor supplied by user expressed in .001% units - */ - protected short my; - - /** - * The amount the picture has been cropped on the left in twips - */ - protected short dxaCropLeft = 0; - - /** - * The amount the picture has been cropped on the top in twips - */ - protected short dyaCropTop = 0; - - /** - * The amount the picture has been cropped on the right in twips - */ - protected short dxaCropRight = 0; - - /** - * The amount the picture has been cropped on the bottom in twips - */ - protected short dyaCropBottom = 0; - - public PictureDescriptor() - { - } - - public PictureDescriptor( byte[] _dataStream, int startOffset ) - { - this.lcb = LittleEndian.getInt( _dataStream, startOffset + LCB_OFFSET ); - this.cbHeader = LittleEndian.getUShort( _dataStream, startOffset - + CBHEADER_OFFSET ); - - this.mfp_mm = LittleEndian.getUShort( _dataStream, startOffset - + MFP_MM_OFFSET ); - this.mfp_xExt = LittleEndian.getUShort( _dataStream, startOffset - + MFP_XEXT_OFFSET ); - this.mfp_yExt = LittleEndian.getUShort( _dataStream, startOffset - + MFP_YEXT_OFFSET ); - this.mfp_hMF = LittleEndian.getUShort( _dataStream, startOffset - + MFP_HMF_OFFSET ); - - this.offset14 = LittleEndian.getByteArray( _dataStream, - startOffset + 0x0E, 14 ); - - this.dxaGoal = LittleEndian.getShort( _dataStream, startOffset - + DXAGOAL_OFFSET ); - this.dyaGoal = LittleEndian.getShort( _dataStream, startOffset - + DYAGOAL_OFFSET ); - - this.mx = 
LittleEndian.getShort( _dataStream, startOffset + MX_OFFSET ); - this.my = LittleEndian.getShort( _dataStream, startOffset + MY_OFFSET ); - - this.dxaCropLeft = LittleEndian.getShort( _dataStream, startOffset - + DXACROPLEFT_OFFSET ); - this.dyaCropTop = LittleEndian.getShort( _dataStream, startOffset - + DYACROPTOP_OFFSET ); - this.dxaCropRight = LittleEndian.getShort( _dataStream, startOffset - + DXACROPRIGHT_OFFSET ); - this.dyaCropBottom = LittleEndian.getShort( _dataStream, startOffset - + DYACROPBOTTOM_OFFSET ); - } - - @Override - public String toString() - { - StringBuilder stringBuilder = new StringBuilder(); - stringBuilder.append( "[PICF]\n" ); - stringBuilder.append( " lcb = " ).append( this.lcb ) - .append( '\n' ); - stringBuilder.append( " cbHeader = " ) - .append( this.cbHeader ).append( '\n' ); - - stringBuilder.append( " mfp.mm = " ).append( this.mfp_mm ) - .append( '\n' ); - stringBuilder.append( " mfp.xExt = " ) - .append( this.mfp_xExt ).append( '\n' ); - stringBuilder.append( " mfp.yExt = " ) - .append( this.mfp_yExt ).append( '\n' ); - stringBuilder.append( " mfp.hMF = " ) - .append( this.mfp_hMF ).append( '\n' ); - - stringBuilder.append( " offset14 = " ) - .append( Arrays.toString( this.offset14 ) ).append( '\n' ); - stringBuilder.append( " dxaGoal = " ) - .append( this.dxaGoal ).append( '\n' ); - stringBuilder.append( " dyaGoal = " ) - .append( this.dyaGoal ).append( '\n' ); - - stringBuilder.append( " dxaCropLeft = " ) - .append( this.dxaCropLeft ).append( '\n' ); - stringBuilder.append( " dyaCropTop = " ) - .append( this.dyaCropTop ).append( '\n' ); - stringBuilder.append( " dxaCropRight = " ) - .append( this.dxaCropRight ).append( '\n' ); - stringBuilder.append( " dyaCropBottom = " ) - .append( this.dyaCropBottom ).append( '\n' ); - - stringBuilder.append( "[/PICF]" ); - return stringBuilder.toString(); - } -} diff --git a/src/scratchpad/src/org/apache/poi/hwpf/model/types/PICFAbstractType.java 
b/src/scratchpad/src/org/apache/poi/hwpf/model/types/PICFAbstractType.java new file mode 100644 index 0000000000..ed01743182 --- /dev/null +++ b/src/scratchpad/src/org/apache/poi/hwpf/model/types/PICFAbstractType.java @@ -0,0 +1,692 @@ + +package org.apache.poi.hwpf.model.types; + + +import org.apache.poi.hwpf.usermodel.*; +import org.apache.poi.util.*; + +/** + * The PICF structure specifies the type of a picture, as well as the size of the + picture and information about its border.

    Class and fields descriptions are quoted + from Microsoft Office Word 97-2007 + Binary File Format and [MS-DOC] - v20110608 Word (.doc) + Binary File Format + + *

    + * NOTE: This source is automatically generated please do not modify this file. Either subclass or + * remove the record in src/types/definitions. + *

    + * This class is internal. It content or properties may change without notice + * due to changes in our knowledge of internal Microsoft Word binary structures. + + * @author Sergey Vladimirov; according to Microsoft Office Word 97-2007 Binary File Format + Specification [*.doc] and [MS-DOC] - v20110608 Word (.doc) Binary File Format + + */ +@Internal +public abstract class PICFAbstractType +{ + + protected int field_1_lcb; + protected int field_2_cbHeader; + protected short field_3_mm; + protected short field_4_xExt; + protected short field_5_yExt; + protected short field_6_swHMF; + protected int field_7_grf; + protected int field_8_padding; + protected int field_9_mmPM; + protected int field_10_padding2; + protected short field_11_dxaGoal; + protected short field_12_dyaGoal; + protected int field_13_mx; + protected int field_14_my; + protected short field_15_dxaReserved1; + protected short field_16_dyaReserved1; + protected short field_17_dxaReserved2; + protected short field_18_dyaReserved2; + protected byte field_19_fReserved; + protected byte field_20_bpp; + protected byte[] field_21_brcTop80; + protected byte[] field_22_brcLeft80; + protected byte[] field_23_brcBottom80; + protected byte[] field_24_brcRight80; + protected short field_25_dxaReserved3; + protected short field_26_dyaReserved3; + protected short field_27_cProps; + + protected PICFAbstractType() + { + this.field_21_brcTop80 = new byte[4]; + this.field_22_brcLeft80 = new byte[4]; + this.field_23_brcBottom80 = new byte[4]; + this.field_24_brcRight80 = new byte[4]; + } + + protected void fillFields( byte[] data, int offset ) + { + field_1_lcb = LittleEndian.getInt( data, 0x0 + offset ); + field_2_cbHeader = LittleEndian.getShort( data, 0x4 + offset ); + field_3_mm = LittleEndian.getShort( data, 0x6 + offset ); + field_4_xExt = LittleEndian.getShort( data, 0x8 + offset ); + field_5_yExt = LittleEndian.getShort( data, 0xa + offset ); + field_6_swHMF = LittleEndian.getShort( data, 0xc + offset ); + 
field_7_grf = LittleEndian.getInt( data, 0xe + offset ); + field_8_padding = LittleEndian.getInt( data, 0x12 + offset ); + field_9_mmPM = LittleEndian.getShort( data, 0x16 + offset ); + field_10_padding2 = LittleEndian.getInt( data, 0x18 + offset ); + field_11_dxaGoal = LittleEndian.getShort( data, 0x1c + offset ); + field_12_dyaGoal = LittleEndian.getShort( data, 0x1e + offset ); + field_13_mx = LittleEndian.getShort( data, 0x20 + offset ); + field_14_my = LittleEndian.getShort( data, 0x22 + offset ); + field_15_dxaReserved1 = LittleEndian.getShort( data, 0x24 + offset ); + field_16_dyaReserved1 = LittleEndian.getShort( data, 0x26 + offset ); + field_17_dxaReserved2 = LittleEndian.getShort( data, 0x28 + offset ); + field_18_dyaReserved2 = LittleEndian.getShort( data, 0x2a + offset ); + field_19_fReserved = data[ 0x2c + offset ]; + field_20_bpp = data[ 0x2d + offset ]; + field_21_brcTop80 = LittleEndian.getByteArray( data, 0x2e + offset,4 ); + field_22_brcLeft80 = LittleEndian.getByteArray( data, 0x32 + offset,4 ); + field_23_brcBottom80 = LittleEndian.getByteArray( data, 0x36 + offset,4 ); + field_24_brcRight80 = LittleEndian.getByteArray( data, 0x3a + offset,4 ); + field_25_dxaReserved3 = LittleEndian.getShort( data, 0x3e + offset ); + field_26_dyaReserved3 = LittleEndian.getShort( data, 0x40 + offset ); + field_27_cProps = LittleEndian.getShort( data, 0x42 + offset ); + } + + public void serialize( byte[] data, int offset ) + { + LittleEndian.putInt( data, 0x0 + offset, field_1_lcb ); + LittleEndian.putUShort( data, 0x4 + offset, field_2_cbHeader ); + LittleEndian.putShort( data, 0x6 + offset, field_3_mm ); + LittleEndian.putShort( data, 0x8 + offset, field_4_xExt ); + LittleEndian.putShort( data, 0xa + offset, field_5_yExt ); + LittleEndian.putShort( data, 0xc + offset, field_6_swHMF ); + LittleEndian.putInt( data, 0xe + offset, field_7_grf ); + LittleEndian.putInt( data, 0x12 + offset, field_8_padding ); + LittleEndian.putUShort( data, 0x16 + offset, 
field_9_mmPM ); + LittleEndian.putInt( data, 0x18 + offset, field_10_padding2 ); + LittleEndian.putShort( data, 0x1c + offset, field_11_dxaGoal ); + LittleEndian.putShort( data, 0x1e + offset, field_12_dyaGoal ); + LittleEndian.putUShort( data, 0x20 + offset, field_13_mx ); + LittleEndian.putUShort( data, 0x22 + offset, field_14_my ); + LittleEndian.putShort( data, 0x24 + offset, field_15_dxaReserved1 ); + LittleEndian.putShort( data, 0x26 + offset, field_16_dyaReserved1 ); + LittleEndian.putShort( data, 0x28 + offset, field_17_dxaReserved2 ); + LittleEndian.putShort( data, 0x2a + offset, field_18_dyaReserved2 ); + data[ 0x2c + offset ] = field_19_fReserved; + data[ 0x2d + offset ] = field_20_bpp; + System.arraycopy( field_21_brcTop80, 0, data, 0x2e + offset, field_21_brcTop80.length ); + System.arraycopy( field_22_brcLeft80, 0, data, 0x32 + offset, field_22_brcLeft80.length ); + System.arraycopy( field_23_brcBottom80, 0, data, 0x36 + offset, field_23_brcBottom80.length ); + System.arraycopy( field_24_brcRight80, 0, data, 0x3a + offset, field_24_brcRight80.length ); + LittleEndian.putShort( data, 0x3e + offset, field_25_dxaReserved3 ); + LittleEndian.putShort( data, 0x40 + offset, field_26_dyaReserved3 ); + LittleEndian.putShort( data, 0x42 + offset, field_27_cProps ); + } + + public byte[] serialize() + { + final byte[] result = new byte[ getSize() ]; + serialize( result, 0 ); + return result; + } + + /** + * Size of record + */ + public static int getSize() + { + return 0 + 4 + 2 + 2 + 2 + 2 + 2 + 4 + 4 + 2 + 4 + 2 + 2 + 2 + 2 + 2 + 2 + 2 + 2 + 1 + 1 + 4 + 4 + 4 + 4 + 2 + 2 + 2; + } + + public String toString() + { + StringBuilder builder = new StringBuilder(); + builder.append("[PICF]\n"); + builder.append(" .lcb = "); + builder.append(" (").append(getLcb()).append(" )\n"); + builder.append(" .cbHeader = "); + builder.append(" (").append(getCbHeader()).append(" )\n"); + builder.append(" .mm = "); + builder.append(" (").append(getMm()).append(" )\n"); + 
builder.append(" .xExt = "); + builder.append(" (").append(getXExt()).append(" )\n"); + builder.append(" .yExt = "); + builder.append(" (").append(getYExt()).append(" )\n"); + builder.append(" .swHMF = "); + builder.append(" (").append(getSwHMF()).append(" )\n"); + builder.append(" .grf = "); + builder.append(" (").append(getGrf()).append(" )\n"); + builder.append(" .padding = "); + builder.append(" (").append(getPadding()).append(" )\n"); + builder.append(" .mmPM = "); + builder.append(" (").append(getMmPM()).append(" )\n"); + builder.append(" .padding2 = "); + builder.append(" (").append(getPadding2()).append(" )\n"); + builder.append(" .dxaGoal = "); + builder.append(" (").append(getDxaGoal()).append(" )\n"); + builder.append(" .dyaGoal = "); + builder.append(" (").append(getDyaGoal()).append(" )\n"); + builder.append(" .mx = "); + builder.append(" (").append(getMx()).append(" )\n"); + builder.append(" .my = "); + builder.append(" (").append(getMy()).append(" )\n"); + builder.append(" .dxaReserved1 = "); + builder.append(" (").append(getDxaReserved1()).append(" )\n"); + builder.append(" .dyaReserved1 = "); + builder.append(" (").append(getDyaReserved1()).append(" )\n"); + builder.append(" .dxaReserved2 = "); + builder.append(" (").append(getDxaReserved2()).append(" )\n"); + builder.append(" .dyaReserved2 = "); + builder.append(" (").append(getDyaReserved2()).append(" )\n"); + builder.append(" .fReserved = "); + builder.append(" (").append(getFReserved()).append(" )\n"); + builder.append(" .bpp = "); + builder.append(" (").append(getBpp()).append(" )\n"); + builder.append(" .brcTop80 = "); + builder.append(" (").append(getBrcTop80()).append(" )\n"); + builder.append(" .brcLeft80 = "); + builder.append(" (").append(getBrcLeft80()).append(" )\n"); + builder.append(" .brcBottom80 = "); + builder.append(" (").append(getBrcBottom80()).append(" )\n"); + builder.append(" .brcRight80 = "); + builder.append(" (").append(getBrcRight80()).append(" )\n"); + builder.append(" 
.dxaReserved3 = "); + builder.append(" (").append(getDxaReserved3()).append(" )\n"); + builder.append(" .dyaReserved3 = "); + builder.append(" (").append(getDyaReserved3()).append(" )\n"); + builder.append(" .cProps = "); + builder.append(" (").append(getCProps()).append(" )\n"); + + builder.append("[/PICF]\n"); + return builder.toString(); + } + + /** + * A signed integer that specifies the size, in bytes, of this PICF structure and the subsequent data. + */ + @Internal + public int getLcb() + { + return field_1_lcb; + } + + /** + * A signed integer that specifies the size, in bytes, of this PICF structure and the subsequent data. + */ + @Internal + public void setLcb( int field_1_lcb ) + { + this.field_1_lcb = field_1_lcb; + } + + /** + * An unsigned integer that specifies the size, in bytes, of this PICF structure. This value MUST be 0x44. + */ + @Internal + public int getCbHeader() + { + return field_2_cbHeader; + } + + /** + * An unsigned integer that specifies the size, in bytes, of this PICF structure. This value MUST be 0x44. + */ + @Internal + public void setCbHeader( int field_2_cbHeader ) + { + this.field_2_cbHeader = field_2_cbHeader; + } + + /** + * A signed integer that specifies the format of the picture data. + */ + @Internal + public short getMm() + { + return field_3_mm; + } + + /** + * A signed integer that specifies the format of the picture data. + */ + @Internal + public void setMm( short field_3_mm ) + { + this.field_3_mm = field_3_mm; + } + + /** + * This field is unused and MUST be ignored. + */ + @Internal + public short getXExt() + { + return field_4_xExt; + } + + /** + * This field is unused and MUST be ignored. + */ + @Internal + public void setXExt( short field_4_xExt ) + { + this.field_4_xExt = field_4_xExt; + } + + /** + * This field is unused and MUST be ignored. + */ + @Internal + public short getYExt() + { + return field_5_yExt; + } + + /** + * This field is unused and MUST be ignored. 
+ */ + @Internal + public void setYExt( short field_5_yExt ) + { + this.field_5_yExt = field_5_yExt; + } + + /** + * This field is unused and MUST be ignored. + */ + @Internal + public short getSwHMF() + { + return field_6_swHMF; + } + + /** + * This field is unused and MUST be ignored. + */ + @Internal + public void setSwHMF( short field_6_swHMF ) + { + this.field_6_swHMF = field_6_swHMF; + } + + /** + * This field MUST be ignored. + */ + @Internal + public int getGrf() + { + return field_7_grf; + } + + /** + * This field MUST be ignored. + */ + @Internal + public void setGrf( int field_7_grf ) + { + this.field_7_grf = field_7_grf; + } + + /** + * This value MUST be zero and MUST be ignored. + */ + @Internal + public int getPadding() + { + return field_8_padding; + } + + /** + * This value MUST be zero and MUST be ignored. + */ + @Internal + public void setPadding( int field_8_padding ) + { + this.field_8_padding = field_8_padding; + } + + /** + * This field MUST be ignored. + */ + @Internal + public int getMmPM() + { + return field_9_mmPM; + } + + /** + * This field MUST be ignored. + */ + @Internal + public void setMmPM( int field_9_mmPM ) + { + this.field_9_mmPM = field_9_mmPM; + } + + /** + * This value MUST be zero and MUST be ignored. + */ + @Internal + public int getPadding2() + { + return field_10_padding2; + } + + /** + * This value MUST be zero and MUST be ignored. + */ + @Internal + public void setPadding2( int field_10_padding2 ) + { + this.field_10_padding2 = field_10_padding2; + } + + /** + * Get the dxaGoal field for the PICF record. + */ + @Internal + public short getDxaGoal() + { + return field_11_dxaGoal; + } + + /** + * Set the dxaGoal field for the PICF record. + */ + @Internal + public void setDxaGoal( short field_11_dxaGoal ) + { + this.field_11_dxaGoal = field_11_dxaGoal; + } + + /** + * Get the dyaGoal field for the PICF record. 
+ */ + @Internal + public short getDyaGoal() + { + return field_12_dyaGoal; + } + + /** + * Set the dyaGoal field for the PICF record. + */ + @Internal + public void setDyaGoal( short field_12_dyaGoal ) + { + this.field_12_dyaGoal = field_12_dyaGoal; + } + + /** + * Get the mx field for the PICF record. + */ + @Internal + public int getMx() + { + return field_13_mx; + } + + /** + * Set the mx field for the PICF record. + */ + @Internal + public void setMx( int field_13_mx ) + { + this.field_13_mx = field_13_mx; + } + + /** + * Get the my field for the PICF record. + */ + @Internal + public int getMy() + { + return field_14_my; + } + + /** + * Set the my field for the PICF record. + */ + @Internal + public void setMy( int field_14_my ) + { + this.field_14_my = field_14_my; + } + + /** + * Get the dxaReserved1 field for the PICF record. + */ + @Internal + public short getDxaReserved1() + { + return field_15_dxaReserved1; + } + + /** + * Set the dxaReserved1 field for the PICF record. + */ + @Internal + public void setDxaReserved1( short field_15_dxaReserved1 ) + { + this.field_15_dxaReserved1 = field_15_dxaReserved1; + } + + /** + * Get the dyaReserved1 field for the PICF record. + */ + @Internal + public short getDyaReserved1() + { + return field_16_dyaReserved1; + } + + /** + * Set the dyaReserved1 field for the PICF record. + */ + @Internal + public void setDyaReserved1( short field_16_dyaReserved1 ) + { + this.field_16_dyaReserved1 = field_16_dyaReserved1; + } + + /** + * Get the dxaReserved2 field for the PICF record. + */ + @Internal + public short getDxaReserved2() + { + return field_17_dxaReserved2; + } + + /** + * Set the dxaReserved2 field for the PICF record. + */ + @Internal + public void setDxaReserved2( short field_17_dxaReserved2 ) + { + this.field_17_dxaReserved2 = field_17_dxaReserved2; + } + + /** + * Get the dyaReserved2 field for the PICF record. 
+ */ + @Internal + public short getDyaReserved2() + { + return field_18_dyaReserved2; + } + + /** + * Set the dyaReserved2 field for the PICF record. + */ + @Internal + public void setDyaReserved2( short field_18_dyaReserved2 ) + { + this.field_18_dyaReserved2 = field_18_dyaReserved2; + } + + /** + * Get the fReserved field for the PICF record. + */ + @Internal + public byte getFReserved() + { + return field_19_fReserved; + } + + /** + * Set the fReserved field for the PICF record. + */ + @Internal + public void setFReserved( byte field_19_fReserved ) + { + this.field_19_fReserved = field_19_fReserved; + } + + /** + * Get the bpp field for the PICF record. + */ + @Internal + public byte getBpp() + { + return field_20_bpp; + } + + /** + * Set the bpp field for the PICF record. + */ + @Internal + public void setBpp( byte field_20_bpp ) + { + this.field_20_bpp = field_20_bpp; + } + + /** + * Get the brcTop80 field for the PICF record. + */ + @Internal + public byte[] getBrcTop80() + { + return field_21_brcTop80; + } + + /** + * Set the brcTop80 field for the PICF record. + */ + @Internal + public void setBrcTop80( byte[] field_21_brcTop80 ) + { + this.field_21_brcTop80 = field_21_brcTop80; + } + + /** + * Get the brcLeft80 field for the PICF record. + */ + @Internal + public byte[] getBrcLeft80() + { + return field_22_brcLeft80; + } + + /** + * Set the brcLeft80 field for the PICF record. + */ + @Internal + public void setBrcLeft80( byte[] field_22_brcLeft80 ) + { + this.field_22_brcLeft80 = field_22_brcLeft80; + } + + /** + * Get the brcBottom80 field for the PICF record. + */ + @Internal + public byte[] getBrcBottom80() + { + return field_23_brcBottom80; + } + + /** + * Set the brcBottom80 field for the PICF record. + */ + @Internal + public void setBrcBottom80( byte[] field_23_brcBottom80 ) + { + this.field_23_brcBottom80 = field_23_brcBottom80; + } + + /** + * Get the brcRight80 field for the PICF record. 
+ */ + @Internal + public byte[] getBrcRight80() + { + return field_24_brcRight80; + } + + /** + * Set the brcRight80 field for the PICF record. + */ + @Internal + public void setBrcRight80( byte[] field_24_brcRight80 ) + { + this.field_24_brcRight80 = field_24_brcRight80; + } + + /** + * Get the dxaReserved3 field for the PICF record. + */ + @Internal + public short getDxaReserved3() + { + return field_25_dxaReserved3; + } + + /** + * Set the dxaReserved3 field for the PICF record. + */ + @Internal + public void setDxaReserved3( short field_25_dxaReserved3 ) + { + this.field_25_dxaReserved3 = field_25_dxaReserved3; + } + + /** + * Get the dyaReserved3 field for the PICF record. + */ + @Internal + public short getDyaReserved3() + { + return field_26_dyaReserved3; + } + + /** + * Set the dyaReserved3 field for the PICF record. + */ + @Internal + public void setDyaReserved3( short field_26_dyaReserved3 ) + { + this.field_26_dyaReserved3 = field_26_dyaReserved3; + } + + /** + * This value MUST be 0 and MUST be ignored. + */ + @Internal + public short getCProps() + { + return field_27_cProps; + } + + /** + * This value MUST be 0 and MUST be ignored. 
+ */ + @Internal + public void setCProps( short field_27_cProps ) + { + this.field_27_cProps = field_27_cProps; + } + +} // END OF CLASS diff --git a/src/scratchpad/src/org/apache/poi/hwpf/usermodel/Picture.java b/src/scratchpad/src/org/apache/poi/hwpf/usermodel/Picture.java index 2781b76519..555beb7b11 100644 --- a/src/scratchpad/src/org/apache/poi/hwpf/usermodel/Picture.java +++ b/src/scratchpad/src/org/apache/poi/hwpf/usermodel/Picture.java @@ -23,8 +23,19 @@ import java.io.IOException; import java.io.OutputStream; import java.util.zip.InflaterInputStream; -import org.apache.poi.hwpf.model.PictureDescriptor; -import org.apache.poi.util.LittleEndian; +import org.apache.poi.ddf.EscherSimpleProperty; + +import org.apache.poi.ddf.EscherProperty; + +import org.apache.poi.ddf.EscherOptRecord; + +import org.apache.poi.ddf.EscherContainerRecord; + +import org.apache.poi.ddf.EscherBSERecord; +import org.apache.poi.ddf.EscherBlipRecord; +import org.apache.poi.ddf.EscherRecord; +import org.apache.poi.hwpf.model.PICF; +import org.apache.poi.hwpf.model.PICFAndOfficeArtData; import org.apache.poi.util.POILogFactory; import org.apache.poi.util.POILogger; @@ -33,91 +44,229 @@ import org.apache.poi.util.POILogger; * * @author Dmitry Romanov */ -public final class Picture extends PictureDescriptor +public final class Picture { - private static final POILogger log = POILogFactory - .getLogger( Picture.class ); + @Deprecated + public static final byte[] BMP = new byte[] { 'B', 'M' }; + + public static final byte[] COMPRESSED1 = { (byte) 0xFE, 0x78, (byte) 0xDA }; - // public static final int FILENAME_OFFSET = 0x7C; - // public static final int FILENAME_SIZE_OFFSET = 0x6C; - static final int PICF_OFFSET = 0x0; - static final int PICT_HEADER_OFFSET = 0x4; - static final int MFPMM_OFFSET = 0x6; - static final int PICF_SHAPE_OFFSET = 0xE; - static final int UNKNOWN_HEADER_SIZE = 0x49; + public static final byte[] COMPRESSED2 = { (byte) 0xFE, 0x78, (byte) 0x9C }; @Deprecated - public 
static final byte[] GIF = PictureType.GIF.getSignatures()[0]; - @Deprecated - public static final byte[] PNG = PictureType.PNG.getSignatures()[0]; + public static final byte[] EMF = { 0x01, 0x00, 0x00, 0x00 }; + @Deprecated - public static final byte[] JPG = PictureType.JPEG.getSignatures()[0]; + public static final byte[] GIF = new byte[] { 'G', 'I', 'F' }; + public static final byte[] IHDR = new byte[] { 'I', 'H', 'D', 'R' }; @Deprecated - public static final byte[] BMP = PictureType.BMP.getSignatures()[0]; + public static final byte[] JPG = new byte[] { (byte) 0xFF, (byte) 0xD8 }; + private static final POILogger log = POILogFactory + .getLogger( Picture.class ); @Deprecated - public static final byte[] TIFF = PictureType.TIFF.getSignatures()[0]; + public static final byte[] PNG = new byte[] { (byte) 0x89, 0x50, 0x4E, + 0x47, 0x0D, 0x0A, 0x1A, 0x0A }; @Deprecated - public static final byte[] TIFF1 = PictureType.TIFF.getSignatures()[1]; + public static final byte[] TIFF = new byte[] { 0x49, 0x49, 0x2A, 0x00 }; @Deprecated - public static final byte[] EMF = PictureType.EMF.getSignatures()[0]; + public static final byte[] TIFF1 = new byte[] { 0x4D, 0x4D, 0x00, 0x2A }; @Deprecated - public static final byte[] WMF1 = PictureType.WMF.getSignatures()[0]; + public static final byte[] WMF1 = { (byte) 0xD7, (byte) 0xCD, (byte) 0xC6, + (byte) 0x9A, 0x00, 0x00 }; // Windows 3.x @Deprecated - public static final byte[] WMF2 = PictureType.WMF.getSignatures()[1]; - // TODO: DIB, PICT + public static final byte[] WMF2 = { 0x01, 0x00, 0x09, 0x00, 0x00, 0x03 }; // Windows + // 3.x - public static final byte[] IHDR = new byte[] { 'I', 'H', 'D', 'R' }; + private static int getBigEndianInt( byte[] data, int offset ) + { + return ( ( ( data[offset] & 0xFF ) << 24 ) + + ( ( data[offset + 1] & 0xFF ) << 16 ) + + ( ( data[offset + 2] & 0xFF ) << 8 ) + ( data[offset + 3] & 0xFF ) ); + } - public static final byte[] COMPRESSED1 = { (byte) 0xFE, 0x78, (byte) 0xDA }; - public static final 
byte[] COMPRESSED2 = { (byte) 0xFE, 0x78, (byte) 0x9C }; + private static int getBigEndianShort( byte[] data, int offset ) + { + return ( ( ( data[offset] & 0xFF ) << 8 ) + ( data[offset + 1] & 0xFF ) ); + } + + private static boolean matchSignature( byte[] pictureData, + byte[] signature, int offset ) + { + boolean matched = offset < pictureData.length; + for ( int i = 0; ( i + offset ) < pictureData.length + && i < signature.length; i++ ) + { + if ( pictureData[i + offset] != signature[i] ) + { + matched = false; + break; + } + } + return matched; + } + + private PICF _picf; + private PICFAndOfficeArtData _picfAndOfficeArtData; - private int dataBlockStartOfsset; - private int pictureBytesStartOffset; - private int dataBlockSize; - private int size; - // private String fileName; - private byte[] rawContent; private byte[] content; - private byte[] _dataStream; + private int dataBlockStartOfsset; + private int height = -1; private int width = -1; + public Picture( byte[] _dataStream ) + { + super(); + + // XXX: implement + // this._dataStream = _dataStream; + // this.dataBlockStartOfsset = 0; + // this.dataBlockSize = _dataStream.length; + // this.pictureBytesStartOffset = 0; + // this.size = _dataStream.length; + } + public Picture( int dataBlockStartOfsset, byte[] _dataStream, boolean fillBytes ) { - super( _dataStream, dataBlockStartOfsset ); + _picfAndOfficeArtData = new PICFAndOfficeArtData( _dataStream, + dataBlockStartOfsset ); + _picf = _picfAndOfficeArtData.getPicf(); - this._dataStream = _dataStream; this.dataBlockStartOfsset = dataBlockStartOfsset; - this.dataBlockSize = LittleEndian.getInt( _dataStream, - dataBlockStartOfsset ); - this.pictureBytesStartOffset = getPictureBytesStartOffset( - dataBlockStartOfsset, _dataStream, dataBlockSize ); - this.size = dataBlockSize - - ( pictureBytesStartOffset - dataBlockStartOfsset ); - if ( size < 0 ) - { + if ( fillBytes ) + fillImageContent(); + } - } + private void fillImageContent() + { + if ( content != 
null && content.length > 0 ) + return; - if ( fillBytes ) + byte[] rawContent = getRawContent(); + + /* + * HACK: Detect compressed images. In reality there should be some way + * to determine this from the first 32 bytes, but I can't see any + * similarity between all the samples I have obtained, nor any + * similarity in the data block contents. + */ + if ( matchSignature( rawContent, COMPRESSED1, 32 ) + || matchSignature( rawContent, COMPRESSED2, 32 ) ) { - fillImageContent(); + try + { + InflaterInputStream in = new InflaterInputStream( + new ByteArrayInputStream( rawContent, 33, + rawContent.length - 33 ) ); + ByteArrayOutputStream out = new ByteArrayOutputStream(); + byte[] buf = new byte[4096]; + int readBytes; + while ( ( readBytes = in.read( buf ) ) > 0 ) + { + out.write( buf, 0, readBytes ); + } + content = out.toByteArray(); + } + catch ( IOException e ) + { + /* + * Problems reading from the actual ByteArrayInputStream should + * never happen so this will only ever be a ZipException. + */ + log.log( POILogger.INFO, + "Possibly corrupt compression or non-compressed data", + e ); + } + } + else + { + // Raw data is not compressed. + content = rawContent; } } - public Picture( byte[] _dataStream ) + private void fillJPGWidthHeight() { - super(); + /* + * http://www.codecomments.com/archive281-2004-3-158083.html + * + * Algorithm proposed by Patrick TJ McPhee: + * + * read 2 bytes make sure they are 'ffd8'x repeatedly: read 2 bytes make + * sure the first one is 'ff'x if the second one is 'd9'x stop else if + * the second one is c0 or c2 (or possibly other values ...)
skip 2 + * bytes read one byte into depth read two bytes into height read two + * bytes into width else read two bytes into length skip forward + * length-2 bytes + * + * Also used Ruby code snippet from: + * http://www.bigbold.com/snippets/posts/show/805 for reference + */ + byte[] jpegContent = getContent(); - this._dataStream = _dataStream; - this.dataBlockStartOfsset = 0; - this.dataBlockSize = _dataStream.length; - this.pictureBytesStartOffset = 0; - this.size = _dataStream.length; + int pointer = 2; + int firstByte = jpegContent[pointer]; + int secondByte = jpegContent[pointer + 1]; + int endOfPicture = jpegContent.length; + while ( pointer < endOfPicture - 1 ) + { + do + { + firstByte = jpegContent[pointer]; + secondByte = jpegContent[pointer + 1]; + pointer += 2; + } + while ( !( firstByte == (byte) 0xFF ) && pointer < endOfPicture - 1 ); + + if ( firstByte == ( (byte) 0xFF ) && pointer < endOfPicture - 1 ) + { + if ( secondByte == (byte) 0xD9 || secondByte == (byte) 0xDA ) + { + break; + } + else if ( ( secondByte & 0xF0 ) == 0xC0 + && secondByte != (byte) 0xC4 + && secondByte != (byte) 0xC8 + && secondByte != (byte) 0xCC ) + { + pointer += 5; + this.height = getBigEndianShort( jpegContent, pointer ); + this.width = getBigEndianShort( jpegContent, pointer + 2 ); + break; + } + else + { + pointer++; + pointer++; + int length = getBigEndianShort( jpegContent, pointer ); + pointer += length; + } + } + else + { + pointer++; + } + } + } + + void fillPNGWidthHeight() + { + byte[] pngContent = getContent(); + /* + * Used PNG file format description from + * http://www.wotsit.org/download.asp?f=png + */ + int HEADER_START = PNG.length + 4; + if ( matchSignature( pngContent, IHDR, HEADER_START ) ) + { + int IHDR_CHUNK_WIDTH = HEADER_START + 4; + this.width = getBigEndianInt( pngContent, IHDR_CHUNK_WIDTH ); + this.height = getBigEndianInt( pngContent, IHDR_CHUNK_WIDTH + 4 ); + } } private void fillWidthHeight() @@ -139,48 +288,23 @@ public final class Picture 
extends PictureDescriptor } /** - * Tries to suggest a filename: hex representation of picture structure - * offset in "Data" stream plus extension that is tried to determine from - * first byte of picture's content. - * - * @return suggested file name - */ - public String suggestFullFileName() - { - String fileExt = suggestFileExtension(); - return Integer.toHexString( dataBlockStartOfsset ) - + ( fileExt.length() > 0 ? "." + fileExt : "" ); - } - - /** - * Writes Picture's content bytes to specified OutputStream. Is useful when - * there is need to write picture bytes directly to stream, omitting its - * representation in memory as distinct byte array. - * - * @param out - * a stream to write to - * @throws IOException - * if some exception is occured while writing to specified out + * @return the horizontal aspect ratio for picture provided by user + * @deprecated use more precise {@link #getHorizontalScalingFactor()} */ - public void writeImageContent( OutputStream out ) throws IOException + @Deprecated + public int getAspectRatioX() { - if ( rawContent != null && rawContent.length > 0 ) - { - out.write( rawContent, 0, size ); - } - else - { - out.write( _dataStream, pictureBytesStartOffset, size ); - } + return _picf.getMx() / 10; } /** - * @return The offset of this picture in the picture bytes, used when - * matching up with {@link CharacterRun#getPicOffset()} + * @return the vertical aspect ratio for picture provided by user + * @deprecated use more precise {@link #getVerticalScalingFactor()} */ - public int getStartOffset() + @Deprecated + public int getAspectRatioY() { - return dataBlockStartOfsset; + return _picf.getMy() / 10; } /** @@ -193,61 +317,39 @@ public final class Picture extends PictureDescriptor } /** - * Returns picture's content as it stored in Word file, i.e. possibly in - * compressed form.
- * - * @return picture's content as it stored in Word file - */ - public byte[] getRawContent() - { - fillRawImageContent(); - return rawContent; - } - - /** - * - * @return size in bytes of the picture - */ - public int getSize() - { - return size; - } - - /** - * @return the horizontal aspect ratio for picture provided by user - * @deprecated use more precise {@link #getHorizontalScalingFactor()} + * @return The amount the picture has been cropped on the left in twips */ @Deprecated - public int getAspectRatioX() - { - return mx / 10; - } - - /** - * @return Horizontal scaling factor supplied by user expressed in .001% - * units - */ - public int getHorizontalScalingFactor() + public int getDxaCropLeft() { - return mx; + // TODO: use new properties + // if (_picfAndOfficeArtData == null || _picfAndOfficeArtData.getShape() + // == null) + // return 0; + // + // final EscherContainerRecord shape = _picfAndOfficeArtData.getShape(); + // EscherOptRecord optRecord = shape.getChildById( (short) 0xF00B ); + // if (optRecord == null) + // return 0; + // + // EscherProperty property = optRecord.lookup( 0x0102 ); + // if (property == null || !(property instanceof EscherSimpleProperty)) + // return 0; + // + // EscherSimpleProperty simpleProperty = (EscherSimpleProperty) + // property; + // return simpleProperty.getPropertyValue(); + + return _picf.getDxaReserved1(); } /** - * @retrn the vertical aspect ratio for picture provided by user - * @deprecated use more precise {@link #getVerticalScalingFactor()} + * @return The amount the picture has been cropped on the right in twips */ @Deprecated - public int getAspectRatioY() - { - return my / 10; - } - - /** - * @return Vertical scaling factor supplied by user expressed in .001% units - */ - public int getVerticalScalingFactor() + public int getDxaCropRight() { - return my; + return _picf.getDxaReserved2(); } /** @@ -258,61 +360,58 @@ public final class Picture extends PictureDescriptor */ public int getDxaGoal() { - return 
dxaGoal; + return _picf.getDxaGoal(); } /** - * Gets the initial height of the picture, in twips, prior to cropping or - * scaling. - * - * @return the initial width of the picture in twips - */ - public int getDyaGoal() - { - return dyaGoal; - } - - /** - * @return The amount the picture has been cropped on the left in twips + * @return The amount the picture has been cropped on the bottom in twips */ - public int getDxaCropLeft() + @Deprecated + public int getDyaCropBottom() { - return dxaCropLeft; + return _picf.getDyaReserved2(); } /** * @return The amount the picture has been cropped on the top in twips */ + @Deprecated public int getDyaCropTop() { - return dyaCropTop; + return _picf.getDyaReserved1(); } /** - * @return The amount the picture has been cropped on the right in twips + * Gets the initial height of the picture, in twips, prior to cropping or + * scaling. + * + * @return the initial height of the picture in twips */ - public int getDxaCropRight() + public int getDyaGoal() { - return dxaCropRight; + return _picf.getDyaGoal(); } /** - * @return The amount the picture has been cropped on the bottom in twips + * Returns the pixel height of the picture, or -1 if the dimensions could + * not be determined. */ - public int getDyaCropBottom() + public int getHeight() { - return dyaCropBottom; + if ( height == -1 ) + { + fillWidthHeight(); + } + return height; } /** - * tries to suggest extension for picture's file by matching signatures of - * popular image formats to first bytes of picture's contents - * - * @return suggested file extension + * @return Horizontal scaling factor supplied by user expressed in .001% + * units */ - public String suggestFileExtension() + public int getHorizontalScalingFactor() { - return suggestPictureType().getExtension(); + return _picf.getMx(); } /** @@ -325,219 +424,56 @@ public final class Picture extends PictureDescriptor return suggestPictureType().getMime(); } - public PictureType suggestPictureType() - { - return
PictureType.findMatchingType( getContent() ); - } - - // public String getFileName() - // { - // return fileName; - // } - - // private static String extractFileName(int blockStartIndex, byte[] - // dataStream) { - // int fileNameStartOffset = blockStartIndex + 0x7C; - // int fileNameSizeOffset = blockStartIndex + FILENAME_SIZE_OFFSET; - // int fileNameSize = LittleEndian.getShort(dataStream, fileNameSizeOffset); - // - // int fileNameIndex = fileNameStartOffset; - // char[] fileNameChars = new char[(fileNameSize-1)/2]; - // int charIndex = 0; - // while(charIndex 0 ) - return; - - this.rawContent = new byte[size]; - System.arraycopy( _dataStream, pictureBytesStartOffset, rawContent, 0, - size ); - } - - private void fillImageContent() + /** + * Returns picture's content as it stored in Word file, i.e. possibly in + * compressed form. + * + * @return picture's content as it stored in Word file + */ + public byte[] getRawContent() { - if ( content != null && content.length > 0 ) - return; - - byte[] rawContent = getRawContent(); + if ( _picfAndOfficeArtData.getBlipRecords().size() != 1 ) + return new byte[0]; - // HACK: Detect compressed images. In reality there should be some way - // to determine - // this from the first 32 bytes, but I can't see any similarity between - // all the - // samples I have obtained, nor any similarity in the data block - // contents. 
- if ( matchSignature( rawContent, COMPRESSED1, 32 ) - || matchSignature( rawContent, COMPRESSED2, 32 ) ) - { - try - { - InflaterInputStream in = new InflaterInputStream( - new ByteArrayInputStream( rawContent, 33, - rawContent.length - 33 ) ); - ByteArrayOutputStream out = new ByteArrayOutputStream(); - byte[] buf = new byte[4096]; - int readBytes; - while ( ( readBytes = in.read( buf ) ) > 0 ) - { - out.write( buf, 0, readBytes ); - } - content = out.toByteArray(); - } - catch ( IOException e ) - { - // Problems reading from the actual ByteArrayInputStream should - // never happen - // so this will only ever be a ZipException. - log.log( POILogger.INFO, - "Possibly corrupt compression or non-compressed data", - e ); - } - } - else + EscherRecord escherRecord = _picfAndOfficeArtData.getBlipRecords().get( + 0 ); + if ( escherRecord instanceof EscherBlipRecord ) { - // Raw data is not compressed. - content = rawContent; + return ( (EscherBlipRecord) escherRecord ).getPicturedata(); } - } - private static boolean matchSignature( byte[] pictureData, - byte[] signature, int offset ) - { - boolean matched = offset < pictureData.length; - for ( int i = 0; ( i + offset ) < pictureData.length - && i < signature.length; i++ ) + if ( escherRecord instanceof EscherBSERecord ) { - if ( pictureData[i + offset] != signature[i] ) - { - matched = false; - break; - } + return ( (EscherBSERecord) escherRecord ).getBlipRecord() + .getPicturedata(); } - return matched; + return new byte[0]; } - private static int getPictureBytesStartOffset( int dataBlockStartOffset, - byte[] _dataStream, int dataBlockSize ) + /** + * + * @return size in bytes of the picture + */ + public int getSize() { - int realPicoffset = dataBlockStartOffset; - final int dataBlockEndOffset = dataBlockSize + dataBlockStartOffset; - - // Skip over the PICT block - int PICTFBlockSize = LittleEndian.getShort( _dataStream, - dataBlockStartOffset + PICT_HEADER_OFFSET ); // Should be 68 - // bytes - - // Now the PICTF1 
- int PICTF1BlockOffset = PICTFBlockSize + PICT_HEADER_OFFSET; - short MM_TYPE = LittleEndian.getShort( _dataStream, - dataBlockStartOffset + PICT_HEADER_OFFSET + 2 ); - if ( MM_TYPE == 0x66 ) - { - // Skip the stPicName - int cchPicName = LittleEndian.getUnsignedByte( _dataStream, - PICTF1BlockOffset ); - PICTF1BlockOffset += 1 + cchPicName; - } - int PICTF1BlockSize = LittleEndian.getShort( _dataStream, - dataBlockStartOffset + PICTF1BlockOffset ); - - int unknownHeaderOffset = ( PICTF1BlockSize + PICTF1BlockOffset ) < dataBlockEndOffset ? ( PICTF1BlockSize + PICTF1BlockOffset ) - : PICTF1BlockOffset; - realPicoffset += ( unknownHeaderOffset + UNKNOWN_HEADER_SIZE ); - if ( realPicoffset >= dataBlockEndOffset ) - { - realPicoffset -= UNKNOWN_HEADER_SIZE; - } - return realPicoffset; + return getContent().length; } - private void fillJPGWidthHeight() + /** + * @return The offset of this picture in the picture bytes, used when + * matching up with {@link CharacterRun#getPicOffset()} + */ + public int getStartOffset() { - /* - * http://www.codecomments.com/archive281-2004-3-158083.html - * - * Algorhitm proposed by Patrick TJ McPhee: - * - * read 2 bytes make sure they are 'ffd8'x repeatedly: read 2 bytes make - * sure the first one is 'ff'x if the second one is 'd9'x stop else if - * the second one is c0 or c2 (or possibly other values ...) 
skip 2 - * bytes read one byte into depth read two bytes into height read two - * bytes into width else read two bytes into length skip forward - * length-2 bytes - * - * Also used Ruby code snippet from: - * http://www.bigbold.com/snippets/posts/show/805 for reference - */ - int pointer = pictureBytesStartOffset + 2; - int firstByte = _dataStream[pointer]; - int secondByte = _dataStream[pointer + 1]; - - int endOfPicture = pictureBytesStartOffset + size; - while ( pointer < endOfPicture - 1 ) - { - do - { - firstByte = _dataStream[pointer]; - secondByte = _dataStream[pointer + 1]; - pointer += 2; - } - while ( !( firstByte == (byte) 0xFF ) && pointer < endOfPicture - 1 ); - - if ( firstByte == ( (byte) 0xFF ) && pointer < endOfPicture - 1 ) - { - if ( secondByte == (byte) 0xD9 || secondByte == (byte) 0xDA ) - { - break; - } - else if ( ( secondByte & 0xF0 ) == 0xC0 - && secondByte != (byte) 0xC4 - && secondByte != (byte) 0xC8 - && secondByte != (byte) 0xCC ) - { - pointer += 5; - this.height = getBigEndianShort( _dataStream, pointer ); - this.width = getBigEndianShort( _dataStream, pointer + 2 ); - break; - } - else - { - pointer++; - pointer++; - int length = getBigEndianShort( _dataStream, pointer ); - pointer += length; - } - } - else - { - pointer++; - } - } + return dataBlockStartOfsset; } - private void fillPNGWidthHeight() + /** + * @return Vertical scaling factor supplied by user expressed in .001% units + */ + public int getVerticalScalingFactor() { - /* - * Used PNG file format description from - * http://www.wotsit.org/download.asp?f=png - */ - int HEADER_START = pictureBytesStartOffset + PNG.length + 4; - if ( matchSignature( _dataStream, IHDR, HEADER_START ) ) - { - int IHDR_CHUNK_WIDTH = HEADER_START + 4; - this.width = getBigEndianInt( _dataStream, IHDR_CHUNK_WIDTH ); - this.height = getBigEndianInt( _dataStream, IHDR_CHUNK_WIDTH + 4 ); - } + return _picf.getMy(); } /** @@ -554,28 +490,106 @@ public final class Picture extends PictureDescriptor } 
/** - * returns pixel height of the picture or -1 if dimensions determining was - * failed + * tries to suggest extension for picture's file by matching signatures of + * popular image formats to first bytes of picture's contents + * + * @return suggested file extension */ - public int getHeight() + public String suggestFileExtension() { - if ( height == -1 ) - { - fillWidthHeight(); - } - return height; + return suggestPictureType().getExtension(); } - private static int getBigEndianInt( byte[] data, int offset ) + /** + * Tries to suggest a filename: hex representation of picture structure + * offset in "Data" stream plus extension that is tried to determine from + * first byte of picture's content. + * + * @return suggested file name + */ + public String suggestFullFileName() { - return ( ( ( data[offset] & 0xFF ) << 24 ) - + ( ( data[offset + 1] & 0xFF ) << 16 ) - + ( ( data[offset + 2] & 0xFF ) << 8 ) + ( data[offset + 3] & 0xFF ) ); + String fileExt = suggestFileExtension(); + return Integer.toHexString( dataBlockStartOfsset ) + + ( fileExt.length() > 0 ? "." 
+ fileExt : "" ); } - private static int getBigEndianShort( byte[] data, int offset ) + public PictureType suggestPictureType() { - return ( ( ( data[offset] & 0xFF ) << 8 ) + ( data[offset + 1] & 0xFF ) ); + if ( _picfAndOfficeArtData.getBlipRecords().size() != 1 ) + return PictureType.UNKNOWN; + + EscherRecord escherRecord = _picfAndOfficeArtData.getBlipRecords().get( + 0 ); + switch ( escherRecord.getRecordId() ) + { + case (short) 0xF007: + { + EscherBSERecord bseRecord = (EscherBSERecord) escherRecord; + switch ( bseRecord.getBlipTypeWin32() ) + { + case 0x00: + return PictureType.UNKNOWN; + case 0x01: + return PictureType.UNKNOWN; + case 0x02: + return PictureType.EMF; + case 0x03: + return PictureType.WMF; + case 0x04: + return PictureType.PICT; + case 0x05: + return PictureType.JPEG; + case 0x06: + return PictureType.PNG; + case 0x07: + return PictureType.BMP; + case 0x11: + return PictureType.TIFF; + case 0x12: + return PictureType.JPEG; + default: + return PictureType.UNKNOWN; + } + } + case (short) 0xF01A: + return PictureType.EMF; + case (short) 0xF01B: + return PictureType.WMF; + case (short) 0xF01C: + return PictureType.PICT; + case (short) 0xF01D: + return PictureType.JPEG; + case (short) 0xF01E: + return PictureType.PNG; + case (short) 0xF01F: + return PictureType.BMP; + case (short) 0xF029: + return PictureType.TIFF; + case (short) 0xF02A: + return PictureType.JPEG; + default: + return PictureType.UNKNOWN; + } + } + + /** + * Writes Picture's content bytes to specified OutputStream. Is useful when + * there is need to write picture bytes directly to stream, omitting its + * representation in memory as distinct byte array. 
+ * + * @param out + * a stream to write to + * @throws IOException + * if an exception occurs while writing to the specified stream + */ + public void writeImageContent( OutputStream out ) throws IOException + { + byte[] content = getContent(); + if ( content != null && content.length > 0 ) + { + out.write( content, 0, content.length ); + } } } diff --git a/src/scratchpad/src/org/apache/poi/hwpf/usermodel/PictureType.java b/src/scratchpad/src/org/apache/poi/hwpf/usermodel/PictureType.java index 5858c15e09..18327d8693 100644 --- a/src/scratchpad/src/org/apache/poi/hwpf/usermodel/PictureType.java +++ b/src/scratchpad/src/org/apache/poi/hwpf/usermodel/PictureType.java @@ -21,7 +21,8 @@ package org.apache.poi.hwpf.usermodel; * * @author Sergey Vladimirov (vlsergey {at} gmail {dot} com) */ -public enum PictureType { +public enum PictureType +{ BMP( "image/bmp", "bmp", new byte[][] { { 'B', 'M' } } ), EMF( "image/x-emf", "emf", new byte[][] { { 0x01, 0x00, 0x00, 0x00 } } ), @@ -30,6 +31,8 @@ public enum PictureType { JPEG( "image/jpeg", "jpg", new byte[][] { { (byte) 0xFF, (byte) 0xD8 } } ), + PICT( "image/pict", "pict", new byte[0][] ), + PNG( "image/png", "png", new byte[][] { { (byte) 0x89, 0x50, 0x4E, 0x47, 0x0D, 0x0A, 0x1A, 0x0A } } ), diff --git a/src/scratchpad/testcases/org/apache/poi/hwpf/usermodel/TestBugs.java b/src/scratchpad/testcases/org/apache/poi/hwpf/usermodel/TestBugs.java index a12baa47d0..50b6e52798 100644 --- a/src/scratchpad/testcases/org/apache/poi/hwpf/usermodel/TestBugs.java +++ b/src/scratchpad/testcases/org/apache/poi/hwpf/usermodel/TestBugs.java @@ -18,17 +18,13 @@ package org.apache.poi.hwpf.usermodel; import java.io.ByteArrayOutputStream; import java.io.FileNotFoundException; -import java.io.FileOutputStream; import java.io.IOException; import java.io.InputStream; import java.util.Arrays; import java.util.Collection; import java.util.List; -import org.apache.poi.poifs.filesystem.NPOIFSFileSystem; - import junit.framework.TestCase; - import
org.apache.commons.codec.digest.DigestUtils; import org.apache.poi.POIDataSamples; import org.apache.poi.hwpf.HWPFDocument; @@ -41,8 +37,10 @@ import org.apache.poi.hwpf.model.FileInformationBlock; import org.apache.poi.hwpf.model.PlexOfField; import org.apache.poi.hwpf.model.SubdocumentType; import org.apache.poi.hwpf.model.io.HWPFOutputStream; +import org.apache.poi.poifs.filesystem.NPOIFSFileSystem; import org.apache.poi.util.IOUtils; -import org.apache.poi.util.LittleEndian; +import org.apache.poi.util.POILogFactory; +import org.apache.poi.util.POILogger; /** * Test different problems reported in Apache Bugzilla @@ -52,6 +50,8 @@ import org.apache.poi.util.LittleEndian; */ public class TestBugs extends TestCase { + private static final POILogger logger = POILogFactory + .getLogger( TestBugs.class ); public static void assertEquals( String expected, String actual ) { @@ -188,11 +188,6 @@ public class TestBugs extends TestCase assertNotNull( pic.getContent() ); assertNotNull( pic.getRawContent() ); - // These are probably some sort of offset, need to figure them out - assertEquals( 4, pic.getSize() ); - assertEquals( 0x80000000l, LittleEndian.getUInt( pic.getContent() ) ); - assertEquals( 0x80000000l, LittleEndian.getUInt( pic.getRawContent() ) ); - /* * This is a file with empty EMF image, but present Office Drawing * --sergey @@ -688,6 +683,21 @@ public class TestBugs extends TestCase WordExtractor wordExtractor = new WordExtractor( hwpfDocument ); wordExtractor.getText(); } + } /** + * [FIXED] Bug 51890 (duplicate: Bug 51902) - Picture.fillRawImageContent - + * ArrayIndexOutOfBoundsException + */ + public void testBug51890() + { + HWPFDocument doc = HWPFTestDataSamples.openSampleFile( "Bug51890.doc" ); + for ( Picture picture : doc.getPicturesTable().getAllPictures() ) + { + PictureType pictureType = picture.suggestPictureType(); + logger.log( POILogger.DEBUG, + "Picture at offset " + picture.getStartOffset() + + " has type " + pictureType ); + } } } diff --git
a/src/scratchpad/testcases/org/apache/poi/hwpf/usermodel/TestPictures.java b/src/scratchpad/testcases/org/apache/poi/hwpf/usermodel/TestPictures.java index d23a898d2a..526cb5a478 100644 --- a/src/scratchpad/testcases/org/apache/poi/hwpf/usermodel/TestPictures.java +++ b/src/scratchpad/testcases/org/apache/poi/hwpf/usermodel/TestPictures.java @@ -210,26 +210,26 @@ public final class TestPictures extends TestCase { // Look at the pictures table List pictures = pictureTable.getAllPictures(); assertEquals(4, pictures.size()); - - Picture picture = pictures.get(0); - assertEquals("", picture.suggestFileExtension()); - assertEquals("0", picture.suggestFullFileName()); - assertEquals("image/unknown", picture.getMimeType()); - - picture = pictures.get(1); - assertEquals("", picture.suggestFileExtension()); - assertEquals("469", picture.suggestFullFileName()); - assertEquals("image/unknown", picture.getMimeType()); - - picture = pictures.get(2); - assertEquals("", picture.suggestFileExtension()); - assertEquals("8c7", picture.suggestFullFileName()); - assertEquals("image/unknown", picture.getMimeType()); - - picture = pictures.get(3); - assertEquals("", picture.suggestFileExtension()); - assertEquals("10a8", picture.suggestFullFileName()); - assertEquals("image/unknown", picture.getMimeType()); + + Picture picture = pictures.get( 0 ); + assertEquals( "emf", picture.suggestFileExtension() ); + assertEquals( "0.emf", picture.suggestFullFileName() ); + assertEquals( "image/x-emf", picture.getMimeType() ); + + picture = pictures.get( 1 ); + assertEquals( "emf", picture.suggestFileExtension() ); + assertEquals( "469.emf", picture.suggestFullFileName() ); + assertEquals( "image/x-emf", picture.getMimeType() ); + + picture = pictures.get( 2 ); + assertEquals( "emf", picture.suggestFileExtension() ); + assertEquals( "8c7.emf", picture.suggestFullFileName() ); + assertEquals( "image/x-emf", picture.getMimeType() ); + + picture = pictures.get( 3 ); + assertEquals( "emf", 
picture.suggestFileExtension() ); + assertEquals( "10a8.emf", picture.suggestFullFileName() ); + assertEquals( "image/x-emf", picture.getMimeType() ); } public void testEquation() diff --git a/src/types/definitions/picf_type.xml b/src/types/definitions/picf_type.xml new file mode 100644 index 0000000000..cdda965020 --- /dev/null +++ b/src/types/definitions/picf_type.xml @@ -0,0 +1,71 @@ + + + + AbstractType + The PICF structure specifies the type of a picture, as well as the size of the + picture and information about its border. <p>Class and fields descriptions are quoted + from Microsoft Office Word 97-2007 + Binary File Format and [MS-DOC] - v20110608 Word (.doc) + Binary File Format + + Sergey Vladimirov; according to Microsoft Office Word 97-2007 Binary File Format + Specification [*.doc] and [MS-DOC] - v20110608 Word (.doc) Binary File Format + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +