]> source.dussan.org Git - poi.git/commitdiff
add simplest "escher" pictures support in Word-to-HTML and Word-to-FO converters
authorSergey Vladimirov <sergey@apache.org>
Thu, 28 Jul 2011 15:08:06 +0000 (15:08 +0000)
committerSergey Vladimirov <sergey@apache.org>
Thu, 28 Jul 2011 15:08:06 +0000 (15:08 +0000)
git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@1151888 13f79535-47bb-0310-9956-ffa450edef68

14 files changed:
src/scratchpad/src/org/apache/poi/hwpf/HWPFDocument.java
src/scratchpad/src/org/apache/poi/hwpf/converter/AbstractWordConverter.java
src/scratchpad/src/org/apache/poi/hwpf/converter/HtmlDocumentFacade.java
src/scratchpad/src/org/apache/poi/hwpf/converter/PicturesManager.java
src/scratchpad/src/org/apache/poi/hwpf/converter/WordToFoConverter.java
src/scratchpad/src/org/apache/poi/hwpf/converter/WordToHtmlConverter.java
src/scratchpad/src/org/apache/poi/hwpf/model/EscherRecordHolder.java
src/scratchpad/src/org/apache/poi/hwpf/usermodel/OfficeDrawing.java
src/scratchpad/src/org/apache/poi/hwpf/usermodel/OfficeDrawings.java
src/scratchpad/src/org/apache/poi/hwpf/usermodel/OfficeDrawingsImpl.java
src/scratchpad/src/org/apache/poi/hwpf/usermodel/Picture.java
src/scratchpad/src/org/apache/poi/hwpf/usermodel/PictureType.java
src/scratchpad/testcases/org/apache/poi/hwpf/converter/TestWordToHtmlConverter.java
test-data/document/pictures_escher.doc [new file with mode: 0644]

index b2d46604594fd7a738f60a456b2440b939cd65cb..fa8a47649cc8cd5cbbe39eb3bb81d5a3f4292391 100644 (file)
@@ -310,8 +310,8 @@ public final class HWPFDocument extends HWPFDocumentCore
     _officeArts = new ShapesTable(_tableStream, _fib);
 
     // And escher pictures
-    _officeDrawingsHeaders = new OfficeDrawingsImpl( _fspaHeaders, _escherRecordHolder );
-    _officeDrawingsMain = new OfficeDrawingsImpl( _fspaMain , _escherRecordHolder);
+    _officeDrawingsHeaders = new OfficeDrawingsImpl( _fspaHeaders, _escherRecordHolder, _mainStream );
+    _officeDrawingsMain = new OfficeDrawingsImpl( _fspaMain , _escherRecordHolder, _mainStream);
 
     _st = new SectionTable(_mainStream, _tableStream, _fib.getFcPlcfsed(), _fib.getLcbPlcfsed(), fcMin, _tpt, _cpSplit);
     _ss = new StyleSheet(_tableStream, _fib.getFcStshf());
index b65e8cf713d529e4b5fa2c790f1abee0fd912fd1..afb5a5f701fcf92c34be6a0e74cf8d041e48e0a1 100644 (file)
@@ -41,6 +41,7 @@ import org.apache.poi.hwpf.usermodel.Notes;
 import org.apache.poi.hwpf.usermodel.OfficeDrawing;
 import org.apache.poi.hwpf.usermodel.Paragraph;
 import org.apache.poi.hwpf.usermodel.Picture;
+import org.apache.poi.hwpf.usermodel.PictureType;
 import org.apache.poi.hwpf.usermodel.Range;
 import org.apache.poi.hwpf.usermodel.Section;
 import org.apache.poi.hwpf.usermodel.Table;
@@ -578,7 +579,10 @@ public abstract class AbstractWordConverter
     protected void processDrawnObject( HWPFDocument doc,
             CharacterRun characterRun, Element block )
     {
-        // main?
+        if ( getPicturesManager() == null )
+            return;
+
+        // TODO: support headers
         OfficeDrawing officeDrawing = doc.getOfficeDrawingsMain()
                 .getOfficeDrawingAt( characterRun.getStartOffset() );
         if ( officeDrawing == null )
@@ -588,10 +592,22 @@ public abstract class AbstractWordConverter
             return;
         }
 
-        // TODO: do something :)
+        byte[] pictureData = officeDrawing.getPictureData();
+        if ( pictureData == null )
+            // usual shape?
+            return;
 
+        final PictureType type = PictureType.findMatchingType( pictureData );
+        String path = getPicturesManager().savePicture( pictureData, type,
+                "s" + characterRun.getStartOffset() + "." + type );
+
+        processDrawnObject( doc, characterRun, officeDrawing, path, block );
     }
 
+    protected abstract void processDrawnObject( HWPFDocument doc,
+            CharacterRun characterRun, OfficeDrawing officeDrawing,
+            String path, Element block );
+
     protected abstract void processEndnoteAutonumbered( HWPFDocument doc,
             int noteIndex, Element block, Range endnoteTextRange );
 
index a6e38073ce2285eae9e34ac82f42ab24ccb351db..a02b2942c5849195ac84cab406aff30d599a9cb4 100644 (file)
@@ -113,6 +113,13 @@ public class HtmlDocumentFacade
         return basicLink;
     }
 
+    public Element createImage( String src )
+    {
+        Element result = document.createElement( "img" );
+        result.setAttribute( "src", src );
+        return result;
+    }
+
     public Element createLineBreak()
     {
         return document.createElement( "br" );
index ac408139cbfac04f7cfc28547494871b7ede53a6..dbbca57003d6aa8291803789c66ae27bae07b9c0 100644 (file)
@@ -16,7 +16,6 @@
 ==================================================================== */
 package org.apache.poi.hwpf.converter;
 
-import org.apache.poi.hwpf.usermodel.Picture;
 import org.apache.poi.hwpf.usermodel.PictureType;
 
 /**
@@ -35,11 +34,11 @@ public interface PicturesManager
      * {@link PictureType#WMF}. FO (Apache FOP) supports at least PNG and SVG
      * types.
      * 
-     * @param picture
-     *            Word picture
+     * @param content
+     *            picture content
      * @return path to file that can be used as reference in HTML (img's src) of
      *         XLS FO (fo:external-graphic's src) or <tt>null</tt> if image were
      *         not saved and should not be referenced from result HTML / FO.
      */
-    String savePicture( Picture picture );
+    String savePicture( byte[] content, PictureType pictureType, String suggestedName );
 }
index 8121e2cd9cc2d8e0f3f6e5ce7ca07c388a8e2796..653b44cec9fc8db5dacb47cdad05c35a5204b3ad 100644 (file)
@@ -37,6 +37,7 @@ import org.apache.poi.hwpf.HWPFDocumentCore;
 import org.apache.poi.hwpf.converter.FontReplacer.Triplet;
 import org.apache.poi.hwpf.usermodel.Bookmark;
 import org.apache.poi.hwpf.usermodel.CharacterRun;
+import org.apache.poi.hwpf.usermodel.OfficeDrawing;
 import org.apache.poi.hwpf.usermodel.Paragraph;
 import org.apache.poi.hwpf.usermodel.Picture;
 import org.apache.poi.hwpf.usermodel.Range;
@@ -262,6 +263,16 @@ public class WordToFoConverter extends AbstractWordConverter
             foDocumentFacade.setDescription( summaryInformation.getComments() );
     }
 
+    @Override
+    protected void processDrawnObject( HWPFDocument doc,
+            CharacterRun characterRun, OfficeDrawing officeDrawing,
+            String path, Element block )
+    {
+        final Element externalGraphic = foDocumentFacade
+                .createExternalGraphic( path );
+        block.appendChild( externalGraphic );
+    }
+
     @Override
     protected void processEndnoteAutonumbered( HWPFDocument doc, int noteIndex,
             Element block, Range endnoteTextRange )
@@ -364,7 +375,10 @@ public class WordToFoConverter extends AbstractWordConverter
         PicturesManager fileManager = getPicturesManager();
         if ( fileManager != null )
         {
-            String url = fileManager.savePicture( picture );
+            String url = fileManager
+                    .savePicture( picture.getContent(),
+                            picture.suggestPictureType(),
+                            picture.suggestFullFileName() );
 
             if ( WordToFoUtils.isNotEmpty( url ) )
             {
index f17a76c13082731d26cfd5068c237c3ee41822ba..beba077c795337e7e298f945595933946dff50fe 100644 (file)
@@ -34,6 +34,7 @@ import org.apache.poi.hwpf.HWPFDocumentCore;
 import org.apache.poi.hwpf.converter.FontReplacer.Triplet;
 import org.apache.poi.hwpf.usermodel.Bookmark;
 import org.apache.poi.hwpf.usermodel.CharacterRun;
+import org.apache.poi.hwpf.usermodel.OfficeDrawing;
 import org.apache.poi.hwpf.usermodel.Paragraph;
 import org.apache.poi.hwpf.usermodel.Picture;
 import org.apache.poi.hwpf.usermodel.Range;
@@ -61,6 +62,22 @@ import static org.apache.poi.hwpf.converter.AbstractWordUtils.TWIPS_PER_INCH;
 public class WordToHtmlConverter extends AbstractWordConverter
 {
 
+    /**
+     * Holds properties values, applied to current <tt>p</tt> element. Those
+     * properties shall not be doubled in children <tt>span</tt> elements.
+     */
+    private static class BlockProperies
+    {
+        final String pFontName;
+        final int pFontSize;
+
+        public BlockProperies( String pFontName, int pFontSize )
+        {
+            this.pFontName = pFontName;
+            this.pFontSize = pFontSize;
+        }
+    }
+
     private static final POILogger logger = POILogFactory
             .getLogger( WordToHtmlConverter.class );
 
@@ -253,6 +270,15 @@ public class WordToHtmlConverter extends AbstractWordConverter
                     .addDescription( summaryInformation.getComments() );
     }
 
+    @Override
+    protected void processDrawnObject( HWPFDocument doc,
+            CharacterRun characterRun, OfficeDrawing officeDrawing,
+            String path, Element block )
+    {
+        Element img = htmlDocumentFacade.createImage( path );
+        block.appendChild( img );
+    }
+
     @Override
     protected void processEndnoteAutonumbered( HWPFDocument doc, int noteIndex,
             Element block, Range endnoteTextRange )
@@ -302,7 +328,10 @@ public class WordToHtmlConverter extends AbstractWordConverter
         PicturesManager fileManager = getPicturesManager();
         if ( fileManager != null )
         {
-            String url = fileManager.savePicture( picture );
+            String url = fileManager
+                    .savePicture( picture.getContent(),
+                            picture.suggestPictureType(),
+                            picture.suggestFullFileName() );
 
             if ( WordToHtmlUtils.isNotEmpty( url ) )
             {
@@ -388,8 +417,7 @@ public class WordToHtmlConverter extends AbstractWordConverter
                                     + "in;overflow:hidden;" ) );
             root.appendChild( inner );
 
-            Element image = htmlDocumentFacade.document.createElement( "img" );
-            image.setAttribute( "src", imageSourcePath );
+            Element image = htmlDocumentFacade.createImage( imageSourcePath );
             image.setAttribute( "class", htmlDocumentFacade
                     .getOrCreateCssClass( image.getTagName(), "i",
                             "position:absolute;left:-" + cropLeft + ";top:-"
@@ -401,8 +429,7 @@ public class WordToHtmlConverter extends AbstractWordConverter
         }
         else
         {
-            root = htmlDocumentFacade.document.createElement( "img" );
-            root.setAttribute( "src", imageSourcePath );
+            root = htmlDocumentFacade.createImage( imageSourcePath );
             root.setAttribute( "style", "width:" + imageWidth + "in;height:"
                     + imageHeight + "in;vertical-align:text-bottom;" );
         }
@@ -691,20 +718,4 @@ public class WordToHtmlConverter extends AbstractWordConverter
         }
     }
 
-    /**
-     * Holds properties values, applied to current <tt>p</tt> element. Those
-     * properties shall not be doubled in children <tt>span</tt> elements.
-     */
-    private static class BlockProperies
-    {
-        final String pFontName;
-        final int pFontSize;
-
-        public BlockProperies( String pFontName, int pFontSize )
-        {
-            this.pFontName = pFontName;
-            this.pFontSize = pFontSize;
-        }
-    }
-
 }
index 310dbc52c3e325dd06d7a32cbf817971265873a7..329707832f281e1d4d3d7e8d863f1023d467c808 100644 (file)
@@ -20,6 +20,7 @@ package org.apache.poi.hwpf.model;
 import java.util.ArrayList;
 import java.util.Iterator;
 import java.util.List;
+
 import org.apache.poi.ddf.DefaultEscherRecordFactory;
 import org.apache.poi.ddf.EscherContainerRecord;
 import org.apache.poi.ddf.EscherRecord;
@@ -119,4 +120,83 @@ public final class EscherRecordHolder {
                // Not found in this lot
                return null;
        }
+
+    public List<? extends EscherContainerRecord> getDgContainers()
+    {
+        List<EscherContainerRecord> dgContainers = new ArrayList<EscherContainerRecord>(
+                1 );
+        for ( EscherRecord escherRecord : getEscherRecords() )
+        {
+            if ( escherRecord.getRecordId() == (short) 0xF002 )
+            {
+                dgContainers.add( (EscherContainerRecord) escherRecord );
+            }
+        }
+        return dgContainers;
+    }
+
+    public List<? extends EscherContainerRecord> getDggContainers()
+    {
+        List<EscherContainerRecord> dggContainers = new ArrayList<EscherContainerRecord>(
+                1 );
+        for ( EscherRecord escherRecord : getEscherRecords() )
+        {
+            if ( escherRecord.getRecordId() == (short) 0xF000 )
+            {
+                dggContainers.add( (EscherContainerRecord) escherRecord );
+            }
+        }
+        return dggContainers;
+    }
+
+    public List<? extends EscherContainerRecord> getBStoreContainers()
+    {
+        List<EscherContainerRecord> bStoreContainers = new ArrayList<EscherContainerRecord>(
+                1 );
+        for ( EscherContainerRecord dggContainer : getDggContainers() )
+        {
+            for ( EscherRecord escherRecord : dggContainer.getChildRecords() )
+            {
+                if ( escherRecord.getRecordId() == (short) 0xF001 )
+                {
+                    bStoreContainers.add( (EscherContainerRecord) escherRecord );
+                }
+            }
+        }
+        return bStoreContainers;
+    }
+
+    public List<? extends EscherContainerRecord> getSpgrContainers()
+    {
+        List<EscherContainerRecord> spgrContainers = new ArrayList<EscherContainerRecord>(
+                1 );
+        for ( EscherContainerRecord dgContainer : getDgContainers() )
+        {
+            for ( EscherRecord escherRecord : dgContainer.getChildRecords() )
+            {
+                if ( escherRecord.getRecordId() == (short) 0xF003 )
+                {
+                    spgrContainers.add( (EscherContainerRecord) escherRecord );
+                }
+            }
+        }
+        return spgrContainers;
+    }
+
+    public List<? extends EscherContainerRecord> getSpContainers()
+    {
+        List<EscherContainerRecord> spContainers = new ArrayList<EscherContainerRecord>(
+                1 );
+        for ( EscherContainerRecord spgrContainer : getSpgrContainers() )
+        {
+            for ( EscherRecord escherRecord : spgrContainer.getChildRecords() )
+            {
+                if ( escherRecord.getRecordId() == (short) 0xF004 )
+                {
+                    spContainers.add( (EscherContainerRecord) escherRecord );
+                }
+            }
+        }
+        return spContainers;
+    }
 }
index 20e3cc522251d13e05010bc794d27214478b6cea..002fa30b835902b0ac5816cabda6d2e7a7aad999 100644 (file)
@@ -1,21 +1,43 @@
+/* ====================================================================
+   Licensed to the Apache Software Foundation (ASF) under one or more
+   contributor license agreements.  See the NOTICE file distributed with
+   this work for additional information regarding copyright ownership.
+   The ASF licenses this file to You under the Apache License, Version 2.0
+   (the "License"); you may not use this file except in compliance with
+   the License.  You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+==================================================================== */
 package org.apache.poi.hwpf.usermodel;
 
+/**
+ * User-friendly interface to office drawing objects
+ * 
+ * @author Sergey Vladimirov (vlsergey {at} gmail {dot} com)
+ */
 public interface OfficeDrawing
 {
     /**
-     * Shape Identifier
+     * Returns picture data if this shape has (single?) associated picture data
      */
-    int getShapeId();
+    byte[] getPictureData();
 
     /**
-     * Left of rectangle enclosing shape relative to the origin of the shape
+     * Bottom of the rectangle enclosing shape relative to the origin of the
+     * shape
      */
-    int getRectangleLeft();
+    int getRectangleBottom();
 
     /**
-     * Top of rectangle enclosing shape relative to the origin of the shape
+     * Left of rectangle enclosing shape relative to the origin of the shape
      */
-    int getRectangleTop();
+    int getRectangleLeft();
 
     /**
      * Right of rectangle enclosing shape relative to the origin of the shape
@@ -23,9 +45,13 @@ public interface OfficeDrawing
     int getRectangleRight();
 
     /**
-     * Bottom of the rectangle enclosing shape relative to the origin of the
-     * shape
+     * Top of rectangle enclosing shape relative to the origin of the shape
      */
-    int getRectangleBottom();
+    int getRectangleTop();
+
+    /**
+     * Shape Identifier
+     */
+    int getShapeId();
 
 }
index 672d5e193e2d07aba7198ca567c6cc190dfdebe8..58661aa8dd31673621df777190404690fe9438d9 100644 (file)
@@ -1,7 +1,28 @@
+/* ====================================================================
+   Licensed to the Apache Software Foundation (ASF) under one or more
+   contributor license agreements.  See the NOTICE file distributed with
+   this work for additional information regarding copyright ownership.
+   The ASF licenses this file to You under the Apache License, Version 2.0
+   (the "License"); you may not use this file except in compliance with
+   the License.  You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+==================================================================== */
 package org.apache.poi.hwpf.usermodel;
 
 import java.util.Collection;
 
+/**
+ * User-friendly interface to access document part's office drawings
+ * 
+ * @author Sergey Vladimirov (vlsergey {at} gmail {dot} com)
+ */
 public interface OfficeDrawings
 {
     OfficeDrawing getOfficeDrawingAt( int characterPosition );
index 933821c14797eceb5464fb96220df901b9a59294..370cacd70733d469ae2e283b3ebc10b46f4c2bae 100644 (file)
@@ -1,3 +1,19 @@
+/* ====================================================================
+   Licensed to the Apache Software Foundation (ASF) under one or more
+   contributor license agreements.  See the NOTICE file distributed with
+   this work for additional information regarding copyright ownership.
+   The ASF licenses this file to You under the Apache License, Version 2.0
+   (the "License"); you may not use this file except in compliance with
+   the License.  You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+==================================================================== */
 package org.apache.poi.hwpf.usermodel;
 
 import java.util.ArrayList;
@@ -5,6 +21,16 @@ import java.util.Collection;
 import java.util.Collections;
 import java.util.List;
 
+import org.apache.poi.ddf.DefaultEscherRecordFactory;
+import org.apache.poi.ddf.EscherBSERecord;
+import org.apache.poi.ddf.EscherBlipRecord;
+import org.apache.poi.ddf.EscherContainerRecord;
+import org.apache.poi.ddf.EscherOptRecord;
+import org.apache.poi.ddf.EscherProperties;
+import org.apache.poi.ddf.EscherRecord;
+import org.apache.poi.ddf.EscherRecordFactory;
+import org.apache.poi.ddf.EscherSimpleProperty;
+import org.apache.poi.ddf.EscherSpRecord;
 import org.apache.poi.hwpf.model.EscherRecordHolder;
 import org.apache.poi.hwpf.model.FSPA;
 import org.apache.poi.hwpf.model.FSPATable;
@@ -13,12 +39,82 @@ public class OfficeDrawingsImpl implements OfficeDrawings
 {
     private final EscherRecordHolder _escherRecordHolder;
     private final FSPATable _fspaTable;
+    private final byte[] _mainStream;
 
     public OfficeDrawingsImpl( FSPATable fspaTable,
-            EscherRecordHolder escherRecordHolder )
+            EscherRecordHolder escherRecordHolder, byte[] mainStream )
     {
         this._fspaTable = fspaTable;
         this._escherRecordHolder = escherRecordHolder;
+        this._mainStream = mainStream;
+    }
+
+    private EscherContainerRecord getEscherShapeRecordContainer(
+            final int shapeId )
+    {
+        for ( EscherContainerRecord spContainer : _escherRecordHolder
+                .getSpContainers() )
+        {
+            EscherSpRecord escherSpRecord = spContainer
+                    .getChildById( (short) 0xF00A );
+            if ( escherSpRecord != null
+                    && escherSpRecord.getShapeId() == shapeId )
+                return spContainer;
+        }
+
+        return null;
+    }
+
+    private EscherBlipRecord getBitmapRecord( int bitmapIndex )
+    {
+        List<? extends EscherContainerRecord> bContainers = _escherRecordHolder
+                .getBStoreContainers();
+        if ( bContainers == null || bContainers.size() != 1 )
+            return null;
+
+        EscherContainerRecord bContainer = bContainers.get( 0 );
+        final List<EscherRecord> bitmapRecords = bContainer.getChildRecords();
+
+        if ( bitmapRecords.size() < bitmapIndex )
+            return null;
+
+        EscherRecord imageRecord = bitmapRecords.get( bitmapIndex - 1 );
+
+        if ( imageRecord instanceof EscherBlipRecord )
+        {
+            return (EscherBlipRecord) imageRecord;
+        }
+
+        if ( imageRecord instanceof EscherBSERecord )
+        {
+            EscherBSERecord bseRecord = (EscherBSERecord) imageRecord;
+
+            EscherBlipRecord blip = bseRecord.getBlipRecord();
+            if ( blip != null )
+            {
+                return blip;
+            }
+
+            if ( bseRecord.getOffset() > 0 )
+            {
+                /*
+                 * Blip stored in delay stream, which in a word doc, is the main
+                 * stream
+                 */
+                EscherRecordFactory recordFactory = new DefaultEscherRecordFactory();
+                EscherRecord record = recordFactory.createRecord( _mainStream,
+                        bseRecord.getOffset() );
+
+                if ( record instanceof EscherBlipRecord )
+                {
+                    record.fillFields( _mainStream, bseRecord.getOffset(),
+                            recordFactory );
+                    return (EscherBlipRecord) record;
+                }
+            }
+        }
+
+        return null;
     }
 
     private OfficeDrawing getOfficeDrawing( final FSPA fspa )
@@ -50,6 +146,30 @@ public class OfficeDrawingsImpl implements OfficeDrawings
                 return fspa.getSpid();
             }
 
+            public byte[] getPictureData()
+            {
+                EscherContainerRecord shapeDescription = getEscherShapeRecordContainer( getShapeId() );
+                if ( shapeDescription == null )
+                    return null;
+
+                EscherOptRecord escherOptRecord = shapeDescription
+                        .getChildById( (short) 0xF00B );
+                if ( escherOptRecord == null )
+                    return null;
+
+                EscherSimpleProperty escherProperty = escherOptRecord
+                        .lookup( EscherProperties.BLIP__BLIPTODISPLAY );
+                if ( escherProperty == null )
+                    return null;
+
+                int bitmapIndex = escherProperty.getPropertyValue();
+                EscherBlipRecord escherBlipRecord = getBitmapRecord( bitmapIndex );
+                if ( escherBlipRecord == null )
+                    return null;
+
+                return escherBlipRecord.getPicturedata();
+            }
+
             @Override
             public String toString()
             {
index b42ad00637dde6c1a6e041d90ec2185fa17706ba..2781b76519f51e27374a0fc1aa0b86ddf932efee 100644 (file)
@@ -30,19 +30,21 @@ import org.apache.poi.util.POILogger;
 
 /**
  * Represents embedded picture extracted from Word Document
+ * 
  * @author Dmitry Romanov
  */
 public final class Picture extends PictureDescriptor
 {
-  private static final POILogger log = POILogFactory.getLogger(Picture.class);
+    private static final POILogger log = POILogFactory
+            .getLogger( Picture.class );
 
-//  public static final int FILENAME_OFFSET = 0x7C;
-//  public static final int FILENAME_SIZE_OFFSET = 0x6C;
-  static final int PICF_OFFSET = 0x0;
-  static final int PICT_HEADER_OFFSET = 0x4;
-  static final int MFPMM_OFFSET = 0x6;
-  static final int PICF_SHAPE_OFFSET = 0xE;
-  static final int UNKNOWN_HEADER_SIZE = 0x49;
+    // public static final int FILENAME_OFFSET = 0x7C;
+    // public static final int FILENAME_SIZE_OFFSET = 0x6C;
+    static final int PICF_OFFSET = 0x0;
+    static final int PICT_HEADER_OFFSET = 0x4;
+    static final int MFPMM_OFFSET = 0x6;
+    static final int PICF_SHAPE_OFFSET = 0xE;
+    static final int UNKNOWN_HEADER_SIZE = 0x49;
 
     @Deprecated
     public static final byte[] GIF = PictureType.GIF.getSignatures()[0];
@@ -66,52 +68,57 @@ public final class Picture extends PictureDescriptor
     public static final byte[] WMF2 = PictureType.WMF.getSignatures()[1];
     // TODO: DIB, PICT
 
-  public static final byte[] IHDR = new byte[]{'I', 'H', 'D', 'R'};
+    public static final byte[] IHDR = new byte[] { 'I', 'H', 'D', 'R' };
 
-  public static final byte[] COMPRESSED1 = { (byte)0xFE, 0x78, (byte)0xDA };
-  public static final byte[] COMPRESSED2 = { (byte)0xFE, 0x78, (byte)0x9C };
+    public static final byte[] COMPRESSED1 = { (byte) 0xFE, 0x78, (byte) 0xDA };
+    public static final byte[] COMPRESSED2 = { (byte) 0xFE, 0x78, (byte) 0x9C };
 
-  private int dataBlockStartOfsset;
-  private int pictureBytesStartOffset;
-  private int dataBlockSize;
-  private int size;
-//  private String fileName;
-  private byte[] rawContent;
-  private byte[] content;
-  private byte[] _dataStream;
-  private int height = -1;
-  private int width = -1;
+    private int dataBlockStartOfsset;
+    private int pictureBytesStartOffset;
+    private int dataBlockSize;
+    private int size;
+    // private String fileName;
+    private byte[] rawContent;
+    private byte[] content;
+    private byte[] _dataStream;
+    private int height = -1;
+    private int width = -1;
 
-  public Picture(int dataBlockStartOfsset, byte[] _dataStream, boolean fillBytes)
-  {
-      super (_dataStream, dataBlockStartOfsset);
-
-    this._dataStream = _dataStream;
-    this.dataBlockStartOfsset = dataBlockStartOfsset;
-    this.dataBlockSize = LittleEndian.getInt(_dataStream, dataBlockStartOfsset);
-    this.pictureBytesStartOffset = getPictureBytesStartOffset(dataBlockStartOfsset, _dataStream, dataBlockSize);
-    this.size = dataBlockSize - (pictureBytesStartOffset - dataBlockStartOfsset);
+    public Picture( int dataBlockStartOfsset, byte[] _dataStream,
+            boolean fillBytes )
+    {
+        super( _dataStream, dataBlockStartOfsset );
+
+        this._dataStream = _dataStream;
+        this.dataBlockStartOfsset = dataBlockStartOfsset;
+        this.dataBlockSize = LittleEndian.getInt( _dataStream,
+                dataBlockStartOfsset );
+        this.pictureBytesStartOffset = getPictureBytesStartOffset(
+                dataBlockStartOfsset, _dataStream, dataBlockSize );
+        this.size = dataBlockSize
+                - ( pictureBytesStartOffset - dataBlockStartOfsset );
+
+        if ( size < 0 )
+        {
 
-    if (size<0) {
+        }
 
+        if ( fillBytes )
+        {
+            fillImageContent();
+        }
     }
 
-    if (fillBytes)
+    public Picture( byte[] _dataStream )
     {
-      fillImageContent();
-    }
-  }
-
-  public Picture(byte[] _dataStream)
-  {
         super();
 
-      this._dataStream = _dataStream;
-      this.dataBlockStartOfsset = 0;
-      this.dataBlockSize = _dataStream.length;
-      this.pictureBytesStartOffset = 0;
-      this.size = _dataStream.length;
-  }
+        this._dataStream = _dataStream;
+        this.dataBlockStartOfsset = 0;
+        this.dataBlockSize = _dataStream.length;
+        this.pictureBytesStartOffset = 0;
+        this.size = _dataStream.length;
+    }
 
     private void fillWidthHeight()
     {
@@ -131,42 +138,50 @@ public final class Picture extends PictureDescriptor
         }
     }
 
-  /**
-   * Tries to suggest a filename: hex representation of picture structure offset in "Data" stream plus extension that
-   * is tried to determine from first byte of picture's content.
-   *
-   * @return suggested file name
-   */
-  public String suggestFullFileName()
-  {
-    String fileExt = suggestFileExtension();
-    return Integer.toHexString(dataBlockStartOfsset) + (fileExt.length()>0 ? "."+fileExt : "");
-  }
-
-  /**
-   * Writes Picture's content bytes to specified OutputStream.
-   * Is useful when there is need to write picture bytes directly to stream, omitting its representation in
-   * memory as distinct byte array.
-   *
-   * @param out a stream to write to
-   * @throws IOException if some exception is occured while writing to specified out
-   */
-  public void writeImageContent(OutputStream out) throws IOException
-  {
-    if (rawContent!=null && rawContent.length>0) {
-      out.write(rawContent, 0, size);
-    } else {
-      out.write(_dataStream, pictureBytesStartOffset, size);
-    }
-  }
-
-  /**
-   * @return The offset of this picture in the picture bytes, used
-   *  when matching up with {@link CharacterRun#getPicOffset()}
-   */
-  public int getStartOffset() {
-     return dataBlockStartOfsset;
-  }
+    /**
+     * Tries to suggest a filename: the hex representation of the picture
+     * structure's offset in the "Data" stream, plus an extension that is
+     * determined, if possible, from the first byte of the picture's content.
+     * 
+     * @return suggested file name
+     */
+    public String suggestFullFileName()
+    {
+        String fileExt = suggestFileExtension();
+        return Integer.toHexString( dataBlockStartOfsset )
+                + ( fileExt.length() > 0 ? "." + fileExt : "" );
+    }
+
+    /**
+     * Writes the picture's content bytes to the specified OutputStream. Useful
+     * when the picture bytes need to be written directly to a stream without
+     * first holding them in memory as a separate byte array.
+     * 
+     * @param out
+     *            a stream to write to
+     * @throws IOException
+     *             if an error occurs while writing to the specified stream
+     */
+    public void writeImageContent( OutputStream out ) throws IOException
+    {
+        if ( rawContent != null && rawContent.length > 0 )
+        {
+            out.write( rawContent, 0, size );
+        }
+        else
+        {
+            out.write( _dataStream, pictureBytesStartOffset, size );
+        }
+    }
+
+    /**
+     * @return The offset of this picture in the picture bytes, used when
+     *         matching up with {@link CharacterRun#getPicOffset()}
+     */
+    public int getStartOffset()
+    {
+        return dataBlockStartOfsset;
+    }
 
     /**
      * @return picture's content as byte array
@@ -189,14 +204,14 @@ public final class Picture extends PictureDescriptor
         return rawContent;
     }
 
-  /**
-   *
-   * @return size in bytes of the picture
-   */
-  public int getSize()
-  {
-    return size;
-  }
+    /**
+     * 
+     * @return size in bytes of the picture
+     */
+    public int getSize()
+    {
+        return size;
+    }
 
     /**
      * @return the horizontal aspect ratio for picture provided by user
@@ -236,48 +251,56 @@ public final class Picture extends PictureDescriptor
     }
 
     /**
-     * Gets the initial width of the picture, in twips, prior to cropping or scaling.
-     *
+     * Gets the initial width of the picture, in twips, prior to cropping or
+     * scaling.
+     * 
      * @return the initial width of the picture in twips
      */
-    public int getDxaGoal() {
+    public int getDxaGoal()
+    {
         return dxaGoal;
     }
 
     /**
-     * Gets the initial height of the picture, in twips, prior to cropping or scaling.
-     *
+     * Gets the initial height of the picture, in twips, prior to cropping or
+     * scaling.
+     * 
      * @return the initial height of the picture in twips
      */
-    public int getDyaGoal() {
+    public int getDyaGoal()
+    {
         return dyaGoal;
     }
 
     /**
      * @return The amount the picture has been cropped on the left in twips
      */
-    public int getDxaCropLeft() {
+    public int getDxaCropLeft()
+    {
         return dxaCropLeft;
     }
 
     /**
      * @return The amount the picture has been cropped on the top in twips
      */
-    public int getDyaCropTop() {
+    public int getDyaCropTop()
+    {
         return dyaCropTop;
     }
 
     /**
      * @return The amount the picture has been cropped on the right in twips
      */
-    public int getDxaCropRight() {
+    public int getDxaCropRight()
+    {
         return dxaCropRight;
     }
 
     /**
      * @return The amount the picture has been cropped on the bottom in twips
      */
-    public int getDyaCropBottom() {
+    public int getDyaCropBottom()
+    {
         return dyaCropBottom;
     }
 
@@ -304,225 +327,255 @@ public final class Picture extends PictureDescriptor
 
     public PictureType suggestPictureType()
     {
-        final byte[] imageContent = getContent();
-        for ( PictureType pictureType : PictureType.values() )
-            for ( byte[] signature : pictureType.getSignatures() )
-                if ( matchSignature( imageContent, signature, 0 ) )
-                    return pictureType;
-
-        // TODO: DIB, PICT
-        return PictureType.UNKNOWN;
-    }
-
-  private static boolean matchSignature(byte[] dataStream, byte[] signature, int pictureBytesOffset)
-  {
-    boolean matched = pictureBytesOffset < dataStream.length;
-    for (int i = 0; (i+pictureBytesOffset) < dataStream.length && i < signature.length; i++)
-    {
-      if (dataStream[i+pictureBytesOffset] != signature[i])
-      {
-        matched = false;
-        break;
-      }
-    }
-    return matched;
-  }
-
-//  public String getFileName()
-//  {
-//    return fileName;
-//  }
-
-//  private static String extractFileName(int blockStartIndex, byte[] dataStream) {
-//        int fileNameStartOffset = blockStartIndex + 0x7C;
-//        int fileNameSizeOffset = blockStartIndex + FILENAME_SIZE_OFFSET;
-//        int fileNameSize = LittleEndian.getShort(dataStream, fileNameSizeOffset);
-//
-//        int fileNameIndex = fileNameStartOffset;
-//        char[] fileNameChars = new char[(fileNameSize-1)/2];
-//        int charIndex = 0;
-//        while(charIndex<fileNameChars.length) {
-//            short aChar = LittleEndian.getShort(dataStream, fileNameIndex);
-//            fileNameChars[charIndex] = (char)aChar;
-//            charIndex++;
-//            fileNameIndex += 2;
-//        }
-//        String fileName = new String(fileNameChars);
-//        return fileName.trim();
-//    }
-
-  private void fillRawImageContent()
-  {
+        return PictureType.findMatchingType( getContent() );
+    }
+
+    // public String getFileName()
+    // {
+    // return fileName;
+    // }
+
+    // private static String extractFileName(int blockStartIndex, byte[]
+    // dataStream) {
+    // int fileNameStartOffset = blockStartIndex + 0x7C;
+    // int fileNameSizeOffset = blockStartIndex + FILENAME_SIZE_OFFSET;
+    // int fileNameSize = LittleEndian.getShort(dataStream, fileNameSizeOffset);
+    //
+    // int fileNameIndex = fileNameStartOffset;
+    // char[] fileNameChars = new char[(fileNameSize-1)/2];
+    // int charIndex = 0;
+    // while(charIndex<fileNameChars.length) {
+    // short aChar = LittleEndian.getShort(dataStream, fileNameIndex);
+    // fileNameChars[charIndex] = (char)aChar;
+    // charIndex++;
+    // fileNameIndex += 2;
+    // }
+    // String fileName = new String(fileNameChars);
+    // return fileName.trim();
+    // }
+
+    private void fillRawImageContent()
+    {
         if ( rawContent != null && rawContent.length > 0 )
             return;
 
-    this.rawContent = new byte[size];
-    System.arraycopy(_dataStream, pictureBytesStartOffset, rawContent, 0, size);
-  }
+        this.rawContent = new byte[size];
+        System.arraycopy( _dataStream, pictureBytesStartOffset, rawContent, 0,
+                size );
+    }
 
-  private void fillImageContent()
-  {
+    private void fillImageContent()
+    {
         if ( content != null && content.length > 0 )
             return;
 
-    byte[] rawContent = getRawContent();
-
-    // HACK: Detect compressed images.  In reality there should be some way to determine
-    //       this from the first 32 bytes, but I can't see any similarity between all the
-    //       samples I have obtained, nor any similarity in the data block contents.
-    if (matchSignature(rawContent, COMPRESSED1, 32) || matchSignature(rawContent, COMPRESSED2, 32))
-    {
-      try
-      {
-        InflaterInputStream in = new InflaterInputStream(
-          new ByteArrayInputStream(rawContent, 33, rawContent.length - 33));
-        ByteArrayOutputStream out = new ByteArrayOutputStream();
-        byte[] buf = new byte[4096];
-        int readBytes;
-        while ((readBytes = in.read(buf)) > 0)
+        byte[] rawContent = getRawContent();
+
+        // HACK: Detect compressed images. In reality there should be some way
+        // to determine
+        // this from the first 32 bytes, but I can't see any similarity between
+        // all the
+        // samples I have obtained, nor any similarity in the data block
+        // contents.
+        if ( matchSignature( rawContent, COMPRESSED1, 32 )
+                || matchSignature( rawContent, COMPRESSED2, 32 ) )
+        {
+            try
+            {
+                InflaterInputStream in = new InflaterInputStream(
+                        new ByteArrayInputStream( rawContent, 33,
+                                rawContent.length - 33 ) );
+                ByteArrayOutputStream out = new ByteArrayOutputStream();
+                byte[] buf = new byte[4096];
+                int readBytes;
+                while ( ( readBytes = in.read( buf ) ) > 0 )
+                {
+                    out.write( buf, 0, readBytes );
+                }
+                content = out.toByteArray();
+            }
+            catch ( IOException e )
+            {
+                // Problems reading from the actual ByteArrayInputStream should
+                // never happen
+                // so this will only ever be a ZipException.
+                log.log( POILogger.INFO,
+                        "Possibly corrupt compression or non-compressed data",
+                        e );
+            }
+        }
+        else
+        {
+            // Raw data is not compressed.
+            content = rawContent;
+        }
+    }
+
+    private static boolean matchSignature( byte[] pictureData,
+            byte[] signature, int offset )
+    {
+        boolean matched = offset < pictureData.length;
+        for ( int i = 0; ( i + offset ) < pictureData.length
+                && i < signature.length; i++ )
+        {
+            if ( pictureData[i + offset] != signature[i] )
+            {
+                matched = false;
+                break;
+            }
+        }
+        return matched;
+    }
+
+    private static int getPictureBytesStartOffset( int dataBlockStartOffset,
+            byte[] _dataStream, int dataBlockSize )
+    {
+        int realPicoffset = dataBlockStartOffset;
+        final int dataBlockEndOffset = dataBlockSize + dataBlockStartOffset;
+
+        // Skip over the PICT block
+        int PICTFBlockSize = LittleEndian.getShort( _dataStream,
+                dataBlockStartOffset + PICT_HEADER_OFFSET ); // Should be 68
+                                                             // bytes
+
+        // Now the PICTF1
+        int PICTF1BlockOffset = PICTFBlockSize + PICT_HEADER_OFFSET;
+        short MM_TYPE = LittleEndian.getShort( _dataStream,
+                dataBlockStartOffset + PICT_HEADER_OFFSET + 2 );
+        if ( MM_TYPE == 0x66 )
+        {
+            // Skip the stPicName
+            int cchPicName = LittleEndian.getUnsignedByte( _dataStream,
+                    PICTF1BlockOffset );
+            PICTF1BlockOffset += 1 + cchPicName;
+        }
+        int PICTF1BlockSize = LittleEndian.getShort( _dataStream,
+                dataBlockStartOffset + PICTF1BlockOffset );
+
+        int unknownHeaderOffset = ( PICTF1BlockSize + PICTF1BlockOffset ) < dataBlockEndOffset ? ( PICTF1BlockSize + PICTF1BlockOffset )
+                : PICTF1BlockOffset;
+        realPicoffset += ( unknownHeaderOffset + UNKNOWN_HEADER_SIZE );
+        if ( realPicoffset >= dataBlockEndOffset )
         {
-          out.write(buf, 0, readBytes);
+            realPicoffset -= UNKNOWN_HEADER_SIZE;
         }
-        content = out.toByteArray();
-      }
-      catch (IOException e)
-      {
-        // Problems reading from the actual ByteArrayInputStream should never happen
-        // so this will only ever be a ZipException.
-        log.log(POILogger.INFO, "Possibly corrupt compression or non-compressed data", e);
-      }
-    } else {
-      // Raw data is not compressed.
-      content = rawContent;
-    }
-  }
-
-  private static int getPictureBytesStartOffset(int dataBlockStartOffset, byte[] _dataStream, int dataBlockSize)
-  {
-    int realPicoffset = dataBlockStartOffset;
-    final int dataBlockEndOffset = dataBlockSize + dataBlockStartOffset;
-
-    // Skip over the PICT block
-    int PICTFBlockSize = LittleEndian.getShort(_dataStream, dataBlockStartOffset +PICT_HEADER_OFFSET); // Should be 68 bytes
-
-    // Now the PICTF1
-    int PICTF1BlockOffset = PICTFBlockSize + PICT_HEADER_OFFSET;
-    short MM_TYPE = LittleEndian.getShort(_dataStream, dataBlockStartOffset + PICT_HEADER_OFFSET + 2);
-    if(MM_TYPE == 0x66) {
-       // Skip the stPicName
-       int cchPicName = LittleEndian.getUnsignedByte(_dataStream, PICTF1BlockOffset);
-       PICTF1BlockOffset += 1 + cchPicName;
-    }
-    int PICTF1BlockSize = LittleEndian.getShort(_dataStream, dataBlockStartOffset +PICTF1BlockOffset);
-
-    int unknownHeaderOffset = (PICTF1BlockSize + PICTF1BlockOffset) < dataBlockEndOffset ?  (PICTF1BlockSize + PICTF1BlockOffset) : PICTF1BlockOffset;
-    realPicoffset += (unknownHeaderOffset + UNKNOWN_HEADER_SIZE);
-    if (realPicoffset>=dataBlockEndOffset) {
-        realPicoffset -= UNKNOWN_HEADER_SIZE;
-    }
-    return realPicoffset;
-  }
-
-  private void fillJPGWidthHeight() {
-    /*
-    http://www.codecomments.com/archive281-2004-3-158083.html
-
-    Algorhitm proposed by Patrick TJ McPhee:
-
-    read 2 bytes
-    make sure they are 'ffd8'x
-    repeatedly:
-    read 2 bytes
-    make sure the first one is 'ff'x
-    if the second one is 'd9'x stop
-    else if the second one is c0 or c2 (or possibly other values ...)
-    skip 2 bytes
-    read one byte into depth
-    read two bytes into height
-    read two bytes into width
-    else
-    read two bytes into length
-    skip forward length-2 bytes
-
-    Also used Ruby code snippet from: http://www.bigbold.com/snippets/posts/show/805 for reference
-    */
-    int pointer = pictureBytesStartOffset+2;
-    int firstByte = _dataStream[pointer];
-    int secondByte = _dataStream[pointer+1];
-
-    int endOfPicture = pictureBytesStartOffset + size;
-    while(pointer<endOfPicture-1) {
-      do {
-        firstByte = _dataStream[pointer];
-        secondByte = _dataStream[pointer+1];
-        pointer += 2;
-      } while (!(firstByte==(byte)0xFF) && pointer<endOfPicture-1);
-
-      if (firstByte==((byte)0xFF) && pointer<endOfPicture-1) {
-        if (secondByte==(byte)0xD9 || secondByte==(byte)0xDA) {
-          break;
-        } else if ( (secondByte & 0xF0) == 0xC0 && secondByte!=(byte)0xC4 && secondByte!=(byte)0xC8 && secondByte!=(byte)0xCC) {
-          pointer += 5;
-          this.height = getBigEndianShort(_dataStream, pointer);
-          this.width = getBigEndianShort(_dataStream, pointer+2);
-          break;
-        } else {
-          pointer++;
-          pointer++;
-          int length = getBigEndianShort(_dataStream, pointer);
-          pointer+=length;
+        return realPicoffset;
+    }
+
+    private void fillJPGWidthHeight()
+    {
+        /*
+         * http://www.codecomments.com/archive281-2004-3-158083.html
+         * 
+         * Algorithm proposed by Patrick TJ McPhee:
+         * 
+         * read 2 bytes make sure they are 'ffd8'x repeatedly: read 2 bytes make
+         * sure the first one is 'ff'x if the second one is 'd9'x stop else if
+         * the second one is c0 or c2 (or possibly other values ...) skip 2
+         * bytes read one byte into depth read two bytes into height read two
+         * bytes into width else read two bytes into length skip forward
+         * length-2 bytes
+         * 
+         * Also used Ruby code snippet from:
+         * http://www.bigbold.com/snippets/posts/show/805 for reference
+         */
+        int pointer = pictureBytesStartOffset + 2;
+        int firstByte = _dataStream[pointer];
+        int secondByte = _dataStream[pointer + 1];
+
+        int endOfPicture = pictureBytesStartOffset + size;
+        while ( pointer < endOfPicture - 1 )
+        {
+            do
+            {
+                firstByte = _dataStream[pointer];
+                secondByte = _dataStream[pointer + 1];
+                pointer += 2;
+            }
+            while ( !( firstByte == (byte) 0xFF ) && pointer < endOfPicture - 1 );
+
+            if ( firstByte == ( (byte) 0xFF ) && pointer < endOfPicture - 1 )
+            {
+                if ( secondByte == (byte) 0xD9 || secondByte == (byte) 0xDA )
+                {
+                    break;
+                }
+                else if ( ( secondByte & 0xF0 ) == 0xC0
+                        && secondByte != (byte) 0xC4
+                        && secondByte != (byte) 0xC8
+                        && secondByte != (byte) 0xCC )
+                {
+                    pointer += 5;
+                    this.height = getBigEndianShort( _dataStream, pointer );
+                    this.width = getBigEndianShort( _dataStream, pointer + 2 );
+                    break;
+                }
+                else
+                {
+                    pointer++;
+                    pointer++;
+                    int length = getBigEndianShort( _dataStream, pointer );
+                    pointer += length;
+                }
+            }
+            else
+            {
+                pointer++;
+            }
         }
-      } else {
-        pointer++;
-      }
-    }
-  }
-
-  private void fillPNGWidthHeight()
-  {
-    /*
-     Used PNG file format description from http://www.wotsit.org/download.asp?f=png
-    */
-    int HEADER_START = pictureBytesStartOffset + PNG.length + 4;
-    if (matchSignature(_dataStream, IHDR, HEADER_START)) {
-      int IHDR_CHUNK_WIDTH = HEADER_START + 4;
-      this.width = getBigEndianInt(_dataStream, IHDR_CHUNK_WIDTH);
-      this.height = getBigEndianInt(_dataStream, IHDR_CHUNK_WIDTH + 4);
-    }
-  }
-
-  /**
-   * returns pixel width of the picture or -1 if dimensions determining was failed
-   */
-  public int getWidth()
-  {
-    if (width == -1)
-    {
-      fillWidthHeight();
-    }
-    return width;
-  }
-
-  /**
-   * returns pixel height of the picture or -1 if dimensions determining was failed
-   */
-  public int getHeight()
-  {
-    if (height == -1)
-    {
-      fillWidthHeight();
-    }
-    return height;
-  }
-
-  private static int getBigEndianInt(byte[] data, int offset)
-  {
-    return (((data[offset] & 0xFF)<< 24) + ((data[offset +1] & 0xFF) << 16) + ((data[offset + 2] & 0xFF) << 8) + (data[offset +3] & 0xFF));
-  }
-
-  private static int getBigEndianShort(byte[] data, int offset)
-  {
-    return (((data[offset] & 0xFF)<< 8) + (data[offset +1] & 0xFF));
-  }
+    }
+
+    private void fillPNGWidthHeight()
+    {
+        /*
+         * Used PNG file format description from
+         * http://www.wotsit.org/download.asp?f=png
+         */
+        int HEADER_START = pictureBytesStartOffset + PNG.length + 4;
+        if ( matchSignature( _dataStream, IHDR, HEADER_START ) )
+        {
+            int IHDR_CHUNK_WIDTH = HEADER_START + 4;
+            this.width = getBigEndianInt( _dataStream, IHDR_CHUNK_WIDTH );
+            this.height = getBigEndianInt( _dataStream, IHDR_CHUNK_WIDTH + 4 );
+        }
+    }
+
+    /**
+     * Returns the pixel width of the picture, or -1 if the dimensions could
+     * not be determined.
+     */
+    public int getWidth()
+    {
+        if ( width == -1 )
+        {
+            fillWidthHeight();
+        }
+        return width;
+    }
+
+    /**
+     * Returns the pixel height of the picture, or -1 if the dimensions could
+     * not be determined.
+     */
+    public int getHeight()
+    {
+        if ( height == -1 )
+        {
+            fillWidthHeight();
+        }
+        return height;
+    }
+
+    private static int getBigEndianInt( byte[] data, int offset )
+    {
+        return ( ( ( data[offset] & 0xFF ) << 24 )
+                + ( ( data[offset + 1] & 0xFF ) << 16 )
+                + ( ( data[offset + 2] & 0xFF ) << 8 ) + ( data[offset + 3] & 0xFF ) );
+    }
+
+    private static int getBigEndianShort( byte[] data, int offset )
+    {
+        return ( ( ( data[offset] & 0xFF ) << 8 ) + ( data[offset + 1] & 0xFF ) );
+    }
 
 }
index d89f0e7bd69f4c46a7c2e7dab6eb44a9f28dc4ab..5858c15e092faa13cd61fed67057fcee80242237 100644 (file)
@@ -36,11 +36,34 @@ public enum PictureType {
     TIFF( "image/tiff", "tiff", new byte[][] { { 0x49, 0x49, 0x2A, 0x00 },
             { 0x4D, 0x4D, 0x00, 0x2A } } ),
 
+    UNKNOWN( "image/unknown", "", new byte[][] {} ),
+
     WMF( "image/x-wmf", "wmf", new byte[][] {
             { (byte) 0xD7, (byte) 0xCD, (byte) 0xC6, (byte) 0x9A, 0x00, 0x00 },
-            { 0x01, 0x00, 0x09, 0x00, 0x00, 0x03 } } ),
+            { 0x01, 0x00, 0x09, 0x00, 0x00, 0x03 } } );
+
+    public static PictureType findMatchingType( byte[] pictureContent )
+    {
+        for ( PictureType pictureType : PictureType.values() )
+            for ( byte[] signature : pictureType.getSignatures() )
+                if ( matchSignature( pictureContent, signature ) )
+                    return pictureType;
+
+        // TODO: DIB, PICT
+        return PictureType.UNKNOWN;
+    }
+
+    private static boolean matchSignature( byte[] pictureData, byte[] signature )
+    {
+        if ( pictureData.length < signature.length )
+            return false;
+
+        for ( int i = 0; i < signature.length; i++ )
+            if ( pictureData[i] != signature[i] )
+                return false;
 
-    UNKNOWN( "image/unknown", "", new byte[][] {} );
+        return true;
+    }
 
     private String _extension;
 
@@ -69,4 +92,12 @@ public enum PictureType {
     {
         return _signatures;
     }
+
+    /**
+     * Checks whether the given picture content starts with any of the
+     * signatures registered for this picture type.
+     * 
+     * @param pictureData
+     *            picture content to examine
+     * @return <tt>true</tt> if the content begins with one of this type's
+     *         signatures, <tt>false</tt> otherwise
+     */
+    public boolean matchSignature( byte[] pictureData )
+    {
+        for ( byte[] signature : getSignatures() )
+            // the static helper expects (pictureData, signature), in that
+            // order; swapping them rejects any picture longer than its
+            // signature
+            if ( matchSignature( pictureData, signature ) )
+                return true;
+        return false;
+    }
 }
index 4fd3ac216c22f5802bac7afde2fef72bba583fd5..daa37eae8761c4545cf71b98318a97b95e35989a 100644 (file)
@@ -29,9 +29,8 @@ import junit.framework.TestCase;
 
 import org.apache.poi.POIDataSamples;
 import org.apache.poi.hwpf.HWPFDocument;
-import org.apache.poi.hwpf.usermodel.Picture;
+import org.apache.poi.hwpf.usermodel.PictureType;
 import org.w3c.dom.Document;
-import org.w3c.dom.Element;
 
 /**
  * Test cases for {@link WordToHtmlConverter}
@@ -62,16 +61,21 @@ public class TestWordToHtmlConverter extends TestCase
 
         Document newDocument = DocumentBuilderFactory.newInstance()
                 .newDocumentBuilder().newDocument();
-        WordToHtmlConverter wordToHtmlConverter = !emulatePictureStorage ? new WordToHtmlConverter(
-                newDocument ) : new WordToHtmlConverter( newDocument )
+        WordToHtmlConverter wordToHtmlConverter = new WordToHtmlConverter(
+                newDocument );
+
+        if ( emulatePictureStorage )
         {
-            @Override
-            protected void processImage( Element currentBlock, boolean inlined,
-                    Picture picture )
+            wordToHtmlConverter.setPicturesManager( new PicturesManager()
             {
-                processImage( currentBlock, inlined, picture, "picture.bin" );
-            }
-        };
+                public String savePicture( byte[] content,
+                        PictureType pictureType, String suggestedName )
+                {
+                    return suggestedName;
+                }
+            } );
+        }
+
         wordToHtmlConverter.processDocument( hwpfDocument );
 
         StringWriter stringWriter = new StringWriter();
@@ -172,20 +176,6 @@ public class TestWordToHtmlConverter extends TestCase
         assertContains( result, "<!--Image link to '0.emf' can be here-->" );
     }
 
-    public void testPicture() throws Exception
-    {
-        String result = getHtmlText( "picture.doc", true );
-
-        // picture
-        assertContains( result, "src=\"picture.bin\"" );
-        // visible size
-        assertContains( result, "width:3.1305554in;height:1.7250001in;" );
-        // shift due to crop
-        assertContains( result, "left:-0.09375;top:-0.25694445;" );
-        // size without crop
-        assertContains( result, "width:3.4125in;height:2.325in;" );
-    }
-
     public void testHyperlink() throws Exception
     {
         String result = getHtmlText( "hyperlink.doc" );
@@ -201,14 +191,6 @@ public class TestWordToHtmlConverter extends TestCase
         getHtmlText( "innertable.doc" );
     }
 
-    public void testTableMerges() throws Exception
-    {
-        String result = getHtmlText( "table-merges.doc" );
-        
-        assertContains( result, "<td class=\"td1\" colspan=\"3\">" );
-        assertContains( result, "<td class=\"td2\" colspan=\"2\">" );
-    }
-
     public void testO_kurs_doc() throws Exception
     {
         getHtmlText( "o_kurs.doc" );
@@ -222,4 +204,33 @@ public class TestWordToHtmlConverter extends TestCase
         assertContains( result, "<a name=\"userref\">" );
         assertContains( result, "1" );
     }
+
+    public void testPicture() throws Exception
+    {
+        String result = getHtmlText( "picture.doc", true );
+
+        // picture
+        assertContains( result, "src=\"0.emf\"" );
+        // visible size
+        assertContains( result, "width:3.1305554in;height:1.7250001in;" );
+        // shift due to crop
+        assertContains( result, "left:-0.09375;top:-0.25694445;" );
+        // size without crop
+        assertContains( result, "width:3.4125in;height:2.325in;" );
+    }
+
+    public void testPicturesEscher() throws Exception
+    {
+        String result = getHtmlText( "pictures_escher.doc", true );
+        assertContains( result, "<img src=\"s0.PNG\">" );
+        assertContains( result, "<img src=\"s808.PNG\">" );
+    }
+
+    public void testTableMerges() throws Exception
+    {
+        String result = getHtmlText( "table-merges.doc" );
+
+        assertContains( result, "<td class=\"td1\" colspan=\"3\">" );
+        assertContains( result, "<td class=\"td2\" colspan=\"2\">" );
+    }
 }
diff --git a/test-data/document/pictures_escher.doc b/test-data/document/pictures_escher.doc
new file mode 100644 (file)
index 0000000..4870bc7
Binary files /dev/null and b/test-data/document/pictures_escher.doc differ