]> source.dussan.org Git - poi.git/commitdiff
From bug #38616 - support for extracting images from word files, plus tests for this
authorNick Burch <nick@apache.org>
Tue, 26 Sep 2006 14:46:39 +0000 (14:46 +0000)
committerNick Burch <nick@apache.org>
Tue, 26 Sep 2006 14:46:39 +0000 (14:46 +0000)
git-svn-id: https://svn.apache.org/repos/asf/jakarta/poi/trunk@450066 13f79535-47bb-0310-9956-ffa450edef68

src/scratchpad/src/org/apache/poi/hwpf/HWPFDocument.java
src/scratchpad/src/org/apache/poi/hwpf/model/PicturesTable.java [new file with mode: 0644]
src/scratchpad/src/org/apache/poi/hwpf/usermodel/Picture.java [new file with mode: 0644]
src/scratchpad/testcases/org/apache/poi/hwpf/TestHWPFPictures.java [new file with mode: 0644]
src/scratchpad/testcases/org/apache/poi/hwpf/data/simple_image.jpg [new file with mode: 0644]
src/scratchpad/testcases/org/apache/poi/hwpf/data/simple_image.png [new file with mode: 0644]
src/scratchpad/testcases/org/apache/poi/hwpf/data/testPictures.doc [new file with mode: 0644]
src/scratchpad/testcases/org/apache/poi/hwpf/data/two_images.doc [new file with mode: 0755]

index 7657019e7fbd287e1e889e135ad7661556e61d5d..b425e093e1f9d8da168de2967daceb6bc26494cf 100644 (file)
@@ -88,6 +88,9 @@ public class HWPFDocument extends POIDocument
 
   /** Holds the save history for this document. */
   protected SavedByTable _sbt;
+  
+  /** Holds pictures table */
+  protected PicturesTable _pictures;
 
   protected HWPFDocument()
   {
@@ -184,6 +187,9 @@ public class HWPFDocument extends POIDocument
     {
         _dataStream = new byte[0];
     }
+    
+    // read in the pictures stream
+    _pictures = new PicturesTable(_dataStream);
 
     // get the start of text in the main stream
     int fcMin = _fib.getFcMin();
@@ -287,6 +293,13 @@ public class HWPFDocument extends POIDocument
   {
     return _sbt;
   }
+  
+  /**
+   * @return PicturesTable object, that is able to extract images from this document
+   */
+  public PicturesTable getPicturesTable() {
+         return _pictures;
+  }
 
   /**
    * Writes out the word file that is represented by an instance of this class.
diff --git a/src/scratchpad/src/org/apache/poi/hwpf/model/PicturesTable.java b/src/scratchpad/src/org/apache/poi/hwpf/model/PicturesTable.java
new file mode 100644 (file)
index 0000000..69bde3a
--- /dev/null
@@ -0,0 +1,152 @@
+/* ====================================================================
+   Copyright 2002-2006   Apache Software Foundation
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+==================================================================== */
+
+
+package org.apache.poi.hwpf.model;
+
+import org.apache.poi.util.LittleEndian;
+import org.apache.poi.hwpf.usermodel.CharacterRun;
+import org.apache.poi.hwpf.usermodel.Picture;
+
+import java.util.List;
+import java.util.ArrayList;
+
+
+/**
+ * Holds information about all pictures embedded in Word Document either via "Insert -> Picture -> From File" or via
+ * clipboard. Responsible for images extraction and determining whether some document's piece contains embedded image.
+ * Analyzes raw data bytestream "Data" (where Word stores all embedded objects) provided by HWPFDocument.
+ *
+ * Word stores images as is within so called "Data stream" - the stream within a Word docfile containing various data
+ * that hang off of characters in the main stream. For example, binary data describing in-line pictures and/or
+ * formfields, and also the native data of embedded objects. Word picture structures are concatenated one after the other in
+ * the data stream if the document contains pictures.
+ * Data stream is easily reachable via HWPFDocument._dataStream property.
+ * A picture is represented in the document text stream as a special character, an Unicode \u0001 whose
+ * CharacterRun.isSpecial() returns true. The file location of the picture in the Word binary file is accessed
+ * via CharacterRun.getPicOffset(). The CharacterRun.getPicOffset() is a byte offset into the data stream.
+ * Beginning at the position recorded in picOffset, a header data structure, will be stored.
+ *
+ * @author Dmitry Romanov
+ */
+public class PicturesTable
+{
+  static final int TYPE_IMAGE = 0x08;
+  static final int TYPE_IMAGE_WORD2000 = 0x00;
+  static final int TYPE_IMAGE_PASTED_FROM_CLIPBOARD = 0xA;
+  static final int TYPE_IMAGE_PASTED_FROM_CLIPBOARD_WORD2000 = 0x2;
+  static final int TYPE_HORIZONTAL_LINE = 0xE;
+  static final int BLOCK_TYPE_OFFSET = 0xE;
+  static final int MM_MODE_TYPE_OFFSET = 0x6;
+
+  private byte[] _dataStream;
+
+  /** @link dependency
+   * @stereotype instantiate*/
+  /*# Picture lnkPicture; */
+
+  /**
+   *
+   * @param _dataStream
+   */
+  public PicturesTable(byte[] _dataStream)
+  {
+    this._dataStream = _dataStream;
+  }
+
+  /**
+   * determines whether specified CharacterRun contains reference to a picture
+   * @param run
+   */
+  public boolean hasPicture(CharacterRun run) {
+    if (run.isSpecialCharacter() && !run.isObj() && !run.isOle2() && !run.isData() && "\u0001".equals(run.text())) {
+      return isBlockContainsImage(run.getPicOffset());
+    }
+    return false;
+  }
+
+  /**
+   * determines whether specified CharacterRun contains reference to a horizontal line
+   * @param run
+  */
+  public boolean hasHorizontalLine(CharacterRun run) {
+    if (run.isSpecialCharacter() && "\u0001".equals(run.text())) {
+      return isBlockContainsHorizontalLine(run.getPicOffset());
+    }
+    return false;
+  }
+
+  private boolean isPictureRecognized(short blockType, short mappingModeOfMETAFILEPICT) {
+    return (blockType == TYPE_IMAGE || blockType == TYPE_IMAGE_PASTED_FROM_CLIPBOARD || (blockType==TYPE_IMAGE_WORD2000 && mappingModeOfMETAFILEPICT==0x64) || (blockType==TYPE_IMAGE_PASTED_FROM_CLIPBOARD_WORD2000 && mappingModeOfMETAFILEPICT==0x64));
+  }
+
+  private static short getBlockType(byte[] dataStream, int pictOffset) {
+    return LittleEndian.getShort(dataStream, pictOffset + BLOCK_TYPE_OFFSET);
+  }
+
+  private static short getMmMode(byte[] dataStream, int pictOffset) {
+    return LittleEndian.getShort(dataStream, pictOffset + MM_MODE_TYPE_OFFSET);
+  }
+
+  /**
+   * Returns picture object tied to specified CharacterRun
+   * @param run
+   * @param fillBytes if true, Picture will be returned with filled byte array that represent picture's contents. If you don't want
+   * to have that byte array in memory but only write picture's contents to stream, pass false and then use Picture.writeImageContent
+   * @see Picture#writeImageContent(java.io.OutputStream)
+   * @return a Picture object if picture exists for specified CharacterRun, null otherwise. PicturesTable.hasPicture is used to determine this.
+   * @see #hasPicture(org.apache.poi.hwpf.usermodel.CharacterRun) 
+   */
+  public Picture extractPicture(CharacterRun run, boolean fillBytes) {
+    if (hasPicture(run)) {
+      return new Picture(run.getPicOffset(), _dataStream, fillBytes);
+    }
+    return null;
+  }
+
+  /**
+   * @return a list of Picture objects found in current document
+   */
+  public List getAllPictures() {
+    ArrayList pictures = new ArrayList();
+    
+    int pos = 0;
+    boolean atEnd = false;
+    
+    while(pos<_dataStream.length && !atEnd) {
+      if (isBlockContainsImage(pos)) {
+        pictures.add(new Picture(pos, _dataStream, false));
+      }
+      
+      int skipOn = LittleEndian.getInt(_dataStream, pos);
+      if(skipOn <= 0) { atEnd = true; }
+      pos += skipOn;
+    }
+    
+    return pictures;
+  }
+
+  private boolean isBlockContainsImage(int i)
+  {
+    return isPictureRecognized(getBlockType(_dataStream, i), getMmMode(_dataStream, i));
+  }
+
+  private boolean isBlockContainsHorizontalLine(int i)
+  {
+    return getBlockType(_dataStream, i)==TYPE_HORIZONTAL_LINE && getMmMode(_dataStream, i)==0x64;
+  }
+
+}
diff --git a/src/scratchpad/src/org/apache/poi/hwpf/usermodel/Picture.java b/src/scratchpad/src/org/apache/poi/hwpf/usermodel/Picture.java
new file mode 100644 (file)
index 0000000..8adbf09
--- /dev/null
@@ -0,0 +1,350 @@
+/* ====================================================================
+   Copyright 2002-2006   Apache Software Foundation
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+==================================================================== */
+
+
+package org.apache.poi.hwpf.usermodel;
+
+import org.apache.poi.util.LittleEndian;
+
+import java.io.OutputStream;
+import java.io.IOException;
+
+/**
+ * Represents embedded picture extracted from Word Document
+ * @author Dmitry Romanov
+ */
+public class Picture
+{
+//  public static final int FILENAME_OFFSET = 0x7C;
+//  public static final int FILENAME_SIZE_OFFSET = 0x6C;
+  static final int BLOCK_TYPE_OFFSET = 0xE;
+  static final int PICT_HEADER_OFFSET = 0x4;
+  static final int UNKNOWN_HEADER_SIZE = 0x49;
+
+  public static final byte[] GIF = new byte[]{'G', 'I', 'F'};
+  public static final byte[] PNG = new byte[]{ (byte)0x89, 0x50, 0x4E, 0x47,0x0D,0x0A,0x1A,0x0A};
+  public static final byte[] JPG = new byte[]{(byte)0xFF, (byte)0xD8};
+  public static final byte[] BMP = new byte[]{'B', 'M'};
+  public static final byte[] TIFF = new byte[]{0x49, 0x49, 0x2A, 0x00};
+  public static final byte[] TIFF1 = new byte[]{0x4D, 0x4D, 0x00, 0x2A};
+
+  public static final byte[] IHDR = new byte[]{'I', 'H', 'D', 'R'};
+
+  private int dataBlockStartOfsset;
+  private int pictureBytesStartOffset;
+  private int dataBlockSize;
+  private int size;
+//  private String fileName;
+  private byte[] content;
+  private byte[] _dataStream;
+  private int aspectRatioX;
+  private int aspectRatioY;
+  private int height = -1;
+  private int width = -1;
+
+
+  public Picture(int dataBlockStartOfsset, byte[] _dataStream, boolean fillBytes)
+  {
+    this._dataStream = _dataStream;
+    this.dataBlockStartOfsset = dataBlockStartOfsset;
+    this.dataBlockSize = LittleEndian.getInt(_dataStream, dataBlockStartOfsset);
+    this.pictureBytesStartOffset = getPictureBytesStartOffset(dataBlockStartOfsset, _dataStream, dataBlockSize);
+    this.size = dataBlockSize - (pictureBytesStartOffset - dataBlockStartOfsset);
+
+    if (size<0) {
+
+    }
+
+    this.aspectRatioX = extractAspectRatioX(_dataStream, dataBlockStartOfsset);
+    this.aspectRatioY = extractAspectRatioY(_dataStream, dataBlockStartOfsset);
+//    this.fileName = extractFileName(dataBlockStartOfsset, _dataStream);
+//    if (fileName==null || fileName.length()==0) {
+//      fileName = "clipboard";
+//    }
+
+    if (fillBytes)
+    {
+      fillImageContent(_dataStream);
+    }
+
+    String ext = suggestFileExtension();
+    // trying to extract width and height from pictures content:
+    if ("jpg".equalsIgnoreCase(ext)) {
+      fillJPGWidthHeight();
+    } else if ("png".equalsIgnoreCase(ext)) {
+      fillPNGWidthHeight();
+    }
+  }
+
+  private static int extractAspectRatioX(byte[] _dataStream, int dataBlockStartOffset)
+  {
+    return LittleEndian.getShort(_dataStream, dataBlockStartOffset+0x20)/10;
+  }
+
+  private static int extractAspectRatioY(byte[] _dataStream, int dataBlockStartOffset)
+  {
+    return LittleEndian.getShort(_dataStream, dataBlockStartOffset+0x22)/10;
+  }
+
+  /**
+   * Tries to suggest a filename: hex representation of picture structure offset in "Data" stream plus extension that
+   * is tried to determine from first byte of picture's content.
+   *
+   * @return suggested file name
+   */
+  public String suggestFullFileName()
+  {
+    String fileExt = suggestFileExtension();
+    return Integer.toHexString(dataBlockStartOfsset) + (fileExt.length()>0 ? "."+fileExt : "");
+  }
+
+  /**
+   * Writes Picture's content bytes to specified OutputStream.
+   * Is useful when there is need to write picture bytes directly to stream, omitting its representation in
+   * memory as distinct byte array.
+   *
+   * @param out a stream to write to
+   * @throws IOException if an error occurs while writing to the specified stream
+   */
+  public void writeImageContent(OutputStream out) throws IOException
+  {
+    if (content!=null && content.length>0) {
+      out.write(content, 0, size);
+    } else {
+      out.write(_dataStream, pictureBytesStartOffset, size);
+    }
+  }
+
+  /**
+   * @return picture's content as byte array
+   */
+  public byte[] getContent()
+  {
+    if (content == null || content.length<=0)
+    {
+      fillImageContent(this._dataStream);
+    }
+    return content;
+  }
+
+  /**
+   *
+   * @return size in bytes of the picture
+   */
+  public int getSize()
+  {
+    return size;
+  }
+
+  /**
+   * returns horizontal aspect ratio for picture provided by user
+   */
+  public int getAspectRatioX()
+  {
+    return aspectRatioX;
+  }
+  /**
+   * returns vertical aspect ratio for picture provided by user
+   */
+  public int getAspectRatioY()
+  {
+    return aspectRatioY;
+  }
+
+  /**
+   * tries to suggest extension for picture's file by matching signatures of popular image formats to first bytes
+   * of picture's contents
+   * @return suggested file extension
+   */
+  public String suggestFileExtension()
+  {
+    if (content!=null && content.length>0) {
+      return suggestFileExtension(content, 0);
+    }
+    return suggestFileExtension(_dataStream, pictureBytesStartOffset);
+  }
+
+
+  private String suggestFileExtension(byte[] _dataStream, int pictureBytesStartOffset)
+  {
+    if (matchSignature(_dataStream, JPG, pictureBytesStartOffset)) {
+      return "jpg";
+    } else if (matchSignature(_dataStream, PNG, pictureBytesStartOffset)) {
+      return "png";
+    } else if (matchSignature(_dataStream, GIF, pictureBytesStartOffset)) {
+      return "gif";
+    } else if (matchSignature(_dataStream, BMP, pictureBytesStartOffset)) {
+      return "bmp";
+    } else if (matchSignature(_dataStream, TIFF, pictureBytesStartOffset)) {
+      return "tiff";
+    } else if (matchSignature(_dataStream, TIFF1, pictureBytesStartOffset)) {
+      return "tiff";
+    }
+    return "";
+  }
+
+  private static boolean matchSignature(byte[] dataStream, byte[] signature, int pictureBytesOffset)
+  {
+    boolean matched = pictureBytesOffset < dataStream.length;
+    for (int i = 0; (i+pictureBytesOffset) < dataStream.length && i < signature.length; i++)
+    {
+      if (dataStream[i+pictureBytesOffset] != signature[i])
+      {
+        matched = false;
+        break;
+      }
+    }
+    return matched;
+  }
+
+//  public String getFileName()
+//  {
+//    return fileName;
+//  }
+
+//  private static String extractFileName(int blockStartIndex, byte[] dataStream) {
+//        int fileNameStartOffset = blockStartIndex + 0x7C;
+//        int fileNameSizeOffset = blockStartIndex + FILENAME_SIZE_OFFSET;
+//        int fileNameSize = LittleEndian.getShort(dataStream, fileNameSizeOffset);
+//
+//        int fileNameIndex = fileNameStartOffset;
+//        char[] fileNameChars = new char[(fileNameSize-1)/2];
+//        int charIndex = 0;
+//        while(charIndex<fileNameChars.length) {
+//            short aChar = LittleEndian.getShort(dataStream, fileNameIndex);
+//            fileNameChars[charIndex] = (char)aChar;
+//            charIndex++;
+//            fileNameIndex += 2;
+//        }
+//        String fileName = new String(fileNameChars);
+//        return fileName.trim();
+//    }
+
+  private void fillImageContent(byte[] dataStream)
+  {
+    this.content = new byte[size];
+    System.arraycopy(dataStream, pictureBytesStartOffset, content, 0, size);
+  }
+
+  private static int getPictureBytesStartOffset(int dataBlockStartOffset, byte[] _dataStream, int dataBlockSize)
+  {
+    final int dataBlockEndOffset = dataBlockSize + dataBlockStartOffset;
+    int realPicoffset = dataBlockStartOffset;
+
+    int PICTFBlockSize = LittleEndian.getShort(_dataStream, dataBlockStartOffset +PICT_HEADER_OFFSET);
+    int PICTF1BlockOffset = PICTFBlockSize + PICT_HEADER_OFFSET;
+    int PICTF1BlockSize = LittleEndian.getShort(_dataStream, dataBlockStartOffset +PICTF1BlockOffset);
+
+    int unknownHeaderOffset = (PICTF1BlockSize + PICTF1BlockOffset) < dataBlockEndOffset ?  (PICTF1BlockSize + PICTF1BlockOffset) : PICTF1BlockOffset;
+    realPicoffset += (unknownHeaderOffset + UNKNOWN_HEADER_SIZE);
+    if (realPicoffset>=dataBlockEndOffset) {
+        realPicoffset -= UNKNOWN_HEADER_SIZE;
+    }
+    return realPicoffset;
+  }
+
+  private void fillJPGWidthHeight() {
+    /*
+    http://www.codecomments.com/archive281-2004-3-158083.html
+
+    Algorhitm proposed by Patrick TJ McPhee:
+
+    read 2 bytes
+    make sure they are 'ffd8'x
+    repeatedly:
+    read 2 bytes
+    make sure the first one is 'ff'x
+    if the second one is 'd9'x stop
+    else if the second one is c0 or c2 (or possibly other values ...)
+    skip 2 bytes
+    read one byte into depth
+    read two bytes into height
+    read two bytes into width
+    else
+    read two bytes into length
+    skip forward length-2 bytes
+
+    Also used Ruby code snippet from: http://www.bigbold.com/snippets/posts/show/805 for reference
+    */
+    int pointer = pictureBytesStartOffset+2;
+    int firstByte = _dataStream[pointer];
+    int secondByte = _dataStream[pointer+1];
+
+    int endOfPicture = pictureBytesStartOffset + size;
+    while(pointer<endOfPicture-1) {
+      do {
+        firstByte = _dataStream[pointer];
+        secondByte = _dataStream[pointer+1];
+      } while (!(firstByte==(byte)0xFF) && pointer<endOfPicture-1);
+
+      if (firstByte==((byte)0xFF) && pointer<endOfPicture-1) {
+        if (secondByte==(byte)0xD9 || secondByte==(byte)0xDA) {
+          break;
+        } else if ( (secondByte & 0xF0) == 0xC0 && secondByte!=(byte)0xC4 && secondByte!=(byte)0xC8 && secondByte!=(byte)0xCC) {
+          pointer += 5;
+          this.height = getBigEndianShort(_dataStream, pointer);
+          this.width = getBigEndianShort(_dataStream, pointer+2);
+          break;
+        } else {
+          pointer++;
+          pointer++;
+          int length = getBigEndianShort(_dataStream, pointer);
+          pointer+=length;
+        }
+      } else {
+        pointer++;
+      }
+    }
+  }
+
+  private void fillPNGWidthHeight()
+  {
+    /*
+     Used PNG file format description from http://www.wotsit.org/download.asp?f=png
+    */
+    int HEADER_START = pictureBytesStartOffset + PNG.length + 4;
+    if (matchSignature(_dataStream, IHDR, HEADER_START)) {
+      int IHDR_CHUNK_WIDTH = HEADER_START + 4;
+      this.width = getBigEndianInt(_dataStream, IHDR_CHUNK_WIDTH);
+      this.height = getBigEndianInt(_dataStream, IHDR_CHUNK_WIDTH + 4);
+    }
+  }
+  /**
+   * returns pixel width of the picture or -1 if the dimensions could not be determined
+   */
+  public int getWidth()
+  {
+    return width;
+  }
+  /**
+   * returns pixel height of the picture or -1 if the dimensions could not be determined
+   */
+  public int getHeight()
+  {
+    return height;
+  }
+
+  private static int getBigEndianInt(byte[] data, int offset)
+  {
+    return (((data[offset] & 0xFF)<< 24) + ((data[offset +1] & 0xFF) << 16) + ((data[offset + 2] & 0xFF) << 8) + (data[offset +3] & 0xFF));
+  }
+
+  private static int getBigEndianShort(byte[] data, int offset)
+  {
+    return (((data[offset] & 0xFF)<< 8) + (data[offset +1] & 0xFF));
+  }
+
+}
diff --git a/src/scratchpad/testcases/org/apache/poi/hwpf/TestHWPFPictures.java b/src/scratchpad/testcases/org/apache/poi/hwpf/TestHWPFPictures.java
new file mode 100644 (file)
index 0000000..e92e40c
--- /dev/null
@@ -0,0 +1,130 @@
+
+/* ====================================================================
+   Copyright 2002-2004   Apache Software Foundation
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+==================================================================== */
+        
+package org.apache.poi.hwpf;
+
+import java.io.ByteArrayOutputStream;
+import java.io.FileInputStream;
+import java.util.List;
+
+import org.apache.poi.hwpf.model.PicturesTable;
+import org.apache.poi.hwpf.usermodel.Picture;
+
+import junit.framework.TestCase;
+
+/**
+ * Test picture support in HWPF
+ * @author nick
+ */
+public class TestHWPFPictures extends TestCase {
+       private HWPFDocument docA;
+       private HWPFDocument docB;
+       private String docAFile;
+       private String docBFile;
+       
+       private String imgAFile;
+       private String imgBFile;
+       
+       protected void setUp() throws Exception {
+               String dirname = System.getProperty("HWPF.testdata.path");
+               
+               docAFile = dirname + "/testPictures.doc";
+               docBFile = dirname + "/two_images.doc";
+               
+               imgAFile = dirname + "/simple_image.jpg";
+               imgBFile = dirname + "/simple_image.png";
+       }
+       
+       /**
+        * Test just opening the files
+        */
+       public void testOpen() throws Exception {
+               docA = new HWPFDocument(new FileInputStream(docAFile));
+               docB = new HWPFDocument(new FileInputStream(docBFile));
+       }
+       
+       /**
+        * Test that we have the right numbers of images in each file
+        */
+       public void testImageCount() throws Exception {
+               docA = new HWPFDocument(new FileInputStream(docAFile));
+               docB = new HWPFDocument(new FileInputStream(docBFile));
+               
+               assertNotNull(docA.getPicturesTable());
+               assertNotNull(docB.getPicturesTable());
+               
+               PicturesTable picA = docA.getPicturesTable();
+               PicturesTable picB = docB.getPicturesTable();
+               
+               List picturesA = picA.getAllPictures();
+               List picturesB = picB.getAllPictures();
+               
+               assertEquals(7, picturesA.size());
+               assertEquals(2, picturesB.size());
+       }
+       
+       /**
+        * Test that we have the right images in at least one file
+        */
+       public void testImageData() throws Exception {
+               docB = new HWPFDocument(new FileInputStream(docBFile));
+               PicturesTable picB = docB.getPicturesTable();
+               List picturesB = picB.getAllPictures();
+               
+               assertEquals(2, picturesB.size());
+               
+               Picture pic1 = (Picture)picturesB.get(0);
+               Picture pic2 = (Picture)picturesB.get(1);
+               
+               assertNotNull(pic1);
+               assertNotNull(pic2);
+               
+               // Check the same
+               byte[] pic1B = readFile(imgAFile);
+               byte[] pic2B = readFile(imgBFile);
+               
+               assertEquals(pic1B.length, pic1.getContent().length);
+               assertEquals(pic2B.length, pic2.getContent().length);
+
+               assertBytesSame(pic1B, pic1.getContent());
+               assertBytesSame(pic2B, pic2.getContent());
+       }
+       
+       
+       private void assertBytesSame(byte[] a, byte[] b) {
+               assertEquals(a.length, b.length);
+               for(int i=0; i<a.length; i++) {
+                       assertEquals(a[i],b[i]);
+               }
+       }
+       
+       /**
+        * Reads the whole of the named file into memory.
+        *
+        * @param file path of the file to read
+        * @return the complete contents of the file
+        * @throws Exception if the file cannot be opened or read
+        */
+       private byte[] readFile(String file) throws Exception {
+               ByteArrayOutputStream baos = new ByteArrayOutputStream();
+               FileInputStream fis = new FileInputStream(file);
+               try {
+                       byte[] buffer = new byte[1024];
+
+                       int read = 0;
+                       while(read > -1) {
+                               read = fis.read(buffer);
+                               if(read > 0) {
+                                       baos.write(buffer,0,read);
+                               }
+                       }
+               } finally {
+                       // Always release the file handle, even if reading fails
+                       fis.close();
+               }
+
+               return baos.toByteArray();
+       }
+}
diff --git a/src/scratchpad/testcases/org/apache/poi/hwpf/data/simple_image.jpg b/src/scratchpad/testcases/org/apache/poi/hwpf/data/simple_image.jpg
new file mode 100644 (file)
index 0000000..af68bca
Binary files /dev/null and b/src/scratchpad/testcases/org/apache/poi/hwpf/data/simple_image.jpg differ
diff --git a/src/scratchpad/testcases/org/apache/poi/hwpf/data/simple_image.png b/src/scratchpad/testcases/org/apache/poi/hwpf/data/simple_image.png
new file mode 100644 (file)
index 0000000..a9120d7
Binary files /dev/null and b/src/scratchpad/testcases/org/apache/poi/hwpf/data/simple_image.png differ
diff --git a/src/scratchpad/testcases/org/apache/poi/hwpf/data/testPictures.doc b/src/scratchpad/testcases/org/apache/poi/hwpf/data/testPictures.doc
new file mode 100644 (file)
index 0000000..d031993
Binary files /dev/null and b/src/scratchpad/testcases/org/apache/poi/hwpf/data/testPictures.doc differ
diff --git a/src/scratchpad/testcases/org/apache/poi/hwpf/data/two_images.doc b/src/scratchpad/testcases/org/apache/poi/hwpf/data/two_images.doc
new file mode 100755 (executable)
index 0000000..f94867d
Binary files /dev/null and b/src/scratchpad/testcases/org/apache/poi/hwpf/data/two_images.doc differ