_officeArts = new ShapesTable(_tableStream, _fib);
// And escher pictures
- _officeDrawingsHeaders = new OfficeDrawingsImpl( _fspaHeaders, _escherRecordHolder );
- _officeDrawingsMain = new OfficeDrawingsImpl( _fspaMain , _escherRecordHolder);
+ _officeDrawingsHeaders = new OfficeDrawingsImpl( _fspaHeaders, _escherRecordHolder, _mainStream );
+ _officeDrawingsMain = new OfficeDrawingsImpl( _fspaMain , _escherRecordHolder, _mainStream);
_st = new SectionTable(_mainStream, _tableStream, _fib.getFcPlcfsed(), _fib.getLcbPlcfsed(), fcMin, _tpt, _cpSplit);
_ss = new StyleSheet(_tableStream, _fib.getFcStshf());
import org.apache.poi.hwpf.usermodel.OfficeDrawing;
import org.apache.poi.hwpf.usermodel.Paragraph;
import org.apache.poi.hwpf.usermodel.Picture;
+import org.apache.poi.hwpf.usermodel.PictureType;
import org.apache.poi.hwpf.usermodel.Range;
import org.apache.poi.hwpf.usermodel.Section;
import org.apache.poi.hwpf.usermodel.Table;
+    /**
+     * Handles a drawn object referenced by the given character run: looks up
+     * the office drawing anchored at the run's start offset in the main
+     * document part, saves its picture data through the pictures manager and
+     * delegates rendering to the five-argument overload.
+     * <p>
+     * Nothing is rendered when no pictures manager is configured, when no
+     * drawing is found, when the drawing has no picture data, or when the
+     * pictures manager does not return a usable path.
+     */
protected void processDrawnObject( HWPFDocument doc,
CharacterRun characterRun, Element block )
{
- // main?
+        // Without a pictures manager there is nowhere to save picture data,
+        // so nothing could be referenced from the output.
+        if ( getPicturesManager() == null )
+            return;
+
+        // TODO: support headers
OfficeDrawing officeDrawing = doc.getOfficeDrawingsMain()
.getOfficeDrawingAt( characterRun.getStartOffset() );
if ( officeDrawing == null )
return;
}
- // TODO: do something :)
+        byte[] pictureData = officeDrawing.getPictureData();
+        if ( pictureData == null )
+            // usual shape?
+            return;
+
+        final PictureType type = PictureType.findMatchingType( pictureData );
+        String path = getPicturesManager().savePicture( pictureData, type,
+                "s" + characterRun.getStartOffset() + "." + type );
+
+        // savePicture() returns null when the picture was not saved and
+        // should not be referenced from the result; skip rendering then,
+        // as the inline-picture code path does.
+        if ( path == null || path.length() == 0 )
+            return;
+
+        processDrawnObject( doc, characterRun, officeDrawing, path, block );
}
+ /**
+ * Renders a drawn object whose picture data has already been handed to the
+ * pictures manager. Invoked by the three-argument
+ * <tt>processDrawnObject</tt> after the picture was saved.
+ *
+ * @param doc
+ * document being converted
+ * @param characterRun
+ * character run whose start offset was used to look up the drawing
+ * @param officeDrawing
+ * drawing found for that character run
+ * @param path
+ * value returned by the pictures manager's <tt>savePicture</tt>
+ * call for the drawing's picture data
+ * @param block
+ * output element; the implementations added with this method
+ * append the generated image reference to it
+ */
+ protected abstract void processDrawnObject( HWPFDocument doc,
+ CharacterRun characterRun, OfficeDrawing officeDrawing,
+ String path, Element block );
+
protected abstract void processEndnoteAutonumbered( HWPFDocument doc,
int noteIndex, Element block, Range endnoteTextRange );
return basicLink;
}
+    /**
+     * Creates an <tt>img</tt> element that references the given source.
+     *
+     * @param src
+     *            value stored in the element's <tt>src</tt> attribute
+     * @return the new element; this method does not append it anywhere
+     */
+    public Element createImage( String src )
+    {
+        final Element image = document.createElement( "img" );
+        image.setAttribute( "src", src );
+        return image;
+    }
+
public Element createLineBreak()
{
return document.createElement( "br" );
==================================================================== */
package org.apache.poi.hwpf.converter;
-import org.apache.poi.hwpf.usermodel.Picture;
import org.apache.poi.hwpf.usermodel.PictureType;
/**
* {@link PictureType#WMF}. FO (Apache FOP) supports at least PNG and SVG
* types.
*
- * @param picture
- * Word picture
+ * @param content
+ * picture content
* @return path to file that can be used as reference in HTML (img's src) of
* XLS FO (fo:external-graphic's src) or <tt>null</tt> if image were
* not saved and should not be referenced from result HTML / FO.
*/
- String savePicture( Picture picture );
+ String savePicture( byte[] content, PictureType pictureType, String suggestedName );
}
import org.apache.poi.hwpf.converter.FontReplacer.Triplet;
import org.apache.poi.hwpf.usermodel.Bookmark;
import org.apache.poi.hwpf.usermodel.CharacterRun;
+import org.apache.poi.hwpf.usermodel.OfficeDrawing;
import org.apache.poi.hwpf.usermodel.Paragraph;
import org.apache.poi.hwpf.usermodel.Picture;
import org.apache.poi.hwpf.usermodel.Range;
foDocumentFacade.setDescription( summaryInformation.getComments() );
}
+    /**
+     * Appends to <tt>block</tt> the external graphic element that the FO
+     * document facade creates for the saved picture path.
+     */
+    @Override
+    protected void processDrawnObject( HWPFDocument doc,
+            CharacterRun characterRun, OfficeDrawing officeDrawing,
+            String path, Element block )
+    {
+        block.appendChild( foDocumentFacade.createExternalGraphic( path ) );
+    }
+
@Override
protected void processEndnoteAutonumbered( HWPFDocument doc, int noteIndex,
Element block, Range endnoteTextRange )
PicturesManager fileManager = getPicturesManager();
if ( fileManager != null )
{
- String url = fileManager.savePicture( picture );
+ String url = fileManager
+ .savePicture( picture.getContent(),
+ picture.suggestPictureType(),
+ picture.suggestFullFileName() );
if ( WordToFoUtils.isNotEmpty( url ) )
{
import org.apache.poi.hwpf.converter.FontReplacer.Triplet;
import org.apache.poi.hwpf.usermodel.Bookmark;
import org.apache.poi.hwpf.usermodel.CharacterRun;
+import org.apache.poi.hwpf.usermodel.OfficeDrawing;
import org.apache.poi.hwpf.usermodel.Paragraph;
import org.apache.poi.hwpf.usermodel.Picture;
import org.apache.poi.hwpf.usermodel.Range;
public class WordToHtmlConverter extends AbstractWordConverter
{
+    /**
+     * Immutable holder for the font properties already applied to the
+     * current <tt>p</tt> element, so that child <tt>span</tt> elements do not
+     * have to repeat them.
+     */
+    private static class BlockProperies
+    {
+        /** Font name applied to the enclosing <tt>p</tt> element. */
+        final String pFontName;
+
+        /** Font size applied to the enclosing <tt>p</tt> element. */
+        final int pFontSize;
+
+        public BlockProperies( String fontName, int fontSize )
+        {
+            pFontName = fontName;
+            pFontSize = fontSize;
+        }
+    }
+
private static final POILogger logger = POILogFactory
.getLogger( WordToHtmlConverter.class );
.addDescription( summaryInformation.getComments() );
}
+    /**
+     * Appends to <tt>block</tt> an <tt>img</tt> element, created by the HTML
+     * document facade, that references the saved picture path.
+     */
+    @Override
+    protected void processDrawnObject( HWPFDocument doc,
+            CharacterRun characterRun, OfficeDrawing officeDrawing,
+            String path, Element block )
+    {
+        block.appendChild( htmlDocumentFacade.createImage( path ) );
+    }
+
@Override
protected void processEndnoteAutonumbered( HWPFDocument doc, int noteIndex,
Element block, Range endnoteTextRange )
PicturesManager fileManager = getPicturesManager();
if ( fileManager != null )
{
- String url = fileManager.savePicture( picture );
+ String url = fileManager
+ .savePicture( picture.getContent(),
+ picture.suggestPictureType(),
+ picture.suggestFullFileName() );
if ( WordToHtmlUtils.isNotEmpty( url ) )
{
+ "in;overflow:hidden;" ) );
root.appendChild( inner );
- Element image = htmlDocumentFacade.document.createElement( "img" );
- image.setAttribute( "src", imageSourcePath );
+ Element image = htmlDocumentFacade.createImage( imageSourcePath );
image.setAttribute( "class", htmlDocumentFacade
.getOrCreateCssClass( image.getTagName(), "i",
"position:absolute;left:-" + cropLeft + ";top:-"
}
else
{
- root = htmlDocumentFacade.document.createElement( "img" );
- root.setAttribute( "src", imageSourcePath );
+ root = htmlDocumentFacade.createImage( imageSourcePath );
root.setAttribute( "style", "width:" + imageWidth + "in;height:"
+ imageHeight + "in;vertical-align:text-bottom;" );
}
}
}
- /**
- * Holds properties values, applied to current <tt>p</tt> element. Those
- * properties shall not be doubled in children <tt>span</tt> elements.
- */
- private static class BlockProperies
- {
- final String pFontName;
- final int pFontSize;
-
- public BlockProperies( String pFontName, int pFontSize )
- {
- this.pFontName = pFontName;
- this.pFontSize = pFontSize;
- }
- }
-
}
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
+
import org.apache.poi.ddf.DefaultEscherRecordFactory;
import org.apache.poi.ddf.EscherContainerRecord;
import org.apache.poi.ddf.EscherRecord;
// Not found in this lot
return null;
}
+
+    /**
+     * Returns the top-level records with record id <tt>0xF002</tt> (the
+     * "Dg" containers).
+     *
+     * @return a new list, possibly empty, never <tt>null</tt>
+     */
+    public List<? extends EscherContainerRecord> getDgContainers()
+    {
+        List<EscherContainerRecord> dgContainers = new ArrayList<EscherContainerRecord>(
+                1 );
+        addContainers( getEscherRecords(), (short) 0xF002, dgContainers );
+        return dgContainers;
+    }
+
+    /**
+     * Returns the top-level records with record id <tt>0xF000</tt> (the
+     * "Dgg" containers).
+     *
+     * @return a new list, possibly empty, never <tt>null</tt>
+     */
+    public List<? extends EscherContainerRecord> getDggContainers()
+    {
+        List<EscherContainerRecord> dggContainers = new ArrayList<EscherContainerRecord>(
+                1 );
+        addContainers( getEscherRecords(), (short) 0xF000, dggContainers );
+        return dggContainers;
+    }
+
+    /**
+     * Returns the direct children of the Dgg containers that have record id
+     * <tt>0xF001</tt> (the "BStore" containers).
+     *
+     * @return a new list, possibly empty, never <tt>null</tt>
+     */
+    public List<? extends EscherContainerRecord> getBStoreContainers()
+    {
+        List<EscherContainerRecord> bStoreContainers = new ArrayList<EscherContainerRecord>(
+                1 );
+        for ( EscherContainerRecord dggContainer : getDggContainers() )
+        {
+            addContainers( dggContainer.getChildRecords(), (short) 0xF001,
+                    bStoreContainers );
+        }
+        return bStoreContainers;
+    }
+
+    /**
+     * Returns the direct children of the Dg containers that have record id
+     * <tt>0xF003</tt> (the "Spgr" containers).
+     *
+     * @return a new list, possibly empty, never <tt>null</tt>
+     */
+    public List<? extends EscherContainerRecord> getSpgrContainers()
+    {
+        List<EscherContainerRecord> spgrContainers = new ArrayList<EscherContainerRecord>(
+                1 );
+        for ( EscherContainerRecord dgContainer : getDgContainers() )
+        {
+            addContainers( dgContainer.getChildRecords(), (short) 0xF003,
+                    spgrContainers );
+        }
+        return spgrContainers;
+    }
+
+    /**
+     * Returns the direct children of the Spgr containers that have record id
+     * <tt>0xF004</tt> (the "Sp" containers). Only direct children are
+     * inspected; nested groups are not descended into.
+     *
+     * @return a new list, possibly empty, never <tt>null</tt>
+     */
+    public List<? extends EscherContainerRecord> getSpContainers()
+    {
+        List<EscherContainerRecord> spContainers = new ArrayList<EscherContainerRecord>(
+                1 );
+        for ( EscherContainerRecord spgrContainer : getSpgrContainers() )
+        {
+            addContainers( spgrContainer.getChildRecords(), (short) 0xF004,
+                    spContainers );
+        }
+        return spContainers;
+    }
+
+    /**
+     * Appends to <tt>target</tt> every record from <tt>records</tt> that has
+     * the requested record id and is an {@link EscherContainerRecord}.
+     *
+     * @param records
+     *            records to inspect, in iteration order
+     * @param recordId
+     *            record id to select
+     * @param target
+     *            list receiving the matching containers
+     */
+    private static void addContainers(
+            Iterable<? extends EscherRecord> records, short recordId,
+            List<EscherContainerRecord> target )
+    {
+        for ( EscherRecord escherRecord : records )
+        {
+            // A matching id does not by itself prove the record was parsed
+            // as a container (e.g. in a damaged file), so check the type
+            // instead of risking a ClassCastException.
+            if ( escherRecord.getRecordId() == recordId
+                    && escherRecord instanceof EscherContainerRecord )
+            {
+                target.add( (EscherContainerRecord) escherRecord );
+            }
+        }
+    }
}
+/* ====================================================================
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+==================================================================== */
package org.apache.poi.hwpf.usermodel;
+/**
+ * User-friendly interface to office drawing objects
+ *
+ * @author Sergey Vladimirov (vlsergey {at} gmail {dot} com)
+ */
public interface OfficeDrawing
{
/**
- * Shape Identifier
+ * Returns picture data if this shape has (single?) associated picture data
*/
- int getShapeId();
+ byte[] getPictureData();
/**
- * Left of rectangle enclosing shape relative to the origin of the shape
+ * Bottom of the rectangle enclosing shape relative to the origin of the
+ * shape
*/
- int getRectangleLeft();
+ int getRectangleBottom();
/**
- * Top of rectangle enclosing shape relative to the origin of the shape
+ * Left of rectangle enclosing shape relative to the origin of the shape
*/
- int getRectangleTop();
+ int getRectangleLeft();
/**
* Right of rectangle enclosing shape relative to the origin of the shape
int getRectangleRight();
/**
- * Bottom of the rectangle enclosing shape relative to the origin of the
- * shape
+ * Top of rectangle enclosing shape relative to the origin of the shape
*/
- int getRectangleBottom();
+ int getRectangleTop();
+
+ /**
+ * Shape Identifier
+ */
+ int getShapeId();
}
+/* ====================================================================
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+==================================================================== */
package org.apache.poi.hwpf.usermodel;
import java.util.Collection;
+/**
+ * User-friendly interface to access document part's office drawings
+ *
+ * @author Sergey Vladimirov (vlsergey {at} gmail {dot} com)
+ */
public interface OfficeDrawings
{
OfficeDrawing getOfficeDrawingAt( int characterPosition );
+/* ====================================================================
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+==================================================================== */
package org.apache.poi.hwpf.usermodel;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
+import org.apache.poi.ddf.DefaultEscherRecordFactory;
+import org.apache.poi.ddf.EscherBSERecord;
+import org.apache.poi.ddf.EscherBlipRecord;
+import org.apache.poi.ddf.EscherContainerRecord;
+import org.apache.poi.ddf.EscherOptRecord;
+import org.apache.poi.ddf.EscherProperties;
+import org.apache.poi.ddf.EscherRecord;
+import org.apache.poi.ddf.EscherRecordFactory;
+import org.apache.poi.ddf.EscherSimpleProperty;
+import org.apache.poi.ddf.EscherSpRecord;
import org.apache.poi.hwpf.model.EscherRecordHolder;
import org.apache.poi.hwpf.model.FSPA;
import org.apache.poi.hwpf.model.FSPATable;
{
private final EscherRecordHolder _escherRecordHolder;
private final FSPATable _fspaTable;
+ private final byte[] _mainStream;
public OfficeDrawingsImpl( FSPATable fspaTable,
- EscherRecordHolder escherRecordHolder )
+ EscherRecordHolder escherRecordHolder, byte[] mainStream )
{
this._fspaTable = fspaTable;
this._escherRecordHolder = escherRecordHolder;
+ this._mainStream = mainStream;
+ }
+
+    /**
+     * Searches the Sp containers of the record holder for the one whose
+     * shape record (child with id <tt>0xF00A</tt>) carries the given shape
+     * id.
+     *
+     * @param shapeId
+     *            shape identifier to look for
+     * @return the first matching container, or <tt>null</tt> if none matches
+     */
+    private EscherContainerRecord getEscherShapeRecordContainer(
+            final int shapeId )
+    {
+        for ( EscherContainerRecord candidate : _escherRecordHolder
+                .getSpContainers() )
+        {
+            EscherSpRecord spRecord = candidate
+                    .getChildById( (short) 0xF00A );
+            if ( spRecord == null )
+                continue;
+
+            if ( spRecord.getShapeId() == shapeId )
+                return candidate;
+        }
+
+        return null;
+    }
+
+    /**
+     * Looks up the blip record addressed by a one-based index into the
+     * BStore container.
+     * <p>
+     * The indexed entry is either a blip record itself or a BSE record. For
+     * a BSE record the embedded blip is used when present; otherwise, when
+     * the BSE record carries a positive offset, the blip is parsed from the
+     * main stream at that offset.
+     *
+     * @param bitmapIndex
+     *            one-based index of the entry inside the BStore container
+     * @return the blip record, or <tt>null</tt> if there is not exactly one
+     *         BStore container, the index addresses no entry, or no blip
+     *         could be resolved for the entry
+     */
+    private EscherBlipRecord getBitmapRecord( int bitmapIndex )
+    {
+        List<? extends EscherContainerRecord> bContainers = _escherRecordHolder
+                .getBStoreContainers();
+        if ( bContainers == null || bContainers.size() != 1 )
+            return null;
+
+        EscherContainerRecord bContainer = bContainers.get( 0 );
+        final List<EscherRecord> bitmapRecords = bContainer.getChildRecords();
+
+        // The index is one-based: values below 1 would turn into a negative
+        // list index, values above the size point past the last entry.
+        if ( bitmapIndex < 1 || bitmapRecords.size() < bitmapIndex )
+            return null;
+
+        EscherRecord imageRecord = bitmapRecords.get( bitmapIndex - 1 );
+
+        if ( imageRecord instanceof EscherBlipRecord )
+        {
+            return (EscherBlipRecord) imageRecord;
+        }
+
+        if ( imageRecord instanceof EscherBSERecord )
+        {
+            EscherBSERecord bseRecord = (EscherBSERecord) imageRecord;
+
+            EscherBlipRecord blip = bseRecord.getBlipRecord();
+            if ( blip != null )
+            {
+                return blip;
+            }
+
+            final int blipOffset = bseRecord.getOffset();
+            if ( blipOffset > 0 )
+            {
+                /*
+                 * Blip stored in delay stream, which in a word doc, is the main
+                 * stream
+                 */
+                EscherRecordFactory recordFactory = new DefaultEscherRecordFactory();
+                EscherRecord record = recordFactory.createRecord( _mainStream,
+                        blipOffset );
+
+                if ( record instanceof EscherBlipRecord )
+                {
+                    record.fillFields( _mainStream, blipOffset,
+                            recordFactory );
+                    return (EscherBlipRecord) record;
+                }
+            }
+        }
+
+        return null;
}
private OfficeDrawing getOfficeDrawing( final FSPA fspa )
return fspa.getSpid();
}
+            /**
+             * Resolves the picture bytes for this drawing: finds the shape
+             * container for the shape id, reads the blip-to-display property
+             * from its options record (child id <tt>0xF00B</tt>) and fetches
+             * the blip record that property addresses.
+             *
+             * @return the picture data, or <tt>null</tt> if any step of the
+             *         lookup yields nothing
+             */
+            public byte[] getPictureData()
+            {
+                EscherBlipRecord blip = null;
+
+                final EscherContainerRecord spContainer = getEscherShapeRecordContainer( getShapeId() );
+                if ( spContainer != null )
+                {
+                    final EscherOptRecord options = spContainer
+                            .getChildById( (short) 0xF00B );
+                    if ( options != null )
+                    {
+                        final EscherSimpleProperty blipToDisplay = options
+                                .lookup( EscherProperties.BLIP__BLIPTODISPLAY );
+                        if ( blipToDisplay != null )
+                        {
+                            blip = getBitmapRecord( blipToDisplay
+                                    .getPropertyValue() );
+                        }
+                    }
+                }
+
+                return blip == null ? null : blip.getPicturedata();
+            }
+
@Override
public String toString()
{
/**
* Represents embedded picture extracted from Word Document
+ *
* @author Dmitry Romanov
*/
public final class Picture extends PictureDescriptor
{
- private static final POILogger log = POILogFactory.getLogger(Picture.class);
+ private static final POILogger log = POILogFactory
+ .getLogger( Picture.class );
-// public static final int FILENAME_OFFSET = 0x7C;
-// public static final int FILENAME_SIZE_OFFSET = 0x6C;
- static final int PICF_OFFSET = 0x0;
- static final int PICT_HEADER_OFFSET = 0x4;
- static final int MFPMM_OFFSET = 0x6;
- static final int PICF_SHAPE_OFFSET = 0xE;
- static final int UNKNOWN_HEADER_SIZE = 0x49;
+ // public static final int FILENAME_OFFSET = 0x7C;
+ // public static final int FILENAME_SIZE_OFFSET = 0x6C;
+ static final int PICF_OFFSET = 0x0;
+ static final int PICT_HEADER_OFFSET = 0x4;
+ static final int MFPMM_OFFSET = 0x6;
+ static final int PICF_SHAPE_OFFSET = 0xE;
+ static final int UNKNOWN_HEADER_SIZE = 0x49;
@Deprecated
public static final byte[] GIF = PictureType.GIF.getSignatures()[0];
public static final byte[] WMF2 = PictureType.WMF.getSignatures()[1];
// TODO: DIB, PICT
- public static final byte[] IHDR = new byte[]{'I', 'H', 'D', 'R'};
+ public static final byte[] IHDR = new byte[] { 'I', 'H', 'D', 'R' };
- public static final byte[] COMPRESSED1 = { (byte)0xFE, 0x78, (byte)0xDA };
- public static final byte[] COMPRESSED2 = { (byte)0xFE, 0x78, (byte)0x9C };
+ public static final byte[] COMPRESSED1 = { (byte) 0xFE, 0x78, (byte) 0xDA };
+ public static final byte[] COMPRESSED2 = { (byte) 0xFE, 0x78, (byte) 0x9C };
- private int dataBlockStartOfsset;
- private int pictureBytesStartOffset;
- private int dataBlockSize;
- private int size;
-// private String fileName;
- private byte[] rawContent;
- private byte[] content;
- private byte[] _dataStream;
- private int height = -1;
- private int width = -1;
+ private int dataBlockStartOfsset;
+ private int pictureBytesStartOffset;
+ private int dataBlockSize;
+ private int size;
+ // private String fileName;
+ private byte[] rawContent;
+ private byte[] content;
+ private byte[] _dataStream;
+ private int height = -1;
+ private int width = -1;
- public Picture(int dataBlockStartOfsset, byte[] _dataStream, boolean fillBytes)
- {
- super (_dataStream, dataBlockStartOfsset);
-
- this._dataStream = _dataStream;
- this.dataBlockStartOfsset = dataBlockStartOfsset;
- this.dataBlockSize = LittleEndian.getInt(_dataStream, dataBlockStartOfsset);
- this.pictureBytesStartOffset = getPictureBytesStartOffset(dataBlockStartOfsset, _dataStream, dataBlockSize);
- this.size = dataBlockSize - (pictureBytesStartOffset - dataBlockStartOfsset);
+ public Picture( int dataBlockStartOfsset, byte[] _dataStream,
+ boolean fillBytes )
+ {
+ super( _dataStream, dataBlockStartOfsset );
+
+ this._dataStream = _dataStream;
+ this.dataBlockStartOfsset = dataBlockStartOfsset;
+ this.dataBlockSize = LittleEndian.getInt( _dataStream,
+ dataBlockStartOfsset );
+ this.pictureBytesStartOffset = getPictureBytesStartOffset(
+ dataBlockStartOfsset, _dataStream, dataBlockSize );
+ this.size = dataBlockSize
+ - ( pictureBytesStartOffset - dataBlockStartOfsset );
+
+ if ( size < 0 )
+ {
- if (size<0) {
+ }
+ if ( fillBytes )
+ {
+ fillImageContent();
+ }
}
- if (fillBytes)
+ public Picture( byte[] _dataStream )
{
- fillImageContent();
- }
- }
-
- public Picture(byte[] _dataStream)
- {
super();
- this._dataStream = _dataStream;
- this.dataBlockStartOfsset = 0;
- this.dataBlockSize = _dataStream.length;
- this.pictureBytesStartOffset = 0;
- this.size = _dataStream.length;
- }
+ this._dataStream = _dataStream;
+ this.dataBlockStartOfsset = 0;
+ this.dataBlockSize = _dataStream.length;
+ this.pictureBytesStartOffset = 0;
+ this.size = _dataStream.length;
+ }
private void fillWidthHeight()
{
}
}
- /**
- * Tries to suggest a filename: hex representation of picture structure offset in "Data" stream plus extension that
- * is tried to determine from first byte of picture's content.
- *
- * @return suggested file name
- */
- public String suggestFullFileName()
- {
- String fileExt = suggestFileExtension();
- return Integer.toHexString(dataBlockStartOfsset) + (fileExt.length()>0 ? "."+fileExt : "");
- }
-
- /**
- * Writes Picture's content bytes to specified OutputStream.
- * Is useful when there is need to write picture bytes directly to stream, omitting its representation in
- * memory as distinct byte array.
- *
- * @param out a stream to write to
- * @throws IOException if some exception is occured while writing to specified out
- */
- public void writeImageContent(OutputStream out) throws IOException
- {
- if (rawContent!=null && rawContent.length>0) {
- out.write(rawContent, 0, size);
- } else {
- out.write(_dataStream, pictureBytesStartOffset, size);
- }
- }
-
- /**
- * @return The offset of this picture in the picture bytes, used
- * when matching up with {@link CharacterRun#getPicOffset()}
- */
- public int getStartOffset() {
- return dataBlockStartOfsset;
- }
+    /**
+     * Suggests a file name for this picture: the hexadecimal form of the
+     * data block start offset, followed by a dot and the extension returned
+     * by {@link #suggestFileExtension()} when that extension is not empty.
+     *
+     * @return suggested file name
+     */
+    public String suggestFullFileName()
+    {
+        final String extension = suggestFileExtension();
+        final String baseName = Integer.toHexString( dataBlockStartOfsset );
+        if ( extension.length() > 0 )
+        {
+            return baseName + "." + extension;
+        }
+        return baseName;
+    }
+
+    /**
+     * Writes the picture bytes straight to the given stream, without first
+     * building a separate byte array for them. The cached raw content is
+     * used when it is available; otherwise the bytes are taken from the data
+     * stream, starting at the picture bytes offset.
+     *
+     * @param out
+     *            a stream to write to
+     * @throws IOException
+     *             if an error occurred while writing to <tt>out</tt>
+     */
+    public void writeImageContent( OutputStream out ) throws IOException
+    {
+        final boolean hasRawCopy = rawContent != null
+                && rawContent.length > 0;
+        final byte[] source = hasRawCopy ? rawContent : _dataStream;
+        final int from = hasRawCopy ? 0 : pictureBytesStartOffset;
+        out.write( source, from, size );
+    }
+
+ /**
+ * Returns the offset at which this picture's data block starts, as passed
+ * to the constructor (zero for pictures built from a bare byte array).
+ *
+ * @return The offset of this picture in the picture bytes, used when
+ * matching up with {@link CharacterRun#getPicOffset()}
+ */
+ public int getStartOffset()
+ {
+ return dataBlockStartOfsset;
+ }
/**
* @return picture's content as byte array
return rawContent;
}
- /**
- *
- * @return size in bytes of the picture
- */
- public int getSize()
- {
- return size;
- }
+ /**
+ * Returns the number of picture bytes: the data block size minus the
+ * distance from the start of the data block to the start of the picture
+ * bytes. For pictures built from a bare byte array this is the array
+ * length.
+ *
+ * @return size in bytes of the picture
+ */
+ public int getSize()
+ {
+ return size;
+ }
/**
* @return the horizontal aspect ratio for picture provided by user
}
/**
- * Gets the initial width of the picture, in twips, prior to cropping or scaling.
- *
+ * Gets the initial width of the picture, in twips, prior to cropping or
+ * scaling.
+ *
* @return the initial width of the picture in twips
*/
- public int getDxaGoal() {
+ public int getDxaGoal()
+ {
return dxaGoal;
}
/**
- * Gets the initial height of the picture, in twips, prior to cropping or scaling.
- *
+ * Gets the initial height of the picture, in twips, prior to cropping or
+ * scaling.
+ *
* @return the initial width of the picture in twips
*/
- public int getDyaGoal() {
+ public int getDyaGoal()
+ {
return dyaGoal;
}
/**
* @return The amount the picture has been cropped on the left in twips
*/
- public int getDxaCropLeft() {
+ public int getDxaCropLeft()
+ {
return dxaCropLeft;
}
/**
* @return The amount the picture has been cropped on the top in twips
*/
- public int getDyaCropTop() {
+ public int getDyaCropTop()
+ {
return dyaCropTop;
}
/**
* @return The amount the picture has been cropped on the right in twips
*/
- public int getDxaCropRight() {
+ public int getDxaCropRight()
+ {
return dxaCropRight;
}
/**
* @return The amount the picture has been cropped on the bottom in twips
*/
- public int getDyaCropBottom() {
+ public int getDyaCropBottom()
+ {
return dyaCropBottom;
}
public PictureType suggestPictureType()
{
- final byte[] imageContent = getContent();
- for ( PictureType pictureType : PictureType.values() )
- for ( byte[] signature : pictureType.getSignatures() )
- if ( matchSignature( imageContent, signature, 0 ) )
- return pictureType;
-
- // TODO: DIB, PICT
- return PictureType.UNKNOWN;
- }
-
- private static boolean matchSignature(byte[] dataStream, byte[] signature, int pictureBytesOffset)
- {
- boolean matched = pictureBytesOffset < dataStream.length;
- for (int i = 0; (i+pictureBytesOffset) < dataStream.length && i < signature.length; i++)
- {
- if (dataStream[i+pictureBytesOffset] != signature[i])
- {
- matched = false;
- break;
- }
- }
- return matched;
- }
-
-// public String getFileName()
-// {
-// return fileName;
-// }
-
-// private static String extractFileName(int blockStartIndex, byte[] dataStream) {
-// int fileNameStartOffset = blockStartIndex + 0x7C;
-// int fileNameSizeOffset = blockStartIndex + FILENAME_SIZE_OFFSET;
-// int fileNameSize = LittleEndian.getShort(dataStream, fileNameSizeOffset);
-//
-// int fileNameIndex = fileNameStartOffset;
-// char[] fileNameChars = new char[(fileNameSize-1)/2];
-// int charIndex = 0;
-// while(charIndex<fileNameChars.length) {
-// short aChar = LittleEndian.getShort(dataStream, fileNameIndex);
-// fileNameChars[charIndex] = (char)aChar;
-// charIndex++;
-// fileNameIndex += 2;
-// }
-// String fileName = new String(fileNameChars);
-// return fileName.trim();
-// }
-
- private void fillRawImageContent()
- {
+ return PictureType.findMatchingType( getContent() );
+ }
+
+ // public String getFileName()
+ // {
+ // return fileName;
+ // }
+
+ // private static String extractFileName(int blockStartIndex, byte[]
+ // dataStream) {
+ // int fileNameStartOffset = blockStartIndex + 0x7C;
+ // int fileNameSizeOffset = blockStartIndex + FILENAME_SIZE_OFFSET;
+ // int fileNameSize = LittleEndian.getShort(dataStream, fileNameSizeOffset);
+ //
+ // int fileNameIndex = fileNameStartOffset;
+ // char[] fileNameChars = new char[(fileNameSize-1)/2];
+ // int charIndex = 0;
+ // while(charIndex<fileNameChars.length) {
+ // short aChar = LittleEndian.getShort(dataStream, fileNameIndex);
+ // fileNameChars[charIndex] = (char)aChar;
+ // charIndex++;
+ // fileNameIndex += 2;
+ // }
+ // String fileName = new String(fileNameChars);
+ // return fileName.trim();
+ // }
+
+ private void fillRawImageContent()
+ {
if ( rawContent != null && rawContent.length > 0 )
return;
- this.rawContent = new byte[size];
- System.arraycopy(_dataStream, pictureBytesStartOffset, rawContent, 0, size);
- }
+ this.rawContent = new byte[size];
+ System.arraycopy( _dataStream, pictureBytesStartOffset, rawContent, 0,
+ size );
+ }
- private void fillImageContent()
- {
+ private void fillImageContent()
+ {
if ( content != null && content.length > 0 )
return;
- byte[] rawContent = getRawContent();
-
- // HACK: Detect compressed images. In reality there should be some way to determine
- // this from the first 32 bytes, but I can't see any similarity between all the
- // samples I have obtained, nor any similarity in the data block contents.
- if (matchSignature(rawContent, COMPRESSED1, 32) || matchSignature(rawContent, COMPRESSED2, 32))
- {
- try
- {
- InflaterInputStream in = new InflaterInputStream(
- new ByteArrayInputStream(rawContent, 33, rawContent.length - 33));
- ByteArrayOutputStream out = new ByteArrayOutputStream();
- byte[] buf = new byte[4096];
- int readBytes;
- while ((readBytes = in.read(buf)) > 0)
+ byte[] rawContent = getRawContent();
+
+ // HACK: Detect compressed images. In reality there should be some way
+ // to determine
+ // this from the first 32 bytes, but I can't see any similarity between
+ // all the
+ // samples I have obtained, nor any similarity in the data block
+ // contents.
+ if ( matchSignature( rawContent, COMPRESSED1, 32 )
+ || matchSignature( rawContent, COMPRESSED2, 32 ) )
+ {
+ try
+ {
+ InflaterInputStream in = new InflaterInputStream(
+ new ByteArrayInputStream( rawContent, 33,
+ rawContent.length - 33 ) );
+ ByteArrayOutputStream out = new ByteArrayOutputStream();
+ byte[] buf = new byte[4096];
+ int readBytes;
+ while ( ( readBytes = in.read( buf ) ) > 0 )
+ {
+ out.write( buf, 0, readBytes );
+ }
+ content = out.toByteArray();
+ }
+ catch ( IOException e )
+ {
+ // Problems reading from the actual ByteArrayInputStream should
+ // never happen
+ // so this will only ever be a ZipException.
+ log.log( POILogger.INFO,
+ "Possibly corrupt compression or non-compressed data",
+ e );
+ }
+ }
+ else
+ {
+ // Raw data is not compressed.
+ content = rawContent;
+ }
+ }
+
+    /**
+     * Checks whether <tt>signature</tt> occurs in <tt>pictureData</tt>
+     * starting at <tt>offset</tt>.
+     *
+     * @param pictureData
+     *            bytes to inspect
+     * @param signature
+     *            expected byte sequence
+     * @param offset
+     *            position in <tt>pictureData</tt> at which the signature is
+     *            expected to start
+     * @return <tt>true</tt> only if the complete signature fits into the data
+     *         at the given offset and every byte is equal
+     */
+    private static boolean matchSignature( byte[] pictureData,
+            byte[] signature, int offset )
+    {
+        // A signature that starts outside the data, or that would run past
+        // its end, cannot match. Comparing only the bytes that happen to be
+        // present would report truncated data as a match.
+        if ( offset < 0 || offset >= pictureData.length
+                || signature.length > pictureData.length - offset )
+        {
+            return false;
+        }
+
+        for ( int i = 0; i < signature.length; i++ )
+        {
+            if ( pictureData[offset + i] != signature[i] )
+            {
+                return false;
+            }
+        }
+        return true;
+    }
+
+ private static int getPictureBytesStartOffset( int dataBlockStartOffset,
+ byte[] _dataStream, int dataBlockSize )
+ {
+ int realPicoffset = dataBlockStartOffset;
+ final int dataBlockEndOffset = dataBlockSize + dataBlockStartOffset;
+
+ // Skip over the PICT block
+ int PICTFBlockSize = LittleEndian.getShort( _dataStream,
+ dataBlockStartOffset + PICT_HEADER_OFFSET ); // Should be 68
+ // bytes
+
+ // Now the PICTF1
+ int PICTF1BlockOffset = PICTFBlockSize + PICT_HEADER_OFFSET;
+ short MM_TYPE = LittleEndian.getShort( _dataStream,
+ dataBlockStartOffset + PICT_HEADER_OFFSET + 2 );
+ if ( MM_TYPE == 0x66 )
+ {
+ // Skip the stPicName
+ int cchPicName = LittleEndian.getUnsignedByte( _dataStream,
+ PICTF1BlockOffset );
+ PICTF1BlockOffset += 1 + cchPicName;
+ }
+ int PICTF1BlockSize = LittleEndian.getShort( _dataStream,
+ dataBlockStartOffset + PICTF1BlockOffset );
+
+ int unknownHeaderOffset = ( PICTF1BlockSize + PICTF1BlockOffset ) < dataBlockEndOffset ? ( PICTF1BlockSize + PICTF1BlockOffset )
+ : PICTF1BlockOffset;
+ realPicoffset += ( unknownHeaderOffset + UNKNOWN_HEADER_SIZE );
+ if ( realPicoffset >= dataBlockEndOffset )
{
- out.write(buf, 0, readBytes);
+ realPicoffset -= UNKNOWN_HEADER_SIZE;
}
- content = out.toByteArray();
- }
- catch (IOException e)
- {
- // Problems reading from the actual ByteArrayInputStream should never happen
- // so this will only ever be a ZipException.
- log.log(POILogger.INFO, "Possibly corrupt compression or non-compressed data", e);
- }
- } else {
- // Raw data is not compressed.
- content = rawContent;
- }
- }
-
- private static int getPictureBytesStartOffset(int dataBlockStartOffset, byte[] _dataStream, int dataBlockSize)
- {
- int realPicoffset = dataBlockStartOffset;
- final int dataBlockEndOffset = dataBlockSize + dataBlockStartOffset;
-
- // Skip over the PICT block
- int PICTFBlockSize = LittleEndian.getShort(_dataStream, dataBlockStartOffset +PICT_HEADER_OFFSET); // Should be 68 bytes
-
- // Now the PICTF1
- int PICTF1BlockOffset = PICTFBlockSize + PICT_HEADER_OFFSET;
- short MM_TYPE = LittleEndian.getShort(_dataStream, dataBlockStartOffset + PICT_HEADER_OFFSET + 2);
- if(MM_TYPE == 0x66) {
- // Skip the stPicName
- int cchPicName = LittleEndian.getUnsignedByte(_dataStream, PICTF1BlockOffset);
- PICTF1BlockOffset += 1 + cchPicName;
- }
- int PICTF1BlockSize = LittleEndian.getShort(_dataStream, dataBlockStartOffset +PICTF1BlockOffset);
-
- int unknownHeaderOffset = (PICTF1BlockSize + PICTF1BlockOffset) < dataBlockEndOffset ? (PICTF1BlockSize + PICTF1BlockOffset) : PICTF1BlockOffset;
- realPicoffset += (unknownHeaderOffset + UNKNOWN_HEADER_SIZE);
- if (realPicoffset>=dataBlockEndOffset) {
- realPicoffset -= UNKNOWN_HEADER_SIZE;
- }
- return realPicoffset;
- }
-
- private void fillJPGWidthHeight() {
- /*
- http://www.codecomments.com/archive281-2004-3-158083.html
-
- Algorhitm proposed by Patrick TJ McPhee:
-
- read 2 bytes
- make sure they are 'ffd8'x
- repeatedly:
- read 2 bytes
- make sure the first one is 'ff'x
- if the second one is 'd9'x stop
- else if the second one is c0 or c2 (or possibly other values ...)
- skip 2 bytes
- read one byte into depth
- read two bytes into height
- read two bytes into width
- else
- read two bytes into length
- skip forward length-2 bytes
-
- Also used Ruby code snippet from: http://www.bigbold.com/snippets/posts/show/805 for reference
- */
- int pointer = pictureBytesStartOffset+2;
- int firstByte = _dataStream[pointer];
- int secondByte = _dataStream[pointer+1];
-
- int endOfPicture = pictureBytesStartOffset + size;
- while(pointer<endOfPicture-1) {
- do {
- firstByte = _dataStream[pointer];
- secondByte = _dataStream[pointer+1];
- pointer += 2;
- } while (!(firstByte==(byte)0xFF) && pointer<endOfPicture-1);
-
- if (firstByte==((byte)0xFF) && pointer<endOfPicture-1) {
- if (secondByte==(byte)0xD9 || secondByte==(byte)0xDA) {
- break;
- } else if ( (secondByte & 0xF0) == 0xC0 && secondByte!=(byte)0xC4 && secondByte!=(byte)0xC8 && secondByte!=(byte)0xCC) {
- pointer += 5;
- this.height = getBigEndianShort(_dataStream, pointer);
- this.width = getBigEndianShort(_dataStream, pointer+2);
- break;
- } else {
- pointer++;
- pointer++;
- int length = getBigEndianShort(_dataStream, pointer);
- pointer+=length;
+ return realPicoffset;
+ }
+
+ private void fillJPGWidthHeight()
+ {
+ /*
+ * Walks the JPEG data of this picture looking for a frame header and
+ * stores the dimensions found there in this.height and this.width.
+ * Both fields are left untouched when no such header is found.
+ *
+ * Source: http://www.codecomments.com/archive281-2004-3-158083.html
+ *
+ * Algorithm proposed by Patrick TJ McPhee:
+ *
+ * 1. read 2 bytes, make sure they are 'ffd8'x
+ * 2. repeatedly:
+ * 2a. read 2 bytes, make sure the first one is 'ff'x
+ * 2b. if the second one is 'd9'x: stop
+ * 2c. else if the second one is c0 or c2 (or possibly other values
+ * ...): skip 2 bytes, read one byte into depth, read two bytes into
+ * height, read two bytes into width
+ * 2d. else: read two bytes into length, skip forward length-2 bytes
+ *
+ * Also used Ruby code snippet from:
+ * http://www.bigbold.com/snippets/posts/show/805 for reference
+ */
+ // Start two bytes into the picture data (step 1 of the algorithm; the
+ // 'ffd8'x value itself is not verified here).
+ int pointer = pictureBytesStartOffset + 2;
+ // These initial values are overwritten by the first pass of the do-loop.
+ int firstByte = _dataStream[pointer];
+ int secondByte = _dataStream[pointer + 1];
+
+ int endOfPicture = pictureBytesStartOffset + size;
+ while ( pointer < endOfPicture - 1 )
+ {
+ // Step through the data two bytes at a time until a pair starting
+ // with 0xFF is read or the end of the picture is reached.
+ do
+ {
+ firstByte = _dataStream[pointer];
+ secondByte = _dataStream[pointer + 1];
+ pointer += 2;
+ }
+ while ( !( firstByte == (byte) 0xFF ) && pointer < endOfPicture - 1 );
+
+ if ( firstByte == ( (byte) 0xFF ) && pointer < endOfPicture - 1 )
+ {
+ // 0xD9 (end of image) or 0xDA (start of scan): stop searching
+ // without setting the dimensions.
+ if ( secondByte == (byte) 0xD9 || secondByte == (byte) 0xDA )
+ {
+ break;
+ }
+ // Any 0xC0..0xCF marker except 0xC4, 0xC8 and 0xCC is treated
+ // as the frame header that carries height and width.
+ else if ( ( secondByte & 0xF0 ) == 0xC0
+ && secondByte != (byte) 0xC4
+ && secondByte != (byte) 0xC8
+ && secondByte != (byte) 0xCC )
+ {
+ pointer += 5;
+ this.height = getBigEndianShort( _dataStream, pointer );
+ this.width = getBigEndianShort( _dataStream, pointer + 2 );
+ break;
+ }
+ // Any other marker: read a big-endian length and jump ahead by
+ // that many bytes.
+ else
+ {
+ pointer++;
+ pointer++;
+ int length = getBigEndianShort( _dataStream, pointer );
+ pointer += length;
+ }
+ }
+ // No usable 0xFF pair in this step: move one byte forward and retry.
+ else
+ {
+ pointer++;
+ }
}
- } else {
- pointer++;
- }
- }
- }
-
- private void fillPNGWidthHeight()
- {
- /*
- Used PNG file format description from http://www.wotsit.org/download.asp?f=png
- */
- int HEADER_START = pictureBytesStartOffset + PNG.length + 4;
- if (matchSignature(_dataStream, IHDR, HEADER_START)) {
- int IHDR_CHUNK_WIDTH = HEADER_START + 4;
- this.width = getBigEndianInt(_dataStream, IHDR_CHUNK_WIDTH);
- this.height = getBigEndianInt(_dataStream, IHDR_CHUNK_WIDTH + 4);
- }
- }
-
- /**
- * returns pixel width of the picture or -1 if dimensions determining was failed
- */
- public int getWidth()
- {
- if (width == -1)
- {
- fillWidthHeight();
- }
- return width;
- }
-
- /**
- * returns pixel height of the picture or -1 if dimensions determining was failed
- */
- public int getHeight()
- {
- if (height == -1)
- {
- fillWidthHeight();
- }
- return height;
- }
-
- private static int getBigEndianInt(byte[] data, int offset)
- {
- return (((data[offset] & 0xFF)<< 24) + ((data[offset +1] & 0xFF) << 16) + ((data[offset + 2] & 0xFF) << 8) + (data[offset +3] & 0xFF));
- }
-
- private static int getBigEndianShort(byte[] data, int offset)
- {
- return (((data[offset] & 0xFF)<< 8) + (data[offset +1] & 0xFF));
- }
+ }
+
+    /**
+     * Reads the picture dimensions from the IHDR chunk of PNG data and stores
+     * them in <code>width</code> and <code>height</code>. Nothing is changed
+     * when the IHDR signature is not found at the expected position.
+     * <p>
+     * PNG file format description used:
+     * http://www.wotsit.org/download.asp?f=png
+     */
+    private void fillPNGWidthHeight()
+    {
+        // position right after the PNG signature plus four more bytes
+        final int chunkTypeOffset = pictureBytesStartOffset + PNG.length + 4;
+        if ( !matchSignature( _dataStream, IHDR, chunkTypeOffset ) )
+        {
+            return;
+        }
+
+        // width follows four bytes after the chunk type position, height
+        // four bytes after the width
+        final int widthOffset = chunkTypeOffset + 4;
+        final int heightOffset = widthOffset + 4;
+        this.width = getBigEndianInt( _dataStream, widthOffset );
+        this.height = getBigEndianInt( _dataStream, heightOffset );
+    }
+
+    /**
+     * Returns the pixel width of the picture, or -1 if the dimensions could
+     * not be determined. The dimensions are computed on first use.
+     */
+    public int getWidth()
+    {
+        final boolean notComputedYet = ( width == -1 );
+        if ( notComputedYet )
+            fillWidthHeight();
+
+        return width;
+    }
+
+    /**
+     * Returns the pixel height of the picture, or -1 if the dimensions could
+     * not be determined. The dimensions are computed on first use.
+     */
+    public int getHeight()
+    {
+        final boolean notComputedYet = ( height == -1 );
+        if ( notComputedYet )
+            fillWidthHeight();
+
+        return height;
+    }
+
+    /**
+     * Combines four consecutive bytes into one integer, most significant
+     * byte first.
+     *
+     * @param data
+     *            byte array to read from
+     * @param offset
+     *            index of the most significant byte
+     * @return the 32-bit value built from the four bytes
+     */
+    private static int getBigEndianInt( byte[] data, int offset )
+    {
+        int value = 0;
+        for ( int i = 0; i < 4; i++ )
+        {
+            // make room for the next byte and append it unsigned
+            value = ( value << 8 ) | ( data[offset + i] & 0xFF );
+        }
+        return value;
+    }
+
+    /**
+     * Combines two consecutive bytes into an unsigned 16-bit value, most
+     * significant byte first.
+     *
+     * @param data
+     *            byte array to read from
+     * @param offset
+     *            index of the most significant byte
+     * @return a value in the range 0..65535
+     */
+    private static int getBigEndianShort( byte[] data, int offset )
+    {
+        final int high = data[offset] & 0xFF;
+        final int low = data[offset + 1] & 0xFF;
+        return ( high << 8 ) | low;
+    }
}
TIFF( "image/tiff", "tiff", new byte[][] { { 0x49, 0x49, 0x2A, 0x00 },
{ 0x4D, 0x4D, 0x00, 0x2A } } ),
+ UNKNOWN( "image/unknown", "", new byte[][] {} ),
+
WMF( "image/x-wmf", "wmf", new byte[][] {
{ (byte) 0xD7, (byte) 0xCD, (byte) 0xC6, (byte) 0x9A, 0x00, 0x00 },
- { 0x01, 0x00, 0x09, 0x00, 0x00, 0x03 } } ),
+ { 0x01, 0x00, 0x09, 0x00, 0x00, 0x03 } } );
+
+    /**
+     * Finds the picture type whose signature the given content starts with.
+     * Types are tried in declaration order and the first match wins.
+     *
+     * @param pictureContent
+     *            raw picture bytes
+     * @return the first matching type, or {@link #UNKNOWN} if no signature
+     *         matches
+     */
+    public static PictureType findMatchingType( byte[] pictureContent )
+    {
+        for ( PictureType candidate : PictureType.values() )
+        {
+            for ( byte[] magic : candidate.getSignatures() )
+            {
+                if ( matchSignature( pictureContent, magic ) )
+                {
+                    return candidate;
+                }
+            }
+        }
+
+        // TODO: DIB, PICT
+        return PictureType.UNKNOWN;
+    }
+
+ /**
+ * Checks whether the given data starts with the given signature.
+ * An empty signature matches any data.
+ *
+ * @param pictureData
+ * bytes to examine
+ * @param signature
+ * expected leading bytes
+ * @return true if pictureData is at least as long as signature and its
+ * first signature.length bytes are equal to signature, false otherwise
+ */
+ private static boolean matchSignature( byte[] pictureData, byte[] signature )
+ {
+ // data shorter than the signature can never match
+ if ( pictureData.length < signature.length )
+ return false;
+
+ for ( int i = 0; i < signature.length; i++ )
+ if ( pictureData[i] != signature[i] )
+ return false;
- UNKNOWN( "image/unknown", "", new byte[][] {} );
+ return true;
+ }
private String _extension;
{
return _signatures;
}
+
+    /**
+     * Tells whether the given picture content starts with one of the
+     * signatures registered for this picture type.
+     *
+     * @param pictureData
+     *            raw picture bytes to examine
+     * @return <code>true</code> if at least one signature of this type is a
+     *         prefix of <code>pictureData</code>, <code>false</code>
+     *         otherwise (always <code>false</code> for a type without
+     *         signatures)
+     */
+    public boolean matchSignature( byte[] pictureData )
+    {
+        for ( byte[] signature : getSignatures() )
+        {
+            // Argument order matters: the two-argument helper takes the
+            // data first and the signature second. Passing them swapped
+            // makes its length guard reject every picture that is longer
+            // than its signature.
+            if ( matchSignature( pictureData, signature ) )
+                return true;
+        }
+        return false;
+    }
}
import org.apache.poi.POIDataSamples;
import org.apache.poi.hwpf.HWPFDocument;
-import org.apache.poi.hwpf.usermodel.Picture;
+import org.apache.poi.hwpf.usermodel.PictureType;
import org.w3c.dom.Document;
-import org.w3c.dom.Element;
/**
* Test cases for {@link WordToHtmlConverter}
Document newDocument = DocumentBuilderFactory.newInstance()
.newDocumentBuilder().newDocument();
- WordToHtmlConverter wordToHtmlConverter = !emulatePictureStorage ? new WordToHtmlConverter(
- newDocument ) : new WordToHtmlConverter( newDocument )
+ WordToHtmlConverter wordToHtmlConverter = new WordToHtmlConverter(
+ newDocument );
+
+ if ( emulatePictureStorage )
{
- @Override
- protected void processImage( Element currentBlock, boolean inlined,
- Picture picture )
+ wordToHtmlConverter.setPicturesManager( new PicturesManager()
{
- processImage( currentBlock, inlined, picture, "picture.bin" );
- }
- };
+ public String savePicture( byte[] content,
+ PictureType pictureType, String suggestedName )
+ {
+ return suggestedName;
+ }
+ } );
+ }
+
wordToHtmlConverter.processDocument( hwpfDocument );
StringWriter stringWriter = new StringWriter();
assertContains( result, "<!--Image link to '0.emf' can be here-->" );
}
- public void testPicture() throws Exception
- {
- String result = getHtmlText( "picture.doc", true );
-
- // picture
- assertContains( result, "src=\"picture.bin\"" );
- // visible size
- assertContains( result, "width:3.1305554in;height:1.7250001in;" );
- // shift due to crop
- assertContains( result, "left:-0.09375;top:-0.25694445;" );
- // size without crop
- assertContains( result, "width:3.4125in;height:2.325in;" );
- }
-
public void testHyperlink() throws Exception
{
String result = getHtmlText( "hyperlink.doc" );
getHtmlText( "innertable.doc" );
}
- public void testTableMerges() throws Exception
- {
- String result = getHtmlText( "table-merges.doc" );
-
- assertContains( result, "<td class=\"td1\" colspan=\"3\">" );
- assertContains( result, "<td class=\"td2\" colspan=\"2\">" );
- }
-
public void testO_kurs_doc() throws Exception
{
getHtmlText( "o_kurs.doc" );
assertContains( result, "<a name=\"userref\">" );
assertContains( result, "1" );
}
+
+    /**
+     * Converts picture.doc (passing <code>true</code> as the second
+     * getHtmlText argument) and checks the image link, the sizes and the
+     * crop shift in the generated HTML.
+     */
+    public void testPicture() throws Exception
+    {
+        final String html = getHtmlText( "picture.doc", true );
+
+        final String[] expectedFragments = {
+                // picture
+                "src=\"0.emf\"",
+                // visible size
+                "width:3.1305554in;height:1.7250001in;",
+                // shift due to crop
+                "left:-0.09375;top:-0.25694445;",
+                // size without crop
+                "width:3.4125in;height:2.325in;" };
+        for ( String fragment : expectedFragments )
+        {
+            assertContains( html, fragment );
+        }
+    }
+
+    /**
+     * Converts pictures_escher.doc (passing <code>true</code> as the second
+     * getHtmlText argument) and checks that both expected image elements
+     * appear in the generated HTML.
+     */
+    public void testPicturesEscher() throws Exception
+    {
+        final String html = getHtmlText( "pictures_escher.doc", true );
+
+        final String[] expectedImages = { "<img src=\"s0.PNG\">",
+                "<img src=\"s808.PNG\">" };
+        for ( String image : expectedImages )
+        {
+            assertContains( html, image );
+        }
+    }
+
+    /**
+     * Converts table-merges.doc and checks that the expected cells with
+     * colspan attributes are present in the generated HTML.
+     */
+    public void testTableMerges() throws Exception
+    {
+        final String html = getHtmlText( "table-merges.doc" );
+
+        final String[] expectedCells = { "<td class=\"td1\" colspan=\"3\">",
+                "<td class=\"td2\" colspan=\"2\">" };
+        for ( String cell : expectedCells )
+        {
+            assertContains( html, cell );
+        }
+    }
}