public class WordToHtmlConverter extends AbstractWordConverter
{
- /**
- * Holds properties values, applied to current <tt>p</tt> element. Those
- * properties shall not be doubled in children <tt>span</tt> elements.
- */
- private static class BlockProperies
- {
- final String pFontName;
- final int pFontSize;
-
- public BlockProperies( String pFontName, int pFontSize )
- {
- this.pFontName = pFontName;
- this.pFontSize = pFontSize;
- }
- }
-
private static final POILogger logger = POILogFactory
.getLogger( WordToHtmlConverter.class );
basicLink );
}
- @Override
- protected void processLineBreak( Element block, CharacterRun characterRun )
- {
- block.appendChild( htmlDocumentFacade.createLineBreak() );
- }
-
/**
* This method shall store image bytes in external file and convert it if
* necessary. Images shall be stored using PNG format. Other formats may be
* not supported by user browser.
* <p>
- * Please note the
- * {@link WordToHtmlUtils#setPictureProperties(Picture, Element)} method.
+ * Please note the {@link #processImage(Element, boolean, Picture, String)}.
*
* @param currentBlock
* currently processed HTML element, like <tt>p</tt>. Shall be
+ picture.suggestFullFileName() + "' can be here" ) );
}
+ protected void processImage( Element currentBlock, boolean inlined,
+ Picture picture, String imageSourcePath )
+ {
+ final int aspectRatioX = picture.getHorizontalScalingFactor();
+ final int aspectRatioY = picture.getVerticalScalingFactor();
+
+ StringBuilder style = new StringBuilder();
+
+ final float imageWidth;
+ final float imageHeight;
+
+ final float cropTop;
+ final float cropBottom;
+ final float cropLeft;
+ final float cropRight;
+
+ if ( aspectRatioX > 0 )
+ {
+ imageWidth = picture.getDxaGoal() * aspectRatioX / 1000
+ / TWIPS_PER_INCH;
+ cropRight = picture.getDxaCropRight() * aspectRatioX / 1000
+ / TWIPS_PER_INCH;
+ cropLeft = picture.getDxaCropLeft() * aspectRatioX / 1000
+ / TWIPS_PER_INCH;
+ }
+ else
+ {
+ imageWidth = picture.getDxaGoal() / TWIPS_PER_INCH;
+ cropRight = picture.getDxaCropRight() / TWIPS_PER_INCH;
+ cropLeft = picture.getDxaCropLeft() / TWIPS_PER_INCH;
+ }
+
+ if ( aspectRatioY > 0 )
+ {
+ imageHeight = picture.getDyaGoal() * aspectRatioY / 1000
+ / TWIPS_PER_INCH;
+ cropTop = picture.getDyaCropTop() * aspectRatioY / 1000
+ / TWIPS_PER_INCH;
+ cropBottom = picture.getDyaCropBottom() * aspectRatioY / 1000
+ / TWIPS_PER_INCH;
+ }
+ else
+ {
+ imageHeight = picture.getDyaGoal() / TWIPS_PER_INCH;
+ cropTop = picture.getDyaCropTop() / TWIPS_PER_INCH;
+ cropBottom = picture.getDyaCropBottom() / TWIPS_PER_INCH;
+ }
+
+ Element root;
+ if ( cropTop != 0 || cropRight != 0 || cropBottom != 0 || cropLeft != 0 )
+ {
+ float visibleWidth = Math
+ .max( 0, imageWidth - cropLeft - cropRight );
+ float visibleHeight = Math.max( 0, imageHeight - cropTop
+ - cropBottom );
+
+ root = htmlDocumentFacade.document.createElement( "div" );
+ root.setAttribute( "style", "vertical-align:text-bottom;width:"
+ + visibleWidth + "in;height:" + visibleHeight + "in;" );
+
+ // complex
+ Element inner = htmlDocumentFacade.document.createElement( "div" );
+ inner.setAttribute( "style", "position:relative;width:"
+ + visibleWidth + "in;height:" + visibleHeight
+ + "in;overflow:hidden;" );
+ root.appendChild( inner );
+
+ Element image = htmlDocumentFacade.document.createElement( "img" );
+ image.setAttribute( "src", imageSourcePath );
+ image.setAttribute( "style", "position:absolute;left:-" + cropLeft
+ + ";top:-" + cropTop + ";width:" + imageWidth
+ + "in;height:" + imageHeight + "in;" );
+ inner.appendChild( image );
+
+ style.append( "overflow:hidden;" );
+ }
+ else
+ {
+ root = htmlDocumentFacade.document.createElement( "img" );
+ root.setAttribute( "src", imageSourcePath );
+ root.setAttribute( "style", "width:" + imageWidth + "in;height:"
+ + imageHeight + "in;vertical-align:text-bottom;" );
+ }
+
+ currentBlock.appendChild( root );
+ }
+
+ /**
+ * Appends a line-break element, created by the HTML document facade, to
+ * the given block. The character run is not consulted.
+ *
+ * @param block
+ * element that receives the line break
+ * @param characterRun
+ * not used by this implementation
+ */
+ @Override
+ protected void processLineBreak( Element block, CharacterRun characterRun )
+ {
+ block.appendChild( htmlDocumentFacade.createLineBreak() );
+ }
+
protected void processPageref( HWPFDocumentCore hwpfDocument,
Element currentBlock, Range textRange, int currentTableLevel,
String pageref )
}
}
+ /**
+ * Holds property values applied to the current <tt>p</tt> element. Those
+ * properties shall not be duplicated in child <tt>span</tt> elements.
+ * <p>
+ * Both fields are final and are assigned once, in the constructor.
+ */
+ private static class BlockProperies
+ {
+ final String pFontName;
+ final int pFontSize;
+
+ public BlockProperies( String pFontName, int pFontSize )
+ {
+ this.pFontName = pFontName;
+ this.pFontSize = pFontSize;
+ }
+ }
+
}
final int aspectRatioX = picture.getAspectRatioX();
final int aspectRatioY = picture.getAspectRatioY();
+ StringBuilder style = new StringBuilder();
+
if ( aspectRatioX > 0 )
{
- graphicElement
- .setAttribute( "content-width", ( ( picture.getDxaGoal()
- * aspectRatioX / 100 ) / TWIPS_PER_PT )
- + "pt" );
+ style.append( "width:"
+ + ( ( picture.getDxaGoal() * aspectRatioX / 1000 ) / TWIPS_PER_INCH )
+ + "in;" );
}
else
- graphicElement.setAttribute( "content-width",
- ( picture.getDxaGoal() / TWIPS_PER_PT ) + "pt" );
+ style.append( "width:" + ( picture.getDxaGoal() / TWIPS_PER_INCH )
+ + "in;" );
if ( aspectRatioY > 0 )
- graphicElement
- .setAttribute( "content-height", ( ( picture.getDyaGoal()
- * aspectRatioY / 100 ) / TWIPS_PER_PT )
- + "pt" );
- else
- graphicElement.setAttribute( "content-height",
- ( picture.getDyaGoal() / TWIPS_PER_PT ) + "pt" );
-
- if ( aspectRatioX <= 0 || aspectRatioY <= 0 )
{
- graphicElement.setAttribute( "scaling", "uniform" );
+ style.append( "height:"
+ + ( ( picture.getDyaGoal() * aspectRatioY / 1000 ) / TWIPS_PER_INCH )
+ + "in;" );
}
else
- {
- graphicElement.setAttribute( "scaling", "non-uniform" );
- }
+ style.append( "height:" + ( picture.getDyaGoal() / TWIPS_PER_INCH )
+ + "in;" );
graphicElement.setAttribute( "vertical-align", "text-bottom" );
|| picture.getDyaCropBottom() != 0
|| picture.getDxaCropLeft() != 0 )
{
- int rectTop = picture.getDyaCropTop() / TWIPS_PER_PT;
- int rectRight = picture.getDxaCropRight() / TWIPS_PER_PT;
- int rectBottom = picture.getDyaCropBottom() / TWIPS_PER_PT;
- int rectLeft = picture.getDxaCropLeft() / TWIPS_PER_PT;
- graphicElement.setAttribute( "clip", "rect(" + rectTop + "pt, "
- + rectRight + "pt, " + rectBottom + "pt, " + rectLeft
- + "pt)" );
- graphicElement.setAttribute( "oveerflow", "hidden" );
+ float rectTop = picture.getDyaCropTop() / TWIPS_PER_INCH;
+ float rectRight = picture.getDxaCropRight() / TWIPS_PER_INCH;
+ float rectBottom = picture.getDyaCropBottom() / TWIPS_PER_INCH;
+ float rectLeft = picture.getDxaCropLeft() / TWIPS_PER_INCH;
+
+ style.append( "clip:rect(" + rectTop + "in," + rectRight + "in, "
+ + rectBottom + "in, " + rectLeft + "in);" );
+ style.append( "overflow:hidden;" );
}
+
+ graphicElement.setAttribute( "style", style.toString() );
+
}
}
--- /dev/null
+package org.apache.poi.hwpf.model;
+
+import java.util.Arrays;
+
+import org.apache.poi.util.LittleEndian;
+
+/**
+ * Picture Descriptor (on File) (PICF)
+ * <p>
+ * Based on Microsoft Office Word 97-2007 Binary File Format (.doc)
+ * Specification; Page 181 of 210
+ * <p>
+ * The parsing constructor reads every field from a byte array at fixed
+ * offsets relative to a start offset; {@link #toString()} dumps the parsed
+ * values for debugging.
+ *
+ * @author Sergey Vladimirov ( vlsergey {at} gmail {dot} com )
+ */
+public class PictureDescriptor
+{
+    private static final int LCB_OFFSET = 0x00;
+    private static final int CBHEADER_OFFSET = 0x04;
+
+    private static final int MFP_MM_OFFSET = 0x06;
+    private static final int MFP_XEXT_OFFSET = 0x08;
+    private static final int MFP_YEXT_OFFSET = 0x0A;
+    private static final int MFP_HMF_OFFSET = 0x0C;
+
+    /** Offset of the 14-byte block stored in {@link #offset14}. */
+    private static final int OFFSET14_OFFSET = 0x0E;
+
+    private static final int DXAGOAL_OFFSET = 0x1C;
+    private static final int DYAGOAL_OFFSET = 0x1E;
+
+    private static final int MX_OFFSET = 0x20;
+    private static final int MY_OFFSET = 0x22;
+
+    private static final int DXACROPLEFT_OFFSET = 0x24;
+    private static final int DYACROPTOP_OFFSET = 0x26;
+    private static final int DXACROPRIGHT_OFFSET = 0x28;
+    private static final int DYACROPBOTTOM_OFFSET = 0x2A;
+
+    /**
+     * Number of bytes in the PIC structure plus size of following picture data
+     * which may be a Windows metafile, a bitmap, or the filename of a TIFF
+     * file. In the case of a Macintosh PICT picture, this includes the size of
+     * the PIC, the standard "x" metafile, and the Macintosh PICT data. See
+     * Appendix B for more information.
+     */
+    protected int lcb;
+
+    /**
+     * Number of bytes in the PIC (to allow for future expansion).
+     */
+    protected int cbHeader;
+
+    /*
+     * Microsoft Office Word 97-2007 Binary File Format (.doc) Specification
+     *
+     * Page 181 of 210
+     *
+     * If a Windows metafile is stored immediately following the PIC structure,
+     * the mfp is a Windows METAFILEPICT structure. See
+     * http://msdn2.microsoft.com/en-us/library/ms649017(VS.85).aspx for more
+     * information about the METAFILEPICT structure and
+     * http://download.microsoft.com/download/0/B/E/0BE8BDD7-E5E8-422A-ABFD-
+     * 4342ED7AD886/WindowsMetafileFormat(wmf)Specification.pdf for Windows
+     * Metafile Format specification.
+     *
+     * When the data immediately following the PIC is a TIFF filename,
+     * mfp.mm==98. If a bitmap is stored after the pic, mfp.mm==99.
+     *
+     * When the PIC describes a bitmap, mfp.xExt is the width of the bitmap in
+     * pixels and mfp.yExt is the height of the bitmap in pixels.
+     */
+
+    protected int mfp_mm;
+    protected int mfp_xExt;
+    protected int mfp_yExt;
+    protected int mfp_hMF;
+
+    /**
+     * <ul>
+     * <li>Windows bitmap structure when PIC describes a BITMAP (14 bytes)</li>
+     * <li>Rectangle for window origin and extents when metafile is stored --
+     * ignored if 0 (8 bytes)</li>
+     * </ul>
+     */
+    protected byte[] offset14 = new byte[14];
+
+    /**
+     * Horizontal measurement in twips of the rectangle the picture should be
+     * imaged within
+     */
+    protected short dxaGoal = 0;
+
+    /**
+     * Vertical measurement in twips of the rectangle the picture should be
+     * imaged within
+     */
+    protected short dyaGoal = 0;
+
+    /**
+     * Horizontal scaling factor supplied by user expressed in .001% units
+     * (wording taken from the specification; NOTE(review): confirm the unit
+     * against real documents)
+     */
+    protected short mx;
+
+    /**
+     * Vertical scaling factor supplied by user expressed in .001% units
+     * (wording taken from the specification; NOTE(review): confirm the unit
+     * against real documents)
+     */
+    protected short my;
+
+    /**
+     * The amount the picture has been cropped on the left in twips
+     */
+    protected short dxaCropLeft = 0;
+
+    /**
+     * The amount the picture has been cropped on the top in twips
+     */
+    protected short dyaCropTop = 0;
+
+    /**
+     * The amount the picture has been cropped on the right in twips
+     */
+    protected short dxaCropRight = 0;
+
+    /**
+     * The amount the picture has been cropped on the bottom in twips
+     */
+    protected short dyaCropBottom = 0;
+
+    /**
+     * Creates an empty descriptor; all fields keep their default values.
+     */
+    public PictureDescriptor()
+    {
+    }
+
+    /**
+     * Parses a descriptor from the given bytes.
+     *
+     * @param _dataStream
+     *            bytes containing the PICF structure
+     * @param startOffset
+     *            index in <tt>_dataStream</tt> of the first byte of the
+     *            structure; all field offsets are relative to it
+     */
+    public PictureDescriptor( byte[] _dataStream, int startOffset )
+    {
+        this.lcb = LittleEndian.getInt( _dataStream, startOffset + LCB_OFFSET );
+        this.cbHeader = LittleEndian.getUShort( _dataStream, startOffset
+                + CBHEADER_OFFSET );
+
+        this.mfp_mm = LittleEndian.getUShort( _dataStream, startOffset
+                + MFP_MM_OFFSET );
+        this.mfp_xExt = LittleEndian.getUShort( _dataStream, startOffset
+                + MFP_XEXT_OFFSET );
+        this.mfp_yExt = LittleEndian.getUShort( _dataStream, startOffset
+                + MFP_YEXT_OFFSET );
+        this.mfp_hMF = LittleEndian.getUShort( _dataStream, startOffset
+                + MFP_HMF_OFFSET );
+
+        this.offset14 = LittleEndian.getByteArray( _dataStream,
+                startOffset + OFFSET14_OFFSET, 14 );
+
+        this.dxaGoal = LittleEndian.getShort( _dataStream, startOffset
+                + DXAGOAL_OFFSET );
+        this.dyaGoal = LittleEndian.getShort( _dataStream, startOffset
+                + DYAGOAL_OFFSET );
+
+        this.mx = LittleEndian.getShort( _dataStream, startOffset + MX_OFFSET );
+        this.my = LittleEndian.getShort( _dataStream, startOffset + MY_OFFSET );
+
+        this.dxaCropLeft = LittleEndian.getShort( _dataStream, startOffset
+                + DXACROPLEFT_OFFSET );
+        this.dyaCropTop = LittleEndian.getShort( _dataStream, startOffset
+                + DYACROPTOP_OFFSET );
+        this.dxaCropRight = LittleEndian.getShort( _dataStream, startOffset
+                + DXACROPRIGHT_OFFSET );
+        this.dyaCropBottom = LittleEndian.getShort( _dataStream, startOffset
+                + DYACROPBOTTOM_OFFSET );
+    }
+
+    /**
+     * @return multi-line dump of all descriptor fields, enclosed in
+     *         <tt>[PICF]</tt> and <tt>[/PICF]</tt> markers
+     */
+    @Override
+    public String toString()
+    {
+        StringBuilder stringBuilder = new StringBuilder();
+        stringBuilder.append( "[PICF]\n" );
+        stringBuilder.append( " lcb = " ).append( this.lcb )
+                .append( '\n' );
+        stringBuilder.append( " cbHeader = " )
+                .append( this.cbHeader ).append( '\n' );
+
+        stringBuilder.append( " mfp.mm = " ).append( this.mfp_mm )
+                .append( '\n' );
+        stringBuilder.append( " mfp.xExt = " )
+                .append( this.mfp_xExt ).append( '\n' );
+        stringBuilder.append( " mfp.yExt = " )
+                .append( this.mfp_yExt ).append( '\n' );
+        stringBuilder.append( " mfp.hMF = " )
+                .append( this.mfp_hMF ).append( '\n' );
+
+        stringBuilder.append( " offset14 = " )
+                .append( Arrays.toString( this.offset14 ) ).append( '\n' );
+        stringBuilder.append( " dxaGoal = " )
+                .append( this.dxaGoal ).append( '\n' );
+        stringBuilder.append( " dyaGoal = " )
+                .append( this.dyaGoal ).append( '\n' );
+
+        // scaling factors are parsed like every other field, so dump them too
+        stringBuilder.append( " mx = " ).append( this.mx )
+                .append( '\n' );
+        stringBuilder.append( " my = " ).append( this.my )
+                .append( '\n' );
+
+        stringBuilder.append( " dxaCropLeft = " )
+                .append( this.dxaCropLeft ).append( '\n' );
+        stringBuilder.append( " dyaCropTop = " )
+                .append( this.dyaCropTop ).append( '\n' );
+        stringBuilder.append( " dxaCropRight = " )
+                .append( this.dxaCropRight ).append( '\n' );
+        stringBuilder.append( " dyaCropBottom = " )
+                .append( this.dyaCropBottom ).append( '\n' );
+
+        stringBuilder.append( "[/PICF]" );
+        return stringBuilder.toString();
+    }
+}
import java.io.OutputStream;
import java.util.zip.InflaterInputStream;
+import org.apache.poi.hwpf.model.PictureDescriptor;
import org.apache.poi.util.LittleEndian;
import org.apache.poi.util.POILogFactory;
import org.apache.poi.util.POILogger;
* Represents embedded picture extracted from Word Document
* @author Dmitry Romanov
*/
-public final class Picture
+public final class Picture extends PictureDescriptor
{
private static final POILogger log = POILogFactory.getLogger(Picture.class);
static final int PICT_HEADER_OFFSET = 0x4;
static final int MFPMM_OFFSET = 0x6;
static final int PICF_SHAPE_OFFSET = 0xE;
- static final int DXAGOAL_OFFSET = 0x1C;
- static final int DYAGOAL_OFFSET = 0x1E;
- static final int MX_OFFSET = 0x20;
- static final int MY_OFFSET = 0x22;
- static final int DXACROPLEFT_OFFSET = 0x24;
- static final int DYACROPTOP_OFFSET = 0x26;
- static final int DXACROPRIGHT_OFFSET = 0x28;
- static final int DYACROPBOTTOM_OFFSET = 0x2A;
static final int UNKNOWN_HEADER_SIZE = 0x49;
public static final byte[] GIF = new byte[]{'G', 'I', 'F'};
private byte[] rawContent;
private byte[] content;
private byte[] _dataStream;
- private int aspectRatioX;
- private int aspectRatioY;
private int height = -1;
private int width = -1;
- private int dxaGoal = -1;
- private int dyaGoal = -1;
-
- private int dxaCropLeft = -1;
- private int dyaCropTop = -1;
- private int dxaCropRight = -1;
- private int dyaCropBottom = -1;
-
public Picture(int dataBlockStartOfsset, byte[] _dataStream, boolean fillBytes)
{
+ super (_dataStream, dataBlockStartOfsset);
+
this._dataStream = _dataStream;
this.dataBlockStartOfsset = dataBlockStartOfsset;
this.dataBlockSize = LittleEndian.getInt(_dataStream, dataBlockStartOfsset);
}
- this.dxaGoal = LittleEndian.getShort(_dataStream, dataBlockStartOfsset+DXAGOAL_OFFSET);
- this.dyaGoal = LittleEndian.getShort(_dataStream, dataBlockStartOfsset+DYAGOAL_OFFSET);
-
- this.aspectRatioX = LittleEndian.getShort(_dataStream, dataBlockStartOfsset+MX_OFFSET)/10;
- this.aspectRatioY = LittleEndian.getShort(_dataStream, dataBlockStartOfsset+MY_OFFSET)/10;
-
- this.dxaCropLeft = LittleEndian.getShort(_dataStream, dataBlockStartOfsset+DXACROPLEFT_OFFSET);
- this.dyaCropTop = LittleEndian.getShort(_dataStream, dataBlockStartOfsset+DYACROPTOP_OFFSET);
- this.dxaCropRight = LittleEndian.getShort(_dataStream, dataBlockStartOfsset+DXACROPRIGHT_OFFSET);
- this.dyaCropBottom = LittleEndian.getShort(_dataStream, dataBlockStartOfsset+DYACROPBOTTOM_OFFSET);
-
if (fillBytes)
{
fillImageContent();
public Picture(byte[] _dataStream)
{
+ super();
+
this._dataStream = _dataStream;
this.dataBlockStartOfsset = 0;
this.dataBlockSize = _dataStream.length;
/**
* @return the horizontal aspect ratio for picture provided by user
+ * @deprecated use more precise {@link #getHorizontalScalingFactor()}
*/
- public int getAspectRatioX() {
- return aspectRatioX;
+ @Deprecated
+ public int getAspectRatioX()
+ {
+ return mx / 10;
+ }
+
+ /**
+ * @return Horizontal scaling factor supplied by user expressed in .001%
+ * units
+ */
+ public int getHorizontalScalingFactor()
+ {
+ return mx;
}
/**
* @return the vertical aspect ratio for picture provided by user
+ * @deprecated use more precise {@link #getVerticalScalingFactor()}
*/
- public int getAspectRatioY() {
- return aspectRatioY;
+ @Deprecated
+ public int getAspectRatioY()
+ {
+ return my / 10;
+ }
+
+ /**
+ * @return Vertical scaling factor supplied by user expressed in .001% units
+ */
+ public int getVerticalScalingFactor()
+ {
+ return my;
}
/**
import org.apache.poi.POIDataSamples;
import org.apache.poi.hwpf.HWPFDocument;
+import org.apache.poi.hwpf.usermodel.Picture;
+import org.w3c.dom.Document;
+import org.w3c.dom.Element;
/**
* Test cases for {@link WordToHtmlConverter}
private static String getHtmlText( final String sampleFileName )
throws Exception
+ {
+ return getHtmlText( sampleFileName, false );
+ }
+
+ private static String getHtmlText( final String sampleFileName,
+ boolean emulatePictureStorage ) throws Exception
{
HWPFDocument hwpfDocument = new HWPFDocument( POIDataSamples
.getDocumentInstance().openResourceAsStream( sampleFileName ) );
- WordToHtmlConverter wordToHtmlConverter = new WordToHtmlConverter(
- DocumentBuilderFactory.newInstance().newDocumentBuilder()
- .newDocument() );
+ Document newDocument = DocumentBuilderFactory.newInstance()
+ .newDocumentBuilder().newDocument();
+ WordToHtmlConverter wordToHtmlConverter = !emulatePictureStorage ? new WordToHtmlConverter(
+ newDocument ) : new WordToHtmlConverter( newDocument )
+ {
+ @Override
+ protected void processImage( Element currentBlock, boolean inlined,
+ Picture picture )
+ {
+ processImage( currentBlock, inlined, picture, "picture.bin" );
+ }
+ };
wordToHtmlConverter.processDocument( hwpfDocument );
StringWriter stringWriter = new StringWriter();
assertContains( result, "<!--Image link to '0.emf' can be here-->" );
}
+ public void testPicture() throws Exception
+ {
+ String result = getHtmlText( "picture.doc", true );
+
+ // image source: the emulated picture storage passes "picture.bin"
+ assertContains( result, "src=\"picture.bin\"" );
+ // visible size: size of the containers after cropping
+ assertContains( result, "width:3.1305554in;height:1.7250001in;" );
+ // shift of the image due to left/top crop (emitted without a unit)
+ assertContains( result, "left:-0.09375;top:-0.25694445;" );
+ // full image size, before cropping
+ assertContains( result, "width:3.4125in;height:2.325in;" );
+ }
+
public void testHyperlink() throws Exception
{
String result = getHtmlText( "hyperlink.doc" );
assertEquals(0, plain8s);
}
+ @SuppressWarnings( "deprecation" )
public void testCroppedPictures() {
HWPFDocument doc = HWPFTestDataSamples.openSampleFile("testCroppedPictures.doc");
List<Picture> pics = doc.getPicturesTable().getAllPictures();
Picture pic1 = pics.get(0);
assertEquals(27, pic1.getAspectRatioX());
+ assertEquals(270, pic1.getHorizontalScalingFactor());
assertEquals(27, pic1.getAspectRatioY());
+ assertEquals(271, pic1.getVerticalScalingFactor());
assertEquals(12000, pic1.getDxaGoal()); // 21.17 cm / 2.54 cm/inch * 72dpi * 20 = 12000
assertEquals(9000, pic1.getDyaGoal()); // 15.88 cm / 2.54 cm/inch * 72dpi * 20 = 9000
assertEquals(0, pic1.getDxaCropLeft());
Picture pic2 = pics.get(1);
System.out.println(pic2.getWidth());
assertEquals(76, pic2.getAspectRatioX());
+ assertEquals(764, pic2.getHorizontalScalingFactor());
assertEquals(68, pic2.getAspectRatioY());
+ assertEquals(685, pic2.getVerticalScalingFactor());
assertEquals(12000, pic2.getDxaGoal()); // 21.17 cm / 2.54 cm/inch * 72dpi * 20 = 12000
assertEquals(9000, pic2.getDyaGoal()); // 15.88 cm / 2.54 cm/inch * 72dpi * 20 = 9000
assertEquals(0, pic2.getDxaCropLeft()); // TODO YK: The Picture is cropped but HWPF reads the crop parameters all zeros