source.dussan.org Git - poi.git/commitdiff
more progress with WordToFoExtractor, see Bugzilla 51351
author Yegor Kozlov <yegor@apache.org>
Wed, 15 Jun 2011 11:41:22 +0000 (11:41 +0000)
committer Yegor Kozlov <yegor@apache.org>
Wed, 15 Jun 2011 11:41:22 +0000 (11:41 +0000)
git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@1136001 13f79535-47bb-0310-9956-ffa450edef68

src/scratchpad/src/org/apache/poi/hwpf/extractor/WordToFoExtractor.java
src/scratchpad/src/org/apache/poi/hwpf/extractor/WordToFoUtils.java

index ccc5a2a8971d370ff21b7e06c8519d8bc3254322..8e2013fbce604418c8b03393a91a9640ed6e843d 100644 (file)
@@ -279,63 +279,95 @@ public class WordToFoExtractor {
        }
     }
 
-    @SuppressWarnings("unused")
-    protected void processImage(Element currentBlock, Picture picture) {
-       // no default implementation -- skip
+    /**
+     * Hook for emitting a picture; the default implementation does nothing. An
+     * override shall store the image bytes in an external file and convert them
+     * if necessary, using PNG (for bitmaps) or SVG (for vector graphics); other
+     * formats may not be supported by your XSL FO processor.
+     * <p>
+     * Please note the
+     * {@link WordToFoUtils#setPictureProperties(Picture, Element)} method.
+     * 
+     * @param currentBlock
+     *            currently processed FO element, like <tt>fo:block</tt>. Shall
+     *            be used as the parent of the newly created
+     *            <tt>fo:external-graphic</tt> or
+     *            <tt>fo:instream-foreign-object</tt>
+     * @param inlined
+     *            whether the image is inlined
+     * @param picture
+     *            HWPF object containing the picture data and properties
+     */
+    protected void processImage(Element currentBlock, boolean inlined,
+            Picture picture) {
+        // no default implementation -- skip
     }
 
     protected void processParagraph(HWPFDocument hwpfDocument,
-           Element parentFopElement, int currentTableLevel,
-           Paragraph paragraph, String bulletText) {
-       final Element block = createBlock();
-       parentFopElement.appendChild(block);
+            Element parentFopElement, int currentTableLevel,
+            Paragraph paragraph, String bulletText) {
+        final Element block = createBlock();
+        parentFopElement.appendChild(block);
 
-       WordToFoUtils.setParagraphProperties(paragraph, block);
+        WordToFoUtils.setParagraphProperties(paragraph, block);
 
-       final int charRuns = paragraph.numCharacterRuns();
+        final int charRuns = paragraph.numCharacterRuns();
 
-       if (charRuns == 0) {
-           return;
-       }
+        if (charRuns == 0) {
+            return;
+        }
 
-       final String pFontName;
-       final int pFontSize;
-       final boolean pBold;
-       final boolean pItalic;
-       {
-           CharacterRun characterRun = paragraph.getCharacterRun(0);
-           pFontSize = characterRun.getFontSize() / 2;
-           pFontName = characterRun.getFontName();
-           pBold = characterRun.isBold();
-           pItalic = characterRun.isItalic();
-       }
-       WordToFoUtils.setFontFamily(block, pFontName);
-       WordToFoUtils.setFontSize(block, pFontSize);
-       WordToFoUtils.setBold(block, pBold);
-       WordToFoUtils.setItalic(block, pItalic);
+        final String pFontName;
+        final int pFontSize;
+        final boolean pBold;
+        final boolean pItalic;
+        {
+            CharacterRun characterRun = paragraph.getCharacterRun(0);
+            pFontSize = characterRun.getFontSize() / 2;
+            pFontName = characterRun.getFontName();
+            pBold = characterRun.isBold();
+            pItalic = characterRun.isItalic();
+        }
+        WordToFoUtils.setFontFamily(block, pFontName);
+        WordToFoUtils.setFontSize(block, pFontSize);
+        WordToFoUtils.setBold(block, pBold);
+        WordToFoUtils.setItalic(block, pItalic);
 
-       StringBuilder lineText = new StringBuilder();
+        StringBuilder lineText = new StringBuilder();
 
-       if (WordToFoUtils.isNotEmpty(bulletText)) {
-           Element inline = createInline();
-           block.appendChild(inline);
+        if (WordToFoUtils.isNotEmpty(bulletText)) {
+            Element inline = createInline();
+            block.appendChild(inline);
 
-           Text textNode = createText(bulletText);
-           inline.appendChild(textNode);
+            Text textNode = createText(bulletText);
+            inline.appendChild(textNode);
 
-           lineText.append(bulletText);
-       }
+            lineText.append(bulletText);
+        }
+
+        for (int c = 0; c < charRuns; c++) {
+            CharacterRun characterRun = paragraph.getCharacterRun(c);
+
+            if (hwpfDocument.getPicturesTable().hasPicture(characterRun)) {
+                Picture picture = hwpfDocument.getPicturesTable()
+                        .extractPicture(characterRun, true);
 
-       for (int c = 0; c < charRuns; c++) {
-           CharacterRun characterRun = paragraph.getCharacterRun(c);
+                processImage(block, characterRun.text().charAt(0) == 0x01,
+                        picture);
+                continue;
+            }
 
            String text = characterRun.text();
            if (text.getBytes().length == 0)
                continue;
 
-           if (text.getBytes()[0] == FIELD_BEGIN_MARK) {
-               int skipTo = tryImageWithinField(hwpfDocument, paragraph, c,
-                       block);
+            if (text.getBytes()[0] == FIELD_BEGIN_MARK) {
+                /*
+                 * check if we have a field with calculated image as a result.
+                 * MathType equation, for example.
+                 */
+                int skipTo = tryImageWithinField(hwpfDocument, paragraph, c,
+                        block);
 
                if (skipTo != c) {
                    c = skipTo;
@@ -550,60 +582,62 @@ public class WordToFoExtractor {
     }
 
     protected int tryImageWithinField(HWPFDocument hwpfDocument,
-           Paragraph paragraph, int beginMark, Element currentBlock) {
-       int separatorMark = -1;
-       int pictureMark = -1;
-       int endMark = -1;
-       for (int c = beginMark + 1; c < paragraph.numCharacterRuns(); c++) {
-           CharacterRun characterRun = paragraph.getCharacterRun(c);
-
-           String text = characterRun.text();
-           if (text.getBytes().length == 0)
-               continue;
-
-           if (text.getBytes()[0] == FIELD_SEPARATOR_MARK) {
-               if (separatorMark != -1) {
-                   // double;
-                   return beginMark;
-               }
-
-               separatorMark = c;
-               continue;
-           }
-
-           if (text.getBytes()[0] == FIELD_END_MARK) {
-               if (endMark != -1) {
-                   // double;
-                   return beginMark;
-               }
+            Paragraph paragraph, int beginMark, Element currentBlock) {
+        int separatorMark = -1;
+        int pictureMark = -1;
+        int pictureChar = Integer.MIN_VALUE;
+        int endMark = -1;
+        for (int c = beginMark + 1; c < paragraph.numCharacterRuns(); c++) {
+            CharacterRun characterRun = paragraph.getCharacterRun(c);
+
+            String text = characterRun.text();
+            if (text.getBytes().length == 0)
+                continue;
+
+            if (text.getBytes()[0] == FIELD_SEPARATOR_MARK) {
+                if (separatorMark != -1) {
+                    // double;
+                    return beginMark;
+                }
+
+                separatorMark = c;
+                continue;
+            }
+
+            if (text.getBytes()[0] == FIELD_END_MARK) {
+                if (endMark != -1) {
+                    // double;
+                    return beginMark;
+                }
+
+                endMark = c;
+                break;
+            }
+
+            if (hwpfDocument.getPicturesTable().hasPicture(characterRun)) {
+                if (c != -1) {
+                    // double;
+                    return beginMark;
+                }
+
+                pictureMark = c;
+                pictureChar = characterRun.text().charAt(0);
+                continue;
+            }
+        }
 
-               endMark = c;
-               break;
-           }
+        if (separatorMark == -1 || pictureMark == -1 || endMark == -1)
+            return beginMark;
 
-           if (hwpfDocument.getPicturesTable().hasPicture(characterRun)) {
-               if (pictureMark != -1) {
-                   // double;
-                   return beginMark;
-               }
+        final CharacterRun pictureRun = paragraph.getCharacterRun(pictureMark);
+        final Picture picture = hwpfDocument.getPicturesTable().extractPicture(
+                pictureRun, true);
 
-               pictureMark = c;
-               continue;
-           }
-       }
-
-       if (separatorMark == -1 || pictureMark == -1 || endMark == -1)
-           return beginMark;
+        processImage(currentBlock, pictureChar == 0x01, picture);
 
-       final CharacterRun pictureRun = paragraph.getCharacterRun(pictureMark);
-       final Picture picture = hwpfDocument.getPicturesTable().extractPicture(
-               pictureRun, true);
-       processImage(currentBlock, picture);
-
-       return endMark;
+        return endMark;
     }
 
-
     /**
      * Java main() interface to interact with WordToFoExtractor
      *
index f973faad98ee9c44677660f6eb95275f4a4e2b3d..5acd71113851bfb1a30063797d0a0514519ffb0e 100644 (file)
@@ -9,6 +9,7 @@ import org.apache.poi.hwpf.usermodel.BorderCode;
 import org.apache.poi.hwpf.usermodel.CharacterProperties;
 import org.apache.poi.hwpf.usermodel.CharacterRun;
 import org.apache.poi.hwpf.usermodel.Paragraph;
+import org.apache.poi.hwpf.usermodel.Picture;
 import org.apache.poi.hwpf.usermodel.Range;
 import org.apache.poi.hwpf.usermodel.Section;
 import org.apache.poi.hwpf.usermodel.SectionProperties;
@@ -179,6 +180,31 @@ public class WordToFoUtils {
        }
     }
 
+    public static String getJustification(int js) { // maps a numeric justification code to a text-align value
+        switch (js) { // NOTE(review): codes presumably mirror Word's paragraph justification values -- confirm
+        case 0:
+            return "start";
+        case 1:
+            return "center";
+        case 2:
+            return "end";
+        case 3:
+        case 4: // codes 3 and 4 share one result
+            return "justify";
+        case 5:
+            return "center";
+        case 6:
+            return "left";
+        case 7:
+            return "start";
+        case 8:
+            return "end";
+        case 9:
+            return "justify";
+        }
+        return ""; // any other code: empty string, i.e. no known mapping
+    }
+
     public static String getListItemNumberLabel(int number, int format) {
 
        if (format != 0)
@@ -244,48 +270,51 @@ public class WordToFoUtils {
     }
 
     public static void setCharactersProperties(final CharacterRun characterRun,
-           final Element inline) {
-       final CharacterProperties clonedProperties = characterRun
-               .cloneProperties();
-       StringBuilder textDecorations = new StringBuilder();
-
-       setBorder(inline, clonedProperties.getBrc(), EMPTY);
-
-       if (characterRun.isCapitalized()) {
-           inline.setAttribute("text-transform", "uppercase");
-       }
-       if (characterRun.isHighlighted()) {
-           inline.setAttribute("background-color",
-                   getColor(clonedProperties.getIcoHighlight()));
-       }
-       if (characterRun.isStrikeThrough()) {
-           if (textDecorations.length() > 0)
-               textDecorations.append(" ");
-           textDecorations.append("line-through");
-       }
-       if (characterRun.isShadowed()) {
-           inline.setAttribute("text-shadow", characterRun.getFontSize() / 24
-                   + "pt");
-       }
-       if (characterRun.isSmallCaps()) {
-           inline.setAttribute("font-variant", "small-caps");
-       }
-       if (characterRun.getSubSuperScriptIndex() == 1) {
-           inline.setAttribute("baseline-shift", "super");
-           inline.setAttribute("font-size", "smaller");
-       }
-       if (characterRun.getSubSuperScriptIndex() == 2) {
-           inline.setAttribute("baseline-shift", "sub");
-           inline.setAttribute("font-size", "smaller");
-       }
-       if (characterRun.getUnderlineCode() > 0) {
-           if (textDecorations.length() > 0)
-               textDecorations.append(" ");
-           textDecorations.append("underline");
-       }
-       if (textDecorations.length() > 0) {
-           inline.setAttribute("text-decoration", textDecorations.toString());
-       }
+            final Element inline) { // copies the run's character formatting onto the element as attributes
+        final CharacterProperties clonedProperties = characterRun
+                .cloneProperties();
+        StringBuilder textDecorations = new StringBuilder(); // space-separated text-decoration values collected below
+
+        setBorder(inline, clonedProperties.getBrc(), EMPTY);
+
+        if (characterRun.isCapitalized()) {
+            inline.setAttribute("text-transform", "uppercase");
+        }
+        if (characterRun.isHighlighted()) {
+            inline.setAttribute("background-color",
+                    getColor(clonedProperties.getIcoHighlight()));
+        }
+        if (characterRun.isStrikeThrough()) {
+            if (textDecorations.length() > 0)
+                textDecorations.append(" ");
+            textDecorations.append("line-through");
+        }
+        if (characterRun.isShadowed()) {
+            inline.setAttribute("text-shadow", characterRun.getFontSize() / 24 // integer division
+                    + "pt");
+        }
+        if (characterRun.isSmallCaps()) {
+            inline.setAttribute("font-variant", "small-caps");
+        }
+        if (characterRun.getSubSuperScriptIndex() == 1) { // index 1 is rendered as superscript
+            inline.setAttribute("baseline-shift", "super");
+            inline.setAttribute("font-size", "smaller");
+        }
+        if (characterRun.getSubSuperScriptIndex() == 2) { // index 2 is rendered as subscript
+            inline.setAttribute("baseline-shift", "sub");
+            inline.setAttribute("font-size", "smaller");
+        }
+        if (characterRun.getUnderlineCode() > 0) { // every positive underline code becomes a plain underline
+            if (textDecorations.length() > 0)
+                textDecorations.append(" ");
+            textDecorations.append("underline");
+        }
+        if (characterRun.isVanished()) { // vanished runs are still emitted, but marked hidden
+            inline.setAttribute("visibility", "hidden");
+        }
+        if (textDecorations.length() > 0) { // write the attribute only when a decoration was collected
+            inline.setAttribute("text-decoration", textDecorations.toString());
+        }
     }
 
     public static void setFontFamily(final Element element,
@@ -335,40 +364,10 @@ public class WordToFoUtils {
     }
 
     public static void setJustification(Paragraph paragraph,
-           final Element element) {
-       final int justification = paragraph.getJustification();
-       switch (justification) {
-       case 0:
-           element.setAttribute("text-align", "start");
-           break;
-       case 1:
-           element.setAttribute("text-align", "center");
-           break;
-       case 2:
-           element.setAttribute("text-align", "end");
-           break;
-       case 3:
-           element.setAttribute("text-align", "justify");
-           break;
-       case 4:
-           element.setAttribute("text-align", "justify");
-           break;
-       case 5:
-           element.setAttribute("text-align", "center");
-           break;
-       case 6:
-           element.setAttribute("text-align", "left");
-           break;
-       case 7:
-           element.setAttribute("text-align", "start");
-           break;
-       case 8:
-           element.setAttribute("text-align", "end");
-           break;
-       case 9:
-           element.setAttribute("text-align", "justify");
-           break;
-       }
+            final Element element) { // sets text-align on element from the paragraph's justification code
+        String justification = getJustification(paragraph.getJustification()); // the code-to-value mapping now lives in getJustification
+        if (isNotEmpty(justification)) // NOTE(review): presumably false for "" (unmapped code), leaving the attribute unset -- confirm
+            element.setAttribute("text-align", justification);
     }
 
     public static void setParagraphProperties(Paragraph paragraph, Element block) {
@@ -399,6 +398,53 @@ public class WordToFoUtils {
        block.setAttribute("white-space-collapse", "false");
     }
 
+    /**
+     * Copies size, scaling, alignment and cropping information from a picture
+     * onto an FO graphic element as attributes.
+     * <p>
+     * <tt>content-width</tt> and <tt>content-height</tt> are derived from the
+     * picture's goal size divided by {@link WordToFoUtils#TWIPS_PER_PT}; when
+     * the matching aspect ratio is positive the goal size is first multiplied
+     * by <tt>ratio / 100</tt>. <tt>scaling</tt> is <tt>non-uniform</tt> only
+     * when both ratios are positive, otherwise <tt>uniform</tt>. If any crop
+     * value is non-zero, a <tt>clip</tt> rectangle (top, right, bottom, left,
+     * in points) is written together with <tt>overflow="hidden"</tt>.
+     *
+     * @param picture
+     *            HWPF picture whose goal size, aspect ratios and crop values
+     *            are read
+     * @param graphicElement
+     *            element that receives the attributes
+     */
+    public static void setPictureProperties(Picture picture,
+            Element graphicElement) {
+        final int aspectRatioX = picture.getAspectRatioX();
+        final int aspectRatioY = picture.getAspectRatioY();
+
+        if (aspectRatioX > 0) {
+            graphicElement.setAttribute("content-width", ((picture.getDxaGoal()
+                    * aspectRatioX / 100) / WordToFoUtils.TWIPS_PER_PT)
+                    + "pt");
+        } else
+            graphicElement.setAttribute("content-width",
+                    (picture.getDxaGoal() / WordToFoUtils.TWIPS_PER_PT) + "pt");
+
+        if (aspectRatioY > 0)
+            graphicElement
+                    .setAttribute("content-height", ((picture.getDyaGoal()
+                            * aspectRatioY / 100) / WordToFoUtils.TWIPS_PER_PT)
+                            + "pt");
+        else
+            graphicElement.setAttribute("content-height",
+                    (picture.getDyaGoal() / WordToFoUtils.TWIPS_PER_PT) + "pt");
+
+        if (aspectRatioX <= 0 || aspectRatioY <= 0) {
+            graphicElement.setAttribute("scaling", "uniform");
+        } else {
+            graphicElement.setAttribute("scaling", "non-uniform");
+        }
+
+        graphicElement.setAttribute("vertical-align", "text-bottom");
+
+        if (picture.getDyaCropTop() != 0 || picture.getDxaCropRight() != 0
+                || picture.getDyaCropBottom() != 0
+                || picture.getDxaCropLeft() != 0) {
+            int rectTop = picture.getDyaCropTop() / WordToFoUtils.TWIPS_PER_PT;
+            int rectRight = picture.getDxaCropRight()
+                    / WordToFoUtils.TWIPS_PER_PT;
+            int rectBottom = picture.getDyaCropBottom()
+                    / WordToFoUtils.TWIPS_PER_PT;
+            int rectLeft = picture.getDxaCropLeft()
+                    / WordToFoUtils.TWIPS_PER_PT;
+            graphicElement.setAttribute("clip", "rect(" + rectTop + "pt, "
+                    + rectRight + "pt, " + rectBottom + "pt, " + rectLeft
+                    + "pt)");
+            // the property name is "overflow"; it was misspelled "oveerflow"
+            graphicElement.setAttribute("overflow", "hidden");
+        }
+    }
+
     public static void setTableCellProperties(TableRow tableRow,
            TableCell tableCell, Element element, boolean toppest,
            boolean bottomest, boolean leftest, boolean rightest) {