From: Nick Burch Date: Tue, 14 Sep 2010 16:32:02 +0000 (+0000) Subject: Correct XWPFRun detection of bold/italic in a paragraph with multiple runs of differe... X-Git-Tag: REL_3_7_BETA3~25 X-Git-Url: https://source.dussan.org/?a=commitdiff_plain;h=6b1a10ac9756439f61d0d0ee9a988287c0f71936;p=poi.git Correct XWPFRun detection of bold/italic in a paragraph with multiple runs of different styles git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@996976 13f79535-47bb-0310-9956-ffa450edef68 --- diff --git a/src/documentation/content/xdocs/status.xml b/src/documentation/content/xdocs/status.xml index 1f0db4a167..8d05120a97 100644 --- a/src/documentation/content/xdocs/status.xml +++ b/src/documentation/content/xdocs/status.xml @@ -34,6 +34,7 @@ + Correct XWPFRun detection of bold/italic in a paragraph with multiple runs of different styles Link XWPFPicture to XWPFRun, so that embedded pictures can be access from where they live in the text stream Improve handling of Hyperlinks inside XWPFParagraph objects through XWPFHyperlinkRun Make XWPFParagraph make more use of XWPFRun, and less on internal StringBuffers diff --git a/src/ooxml/java/org/apache/poi/xwpf/usermodel/XWPFDocument.java b/src/ooxml/java/org/apache/poi/xwpf/usermodel/XWPFDocument.java index bc773132ef..ab3b933e51 100644 --- a/src/ooxml/java/org/apache/poi/xwpf/usermodel/XWPFDocument.java +++ b/src/ooxml/java/org/apache/poi/xwpf/usermodel/XWPFDocument.java @@ -1161,7 +1161,7 @@ public class XWPFDocument extends POIXMLDocument implements Document, IBody { * @see org.apache.poi.xwpf.usermodel.IBody#getParagraphArray(int) */ public XWPFParagraph getParagraphArray(int pos) { - if(pos > 0 && pos < paragraphs.size()){ + if(pos >= 0 && pos < paragraphs.size()){ return paragraphs.get(pos); } return null; diff --git a/src/ooxml/java/org/apache/poi/xwpf/usermodel/XWPFRun.java b/src/ooxml/java/org/apache/poi/xwpf/usermodel/XWPFRun.java index d7fbd2d6a9..a72a0c7602 100644 --- a/src/ooxml/java/org/apache/poi/xwpf/usermodel/XWPFRun.java +++ b/src/ooxml/java/org/apache/poi/xwpf/usermodel/XWPFRun.java @@ -118,6 +118,19 @@ public class XWPFRun { public XWPFParagraph getParagraph() { return paragraph; } + + /** + * For isBold, isItalic etc + */ + private boolean isCTOnOff(CTOnOff onoff) { + if(! onoff.isSetVal()) + return true; + if(onoff.getVal() == STOnOff.ON) + return true; + if(onoff.getVal() == STOnOff.TRUE) + return true; + return false; + } /** * Whether the bold property shall be applied to all non-complex script @@ -127,7 +140,9 @@ public class XWPFRun { */ public boolean isBold() { CTRPr pr = run.getRPr(); - return pr != null && pr.isSetB(); + if(pr == null || !pr.isSetB()) + return false; + return isCTOnOff(pr.getB()); } /** @@ -208,7 +223,9 @@ public class XWPFRun { */ public boolean isItalic() { CTRPr pr = run.getRPr(); - return pr != null && pr.isSetI(); + if(pr == null || !pr.isSetI()) + return false; + return isCTOnOff(pr.getI()); } /** @@ -284,7 +301,9 @@ public class XWPFRun { */ public boolean isStrike() { CTRPr pr = run.getRPr(); - return pr != null && pr.isSetStrike(); + if(pr == null || !pr.isSetStrike()) + return false; + return isCTOnOff(pr.getStrike()); } /** diff --git a/src/ooxml/testcases/org/apache/poi/xwpf/extractor/TestXWPFWordExtractor.java b/src/ooxml/testcases/org/apache/poi/xwpf/extractor/TestXWPFWordExtractor.java index 77315f795b..116b699066 100644 --- a/src/ooxml/testcases/org/apache/poi/xwpf/extractor/TestXWPFWordExtractor.java +++ b/src/ooxml/testcases/org/apache/poi/xwpf/extractor/TestXWPFWordExtractor.java @@ -98,16 +98,20 @@ public class TestXWPFWordExtractor extends TestCase { // Now check contents extractor.setFetchHyperlinks(false); assertEquals( - "This is a test document\nThis bit is in bold and italic\n" + - "Back to normal\nWe have a hyperlink here, and another.\n", + "This is a test document.\nThis bit is in bold and italic\n" + + "Back to normal\n" + + "This contains BOLD, ITALIC and BOTH, as well as RED and YELLOW text.\n" + + "We have a hyperlink here, and another.\n", extractor.getText() ); // One hyperlink is a real one, one is just to the top of page extractor.setFetchHyperlinks(true); assertEquals( - "This is a test document\nThis bit is in bold and italic\n" + - "Back to normal\nWe have a hyperlink here, and another.\n", + "This is a test document.\nThis bit is in bold and italic\n" + + "Back to normal\n" + + "This contains BOLD, ITALIC and BOTH, as well as RED and YELLOW text.\n" + + "We have a hyperlink here, and another.\n", extractor.getText() ); } diff --git a/src/ooxml/testcases/org/apache/poi/xwpf/usermodel/TestXWPFRun.java b/src/ooxml/testcases/org/apache/poi/xwpf/usermodel/TestXWPFRun.java index 1e55cc5e99..695f936a58 100644 --- a/src/ooxml/testcases/org/apache/poi/xwpf/usermodel/TestXWPFRun.java +++ b/src/ooxml/testcases/org/apache/poi/xwpf/usermodel/TestXWPFRun.java @@ -20,6 +20,7 @@ import java.math.BigInteger; import junit.framework.TestCase; +import org.apache.poi.xwpf.XWPFTestDataSamples; import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTBr; import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTR; import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTRPr; @@ -190,7 +191,139 @@ public class TestXWPFRun extends TestCase { run.addBreak(BreakType.TEXT_WRAPPING); assertEquals(2, run.getCTR().sizeOfBrArray()); } - + /** + * Test that on an existing document, we do the + * right thing with it + */ + public void testExisting() { + XWPFDocument doc = XWPFTestDataSamples.openSampleDocument("TestDocument.docx"); + XWPFParagraph p; + XWPFRun run; + + + // First paragraph is simple + p = doc.getParagraphArray(0); + assertEquals("This is a test document.", p.getText()); + assertEquals(2, p.getRuns().size()); + + run = p.getRuns().get(0); + assertEquals("This is a test document", run.toString()); + assertEquals(false, run.isBold()); + assertEquals(false, run.isItalic()); + assertEquals(false, run.isStrike()); + assertEquals(null, run.getCTR().getRPr()); + + run = p.getRuns().get(1); + assertEquals(".", run.toString()); + assertEquals(false, run.isBold()); + assertEquals(false, run.isItalic()); + assertEquals(false, run.isStrike()); + assertEquals(null, run.getCTR().getRPr()); + + + // Next paragraph is all in one style, but a different one + p = doc.getParagraphArray(1); + assertEquals("This bit is in bold and italic", p.getText()); + assertEquals(1, p.getRuns().size()); + + run = p.getRuns().get(0); + assertEquals("This bit is in bold and italic", run.toString()); + assertEquals(true, run.isBold()); + assertEquals(true, run.isItalic()); + assertEquals(false, run.isStrike()); + assertEquals(true, run.getCTR().getRPr().isSetB()); + assertEquals(false, run.getCTR().getRPr().getB().isSetVal()); + + + // Back to normal + p = doc.getParagraphArray(2); + assertEquals("Back to normal", p.getText()); + assertEquals(1, p.getRuns().size()); + + run = p.getRuns().get(0); + assertEquals("Back to normal", run.toString()); + assertEquals(false, run.isBold()); + assertEquals(false, run.isItalic()); + assertEquals(false, run.isStrike()); + assertEquals(null, run.getCTR().getRPr()); + + + // Different styles in one paragraph + p = doc.getParagraphArray(3); + assertEquals("This contains BOLD, ITALIC and BOTH, as well as RED and YELLOW text.", p.getText()); + assertEquals(11, p.getRuns().size()); + + run = p.getRuns().get(0); + assertEquals("This contains ", run.toString()); + assertEquals(false, run.isBold()); + assertEquals(false, run.isItalic()); + assertEquals(false, run.isStrike()); + assertEquals(null, run.getCTR().getRPr()); + + run = p.getRuns().get(1); + assertEquals("BOLD", run.toString()); + assertEquals(true, run.isBold()); + assertEquals(false, run.isItalic()); + assertEquals(false, run.isStrike()); + + run = p.getRuns().get(2); + assertEquals(", ", run.toString()); + assertEquals(false, run.isBold()); + assertEquals(false, run.isItalic()); + assertEquals(false, run.isStrike()); + assertEquals(null, run.getCTR().getRPr()); + + run = p.getRuns().get(3); + assertEquals("ITALIC", run.toString()); + assertEquals(false, run.isBold()); + assertEquals(true, run.isItalic()); + assertEquals(false, run.isStrike()); + + run = p.getRuns().get(4); + assertEquals(" and ", run.toString()); + assertEquals(false, run.isBold()); + assertEquals(false, run.isItalic()); + assertEquals(false, run.isStrike()); + assertEquals(null, run.getCTR().getRPr()); + + run = p.getRuns().get(5); + assertEquals("BOTH", run.toString()); + assertEquals(true, run.isBold()); + assertEquals(true, run.isItalic()); + assertEquals(false, run.isStrike()); + + run = p.getRuns().get(6); + assertEquals(", as well as ", run.toString()); + assertEquals(false, run.isBold()); + assertEquals(false, run.isItalic()); + assertEquals(false, run.isStrike()); + assertEquals(null, run.getCTR().getRPr()); + + run = p.getRuns().get(7); + assertEquals("RED", run.toString()); + assertEquals(false, run.isBold()); + assertEquals(false, run.isItalic()); + assertEquals(false, run.isStrike()); + + run = p.getRuns().get(8); + assertEquals(" and ", run.toString()); + assertEquals(false, run.isBold()); + assertEquals(false, run.isItalic()); + assertEquals(false, run.isStrike()); + assertEquals(null, run.getCTR().getRPr()); + + run = p.getRuns().get(9); + assertEquals("YELLOW", run.toString()); + assertEquals(false, run.isBold()); + assertEquals(false, run.isItalic()); + assertEquals(false, run.isStrike()); + + run = p.getRuns().get(10); + assertEquals(" text.", run.toString()); + assertEquals(false, run.isBold()); + assertEquals(false, run.isItalic()); + assertEquals(false, run.isStrike()); + assertEquals(null, run.getCTR().getRPr()); + } } - diff --git a/test-data/document/TestDocument.docx b/test-data/document/TestDocument.docx index 058dec5e4c..d87a542ecd 100644 Binary files a/test-data/document/TestDocument.docx and b/test-data/document/TestDocument.docx differ