diff options
-rw-r--r-- | src/documentation/content/xdocs/status.xml | 1 | ||||
-rw-r--r-- | src/ooxml/java/org/apache/poi/xwpf/usermodel/XWPFParagraph.java | 401 | ||||
-rw-r--r-- | src/ooxml/testcases/org/apache/poi/xwpf/usermodel/TestXWPFSmartTag.java | 39 | ||||
-rw-r--r-- | test-data/document/smarttag-snippet.docx | bin | 0 -> 11862 bytes |
4 files changed, 245 insertions, 196 deletions
diff --git a/src/documentation/content/xdocs/status.xml b/src/documentation/content/xdocs/status.xml index 6db3227e44..12a3c9a4e7 100644 --- a/src/documentation/content/xdocs/status.xml +++ b/src/documentation/content/xdocs/status.xml @@ -34,6 +34,7 @@ <changes> <release version="3.8-beta5" date="2011-??-??"> + <action dev="poi-developers" type="add">52285 - Support XWPF smart tags text in Paragraphs</action> <action dev="poi-developers" type="fix">51875 - More XSSF new-line in formula support</action> <action dev="poi-developers" type="add">POIFS EntryUtils.copyNodes(POFS,POIFS) now uses FilteringDirectoryNode, so can exclude from copying nodes not just directly under the root</action> <action dev="poi-developers" type="add">POIFS Helper FilteringDirectoryNode, which wraps a DirectoryEntry and allows certain parts to be ignored</action> diff --git a/src/ooxml/java/org/apache/poi/xwpf/usermodel/XWPFParagraph.java b/src/ooxml/java/org/apache/poi/xwpf/usermodel/XWPFParagraph.java index 5190f22840..383fa13e68 100644 --- a/src/ooxml/java/org/apache/poi/xwpf/usermodel/XWPFParagraph.java +++ b/src/ooxml/java/org/apache/poi/xwpf/usermodel/XWPFParagraph.java @@ -41,6 +41,7 @@ import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTRunTrackChange; import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTSdtContentRun; import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTSdtRun; import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTSimpleField; +import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTSmartTagRun; import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTSpacing; import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTString; import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTText; @@ -73,87 +74,95 @@ public class XWPFParagraph implements IBodyElement { throw new NullPointerException(); } + // Build up the character runs runs = new ArrayList<XWPFRun>(); + 
buildRunsInOrderFromXml(paragraph); - // Get all our child nodes in order, and process them - // into XWPFRuns where we can - XmlCursor c = paragraph.newCursor(); - c.selectPath("child::*"); - while (c.toNextSelection()) { - XmlObject o = c.getObject(); - if(o instanceof CTR) { - runs.add(new XWPFRun((CTR)o, this)); - } - if(o instanceof CTHyperlink) { - CTHyperlink link = (CTHyperlink)o; - for(CTR r : link.getRList()) { - runs.add(new XWPFHyperlinkRun(link, r, this)); - } - } - if(o instanceof CTSdtRun) { - CTSdtContentRun run = ((CTSdtRun)o).getSdtContent(); - for(CTR r : run.getRList()) { - runs.add(new XWPFRun(r, this)); - } - } - if(o instanceof CTRunTrackChange) { - for(CTR r : ((CTRunTrackChange)o).getRList()) { - runs.add(new XWPFRun(r, this)); - } - } - if(o instanceof CTSimpleField) { - for(CTR r : ((CTSimpleField)o).getRList()) { - runs.add(new XWPFRun(r, this)); - } - } - } + // Look for bits associated with the runs + for(XWPFRun run : runs) { + CTR r = run.getCTR(); - c.dispose(); - - // Look for bits associated with the runs - for(XWPFRun run : runs) { - CTR r = run.getCTR(); - - // Check for bits that only apply when - // attached to a core document - // TODO Make this nicer by tracking the XWPFFootnotes directly - if(document != null) { - c = r.newCursor(); - c.selectPath("child::*"); - while (c.toNextSelection()) { + // Check for bits that only apply when attached to a core document + // TODO Make this nicer by tracking the XWPFFootnotes directly + XmlCursor c = r.newCursor(); + c.selectPath("child::*"); + while (c.toNextSelection()) { XmlObject o = c.getObject(); if(o instanceof CTFtnEdnRef) { - CTFtnEdnRef ftn = (CTFtnEdnRef)o; - footnoteText.append("[").append(ftn.getId()).append(": "); - XWPFFootnote footnote = - ftn.getDomNode().getLocalName().equals("footnoteReference") ? 
+ CTFtnEdnRef ftn = (CTFtnEdnRef)o; + footnoteText.append("[").append(ftn.getId()).append(": "); + XWPFFootnote footnote = + ftn.getDomNode().getLocalName().equals("footnoteReference") ? document.getFootnoteByID(ftn.getId().intValue()) : document.getEndnoteByID(ftn.getId().intValue()); - - boolean first = true; - for (XWPFParagraph p : footnote.getParagraphs()) { - if (!first) { - footnoteText.append("\n"); - first = false; - } - footnoteText.append(p.getText()); - } - - footnoteText.append("]"); + + boolean first = true; + for (XWPFParagraph p : footnote.getParagraphs()) { + if (!first) { + footnoteText.append("\n"); + first = false; + } + footnoteText.append(p.getText()); + } + + footnoteText.append("]"); } - } - c.dispose(); - } - } + } + c.dispose(); + } } + /** + * Identifies (in order) the parts of the paragraph / + * sub-paragraph that correspond to character text + * runs, and builds the appropriate runs for these. + */ + private void buildRunsInOrderFromXml(XmlObject object) { + XmlCursor c = object.newCursor(); + c.selectPath("child::*"); + while (c.toNextSelection()) { + XmlObject o = c.getObject(); + if (o instanceof CTR) { + runs.add(new XWPFRun((CTR) o, this)); + } + if (o instanceof CTHyperlink) { + CTHyperlink link = (CTHyperlink) o; + for (CTR r : link.getRList()) { + runs.add(new XWPFHyperlinkRun(link, r, this)); + } + } + if (o instanceof CTSdtRun) { + CTSdtContentRun run = ((CTSdtRun) o).getSdtContent(); + for (CTR r : run.getRList()) { + runs.add(new XWPFRun(r, this)); + } + } + if (o instanceof CTRunTrackChange) { + for (CTR r : ((CTRunTrackChange) o).getRList()) { + runs.add(new XWPFRun(r, this)); + } + } + if (o instanceof CTSimpleField) { + for (CTR r : ((CTSimpleField) o).getRList()) { + runs.add(new XWPFRun(r, this)); + } + } + if (o instanceof CTSmartTagRun) { + // Smart Tags can be nested many times. 
+ // This implementation does not preserve the tagging information + buildRunsInOrderFromXml(o); + } + } + c.dispose(); + } + @Internal public CTP getCTP() { return paragraph; } public List<XWPFRun> getRuns(){ - return Collections.unmodifiableList(runs); + return Collections.unmodifiableList(runs); } public boolean isEmpty(){ @@ -176,35 +185,35 @@ public class XWPFParagraph implements IBodyElement { out.append(footnoteText); return out.toString(); } - - /** - * Return styleID of the paragraph if style exist for this paragraph - * if not, null will be returned - * @return styleID as String - */ + + /** + * Return styleID of the paragraph if style exist for this paragraph + * if not, null will be returned + * @return styleID as String + */ public String getStyleID(){ - if (paragraph.getPPr() != null){ - if(paragraph.getPPr().getPStyle()!= null){ - if (paragraph.getPPr().getPStyle().getVal()!= null) - return paragraph.getPPr().getPStyle().getVal(); - } - } - return null; - } + if (paragraph.getPPr() != null){ + if(paragraph.getPPr().getPStyle()!= null){ + if (paragraph.getPPr().getPStyle().getVal()!= null) + return paragraph.getPPr().getPStyle().getVal(); + } + } + return null; + } /** * If style exist for this paragraph * NumId of the paragraph will be returned. 
- * If style not exist null will be returned - * @return NumID as BigInteger + * If style not exist null will be returned + * @return NumID as BigInteger */ public BigInteger getNumID(){ - if(paragraph.getPPr()!=null){ - if(paragraph.getPPr().getNumPr()!=null){ - if(paragraph.getPPr().getNumPr().getNumId()!=null) - return paragraph.getPPr().getNumPr().getNumId().getVal(); - } - } - return null; + if(paragraph.getPPr()!=null){ + if(paragraph.getPPr().getNumPr()!=null){ + if(paragraph.getPPr().getNumPr().getNumId()!=null) + return paragraph.getPPr().getNumPr().getNumId().getVal(); + } + } + return null; } /** @@ -212,14 +221,14 @@ public class XWPFParagraph implements IBodyElement { * @param numPos */ public void setNumID(BigInteger numPos) { - if(paragraph.getPPr()==null) - paragraph.addNewPPr(); - if(paragraph.getPPr().getNumPr()==null) - paragraph.getPPr().addNewNumPr(); - if(paragraph.getPPr().getNumPr().getNumId()==null){ - paragraph.getPPr().getNumPr().addNewNumId(); - } - paragraph.getPPr().getNumPr().getNumId().setVal(numPos); + if(paragraph.getPPr()==null) + paragraph.addNewPPr(); + if(paragraph.getPPr().getNumPr()==null) + paragraph.getPPr().addNewNumPr(); + if(paragraph.getPPr().getNumPr().getNumId()==null){ + paragraph.getPPr().getNumPr().addNewNumId(); + } + paragraph.getPPr().getNumPr().getNumId().setVal(numPos); } /** @@ -1027,18 +1036,18 @@ public class XWPFParagraph implements IBodyElement { * @param newStyle */ public void setStyle(String newStyle) { - CTPPr pr = getCTPPr(); - CTString style = pr.getPStyle() != null ? pr.getPStyle() : pr.addNewPStyle(); - style.setVal(newStyle); + CTPPr pr = getCTPPr(); + CTString style = pr.getPStyle() != null ? pr.getPStyle() : pr.addNewPStyle(); + style.setVal(newStyle); } /** * @return the style of the paragraph */ public String getStyle() { - CTPPr pr = getCTPPr(); - CTString style = pr.isSetPStyle() ? pr.getPStyle() : null; - return style != null ? 
style.getVal() : null; + CTPPr pr = getCTPPr(); + CTString style = pr.isSetPStyle() ? pr.getPStyle() : null; + return style != null ? style.getVal() : null; } /** @@ -1094,10 +1103,10 @@ public class XWPFParagraph implements IBodyElement { * @param run */ protected void addRun(CTR run){ - int pos; - pos = paragraph.getRList().size(); - paragraph.addNewR(); - paragraph.setRArray(pos, run); + int pos; + pos = paragraph.getRList().size(); + paragraph.addNewR(); + paragraph.setRArray(pos, run); } /** @@ -1108,65 +1117,65 @@ public class XWPFParagraph implements IBodyElement { * @param startPos */ public TextSegement searchText(String searched,PositionInParagraph startPos){ - - int startRun = startPos.getRun(), - startText = startPos.getText(), - startChar = startPos.getChar(); - int beginRunPos = 0, candCharPos = 0; - boolean newList = false; - for (int runPos=startRun; runPos<paragraph.getRList().size(); runPos++) { - int beginTextPos = 0,beginCharPos = 0, textPos = 0, charPos = 0; - CTR ctRun = paragraph.getRArray(runPos); - XmlCursor c = ctRun.newCursor(); - c.selectPath("./*"); - while(c.toNextSelection()){ - XmlObject o = c.getObject(); - if(o instanceof CTText){ - if(textPos>=startText){ - String candidate = ((CTText)o).getStringValue(); - if(runPos==startRun) - charPos= startChar; - else - charPos = 0; - for(; charPos<candidate.length(); charPos++){ - if((candidate.charAt(charPos)==searched.charAt(0))&&(candCharPos==0)){ - beginTextPos = textPos; - beginCharPos = charPos; - beginRunPos = runPos; - newList = true; - } - if(candidate.charAt(charPos)==searched.charAt(candCharPos)){ - if(candCharPos+1<searched.length()) - candCharPos++; - else if(newList){ - TextSegement segement = new TextSegement(); - segement.setBeginRun(beginRunPos); - segement.setBeginText(beginTextPos); - segement.setBeginChar(beginCharPos); - segement.setEndRun(runPos); - segement.setEndText(textPos); - segement.setEndChar(charPos); - return segement; - } - } - else - candCharPos=0; - } - } - 
textPos++; - } - else if(o instanceof CTProofErr){ - c.removeXml(); - } - else if(o instanceof CTRPr); - //do nothing - else - candCharPos=0; - } + + int startRun = startPos.getRun(), + startText = startPos.getText(), + startChar = startPos.getChar(); + int beginRunPos = 0, candCharPos = 0; + boolean newList = false; + for (int runPos=startRun; runPos<paragraph.getRList().size(); runPos++) { + int beginTextPos = 0,beginCharPos = 0, textPos = 0, charPos = 0; + CTR ctRun = paragraph.getRArray(runPos); + XmlCursor c = ctRun.newCursor(); + c.selectPath("./*"); + while(c.toNextSelection()){ + XmlObject o = c.getObject(); + if(o instanceof CTText){ + if(textPos>=startText){ + String candidate = ((CTText)o).getStringValue(); + if(runPos==startRun) + charPos= startChar; + else + charPos = 0; + for(; charPos<candidate.length(); charPos++){ + if((candidate.charAt(charPos)==searched.charAt(0))&&(candCharPos==0)){ + beginTextPos = textPos; + beginCharPos = charPos; + beginRunPos = runPos; + newList = true; + } + if(candidate.charAt(charPos)==searched.charAt(candCharPos)){ + if(candCharPos+1<searched.length()) + candCharPos++; + else if(newList){ + TextSegement segement = new TextSegement(); + segement.setBeginRun(beginRunPos); + segement.setBeginText(beginTextPos); + segement.setBeginChar(beginCharPos); + segement.setEndRun(runPos); + segement.setEndText(textPos); + segement.setEndChar(charPos); + return segement; + } + } + else + candCharPos=0; + } + } + textPos++; + } + else if(o instanceof CTProofErr){ + c.removeXml(); + } + else if(o instanceof CTRPr); + //do nothing + else + candCharPos=0; + } c.dispose(); - } - return null; + } + return null; } /** @@ -1175,13 +1184,13 @@ public class XWPFParagraph implements IBodyElement { * @return the inserted run */ public XWPFRun insertNewRun(int pos){ - if (pos >= 0 && pos <= paragraph.sizeOfRArray()) { - CTR ctRun = paragraph.insertNewR(pos); - XWPFRun newRun = new XWPFRun(ctRun, this); - runs.add(pos, newRun); - return newRun; - 
} - return null; + if (pos >= 0 && pos <= paragraph.sizeOfRArray()) { + CTR ctRun = paragraph.insertNewR(pos); + XWPFRun newRun = new XWPFRun(ctRun, this); + runs.add(pos, newRun); + return newRun; + } + return null; } @@ -1196,27 +1205,27 @@ public class XWPFParagraph implements IBodyElement { int charBegin = segment.getBeginChar(); int runEnd = segment.getEndRun(); int textEnd = segment.getEndText(); - int charEnd = segment.getEndChar(); + int charEnd = segment.getEndChar(); StringBuffer out = new StringBuffer(); - for(int i=runBegin; i<=runEnd;i++){ - int startText=0, endText = paragraph.getRArray(i).getTList().size()-1; - if(i==runBegin) - startText=textBegin; - if(i==runEnd) - endText = textEnd; - for(int j=startText;j<=endText;j++){ - String tmpText = paragraph.getRArray(i).getTArray(j).getStringValue(); - int startChar=0, endChar = tmpText.length()-1; - if((j==textBegin)&&(i==runBegin)) - startChar=charBegin; - if((j==textEnd)&&(i==runEnd)){ - endChar = charEnd; - } - out.append(tmpText.substring(startChar, endChar+1)); - - } - } - return out.toString(); + for(int i=runBegin; i<=runEnd;i++){ + int startText=0, endText = paragraph.getRArray(i).getTList().size()-1; + if(i==runBegin) + startText=textBegin; + if(i==runEnd) + endText = textEnd; + for(int j=startText;j<=endText;j++){ + String tmpText = paragraph.getRArray(i).getTArray(j).getStringValue(); + int startChar=0, endChar = tmpText.length()-1; + if((j==textBegin)&&(i==runBegin)) + startChar=charBegin; + if((j==textEnd)&&(i==runEnd)){ + endChar = charEnd; + } + out.append(tmpText.substring(startChar, endChar+1)); + + } + } + return out.toString(); } /** @@ -1225,12 +1234,12 @@ public class XWPFParagraph implements IBodyElement { * @return true if the run was removed */ public boolean removeRun(int pos){ - if (pos >= 0 && pos < paragraph.sizeOfRArray()){ - getCTP().removeR(pos); - runs.remove(pos); - return true; - } - return false; + if (pos >= 0 && pos < paragraph.sizeOfRArray()){ + 
getCTP().removeR(pos); + runs.remove(pos); + return true; + } + return false; } /** diff --git a/src/ooxml/testcases/org/apache/poi/xwpf/usermodel/TestXWPFSmartTag.java b/src/ooxml/testcases/org/apache/poi/xwpf/usermodel/TestXWPFSmartTag.java new file mode 100644 index 0000000000..f12e168057 --- /dev/null +++ b/src/ooxml/testcases/org/apache/poi/xwpf/usermodel/TestXWPFSmartTag.java @@ -0,0 +1,39 @@ +/* ====================================================================
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+==================================================================== */
+package org.apache.poi.xwpf.usermodel;
+
+import java.io.IOException;
+
+import junit.framework.TestCase;
+
+import org.apache.poi.xwpf.XWPFTestDataSamples;
+
+/**
+ * Tests for reading SmartTags from Word docx: opens the sample "smarttag-snippet.docx" and checks that the paragraph text contains the expected phrases.
+ * NOTE(review): presumably the asserted phrases are stored inside smart tag elements in the sample document - confirm against the fixture.
+ * @author Fabian Lange
+ */
+public final class TestXWPFSmartTag extends TestCase {
+
+ public void testSmartTags() throws IOException { // IOException is declared and propagated rather than caught in the test
+ XWPFDocument doc = XWPFTestDataSamples.openSampleDocument("smarttag-snippet.docx"); // sample document loaded through the shared test-data helper
+ XWPFParagraph p = doc.getParagraphArray(0); // first paragraph of the sample
+ assertTrue(p.getText().contains("Carnegie Mellon University School of Computer Science")); // substring check only; the full paragraph text is not pinned
+ p = doc.getParagraphArray(2); // third paragraph (index 2); the paragraph at index 1 is not checked
+ assertTrue(p.getText().contains("Alice's Adventures")); // substring check only, as above
+ }
+}
diff --git a/test-data/document/smarttag-snippet.docx b/test-data/document/smarttag-snippet.docx Binary files differ new file mode 100644 index 0000000000..2c54f18d03 --- /dev/null +++ b/test-data/document/smarttag-snippet.docx |