aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--src/documentation/content/xdocs/status.xml1
-rw-r--r--src/ooxml/java/org/apache/poi/xwpf/usermodel/XWPFParagraph.java401
-rw-r--r--src/ooxml/testcases/org/apache/poi/xwpf/usermodel/TestXWPFSmartTag.java39
-rw-r--r--test-data/document/smarttag-snippet.docxbin0 -> 11862 bytes
4 files changed, 245 insertions, 196 deletions
diff --git a/src/documentation/content/xdocs/status.xml b/src/documentation/content/xdocs/status.xml
index 6db3227e44..12a3c9a4e7 100644
--- a/src/documentation/content/xdocs/status.xml
+++ b/src/documentation/content/xdocs/status.xml
@@ -34,6 +34,7 @@
<changes>
<release version="3.8-beta5" date="2011-??-??">
+ <action dev="poi-developers" type="add">52285 - Support XWPF smart tags text in Paragraphs</action>
<action dev="poi-developers" type="fix">51875 - More XSSF new-line in formula support</action>
<action dev="poi-developers" type="add">POIFS EntryUtils.copyNodes(POFS,POIFS) now uses FilteringDirectoryNode, so can exclude from copying nodes not just directly under the root</action>
<action dev="poi-developers" type="add">POIFS Helper FilteringDirectoryNode, which wraps a DirectoryEntry and allows certain parts to be ignored</action>
diff --git a/src/ooxml/java/org/apache/poi/xwpf/usermodel/XWPFParagraph.java b/src/ooxml/java/org/apache/poi/xwpf/usermodel/XWPFParagraph.java
index 5190f22840..383fa13e68 100644
--- a/src/ooxml/java/org/apache/poi/xwpf/usermodel/XWPFParagraph.java
+++ b/src/ooxml/java/org/apache/poi/xwpf/usermodel/XWPFParagraph.java
@@ -41,6 +41,7 @@ import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTRunTrackChange;
import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTSdtContentRun;
import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTSdtRun;
import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTSimpleField;
+import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTSmartTagRun;
import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTSpacing;
import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTString;
import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTText;
@@ -73,87 +74,95 @@ public class XWPFParagraph implements IBodyElement {
throw new NullPointerException();
}
+ // Build up the character runs
runs = new ArrayList<XWPFRun>();
+ buildRunsInOrderFromXml(paragraph);
- // Get all our child nodes in order, and process them
- // into XWPFRuns where we can
- XmlCursor c = paragraph.newCursor();
- c.selectPath("child::*");
- while (c.toNextSelection()) {
- XmlObject o = c.getObject();
- if(o instanceof CTR) {
- runs.add(new XWPFRun((CTR)o, this));
- }
- if(o instanceof CTHyperlink) {
- CTHyperlink link = (CTHyperlink)o;
- for(CTR r : link.getRList()) {
- runs.add(new XWPFHyperlinkRun(link, r, this));
- }
- }
- if(o instanceof CTSdtRun) {
- CTSdtContentRun run = ((CTSdtRun)o).getSdtContent();
- for(CTR r : run.getRList()) {
- runs.add(new XWPFRun(r, this));
- }
- }
- if(o instanceof CTRunTrackChange) {
- for(CTR r : ((CTRunTrackChange)o).getRList()) {
- runs.add(new XWPFRun(r, this));
- }
- }
- if(o instanceof CTSimpleField) {
- for(CTR r : ((CTSimpleField)o).getRList()) {
- runs.add(new XWPFRun(r, this));
- }
- }
- }
+ // Look for bits associated with the runs
+ for(XWPFRun run : runs) {
+ CTR r = run.getCTR();
- c.dispose();
-
- // Look for bits associated with the runs
- for(XWPFRun run : runs) {
- CTR r = run.getCTR();
-
- // Check for bits that only apply when
- // attached to a core document
- // TODO Make this nicer by tracking the XWPFFootnotes directly
- if(document != null) {
- c = r.newCursor();
- c.selectPath("child::*");
- while (c.toNextSelection()) {
+ // Check for bits that only apply when attached to a core document
+ // TODO Make this nicer by tracking the XWPFFootnotes directly
+ XmlCursor c = r.newCursor();
+ c.selectPath("child::*");
+ while (c.toNextSelection()) {
XmlObject o = c.getObject();
if(o instanceof CTFtnEdnRef) {
- CTFtnEdnRef ftn = (CTFtnEdnRef)o;
- footnoteText.append("[").append(ftn.getId()).append(": ");
- XWPFFootnote footnote =
- ftn.getDomNode().getLocalName().equals("footnoteReference") ?
+ CTFtnEdnRef ftn = (CTFtnEdnRef)o;
+ footnoteText.append("[").append(ftn.getId()).append(": ");
+ XWPFFootnote footnote =
+ ftn.getDomNode().getLocalName().equals("footnoteReference") ?
document.getFootnoteByID(ftn.getId().intValue()) :
document.getEndnoteByID(ftn.getId().intValue());
-
- boolean first = true;
- for (XWPFParagraph p : footnote.getParagraphs()) {
- if (!first) {
- footnoteText.append("\n");
- first = false;
- }
- footnoteText.append(p.getText());
- }
-
- footnoteText.append("]");
+
+ boolean first = true;
+ for (XWPFParagraph p : footnote.getParagraphs()) {
+ if (!first) {
+ footnoteText.append("\n");
+ first = false;
+ }
+ footnoteText.append(p.getText());
+ }
+
+ footnoteText.append("]");
}
- }
- c.dispose();
- }
- }
+ }
+ c.dispose();
+ }
}
+ /**
+ * Identifies (in order) the parts of the paragraph /
+ * sub-paragraph that correspond to character text
+ * runs, and builds the appropriate runs for these.
+ */
+ private void buildRunsInOrderFromXml(XmlObject object) {
+ XmlCursor c = object.newCursor();
+ c.selectPath("child::*");
+ while (c.toNextSelection()) {
+ XmlObject o = c.getObject();
+ if (o instanceof CTR) {
+ runs.add(new XWPFRun((CTR) o, this));
+ }
+ if (o instanceof CTHyperlink) {
+ CTHyperlink link = (CTHyperlink) o;
+ for (CTR r : link.getRList()) {
+ runs.add(new XWPFHyperlinkRun(link, r, this));
+ }
+ }
+ if (o instanceof CTSdtRun) {
+ CTSdtContentRun run = ((CTSdtRun) o).getSdtContent();
+ for (CTR r : run.getRList()) {
+ runs.add(new XWPFRun(r, this));
+ }
+ }
+ if (o instanceof CTRunTrackChange) {
+ for (CTR r : ((CTRunTrackChange) o).getRList()) {
+ runs.add(new XWPFRun(r, this));
+ }
+ }
+ if (o instanceof CTSimpleField) {
+ for (CTR r : ((CTSimpleField) o).getRList()) {
+ runs.add(new XWPFRun(r, this));
+ }
+ }
+ if (o instanceof CTSmartTagRun) {
+ // Smart Tags can be nested many times.
+ // This implementation does not preserve the tagging information
+ buildRunsInOrderFromXml(o);
+ }
+ }
+ c.dispose();
+ }
+
@Internal
public CTP getCTP() {
return paragraph;
}
public List<XWPFRun> getRuns(){
- return Collections.unmodifiableList(runs);
+ return Collections.unmodifiableList(runs);
}
public boolean isEmpty(){
@@ -176,35 +185,35 @@ public class XWPFParagraph implements IBodyElement {
out.append(footnoteText);
return out.toString();
}
-
- /**
- * Return styleID of the paragraph if style exist for this paragraph
- * if not, null will be returned
- * @return styleID as String
- */
+
+ /**
+ * Return styleID of the paragraph if style exist for this paragraph
+ * if not, null will be returned
+ * @return styleID as String
+ */
public String getStyleID(){
- if (paragraph.getPPr() != null){
- if(paragraph.getPPr().getPStyle()!= null){
- if (paragraph.getPPr().getPStyle().getVal()!= null)
- return paragraph.getPPr().getPStyle().getVal();
- }
- }
- return null;
- }
+ if (paragraph.getPPr() != null){
+ if(paragraph.getPPr().getPStyle()!= null){
+ if (paragraph.getPPr().getPStyle().getVal()!= null)
+ return paragraph.getPPr().getPStyle().getVal();
+ }
+ }
+ return null;
+ }
/**
* If style exist for this paragraph
* NumId of the paragraph will be returned.
- * If style not exist null will be returned
- * @return NumID as BigInteger
+ * If style not exist null will be returned
+ * @return NumID as BigInteger
*/
public BigInteger getNumID(){
- if(paragraph.getPPr()!=null){
- if(paragraph.getPPr().getNumPr()!=null){
- if(paragraph.getPPr().getNumPr().getNumId()!=null)
- return paragraph.getPPr().getNumPr().getNumId().getVal();
- }
- }
- return null;
+ if(paragraph.getPPr()!=null){
+ if(paragraph.getPPr().getNumPr()!=null){
+ if(paragraph.getPPr().getNumPr().getNumId()!=null)
+ return paragraph.getPPr().getNumPr().getNumId().getVal();
+ }
+ }
+ return null;
}
/**
@@ -212,14 +221,14 @@ public class XWPFParagraph implements IBodyElement {
* @param numPos
*/
public void setNumID(BigInteger numPos) {
- if(paragraph.getPPr()==null)
- paragraph.addNewPPr();
- if(paragraph.getPPr().getNumPr()==null)
- paragraph.getPPr().addNewNumPr();
- if(paragraph.getPPr().getNumPr().getNumId()==null){
- paragraph.getPPr().getNumPr().addNewNumId();
- }
- paragraph.getPPr().getNumPr().getNumId().setVal(numPos);
+ if(paragraph.getPPr()==null)
+ paragraph.addNewPPr();
+ if(paragraph.getPPr().getNumPr()==null)
+ paragraph.getPPr().addNewNumPr();
+ if(paragraph.getPPr().getNumPr().getNumId()==null){
+ paragraph.getPPr().getNumPr().addNewNumId();
+ }
+ paragraph.getPPr().getNumPr().getNumId().setVal(numPos);
}
/**
@@ -1027,18 +1036,18 @@ public class XWPFParagraph implements IBodyElement {
* @param newStyle
*/
public void setStyle(String newStyle) {
- CTPPr pr = getCTPPr();
- CTString style = pr.getPStyle() != null ? pr.getPStyle() : pr.addNewPStyle();
- style.setVal(newStyle);
+ CTPPr pr = getCTPPr();
+ CTString style = pr.getPStyle() != null ? pr.getPStyle() : pr.addNewPStyle();
+ style.setVal(newStyle);
}
/**
* @return the style of the paragraph
*/
public String getStyle() {
- CTPPr pr = getCTPPr();
- CTString style = pr.isSetPStyle() ? pr.getPStyle() : null;
- return style != null ? style.getVal() : null;
+ CTPPr pr = getCTPPr();
+ CTString style = pr.isSetPStyle() ? pr.getPStyle() : null;
+ return style != null ? style.getVal() : null;
}
/**
@@ -1094,10 +1103,10 @@ public class XWPFParagraph implements IBodyElement {
* @param run
*/
protected void addRun(CTR run){
- int pos;
- pos = paragraph.getRList().size();
- paragraph.addNewR();
- paragraph.setRArray(pos, run);
+ int pos;
+ pos = paragraph.getRList().size();
+ paragraph.addNewR();
+ paragraph.setRArray(pos, run);
}
/**
@@ -1108,65 +1117,65 @@ public class XWPFParagraph implements IBodyElement {
* @param startPos
*/
public TextSegement searchText(String searched,PositionInParagraph startPos){
-
- int startRun = startPos.getRun(),
- startText = startPos.getText(),
- startChar = startPos.getChar();
- int beginRunPos = 0, candCharPos = 0;
- boolean newList = false;
- for (int runPos=startRun; runPos<paragraph.getRList().size(); runPos++) {
- int beginTextPos = 0,beginCharPos = 0, textPos = 0, charPos = 0;
- CTR ctRun = paragraph.getRArray(runPos);
- XmlCursor c = ctRun.newCursor();
- c.selectPath("./*");
- while(c.toNextSelection()){
- XmlObject o = c.getObject();
- if(o instanceof CTText){
- if(textPos>=startText){
- String candidate = ((CTText)o).getStringValue();
- if(runPos==startRun)
- charPos= startChar;
- else
- charPos = 0;
- for(; charPos<candidate.length(); charPos++){
- if((candidate.charAt(charPos)==searched.charAt(0))&&(candCharPos==0)){
- beginTextPos = textPos;
- beginCharPos = charPos;
- beginRunPos = runPos;
- newList = true;
- }
- if(candidate.charAt(charPos)==searched.charAt(candCharPos)){
- if(candCharPos+1<searched.length())
- candCharPos++;
- else if(newList){
- TextSegement segement = new TextSegement();
- segement.setBeginRun(beginRunPos);
- segement.setBeginText(beginTextPos);
- segement.setBeginChar(beginCharPos);
- segement.setEndRun(runPos);
- segement.setEndText(textPos);
- segement.setEndChar(charPos);
- return segement;
- }
- }
- else
- candCharPos=0;
- }
- }
- textPos++;
- }
- else if(o instanceof CTProofErr){
- c.removeXml();
- }
- else if(o instanceof CTRPr);
- //do nothing
- else
- candCharPos=0;
- }
+
+ int startRun = startPos.getRun(),
+ startText = startPos.getText(),
+ startChar = startPos.getChar();
+ int beginRunPos = 0, candCharPos = 0;
+ boolean newList = false;
+ for (int runPos=startRun; runPos<paragraph.getRList().size(); runPos++) {
+ int beginTextPos = 0,beginCharPos = 0, textPos = 0, charPos = 0;
+ CTR ctRun = paragraph.getRArray(runPos);
+ XmlCursor c = ctRun.newCursor();
+ c.selectPath("./*");
+ while(c.toNextSelection()){
+ XmlObject o = c.getObject();
+ if(o instanceof CTText){
+ if(textPos>=startText){
+ String candidate = ((CTText)o).getStringValue();
+ if(runPos==startRun)
+ charPos= startChar;
+ else
+ charPos = 0;
+ for(; charPos<candidate.length(); charPos++){
+ if((candidate.charAt(charPos)==searched.charAt(0))&&(candCharPos==0)){
+ beginTextPos = textPos;
+ beginCharPos = charPos;
+ beginRunPos = runPos;
+ newList = true;
+ }
+ if(candidate.charAt(charPos)==searched.charAt(candCharPos)){
+ if(candCharPos+1<searched.length())
+ candCharPos++;
+ else if(newList){
+ TextSegement segement = new TextSegement();
+ segement.setBeginRun(beginRunPos);
+ segement.setBeginText(beginTextPos);
+ segement.setBeginChar(beginCharPos);
+ segement.setEndRun(runPos);
+ segement.setEndText(textPos);
+ segement.setEndChar(charPos);
+ return segement;
+ }
+ }
+ else
+ candCharPos=0;
+ }
+ }
+ textPos++;
+ }
+ else if(o instanceof CTProofErr){
+ c.removeXml();
+ }
+ else if(o instanceof CTRPr);
+ //do nothing
+ else
+ candCharPos=0;
+ }
c.dispose();
- }
- return null;
+ }
+ return null;
}
/**
@@ -1175,13 +1184,13 @@ public class XWPFParagraph implements IBodyElement {
* @return the inserted run
*/
public XWPFRun insertNewRun(int pos){
- if (pos >= 0 && pos <= paragraph.sizeOfRArray()) {
- CTR ctRun = paragraph.insertNewR(pos);
- XWPFRun newRun = new XWPFRun(ctRun, this);
- runs.add(pos, newRun);
- return newRun;
- }
- return null;
+ if (pos >= 0 && pos <= paragraph.sizeOfRArray()) {
+ CTR ctRun = paragraph.insertNewR(pos);
+ XWPFRun newRun = new XWPFRun(ctRun, this);
+ runs.add(pos, newRun);
+ return newRun;
+ }
+ return null;
}
@@ -1196,27 +1205,27 @@ public class XWPFParagraph implements IBodyElement {
int charBegin = segment.getBeginChar();
int runEnd = segment.getEndRun();
int textEnd = segment.getEndText();
- int charEnd = segment.getEndChar();
+ int charEnd = segment.getEndChar();
StringBuffer out = new StringBuffer();
- for(int i=runBegin; i<=runEnd;i++){
- int startText=0, endText = paragraph.getRArray(i).getTList().size()-1;
- if(i==runBegin)
- startText=textBegin;
- if(i==runEnd)
- endText = textEnd;
- for(int j=startText;j<=endText;j++){
- String tmpText = paragraph.getRArray(i).getTArray(j).getStringValue();
- int startChar=0, endChar = tmpText.length()-1;
- if((j==textBegin)&&(i==runBegin))
- startChar=charBegin;
- if((j==textEnd)&&(i==runEnd)){
- endChar = charEnd;
- }
- out.append(tmpText.substring(startChar, endChar+1));
-
- }
- }
- return out.toString();
+ for(int i=runBegin; i<=runEnd;i++){
+ int startText=0, endText = paragraph.getRArray(i).getTList().size()-1;
+ if(i==runBegin)
+ startText=textBegin;
+ if(i==runEnd)
+ endText = textEnd;
+ for(int j=startText;j<=endText;j++){
+ String tmpText = paragraph.getRArray(i).getTArray(j).getStringValue();
+ int startChar=0, endChar = tmpText.length()-1;
+ if((j==textBegin)&&(i==runBegin))
+ startChar=charBegin;
+ if((j==textEnd)&&(i==runEnd)){
+ endChar = charEnd;
+ }
+ out.append(tmpText.substring(startChar, endChar+1));
+
+ }
+ }
+ return out.toString();
}
/**
@@ -1225,12 +1234,12 @@ public class XWPFParagraph implements IBodyElement {
* @return true if the run was removed
*/
public boolean removeRun(int pos){
- if (pos >= 0 && pos < paragraph.sizeOfRArray()){
- getCTP().removeR(pos);
- runs.remove(pos);
- return true;
- }
- return false;
+ if (pos >= 0 && pos < paragraph.sizeOfRArray()){
+ getCTP().removeR(pos);
+ runs.remove(pos);
+ return true;
+ }
+ return false;
}
/**
diff --git a/src/ooxml/testcases/org/apache/poi/xwpf/usermodel/TestXWPFSmartTag.java b/src/ooxml/testcases/org/apache/poi/xwpf/usermodel/TestXWPFSmartTag.java
new file mode 100644
index 0000000000..f12e168057
--- /dev/null
+++ b/src/ooxml/testcases/org/apache/poi/xwpf/usermodel/TestXWPFSmartTag.java
@@ -0,0 +1,39 @@
+/* ====================================================================
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+==================================================================== */
+package org.apache.poi.xwpf.usermodel;
+
+import java.io.IOException;
+
+import junit.framework.TestCase;
+
+import org.apache.poi.xwpf.XWPFTestDataSamples;
+
+/**
+ * Tests for reading SmartTags from Word docx.
+ *
+ * @author Fabian Lange
+ */
+public final class TestXWPFSmartTag extends TestCase {
+
+ public void testSmartTags() throws IOException {
+ XWPFDocument doc = XWPFTestDataSamples.openSampleDocument("smarttag-snippet.docx");
+ XWPFParagraph p = doc.getParagraphArray(0);
+ assertTrue(p.getText().contains("Carnegie Mellon University School of Computer Science"));
+ p = doc.getParagraphArray(2);
+ assertTrue(p.getText().contains("Alice's Adventures"));
+ }
+}
diff --git a/test-data/document/smarttag-snippet.docx b/test-data/document/smarttag-snippet.docx
new file mode 100644
index 0000000000..2c54f18d03
--- /dev/null
+++ b/test-data/document/smarttag-snippet.docx
Binary files differ