From: Nick Burch Date: Tue, 6 Dec 2011 04:31:04 +0000 (+0000) Subject: Patch from Fabian from bug #52285 - support Smart Tags in XWPF paragraphs, with test... X-Git-Tag: REL_3_8_BETA5~9 X-Git-Url: https://source.dussan.org/?a=commitdiff_plain;h=6ea60a0623f99bfe3e836ac2a993ec43a83aa82a;p=poi.git Patch from Fabian from bug #52285 - support Smart Tags in XWPF paragraphs, with test (and fixing indents) git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@1210774 13f79535-47bb-0310-9956-ffa450edef68 --- diff --git a/src/documentation/content/xdocs/status.xml b/src/documentation/content/xdocs/status.xml index 6db3227e44..12a3c9a4e7 100644 --- a/src/documentation/content/xdocs/status.xml +++ b/src/documentation/content/xdocs/status.xml @@ -34,6 +34,7 @@ + 52285 - Support XWPF smart tags text in Paragraphs 51875 - More XSSF new-line in formula support POIFS EntryUtils.copyNodes(POFS,POIFS) now uses FilteringDirectoryNode, so can exclude from copying nodes not just directly under the root POIFS Helper FilteringDirectoryNode, which wraps a DirectoryEntry and allows certain parts to be ignored diff --git a/src/ooxml/java/org/apache/poi/xwpf/usermodel/XWPFParagraph.java b/src/ooxml/java/org/apache/poi/xwpf/usermodel/XWPFParagraph.java index 5190f22840..383fa13e68 100644 --- a/src/ooxml/java/org/apache/poi/xwpf/usermodel/XWPFParagraph.java +++ b/src/ooxml/java/org/apache/poi/xwpf/usermodel/XWPFParagraph.java @@ -41,6 +41,7 @@ import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTRunTrackChange; import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTSdtContentRun; import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTSdtRun; import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTSimpleField; +import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTSmartTagRun; import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTSpacing; import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTString; import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTText; @@ -73,87 +74,95 @@ public class XWPFParagraph implements IBodyElement { throw new NullPointerException(); } + // Build up the character runs runs = new ArrayList(); + buildRunsInOrderFromXml(paragraph); - // Get all our child nodes in order, and process them - // into XWPFRuns where we can - XmlCursor c = paragraph.newCursor(); - c.selectPath("child::*"); - while (c.toNextSelection()) { - XmlObject o = c.getObject(); - if(o instanceof CTR) { - runs.add(new XWPFRun((CTR)o, this)); - } - if(o instanceof CTHyperlink) { - CTHyperlink link = (CTHyperlink)o; - for(CTR r : link.getRList()) { - runs.add(new XWPFHyperlinkRun(link, r, this)); - } - } - if(o instanceof CTSdtRun) { - CTSdtContentRun run = ((CTSdtRun)o).getSdtContent(); - for(CTR r : run.getRList()) { - runs.add(new XWPFRun(r, this)); - } - } - if(o instanceof CTRunTrackChange) { - for(CTR r : ((CTRunTrackChange)o).getRList()) { - runs.add(new XWPFRun(r, this)); - } - } - if(o instanceof CTSimpleField) { - for(CTR r : ((CTSimpleField)o).getRList()) { - runs.add(new XWPFRun(r, this)); - } - } - } + // Look for bits associated with the runs + for(XWPFRun run : runs) { + CTR r = run.getCTR(); - c.dispose(); - - // Look for bits associated with the runs - for(XWPFRun run : runs) { - CTR r = run.getCTR(); - - // Check for bits that only apply when - // attached to a core document - // TODO Make this nicer by tracking the XWPFFootnotes directly - if(document != null) { - c = r.newCursor(); - c.selectPath("child::*"); - while (c.toNextSelection()) { + // Check for bits that only apply when attached to a core document + // TODO Make this nicer by tracking the XWPFFootnotes directly + XmlCursor c = r.newCursor(); + c.selectPath("child::*"); + while (c.toNextSelection()) { XmlObject o = c.getObject(); if(o instanceof CTFtnEdnRef) { - CTFtnEdnRef ftn = (CTFtnEdnRef)o; - footnoteText.append("[").append(ftn.getId()).append(": "); - XWPFFootnote footnote = - ftn.getDomNode().getLocalName().equals("footnoteReference") ? + CTFtnEdnRef ftn = (CTFtnEdnRef)o; + footnoteText.append("[").append(ftn.getId()).append(": "); + XWPFFootnote footnote = + ftn.getDomNode().getLocalName().equals("footnoteReference") ? document.getFootnoteByID(ftn.getId().intValue()) : document.getEndnoteByID(ftn.getId().intValue()); - - boolean first = true; - for (XWPFParagraph p : footnote.getParagraphs()) { - if (!first) { - footnoteText.append("\n"); - first = false; - } - footnoteText.append(p.getText()); - } - - footnoteText.append("]"); + + boolean first = true; + for (XWPFParagraph p : footnote.getParagraphs()) { + if (!first) { + footnoteText.append("\n"); + first = false; + } + footnoteText.append(p.getText()); + } + + footnoteText.append("]"); } - } - c.dispose(); - } - } + } + c.dispose(); + } } + /** + * Identifies (in order) the parts of the paragraph / + * sub-paragraph that correspond to character text + * runs, and builds the appropriate runs for these. + */ + private void buildRunsInOrderFromXml(XmlObject object) { + XmlCursor c = object.newCursor(); + c.selectPath("child::*"); + while (c.toNextSelection()) { + XmlObject o = c.getObject(); + if (o instanceof CTR) { + runs.add(new XWPFRun((CTR) o, this)); + } + if (o instanceof CTHyperlink) { + CTHyperlink link = (CTHyperlink) o; + for (CTR r : link.getRList()) { + runs.add(new XWPFHyperlinkRun(link, r, this)); + } + } + if (o instanceof CTSdtRun) { + CTSdtContentRun run = ((CTSdtRun) o).getSdtContent(); + for (CTR r : run.getRList()) { + runs.add(new XWPFRun(r, this)); + } + } + if (o instanceof CTRunTrackChange) { + for (CTR r : ((CTRunTrackChange) o).getRList()) { + runs.add(new XWPFRun(r, this)); + } + } + if (o instanceof CTSimpleField) { + for (CTR r : ((CTSimpleField) o).getRList()) { + runs.add(new XWPFRun(r, this)); + } + } + if (o instanceof CTSmartTagRun) { + // Smart Tags can be nested many times. + // This implementation does not preserve the tagging information + buildRunsInOrderFromXml(o); + } + } + c.dispose(); + } + @Internal public CTP getCTP() { return paragraph; } public List getRuns(){ - return Collections.unmodifiableList(runs); + return Collections.unmodifiableList(runs); } public boolean isEmpty(){ @@ -176,35 +185,35 @@ public class XWPFParagraph implements IBodyElement { out.append(footnoteText); return out.toString(); } - - /** - * Return styleID of the paragraph if style exist for this paragraph - * if not, null will be returned - * @return styleID as String - */ + + /** + * Return styleID of the paragraph if style exist for this paragraph + * if not, null will be returned + * @return styleID as String + */ public String getStyleID(){ - if (paragraph.getPPr() != null){ - if(paragraph.getPPr().getPStyle()!= null){ - if (paragraph.getPPr().getPStyle().getVal()!= null) - return paragraph.getPPr().getPStyle().getVal(); - } - } - return null; - } + if (paragraph.getPPr() != null){ + if(paragraph.getPPr().getPStyle()!= null){ + if (paragraph.getPPr().getPStyle().getVal()!= null) + return paragraph.getPPr().getPStyle().getVal(); + } + } + return null; + } /** * If style exist for this paragraph * NumId of the paragraph will be returned. - * If style not exist null will be returned - * @return NumID as BigInteger + * If style not exist null will be returned + * @return NumID as BigInteger */ public BigInteger getNumID(){ - if(paragraph.getPPr()!=null){ - if(paragraph.getPPr().getNumPr()!=null){ - if(paragraph.getPPr().getNumPr().getNumId()!=null) - return paragraph.getPPr().getNumPr().getNumId().getVal(); - } - } - return null; + if(paragraph.getPPr()!=null){ + if(paragraph.getPPr().getNumPr()!=null){ + if(paragraph.getPPr().getNumPr().getNumId()!=null) + return paragraph.getPPr().getNumPr().getNumId().getVal(); + } + } + return null; } /** @@ -212,14 +221,14 @@ public class XWPFParagraph implements IBodyElement { * @param numPos */ public void setNumID(BigInteger numPos) { - if(paragraph.getPPr()==null) - paragraph.addNewPPr(); - if(paragraph.getPPr().getNumPr()==null) - paragraph.getPPr().addNewNumPr(); - if(paragraph.getPPr().getNumPr().getNumId()==null){ - paragraph.getPPr().getNumPr().addNewNumId(); - } - paragraph.getPPr().getNumPr().getNumId().setVal(numPos); + if(paragraph.getPPr()==null) + paragraph.addNewPPr(); + if(paragraph.getPPr().getNumPr()==null) + paragraph.getPPr().addNewNumPr(); + if(paragraph.getPPr().getNumPr().getNumId()==null){ + paragraph.getPPr().getNumPr().addNewNumId(); + } + paragraph.getPPr().getNumPr().getNumId().setVal(numPos); } /** @@ -1027,18 +1036,18 @@ public class XWPFParagraph implements IBodyElement { * @param newStyle */ public void setStyle(String newStyle) { - CTPPr pr = getCTPPr(); - CTString style = pr.getPStyle() != null ? pr.getPStyle() : pr.addNewPStyle(); - style.setVal(newStyle); + CTPPr pr = getCTPPr(); + CTString style = pr.getPStyle() != null ? pr.getPStyle() : pr.addNewPStyle(); + style.setVal(newStyle); } /** * @return the style of the paragraph */ public String getStyle() { - CTPPr pr = getCTPPr(); - CTString style = pr.isSetPStyle() ? pr.getPStyle() : null; - return style != null ? style.getVal() : null; + CTPPr pr = getCTPPr(); + CTString style = pr.isSetPStyle() ? pr.getPStyle() : null; + return style != null ? style.getVal() : null; } /** @@ -1094,10 +1103,10 @@ public class XWPFParagraph implements IBodyElement { * @param run */ protected void addRun(CTR run){ - int pos; - pos = paragraph.getRList().size(); - paragraph.addNewR(); - paragraph.setRArray(pos, run); + int pos; + pos = paragraph.getRList().size(); + paragraph.addNewR(); + paragraph.setRArray(pos, run); } /** @@ -1108,65 +1117,65 @@ public class XWPFParagraph implements IBodyElement { * @param startPos */ public TextSegement searchText(String searched,PositionInParagraph startPos){ - - int startRun = startPos.getRun(), - startText = startPos.getText(), - startChar = startPos.getChar(); - int beginRunPos = 0, candCharPos = 0; - boolean newList = false; - for (int runPos=startRun; runPos=startText){ - String candidate = ((CTText)o).getStringValue(); - if(runPos==startRun) - charPos= startChar; - else - charPos = 0; - for(; charPos=startText){ + String candidate = ((CTText)o).getStringValue(); + if(runPos==startRun) + charPos= startChar; + else + charPos = 0; + for(; charPos= 0 && pos <= paragraph.sizeOfRArray()) { - CTR ctRun = paragraph.insertNewR(pos); - XWPFRun newRun = new XWPFRun(ctRun, this); - runs.add(pos, newRun); - return newRun; - } - return null; + if (pos >= 0 && pos <= paragraph.sizeOfRArray()) { + CTR ctRun = paragraph.insertNewR(pos); + XWPFRun newRun = new XWPFRun(ctRun, this); + runs.add(pos, newRun); + return newRun; + } + return null; } @@ -1196,27 +1205,27 @@ public class XWPFParagraph implements IBodyElement { int charBegin = segment.getBeginChar(); int runEnd = segment.getEndRun(); int textEnd = segment.getEndText(); - int charEnd = segment.getEndChar(); + int charEnd = segment.getEndChar(); StringBuffer out = new StringBuffer(); - for(int i=runBegin; i<=runEnd;i++){ - int startText=0, endText = paragraph.getRArray(i).getTList().size()-1; - if(i==runBegin) - startText=textBegin; - if(i==runEnd) - endText = textEnd; - for(int j=startText;j<=endText;j++){ - String tmpText = paragraph.getRArray(i).getTArray(j).getStringValue(); - int startChar=0, endChar = tmpText.length()-1; - if((j==textBegin)&&(i==runBegin)) - startChar=charBegin; - if((j==textEnd)&&(i==runEnd)){ - endChar = charEnd; - } - out.append(tmpText.substring(startChar, endChar+1)); - - } - } - return out.toString(); + for(int i=runBegin; i<=runEnd;i++){ + int startText=0, endText = paragraph.getRArray(i).getTList().size()-1; + if(i==runBegin) + startText=textBegin; + if(i==runEnd) + endText = textEnd; + for(int j=startText;j<=endText;j++){ + String tmpText = paragraph.getRArray(i).getTArray(j).getStringValue(); + int startChar=0, endChar = tmpText.length()-1; + if((j==textBegin)&&(i==runBegin)) + startChar=charBegin; + if((j==textEnd)&&(i==runEnd)){ + endChar = charEnd; + } + out.append(tmpText.substring(startChar, endChar+1)); + + } + } + return out.toString(); } /** @@ -1225,12 +1234,12 @@ public class XWPFParagraph implements IBodyElement { * @return true if the run was removed */ public boolean removeRun(int pos){ - if (pos >= 0 && pos < paragraph.sizeOfRArray()){ - getCTP().removeR(pos); - runs.remove(pos); - return true; - } - return false; + if (pos >= 0 && pos < paragraph.sizeOfRArray()){ + getCTP().removeR(pos); + runs.remove(pos); + return true; + } + return false; } /** diff --git a/src/ooxml/testcases/org/apache/poi/xwpf/usermodel/TestXWPFSmartTag.java b/src/ooxml/testcases/org/apache/poi/xwpf/usermodel/TestXWPFSmartTag.java new file mode 100644 index 0000000000..f12e168057 --- /dev/null +++ b/src/ooxml/testcases/org/apache/poi/xwpf/usermodel/TestXWPFSmartTag.java @@ -0,0 +1,39 @@ +/* ==================================================================== + Licensed to the Apache Software Foundation (ASF) under one or more + contributor license agreements. See the NOTICE file distributed with + this work for additional information regarding copyright ownership. + The ASF licenses this file to You under the Apache License, Version 2.0 + (the "License"); you may not use this file except in compliance with + the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +==================================================================== */ +package org.apache.poi.xwpf.usermodel; + +import java.io.IOException; + +import junit.framework.TestCase; + +import org.apache.poi.xwpf.XWPFTestDataSamples; + +/** + * Tests for reading SmartTags from Word docx. + * + * @author Fabian Lange + */ +public final class TestXWPFSmartTag extends TestCase { + + public void testSmartTags() throws IOException { + XWPFDocument doc = XWPFTestDataSamples.openSampleDocument("smarttag-snippet.docx"); + XWPFParagraph p = doc.getParagraphArray(0); + assertTrue(p.getText().contains("Carnegie Mellon University School of Computer Science")); + p = doc.getParagraphArray(2); + assertTrue(p.getText().contains("Alice's Adventures")); + } +} diff --git a/test-data/document/smarttag-snippet.docx b/test-data/document/smarttag-snippet.docx new file mode 100644 index 0000000000..2c54f18d03 Binary files /dev/null and b/test-data/document/smarttag-snippet.docx differ