source.dussan.org Git - poi.git/commitdiff
Patch from Fabian from bug #52285 - support Smart Tags in XWPF paragraphs, with test...
author Nick Burch <nick@apache.org>
Tue, 6 Dec 2011 04:31:04 +0000 (04:31 +0000)
committer Nick Burch <nick@apache.org>
Tue, 6 Dec 2011 04:31:04 +0000 (04:31 +0000)
git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@1210774 13f79535-47bb-0310-9956-ffa450edef68

src/documentation/content/xdocs/status.xml
src/ooxml/java/org/apache/poi/xwpf/usermodel/XWPFParagraph.java
src/ooxml/testcases/org/apache/poi/xwpf/usermodel/TestXWPFSmartTag.java [new file with mode: 0644]
test-data/document/smarttag-snippet.docx [new file with mode: 0644]

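diff --git a/src/documentation/content/xdocs/status.xml b/src/documentation/content/xdocs/status.xml
index 6db3227e446ef84fcc9619fa1d1e3a412fe82c28..12a3c9a4e7453afa190d42747658010ce631b1d4 100644 (file)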
@@ -34,6 +34,7 @@
 
     <changes>
         <release version="3.8-beta5" date="2011-??-??">
+           <action dev="poi-developers" type="add">52285 - Support XWPF smart tags text in Paragraphs</action>
            <action dev="poi-developers" type="fix">51875 - More XSSF new-line in formula support</action>
            <action dev="poi-developers" type="add">POIFS EntryUtils.copyNodes(POFS,POIFS) now uses FilteringDirectoryNode, so can exclude from copying nodes not just directly under the root</action>
            <action dev="poi-developers" type="add">POIFS Helper FilteringDirectoryNode, which wraps a DirectoryEntry and allows certain parts to be ignored</action>
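diff --git a/src/ooxml/java/org/apache/poi/xwpf/usermodel/XWPFParagraph.java b/src/ooxml/java/org/apache/poi/xwpf/usermodel/XWPFParagraph.java
index 5190f22840925087208f071e2035402a84591757..383fa13e68754c6056d1f0b4f653ddf6ce5312f0 100644 (file)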
@@ -41,6 +41,7 @@ import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTRunTrackChange;
 import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTSdtContentRun;
 import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTSdtRun;
 import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTSimpleField;
+import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTSmartTagRun;
 import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTSpacing;
 import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTString;
 import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTText;
@@ -73,87 +74,95 @@ public class XWPFParagraph implements IBodyElement {
             throw new NullPointerException();
         }
 
+        // Build up the character runs
         runs = new ArrayList<XWPFRun>();
+        buildRunsInOrderFromXml(paragraph);
 
-       // Get all our child nodes in order, and process them
-       //  into XWPFRuns where we can
-       XmlCursor c = paragraph.newCursor();
-       c.selectPath("child::*");
-       while (c.toNextSelection()) {
-          XmlObject o = c.getObject();
-          if(o instanceof CTR) {
-             runs.add(new XWPFRun((CTR)o, this));
-          }
-          if(o instanceof CTHyperlink) {
-             CTHyperlink link = (CTHyperlink)o;
-             for(CTR r : link.getRList()) {
-                runs.add(new XWPFHyperlinkRun(link, r, this));
-             }
-          }
-          if(o instanceof CTSdtRun) {
-             CTSdtContentRun run = ((CTSdtRun)o).getSdtContent();
-             for(CTR r : run.getRList()) {
-                runs.add(new XWPFRun(r, this));
-             }
-          }
-          if(o instanceof CTRunTrackChange) {
-             for(CTR r : ((CTRunTrackChange)o).getRList()) {
-                runs.add(new XWPFRun(r, this));
-             }
-          }
-          if(o instanceof CTSimpleField) {
-             for(CTR r : ((CTSimpleField)o).getRList()) {
-                runs.add(new XWPFRun(r, this));
-             }
-          }
-       }
+        // Look for bits associated with the runs
+        for(XWPFRun run : runs) {
+            CTR r = run.getCTR();
 
-       c.dispose();
-       
-       // Look for bits associated with the runs
-       for(XWPFRun run : runs) {
-          CTR r = run.getCTR();
-          
-          // Check for bits that only apply when
-          //  attached to a core document
-          // TODO Make this nicer by tracking the XWPFFootnotes directly
-          if(document != null) {
-             c = r.newCursor();
-             c.selectPath("child::*");
-             while (c.toNextSelection()) {
+            // Check for bits that only apply when attached to a core document
+            // TODO Make this nicer by tracking the XWPFFootnotes directly
+            XmlCursor c = r.newCursor();
+            c.selectPath("child::*");
+            while (c.toNextSelection()) {
                 XmlObject o = c.getObject();
                 if(o instanceof CTFtnEdnRef) {
-                   CTFtnEdnRef ftn = (CTFtnEdnRef)o;
-                   footnoteText.append("[").append(ftn.getId()).append(": ");
-                   XWPFFootnote footnote =
-                      ftn.getDomNode().getLocalName().equals("footnoteReference") ?
+                    CTFtnEdnRef ftn = (CTFtnEdnRef)o;
+                    footnoteText.append("[").append(ftn.getId()).append(": ");
+                    XWPFFootnote footnote =
+                        ftn.getDomNode().getLocalName().equals("footnoteReference") ?
                             document.getFootnoteByID(ftn.getId().intValue()) :
                             document.getEndnoteByID(ftn.getId().intValue());
-   
-                   boolean first = true;
-                   for (XWPFParagraph p : footnote.getParagraphs()) {
-                      if (!first) {
-                         footnoteText.append("\n");
-                         first = false;
-                      }
-                      footnoteText.append(p.getText());
-                   }
-   
-                   footnoteText.append("]");
+
+                    boolean first = true;
+                    for (XWPFParagraph p : footnote.getParagraphs()) {
+                        if (!first) {
+                            footnoteText.append("\n");
+                            first = false;
+                        }
+                        footnoteText.append(p.getText());
+                    }
+
+                    footnoteText.append("]");
                 }
-             }
-             c.dispose();
-          }
-      }
+            }
+            c.dispose();
+        }
     }
 
+    /**
+     * Identifies (in order) the parts of the paragraph /
+     *  sub-paragraph that correspond to character text
+     *  runs, and builds the appropriate runs for these.
+     */
+    private void buildRunsInOrderFromXml(XmlObject object) {
+        XmlCursor c = object.newCursor();
+        c.selectPath("child::*");
+        while (c.toNextSelection()) {
+            XmlObject o = c.getObject();
+            if (o instanceof CTR) {
+                runs.add(new XWPFRun((CTR) o, this));
+            }
+            if (o instanceof CTHyperlink) {
+                CTHyperlink link = (CTHyperlink) o;
+                for (CTR r : link.getRList()) {
+                    runs.add(new XWPFHyperlinkRun(link, r, this));
+                }
+            }
+            if (o instanceof CTSdtRun) {
+                CTSdtContentRun run = ((CTSdtRun) o).getSdtContent();
+                for (CTR r : run.getRList()) {
+                    runs.add(new XWPFRun(r, this));
+                }
+            }
+            if (o instanceof CTRunTrackChange) {
+                for (CTR r : ((CTRunTrackChange) o).getRList()) {
+                    runs.add(new XWPFRun(r, this));
+                }
+            }
+            if (o instanceof CTSimpleField) {
+                for (CTR r : ((CTSimpleField) o).getRList()) {
+                    runs.add(new XWPFRun(r, this));
+                }
+            }
+            if (o instanceof CTSmartTagRun) {
+                // Smart Tags can be nested many times. 
+                // This implementation does not preserve the tagging information
+                buildRunsInOrderFromXml(o);
+            }
+        }
+        c.dispose();
+    }
+    
     @Internal
     public CTP getCTP() {
         return paragraph;
     }
 
     public List<XWPFRun> getRuns(){
-       return Collections.unmodifiableList(runs);
+        return Collections.unmodifiableList(runs);
     }
 
     public boolean isEmpty(){
@@ -176,35 +185,35 @@ public class XWPFParagraph implements IBodyElement {
         out.append(footnoteText);
         return out.toString();
     }
-       
-       /**
-        * Return styleID of the paragraph if style exist for this paragraph
-        * if not, null will be returned     
-        * @return              styleID as String
-        */
+    
+    /**
+     * Return styleID of the paragraph if style exist for this paragraph
+     * if not, null will be returned     
+     * @return        styleID as String
+     */
     public String getStyleID(){
-               if (paragraph.getPPr() != null){
-                       if(paragraph.getPPr().getPStyle()!= null){
-                               if (paragraph.getPPr().getPStyle().getVal()!= null)
-                                       return paragraph.getPPr().getPStyle().getVal();
-                       }
-               }
-               return null;
-    }          
+           if (paragraph.getPPr() != null){
+               if(paragraph.getPPr().getPStyle()!= null){
+                   if (paragraph.getPPr().getPStyle().getVal()!= null)
+                       return paragraph.getPPr().getPStyle().getVal();
+               }
+           }
+           return null;
+    }        
     /**
      * If style exist for this paragraph
      * NumId of the paragraph will be returned.
-        * If style not exist null will be returned     
-     * @return NumID as BigInteger
+     * If style not exist null will be returned     
+     * @return    NumID as BigInteger
      */
     public BigInteger getNumID(){
-       if(paragraph.getPPr()!=null){
-               if(paragraph.getPPr().getNumPr()!=null){
-                       if(paragraph.getPPr().getNumPr().getNumId()!=null)
-                               return paragraph.getPPr().getNumPr().getNumId().getVal();
-               }
-       }
-       return null;
+        if(paragraph.getPPr()!=null){
+            if(paragraph.getPPr().getNumPr()!=null){
+                if(paragraph.getPPr().getNumPr().getNumId()!=null)
+                    return paragraph.getPPr().getNumPr().getNumId().getVal();
+            }
+        }
+        return null;
     }
     
     /**
@@ -212,14 +221,14 @@ public class XWPFParagraph implements IBodyElement {
      * @param numPos
      */
     public void setNumID(BigInteger numPos) {
-       if(paragraph.getPPr()==null)
-               paragraph.addNewPPr();
-       if(paragraph.getPPr().getNumPr()==null)
-               paragraph.getPPr().addNewNumPr();
-       if(paragraph.getPPr().getNumPr().getNumId()==null){
-               paragraph.getPPr().getNumPr().addNewNumId();
-       }
-       paragraph.getPPr().getNumPr().getNumId().setVal(numPos);
+        if(paragraph.getPPr()==null)
+            paragraph.addNewPPr();
+        if(paragraph.getPPr().getNumPr()==null)
+            paragraph.getPPr().addNewNumPr();
+        if(paragraph.getPPr().getNumPr().getNumId()==null){
+            paragraph.getPPr().getNumPr().addNewNumId();
+        }
+        paragraph.getPPr().getNumPr().getNumId().setVal(numPos);
     }
 
     /**
@@ -1027,18 +1036,18 @@ public class XWPFParagraph implements IBodyElement {
      * @param newStyle
      */
     public void setStyle(String newStyle) {
-       CTPPr pr = getCTPPr();
-       CTString style = pr.getPStyle() != null ? pr.getPStyle() : pr.addNewPStyle();
-       style.setVal(newStyle);
+        CTPPr pr = getCTPPr();
+        CTString style = pr.getPStyle() != null ? pr.getPStyle() : pr.addNewPStyle();
+        style.setVal(newStyle);
     }
     
     /**
      * @return  the style of the paragraph
      */
     public String getStyle() {
-       CTPPr pr = getCTPPr();
-       CTString style = pr.isSetPStyle() ? pr.getPStyle() : null;
-       return style != null ? style.getVal() : null;
+        CTPPr pr = getCTPPr();
+        CTString style = pr.isSetPStyle() ? pr.getPStyle() : null;
+        return style != null ? style.getVal() : null;
     }
 
     /**
@@ -1094,10 +1103,10 @@ public class XWPFParagraph implements IBodyElement {
      * @param run
      */
     protected void addRun(CTR run){
-       int pos;
-       pos = paragraph.getRList().size();
-       paragraph.addNewR();
-       paragraph.setRArray(pos, run);
+        int pos;
+        pos = paragraph.getRList().size();
+        paragraph.addNewR();
+        paragraph.setRArray(pos, run);
     }
     
     /**
@@ -1108,65 +1117,65 @@ public class XWPFParagraph implements IBodyElement {
      * @param startPos
      */
     public TextSegement searchText(String searched,PositionInParagraph startPos){
-       
-       int startRun = startPos.getRun(), 
-               startText = startPos.getText(),
-               startChar = startPos.getChar();
-       int beginRunPos = 0, candCharPos = 0;
-       boolean newList = false;
-       for (int runPos=startRun; runPos<paragraph.getRList().size(); runPos++) {
-               int beginTextPos = 0,beginCharPos = 0, textPos = 0,  charPos = 0;       
-               CTR ctRun = paragraph.getRArray(runPos);
-               XmlCursor c = ctRun.newCursor();
-               c.selectPath("./*");
-               while(c.toNextSelection()){
-                       XmlObject o = c.getObject();
-                       if(o instanceof CTText){
-                               if(textPos>=startText){
-                                       String candidate = ((CTText)o).getStringValue();
-                                       if(runPos==startRun)
-                                               charPos= startChar;
-                                       else
-                                               charPos = 0;    
-                                       for(; charPos<candidate.length(); charPos++){
-                                               if((candidate.charAt(charPos)==searched.charAt(0))&&(candCharPos==0)){
-                                                       beginTextPos = textPos;
-                                                       beginCharPos = charPos;
-                                                       beginRunPos = runPos;
-                                                       newList = true;
-                                               }
-                                               if(candidate.charAt(charPos)==searched.charAt(candCharPos)){
-                                                       if(candCharPos+1<searched.length())
-                                                               candCharPos++;
-                                                       else if(newList){
-                                                               TextSegement segement = new TextSegement();
-                                                               segement.setBeginRun(beginRunPos);
-                                                               segement.setBeginText(beginTextPos);
-                                                               segement.setBeginChar(beginCharPos);
-                                                               segement.setEndRun(runPos);
-                                                               segement.setEndText(textPos);
-                                                               segement.setEndChar(charPos);
-                                                               return segement;
-                                                       }
-                                               }
-                                               else
-                                                       candCharPos=0;
-                                       }
-                               }
-                               textPos++;
-                       }
-                       else if(o instanceof CTProofErr){
-                               c.removeXml();
-                       }
-                       else if(o instanceof CTRPr);
-                               //do nothing
-                       else
-                               candCharPos=0;
-               }
+        
+        int startRun = startPos.getRun(), 
+            startText = startPos.getText(),
+            startChar = startPos.getChar();
+        int beginRunPos = 0, candCharPos = 0;
+        boolean newList = false;
+        for (int runPos=startRun; runPos<paragraph.getRList().size(); runPos++) {
+            int beginTextPos = 0,beginCharPos = 0, textPos = 0,  charPos = 0;    
+            CTR ctRun = paragraph.getRArray(runPos);
+            XmlCursor c = ctRun.newCursor();
+            c.selectPath("./*");
+            while(c.toNextSelection()){
+                XmlObject o = c.getObject();
+                if(o instanceof CTText){
+                    if(textPos>=startText){
+                        String candidate = ((CTText)o).getStringValue();
+                        if(runPos==startRun)
+                            charPos= startChar;
+                        else
+                            charPos = 0;    
+                        for(; charPos<candidate.length(); charPos++){
+                            if((candidate.charAt(charPos)==searched.charAt(0))&&(candCharPos==0)){
+                                beginTextPos = textPos;
+                                beginCharPos = charPos;
+                                beginRunPos = runPos;
+                                newList = true;
+                            }
+                            if(candidate.charAt(charPos)==searched.charAt(candCharPos)){
+                                if(candCharPos+1<searched.length())
+                                    candCharPos++;
+                                else if(newList){
+                                    TextSegement segement = new TextSegement();
+                                    segement.setBeginRun(beginRunPos);
+                                    segement.setBeginText(beginTextPos);
+                                    segement.setBeginChar(beginCharPos);
+                                    segement.setEndRun(runPos);
+                                    segement.setEndText(textPos);
+                                    segement.setEndChar(charPos);
+                                    return segement;
+                                }
+                            }
+                            else
+                                candCharPos=0;
+                        }
+                    }
+                    textPos++;
+                }
+                else if(o instanceof CTProofErr){
+                    c.removeXml();
+                }
+                else if(o instanceof CTRPr);
+                    //do nothing
+                else
+                    candCharPos=0;
+            }
 
             c.dispose();
-       }
-       return null;
+        }
+        return null;
     }
     
     /**
@@ -1175,13 +1184,13 @@ public class XWPFParagraph implements IBodyElement {
      * @return  the inserted run
      */
     public XWPFRun insertNewRun(int pos){
-        if (pos >= 0 && pos <= paragraph.sizeOfRArray()) {
-               CTR ctRun = paragraph.insertNewR(pos);
-               XWPFRun newRun = new XWPFRun(ctRun, this);
-               runs.add(pos, newRun);
-               return newRun;
-        }
-        return null;
+         if (pos >= 0 && pos <= paragraph.sizeOfRArray()) {
+            CTR ctRun = paragraph.insertNewR(pos);
+            XWPFRun newRun = new XWPFRun(ctRun, this);
+            runs.add(pos, newRun);
+            return newRun;
+         }
+         return null;
     }
     
     
@@ -1196,27 +1205,27 @@ public class XWPFParagraph implements IBodyElement {
     int charBegin = segment.getBeginChar(); 
     int runEnd = segment.getEndRun();
     int textEnd = segment.getEndText();
-    int charEnd        = segment.getEndChar();
+    int charEnd    = segment.getEndChar();
     StringBuffer out = new StringBuffer();
-       for(int i=runBegin; i<=runEnd;i++){
-               int startText=0, endText = paragraph.getRArray(i).getTList().size()-1;
-               if(i==runBegin)
-                       startText=textBegin;
-               if(i==runEnd)
-                       endText = textEnd;
-               for(int j=startText;j<=endText;j++){
-                       String tmpText = paragraph.getRArray(i).getTArray(j).getStringValue();
-                       int startChar=0, endChar = tmpText.length()-1;
-                       if((j==textBegin)&&(i==runBegin))
-                               startChar=charBegin;
-                       if((j==textEnd)&&(i==runEnd)){
-                               endChar = charEnd;
-                       }
-                               out.append(tmpText.substring(startChar, endChar+1));
-               
-               }
-       }
-       return out.toString();
+        for(int i=runBegin; i<=runEnd;i++){
+            int startText=0, endText = paragraph.getRArray(i).getTList().size()-1;
+            if(i==runBegin)
+                startText=textBegin;
+            if(i==runEnd)
+                endText = textEnd;
+            for(int j=startText;j<=endText;j++){
+                String tmpText = paragraph.getRArray(i).getTArray(j).getStringValue();
+                int startChar=0, endChar = tmpText.length()-1;
+                if((j==textBegin)&&(i==runBegin))
+                    startChar=charBegin;
+                if((j==textEnd)&&(i==runEnd)){
+                    endChar = charEnd;
+                }
+                   out.append(tmpText.substring(startChar, endChar+1));
+        
+            }
+        }
+        return out.toString();
     }
 
     /**
@@ -1225,12 +1234,12 @@ public class XWPFParagraph implements IBodyElement {
      * @return true if the run was removed
      */
     public boolean removeRun(int pos){
-        if (pos >= 0 && pos < paragraph.sizeOfRArray()){
-                getCTP().removeR(pos);
-                runs.remove(pos);
-                return true;
-        }
-        return false;
+         if (pos >= 0 && pos < paragraph.sizeOfRArray()){
+             getCTP().removeR(pos);
+             runs.remove(pos);
+             return true;
+         }
+         return false;
     }
 
     /**
diff --git a/src/ooxml/testcases/org/apache/poi/xwpf/usermodel/TestXWPFSmartTag.java b/src/ooxml/testcases/org/apache/poi/xwpf/usermodel/TestXWPFSmartTag.java
new file mode 100644 (file)
index 0000000..f12e168
--- /dev/null
@@ -0,0 +1,39 @@
+/* ====================================================================\r
+   Licensed to the Apache Software Foundation (ASF) under one or more\r
+   contributor license agreements.  See the NOTICE file distributed with\r
+   this work for additional information regarding copyright ownership.\r
+   The ASF licenses this file to You under the Apache License, Version 2.0\r
+   (the "License"); you may not use this file except in compliance with\r
+   the License.  You may obtain a copy of the License at\r
+\r
+       http://www.apache.org/licenses/LICENSE-2.0\r
+\r
+   Unless required by applicable law or agreed to in writing, software\r
+   distributed under the License is distributed on an "AS IS" BASIS,\r
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\r
+   See the License for the specific language governing permissions and\r
+   limitations under the License.\r
+==================================================================== */\r
+package org.apache.poi.xwpf.usermodel;\r
+\r
+import java.io.IOException;\r
+\r
+import junit.framework.TestCase;\r
+\r
+import org.apache.poi.xwpf.XWPFTestDataSamples;\r
+\r
+/**\r
+ * Tests for reading SmartTags from Word docx.\r
+ *\r
+ * @author  Fabian Lange\r
+ */\r
+public final class TestXWPFSmartTag extends TestCase {\r
+\r
+    public void testSmartTags() throws IOException {\r
+        XWPFDocument doc = XWPFTestDataSamples.openSampleDocument("smarttag-snippet.docx");\r
+        XWPFParagraph p = doc.getParagraphArray(0);\r
+        assertTrue(p.getText().contains("Carnegie Mellon University School of Computer Science"));\r
+        p = doc.getParagraphArray(2);\r
+        assertTrue(p.getText().contains("Alice's Adventures"));\r
+    }\r
+}\r
diff --git a/test-data/document/smarttag-snippet.docx b/test-data/document/smarttag-snippet.docx
new file mode 100644 (file)
index 0000000..2c54f18
Binary files /dev/null and b/test-data/document/smarttag-snippet.docx differ