]> source.dussan.org Git - poi.git/commitdiff
XWPF paragraph improvements - Make XWPFParagraph make more use of XWPFRun, and less...
authorNick Burch <nick@apache.org>
Tue, 14 Sep 2010 13:46:22 +0000 (13:46 +0000)
committerNick Burch <nick@apache.org>
Tue, 14 Sep 2010 13:46:22 +0000 (13:46 +0000)
git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@996899 13f79535-47bb-0310-9956-ffa450edef68

src/documentation/content/xdocs/status.xml
src/ooxml/java/org/apache/poi/xwpf/extractor/XWPFWordExtractor.java
src/ooxml/java/org/apache/poi/xwpf/model/XWPFCommentsDecorator.java
src/ooxml/java/org/apache/poi/xwpf/model/XWPFHyperlinkDecorator.java
src/ooxml/java/org/apache/poi/xwpf/usermodel/XWPFHyperlinkRun.java [new file with mode: 0644]
src/ooxml/java/org/apache/poi/xwpf/usermodel/XWPFParagraph.java
src/ooxml/java/org/apache/poi/xwpf/usermodel/XWPFRun.java
src/ooxml/java/org/apache/poi/xwpf/usermodel/XWPFTableCell.java
src/ooxml/testcases/org/apache/poi/xwpf/extractor/TestXWPFWordExtractor.java
src/ooxml/testcases/org/apache/poi/xwpf/model/TestXWPFHeaderFooterPolicy.java

index a0fc2a0159ba6334e327c69286fc2a6c497a74cd..014a0856524271a307770228736e67c9710786ef 100644 (file)
@@ -34,6 +34,8 @@
 
     <changes>
         <release version="3.7-beta3" date="2010-??-??">
+           <action dev="poi-developers" type="fix">Improve handling of Hyperlinks inside XWPFParagraph objects through XWPFHyperlinkRun</action>
+           <action dev="poi-developers" type="fix">Make XWPFParagraph rely more on XWPFRun, and less on internal StringBuffers</action>
            <action dev="poi-developers" type="add">Add a getBodyElements() method to XWPF IBody, to make access to embedded paragraphs and tables easier</action>
            <action dev="poi-developers" type="add">More XSLFRelation entries for common .pptx file parts</action>
            <action dev="poi-developers" type="fix">49872 - avoid exception in XSSFFormulaEvaluator.evaluateInCell when evaluating shared formulas</action>
index 9c159956d02408e5042cf62469626216ccd68d69..c990776daf8912cfa969f1a131578efb7e7f6e2d 100644 (file)
@@ -29,8 +29,11 @@ import org.apache.poi.xwpf.model.XWPFHeaderFooterPolicy;
 import org.apache.poi.xwpf.model.XWPFHyperlinkDecorator;
 import org.apache.poi.xwpf.model.XWPFParagraphDecorator;
 import org.apache.poi.xwpf.usermodel.XWPFDocument;
+import org.apache.poi.xwpf.usermodel.XWPFHyperlink;
+import org.apache.poi.xwpf.usermodel.XWPFHyperlinkRun;
 import org.apache.poi.xwpf.usermodel.XWPFParagraph;
 import org.apache.poi.xwpf.usermodel.XWPFRelation;
+import org.apache.poi.xwpf.usermodel.XWPFRun;
 import org.apache.poi.xwpf.usermodel.XWPFTable;
 import org.apache.xmlbeans.XmlException;
 import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTSectPr;
@@ -103,9 +106,28 @@ public class XWPFWordExtractor extends POIXMLTextExtractor {
                                        extractHeaders(text, headerFooterPolicy);
                                }
 
-                               XWPFParagraphDecorator decorator = new XWPFCommentsDecorator(
-                                               new XWPFHyperlinkDecorator(paragraph, null, fetchHyperlinks));
-                               text.append(decorator.getText()).append('\n');
+                               // Do the paragraph text
+                               for(XWPFRun run : paragraph.getRuns()) {
+                                  text.append(run.toString());
+                                  if(run instanceof XWPFHyperlinkRun && fetchHyperlinks) {
+                                     XWPFHyperlink link = ((XWPFHyperlinkRun)run).getHyperlink(document);
+                                     if(link != null)
+                                        text.append(" <" + link.getURL() + ">");
+                                  }
+                               }
+
+                               // Add comments
+                               XWPFCommentsDecorator decorator = new XWPFCommentsDecorator(paragraph, null);
+                               text.append(decorator.getCommentText()).append('\n');
+                               
+                               // Do endnotes, footnotes and pictures
+                               for(String str : new String[] {
+                                     paragraph.getFootnoteText(), paragraph.getPictureText()
+                               }) {
+                                  if(str != null && str.length() > 0) {
+                                     text.append(str + "\n");
+                                  }
+                               }
 
                                if (ctSectPr!=null) {
                                        extractFooters(text, headerFooterPolicy);
index 7a528237a1c143ab78a143e5b011970e66dcf595..be7bf0408e0bcfead42a806bcbdd7b6609816544 100644 (file)
@@ -46,6 +46,10 @@ public class XWPFCommentsDecorator extends XWPFParagraphDecorator {
                }
        }
 
+       public String getCommentText() {
+          return commentText.toString();
+       }
+       
        public String getText() {
                return super.getText() + commentText;
        }
index 3ad4c492d396a5a2fed3621f74b6760c337458ed..fb7d6ba930e562906887bdb2b56a92c7c89570a4 100644 (file)
@@ -19,15 +19,18 @@ package org.apache.poi.xwpf.model;
 import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTHyperlink;
 import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTR;
 import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTText;
+import org.apache.poi.xwpf.usermodel.XWPFHyperlinkRun;
 import org.apache.poi.xwpf.usermodel.XWPFParagraph;
 
 /**
  * Decorator class for XWPFParagraph allowing to add hyperlinks 
  *  found in paragraph to its text.
  *  
- * TODO - add the hyperlink text in the right place, and not just
- *  at the end
+ * Note - appends the hyperlink text at the end, rather than where the link occurs.
+ *  
+ * @deprecated Use {@link XWPFHyperlinkRun} instead
  */
+@Deprecated
 public class XWPFHyperlinkDecorator extends XWPFParagraphDecorator {
        private StringBuffer hyperlinkText;
        
diff --git a/src/ooxml/java/org/apache/poi/xwpf/usermodel/XWPFHyperlinkRun.java b/src/ooxml/java/org/apache/poi/xwpf/usermodel/XWPFHyperlinkRun.java
new file mode 100644 (file)
index 0000000..9a40b52
--- /dev/null
@@ -0,0 +1,64 @@
+/* ====================================================================
+   Licensed to the Apache Software Foundation (ASF) under one or more
+   contributor license agreements.  See the NOTICE file distributed with
+   this work for additional information regarding copyright ownership.
+   The ASF licenses this file to You under the Apache License, Version 2.0
+   (the "License"); you may not use this file except in compliance with
+   the License.  You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+==================================================================== */
+package org.apache.poi.xwpf.usermodel;
+
+import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTHyperlink;
+import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTR;
+
+/**
+ * A run of text with a Hyperlink applied to it.
+ * A single Hyperlink may be made up of several of these runs.
+ */
+public class XWPFHyperlinkRun extends XWPFRun
+{
+   private CTHyperlink hyperlink;
+   
+   public XWPFHyperlinkRun(CTHyperlink hyperlink, CTR run, XWPFParagraph p) {
+      super(run, p);
+      this.hyperlink = hyperlink;
+   }
+   
+   public CTHyperlink getCTHyperlink() {
+      return hyperlink;
+   }
+   
+   public String getAnchor() {
+      return hyperlink.getAnchor();
+   }
+   
+   /**
+    * Returns the ID of the hyperlink, if one is set.
+    */
+   public String getHyperlinkId() {
+      return hyperlink.getId();
+   }
+   public void setHyperlinkId(String id) {
+      hyperlink.setId(id);
+   }
+   
+   /**
+    * If this Hyperlink is an external reference hyperlink,
+    *  return the object for it. Returns null if no hyperlink ID is set.
+    */
+   public XWPFHyperlink getHyperlink(XWPFDocument document) {
+      String id = getHyperlinkId();
+      if(id == null)
+         return null;
+      
+      return document.getHyperlinkByID(id);
+   }
+}
index 57e1a6467c7f13e3ff1cb9dde6a6b119995ee3f2..2a57f6988ef50a9e56a768862769e3dc619815c5 100644 (file)
@@ -20,21 +20,19 @@ import java.math.BigInteger;
 import java.util.ArrayList;
 import java.util.Collections;
 import java.util.List;
-import java.util.Arrays;
 
 import org.apache.poi.util.Internal;
 import org.apache.xmlbeans.XmlCursor;
 import org.apache.xmlbeans.XmlObject;
 import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTBorder;
-import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTEmpty;
 import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTFtnEdnRef;
+import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTHyperlink;
 import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTInd;
 import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTJc;
 import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTOnOff;
 import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTP;
 import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTPBdr;
 import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTPPr;
-import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTPTab;
 import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTPicture;
 import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTProofErr;
 import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTR;
@@ -66,10 +64,6 @@ public class XWPFParagraph implements IBodyElement{
     protected XWPFDocument document;
     protected List<XWPFRun> runs;
     
-    /**
-     * TODO - replace with RichText String
-     */
-    private StringBuffer text = new StringBuffer();
     private StringBuffer pictureText = new StringBuffer();
     private StringBuffer footnoteText = new StringBuffer();
 
@@ -91,102 +85,76 @@ public class XWPFParagraph implements IBodyElement{
         }
         
         runs = new ArrayList<XWPFRun>();
-        if (prgrph.getRList().size() > 0) {
-           for(CTR ctRun : prgrph.getRList()) {
-              runs.add(new XWPFRun(ctRun, this));
-           }
-        }
-
-        if (!isEmpty()) {
-           readNewText();
-        }
-    }
-    
-    protected String readNewText() {
-      StringBuffer text = new StringBuffer();
-      
-      // All the runs to loop over
-      // TODO - replace this with some sort of XPath expression
-      // to directly find all the CTRs, in the right order
-      ArrayList<CTR> rs = new ArrayList<CTR>();
-      rs.addAll( paragraph.getRList() );
-      
-      for (CTSdtRun sdt : paragraph.getSdtList()) {
-          CTSdtContentRun run = sdt.getSdtContent();
-          rs.addAll( run.getRList() );
-      }
-      for (CTRunTrackChange c : paragraph.getDelList()) {
-          rs.addAll( c.getRList() );
-      }
-      for (CTRunTrackChange c : paragraph.getInsList()) {
-          rs.addAll( c.getRList() );
-      }
-      for (CTSimpleField f : paragraph.getFldSimpleList()) {
-         rs.addAll( f.getRList() );
-      }
 
-      // Get text of the paragraph
-      for (int j = 0; j < rs.size(); j++) {
-          // Grab the text and tabs of the paragraph
-          // Do so in a way that preserves the ordering
-          XmlCursor c = rs.get(j).newCursor();
-          c.selectPath("./*");
-          while (c.toNextSelection()) {
-              XmlObject o = c.getObject();
-              if (o instanceof CTText) {
-                  String tagName = o.getDomNode().getNodeName();
-                  // Field Codes (w:instrText, defined in spec sec. 17.16.23)
-                  //  come up as instances of CTText, but we don't want them
-                  //  in the normal text output
-                  if (!"w:instrText".equals(tagName)) {
-                     text.append(((CTText) o).getStringValue());
-                  }
-              }
-              if (o instanceof CTPTab) {
-                  text.append("\t");
-              }
-              if (o instanceof CTEmpty) {
-                 // Some inline text elements get returned not as
-                 //  themselves, but as CTEmpty, owing to some odd
-                 //  definitions around line 5642 of the XSDs
-                 String tagName = o.getDomNode().getNodeName();
-                 if ("w:tab".equals(tagName)) {
-                    text.append("\t");
-                 }
-                 if ("w:cr".equals(tagName)) {
-                    text.append("\n");
-                 }
-              }
-              
-              // Check for bits that only apply when
-              //  attached to a core document
-              if(document != null) {
-                 //got a reference to a footnote
-                 if (o instanceof CTFtnEdnRef) {
-                     CTFtnEdnRef ftn = (CTFtnEdnRef) o;
-                     footnoteText.append("[").append(ftn.getId()).append(": ");
-                     XWPFFootnote footnote =
-                             ftn.getDomNode().getLocalName().equals("footnoteReference") ?
-                                     document.getFootnoteByID(ftn.getId().intValue()) :
-                                     document.getEndnoteByID(ftn.getId().intValue());
+       // Get all our child nodes in order, and process them
+       //  into XWPFRuns where we can
+       XmlCursor c = paragraph.newCursor();
+       c.selectPath("child::*");
+       while (c.toNextSelection()) {
+          XmlObject o = c.getObject();
+          if(o instanceof CTR) {
+             runs.add(new XWPFRun((CTR)o, this));
+          }
+          if(o instanceof CTHyperlink) {
+             CTHyperlink link = (CTHyperlink)o;
+             for(CTR r : link.getRList()) {
+                runs.add(new XWPFHyperlinkRun(link, r, this));
+             }
+          }
+          if(o instanceof CTSdtRun) {
+             CTSdtContentRun run = ((CTSdtRun)o).getSdtContent();
+             for(CTR r : run.getRList()) {
+                runs.add(new XWPFRun(r, this));
+             }
+          }
+          if(o instanceof CTRunTrackChange) {
+             for(CTR r : ((CTRunTrackChange)o).getRList()) {
+                runs.add(new XWPFRun(r, this));
+             }
+          }
+          if(o instanceof CTSimpleField) {
+             for(CTR r : ((CTSimpleField)o).getRList()) {
+                runs.add(new XWPFRun(r, this));
+             }
+          }
+       }
+       
+       // Look for bits associated with the runs
+       for(XWPFRun run : runs) {
+          CTR r = run.getCTR();
+          
+          // Check for bits that only apply when
+          //  attached to a core document
+          if(document != null) {
+             c = r.newCursor();
+             c.selectPath("child::*");
+             while (c.toNextSelection()) {
+                XmlObject o = c.getObject();
+                if(o instanceof CTFtnEdnRef) {
+                   CTFtnEdnRef ftn = (CTFtnEdnRef)o;
+                   footnoteText.append("[").append(ftn.getId()).append(": ");
+                   XWPFFootnote footnote =
+                      ftn.getDomNode().getLocalName().equals("footnoteReference") ?
+                            document.getFootnoteByID(ftn.getId().intValue()) :
+                            document.getEndnoteByID(ftn.getId().intValue());
    
-                     boolean first = true;
-                     for (XWPFParagraph p : footnote.getParagraphs()) {
-                         if (!first) {
-                             footnoteText.append("\n");
-                             first = false;
-                         }
-                         footnoteText.append(p.getText());
-                     }
+                   boolean first = true;
+                   for (XWPFParagraph p : footnote.getParagraphs()) {
+                      if (!first) {
+                         footnoteText.append("\n");
+                         first = false;
+                      }
+                      footnoteText.append(p.getText());
+                   }
    
-                     footnoteText.append("]");
-                 }
-              }
+                   footnoteText.append("]");
+                }
+             }
           }
 
           // Loop over pictures inside our
           // paragraph, looking for text in them
-          for(CTPicture pict : rs.get(j).getPictList()) {
+          for(CTPicture pict : r.getPictList()) {
               XmlObject[] t = pict
                       .selectPath("declare namespace w='http://schemas.openxmlformats.org/wordprocessingml/2006/main' .//w:t");
               for (int m = 0; m < t.length; m++) {
@@ -200,9 +168,6 @@ public class XWPFParagraph implements IBodyElement{
               }
           }
       }
-      
-      this.text = text;
-      return text.toString();
     }
 
     @Internal
@@ -228,7 +193,10 @@ public class XWPFParagraph implements IBodyElement{
      */
     public String getText() {
         StringBuffer out = new StringBuffer();
-        out.append(text).append(footnoteText).append(pictureText);
+        for(XWPFRun run : runs) {
+           out.append(run.toString());
+        }
+        out.append(footnoteText).append(pictureText);
         return out.toString();
     }
        
@@ -282,7 +250,11 @@ public class XWPFParagraph implements IBodyElement{
      * paragraph
      */
     public String getParagraphText() {
-        return text.toString();
+       StringBuffer out = new StringBuffer();
+       for(XWPFRun run : runs) {
+          out.append(run.toString());
+       }
+       return out.toString();
     }
 
     /**
@@ -1143,9 +1115,6 @@ public class XWPFParagraph implements IBodyElement{
        pos = paragraph.getRList().size();
        paragraph.addNewR();
        paragraph.setRArray(pos, run);
-       for (CTText ctText: paragraph.getRArray(pos).getTList()) {
-                       this.text.append(ctText.getStringValue());      
-               }
     }
     
     /**
index dccd02bfbbbbb0fd5beddc4b193ac1bfdbeef6c2..2b37383b3dd12d53d645b7c7ffe5e723215bf8cc 100644 (file)
@@ -19,12 +19,15 @@ package org.apache.poi.xwpf.usermodel;
 import java.math.BigInteger;
 
 import org.apache.poi.util.Internal;
+import org.apache.xmlbeans.XmlObject;
 import org.apache.xmlbeans.XmlString;
 import org.apache.xmlbeans.XmlCursor;
 import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTBr;
+import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTEmpty;
 import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTFonts;
 import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTHpsMeasure;
 import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTOnOff;
+import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTPTab;
 import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTR;
 import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTRPr;
 import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTSignedHpsMeasure;
@@ -492,4 +495,45 @@ public class XWPFRun {
         }
     }
 
+    /**
+     * Returns the text of this run as a string, with tabs and carriage
+     *  returns in place of their xml equivalents. Field codes are omitted.
+     */
+    public String toString() {
+       StringBuffer text = new StringBuffer();
+       
+       // Grab the text and tabs of the text run
+       // Do so in a way that preserves the ordering
+       XmlCursor c = run.newCursor();
+       c.selectPath("./*");
+       while (c.toNextSelection()) {
+           XmlObject o = c.getObject();
+           if (o instanceof CTText) {
+               String tagName = o.getDomNode().getNodeName();
+               // Field Codes (w:instrText, defined in spec sec. 17.16.23)
+               //  come up as instances of CTText, but we don't want them
+               //  in the normal text output
+               if (!"w:instrText".equals(tagName)) {
+                  text.append(((CTText) o).getStringValue());
+               }
+           }
+           if (o instanceof CTPTab) {
+               text.append("\t");
+           }
+           if (o instanceof CTEmpty) {
+              // Some inline text elements get returned not as
+              //  themselves, but as CTEmpty, owing to some odd
+              //  definitions around line 5642 of the XSDs
+              String tagName = o.getDomNode().getNodeName();
+              if ("w:tab".equals(tagName)) {
+                 text.append("\t");
+              }
+              if ("w:cr".equals(tagName)) {
+                 text.append("\n");
+              }
+           }
+       }
+       
+       return text.toString();
+    }
 }
index 2497d801a956acc15f0f15f8d91147508c3abb91..008b5904b850b4884d7cf8668dda18620b469645 100644 (file)
@@ -327,7 +327,7 @@ public class XWPFTableCell implements IBody {
        public String getText(){
                StringBuffer text = new StringBuffer();
                for (XWPFParagraph p : paragraphs) {
-                       text.append(p.readNewText());
+                       text.append(p.getText());
                }
                return text.toString();
        }
index 5b47b6d0b15cae8874ea5750bf7258f04ebd76b5..77315f795bdc9c473b81e9b65b4703b2e5a4d08d 100644 (file)
@@ -96,22 +96,18 @@ public class TestXWPFWordExtractor extends TestCase {
         XWPFWordExtractor extractor = new XWPFWordExtractor(doc);
 
         // Now check contents
-        // TODO - fix once correctly handling contents
         extractor.setFetchHyperlinks(false);
         assertEquals(
-//                             "This is a test document\nThis bit is in bold and italic\n" +
-//                             "Back to normal\nWe have a hyperlink here, and another.\n",
-                "This is a test document\nThis bit is in bold and italic\n" +
-                        "Back to normal\nWe have a  here, and .hyperlinkanother\n",
+                               "This is a test document\nThis bit is in bold and italic\n" +
+                               "Back to normal\nWe have a hyperlink here, and another.\n",
                 extractor.getText()
         );
 
+        // One hyperlink is a real one, the other just links to the top of the page
         extractor.setFetchHyperlinks(true);
         assertEquals(
-//                             "This is a test document\nThis bit is in bold and italic\n" +
-//                             "Back to normal\nWe have a hyperlink here, and another.\n",
-                "This is a test document\nThis bit is in bold and italic\n" +
-                        "Back to normal\nWe have a  here, and .hyperlink <http://poi.apache.org/>another\n",
+                               "This is a test document\nThis bit is in bold and italic\n" +
+                               "Back to normal\nWe have a hyperlink <http://poi.apache.org/> here, and another.\n",
                 extractor.getText()
         );
     }
index d30503b0a865b1366d99d1f20f0fd3eca4879812..86b22466bc7d5788b9f43f6b238bd89fb67dd19e 100644 (file)
@@ -144,7 +144,7 @@ public class TestXWPFHeaderFooterPolicy extends TestCase {
                policy = oddEven.getHeaderFooterPolicy();
 
                assertEquals(
-                       "[]ODD Page Header text\n\n",
+                       "[ODD Page Header text]\n\n",
                        policy.getDefaultHeader().getText()
                );
                assertEquals(