source.dussan.org Git - poi.git/commitdiff
Improved hyperlink and comment fetching for xwpf text extraction, based on the patch...
author: Nick Burch <nick@apache.org>
Sun, 27 Apr 2008 16:36:51 +0000 (16:36 +0000)
committer: Nick Burch <nick@apache.org>
Sun, 27 Apr 2008 16:36:51 +0000 (16:36 +0000)
git-svn-id: https://svn.apache.org/repos/asf/poi/branches/ooxml@651979 13f79535-47bb-0310-9956-ffa450edef68

src/ooxml/java/org/apache/poi/xwpf/XWPFDocument.java
src/ooxml/java/org/apache/poi/xwpf/extractor/XWPFWordExtractor.java
src/ooxml/java/org/apache/poi/xwpf/model/XMLParagraph.java [new file with mode: 0644]
src/ooxml/java/org/apache/poi/xwpf/model/XWPFCommentsDecorator.java [new file with mode: 0644]
src/ooxml/java/org/apache/poi/xwpf/model/XWPFHyperlinkDecorator.java [new file with mode: 0644]
src/ooxml/java/org/apache/poi/xwpf/model/XWPFParagraphDecorator.java [new file with mode: 0644]
src/ooxml/java/org/apache/poi/xwpf/usermodel/XWPFComment.java [new file with mode: 0644]
src/ooxml/java/org/apache/poi/xwpf/usermodel/XWPFHyperlink.java [new file with mode: 0644]
src/ooxml/java/org/apache/poi/xwpf/usermodel/XWPFParagraph.java [new file with mode: 0644]
src/ooxml/java/org/apache/poi/xwpf/usermodel/XWPFParagraphText.java [new file with mode: 0644]
src/ooxml/java/org/apache/poi/xwpf/usermodel/XWPFTable.java [new file with mode: 0644]

index 05b716d752b79987b6e692ec9e6cfe8a7d822307..4338b11f289b68cdb8a02b80cc45e4d205576489 100644 (file)
@@ -17,6 +17,9 @@
 package org.apache.poi.xwpf;
 
 import java.io.IOException;
+import java.util.LinkedList;
+import java.util.List;
+import java.util.Iterator;
 
 import org.apache.poi.POIXMLDocument;
 import org.apache.xmlbeans.XmlException;
@@ -24,12 +27,22 @@ import org.openxml4j.exceptions.InvalidFormatException;
 import org.openxml4j.exceptions.OpenXML4JException;
 import org.openxml4j.opc.Package;
 import org.openxml4j.opc.PackagePart;
+import org.openxml4j.opc.PackageRelationship;
 import org.openxml4j.opc.PackageRelationshipCollection;
 import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTBody;
 import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTDocument1;
+import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTP;
 import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTStyles;
+import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTComment;
 import org.openxmlformats.schemas.wordprocessingml.x2006.main.DocumentDocument;
 import org.openxmlformats.schemas.wordprocessingml.x2006.main.StylesDocument;
+import org.openxmlformats.schemas.wordprocessingml.x2006.main.CommentsDocument;
+import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTTbl;
+
+import org.apache.poi.xwpf.usermodel.XWPFHyperlink;
+import org.apache.poi.xwpf.usermodel.XWPFParagraph;
+import org.apache.poi.xwpf.usermodel.XWPFComment;
+import org.apache.poi.xwpf.usermodel.XWPFTable;
 
 /**
  * Experimental class to do low level processing
@@ -48,15 +61,59 @@ public class XWPFDocument extends POIXMLDocument {
        public static final String HEADER_CONTENT_TYPE = "application/vnd.openxmlformats-officedocument.wordprocessingml.header+xml";
        public static final String STYLES_CONTENT_TYPE = "application/vnd.openxmlformats-officedocument.wordprocessingml.styles+xml";
        public static final String STYLES_RELATION_TYPE = "http://schemas.openxmlformats.org/officeDocument/2006/relationships/styles";
-       public static final String HYPERLINK_RELATION_TYPE = "http://schemas.openxmlformats.org/officeDocument/2006/relationships/hyperlink"; 
+       public static final String HYPERLINK_RELATION_TYPE = "http://schemas.openxmlformats.org/officeDocument/2006/relationships/hyperlink";
+       public static final String COMMENT_RELATION_TYPE = "http://schemas.openxmlformats.org/officeDocument/2006/relationships/comments";
        
        private DocumentDocument wordDoc;
+       protected List<XWPFComment> comments;
+       protected List<XWPFHyperlink> hyperlinks;
+       protected List<XWPFParagraph> paragraphs;
+       protected List<XWPFTable> tables;
        
        public XWPFDocument(Package container) throws OpenXML4JException, IOException, XmlException {
                super(container);
+
+               hyperlinks = new LinkedList<XWPFHyperlink>();
+               comments = new LinkedList<XWPFComment>();
+               paragraphs = new LinkedList<XWPFParagraph>();
+               tables= new LinkedList<XWPFTable>();
                
                wordDoc =
                        DocumentDocument.Factory.parse(getCorePart().getInputStream());
+               
+               // filling paragraph list
+               for (CTP p : getDocumentBody().getPArray())     {
+                       paragraphs.add(new XWPFParagraph(p, this));
+               }
+
+               // Get the hyperlinks 
+               // TODO: make me optional/separated in private function
+               try     {
+                       Iterator <PackageRelationship> relIter = 
+                               getCorePart().getRelationshipsByType(HYPERLINK_RELATION_TYPE).iterator();
+                       while(relIter.hasNext()) {
+                               PackageRelationship rel = relIter.next();
+                               hyperlinks.add(new XWPFHyperlink(rel.getId(), rel.getTargetURI().toString()));
+                       }
+               } catch(Exception e) {
+                       throw new OpenXML4JException(e.getLocalizedMessage());
+               }
+
+               // Get the comments, if there are any
+               PackageRelationshipCollection commentsRel = getCmntRelations();
+               if(commentsRel != null && commentsRel.size() > 0) {
+                       PackagePart commentsPart = getTargetPart(commentsRel.getRelationship(0));
+                       CommentsDocument cmntdoc = CommentsDocument.Factory.parse(commentsPart.getInputStream());
+                       for(CTComment ctcomment : cmntdoc.getComments().getCommentArray())
+                       {
+                               comments.add(new XWPFComment(ctcomment));
+                       }
+                       
+                       for(CTTbl table : getDocumentBody().getTblArray())
+                       {
+                               tables.add(new XWPFTable(table));
+                       }
+               }
        }
        
        /**
@@ -66,6 +123,42 @@ public class XWPFDocument extends POIXMLDocument {
                return wordDoc.getDocument();
        }
        
+       /**
+        * Gives access to the paragraphs held by this document.
+        *
+        * @return an iterator over the paragraph list
+        */
+       public Iterator<XWPFParagraph> getParagraphsIterator() {
+               Iterator<XWPFParagraph> paragraphIter = paragraphs.iterator();
+               return paragraphIter;
+       }
+       
+       /**
+        * Gives access to the tables held by this document.
+        *
+        * @return an iterator over the table list
+        */
+       public Iterator<XWPFTable> getTablesIterator() {
+               Iterator<XWPFTable> tableIter = tables.iterator();
+               return tableIter;
+       }
+       
+       /**
+        * Looks up a hyperlink by its id.
+        *
+        * @param id the id to search for
+        * @return the first hyperlink in the list whose id equals the
+        *  given one, or null if there is no such hyperlink
+        */
+       public XWPFHyperlink getHyperlinkByID(String id) {
+               for(XWPFHyperlink candidate : hyperlinks) {
+                       if(candidate.getId().equals(id)) {
+                               return candidate;
+                       }
+               }
+               return null;
+       }
+       
+       /**
+        * Looks up a comment by its id.
+        *
+        * @param id the id to search for
+        * @return the first comment in the list whose id equals the
+        *  given one, or null if there is no such comment
+        */
+       public XWPFComment getCommentByID(String id) {
+               for(XWPFComment candidate : comments) {
+                       if(candidate.getId().equals(id)) {
+                               return candidate;
+                       }
+               }
+               return null;
+       }
+       
        /**
         * Returns the low level body of the document
         */
@@ -91,18 +184,10 @@ public class XWPFDocument extends POIXMLDocument {
                        StylesDocument.Factory.parse(parts[0].getInputStream());
                return sd.getStyles();
        }
-       
-       /**
-        * Returns all the hyperlink relations for the file.
-        * You'll generally want to get the target to get
-        *  the destination of the hyperlink
-        */
-       public PackageRelationshipCollection getHyperlinks() {
-               try {
-                       return getCorePart().getRelationshipsByType(HYPERLINK_RELATION_TYPE); 
-               } catch(InvalidFormatException e) {
-                       // Should never happen
-                       throw new IllegalStateException(e);
-               }
+
+       /**
+        * Returns the relationships of the core part whose type
+        *  is {@link #COMMENT_RELATION_TYPE}.
+        *
+        * @throws InvalidFormatException if thrown by the relationship lookup
+        */
+       protected PackageRelationshipCollection getCmntRelations() throws InvalidFormatException
+       {
+               return getCorePart().getRelationshipsByType(COMMENT_RELATION_TYPE);
        }
 }
+
index bd1936d16bce2e320b2d79add41550cdd74fc281..8ca4f0349b8030f38a7319923d71dbefc15e214b 100644 (file)
 package org.apache.poi.xwpf.extractor;
 
 import java.io.IOException;
+import java.util.Iterator;
 
 import org.apache.poi.POIXMLDocument;
 import org.apache.poi.POIXMLTextExtractor;
 import org.apache.poi.xwpf.XWPFDocument;
+import org.apache.poi.xwpf.model.XWPFCommentsDecorator;
+import org.apache.poi.xwpf.model.XWPFHyperlinkDecorator;
+import org.apache.poi.xwpf.model.XWPFParagraphDecorator;
+import org.apache.poi.xwpf.usermodel.XWPFParagraph;
+import org.apache.poi.xwpf.usermodel.XWPFTable;
 import org.apache.xmlbeans.XmlException;
 import org.openxml4j.exceptions.OpenXML4JException;
 import org.openxml4j.opc.Package;
-import org.openxml4j.opc.PackageRelationship;
-import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTBody;
-import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTHyperlink;
-import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTP;
-import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTR;
-import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTText;
 
 /**
  * Helper class to extract text from an OOXML Word file
@@ -45,6 +45,15 @@ public class XWPFWordExtractor extends POIXMLTextExtractor {
                super(document);
                this.document = document;
        }
+
+       /**
+        * Should we also fetch the hyperlinks, when fetching 
+        *  the text content? Default is to only output the
+        *  hyperlink label, and not the contents
+        *
+        * @param fetch true to also output each hyperlink's URL
+        *  after its label, false to output the label only
+        */
+       public void setFetchHyperlinks(boolean fetch) {
+               fetchHyperlinks = fetch;
+       }
        
        public static void main(String[] args) throws Exception {
                if(args.length < 1) {
@@ -59,56 +68,21 @@ public class XWPFWordExtractor extends POIXMLTextExtractor {
                System.out.println(extractor.getText());
        }
        
-       /**
-        * Should we also fetch the hyperlinks, when fetching 
-        *  the text content? Default is to only output the
-        *  hyperlink label, and not the contents
-        */
-       public void setFetchHyperlinks(boolean fetch) {
-               fetchHyperlinks = fetch;
-       }
-
        public String getText() {
-               CTBody body = document.getDocumentBody();
                StringBuffer text = new StringBuffer();
                
-               // Loop over paragraphs
-               CTP[] ps = body.getPArray();
-               for (int i = 0; i < ps.length; i++) {
-                       // Loop over ranges and hyperlinks
-                       // TODO - properly intersperce ranges and hyperlinks
-                       CTR[] rs = ps[i].getRArray();
-                       for(int j = 0; j < rs.length; j++) {
-                               // Loop over text runs
-                               CTText[] texts = rs[j].getTArray();
-                               for (int k = 0; k < texts.length; k++) {
-                                       text.append(
-                                                       texts[k].getStringValue()
-                                       );
-                               }
-                       }
                        
-                       CTHyperlink[] hls =  ps[i].getHyperlinkArray();
-                       for(CTHyperlink hl : hls) {
-                               for(CTR r : hl.getRArray()) {
-                                       for(CTText txt : r.getTArray()) {
-                                               text.append(txt.getStringValue());
-                                       }
-                               }
-                               if(fetchHyperlinks) {
-                                       String id = hl.getId();
-                                       if(id != null) {
-                                               PackageRelationship hlRel =
-                                                       document.getHyperlinks().getRelationshipByID(id);
-                                               if(hlRel != null) {
-                                                       text.append(" <" + hlRel.getTargetURI().toString() + ">");
-                                               }
-                                       }
-                               }
-                       }
+               Iterator<XWPFParagraph> i = document.getParagraphsIterator();
+               while(i.hasNext()) {
+                       XWPFParagraphDecorator decorator = new XWPFCommentsDecorator(
+                                       new XWPFHyperlinkDecorator(i.next(), null, fetchHyperlinks));
+                       text.append(decorator.getText()+"\n");
+               }
                        
-                       // New line after each paragraph.
-                       text.append("\n");
+               Iterator<XWPFTable> j = document.getTablesIterator();
+               while(j.hasNext())
+               {
+                       text.append(j.next().getText()+"\n");
                }
                
                return text.toString();
diff --git a/src/ooxml/java/org/apache/poi/xwpf/model/XMLParagraph.java b/src/ooxml/java/org/apache/poi/xwpf/model/XMLParagraph.java
new file mode 100644 (file)
index 0000000..c7d4bc2
--- /dev/null
@@ -0,0 +1,37 @@
+/* ====================================================================
+   Licensed to the Apache Software Foundation (ASF) under one or more
+   contributor license agreements.  See the NOTICE file distributed with
+   this work for additional information regarding copyright ownership.
+   The ASF licenses this file to You under the Apache License, Version 2.0
+   (the "License"); you may not use this file except in compliance with
+   the License.  You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+==================================================================== */
+package org.apache.poi.xwpf.model;
+
+import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTP;
+
+/**
+ * Base class for XWPF paragraphs
+ *
+ * @author Yury Batrakov (batrakov at gmail.com)
+ * 
+ */
+public class XMLParagraph {
+       /** The low level paragraph being wrapped */
+       protected CTP paragraph;
+
+       /**
+        * @param paragraph The low level paragraph to wrap
+        */
+       public XMLParagraph(CTP paragraph) {
+               this.paragraph = paragraph;
+       }
+
+       /**
+        * @return The low level paragraph given at construction
+        */
+       public CTP getCTP() {
+               return this.paragraph;
+       }
+}
\ No newline at end of file
diff --git a/src/ooxml/java/org/apache/poi/xwpf/model/XWPFCommentsDecorator.java b/src/ooxml/java/org/apache/poi/xwpf/model/XWPFCommentsDecorator.java
new file mode 100644 (file)
index 0000000..1abc262
--- /dev/null
@@ -0,0 +1,52 @@
+/* ====================================================================
+   Licensed to the Apache Software Foundation (ASF) under one or more
+   contributor license agreements.  See the NOTICE file distributed with
+   this work for additional information regarding copyright ownership.
+   The ASF licenses this file to You under the Apache License, Version 2.0
+   (the "License"); you may not use this file except in compliance with
+   the License.  You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+==================================================================== */
+package org.apache.poi.xwpf.model;
+
+import org.apache.poi.xwpf.usermodel.XWPFComment;
+import org.apache.poi.xwpf.usermodel.XWPFParagraph;
+import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTMarkupRange;
+
+/**
+ * Decorator class for XWPFParagraph that appends the comments
+ * found in the paragraph to its text
+ *
+ * @author Yury Batrakov (batrakov at gmail.com)
+ * 
+ */
+public class XWPFCommentsDecorator extends XWPFParagraphDecorator {
+       /** Text of all the comments anchored in the paragraph, built once at construction */
+       private StringBuffer commentText;
+
+       /**
+        * @param nextDecorator The next decorator to use; its paragraph is the one worked on
+        */
+       public XWPFCommentsDecorator(XWPFParagraphDecorator nextDecorator) {
+               this(nextDecorator.paragraph, nextDecorator);
+       }
+
+       /**
+        * @param paragraph The paragraph of text to work on
+        * @param nextDecorator The next decorator to use, or null if there is none
+        */
+       public XWPFCommentsDecorator(XWPFParagraph paragraph, XWPFParagraphDecorator nextDecorator) {
+               super(paragraph, nextDecorator);
+
+               commentText = new StringBuffer();
+
+               // Comments can only be resolved through the owning document, which
+               //  is absent for paragraphs built without one. Such paragraphs
+               //  simply get no comment text, rather than a NullPointerException
+               if(paragraph.getDocRef() != null) {
+                       for(CTMarkupRange anchor : paragraph.getCTP().getCommentRangeStartArray()) {
+                               XWPFComment comment = paragraph.getDocRef().getCommentByID(anchor.getId().toString());
+                               if(comment != null) {
+                                       commentText.append("\tComment by " + comment.getAuthor()+": "+comment.getText());
+                               }
+                       }
+               }
+       }
+
+       /**
+        * @return The text of the decorated paragraph, followed by the text of its comments
+        */
+       public String getText() {
+               return super.getText() + commentText;
+       }
+}
diff --git a/src/ooxml/java/org/apache/poi/xwpf/model/XWPFHyperlinkDecorator.java b/src/ooxml/java/org/apache/poi/xwpf/model/XWPFHyperlinkDecorator.java
new file mode 100644 (file)
index 0000000..ab5784b
--- /dev/null
@@ -0,0 +1,69 @@
+/* ====================================================================
+   Licensed to the Apache Software Foundation (ASF) under one or more
+   contributor license agreements.  See the NOTICE file distributed with
+   this work for additional information regarding copyright ownership.
+   The ASF licenses this file to You under the Apache License, Version 2.0
+   (the "License"); you may not use this file except in compliance with
+   the License.  You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+==================================================================== */
+package org.apache.poi.xwpf.model;
+
+import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTHyperlink;
+import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTR;
+import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTText;
+import org.apache.poi.xwpf.usermodel.XWPFHyperlink;
+import org.apache.poi.xwpf.usermodel.XWPFParagraph;
+
+/**
+ * Decorator class for XWPFParagraph that appends the hyperlinks
+ *  found in the paragraph to its text.
+ *  
+ * TODO - add the hyperlink text in the right place, and not just
+ *  at the end
+ */
+public class XWPFHyperlinkDecorator extends XWPFParagraphDecorator {
+       /** Text of all the hyperlinks in the paragraph, built once at construction */
+       private StringBuffer hyperlinkText;
+
+       /**
+        * @param nextDecorator The next decorator to use
+        * @param outputHyperlinkUrls Should we output the links too, or just the link text?
+        */
+       public XWPFHyperlinkDecorator(XWPFParagraphDecorator nextDecorator, boolean outputHyperlinkUrls) {
+               this(nextDecorator.paragraph, nextDecorator, outputHyperlinkUrls);
+       }
+
+       /**
+        * @param prgrph The paragraph of text to work on
+        * @param nextDecorator The next decorator to use, or null if there is none
+        * @param outputHyperlinkUrls Should we output the links too, or just the link text?
+        */
+       public XWPFHyperlinkDecorator(XWPFParagraph prgrph, XWPFParagraphDecorator nextDecorator, boolean outputHyperlinkUrls) {
+               super(prgrph, nextDecorator);
+
+               hyperlinkText = new StringBuffer();
+
+               // loop over hyperlink anchors
+               for(CTHyperlink link : paragraph.getCTP().getHyperlinkArray()){
+                       for (CTR r : link.getRArray()) {
+                               // Loop over text runs
+                               for (CTText text : r.getTArray()){
+                                       hyperlinkText.append(text.getStringValue());
+                               }
+                       }
+                       // URLs can only be resolved through the owning document, which
+                       //  is absent for paragraphs built without one
+                       if(outputHyperlinkUrls && paragraph.getDocRef() != null) {
+                               // Look the link up once, rather than once to test and once to use
+                               XWPFHyperlink target = paragraph.getDocRef().getHyperlinkByID(link.getId());
+                               if(target != null) {
+                                       hyperlinkText.append(" <"+target.getURL()+">");
+                               }
+                       }
+               }
+       }
+
+       /**
+        * @return The text of the decorated paragraph, followed by the text of its hyperlinks
+        */
+       public String getText()
+       {
+               return super.getText() + hyperlinkText;
+       }
+}
diff --git a/src/ooxml/java/org/apache/poi/xwpf/model/XWPFParagraphDecorator.java b/src/ooxml/java/org/apache/poi/xwpf/model/XWPFParagraphDecorator.java
new file mode 100644 (file)
index 0000000..3946b22
--- /dev/null
@@ -0,0 +1,43 @@
+/* ====================================================================
+   Licensed to the Apache Software Foundation (ASF) under one or more
+   contributor license agreements.  See the NOTICE file distributed with
+   this work for additional information regarding copyright ownership.
+   The ASF licenses this file to You under the Apache License, Version 2.0
+   (the "License"); you may not use this file except in compliance with
+   the License.  You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+==================================================================== */
+package org.apache.poi.xwpf.model;
+
+import org.apache.poi.xwpf.usermodel.XWPFParagraph;
+
+/**
+ * Base decorator class for XWPFParagraph
+ */
+public abstract class XWPFParagraphDecorator {
+       /** The paragraph being decorated */
+       protected XWPFParagraph paragraph;
+       /** The decorator wrapped by this one, or null if there is none */
+       protected XWPFParagraphDecorator nextDecorator;
+
+       /**
+        * @param paragraph The paragraph of text to work on
+        */
+       public XWPFParagraphDecorator(XWPFParagraph paragraph) {
+               this(paragraph, null);
+       }
+
+       /**
+        * @param paragraph The paragraph of text to work on
+        * @param nextDecorator The next decorator to use, or null if there is none
+        */
+       public XWPFParagraphDecorator(XWPFParagraph paragraph, XWPFParagraphDecorator nextDecorator) {
+               this.nextDecorator = nextDecorator;
+               this.paragraph = paragraph;
+       }
+
+       /**
+        * @return The text of the next decorator when there is one,
+        *  otherwise the text of the paragraph itself
+        */
+       public String getText() {
+               return (nextDecorator == null) ? paragraph.getText() : nextDecorator.getText();
+       }
+}
diff --git a/src/ooxml/java/org/apache/poi/xwpf/usermodel/XWPFComment.java b/src/ooxml/java/org/apache/poi/xwpf/usermodel/XWPFComment.java
new file mode 100644 (file)
index 0000000..7de86d4
--- /dev/null
@@ -0,0 +1,61 @@
+/* ====================================================================
+   Licensed to the Apache Software Foundation (ASF) under one or more
+   contributor license agreements.  See the NOTICE file distributed with
+   this work for additional information regarding copyright ownership.
+   The ASF licenses this file to You under the Apache License, Version 2.0
+   (the "License"); you may not use this file except in compliance with
+   the License.  You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+==================================================================== */
+package org.apache.poi.xwpf.usermodel;
+
+import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTComment;
+import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTP;
+
+/**
+ * Sketch of XWPF comment class
+ * 
+* @author Yury Batrakov (batrakov at gmail.com)
+ * 
+ */
+public class XWPFComment
+{
+    /** Id of the comment, as a string */
+    protected String id;
+    /** Author of the comment */
+    protected String author;
+    /** Concatenated text of the comment's paragraphs */
+    protected StringBuffer text;
+
+    /**
+     * Reads the id, author and text out of the low level comment.
+     *
+     * @param comment the low level comment to read from
+     */
+    public XWPFComment(CTComment comment)
+    {
+        text = new StringBuffer();
+        id = comment.getId().toString();
+        author = comment.getAuthor();
+
+        // The paragraphs are joined with no separator between them
+        CTP[] commentParagraphs = comment.getPArray();
+        for(int i = 0; i < commentParagraphs.length; i++)
+        {
+            text.append(new XWPFParagraph(commentParagraphs[i]).getText());
+        }
+    }
+
+    /**
+     * @return the id of the comment
+     */
+    public String getId()
+    {
+        return this.id;
+    }
+
+    /**
+     * @return the author of the comment
+     */
+    public String getAuthor()
+    {
+        return this.author;
+    }
+
+    /**
+     * @return the text of the comment
+     */
+    public String getText()
+    {
+        return this.text.toString();
+    }
+}
diff --git a/src/ooxml/java/org/apache/poi/xwpf/usermodel/XWPFHyperlink.java b/src/ooxml/java/org/apache/poi/xwpf/usermodel/XWPFHyperlink.java
new file mode 100644 (file)
index 0000000..f9f2f9d
--- /dev/null
@@ -0,0 +1,43 @@
+/* ====================================================================
+   Licensed to the Apache Software Foundation (ASF) under one or more
+   contributor license agreements.  See the NOTICE file distributed with
+   this work for additional information regarding copyright ownership.
+   The ASF licenses this file to You under the Apache License, Version 2.0
+   (the "License"); you may not use this file except in compliance with
+   the License.  You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+==================================================================== */
+package org.apache.poi.xwpf.usermodel;
+
+/**
+ * Sketch of XWPF hyperlink class
+ * 
+* @author Yury Batrakov (batrakov at gmail.com)
+ * 
+ */
+public class XWPFHyperlink
+{
+    /** Id of the hyperlink */
+    String id;
+    /** Target of the hyperlink */
+    String url;
+
+    /**
+     * @param id the id of the hyperlink
+     * @param url the target of the hyperlink
+     */
+    public XWPFHyperlink(String id, String url)
+    {
+        this.url = url;
+        this.id = id;
+    }
+
+    /**
+     * @return the id given at construction
+     */
+    public String getId()
+    {
+        return this.id;
+    }
+
+    /**
+     * @return the target given at construction
+     */
+    public String getURL()
+    {
+        return this.url;
+    }
+}
diff --git a/src/ooxml/java/org/apache/poi/xwpf/usermodel/XWPFParagraph.java b/src/ooxml/java/org/apache/poi/xwpf/usermodel/XWPFParagraph.java
new file mode 100644 (file)
index 0000000..57527ca
--- /dev/null
@@ -0,0 +1,70 @@
+/* ====================================================================
+   Licensed to the Apache Software Foundation (ASF) under one or more
+   contributor license agreements.  See the NOTICE file distributed with
+   this work for additional information regarding copyright ownership.
+   The ASF licenses this file to You under the Apache License, Version 2.0
+   (the "License"); you may not use this file except in compliance with
+   the License.  You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+==================================================================== */
+package org.apache.poi.xwpf.usermodel;
+
+import org.apache.poi.xwpf.model.XMLParagraph;
+import org.apache.poi.xwpf.XWPFDocument;
+import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTP;
+import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTR;
+import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTText;
+
+/**
+ * Sketch of XWPF paragraph class
+ */
+public class XWPFParagraph extends XMLParagraph
+{
+    /** The owning document, or null when the paragraph was built without one */
+    protected XWPFDocument docRef; // XXX: we'd like to have access to document's hyperlink, comments and other tables
+    /**
+     * TODO - replace with RichText String
+     */
+    private StringBuffer text = new StringBuffer();
+
+    /**
+     * Wraps the low level paragraph and collects the text of its runs.
+     *
+     * @param prgrph the low level paragraph to wrap
+     * @param docRef the owning document, may be null
+     */
+    public XWPFParagraph(CTP prgrph, XWPFDocument docRef)
+    {
+        super(prgrph);
+        this.docRef = docRef;
+
+        // Concatenate the text of every run in the paragraph's run array
+        for (CTR run : paragraph.getRArray()) {
+            for (CTText runText : run.getTArray()) {
+                text.append(runText.getStringValue());
+            }
+        }
+    }
+
+    /**
+     * Wraps the low level paragraph, with no owning document.
+     *
+     * @param prgrph the low level paragraph to wrap
+     */
+    public XWPFParagraph(CTP prgrph) {
+        this(prgrph, null);
+    }
+
+    /**
+     * Wraps the low level paragraph of the given one, with no owning document.
+     *
+     * @param paragraph the paragraph whose low level paragraph is wrapped
+     */
+    public XWPFParagraph(XMLParagraph paragraph) {
+        this(paragraph.getCTP());
+    }
+
+    /**
+     * @return the owning document, or null if none was given
+     */
+    public XWPFDocument getDocRef() {
+        return this.docRef;
+    }
+
+    /**
+     * @return the text collected from the paragraph's runs
+     */
+    public String getText() {
+        return this.text.toString();
+    }
+}
diff --git a/src/ooxml/java/org/apache/poi/xwpf/usermodel/XWPFParagraphText.java b/src/ooxml/java/org/apache/poi/xwpf/usermodel/XWPFParagraphText.java
new file mode 100644 (file)
index 0000000..a25b164
--- /dev/null
@@ -0,0 +1,6 @@
+package org.apache.poi.xwpf.usermodel;
+
+/**
+ * Empty placeholder class; it declares no fields or methods.
+ */
+public class XWPFParagraphText
+{
+
+}
diff --git a/src/ooxml/java/org/apache/poi/xwpf/usermodel/XWPFTable.java b/src/ooxml/java/org/apache/poi/xwpf/usermodel/XWPFTable.java
new file mode 100644 (file)
index 0000000..3f69f41
--- /dev/null
@@ -0,0 +1,55 @@
+/* ====================================================================
+   Licensed to the Apache Software Foundation (ASF) under one or more
+   contributor license agreements.  See the NOTICE file distributed with
+   this work for additional information regarding copyright ownership.
+   The ASF licenses this file to You under the Apache License, Version 2.0
+   (the "License"); you may not use this file except in compliance with
+   the License.  You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+==================================================================== */
+package org.apache.poi.xwpf.usermodel;
+
+import org.apache.poi.xwpf.usermodel.XWPFParagraph;
+import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTTbl;
+import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTRow;
+import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTTc;
+import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTP;
+
+/**
+ * Sketch of XWPFTable class. Only the table's text is held.
+ * 
+ * @author Yury Batrakov (batrakov at gmail.com)
+ * 
+ */
+public class XWPFTable
+{
+    /** Text of the table: a tab after each cell paragraph, a newline after each row */
+    protected StringBuffer text=new StringBuffer();
+
+    /**
+     * Collects the text of every paragraph of every cell of the table.
+     *
+     * @param table the low level table to read from
+     */
+    public XWPFTable(CTTbl table)
+    {
+        CTRow[] rows = table.getTrArray();
+        for(int r = 0; r < rows.length; r++)
+        {
+            for(CTTc cell : rows[r].getTcArray())
+            {
+                for(CTP cellParagraph : cell.getPArray())
+                {
+                    String paragraphText = new XWPFParagraph(cellParagraph).getText();
+                    text.append(paragraphText+"\t");
+                }
+            }
+            // End of the row
+            text.append("\n");
+        }
+    }
+
+    /**
+     * @return the text collected from the table
+     */
+    public String getText()
+    {
+        return this.text.toString();
+    }
+}