git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@809662 13f79535-47bb-0310-9956-ffa450edef68

14 years ago · 896840bb82
--- a/src/documentation/content/xdocs/status.xml
+++ b/src/documentation/content/xdocs/status.xml
@@ -33,7 +33,8 @@

    <changes>
        <release version="3.5-beta7" date="2009-??-??">
           <action dev="POI-DEVELOPERS" type="fix">47773 - Support for extraction of header / footer images in HWPF</action>
           <action dev="POI-DEVELOPERS" type="fix">47773 - Fix for extraction of paragraphs and sections from headers/footers with XWPFWordExtractor</action>
           <action dev="POI-DEVELOPERS" type="fix">47727 - Support for extraction of header / footer images in HWPF</action>
           <action dev="POI-DEVELOPERS" type="fix">moved all test data to a top-level directory</action>
           <action dev="POI-DEVELOPERS" type="add">47721 - Added implementation for INDIRECT()</action>
           <action dev="POI-DEVELOPERS" type="add">45583 - Avoid exception when reading ClipboardData packet in OLE property sets</action>
--- a/src/ooxml/java/org/apache/poi/xwpf/extractor/XWPFWordExtractor.java
+++ b/src/ooxml/java/org/apache/poi/xwpf/extractor/XWPFWordExtractor.java
@@ -21,6 +21,7 @@ import java.util.Iterator;

 import org.apache.poi.POIXMLDocument;
 import org.apache.poi.POIXMLTextExtractor;
 import org.apache.poi.POIXMLException;
 import org.apache.poi.openxml4j.exceptions.OpenXML4JException;
 import org.apache.poi.openxml4j.opc.OPCPackage;
 import org.apache.poi.xwpf.model.XWPFCommentsDecorator;
@@ -31,6 +32,7 @@ import org.apache.poi.xwpf.usermodel.XWPFDocument;
 import org.apache.poi.xwpf.usermodel.XWPFParagraph;
 import org.apache.poi.xwpf.usermodel.XWPFTable;
 import org.apache.xmlbeans.XmlException;
 import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTSectPr;

 /**
 * Helper class to extract text from an OOXML Word file
@@ -72,45 +74,77 @@ public class XWPFWordExtractor extends POIXMLTextExtractor {
 	public String getText() {
 		StringBuffer text = new StringBuffer();
 		XWPFHeaderFooterPolicy hfPolicy = document.getHeaderFooterPolicy();
 		

 		// Start out with all headers
 		// TODO - put them in where they're needed
 		if(hfPolicy.getFirstPageHeader() != null) {
 			text.append( hfPolicy.getFirstPageHeader().getText() );
 		}
 		if(hfPolicy.getEvenPageHeader() != null) {
 			text.append( hfPolicy.getEvenPageHeader().getText() );
 		}
 		if(hfPolicy.getDefaultHeader() != null) {
 			text.append( hfPolicy.getDefaultHeader().getText() );
 		}
                extractHeaders(text, hfPolicy);
 		
 		// First up, all our paragraph based text
 		Iterator<XWPFParagraph> i = document.getParagraphsIterator();
 		while(i.hasNext()) {
 			XWPFParagraphDecorator decorator = new XWPFCommentsDecorator(
 					new XWPFHyperlinkDecorator(i.next(), null, fetchHyperlinks));
 			text.append(decorator.getText()+"\n");
 		}
                        XWPFParagraph paragraph = i.next();


                        try {
                                CTSectPr ctSectPr = null;
                                if (paragraph.getCTP().getPPr()!=null) {
                                        ctSectPr = paragraph.getCTP().getPPr().getSectPr();
                                }

                                XWPFHeaderFooterPolicy headerFooterPolicy = null;

                                if (ctSectPr!=null) {
                                        headerFooterPolicy = new XWPFHeaderFooterPolicy(document, ctSectPr);

                                        extractHeaders(text, headerFooterPolicy);
                                }

                                XWPFParagraphDecorator decorator = new XWPFCommentsDecorator(
                                                new XWPFHyperlinkDecorator(paragraph, null, fetchHyperlinks));
                                text.append(decorator.getText()).append('\n');

                                if (ctSectPr!=null) {
                                        extractFooters(text, headerFooterPolicy);
                                }
                        } catch (IOException e) {
                                throw new POIXMLException(e);
                        } catch (XmlException e) {
                                throw new POIXMLException(e);
                        }
                }

 		// Then our table based text
 		Iterator<XWPFTable> j = document.getTablesIterator();
 		while(j.hasNext()) {
 			text.append(j.next().getText()+"\n");
                        text.append(j.next().getText()).append('\n');
 		}
 		
 		// Finish up with all the footers
 		// TODO - put them in where they're needed
 		if(hfPolicy.getFirstPageFooter() != null) {
 			text.append( hfPolicy.getFirstPageFooter().getText() );
 		}
 		if(hfPolicy.getEvenPageFooter() != null) {
 			text.append( hfPolicy.getEvenPageFooter().getText() );
 		}
 		if(hfPolicy.getDefaultFooter() != null) {
 			text.append( hfPolicy.getDefaultFooter().getText() );
 		}
                extractFooters(text, hfPolicy);
 		
 		return text.toString();
 	}

        /**
         * Appends the text of the footers held by the given policy to the
         *  buffer, in the order first-page, even-page, default. Any footer
         *  the policy returns as null is left out.
         *
         * @param text buffer that receives the footer text
         * @param hfPolicy policy the footers are read from
         */
        private void extractFooters(StringBuffer text, XWPFHeaderFooterPolicy hfPolicy) {
                // First-page footer
                if (null != hfPolicy.getFirstPageFooter()) {
                        final String firstPageText = hfPolicy.getFirstPageFooter().getText();
                        text.append(firstPageText);
                }

                // Even-page footer
                if (null != hfPolicy.getEvenPageFooter()) {
                        final String evenPageText = hfPolicy.getEvenPageFooter().getText();
                        text.append(evenPageText);
                }

                // Default footer
                if (null != hfPolicy.getDefaultFooter()) {
                        final String defaultText = hfPolicy.getDefaultFooter().getText();
                        text.append(defaultText);
                }
        }

        /**
         * Appends the text of the headers held by the given policy to the
         *  buffer, in the order first-page, even-page, default. Any header
         *  the policy returns as null is left out.
         *
         * @param text buffer that receives the header text
         * @param hfPolicy policy the headers are read from
         */
        private void extractHeaders(StringBuffer text, XWPFHeaderFooterPolicy hfPolicy) {
                // First-page header
                if (null != hfPolicy.getFirstPageHeader()) {
                        final String firstPageText = hfPolicy.getFirstPageHeader().getText();
                        text.append(firstPageText);
                }

                // Even-page header
                if (null != hfPolicy.getEvenPageHeader()) {
                        final String evenPageText = hfPolicy.getEvenPageHeader().getText();
                        text.append(evenPageText);
                }

                // Default header
                if (null != hfPolicy.getDefaultHeader()) {
                        final String defaultText = hfPolicy.getDefaultHeader().getText();
                        text.append(defaultText);
                }
        }
 }
--- a/src/ooxml/java/org/apache/poi/xwpf/model/XWPFHeaderFooterPolicy.java
+++ b/src/ooxml/java/org/apache/poi/xwpf/model/XWPFHeaderFooterPolicy.java
@@ -83,19 +83,26 @@ public class XWPFHeaderFooterPolicy {
 	private XWPFHeader defaultHeader;
 	private XWPFFooter defaultFooter;
 	
 	
        /**
         * Figures out the policy for the given document,
         *  and creates any header and footer objects
         *  as required.
         * <p>
         * This is a convenience constructor: it reads the section
         *  properties from the document body and hands them, together
         *  with the document, to the two-argument constructor.
         *
         * @param doc the document whose body-level section properties are used
         * @throws IOException if the delegated constructor throws it
         * @throws XmlException if the delegated constructor throws it
         */
        public XWPFHeaderFooterPolicy(XWPFDocument doc) throws IOException, XmlException {
                // NOTE(review): getSectPr() is passed on without a null check;
                //  confirm the two-argument constructor copes with a body that
                //  has no section properties
                this(doc, doc.getDocument().getBody().getSectPr());
        }

 	/**
 	 * Figures out the policy for the given document,
 	 *  and creates any header and footer objects
 	 *  as required.
 	 */
 	public XWPFHeaderFooterPolicy(XWPFDocument doc) throws IOException, XmlException {
 	public XWPFHeaderFooterPolicy(XWPFDocument doc, CTSectPr sectPr) throws IOException, XmlException {
 		// Grab what headers and footers have been defined
 		// For now, we don't care about different ranges, as it
 		//  doesn't seem that .docx properly supports that
 		//  feature of the file format yet
 		this.doc = doc;
 		CTSectPr sectPr = doc.getDocument().getBody().getSectPr();
 		for(int i=0; i<sectPr.sizeOfHeaderReferenceArray(); i++) {
 			// Get the header
 			CTHdrFtrRef ref = sectPr.getHeaderReferenceArray(i);
--- a/src/ooxml/testcases/org/apache/poi/xwpf/extractor/TestXWPFWordExtractor.java
+++ b/src/ooxml/testcases/org/apache/poi/xwpf/extractor/TestXWPFWordExtractor.java
@@ -198,4 +198,13 @@ public class TestXWPFWordExtractor extends TestCase {
        assertTrue(extractor.getText().contains("extremely well"));
    }

    /**
     * Checks that the text extracted from the Headers.docx sample contains
     * each of the strings "Section 1", "Section 2" and "Section 3".
     */
    public void testParagraphHeader() {
        XWPFWordExtractor extractor = new XWPFWordExtractor(
                XWPFTestDataSamples.openSampleDocument("Headers.docx"));

        String[] expectedFragments = { "Section 1", "Section 2", "Section 3" };
        for (int k = 0; k < expectedFragments.length; k++) {
            // Text is re-extracted for every check, as in the unrolled form
            assertTrue(extractor.getText().contains(expectedFragments[k]));
        }
    }

 }
--- a/test-data/document/Headers.docx
+++ b/test-data/document/Headers.docx