git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@897875 13f79535-47bb-0310-9956-ffa450edef68tags/REL_3_7_BETA1
==================================================================== */ | ==================================================================== */ | ||||
package org.apache.poi.xslf.extractor; | package org.apache.poi.xslf.extractor; | ||||
import java.io.IOException; | |||||
import org.apache.poi.POIXMLTextExtractor; | import org.apache.poi.POIXMLTextExtractor; | ||||
import org.apache.poi.openxml4j.exceptions.OpenXML4JException; | import org.apache.poi.openxml4j.exceptions.OpenXML4JException; | ||||
import org.apache.poi.openxml4j.opc.OPCPackage; | import org.apache.poi.openxml4j.opc.OPCPackage; | ||||
import org.apache.poi.xslf.XSLFSlideShow; | import org.apache.poi.xslf.XSLFSlideShow; | ||||
import org.apache.poi.xslf.usermodel.DrawingParagraph; | |||||
import org.apache.poi.xslf.usermodel.XMLSlideShow; | import org.apache.poi.xslf.usermodel.XMLSlideShow; | ||||
import org.apache.poi.xslf.usermodel.XSLFCommonSlideData; | |||||
import org.apache.poi.xslf.usermodel.XSLFSlide; | import org.apache.poi.xslf.usermodel.XSLFSlide; | ||||
import org.apache.xmlbeans.XmlException; | import org.apache.xmlbeans.XmlException; | ||||
import org.apache.xmlbeans.XmlObject; | |||||
import org.apache.xmlbeans.XmlCursor; | |||||
import org.openxmlformats.schemas.drawingml.x2006.main.CTRegularTextRun; | |||||
import org.openxmlformats.schemas.drawingml.x2006.main.CTTextBody; | |||||
import org.openxmlformats.schemas.drawingml.x2006.main.CTTextParagraph; | |||||
import org.openxmlformats.schemas.drawingml.x2006.main.CTTextLineBreak; | |||||
import org.openxmlformats.schemas.presentationml.x2006.main.CTComment; | |||||
import org.openxmlformats.schemas.presentationml.x2006.main.CTCommentList; | |||||
import org.openxmlformats.schemas.presentationml.x2006.main.CTGroupShape; | |||||
import org.openxmlformats.schemas.presentationml.x2006.main.CTNotesSlide; | |||||
import org.openxmlformats.schemas.presentationml.x2006.main.CTShape; | |||||
import org.openxmlformats.schemas.presentationml.x2006.main.CTSlide; | |||||
import org.openxmlformats.schemas.presentationml.x2006.main.CTSlideIdListEntry; | |||||
import org.openxmlformats.schemas.presentationml.x2006.main.*; | |||||
import java.io.IOException; | |||||
public class XSLFPowerPointExtractor extends POIXMLTextExtractor { | public class XSLFPowerPointExtractor extends POIXMLTextExtractor { | ||||
private XMLSlideShow slideshow; | private XMLSlideShow slideshow; | ||||
slideshow._getXSLFSlideShow().getSlideComments(slideId); | slideshow._getXSLFSlideShow().getSlideComments(slideId); | ||||
if(slideText) { | if(slideText) { | ||||
extractText(rawSlide.getCSld().getSpTree(), text); | |||||
extractText(slides[i].getCommonSlideData(), text); | |||||
// Comments too for the slide | // Comments too for the slide | ||||
if(comments != null) { | if(comments != null) { | ||||
} | } | ||||
} | } | ||||
} | } | ||||
if(notesText && notes != null) { | if(notesText && notes != null) { | ||||
extractText(notes.getCSld().getSpTree(), text); | |||||
extractText(new XSLFCommonSlideData(notes.getCSld()), text); | |||||
} | } | ||||
} catch(Exception e) { | } catch(Exception e) { | ||||
throw new RuntimeException(e); | throw new RuntimeException(e); | ||||
return text.toString(); | return text.toString(); | ||||
} | } | ||||
private void extractText(CTGroupShape gs, StringBuffer text) { | |||||
CTShape[] shapes = gs.getSpArray(); | |||||
for (int i = 0; i < shapes.length; i++) { | |||||
CTTextBody textBody = | |||||
shapes[i].getTxBody(); | |||||
if(textBody != null) { | |||||
CTTextParagraph[] paras = | |||||
textBody.getPArray(); | |||||
for (int j = 0; j < paras.length; j++) { | |||||
XmlCursor c = paras[j].newCursor(); | |||||
c.selectPath("./*"); | |||||
while (c.toNextSelection()) { | |||||
XmlObject o = c.getObject(); | |||||
if(o instanceof CTRegularTextRun){ | |||||
CTRegularTextRun txrun = (CTRegularTextRun)o; | |||||
text.append( txrun.getT() ); | |||||
} else if (o instanceof CTTextLineBreak){ | |||||
text.append('\n'); | |||||
} | |||||
} | |||||
// End each paragraph with a new line | |||||
text.append("\n"); | |||||
} | |||||
} | |||||
} | |||||
} | |||||
private void extractText(XSLFCommonSlideData data, StringBuffer text) { | |||||
for (DrawingParagraph p : data.getText()) { | |||||
text.append(p.getText()); | |||||
text.append("\n"); | |||||
} | |||||
} | |||||
} | } |
package org.apache.poi.xslf.usermodel; | |||||
import org.openxmlformats.schemas.drawingml.x2006.main.CTTextParagraph; | |||||
import org.openxmlformats.schemas.drawingml.x2006.main.CTRegularTextRun; | |||||
import org.openxmlformats.schemas.drawingml.x2006.main.CTTextLineBreak; | |||||
import org.apache.xmlbeans.XmlCursor; | |||||
import org.apache.xmlbeans.XmlObject; | |||||
public class DrawingParagraph { | |||||
private final CTTextParagraph p; | |||||
public DrawingParagraph(CTTextParagraph p) { | |||||
this.p = p; | |||||
} | |||||
public CharSequence getText() { | |||||
StringBuilder text = new StringBuilder(); | |||||
XmlCursor c = p.newCursor(); | |||||
c.selectPath("./*"); | |||||
while (c.toNextSelection()) { | |||||
XmlObject o = c.getObject(); | |||||
if (o instanceof CTRegularTextRun) { | |||||
CTRegularTextRun txrun = (CTRegularTextRun) o; | |||||
text.append(txrun.getT()); | |||||
} else if (o instanceof CTTextLineBreak) { | |||||
text.append('\n'); | |||||
} | |||||
} | |||||
return text; | |||||
} | |||||
} |
package org.apache.poi.xslf.usermodel; | |||||
import org.openxmlformats.schemas.drawingml.x2006.main.CTTable; | |||||
import org.openxmlformats.schemas.drawingml.x2006.main.CTTableRow; | |||||
public class DrawingTable { | |||||
private final CTTable table; | |||||
public DrawingTable(CTTable table) { | |||||
this.table = table; | |||||
} | |||||
public DrawingTableRow[] getRows() { | |||||
CTTableRow[] ctTableRows = table.getTrArray(); | |||||
DrawingTableRow[] o = new DrawingTableRow[ctTableRows.length]; | |||||
for (int i=0; i<o.length; i++) { | |||||
o[i] = new DrawingTableRow(ctTableRows[i]); | |||||
} | |||||
return o; | |||||
} | |||||
} |
package org.apache.poi.xslf.usermodel; | |||||
import org.openxmlformats.schemas.drawingml.x2006.main.CTTableCell; | |||||
public class DrawingTableCell { | |||||
private final CTTableCell cell; | |||||
private final DrawingTextBody drawingTextBody; | |||||
public DrawingTableCell(CTTableCell cell) { | |||||
this.cell = cell; | |||||
drawingTextBody = new DrawingTextBody(this.cell.getTxBody()); | |||||
} | |||||
public DrawingTextBody getTextBody() { | |||||
return drawingTextBody; | |||||
} | |||||
} |
package org.apache.poi.xslf.usermodel; | |||||
import org.openxmlformats.schemas.drawingml.x2006.main.CTTableRow; | |||||
import org.openxmlformats.schemas.drawingml.x2006.main.CTTableCell; | |||||
public class DrawingTableRow { | |||||
private final CTTableRow row; | |||||
public DrawingTableRow(CTTableRow row) { | |||||
this.row = row; | |||||
} | |||||
public DrawingTableCell[] getCells() { | |||||
CTTableCell[] ctTableCells = row.getTcArray(); | |||||
DrawingTableCell[] o = new DrawingTableCell[ctTableCells.length]; | |||||
for (int i=0; i<o.length; i++) { | |||||
o[i] = new DrawingTableCell(ctTableCells[i]); | |||||
} | |||||
return o; | |||||
} | |||||
} |
package org.apache.poi.xslf.usermodel; | |||||
import org.openxmlformats.schemas.drawingml.x2006.main.CTTextBody; | |||||
import org.openxmlformats.schemas.drawingml.x2006.main.CTTextParagraph; | |||||
public class DrawingTextBody { | |||||
private final CTTextBody textBody; | |||||
public DrawingTextBody(CTTextBody textBody) { | |||||
this.textBody = textBody; | |||||
} | |||||
public DrawingParagraph[] getParagraphs() { | |||||
CTTextParagraph[] pArray = textBody.getPArray(); | |||||
DrawingParagraph[] o = new DrawingParagraph[pArray.length]; | |||||
for (int i=0; i<o.length; i++) { | |||||
o[i] = new DrawingParagraph(pArray[i]); | |||||
} | |||||
return o; | |||||
} | |||||
} |
package org.apache.poi.xslf.usermodel; | |||||
import org.apache.xmlbeans.XmlCursor; | |||||
import org.apache.xmlbeans.XmlObject; | |||||
import org.openxmlformats.schemas.drawingml.x2006.main.CTGraphicalObjectData; | |||||
import org.openxmlformats.schemas.drawingml.x2006.main.CTTable; | |||||
import org.openxmlformats.schemas.drawingml.x2006.main.CTTextBody; | |||||
import org.openxmlformats.schemas.presentationml.x2006.main.CTCommonSlideData; | |||||
import org.openxmlformats.schemas.presentationml.x2006.main.CTGraphicalObjectFrame; | |||||
import org.openxmlformats.schemas.presentationml.x2006.main.CTGroupShape; | |||||
import org.openxmlformats.schemas.presentationml.x2006.main.CTShape; | |||||
import java.util.ArrayList; | |||||
import java.util.Arrays; | |||||
import java.util.List; | |||||
public class XSLFCommonSlideData { | |||||
private final CTCommonSlideData data; | |||||
public XSLFCommonSlideData(CTCommonSlideData data) { | |||||
this.data = data; | |||||
} | |||||
public List<DrawingParagraph> getText() { | |||||
CTGroupShape gs = data.getSpTree(); | |||||
List<DrawingParagraph> out = new ArrayList<DrawingParagraph>(); | |||||
CTShape[] shapes = gs.getSpArray(); | |||||
for (int i = 0; i < shapes.length; i++) { | |||||
CTTextBody ctTextBody = shapes[i].getTxBody(); | |||||
if (ctTextBody==null) { | |||||
continue; | |||||
} | |||||
DrawingTextBody textBody = new DrawingTextBody(ctTextBody); | |||||
out.addAll(Arrays.asList(textBody.getParagraphs())); | |||||
} | |||||
CTGraphicalObjectFrame[] graphicFrames = gs.getGraphicFrameArray(); | |||||
for (CTGraphicalObjectFrame frame: graphicFrames) { | |||||
CTGraphicalObjectData data = frame.getGraphic().getGraphicData(); | |||||
XmlCursor c = data.newCursor(); | |||||
c.selectPath("./*"); | |||||
while (c.toNextSelection()) { | |||||
XmlObject o = c.getObject(); | |||||
if (o instanceof CTTable) { | |||||
DrawingTable table = new DrawingTable((CTTable) o); | |||||
for (DrawingTableRow row : table.getRows()) { | |||||
for (DrawingTableCell cell : row.getCells()) { | |||||
DrawingTextBody textBody = cell.getTextBody(); | |||||
out.addAll(Arrays.asList(textBody.getParagraphs())); | |||||
} | |||||
} | |||||
} | |||||
} | |||||
} | |||||
return out; | |||||
} | |||||
} |
public class XSLFSlide extends XSLFSheet implements Slide { | public class XSLFSlide extends XSLFSheet implements Slide { | ||||
private CTSlide slide; | private CTSlide slide; | ||||
private CTSlideIdListEntry slideId; | private CTSlideIdListEntry slideId; | ||||
private XSLFCommonSlideData data; | |||||
/**
 * Creates a slide wrapper around the given slide bean and its id-list
 * entry, and wraps the slide's common slide data for later retrieval.
 *
 * @param slide   the underlying slide bean
 * @param slideId the slide's entry in the slide id list
 * @param parent  the owning slide show, passed to the superclass
 */
public XSLFSlide(CTSlide slide, CTSlideIdListEntry slideId, SlideShow parent) {
    super(parent);
    this.slideId = slideId;
    this.slide = slide;
    this.data = new XSLFCommonSlideData(slide.getCSld());
}
/** | /** | ||||
// TODO Auto-generated method stub | // TODO Auto-generated method stub | ||||
} | } | ||||
public XSLFCommonSlideData getCommonSlideData() { | |||||
return data; | |||||
} | |||||
} | } |
// Check comments are there | // Check comments are there | ||||
assertTrue("Unable to find expected word in text\n" + text, text.contains("testdoc")); | assertTrue("Unable to find expected word in text\n" + text, text.contains("testdoc")); | ||||
} | } | ||||
public void testTable() throws Exception { | |||||
POIDataSamples slTests = POIDataSamples.getSlideShowInstance(); | |||||
xmlA = new XSLFSlideShow(OPCPackage.open(slTests.openResourceAsStream("present1.pptx"))); | |||||
XSLFPowerPointExtractor extractor = | |||||
new XSLFPowerPointExtractor(xmlA); | |||||
String text = extractor.getText(); | |||||
assertTrue(text.length() > 0); | |||||
// Check comments are there | |||||
assertTrue("Unable to find expected word in text\n" + text, text.contains("TEST")); | |||||
} | |||||
} | } |