git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@897875 13f79535-47bb-0310-9956-ffa450edef68tags/REL_3_7_BETA1
@@ -16,28 +16,18 @@ | |||
==================================================================== */ | |||
package org.apache.poi.xslf.extractor; | |||
import java.io.IOException; | |||
import org.apache.poi.POIXMLTextExtractor; | |||
import org.apache.poi.openxml4j.exceptions.OpenXML4JException; | |||
import org.apache.poi.openxml4j.opc.OPCPackage; | |||
import org.apache.poi.xslf.XSLFSlideShow; | |||
import org.apache.poi.xslf.usermodel.DrawingParagraph; | |||
import org.apache.poi.xslf.usermodel.XMLSlideShow; | |||
import org.apache.poi.xslf.usermodel.XSLFCommonSlideData; | |||
import org.apache.poi.xslf.usermodel.XSLFSlide; | |||
import org.apache.xmlbeans.XmlException; | |||
import org.apache.xmlbeans.XmlObject; | |||
import org.apache.xmlbeans.XmlCursor; | |||
import org.openxmlformats.schemas.drawingml.x2006.main.CTRegularTextRun; | |||
import org.openxmlformats.schemas.drawingml.x2006.main.CTTextBody; | |||
import org.openxmlformats.schemas.drawingml.x2006.main.CTTextParagraph; | |||
import org.openxmlformats.schemas.drawingml.x2006.main.CTTextLineBreak; | |||
import org.openxmlformats.schemas.presentationml.x2006.main.CTComment; | |||
import org.openxmlformats.schemas.presentationml.x2006.main.CTCommentList; | |||
import org.openxmlformats.schemas.presentationml.x2006.main.CTGroupShape; | |||
import org.openxmlformats.schemas.presentationml.x2006.main.CTNotesSlide; | |||
import org.openxmlformats.schemas.presentationml.x2006.main.CTShape; | |||
import org.openxmlformats.schemas.presentationml.x2006.main.CTSlide; | |||
import org.openxmlformats.schemas.presentationml.x2006.main.CTSlideIdListEntry; | |||
import org.openxmlformats.schemas.presentationml.x2006.main.*; | |||
import java.io.IOException; | |||
public class XSLFPowerPointExtractor extends POIXMLTextExtractor { | |||
private XMLSlideShow slideshow; | |||
@@ -110,7 +100,7 @@ public class XSLFPowerPointExtractor extends POIXMLTextExtractor { | |||
slideshow._getXSLFSlideShow().getSlideComments(slideId); | |||
if(slideText) { | |||
extractText(rawSlide.getCSld().getSpTree(), text); | |||
extractText(slides[i].getCommonSlideData(), text); | |||
// Comments too for the slide | |||
if(comments != null) { | |||
@@ -123,8 +113,9 @@ public class XSLFPowerPointExtractor extends POIXMLTextExtractor { | |||
} | |||
} | |||
} | |||
if(notesText && notes != null) { | |||
extractText(notes.getCSld().getSpTree(), text); | |||
extractText(new XSLFCommonSlideData(notes.getCSld()), text); | |||
} | |||
} catch(Exception e) { | |||
throw new RuntimeException(e); | |||
@@ -134,31 +125,10 @@ public class XSLFPowerPointExtractor extends POIXMLTextExtractor { | |||
return text.toString(); | |||
} | |||
private void extractText(CTGroupShape gs, StringBuffer text) { | |||
CTShape[] shapes = gs.getSpArray(); | |||
for (int i = 0; i < shapes.length; i++) { | |||
CTTextBody textBody = | |||
shapes[i].getTxBody(); | |||
if(textBody != null) { | |||
CTTextParagraph[] paras = | |||
textBody.getPArray(); | |||
for (int j = 0; j < paras.length; j++) { | |||
XmlCursor c = paras[j].newCursor(); | |||
c.selectPath("./*"); | |||
while (c.toNextSelection()) { | |||
XmlObject o = c.getObject(); | |||
if(o instanceof CTRegularTextRun){ | |||
CTRegularTextRun txrun = (CTRegularTextRun)o; | |||
text.append( txrun.getT() ); | |||
} else if (o instanceof CTTextLineBreak){ | |||
text.append('\n'); | |||
} | |||
} | |||
// End each paragraph with a new line | |||
text.append("\n"); | |||
} | |||
} | |||
} | |||
} | |||
private void extractText(XSLFCommonSlideData data, StringBuffer text) { | |||
for (DrawingParagraph p : data.getText()) { | |||
text.append(p.getText()); | |||
text.append("\n"); | |||
} | |||
} | |||
} |
@@ -0,0 +1,33 @@ | |||
package org.apache.poi.xslf.usermodel; | |||
import org.openxmlformats.schemas.drawingml.x2006.main.CTTextParagraph; | |||
import org.openxmlformats.schemas.drawingml.x2006.main.CTRegularTextRun; | |||
import org.openxmlformats.schemas.drawingml.x2006.main.CTTextLineBreak; | |||
import org.apache.xmlbeans.XmlCursor; | |||
import org.apache.xmlbeans.XmlObject; | |||
public class DrawingParagraph { | |||
private final CTTextParagraph p; | |||
public DrawingParagraph(CTTextParagraph p) { | |||
this.p = p; | |||
} | |||
public CharSequence getText() { | |||
StringBuilder text = new StringBuilder(); | |||
XmlCursor c = p.newCursor(); | |||
c.selectPath("./*"); | |||
while (c.toNextSelection()) { | |||
XmlObject o = c.getObject(); | |||
if (o instanceof CTRegularTextRun) { | |||
CTRegularTextRun txrun = (CTRegularTextRun) o; | |||
text.append(txrun.getT()); | |||
} else if (o instanceof CTTextLineBreak) { | |||
text.append('\n'); | |||
} | |||
} | |||
return text; | |||
} | |||
} |
@@ -0,0 +1,23 @@ | |||
package org.apache.poi.xslf.usermodel; | |||
import org.openxmlformats.schemas.drawingml.x2006.main.CTTable; | |||
import org.openxmlformats.schemas.drawingml.x2006.main.CTTableRow; | |||
public class DrawingTable { | |||
private final CTTable table; | |||
public DrawingTable(CTTable table) { | |||
this.table = table; | |||
} | |||
public DrawingTableRow[] getRows() { | |||
CTTableRow[] ctTableRows = table.getTrArray(); | |||
DrawingTableRow[] o = new DrawingTableRow[ctTableRows.length]; | |||
for (int i=0; i<o.length; i++) { | |||
o[i] = new DrawingTableRow(ctTableRows[i]); | |||
} | |||
return o; | |||
} | |||
} |
@@ -0,0 +1,17 @@ | |||
package org.apache.poi.xslf.usermodel; | |||
import org.openxmlformats.schemas.drawingml.x2006.main.CTTableCell; | |||
public class DrawingTableCell { | |||
private final CTTableCell cell; | |||
private final DrawingTextBody drawingTextBody; | |||
public DrawingTableCell(CTTableCell cell) { | |||
this.cell = cell; | |||
drawingTextBody = new DrawingTextBody(this.cell.getTxBody()); | |||
} | |||
public DrawingTextBody getTextBody() { | |||
return drawingTextBody; | |||
} | |||
} |
@@ -0,0 +1,23 @@ | |||
package org.apache.poi.xslf.usermodel; | |||
import org.openxmlformats.schemas.drawingml.x2006.main.CTTableRow; | |||
import org.openxmlformats.schemas.drawingml.x2006.main.CTTableCell; | |||
public class DrawingTableRow { | |||
private final CTTableRow row; | |||
public DrawingTableRow(CTTableRow row) { | |||
this.row = row; | |||
} | |||
public DrawingTableCell[] getCells() { | |||
CTTableCell[] ctTableCells = row.getTcArray(); | |||
DrawingTableCell[] o = new DrawingTableCell[ctTableCells.length]; | |||
for (int i=0; i<o.length; i++) { | |||
o[i] = new DrawingTableCell(ctTableCells[i]); | |||
} | |||
return o; | |||
} | |||
} |
@@ -0,0 +1,23 @@ | |||
package org.apache.poi.xslf.usermodel; | |||
import org.openxmlformats.schemas.drawingml.x2006.main.CTTextBody; | |||
import org.openxmlformats.schemas.drawingml.x2006.main.CTTextParagraph; | |||
public class DrawingTextBody { | |||
private final CTTextBody textBody; | |||
public DrawingTextBody(CTTextBody textBody) { | |||
this.textBody = textBody; | |||
} | |||
public DrawingParagraph[] getParagraphs() { | |||
CTTextParagraph[] pArray = textBody.getPArray(); | |||
DrawingParagraph[] o = new DrawingParagraph[pArray.length]; | |||
for (int i=0; i<o.length; i++) { | |||
o[i] = new DrawingParagraph(pArray[i]); | |||
} | |||
return o; | |||
} | |||
} |
@@ -0,0 +1,67 @@ | |||
package org.apache.poi.xslf.usermodel; | |||
import org.apache.xmlbeans.XmlCursor; | |||
import org.apache.xmlbeans.XmlObject; | |||
import org.openxmlformats.schemas.drawingml.x2006.main.CTGraphicalObjectData; | |||
import org.openxmlformats.schemas.drawingml.x2006.main.CTTable; | |||
import org.openxmlformats.schemas.drawingml.x2006.main.CTTextBody; | |||
import org.openxmlformats.schemas.presentationml.x2006.main.CTCommonSlideData; | |||
import org.openxmlformats.schemas.presentationml.x2006.main.CTGraphicalObjectFrame; | |||
import org.openxmlformats.schemas.presentationml.x2006.main.CTGroupShape; | |||
import org.openxmlformats.schemas.presentationml.x2006.main.CTShape; | |||
import java.util.ArrayList; | |||
import java.util.Arrays; | |||
import java.util.List; | |||
public class XSLFCommonSlideData { | |||
private final CTCommonSlideData data; | |||
public XSLFCommonSlideData(CTCommonSlideData data) { | |||
this.data = data; | |||
} | |||
public List<DrawingParagraph> getText() { | |||
CTGroupShape gs = data.getSpTree(); | |||
List<DrawingParagraph> out = new ArrayList<DrawingParagraph>(); | |||
CTShape[] shapes = gs.getSpArray(); | |||
for (int i = 0; i < shapes.length; i++) { | |||
CTTextBody ctTextBody = shapes[i].getTxBody(); | |||
if (ctTextBody==null) { | |||
continue; | |||
} | |||
DrawingTextBody textBody = new DrawingTextBody(ctTextBody); | |||
out.addAll(Arrays.asList(textBody.getParagraphs())); | |||
} | |||
CTGraphicalObjectFrame[] graphicFrames = gs.getGraphicFrameArray(); | |||
for (CTGraphicalObjectFrame frame: graphicFrames) { | |||
CTGraphicalObjectData data = frame.getGraphic().getGraphicData(); | |||
XmlCursor c = data.newCursor(); | |||
c.selectPath("./*"); | |||
while (c.toNextSelection()) { | |||
XmlObject o = c.getObject(); | |||
if (o instanceof CTTable) { | |||
DrawingTable table = new DrawingTable((CTTable) o); | |||
for (DrawingTableRow row : table.getRows()) { | |||
for (DrawingTableCell cell : row.getCells()) { | |||
DrawingTextBody textBody = cell.getTextBody(); | |||
out.addAll(Arrays.asList(textBody.getParagraphs())); | |||
} | |||
} | |||
} | |||
} | |||
} | |||
return out; | |||
} | |||
} |
@@ -26,11 +26,13 @@ import org.openxmlformats.schemas.presentationml.x2006.main.CTSlideIdListEntry; | |||
public class XSLFSlide extends XSLFSheet implements Slide { | |||
private CTSlide slide; | |||
private CTSlideIdListEntry slideId; | |||
private XSLFCommonSlideData data; | |||
/**
 * Creates a slide backed by the given slide bean and id entry, and wraps
 * the slide's common slide data for later retrieval.
 *
 * @param slide   the underlying slide bean
 * @param slideId the slide id list entry associated with this slide
 * @param parent  the slide show passed on to the superclass constructor
 */
public XSLFSlide(CTSlide slide, CTSlideIdListEntry slideId, SlideShow parent) {
    super(parent);
    this.slideId = slideId;
    this.slide = slide;
    // Wrap the common slide data once, at construction time
    this.data = new XSLFCommonSlideData(this.slide.getCSld());
}
/** | |||
@@ -88,4 +90,8 @@ public class XSLFSlide extends XSLFSheet implements Slide { | |||
// TODO Auto-generated method stub | |||
} | |||
public XSLFCommonSlideData getCommonSlideData() { | |||
return data; | |||
} | |||
} |
@@ -113,4 +113,17 @@ public class TestXSLFPowerPointExtractor extends TestCase { | |||
// Check comments are there | |||
assertTrue("Unable to find expected word in text\n" + text, text.contains("testdoc")); | |||
} | |||
public void testTable() throws Exception { | |||
POIDataSamples slTests = POIDataSamples.getSlideShowInstance(); | |||
xmlA = new XSLFSlideShow(OPCPackage.open(slTests.openResourceAsStream("present1.pptx"))); | |||
XSLFPowerPointExtractor extractor = | |||
new XSLFPowerPointExtractor(xmlA); | |||
String text = extractor.getText(); | |||
assertTrue(text.length() > 0); | |||
// Check comments are there | |||
assertTrue("Unable to find expected word in text\n" + text, text.contains("TEST")); | |||
} | |||
} |