You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

SlideShowExtractor.java 10KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307
  1. package org.apache.poi.sl.extractor;
  2. import java.util.ArrayList;
  3. import java.util.List;
  4. import org.apache.poi.POITextExtractor;
  5. import org.apache.poi.sl.usermodel.Comment;
  6. import org.apache.poi.sl.usermodel.MasterSheet;
  7. import org.apache.poi.sl.usermodel.Notes;
  8. import org.apache.poi.sl.usermodel.ObjectShape;
  9. import org.apache.poi.sl.usermodel.Placeholder;
  10. import org.apache.poi.sl.usermodel.PlaceholderDetails;
  11. import org.apache.poi.sl.usermodel.Shape;
  12. import org.apache.poi.sl.usermodel.ShapeContainer;
  13. import org.apache.poi.sl.usermodel.Sheet;
  14. import org.apache.poi.sl.usermodel.Slide;
  15. import org.apache.poi.sl.usermodel.SlideShow;
  16. import org.apache.poi.sl.usermodel.TableCell;
  17. import org.apache.poi.sl.usermodel.TableShape;
  18. import org.apache.poi.sl.usermodel.TextParagraph;
  19. import org.apache.poi.sl.usermodel.TextRun;
  20. import org.apache.poi.sl.usermodel.TextShape;
  21. import org.apache.poi.util.LocaleUtil;
  22. import org.apache.poi.util.POILogFactory;
  23. import org.apache.poi.util.POILogger;
  24. /**
  25. * Common SlideShow extractor
  26. *
  27. * @since POI 4.0.0
  28. */
  29. public class SlideShowExtractor<
  30. S extends Shape<S,P>,
  31. P extends TextParagraph<S,P,? extends TextRun>
  32. > extends POITextExtractor {
  33. private static final POILogger LOG = POILogFactory.getLogger(SlideShowExtractor.class);
  34. private SlideShow<S,P> slideshow;
  35. private boolean slidesByDefault = true;
  36. private boolean notesByDefault;
  37. private boolean commentsByDefault;
  38. private boolean masterByDefault;
  39. public SlideShowExtractor(final SlideShow<S,P> slideshow) {
  40. setFilesystem(slideshow);
  41. this.slideshow = slideshow;
  42. }
  43. /**
  44. * Should a call to getText() return slide text? Default is yes
  45. */
  46. public void setSlidesByDefault(final boolean slidesByDefault) {
  47. this.slidesByDefault = slidesByDefault;
  48. }
  49. /**
  50. * Should a call to getText() return notes text? Default is no
  51. */
  52. public void setNotesByDefault(final boolean notesByDefault) {
  53. this.notesByDefault = notesByDefault;
  54. }
  55. /**
  56. * Should a call to getText() return comments text? Default is no
  57. */
  58. public void setCommentsByDefault(final boolean commentsByDefault) {
  59. this.commentsByDefault = commentsByDefault;
  60. }
  61. /**
  62. * Should a call to getText() return text from master? Default is no
  63. */
  64. public void setMasterByDefault(final boolean masterByDefault) {
  65. this.masterByDefault = masterByDefault;
  66. }
  67. @Override
  68. public POITextExtractor getMetadataTextExtractor() {
  69. return slideshow.getMetadataTextExtractor();
  70. }
  71. /**
  72. * Fetches all the slide text from the slideshow, but not the notes, unless
  73. * you've called setSlidesByDefault() and setNotesByDefault() to change this
  74. */
  75. @Override
  76. public String getText() {
  77. final StringBuilder sb = new StringBuilder();
  78. if (masterByDefault) {
  79. for (final MasterSheet<S,P> master : slideshow.getSlideMasters()) {
  80. for (final Shape<S,P> shape : master) {
  81. if (shape instanceof TextShape) {
  82. final TextShape<S,P> ts = (TextShape<S,P>)shape;
  83. final String text = ts.getText();
  84. if (text == null || text.isEmpty() || "*".equals(text)) {
  85. continue;
  86. }
  87. if (ts.isPlaceholder()) {
  88. // don't bother about boiler plate text on master sheets
  89. LOG.log(POILogger.INFO, "Ignoring boiler plate (placeholder) text on slide master:", text);
  90. continue;
  91. }
  92. sb.append(text);
  93. if (!text.endsWith("\n")) {
  94. sb.append("\n");
  95. }
  96. }
  97. }
  98. }
  99. }
  100. for (final Slide<S, P> slide : slideshow.getSlides()) {
  101. sb.append(getText(slide));
  102. }
  103. return sb.toString();
  104. }
  105. public String getText(final Slide<S,P> slide) {
  106. final StringBuilder sb = new StringBuilder();
  107. if (slidesByDefault) {
  108. printShapeText(slide, sb);
  109. }
  110. if (commentsByDefault) {
  111. printComments(slide, sb);
  112. }
  113. if (notesByDefault) {
  114. printNotes(slide, sb);
  115. }
  116. return sb.toString();
  117. }
  118. private String printHeaderReturnFooter(final Sheet<S,P> sheet, final StringBuilder sb) {
  119. final Sheet<S, P> m = (sheet instanceof Slide) ? sheet.getMasterSheet() : sheet;
  120. final StringBuilder footer = new StringBuilder("\n");
  121. addSheetPlaceholderDatails(sheet, Placeholder.HEADER, sb);
  122. addSheetPlaceholderDatails(sheet, Placeholder.FOOTER, footer);
  123. if (masterByDefault) {
  124. // write header texts and determine footer text
  125. for (Shape<S, P> s : m) {
  126. if (!(s instanceof TextShape)) {
  127. continue;
  128. }
  129. final TextShape<S, P> ts = (TextShape<S, P>) s;
  130. final PlaceholderDetails pd = ts.getPlaceholderDetails();
  131. if (pd == null || !pd.isVisible()) {
  132. continue;
  133. }
  134. switch (pd.getPlaceholder()) {
  135. case HEADER:
  136. sb.append(ts.getText());
  137. sb.append('\n');
  138. break;
  139. case SLIDE_NUMBER:
  140. if (sheet instanceof Slide) {
  141. footer.append(ts.getText().replace("‹#›", Integer.toString(((Slide<S, P>) sheet).getSlideNumber() + 1)));
  142. footer.append('\n');
  143. }
  144. break;
  145. case FOOTER:
  146. footer.append(ts.getText());
  147. footer.append('\n');
  148. break;
  149. case DATETIME:
  150. // currently not supported
  151. default:
  152. break;
  153. }
  154. }
  155. }
  156. return (footer.length() > 1) ? footer.toString() : "";
  157. }
  158. private void addSheetPlaceholderDatails(final Sheet<S,P> sheet, final Placeholder placeholder, final StringBuilder sb) {
  159. final PlaceholderDetails headerPD = sheet.getPlaceholderDetails(placeholder);
  160. if (headerPD == null) {
  161. return;
  162. }
  163. final String headerStr = headerPD.getText();
  164. if (headerStr == null) {
  165. return;
  166. }
  167. sb.append(headerStr);
  168. }
  169. private void printShapeText(final Sheet<S,P> sheet, final StringBuilder sb) {
  170. final String footer = printHeaderReturnFooter(sheet, sb);
  171. printShapeText((ShapeContainer<S,P>)sheet, sb);
  172. sb.append(footer);
  173. }
  174. @SuppressWarnings("unchecked")
  175. private void printShapeText(final ShapeContainer<S,P> container, final StringBuilder sb) {
  176. for (Shape<S,P> shape : container) {
  177. if (shape instanceof TextShape) {
  178. printShapeText((TextShape<S,P>)shape, sb);
  179. } else if (shape instanceof TableShape) {
  180. printShapeText((TableShape<S,P>)shape, sb);
  181. } else if (shape instanceof ShapeContainer) {
  182. printShapeText((ShapeContainer<S,P>)shape, sb);
  183. }
  184. }
  185. }
  186. private void printShapeText(final TextShape<S,P> shape, final StringBuilder sb) {
  187. final List<P> paraList = shape.getTextParagraphs();
  188. if (paraList.isEmpty()) {
  189. sb.append('\n');
  190. return;
  191. }
  192. for (final P para : paraList) {
  193. final int oldLen = sb.length();
  194. for (final TextRun tr : para) {
  195. final String str = tr.getRawText().replace("\r", "");
  196. final String newStr;
  197. switch (tr.getTextCap()) {
  198. case ALL:
  199. newStr = str.toUpperCase(LocaleUtil.getUserLocale());
  200. break;
  201. case SMALL:
  202. newStr = str.toLowerCase(LocaleUtil.getUserLocale());
  203. break;
  204. default:
  205. case NONE:
  206. newStr = str;
  207. break;
  208. }
  209. sb.append(newStr);
  210. }
  211. sb.append('\n');
  212. }
  213. }
  214. @SuppressWarnings("Duplicates")
  215. private void printShapeText(final TableShape<S,P> shape, final StringBuilder sb) {
  216. final int nrows = shape.getNumberOfRows();
  217. final int ncols = shape.getNumberOfColumns();
  218. for (int row = 0; row < nrows; row++){
  219. for (int col = 0; col < ncols; col++){
  220. TableCell<S, P> cell = shape.getCell(row, col);
  221. //defensive null checks; don't know if they're necessary
  222. if (cell != null){
  223. String txt = cell.getText();
  224. txt = (txt == null) ? "" : txt;
  225. sb.append(txt);
  226. if (col < ncols-1){
  227. sb.append('\t');
  228. }
  229. }
  230. }
  231. sb.append('\n');
  232. }
  233. }
  234. private void printComments(final Slide<S,P> slide, final StringBuilder sb) {
  235. for (final Comment comment : slide.getComments()) {
  236. sb.append(comment.getAuthor());
  237. sb.append(" - ");
  238. sb.append(comment.getText());
  239. sb.append("\n");
  240. }
  241. }
  242. private void printNotes(final Slide<S,P> slide, final StringBuilder sb) {
  243. final Notes<S, P> notes = slide.getNotes();
  244. if (notes == null) {
  245. return;
  246. }
  247. final String footer = printHeaderReturnFooter(notes, sb);
  248. printShapeText(notes, sb);
  249. sb.append(footer);
  250. }
  251. public List<? extends ObjectShape<S,P>> getOLEShapes() {
  252. final List<ObjectShape<S,P>> oleShapes = new ArrayList<>();
  253. for (final Slide<S,P> slide : slideshow.getSlides()) {
  254. addOLEShapes(oleShapes, slide);
  255. }
  256. return oleShapes;
  257. }
  258. @SuppressWarnings("unchecked")
  259. private void addOLEShapes(final List<ObjectShape<S,P>> oleShapes, ShapeContainer<S,P> container) {
  260. for (Shape<S,P> shape : container) {
  261. if (shape instanceof ShapeContainer) {
  262. addOLEShapes(oleShapes, (ShapeContainer<S,P>)shape);
  263. } else if (shape instanceof ObjectShape) {
  264. oleShapes.add((ObjectShape<S,P>)shape);
  265. }
  266. }
  267. }
  268. }