You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

DataExtraction.java 3.8KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101
  1. /*
  2. * ====================================================================
  3. * Licensed to the Apache Software Foundation (ASF) under one or more
  4. * contributor license agreements. See the NOTICE file distributed with
  5. * this work for additional information regarding copyright ownership.
  6. * The ASF licenses this file to You under the Apache License, Version 2.0
  7. * (the "License"); you may not use this file except in compliance with
  8. * the License. You may obtain a copy of the License at
  9. *
  10. * http://www.apache.org/licenses/LICENSE-2.0
  11. *
  12. * Unless required by applicable law or agreed to in writing, software
  13. * distributed under the License is distributed on an "AS IS" BASIS,
  14. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  15. * See the License for the specific language governing permissions and
  16. * limitations under the License.
  17. * ====================================================================
  18. */
  19. package org.apache.poi.examples.xslf;
  20. import java.awt.Dimension;
  21. import java.io.FileInputStream;
  22. import java.io.IOException;
  23. import java.io.InputStream;
  24. import java.io.PrintStream;
  25. import org.apache.poi.openxml4j.opc.PackagePart;
  26. import org.apache.poi.xslf.usermodel.XMLSlideShow;
  27. import org.apache.poi.xslf.usermodel.XSLFPictureData;
  28. import org.apache.poi.xslf.usermodel.XSLFPictureShape;
  29. import org.apache.poi.xslf.usermodel.XSLFShape;
  30. import org.apache.poi.xslf.usermodel.XSLFSlide;
  31. import org.apache.poi.xslf.usermodel.XSLFTextShape;
  32. /**
  33. * Demonstrates how you can extract data from a .pptx file
  34. */
  35. @SuppressWarnings({"java:S106","java:S4823","java:S1192"})
  36. public final class DataExtraction {
  37. private DataExtraction() {}
  38. public static void main(String[] args) throws IOException {
  39. PrintStream out = System.out;
  40. if (args.length == 0) {
  41. out.println("Input file is required");
  42. return;
  43. }
  44. FileInputStream is = new FileInputStream(args[0]);
  45. try (XMLSlideShow ppt = new XMLSlideShow(is)) {
  46. is.close();
  47. // Get the document's embedded files.
  48. for (PackagePart p : ppt.getAllEmbeddedParts()) {
  49. String type = p.getContentType();
  50. // typically file name
  51. String name = p.getPartName().getName();
  52. out.println("Embedded file (" + type + "): " + name);
  53. InputStream pIs = p.getInputStream();
  54. // make sense of the part data
  55. pIs.close();
  56. }
  57. // Get the document's embedded files.
  58. for (XSLFPictureData data : ppt.getPictureData()) {
  59. String type = data.getContentType();
  60. String name = data.getFileName();
  61. out.println("Picture (" + type + "): " + name);
  62. InputStream pIs = data.getInputStream();
  63. // make sense of the image data
  64. pIs.close();
  65. }
  66. // size of the canvas in points
  67. Dimension pageSize = ppt.getPageSize();
  68. out.println("Pagesize: " + pageSize);
  69. for (XSLFSlide slide : ppt.getSlides()) {
  70. for (XSLFShape shape : slide) {
  71. if (shape instanceof XSLFTextShape) {
  72. XSLFTextShape txShape = (XSLFTextShape) shape;
  73. out.println(txShape.getText());
  74. } else if (shape instanceof XSLFPictureShape) {
  75. XSLFPictureShape pShape = (XSLFPictureShape) shape;
  76. XSLFPictureData pData = pShape.getPictureData();
  77. out.println(pData.getFileName());
  78. } else {
  79. out.println("Process me: " + shape.getClass());
  80. }
  81. }
  82. }
  83. }
  84. }
  85. }