You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

XDGFVisioExtractor.java 1.5KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051
  1. package org.apache.poi.xdgf.extractor;
  2. import java.io.IOException;
  3. import org.apache.poi.POIXMLDocument;
  4. import org.apache.poi.POIXMLTextExtractor;
  5. import org.apache.poi.openxml4j.opc.OPCPackage;
  6. import org.apache.poi.xdgf.usermodel.XDGFPage;
  7. import org.apache.poi.xdgf.usermodel.XmlVisioDocument;
  8. import org.apache.poi.xdgf.usermodel.shape.ShapeTextVisitor;
  9. /**
  10. * Helper class to extract text from an OOXML Visio File
  11. */
  12. public class XDGFVisioExtractor extends POIXMLTextExtractor {
  13. protected final XmlVisioDocument document;
  14. public XDGFVisioExtractor(XmlVisioDocument document) {
  15. super(document);
  16. this.document = document;
  17. }
  18. public XDGFVisioExtractor(OPCPackage openPackage) throws IOException {
  19. this(new XmlVisioDocument(openPackage));
  20. }
  21. public String getText() {
  22. ShapeTextVisitor visitor = new ShapeTextVisitor();
  23. for (XDGFPage page: document.getPages()) {
  24. page.getContent().visitShapes(visitor);
  25. }
  26. return visitor.getText().toString();
  27. }
  28. public static void main(String [] args) throws IOException {
  29. if (args.length < 1) {
  30. System.err.println("Use:");
  31. System.err.println(" XDGFVisioExtractor <filename.vsdx>");
  32. System.exit(1);
  33. }
  34. POIXMLTextExtractor extractor =
  35. new XDGFVisioExtractor(POIXMLDocument.openPackage(
  36. args[0]
  37. ));
  38. System.out.println(extractor.getText());
  39. extractor.close();
  40. }
  41. }