import java.io.InputStream;\r
import java.util.HashSet;\r
import java.util.Set;\r
+import java.util.zip.ZipException;\r
\r
import org.apache.poi.POIOLE2TextExtractor;\r
import org.apache.poi.POITextExtractor;\r
+import org.apache.poi.dev.OOXMLPrettyPrint;\r
import org.apache.poi.extractor.ExtractorFactory;\r
import org.apache.poi.hpsf.extractor.HPSFPropertiesExtractor;\r
import org.apache.poi.openxml4j.exceptions.InvalidFormatException;\r
} finally {\r
ExtractorFactory.setThreadPrefersEventExtractors(before);\r
}\r
+ \r
+ /* Disabled: OOXMLPrettyPrint failed for some documents with special XML contents...\r
+ try {\r
+ OOXMLPrettyPrint.main(new String[] { file.getAbsolutePath(), \r
+ "/tmp/pretty-" + file.getName() });\r
+ } catch (ZipException e) {\r
+ // ignore, not a Zip/OOXML file\r
+ }*/\r
}\r
\r
private void handleExtractingInternal(File file) throws Exception {\r
\r
import java.io.BufferedOutputStream;\r
import java.io.File;\r
-import java.io.FileNotFoundException;\r
import java.io.FileOutputStream;\r
import java.io.IOException;\r
import java.io.OutputStream;\r
import javax.xml.transform.dom.DOMSource;\r
import javax.xml.transform.stream.StreamResult;\r
\r
+import org.apache.poi.util.IOUtils;\r
import org.w3c.dom.Document;\r
import org.xml.sax.InputSource;\r
-import org.xml.sax.SAXException;\r
\r
/**\r
* Reads a zipped OOXML file and produces a copy with the included \r
}\r
\r
private static void handleFile(File file, File outFile) throws ZipException,\r
- IOException, FileNotFoundException, SAXException,\r
- TransformerException, ParserConfigurationException {\r
+ IOException, TransformerException, ParserConfigurationException {\r
System.out.println("Reading zip-file " + file + " and writing pretty-printed XML to " + outFile);\r
\r
ZipFile zipFile = new ZipFile(file);\r
}\r
}\r
\r
- private void handle(ZipFile file, ZipOutputStream out) throws SAXException, IOException, TransformerException {\r
+ private void handle(ZipFile file, ZipOutputStream out) throws IOException, TransformerException {\r
Enumeration<? extends ZipEntry> entries = file.entries();\r
while(entries.hasMoreElements()) {\r
ZipEntry entry = entries.nextElement();\r
\r
- out.putNextEntry(new ZipEntry(entry.getName()));\r
+ String name = entry.getName();\r
+ out.putNextEntry(new ZipEntry(name));\r
try {\r
- Document document = documentBuilder.parse(new InputSource(file.getInputStream(entry)));\r
- pretty(document, out, 2);\r
+ if(name.endsWith(".xml") || name.endsWith(".rels")) {\r
+ Document document = documentBuilder.parse(new InputSource(file.getInputStream(entry)));\r
+ pretty(document, out, 2);\r
+ } else {\r
+ System.out.println("Not pretty-printing non-XML file " + name);\r
+ IOUtils.copy(file.getInputStream(entry), out);\r
+ }\r
+ } catch (Exception e) {\r
+ throw new IOException("While handling entry " + name, e);\r
} finally {\r
out.closeEntry();\r
}\r