import java.io.ByteArrayOutputStream;\r
import java.io.IOException;\r
import java.io.InputStream;\r
+import java.net.URL;\r
import java.util.zip.ZipInputStream;\r
\r
import org.apache.poi.POIDataSamples;\r
}\r
}\r
\r
+ /**
+  * Opens a remote sample document from a URL. Opening is performed in two
+  * phases:
+  * (1) download the content into a byte array
+  * (2) construct the HWPFDocument from that byte array
+  * The duration of each phase is logged at DEBUG level.
+  *
+  * @param sampleFileUrl the URL to open
+  * @return the document constructed from the downloaded bytes
+  * @throws RuntimeException wrapping any IOException raised while opening
+  *         the URL, downloading the content or constructing the document
+  */
+ public static HWPFDocument openRemoteFile( String sampleFileUrl )
+ {
+     final long start = System.currentTimeMillis();
+     try
+     {
+         // Phase 1: buffer the whole remote file in memory. The network
+         // stream is closed as soon as the download is complete, so it is
+         // not held open while the document is being parsed.
+         final byte[] byteArray;
+         InputStream is = new URL( sampleFileUrl ).openStream();
+         try
+         {
+             // ByteArrayOutputStream holds no external resource, so it
+             // does not need to be closed.
+             ByteArrayOutputStream baos = new ByteArrayOutputStream();
+             IOUtils.copy( is, baos );
+             byteArray = baos.toByteArray();
+         }
+         finally
+         {
+             is.close();
+         }
+         final long endDownload = System.currentTimeMillis();
+
+         logger.log( POILogger.DEBUG, "Downloaded in ",
+                 Long.valueOf( endDownload - start ), " ms -- ",
+                 Long.valueOf( byteArray.length ), " byte(s)" );
+
+         // Phase 2: parse the document from the in-memory copy.
+         ByteArrayInputStream bais = new ByteArrayInputStream( byteArray );
+         HWPFDocument doc = new HWPFDocument( bais );
+         final long endParse = System.currentTimeMillis();
+
+         // Report the parse phase only; measuring from 'start' would also
+         // count the download time that has already been logged above.
+         logger.log( POILogger.DEBUG, "Parsed in ",
+                 Long.valueOf( endParse - endDownload ), " ms" );
+
+         return doc;
+     }
+     catch ( IOException e )
+     {
+         throw new RuntimeException( e );
+     }
+ }
+
+\r
public static HWPFOldDocument openOldSampleFile(String sampleFileName) {\r
try {\r
InputStream is = POIDataSamples.getDocumentInstance().openResourceAsStream(sampleFileName);\r
}
}
- /**
- * Bug 51524 - PapBinTable constructor is slow
- */
- public void test51524()
- {
- HWPFTestDataSamples.openSampleFileFromArchive( "Bug51524.zip" );
- }
-
/**
* [RESOLVED FIXED] Bug 51604 - replace text fails for doc ( poi 3.8 beta
* release from download site )
}
+
/**
* Bug 51678 - Extracting text from Bug51524.zip is slow
+ * Bug 51524 - PapBinTable constructor is slow
*/
- public void test51678()
+ public void test51678And51524()
{
- HWPFDocument hwpfDocument = HWPFTestDataSamples.openSampleFileFromArchive( "Bug51524.zip" );
- WordExtractor wordExtractor = new WordExtractor( hwpfDocument );
- wordExtractor.getText();
+ // YK: the sample document is fetched from a remote URL, so this test is
+ // opt-in: it does nothing unless the poi.test.remote system property is set.
+ // TODO: refactor into something nicer!
+ if ( System.getProperty( "poi.test.remote" ) == null )
+ {
+     return;
+ }
+
+ final String remoteSampleUrl = "http://domex.nps.edu/corp/files/govdocs1/007/007488.doc";
+ final HWPFDocument remoteDocument = HWPFTestDataSamples.openRemoteFile( remoteSampleUrl );
+
+ // Only the extraction call itself is exercised; the returned text is discarded.
+ new WordExtractor( remoteDocument ).getText();
}
}