import org.apache.poi.hssf.record.*;
import org.apache.poi.poifs.filesystem.DirectoryNode;
import org.apache.poi.poifs.filesystem.POIFSFileSystem;
+import static org.apache.poi.hssf.model.InternalWorkbook.WORKBOOK_DIR_ENTRY_NAMES;
/**
* Low level event based HSSF reader. Pass either a DocumentInputStream to
*/
public void processWorkbookEvents(HSSFRequest req, DirectoryNode dir) throws IOException {
// some old documents have "WORKBOOK" or "BOOK"
- final String name;
+ String name = null;
Set<String> entryNames = dir.getEntryNames();
- if (entryNames.contains("Workbook")) {
- name = "Workbook";
- } else if (entryNames.contains("WORKBOOK")) {
- name = "WORKBOOK";
- } else if (entryNames.contains("BOOK")) {
- name = "BOOK";
- } else {
- name = "Workbook";
+ // Probe the known stream names in array order; the first one present wins
+ for (String potentialName : WORKBOOK_DIR_ENTRY_NAMES) {
+ if (entryNames.contains(potentialName)) {
+ name = potentialName;
+ break;
+ }
+ }
+ // If in doubt, go for the default
+ if (name == null) {
+ name = WORKBOOK_DIR_ENTRY_NAMES[0];
}
InputStream in = dir.createDocumentInputStream(name);
-
- processEvents(req, in);
+ // This method opened the stream, so it is responsible for closing it,
+ // even when processing fails part-way through
+ try {
+ processEvents(req, in);
+ } finally {
+ in.close();
+ }
}
*/
private static final int MAX_SENSITIVE_SHEET_NAME_LEN = 31;
+ /**
+ * Normally, the Workbook will be in a POIFS Stream called
+ * "Workbook". However, some weird XLS generators use "WORKBOOK"
+ * or "BOOK".
+ * <p>
+ * The order of the entries is significant: code that looks up the
+ * workbook stream walks this array from the start and stops at the
+ * first name that is present, and the first entry is used as the
+ * fallback when none of the names is found.
+ */
+ public static final String[] WORKBOOK_DIR_ENTRY_NAMES = {
+ "Workbook", // as per BIFF8 spec
+ "WORKBOOK", // Typically from third party programs
+ "BOOK", // Typically odd Crystal Reports exports
+ };
private static final POILogger log = POILogFactory.getLogger(InternalWorkbook.class);
private static final int DEBUG = POILogger.DEBUG;
package org.apache.poi.hssf.usermodel;
+import static org.apache.poi.hssf.model.InternalWorkbook.WORKBOOK_DIR_ENTRY_NAMES;
+
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.FileNotFoundException;
import org.apache.poi.util.POILogFactory;
import org.apache.poi.util.POILogger;
-
/**
* High level representation of a workbook. This is the first object most users
* will construct whether they are reading or writing a workbook. It is also the
this(fs.getRoot(), fs, preserveNodes);
}
- /**
- * Normally, the Workbook will be in a POIFS Stream
- * called "Workbook". However, some weird XLS generators use "WORKBOOK"
- */
- private static final String[] WORKBOOK_DIR_ENTRY_NAMES = {
- "Workbook", // as per BIFF8 spec
- "WORKBOOK", // Typically from third party programs
- "BOOK", // Typically odd Crystal Reports exports
- };
-
-
public static String getWorkbookDirEntryName(DirectoryNode directory) {
for (int i = 0; i < WORKBOOK_DIR_ENTRY_NAMES.length; i++) {
import org.apache.poi.xwpf.usermodel.XWPFRelation;
import org.apache.xmlbeans.XmlException;
+import static org.apache.poi.hssf.model.InternalWorkbook.WORKBOOK_DIR_ENTRY_NAMES;
+
/**
* Figures out the correct POITextExtractor for your supplied
* document, and returns it.
{
// Look for certain entries in the stream, to figure it
// out from
- if (poifsDir.hasEntry("Workbook") ||
- // some XLS files have different entry-names
- poifsDir.hasEntry("WORKBOOK") || poifsDir.hasEntry("BOOK")) {
- if (getPreferEventExtractor()) {
- return new EventBasedExcelExtractor(poifsDir);
+ for (String workbookName : WORKBOOK_DIR_ENTRY_NAMES) {
+ if (poifsDir.hasEntry(workbookName)) {
+ if (getPreferEventExtractor()) {
+ return new EventBasedExcelExtractor(poifsDir);
+ }
+ return new ExcelExtractor(poifsDir);
}
- return new ExcelExtractor(poifsDir);
}
if (poifsDir.hasEntry("WordDocument")) {
import org.apache.poi.xwpf.extractor.XWPFWordExtractor;
import org.apache.xmlbeans.XmlException;
import org.junit.BeforeClass;
+import org.junit.Ignore;
import org.junit.Test;
/**
}
}
}
+
+ /**
+ * #59074 - No supported documents found in the OLE2 stream on
+ * a valid Excel file.
+ * <p>
+ * Checks that an extractor can be created for the sample file
+ * 59074.xls and that the extracted text contains "test".
+ * <p>
+ * NOTE(review): the test is disabled via {@code @Ignore}; presumably
+ * this is because 59074.xls is a BIFF5 / Excel 95 file - confirm
+ * before enabling. The method name is a placeholder and should be
+ * given a descriptive name.
+ */
+ @Ignore
+ @Test
+ public void a() throws Exception {
+ POITextExtractor ext = ExtractorFactory.createExtractor(
+ POIDataSamples.getSpreadSheetInstance().getFile("59074.xls"));
+ assertNotNull(ext);
+
+ // Close the extractor even if text extraction throws
+ String text;
+ try {
+ text = ext.getText();
+ } finally {
+ ext.close();
+ }
+
+ assertNotNull(text);
+ assertTrue(text.contains("test"));
+ }
}
EXCLUDED.add("43493.xls"); // HSSFWorkbook cannot open it as well
EXCLUDED.add("password.xls");
EXCLUDED.add("46904.xls");
+ EXCLUDED.add("59074.xls"); // Biff 5 / Excel 95
EXCLUDED.add("35897-type4.xls"); // unsupported crypto api header
EXCLUDED.add("xor-encryption-abc.xls"); // unsupported XOR-encryption
EXCLUDED.add("testEXCEL_2.xls"); // Biff 2 / Excel 2, pre-OLE2