private Record getCoreRecordForRefID(int refID) {
Integer coreRecordId = (Integer)
_sheetIdToCoreRecordsLookup.get(new Integer(refID));
- Record r = _mostRecentCoreRecords[coreRecordId.intValue()];
- return r;
+ if(coreRecordId != null) { // the lookup holds an index into _mostRecentCoreRecords for this refID
+ Record r = _mostRecentCoreRecords[coreRecordId.intValue()];
+ return r;
+ } else { // the lookup has no entry for this refID
+ logger.log(POILogger.ERROR, "We tried to look up a reference to a core record, but there was no core ID for reference ID " + refID);
+ return null; // callers get null here, where the removed lines would have thrown a NullPointerException
+ }
}
/**
} else {
// Match up the records and the SlideAtomSets
notesSets = notesSLWT.getSlideAtomsSets();
- notesRecords = new org.apache.poi.hslf.record.Notes[notesSets.length];
+ ArrayList notesRecordsL = new ArrayList();
for(int i=0; i<notesSets.length; i++) {
// Get the right core record
Record r = getCoreRecordForSAS(notesSets[i]);
// Ensure it really is a notes record
- if(r instanceof org.apache.poi.hslf.record.Notes) {
- notesRecords[i] = (org.apache.poi.hslf.record.Notes)r;
+ if(r != null && r instanceof org.apache.poi.hslf.record.Notes) {
+ notesRecordsL.add( (org.apache.poi.hslf.record.Notes)r );
+
+ // Record the match between slide id and these notes
+ SlidePersistAtom spa = notesSets[i].getSlidePersistAtom();
+ Integer slideId = new Integer(spa.getSlideIdentifier());
+ slideIdToNotes.put(slideId, new Integer(i));
} else {
logger.log(POILogger.ERROR, "A Notes SlideAtomSet at " + i + " said its record was at refID " + notesSets[i].getSlidePersistAtom().getRefID() + ", but that was actually a " + r);
}
-
- // Record the match between slide id and these notes
- SlidePersistAtom spa = notesSets[i].getSlidePersistAtom();
- Integer slideId = new Integer(spa.getSlideIdentifier());
- slideIdToNotes.put(slideId, new Integer(i));
}
+ notesRecords = new org.apache.poi.hslf.record.Notes[notesRecordsL.size()];
+ notesRecords = (org.apache.poi.hslf.record.Notes[])
+ notesRecordsL.toArray(notesRecords);
}
// Now, do the same thing for our slides
* @author Nick Burch (nick at torchbox dot com)
*/
public class TextExtractor extends TestCase {
- // Extractor primed on the 2 page basic test data
+ /** Extractor primed on the 2 page basic test data */
private PowerPointExtractor ppe;
- // Extractor primed on the 1 page but text-box'd test data
+ /** Extractor primed on the 1 page but text-box'd test data */
private PowerPointExtractor ppe2;
+ /** Where to go looking for our test files */
+ private String dirname;
public TextExtractor() throws Exception {
- String dirname = System.getProperty("HSLF.testdata.path");
+ dirname = System.getProperty("HSLF.testdata.path");
String filename = dirname + "/basic_test_ppt_file.ppt";
ppe = new PowerPointExtractor(filename);
String filename2 = dirname + "/with_textbox.ppt";
ensureTwoStringsTheSame(expectText, notesText);
}
+
+ /**
+ * Test that when presented with a PPT file missing the odd
+ * core record, we can still get the rest of the text out.
+ * The extractor is expected to return empty notes text (not null)
+ * and the slide text unchanged.
+ * @throws Exception on any failure while loading the file or extracting its text
+ */
+ public void testMissingCoreRecords() throws Exception {
+ String filename = dirname + "/missing_core_records.ppt";
+ ppe = new PowerPointExtractor(filename);
+
+ String text = ppe.getText(true, false);
+ String nText = ppe.getNotes();
+
+ assertNotNull(text);
+ assertNotNull(nText);
+
+ // Notes records were corrupt, so don't expect any
+ // (expected value first, so a failure reports "expected:<0> but was:<n>")
+ assertEquals(0, nText.length());
+
+ // Slide records were fine
+ assertTrue(text.startsWith("Using Disease Surveillance and Response"));
+ }
private void ensureTwoStringsTheSame(String exp, String act) throws Exception {
assertEquals(exp.length(),act.length());