* contents.
*/
public String[] getAllText() {
// Accumulator for the extracted strings; the old (-) and new (+) lines
// differ only in the generic type parameter on the list.
- ArrayList text = new ArrayList();
+ ArrayList<String> text = new ArrayList<String>();
// Visit every top-level stream of the diagram; findText adds the strings
// it finds to the shared list.
for(int i=0; i<hdgf.getTopLevelStreams().length; i++) {
findText(hdgf.getTopLevelStreams()[i], text);
}
// Return the collected strings as an array sized to the list. With the
// typed list the explicit (String[]) cast is no longer needed.
- return (String[])text.toArray( new String[text.size()] );
+ return text.toArray( new String[text.size()] );
}
- private void findText(Stream stream, ArrayList text) {
+ private void findText(Stream stream, ArrayList<String> text) {
if(stream instanceof PointerContainingStream) {
PointerContainingStream ps = (PointerContainingStream)stream;
for(int i=0; i<ps.getPointedToStreams().length; i++) {
chunk.getName() != null &&
chunk.getName().equals("Text") &&
chunk.getCommands().length > 0) {
+
// First command
Command cmd = chunk.getCommands()[0];
if(cmd != null && cmd.getValue() != null) {
- text.add( cmd.getValue().toString() );
+ // Capture the text, as long as it isn't
+ // simply an empty string
+ String str = cmd.getValue().toString();
+ if(str.equals("") || str.equals("\n")) {
+ // Ignore empty strings
+ } else {
+ text.add( str );
+ }
}
}
}
package org.apache.poi.hdgf.extractor;
import java.io.ByteArrayOutputStream;
-import java.io.File;
import java.io.PrintStream;
import junit.framework.TestCase;
private static POIDataSamples _dgTests = POIDataSamples.getDiagramInstance();
private String defFilename;
+ private int defTextChunks;
protected void setUp() {
// Default sample document used by the tests.
defFilename = "Test_Visio-Some_Random_Text.vsd";
// Number of strings the tests expect getAllText() to return for the
// default sample document.
+ defTextChunks = 5;
}
/**
extractor = new VisioTextExtractor(_dgTests.openResourceAsStream(defFilename));
assertNotNull(extractor);
assertNotNull(extractor.getAllText());
- assertEquals(3, extractor.getAllText().length);
+ assertEquals(defTextChunks, extractor.getAllText().length);
extractor = new VisioTextExtractor(
new POIFSFileSystem(
);
assertNotNull(extractor);
assertNotNull(extractor.getAllText());
- assertEquals(3, extractor.getAllText().length);
+ assertEquals(defTextChunks, extractor.getAllText().length);
extractor = new VisioTextExtractor(
new HDGFDiagram(
);
assertNotNull(extractor);
assertNotNull(extractor.getAllText());
- assertEquals(3, extractor.getAllText().length);
+ assertEquals(defTextChunks, extractor.getAllText().length);
}
public void testExtraction() throws Exception {
// Check the array fetch
String[] text = extractor.getAllText();
assertNotNull(text);
- assertEquals(3, text.length);
+ assertEquals(defTextChunks, text.length);
- assertEquals("Test View\n", text[0]);
- assertEquals("I am a test view\n", text[1]);
- assertEquals("Some random text, on a page\n", text[2]);
+ assertEquals("text\n", text[0]);
+ assertEquals("View\n", text[1]);
+ assertEquals("Test View\n", text[2]);
+ assertEquals("I am a test view\n", text[3]);
+ assertEquals("Some random text, on a page\n", text[4]);
// And the all-in fetch
String textS = extractor.getText();
- assertEquals("Test View\nI am a test view\nSome random text, on a page\n", textS);
+ assertEquals("text\nView\nTest View\nI am a test view\nSome random text, on a page\n", textS);
}
public void testProblemFiles() throws Exception {
- String[] files = {"44594.vsd", "44594-2.vsd", "ShortChunk1.vsd", "ShortChunk2.vsd", "ShortChunk3.vsd"};
+ String[] files = {
+ "44594.vsd", "44594-2.vsd",
+ "ShortChunk1.vsd", "ShortChunk2.vsd", "ShortChunk3.vsd",
+ "NegativeChunkLength.vsd", "NegativeChunkLength2.vsd"
+ };
for(String file : files){
VisioTextExtractor ex = new VisioTextExtractor(_dgTests.openResourceAsStream(file));
ex.getText();
// Check
capture.flush();
String text = baos.toString();
- assertEquals("Test View\nI am a test view\nSome random text, on a page\n", text);
+ assertEquals(
+ "text\nView\n" +
+ "Test View\nI am a test view\n" +
+ "Some random text, on a page\n",
+ text);
}
}