<!-- Don't forget to update status.xml too! -->
<release version="3.0.3-beta1" date="2008-04-??">
+ <action dev="POI-DEVELOPERS" type="fix">43670, 44501 - Fix how HDGF deals with trailing data in the list of chunk headers</action>
<action dev="POI-DEVELOPERS" type="add">30311 - More work on Conditional Formatting</action>
<action dev="POI-DEVELOPERS" type="fix">refactored all junits' usage of HSSF.testdata.path to one place</action>
<action dev="POI-DEVELOPERS" type="fix">44739 - Small fixes for conditional formatting (regions with max row/col index)</action>
<!-- Don't forget to update changes.xml too! -->
<changes>
<release version="3.0.3-beta1" date="2008-04-??">
+ <action dev="POI-DEVELOPERS" type="fix">43670, 44501 - Fix how HDGF deals with trailing data in the list of chunk headers</action>
<action dev="POI-DEVELOPERS" type="add">30311 - More work on Conditional Formatting</action>
<action dev="POI-DEVELOPERS" type="fix">refactored all junits' usage of HSSF.testdata.path to one place</action>
<action dev="POI-DEVELOPERS" type="fix">44739 - Small fixes for conditional formatting (regions with max row/col index)</action>
ch.unknown3 = (short)LittleEndian.getUnsignedByte(data, offset + 18);
return ch;
- } else if(documentVersion == 5) {
- throw new RuntimeException("TODO");
+ } else if(documentVersion == 5 || documentVersion == 4) {
+ ChunkHeaderV4V5 ch = new ChunkHeaderV4V5();
+
+ ch.type = (int)LittleEndian.getShort(data, offset + 0);
+ ch.id = (int)LittleEndian.getShort(data, offset + 2);
+ ch.unknown2 = (short)LittleEndian.getUnsignedByte(data, offset + 4);
+ ch.unknown3 = (short)LittleEndian.getUnsignedByte(data, offset + 5);
+ ch.unknown1 = (short)LittleEndian.getShort(data, offset + 6);
+ ch.length = (int)LittleEndian.getUInt(data, offset + 8);
+
+ return ch;
+ } else {
+ throw new IllegalArgumentException("Visio files with versions below 4 are not supported, yours was " + documentVersion);
+ }
+ }
+
+ /**
+ * Returns the size of a chunk header for the given document version.
+ *
+ * @param documentVersion the Visio document version to look up
+ * @return the header size reported by ChunkHeaderV11 for versions
+ *  above 6, by ChunkHeaderV6 for version 6, and by ChunkHeaderV4V5
+ *  for every other version
+ */
+ public static int getHeaderSize(int documentVersion) {
+ if(documentVersion > 6) {
+ return ChunkHeaderV11.getHeaderSize();
+ } else if(documentVersion == 6) {
+ return ChunkHeaderV6.getHeaderSize();
} else {
- throw new IllegalArgumentException("Visio files with versions below 5 are not supported, yours was " + documentVersion);
+ // NOTE(review): no lower bound is checked here, so versions below 4
+ //  also get the V4/V5 size even though createChunkHeader refuses
+ //  them - confirm that this is intended
+ return ChunkHeaderV4V5.getHeaderSize();
}
}
--- /dev/null
+/* ====================================================================
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+==================================================================== */
+package org.apache.poi.hdgf.chunks;
+
+/**
+ * Header of a chunk as stored in v4 and v5 documents.
+ */
+public class ChunkHeaderV4V5 extends ChunkHeader {
+    /** On-disk length of a v4 / v5 chunk header, in bytes */
+    private static final int HEADER_SIZE = 12;
+
+    protected short unknown2;
+    protected short unknown3;
+
+    /**
+     * Returns the fixed on-disk size of a v4 / v5 chunk header.
+     *
+     * @return the header size in bytes
+     */
+    protected static int getHeaderSize() {
+        return HEADER_SIZE;
+    }
+
+    /**
+     * Returns the size of this header, which is the same for
+     *  every v4 / v5 header.
+     *
+     * @return the header size in bytes
+     */
+    public int getSizeInBytes() {
+        return HEADER_SIZE;
+    }
+
+    /**
+     * @return the second unknown value held by this header
+     */
+    public short getUnknown2() {
+        return unknown2;
+    }
+
+    /**
+     * @return the third unknown value held by this header
+     */
+    public short getUnknown3() {
+        return unknown3;
+    }
+
+    /**
+     * Does the chunk have a separator?
+     *
+     * @return always false, v4 and v5 chunks never have separators
+     */
+    public boolean hasSeparator() {
+        return false;
+    }
+
+    /**
+     * Does the chunk have a trailer?
+     *
+     * @return always false, v4 and v5 chunks never have trailers
+     */
+    public boolean hasTrailer() {
+        return false;
+    }
+}
return unknown3;
}
- public int getSizeInBytes() {
+ /**
+ * Returns the fixed size, in bytes, of this kind of chunk header.
+ */
+ protected static int getHeaderSize() {
+ // Looks like it ought to be 19...
return 19;
}
+ /**
+ * Returns the size of this header in bytes, as given by
+ *  getHeaderSize().
+ */
+ public int getSizeInBytes() {
+ return getHeaderSize();
+ }
/**
* Does the chunk have a trailer?
import org.apache.poi.POITextExtractor;
import org.apache.poi.hdgf.HDGFDiagram;
+import org.apache.poi.hdgf.chunks.Chunk;
import org.apache.poi.hdgf.chunks.Chunk.Command;
import org.apache.poi.hdgf.streams.ChunkStream;
import org.apache.poi.hdgf.streams.PointerContainingStream;
if(stream instanceof ChunkStream) {
ChunkStream cs = (ChunkStream)stream;
for(int i=0; i<cs.getChunks().length; i++) {
- if(cs.getChunks()[i] != null &&
- cs.getChunks()[i].getName() != null &&
- cs.getChunks()[i].getName().equals("Text")) {
+ Chunk chunk = cs.getChunks()[i];
+ if(chunk != null &&
+ chunk.getName() != null &&
+ chunk.getName().equals("Text") &&
+ chunk.getCommands().length > 0) {
// First command
- Command cmd = cs.getChunks()[i].getCommands()[0];
+ Command cmd = chunk.getCommands()[0];
if(cmd != null && cmd.getValue() != null) {
text.add( cmd.getValue().toString() );
}
import org.apache.poi.hdgf.chunks.Chunk;
import org.apache.poi.hdgf.chunks.ChunkFactory;
+import org.apache.poi.hdgf.chunks.ChunkHeader;
import org.apache.poi.hdgf.pointers.Pointer;
public class ChunkStream extends Stream {
int pos = 0;
byte[] contents = getStore().getContents();
while(pos < contents.length) {
- Chunk chunk = chunkFactory.createChunk(contents, pos);
- chunksA.add(chunk);
-
- pos += chunk.getOnDiskSize();
+ // Ensure we have enough data to create a chunk from
+ int headerSize = ChunkHeader.getHeaderSize(chunkFactory.getVersion());
+ if(pos+headerSize <= contents.length) {
+ Chunk chunk = chunkFactory.createChunk(contents, pos);
+ chunksA.add(chunk);
+
+ pos += chunk.getOnDiskSize();
+ } else {
+ System.err.println("Needed " + headerSize + " bytes to create the next chunk header, but only found " + (contents.length-pos) + " bytes, ignoring rest of data");
+ pos = contents.length;
+ }
}
chunks = (Chunk[])chunksA.toArray(new Chunk[chunksA.size()]);
package org.apache.poi.hdgf.extractor;
import java.io.ByteArrayOutputStream;
+import java.io.File;
import java.io.FileInputStream;
import java.io.PrintStream;
import junit.framework.TestCase;
import org.apache.poi.hdgf.HDGFDiagram;
-import org.apache.poi.hdgf.chunks.Chunk;
-import org.apache.poi.hdgf.chunks.ChunkFactory;
-import org.apache.poi.hdgf.pointers.Pointer;
-import org.apache.poi.hdgf.pointers.PointerFactory;
-import org.apache.poi.hssf.record.formula.eval.StringOperationEval;
-import org.apache.poi.poifs.filesystem.DocumentEntry;
import org.apache.poi.poifs.filesystem.POIFSFileSystem;
public class TestVisioExtractor extends TestCase {
- private String filename;
+ private String dirname;
+ private String defFilename;
protected void setUp() throws Exception {
- String dirname = System.getProperty("HDGF.testdata.path");
- filename = dirname + "/Test_Visio-Some_Random_Text.vsd";
+ // The test data directory comes from the HDGF.testdata.path system
+ //  property, and is kept so tests can open other files from it
+ dirname = System.getProperty("HDGF.testdata.path");
+ // Default sample document, used by testCreation, testExtraction and testMain
+ defFilename = dirname + "/Test_Visio-Some_Random_Text.vsd";
}
/**
public void testCreation() throws Exception {
VisioTextExtractor extractor;
- extractor = new VisioTextExtractor(new FileInputStream(filename));
+ extractor = new VisioTextExtractor(new FileInputStream(defFilename));
assertNotNull(extractor);
assertNotNull(extractor.getAllText());
assertEquals(3, extractor.getAllText().length);
extractor = new VisioTextExtractor(
new POIFSFileSystem(
- new FileInputStream(filename)
+ new FileInputStream(defFilename)
)
);
assertNotNull(extractor);
extractor = new VisioTextExtractor(
new HDGFDiagram(
new POIFSFileSystem(
- new FileInputStream(filename)
+ new FileInputStream(defFilename)
)
)
);
public void testExtraction() throws Exception {
VisioTextExtractor extractor =
- new VisioTextExtractor(new FileInputStream(filename));
+ new VisioTextExtractor(new FileInputStream(defFilename));
// Check the array fetch
String[] text = extractor.getAllText();
assertEquals("Test View\nI am a test view\nSome random text, on a page\n", textS);
}
+ public void testProblemFiles() throws Exception {
+ File a = new File(dirname, "44594.vsd");
+ VisioTextExtractor.main(new String[] {a.toString()});
+
+ File b = new File(dirname, "44594-2.vsd");
+ VisioTextExtractor.main(new String[] {b.toString()});
+
+ File c = new File(dirname, "ShortChunk1.vsd");
+ VisioTextExtractor.main(new String[] {c.toString()});
+
+ File d = new File(dirname, "ShortChunk2.vsd");
+ VisioTextExtractor.main(new String[] {d.toString()});
+
+ File e = new File(dirname, "ShortChunk3.vsd");
+ VisioTextExtractor.main(new String[] {e.toString()});
+ }
+
public void testMain() throws Exception {
PrintStream oldOut = System.out;
ByteArrayOutputStream baos = new ByteArrayOutputStream();
PrintStream capture = new PrintStream(baos);
System.setOut(capture);
- VisioTextExtractor.main(new String[] {filename});
+ VisioTextExtractor.main(new String[] {defFilename});
// Put things back
System.setOut(oldOut);
--- /dev/null
+/* ====================================================================
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+==================================================================== */
+package org.apache.poi.hdgf.streams;
+
+import java.io.FileInputStream;
+
+import org.apache.poi.hdgf.HDGFDiagram;
+import org.apache.poi.hdgf.chunks.ChunkFactory;
+import org.apache.poi.hdgf.pointers.Pointer;
+import org.apache.poi.hdgf.pointers.PointerFactory;
+import org.apache.poi.poifs.filesystem.DocumentEntry;
+import org.apache.poi.poifs.filesystem.POIFSFileSystem;
+
+/**
+ * Tests for bugs with streams, run against the 44594.vsd
+ *  sample document.
+ */
+public class TestStreamBugs extends StreamTest {
+    private byte[] contents;
+    private ChunkFactory chunkFactory;
+    private PointerFactory ptrFactory;
+    private POIFSFileSystem filesystem;
+
+    /**
+     * Opens 44594.vsd from the directory named by the
+     *  HDGF.testdata.path system property, and loads its
+     *  "VisioDocument" entry into {@link #contents}.
+     */
+    protected void setUp() throws Exception {
+        String dirname = System.getProperty("HDGF.testdata.path");
+        String filename = dirname + "/44594.vsd";
+        ptrFactory = new PointerFactory(11);
+        chunkFactory = new ChunkFactory(11);
+
+        // Always release the file handle, even if the file
+        //  can't be opened as a POIFS filesystem
+        FileInputStream fin = new FileInputStream(filename);
+        try {
+            filesystem = new POIFSFileSystem(fin);
+        } finally {
+            fin.close();
+        }
+
+        DocumentEntry docProps =
+            (DocumentEntry)filesystem.getRoot().getEntry("VisioDocument");
+
+        // Grab the document stream
+        contents = new byte[docProps.getSize()];
+        filesystem.createDocumentInputStream("VisioDocument").read(contents);
+    }
+
+    /**
+     * Checks that the stream referenced by the pointer at
+     *  offset 0x24 can be created without an exception.
+     */
+    public void testGetTrailer() throws Exception {
+        Pointer trailerPointer = ptrFactory.createPointer(contents, 0x24);
+        Stream.createStream(trailerPointer, contents, chunkFactory, ptrFactory);
+    }
+
+    /**
+     * Not yet implemented. The name doesn't start with "test",
+     *  and the offsets below are placeholders for when it is written.
+     */
+    public void TOIMPLEMENTtestGetCertainChunks() throws Exception {
+        int offsetA = 3708;
+        int offsetB = 3744;
+    }
+
+    /**
+     * Checks that the children of the trailer stream can be
+     *  created, and then searched for chunks and for child
+     *  pointers, without an exception.
+     */
+    public void testGetChildren() throws Exception {
+        Pointer trailerPointer = ptrFactory.createPointer(contents, 0x24);
+        TrailerStream trailer = (TrailerStream)
+            Stream.createStream(trailerPointer, contents, chunkFactory, ptrFactory);
+
+        // Get without recursing
+        Pointer[] ptrs = trailer.getChildPointers();
+        for(int i=0; i<ptrs.length; i++) {
+            Stream.createStream(ptrs[i], contents, chunkFactory, ptrFactory);
+        }
+
+        // Get with recursing into chunks
+        for(int i=0; i<ptrs.length; i++) {
+            Stream stream =
+                Stream.createStream(ptrs[i], contents, chunkFactory, ptrFactory);
+            if(stream instanceof ChunkStream) {
+                ChunkStream cStream = (ChunkStream)stream;
+                cStream.findChunks();
+            }
+        }
+
+        // Get with recursing into chunks and pointers
+        for(int i=0; i<ptrs.length; i++) {
+            Stream stream =
+                Stream.createStream(ptrs[i], contents, chunkFactory, ptrFactory);
+            if(stream instanceof PointerContainingStream) {
+                PointerContainingStream pStream =
+                    (PointerContainingStream)stream;
+                pStream.findChildren(contents);
+            }
+        }
+
+        trailer.findChildren(contents);
+    }
+
+    /**
+     * Checks that the whole document can be opened as a
+     *  HDGFDiagram without an exception.
+     */
+    public void testOpen() throws Exception {
+        // Only the construction matters, the diagram itself isn't used
+        new HDGFDiagram(filesystem);
+    }
+}