From f957997c235454061481aa2b0644715a047ff00b Mon Sep 17 00:00:00 2001 From: Nick Burch Date: Thu, 28 Jun 2007 11:43:11 +0000 Subject: [PATCH] Tag as 3.0.1-RC3 git-svn-id: https://svn.apache.org/repos/asf/poi/tags/REL_3_0_1_RC3@551531 13f79535-47bb-0310-9956-ffa450edef68 --- legal/NOTICE | 13 +- src/documentation/content/xdocs/book.xml | 1 + src/documentation/content/xdocs/changes.xml | 5 +- src/documentation/content/xdocs/hdgf/book.xml | 34 ++++++ .../content/xdocs/hdgf/index.xml | 98 +++++++++++++++ src/documentation/content/xdocs/hslf/book.xml | 2 +- .../content/xdocs/hslf/index.xml | 4 +- .../content/xdocs/hssf/how-to.xml | 2 +- .../content/xdocs/hwpf/index.xml | 2 +- .../content/xdocs/hwpf/quick-guide.xml | 4 +- src/documentation/content/xdocs/index.xml | 13 +- src/documentation/content/xdocs/status.xml | 5 +- .../src/org/apache/poi/hdgf/chunks/Chunk.java | 33 ++++- .../apache/poi/hdgf/chunks/ChunkFactory.java | 9 +- .../poi/hdgf/chunks/ChunkHeaderV11.java | 4 + .../poi/hdgf/chunks/ChunkSeparator.java | 4 + .../apache/poi/hdgf/chunks/ChunkTrailer.java | 4 + .../org/apache/poi/hdgf/dev/VSDDumper.java | 8 ++ .../hdgf/extractor/VisioTextExtractor.java | 114 ++++++++++++++++++ .../apache/poi/hdgf/streams/ChunkStream.java | 5 + .../org/apache/poi/hdgf/streams/Stream.java | 2 +- .../poi/hdgf/streams/StringsStream.java | 7 +- .../hdgf/extractor/TestVisioExtractor.java | 107 ++++++++++++++++ .../poi/hdgf/streams/TestStreamComplex.java | 60 +++++++++ 24 files changed, 522 insertions(+), 18 deletions(-) create mode 100644 src/documentation/content/xdocs/hdgf/book.xml create mode 100755 src/documentation/content/xdocs/hdgf/index.xml create mode 100644 src/scratchpad/src/org/apache/poi/hdgf/extractor/VisioTextExtractor.java create mode 100644 src/scratchpad/testcases/org/apache/poi/hdgf/extractor/TestVisioExtractor.java diff --git a/legal/NOTICE b/legal/NOTICE index d417a360a7..190d974632 100644 --- a/legal/NOTICE +++ b/legal/NOTICE @@ -1,5 +1,16 @@ -Apache Jakarta POI 
+Apache POI Copyright 2001-2007 The Apache Software Foundation This product includes software developed by The Apache Software Foundation (http://www.apache.org/). + + +Unit testing support is provided by JUnit, under the +Common Public License Version 1.0: + http://www.opensource.org/licenses/cpl.php +See http://www.junit.org/ + +Small parts of the POI component HDGF are based on VSDump, +and are under the GNU General Public Licence version 3 (GPL v3): + http://gplv3.fsf.org/ +See http://www.gnome.ru/projects/vsdump_en.html diff --git a/src/documentation/content/xdocs/book.xml b/src/documentation/content/xdocs/book.xml index 4666d7765a..a0f10c0dbe 100644 --- a/src/documentation/content/xdocs/book.xml +++ b/src/documentation/content/xdocs/book.xml @@ -39,6 +39,7 @@ + diff --git a/src/documentation/content/xdocs/changes.xml b/src/documentation/content/xdocs/changes.xml index 3783e8428f..697395f8fd 100644 --- a/src/documentation/content/xdocs/changes.xml +++ b/src/documentation/content/xdocs/changes.xml @@ -35,7 +35,7 @@ - + Administrative updates to the Maven POMs, and the release artificat build process 23951 - [PATCH] Fix for HSSF setSheetOrder and tab names 42524 - [PATCH] Better HSLF support for problem shape groups @@ -44,6 +44,9 @@ Additional HSLF support for Title and Slide Master Sheets 42474 - [PATCH] Improved HSLF note to slide matching, and a NPE 42481 - [PATCH] Tweak some HSLF exceptions, to make it clearer what you're catching + 42667 - [PATCH] Fix for HSLF writing of files with tables + Improved way of detecting HSSF cells that contain dates, isADateFormat + Initial, read-only support for Visio documents, as HDGF diff --git a/src/documentation/content/xdocs/hdgf/book.xml b/src/documentation/content/xdocs/hdgf/book.xml new file mode 100644 index 0000000000..fb37a33a75 --- /dev/null +++ b/src/documentation/content/xdocs/hdgf/book.xml @@ -0,0 +1,34 @@ + + + + + + + + + + + + + + + diff --git a/src/documentation/content/xdocs/hdgf/index.xml 
b/src/documentation/content/xdocs/hdgf/index.xml new file mode 100755 index 0000000000..f14bb1e766 --- /dev/null +++ b/src/documentation/content/xdocs/hdgf/index.xml @@ -0,0 +1,98 @@ + + + + + +
+ POI-HDGF - Java API To Access Microsoft Visio Format Files + Overview + + + +
+ + +
+ Overview + +

HDGF is the POI Project's pure Java implementation of the Visio file format.

+

Currently, HDGF provides a low-level, read-only API for + accessing Visio documents. It also provides a + way + to extract the textual content from a file. +

+

At this time, there is no usermodel API or similar, + only low-level access to the streams, chunks and chunk commands. + Users are advised to check the unit tests to see how everything + works. They are also well advised to read the documentation + supplied with + vsdump + to get a feel for how Visio files are structured.

+

To get a feel for the contents of a file, and to track down + where data of interest is stored, HDGF comes with + VSDDumper + to print out the contents of the file. Users should also make + use of + vsdump + to probe the structure of files.

+ + This code currently lives in the + scratchpad area + of the POI SVN repository. + Ensure that you have the scratchpad jar or the scratchpad + build area in your + classpath before experimenting with this code. + +
+ Steps required for write support +

Currently, HDGF is only able to read Visio files; it is + not able to write them back out again. We believe the + following are the steps that would need to be taken to + implement write support.

+
    +
  1. Re-write the decompression support in LZW4HDGF to be + less opaque, and also under the ASL.
  2. +
  3. Add compression support to the new LZW4HDGF.
  4. +
  5. Have HDGF just write back the raw bytes it read in, and + have a test to ensure the file is un-changed.
  6. +
  7. Have HDGF generate the bytes to write out from the + Stream stores, using the compressed data as appropriate, + without re-compressing. Plus test to ensure file is + un-changed.
  8. +
  9. Have HDGF generate the bytes to write out from the + Stream stores, re-compressing any streams that were + decompressed. Plus test to ensure file is un-changed.
  10. +
  11. Have HDGF re-generate the offsets in pointers for the + locations of the streams. Plus test to ensure file is + un-changed.
  12. +
  13. Have HDGF re-generate the bytes for all the chunks, from + the chunk commands. Tests to ensure the chunks are + serialized properly, and then that the file is un-changed.
  14. +
  15. Alter the data of one command, but keep it the same + length, and check Visio can open the file when written + out.
  16. +
  17. Alter the data of one command, to a new length, and + check that Visio can open the file when written out.
  18. +
+
+
+ +
diff --git a/src/documentation/content/xdocs/hslf/book.xml b/src/documentation/content/xdocs/hslf/book.xml index 0eb4f8cb18..8ccf5c1bc4 100644 --- a/src/documentation/content/xdocs/hslf/book.xml +++ b/src/documentation/content/xdocs/hslf/book.xml @@ -20,7 +20,7 @@ diff --git a/src/documentation/content/xdocs/hslf/index.xml b/src/documentation/content/xdocs/hslf/index.xml index 779a279d16..16a3885d82 100755 --- a/src/documentation/content/xdocs/hslf/index.xml +++ b/src/documentation/content/xdocs/hslf/index.xml @@ -34,12 +34,12 @@ Overview

HSLF is the POI Project's pure Java implementation of the Powerpoint file format.

-

HSSF provides a way to read powerpoint presentations, and extract text from it. +

HSLF provides a way to read powerpoint presentations, and extract text from it. It also provides some (currently limited) edit capabilities.

This code currently lives the - scratchpad area + scratchpad area of the POI SVN repository. Ensure that you have the scratchpad jar or the scratchpad build area in your diff --git a/src/documentation/content/xdocs/hssf/how-to.xml b/src/documentation/content/xdocs/hssf/how-to.xml index cc578afec5..a4ac41209d 100644 --- a/src/documentation/content/xdocs/hssf/how-to.xml +++ b/src/documentation/content/xdocs/hssf/how-to.xml @@ -460,7 +460,7 @@ some of the rows or cells. It can be found at /src/scratchpad/examples/src/org/apache/poi/hssf/eventusermodel/examples/XLS2CSVmra.java, and may be called on the command line, or from within your own code. The latest version is always available from -subversion. +subversion.

This code is currently in the scratchpad section, so you will either diff --git a/src/documentation/content/xdocs/hwpf/index.xml b/src/documentation/content/xdocs/hwpf/index.xml index 1268facbee..1556869617 100644 --- a/src/documentation/content/xdocs/hwpf/index.xml +++ b/src/documentation/content/xdocs/hwpf/index.xml @@ -38,7 +38,7 @@ to pure Java.

HWPF is still in early development. It is in the + href="http://svn.apache.org/viewcvs.cgi/poi/trunk/src/scratchpad/"> scratchpad section of the SVN. You will need to ensure you either have a recent SVN checkout, or a recent SVN nightly build (including the scratchpad jar!)

diff --git a/src/documentation/content/xdocs/hwpf/quick-guide.xml b/src/documentation/content/xdocs/hwpf/quick-guide.xml index 197922f07e..bf046258e7 100644 --- a/src/documentation/content/xdocs/hwpf/quick-guide.xml +++ b/src/documentation/content/xdocs/hwpf/quick-guide.xml @@ -30,7 +30,7 @@

HWPF is still in early development. It is in the + href="http://svn.apache.org/viewcvs.cgi/poi/trunk/src/scratchpad/"> scratchpad section of the SVN. You will need to ensure you either have a recent SVN checkout, or a recent SVN nightly build (including the scratchpad jar!)

@@ -68,7 +68,7 @@ can then get text and other properties.
Further Examples

For now, the best source of additional examples is in the unit tests. + href="http://svn.apache.org/viewvc/poi/trunk/src/scratchpad/testcases/org/apache/poi/hwpf/"> Browse the HWPF unit tests.

diff --git a/src/documentation/content/xdocs/index.xml b/src/documentation/content/xdocs/index.xml index da2d5ed3a8..5968f8084a 100644 --- a/src/documentation/content/xdocs/index.xml +++ b/src/documentation/content/xdocs/index.xml @@ -38,6 +38,10 @@ download the source and binaries from your local mirror.

+

We would also like to confirm that version 3.0 of Apache POI does + not contain any viruses. Users of broken virus checkers + which do detect a 94-byte file, sci_cec.db, as containing one are + advised to contact their vendor for a fix.

Purpose @@ -107,12 +111,19 @@ development. Jump in!

HSLF for PowerPoint Documents -

HWSL is our port of the Microsoft PowerPoint 97(-2003) file format to pure +

HSLF is our port of the Microsoft PowerPoint 97(-2003) file format to pure Java. It supports read and write capabilities of some, but not yet all of the core records. Please see the HSLF project page for more information.

+
HDGF for Visio Documents +

HDGF is our port of the Microsoft Visio 97(-2003) file format to pure + Java. It currently only supports reading at a very low level, and + simple text extraction. Please see the HDGF project page for more + information.

+
HPSF for Document Properties

HPSF is our port of the OLE 2 property set format to pure Java. Property sets are mostly use to store a document's properties diff --git a/src/documentation/content/xdocs/status.xml b/src/documentation/content/xdocs/status.xml index ef5c5aaeb4..b236f22880 100644 --- a/src/documentation/content/xdocs/status.xml +++ b/src/documentation/content/xdocs/status.xml @@ -32,7 +32,7 @@ - + Administrative updates to the Maven POMs, and the release artificat build process 23951 - [PATCH] Fix for HSSF setSheetOrder and tab names 42524 - [PATCH] Better HSLF support for problem shape groups @@ -41,6 +41,9 @@ Additional HSLF support for Title and Slide Master Sheets 42474 - [PATCH] Improved HSLF note to slide matching, and a NPE 42481 - [PATCH] Tweak some HSLF exceptions, to make it clearer what you're catching + 42667 - [PATCH] Fix for HSLF writing of files with tables + Improved way of detecting HSSF cells that contain dates, isADateFormat + Initial, read-only support for Visio documents, as HDGF diff --git a/src/scratchpad/src/org/apache/poi/hdgf/chunks/Chunk.java b/src/scratchpad/src/org/apache/poi/hdgf/chunks/Chunk.java index 54c37b3e83..5928927c4d 100644 --- a/src/scratchpad/src/org/apache/poi/hdgf/chunks/Chunk.java +++ b/src/scratchpad/src/org/apache/poi/hdgf/chunks/Chunk.java @@ -20,6 +20,9 @@ import java.util.ArrayList; import org.apache.poi.hdgf.chunks.ChunkFactory.CommandDefinition; import org.apache.poi.util.LittleEndian; +import org.apache.poi.util.POILogFactory; +import org.apache.poi.util.POILogger; +import org.apache.poi.util.StringUtil; /** * Base of all chunks, which hold data, flags etc @@ -44,6 +47,9 @@ public class Chunk { /** The name of the chunk, as found from the commandDefinitions */ private String name; + /** For logging warnings about the structure of the file */ + private POILogger logger = POILogFactory.getLogger(Chunk.class); + public Chunk(ChunkHeader header, ChunkTrailer trailer, ChunkSeparator separator, byte[] contents) { this.header = header; 
this.trailer = trailer; @@ -148,7 +154,9 @@ public class Chunk { // Check we seem to have enough data if(offset >= contents.length) { - System.err.println("Command offset " + offset + " past end of data at " + contents.length); + logger.log(POILogger.WARN, + "Command offset " + offset + " past end of data at " + contents.length + ); continue; } @@ -167,9 +175,27 @@ public class Chunk { LittleEndian.getDouble(contents, offset) ); break; + case 12: + // A Little Endian String + // Starts 8 bytes into the data segment + // Ends at end of data, or 00 00 + int startsAt = 8; + int endsAt = startsAt; + for(int j=startsAt; j data.length) { - System.err.println("Header called for " + header.getLength() +" bytes, but that would take us passed the end of the data!"); + logger.log(POILogger.WARN, + "Header called for " + header.getLength() +" bytes, but that would take us passed the end of the data!"); endOfDataPos = data.length; header.length = data.length - offset - header.getSizeInBytes(); diff --git a/src/scratchpad/src/org/apache/poi/hdgf/chunks/ChunkHeaderV11.java b/src/scratchpad/src/org/apache/poi/hdgf/chunks/ChunkHeaderV11.java index 51eca5649c..c77a249204 100644 --- a/src/scratchpad/src/org/apache/poi/hdgf/chunks/ChunkHeaderV11.java +++ b/src/scratchpad/src/org/apache/poi/hdgf/chunks/ChunkHeaderV11.java @@ -24,6 +24,10 @@ public class ChunkHeaderV11 extends ChunkHeaderV6 { * Does the chunk have a separator? 
*/ public boolean hasSeparator() { + // For some reason, there are two types that don't have a + // separator despite the flags that indicate they do + if(type == 0x1f || type == 0xc9) { return false; } + // If there's a trailer, there's a separator if(hasTrailer()) { return true; } diff --git a/src/scratchpad/src/org/apache/poi/hdgf/chunks/ChunkSeparator.java b/src/scratchpad/src/org/apache/poi/hdgf/chunks/ChunkSeparator.java index 7098f17ea6..5ce4097446 100644 --- a/src/scratchpad/src/org/apache/poi/hdgf/chunks/ChunkSeparator.java +++ b/src/scratchpad/src/org/apache/poi/hdgf/chunks/ChunkSeparator.java @@ -27,4 +27,8 @@ public class ChunkSeparator { separatorData = new byte[4]; System.arraycopy(data, offset, separatorData, 0, 4); } + + public String toString() { + return ""; + } } diff --git a/src/scratchpad/src/org/apache/poi/hdgf/chunks/ChunkTrailer.java b/src/scratchpad/src/org/apache/poi/hdgf/chunks/ChunkTrailer.java index a610b49b14..a590732466 100644 --- a/src/scratchpad/src/org/apache/poi/hdgf/chunks/ChunkTrailer.java +++ b/src/scratchpad/src/org/apache/poi/hdgf/chunks/ChunkTrailer.java @@ -26,4 +26,8 @@ public class ChunkTrailer { trailerData = new byte[8]; System.arraycopy(data, offset, trailerData, 0, 8); } + + public String toString() { + return ""; + } } diff --git a/src/scratchpad/src/org/apache/poi/hdgf/dev/VSDDumper.java b/src/scratchpad/src/org/apache/poi/hdgf/dev/VSDDumper.java index 3c20e4f3ff..614b9259a0 100644 --- a/src/scratchpad/src/org/apache/poi/hdgf/dev/VSDDumper.java +++ b/src/scratchpad/src/org/apache/poi/hdgf/dev/VSDDumper.java @@ -70,6 +70,11 @@ public class VSDDumper { " - " + Integer.toHexString(ptr.getFormat())); System.out.println(ind + " Length is\t" + ptr.getLength() + " - " + Integer.toHexString(ptr.getLength())); + if(ptr.destinationCompressed()) { + int decompLen = stream._getContentsLength(); + System.out.println(ind + " DC.Length is\t" + decompLen + + " - " + Integer.toHexString(decompLen)); + } System.out.println(ind + " 
Compressed is\t" + ptr.destinationCompressed()); System.out.println(ind + " Stream is\t" + stream.getClass().getName()); @@ -100,6 +105,9 @@ public class VSDDumper { for(int i=0; i"); + System.exit(1); + } + + VisioTextExtractor extractor = + new VisioTextExtractor(new FileInputStream(args[0])); + + // Print not PrintLn as already has \n added to it + System.out.print(extractor.getText()); + } +} diff --git a/src/scratchpad/src/org/apache/poi/hdgf/streams/ChunkStream.java b/src/scratchpad/src/org/apache/poi/hdgf/streams/ChunkStream.java index 75b6beefd1..a59fe43ff9 100644 --- a/src/scratchpad/src/org/apache/poi/hdgf/streams/ChunkStream.java +++ b/src/scratchpad/src/org/apache/poi/hdgf/streams/ChunkStream.java @@ -43,6 +43,11 @@ public class ChunkStream extends Stream { public void findChunks() { ArrayList chunksA = new ArrayList(); + if(getPointer().getOffset() == 0x64b3) { + int i = 0; + i++; + } + int pos = 0; byte[] contents = getStore().getContents(); while(pos < contents.length) { diff --git a/src/scratchpad/src/org/apache/poi/hdgf/streams/Stream.java b/src/scratchpad/src/org/apache/poi/hdgf/streams/Stream.java index 35aa7e5291..163fa83d9a 100644 --- a/src/scratchpad/src/org/apache/poi/hdgf/streams/Stream.java +++ b/src/scratchpad/src/org/apache/poi/hdgf/streams/Stream.java @@ -83,7 +83,7 @@ public abstract class Stream { return new ChunkStream(pointer, store, chunkFactory); } else if(pointer.destinationHasStrings()) { - return new StringsStream(pointer, store); + return new StringsStream(pointer, store, chunkFactory); } // Give up and return a generic one diff --git a/src/scratchpad/src/org/apache/poi/hdgf/streams/StringsStream.java b/src/scratchpad/src/org/apache/poi/hdgf/streams/StringsStream.java index 2688b156e9..b23ff92149 100644 --- a/src/scratchpad/src/org/apache/poi/hdgf/streams/StringsStream.java +++ b/src/scratchpad/src/org/apache/poi/hdgf/streams/StringsStream.java @@ -16,13 +16,16 @@ limitations under the License. 
==================================================================== */ package org.apache.poi.hdgf.streams; +import org.apache.poi.hdgf.chunks.ChunkFactory; import org.apache.poi.hdgf.pointers.Pointer; /** - * A Stream which holds Strings + * A Stream which holds Strings. This is just another kind + * of ChunkStream, it seems */ public class StringsStream extends Stream { - protected StringsStream(Pointer pointer, StreamStore store) { + protected StringsStream(Pointer pointer, StreamStore store, ChunkFactory chunkFactory) { super(pointer, store); +// super(pointer, store, chunkFactory); } } diff --git a/src/scratchpad/testcases/org/apache/poi/hdgf/extractor/TestVisioExtractor.java b/src/scratchpad/testcases/org/apache/poi/hdgf/extractor/TestVisioExtractor.java new file mode 100644 index 0000000000..a6541e9b0d --- /dev/null +++ b/src/scratchpad/testcases/org/apache/poi/hdgf/extractor/TestVisioExtractor.java @@ -0,0 +1,107 @@ +/* ==================================================================== + Licensed to the Apache Software Foundation (ASF) under one or more + contributor license agreements. See the NOTICE file distributed with + this work for additional information regarding copyright ownership. + The ASF licenses this file to You under the Apache License, Version 2.0 + (the "License"); you may not use this file except in compliance with + the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
+==================================================================== */ +package org.apache.poi.hdgf.extractor; + +import java.io.ByteArrayOutputStream; +import java.io.FileInputStream; +import java.io.PrintStream; + +import junit.framework.TestCase; + +import org.apache.poi.hdgf.HDGFDiagram; +import org.apache.poi.hdgf.chunks.Chunk; +import org.apache.poi.hdgf.chunks.ChunkFactory; +import org.apache.poi.hdgf.pointers.Pointer; +import org.apache.poi.hdgf.pointers.PointerFactory; +import org.apache.poi.hssf.record.formula.eval.StringOperationEval; +import org.apache.poi.poifs.filesystem.DocumentEntry; +import org.apache.poi.poifs.filesystem.POIFSFileSystem; + +public class TestVisioExtractor extends TestCase { + private String filename; + protected void setUp() throws Exception { + String dirname = System.getProperty("HDGF.testdata.path"); + filename = dirname + "/Test_Visio-Some_Random_Text.vsd"; + } + + /** + * Test the 3 different ways of creating one + */ + public void testCreation() throws Exception { + VisioTextExtractor extractor; + + extractor = new VisioTextExtractor(new FileInputStream(filename)); + assertNotNull(extractor); + assertNotNull(extractor.getAllText()); + assertEquals(3, extractor.getAllText().length); + + extractor = new VisioTextExtractor( + new POIFSFileSystem( + new FileInputStream(filename) + ) + ); + assertNotNull(extractor); + assertNotNull(extractor.getAllText()); + assertEquals(3, extractor.getAllText().length); + + extractor = new VisioTextExtractor( + new HDGFDiagram( + new POIFSFileSystem( + new FileInputStream(filename) + ) + ) + ); + assertNotNull(extractor); + assertNotNull(extractor.getAllText()); + assertEquals(3, extractor.getAllText().length); + } + + public void testExtraction() throws Exception { + VisioTextExtractor extractor = + new VisioTextExtractor(new FileInputStream(filename)); + + // Check the array fetch + String[] text = extractor.getAllText(); + assertNotNull(text); + assertEquals(3, text.length); + + 
assertEquals("Test View\n", text[0]); + assertEquals("I am a test view\n", text[1]); + assertEquals("Some random text, on a page\n", text[2]); + + // And the all-in fetch + String textS = extractor.getText(); + assertEquals("Test View\nI am a test view\nSome random text, on a page\n", textS); + } + + public void testMain() throws Exception { + PrintStream oldOut = System.out; + ByteArrayOutputStream baos = new ByteArrayOutputStream(); + PrintStream capture = new PrintStream(baos); + System.setOut(capture); + + VisioTextExtractor.main(new String[] {filename}); + + // Put things back + System.setOut(oldOut); + + // Check + capture.flush(); + String text = baos.toString(); + assertEquals("Test View\nI am a test view\nSome random text, on a page\n", text); + } +} diff --git a/src/scratchpad/testcases/org/apache/poi/hdgf/streams/TestStreamComplex.java b/src/scratchpad/testcases/org/apache/poi/hdgf/streams/TestStreamComplex.java index c2d03f0c89..5ea21d1a1c 100644 --- a/src/scratchpad/testcases/org/apache/poi/hdgf/streams/TestStreamComplex.java +++ b/src/scratchpad/testcases/org/apache/poi/hdgf/streams/TestStreamComplex.java @@ -18,6 +18,7 @@ package org.apache.poi.hdgf.streams; import java.io.FileInputStream; +import org.apache.poi.hdgf.chunks.Chunk; import org.apache.poi.hdgf.chunks.ChunkFactory; import org.apache.poi.hdgf.pointers.Pointer; import org.apache.poi.hdgf.pointers.PointerFactory; @@ -202,4 +203,63 @@ public class TestStreamComplex extends StreamTest { assertTrue(s8451.getPointedToStreams()[0] instanceof StringsStream); assertTrue(s8451.getPointedToStreams()[1] instanceof StringsStream); } + + public void testChunkWithText() throws Exception { + // Parent ChunkStream is at 0x7194 + // This is one of the last children of the trailer + Pointer trailerPtr = ptrFactory.createPointer(contents, trailerPointerAt); + TrailerStream ts = (TrailerStream) + Stream.createStream(trailerPtr, contents, chunkFactory, ptrFactory); + + ts.findChildren(contents); + + 
assertNotNull(ts.getChildPointers()); + assertNotNull(ts.getPointedToStreams()); + assertEquals(20, ts.getChildPointers().length); + assertEquals(20, ts.getPointedToStreams().length); + + assertEquals(0x7194, ts.getChildPointers()[13].getOffset()); + assertEquals(0x7194, ts.getPointedToStreams()[13].getPointer().getOffset()); + + PointerContainingStream ps7194 = (PointerContainingStream) + ts.getPointedToStreams()[13]; + + // First child is at 0x64b3 + assertEquals(0x64b3, ps7194.getChildPointers()[0].getOffset()); + assertEquals(0x64b3, ps7194.getPointedToStreams()[0].getPointer().getOffset()); + + ChunkStream cs = (ChunkStream)ps7194.getPointedToStreams()[0]; + + // Should be 26bc bytes un-compressed + assertEquals(0x26bc, cs.getStore().getContents().length); + // And should have lots of children + assertEquals(131, cs.getChunks().length); + + // One of which is Text + boolean hasText = false; + for(int i=0; i