aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorNick Burch <nick@apache.org>2010-09-20 14:26:49 +0000
committerNick Burch <nick@apache.org>2010-09-20 14:26:49 +0000
commit8b2f2bcff633d330eef903ab9babe7129ed1d96e (patch)
tree3ffb76158a2771a785ef6a2490ace86e70d0a1f0
parentd71e92ace2cd7fe60d6368caa95e8d21cd5b6e5e (diff)
downloadpoi-8b2f2bcff633d330eef903ab9babe7129ed1d96e.tar.gz
poi-8b2f2bcff633d330eef903ab9babe7129ed1d96e.zip
Work inspired by bug #48018 - get HWPF lists more consistent in read vs write, and preserve order as apparently that matters. Includes a fair number of list related unit tests, but not for everything
git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@998943 13f79535-47bb-0310-9956-ffa450edef68
-rw-r--r--src/documentation/content/xdocs/status.xml1
-rw-r--r--src/scratchpad/src/org/apache/poi/hwpf/model/ListLevel.java6
-rw-r--r--src/scratchpad/src/org/apache/poi/hwpf/model/ListTables.java122
-rw-r--r--src/scratchpad/src/org/apache/poi/hwpf/usermodel/Paragraph.java14
-rw-r--r--src/scratchpad/testcases/org/apache/poi/hwpf/usermodel/TestLists.java210
-rw-r--r--test-data/document/Lists.docbin0 -> 27648 bytes
6 files changed, 321 insertions, 32 deletions
diff --git a/src/documentation/content/xdocs/status.xml b/src/documentation/content/xdocs/status.xml
index f1b71436e1..d3eaf3f08e 100644
--- a/src/documentation/content/xdocs/status.xml
+++ b/src/documentation/content/xdocs/status.xml
@@ -34,6 +34,7 @@
<changes>
<release version="3.7-beta3" date="2010-??-??">
+ <action dev="poi-developers" type="fix">48018 - Improve HWPF handling of lists in documents read and then saved, by preserving order better</action>
<action dev="poi-developers" type="fix">49820 - Fix HWPF paragraph levels, so that outline levels can be properly fetched</action>
<action dev="poi-developers" type="fix">47271 - Avoid infinite loops on broken HWPF documents with a corrupt CHP style with a parent of itself</action>
<action dev="poi-developers" type="fix">49936 - Handle HWPF documents with problematic HeaderStories better</action>
diff --git a/src/scratchpad/src/org/apache/poi/hwpf/model/ListLevel.java b/src/scratchpad/src/org/apache/poi/hwpf/model/ListLevel.java
index de8e7afccd..d6392af776 100644
--- a/src/scratchpad/src/org/apache/poi/hwpf/model/ListLevel.java
+++ b/src/scratchpad/src/org/apache/poi/hwpf/model/ListLevel.java
@@ -91,7 +91,7 @@ public final class ListLevel
System.arraycopy(buf, offset, _rgbxchNums, 0, RGBXCH_NUMS_SIZE);
offset += RGBXCH_NUMS_SIZE;
- _ixchFollow = buf[offset++];
+ _ixchFollow = buf[offset++];
_dxaSpace = LittleEndian.getInt(buf, offset);
offset += LittleEndian.INT_SIZE;
_dxaIndent = LittleEndian.getInt(buf, offset);
@@ -216,10 +216,10 @@ public final class ListLevel
LittleEndian.putShort(buf, offset, _reserved);
offset += LittleEndian.SHORT_SIZE;
- System.arraycopy(_grpprlChpx, 0, buf, offset, _cbGrpprlChpx);
- offset += _cbGrpprlChpx;
System.arraycopy(_grpprlPapx, 0, buf, offset, _cbGrpprlPapx);
offset += _cbGrpprlPapx;
+ System.arraycopy(_grpprlChpx, 0, buf, offset, _cbGrpprlChpx);
+ offset += _cbGrpprlChpx;
if (_numberText == null) {
// TODO - write junit to test this flow
diff --git a/src/scratchpad/src/org/apache/poi/hwpf/model/ListTables.java b/src/scratchpad/src/org/apache/poi/hwpf/model/ListTables.java
index fd72d42270..9124c1fb20 100644
--- a/src/scratchpad/src/org/apache/poi/hwpf/model/ListTables.java
+++ b/src/scratchpad/src/org/apache/poi/hwpf/model/ListTables.java
@@ -17,19 +17,22 @@
package org.apache.poi.hwpf.model;
-import org.apache.poi.util.LittleEndian;
-import org.apache.poi.util.POILogFactory;
-import org.apache.poi.util.POILogger;
-
-import org.apache.poi.hwpf.model.io.*;
-
-import java.util.HashMap;
+import java.io.ByteArrayOutputStream;
+import java.io.IOException;
import java.util.ArrayList;
+import java.util.Collection;
+import java.util.Collections;
+import java.util.HashMap;
import java.util.Iterator;
+import java.util.List;
+import java.util.Map;
import java.util.NoSuchElementException;
+import java.util.Set;
-import java.io.ByteArrayOutputStream;
-import java.io.IOException;
+import org.apache.poi.hwpf.model.io.HWPFOutputStream;
+import org.apache.poi.util.LittleEndian;
+import org.apache.poi.util.POILogFactory;
+import org.apache.poi.util.POILogger;
/**
* @author Ryan Ackley
@@ -40,8 +43,8 @@ public final class ListTables
private static final int LIST_FORMAT_OVERRIDE_SIZE = 16;
private static POILogger log = POILogFactory.getLogger(ListTables.class);
- HashMap _listMap = new HashMap();
- ArrayList _overrideList = new ArrayList();
+ ListMap _listMap = new ListMap();
+ ArrayList<ListFormatOverride> _overrideList = new ArrayList<ListFormatOverride>();
public ListTables()
{
@@ -109,22 +112,17 @@ public final class ListTables
public void writeListDataTo(HWPFOutputStream tableStream)
throws IOException
{
-
- Integer[] intList = (Integer[])_listMap.keySet().toArray(new Integer[0]);
+ int listSize = _listMap.size();
// use this stream as a buffer for the levels since their size varies.
ByteArrayOutputStream levelBuf = new ByteArrayOutputStream();
- // use a byte array for the lists because we know their size.
- byte[] listBuf = new byte[intList.length * LIST_DATA_SIZE];
-
byte[] shortHolder = new byte[2];
- LittleEndian.putShort(shortHolder, (short)intList.length);
+ LittleEndian.putShort(shortHolder, (short)listSize);
tableStream.write(shortHolder);
- for (int x = 0; x < intList.length; x++)
- {
- ListData lst = (ListData)_listMap.get(intList[x]);
+ for(Integer x : _listMap.sortedKeys()) {
+ ListData lst = _listMap.get(x);
tableStream.write(lst.toByteArray());
ListLevel[] lvls = lst.getLevels();
for (int y = 0; y < lvls.length; y++)
@@ -150,7 +148,7 @@ public final class ListTables
for (int x = 0; x < size; x++)
{
- ListFormatOverride lfo = (ListFormatOverride)_overrideList.get(x);
+ ListFormatOverride lfo = _overrideList.get(x);
tableStream.write(lfo.toByteArray());
ListFormatOverrideLevel[] lfolvls = lfo.getLevelOverrides();
for (int y = 0; y < lfolvls.length; y++)
@@ -164,7 +162,7 @@ public final class ListTables
public ListFormatOverride getOverride(int lfoIndex)
{
- return (ListFormatOverride)_overrideList.get(lfoIndex - 1);
+ return _overrideList.get(lfoIndex - 1);
}
public int getOverrideIndexFromListID(int lstid)
@@ -173,7 +171,7 @@ public final class ListTables
int size = _overrideList.size();
for (int x = 0; x < size; x++)
{
- ListFormatOverride next = (ListFormatOverride)_overrideList.get(x);
+ ListFormatOverride next = _overrideList.get(x);
if (next.getLsid() == lstid)
{
// 1-based index I think
@@ -190,7 +188,7 @@ public final class ListTables
public ListLevel getLevel(int listID, int level)
{
- ListData lst = (ListData)_listMap.get(Integer.valueOf(listID));
+ ListData lst = _listMap.get(Integer.valueOf(listID));
if(level < lst.numLevels()) {
ListLevel lvl = lst.getLevels()[level];
return lvl;
@@ -201,7 +199,7 @@ public final class ListTables
public ListData getListData(int listID)
{
- return (ListData) _listMap.get(Integer.valueOf(listID));
+ return _listMap.get(Integer.valueOf(listID));
}
public boolean equals(Object obj)
@@ -215,12 +213,12 @@ public final class ListTables
if (_listMap.size() == tables._listMap.size())
{
- Iterator it = _listMap.keySet().iterator();
+ Iterator<Integer> it = _listMap.keySet().iterator();
while (it.hasNext())
{
- Object key = it.next();
- ListData lst1 = (ListData)_listMap.get(key);
- ListData lst2 = (ListData)tables._listMap.get(key);
+ Integer key = it.next();
+ ListData lst1 = _listMap.get(key);
+ ListData lst2 = tables._listMap.get(key);
if (!lst1.equals(lst2))
{
return false;
@@ -241,4 +239,70 @@ public final class ListTables
}
return false;
}
+
+ private static class ListMap implements Map<Integer, ListData> {
+ private ArrayList<Integer> keyList = new ArrayList<Integer>();
+ private HashMap<Integer,ListData> parent = new HashMap<Integer,ListData>();
+ private ListMap() {}
+
+ public void clear() {
+ keyList.clear();
+ parent.clear();
+ }
+
+ public boolean containsKey(Object key) {
+ return parent.containsKey(key);
+ }
+
+ public boolean containsValue(Object value) {
+ return parent.containsValue(value);
+ }
+
+ public ListData get(Object key) {
+ return parent.get(key);
+ }
+
+ public boolean isEmpty() {
+ return parent.isEmpty();
+ }
+
+ public ListData put(Integer key, ListData value) {
+ keyList.add(key);
+ return parent.put(key, value);
+ }
+
+ public void putAll(Map<? extends Integer, ? extends ListData> map) {
+ for(Entry<? extends Integer, ? extends ListData> entry : map.entrySet()) {
+ put(entry.getKey(), entry.getValue());
+ }
+ }
+
+ public ListData remove(Object key) {
+ keyList.remove(key);
+ return parent.remove(key);
+ }
+
+ public int size() {
+ return parent.size();
+ }
+
+ public Set<Entry<Integer, ListData>> entrySet() {
+ throw new IllegalStateException("Use sortedKeys() + get() instead");
+ }
+
+ public List<Integer> sortedKeys() {
+ return Collections.unmodifiableList(keyList);
+ }
+ public Set<Integer> keySet() {
+ throw new IllegalStateException("Use sortedKeys() instead");
+ }
+
+ public Collection<ListData> values() {
+ ArrayList<ListData> values = new ArrayList<ListData>();
+ for(Integer key : keyList) {
+ values.add(parent.get(key));
+ }
+ return values;
+ }
+ }
}
diff --git a/src/scratchpad/src/org/apache/poi/hwpf/usermodel/Paragraph.java b/src/scratchpad/src/org/apache/poi/hwpf/usermodel/Paragraph.java
index 87fe4d7f1d..f51576df9f 100644
--- a/src/scratchpad/src/org/apache/poi/hwpf/usermodel/Paragraph.java
+++ b/src/scratchpad/src/org/apache/poi/hwpf/usermodel/Paragraph.java
@@ -434,16 +434,30 @@ public class Paragraph extends Range implements Cloneable {
_papx.updateSprm(SPRM_DCS, dcs.toShort());
}
+ /**
+ * Returns the ilfo, an index to the document's hpllfo, which
+ * describes the automatic number formatting of the paragraph.
+ * A value of zero means it isn't numbered.
+ */
public int getIlfo()
{
return _props.getIlfo();
}
+ /**
+ * Returns the multi-level indent for the paragraph. Will be
+ * zero for non-list paragraphs, and the first level of any
+ * list. Subsequent levels in hold values 1-8.
+ */
public int getIlvl()
{
return _props.getIlvl();
}
+ /**
+ * Returns the heading level (1-8), or 9 if the paragraph
+ * isn't in a heading style.
+ */
public int getLvl()
{
return _props.getLvl();
diff --git a/src/scratchpad/testcases/org/apache/poi/hwpf/usermodel/TestLists.java b/src/scratchpad/testcases/org/apache/poi/hwpf/usermodel/TestLists.java
new file mode 100644
index 0000000000..eafb18fb95
--- /dev/null
+++ b/src/scratchpad/testcases/org/apache/poi/hwpf/usermodel/TestLists.java
@@ -0,0 +1,210 @@
+/* ====================================================================
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+==================================================================== */
+
+package org.apache.poi.hwpf.usermodel;
+
+import java.io.IOException;
+
+import junit.framework.TestCase;
+
+import org.apache.poi.hwpf.HWPFDocument;
+import org.apache.poi.hwpf.HWPFTestDataSamples;
+
+/**
+ * Tests for our handling of lists
+ */
+public final class TestLists extends TestCase {
+ public void testBasics() {
+ HWPFDocument doc = HWPFTestDataSamples.openSampleFile("Lists.doc");
+ Range r = doc.getRange();
+
+ assertEquals(40, r.numParagraphs());
+ assertEquals("Heading Level 1\r", r.getParagraph(0).text());
+ assertEquals("This document has different lists in it for testing\r", r.getParagraph(1).text());
+ assertEquals("The end!\r", r.getParagraph(38).text());
+ assertEquals("\r", r.getParagraph(39).text());
+
+ assertEquals(0, r.getParagraph(0).getLvl());
+ assertEquals(9, r.getParagraph(1).getLvl());
+ assertEquals(9, r.getParagraph(38).getLvl());
+ assertEquals(9, r.getParagraph(39).getLvl());
+ }
+
+ public void testUnorderedLists() {
+ HWPFDocument doc = HWPFTestDataSamples.openSampleFile("Lists.doc");
+ Range r = doc.getRange();
+ assertEquals(40, r.numParagraphs());
+
+ // Normal bullet points
+ assertEquals("This document has different lists in it for testing\r", r.getParagraph(1).text());
+ assertEquals("Unordered list 1\r", r.getParagraph(2).text());
+ assertEquals("UL 2\r", r.getParagraph(3).text());
+ assertEquals("UL 3\r", r.getParagraph(4).text());
+ assertEquals("Next up is an ordered list:\r", r.getParagraph(5).text());
+
+ assertEquals(9, r.getParagraph(1).getLvl());
+ assertEquals(9, r.getParagraph(2).getLvl());
+ assertEquals(9, r.getParagraph(3).getLvl());
+ assertEquals(9, r.getParagraph(4).getLvl());
+ assertEquals(9, r.getParagraph(5).getLvl());
+
+ assertEquals(0, r.getParagraph(1).getIlvl());
+ assertEquals(0, r.getParagraph(2).getIlvl());
+ assertEquals(0, r.getParagraph(3).getIlvl());
+ assertEquals(0, r.getParagraph(4).getIlvl());
+ assertEquals(0, r.getParagraph(5).getIlvl());
+
+ // Tick bullets
+ assertEquals("Now for an un-ordered list with a different bullet style:\r", r.getParagraph(9).text());
+ assertEquals("Tick 1\r", r.getParagraph(10).text());
+ assertEquals("Tick 2\r", r.getParagraph(11).text());
+ assertEquals("Multi-level un-ordered list:\r", r.getParagraph(12).text());
+
+ assertEquals(9, r.getParagraph(9).getLvl());
+ assertEquals(9, r.getParagraph(10).getLvl());
+ assertEquals(9, r.getParagraph(11).getLvl());
+ assertEquals(9, r.getParagraph(12).getLvl());
+
+ assertEquals(0, r.getParagraph(9).getIlvl());
+ assertEquals(0, r.getParagraph(10).getIlvl());
+ assertEquals(0, r.getParagraph(11).getIlvl());
+ assertEquals(0, r.getParagraph(12).getIlvl());
+
+ // TODO Test for tick not bullet
+ }
+
+ public void testOrderedLists() {
+ HWPFDocument doc = HWPFTestDataSamples.openSampleFile("Lists.doc");
+ Range r = doc.getRange();
+ assertEquals(40, r.numParagraphs());
+
+ assertEquals("Next up is an ordered list:\r", r.getParagraph(5).text());
+ assertEquals("Ordered list 1\r", r.getParagraph(6).text());
+ assertEquals("OL 2\r", r.getParagraph(7).text());
+ assertEquals("OL 3\r", r.getParagraph(8).text());
+ assertEquals("Now for an un-ordered list with a different bullet style:\r", r.getParagraph(9).text());
+
+ assertEquals(9, r.getParagraph(5).getLvl());
+ assertEquals(9, r.getParagraph(6).getLvl());
+ assertEquals(9, r.getParagraph(7).getLvl());
+ assertEquals(9, r.getParagraph(8).getLvl());
+ assertEquals(9, r.getParagraph(9).getLvl());
+
+ assertEquals(0, r.getParagraph(5).getIlvl());
+ assertEquals(0, r.getParagraph(6).getIlvl());
+ assertEquals(0, r.getParagraph(7).getIlvl());
+ assertEquals(0, r.getParagraph(8).getIlvl());
+ assertEquals(0, r.getParagraph(9).getIlvl());
+ }
+
+ public void testMultiLevelLists() {
+ HWPFDocument doc = HWPFTestDataSamples.openSampleFile("Lists.doc");
+ Range r = doc.getRange();
+ assertEquals(40, r.numParagraphs());
+
+ assertEquals("Multi-level un-ordered list:\r", r.getParagraph(12).text());
+ assertEquals("ML 1:1\r", r.getParagraph(13).text());
+ assertEquals("ML 1:2\r", r.getParagraph(14).text());
+ assertEquals("ML 2:1\r", r.getParagraph(15).text());
+ assertEquals("ML 2:2\r", r.getParagraph(16).text());
+ assertEquals("ML 2:3\r", r.getParagraph(17).text());
+ assertEquals("ML 3:1\r", r.getParagraph(18).text());
+ assertEquals("ML 4:1\r", r.getParagraph(19).text());
+ assertEquals("ML 5:1\r", r.getParagraph(20).text());
+ assertEquals("ML 5:2\r", r.getParagraph(21).text());
+ assertEquals("ML 2:4\r", r.getParagraph(22).text());
+ assertEquals("ML 1:3\r", r.getParagraph(23).text());
+ assertEquals("Multi-level ordered list:\r", r.getParagraph(24).text());
+ assertEquals("OL 1\r", r.getParagraph(25).text());
+ assertEquals("OL 2\r", r.getParagraph(26).text());
+ assertEquals("OL 2.1\r", r.getParagraph(27).text());
+ assertEquals("OL 2.2\r", r.getParagraph(28).text());
+ assertEquals("OL 2.2.1\r", r.getParagraph(29).text());
+ assertEquals("OL 2.2.2\r", r.getParagraph(30).text());
+ assertEquals("OL 2.2.2.1\r", r.getParagraph(31).text());
+ assertEquals("OL 2.2.3\r", r.getParagraph(32).text());
+ assertEquals("OL 3\r", r.getParagraph(33).text());
+ assertEquals("Finally we want some indents, to tell the difference\r", r.getParagraph(34).text());
+
+ for(int i=12; i<=34; i++) {
+ assertEquals(9, r.getParagraph(12).getLvl());
+ }
+ assertEquals(0, r.getParagraph(12).getIlvl());
+ assertEquals(0, r.getParagraph(13).getIlvl());
+ assertEquals(0, r.getParagraph(14).getIlvl());
+ assertEquals(1, r.getParagraph(15).getIlvl());
+ assertEquals(1, r.getParagraph(16).getIlvl());
+ assertEquals(1, r.getParagraph(17).getIlvl());
+ assertEquals(2, r.getParagraph(18).getIlvl());
+ assertEquals(3, r.getParagraph(19).getIlvl());
+ assertEquals(4, r.getParagraph(20).getIlvl());
+ assertEquals(4, r.getParagraph(21).getIlvl());
+ assertEquals(1, r.getParagraph(22).getIlvl());
+ assertEquals(0, r.getParagraph(23).getIlvl());
+ assertEquals(0, r.getParagraph(24).getIlvl());
+ assertEquals(0, r.getParagraph(25).getIlvl());
+ assertEquals(0, r.getParagraph(26).getIlvl());
+ assertEquals(1, r.getParagraph(27).getIlvl());
+ assertEquals(1, r.getParagraph(28).getIlvl());
+ assertEquals(2, r.getParagraph(29).getIlvl());
+ assertEquals(2, r.getParagraph(30).getIlvl());
+ assertEquals(3, r.getParagraph(31).getIlvl());
+ assertEquals(2, r.getParagraph(32).getIlvl());
+ assertEquals(0, r.getParagraph(33).getIlvl());
+ assertEquals(0, r.getParagraph(34).getIlvl());
+ }
+
+ public void testIndentedText() {
+ HWPFDocument doc = HWPFTestDataSamples.openSampleFile("Lists.doc");
+ Range r = doc.getRange();
+
+ assertEquals(40, r.numParagraphs());
+ assertEquals("Finally we want some indents, to tell the difference\r", r.getParagraph(34).text());
+ assertEquals("Indented once\r", r.getParagraph(35).text());
+ assertEquals("Indented twice\r", r.getParagraph(36).text());
+ assertEquals("Indented three times\r", r.getParagraph(37).text());
+ assertEquals("The end!\r", r.getParagraph(38).text());
+
+ assertEquals(9, r.getParagraph(34).getLvl());
+ assertEquals(9, r.getParagraph(35).getLvl());
+ assertEquals(9, r.getParagraph(36).getLvl());
+ assertEquals(9, r.getParagraph(37).getLvl());
+ assertEquals(9, r.getParagraph(38).getLvl());
+ assertEquals(9, r.getParagraph(39).getLvl());
+
+ assertEquals(0, r.getParagraph(34).getIlvl());
+ assertEquals(0, r.getParagraph(35).getIlvl());
+ assertEquals(0, r.getParagraph(36).getIlvl());
+ assertEquals(0, r.getParagraph(37).getIlvl());
+ assertEquals(0, r.getParagraph(38).getIlvl());
+ assertEquals(0, r.getParagraph(39).getIlvl());
+
+ // TODO Test the indent
+ }
+
+ public void testWriteRead() throws IOException {
+ HWPFDocument doc = HWPFTestDataSamples.openSampleFile("Lists.doc");
+ doc = HWPFTestDataSamples.writeOutAndReadBack(doc);
+
+ Range r = doc.getRange();
+
+ // Check a couple at random
+ assertEquals(4, r.getParagraph(21).getIlvl());
+ assertEquals(1, r.getParagraph(22).getIlvl());
+ assertEquals(0, r.getParagraph(23).getIlvl());
+ }
+}
diff --git a/test-data/document/Lists.doc b/test-data/document/Lists.doc
new file mode 100644
index 0000000000..80201eef14
--- /dev/null
+++ b/test-data/document/Lists.doc
Binary files differ